fix: surface provider decision in otel diagnostics
This commit is contained in:
@@ -90,6 +90,7 @@ export function compactDiagnoseCodeAgentResult(value: unknown): Record<string, u
|
||||
observabilityGap: source.observabilityGap ?? null,
|
||||
businessTraceIds: source.businessTraceIds ?? null,
|
||||
businessTraceScope: source.businessTraceScope ?? null,
|
||||
providerDecision: source.providerDecision ?? null,
|
||||
identity: compactDiagnoseIdentity(source.identity),
|
||||
agentrun: compactDiagnoseAgentRun(source.agentrun),
|
||||
hwlabReadModel: source.hwlabReadModel ?? null,
|
||||
@@ -252,7 +253,7 @@ export function compactSpanList(value: unknown, limit: number): unknown[] {
|
||||
return {
|
||||
name: span.name ?? null,
|
||||
service: span.service ?? null,
|
||||
attributes: compactRecord(attrs, ["failureKind", "terminalStatus", "status", "eventType", "idleMs", "waitingFor", "lastEventLabel", "http.route", "http.status_code", "http.response.status_code", "workbench.session_id", "workbench.trace_id", "workbench.turn_id", "workbench.read_model.route", "workbench.read_model.count", "workbench.read_model.family", "workbench.read_model.status", "workbench.read_model.reason", "hwlab.http.stage", "hwlab.http.phase", "hwlab.http.phase.outcome", "hwlab.live_builds.service_id", "hwlab.live_builds.service_kind", "hwlab.live_builds.external", "hwlab.live_builds.deploy_manifest_status", "hwlab.live_builds.artifact_catalog_status"]),
|
||||
attributes: compactRecord(attrs, ["failureKind", "terminalStatus", "status", "eventType", "idleMs", "waitingFor", "lastEventLabel", "http.route", "http.status_code", "http.response.status_code", "workbench.session_id", "workbench.trace_id", "workbench.turn_id", "workbench.read_model.route", "workbench.read_model.count", "workbench.read_model.family", "workbench.read_model.status", "workbench.read_model.reason", "hwlab.http.stage", "hwlab.http.phase", "hwlab.http.phase.outcome", "hwlab.live_builds.service_id", "hwlab.live_builds.service_kind", "hwlab.live_builds.external", "hwlab.live_builds.deploy_manifest_status", "hwlab.live_builds.artifact_catalog_status", "providerProfile", "model", "modelId", "providerModel", "code_agent.stage", "agent.chat.session_id", "agent.chat.provider_profile", "agent.chat.provider_profile_source", "agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model", "defaultProviderProfile", "adapter", "adapterEnabled", "agentRunManagerHost", "agentRunRunnerNamespace", "agentRunSourceCommitPresent"]),
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
@@ -90,6 +90,13 @@ IMPORTANT_ATTRS = [
|
||||
"error.code", "error.category", "error.layer",
|
||||
"stage", "causeStage", "causeCode",
|
||||
"eventType", "runnerId", "attemptId", "backendProfile",
|
||||
"providerProfile", "model", "modelId", "providerModel",
|
||||
"code_agent.stage", "agent.chat.session_id",
|
||||
"agent.chat.provider_profile", "agent.chat.provider_profile_source",
|
||||
"agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model",
|
||||
"defaultProviderProfile", "adapter", "adapterEnabled",
|
||||
"agentRunManagerHost", "agentRunRunnerNamespace",
|
||||
"agentRunSourceCommitPresent",
|
||||
"sourceCommit", "jobName", "podName", "logPath",
|
||||
"toolName", "type", "itemType", "itemId", "status", "exitCode",
|
||||
"durationMs", "cwd", "processId", "command", "commandFingerprint",
|
||||
@@ -407,6 +414,13 @@ IMPORTANT_ATTRS = [
|
||||
"error.code", "error.category", "error.layer",
|
||||
"stage", "causeStage", "causeCode",
|
||||
"eventType", "runnerId", "attemptId", "backendProfile",
|
||||
"providerProfile", "model", "modelId", "providerModel",
|
||||
"code_agent.stage", "agent.chat.session_id",
|
||||
"agent.chat.provider_profile", "agent.chat.provider_profile_source",
|
||||
"agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model",
|
||||
"defaultProviderProfile", "adapter", "adapterEnabled",
|
||||
"agentRunManagerHost", "agentRunRunnerNamespace",
|
||||
"agentRunSourceCommitPresent",
|
||||
"sourceCommit", "jobName", "podName", "logPath",
|
||||
"toolName", "type", "itemType", "itemId", "status", "exitCode",
|
||||
"durationMs", "cwd", "processId", "command", "commandFingerprint",
|
||||
@@ -972,6 +986,13 @@ IMPORTANT_ATTRS = [
|
||||
"error.code", "error.category", "error.layer",
|
||||
"stage", "causeStage", "causeCode",
|
||||
"eventType", "runnerId", "attemptId", "backendProfile",
|
||||
"providerProfile", "model", "modelId", "providerModel",
|
||||
"code_agent.stage", "agent.chat.session_id",
|
||||
"agent.chat.provider_profile", "agent.chat.provider_profile_source",
|
||||
"agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model",
|
||||
"defaultProviderProfile", "adapter", "adapterEnabled",
|
||||
"agentRunManagerHost", "agentRunRunnerNamespace",
|
||||
"agentRunSourceCommitPresent",
|
||||
"sourceCommit", "jobName", "podName", "logPath",
|
||||
"toolName", "type", "itemType", "itemId", "status", "exitCode",
|
||||
"durationMs", "cwd", "processId", "command", "commandFingerprint",
|
||||
@@ -1412,6 +1433,13 @@ def tiny_span(item):
|
||||
"hwlab.live_builds.service_id", "hwlab.live_builds.service_kind",
|
||||
"hwlab.live_builds.external", "hwlab.live_builds.deploy_manifest_status",
|
||||
"hwlab.live_builds.artifact_catalog_status",
|
||||
"providerProfile", "model", "modelId", "providerModel",
|
||||
"code_agent.stage", "agent.chat.session_id",
|
||||
"agent.chat.provider_profile", "agent.chat.provider_profile_source",
|
||||
"agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model",
|
||||
"defaultProviderProfile", "adapter", "adapterEnabled",
|
||||
"agentRunManagerHost", "agentRunRunnerNamespace",
|
||||
"agentRunSourceCommitPresent",
|
||||
):
|
||||
if key in attrs:
|
||||
keep_attrs[key] = attrs[key]
|
||||
@@ -1450,6 +1478,55 @@ def identity_from_spans(spans):
|
||||
identity[key] = preferred if preferred is not None else fallback
|
||||
return identity
|
||||
|
||||
def attr_first(attrs, keys):
    """Return the first usable value stored in attrs under any of keys.

    Keys are tried in the order given. A value is usable unless it is None
    or the empty string, so falsy values such as 0 or False are still
    returned. Returns None when attrs is not a dict or when none of the
    keys yields a usable value.
    """
    if isinstance(attrs, dict):
        for candidate in (attrs.get(name) for name in keys):
            if candidate in (None, ""):
                continue
            return candidate
    return None
|
||||
|
||||
def provider_decision_summary(spans):
    """Summarise the provider profile, model and adapter reported by spans.

    A span contributes a row when it is named "provider_decision" or when
    its attributes carry one of the provider-profile keys. Each row keeps
    only the fields that resolved to a value other than None or "".

    The top-level fields of the result are copied from the first row (None
    when there is no row). "spanCount" is the number of rows and
    "decisions" holds at most the first five rows.

    Entries of spans that are not dicts are skipped, and a None spans
    argument is treated as empty, so one malformed span cannot abort the
    whole summary with an AttributeError or TypeError.
    """
    rows = []
    for item in spans or ():
        # Trace bodies come from parsed JSON; tolerate entries of the wrong shape.
        if not isinstance(item, dict):
            continue
        raw_attrs = item.get("attributes")
        attrs = raw_attrs if isinstance(raw_attrs, dict) else {}
        name = str(item.get("name") or "")
        # attr_first already filters out "", so None is its only "missing" result.
        has_profile = attr_first(attrs, ("agent.chat.provider_profile", "providerProfile", "defaultProviderProfile")) is not None
        if name != "provider_decision" and not has_profile:
            continue
        row = {
            "span": name or None,
            "service": item.get("service"),
            "traceId": attr_first(attrs, ("traceId", "workbench.trace_id", "workbench.turn_id")),
            "sessionId": attr_first(attrs, ("agent.chat.session_id", "sessionId", "workbench.session_id")),
            "providerProfile": attr_first(attrs, ("agent.chat.provider_profile", "providerProfile", "backendProfile", "defaultProviderProfile")),
            "providerProfileSource": attr_first(attrs, ("agent.chat.provider_profile_source", "providerProfileSource")),
            "defaultProviderProfile": attr_first(attrs, ("defaultProviderProfile",)),
            "model": attr_first(attrs, ("agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model", "model", "modelId", "providerModel")),
            "adapter": attr_first(attrs, ("adapter",)),
            "adapterEnabled": attr_first(attrs, ("adapterEnabled",)),
            "managerHost": attr_first(attrs, ("agentRunManagerHost",)),
            "runnerNamespace": attr_first(attrs, ("agentRunRunnerNamespace",)),
            "sourceCommitPresent": attr_first(attrs, ("agentRunSourceCommitPresent",)),
        }
        # Drop unresolved fields; False and 0 are real values and are kept.
        compact = {key: value for key, value in row.items() if value not in (None, "")}
        if compact:
            rows.append(compact)
    # The headline fields come from the first contributing span only.
    primary = rows[0] if rows else {}
    return {
        "providerProfile": primary.get("providerProfile"),
        "providerProfileSource": primary.get("providerProfileSource"),
        "defaultProviderProfile": primary.get("defaultProviderProfile"),
        "model": primary.get("model"),
        "adapter": primary.get("adapter"),
        "adapterEnabled": primary.get("adapterEnabled"),
        "managerHost": primary.get("managerHost"),
        "runnerNamespace": primary.get("runnerNamespace"),
        "sourceCommitPresent": primary.get("sourceCommitPresent"),
        "spanCount": len(rows),
        "decisions": rows[:5],
    }
|
||||
|
||||
def span_business_trace_id(item):
|
||||
attrs = item.get("attributes", {}) if isinstance(item.get("attributes"), dict) else {}
|
||||
for key in ("traceId", "workbench.trace_id", "workbench.turn_id"):
|
||||
@@ -2036,6 +2113,7 @@ def candidate_score(trace_id, meta, trace_body, trace_rc, trace_err):
|
||||
"terminalStatus": agentrun.get("terminalStatus"),
|
||||
"errorSpanCount": len(error_spans),
|
||||
"businessTraceScope": business_scope,
|
||||
"providerDecision": provider_decision_summary(spans),
|
||||
"candidateQuality": candidate_quality,
|
||||
"lowConfidence": candidate_quality != "normal",
|
||||
"rootTraceName": meta_root_name or None,
|
||||
@@ -2187,6 +2265,7 @@ idle_warning_spans = [item for item in spans if str(item.get("name") or "") == "
|
||||
http_summary = http_status_summary(spans)
|
||||
read_model = hwlab_read_model_summary(spans)
|
||||
identity = identity_from_spans(spans)
|
||||
provider_decision = provider_decision_summary(spans)
|
||||
agentrun_authority = agentrun_authority_summary(identity)
|
||||
agentrun = agentrun_summary(spans, agentrun_authority)
|
||||
lag = projection_lag_summary(agentrun, read_model)
|
||||
@@ -2254,6 +2333,9 @@ summary = {
|
||||
"failureKind": agentrun.get("failureKind"),
|
||||
"observabilityGap": observability_gap.get("status"),
|
||||
"businessTraceScope": business_scope.get("mode"),
|
||||
"providerProfile": provider_decision.get("providerProfile"),
|
||||
"providerModel": provider_decision.get("model"),
|
||||
"providerAdapter": provider_decision.get("adapter"),
|
||||
},
|
||||
}
|
||||
evidence = {
|
||||
@@ -2284,6 +2366,7 @@ payload = {
|
||||
"observabilityGap": observability_gap,
|
||||
"businessTraceIds": business_trace_ids[:20],
|
||||
"businessTraceScope": business_scope,
|
||||
"providerDecision": provider_decision,
|
||||
"identity": identity,
|
||||
"agentrun": {
|
||||
"terminalStatus": agentrun.get("terminalStatus"),
|
||||
|
||||
@@ -382,6 +382,7 @@ export function renderTraceTable(input: {
|
||||
const turnRows = traceTurnStatusRows(allSpans);
|
||||
const projectionRows = traceProjectionRows(allSpans);
|
||||
const projectionSyncRows = traceProjectionSyncRows(allSpans);
|
||||
const providerRows = traceProviderDecisionRows(allSpans);
|
||||
const countRows = asArray(input.result.spanNameCounts).slice(0, 8).map((item) => {
|
||||
const row = asPlainRecord(item) ?? {};
|
||||
return [shortenEnd(textValue(row.name), 64), textValue(row.count)];
|
||||
@@ -406,6 +407,9 @@ export function renderTraceTable(input: {
|
||||
"Identity:",
|
||||
formatTable(["FIELD", "VALUE"], identityRows.length > 0 ? identityRows : [["-", "-"]]),
|
||||
"",
|
||||
"Provider decision:",
|
||||
formatTable(["SERVICE", "PROFILE", "SOURCE", "DEFAULT", "MODEL", "ADAPTER", "RUNNER_NS", "SESSION"], providerRows.length > 0 ? providerRows : [["-", "-", "-", "-", "-", "-", "-", "-"]]),
|
||||
"",
|
||||
"Key spans:",
|
||||
formatTable(["NAME", "SERVICE", "ROUTE", "STATUS", "DUR_MS", "RES_MS", "FETCH_REQ", "REQ_WAIT", "RESP_XFER", "PROTO", "DETAIL"], spanRows.length > 0 ? spanRows : [["-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"]]),
|
||||
"",
|
||||
@@ -483,6 +487,27 @@ function traceRunnerRows(spans: Record<string, unknown>[]): string[][] {
|
||||
return dedupeRows(rows).slice(0, 12);
|
||||
}
|
||||
|
||||
/**
 * Builds the table rows describing provider decisions found in a trace.
 *
 * A span is included when it is named "provider_decision" or when one of its
 * provider-profile attributes resolves to something other than "-"
 * (presumably the not-found placeholder of spanColumnAttr; confirm against
 * that helper).
 *
 * Each row has eight cells, in order: service, provider profile, profile
 * source, default profile, model, adapter, runner namespace, session id.
 * Cells are shortened to fixed widths, rows are de-duplicated, and at most
 * eight rows are returned.
 */
function traceProviderDecisionRows(spans: Record<string, unknown>[]): string[][] {
  const profileSignalKeys = ["agent.chat.provider_profile", "providerProfile", "defaultProviderProfile"];
  const collected: string[][] = [];

  for (const span of spans) {
    const attrs = asPlainRecord(span.attributes);

    // Keep explicit decision spans; otherwise require a provider-profile attribute.
    const isDecisionSpan = textValue(span.name) === "provider_decision";
    if (!isDecisionSpan && spanColumnAttr(attrs, profileSignalKeys) === "-") {
      continue;
    }

    const service = textValue(span.service);
    const profile = spanColumnAttr(attrs, ["agent.chat.provider_profile", "providerProfile", "backendProfile", "defaultProviderProfile"]);
    const profileSource = spanColumnAttr(attrs, ["agent.chat.provider_profile_source", "providerProfileSource"]);
    const defaultProfile = spanColumnAttr(attrs, ["defaultProviderProfile"]);
    const model = spanColumnAttr(attrs, ["agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model", "model", "modelId", "providerModel"]);
    const adapter = spanColumnAttr(attrs, ["adapter"]);
    const runnerNamespace = spanColumnAttr(attrs, ["agentRunRunnerNamespace"]);
    const sessionId = spanColumnAttr(attrs, ["agent.chat.session_id", "sessionId", "workbench.session_id"]);

    collected.push([
      shortenEnd(service, 20),
      shortenEnd(profile, 22),
      shortenEnd(profileSource, 12),
      shortenEnd(defaultProfile, 22),
      shortenEnd(model, 28),
      shortenEnd(adapter, 16),
      shortenEnd(runnerNamespace, 16),
      shortenMiddle(sessionId, 24),
    ]);
  }

  return dedupeRows(collected).slice(0, 8);
}
|
||||
|
||||
function traceReadWindowRows(spans: Record<string, unknown>[]): string[][] {
|
||||
const rows = spans.filter((span) => textValue(span.name) === "trace_events_read").map((span) => {
|
||||
const attrs = asPlainRecord(span.attributes);
|
||||
@@ -678,6 +703,7 @@ export function renderDiagnoseCodeAgentTable(input: {
|
||||
const evidence = asPlainRecord(input.result.evidence);
|
||||
const observabilityGap = asPlainRecord(input.result.observabilityGap);
|
||||
const servicePath = asPlainRecord(input.result.servicePath);
|
||||
const providerDecision = asPlainRecord(input.result.providerDecision);
|
||||
const rootCauses = asArray(input.result.rootCauseCandidates).map((item) => asPlainRecord(item) ?? {});
|
||||
const http = asPlainRecord(input.result.http);
|
||||
const services = joinValues(input.result.services, 54);
|
||||
@@ -727,6 +753,7 @@ export function renderDiagnoseCodeAgentTable(input: {
|
||||
` requested runId=${requestedRunId} commandId=${requestedCommandId} sessionId=${requestedSessionId} runnerJobId=${requestedRunnerJobId}`,
|
||||
` observed runId=${observedRunId} commandId=${observedCommandId} sessionId=${observedSessionId} runnerJobId=${observedRunnerJobId} runnerId=${textValue(identity?.runnerId)}`,
|
||||
` backendProfile=${textValue(identity?.backendProfile)} sourceCommit=${shortenMiddle(textValue(identity?.sourceCommit), 20)}`,
|
||||
` providerProfile=${textValue(providerDecision?.providerProfile)} model=${textValue(providerDecision?.model)} source=${textValue(providerDecision?.providerProfileSource)} adapter=${textValue(providerDecision?.adapter)} runnerNamespace=${textValue(providerDecision?.runnerNamespace)}`,
|
||||
"",
|
||||
"Root causes:",
|
||||
formatTable(["CODE", "CONF", "SUMMARY"], rootRows.length > 0 ? rootRows : [["-", "-", "-"]]),
|
||||
|
||||
Reference in New Issue
Block a user