fix: surface provider decision in otel diagnostics

This commit is contained in:
Codex
2026-07-01 16:39:40 +00:00
parent 0dcdf0f5f7
commit 6222744119
3 changed files with 112 additions and 1 deletions
@@ -90,6 +90,7 @@ export function compactDiagnoseCodeAgentResult(value: unknown): Record<string, u
observabilityGap: source.observabilityGap ?? null,
businessTraceIds: source.businessTraceIds ?? null,
businessTraceScope: source.businessTraceScope ?? null,
providerDecision: source.providerDecision ?? null,
identity: compactDiagnoseIdentity(source.identity),
agentrun: compactDiagnoseAgentRun(source.agentrun),
hwlabReadModel: source.hwlabReadModel ?? null,
@@ -252,7 +253,7 @@ export function compactSpanList(value: unknown, limit: number): unknown[] {
return {
name: span.name ?? null,
service: span.service ?? null,
attributes: compactRecord(attrs, ["failureKind", "terminalStatus", "status", "eventType", "idleMs", "waitingFor", "lastEventLabel", "http.route", "http.status_code", "http.response.status_code", "workbench.session_id", "workbench.trace_id", "workbench.turn_id", "workbench.read_model.route", "workbench.read_model.count", "workbench.read_model.family", "workbench.read_model.status", "workbench.read_model.reason", "hwlab.http.stage", "hwlab.http.phase", "hwlab.http.phase.outcome", "hwlab.live_builds.service_id", "hwlab.live_builds.service_kind", "hwlab.live_builds.external", "hwlab.live_builds.deploy_manifest_status", "hwlab.live_builds.artifact_catalog_status"]),
attributes: compactRecord(attrs, ["failureKind", "terminalStatus", "status", "eventType", "idleMs", "waitingFor", "lastEventLabel", "http.route", "http.status_code", "http.response.status_code", "workbench.session_id", "workbench.trace_id", "workbench.turn_id", "workbench.read_model.route", "workbench.read_model.count", "workbench.read_model.family", "workbench.read_model.status", "workbench.read_model.reason", "hwlab.http.stage", "hwlab.http.phase", "hwlab.http.phase.outcome", "hwlab.live_builds.service_id", "hwlab.live_builds.service_kind", "hwlab.live_builds.external", "hwlab.live_builds.deploy_manifest_status", "hwlab.live_builds.artifact_catalog_status", "providerProfile", "model", "modelId", "providerModel", "code_agent.stage", "agent.chat.session_id", "agent.chat.provider_profile", "agent.chat.provider_profile_source", "agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model", "defaultProviderProfile", "adapter", "adapterEnabled", "agentRunManagerHost", "agentRunRunnerNamespace", "agentRunSourceCommitPresent"]),
};
});
}
@@ -90,6 +90,13 @@ IMPORTANT_ATTRS = [
"error.code", "error.category", "error.layer",
"stage", "causeStage", "causeCode",
"eventType", "runnerId", "attemptId", "backendProfile",
"providerProfile", "model", "modelId", "providerModel",
"code_agent.stage", "agent.chat.session_id",
"agent.chat.provider_profile", "agent.chat.provider_profile_source",
"agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model",
"defaultProviderProfile", "adapter", "adapterEnabled",
"agentRunManagerHost", "agentRunRunnerNamespace",
"agentRunSourceCommitPresent",
"sourceCommit", "jobName", "podName", "logPath",
"toolName", "type", "itemType", "itemId", "status", "exitCode",
"durationMs", "cwd", "processId", "command", "commandFingerprint",
@@ -407,6 +414,13 @@ IMPORTANT_ATTRS = [
"error.code", "error.category", "error.layer",
"stage", "causeStage", "causeCode",
"eventType", "runnerId", "attemptId", "backendProfile",
"providerProfile", "model", "modelId", "providerModel",
"code_agent.stage", "agent.chat.session_id",
"agent.chat.provider_profile", "agent.chat.provider_profile_source",
"agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model",
"defaultProviderProfile", "adapter", "adapterEnabled",
"agentRunManagerHost", "agentRunRunnerNamespace",
"agentRunSourceCommitPresent",
"sourceCommit", "jobName", "podName", "logPath",
"toolName", "type", "itemType", "itemId", "status", "exitCode",
"durationMs", "cwd", "processId", "command", "commandFingerprint",
@@ -972,6 +986,13 @@ IMPORTANT_ATTRS = [
"error.code", "error.category", "error.layer",
"stage", "causeStage", "causeCode",
"eventType", "runnerId", "attemptId", "backendProfile",
"providerProfile", "model", "modelId", "providerModel",
"code_agent.stage", "agent.chat.session_id",
"agent.chat.provider_profile", "agent.chat.provider_profile_source",
"agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model",
"defaultProviderProfile", "adapter", "adapterEnabled",
"agentRunManagerHost", "agentRunRunnerNamespace",
"agentRunSourceCommitPresent",
"sourceCommit", "jobName", "podName", "logPath",
"toolName", "type", "itemType", "itemId", "status", "exitCode",
"durationMs", "cwd", "processId", "command", "commandFingerprint",
@@ -1412,6 +1433,13 @@ def tiny_span(item):
"hwlab.live_builds.service_id", "hwlab.live_builds.service_kind",
"hwlab.live_builds.external", "hwlab.live_builds.deploy_manifest_status",
"hwlab.live_builds.artifact_catalog_status",
"providerProfile", "model", "modelId", "providerModel",
"code_agent.stage", "agent.chat.session_id",
"agent.chat.provider_profile", "agent.chat.provider_profile_source",
"agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model",
"defaultProviderProfile", "adapter", "adapterEnabled",
"agentRunManagerHost", "agentRunRunnerNamespace",
"agentRunSourceCommitPresent",
):
if key in attrs:
keep_attrs[key] = attrs[key]
@@ -1450,6 +1478,55 @@ def identity_from_spans(spans):
identity[key] = preferred if preferred is not None else fallback
return identity
def attr_first(attrs, keys):
    """Return the first usable value stored in ``attrs`` under any of ``keys``.

    Args:
        attrs: Attribute mapping to search. Anything that is not a ``dict``
            is treated as "no attributes" and yields ``None``.
        keys: Candidate attribute names, checked in the order given. A bare
            string is treated as one key instead of being iterated character
            by character.

    Returns:
        The first value that is neither ``None`` nor the empty string, or
        ``None`` when no candidate key holds such a value. Other falsy values
        (``0``, ``False``) count as present and are returned as-is.
    """
    if not isinstance(attrs, dict):
        return None
    # Iterating a plain string would look up each character as its own key and
    # could match an unrelated one-letter attribute.
    if isinstance(keys, str):
        keys = (keys,)
    for key in keys:
        value = attrs.get(key)
        if value not in (None, ""):
            return value
    return None
def provider_decision_summary(spans):
    """Summarise the provider/model selection recorded on a list of spans.

    A span contributes a row when it is named ``provider_decision`` or when it
    carries at least one of the provider-profile attributes listed in
    ``profile_markers``. Each row keeps only the fields that resolved to a
    value other than ``None`` or ``""``.

    Args:
        spans: Iterable of span dicts; each is read via ``.get`` for ``name``,
            ``service`` and ``attributes``.

    Returns:
        A dict whose scalar fields are copied from the first contributing row
        (``None`` when that row lacks the field or no row exists), plus
        ``spanCount`` (number of contributing rows) and ``decisions`` (the
        first five rows).
    """
    profile_markers = ("agent.chat.provider_profile", "providerProfile", "defaultProviderProfile")
    # Output field -> attribute names tried in priority order.
    attr_fields = (
        ("traceId", ("traceId", "workbench.trace_id", "workbench.turn_id")),
        ("sessionId", ("agent.chat.session_id", "sessionId", "workbench.session_id")),
        ("providerProfile", ("agent.chat.provider_profile", "providerProfile", "backendProfile", "defaultProviderProfile")),
        ("providerProfileSource", ("agent.chat.provider_profile_source", "providerProfileSource")),
        ("defaultProviderProfile", ("defaultProviderProfile",)),
        ("model", ("agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model", "model", "modelId", "providerModel")),
        ("adapter", ("adapter",)),
        ("adapterEnabled", ("adapterEnabled",)),
        ("managerHost", ("agentRunManagerHost",)),
        ("runnerNamespace", ("agentRunRunnerNamespace",)),
        ("sourceCommitPresent", ("agentRunSourceCommitPresent",)),
    )
    # Fields lifted from the first row to the top level of the summary.
    headline_fields = (
        "providerProfile",
        "providerProfileSource",
        "defaultProviderProfile",
        "model",
        "adapter",
        "adapterEnabled",
        "managerHost",
        "runnerNamespace",
        "sourceCommitPresent",
    )

    decisions = []
    for span in spans:
        raw_attrs = span.get("attributes", {})
        span_attrs = raw_attrs if isinstance(raw_attrs, dict) else {}
        span_name = str(span.get("name") or "")
        # Spans that are neither the explicit decision span nor tagged with a
        # provider profile carry nothing to report.
        if span_name != "provider_decision" and attr_first(span_attrs, profile_markers) in (None, ""):
            continue
        entry = {"span": span_name or None, "service": span.get("service")}
        for field, candidates in attr_fields:
            entry[field] = attr_first(span_attrs, candidates)
        trimmed = {field: value for field, value in entry.items() if value not in (None, "")}
        if trimmed:
            decisions.append(trimmed)

    head = decisions[0] if decisions else {}
    summary = {field: head.get(field) for field in headline_fields}
    summary["spanCount"] = len(decisions)
    summary["decisions"] = decisions[:5]
    return summary
def span_business_trace_id(item):
attrs = item.get("attributes", {}) if isinstance(item.get("attributes"), dict) else {}
for key in ("traceId", "workbench.trace_id", "workbench.turn_id"):
@@ -2036,6 +2113,7 @@ def candidate_score(trace_id, meta, trace_body, trace_rc, trace_err):
"terminalStatus": agentrun.get("terminalStatus"),
"errorSpanCount": len(error_spans),
"businessTraceScope": business_scope,
"providerDecision": provider_decision_summary(spans),
"candidateQuality": candidate_quality,
"lowConfidence": candidate_quality != "normal",
"rootTraceName": meta_root_name or None,
@@ -2187,6 +2265,7 @@ idle_warning_spans = [item for item in spans if str(item.get("name") or "") == "
http_summary = http_status_summary(spans)
read_model = hwlab_read_model_summary(spans)
identity = identity_from_spans(spans)
provider_decision = provider_decision_summary(spans)
agentrun_authority = agentrun_authority_summary(identity)
agentrun = agentrun_summary(spans, agentrun_authority)
lag = projection_lag_summary(agentrun, read_model)
@@ -2254,6 +2333,9 @@ summary = {
"failureKind": agentrun.get("failureKind"),
"observabilityGap": observability_gap.get("status"),
"businessTraceScope": business_scope.get("mode"),
"providerProfile": provider_decision.get("providerProfile"),
"providerModel": provider_decision.get("model"),
"providerAdapter": provider_decision.get("adapter"),
},
}
evidence = {
@@ -2284,6 +2366,7 @@ payload = {
"observabilityGap": observability_gap,
"businessTraceIds": business_trace_ids[:20],
"businessTraceScope": business_scope,
"providerDecision": provider_decision,
"identity": identity,
"agentrun": {
"terminalStatus": agentrun.get("terminalStatus"),
@@ -382,6 +382,7 @@ export function renderTraceTable(input: {
const turnRows = traceTurnStatusRows(allSpans);
const projectionRows = traceProjectionRows(allSpans);
const projectionSyncRows = traceProjectionSyncRows(allSpans);
const providerRows = traceProviderDecisionRows(allSpans);
const countRows = asArray(input.result.spanNameCounts).slice(0, 8).map((item) => {
const row = asPlainRecord(item) ?? {};
return [shortenEnd(textValue(row.name), 64), textValue(row.count)];
@@ -406,6 +407,9 @@ export function renderTraceTable(input: {
"Identity:",
formatTable(["FIELD", "VALUE"], identityRows.length > 0 ? identityRows : [["-", "-"]]),
"",
"Provider decision:",
formatTable(["SERVICE", "PROFILE", "SOURCE", "DEFAULT", "MODEL", "ADAPTER", "RUNNER_NS", "SESSION"], providerRows.length > 0 ? providerRows : [["-", "-", "-", "-", "-", "-", "-", "-"]]),
"",
"Key spans:",
formatTable(["NAME", "SERVICE", "ROUTE", "STATUS", "DUR_MS", "RES_MS", "FETCH_REQ", "REQ_WAIT", "RESP_XFER", "PROTO", "DETAIL"], spanRows.length > 0 ? spanRows : [["-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"]]),
"",
@@ -483,6 +487,27 @@ function traceRunnerRows(spans: Record<string, unknown>[]): string[][] {
return dedupeRows(rows).slice(0, 12);
}
/**
 * Builds the rows for the "Provider decision" table from a list of spans.
 *
 * A span is included when its name is `provider_decision` or when
 * `spanColumnAttr` resolves one of the provider-profile attributes to
 * something other than the `"-"` placeholder. Columns, in order: service,
 * profile, profile source, default profile, model, adapter, runner namespace,
 * session id.
 *
 * @param spans - Span records to inspect.
 * @returns De-duplicated rows (via `dedupeRows`), capped at eight.
 */
function traceProviderDecisionRows(spans: Record<string, unknown>[]): string[][] {
  const collected: string[][] = [];
  for (const span of spans) {
    const attrs = asPlainRecord(span.attributes);
    const isDecisionSpan = textValue(span.name) === "provider_decision";
    if (
      !isDecisionSpan
      && spanColumnAttr(attrs, ["agent.chat.provider_profile", "providerProfile", "defaultProviderProfile"]) === "-"
    ) {
      continue;
    }

    const service = shortenEnd(textValue(span.service), 20);
    const profile = shortenEnd(
      spanColumnAttr(attrs, ["agent.chat.provider_profile", "providerProfile", "backendProfile", "defaultProviderProfile"]),
      22,
    );
    const profileSource = shortenEnd(
      spanColumnAttr(attrs, ["agent.chat.provider_profile_source", "providerProfileSource"]),
      12,
    );
    const defaultProfile = shortenEnd(spanColumnAttr(attrs, ["defaultProviderProfile"]), 22);
    const model = shortenEnd(
      spanColumnAttr(attrs, ["agent.chat.model", "agent.chat.model_id", "agent.chat.provider_model", "model", "modelId", "providerModel"]),
      28,
    );
    const adapter = shortenEnd(spanColumnAttr(attrs, ["adapter"]), 16);
    const runnerNamespace = shortenEnd(spanColumnAttr(attrs, ["agentRunRunnerNamespace"]), 16);
    // Session ids are shortened in the middle, unlike the other columns.
    const session = shortenMiddle(
      spanColumnAttr(attrs, ["agent.chat.session_id", "sessionId", "workbench.session_id"]),
      24,
    );

    collected.push([service, profile, profileSource, defaultProfile, model, adapter, runnerNamespace, session]);
  }
  return dedupeRows(collected).slice(0, 8);
}
function traceReadWindowRows(spans: Record<string, unknown>[]): string[][] {
const rows = spans.filter((span) => textValue(span.name) === "trace_events_read").map((span) => {
const attrs = asPlainRecord(span.attributes);
@@ -678,6 +703,7 @@ export function renderDiagnoseCodeAgentTable(input: {
const evidence = asPlainRecord(input.result.evidence);
const observabilityGap = asPlainRecord(input.result.observabilityGap);
const servicePath = asPlainRecord(input.result.servicePath);
const providerDecision = asPlainRecord(input.result.providerDecision);
const rootCauses = asArray(input.result.rootCauseCandidates).map((item) => asPlainRecord(item) ?? {});
const http = asPlainRecord(input.result.http);
const services = joinValues(input.result.services, 54);
@@ -727,6 +753,7 @@ export function renderDiagnoseCodeAgentTable(input: {
` requested runId=${requestedRunId} commandId=${requestedCommandId} sessionId=${requestedSessionId} runnerJobId=${requestedRunnerJobId}`,
` observed runId=${observedRunId} commandId=${observedCommandId} sessionId=${observedSessionId} runnerJobId=${observedRunnerJobId} runnerId=${textValue(identity?.runnerId)}`,
` backendProfile=${textValue(identity?.backendProfile)} sourceCommit=${shortenMiddle(textValue(identity?.sourceCommit), 20)}`,
` providerProfile=${textValue(providerDecision?.providerProfile)} model=${textValue(providerDecision?.model)} source=${textValue(providerDecision?.providerProfileSource)} adapter=${textValue(providerDecision?.adapter)} runnerNamespace=${textValue(providerDecision?.runnerNamespace)}`,
"",
"Root causes:",
formatTable(["CODE", "CONF", "SUMMARY"], rootRows.length > 0 ? rootRows : [["-", "-", "-"]]),