fix: expose provider retry diagnostics in otel (#215)

* fix: expose provider retry diagnostics in otel

* fix: place retry diagnostics in notification attributes
This commit is contained in:
Lyon
2026-06-21 14:17:40 +08:00
committed by GitHub
parent f7592ca09f
commit f29bb80793
2 changed files with 69 additions and 1 deletions
+61 -1
View File
@@ -911,7 +911,7 @@ function normalizeCodexNotification(message: JsonRecord, suppressed: SuppressedN
const messageText = typeof error.message === "string" ? error.message : "Codex app-server error";
const failureKind = classifyCodexErrorRecord(error, "backend-failed");
const terminal = params.willRetry === true ? undefined : { status: "failed" as const, failureKind, message: redactText(messageText) };
return { events: [{ type: "error", payload: { failureKind, error: redactJson(error), willRetry: params.willRetry === true } }], ...(terminal ? { terminal } : {}) };
return { events: [{ type: "error", payload: { failureKind, message: redactText(messageText), error: redactJson(error), willRetry: params.willRetry === true, ...providerRetryDiagnostics(error, params) } }], ...(terminal ? { terminal } : {}) };
}
if (method === "turn/completed") {
const turn = asRecordAt(params, "turn");
@@ -1435,6 +1435,7 @@ function notificationOtelAttributes(message: JsonRecord): JsonRecord {
: method === "turn/completed" && turnStatus !== "completed"
? classifyCodexErrorRecord(Object.keys(error).length > 0 ? error : { message: turnStatus ?? "unknown" }, "backend-failed")
: null;
const retryDiagnostics = method === "error" ? providerRetryDiagnostics(error, params) : {};
return {
method,
itemId: stringAt(item, "id") ?? stringAt(params, "itemId"),
@@ -1444,11 +1445,65 @@ function notificationOtelAttributes(message: JsonRecord): JsonRecord {
turnId: stringAt(turn, "id"),
failureKind,
willRetry: typeof params.willRetry === "boolean" ? params.willRetry : null,
...retryDiagnostics,
deltaChars: typeof params.delta === "string" ? params.delta.length : null,
valuesPrinted: false,
};
}
/**
 * Builds a flat record of retry/upstream diagnostics for a provider error.
 *
 * The error's `message` and `additionalDetails` strings (when present) are joined with a
 * newline and scanned for a retry counter, an HTTP status and a URL host. A numeric
 * `codexErrorInfo.responseStreamDisconnected.httpStatusCode` takes precedence over a status
 * parsed from text. The backoff is the first finite number found among
 * `error.retryBackoffMs`, `error.backoffMs`, `params.retryBackoffMs`, `params.backoffMs`.
 *
 * @param error Error record taken from the notification.
 * @param params Notification params, consulted only for the backoff fields.
 * @returns A record whose fields are null when the corresponding value could not be determined.
 */
function providerRetryDiagnostics(error: JsonRecord, params: JsonRecord): JsonRecord {
  const primaryText = typeof error.message === "string" ? error.message : "";
  const detailText = typeof error.additionalDetails === "string" ? error.additionalDetails : "";
  const combined = `${primaryText}\n${detailText}`;

  const counter = parseRetryCounter(combined);

  // Structured status wins; free-text parsing is only the fallback.
  const disconnectInfo = asRecordAt(asRecordAt(error, "codexErrorInfo"), "responseStreamDisconnected");
  const httpStatus = numberAt(disconnectInfo, "httpStatusCode") ?? parseHttpStatus(combined);

  // Error-level backoff fields are preferred over params-level ones.
  let backoffMs: number | null = null;
  for (const source of [error, params]) {
    backoffMs = numberAt(source, "retryBackoffMs") ?? numberAt(source, "backoffMs");
    if (backoffMs !== null) break;
  }

  const errorClass = providerErrorClassForText(combined, httpStatus);
  const exhausted = counter === null ? null : counter.attempt >= counter.max;

  return {
    retryAttempt: counter === null ? null : counter.attempt,
    retryMax: counter === null ? null : counter.max,
    retryExhausted: exhausted,
    retryBackoffMs: backoffMs,
    upstreamHttpStatus: httpStatus,
    upstreamHost: parseUrlHost(combined),
    providerErrorClass: errorClass,
    // Falls back to the derived class label when the error carries no text at all.
    errorSummary: redactText(primaryText || detailText || errorClass),
    valuesPrinted: false,
  };
}
/**
 * Extracts a retry progress counter such as "Reconnecting... 2/5" from free-form error text.
 *
 * An "N/M" pair is accepted only when a retry-related keyword (reconnect/reconnecting,
 * retry/retrying/retries, attempt/attempts) appears at most 24 non-digit characters before
 * it, and both numbers sit on word boundaries. Without the keyword requirement any
 * fraction-like token — for example the "v1/2" path segment of a URL — would be reported
 * as a retry counter.
 *
 * @param text Error text to scan.
 * @returns The attempt and max as integers, or null when no counter is found or max is not positive.
 */
function parseRetryCounter(text: string): { attempt: number; max: number } | null {
  const match = /(?:reconnect(?:ing)?|retr(?:y(?:ing)?|ies)|attempts?)[^0-9]{0,24}\b(\d+)\s*\/\s*(\d+)\b/iu.exec(text);
  if (!match) return null;
  const attempt = Number(match[1]);
  const max = Number(match[2]);
  // Very long digit runs can overflow to Infinity; a zero max would make "exhausted" meaningless.
  if (!Number.isFinite(attempt) || !Number.isFinite(max) || attempt < 0 || max <= 0) return null;
  return { attempt: Math.floor(attempt), max: Math.floor(max) };
}
/**
 * Extracts an HTTP status code (100-599) from free-form error text.
 *
 * The number is accepted only when it directly follows an HTTP/status marker, e.g.
 * "HTTP 429", "HTTP/1.1 502", "http status code: 401", "unexpected status 503".
 * A bare three-digit number is deliberately ignored: texts such as "os error 111" or
 * "timed out after 300 ms" would otherwise be reported as HTTP statuses and override
 * the text-based error classification.
 *
 * @param text Error text to scan.
 * @returns The first status code found, or null when none is present.
 */
function parseHttpStatus(text: string): number | null {
  const match = /\b(?:http(?:\/\d(?:\.\d)?)?(?:\s+(?:error|status(?:\s+code)?))?|status(?:\s+code)?)\s*[:=]?\s*([1-5][0-9]{2})\b/iu.exec(text);
  if (!match) return null;
  const status = Number(match[1]);
  return Number.isFinite(status) ? status : null;
}
/**
 * Extracts the host (with port, if any) of the first http(s) URL found in the text.
 *
 * Userinfo ("user:password@") is skipped so credentials embedded in a URL never end up in
 * the returned value, the host stops at the first "/", "?" or "#", and trailing sentence
 * punctuation (e.g. the ")." in "(https://host).") is trimmed.
 *
 * @param text Error text to scan.
 * @returns The lower-cased host, or null when the text contains no usable http(s) URL.
 */
function parseUrlHost(text: string): string | null {
  const match = /https?:\/\/(?:[^\s/?#]*@)?([^\s/?#@]+)/iu.exec(text);
  if (!match) return null;
  // URLs quoted in prose often carry closing punctuation that is not part of the host.
  const host = match[1].replace(/[.,;:!)}>'"]+$/u, "").toLowerCase();
  return host.length > 0 ? host : null;
}
/**
 * Maps an error text and optional HTTP status to a coarse provider error class label.
 *
 * When a status is given it alone decides the class; the text is consulted only when
 * `httpStatus` is null, using the first matching pattern in the rule table.
 *
 * @param text Error text; matched case-insensitively via lower-casing.
 * @param httpStatus Upstream HTTP status, or null when unknown.
 * @returns One of the "provider-*" class labels; "provider-unavailable" when nothing matches.
 */
function providerErrorClassForText(text: string, httpStatus: number | null): string {
  if (httpStatus !== null) {
    if (httpStatus >= 500) return "provider-http-5xx";
    if (httpStatus === 429) return "provider-rate-limit";
    if (httpStatus === 401 || httpStatus === 403) return "provider-auth";
    return "provider-http-status";
  }

  const haystack = text.toLowerCase();
  // Order matters: earlier rules win when several patterns match.
  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    [/connection refused|econnrefused|os error 111/u, "provider-connection-refused"],
    [/timed out|timeout|etimedout/u, "provider-timeout"],
    [/reset|econnreset/u, "provider-connection-reset"],
    [/disconnect|stream/u, "provider-stream-disconnected"],
  ];
  const hit = rules.find(([pattern]) => pattern.test(haystack));
  return hit ? hit[1] : "provider-unavailable";
}
function toolCallSummaryFromNotification(message: JsonRecord): JsonRecord | null {
const method = typeof message.method === "string" ? message.method : "";
if (method !== "item/started" && method !== "item/completed") return null;
@@ -1609,6 +1664,11 @@ function stringAt(value: JsonRecord, key: string): string | null {
return typeof value[key] === "string" && String(value[key]).length > 0 ? String(value[key]) : null;
}
/**
 * Reads `value[key]` and returns it only when it is a finite number.
 *
 * @param value Record to read from.
 * @param key Property name to look up.
 * @returns The number, or null for missing, non-number, NaN or infinite values.
 */
function numberAt(value: JsonRecord, key: string): number | null {
  const candidate = value[key];
  if (typeof candidate !== "number") return null;
  return Number.isFinite(candidate) ? candidate : null;
}
/**
 * Returns the first 12 hexadecimal characters of the SHA-256 digest of `value`.
 *
 * @param value Text to hash.
 * @returns A 12-character lowercase hex prefix of the digest.
 */
function shortHash(value: string): string {
  const hasher = createHash("sha256");
  hasher.update(value);
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, 12);
}
+8
View File
@@ -1024,6 +1024,14 @@ function emitRunEventOtelSpan(type: RunEvent["type"], payload: JsonRecord, run:
threadId: stringJsonValue(payload.threadId),
turnId: stringJsonValue(payload.turnId),
willRetry: typeof payload.willRetry === "boolean" ? payload.willRetry : null,
retryAttempt: numberJsonValue(payload.retryAttempt),
retryMax: numberJsonValue(payload.retryMax),
retryExhausted: typeof payload.retryExhausted === "boolean" ? payload.retryExhausted : null,
retryBackoffMs: numberJsonValue(payload.retryBackoffMs),
upstreamHttpStatus: numberJsonValue(payload.upstreamHttpStatus),
upstreamHost: stringJsonValue(payload.upstreamHost),
providerErrorClass: stringJsonValue(payload.providerErrorClass),
errorSummary: boundedJsonString(payload.errorSummary, 300),
message: boundedJsonString(payload.message, 300),
},
});