fix: 将 runner 错误事件写入 OTel trace

This commit is contained in:
lyon
2026-06-20 09:02:20 +08:00
parent 3e5a025366
commit f1fe46e84f
+55 -1
View File
@@ -759,9 +759,17 @@ async function route({ method, url, body, store, sourceCommit, authSummary, runn
}
const eventsAppendMatch = path.match(/^\/api\/v1\/runs\/([^/]+)\/events$/u);
if (method === "POST" && eventsAppendMatch) {
const startedAt = Date.now();
const runId = eventsAppendMatch[1] ?? "";
const record = asRecord(body, "event");
const type = typeof record.type === "string" ? record.type as RunEvent["type"] : "backend_status";
return await store.appendEvent(eventsAppendMatch[1] ?? "", type, asRecord(record.payload ?? {}, "event.payload")) as unknown as JsonValue;
const payload = asRecord(record.payload ?? {}, "event.payload");
const [run, event] = await Promise.all([
store.getRun(runId),
store.appendEvent(runId, type, payload),
]);
emitRunEventOtelSpan(type, payload, run, startedAt);
return event as unknown as JsonValue;
}
const statusMatch = path.match(/^\/api\/v1\/runs\/([^/]+)\/status$/u);
if (method === "PATCH" && statusMatch) {
@@ -962,6 +970,52 @@ function commandIsTerminal(command: CommandRecord): boolean {
return command.state === "completed" || command.state === "failed" || command.state === "cancelled";
}
/**
 * Reports a runner event as an OTel span, when the event is one worth tracing.
 *
 * The span name comes from `runEventOtelSpanName`; when that yields nothing the
 * event is ignored. The emit call is fire-and-forget: its result is discarded
 * with `void`.
 *
 * @param type - Event type taken from the request body.
 * @param payload - Event payload; only the fields read below are used.
 * @param run - Run record handed through to `emitAgentRunOtelSpan`.
 * @param startedAt - Epoch milliseconds used as the span start time.
 */
function emitRunEventOtelSpan(type: RunEvent["type"], payload: JsonRecord, run: RunRecord, startedAt: number): void {
  const phase = stringJsonValue(payload.phase);
  const terminalStatus = stringJsonValue(payload.terminalStatus);
  const failureKind = stringJsonValue(payload.failureKind);

  const spanName = runEventOtelSpanName(type, phase, terminalStatus, failureKind);
  if (!spanName) {
    return;
  }

  // An event is healthy only if it is neither an explicit error nor a failed/blocked terminal status.
  const healthy = type !== "error" && terminalStatus !== "failed" && terminalStatus !== "blocked";

  // Error text preference: bounded payload message, then the failure kind, then the span name.
  const errorText = healthy
    ? undefined
    : (boundedJsonString(payload.message, 300) ?? failureKind ?? spanName);

  const attributes = {
    "http.method": "POST",
    "http.route": "/api/v1/runs/:runId/events",
    "http.status_code": 200,
    eventType: type,
    phase,
    terminalStatus,
    failureKind,
    commandId: stringJsonValue(payload.commandId),
    attemptId: stringJsonValue(payload.attemptId),
    runnerId: stringJsonValue(payload.runnerId),
    threadId: stringJsonValue(payload.threadId),
    turnId: stringJsonValue(payload.turnId),
    willRetry: typeof payload.willRetry === "boolean" ? payload.willRetry : null,
    message: boundedJsonString(payload.message, 300),
  };

  void emitAgentRunOtelSpan(spanName, run, process.env, {
    startTimeMs: startedAt,
    // NOTE(review): 2 is presumably the "server" span kind; confirm against emitAgentRunOtelSpan.
    kind: 2,
    status: healthy ? "ok" : "error",
    error: errorText,
    attributes,
  });
}
/**
 * Chooses the OTel span name for a runner event, or `null` when the event
 * should not produce a span.
 *
 * - `error` events map to `runner_error`, suffixed with the normalized failure kind when present.
 * - `terminal_status` events map to `runner_terminal`, suffixed with the normalized status when present.
 * - `backend_status` events produce a span only for a fixed set of phases and phase prefixes.
 * - Every other event type yields `null`.
 *
 * @param type - Event type.
 * @param phase - Phase of a `backend_status` event, if any.
 * @param terminalStatus - Status of a `terminal_status` event, if any.
 * @param failureKind - Failure kind of an `error` event, if any.
 * @returns The span name, or `null` to skip tracing.
 */
function runEventOtelSpanName(type: RunEvent["type"], phase: string | null, terminalStatus: string | null, failureKind: string | null): string | null {
  switch (type) {
    case "error":
      return failureKind ? `runner_error.${otelNamePart(failureKind)}` : "runner_error";
    case "terminal_status":
      return terminalStatus ? `runner_terminal.${otelNamePart(terminalStatus)}` : "runner_terminal";
    case "backend_status":
      break;
    default:
      return null;
  }

  if (!phase) {
    return null;
  }
  const currentPhase: string = phase;

  // Phases traced on an exact match, and phases traced by prefix.
  const exactPhases = ["cancel-requested", "turn-cancelled", "command-terminal"];
  const phasePrefixes = ["runner-claim-", "turn/interrupt:"];

  const traced =
    exactPhases.includes(currentPhase) ||
    phasePrefixes.some((prefix) => currentPhase.startsWith(prefix));
  return traced ? `runner_${otelNamePart(currentPhase)}` : null;
}
/**
 * Normalizes free-form text into a span-name segment.
 *
 * The value is lower-cased, every run of characters outside `a-z0-9` becomes a
 * single `_`, and leading/trailing separators are dropped.
 *
 * @param value - Raw text such as a failure kind or phase.
 * @returns The normalized segment, or `"unknown"` when nothing alphanumeric remains.
 */
function otelNamePart(value: string): string {
  const segment = value
    .toLowerCase()
    .split(/[^a-z0-9]+/u)
    .filter((piece) => piece.length > 0)
    .join("_");
  return segment === "" ? "unknown" : segment;
}
/**
 * Extracts a trimmed, non-empty string from a JSON value.
 *
 * @param value - Any JSON value, or `undefined`.
 * @returns The trimmed string, or `null` when `value` is not a string or is blank.
 */
function optionalString(value: JsonValue | undefined): string | null {
  if (typeof value !== "string") {
    return null;
  }
  const trimmed = value.trim();
  return trimmed.length === 0 ? null : trimmed;
}