Merge pull request #200 from pikasTech/fix/agentrun-event-otel-error-spans
fix: 将 runner 错误事件写入 OTel trace
This commit is contained in:
+55
-1
@@ -759,9 +759,17 @@ async function route({ method, url, body, store, sourceCommit, authSummary, runn
|
|||||||
}
|
}
|
||||||
const eventsAppendMatch = path.match(/^\/api\/v1\/runs\/([^/]+)\/events$/u);
|
const eventsAppendMatch = path.match(/^\/api\/v1\/runs\/([^/]+)\/events$/u);
|
||||||
if (method === "POST" && eventsAppendMatch) {
|
if (method === "POST" && eventsAppendMatch) {
|
||||||
|
const startedAt = Date.now();
|
||||||
|
const runId = eventsAppendMatch[1] ?? "";
|
||||||
const record = asRecord(body, "event");
|
const record = asRecord(body, "event");
|
||||||
const type = typeof record.type === "string" ? record.type as RunEvent["type"] : "backend_status";
|
const type = typeof record.type === "string" ? record.type as RunEvent["type"] : "backend_status";
|
||||||
return await store.appendEvent(eventsAppendMatch[1] ?? "", type, asRecord(record.payload ?? {}, "event.payload")) as unknown as JsonValue;
|
const payload = asRecord(record.payload ?? {}, "event.payload");
|
||||||
|
const [run, event] = await Promise.all([
|
||||||
|
store.getRun(runId),
|
||||||
|
store.appendEvent(runId, type, payload),
|
||||||
|
]);
|
||||||
|
emitRunEventOtelSpan(type, payload, run, startedAt);
|
||||||
|
return event as unknown as JsonValue;
|
||||||
}
|
}
|
||||||
const statusMatch = path.match(/^\/api\/v1\/runs\/([^/]+)\/status$/u);
|
const statusMatch = path.match(/^\/api\/v1\/runs\/([^/]+)\/status$/u);
|
||||||
if (method === "PATCH" && statusMatch) {
|
if (method === "PATCH" && statusMatch) {
|
||||||
@@ -962,6 +970,52 @@ function commandIsTerminal(command: CommandRecord): boolean {
|
|||||||
return command.state === "completed" || command.state === "failed" || command.state === "cancelled";
|
return command.state === "completed" || command.state === "failed" || command.state === "cancelled";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Emits an OTel span for a run event received on `POST /api/v1/runs/:runId/events`.
 *
 * No span is emitted when `runEventOtelSpanName` returns `null` for the event.
 * The span is flagged as an error when the event type is `"error"` or when
 * `payload.terminalStatus` is `"failed"` or `"blocked"`.
 *
 * Emission is fire-and-forget: the result of `emitAgentRunOtelSpan` is
 * discarded with `void`, so this function never waits for the exporter.
 *
 * @param type - Event type as stored on the run event.
 * @param payload - Event payload; `phase`, `terminalStatus`, `failureKind`, the
 *   id fields, `willRetry` and `message` are copied into span attributes.
 * @param run - Run record handed through to `emitAgentRunOtelSpan`.
 * @param startedAt - Epoch milliseconds used as the span start time.
 */
function emitRunEventOtelSpan(type: RunEvent["type"], payload: JsonRecord, run: RunRecord, startedAt: number): void {
  const phase = stringJsonValue(payload.phase);
  const terminalStatus = stringJsonValue(payload.terminalStatus);
  const failureKind = stringJsonValue(payload.failureKind);

  const eventName = runEventOtelSpanName(type, phase, terminalStatus, failureKind);
  if (!eventName) return;

  const isError = type === "error" || terminalStatus === "failed" || terminalStatus === "blocked";
  // Bounded once and reused for both the error text and the `message` attribute.
  const message = boundedJsonString(payload.message, 300);

  void emitAgentRunOtelSpan(eventName, run, process.env, {
    startTimeMs: startedAt,
    // NOTE(review): presumably the OTLP SpanKind value for SERVER (2) — confirm against emitAgentRunOtelSpan.
    kind: 2,
    status: isError ? "error" : "ok",
    // Error text falls back from the bounded message to the failure kind, then to the span name.
    error: isError ? (message ?? failureKind ?? eventName) : undefined,
    attributes: {
      "http.method": "POST",
      "http.route": "/api/v1/runs/:runId/events",
      "http.status_code": 200,
      eventType: type,
      phase,
      terminalStatus,
      failureKind,
      commandId: stringJsonValue(payload.commandId),
      attemptId: stringJsonValue(payload.attemptId),
      runnerId: stringJsonValue(payload.runnerId),
      threadId: stringJsonValue(payload.threadId),
      turnId: stringJsonValue(payload.turnId),
      willRetry: typeof payload.willRetry === "boolean" ? payload.willRetry : null,
      message,
    },
  });
}
|
||||||
|
|
||||||
|
/**
 * Chooses the OTel span name for a run event, or `null` when the event should
 * not produce a span.
 *
 * - `"error"` events map to `runner_error`, suffixed with `.<failureKind>` when present.
 * - `"terminal_status"` events map to `runner_terminal`, suffixed with
 *   `.<terminalStatus>` when present.
 * - `"backend_status"` events produce `runner_<phase>` only for the phases
 *   `cancel-requested`, `turn-cancelled`, `command-terminal`, or phases
 *   starting with `runner-claim-` or `turn/interrupt:`.
 * - Every other event type yields `null`.
 *
 * Dynamic name parts are normalized through `otelNamePart`.
 *
 * @param type - Event type.
 * @param phase - `payload.phase`, or `null` when absent.
 * @param terminalStatus - `payload.terminalStatus`, or `null` when absent.
 * @param failureKind - `payload.failureKind`, or `null` when absent.
 * @returns The span name, or `null` when no span should be emitted.
 */
function runEventOtelSpanName(type: RunEvent["type"], phase: string | null, terminalStatus: string | null, failureKind: string | null): string | null {
  if (type === "error") {
    return failureKind ? `runner_error.${otelNamePart(failureKind)}` : "runner_error";
  }
  if (type === "terminal_status") {
    return terminalStatus ? `runner_terminal.${otelNamePart(terminalStatus)}` : "runner_terminal";
  }
  if (type !== "backend_status" || !phase) return null;

  // Only a fixed allow-list of backend phases is traced; all share one name format.
  const isTracedPhase =
    phase === "cancel-requested" ||
    phase === "turn-cancelled" ||
    phase === "command-terminal" ||
    phase.startsWith("runner-claim-") ||
    phase.startsWith("turn/interrupt:");
  return isTracedPhase ? `runner_${otelNamePart(phase)}` : null;
}
|
||||||
|
|
||||||
|
/**
 * Normalizes an arbitrary string into a span-name segment.
 *
 * The value is lower-cased, every run of characters outside `[a-z0-9]` is
 * collapsed into a single `_`, and leading/trailing underscores are removed.
 *
 * @param value - Raw text such as a failure kind, terminal status or phase.
 * @returns The normalized segment, or `"unknown"` when nothing remains.
 */
function otelNamePart(value: string): string {
  const collapsed = value.toLowerCase().replace(/[^a-z0-9]+/gu, "_");
  const normalized = collapsed.replace(/^_+|_+$/gu, "");
  return normalized.length > 0 ? normalized : "unknown";
}
|
||||||
|
|
||||||
/**
 * Returns the trimmed text of a JSON value when it is a non-blank string.
 *
 * @param value - Any JSON value, or `undefined` when the field is missing.
 * @returns The trimmed string, or `null` when `value` is not a string or is
 *   empty after trimming.
 */
function optionalString(value: JsonValue | undefined): string | null {
  if (typeof value !== "string") return null;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}
|
||||||
|
|||||||
Reference in New Issue
Block a user