From a6f7581b965fa8ed0d8c9cc358b2434aa964b336 Mon Sep 17 00:00:00 2001 From: Lyon <88232613+pikasTech@users.noreply.github.com> Date: Tue, 2 Jun 2026 10:11:31 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E7=BB=A7=E7=BB=AD=E6=94=B6=E6=95=9B=20c?= =?UTF-8?q?odex=20trace=20=E6=AE=8B=E4=BD=99=E5=99=AA=E5=A3=B0=20(#68)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Codex --- docs/reference/spec-v01-agentrun-runner.md | 2 +- docs/reference/spec-v01-backend-adapter.md | 2 +- src/backend/codex-stdio.ts | 2 +- src/runner/run-once.ts | 23 ++++++++++--------- src/selftest/cases/30-codex-stdio.ts | 5 ++++ .../cases/50-hwlab-manual-dispatch.ts | 2 +- 6 files changed, 21 insertions(+), 15 deletions(-) diff --git a/docs/reference/spec-v01-agentrun-runner.md b/docs/reference/spec-v01-agentrun-runner.md index 368f281..c6ccc53 100644 --- a/docs/reference/spec-v01-agentrun-runner.md +++ b/docs/reference/spec-v01-agentrun-runner.md @@ -62,7 +62,7 @@ claimed -> lease_lost 规则: - runner 必须先 register,再 claim run;claim 失败不能继续调用 backend。 -- lease heartbeat 必须可观察;过期或冲突时写入 failure event 或明确退出原因。 +- lease heartbeat 必须通过 manager lease/status 可观察;不得把周期性心跳或 backend running tick 写成 durable trace event 刷屏。长 turn 只在 `backend-turn-finished` 中输出有界 progress 摘要;过期或冲突时写入 failure event 或明确退出原因。 - command 只能从 manager poll;不得从本地文件或临时参数伪造正式 command。 - runner 的普通 poll 只选择 pending `turn`;当 backend adapter 暴露 active turn control 后,runner 才在同 run 内轮询 pending `steer` command,ack 后调用 backend 的 steer 能力并单独终结该 steer command。active turn 结束后到达的 steer 必须结构化 blocked,不得启动新 turn,也不得把 run 标为 terminal。 - backend 产生的所有可见输出必须先经过 adapter normalization 和 redaction,再 append 到 manager;backend_status 至少包含 redacted profile/backendKind/protocol 摘要。 diff --git a/docs/reference/spec-v01-backend-adapter.md b/docs/reference/spec-v01-backend-adapter.md index aa88e9e..ac636c7 100644 --- a/docs/reference/spec-v01-backend-adapter.md +++ b/docs/reference/spec-v01-backend-adapter.md @@ -69,7 +69,7 @@ Adapter 输出给 runner 的 event 类型至少包括: 事件必须有上限和分页友好形态。大型日志、完整 stdout 或完整 trace 应进入 logPath 或后续 artifact,不得一次性塞入单个 event 造成输出爆炸。 -Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛,不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化:`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning` 和 `configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要,只包含计数、method 和 item type,不包含 reasoning 文本、Secret、token 或 env value。真实 `agentMessage`、`commandExecution`、`command_output`、error、terminal 和生命周期事件必须继续保留。 +Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛,不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化:`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、非 `commandExecution` item 的通用 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning` 和 `configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要,只包含计数、method 和 item type,不包含 reasoning 文本、Secret、token 或 env value。真实 `agentMessage`、`commandExecution`、`command_output`、error、terminal 和关键生命周期事件必须继续保留。 ## Failure Mapping diff --git a/src/backend/codex-stdio.ts b/src/backend/codex-stdio.ts index 80227a7..2d20ad0 100644 --- a/src/backend/codex-stdio.ts +++ b/src/backend/codex-stdio.ts @@ -606,7 +606,7 @@ function normalizeCodexNotification(message: JsonRecord, suppressed: SuppressedN if (method === "item/started" || method === "item/completed") { const item = asRecordAt(params, "item"); const itemType = typeof item.type === "string" ? item.type : "unknown"; - if (isSuppressedCodexItemType(itemType)) { + if (itemType !== "commandExecution" || isSuppressedCodexItemType(itemType)) { recordSuppressedNotification(suppressed, method, itemType); return { events: [] }; } diff --git a/src/runner/run-once.ts b/src/runner/run-once.ts index eabf4e1..ba90150 100644 --- a/src/runner/run-once.ts +++ b/src/runner/run-once.ts @@ -153,7 +153,7 @@ async function executeCommand(api: RunnerManagerApi, options: RunnerOnceOptions, await api.appendEvent(options.runId, { type: "backend_status", payload: { phase: "backend-turn-started", commandId: command.id, attemptId, runnerId: runner.id, backendProfile: options.backendProfile ?? null, workspaceReady: Boolean(workspacePath) } }); const abortController = new AbortController(); const stopCancelWatch = watchCancellation(api, options.runId, command.id, abortController); - const stopBackendProgress = startBackendProgress(api, options.runId, command.id, attemptId, runner.id, options.backendProfile ?? null); + const backendProgress = startBackendProgress(); let stopSteerWatch: (() => void) | undefined; try { const latestRun = await api.getRun(options.runId); @@ -184,8 +184,8 @@ async function executeCommand(api: RunnerManagerApi, options: RunnerOnceOptions, return await reportCommandFailure(api, options.runId, command.id, runner, attemptId, failure, "runner:execute"); } finally { stopSteerWatch?.(); - stopBackendProgress(); - await appendBestEffort(api, options.runId, { type: "backend_status", payload: { phase: "backend-turn-finished", commandId: command.id, attemptId, runnerId: runner.id } }); + const progressSummary = backendProgress.stop(); + await appendBestEffort(api, options.runId, { type: "backend_status", payload: { phase: "backend-turn-finished", commandId: command.id, attemptId, runnerId: runner.id, ...progressSummary } }); stopCancelWatch(); } } @@ -307,20 +307,21 @@ function startHeartbeat(api: RunnerManagerApi, runId: string, runnerId: string, }; } -function startBackendProgress(api: RunnerManagerApi, runId: string, commandId: string, attemptId: string, runnerId: string, backendProfile: string | null): () => void { +function startBackendProgress(): { stop: () => JsonRecord } { let stopped = false; let ticks = 0; const startedAt = Date.now(); - const emit = async (): Promise => { + const tick = (): void => { if (stopped) return; ticks += 1; - await appendBestEffort(api, runId, { type: "backend_status", payload: { phase: "backend-turn-running", commandId, attemptId, runnerId, backendProfile, elapsedMs: Date.now() - startedAt, ticks } }); }; - const timer = setInterval(() => { void emit(); }, 10_000); - void emit(); - return () => { - stopped = true; - clearInterval(timer); + const timer = setInterval(tick, 10_000); + return { + stop: () => { + stopped = true; + clearInterval(timer); + return { elapsedMs: Date.now() - startedAt, progressTicks: ticks, progressEventsPrinted: false }; + }, }; } diff --git a/src/selftest/cases/30-codex-stdio.ts b/src/selftest/cases/30-codex-stdio.ts index eef8c67..416e937 100644 --- a/src/selftest/cases/30-codex-stdio.ts +++ b/src/selftest/cases/30-codex-stdio.ts @@ -125,6 +125,7 @@ const selfTest: SelfTestCase = async (context) => { assert.equal(noisyResult.terminalStatus, "completed", "noisy reasoning turn should complete"); const noisyEvents = await client.get(`/api/v1/runs/${noisy.runId}/events?afterSeq=0&limit=100`) as { items?: Array<{ type: string; payload: unknown }> }; const noisyItems = noisyEvents.items ?? []; + const noisyPhases = noisyItems.map((event) => eventPayload(event).phase).filter(Boolean); assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "item/reasoning/textDelta"), false, "reasoning textDelta must not be persisted as backend_status"); assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "thread/tokenUsage/updated"), false, "token usage update must not be persisted as backend_status"); assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "account/rateLimits/updated"), false, "rate limit update must not be persisted as backend_status"); @@ -132,6 +133,10 @@ const selfTest: SelfTestCase = async (context) => { assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "configWarning"), false, "low value config warnings must not be persisted as backend_status"); assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayloadItem(event).type === "reasoning"), false, "reasoning items must not be persisted as tool_call"); assert.ok(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).method === "item/started" && eventPayloadItem(event).type === "commandExecution"), "real commandExecution tool call should remain visible"); + assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayloadItem(event).type !== "commandExecution"), false, "non-commandExecution item lifecycle must not be persisted as tool_call"); + assert.equal(noisyPhases.includes("backend-turn-running"), false, "backend progress ticks must be summarized instead of persisted as durable trace events"); + const noisyFinished = noisyItems.find((event) => event.type === "backend_status" && eventPayload(event).phase === "backend-turn-finished"); + assert.equal(eventPayload(noisyFinished ?? { payload: {} }).progressEventsPrinted, false, "backend-turn-finished must declare progress ticks were not printed as events"); assert.ok(noisyItems.some((event) => event.type === "assistant_message" && eventPayload(event).text === "noise filtered final"), "final assistant_message should remain visible"); const suppression = noisyItems.find((event) => event.type === "backend_status" && eventPayload(event).phase === "codex-app-server-notifications-suppressed"); assert.ok(suppression, "suppression summary must be emitted when noisy notifications are filtered"); diff --git a/src/selftest/cases/50-hwlab-manual-dispatch.ts b/src/selftest/cases/50-hwlab-manual-dispatch.ts index 07a0dbd..8802265 100644 --- a/src/selftest/cases/50-hwlab-manual-dispatch.ts +++ b/src/selftest/cases/50-hwlab-manual-dispatch.ts @@ -123,9 +123,9 @@ console.log(JSON.stringify({ apiVersion: manifest.apiVersion, kind: manifest.kin assert.equal(multiEvents.filter((event) => event.type === "backend_status" && event.payload?.phase === "resource-bundle-materialized").length, 1); for (const commandId of [multiTurn.commandId, secondCommand.id]) { assert.ok(multiEvents.some((event) => event.type === "backend_status" && event.payload?.phase === "backend-turn-started" && event.payload?.commandId === commandId), `command ${commandId} must emit backend-turn-started before waiting on Codex`); - assert.ok(multiEvents.some((event) => event.type === "backend_status" && event.payload?.phase === "backend-turn-running" && event.payload?.commandId === commandId), `command ${commandId} must emit backend-turn-running while Codex is active`); assert.ok(multiEvents.some((event) => event.type === "backend_status" && event.payload?.phase === "backend-turn-finished" && event.payload?.commandId === commandId), `command ${commandId} must emit backend-turn-finished after Codex returns`); } + assert.equal(multiEvents.some((event) => event.type === "backend_status" && event.payload?.phase === "backend-turn-running"), false, "backend-turn-running ticks must not be persisted as durable trace events"); assert.equal(multiEvents.filter((event) => event.type === "backend_status" && event.payload?.phase === "command-terminal").length, 2); const secondEnvelope = await client.get(`/api/v1/runs/${multiTurn.runId}/commands/${secondCommand.id}/result`) as JsonRecord; assert.equal(secondEnvelope.terminalStatus, "completed");