From ebc5bdb8b1869b702bdfc1e8b5857876c695b44d Mon Sep 17 00:00:00 2001 From: Lyon <88232613+pikasTech@users.noreply.github.com> Date: Tue, 2 Jun 2026 10:28:35 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E6=94=B6=E6=95=9B=20commandExecution=20?= =?UTF-8?q?toolcall=20=E6=91=98=E8=A6=81=20(#70)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Codex --- docs/reference/spec-v01-backend-adapter.md | 2 ++ src/backend/codex-stdio.ts | 39 ++++++++++++++++++++-- src/common/events.ts | 18 ++++++++++ src/selftest/cases/30-codex-stdio.ts | 14 ++++++-- 4 files changed, 67 insertions(+), 6 deletions(-) diff --git a/docs/reference/spec-v01-backend-adapter.md b/docs/reference/spec-v01-backend-adapter.md index ac636c7..62f5d9a 100644 --- a/docs/reference/spec-v01-backend-adapter.md +++ b/docs/reference/spec-v01-backend-adapter.md @@ -71,6 +71,8 @@ Adapter 输出给 runner 的 event 类型至少包括: Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛,不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化:`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、非 `commandExecution` item 的通用 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning` 和 `configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要,只包含计数、method 和 item type,不包含 reasoning 文本、Secret、token 或 env value。真实 `agentMessage`、`commandExecution`、`command_output`、error、terminal 和关键生命周期事件必须继续保留。 +`commandExecution` 的 `tool_call` event 只能输出面向人和消费侧的扁平字段,例如 `method`、`itemId`、`toolName`、`type`、`command`、`cwd`、`status`、`processId` 和 `valuesPrinted=false`。不得把 Codex app-server 的原始 `item` JSON、`itemPreview` 或嵌套协议摘要写入 `message`、`outputSummary`、`stdoutSummary` 或 payload;命令实际 stdout/stderr 只通过 `command_output` 或 completed `commandExecution` 摘要输出。 + ## Failure Mapping Adapter 必须把 backend 错误映射为稳定 failureKind: diff --git a/src/backend/codex-stdio.ts b/src/backend/codex-stdio.ts index 14a2504..a8add20 100644 --- a/src/backend/codex-stdio.ts +++ b/src/backend/codex-stdio.ts @@ -695,9 +695,42 @@ function terminalStatusFromValue(value: unknown): TerminalStatus { function toolCallPayload(method: string, item: JsonRecord): JsonRecord { const redacted = redactJson(item); - const summary = boundedTextSummary(JSON.stringify(redacted)); - if (summary.outputTruncated !== true) return { method, item: redacted, summary, outputBytes: summary.outputBytes, outputTruncated: false }; - return { method, itemPreview: summary.text, summary, outputBytes: summary.outputBytes, outputTruncated: true }; + const itemId = typeof redacted.id === "string" ? redacted.id : null; + const itemType = typeof redacted.type === "string" ? redacted.type : "unknown"; + const command = typeof redacted.command === "string" ? redacted.command : null; + const cwd = typeof redacted.cwd === "string" ? redacted.cwd : null; + const status = typeof redacted.status === "string" ? redacted.status : null; + const processId = typeof redacted.processId === "string" || typeof redacted.processId === "number" ? String(redacted.processId) : null; + const exitCode = typeof redacted.exitCode === "number" ? redacted.exitCode : null; + const durationMs = typeof redacted.durationMs === "number" ? redacted.durationMs : null; + const outputSummary = toolCallOutputSummary(redacted); + return { + method, + itemId, + type: itemType, + toolName: itemType, + ...(command ? { command } : {}), + ...(cwd ? { cwd } : {}), + ...(status ? { status } : {}), + ...(processId ? { processId } : {}), + ...(exitCode !== null ? { exitCode } : {}), + ...(durationMs !== null ? { durationMs } : {}), + ...(outputSummary ? { outputSummary } : {}), + valuesPrinted: false, + }; +} + +function toolCallOutputSummary(item: JsonRecord): string | null { + const direct = item.outputSummary ?? item.stdoutSummary ?? item.message; + if (typeof direct === "string" && direct.trim().length > 0) return String(boundedTextSummary(direct).text); + const summary = item.summary; + if (typeof summary === "object" && summary !== null && !Array.isArray(summary) && typeof (summary as JsonRecord).text === "string") { + const text = String((summary as JsonRecord).text); + if (text.trim().length > 0) return String(boundedTextSummary(text).text); + } + const aggregated = item.aggregatedOutput; + if (typeof aggregated === "string" && aggregated.trim().length > 0) return String(boundedTextSummary(aggregated).text); + return null; } function withOptionalModel(params: JsonRecord, model: string | undefined): JsonRecord { diff --git a/src/common/events.ts b/src/common/events.ts index 8aa1ddb..ecfedcb 100644 --- a/src/common/events.ts +++ b/src/common/events.ts @@ -78,6 +78,10 @@ function normalizeTextPayload(payload: JsonRecord): JsonRecord { function normalizeToolCallPayload(payload: JsonRecord): JsonRecord { const redacted = redactJson(payload); + if (isCommandExecutionToolCall(redacted)) { + const summary = boundedTextSummary(commandExecutionToolCallText(redacted)); + return { ...redacted, summary, outputBytes: summary.outputBytes, outputTruncated: summary.outputTruncated }; + } const json = JSON.stringify(redacted); const summary = boundedTextSummary(json); if (summary.outputTruncated !== true) return { ...redacted, summary, outputBytes: summary.outputBytes, outputTruncated: false }; @@ -89,3 +93,17 @@ function normalizeToolCallPayload(payload: JsonRecord): JsonRecord { outputTruncated: true, }; } + +function isCommandExecutionToolCall(payload: JsonRecord): boolean { + return payload.toolName === "commandExecution" || payload.type === "commandExecution"; +} + +function commandExecutionToolCallText(payload: JsonRecord): string { + const method = typeof payload.method === "string" ? payload.method.replace(/^item\//, "") : "tool"; + const status = typeof payload.status === "string" ? payload.status : method; + const command = typeof payload.command === "string" ? payload.command : "commandExecution"; + const exitCode = typeof payload.exitCode === "number" ? ` exit=${payload.exitCode}` : ""; + const durationMs = typeof payload.durationMs === "number" ? ` durationMs=${payload.durationMs}` : ""; + const outputSummary = typeof payload.outputSummary === "string" && payload.outputSummary.trim().length > 0 ? ` output=${payload.outputSummary}` : ""; + return `commandExecution ${status}: ${command}${exitCode}${durationMs}${outputSummary}`; +} diff --git a/src/selftest/cases/30-codex-stdio.ts b/src/selftest/cases/30-codex-stdio.ts index 70752ee..4a3f92b 100644 --- a/src/selftest/cases/30-codex-stdio.ts +++ b/src/selftest/cases/30-codex-stdio.ts @@ -117,6 +117,14 @@ const selfTest: SelfTestCase = async (context) => { const live = await createRunWithCommand(client, context, "hello live events", "selftest-live-tool-events", 15_000); const livePromise = runOnce({ managerUrl: server.baseUrl, runId: live.runId, codexCommand: context.fakeCodexCommand, codexArgs: context.fakeCodexArgs, codexHome: context.codexHome, env: { CODEX_HOME: context.codexHome, AGENTRUN_FAKE_CODEX_MODE: "slow-tool-events" }, oneShot: true }) as Promise; await waitForEvent(client, live.runId, (event) => event.type === "tool_call" && eventPayload(event).method === "item/started", "live tool_call start event"); + const liveEvents = await client.get(`/api/v1/runs/${live.runId}/events?afterSeq=0&limit=100`) as { items?: Array<{ type: string; payload: unknown }> }; + const liveToolStart = (liveEvents.items ?? []).find((event) => event.type === "tool_call" && eventPayload(event).method === "item/started") ?? { payload: {} }; + assert.equal(eventPayload(liveToolStart).item, undefined, "tool_call started event must not persist raw Codex item JSON"); + assert.equal(eventPayload(liveToolStart).itemPreview, undefined, "tool_call started event must not persist raw Codex item preview"); + assert.equal(JSON.stringify(eventPayload(liveToolStart).summary ?? {}).includes("\\\"method\\\":\\\"item/started\\\""), false, "tool_call started event summary must not embed raw protocol JSON"); + assert.equal(String(eventPayload(liveToolStart).summary ? (eventPayload(liveToolStart).summary as JsonRecord).text ?? "" : "").includes("commandExecution started:"), true, "tool_call started event summary should be human readable"); + assert.equal(eventPayload(liveToolStart).toolName, "commandExecution"); + assert.equal(eventPayload(liveToolStart).type, "commandExecution"); await waitForEvent(client, live.runId, (event) => event.type === "command_output" && String(eventPayload(event).text ?? "").includes("live output"), "live command output event"); const liveResult = await livePromise; assert.equal(liveResult.terminalStatus, "completed", "slow live tool event turn should complete"); @@ -132,9 +140,9 @@ const selfTest: SelfTestCase = async (context) => { assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "account/rateLimits/updated"), false, "rate limit update must not be persisted as backend_status"); assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "warning"), false, "low value warnings must not be persisted as backend_status"); assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "configWarning"), false, "low value config warnings must not be persisted as backend_status"); - assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayloadItem(event).type === "reasoning"), false, "reasoning items must not be persisted as tool_call"); - assert.ok(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).method === "item/started" && eventPayloadItem(event).type === "commandExecution"), "real commandExecution tool call should remain visible"); - assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayloadItem(event).type !== "commandExecution"), false, "non-commandExecution item lifecycle must not be persisted as tool_call"); + assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type === "reasoning"), false, "reasoning items must not be persisted as tool_call"); + assert.ok(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).method === "item/started" && eventPayload(event).type === "commandExecution"), "real commandExecution tool call should remain visible"); + assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type !== "commandExecution"), false, "non-commandExecution item lifecycle must not be persisted as tool_call"); assert.equal(noisyPhases.includes("backend-turn-running"), false, "backend progress ticks must be summarized instead of persisted as durable trace events"); const noisyFinished = noisyItems.find((event) => event.type === "backend_status" && eventPayload(event).phase === "backend-turn-finished"); assert.equal(eventPayload(noisyFinished ?? { payload: {} }).progressEventsPrinted, false, "backend-turn-finished must declare progress ticks were not printed as events");