From b94b9b20279377c5dc44058feadda198a5fff757 Mon Sep 17 00:00:00 2001 From: lyon Date: Mon, 15 Jun 2026 23:49:00 +0800 Subject: [PATCH] fix: expose mcp tool calls in trace events --- docs/reference/spec-v01-backend-adapter.md | 4 +- src/backend/codex-stdio.ts | 50 ++++++++++++++++++++-- src/selftest/cases/30-codex-stdio.ts | 23 +++++++++- src/selftest/fake-codex-app-server.ts | 11 +++++ 4 files changed, 81 insertions(+), 7 deletions(-) diff --git a/docs/reference/spec-v01-backend-adapter.md b/docs/reference/spec-v01-backend-adapter.md index 0511b6f..bd751d2 100644 --- a/docs/reference/spec-v01-backend-adapter.md +++ b/docs/reference/spec-v01-backend-adapter.md @@ -70,9 +70,9 @@ Adapter 输出给 runner 的 event 类型至少包括: 事件必须有上限和分页友好形态。大型日志、完整 stdout 或完整 trace 应进入 logPath 或后续 artifact,不得一次性塞入单个 event 造成输出爆炸。 -Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛,不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化:`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、非用户可见工具 item 的通用 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning` 和 `configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要,只包含总数、`methods: [{ method, count }]` 和 `itemTypes: [{ itemType, count }]`,不包含 reasoning 文本、Secret、token 或 env value。method 和 item type 不得作为 JSON object key 输出,避免 `thread/tokenUsage/updated` 这类协议名被 redaction 误判为敏感 key。真实 `agentMessage`、`commandExecution`、`webSearch`、`command_output`、error、terminal 和关键生命周期事件必须继续保留。 +Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛,不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化:`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、非用户可见工具 item 的通用 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning` 和 `configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要,只包含总数、`methods: [{ method, count }]` 和 `itemTypes: [{ itemType, count }]`,不包含 reasoning 文本、Secret、token 或 env value。method 和 item type 不得作为 JSON object key 输出,避免 `thread/tokenUsage/updated` 这类协议名被 redaction 误判为敏感 key。真实 `agentMessage`、`commandExecution`、`webSearch`、`mcpToolCall`、`dynamicToolCall`、`command_output`、error、terminal 和关键生命周期事件必须继续保留。 -用户可见工具生命周期的 `tool_call` event 只能输出面向人和消费侧的扁平字段,例如 `method`、`itemId`、`toolName`、`type`、`command`、`cwd`、`status`、`processId` 和 `valuesPrinted=false`。当前可见工具类型包括 `commandExecution` 和 `webSearch`;不得把 Codex app-server 的原始 `item` JSON、`itemPreview` 或嵌套协议摘要写入 `message`、`outputSummary`、`stdoutSummary` 或 payload;命令实际 stdout/stderr 只通过 `command_output` 或 completed `commandExecution` 摘要输出。 +用户可见工具生命周期的 `tool_call` event 只能输出面向人和消费侧的扁平字段,例如 `method`、`itemId`、`toolName`、`type`、`command`、`cwd`、`status`、`processId` 和 `valuesPrinted=false`。当前可见工具类型包括 `commandExecution`、`webSearch`、`mcpToolCall` 和 `dynamicToolCall`;`mcpToolCall` / `dynamicToolCall` 的 `command` 必须是工具名加 redacted 参数摘要,便于 HWLAB Trace 单行展示调用意图。不得把 Codex app-server 的原始 `item` JSON、`itemPreview` 或嵌套协议摘要写入 `message`、`outputSummary`、`stdoutSummary` 或 payload;命令实际 stdout/stderr 只通过 `command_output` 或 completed `commandExecution` 摘要输出。 ## Failure Mapping diff --git a/src/backend/codex-stdio.ts b/src/backend/codex-stdio.ts index 3ea5a8b..a4a1a4f 100644 --- a/src/backend/codex-stdio.ts +++ b/src/backend/codex-stdio.ts @@ -849,7 +849,7 @@ function isSuppressedCodexStatusNotification(method: string): boolean { } function isVisibleCodexToolItemType(itemType: string): boolean { - return itemType === "commandExecution" || itemType === "webSearch"; + return itemType === "commandExecution" || itemType === "webSearch" || itemType === "mcpToolCall" || itemType === "dynamicToolCall"; } function assistantMessageEventForCompleted(message: CompletedAssistantMessage, messageIndex: number): BackendEvent { @@ -944,9 +944,10 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord { const redacted = redactJson(item); const itemId = typeof redacted.id === "string" ? redacted.id : null; const itemType = typeof redacted.type === "string" ? redacted.type : "unknown"; - const command = typeof redacted.command === "string" ? redacted.command : null; + const toolName = toolCallName(redacted, itemType); + const command = toolCallCommandSummary(redacted, itemType, toolName); const cwd = typeof redacted.cwd === "string" ? redacted.cwd : null; - const status = typeof redacted.status === "string" ? redacted.status : null; + const status = toolCallStatus(method, redacted); const processId = typeof redacted.processId === "string" || typeof redacted.processId === "number" ? String(redacted.processId) : null; const exitCode = typeof redacted.exitCode === "number" ? redacted.exitCode : null; const durationMs = typeof redacted.durationMs === "number" ? redacted.durationMs : null; @@ -955,7 +956,7 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord { method, itemId, type: itemType, - toolName: itemType, + toolName, ...(command ? { command } : {}), ...(cwd ? { cwd } : {}), ...(status ? { status } : {}), @@ -967,6 +968,47 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord { }; } +function toolCallStatus(method: string, item: JsonRecord): string | null { + if (typeof item.status === "string" && item.status.trim().length > 0) return item.status; + if (method === "item/started") return "started"; + if (method === "item/completed") return "completed"; + return null; +} + +function toolCallName(item: JsonRecord, itemType: string): string { + const direct = firstToolCallString(item, ["toolName", "name", "tool", "functionName"]); + const server = firstToolCallString(item, ["serverName", "server", "mcpServer"]); + if (server && direct && !direct.includes(server)) return `${server}.${direct}`; + return direct ?? itemType; +} + +function toolCallCommandSummary(item: JsonRecord, itemType: string, toolName: string): string | null { + const direct = typeof item.command === "string" && item.command.trim().length > 0 ? item.command : null; + if (direct) return direct; + if (itemType !== "mcpToolCall" && itemType !== "dynamicToolCall") return null; + const input = toolCallInputSummary(item); + return input ? `${toolName} ${input}` : toolName; +} + +function toolCallInputSummary(item: JsonRecord): string | null { + for (const key of ["arguments", "args", "input", "params", "parameters"] as const) { + if (!Object.prototype.hasOwnProperty.call(item, key)) continue; + const value = item[key]; + if (value === null || value === undefined) continue; + const text = typeof value === "string" ? value : JSON.stringify(value); + if (typeof text === "string" && text.trim().length > 0 && text.trim() !== "{}") return String(boundedTextSummary(text, { limitChars: 600 }).text); + } + return null; +} + +function firstToolCallString(item: JsonRecord, keys: readonly string[]): string | null { + for (const key of keys) { + const value = item[key]; + if (typeof value === "string" && value.trim().length > 0) return value; + } + return null; +} + function toolCallOutputSummary(item: JsonRecord): string | null { const direct = item.outputSummary ?? item.stdoutSummary ?? item.message; if (typeof direct === "string" && direct.trim().length > 0) return String(boundedTextSummary(direct).text); diff --git a/src/selftest/cases/30-codex-stdio.ts b/src/selftest/cases/30-codex-stdio.ts index 49a278f..21c9ad0 100644 --- a/src/selftest/cases/30-codex-stdio.ts +++ b/src/selftest/cases/30-codex-stdio.ts @@ -140,6 +140,23 @@ const selfTest: SelfTestCase = async (context) => { assert.equal(webSearchItems.some((event) => event.type === "tool_call" && eventPayload(event).type === "reasoning"), false, "reasoning items must still not be persisted as tool_call"); assertNoSecretLeak(webSearchEvents); + const mcpTool = await createRunWithCommand(client, context, "hello mcp paper search", "selftest-mcp-tool-call", 15_000); + const mcpToolResult = await runOnce({ managerUrl: server.baseUrl, runId: mcpTool.runId, codexCommand: context.fakeCodexCommand, codexArgs: context.fakeCodexArgs, codexHome: context.codexHome, env: { CODEX_HOME: context.codexHome, AGENTRUN_FAKE_CODEX_MODE: "mcp-tool-call" }, oneShot: true }) as JsonRecord; + assert.equal(mcpToolResult.terminalStatus, "completed", "MCP tool call turn should complete"); + const mcpToolEvents = await client.get(`/api/v1/runs/${mcpTool.runId}/events?afterSeq=0&limit=100`) as { items?: Array<{ type: string; payload: unknown }> }; + const mcpToolItems = mcpToolEvents.items ?? []; + const mcpStarted = mcpToolItems.find((event) => event.type === "tool_call" && eventPayload(event).type === "mcpToolCall" && eventPayload(event).method === "item/started"); + const mcpCompleted = mcpToolItems.find((event) => event.type === "tool_call" && eventPayload(event).type === "mcpToolCall" && eventPayload(event).method === "item/completed"); + assert.ok(mcpStarted, "mcpToolCall start must remain visible as tool_call"); + assert.ok(mcpCompleted, "mcpToolCall completion must remain visible as tool_call"); + assert.equal(eventPayload(mcpStarted ?? { payload: {} }).toolName, "mcp__codex_apps__scispace__search_papers"); + assert.match(String(eventPayload(mcpStarted ?? { payload: {} }).command ?? ""), /large language models/u, "MCP tool command summary should include redacted call arguments"); + assert.equal(eventPayload(mcpStarted ?? { payload: {} }).item, undefined, "mcpToolCall event must not persist raw Codex item JSON"); + assert.equal(eventPayload(mcpStarted ?? { payload: {} }).itemPreview, undefined, "mcpToolCall event must not persist raw Codex item preview"); + assert.ok(mcpToolItems.some((event) => event.type === "assistant_message" && eventPayload(event).text === "Paper search tool completed."), "MCP final assistant message should remain visible"); + assert.equal(mcpToolItems.some((event) => event.type === "backend_status" && JSON.stringify(eventPayload(event).itemTypes ?? []).includes("mcpToolCall")), false, "mcpToolCall must not be counted as a suppressed notification"); + assertNoSecretLeak(mcpToolEvents); + const staleThread = await createStaleThreadRun(client, context); const staleThreadResult = await runOnce({ managerUrl: server.baseUrl, @@ -197,7 +214,7 @@ const selfTest: SelfTestCase = async (context) => { assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "configWarning"), false, "low value config warnings must not be persisted as backend_status"); assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type === "reasoning"), false, "reasoning items must not be persisted as tool_call"); assert.ok(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).method === "item/started" && eventPayload(event).type === "commandExecution"), "real commandExecution tool call should remain visible"); - assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type !== "commandExecution" && eventPayload(event).type !== "webSearch"), false, "only user-visible tool lifecycle items should be persisted as tool_call"); + assert.equal(noisyItems.some((event) => event.type === "tool_call" && !isVisibleToolType(String(eventPayload(event).type ?? ""))), false, "only user-visible tool lifecycle items should be persisted as tool_call"); assert.equal(noisyItems.some((event) => event.type === "backend_status" && String(eventPayload(event).phase ?? "").startsWith("item/agentMessage:")), false, "agentMessage lifecycle must not be persisted as backend_status noise"); assert.equal(noisyPhases.includes("backend-turn-running"), false, "backend progress ticks must be summarized instead of persisted as durable trace events"); const noisyFinished = noisyItems.find((event) => event.type === "backend_status" && eventPayload(event).phase === "backend-turn-finished"); @@ -461,6 +478,10 @@ function eventPayload(event: { payload: unknown }): JsonRecord { return typeof event.payload === "object" && event.payload !== null && !Array.isArray(event.payload) ? event.payload as JsonRecord : {}; } +function isVisibleToolType(value: string): boolean { + return value === "commandExecution" || value === "webSearch" || value === "mcpToolCall" || value === "dynamicToolCall"; +} + function countEntriesByName(value: unknown, keyName: "method" | "itemType"): Record { const output: Record = {}; if (!Array.isArray(value)) return output; diff --git a/src/selftest/fake-codex-app-server.ts b/src/selftest/fake-codex-app-server.ts index 24771ea..6749732 100644 --- a/src/selftest/fake-codex-app-server.ts +++ b/src/selftest/fake-codex-app-server.ts @@ -239,6 +239,17 @@ for await (const line of rl) { respond(message.id, { turn }); continue; } + if (mode === "mcp-tool-call") { + turnCounter += 1; + const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" }; + notify("turn/started", { turn }); + notify("item/started", { item: { id: "mcp_search_selftest", type: "mcpToolCall", name: "mcp__codex_apps__scispace__search_papers", arguments: { searchTerm: "large language models" } } }); + notify("item/completed", { item: { id: "mcp_search_selftest", type: "mcpToolCall", name: "mcp__codex_apps__scispace__search_papers", status: "completed", arguments: { searchTerm: "large language models" }, outputSummary: "returned paper titles and abstracts" } }); + notify("item/completed", { item: { id: "msg_mcp_search", type: "agentMessage", text: "Paper search tool completed." } }); + notify("turn/completed", { turn }); + respond(message.id, { turn }); + continue; + } if (mode === "slow-tool-events") { turnCounter += 1; const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };