From b94b9b20279377c5dc44058feadda198a5fff757 Mon Sep 17 00:00:00 2001
From: lyon <liang6516@outlook.com>
Date: Mon, 15 Jun 2026 23:49:00 +0800
Subject: [PATCH] fix: expose mcp tool calls in trace events

---
 docs/reference/spec-v01-backend-adapter.md |  4 +-
 src/backend/codex-stdio.ts                 | 50 ++++++++++++++++++++--
 src/selftest/cases/30-codex-stdio.ts       | 23 +++++++++-
 src/selftest/fake-codex-app-server.ts      | 11 +++++
 4 files changed, 81 insertions(+), 7 deletions(-)

diff --git a/docs/reference/spec-v01-backend-adapter.md b/docs/reference/spec-v01-backend-adapter.md
index 0511b6f..bd751d2 100644
--- a/docs/reference/spec-v01-backend-adapter.md
+++ b/docs/reference/spec-v01-backend-adapter.md
@@ -70,9 +70,9 @@ Adapter 输出给 runner 的 event 类型至少包括：
 
 事件必须有上限和分页友好形态。大型日志、完整 stdout 或完整 trace 应进入 logPath 或后续 artifact，不得一次性塞入单个 event 造成输出爆炸。
 
-Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛，不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化：`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、非用户可见工具 item 的通用 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning` 和 `configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要，只包含总数、`methods: [{ method, count }]` 和 `itemTypes: [{ itemType, count }]`，不包含 reasoning 文本、Secret、token 或 env value。method 和 item type 不得作为 JSON object key 输出，避免 `thread/tokenUsage/updated` 这类协议名被 redaction 误判为敏感 key。真实 `agentMessage`、`commandExecution`、`webSearch`、`command_output`、error、terminal 和关键生命周期事件必须继续保留。
+Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛，不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化：`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、非用户可见工具 item 的通用 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning` 和 `configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要，只包含总数、`methods: [{ method, count }]` 和 `itemTypes: [{ itemType, count }]`，不包含 reasoning 文本、Secret、token 或 env value。method 和 item type 不得作为 JSON object key 输出，避免 `thread/tokenUsage/updated` 这类协议名被 redaction 误判为敏感 key。真实 `agentMessage`、`commandExecution`、`webSearch`、`mcpToolCall`、`dynamicToolCall`、`command_output`、error、terminal 和关键生命周期事件必须继续保留。
 
-用户可见工具生命周期的 `tool_call` event 只能输出面向人和消费侧的扁平字段，例如 `method`、`itemId`、`toolName`、`type`、`command`、`cwd`、`status`、`processId` 和 `valuesPrinted=false`。当前可见工具类型包括 `commandExecution` 和 `webSearch`；不得把 Codex app-server 的原始 `item` JSON、`itemPreview` 或嵌套协议摘要写入 `message`、`outputSummary`、`stdoutSummary` 或 payload；命令实际 stdout/stderr 只通过 `command_output` 或 completed `commandExecution` 摘要输出。
+用户可见工具生命周期的 `tool_call` event 只能输出面向人和消费侧的扁平字段，例如 `method`、`itemId`、`toolName`、`type`、`command`、`cwd`、`status`、`processId` 和 `valuesPrinted=false`。当前可见工具类型包括 `commandExecution`、`webSearch`、`mcpToolCall` 和 `dynamicToolCall`；`mcpToolCall` / `dynamicToolCall` 的 `command` 必须是工具名加 redacted 参数摘要，便于 HWLAB Trace 单行展示调用意图。不得把 Codex app-server 的原始 `item` JSON、`itemPreview` 或嵌套协议摘要写入 `message`、`outputSummary`、`stdoutSummary` 或 payload；命令实际 stdout/stderr 只通过 `command_output` 或 completed `commandExecution` 摘要输出。
 
 ## Failure Mapping
 
diff --git a/src/backend/codex-stdio.ts b/src/backend/codex-stdio.ts
index 3ea5a8b..a4a1a4f 100644
--- a/src/backend/codex-stdio.ts
+++ b/src/backend/codex-stdio.ts
@@ -849,7 +849,7 @@ function isSuppressedCodexStatusNotification(method: string): boolean {
 }
 
 function isVisibleCodexToolItemType(itemType: string): boolean {
-  return itemType === "commandExecution" || itemType === "webSearch";
+  return itemType === "commandExecution" || itemType === "webSearch" || itemType === "mcpToolCall" || itemType === "dynamicToolCall"; // exact-match allowlist of item types treated as visible tool items
 }
 
 function assistantMessageEventForCompleted(message: CompletedAssistantMessage, messageIndex: number): BackendEvent {
@@ -944,9 +944,10 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord {
   const redacted = redactJson(item);
   const itemId = typeof redacted.id === "string" ? redacted.id : null;
   const itemType = typeof redacted.type === "string" ? redacted.type : "unknown";
-  const command = typeof redacted.command === "string" ? redacted.command : null;
+  const toolName = toolCallName(redacted, itemType);
+  const command = toolCallCommandSummary(redacted, itemType, toolName);
   const cwd = typeof redacted.cwd === "string" ? redacted.cwd : null;
-  const status = typeof redacted.status === "string" ? redacted.status : null;
+  const status = toolCallStatus(method, redacted);
   const processId = typeof redacted.processId === "string" || typeof redacted.processId === "number" ? String(redacted.processId) : null;
   const exitCode = typeof redacted.exitCode === "number" ? redacted.exitCode : null;
   const durationMs = typeof redacted.durationMs === "number" ? redacted.durationMs : null;
@@ -955,7 +956,7 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord {
     method,
     itemId,
     type: itemType,
-    toolName: itemType,
+    toolName,
     ...(command ? { command } : {}),
     ...(cwd ? { cwd } : {}),
     ...(status ? { status } : {}),
@@ -967,6 +968,47 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord {
   };
 }
 
+function toolCallStatus(method: string, item: JsonRecord): string | null { // Status for a tool_call payload: explicit item.status first, otherwise inferred from the notification method.
+  if (typeof item.status === "string" && item.status.trim().length > 0) return item.status; // a non-blank explicit status wins and is returned untrimmed
+  if (method === "item/started") return "started";
+  if (method === "item/completed") return "completed";
+  return null; // any other method without an explicit status yields no status
+}
+
+function toolCallName(item: JsonRecord, itemType: string): string { // Display name for a tool item; falls back to itemType when the item carries no usable name field.
+  const direct = firstToolCallString(item, ["toolName", "name", "tool", "functionName"]); // candidate name fields, checked in this priority order
+  const server = firstToolCallString(item, ["serverName", "server", "mcpServer"]);
+  if (server && direct && !direct.includes(server)) return `${server}.${direct}`; // prefix with the server unless the name already contains it (substring check)
+  return direct ?? itemType; // a server value without a name is ignored here
+}
+
+function toolCallCommandSummary(item: JsonRecord, itemType: string, toolName: string): string | null { // Single `command` string for a tool_call payload, or null when none applies.
+  const direct = typeof item.command === "string" && item.command.trim().length > 0 ? item.command : null;
+  if (direct) return direct; // a non-blank item.command is used verbatim, whatever the item type
+  if (itemType !== "mcpToolCall" && itemType !== "dynamicToolCall") return null; // only MCP and dynamic tool calls get a synthesized command
+  const input = toolCallInputSummary(item);
+  return input ? `${toolName} ${input}` : toolName; // "<toolName> <input summary>", or the bare tool name when there is no input
+}
+
+function toolCallInputSummary(item: JsonRecord): string | null { // Bounded text form of the first usable input-like field on the item, or null when there is none.
+  for (const key of ["arguments", "args", "input", "params", "parameters"] as const) { // candidate field names, checked in this order
+    if (!Object.prototype.hasOwnProperty.call(item, key)) continue; // own properties only
+    const value = item[key];
+    if (value === null || value === undefined) continue;
+    const text = typeof value === "string" ? value : JSON.stringify(value); // strings pass through; other values are JSON-encoded (JSON.stringify may return undefined)
+    if (typeof text === "string" && text.trim().length > 0 && text.trim() !== "{}") return String(boundedTextSummary(text, { limitChars: 600 }).text); // blank or "{}" input falls through to the next key; limitChars: 600 is handed to boundedTextSummary
+  }
+  return null;
+}
+
+function firstToolCallString(item: JsonRecord, keys: readonly string[]): string | null { // First non-blank string found at item[key], scanning keys in order; null when none qualifies.
+  for (const key of keys) {
+    const value = item[key];
+    if (typeof value === "string" && value.trim().length > 0) return value; // returned as-is (untrimmed); non-string values are skipped
+  }
+  return null;
+}
+
 function toolCallOutputSummary(item: JsonRecord): string | null {
   const direct = item.outputSummary ?? item.stdoutSummary ?? item.message;
   if (typeof direct === "string" && direct.trim().length > 0) return String(boundedTextSummary(direct).text);
diff --git a/src/selftest/cases/30-codex-stdio.ts b/src/selftest/cases/30-codex-stdio.ts
index 49a278f..21c9ad0 100644
--- a/src/selftest/cases/30-codex-stdio.ts
+++ b/src/selftest/cases/30-codex-stdio.ts
@@ -140,6 +140,23 @@ const selfTest: SelfTestCase = async (context) => {
     assert.equal(webSearchItems.some((event) => event.type === "tool_call" && eventPayload(event).type === "reasoning"), false, "reasoning items must still not be persisted as tool_call");
     assertNoSecretLeak(webSearchEvents);
 
+    const mcpTool = await createRunWithCommand(client, context, "hello mcp paper search", "selftest-mcp-tool-call", 15_000);
+    const mcpToolResult = await runOnce({ managerUrl: server.baseUrl, runId: mcpTool.runId, codexCommand: context.fakeCodexCommand, codexArgs: context.fakeCodexArgs, codexHome: context.codexHome, env: { CODEX_HOME: context.codexHome, AGENTRUN_FAKE_CODEX_MODE: "mcp-tool-call" }, oneShot: true }) as JsonRecord;
+    assert.equal(mcpToolResult.terminalStatus, "completed", "MCP tool call turn should complete");
+    const mcpToolEvents = await client.get(`/api/v1/runs/${mcpTool.runId}/events?afterSeq=0&limit=100`) as { items?: Array<{ type: string; payload: unknown }> };
+    const mcpToolItems = mcpToolEvents.items ?? [];
+    const mcpStarted = mcpToolItems.find((event) => event.type === "tool_call" && eventPayload(event).type === "mcpToolCall" && eventPayload(event).method === "item/started");
+    const mcpCompleted = mcpToolItems.find((event) => event.type === "tool_call" && eventPayload(event).type === "mcpToolCall" && eventPayload(event).method === "item/completed");
+    assert.ok(mcpStarted, "mcpToolCall start must remain visible as tool_call");
+    assert.ok(mcpCompleted, "mcpToolCall completion must remain visible as tool_call");
+    assert.equal(eventPayload(mcpStarted ?? { payload: {} }).toolName, "mcp__codex_apps__scispace__search_papers");
+    assert.match(String(eventPayload(mcpStarted ?? { payload: {} }).command ?? ""), /large language models/u, "MCP tool command summary should include redacted call arguments");
+    assert.equal(eventPayload(mcpStarted ?? { payload: {} }).item, undefined, "mcpToolCall event must not persist raw Codex item JSON");
+    assert.equal(eventPayload(mcpStarted ?? { payload: {} }).itemPreview, undefined, "mcpToolCall event must not persist raw Codex item preview");
+    assert.ok(mcpToolItems.some((event) => event.type === "assistant_message" && eventPayload(event).text === "Paper search tool completed."), "MCP final assistant message should remain visible");
+    assert.equal(mcpToolItems.some((event) => event.type === "backend_status" && JSON.stringify(eventPayload(event).itemTypes ?? []).includes("mcpToolCall")), false, "mcpToolCall must not be counted as a suppressed notification");
+    assertNoSecretLeak(mcpToolEvents);
+
     const staleThread = await createStaleThreadRun(client, context);
     const staleThreadResult = await runOnce({
       managerUrl: server.baseUrl,
@@ -197,7 +214,7 @@ const selfTest: SelfTestCase = async (context) => {
     assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "configWarning"), false, "low value config warnings must not be persisted as backend_status");
     assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type === "reasoning"), false, "reasoning items must not be persisted as tool_call");
     assert.ok(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).method === "item/started" && eventPayload(event).type === "commandExecution"), "real commandExecution tool call should remain visible");
-    assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type !== "commandExecution" && eventPayload(event).type !== "webSearch"), false, "only user-visible tool lifecycle items should be persisted as tool_call");
+    assert.equal(noisyItems.some((event) => event.type === "tool_call" && !isVisibleToolType(String(eventPayload(event).type ?? ""))), false, "only user-visible tool lifecycle items should be persisted as tool_call");
     assert.equal(noisyItems.some((event) => event.type === "backend_status" && String(eventPayload(event).phase ?? "").startsWith("item/agentMessage:")), false, "agentMessage lifecycle must not be persisted as backend_status noise");
     assert.equal(noisyPhases.includes("backend-turn-running"), false, "backend progress ticks must be summarized instead of persisted as durable trace events");
     const noisyFinished = noisyItems.find((event) => event.type === "backend_status" && eventPayload(event).phase === "backend-turn-finished");
@@ -461,6 +478,10 @@ function eventPayload(event: { payload: unknown }): JsonRecord {
   return typeof event.payload === "object" && event.payload !== null && !Array.isArray(event.payload) ? event.payload as JsonRecord : {};
 }
 
+function isVisibleToolType(value: string): boolean { // True for the item types this selftest accepts on persisted tool_call events.
+  return value === "commandExecution" || value === "webSearch" || value === "mcpToolCall" || value === "dynamicToolCall"; // exact string match against four type names
+}
+
 function countEntriesByName(value: unknown, keyName: "method" | "itemType"): Record<string, number> {
   const output: Record<string, number> = {};
   if (!Array.isArray(value)) return output;
diff --git a/src/selftest/fake-codex-app-server.ts b/src/selftest/fake-codex-app-server.ts
index 24771ea..6749732 100644
--- a/src/selftest/fake-codex-app-server.ts
+++ b/src/selftest/fake-codex-app-server.ts
@@ -239,6 +239,17 @@ for await (const line of rl) {
       respond(message.id, { turn });
       continue;
     }
+    if (mode === "mcp-tool-call") {
+      turnCounter += 1;
+      const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };
+      notify("turn/started", { turn });
+      notify("item/started", { item: { id: "mcp_search_selftest", type: "mcpToolCall", name: "mcp__codex_apps__scispace__search_papers", arguments: { searchTerm: "large language models" } } });
+      notify("item/completed", { item: { id: "mcp_search_selftest", type: "mcpToolCall", name: "mcp__codex_apps__scispace__search_papers", status: "completed", arguments: { searchTerm: "large language models" }, outputSummary: "returned paper titles and abstracts" } });
+      notify("item/completed", { item: { id: "msg_mcp_search", type: "agentMessage", text: "Paper search tool completed." } });
+      notify("turn/completed", { turn });
+      respond(message.id, { turn });
+      continue;
+    }
     if (mode === "slow-tool-events") {
       turnCounter += 1;
       const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };