Merge pull request #186 from pikasTech/fix/issue-1291-mcp-tool-trace

修复 MCP 工具调用 Trace 可见性
2026-06-15 23:49:55 +08:00
parent 990165c097 b94b9b2027
commit 88a17933c7
4 changed files with 81 additions and 7 deletions
@@ -70,9 +70,9 @@ Adapter 输出给 runner 的 event 类型至少包括：

 事件必须有上限和分页友好形态。大型日志、完整 stdout 或完整 trace 应进入 logPath 或后续 artifact，不得一次性塞入单个 event 造成输出爆炸。

-Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛，不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化：`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、非用户可见工具 item 的通用 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning` 和 `configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要，只包含总数、`methods: [{ method, count }]` 和 `itemTypes: [{ itemType, count }]`，不包含 reasoning 文本、Secret、token 或 env value。method 和 item type 不得作为 JSON object key 输出，避免 `thread/tokenUsage/updated` 这类协议名被 redaction 误判为敏感 key。真实 `agentMessage`、`commandExecution`、`webSearch`、`command_output`、error、terminal 和关键生命周期事件必须继续保留。
+Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛，不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化：`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、非用户可见工具 item 的通用 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning` 和 `configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要，只包含总数、`methods: [{ method, count }]` 和 `itemTypes: [{ itemType, count }]`，不包含 reasoning 文本、Secret、token 或 env value。method 和 item type 不得作为 JSON object key 输出，避免 `thread/tokenUsage/updated` 这类协议名被 redaction 误判为敏感 key。真实 `agentMessage`、`commandExecution`、`webSearch`、`mcpToolCall`、`dynamicToolCall`、`command_output`、error、terminal 和关键生命周期事件必须继续保留。

-用户可见工具生命周期的 `tool_call` event 只能输出面向人和消费侧的扁平字段，例如 `method`、`itemId`、`toolName`、`type`、`command`、`cwd`、`status`、`processId` 和 `valuesPrinted=false`。当前可见工具类型包括 `commandExecution` 和 `webSearch`；不得把 Codex app-server 的原始 `item` JSON、`itemPreview` 或嵌套协议摘要写入 `message`、`outputSummary`、`stdoutSummary` 或 payload；命令实际 stdout/stderr 只通过 `command_output` 或 completed `commandExecution` 摘要输出。
+用户可见工具生命周期的 `tool_call` event 只能输出面向人和消费侧的扁平字段，例如 `method`、`itemId`、`toolName`、`type`、`command`、`cwd`、`status`、`processId` 和 `valuesPrinted=false`。当前可见工具类型包括 `commandExecution`、`webSearch`、`mcpToolCall` 和 `dynamicToolCall`；`mcpToolCall` / `dynamicToolCall` 的 `command` 必须是工具名加 redacted 参数摘要，便于 HWLAB Trace 单行展示调用意图。不得把 Codex app-server 的原始 `item` JSON、`itemPreview` 或嵌套协议摘要写入 `message`、`outputSummary`、`stdoutSummary` 或 payload；命令实际 stdout/stderr 只通过 `command_output` 或 completed `commandExecution` 摘要输出。

 ## Failure Mapping

@@ -849,7 +849,7 @@ function isSuppressedCodexStatusNotification(method: string): boolean {
 }

 function isVisibleCodexToolItemType(itemType: string): boolean {
-  return itemType === "commandExecution" || itemType === "webSearch";
+  return itemType === "commandExecution" || itemType === "webSearch" || itemType === "mcpToolCall" || itemType === "dynamicToolCall"; // Exact, case-sensitive match against the four user-visible tool item types; this commit adds mcpToolCall and dynamicToolCall.
 }

 function assistantMessageEventForCompleted(message: CompletedAssistantMessage, messageIndex: number): BackendEvent {
@@ -944,9 +944,10 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord {
  const redacted = redactJson(item);
  const itemId = typeof redacted.id === "string" ? redacted.id : null;
  const itemType = typeof redacted.type === "string" ? redacted.type : "unknown";
-  const command = typeof redacted.command === "string" ? redacted.command : null;
+  const toolName = toolCallName(redacted, itemType);
+  const command = toolCallCommandSummary(redacted, itemType, toolName);
  const cwd = typeof redacted.cwd === "string" ? redacted.cwd : null;
-  const status = typeof redacted.status === "string" ? redacted.status : null;
+  const status = toolCallStatus(method, redacted);
  const processId = typeof redacted.processId === "string" || typeof redacted.processId === "number" ? String(redacted.processId) : null;
  const exitCode = typeof redacted.exitCode === "number" ? redacted.exitCode : null;
  const durationMs = typeof redacted.durationMs === "number" ? redacted.durationMs : null;
@@ -955,7 +956,7 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord {
    method,
    itemId,
    type: itemType,
-    toolName: itemType,
+    toolName,
    ...(command ? { command } : {}),
    ...(cwd ? { cwd } : {}),
    ...(status ? { status } : {}),
@@ -967,6 +968,47 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord {
  };
 }

+function toolCallStatus(method: string, item: JsonRecord): string | null { // Resolves the `status` value for a tool_call payload; null when neither the item nor the method supplies one.
+  if (typeof item.status === "string" && item.status.trim().length > 0) return item.status; // A non-blank string status on the item wins and is returned as stored (not trimmed).
+  if (method === "item/started") return "started"; // Otherwise the status is derived from the lifecycle notification method.
+  if (method === "item/completed") return "completed";
+  return null; // Any other method without an item status yields no status.
+}
+
+function toolCallName(item: JsonRecord, itemType: string): string { // Picks a display name for the tool, falling back to the item type when the item carries no name.
+  const direct = firstToolCallString(item, ["toolName", "name", "tool", "functionName"]); // First non-blank string among these keys, tried in this order.
+  const server = firstToolCallString(item, ["serverName", "server", "mcpServer"]); // Optional server identifier, looked up the same way.
+  if (server && direct && !direct.includes(server)) return `${server}.${direct}`; // Prefix with the server only when the name does not already contain it as a substring.
+  return direct ?? itemType; // A server value without a tool name is ignored here.
+}
+
+function toolCallCommandSummary(item: JsonRecord, itemType: string, toolName: string): string | null { // Builds the single-line `command` text for a tool_call payload, or null when none applies.
+  const direct = typeof item.command === "string" && item.command.trim().length > 0 ? item.command : null; // A non-blank `command` string on the item is used as stored (not trimmed).
+  if (direct) return direct;
+  if (itemType !== "mcpToolCall" && itemType !== "dynamicToolCall") return null; // Only these two item types get a synthesized summary.
+  const input = toolCallInputSummary(item); // Bounded text form of the call arguments, or null when there are none.
+  return input ? `${toolName} ${input}` : toolName; // Tool name followed by the argument summary, or the bare tool name.
+}
+
+function toolCallInputSummary(item: JsonRecord): string | null { // Returns a bounded text summary of the first usable argument-like field on the item, or null if there is none.
+  for (const key of ["arguments", "args", "input", "params", "parameters"] as const) { // Candidate keys are tried in this order.
+    if (!Object.prototype.hasOwnProperty.call(item, key)) continue; // Own properties only; inherited keys are ignored.
+    const value = item[key];
+    if (value === null || value === undefined) continue;
+    const text = typeof value === "string" ? value : JSON.stringify(value); // Strings pass through; anything else is JSON-encoded (JSON.stringify may return undefined, hence the typeof check below).
+    if (typeof text === "string" && text.trim().length > 0 && text.trim() !== "{}") return String(boundedTextSummary(text, { limitChars: 600 }).text); // Blank text and an empty object "{}" are skipped so a later key can still match; boundedTextSummary (defined elsewhere) is called with limitChars: 600.
+  }
+  return null;
+}
+
+function firstToolCallString(item: JsonRecord, keys: readonly string[]): string | null { // Returns the first value among `keys` that is a non-blank string, or null when none qualifies.
+  for (const key of keys) {
+    const value = item[key]; // Non-string values under a key are skipped rather than converted.
+    if (typeof value === "string" && value.trim().length > 0) return value; // Returned as stored, without trimming.
+  }
+  return null;
+}
+
 function toolCallOutputSummary(item: JsonRecord): string | null {
  const direct = item.outputSummary ?? item.stdoutSummary ?? item.message;
  if (typeof direct === "string" && direct.trim().length > 0) return String(boundedTextSummary(direct).text);
@@ -140,6 +140,23 @@ const selfTest: SelfTestCase = async (context) => {
    assert.equal(webSearchItems.some((event) => event.type === "tool_call" && eventPayload(event).type === "reasoning"), false, "reasoning items must still not be persisted as tool_call");
    assertNoSecretLeak(webSearchEvents);

+    const mcpTool = await createRunWithCommand(client, context, "hello mcp paper search", "selftest-mcp-tool-call", 15_000);
+    const mcpToolResult = await runOnce({ managerUrl: server.baseUrl, runId: mcpTool.runId, codexCommand: context.fakeCodexCommand, codexArgs: context.fakeCodexArgs, codexHome: context.codexHome, env: { CODEX_HOME: context.codexHome, AGENTRUN_FAKE_CODEX_MODE: "mcp-tool-call" }, oneShot: true }) as JsonRecord;
+    assert.equal(mcpToolResult.terminalStatus, "completed", "MCP tool call turn should complete");
+    const mcpToolEvents = await client.get(`/api/v1/runs/${mcpTool.runId}/events?afterSeq=0&limit=100`) as { items?: Array<{ type: string; payload: unknown }> };
+    const mcpToolItems = mcpToolEvents.items ?? [];
+    const mcpStarted = mcpToolItems.find((event) => event.type === "tool_call" && eventPayload(event).type === "mcpToolCall" && eventPayload(event).method === "item/started");
+    const mcpCompleted = mcpToolItems.find((event) => event.type === "tool_call" && eventPayload(event).type === "mcpToolCall" && eventPayload(event).method === "item/completed");
+    assert.ok(mcpStarted, "mcpToolCall start must remain visible as tool_call");
+    assert.ok(mcpCompleted, "mcpToolCall completion must remain visible as tool_call");
+    assert.equal(eventPayload(mcpStarted ?? { payload: {} }).toolName, "mcp__codex_apps__scispace__search_papers");
+    assert.match(String(eventPayload(mcpStarted ?? { payload: {} }).command ?? ""), /large language models/u, "MCP tool command summary should include redacted call arguments");
+    assert.equal(eventPayload(mcpStarted ?? { payload: {} }).item, undefined, "mcpToolCall event must not persist raw Codex item JSON");
+    assert.equal(eventPayload(mcpStarted ?? { payload: {} }).itemPreview, undefined, "mcpToolCall event must not persist raw Codex item preview");
+    assert.ok(mcpToolItems.some((event) => event.type === "assistant_message" && eventPayload(event).text === "Paper search tool completed."), "MCP final assistant message should remain visible");
+    assert.equal(mcpToolItems.some((event) => event.type === "backend_status" && JSON.stringify(eventPayload(event).itemTypes ?? []).includes("mcpToolCall")), false, "mcpToolCall must not be counted as a suppressed notification");
+    assertNoSecretLeak(mcpToolEvents);
+
    const staleThread = await createStaleThreadRun(client, context);
    const staleThreadResult = await runOnce({
      managerUrl: server.baseUrl,
@@ -197,7 +214,7 @@ const selfTest: SelfTestCase = async (context) => {
    assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "configWarning"), false, "low value config warnings must not be persisted as backend_status");
    assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type === "reasoning"), false, "reasoning items must not be persisted as tool_call");
    assert.ok(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).method === "item/started" && eventPayload(event).type === "commandExecution"), "real commandExecution tool call should remain visible");
-    assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type !== "commandExecution" && eventPayload(event).type !== "webSearch"), false, "only user-visible tool lifecycle items should be persisted as tool_call");
+    assert.equal(noisyItems.some((event) => event.type === "tool_call" && !isVisibleToolType(String(eventPayload(event).type ?? ""))), false, "only user-visible tool lifecycle items should be persisted as tool_call");
    assert.equal(noisyItems.some((event) => event.type === "backend_status" && String(eventPayload(event).phase ?? "").startsWith("item/agentMessage:")), false, "agentMessage lifecycle must not be persisted as backend_status noise");
    assert.equal(noisyPhases.includes("backend-turn-running"), false, "backend progress ticks must be summarized instead of persisted as durable trace events");
    const noisyFinished = noisyItems.find((event) => event.type === "backend_status" && eventPayload(event).phase === "backend-turn-finished");
@@ -461,6 +478,10 @@ function eventPayload(event: { payload: unknown }): JsonRecord {
  return typeof event.payload === "object" && event.payload !== null && !Array.isArray(event.payload) ? event.payload as JsonRecord : {};
 }

+function isVisibleToolType(value: string): boolean { // Self-test helper: true for the item types that are allowed to appear as persisted tool_call events.
+  return value === "commandExecution" || value === "webSearch" || value === "mcpToolCall" || value === "dynamicToolCall"; // Same four literals as isVisibleCodexToolItemType in the adapter hunk of this diff.
+}
+
 function countEntriesByName(value: unknown, keyName: "method" | "itemType"): Record<string, number> {
  const output: Record<string, number> = {};
  if (!Array.isArray(value)) return output;
@@ -239,6 +239,17 @@ for await (const line of rl) {
      respond(message.id, { turn });
      continue;
    }
+    if (mode === "mcp-tool-call") {
+      turnCounter += 1;
+      const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };
+      notify("turn/started", { turn });
+      notify("item/started", { item: { id: "mcp_search_selftest", type: "mcpToolCall", name: "mcp__codex_apps__scispace__search_papers", arguments: { searchTerm: "large language models" } } });
+      notify("item/completed", { item: { id: "mcp_search_selftest", type: "mcpToolCall", name: "mcp__codex_apps__scispace__search_papers", status: "completed", arguments: { searchTerm: "large language models" }, outputSummary: "returned paper titles and abstracts" } });
+      notify("item/completed", { item: { id: "msg_mcp_search", type: "agentMessage", text: "Paper search tool completed." } });
+      notify("turn/completed", { turn });
+      respond(message.id, { turn });
+      continue;
+    }
    if (mode === "slow-tool-events") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };