Merge pull request #186 from pikasTech/fix/issue-1291-mcp-tool-trace

修复 MCP 工具调用 Trace 可见性
This commit is contained in:
Lyon
2026-06-15 23:49:55 +08:00
committed by GitHub
4 changed files with 81 additions and 7 deletions
+2 -2
View File
@@ -70,9 +70,9 @@ Adapter 输出给 runner 的 event 类型至少包括:
事件必须有上限和分页友好形态。大型日志、完整 stdout 或完整 trace 应进入 logPath 或后续 artifact,不得一次性塞入单个 event 造成输出爆炸。
Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛,不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化:`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、非用户可见工具 item 的通用 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning`、`configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要,只包含总数、`methods: [{ method, count }]`、`itemTypes: [{ itemType, count }]`,不包含 reasoning 文本、Secret、token 或 env value。method 和 item type 不得作为 JSON object key 输出,避免 `thread/tokenUsage/updated` 这类协议名被 redaction 误判为敏感 key。真实 `agentMessage`、`commandExecution`、`webSearch`、`command_output`、error、terminal 和关键生命周期事件必须继续保留。
Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛,不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化:`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、非用户可见工具 item 的通用 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning`、`configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要,只包含总数、`methods: [{ method, count }]`、`itemTypes: [{ itemType, count }]`,不包含 reasoning 文本、Secret、token 或 env value。method 和 item type 不得作为 JSON object key 输出,避免 `thread/tokenUsage/updated` 这类协议名被 redaction 误判为敏感 key。真实 `agentMessage`、`commandExecution`、`webSearch`、`mcpToolCall`、`dynamicToolCall`、`command_output`、error、terminal 和关键生命周期事件必须继续保留。
用户可见工具生命周期的 `tool_call` event 只能输出面向人和消费侧的扁平字段,例如 `method`、`itemId`、`toolName`、`type`、`command`、`cwd`、`status`、`processId`、`valuesPrinted=false`。当前可见工具类型包括 `commandExecution`、`webSearch`。不得把 Codex app-server 的原始 `item` JSON、`itemPreview` 或嵌套协议摘要写入 `message`、`outputSummary`、`stdoutSummary` 或 payload;命令实际 stdout/stderr 只通过 `command_output` 或 completed `commandExecution` 摘要输出。
用户可见工具生命周期的 `tool_call` event 只能输出面向人和消费侧的扁平字段,例如 `method`、`itemId`、`toolName`、`type`、`command`、`cwd`、`status`、`processId`、`valuesPrinted=false`。当前可见工具类型包括 `commandExecution`、`webSearch`、`mcpToolCall`、`dynamicToolCall`。`mcpToolCall` / `dynamicToolCall` 的 `command` 必须是工具名加 redacted 参数摘要,便于 HWLAB Trace 单行展示调用意图。不得把 Codex app-server 的原始 `item` JSON、`itemPreview` 或嵌套协议摘要写入 `message`、`outputSummary`、`stdoutSummary` 或 payload;命令实际 stdout/stderr 只通过 `command_output` 或 completed `commandExecution` 摘要输出。
## Failure Mapping
+46 -4
View File
@@ -849,7 +849,7 @@ function isSuppressedCodexStatusNotification(method: string): boolean {
}
/**
 * Reports whether a Codex item type is one of the user-visible tool types.
 *
 * The visible set is `commandExecution`, `webSearch`, `mcpToolCall` and
 * `dynamicToolCall`; every other item type yields `false`.
 *
 * A stale early `return` that only recognised the first two types used to sit
 * in front of the full check and made it unreachable; it has been removed so
 * MCP and dynamic tool calls are recognised as visible.
 *
 * @param itemType - The `type` string of a Codex item.
 * @returns `true` when the item type is in the visible set.
 */
function isVisibleCodexToolItemType(itemType: string): boolean {
  return itemType === "commandExecution" || itemType === "webSearch" || itemType === "mcpToolCall" || itemType === "dynamicToolCall";
}
function assistantMessageEventForCompleted(message: CompletedAssistantMessage, messageIndex: number): BackendEvent {
@@ -944,9 +944,10 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord {
const redacted = redactJson(item);
const itemId = typeof redacted.id === "string" ? redacted.id : null;
const itemType = typeof redacted.type === "string" ? redacted.type : "unknown";
const command = typeof redacted.command === "string" ? redacted.command : null;
const toolName = toolCallName(redacted, itemType);
const command = toolCallCommandSummary(redacted, itemType, toolName);
const cwd = typeof redacted.cwd === "string" ? redacted.cwd : null;
const status = typeof redacted.status === "string" ? redacted.status : null;
const status = toolCallStatus(method, redacted);
const processId = typeof redacted.processId === "string" || typeof redacted.processId === "number" ? String(redacted.processId) : null;
const exitCode = typeof redacted.exitCode === "number" ? redacted.exitCode : null;
const durationMs = typeof redacted.durationMs === "number" ? redacted.durationMs : null;
@@ -955,7 +956,7 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord {
method,
itemId,
type: itemType,
toolName: itemType,
toolName,
...(command ? { command } : {}),
...(cwd ? { cwd } : {}),
...(status ? { status } : {}),
@@ -967,6 +968,47 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord {
};
}
/**
 * Picks the status string to report for a tool lifecycle item.
 *
 * A non-blank string `item.status` wins and is returned untrimmed. Otherwise
 * the status is derived from the notification method: `item/started` maps to
 * `"started"` and `item/completed` maps to `"completed"`.
 *
 * @param method - The notification method name.
 * @param item - The tool item record.
 * @returns The resolved status, or `null` when neither source provides one.
 */
function toolCallStatus(method: string, item: JsonRecord): string | null {
  const reported = item.status;
  if (typeof reported === "string" && reported.trim().length > 0) {
    return reported;
  }
  switch (method) {
    case "item/started":
      return "started";
    case "item/completed":
      return "completed";
    default:
      return null;
  }
}
/**
 * Derives the display name of a tool call.
 *
 * The tool label is the first non-blank string among `toolName`, `name`,
 * `tool` and `functionName`; without one, `itemType` is used as the name.
 * When a server label (`serverName`, `server` or `mcpServer`) is also present
 * and is not already contained in the tool label, the result is
 * `<server>.<tool>`.
 *
 * @param item - The tool item record.
 * @param itemType - Fallback name used when the item carries no tool label.
 * @returns The resolved tool name.
 */
function toolCallName(item: JsonRecord, itemType: string): string {
  const toolLabel = firstToolCallString(item, ["toolName", "name", "tool", "functionName"]);
  if (toolLabel === null) {
    return itemType;
  }
  const serverLabel = firstToolCallString(item, ["serverName", "server", "mcpServer"]);
  const needsServerPrefix = serverLabel !== null && !toolLabel.includes(serverLabel);
  return needsServerPrefix ? `${serverLabel}.${toolLabel}` : toolLabel;
}
/**
 * Builds the one-line `command` text for a tool call.
 *
 * A non-blank string `item.command` is returned unchanged. Otherwise only
 * `mcpToolCall` and `dynamicToolCall` items get a synthesized summary: the
 * tool name, followed by a space and the input summary when one is available.
 *
 * @param item - The tool item record.
 * @param itemType - The item's type string.
 * @param toolName - The already-resolved tool name.
 * @returns The command text, or `null` for other item types without a command.
 */
function toolCallCommandSummary(item: JsonRecord, itemType: string, toolName: string): string | null {
  const explicit = item.command;
  if (typeof explicit === "string" && explicit.trim().length > 0) {
    return explicit;
  }
  const summarizesInput = itemType === "mcpToolCall" || itemType === "dynamicToolCall";
  if (!summarizesInput) {
    return null;
  }
  const inputText = toolCallInputSummary(item);
  if (!inputText) {
    return toolName;
  }
  return `${toolName} ${inputText}`;
}
/**
 * Produces a bounded text summary of a tool call's input.
 *
 * The own properties `arguments`, `args`, `input`, `params` and `parameters`
 * are checked in that order. String values are used as-is and other values
 * are JSON-serialized. The first candidate whose text is neither blank nor
 * `{}` is passed through `boundedTextSummary` with a 600-character limit.
 *
 * @param item - The tool item record.
 * @returns The summary text, or `null` when no usable input is present.
 */
function toolCallInputSummary(item: JsonRecord): string | null {
  const candidateKeys = ["arguments", "args", "input", "params", "parameters"] as const;
  for (const field of candidateKeys) {
    // Only own properties count; inherited ones are treated as absent.
    const raw = Object.prototype.hasOwnProperty.call(item, field) ? item[field] : undefined;
    if (raw === null || raw === undefined) {
      continue;
    }
    const rendered = typeof raw === "string" ? raw : JSON.stringify(raw);
    // JSON.stringify yields undefined for values it cannot serialize.
    if (typeof rendered !== "string") {
      continue;
    }
    const trimmed = rendered.trim();
    if (trimmed.length === 0 || trimmed === "{}") {
      continue;
    }
    return String(boundedTextSummary(rendered, { limitChars: 600 }).text);
  }
  return null;
}
/**
 * Returns the first non-blank string value found under the given keys.
 *
 * Keys are probed in order. Values that are not strings, or that are empty
 * after trimming, are skipped. The matching value is returned untrimmed.
 *
 * @param item - The record to read from.
 * @param keys - Candidate property names, in priority order.
 * @returns The first matching string, or `null` when none of the keys match.
 */
function firstToolCallString(item: JsonRecord, keys: readonly string[]): string | null {
  const matchingKey = keys.find((key) => {
    const candidate = item[key];
    return typeof candidate === "string" && candidate.trim().length > 0;
  });
  if (matchingKey === undefined) {
    return null;
  }
  const matched = item[matchingKey];
  return typeof matched === "string" ? matched : null;
}
function toolCallOutputSummary(item: JsonRecord): string | null {
const direct = item.outputSummary ?? item.stdoutSummary ?? item.message;
if (typeof direct === "string" && direct.trim().length > 0) return String(boundedTextSummary(direct).text);
+22 -1
View File
@@ -140,6 +140,23 @@ const selfTest: SelfTestCase = async (context) => {
assert.equal(webSearchItems.some((event) => event.type === "tool_call" && eventPayload(event).type === "reasoning"), false, "reasoning items must still not be persisted as tool_call");
assertNoSecretLeak(webSearchEvents);
const mcpTool = await createRunWithCommand(client, context, "hello mcp paper search", "selftest-mcp-tool-call", 15_000);
const mcpToolResult = await runOnce({ managerUrl: server.baseUrl, runId: mcpTool.runId, codexCommand: context.fakeCodexCommand, codexArgs: context.fakeCodexArgs, codexHome: context.codexHome, env: { CODEX_HOME: context.codexHome, AGENTRUN_FAKE_CODEX_MODE: "mcp-tool-call" }, oneShot: true }) as JsonRecord;
assert.equal(mcpToolResult.terminalStatus, "completed", "MCP tool call turn should complete");
const mcpToolEvents = await client.get(`/api/v1/runs/${mcpTool.runId}/events?afterSeq=0&limit=100`) as { items?: Array<{ type: string; payload: unknown }> };
const mcpToolItems = mcpToolEvents.items ?? [];
const mcpStarted = mcpToolItems.find((event) => event.type === "tool_call" && eventPayload(event).type === "mcpToolCall" && eventPayload(event).method === "item/started");
const mcpCompleted = mcpToolItems.find((event) => event.type === "tool_call" && eventPayload(event).type === "mcpToolCall" && eventPayload(event).method === "item/completed");
assert.ok(mcpStarted, "mcpToolCall start must remain visible as tool_call");
assert.ok(mcpCompleted, "mcpToolCall completion must remain visible as tool_call");
assert.equal(eventPayload(mcpStarted ?? { payload: {} }).toolName, "mcp__codex_apps__scispace__search_papers");
assert.match(String(eventPayload(mcpStarted ?? { payload: {} }).command ?? ""), /large language models/u, "MCP tool command summary should include redacted call arguments");
assert.equal(eventPayload(mcpStarted ?? { payload: {} }).item, undefined, "mcpToolCall event must not persist raw Codex item JSON");
assert.equal(eventPayload(mcpStarted ?? { payload: {} }).itemPreview, undefined, "mcpToolCall event must not persist raw Codex item preview");
assert.ok(mcpToolItems.some((event) => event.type === "assistant_message" && eventPayload(event).text === "Paper search tool completed."), "MCP final assistant message should remain visible");
assert.equal(mcpToolItems.some((event) => event.type === "backend_status" && JSON.stringify(eventPayload(event).itemTypes ?? []).includes("mcpToolCall")), false, "mcpToolCall must not be counted as a suppressed notification");
assertNoSecretLeak(mcpToolEvents);
const staleThread = await createStaleThreadRun(client, context);
const staleThreadResult = await runOnce({
managerUrl: server.baseUrl,
@@ -197,7 +214,7 @@ const selfTest: SelfTestCase = async (context) => {
assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "configWarning"), false, "low value config warnings must not be persisted as backend_status");
assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type === "reasoning"), false, "reasoning items must not be persisted as tool_call");
assert.ok(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).method === "item/started" && eventPayload(event).type === "commandExecution"), "real commandExecution tool call should remain visible");
assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type !== "commandExecution" && eventPayload(event).type !== "webSearch"), false, "only user-visible tool lifecycle items should be persisted as tool_call");
assert.equal(noisyItems.some((event) => event.type === "tool_call" && !isVisibleToolType(String(eventPayload(event).type ?? ""))), false, "only user-visible tool lifecycle items should be persisted as tool_call");
assert.equal(noisyItems.some((event) => event.type === "backend_status" && String(eventPayload(event).phase ?? "").startsWith("item/agentMessage:")), false, "agentMessage lifecycle must not be persisted as backend_status noise");
assert.equal(noisyPhases.includes("backend-turn-running"), false, "backend progress ticks must be summarized instead of persisted as durable trace events");
const noisyFinished = noisyItems.find((event) => event.type === "backend_status" && eventPayload(event).phase === "backend-turn-finished");
@@ -461,6 +478,10 @@ function eventPayload(event: { payload: unknown }): JsonRecord {
return typeof event.payload === "object" && event.payload !== null && !Array.isArray(event.payload) ? event.payload as JsonRecord : {};
}
/**
 * Reports whether a `tool_call` payload type is one of the user-visible tool
 * types: `commandExecution`, `webSearch`, `mcpToolCall` or `dynamicToolCall`.
 *
 * @param value - The payload `type` string to classify.
 * @returns `true` for the four visible tool types, `false` otherwise.
 */
function isVisibleToolType(value: string): boolean {
  switch (value) {
    case "commandExecution":
    case "webSearch":
    case "mcpToolCall":
    case "dynamicToolCall":
      return true;
    default:
      return false;
  }
}
function countEntriesByName(value: unknown, keyName: "method" | "itemType"): Record<string, number> {
const output: Record<string, number> = {};
if (!Array.isArray(value)) return output;
+11
View File
@@ -239,6 +239,17 @@ for await (const line of rl) {
respond(message.id, { turn });
continue;
}
if (mode === "mcp-tool-call") {
turnCounter += 1;
const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };
notify("turn/started", { turn });
notify("item/started", { item: { id: "mcp_search_selftest", type: "mcpToolCall", name: "mcp__codex_apps__scispace__search_papers", arguments: { searchTerm: "large language models" } } });
notify("item/completed", { item: { id: "mcp_search_selftest", type: "mcpToolCall", name: "mcp__codex_apps__scispace__search_papers", status: "completed", arguments: { searchTerm: "large language models" }, outputSummary: "returned paper titles and abstracts" } });
notify("item/completed", { item: { id: "msg_mcp_search", type: "agentMessage", text: "Paper search tool completed." } });
notify("turn/completed", { turn });
respond(message.id, { turn });
continue;
}
if (mode === "slow-tool-events") {
turnCounter += 1;
const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };