Merge pull request #186 from pikasTech/fix/issue-1291-mcp-tool-trace
修复 MCP 工具调用 Trace 可见性
This commit is contained in:
@@ -70,9 +70,9 @@ Adapter 输出给 runner 的 event 类型至少包括:
|
||||
|
||||
事件必须有上限和分页友好形态。大型日志、完整 stdout 或完整 trace 应进入 logPath 或后续 artifact,不得一次性塞入单个 event 造成输出爆炸。
|
||||
|
||||
Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛,不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化:`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、非用户可见工具 item 的通用 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning` 和 `configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要,只包含总数、`methods: [{ method, count }]` 和 `itemTypes: [{ itemType, count }]`,不包含 reasoning 文本、Secret、token 或 env value。method 和 item type 不得作为 JSON object key 输出,避免 `thread/tokenUsage/updated` 这类协议名被 redaction 误判为敏感 key。真实 `agentMessage`、`commandExecution`、`webSearch`、`command_output`、error、terminal 和关键生命周期事件必须继续保留。
|
||||
Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛,不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化:`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、非用户可见工具 item 的通用 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning` 和 `configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要,只包含总数、`methods: [{ method, count }]` 和 `itemTypes: [{ itemType, count }]`,不包含 reasoning 文本、Secret、token 或 env value。method 和 item type 不得作为 JSON object key 输出,避免 `thread/tokenUsage/updated` 这类协议名被 redaction 误判为敏感 key。真实 `agentMessage`、`commandExecution`、`webSearch`、`mcpToolCall`、`dynamicToolCall`、`command_output`、error、terminal 和关键生命周期事件必须继续保留。
|
||||
|
||||
用户可见工具生命周期的 `tool_call` event 只能输出面向人和消费侧的扁平字段,例如 `method`、`itemId`、`toolName`、`type`、`command`、`cwd`、`status`、`processId` 和 `valuesPrinted=false`。当前可见工具类型包括 `commandExecution` 和 `webSearch`;不得把 Codex app-server 的原始 `item` JSON、`itemPreview` 或嵌套协议摘要写入 `message`、`outputSummary`、`stdoutSummary` 或 payload;命令实际 stdout/stderr 只通过 `command_output` 或 completed `commandExecution` 摘要输出。
|
||||
用户可见工具生命周期的 `tool_call` event 只能输出面向人和消费侧的扁平字段,例如 `method`、`itemId`、`toolName`、`type`、`command`、`cwd`、`status`、`processId` 和 `valuesPrinted=false`。当前可见工具类型包括 `commandExecution`、`webSearch`、`mcpToolCall` 和 `dynamicToolCall`;`mcpToolCall` / `dynamicToolCall` 的 `command` 必须是工具名加 redacted 参数摘要,便于 HWLAB Trace 单行展示调用意图。不得把 Codex app-server 的原始 `item` JSON、`itemPreview` 或嵌套协议摘要写入 `message`、`outputSummary`、`stdoutSummary` 或 payload;命令实际 stdout/stderr 只通过 `command_output` 或 completed `commandExecution` 摘要输出。
|
||||
|
||||
## Failure Mapping
|
||||
|
||||
|
||||
@@ -849,7 +849,7 @@ function isSuppressedCodexStatusNotification(method: string): boolean {
|
||||
}
|
||||
|
||||
/**
 * Reports whether a Codex item type is one of the user-visible tool types.
 *
 * The visible types are `commandExecution`, `webSearch`, `mcpToolCall` and
 * `dynamicToolCall`. The comparison is exact and case-sensitive.
 *
 * @param itemType - The `type` string of a Codex item.
 * @returns `true` for the four visible tool item types, otherwise `false`.
 */
function isVisibleCodexToolItemType(itemType: string): boolean {
  // Exactly one return: an earlier, narrower check placed ahead of this one
  // would make the MCP / dynamic tool comparisons unreachable.
  return (
    itemType === "commandExecution" ||
    itemType === "webSearch" ||
    itemType === "mcpToolCall" ||
    itemType === "dynamicToolCall"
  );
}
|
||||
|
||||
function assistantMessageEventForCompleted(message: CompletedAssistantMessage, messageIndex: number): BackendEvent {
|
||||
@@ -944,9 +944,10 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord {
|
||||
const redacted = redactJson(item);
|
||||
const itemId = typeof redacted.id === "string" ? redacted.id : null;
|
||||
const itemType = typeof redacted.type === "string" ? redacted.type : "unknown";
|
||||
const command = typeof redacted.command === "string" ? redacted.command : null;
|
||||
const toolName = toolCallName(redacted, itemType);
|
||||
const command = toolCallCommandSummary(redacted, itemType, toolName);
|
||||
const cwd = typeof redacted.cwd === "string" ? redacted.cwd : null;
|
||||
const status = typeof redacted.status === "string" ? redacted.status : null;
|
||||
const status = toolCallStatus(method, redacted);
|
||||
const processId = typeof redacted.processId === "string" || typeof redacted.processId === "number" ? String(redacted.processId) : null;
|
||||
const exitCode = typeof redacted.exitCode === "number" ? redacted.exitCode : null;
|
||||
const durationMs = typeof redacted.durationMs === "number" ? redacted.durationMs : null;
|
||||
@@ -955,7 +956,7 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord {
|
||||
method,
|
||||
itemId,
|
||||
type: itemType,
|
||||
toolName: itemType,
|
||||
toolName,
|
||||
...(command ? { command } : {}),
|
||||
...(cwd ? { cwd } : {}),
|
||||
...(status ? { status } : {}),
|
||||
@@ -967,6 +968,47 @@ function toolCallPayload(method: string, item: JsonRecord): JsonRecord {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Resolves the status string reported for a tool call item.
 *
 * A non-blank string in `item.status` wins and is returned unchanged (it is
 * not trimmed). Otherwise the status is derived from the notification method.
 *
 * @param method - Notification method name, e.g. `item/started`.
 * @param item - The tool call item record.
 * @returns The explicit status, `"started"` for `item/started`, `"completed"`
 *   for `item/completed`, or `null` for any other method.
 */
function toolCallStatus(method: string, item: JsonRecord): string | null {
  const explicitStatus = item.status;
  if (typeof explicitStatus === "string" && explicitStatus.trim() !== "") {
    return explicitStatus;
  }

  // No usable status on the item: fall back to the lifecycle method.
  switch (method) {
    case "item/started":
      return "started";
    case "item/completed":
      return "completed";
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Picks the display name for a tool call item.
 *
 * The tool label is the first non-blank string among `toolName`, `name`,
 * `tool` and `functionName`. The server label is the first non-blank string
 * among `serverName`, `server` and `mcpServer`.
 *
 * @param item - The tool call item record.
 * @param itemType - The item's type, used as the fallback name.
 * @returns `itemType` when no tool label exists; `<server>.<tool>` when a
 *   server label exists and is not already a substring of the tool label;
 *   otherwise the tool label on its own.
 */
function toolCallName(item: JsonRecord, itemType: string): string {
  const toolLabel = firstToolCallString(item, ["toolName", "name", "tool", "functionName"]);
  if (toolLabel === null) {
    return itemType;
  }

  const serverLabel = firstToolCallString(item, ["serverName", "server", "mcpServer"]);
  // Skip the prefix when the tool label already contains the server label.
  const needsServerPrefix = serverLabel !== null && !toolLabel.includes(serverLabel);
  return needsServerPrefix ? `${serverLabel}.${toolLabel}` : toolLabel;
}
|
||||
|
||||
/**
 * Builds the single-line `command` text for a tool call item.
 *
 * @param item - The tool call item record.
 * @param itemType - The item's type.
 * @param toolName - Display name used when the summary has to be synthesized.
 * @returns The item's own non-blank `command` string when present. Otherwise,
 *   for `mcpToolCall` and `dynamicToolCall` items only, `toolName` followed by
 *   a space and the input summary (or just `toolName` when there is no input
 *   summary). `null` for every other item type.
 */
function toolCallCommandSummary(item: JsonRecord, itemType: string, toolName: string): string | null {
  const explicitCommand = item.command;
  if (typeof explicitCommand === "string" && explicitCommand.trim() !== "") {
    return explicitCommand;
  }

  // Only MCP / dynamic tool calls get a synthesized command line.
  const isNamedToolCall = itemType === "mcpToolCall" || itemType === "dynamicToolCall";
  if (!isNamedToolCall) {
    return null;
  }

  const argumentSummary = toolCallInputSummary(item);
  return argumentSummary ? `${toolName} ${argumentSummary}` : toolName;
}
|
||||
|
||||
/**
 * Produces a text summary of a tool call's input arguments.
 *
 * The own properties `arguments`, `args`, `input`, `params` and `parameters`
 * are checked in that order. String values are used as-is; other values are
 * serialized with `JSON.stringify`. The first candidate that is not
 * null/undefined, blank, or the empty object `{}` is passed to
 * `boundedTextSummary` with `limitChars: 600`.
 *
 * @param item - The tool call item record.
 * @returns The summary text for the first usable input, or `null` when none
 *   of the candidate keys holds one.
 */
function toolCallInputSummary(item: JsonRecord): string | null {
  const candidateKeys = ["arguments", "args", "input", "params", "parameters"] as const;

  for (const candidate of candidateKeys) {
    // Own properties only, so inherited members are never summarized.
    if (!Object.prototype.hasOwnProperty.call(item, candidate)) continue;

    const raw = item[candidate];
    if (raw === null || raw === undefined) continue;

    const serialized = typeof raw === "string" ? raw : JSON.stringify(raw);
    // JSON.stringify yields undefined for values it cannot represent.
    if (typeof serialized !== "string") continue;

    const trimmed = serialized.trim();
    if (trimmed.length === 0 || trimmed === "{}") continue;

    // NOTE(review): boundedTextSummary presumably caps the text at limitChars — confirm.
    return String(boundedTextSummary(serialized, { limitChars: 600 }).text);
  }

  return null;
}
|
||||
|
||||
/**
 * Returns the first non-blank string found under the given keys.
 *
 * Keys are tried in the order supplied. Values that are not strings, or that
 * contain only whitespace, are ignored. The match is returned untrimmed.
 *
 * @param item - The record to read from.
 * @param keys - Property names to try, highest priority first.
 * @returns The first qualifying string, or `null` when no key has one.
 */
function firstToolCallString(item: JsonRecord, keys: readonly string[]): string | null {
  const firstMatch = keys
    .map((key) => item[key])
    .find((candidate): candidate is string => typeof candidate === "string" && candidate.trim() !== "");
  return firstMatch ?? null;
}
|
||||
|
||||
function toolCallOutputSummary(item: JsonRecord): string | null {
|
||||
const direct = item.outputSummary ?? item.stdoutSummary ?? item.message;
|
||||
if (typeof direct === "string" && direct.trim().length > 0) return String(boundedTextSummary(direct).text);
|
||||
|
||||
@@ -140,6 +140,23 @@ const selfTest: SelfTestCase = async (context) => {
|
||||
assert.equal(webSearchItems.some((event) => event.type === "tool_call" && eventPayload(event).type === "reasoning"), false, "reasoning items must still not be persisted as tool_call");
|
||||
assertNoSecretLeak(webSearchEvents);
|
||||
|
||||
const mcpTool = await createRunWithCommand(client, context, "hello mcp paper search", "selftest-mcp-tool-call", 15_000);
|
||||
const mcpToolResult = await runOnce({ managerUrl: server.baseUrl, runId: mcpTool.runId, codexCommand: context.fakeCodexCommand, codexArgs: context.fakeCodexArgs, codexHome: context.codexHome, env: { CODEX_HOME: context.codexHome, AGENTRUN_FAKE_CODEX_MODE: "mcp-tool-call" }, oneShot: true }) as JsonRecord;
|
||||
assert.equal(mcpToolResult.terminalStatus, "completed", "MCP tool call turn should complete");
|
||||
const mcpToolEvents = await client.get(`/api/v1/runs/${mcpTool.runId}/events?afterSeq=0&limit=100`) as { items?: Array<{ type: string; payload: unknown }> };
|
||||
const mcpToolItems = mcpToolEvents.items ?? [];
|
||||
const mcpStarted = mcpToolItems.find((event) => event.type === "tool_call" && eventPayload(event).type === "mcpToolCall" && eventPayload(event).method === "item/started");
|
||||
const mcpCompleted = mcpToolItems.find((event) => event.type === "tool_call" && eventPayload(event).type === "mcpToolCall" && eventPayload(event).method === "item/completed");
|
||||
assert.ok(mcpStarted, "mcpToolCall start must remain visible as tool_call");
|
||||
assert.ok(mcpCompleted, "mcpToolCall completion must remain visible as tool_call");
|
||||
assert.equal(eventPayload(mcpStarted ?? { payload: {} }).toolName, "mcp__codex_apps__scispace__search_papers");
|
||||
assert.match(String(eventPayload(mcpStarted ?? { payload: {} }).command ?? ""), /large language models/u, "MCP tool command summary should include redacted call arguments");
|
||||
assert.equal(eventPayload(mcpStarted ?? { payload: {} }).item, undefined, "mcpToolCall event must not persist raw Codex item JSON");
|
||||
assert.equal(eventPayload(mcpStarted ?? { payload: {} }).itemPreview, undefined, "mcpToolCall event must not persist raw Codex item preview");
|
||||
assert.ok(mcpToolItems.some((event) => event.type === "assistant_message" && eventPayload(event).text === "Paper search tool completed."), "MCP final assistant message should remain visible");
|
||||
assert.equal(mcpToolItems.some((event) => event.type === "backend_status" && JSON.stringify(eventPayload(event).itemTypes ?? []).includes("mcpToolCall")), false, "mcpToolCall must not be counted as a suppressed notification");
|
||||
assertNoSecretLeak(mcpToolEvents);
|
||||
|
||||
const staleThread = await createStaleThreadRun(client, context);
|
||||
const staleThreadResult = await runOnce({
|
||||
managerUrl: server.baseUrl,
|
||||
@@ -197,7 +214,7 @@ const selfTest: SelfTestCase = async (context) => {
|
||||
assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "configWarning"), false, "low value config warnings must not be persisted as backend_status");
|
||||
assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type === "reasoning"), false, "reasoning items must not be persisted as tool_call");
|
||||
assert.ok(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).method === "item/started" && eventPayload(event).type === "commandExecution"), "real commandExecution tool call should remain visible");
|
||||
assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type !== "commandExecution" && eventPayload(event).type !== "webSearch"), false, "only user-visible tool lifecycle items should be persisted as tool_call");
|
||||
assert.equal(noisyItems.some((event) => event.type === "tool_call" && !isVisibleToolType(String(eventPayload(event).type ?? ""))), false, "only user-visible tool lifecycle items should be persisted as tool_call");
|
||||
assert.equal(noisyItems.some((event) => event.type === "backend_status" && String(eventPayload(event).phase ?? "").startsWith("item/agentMessage:")), false, "agentMessage lifecycle must not be persisted as backend_status noise");
|
||||
assert.equal(noisyPhases.includes("backend-turn-running"), false, "backend progress ticks must be summarized instead of persisted as durable trace events");
|
||||
const noisyFinished = noisyItems.find((event) => event.type === "backend_status" && eventPayload(event).phase === "backend-turn-finished");
|
||||
@@ -461,6 +478,10 @@ function eventPayload(event: { payload: unknown }): JsonRecord {
|
||||
return typeof event.payload === "object" && event.payload !== null && !Array.isArray(event.payload) ? event.payload as JsonRecord : {};
|
||||
}
|
||||
|
||||
/**
 * Self-test helper: reports whether a `tool_call` payload type is one of the
 * user-visible tool types (`commandExecution`, `webSearch`, `mcpToolCall`,
 * `dynamicToolCall`). Matching is exact and case-sensitive.
 *
 * @param value - The payload `type` string to classify.
 * @returns `true` for the four visible tool types, otherwise `false`.
 */
function isVisibleToolType(value: string): boolean {
  switch (value) {
    case "commandExecution":
    case "webSearch":
    case "mcpToolCall":
    case "dynamicToolCall":
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
function countEntriesByName(value: unknown, keyName: "method" | "itemType"): Record<string, number> {
|
||||
const output: Record<string, number> = {};
|
||||
if (!Array.isArray(value)) return output;
|
||||
|
||||
@@ -239,6 +239,17 @@ for await (const line of rl) {
|
||||
respond(message.id, { turn });
|
||||
continue;
|
||||
}
|
||||
if (mode === "mcp-tool-call") {
|
||||
turnCounter += 1;
|
||||
const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };
|
||||
notify("turn/started", { turn });
|
||||
notify("item/started", { item: { id: "mcp_search_selftest", type: "mcpToolCall", name: "mcp__codex_apps__scispace__search_papers", arguments: { searchTerm: "large language models" } } });
|
||||
notify("item/completed", { item: { id: "mcp_search_selftest", type: "mcpToolCall", name: "mcp__codex_apps__scispace__search_papers", status: "completed", arguments: { searchTerm: "large language models" }, outputSummary: "returned paper titles and abstracts" } });
|
||||
notify("item/completed", { item: { id: "msg_mcp_search", type: "agentMessage", text: "Paper search tool completed." } });
|
||||
notify("turn/completed", { turn });
|
||||
respond(message.id, { turn });
|
||||
continue;
|
||||
}
|
||||
if (mode === "slow-tool-events") {
|
||||
turnCounter += 1;
|
||||
const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };
|
||||
|
||||
Reference in New Issue
Block a user