fix: 收敛 commandExecution toolcall 摘要 (#70)
Co-authored-by: Codex <codex@pikas.tech>
This commit is contained in:
@@ -71,6 +71,8 @@ Adapter 输出给 runner 的 event 类型至少包括:
|
||||
|
||||
Codex app-server 的低价值内部 notification 必须在 AgentRun adapter 层收敛,不得要求 HWLAB Web/CLI 或其他消费侧自行过滤。以下事件默认不作为 durable trace event 持久化:`item/reasoning/textDelta`、纯 `reasoning` item 的 `item/started|item/completed`、非 `commandExecution` item 的通用 `item/started|item/completed`、`thread/tokenUsage/updated`、`account/rateLimits/updated`、普通 `warning` 和 `configWarning`。adapter 可以输出一条有界 `backend_status.phase=codex-app-server-notifications-suppressed` 摘要,只包含计数、method 和 item type,不包含 reasoning 文本、Secret、token 或 env value。真实 `agentMessage`、`commandExecution`、`command_output`、error、terminal 和关键生命周期事件必须继续保留。
|
||||
|
||||
`commandExecution` 的 `tool_call` event 只能输出面向人和消费侧的扁平字段,例如 `method`、`itemId`、`toolName`、`type`、`command`、`cwd`、`status`、`processId`、`exitCode`、`durationMs`、有界的 `outputSummary` 和 `valuesPrinted=false`。不得把 Codex app-server 的原始 `item` JSON、`itemPreview` 或嵌套协议摘要写入 `message`、`outputSummary`、`stdoutSummary` 或 payload;命令实际 stdout/stderr 只通过 `command_output` 或 completed `commandExecution` 摘要输出。
|
||||
|
||||
## Failure Mapping
|
||||
|
||||
Adapter 必须把 backend 错误映射为稳定 failureKind:
|
||||
|
||||
@@ -695,9 +695,42 @@ function terminalStatusFromValue(value: unknown): TerminalStatus {
|
||||
|
||||
/**
 * Builds the flat `tool_call` payload for a Codex app-server item notification.
 *
 * Only scalar, consumer-facing fields are emitted. The raw item record and any
 * JSON preview of it are deliberately never part of the result, so consumers
 * do not have to filter nested protocol data themselves.
 *
 * @param method - Notification method; copied into the payload unchanged.
 * @param item - Item record from the notification. It is passed through
 *   `redactJson` before any field is read.
 * @returns A record that always contains `method`, `itemId` (string or null),
 *   `type`, `toolName` (same value as `type`, `"unknown"` when the item has no
 *   string `type`) and `valuesPrinted: false`. `command`, `cwd`, `status`,
 *   `processId`, `exitCode`, `durationMs` and `outputSummary` are included only
 *   when the item provides a usable value for them.
 */
function toolCallPayload(method: string, item: JsonRecord): JsonRecord {
  const redacted = redactJson(item);

  const itemId = typeof redacted.id === "string" ? redacted.id : null;
  const itemType = typeof redacted.type === "string" ? redacted.type : "unknown";
  const command = typeof redacted.command === "string" ? redacted.command : null;
  const cwd = typeof redacted.cwd === "string" ? redacted.cwd : null;
  const status = typeof redacted.status === "string" ? redacted.status : null;
  // processId may arrive as a string or a number; it is normalized to a string.
  const processId =
    typeof redacted.processId === "string" || typeof redacted.processId === "number"
      ? String(redacted.processId)
      : null;
  const exitCode = typeof redacted.exitCode === "number" ? redacted.exitCode : null;
  const durationMs = typeof redacted.durationMs === "number" ? redacted.durationMs : null;
  const outputSummary = toolCallOutputSummary(redacted);

  return {
    method,
    itemId,
    type: itemType,
    toolName: itemType,
    // String fields use a truthiness check, so empty strings are omitted too.
    ...(command ? { command } : {}),
    ...(cwd ? { cwd } : {}),
    ...(status ? { status } : {}),
    ...(processId ? { processId } : {}),
    // Numeric fields compare against null so that a legitimate 0 is kept.
    ...(exitCode !== null ? { exitCode } : {}),
    ...(durationMs !== null ? { durationMs } : {}),
    ...(outputSummary ? { outputSummary } : {}),
    valuesPrinted: false,
  };
}
|
||||
|
||||
/**
 * Picks a bounded output summary string from an item record.
 *
 * Candidates are tried in this order: `outputSummary`, `stdoutSummary`,
 * `message`, `summary.text` (only when `summary` is a non-array object) and
 * `aggregatedOutput`. The first candidate that is a string containing
 * non-whitespace characters is passed through `boundedTextSummary` and its
 * `text` is returned.
 *
 * Each candidate is checked on its own. A present-but-unusable earlier field
 * (for example an empty `outputSummary`) therefore does not hide a usable
 * later one, which a plain `??` chain would do.
 *
 * @param item - Item record to inspect.
 * @returns The bounded summary text, or `null` when no candidate is usable.
 */
function toolCallOutputSummary(item: JsonRecord): string | null {
  const nested = item.summary;
  const nestedText =
    typeof nested === "object" && nested !== null && !Array.isArray(nested)
      ? (nested as JsonRecord).text
      : undefined;

  const candidates: unknown[] = [
    item.outputSummary,
    item.stdoutSummary,
    item.message,
    nestedText,
    item.aggregatedOutput,
  ];

  for (const candidate of candidates) {
    if (typeof candidate === "string" && candidate.trim().length > 0) {
      return String(boundedTextSummary(candidate).text);
    }
  }

  return null;
}
|
||||
|
||||
function withOptionalModel(params: JsonRecord, model: string | undefined): JsonRecord {
|
||||
|
||||
@@ -78,6 +78,10 @@ function normalizeTextPayload(payload: JsonRecord): JsonRecord {
|
||||
|
||||
function normalizeToolCallPayload(payload: JsonRecord): JsonRecord {
|
||||
const redacted = redactJson(payload);
|
||||
if (isCommandExecutionToolCall(redacted)) {
|
||||
const summary = boundedTextSummary(commandExecutionToolCallText(redacted));
|
||||
return { ...redacted, summary, outputBytes: summary.outputBytes, outputTruncated: summary.outputTruncated };
|
||||
}
|
||||
const json = JSON.stringify(redacted);
|
||||
const summary = boundedTextSummary(json);
|
||||
if (summary.outputTruncated !== true) return { ...redacted, summary, outputBytes: summary.outputBytes, outputTruncated: false };
|
||||
@@ -89,3 +93,17 @@ function normalizeToolCallPayload(payload: JsonRecord): JsonRecord {
|
||||
outputTruncated: true,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Reports whether a tool-call payload describes a `commandExecution` item.
 *
 * The payload matches when either its `toolName` or its `type` field is
 * exactly the string `"commandExecution"`.
 *
 * @param payload - Tool-call payload to inspect.
 * @returns `true` when either field equals `"commandExecution"`, else `false`.
 */
function isCommandExecutionToolCall(payload: JsonRecord): boolean {
|
||||
return payload.toolName === "commandExecution" || payload.type === "commandExecution";
|
||||
}
|
||||
|
||||
/**
 * Renders a one-line, human-readable description of a `commandExecution`
 * tool-call payload, in the form
 * `commandExecution <status>: <command>[ exit=N][ durationMs=N][ output=...]`.
 *
 * Fallbacks:
 * - `<status>` is `payload.status`; when that is missing or blank it is
 *   `payload.method` with a leading `item/` removed, and `"tool"` when the
 *   method is missing or empty after stripping.
 * - `<command>` is `payload.command`, or `"commandExecution"` when it is
 *   missing or blank.
 *
 * Blank strings are treated the same as missing values so the text never
 * degenerates into `commandExecution : `.
 *
 * @param payload - Flat tool-call payload.
 * @returns The summary line described above.
 */
function commandExecutionToolCallText(payload: JsonRecord): string {
  // Returns the value only when it is a string with non-whitespace content.
  const nonBlank = (value: unknown): string | null =>
    typeof value === "string" && value.trim().length > 0 ? value : null;

  const rawMethod = typeof payload.method === "string" ? payload.method.replace(/^item\//, "") : null;
  const method = nonBlank(rawMethod) ?? "tool";
  const status = nonBlank(payload.status) ?? method;
  const command = nonBlank(payload.command) ?? "commandExecution";

  const exitCode = typeof payload.exitCode === "number" ? ` exit=${payload.exitCode}` : "";
  const durationMs = typeof payload.durationMs === "number" ? ` durationMs=${payload.durationMs}` : "";
  const outputText = nonBlank(payload.outputSummary);
  const outputSummary = outputText !== null ? ` output=${outputText}` : "";

  return `commandExecution ${status}: ${command}${exitCode}${durationMs}${outputSummary}`;
}
|
||||
|
||||
@@ -117,6 +117,14 @@ const selfTest: SelfTestCase = async (context) => {
|
||||
const live = await createRunWithCommand(client, context, "hello live events", "selftest-live-tool-events", 15_000);
|
||||
const livePromise = runOnce({ managerUrl: server.baseUrl, runId: live.runId, codexCommand: context.fakeCodexCommand, codexArgs: context.fakeCodexArgs, codexHome: context.codexHome, env: { CODEX_HOME: context.codexHome, AGENTRUN_FAKE_CODEX_MODE: "slow-tool-events" }, oneShot: true }) as Promise<JsonRecord>;
|
||||
await waitForEvent(client, live.runId, (event) => event.type === "tool_call" && eventPayload(event).method === "item/started", "live tool_call start event");
|
||||
const liveEvents = await client.get(`/api/v1/runs/${live.runId}/events?afterSeq=0&limit=100`) as { items?: Array<{ type: string; payload: unknown }> };
|
||||
const liveToolStart = (liveEvents.items ?? []).find((event) => event.type === "tool_call" && eventPayload(event).method === "item/started") ?? { payload: {} };
|
||||
assert.equal(eventPayload(liveToolStart).item, undefined, "tool_call started event must not persist raw Codex item JSON");
|
||||
assert.equal(eventPayload(liveToolStart).itemPreview, undefined, "tool_call started event must not persist raw Codex item preview");
|
||||
assert.equal(JSON.stringify(eventPayload(liveToolStart).summary ?? {}).includes("\\\"method\\\":\\\"item/started\\\""), false, "tool_call started event summary must not embed raw protocol JSON");
|
||||
assert.equal(String(eventPayload(liveToolStart).summary ? (eventPayload(liveToolStart).summary as JsonRecord).text ?? "" : "").includes("commandExecution started:"), true, "tool_call started event summary should be human readable");
|
||||
assert.equal(eventPayload(liveToolStart).toolName, "commandExecution");
|
||||
assert.equal(eventPayload(liveToolStart).type, "commandExecution");
|
||||
await waitForEvent(client, live.runId, (event) => event.type === "command_output" && String(eventPayload(event).text ?? "").includes("live output"), "live command output event");
|
||||
const liveResult = await livePromise;
|
||||
assert.equal(liveResult.terminalStatus, "completed", "slow live tool event turn should complete");
|
||||
@@ -132,9 +140,9 @@ const selfTest: SelfTestCase = async (context) => {
|
||||
assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "account/rateLimits/updated"), false, "rate limit update must not be persisted as backend_status");
|
||||
assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "warning"), false, "low value warnings must not be persisted as backend_status");
|
||||
assert.equal(noisyItems.some((event) => event.type === "backend_status" && eventPayload(event).phase === "configWarning"), false, "low value config warnings must not be persisted as backend_status");
|
||||
assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayloadItem(event).type === "reasoning"), false, "reasoning items must not be persisted as tool_call");
|
||||
assert.ok(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).method === "item/started" && eventPayloadItem(event).type === "commandExecution"), "real commandExecution tool call should remain visible");
|
||||
assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayloadItem(event).type !== "commandExecution"), false, "non-commandExecution item lifecycle must not be persisted as tool_call");
|
||||
assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type === "reasoning"), false, "reasoning items must not be persisted as tool_call");
|
||||
assert.ok(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).method === "item/started" && eventPayload(event).type === "commandExecution"), "real commandExecution tool call should remain visible");
|
||||
assert.equal(noisyItems.some((event) => event.type === "tool_call" && eventPayload(event).type !== "commandExecution"), false, "non-commandExecution item lifecycle must not be persisted as tool_call");
|
||||
assert.equal(noisyPhases.includes("backend-turn-running"), false, "backend progress ticks must be summarized instead of persisted as durable trace events");
|
||||
const noisyFinished = noisyItems.find((event) => event.type === "backend_status" && eventPayload(event).phase === "backend-turn-finished");
|
||||
assert.equal(eventPayload(noisyFinished ?? { payload: {} }).progressEventsPrinted, false, "backend-turn-finished must declare progress ticks were not printed as events");
|
||||
|
||||
Reference in New Issue
Block a user