diff --git a/docs/reference/spec-v01-agentrun-mgr.md b/docs/reference/spec-v01-agentrun-mgr.md index 04d931e..4bd1f71 100644 --- a/docs/reference/spec-v01-agentrun-mgr.md +++ b/docs/reference/spec-v01-agentrun-mgr.md @@ -163,6 +163,7 @@ Manager 只承接 HWLAB v0.2 Code Agent 的通用执行事实,不承接 HWLAB | `lastSeq` / `eventCount` | 支持调用方增量轮询和 result/trace reconciliation。 | | `runId` / `commandId` / `attemptId` | 支持调用方持久关联和问题定位。 | | `artifactSummary` | 第一阶段只放有界摘要、字节数、截断标记和必要引用;不内嵌大 stdout/stderr。 | +| `toolCallSummary` | 输出有界、脱敏的 tool call 状态摘要,至少包含 `count`、`statusCounts`、`exitCodeCounts` 和最近若干条 `items` 的 `method/toolName/type/status/exitCode/command`。消费侧必须用它区分 AgentRun command terminal、agent 内部工具执行和后置诊断,不得用单一 `hwpodExitCode` 覆盖 AgentRun 成功终态。 | `assistant_message` partial、`command_output` 存在、stdout 非空、backend transport close 或 idle timeout 都不能单独让 result 进入 `completed`。 diff --git a/src/mgr/result.ts b/src/mgr/result.ts index 4ac0eba..54cba7f 100644 --- a/src/mgr/result.ts +++ b/src/mgr/result.ts @@ -1,6 +1,10 @@ import type { AgentRunStore } from "./store.js"; import type { CommandRecord, JsonRecord, JsonValue, RunEvent, RunRecord, RunnerJobRecord, TerminalStatus } from "../common/types.js"; -import { outputBytesFromPayload, outputTruncatedFromPayload } from "../common/output.js"; +import { boundedTextSummary, outputBytesFromPayload, outputTruncatedFromPayload } from "../common/output.js"; + +const maxToolCallSummaryItems = 40; +const toolCallCommandLimitChars = 600; +const toolCallFieldLimitChars = 200; export async function buildRunResult(store: AgentRunStore, runId: string, commandId?: string): Promise { const run = await store.getRun(runId); @@ -37,6 +41,7 @@ export async function buildRunResult(store: AgentRunStore, runId: string, comman lastSeq: events.at(-1)?.seq ?? 0, eventCount: events.length, artifactSummary: artifactSummary(scopedEvents), + toolCallSummary: toolCallSummary(scopedEvents), sessionRef: sessionSummary(run), resourceBundleRef: resourceBundleSummary(run, events), runnerJobCount: jobs.length, @@ -145,6 +150,61 @@ function artifactSummary(events: RunEvent[]): JsonRecord { return { commandOutputEvents, diffEvents, toolCallEvents, outputChars, outputBytes, truncatedEvents: outputTruncatedEvents, outputTruncatedEvents, stdoutSummary: streamSummary.stdout, stderrSummary: streamSummary.stderr }; } +function toolCallSummary(events: RunEvent[]): JsonRecord { + const toolCallEvents = events.filter((event) => event.type === "tool_call"); + const statusCounts: Record = {}; + const exitCodeCounts: Record = {}; + for (const event of toolCallEvents) { + const status = typeof event.payload.status === "string" && event.payload.status.length > 0 ? event.payload.status : "unknown"; + statusCounts[status] = (statusCounts[status] ?? 0) + 1; + const exitCode = normalizedExitCode(event.payload.exitCode); + if (exitCode !== null) exitCodeCounts[String(exitCode)] = (exitCodeCounts[String(exitCode)] ?? 0) + 1; + } + const window = toolCallEvents.slice(-maxToolCallSummaryItems); + return { + count: toolCallEvents.length, + statusCounts, + exitCodeCounts, + items: window.map((event) => toolCallItemSummary(event)), + itemsOmitted: Math.max(0, toolCallEvents.length - window.length), + itemWindow: "latest", + valuesPrinted: false, + }; +} + +function toolCallItemSummary(event: RunEvent): JsonRecord { + const payload = event.payload; + return { + seq: event.seq, + createdAt: event.createdAt, + method: boundedOptionalString(payload.method, toolCallFieldLimitChars), + toolName: boundedOptionalString(payload.toolName, toolCallFieldLimitChars), + type: boundedOptionalString(payload.type, toolCallFieldLimitChars), + itemId: boundedOptionalString(payload.itemId, toolCallFieldLimitChars), + status: boundedOptionalString(payload.status, toolCallFieldLimitChars), + exitCode: normalizedExitCode(payload.exitCode), + processId: boundedOptionalString(payload.processId, toolCallFieldLimitChars), + cwd: boundedOptionalString(payload.cwd, toolCallFieldLimitChars), + command: boundedOptionalString(payload.command, toolCallCommandLimitChars), + commandTruncated: optionalStringTruncated(payload.command, toolCallCommandLimitChars), + valuesPrinted: false, + }; +} + +function boundedOptionalString(value: JsonValue | undefined, limitChars: number): string | null { + if (typeof value !== "string") return null; + return boundedTextSummary(value, { limitChars }).text as string; +} + +function optionalStringTruncated(value: JsonValue | undefined, limitChars: number): boolean { + if (typeof value !== "string") return false; + return boundedTextSummary(value, { limitChars }).outputTruncated === true; +} + +function normalizedExitCode(value: JsonValue | undefined): number | null { + return typeof value === "number" && Number.isFinite(value) ? value : null; +} + function attemptFromEvents(events: RunEvent[]): string | null { for (const event of [...events].reverse()) { const value = event.payload.attemptId; diff --git a/src/selftest/cases/60-hwlab-baseline-contract.ts b/src/selftest/cases/60-hwlab-baseline-contract.ts index 3993443..c18275c 100644 --- a/src/selftest/cases/60-hwlab-baseline-contract.ts +++ b/src/selftest/cases/60-hwlab-baseline-contract.ts @@ -72,7 +72,7 @@ async function assertBackendPreflight(client: ManagerClient): Promise { async function assertEventContractAndCompletedSemantics(client: ManagerClient, context: SelfTestContext, managerUrl: string): Promise { const happy = await createRunWithCommand(client, context, "hello event contract", "selftest-event-contract", 15_000); - await client.post(`/api/v1/runs/${happy.runId}/events`, { type: "tool_call", payload: { method: "selftest/tool", item: { command: "echo ok" } } }); + await client.post(`/api/v1/runs/${happy.runId}/events`, { type: "tool_call", payload: { method: "item/completed", type: "commandExecution", toolName: "commandExecution", itemId: "call_selftest_hwpod", command: "authorization=Bearer selftest-redacted-value hwpod build --hwpod-id d601-f103-v2", cwd: "/workspace/hwlab", status: "completed", exitCode: 0, processId: "1234" } }); await client.post(`/api/v1/runs/${happy.runId}/events`, { type: "diff", payload: { filesChanged: 1, summary: "selftest diff" } }); const result = await runOnce({ managerUrl, runId: happy.runId, codexCommand: context.fakeCodexCommand, codexArgs: context.fakeCodexArgs, codexHome: context.codexHome, env: { CODEX_HOME: context.codexHome }, oneShot: true }); assert.equal(result.terminalStatus, "completed"); @@ -86,6 +86,15 @@ async function assertEventContractAndCompletedSemantics(client: ManagerClient, c assert.equal(envelope.completed, true); assert.equal(envelope.terminalStatus, "completed"); assert.equal(envelope.terminalSource, "command-record"); + const toolCallSummary = envelope.toolCallSummary as JsonRecord; + assert.equal(toolCallSummary.count, 1); + assert.deepEqual(toolCallSummary.statusCounts, { completed: 1 }); + assert.deepEqual(toolCallSummary.exitCodeCounts, { "0": 1 }); + const toolCallItems = toolCallSummary.items as JsonRecord[]; + assert.equal(toolCallItems[0]?.status, "completed"); + assert.equal(toolCallItems[0]?.exitCode, 0); + assert.match(String(toolCallItems[0]?.command), /hwpod build/u); + assert.doesNotMatch(String(toolCallItems[0]?.command), /selftest-redacted-value/u); assertNoSecretLeak({ eventsResponse, envelope }); const partial = await createRunWithCommand(client, context, "partial should not complete", "selftest-partial-not-completed", 15_000);