fix: 增加 result tool call 摘要

2026-06-09 21:41:26 +08:00
parent e54b7fe0a4
commit 0dc5a3f966
3 changed files with 72 additions and 2 deletions
@@ -163,6 +163,7 @@ Manager 只承接 HWLAB v0.2 Code Agent 的通用执行事实，不承接 HWLAB
 | `lastSeq` / `eventCount` | 支持调用方增量轮询和 result/trace reconciliation。 |
 | `runId` / `commandId` / `attemptId` | 支持调用方持久关联和问题定位。 |
 | `artifactSummary` | 第一阶段只放有界摘要、字节数、截断标记和必要引用；不内嵌大 stdout/stderr。 |
+| `toolCallSummary` | 输出有界、脱敏的 tool call 状态摘要，至少包含 `count`、`statusCounts`、`exitCodeCounts` 和最近若干条 `items` 的 `method/toolName/type/status/exitCode/command`。消费侧必须用它区分 AgentRun command terminal、agent 内部工具执行和后置诊断，不得用单一 `hwpodExitCode` 覆盖 AgentRun 成功终态。 |

 `assistant_message` partial、`command_output` 存在、stdout 非空、backend transport close 或 idle timeout 都不能单独让 result 进入 `completed`。

@@ -1,6 +1,10 @@
 import type { AgentRunStore } from "./store.js";
 import type { CommandRecord, JsonRecord, JsonValue, RunEvent, RunRecord, RunnerJobRecord, TerminalStatus } from "../common/types.js";
-import { outputBytesFromPayload, outputTruncatedFromPayload } from "../common/output.js";
+import { boundedTextSummary, outputBytesFromPayload, outputTruncatedFromPayload } from "../common/output.js";
+
+const maxToolCallSummaryItems = 40; // Max number of most-recent tool_call events listed in toolCallSummary.items.
+const toolCallCommandLimitChars = 600; // Character limit passed to boundedTextSummary for an item's `command`.
+const toolCallFieldLimitChars = 200; // Character limit passed to boundedTextSummary for an item's other string fields.

 export async function buildRunResult(store: AgentRunStore, runId: string, commandId?: string): Promise<JsonRecord> {
  const run = await store.getRun(runId);
@@ -37,6 +41,7 @@ export async function buildRunResult(store: AgentRunStore, runId: string, comman
    lastSeq: events.at(-1)?.seq ?? 0,
    eventCount: events.length,
    artifactSummary: artifactSummary(scopedEvents),
+    toolCallSummary: toolCallSummary(scopedEvents),
    sessionRef: sessionSummary(run),
    resourceBundleRef: resourceBundleSummary(run, events),
    runnerJobCount: jobs.length,
@@ -145,6 +150,61 @@ function artifactSummary(events: RunEvent[]): JsonRecord {
  return { commandOutputEvents, diffEvents, toolCallEvents, outputChars, outputBytes, truncatedEvents: outputTruncatedEvents, outputTruncatedEvents, stdoutSummary: streamSummary.stdout, stderrSummary: streamSummary.stderr };
 }

+/**
+ * Builds the `toolCallSummary` section of a run result from `tool_call` events.
+ *
+ * Counts cover every `tool_call` event in `events`; `items` lists only the latest
+ * `maxToolCallSummaryItems` of them, and `itemsOmitted` reports how many earlier
+ * ones were left out of `items`.
+ *
+ * @param events Run events to summarize; events whose `type` is not `"tool_call"` are ignored.
+ * @returns A record with `count`, `statusCounts`, `exitCodeCounts`, `items`,
+ *   `itemsOmitted`, `itemWindow` and `valuesPrinted`.
+ */
+function toolCallSummary(events: RunEvent[]): JsonRecord {
+  const toolCallEvents = events.filter((event) => event.type === "tool_call");
+  // Tally in Maps rather than plain objects: the keys come from event payloads, and a
+  // key such as "constructor" or "__proto__" would otherwise resolve to an inherited
+  // Object.prototype member and corrupt (or silently drop) the counter.
+  const statusTally = new Map<string, number>();
+  const exitCodeTally = new Map<string, number>();
+  for (const event of toolCallEvents) {
+    // Bound the key the same way the per-item `status` field is bounded, so the
+    // counter keys agree with the `status` values reported in `items`.
+    const boundedStatus = boundedOptionalString(event.payload.status, toolCallFieldLimitChars);
+    const status = boundedStatus !== null && boundedStatus.length > 0 ? boundedStatus : "unknown";
+    statusTally.set(status, (statusTally.get(status) ?? 0) + 1);
+    const exitCode = normalizedExitCode(event.payload.exitCode);
+    if (exitCode !== null) {
+      const exitCodeKey = String(exitCode);
+      exitCodeTally.set(exitCodeKey, (exitCodeTally.get(exitCodeKey) ?? 0) + 1);
+    }
+  }
+  const window = toolCallEvents.slice(-maxToolCallSummaryItems);
+  return {
+    count: toolCallEvents.length,
+    // Object.fromEntries defines own data properties, so even "__proto__" survives as a key.
+    statusCounts: Object.fromEntries(statusTally),
+    exitCodeCounts: Object.fromEntries(exitCodeTally),
+    items: window.map((event) => toolCallItemSummary(event)),
+    itemsOmitted: Math.max(0, toolCallEvents.length - window.length),
+    itemWindow: "latest",
+    valuesPrinted: false,
+  };
+}
+
+/**
+ * Produces the per-event entry placed in `toolCallSummary.items`.
+ *
+ * String payload fields are passed through `boundedOptionalString`; non-string
+ * values become `null`. `command` uses the larger `toolCallCommandLimitChars`
+ * limit and is accompanied by a `commandTruncated` flag.
+ *
+ * @param event The event to summarize.
+ * @returns A record carrying `seq`, `createdAt`, the bounded payload fields,
+ *   `exitCode` (a finite number or `null`) and `valuesPrinted: false`.
+ */
+function toolCallItemSummary(event: RunEvent): JsonRecord {
+  const { payload } = event;
+  // Shorthand for the fields that share the generic per-field character limit.
+  const field = (value: JsonValue | undefined): string | null =>
+    boundedOptionalString(value, toolCallFieldLimitChars);
+  const item: JsonRecord = {
+    seq: event.seq,
+    createdAt: event.createdAt,
+    method: field(payload.method),
+    toolName: field(payload.toolName),
+    type: field(payload.type),
+    itemId: field(payload.itemId),
+    status: field(payload.status),
+    exitCode: normalizedExitCode(payload.exitCode),
+    processId: field(payload.processId),
+    cwd: field(payload.cwd),
+    command: boundedOptionalString(payload.command, toolCallCommandLimitChars),
+    commandTruncated: optionalStringTruncated(payload.command, toolCallCommandLimitChars),
+    valuesPrinted: false,
+  };
+  return item;
+}
+
+/**
+ * Returns the `text` of `boundedTextSummary(value, { limitChars })` for string
+ * input, or `null` for any non-string value (including `undefined`).
+ *
+ * @param value Payload value to summarize.
+ * @param limitChars Character limit forwarded to `boundedTextSummary`.
+ */
+function boundedOptionalString(value: JsonValue | undefined, limitChars: number): string | null {
+  if (typeof value === "string") {
+    const summary = boundedTextSummary(value, { limitChars });
+    return summary.text as string;
+  }
+  return null;
+}
+
+/**
+ * Reports whether `boundedTextSummary` flags `value` as truncated at `limitChars`.
+ * Non-string values (including `undefined`) are never reported as truncated.
+ *
+ * @param value Payload value to check.
+ * @param limitChars Character limit forwarded to `boundedTextSummary`.
+ */
+function optionalStringTruncated(value: JsonValue | undefined, limitChars: number): boolean {
+  return typeof value === "string" && boundedTextSummary(value, { limitChars }).outputTruncated === true;
+}
+
+/**
+ * Normalizes a payload exit code: finite numbers pass through unchanged, while
+ * everything else (non-numbers, `NaN`, `Infinity`, `-Infinity`) becomes `null`.
+ *
+ * @param value Payload value to normalize.
+ */
+function normalizedExitCode(value: JsonValue | undefined): number | null {
+  if (typeof value !== "number") return null;
+  return Number.isFinite(value) ? value : null;
+}
+
 function attemptFromEvents(events: RunEvent[]): string | null {
  for (const event of [...events].reverse()) {
    const value = event.payload.attemptId;
@@ -72,7 +72,7 @@ async function assertBackendPreflight(client: ManagerClient): Promise<void> {

 async function assertEventContractAndCompletedSemantics(client: ManagerClient, context: SelfTestContext, managerUrl: string): Promise<void> {
  const happy = await createRunWithCommand(client, context, "hello event contract", "selftest-event-contract", 15_000);
-  await client.post(`/api/v1/runs/${happy.runId}/events`, { type: "tool_call", payload: { method: "selftest/tool", item: { command: "echo ok" } } });
+  await client.post(`/api/v1/runs/${happy.runId}/events`, { type: "tool_call", payload: { method: "item/completed", type: "commandExecution", toolName: "commandExecution", itemId: "call_selftest_hwpod", command: "authorization=Bearer selftest-redacted-value hwpod build --hwpod-id d601-f103-v2", cwd: "/workspace/hwlab", status: "completed", exitCode: 0, processId: "1234" } });
  await client.post(`/api/v1/runs/${happy.runId}/events`, { type: "diff", payload: { filesChanged: 1, summary: "selftest diff" } });
  const result = await runOnce({ managerUrl, runId: happy.runId, codexCommand: context.fakeCodexCommand, codexArgs: context.fakeCodexArgs, codexHome: context.codexHome, env: { CODEX_HOME: context.codexHome }, oneShot: true });
  assert.equal(result.terminalStatus, "completed");
@@ -86,6 +86,15 @@ async function assertEventContractAndCompletedSemantics(client: ManagerClient, c
  assert.equal(envelope.completed, true);
  assert.equal(envelope.terminalStatus, "completed");
  assert.equal(envelope.terminalSource, "command-record");
+  const toolCallSummary = envelope.toolCallSummary as JsonRecord;
+  assert.equal(toolCallSummary.count, 1);
+  assert.deepEqual(toolCallSummary.statusCounts, { completed: 1 });
+  assert.deepEqual(toolCallSummary.exitCodeCounts, { "0": 1 });
+  const toolCallItems = toolCallSummary.items as JsonRecord[];
+  assert.equal(toolCallItems[0]?.status, "completed");
+  assert.equal(toolCallItems[0]?.exitCode, 0);
+  assert.match(String(toolCallItems[0]?.command), /hwpod build/u);
+  assert.doesNotMatch(String(toolCallItems[0]?.command), /selftest-redacted-value/u);
  assertNoSecretLeak({ eventsResponse, envelope });

  const partial = await createRunWithCommand(client, context, "partial should not complete", "selftest-partial-not-completed", 15_000);