fix(code-queue): return terminal read summaries

Merge PR #92 after rebasing onto current master and validating focused Code Queue CLI contracts. Adds bounded terminal review data to codex read without prompt/tool log disclosure.
2026-05-23 15:45:23 +08:00
parent 7c039a6f67
commit 7d3b06bd22
5 changed files with 429 additions and 6 deletions
@@ -0,0 +1,242 @@
+import { codexReadTaskForTest } from "./src/code-queue";
+
+type JsonRecord = Record<string, unknown>; // Loosely typed JSON object; values are unknown and must be narrowed before use.
+type FetchCall = { path: string; init?: { method?: string; body?: unknown } }; // One recorded fixture invocation: the request path plus the optional init argument.
+
+const promptSecret = "PROMPT_BODY_SHOULD_NOT_LEAK_FROM_CODEX_READ"; // Sentinel placed in the fixture initialPrompt and prompt fields; the read result is asserted not to contain it.
+const toolSecret = "TOOL_LOG_SHOULD_NOT_LEAK_FROM_CODEX_READ"; // Sentinel placed in the fixture toolSummary outputPreview.
+const feedbackSecret = "FEEDBACK_PROMPT_SHOULD_NOT_LEAK_FROM_CODEX_READ"; // Sentinel placed in the fixture attempt feedbackPromptPreview.
+const referenceSecret = "REFERENCE_INJECTION_BASE_PROMPT_SHOULD_NOT_LEAK_FROM_CODEX_READ"; // Sentinel placed in the fixture referenceInjection basePrompt.
+
+function assertCondition(condition: unknown, message: string, detail: unknown = {}): void { // Minimal assertion helper: throws when condition is falsy, otherwise returns nothing.
+  if (!condition) throw new Error(`${message}: ${JSON.stringify(detail)}`); // Error text is the message, a colon, then detail serialized as JSON (detail defaults to an empty object).
+}
+
+function asRecord(value: unknown, label: string): JsonRecord { // Narrows value to a JsonRecord, throwing via assertCondition with the label when it is not a plain object.
+  assertCondition(typeof value === "object" && value !== null && !Array.isArray(value), `${label} must be an object`, value); // Rejects null and arrays, which also report typeof object.
+  return value as JsonRecord; // Cast is backed by the runtime check on the previous line.
+}
+
+function asArray(value: unknown, label: string): unknown[] { // Narrows value to an array, throwing via assertCondition with the label when it is not one.
+  assertCondition(Array.isArray(value), `${label} must be an array`, value); // NOTE(review): no caller of asArray is visible in this hunk; confirm it is still needed.
+  return value as unknown[]; // Cast is backed by the runtime check on the previous line.
+}
+
+function taskIdFromPath(path: string): string { // Extracts the URL-decoded task id segment that follows /api/tasks/ in a request path.
+  const match = path.match(/\/api\/tasks\/([^/?]+)/u); // Capture group stops at the next slash or question mark, so sub-routes and query strings are excluded.
+  return decodeURIComponent(match?.[1] ?? "unknown"); // Falls back to the literal id unknown when the path does not match.
+}
+
+function summaryFor(taskId: string, status: "succeeded" | "failed"): JsonRecord { // Builds the canned task summary record that the fixture serves for summary requests.
+  const failed = status === "failed"; // Single flag that switches every status-dependent field below.
+  const finalResponse = failed
+    ? "Failure summary: tests did not pass, but the runner reported the exact failing command."
+    : "Final response: implemented the terminal read fix and validated the focused contract.";
+  return {
+    id: taskId,
+    queueId: "hwlab",
+    status,
+    providerId: "D601",
+    executionMode: "container",
+    model: "gpt-5.5",
+    agentPort: "codex",
+    cwd: "/workspace/unidesk",
+    reasoningEffort: "medium",
+    maxAttempts: 2,
+    currentAttempt: failed ? 2 : 1, // The failed fixture is modeled as a second, retry attempt.
+    currentMode: failed ? "retry" : "initial",
+    judgeFailCount: failed ? 2 : 0,
+    judgeFailRetryLimit: 3,
+    codexThreadId: `thread_${taskId}`,
+    activeTurnId: null,
+    createdAt: "2026-05-22T00:00:00.000Z",
+    startedAt: "2026-05-22T00:01:00.000Z",
+    updatedAt: "2026-05-22T00:03:00.000Z",
+    finishedAt: "2026-05-22T00:03:00.000Z",
+    timing: { totalMs: 120000 },
+    initialPrompt: `${promptSecret}\nPlease fix the task.`, // Carries the prompt sentinel so a leak of the prompt body is detectable.
+    prompt: `${promptSecret}\nPlease fix the task.`, // Same sentinel in the second prompt field.
+    referenceTaskIds: ["codex_reference_task"],
+    referenceInjection: {
+      version: 2,
+      injectedAt: "2026-05-22T00:00:30.000Z",
+      basePrompt: referenceSecret, // Carries the reference-injection sentinel.
+      directReferenceTaskIds: ["codex_reference_task"],
+      maxRounds: 3,
+      truncated: false,
+      itemCount: 1,
+      items: [{
+        round: 1,
+        roundIndex: 0,
+        taskId: "codex_reference_task",
+        viaTaskId: null,
+        status: "succeeded",
+        providerId: "D601",
+        executionMode: "container",
+        model: "gpt-5.5",
+        cwd: "/workspace/unidesk",
+        createdAt: "2026-05-21T00:00:00.000Z",
+        updatedAt: "2026-05-21T00:03:00.000Z",
+        promptChars: 9999,
+        finalResponseChars: 1234,
+        finalResponseAt: "2026-05-21T00:03:00.000Z",
+        finalResponseSource: "finalResponse",
+        referenceTaskIds: [],
+        cliHint: "bun scripts/cli.ts codex task codex_reference_task",
+      }],
+    },
+    lastAssistantMessage: {
+      at: "2026-05-22T00:03:00.000Z",
+      seq: 41,
+      source: "finalResponse",
+      text: finalResponse,
+    },
+    toolSummary: {
+      count: 3,
+      returned: 1,
+      limit: 1,
+      truncated: true,
+      items: [{ seq: 39, kind: "ran", outputPreview: toolSecret }], // Carries the tool-log sentinel.
+    },
+    attempts: [
+      {
+        index: failed ? 2 : 1,
+        mode: failed ? "retry" : "initial",
+        terminalStatus: failed ? "failed" : "completed", // Note the success value is completed, not succeeded; the shape assertion checks for exactly these strings.
+        appServerExitCode: failed ? 1 : 0,
+        appServerSignal: null,
+        error: failed ? "focused contract failed" : null,
+        stderrTail: failed ? "bun scripts/code-queue-cli-read-terminal-contract-test.ts failed" : "", // Only the failed fixture has stderr text.
+        startedAt: "2026-05-22T00:01:00.000Z",
+        finishedAt: "2026-05-22T00:03:00.000Z",
+        outputStartSeq: 1,
+        outputEndSeq: 42,
+        finalResponse,
+        finalResponsePreview: finalResponse,
+        finalResponseChars: finalResponse.length,
+        feedbackPromptPreview: feedbackSecret, // Carries the feedback-prompt sentinel.
+        judge: failed ? { decision: "fail", confidence: 0.88, reason: "contract failed" } : { decision: "complete", confidence: 0.97, reason: "verified" },
+        runnerErrorClassification: failed ? { class: "test-failure", retryable: false } : null,
+      },
+    ],
+    lastJudge: failed ? { decision: "fail", confidence: 0.88, reason: "contract failed", source: "minimax" } : { decision: "complete", confidence: 0.97, reason: "verified", source: "minimax" },
+    lastError: failed ? "focused contract failed" : null,
+    cancelRequested: false,
+    transcriptCount: 12,
+    transcriptMaxSeq: 42,
+    outputCount: 42,
+    retainedOutputCount: 20,
+    outputMaxSeq: 42,
+    eventCount: 5,
+  };
+}
+
+function readTerminalFixture(calls: FetchCall[]): (path: string, init?: { method?: string; body?: unknown }) => unknown { // Returns a synchronous fake fetcher that serves summary and read responses and records every call into calls.
+  return (path, init) => {
+    calls.push({ path, init }); // Recorded before routing so even unexpected paths show up in the call log.
+    const taskId = taskIdFromPath(path);
+    const status = taskId.includes("failed") ? "failed" : "succeeded"; // Terminal status is derived from the task id text alone.
+    if (path.includes("/summary")) { // Summary route is checked first.
+      return { ok: true, status: 200, body: { ok: true, summary: summaryFor(taskId, status) } };
+    }
+    if (path.includes("/read")) { // Read route returns the mark-as-read acknowledgement.
+      return {
+        ok: true,
+        status: 200,
+        body: {
+          ok: true,
+          task: {
+            id: taskId,
+            queueId: "hwlab",
+            status,
+            readAt: "2026-05-22T00:04:00.000Z",
+            terminalUnread: false,
+          },
+          queue: { counts: { [status]: 1 }, unreadTerminal: 0 },
+        },
+      };
+    }
+    throw new Error(`unexpected path ${path}`); // Any other route is treated as a test failure.
+  };
+}
+
+function missingTaskFixture(calls: FetchCall[]): (path: string, init?: { method?: string; body?: unknown }) => unknown { // Returns a fake fetcher that records the call and answers every path with a task-not-found response.
+  return (path, init) => {
+    calls.push({ path, init });
+    return { ok: true, status: 404, body: { ok: false, error: "task not found" } }; // NOTE(review): outer ok is true while status is 404 and body ok is false; presumably intentional to exercise body-level failure handling, confirm against the real fetch wrapper.
+  };
+}
+
+function assertTerminalReadShape(result: unknown, taskId: string, status: "succeeded" | "failed"): void { // Asserts the full contract of a terminal read result for the given task id and status; throws on the first violation.
+  const data = asRecord(result, "result");
+  const task = asRecord(data.task, "task");
+  const finalResponse = asRecord(task.finalResponse, "finalResponse"); // In the result, finalResponse must be an object (text, chars, truncated are checked below).
+  const attempts = asRecord(task.attempts, "attempts"); // In the result, attempts must be an object exposing lastAttempt, unlike the array in the fixture summary.
+  const lastAttempt = asRecord(attempts.lastAttempt, "lastAttempt");
+  const read = asRecord(data.read, "read");
+  const disclosure = asRecord(task.disclosure, "disclosure");
+  const body = JSON.stringify(result); // Serialized once so the sentinel checks below scan every nested field.
+
+  assertCondition(task.id === taskId, "read result must preserve task id", task);
+  assertCondition(task.queueId === "hwlab", "read result must preserve queue id", task);
+  assertCondition(task.status === status, "read result must preserve terminal status", task);
+  assertCondition(task.model === "gpt-5.5" && task.providerId === "D601" && task.cwd === "/workspace/unidesk", "read result must preserve stable execution metadata", task);
+  assertCondition(task.createdAt === "2026-05-22T00:00:00.000Z", "read result must include createdAt", task);
+  assertCondition(task.startedAt === "2026-05-22T00:01:00.000Z", "read result must include startedAt", task);
+  assertCondition(task.updatedAt === "2026-05-22T00:03:00.000Z", "read result must include updatedAt", task);
+  assertCondition(task.finishedAt === "2026-05-22T00:03:00.000Z", "read result must include finishedAt", task);
+  assertCondition(task.readAt === "2026-05-22T00:04:00.000Z" && task.terminalUnread === false, "read result must preserve read acknowledgement", task); // readAt value matches what the fixture read route returns.
+  assertCondition(read.marked === true && read.terminalUnread === false, "top-level read acknowledgement must be stable", read);
+  assertCondition(String(finalResponse.text ?? "").includes(status === "failed" ? "Failure summary" : "Final response"), "read result must include final response text", finalResponse);
+  assertCondition(finalResponse.chars === String(finalResponse.text ?? "").length && finalResponse.truncated === false, "read result must include bounded final response preview metadata", finalResponse); // chars must equal the length of the returned text and the text must not be truncated.
+  assertCondition(lastAttempt.terminalStatus === (status === "failed" ? "failed" : "completed"), "read result must include terminal attempt summary", lastAttempt);
+  assertCondition(disclosure.promptIncluded === false && disclosure.toolLogsIncluded === false && disclosure.finalResponseIncluded === true, "read disclosure policy must be explicit", disclosure);
+  const commands = asRecord(task.commands, "task.commands");
+  assertCondition(String(commands.detail ?? "") === `bun scripts/cli.ts codex task ${taskId} --detail`, "read result must include detail drill-down command", commands); // detail is matched exactly; trace and output below are substring matches.
+  assertCondition(String(commands.trace ?? "").includes(`codex task ${taskId} --trace`), "read result must include trace drill-down command", commands);
+  assertCondition(String(commands.output ?? "").includes(`codex output ${taskId}`), "read result must include output drill-down command", commands);
+  assertCondition(!body.includes(promptSecret), "read result must not leak prompt body", body); // The four checks from here scan the serialized result for the fixture sentinels.
+  assertCondition(!body.includes(toolSecret), "read result must not leak tool logs", body);
+  assertCondition(!body.includes(feedbackSecret), "read result must not leak feedback prompt body", body);
+  assertCondition(!body.includes(referenceSecret), "read result must not leak reference injection base prompt", body);
+  if (status === "failed") { // Extra diagnostics are required only for failed tasks.
+    assertCondition(task.lastError === "focused contract failed", "failed read must include lastError", task);
+    assertCondition(String(asRecord(lastAttempt.stderrTail, "stderrTail").text ?? "").includes("contract-test"), "failed read must include stderr tail", lastAttempt); // stderrTail must be an object with a text field in the result, although the fixture supplies a plain string.
+    assertCondition(asRecord(lastAttempt.runnerErrorClassification, "runnerErrorClassification").class === "test-failure", "failed read must include runner error classification", lastAttempt);
+  }
+}
+
+function run(): JsonRecord { // Runs the succeeded, failed, and missing-task scenarios against codexReadTaskForTest and returns a report; throws on the first failed assertion.
+  const succeededCalls: FetchCall[] = [];
+  const succeeded = codexReadTaskForTest("codex_succeeded_terminal", readTerminalFixture(succeededCalls));
+  assertTerminalReadShape(succeeded, "codex_succeeded_terminal", "succeeded");
+  assertCondition(succeededCalls.length === 2, "succeeded read must fetch summary then mark read", succeededCalls); // Exactly two calls are expected: summary, then read.
+  assertCondition(succeededCalls[0]?.path.includes("/summary?toolLimit=3") && succeededCalls[1]?.path.includes("/read"), "succeeded read call order must preserve body before mutation", succeededCalls); // The summary request must carry toolLimit=3 and precede the read call.
+  assertCondition(succeededCalls[1]?.init?.method === "POST", "read mutation must use POST", succeededCalls);
+
+  const failedCalls: FetchCall[] = []; // Calls are recorded for the failed scenario but not asserted on in this function.
+  const failed = codexReadTaskForTest("codex_failed_terminal", readTerminalFixture(failedCalls));
+  assertTerminalReadShape(failed, "codex_failed_terminal", "failed");
+
+  const missingCalls: FetchCall[] = [];
+  let missingError: Error | null = null;
+  try {
+    codexReadTaskForTest("codex_missing_terminal", missingTaskFixture(missingCalls)); // Expected to throw; the result is discarded.
+  } catch (error) {
+    missingError = error instanceof Error ? error : new Error(String(error)); // Non-Error throwables are wrapped so the message check below works uniformly.
+  }
+  assertCondition(missingError !== null && missingError.message.includes("task not found"), "missing task must fail with task not found", missingError?.message);
+  assertCondition(missingCalls.length === 1 && missingCalls[0]?.path.includes("/summary"), "missing task must not issue read mutation after failed lookup", missingCalls); // Only the summary lookup may have been issued.
+
+  return {
+    ok: true,
+    checks: [
+      "succeeded terminal read returns status, queue, timestamps, final response preview, and drill-down commands from summary before marking read",
+      "failed terminal read returns final response, lastError, stderr tail, and runner classification",
+      "missing task fails before issuing a read mutation",
+      "prompt, tool logs, and feedback prompts stay behind progressive disclosure commands",
+    ],
+  };
+}
+
+process.stdout.write(`${JSON.stringify(run(), null, 2)}\n`); // Executes run() at module load and prints its report as two-space-indented JSON followed by a newline.