// pikasTech-unidesk/scripts/code-queue-cli-read-terminal-contract-test.ts

import { codexReadTaskForTest } from "./src/code-queue";

/** Loosely typed JSON-style object; every value has to be narrowed before it is used. */
type JsonRecord = Record<string, unknown>;
/** One recorded invocation of a fake fetch function: the requested path plus the optional request init it received. */
type FetchCall = { path: string; init?: { method?: string; body?: unknown } };

// Sentinel strings planted in the summary fixture (prompt fields, tool output
// preview, feedback prompt preview, reference-injection base prompt). The read
// result is serialized and searched for each one to prove none is echoed back.
const promptSecret = "PROMPT_BODY_SHOULD_NOT_LEAK_FROM_CODEX_READ";
const toolSecret = "TOOL_LOG_SHOULD_NOT_LEAK_FROM_CODEX_READ";
const feedbackSecret = "FEEDBACK_PROMPT_SHOULD_NOT_LEAK_FROM_CODEX_READ";
const referenceSecret = "REFERENCE_INJECTION_BASE_PROMPT_SHOULD_NOT_LEAK_FROM_CODEX_READ";

/**
 * Throws when `condition` is falsy.
 *
 * The error message is `message`, a colon, and the JSON rendering of `detail`.
 * Serialization is guarded: if `detail` cannot be rendered as JSON (for
 * example a cyclic object or a BigInt value), a fixed placeholder is used so
 * that the intended assertion message is still reported instead of an
 * unrelated serialization `TypeError`.
 *
 * @param condition - Value tested for truthiness.
 * @param message - Human-readable description of the violated expectation.
 * @param detail - Extra context appended to the message; defaults to `{}`.
 * @throws Error when `condition` is falsy.
 */
function assertCondition(condition: unknown, message: string, detail: unknown = {}): void {
  if (condition) return;
  let rendered: string;
  try {
    rendered = JSON.stringify(detail);
  } catch {
    // JSON.stringify rejects cyclic structures and BigInt values; do not let
    // that mask the assertion that actually failed.
    rendered = "[unserializable detail]";
  }
  throw new Error(`${message}: ${rendered}`);
}

/**
 * Narrows an unknown value to a JSON-style object.
 *
 * @param value - Candidate value.
 * @param label - Name used in the failure message.
 * @returns The same value, typed as a record.
 * @throws Error (via `assertCondition`) when the value is null, an array, or not an object.
 */
function asRecord(value: unknown, label: string): JsonRecord {
  const isPlainObject = value !== null && typeof value === "object" && !Array.isArray(value);
  assertCondition(isPlainObject, `${label} must be an object`, value);
  return value as JsonRecord;
}

/**
 * Narrows an unknown value to an array.
 *
 * @param value - Candidate value.
 * @param label - Name used in the failure message.
 * @returns The same value, typed as an array of unknown elements.
 * @throws Error (via `assertCondition`) when the value is not an array.
 */
function asArray(value: unknown, label: string): unknown[] {
  const isList = Array.isArray(value);
  assertCondition(isList, `${label} must be an array`, value);
  return value as unknown[];
}

/**
 * Extracts the URL-decoded task id from a path containing `/api/tasks/<id>`.
 *
 * The id is the run of characters after `/api/tasks/` up to the next `/` or `?`.
 *
 * @param path - Request path handed to the fake fetch function.
 * @returns The decoded id, or `"unknown"` when the path has no task segment.
 */
function taskIdFromPath(path: string): string {
  const captured = /\/api\/tasks\/([^/?]+)/u.exec(path);
  const encodedId = captured === null ? "unknown" : (captured[1] ?? "unknown");
  return decodeURIComponent(encodedId);
}

/**
 * Builds the canned task summary returned by the fake `/summary` endpoint.
 *
 * The fixture deliberately embeds the sentinel secrets (prompt, tool output
 * preview, feedback prompt preview, reference-injection base prompt) so the
 * caller can verify that a read result does not echo them.
 *
 * @param taskId - Id stamped on the summary and used to derive the thread id.
 * @param status - Terminal status; `"failed"` switches the fixture to its
 *   second-attempt, failing-judge variant.
 * @returns A freshly built summary object.
 */
function summaryFor(taskId: string, status: "succeeded" | "failed"): JsonRecord {
  const isFailure = status === "failed";

  // Everything that differs between the two terminal outcomes lives here.
  const outcome = isFailure
    ? {
        finalText: "Failure summary: tests did not pass, but the runner reported the exact failing command.",
        attemptNumber: 2,
        mode: "retry",
        judgeFailCount: 2,
        attemptStatus: "failed",
        exitCode: 1,
        error: "focused contract failed",
        stderrTail: "bun scripts/code-queue-cli-read-terminal-contract-test.ts failed",
        verdict: { decision: "fail", confidence: 0.88, reason: "contract failed" },
        classification: { class: "test-failure", retryable: false },
      }
    : {
        finalText: "Final response: implemented the terminal read fix and validated the focused contract.",
        attemptNumber: 1,
        mode: "initial",
        judgeFailCount: 0,
        attemptStatus: "completed",
        exitCode: 0,
        error: null,
        stderrTail: "",
        verdict: { decision: "complete", confidence: 0.97, reason: "verified" },
        classification: null,
      };

  const promptBody = `${promptSecret}\nPlease fix the task.`;
  const referencedTaskId = "codex_reference_task";
  const startedAt = "2026-05-22T00:01:00.000Z";
  const finishedAt = "2026-05-22T00:03:00.000Z";

  // Metadata-only description of the single referenced task.
  const referenceItem = {
    round: 1,
    roundIndex: 0,
    taskId: referencedTaskId,
    viaTaskId: null,
    status: "succeeded",
    providerId: "D601",
    executionMode: "container",
    model: "gpt-5.5",
    cwd: "/workspace/unidesk",
    createdAt: "2026-05-21T00:00:00.000Z",
    updatedAt: "2026-05-21T00:03:00.000Z",
    promptChars: 9999,
    finalResponseChars: 1234,
    finalResponseAt: "2026-05-21T00:03:00.000Z",
    finalResponseSource: "finalResponse",
    referenceTaskIds: [],
    cliHint: "bun scripts/cli.ts codex task codex_reference_task",
  };

  const referenceInjection = {
    version: 2,
    injectedAt: "2026-05-22T00:00:30.000Z",
    basePrompt: referenceSecret,
    directReferenceTaskIds: [referencedTaskId],
    maxRounds: 3,
    truncated: false,
    itemCount: 1,
    items: [referenceItem],
  };

  // The only attempt record; it carries the feedback-prompt sentinel.
  const terminalAttempt = {
    index: outcome.attemptNumber,
    mode: outcome.mode,
    terminalStatus: outcome.attemptStatus,
    appServerExitCode: outcome.exitCode,
    appServerSignal: null,
    error: outcome.error,
    stderrTail: outcome.stderrTail,
    startedAt,
    finishedAt,
    outputStartSeq: 1,
    outputEndSeq: 42,
    finalResponse: outcome.finalText,
    finalResponsePreview: outcome.finalText,
    finalResponseChars: outcome.finalText.length,
    feedbackPromptPreview: feedbackSecret,
    judge: outcome.verdict,
    runnerErrorClassification: outcome.classification,
  };

  return {
    id: taskId,
    queueId: "hwlab",
    status,
    providerId: "D601",
    executionMode: "container",
    model: "gpt-5.5",
    agentPort: "codex",
    cwd: "/workspace/unidesk",
    reasoningEffort: "medium",
    maxAttempts: 2,
    currentAttempt: outcome.attemptNumber,
    currentMode: outcome.mode,
    judgeFailCount: outcome.judgeFailCount,
    judgeFailRetryLimit: 3,
    codexThreadId: `thread_${taskId}`,
    activeTurnId: null,
    createdAt: "2026-05-22T00:00:00.000Z",
    startedAt,
    updatedAt: finishedAt,
    finishedAt,
    timing: { totalMs: 120000 },
    initialPrompt: promptBody,
    prompt: promptBody,
    referenceTaskIds: [referencedTaskId],
    referenceInjection,
    lastAssistantMessage: {
      at: finishedAt,
      seq: 41,
      source: "finalResponse",
      text: outcome.finalText,
    },
    toolSummary: {
      count: 3,
      returned: 1,
      limit: 1,
      truncated: true,
      items: [{ seq: 39, kind: "ran", outputPreview: toolSecret }],
    },
    attempts: [terminalAttempt],
    // Same verdict as the attempt's judge, plus the judge source.
    lastJudge: { ...outcome.verdict, source: "minimax" },
    lastError: outcome.error,
    cancelRequested: false,
    transcriptCount: 12,
    transcriptMaxSeq: 42,
    outputCount: 42,
    retainedOutputCount: 20,
    outputMaxSeq: 42,
    eventCount: 5,
  };
}

/**
 * Creates a fake fetch function for a task that exists and is terminal.
 *
 * Every invocation is appended to `calls` before anything else happens. The
 * task id is parsed from the path, and an id containing `"failed"` selects the
 * failed variant. Paths containing `/summary` receive the canned summary;
 * paths containing `/read` receive a read acknowledgement. When a path
 * contains both, `/summary` wins.
 *
 * @param calls - Array that records each invocation in order.
 * @returns The fake fetch function; it throws on any other path.
 */
function readTerminalFixture(calls: FetchCall[]): (path: string, init?: { method?: string; body?: unknown }) => unknown {
  return (path, init) => {
    calls.push({ path, init });

    const taskId = taskIdFromPath(path);
    const terminalStatus = taskId.includes("failed") ? "failed" : "succeeded";
    const isSummaryRequest = path.includes("/summary");
    const isReadRequest = path.includes("/read");

    if (!isSummaryRequest && !isReadRequest) {
      throw new Error(`unexpected path ${path}`);
    }

    const body = isSummaryRequest
      ? { ok: true, summary: summaryFor(taskId, terminalStatus) }
      : {
          ok: true,
          task: {
            id: taskId,
            queueId: "hwlab",
            status: terminalStatus,
            readAt: "2026-05-22T00:04:00.000Z",
            terminalUnread: false,
          },
          queue: { counts: { [terminalStatus]: 1 }, unreadTerminal: 0 },
        };

    return { ok: true, status: 200, body };
  };
}

/**
 * Creates a fake fetch function for a task that does not exist.
 *
 * Every invocation is appended to `calls`, and every path gets the same
 * reply: an envelope with `ok: true` and `status: 404` whose body has
 * `ok: false` and the error `"task not found"`.
 *
 * @param calls - Array that records each invocation in order.
 * @returns The fake fetch function.
 */
function missingTaskFixture(calls: FetchCall[]): (path: string, init?: { method?: string; body?: unknown }) => unknown {
  // Built per call so callers never share a response object.
  const notFound = () => ({ ok: true, status: 404, body: { ok: false, error: "task not found" } });
  return (path, init) => {
    calls.push({ path, init });
    return notFound();
  };
}

/**
 * Verifies the shape and disclosure policy of a terminal read result.
 *
 * Checks run in a fixed order: structural unwrapping of the nested objects,
 * identity and execution metadata, timestamps, read acknowledgement, final
 * response, last attempt, disclosure flags, drill-down commands, absence of
 * the sentinel secrets in the serialized result, and finally the
 * failure-only fields.
 *
 * @param result - Value returned by the read under test.
 * @param taskId - Task id the result is expected to carry.
 * @param status - Expected terminal status.
 * @throws Error on the first expectation that does not hold.
 */
function assertTerminalReadShape(result: unknown, taskId: string, status: "succeeded" | "failed"): void {
  // Unwrap every nested object first so a malformed envelope fails early.
  const envelope = asRecord(result, "result");
  const task = asRecord(envelope.task, "task");
  const finalResponse = asRecord(task.finalResponse, "finalResponse");
  const attempts = asRecord(task.attempts, "attempts");
  const lastAttempt = asRecord(attempts.lastAttempt, "lastAttempt");
  const read = asRecord(envelope.read, "read");
  const disclosure = asRecord(task.disclosure, "disclosure");
  const serialized = JSON.stringify(result);
  const isFailure = status === "failed";

  // Identity and stable execution metadata.
  assertCondition(task.id === taskId, "read result must preserve task id", task);
  assertCondition(task.queueId === "hwlab", "read result must preserve queue id", task);
  assertCondition(task.status === status, "read result must preserve terminal status", task);
  const hasExecutionMetadata =
    task.model === "gpt-5.5" && task.providerId === "D601" && task.cwd === "/workspace/unidesk";
  assertCondition(hasExecutionMetadata, "read result must preserve stable execution metadata", task);

  // Lifecycle timestamps, in the order they are reported.
  const expectedTimestamps: [string, string][] = [
    ["createdAt", "2026-05-22T00:00:00.000Z"],
    ["startedAt", "2026-05-22T00:01:00.000Z"],
    ["updatedAt", "2026-05-22T00:03:00.000Z"],
    ["finishedAt", "2026-05-22T00:03:00.000Z"],
  ];
  for (const [field, expected] of expectedTimestamps) {
    assertCondition(task[field] === expected, `read result must include ${field}`, task);
  }

  // Read acknowledgement, both on the task and at the top level.
  const taskAcknowledged = task.readAt === "2026-05-22T00:04:00.000Z" && task.terminalUnread === false;
  assertCondition(taskAcknowledged, "read result must preserve read acknowledgement", task);
  const readAcknowledged = read.marked === true && read.terminalUnread === false;
  assertCondition(readAcknowledged, "top-level read acknowledgement must be stable", read);

  // Final response text and its preview metadata.
  const finalText = String(finalResponse.text ?? "");
  const expectedLead = isFailure ? "Failure summary" : "Final response";
  assertCondition(finalText.includes(expectedLead), "read result must include final response text", finalResponse);
  const previewIsComplete = finalResponse.chars === finalText.length && finalResponse.truncated === false;
  assertCondition(previewIsComplete, "read result must include bounded final response preview metadata", finalResponse);

  // Last attempt and disclosure policy.
  const expectedAttemptStatus = isFailure ? "failed" : "completed";
  assertCondition(lastAttempt.terminalStatus === expectedAttemptStatus, "read result must include terminal attempt summary", lastAttempt);
  const disclosureIsExplicit =
    disclosure.promptIncluded === false &&
    disclosure.toolLogsIncluded === false &&
    disclosure.finalResponseIncluded === true;
  assertCondition(disclosureIsExplicit, "read disclosure policy must be explicit", disclosure);

  // Drill-down commands.
  const commands = asRecord(task.commands, "task.commands");
  const detailCommand = String(commands.detail ?? "");
  assertCondition(detailCommand === `bun scripts/cli.ts codex task ${taskId} --detail`, "read result must include detail drill-down command", commands);
  const traceCommand = String(commands.trace ?? "");
  assertCondition(traceCommand.includes(`codex task ${taskId} --trace`), "read result must include trace drill-down command", commands);
  const outputCommand = String(commands.output ?? "");
  assertCondition(outputCommand.includes(`codex output ${taskId}`), "read result must include output drill-down command", commands);

  // None of the planted sentinels may appear anywhere in the serialized result.
  const forbidden: [string, string][] = [
    [promptSecret, "prompt body"],
    [toolSecret, "tool logs"],
    [feedbackSecret, "feedback prompt body"],
    [referenceSecret, "reference injection base prompt"],
  ];
  for (const [secret, what] of forbidden) {
    assertCondition(!serialized.includes(secret), `read result must not leak ${what}`, serialized);
  }

  if (!isFailure) return;

  // Failure-only diagnostics.
  assertCondition(task.lastError === "focused contract failed", "failed read must include lastError", task);
  const stderrTail = asRecord(lastAttempt.stderrTail, "stderrTail");
  assertCondition(String(stderrTail.text ?? "").includes("contract-test"), "failed read must include stderr tail", lastAttempt);
  const classification = asRecord(lastAttempt.runnerErrorClassification, "runnerErrorClassification");
  assertCondition(classification.class === "test-failure", "failed read must include runner error classification", lastAttempt);
}

/**
 * Runs the three contract scenarios — succeeded read, failed read, and
 * missing task — against fake fetch fixtures.
 *
 * @returns A report object listing the checks that passed.
 * @throws Error on the first violated expectation.
 */
function run(): JsonRecord {
  // Scenario 1: succeeded terminal task, including the fetch call sequence.
  const succeededId = "codex_succeeded_terminal";
  const succeededCalls: FetchCall[] = [];
  const succeededResult = codexReadTaskForTest(succeededId, readTerminalFixture(succeededCalls));
  assertTerminalReadShape(succeededResult, succeededId, "succeeded");
  assertCondition(succeededCalls.length === 2, "succeeded read must fetch summary then mark read", succeededCalls);
  const [summaryCall, readCall] = succeededCalls;
  const summaryPrecedesRead = summaryCall?.path.includes("/summary?toolLimit=3") && readCall?.path.includes("/read");
  assertCondition(summaryPrecedesRead, "succeeded read call order must preserve body before mutation", succeededCalls);
  assertCondition(readCall?.init?.method === "POST", "read mutation must use POST", succeededCalls);

  // Scenario 2: failed terminal task.
  const failedId = "codex_failed_terminal";
  const failedCalls: FetchCall[] = [];
  const failedResult = codexReadTaskForTest(failedId, readTerminalFixture(failedCalls));
  assertTerminalReadShape(failedResult, failedId, "failed");

  // Scenario 3: missing task. The read must throw, and only the summary lookup may be issued.
  const missingCalls: FetchCall[] = [];
  const missingError = ((): Error | null => {
    try {
      codexReadTaskForTest("codex_missing_terminal", missingTaskFixture(missingCalls));
      return null;
    } catch (error) {
      return error instanceof Error ? error : new Error(String(error));
    }
  })();
  const reportsNotFound = missingError !== null && missingError.message.includes("task not found");
  assertCondition(reportsNotFound, "missing task must fail with task not found", missingError?.message);
  const onlySummaryLookup = missingCalls.length === 1 && missingCalls[0]?.path.includes("/summary");
  assertCondition(onlySummaryLookup, "missing task must not issue read mutation after failed lookup", missingCalls);

  const checks = [
    "succeeded terminal read returns status, queue, timestamps, final response preview, and drill-down commands from summary before marking read",
    "failed terminal read returns final response, lastError, stderr tail, and runner classification",
    "missing task fails before issuing a read mutation",
    "prompt, tool logs, and feedback prompts stay behind progressive disclosure commands",
  ];
  return { ok: true, checks };
}

// Run the contract checks and print the report as indented JSON followed by a
// newline. A failed assertion throws out of run() before anything is written.
const report = JSON.stringify(run(), null, 2);
process.stdout.write(`${report}\n`);