fix(code-queue): return terminal read summaries

Merge PR #92 after rebasing onto current master and validating focused Code Queue CLI contracts. Adds bounded terminal review data to codex read without prompt/tool log disclosure.
This commit is contained in:
Lyon
2026-05-23 15:45:23 +08:00
committed by GitHub
parent 7c039a6f67
commit 7d3b06bd22
5 changed files with 429 additions and 6 deletions
@@ -0,0 +1,242 @@
import { codexReadTaskForTest } from "./src/code-queue";
/** Loosely typed JSON object used for the fixture payloads and for inspecting read results. */
type JsonRecord = Record<string, unknown>;
/** One recorded invocation of a fake fetcher: the requested path plus the optional init it received. */
type FetchCall = { path: string; init?: { method?: string; body?: unknown } };
// Sentinel strings planted in the fake summary payload (prompt fields, tool output
// preview, feedback prompt preview, and reference-injection base prompt). The
// serialized read result is searched for each one to prove that none of them leaks.
const promptSecret = "PROMPT_BODY_SHOULD_NOT_LEAK_FROM_CODEX_READ";
const toolSecret = "TOOL_LOG_SHOULD_NOT_LEAK_FROM_CODEX_READ";
const feedbackSecret = "FEEDBACK_PROMPT_SHOULD_NOT_LEAK_FROM_CODEX_READ";
const referenceSecret = "REFERENCE_INJECTION_BASE_PROMPT_SHOULD_NOT_LEAK_FROM_CODEX_READ";
/**
 * Fails the contract run when `condition` is falsy.
 *
 * @param condition - Value checked for truthiness.
 * @param message - Prefix of the thrown error message.
 * @param detail - Context appended to the message as JSON; defaults to an empty object.
 * @throws Error whose message is `<message>: <JSON of detail>` when the condition does not hold.
 */
function assertCondition(condition: unknown, message: string, detail: unknown = {}): void {
  if (condition) {
    return;
  }
  const serializedDetail = JSON.stringify(detail);
  throw new Error(`${message}: ${serializedDetail}`);
}
/**
 * Asserts that `value` is a non-null, non-array object and returns it typed as a JSON record.
 *
 * @param value - Candidate value to check.
 * @param label - Name used in the failure message (`<label> must be an object`).
 * @returns The same value, viewed as a `JsonRecord`.
 * @throws Error (via `assertCondition`) when the value is a primitive, `null`, or an array.
 */
function asRecord(value: unknown, label: string): JsonRecord {
  const isNonArrayObject = typeof value === "object" && value !== null && !Array.isArray(value);
  assertCondition(isNonArrayObject, `${label} must be an object`, value);
  return value as JsonRecord;
}
/**
 * Asserts that `value` is an array and returns it typed as `unknown[]`.
 *
 * @param value - Candidate value to check.
 * @param label - Name used in the failure message (`<label> must be an array`).
 * @returns The same value, viewed as an array of unknown elements.
 * @throws Error (via `assertCondition`) when the value is not an array.
 */
function asArray(value: unknown, label: string): unknown[] {
  const isList = Array.isArray(value);
  assertCondition(isList, `${label} must be an array`, value);
  return value as unknown[];
}
/**
 * Extracts the task id segment that follows `/api/tasks/` in a request path.
 *
 * @param path - Request path, optionally followed by further segments or a query string.
 * @returns The URI-decoded task id, or `"unknown"` when the path has no such segment.
 */
function taskIdFromPath(path: string): string {
  // The id runs up to the next "/" or "?", so sub-resources and query strings are excluded.
  const captured = /\/api\/tasks\/([^/?]+)/u.exec(path);
  const encodedId = captured?.[1] ?? "unknown";
  return decodeURIComponent(encodedId);
}
/**
 * Builds the fake task summary payload that the fixture fetcher serves.
 *
 * The payload deliberately carries the sentinel secrets (in the prompt fields, the
 * reference-injection base prompt, the tool output preview, and the feedback prompt
 * preview) so the contract can verify that none of them reaches the read result.
 *
 * @param taskId - Id echoed into the summary and into the derived thread id.
 * @param status - Terminal status to simulate; selects the success or failure variant of the data.
 * @returns A summary record describing a single terminal attempt.
 */
function summaryFor(taskId: string, status: "succeeded" | "failed"): JsonRecord {
  const isFailure = status === "failed";

  // Every status-dependent value lives in one variant record so the two scenarios
  // can be compared side by side.
  const variant = isFailure
    ? {
        finalResponse: "Failure summary: tests did not pass, but the runner reported the exact failing command.",
        attemptIndex: 2,
        mode: "retry",
        judgeFailCount: 2,
        terminalStatus: "failed",
        exitCode: 1,
        error: "focused contract failed",
        stderrTail: "bun scripts/code-queue-cli-read-terminal-contract-test.ts failed",
        judge: { decision: "fail", confidence: 0.88, reason: "contract failed" },
        runnerErrorClassification: { class: "test-failure", retryable: false },
      }
    : {
        finalResponse: "Final response: implemented the terminal read fix and validated the focused contract.",
        attemptIndex: 1,
        mode: "initial",
        judgeFailCount: 0,
        terminalStatus: "completed",
        exitCode: 0,
        error: null,
        stderrTail: "",
        judge: { decision: "complete", confidence: 0.97, reason: "verified" },
        runnerErrorClassification: null,
      };

  const promptText = `${promptSecret}\nPlease fix the task.`;

  const referenceItem = {
    round: 1,
    roundIndex: 0,
    taskId: "codex_reference_task",
    viaTaskId: null,
    status: "succeeded",
    providerId: "D601",
    executionMode: "container",
    model: "gpt-5.5",
    cwd: "/workspace/unidesk",
    createdAt: "2026-05-21T00:00:00.000Z",
    updatedAt: "2026-05-21T00:03:00.000Z",
    promptChars: 9999,
    finalResponseChars: 1234,
    finalResponseAt: "2026-05-21T00:03:00.000Z",
    finalResponseSource: "finalResponse",
    referenceTaskIds: [],
    cliHint: "bun scripts/cli.ts codex task codex_reference_task",
  };

  const terminalAttempt = {
    index: variant.attemptIndex,
    mode: variant.mode,
    terminalStatus: variant.terminalStatus,
    appServerExitCode: variant.exitCode,
    appServerSignal: null,
    error: variant.error,
    stderrTail: variant.stderrTail,
    startedAt: "2026-05-22T00:01:00.000Z",
    finishedAt: "2026-05-22T00:03:00.000Z",
    outputStartSeq: 1,
    outputEndSeq: 42,
    finalResponse: variant.finalResponse,
    finalResponsePreview: variant.finalResponse,
    finalResponseChars: variant.finalResponse.length,
    feedbackPromptPreview: feedbackSecret,
    judge: variant.judge,
    runnerErrorClassification: variant.runnerErrorClassification,
  };

  return {
    id: taskId,
    queueId: "hwlab",
    status,
    providerId: "D601",
    executionMode: "container",
    model: "gpt-5.5",
    agentPort: "codex",
    cwd: "/workspace/unidesk",
    reasoningEffort: "medium",
    maxAttempts: 2,
    currentAttempt: variant.attemptIndex,
    currentMode: variant.mode,
    judgeFailCount: variant.judgeFailCount,
    judgeFailRetryLimit: 3,
    codexThreadId: `thread_${taskId}`,
    activeTurnId: null,
    createdAt: "2026-05-22T00:00:00.000Z",
    startedAt: "2026-05-22T00:01:00.000Z",
    updatedAt: "2026-05-22T00:03:00.000Z",
    finishedAt: "2026-05-22T00:03:00.000Z",
    timing: { totalMs: 120000 },
    initialPrompt: promptText,
    prompt: promptText,
    referenceTaskIds: ["codex_reference_task"],
    referenceInjection: {
      version: 2,
      injectedAt: "2026-05-22T00:00:30.000Z",
      basePrompt: referenceSecret,
      directReferenceTaskIds: ["codex_reference_task"],
      maxRounds: 3,
      truncated: false,
      itemCount: 1,
      items: [referenceItem],
    },
    lastAssistantMessage: {
      at: "2026-05-22T00:03:00.000Z",
      seq: 41,
      source: "finalResponse",
      text: variant.finalResponse,
    },
    toolSummary: {
      count: 3,
      returned: 1,
      limit: 1,
      truncated: true,
      items: [{ seq: 39, kind: "ran", outputPreview: toolSecret }],
    },
    attempts: [terminalAttempt],
    // Same verdict as the attempt's judge, with the judging source added.
    lastJudge: { ...variant.judge, source: "minimax" },
    lastError: variant.error,
    cancelRequested: false,
    transcriptCount: 12,
    transcriptMaxSeq: 42,
    outputCount: 42,
    retainedOutputCount: 20,
    outputMaxSeq: 42,
    eventCount: 5,
  };
}
/**
 * Creates a fake fetcher for a terminal task that exists.
 *
 * Each invocation is appended to `calls`. The simulated status is "failed" when the
 * task id taken from the path contains "failed", otherwise "succeeded". Paths containing
 * "/summary" are answered with the fixture summary; paths containing "/read" are answered
 * with a read acknowledgement.
 *
 * @param calls - Array that receives one entry per fetcher invocation, in call order.
 * @returns A fetcher taking `(path, init)` and returning a 200 response envelope.
 * @throws Error (from the returned fetcher) when the path matches neither endpoint.
 */
function readTerminalFixture(calls: FetchCall[]): (path: string, init?: { method?: string; body?: unknown }) => unknown {
  const respond = (path: string, init?: { method?: string; body?: unknown }): unknown => {
    calls.push({ path, init });

    const taskId = taskIdFromPath(path);
    const terminalStatus = taskId.includes("failed") ? "failed" : "succeeded";

    if (path.includes("/summary")) {
      const summary = summaryFor(taskId, terminalStatus);
      return { ok: true, status: 200, body: { ok: true, summary } };
    }

    if (!path.includes("/read")) {
      throw new Error(`unexpected path ${path}`);
    }

    const task = {
      id: taskId,
      queueId: "hwlab",
      status: terminalStatus,
      readAt: "2026-05-22T00:04:00.000Z",
      terminalUnread: false,
    };
    const queue = { counts: { [terminalStatus]: 1 }, unreadTerminal: 0 };
    return { ok: true, status: 200, body: { ok: true, task, queue } };
  };

  return respond;
}
/**
 * Creates a fake fetcher for a task that does not exist.
 *
 * Every invocation is appended to `calls` and answered with a 404 envelope whose body
 * reports "task not found". The envelope's own `ok` flag is `true` while `body.ok` is
 * `false`.
 * NOTE(review): presumably the outer `ok` signals transport success rather than HTTP
 * success — confirm against the code-queue client.
 *
 * @param calls - Array that receives one entry per fetcher invocation, in call order.
 * @returns A fetcher taking `(path, init)` and always returning the not-found envelope.
 */
function missingTaskFixture(calls: FetchCall[]): (path: string, init?: { method?: string; body?: unknown }) => unknown {
  const respondNotFound = (path: string, init?: { method?: string; body?: unknown }): unknown => {
    calls.push({ path, init });
    const body = { ok: false, error: "task not found" };
    return { ok: true, status: 404, body };
  };
  return respondNotFound;
}
/**
 * Verifies the shape and disclosure policy of a terminal read result.
 *
 * Checks, in order: identity and execution metadata, lifecycle timestamps, the read
 * acknowledgement, the final response preview, the last attempt's terminal status, the
 * explicit disclosure flags, the drill-down commands, and that none of the sentinel
 * secrets appears anywhere in the serialized result. Failed tasks must additionally
 * expose `lastError`, a stderr tail, and the runner error classification.
 *
 * @param result - Value returned by the read operation under test.
 * @param taskId - Task id the result is expected to describe.
 * @param status - Terminal status the result is expected to report.
 * @throws Error (via `assertCondition`) on the first expectation that does not hold.
 */
function assertTerminalReadShape(result: unknown, taskId: string, status: "succeeded" | "failed"): void {
  const envelope = asRecord(result, "result");
  const taskView = asRecord(envelope.task, "task");
  const finalReply = asRecord(taskView.finalResponse, "finalResponse");
  const attemptSummary = asRecord(taskView.attempts, "attempts");
  const latestAttempt = asRecord(attemptSummary.lastAttempt, "lastAttempt");
  const readAck = asRecord(envelope.read, "read");
  const disclosurePolicy = asRecord(taskView.disclosure, "disclosure");
  const serialized = JSON.stringify(result);
  const expectFailure = status === "failed";

  // Identity and execution metadata.
  assertCondition(taskView.id === taskId, "read result must preserve task id", taskView);
  assertCondition(taskView.queueId === "hwlab", "read result must preserve queue id", taskView);
  assertCondition(taskView.status === status, "read result must preserve terminal status", taskView);
  const hasStableMetadata =
    taskView.model === "gpt-5.5" && taskView.providerId === "D601" && taskView.cwd === "/workspace/unidesk";
  assertCondition(hasStableMetadata, "read result must preserve stable execution metadata", taskView);

  // Lifecycle timestamps must be carried over verbatim.
  const expectedTimestamps: Array<[string, string]> = [
    ["createdAt", "2026-05-22T00:00:00.000Z"],
    ["startedAt", "2026-05-22T00:01:00.000Z"],
    ["updatedAt", "2026-05-22T00:03:00.000Z"],
    ["finishedAt", "2026-05-22T00:03:00.000Z"],
  ];
  for (const [field, expected] of expectedTimestamps) {
    assertCondition(taskView[field] === expected, `read result must include ${field}`, taskView);
  }

  // Read acknowledgement, both on the task and at the top level.
  const taskAcknowledged = taskView.readAt === "2026-05-22T00:04:00.000Z" && taskView.terminalUnread === false;
  assertCondition(taskAcknowledged, "read result must preserve read acknowledgement", taskView);
  const topLevelAcknowledged = readAck.marked === true && readAck.terminalUnread === false;
  assertCondition(topLevelAcknowledged, "top-level read acknowledgement must be stable", readAck);

  // Final response preview and terminal attempt.
  const replyText = String(finalReply.text ?? "");
  const expectedMarker = expectFailure ? "Failure summary" : "Final response";
  assertCondition(replyText.includes(expectedMarker), "read result must include final response text", finalReply);
  const previewIsBounded = finalReply.chars === replyText.length && finalReply.truncated === false;
  assertCondition(previewIsBounded, "read result must include bounded final response preview metadata", finalReply);
  const expectedTerminalStatus = expectFailure ? "failed" : "completed";
  assertCondition(
    latestAttempt.terminalStatus === expectedTerminalStatus,
    "read result must include terminal attempt summary",
    latestAttempt,
  );

  // Disclosure flags must be stated explicitly.
  const policyIsExplicit =
    disclosurePolicy.promptIncluded === false &&
    disclosurePolicy.toolLogsIncluded === false &&
    disclosurePolicy.finalResponseIncluded === true;
  assertCondition(policyIsExplicit, "read disclosure policy must be explicit", disclosurePolicy);

  // Drill-down commands that give access to the withheld detail.
  const drillDown = asRecord(taskView.commands, "task.commands");
  assertCondition(
    String(drillDown.detail ?? "") === `bun scripts/cli.ts codex task ${taskId} --detail`,
    "read result must include detail drill-down command",
    drillDown,
  );
  assertCondition(
    String(drillDown.trace ?? "").includes(`codex task ${taskId} --trace`),
    "read result must include trace drill-down command",
    drillDown,
  );
  assertCondition(
    String(drillDown.output ?? "").includes(`codex output ${taskId}`),
    "read result must include output drill-down command",
    drillDown,
  );

  // None of the planted sentinel strings may appear anywhere in the serialized result.
  const leakChecks: Array<[string, string]> = [
    [promptSecret, "read result must not leak prompt body"],
    [toolSecret, "read result must not leak tool logs"],
    [feedbackSecret, "read result must not leak feedback prompt body"],
    [referenceSecret, "read result must not leak reference injection base prompt"],
  ];
  for (const [secret, complaint] of leakChecks) {
    assertCondition(!serialized.includes(secret), complaint, serialized);
  }

  if (!expectFailure) {
    return;
  }

  // Failure-only diagnostics.
  assertCondition(taskView.lastError === "focused contract failed", "failed read must include lastError", taskView);
  const stderrTail = asRecord(latestAttempt.stderrTail, "stderrTail");
  assertCondition(
    String(stderrTail.text ?? "").includes("contract-test"),
    "failed read must include stderr tail",
    latestAttempt,
  );
  const classification = asRecord(latestAttempt.runnerErrorClassification, "runnerErrorClassification");
  assertCondition(
    classification.class === "test-failure",
    "failed read must include runner error classification",
    latestAttempt,
  );
}
/**
 * Runs the terminal-read contract against three scenarios: a succeeded task, a failed
 * task, and a missing task.
 *
 * For the succeeded task it also verifies that exactly two requests were made — the
 * summary fetch (with `toolLimit=3`) followed by a POST to the read endpoint. For the
 * missing task it verifies that the lookup error surfaces and that no read mutation
 * was attempted afterwards.
 *
 * @returns A report with `ok: true` and the list of checks that passed.
 * @throws Error (via `assertCondition`) on the first contract violation.
 */
function run(): JsonRecord {
  // Runs an action and hands back whatever it threw, normalized to an Error (null if nothing was thrown).
  const captureFailure = (action: () => unknown): Error | null => {
    try {
      action();
      return null;
    } catch (thrown) {
      return thrown instanceof Error ? thrown : new Error(String(thrown));
    }
  };

  // Scenario 1: succeeded terminal task.
  const succeededId = "codex_succeeded_terminal";
  const succeededCalls: FetchCall[] = [];
  const succeededResult = codexReadTaskForTest(succeededId, readTerminalFixture(succeededCalls));
  assertTerminalReadShape(succeededResult, succeededId, "succeeded");
  assertCondition(succeededCalls.length === 2, "succeeded read must fetch summary then mark read", succeededCalls);
  const [summaryCall, readCall] = succeededCalls;
  assertCondition(
    summaryCall?.path.includes("/summary?toolLimit=3") && readCall?.path.includes("/read"),
    "succeeded read call order must preserve body before mutation",
    succeededCalls,
  );
  assertCondition(readCall?.init?.method === "POST", "read mutation must use POST", succeededCalls);

  // Scenario 2: failed terminal task.
  const failedId = "codex_failed_terminal";
  const failedCalls: FetchCall[] = [];
  const failedResult = codexReadTaskForTest(failedId, readTerminalFixture(failedCalls));
  assertTerminalReadShape(failedResult, failedId, "failed");

  // Scenario 3: missing task — the lookup must fail before any read mutation is issued.
  const missingCalls: FetchCall[] = [];
  const lookupFailure = captureFailure(() =>
    codexReadTaskForTest("codex_missing_terminal", missingTaskFixture(missingCalls)),
  );
  assertCondition(
    lookupFailure !== null && lookupFailure.message.includes("task not found"),
    "missing task must fail with task not found",
    lookupFailure?.message,
  );
  assertCondition(
    missingCalls.length === 1 && missingCalls[0]?.path.includes("/summary"),
    "missing task must not issue read mutation after failed lookup",
    missingCalls,
  );

  const passedChecks = [
    "succeeded terminal read returns status, queue, timestamps, final response preview, and drill-down commands from summary before marking read",
    "failed terminal read returns final response, lastError, stderr tail, and runner classification",
    "missing task fails before issuing a read mutation",
    "prompt, tool logs, and feedback prompts stay behind progressive disclosure commands",
  ];
  return { ok: true, checks: passedChecks };
}
// Entry point: run the contract checks and print the resulting report as
// pretty-printed JSON followed by a newline. If any check throws, `run()` fails
// before anything is written to stdout.
process.stdout.write(`${JSON.stringify(run(), null, 2)}\n`);