// Source file: pikasTech-unidesk/scripts/code-queue-supervisor-disclosure-contract-test.ts
// Listing metadata: 388 lines, 25 KiB, TypeScript

import { codexTasksQueryForTest } from "./src/code-queue";
/** Loosely typed JSON object: string keys whose values stay `unknown` until narrowed by the helpers below. */
type JsonRecord = Record<string, unknown>;
/**
 * Throws when `condition` is falsy; otherwise does nothing.
 *
 * The thrown message is `"<message>: <detail as JSON>"`. Serializing the detail
 * is best-effort: if `JSON.stringify` itself throws (for example on a circular
 * reference or a BigInt value) a placeholder is used instead, so the original
 * assertion message is never replaced by a serialization error.
 *
 * @param condition Value checked for truthiness.
 * @param message Human-readable description of the expectation.
 * @param detail Extra context appended to the message; defaults to `{}`.
 * @throws Error when `condition` is falsy.
 */
function assertCondition(condition: unknown, message: string, detail: JsonRecord = {}): void {
  if (condition) return;
  let serializedDetail: string;
  try {
    serializedDetail = JSON.stringify(detail);
  } catch (error) {
    // Keep the assertion failure as the reported error even when the detail cannot be serialized.
    const reason = error instanceof Error ? error.message : String(error);
    serializedDetail = `[unserializable detail: ${reason}]`;
  }
  throw new Error(`${message}: ${serializedDetail}`);
}
/**
 * Narrows an unknown value to a `JsonRecord`.
 *
 * @param value Candidate value.
 * @returns The same value, typed as a JSON object.
 * @throws Error (via `assertCondition`) when the value is `null`, an array, or not an object.
 */
function asRecord(value: unknown): JsonRecord {
  const isPlainObject = value !== null && typeof value === "object" && !Array.isArray(value);
  assertCondition(isPlainObject, "expected JSON object", { value });
  return value as JsonRecord;
}
/**
 * Narrows an unknown value to an array of unknown elements.
 *
 * @param value Candidate value.
 * @returns The same value, typed as `unknown[]`.
 * @throws Error (via `assertCondition`) when the value is not an array.
 */
function asArray(value: unknown): unknown[] {
  const isList = Array.isArray(value);
  assertCondition(isList, "expected JSON array", { value });
  return value as unknown[];
}
/**
 * Builds a multi-line filler string for fixtures.
 *
 * Each line is `<marker>-<zero-based index>` followed by a fixed phrase that
 * mentions `#132`; lines are joined with `\n`.
 *
 * @param marker Prefix that makes the generated text identifiable.
 * @param repeat Number of lines to generate (interpreted as an array length).
 * @returns The joined text, or an empty string when no lines are generated.
 */
function longText(marker: string, repeat: number): string {
  const lines: string[] = [];
  // Array.from({ length }) is kept so the line count follows array-length coercion rules.
  for (const index of Array.from({ length: repeat }).keys()) {
    lines.push(`${marker}-${index} #132 Gate report diagnostic review evidence direct workbench fix`);
  }
  return lines.join("\n");
}
/**
 * Generates sequential fixture ids of the form `<prefix>-NNN`.
 *
 * Numbering starts at 1 and is left-padded with zeros to at least three digits.
 *
 * @param prefix Text placed before the dash.
 * @param count Number of ids to generate (interpreted as an array length).
 * @returns The ids in ascending order.
 */
function manyIds(prefix: string, count: number): string[] {
  const slots = Array.from({ length: count });
  return slots.map((_, offset) => {
    const ordinal = String(offset + 1).padStart(3, "0");
    return `${prefix}-${ordinal}`;
  });
}
/**
 * Builds one task row for the overview fixtures.
 *
 * The row always belongs to queue `"default"` and carries long generated
 * prompt/assistant texts so that consumers have something to truncate.
 *
 * @param id Task identifier, also embedded in the generated texts.
 * @param status Task status; `"running"` yields `currentAttempt` 2 (otherwise 1)
 *   and `"succeeded"` sets `finishedAt` to `updatedAt` (otherwise `null`).
 * @param updatedAt Timestamp used for `updatedAt` and the last assistant message.
 * @param readAt Read timestamp, or `null` (the default) when none is supplied.
 * @returns The task row as a JSON object.
 */
function task(id: string, status: string, updatedAt: string, readAt: string | null = null): JsonRecord {
  const isRunning = status === "running";
  const hasSucceeded = status === "succeeded";
  const lastAssistantMessage = {
    at: updatedAt,
    seq: 99,
    source: "assistant",
    text: longText(`assistant-${id}`, 120),
  };
  const row: JsonRecord = {
    id,
    queueId: "default",
    status,
    currentAttempt: isRunning ? 2 : 1,
    updatedAt,
    finishedAt: hasSucceeded ? updatedAt : null,
    readAt,
    prompt: longText(`prompt-${id}`, 90),
    basePrompt: longText(`base-${id}`, 70),
    displayPrompt: longText(`display-${id}`, 80),
    lastAssistantMessage,
  };
  return row;
}
/**
 * Canned responder for the default scenario; the contract test passes it to
 * `codexTasksQueryForTest` as the second argument.
 *
 * - A path containing `/summary` yields a per-task summary payload. The task id
 *   is the URL-decoded segment following `/api/tasks/` (or `"unknown"` when that
 *   segment is absent); its status is `"running"` when the id contains
 *   `"running"` and `"succeeded"` otherwise.
 * - Any other path must start with the code-queue overview proxy prefix and
 *   yields an overview payload with large diagnostic id lists and fifteen task
 *   rows (one running, thirteen succeeded of which six have a `readAt`, one queued).
 *
 * @param path Request path being asked for.
 * @returns A response envelope of the form `{ ok, status, body }`.
 * @throws Error (via `assertCondition`) for a non-summary path outside the overview prefix.
 */
function fixtureResponse(path: string): JsonRecord {
  if (path.includes("/summary")) {
    const [, afterTasksPrefix] = path.split("/api/tasks/");
    const encodedTaskId = afterTasksPrefix?.split("/")[0];
    const taskId = decodeURIComponent(encodedTaskId ?? "unknown");
    const summaryStatus = taskId.includes("running") ? "running" : "succeeded";
    const summary: JsonRecord = {
      id: taskId,
      queueId: "default",
      status: summaryStatus,
      currentAttempt: 1,
      maxAttempts: 99,
      prompt: longText(`summary-prompt-${taskId}`, 100),
      basePrompt: longText(`summary-base-${taskId}`, 80),
      lastAssistantMessage: {
        at: "2026-05-22T00:00:00.000Z",
        seq: 120,
        source: "finalResponse",
        text: longText(`summary-assistant-${taskId}`, 130),
      },
      commands: {
        show: `bun scripts/cli.ts codex task ${taskId}`,
        trace: `bun scripts/cli.ts codex task ${taskId} --trace --tail --limit 80`,
      },
    };
    return { ok: true, status: 200, body: { ok: true, summary } };
  }

  assertCondition(path.startsWith("/api/microservices/code-queue/proxy/api/tasks/overview"), "unexpected path", { path });

  // Split-brain-but-live diagnostics with id lists far larger than a compact view should echo back.
  const executionDiagnostics: JsonRecord = {
    state: "split-brain",
    splitBrain: true,
    effectiveLiveness: "live",
    splitBrainLive: true,
    recommendedAction: "continue-supervision",
    livenessSummary: longText("split-brain-live-summary", 45),
    databaseActiveTaskCount: 80,
    databaseActiveTaskIds: manyIds("db-active", 80),
    schedulerActiveRunSlotCount: 30,
    schedulerActiveTaskIds: manyIds("scheduler-active", 30),
    activeHeartbeatCount: 80,
    activeHeartbeatTaskIds: manyIds("active-heartbeat", 80),
    heartbeatFreshTaskIds: manyIds("fresh-heartbeat", 80),
    heartbeatExpiredTaskIds: [],
    heartbeatMissingTaskIds: [],
    staleRecoveryCandidateTaskIds: [],
    heartbeatRiskTaskIds: [],
    traceGapTaskIds: manyIds("trace-gap", 60),
    traceGapNotStaleTaskIds: manyIds("trace-gap-fresh", 40),
    reasons: Array.from({ length: 24 }, (_, index) => longText(`diagnostic-reason-${index + 1}`, 10)),
    oaPublisher: {
      pendingTaskIds: manyIds("oa-pending", 80),
      lastError: longText("oa-publisher-error", 60),
    },
  };

  // Row table: [id, status, updatedAt, readAt?]; a missing readAt falls back to task()'s default.
  const taskRows: Array<[string, string, string, string?]> = [
    ["task-running", "running", "2026-05-22T00:09:00.000Z"],
    ["task-succeeded-1", "succeeded", "2026-05-22T00:08:00.000Z"],
    ["task-succeeded-2", "succeeded", "2026-05-22T00:07:00.000Z"],
    ["task-succeeded-3", "succeeded", "2026-05-22T00:06:00.000Z"],
    ["task-succeeded-4", "succeeded", "2026-05-22T00:05:00.000Z"],
    ["task-succeeded-5", "succeeded", "2026-05-22T00:04:00.000Z"],
    ["task-succeeded-6", "succeeded", "2026-05-22T00:03:00.000Z"],
    ["task-succeeded-7", "succeeded", "2026-05-22T00:02:00.000Z"],
    ["task-read-1", "succeeded", "2026-05-22T00:01:50.000Z", "2026-05-22T00:01:55.000Z"],
    ["task-read-2", "succeeded", "2026-05-22T00:01:40.000Z", "2026-05-22T00:01:45.000Z"],
    ["task-read-3", "succeeded", "2026-05-22T00:01:30.000Z", "2026-05-22T00:01:35.000Z"],
    ["task-read-4", "succeeded", "2026-05-22T00:01:20.000Z", "2026-05-22T00:01:25.000Z"],
    ["task-read-5", "succeeded", "2026-05-22T00:01:10.000Z", "2026-05-22T00:01:15.000Z"],
    ["task-read-6", "succeeded", "2026-05-22T00:01:05.000Z", "2026-05-22T00:01:09.000Z"],
    ["task-queued", "queued", "2026-05-22T00:01:00.000Z"],
  ];

  return {
    ok: true,
    status: 200,
    body: {
      ok: true,
      queue: {
        counts: {
          running: 15,
          judging: 0,
          queued: 4,
          retry_wait: 1,
          succeeded: 13,
        },
        maxActiveQueues: 12,
        executionDiagnostics,
      },
      pagination: {
        limit: 200,
        returned: 15,
        total: 33,
        hasMore: false,
        nextBeforeId: null,
        includeActive: true,
      },
      tasks: taskRows.map(([id, status, updatedAt, readAt]) => task(id, status, updatedAt, readAt)),
    },
  };
}
/**
 * Canned responder for the "many running tasks" scenario.
 *
 * Summary paths are delegated to `fixtureResponse`. Any other path must start
 * with the code-queue overview proxy prefix and yields an overview containing
 * forty running rows named `task-running-01` … `task-running-40`, whose
 * `updatedAt` minute counts down from 59 to 20.
 *
 * @param path Request path being asked for.
 * @returns A response envelope of the form `{ ok, status, body }`.
 * @throws Error (via `assertCondition`) for a non-summary path outside the overview prefix.
 */
function manyRunningFixtureResponse(path: string): JsonRecord {
  if (path.includes("/summary")) return fixtureResponse(path);
  assertCondition(path.startsWith("/api/microservices/code-queue/proxy/api/tasks/overview"), "unexpected path", { path });

  const tasks: JsonRecord[] = [];
  for (let ordinal = 1; ordinal <= 40; ordinal += 1) {
    // Row 1 is stamped at minute 59, row 40 at minute 20.
    const minute = 60 - ordinal;
    tasks.push(task(
      `task-running-${String(ordinal).padStart(2, "0")}`,
      "running",
      `2026-05-22T00:${String(minute).padStart(2, "0")}:00.000Z`,
    ));
  }

  const queue: JsonRecord = {
    counts: {
      running: 40,
      judging: 0,
      queued: 7,
      retry_wait: 2,
    },
    maxActiveQueues: 50,
    executionDiagnostics: {
      state: "healthy",
      databaseActiveTaskCount: 40,
      databaseActiveTaskIds: manyIds("running-active", 40),
      activeHeartbeatCount: 40,
      activeHeartbeatTaskIds: manyIds("running-heartbeat", 40),
      heartbeatFreshTaskIds: manyIds("running-fresh", 40),
    },
  };
  const pagination: JsonRecord = {
    limit: 200,
    returned: 40,
    total: 40,
    hasMore: false,
    nextBeforeId: null,
    includeActive: true,
  };

  return {
    ok: true,
    status: 200,
    body: { ok: true, queue, pagination, tasks },
  };
}
/**
 * Canned responder for the "split-brain but live" scenario.
 *
 * Summary paths are delegated to `fixtureResponse`. Any other path must start
 * with the code-queue overview proxy prefix and yields an overview in which
 * eight running tasks (`split-live-001` … `split-live-008`) are listed as
 * database-active with fresh heartbeats, while the scheduler-side fields
 * (`activeQueueIds`, `activeTaskIds`, `activeRunSlotCount`,
 * `schedulerActiveTaskIds`) are empty or zero.
 *
 * @param path Request path being asked for.
 * @returns A response envelope of the form `{ ok, status, body }`.
 * @throws Error (via `assertCondition`) for a non-summary path outside the overview prefix.
 */
function splitBrainLiveSupervisorFixtureResponse(path: string): JsonRecord {
  if (path.includes("/summary")) return fixtureResponse(path);
  assertCondition(path.startsWith("/api/microservices/code-queue/proxy/api/tasks/overview"), "unexpected path", { path });

  const liveTaskIds = manyIds("split-live", 8);
  const tasks: JsonRecord[] = [];
  for (const [index, taskId] of liveTaskIds.entries()) {
    // The first row is stamped at minute 50; each later row is one minute older.
    const minute = String(50 - index).padStart(2, "0");
    tasks.push(task(taskId, "running", `2026-05-22T01:${minute}:00.000Z`));
  }

  // The three id lists below intentionally share the single liveTaskIds array.
  const executionDiagnostics: JsonRecord = {
    state: "split-brain",
    splitBrain: true,
    splitBrainLive: true,
    effectiveLiveness: "live",
    recommendedAction: "continue-supervision",
    databaseActiveTaskCount: 8,
    databaseActiveTaskIds: liveTaskIds,
    schedulerActiveRunSlotCount: 0,
    schedulerActiveTaskIds: [],
    activeHeartbeatCount: 8,
    activeHeartbeatTaskIds: liveTaskIds,
    heartbeatFreshTaskIds: liveTaskIds,
    heartbeatExpiredTaskIds: [],
    heartbeatMissingTaskIds: [],
    staleRecoveryCandidateTaskIds: [],
    heartbeatRiskTaskIds: [],
  };
  const queue: JsonRecord = {
    counts: { running: 8 },
    activeQueueIds: [],
    activeTaskIds: [],
    activeRunSlotCount: 0,
    databaseActiveTaskCount: 8,
    executionDiagnostics,
  };
  const pagination: JsonRecord = {
    limit: 200,
    returned: 8,
    total: 8,
    hasMore: false,
    nextBeforeId: null,
    includeActive: true,
  };

  return {
    ok: true,
    status: 200,
    body: { ok: true, queue, pagination, tasks },
  };
}
/**
 * Runs the supervisor-disclosure contract checks against canned fixture responses.
 *
 * The function calls `codexTasksQueryForTest` once per scenario (supervisor view,
 * supervisor view with an oversized `--limit`, full view, full view with an
 * oversized `--limit`, `--status running`, `--unread`, and the split-brain-live
 * supervisor fixture), narrows the results with `asRecord`/`asArray`, and then
 * evaluates a fixed sequence of `assertCondition` checks. All narrowing happens
 * before the first check, so a missing section fails with an
 * "expected JSON object"/"expected JSON array" error rather than a check message.
 *
 * NOTE(review): `codexTasksQueryForTest` is imported from `./src/code-queue`; it
 * presumably uses the second argument as the request function — confirm there.
 *
 * @returns `{ ok: true, checks, supervisorChars, fullChars }`, where `checks` is a
 *   static list of the covered contract areas and the two char counts are the
 *   lengths of the JSON-serialized supervisor and full results.
 * @throws Error from `assertCondition` on the first failed narrowing or check.
 */
export function runCodeQueueSupervisorDisclosureContract(): JsonRecord {
// Scenario runs: one query per view/filter combination under test.
const supervisor = codexTasksQueryForTest(["--view", "supervisor", "--limit", "20"], fixtureResponse);
const cappedLimit = codexTasksQueryForTest(["--view", "supervisor", "--limit", "260"], fixtureResponse);
const full = codexTasksQueryForTest(["--view", "full", "--limit", "20"], fixtureResponse);
const cappedFull = codexTasksQueryForTest(["--view", "full", "--limit", "260"], fixtureResponse);
const runningFiltered = codexTasksQueryForTest(["--status", "running", "--limit", "40"], manyRunningFixtureResponse);
const unreadFiltered = codexTasksQueryForTest(["--unread", "--limit", "20"], fixtureResponse);
const splitBrainLive = codexTasksQueryForTest(["--view", "supervisor", "--limit", "20"], splitBrainLiveSupervisorFixtureResponse);
// Serialized forms; their lengths feed the output-size checks and the returned summary.
const supervisorBody = JSON.stringify(supervisor);
const fullBody = JSON.stringify(full);
const runningFilteredBody = JSON.stringify(runningFiltered);
const unreadFilteredBody = JSON.stringify(unreadFiltered);
// Narrowed sections of each result; every lookup throws if the section is missing or mistyped.
const supervisorData = asRecord(supervisor);
const supervisorView = asRecord(supervisorData.supervisor);
const cappedSupervisorView = asRecord(asRecord(cappedLimit).supervisor);
const runningFilteredView = asRecord(asRecord(runningFiltered).supervisor);
const runningFilteredSection = asRecord(runningFilteredView.running);
const unreadFilteredView = asRecord(asRecord(unreadFiltered).supervisor);
const unreadFilteredSection = asRecord(unreadFilteredView.completedUnread);
const disclosure = asRecord(supervisorView.disclosure);
const runningItem = asRecord(asArray(asRecord(supervisorView.running).items)[0]);
const recentCompleted = asRecord(supervisorView.recentCompleted);
const recentItems = asArray(recentCompleted.items);
const fullItem = asRecord(asArray(asRecord(asRecord(full).tasks).items)[0]);
const completedUnread = asRecord(supervisorView.completedUnread);
const fullTasks = asRecord(asRecord(full).tasks);
const cappedFullTasks = asRecord(asRecord(cappedFull).tasks);
const diagnostics = asRecord(supervisorView.executionDiagnostics);
const filters = asRecord(supervisorView.filters);
const activeRunning = asRecord(supervisorView.activeRunning);
const activeRunningRowPage = asRecord(activeRunning.rowPage);
const activeRunningRedline = asRecord(activeRunning.redline);
const activeRunningCommands = asRecord(activeRunning.commands);
const counts = asRecord(supervisorView.counts);
// The doubled asRecord call is redundant (narrowing an already-narrowed value) but harmless.
const outputBudget = asRecord(asRecord(disclosure.outputBudget));
const listBudget = asRecord(diagnostics.listBudget);
const omittedCounts = asRecord(listBudget.omittedCounts);
const splitBrainLiveView = asRecord(asRecord(splitBrainLive).supervisor);
const splitBrainLiveActivity = asRecord(splitBrainLiveView.activity);
const splitBrainLiveConcurrency = asRecord(splitBrainLiveView.commanderConcurrency);
const splitBrainLiveCounts = asRecord(splitBrainLiveView.counts);
const cappedFilters = asRecord(cappedSupervisorView.filters);
const cappedSource = asRecord(cappedSupervisorView.source);
const cappedLimitPolicy = asRecord(asRecord(cappedSupervisorView.disclosure).limitPolicy);
const cappedCommands = asRecord(cappedSupervisorView.commands);
const cappedFullFilters = asRecord(cappedFullTasks.filters);
const cappedFullSource = asRecord(cappedFullTasks.source);
// Output size: supervisor output must stay well below the full view and under a fixed char budget.
assertCondition(supervisorBody.length < fullBody.length * 0.55, "supervisor output should be materially smaller than full output", { supervisorChars: supervisorBody.length, fullChars: fullBody.length });
assertCondition(supervisorBody.length < 45_000, "supervisor output should remain bounded even with large diagnostics", { supervisorChars: supervisorBody.length });
// Limit disclosure: a requested --limit of 260 is expected to be reported as capped to 100, with a separate source fetch limit of 200.
assertCondition(cappedFilters.requestedLimit === 260 && cappedFilters.effectiveLimit === 100 && cappedFilters.limit === 100 && cappedFilters.limitCapped === true, "supervisor filters should disclose requested and capped effective limit", cappedFilters);
assertCondition(cappedSource.requestedLimit === 200 && cappedSource.effectiveLimit === 200 && cappedSource.limit === 200 && cappedSource.returned === 15, "supervisor source should disclose independent overview fetch limit", cappedSource);
assertCondition(cappedLimitPolicy.requestedLimit === 260 && cappedLimitPolicy.effectiveLimit === 100 && cappedLimitPolicy.sourceFetchLimit === 200 && cappedLimitPolicy.sourceEffectiveLimit === 200, "supervisor disclosure should summarize requested/effective/source limits", cappedLimitPolicy);
assertCondition(String(cappedCommands.refresh ?? "").includes("--limit 260") && String(cappedCommands.byStatus ?? "").includes("--limit 260"), "supervisor follow-up commands should preserve requested limit", cappedCommands);
assertCondition(cappedFullFilters.requestedLimit === 260 && cappedFullFilters.effectiveLimit === 100 && cappedFullFilters.limitCapped === true, "full view filters should disclose capped requested limit", cappedFullFilters);
assertCondition(cappedFullSource.requestedLimit === 200 && cappedFullSource.effectiveLimit === 200, "full view source should disclose independent overview fetch limit", cappedFullSource);
// Section paging: completed sections are expected to return three rows each and not overlap.
assertCondition(recentItems.length === 3, "recentCompleted should be capped below --limit by default", { returned: recentItems.length });
assertCondition(asArray(completedUnread.items).length === 3, "completedUnread should be locally paged and kept separate from recentCompleted", completedUnread);
assertCondition(recentItems.every((item) => asRecord(item).unreadTerminal === false), "recentCompleted should not duplicate unread terminal tasks", { recentItems });
// Diagnostics: verbose id lists are omitted or bounded while the liveness verdict stays explicit.
assertCondition(diagnostics.databaseActiveTaskIds === undefined, "supervisor diagnostics should not expose verbose databaseActiveTaskIds by default", diagnostics);
assertCondition(omittedCounts.databaseActiveTaskIds === 77, "diagnostic omitted counts should preserve full visibility metadata", omittedCounts);
assertCondition(diagnostics.effectiveLiveness === "live", "supervisor liveness summary should keep split-brain live explicit", diagnostics);
assertCondition(diagnostics.recommendedAction === "continue-supervision", "supervisor liveness summary should recommend continued supervision", diagnostics);
assertCondition(diagnostics.splitBrainLive === true, "supervisor liveness summary should mark splitBrainLive", diagnostics);
assertCondition(diagnostics.activeHeartbeatCount === 80, "supervisor liveness summary should foreground active heartbeat count", diagnostics);
assertCondition(asArray(diagnostics.heartbeatFreshTaskIds).length === 3, "supervisor diagnostics should keep heartbeatFreshTaskIds bounded", diagnostics);
assertCondition(String(diagnostics.interpretation ?? "").includes("continue supervision"), "supervisor liveness interpretation should not imply scheduler stoppage", diagnostics);
assertCondition(asArray(diagnostics.reasons).length === 2, "diagnostic reasons should be capped", diagnostics);
assertCondition(diagnostics.livenessSummary === undefined, "supervisor diagnostics should omit liveness summary preview by default", diagnostics);
assertCondition(listBudget.truncated === true && typeof listBudget.rawCommand === "string", "diagnostic list budget should disclose raw command", listBudget);
// Supervisor row shape: issue refs, closeout labelling, and short previews carrying original char counts.
assertCondition(asArray(runningItem.issues).includes("#132"), "supervisor row should expose issue refs for triage", runningItem);
assertCondition(runningItem.status === "running", "fixture running row should keep raw scheduler status", runningItem);
assertCondition(String(runningItem.statusLabel ?? "").includes("awaiting terminal/judge"), "running finalResponse row should expose awaiting terminal/judge label", runningItem);
assertCondition(runningItem.awaitingTerminalJudge === true && runningItem.closeoutState === "awaiting-terminal-or-judge", "running finalResponse row should be marked as not ready for closeout", runningItem);
assertCondition(String(runningItem.closeoutHint ?? "").includes("wait for terminal status and judge"), "running finalResponse row should explain commander interpretation", runningItem);
assertCondition(Number(runningItem.promptChars) > String(runningItem.prompt ?? "").length && runningItem.promptTruncated === true, "supervisor prompt must be a short flat preview with original char count", runningItem);
assertCondition(Number(runningItem.lastChars) > String(runningItem.last ?? "").length && runningItem.lastTruncated === true, "supervisor body must be a short flat preview with original char count", runningItem);
assertCondition(runningItem.commands === undefined && runningItem.promptPreview === undefined && runningItem.lastAssistantMessage === undefined, "supervisor rows must not expose repeated commands or legacy long list fields", runningItem);
// Full view: detailed row fields are retained and all fifteen fixture rows are returned.
assertCondition(asRecord(fullItem.promptPreview).chars !== undefined && fullItem.lastAssistantMessage !== undefined, "full view must retain detailed task row fields", fullItem);
assertCondition(fullItem.status === "running" && String(fullItem.statusLabel ?? "").includes("awaiting terminal/judge"), "full view should keep raw status while exposing derived closeout label", fullItem);
assertCondition(fullItem.awaitingTerminalJudge === true && fullItem.closeoutState === "awaiting-terminal-or-judge", "full view should expose awaiting terminal/judge state", fullItem);
assertCondition(fullTasks.returned === 15, "full view must not inherit supervisor recentCompleted cap", fullTasks);
// Uncapped request (--limit 20): budget metadata and the exact active-running count versus returned rows.
assertCondition(filters.requestedLimit === 20 && filters.limit === 20 && filters.limitCapped === false, "supervisor filters should disclose requested vs effective limit", filters);
assertCondition(outputBudget.requestedLimit === 20 && outputBudget.effectiveLimit === 20 && outputBudget.sectionReturnedLimit === 3, "supervisor must expose output budget metadata", outputBudget);
assertCondition(activeRunning.count === 15 && activeRunning.exact === true && activeRunning.source === "queue-summary-counts", "activeRunning should expose exact running+judging count from queue summary", activeRunning);
assertCondition(activeRunningRowPage.returned === 1 && activeRunningRowPage.returnedLimit === 3 && String(activeRunningRowPage.distinction ?? "").includes("row page"), "activeRunning row page should distinguish returned rows from active count", activeRunningRowPage);
assertCondition(activeRunningRedline.countField === "supervisor.activeRunning.count" && activeRunningRedline.hardRedline === 15 && activeRunningRedline.state === "at-or-over-hard-redline", "activeRunning redline should name count field and interpretation", activeRunningRedline);
assertCondition(counts.activeRunningCount === 15 && counts.activeRunningExact === true && counts.activeRunningRowsReturned === 1, "supervisor counts should separate active count from returned running rows", counts);
assertCondition(String(activeRunningCommands.running ?? "").includes("--status running,judging"), "activeRunning should provide running drilldown", activeRunningCommands);
// Filtered queries: --status running and --unread stay paged and size-bounded.
assertCondition(asArray(runningFilteredSection.items).length === 3, "running status filter should be locally paged below --limit", runningFilteredSection);
assertCondition(runningFilteredSection.count === 40 && runningFilteredSection.hasMore === true, "running status filter should preserve count and hasMore", runningFilteredSection);
assertCondition(String(asRecord(runningFilteredSection.commands).next ?? "").includes("--before-id task-running-03"), "running status filter should provide next page command", runningFilteredSection);
assertCondition(runningFilteredBody.length < 14_000, "running status filter output should remain bounded", { chars: runningFilteredBody.length });
assertCondition(asArray(unreadFilteredSection.items).length <= 3, "unread list should be locally paged below --limit", unreadFilteredSection);
assertCondition(unreadFilteredBody.length < 14_000, "unread output should remain bounded", { chars: unreadFilteredBody.length });
// Split-brain-live scenario: database/heartbeat counts of 8 are reported as active while scheduler-local counts stay 0.
assertCondition(splitBrainLiveCounts.running === 8, "split-brain supervisor should preserve DB running task count", splitBrainLiveCounts);
assertCondition(splitBrainLiveCounts.commanderActiveRunnerCount === 8, "split-brain supervisor should mirror commander active count in counts", splitBrainLiveCounts);
assertCondition(splitBrainLiveCounts.effectiveActive === 8, "split-brain supervisor should foreground effective active count", splitBrainLiveCounts);
assertCondition(splitBrainLiveCounts.databaseRunning === 8, "split-brain supervisor should distinguish database running tasks", splitBrainLiveCounts);
assertCondition(splitBrainLiveCounts.heartbeatFreshActive === 8, "split-brain supervisor should distinguish heartbeat-effective active runners", splitBrainLiveCounts);
assertCondition(splitBrainLiveCounts.schedulerLocalActiveQueues === 0, "split-brain supervisor should preserve zero scheduler-local active queues", splitBrainLiveCounts);
assertCondition(splitBrainLiveActivity.effectiveActiveTaskCount === 8, "split-brain supervisor activity should expose effective active count", splitBrainLiveActivity);
assertCondition(splitBrainLiveActivity.effectiveActiveSource === "heartbeat-fresh", "split-brain supervisor activity should prefer heartbeat-fresh source", splitBrainLiveActivity);
assertCondition(splitBrainLiveActivity.databaseRunningTaskCount === 8, "split-brain supervisor activity should expose DB running count", splitBrainLiveActivity);
assertCondition(splitBrainLiveActivity.heartbeatFreshActiveTaskCount === 8, "split-brain supervisor activity should expose heartbeat-fresh active count", splitBrainLiveActivity);
assertCondition(splitBrainLiveActivity.schedulerLocalActiveQueueCount === 0, "split-brain supervisor activity should expose scheduler-local queue count", splitBrainLiveActivity);
assertCondition(splitBrainLiveActivity.schedulerLocalActiveRunSlotCount === 0, "split-brain supervisor activity should expose scheduler-local slot count", splitBrainLiveActivity);
assertCondition(splitBrainLiveActivity.splitBrainLive === true, "split-brain supervisor activity should mark live split-brain", splitBrainLiveActivity);
assertCondition(splitBrainLiveActivity.splitBrainDisposition === "live-count-as-active", "split-brain supervisor activity should classify live split-brain as active capacity", splitBrainLiveActivity);
assertCondition(splitBrainLiveActivity.commanderConcurrency !== undefined, "split-brain supervisor activity should include commander concurrency guidance", splitBrainLiveActivity);
assertCondition(splitBrainLiveConcurrency.activeRunnerCount === 8, "split-brain supervisor should expose commander-facing active runner count", splitBrainLiveConcurrency);
assertCondition(splitBrainLiveConcurrency.activeRunnerCountField === "activity.effectiveActiveTaskCount", "split-brain supervisor should name the field to use", splitBrainLiveConcurrency);
assertCondition(splitBrainLiveConcurrency.splitBrainDisposition === "live-count-as-active", "split-brain supervisor should explain live split-brain disposition", splitBrainLiveConcurrency);
assertCondition(splitBrainLiveConcurrency.interventionRequired === false, "fresh split-brain supervisor should not require intervention", splitBrainLiveConcurrency);
assertCondition(String(splitBrainLiveConcurrency.decisionRule ?? "").includes("15 - activeRunnerCount"), "split-brain supervisor should give 15-concurrency arithmetic", splitBrainLiveConcurrency);
assertCondition(String(splitBrainLiveActivity.activeQueueIdsNote ?? "").includes("zero local queue ids does not mean zero active runners"), "split-brain supervisor activity should explain activeQueueIds are local-only", splitBrainLiveActivity);
assertCondition(String(splitBrainLiveActivity.interpretation ?? "").includes("continue supervision"), "split-brain supervisor activity should not imply scheduler stoppage", splitBrainLiveActivity);
// Reached only when every check above passed; `checks` is a static description, not a per-check result.
return {
ok: true,
checks: [
"supervisor output materially smaller than full",
"recentCompleted capped",
"explicit --limit cap disclosed",
"running/unread locally paged",
"split-brain diagnostics capped",
"active running exact count exposed",
"requested/effective/returned limits disclosed",
"prompt/body previews bounded",
"running finalResponse rows labeled awaiting terminal/judge",
"drill-down commands preserved",
"full view remains detailed",
"split-brain live supervisor activity distinguishes scheduler-local, database, and heartbeat counts",
"commander concurrency block names the active runner count and 15-concurrency rule",
],
supervisorChars: supervisorBody.length,
fullChars: fullBody.length,
};
}
// When `import.meta.main` is truthy (presumably: this module is the program entry point — confirm
// against the runtime in use), run the contract and print its summary as pretty-printed JSON.
if (import.meta.main) {
  const report = runCodeQueueSupervisorDisclosureContract();
  const rendered = JSON.stringify(report, null, 2);
  process.stdout.write(`${rendered}\n`);
}