Files
pikasTech-unidesk/scripts/code-queue-commander-view-contract-test.ts
T
2026-05-24 05:12:46 +00:00

387 lines
25 KiB
TypeScript

import { spawnSync } from "node:child_process";
import { codexTasksQueryForTest } from "./src/code-queue";
/** Loosely typed JSON object: string keys whose values must be narrowed before use. */
type JsonRecord = Record<string, unknown>;
/** One request observed by a fixture: the requested path and the method recorded for it. */
type RequestRecord = { path: string; method: string };
/**
 * Contract-test assertion helper.
 *
 * @param condition Value checked for truthiness; any truthy value passes.
 * @param message Human-readable description of the expectation.
 * @param detail Extra context appended to the error as JSON (defaults to `{}`).
 * @throws Error with the text `<message>: <JSON detail>` when `condition` is falsy.
 */
function assertCondition(condition: unknown, message: string, detail: JsonRecord = {}): void {
  if (condition) {
    return;
  }
  // The detail is only serialized on the failure path.
  const serializedDetail = JSON.stringify(detail);
  throw new Error(`${message}: ${serializedDetail}`);
}
/**
 * Narrows an unknown value to a JSON object.
 *
 * @param value Candidate value.
 * @returns The same value, typed as a `JsonRecord`.
 * @throws Error ("expected JSON object") when the value is `null`, an array, or not an object.
 */
function asRecord(value: unknown): JsonRecord {
  const isPlainObject = value !== null && typeof value === "object" && !Array.isArray(value);
  assertCondition(isPlainObject, "expected JSON object", { value });
  return value as JsonRecord;
}
/**
 * Narrows an unknown value to an array.
 *
 * @param value Candidate value.
 * @returns The same value, typed as `unknown[]`.
 * @throws Error ("expected JSON array") when the value is not an array.
 */
function asArray(value: unknown): unknown[] {
  const isList = Array.isArray(value);
  assertCondition(isList, "expected JSON array", { value });
  return value as unknown[];
}
/**
 * Builds a multi-line filler string used to bloat fixture fields.
 *
 * @param marker Prefix stamped on every line, followed by `-<line index>`.
 * @param repeat Number of lines to generate.
 * @returns The generated lines joined with `\n` (empty string when no lines are produced).
 */
function longText(marker: string, repeat: number): string {
  const filler = "status evidence command output final response prompt body should stay capped";
  const lines = Array.from({ length: repeat }).map((_unused, lineNumber) => `${marker}-${lineNumber} ${filler}`);
  return lines.join("\n");
}
/**
 * Builds one noisy task row for the overview fixtures.
 *
 * The prompt fields and the assistant message are padded with `longText` filler so the
 * views under test have long bodies to trim.
 *
 * @param id Task identifier; also embedded in the filler markers.
 * @param status Task status string.
 * @param updatedAt Timestamp used for `updatedAt`, for the assistant message, and for
 *   `finishedAt` when the status is succeeded/failed/canceled.
 * @param prompt First line of every prompt variant.
 * @param readAt Read timestamp, or `null` when the row is unread.
 * @param finalText First line of the final assistant message; an empty string yields
 *   `lastAssistantMessage: null`.
 * @returns The task row as a `JsonRecord`.
 */
function task(id: string, status: string, updatedAt: string, prompt: string, readAt: string | null = null, finalText = ""): JsonRecord {
  const notStartedYet = ["queued", "retry_wait"].includes(status);
  const isTerminal = ["succeeded", "failed", "canceled"].includes(status);

  let lastAssistantMessage: JsonRecord | null = null;
  if (finalText.length > 0) {
    lastAssistantMessage = {
      at: updatedAt,
      seq: 42,
      source: "finalResponse",
      text: `${finalText}\n${longText(`assistant-${id}`, 100)}`,
    };
  }

  return {
    id,
    queueId: "default",
    status,
    currentAttempt: notStartedYet ? 0 : 1,
    updatedAt,
    finishedAt: isTerminal ? updatedAt : null,
    readAt,
    prompt: `${prompt}\n${longText(`raw-prompt-${id}`, 80)}`,
    basePrompt: `${prompt}\n${longText(`base-prompt-${id}`, 60)}`,
    displayPrompt: `${prompt}\n${longText(`display-prompt-${id}`, 70)}`,
    lastAssistantMessage,
  };
}
/**
 * Builds the fixture response for a per-task summary request.
 *
 * The headline of the final response depends on the exact task id, and the reported
 * status is derived from substrings of the id ("running", then "failed", otherwise
 * "succeeded"). Prompt and final-response bodies are padded with `longText` filler.
 *
 * @param taskId Identifier of the task whose summary is requested.
 * @returns A `{ ok, status, body }` envelope carrying the summary.
 */
function summaryForTask(taskId: string): JsonRecord {
  let finalText: string;
  switch (taskId) {
    case "task-running-risk":
      finalText = "Blocked by provider auth token timeout and cannot proceed without commander authorization.";
      break;
    case "task-failed-unread":
      finalText = "CI failed and final response reports missing e2e evidence.";
      break;
    case "task-running-watch":
      finalText = "Implementation finished but task is still awaiting judge.";
      break;
    default:
      finalText = "Completed with compact evidence.";
      break;
  }

  let status = "succeeded";
  if (taskId.includes("running")) {
    status = "running";
  } else if (taskId.includes("failed")) {
    status = "failed";
  }

  const lastAssistantMessage = {
    at: "2026-05-22T00:59:00.000Z",
    seq: 120,
    source: "finalResponse",
    text: `${finalText}\n${longText(`summary-final-${taskId}`, 120)}`,
  };

  const summary = {
    id: taskId,
    queueId: "default",
    status,
    currentAttempt: 1,
    maxAttempts: 99,
    prompt: longText(`summary-prompt-${taskId}`, 90),
    basePrompt: longText(`summary-base-${taskId}`, 70),
    lastAssistantMessage,
  };

  return {
    ok: true,
    status: 200,
    body: { ok: true, summary },
  };
}
/**
 * Fake fetcher that serves a deliberately noisy Code Queue state.
 *
 * Every call is appended to `requests` as a GET. Paths containing "/summary" are answered
 * with `summaryForTask` for the task id found after "/api/tasks/"; any other path must be
 * the overview proxy path and receives the queue overview payload.
 *
 * @param path Requested path.
 * @param requests Log that receives one entry per call (a fresh array when omitted).
 * @returns A `{ ok, status, body }` envelope for the summary or overview response.
 * @throws Error ("unexpected path") when the path is neither a summary nor the overview.
 */
function noisyCommanderFixture(path: string, requests: RequestRecord[] = []): JsonRecord {
  requests.push({ path, method: "GET" });

  if (path.includes("/summary")) {
    const afterTasksPrefix = path.split("/api/tasks/")[1];
    const encodedTaskId = afterTasksPrefix?.split("/")[0] ?? "unknown";
    return summaryForTask(decodeURIComponent(encodedTaskId));
  }

  const isOverviewRequest = path.startsWith("/api/microservices/code-queue/proxy/api/tasks/overview");
  assertCondition(isOverviewRequest, "unexpected path", { path });

  const counts = {
    running: 12,
    judging: 2,
    queued: 18,
    retry_wait: 4,
    succeeded: 28,
    failed: 3,
    canceled: 1,
  };

  const executionDiagnostics = {
    now: "2026-05-22T01:00:00.000Z",
    state: "stale-active",
    effectiveLiveness: "at-risk",
    recommendedAction: "investigate-heartbeat-risk",
    databaseActiveTaskCount: 14,
    databaseActiveTaskIds: ["task-running-risk", "task-running-watch"],
    activeHeartbeatCount: 13,
    heartbeatFreshTaskIds: ["task-running-watch"],
    heartbeatRiskTaskIds: ["task-running-risk"],
    heartbeatExpiredTaskIds: ["task-running-risk"],
    heartbeatMissingTaskIds: [],
    staleRecoveryCandidateTaskIds: ["task-running-risk"],
    traceGapTaskIds: ["task-running-risk", "task-running-watch"],
    reasons: [longText("diagnostic-reason", 30), longText("diagnostic-reason-two", 30)],
  };

  const pagination = {
    limit: 200,
    returned: 12,
    total: 68,
    hasMore: true,
    nextBeforeId: "task-oldest-page",
    includeActive: true,
  };

  // Rows are grouped for readability only; the combined order below is the row order served.
  const activeTasks = [
    task("task-running-risk", "running", "2026-05-22T00:00:00.000Z", "HWLAB#7 backend-core provider token blocker for M3 hardware workbench", null, "Blocked by provider auth token timeout."),
    task("task-running-watch", "judging", "2026-05-22T00:52:00.000Z", "pikasTech/HWLAB#164 user-facing patch-panel verification", null, "Final response ready while judge is pending."),
  ];
  const unreadTerminalTasks = [
    task("task-failed-unread", "failed", "2026-05-22T00:50:00.000Z", "UniDesk#20 CI e2e evidence gate for commander view", null, "CI failed and needs read closeout."),
    task("task-succeeded-unread", "succeeded", "2026-05-22T00:49:00.000Z", "pikasTech/HWLAB#317 deployment artifact digest publish evidence", null, "Artifact published."),
    task("task-canceled-unread", "canceled", "2026-05-22T00:48:00.000Z", "UniDesk#118 diagnostics gate report stale commander loop", null, "Canceled after blocker."),
  ];
  const backlogTasks = [
    task("task-queued-priority", "queued", "2026-05-22T00:47:00.000Z", "HWLAB#99 business user-facing dashboard fix waiting for runner"),
    task("task-retry-priority", "retry_wait", "2026-05-22T00:46:00.000Z", "HWLAB#116 infrastructure blocker retry_wait due to github transient"),
  ];
  const recentReadTasks = [
    task("task-recent-read-docs", "succeeded", "2026-05-22T00:45:00.000Z", "docs governance reference update", "2026-05-22T00:45:01.000Z"),
    task("task-recent-read-business", "succeeded", "2026-05-22T00:44:00.000Z", "business user-facing workbench UI fix", "2026-05-22T00:44:01.000Z"),
    task("task-recent-read-evidence", "succeeded", "2026-05-22T00:43:00.000Z", "ci e2e evidence smoke report", "2026-05-22T00:43:01.000Z"),
    task("task-recent-read-artifact", "succeeded", "2026-05-22T00:42:00.000Z", "deployment artifact registry digest", "2026-05-22T00:42:01.000Z"),
    task("task-recent-read-diagnostic", "succeeded", "2026-05-22T00:41:00.000Z", "diagnostics gate report", "2026-05-22T00:41:01.000Z"),
  ];

  return {
    ok: true,
    status: 200,
    body: {
      ok: true,
      queue: {
        counts,
        unreadTerminal: 8,
        maxActiveQueues: 15,
        executionDiagnostics,
      },
      pagination,
      tasks: [...activeTasks, ...unreadTerminalTasks, ...backlogTasks, ...recentReadTasks],
    },
  };
}
/**
 * Fake fetcher that serves a healthy, idle Code Queue state.
 *
 * Paths containing "/summary" get a short succeeded summary for the task id found after
 * "/api/tasks/". Any other path must be the overview proxy path and receives twelve
 * succeeded, already-read tasks together with ready storage and execution diagnostics.
 *
 * @param path Requested path.
 * @returns A `{ ok, status, body }` envelope for the summary or overview response.
 * @throws Error ("unexpected ready fixture path") when the path is neither a summary nor the overview.
 */
function readyCommanderFixture(path: string): JsonRecord {
  if (path.includes("/summary")) {
    const afterTasksPrefix = path.split("/api/tasks/")[1];
    const taskId = decodeURIComponent(afterTasksPrefix?.split("/")[0] ?? "unknown");
    const summary = {
      id: taskId,
      queueId: "default",
      status: "succeeded",
      currentAttempt: 1,
      maxAttempts: 1,
      prompt: "D601 Code Queue GPT-5.5 runner completed normal workflow with ready platform status.",
      lastAssistantMessage: {
        at: "2026-05-22T00:59:00.000Z",
        seq: 12,
        source: "finalResponse",
        text: "Completed routine workflow with ready platform status and no follow-up incident.",
      },
    };
    return { ok: true, status: 200, body: { ok: true, summary } };
  }

  const isOverviewRequest = path.startsWith("/api/microservices/code-queue/proxy/api/tasks/overview");
  assertCondition(isOverviewRequest, "unexpected ready fixture path", { path });

  // Prompt for the row at `index`: index 2 is the audit prompt, the rest cycle by `index % 3`.
  const promptFor = (index: number): string => {
    if (index === 2) {
      return "D601 Code Queue GPT-5.5 runner commander audit: infrastructure.status=ready riskCounts.infrastructureBlocker=0; do not classify all history as infrastructure-blocker";
    }
    const slot = index % 3;
    if (slot === 0) {
      return "D601 Code Queue GPT-5.5 runner workflow fix for UniDesk#20 commander CLI behavior";
    }
    if (slot === 1) {
      return "D601 Code Queue GPT-5.5 runner user-facing HWLAB workbench implementation";
    }
    return "D601 Code Queue GPT-5.5 runner routine unknown historical task";
  };

  const tasks: JsonRecord[] = [];
  for (let index = 0; index < 12; index += 1) {
    // Minutes count down from 40 so the first row is the most recently updated.
    const minute = String(40 - index).padStart(2, "0");
    tasks.push(task(
      `task-ready-${index + 1}`,
      "succeeded",
      `2026-05-22T00:${minute}:00.000Z`,
      promptFor(index),
      "2026-05-22T01:00:00.000Z",
      "Routine final response.",
    ));
  }

  const counts = {
    running: 0,
    judging: 0,
    queued: 0,
    retry_wait: 0,
    succeeded: tasks.length,
    failed: 0,
    canceled: 0,
  };

  const storage = {
    postgresReady: true,
    health: {
      status: "ready",
      degraded: false,
      signals: [],
    },
  };

  const executionDiagnostics = {
    now: "2026-05-22T01:00:00.000Z",
    state: "ready",
    effectiveLiveness: "idle",
    recommendedAction: "continue-supervision",
    databaseActiveTaskCount: 0,
    schedulerActiveRunSlotCount: 0,
    activeHeartbeatCount: 0,
    heartbeatRiskTaskIds: [],
    staleRecoveryCandidateTaskIds: [],
    traceGapTaskIds: [],
  };

  const pagination = {
    limit: 200,
    returned: tasks.length,
    total: tasks.length,
    hasMore: false,
    nextBeforeId: null,
    includeActive: true,
  };

  return {
    ok: true,
    status: 200,
    body: {
      ok: true,
      queue: {
        counts,
        unreadTerminal: 0,
        maxActiveQueues: 15,
        storage,
        executionDiagnostics,
      },
      pagination,
      tasks,
    },
  };
}
/**
 * Runs the commander-view contract checks for `codex tasks`.
 *
 * The function drives `codexTasksQueryForTest` with the in-file fixtures for the commander,
 * supervisor, full and unread variants, then asserts on the returned structures and on the
 * size of their JSON serialization. It also spawns `bun scripts/cli.ts codex tasks --status done`
 * in the current working directory to check how an unsupported status is reported.
 *
 * @returns A report with `ok: true`, the list of check descriptions, and the measured
 *   character counts of the commander, commander `--limit 8`, and full outputs.
 * @throws Error on the first expectation that does not hold; the message carries the
 *   expectation text followed by JSON detail.
 */
export function runCodeQueueCommanderViewContract(): JsonRecord {
  // Request logs let the checks inspect which paths a given query fetched.
  const commanderRequests: RequestRecord[] = [];
  const commanderLimit8Requests: RequestRecord[] = [];
  const fetchCommander = (path: string): JsonRecord => noisyCommanderFixture(path, commanderRequests);
  const fetchCommanderLimit8 = (path: string): JsonRecord => noisyCommanderFixture(path, commanderLimit8Requests);
  const fetchNoisy = (path: string): JsonRecord => noisyCommanderFixture(path);

  // Queries under test.
  const commander = codexTasksQueryForTest(["--view", "commander", "--limit", "260"], fetchCommander);
  const commanderTerminalAliases = codexTasksQueryForTest(["--view", "commander", "--status", "completed,cancelled", "--limit", "8"], fetchNoisy);
  const supervisor = codexTasksQueryForTest(["--view", "supervisor", "--limit", "260"], fetchNoisy);
  const full = codexTasksQueryForTest(["--view", "full", "--limit", "260"], fetchNoisy);
  const commanderLimit8 = codexTasksQueryForTest(["--view", "commander", "--limit", "8"], fetchCommanderLimit8);
  const readyCommander = codexTasksQueryForTest(["--view", "commander", "--limit", "120"], readyCommanderFixture);
  const fullLimit8 = codexTasksQueryForTest(["--view", "full", "--limit", "8"], fetchNoisy);
  const unreadLimit8 = codexTasksQueryForTest(["--unread", "--limit", "8"], fetchNoisy);

  // Serialized forms are used for size budgets and for "must not leak" substring checks.
  const commanderBody = JSON.stringify(commander);
  const commanderTerminalAliasBody = JSON.stringify(commanderTerminalAliases);
  const commanderLimit8Body = JSON.stringify(commanderLimit8);
  const fullLimit8Body = JSON.stringify(fullLimit8);
  const unreadLimit8Body = JSON.stringify(unreadLimit8);
  const fullBody = JSON.stringify(full);

  // Structured views; each `asRecord`/`asArray` call doubles as a shape assertion.
  const commanderView = asRecord(asRecord(commander).commander);
  const commanderTerminalAliasView = asRecord(asRecord(commanderTerminalAliases).commander);
  const commanderLimit8View = asRecord(asRecord(commanderLimit8).commander);
  const readyCommanderView = asRecord(asRecord(readyCommander).commander);
  const supervisorView = asRecord(asRecord(supervisor).supervisor);
  const filters = asRecord(commanderView.filters);
  const activeRunners = asRecord(commanderView.activeRunners);
  const backlog = asRecord(commanderView.queueBacklog);
  const terminalUnread = asRecord(commanderView.terminalUnread);
  const riskCounts = asRecord(commanderView.riskCounts);
  const attentionCounts = asRecord(riskCounts.attention);
  const highPriorityIssues = asRecord(commanderView.highPriorityIssues);
  const classification = asRecord(commanderView.classification);
  const byCategory = asRecord(classification.byCategory);
  const readyRiskCounts = asRecord(readyCommanderView.riskCounts);
  const readyClassification = asRecord(readyCommanderView.classification);
  const readyByCategory = asRecord(readyClassification.byCategory);
  const readyInfrastructure = asRecord(readyCommanderView.infrastructure);
  const commands = asRecord(commanderView.commands);
  const attention = asRecord(commanderView.attention);
  const attentionItems = asArray(attention.items).map(asRecord);
  const sections = asRecord(commanderView.sections);
  const terminalUnreadSection = asRecord(sections.terminalUnread);
  const recentCompletedSection = asRecord(sections.recentCompleted);
  const recentIds = asArray(recentCompletedSection.items).map((item) => String(asRecord(item).id ?? ""));
  const terminalIds = asArray(terminalUnreadSection.items).map((item) => String(asRecord(item).id ?? ""));
  const terminalAliasFilters = asRecord(commanderTerminalAliasView.filters);
  const terminalAliasSections = asRecord(commanderTerminalAliasView.sections);
  const terminalAliasRecentCompleted = asRecord(terminalAliasSections.recentCompleted);
  const terminalAliasCommands = asRecord(commanderTerminalAliasView.commands);
  const activeItems = asArray(activeRunners.items).map(asRecord);
  // Falls back to an empty record so a missing row fails in the riskSignals check below.
  const runningRisk = attentionItems.find((item) => item.id === "task-running-risk") ?? {};
  const limit8ActiveRunners = asRecord(commanderLimit8View.activeRunners);
  const limit8Sections = asRecord(commanderLimit8View.sections);
  const limit8TerminalUnread = asRecord(limit8Sections.terminalUnread);
  const limit8Commands = asRecord(commanderLimit8View.commands);
  const limit8Attention = asRecord(commanderLimit8View.attention);
  const limit8AttentionItems = asArray(limit8Attention.items).map(asRecord);

  // Size budget and exact counts on the noisy fixture.
  assertCondition(commanderBody.length < 30_000, "commander output should stay under the noisy fixture budget", { chars: commanderBody.length });
  assertCondition(commanderBody.length < fullBody.length * 0.65, "commander output should stay materially smaller than full output", { commanderChars: commanderBody.length, fullChars: fullBody.length });
  assertCondition(filters.requestedLimit === 260 && filters.effectiveLimit === 100 && filters.limitCapped === true, "commander view should disclose requested/effective limit cap", filters);
  assertCondition(activeRunners.count === 14 && activeRunners.exact === true && activeRunners.source === "database-active", "commander view should expose exact active runner count and source/disposition", activeRunners);
  assertCondition(backlog.queued === 18 && backlog.retryWait === 4 && backlog.total === 22 && backlog.exact === true, "commander view should expose queued/retry_wait exact counts", backlog);
  assertCondition(terminalUnread.total === 8 && terminalUnread.rowsReturned === 3 && terminalUnread.rowsOmitted === 5 && terminalUnread.exact === true, "commander view should expose terminal unread count plus omitted rows", terminalUnread);
  assertCondition(activeItems.some((item) => item.id === "task-running-risk") && activeItems.some((item) => item.id === "task-running-watch"), "commander activeRunners should include compact active task items", activeRunners);
  assertCondition(attentionCounts.total === 4 && attentionCounts.returned === 4 && attentionCounts.omitted === 0, "commander attention counts should preserve non-terminal attention totals", attentionCounts);
  assertCondition(highPriorityIssues.present === true && highPriorityIssues.matchedCount === 7, "commander should surface tracked high-priority issues", highPriorityIssues);

  // Classification on the noisy and the ready fixture.
  assertCondition(Number(byCategory["user-facing"] ?? 0) >= 1
    && Number(byCategory["cd-artifact"] ?? 0) >= 1
    && Number(byCategory["noise-report"] ?? 0) >= 1
    && Number(byCategory["infra-governance"] ?? 0) >= 1
    && Number(byCategory["infrastructure-blocker"] ?? 0) >= 1, "deterministic classifier should cover requested categories", byCategory);
  assertCondition(classification.deterministic === true, "classification metadata should be deterministic", classification);
  assertCondition(Number(readyRiskCounts.infrastructureBlocker ?? 0) === 0, "ready commander page should not report infrastructure blocker risk", readyRiskCounts);
  assertCondition(readyInfrastructure.infrastructureBlocker === false && readyInfrastructure.status === "ready", "ready commander page should surface ready infrastructure", readyInfrastructure);
  assertCondition(Number(readyByCategory["infrastructure-blocker"] ?? 0) === 0, "runner/governance boilerplate must not classify historical tasks as infrastructure-blocker", readyByCategory);
  assertCondition(Number(readyByCategory["workflow"] ?? 0) + Number(readyByCategory["user-facing"] ?? 0) + Number(readyByCategory["infra-governance"] ?? 0) + Number(readyByCategory["unknown"] ?? 0) === 12, "ready fixture tasks should be split without blocker overreporting", readyByCategory);

  // Drill-down commands.
  assertCondition(String(commands.refresh ?? "").includes("--view commander"), "commander refresh command should preserve explicit commander view", commands);
  assertCondition(String(commands.supervisor ?? "").startsWith("bun scripts/cli.ts codex tasks") && !String(commands.supervisor ?? "").includes("--view commander"), "commander should keep supervisor drilldown command", commands);
  assertCondition(String(commands.full ?? "").includes("--view full"), "commander should keep full drilldown command", commands);
  assertCondition(String(commands.rawOverview ?? "").includes("microservice proxy code-queue") && String(commands.rawOverview ?? "").includes("--raw"), "commander should expose raw overview drilldown", commands);
  assertCondition(String(commands.traceTemplate ?? "").includes("codex task <taskId> --trace"), "commander should expose trace drilldown template", commands);
  assertCondition(String(commands.outputTemplate ?? "").includes("codex output <taskId>"), "commander should expose output drilldown template", commands);
  assertCondition(String(commands.showTemplate ?? "").includes("codex task <taskId>"), "commander should include task drilldown template for attention rows", commands);

  // Attention rows and leak checks against the filler markers planted by the fixtures.
  assertCondition(asArray(runningRisk.riskSignals).includes("stale-recovery-candidate") && asArray(runningRisk.riskSignals).includes("blocked"), "active risk row should expose stale/blocker signals", runningRisk);
  assertCondition(!attentionItems.some((item) => item.id === "task-failed-unread"), "default commander attention should not expand terminal unread items", { attentionItems });
  assertCondition(!commanderBody.includes("raw-prompt-task-running-risk-20"), "commander output should not dump long raw prompt bodies", { chars: commanderBody.length });
  assertCondition(!commanderBody.includes("summary-final-task-running-risk-20"), "commander output should not dump long final response bodies", { chars: commanderBody.length });
  assertCondition(!commanderBody.includes("\"prompt\""), "commander output should not include prompt preview fields by default", { commanderBody });
  assertCondition(!commanderBody.includes("\"last\""), "commander output should not include final-response preview fields by default", { commanderBody });
  assertCondition(!recentIds.some((id) => terminalIds.includes(id)), "recentCompleted section must not duplicate terminalUnread rows", { recentIds, terminalIds });
  assertCondition(recentIds.length === 3, "recentCompleted commander section should be independently capped", { recentIds });
  assertCondition(terminalUnreadSection.returned === 0 && asArray(terminalUnreadSection.items).length === 0, "default commander terminal unread section should omit item details", terminalUnreadSection);
  assertCondition(String(asRecord(terminalUnreadSection.commands).unread ?? "").includes("codex unread"), "terminal unread section should point to codex unread drill-down", terminalUnreadSection);

  // Status aliases.
  assertCondition(JSON.stringify(terminalAliasFilters.status) === JSON.stringify(["succeeded", "canceled"]), "completed/cancelled status aliases should normalize to succeeded/canceled", terminalAliasFilters);
  assertCondition(!commanderTerminalAliasBody.includes("task-failed-unread"), "completed/cancelled aliases should filter out failed tasks", { commanderTerminalAliasBody });
  assertCondition(String(terminalAliasCommands.refresh ?? "").includes("--status succeeded,canceled"), "normalized status aliases should be preserved in drill-down commands", terminalAliasCommands);
  assertCondition(asArray(terminalAliasRecentCompleted.items).some((item) => asRecord(item).id === "task-recent-read-docs"), "completed alias should include read succeeded tasks in recent completed", terminalAliasRecentCompleted);

  // Unsupported --status is exercised through the real CLI in a child process.
  const badStatus = spawnSync("bun", ["scripts/cli.ts", "codex", "tasks", "--status", "done", "--limit", "1"], {
    cwd: process.cwd(),
    encoding: "utf8",
  });
  // A spawn failure (for example a missing `bun` binary) leaves no usable stdout, so report
  // it explicitly instead of letting the JSON handling below fail with an unrelated error.
  assertCondition(badStatus.error == null, "codex tasks CLI should spawn for the unsupported --status check", { error: String(badStatus.error), stderr: badStatus.stderr });
  // Check the exit status before parsing so a wrong exit code is reported as such.
  assertCondition(badStatus.status !== 0, "unsupported codex tasks --status should fail", { status: badStatus.status, stdout: badStatus.stdout, stderr: badStatus.stderr });
  let badStatusJson: JsonRecord;
  try {
    badStatusJson = asRecord(JSON.parse(badStatus.stdout));
  } catch (cause) {
    throw new Error(`unsupported codex tasks --status should print a JSON object on stdout: ${JSON.stringify({ cause: String(cause), stdout: badStatus.stdout, stderr: badStatus.stderr })}`);
  }
  const badStatusError = asRecord(badStatusJson.error);
  assertCondition(badStatusError.degradedReason === "validation-failed", "unsupported status should return structured validation error", badStatusError);
  assertCondition(Array.isArray(badStatusError.supported) && asArray(badStatusError.supported).includes("succeeded"), "unsupported status should list supported values", badStatusError);
  assertCondition(Array.isArray(badStatusError.aliases) && asArray(badStatusError.aliases).includes("completed->succeeded") && asArray(badStatusError.aliases).includes("cancelled->canceled"), "unsupported status should list common aliases", badStatusError);
  assertCondition(!badStatus.stdout.includes("stack") && !badStatus.stdout.includes("at parseTasksOptions"), "expected codex tasks parameter errors should not print stack traces by default", { stdout: badStatus.stdout });

  // Supervisor view and the compact `--limit 8` polling variant.
  assertCondition(asRecord(supervisorView.completedUnread).count === 3 && asRecord(supervisorView.recentCompleted).count === 5, "supervisor view should remain available and keep separate unread/recent sections", supervisorView);
  // Report the measured size, not the payload itself, so a budget failure stays readable.
  assertCondition(commanderLimit8Body.length < 16_000, "commander --limit 8 output should stay compact for polling", { chars: commanderLimit8Body.length });
  assertCondition(asRecord(commanderLimit8View.filters).requestedLimit === 8, "commander --limit 8 should preserve requested limit disclosure", commanderLimit8View);
  assertCondition(asArray(limit8ActiveRunners.items).some((item) => asRecord(item).id === "task-running-risk"), "commander --limit 8 should keep active items", limit8ActiveRunners);
  assertCondition(limit8TerminalUnread.returned === 0 && asArray(limit8TerminalUnread.items).length === 0, "commander --limit 8 should not expand terminal unread item details", limit8TerminalUnread);
  assertCondition(!limit8AttentionItems.some((item) => String(item.id ?? "").includes("unread")), "commander --limit 8 attention should omit terminal unread rows", { limit8AttentionItems });
  assertCondition(String(limit8Commands.unread ?? "").includes("codex unread"), "commander --limit 8 should keep unread drill-down command", limit8Commands);
  assertCondition(String(limit8Commands.full ?? "").includes("--view full"), "commander --limit 8 should keep full drill-down command", limit8Commands);
  assertCondition(!commanderLimit8Body.includes("RAW_PROMPT_SHOULD_NOT_LEAK") && !commanderLimit8Body.includes("raw-prompt-task-failed-unread"), "commander --limit 8 should not print unread prompt details", { commanderLimit8Body });
  assertCondition(!commanderLimit8Body.includes("summary-final-task-failed-unread"), "commander --limit 8 should not print unread final-response details", { commanderLimit8Body });
  assertCondition(fullLimit8Body.includes("raw-prompt-task-failed-unread") || fullLimit8Body.includes("display-prompt-task-failed-unread"), "--view full should still expose task detail previews", { fullLimit8Body });
  assertCondition(unreadLimit8Body.includes("task-failed-unread") && unreadLimit8Body.includes("readTemplate"), "supervisor unread drill-down should still expose terminal unread task ids", { unreadLimit8Body });
  assertCondition(!commanderLimit8Requests.some((request) => request.path.includes("task-failed-unread") && request.path.includes("/summary")), "default commander --limit 8 should not fetch terminal unread summaries", { commanderLimit8Requests });

  return {
    ok: true,
    checks: [
      "commander view is explicit and bounded",
      "exact active/queued/retry_wait/terminal-unread counts are preserved",
      "attention rows expose active, queued/retry_wait and blocker signals",
      "high-priority issue refs are surfaced",
      "deterministic classifier emits requested categories",
      "ready infrastructure pages do not classify all historical runner tasks as infrastructure-blocker",
      "drilldown commands are present without prompt/final-response flood",
      "commander --limit 8 omits terminal unread details and prompt previews",
      "codex tasks --status completed,cancelled aliases normalize to succeeded,canceled",
      "codex tasks invalid --status returns compact structured suggestions without stack noise",
      "full and unread drill-down paths still expose details",
      "recent completed does not duplicate terminal unread",
      "supervisor/full views remain available",
    ],
    commanderChars: commanderBody.length,
    commanderLimit8Chars: commanderLimit8Body.length,
    fullChars: fullBody.length,
  };
}
// Script entry point: when this file is the main module, run the contract and print the
// report to stdout as pretty-printed JSON followed by a newline.
if (import.meta.main) {
  const report = runCodeQueueCommanderViewContract();
  const rendered = JSON.stringify(report, null, 2);
  process.stdout.write(`${rendered}\n`);
}