Merge pull request #104 from pikasTech/fix/code-queue-awaiting-terminal-label

fix: label code queue final response closeout
This commit is contained in:
Lyon
2026-05-23 15:10:27 +08:00
committed by GitHub
3 changed files with 57 additions and 2 deletions
+2
View File
@@ -223,6 +223,8 @@ bun scripts/cli.ts codex pr-preflight --remote --issue <issue-number>
默认 supervisor 视图必须保持低噪声。`running`、`completedUnread`、`queued` 即使传入较大的 `--limit`,默认也只返回一个很小的有界页,并通过 section `commands.next` 继续分页;`--limit` 保留为扫描/分页预算和 full view 返回预算,不得让一次 supervisor 调用输出几十条肥行。每个任务行只应带 task id 和必要摘要,`show`、`detail`、`trace`、`output`、`full`、`read` 使用 section template 表达,让下一步渐进披露动作明确且不重复;默认不得嵌入完整 queue 列表、完整 final response、raw output 页或完整 trace 行。`recentCompleted` 必须默认限量,且不得重复 `completedUnread` 里的未读终态,避免完成历史把当前 running、阻塞和未读审阅挤出视野;需要完整当前页时显式使用 `--view full`。`executionDiagnostics` 只能展示有界 task-id/reason 预览、总数、截断标记和 omitted counts;需要全量诊断时使用输出中的 raw command。`commands.read` 只是在人工审阅后的建议命令,listing 命令绝不能自动执行。
`codex tasks` 中的 `status` 永远是 scheduler/control-plane 原始状态,不因为看到 worker final response 而改写。若某个非终态任务的最后 assistant 文本来自 `finalResponse`,CLI 会额外显示 `statusLabel`、`awaitingTerminalJudge=true`、`closeoutState=awaiting-terminal-or-judge` 或 `awaiting-judge`,并附带 closeout hint。指挥官应把这类行理解为“worker 已经产出最终回复文本,但 Code Queue 还在等待 agent terminal event、scheduler 写回或 judge 结果”;它仍占用 active/running 监督窗口,不能按完成任务 `read` 或验收,直到 `status` 进入 `succeeded`、`failed` 或 `canceled` 并可审阅 judge/terminal 记录。
这条规则直接服务 HWLAB #132:指挥官要优先看到真实业务推进、部署修复、阻塞和需要人工审阅的未读结果,Gate/报告/审查/诊断类任务只能作为折叠的分类信号存在,不能在默认输出中用长 prompt/body 抢占上下文。
完成未读任务的审阅也必须遵循渐进披露。指挥官默认只拉取原始 prompt 和最终 response,用它判断任务是否声称完成、是否有明显越界、是否缺少验收证据;不要默认拉完整 trace、全量 tool summary 或 raw output。`codex task --detail` 也是有界摘要,只提供少量 attempt/tool 行和短文本预览;需要完整证据时再继续展开 `--detail --full --tool-limit N`、分页 `--trace`,或按 seq 读取 `codex output`。`codex output` 默认仍会限制返回行数和单条文本预览;只有明确使用 `--full-text` 且选定 seq window 时才读取该页全文。只有当 final response 与目标不一致、证据不足、远端 commit 无法验证、任务疑似造假、或需要追溯失败原因时,才进入这些展开路径。这条规则的目标是降低上下文压力,同时保留通过多步查询拿到完整证据的能力。
@@ -64,7 +64,7 @@ function fixtureResponse(path: string): JsonRecord {
lastAssistantMessage: {
at: "2026-05-22T00:00:00.000Z",
seq: 120,
source: "assistant",
source: "finalResponse",
text: longText(`summary-assistant-${taskId}`, 130),
},
commands: {
@@ -218,10 +218,16 @@ export function runCodeQueueSupervisorDisclosureContract(): JsonRecord {
assertCondition(diagnostics.livenessSummary === undefined, "supervisor diagnostics should omit liveness summary preview by default", diagnostics);
assertCondition(listBudget.truncated === true && typeof listBudget.rawCommand === "string", "diagnostic list budget should disclose raw command", listBudget);
assertCondition(asArray(runningItem.issues).includes("#132"), "supervisor row should expose issue refs for triage", runningItem);
assertCondition(runningItem.status === "running", "fixture running row should keep raw scheduler status", runningItem);
assertCondition(String(runningItem.statusLabel ?? "").includes("awaiting terminal/judge"), "running finalResponse row should expose awaiting terminal/judge label", runningItem);
assertCondition(runningItem.awaitingTerminalJudge === true && runningItem.closeoutState === "awaiting-terminal-or-judge", "running finalResponse row should be marked as not ready for closeout", runningItem);
assertCondition(String(runningItem.closeoutHint ?? "").includes("wait for terminal status and judge"), "running finalResponse row should explain commander interpretation", runningItem);
assertCondition(Number(runningItem.promptChars) > String(runningItem.prompt ?? "").length && runningItem.promptTruncated === true, "supervisor prompt must be a short flat preview with original char count", runningItem);
assertCondition(Number(runningItem.lastChars) > String(runningItem.last ?? "").length && runningItem.lastTruncated === true, "supervisor body must be a short flat preview with original char count", runningItem);
assertCondition(runningItem.commands === undefined && runningItem.promptPreview === undefined && runningItem.lastAssistantMessage === undefined, "supervisor rows must not expose repeated commands or legacy long list fields", runningItem);
assertCondition(asRecord(fullItem.promptPreview).chars !== undefined && fullItem.lastAssistantMessage !== undefined, "full view must retain detailed task row fields", fullItem);
assertCondition(fullItem.status === "running" && String(fullItem.statusLabel ?? "").includes("awaiting terminal/judge"), "full view should keep raw status while exposing derived closeout label", fullItem);
assertCondition(fullItem.awaitingTerminalJudge === true && fullItem.closeoutState === "awaiting-terminal-or-judge", "full view should expose awaiting terminal/judge state", fullItem);
assertCondition(fullTasks.returned === 15, "full view must not inherit supervisor recentCompleted cap", fullTasks);
const budget = asRecord(disclosure.outputBudget);
assertCondition(budget.recentCompletedReturnedLimit === 3 && budget.sectionReturnedLimit === 3, "supervisor must expose output budget metadata", disclosure);
@@ -240,6 +246,7 @@ export function runCodeQueueSupervisorDisclosureContract(): JsonRecord {
"running/unread locally paged",
"split-brain diagnostics capped",
"prompt/body previews bounded",
"running finalResponse rows labeled awaiting terminal/judge",
"drill-down commands preserved",
"full view remains detailed",
],
+47 -1
View File
@@ -200,6 +200,11 @@ interface CodexTasksEntry {
taskId: string;
queueId: string | null;
status: string | null;
statusLabel: string | null;
awaitingTerminalJudge?: boolean;
closeoutState?: "awaiting-terminal-or-judge" | "awaiting-judge";
closeoutHint?: string;
finalResponseAt?: unknown;
currentAttempt: number | null;
updatedAt: string | null;
finishedAt: string | null;
@@ -235,6 +240,11 @@ interface CodexTasksSupervisorEntry {
id: string;
queue: string | null;
status: string | null;
statusLabel?: string;
awaitingTerminalJudge?: boolean;
closeoutState?: "awaiting-terminal-or-judge" | "awaiting-judge";
closeoutHint?: string;
finalResponseAt?: unknown;
attempt: number | null;
updatedAt: string | null;
finishedAt?: string | null;
@@ -1983,10 +1993,31 @@ function supervisorLastMessage(summaryLastAssistant: unknown, maxChars: number):
};
}
const awaitingTerminalJudgeHint = "finalResponse is visible while task status is non-terminal; wait for terminal status and judge before closeout.";
/**
 * Derives the "awaiting terminal/judge" closeout marker for a task row.
 *
 * A row qualifies only when its raw scheduler status is `running` or
 * `judging` AND the last assistant message record has `source` equal to
 * `"finalResponse"` with non-blank `text`. The raw status itself is never
 * modified here; callers attach the returned values as extra fields.
 *
 * @param status Raw scheduler status of the task, or `null` when unknown.
 * @param summaryLastAssistant Untyped last-assistant-message value; it is
 *   passed through `asRecord` and rejected when that yields `null`.
 * @returns The derived label, closeout state and the message's `at` value
 *   (`null` when `at` is nullish), or `null` when the row does not qualify.
 */
function finalResponseAwaitingTerminalStatus(status: string | null, summaryLastAssistant: unknown): {
  label: string;
  state: "awaiting-terminal-or-judge" | "awaiting-judge";
  finalResponseAt: unknown;
} | null {
  // Only these two non-terminal statuses can carry a pending final response.
  const isJudging = status === "judging";
  if (!isJudging && status !== "running") return null;

  const lastMessage = asRecord(summaryLastAssistant);
  if (lastMessage === null) return null;

  // The message must be the worker's final response and carry visible text.
  if (asString(lastMessage.source) !== "finalResponse" || asString(lastMessage.text).trim().length === 0) {
    return null;
  }

  const finalResponseAt = lastMessage.at ?? null;
  if (isJudging) {
    return {
      label: "judging (awaiting judge)",
      state: "awaiting-judge",
      finalResponseAt,
    };
  }
  return {
    label: "running (awaiting terminal/judge)",
    state: "awaiting-terminal-or-judge",
    finalResponseAt,
  };
}
function taskWatchEntry(task: Record<string, unknown>, summary: Record<string, unknown> | null): CodexTasksEntry {
const taskId = asString(task.id);
const summaryCommands = summary === null ? null : asRecord(summary.commands);
const summaryLastAssistant = summary?.lastAssistantMessage ?? task.lastAssistantMessage;
const status = asString(task.status) || null;
const awaitingStatus = finalResponseAwaitingTerminalStatus(status, summaryLastAssistant);
const promptPreview = textPreview(asString(task.displayPrompt ?? task.basePrompt ?? task.prompt), 360);
const showCommand = typeof summary?.cliHint === "string" && summary.cliHint.length > 0
? summary.cliHint
@@ -1999,7 +2030,14 @@ function taskWatchEntry(task: Record<string, unknown>, summary: Record<string, u
return {
taskId,
queueId: asString(task.queueId) || null,
status: asString(task.status) || null,
status,
statusLabel: awaitingStatus?.label ?? status,
...(awaitingStatus === null ? {} : {
awaitingTerminalJudge: true,
closeoutState: awaitingStatus.state,
closeoutHint: awaitingTerminalJudgeHint,
finalResponseAt: awaitingStatus.finalResponseAt,
}),
currentAttempt: typeof task.currentAttempt === "number" && Number.isFinite(task.currentAttempt) ? task.currentAttempt : null,
updatedAt: asString(task.updatedAt) || null,
finishedAt: asString(task.finishedAt) || null,
@@ -2025,6 +2063,7 @@ function taskSupervisorEntry(task: Record<string, unknown>, summary: Record<stri
const taskId = asString(task.id);
const summaryLastAssistant = summary?.lastAssistantMessage ?? task.lastAssistantMessage;
const status = asString(task.status) || null;
const awaitingStatus = finalResponseAwaitingTerminalStatus(status, summaryLastAssistant);
const unreadTerminal = taskUnreadTerminal(task);
const classification = taskClassification(task, summary);
const queuedReason = compactQueuedReason(task.queuedReason);
@@ -2034,6 +2073,13 @@ function taskSupervisorEntry(task: Record<string, unknown>, summary: Record<stri
id: taskId,
queue: asString(task.queueId) || null,
status,
...(awaitingStatus === null ? {} : {
statusLabel: awaitingStatus.label,
awaitingTerminalJudge: true,
closeoutState: awaitingStatus.state,
closeoutHint: awaitingTerminalJudgeHint,
finalResponseAt: awaitingStatus.finalResponseAt,
}),
attempt: typeof task.currentAttempt === "number" && Number.isFinite(task.currentAttempt) ? task.currentAttempt : null,
updatedAt: asString(task.updatedAt) || null,
...(isTerminalTaskStatus(status) ? { finishedAt: asString(task.finishedAt) || null, unreadTerminal } : {}),