Merge pull request #104 from pikasTech/fix/code-queue-awaiting-terminal-label
fix: label code queue final response closeout
This commit is contained in:
@@ -223,6 +223,8 @@ bun scripts/cli.ts codex pr-preflight --remote --issue <issue-number>
|
||||
|
||||
默认 supervisor 视图必须保持低噪声。`running`、`completedUnread` 和 `queued` 即使传入较大的 `--limit`,默认也只返回一个很小的有界页,并通过 section `commands.next` 继续分页;`--limit` 保留为扫描/分页预算和 full view 返回预算,不得让一次 supervisor 调用输出几十条肥行。每个任务行只应带 task id 和必要摘要,`show`、`detail`、`trace`、`output`、`full`、`read` 使用 section template 表达,让下一步渐进披露动作明确且不重复;默认不得嵌入完整 queue 列表、完整 final response、raw output 页或完整 trace 行。`recentCompleted` 必须默认限量,且不得重复 `completedUnread` 里的未读终态,避免完成历史把当前 running、阻塞和未读审阅挤出视野;需要完整当前页时显式使用 `--view full`。`executionDiagnostics` 只能展示有界 task-id/reason 预览、总数、截断标记和 omitted counts;需要全量诊断时使用输出中的 raw command。`commands.read` 只是在人工审阅后的建议命令,listing 命令绝不能自动执行。
|
||||
|
||||
`codex tasks` 中的 `status` 永远是 scheduler/control-plane 原始状态,不因为看到 worker final response 而改写。若某个非终态任务的最后 assistant 文本来自 `finalResponse`,CLI 会额外显示 `statusLabel`、`awaitingTerminalJudge=true`、`closeoutState=awaiting-terminal-or-judge` 或 `awaiting-judge`,并附带 closeout hint。指挥官应把这类行理解为“worker 已经产出最终回复文本,但 Code Queue 还在等待 agent terminal event、scheduler 写回或 judge 结果”;它仍占用 active/running 监督窗口,不能按完成任务 `read` 或验收,直到 `status` 进入 `succeeded`、`failed` 或 `canceled` 并可审阅 judge/terminal 记录。
|
||||
|
||||
这条规则直接服务 HWLAB #132:指挥官要优先看到真实业务推进、部署修复、阻塞和需要人工审阅的未读结果,Gate/报告/审查/诊断类任务只能作为折叠的分类信号存在,不能在默认输出中用长 prompt/body 抢占上下文。
|
||||
|
||||
完成未读任务的审阅也必须遵循渐进披露。指挥官默认只拉取原始 prompt 和最终 response,用它判断任务是否声称完成、是否有明显越界、是否缺少验收证据;不要默认拉完整 trace、全量 tool summary 或 raw output。`codex task --detail` 也是有界摘要,只提供少量 attempt/tool 行和短文本预览;需要完整证据时再继续展开 `--detail --full --tool-limit N`、分页 `--trace`,或按 seq 读取 `codex output`。`codex output` 默认仍会限制返回行数和单条文本预览;只有明确使用 `--full-text` 且选定 seq window 时才读取该页全文。只有当 final response 与目标不一致、证据不足、远端 commit 无法验证、任务疑似造假、或需要追溯失败原因时,才进入这些展开路径。这条规则的目标是降低上下文压力,同时保留通过多步查询拿到完整证据的能力。
|
||||
|
||||
@@ -64,7 +64,7 @@ function fixtureResponse(path: string): JsonRecord {
|
||||
lastAssistantMessage: {
|
||||
at: "2026-05-22T00:00:00.000Z",
|
||||
seq: 120,
|
||||
source: "assistant",
|
||||
source: "finalResponse",
|
||||
text: longText(`summary-assistant-${taskId}`, 130),
|
||||
},
|
||||
commands: {
|
||||
@@ -218,10 +218,16 @@ export function runCodeQueueSupervisorDisclosureContract(): JsonRecord {
|
||||
assertCondition(diagnostics.livenessSummary === undefined, "supervisor diagnostics should omit liveness summary preview by default", diagnostics);
|
||||
assertCondition(listBudget.truncated === true && typeof listBudget.rawCommand === "string", "diagnostic list budget should disclose raw command", listBudget);
|
||||
assertCondition(asArray(runningItem.issues).includes("#132"), "supervisor row should expose issue refs for triage", runningItem);
|
||||
assertCondition(runningItem.status === "running", "fixture running row should keep raw scheduler status", runningItem);
|
||||
assertCondition(String(runningItem.statusLabel ?? "").includes("awaiting terminal/judge"), "running finalResponse row should expose awaiting terminal/judge label", runningItem);
|
||||
assertCondition(runningItem.awaitingTerminalJudge === true && runningItem.closeoutState === "awaiting-terminal-or-judge", "running finalResponse row should be marked as not ready for closeout", runningItem);
|
||||
assertCondition(String(runningItem.closeoutHint ?? "").includes("wait for terminal status and judge"), "running finalResponse row should explain commander interpretation", runningItem);
|
||||
assertCondition(Number(runningItem.promptChars) > String(runningItem.prompt ?? "").length && runningItem.promptTruncated === true, "supervisor prompt must be a short flat preview with original char count", runningItem);
|
||||
assertCondition(Number(runningItem.lastChars) > String(runningItem.last ?? "").length && runningItem.lastTruncated === true, "supervisor body must be a short flat preview with original char count", runningItem);
|
||||
assertCondition(runningItem.commands === undefined && runningItem.promptPreview === undefined && runningItem.lastAssistantMessage === undefined, "supervisor rows must not expose repeated commands or legacy long list fields", runningItem);
|
||||
assertCondition(asRecord(fullItem.promptPreview).chars !== undefined && fullItem.lastAssistantMessage !== undefined, "full view must retain detailed task row fields", fullItem);
|
||||
assertCondition(fullItem.status === "running" && String(fullItem.statusLabel ?? "").includes("awaiting terminal/judge"), "full view should keep raw status while exposing derived closeout label", fullItem);
|
||||
assertCondition(fullItem.awaitingTerminalJudge === true && fullItem.closeoutState === "awaiting-terminal-or-judge", "full view should expose awaiting terminal/judge state", fullItem);
|
||||
assertCondition(fullTasks.returned === 15, "full view must not inherit supervisor recentCompleted cap", fullTasks);
|
||||
const budget = asRecord(disclosure.outputBudget);
|
||||
assertCondition(budget.recentCompletedReturnedLimit === 3 && budget.sectionReturnedLimit === 3, "supervisor must expose output budget metadata", disclosure);
|
||||
@@ -240,6 +246,7 @@ export function runCodeQueueSupervisorDisclosureContract(): JsonRecord {
|
||||
"running/unread locally paged",
|
||||
"split-brain diagnostics capped",
|
||||
"prompt/body previews bounded",
|
||||
"running finalResponse rows labeled awaiting terminal/judge",
|
||||
"drill-down commands preserved",
|
||||
"full view remains detailed",
|
||||
],
|
||||
|
||||
@@ -200,6 +200,11 @@ interface CodexTasksEntry {
|
||||
taskId: string;
|
||||
queueId: string | null;
|
||||
status: string | null;
|
||||
statusLabel: string | null;
|
||||
awaitingTerminalJudge?: boolean;
|
||||
closeoutState?: "awaiting-terminal-or-judge" | "awaiting-judge";
|
||||
closeoutHint?: string;
|
||||
finalResponseAt?: unknown;
|
||||
currentAttempt: number | null;
|
||||
updatedAt: string | null;
|
||||
finishedAt: string | null;
|
||||
@@ -235,6 +240,11 @@ interface CodexTasksSupervisorEntry {
|
||||
id: string;
|
||||
queue: string | null;
|
||||
status: string | null;
|
||||
statusLabel?: string;
|
||||
awaitingTerminalJudge?: boolean;
|
||||
closeoutState?: "awaiting-terminal-or-judge" | "awaiting-judge";
|
||||
closeoutHint?: string;
|
||||
finalResponseAt?: unknown;
|
||||
attempt: number | null;
|
||||
updatedAt: string | null;
|
||||
finishedAt?: string | null;
|
||||
@@ -1983,10 +1993,31 @@ function supervisorLastMessage(summaryLastAssistant: unknown, maxChars: number):
|
||||
};
|
||||
}
|
||||
|
||||
// Closeout hint attached (as `closeoutHint`) to task rows that already show a finalResponse
// while their status is still non-terminal; it tells the reader not to close out the task yet.
const awaitingTerminalJudgeHint = "finalResponse is visible while task status is non-terminal; wait for terminal status and judge before closeout.";
|
||||
|
||||
/**
 * Derives the "awaiting terminal/judge" closeout descriptor for a task row.
 *
 * A descriptor is produced only when all of the following hold:
 * - `status` is exactly "running" or "judging";
 * - `asRecord(summaryLastAssistant)` does not yield null;
 * - the record's `source` reads as "finalResponse";
 * - the record's `text` is non-empty after trimming.
 *
 * @param status - Raw task status; it is only inspected, never rewritten.
 * @param summaryLastAssistant - Last assistant message payload of unknown shape.
 * @returns The display label, the closeout state, and the message's `at` value
 *          (null when `at` is null or undefined), or null when the row does not qualify.
 */
function finalResponseAwaitingTerminalStatus(status: string | null, summaryLastAssistant: unknown): {
  label: string;
  state: "awaiting-terminal-or-judge" | "awaiting-judge";
  finalResponseAt: unknown;
} | null {
  const isJudging = status === "judging";
  if (!isJudging && status !== "running") {
    return null;
  }

  const message = asRecord(summaryLastAssistant);
  if (message === null) {
    return null;
  }

  // Short-circuit keeps the original check order: source first, then text.
  if (asString(message.source) !== "finalResponse" || asString(message.text).trim().length === 0) {
    return null;
  }

  const finalResponseAt = message.at ?? null;
  if (isJudging) {
    return {
      label: "judging (awaiting judge)",
      state: "awaiting-judge",
      finalResponseAt,
    };
  }
  return {
    label: "running (awaiting terminal/judge)",
    state: "awaiting-terminal-or-judge",
    finalResponseAt,
  };
}
|
||||
|
||||
function taskWatchEntry(task: Record<string, unknown>, summary: Record<string, unknown> | null): CodexTasksEntry {
|
||||
const taskId = asString(task.id);
|
||||
const summaryCommands = summary === null ? null : asRecord(summary.commands);
|
||||
const summaryLastAssistant = summary?.lastAssistantMessage ?? task.lastAssistantMessage;
|
||||
const status = asString(task.status) || null;
|
||||
const awaitingStatus = finalResponseAwaitingTerminalStatus(status, summaryLastAssistant);
|
||||
const promptPreview = textPreview(asString(task.displayPrompt ?? task.basePrompt ?? task.prompt), 360);
|
||||
const showCommand = typeof summary?.cliHint === "string" && summary.cliHint.length > 0
|
||||
? summary.cliHint
|
||||
@@ -1999,7 +2030,14 @@ function taskWatchEntry(task: Record<string, unknown>, summary: Record<string, u
|
||||
return {
|
||||
taskId,
|
||||
queueId: asString(task.queueId) || null,
|
||||
status: asString(task.status) || null,
|
||||
status,
|
||||
statusLabel: awaitingStatus?.label ?? status,
|
||||
...(awaitingStatus === null ? {} : {
|
||||
awaitingTerminalJudge: true,
|
||||
closeoutState: awaitingStatus.state,
|
||||
closeoutHint: awaitingTerminalJudgeHint,
|
||||
finalResponseAt: awaitingStatus.finalResponseAt,
|
||||
}),
|
||||
currentAttempt: typeof task.currentAttempt === "number" && Number.isFinite(task.currentAttempt) ? task.currentAttempt : null,
|
||||
updatedAt: asString(task.updatedAt) || null,
|
||||
finishedAt: asString(task.finishedAt) || null,
|
||||
@@ -2025,6 +2063,7 @@ function taskSupervisorEntry(task: Record<string, unknown>, summary: Record<stri
|
||||
const taskId = asString(task.id);
|
||||
const summaryLastAssistant = summary?.lastAssistantMessage ?? task.lastAssistantMessage;
|
||||
const status = asString(task.status) || null;
|
||||
const awaitingStatus = finalResponseAwaitingTerminalStatus(status, summaryLastAssistant);
|
||||
const unreadTerminal = taskUnreadTerminal(task);
|
||||
const classification = taskClassification(task, summary);
|
||||
const queuedReason = compactQueuedReason(task.queuedReason);
|
||||
@@ -2034,6 +2073,13 @@ function taskSupervisorEntry(task: Record<string, unknown>, summary: Record<stri
|
||||
id: taskId,
|
||||
queue: asString(task.queueId) || null,
|
||||
status,
|
||||
...(awaitingStatus === null ? {} : {
|
||||
statusLabel: awaitingStatus.label,
|
||||
awaitingTerminalJudge: true,
|
||||
closeoutState: awaitingStatus.state,
|
||||
closeoutHint: awaitingTerminalJudgeHint,
|
||||
finalResponseAt: awaitingStatus.finalResponseAt,
|
||||
}),
|
||||
attempt: typeof task.currentAttempt === "number" && Number.isFinite(task.currentAttempt) ? task.currentAttempt : null,
|
||||
updatedAt: asString(task.updatedAt) || null,
|
||||
...(isTerminalTaskStatus(status) ? { finishedAt: asString(task.finishedAt) || null, unreadTerminal } : {}),
|
||||
|
||||
Reference in New Issue
Block a user