fix: preserve live mutating code queue retries

This commit is contained in:
Codex
2026-05-29 23:17:32 +00:00
parent 4326ffe2f5
commit 1445f155b3
3 changed files with 38 additions and 1 deletions
+4
View File
@@ -102,6 +102,10 @@ Code Queue 派单模型按成本、可信度和 blast radius 分层:GPT-5.5/Co
`codex prompt-lint [prompt|--prompt-file path|--prompt-stdin]` 是同一套派单前 guardrail 的本地 dry-run 入口,用于检查 runner prompt 是否声明了 `DEV test class`、是否列出允许的 live mutation、禁止动作和 closeout 字段。它只返回分类、缺失或矛盾项和有界 evidence,不提交任务、不连接 live service、不打印完整 prompt。`codex submit --dry-run` 和 `codex steer --dry-run` 会嵌入同一 `promptLint` 结果;`dispatchDisposition=needs-authorization` 时,指挥官必须补齐授权或把 prompt 降到 `read-only` 范围后再派发/steer。
Device Pod 类 DS 派单必须把工具可用性设计进 prompt,而不是靠事后强制纠偏。prompt 应明确唯一 pod、workspace selector、目标工程/target、允许的 live mutation、禁止的 pod/BOOT/生产/密钥/数据库范围和 closeout 字段;文本源码修改默认要求 `hwpod ... workspace apply-patch`,新文件使用 `apply-patch --add-file`,整文件替换使用 `apply-patch --replace-file`,不要优先 `workspace put`。命令入口默认写短别名 `hwpod`,不要写长路径 `node /app/skills/device-pod-cli/scripts/device-pod-cli.mjs`;添加 Keil 源文件、build clean、download、UART/JSON-RPC smoke 也应走 `hwpod`。prompt 中只允许把 `/app/tools/tran.mjs`、`/app/tools/hwlab-gateway-tran.mjs`、临时 Python/PowerShell/JS 上传脚本列为禁止绕行;如果 DS 仍然需要这些绕行,指挥官应先把缺失能力补进 `device-pod-cli`/`hwpod`,再重置 workspace 让 DS 复测。
Device Pod 类 DS 验收不能只看最终回复。指挥官必须用 `codex task <taskId> --trace` / `codex output <taskId>` 审计实际命令面:确认是否使用 `hwpod`,是否出现长 CLI 路径、`tran.mjs`、`hwlab-gateway-tran.mjs`、临时脚本上传、`workspace put` 或构建产物 patch/put/delete;同时核对 build job、download job、UART/JSON-RPC 或屏幕/串口等硬件证据。若任务因为模型上游 429/503、transport 断连或 Code Queue continuation 被错误降级而没有进入工具调用,不应把它记作 device-pod-cli 失败样本,应先处理调度/运行面摩擦,再重新派发干净任务。
并发治理按模型和风险一起决定。GPT-5.5 常规并发目标是 5 条 lane;当写入范围互不重叠、heartbeat/trace 健康、完成质量稳定时可以短时提高到 10。MiniMax 只承接简单任务时可以提高到 10,但必须保留指挥官审阅和证据核验。DeepSeek 用于中等复杂度任务,默认按约 5 条 lane 观察质量,再根据成功率和 reviewer 负载逐步调整。并发扩张的前提永远是任务质量和可观测性,而不是模型价格。
模型选择矩阵:
@@ -842,6 +842,10 @@ function applyJudgeSafetyOverrides(task: QueueTask, result: CodexRunResult, judg
return judge;
}
/**
 * Exported test hook for `applyJudgeSafetyOverrides`.
 *
 * Passes `task`, `result` and `judge` through unchanged and hands back
 * exactly what `applyJudgeSafetyOverrides` returns; it adds no logic of its own.
 *
 * @param task - Queue task whose judge verdict is being post-processed.
 * @param result - Run result the verdict was produced for.
 * @param judge - Verdict to run through the safety overrides.
 * @returns The verdict returned by `applyJudgeSafetyOverrides`.
 */
export function applyJudgeSafetyOverridesForTest(task: QueueTask, result: CodexRunResult, judge: JudgeResult): JudgeResult {
  const overridden = applyJudgeSafetyOverrides(task, result, judge);
  return overridden;
}
export async function judgeTask(task: QueueTask, result: CodexRunResult): Promise<JudgeResult> {
if (config().minimaxApiKey.length === 0) {
const judge = applyJudgeSafetyOverrides(task, result, fallbackJudge(result));
@@ -2,7 +2,7 @@
import { minimaxM27Model } from "./code-agent/common";
import { openCodeTransportClosedBeforeTerminal, remoteOpenCodeRunCommandForTest } from "./code-agent/opencode";
import { continuePromptSourceBudgetChars, miniMaxJudgeMessages, parsedContinuePromptForJudge, parseJudgeJson, queueRecoveryRetryPrompt, retryPrompt } from "./judge";
import { applyJudgeSafetyOverridesForTest, continuePromptSourceBudgetChars, miniMaxJudgeMessages, parsedContinuePromptForJudge, parseJudgeJson, queueRecoveryRetryPrompt, retryPrompt } from "./judge";
import { codeQueueEnvironmentHintTitle, injectCodeQueueEnvironmentHint, promptWithCodeQueueEnvironmentHint, userPromptForDisplay } from "./prompts";
import { buildTaskTranscript, safePreview, taskTraceSummaryFixtureResponse, transcriptLineSummaryLines } from "./task-view";
import type { ActiveRunSlotWaiter } from "./code-agent/common";
@@ -265,6 +265,30 @@ async function runReferenceInjectionSelfTest(): Promise<JsonValue> {
...Array.from({ length: 80 }, (_, index) => `验收点 ${index + 1}: 基于当前 thread 上文补齐缺失证据,并在最终 response 中写出真实命令/API/UI 结果。`),
].join("\n");
const explicitRetryPrompt = retryPrompt(retryTask, { decision: "retry", confidence: 1, reason: "Long MiniMax feedback fixture", continuePrompt: explicitLongContinuePrompt, source: "minimax" });
const liveMutatingDevicePodPrompt = [
"DEV test class: live-mutating hardware smoke on G14 DEV device-pod only.",
"",
"Allowed live mutation:",
"- 仅允许通过 `hwpod` 修改 `device-pod-71-freq` 的 `workspace:/projects/01_baseline` 下 firmware 源码和 Keil project membership。",
"",
"Forbidden actions:",
"- 禁止使用 `/app/tools/tran.mjs`,禁止上传临时脚本。",
"",
"Closeout fields:",
"- `DEV test class`、`Allowed mutation actually used`、`Forbidden actions avoided`。",
].join("\n");
const liveMutatingDevicePodTask = testTask("codex_live_mutating_device_pod_retry", liveMutatingDevicePodPrompt, "", [], "2026-05-08T00:32:00.000Z");
const liveMutatingDevicePodRetry = applyJudgeSafetyOverridesForTest(liveMutatingDevicePodTask, {
threadId: "thread_live_mutating_device_pod_retry",
turnId: "turn_live_mutating_device_pod_retry",
finalResponse: "",
terminalStatus: "failed",
terminalError: "unexpected status 503 Service Unavailable",
transportClosedBeforeTerminal: false,
appServerExit: { code: 0, signal: null, stderrTail: "" },
events: [],
}, { decision: "retry", confidence: 0.7, reason: "Codex turn 失败。", continuePrompt: "继续完成原始任务。", source: "fallback" });
const liveMutatingContinuePrompt = liveMutatingDevicePodRetry.continuePrompt ?? "";
let longMiniMaxPromptRejectedAtSource = false;
try {
parsedContinuePromptForJudge({ continuePrompt: `${"x".repeat(continuePromptSourceBudgetChars + 1)}` }, "retry");
@@ -277,6 +301,10 @@ async function runReferenceInjectionSelfTest(): Promise<JsonValue> {
assertReferenceTest(!recoveryAfterDeepReference.includes("Reference Round"), "queue recovery prompt should not re-inject reference rounds");
assertReferenceTest(explicitRetryPrompt === explicitLongContinuePrompt, "explicit continuePrompt should not be tail-truncated");
assertReferenceTest(!explicitRetryPrompt.includes("已截断"), "explicit continuePrompt should not include truncation marker");
assertReferenceTest(liveMutatingContinuePrompt.includes("live-mutating hardware smoke"), "live-mutating retry should preserve the original authorization summary");
assertReferenceTest(liveMutatingContinuePrompt.includes("hwpod"), "live-mutating retry should preserve tool authorization details");
assertReferenceTest(!liveMutatingContinuePrompt.includes("只读 continuation"), "live-mutating retry must not be downgraded to read-only continuation");
assertReferenceTest(!liveMutatingContinuePrompt.includes("不改变运行态、生产态或持久化状态"), "live-mutating retry must not add generic read-only mutation bans");
assertReferenceTest(longMiniMaxPromptRejectedAtSource, "over-budget MiniMax continuePrompt should be rejected for source repair");
return {
ok: true,
@@ -291,6 +319,7 @@ async function runReferenceInjectionSelfTest(): Promise<JsonValue> {
{ name: "retry_prompt_does_not_reinject_reference_graph", ok: true, chars: retryAfterDeepReference.length },
{ name: "queue_recovery_prompt_is_compact", ok: true, chars: recoveryAfterDeepReference.length },
{ name: "explicit_continue_prompt_not_tail_truncated", ok: true, chars: explicitRetryPrompt.length },
{ name: "live_mutating_retry_not_downgraded_to_readonly", ok: true, chars: liveMutatingContinuePrompt.length },
{ name: "over_budget_minimax_continue_prompt_requires_source_repair", ok: true, budgetChars: continuePromptSourceBudgetChars },
],
promptPreview: safePreview(promptC, 1200),