fix: preserve live mutating code queue retries

2026-05-29 23:17:32 +00:00
parent 4326ffe2f5
commit 1445f155b3
3 changed files with 38 additions and 1 deletions
@@ -102,6 +102,10 @@ Code Queue 派单模型按成本、可信度和 blast radius 分层：GPT-5.5/Co

 `codex prompt-lint [prompt|--prompt-file path|--prompt-stdin]` 是同一套派单前 guardrail 的本地 dry-run 入口，用于检查 runner prompt 是否声明了 `DEV test class`、是否列出允许的 live mutation、禁止动作和 closeout 字段。它只返回分类、缺失或矛盾项和有界 evidence，不提交任务、不连接 live service、不打印完整 prompt。`codex submit --dry-run` 和 `codex steer --dry-run` 会嵌入同一 `promptLint` 结果；`dispatchDisposition=needs-authorization` 时，指挥官必须补齐授权或把 prompt 降到 `read-only` 范围后再派发/steer。

+Device Pod 类 DS 派单必须把工具可用性设计进 prompt，而不是靠事后强制纠偏。prompt 应明确唯一 pod、workspace selector、目标工程/target、允许的 live mutation、禁止的 pod/BOOT/生产/密钥/数据库范围和 closeout 字段；文本源码修改默认要求 `hwpod ... workspace apply-patch`，新文件使用 `apply-patch --add-file`，整文件替换使用 `apply-patch --replace-file`，不要优先 `workspace put`。命令入口默认写短别名 `hwpod`，不要写长路径 `node /app/skills/device-pod-cli/scripts/device-pod-cli.mjs`；添加 Keil 源文件、build clean、download、UART/JSON-RPC smoke 也应走 `hwpod`。prompt 中只允许把 `/app/tools/tran.mjs`、`/app/tools/hwlab-gateway-tran.mjs`、临时 Python/PowerShell/JS 上传脚本列为禁止绕行；如果 DS 仍然需要这些绕行，指挥官应先把缺失能力补进 `device-pod-cli`/`hwpod`，再重置 workspace 让 DS 复测。
+
+Device Pod 类 DS 验收不能只看最终回复。指挥官必须用 `codex task <taskId> --trace` / `codex output <taskId>` 审计实际命令面：确认是否使用 `hwpod`，是否出现长 CLI 路径、`tran.mjs`、`hwlab-gateway-tran.mjs`、临时脚本上传、`workspace put` 或构建产物 patch/put/delete；同时核对 build job、download job、UART/JSON-RPC 或屏幕/串口等硬件证据。若任务因为模型上游 429/503、transport 断连或 Code Queue continuation 被错误降级而没有进入工具调用，不应把它记作 device-pod-cli 失败样本，应先处理调度/运行面摩擦，再重新派发干净任务。
+
 并发治理按模型和风险一起决定。GPT-5.5 常规并发目标是 5 条 lane；当写入范围互不重叠、heartbeat/trace 健康、完成质量稳定时可以短时提高到 10。MiniMax 只承接简单任务时可以提高到 10，但必须保留指挥官审阅和证据核验。DeepSeek 用于中等复杂度任务，默认按约 5 条 lane 观察质量，再根据成功率和 reviewer 负载逐步调整。并发扩张的前提永远是任务质量和可观测性，而不是模型价格。

 模型选择矩阵：
@@ -842,6 +842,10 @@ function applyJudgeSafetyOverrides(task: QueueTask, result: CodexRunResult, judg
  return judge;
 }

+export function applyJudgeSafetyOverridesForTest(task: QueueTask, result: CodexRunResult, judge: JudgeResult): JudgeResult { // Exported test seam: exposes applyJudgeSafetyOverrides (declared without `export` in this module) to the self-test.
+  return applyJudgeSafetyOverrides(task, result, judge); // Pure delegation: arguments are forwarded unchanged and the result is returned as-is.
+}
+
 export async function judgeTask(task: QueueTask, result: CodexRunResult): Promise<JudgeResult> {
  if (config().minimaxApiKey.length === 0) {
    const judge = applyJudgeSafetyOverrides(task, result, fallbackJudge(result));
@@ -2,7 +2,7 @@

 import { minimaxM27Model } from "./code-agent/common";
 import { openCodeTransportClosedBeforeTerminal, remoteOpenCodeRunCommandForTest } from "./code-agent/opencode";
-import { continuePromptSourceBudgetChars, miniMaxJudgeMessages, parsedContinuePromptForJudge, parseJudgeJson, queueRecoveryRetryPrompt, retryPrompt } from "./judge";
+import { applyJudgeSafetyOverridesForTest, continuePromptSourceBudgetChars, miniMaxJudgeMessages, parsedContinuePromptForJudge, parseJudgeJson, queueRecoveryRetryPrompt, retryPrompt } from "./judge";
 import { codeQueueEnvironmentHintTitle, injectCodeQueueEnvironmentHint, promptWithCodeQueueEnvironmentHint, userPromptForDisplay } from "./prompts";
 import { buildTaskTranscript, safePreview, taskTraceSummaryFixtureResponse, transcriptLineSummaryLines } from "./task-view";
 import type { ActiveRunSlotWaiter } from "./code-agent/common";
@@ -265,6 +265,30 @@ async function runReferenceInjectionSelfTest(): Promise<JsonValue> {
    ...Array.from({ length: 80 }, (_, index) => `验收点 ${index + 1}: 基于当前 thread 上文补齐缺失证据，并在最终 response 中写出真实命令/API/UI 结果。`),
  ].join("\n");
  const explicitRetryPrompt = retryPrompt(retryTask, { decision: "retry", confidence: 1, reason: "Long MiniMax feedback fixture", continuePrompt: explicitLongContinuePrompt, source: "minimax" });
+  const liveMutatingDevicePodPrompt = [
+    "DEV test class: live-mutating hardware smoke on G14 DEV device-pod only.",
+    "",
+    "Allowed live mutation:",
+    "- 仅允许通过 `hwpod` 修改 `device-pod-71-freq` 的 `workspace:/projects/01_baseline` 下 firmware 源码和 Keil project membership。",
+    "",
+    "Forbidden actions:",
+    "- 禁止使用 `/app/tools/tran.mjs`，禁止上传临时脚本。",
+    "",
+    "Closeout fields:",
+    "- `DEV test class`、`Allowed mutation actually used`、`Forbidden actions avoided`。",
+  ].join("\n");
+  const liveMutatingDevicePodTask = testTask("codex_live_mutating_device_pod_retry", liveMutatingDevicePodPrompt, "", [], "2026-05-08T00:32:00.000Z");
+  const liveMutatingDevicePodRetry = applyJudgeSafetyOverridesForTest(liveMutatingDevicePodTask, {
+    threadId: "thread_live_mutating_device_pod_retry",
+    turnId: "turn_live_mutating_device_pod_retry",
+    finalResponse: "",
+    terminalStatus: "failed",
+    terminalError: "unexpected status 503 Service Unavailable",
+    transportClosedBeforeTerminal: false,
+    appServerExit: { code: 0, signal: null, stderrTail: "" },
+    events: [],
+  }, { decision: "retry", confidence: 0.7, reason: "Codex turn 失败。", continuePrompt: "继续完成原始任务。", source: "fallback" });
+  const liveMutatingContinuePrompt = liveMutatingDevicePodRetry.continuePrompt ?? "";
  let longMiniMaxPromptRejectedAtSource = false;
  try {
    parsedContinuePromptForJudge({ continuePrompt: `${"x".repeat(continuePromptSourceBudgetChars + 1)}` }, "retry");
@@ -277,6 +301,10 @@ async function runReferenceInjectionSelfTest(): Promise<JsonValue> {
  assertReferenceTest(!recoveryAfterDeepReference.includes("Reference Round"), "queue recovery prompt should not re-inject reference rounds");
  assertReferenceTest(explicitRetryPrompt === explicitLongContinuePrompt, "explicit continuePrompt should not be tail-truncated");
  assertReferenceTest(!explicitRetryPrompt.includes("已截断"), "explicit continuePrompt should not include truncation marker");
+  assertReferenceTest(liveMutatingContinuePrompt.includes("live-mutating hardware smoke"), "live-mutating retry should preserve the original authorization summary");
+  assertReferenceTest(liveMutatingContinuePrompt.includes("hwpod"), "live-mutating retry should preserve tool authorization details");
+  assertReferenceTest(!liveMutatingContinuePrompt.includes("只读 continuation"), "live-mutating retry must not be downgraded to read-only continuation");
+  assertReferenceTest(!liveMutatingContinuePrompt.includes("不改变运行态、生产态或持久化状态"), "live-mutating retry must not add generic read-only mutation bans");
  assertReferenceTest(longMiniMaxPromptRejectedAtSource, "over-budget MiniMax continuePrompt should be rejected for source repair");
  return {
    ok: true,
@@ -291,6 +319,7 @@ async function runReferenceInjectionSelfTest(): Promise<JsonValue> {
      { name: "retry_prompt_does_not_reinject_reference_graph", ok: true, chars: retryAfterDeepReference.length },
      { name: "queue_recovery_prompt_is_compact", ok: true, chars: recoveryAfterDeepReference.length },
      { name: "explicit_continue_prompt_not_tail_truncated", ok: true, chars: explicitRetryPrompt.length },
+      { name: "live_mutating_retry_not_downgraded_to_readonly", ok: true, chars: liveMutatingContinuePrompt.length },
      { name: "over_budget_minimax_continue_prompt_requires_source_repair", ok: true, budgetChars: continuePromptSourceBudgetChars },
    ],
    promptPreview: safePreview(promptC, 1200),