docs: add code queue minimax routing guard

This commit is contained in:
Codex
2026-05-21 01:14:15 +00:00
parent acc03b10f4
commit a43c24dd88
6 changed files with 387 additions and 10 deletions
+1 -1
View File
@@ -46,7 +46,7 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文
- `bun scripts/cli.ts gh auth status|issue ...|pr list|view|create|comment` / `bun scripts/code-queue-pr-preflight-example.ts`:通过 REST 执行安全 GitHub issue 读写、脱敏 auth/status 诊断、body-file Markdown 写入、#24 指挥简报新增时间线 ClaudeQQ 通知、escape 扫描、只读 cleanup-plan 和 #20 board-audit、PR 创建/评论 dry-run 与 runner PR preflight`gh pr merge` 当前仍结构化拒绝,规则见 `docs/reference/cli.md``docs/reference/code-queue-supervision.md`
- `bun scripts/cli.ts ci install/status/run/publish-backend-core/publish-user-service/run-dev-e2e/logs`:在 D601 原生 k3s 上安装和运行 Tekton CI,支持每 commit 检查、Code Queue 只读性能门禁、`CI.json` catalog 驱动的 backend-core 与 user-service commit-pinned 镜像发布和手动触发的 `origin/master:deploy.json#environments.dev` 临时 namespace e2ecatalog/producer/consumer 分工见 `docs/reference/cicd-standardization.md``run-dev-e2e` 的 Git 控制 runner、短 launcher 和 no-CD 边界见 `docs/reference/dev-ci-runner.md`Tekton 规则见 `docs/reference/ci.md`
- `bun scripts/cli.ts codex deploy <commitId>`:旧 Code Queue 兼容部署入口已禁用,原因是它会绕过受控部署边界直连 D601 部署 Code Queue;规则见 `docs/reference/codex-deploy.md`
- `bun scripts/cli.ts codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue <id>]` / `codex pr-preflight [--remote]`:前者通过 backend-core 私有代理提交 Code Queue 任务;后者只读检查 D601 scheduler/runner 的 GitHub token、egress 和 PR 能力,PR 型派单前必须使用,规则见 `docs/reference/cli.md`
- `bun scripts/cli.ts codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue <id>]` / `codex pr-preflight [--remote]`:前者通过 backend-core 私有代理提交 Code Queue 任务,`--dry-run` 会给出 MiniMax/GPT/人工路由建议但不改写 payload;后者只读检查 D601 scheduler/runner 的 GitHub token、egress 和 PR 能力,PR 型派单前必须使用,规则见 `docs/reference/cli.md` 和 `docs/reference/code-queue-supervision.md`。
- `bun scripts/cli.ts codex task <taskId>`:按 Code Queue 任务 ID 查询默认审阅摘要,只返回原始 prompt、最终 response、最后错误和渐进披露命令;需要工具调用、attempt/judge 和详细耗时时显式加 `--detail`
- `bun scripts/cli.ts codex judge <taskId> --attempt <n> [--dry-run]`:按指定 task/attempt 用与队列 worker 相同的上下文构建和 MiniMax judge 调用路径单步复现完成判定;`--dry-run` 只输出 prompt/payload 诊断。
- `bun scripts/cli.ts codex steer <taskId> [prompt|--prompt-file path|--prompt-stdin] [--dry-run]`:通过 Code Queue 私有代理向运行中的 active turn 注入纠偏提示,正式替代底层 `microservice proxy ... /steer` 调用。
+1 -1
View File
@@ -40,7 +40,7 @@ CLI 可以从 `master` 快速演进,但必须兼容 `deploy.json` 固定的 CI
- `ci install|status|run|publish-backend-core|publish-user-service|run-dev-e2e|logs` 管理 D601 原生 k3s 上的 Tekton CI。`run` 手动创建每 commit 检查和 Code Queue 只读性能门禁;`publish-backend-core``publish-user-service` 从 pushed Git commit 构建并发布 `127.0.0.1:5000/unidesk/<service>:<commit>` commit-pinned artifacts,输出 `artifactSummary`(含 `serviceId``sourceCommit``sourceRepo``dockerfile``imageRef``tag``digest``digestRef`),但不部署生产;`run-dev-e2e` 的 Git 控制 runner、短 launcher、host fetch 边界、临时 smoke namespace 和 no-CD 规则只在 `docs/reference/dev-ci-runner.md` 定义;Tekton CI 通用规则见 `docs/reference/ci.md`
- `schedule list|get|runs|run|retry-run|delete|upsert-pgdata-backup` 管理 backend-core 定时任务和运行历史。`schedule list``schedule get``schedule runs --limit N``schedule runs <scheduleId> --limit N` 是只读观察入口;`schedule run``schedule retry-run``schedule delete``schedule upsert-pgdata-backup` 会触发运行或写入配置,生产恢复时必须有明确授权。`schedule runs --limit N` 是全局历史视图,返回 `scope=global``scheduleId=null``schedule runs <scheduleId> --limit N` 是指定 schedule 历史视图,返回 `scope=schedule` 和对应 `scheduleId`。CLI 必须拒绝 `schedule runs 50` 这类纯数字位置参数,并提示使用 `schedule runs --limit 50`,避免把空数组误判成“没有历史 run”。`schedule run <id> --wait-ms N` 触发同一 schedule,并且即使 wait 超时也必须返回 `newRunId``observeCommand``schedule retry-run <failedRunId>` 只接受 failed run,从原 run 反查 `scheduleId` 后重触发同一 schedule,并输出 `originalRunId``scheduleId``newRunId``observeCommand`。当 backend-core 目标容器缺失或只观察到 verify-only 容器时,schedule/microservice 命令必须以非零退出并返回 `failureKind=target-stack-not-running``runnerDisposition=infra-blocked``readOnlyCommands``authorizationRequiredForRecovery`,不得把 Docker 的 `No such container` 当成成功的空历史。
- `codex deploy <commitId>` 是旧 Code Queue 兼容部署入口,已禁用以防止维护通道直连 D601 部署 Code Queue;当前 dev 自动化只做 `ci run-dev-e2e` smoke,不提供 Code Queue CD,详细规则见 `docs/reference/codex-deploy.md`
- `codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue queueId] [--provider-id id] [--cwd path] [--model model] [--reasoning-effort effort] [--execution-mode mode] [--max-attempts N] [--reference-task-id id] [--dry-run]` 通过 backend-core 私有代理向稳定 `code-queue` 用户服务路径提交任务;prompt 必须且只能来自位置参数、文件或 stdin 之一,`--dry-run` 只返回结构化请求且不实际入队。提交确认和 dry-run 必须返回完整 prompt、字符数和 `truncated=false`,不能套用任务详情的预览截断策略,否则长任务 prompt 无法被人工验收。真实提交会经过本机本地串行化保护和短节流,避免同一指挥端并发 submit 把低内存主机或 `code-queue-mgr` 控制面打抖;返回值会附带 `submitConcurrencyGuard` 说明本次提交的锁与等待信息。backend-core 默认把提交、队列 CRUD、已读状态、历史摘要和轻量 Trace 读取分流到主 server `code-queue-mgr`,由它写入主 PostgreSQLD601 scheduler 只轮询并执行已入库任务。
- `codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue queueId] [--provider-id id] [--cwd path] [--model model] [--reasoning-effort effort] [--execution-mode mode] [--max-attempts N] [--reference-task-id id] [--dry-run]` 通过 backend-core 私有代理向稳定 `code-queue` 用户服务路径提交任务;prompt 必须且只能来自位置参数、文件或 stdin 之一,`--dry-run` 只返回结构化请求且不实际入队。dry-run 会额外输出 `routingRecommendation`,包含推荐 route、runner、model、风险信号、prompt 自包含/issue 非唯一来源/prod-secret-DB 禁止/证据要求等 guard 状态;该建议只用于指挥官 preflight,不会改写 payload,也不假设生产 MiniMax 可用。提交确认和 dry-run 必须返回完整 prompt、字符数和 `truncated=false`,不能套用任务详情的预览截断策略,否则长任务 prompt 无法被人工验收。真实提交会经过本机本地串行化保护和短节流,避免同一指挥端并发 submit 把低内存主机或 `code-queue-mgr` 控制面打抖;返回值会附带 `submitConcurrencyGuard` 说明本次提交的锁与等待信息。backend-core 默认把提交、队列 CRUD、已读状态、历史摘要和轻量 Trace 读取分流到主 server `code-queue-mgr`,由它写入主 PostgreSQLD601 scheduler 只轮询并执行已入库任务。
- `codex pr-preflight [--remote] [--push-dry-run --push-dry-run-ref refs/heads/probe/<name>] [--issue N] [--full]` 通过稳定 `code-queue` proxy 请求 D601 scheduler `/api/runtime-preflight`,用于 PR 型派单 admission。输出会压缩展示 scheduler/runner 的 token 覆盖、工具、agent port、Git worktree、GitHub egress、repo/issue/PR 只读探测和可选 push dry-run;只报告 `GH_TOKEN`/`GITHUB_TOKEN` 是否存在和来源 key,不打印值。缺少 env token 时顶层 `ok=false``runnerDisposition=infra-blocked``tokenCoverage.missing` 同时列出 `GH_TOKEN``GITHUB_TOKEN`
- `codex task <taskId>` 通过 Code Queue 私有代理按任务 ID 查询结构化审阅摘要;默认只返回任务身份、执行 Provider、工作目录、attempt 计数、原始 prompt、最终 response、最后错误和渐进披露命令,适合指挥官审阅完成未读任务且避免上下文爆炸。需要旧式详细摘要时显式加 `--detail`;需要完整 prompt/response 文本时加 `--full`;需要工具调用、judge、attempt 全量摘要时使用 `--detail --full --tool-limit N`。该摘要读取默认由主 server `code-queue-mgr` 从 PostgreSQL 返回,不依赖 D601 `code-queue-read` Service 可用。
- `codex tasks [--view supervisor|full] [--queue id] [--status succeeded|running|queued|failed|canceled|judging|retry_wait[,..]] [--unread|--unread-only] [--limit N] [--before-id id]` 通过同一私有代理输出渐进式披露视图。默认 `supervisor` 只返回 `running``completedUnread``recentCompleted``queued``executionDiagnostics` 摘要,不嵌入完整 Trace、final response 或全量 overview;每个条目都带 `commands.show``commands.trace``commands.output``commands.read``commands.full``--unread``--unread-only` 的别名,必须只保留未读终态;`--status` 必须真实过滤支持的状态,未知参数或未知状态必须结构化失败,不能静默忽略。需要完整当前页任务简表时显式使用 `--view full``--full`,仍受 `--limit``--before-id` 分页约束。
+35 -8
View File
@@ -43,16 +43,41 @@
## 模型和成本路由
MiniMax/OpenCode runner 的可用性是 Code Queue 的主要能力方向之一。它不是可选实验项,而是控制 GPT-5.5 成本、扩大并发吞吐、让简单任务不占用高成本模型的基础设施能力。
MiniMax/OpenCode runner 的可用性是 Code Queue 的主要能力方向之一。它不是可选实验项,而是控制 GPT-5.5 成本、扩大并发吞吐、让简单任务不占用高成本模型的基础设施能力。但 MiniMax 幻觉和伪造证据风险显著高于 GPT-5.5,因此只能承接低权限、低风险、低 blast radius 且可用外部证据复核的任务。
指挥官派单时应按任务难度、风险和验收成本选择模型:
MiniMax 路由策略的前置条件是 runner live smoke 稳定通过,并且失败能被结构化归类。#30 只证明 dev smoke 已有成功证据;生产 rollout 仍按受控节奏推进,指挥官不能假设 prod MiniMax 已可用。若 MiniMax/OpenCode 本身不可用,相关修复归入 Code Queue 运行态维护线;在修通前,不应用 MiniMax 承担会阻塞主交付链路的任务。
- 低风险、边界清晰、可用文件/命令/commit 直接验收的任务,优先分配给 MiniMax/OpenCode,例如只读调查、文档初稿、表格整理、轻量 CLI contract、局部测试补齐和小范围样板代码。
- 中等复杂度、需要跨文件判断但不触碰生产运行态的任务,可以在 MiniMax 可用且已有成功样本后尝试分担;prompt 必须更窄,验收证据必须更具体。
- 高风险任务继续优先使用 GPT-5.5,例如生产部署、凭证和网络变更、Code Queue/backend-core 运行态修复、跨模块架构调整、复杂回滚、以及最终质量裁决。
- MiniMax 任务不得降低验收标准。指挥官必须核验它声称读取的文件、执行的命令、远端 commit、测试输出和 live 证据;对推测、遗漏、伪造或把失败写成成功的情况,应拆小重派或改用 GPT-5.5。
`codex submit --dry-run` 是派单前的轻量 preflight。它只输出 `routingRecommendation`,帮助指挥官看到推荐 runner/model、风险信号和缺失的 prompt guard;它不会修改真实提交 payload,也不会替代指挥官判断。真实派单是否使用 `--model minimax-m2.7` 仍由指挥官显式决定。
MiniMax 路由策略的前置条件是 runner live smoke 稳定通过,并且失败能被结构化归类。若 MiniMax/OpenCode 本身不可用,相关修复归入 Code Queue 运行态维护线;在修通前,不应用 MiniMax 承担会阻塞主交付链路的任务。
MiniMax/OpenCode 可承担任务必须同时满足这些条件:
| 类型 | 适用边界 | 必须证据 | 不可越界 |
| --- | --- | --- | --- |
| 只读调查 | 查找文件、梳理现有实现、列出候选入口、对比文档口径 | 文件路径、行号、命令输出摘要 | 不得把 issue 内容当唯一输入,不得声称读取了无法验证的远端状态 |
| 中文文档初稿或长期参考补丁 | `docs/reference/`、`AGENTS.md` 索引、一句话摘要、表格治理规则 | diff、文档位置、轻量格式/grep 检查 | 不写流水账,不改 release/v1 runtime |
| 轻量 CLI dry-run/preflight | 只读或 dry-run 输出、contract test、参数校验、错误分类 | dry-run JSON、contract test、`bun` 脚本验证 | 不改 runtime 调度核心,不触碰生产服务 |
| 局部测试补齐 | 单文件或小范围 contract/unit test,覆盖明确回归 | 测试命令、失败前提、通过输出 | 不跑 heavy check、E2E、Playwright |
| 小范围样板代码 | 非共享核心、可快速 review、可用类型检查或 dry-run 验证 | 修改文件、轻量类型/脚本验证、commit | 不跨多服务,不改凭证、网络、部署或数据库 |
| 数据整理和看板候选草案 | 生成 #20/#24 更新草案、任务表、验收 checklist | 草案 diff、来源列表、人工待审标记 | 不直接替代指挥官审阅,不自动清空未读任务 |
GPT-5.5/Codex 必须承担这些任务:
| 类型 | 原因 | 验收重点 |
| --- | --- | --- |
| Code Queue/backend-core/provider-gateway/k3sctl-adapter 运行态修复 | 运行面错误会影响调度、观测和恢复路径,MiniMax 误判成本高 | 多信号诊断、轻量 contract、必要时 dev 验证;不隐式 prod rollout |
| 跨模块架构或共享契约调整 | 需要理解系统边界、兼容性和长期演进 | 文档、代码、测试和回滚边界一致 |
| CI/CD、artifact、deploy、release/v1 治理 | 易影响发布真相和稳定维护线 | dry-run/plan、commit-pinned 证据、release governance 一致 |
| 安全、凭证、网络、egress/proxy 变更 | 涉及 secret 泄漏、访问路径和外部服务 | 不输出 token,最小权限,结构化失败 |
| 复杂 bug 修复和最终质量裁决 | 需要辨别不完整证据、伪造、隐藏失败 | trace/output/commit/test 交叉验证 |
| 生产部署方案设计或回滚方案 | 生产 blast radius 高,即使只写方案也需高可信推理 | 明确非目标、授权点、验证和回滚 |
只能由人工或指挥官处理的任务包括:真实生产重启、Code Queue backend 重建、运行中任务 interrupt/cancel、密钥读取或轮换、数据库手工写入、破坏性 Git 操作、强制回滚、把完成未读任务批量标记已读、以及任何需要用户授权的高风险恢复动作。这类任务可以让 worker 起草方案或 checklist,但执行权不下放给 MiniMax,也不应下放给普通 GPT worker,除非用户明确授权并给出边界。
MiniMax prompt 必须自包含:目标、背景、写入范围、禁止动作、验证命令、final response 字段都要写进 prompt;GitHub issue 只能作为辅助引用,不能作为唯一来源。禁止使用“读取 issue 后按里面做”这类 prompt。若确实需要 issue 内容,指挥官先把关键需求、约束和验收点摘入 prompt,再附 issue URL。
MiniMax 风险控制必须固定包含:禁止 prod/重启/密钥/DB 写入;禁止 release/v1 runtime 修改;禁止 heavy check/E2E/Playwright,除非任务明确改为 GPT-5.5 且用户授权;必须给出可验证证据,包括修改文件、命令、测试输出、commit 和未覆盖风险;完成后保持未读,由指挥官用 `codex task <taskId>` 审阅后再单独 `codex read <taskId>`
对 MiniMax 的验收不能看 final response 自述。指挥官至少核验 diff、commit 是否在声明目标分支可 fetch、轻量验证命令是否真实存在、输出是否和任务范围匹配;遇到推测、遗漏、伪造证据或把失败写成成功时,先拆成更小任务或改派 GPT-5.5。
## GitHub Issue 和 PR 使用
@@ -111,7 +136,7 @@ Runner preflight 优先使用执行面诊断入口:
bun scripts/cli.ts codex pr-preflight --remote --issue 20
```
该命令经 backend-core 稳定 `code-queue` proxy 访问 D601 scheduler 的 `/api/runtime-preflight`,报告 scheduler/runner 环境里的 `GH_TOKEN`/`GITHUB_TOKEN` 覆盖、工具、Git worktree、GitHub egress、repo/issue/PR 只读探测和可选 push dry-run。缺少 env token 时必须返回 `ok=false``runnerDisposition=infra-blocked``tokenCoverage.missing=["GH_TOKEN","GITHUB_TOKEN"]`,因为 provider dev container 只能转发 scheduler 已经拥有的 token。
该命令经 backend-core 稳定 `code-queue` proxy 访问 D601 scheduler 的 `/api/runtime-preflight`,报告 scheduler/runner 环境里的 `GH_TOKEN`/`GITHUB_TOKEN` 覆盖、工具、Git worktree、GitHub egress、repo/issue/PR 只读探测和可选 push dry-run。需要复核 PR body/创建命令 guard 时追加 `--pr-create-dry-run --pr-create-dry-run-head <head>`;该 guard 只执行 dry-run,不创建 PR。缺少 env token 时必须返回 `ok=false``runnerDisposition=infra-blocked``tokenCoverage.missing=["GH_TOKEN","GITHUB_TOKEN"]`,因为 provider dev container 只能转发 scheduler 已经拥有的 token。
本地 runner preflight 示例:
@@ -182,6 +207,8 @@ D601 artifact registry 的 systemd unit inactive 不等于 D601 全局离线。
每次新增 Code Queue 任务、补发 follow-up task,或处理一批完成未读任务后,都必须同步更新 GitHub 总看板 issue `#20` 的正文主表;如果发生实质态势变化,还要同步更新指挥简报 issue `#24` 的正文。看板更新应反映当前任务分布、关键 blocker 和粗略进度,不要只改聊天上下文或只改单个 issue,而让总态势图落后于实际调度状态。
MiniMax 成本路由接入 #20 看板时,#20 的每个 Code Queue 任务行都应显式保留推荐或实际 runner/model、风险等级、验证证据和审阅状态。推荐列的语义只来自指挥官判断或 `codex submit --dry-run` 的 `routingRecommendation`,不能来自 worker 自评。MiniMax 任务在 #20 中必须先保持“待指挥官审阅”状态;只有指挥官核验 diff、commit、轻量验证和未越界后,才更新为已验收并执行 `codex read <taskId>`。若 dry-run 给出 `commander-human-only` 或 GPT-5.5 recommendation,#20 不应把该任务降级给 MiniMax;若必须降级,必须在看板 progress 中写明人工接受的风险和额外审阅动作。
## 指挥工作流
对每个活跃任务,按顺序评估四件事:
@@ -0,0 +1,71 @@
import { codexSubmitRoutingRecommendationForTest } from "./src/code-queue";
type JsonRecord = Record<string, unknown>;
/**
 * Throws when `condition` is falsy, appending `detail` to the error message.
 *
 * Declared as an assertion function so that callers get control-flow
 * narrowing on the checked expression after the call returns.
 *
 * @param condition - Value that must be truthy for the check to pass.
 * @param message - Description of the expectation that was violated.
 * @param detail - Extra context serialized into the error message.
 * @throws Error formatted as `<message>: <serialized detail>` when the condition is falsy.
 */
function assertCondition(condition: unknown, message: string, detail: unknown = {}): asserts condition {
  if (condition) return;
  let serialized: string;
  try {
    // String(...) keeps the literal "undefined" for values JSON cannot represent (e.g. undefined).
    serialized = String(JSON.stringify(detail));
  } catch {
    // Circular structures and BigInt make JSON.stringify throw; never let that mask the real failure.
    serialized = String(detail);
  }
  throw new Error(`${message}: ${serialized}`);
}
/**
 * Narrows an unknown value to a JSON object record.
 *
 * @param value - Candidate value, typically a field of a routing recommendation.
 * @returns The same value typed as `JsonRecord`.
 * @throws Error via `assertCondition` when the value is null, an array, or not an object.
 */
function asRecord(value: unknown): JsonRecord {
  const isPlainObject = typeof value === "object" && value !== null && !Array.isArray(value);
  assertCondition(isPlainObject, "expected JSON object", { value });
  return value as JsonRecord;
}
// Fixture: a docs + contract-test task with explicit goal/scope/forbidden/verification/background
// sections, where prod/restart/secret/DB actions are only mentioned as prohibitions.
// The contract below expects it to route to minimax-opencode.
const lowRiskPrompt = `
目标:更新 docs/reference/code-queue-supervision.md 中的 MiniMax 派单规则。
范围:只改中文长期文档和一个轻量 dry-run contract test,不触碰 runtime 调度核心。
禁止:不要重启服务,不要读取密钥,不要写数据库,不要部署 prod。
验证:运行 bun scripts/code-queue-submit-routing-contract-test.ts,并在 final response 给出验证证据、commit 和风险。
背景:本 prompt 是完整需求来源,GitHub issue 只能作为辅助引用,不能作为唯一来源。需要 dry-run/preflight 输出帮助指挥官判断 runner/model。请保持改动低风险、可审阅、可回滚,并让指挥官完成后审阅未读任务。
`;
// Fixture: a Code Queue runtime scheduler / active-run fix whose only prod mention is negated.
// The contract below expects it to stay on gpt-5.5-codex.
const runtimePrompt = `
目标:修复 Code Queue runtime scheduler 的 active run 状态机。
范围:src/components/microservices/code-queue/src/index.ts 和 runtime-preflight。
禁止:不要部署 prod。
验证:需要证明 scheduler heartbeat、active run、OpenCode session recovery 都正确。
`;
// Fixture: un-negated production deploy/restart plus secret and PostgreSQL access.
// The contract below expects it to route to commander-human-only.
const commanderOnlyPrompt = `
目标:在 production 上 deploy apply 并 restart code-queue,必要时读取 secret token 和写 PostgreSQL 修复任务状态。
验证:live health。
`;
/**
 * Runs the dry-run routing contract: three prompt fixtures plus one explicit `--model` override.
 *
 * Expectations are evaluated lazily and in order, so the first violated one
 * is the one reported.
 *
 * @returns `{ ok: true, checks: [...] }` describing the verified expectations.
 * @throws Error from `assertCondition` on the first expectation that does not hold.
 */
export function runCodeQueueSubmitRoutingContract(): JsonRecord {
  type Expectation = readonly [() => boolean, string];
  const verify = (subject: unknown, expectations: readonly Expectation[]): void => {
    for (const [holds, failureMessage] of expectations) {
      assertCondition(holds(), failureMessage, subject);
    }
  };

  // Self-contained, low-risk docs task -> MiniMax/OpenCode candidate.
  const docsTask = codexSubmitRoutingRecommendationForTest(lowRiskPrompt);
  verify(docsTask, [
    [() => docsTask.route === "minimax-opencode", "low-risk self-contained prompt should be a MiniMax candidate"],
    [() => docsTask.recommendedRunner === "opencode", "MiniMax candidate should recommend OpenCode"],
    [() => docsTask.recommendedModel === "minimax-m2.7", "MiniMax candidate should recommend minimax-m2.7"],
    [() => asRecord(docsTask.riskControls).promptSelfContained === true, "low-risk prompt should be self-contained"],
    [() => asRecord(docsTask.riskControls).issueIsNotOnlySource === true, "issue must not be the only source"],
  ]);

  // Runtime/core work -> GPT-5.5/Codex.
  const runtimeTask = codexSubmitRoutingRecommendationForTest(runtimePrompt);
  verify(runtimeTask, [
    [() => runtimeTask.route === "gpt-5.5-codex", "runtime/core work should stay on GPT-5.5"],
    [() => runtimeTask.recommendedRunner === "codex", "runtime/core work should recommend Codex runner"],
    [() => runtimeTask.recommendedModel === "gpt-5.5", "runtime/core work should recommend GPT-5.5"],
  ]);

  // Prod restart / secrets / DB writes -> commander or human only.
  const commanderTask = codexSubmitRoutingRecommendationForTest(commanderOnlyPrompt);
  verify(commanderTask, [
    [() => commanderTask.route === "commander-human-only", "prod restart/secrets/DB work should be commander-only"],
    [() => commanderTask.recommendedRunner === "commander", "commander-only work should not recommend a runner"],
    [() => commanderTask.recommendedModel === null, "commander-only work should not recommend a model"],
  ]);

  // Explicit model that disagrees with the recommendation must be surfaced, not rewritten.
  const overridden = codexSubmitRoutingRecommendationForTest(lowRiskPrompt, "gpt-5.5");
  const requested = asRecord(overridden.explicitRequest);
  verify(overridden, [
    [() => requested.runner === "codex", "explicit gpt model should map to Codex"],
    [() => String(requested.note ?? "").includes("differs"), "explicit model mismatch should be visible"],
    [() => asRecord(overridden.routingPolicy).doesNotChangeSubmittedPayload === true, "dry-run recommendation must not rewrite payload"],
  ]);

  const checks = [
    "low-risk self-contained prompts recommend minimax-m2.7/OpenCode",
    "runtime/core work recommends GPT-5.5/Codex",
    "prod/restart/secret/DB work is commander-only",
    "explicit --model mismatch is visible and payload is unchanged",
  ];
  return { ok: true, checks };
}
// When this file is the entry point, run the contract and print its report as indented JSON.
if (import.meta.main) {
  const report = runCodeQueueSubmitRoutingContract();
  const rendered = JSON.stringify(report, null, 2);
  process.stdout.write(`${rendered}\n`);
}
+3
View File
@@ -282,6 +282,7 @@ export function runChecks(config: UniDeskConfig, options: CheckOptions = default
fileItem("scripts/src/code-queue-liveness-fixtures.ts"),
fileItem("scripts/code-queue-trace-summary-contract-test.ts"),
fileItem("scripts/code-queue-pr-preflight-contract-test.ts"),
fileItem("scripts/code-queue-submit-routing-contract-test.ts"),
fileItem("scripts/src/ci.ts"),
fileItem("scripts/src/e2e.ts"),
fileItem("scripts/deploy-artifact-matrix-contract-test.ts"),
@@ -303,6 +304,7 @@ export function runChecks(config: UniDeskConfig, options: CheckOptions = default
items.push(commandItem("code-queue:issue3-diagnostics-and-image-preflight", ["bun", "scripts/code-queue-issue3-regression-test.ts"], 30_000));
items.push(commandItem("code-queue:trace-summary-contract", ["bun", "scripts/code-queue-trace-summary-contract-test.ts"], 30_000));
items.push(commandItem("code-queue:pr-preflight-contract", ["bun", "scripts/code-queue-pr-preflight-contract-test.ts"], 30_000));
items.push(commandItem("code-queue:submit-routing-contract", ["bun", "scripts/code-queue-submit-routing-contract-test.ts"], 30_000));
items.push(commandItem("deploy:artifact-matrix-contract", ["bun", "scripts/deploy-artifact-matrix-contract-test.ts"], 30_000));
items.push(commandItem("code-queue:active-run-heartbeat-visible", ["bun", "scripts/code-queue-liveness-diagnostics-test.ts", "--only", "code-queue:active-run-heartbeat-visible"], 30_000));
items.push(commandItem("code-queue:trace-gap-not-stale", ["bun", "scripts/code-queue-liveness-diagnostics-test.ts", "--only", "code-queue:trace-gap-not-stale"], 30_000));
@@ -320,6 +322,7 @@ export function runChecks(config: UniDeskConfig, options: CheckOptions = default
items.push(skippedItem("code-queue:issue3-diagnostics-and-image-preflight", "Code Queue issue #3 regression fixtures are opt-in with script checks", "--scripts-typecheck or --full"));
items.push(skippedItem("code-queue:trace-summary-contract", "Code Queue trace summary contract is opt-in with script checks", "--scripts-typecheck or --full"));
items.push(skippedItem("code-queue:pr-preflight-contract", "Code Queue PR preflight contract is opt-in with script checks", "--scripts-typecheck or --full"));
items.push(skippedItem("code-queue:submit-routing-contract", "Code Queue submit routing contract is opt-in with script checks", "--scripts-typecheck or --full"));
items.push(skippedItem("deploy:artifact-matrix-contract", "deploy artifact matrix contract is opt-in with script checks", "--scripts-typecheck or --full"));
items.push(skippedItem("code-queue:liveness-diagnostics-fixtures", "Code Queue liveness diagnostics fixtures are opt-in with script checks", "--scripts-typecheck or --full"));
items.push(skippedItem("baidu-netdisk:artifact-guard-contract", "Baidu Netdisk artifact guard contract is opt-in with script checks", "--scripts-typecheck or --full"));
+276
View File
@@ -11,6 +11,8 @@ const defaultTextPreviewChars = 12_000;
const defaultTasksLimit = 20;
const maxTasksLimit = 100;
const steerPromptPreviewChars = 320;
// Canonical model id for the MiniMax/OpenCode route; model aliases are normalized to this value.
const minimaxSubmitModel = "minimax-m2.7";
// Model id recommended for the GPT-5.5/Codex route (the default recommendation).
const gptSubmitModel = "gpt-5.5";
const submitLockWaitMs = 60_000;
const submitLockPollMs = 250;
const submitLockStaleMs = 120_000;
@@ -56,6 +58,43 @@ interface CodexSubmitOptions {
dryRun: boolean;
}
/** Where a dry-run recommends sending a task: MiniMax via OpenCode, GPT-5.5 via Codex, or nobody but the commander/human. */
type SubmitRoute = "minimax-opencode" | "gpt-5.5-codex" | "commander-human-only";
/** Severity label attached to a routing signal. */
type SubmitRouteSignalSeverity = "info" | "warning" | "block";
/** One heuristic signal derived from the prompt text. */
interface SubmitRouteSignal {
// Stable identifier of the heuristic, e.g. "runtime-core".
id: string;
severity: SubmitRouteSignalSeverity;
// True when at least one evidence snippet was found.
matched: boolean;
// Matched prompt snippets that triggered the signal.
evidence: string[];
// Human-readable explanation of what the signal means.
message: string;
}
/** Advisory routing result attached to `codex submit --dry-run` output. */
interface SubmitRoutingRecommendation {
route: SubmitRoute;
recommendedRunner: "opencode" | "codex" | "commander";
// null when the route is commander-human-only and no model should be used.
recommendedModel: string | null;
confidence: "medium" | "high";
// Human-readable justification for the chosen route.
reason: string;
signals: SubmitRouteSignal[];
// Guard flags computed from the prompt text.
riskControls: {
promptSelfContained: boolean;
issueIsNotOnlySource: boolean;
noProdRestartSecretOrDbWrite: boolean;
evidenceRequiredByPrompt: boolean;
// Literal true: review by the commander is always required.
commanderMustReviewUnread: true;
};
// What the caller explicitly asked for via the model option, if anything.
explicitRequest: {
model: string | null;
runner: "opencode" | "codex" | null;
// Says whether the explicit model matches or differs from the recommendation; null when no model was given.
note: string | null;
};
// Fixed literal flags stating that the recommendation is advisory only.
routingPolicy: {
dryRunOnly: true;
doesNotChangeSubmittedPayload: true;
prodMiniMaxAssumedAvailable: false;
};
}
interface CodexSteerOptions {
prompt: string;
dryRun: boolean;
@@ -532,6 +571,222 @@ function compactQueuedReason(value: unknown): Record<string, unknown> | null {
};
}
/**
 * Canonicalizes a requested model name.
 *
 * The last `/`-separated segment is compared case-insensitively against the
 * MiniMax id and its short alias `m2.7`; any other value is returned trimmed
 * but otherwise untouched.
 *
 * @param value - Raw model option; null/undefined are treated as empty.
 * @returns `""` for blank input, the canonical MiniMax id for its aliases, else the trimmed input.
 */
function normalizeSubmitModel(value: string | null | undefined): string {
  const trimmed = String(value ?? "").trim();
  if (trimmed === "") return trimmed;
  const segments = trimmed.toLowerCase().split("/");
  const lastSegment = segments[segments.length - 1];
  const isMiniMaxAlias = lastSegment === minimaxSubmitModel || lastSegment === "m2.7";
  return isMiniMaxAlias ? minimaxSubmitModel : trimmed;
}
/**
 * Maps a requested model to the runner that would execute it.
 *
 * @param model - Raw model option; blank, null and undefined mean "not specified".
 * @returns `null` when no model was given, `"opencode"` for the MiniMax model, `"codex"` for anything else.
 */
function submitRunnerForModel(model: string | null | undefined): "opencode" | "codex" | null {
  const canonical = normalizeSubmitModel(model);
  if (canonical === "") return null;
  if (canonical === minimaxSubmitModel) return "opencode";
  return "codex";
}
/**
 * Collects distinct, whitespace-collapsed snippets of `text` matched by any of `patterns`.
 *
 * Patterns are scanned in array order and matches in text order; scanning
 * stops as soon as `limit` snippets have been collected.
 *
 * @param text - Text to scan.
 * @param patterns - Regular expressions to apply. Patterns without the `g` flag
 *   are scanned through a global copy, because `String.prototype.matchAll`
 *   throws a TypeError for non-global regular expressions.
 * @param limit - Maximum number of snippets to return; values <= 0 yield an empty list.
 * @returns Unique non-empty snippets in discovery order.
 */
function regexEvidence(text: string, patterns: RegExp[], limit = 6): string[] {
  const evidence: string[] = [];
  if (limit <= 0) return evidence;
  for (const pattern of patterns) {
    const scanner = pattern.global ? pattern : new RegExp(pattern.source, `${pattern.flags}g`);
    for (const match of text.matchAll(scanner)) {
      // Collapse internal whitespace so multi-line matches stay readable in JSON output.
      const value = String(match[0] ?? "").trim().replace(/\s+/gu, " ");
      if (value.length > 0 && !evidence.includes(value)) evidence.push(value);
      if (evidence.length >= limit) return evidence;
    }
  }
  return evidence;
}
/**
 * Like a plain evidence scan, but drops matches that sit near a negation marker.
 *
 * A match is ignored when the text within 36 characters on either side of it
 * contains a negation word, so a prohibition such as "do not deploy prod" does
 * not count as a request to touch prod.
 *
 * Every alternative of the negation pattern must be non-empty: an empty
 * alternative matches any context and would suppress every match, making this
 * function always return an empty list.
 *
 * @param text - Text to scan.
 * @param patterns - Global regular expressions to apply, in order.
 * @param limit - Stop once this many snippets have been collected.
 * @returns Unique non-empty snippets that are not in a negated context.
 */
function regexEvidenceWithoutNegatedContext(text: string, patterns: RegExp[], limit = 6): string[] {
  const contextRadius = 36;
  // Chinese markers are written as \u escapes so they survive encoding-lossy tooling:
  //   \u4e0d\u8981 "do not", \u7981\u6b62 "forbidden", \u4e0d\u5f97 "must not", \u4e0d\u80fd "cannot",
  //   \u4e0d\u53ef "may not", \u907f\u514d "avoid", \u52ff "do not (formal)".
  // NOTE(review): the Chinese marker set is a reconstruction — confirm against the intended list.
  // `no` is anchored with \b so words merely ending in "no" (e.g. "piano ") do not count as negation.
  const negationMarker = /(?:\u4e0d\u8981|\u7981\u6b62|\u4e0d\u5f97|\u4e0d\u80fd|\u4e0d\u53ef|\u907f\u514d|\u52ff|avoid|forbid|forbidden|do not|don't|must not|\bno\s+)/iu;
  const evidence: string[] = [];
  for (const pattern of patterns) {
    for (const match of text.matchAll(pattern)) {
      const matchedText = String(match[0] ?? "");
      const start = match.index ?? 0;
      const context = text.slice(
        Math.max(0, start - contextRadius),
        Math.min(text.length, start + matchedText.length + contextRadius),
      );
      if (negationMarker.test(context)) continue;
      const value = matchedText.trim().replace(/\s+/gu, " ");
      if (value.length > 0 && !evidence.includes(value)) evidence.push(value);
      if (evidence.length >= limit) return evidence;
    }
  }
  return evidence;
}
/**
 * Builds a routing signal record; the signal counts as matched when any evidence was found.
 *
 * @param id - Stable identifier of the heuristic.
 * @param severity - Severity label for the signal.
 * @param evidence - Prompt snippets that triggered the heuristic (may be empty).
 * @param message - Human-readable explanation of the signal.
 * @returns The assembled signal, with `matched` derived from `evidence`.
 */
function routeSignal(id: string, severity: SubmitRouteSignalSeverity, evidence: string[], message: string): SubmitRouteSignal {
  const matched = evidence.length !== 0;
  return { id, severity, matched, evidence, message };
}
/**
 * Derives an advisory routing recommendation from the prompt text and the optional explicit model.
 *
 * The prompt is scanned with several groups of regular expressions; the resulting
 * evidence lists drive both the reported `signals` and the route decision.
 * Only `options.prompt` and `options.model` are read here.
 *
 * Decision order (first match wins):
 *   1. prod/state/secret/DB or destructive evidence -> commander-human-only
 *   2. runtime-core or cross-module evidence        -> gpt-5.5-codex (high confidence)
 *   3. self-contained + issue guard + evidence request + low-risk evidence -> minimax-opencode (high)
 *   4. low-risk evidence + issue guard              -> minimax-opencode (medium)
 *   5. otherwise                                    -> gpt-5.5-codex (medium, default)
 *
 * @param options - Parsed submit options.
 * @returns The recommendation, including signals, guard flags and fixed advisory-policy flags.
 */
function submitRoutingRecommendation(options: CodexSubmitOptions): SubmitRoutingRecommendation {
const prompt = options.prompt;
// Most pattern groups run against the lower-cased prompt, so their patterns are written in lower case.
const lower = prompt.toLowerCase();
// Hard-block vocabulary; matches inside a negated context ("do not ...") are ignored.
const prodOrStateMutation = regexEvidenceWithoutNegatedContext(lower, [
/\bprod(?:uction)?\b/gu,
/\brestart(?:ing|ed)?\b/gu,
/\brebuild(?:ing|ed)?\b/gu,
/\bdeploy(?:ment|ing|ed)?\b/gu,
/\bserver\s+rebuild\b/gu,
/\bdeploy\s+apply\b/gu,
/\binterrupt\b|\bcancel\b/gu,
/\bsecret\b|\btoken\b|\bapi[_-]?key\b|\bcredential\b/gu,
/\bpostgres(?:ql)?\b|\bpsql\b|\bdatabase\b|\bdb\b|\bsql\b|\bmigration\b/gu,
]);
// Runtime/core surfaces; counted even when negated.
const runtimeCore = regexEvidence(lower, [
/\bcode[- ]queue\s+(?:runtime|scheduler|backend|execution|runner)\b/gu,
/\bbackend-core\b/gu,
/\bprovider-gateway\b/gu,
/\bk3sctl-adapter\b/gu,
/\bruntime-preflight\b/gu,
/\bactive\s+run\b/gu,
]);
// Any mention of a GitHub issue.
const issueReference = regexEvidence(lower, [
/\bgithub\s+issue\b/gu,
/\bissue\s+#?\d+\b/gu,
/#\d+/gu,
/\bgh\s+issue\s+view\b/gu,
]);
// Phrases suggesting the issue is the source of the task context.
// NOTE(review): the last two patterns are identical and match any "issue" (optionally preceded by
// whitespace/"github"); they look like they lost a non-ASCII prefix — confirm against the intended patterns.
const issueOnly = regexEvidence(lower, [
/\bread\s+(?:the\s+)?issue\b/gu,
/\bsee\s+(?:github\s+)?issue\b/gu,
/\bfrom\s+issue\s+#?\d+\b/gu,
/\bissue\s+(?:has|contains)\s+(?:the\s+)?(?:full|complete)\s+(?:context|requirements?)\b/gu,
/\s*(?:github\s*)?issue/gu,
/\s*(?:github\s*)?issue/gu,
]);
// Statements that the issue is only an auxiliary reference. Runs on the original-case prompt with the i flag.
// NOTE(review): the first two patterns match the rest of any line containing "issue", which makes this
// guard fire on every issue mention; they look like they lost a non-ASCII suffix — confirm.
const issueAuxiliaryGuard = regexEvidence(prompt, [
/issue[^\n]*/giu,
/issue[^\n]*/giu,
/issue[^\n]*not[^\n]*only[^\n]*source/giu,
/issue[^\n]*auxiliary[^\n]*reference/giu,
]);
// Vocabulary of low-risk, verifiable work.
// NOTE(review): each bare `//gu,` line below parses as a line comment, so it contributes no pattern.
// These look like regex literals whose non-ASCII content was lost — confirm against the intended patterns.
const lowRiskEvidence = regexEvidence(lower, [
/\bdry-run\b/gu,
/\bpreflight\b/gu,
/\bcontract\s+test\b/gu,
/\btypecheck\b/gu,
/\bdocs?\b|\bdocumentation\b/gu,
/\bread[- ]?only\b/gu,
//gu,
//gu,
//gu,
]);
// Vocabulary showing that the prompt asks for verification or evidence.
// NOTE(review): same concern as above for the bare `//gu,` lines.
const evidenceRequest = regexEvidence(lower, [
/\bevidence\b/gu,
/\bverification\b|\bverified\b|\bvalidate\b|\bvalidation\b/gu,
/\btest(?:s|ed|ing)?\b/gu,
/\bdry-run\b/gu,
/\bcommit\b/gu,
//gu,
//gu,
//gu,
]);
// Section-style hints that the prompt carries its own context.
// NOTE(review): the four identical `/[:]/gu` patterns match any ASCII colon and, after de-duplication,
// yield at most one hint; `/ prompt/gu` matches any " prompt". They look like section-label patterns
// that lost their non-ASCII labels — confirm.
const selfContainedHints = regexEvidence(prompt, [
/[:]/gu,
/[:]/gu,
/[:]/gu,
/[:]/gu,
/final response/giu,
//gu,
/ prompt/gu,
]);
// Destructive shell/git/SQL commands; negated mentions are ignored.
const destructiveWords = regexEvidenceWithoutNegatedContext(lower, [
/\brm\s+-rf\b/gu,
/\bgit\s+reset\s+--hard\b/gu,
/\bgit\s+checkout\s+--\b/gu,
/\bdrop\s+table\b/gu,
/\btruncate\s+table\b/gu,
/\bdelete\s+from\b/gu,
]);
// Cross-module / release-governance vocabulary.
// NOTE(review): `/|/gu` can only match the empty string, and empty matches are discarded by
// regexEvidence, so it never contributes evidence; it and the bare `//gu,` lines look stripped — confirm.
const crossModule = regexEvidence(lower, [
/\bcross[- ]module\b/gu,
/\barchitecture\b|\barchitectural\b/gu,
/\brelease\/v1\b/gu,
/\bci\/cd\b/gu,
/\brollout\b|\brollback\b/gu,
//gu,
//gu,
/|/gu,
]);
const model = normalizeSubmitModel(options.model);
const explicitRunner = submitRunnerForModel(model);
// Self-contained when the prompt is long (>= 700 chars) or shows at least three distinct hints.
const promptSelfContained = prompt.length >= 700 || selfContainedHints.length >= 3;
// `&&` binds tighter than `||`: no issue reference, OR an explicit auxiliary guard,
// OR (no issue-only phrasing AND a prompt of at least 500 chars).
const issueIsNotOnlySource = issueReference.length === 0 || issueAuxiliaryGuard.length > 0 || issueOnly.length === 0 && prompt.length >= 500;
const noProdRestartSecretOrDbWrite = prodOrStateMutation.length === 0 && destructiveWords.length === 0;
const evidenceRequiredByPrompt = evidenceRequest.length > 0;
// Signals are reported for transparency; the route itself is decided from the evidence lists below.
const signals = [
routeSignal("prod-state-secret-db-write", "block", [...prodOrStateMutation, ...destructiveWords], "Mentions production/state mutation, restart, secrets, DB writes, or destructive commands."),
routeSignal("runtime-core", "warning", runtimeCore, "Touches Code Queue runtime, backend-core, provider-gateway, k3s adapter, or active run behavior."),
routeSignal("issue-source-risk", "warning", issueOnly, "Prompt appears to rely on GitHub issue reading as task context."),
routeSignal("issue-auxiliary-source-guard", "info", issueAuxiliaryGuard, "Prompt explicitly says GitHub issue is auxiliary and not the only source."),
routeSignal("cross-module-release", "warning", crossModule, "Mentions cross-module architecture, CI/CD rollout, release line, or rollback work."),
routeSignal("low-risk-verifiable", "info", lowRiskEvidence, "Mentions low-risk or verifiable work such as docs, read-only checks, dry-run, preflight, or contract tests."),
routeSignal("evidence-requested", "info", evidenceRequest, "Prompt asks for tests, validation, commit, or evidence."),
routeSignal("self-contained-hints", "info", selfContainedHints, "Prompt includes explicit task sections that make it easier to verify without reading an issue."),
];
// Default recommendation; overwritten by the first matching branch below.
let route: SubmitRoute = "gpt-5.5-codex";
let recommendedRunner: SubmitRoutingRecommendation["recommendedRunner"] = "codex";
let recommendedModel: string | null = gptSubmitModel;
let confidence: SubmitRoutingRecommendation["confidence"] = "medium";
let reason = "Default to GPT-5.5 when the prompt is not clearly low-risk and self-contained.";
if (prodOrStateMutation.length > 0 || destructiveWords.length > 0) {
route = "commander-human-only";
recommendedRunner = "commander";
recommendedModel = null;
confidence = "high";
reason = "This task mentions production/state mutation, restart, secrets, DB writes, or destructive operations; keep it with the commander or a human.";
} else if (runtimeCore.length > 0 || crossModule.length > 0) {
// Runner and model keep their GPT defaults; only the confidence and reason change.
route = "gpt-5.5-codex";
confidence = "high";
reason = "This task touches runtime/core/cross-module or release-governance surfaces, so it should stay on GPT-5.5.";
} else if (promptSelfContained && issueIsNotOnlySource && evidenceRequiredByPrompt && lowRiskEvidence.length > 0) {
route = "minimax-opencode";
recommendedRunner = "opencode";
recommendedModel = minimaxSubmitModel;
confidence = "high";
reason = "The prompt looks self-contained, low-risk, and asks for verifiable evidence; it is a MiniMax/OpenCode candidate if the runner smoke is currently green.";
} else if (lowRiskEvidence.length > 0 && issueIsNotOnlySource && noProdRestartSecretOrDbWrite) {
// noProdRestartSecretOrDbWrite is necessarily true here because the first branch did not fire.
route = "minimax-opencode";
recommendedRunner = "opencode";
recommendedModel = minimaxSubmitModel;
confidence = "medium";
reason = "The prompt has low-risk signals, but the commander should tighten self-contained context and evidence requirements before relying on MiniMax.";
}
// Report whether an explicit model agrees with the recommendation; the payload is never rewritten here.
const explicitNote = model.length === 0
? null
: explicitRunner === recommendedRunner
? "Explicit --model matches the dry-run recommendation."
: "Explicit --model differs from the dry-run recommendation; this dry-run does not rewrite the payload.";
return {
route,
recommendedRunner,
recommendedModel,
confidence,
reason,
signals,
riskControls: {
promptSelfContained,
issueIsNotOnlySource,
noProdRestartSecretOrDbWrite,
evidenceRequiredByPrompt,
commanderMustReviewUnread: true,
},
explicitRequest: {
model: model.length === 0 ? null : model,
runner: explicitRunner,
note: explicitNote,
},
routingPolicy: {
dryRunOnly: true,
doesNotChangeSubmittedPayload: true,
prodMiniMaxAssumedAvailable: false,
},
};
}
function compactSchedulerHeartbeat(value: unknown): Record<string, unknown> | null {
const record = asRecord(value);
if (record === null) return null;
@@ -2149,6 +2404,21 @@ export function codexPrPreflightQueryForTest(optionArgs: string[], fetcher: Code
return codeQueuePrPreflight(optionArgs, fetcher);
}
/**
 * Test-only entry point for the dry-run routing heuristic.
 *
 * Builds dry-run submit options in which only the prompt and the optional
 * model are set, then returns the routing recommendation for them.
 *
 * @param prompt - Prompt text to classify.
 * @param model - Optional explicit model, as it would be passed via `--model`.
 * @returns The routing recommendation computed for those options.
 */
export function codexSubmitRoutingRecommendationForTest(prompt: string, model?: string): SubmitRoutingRecommendation {
  const dryRunOptions: CodexSubmitOptions = {
    prompt,
    model,
    queueId: undefined,
    providerId: undefined,
    cwd: undefined,
    reasoningEffort: undefined,
    executionMode: undefined,
    maxAttempts: undefined,
    referenceTaskIds: [],
    dryRun: true,
  };
  return submitRoutingRecommendation(dryRunOptions);
}
function codexSubmitTask(args: string[]): unknown {
const options = parseSubmitOptions(args);
const payload = submitPayload(options);
@@ -2156,10 +2426,16 @@ function codexSubmitTask(args: string[]): unknown {
return {
ok: true,
dryRun: true,
routingRecommendation: submitRoutingRecommendation(options),
request: {
...payload,
prompt: textView(options.prompt, true, 3000),
},
commands: {
submitAsRequested: "remove --dry-run to submit exactly this payload",
minimaxCandidate: `bun scripts/cli.ts codex submit --prompt-file <path> --model ${minimaxSubmitModel} --dry-run`,
gptCandidate: `bun scripts/cli.ts codex submit --prompt-file <path> --model ${gptSubmitModel} --dry-run`,
},
};
}
const locked = runWithSubmitLock(() => unwrapCodexResponse(coreInternalFetch(codeQueueProxyPath("/api/tasks"), { method: "POST", body: payload })));