docs: add code queue minimax routing guard
This commit is contained in:
@@ -46,7 +46,7 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文
|
||||
- `bun scripts/cli.ts gh auth status|issue ...|pr list|view|create|comment` / `bun scripts/code-queue-pr-preflight-example.ts`:通过 REST 执行安全 GitHub issue 读写、脱敏 auth/status 诊断、body-file Markdown 写入、#24 指挥简报新增时间线 ClaudeQQ 通知、escape 扫描、只读 cleanup-plan 和 #20 board-audit、PR 创建/评论 dry-run 与 runner PR preflight;`gh pr merge` 当前仍结构化拒绝,规则见 `docs/reference/cli.md` 和 `docs/reference/code-queue-supervision.md`。
|
||||
- `bun scripts/cli.ts ci install/status/run/publish-backend-core/publish-user-service/run-dev-e2e/logs`:在 D601 原生 k3s 上安装和运行 Tekton CI,支持每 commit 检查、Code Queue 只读性能门禁、`CI.json` catalog 驱动的 backend-core 与 user-service commit-pinned 镜像发布和手动触发的 `origin/master:deploy.json#environments.dev` 临时 namespace e2e;catalog/producer/consumer 分工见 `docs/reference/cicd-standardization.md`,`run-dev-e2e` 的 Git 控制 runner、短 launcher 和 no-CD 边界见 `docs/reference/dev-ci-runner.md`,Tekton 规则见 `docs/reference/ci.md`。
|
||||
- `bun scripts/cli.ts codex deploy <commitId>`:旧 Code Queue 兼容部署入口已禁用,原因是它会绕过受控部署边界直连 D601 部署 Code Queue;规则见 `docs/reference/codex-deploy.md`。
|
||||
- `bun scripts/cli.ts codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue <id>]` / `codex pr-preflight [--remote]`:前者通过 backend-core 私有代理提交 Code Queue 任务;后者只读检查 D601 scheduler/runner 的 GitHub token、egress 和 PR 能力,PR 型派单前必须使用,规则见 `docs/reference/cli.md`。
|
||||
- `bun scripts/cli.ts codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue <id>]` / `codex pr-preflight [--remote]`:前者通过 backend-core 私有代理提交 Code Queue 任务,`--dry-run` 会给出 MiniMax/GPT/人工路由建议但不改写 payload;后者只读检查 D601 scheduler/runner 的 GitHub token、egress 和 PR 能力,PR 型派单前必须使用,规则见 `docs/reference/cli.md` 和 `docs/reference/code-queue-supervision.md`。
|
||||
- `bun scripts/cli.ts codex task <taskId>`:按 Code Queue 任务 ID 查询默认审阅摘要,只返回原始 prompt、最终 response、最后错误和渐进披露命令;需要工具调用、attempt/judge 和详细耗时时显式加 `--detail`。
|
||||
- `bun scripts/cli.ts codex judge <taskId> --attempt <n> [--dry-run]`:按指定 task/attempt 用与队列 worker 相同的上下文构建和 MiniMax judge 调用路径单步复现完成判定;`--dry-run` 只输出 prompt/payload 诊断。
|
||||
- `bun scripts/cli.ts codex steer <taskId> [prompt|--prompt-file path|--prompt-stdin] [--dry-run]`:通过 Code Queue 私有代理向运行中的 active turn 注入纠偏提示,正式替代底层 `microservice proxy ... /steer` 调用。
|
||||
|
||||
@@ -40,7 +40,7 @@ CLI 可以从 `master` 快速演进,但必须兼容 `deploy.json` 固定的 CI
|
||||
- `ci install|status|run|publish-backend-core|publish-user-service|run-dev-e2e|logs` 管理 D601 原生 k3s 上的 Tekton CI。`run` 手动创建每 commit 检查和 Code Queue 只读性能门禁;`publish-backend-core` 与 `publish-user-service` 从 pushed Git commit 构建并发布 `127.0.0.1:5000/unidesk/<service>:<commit>` commit-pinned artifacts,输出 `artifactSummary`(含 `serviceId`、`sourceCommit`、`sourceRepo`、`dockerfile`、`imageRef`、`tag`、`digest`、`digestRef`),但不部署生产;`run-dev-e2e` 的 Git 控制 runner、短 launcher、host fetch 边界、临时 smoke namespace 和 no-CD 规则只在 `docs/reference/dev-ci-runner.md` 定义;Tekton CI 通用规则见 `docs/reference/ci.md`。
|
||||
- `schedule list|get|runs|run|retry-run|delete|upsert-pgdata-backup` 管理 backend-core 定时任务和运行历史。`schedule list`、`schedule get`、`schedule runs --limit N` 和 `schedule runs <scheduleId> --limit N` 是只读观察入口;`schedule run`、`schedule retry-run`、`schedule delete` 和 `schedule upsert-pgdata-backup` 会触发运行或写入配置,生产恢复时必须有明确授权。`schedule runs --limit N` 是全局历史视图,返回 `scope=global` 和 `scheduleId=null`;`schedule runs <scheduleId> --limit N` 是指定 schedule 历史视图,返回 `scope=schedule` 和对应 `scheduleId`。CLI 必须拒绝 `schedule runs 50` 这类纯数字位置参数,并提示使用 `schedule runs --limit 50`,避免把空数组误判成“没有历史 run”。`schedule run <id> --wait-ms N` 触发同一 schedule,并且即使 wait 超时也必须返回 `newRunId` 和 `observeCommand`;`schedule retry-run <failedRunId>` 只接受 failed run,从原 run 反查 `scheduleId` 后重触发同一 schedule,并输出 `originalRunId`、`scheduleId`、`newRunId` 和 `observeCommand`。当 backend-core 目标容器缺失或只观察到 verify-only 容器时,schedule/microservice 命令必须以非零退出并返回 `failureKind=target-stack-not-running`、`runnerDisposition=infra-blocked`、`readOnlyCommands` 和 `authorizationRequiredForRecovery`,不得把 Docker 的 `No such container` 当成成功的空历史。
|
||||
- `codex deploy <commitId>` 是旧 Code Queue 兼容部署入口,已禁用以防止维护通道直连 D601 部署 Code Queue;当前 dev 自动化只做 `ci run-dev-e2e` smoke,不提供 Code Queue CD,详细规则见 `docs/reference/codex-deploy.md`。
|
||||
- `codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue queueId] [--provider-id id] [--cwd path] [--model model] [--reasoning-effort effort] [--execution-mode mode] [--max-attempts N] [--reference-task-id id] [--dry-run]` 通过 backend-core 私有代理向稳定 `code-queue` 用户服务路径提交任务;prompt 必须且只能来自位置参数、文件或 stdin 之一,`--dry-run` 只返回结构化请求且不实际入队。提交确认和 dry-run 必须返回完整 prompt、字符数和 `truncated=false`,不能套用任务详情的预览截断策略,否则长任务 prompt 无法被人工验收。真实提交会经过本机本地串行化保护和短节流,避免同一指挥端并发 submit 把低内存主机或 `code-queue-mgr` 控制面打抖;返回值会附带 `submitConcurrencyGuard` 说明本次提交的锁与等待信息。backend-core 默认把提交、队列 CRUD、已读状态、历史摘要和轻量 Trace 读取分流到主 server `code-queue-mgr`,由它写入主 PostgreSQL;D601 scheduler 只轮询并执行已入库任务。
|
||||
- `codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue queueId] [--provider-id id] [--cwd path] [--model model] [--reasoning-effort effort] [--execution-mode mode] [--max-attempts N] [--reference-task-id id] [--dry-run]` 通过 backend-core 私有代理向稳定 `code-queue` 用户服务路径提交任务;prompt 必须且只能来自位置参数、文件或 stdin 之一,`--dry-run` 只返回结构化请求且不实际入队。dry-run 会额外输出 `routingRecommendation`,包含推荐 route、runner、model、风险信号、prompt 自包含/issue 非唯一来源/prod-secret-DB 禁止/证据要求等 guard 状态;该建议只用于指挥官 preflight,不会改写 payload,也不假设生产 MiniMax 可用。提交确认和 dry-run 必须返回完整 prompt、字符数和 `truncated=false`,不能套用任务详情的预览截断策略,否则长任务 prompt 无法被人工验收。真实提交会经过本机本地串行化保护和短节流,避免同一指挥端并发 submit 把低内存主机或 `code-queue-mgr` 控制面打抖;返回值会附带 `submitConcurrencyGuard` 说明本次提交的锁与等待信息。backend-core 默认把提交、队列 CRUD、已读状态、历史摘要和轻量 Trace 读取分流到主 server `code-queue-mgr`,由它写入主 PostgreSQL;D601 scheduler 只轮询并执行已入库任务。
|
||||
- `codex pr-preflight [--remote] [--push-dry-run --push-dry-run-ref refs/heads/probe/<name>] [--issue N] [--full]` 通过稳定 `code-queue` proxy 请求 D601 scheduler `/api/runtime-preflight`,用于 PR 型派单 admission。输出会压缩展示 scheduler/runner 的 token 覆盖、工具、agent port、Git worktree、GitHub egress、repo/issue/PR 只读探测和可选 push dry-run;只报告 `GH_TOKEN`/`GITHUB_TOKEN` 是否存在和来源 key,不打印值。缺少 env token 时顶层 `ok=false`、`runnerDisposition=infra-blocked`,`tokenCoverage.missing` 同时列出 `GH_TOKEN` 与 `GITHUB_TOKEN`。
|
||||
- `codex task <taskId>` 通过 Code Queue 私有代理按任务 ID 查询结构化审阅摘要;默认只返回任务身份、执行 Provider、工作目录、attempt 计数、原始 prompt、最终 response、最后错误和渐进披露命令,适合指挥官审阅完成未读任务且避免上下文爆炸。需要旧式详细摘要时显式加 `--detail`;需要完整 prompt/response 文本时加 `--full`;需要工具调用、judge、attempt 全量摘要时使用 `--detail --full --tool-limit N`。该摘要读取默认由主 server `code-queue-mgr` 从 PostgreSQL 返回,不依赖 D601 `code-queue-read` Service 可用。
|
||||
- `codex tasks [--view supervisor|full] [--queue id] [--status succeeded|running|queued|failed|canceled|judging|retry_wait[,..]] [--unread|--unread-only] [--limit N] [--before-id id]` 通过同一私有代理输出渐进式披露视图。默认 `supervisor` 只返回 `running`、`completedUnread`、`recentCompleted`、`queued` 和 `executionDiagnostics` 摘要,不嵌入完整 Trace、final response 或全量 overview;每个条目都带 `commands.show`、`commands.trace`、`commands.output`、`commands.read` 和 `commands.full`。`--unread` 是 `--unread-only` 的别名,必须只保留未读终态;`--status` 必须真实过滤支持的状态,未知参数或未知状态必须结构化失败,不能静默忽略。需要完整当前页任务简表时显式使用 `--view full` 或 `--full`,仍受 `--limit` 和 `--before-id` 分页约束。
|
||||
|
||||
@@ -43,16 +43,41 @@
|
||||
|
||||
## 模型和成本路由
|
||||
|
||||
MiniMax/OpenCode runner 的可用性是 Code Queue 的主要能力方向之一。它不是可选实验项,而是控制 GPT-5.5 成本、扩大并发吞吐、让简单任务不占用高成本模型的基础设施能力。
|
||||
MiniMax/OpenCode runner 的可用性是 Code Queue 的主要能力方向之一。它不是可选实验项,而是控制 GPT-5.5 成本、扩大并发吞吐、让简单任务不占用高成本模型的基础设施能力。但 MiniMax 幻觉和伪造证据风险显著高于 GPT-5.5,因此只能承接低权限、低风险、低 blast radius 且可用外部证据复核的任务。
|
||||
|
||||
指挥官派单时应按任务难度、风险和验收成本选择模型:
|
||||
MiniMax 路由策略的前置条件是 runner live smoke 稳定通过,并且失败能被结构化归类。#30 只证明 dev smoke 已有成功证据;生产 rollout 仍按受控节奏推进,指挥官不能假设 prod MiniMax 已可用。若 MiniMax/OpenCode 本身不可用,相关修复归入 Code Queue 运行态维护线;在修通前,不应用 MiniMax 承担会阻塞主交付链路的任务。
|
||||
|
||||
- 低风险、边界清晰、可用文件/命令/commit 直接验收的任务,优先分配给 MiniMax/OpenCode,例如只读调查、文档初稿、表格整理、轻量 CLI contract、局部测试补齐和小范围样板代码。
|
||||
- 中等复杂度、需要跨文件判断但不触碰生产运行态的任务,可以在 MiniMax 可用且已有成功样本后尝试分担;prompt 必须更窄,验收证据必须更具体。
|
||||
- 高风险任务继续优先使用 GPT-5.5,例如生产部署、凭证和网络变更、Code Queue/backend-core 运行态修复、跨模块架构调整、复杂回滚、以及最终质量裁决。
|
||||
- MiniMax 任务不得降低验收标准。指挥官必须核验它声称读取的文件、执行的命令、远端 commit、测试输出和 live 证据;对推测、遗漏、伪造或把失败写成成功的情况,应拆小重派或改用 GPT-5.5。
|
||||
`codex submit --dry-run` 是派单前的轻量 preflight。它在结构化请求之外额外输出 `routingRecommendation`,帮助指挥官看到推荐 runner/model、风险信号和缺失的 prompt guard;它不会修改真实提交 payload,也不会替代指挥官判断。真实派单是否使用 `--model minimax-m2.7` 仍由指挥官显式决定。
|
||||
|
||||
MiniMax 路由策略的前置条件是 runner live smoke 稳定通过,并且失败能被结构化归类。若 MiniMax/OpenCode 本身不可用,相关修复归入 Code Queue 运行态维护线;在修通前,不应用 MiniMax 承担会阻塞主交付链路的任务。
|
||||
MiniMax/OpenCode 可承担任务必须同时满足这些条件:
|
||||
|
||||
| 类型 | 适用边界 | 必须证据 | 不可越界 |
|
||||
| --- | --- | --- | --- |
|
||||
| 只读调查 | 查找文件、梳理现有实现、列出候选入口、对比文档口径 | 文件路径、行号、命令输出摘要 | 不得把 issue 内容当唯一输入,不得声称读取了无法验证的远端状态 |
|
||||
| 中文文档初稿或长期参考补丁 | `docs/reference/`、`AGENTS.md` 索引、一句话摘要、表格治理规则 | diff、文档位置、轻量格式/grep 检查 | 不写流水账,不改 release/v1 runtime |
|
||||
| 轻量 CLI dry-run/preflight | 只读或 dry-run 输出、contract test、参数校验、错误分类 | dry-run JSON、contract test、`bun` 脚本验证 | 不改 runtime 调度核心,不触碰生产服务 |
|
||||
| 局部测试补齐 | 单文件或小范围 contract/unit test,覆盖明确回归 | 测试命令、失败前提、通过输出 | 不跑 heavy check、E2E、Playwright |
|
||||
| 小范围样板代码 | 非共享核心、可快速 review、可用类型检查或 dry-run 验证 | 修改文件、轻量类型/脚本验证、commit | 不跨多服务,不改凭证、网络、部署或数据库 |
|
||||
| 数据整理和看板候选草案 | 生成 #20/#24 更新草案、任务表、验收 checklist | 草案 diff、来源列表、人工待审标记 | 不直接替代指挥官审阅,不自动清空未读任务 |
|
||||
|
||||
GPT-5.5/Codex 必须承担这些任务:
|
||||
|
||||
| 类型 | 原因 | 验收重点 |
|
||||
| --- | --- | --- |
|
||||
| Code Queue/backend-core/provider-gateway/k3sctl-adapter 运行态修复 | 运行面错误会影响调度、观测和恢复路径,MiniMax 误判成本高 | 多信号诊断、轻量 contract、必要时 dev 验证;不隐式 prod rollout |
|
||||
| 跨模块架构或共享契约调整 | 需要理解系统边界、兼容性和长期演进 | 文档、代码、测试和回滚边界一致 |
|
||||
| CI/CD、artifact、deploy、release/v1 治理 | 易影响发布真相和稳定维护线 | dry-run/plan、commit-pinned 证据、release governance 一致 |
|
||||
| 安全、凭证、网络、egress/proxy 变更 | 涉及 secret 泄漏、访问路径和外部服务 | 不输出 token,最小权限,结构化失败 |
|
||||
| 复杂 bug 修复和最终质量裁决 | 需要辨别不完整证据、伪造、隐藏失败 | trace/output/commit/test 交叉验证 |
|
||||
| 生产部署方案设计或回滚方案 | 生产 blast radius 高,即使只写方案也需高可信推理 | 明确非目标、授权点、验证和回滚 |
|
||||
|
||||
只能由人工或指挥官处理的任务包括:真实生产重启、Code Queue backend 重建、运行中任务 interrupt/cancel、密钥读取或轮换、数据库手工写入、破坏性 Git 操作、强制回滚、把完成未读任务批量标记已读、以及任何需要用户授权的高风险恢复动作。这类任务可以让 worker 起草方案或 checklist,但执行权不下放给 MiniMax,也不应下放给普通 GPT worker,除非用户明确授权并给出边界。
|
||||
|
||||
MiniMax prompt 必须自包含:目标、背景、写入范围、禁止动作、验证命令、final response 字段都要写进 prompt;GitHub issue 只能作为辅助引用,不能作为唯一来源。禁止使用“读取 issue 后按里面做”这类 prompt。若确实需要 issue 内容,指挥官先把关键需求、约束和验收点摘入 prompt,再附 issue URL。
|
||||
|
||||
MiniMax 风险控制必须固定包含:禁止 prod/重启/密钥/DB 写入;禁止 release/v1 runtime 修改;禁止 heavy check/E2E/Playwright,除非任务明确改为 GPT-5.5 且用户授权;必须给出可验证证据,包括修改文件、命令、测试输出、commit 和未覆盖风险;完成后保持未读,由指挥官用 `codex task <taskId>` 审阅后再单独 `codex read <taskId>`。
|
||||
|
||||
对 MiniMax 的验收不能只看 final response 自述。指挥官至少核验 diff、commit 是否在声明目标分支可 fetch、轻量验证命令是否真实存在、输出是否和任务范围匹配;遇到推测、遗漏、伪造证据或把失败写成成功时,先拆成更小任务或改派 GPT-5.5。
|
||||
|
||||
## GitHub Issue 和 PR 使用
|
||||
|
||||
@@ -111,7 +136,7 @@ Runner preflight 优先使用执行面诊断入口:
|
||||
bun scripts/cli.ts codex pr-preflight --remote --issue 20
|
||||
```
|
||||
|
||||
该命令经 backend-core 稳定 `code-queue` proxy 访问 D601 scheduler 的 `/api/runtime-preflight`,报告 scheduler/runner 环境里的 `GH_TOKEN`/`GITHUB_TOKEN` 覆盖、工具、Git worktree、GitHub egress、repo/issue/PR 只读探测和可选 push dry-run。缺少 env token 时必须返回 `ok=false`、`runnerDisposition=infra-blocked` 和 `tokenCoverage.missing=["GH_TOKEN","GITHUB_TOKEN"]`,因为 provider dev container 只能转发 scheduler 已经拥有的 token。
|
||||
该命令经 backend-core 稳定 `code-queue` proxy 访问 D601 scheduler 的 `/api/runtime-preflight`,报告 scheduler/runner 环境里的 `GH_TOKEN`/`GITHUB_TOKEN` 覆盖、工具、Git worktree、GitHub egress、repo/issue/PR 只读探测和可选 push dry-run。需要复核 PR body/创建命令 guard 时追加 `--pr-create-dry-run --pr-create-dry-run-head <head>`;该 guard 只执行 dry-run,不创建 PR。缺少 env token 时必须返回 `ok=false`、`runnerDisposition=infra-blocked` 和 `tokenCoverage.missing=["GH_TOKEN","GITHUB_TOKEN"]`,因为 provider dev container 只能转发 scheduler 已经拥有的 token。
|
||||
|
||||
本地 runner preflight 示例:
|
||||
|
||||
@@ -182,6 +207,8 @@ D601 artifact registry 的 systemd unit inactive 不等于 D601 全局离线。
|
||||
|
||||
每次新增 Code Queue 任务、补发 follow-up task,或处理一批完成未读任务后,都必须同步更新 GitHub 总看板 issue `#20` 的正文主表;如果发生实质态势变化,还要同步更新指挥简报 issue `#24` 的正文。看板更新应反映当前任务分布、关键 blocker 和粗略进度,不要只改聊天上下文或只改单个 issue,而让总态势图落后于实际调度状态。
|
||||
|
||||
MiniMax 成本路由接入 #20 看板时,#20 的每个 Code Queue 任务行都应显式保留推荐或实际 runner/model、风险等级、验证证据和审阅状态。推荐列的语义只来自指挥官判断或 `codex submit --dry-run` 的 `routingRecommendation`,不能来自 worker 自评。MiniMax 任务在 #20 中必须先保持“待指挥官审阅”状态;只有指挥官核验 diff、commit、轻量验证和未越界后,才更新为已验收并执行 `codex read <taskId>`。若 dry-run 给出 `commander-human-only` 或 GPT-5.5 recommendation,#20 不应把该任务降级给 MiniMax;若必须降级,必须在看板 progress 中写明人工接受的风险和额外审阅动作。
|
||||
|
||||
## 指挥工作流
|
||||
|
||||
对每个活跃任务,按顺序评估四件事:
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
import { codexSubmitRoutingRecommendationForTest } from "./src/code-queue";
|
||||
|
||||
type JsonRecord = Record<string, unknown>;
|
||||
|
||||
/**
 * Throws an `Error` when `condition` is falsy.
 *
 * The error message is `"<message>: <detail as JSON>"`. If `detail` cannot be
 * serialized (for example a circular structure or a BigInt), the function
 * falls back to `String(detail)` so the original failure message is never
 * replaced by a serialization error.
 *
 * @param condition - Value that must be truthy.
 * @param message - Human-readable description of the failed expectation.
 * @param detail - Extra context appended to the message; defaults to `{}`.
 * @throws Error when `condition` is falsy.
 */
function assertCondition(condition: unknown, message: string, detail: unknown = {}): asserts condition {
  if (condition) return;
  let rendered: string;
  try {
    rendered = JSON.stringify(detail) ?? String(detail);
  } catch {
    // JSON.stringify throws on circular references and BigInt values.
    rendered = String(detail);
  }
  throw new Error(`${message}: ${rendered}`);
}
|
||||
|
||||
/**
 * Narrows an unknown value to a plain JSON object.
 *
 * @param value - Candidate value.
 * @returns The same value typed as `JsonRecord`.
 * @throws Error (via `assertCondition`) when the value is `null`, an array, or not an object.
 */
function asRecord(value: unknown): JsonRecord {
  const isPlainObject = typeof value === "object" && value !== null && !Array.isArray(value);
  assertCondition(isPlainObject, "expected JSON object", { value });
  return value as JsonRecord;
}
|
||||
|
||||
const lowRiskPrompt = `
|
||||
目标:更新 docs/reference/code-queue-supervision.md 中的 MiniMax 派单规则。
|
||||
范围:只改中文长期文档和一个轻量 dry-run contract test,不触碰 runtime 调度核心。
|
||||
禁止:不要重启服务,不要读取密钥,不要写数据库,不要部署 prod。
|
||||
验证:运行 bun scripts/code-queue-submit-routing-contract-test.ts,并在 final response 给出验证证据、commit 和风险。
|
||||
背景:本 prompt 是完整需求来源,GitHub issue 只能作为辅助引用,不能作为唯一来源。需要 dry-run/preflight 输出帮助指挥官判断 runner/model。请保持改动低风险、可审阅、可回滚,并让指挥官完成后审阅未读任务。
|
||||
`;
|
||||
|
||||
const runtimePrompt = `
|
||||
目标:修复 Code Queue runtime scheduler 的 active run 状态机。
|
||||
范围:src/components/microservices/code-queue/src/index.ts 和 runtime-preflight。
|
||||
禁止:不要部署 prod。
|
||||
验证:需要证明 scheduler heartbeat、active run、OpenCode session recovery 都正确。
|
||||
`;
|
||||
|
||||
const commanderOnlyPrompt = `
|
||||
目标:在 production 上 deploy apply 并 restart code-queue,必要时读取 secret token 和写 PostgreSQL 修复任务状态。
|
||||
验证:live health。
|
||||
`;
|
||||
|
||||
/**
 * Contract test for the `codex submit --dry-run` routing recommendation.
 *
 * Feeds the three fixture prompts plus one explicit-model override through
 * `codexSubmitRoutingRecommendationForTest` and checks the recommended route,
 * runner and model for each.
 *
 * @returns `{ ok: true, checks }` listing the expectations that were verified.
 * @throws Error (via `assertCondition`) on the first expectation that fails.
 */
export function runCodeQueueSubmitRoutingContract(): JsonRecord {
  // Scenario 1: a self-contained, low-risk docs/contract-test prompt is a MiniMax candidate.
  const minimaxCandidate = codexSubmitRoutingRecommendationForTest(lowRiskPrompt);
  assertCondition(minimaxCandidate.route === "minimax-opencode", "low-risk self-contained prompt should be a MiniMax candidate", minimaxCandidate);
  assertCondition(minimaxCandidate.recommendedRunner === "opencode", "MiniMax candidate should recommend OpenCode", minimaxCandidate);
  assertCondition(minimaxCandidate.recommendedModel === "minimax-m2.7", "MiniMax candidate should recommend minimax-m2.7", minimaxCandidate);
  assertCondition(asRecord(minimaxCandidate.riskControls).promptSelfContained === true, "low-risk prompt should be self-contained", minimaxCandidate);
  assertCondition(asRecord(minimaxCandidate.riskControls).issueIsNotOnlySource === true, "issue must not be the only source", minimaxCandidate);

  // Scenario 2: runtime/scheduler work stays on GPT-5.5 with the Codex runner.
  const runtimeWork = codexSubmitRoutingRecommendationForTest(runtimePrompt);
  assertCondition(runtimeWork.route === "gpt-5.5-codex", "runtime/core work should stay on GPT-5.5", runtimeWork);
  assertCondition(runtimeWork.recommendedRunner === "codex", "runtime/core work should recommend Codex runner", runtimeWork);
  assertCondition(runtimeWork.recommendedModel === "gpt-5.5", "runtime/core work should recommend GPT-5.5", runtimeWork);

  // Scenario 3: production restart / secret / DB work is never handed to a worker.
  const humanOnly = codexSubmitRoutingRecommendationForTest(commanderOnlyPrompt);
  assertCondition(humanOnly.route === "commander-human-only", "prod restart/secrets/DB work should be commander-only", humanOnly);
  assertCondition(humanOnly.recommendedRunner === "commander", "commander-only work should not recommend a runner", humanOnly);
  assertCondition(humanOnly.recommendedModel === null, "commander-only work should not recommend a model", humanOnly);

  // Scenario 4: an explicit --model that disagrees with the recommendation is reported, not rewritten.
  const overridden = codexSubmitRoutingRecommendationForTest(lowRiskPrompt, "gpt-5.5");
  const requested = asRecord(overridden.explicitRequest);
  assertCondition(requested.runner === "codex", "explicit gpt model should map to Codex", overridden);
  assertCondition(String(requested.note ?? "").includes("differs"), "explicit model mismatch should be visible", overridden);
  assertCondition(asRecord(overridden.routingPolicy).doesNotChangeSubmittedPayload === true, "dry-run recommendation must not rewrite payload", overridden);

  const checks = [
    "low-risk self-contained prompts recommend minimax-m2.7/OpenCode",
    "runtime/core work recommends GPT-5.5/Codex",
    "prod/restart/secret/DB work is commander-only",
    "explicit --model mismatch is visible and payload is unchanged",
  ];
  return { ok: true, checks };
}
|
||||
|
||||
if (import.meta.main) {
|
||||
process.stdout.write(`${JSON.stringify(runCodeQueueSubmitRoutingContract(), null, 2)}\n`);
|
||||
}
|
||||
@@ -282,6 +282,7 @@ export function runChecks(config: UniDeskConfig, options: CheckOptions = default
|
||||
fileItem("scripts/src/code-queue-liveness-fixtures.ts"),
|
||||
fileItem("scripts/code-queue-trace-summary-contract-test.ts"),
|
||||
fileItem("scripts/code-queue-pr-preflight-contract-test.ts"),
|
||||
fileItem("scripts/code-queue-submit-routing-contract-test.ts"),
|
||||
fileItem("scripts/src/ci.ts"),
|
||||
fileItem("scripts/src/e2e.ts"),
|
||||
fileItem("scripts/deploy-artifact-matrix-contract-test.ts"),
|
||||
@@ -303,6 +304,7 @@ export function runChecks(config: UniDeskConfig, options: CheckOptions = default
|
||||
items.push(commandItem("code-queue:issue3-diagnostics-and-image-preflight", ["bun", "scripts/code-queue-issue3-regression-test.ts"], 30_000));
|
||||
items.push(commandItem("code-queue:trace-summary-contract", ["bun", "scripts/code-queue-trace-summary-contract-test.ts"], 30_000));
|
||||
items.push(commandItem("code-queue:pr-preflight-contract", ["bun", "scripts/code-queue-pr-preflight-contract-test.ts"], 30_000));
|
||||
items.push(commandItem("code-queue:submit-routing-contract", ["bun", "scripts/code-queue-submit-routing-contract-test.ts"], 30_000));
|
||||
items.push(commandItem("deploy:artifact-matrix-contract", ["bun", "scripts/deploy-artifact-matrix-contract-test.ts"], 30_000));
|
||||
items.push(commandItem("code-queue:active-run-heartbeat-visible", ["bun", "scripts/code-queue-liveness-diagnostics-test.ts", "--only", "code-queue:active-run-heartbeat-visible"], 30_000));
|
||||
items.push(commandItem("code-queue:trace-gap-not-stale", ["bun", "scripts/code-queue-liveness-diagnostics-test.ts", "--only", "code-queue:trace-gap-not-stale"], 30_000));
|
||||
@@ -320,6 +322,7 @@ export function runChecks(config: UniDeskConfig, options: CheckOptions = default
|
||||
items.push(skippedItem("code-queue:issue3-diagnostics-and-image-preflight", "Code Queue issue #3 regression fixtures are opt-in with script checks", "--scripts-typecheck or --full"));
|
||||
items.push(skippedItem("code-queue:trace-summary-contract", "Code Queue trace summary contract is opt-in with script checks", "--scripts-typecheck or --full"));
|
||||
items.push(skippedItem("code-queue:pr-preflight-contract", "Code Queue PR preflight contract is opt-in with script checks", "--scripts-typecheck or --full"));
|
||||
items.push(skippedItem("code-queue:submit-routing-contract", "Code Queue submit routing contract is opt-in with script checks", "--scripts-typecheck or --full"));
|
||||
items.push(skippedItem("deploy:artifact-matrix-contract", "deploy artifact matrix contract is opt-in with script checks", "--scripts-typecheck or --full"));
|
||||
items.push(skippedItem("code-queue:liveness-diagnostics-fixtures", "Code Queue liveness diagnostics fixtures are opt-in with script checks", "--scripts-typecheck or --full"));
|
||||
items.push(skippedItem("baidu-netdisk:artifact-guard-contract", "Baidu Netdisk artifact guard contract is opt-in with script checks", "--scripts-typecheck or --full"));
|
||||
|
||||
@@ -11,6 +11,8 @@ const defaultTextPreviewChars = 12_000;
|
||||
const defaultTasksLimit = 20;
|
||||
const maxTasksLimit = 100;
|
||||
const steerPromptPreviewChars = 320;
|
||||
const minimaxSubmitModel = "minimax-m2.7";
|
||||
const gptSubmitModel = "gpt-5.5";
|
||||
const submitLockWaitMs = 60_000;
|
||||
const submitLockPollMs = 250;
|
||||
const submitLockStaleMs = 120_000;
|
||||
@@ -56,6 +58,43 @@ interface CodexSubmitOptions {
|
||||
dryRun: boolean;
|
||||
}
|
||||
|
||||
/** Route identifiers a dry-run routing recommendation can take. */
type SubmitRoute =
  | "minimax-opencode"
  | "gpt-5.5-codex"
  | "commander-human-only";
|
||||
/** Severity label attached to each routing signal. */
type SubmitRouteSignalSeverity =
  | "info"
  | "warning"
  | "block";
|
||||
|
||||
/** One heuristic signal evaluated against a submit prompt. */
interface SubmitRouteSignal {
  /** Stable identifier of the signal, e.g. `"runtime-core"`. */
  id: string;
  /** Severity label assigned to the signal. */
  severity: SubmitRouteSignalSeverity;
  /** True when at least one evidence snippet was collected. */
  matched: boolean;
  /** De-duplicated prompt snippets that triggered the signal. */
  evidence: string[];
  /** Human-readable explanation of what the signal means. */
  message: string;
}
|
||||
|
||||
/**
 * Advisory routing result attached to `codex submit --dry-run` output.
 * It describes a recommendation only; the literal-typed `routingPolicy`
 * flags record that the submitted payload is never rewritten.
 */
interface SubmitRoutingRecommendation {
  /** Recommended route for the task. */
  route: SubmitRoute;
  /** Runner that goes with the route; `"commander"` means no worker runner. */
  recommendedRunner: "opencode" | "codex" | "commander";
  /** Recommended model id, or `null` for commander/human-only work. */
  recommendedModel: string | null;
  /** Confidence of the heuristic decision. */
  confidence: "medium" | "high";
  /** Human-readable explanation of the decision. */
  reason: string;
  /** Every evaluated signal, matched or not. */
  signals: SubmitRouteSignal[];
  /** Guard states derived from the prompt. */
  riskControls: {
    promptSelfContained: boolean;
    issueIsNotOnlySource: boolean;
    noProdRestartSecretOrDbWrite: boolean;
    evidenceRequiredByPrompt: boolean;
    commanderMustReviewUnread: true;
  };
  /** What the caller explicitly asked for via `--model`, if anything. */
  explicitRequest: {
    model: string | null;
    runner: "opencode" | "codex" | null;
    note: string | null;
  };
  /** Fixed policy flags describing the advisory nature of the recommendation. */
  routingPolicy: {
    dryRunOnly: true;
    doesNotChangeSubmittedPayload: true;
    prodMiniMaxAssumedAvailable: false;
  };
}
|
||||
|
||||
interface CodexSteerOptions {
|
||||
prompt: string;
|
||||
dryRun: boolean;
|
||||
@@ -532,6 +571,222 @@ function compactQueuedReason(value: unknown): Record<string, unknown> | null {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Canonicalizes a `--model` value for routing comparisons.
 *
 * Blank input yields `""`. A value whose last `/`-separated segment is the
 * MiniMax model id or the short alias `m2.7` (case-insensitive) becomes the
 * canonical MiniMax id. Any other value is returned trimmed with its original
 * casing.
 *
 * @param value - Raw model option, possibly missing.
 * @returns The canonical model string, or `""` when nothing was supplied.
 */
function normalizeSubmitModel(value: string | null | undefined): string {
  const trimmed = String(value ?? "").trim();
  if (trimmed === "") return trimmed;

  const lowered = trimmed.toLowerCase();
  // Provider-qualified ids such as "provider/model" are compared by their final segment.
  const segments = lowered.split("/");
  const leaf = segments[segments.length - 1] ?? lowered;

  const isMinimaxAlias = leaf === minimaxSubmitModel || leaf === "m2.7";
  return isMinimaxAlias ? minimaxSubmitModel : trimmed;
}
|
||||
|
||||
/**
 * Maps an explicit model choice to the runner that would execute it.
 *
 * @param model - Raw or normalized model option.
 * @returns `null` when no model was given, `"opencode"` for the MiniMax model,
 *          and `"codex"` for every other non-empty model.
 */
function submitRunnerForModel(model: string | null | undefined): "opencode" | "codex" | null {
  const canonical = normalizeSubmitModel(model);
  if (canonical === "") return null;
  if (canonical === minimaxSubmitModel) return "opencode";
  return "codex";
}
|
||||
|
||||
/**
 * Collects de-duplicated snippets of `text` that match any of `patterns`.
 *
 * Each snippet is trimmed and has internal whitespace runs collapsed to a
 * single space. Collection stops as soon as `limit` snippets were gathered.
 *
 * @param text - Text to scan.
 * @param patterns - Patterns to apply in order. A pattern without the `g` flag
 *                   is scanned through a global copy, because
 *                   `String.prototype.matchAll` throws on non-global patterns.
 * @param limit - Maximum number of snippets to return; values `<= 0` yield `[]`.
 * @returns Matched snippets in discovery order.
 */
function regexEvidence(text: string, patterns: RegExp[], limit = 6): string[] {
  const evidence: string[] = [];
  if (limit <= 0) return evidence;
  for (const pattern of patterns) {
    const globalPattern = pattern.global ? pattern : new RegExp(pattern.source, `${pattern.flags}g`);
    for (const match of text.matchAll(globalPattern)) {
      const value = String(match[0] ?? "").trim().replace(/\s+/gu, " ");
      if (value.length === 0 || evidence.includes(value)) continue;
      evidence.push(value);
      if (evidence.length >= limit) return evidence;
    }
  }
  return evidence;
}
|
||||
|
||||
/**
 * Like `regexEvidence`, but drops matches that sit in a negated context
 * (e.g. "不要部署 prod", "do not restart").
 *
 * The context is the text up to 36 characters on each side of the match,
 * clipped to the sentence or line that contains the match. Clipping stops a
 * negation in a neighbouring sentence ("restart the service. Do not touch
 * docs") from hiding a real risk mention. The English cue `no` is anchored to
 * a word boundary so it no longer fires inside words such as "casino ".
 *
 * @param text - Text to scan.
 * @param patterns - Patterns to apply in order; non-global patterns are
 *                   scanned through a global copy.
 * @param limit - Maximum number of snippets to return; values `<= 0` yield `[]`.
 * @returns De-duplicated, whitespace-normalized snippets that are not negated.
 */
function regexEvidenceWithoutNegatedContext(text: string, patterns: RegExp[], limit = 6): string[] {
  const contextRadius = 36;
  const negationCue = /(?:禁止|不要|不得|不能|不应|请勿|严禁|avoid|forbid|forbidden|do not|don't|must not|\bno\s+)/iu;
  // Sentence/line terminators; an ASCII period only counts when followed by
  // whitespace so file names like "index.ts" do not split a sentence.
  const sentenceBreak = /[。!?!?;;\r\n]|\.(?=\s)/gu;

  const evidence: string[] = [];
  if (limit <= 0) return evidence;

  for (const pattern of patterns) {
    const globalPattern = pattern.global ? pattern : new RegExp(pattern.source, `${pattern.flags}g`);
    for (const match of text.matchAll(globalPattern)) {
      const matched = String(match[0] ?? "");
      const matchStart = match.index ?? 0;
      const matchEnd = matchStart + matched.length;
      const windowStart = Math.max(0, matchStart - contextRadius);
      const windowEnd = Math.min(text.length, matchEnd + contextRadius);

      // Shrink the window to the sentence that contains the match.
      let clauseStart = windowStart;
      let clauseEnd = windowEnd;
      for (const boundary of text.slice(windowStart, windowEnd).matchAll(sentenceBreak)) {
        const at = windowStart + (boundary.index ?? 0);
        if (at < matchStart) {
          clauseStart = at + 1; // keep moving to the last break before the match
        } else if (at >= matchEnd) {
          clauseEnd = at; // first break after the match ends the clause
          break;
        }
      }

      if (negationCue.test(text.slice(clauseStart, clauseEnd))) continue;

      const value = matched.trim().replace(/\s+/gu, " ");
      if (value.length === 0 || evidence.includes(value)) continue;
      evidence.push(value);
      if (evidence.length >= limit) return evidence;
    }
  }
  return evidence;
}
|
||||
|
||||
/**
 * Builds a routing signal record; the signal counts as matched when any
 * evidence snippet was collected.
 *
 * @param id - Stable signal identifier.
 * @param severity - Severity label for the signal.
 * @param evidence - Snippets that triggered the signal (may be empty).
 * @param message - Human-readable explanation of the signal.
 * @returns The assembled signal.
 */
function routeSignal(id: string, severity: SubmitRouteSignalSeverity, evidence: string[], message: string): SubmitRouteSignal {
  const matched = evidence.length !== 0;
  const signal: SubmitRouteSignal = { id, severity, matched, evidence, message };
  return signal;
}
|
||||
|
||||
/**
 * Computes the advisory routing recommendation shown by `codex submit --dry-run`.
 *
 * The prompt is scanned with keyword heuristics and routed by the first rule that applies:
 *   1. any production/state-mutation, secret, DB or destructive-command hit -> commander/human only;
 *   2. runtime-core or cross-module/release hits -> GPT-5.5 on Codex (high confidence);
 *   3. self-contained + issue not the only source + evidence requested + low-risk hit
 *      -> MiniMax on OpenCode (high confidence);
 *   4. low-risk hit + issue not the only source -> MiniMax on OpenCode (medium confidence);
 *   5. otherwise -> GPT-5.5 on Codex (medium confidence).
 *
 * The function only reads `options.prompt` and `options.model`, and returns a
 * description; it never changes the payload.
 *
 * NOTE(review): the blocking and runtime-core keyword lists are English-only,
 * while the low-risk and evidence lists also contain Chinese terms. A prompt
 * that names restart, secret or database work only in Chinese is not blocked
 * here. Confirm whether that is intended.
 *
 * @param options - Parsed submit options.
 * @returns The routing recommendation with signals, guard states and policy flags.
 */
function submitRoutingRecommendation(options: CodexSubmitOptions): SubmitRoutingRecommendation {
  const { prompt } = options;
  const loweredPrompt = prompt.toLowerCase();

  // Blocking signals: negated mentions ("do not deploy prod") are ignored by the helper.
  const prodOrStateMutation = regexEvidenceWithoutNegatedContext(loweredPrompt, [
    /\bprod(?:uction)?\b/gu,
    /\brestart(?:ing|ed)?\b/gu,
    /\brebuild(?:ing|ed)?\b/gu,
    /\bdeploy(?:ment|ing|ed)?\b/gu,
    /\bserver\s+rebuild\b/gu,
    /\bdeploy\s+apply\b/gu,
    /\binterrupt\b|\bcancel\b/gu,
    /\bsecret\b|\btoken\b|\bapi[_-]?key\b|\bcredential\b/gu,
    /\bpostgres(?:ql)?\b|\bpsql\b|\bdatabase\b|\bdb\b|\bsql\b|\bmigration\b/gu,
  ]);
  const destructiveWords = regexEvidenceWithoutNegatedContext(loweredPrompt, [
    /\brm\s+-rf\b/gu,
    /\bgit\s+reset\s+--hard\b/gu,
    // No trailing \b: "--" followed by a space (the pathspec form
    // `git checkout -- <path>`) has no word boundary and would never match.
    /\bgit\s+checkout\s+--/gu,
    /\bdrop\s+table\b/gu,
    /\btruncate\s+table\b/gu,
    /\bdelete\s+from\b/gu,
  ]);

  // Signals that keep the task on GPT-5.5.
  const runtimeCore = regexEvidence(loweredPrompt, [
    /\bcode[- ]queue\s+(?:runtime|scheduler|backend|execution|runner)\b/gu,
    /\bbackend-core\b/gu,
    /\bprovider-gateway\b/gu,
    /\bk3sctl-adapter\b/gu,
    /\bruntime-preflight\b/gu,
    /\bactive\s+run\b/gu,
  ]);
  const crossModule = regexEvidence(loweredPrompt, [
    /\bcross[- ]module\b/gu,
    /\barchitecture\b|\barchitectural\b/gu,
    /\brelease\/v1\b/gu,
    /\bci\/cd\b/gu,
    /\brollout\b|\brollback\b/gu,
    /跨模块/gu,
    /架构/gu,
    /回滚方案|复杂回滚/gu,
  ]);

  // GitHub issue usage: any reference, "go read the issue" phrasing, and the explicit auxiliary-only guard.
  const issueReference = regexEvidence(loweredPrompt, [
    /\bgithub\s+issue\b/gu,
    /\bissue\s+#?\d+\b/gu,
    /#\d+/gu,
    /\bgh\s+issue\s+view\b/gu,
  ]);
  const issueOnly = regexEvidence(loweredPrompt, [
    /\bread\s+(?:the\s+)?issue\b/gu,
    /\bsee\s+(?:github\s+)?issue\b/gu,
    /\bfrom\s+issue\s+#?\d+\b/gu,
    /\bissue\s+(?:has|contains)\s+(?:the\s+)?(?:full|complete)\s+(?:context|requirements?)\b/gu,
    /读取\s*(?:github\s*)?issue/gu,
    /查看\s*(?:github\s*)?issue/gu,
  ]);
  const issueAuxiliaryGuard = regexEvidence(prompt, [
    /issue[^。\n]*辅助引用/giu,
    /issue[^。\n]*不能作为唯一来源/giu,
    /issue[^。\n]*not[^。\n]*only[^。\n]*source/giu,
    /issue[^。\n]*auxiliary[^。\n]*reference/giu,
  ]);

  // Positive signals for a MiniMax candidate.
  const lowRiskEvidence = regexEvidence(loweredPrompt, [
    /\bdry-run\b/gu,
    /\bpreflight\b/gu,
    /\bcontract\s+test\b/gu,
    /\btypecheck\b/gu,
    /\bdocs?\b|\bdocumentation\b/gu,
    /\bread[- ]?only\b/gu,
    /只读/gu,
    /文档/gu,
    /轻量/gu,
  ]);
  const evidenceRequest = regexEvidence(loweredPrompt, [
    /\bevidence\b/gu,
    /\bverification\b|\bverified\b|\bvalidate\b|\bvalidation\b/gu,
    /\btest(?:s|ed|ing)?\b/gu,
    /\bdry-run\b/gu,
    /\bcommit\b/gu,
    /验证/gu,
    /证据/gu,
    /自测/gu,
  ]);
  const selfContainedHints = regexEvidence(prompt, [
    /目标[::]/gu,
    /范围[::]/gu,
    /禁止[::]/gu,
    /验证[::]/gu,
    /final response/giu,
    /完整需求/gu,
    /本 prompt/gu,
  ]);

  const model = normalizeSubmitModel(options.model);
  const explicitRunner = submitRunnerForModel(model);

  // Guard states reported under riskControls.
  const promptSelfContained = prompt.length >= 700 || selfContainedHints.length >= 3;
  // Parenthesized to make the original `a || b || c && d` precedence explicit.
  const issueIsNotOnlySource =
    issueReference.length === 0 ||
    issueAuxiliaryGuard.length > 0 ||
    (issueOnly.length === 0 && prompt.length >= 500);
  const noProdRestartSecretOrDbWrite = prodOrStateMutation.length === 0 && destructiveWords.length === 0;
  const evidenceRequiredByPrompt = evidenceRequest.length > 0;
  const hasLowRiskEvidence = lowRiskEvidence.length > 0;

  const signals = [
    routeSignal("prod-state-secret-db-write", "block", [...prodOrStateMutation, ...destructiveWords], "Mentions production/state mutation, restart, secrets, DB writes, or destructive commands."),
    routeSignal("runtime-core", "warning", runtimeCore, "Touches Code Queue runtime, backend-core, provider-gateway, k3s adapter, or active run behavior."),
    routeSignal("issue-source-risk", "warning", issueOnly, "Prompt appears to rely on GitHub issue reading as task context."),
    routeSignal("issue-auxiliary-source-guard", "info", issueAuxiliaryGuard, "Prompt explicitly says GitHub issue is auxiliary and not the only source."),
    routeSignal("cross-module-release", "warning", crossModule, "Mentions cross-module architecture, CI/CD rollout, release line, or rollback work."),
    routeSignal("low-risk-verifiable", "info", lowRiskEvidence, "Mentions low-risk or verifiable work such as docs, read-only checks, dry-run, preflight, or contract tests."),
    routeSignal("evidence-requested", "info", evidenceRequest, "Prompt asks for tests, validation, commit, or evidence."),
    routeSignal("self-contained-hints", "info", selfContainedHints, "Prompt includes explicit task sections that make it easier to verify without reading an issue."),
  ];

  // Default decision: GPT-5.5 on Codex with medium confidence.
  let route: SubmitRoute = "gpt-5.5-codex";
  let recommendedRunner: SubmitRoutingRecommendation["recommendedRunner"] = "codex";
  let recommendedModel: string | null = gptSubmitModel;
  let confidence: SubmitRoutingRecommendation["confidence"] = "medium";
  let reason = "Default to GPT-5.5 when the prompt is not clearly low-risk and self-contained.";

  if (!noProdRestartSecretOrDbWrite) {
    route = "commander-human-only";
    recommendedRunner = "commander";
    recommendedModel = null;
    confidence = "high";
    reason = "This task mentions production/state mutation, restart, secrets, DB writes, or destructive operations; keep it with the commander or a human.";
  } else if (runtimeCore.length > 0 || crossModule.length > 0) {
    route = "gpt-5.5-codex";
    confidence = "high";
    reason = "This task touches runtime/core/cross-module or release-governance surfaces, so it should stay on GPT-5.5.";
  } else if (promptSelfContained && issueIsNotOnlySource && evidenceRequiredByPrompt && hasLowRiskEvidence) {
    route = "minimax-opencode";
    recommendedRunner = "opencode";
    recommendedModel = minimaxSubmitModel;
    confidence = "high";
    reason = "The prompt looks self-contained, low-risk, and asks for verifiable evidence; it is a MiniMax/OpenCode candidate if the runner smoke is currently green.";
  } else if (hasLowRiskEvidence && issueIsNotOnlySource && noProdRestartSecretOrDbWrite) {
    route = "minimax-opencode";
    recommendedRunner = "opencode";
    recommendedModel = minimaxSubmitModel;
    confidence = "medium";
    reason = "The prompt has low-risk signals, but the commander should tighten self-contained context and evidence requirements before relying on MiniMax.";
  }

  // Report whether an explicit --model agrees with the recommendation; never rewrite it.
  let explicitNote: string | null = null;
  if (model.length > 0) {
    explicitNote = explicitRunner === recommendedRunner
      ? "Explicit --model matches the dry-run recommendation."
      : "Explicit --model differs from the dry-run recommendation; this dry-run does not rewrite the payload.";
  }

  return {
    route,
    recommendedRunner,
    recommendedModel,
    confidence,
    reason,
    signals,
    riskControls: {
      promptSelfContained,
      issueIsNotOnlySource,
      noProdRestartSecretOrDbWrite,
      evidenceRequiredByPrompt,
      commanderMustReviewUnread: true,
    },
    explicitRequest: {
      model: model.length === 0 ? null : model,
      runner: explicitRunner,
      note: explicitNote,
    },
    routingPolicy: {
      dryRunOnly: true,
      doesNotChangeSubmittedPayload: true,
      prodMiniMaxAssumedAvailable: false,
    },
  };
}
|
||||
|
||||
function compactSchedulerHeartbeat(value: unknown): Record<string, unknown> | null {
|
||||
const record = asRecord(value);
|
||||
if (record === null) return null;
|
||||
@@ -2149,6 +2404,21 @@ export function codexPrPreflightQueryForTest(optionArgs: string[], fetcher: Code
|
||||
return codeQueuePrPreflight(optionArgs, fetcher);
|
||||
}
|
||||
|
||||
/**
 * Test hook exposing the dry-run routing heuristic.
 *
 * Builds a dry-run option set in which only `prompt` and `model` are
 * populated, then forwards it to `submitRoutingRecommendation`.
 *
 * @param prompt - Prompt text to classify.
 * @param model - Optional explicit `--model` value.
 * @returns The routing recommendation for the prompt.
 */
export function codexSubmitRoutingRecommendationForTest(prompt: string, model?: string): SubmitRoutingRecommendation {
  const dryRunOptions: CodexSubmitOptions = {
    prompt,
    model,
    queueId: undefined,
    providerId: undefined,
    cwd: undefined,
    reasoningEffort: undefined,
    executionMode: undefined,
    maxAttempts: undefined,
    referenceTaskIds: [],
    dryRun: true,
  };
  return submitRoutingRecommendation(dryRunOptions);
}
|
||||
|
||||
function codexSubmitTask(args: string[]): unknown {
|
||||
const options = parseSubmitOptions(args);
|
||||
const payload = submitPayload(options);
|
||||
@@ -2156,10 +2426,16 @@ function codexSubmitTask(args: string[]): unknown {
|
||||
return {
|
||||
ok: true,
|
||||
dryRun: true,
|
||||
routingRecommendation: submitRoutingRecommendation(options),
|
||||
request: {
|
||||
...payload,
|
||||
prompt: textView(options.prompt, true, 3000),
|
||||
},
|
||||
commands: {
|
||||
submitAsRequested: "remove --dry-run to submit exactly this payload",
|
||||
minimaxCandidate: `bun scripts/cli.ts codex submit --prompt-file <path> --model ${minimaxSubmitModel} --dry-run`,
|
||||
gptCandidate: `bun scripts/cli.ts codex submit --prompt-file <path> --model ${gptSubmitModel} --dry-run`,
|
||||
},
|
||||
};
|
||||
}
|
||||
const locked = runWithSubmitLock(() => unwrapCodexResponse(coreInternalFetch(codeQueueProxyPath("/api/tasks"), { method: "POST", body: payload })));
|
||||
|
||||
Reference in New Issue
Block a user