diff --git a/AGENTS.md b/AGENTS.md index c07760f3..fb60ed48 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -44,6 +44,7 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文 - `bun scripts/cli.ts dev-env validate [--manifest path] [--kubectl-dry-run]` / `dev-env prewarm-images`:离线校验 D601 `unidesk-dev` 生产隔离护栏和 dev workload manifests,或把开发底座基础镜像预热到 D601 原生 k3s containerd,规则见 `docs/reference/deploy.md` 与 `docs/reference/microservices.md`。 - `bun scripts/cli.ts artifact-registry plan|render|status|health|install|deploy-backend-core|deploy-service`:管理 D601 host-managed CNCF Distribution registry,并通过短生命周期 relay 或 D601 pull/import 做 commit-pinned pull-only artifact CD;`deploy-backend-core` 是 deprecated 兼容名,`findjob`/`pipeline` 支持 D601 direct dev/prod,`met-nonlinear` 和 `k3sctl-adapter` 只给受限计划路径,`code-queue` 只支持 dev,规则见 `docs/reference/artifact-registry.md`。 - `bun scripts/cli.ts gh auth status|issue ...|pr list|view|create|comment` / `bun scripts/code-queue-pr-preflight-example.ts`:通过 REST 执行安全 GitHub issue 读写、脱敏 auth/status 诊断、body-file Markdown 写入、#24 指挥简报新增时间线 ClaudeQQ 通知、escape 扫描、只读 cleanup-plan、#20 board-audit 和 #20 board-row list/get/update dry-run/并发保护、PR 创建/评论 dry-run 与 runner PR preflight;`gh pr merge` 当前仍结构化拒绝,规则见 `docs/reference/cli.md` 和 `docs/reference/code-queue-supervision.md`。 +- `bun scripts/cli.ts commander contract|plan --dry-run|approval request --dry-run`:查看 host Codex 指挥官直管微服务第一阶段 source/contract、bridge/state/trace/审批边界和 ClaudeQQ 高风险请示草案;当前只返回 dry-run 计划,不启动守护进程、不打开 SSH/PTY/stdio、不发送消息,规则见 `docs/reference/host-codex-commander.md`。 - `bun scripts/cli.ts ci install/status/run/publish-backend-core/publish-user-service/run-dev-e2e/logs`:在 D601 原生 k3s 上安装和运行 Tekton CI,支持每 commit 检查、Code Queue 只读性能门禁、`CI.json` catalog 驱动的 backend-core 与 user-service commit-pinned 镜像发布和手动触发的 `origin/master:deploy.json#environments.dev` 临时 namespace e2e;catalog/producer/consumer 分工见 `docs/reference/cicd-standardization.md`,`run-dev-e2e` 的 Git 控制 runner、短 launcher 和 no-CD 边界见 
`docs/reference/dev-ci-runner.md`,Tekton 规则见 `docs/reference/ci.md`。 - `bun scripts/cli.ts codex deploy `:旧 Code Queue 兼容部署入口已禁用,原因是它会绕过受控部署边界直连 D601 部署 Code Queue;规则见 `docs/reference/codex-deploy.md`。 - `bun scripts/cli.ts codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue ]` / `codex pr-preflight [--remote]`:前者通过 backend-core 私有代理提交 Code Queue 任务,`--dry-run` 会给出 MiniMax/GPT/人工路由建议但不改写 payload;后者只读检查 D601 scheduler/runner 的 GitHub token、egress 和 PR 能力,PR 型派单前必须使用,规则见 `docs/reference/cli.md` 和 `docs/reference/code-queue-supervision.md`。 @@ -85,6 +86,7 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文 - `docs/reference/cicd-standardization.md`:`CI.json` catalog、CI producer summary、blocked/upstream-image 服务、File Browser 上游镜像例外、legacy CI/CD 路径分类和 CD consumer 分工。 - `docs/reference/release-governance.md`:`release/v1` 稳定维护线、`master` 集成线、CI/CD server 版本固定、master CLI 兼容和 feature flag 治理规则;决策记录见 GitHub issue #6。 - `docs/reference/artifact-registry.md`:D601 host-managed CNCF Distribution registry、loopback-only 边界和 backend-core artifact CD 目标流程。 +- `docs/reference/host-codex-commander.md`:host Codex 指挥官直管微服务第一阶段 source/contract、CLI dry-run、状态模型、SSH/PTY/stdio bridge、#20/#46 入口和 ClaudeQQ 高风险审批边界。 - `docs/reference/user-service-delivery.md`:用户服务默认交付流程、CI 镜像构建与 registry、Baidu Netdisk 主 server 直管微服务样板、dev 自动测试、prod 拉镜像部署和 Decision Center 产品化需求管理规则。 - `docs/reference/dev-environment.md`:D601 `unidesk-dev` persistent dev 环境、18083 dev frontend proxy、`deploy apply --env dev` 服务范围和 Rust backend-core 只在 D601 编译的边界。 - `docs/reference/ci.md`:D601 k3s Tekton CI、只读主数据库性能门禁和 CLI 入口规则。 diff --git a/TEST.md b/TEST.md index 310507ff..29107932 100644 --- a/TEST.md +++ b/TEST.md @@ -142,3 +142,7 @@ ## T27 GitHub Issue/Comment 换行转义卫生扫描 阅读 `AGENTS.md` 和 `docs/reference/cli.md`,然后用 cli 手动测试以下内容:运行 `bun scripts/cli.ts gh help`,确认 help 中包含 `gh issue create --title --body-file <file> [--label label[,label...]]...`、`gh issue scan-escape`、`gh issue cleanup-plan`、`gh issue board-row list` 和 `gh 
issue board-row update`,notes 中明确推荐 `--body-file`、quoted heredoc、只读 cleanup-plan、board-row update 默认 dry-run 和 `--expect-body-sha`/`--expect-updated-at` 并发保护。运行 `bun scripts/gh-cli-issue-guard-contract-test.ts`,确认 mock GitHub 覆盖污染命中、说明性 `\n` 命中不误报、短 body/null body guard、body-file dry-run 写入路径、`issue create --label cli,infra --label ops --dry-run` labels 解析和 request plan、真实 create REST payload labels、missing label 的结构化 `validation-failed`、comment-id/body-id 定位和 cleanupSuggestions、board-row list/get 复用 #20 表格解析、board-row update 给出 old/new row、body SHA、guard 结果、表格管道转义、默认 dry-run 不写入、带 `--expect-body-sha` 时只对 mock server PATCH、以及 board-row move 结构化 unsupported。对真实仓库只允许运行 `bun scripts/cli.ts gh issue scan-escape --repo pikasTech/unidesk --limit <N> --dry-run`、`bun scripts/cli.ts gh issue cleanup-plan --repo pikasTech/unidesk --limit <N>`、`bun scripts/cli.ts gh issue board-row list --repo pikasTech/unidesk --board-issue 20 --state open --dry-run` 或 `bun scripts/cli.ts gh issue board-row get <issueNumber> --repo pikasTech/unidesk --board-issue 20` 这类只读命令;不得运行真实历史评论清理、不得真实改写 #20/#24 正文,除非另有明确人工指令并先审阅 dry-run 输出和 body SHA。 + +## T28 Host Codex Commander Contract + +阅读 `AGENTS.md` 和 `docs/reference/host-codex-commander.md`,然后用 cli 手动测试以下内容:运行 `bun scripts/host-codex-commander-contract-test.ts`,确认输出 `ok=true`;运行 `bun scripts/cli.ts commander contract`,确认返回 `phase=source-contract`、`serviceId=host-codex-commander`、`daemonImplemented=false`、`liveOperationsImplemented=false`,且 required capabilities 包含 host Codex 进程发现/启动计划、SSH/PTY/stdio bridge、prompt guidance、trace summary、#20/#46 入口和 ClaudeQQ 高风险审批入口;运行 `bun scripts/cli.ts commander plan --dry-run --session-id primary`,确认所有 top-level plan 均为 `mutation=false`,start plan `enabled=false`,不会打开 SSH/PTY/stdio、不会注入 prompt、不会发送 ClaudeQQ;运行 `bun scripts/cli.ts commander plan`,确认非 dry-run 返回非零状态和 `error=dry-run-required`;运行 `bun scripts/cli.ts commander approval request --action code-queue-task-interrupt --task-id <taskId> --reason 
'<reason>' --dry-run`,确认只生成 ClaudeQQ 审批草案且 `claudeqq.mutation=false`、`sendImplemented=false`;运行 `bun scripts/cli.ts commander approval request --action read-token-file --dry-run`,确认返回 `validation-failed`。本测试不得部署、不得重启 Code Queue backend、不得 cancel/interrupt 运行任务、不得读取或输出 token 明文。 diff --git a/docs/reference/cicd-standardization.md b/docs/reference/cicd-standardization.md index 58afcf5d..61303e50 100644 --- a/docs/reference/cicd-standardization.md +++ b/docs/reference/cicd-standardization.md @@ -210,3 +210,33 @@ The test checks that dev targets only `unidesk-dev`, prod exposes no runtime dep This precheck uses lightweight parsing and dry-run evidence only. It intentionally does not run full `check`, e2e, Playwright, or other broad browser/runtime test suites on the master server because those are outside the precheck scope and may exceed master-server resource limits. `backend-core` and D601 `code-queue` production validation are also out of scope; backend-core dev rollout can be attempted only through the existing D601 dev path, and a provider-offline result is an infrastructure blocker rather than permission to validate production. The structured read-only preflight entrypoints are `artifact-registry status|health` and `ci publish-user-service --dry-run`. Remote runners may call them through the frontend passthrough path, and the result must classify missing backend-core, database, provider or registry channels as `runnerDisposition=infra-blocked`. The detailed probe list remains in `missingChannels`; the stable runner-facing domain list is `missingControlChannels` with only `backend-core`, `database`, `provider` and `registry`. Those cases are infrastructure blockers, not business failures and not a license to retry a real publish. A non-dry-run publish may be attempted only where `controlledPublish` points: D601 CI, namespace `unidesk-ci`, PipelineRun `unidesk-user-service-artifact-publish`. 
+ +## Next Stage Dispatch Matrix + +This matrix describes the next promotion stage after dry-run coverage is in place. It favors correctness over throughput, keeps `backend-core` and D601 `code-queue` dev-only, and splits dev and prod only where the runtime policy already allows it. + +| Service class | Target branch | CI current state | CD current state | DEV acceptance | PROD acceptance | Blockers | Suggested model | +| --- | --- | --- | --- | --- | --- | --- | --- | +| `backend-core` | `master` | source-build supported | dev target-side rollout only | Rust build + dev rollout proof on D601; no prod gate in this phase | not authorized | keep Rust iteration on D601 dev path; no prod validation | `GPT-5.5` | +| `code-queue` | `master` | source-build supported, dev-only | dev-only k3s consumer | dev artifact validation for `unidesk-dev` scheduler/read/write/provider-egress-proxy | not implemented; must remain unsupported | production boundary, hostPath/source contract, scheduler/egress dependency health | `GPT-5.5` | +| `frontend` | `master` | source-build supported | dev + prod artifact consumer | commit-pinned dev rollout and `/health.deploy.commit` | commit-pinned prod recreate and UI route verification | none beyond standard artifact/CD checks | `GPT-5.5` | +| `baidu-netdisk` | `master` | source-build supported | dev + prod artifact consumer | pull-only dev validation plus auth and proxy checks | pull-only prod recreate plus live commit and proxy checks | secret presence and `/health.auth` gate | `GPT-5.5` | +| `project-manager` | `master` | source-build supported | dev + prod artifact consumer | dev artifact validation with `/api/projects` | prod artifact validation with live commit proof | none beyond standard artifact/CD checks | `MiniMax` for dry-run/reporting, `GPT-5.5` for release sign-off | +| `oa-event-flow` | `master` | source-build supported | dev + prod artifact consumer | dev artifact validation with `/api/diagnostics` | prod artifact validation with 
live commit proof | none beyond standard artifact/CD checks | `MiniMax` for dry-run/reporting, `GPT-5.5` for release sign-off | +| `todo-note` | `master` | external source-build supported | dev + prod artifact consumer | dev recreate with PostgreSQL-backed deploy metadata | prod recreate with matching `deploy.commit` and `deploy.requestedCommit` | external repo fetch and runtime metadata consistency | `DeepSeek` for digesting external-source evidence, `GPT-5.5` for final gate | +| `decision-center` | `master` | source-build supported | dev + prod k3s consumer | dev gate with record CRUD, diary lifecycle, doc-number uniqueness and frontend visibility | manual prod acceptance after dev gate; verify health, records, diary editor and live commit | doc-management completeness, PostgreSQL truth, no public business ports | `GPT-5.5` | +| `mdtodo` | `master` | source-build supported | dev + prod k3s consumer | dev rollout with deployment metadata and `/health` or `/live` proof | prod rollout with service proxy verification and live commit proof | no NodePort/hostPort/public backend exposure | `MiniMax` for prompt prep, `GPT-5.5` for approval | +| `claudeqq` | `master` | source-build supported | dev + prod k3s consumer | dev rollout with Deployment metadata and health via Kubernetes API proxy | prod rollout with same commit-pinned artifact contract | NapCat/backend port exposure must stay private | `MiniMax` for prompt prep, `GPT-5.5` for approval | +| `findjob` | `master` | source-build supported | dev + prod direct Compose consumer | pull-only dev validation on D601 with image labels and `/api/health` | pull-only prod recreate with live commit proof | target-side compose health/labels only, no public business ports | `DeepSeek` for dry-run matrix drafting, `GPT-5.5` for final gate | +| `pipeline` | `master` | source-build supported | dev + prod direct Compose consumer | pull-only dev validation on D601 with image labels and `/health` | pull-only prod recreate with live 
commit proof | runtime contract is commit-label + compose service identity | `DeepSeek` for dry-run matrix drafting, `GPT-5.5` for final gate | +| `met-nonlinear` | `master` | source-build supported | dev dry-run only | runtime-verification-blocked until long-running TS service image contract is fixed | not authorized | image contract mismatch between ML Dockerfile and TS runtime service | `GPT-5.5` | +| `k3sctl-adapter` | `master` | source-build supported | plan/dry-run only | no normal dev target; only control-bridge health and recovery evidence | prod live apply requires supervisor confirmation | bridge recovery, k3s fault-domain isolation, no worker self-replacement | `GPT-5.5` | +| `filebrowser` / `filebrowser-d601` | `master` | upstream-image blocked | pull-only mirror target | digest resolution, mirror governance and private proxy health only | not in this phase | upstream digest/mirror worker not yet implemented | `DeepSeek` for evidence summarization, `GPT-5.5` for blocker resolution | + +Planned parallelism for the next wave should be three lanes: + +1. Lane A: `frontend`, `baidu-netdisk`, `project-manager`, `oa-event-flow`. +2. Lane B: `decision-center`, `mdtodo`, `claudeqq`, `todo-note`. +3. Lane C: `findjob`, `pipeline`, `met-nonlinear`, `k3sctl-adapter`, `code-queue`, `backend-core`. + +Lane C must stay split into dev-only work for `backend-core` and `code-queue`, plus read-only or dry-run work for `met-nonlinear` and `k3sctl-adapter`; `findjob` and `pipeline` follow the DEV and PROD acceptance columns of the matrix above. `backend-core` and `code-queue` must not be promoted into prod acceptance tasks in this phase. 
diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 3c4dbf19..3b77b38c 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -29,6 +29,7 @@ CLI 可以从 `master` 快速演进,但必须兼容 `deploy.json` 固定的 CI - `dev-env validate [--manifest path] [--kubectl-dry-run]` 离线校验 D601 `unidesk-dev` namespace、dev PostgreSQL 底座和 dev workload manifest。默认检查 `src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-foundation.k8s.yaml`;也可显式校验 `src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-core.k8s.yaml` 或 `src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-code-queue.k8s.yaml`。所有 namespaced 对象必须只落到 `unidesk-dev`,foundation manifest 必须包含 `postgres-dev` StatefulSet/Service、dev secret/config、迁移 Job 和 DB URL guard,core manifest 必须包含 `backend-core-dev`/`frontend-dev` Deployment/Service,Code Queue dev manifest 必须包含 `code-queue-scheduler-dev`、`code-queue-read-dev`、`code-queue-write-dev`、dev provider egress proxy,以及只读挂载宿主 `/home/ubuntu/.agents/skills` 到容器 `/root/.agents/skills` 的 `skills-dir` volume。加 `--kubectl-dry-run` 时额外执行 `kubectl apply --dry-run=client --validate=false -f <manifest>`,仍不 apply 资源。 - `dev-env prewarm-images [--image image] [--provider-id D601] [--no-pull] [--proxy-url URL] [--pull-timeout-ms N] [--dry-run]` 创建异步 job,通过 UniDesk SSH 维护桥在 D601 上把开发底座依赖镜像从 Docker 缓存导入原生 k3s containerd。默认镜像是 `postgres:16-alpine` 和 `rancher/mirrored-library-busybox:1.36.1`,用于避免 `postgres-dev` 与 local-path helper pod 卡在外部 registry 拉取。该命令固定验证 `/etc/rancher/k3s/k3s.yaml` 指向的 native k3s 上下文,并输出 `dev_env_containerd_image_ready=...` 作为成功判据;它不 apply manifest、不修改生产 `unidesk` namespace。 - `artifact-registry plan|render|status|health|install|deploy-backend-core|deploy-service` 管理 D601 host-managed CNCF Distribution registry 的声明、安装、只读检查和 pull-only artifact CD。该 registry 固定为 D601 loopback `127.0.0.1:5000`,由 systemd + Docker Compose 管理,位于 native k3s 故障域外;`deploy-service` 只拉取 CI 已发布的 commit-pinned 镜像、retag/recreate 或导入 native k3s,并做 live commit 验证,不构建 runtime 
source。`deploy-backend-core` 是 deprecated 兼容名,标准 backend-core prod CD 入口是 `deploy apply --env prod --service backend-core`。长期规则见 `docs/reference/artifact-registry.md`。 +- `commander contract|plan --dry-run|approval request --dry-run` 是 host Codex 指挥官直管微服务第一阶段 contract 入口。当前只返回 `phase=source-contract`、service/API/state/bridge/prompt/trace/#20/#46/ClaudeQQ 审批边界和 dry-run 计划,不启动 daemon、不打开 SSH/PTY/stdio、不注入 prompt、不发送 ClaudeQQ。`plan` 与 `approval request` 必须带 `--dry-run`;缺少时返回 `error=dry-run-required`。长期规则见 `docs/reference/host-codex-commander.md`。 - `gh auth status [--repo owner/name]` 探测 GitHub 操作前置条件并输出脱敏 JSON:是否存在 `gh` binary、是否存在 `GH_TOKEN`/`GITHUB_TOKEN` 或可用 `gh auth token` fallback、REST API 是否可达、目标 repo 是否可见、issue 是否可读。degraded reason 必须归类为 `missing-binary`、`missing-token`、`auth-failed`、`network-proxy-failed`、`permission-denied`、`repo-not-found`、`repo-forbidden`、`issue-not-found`、`pr-not-found`、`scope-insufficient`、`validation-failed`、`invalid-response` 或 `unsupported-command`,不得打印 token;失败对象必须包含 `runnerDisposition=infra-blocked|business-failed`,runner 应优先用该字段分流。 - `gh issue list [--state open|closed|all] [--limit N] [--repo owner/name] [--json number,title,state,url,updatedAt,createdAt,author,labels]` 通过 GitHub REST 列出 issue,默认 `state=open`、`limit=30`,输出稳定 JSON 且不依赖系统 `gh` binary。`--limit` 会映射到 GitHub `per_page` 并限制返回数量,避免一次拉爆上下文;未知 state 或未知 `--json` 字段必须结构化失败并带 `runnerDisposition=business-failed`。GitHub issues API 可能混入 PR,CLI 会从 `.data.issues` 中过滤 pull request。 - `gh issue read <number> [--repo owner/name] [--json body,title,state,comments]` 通过 GitHub REST 读取 issue title/body/state/url 和 comments,默认输出 JSON;`view` 只保留为兼容别名。兼容旧脚本的 `--json body` 和 `--json body,title,state,comments` 字段选择,且正文仍稳定暴露在 `.data.issue.body`,避免调用方因为 JSON 路径变化把空值当成正文。字段白名单是 `body,title,state,comments,number,url,author,createdAt,updatedAt`,未知字段必须结构化失败并带 `runnerDisposition=business-failed`。`gh issue create --title <title> --body-file <file> [--label label[,label...]]... 
[--dry-run]`、`gh issue update <number> --mode replace|append --body-file <file> [--title ...] [--dry-run]`、`gh issue comment create <number> --body-file <file> [--dry-run]`、`gh issue comment delete <commentId> [--dry-run]`、`gh issue close|reopen <number> [--dry-run]` 都走 REST,不依赖 `gh` binary。`--label` 仅用于 `issue create`,支持重复传入和逗号分隔;`--dry-run` 会展示解析后的 labels 与 request plan,正式创建时把 labels 放入 GitHub REST create-issue payload,GitHub 返回不存在 label 等 422 校验失败时 CLI 结构化返回 `validation-failed`,不静默成功。`gh issue delete <number>` 是结构化 `unsupported-command`,因为 GitHub REST 不支持 issue 硬删除;生命周期删除语义请使用 `close`。 diff --git a/docs/reference/code-queue-supervision.md b/docs/reference/code-queue-supervision.md index 3a119b39..82ba1eb4 100644 --- a/docs/reference/code-queue-supervision.md +++ b/docs/reference/code-queue-supervision.md @@ -189,6 +189,8 @@ ClaudeQQ 是面向用户的主动提醒通道,不是 #24 简报更新的自动 重启 Code Queue backend、重建 Code Queue backend 容器、重启 Code Queue 执行面,或对运行中 Code Queue 任务执行 interrupt/cancel 这类会改变执行状态的操作,都属于高风险干预。即使看起来是最小恢复动作,指挥官也必须先通过 ClaudeQQ 向用户上报原因、影响范围和拟执行动作,并等待用户明确同意;未获得同意前只能做只读诊断、记录 issue、更新看板和准备恢复方案。 +host Codex 指挥官正规化后仍受同一条高风险边界约束。`docs/reference/host-codex-commander.md` 中的直管微服务只能把 host Codex 保活、SSH/PTY/stdio bridge、prompt plan、trace summary、#20/#46 issue 入口和 ClaudeQQ 审批记录产品化;它不是 Code Queue runner,也不是 Code Queue 自部署通道。第一阶段 `bun scripts/cli.ts commander ...` 只允许输出 contract/dry-run 计划,不得实际重启 backend、interrupt/cancel task、读取 token 明文、打开 bridge 或发送 ClaudeQQ。 + 当多信号裁决显示 provider 服务器、D601 执行面或关键维护桥疑似需要人工检查时,指挥官可以在更新 #24/#40 等记录之外,通过 ClaudeQQ 额外提醒用户检查 provider 服务器状态。提醒只在首次确认、状态恶化、恢复或需要用户介入时发送,不能在每轮轮询中重复轰炸。ClaudeQQ 提醒是 best-effort:若 ClaudeQQ 本身依赖同一条故障 provider/k3sctl 链路而不可达,指挥官应把通知失败的原因写入 #24 或对应 blocker issue,并继续按轮询和恢复规则推进。 在 UniDesk CLI 中,`bun scripts/cli.ts provider triage <providerId>` 是只读多信号裁决入口,适合作为 worker 和指挥官的统一健康判断前置。它必须至少保留这些合同:`provider is not online` 这类单路径失败只应落到 `decision=retryable-transient` / `blockingDisposition=runner-local-observation-gap`,不得直接输出 `global-offline`;只有 
provider-gateway/SSH/k3s/scheduler 等多个独立关键路径同时失败且缺少健康交叉证据,才允许输出 `decision=global-offline`;registry 或单个 service proxy 失败但 heartbeat、SSH 或节点视图仍健康时,应输出 `decision=service-degraded`。`recommendedCrossChecks` 必须包含 `debug health`、`debug dispatch <providerId> host.ssh --wait-ms 15000`、`ssh <providerId> argv true`、`artifact-registry health --provider-id <providerId>`、`microservice health k3sctl-adapter`、`microservice health code-queue` 与 `codex tasks --view supervisor --limit 20`。 diff --git a/docs/reference/host-codex-commander.md b/docs/reference/host-codex-commander.md new file mode 100644 index 00000000..971aa98f --- /dev/null +++ b/docs/reference/host-codex-commander.md @@ -0,0 +1,231 @@ +# Host Codex Commander Contract + +本文定义第一阶段的 host Codex 指挥官正规化设计。当前阶段只建立 source/contract、CLI dry-run stub、状态模型和安全边界;不部署、不重启、不上线 production,也不实现会实际执行后台动作的守护进程。 + +## 目标边界 + +host Codex 指挥官是独立用户服务/基础设施:在 master server host 上保留一个常驻 Codex 指挥会话,由未来的直管微服务负责保活、prompt 注入、trace 采集和高风险动作请示。它不替代 Code Queue runner,也不让 Code Queue 自己上线自己。 + +服务边界固定为三层: + +- host Codex 进程:运行在 master server host 的常驻 Codex 指挥会话,只负责监督、派单、审阅和恢复计划。 +- 直管控制微服务:运行在 host 侧或能直接接入 host TTY/stdio 的受控位置,负责发现/启动计划、SSH/PTY/stdio bridge、事件持久化、prompt plan、trace summary 和审批状态。 +- UniDesk/Code Queue/ClaudeQQ:Code Queue 继续作为任务执行面,GitHub issue 继续作为长期记录,ClaudeQQ 作为高风险动作请示入口。 + +第一阶段只允许这些产物: + +- `docs/reference/host-codex-commander.md` 长期 contract; +- `bun scripts/cli.ts commander contract` 和 `commander plan --dry-run` 这类只读/dry-run 输出; +- contract test 证明 CLI 不执行 live operation; +- 后续任务拆分。 + +## 非目标 + +当前阶段明确不做: + +- 不启动、停止或重启任何常驻 commander daemon; +- 不打开真实 PTY、stdio 或 SSH bridge; +- 不向 host Codex 注入真实 prompt; +- 不发送 ClaudeQQ 消息; +- 不部署、不重启、不上线 prod; +- 不直接重启 Code Queue backend,不重建 Code Queue backend 容器,不重启 Code Queue 执行面; +- 不 cancel 或 interrupt 运行中的 Code Queue task; +- 不读取、打印或持久化 token 明文。 + +这些动作即使未来实现,也必须先通过本文的审批和安全边界。 + +## CLI Contract + +统一入口: + +```bash +bun scripts/cli.ts commander contract +bun scripts/cli.ts commander plan 
--dry-run [--session-id primary] +bun scripts/cli.ts commander approval request --action <action> --dry-run [--reason text] [--task-id id] +``` + +所有命令默认输出 JSON,失败也必须有结构化 stdout 和非零退出码。`plan` 和 `approval request` 在第一阶段必须要求 `--dry-run`;没有 `--dry-run` 时必须返回 `ok=false`、`error=dry-run-required`,不能降级成真实执行。 + +`commander contract` 必须暴露: + +- `phase=source-contract`; +- `serviceId=host-codex-commander`; +- `daemonImplemented=false`; +- `liveOperationsImplemented=false`; +- 需要的 host Codex 发现/启动计划、SSH/PTY/stdio bridge、prompt guidance、trace summary、#20/#46 issue 入口、ClaudeQQ 审批入口; +- `safetyBoundary`。 + +`commander plan --dry-run` 必须输出: + +- process discovery signal 列表; +- host Codex start command shape,但 `enabled=false`; +- SSH/PTY/stdio bridge 设计和 guardrail; +- prompt guidance pipeline; +- trace summary sources 和 summary shape; +- #20/#46 read/write 入口; +- ClaudeQQ high-risk approval command shape; +- `mutation=false`。 + +`commander approval request --dry-run` 只生成审批草案。允许的 `--action` 固定为: + +- `code-queue-backend-restart` +- `code-queue-backend-rebuild` +- `code-queue-execution-plane-restart` +- `code-queue-task-interrupt` +- `code-queue-task-cancel` +- `prod-runtime-mutation` + +输出中 `claudeqq.mutation=false`、`sendImplemented=false`。真实发送和审批消费属于后续阶段。 + +## Future API Contract + +后续微服务 API 以 REST 为主,所有写入口默认异步、有 request id、有事件序列、有 redaction 结果: + +| Method | Path | 用途 | 第一阶段状态 | +| --- | --- | --- | --- | +| GET | `/health` | 服务健康、版本、日志路径、state root | contract only | +| GET | `/api/commander/contract` | 返回本文对应机器可读 contract | contract only | +| GET | `/api/commander/sessions` | 列出 host Codex session 摘要 | contract only | +| POST | `/api/commander/sessions/:sessionId/plan-start` | 生成发现/启动计划 | contract only | +| POST | `/api/commander/sessions/:sessionId/prompt-plan` | 生成 prompt 注入计划 | contract only | +| GET | `/api/commander/trace-summary` | 读取有界 trace summary | contract only | +| POST | `/api/commander/issues/:issueNumber/write-plan` | 生成 #20/#46 写入计划 | contract only | +| POST | 
`/api/commander/approvals` | 创建 ClaudeQQ 高风险请示草案 | contract only | + +后续实现不得绕过现有 CLI/服务边界。GitHub issue 写入仍使用 `bun scripts/cli.ts gh issue ... --body-file`、dry-run-first 和并发 guard;Code Queue 读写仍优先使用 `codex task/tasks/steer/read` 等正式入口。 + +## State Model + +状态根目录规划为 `.state/commander/`。后续服务至少拆成这些文件或等价表: + +- `sessions/<sessionId>.json`:host Codex session 摘要、pid/cwd 指纹、bridge 状态、lastSeq、heartbeat; +- `events/<sessionId>.jsonl`:prompt、trace、approval、bridge lifecycle 事件; +- `approvals/<approvalId>.json`:ClaudeQQ 请示草案、状态、授权绑定动作、过期时间; +- `locks/<name>.lock.d/`:启动、prompt injection、issue write、approval consume 的互斥锁; +- `redactions/<eventId>.json`:脱敏摘要,不保存明文 secret。 + +session 状态: + +| State | 含义 | +| --- | --- | +| `unknown` | 没有足够信号判断 host Codex 是否存在 | +| `discovered` | 通过 state/process/bridge heartbeat 找到候选会话 | +| `planned` | 只生成启动或接管计划,尚未执行 | +| `starting` | 后续 live executor 正在启动或 attach | +| `running` | heartbeat 和 trace 新鲜 | +| `attention_required` | 需要人工判断或审批 | +| `stopping` | 后续 live executor 正在退出 | +| `stopped` | 会话已终止 | +| `degraded` | bridge、trace 或状态持久化部分失败 | + +prompt 状态:`draft`、`planned`、`queued_for_injection`、`injected`、`rejected`、`failed`。 + +approval 状态:`draft`、`requested`、`approved`、`rejected`、`expired`、`consumed`。审批只能绑定一个具体 action、taskId 或 target,不得作为泛授权复用。 + +## Bridge Contract + +SSH bridge 只复用现有 UniDesk Host SSH / WSL SSH 维护桥。它用于 provider 只读诊断、受控维护命令和未来已审批恢复动作;不得作为 provider-gateway 自重建通道,也不得绕过 provider.upgrade 调度规则。 + +PTY bridge 用于 host Codex 交互会话保活和窗口化 transcript 采集。后续实现必须: + +- 有 heartbeat; +- stdout/stderr 分流或标注; +- 按 seq 写入事件; +- 默认有 byte/line 上限; +- prompt 注入前先落 plan 和 redaction summary; +- 注入失败必须可见,不得静默重试。 + +stdio bridge 用于非交互 Codex 或 helper subprocess。它必须有 argv 记录、cwd、env key allowlist、exit code、timeout 和 bounded output。env 只能记录 key 存在性或来源,不能记录值。 + +## Prompt Guidance + +prompt 注入前必须按顺序执行: + +1. 分类意图和风险; +2. 汇总当前 #20/#46/task/queue 的有界上下文; +3. 运行 forbidden-action guard; +4. 对 prompt 和上下文做 secret-like redaction; +5. 持久化 prompt plan; +6. 
后续 live executor 仅在 policy pass 后注入。 + +遇到 Code Queue backend 重启/重建、执行面重启、task interrupt/cancel、prod mutation、token 访问、破坏性 Git 操作时,必须转入 ClaudeQQ 审批草案,而不是注入执行 prompt。 + +## Trace Summary + +trace summary 不是 raw transcript dump。默认输出应包含: + +- `taskId`、`sessionId`、`lastSeq`; +- 当前状态和 freshness; +- 最近关键事件; +- open questions; +- recommended next actions; +- redactionsApplied; +- drill-down 命令。 + +原始 transcript 必须分页读取,默认不在 summary 中展开。summary 可以引用 Code Queue `codex task --trace`、`codex output`、host Codex event JSONL 和 approval event,但不能把任一路径失败升级为全局故障,仍需遵守 `docs/reference/code-queue-supervision.md` 的多信号裁决规则。 + +## Issue Entrypoints + +#20 是 Code Queue 总看板入口。读取优先: + +```bash +bun scripts/cli.ts gh issue board-audit --board-issue 20 --dry-run +bun scripts/cli.ts gh issue board-row list --board-issue 20 +``` + +写入必须先 dry-run,再带 body SHA 或 updatedAt: + +```bash +bun scripts/cli.ts gh issue board-row update <issueNumber> --board-issue 20 --field progress --value <text> --expect-body-sha <sha> +``` + +#46 是每日指挥简报入口。读取: + +```bash +bun scripts/cli.ts gh issue read 46 --json body,title,state,updatedAt +``` + +写入: + +```bash +bun scripts/cli.ts gh issue update 46 --body-profile commander-brief --body-file <file> --expect-updated-at <ts> +``` + +所有 Markdown 正文必须来自 `--body-file`,禁止把正文拼入 shell 参数。任何 issue 写入都不能自动触发 ClaudeQQ,除非该动作本身是高风险请示或用户通知策略明确要求。 + +## Safety Boundary + +第一阶段所有 commander CLI 输出都必须是 `mutation=false`。 + +未来 live executor 在没有用户明确同意前也不得执行: + +- 重启或重建 Code Queue backend; +- 重启 Code Queue 执行面; +- interrupt 或 cancel 运行任务; +- 修改 production runtime; +- 读取或输出 token 明文; +- 直写 PostgreSQL 修补任务状态; +- 破坏性 Git 操作; +- 绕过 GitHub issue body-file 和并发 guard。 + +高风险动作流程固定为: + +1. 生成 ClaudeQQ 请示草案,说明原因、影响范围、拟执行动作和可回滚性; +2. 发送给配置的主用户私聊入口; +3. 等待明确批准; +4. 将批准绑定到唯一 action 和 target; +5. 执行前再次校验审批未过期且未被消费; +6. 
执行后写入 #46 或对应 issue 的结果摘要。 + +ClaudeQQ 不可达时,不得把请求视为已批准;只能记录通知失败和继续只读诊断。 + +## Next Stage Tasks + +可派单的下一阶段任务: + +- 实现 `src/components/microservices/host-codex-commander` 服务骨架,提供 `/health` 和 `/api/commander/contract`,仍不启动 live bridge。 +- 增加 `.state/commander/` 文件状态读写模块和 redaction 单元测试。 +- 实现只读 host Codex process discovery,输出候选 pid/cwd/age,不 attach、不 kill。 +- 实现 trace summary 聚合器,读取 mock event JSONL 和 Code Queue bounded trace。 +- 实现 ClaudeQQ approval draft service,不发送消息,只落审批草案和 preview。 +- 设计第二阶段 live PTY/stdio bridge 的权限、日志、锁和超时测试。 diff --git a/scripts/cli.ts b/scripts/cli.ts index bd6880dd..8b94b0c7 100644 --- a/scripts/cli.ts +++ b/scripts/cli.ts @@ -19,6 +19,7 @@ import { runSwapCommand } from "./src/swap"; import { runDevEnvCommand } from "./src/dev-env"; import { runArtifactRegistryCommand } from "./src/artifact-registry"; import { runGhCommand } from "./src/gh"; +import { runCommanderCommand } from "./src/commander"; import { isHelpToken, rootHelp, serverHelp, sshHelp, staticNamespaceHelp } from "./src/help"; const remoteOptions = extractRemoteCliOptions(process.argv.slice(2)); @@ -177,6 +178,14 @@ async function main(): Promise<void> { return; } + if (top === "commander") { + const result = runCommanderCommand(args.slice(1)); + const ok = (result as { ok?: unknown }).ok !== false; + emitJson(commandName, result, ok); + if (!ok) process.exitCode = 1; + return; + } + const config = readConfig(); if (top === "ssh") { diff --git a/scripts/code-queue-submit-routing-contract-test.ts b/scripts/code-queue-submit-routing-contract-test.ts index c8868139..e954c1ab 100644 --- a/scripts/code-queue-submit-routing-contract-test.ts +++ b/scripts/code-queue-submit-routing-contract-test.ts @@ -26,6 +26,14 @@ const runtimePrompt = ` 验证:需要证明 scheduler heartbeat、active run、OpenCode session recovery 都正确。 `; +const mediumPrompt = ` +目标:实现一个前端 React 控制台组件的小功能,给 Code Queue 任务列表增加可折叠的验证证据摘要。 +范围:只改用户界面模块中的一个 TSX 组件和一个相邻的轻量 contract guard,不触碰 backend-core、Code Queue 
runtime、provider-gateway、k3sctl-adapter、部署配置或数据库 schema。 +禁止:不要部署 prod,不要重启服务,不要读取密钥,不要写数据库,不要修改 release/v1,不要跑 heavy check/e2e/Playwright。 +验证:运行针对该组件或 contract guard 的轻量脚本,final response 必须报告修改文件、验证命令、输出摘要、commit 和遗留风险。 +背景:本 prompt 是完整需求来源,GitHub issue 只能作为辅助引用,不能作为唯一来源。这个任务有真实代码变更和 UI 状态判断,复杂度高于只读文档,但写入边界局部、可审阅、可用轻量测试复核。 +`; + const commanderOnlyPrompt = ` 目标:在 production 上 deploy apply 并 restart code-queue,必要时读取 secret token 和写 PostgreSQL 修复任务状态。 验证:live health。 @@ -44,6 +52,18 @@ export function runCodeQueueSubmitRoutingContract(): JsonRecord { assertCondition(runtime.recommendedRunner === "codex", "runtime/core work should recommend Codex runner", runtime); assertCondition(runtime.recommendedModel === "gpt-5.5", "runtime/core work should recommend GPT-5.5", runtime); + const medium = codexSubmitRoutingRecommendationForTest(mediumPrompt, "deepseek"); + assertCondition(medium.route === "deepseek-codex", "medium bounded frontend work should recommend DeepSeek", medium); + assertCondition(medium.recommendedRunner === "codex", "DeepSeek work should use Codex runner", medium); + assertCondition(medium.recommendedModel === "deepseek-chat", "DeepSeek candidate should recommend deepseek-chat", medium); + assertCondition(asRecord(medium.riskControls).mediumComplexityCandidate === true, "medium prompt should satisfy medium complexity controls", medium); + assertCondition(asRecord(medium.explicitRequest).model === "deepseek-chat", "explicit deepseek alias should normalize to deepseek-chat", medium); + const policyContract = asRecord(medium.policyContract); + assertCondition(asRecord(policyContract.concurrency).gpt55Routine === 5, "policy contract should expose GPT-5.5 routine concurrency", policyContract); + assertCondition(asRecord(policyContract.concurrency).gpt55BurstMax === 10, "policy contract should expose GPT-5.5 burst concurrency", policyContract); + assertCondition(asRecord(policyContract.concurrency).minimaxSimpleMax === 10, "policy contract should expose 
MiniMax simple concurrency", policyContract); + assertCondition(asRecord(policyContract.concurrency).deepseekMediumDefault === 5, "policy contract should expose DeepSeek medium default concurrency", policyContract); + const commanderOnly = codexSubmitRoutingRecommendationForTest(commanderOnlyPrompt); assertCondition(commanderOnly.route === "commander-human-only", "prod restart/secrets/DB work should be commander-only", commanderOnly); assertCondition(commanderOnly.recommendedRunner === "commander", "commander-only work should not recommend a runner", commanderOnly); @@ -60,6 +80,8 @@ export function runCodeQueueSubmitRoutingContract(): JsonRecord { checks: [ "low-risk self-contained prompts recommend minimax-m2.7/OpenCode", "runtime/core work recommends GPT-5.5/Codex", + "medium bounded frontend work recommends deepseek-chat/Codex", + "dry-run policy contract exposes model-tier concurrency", "prod/restart/secret/DB work is commander-only", "explicit --model mismatch is visible and payload is unchanged", ], diff --git a/scripts/host-codex-commander-contract-test.ts b/scripts/host-codex-commander-contract-test.ts new file mode 100644 index 00000000..fc00c789 --- /dev/null +++ b/scripts/host-codex-commander-contract-test.ts @@ -0,0 +1,121 @@ +import { spawnSync } from "node:child_process"; +import { readFileSync } from "node:fs"; + +type JsonRecord = Record<string, unknown>; + +function assertCondition(condition: unknown, message: string, detail: unknown = {}): void { + if (!condition) throw new Error(`${message}: ${JSON.stringify(detail)}`); +} + +function asRecord(value: unknown, label: string): JsonRecord { + assertCondition(typeof value === "object" && value !== null && !Array.isArray(value), `${label} must be an object`, value); + return value as JsonRecord; +} + +function asStringArray(value: unknown, label: string): string[] { + assertCondition(Array.isArray(value) && value.every((item) => typeof item === "string"), `${label} must be a string array`, value); + 
return value as string[]; +} + +function runCli(args: string[], expectStatus: number): JsonRecord { + const result = spawnSync("bun", ["scripts/cli.ts", ...args], { + cwd: process.cwd(), + encoding: "utf8", + maxBuffer: 4 * 1024 * 1024, + }); + assertCondition(result.status === expectStatus, `status mismatch for ${args.join(" ")}`, { + status: result.status, + stdout: result.stdout.slice(-2000), + stderr: result.stderr.slice(-2000), + }); + return asRecord(JSON.parse(result.stdout) as unknown, "cli envelope"); +} + +function dataOf(envelope: JsonRecord): JsonRecord { + return asRecord(envelope.data, "data"); +} + +const contract = dataOf(runCli(["commander", "contract"], 0)); +assertCondition(contract.phase === "source-contract", "contract must identify source-contract phase", contract); +assertCondition(contract.serviceId === "host-codex-commander", "contract must expose service id", contract); +assertCondition(contract.daemonImplemented === false, "daemon must not be implemented in phase one", contract); +assertCondition(contract.liveOperationsImplemented === false, "live operations must not be implemented in phase one", contract); +const capabilities = asStringArray(contract.requiredCapabilities, "requiredCapabilities"); +for (const expected of [ + "host-codex-process-discovery", + "ssh-bridge-contract", + "pty-bridge-contract", + "stdio-bridge-contract", + "prompt-guidance-plan", + "trace-summary-plan", + "issue-20-board-read-write-entry", + "issue-46-brief-read-write-entry", + "claudeqq-high-risk-approval-entry", +]) { + assertCondition(capabilities.includes(expected), `missing required capability ${expected}`, capabilities); +} + +const safety = asRecord(contract.safetyBoundary, "safetyBoundary"); +assertCondition(safety.phaseOneMutationAllowed === false, "phase one must forbid mutation", safety); +const forbidden = asStringArray(safety.forbiddenWithoutExplicitUserApproval, "forbiddenWithoutExplicitUserApproval"); 
+assertCondition(forbidden.includes("code-queue-backend-restart"), "backend restart must require approval", forbidden); +assertCondition(forbidden.includes("code-queue-task-interrupt"), "task interrupt must require approval", forbidden); +assertCondition(forbidden.includes("code-queue-task-cancel"), "task cancel must require approval", forbidden); +const alwaysForbidden = asStringArray(safety.alwaysForbidden, "alwaysForbidden"); +assertCondition(alwaysForbidden.includes("print-token-values"), "contract must forbid token output", alwaysForbidden); + +const plan = dataOf(runCli(["commander", "plan", "--dry-run", "--session-id", "primary"], 0)); +assertCondition(plan.mutation === false, "plan must be non-mutating", plan); +assertCondition(asRecord(asRecord(plan.processDiscovery, "processDiscovery").startPlan, "startPlan").enabled === false, "start plan must be disabled", plan); +assertCondition(asRecord(plan.bridge, "bridge").mutation === false, "bridge plan must not open bridges", plan); +assertCondition(asRecord(plan.traceSummary, "traceSummary").mutation === false, "trace summary plan must be non-mutating", plan); +assertCondition(asRecord(plan.issueEntries, "issueEntries").mutation === false, "issue entry plan must be non-mutating", plan); +assertCondition(asRecord(plan.claudeqqApproval, "claudeqqApproval").mutation === false, "approval plan must be non-mutating", plan); + +const planWithoutDryRun = dataOf(runCli(["commander", "plan"], 1)); +assertCondition(planWithoutDryRun.error === "dry-run-required", "plan must require dry-run", planWithoutDryRun); + +const approval = dataOf(runCli([ + "commander", + "approval", + "request", + "--action", + "code-queue-task-interrupt", + "--task-id", + "task-123", + "--reason", + "heartbeat expired", + "--dry-run", +], 0)); +assertCondition(approval.mutation === false, "approval request must be non-mutating", approval); +assertCondition(approval.requiresExplicitUserApproval === true, "approval request must require explicit 
user approval", approval); +const claudeqq = asRecord(approval.claudeqq, "claudeqq"); +assertCondition(claudeqq.mutation === false, "ClaudeQQ preview must not send", claudeqq); +assertCondition(claudeqq.sendImplemented === false, "ClaudeQQ send must not be implemented", claudeqq); + +const invalidApproval = dataOf(runCli(["commander", "approval", "request", "--action", "read-token-file", "--dry-run"], 1)); +assertCondition(invalidApproval.error === "validation-failed", "unsupported approval action must fail validation", invalidApproval); + +const doc = readFileSync("docs/reference/host-codex-commander.md", "utf8"); +for (const snippet of [ + "不直接重启 Code Queue backend", + "不 cancel 或 interrupt 运行中的 Code Queue task", + "不读取、打印或持久化 token 明文", + "SSH/PTY/stdio", + "#20", + "#46", + "ClaudeQQ", +]) { + assertCondition(doc.includes(snippet), `reference doc missing snippet: ${snippet}`); +} + +process.stdout.write(`${JSON.stringify({ + ok: true, + checks: [ + "commander contract exposes host Codex service boundary and phase-one no-live-operation flags", + "dry-run plan covers process discovery, SSH/PTY/stdio bridge, prompt guidance, trace summary, #20/#46 and ClaudeQQ approval", + "non-dry-run plan is rejected", + "approval request is dry-run only and rejects unsupported high-risk actions", + "reference doc states backend restart, task interrupt/cancel, and token-output prohibitions", + ], +}, null, 2)}\n`); diff --git a/scripts/src/code-queue.ts b/scripts/src/code-queue.ts index 1960dc90..aedc9a73 100644 --- a/scripts/src/code-queue.ts +++ b/scripts/src/code-queue.ts @@ -12,6 +12,7 @@ const defaultTasksLimit = 20; const maxTasksLimit = 100; const steerPromptPreviewChars = 320; const minimaxSubmitModel = "minimax-m2.7"; +const deepseekSubmitModel = "deepseek-chat"; const gptSubmitModel = "gpt-5.5"; const submitLockWaitMs = 60_000; const submitLockPollMs = 250; @@ -58,7 +59,7 @@ interface CodexSubmitOptions { dryRun: boolean; } -type SubmitRoute = "minimax-opencode" | 
"gpt-5.5-codex" | "commander-human-only"; +type SubmitRoute = "minimax-opencode" | "deepseek-codex" | "gpt-5.5-codex" | "commander-human-only"; type SubmitRouteSignalSeverity = "info" | "warning" | "block"; interface SubmitRouteSignal { @@ -80,7 +81,9 @@ interface SubmitRoutingRecommendation { promptSelfContained: boolean; issueIsNotOnlySource: boolean; noProdRestartSecretOrDbWrite: boolean; + noRuntimeCoreOrReleaseWork: boolean; evidenceRequiredByPrompt: boolean; + mediumComplexityCandidate: boolean; commanderMustReviewUnread: true; }; explicitRequest: { @@ -92,6 +95,27 @@ interface SubmitRoutingRecommendation { dryRunOnly: true; doesNotChangeSubmittedPayload: true; prodMiniMaxAssumedAvailable: false; + prodDeepSeekAssumedAvailable: false; + runtimeAdmissionUnchanged: true; + }; + policyContract: { + selectionPrinciples: string[]; + concurrency: { + gpt55Routine: number; + gpt55BurstMax: number; + minimaxSimpleMax: number; + deepseekMediumDefault: number; + }; + modelTiers: Array<{ + model: string; + runner: "opencode" | "codex"; + taskRisk: string; + requiredGuards: string[]; + }>; + externalProvider429: { + commanderAction: string; + interveneWhen: string[]; + }; }; } @@ -579,6 +603,7 @@ function normalizeSubmitModel(value: string | null | undefined): string { const lower = raw.toLowerCase(); const leaf = lower.includes("/") ? lower.split("/").at(-1) ?? lower : lower; if (leaf === minimaxSubmitModel || leaf === "m2.7") return minimaxSubmitModel; + if (leaf === deepseekSubmitModel || leaf === "deepseek") return deepseekSubmitModel; return raw; } @@ -606,7 +631,7 @@ function regexEvidenceWithoutNegatedContext(text: string, patterns: RegExp[], li for (const match of text.matchAll(pattern)) { const index = match.index ?? 0; const context = text.slice(Math.max(0, index - 36), Math.min(text.length, index + String(match[0] ?? 
"").length + 36)); - if (/(?:禁止|不要|不得|不能|不应|请勿|严禁|avoid|forbid|forbidden|do not|don't|must not|no\s+)/iu.test(context)) continue; + if (/(?:禁止|不要|不得|不能|不应|不触碰|不修改|不涉及|不处理|不更改|请勿|严禁|avoid|forbid|forbidden|do not|don't|must not|no\s+)/iu.test(context)) continue; const value = String(match[0] ?? "").trim().replace(/\s+/gu, " "); if (value.length > 0 && !evidence.includes(value)) evidence.push(value); if (evidence.length >= limit) return evidence; @@ -619,6 +644,47 @@ function routeSignal(id: string, severity: SubmitRouteSignalSeverity, evidence: return { id, severity, matched: evidence.length > 0, evidence, message }; } +function submitPolicyContract(): SubmitRoutingRecommendation["policyContract"] { + return { + selectionPrinciples: [ + "Use GPT-5.5 for high-risk, runtime/core, security, CI/CD, deploy, release, and final quality calls.", + "Use DeepSeek for self-contained medium-complexity work with limited write scope and verifiable tests.", + "Use MiniMax only for simple, low-risk, self-contained work with external evidence and commander review.", + "Keep prod restart, secret access, DB writes, destructive Git, and running-task control with the commander or human.", + ], + concurrency: { + gpt55Routine: 5, + gpt55BurstMax: 10, + minimaxSimpleMax: 10, + deepseekMediumDefault: 5, + }, + modelTiers: [ + { + model: gptSubmitModel, + runner: "codex", + taskRisk: "high-risk-or-complex", + requiredGuards: ["bounded ownership", "multi-signal verification", "no implicit prod rollout"], + }, + { + model: deepseekSubmitModel, + runner: "codex", + taskRisk: "medium-complexity", + requiredGuards: ["self-contained prompt", "limited write scope", "contract/unit verification", "commander review"], + }, + { + model: minimaxSubmitModel, + runner: "opencode", + taskRisk: "simple-low-risk", + requiredGuards: ["issue is auxiliary only", "evidence required", "no prod/secrets/DB writes", "diff and test review"], + }, + ], + externalProvider429: { + commanderAction: 
"wait-while-exponential-backoff-is-healthy", + interveneWhen: ["heartbeat expired", "retry state machine stuck", "task lost", "retry attempts exhausted"], + }, + }; +} + function submitRoutingRecommendation(options: CodexSubmitOptions): SubmitRoutingRecommendation { const prompt = options.prompt; const lower = prompt.toLowerCase(); @@ -633,7 +699,7 @@ function submitRoutingRecommendation(options: CodexSubmitOptions): SubmitRouting /\bsecret\b|\btoken\b|\bapi[_-]?key\b|\bcredential\b/gu, /\bpostgres(?:ql)?\b|\bpsql\b|\bdatabase\b|\bdb\b|\bsql\b|\bmigration\b/gu, ]); - const runtimeCore = regexEvidence(lower, [ + const runtimeCore = regexEvidenceWithoutNegatedContext(lower, [ /\bcode[- ]queue\s+(?:runtime|scheduler|backend|execution|runner)\b/gu, /\bbackend-core\b/gu, /\bprovider-gateway\b/gu, @@ -672,6 +738,14 @@ function submitRoutingRecommendation(options: CodexSubmitOptions): SubmitRouting /文档/gu, /轻量/gu, ]); + const mediumComplexityEvidence = regexEvidence(lower, [ + /\bfront[- ]?end\b|\bfrontend\b|\breact\b|\btsx\b|\bcss\b|\bui\b|\bcomponent\b/gu, + /\buser[- ]service\b|\bservice\s+module\b/gu, + /\blocal\s+(?:module|helper|cli|tool)\b/gu, + /\bbounded\s+(?:bug\s*)?fix\b|\bsmall\s+(?:bug\s*)?fix\b/gu, + /\bunit\s+test\b|\bcontract\s+guard\b/gu, + /中等复杂|中等风险|前端|组件|样式|局部(?:模块|修复)|用户服务|契约守卫/gu, + ]); const evidenceRequest = regexEvidence(lower, [ /\bevidence\b/gu, /\bverification\b|\bverified\b|\bvalidate\b|\bvalidation\b/gu, @@ -699,7 +773,7 @@ function submitRoutingRecommendation(options: CodexSubmitOptions): SubmitRouting /\btruncate\s+table\b/gu, /\bdelete\s+from\b/gu, ]); - const crossModule = regexEvidence(lower, [ + const crossModule = regexEvidenceWithoutNegatedContext(lower, [ /\bcross[- ]module\b/gu, /\barchitecture\b|\barchitectural\b/gu, /\brelease\/v1\b/gu, @@ -714,13 +788,21 @@ function submitRoutingRecommendation(options: CodexSubmitOptions): SubmitRouting const promptSelfContained = prompt.length >= 700 || selfContainedHints.length >= 3; const 
issueIsNotOnlySource = issueReference.length === 0 || issueAuxiliaryGuard.length > 0 || issueOnly.length === 0 && prompt.length >= 500; const noProdRestartSecretOrDbWrite = prodOrStateMutation.length === 0 && destructiveWords.length === 0; + const noRuntimeCoreOrReleaseWork = runtimeCore.length === 0 && crossModule.length === 0; const evidenceRequiredByPrompt = evidenceRequest.length > 0; + const mediumComplexityCandidate = promptSelfContained + && issueIsNotOnlySource + && noProdRestartSecretOrDbWrite + && noRuntimeCoreOrReleaseWork + && evidenceRequiredByPrompt + && mediumComplexityEvidence.length > 0; const signals = [ routeSignal("prod-state-secret-db-write", "block", [...prodOrStateMutation, ...destructiveWords], "Mentions production/state mutation, restart, secrets, DB writes, or destructive commands."), routeSignal("runtime-core", "warning", runtimeCore, "Touches Code Queue runtime, backend-core, provider-gateway, k3s adapter, or active run behavior."), routeSignal("issue-source-risk", "warning", issueOnly, "Prompt appears to rely on GitHub issue reading as task context."), routeSignal("issue-auxiliary-source-guard", "info", issueAuxiliaryGuard, "Prompt explicitly says GitHub issue is auxiliary and not the only source."), routeSignal("cross-module-release", "warning", crossModule, "Mentions cross-module architecture, CI/CD rollout, release line, or rollback work."), + routeSignal("medium-complexity-verifiable", "info", mediumComplexityEvidence, "Mentions bounded medium-complexity work such as frontend, local CLI/helper, user-service module, or contract guard changes."), routeSignal("low-risk-verifiable", "info", lowRiskEvidence, "Mentions low-risk or verifiable work such as docs, read-only checks, dry-run, preflight, or contract tests."), routeSignal("evidence-requested", "info", evidenceRequest, "Prompt asks for tests, validation, commit, or evidence."), routeSignal("self-contained-hints", "info", selfContainedHints, "Prompt includes explicit task sections 
that make it easier to verify without reading an issue."), @@ -742,6 +824,18 @@ function submitRoutingRecommendation(options: CodexSubmitOptions): SubmitRouting route = "gpt-5.5-codex"; confidence = "high"; reason = "This task touches runtime/core/cross-module or release-governance surfaces, so it should stay on GPT-5.5."; + } else if (mediumComplexityCandidate) { + route = "deepseek-codex"; + recommendedRunner = "codex"; + recommendedModel = deepseekSubmitModel; + confidence = "high"; + reason = "The prompt looks self-contained, medium-complexity, and verifiable without production/state privileges; it is a DeepSeek/Codex candidate after commander review."; + } else if (mediumComplexityEvidence.length > 0 && issueIsNotOnlySource && noProdRestartSecretOrDbWrite && noRuntimeCoreOrReleaseWork) { + route = "deepseek-codex"; + recommendedRunner = "codex"; + recommendedModel = deepseekSubmitModel; + confidence = "medium"; + reason = "The prompt has medium-complexity signals, but the commander should tighten self-contained context, write scope, and verification requirements before relying on DeepSeek."; } else if (promptSelfContained && issueIsNotOnlySource && evidenceRequiredByPrompt && lowRiskEvidence.length > 0) { route = "minimax-opencode"; recommendedRunner = "opencode"; @@ -773,7 +867,9 @@ function submitRoutingRecommendation(options: CodexSubmitOptions): SubmitRouting promptSelfContained, issueIsNotOnlySource, noProdRestartSecretOrDbWrite, + noRuntimeCoreOrReleaseWork, evidenceRequiredByPrompt, + mediumComplexityCandidate, commanderMustReviewUnread: true, }, explicitRequest: { @@ -785,7 +881,10 @@ function submitRoutingRecommendation(options: CodexSubmitOptions): SubmitRouting dryRunOnly: true, doesNotChangeSubmittedPayload: true, prodMiniMaxAssumedAvailable: false, + prodDeepSeekAssumedAvailable: false, + runtimeAdmissionUnchanged: true, }, + policyContract: submitPolicyContract(), }; } @@ -2495,6 +2594,7 @@ function codexSubmitTask(args: string[]): unknown { 
commands: { submitAsRequested: "remove --dry-run to submit exactly this payload", minimaxCandidate: `bun scripts/cli.ts codex submit --prompt-file <path> --model ${minimaxSubmitModel} --dry-run`, + deepseekCandidate: `bun scripts/cli.ts codex submit --prompt-file <path> --model ${deepseekSubmitModel} --dry-run`, gptCandidate: `bun scripts/cli.ts codex submit --prompt-file <path> --model ${gptSubmitModel} --dry-run`, }, }; diff --git a/scripts/src/commander.ts b/scripts/src/commander.ts new file mode 100644 index 00000000..be5cd535 --- /dev/null +++ b/scripts/src/commander.ts @@ -0,0 +1,317 @@ +const requiredDryRunMessage = "This first-phase commander contract only supports dry-run planning; live daemon/control operations are not implemented."; + +const highRiskActions = [ + "code-queue-backend-restart", + "code-queue-backend-rebuild", + "code-queue-execution-plane-restart", + "code-queue-task-interrupt", + "code-queue-task-cancel", + "prod-runtime-mutation", +] as const; + +type HighRiskAction = typeof highRiskActions[number]; + +function hasFlag(args: string[], flag: string): boolean { + return args.includes(flag); +} + +function optionValue(args: string[], name: string): string | undefined { + const index = args.indexOf(name); + if (index === -1) return undefined; + const value = args[index + 1]; + if (value === undefined || value.startsWith("--")) throw new Error(`${name} requires a value`); + return value; +} + +function isHelpToken(value: string | undefined): boolean { + return value === "help" || value === "--help" || value === "-h"; +} + +function isHighRiskAction(value: string): value is HighRiskAction { + return highRiskActions.some((action) => action === value); +} + +function commanderHelp(): Record<string, unknown> { + return { + command: "commander", + output: "json", + description: "First-phase source/contract stub for the host Codex commander control microservice; no daemon or live control action is implemented.", + usage: [ + "bun scripts/cli.ts 
commander contract", + "bun scripts/cli.ts commander plan --dry-run [--session-id id]", + "bun scripts/cli.ts commander approval request --action <action> --dry-run [--reason text] [--task-id id]", + ], + highRiskActions, + reference: "docs/reference/host-codex-commander.md", + }; +} + +export function commanderContract(): Record<string, unknown> { + return { + ok: true, + phase: "source-contract", + serviceId: "host-codex-commander", + currentImplementation: "cli-contract-stub-only", + daemonImplemented: false, + liveOperationsImplemented: false, + purpose: "Keep a host Codex commander session observable and controllable through a future direct-managed microservice without replacing Code Queue runners.", + ownershipBoundary: { + hostCodexProcess: "Long-lived Codex process on the master server host.", + controlMicroservice: "Future direct-managed bridge that records state, mediates PTY/stdio/SSH streams, injects prompts, and summarizes traces.", + codeQueue: "Remains the task execution plane; the commander only supervises through existing safe CLI/API contracts.", + claudeqq: "Approval and user-notification path for high-risk actions.", + }, + requiredCapabilities: [ + "host-codex-process-discovery", + "host-codex-start-plan", + "ssh-bridge-contract", + "pty-bridge-contract", + "stdio-bridge-contract", + "prompt-guidance-plan", + "trace-summary-plan", + "issue-20-board-read-write-entry", + "issue-46-brief-read-write-entry", + "claudeqq-high-risk-approval-entry", + ], + apiContract: { + health: "GET /health", + contract: "GET /api/commander/contract", + sessions: "GET /api/commander/sessions", + sessionPlan: "POST /api/commander/sessions/:sessionId/plan-start", + promptPlan: "POST /api/commander/sessions/:sessionId/prompt-plan", + traceSummary: "GET /api/commander/trace-summary?taskId=<taskId>", + issueWritePlan: "POST /api/commander/issues/:issueNumber/write-plan", + approvalRequest: "POST /api/commander/approvals", + }, + stateModel: { + sessionStates: ["unknown", 
"discovered", "planned", "starting", "running", "attention_required", "stopping", "stopped", "degraded"], + promptStates: ["draft", "planned", "queued_for_injection", "injected", "rejected", "failed"], + approvalStates: ["draft", "requested", "approved", "rejected", "expired", "consumed"], + storageRoot: ".state/commander/", + redactionPolicy: "Never persist or print token, secret, password, key, cookie, or authorization values in cleartext.", + }, + safetyBoundary: safetyBoundary(), + }; +} + +function safetyBoundary(): Record<string, unknown> { + return { + phaseOneMutationAllowed: false, + forbiddenWithoutExplicitUserApproval: highRiskActions, + alwaysForbidden: [ + "print-token-values", + "read-token-files-for-display", + "direct-database-state-patch", + "bypass-code-queue-backend-confirmation-policy", + "replace-code-queue-runner", + "deploy-or-restart-production-runtime-from-this-contract-stub", + ], + confirmationPolicy: "High-risk actions must draft a ClaudeQQ request, wait for explicit user approval, bind approval to one exact action, and record the decision before any future live executor may proceed.", + }; +} + +function processDiscoveryPlan(sessionId: string): Record<string, unknown> { + return { + sessionId, + mutation: false, + signals: [ + ".state/commander/sessions/<sessionId>.json", + "host process table filtered by executable and cwd markers", + "PTY/stdio bridge heartbeat file", + "last prompt/trace event sequence", + ], + startPlan: { + enabled: false, + commandShape: "codex <reviewed host profile> --cwd /workspace/unidesk", + supervisor: "future direct-managed microservice on the master server host", + reason: requiredDryRunMessage, + }, + }; +} + +function bridgePlan(): Record<string, unknown> { + return { + mutation: false, + adapters: { + ssh: { + purpose: "Reach provider hosts through existing UniDesk Host SSH / WSL SSH maintenance bridge.", + allowedUse: ["readonly diagnostics", "bounded reviewed maintenance commands", "future approved 
recovery commands"], + }, + pty: { + purpose: "Keep the host Codex interactive session alive and observable.", + allowedUse: ["windowed stdout/stderr capture", "prompt injection after policy checks", "heartbeat"], + }, + stdio: { + purpose: "Bridge non-interactive Codex or helper subprocesses with bounded transcript capture.", + allowedUse: ["contracted command execution", "trace collection", "structured result capture"], + }, + }, + guardrails: [ + "bounded output by default", + "redact secret-like values before state persistence", + "persist event sequence before acknowledging prompt injection", + "never open an interactive shell from dry-run commands", + ], + }; +} + +function issueEntryPlan(): Record<string, unknown> { + return { + mutation: false, + issues: [ + { + number: 20, + role: "Code Queue total board", + readEntrypoints: ["bun scripts/cli.ts gh issue board-audit --board-issue 20 --dry-run", "bun scripts/cli.ts gh issue board-row list --board-issue 20"], + writeEntrypoints: ["bun scripts/cli.ts gh issue board-row update <issueNumber> --board-issue 20 --field <field> --value <text> --expect-body-sha <sha>"], + }, + { + number: 46, + role: "daily commander brief", + readEntrypoints: ["bun scripts/cli.ts gh issue read 46 --json body,title,state,updatedAt"], + writeEntrypoints: ["bun scripts/cli.ts gh issue update 46 --body-profile commander-brief --body-file <file> --expect-updated-at <ts>"], + }, + ], + writeGuards: ["body-file-only", "dry-run-first", "body-sha-or-updated-at-required", "no-token-output"], + }; +} + +function traceSummaryPlan(): Record<string, unknown> { + return { + mutation: false, + sources: [ + "bounded host Codex PTY/stdio event window", + "Code Queue task trace via codex task --trace", + "Code Queue output pages via codex output", + "commander approval and prompt event JSONL", + ], + summaryShape: { + taskId: "string|null", + sessionId: "string", + lastSeq: "number", + status: "running|attention_required|blocked|terminal|unknown", + 
keyEvents: "bounded array", + openQuestions: "bounded array", + recommendedNextActions: "bounded array", + redactionsApplied: "number", + }, + outputPolicy: "Default summaries must omit raw transcript text unless explicitly requested by a future reviewed endpoint.", + }; +} + +function promptGuidancePlan(): Record<string, unknown> { + return { + mutation: false, + stages: [ + "classify intent and risk", + "attach current #20/#46/task context summary", + "apply forbidden-action guard", + "redact secret-like strings", + "persist prompt plan", + "inject only through future live executor after policy checks", + ], + highRiskEscalation: "code-queue restart/rebuild, task interrupt/cancel, production mutation, token access, and destructive Git operations require ClaudeQQ approval first.", + }; +} + +function commanderPlan(args: string[]): Record<string, unknown> { + if (!hasFlag(args, "--dry-run")) { + return { + ok: false, + error: "dry-run-required", + message: requiredDryRunMessage, + command: "bun scripts/cli.ts commander plan --dry-run", + }; + } + const sessionId = optionValue(args, "--session-id") ?? 
"primary"; + return { + ok: true, + phase: "source-contract", + mode: "dry-run", + mutation: false, + serviceId: "host-codex-commander", + processDiscovery: processDiscoveryPlan(sessionId), + bridge: bridgePlan(), + promptGuidance: promptGuidancePlan(), + traceSummary: traceSummaryPlan(), + issueEntries: issueEntryPlan(), + claudeqqApproval: { + mutation: false, + commandShape: "bun scripts/cli.ts commander approval request --action <action> --dry-run", + highRiskActions, + }, + safetyBoundary: safetyBoundary(), + }; +} + +function commanderApprovalRequest(args: string[]): Record<string, unknown> { + if (!hasFlag(args, "--dry-run")) { + return { + ok: false, + error: "dry-run-required", + message: requiredDryRunMessage, + command: "bun scripts/cli.ts commander approval request --action <action> --dry-run", + }; + } + const action = optionValue(args, "--action"); + if (action === undefined) { + return { + ok: false, + error: "validation-failed", + message: "--action is required", + highRiskActions, + }; + } + if (!isHighRiskAction(action)) { + return { + ok: false, + error: "validation-failed", + message: `unsupported high-risk action: ${action}`, + highRiskActions, + }; + } + const reason = optionValue(args, "--reason") ?? "operator-supplied reason required before live execution"; + const taskId = optionValue(args, "--task-id") ?? null; + return { + ok: true, + phase: "source-contract", + mode: "dry-run", + mutation: false, + action, + taskId, + reason, + requiresExplicitUserApproval: true, + claudeqq: { + mutation: false, + endpointShape: "POST /api/microservices/claudeqq/proxy/api/push/text", + target: "configured primary user private chat", + messageTemplate: `Approval required for ${action}. Reason: ${reason}. 
Reply with explicit approval id before execution.`, + sendImplemented: false, + }, + approvalRecordShape: { + id: "commander-approval-<stable-id>", + action, + taskId, + reason, + status: "draft", + approvedBy: null, + approvedAt: null, + expiresAt: "future reviewed timeout", + }, + blockedUntilApproved: [action], + }; +} + +export function runCommanderCommand(args: string[]): Record<string, unknown> { + const [sub, second] = args; + if (sub === undefined || isHelpToken(sub)) return commanderHelp(); + if (sub === "contract") return commanderContract(); + if (sub === "plan") return commanderPlan(args.slice(1)); + if (sub === "approval" && second === "request") return commanderApprovalRequest(args.slice(2)); + return { + ok: false, + error: "unsupported-command", + message: `Unsupported commander command: ${args.join(" ")}`, + help: commanderHelp(), + }; +} diff --git a/scripts/src/help.ts b/scripts/src/help.ts index b8cb5b7c..3aa0f5a1 100644 --- a/scripts/src/help.ts +++ b/scripts/src/help.ts @@ -44,6 +44,7 @@ export function rootHelp(): unknown { { command: "dev-env validate|prewarm-images", description: "Validate D601 unidesk-dev guardrails or prewarm dev foundation images into native k3s containerd through a bounded async job." }, { command: "artifact-registry plan|render|status|health|install|deploy-backend-core|deploy-service", description: "Manage the D601 host-managed CNCF Distribution registry and run pull-only artifact CD for supported services, including D601 direct, k3s-managed, and code-queue dev-only consumers." }, { command: "gh auth|issue|pr", description: "Run safe GitHub issue and PR CRUD/lifecycle operations through REST with body-file update replace/append, comment delete, token diagnostics, hard delete unsupported, and merge blocked." 
}, + { command: "commander contract|plan --dry-run|approval request --dry-run", description: "First-phase host Codex commander source/contract design stub; returns boundaries and approval plans without starting daemons or executing live control actions." }, { command: "code-agent-sandbox", description: "Independent Code Agent Sandbox service skeleton for adapter, mode, and credential-boundary diagnostics." }, { command: "schedule list|get|runs|run|retry-run|delete", description: "Manage backend-core scheduled tasks and run history; schedule run <id> supports --wait-ms N and retry-run reuses the failed run's schedule." }, { command: "schedule upsert-pgdata-backup [--time HH:MM] [--remote-base /SERVER_DATA/UNIDESK_PG_DATA]", description: "Create or update the daily PGDATA physical backup task that uploads monthly rotated archives to Baidu Netdisk." }, @@ -185,6 +186,26 @@ function providerHelp(): unknown { }; } +function commanderHelp(): unknown { + return { + command: "commander contract|plan|approval", + output: "json", + usage: [ + "bun scripts/cli.ts commander contract", + "bun scripts/cli.ts commander plan --dry-run [--session-id id]", + "bun scripts/cli.ts commander approval request --action <action> --dry-run [--reason text] [--task-id id]", + ], + description: "Inspect the first-phase source/contract design for the future host Codex commander microservice.", + boundary: [ + "phase one is contract-only and never starts a daemon", + "dry-run commands never open SSH, PTY, or stdio bridges", + "high-risk actions only produce a ClaudeQQ approval draft", + "token and secret values must never be printed", + ], + reference: "docs/reference/host-codex-commander.md", + }; +} + function scheduleHelp(): unknown { return { command: "schedule list|get|runs|run|retry-run|delete|upsert-pgdata-backup", @@ -340,6 +361,7 @@ export function staticNamespaceHelp(args: string[]): unknown | null { if (top === "microservice") return microserviceHelp(); if (top === "decision" || top 
=== "decision-center") return decisionHelp(); if (top === "provider") return providerHelp(); + if (top === "commander") return commanderHelp(); if (top === "schedule") return scheduleHelp(); if (top === "codex") return codexHelp(); if (top === "job") return jobHelp();