From f86a75791bfdea1b5a88807021ccd1ce365242f9 Mon Sep 17 00:00:00 2001 From: Codex Date: Mon, 18 May 2026 08:38:17 +0000 Subject: [PATCH] refactor: use git-controlled dev ci runner --- AGENTS.md | 11 +- TEST.md | 4 +- config.json | 54 -- deploy.json | 10 +- docs/reference/ci.md | 18 +- docs/reference/cli.md | 10 +- docs/reference/codex-deploy.md | 12 +- docs/reference/deploy.md | 49 +- docs/reference/deployment.md | 6 +- docs/reference/dev-ci-runner.md | 93 +++ docs/reference/microservices.md | 6 +- scripts/bootstrap/devops-install.sh | 180 ----- scripts/ci/dev-e2e.sh | 215 +++++ scripts/src/ci.ts | 248 +++--- scripts/src/deploy.ts | 41 +- .../microservices/devops/Dockerfile | 20 - src/components/microservices/devops/go.mod | 3 - src/components/microservices/devops/main.go | 739 ------------------ .../k3sctl-adapter/docker-compose.d601.yml | 2 +- .../k3sctl-adapter/k3s/devops.k3s.json | 37 - .../k3sctl-adapter/k3s/devops.k8s.yaml | 171 ---- .../microservices/k3sctl-adapter/src/index.ts | 2 +- 22 files changed, 529 insertions(+), 1402 deletions(-) create mode 100644 docs/reference/dev-ci-runner.md delete mode 100755 scripts/bootstrap/devops-install.sh create mode 100755 scripts/ci/dev-e2e.sh delete mode 100644 src/components/microservices/devops/Dockerfile delete mode 100644 src/components/microservices/devops/go.mod delete mode 100644 src/components/microservices/devops/main.go delete mode 100644 src/components/microservices/k3sctl-adapter/k3s/devops.k3s.json delete mode 100644 src/components/microservices/k3sctl-adapter/k3s/devops.k8s.yaml diff --git a/AGENTS.md b/AGENTS.md index 9fe9535d..ecd472b3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -37,10 +37,10 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文 - `bun scripts/cli.ts microservice list/status/health/diagnostics/tunnel-self-test/proxy`:管理和验证挂载在主 server、计算节点 Docker 或 k3s 控制面上的用户服务,`proxy` 支持受控 JSON body,OA Event Flow/Todo Note/Baidu Netdisk/Code Queue Manager on main-server、k3s Control/Code Queue 执行面/MDTODO/Decision 
Center/FindJob/Pipeline/MET Nonlinear on D601 的规则见 `docs/reference/microservices.md`。 - `bun scripts/cli.ts decision upload/list/show/health`:通过 backend-core 用户服务代理上传会议记录/决议 Markdown、列出记录和查看详情;Decision Center 运行在 D601 k3s,规则见 `docs/reference/microservices.md`。 - `bun scripts/cli.ts decision diary import/list/months/show`:把带日期标题的工作日志 Markdown 拆成 `YYYY-MM/YYYY-MM-DD.md` 日记条目并写入 PostgreSQL,规则见 `docs/reference/microservices.md`。 -- `bun scripts/cli.ts deploy check/plan/apply [--file deploy.json|--env dev|prod] [--service ]`:按根目录 `deploy.json` 或 `origin/master:deploy.json#environments.<env>` 的服务 repo 和 commit 期望状态校验或更新用户服务;维护通道直连 D601 只允许 `--env dev --service devops` 做 DevOps 自举/修复,backend-core/frontend/code-queue 等直管或代管微服务必须经 DevOps 控制面部署;规则见 `docs/reference/deploy.md`。 +- `bun scripts/cli.ts deploy check/plan/apply [--file deploy.json|--env dev|prod] [--service ]`:按根目录 `deploy.json` 或 `origin/master:deploy.json#environments.<env>` 的服务 repo 和 commit 期望状态校验或更新用户服务;维护通道直连 D601 不得部署 backend-core/frontend/code-queue/Decision Center/k3sctl-adapter 等直管或代管微服务;规则见 `docs/reference/deploy.md`。 - `bun scripts/cli.ts dev-env validate [--manifest path] [--kubectl-dry-run]` / `dev-env prewarm-images`:离线校验 D601 `unidesk-dev` 生产隔离护栏,或把开发底座基础镜像预热到 D601 原生 k3s containerd,规则见 `docs/reference/deploy.md` 与 `docs/reference/microservices.md`。 -- `bun scripts/cli.ts ci install/status/run/run-dev-e2e/logs`:在 D601 原生 k3s 上安装和运行 Tekton CI,支持每 commit 检查、Code Queue 只读性能门禁和手动触发的 `master:deploy.json#environments.dev` 临时 namespace e2e,不部署 CD;规则见 `docs/reference/ci.md`。 -- `bun scripts/cli.ts codex deploy <commitId>`:旧 Code Queue 兼容部署入口已禁用,原因是它会绕过 DevOps 控制面直连 D601 部署 Code Queue;后续 Code Queue 部署必须经 DevOps 控制面,规则见 `docs/reference/codex-deploy.md`。 +- `bun scripts/cli.ts ci install/status/run/run-dev-e2e/logs`:在 D601 原生 k3s 上安装和运行 Tekton CI,支持每 commit 检查、Code Queue 只读性能门禁和手动触发的 `origin/master:deploy.json#environments.dev` 临时 namespace e2e;`run-dev-e2e` 的 Git 控制 runner、短 launcher 和 no-CD 边界见 
`docs/reference/dev-ci-runner.md`,Tekton 规则见 `docs/reference/ci.md`。 +- `bun scripts/cli.ts codex deploy <commitId>`:旧 Code Queue 兼容部署入口已禁用,原因是它会绕过受控部署边界直连 D601 部署 Code Queue;规则见 `docs/reference/codex-deploy.md`。 - `bun scripts/cli.ts codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue ]`:通过 backend-core 私有代理提交 Code Queue 任务;控制面默认走主 server `code-queue-mgr` 写入 PostgreSQL,`--dry-run` 可只检查请求体不入队,规则见 `docs/reference/cli.md`。 - `bun scripts/cli.ts codex task <taskId>`:按 Code Queue 任务 ID 查询初始 prompt、最后 assistant message、工具调用摘要、attempt/judge/error 和耗时,便于新任务引用历史 session。 - `bun scripts/cli.ts codex judge --attempt [--dry-run]`:按指定 task/attempt 用与队列 worker 相同的上下文构建和 MiniMax judge 调用路径单步复现完成判定;`--dry-run` 只输出 prompt/payload 诊断。 @@ -73,6 +73,7 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文 - `docs/reference/pipeline-oa-event-flow.md`:Pipeline/OA 事件流、审核/无审核流转、单步调试、甘特图渲染和最终去残留规则。 - `docs/reference/pipeline-model-proxy.md`:Pipeline v2 model proxy 链路架构、D601 宿主 proxy 服务部署、harness token 注入规则和 smoke test 验证流程。 - `docs/reference/deploy.md`:`deploy.json` desired-state、target-side build、一次性构建 proxy、直管/代管服务部署 executor 和 live commit 验证规则。 -- `docs/reference/ci.md`:D601 k3s Tekton CI、只读主数据库性能门禁、DevOps 控制面和 CLI 入口规则。 -- `docs/reference/codex-deploy.md`:D601 Code Queue 旧 `codex deploy <commitId>` 入口禁用原因、DevOps 控制面迁移边界和后续 CD 目标行为。 +- `docs/reference/ci.md`:D601 k3s Tekton CI、只读主数据库性能门禁和 CLI 入口规则。 +- `docs/reference/dev-ci-runner.md`:`ci run-dev-e2e` 的 Git 控制 runner、短 launcher、结果目录和 no-CD 边界。 +- `docs/reference/codex-deploy.md`:D601 Code Queue 旧 `codex deploy <commitId>` 入口禁用原因、受控部署边界和后续 CD 目标行为。 - `reference`:兼容旧路径的符号链接,指向 `docs/reference/`。 diff --git a/TEST.md b/TEST.md index 1d62d38d..c6a17b14 100644 --- a/TEST.md +++ b/TEST.md @@ -103,7 +103,7 @@ ## T23 D601 Code Queue User Service -阅读 `AGENTS.md`(本项目 `AGENTS.md` 同时承担 `SKILL.md` 对 `scripts/cli.ts` 的解释职责),然后用 cli 手动测试以下内容:运行 `bun scripts/cli.ts microservice list`,确认 `code-queue-mgr` 显示为 `providerId=main-server`、`deployment.mode=internal-sidecar`、Compose 后端 
`http://code-queue-mgr:4278`、`frontend.integrated=false`,并确认稳定 `code-queue` 条目说明队列管理/提交/历史/轻量 Trace 默认由主 server `code-queue-mgr` 负责,D601 k3s Code Queue 只负责 scheduler/runner/active run control 和执行态写回;使用 `bun scripts/cli.ts server rebuild code-queue-mgr` 重建主 server 控制面,再运行 `bun scripts/cli.ts microservice health code-queue-mgr`、`bun scripts/cli.ts microservice health code-queue`、`bun scripts/cli.ts microservice proxy code-queue '/api/tasks/overview?limit=5&transcriptLimit=1&compact=1&afterSeq=0&preferId='`、`bun scripts/cli.ts codex submit --dry-run --queue ` 和 `bun scripts/cli.ts codex task <已有taskId>`,确认普通控制/读取路径经 backend-core 分流到 master `code-queue-mgr`,返回 `role=master-control-plane`、`schemaReady=true`、PostgreSQL pool 上限、`noRunnerDependencies=true`、任务初始 prompt、最后 assistant message、工具调用摘要、attempt/judge/error 和耗时,不依赖 D601 `code-queue-write` ready endpoint。随后运行 `bun scripts/cli.ts codex deploy <已push的commitId>`,确认命令返回结构化错误并明确说明维护通道直连 D601 部署已禁用、Code Queue 部署必须经 DevOps 控制面,且不会返回异步部署 job id;再运行 `bun scripts/cli.ts deploy apply --service code-queue --dry-run --run-now` 可只做 would-deploy 预览,去掉 `--dry-run` 时必须在运行时变更前拒绝 D601 非 DevOps 直连部署。确认主 server 根目录 `docker-compose.yml` 中只存在 `code-queue-mgr` 而不存在执行面 `code-queue` service,并通过 `bun scripts/cli.ts ssh D601 argv bash -lc 'systemctl is-active k3s && KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl get nodes -o wide && sudo ctr --address /run/k3s/containerd/containerd.sock -n k8s.io images ls | grep -F docker.io/rancher/mirrored-pause:3.6 && ! docker ps --format "{{.Names}} {{.Image}}" | grep -E "[[:space:]]rancher/k3s:" && ! 
docker ps --format "{{.Names}}" | grep -Fx code-queue-backend'` 或等价检查证明 D601 k3s 是 WSL 原生 systemd 服务、native containerd 已有正确 pause sandbox 镜像、没有 active `rancher/k3s` 控制面容器且旧 direct Docker `code-queue-backend` 没有并行运行。运行 `bun scripts/cli.ts microservice proxy k3sctl-adapter /api/control-plane --raw` 和执行面专属 `bun scripts/cli.ts microservice proxy code-queue /api/dev-ready --raw`,确认 D601 scheduler/read/write ready endpoint、`queue.storage.primary=postgres`、`queue.storage.postgresReady=true`、`queue.devReady.missingTools=[]`、`queue.devReady.docker.versionOk=true`、`queue.devReady.docker.composeOk=true`;`queue.devReady.ssh.ready` 只在需要跨 Provider SSH/Windows-native 任务时作为强制项。在 D601 active Code Queue Pod 内验证主 PostgreSQL 端口映射可执行 `select 1`,主 OA Event Flow 端口映射 `/health` 可访问,集群内 ClaudeQQ Service `http://claudeqq.unidesk.svc.cluster.local:3290/health` 可访问;这些映射不得成为任意公网入口。 +阅读 `AGENTS.md`(本项目 `AGENTS.md` 同时承担 `SKILL.md` 对 `scripts/cli.ts` 的解释职责),然后用 cli 手动测试以下内容:运行 `bun scripts/cli.ts microservice list`,确认 `code-queue-mgr` 显示为 `providerId=main-server`、`deployment.mode=internal-sidecar`、Compose 后端 `http://code-queue-mgr:4278`、`frontend.integrated=false`,并确认稳定 `code-queue` 条目说明队列管理/提交/历史/轻量 Trace 默认由主 server `code-queue-mgr` 负责,D601 k3s Code Queue 只负责 scheduler/runner/active run control 和执行态写回;使用 `bun scripts/cli.ts server rebuild code-queue-mgr` 重建主 server 控制面,再运行 `bun scripts/cli.ts microservice health code-queue-mgr`、`bun scripts/cli.ts microservice health code-queue`、`bun scripts/cli.ts microservice proxy code-queue '/api/tasks/overview?limit=5&transcriptLimit=1&compact=1&afterSeq=0&preferId='`、`bun scripts/cli.ts codex submit --dry-run --queue ` 和 `bun scripts/cli.ts codex task <已有taskId>`,确认普通控制/读取路径经 backend-core 分流到 master `code-queue-mgr`,返回 `role=master-control-plane`、`schemaReady=true`、PostgreSQL pool 上限、`noRunnerDependencies=true`、任务初始 prompt、最后 assistant message、工具调用摘要、attempt/judge/error 和耗时,不依赖 D601 `code-queue-write` ready endpoint。随后运行 `bun scripts/cli.ts codex deploy 
<已push的commitId>`,确认命令返回结构化错误并明确说明维护通道直连 D601 部署已禁用,且不会返回异步部署 job id;再运行 `bun scripts/cli.ts deploy apply --service code-queue --dry-run --run-now` 可只做 would-deploy 预览,去掉 `--dry-run` 时必须在运行时变更前拒绝 D601 直连部署。确认主 server 根目录 `docker-compose.yml` 中只存在 `code-queue-mgr` 而不存在执行面 `code-queue` service,并通过 `bun scripts/cli.ts ssh D601 argv bash -lc 'systemctl is-active k3s && KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl get nodes -o wide && sudo ctr --address /run/k3s/containerd/containerd.sock -n k8s.io images ls | grep -F docker.io/rancher/mirrored-pause:3.6 && ! docker ps --format "{{.Names}} {{.Image}}" | grep -E "[[:space:]]rancher/k3s:" && ! docker ps --format "{{.Names}}" | grep -Fx code-queue-backend'` 或等价检查证明 D601 k3s 是 WSL 原生 systemd 服务、native containerd 已有正确 pause sandbox 镜像、没有 active `rancher/k3s` 控制面容器且旧 direct Docker `code-queue-backend` 没有并行运行。运行 `bun scripts/cli.ts microservice proxy k3sctl-adapter /api/control-plane --raw` 和执行面专属 `bun scripts/cli.ts microservice proxy code-queue /api/dev-ready --raw`,确认 D601 scheduler/read/write ready endpoint、`queue.storage.primary=postgres`、`queue.storage.postgresReady=true`、`queue.devReady.missingTools=[]`、`queue.devReady.docker.versionOk=true`、`queue.devReady.docker.composeOk=true`;`queue.devReady.ssh.ready` 只在需要跨 Provider SSH/Windows-native 任务时作为强制项。在 D601 active Code Queue Pod 内验证主 PostgreSQL 端口映射可执行 `select 1`,主 OA Event Flow 端口映射 `/health` 可访问,集群内 ClaudeQQ Service `http://claudeqq.unidesk.svc.cluster.local:3290/health` 可访问;这些映射不得成为任意公网入口。 随后登录公网 frontend `http://74.48.78.17:18081/`,进入 `用户服务 / Code Queue`,确认页面显示默认模型 `gpt-5.5`、默认执行 Provider `D601`、默认工作目录 `/workspace`、模型下拉菜单包含 `gpt-5.4-mini`/`gpt-5.4`/`gpt-5.5`、入队份数、队列指标、任务 ID、复制任务 ID、引用按钮、任务耗时、引用任务 ID、清空输入、创建成功提示、任务提交表单、Trace 输出、attempt 表、MiniMax/fallback judge 状态、追加 prompt、打断和重试控件;通过页面提交一个小任务,确认任务进入 queued/running/succeeded 或可解释的 failed 状态,并且输出区能看到运行中的 Codex 消息。批量验收时设置 `入队份数=5` 或用 `---` 分隔 5 段 prompt,一次性入队 5 条任务,确认 5 条任务按顺序运行并全部进入 succeeded 
或可解释的非成功终态,不能只运行第一条后停止;其中任一任务被 judge 判定 `fail` 时只能把当前任务标为 failed,后续 queued 任务仍必须继续推进。测试异常中断时可以提交长任务后点击 `打断`,确认任务变为 canceled 或被 judge 标记为非成功终态;自动重试只应在服务端/传输异常、任务正常结束但 execution record 显示未完成、或 judge 判定 retry 时发生;retry 必须复用已有 Codex thread 并 append 继续执行 prompt,只有当前任务 complete 后才推进队列中的下一个任务。MiniMax judge 必须能处理 Markdown fence/夹杂文本等 JSON 去噪;若去噪后仍失败,必须把解析错误和上一轮去噪前原始回答反馈给 MiniMax 修复后重试,日志中应出现 `judge_json_parse_retry`,且 repair 成功时仍以 `source=minimax` 返回。Codex provider key 只能通过 `OPENAI_API_KEY`、`CRS_OAI_KEY` 这类运行时环境透传,MiniMax API key 只能通过 D601 env-file 运行时环境传入,禁止写入 `config.json`、Dockerfile、源码或测试文档。 @@ -113,7 +113,7 @@ ## T23B D601 Decision Center User Service -阅读 `AGENTS.md` 和 `docs/reference/microservices.md`,运行 `bun scripts/cli.ts microservice list`,确认 `decision-center` 显示为 `providerId=D601`、`public=false`、`frontendOnly=true`、仓库 URL `https://github.com/pikasTech/unidesk`、k3s/k8s `k3s://unidesk/decision-center:4277` 逻辑服务映射、`deployment.mode=k3sctl-managed`、`runtime.orchestrator=k3sctl` 且无业务直连容器摘要;运行 `bun scripts/cli.ts deploy apply --service decision-center --run-now`,确认命令在运行时变更前返回结构化错误,说明维护通道直连 D601 只允许部署 DevOps;Decision Center 后续版本部署必须经 DevOps 控制面。随后运行 `bun scripts/cli.ts microservice health decision-center`,确认 `service=decision-center`、`storage=postgres`、`schemaReady=true` 且 health 中包含 `diaryEntryCount`;准备一份临时 Markdown 会议记录,运行 `bun scripts/cli.ts decision upload <markdown-file> --title <title> --type meeting --level G1 --status active --evidence <url>`,再运行 `bun scripts/cli.ts decision list` 和 `bun scripts/cli.ts decision show <id>`,确认 CLI 只通过 backend-core 用户服务代理访问,返回结构化 JSON 且能看到刚上传的记录。再准备一份包含 `# 2026年5月1日` 和 `# 2026年5月2日` 的临时工作日志 Markdown,运行 `bun scripts/cli.ts decision diary import <markdown-file> --source-file test-work-log.md --tag e2e`、`bun scripts/cli.ts decision diary months`、`bun scripts/cli.ts decision diary list --month 2026-05` 和 `bun scripts/cli.ts decision diary show 2026-05-01`,确认日记按 `YYYY-MM/YYYY-MM-DD.md` 虚拟路径拆分、写入 PostgreSQL 且重复导入幂等。最后登录公网 frontend `http://74.48.78.17:18081/`,进入 
`用户服务 / Decision Center`,确认页面显示 G0/G1 目标、P0/P1 Blocker、停放事项、最近会议/决议、筛选、全部记录表和工作日记标签;日记标签可按月筛选并查看单日 Markdown 正文;页面不得提供聊天/LLM 会话窗口,默认不得裸 JSON,完整 JSON 只能通过 `查看原始JSON` 打开。 +阅读 `AGENTS.md` 和 `docs/reference/microservices.md`,运行 `bun scripts/cli.ts microservice list`,确认 `decision-center` 显示为 `providerId=D601`、`public=false`、`frontendOnly=true`、仓库 URL `https://github.com/pikasTech/unidesk`、k3s/k8s `k3s://unidesk/decision-center:4277` 逻辑服务映射、`deployment.mode=k3sctl-managed`、`runtime.orchestrator=k3sctl` 且无业务直连容器摘要;运行 `bun scripts/cli.ts deploy apply --service decision-center --run-now`,确认命令在运行时变更前返回结构化错误,说明维护通道直连 D601 不承担服务部署;Decision Center 后续版本部署必须经未来受控 target-side CD 路径。随后运行 `bun scripts/cli.ts microservice health decision-center`,确认 `service=decision-center`、`storage=postgres`、`schemaReady=true` 且 health 中包含 `diaryEntryCount`;准备一份临时 Markdown 会议记录,运行 `bun scripts/cli.ts decision upload <markdown-file> --title <title> --type meeting --level G1 --status active --evidence <url>`,再运行 `bun scripts/cli.ts decision list` 和 `bun scripts/cli.ts decision show <id>`,确认 CLI 只通过 backend-core 用户服务代理访问,返回结构化 JSON 且能看到刚上传的记录。再准备一份包含 `# 2026年5月1日` 和 `# 2026年5月2日` 的临时工作日志 Markdown,运行 `bun scripts/cli.ts decision diary import <markdown-file> --source-file test-work-log.md --tag e2e`、`bun scripts/cli.ts decision diary months`、`bun scripts/cli.ts decision diary list --month 2026-05` 和 `bun scripts/cli.ts decision diary show 2026-05-01`,确认日记按 `YYYY-MM/YYYY-MM-DD.md` 虚拟路径拆分、写入 PostgreSQL 且重复导入幂等。最后登录公网 frontend `http://74.48.78.17:18081/`,进入 `用户服务 / Decision Center`,确认页面显示 G0/G1 目标、P0/P1 Blocker、停放事项、最近会议/决议、筛选、全部记录表和工作日记标签;日记标签可按月筛选并查看单日 Markdown 正文;页面不得提供聊天/LLM 会话窗口,默认不得裸 JSON,完整 JSON 只能通过 `查看原始JSON` 打开。 ## T24 MET Nonlinear D601 GPU User Service diff --git a/config.json b/config.json index 8061f195..64587ef2 100644 --- a/config.json +++ b/config.json @@ -807,60 +807,6 @@ ], "activeNodeId": "D601" } - }, - { - "id": "devops", - "name": "DevOps Control", - "providerId": "D601", - 
"description": "DevOps Control 是 D601 k3s 代管的轻量 CI/CD 控制面,负责常态触发 master deploy.json dev namespace e2e、查询 PipelineRun 状态和日志;provider-gateway SSH 只保留 bootstrap 和故障维护。", - "repository": { - "url": "https://github.com/pikasTech/unidesk", - "commitId": "local", - "dockerfile": "src/components/microservices/devops/Dockerfile", - "composeFile": "src/components/microservices/k3sctl-adapter/k3s/devops.k3s.json", - "composeService": "devops", - "containerName": "k3s:devops" - }, - "backend": { - "nodeBaseUrl": "k3s://devops", - "nodeBindHost": "k3s://unidesk-ci/devops", - "nodePort": 4286, - "proxyMode": "k3sctl-adapter-http", - "frontendOnly": true, - "public": false, - "allowedMethods": [ - "GET", - "HEAD", - "POST" - ], - "allowedPathPrefixes": [ - "/health", - "/live", - "/logs", - "/api/" - ], - "healthPath": "/health", - "timeoutMs": 120000 - }, - "development": { - "providerId": "D601", - "sshPassthrough": true, - "worktreePath": "/home/ubuntu/.unidesk/devops-deploy" - }, - "frontend": { - "route": "/apps/devops", - "integrated": false - }, - "deployment": { - "mode": "k3sctl-managed", - "adapterServiceId": "k3sctl-adapter", - "k3sServiceId": "devops", - "namespace": "unidesk-ci", - "expectedNodeIds": [ - "D601" - ], - "activeNodeId": "D601" - } } ], "paths": { diff --git a/deploy.json b/deploy.json index 06bb9480..979128f6 100644 --- a/deploy.json +++ b/deploy.json @@ -71,6 +71,11 @@ ] }, "dev": { + "ci": { + "repo": "https://github.com/pikasTech/unidesk", + "scriptPath": "scripts/ci/dev-e2e.sh", + "timeoutMs": 1800000 + }, "services": [ { "id": "backend-core", @@ -86,11 +91,6 @@ "id": "code-queue", "repo": "https://github.com/pikasTech/unidesk", "commitId": "b265274" - }, - { - "id": "devops", - "repo": "https://github.com/pikasTech/unidesk", - "commitId": "b265274" } ] } diff --git a/docs/reference/ci.md b/docs/reference/ci.md index a1ca0956..619ff294 100644 --- a/docs/reference/ci.md +++ b/docs/reference/ci.md @@ -1,6 +1,6 @@ # UniDesk CI On D601 k3s -UniDesk CI 
is hosted on the D601 native k3s cluster with Tekton Pipelines and Tekton Triggers. It is CI only. CD remains separate from Tekton; D601 service deployment must go through the DevOps control plane, while maintenance-channel direct D601 apply is reserved for DevOps bootstrap/repair. No Tekton task may roll out production services. +UniDesk CI is hosted on the D601 native k3s cluster with Tekton Pipelines and Tekton Triggers. It is CI only. CD remains separate from Tekton. No Tekton task may roll out production services. ## Components @@ -9,9 +9,7 @@ UniDesk CI is hosted on the D601 native k3s cluster with Tekton Pipelines and Te - UniDesk CI namespace: `unidesk-ci`. - Manifests: `src/components/microservices/k3sctl-adapter/k3s/ci/`. - CLI entry: `bun scripts/cli.ts ci install|status|run|run-dev-e2e|logs`. -- DevOps control service: `src/components/microservices/devops`, normally installed in `unidesk-ci` and reached through the k3s service-proxy path. - -Bootstrap and recovery may reach D601 through backend-core `/api/dispatch` with the existing `host.ssh` provider capability, then run native `KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl ...` on D601. That maintenance path is limited to DevOps bootstrap/repair and CI bootstrap checks; it must not deploy backend-core, frontend, Code Queue, Decision Center, k3sctl-adapter or other direct/managed microservices. Normal CI/CD control should move to `CLI -> backend-core -> k3sctl-adapter -> DevOps -> Kubernetes API/Tekton` after DevOps is healthy. No new public port is exposed. +- Dev namespace e2e runner: `bun scripts/cli.ts ci run-dev-e2e`; authoritative runner path, manifest contract and safety boundary are in `docs/reference/dev-ci-runner.md`. ## Pipeline Scope @@ -23,7 +21,7 @@ Each commit CI run performs: - Temporary `code-queue-ci-read` Deployment and ClusterIP Service in `unidesk-ci`. - Code Queue read performance checks against the production PostgreSQL through `d601-tcp-egress-gateway`. 
-`ci install` also prewarms the D601 k3s containerd runtime with the Tekton entrypoint/workingdir helper images, `oven/bun:1-debian`, `alpine/git:2.45.2` and `unidesk-code-queue:dev`. Missing images are pulled through the node-local provider-gateway WS egress proxy and then imported into native k3s containerd with digests preserved, so PipelineRun pods do not hang on external registry pulls. +`ci install` also prewarms the D601 k3s containerd runtime with the Tekton entrypoint/workingdir helper images, `oven/bun:1-debian`, `alpine/git:2.45.2` and `unidesk-code-queue:dev`. Missing images are pulled through the node-local provider-gateway WS egress proxy and then imported into native k3s containerd with digests preserved, so PipelineRun pods do not hang on external registry pulls. Sustained pull throughput below 1 MB/s is treated as a provider/main-server network or proxy degradation first, not as a Dockerfile or application failure. Git clone and dependency downloads inside the repo check task use `d601-provider-egress-proxy.unidesk.svc.cluster.local:18789`; the NO_PROXY list keeps the in-cluster read service, D601 TCP egress gateway and any in-cluster CI Git mirror on the cluster network. @@ -45,13 +43,9 @@ This means the CI service can read existing tasks, Trace summaries, Trace steps ## Dev Namespace E2E -`ci run-dev-e2e` is the manual CI entry for the dev desired-state smoke flow. The CLI fetches `origin/master:deploy.json`, reads `environments.dev`, records the `origin/master` commit that supplied the manifest, then normally calls DevOps through the existing microservice proxy to create a Tekton `PipelineRun`. The Pipeline verifies that the in-cluster Git fetch sees the same master commit before it reads `deploy.json`. +`ci run-dev-e2e` is the manual dev desired-state smoke flow. The single authoritative reference for its Git-controlled runner script, short launcher, result directory and no-CD boundary is `docs/reference/dev-ci-runner.md`. 
-`ci run-dev-e2e --direct` is reserved for CI bootstrap and recovery when DevOps is not healthy yet. It creates only the CI PipelineRun through the maintenance Host SSH path, does not deploy any direct/managed microservice, and must not become the normal CI control path. - -The first CI stage creates a temporary namespace named `unidesk-ci-e2e-<run-id>`, stores the selected desired manifest in a ConfigMap, starts an in-namespace smoke target, calls its `/health` endpoint through the Kubernetes Service DNS name, verifies the dev service commit IDs carried into the target, and deletes the namespace unless `--keep-namespace` is set. This stage proves the manual trigger, master desired-state pinning, namespace lifecycle, in-cluster Service DNS and e2e result path without mutating `unidesk`, `unidesk-dev`, production PostgreSQL, or any production workload. - -The current dev namespace e2e is a harness and smoke gate, not a full frontend/backend/code-queue stack rollout. Full-stack temporary namespace deployment can be added behind the same `run-dev-e2e` command after image build/import and per-run database bootstrap are promoted into CI. +The current dev namespace e2e is a harness and smoke gate, not a full frontend/backend/code-queue stack rollout. Full-stack temporary namespace deployment can be added behind the same command only after image build/import and per-run database bootstrap are promoted into a controlled deployment design. 
## Performance Gate @@ -94,7 +88,7 @@ bun scripts/cli.ts ci run-dev-e2e --wait-ms 600000 Inspect a run: ```bash -bun scripts/cli.ts ci logs <pipelineRunName> +bun scripts/cli.ts ci logs <runId> ``` ## Trigger Boundary diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 4a46f6d3..e67d620f 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -22,11 +22,11 @@ UniDesk 的统一 CLI 入口是根目录 `scripts/cli.ts`,运行方式固定 - `microservice list/status/health/diagnostics/tunnel-self-test/proxy` 通过 backend-core 内网 API 管理挂载在计算节点 Docker 或 k3s 控制面中的用户服务(底层命令名仍为 microservice);`health`、`diagnostics`、`tunnel-self-test` 和 `proxy` 会走真实 backend-core -> provider-gateway 或 k3sctl-adapter -> 节点服务链路,`proxy` 支持受控 JSON 请求体并对超大响应 body 默认输出有界预览,规则见 `docs/reference/microservices.md`。 - `decision upload/list/show/health` 通过 backend-core 用户服务代理访问 D601 k3s Decision Center,用于上传会议记录/决议 Markdown、列出权威记录、查看详情和健康检查;它不得直连 D601 Service、NodePort 或 provider-gateway 业务 HTTP。 - `decision diary import <markdown-file>` 将带 `# YYYY年M月D日`、`# YYYY-MM-DD` 或 `# YYYY/M/D` 标题的工作日志拆成每天一篇 Markdown 日记,按 `YYYY-MM/YYYY-MM-DD.md` 虚拟路径写入 Decision Center PostgreSQL;`decision diary list/months/show` 分别用于按月/日期查询、列出月份和查看单日正文。 -- `deploy check/plan/apply` 默认从根目录 `deploy.json` 读取服务 repo 与 commit 期望状态,join `config.json` 和现有 manifest 后使用 target-side build 单一路径校验或更新直管服务与 k3s 代管服务;`deploy plan --env dev|prod` 只从 `origin/master:deploy.json#environments.<env>` 读取 manifest 并输出 dry-run 环境计划,不使用本地 dirty worktree;维护通道直连 D601 apply 只允许 `--env dev --service devops` 做 DevOps 自举/修复,backend-core/frontend/code-queue 等 dev 服务必须经 DevOps 控制面部署,`--env prod` apply 仍禁用;规则见 `docs/reference/deploy.md`。 +- `deploy check/plan/apply` 默认从根目录 `deploy.json` 读取服务 repo 与 commit 期望状态,join `config.json` 和现有 manifest 后使用 target-side build 单一路径校验或更新已支持目标;`deploy plan --env dev|prod` 只从 `origin/master:deploy.json#environments.<env>` 读取 manifest 并输出 dry-run 环境计划,不使用本地 dirty worktree;当前 `deploy apply --env dev` 不做 D601 服务 rollout,dev desired-state smoke 使用 
`ci run-dev-e2e`;规则见 `docs/reference/deploy.md` 和 `docs/reference/dev-ci-runner.md`。 - `dev-env validate [--manifest path] [--kubectl-dry-run]` 离线校验 D601 `unidesk-dev` namespace、dev PostgreSQL 底座和 dev workload manifest。默认检查 `src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-foundation.k8s.yaml`;也可显式校验 `src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-core.k8s.yaml` 或 `src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-code-queue.k8s.yaml`。所有 namespaced 对象必须只落到 `unidesk-dev`,foundation manifest 必须包含 `postgres-dev` StatefulSet/Service、dev secret/config、迁移 Job 和 DB URL guard,core manifest 必须包含 `backend-core-dev`/`frontend-dev` Deployment/Service,Code Queue dev manifest 必须包含 `code-queue-scheduler-dev`、`code-queue-read-dev`、`code-queue-write-dev` 和 dev provider egress proxy。加 `--kubectl-dry-run` 时额外执行 `kubectl apply --dry-run=client --validate=false -f <manifest>`,仍不 apply 资源。 - `dev-env prewarm-images [--image image] [--provider-id D601] [--no-pull] [--proxy-url URL] [--pull-timeout-ms N] [--dry-run]` 创建异步 job,通过 UniDesk SSH 维护桥在 D601 上把开发底座依赖镜像从 Docker 缓存导入原生 k3s containerd。默认镜像是 `postgres:16-alpine` 和 `rancher/mirrored-library-busybox:1.36.1`,用于避免 `postgres-dev` 与 local-path helper pod 卡在外部 registry 拉取。该命令固定验证 `/etc/rancher/k3s/k3s.yaml` 指向的 native k3s 上下文,并输出 `dev_env_containerd_image_ready=...` 作为成功判据;它不 apply manifest、不修改生产 `unidesk` namespace。 -- `ci install|status|run|run-dev-e2e|logs` 管理 D601 原生 k3s 上的 Tekton CI。`run` 手动创建每 commit 检查和 Code Queue 只读性能门禁;`run-dev-e2e` 手动读取 `origin/master:deploy.json#environments.dev`,创建临时 `unidesk-ci-e2e-*` namespace,验证 dev desired manifest、临时 Service DNS 和 smoke e2e 结果,默认清理 namespace,不修改 `unidesk`、`unidesk-dev` 或生产数据库;规则见 `docs/reference/ci.md`。 -- `codex deploy <commitId>` 是旧 Code Queue 兼容部署入口,已禁用以防止维护通道直连 D601 部署 Code Queue;后续 Code Queue 部署必须经 DevOps 控制面,详细规则见 `docs/reference/codex-deploy.md`。 +- `ci install|status|run|run-dev-e2e|logs` 管理 D601 原生 k3s 上的 Tekton CI。`run` 手动创建每 commit 
检查和 Code Queue 只读性能门禁;`run-dev-e2e` 读取 `origin/master:deploy.json#environments.dev.ci`,发送短 launcher,让 D601 从同一 manifest commit 拉取 `scripts/ci/*.sh` runner 并创建临时 smoke namespace;规则见 `docs/reference/ci.md` 和 `docs/reference/dev-ci-runner.md`。 +- `codex deploy <commitId>` 是旧 Code Queue 兼容部署入口,已禁用以防止维护通道直连 D601 部署 Code Queue;当前 dev 自动化只做 `ci run-dev-e2e` smoke,不提供 Code Queue CD,详细规则见 `docs/reference/codex-deploy.md`。 - `codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue queueId] [--provider-id id] [--cwd path] [--model model] [--reasoning-effort effort] [--execution-mode mode] [--max-attempts N] [--reference-task-id id] [--dry-run]` 通过 backend-core 私有代理向稳定 `code-queue` 用户服务路径提交任务;prompt 必须且只能来自位置参数、文件或 stdin 之一,`--dry-run` 只返回结构化请求且不实际入队。提交确认和 dry-run 必须返回完整 prompt、字符数和 `truncated=false`,不能套用任务详情的预览截断策略,否则长任务 prompt 无法被人工验收。backend-core 默认把提交、队列 CRUD、已读状态、历史摘要和轻量 Trace 读取分流到主 server `code-queue-mgr`,由它写入主 PostgreSQL;D601 scheduler 只轮询并执行已入库任务。 - `codex task <taskId>` 通过 Code Queue 私有代理按任务 ID 查询结构化执行摘要;默认只返回有界 prompt/response 预览、执行 Provider、工作目录、最后 assistant message、最近工具调用摘要、attempt、judge、错误、耗时和 trace 翻页提示,适合在新队列任务中引用历史 session 且避免噪声爆炸。该摘要读取默认由主 server `code-queue-mgr` 从 PostgreSQL 返回,不依赖 D601 `code-queue-read` Service 可用。 - `codex task <taskId> --trace --tail|--from-start|--after-seq N|--before-seq N --limit N` 按页拉取 Code Queue 的逻辑 trace;响应会返回 `nextAfterSeq`、`previousBeforeSeq`、`hasMore`、`hasBefore` 和下一页/上一页命令,默认 `--trace` 取最新一页,需要完整 prompt/最后 response 时加 `--full`。 @@ -43,9 +43,9 @@ UniDesk 的统一 CLI 入口是根目录 `scripts/cli.ts`,运行方式固定 长时操作采用 Fire-and-Forget 模式:CLI 创建 `.state/jobs/{jobId}.json`,后台进程执行真实命令,并将 stdout、stderr 分别写入 `.state/jobs/{jobId}.stdout.log` 与 `.state/jobs/{jobId}.stderr.log`。调用者通过 `bun scripts/cli.ts job status <jobId>` 查询进度和尾部输出。 -`server rebuild` 与 `server start`、`server stop` 一样必须通过返回的 job id 确认结果;不要把连续 `server rebuild` 命令理解成“前一个重建已完成”,因为两个命令只是在快速创建异步 job。重建 frontend 的标准流程是运行 `bun scripts/cli.ts server rebuild frontend`,随后轮询 `bun 
scripts/cli.ts job status <jobId>` 到 `succeeded`,再用 `server status` 或 `e2e run` 验证公网 frontend;重建 Todo Note 后端使用 `bun scripts/cli.ts server rebuild todo-note`,随后用 `microservice health todo-note` 和 `microservice proxy todo-note /api/instances` 验证;重建 Code Queue Manager 使用 `bun scripts/cli.ts server rebuild code-queue-mgr`,随后用 `microservice health code-queue-mgr`、`microservice health code-queue` 和 `codex submit --dry-run` 验证主 server 控制面路径;重建 Project Manager 后端使用 `bun scripts/cli.ts server rebuild project-manager`,随后用 `microservice health project-manager` 和 `microservice proxy project-manager /api/projects` 验证;重建 Baidu Netdisk 后端使用 `bun scripts/cli.ts server rebuild baidu-netdisk`,随后用 `microservice health baidu-netdisk` 和 `microservice proxy baidu-netdisk /api/transfers` 验证;重建 OA Event Flow 后端使用 `bun scripts/cli.ts server rebuild oa-event-flow`,随后用 `microservice health oa-event-flow` 和 `microservice proxy oa-event-flow /api/diagnostics` 验证。D601 Code Queue 执行面和 Decision Center 后端由 D601 k3s/k8s 控制面代管,但不得再通过维护通道直连 D601 做部署;除 DevOps 自举/修复外,D601 直管或代管微服务必须由 DevOps 控制面执行部署、rollout 和 live commit 验证。不得把 `docker rm` 手工兜底当成正式交付步骤。 +`server rebuild` 与 `server start`、`server stop` 一样必须通过返回的 job id 确认结果;不要把连续 `server rebuild` 命令理解成“前一个重建已完成”,因为两个命令只是在快速创建异步 job。重建 frontend 的标准流程是运行 `bun scripts/cli.ts server rebuild frontend`,随后轮询 `bun scripts/cli.ts job status <jobId>` 到 `succeeded`,再用 `server status` 或 `e2e run` 验证公网 frontend;重建 Todo Note 后端使用 `bun scripts/cli.ts server rebuild todo-note`,随后用 `microservice health todo-note` 和 `microservice proxy todo-note /api/instances` 验证;重建 Code Queue Manager 使用 `bun scripts/cli.ts server rebuild code-queue-mgr`,随后用 `microservice health code-queue-mgr`、`microservice health code-queue` 和 `codex submit --dry-run` 验证主 server 控制面路径;重建 Project Manager 后端使用 `bun scripts/cli.ts server rebuild project-manager`,随后用 `microservice health project-manager` 和 `microservice proxy project-manager /api/projects` 验证;重建 Baidu Netdisk 后端使用 `bun scripts/cli.ts 
server rebuild baidu-netdisk`,随后用 `microservice health baidu-netdisk` 和 `microservice proxy baidu-netdisk /api/transfers` 验证;重建 OA Event Flow 后端使用 `bun scripts/cli.ts server rebuild oa-event-flow`,随后用 `microservice health oa-event-flow` 和 `microservice proxy oa-event-flow /api/diagnostics` 验证。D601 Code Queue 执行面和 Decision Center 后端由 D601 k3s/k8s 控制面代管,但当前不得通过维护通道直连 D601 做部署;正式 CD 控制路径另行设计,当前只允许 `ci run-dev-e2e` 启动一次性 smoke runner。不得把 `docker rm` 手工兜底当成正式交付步骤。 -新部署入口优先使用 `deploy apply`,但 D601 维护直连 apply 只服务 DevOps 自举/修复。旧的 `codex deploy` 已禁用;后续 Code Queue、Decision Center、backend-core dev、frontend dev 等 D601 服务部署应收敛到 DevOps 控制面:从 remote commit 导出源码,在目标节点一次性代理构建镜像,部署后用 live commit 校验证明不是旧服务。 +新部署入口优先使用 `deploy apply`,但当前 D601 维护直连 apply 不承担服务部署。旧的 `codex deploy` 已禁用;后续 Code Queue、Decision Center、backend-core dev、frontend dev 等 D601 服务部署应收敛到一条受控 target-side CD 路径:从 remote commit 导出源码,在目标节点一次性代理构建镜像,部署后用 live commit 校验证明不是旧服务。 ## Output Contract diff --git a/docs/reference/codex-deploy.md b/docs/reference/codex-deploy.md index 3857bdfb..3fbec6c0 100644 --- a/docs/reference/codex-deploy.md +++ b/docs/reference/codex-deploy.md @@ -1,8 +1,8 @@ # Code Queue Deploy -`bun scripts/cli.ts codex deploy <commitId>` 是旧兼容入口,现已禁用。原因是它会通过 backend-core `host.ssh` 维护通道直连 D601 部署 Code Queue,绕过 DevOps 控制面;维护通道直连 D601 现在只允许部署或修复 DevOps 本身。 +`bun scripts/cli.ts codex deploy <commitId>` 是旧兼容入口,现已禁用。原因是它会通过 backend-core `host.ssh` 维护通道直连 D601 部署 Code Queue,把维护入口扩张成第二套部署系统。 -Code Queue 后续正式部署必须由 DevOps 控制面执行:CLI 读取 `origin/master:deploy.json#environments.dev` 或生产 desired-state 后,经 backend-core、k3sctl-adapter 和 DevOps 触发 target-side build、k3s image import、rollout、stamp 和 live commit 验证。 +Code Queue 后续正式部署必须走一条受控 target-side CD 路径:读取 `origin/master:deploy.json#environments.dev` 或生产 desired-state,在目标节点执行 source fetch、build、k3s image import、rollout、stamp 和 live commit 验证。当前阶段不提供 Code Queue CD;只提供 `ci run-dev-e2e` dev smoke runner,规则见 `docs/reference/dev-ci-runner.md`。 ## Command @@ -10,13 +10,13 
@@ Code Queue 后续正式部署必须由 DevOps 控制面执行:CLI 读取 `orig bun scripts/cli.ts codex deploy <commitId> ``` -该命令必须返回结构化错误,提示改用 DevOps 控制面;不得再创建后台部署 job。`--skip-build` 不再支持。 +该命令必须返回结构化错误,说明维护通道直连 D601 部署已禁用;不得再创建后台部署 job。`--skip-build` 不再支持。 ## Pipeline -历史部署 job 曾固定为以下步骤;它们现在只能作为 DevOps 控制面实现 Code Queue CD 时的目标行为,不能由 `codex deploy` 或维护通道直连触发: +历史部署 job 曾固定为以下步骤;它们现在只能作为未来受控 Code Queue CD 的目标行为,不能由 `codex deploy` 或维护通道直连触发: -1. 对 Code Queue 部署先确保 PostgreSQL 中存在 `unidesk_deploy_ssh_identities(id='github.com')`,该记录保存 GitHub deploy SSH identity 的 private key、public key fingerprint 和 github.com `known_hosts` 行。DevOps 控制面不得把 secret 写入 task payload、deploy 日志、Docker image 或 Kubernetes Secret。 +1. 对 Code Queue 部署先确保 PostgreSQL 中存在 `unidesk_deploy_ssh_identities(id='github.com')`,该记录保存 GitHub deploy SSH identity 的 private key、public key fingerprint 和 github.com `known_hosts` 行。未来受控 CD 不得把 secret 写入 task payload、deploy 日志、Docker image 或 Kubernetes Secret。 2. 在 D601 的 deploy cache 中通过本机 provider-gateway WS egress proxy 执行 `git fetch` remote,并用 `git archive <commitId>` 导出 tracked files 到一次性 export 目录;不得让 D601 直连 GitHub,也不得临时创建 SSH SOCKS、公网 master proxy 或 backend-core/provider-ingress fallback。 3. 用 `rsync --delete` 同步导出的 repo 到 `/home/ubuntu/cq-deploy`,保留 `.state/`、`logs/`、`.git/`、`node_modules/` 和 `dist/`。 4. 
在 D601 用目标 Docker daemon 的本地 BuildKit builder 构建 `unidesk-code-queue:d601`,复用 D601 上已有基础镜像、inline cache 和 Code Queue build-base;provider-gateway WS egress 是唯一允许的构建代理通道,只作为本次 build 的环境变量与 build-arg 注入,并配合本次 build 的 `--network host` 让 RUN 阶段访问 D601 宿主 loopback proxy,不能污染 D601 宿主 Docker/HTTP proxy 配置,不能新建 SSH SOCKS、公网 master proxy 或直连 fallback。 @@ -28,7 +28,7 @@ bun scripts/cli.ts codex deploy <commitId> ## Observability -DevOps 控制面实现 Code Queue CD 后,部署触发本身不应阻塞等待完成。返回 JSON 中必须包含 run id、status command 或等价查询入口;后台日志必须有界可查,失败时能显示最后日志尾部。 +未来受控 Code Queue CD 实现后,部署触发本身不应阻塞等待完成。返回 JSON 中必须包含 run id、status command 或等价查询入口;后台日志必须有界可查,失败时能显示最后日志尾部。 部署 run 到 `succeeded` 时,必须已经完成 live commit 验证。需要人工复核时可用以下命令确认 `deploy.commit`: diff --git a/docs/reference/deploy.md b/docs/reference/deploy.md index 93b45250..0f2c2c0c 100644 --- a/docs/reference/deploy.md +++ b/docs/reference/deploy.md @@ -20,6 +20,11 @@ The root `deploy.json` is the single desired-state source for both prod and dev. ] }, "dev": { + "ci": { + "repo": "https://github.com/pikasTech/unidesk", + "scriptPath": "scripts/ci/dev-e2e.sh", + "timeoutMs": 1800000 + }, "services": [ { "id": "backend-core", @@ -34,35 +39,17 @@ The root `deploy.json` is the single desired-state source for both prod and dev. `schemaVersion=1` remains accepted only as a local compatibility format. Standard environment commands use `schemaVersion=2` and select `environments.dev.services` or `environments.prod.services`. -`deploy.json` must not contain provider IDs, ports, compose service names, Kubernetes namespace, health paths, environment variables, Dockerfile paths or build commands. The deploy reconciler joins each `id` with `config.json.microservices[]` and existing k3s manifests to resolve those details. A service listed in `deploy.json` but missing from `config.json` is an error. A service with no Dockerfile source artifact is reported as unsupported rather than silently skipped. 
`commitId` may be a unique pushed short SHA or a full SHA; every deploy command resolves it through the remote repository to a full 40-character commit before target-side build or rollout, and fails immediately if the SHA is missing or ambiguous. +`deploy.json` service entries must not contain provider IDs, ports, compose service names, Kubernetes namespace, health paths, environment variables, Dockerfile paths or build commands. The deploy reconciler joins each service `id` with `config.json.microservices[]` and existing k3s manifests to resolve those details. A service listed in `deploy.json` but missing from `config.json` is an error. A service with no Dockerfile source artifact is reported as unsupported rather than silently skipped. `commitId` may be a unique pushed short SHA or a full SHA; every deploy command resolves it through the remote repository to a full 40-character commit before target-side build or rollout, and fails immediately if the SHA is missing or ambiguous. -Environment mode never reads the local dirty working tree manifest. `deploy check --env ...`, `deploy plan --env ...` and `deploy apply --env ...` fetch `origin/master`, read `origin/master:deploy.json`, select `environments.<env>`, and report the manifest commit/blob, service commit IDs, target namespace, database fingerprint and Provider identity. Maintenance-channel direct D601 apply is intentionally narrow: only `deploy apply --env dev --service devops` may use that path, and only for DevOps bootstrap, repair or break-glass recovery. `deploy apply --env dev --service backend-core|frontend|code-queue` and local-manifest D601 service apply are rejected before runtime mutation; those services must be deployed by the DevOps control plane after it is healthy. `deploy apply --env prod` remains disabled until the production environment executor and authorization policy are explicitly added. +The only non-service execution declaration currently allowed under `environments.dev` is `ci`. 
It selects the Git-controlled one-shot dev CI runner; the authoritative `repo`, `scriptPath`, `timeoutMs`, short launcher and no-CD boundary are defined in `docs/reference/dev-ci-runner.md`. + +Environment mode never reads the local dirty working tree manifest. `deploy check --env ...`, `deploy plan --env ...` and `deploy apply --env ...` fetch `origin/master`, read `origin/master:deploy.json`, select `environments.<env>`, and report the manifest commit/blob, service commit IDs, target namespace, database fingerprint and Provider identity. Maintenance-channel direct D601 apply is intentionally narrow: no D601 backend-core/frontend/code-queue/Decision Center/k3sctl-adapter service deployment may use that path. `deploy apply --env prod` remains disabled until the production environment executor and authorization policy are explicitly added. `config.json.microservices[].repository.commitId` is retained for catalog compatibility, but `deploy.json` is the deployment version authority for the reconciler. -## DevOps Bootstrap +## Dev CI Runner -DevOps has an intentional first-install bootstrap path to avoid a circular dependency where the service that should deploy CI/CD must already exist before it can deploy itself. - -The only supported first-install shape is a one-shot D601-side script: - -```bash -tmp=$(mktemp) && curl -fsSL https://raw.githubusercontent.com/pikasTech/unidesk/master/scripts/bootstrap/devops-install.sh -o "$tmp" && sudo bash "$tmp" --commit <unidesk-commit-id> --env dev -``` - -The bootstrapper may use D601 local shell, native SSH or provider-gateway Host SSH as a maintenance bridge, but only for DevOps bootstrap, repair and break-glass recovery. This maintenance bridge must not deploy backend-core, frontend, Code Queue, Decision Center, k3sctl-adapter or any other direct/managed microservice. It must run source fetch, Go build, Docker build, k3s image import and Kubernetes apply on D601. 
The main server must not compile Go/Rust or build DevOps images for D601. - -The bootstrapper is deliberately narrow and idempotent: - -- Verify D601 native k3s and `/etc/rancher/k3s/k3s.yaml`. -- Clone or fetch the UniDesk repo on D601 and checkout the requested commit. -- Build `src/components/microservices/devops/Dockerfile` on D601. -- Import `unidesk-devops:dev` into native k3s containerd. -- Apply `src/components/microservices/k3sctl-adapter/k3s/devops.k8s.yaml` into `unidesk-ci`. -- Wait for `deployment/devops` rollout and `/health`. -- Write a local bootstrap receipt with repo, requested commit, resolved commit, namespace, image and health result. - -After DevOps is healthy, normal CI/CD control should move to `CLI -> backend-core -> k3sctl-adapter -> DevOps -> Kubernetes API/Tekton`. Host SSH remains a DevOps repair path, not a general CI/CD control plane and not a service deployment path. +Dev desired-state smoke verification is not a deploy executor. Use `bun scripts/cli.ts ci run-dev-e2e` for the Git-controlled temporary namespace runner described in `docs/reference/dev-ci-runner.md`. `deploy apply --env dev` must not roll out D601 direct or k3s-managed services in the current CI-only phase. ## D601 Dev Foundation @@ -91,7 +78,7 @@ Phase 3 introduces the dev backend/frontend manifest at `src/components/microser `backend-core-dev` must use `unidesk-dev-runtime-config` and `unidesk-dev-runtime-secrets`, connect to `postgres-dev.../unidesk_dev`, expose HTTP on 8080 and provider ingress on 8081, and write logs under `/var/log/unidesk-dev`. `frontend-dev` must set `CORE_INTERNAL_URL=http://backend-core-dev.unidesk-dev.svc.cluster.local:8080` and must not proxy to production backend-core. -The manifest keeps placeholder image tags and deploy commit values in source control. 
Maintenance-channel direct D601 apply must not deploy `backend-core-dev` or `frontend-dev`; the CLI rejects `deploy apply --env dev --service backend-core|frontend` before runtime mutation. Dev core deployment must be implemented as a DevOps-controlled CD action that fetches `origin/master:deploy.json`, selects `environments.dev`, materializes the requested source commit on D601, narrows the dev core control manifest to the selected Service/Deployment pair, replaces placeholders with the requested commit and dev image tag, builds on D601, imports the image into native k3s containerd, applies only the `unidesk-dev` objects and stamps the Deployment. Client dry-run and static validation are the required checks before any controlled apply: +The manifest keeps placeholder image tags and deploy commit values in source control. Maintenance-channel direct D601 apply must not deploy `backend-core-dev` or `frontend-dev`; the CLI rejects `deploy apply --env dev --service backend-core|frontend` before runtime mutation. Dev core deployment must be implemented later as a controlled D601 target-side action that fetches `origin/master:deploy.json`, selects `environments.dev`, materializes the requested source commit on D601, narrows the dev core control manifest to the selected Service/Deployment pair, replaces placeholders with the requested commit and dev image tag, builds on D601, imports the image into native k3s containerd, applies only the `unidesk-dev` objects and stamps the Deployment. 
Client dry-run and static validation are the required checks before any controlled apply: - `bun scripts/cli.ts dev-env validate --manifest src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-core.k8s.yaml` - `KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl apply --dry-run=client --validate=false -f src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-core.k8s.yaml` @@ -104,7 +91,7 @@ Phase 5 introduces the dev Code Queue execution manifest at `src/components/micr All dev Code Queue components must use `unidesk-dev-runtime-config` and `unidesk-dev-runtime-secrets`, connect to `postgres-dev.../unidesk_dev`, write logs and state under `/home/ubuntu/unidesk-dev-code-queue-deploy/state`, and expose HTTP on 4222 only as ClusterIP services. The scheduler uses `CODE_QUEUE_MAIN_PROVIDER_ID=D601-dev`, `CODE_QUEUE_WORKDIR=/workspace-dev`, `CODE_QUEUE_REMOTE_WORKDIR=/home/ubuntu/unidesk-dev-workspace`, disables ClaudeQQ notifications by default, and does not use the production `d601-tcp-egress-gateway` or production PostgreSQL route. -Maintenance-channel direct D601 apply must not deploy dev Code Queue; the CLI rejects `deploy apply --env dev --service code-queue` and the old `codex deploy` compatibility entry is disabled. Dev Code Queue deployment must be a DevOps-controlled CD action that fetches `origin/master:deploy.json`, selects `environments.dev`, materializes the requested source commit on D601, uses the dev Code Queue control manifest from that D601 materialized commit, narrows it to Code Queue dev objects, replaces placeholders with the requested commit and `unidesk-code-queue:dev`, builds on D601, imports the image into native k3s containerd, applies only `unidesk-dev` objects and stamps the dev Deployments. 
Because Code Queue carries the agent toolchain and browser/runtime dependencies, dev builds may reuse an existing D601 `unidesk-code-queue:d601-build-base` or `unidesk-code-queue:d601` image when the dev build-base tag is absent, and the deploy executor allows a longer Code Queue build window than lightweight services. The scheduler has an explicit 5Gi memory limit and must use `Recreate` rollout strategy so an update does not temporarily require two scheduler replicas under the namespace quota. All dev Code Queue containers must set CPU limits so the namespace `LimitRange` does not inject a quota-breaking default CPU limit. Live health verification uses the Kubernetes API service proxy for the dev ClusterIP Service, not `kubectl exec` or debug binaries inside the application image. This first dev execution slice proves deployability, health and dev database isolation; wiring the dev frontend stable `code-queue` route through a dev `code-queue-mgr` is a separate later phase. +Maintenance-channel direct D601 apply must not deploy dev Code Queue; the CLI rejects `deploy apply --env dev --service code-queue` and the old `codex deploy` compatibility entry is disabled. Dev Code Queue deployment must be implemented later as a controlled D601 target-side action that fetches `origin/master:deploy.json`, selects `environments.dev`, materializes the requested source commit on D601, uses the dev Code Queue control manifest from that D601 materialized commit, narrows it to Code Queue dev objects, replaces placeholders with the requested commit and `unidesk-code-queue:dev`, builds on D601, imports the image into native k3s containerd, applies only `unidesk-dev` objects and stamps the dev Deployments. 
Because Code Queue carries the agent toolchain and browser/runtime dependencies, dev builds may reuse an existing D601 `unidesk-code-queue:d601-build-base` or `unidesk-code-queue:d601` image when the dev build-base tag is absent, and the deploy executor allows a longer Code Queue build window than lightweight services. The scheduler has an explicit 5Gi memory limit and must use `Recreate` rollout strategy so an update does not temporarily require two scheduler replicas under the namespace quota. All dev Code Queue containers must set CPU limits so the namespace `LimitRange` does not inject a quota-breaking default CPU limit. Live health verification uses the Kubernetes API service proxy for the dev ClusterIP Service, not `kubectl exec` or debug binaries inside the application image. This first dev execution slice proves deployability, health and dev database isolation; wiring the dev frontend stable `code-queue` route through a dev `code-queue-mgr` is a separate later phase. ## CLI @@ -114,7 +101,7 @@ Maintenance-channel direct D601 apply must not deploy dev Code Queue; the CLI re `bun scripts/cli.ts deploy plan --env dev [--service <id>]` reads `origin/master:deploy.json#environments.dev` and prints a dry-run environment plan without checking or mutating live runtime resources. `deploy check --env dev` uses the same dry-run environment plan. `--env prod` is available for parity as a dry-run planning path; it reads `origin/master:deploy.json#environments.prod` and must not use a dirty local `deploy.json`. -`bun scripts/cli.ts deploy apply [--file deploy.json | --env dev] [--service <id>] [--dry-run] [--force]` starts an asynchronous job only for supported targets. Use `bun scripts/cli.ts job status <jobId> --tail-bytes 30000` to observe progress. `--dry-run` resolves the same plan but does not build or replace runtime objects. `--force` rebuilds even when the live commit matches. 
Environment apply currently supports only `--env dev --service devops` on the D601 maintenance direct path; `--env prod` apply is rejected, and D601 non-DevOps service apply is rejected before any runtime mutation. +`bun scripts/cli.ts deploy apply [--file deploy.json | --env dev] [--service <id>] [--dry-run] [--force]` starts an asynchronous job only for supported targets. Use `bun scripts/cli.ts job status <jobId> --tail-bytes 30000` to observe progress. `--dry-run` resolves the same plan but does not build or replace runtime objects. `--force` rebuilds even when the live commit matches. Environment apply is not the dev e2e trigger; use `bun scripts/cli.ts ci run-dev-e2e` for the Git-controlled temporary namespace smoke flow. `--env prod` apply is rejected, and D601 service apply is rejected before any runtime mutation. All deploy commands output JSON. Long operations must use `.state/jobs/` and bounded log tails; no deploy path may succeed with missing progress output. @@ -153,13 +140,13 @@ The reconciler selects the executor from `config.json`: - `deployment.mode=unidesk-direct` on `main-server`: build the image on the main server, then use the fixed UniDesk Compose project and `up -d --no-build --no-deps --force-recreate <service>`. - `deployment.mode=internal-sidecar` on `main-server`: use the same main-server target-side source export, Docker build, image label stamping, fixed Compose project replacement and live commit verification as direct Compose services. This class is for private sidecars such as `code-queue-mgr`; it is still versioned by `deploy.json.commitId`, not by the operator's current worktree. -- `deployment.mode=unidesk-direct` on a provider: this executor is disabled for D601 service deployment except for the explicit DevOps bootstrap/repair path. 
The historical behavior dispatched `host.ssh` to the provider, built on the provider, then used the service's provider-local compose file and project; that shape must move behind DevOps for D601 services so the maintenance bridge cannot become a second deployment control plane. +- `deployment.mode=unidesk-direct` on a provider: this executor is disabled for D601 service deployment. The historical behavior dispatched `host.ssh` to the provider, built on the provider, then used the service's provider-local compose file and project; that shape must not remain a second deployment control plane. - Control bridges that UniDesk needs in order to inspect or repair an orchestrator must stay in this direct class. In particular, `k3sctl-adapter` is a UniDesk-managed bridge to native k3s and must remain outside k3s; Docker packaging on Docker Desktop/WSL must create an explicit host-local bridge, currently an adapter-container SSH local tunnel, to reach `/etc/rancher/k3s/k3s.yaml` and WSL `127.0.0.1:6443`. -- `deployment.mode=k3sctl-managed`: the target behavior is to build on the active control target, verify native k3s on the host OS/WSL distro, import the image into native k3s/containerd, apply the existing Kubernetes manifest, stamp the Deployment and wait for rollout. On D601, maintenance-channel direct execution of this behavior is reserved for DevOps itself; other k3s managed services must be reconciled by DevOps after bootstrap. The executor must use the native kubeconfig and containerd socket, for example `/etc/rancher/k3s/k3s.yaml` and `/run/k3s/containerd/containerd.sock`; running k3s itself in Docker is forbidden for both control-plane and worker nodes. A `rancher/k3s` image or legacy container may only be used as a temporary artifact source during migration, and any active containerized k3s control plane must be stopped before verification succeeds. 
The executor must preload a valid `rancher/mirrored-pause:3.6` sandbox image into native k3s containerd through the provider-gateway one-shot egress path, verify its entrypoint is `/pause`, and reject fake or sleep-based replacement images. Code Queue's k3s migration executor must also stop/remove the legacy direct Docker `code-queue-backend` after k3s rollout, so there is never a second scheduler running beside the native k3s scheduler. +- `deployment.mode=k3sctl-managed`: the target behavior is to build on the active control target, verify native k3s on the host OS/WSL distro, import the image into native k3s/containerd, apply the existing Kubernetes manifest, stamp the Deployment and wait for rollout. On D601, maintenance-channel direct execution of this behavior is not allowed for normal services; the current work is CI-only and must not roll out services. The executor must use the native kubeconfig and containerd socket, for example `/etc/rancher/k3s/k3s.yaml` and `/run/k3s/containerd/containerd.sock`; running k3s itself in Docker is forbidden for both control-plane and worker nodes. A `rancher/k3s` image or legacy container may only be used as a temporary artifact source during migration, and any active containerized k3s control plane must be stopped before verification succeeds. The executor must preload a valid `rancher/mirrored-pause:3.6` sandbox image into native k3s containerd through the provider-gateway one-shot egress path, verify its entrypoint is `/pause`, and reject fake or sleep-based replacement images. Code Queue's k3s migration executor must also stop/remove the legacy direct Docker `code-queue-backend` after k3s rollout, so there is never a second scheduler running beside the native k3s scheduler. -Existing service-specific commands such as Code Queue deploy are disabled as direct D601 deploy paths. Their build/import/rollout semantics should converge into DevOps-controlled CD instead of keeping a parallel implementation. 
+Existing service-specific commands such as Code Queue deploy are disabled as direct D601 deploy paths. Their build/import/rollout semantics should converge later into one controlled target-side deployment path instead of keeping parallel implementations. -Decision Center is a standard `k3sctl-managed` service in this model, but D601 maintenance-channel direct apply must not deploy it. DevOps-controlled CD for Decision Center should build `src/components/microservices/decision-center/Dockerfile` on D601, import `unidesk-decision-center:d601` into native k3s containerd, apply `src/components/microservices/k3sctl-adapter/k3s/decision-center.k8s.yaml`, stamp the Deployment, and verify health through `/api/microservices/decision-center/health`. It must not add a main-server Compose service, NodePort, hostPort, or provider-gateway direct HTTP backend for Decision Center. +Decision Center is a standard `k3sctl-managed` service in this model, but D601 maintenance-channel direct apply must not deploy it. Future controlled CD for Decision Center should build `src/components/microservices/decision-center/Dockerfile` on D601, import `unidesk-decision-center:d601` into native k3s containerd, apply `src/components/microservices/k3sctl-adapter/k3s/decision-center.k8s.yaml`, stamp the Deployment, and verify health through `/api/microservices/decision-center/health`. It must not add a main-server Compose service, NodePort, hostPort, or provider-gateway direct HTTP backend for Decision Center. 
## CI Separation diff --git a/docs/reference/deployment.md b/docs/reference/deployment.md index 758283ca..d777144a 100644 --- a/docs/reference/deployment.md +++ b/docs/reference/deployment.md @@ -27,7 +27,7 @@ CLI 会优先使用 `docker compose` v2 plugin;当 v2 plugin 不存在时才 Compose v2 安装后仍然必须遵守 UniDesk 的服务控制入口:全栈生命周期用 `server start` / `server stop`,单服务重建用 `server rebuild <service>`。不要因为 v2 可用就直接在生产栈上手工执行未纳入 CLI 的 `up --build`、`down -v` 或跨项目清理命令;所有会影响容器的动作都应保持 job 可观测、Compose project 固定、database named volume 保留。主 server Compose 命令必须从 `providerGateway.upgrade.hostProjectRoot` 指定的 canonical UniDesk 根目录运行,临时 worktree、Code Queue 导出目录或实验分支不得复用生产 `-p unidesk` 和固定 `container_name` 去替换生产容器。 -版本化用户服务部署优先使用 `bun scripts/cli.ts deploy apply` 或 DevOps 控制面,但 D601 维护通道直连 apply 只允许部署或修复 DevOps 本身。`deploy.json` 只声明服务 `id`、`repo` 和 `commitId`;目标节点、Dockerfile、Compose、Kubernetes manifest、健康检查和代理路径继续来自 `config.json` 与现有 manifest。主 server 直管微服务和内部 sidecar,例如 `code-queue-mgr`,也必须支持这一路径:`deploy apply --service code-queue-mgr` 从 `deploy.json` 指定 commit 导出源码、构建镜像、替换固定 Compose service 并验证运行中镜像/健康信息的 commit。部署必须遵循 target-side build:服务部署到哪台 target,就在哪台 target 从 remote commit 导出源码、一次性代理构建镜像并部署;不得把中心构建镜像作为默认分发路径,也不得用 `docker commit` 或脏 worktree 作为部署输入。完整规则见 `docs/reference/deploy.md`。 +版本化用户服务部署优先使用 `bun scripts/cli.ts deploy apply` 已支持的受控路径;当前 D601 维护通道直连 apply 不承担服务部署,dev desired-state smoke 使用 `ci run-dev-e2e`。`deploy.json` 只声明服务 `id`、`repo` 和 `commitId`;目标节点、Dockerfile、Compose、Kubernetes manifest、健康检查和代理路径继续来自 `config.json` 与现有 manifest。主 server 直管微服务和内部 sidecar,例如 `code-queue-mgr`,也必须支持这一路径:`deploy apply --service code-queue-mgr` 从 `deploy.json` 指定 commit 导出源码、构建镜像、替换固定 Compose service 并验证运行中镜像/健康信息的 commit。部署必须遵循 target-side build:服务部署到哪台 target,就在哪台 target 从 remote commit 导出源码、一次性代理构建镜像并部署;不得把中心构建镜像作为默认分发路径,也不得用 `docker commit` 或脏 worktree 作为部署输入。完整规则见 `docs/reference/deploy.md`。 ## Main Server Swap @@ -43,7 +43,7 @@ swap 管理不能被强塞进所有热路径。`server start/status` 可以暴 ## Single Service Rebuild 
-前端、backend-core、本机 provider-gateway 或主 server 承载的 Todo Note/Code Queue Manager/Project Manager/Baidu Netdisk/OA Event Flow 用户服务需要非版本化本地重建时,统一使用 `bun scripts/cli.ts server rebuild <service>`,其中 `<service>` 只能是 `backend-core`、`frontend`、`provider-gateway`、`todo-note`、`code-queue-mgr`、`project-manager`、`baidu-netdisk` 或 `oa-event-flow`。需要按 commit 上线或恢复到 desired-state 时必须改用 `bun scripts/cli.ts deploy apply --service <id>`;直管微服务也不能把脏工作树或手工重建作为部署真相。D601 Code Queue 执行面、File Browser、FindJob、Pipeline、MET Nonlinear 和 ClaudeQQ 部署在计算节点,不属于主 server Compose 可重建服务;其中 D601 Code Queue 执行面不得再通过 `codex deploy` 或维护通道直连 D601 部署,必须经 DevOps 控制面执行 build-first、rollout 和 live commit 验证。 +前端、backend-core、本机 provider-gateway 或主 server 承载的 Todo Note/Code Queue Manager/Project Manager/Baidu Netdisk/OA Event Flow 用户服务需要非版本化本地重建时,统一使用 `bun scripts/cli.ts server rebuild <service>`,其中 `<service>` 只能是 `backend-core`、`frontend`、`provider-gateway`、`todo-note`、`code-queue-mgr`、`project-manager`、`baidu-netdisk` 或 `oa-event-flow`。需要按 commit 上线或恢复到 desired-state 时必须改用 `bun scripts/cli.ts deploy apply --service <id>`;直管微服务也不能把脏工作树或手工重建作为部署真相。D601 Code Queue 执行面、File Browser、FindJob、Pipeline、MET Nonlinear 和 ClaudeQQ 部署在计算节点,不属于主 server Compose 可重建服务;其中 D601 Code Queue 执行面不得再通过 `codex deploy` 或维护通道直连 D601 部署;未来正式 CD 必须经受控 target-side 路径执行 build-first、rollout 和 live commit 验证。 frontend 改动必须明确上线到公网:修改 `src/components/frontend/src/`、`src/components/frontend/public/style.css`、frontend 使用的共享 TSX/TS 模块或 WebUI 导航后,必须在同一变更集中执行 `bun scripts/cli.ts server rebuild frontend`,并等待 job 成功。公网 WebUI 的 `/app.js` 是 `unidesk-frontend` 容器启动时从镜像内源码转译生成的运行时 bundle;只改工作区文件、只跑 `bun run check`、只跑 `Bun.build` 或只刷新浏览器都不会替换已经运行的容器。 @@ -53,7 +53,7 @@ frontend 的 Docker 上线顺序为:先运行必要的本地校验,例如 `b 紧急灾备或数据迁移期间如需手工启动单个 Compose service,也必须保持与 CLI 相同的隔离语义:使用固定 `--env-file .state/docker-compose.env` 和 `up -d --no-deps <service>`,只启动目标容器;如果需要刷新 backend-core 的服务目录或环境变量,应把 `backend-core` 作为显式目标单独重建/替换,不能依赖 `up` 的依赖解析顺手重建 database、backend-core 或其他服务。 
-正式流程不得依赖人工 `docker rm` 兜底;手工删除旧容器后若 job、Docker client 或 daemon 在 `up` 前中断,会直接造成用户服务代理失败。`server rebuild <service>`、`deploy apply` 和 DevOps 部署 run 都必须是可观测流程:build-first、受控替换、post-up validation、保留命名卷或 `.state` 运行态目录。Code Queue 等计算节点长任务服务即使被重建也必须依赖服务自身 restart-recovery 恢复任务,不能用“避免重建”掩盖恢复缺陷。 +正式流程不得依赖人工 `docker rm` 兜底;手工删除旧容器后若 job、Docker client 或 daemon 在 `up` 前中断,会直接造成用户服务代理失败。`server rebuild <service>`、`deploy apply` 和未来 D601 CD run 都必须是可观测流程:build-first、受控替换、post-up validation、保留命名卷或 `.state` 运行态目录。Code Queue 等计算节点长任务服务即使被重建也必须依赖服务自身 restart-recovery 恢复任务,不能用“避免重建”掩盖恢复缺陷。 ## User Service Restart Recovery diff --git a/docs/reference/dev-ci-runner.md b/docs/reference/dev-ci-runner.md new file mode 100644 index 00000000..9b34a21b --- /dev/null +++ b/docs/reference/dev-ci-runner.md @@ -0,0 +1,93 @@ +# Dev CI Runner + +`ci run-dev-e2e` is the single manual entry for dev desired-state smoke verification. It is deliberately smaller than a DevOps control plane: the CLI starts one Git-controlled runner on D601, D601 creates a temporary CI namespace, Tekton runs the smoke check, and the result is written back as files that the CLI can inspect. + +## Goal + +The runner exists to prove the dev desired state without interrupting production: + +- Dev/prod isolation: temporary namespaces and dev manifests must not mutate `unidesk`, `unidesk-dev`, production PostgreSQL, production Deployments, production Services or main-server Compose services. +- Version determinism: all runner inputs come from the pushed `origin/master` commit that supplied `deploy.json` and `origin/master:deploy.json#environments.dev`. +- D601 execution: Git fetch, Tekton PipelineRun creation, Kubernetes polling and e2e log collection happen on D601, not on the main master. +- CLI observability: the submit command returns a `runId`, result directory and next commands; `ci logs <runId>` can recover status after the local CLI exits. 
+- CI only: the flow may create CI-owned temporary resources, but it must not deploy backend-core, frontend, Code Queue, Decision Center, k3sctl-adapter or any other direct/managed service. + +## Non-Goals + +Do not add a long-lived DevOps service, run broker, webhook listener or second desired-state file for this phase. Do not turn Host SSH into a general deployment system. Future full-stack dev rollout or CD can reuse the same desired-state principles, but it must be designed as a separate controlled deployment path after this smoke runner is stable. + +## Manifest Contract + +`deploy.json` remains the only desired-state file. The dev environment may contain one non-service CI declaration: + +```json +{ + "schemaVersion": 2, + "environments": { + "dev": { + "ci": { + "repo": "https://github.com/pikasTech/unidesk", + "scriptPath": "scripts/ci/dev-e2e.sh", + "timeoutMs": 1800000 + }, + "services": [] + } + } +} +``` + +`scriptPath` must be a repo-relative `scripts/ci/*.sh` path. Inline shell bodies, arbitrary script paths, local dirty scripts and separate `develop.json` or CI manifest files are forbidden. The script is fetched from the same full 40-character manifest commit that supplied `deploy.json`, so the runner logic can be audited and rolled back together with the desired state. + +## Execution Path + +The automatic path is intentionally single and narrow: + +1. CLI fetches `origin/master` and reads `origin/master:deploy.json#environments.dev`. +2. CLI records the full manifest commit and generates a DNS-safe `runId`. +3. CLI sends a short launcher through backend-core `/api/dispatch` using the existing `host.ssh` provider capability for D601. +4. D601 creates `/tmp/unidesk-ci/<runId>` and `/home/ubuntu/.unidesk/runs/<runId>`. +5. D601 fetches the manifest commit from GitHub through the node-local provider-gateway WS egress proxy at `http://127.0.0.1:18789`. +6.
D601 extracts the runner with `git show <commit>:<scriptPath> > /tmp/unidesk-ci/<runId>/runner.sh` and executes it. +7. The runner creates the Tekton PipelineRun in `unidesk-ci`, waits for completion when requested, and writes `result.json`, `launcher.log`, `runner.log`, PipelineRun JSON and pod logs under `/home/ubuntu/.unidesk/runs/<runId>/`. + +The CLI must not upload the runner script body. The submitted launcher may contain only repo, full commit, script path, run id, environment, timeout and keep-namespace settings plus the fixed fetch/execute wrapper. If k3s, Tekton or the provider egress proxy is unavailable, the run fails with visible logs; it must not fall back to an alternate deployment path. + +## Runner Contract + +The Git-controlled script must accept: + +```bash +scripts/ci/dev-e2e.sh \ + --run-id <runId> \ + --repo-url <repo> \ + --desired-ref master \ + --manifest-commit <full-sha> \ + --environment dev \ + --result-dir /home/ubuntu/.unidesk/runs/<runId> \ + --timeout-ms <ms> \ + [--keep-namespace] +``` + +The current script creates a Tekton `PipelineRun` for `pipeline/unidesk-dev-namespace-e2e`, stores the generated PipelineRun name in `pipelinerun.txt`, and writes a final `result.json` with `ok`, `status`, `runId`, `manifestCommit`, `pipelineRun`, `temporaryNamespace` and `finishedAt`. + +## Commands + +Start a run and return after dispatch: + +```bash +bun scripts/cli.ts ci run-dev-e2e +``` + +Start a run and wait up to ten minutes for completion: + +```bash +bun scripts/cli.ts ci run-dev-e2e --wait-ms 600000 +``` + +Inspect run files on D601: + +```bash +bun scripts/cli.ts ci logs <runId> +``` + +Regular Tekton CI remains documented in `docs/reference/ci.md`; deployment desired-state and target-side build rules remain documented in `docs/reference/deploy.md`. 
diff --git a/docs/reference/microservices.md b/docs/reference/microservices.md index 9e4a3e72..0f0041b1 100644 --- a/docs/reference/microservices.md +++ b/docs/reference/microservices.md @@ -188,8 +188,8 @@ D601 上必须显式使用原生 k3s kubeconfig:`KUBECONFIG=/etc/rancher/k3s/k - MiniMax/OpenCode 并发:`minimax-m2.7` 通过 OpenCode JSON 事件端口运行;每个 Code Queue task 必须使用独立的 OpenCode XDG data/config/cache/state 目录,禁止多队列并发任务共享同一个 OpenCode SQLite/WAL 状态目录,否则并发 smoke 会触发 `PRAGMA journal_mode = WAL` 之类的数据库锁或初始化错误。用于验证 k3s/k8s 链路的 MiniMax smoke 以“至少 4 个任务、分布到 2 个 queue、至少 2 个终态成功”为链路验收线;剩余失败如果是 OpenCode 最终回复捕获、业务任务判定或模型限流,应作为 Code Queue 执行可靠性问题单独排查,不能反推 k3s 代理链路失败。 - 默认出网代理:D601 active Code Queue Pod 必须默认把 `HTTP_PROXY`、`HTTPS_PROXY` 和 `ALL_PROXY` 注入给 Codex/OpenCode、`git`、`curl`、`npm` 等任务子进程;当前唯一上游是 D601 provider-gateway egress HTTP CONNECT 代理,并通过 Kubernetes `Service d601-provider-egress-proxy` 暴露给 `unidesk` namespace 内的 Pod。该 Service 通过 selector 指向 D601 上的 hostNetwork 桥接 Pod,桥接 Pod 在集群端监听 service port `18789`、在宿主侧只连接 `127.0.0.1:18789` 的 provider-gateway egress endpoint;不得再用手工 EndpointSlice、provider-gateway Docker bridge IP 或固定 `172.*` 地址作为长期拓扑。Pod 内代理 URL 使用 `http://d601-provider-egress-proxy.unidesk.svc.cluster.local:18789`,provider-gateway 宿主端口仍只允许绑定 `127.0.0.1`,不得开放公网;桥接 Pod 或 provider-gateway 重建后必须用 Code Queue `/health.egressProxy.connected=true` 验证。这里的 provider-gateway 只承担出网代理,不承担 Code Queue 业务 HTTP 代理;业务访问仍只能走 Kubernetes API service proxy。k3s/k8s 原生 egress gateway、service mesh 或 CNI egress policy 只作为后续网络层增强方向,当前交付态不引入第二套出网控制面。远程开发/执行容器不得只依赖这些环境变量,必须在容器网络层用 TUN 默认路由和 OUTPUT 防火墙强制外网流量只能经 master TUN 出口。 - 出网代理无 fallback 纪律:Code Queue 的运行时配置只允许一个默认出网路径,即 provider-gateway egress proxy;不得在代码中同时保留 Code Queue 自建 WebSocket proxy、临时 shell proxy、D601 本地直连公网、主 server direct HTTP proxy 等隐式分支。任何新增网络 fallback 都必须先进入本参考文档并配套 `/health` 可见状态,否则视为残留旧路径。 -- 上线纪律:Code Queue 相关的前端或后端改进必须在同一任务内正式上线并验证公网 frontend 或 live API,不能只停留在源码、构建产物或“后续再上线”。修改 Code Queue 自身时不得等待当前 Code Queue task 结束、等待 queue idle 或等待 `0 
running` 后才重启;D601 active 实例的后端部署必须经 DevOps 控制面执行 build-first 镜像替换、k3s image import、manifest apply、rollout 和健康验证,并用 k3s adapter、Code Queue live API 或公网 frontend 证明任务和队列仍可读可继续。 -- 期望状态部署:Code Queue 仍由 `deploy.json` 的 repo 与 commit 声明版本,但维护通道直连 D601 只允许部署 DevOps 本身,不能再用 `deploy apply --service code-queue` 或 `codex deploy <commitId>` 部署 Code Queue。DevOps 控制面实现 Code Queue CD 时,应复用 `docs/reference/deploy.md` 的 target-side build 规范在 D601 构建、导入 k3s、rollout 并验证 live commit,不得维护第二套部署语义。 +- 上线纪律:Code Queue 相关的前端或后端改进必须在同一任务内正式上线并验证公网 frontend 或 live API,不能只停留在源码、构建产物或“后续再上线”。修改 Code Queue 自身时不得等待当前 Code Queue task 结束、等待 queue idle 或等待 `0 running` 后才重启;D601 active 实例的后端部署必须经未来受控 target-side CD 路径执行 build-first 镜像替换、k3s image import、manifest apply、rollout 和健康验证,并用 k3s adapter、Code Queue live API 或公网 frontend 证明任务和队列仍可读可继续。 +- 期望状态部署:Code Queue 仍由 `deploy.json` 的 repo 与 commit 声明版本,但维护通道直连 D601 不能再用 `deploy apply --service code-queue` 或 `codex deploy <commitId>` 部署 Code Queue。未来受控 Code Queue CD 应复用 `docs/reference/deploy.md` 的 target-side build 规范在 D601 构建、导入 k3s、rollout 并验证 live commit,不得维护第二套部署语义。 - 更名与灾备恢复:旧版 Codex 队列服务名只允许作为兼容诊断和一次性迁移来源;`code-queue-backend` 容器自身 `/health` 正常但 `microservice health code-queue` 返回 provider 直连错误时,优先判定为 backend-core 仍加载旧 `MICROSERVICES_JSON` 或 adapter manifest 未刷新,必须刷新 `.state/docker-compose.env`、重建/替换 `backend-core` 与 `k3sctl-adapter`,随后用 `microservice list` 验证 `code-queue` 的 `runtime.orchestrator=k3sctl`、`backend.proxyMode=k3sctl-adapter-http` 和无业务容器直连摘要。正式 k3s 部署成功后,旧 direct Docker `code-queue-backend` 必须停止并移除,不能与 `code-queue-scheduler` 同时运行;否则会形成双 scheduler、双健康来源和错误的恢复判断。 - Codex 认证:容器必须从 D601 的 `/home/ubuntu/.codex/config.toml` 同步 Codex provider 配置到 D601 `.state/code-queue/codex-home/config.toml`,并只读挂载 `/home/ubuntu/.codex/auth.json` 到容器 `/root/.codex/auth.json` 后同步到 `.state/code-queue/codex-home/auth.json`,让 `codex app-server` 使用与 host 一致的 provider 登录态;同时通过 D601 `.state/code-queue-d601.env` 或 k8s `code-queue-env` secret 透传 
`OPENAI_API_KEY`、`CRS_OAI_KEY` 等 provider 所需变量。这些 provider 环境变量和 auth 文件不得写入仓库,必须由 D601 运行时文件或 k8s secret 注入,确保容器重建和重启后不会丢失认证。新增 provider 的 `env_key` 时必须增加同类运行时透传和 Compose/k8s 持久化,禁止把 Codex 或 MiniMax 密钥写入仓库文件。Code Queue 容器必须只读挂载 D601 WSL host 的 SSH 目录到 `/root/.ssh`(默认 `/home/ubuntu/.ssh`),让容器内 `git push`、`ssh -T git@github.com` 与 WSL host 使用同一套 GitHub SSH key/known_hosts;不得把私钥复制进镜像或仓库。 - Develop-ready 镜像:Code Queue 镜像必须在启动前预装 UniDesk/Pipeline 调试所需工具,至少包含 `codex`、`bun`、`node`、`npm`/`npx`、`git`、`rg`、`curl`、`python3`/`pip3`、`docker`、`docker compose`、`docker-compose`、`jq`、`ssh`、`rsync`、`make`、`gcc`/`g++`、`iptables`、`tar`、`gzip` 和 `unzip`;不得依赖 Codex 任务运行时再 `apt-get install` 这些基础环境。 @@ -200,7 +200,7 @@ D601 上必须显式使用原生 k3s kubeconfig:`KUBECONFIG=/etc/rancher/k3s/k - Codex 控制:服务内部启动 `codex app-server --listen stdio://`,用 JSON-RPC 调用 `thread/start`、`turn/start`、`turn/steer` 和 `turn/interrupt`,并监听 `turn/completed`、assistant delta、reasoning delta、command output delta、file diff delta 等通知生成前端可轮询的 transcript。 - 用户输入持久化:任务初始 prompt 以 `basePrompt/displayPrompt` 作为结构化来源,运行中追加的 `turn/steer` prompt 必须写入 `promptHistory`;transcript 构建时从这些结构化字段合成 `Submitted prompt` 和 `Steer prompt`,不能只依赖有 600 条上限的 raw output,否则长任务输出增长后会丢失关键人工指令。 - 队列语义:`POST /api/tasks` 或 `/api/tasks/batch` 入队,服务始终只运行一个 Codex turn;当前任务真正终止后才推进下一个任务。`GET /api/tasks` 与 `GET /api/tasks/{id}` 返回队列、attempt、judge 和输出;`GET /api/tasks/{id}/summary` 返回按任务 ID 查询的结构化摘要,包括初始 prompt、最后 assistant message、工具调用摘要、attempt、judge、错误和耗时;CLI 入口是 `bun scripts/cli.ts codex task <taskId>`。`GET|POST /api/tasks/{id}/judge?attempt=N` 与 CLI `bun scripts/cli.ts codex judge <taskId> --attempt N` 用于单步复现指定 attempt 的 judge,必须复用真实队列 worker 的上下文构建、prompt 压缩、MiniMax 调用、JSON 去噪/repair 和 fallback 路径;`dryRun=1`/`--dry-run` 只输出 prompt/payload 和重建诊断,不调用 MiniMax。`POST /api/tasks/{id}/steer` 向运行中 turn 推入 prompt;`POST /api/tasks/{id}/interrupt` 或 `DELETE /api/tasks/{id}` 打断/取消;`POST /api/tasks/{id}/retry` 手动重试。队列 worker 必须隔离单个 task 的异常,不能因为某个 app-server、数据库 
claim、judge 异常、judge 超时或 judge 判定 `fail` 让后续 queued 任务停止;`fail` 只把当前任务标为 failed,随后必须继续扫描并推进下一个 queued/retry_wait 任务。数据库 claim 必须有硬超时且失败时释放 active run slot;judge 必须有独立 watchdog,超时后走 fallback judge 并继续推进。当存在 queued/retry_wait 且 worker 空闲时,watchdog 必须自动重新调度。 -- 稳定性与重启恢复:Code Queue 的第一目标是长期稳定可用;部署修复或运维排障时不得因为担心容器重启会打断任务而拒绝重启、重建或替换 active Pod。容器重启、服务进程重启和镜像替换后,队列、`promptHistory`、running/judging/retry_wait 任务和 active session 元数据必须从 PostgreSQL 恢复,并在已有 `codexThreadId` 可用时用 `thread/resume` 和 continuation prompt 无缝继续当前任务;如果原 app-server turn 已丢失,也必须把当前任务恢复到可 retry/continue 的状态,不能错误推进下一个任务或永久卡住。D601 侧重建必须走 DevOps 控制面;禁止先手工 `docker rm`、只手工 `docker compose up` 或用维护通道直连 D601 部署 Code Queue 再依赖后续命令补救,因为中断窗口会让 Pod/容器消失并触发 frontend/core 用户服务代理失败。重启后出现 active task 丢失、手动 steer/interrupt 记录丢失、running 任务卡死、误判完成、跳过当前任务、容器消失或阻塞队列,均属于 Code Queue 的 P0 核心缺陷,必须先修复并补充 restart-recovery 验收,不能把“避免重启”作为交付策略。 +- 稳定性与重启恢复:Code Queue 的第一目标是长期稳定可用;部署修复或运维排障时不得因为担心容器重启会打断任务而拒绝重启、重建或替换 active Pod。容器重启、服务进程重启和镜像替换后,队列、`promptHistory`、running/judging/retry_wait 任务和 active session 元数据必须从 PostgreSQL 恢复,并在已有 `codexThreadId` 可用时用 `thread/resume` 和 continuation prompt 无缝继续当前任务;如果原 app-server turn 已丢失,也必须把当前任务恢复到可 retry/continue 的状态,不能错误推进下一个任务或永久卡住。D601 侧重建必须走未来受控 target-side CD 路径;禁止先手工 `docker rm`、只手工 `docker compose up` 或用维护通道直连 D601 部署 Code Queue 再依赖后续命令补救,因为中断窗口会让 Pod/容器消失并触发 frontend/core 用户服务代理失败。重启后出现 active task 丢失、手动 steer/interrupt 记录丢失、running 任务卡死、误判完成、跳过当前任务、容器消失或阻塞队列,均属于 Code Queue 的 P0 核心缺陷,必须先修复并补充 restart-recovery 验收,不能把“避免重启”作为交付策略。 - 调度与 active run slot:Code Queue 必须把“queue processor 正在等待/退避/轮询”和“实际占用 Codex/OpenCode 子进程运行槽”分开建模;`CODE_QUEUE_MAX_ACTIVE_QUEUES` 只限制真实 active run slot,不能把 retry backoff、等待内存下降或等待前序任务的 `processingQueues` 计入 active slot,否则设置全局 active slot 上限时,一个空等队列会把其他 runnable queue 永久饿死。多个 queue 同时等待 active slot 时必须显式维护 FIFO waiter 队列,避免某个长 retry/backoff 队列刚释放 slot 就立刻重抢,导致更早进入等待的 `retry_wait` 任务长期饥饿;`/health` 必须同时暴露真实 `activeQueueIds`、`activeRunSlotCount`、等待中的 
`processingQueueIds` 和 active slot waiters,排障时以 active run slot 与 waiter 顺序判断是否真的有任务在跑、谁应下一个启动。restart-recovery 后的 `retry_wait` 任务若缺失 `codexThreadId`/OpenCode session id,不得无限拒绝 retry;必须用紧凑 recovery prompt 和原始任务摘要重新开一个 agent thread/session,让任务继续推进并在 Trace 中留下 recovery 证据。任何修改 scheduler、retry backoff、queue move、manual retry、shutdown recovery 或内存等待逻辑时,都必须保留“空等 processor 不占 active run slot”、“等待者 FIFO 不饥饿”和“缺失 thread/session 可恢复”的自测或 live 验证。 - 内存优化过程与防回归:Code Queue 已迁移到 D601,但内存治理仍必须按“PostgreSQL 权威源优先、进程热状态最小化、容器硬上限兜底”的顺序设计。长期可复用的优化路径是:先确认任务、queue、readAt、promptHistory、active session 和通知 outbox 均可从 PostgreSQL 恢复;再把历史任务列表、详情、统计、Trace/output 和 `/health` 的只读查询改为 PostgreSQL 直读或聚合查询;随后只把 `queued`、`running`、`judging`、`retry_wait` 等调度必需任务载入 Bun 堆,并在 PostgreSQL 查询侧裁剪 hot `output`/`events`;最后用 dirty-only flush、append-only 输出归档、Codex SQLite 小批量导出、`bun --smol`、`mem_limit=600m`、`memswap_limit=1536m`、`NODE_OPTIONS=--max-old-space-size=768` 和 cgroup memory watchdog 作为运行时防线。PostgreSQL 到进程的单次读取足够快,不能为了减少 SQL 查询把全部历史 `task_json`、Trace、output 或统计摘要常驻内存;任何新增缓存都必须有默认较小的环境变量上限、明确淘汰策略、可从 PostgreSQL 或 append-only 归档重建,且不得影响重启恢复。新增或修改 `/api/tasks`、overview、stats、summary、transcript、output、trace、health、flush、scheduler 和通知路径时,禁止在常规请求中调用会物化全量历史任务 JSON 的代码,禁止启动后无条件重写全量历史 task JSON,禁止用未设上限的 `Map`/数组保存历史 output/event/Trace,`CODE_QUEUE_MAX_ACTIVE_QUEUES=0` 表示不按 queue 数量设置全局排队上限;如显式设置为正数,必须同时说明内存预算并补充内存压测验收。memory watchdog 必须以 cgroup working set 为主要判断,且在 swap 仍有余量时不得提前杀掉唯一 active run;否则 TypeScript/Playwright 这类短时高内存验证会被错误中断并让 retry 队列反复震荡。 - 列表/详情延迟优化原则:Code Queue 控制面交互的长期目标是常规历史规模下首屏、`GET /api/tasks/overview`、`POST /api/tasks/<id>/read` 和分页加载均在 1s 内完成;性能面板出现十几秒级 `core_proxy` 或 Code Queue 用户服务代理慢操作时,必须优先按后端查询形态和前后端通信策略定位,不能把问题归因于 React 渲染后只改 UI。后端优化顺序是:先为 queue、status、updated/created 时间、readAt/terminal unread 和常用筛选条件补齐 PostgreSQL 索引;再用 SQL `COUNT`、`GROUP BY`、条件聚合和分页 ID 查询生成 queue/status/stats/unread 摘要;随后按 ID 轻量加载当前页、selected、active 和 unread priority task,禁止为了列表或已读操作解析完整 Trace、output archive、Codex 
transcript 或物化全量历史 `task_json`。`read`/`read-all` 这类 mutation 必须是 SQL-only 更新并返回最小 patch/queue 计数,不能触发 overview 全量重算或重载所有任务;启动 warm 只能预热小体积聚合和索引路径,不得把历史任务作为常驻缓存。允许 frontend/backend 代理使用秒级、严格有界、mutation 自动失效的 overview micro-cache 来吸收重复刷新,但 cache 只能作为抖动保护,不能替代数据库索引、聚合查询和分页披露,也不能让 stale readAt/queue/status 状态跨设备可见。 diff --git a/scripts/bootstrap/devops-install.sh b/scripts/bootstrap/devops-install.sh deleted file mode 100755 index c2db9814..00000000 --- a/scripts/bootstrap/devops-install.sh +++ /dev/null @@ -1,180 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -repo_url="https://github.com/pikasTech/unidesk" -commit_id="" -environment="dev" -namespace="unidesk-ci" -work_dir="/home/ubuntu/.unidesk/bootstrap/devops" -image="unidesk-devops:dev" -dry_run="0" - -usage() { - cat <<'EOF' -Usage: - devops-install.sh --commit <commit> [--env dev] [--repo-url URL] [--namespace unidesk-ci] [--dry-run] - -This script is a one-shot D601 bootstrapper. It installs or repairs the UniDesk -DevOps control service in native k3s, then normal CI/CD should use DevOps APIs. -EOF -} - -while [ "$#" -gt 0 ]; do - case "$1" in - --commit|--commit-id) - commit_id="${2:-}" - shift 2 - ;; - --env|--environment) - environment="${2:-}" - shift 2 - ;; - --repo-url) - repo_url="${2:-}" - shift 2 - ;; - --namespace) - namespace="${2:-}" - shift 2 - ;; - --work-dir) - work_dir="${2:-}" - shift 2 - ;; - --dry-run) - dry_run="1" - shift - ;; - -h|--help) - usage - exit 0 - ;; - *) - echo "unknown argument: $1" >&2 - usage >&2 - exit 2 - ;; - esac -done - -if ! [[ "$commit_id" =~ ^[0-9a-fA-F]{7,40}$ ]]; then - echo "--commit must be a 7-40 character git SHA" >&2 - exit 2 -fi - -if [ "$environment" != "dev" ]; then - echo "only --env dev is supported by the first bootstrapper" >&2 - exit 2 -fi - -log() { - printf '{"at":"%s","event":"%s"}\n' "$(date -Iseconds)" "$*" -} - -need_cmd() { - if ! 
command -v "$1" >/dev/null 2>&1; then - echo "missing required command: $1" >&2 - exit 1 - fi -} - -root_exec() { - if [ "$(id -u)" = "0" ]; then - "$@" - elif sudo -n true >/dev/null 2>&1; then - sudo -n "$@" - else - echo "root access is required for k3s containerd import" >&2 - exit 1 - fi -} - -need_cmd git -need_cmd docker -need_cmd kubectl - -if [ ! -f /etc/rancher/k3s/k3s.yaml ]; then - echo "native k3s kubeconfig not found: /etc/rancher/k3s/k3s.yaml" >&2 - exit 1 -fi - -export KUBECONFIG=/etc/rancher/k3s/k3s.yaml -kubectl get nodes >/dev/null - -log "bootstrap_preflight_ok" -if [ "$dry_run" = "1" ]; then - exit 0 -fi - -repo_dir="$work_dir/repo" -mkdir -p "$work_dir" -if [ ! -d "$repo_dir/.git" ]; then - rm -rf "$repo_dir" - git clone --no-checkout "$repo_url" "$repo_dir" -fi - -git -C "$repo_dir" remote set-url origin "$repo_url" -git -C "$repo_dir" fetch --no-tags origin "$commit_id" || git -C "$repo_dir" fetch --no-tags origin '+refs/heads/*:refs/remotes/origin/*' -resolved="$(git -C "$repo_dir" rev-parse --verify "$commit_id^{commit}")" -git -C "$repo_dir" checkout --detach "$resolved" - -log "source_ready commit=$resolved" - -docker buildx build --load \ - --progress=plain \ - --label "unidesk.ai/service-id=devops" \ - --label "unidesk.ai/source-repo=$repo_url" \ - --label "unidesk.ai/source-commit=$resolved" \ - --label "unidesk.ai/dockerfile=src/components/microservices/devops/Dockerfile" \ - -t "$image" \ - -f "$repo_dir/src/components/microservices/devops/Dockerfile" \ - "$repo_dir" - -archive="$work_dir/devops-image.tar" -rm -f "$archive" -docker save "$image" -o "$archive" -root_exec ctr --address /run/k3s/containerd/containerd.sock -n k8s.io images import "$archive" - -manifest="$work_dir/devops.k8s.yaml" -cp "$repo_dir/src/components/microservices/k3sctl-adapter/k3s/devops.k8s.yaml" "$manifest" -python3 - "$manifest" "$image" "$repo_url" "$resolved" "$commit_id" <<'PY' -import re -import sys - -path, image, repo, commit, requested = 
sys.argv[1:] -text = open(path, encoding="utf-8").read() -text = re.sub(r"image: unidesk-devops:[^\n]+", f"image: {image}", text) -text = text.replace("value: https://github.com/pikasTech/unidesk", f"value: {repo}") -text = text.replace("unidesk.ai/deploy-commit: replace-with-deploy-env-commit", f"unidesk.ai/deploy-commit: {commit}") -text = text.replace("unidesk.ai/deploy-requested-commit: replace-with-deploy-env-commit", f"unidesk.ai/deploy-requested-commit: {requested}") -text = text.replace("value: replace-with-deploy-env-commit", f"value: {commit}") -open(path, "w", encoding="utf-8").write(text) -PY - -kubectl create namespace "$namespace" --dry-run=client -o yaml | kubectl apply -f - -kubectl apply -f "$manifest" -kubectl -n "$namespace" rollout status deployment/devops --timeout=180s -kubectl -n "$namespace" get --raw "/api/v1/namespaces/$namespace/services/http:devops:4286/proxy/health" >/tmp/unidesk-devops-health.json - -receipt="$work_dir/receipt.json" -python3 - "$receipt" "$repo_url" "$resolved" "$commit_id" "$namespace" "$image" </tmp/unidesk-devops-health.json <<'PY' -import json -import sys -from datetime import datetime, timezone - -path, repo, commit, requested, namespace, image = sys.argv[1:] -health = json.load(sys.stdin) -receipt = { - "installedAt": datetime.now(timezone.utc).isoformat(), - "repo": repo, - "commit": commit, - "requestedCommit": requested, - "namespace": namespace, - "image": image, - "health": health, -} -with open(path, "w", encoding="utf-8") as handle: - json.dump(receipt, handle, ensure_ascii=False, indent=2) - handle.write("\n") -print(json.dumps({"ok": True, "receipt": path, "health": health}, ensure_ascii=False)) -PY diff --git a/scripts/ci/dev-e2e.sh b/scripts/ci/dev-e2e.sh new file mode 100755 index 00000000..d33a17af --- /dev/null +++ b/scripts/ci/dev-e2e.sh @@ -0,0 +1,215 @@ +#!/usr/bin/env bash +set -euo pipefail + +run_id="" +repo_url="https://github.com/pikasTech/unidesk" +desired_ref="master" +manifest_commit="" 
+environment="dev" +result_dir="" +timeout_ms="1800000" +keep_namespace="false" + +usage() { + cat <<'EOF' +Usage: + dev-e2e.sh --run-id ID --manifest-commit COMMIT --result-dir DIR [--repo-url URL] [--desired-ref master] [--environment dev] [--timeout-ms MS] [--keep-namespace] + +This script runs the D601 dev namespace e2e harness from a Git-controlled blob. +It must be launched by the CLI with a short command; do not paste this script +body through the maintenance channel. +EOF +} + +while [ "$#" -gt 0 ]; do + case "$1" in + --run-id) + run_id="${2:-}" + shift 2 + ;; + --repo-url) + repo_url="${2:-}" + shift 2 + ;; + --desired-ref) + desired_ref="${2:-}" + shift 2 + ;; + --manifest-commit) + manifest_commit="${2:-}" + shift 2 + ;; + --environment) + environment="${2:-}" + shift 2 + ;; + --result-dir) + result_dir="${2:-}" + shift 2 + ;; + --timeout-ms) + timeout_ms="${2:-}" + shift 2 + ;; + --keep-namespace) + keep_namespace="true" + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "unknown argument: $1" >&2 + usage >&2 + exit 2 + ;; + esac +done + +if ! [[ "$run_id" =~ ^[a-z0-9]([-a-z0-9]{0,46}[a-z0-9])?$ ]]; then + echo "invalid --run-id: $run_id" >&2 + exit 2 +fi +if ! [[ "$manifest_commit" =~ ^[0-9a-f]{40}$ ]]; then + echo "--manifest-commit must be a full 40 character SHA" >&2 + exit 2 +fi +if [ "$environment" != "dev" ]; then + echo "only --environment dev is supported" >&2 + exit 2 +fi +if ! 
[[ "$timeout_ms" =~ ^[0-9]+$ ]] || [ "$timeout_ms" -le 0 ]; then + echo "--timeout-ms must be a positive integer" >&2 + exit 2 +fi +if [ -z "$result_dir" ]; then + result_dir="/home/ubuntu/.unidesk/runs/$run_id" +fi + +mkdir -p "$result_dir" +runner_log="$result_dir/runner.log" +result_json="$result_dir/result.json" +exec > >(tee -a "$runner_log") 2>&1 + +log_json() { + local event="$1" + shift || true + printf '{"at":"%s","event":"%s"' "$(date -Iseconds)" "$event" + while [ "$#" -gt 1 ]; do + printf ',"%s":%s' "$1" "$(printf '%s' "$2" | python3 -c 'import json,sys; print(json.dumps(sys.stdin.read()))')" + shift 2 + done + printf '}\n' +} + +write_result() { + local ok="$1" + local status="$2" + local detail="$3" + python3 - "$result_json" "$ok" "$status" "$detail" "$run_id" "$repo_url" "$desired_ref" "$manifest_commit" "$environment" "$pipeline_run" "$temporary_namespace" <<'PY' +import json +import sys +from datetime import datetime, timezone + +path, ok, status, detail, run_id, repo, desired_ref, commit, environment, pipeline_run, temporary_namespace = sys.argv[1:] +record = { + "ok": ok == "true", + "status": status, + "detail": detail, + "runId": run_id, + "repoUrl": repo, + "desiredRef": desired_ref, + "manifestCommit": commit, + "environment": environment, + "pipelineRun": pipeline_run or None, + "temporaryNamespace": temporary_namespace or None, + "finishedAt": datetime.now(timezone.utc).isoformat(), +} +with open(path, "w", encoding="utf-8") as handle: + json.dump(record, handle, ensure_ascii=False, indent=2) + handle.write("\n") +print(json.dumps(record, ensure_ascii=False)) +PY +} + +pipeline_run="" +temporary_namespace="unidesk-ci-e2e-$run_id" +trap 'code=$?; if [ "$code" -ne 0 ] && [ ! 
-f "$result_json" ]; then write_result false failed "runner exited with code $code" || true; fi' EXIT + +export KUBECONFIG=/etc/rancher/k3s/k3s.yaml +kubectl get nodes >/dev/null + +log_json runner_started run_id "$run_id" manifest_commit "$manifest_commit" +kubectl get pipeline/unidesk-dev-namespace-e2e -n unidesk-ci >/dev/null +kubectl get pvc/unidesk-ci-cache -n unidesk-ci >/dev/null + +pipeline_manifest="$result_dir/pipelinerun.yaml" +cat >"$pipeline_manifest" <<YAML +apiVersion: tekton.dev/v1 +kind: PipelineRun +metadata: + generateName: unidesk-dev-e2e-$run_id- + namespace: unidesk-ci + labels: + app.kubernetes.io/name: unidesk-dev-namespace-e2e + app.kubernetes.io/part-of: unidesk + unidesk.ai/ci-kind: dev-namespace-e2e + unidesk.ai/deploy-ref: master-deploy-json-dev + unidesk.ai/deploy-commit: "$manifest_commit" +spec: + pipelineRef: + name: unidesk-dev-namespace-e2e + taskRunTemplate: + serviceAccountName: unidesk-ci-runner + params: + - name: repo-url + value: "$repo_url" + - name: desired-ref + value: "$desired_ref" + - name: deploy-commit + value: "$manifest_commit" + - name: environment + value: "$environment" + - name: run-id + value: "$run_id" + - name: keep-namespace + value: "$keep_namespace" + workspaces: + - name: shared-workspace + persistentVolumeClaim: + claimName: unidesk-ci-cache +YAML + +pipeline_run="$(kubectl create -f "$pipeline_manifest" -o jsonpath='{.metadata.name}')" +printf '%s\n' "$pipeline_run" >"$result_dir/pipelinerun.txt" +log_json pipelinerun_created pipeline_run "$pipeline_run" namespace unidesk-ci + +deadline=$((SECONDS + (timeout_ms + 999) / 1000)) +condition="" +while [ "$SECONDS" -lt "$deadline" ]; do + condition="$(kubectl get "pipelinerun/$pipeline_run" -n unidesk-ci -o jsonpath='{range .status.conditions[?(@.type=="Succeeded")]}{.status}{"\t"}{.reason}{"\t"}{.message}{end}' 2>/dev/null || true)" + case "$condition" in + True*) + kubectl get "pipelinerun/$pipeline_run" -n unidesk-ci -o json 
>"$result_dir/pipelinerun.json" + kubectl get taskrun -n unidesk-ci -l "tekton.dev/pipelineRun=$pipeline_run" -o json >"$result_dir/taskruns.json" || true + kubectl logs -n unidesk-ci -l "tekton.dev/pipelineRun=$pipeline_run" --all-containers=true --tail=-1 >"$result_dir/pods.log" 2>&1 || true + write_result true succeeded "$condition" + exit 0 + ;; + False*) + kubectl get "pipelinerun/$pipeline_run" -n unidesk-ci -o json >"$result_dir/pipelinerun.json" || true + kubectl get taskrun -n unidesk-ci -l "tekton.dev/pipelineRun=$pipeline_run" -o json >"$result_dir/taskruns.json" || true + kubectl logs -n unidesk-ci -l "tekton.dev/pipelineRun=$pipeline_run" --all-containers=true --tail=-1 >"$result_dir/pods.log" 2>&1 || true + write_result false failed "$condition" + exit 1 + ;; + esac + sleep 2 +done + +kubectl get "pipelinerun/$pipeline_run" -n unidesk-ci -o json >"$result_dir/pipelinerun.json" || true +kubectl get taskrun -n unidesk-ci -l "tekton.dev/pipelineRun=$pipeline_run" -o json >"$result_dir/taskruns.json" || true +kubectl logs -n unidesk-ci -l "tekton.dev/pipelineRun=$pipeline_run" --all-containers=true --tail=-1 >"$result_dir/pods.log" 2>&1 || true +write_result false timeout "Timed out waiting for pipelinerun/$pipeline_run" +exit 124 diff --git a/scripts/src/ci.ts b/scripts/src/ci.ts index 6a94adf1..a9fc742f 100644 --- a/scripts/src/ci.ts +++ b/scripts/src/ci.ts @@ -39,11 +39,13 @@ interface CiDevE2EOptions { desiredRef: string; deployCommit: string; environment: "dev"; + scriptRepo: string; + scriptPath: string; + scriptTimeoutMs: number; services: Array<{ id: string; commitId: string; repo: string }>; runId: string; keepNamespace: boolean; waitMs: number; - direct: boolean; } interface DispatchResult { @@ -60,6 +62,11 @@ interface DeployDevManifestSummary { deployCommit: string; desiredRef: string; environment: "dev"; + ci: { + repo: string; + scriptPath: string; + timeoutMs: number; + }; services: Array<{ id: string; commitId: string; repo: string }>; } 
@@ -121,13 +128,26 @@ function coreBody(response: unknown): Record<string, unknown> | null { return asRecord(asRecord(response)?.body); } -function proxyBody(response: unknown): Record<string, unknown> | null { - const body = coreBody(response); - const nested = asRecord(body?.body); - return nested ?? body; +function positiveManifestNumber(value: unknown, fallback: number, path: string): number { + if (value === undefined || value === null) return fallback; + if (typeof value !== "number" || !Number.isInteger(value) || value <= 0) throw new Error(`${path} must be a positive integer`); + return value; } -async function dispatchSsh(command: string, waitMs: number, remoteTimeoutMs: number): Promise<DispatchResult> { +function requireManifestString(value: unknown, path: string): string { + if (typeof value !== "string" || value.length === 0) throw new Error(`${path} must be a non-empty string`); + return value; +} + +function requireCiScriptPath(value: unknown): string { + const scriptPath = requireManifestString(value, "environments.dev.ci.scriptPath"); + if (!scriptPath.startsWith("scripts/ci/") || scriptPath.includes("..") || scriptPath.startsWith("/") || !scriptPath.endsWith(".sh")) { + throw new Error("environments.dev.ci.scriptPath must be a repo-relative scripts/ci/*.sh path"); + } + return scriptPath; +} + +async function dispatchSsh(command: string, waitMs: number, remoteTimeoutMs: number, pollCompletion = true): Promise<DispatchResult> { const dispatchResponse = coreInternalFetch("/api/dispatch", { method: "POST", body: { @@ -155,7 +175,18 @@ async function dispatchSsh(command: string, waitMs: number, remoteTimeoutMs: num raw: dispatchResponse, }; } - const deadline = Date.now() + Math.max(waitMs, remoteTimeoutMs + 10_000); + if (!pollCompletion) { + return { + ok: true, + taskId, + status: "submitted", + stdout: "", + stderr: "", + exitCode: null, + raw: dispatchBody, + }; + } + const deadline = Date.now() + Math.max(waitMs, 1_000); let latest: unknown = 
null; while (Date.now() < deadline) { latest = coreInternalFetch(`/api/tasks/${encodeURIComponent(taskId)}`, { maxResponseBytes: 3_000_000 }); @@ -183,7 +214,7 @@ async function dispatchSsh(command: string, waitMs: number, remoteTimeoutMs: num taskId, status: "timeout", stdout: "", - stderr: `host.ssh task ${taskId} did not finish within ${Math.max(waitMs, remoteTimeoutMs + 10_000)}ms`, + stderr: `host.ssh task ${taskId} did not finish within ${Math.max(waitMs, 1_000)}ms`, exitCode: null, raw: latest, }; @@ -439,44 +470,6 @@ spec: `; } -function devE2EPipelineRunManifest(options: CiDevE2EOptions): string { - const deployRevisionLabel = options.deployCommit.slice(0, 40); - return `apiVersion: tekton.dev/v1 -kind: PipelineRun -metadata: - generateName: unidesk-dev-e2e-${options.runId}- - namespace: unidesk-ci - labels: - app.kubernetes.io/name: unidesk-dev-namespace-e2e - app.kubernetes.io/part-of: unidesk - unidesk.ai/ci-kind: dev-namespace-e2e - unidesk.ai/deploy-ref: master-deploy-json-dev - unidesk.ai/deploy-commit: ${JSON.stringify(deployRevisionLabel)} -spec: - pipelineRef: - name: unidesk-dev-namespace-e2e - taskRunTemplate: - serviceAccountName: unidesk-ci-runner - params: - - name: repo-url - value: ${JSON.stringify(options.repoUrl)} - - name: desired-ref - value: ${JSON.stringify(options.desiredRef)} - - name: deploy-commit - value: ${JSON.stringify(options.deployCommit)} - - name: environment - value: ${JSON.stringify(options.environment)} - - name: run-id - value: ${JSON.stringify(options.runId)} - - name: keep-namespace - value: ${JSON.stringify(options.keepNamespace ? 
"true" : "false")} - workspaces: - - name: shared-workspace - persistentVolumeClaim: - claimName: unidesk-ci-cache -`; -} - async function remoteCreatePipelineRun(manifest: string): Promise<string> { const encoded = Buffer.from(manifest, "utf8").toString("base64"); const token = randomUUID().replace(/-/gu, "").slice(0, 12); @@ -543,6 +536,68 @@ async function run(options: CiOptions): Promise<Record<string, unknown>> { }; } +async function runRemoteDevE2ELauncher(options: CiDevE2EOptions): Promise<DispatchResult> { + const scriptTimeoutMs = Math.max(options.scriptTimeoutMs, options.waitMs, 60_000); + const remoteTimeoutMs = scriptTimeoutMs + 120_000; + const waitMs = options.waitMs > 0 ? options.waitMs + 30_000 : 0; + const command = [ + "set -euo pipefail", + `run_id=${shellQuote(options.runId)}`, + `repo_url=${shellQuote(options.scriptRepo)}`, + `commit=${shellQuote(options.deployCommit)}`, + `script_path=${shellQuote(options.scriptPath)}`, + `desired_ref=${shellQuote(options.desiredRef)}`, + `environment=${shellQuote(options.environment)}`, + `keep_namespace=${shellQuote(options.keepNamespace ? 
"true" : "false")}`, + `timeout_ms=${shellQuote(String(scriptTimeoutMs))}`, + "work_dir=\"/tmp/unidesk-ci/$run_id\"", + "result_dir=\"/home/ubuntu/.unidesk/runs/$run_id\"", + "mkdir -p \"$work_dir\" \"$result_dir\"", + "launcher_log=\"$result_dir/launcher.log\"", + "exec > >(tee -a \"$launcher_log\") 2>&1", + "echo \"launcher_run_id=$run_id\"", + "echo \"launcher_repo=$repo_url\"", + "echo \"launcher_commit=$commit\"", + "echo \"launcher_script_path=$script_path\"", + "case \"$script_path\" in scripts/ci/*.sh) ;; *) echo \"invalid_script_path=$script_path\" >&2; exit 2 ;; esac", + "export DOCKER_CONFIG=/tmp/unidesk-ci-docker-config", + "mkdir -p \"$DOCKER_CONFIG\"", + "printf '{}\\n' > \"$DOCKER_CONFIG/config.json\"", + `build_proxy=${shellQuote(providerGatewayWsEgressProxyUrl)}`, + "export HTTP_PROXY=\"$build_proxy\" HTTPS_PROXY=\"$build_proxy\" ALL_PROXY=\"$build_proxy\"", + "export NO_PROXY=\"localhost,127.0.0.1,::1,host.docker.internal,.svc,.cluster.local,kubernetes.default.svc\"", + "if ! curl -fsSI --max-time 20 -x \"$build_proxy\" https://github.com >/dev/null; then", + " echo \"ci_provider_egress_proxy_unavailable=$build_proxy\" >&2", + " exit 1", + "fi", + "echo \"ci_provider_egress_proxy=provider-gateway-ws-egress:$build_proxy\"", + "repo_dir=\"$work_dir/repo\"", + "if [ ! 
-d \"$repo_dir/.git\" ]; then", + " git clone --no-checkout \"$repo_url\" \"$repo_dir\"", + "fi", + "git -C \"$repo_dir\" remote set-url origin \"$repo_url\"", + "git -C \"$repo_dir\" fetch --no-tags origin \"$commit\" || git -C \"$repo_dir\" fetch --no-tags origin '+refs/heads/*:refs/remotes/origin/*'", + "resolved=$(git -C \"$repo_dir\" rev-parse --verify \"$commit^{commit}\")", + "test \"$resolved\" = \"$commit\" || { echo \"resolved_commit_mismatch=$resolved expected=$commit\" >&2; exit 1; }", + "git -C \"$repo_dir\" cat-file -e \"$resolved:$script_path\"", + "git -C \"$repo_dir\" show \"$resolved:$script_path\" > \"$work_dir/runner.sh\"", + "chmod 700 \"$work_dir/runner.sh\"", + "echo \"runner_script_ready=$work_dir/runner.sh\"", + "runner_args=(", + " --run-id \"$run_id\"", + " --repo-url \"$repo_url\"", + " --desired-ref \"$desired_ref\"", + " --manifest-commit \"$commit\"", + " --environment \"$environment\"", + " --result-dir \"$result_dir\"", + " --timeout-ms \"$timeout_ms\"", + ")", + "if [ \"$keep_namespace\" = \"true\" ]; then runner_args+=(--keep-namespace); fi", + "bash \"$work_dir/runner.sh\" \"${runner_args[@]}\"", + ].join("\n"); + return dispatchSsh(command, waitMs, remoteTimeoutMs, options.waitMs > 0); +} + function resolveDeployDevManifest(desiredRef: string): DeployDevManifestSummary { const remoteRef = `refs/remotes/origin/${desiredRef}`; const fetch = runCommand(["git", "fetch", "--quiet", "origin", `+refs/heads/${desiredRef}:${remoteRef}`], repoRoot); @@ -556,6 +611,8 @@ function resolveDeployDevManifest(desiredRef: string): DeployDevManifestSummary if (record?.schemaVersion !== 2) throw new Error(`origin/${desiredRef}:deploy.json must use schemaVersion=2`); const environments = asRecord(record.environments); const dev = asRecord(environments?.dev); + const ci = asRecord(dev?.ci); + if (ci === null) throw new Error(`origin/${desiredRef}:deploy.json must contain environments.dev.ci`); const rawServices = Array.isArray(dev?.services) ? 
dev.services : []; const services = rawServices.map((item) => { const service = asRecord(item); @@ -570,6 +627,11 @@ function resolveDeployDevManifest(desiredRef: string): DeployDevManifestSummary deployCommit: deployCommitResult.stdout.trim(), desiredRef, environment: "dev", + ci: { + repo: requireManifestString(ci.repo, "environments.dev.ci.repo"), + scriptPath: requireCiScriptPath(ci.scriptPath), + timeoutMs: positiveManifestNumber(ci.timeoutMs, 1_800_000, "environments.dev.ci.timeoutMs"), + }, services, }; } @@ -580,68 +642,62 @@ function makeRunId(deployCommit: string): string { } async function runDevE2E(options: CiDevE2EOptions): Promise<Record<string, unknown>> { - if (!options.direct) { - const devopsResponse = coreInternalFetch("/api/microservices/devops/proxy/api/ci/dev-e2e/run", { - method: "POST", - body: { - repoUrl: options.repoUrl, - desiredRef: options.desiredRef, - environment: options.environment, - runId: options.runId, - keepNamespace: options.keepNamespace, - }, - maxResponseBytes: 2_000_000, - }); - const devopsBody = proxyBody(devopsResponse); - if (devopsBody?.ok === true) { - const pipelineRun = asString(devopsBody.pipelineRun); - const wait = pipelineRun.length > 0 ? await waitForPipelineRun(pipelineRun, options.waitMs) : null; - const waitSucceeded = wait === null || wait.exitCode === 0 || wait.stdout.trimStart().startsWith("True\tSucceeded\t"); - return { - ...devopsBody, - ok: waitSucceeded, - triggerMode: "devops-service", - wait: wait === null ? 
null : { - stdoutTail: wait.stdout.slice(-6000), - stderrTail: wait.stderr.slice(-6000), - }, - }; - } - return { - ok: false, - triggerMode: "devops-service", - error: "DevOps service trigger failed or did not return ok=true; use --direct only for CI bootstrap/recovery, never as a service deployment path.", - devopsResponse, - }; - } - const name = await remoteCreatePipelineRun(devE2EPipelineRunManifest(options)); - const wait = await waitForPipelineRun(name, options.waitMs); - const waitSucceeded = wait === null || wait.exitCode === 0 || wait.stdout.trimStart().startsWith("True\tSucceeded\t"); + const result = await runRemoteDevE2ELauncher(options); + const ok = result.ok && (result.exitCode === null || result.exitCode === 0); return { - ok: waitSucceeded, - pipelineRun: name, + ok, + runId: options.runId, namespace: "unidesk-ci", temporaryNamespace: `unidesk-ci-e2e-${options.runId}`, repoUrl: options.repoUrl, desiredRef: options.desiredRef, deployCommit: options.deployCommit, + scriptRepo: options.scriptRepo, + scriptPath: options.scriptPath, environment: options.environment, services: options.services, keepNamespace: options.keepNamespace, - triggerMode: "direct-maintenance", - wait: wait === null ? 
null : { - stdoutTail: wait.stdout.slice(-6000), - stderrTail: wait.stderr.slice(-6000), + triggerMode: "commit-pinned-ssh-launcher", + launcher: { + taskId: result.taskId, + status: result.status, + exitCode: result.exitCode, + stdoutTail: result.stdout.slice(-6000), + stderrTail: result.stderr.slice(-6000), }, + resultDir: `/home/ubuntu/.unidesk/runs/${options.runId}`, next: [ - `bun scripts/cli.ts ci logs ${name}`, + `bun scripts/cli.ts ci logs ${options.runId}`, "bun scripts/cli.ts ci status", ], }; } async function logs(name: string): Promise<Record<string, unknown>> { - if (name.length === 0) throw new Error("ci logs requires PipelineRun name"); + if (name.length === 0) throw new Error("ci logs requires run id or PipelineRun name"); + if (/^[a-z0-9]([-a-z0-9]{0,46}[a-z0-9])?$/u.test(name)) { + const result = await dispatchSsh([ + "set -euo pipefail", + `run_id=${shellQuote(name)}`, + "result_dir=\"/home/ubuntu/.unidesk/runs/$run_id\"", + "printf 'result_dir=%s\\n' \"$result_dir\"", + "found=0", + "if [ -f \"$result_dir/result.json\" ]; then found=1; echo '===== result.json'; cat \"$result_dir/result.json\"; fi", + "if [ -f \"$result_dir/launcher.log\" ]; then found=1; echo '===== launcher.log'; tail -n 160 \"$result_dir/launcher.log\"; fi", + "if [ -f \"$result_dir/runner.log\" ]; then found=1; echo '===== runner.log'; tail -n 240 \"$result_dir/runner.log\"; fi", + "if [ -f \"$result_dir/pods.log\" ]; then found=1; echo '===== pods.log'; tail -n 240 \"$result_dir/pods.log\"; fi", + "if [ \"$found\" = \"0\" ]; then echo \"no_run_files=$result_dir\" >&2; exit 42; fi", + ].join("\n"), 60_000, 45_000); + if (result.ok || result.exitCode !== 42) { + return { + ok: result.ok, + runId: name, + output: result.stdout, + stderr: result.stderr, + exitCode: result.exitCode, + }; + } + } const result = await runRemoteKubectl([ "set -euo pipefail", `kubectl get pipelinerun/${shellQuote(name)} -n unidesk-ci -o wide`, @@ -664,7 +720,7 @@ function help(): Record<string, 
unknown> { "bun scripts/cli.ts ci install", "bun scripts/cli.ts ci run --revision <commit>", "bun scripts/cli.ts ci run-dev-e2e --wait-ms 600000", - "bun scripts/cli.ts ci logs <pipelineRun>", + "bun scripts/cli.ts ci logs <runId>", ], tekton: { pipelineVersion: tektonPipelineVersion, @@ -676,9 +732,9 @@ function help(): Record<string, unknown> { }, }, runDevE2E: { - defaultTriggerMode: "devops-service", - directMaintenanceFlag: "--direct", + defaultTriggerMode: "commit-pinned-ssh-launcher", desiredState: "origin/master:deploy.json#environments.dev", + scriptSource: "origin/master:deploy.json#environments.dev.ci", }, }; } @@ -712,11 +768,13 @@ export async function runCiCommand(_config: UniDeskConfig, args: string[]): Prom desiredRef, deployCommit: manifest.deployCommit, environment: manifest.environment, + scriptRepo: manifest.ci.repo, + scriptPath: manifest.ci.scriptPath, + scriptTimeoutMs: manifest.ci.timeoutMs, services: manifest.services, runId, keepNamespace: boolFlag(args, "--keep-namespace"), waitMs, - direct: boolFlag(args, "--direct"), }); } if (action === "logs") return logs(nameArg ?? 
""); diff --git a/scripts/src/deploy.ts b/scripts/src/deploy.ts index 933a4223..83085a0e 100644 --- a/scripts/src/deploy.ts +++ b/scripts/src/deploy.ts @@ -131,8 +131,8 @@ const nativeK3sInstallVersion = "v1.34.1+k3s1"; const nativeK3sImage = "rancher/k3s:v1.34.1-k3s1"; const nativeK3sCtrAddress = "/run/k3s/containerd/containerd.sock"; const unideskRepoUrl = "https://github.com/pikasTech/unidesk"; -const d601MaintenanceDeployAllowedServiceIds = new Set(["devops"]); -const devApplySupportedServiceIds = d601MaintenanceDeployAllowedServiceIds; +const d601MaintenanceDeployAllowedServiceIds = new Set<string>(); +const devApplySupportedServiceIds = new Set<string>(); const deployEnvironmentTargets: Record<DeployEnvironment, DeployEnvironmentTarget> = { dev: { environment: "dev", @@ -195,7 +195,7 @@ function deployHelp(action: string | undefined = undefined): Record<string, unkn }, options: [ { name: "--file <path>", default: defaultDeployFile, description: "Desired-state manifest path relative to the repo root. JSON and ESM JS manifests are supported, for example deploy.json or develop.js." }, - { name: "--env <dev|prod>", description: "Read the named environment from origin/master:deploy.json. Direct D601 apply is enabled only for DevOps bootstrap/repair." }, + { name: "--env <dev|prod>", description: "Read the named environment from origin/master:deploy.json. Direct D601 service apply is disabled in the current CI-only phase." }, { name: "--service <id>", description: "Limit reconcile to one service from the manifest." }, { name: "--dry-run", description: "Prepare and validate without mutating the target service." }, { name: "--force", description: "Redeploy even when the live commit appears up to date." 
}, @@ -665,20 +665,6 @@ function devK3sDeployService(id: string): UniDeskMicroserviceConfig | undefined allowedMethods: ["GET", "HEAD", "POST", "PUT", "PATCH", "DELETE"], allowedPathPrefixes: ["/", "/api/", "/logs"], }, - devops: { - name: "UniDesk DevOps Control", - description: "D601 k3s-managed DevOps control plane for normal CI trigger/status/log paths.", - dockerfile: "src/components/microservices/devops/Dockerfile", - composeFile: "src/components/microservices/k3sctl-adapter/k3s/devops.k3s.json", - composeService: "devops", - containerName: "k3s:devops", - nodeBaseUrl: "k3s://devops", - nodePort: 4286, - healthPath: "/health", - route: "/devops", - allowedMethods: ["GET", "HEAD", "POST"], - allowedPathPrefixes: ["/health", "/live", "/logs", "/api/"], - }, }; const spec = specs[id]; if (spec === undefined) return undefined; @@ -697,9 +683,9 @@ function devK3sDeployService(id: string): UniDeskMicroserviceConfig | undefined }, backend: { nodeBaseUrl: spec.nodeBaseUrl, - nodeBindHost: `k3s://${id === "devops" ? "unidesk-ci" : "unidesk-dev"}/${spec.composeService}`, + nodeBindHost: `k3s://unidesk-dev/${spec.composeService}`, nodePort: spec.nodePort, - proxyMode: id === "devops" ? "k3sctl-adapter-http" : "dev-k3s-direct", + proxyMode: "dev-k3s-direct", frontendOnly: true, public: false, allowedMethods: spec.allowedMethods, @@ -711,16 +697,14 @@ function devK3sDeployService(id: string): UniDeskMicroserviceConfig | undefined mode: "k3sctl-managed", adapterServiceId: "k3sctl-adapter", k3sServiceId: spec.composeService, - namespace: id === "devops" ? "unidesk-ci" : "unidesk-dev", + namespace: "unidesk-dev", expectedNodeIds: ["D601"], activeNodeId: "D601", }, development: { providerId: "D601", sshPassthrough: true, - worktreePath: id === "devops" - ? "/home/ubuntu/.unidesk/devops-deploy" - : id === "code-queue" + worktreePath: id === "code-queue" ? 
"/home/ubuntu/unidesk-dev-code-queue-deploy/code-queue" : `/home/ubuntu/unidesk-dev-core-deploy/${id}`, }, @@ -767,7 +751,7 @@ function selectServices(config: UniDeskConfig, manifest: DeployManifest, service if (manifest.environment === "dev") { const service = devK3sDeployService(desired.id); if (service === undefined) { - throw new Error(`deploy --env dev service ${desired.id} is not enabled in this executor yet; currently supported: ${[...devApplySupportedServiceIds].join(", ")}`); + throw new Error(`deploy --env dev service ${desired.id} is not enabled for direct rollout in the current CI-only phase`); } return { desired, config: service }; } @@ -1021,7 +1005,6 @@ function dockerBuildTimeoutMs(service: UniDeskMicroserviceConfig, options: Deplo function devK3sPrepullImages(service: UniDeskMicroserviceConfig): string[] { if (!isDevK3sDeployService(service)) return []; - if (service.id === "devops") return ["golang:1.23-bookworm", "debian:bookworm-slim"]; return ["oven/bun:1-alpine"]; } @@ -2186,7 +2169,7 @@ async function applyOneService(config: UniDeskConfig, service: UniDeskMicroservi ok: false, serviceId: service.id, skipped: true, - reason: `D601 maintenance-channel direct deployment is allowed only for ${[...d601MaintenanceDeployAllowedServiceIds].join(", ")} bootstrap/repair. Deploy ${service.id} through the DevOps control plane instead.`, + reason: `D601 maintenance-channel direct deployment is disabled for ${service.id}. Current dev automation is CI-only; use ci run-dev-e2e for the temporary namespace smoke runner.`, steps, }; } @@ -2361,7 +2344,7 @@ function blockedD601MaintenanceDeployServices(config: UniDeskConfig, manifest: D } function d601MaintenanceDeployBlockMessage(blocked: string[]): string { - return `D601 maintenance-channel direct deployment is allowed only for ${[...d601MaintenanceDeployAllowedServiceIds].join(", ")} bootstrap/repair; blocked services: ${blocked.join(", ")}. 
Use the DevOps control plane for other direct/managed microservices.`; + return `D601 maintenance-channel direct deployment is disabled for direct/managed services in the current CI-only phase; blocked services: ${blocked.join(", ")}. Use ci run-dev-e2e for dev smoke verification.`; } async function runApplyNow(config: UniDeskConfig, manifest: DeployManifest, options: DeployOptions): Promise<Record<string, unknown>> { @@ -2413,7 +2396,7 @@ export async function runDeployCommand(config: UniDeskConfig | null, args: strin if (options.environment !== "dev") throw new Error("deploy apply --env prod is not enabled yet"); const unsupported = unsupportedDevApplyServices(manifest, options.serviceId); if (unsupported.length > 0) { - throw new Error(`deploy apply --env dev currently supports only ${[...devApplySupportedServiceIds].join(", ")}; unsupported selected services: ${unsupported.join(", ")}`); + throw new Error(`deploy apply --env dev is disabled for direct service rollout in the current CI-only phase; unsupported selected services: ${unsupported.join(", ")}. Use ci run-dev-e2e for dev smoke verification.`); } if (config === null) throw new Error("deploy apply --env dev requires config.json"); if (!options.dryRun) { @@ -2438,5 +2421,5 @@ export async function runCodeQueueDeployCompatCommand(_config: UniDeskConfig, ar if (args.includes("--skip-build")) throw new Error("codex deploy is disabled; --skip-build is not supported"); const providerId = optionValue(args, ["--provider-id", "--provider"]) ?? "D601"; if (providerId !== "D601") throw new Error(`codex deploy compatibility path only supports D601; got ${providerId}`); - throw new Error("codex deploy is disabled because D601 maintenance-channel direct deployment is now reserved for DevOps bootstrap/repair. Use the DevOps control plane for Code Queue deployment."); + throw new Error("codex deploy is disabled because D601 maintenance-channel direct deployment must not deploy Code Queue. 
Current dev automation is CI-only; use ci run-dev-e2e for dev smoke verification."); } diff --git a/src/components/microservices/devops/Dockerfile b/src/components/microservices/devops/Dockerfile deleted file mode 100644 index c2f23022..00000000 --- a/src/components/microservices/devops/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -ARG DEVOPS_GO_IMAGE=golang:1.23-bookworm -ARG DEVOPS_RUNTIME_IMAGE=debian:bookworm-slim - -FROM ${DEVOPS_GO_IMAGE} AS builder -WORKDIR /src -COPY src/components/microservices/devops/go.mod ./go.mod -COPY src/components/microservices/devops/main.go ./main.go -RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -ldflags="-s -w" -o /out/unidesk-devops ./main.go - -FROM ${DEVOPS_RUNTIME_IMAGE} - -RUN apt-get update \ - && apt-get install -y --no-install-recommends ca-certificates curl git \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -COPY --from=builder /out/unidesk-devops /usr/local/bin/unidesk-devops - -EXPOSE 4286 -CMD ["/usr/local/bin/unidesk-devops"] diff --git a/src/components/microservices/devops/go.mod b/src/components/microservices/devops/go.mod deleted file mode 100644 index 7d48dfc6..00000000 --- a/src/components/microservices/devops/go.mod +++ /dev/null @@ -1,3 +0,0 @@ -module github.com/pikasTech/unidesk/src/components/microservices/devops - -go 1.23 diff --git a/src/components/microservices/devops/main.go b/src/components/microservices/devops/main.go deleted file mode 100644 index 3f62728e..00000000 --- a/src/components/microservices/devops/main.go +++ /dev/null @@ -1,739 +0,0 @@ -package main - -import ( - "bytes" - "context" - "crypto/rand" - "crypto/tls" - "crypto/x509" - "encoding/hex" - "encoding/json" - "errors" - "fmt" - "io" - "log" - "net/http" - "net/url" - "os" - "os/exec" - "path/filepath" - "regexp" - "strconv" - "strings" - "time" -) - -type jsonMap map[string]any - -type config struct { - Host string - Port int - Namespace string - RepoURL string - DesiredRef string - Environment string - GitProxyURL string 
- PipelineName string - PipelineServiceAccount string - WorkspaceClaim string - AppImage string - LogFile string -} - -type deployService struct { - ID string `json:"id"` - Repo string `json:"repo"` - CommitID string `json:"commitId"` -} - -type deploySummary struct { - DeployCommit string `json:"deployCommit"` - DesiredRef string `json:"desiredRef"` - Environment string `json:"environment"` - RepoURL string `json:"repoUrl"` - Services []deployService `json:"services"` -} - -type httpError struct { - Status int - Msg string - Detail jsonMap -} - -func (e *httpError) Error() string { - return e.Msg -} - -var ( - startedAt = time.Now().UTC().Format(time.RFC3339) - recentLogs []jsonMap - serviceConfig = readConfig() - refPattern = regexp.MustCompile(`^[A-Za-z0-9._/-]{1,160}$`) - runIDPattern = regexp.MustCompile(`^[a-z0-9]([-a-z0-9]{0,46}[a-z0-9])?$`) - commitIDPattern = regexp.MustCompile(`^[0-9a-f]{7,40}$`) -) - -func envString(name, fallback string) string { - value := os.Getenv(name) - if value == "" { - return fallback - } - return value -} - -func envInt(name string, fallback int) int { - raw := os.Getenv(name) - if raw == "" { - return fallback - } - value, err := strconv.Atoi(raw) - if err != nil || value <= 0 { - return fallback - } - return value -} - -func currentNamespace() string { - raw, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace") - if err != nil { - return "unidesk-ci" - } - value := strings.TrimSpace(string(raw)) - if value == "" { - return "unidesk-ci" - } - return value -} - -func readConfig() config { - return config{ - Host: envString("HOST", "0.0.0.0"), - Port: envInt("PORT", 4286), - Namespace: envString("DEVOPS_NAMESPACE", currentNamespace()), - RepoURL: envString("DEVOPS_REPO_URL", "https://github.com/pikasTech/unidesk"), - DesiredRef: envString("DEVOPS_DESIRED_REF", "master"), - Environment: envString("DEVOPS_ENVIRONMENT", "dev"), - GitProxyURL: envString("DEVOPS_GIT_PROXY_URL", 
"http://d601-provider-egress-proxy.unidesk.svc.cluster.local:18789"), - PipelineName: envString("DEVOPS_DEV_E2E_PIPELINE", "unidesk-dev-namespace-e2e"), - PipelineServiceAccount: envString("DEVOPS_PIPELINE_SERVICE_ACCOUNT", "unidesk-ci-runner"), - WorkspaceClaim: envString("DEVOPS_PIPELINE_WORKSPACE_CLAIM", "unidesk-ci-cache"), - AppImage: envString("DEVOPS_DEV_E2E_APP_IMAGE", "unidesk-code-queue:dev"), - LogFile: envString("LOG_FILE", "/var/log/unidesk/devops.jsonl"), - } -} - -func appendLog(level, event string, detail jsonMap) { - record := jsonMap{"at": time.Now().UTC().Format(time.RFC3339), "service": "devops", "level": level, "event": event} - for key, value := range detail { - record[key] = value - } - recentLogs = append(recentLogs, record) - if len(recentLogs) > 300 { - recentLogs = recentLogs[len(recentLogs)-300:] - } - line, _ := json.Marshal(record) - log.Println(string(line)) - if serviceConfig.LogFile != "" { - _ = os.MkdirAll(filepath.Dir(serviceConfig.LogFile), 0o755) - if file, err := os.OpenFile(serviceConfig.LogFile, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644); err == nil { - _, _ = file.Write(append(line, '\n')) - _ = file.Close() - } - } -} - -func writeJSON(w http.ResponseWriter, status int, body any) { - w.Header().Set("content-type", "application/json; charset=utf-8") - w.WriteHeader(status) - _ = json.NewEncoder(w).Encode(body) -} - -func errorBody(err error) jsonMap { - body := jsonMap{"ok": false, "error": err.Error()} - var he *httpError - if errors.As(err, &he) { - for key, value := range he.Detail { - body[key] = value - } - } - return body -} - -func handleError(w http.ResponseWriter, err error) { - status := http.StatusInternalServerError - var he *httpError - if errors.As(err, &he) { - status = he.Status - } - appendLog(map[bool]string{true: "error", false: "warn"}[status >= 500], "request_failed", jsonMap{"status": status, "error": err.Error()}) - writeJSON(w, status, errorBody(err)) -} - -func readJSON(r *http.Request) (jsonMap, 
error) { - if r.Body == nil { - return jsonMap{}, nil - } - defer r.Body.Close() - body, err := io.ReadAll(io.LimitReader(r.Body, 1<<20)) - if err != nil { - return nil, err - } - if len(strings.TrimSpace(string(body))) == 0 { - return jsonMap{}, nil - } - var record jsonMap - if err := json.Unmarshal(body, &record); err != nil { - return nil, &httpError{Status: http.StatusBadRequest, Msg: "request body must be JSON"} - } - return record, nil -} - -func stringValue(value any) string { - text, _ := value.(string) - return text -} - -func boolValue(value any) bool { - switch item := value.(type) { - case bool: - return item - case string: - return item == "true" || item == "1" - case float64: - return item == 1 - default: - return false - } -} - -func requireRepoURL(value any) (string, error) { - repo := stringValue(value) - if repo == "" { - repo = serviceConfig.RepoURL - } - parsed, err := url.Parse(repo) - if err != nil || parsed.Scheme != "https" || parsed.Host == "" { - return "", &httpError{Status: http.StatusBadRequest, Msg: "repoUrl must be an https URL"} - } - return repo, nil -} - -func requireDesiredRef(value any) (string, error) { - ref := stringValue(value) - if ref == "" { - ref = serviceConfig.DesiredRef - } - if !refPattern.MatchString(ref) || strings.HasPrefix(ref, "-") || strings.Contains(ref, "..") { - return "", &httpError{Status: http.StatusBadRequest, Msg: "desired ref contains unsupported characters"} - } - return ref, nil -} - -func optionalRunID(value any) (string, error) { - runID := stringValue(value) - if runID == "" { - return "", nil - } - if !runIDPattern.MatchString(runID) { - return "", &httpError{Status: http.StatusBadRequest, Msg: "runId must be DNS-safe lowercase alnum/dash, max 48 chars"} - } - return runID, nil -} - -func gitEnv() []string { - env := os.Environ() - noProxy := 
"localhost,127.0.0.1,::1,d601-provider-egress-proxy,d601-provider-egress-proxy.unidesk,d601-provider-egress-proxy.unidesk.svc,d601-provider-egress-proxy.unidesk.svc.cluster.local" - add := map[string]string{ - "HTTP_PROXY": serviceConfig.GitProxyURL, - "HTTPS_PROXY": serviceConfig.GitProxyURL, - "ALL_PROXY": serviceConfig.GitProxyURL, - "NO_PROXY": noProxy, - "http_proxy": serviceConfig.GitProxyURL, - "https_proxy": serviceConfig.GitProxyURL, - "all_proxy": serviceConfig.GitProxyURL, - "no_proxy": noProxy, - } - for key, value := range add { - env = append(env, key+"="+value) - } - return env -} - -func runCommand(ctx context.Context, cwd string, args ...string) (string, string, error) { - cmd := exec.CommandContext(ctx, args[0], args[1:]...) - cmd.Dir = cwd - cmd.Env = gitEnv() - var stdout bytes.Buffer - var stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - err := cmd.Run() - return stdout.String(), stderr.String(), err -} - -func resolveDeployManifest(repoURL, desiredRef, environment string) (deploySummary, error) { - dir, err := os.MkdirTemp("", "unidesk-devops-deploy-") - if err != nil { - return deploySummary{}, err - } - defer os.RemoveAll(dir) - - ctx, cancel := context.WithTimeout(context.Background(), 150*time.Second) - defer cancel() - if _, stderr, err := runCommand(ctx, dir, "git", "init", "-q"); err != nil { - return deploySummary{}, &httpError{Status: http.StatusBadGateway, Msg: "git init failed", Detail: jsonMap{"stderr": tail(stderr, 2000)}} - } - if _, stderr, err := runCommand(ctx, dir, "git", "remote", "add", "origin", repoURL); err != nil { - return deploySummary{}, &httpError{Status: http.StatusBadGateway, Msg: "git remote add failed", Detail: jsonMap{"stderr": tail(stderr, 2000)}} - } - if _, stderr, err := runCommand(ctx, dir, "git", "fetch", "--depth=1", "origin", desiredRef); err != nil { - return deploySummary{}, &httpError{Status: http.StatusBadGateway, Msg: "failed to fetch desired ref", Detail: jsonMap{"stderr": 
tail(stderr, 4000)}} - } - stdout, stderr, err := runCommand(ctx, dir, "git", "rev-parse", "FETCH_HEAD") - if err != nil { - return deploySummary{}, &httpError{Status: http.StatusBadGateway, Msg: "failed to resolve desired ref commit", Detail: jsonMap{"stderr": tail(stderr, 2000)}} - } - deployCommit := strings.TrimSpace(stdout) - stdout, stderr, err = runCommand(ctx, dir, "git", "show", "FETCH_HEAD:deploy.json") - if err != nil { - return deploySummary{}, &httpError{Status: http.StatusBadGateway, Msg: "failed to read deploy.json from desired ref", Detail: jsonMap{"stderr": tail(stderr, 4000)}} - } - return parseDeployManifest(stdout, repoURL, desiredRef, environment, deployCommit) -} - -func parseDeployManifest(raw, repoURL, desiredRef, environment, deployCommit string) (deploySummary, error) { - var parsed struct { - SchemaVersion int `json:"schemaVersion"` - Environments map[string]struct { - Services []deployService `json:"services"` - } `json:"environments"` - } - if err := json.Unmarshal([]byte(raw), &parsed); err != nil { - return deploySummary{}, &httpError{Status: http.StatusBadRequest, Msg: "deploy.json must be valid JSON"} - } - if parsed.SchemaVersion != 2 { - return deploySummary{}, &httpError{Status: http.StatusBadRequest, Msg: "deploy.json must use schemaVersion=2"} - } - env, ok := parsed.Environments[environment] - if !ok { - return deploySummary{}, &httpError{Status: http.StatusBadRequest, Msg: "deploy.json must contain requested environment", Detail: jsonMap{"environment": environment}} - } - if len(env.Services) == 0 { - return deploySummary{}, &httpError{Status: http.StatusBadRequest, Msg: "deploy.json environment must contain services", Detail: jsonMap{"environment": environment}} - } - for index, service := range env.Services { - service.CommitID = strings.ToLower(service.CommitID) - env.Services[index].CommitID = service.CommitID - if service.ID == "" || service.Repo == "" || !commitIDPattern.MatchString(service.CommitID) { - return 
deploySummary{}, &httpError{Status: http.StatusBadRequest, Msg: fmt.Sprintf("deploy.json environments.%s.services[%d] must contain id, repo and 7-40 char commitId", environment, index)} - } - } - return deploySummary{DeployCommit: deployCommit, DesiredRef: desiredRef, Environment: environment, RepoURL: repoURL, Services: env.Services}, nil -} - -func tail(value string, max int) string { - if len(value) <= max { - return value - } - return value[len(value)-max:] -} - -func randomSuffix() string { - var bytes [4]byte - if _, err := rand.Read(bytes[:]); err != nil { - return strconv.FormatInt(time.Now().UnixNano(), 36) - } - return hex.EncodeToString(bytes[:]) -} - -func makeRunID(deployCommit string) string { - stamp := time.Now().UTC().Format("20060102150405") - runID := fmt.Sprintf("%s-%s", stamp, strings.ToLower(deployCommit[:min(len(deployCommit), 8)])) - runID = regexp.MustCompile(`[^a-z0-9-]`).ReplaceAllString(runID, "-") - if len(runID) > 48 { - return runID[:48] - } - return runID -} - -func min(a, b int) int { - if a < b { - return a - } - return b -} - -func pipelineRunBody(summary deploySummary, runID string, keepNamespace bool) jsonMap { - return jsonMap{ - "apiVersion": "tekton.dev/v1", - "kind": "PipelineRun", - "metadata": jsonMap{ - "generateName": fmt.Sprintf("unidesk-dev-e2e-%s-", runID), - "namespace": serviceConfig.Namespace, - "labels": jsonMap{ - "app.kubernetes.io/name": "unidesk-dev-namespace-e2e", - "app.kubernetes.io/part-of": "unidesk", - "unidesk.ai/ci-kind": "dev-namespace-e2e", - "unidesk.ai/deploy-ref": "master-deploy-json-dev", - "unidesk.ai/deploy-commit": summary.DeployCommit[:min(len(summary.DeployCommit), 40)], - }, - }, - "spec": jsonMap{ - "pipelineRef": jsonMap{"name": serviceConfig.PipelineName}, - "taskRunTemplate": jsonMap{"serviceAccountName": serviceConfig.PipelineServiceAccount}, - "params": []jsonMap{ - {"name": "repo-url", "value": summary.RepoURL}, - {"name": "desired-ref", "value": summary.DesiredRef}, - {"name": 
"deploy-commit", "value": summary.DeployCommit}, - {"name": "environment", "value": summary.Environment}, - {"name": "run-id", "value": runID}, - {"name": "keep-namespace", "value": map[bool]string{true: "true", false: "false"}[keepNamespace]}, - {"name": "app-image", "value": serviceConfig.AppImage}, - }, - "workspaces": []jsonMap{ - {"name": "shared-workspace", "persistentVolumeClaim": jsonMap{"claimName": serviceConfig.WorkspaceClaim}}, - }, - }, - } -} - -func serviceAccountFile(name string) string { - return filepath.Join("/var/run/secrets/kubernetes.io/serviceaccount", name) -} - -func kubeClient() (*http.Client, error) { - caPEM, err := os.ReadFile(serviceAccountFile("ca.crt")) - if err != nil { - return nil, err - } - pool := x509.NewCertPool() - if !pool.AppendCertsFromPEM(caPEM) { - return nil, errors.New("failed to load service-account CA") - } - return &http.Client{Timeout: 120 * time.Second, Transport: &http.Transport{TLSClientConfig: &tls.Config{RootCAs: pool, MinVersion: tls.VersionTLS12}}}, nil -} - -func kubeURL(path string) string { - host := envString("KUBERNETES_SERVICE_HOST", "kubernetes.default.svc") - port := envString("KUBERNETES_SERVICE_PORT_HTTPS", envString("KUBERNETES_SERVICE_PORT", "443")) - return fmt.Sprintf("https://%s:%s%s", host, port, path) -} - -func kubeRequest(method, path string, body any) (int, []byte, error) { - client, err := kubeClient() - if err != nil { - return 0, nil, err - } - var reader io.Reader - if body != nil { - raw, err := json.Marshal(body) - if err != nil { - return 0, nil, err - } - reader = bytes.NewReader(raw) - } - req, err := http.NewRequest(method, kubeURL(path), reader) - if err != nil { - return 0, nil, err - } - token, err := os.ReadFile(serviceAccountFile("token")) - if err != nil { - return 0, nil, err - } - req.Header.Set("authorization", "Bearer "+strings.TrimSpace(string(token))) - req.Header.Set("accept", "application/json") - if body != nil { - req.Header.Set("content-type", 
"application/json") - } - res, err := client.Do(req) - if err != nil { - return 0, nil, err - } - defer res.Body.Close() - raw, err := io.ReadAll(io.LimitReader(res.Body, 8*1024*1024)) - if err != nil { - return res.StatusCode, nil, err - } - if res.StatusCode < 200 || res.StatusCode >= 300 { - return res.StatusCode, raw, &httpError{Status: res.StatusCode, Msg: "kubernetes api request failed", Detail: jsonMap{"path": path, "status": res.StatusCode, "body": tail(string(raw), 4000)}} - } - return res.StatusCode, raw, nil -} - -func metadataName(value any) string { - record, ok := value.(map[string]any) - if !ok { - return "" - } - metadata, ok := record["metadata"].(map[string]any) - if !ok { - return "" - } - return stringValue(metadata["name"]) -} - -func conditionSummary(value any) jsonMap { - record, ok := value.(map[string]any) - if !ok { - return jsonMap{"terminal": false, "status": "Unknown", "reason": "", "message": ""} - } - status, _ := record["status"].(map[string]any) - conditions, _ := status["conditions"].([]any) - for _, item := range conditions { - condition, ok := item.(map[string]any) - if !ok || condition["type"] != "Succeeded" { - continue - } - statusText := stringValue(condition["status"]) - return jsonMap{ - "terminal": statusText == "True" || statusText == "False", - "succeeded": statusText == "True", - "status": statusText, - "reason": stringValue(condition["reason"]), - "message": tail(stringValue(condition["message"]), 2000), - } - } - return jsonMap{"terminal": false, "status": "Unknown", "reason": "", "message": ""} -} - -func decodeJSON(raw []byte) any { - var value any - if err := json.Unmarshal(raw, &value); err != nil { - return jsonMap{"text": tail(string(raw), 4000)} - } - return value -} - -func listItems(value any) []any { - record, ok := value.(map[string]any) - if !ok { - return nil - } - items, _ := record["items"].([]any) - return items -} - -func compactTaskRun(value any) jsonMap { - record, _ := value.(map[string]any) - 
status, _ := record["status"].(map[string]any) - return jsonMap{ - "name": metadataName(value), - "condition": conditionSummary(value), - "podName": stringValue(status["podName"]), - "startTime": stringValue(status["startTime"]), - "completionTime": stringValue(status["completionTime"]), - } -} - -func compactPod(value any) jsonMap { - record, _ := value.(map[string]any) - status, _ := record["status"].(map[string]any) - spec, _ := record["spec"].(map[string]any) - return jsonMap{ - "name": metadataName(value), - "phase": stringValue(status["phase"]), - "nodeName": stringValue(spec["nodeName"]), - "podIP": stringValue(status["podIP"]), - "reason": stringValue(status["reason"]), - } -} - -func runStatus(name string) (jsonMap, error) { - namespace := url.PathEscape(serviceConfig.Namespace) - selector := url.QueryEscape("tekton.dev/pipelineRun=" + name) - _, pipelineRaw, err := kubeRequest("GET", fmt.Sprintf("/apis/tekton.dev/v1/namespaces/%s/pipelineruns/%s", namespace, url.PathEscape(name)), nil) - if err != nil { - return nil, err - } - _, taskRaw, _ := kubeRequest("GET", fmt.Sprintf("/apis/tekton.dev/v1/namespaces/%s/taskruns?labelSelector=%s", namespace, selector), nil) - _, podRaw, _ := kubeRequest("GET", fmt.Sprintf("/api/v1/namespaces/%s/pods?labelSelector=%s", namespace, selector), nil) - taskRuns := []jsonMap{} - for _, item := range listItems(decodeJSON(taskRaw)) { - taskRuns = append(taskRuns, compactTaskRun(item)) - } - pods := []jsonMap{} - for _, item := range listItems(decodeJSON(podRaw)) { - pods = append(pods, compactPod(item)) - } - pipeline := decodeJSON(pipelineRaw) - return jsonMap{"ok": true, "pipelineRun": name, "namespace": serviceConfig.Namespace, "condition": conditionSummary(pipeline), "taskRuns": taskRuns, "pods": pods}, nil -} - -func handleRunDevE2E(w http.ResponseWriter, r *http.Request) { - body, err := readJSON(r) - if err != nil { - handleError(w, err) - return - } - repoURL, err := requireRepoURL(body["repoUrl"]) - if err != nil { - 
handleError(w, err) - return - } - desiredRef, err := requireDesiredRef(body["desiredRef"]) - if err != nil { - handleError(w, err) - return - } - environment := stringValue(body["environment"]) - if environment == "" { - environment = serviceConfig.Environment - } - if environment != "dev" { - handleError(w, &httpError{Status: http.StatusBadRequest, Msg: "only environment=dev is enabled for dev e2e"}) - return - } - runID, err := optionalRunID(body["runId"]) - if err != nil { - handleError(w, err) - return - } - keepNamespace := boolValue(body["keepNamespace"]) - summary, err := resolveDeployManifest(repoURL, desiredRef, environment) - if err != nil { - handleError(w, err) - return - } - if runID == "" { - runID = makeRunID(summary.DeployCommit) - } - namespace := url.PathEscape(serviceConfig.Namespace) - _, raw, err := kubeRequest("POST", fmt.Sprintf("/apis/tekton.dev/v1/namespaces/%s/pipelineruns", namespace), pipelineRunBody(summary, runID, keepNamespace)) - if err != nil { - handleError(w, err) - return - } - name := metadataName(decodeJSON(raw)) - if name == "" { - name = "unknown-" + randomSuffix() - } - appendLog("info", "dev_e2e_started", jsonMap{"pipelineRun": name, "runId": runID, "deployCommit": summary.DeployCommit, "desiredRef": desiredRef, "environment": environment, "keepNamespace": keepNamespace}) - writeJSON(w, http.StatusOK, jsonMap{ - "ok": true, - "mode": "k3s-devops-managed", - "pipelineRun": name, - "namespace": serviceConfig.Namespace, - "temporaryNamespace": "unidesk-ci-e2e-" + runID, - "runId": runID, - "keepNamespace": keepNamespace, - "desiredRef": desiredRef, - "environment": environment, - "deployCommit": summary.DeployCommit, - "services": summary.Services, - "next": []string{"bun scripts/cli.ts ci logs " + name, "bun scripts/cli.ts ci status"}, - }) -} - -func handleLogs(w http.ResponseWriter, name string) { - namespace := url.PathEscape(serviceConfig.Namespace) - selector := url.QueryEscape("tekton.dev/pipelineRun=" + name) - _, 
podRaw, err := kubeRequest("GET", fmt.Sprintf("/api/v1/namespaces/%s/pods?labelSelector=%s", namespace, selector), nil) - if err != nil { - handleError(w, err) - return - } - logs := []jsonMap{} - for index, item := range listItems(decodeJSON(podRaw)) { - if index >= 12 { - break - } - podName := metadataName(item) - if podName == "" { - continue - } - _, raw, err := kubeRequest("GET", fmt.Sprintf("/api/v1/namespaces/%s/pods/%s/log?allContainers=true&tailLines=180", namespace, url.PathEscape(podName)), nil) - if err != nil { - logs = append(logs, jsonMap{"pod": podName, "ok": false, "error": err.Error()}) - } else { - logs = append(logs, jsonMap{"pod": podName, "ok": true, "text": tail(string(raw), 40000)}) - } - } - writeJSON(w, http.StatusOK, jsonMap{"ok": true, "pipelineRun": name, "namespace": serviceConfig.Namespace, "logs": logs}) -} - -func handleCIStatus(w http.ResponseWriter) { - namespace := url.PathEscape(serviceConfig.Namespace) - _, pipelinesRaw, _ := kubeRequest("GET", fmt.Sprintf("/apis/tekton.dev/v1/namespaces/%s/pipelines", namespace), nil) - _, tasksRaw, _ := kubeRequest("GET", fmt.Sprintf("/apis/tekton.dev/v1/namespaces/%s/tasks", namespace), nil) - _, runsRaw, _ := kubeRequest("GET", fmt.Sprintf("/apis/tekton.dev/v1/namespaces/%s/pipelineruns", namespace), nil) - writeJSON(w, http.StatusOK, jsonMap{ - "ok": true, - "service": "devops", - "namespace": serviceConfig.Namespace, - "mode": "k3s-devops-managed", - "pipelines": namesFromItems(decodeJSON(pipelinesRaw)), - "tasks": namesFromItems(decodeJSON(tasksRaw)), - "recentPipelineRuns": namesFromItems(decodeJSON(runsRaw)), - }) -} - -func namesFromItems(value any) []string { - names := []string{} - for _, item := range listItems(value) { - if name := metadataName(item); name != "" { - names = append(names, name) - } - } - if len(names) > 20 { - return names[len(names)-20:] - } - return names -} - -func router(w http.ResponseWriter, r *http.Request) { - if r.Method == http.MethodOptions { - 
writeJSON(w, http.StatusOK, jsonMap{"ok": true}) - return - } - switch { - case (r.URL.Path == "/" || r.URL.Path == "/health") && (r.Method == http.MethodGet || r.Method == http.MethodHead): - if r.Method == http.MethodHead { - w.WriteHeader(http.StatusOK) - return - } - writeJSON(w, http.StatusOK, jsonMap{ - "ok": true, - "service": "devops", - "startedAt": startedAt, - "namespace": serviceConfig.Namespace, - "mode": "k3s-devops-managed", - "normalControlPlane": "CLI -> backend-core -> k3sctl-adapter -> devops -> Kubernetes API/Tekton", - "breakGlass": "provider-gateway host.ssh remains bootstrap/recovery only", - }) - case r.URL.Path == "/live" && (r.Method == http.MethodGet || r.Method == http.MethodHead): - if r.Method == http.MethodHead { - w.WriteHeader(http.StatusOK) - return - } - writeJSON(w, http.StatusOK, jsonMap{"ok": true, "service": "devops", "startedAt": startedAt}) - case r.URL.Path == "/logs" && r.Method == http.MethodGet: - writeJSON(w, http.StatusOK, jsonMap{"ok": true, "logs": recentLogs}) - case r.URL.Path == "/api/ci/status" && r.Method == http.MethodGet: - handleCIStatus(w) - case r.URL.Path == "/api/ci/dev-e2e/run" && r.Method == http.MethodPost: - handleRunDevE2E(w, r) - case strings.HasPrefix(r.URL.Path, "/api/ci/runs/") && strings.HasSuffix(r.URL.Path, "/logs") && r.Method == http.MethodGet: - name := strings.TrimSuffix(strings.TrimPrefix(r.URL.Path, "/api/ci/runs/"), "/logs") - handleLogs(w, name) - case strings.HasPrefix(r.URL.Path, "/api/ci/runs/") && r.Method == http.MethodGet: - name := strings.TrimPrefix(r.URL.Path, "/api/ci/runs/") - status, err := runStatus(name) - if err != nil { - handleError(w, err) - return - } - writeJSON(w, http.StatusOK, status) - default: - writeJSON(w, http.StatusNotFound, jsonMap{"ok": false, "error": "not found"}) - } -} - -func main() { - appendLog("info", "service_started", jsonMap{"port": serviceConfig.Port, "namespace": serviceConfig.Namespace, "pipelineName": serviceConfig.PipelineName, "appImage": 
serviceConfig.AppImage}) - server := &http.Server{ - Addr: fmt.Sprintf("%s:%d", serviceConfig.Host, serviceConfig.Port), - Handler: http.HandlerFunc(router), - ReadHeaderTimeout: 5 * time.Second, - } - log.Fatal(server.ListenAndServe()) -} diff --git a/src/components/microservices/k3sctl-adapter/docker-compose.d601.yml b/src/components/microservices/k3sctl-adapter/docker-compose.d601.yml index b89a9b7e..dde350d9 100644 --- a/src/components/microservices/k3sctl-adapter/docker-compose.d601.yml +++ b/src/components/microservices/k3sctl-adapter/docker-compose.d601.yml @@ -41,7 +41,7 @@ services: K3SCTL_NATIVE_SERVICE_URL_MDTODO: "${K3SCTL_NATIVE_SERVICE_URL_MDTODO:-}" K3SCTL_NATIVE_SERVICE_URL_DECISION_CENTER: "${K3SCTL_NATIVE_SERVICE_URL_DECISION_CENTER:-}" K3SCTL_NATIVE_SERVICE_URL_DEVOPS: "${K3SCTL_NATIVE_SERVICE_URL_DEVOPS:-}" - K3SCTL_MANIFEST_PATHS: "${K3SCTL_MANIFEST_PATHS:-k3s/code-queue.k3s.json,k3s/mdtodo.k3s.json,k3s/claudeqq.k3s.json,k3s/decision-center.k3s.json,k3s/devops.k3s.json}" + K3SCTL_MANIFEST_PATHS: "${K3SCTL_MANIFEST_PATHS:-k3s/code-queue.k3s.json,k3s/mdtodo.k3s.json,k3s/claudeqq.k3s.json,k3s/decision-center.k3s.json}" K3SCTL_SERVICES_JSON: "${K3SCTL_SERVICES_JSON:-[]}" UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-512MiB}" volumes: diff --git a/src/components/microservices/k3sctl-adapter/k3s/devops.k3s.json b/src/components/microservices/k3sctl-adapter/k3s/devops.k3s.json deleted file mode 100644 index 2103acab..00000000 --- a/src/components/microservices/k3sctl-adapter/k3s/devops.k3s.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "apiVersion": "unidesk.ai/k3s/v1", - "kind": "ManagedKubernetesService", - "metadata": { - "name": "devops", - "namespace": "unidesk-ci" - }, - "spec": { - "adapterServiceId": "k3sctl-adapter", - "controlPlane": { - "type": "kubernetes", - "cluster": "unidesk-k3s", - "context": "unidesk-k3s" - }, - "route": { - "kind": "kubernetes-service", - "serviceName": "devops", - "servicePort": 4286 - }, - 
"activeInstanceId": "D601", - "singleWriter": true, - "expectedNodeIds": [ - "D601" - ], - "instances": [ - { - "id": "D601", - "nodeId": "D601", - "role": "primary", - "baseUrl": "kubernetes://unidesk-ci/services/devops:4286", - "healthPath": "/health", - "healthMode": "service-proxy" - } - ], - "requireAllInstancesHealthy": true - } -} diff --git a/src/components/microservices/k3sctl-adapter/k3s/devops.k8s.yaml b/src/components/microservices/k3sctl-adapter/k3s/devops.k8s.yaml deleted file mode 100644 index 1e420d44..00000000 --- a/src/components/microservices/k3sctl-adapter/k3s/devops.k8s.yaml +++ /dev/null @@ -1,171 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: devops - namespace: unidesk-ci - labels: - app.kubernetes.io/name: devops - app.kubernetes.io/part-of: unidesk ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: devops - namespace: unidesk-ci - labels: - app.kubernetes.io/name: devops - app.kubernetes.io/part-of: unidesk -rules: - - apiGroups: [""] - resources: ["pods"] - verbs: ["get", "list", "watch"] - - apiGroups: [""] - resources: ["pods/log"] - verbs: ["get", "list"] - - apiGroups: ["tekton.dev"] - resources: ["pipelines", "tasks", "taskruns"] - verbs: ["get", "list", "watch"] - - apiGroups: ["tekton.dev"] - resources: ["pipelineruns"] - verbs: ["get", "list", "watch", "create"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: devops - namespace: unidesk-ci - labels: - app.kubernetes.io/name: devops - app.kubernetes.io/part-of: unidesk -subjects: - - kind: ServiceAccount - name: devops - namespace: unidesk-ci -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: devops ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: devops - namespace: unidesk-ci - labels: - app.kubernetes.io/name: devops - app.kubernetes.io/part-of: unidesk - unidesk.ai/deployment-mode: k3sctl-managed - unidesk.ai/instance-id: D601 - unidesk.ai/deploy-service-id: devops 
-spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: devops - unidesk.ai/instance-id: D601 - template: - metadata: - labels: - app.kubernetes.io/name: devops - app.kubernetes.io/part-of: unidesk - unidesk.ai/deployment-mode: k3sctl-managed - unidesk.ai/instance-id: D601 - unidesk.ai/node-id: D601 - unidesk.ai/deploy-service-id: devops - annotations: - unidesk.ai/deploy-service-id: devops - unidesk.ai/deploy-repo: https://github.com/pikasTech/unidesk - unidesk.ai/deploy-commit: replace-with-deploy-env-commit - unidesk.ai/deploy-requested-commit: replace-with-deploy-env-commit - spec: - serviceAccountName: devops - nodeSelector: - unidesk.ai/node-id: D601 - terminationGracePeriodSeconds: 10 - containers: - - name: devops - image: unidesk-devops:dev-placeholder - imagePullPolicy: IfNotPresent - ports: - - name: http - containerPort: 4286 - env: - - name: HOST - value: "0.0.0.0" - - name: PORT - value: "4286" - - name: DEVOPS_NAMESPACE - value: "unidesk-ci" - - name: DEVOPS_REPO_URL - value: "https://github.com/pikasTech/unidesk" - - name: DEVOPS_DESIRED_REF - value: "master" - - name: DEVOPS_ENVIRONMENT - value: "dev" - - name: UNIDESK_DEPLOY_SERVICE_ID - value: "devops" - - name: UNIDESK_DEPLOY_REPO - value: "https://github.com/pikasTech/unidesk" - - name: UNIDESK_DEPLOY_COMMIT - value: replace-with-deploy-env-commit - - name: UNIDESK_DEPLOY_REQUESTED_COMMIT - value: replace-with-deploy-env-commit - - name: DEVOPS_GIT_PROXY_URL - value: "http://d601-provider-egress-proxy.unidesk.svc.cluster.local:18789" - - name: DEVOPS_DEV_E2E_PIPELINE - value: "unidesk-dev-namespace-e2e" - - name: DEVOPS_PIPELINE_SERVICE_ACCOUNT - value: "unidesk-ci-runner" - - name: DEVOPS_PIPELINE_WORKSPACE_CLAIM - value: "unidesk-ci-cache" - - name: DEVOPS_DEV_E2E_APP_IMAGE - value: "unidesk-code-queue:dev" - - name: LOG_FILE - value: "/var/log/unidesk/devops.jsonl" - volumeMounts: - - name: logs - mountPath: /var/log/unidesk - readinessProbe: - httpGet: - path: /health - 
port: http - periodSeconds: 5 - timeoutSeconds: 2 - failureThreshold: 12 - livenessProbe: - httpGet: - path: /live - port: http - periodSeconds: 10 - timeoutSeconds: 2 - failureThreshold: 6 - resources: - requests: - cpu: 20m - memory: 48Mi - limits: - memory: 160Mi - volumes: - - name: logs - hostPath: - path: /home/ubuntu/.unidesk/devops-deploy/logs - type: DirectoryOrCreate ---- -apiVersion: v1 -kind: Service -metadata: - name: devops - namespace: unidesk-ci - labels: - app.kubernetes.io/name: devops - app.kubernetes.io/part-of: unidesk - unidesk.ai/deployment-mode: k3sctl-managed -spec: - type: ClusterIP - selector: - app.kubernetes.io/name: devops - unidesk.ai/instance-id: D601 - ports: - - name: http - port: 4286 - targetPort: http diff --git a/src/components/microservices/k3sctl-adapter/src/index.ts b/src/components/microservices/k3sctl-adapter/src/index.ts index 29abe0fc..91dd0839 100644 --- a/src/components/microservices/k3sctl-adapter/src/index.ts +++ b/src/components/microservices/k3sctl-adapter/src/index.ts @@ -274,7 +274,7 @@ function mergeServices(services: ManagedService[]): ManagedService[] { } function readConfig(): RuntimeConfig { - const paths = manifestPaths(envString("K3SCTL_MANIFEST_PATHS", "k3s/code-queue.k3s.json,k3s/mdtodo.k3s.json,k3s/claudeqq.k3s.json,k3s/decision-center.k3s.json,k3s/devops.k3s.json")); + const paths = manifestPaths(envString("K3SCTL_MANIFEST_PATHS", "k3s/code-queue.k3s.json,k3s/mdtodo.k3s.json,k3s/claudeqq.k3s.json,k3s/decision-center.k3s.json")); const inlineServices = parseServices(envString("K3SCTL_SERVICES_JSON", "[]")); const manifestServices = readManifestServices(paths); return {