From bfaea963ad9028f2d96b5b9505125a735934c3ae Mon Sep 17 00:00:00 2001 From: Codex Date: Sun, 17 May 2026 18:33:25 +0000 Subject: [PATCH] fix: prewarm d601 dev k3s images --- AGENTS.md | 2 +- docs/reference/cli.md | 1 + docs/reference/deploy.md | 2 + docs/reference/microservices.md | 2 +- scripts/cli.ts | 2 +- scripts/src/dev-env.ts | 188 ++++++++++++++++++++++++++++++-- 6 files changed, 187 insertions(+), 10 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 33d3c371..f6c04e32 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -37,7 +37,7 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文 - `bun scripts/cli.ts microservice list/status/health/diagnostics/tunnel-self-test/proxy`:管理和验证挂载在主 server、计算节点 Docker 或 k3s 控制面上的用户服务,`proxy` 支持受控 JSON body,OA Event Flow/Todo Note/Baidu Netdisk/Code Queue Manager on main-server、k3s Control/Code Queue 执行面/MDTODO/Decision Center/FindJob/Pipeline/MET Nonlinear on D601 的规则见 `docs/reference/microservices.md`。 - `bun scripts/cli.ts decision upload/list/show/health`:通过 backend-core 用户服务代理上传会议记录/决议 Markdown、列出记录和查看详情;Decision Center 运行在 D601 k3s,规则见 `docs/reference/microservices.md`。 - `bun scripts/cli.ts deploy check/plan/apply [--file deploy.json] [--service ]`:按根目录 `deploy.json` 的服务 repo 和 commit 期望状态校验或更新用户服务,目标侧自行 fetch、构建、部署和 live commit 验证;规则见 `docs/reference/deploy.md`。 -- `bun scripts/cli.ts dev-env validate [--manifest path] [--kubectl-dry-run]`:离线校验 D601 `unidesk-dev` namespace、dev PostgreSQL 底座和 dev backend/frontend manifest 的生产隔离护栏,规则见 `docs/reference/deploy.md` 与 `docs/reference/microservices.md`。 +- `bun scripts/cli.ts dev-env validate [--manifest path] [--kubectl-dry-run]` / `dev-env prewarm-images`:离线校验 D601 `unidesk-dev` 生产隔离护栏,或把开发底座基础镜像预热到 D601 原生 k3s containerd,规则见 `docs/reference/deploy.md` 与 `docs/reference/microservices.md`。 - `bun scripts/cli.ts ci install/status/run/logs`:在 D601 原生 k3s 上安装和运行 Tekton CI,只做每 commit 检查和 Code Queue 只读性能门禁,不部署 CD;规则见 `docs/reference/ci.md`。 - `bun scripts/cli.ts codex deploy `:Code Queue 
兼容部署入口,会生成临时 desired manifest 并调用 `deploy apply --service code-queue` 的同一条 target-side build 与 live commit 验证路径;规则见 `docs/reference/codex-deploy.md`。 - `bun scripts/cli.ts codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue ]`:通过 backend-core 私有代理提交 Code Queue 任务;控制面默认走主 server `code-queue-mgr` 写入 PostgreSQL,`--dry-run` 可只检查请求体不入队,规则见 `docs/reference/cli.md`。 diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 90c055db..6529a6fd 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -23,6 +23,7 @@ UniDesk 的统一 CLI 入口是根目录 `scripts/cli.ts`,运行方式固定 - `decision upload/list/show/health` 通过 backend-core 用户服务代理访问 D601 k3s Decision Center,用于上传会议记录/决议 Markdown、列出权威记录、查看详情和健康检查;它不得直连 D601 Service、NodePort 或 provider-gateway 业务 HTTP。 - `deploy check/plan/apply` 默认从根目录 `deploy.json` 读取服务 repo 与 commit 期望状态,join `config.json` 和现有 manifest 后使用 target-side build 单一路径校验或更新直管服务与 k3s 代管服务;`deploy plan --env dev|prod` 在 Phase 0 只从固定 Git ref 读取 manifest 并输出 dry-run 环境计划,不使用本地 dirty worktree;规则见 `docs/reference/deploy.md`。 - `dev-env validate [--manifest path] [--kubectl-dry-run]` 离线校验 D601 `unidesk-dev` namespace、dev PostgreSQL 底座和 dev backend/frontend manifest。默认检查 `src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-foundation.k8s.yaml`;也可显式校验 `src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-core.k8s.yaml`。所有 namespaced 对象必须只落到 `unidesk-dev`,foundation manifest 必须包含 `postgres-dev` StatefulSet/Service、dev secret/config、迁移 Job 和 DB URL guard,core manifest 必须包含 `backend-core-dev`/`frontend-dev` Deployment/Service。加 `--kubectl-dry-run` 时额外执行 `kubectl apply --dry-run=client --validate=false -f `,仍不 apply 资源。 +- `dev-env prewarm-images [--image image] [--provider-id D601] [--no-pull] [--proxy-url URL] [--pull-timeout-ms N] [--dry-run]` 创建异步 job,通过 UniDesk SSH 维护桥在 D601 上把开发底座依赖镜像从 Docker 缓存导入原生 k3s containerd。默认镜像是 `postgres:16-alpine` 和 `rancher/mirrored-library-busybox:1.36.1`,用于避免 `postgres-dev` 与 local-path helper pod 
卡在外部 registry 拉取。该命令固定验证 `/etc/rancher/k3s/k3s.yaml` 指向的 native k3s 上下文,并输出 `dev_env_containerd_image_ready=...` 作为成功判据;它不 apply manifest、不修改生产 `unidesk` namespace。 - `codex deploy ` 是 Code Queue 兼容部署入口,会生成临时 desired manifest 并调用 `deploy apply --service code-queue` 的同一条 target-side build、k3s import、rollout 和 live commit 验证路径;详细规则见 `docs/reference/codex-deploy.md`。 - `codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue queueId] [--provider-id id] [--cwd path] [--model model] [--reasoning-effort effort] [--execution-mode mode] [--max-attempts N] [--reference-task-id id] [--dry-run]` 通过 backend-core 私有代理向稳定 `code-queue` 用户服务路径提交任务;prompt 必须且只能来自位置参数、文件或 stdin 之一,`--dry-run` 只返回结构化请求且不实际入队。提交确认和 dry-run 必须返回完整 prompt、字符数和 `truncated=false`,不能套用任务详情的预览截断策略,否则长任务 prompt 无法被人工验收。backend-core 默认把提交、队列 CRUD、已读状态、历史摘要和轻量 Trace 读取分流到主 server `code-queue-mgr`,由它写入主 PostgreSQL;D601 scheduler 只轮询并执行已入库任务。 - `codex task ` 通过 Code Queue 私有代理按任务 ID 查询结构化执行摘要;默认只返回有界 prompt/response 预览、执行 Provider、工作目录、最后 assistant message、最近工具调用摘要、attempt、judge、错误、耗时和 trace 翻页提示,适合在新队列任务中引用历史 session 且避免噪声爆炸。该摘要读取默认由主 server `code-queue-mgr` 从 PostgreSQL 返回,不依赖 D601 `code-queue-read` Service 可用。 diff --git a/docs/reference/deploy.md b/docs/reference/deploy.md index afa511b3..6951e3da 100644 --- a/docs/reference/deploy.md +++ b/docs/reference/deploy.md @@ -48,6 +48,8 @@ It may create resources only in `unidesk-dev`: The manifest must not create, update, or delete production namespace resources, production DB objects, production PVCs, production Deployments/Services/Secrets, or main server Docker Compose services. Static validation is available through `bun scripts/cli.ts dev-env validate`; Kubernetes client dry-run is `bun scripts/cli.ts dev-env validate --kubectl-dry-run`. 
If applying manually during Phase 2, the only allowed apply target is this manifest and the post-check must prove production resources are unchanged, for example by comparing `kubectl -n unidesk get deploy,sts,svc,secret,pvc -o name` before and after. +Before applying the foundation on a fresh D601 native k3s runtime, run `bun scripts/cli.ts dev-env prewarm-images` and wait for the returned job to succeed. This imports the foundation images `postgres:16-alpine` and `rancher/mirrored-library-busybox:1.36.1` from the Docker image cache into the `k8s.io` namespace of the native k3s containerd (reached through `/run/k3s/containerd/containerd.sock`); k3s/containerd must not depend on live Docker Hub pulls during rollout. If this step is skipped, `postgres-dev` or the local-path helper pod can remain in `ImagePullBackOff`, leaving the PVC pending even though the manifest is valid. + Phase 2 guardrails are deliberately limited to the dev manifest and CLI validator. Runtime startup guards for dev backend-core, Code Queue and Code Queue Manager must be reviewed and shipped as a separate change before dev workloads are exposed beyond dry-run or controlled apply. On D601, dev/prod k3s verification must use the native k3s kubeconfig explicitly: `KUBECONFIG=/etc/rancher/k3s/k3s.yaml`. The default `kubectl` context may point at Docker Desktop and is not an acceptable target for UniDesk k3s deploy validation. 
diff --git a/docs/reference/microservices.md b/docs/reference/microservices.md index ded6ebf7..710fb3ba 100644 --- a/docs/reference/microservices.md +++ b/docs/reference/microservices.md @@ -157,7 +157,7 @@ D601 开发环境底座只允许创建 `unidesk-dev` namespace 与 dev 专用对 `postgres-dev` 是 dev backend-core 与 dev Code Queue 状态的默认唯一数据库。dev 运行时必须使用 `postgres-dev.unidesk-dev.svc.cluster.local:5432/unidesk_dev` 和 dev Provider 身份 `D601-dev`;不得共享生产 `d601-tcp-egress-gateway.../unidesk`。当前 Phase 2 只提供 manifest 脚本和 `dev-env validate` 的静态护栏;backend-core、Code Queue 和 Code Queue Manager 的运行时启动护栏需在后续阶段单独评审后接入。 -验收入口:先运行 `bun scripts/cli.ts dev-env validate` 做静态资源与 DB URL 护栏检查;具备 D601 kubeconfig 时运行 `bun scripts/cli.ts dev-env validate --kubectl-dry-run` 做 Kubernetes client dry-run。若实际 apply,只能 apply 到 `unidesk-dev`,随后用 `kubectl -n unidesk-dev get pods,svc,pvc` 验证 dev DB ready,并对比 apply 前后的 `kubectl -n unidesk get deploy,sts,svc,secret,pvc -o name` 证明生产 workload 未变化。 +验收入口:先运行 `bun scripts/cli.ts dev-env validate` 做静态资源与 DB URL 护栏检查;具备 D601 kubeconfig 时运行 `bun scripts/cli.ts dev-env validate --kubectl-dry-run` 做 Kubernetes client dry-run。首次或镜像缓存不确定时,先运行 `bun scripts/cli.ts dev-env prewarm-images`,把 `postgres:16-alpine` 和 local-path helper 所需的 `rancher/mirrored-library-busybox:1.36.1` 导入 D601 原生 k3s containerd;否则 D601 的 Docker 代理/缓存正常也不能保证 k3s/containerd 能实时拉到外部镜像。若实际 apply,只能 apply 到 `unidesk-dev`,随后用 `kubectl -n unidesk-dev get pods,svc,pvc` 验证 dev DB ready,并对比 apply 前后的 `kubectl -n unidesk get deploy,sts,svc,secret,pvc -o name` 证明生产 workload 未变化。 D601 上必须显式使用原生 k3s kubeconfig:`KUBECONFIG=/etc/rancher/k3s/k3s.yaml`。默认 `kubectl` context 可能是 Docker Desktop,不能作为 UniDesk k3s deploy 或 dry-run 验收目标。 diff --git a/scripts/cli.ts b/scripts/cli.ts index 240ef6f2..33147f19 100644 --- a/scripts/cli.ts +++ b/scripts/cli.ts @@ -55,7 +55,7 @@ function help(): unknown { { command: "decision list [--type ...] [--status ...] [--level ...] 
[--linked-goal-id id] [--limit N]", description: "List Decision Center records through the user-service proxy." }, { command: "decision show ", description: "Show one Decision Center record." }, { command: "deploy check|plan|apply [--file deploy.json|--env dev|prod] [--service id] [--dry-run] [--force]", description: "Reconcile services from a repo+commit manifest; --env uses fixed environment refs for dry-run planning in Phase 0." }, - { command: "dev-env validate [--manifest path] [--kubectl-dry-run]", description: "Validate the D601 unidesk-dev namespace/database foundation manifest and production DB URL guardrails without applying resources." }, + { command: "dev-env validate|prewarm-images", description: "Validate D601 unidesk-dev guardrails or prewarm dev foundation images into native k3s containerd through a bounded async job." }, { command: "schedule list|get|runs|run|delete", description: "Manage backend-core scheduled tasks and run history; schedule run supports --wait-ms N." }, { command: "schedule upsert-pgdata-backup [--time HH:MM] [--remote-base /SERVER_DATA/UNIDESK_PG_DATA]", description: "Create or update the daily PGDATA physical backup task that uploads monthly rotated archives to Baidu Netdisk." }, { command: "codex deploy [--provider-id D601] [--timeout-ms N]", description: "Compatibility wrapper for deploy apply --service code-queue with a temporary repo+commit manifest." 
}, diff --git a/scripts/src/dev-env.ts b/scripts/src/dev-env.ts index 923fa410..0bfd2e52 100644 --- a/scripts/src/dev-env.ts +++ b/scripts/src/dev-env.ts @@ -1,10 +1,17 @@ import { readFileSync } from "node:fs"; import { runCommand } from "./command"; import { repoRoot, rootPath } from "./config"; +import { startJob } from "./jobs"; const defaultManifest = "src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-foundation.k8s.yaml"; const devNamespace = "unidesk-dev"; const prodNamespace = "unidesk"; +const defaultProviderId = "D601"; +const defaultProxyUrl = "http://127.0.0.1:18789"; +const defaultPrewarmImages = [ + "postgres:16-alpine", + "rancher/mirrored-library-busybox:1.36.1", +]; const foundationRequiredKinds = new Set([ "Namespace/unidesk-dev", "Secret/unidesk-dev-runtime-secrets", @@ -30,17 +37,36 @@ interface ManifestDocument { namespace: string | null; } -interface DevEnvOptions { +interface ValidateOptions { manifestPath: string; kubectlDryRun: boolean; } +interface PrewarmImagesOptions { + providerId: string; + images: string[]; + proxyUrl: string; + pullMissing: boolean; + pullTimeoutMs: number; + dryRun: boolean; +} + function isHelpArg(arg: string | undefined): boolean { return arg === "help" || arg === "--help" || arg === "-h"; } -function parseOptions(args: string[]): DevEnvOptions { - const options: DevEnvOptions = { manifestPath: defaultManifest, kubectlDryRun: false }; +function positiveInteger(value: string | undefined, option: string): number { + const parsed = Number(value); + if (!Number.isInteger(parsed) || parsed <= 0) throw new Error(`${option} must be a positive integer`); + return parsed; +} + +function rejectUnsafeToken(value: string, option: string): void { + if (/[\s\x00-\x1f\x7f]/u.test(value)) throw new Error(`${option} must not contain whitespace or control characters`); +} + +function parseValidateOptions(args: string[]): ValidateOptions { + const options: ValidateOptions = { manifestPath: defaultManifest, 
kubectlDryRun: false }; for (let index = 0; index < args.length; index += 1) { const arg = args[index]; if (arg === "--manifest") { @@ -57,6 +83,51 @@ function parseOptions(args: string[]): DevEnvOptions { return options; } +function parsePrewarmImagesOptions(args: string[]): PrewarmImagesOptions { + const images: string[] = []; + const options: PrewarmImagesOptions = { + providerId: defaultProviderId, + images, + proxyUrl: defaultProxyUrl, + pullMissing: true, + pullTimeoutMs: 300_000, + dryRun: false, + }; + for (let index = 0; index < args.length; index += 1) { + const arg = args[index]; + if (arg === "--provider-id") { + const value = args[index + 1]; + if (value === undefined || value.length === 0) throw new Error("--provider-id requires a value"); + rejectUnsafeToken(value, "--provider-id"); + options.providerId = value; + index += 1; + } else if (arg === "--image") { + const value = args[index + 1]; + if (value === undefined || value.length === 0) throw new Error("--image requires a value"); + rejectUnsafeToken(value, "--image"); + images.push(value); + index += 1; + } else if (arg === "--proxy-url") { + const value = args[index + 1]; + if (value === undefined || value.length === 0) throw new Error("--proxy-url requires a value"); + rejectUnsafeToken(value, "--proxy-url"); + options.proxyUrl = value; + index += 1; + } else if (arg === "--pull-timeout-ms") { + options.pullTimeoutMs = positiveInteger(args[index + 1], "--pull-timeout-ms"); + index += 1; + } else if (arg === "--no-pull") { + options.pullMissing = false; + } else if (arg === "--dry-run") { + options.dryRun = true; + } else if (!isHelpArg(arg)) { + throw new Error(`unknown dev-env prewarm-images option: ${arg}`); + } + } + if (options.images.length === 0) options.images = [...defaultPrewarmImages]; + return options; +} + function scalarAfter(text: string, key: string): string | null { const match = text.match(new RegExp(`^\\s*${key}:\\s*"?([^"\\n#]+)"?\\s*(?:#.*)?$`, "mu")); return 
match?.[1]?.trim() ?? null; @@ -135,17 +206,92 @@ function kubectlDryRun(manifestPath: string): unknown { }; } +function shellQuote(value: string): string { + return `'${value.replace(/'/g, "'\\''")}'`; +} + +function prewarmImagesScript(options: PrewarmImagesOptions): string { + const imageArray = options.images.map(shellQuote).join(" "); + const pullTimeoutSeconds = Math.max(1, Math.ceil(options.pullTimeoutMs / 1000)); + return [ + "set -euo pipefail", + "root_exec() {", + " if [ \"$(id -u)\" = \"0\" ]; then \"$@\"; return; fi", + " if sudo -n true >/dev/null 2>&1; then sudo -n \"$@\"; return; fi", + " if [ -x /mnt/c/Windows/System32/wsl.exe ]; then /mnt/c/Windows/System32/wsl.exe -u root -- \"$@\"; return; fi", + " echo 'dev_env_native_k3s_root_access=missing' >&2", + " return 1", + "}", + "normalize_image() {", + " image=\"$1\"", + " case \"$image\" in", + " *@*) printf '%s\\n' \"$image\" ;;", + " docker.io/*|ghcr.io/*|gcr.io/*|quay.io/*|cgr.dev/*|registry.*/*|localhost/*|*.*/*) printf '%s\\n' \"$image\" ;;", + " */*) printf 'docker.io/%s\\n' \"$image\" ;;", + " *) printf 'docker.io/library/%s\\n' \"$image\" ;;", + " esac", + "}", + `images=(${imageArray})`, + `proxy_url=${shellQuote(options.proxyUrl)}`, + `pull_missing=${options.pullMissing ? 
"1" : "0"}`, + `pull_timeout_seconds=${pullTimeoutSeconds}`, + "kubeconfig=/etc/rancher/k3s/k3s.yaml", + "ctr_address=/run/k3s/containerd/containerd.sock", + "export DOCKER_CONFIG=/tmp/unidesk-dev-env-docker-config", + "mkdir -p \"$DOCKER_CONFIG\"", + "printf '{}\\n' > \"$DOCKER_CONFIG/config.json\"", + "printf 'dev_env_k3s_context='", + "KUBECONFIG=\"$kubeconfig\" kubectl config current-context", + "printf 'dev_env_k3s_nodes='", + "KUBECONFIG=\"$kubeconfig\" kubectl get nodes -o name | tr '\\n' ' '", + "printf '\\n'", + "for image in \"${images[@]}\"; do", + " if docker image inspect \"$image\" >/dev/null 2>&1; then", + " echo dev_env_image_cached=$image", + " elif [ \"$pull_missing\" = \"1\" ]; then", + " echo dev_env_image_pull=$image", + " timeout \"$pull_timeout_seconds\" env HTTP_PROXY=\"$proxy_url\" HTTPS_PROXY=\"$proxy_url\" ALL_PROXY=\"$proxy_url\" NO_PROXY=\"localhost,127.0.0.1,::1,host.docker.internal\" docker pull --platform linux/amd64 \"$image\"", + " else", + " echo dev_env_image_missing=$image >&2", + " exit 1", + " fi", + "done", + "archive=$(mktemp /tmp/unidesk-dev-env-images.XXXXXX.tar)", + "list_file=$(mktemp /tmp/unidesk-dev-env-ctr-images.XXXXXX.txt)", + "trap 'rm -f \"$archive\" \"$list_file\"' EXIT", + "docker save \"${images[@]}\" -o \"$archive\"", + "root_exec ctr --address \"$ctr_address\" -n k8s.io images import --digests --all-platforms \"$archive\"", + "root_exec ctr --address \"$ctr_address\" -n k8s.io images ls > \"$list_file\"", + "missing=0", + "for image in \"${images[@]}\"; do", + " needle=$(normalize_image \"$image\")", + " if grep -F \"$needle\" \"$list_file\" >/dev/null || grep -F \"$image\" \"$list_file\" >/dev/null; then", + " echo dev_env_containerd_image_ready=$image", + " else", + " echo dev_env_containerd_image_missing=$image needle=$needle >&2", + " missing=1", + " fi", + "done", + "test \"$missing\" = \"0\"", + ].join("\n"); +} + function devEnvHelp(): Record { return { ok: true, - command: "dev-env validate", - usage: 
"bun scripts/cli.ts dev-env validate [--manifest path] [--kubectl-dry-run]", + command: "dev-env", + usage: [ + "bun scripts/cli.ts dev-env validate [--manifest path] [--kubectl-dry-run]", + "bun scripts/cli.ts dev-env prewarm-images [--image image] [--provider-id D601] [--no-pull] [--proxy-url URL] [--pull-timeout-ms N] [--dry-run]", + ], defaultManifest, + defaultPrewarmImages, checks: [ "all namespaced resources must target unidesk-dev", "required foundation resources or backend-core-dev/frontend-dev resources must exist", "dev DATABASE_URL values must target postgres-dev/unidesk_dev and not production routes", "--kubectl-dry-run optionally asks kubectl to client-dry-run the manifest without applying it", + "prewarm-images imports dev foundation images from Docker into native k3s containerd on D601", ], }; } @@ -153,9 +299,37 @@ function devEnvHelp(): Record { export function runDevEnvCommand(args: string[]): unknown { const action = args[0]; if (action === undefined || isHelpArg(action)) return devEnvHelp(); - if (action !== "validate") throw new Error("dev-env usage: bun scripts/cli.ts dev-env validate [--manifest path] [--kubectl-dry-run]"); + if (action === "prewarm-images") { + const options = parsePrewarmImagesOptions(args.slice(1)); + const script = prewarmImagesScript(options); + const command = [process.execPath, "scripts/cli.ts", "ssh", options.providerId, "argv", "bash", "-lc", script]; + if (options.dryRun) { + return { + ok: true, + dryRun: true, + providerId: options.providerId, + images: options.images, + proxyUrl: options.proxyUrl, + pullMissing: options.pullMissing, + pullTimeoutMs: options.pullTimeoutMs, + command, + }; + } + const job = startJob("dev_env_prewarm_images", command, `Prewarm ${options.images.length} dev foundation image(s) into ${options.providerId} native k3s containerd`); + return { + ok: true, + providerId: options.providerId, + images: options.images, + proxyUrl: options.proxyUrl, + pullMissing: options.pullMissing, + 
pullTimeoutMs: options.pullTimeoutMs, + job, + statusCommand: `bun scripts/cli.ts job status ${job.id} --tail-bytes 12000`, + }; + } + if (action !== "validate") throw new Error("dev-env usage: bun scripts/cli.ts dev-env validate [--manifest path] [--kubectl-dry-run] OR dev-env prewarm-images"); - const options = parseOptions(args.slice(1)); + const options = parseValidateOptions(args.slice(1)); const manifestPath = rootPath(options.manifestPath); const manifestText = readFileSync(manifestPath, "utf8"); const docs = parseManifestDocuments(manifestText);