From c93fb275c5ab4f5348573c3b915cf68e0c20d3a7 Mon Sep 17 00:00:00 2001 From: Codex Date: Sat, 23 May 2026 16:21:45 +0000 Subject: [PATCH] fix: harden d601 k3s guards --- AGENTS.md | 2 +- docs/reference/cicd-standardization.md | 2 +- docs/reference/cli.md | 2 +- docs/reference/deploy.md | 2 +- docs/reference/hwlab.md | 2 +- scripts/ci/dev-e2e.sh | 24 ++- scripts/hwlab-cd-wrapper-contract-test.ts | 66 +++++++- scripts/src/artifact-registry.ts | 10 +- scripts/src/check.ts | 1 + scripts/src/ci.ts | 19 ++- scripts/src/d601-k3s-guard.ts | 188 ++++++++++++++++++++++ scripts/src/deploy.ts | 17 +- scripts/src/dev-env.ts | 28 +++- scripts/src/hwlab-cd.ts | 47 +++--- 14 files changed, 353 insertions(+), 57 deletions(-) create mode 100644 scripts/src/d601-k3s-guard.ts diff --git a/AGENTS.md b/AGENTS.md index a87b55bc..73d03bc2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,7 +6,7 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文 - P0: D601 上的 Kubernetes 运行面只能以自部署原生 k3s 为准;Docker Desktop Kubernetes 已经停用并清理数据,任何人不得重新启用或把它作为 UniDesk/HWLAB 部署、CI/CD、诊断或验收目标。跟踪 issue: [pikasTech/unidesk#138](https://github.com/pikasTech/unidesk/issues/138),热修复背景见 [pikasTech/unidesk#118](https://github.com/pikasTech/unidesk/issues/118)。 - D601 上裸 `kubectl` 不可信:`/home/ubuntu/.kube/config` 可能仍残留 `docker-desktop` / `127.0.0.1:11700`。所有 D601 k3s 读写、Tekton、Code Queue、HWLAB/UniDesk DEV 部署与排障必须显式使用 `KUBECONFIG=/etc/rancher/k3s/k3s.yaml`,并在写操作前确认节点名是 `d601`。 -- 发现 `desktop-control-plane`、`docker-desktop` context、Docker Desktop Kubernetes namespace、旧 direct Docker `code-queue-backend` 或同一服务被 Docker Desktop k8s 与原生 k3s 同时承载时,必须立即停止部署动作并按 #138 处理;不要把第二控制面的状态当作恢复证据。 +- 写操作的实际目标 context/server/nodes 出现 `desktop-control-plane`、`docker-desktop` 或 `127.0.0.1:11700`,发现 Docker Desktop Kubernetes namespace、旧 direct Docker `code-queue-backend`,或同一服务被 Docker Desktop k8s 与原生 k3s 同时承载时,必须立即停止部署动作并按 #138 处理;裸 `kubectl` 默认 context 只作为诊断,不能把第二控制面的状态当作恢复证据。 ## Critical GitHub Issue Write Rule diff --git a/docs/reference/cicd-standardization.md b/docs/reference/cicd-standardization.md index a526a13c..ab326b7b 100644 --- a/docs/reference/cicd-standardization.md +++ b/docs/reference/cicd-standardization.md @@ -6,7 +6,7 @@ This document defines the stable split between CI artifact producers, artifact c ## D601 Control-Plane Guard -D601 CI/CD must target native k3s only. Docker Desktop Kubernetes has been disabled and must not be reintroduced; the incident and governance plan are tracked in [GitHub issue #138](https://github.com/pikasTech/unidesk/issues/138), with recovery context in [GitHub issue #118](https://github.com/pikasTech/unidesk/issues/118). CI producer, Tekton, deploy, artifact-registry and manual recovery scripts must not rely on default kubeconfig. They must export `KUBECONFIG=/etc/rancher/k3s/k3s.yaml`, verify node `d601`, and fail fast if the context/server indicates `docker-desktop`, `desktop-control-plane`, or `127.0.0.1:11700`. +D601 CI/CD must target native k3s only. Docker Desktop Kubernetes has been disabled and must not be reintroduced; the incident and governance plan are tracked in [GitHub issue #138](https://github.com/pikasTech/unidesk/issues/138), with recovery context in [GitHub issue #118](https://github.com/pikasTech/unidesk/issues/118). CI producer, Tekton, deploy, artifact-registry and manual recovery scripts must not rely on default kubeconfig. They must export `KUBECONFIG=/etc/rancher/k3s/k3s.yaml`, verify node `d601`, and fail fast if the actual target context/server/nodes indicate `docker-desktop`, `desktop-control-plane`, or `127.0.0.1:11700`. A stale default kubeconfig may be reported as a diagnostic, but it is not a blocker when the explicit D601 kubeconfig passes. ## Target Shape diff --git a/docs/reference/cli.md b/docs/reference/cli.md index fb58cca6..b63fc0c0 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -31,7 +31,7 @@ CLI 可以从 `master` 快速演进,但必须兼容 `deploy.json` 固定的 CI - `dev-env prewarm-images [--image image] [--provider-id D601] [--no-pull] [--proxy-url URL] [--pull-timeout-ms N] [--dry-run]` 创建异步 job,通过 UniDesk SSH 维护桥在 D601 上把开发底座依赖镜像从 Docker 缓存导入原生 k3s containerd。默认镜像是 `postgres:16-alpine` 和 `rancher/mirrored-library-busybox:1.36.1`,用于避免 `postgres-dev` 与 local-path helper pod 卡在外部 registry 拉取。该命令固定验证 `/etc/rancher/k3s/k3s.yaml` 指向的 native k3s 上下文,并输出 `dev_env_containerd_image_ready=...` 作为成功判据;它不 apply manifest、不修改生产 `unidesk` namespace。 - `artifact-registry plan|render|status|health|install|deploy-backend-core|deploy-service` 管理 D601 host-managed CNCF Distribution registry 的声明、安装、只读检查和 pull-only artifact CD。该 registry 固定为 D601 loopback `127.0.0.1:5000`,由 systemd + Docker Compose 管理,位于 native k3s 故障域外;`deploy-service` 只拉取 CI 已发布的 commit-pinned 镜像、retag/recreate 或导入 native k3s,并做 live commit 验证,不构建 runtime source。`deploy-backend-core` 是 deprecated 兼容名,标准 backend-core prod CD 入口是 `deploy apply --env prod --service backend-core`。长期规则见 `docs/reference/artifact-registry.md`。 - `commander contract|plan --dry-run|smoke --dry-run|approval request --dry-run|prompt-lint --kind gpt55-pr` 是 host Codex 指挥官直管微服务 skeleton 入口。当前命令返回 `phase=source-contract`、service/API/state/bridge/prompt/trace/#20/#46/ClaudeQQ 审批边界、.state/commander/ 状态模型、dev 无 daemon smoke contract、dry-run 计划和 GPT-5.5 PR prompt 边界辅助 lint,不接 live bridge、不注入 prompt、不发送 ClaudeQQ。`approval request --dry-run` 会生成 200 字以内中文纯文本 ClaudeQQ 审批草案、`notification-path-unavailable` blocker 和授权后唯一可用的 `bun scripts/cli.ts microservice proxy claudeqq /api/push/text --method POST --body-json '' --raw` 命令;不得提示使用本机 ClaudeQQ skill、powershell 或本地 server。`prompt-lint` 支持 `--prompt-file` 与 `--stdin`,输出 `ok`、`missingClauses`、`riskLevel`、`suggestedPatchSnippet` 且不回显完整 prompt;它是 commander 辅助检查,不是业务 PR 门禁,也不改变 `codex submit` 默认行为。`plan`、`smoke` 与 `approval request` 必须带 `--dry-run`;缺少时返回 `error=dry-run-required`。长期规则见 `docs/reference/host-codex-commander.md`。 -- `hwlab cd status --env dev` 和 `hwlab cd apply --env dev --dry-run` 是 HWLAB DEV CD 指挥侧 wrapper。它只调用 HWLAB repo-owned 受控入口,不内嵌发布 kubectl 逻辑:`status` 汇总 HWLAB repo path、Git clean/main/origin-main、`deploy/deploy.json`/artifact catalog/workloads 一致性、D601 native k3s guard、CD Lease lock、16666/16667 live revision;完整 stdout/stderr 写入 `.state/hwlab-cd//`,stdout 只返回有界摘要。wrapper 强制 `KUBECONFIG=/etc/rancher/k3s/k3s.yaml`,任何 `docker-desktop`、`desktop-control-plane` 或 `127.0.0.1:11700` 信号都会结构化拒绝。`apply --dry-run` 调用 HWLAB `scripts/dev-deploy-apply.mjs --dry-run --expect-blocked --kubeconfig /etc/rancher/k3s/k3s.yaml`;真实 apply 只暴露 `scripts/dev-cd-apply.mjs --apply --confirm-dev --confirmed-non-production --write-report` 命令形状并标注 host-commander-only,本 runner 不执行 live apply。长期规则见 `docs/reference/hwlab.md`。 +- `hwlab cd status --env dev` 和 `hwlab cd apply --env dev --dry-run` 是 HWLAB DEV CD 指挥侧 wrapper。它只调用 HWLAB repo-owned 受控入口,不内嵌发布 kubectl 逻辑:`status` 汇总 HWLAB repo path、Git clean/main/origin-main、`deploy/deploy.json`/artifact catalog/workloads 一致性、D601 native k3s guard、CD Lease lock、16666/16667 live revision;完整 stdout/stderr 写入 `.state/hwlab-cd//`,stdout 只返回有界摘要。wrapper 强制 `KUBECONFIG=/etc/rancher/k3s/k3s.yaml` 并只以这个显式目标作为 gate;显式目标出现 `docker-desktop`、`desktop-control-plane` 或 `127.0.0.1:11700` 信号会结构化拒绝,裸 `kubectl` 默认 context 只作为诊断。`apply --dry-run` 调用 HWLAB `scripts/dev-deploy-apply.mjs --dry-run --expect-blocked --kubeconfig /etc/rancher/k3s/k3s.yaml`;真实 apply 只暴露 `scripts/dev-cd-apply.mjs --apply --confirm-dev --confirmed-non-production --write-report` 命令形状并标注 host-commander-only,本 runner 不执行 live apply。长期规则见 `docs/reference/hwlab.md`。 - `gh auth status [--repo owner/name]` 探测 GitHub 操作前置条件并输出脱敏 JSON:是否存在 `gh` binary、是否存在 `GH_TOKEN`/`GITHUB_TOKEN` 或可用 `gh auth token` fallback、REST API 是否可达、目标 repo 是否可见、issue 是否可读。degraded reason 必须归类为 `missing-binary`、`missing-token`、`auth-failed`、`github-transient`、`network-proxy-failed`、`permission-denied`、`repo-not-found`、`repo-forbidden`、`issue-not-found`、`pr-not-found`、`scope-insufficient`、`validation-failed`、`invalid-response` 或 `unsupported-command`,不得打印 token;失败对象必须包含 `runnerDisposition=infra-blocked|business-failed`,runner 应优先用该字段分流。`github-transient` 表示 GitHub DNS/API 连接在收到 HTTP 状态前失败,输出应带 `retryable=true` 或等价 commander action;这不是缺 token、认证失败、权限不足或 PR 语义失败。 - `codex prompt-lint [prompt|--prompt-file path|--prompt-stdin]` 是派发/steer 前的本地 dry-run prompt lint。它只读取 prompt 文本,返回 `dryRun=true`、`mutation=false`、`declaredClass`、`effectiveClass`、`requiredClass`、`dispatchDisposition`、缺失或矛盾项和有界 evidence,不访问 live service、不提交任务、不打印完整 prompt。分级固定为 `read-only`、`live-read`、`live-mutating`;未声明时按 `read-only` 处理。`codex submit --dry-run` 与 `codex steer --dry-run` 会嵌入同一 `promptLint` 结果,帮助指挥官在 dispatch/steer 前发现缺失或矛盾的 live mutation 授权。长期规则见 `docs/reference/code-queue-supervision.md` 的 DEV 测试授权分级。 - `gh issue list [--state open|closed|all] [--limit N] [--repo owner/name] [--json number,title,state,url,updatedAt,createdAt,author,labels]` 通过 GitHub REST 列出 issue,默认 `state=open`、`limit=30`,输出稳定 JSON 且不依赖系统 `gh` binary。`--limit` 会映射到 GitHub `per_page` 并限制返回数量,避免一次拉爆上下文;未知 state 或未知 `--json` 字段必须结构化失败并带 `runnerDisposition=business-failed`。GitHub issues API 可能混入 PR,CLI 会从 `.data.issues` 中过滤 pull request。 diff --git a/docs/reference/deploy.md b/docs/reference/deploy.md index 161a5c02..65236642 100644 --- a/docs/reference/deploy.md +++ b/docs/reference/deploy.md @@ -14,7 +14,7 @@ D601 曾同时存在 Docker Desktop Kubernetes 与自部署 k3s,并已造成 ` KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl get nodes -o jsonpath='{.items[*].metadata.name}' ``` -结果必须包含 `d601`。裸 `kubectl`、`docker-desktop` context、`desktop-control-plane` 节点或 `127.0.0.1:11700` server 都不是 UniDesk k3s 证据;出现这些信号时必须停止写操作并修复 kubeconfig/脚本入口。Docker daemon 仍可用于镜像构建、registry 或直管服务,但 Docker Desktop Kubernetes 不得与原生 k3s 共同承载 `unidesk*`、`hwlab-dev` 或 Code Queue 资源。 +结果必须包含 `d601`。裸 `kubectl` 不是 UniDesk k3s 证据;默认 kubeconfig 若残留 `docker-desktop`、`desktop-control-plane` 或 `127.0.0.1:11700`,只能作为诊断和修复提示,不能覆盖显式 D601 kubeconfig 的判定。写操作的实际目标 kubeconfig/context/server/nodes 若出现这些 Docker Desktop 信号,或 nodes 未包含 `d601`,必须停止写操作并修复 kubeconfig/脚本入口。Docker daemon 仍可用于镜像构建、registry 或直管服务,但 Docker Desktop Kubernetes 不得与原生 k3s 共同承载 `unidesk*`、`hwlab-dev` 或 Code Queue 资源。 ## Manifest diff --git a/docs/reference/hwlab.md b/docs/reference/hwlab.md index 22c0ef94..e4d68c37 100644 --- a/docs/reference/hwlab.md +++ b/docs/reference/hwlab.md @@ -54,7 +54,7 @@ wrapper 的职责是把 host commander 常用的 HWLAB DEV rollout 查看/准备 - `status` 只读汇总 HWLAB repo path、Git clean/main/origin-main、`deploy/deploy.json`/`deploy/artifact-catalog.dev.json`/`deploy/k8s/base/workloads.yaml` 一致性、D601 native k3s guard、`Lease/hwlab-dev/hwlab-dev-cd-lock`、公网 `16666/16667` live revision。 - `apply --dry-run` 调用 HWLAB `scripts/dev-deploy-apply.mjs --dry-run --expect-blocked --kubeconfig /etc/rancher/k3s/k3s.yaml`,只生成准备/阻塞摘要,不做真实 apply、rollout 或 live verification。 - 完整下游 stdout/stderr、HTTP body 和 kubectl 读命令输出写入 UniDesk `.state/hwlab-cd//` dump 目录;CLI stdout 只显示有界摘要和 dump path。 -- wrapper 显式注入 `KUBECONFIG=/etc/rancher/k3s/k3s.yaml`。若 `kubectl config current-context`、server 或 node 摘要出现 `docker-desktop`、`desktop-control-plane` 或 `127.0.0.1:11700`,命令必须拒绝继续。 +- wrapper 显式注入 `KUBECONFIG=/etc/rancher/k3s/k3s.yaml` 并以这个显式目标作为唯一 gate:目标 context/server/nodes 若出现 `docker-desktop`、`desktop-control-plane` 或 `127.0.0.1:11700` 必须拒绝继续,目标 nodes 未包含 `d601` 必须阻断。裸 `kubectl` 默认 context 只作为诊断输出;即使默认 kubeconfig 仍残留 Docker Desktop,只要显式 D601 kubeconfig 通过,也不能把默认 context 当成 CD blocker。 真实 DEV apply 只允许 host commander 在明确授权后执行。UniDesk wrapper 可以展示受控命令形状: diff --git a/scripts/ci/dev-e2e.sh b/scripts/ci/dev-e2e.sh index ee7eac19..c336372c 100755 --- a/scripts/ci/dev-e2e.sh +++ b/scripts/ci/dev-e2e.sh @@ -149,9 +149,27 @@ code_queue_image="" trap 'code=$?; if [ "$code" -ne 0 ] && [ ! -f "$result_json" ]; then write_result false failed "runner exited with code $code" || true; fi' EXIT export KUBECONFIG=/etc/rancher/k3s/k3s.yaml -kubectl get nodes >/dev/null -test "$(kubectl get nodes -o jsonpath='{.items[*].metadata.name}')" = "d601" -! kubectl config current-context | grep -Eq 'docker-desktop|desktop-control-plane' +if ! context=$(kubectl config current-context 2>&1); then + echo "d601_native_k3s_guard=blocked reason=context-read-failed detail=$context" >&2 + exit 1 +fi +if ! server=$(kubectl config view --minify -o 'jsonpath={.clusters[0].cluster.server}' 2>&1); then + echo "d601_native_k3s_guard=blocked reason=server-read-failed detail=$server" >&2 + exit 1 +fi +if ! nodes=$(kubectl get nodes -o 'jsonpath={range .items[*]}{.metadata.name}{"\n"}{end}' 2>&1); then + echo "d601_native_k3s_guard=blocked reason=nodes-read-failed detail=$nodes" >&2 + exit 1 +fi +if printf '%s\n%s\n%s\n' "$context" "$server" "$nodes" | grep -Eiq 'docker-desktop|desktop-control-plane|127\.0\.0\.1:11700'; then + echo "d601_native_k3s_guard=refused reason=forbidden-control-plane context=$context server=$server" >&2 + exit 1 +fi +if ! printf '%s\n' "$nodes" | grep -Fx d601 >/dev/null; then + echo "d601_native_k3s_guard=blocked reason=missing-d601-node nodes=$(printf '%s' "$nodes" | tr '\n' ',')" >&2 + exit 1 +fi +echo "d601_native_k3s_guard=pass kubeconfig=$KUBECONFIG context=$context server=$server node=d601" log_json runner_started run_id "$run_id" manifest_commit "$manifest_commit" kubectl get pipeline/unidesk-dev-namespace-e2e -n unidesk-ci >/dev/null diff --git a/scripts/hwlab-cd-wrapper-contract-test.ts b/scripts/hwlab-cd-wrapper-contract-test.ts index 44dae090..39840e33 100644 --- a/scripts/hwlab-cd-wrapper-contract-test.ts +++ b/scripts/hwlab-cd-wrapper-contract-test.ts @@ -56,19 +56,30 @@ function makeFakeHwlabRepo(): string { return root; } -function makeFakeBin(mode: "native" | "desktop"): string { +function makeFakeBin(mode: "native" | "desktop" | "stale-default" | "wrong-node"): string { const bin = join(tmpdir(), `unidesk-hwlab-cd-bin-${process.pid}-${Date.now()}-${mode}`); mkdirSync(bin, { recursive: true }); - const context = mode === "desktop" ? "docker-desktop" : "default"; - const server = mode === "desktop" ? "https://127.0.0.1:11700" : "https://127.0.0.1:6443"; - const nodes = mode === "desktop" ? "desktop-control-plane" : "d601"; + const explicitContext = mode === "desktop" ? "docker-desktop" : "default"; + const explicitServer = mode === "desktop" ? "https://127.0.0.1:11700" : "https://127.0.0.1:6443"; + const explicitNodes = mode === "desktop" ? "desktop-control-plane" : mode === "wrong-node" ? "d602" : "d601"; + const defaultContext = mode === "stale-default" ? "docker-desktop" : explicitContext; + const defaultServer = mode === "stale-default" ? "https://127.0.0.1:11700" : explicitServer; + const defaultNodes = mode === "stale-default" ? "desktop-control-plane" : explicitNodes; writeFileSync(join(bin, "kubectl"), [ "#!/usr/bin/env bash", "set -euo pipefail", "printf 'KUBECONFIG=%s\\n' \"${KUBECONFIG:-}\" >&2", - "if [[ \"$*\" == 'config current-context' ]]; then printf '%s\\n' " + JSON.stringify(context) + "; exit 0; fi", - "if [[ \"$*\" == 'config view --minify -o jsonpath={.clusters[0].cluster.server}' ]]; then printf '%s' " + JSON.stringify(server) + "; exit 0; fi", - "if [[ \"$*\" == 'get nodes -o jsonpath={range .items[*]}{.metadata.name}{\"\\n\"}{end}' ]]; then printf '%s\\n' " + JSON.stringify(nodes) + "; exit 0; fi", + "context=" + JSON.stringify(explicitContext), + "server=" + JSON.stringify(explicitServer), + "nodes=" + JSON.stringify(explicitNodes), + "if [[ \"${KUBECONFIG:-}\" == '' ]]; then", + " context=" + JSON.stringify(defaultContext), + " server=" + JSON.stringify(defaultServer), + " nodes=" + JSON.stringify(defaultNodes), + "fi", + "if [[ \"$*\" == 'config current-context' ]]; then printf '%s\\n' \"$context\"; exit 0; fi", + "if [[ \"$*\" == 'config view --minify -o jsonpath={.clusters[0].cluster.server}' ]]; then printf '%s' \"$server\"; exit 0; fi", + "if [[ \"$*\" == 'get nodes -o jsonpath={range .items[*]}{.metadata.name}{\"\\n\"}{end}' ]]; then printf '%s\\n' \"$nodes\"; exit 0; fi", "if [[ \"$*\" == '-n hwlab-dev get lease hwlab-dev-cd-lock -o json' ]]; then printf 'Error from server (NotFound): leases.coordination.k8s.io \"hwlab-dev-cd-lock\" not found\\n' >&2; exit 1; fi", "printf '{}\\n'", ].join("\n")); @@ -79,6 +90,8 @@ function makeFakeBin(mode: "native" | "desktop"): string { const fakeRepo = makeFakeHwlabRepo(); const nativeBin = makeFakeBin("native"); const desktopBin = makeFakeBin("desktop"); +const staleDefaultBin = makeFakeBin("stale-default"); +const wrongNodeBin = makeFakeBin("wrong-node"); const liveBody = "data:application/json,%7B%22serviceId%22%3A%22hwlab-cloud-web%22%2C%22environment%22%3A%22dev%22%2C%22status%22%3A%22ok%22%2C%22revision%22%3A%22abc1234%22%7D"; const apiBody = "data:application/json,%7B%22serviceId%22%3A%22hwlab-cloud-api%22%2C%22environment%22%3A%22dev%22%2C%22status%22%3A%22ok%22%2C%22revision%22%3A%22abc1234%22%7D"; @@ -103,6 +116,7 @@ const dryRunData = applyDryRun.data as JsonRecord; assert.equal(dryRunData.dryRun, true); assert.equal(dryRunData.mutation, false); assert.equal(((dryRunData.d601NativeK3sGuard as JsonRecord).injectedEnv as JsonRecord).KUBECONFIG, "/etc/rancher/k3s/k3s.yaml"); +assert.equal((dryRunData.d601NativeK3sGuard as JsonRecord).requiredNodePresent, true); assert.equal((dryRunData.controlledDryRun as JsonRecord).commandOk, true); assert.equal(((dryRunData.hostCommanderOnlyLiveApply as JsonRecord).commandShape as unknown[]).includes("scripts/dev-cd-apply.mjs"), true); @@ -139,6 +153,26 @@ assert.equal(((statusData.d601NativeK3sGuard as JsonRecord).injectedEnv as JsonR assert.equal((statusData.liveRevisions as JsonRecord).status, "observed"); assert.ok(typeof statusData.dumpDir === "string" && String(statusData.dumpDir).includes(".state/hwlab-cd")); +const staleDefaultOk = runCli([ + "hwlab", + "cd", + "apply", + "--env", + "dev", + "--dry-run", + "--hwlab-repo", + fakeRepo, +], { + PATH: `${staleDefaultBin}:${process.env.PATH ?? ""}`, + KUBECONFIG: "", +}); +assert.equal(staleDefaultOk.ok, true); +const staleDefaultGuard = (staleDefaultOk.data as JsonRecord).d601NativeK3sGuard as JsonRecord; +assert.equal(staleDefaultGuard.status, "pass"); +assert.equal(staleDefaultGuard.refusal, false); +assert.equal((staleDefaultGuard.defaultKubectlDiagnostic as JsonRecord).status, "stale-forbidden-default"); +assert.deepEqual((staleDefaultGuard.defaultKubectlDiagnostic as JsonRecord).refusalSignals, ["docker-desktop", "desktop-control-plane", "127.0.0.1:11700"]); + const desktopRefusal = runCli([ "hwlab", "cd", @@ -155,4 +189,22 @@ assert.equal(desktopRefusal.ok, false); assert.equal((desktopRefusal.data as JsonRecord).error, "native-k3s-guard-refused"); assert.deepEqual((desktopRefusal.data as JsonRecord).d601NativeK3sGuard && ((desktopRefusal.data as JsonRecord).d601NativeK3sGuard as JsonRecord).refusalSignals, ["docker-desktop", "desktop-control-plane", "127.0.0.1:11700"]); +const wrongNodeBlocked = runCli([ + "hwlab", + "cd", + "apply", + "--env", + "dev", + "--dry-run", + "--hwlab-repo", + fakeRepo, +], { + PATH: `${wrongNodeBin}:${process.env.PATH ?? ""}`, +}); +assert.equal(wrongNodeBlocked.ok, true); +const wrongNodeGuard = (wrongNodeBlocked.data as JsonRecord).d601NativeK3sGuard as JsonRecord; +assert.equal(wrongNodeGuard.status, "blocked"); +assert.equal(wrongNodeGuard.requiredNodePresent, false); +assert.equal(((wrongNodeBlocked.data as JsonRecord).blockers as JsonRecord[]).some((blocker) => blocker.scope === "d601-native-k3s-guard"), true); + console.log(JSON.stringify({ ok: true, checked: "hwlab-cd-wrapper-contract" })); diff --git a/scripts/src/artifact-registry.ts b/scripts/src/artifact-registry.ts index c547dcf0..14cdf5a9 100644 --- a/scripts/src/artifact-registry.ts +++ b/scripts/src/artifact-registry.ts @@ -16,6 +16,7 @@ import { type DeployJsonExecutorMirror, type DeployJsonServiceContract, } from "./deploy-json-contract"; +import { d601K3sGuardShellLines } from "./d601-k3s-guard"; export type ArtifactRegistryAction = "plan" | "render" | "status" | "health" | "install" | "deploy-backend-core" | "deploy-service"; type ArtifactDeployEnvironment = "prod" | "dev"; @@ -982,6 +983,10 @@ function shellQuote(value: string): string { return `'${value.replace(/'/g, `'\\''`)}'`; } +function d601K3sGuardScript(): string { + return d601K3sGuardShellLines().join("\n"); +} + function base64(value: string): string { return Buffer.from(value, "utf8").toString("base64"); } @@ -2350,6 +2355,7 @@ function d601DevFrontendAuthPatchScript(config: UniDeskConfig): string { SESSION_TTL_SECONDS: String(config.auth.sessionTtlSeconds), }; return [ + d601K3sGuardScript(), `secret_patch=${shellQuote(JSON.stringify({ data: secretData }))}`, `config_patch=${shellQuote(JSON.stringify({ data: configData }))}`, "kubectl -n unidesk-dev patch secret unidesk-dev-runtime-secrets --type merge -p \"$secret_patch\"", @@ -3095,13 +3101,11 @@ function d601K3sArtifactDeployScript(options: ArtifactRegistryOptions, spec: Art " [ -n \"${DOCKER_CONFIG:-}\" ] && rm -rf \"$DOCKER_CONFIG\"", "}", "trap cleanup_artifact_cd EXIT", - "export KUBECONFIG=/etc/rancher/k3s/k3s.yaml", "command -v docker >/dev/null", "command -v kubectl >/dev/null", "command -v ctr >/dev/null", "test -S /run/k3s/containerd/containerd.sock", - "test \"$(kubectl get nodes -o jsonpath='{.items[*].metadata.name}')\" = \"d601\"", - "! kubectl config current-context | grep -Eq 'docker-desktop|desktop-control-plane'", + d601K3sGuardScript(), `curl -fsSI -H 'Accept: application/vnd.docker.distribution.manifest.v2+json' ${shellQuote(`http://127.0.0.1:${options.port}/v2/${spec.registryRepository}/manifests/${commit}`)} >/dev/null`, "docker pull -q \"$registry_image\" >/dev/null", "label_commit=$(docker image inspect \"$registry_image\" --format '{{ index .Config.Labels \"unidesk.ai/source-commit\" }}')", diff --git a/scripts/src/check.ts b/scripts/src/check.ts index 918f3c27..3b67cb74 100644 --- a/scripts/src/check.ts +++ b/scripts/src/check.ts @@ -21,6 +21,7 @@ const syntaxFiles = [ "scripts/src/auth-broker.ts", "scripts/src/code-queue.ts", "scripts/src/command.ts", + "scripts/src/d601-k3s-guard.ts", "scripts/src/decision-center.ts", "scripts/src/dev-env.ts", "scripts/src/deploy.ts", diff --git a/scripts/src/ci.ts b/scripts/src/ci.ts index 3badd147..b196cac3 100644 --- a/scripts/src/ci.ts +++ b/scripts/src/ci.ts @@ -13,9 +13,10 @@ import { parseArtifactRegistryOptions, type ArtifactRegistryReadonlyProbe, } from "./artifact-registry"; +import { d601K3sGuardShellLines, d601NativeKubeconfig } from "./d601-k3s-guard"; const d601ProviderId = "D601"; -const d601Kubeconfig = "/etc/rancher/k3s/k3s.yaml"; +const d601Kubeconfig = d601NativeKubeconfig; const tektonPipelineVersion = "v1.12.0"; const tektonTriggersVersion = "v0.34.0"; const tektonPipelineReleaseUrl = `https://infra.tekton.dev/tekton-releases/pipeline/previous/${tektonPipelineVersion}/release.yaml`; @@ -503,7 +504,7 @@ function publishPreflightFailedScopes(preflight: PublishPreflight): string[] { function ciRunnerPreflightScript(sourceHostPath: string): string { return [ "set -euo pipefail", - `export KUBECONFIG=${shellQuote(d601Kubeconfig)}`, + ...d601K3sGuardShellLines(d601Kubeconfig), "printf 'provider_host_ssh=ok\\n'", "printf 'kubectl='", "command -v kubectl >/dev/null && printf 'ok\\n' || { printf 'missing\\n'; exit 127; }", @@ -530,11 +531,12 @@ function keyValueBool(stdout: string, key: string): boolean { const match = new RegExp(`^${key}=(.*)$`, "mu").exec(stdout); if (match === null) return false; const value = match[1]?.trim().toLowerCase() ?? ""; - return value === "true" || value === "ok"; + return value === "true" || value === "ok" || value === "pass" || value.startsWith("pass "); } function backendCoreCiRunnerReady(result: DispatchResult): boolean { return result.ok + && keyValueBool(result.stdout, "d601_native_k3s_guard") && keyValueBool(result.stdout, "kubectl") && keyValueBool(result.stdout, "docker") && keyValueBool(result.stdout, "namespace") @@ -751,7 +753,7 @@ async function runRemoteKubectl(script: string, waitMs = 60_000, remoteTimeoutMs async function runRemoteKubectlRaw(script: string, waitMs = 60_000, remoteTimeoutMs = 45_000): Promise { const command = [ "set -euo pipefail", - `export KUBECONFIG=${shellQuote(d601Kubeconfig)}`, + ...d601K3sGuardShellLines(d601Kubeconfig, { passOutput: "stderr" }), script, ].join("\n"); return dispatchSsh(command, waitMs, remoteTimeoutMs); @@ -857,7 +859,7 @@ async function remoteApplyManifest(path: string): Promise { if (!upload.ok) throw new Error(`failed to upload manifest ${path}: ${upload.stderr || upload.stdout}`); const script = [ "set -euo pipefail", - `export KUBECONFIG=${shellQuote(d601Kubeconfig)}`, + ...d601K3sGuardShellLines(d601Kubeconfig), "tmp=$(mktemp /tmp/unidesk-ci-apply.XXXXXX.yaml)", `b64_path=${shellQuote(b64Path)}`, "trap 'rm -f \"$tmp\" \"$b64_path\"' EXIT", @@ -872,7 +874,7 @@ async function prewarmCiRuntimeImages(): Promise { const images = ciRuntimeImages.map(shellQuote).join(" "); const script = [ "set -euo pipefail", - `export KUBECONFIG=${shellQuote(d601Kubeconfig)}`, + ...d601K3sGuardShellLines(d601Kubeconfig), "export DOCKER_CONFIG=/tmp/unidesk-ci-docker-config", "mkdir -p \"$DOCKER_CONFIG\"", "printf '{}\\n' > \"$DOCKER_CONFIG/config.json\"", @@ -946,7 +948,7 @@ async function install(): Promise> { await prewarmCiRuntimeImages(); const installTektonScript = [ "set -euo pipefail", - `export KUBECONFIG=${shellQuote(d601Kubeconfig)}`, + ...d601K3sGuardShellLines(d601Kubeconfig), `kubectl apply -f ${shellQuote(tektonPipelineReleaseUrl)}`, "kubectl wait --for=condition=Available deployment --all -n tekton-pipelines --timeout=900s", `kubectl apply -f ${shellQuote(tektonTriggersReleaseUrl)}`, @@ -1321,7 +1323,7 @@ async function waitForPipelineRun(name: string, waitMs: number): Promise/dev/null", "command -v docker >/dev/null", diff --git a/scripts/src/d601-k3s-guard.ts b/scripts/src/d601-k3s-guard.ts new file mode 100644 index 00000000..52c20230 --- /dev/null +++ b/scripts/src/d601-k3s-guard.ts @@ -0,0 +1,188 @@ +export const d601NativeKubeconfig = "/etc/rancher/k3s/k3s.yaml"; +export const d601RequiredNodeName = "d601"; + +export type D601K3sGuardStatus = "pass" | "refused" | "blocked"; + +export interface D601K3sTargetObservation { + kubeconfig: string; + expectedKubeconfig?: string; + currentContext: string | null; + apiServer: string | null; + nodeNames: string[]; + commandsOk: boolean; + combinedText: string; +} + +export interface D601K3sDefaultKubectlDiagnostic { + checked: boolean; + currentContext: string | null; + apiServer: string | null; + refusalSignals: string[]; + status: "clean" | "stale-forbidden-default" | "unavailable"; + summary: string; +} + +export interface D601K3sGuardClassification { + status: D601K3sGuardStatus; + refusal: boolean; + refusalSignals: string[]; + kubeconfig: string; + expectedKubeconfig: string; + currentContext: string | null; + apiServer: string | null; + nodeNames: string[]; + nodeCount: number; + requiredNodeName: string; + requiredNodePresent: boolean; + commandsOk: boolean; + defaultKubectlDiagnostic?: D601K3sDefaultKubectlDiagnostic; + summary: string; +} + +export interface D601K3sGuardShellOptions { + passOutput?: "stdout" | "stderr" | "quiet"; +} + +function uniqueSignals(signals: Array): string[] { + return [...new Set(signals.filter((signal): signal is string => signal !== null))]; +} + +export function d601ForbiddenKubeSignals(text: string): string[] { + return uniqueSignals([ + /docker-desktop/iu.test(text) ? "docker-desktop" : null, + /desktop-control-plane/iu.test(text) ? "desktop-control-plane" : null, + /127\.0\.0\.1:11700/u.test(text) ? "127.0.0.1:11700" : null, + ]); +} + +export function classifyD601DefaultKubectlDiagnostic(input: { + currentContext: string | null; + apiServer: string | null; + combinedText: string; + commandsOk: boolean; +}): D601K3sDefaultKubectlDiagnostic { + const refusalSignals = d601ForbiddenKubeSignals(input.combinedText); + if (!input.commandsOk) { + return { + checked: true, + currentContext: input.currentContext, + apiServer: input.apiServer, + refusalSignals, + status: "unavailable", + summary: "Default kubectl diagnostic could not read context/server; this does not block the explicit D601 target.", + }; + } + if (refusalSignals.length > 0) { + return { + checked: true, + currentContext: input.currentContext, + apiServer: input.apiServer, + refusalSignals, + status: "stale-forbidden-default", + summary: "Default kubectl resolves to a forbidden local control-plane signal; explicit D601 KUBECONFIG remains the deploy target.", + }; + } + return { + checked: true, + currentContext: input.currentContext, + apiServer: input.apiServer, + refusalSignals, + status: "clean", + summary: "Default kubectl diagnostic did not show Docker Desktop control-plane signals.", + }; +} + +export function classifyD601K3sTarget( + observation: D601K3sTargetObservation, + defaultKubectlDiagnostic?: D601K3sDefaultKubectlDiagnostic, +): D601K3sGuardClassification { + const expectedKubeconfig = observation.expectedKubeconfig ?? d601NativeKubeconfig; + const refusalSignals = d601ForbiddenKubeSignals(observation.combinedText); + const requiredNodePresent = observation.nodeNames.includes(d601RequiredNodeName); + const wrongKubeconfig = observation.kubeconfig !== expectedKubeconfig; + const refusal = refusalSignals.length > 0; + const status: D601K3sGuardStatus = refusal + ? "refused" + : !observation.commandsOk || wrongKubeconfig || !requiredNodePresent + ? "blocked" + : "pass"; + const defaultStale = defaultKubectlDiagnostic?.status === "stale-forbidden-default"; + const summary = refusal + ? "Refusing D601 k3s operation because the explicit target kubeconfig resolved to a forbidden Docker Desktop control-plane signal." + : wrongKubeconfig + ? `D601 k3s guard blocked: expected explicit KUBECONFIG=${expectedKubeconfig}.` + : !observation.commandsOk + ? "D601 k3s guard blocked: explicit target kubeconfig could not read context, server, and nodes." + : !requiredNodePresent + ? `D601 k3s guard blocked: explicit target kubeconfig did not report node ${d601RequiredNodeName}.` + : defaultStale + ? "D601 native k3s guard passed with explicit KUBECONFIG; stale default kubectl context was observed only as a diagnostic." + : "D601 native k3s guard passed with explicit KUBECONFIG."; + return { + status, + refusal, + refusalSignals, + kubeconfig: observation.kubeconfig, + expectedKubeconfig, + currentContext: observation.currentContext, + apiServer: observation.apiServer, + nodeNames: observation.nodeNames, + nodeCount: observation.nodeNames.length, + requiredNodeName: d601RequiredNodeName, + requiredNodePresent, + commandsOk: observation.commandsOk, + ...(defaultKubectlDiagnostic === undefined ? {} : { defaultKubectlDiagnostic }), + summary, + }; +} + +function shellQuote(value: string): string { + return `'${value.replace(/'/gu, "'\\''")}'`; +} + +export function d601K3sGuardShellLines(kubeconfig = d601NativeKubeconfig, options: D601K3sGuardShellOptions = {}): string[] { + const passOutput = options.passOutput ?? "stdout"; + const passLine = passOutput === "quiet" + ? ":" + : passOutput === "stderr" + ? "printf 'd601_native_k3s_guard=pass kubeconfig=%s context=%s server=%s node=%s\\n' \"$required_kubeconfig\" \"$context\" \"$server\" \"$required_node\" >&2" + : "printf 'd601_native_k3s_guard=pass kubeconfig=%s context=%s server=%s node=%s\\n' \"$required_kubeconfig\" \"$context\" \"$server\" \"$required_node\""; + return [ + `export KUBECONFIG=${shellQuote(kubeconfig)}`, + "d601_k3s_guard() {", + ` required_kubeconfig=${shellQuote(kubeconfig)}`, + ` required_node=${shellQuote(d601RequiredNodeName)}`, + " if [ \"${KUBECONFIG:-}\" != \"$required_kubeconfig\" ]; then", + " printf 'd601_native_k3s_guard=blocked reason=wrong-kubeconfig expected=%s actual=%s\\n' \"$required_kubeconfig\" \"${KUBECONFIG:-}\" >&2", + " return 1", + " fi", + " if ! command -v kubectl >/dev/null 2>&1; then", + " echo 'd601_native_k3s_guard=blocked reason=kubectl-missing' >&2", + " return 1", + " fi", + " if ! context=$(kubectl config current-context 2>&1); then", + " printf 'd601_native_k3s_guard=blocked reason=context-read-failed detail=%s\\n' \"$context\" >&2", + " return 1", + " fi", + " if ! server=$(kubectl config view --minify -o 'jsonpath={.clusters[0].cluster.server}' 2>&1); then", + " printf 'd601_native_k3s_guard=blocked reason=server-read-failed detail=%s\\n' \"$server\" >&2", + " return 1", + " fi", + " if ! nodes=$(kubectl get nodes -o 'jsonpath={range .items[*]}{.metadata.name}{\"\\n\"}{end}' 2>&1); then", + " printf 'd601_native_k3s_guard=blocked reason=nodes-read-failed detail=%s\\n' \"$nodes\" >&2", + " return 1", + " fi", + " combined=$(printf '%s\\n%s\\n%s\\n' \"$context\" \"$server\" \"$nodes\")", + " if printf '%s\\n' \"$combined\" | grep -Eiq 'docker-desktop|desktop-control-plane|127\\.0\\.0\\.1:11700'; then", + " printf 'd601_native_k3s_guard=refused reason=forbidden-control-plane context=%s server=%s nodes=%s\\n' \"$context\" \"$server\" \"$(printf '%s' \"$nodes\" | tr '\\n' ',')\" >&2", + " return 1", + " fi", + " if ! printf '%s\\n' \"$nodes\" | grep -Fx \"$required_node\" >/dev/null; then", + " printf 'd601_native_k3s_guard=blocked reason=missing-d601-node context=%s server=%s nodes=%s\\n' \"$context\" \"$server\" \"$(printf '%s' \"$nodes\" | tr '\\n' ',')\" >&2", + " return 1", + " fi", + ` ${passLine}`, + "}", + "d601_k3s_guard", + ]; +} diff --git a/scripts/src/deploy.ts b/scripts/src/deploy.ts index 14d79203..32104bc5 100644 --- a/scripts/src/deploy.ts +++ b/scripts/src/deploy.ts @@ -9,6 +9,7 @@ import { baiduNetdiskRuntimeSecretRequirements, runtimeSecretContractFromEnvText import { startJob } from "./jobs"; import { coreInternalFetch } from "./microservices"; import { codeQueueSourceImportPreflight, codeQueueSourceSubdir } from "./code-queue-source-guard"; +import { d601K3sGuardShellLines, d601NativeKubeconfig } from "./d601-k3s-guard"; import { compareDeployJsonExecutorMirrors, deployJsonCommitImage, @@ -138,7 +139,7 @@ const providerDispatchCompletionLagMs = 45_000; const pollIntervalMs = 5_000; const remoteDeployRoot = "/home/ubuntu/.unidesk/deploy"; const k8sNamespace = "unidesk"; -const k8sKubeconfig = "/etc/rancher/k3s/k3s.yaml"; +const k8sKubeconfig = d601NativeKubeconfig; // Production k3s hostPath repo. Code Queue production Pods mount this path as /app and /root/unidesk, // so deploy guards must validate this tree rather than config.json development.worktreePath. const k3sProductionHostPathRepoDir = "/home/ubuntu/cq-deploy"; @@ -260,6 +261,10 @@ function shellQuote(value: string): string { return `'${value.replace(/'/gu, `'\\''`)}'`; } +function d601K3sGuardScript(): string { + return d601K3sGuardShellLines(k8sKubeconfig).join("\n"); +} + function compactTail(text: string, maxChars = 1600): string { return text.length > maxChars ? text.slice(text.length - maxChars) : text; } @@ -1518,10 +1523,11 @@ function syncDevFrontendAuthScript(config: UniDeskConfig): string { }; return [ "set -euo pipefail", + d601K3sGuardScript(), `secret_patch=${shellQuote(JSON.stringify({ data }))}`, `config_patch=${shellQuote(JSON.stringify({ data: runtimeConfig }))}`, - `KUBECONFIG=${shellQuote(k8sKubeconfig)} kubectl -n unidesk-dev patch secret unidesk-dev-runtime-secrets --type merge -p "$secret_patch"`, - `KUBECONFIG=${shellQuote(k8sKubeconfig)} kubectl -n unidesk-dev patch configmap unidesk-dev-runtime-config --type merge -p "$config_patch"`, + `kubectl -n unidesk-dev patch secret unidesk-dev-runtime-secrets --type merge -p "$secret_patch"`, + `kubectl -n unidesk-dev patch configmap unidesk-dev-runtime-config --type merge -p "$config_patch"`, "echo dev_frontend_auth_synced=ok", ].join("\n"); } @@ -2124,6 +2130,7 @@ function ensureNativeK3sScript(): string { ` if KUBECONFIG=${shellQuote(k8sKubeconfig)} kubectl get nodes >/dev/null 2>&1; then break; fi`, " sleep 2", "done", + d601K3sGuardScript(), `KUBECONFIG=${shellQuote(k8sKubeconfig)} kubectl get nodes -l unidesk.ai/node-id=D601 --no-headers | grep -q .`, `KUBECONFIG=${shellQuote(k8sKubeconfig)} kubectl wait --for=condition=Ready node -l unidesk.ai/node-id=D601 --timeout=180s`, "install_system_images_from_legacy_k3s", @@ -2248,8 +2255,10 @@ function applyK8sScript(service: UniDeskMicroserviceConfig): string { ].join("\n") : ""; return [ + "set -euo pipefail", + d601K3sGuardScript(), cleanup, - `KUBECONFIG=${shellQuote(k8sKubeconfig)} kubectl apply -f ${shellQuote(manifest)}`, + `kubectl apply -f ${shellQuote(manifest)}`, ].filter(Boolean).join("\n"); } diff --git a/scripts/src/dev-env.ts b/scripts/src/dev-env.ts index 0711d50c..38518c88 100644 --- a/scripts/src/dev-env.ts +++ b/scripts/src/dev-env.ts @@ -1,6 +1,7 @@ import { readFileSync } from "node:fs"; import { runCommand } from "./command"; import { repoRoot, rootPath } from "./config"; +import { d601K3sGuardShellLines, d601NativeKubeconfig } from "./d601-k3s-guard"; import { startJob } from "./jobs"; const defaultManifest = "src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-foundation.k8s.yaml"; @@ -216,7 +217,26 @@ function validateDatabaseUrl(url: string): { ok: boolean; url: string; reason: s } function kubectlDryRun(manifestPath: string): unknown { - const kubeconfig = "/etc/rancher/k3s/k3s.yaml"; + const kubeconfig = d601NativeKubeconfig; + const guardScript = d601K3sGuardShellLines(kubeconfig).join("\n"); + const guard = runCommand(["sh", "-lc", guardScript], repoRoot, { + timeoutMs: 60_000, + env: { ...process.env, KUBECONFIG: kubeconfig }, + }); + const guarded = guard.exitCode === 0; + if (!guarded) { + return { + command: ["sh", "-lc", "d601 native k3s guard"], + kubeconfig, + exitCode: guard.exitCode, + signal: guard.signal, + timedOut: guard.timedOut, + ok: false, + guard: "d601-native-k3s", + stdoutTail: guard.stdout.slice(-4000), + stderrTail: guard.stderr.slice(-4000), + }; + } const result = runCommand(["kubectl", "apply", "--dry-run=client", "--validate=false", "-f", manifestPath], repoRoot, { timeoutMs: 60_000, env: { ...process.env, KUBECONFIG: kubeconfig }, @@ -262,15 +282,13 @@ function prewarmImagesScript(options: PrewarmImagesOptions): string { `proxy_url=${shellQuote(options.proxyUrl)}`, `pull_missing=${options.pullMissing ? "1" : "0"}`, `pull_timeout_seconds=${pullTimeoutSeconds}`, - "kubeconfig=/etc/rancher/k3s/k3s.yaml", "ctr_address=/run/k3s/containerd/containerd.sock", + ...d601K3sGuardShellLines(), "export DOCKER_CONFIG=/tmp/unidesk-dev-env-docker-config", "mkdir -p \"$DOCKER_CONFIG\"", "printf '{}\\n' > \"$DOCKER_CONFIG/config.json\"", - "printf 'dev_env_k3s_context='", - "KUBECONFIG=\"$kubeconfig\" kubectl config current-context", "printf 'dev_env_k3s_nodes='", - "KUBECONFIG=\"$kubeconfig\" kubectl get nodes -o name | tr '\\n' ' '", + "kubectl get nodes -o name | tr '\\n' ' '", "printf '\\n'", "for image in \"${images[@]}\"; do", " if docker image inspect \"$image\" >/dev/null 2>&1; then", diff --git a/scripts/src/hwlab-cd.ts b/scripts/src/hwlab-cd.ts index a2298dbc..ec865fba 100644 --- a/scripts/src/hwlab-cd.ts +++ b/scripts/src/hwlab-cd.ts @@ -4,6 +4,11 @@ import { createWriteStream, existsSync, mkdirSync, openSync, readSync, statSync, import { mkdir, writeFile } from "node:fs/promises"; import { join, resolve } from "node:path"; import { repoRoot, rootPath } from "./config"; +import { + classifyD601DefaultKubectlDiagnostic, + classifyD601K3sTarget, + d601NativeKubeconfig, +} from "./d601-k3s-guard"; type HwlabCdAction = "status" | "apply"; type HwlabCdEnvironment = "dev"; @@ -59,7 +64,7 @@ interface CommandView { const namespace = "hwlab-dev"; const lockName = "hwlab-dev-cd-lock"; -const nativeKubeconfig = "/etc/rancher/k3s/k3s.yaml"; +const nativeKubeconfig = d601NativeKubeconfig; const defaultFrontendLiveUrl = "http://74.48.78.17:16666/health/live"; const defaultApiLiveUrl = "http://74.48.78.17:16667/health/live"; const parseCaptureLimitBytes = 4 * 1024 * 1024; @@ -347,38 +352,36 @@ async function gitSummary(repoPath: string, dumpDir: string, timeoutMs: number): async function nativeK3sGuard(kubeconfig: string, dumpDir: string, timeoutMs: number): Promise> { const env = { ...process.env, KUBECONFIG: kubeconfig }; - const [context, server, nodes] = await Promise.all([ + const [context, server, nodes, defaultContext, defaultServer, defaultNodes] = await Promise.all([ runCaptured(["kubectl", "config", "current-context"], repoRoot, dumpDir, "k3s-current-context", { env, timeoutMs }), runCaptured(["kubectl", "config", "view", "--minify", "-o", "jsonpath={.clusters[0].cluster.server}"], repoRoot, dumpDir, "k3s-server", { env, timeoutMs }), runCaptured(["kubectl", "get", "nodes", "-o", "jsonpath={range .items[*]}{.metadata.name}{\"\\n\"}{end}"], repoRoot, dumpDir, "k3s-nodes", { env, timeoutMs }), + runCaptured(["kubectl", "config", "current-context"], repoRoot, dumpDir, "default-kubectl-current-context", { timeoutMs }), + runCaptured(["kubectl", "config", "view", "--minify", "-o", "jsonpath={.clusters[0].cluster.server}"], repoRoot, dumpDir, "default-kubectl-server", { timeoutMs }), + runCaptured(["kubectl", "get", "nodes", "-o", "jsonpath={range .items[*]}{.metadata.name}{\"\\n\"}{end}"], repoRoot, dumpDir, "default-kubectl-nodes", { timeoutMs }), ]); const contextText = context.stdoutText.trim(); const serverText = server.stdoutText.trim(); const nodeNames = nodes.stdoutText.split("\n").map((line) => line.trim()).filter((line) => line.length > 0); - const combined = `${context.stdoutText}\n${context.stderrText}\n${server.stdoutText}\n${server.stderrText}\n${nodes.stdoutText}\n${nodes.stderrText}`; - const refusalSignals = [ - /docker-desktop/iu.test(combined) ? "docker-desktop" : null, - /desktop-control-plane/iu.test(combined) ? "desktop-control-plane" : null, - /127\.0\.0\.1:11700/u.test(combined) ? "127.0.0.1:11700" : null, - ].filter((signal): signal is string => signal !== null); - const refusal = refusalSignals.length > 0; - const readable = context.ok && server.ok && nodes.ok; - return { - status: refusal ? "refused" : readable ? "pass" : "blocked", - refusal, - refusalSignals, + const defaultDiagnostic = classifyD601DefaultKubectlDiagnostic({ + currentContext: defaultContext.stdoutText.trim() || null, + apiServer: defaultServer.stdoutText.trim() || null, + combinedText: `${defaultContext.stdoutText}\n${defaultContext.stderrText}\n${defaultServer.stdoutText}\n${defaultServer.stderrText}\n${defaultNodes.stdoutText}\n${defaultNodes.stderrText}`, + commandsOk: defaultContext.ok && defaultServer.ok && defaultNodes.ok, + }); + const guard = classifyD601K3sTarget({ kubeconfig, - injectedEnv: { KUBECONFIG: kubeconfig }, + expectedKubeconfig: nativeKubeconfig, currentContext: contextText || null, apiServer: serverText || null, nodeNames, - nodeCount: nodeNames.length, - summary: refusal - ? "Refusing HWLAB CD because kubectl resolved to a Docker Desktop control plane signal." - : readable - ? "D601 native k3s guard passed with explicit KUBECONFIG." - : "D601 native k3s guard could not fully read context, server, and nodes.", - commands: [context, server, nodes].map(commandView), + commandsOk: context.ok && server.ok && nodes.ok, + combinedText: `${context.stdoutText}\n${context.stderrText}\n${server.stdoutText}\n${server.stderrText}\n${nodes.stdoutText}\n${nodes.stderrText}`, + }, defaultDiagnostic); + return { + ...guard, + injectedEnv: { KUBECONFIG: kubeconfig }, + commands: [context, server, nodes, defaultContext, defaultServer, defaultNodes].map(commandView), }; }