From db3092ad297321fc7c9c3906cffb002fb037ab1e Mon Sep 17 00:00:00 2001 From: Codex Date: Tue, 2 Jun 2026 10:22:52 +0000 Subject: [PATCH] feat: add HWLAB v02 secret bootstrap command --- docs/reference/cli.md | 3 +- scripts/src/hwlab-g14.ts | 169 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 169 insertions(+), 3 deletions(-) diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 19bfeae9..3082bb09 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -8,7 +8,7 @@ CLI 可以从 `master` 快速演进,但必须兼容 `deploy.json` 固定的 CI CI/CD、GitOps、rollout、artifact 发布、PR 合并后的 DEV/PROD 滚动、PipelineRun 重跑/清理、Argo refresh 和运行面 retention 都必须由 UniDesk CLI 的高层子命令控制。稳定入口包括 `gh pr ...`、`hwlab g14 monitor-prs`、`agentrun v01 control-plane ...`、`deploy check|plan|apply`、`ci install|status|run|publish-*|logs`、`artifact-registry ...`、`server rebuild ...`、`dev-env ...` 和后续为特定运行面补充的同级命令。原生 `kubectl`、`argo`、`tkn`、`gh`、`curl` 或临时 shell 可以作为实现细节存在于 CLI 内部,但不能作为人工或 runner 的正式控制面。 -`ssh`/`tran kubectl|logs|get|describe` 仍是 CLI 介导的低层诊断底座,用于短查询、日志尾部、只读证据和一次性故障定位。它不应承载可重复的 CI/CD 写操作:创建/删除 PipelineRun、patch Pipeline/CronJob/RBAC、annotate Argo Application、触发/回滚 rollout、修改 retention 策略或清理运行面资源,都应该先落成 `bun scripts/cli.ts ...` 子命令,再由该子命令输出结构化 dry-run、执行摘要、保护对象、后续观察命令和失败分类。 +`ssh`/`tran kubectl|logs|get|describe` 仍是 CLI 介导的低层诊断底座,用于短查询、日志尾部、只读证据和一次性故障定位。它不应承载可重复的 CI/CD 写操作:创建/删除 PipelineRun、patch Pipeline/CronJob/RBAC、annotate Argo Application、触发/回滚 rollout、修改 retention 策略、确保 SecretRef 或清理运行面资源,都应该先落成 `bun scripts/cli.ts ...` 子命令,再由该子命令输出结构化 dry-run、执行摘要、保护对象、后续观察命令和失败分类。 当现有 CLI 对某个 CI/CD 操作缺字段、缺动作、缺状态或缺权限时,处理顺序是先补 CLI,再执行发布或治理动作。临时低层 route 写操作只允许用于一次性止血,并且必须随后把稳定能力补进 CLI 与本参考文档;不能把手工 `kubectl apply/delete/annotate`、原生 GitHub CLI、手写 REST 请求或 registry shell 脚本沉淀成长期流程。长时观察仍遵守 60 秒短查询和 submit-and-poll 语义,不用单个 `tran` 等待完整 PipelineRun 或 Argo rollout 结束。 @@ -50,6 +50,7 @@ CI/CD、GitOps、rollout、artifact 发布、PR 合并后的 DEV/PROD 滚动、P 创建 PipelineRun 前会读取 `devops-infra` mirror refs,若 `localV02` 未等于当前 
source commit,则自动执行一次受控 manual `git-mirror sync` Job 并复核 ref,复核失败时停止触发,避免 Tekton `prepare-source` 已知失败;services 参数只包含 v02 runtime service matrix,`hwlab-cli` 是固定 repo 短连接源码工具,不进入 PipelineRun service build。 `--dry-run` 只报告是否会 pre-sync,不创建 Job;confirmed trigger 默认创建 `.state/jobs/` 异步 job 并立刻返回 `job.id`、`statusCommand`、stdout/stderr 路径,避免 git mirror pre-sync 或 PipelineRun 创建期间长时间阻塞;`--wait` 路径也必须向 stderr 输出 `hwlab.v02.trigger.progress` JSON 事件,覆盖 `control-plane-refresh`、`git-mirror-pre-sync`、`delete-existing-pipelinerun` 和 `create-pipelinerun`,避免异步 job 长时间只有启动命令而无法判断卡点;默认 JSON 必须对 `manifest_b64`、长脚本和远端 stdout/stderr 做有界摘要,保留长度与 hash,最终 trigger 结果只返回阶段摘要和关键 tail,完整内容通过 job stdout/stderr 文件渐进披露;只有现场同步调试才显式加 `--wait`;旧 `rerun-current` 只作为输入别名保留。PipelineRun `Completed`、Argo `Synced/Healthy` 和 `webAssets.ok=true` 只证明 G14 runtime 已更新;交付收口还必须用 `hwlab g14 git-mirror status` 查看 `cache.summary.pendingFlush`,若为 true,继续执行受控 `hwlab g14 git-mirror flush --confirm` 并用 job status 轮询到 `pendingFlush=false`。 - `hwlab g14 control-plane runtime-migration --lane v02 [--dry-run|--allow-live-db-read --dry-run|--confirm]` 只通过 `hwlab-v02` namespace 当前 `deployment/hwlab-cloud-api -c hwlab-cloud-api` 内 repo-owned migration CLI 执行;不读取或打印 Secret 值、不触碰 PROD、不绕到手工 `psql`。 +- `hwlab g14 secret status|ensure --lane v02 --name hwlab-v02-device-pod-api-key --key api-key [--dry-run|--confirm]` 是 HWLAB v0.2 runtime SecretRef bootstrap 的标准入口,用于确保 `deploy/deploy.json` 中 `HWLAB_DEVICE_POD_API_KEY=secretRef:hwlab-v02-device-pod-api-key/api-key` 对应的 Kubernetes Secret 存在。`status` 只返回 secret/key 是否存在和解码后的字节数;`ensure --dry-run` 只报告会创建还是保持;`ensure --confirm` 在 G14 k3s 侧生成随机值并 server-side apply Secret。该命令永远不打印、不回传 secret 明文(已有值只在 G14 k3s 侧被读取,用于统计解码后的字节数,不会离开集群侧脚本),也不提供手工值注入、fallback session token 或临时 lease 路径。 - `hwlab g14 control-plane cleanup-runs --lane v02|g14|all [--min-age-minutes N] [--limit N] [--dry-run|--confirm]` 是完成态 PipelineRun 工作区 retention 入口;真实清理只删除已完成 PipelineRun,让 Tekton/local-path 回收临时 PVC,不触碰 registry storage、业务 PVC、Secret、runtime 
workload 或 GitOps desired state。 - `hwlab g14 control-plane cleanup-released-pvs --lane all [--limit N] [--dry-run|--confirm]` 是 local-path 未自动回收后的补充 retention 入口;只列并删除 `Released`、`local-path`、`Delete`、`claimNamespace=hwlab-ci` 且 claim 名称形如 Tekton 临时 `pvc-*` 的 PV。 - `hwlab g14 git-mirror status|apply|sync|flush [--dry-run|--confirm]` 是 `devops-infra` git mirror/relay 的受控维护入口:`apply` 渲染并 server-side apply `devops-infra/git-mirror.yaml`,同时删除遗留 `git-mirror-hwlab-sync` CronJob;`sync` 创建一次性 manual Job,把 GitHub allowlist refs 拉入本地 mirror;`flush` 创建一次性 manual Job,把本地 `v0.2-gitops` 快进推回 GitHub。 diff --git a/scripts/src/hwlab-g14.ts b/scripts/src/hwlab-g14.ts index 4c0308aa..a8730264 100644 --- a/scripts/src/hwlab-g14.ts +++ b/scripts/src/hwlab-g14.ts @@ -22,12 +22,16 @@ const V02_POLLER = "hwlab-v02-branch-poller"; const V02_RECONCILER = "hwlab-v02-control-plane-reconciler"; const V02_PIPELINERUN_PREFIX = "hwlab-v02-ci-poll"; const V02_CONTROL_PLANE_FIELD_MANAGER = "unidesk-hwlab-v02-control-plane"; +const V02_SECRET_FIELD_MANAGER = "unidesk-hwlab-v02-secret"; const V02_GIT_URL = "git@github.com:pikasTech/HWLAB.git"; const V02_GIT_READ_URL = "http://git-mirror-http.devops-infra.svc.cluster.local/pikasTech/HWLAB.git"; const V02_GIT_WRITE_URL = "http://git-mirror-write.devops-infra.svc.cluster.local/pikasTech/HWLAB.git"; const V02_GITOPS_BRANCH = "v0.2-gitops"; const V02_CATALOG_PATH = "deploy/artifact-catalog.v02.json"; const V02_RUNTIME_PATH = "deploy/gitops/g14/runtime-v02"; +const V02_RUNTIME_NAMESPACE = "hwlab-v02"; +const V02_DEVICE_POD_API_KEY_SECRET = "hwlab-v02-device-pod-api-key"; +const V02_DEVICE_POD_API_KEY_SECRET_KEY = "api-key"; const V02_REGISTRY_PREFIX = "127.0.0.1:5000/hwlab"; const V02_BASE_IMAGE = "127.0.0.1:5000/hwlab/hwlab-node20-base:20-bookworm-slim"; const GIT_MIRROR_NAMESPACE = "devops-infra"; @@ -121,6 +125,16 @@ interface G14GitMirrorOptions { timeoutSeconds: number; } +interface G14SecretOptions { + action: "status" | "ensure"; + lane: "v02"; + 
dryRun: boolean; + confirm: boolean; + name: typeof V02_DEVICE_POD_API_KEY_SECRET; + key: typeof V02_DEVICE_POD_API_KEY_SECRET_KEY; + timeoutSeconds: number; +} + interface CommandJsonResult { ok: boolean; command: string[]; @@ -319,6 +333,31 @@ function parseGitMirrorOptions(args: string[]): G14GitMirrorOptions { }; } +function parseSecretOptions(args: string[]): G14SecretOptions { + const [actionRaw] = args; + if (actionRaw !== "status" && actionRaw !== "ensure") { + throw new Error("secret usage: status|ensure --lane v02 --name hwlab-v02-device-pod-api-key --key api-key [--dry-run|--confirm]"); + } + const lane = optionValue(args, "--lane") ?? "v02"; + if (lane !== "v02") throw new Error("secret currently supports --lane v02"); + const name = optionValue(args, "--name") ?? V02_DEVICE_POD_API_KEY_SECRET; + if (name !== V02_DEVICE_POD_API_KEY_SECRET) throw new Error(`secret currently supports --name ${V02_DEVICE_POD_API_KEY_SECRET}`); + const key = optionValue(args, "--key") ?? V02_DEVICE_POD_API_KEY_SECRET_KEY; + if (key !== V02_DEVICE_POD_API_KEY_SECRET_KEY) throw new Error(`secret currently supports --key ${V02_DEVICE_POD_API_KEY_SECRET_KEY}`); + const confirm = args.includes("--confirm"); + const explicitDryRun = args.includes("--dry-run"); + if (confirm && explicitDryRun) throw new Error("secret accepts only one of --confirm or --dry-run"); + return { + action: actionRaw, + lane, + confirm, + dryRun: actionRaw === "status" ? 
true : explicitDryRun || !confirm, + name, + key, + timeoutSeconds: positiveIntegerOption(args, "--timeout-seconds", 120, 600), + }; +} + function positiveIntegerOption(args: string[], name: string, defaultValue: number, maxValue: number): number { const index = args.indexOf(name); if (index === -1) return defaultValue; @@ -2302,6 +2341,125 @@ function runV02ControlPlane(options: G14ControlPlaneOptions): Record/dev/null 2>&1 && printf yes || printf no; }", + "secret_b64() { kubectl -n \"$namespace\" get secret \"$name\" -o \"go-template={{ index .data \\\"$key\\\" }}\" 2>/dev/null || true; }", + "decoded_length() { if [ -n \"$1\" ]; then printf '%s' \"$1\" | base64 -d 2>/dev/null | wc -c | tr -d ' '; else printf '0'; fi; }", + "before_exists=$(secret_exists_flag)", + "before_b64=$(secret_b64)", + "before_key_present=$([ -n \"$before_b64\" ] && printf yes || printf no)", + "before_value_bytes=$(decoded_length \"$before_b64\")", + "action=observed", + "mutation=false", + "apply_exit=", + "if [ \"$action_request\" = ensure ]; then", + " if [ \"$dry_run\" = true ]; then", + " if [ \"$before_key_present\" = yes ] && [ \"$before_value_bytes\" -gt 0 ]; then action=kept; else action=would-create; fi", + " elif [ \"$before_key_present\" = yes ] && [ \"$before_value_bytes\" -gt 0 ]; then", + " action=kept", + " else", + " if ! 
command -v openssl >/dev/null 2>&1; then", + " action=openssl-missing", + " apply_exit=127", + " else", + " generated_key=$(openssl rand -base64 48)", + " kubectl -n \"$namespace\" create secret generic \"$name\" --from-literal=\"$key=$generated_key\" --dry-run=client -o yaml | kubectl apply --server-side --force-conflicts --field-manager=\"$field_manager\" -f -", + " apply_exit=$?", + " generated_key=", + " if [ \"$apply_exit\" -eq 0 ]; then action=ensured; mutation=true; else action=apply-failed; fi", + " fi", + " fi", + "fi", + emitAfterStatus, + "if [ -n \"$apply_exit\" ] && [ \"$apply_exit\" != 0 ]; then exit \"$apply_exit\"; fi", + ].join("\n"); +} + +function v02SecretStatusFromText(text: string, commandOk: boolean, exitCode: number | null, stderr: string): Record { + const fields = keyValueLinesFromText(text); + const afterExists = fields.afterExists === "yes"; + const afterKeyPresent = fields.afterKeyPresent === "yes"; + const afterValueBytes = numericField(fields.afterValueBytes); + const healthy = afterExists && afterKeyPresent && typeof afterValueBytes === "number" && afterValueBytes > 0; + return { + ok: commandOk && healthy, + namespace: fields.namespace || V02_RUNTIME_NAMESPACE, + secret: fields.secret || V02_DEVICE_POD_API_KEY_SECRET, + key: fields.key || V02_DEVICE_POD_API_KEY_SECRET_KEY, + action: fields.action || null, + dryRun: fields.dryRun === "true", + mutation: fields.mutation === "true", + before: { + exists: fields.beforeExists === "yes", + keyPresent: fields.beforeKeyPresent === "yes", + valueBytes: numericField(fields.beforeValueBytes), + }, + after: { + exists: afterExists, + keyPresent: afterKeyPresent, + valueBytes: afterValueBytes, + }, + applyExitCode: numericField(fields.applyExitCode), + exitCode, + stderr: commandOk ? "" : stderr.trim().slice(0, 2000), + valuesRedacted: true, + summary: healthy + ? 
`${fields.secret || V02_DEVICE_POD_API_KEY_SECRET}/${fields.key || V02_DEVICE_POD_API_KEY_SECRET_KEY} exists` + : `${fields.secret || V02_DEVICE_POD_API_KEY_SECRET}/${fields.key || V02_DEVICE_POD_API_KEY_SECRET_KEY} missing or empty`, + }; +} + +function runG14Secret(options: G14SecretOptions): Record { + const script = v02SecretScript(options); + const result = g14K3s(["script", "--", script], options.timeoutSeconds * 1000); + const status = v02SecretStatusFromText(statusText(result), isCommandSuccess(result), result.exitCode, result.stderr); + const dryRunOk = options.action === "ensure" && options.dryRun && isCommandSuccess(result); + const ok = dryRunOk ? true : status.ok === true; + return { + ok, + command: `hwlab g14 secret ${options.action} --lane v02`, + lane: options.lane, + namespace: V02_RUNTIME_NAMESPACE, + secret: options.name, + key: options.key, + mode: options.action === "status" ? "status" : options.dryRun ? "dry-run" : "confirmed-ensure", + status, + mutation: status.mutation === true, + result: compactCommandResult(result), + valuesRedacted: true, + next: ok && options.action === "status" + ? 
undefined + : { ensure: `bun scripts/cli.ts hwlab g14 secret ensure --lane v02 --name ${options.name} --key ${options.key} --confirm` }, + }; +} + function deleteLegacyGitMirrorCronJob(dryRun: boolean): CommandJsonResult { return g14K3s([ "kubectl", @@ -3800,6 +3958,9 @@ export function hwlabG14Help(): Record { "bun scripts/cli.ts hwlab g14 control-plane runtime-migration --lane v02 --dry-run", "bun scripts/cli.ts hwlab g14 control-plane runtime-migration --lane v02 --allow-live-db-read --dry-run", "bun scripts/cli.ts hwlab g14 control-plane runtime-migration --lane v02 --confirm", + "bun scripts/cli.ts hwlab g14 secret status --lane v02 --name hwlab-v02-device-pod-api-key --key api-key", + "bun scripts/cli.ts hwlab g14 secret ensure --lane v02 --name hwlab-v02-device-pod-api-key --key api-key --dry-run", + "bun scripts/cli.ts hwlab g14 secret ensure --lane v02 --name hwlab-v02-device-pod-api-key --key api-key --confirm", "bun scripts/cli.ts hwlab g14 git-mirror status", "bun scripts/cli.ts hwlab g14 git-mirror apply --confirm", "bun scripts/cli.ts hwlab g14 git-mirror sync --confirm", @@ -3810,7 +3971,7 @@ export function hwlabG14Help(): Record { "bun scripts/cli.ts hwlab g14 tools-image build --name ci-node-tools --tag node22-alpine-bun-v1 --confirm", "bun scripts/cli.ts job status --tail-bytes 30000", ], - description: "G14 HWLAB PR monitor, DEV rollout command, bounded v0.2 control-plane bootstrap/cleanup/runtime-migration helper, devops-infra git mirror maintenance, and controlled CI tools image build/status entry. The public monitor starts a fire-and-forget job; confirmed control-plane trigger-current and git-mirror sync/flush also return async jobs by default, with --wait reserved for explicit synchronous debugging. control-plane status/apply/cleanup-runs/cleanup-released-pvs/runtime-migration uses UniDesk G14:k3s routes for v0.2 Tekton/Argo control resources, runtime migration, and completed CI workspace retention only. 
git-mirror status/apply/sync/flush is the manual devops-infra mirror/relay control path and does not install a CronJob.", + description: "G14 HWLAB PR monitor, DEV rollout command, bounded v0.2 control-plane bootstrap/cleanup/runtime-migration helper, v0.2 runtime SecretRef bootstrap, devops-infra git mirror maintenance, and controlled CI tools image build/status entry. The public monitor starts a fire-and-forget job; confirmed control-plane trigger-current and git-mirror sync/flush also return async jobs by default, with --wait reserved for explicit synchronous debugging. control-plane status/apply/cleanup-runs/cleanup-released-pvs/runtime-migration uses UniDesk G14:k3s routes for v0.2 Tekton/Argo control resources, runtime migration, and completed CI workspace retention only. secret status/ensure is the standard v0.2 runtime SecretRef bootstrap path; it never reads or prints secret values. git-mirror status/apply/sync/flush is the manual devops-infra mirror/relay control path and does not install a CronJob.", defaults: { repo: HWLAB_REPO, base: G14_SOURCE_BRANCH, @@ -3846,6 +4007,10 @@ export async function runHwlabG14Command(_config: Config, args: string[]): Promi } return runV02ControlPlane(options); } + if (action === "secret") { + const options = parseSecretOptions(args.slice(1)); + return runG14Secret(options); + } if (action === "tools-image") { const options = parseToolsImageOptions(args.slice(1)); return runG14ToolsImage(options); @@ -3858,7 +4023,7 @@ export async function runHwlabG14Command(_config: Config, args: string[]): Promi return runG14GitMirror(options); } if (action !== "monitor-prs") { - return { ok: false, command: `hwlab g14 ${action ?? ""}`.trim(), degradedReason: "unsupported-command", message: "supported commands: hwlab g14 monitor-prs, hwlab g14 record-rollout, hwlab g14 control-plane, hwlab g14 git-mirror, hwlab g14 tools-image" }; + return { ok: false, command: `hwlab g14 ${action ?? 
""}`.trim(), degradedReason: "unsupported-command", message: "supported commands: hwlab g14 monitor-prs, hwlab g14 record-rollout, hwlab g14 control-plane, hwlab g14 secret, hwlab g14 git-mirror, hwlab g14 tools-image" }; } const options = parseOptions(args.slice(1)); if (options.worker) return runMonitorWorker(options);