From 003b0ae7fc4ca48c8e4903076fb5f9c9110d718e Mon Sep 17 00:00:00 2001 From: Codex Date: Sat, 30 May 2026 21:45:42 +0000 Subject: [PATCH] fix: make hwlab g14 long actions observable --- docs/reference/cli.md | 7 +++- scripts/src/help.ts | 2 +- scripts/src/hwlab-g14.ts | 85 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 90 insertions(+), 4 deletions(-) diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 4f1de3d4..7771b54f 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -44,11 +44,14 @@ CI/CD、GitOps、rollout、artifact 发布、PR 合并后的 DEV/PROD 滚动、P - `commander contract|plan --dry-run|smoke --dry-run|approval request --dry-run|prompt-lint --kind gpt55-pr` 是 host Codex 指挥官直管微服务 skeleton 入口。当前命令返回 `phase=source-contract`、service/API/state/bridge/prompt/trace/#20/#46/ClaudeQQ 审批边界、.state/commander/ 状态模型、dev 无 daemon smoke contract、dry-run 计划和 GPT-5.5 PR prompt 边界辅助 lint,不接 live bridge、不注入 prompt、不发送 ClaudeQQ。`approval request --dry-run` 会生成 200 字以内中文纯文本 ClaudeQQ 审批草案、`notification-path-unavailable` blocker 和授权后唯一可用的 `bun scripts/cli.ts microservice proxy claudeqq /api/push/text --method POST --body-json '' --raw` 命令;不得提示使用本机 ClaudeQQ skill、powershell 或本地 server。`prompt-lint` 支持 `--prompt-file` 与 `--stdin`,输出 `ok`、`missingClauses`、`riskLevel`、`suggestedPatchSnippet` 且不回显完整 prompt;它是 commander 辅助检查,不是业务 PR 门禁,也不改变 `codex submit` 默认行为。`plan`、`smoke` 与 `approval request` 必须带 `--dry-run`;缺少时返回 `error=dry-run-required`。长期规则见 `docs/reference/host-codex-commander.md`。 - `hwlab g14 monitor-prs [--once] [--dry-run] [--interval-seconds N] [--max-cycles N] [--timeout-seconds N]` 是当前 HWLAB G14 PR -> CI/CD -> DEV rollout 的一行式入口。普通调用创建 `.state/jobs/` 异步 job 并立刻返回 `job.id`、`statusCommand` 和 stdout/stderr 路径;后台 worker 每轮通过 UniDesk `gh pr list/preflight/merge` 监控 `pikasTech/HWLAB` base=`G14` 的 open PR,ready 时合并,然后通过 UniDesk `ssh G14:k3s` 观察 `hwlab-g14-ci-poll-`、Argo `hwlab-g14-dev` 和 DEV `/health/live`,直到 DEV `Synced/Healthy` 且 Deployment/StatefulSet ready;历史 `Completed` smoke/debug pod 不作为 rollout blocker。每次成功 DEV rollout 后,worker 会定位或创建 #7“指挥简报索引”中的北京日期每日简报 issue,并追加 CI/CD 耗时、CI/CD 关键指标、语义化上线 changelog、自动 diff 摘要、PipelineRun、GitOps revision 和 DEV 验证摘要;关键指标来自 G14 Tekton TaskRun results,固定包含 `lazy build reused: x/y`、reused services、rebuild services 和每个 service 的独立耗时/状态/backend,用于观察 lazy build 机制效果。语义化 changelog 优先从 PR body 的 `## 修改`/`## 变更`/`## Changelog` 等段落提取,diff 摘要只作为文件和统计证据保留,不替代 changelog。也可用 `hwlab g14 record-rollout --pr --source-commit ` 手动补记,手动补记同样会按 PipelineRun 采集 TaskRun 指标。状态指针按用途分离:长期监控只写 `.state/hwlab-g14/latest-monitor-job.json`,`--once` 写 `latest-once-job.json`,`--dry-run` 写 `latest-dry-run-job.json`,`--once --dry-run` 写 `latest-once-dry-run-job.json`,避免一次性收口覆盖持续监控入口。`--once --dry-run` 只做单轮监控和 merge plan,不写 GitHub、不等待 rollout。该命令禁止使用原生 `gh` 或手拼 GitHub 请求;如果 UniDesk `gh` 子命令字段或行为不够,必须先改进 `scripts/src/gh.ts` 后再使用。 - `hwlab g14 control-plane status|apply --lane v02 [--dry-run|--confirm]` 是 HWLAB `v0.2` 加法 lane 的受控 Tekton/Argo 控制面维护入口,只面向 G14 `/root/hwlab-v02`、branch `v0.2`、namespace `hwlab-ci` 和 Argo application `hwlab-g14-v02`;`status` 只读汇总 pipeline、RBAC/ServiceAccount、Argo、当前 commit PipelineRun 和遗留 v02 CronJob 清理状态;`apply` 先在 G14 workspace 快进并执行 render check,再经 `G14:k3s` server-side apply `tekton-v02/rbac.yaml`、`pipeline.yaml`、`argocd/project.yaml` 和 `argocd/application-v02.yaml`,confirmed apply 会删除遗留 v02 CronJob,但不会应用 runtime-v02 workload、Secret 或数据迁移。 -- `hwlab g14 control-plane trigger-current --lane v02 [--dry-run|--confirm]` 是 v02 标准手动触发入口:解析当前 `origin/v0.2` full SHA,创建 commit-pinned `hwlab-v02-ci-poll-` PipelineRun;读 Git 走 `git-mirror-http.devops-infra.svc.cluster.local`,GitOps promotion 写 `git-mirror-write.devops-infra.svc.cluster.local`;同名 PipelineRun 成功或运行中时拒绝重复触发,失败或不存在时才删除旧对象并重新创建;创建 PipelineRun 前会读取 `devops-infra` mirror refs,若 `localV02` 未等于当前 source commit,则自动执行一次受控 manual `git-mirror sync` Job 并复核 ref,复核失败时停止触发,避免 Tekton `prepare-source` 已知失败;services 参数只包含 v02 runtime service matrix,`hwlab-cli` 是固定 repo 短连接源码工具,不进入 PipelineRun service build;`--dry-run` 只报告是否会 pre-sync,不创建 Job;旧 `rerun-current` 只作为输入别名保留。 +- `hwlab g14 control-plane trigger-current --lane v02 [--dry-run|--confirm]` 是 v02 标准手动触发入口:解析当前 `origin/v0.2` full SHA,创建 commit-pinned `hwlab-v02-ci-poll-` PipelineRun;读 Git 走 `git-mirror-http.devops-infra.svc.cluster.local`,GitOps promotion 写 `git-mirror-write.devops-infra.svc.cluster.local`;同名 PipelineRun 成功或运行中时拒绝重复触发,失败或不存在时才删除旧对象并重新创建。 + 创建 PipelineRun 前会读取 `devops-infra` mirror refs,若 `localV02` 未等于当前 source commit,则自动执行一次受控 manual `git-mirror sync` Job 并复核 ref,复核失败时停止触发,避免 Tekton `prepare-source` 已知失败;services 参数只包含 v02 runtime service matrix,`hwlab-cli` 是固定 repo 短连接源码工具,不进入 PipelineRun service build。 + `--dry-run` 只报告是否会 pre-sync,不创建 Job;confirmed trigger 默认创建 `.state/jobs/` 异步 job 并立刻返回 `job.id`、`statusCommand`、stdout/stderr 路径,避免 git mirror pre-sync 或 PipelineRun 创建期间长时间无输出;只有现场同步调试才显式加 `--wait`;旧 `rerun-current` 只作为输入别名保留。 - `hwlab g14 control-plane runtime-migration --lane v02 [--dry-run|--allow-live-db-read --dry-run|--confirm]` 只通过 `hwlab-v02` namespace 当前 `deployment/hwlab-cloud-api -c hwlab-cloud-api` 内 repo-owned migration CLI 执行;不读取或打印 Secret 值、不触碰 PROD、不绕到手工 `psql`。 - `hwlab g14 control-plane cleanup-runs --lane v02|g14|all [--min-age-minutes N] [--limit N] [--dry-run|--confirm]` 是完成态 PipelineRun 工作区 retention 入口;真实清理只删除已完成 PipelineRun,让 Tekton/local-path 回收临时 PVC,不触碰 registry storage、业务 PVC、Secret、runtime workload 或 GitOps desired state。 - `hwlab g14 control-plane cleanup-released-pvs --lane all [--limit N] [--dry-run|--confirm]` 是 local-path 未自动回收后的补充 retention 入口;只列并删除 `Released`、`local-path`、`Delete`、`claimNamespace=hwlab-ci` 且 claim 名称形如 Tekton 临时 `pvc-*` 的 PV。 -- `hwlab g14 git-mirror status|apply|sync|flush [--dry-run|--confirm]` 是 `devops-infra` git mirror/relay 的受控维护入口:`apply` 渲染并 server-side apply `devops-infra/git-mirror.yaml`,同时删除遗留 `git-mirror-hwlab-sync` CronJob;`sync` 创建一次性 manual Job,把 GitHub allowlist refs 拉入本地 mirror;`flush` 创建一次性 manual Job,把本地 `v0.2-gitops` 快进推回 GitHub;`status` 返回 read/write URL、last sync/write/flush、本地 ref、GitHub staging ref 和 pending flush 状态;mirror 不设置 CronJob。 +- `hwlab g14 git-mirror status|apply|sync|flush [--dry-run|--confirm]` 是 `devops-infra` git mirror/relay 的受控维护入口:`apply` 渲染并 server-side apply `devops-infra/git-mirror.yaml`,同时删除遗留 `git-mirror-hwlab-sync` CronJob;`sync` 创建一次性 manual Job,把 GitHub allowlist refs 拉入本地 mirror;`flush` 创建一次性 manual Job,把本地 `v0.2-gitops` 快进推回 GitHub。 + `status` 返回 read/write URL、last sync/write/flush、本地 ref、GitHub staging ref 和 pending flush 状态;confirmed `sync` 和 `flush` 默认创建 `.state/jobs/` 异步 job 并立刻返回可查询状态,只有现场同步调试才显式加 `--wait`;mirror 不设置 CronJob。 - `hwlab g14 tools-image status|build --name ci-node-tools --tag [--dockerfile deploy/ci/hwlab-ci-node-tools.Dockerfile] [--dry-run|--confirm]` 是 G14 固定 HWLAB CI tools image 的受控 host build/push 入口;构建和 push 只发生在 G14 host 与本地 registry,不在 master server 构建,也不把 `apk add`/runtime install 塞进 Tekton PipelineRun。 - `ssh gh:/owner/repo ...` 把 GitHub issue/PR 映射成只读/受控写入的虚拟文本目录,适合日报、PR 正文和 issue 正文的小补丁维护:`ssh gh:/pikasTech/HWLAB ls` 展示 `pr/` 与 `issue/`,`ssh gh:/pikasTech/HWLAB/pr ls [--limit N] [--full]` 和 `ssh gh:/pikasTech/HWLAB/issue ls [--limit N] [--full]` 展示条目状态、楼层数、正文长度和标题,`ssh gh:/pikasTech/HWLAB/pr/507 ls` 展示单个 PR 的一楼正文文件,`ssh gh:/pikasTech/HWLAB/505/1 cat|rg|patch-apply` 兼容旧式 issue/PR number route。`patch-apply` 使用 UniDesk 默认 apply-patch v2 的虚拟文件 executor,把正文一楼映射为 `body.md`,写回仍走 `bun scripts/cli.ts gh issue/pr update` 的 guard/concurrency 规则;`rm` 对正文一楼结构化拒绝,避免误删 issue/PR 正文。大正文读取必须展开 UniDesk gh dump 文件,否则 `cat/rg/patch-apply` 会误读为空,这是 `gh:` 虚拟文件接口的 P0 可见性契约。 - `hwlab cd status|audit|preflight|apply --env dev [--dry-run]` 是旧 D601 HWLAB DEV CD 指挥侧 wrapper,仅用于显式 legacy 诊断和迁移对照。默认通过 UniDesk provider `host.ssh` 进入 D601,再调用 HWLAB repo-owned `scripts/dev-cd-apply.mjs`,不内嵌发布 kubectl 逻辑:`status` 汇总固定 CD mirror、Git clean/main/origin-main、`deploy/deploy.json`/artifact catalog/report、D601 native k3s guard 和 CD Lease lock,并用 `scripts/dev-cd-apply.mjs --status --skip-live-verify` 取得 target/promotion 摘要;`audit` 在 k3s/CD 恢复后做只读健康审计,返回有界 JSON 的 blocker 分类、D601 guard/node、SecretRef 存在性、registry 可达性、Lease phase/holder/staleness、deploy.json 与 artifact/workload image 收敛、current Deployment image/revision/rollout、16666/16667 public health commit/readiness 和 DB/runtime durability 摘要;`preflight` 进一步检查必需 SecretRef 对象/键存在性并运行 HWLAB `scripts/dev-cd-apply.mjs --dry-run --skip-live-verify` 受控事务摘要。完整远端 stdout/stderr 写入 D601 `~/.state/unidesk-hwlab-cd//` 和本地 `.state/hwlab-cd//` task dump,stdout 只返回有界摘要。默认 HWLAB CD repo 是 `/home/ubuntu/hwlab_cd`,`/home/ubuntu/hwlab` runner 历史目录不得作为发布真相。wrapper 强制 `KUBECONFIG=/etc/rancher/k3s/k3s.yaml` 并只以这个显式目标作为 gate;显式目标出现 `docker-desktop`、`desktop-control-plane` 或 `127.0.0.1:11700` 信号会结构化拒绝,audit/preflight/apply --dry-run 都必须观察到 node `d601`。真实 apply 只暴露 `scripts/dev-cd-apply.mjs --apply --confirm-dev --confirmed-non-production --write-report` 命令形状并标注 host-commander-only,本 runner 不执行 live apply、rollout、Lease mutation 或 DEV deploy apply。长期规则见 `docs/reference/hwlab.md`。 diff --git a/scripts/src/help.ts b/scripts/src/help.ts index 52d2cb6a..e6785d46 100644 --- a/scripts/src/help.ts +++ b/scripts/src/help.ts @@ -57,7 +57,7 @@ export function rootHelp(): unknown { { command: "auth-broker contract|health --dry-run|credential-request --dry-run|pr-preflight --dry-run", description: "Inspect the P0 Rust auth broker and CLI adapter contract without reading token values, writing GitHub, or starting services." }, { command: "gh preflight|auth|issue|pr", description: "Run safe GitHub issue and PR CRUD/lifecycle operations through REST with body-file update replace/append, comment delete, token diagnostics, PR closeout preflight, hard delete unsupported, and guarded PR merge." }, { command: "commander contract|plan --dry-run|smoke --dry-run|approval request --dry-run|prompt-lint --kind gpt55-pr", description: "Host Codex commander skeleton contract, no-daemon smoke plan, dry-run approval preview, and advisory GPT-5.5 PR prompt boundary lint without live bridges, message sends, or submit gating." }, - { command: "hwlab g14 monitor-prs | hwlab g14 control-plane status|apply|trigger-current|runtime-migration|cleanup-runs|cleanup-released-pvs | hwlab g14 git-mirror status|apply|sync|flush | hwlab g14 tools-image status|build", description: "Start the G14 PR monitor, run bounded v0.2 Tekton/Argo control-plane, manual PipelineRun trigger, runtime migration, CI workspace retention, manual devops-infra git mirror/relay maintenance, or fixed HWLAB CI tools image actions through UniDesk G14 routes." }, + { command: "hwlab g14 monitor-prs | hwlab g14 control-plane status|apply|trigger-current|runtime-migration|cleanup-runs|cleanup-released-pvs | hwlab g14 git-mirror status|apply|sync|flush | hwlab g14 tools-image status|build", description: "Start the G14 PR monitor, run bounded v0.2 Tekton/Argo control-plane, manual PipelineRun trigger, runtime migration, CI workspace retention, manual devops-infra git mirror/relay maintenance, or fixed HWLAB CI tools image actions through UniDesk G14 routes; long confirmed trigger/sync/flush actions return async jobs by default." }, { command: "hwlab cd audit --env dev | hwlab cd status --env dev | hwlab cd apply --env dev --dry-run", description: "Legacy D601 HWLAB DEV CD wrapper kept for explicit old-path diagnostics; current HWLAB rollout uses G14 GitOps." }, { command: "code-agent-sandbox", description: "Independent Code Agent Sandbox service skeleton for adapter, mode, and credential-boundary diagnostics." }, { command: "schedule list|get|runs|run|retry-run|delete", description: "Manage backend-core scheduled tasks and run history; schedule run supports --wait-ms N and retry-run reuses the failed run's schedule." }, diff --git a/scripts/src/hwlab-g14.ts b/scripts/src/hwlab-g14.ts index efb48db2..c8b68e8d 100644 --- a/scripts/src/hwlab-g14.ts +++ b/scripts/src/hwlab-g14.ts @@ -80,6 +80,7 @@ interface G14ControlPlaneOptions { lane: "v02" | "g14" | "all"; dryRun: boolean; confirm: boolean; + wait: boolean; allowLiveDbRead: boolean; timeoutSeconds: number; minAgeMinutes: number; @@ -100,6 +101,7 @@ interface G14GitMirrorOptions { action: "status" | "apply" | "sync" | "flush"; dryRun: boolean; confirm: boolean; + wait: boolean; timeoutSeconds: number; } @@ -227,6 +229,7 @@ function parseControlPlaneOptions(args: string[]): G14ControlPlaneOptions { action: actionRaw, lane, confirm, + wait: args.includes("--wait"), allowLiveDbRead, dryRun: actionRaw === "status" ? true : explicitDryRun || !confirm, timeoutSeconds: positiveIntegerOption(args, "--timeout-seconds", 120, 600), @@ -271,6 +274,7 @@ function parseGitMirrorOptions(args: string[]): G14GitMirrorOptions { return { action: actionRaw, confirm, + wait: args.includes("--wait"), dryRun: actionRaw === "status" ? true : explicitDryRun || !confirm, timeoutSeconds: positiveIntegerOption(args, "--timeout-seconds", actionRaw === "sync" || actionRaw === "flush" ? 300 : 120, 900), }; @@ -1458,6 +1462,76 @@ function runG14GitMirror(options: G14GitMirrorOptions): Record return runGitMirrorSync(options); } +function startAsyncHwlabG14Job(name: string, command: string[], note: string): Record { + const job = startJob(name, command, note); + const statusCommand = `bun scripts/cli.ts job status ${job.id} --tail-bytes 30000`; + return { + ok: true, + mode: "async-job", + job, + statusCommand, + tailCommand: `tail -f ${job.stdoutFile}`, + next: { + status: statusCommand, + tail: `tail -f ${job.stdoutFile}`, + }, + }; +} + +function startControlPlaneTriggerJob(options: G14ControlPlaneOptions): Record { + const command = [ + "bun", + "scripts/cli.ts", + "hwlab", + "g14", + "control-plane", + "trigger-current", + "--lane", + options.lane, + "--confirm", + "--timeout-seconds", + String(options.timeoutSeconds), + "--wait", + ]; + return { + command: "hwlab g14 control-plane trigger-current --lane v02", + lane: options.lane, + reason: "confirmed trigger can spend tens of seconds syncing git mirror and creating PipelineRun; default is fire-and-forget to avoid silent blocking", + waitCommand: command.join(" "), + ...startAsyncHwlabG14Job( + "hwlab_g14_v02_trigger_current", + command, + "Trigger HWLAB v0.2 current commit PipelineRun with git mirror pre-sync through G14 control-plane", + ), + }; +} + +function startGitMirrorJob(options: G14GitMirrorOptions): Record { + const command = [ + "bun", + "scripts/cli.ts", + "hwlab", + "g14", + "git-mirror", + options.action, + "--confirm", + "--timeout-seconds", + String(options.timeoutSeconds), + "--wait", + ]; + return { + command: `hwlab g14 git-mirror ${options.action}`, + namespace: GIT_MIRROR_NAMESPACE, + reason: "manual git mirror sync/flush waits for a Kubernetes Job; default is fire-and-forget to keep CLI output immediately visible", + waitCommand: command.join(" "), + ...startAsyncHwlabG14Job( + `hwlab_g14_git_mirror_${options.action}`, + command, + `Run HWLAB devops-infra git mirror ${options.action} through a bounded manual Kubernetes Job`, + ), + }; +} + function g14HostScript(script: string, timeoutMs = 120_000): CommandJsonResult { return cliJson(["ssh", G14_PROVIDER, "script", "--", script], timeoutMs); } @@ -2309,6 +2383,7 @@ export function hwlabG14Help(): Record { "bun scripts/cli.ts hwlab g14 control-plane apply --lane v02 --dry-run", "bun scripts/cli.ts hwlab g14 control-plane apply --lane v02 --confirm", "bun scripts/cli.ts hwlab g14 control-plane trigger-current --lane v02 --confirm", + "bun scripts/cli.ts hwlab g14 control-plane trigger-current --lane v02 --confirm --wait", "bun scripts/cli.ts hwlab g14 control-plane cleanup-runs --lane v02 --min-age-minutes 30 --limit 20 --dry-run", "bun scripts/cli.ts hwlab g14 control-plane cleanup-runs --lane v02 --min-age-minutes 30 --limit 20 --confirm", "bun scripts/cli.ts hwlab g14 control-plane cleanup-released-pvs --lane all --limit 20 --dry-run", @@ -2320,11 +2395,13 @@ export function hwlabG14Help(): Record { "bun scripts/cli.ts hwlab g14 git-mirror apply --confirm", "bun scripts/cli.ts hwlab g14 git-mirror sync --confirm", "bun scripts/cli.ts hwlab g14 git-mirror flush --confirm", + "bun scripts/cli.ts hwlab g14 git-mirror sync --confirm --wait", + "bun scripts/cli.ts hwlab g14 git-mirror flush --confirm --wait", "bun scripts/cli.ts hwlab g14 tools-image status --name ci-node-tools --tag node22-alpine-bun-v1", "bun scripts/cli.ts hwlab g14 tools-image build --name ci-node-tools --tag node22-alpine-bun-v1 --confirm", "bun scripts/cli.ts job status --tail-bytes 30000", ], - description: "G14 HWLAB PR monitor, DEV rollout command, bounded v0.2 control-plane bootstrap/cleanup/runtime-migration helper, devops-infra git mirror maintenance, and controlled CI tools image build/status entry. The public monitor starts a fire-and-forget job; control-plane status/apply/trigger-current/cleanup-runs/cleanup-released-pvs/runtime-migration uses UniDesk G14:k3s routes for v0.2 Tekton/Argo control resources, manual PipelineRun trigger, runtime migration, and completed CI workspace retention only. git-mirror status/apply/sync/flush is the manual devops-infra mirror/relay control path and does not install a CronJob.", + description: "G14 HWLAB PR monitor, DEV rollout command, bounded v0.2 control-plane bootstrap/cleanup/runtime-migration helper, devops-infra git mirror maintenance, and controlled CI tools image build/status entry. The public monitor starts a fire-and-forget job; confirmed control-plane trigger-current and git-mirror sync/flush also return async jobs by default, with --wait reserved for explicit synchronous debugging. control-plane status/apply/cleanup-runs/cleanup-released-pvs/runtime-migration uses UniDesk G14:k3s routes for v0.2 Tekton/Argo control resources, runtime migration, and completed CI workspace retention only. git-mirror status/apply/sync/flush is the manual devops-infra mirror/relay control path and does not install a CronJob.", defaults: { repo: HWLAB_REPO, base: G14_SOURCE_BRANCH, @@ -2355,6 +2432,9 @@ export async function runHwlabG14Command(_config: Config, args: string[]): Promi } if (action === "control-plane") { const options = parseControlPlaneOptions(args.slice(1)); + if (options.action === "trigger-current" && options.confirm && !options.dryRun && !options.wait) { + return startControlPlaneTriggerJob(options); + } return runV02ControlPlane(options); } if (action === "tools-image") { @@ -2363,6 +2443,9 @@ export async function runHwlabG14Command(_config: Config, args: string[]): Promi } if (action === "git-mirror") { const options = parseGitMirrorOptions(args.slice(1)); + if ((options.action === "sync" || options.action === "flush") && options.confirm && !options.dryRun && !options.wait) { + return startGitMirrorJob(options); + } return runG14GitMirror(options); } if (action !== "monitor-prs") {