From 9960b2609addc879b7b1a80b41e71cd6dcf8c0d9 Mon Sep 17 00:00:00 2001 From: Codex Date: Sat, 6 Jun 2026 09:08:12 +0000 Subject: [PATCH] feat: add controlled AgentRun CI cleanup --- docs/reference/cli.md | 2 +- scripts/agentrun-cli-contract-test.ts | 35 ++ ...mote-registry-maintenance-contract-test.ts | 34 ++ scripts/hwlab-g14-contract-test.ts | 5 + scripts/src/agentrun.ts | 551 +++++++++++++++++- scripts/src/gc-remote.ts | 6 - scripts/src/help.ts | 2 +- scripts/src/hwlab-g14.ts | 20 +- 8 files changed, 642 insertions(+), 13 deletions(-) create mode 100644 scripts/agentrun-cli-contract-test.ts create mode 100644 scripts/gc-remote-registry-maintenance-contract-test.ts diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 66826eb3..6d6f1811 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -48,7 +48,7 @@ CI/CD、GitOps、rollout、artifact 发布、PR 合并后的 DEV/PROD 滚动、P - `commander contract|plan --dry-run|smoke --dry-run|approval request --dry-run|prompt-lint --kind gpt55-pr` 是 host Codex 指挥官直管微服务 skeleton 入口。当前命令返回 `phase=source-contract`、service/API/state/bridge/prompt/trace/#20/#46/ClaudeQQ 审批边界、.state/commander/ 状态模型、dev 无 daemon smoke contract、dry-run 计划和 GPT-5.5 PR prompt 边界辅助 lint,不接 live bridge、不注入 prompt、不发送 ClaudeQQ。`approval request --dry-run` 会生成 200 字以内中文纯文本 ClaudeQQ 审批草案、`notification-path-unavailable` blocker 和授权后唯一可用的 `bun scripts/cli.ts microservice proxy claudeqq /api/push/text --method POST --body-json '' --raw` 命令;不得提示使用本机 ClaudeQQ skill、powershell 或本地 server。`prompt-lint` 支持 `--prompt-file` 与 `--stdin`,输出 `ok`、`missingClauses`、`riskLevel`、`suggestedPatchSnippet` 且不回显完整 prompt;它是 commander 辅助检查,不是业务 PR 门禁,也不改变 `codex submit` 默认行为。`plan`、`smoke` 与 `approval request` 必须带 `--dry-run`;缺少时返回 `error=dry-run-required`。长期规则见 `docs/reference/host-codex-commander.md`。 - `hwlab g14 monitor-prs [--lane g14|v02] [--once] [--dry-run] [--interval-seconds N] [--max-cycles N] [--timeout-seconds N]` 是当前 HWLAB G14 PR -> CI/CD -> DEV rollout 的一行式入口。普通调用创建 `.state/jobs/` 异步 job 并立刻返回 `job.id`、`statusCommand` 和 stdout/stderr 路径;后台 worker 每轮通过 UniDesk `gh pr list/preflight/merge` 监控 `pikasTech/HWLAB` base=`G14` 的 open PR,ready 时合并,然后通过 UniDesk `trans G14:k3s` 观察 `hwlab-g14-ci-poll-`、Argo `hwlab-g14-dev` 和 DEV `/health/live`,直到 DEV `Synced/Healthy` 且 Deployment/StatefulSet ready;历史 `Completed` smoke/debug pod 不作为 rollout blocker。每次成功 DEV rollout 后,worker 会定位或创建 #7“指挥简报索引”中的北京日期每日简报 issue,并追加 CI/CD 耗时、CI/CD 关键指标、语义化上线 changelog、自动 diff 摘要、PipelineRun、GitOps revision 和 DEV 验证摘要;关键指标来自 G14 Tekton TaskRun results,固定包含 `lazy build reused: x/y`、reused services、rebuild services 和每个 service 的独立耗时/状态/backend,用于观察 lazy build 机制效果。语义化 changelog 优先从 PR body 的 `## 修改`/`## 变更`/`## Changelog` 等段落提取,diff 摘要只作为文件和统计证据保留,不替代 changelog。也可用 `hwlab g14 record-rollout --pr --source-commit ` 手动补记,手动补记同样会按 PipelineRun 采集 TaskRun 指标。G14 状态指针按用途分离:长期监控只写 `.state/hwlab-g14/latest-monitor-job.json`,`--once` 写 `latest-once-job.json`,`--dry-run` 写 `latest-dry-run-job.json`,`--once --dry-run` 写 `latest-once-dry-run-job.json`,避免一次性收口覆盖持续监控入口。`--once --dry-run` 只做单轮监控和 merge plan,不写 GitHub、不等待 rollout。该命令禁止使用原生 `gh` 或手拼 GitHub 请求;如果 UniDesk `gh` 子命令字段或行为不够,必须先改进 `scripts/src/gh.ts` 后再使用。 - `hwlab g14 monitor-prs --lane v02` 是 HWLAB `v0.2` 的 PR -> CI -> CD 自动化入口。它只监控 base=`v0.2` 的 open PR:每轮先用 UniDesk `gh pr preflight` 读取 GitHub CI/checks、mergeability 和冲突状态;pending 时在 PR 下写等待评论,blocked/conflict 时写阻塞评论;ready 时直接用 UniDesk `gh pr merge` 合并,不因为其他 commit 的运行中 PipelineRun 阻塞 merge 或 CI 启动。合并后执行受控 `control-plane trigger-current --lane v02 --confirm --wait`、轮询定点 `control-plane status --lane v02 --source-commit `,必要时执行 `git-mirror flush --confirm --wait`。v0.2 CD 采用 latest-only:旧 PipelineRun 不取消、不等待,但 promotion 写 `v0.2-gitops` 前必须重新确认 source head,stale commit 只能以 superseded/no-op 收口,不能回滚 runtime。不管 CD 成功、superseded、失败或超时,都在原 PR 下用 `gh pr comment create --body-stdin <<'EOF'` 追加语义化状态,正文固定包含起止时间、总耗时、冲突状态、CI/preflight conclusion、source commit、PipelineRun、targetValidation、Argo/webAssets 和 git mirror pendingFlush/githubInSync。评论去重状态写入 `.state/hwlab-g14/v02-pr-comment-signatures.json`,同一状态签名不会重复刷评论;v0.2 monitor 指针使用 `.state/hwlab-g14/latest-v02-monitor-job.json`、`latest-v02-once-job.json`、`latest-v02-dry-run-job.json` 和 `latest-v02-once-dry-run-job.json`,不会覆盖默认 G14 monitor 指针。`--lane v02 --once --dry-run` 只做单轮 preflight/merge/CD/comment plan,不写 GitHub、不触发 CD。 -- `agentrun v01 control-plane status|trigger-current|refresh [--dry-run|--confirm]` 是 AgentRun `v0.1` 在 G14 k3s 的受控 Tekton/Argo 入口。`status` 只读汇总固定 source worktree commit、对应 commit-pinned PipelineRun、GitOps latest、Argo Application、`agentrun-v01` manager source commit、`planArtifacts.summary`、env image result 和 git mirror 摘要,并报告 manager/Argo/GitOps 是否对齐当前 source commit。默认输出是 compact commander 视图:`summary` 给出 source、PipelineRun、Argo、manager image、git mirror 和 `aligned` 结论;`timings` 给出 `sourceMs`、`runtimeMs`、`gitMirrorMs` 和 `totalMs`;远端 stdout/stderr tail 默认省略,失败时仍展开必要 tail,完整 tail 用 `--full`,原始 git mirror cache 用 `--raw`。`status` 聚合 source 后会并行读取 runtime 和 git mirror,并向 stderr 输出 `agentrun.control-plane.status.progress` JSON 事件,覆盖 `source`、`runtime`、`git-mirror` 的 started/succeeded/failed 和 elapsedMs,避免 10s 以上状态聚合期间无可见进展;`trigger-current` 先快进 `G14:/root/agentrun-v01` 到 `origin/v0.1`,检查 `devops-infra` mirror 的 `localV01` 是否等于目标 source commit,必要时先执行受控 mirror sync,再创建 `agentrun-v01-ci-` PipelineRun。confirmed trigger 只提交 CI/CD 工作并返回后续 `status` 命令,不等待完整 PipelineRun;同名 PipelineRun 运行中或已成功时拒绝重复触发,只允许失败态重建或首次创建。`refresh` 只对 `argocd/agentrun-g14-v01` 执行 hard refresh,用于 GitOps promotion 已完成但 Argo 仍停留旧 revision 时的受控同步入口;它不直接 patch runtime workload。AgentRun 运行时和 SPEC 事实来源仍在 AgentRun 仓库,UniDesk 只维护受控运维入口。 +- `agentrun v01 control-plane status|trigger-current|refresh|cleanup-runs|cleanup-released-pvs [--dry-run|--confirm]` 是 AgentRun `v0.1` 在 G14 k3s 的受控 Tekton/Argo 入口。`status` 只读汇总固定 source worktree commit、对应 commit-pinned PipelineRun、GitOps latest、Argo Application、`agentrun-v01` manager source commit、`planArtifacts.summary`、env image result 和 git mirror 摘要,并报告 manager/Argo/GitOps 是否对齐当前 source commit。默认输出是 compact commander 视图:`summary` 给出 source、PipelineRun、Argo、manager image、git mirror 和 `aligned` 结论;`timings` 给出 `sourceMs`、`runtimeMs`、`gitMirrorMs` 和 `totalMs`;远端 stdout/stderr tail 默认省略,失败时仍展开必要 tail,完整 tail 用 `--full`,原始 git mirror cache 用 `--raw`。`status` 聚合 source 后会并行读取 runtime 和 git mirror,并向 stderr 输出 `agentrun.control-plane.status.progress` JSON 事件,覆盖 `source`、`runtime`、`git-mirror` 的 started/succeeded/failed 和 elapsedMs,避免 10s 以上状态聚合期间无可见进展;`trigger-current` 先快进 `G14:/root/agentrun-v01` 到 `origin/v0.1`,检查 `devops-infra` mirror 的 `localV01` 是否等于目标 source commit,必要时先执行受控 mirror sync,再创建 `agentrun-v01-ci-` PipelineRun。confirmed trigger 只提交 CI/CD 工作并返回后续 `status` 命令,不等待完整 PipelineRun;同名 PipelineRun 运行中或已成功时拒绝重复触发,只允许失败态重建或首次创建。`refresh` 只对 `argocd/agentrun-g14-v01` 执行 hard refresh,用于 GitOps promotion 已完成但 Argo 仍停留旧 revision 时的受控同步入口;它不直接 patch runtime workload。`cleanup-runs` 只清理 `agentrun-ci` 中已完成且超过 `--min-age-minutes` 的 `agentrun-v01-ci-*` PipelineRun,通过 Tekton ownerRef 回收临时 workspace PVC;dry-run 必须输出候选 PipelineRun、owned PVC、active mount 保护、local-path 实际估算 bytes 和 confirm 命令。`cleanup-released-pvs` 只处理 `agentrun-ci`、`local-path`、`Delete` reclaim policy 的 `Released` PV,用于 PipelineRun 删除后残留 PV 的二次回收;它不触碰 runtime namespace、业务 PVC、Secret、registry storage 或 GitOps desired state。AgentRun 运行时和 SPEC 事实来源仍在 AgentRun 仓库,UniDesk 只维护受控运维入口。 - `agentrun v01 git-mirror status|sync|flush [--dry-run|--confirm]` 是 AgentRun `v0.1` 使用 `devops-infra` git mirror/relay 的受控维护入口。`status` 默认返回 read/write URL、`localV01`、`githubV01`、`localGitops`、`githubGitops`、`pendingFlush`、`githubInSync` 和 exact full-SHA shallow fetch 摘要,不默认展开完整 cache stdout;需要探测 tail 时用 `--full`,需要原始 cache 输出时用 `--raw`。`sync` 创建 manual Job,把 GitHub `v0.1` 和 `v0.1-gitops` refs 拉入 `/cache/pikasTech/agentrun.git`;`flush` 把本地 `v0.1-gitops` 快进推回 GitHub。confirmed `sync`/`flush` 默认创建 `.state/jobs/` 异步 job 并立刻返回 `job.id`、`statusCommand` 和日志路径;只有现场同步调试才显式加 `--wait`。该入口与 HWLAB v0.2 mirror 共用 `devops-infra` 服务和 cache PVC,但 repo path、refs、status 文件和 CLI 命令彼此独立。 - `hwlab g14 control-plane status|apply --lane v02 [--dry-run|--confirm]` 是 HWLAB `v0.2` 加法 lane 的受控 Tekton/Argo 控制面维护入口,source commit 只来自 G14 专用 bare repo `/root/hwlab-v02-cicd.git` 的 `refs/remotes/origin/v0.2`;`/root/hwlab-v02` 只作为人工开发和短连接源码工具 workspace 被观测,dirty/stale 状态必须输出为 isolated warning 而不能阻塞 CI/CD。该入口面向 branch `v0.2`、namespace `hwlab-ci` 和 Argo application `hwlab-g14-v02`;默认 `status` 只读汇总最新 source head 的 pipeline、RBAC/ServiceAccount、Argo、当前 commit PipelineRun、当前 PipelineRun 的 TaskRun 条件摘要、最近 PipelineRun 摘要、活跃 PipelineRun、遗留 v02 CronJob 清理状态、commit alignment,以及 19666/19667 的 Cloud Web 静态资源和 API live 探针。分支被后续提交推进后,要复查已完成 run 时使用 `status --lane v02 --pipeline-run hwlab-v02-ci-poll-`;已知完整 source SHA 但不想依赖最新 head 时使用 `status --lane v02 --source-commit `。定点 `status` 输出 `statusTarget.mode` 和 `targetValidation`,只检查指定 PipelineRun/source commit 的证据;`targetValidation.state=passed` 表示该目标已满足 PipelineRun succeeded、Argo `Synced/Healthy`、19666/19667 探针、Git mirror flushed,并且该 run 的 `planArtifacts.rolloutServices` 运行时 source commit 对齐;`planArtifacts.reusedServices` 作为 runtime/provenance 证据呈现,但不能被强制要求等于目标 source commit。`targetValidation.state=superseded` 表示该目标已成功且 runtime 已被同一分支后续成功 PipelineRun 取代,`falseGreenGuard` 在该状态下应标为 superseded/not-applicable。两种状态都不得因为 `origin/v0.2` 后续推进而把历史 run 判为失败;默认不带定点参数时仍严格判定最新 source head alignment。TaskRun 摘要的 `performance` 字段会把超过 120s 的 build TaskRun 标为慢任务、超过 180s 标为 critical warning,用于暴露 env reuse/git mirror 命中率回归,但不作为阻断门禁;CI/CD 性能验收应同时看 `planArtifacts.summary`、`taskRuns.performance.warningCount` 和 PipelineRun duration,纯 CLI/文档或无 runtime 重建需求的后续提交应稳定表现为 `build=0 reuse=` 且无 build TaskRun warning,首次引入或切换 env image 时允许只构建必要 env image 一次。`webAssets` 必须直接给出 `readonly-rpc` 删除、sidebar/workspace/event panel 关键 CSS、`/app.js` 是否可读取和字节数、`/health/live` 与 API revision;`apiRevision` 是 cloud-api 服务自身 revision,Cloud Web 静态资源变更时允许它与 source commit 不同,不能把这种差异误判成 Cloud Web 未发布。默认只读取必要字段,禁止把完整 PipelineRun spec、Tekton 内联脚本、历史大对象或整份 CSS/HTML/JS 展开到默认输出;`apply` 先自动 fetch `/root/hwlab-v02-cicd.git` 并从 commit-pinned detached worktree 执行 render check,再经 `G14:k3s` server-side apply `tekton-v02/rbac.yaml`、`pipeline.yaml`、`argocd/project.yaml` 和 `argocd/application-v02.yaml`,confirmed apply 会删除遗留 v02 CronJob,但不会应用 runtime-v02 workload、Secret 或数据迁移。 - `hwlab g14 control-plane trigger-current --lane v02 [--dry-run|--confirm]` 是 v02 标准手动触发入口:先自动 fetch `/root/hwlab-v02-cicd.git`,解析当前 `origin/v0.2` full SHA,创建 commit-pinned `hwlab-v02-ci-poll-` PipelineRun;读 Git 走 `git-mirror-http.devops-infra.svc.cluster.local`,GitOps promotion 写 `git-mirror-write.devops-infra.svc.cluster.local`;confirmed trigger 在创建 PipelineRun 前会先按当前 source commit 在 G14 临时 detached worktree 中 render,再 server-side apply v02 Tekton RBAC、Pipeline 与 Argo Application,避免 CI/CD 脚本或 runtime-ready 逻辑已合并但集群仍执行旧 Pipeline 定义;该 render 不要求固定 `/root/hwlab-v02` 工作树 clean,也不得因 `.worktree/` 或其他并行未提交修改阻塞;同名 PipelineRun 存在时默认复用现有状态,不删除重建,失败 run 的重试策略必须显式设计,不能恢复默认 delete/create。 diff --git a/scripts/agentrun-cli-contract-test.ts b/scripts/agentrun-cli-contract-test.ts new file mode 100644 index 00000000..b4ee5995 --- /dev/null +++ b/scripts/agentrun-cli-contract-test.ts @@ -0,0 +1,35 @@ +import { agentRunHelp } from "./src/agentrun"; +import { rootHelp } from "./src/help"; + +function assertCondition(condition: unknown, message: string, detail: unknown = {}): void { + if (!condition) throw new Error(`${message}: ${JSON.stringify(detail)}`); +} + +const agentRunUsage = Array.isArray((agentRunHelp() as { usage?: unknown }).usage) + ? ((agentRunHelp() as { usage: unknown[] }).usage).map(String) + : []; + +assertCondition( + agentRunUsage.some((line) => line.includes("control-plane cleanup-runs --min-age-minutes 30 --limit 200 --dry-run")) + && agentRunUsage.some((line) => line.includes("control-plane cleanup-runs --min-age-minutes 30 --limit 200 --confirm")) + && agentRunUsage.some((line) => line.includes("control-plane cleanup-released-pvs --limit 200 --dry-run")) + && agentRunUsage.some((line) => line.includes("control-plane cleanup-released-pvs --limit 200 --confirm")), + "AgentRun help must expose controlled CI workspace retention commands", + agentRunUsage, +); + +const globalHelp = JSON.stringify(rootHelp()); + +assertCondition( + globalHelp.includes("agentrun v01 control-plane status|trigger-current|refresh|cleanup-runs|cleanup-released-pvs"), + "global help must index AgentRun cleanup entrypoints", + rootHelp(), +); + +console.log(JSON.stringify({ + ok: true, + checks: [ + "AgentRun command help exposes cleanup-runs and cleanup-released-pvs", + "global help indexes AgentRun cleanup entrypoints", + ], +})); diff --git a/scripts/gc-remote-registry-maintenance-contract-test.ts b/scripts/gc-remote-registry-maintenance-contract-test.ts new file mode 100644 index 00000000..ccfcd50d --- /dev/null +++ b/scripts/gc-remote-registry-maintenance-contract-test.ts @@ -0,0 +1,34 @@ +const sourceText = await Bun.file(new URL("./src/gc-remote.ts", import.meta.url)).text(); + +function assertCondition(condition: unknown, message: string, detail: unknown = {}): void { + if (!condition) throw new Error(`${message}: ${JSON.stringify(detail)}`); +} + +function functionBody(name: string): string { + const marker = `def ${name}():`; + const start = sourceText.indexOf(marker); + if (start < 0) return ""; + const next = sourceText.indexOf("\ndef ", start + marker.length); + return sourceText.slice(start, next < 0 ? sourceText.length : next); +} + +for (const name of ["execute_registry_retention", "execute_registry_garbage_collect_only"]) { + const body = functionBody(name); + const suspendIndex = body.indexOf("patch_cronjob_suspend(name, True)"); + const waitIndex = body.indexOf("idle_after_suspend = wait_no_active_hwlab_ci(180)"); + const refusalIndex = body.indexOf("refusing registry maintenance because hwlab-ci did not become idle after suspend"); + const preRefusalIndex = body.indexOf("refusing registry maintenance while hwlab-ci PipelineRun/TaskRun is active"); + assertCondition( + suspendIndex >= 0 && waitIndex > suspendIndex && refusalIndex > waitIndex && preRefusalIndex < 0, + "registry maintenance must suspend poller CronJobs before refusing on active hwlab-ci objects", + { name, suspendIndex, waitIndex, refusalIndex, preRefusalIndex }, + ); +} + +console.log(JSON.stringify({ + ok: true, + checks: [ + "registry retention suspends poller CronJobs before active-CI idle wait", + "registry GC-only suspends poller CronJobs before active-CI idle wait", + ], +})); diff --git a/scripts/hwlab-g14-contract-test.ts b/scripts/hwlab-g14-contract-test.ts index 666169a7..3c0db3ce 100644 --- a/scripts/hwlab-g14-contract-test.ts +++ b/scripts/hwlab-g14-contract-test.ts @@ -394,6 +394,11 @@ assertCondition( && sourceText.indexOf("if (args.includes(\"--status\")) return monitorStatus(options);") < sourceText.indexOf("const command = [\"bun\", \"scripts/cli.ts\", \"hwlab\", \"g14\", \"monitor-prs\""), "monitor-prs --status must be a read-only query before async monitor startJob", ); +assertCondition( + sourceText.includes("protectedLatestByPrefix") + && sourceText.includes("protected-latest-pipelinerun"), + "control-plane cleanup-runs must protect the latest PipelineRun per lane by default", +); const staleSuccessAlignment = v02CommitAlignment({ expectedSourceHead: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", diff --git a/scripts/src/agentrun.ts b/scripts/src/agentrun.ts index b5d0f2a2..bdfa082f 100644 --- a/scripts/src/agentrun.ts +++ b/scripts/src/agentrun.ts @@ -22,7 +22,7 @@ const mirrorToolsImage = "127.0.0.1:5000/hwlab/hwlab-ci-node-tools:node22-alpine export function agentRunHelp(): unknown { return { - command: "agentrun v01 control-plane status|trigger-current|refresh | git-mirror status|sync|flush", + command: "agentrun v01 control-plane status|trigger-current|refresh|cleanup-runs|cleanup-released-pvs | git-mirror status|sync|flush", output: "json", usage: [ "bun scripts/cli.ts agentrun v01 control-plane status", @@ -31,12 +31,16 @@ export function agentRunHelp(): unknown { "bun scripts/cli.ts agentrun v01 control-plane trigger-current --confirm", "bun scripts/cli.ts agentrun v01 control-plane refresh --dry-run", "bun scripts/cli.ts agentrun v01 control-plane refresh --confirm", + "bun scripts/cli.ts agentrun v01 control-plane cleanup-runs --min-age-minutes 30 --limit 200 --dry-run", + "bun scripts/cli.ts agentrun v01 control-plane cleanup-runs --min-age-minutes 30 --limit 200 --confirm", + "bun scripts/cli.ts agentrun v01 control-plane cleanup-released-pvs --limit 200 --dry-run", + "bun scripts/cli.ts agentrun v01 control-plane cleanup-released-pvs --limit 200 --confirm", "bun scripts/cli.ts agentrun v01 git-mirror status", "bun scripts/cli.ts agentrun v01 git-mirror status --full", "bun scripts/cli.ts agentrun v01 git-mirror sync --confirm", "bun scripts/cli.ts agentrun v01 git-mirror flush --confirm", ], - description: "Operate AgentRun v0.1 Tekton/Argo control plane and devops-infra git mirror through G14 routes; status is read-only and trigger-current pre-syncs mirror refs before creating the PipelineRun.", + description: "Operate AgentRun v0.1 Tekton/Argo control plane and devops-infra git mirror through G14 routes; status is read-only, trigger-current pre-syncs mirror refs before creating the PipelineRun, and cleanup-runs/cleanup-released-pvs provide controlled completed CI workspace retention.", }; } @@ -47,6 +51,8 @@ export async function runAgentRunCommand(config: UniDeskConfig, args: string[]): if (action === "status") return await status(config, parseDisclosureOptions(args.slice(3))); if (action === "trigger-current") return await triggerCurrent(config, parseTriggerOptions(args.slice(3))); if (action === "refresh") return await refresh(config, parseConfirmOptions(args.slice(3))); + if (action === "cleanup-runs") return await cleanupRuns(config, parseCleanupRunsOptions(args.slice(3))); + if (action === "cleanup-released-pvs") return await cleanupReleasedPvs(config, parseCleanupReleasedPvOptions(args.slice(3))); } if (group === "git-mirror") { if (action === "status") return await gitMirrorStatus(config, parseDisclosureOptions(args.slice(3))); @@ -74,6 +80,17 @@ interface GitMirrorOptions extends ConfirmOptions { wait: boolean; } +interface CleanupRunsOptions extends ConfirmOptions { + minAgeMinutes: number; + limit: number; + timeoutSeconds: number; +} + +interface CleanupReleasedPvOptions extends ConfirmOptions { + limit: number; + timeoutSeconds: number; +} + interface DisclosureOptions { full: boolean; raw: boolean; @@ -112,6 +129,57 @@ function parseGitMirrorOptions(args: string[]): GitMirrorOptions { return { ...base, timeoutSeconds, wait: args.includes("--wait") }; } +function parseCleanupRunsOptions(args: string[]): CleanupRunsOptions { + validateOptions(args, new Set(["--confirm", "--dry-run"]), new Set(["--min-age-minutes", "--limit", "--timeout-seconds"])); + const base = parseConfirmOptions(args); + return { + ...base, + minAgeMinutes: positiveIntegerOption(args, "--min-age-minutes", 60, 10080), + limit: positiveIntegerOption(args, "--limit", 20, 500), + timeoutSeconds: positiveIntegerOption(args, "--timeout-seconds", 180, 600), + }; +} + +function parseCleanupReleasedPvOptions(args: string[]): CleanupReleasedPvOptions { + validateOptions(args, new Set(["--confirm", "--dry-run"]), new Set(["--limit", "--timeout-seconds"])); + const base = parseConfirmOptions(args); + return { + ...base, + limit: positiveIntegerOption(args, "--limit", 20, 500), + timeoutSeconds: positiveIntegerOption(args, "--timeout-seconds", 120, 600), + }; +} + +function validateOptions(args: string[], booleanOptions: Set, valueOptions: Set): void { + for (let index = 0; index < args.length; index += 1) { + const arg = args[index]; + if (booleanOptions.has(arg)) continue; + if (valueOptions.has(arg)) { + const value = args[index + 1]; + if (value === undefined || value.startsWith("--")) throw new Error(`${arg} requires a value`); + index += 1; + continue; + } + throw new Error(`unsupported option: ${arg}`); + } +} + +function optionValue(args: string[], name: string): string | undefined { + const index = args.indexOf(name); + if (index === -1) return undefined; + const value = args[index + 1]; + if (value === undefined || value.startsWith("--")) throw new Error(`${name} requires a value`); + return value; +} + +function positiveIntegerOption(args: string[], name: string, defaultValue: number, maxValue: number): number { + const raw = optionValue(args, name); + if (raw === undefined) return defaultValue; + const value = Number(raw); + if (!Number.isInteger(value) || value < 0) throw new Error(`${name} must be a non-negative integer`); + return Math.min(value, maxValue); +} + async function status(config: UniDeskConfig, options: DisclosureOptions): Promise> { const sourceProbe = await timedStatusStage("source", () => capture(config, g14SourceRoute, ["script", "--", [ "cd /root/agentrun-v01", @@ -360,6 +428,469 @@ async function refresh(config: UniDeskConfig, options: ConfirmOptions): Promise< }; } +async function cleanupRuns(config: UniDeskConfig, options: CleanupRunsOptions): Promise> { + const result = await capture(config, g14K3sRoute, ["script", "--", cleanupRunsScript(options)]); + const payload = captureJsonPayload(result); + const ok = result.exitCode === 0 && payload.ok !== false; + const base = { + ...payload, + ok, + command: "agentrun v01 control-plane cleanup-runs", + mode: options.dryRun || !options.confirm ? "dry-run" : "confirmed-cleanup", + namespace: ciNamespace, + minAgeMinutes: options.minAgeMinutes, + limit: options.limit, + probe: compactCapture(result, { full: result.exitCode !== 0, stdoutTailChars: 3000, stderrTailChars: 3000 }), + }; + if (options.dryRun || !options.confirm) { + return { + ...base, + dryRun: true, + mutation: false, + next: { + confirm: `bun scripts/cli.ts agentrun v01 control-plane cleanup-runs --min-age-minutes ${options.minAgeMinutes} --limit ${options.limit} --confirm`, + }, + }; + } + return { + ...base, + dryRun: false, + mutation: true, + followUp: { + status: "bun scripts/cli.ts agentrun v01 control-plane status", + releasedPvs: `bun scripts/cli.ts agentrun v01 control-plane cleanup-released-pvs --limit ${options.limit} --dry-run`, + diskPressure: "trans G14:k3s kubectl get node ubuntu-rog-zephyrus-g14-ga401iv-ga401iv -o jsonpath='{range .status.conditions[*]}{.type}{\"=\"}{.status}{\" \"}{.reason}{\"\\n\"}{end}'", + }, + }; +} + +async function cleanupReleasedPvs(config: UniDeskConfig, options: CleanupReleasedPvOptions): Promise> { + const result = await capture(config, g14K3sRoute, ["script", "--", cleanupReleasedPvsScript(options)]); + const payload = captureJsonPayload(result); + const ok = result.exitCode === 0 && payload.ok !== false; + const base = { + ...payload, + ok, + command: "agentrun v01 control-plane cleanup-released-pvs", + mode: options.dryRun || !options.confirm ? "dry-run" : "confirmed-cleanup", + namespace: ciNamespace, + limit: options.limit, + probe: compactCapture(result, { full: result.exitCode !== 0, stdoutTailChars: 3000, stderrTailChars: 3000 }), + }; + if (options.dryRun || !options.confirm) { + return { + ...base, + dryRun: true, + mutation: false, + next: { + confirm: `bun scripts/cli.ts agentrun v01 control-plane cleanup-released-pvs --limit ${options.limit} --confirm`, + }, + }; + } + return { + ...base, + dryRun: false, + mutation: true, + followUp: { + cleanupRuns: `bun scripts/cli.ts agentrun v01 control-plane cleanup-runs --min-age-minutes 30 --limit ${options.limit} --dry-run`, + diskPressure: "trans G14:k3s kubectl get node ubuntu-rog-zephyrus-g14-ga401iv-ga401iv -o jsonpath='{range .status.conditions[*]}{.type}{\"=\"}{.status}{\" \"}{.reason}{\"\\n\"}{end}'", + }, + }; +} + +function cleanupRunsScript(options: CleanupRunsOptions): string { + return [ + "set -eu", + `namespace=${shQuote(ciNamespace)}`, + `min_age_minutes=${String(options.minAgeMinutes)}`, + `limit=${String(options.limit)}`, + `timeout_seconds=${String(options.timeoutSeconds)}`, + "tmp_dir=$(mktemp -d)", + "trap 'rm -rf \"$tmp_dir\"' EXIT", + "kubectl -n \"$namespace\" get pipelinerun -o json > \"$tmp_dir/pipelineruns.json\"", + "kubectl -n \"$namespace\" get pvc -o json > \"$tmp_dir/pvcs.json\"", + "kubectl get pv -o json > \"$tmp_dir/pvs.json\"", + "kubectl -n \"$namespace\" get pod -o json > \"$tmp_dir/pods.json\"", + "NAMESPACE=\"$namespace\" MIN_AGE_MINUTES=\"$min_age_minutes\" LIMIT=\"$limit\" TMP_DIR=\"$tmp_dir\" node <<'NODE' > \"$tmp_dir/plan.json\"", + cleanupRunsPlanNodeScript(), + "NODE", + "if [ " + shQuote(options.confirm && !options.dryRun ? "true" : "false") + " != true ]; then", + " cat \"$tmp_dir/plan.json\"", + " exit 0", + "fi", + "node -e 'const fs=require(\"node:fs\"); const plan=JSON.parse(fs.readFileSync(process.argv[1],\"utf8\")); const names=Array.isArray(plan.selectedPipelineRuns)?plan.selectedPipelineRuns:[]; fs.writeFileSync(process.argv[2], names.join(\"\\n\") + (names.length>0?\"\\n\":\"\"));' \"$tmp_dir/plan.json\" \"$tmp_dir/selected-names.txt\"", + "delete_exit=0", + "if [ -s \"$tmp_dir/selected-names.txt\" ]; then", + " xargs -r kubectl -n \"$namespace\" delete pipelinerun --ignore-not-found=true --wait=true --timeout=\"${timeout_seconds}s\" < \"$tmp_dir/selected-names.txt\" > \"$tmp_dir/delete.out\" 2> \"$tmp_dir/delete.err\" || delete_exit=$?", + "else", + " : > \"$tmp_dir/delete.out\"", + " : > \"$tmp_dir/delete.err\"", + "fi", + "kubectl -n \"$namespace\" get pvc -o json > \"$tmp_dir/pvcs-after.json\"", + "DELETE_EXIT=\"$delete_exit\" TMP_DIR=\"$tmp_dir\" node <<'NODE'", + cleanupRunsFinalizeNodeScript(), + "NODE", + ].join("\n"); +} + +function cleanupReleasedPvsScript(options: CleanupReleasedPvOptions): string { + return [ + "set -eu", + `namespace=${shQuote(ciNamespace)}`, + `limit=${String(options.limit)}`, + `timeout_seconds=${String(options.timeoutSeconds)}`, + "tmp_dir=$(mktemp -d)", + "trap 'rm -rf \"$tmp_dir\"' EXIT", + "kubectl get pv -o json > \"$tmp_dir/pvs.json\"", + "NAMESPACE=\"$namespace\" LIMIT=\"$limit\" TMP_DIR=\"$tmp_dir\" node <<'NODE' > \"$tmp_dir/plan.json\"", + cleanupReleasedPvsPlanNodeScript(), + "NODE", + "if [ " + shQuote(options.confirm && !options.dryRun ? "true" : "false") + " != true ]; then", + " cat \"$tmp_dir/plan.json\"", + " exit 0", + "fi", + "node -e 'const fs=require(\"node:fs\"); const plan=JSON.parse(fs.readFileSync(process.argv[1],\"utf8\")); const names=Array.isArray(plan.selectedPersistentVolumes)?plan.selectedPersistentVolumes:[]; fs.writeFileSync(process.argv[2], names.join(\"\\n\") + (names.length>0?\"\\n\":\"\"));' \"$tmp_dir/plan.json\" \"$tmp_dir/selected-pvs.txt\"", + "delete_exit=0", + "if [ -s \"$tmp_dir/selected-pvs.txt\" ]; then", + " xargs -r kubectl delete pv --ignore-not-found=true --wait=true --timeout=\"${timeout_seconds}s\" < \"$tmp_dir/selected-pvs.txt\" > \"$tmp_dir/delete.out\" 2> \"$tmp_dir/delete.err\" || delete_exit=$?", + "else", + " : > \"$tmp_dir/delete.out\"", + " : > \"$tmp_dir/delete.err\"", + "fi", + "kubectl get pv -o json > \"$tmp_dir/pvs-after.json\"", + "DELETE_EXIT=\"$delete_exit\" TMP_DIR=\"$tmp_dir\" node <<'NODE'", + cleanupReleasedPvsFinalizeNodeScript(), + "NODE", + ].join("\n"); +} + +function cleanupRunsPlanNodeScript(): string { + return String.raw` +const fs = require("node:fs"); +const path = require("node:path"); +const cp = require("node:child_process"); +const tmp = process.env.TMP_DIR; +const namespace = process.env.NAMESPACE; +const minAgeMinutes = Number(process.env.MIN_AGE_MINUTES || 60); +const limit = Number(process.env.LIMIT || 20); +const now = Date.now(); + +function readJson(name) { + return JSON.parse(fs.readFileSync(path.join(tmp, name), "utf8")); +} + +function conditionOf(item) { + const conditions = Array.isArray(item?.status?.conditions) ? item.status.conditions : []; + return conditions.find((entry) => entry.type === "Succeeded") || conditions[0] || {}; +} + +function ageMinutes(createdAt) { + const createdMs = Date.parse(createdAt || ""); + return Number.isFinite(createdMs) ? Math.floor((now - createdMs) / 60000) : null; +} + +function localPathOf(pv) { + return pv?.spec?.local?.path || pv?.spec?.hostPath?.path || null; +} + +function duBytes(hostPath) { + if (typeof hostPath !== "string" || !hostPath.startsWith("/var/lib/rancher/k3s/storage/")) return null; + try { + const out = cp.execFileSync("du", ["-sB1", hostPath], { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] }); + const bytes = Number(String(out).trim().split(/\s+/)[0]); + return Number.isFinite(bytes) ? bytes : null; + } catch { + return null; + } +} + +function formatBytes(bytes) { + if (!Number.isFinite(bytes)) return null; + const units = ["B", "KiB", "MiB", "GiB", "TiB"]; + let value = bytes; + let unit = 0; + while (value >= 1024 && unit < units.length - 1) { + value /= 1024; + unit += 1; + } + return value.toFixed(unit === 0 ? 0 : 1) + units[unit]; +} + +const pipelineRuns = readJson("pipelineruns.json"); +const pvcs = readJson("pvcs.json"); +const pvs = readJson("pvs.json"); +const pods = readJson("pods.json"); +const pvByName = new Map((Array.isArray(pvs.items) ? pvs.items : []).map((item) => [item?.metadata?.name, item])); +const activeClaimPods = new Map(); +for (const pod of Array.isArray(pods.items) ? pods.items : []) { + const phase = pod?.status?.phase || ""; + if (phase === "Succeeded" || phase === "Failed") continue; + for (const volume of Array.isArray(pod?.spec?.volumes) ? pod.spec.volumes : []) { + const claimName = volume?.persistentVolumeClaim?.claimName; + if (!claimName) continue; + const entry = activeClaimPods.get(claimName) || []; + entry.push(pod?.metadata?.name || null); + activeClaimPods.set(claimName, entry.filter(Boolean)); + } +} + +const pvcsByOwner = new Map(); +for (const pvc of Array.isArray(pvcs.items) ? pvcs.items : []) { + const owner = (Array.isArray(pvc?.metadata?.ownerReferences) ? pvc.metadata.ownerReferences : []).find((entry) => entry.kind === "PipelineRun"); + if (!owner?.name) continue; + const entry = pvcsByOwner.get(owner.name) || []; + entry.push(pvc); + pvcsByOwner.set(owner.name, entry); +} + +const allPipelineRuns = (Array.isArray(pipelineRuns.items) ? pipelineRuns.items : []) + .map((item) => { + const condition = conditionOf(item); + return { + name: item?.metadata?.name || "", + createdAt: item?.metadata?.creationTimestamp || null, + ageMinutes: ageMinutes(item?.metadata?.creationTimestamp), + status: condition.status || null, + reason: condition.reason || null, + }; + }) + .filter((item) => item.name.startsWith("agentrun-v01-ci-")); + +const protectedActivePipelineRuns = allPipelineRuns + .filter((item) => item.status !== "True" && item.status !== "False") + .sort((a, b) => String(a.createdAt).localeCompare(String(b.createdAt))); +const protectedLatestPipelineRun = allPipelineRuns + .filter((item) => item.status === "True" || item.status === "False") + .sort((a, b) => String(b.createdAt).localeCompare(String(a.createdAt)))[0]?.name || null; + +const candidates = allPipelineRuns + .filter((item) => item.status === "True" || item.status === "False") + .filter((item) => typeof item.ageMinutes === "number" && item.ageMinutes >= minAgeMinutes) + .sort((a, b) => String(a.createdAt).localeCompare(String(b.createdAt))) + .slice(0, limit) + .map((item) => { + const owned = pvcsByOwner.get(item.name) || []; + const activeMountPods = owned.flatMap((pvc) => activeClaimPods.get(pvc?.metadata?.name) || []); + const protectedLatest = item.name === protectedLatestPipelineRun; + return { + ...item, + selected: activeMountPods.length === 0 && !protectedLatest, + selectedReason: protectedLatest ? "protected-latest-pipelinerun" : activeMountPods.length === 0 ? "terminal-and-unmounted" : "owned-pvc-active-mounted", + ownedPvcCount: owned.length, + activeMountPods, + }; + }); + +const selectedPipelineRuns = candidates.filter((item) => item.selected).map((item) => item.name); +const selectedSet = new Set(selectedPipelineRuns); +const ownedPvcs = []; +const protectedOwnedPvcs = []; +for (const [owner, items] of pvcsByOwner.entries()) { + if (!selectedSet.has(owner) && !candidates.some((item) => item.name === owner)) continue; + for (const pvc of items) { + const volume = pvc?.spec?.volumeName || null; + const pv = volume ? pvByName.get(volume) : null; + const hostPath = localPathOf(pv); + const activeMountPods = activeClaimPods.get(pvc?.metadata?.name) || []; + const entry = { + name: pvc?.metadata?.name || null, + volume, + phase: pvc?.status?.phase || null, + ownerKind: "PipelineRun", + owner, + storageClass: pv?.spec?.storageClassName || null, + reclaimPolicy: pv?.spec?.persistentVolumeReclaimPolicy || null, + hostPath, + estimatedBytes: duBytes(hostPath), + activeMountPods, + }; + if (selectedSet.has(owner)) ownedPvcs.push(entry); + else protectedOwnedPvcs.push(entry); + } +} + +const estimatedReclaimBytes = ownedPvcs.reduce((sum, item) => sum + (Number.isFinite(item.estimatedBytes) ? item.estimatedBytes : 0), 0); +console.log(JSON.stringify({ + ok: true, + planKind: "agentrun-ci-completed-pipelinerun-workspace-retention", + generatedAt: new Date().toISOString(), + namespace, + criteria: { prefix: "agentrun-v01-ci-", terminalStatuses: ["True", "False"], minAgeMinutes, limit }, + candidates, + candidateCount: candidates.length, + protectedActivePipelineRuns, + protectedActivePipelineRunCount: protectedActivePipelineRuns.length, + protectedLatestPipelineRun, + selectedPipelineRuns, + selectedPipelineRunCount: selectedPipelineRuns.length, + ownedPvcs, + ownedPvcCount: ownedPvcs.length, + protectedOwnedPvcs, + protectedOwnedPvcCount: protectedOwnedPvcs.length, + estimatedReclaimBytes, + estimatedReclaimHuman: formatBytes(estimatedReclaimBytes), +})); +`; +} + +function cleanupRunsFinalizeNodeScript(): string { + return String.raw` +const fs = require("node:fs"); +const path = require("node:path"); +const tmp = process.env.TMP_DIR; +const deleteExit = Number(process.env.DELETE_EXIT || 0); +const plan = JSON.parse(fs.readFileSync(path.join(tmp, "plan.json"), "utf8")); +const pvcsAfter = JSON.parse(fs.readFileSync(path.join(tmp, "pvcs-after.json"), "utf8")); +const selected = new Set(Array.isArray(plan.selectedPipelineRuns) ? plan.selectedPipelineRuns : []); +const remainingOwnedPvcs = (Array.isArray(pvcsAfter.items) ? pvcsAfter.items : []) + .map((pvc) => { + const owner = (Array.isArray(pvc?.metadata?.ownerReferences) ? pvc.metadata.ownerReferences : []).find((entry) => entry.kind === "PipelineRun"); + return { + name: pvc?.metadata?.name || null, + volume: pvc?.spec?.volumeName || null, + phase: pvc?.status?.phase || null, + ownerKind: owner?.kind || null, + owner: owner?.name || null, + }; + }) + .filter((item) => item.owner && selected.has(item.owner)); + +function tail(name) { + try { + const text = fs.readFileSync(path.join(tmp, name), "utf8"); + return text.length > 3000 ? text.slice(-3000) : text; + } catch { + return ""; + } +} + +console.log(JSON.stringify({ + ...plan, + ok: deleteExit === 0, + deletedPipelineRuns: Array.from(selected), + deletedPipelineRunCount: selected.size, + deletion: { exitCode: deleteExit, stdoutTail: tail("delete.out"), stderrTail: tail("delete.err") }, + remainingOwnedPvcs, + remainingOwnedPvcCount: remainingOwnedPvcs.length, +})); +`; +} + +function cleanupReleasedPvsPlanNodeScript(): string { + return String.raw` +const fs = require("node:fs"); +const path = require("node:path"); +const cp = require("node:child_process"); +const tmp = process.env.TMP_DIR; +const namespace = process.env.NAMESPACE; +const limit = Number(process.env.LIMIT || 20); + +function readJson(name) { + return JSON.parse(fs.readFileSync(path.join(tmp, name), "utf8")); +} + +function localPathOf(pv) { + return pv?.spec?.local?.path || pv?.spec?.hostPath?.path || null; +} + +function duBytes(hostPath) { + if (typeof hostPath !== "string" || !hostPath.startsWith("/var/lib/rancher/k3s/storage/")) return null; + try { + const out = cp.execFileSync("du", ["-sB1", hostPath], { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] }); + const bytes = Number(String(out).trim().split(/\s+/)[0]); + return Number.isFinite(bytes) ? bytes : null; + } catch { + return null; + } +} + +function formatBytes(bytes) { + if (!Number.isFinite(bytes)) return null; + const units = ["B", "KiB", "MiB", "GiB", "TiB"]; + let value = bytes; + let unit = 0; + while (value >= 1024 && unit < units.length - 1) { + value /= 1024; + unit += 1; + } + return value.toFixed(unit === 0 ? 0 : 1) + units[unit]; +} + +const pvs = readJson("pvs.json"); +const candidates = (Array.isArray(pvs.items) ? pvs.items : []) + .map((pv) => { + const hostPath = localPathOf(pv); + return { + name: pv?.metadata?.name || "", + createdAt: pv?.metadata?.creationTimestamp || null, + phase: pv?.status?.phase || null, + storageClass: pv?.spec?.storageClassName || null, + reclaimPolicy: pv?.spec?.persistentVolumeReclaimPolicy || null, + claimNamespace: pv?.spec?.claimRef?.namespace || null, + claimName: pv?.spec?.claimRef?.name || null, + capacity: pv?.spec?.capacity?.storage || null, + hostPath, + estimatedBytes: duBytes(hostPath), + }; + }) + .filter((item) => item.phase === "Released") + .filter((item) => item.storageClass === "local-path" && item.reclaimPolicy === "Delete") + .filter((item) => item.claimNamespace === namespace) + .sort((a, b) => String(a.createdAt).localeCompare(String(b.createdAt))) + .slice(0, limit); + +const selectedPersistentVolumes = candidates.map((item) => item.name); +const estimatedReclaimBytes = candidates.reduce((sum, item) => sum + (Number.isFinite(item.estimatedBytes) ? item.estimatedBytes : 0), 0); +console.log(JSON.stringify({ + ok: true, + planKind: "agentrun-ci-released-local-path-pv-retention", + generatedAt: new Date().toISOString(), + namespace, + criteria: { phase: "Released", storageClass: "local-path", reclaimPolicy: "Delete", claimNamespace: namespace, limit }, + candidates, + candidateCount: candidates.length, + selectedPersistentVolumes, + selectedPersistentVolumeCount: selectedPersistentVolumes.length, + estimatedReclaimBytes, + estimatedReclaimHuman: formatBytes(estimatedReclaimBytes), +})); +`; +} + +function cleanupReleasedPvsFinalizeNodeScript(): string { + return String.raw` +const fs = require("node:fs"); +const path = require("node:path"); +const tmp = process.env.TMP_DIR; +const deleteExit = Number(process.env.DELETE_EXIT || 0); +const plan = JSON.parse(fs.readFileSync(path.join(tmp, "plan.json"), "utf8")); +const pvsAfter = JSON.parse(fs.readFileSync(path.join(tmp, "pvs-after.json"), "utf8")); +const selected = new Set(Array.isArray(plan.selectedPersistentVolumes) ? plan.selectedPersistentVolumes : []); +const remainingPersistentVolumes = (Array.isArray(pvsAfter.items) ? pvsAfter.items : []) + .filter((pv) => selected.has(pv?.metadata?.name)) + .map((pv) => ({ name: pv?.metadata?.name || null, phase: pv?.status?.phase || null, claimNamespace: pv?.spec?.claimRef?.namespace || null, claimName: pv?.spec?.claimRef?.name || null })); + +function tail(name) { + try { + const text = fs.readFileSync(path.join(tmp, name), "utf8"); + return text.length > 3000 ? text.slice(-3000) : text; + } catch { + return ""; + } +} + +console.log(JSON.stringify({ + ...plan, + ok: deleteExit === 0, + deletedPersistentVolumes: Array.from(selected), + deletedPersistentVolumeCount: selected.size, + deletion: { exitCode: deleteExit, stdoutTail: tail("delete.out"), stderrTail: tail("delete.err") }, + remainingPersistentVolumes, + remainingPersistentVolumeCount: remainingPersistentVolumes.length, +})); +`; +} + function statusScript(pipelineRun: string | null): string { const pr = pipelineRun ?? ""; return [ @@ -969,6 +1500,22 @@ function labeledJson(text: string, label: string): Record { } } +function captureJsonPayload(result: SshCaptureResult): Record { + const trimmed = result.stdout.trim(); + if (trimmed.length === 0) return {}; + try { + return record(JSON.parse(trimmed) as unknown); + } catch { + const lastJsonLine = trimmed.split(/\r?\n/u).reverse().find((line) => line.trim().startsWith("{") && line.trim().endsWith("}")); + if (lastJsonLine === undefined) return {}; + try { + return record(JSON.parse(lastJsonLine) as unknown); + } catch { + return {}; + } + } +} + function record(value: unknown): Record { return typeof value === "object" && value !== null && !Array.isArray(value) ? value as Record : {}; } diff --git a/scripts/src/gc-remote.ts b/scripts/src/gc-remote.ts index 62ef70dd..a265c19a 100644 --- a/scripts/src/gc-remote.ts +++ b/scripts/src/gc-remote.ts @@ -1014,9 +1014,6 @@ def execute_registry_retention(): deployment = registry_deployment_preflight() if not deployment.get("ok"): raise RuntimeError("registry deployment preflight failed: %s" % deployment.get("reason")) - active = active_hwlab_ci_writes() - if not active.get("ok") or int(active.get("activeCount") or 0) > 0: - raise RuntimeError("refusing registry maintenance while hwlab-ci PipelineRun/TaskRun is active") plan = plan_registry_retention() delete_rows = plan.get("deleteRows") or [] delete_revision_rows = plan.get("deleteRevisionRows") or [] @@ -1144,9 +1141,6 @@ def execute_registry_garbage_collect_only(): deployment = registry_deployment_preflight() if not deployment.get("ok"): raise RuntimeError("registry deployment preflight failed: %s" % deployment.get("reason")) - active = active_hwlab_ci_writes() - if not active.get("ok") or int(active.get("activeCount") or 0) > 0: - raise RuntimeError("refusing registry maintenance while hwlab-ci PipelineRun/TaskRun is active") cronjobs = ["hwlab-g14-branch-poller", "hwlab-v02-branch-poller"] original_crons = cronjob_suspend_states(cronjobs) before = du_size(REGISTRY_ROOT, 60) or 0 diff --git a/scripts/src/help.ts b/scripts/src/help.ts index a79530b9..05084bea 100644 --- a/scripts/src/help.ts +++ b/scripts/src/help.ts @@ -59,7 +59,7 @@ export function rootHelp(): unknown { { command: "gh preflight|auth|issue|pr", description: "Run safe GitHub issue and PR CRUD/lifecycle operations through REST with body-file update replace/append, comment delete, token diagnostics, PR closeout preflight, hard delete unsupported, and guarded PR merge." }, { command: "commander contract|plan --dry-run|smoke --dry-run|approval request --dry-run|prompt-lint --kind gpt55-pr", description: "Host Codex commander skeleton contract, no-daemon smoke plan, dry-run approval preview, and advisory GPT-5.5 PR prompt boundary lint without live bridges, message sends, or submit gating." }, { command: "hwlab g14 monitor-prs | hwlab g14 control-plane status|apply|trigger-current|runtime-migration|cleanup-runs|cleanup-released-pvs | hwlab g14 git-mirror status|apply|sync|flush | hwlab g14 tools-image status|build", description: "Start the G14 PR monitor, run bounded v0.2 Tekton/Argo control-plane, manual PipelineRun trigger, runtime migration, CI workspace retention, manual devops-infra git mirror/relay maintenance, or fixed HWLAB CI tools image actions through UniDesk G14 routes; long confirmed trigger/sync/flush actions return async jobs by default." }, - { command: "agentrun v01 control-plane status|trigger-current|refresh", description: "Run bounded AgentRun v0.1 Tekton/Argo status, manual PipelineRun trigger, and Argo refresh operations through UniDesk G14 routes." }, + { command: "agentrun v01 control-plane status|trigger-current|refresh|cleanup-runs|cleanup-released-pvs", description: "Run bounded AgentRun v0.1 Tekton/Argo status, manual PipelineRun trigger, Argo refresh, and completed CI workspace retention through UniDesk G14 routes." }, { command: "hwlab cd audit --env dev | hwlab cd status --env dev | hwlab cd apply --env dev --dry-run", description: "Legacy D601 HWLAB DEV CD wrapper kept for explicit old-path diagnostics; current HWLAB rollout uses G14 GitOps." }, { command: "code-agent-sandbox", description: "Independent Code Agent Sandbox service skeleton for adapter, mode, and credential-boundary diagnostics." }, { command: "schedule list|get|runs|run|retry-run|delete", description: "Manage backend-core scheduled tasks and run history; schedule run supports --wait-ms N and retry-run reuses the failed run's schedule." }, diff --git a/scripts/src/hwlab-g14.ts b/scripts/src/hwlab-g14.ts index 4527f37f..4e89268c 100644 --- a/scripts/src/hwlab-g14.ts +++ b/scripts/src/hwlab-g14.ts @@ -2510,7 +2510,7 @@ function listCleanupPipelineRuns(options: G14ControlPlaneOptions): Record line.trim()) .filter(Boolean) @@ -2521,10 +2521,24 @@ function listCleanupPipelineRuns(options: G14ControlPlaneOptions): Record item.name.length > 0 && prefixes.some((prefix) => item.name.startsWith(prefix))) - .filter((item) => targetPipelineRun === undefined || item.name === targetPipelineRun) .filter((item) => item.status === "True" || item.status === "False") + .sort((a, b) => String(a.createdAt).localeCompare(String(b.createdAt))); + const protectedLatestByPrefix = new Map(); + for (const prefix of prefixes) { + const latest = terminalRuns + .filter((item) => item.name.startsWith(prefix)) + .sort((a, b) => String(b.createdAt).localeCompare(String(a.createdAt)))[0]; + if (latest !== undefined) protectedLatestByPrefix.set(prefix, latest.name); + } + const candidates = terminalRuns + .filter((item) => targetPipelineRun === undefined || item.name === targetPipelineRun) .filter((item) => typeof item.ageMinutes === "number" && item.ageMinutes >= options.minAgeMinutes) - .sort((a, b) => String(a.createdAt).localeCompare(String(b.createdAt))) + .map((item) => { + const protectedLatest = targetPipelineRun === undefined && [...protectedLatestByPrefix.values()].includes(item.name); + return protectedLatest + ? { ...item, selected: false, selectedReason: "protected-latest-pipelinerun" } + : item; + }) .slice(0, options.limit); if (targetPipelineRun !== undefined && candidates.length === 0) { return [{