feat: add yaml-driven gc cleanup scopes (#988)
Co-authored-by: Codex <codex@noreply.local>
This commit is contained in:
@@ -105,9 +105,9 @@ bun scripts/cli.ts gc plan --target-use-percent 69 \
|
||||
--include-vpn-diagnostic-logs
|
||||
```
|
||||
|
||||
`--target-use-percent` 按 `df` 显示口径估算 shortfall。工具缓存、`/tmp` 非 allowlist 直接子项、VS Code 历史 server/extension 版本、VS Code CachedExtensionVSIXs 下载缓存、Baidu staging 旧 PGDATA tarball、UniDesk `.state` 历史诊断/部署产物、VPN 诊断 ring pcap 均默认不启用;必须显式 include 后才进入候选,且执行时仍受路径断言保护。stale `/tmp` 扫描按 `--limit` 有界枚举候选,避免为了估算全量临时目录而长时间无输出。`.state` retention 只通过 `--include-state-artifacts --state-artifact-keep-days N` 选择 `.state/e2e`、`.state/validation`、`.state/jobs`、`.state/codex-queue/output-archive` 下超过保留期的普通文件,以及 `.state/deploy/exports`、`.state/deploy/resolve` 下超过保留期的直接子目录;默认保留期 14 天。VS Code cached VSIX 只选择 `/root/.vscode-server/data/CachedExtensionVSIXs` 下超过 `--vscode-cached-vsix-keep-days` 的顶层普通缓存文件,执行前检查 active fd;不删除已安装 extensions、server 或 user data。VPN 诊断日志只选择 `/root/vpn-server/logs/hy2-udp-ring-*.pcap` 和 `hy2-monitor-ring-*.pcap` 中超过 `--vpn-diagnostic-log-keep-hours` 的普通文件,执行前检查 active fd;不删除 evidence JSONL。默认 GC 不触碰 `.state/recovery`、`.state/codex-queue/codex-home`、`.state/deploy/work`、`.state/baidu-netdisk`、PGDATA、Docker volumes/images、Codex sessions/auth state、active worktree、runtime image/snapshot state、Baidu staging 根目录、VPN 日志根目录或 VS Code user data。
|
||||
`--target-use-percent` 按 `df` 显示口径估算 shortfall。主 server GC 的默认 include、保留窗口、输出 limit、Codex session root、worktree main/root/baseRef、worktree 扫描预算和 `.state` allowlist roots 由 `config/unidesk-cli.yaml#gc` 拥有;CLI 参数只做一次性显式覆盖。工具缓存、`/tmp` 非 allowlist 直接子项、VS Code 历史 server/extension 版本、VS Code CachedExtensionVSIXs 下载缓存、Baidu staging 旧 PGDATA tarball、UniDesk `.state` 历史诊断/部署产物、`.state` stale scratch、Codex inactive sessions、merged worktrees、VPN 诊断 ring pcap 均默认不启用;必须显式 include 后才进入候选,且执行时仍受路径断言保护。stale `/tmp` 扫描按 `--limit` 有界枚举候选,避免为了估算全量临时目录而长时间无输出。`.state` retention 通过 `--include-state-artifacts` 和 `--include-state-stale-scratch` 读取 YAML allowlist;不得把 `.state` 根目录当成通用清理对象。Codex session 清理只删除 YAML root 下超过 keepHours 的普通 session 文件,永远不删除 auth/config。Worktree 清理只扫描 YAML root 下 inactive 且已合入 YAML baseRef 或 cherry-equivalent 的 worktree,run 删除前重新校验 full clean 状态并使用 `git worktree remove`。VS Code cached VSIX 只选择 `/root/.vscode-server/data/CachedExtensionVSIXs` 下超过 `--vscode-cached-vsix-keep-days` 的顶层普通缓存文件,执行前检查 active fd;不删除已安装 extensions、server 或 user data。VPN 诊断日志只选择 `/root/vpn-server/logs/hy2-udp-ring-*.pcap` 和 `hy2-monitor-ring-*.pcap` 中超过 `--vpn-diagnostic-log-keep-hours` 的普通文件,执行前检查 active fd;不删除 evidence JSONL。默认 GC 不触碰 `.state/recovery`、`.state/codex-queue/codex-home`、`.state/deploy/work`、`.state/baidu-netdisk`、PGDATA、Docker volumes/images、Codex auth/config state、active/unmerged/dirty worktree、runtime image/snapshot state、Baidu staging 根目录、VPN 日志根目录或 VS Code user data。
|
||||
|
||||
`gc policy install` 的每日 timer 会自动执行 24 小时 VPN 诊断 pcap retention、14 天 UniDesk `.state` artifact retention 和 7 天 VS Code CachedExtensionVSIXs retention,用于限制长期诊断/部署产物、tcpdump ring 文件与 VS Code 下载缓存增长;手动 `gc plan/run` 仍必须显式 `--include-vpn-diagnostic-logs` / `--include-state-artifacts` / `--include-vscode-cached-vsix` 才会列出或删除这些对象。
|
||||
`gc policy install` 的每日 timer 从 `config/unidesk-cli.yaml#gc.policyTimer` 渲染 VPN 诊断 pcap retention、UniDesk `.state` artifact retention 和 VS Code CachedExtensionVSIXs retention,用于限制长期诊断/部署产物、tcpdump ring 文件与 VS Code 下载缓存增长;手动 `gc plan/run` 仍必须显式 `--include-vpn-diagnostic-logs` / `--include-state-artifacts` / `--include-vscode-cached-vsix` 才会列出或删除这些对象。
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -12,3 +12,106 @@ github:
|
||||
initialDelayMs: 1000
|
||||
maxDelayMs: 16000
|
||||
factor: 2
|
||||
gc:
|
||||
targetUsePercent: null
|
||||
fileLogs:
|
||||
enabled: true
|
||||
keepDays: 7
|
||||
maxBytes: 50MiB
|
||||
tailBytes: 20MiB
|
||||
dockerLogs:
|
||||
enabled: true
|
||||
maxBytes: 50MiB
|
||||
journal:
|
||||
enabled: true
|
||||
targetBytes: 512MiB
|
||||
buildCache:
|
||||
enabled: true
|
||||
until: 24h
|
||||
all: false
|
||||
tmp:
|
||||
enabled: true
|
||||
minAgeHours: 24
|
||||
includeStale: false
|
||||
browserCache:
|
||||
enabled: false
|
||||
toolCaches:
|
||||
enabled: false
|
||||
vscode:
|
||||
staleServers:
|
||||
enabled: false
|
||||
keepServers: 2
|
||||
staleExtensions:
|
||||
enabled: false
|
||||
keepVersions: 1
|
||||
cachedVsix:
|
||||
enabled: false
|
||||
keepDays: 7
|
||||
baiduStaging:
|
||||
enabled: false
|
||||
keepDays: 10
|
||||
stateArtifacts:
|
||||
enabled: false
|
||||
keepDays: 14
|
||||
fileRoots:
|
||||
e2e: .state/e2e
|
||||
validation: .state/validation
|
||||
jobs: .state/jobs
|
||||
codex-queue-output-archive: .state/codex-queue/output-archive
|
||||
dirRoots:
|
||||
deploy-exports: .state/deploy/exports
|
||||
deploy-resolve: .state/deploy/resolve
|
||||
stateStaleScratch:
|
||||
enabled: false
|
||||
keepHours: 24
|
||||
fileRoots:
|
||||
playwright-cli-screenshots: .state/playwright-cli/screenshots
|
||||
playwright-cli-sessions: .state/playwright-cli/sessions
|
||||
perf: .state/perf
|
||||
tmp: .state/tmp
|
||||
web-observe: .state/web-observe
|
||||
dirRoots:
|
||||
hwlab-cd: .state/hwlab-cd
|
||||
codex-queue-stats-verify: .state/codex-queue-stats-verify
|
||||
codex-queue-perf: .state/codex-queue-perf
|
||||
tmp: .state/tmp
|
||||
codexSessions:
|
||||
enabled: false
|
||||
keepHours: 72
|
||||
root: /root/.codex/sessions
|
||||
mergedWorktrees:
|
||||
enabled: false
|
||||
keepHours: 24
|
||||
mainRoot: /root/unidesk
|
||||
root: /root/unidesk/.worktree
|
||||
baseRef: origin/master
|
||||
scanBudgetMs: 20000
|
||||
cherryCheckTimeoutMs: 1000
|
||||
estimateSizeInPlan: false
|
||||
vpnDiagnosticLogs:
|
||||
enabled: false
|
||||
keepHours: 24
|
||||
databaseSummary:
|
||||
enabled: true
|
||||
output:
|
||||
limit: 50
|
||||
resultLimit: 50
|
||||
full: false
|
||||
policyTimer:
|
||||
journald:
|
||||
systemMaxUse: 512MiB
|
||||
runtimeMaxUse: 128MiB
|
||||
maxRetentionSec: 7day
|
||||
daily:
|
||||
buildCacheUntil: 24h
|
||||
vpnDiagnosticLogs:
|
||||
enabled: true
|
||||
keepHours: 24
|
||||
stateArtifacts:
|
||||
enabled: true
|
||||
keepDays: 14
|
||||
vscodeCachedVsix:
|
||||
enabled: true
|
||||
keepDays: 7
|
||||
limit: 5000
|
||||
resultLimit: 25
|
||||
|
||||
+11
-3
@@ -2,11 +2,13 @@
|
||||
|
||||
UniDesk 的磁盘治理入口是 `bun scripts/cli.ts gc ...`。该入口用于短期一次性止血和低风险防膨胀策略,所有清理动作都必须先有结构化 plan,再通过显式确认执行。GC 不是通用 `rm -rf` 或原生命令集合;当目标磁盘水位无法在保护边界内下降到阈值以下时,应停止并升级为 retention/capacity 决策,而不是扩大清理范围。
|
||||
|
||||
所有主 server GC 可调策略都由 `config/unidesk-cli.yaml#gc` 拥有,包括默认 include 开关、保留窗口、输出 limit、Codex session root、worktree main/root/baseRef、worktree 扫描预算、`.state` allowlist roots 和是否在 plan 阶段估算 worktree size。CLI 参数只作为一次性显式覆盖;代码只校验 YAML 字段存在、类型正确和路径可渲染,不把这些策略值写成隐藏默认。
|
||||
|
||||
## Command Boundary
|
||||
|
||||
- `gc plan`:只读生成主 server 清理候选、估算收益、风险等级、保护对象和数据库诊断摘要。
|
||||
- `gc run --confirm`:只执行当前 plan 可见候选页,默认不执行分页隐藏候选;用 `--limit`、`--result-limit`、`--full|--raw` 控制披露和执行范围。
|
||||
- `gc policy plan|install`:渲染或安装低风险长期策略,例如 journald cap、每日 allowlisted 文件/tmp 清理 timer、24 小时 VPN 诊断 pcap retention、14 天 `.state` artifact retention 和 VS Code CachedExtensionVSIXs 下载缓存 retention。
|
||||
- `gc policy plan|install`:从 `config/unidesk-cli.yaml#gc.policyTimer` 渲染或安装低风险长期策略,例如 journald cap、每日 allowlisted 文件/tmp 清理 timer、VPN 诊断 pcap retention、`.state` artifact retention 和 VS Code CachedExtensionVSIXs 下载缓存 retention。
|
||||
- `gc db-trace plan|run --confirm --before-date YYYY-MM-DD --vacuum-full`:显式 trace 遥测留存入口;涉及数据库重写时按维护窗口处理。
|
||||
- `gc remote <providerId> plan|run --confirm|status --job-id <id>`:通过 UniDesk SSH 透传在 provider host 上执行受控 GC。远端长任务必须使用异步 job 和 `status` 短查询,不应让单次 SSH 等待完整 registry GC 或其他长清理。
|
||||
|
||||
@@ -29,6 +31,12 @@ UniDesk 的磁盘治理入口是 `bun scripts/cli.ts gc ...`。该入口用于
|
||||
|
||||
`gc run --confirm --include-state-artifacts` 执行前必须重新校验路径、保留期、对象类型和 symlink 状态。文件候选必须仍是 allowlist 根下的普通文件;deploy 目录候选必须仍是 `.state/deploy/exports` 或 `.state/deploy/resolve` 的直接子目录。该入口不得递归扩大成通用 `.state` 清空器,也不得选择 `.state` 根目录、allowlist 之外的目录、symlink、active worktree、runtime image 或 snapshot 状态。
|
||||
|
||||
`.state` scratch 清理是另一类显式入口:`--include-state-stale-scratch` 只读取 `config/unidesk-cli.yaml#gc.stateStaleScratch` 中声明的 fileRoots/dirRoots 和 keepHours。它用于历史临时验证、性能探针、Web observe、CD scratch 等可重建对象;`.state/recovery`、`.state/codex-queue/codex-home`、`.state/deploy/work`、`.state/baidu-netdisk`、Secret/sourceRef 和 runtime snapshot 仍作为 protected 输出,不允许通过这个入口扩大删除范围。
|
||||
|
||||
Codex session 清理由 `--include-codex-sessions` 显式启用,只删除 `config/unidesk-cli.yaml#gc.codexSessions.root` 下超过 YAML keepHours 的普通 session 文件。执行前必须重新校验路径仍在 YAML root 下、对象是普通文件、未被进程打开,删除后只向上清理空目录;`auth.json`、config、profile、Secret 或其他 Codex state 永远不通过该入口删除。
|
||||
|
||||
Worktree 清理由 `--include-merged-worktrees` 显式启用,只扫描 `config/unidesk-cli.yaml#gc.mergedWorktrees.root` 下的 worktree,主 worktree、当前执行 worktree、recent worktree,以及既未合入 YAML baseRef、也未被 cherry-pick 等价吸收的 worktree 都作为 protected 输出。plan 阶段按 YAML scanBudgetMs 有界扫描,超预算对象按 protected 输出;run 阶段删除前重新执行 full `git status --untracked-files=all`、inactive 和 merge/cherry-equivalence 校验,并通过 `git worktree remove` 删除,不用手工 `rm -rf`。
|
||||
|
||||
主 server VS Code 下载缓存默认不清理。`/root/.vscode-server/data/CachedExtensionVSIXs` 只用于 VS Code extension VSIX 下载缓存,可通过显式 `--include-vscode-cached-vsix` 进入候选;执行时只允许删除该目录下符合 extension-version 命名的顶层普通文件,并按 `--vscode-cached-vsix-keep-days` 保留近期缓存。执行前必须重新校验路径、文件名、非 symlink/regular file,并用 active-file 检查确认没有进程仍打开该文件。该入口不得触碰 `/root/.vscode-server/extensions`、`/root/.vscode-server/cli/servers`、VS Code user data 或任意 session/auth state。
|
||||
|
||||
## Protected Data
|
||||
@@ -44,7 +52,7 @@ UniDesk 的磁盘治理入口是 `bun scripts/cli.ts gc ...`。该入口用于
|
||||
| `.state/deploy/work` | 部署工作目录可能包含 active rollout 上下文 |
|
||||
| `.state/baidu-netdisk` | Baidu Netdisk token、任务、备份和 staging 状态需单独判定 |
|
||||
| active worktree、runtime image、runtime snapshot state | 当前执行面和运行面 provenance,不通过 `.state` artifact retention 删除 |
|
||||
| Codex sessions/auth | `~/.codex/sessions`、`~/.codex/auth.json` 等凭证和会话状态 |
|
||||
| Codex auth/config | `~/.codex/auth.json`、profile 和 config 等凭证状态;session 文件只能通过显式 `--include-codex-sessions` 按 YAML retention 清理 |
|
||||
| VPN diagnostic evidence logs | `/root/vpn-server/logs/hy2-server-evidence.jsonl` 等 active evidence 流用于网络排障,不随 pcap retention 删除 |
|
||||
| VS Code installed extensions/server/user data | 已安装扩展、server 版本和用户配置不是下载缓存,只能由专门 stale-version 规则或 VS Code 自身管理 |
|
||||
| D601 registry storage | artifact registry retention 需使用专门入口 |
|
||||
@@ -55,7 +63,7 @@ UniDesk 的磁盘治理入口是 `bun scripts/cli.ts gc ...`。该入口用于
|
||||
|
||||
如果需要触碰上表对象,必须先补高层 UniDesk CLI 子命令、dry-run 计划、保护对象、验证命令和失败分类;不能把原生 `kubectl`、`docker prune`、`crictl rmi` 或手写 registry shell 作为长期流程。
|
||||
|
||||
`gc policy install` 的每日 timer 会启用 14 天 `.state` artifact retention 和 7 天 VS Code CachedExtensionVSIXs retention,用来限制历史诊断/部署产物与 VS Code 下载缓存长期增长;手动 `gc plan/run` 仍默认不清 `.state` 或 VSIX 缓存,必须显式 `--include-state-artifacts` / `--include-vscode-cached-vsix` 才会列出或执行这些候选。policy timer 仍保护上表对象,并把输出限制在 `.state/gc/last-run.json` 和 `.state/gc/last-run.stderr`。
|
||||
`gc policy install` 的每日 timer 按 `config/unidesk-cli.yaml#gc.policyTimer` 启用 `.state` artifact retention、VPN pcap retention 和 VS Code CachedExtensionVSIXs retention,用来限制历史诊断/部署产物、tcpdump ring 文件与 VS Code 下载缓存长期增长;手动 `gc plan/run` 仍默认不清 `.state`、VPN 诊断 pcap 或 VSIX 缓存,必须显式 `--include-state-artifacts` / `--include-vpn-diagnostic-logs` / `--include-vscode-cached-vsix` 才会列出或执行这些候选。policy timer 仍保护上表对象,并把输出限制在 `.state/gc/last-run.json` 和 `.state/gc/last-run.stderr`。
|
||||
|
||||
## Remote G14 Policy
|
||||
|
||||
|
||||
+834
-105
File diff suppressed because it is too large
Load Diff
+10
-3
@@ -296,6 +296,7 @@ function gcHelp(): unknown {
|
||||
"bun scripts/cli.ts gc run --confirm --build-cache-all --include-browser-cache",
|
||||
"bun scripts/cli.ts gc run --confirm --include-browser-cache",
|
||||
"bun scripts/cli.ts gc plan --target-use-percent 59 --include-state-artifacts --state-artifact-keep-days 14 --full",
|
||||
"bun scripts/cli.ts gc plan --include-codex-sessions --include-merged-worktrees --include-state-stale-scratch",
|
||||
"bun scripts/cli.ts gc db-trace plan --before-date 2026-05-25",
|
||||
"bun scripts/cli.ts gc db-trace run --confirm --before-date 2026-05-25 --vacuum-full",
|
||||
"bun scripts/cli.ts gc policy plan",
|
||||
@@ -306,11 +307,12 @@ function gcHelp(): unknown {
|
||||
"bun scripts/cli.ts gc remote G14 status --job-id <id>",
|
||||
"bun scripts/cli.ts gc plan --full",
|
||||
],
|
||||
description: "Plan or execute bounded one-time disk relief for file logs, Docker json logs, systemd journal, Docker BuildKit cache, allowlisted /tmp artifacts, opt-in UniDesk .state artifact retention, scoped remote core dumps and explicitly scoped database trace telemetry retention.",
|
||||
description: "Plan or execute YAML-configured bounded one-time disk relief for file logs, Docker json logs, systemd journal, Docker BuildKit cache, allowlisted /tmp artifacts, opt-in UniDesk .state/session/worktree retention, scoped remote core dumps and explicitly scoped database trace telemetry retention.",
|
||||
safety: {
|
||||
default: "plan is read-only and mutation=false",
|
||||
runGuard: "run requires --confirm",
|
||||
protected: ["PostgreSQL PGDATA", "Docker volumes", "Docker images", ".state/recovery", ".state/codex-queue/codex-home", ".state/deploy/work", ".state/baidu-netdisk", "Codex sessions/auth", "active worktree/runtime image/snapshot", "D601 registry storage"],
|
||||
configSource: "config/unidesk-cli.yaml#gc owns retention windows, include defaults, worktree roots/baseRef, scan budgets, output limits and .state allowlist roots",
|
||||
protected: ["PostgreSQL PGDATA", "Docker volumes", "Docker images", ".state/recovery", ".state/codex-queue/codex-home", ".state/deploy/work", ".state/baidu-netdisk", "Codex auth/config", "active/unmerged/dirty worktree/runtime image/snapshot", "D601 registry storage"],
|
||||
database: "default gc run is database diagnostic-only; gc db-trace is the explicit trace telemetry retention path and requires --confirm plus --vacuum-full",
|
||||
},
|
||||
options: {
|
||||
@@ -336,9 +338,14 @@ function gcHelp(): unknown {
|
||||
"--include-browser-cache": "also remove repo-local .state/playwright-browsers cache",
|
||||
"--include-state-artifacts": "manual local gc only: opt in to stale UniDesk .state artifact retention for allowlisted diagnostic files and deploy artifact direct directories",
|
||||
"--state-artifact-keep-days N": "keep recent UniDesk .state artifacts for N days; default 14; must be a positive integer",
|
||||
"--include-state-stale-scratch": "manual local gc only: opt in to YAML allowlisted stale .state scratch roots; roots and keepHours come from config/unidesk-cli.yaml#gc",
|
||||
"--include-codex-sessions": "manual local gc only: delete inactive session files under YAML codexSessions.root after codexSessions.keepHours; never deletes auth/config",
|
||||
"--include-merged-worktrees": "manual local gc only: remove inactive .worktree entries whose HEAD is merged into or cherry-equivalent to YAML baseRef; run rechecks full clean status before deletion",
|
||||
"--worktree-scan-budget-ms N": "temporary override for YAML mergedWorktrees.scanBudgetMs; over-budget worktrees are protected, not deleted",
|
||||
"--worktree-cherry-check-timeout-ms N": "temporary override for YAML mergedWorktrees.cherryCheckTimeoutMs",
|
||||
"db-trace --before-date YYYY-MM-DD": "plan or delete default trace telemetry event types before the date",
|
||||
"db-trace run --vacuum-full": "rewrite public.oa_events after deletion so df can reclaim disk; requires maintenance window",
|
||||
"policy plan|install": "render or install journald caps and a daily file-log, allowlisted /tmp, VPN pcap and 14-day UniDesk .state artifact low-risk gc systemd timer",
|
||||
"policy plan|install": "render or install journald caps and a daily low-risk gc systemd timer from config/unidesk-cli.yaml#gc.policyTimer",
|
||||
"remote <providerId> plan|run": "run bounded GC through UniDesk SSH passthrough on a provider host; G14 protects HWLAB k3s/runtime/PVC/workspace paths, and HWLAB registry retention is explicit opt-in with workload-ref, digest-closure, recent-tag and per-repo tag protection",
|
||||
"--no-file-logs|--no-docker-logs|--no-journal|--no-build-cache|--no-tmp|--no-db-summary": "disable one collector",
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user