chore: add low-risk disk anti-bloat policy
This commit is contained in:
@@ -145,7 +145,7 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文
|
||||
- `bun scripts/cli.ts server swap status|ensure [--path /swapfile] [--size 2GiB] [--dry-run]`:以 JSON 查看或幂等创建主 server swapfile,`ensure` 输出 before/after、动作、持久化状态和 degraded/failed 详情,规则见 `docs/reference/deployment.md`。
|
||||
- `bun scripts/cli.ts server logs [--tail-bytes N]`:分页返回文件日志与 Docker 日志尾部并带截断元数据,日志规则见 `docs/reference/observability.md`。
|
||||
- `bun scripts/cli.ts server cleanup plan [--min-age-hours N] [--limit N]`:只读/干跑生成主 server Docker 镜像清理计划,默认只列出至少 24 小时前创建的非保护镜像,输出 active/protected images、stale candidates、预计释放空间、风险等级和必须人工确认的 `docker image rm` 命令;禁止默认删除、禁止 prune、禁止触碰 database volume、registry storage 或 Baidu Netdisk 状态。
|
||||
- `bun scripts/cli.ts gc plan|run|db-trace --confirm`:主 server 磁盘高水位一次性缓解入口,覆盖日志、journald、Docker BuildKit cache、allowlisted `/tmp` 诊断目录和显式 trace 遥测留存;默认 `gc run` 不碰数据库,`gc db-trace` 需要单独确认和 `--vacuum-full`,规则见 `docs/reference/cli.md`。
|
||||
- `bun scripts/cli.ts gc plan|run|db-trace|policy`:主 server 磁盘高水位一次性缓解和低风险防膨胀入口,覆盖日志、journald、Docker BuildKit cache、allowlisted `/tmp` 诊断目录、显式 trace 遥测留存和 systemd 定时策略;`gc run` 和 `gc db-trace run` 需要显式确认,规则见 `docs/reference/cli.md`。
|
||||
- `bun scripts/cli.ts server rebuild <backend-core|frontend|dev-frontend-proxy|provider-gateway|todo-note|code-queue-mgr|project-manager|baidu-netdisk|oa-event-flow>`:以 build-first、Compose lock、no-deps force-recreate 和 post-up validation 的异步 job 重建主 server Compose 内单个服务;对 database、File Browser、Code Queue 执行面、k3sctl-adapter 或未知对象返回结构化 `unsupported-server-rebuild`,规则见 `docs/reference/deployment.md` 与 `docs/reference/cicd-standardization.md`。
|
||||
- `bun scripts/cli.ts provider attach <providerId> [--master-server URL] [--up] [--force]` / `bun scripts/cli.ts provider triage <providerId> [--observed-error text] [--observed-scope scope] [--microservice id ...] [--full|--raw]`:前者在新增计算节点上生成两项配置的 provider-gateway 挂载包;后者是只读多信号健康裁决入口,默认低噪声输出 `decision`、`healthyScopes`、`failedScopes`、`retryable` 和异常信号摘要,用来把单路径 `provider is not online`、SSH 超时、registry 失败或 proxy 失败归类为 `retryable-transient`、`service-degraded` 或 `global-offline`,完整 evidence 需显式 `--full|--raw`,规则见 `docs/reference/provider-gateway.md` 和 `docs/reference/code-queue-supervision.md`。
|
||||
- `bun scripts/cli.ts ssh <route> [operation args...]` / `tran <route> [operation args...]`:通过 provider-gateway 的 Host SSH / WSL SSH 维护桥进入 provider、host workspace、Windows cmd route、k3s 控制面或 pod workspace,并提供带 SHA-256 校验的 `upload`/`download` 文件传输;主 server 人工/Codex 分布式操作必须用本机 `tran` wrapper,CLI 参考和可移植脚本可保留完整命令,细则见 `docs/reference/cli.md`、`docs/reference/windows-passthrough.md` 和 `docs/reference/provider-gateway.md`。
|
||||
|
||||
+27
-9
@@ -1,3 +1,9 @@
|
||||
x-unidesk-log-rotation: &unidesk-log-rotation
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "${UNIDESK_DOCKER_LOG_MAX_SIZE:-20m}"
|
||||
max-file: "${UNIDESK_DOCKER_LOG_MAX_FILE:-3}"
|
||||
|
||||
services:
|
||||
database:
|
||||
image: postgres:16-alpine
|
||||
@@ -38,6 +44,7 @@ services:
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 20
|
||||
logging: *unidesk-log-rotation
|
||||
|
||||
backend-core:
|
||||
image: unidesk-backend-core
|
||||
@@ -73,7 +80,7 @@ services:
|
||||
UNIDESK_DEPLOY_COMMIT: "${UNIDESK_DEPLOY_COMMIT:-}"
|
||||
UNIDESK_DEPLOY_REQUESTED_COMMIT: "${UNIDESK_DEPLOY_REQUESTED_COMMIT:-}"
|
||||
LOG_FILE: "/var/log/unidesk/${UNIDESK_LOG_DAY}/${UNIDESK_LOG_PREFIX}_backend-core.jsonl"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-1GiB}"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-512MiB}"
|
||||
volumes:
|
||||
- ${UNIDESK_LOG_DIR}:/var/log/unidesk
|
||||
- ./.state/baidu-netdisk/staging:/data/baidu-netdisk-staging
|
||||
@@ -84,6 +91,7 @@ services:
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 20
|
||||
logging: *unidesk-log-rotation
|
||||
|
||||
code-queue-mgr:
|
||||
image: code-queue-mgr
|
||||
@@ -107,7 +115,7 @@ services:
|
||||
CODE_QUEUE_WORKDIR: "/workspace"
|
||||
CODE_QUEUE_REMOTE_WORKDIR: "${UNIDESK_CODE_QUEUE_REMOTE_WORKDIR:-/home/ubuntu}"
|
||||
LOG_FILE: "/var/log/unidesk/${UNIDESK_LOG_DAY}/${UNIDESK_LOG_PREFIX}_code-queue-mgr.jsonl"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-1GiB}"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-512MiB}"
|
||||
UNIDESK_CODE_QUEUE_MGR_DEPLOY_REF: "${UNIDESK_CODE_QUEUE_MGR_DEPLOY_REF:-deploy.json#environments.prod.services.code-queue-mgr}"
|
||||
UNIDESK_CODE_QUEUE_MGR_DEPLOY_SERVICE_ID: "${UNIDESK_CODE_QUEUE_MGR_DEPLOY_SERVICE_ID:-code-queue-mgr}"
|
||||
UNIDESK_CODE_QUEUE_MGR_DEPLOY_REPO: "${UNIDESK_CODE_QUEUE_MGR_DEPLOY_REPO:-}"
|
||||
@@ -120,6 +128,7 @@ services:
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 20
|
||||
logging: *unidesk-log-rotation
|
||||
|
||||
code-agent-sandbox:
|
||||
image: code-agent-sandbox
|
||||
@@ -155,7 +164,7 @@ services:
|
||||
UNIDESK_DEPLOY_COMMIT: "${UNIDESK_CODE_AGENT_SANDBOX_DEPLOY_COMMIT:-}"
|
||||
UNIDESK_DEPLOY_REQUESTED_COMMIT: "${UNIDESK_CODE_AGENT_SANDBOX_DEPLOY_REQUESTED_COMMIT:-}"
|
||||
LOG_FILE: "/var/log/unidesk/${UNIDESK_LOG_DAY}/${UNIDESK_LOG_PREFIX}_code-agent-sandbox.jsonl"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-1GiB}"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-512MiB}"
|
||||
volumes:
|
||||
- ${UNIDESK_LOG_DIR}:/var/log/unidesk
|
||||
- ./.state/code-agent-sandbox:/var/lib/unidesk/code-agent-sandbox
|
||||
@@ -164,6 +173,7 @@ services:
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 20
|
||||
logging: *unidesk-log-rotation
|
||||
|
||||
auth-broker:
|
||||
image: auth-broker
|
||||
@@ -189,7 +199,7 @@ services:
|
||||
UNIDESK_DEPLOY_COMMIT: "${UNIDESK_AUTH_BROKER_DEPLOY_COMMIT:-}"
|
||||
UNIDESK_DEPLOY_REQUESTED_COMMIT: "${UNIDESK_AUTH_BROKER_DEPLOY_REQUESTED_COMMIT:-}"
|
||||
LOG_FILE: "/var/log/unidesk/${UNIDESK_LOG_DAY}/${UNIDESK_LOG_PREFIX}_auth-broker.jsonl"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-1GiB}"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-512MiB}"
|
||||
volumes:
|
||||
- ${UNIDESK_LOG_DIR}:/var/log/unidesk
|
||||
healthcheck:
|
||||
@@ -197,6 +207,7 @@ services:
|
||||
interval: 30s
|
||||
timeout: 3s
|
||||
retries: 1
|
||||
logging: *unidesk-log-rotation
|
||||
|
||||
todo-note:
|
||||
image: todo-note
|
||||
@@ -245,6 +256,7 @@ services:
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 20
|
||||
logging: *unidesk-log-rotation
|
||||
|
||||
oa-event-flow:
|
||||
image: oa-event-flow
|
||||
@@ -269,7 +281,7 @@ services:
|
||||
PIPELINE_OA_BRIDGE_INTERVAL_MS: "${UNIDESK_PIPELINE_OA_BRIDGE_INTERVAL_MS:-15000}"
|
||||
PIPELINE_OA_BRIDGE_RUN_LIMIT: "${UNIDESK_PIPELINE_OA_BRIDGE_RUN_LIMIT:-50}"
|
||||
LOG_FILE: "/var/log/unidesk/${UNIDESK_LOG_DAY}/${UNIDESK_LOG_PREFIX}_oa-event-flow.jsonl"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-1GiB}"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-512MiB}"
|
||||
UNIDESK_DEPLOY_REF: "${UNIDESK_OA_EVENT_FLOW_DEPLOY_REF:-deploy.json#environments.prod.services.oa-event-flow}"
|
||||
UNIDESK_DEPLOY_SERVICE_ID: "${UNIDESK_OA_EVENT_FLOW_DEPLOY_SERVICE_ID:-oa-event-flow}"
|
||||
UNIDESK_DEPLOY_REPO: "${UNIDESK_OA_EVENT_FLOW_DEPLOY_REPO:-}"
|
||||
@@ -282,6 +294,7 @@ services:
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 20
|
||||
logging: *unidesk-log-rotation
|
||||
|
||||
project-manager:
|
||||
image: project-manager
|
||||
@@ -300,7 +313,7 @@ services:
|
||||
DATABASE_URL: "postgres://${UNIDESK_DATABASE_USER}:${UNIDESK_DATABASE_PASSWORD}@database:5432/${UNIDESK_DATABASE_NAME}"
|
||||
DATABASE_POOL_MAX: "${UNIDESK_PROJECT_MANAGER_DATABASE_POOL_MAX:-1}"
|
||||
LOG_FILE: "/var/log/unidesk/${UNIDESK_LOG_DAY}/${UNIDESK_LOG_PREFIX}_project-manager.jsonl"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-1GiB}"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-512MiB}"
|
||||
UNIDESK_DEPLOY_REF: "${UNIDESK_PROJECT_MANAGER_DEPLOY_REF:-deploy.json#environments.prod.services.project-manager}"
|
||||
UNIDESK_DEPLOY_SERVICE_ID: "${UNIDESK_PROJECT_MANAGER_DEPLOY_SERVICE_ID:-project-manager}"
|
||||
UNIDESK_DEPLOY_REPO: "${UNIDESK_PROJECT_MANAGER_DEPLOY_REPO:-}"
|
||||
@@ -313,6 +326,7 @@ services:
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 20
|
||||
logging: *unidesk-log-rotation
|
||||
|
||||
baidu-netdisk:
|
||||
image: baidu-netdisk
|
||||
@@ -336,7 +350,7 @@ services:
|
||||
BAIDU_NETDISK_APP_ROOT: "${UNIDESK_BAIDU_NETDISK_APP_ROOT:-/}"
|
||||
BAIDU_NETDISK_STAGING_DIR: "/data/staging"
|
||||
LOG_FILE: "/var/log/unidesk/${UNIDESK_LOG_DAY}/${UNIDESK_LOG_PREFIX}_baidu-netdisk.jsonl"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-1GiB}"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-512MiB}"
|
||||
UNIDESK_DEPLOY_REF: "${UNIDESK_BAIDU_NETDISK_DEPLOY_REF:-deploy.json#environments.prod.services.baidu-netdisk}"
|
||||
UNIDESK_DEPLOY_SERVICE_ID: "${UNIDESK_BAIDU_NETDISK_DEPLOY_SERVICE_ID:-baidu-netdisk}"
|
||||
UNIDESK_DEPLOY_REPO: "${UNIDESK_BAIDU_NETDISK_DEPLOY_REPO:-}"
|
||||
@@ -350,6 +364,7 @@ services:
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 20
|
||||
logging: *unidesk-log-rotation
|
||||
|
||||
frontend:
|
||||
build:
|
||||
@@ -377,7 +392,7 @@ services:
|
||||
UNIDESK_DEPLOY_COMMIT: "${UNIDESK_FRONTEND_DEPLOY_COMMIT:-}"
|
||||
UNIDESK_DEPLOY_REQUESTED_COMMIT: "${UNIDESK_FRONTEND_DEPLOY_REQUESTED_COMMIT:-}"
|
||||
LOG_FILE: "/var/log/unidesk/${UNIDESK_LOG_DAY}/${UNIDESK_LOG_PREFIX}_frontend.jsonl"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-1GiB}"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-512MiB}"
|
||||
volumes:
|
||||
- ${UNIDESK_LOG_DIR}:/var/log/unidesk
|
||||
healthcheck:
|
||||
@@ -385,6 +400,7 @@ services:
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 20
|
||||
logging: *unidesk-log-rotation
|
||||
|
||||
dev-frontend-proxy:
|
||||
build:
|
||||
@@ -401,6 +417,7 @@ services:
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
logging: *unidesk-log-rotation
|
||||
|
||||
provider-gateway:
|
||||
image: unidesk_provider-gateway
|
||||
@@ -431,7 +448,7 @@ services:
|
||||
PROVIDER_UPGRADE_SERVICE: "${UNIDESK_PROVIDER_UPGRADE_SERVICE}"
|
||||
PROVIDER_UPGRADE_RUNNER_IMAGE: "${UNIDESK_PROVIDER_UPGRADE_RUNNER_IMAGE}"
|
||||
LOG_FILE: "/var/log/unidesk/${UNIDESK_LOG_DAY}/${UNIDESK_LOG_PREFIX}_provider-gateway.jsonl"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-1GiB}"
|
||||
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-512MiB}"
|
||||
HOST_SSH_HOST: "${UNIDESK_HOST_SSH_HOST}"
|
||||
HOST_SSH_PORT: "${UNIDESK_HOST_SSH_PORT}"
|
||||
HOST_SSH_USER: "${UNIDESK_HOST_SSH_USER}"
|
||||
@@ -445,6 +462,7 @@ services:
|
||||
- ${UNIDESK_LOG_DIR}:/var/log/unidesk
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
logging: *unidesk-log-rotation
|
||||
|
||||
volumes:
|
||||
unidesk_pgdata_10gb:
|
||||
|
||||
@@ -25,7 +25,7 @@ CI/CD、GitOps、rollout、artifact 发布、PR 合并后的 DEV/PROD 滚动、P
|
||||
- `server swap status|ensure [--path /swapfile] [--size 2GiB] [--dry-run]` 是主 server swap 管理入口。`status` 仅读 `/proc/meminfo`、`/proc/swaps` 和 `/etc/fstab` 并返回 JSON;`ensure` 在已有任何 active swap 时只报告 no-op,在无 active swap 时创建固定 swapfile、`chmod 600`、`mkswap`、`swapon` 并尽量写入 `/etc/fstab`。输出必须包含 `before`、`after`、total memory、active swap、持久化状态、关键动作和错误详情;若 swap 已启用但 fstab 写入失败,状态为 `degraded`,调用者需按返回的 detail 修复持久化。
|
||||
- `server logs` 返回 `logs/` 文件日志和 Docker 容器日志的尾部,默认限制输出大小,避免日志爆炸。实现必须只读取文件末尾字节,不得为了 tail 先把巨大日志完整读入 CLI 内存。
|
||||
- `server cleanup plan [--min-age-hours N] [--limit N]` 只生成主 server Docker 镜像清理 dry-run 计划,不执行删除;默认 `--min-age-hours 24`,避免把刚发布或刚验证的镜像列为 stale。输出必须包含 `dryRun=true`、`mutation=false`、`policy.deletionExecuted=false`、active containers/images、受保护镜像、candidate stale images、估算释放空间、风险等级、`commandsToReview` 和人工审批清单。计划必须保守白名单:保留 running containers 使用的 image ID,保留 stopped containers 引用的 image ID 直到人工先复核容器,保留 `deploy.json`/`CI.json` 当前 commit-pinned artifact、Compose stable image、上游 digest pin 和 provider-gateway runner image;`protectedStorage` 必须显式列出 PostgreSQL named volume、Baidu Netdisk `.state`、D601 registry storage 和 Docker volumes/host data policy。该入口禁止生成或执行 `docker system prune`、`docker image prune`、`docker builder prune`、`docker volume rm`、`docker compose down -v`、数据库清理或 host data `rm` 命令;未来若增加真实删除,必须另设显式审批参数并先复核 dry-run 输出。
|
||||
- `gc plan|run --confirm` 是主 server 磁盘高水位的一次性短期缓解入口,覆盖 UniDesk 文件日志、Docker `json-file` 容器日志、systemd journal、Docker BuildKit cache 和 allowlisted `/tmp` 诊断目录。`gc plan` 默认只读并返回候选、风险、估算释放空间、受保护对象和数据库只读摘要;候选列表默认按释放空间排序限制为 50 条,`--limit N` 调整页大小,`--full|--raw` 才展开全部候选。输出中的 `estimatedReclaim` 表示全量候选估算,`returnedEstimatedReclaim` 表示当前输出页估算。`gc run` 必须显式 `--confirm`,只执行当前输出候选中的短期清理动作,因此默认不会执行被分页隐藏的候选。`gc` 不删除 Docker image/container/volume,不触碰 PostgreSQL PGDATA,不删除 Baidu Netdisk staging/backups 或 D601 registry storage;默认路径中数据库只做诊断摘要。`gc db-trace plan|run --confirm --before-date YYYY-MM-DD --vacuum-full` 是显式数据库 trace 遥测留存入口,只删除 `oa_events` 中默认 trace 高频事件类型在指定日期前的数据,并在 `--vacuum-full` 下重写 `public.oa_events` 让 `df` 真正回收磁盘;执行前必须确认近期 PostgreSQL basebackup,且应视为维护窗口操作。常用参数包括 `--logs-keep-days N`、`--file-log-max-bytes SIZE`、`--file-log-tail-bytes SIZE`、`--docker-log-max-bytes SIZE`、`--journal-target-size SIZE`、`--build-cache-until 24h`、显式清空全部 BuildKit cache 的 `--build-cache-all`、`--tmp-min-age-hours N` 和显式 `--include-browser-cache`。
|
||||
- `gc plan|run --confirm` 是主 server 磁盘高水位的一次性短期缓解入口,覆盖 UniDesk 文件日志、Docker `json-file` 容器日志、systemd journal、Docker BuildKit cache 和 allowlisted `/tmp` 诊断目录。`gc plan` 默认只读并返回候选、风险、估算释放空间、受保护对象和数据库只读摘要;候选列表默认按释放空间排序限制为 50 条,`--limit N` 调整页大小,`--full|--raw` 才展开全部候选。输出中的 `estimatedReclaim` 表示全量候选估算,`returnedEstimatedReclaim` 表示当前输出页估算;`gc run` 默认只返回前 50 条结果,`--result-limit N` 调整结果页大小,避免 GC 自身输出膨胀。`gc run` 必须显式 `--confirm`,只执行当前输出候选中的短期清理动作,因此默认不会执行被分页隐藏的候选。`gc` 不删除 Docker image/container/volume,不触碰 PostgreSQL PGDATA,不删除 Baidu Netdisk staging/backups 或 D601 registry storage;默认路径中数据库只做诊断摘要。`gc policy plan|install` 渲染或安装低风险长期策略:journald 512MiB 上限和每日 `unidesk-gc.timer`,该 timer 只运行文件日志与 allowlisted `/tmp` 低风险 GC,不主动 vacuum journal,不触碰数据库、Docker image/volume 或 Baidu staging,输出写入 `.state/gc/`。`gc db-trace plan|run --confirm --before-date YYYY-MM-DD --vacuum-full` 是显式数据库 trace 遥测留存入口,只删除 `oa_events` 中默认 trace 高频事件类型在指定日期前的数据,并在 `--vacuum-full` 下重写 `public.oa_events` 让 `df` 真正回收磁盘;执行前必须确认近期 PostgreSQL basebackup,且应视为维护窗口操作。常用参数包括 `--logs-keep-days N`、`--file-log-max-bytes SIZE`、`--file-log-tail-bytes SIZE`、`--docker-log-max-bytes SIZE`、`--journal-target-size SIZE`、`--build-cache-until 24h`、显式清空全部 BuildKit cache 的 `--build-cache-all`、`--tmp-min-age-hours N` 和显式 `--include-browser-cache`。
|
||||
- `server rebuild <backend-core|frontend|dev-frontend-proxy|provider-gateway|todo-note|code-queue-mgr|project-manager|baidu-netdisk|oa-event-flow>` 创建异步 job,先构建目标服务镜像,随后在 `.state/locks/server-compose.lock` 串行保护下用 `--no-deps --force-recreate` 替换目标 service 并等待容器 `healthy/running`;该命令用于替代手工删除容器的兜底流程,其中 `dev-frontend-proxy` 只更新主 server dev 入口薄代理,`todo-note`、`code-queue-mgr`、`project-manager`、`baidu-netdisk` 和 `oa-event-flow` 只重建主 server 承载的对应后端,不会重建或删除 database 命名卷。D601 Code Queue 执行面不由 `server rebuild` 管理,Rust backend-core 迭代不得用 `server rebuild backend-core` 在 master server 编译,规则见 `docs/reference/dev-environment.md`。
|
||||
- `provider attach <providerId> [--master-server URL] [--up] [--force]` 在新计算节点生成两项配置的 provider-gateway 挂载包:`.state/provider-<ID>.env` 默认只包含 `UNIDESK_MASTER_SERVER` 与 `PROVIDER_ID`,`provider-<ID>.yml` 固定 Docker socket、`pid: "host"`、`restart: always`、只读 `/workspace` 和 SSH 维护私钥挂载;`--up` 会立即执行生成的 `docker compose up -d --build`。`provider triage <providerId> [--observed-error text] [--observed-scope scope] [--microservice id ...] [--full|--raw]` 是只读多信号健康裁决入口,会把单路径 `provider is not online`、SSH 超时、registry 失败和 service proxy 失败归类成 `runner-local-observation-gap`、`service-degraded`、`provider-degraded` 或 `global-blocker`。默认输出只返回裁决、scope、失败/降级/未知信号和有界 evidence 摘要,完整 evidence 必须显式加 `--full` 或 `--raw`;推荐交叉验证命令仍包含 `debug health`、`debug dispatch <providerId> host.ssh --wait-ms 15000`、`ssh <providerId> argv true`、`artifact-registry health --provider-id <providerId>`、`microservice health k3sctl-adapter`、`microservice health code-queue` 和 `codex tasks --view supervisor --limit 20`。
|
||||
- `ssh <route> [operation args...]` / `tran <route> [operation args...]` 通过 backend-core 内网 WebSocket broker 和 provider-gateway 的 Host SSH / WSL SSH 维护桥连接目标节点;`route` 基础形态是 provider id,例如 `D601` 或 `G14`,也可以扩展为纯定位路径 `provider:plane[:namespace:resource[:container]]`,例如 `D601:win`、`D601:win/c/test`、`G14:k3s`、`D601:k3s` 或 `G14:k3s:<namespace>:<workload>`。WSL provider 的 Windows cmd 入口固定写 `tran D601:win cmd <command-line>`,需要 Windows cwd 时用 `tran D601:win/c/test cmd cd`,由 CLI 自动设置 `chcp 65001`、`PYTHONUTF8=1` 和 `PYTHONIOENCODING=utf-8`;命名只允许 `win`,不得使用 `win32`。非交互远端命令优先使用 `ssh <providerId> argv ...`;需要 shell 脚本、管道、变量或循环时优先使用 quoted heredoc 单步传输,例如 `tran G14 script <<'SCRIPT'`、`tran G14:k3s script <<'SCRIPT'` 或 `tran G14:k3s:<namespace>:<workload> script <<'SCRIPT'`,把脚本走 stdin。`script -- '<单个字符串>'` 是无需 stdin 的远端 shell one-liner,例如 `tran G14:/root/hwlab script -- 'cd /root/hwlab && git status --short --branch'`;`script -- <多个 argv>` 才是 direct argv,适合 `tran D601:/path script -- sed -n '1,20p' file` 这类带短横线的单进程命令。顶层 remote option parser 必须保留命令已经开始后的 `--`,不得把它吞成全局选项结束符。需要远端改文本文件时默认优先使用 `<route> apply-patch < patch.diff`;需要可靠传输非文本或整文件时使用 `<route> upload <local-file> <remote-file>` 和 `<route> download <remote-file> <local-file>`,CLI 会按字节数与 SHA-256 自动校验并在 provider-gateway stdin/argv 限制下切换客户端分块策略;需要旧 helper 时显式使用 `<provider>:k3s:<namespace>:<workload> apply-patch-v1` 或 `<providerId> apply-patch-v1`。ssh-like 命令遇到 timeout/kex/255 类失败时,CLI 会在 stderr 追加一行 `UNIDESK_SSH_HINT` JSON,提示 stdin script/argv 重试和 provider triage 交叉验证。
|
||||
|
||||
@@ -10,7 +10,11 @@ UniDesk 的可观测性优先级高于静默成功。CLI、服务日志、Docker
|
||||
|
||||
服务日志位于 `logs/{YYYYMMDD}/`,每次 `server start` 都生成新的本地时间戳前缀。新写入的 UniDesk JSONL 日志必须按小时切片:`logs/{YYYYMMDD}/{startStamp}_{YYYYMMDD}_{HH}_{service}.jsonl`,一天一个目录,禁止长期追加到单个巨大 JSONL。所有 UniDesk Bun 服务(frontend、provider-gateway、Code Queue、project-manager、baidu-netdisk 以及后续新增 Bun 服务)必须复用 `src/components/shared/src/rotating-jsonl.ts` 中的 `createHourlyJsonlWriter`;Rust backend-core 必须在 `src/components/backend-core/src/logger.rs` 中提供等价的按小时切片与保留清理行为。`LOG_FILE` 只作为推导 `logs` 根目录、启动前缀和 service 后缀的 base path,不得长期追加到单个文件。database 通过 PostgreSQL logging collector 写入同一日期目录。
|
||||
|
||||
日志保留默认按日志族限制为 `1GiB`:服务写入或 Code Queue 导出日志时必须扫描同一 service 后缀的历史文件,超过上限后自动删除最旧切片;当前活跃切片不能被保留清理删除。全局上限由 `UNIDESK_LOG_RETENTION_BYTES` 控制,服务级上限使用 `UNIDESK_<SERVICE>_LOG_MAX_BYTES`(如 `UNIDESK_FRONTEND_LOG_MAX_BYTES`、`UNIDESK_PROVIDER_GATEWAY_LOG_MAX_BYTES`),历史兼容变量只允许作为过渡入口。Codex app-server 的 `logs_*.sqlite` 仅作为 Codex 上游运行时的短暂缓冲,Code Queue 必须周期性导出为同样按小时切片的 `codex-app-server` JSONL,并删除/压缩已导出的 SQLite 行,避免 `logs_2.sqlite` 成为长期大文件。
|
||||
日志保留默认按日志族限制为 `512MiB`:服务写入或 Code Queue 导出日志时必须扫描同一 service 后缀的历史文件,超过上限后自动删除最旧切片;当前活跃切片不能被保留清理删除。全局上限由 `UNIDESK_LOG_RETENTION_BYTES` 控制,服务级上限使用 `UNIDESK_<SERVICE>_LOG_MAX_BYTES`(如 `UNIDESK_FRONTEND_LOG_MAX_BYTES`、`UNIDESK_PROVIDER_GATEWAY_LOG_MAX_BYTES`),历史兼容变量只允许作为过渡入口。主 server Compose 服务必须启用 Docker `json-file` 轮转,默认 `UNIDESK_DOCKER_LOG_MAX_SIZE=20m`、`UNIDESK_DOCKER_LOG_MAX_FILE=3`;该配置只对新创建的容器生效,已有容器需经服务重建(重新创建容器)后才会应用。Codex app-server 的 `logs_*.sqlite` 仅作为 Codex 上游运行时的短暂缓冲,Code Queue 必须周期性导出为同样按小时切片的 `codex-app-server` JSONL,并删除/压缩已导出的 SQLite 行,避免 `logs_2.sqlite` 成为长期大文件。
|
||||
|
||||
主 server 应安装 `bun scripts/cli.ts gc policy install` 渲染的低风险防膨胀策略:systemd journal 上限 `512MiB`,并启用每日 `unidesk-gc.timer` 执行文件日志与 allowlisted `/tmp` 低风险 GC。该 timer 不主动 vacuum journal,不触碰数据库、Docker image/volume 或 Baidu staging;输出固定写入 `.state/gc/last-run.json` 和 `.state/gc/last-run.stderr`,不得把全量候选 JSON 打进 systemd journal;数据库 trace 留存仍必须由 `gc db-trace` 显式维护,不得加入默认 timer。
|
||||
|
||||
OA Event Flow 的高频 trace 统计不得把每个 `trace-stats-updated` 投影事件长期写入 `oa_events`;持久化真相是 `oa_trace_stats` 与 `oa_trace_steps`,SSE/API 发布时可以返回短暂投影通知用于实时 UI 刷新。需要历史回收时只通过 `gc db-trace plan|run` 做显式维护窗口操作,禁止把数据库 VACUUM FULL 或 trace 表大规模删除接入默认 timer。
|
||||
|
||||
新增或迁移服务的长期规范:Dockerfile 必须把 `src/components/shared` 复制到与仓库相同的相对路径,TypeScript 配置必须能解析 shared 引用,Compose 必须传入 `LOG_FILE` 和 `UNIDESK_LOG_RETENTION_BYTES`;如果服务需要在内存中暴露 `/logs`,可以继续维护有限 `recentLogs`,但落盘只能通过统一 hourly writer。业务归档日志(例如 Code Queue task output archive)可以保留 append-only 文件,但不得复用 UniDesk service JSONL 命名族,也不得替代 `/logs` 的结构化服务日志。
|
||||
|
||||
|
||||
@@ -222,7 +222,9 @@ export function writeComposeEnv(config: UniDeskConfig, freshLogPrefix: boolean):
|
||||
UNIDESK_LOG_DIR: logRoot,
|
||||
UNIDESK_LOG_DAY: logDay,
|
||||
UNIDESK_LOG_PREFIX: logPrefix,
|
||||
UNIDESK_LOG_RETENTION_BYTES: runtimeSecret("UNIDESK_LOG_RETENTION_BYTES") || "1GiB",
|
||||
UNIDESK_LOG_RETENTION_BYTES: runtimeSecretWithDefault("UNIDESK_LOG_RETENTION_BYTES", "512MiB", "1GiB"),
|
||||
UNIDESK_DOCKER_LOG_MAX_SIZE: runtimeSecret("UNIDESK_DOCKER_LOG_MAX_SIZE") || "20m",
|
||||
UNIDESK_DOCKER_LOG_MAX_FILE: runtimeSecret("UNIDESK_DOCKER_LOG_MAX_FILE") || "3",
|
||||
UNIDESK_HOST_ROOT_SSH_DIR: process.env.UNIDESK_HOST_ROOT_SSH_DIR || "/root/.ssh",
|
||||
UNIDESK_HOST_SSH_KEY_DIR: config.sshForwarding.keyDir,
|
||||
UNIDESK_HOST_SSH_HOST: config.sshForwarding.host,
|
||||
|
||||
+190
-3
@@ -1,5 +1,5 @@
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { closeSync, existsSync, ftruncateSync, lstatSync, openSync, readdirSync, readSync, rmSync, statSync, unlinkSync, writeSync } from "node:fs";
|
||||
import { closeSync, existsSync, ftruncateSync, lstatSync, mkdirSync, openSync, readdirSync, readSync, rmSync, statSync, unlinkSync, writeFileSync, writeSync } from "node:fs";
|
||||
import { basename, join, resolve } from "node:path";
|
||||
|
||||
import { type UniDeskConfig, repoRoot, rootPath } from "./config";
|
||||
@@ -31,6 +31,7 @@ interface GcOptions {
|
||||
browserCache: boolean;
|
||||
dbSummary: boolean;
|
||||
limit: number;
|
||||
resultLimit: number;
|
||||
full: boolean;
|
||||
}
|
||||
|
||||
@@ -41,6 +42,11 @@ interface DbTraceGcOptions {
|
||||
confirm: boolean;
|
||||
}
|
||||
|
||||
/**
 * Flags accepted by `gc policy plan|install`.
 */
interface GcPolicyOptions {
  /** Set by `--dry-run`; `gc policy install` then returns the rendered plan instead of writing files. */
  dryRun: boolean;
  /** True unless `--no-enable-now` is given; controls whether install runs `systemctl enable --now unidesk-gc.timer`. */
  enableNow: boolean;
}
|
||||
|
||||
interface DiskSnapshot {
|
||||
filesystem: string;
|
||||
sizeBytes: number;
|
||||
@@ -114,6 +120,9 @@ interface GcRunResult {
|
||||
failedCount: number;
|
||||
estimatedReclaimBytes: number;
|
||||
actualDiskReclaimBytes: number | null;
|
||||
resultCount: number;
|
||||
returnedResultCount: number;
|
||||
omittedResultCount: number;
|
||||
};
|
||||
results: Array<GcCandidate & { status: "succeeded" | "failed"; reclaimedBytes: number | null; error?: string; commandOutput?: unknown }>;
|
||||
protected: ProtectedGcItem[];
|
||||
@@ -136,6 +145,7 @@ const DEFAULT_OPTIONS: GcOptions = {
|
||||
browserCache: false,
|
||||
dbSummary: true,
|
||||
limit: 50,
|
||||
resultLimit: 50,
|
||||
full: false,
|
||||
};
|
||||
|
||||
@@ -173,6 +183,18 @@ const TMP_EXACT_PROTECT = new Set([
|
||||
|
||||
export async function runGcCommand(config: UniDeskConfig, args: string[]): Promise<unknown> {
|
||||
const [action = "plan", ...rest] = args;
|
||||
if (action === "policy") {
|
||||
const [subaction = "plan", ...policyArgs] = rest;
|
||||
const options = parseGcPolicyOptions(policyArgs);
|
||||
if (subaction === "plan" || subaction === "render" || subaction === "dry-run") return gcPolicyPlan(options);
|
||||
if (subaction === "install") return gcPolicyInstall(options);
|
||||
return {
|
||||
ok: false,
|
||||
error: "unsupported-gc-policy-action",
|
||||
action: subaction,
|
||||
supportedActions: ["plan", "render", "dry-run", "install"],
|
||||
};
|
||||
}
|
||||
if (action === "db-trace" || action === "db-trace-retention") {
|
||||
const [subaction = "plan", ...dbArgs] = rest;
|
||||
const options = parseDbTraceGcOptions(dbArgs);
|
||||
@@ -221,7 +243,7 @@ export async function runGcCommand(config: UniDeskConfig, args: string[]): Promi
|
||||
ok: false,
|
||||
error: "unsupported-gc-action",
|
||||
action,
|
||||
supportedActions: ["plan", "run"],
|
||||
supportedActions: ["plan", "run", "db-trace", "policy"],
|
||||
};
|
||||
}
|
||||
|
||||
@@ -324,6 +346,7 @@ export function gcRun(config: UniDeskConfig, options: GcOptions = DEFAULT_OPTION
|
||||
const failedCount = results.filter((item) => item.status === "failed").length;
|
||||
const estimatedReclaimBytes = plan.summary.returnedEstimatedReclaimBytes;
|
||||
const actualDiskReclaimBytes = diskBefore !== null && diskAfter !== null ? diskAfter.availableBytes - diskBefore.availableBytes : null;
|
||||
const returnedResults = returnedRunResults(results, options);
|
||||
|
||||
return {
|
||||
ok: failedCount === 0,
|
||||
@@ -341,8 +364,11 @@ export function gcRun(config: UniDeskConfig, options: GcOptions = DEFAULT_OPTION
|
||||
failedCount,
|
||||
estimatedReclaimBytes,
|
||||
actualDiskReclaimBytes,
|
||||
resultCount: results.length,
|
||||
returnedResultCount: returnedResults.length,
|
||||
omittedResultCount: Math.max(0, results.length - returnedResults.length),
|
||||
},
|
||||
results,
|
||||
results: returnedResults,
|
||||
protected: plan.protected,
|
||||
};
|
||||
}
|
||||
@@ -389,6 +415,10 @@ function parseGcOptions(args: string[]): GcOptions {
|
||||
const value = parseNonNegativeNumber(arg, args[++index]);
|
||||
if (!Number.isInteger(value) || value <= 0) throw new Error("--limit must be a positive integer");
|
||||
options.limit = Math.min(value, 5000);
|
||||
} else if (arg === "--result-limit") {
|
||||
const value = parseNonNegativeNumber(arg, args[++index]);
|
||||
if (!Number.isInteger(value) || value <= 0) throw new Error("--result-limit must be a positive integer");
|
||||
options.resultLimit = Math.min(value, 5000);
|
||||
} else if (arg === "--full" || arg === "--raw") {
|
||||
options.full = true;
|
||||
} else if (arg === "--confirm" || arg === "--dry-run") {
|
||||
@@ -437,6 +467,18 @@ function parseDbTraceGcOptions(args: string[]): DbTraceGcOptions {
|
||||
return options;
|
||||
}
|
||||
|
||||
/**
 * Parses the flags that follow `gc policy <subaction>`.
 *
 * Recognized flags are `--dry-run` (sets `dryRun`) and `--no-enable-now`
 * (clears `enableNow`, which is true by default).
 *
 * @param args Raw arguments after the policy sub-action.
 * @returns The parsed policy options.
 * @throws Error naming the first argument that is not a recognized flag.
 */
function parseGcPolicyOptions(args: string[]): GcPolicyOptions {
  let dryRun = false;
  let enableNow = true;
  for (const flag of args) {
    switch (flag) {
      case "--dry-run":
        dryRun = true;
        break;
      case "--no-enable-now":
        enableNow = false;
        break;
      default:
        throw new Error(`unknown gc policy option: ${flag}`);
    }
  }
  return { dryRun, enableNow };
}
|
||||
|
||||
function parseNonNegativeNumber(name: string, raw: string | undefined): number {
|
||||
const value = Number(raw);
|
||||
if (!Number.isFinite(value) || value < 0) throw new Error(`${name} must be a non-negative number`);
|
||||
@@ -481,6 +523,7 @@ function publicOptions(options: GcOptions): Record<string, unknown> {
|
||||
browserCache: options.browserCache,
|
||||
dbSummary: options.dbSummary,
|
||||
limit: options.limit,
|
||||
resultLimit: options.resultLimit,
|
||||
full: options.full,
|
||||
};
|
||||
}
|
||||
@@ -799,6 +842,143 @@ function gcDbTraceRun(options: DbTraceGcOptions): unknown {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Renders the low-risk anti-bloat policy without touching the host.
 *
 * The result carries the files produced by `gcPolicyFiles()`, the commands an
 * install would run, and a human-readable description of the policy scope.
 *
 * @param options Parsed `gc policy` flags; echoed back in the result.
 * @returns A read-only plan object (`dryRun: true`, `mutation: false`).
 */
function gcPolicyPlan(options: GcPolicyOptions): unknown {
  const files = gcPolicyFiles();
  const install: string[][] = [
    ["mkdir", "-p", "/etc/systemd/journald.conf.d", "/etc/systemd/system"],
    ["systemctl", "daemon-reload"],
  ];
  // `gc policy install` skips enabling the timer under --no-enable-now, so the
  // plan must not advertise that step either.
  if (options.enableNow) {
    install.push(["systemctl", "enable", "--now", "unidesk-gc.timer"]);
  }
  return {
    ok: true,
    action: "gc policy plan",
    dryRun: true,
    mutation: false,
    observedAt: new Date().toISOString(),
    options,
    files,
    commands: {
      install,
      journalRestart: ["systemctl", "restart", "systemd-journald"],
    },
    policy: {
      safeScope: [
        "systemd journal is capped at 512MiB",
        "daily timer runs file-log and allowlisted /tmp low-risk gc only",
        "timer does not touch PostgreSQL PGDATA, Docker images, Docker volumes or Baidu Netdisk staging",
        "timer output is redirected under .state/gc and capped by gc --result-limit",
      ],
      manualDbRetention: "gc db-trace remains explicit maintenance and is not scheduled automatically.",
    },
  };
}
|
||||
|
||||
/**
 * Installs the rendered policy files and activates them through systemctl.
 *
 * With `options.dryRun` the rendered plan is returned and nothing is written.
 * Otherwise the target directories are created, the journald drop-in, service
 * and timer files are written, and systemd is reloaded; the timer is enabled
 * only when `options.enableNow` is set, and journald is restarted.
 *
 * @param options Parsed `gc policy` flags.
 * @returns The plan (dry run) or an install report whose `ok` is false when
 *   daemon-reload, the journald restart, or the attempted timer enable exited
 *   non-zero.
 */
function gcPolicyInstall(options: GcPolicyOptions): unknown {
  const plan = gcPolicyPlan(options);
  if (options.dryRun) return plan;

  const files = gcPolicyFiles();
  for (const directory of ["/etc/systemd/journald.conf.d", "/etc/systemd/system", rootPath(".state", "gc")]) {
    mkdirSync(directory, { recursive: true });
  }
  for (const rendered of [files.journald, files.service, files.timer]) {
    writeFileSync(rendered.path, rendered.content, "utf8");
  }

  // Command order matters: reload units, optionally enable the timer, restart journald, then probe.
  const reloadOutcome = command(["systemctl", "daemon-reload"], 15000);
  const enableOutcome = !options.enableNow
    ? null
    : command(["systemctl", "enable", "--now", "unidesk-gc.timer"], 15000);
  const journaldOutcome = command(["systemctl", "restart", "systemd-journald"], 15000);
  const enabledProbe = command(["systemctl", "is-enabled", "unidesk-gc.timer"], 5000);
  const activeProbe = command(["systemctl", "is-active", "unidesk-gc.timer"], 5000);

  const results = {
    daemonReload: boundedCommandOutput(reloadOutcome),
    enableTimer: enableOutcome === null ? { skipped: true } : boundedCommandOutput(enableOutcome),
    restartJournald: boundedCommandOutput(journaldOutcome),
    timerStatus: {
      enabled: enabledProbe.stdout.trim(),
      active: activeProbe.stdout.trim(),
    },
  };

  // The two status probes are informational only and never affect `ok`.
  const succeeded = reloadOutcome.exitCode === 0
    && journaldOutcome.exitCode === 0
    && (enableOutcome === null || enableOutcome.exitCode === 0);

  return {
    ok: succeeded,
    action: "gc policy install",
    dryRun: false,
    mutation: true,
    observedAt: new Date().toISOString(),
    options,
    files,
    results,
    plan,
  };
}
|
||||
|
||||
/**
 * Renders the three files that make up the anti-bloat policy: the journald
 * drop-in, the oneshot `unidesk-gc.service`, and the daily `unidesk-gc.timer`.
 *
 * The service runs `gc run --confirm` with database summary, build cache,
 * Docker logs and journal handling disabled, and redirects stdout/stderr to
 * `last-run.json` / `last-run.stderr` under `.state/gc`.
 *
 * @returns A map keyed by `journald`, `service` and `timer`, each holding the
 *   destination path and full file content.
 */
function gcPolicyFiles(): Record<string, { path: string; content: string }> {
  const gcStateDir = rootPath(".state", "gc");
  const bunPath = bunExecutablePath();
  // Derived from repoRoot, like WorkingDirectory and the script below, so the
  // unit does not point at a hard-coded /root/unidesk checkout.
  const cliDocPath = join(repoRoot, "docs", "reference", "cli.md");
  const gcScript = `cd ${shellQuote(repoRoot)} && mkdir -p ${shellQuote(gcStateDir)} && ${shellQuote(bunPath)} scripts/cli.ts gc run --confirm --no-db-summary --no-build-cache --no-docker-logs --no-journal --limit 5000 --result-limit 25 > ${shellQuote(join(gcStateDir, "last-run.json"))} 2> ${shellQuote(join(gcStateDir, "last-run.stderr"))}`;
  return {
    journald: {
      path: "/etc/systemd/journald.conf.d/unidesk-gc.conf",
      content: [
        "[Journal]",
        "SystemMaxUse=512M",
        "RuntimeMaxUse=128M",
        "MaxRetentionSec=7day",
        "",
      ].join("\n"),
    },
    service: {
      path: "/etc/systemd/system/unidesk-gc.service",
      content: [
        "[Unit]",
        "Description=UniDesk low-risk disk garbage collection",
        `Documentation=file://${cliDocPath}`,
        "",
        "[Service]",
        "Type=oneshot",
        `WorkingDirectory=${repoRoot}`,
        // The whole script is one double-quoted systemd argument handed to bash.
        `ExecStart=/bin/bash -lc "${systemdDoubleQuoted(gcScript)}"`,
        "",
      ].join("\n"),
    },
    timer: {
      path: "/etc/systemd/system/unidesk-gc.timer",
      content: [
        "[Unit]",
        "Description=Daily UniDesk low-risk disk garbage collection",
        "",
        "[Timer]",
        "OnCalendar=*-*-* 03:20:00",
        "RandomizedDelaySec=20m",
        "Persistent=true",
        "",
        "[Install]",
        "WantedBy=timers.target",
        "",
      ].join("\n"),
    },
  };
}
|
||||
|
||||
/**
 * Picks the bun executable to reference from generated unit files.
 *
 * Checks, in order, `/usr/bin/bun`, `/root/.bun/bin/bun` and `process.argv[0]`,
 * and returns the first one that exists on disk as a resolved absolute path.
 * Falls back to the bare command name `"bun"` when none of them exists.
 */
function bunExecutablePath(): string {
  const knownLocations = ["/usr/bin/bun", "/root/.bun/bin/bun", process.argv[0] ?? ""];
  const found = knownLocations.find((location) => location !== "" && existsSync(location));
  return found === undefined ? "bun" : resolve(found);
}
|
||||
|
||||
/**
 * Escapes text for embedding inside a double-quoted argument on a systemd
 * command line, e.g. `ExecStart=/bin/bash -lc "<value>"`.
 *
 * - `\` and `"` are backslash-escaped so the surrounding double quotes stay intact.
 * - `%` is doubled so systemd does not treat it as a unit specifier.
 * - `$` is doubled so systemd does not perform its own environment-variable
 *   substitution; the program being started then receives a single literal `$`.
 *
 * @param value Raw text that should reach the executed program unchanged.
 * @returns The escaped text, without the surrounding double quotes.
 */
function systemdDoubleQuoted(value: string): string {
  // A single pass with a callback is used on purpose: in a replacement *string*
  // the sequence "$$" would collapse to one "$", whereas callback results are
  // inserted verbatim.
  return value.replace(/[\\"%$]/gu, (char) => {
    switch (char) {
      case "\\":
        return "\\\\";
      case "\"":
        return "\\\"";
      case "%":
        return "%%";
      default:
        return "$$";
    }
  });
}
|
||||
|
||||
/**
 * Wraps a value in single quotes for use as one POSIX shell word.
 *
 * Each embedded single quote is rewritten as `'\''` (close the quote, emit an
 * escaped quote, reopen the quote).
 */
function shellQuote(value: string): string {
  const escaped = value.split("'").join("'\\''");
  return "'" + escaped + "'";
}
|
||||
|
||||
/**
 * Renders a value as a single-quoted SQL string literal, doubling every
 * embedded single quote.
 */
function sqlLiteral(value: string): string {
  const doubled = value.split("'").join("''");
  return "'" + doubled + "'";
}
|
||||
@@ -929,6 +1109,13 @@ function summarizeCandidates(candidates: GcCandidate[], returnedCandidates: GcCa
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Selects which per-candidate run results are returned to the caller.
 *
 * When `options.full` is set the results are returned unchanged. Otherwise the
 * output lists entries with status "failed" first, followed by entries with
 * status "succeeded", truncated to `options.resultLimit` items. Entries with
 * any other status are left out of the bounded page.
 */
function returnedRunResults(results: GcRunResult["results"], options: GcOptions): GcRunResult["results"] {
  if (options.full) {
    return results;
  }
  const withStatus = (wanted: string) => results.filter((entry) => entry.status === wanted);
  // Failures go first so truncation never hides them behind successes.
  return withStatus("failed").concat(withStatus("succeeded")).slice(0, options.resultLimit);
}
|
||||
|
||||
function collectFiles(root: string): Array<{ path: string; sizeBytes: number; mtimeMs: number }> {
|
||||
const result: Array<{ path: string; sizeBytes: number; mtimeMs: number }> = [];
|
||||
const visit = (dir: string): void => {
|
||||
|
||||
+6
-2
@@ -18,7 +18,7 @@ export function rootHelp(): unknown {
|
||||
{ command: "server swap status|ensure [--path /swapfile] [--size 2GiB] [--dry-run]", description: "Inspect or idempotently create host swap for low-memory main-server operation." },
|
||||
{ command: "server logs [--tail-bytes N]", description: "Return bounded tails from file logs and docker logs." },
|
||||
{ command: "server cleanup plan [--min-age-hours N] [--limit N]", description: "Dry-run Docker image cleanup plan only: list active/protected images, stale candidates older than the default 24h threshold, risk, estimated reclaim, and manual review commands without deleting anything." },
|
||||
{ command: "gc plan|run|db-trace --confirm [--logs-keep-days N] [--include-browser-cache]", description: "One-time main-server disk relief for logs, journald, Docker build cache, allowlisted /tmp artifacts and explicit trace telemetry retention; plan is read-only and run requires --confirm." },
|
||||
{ command: "gc plan|run|db-trace|policy [--confirm] [--logs-keep-days N] [--include-browser-cache]", description: "One-time main-server disk relief and low-risk anti-bloat policy for logs, journald, allowlisted /tmp artifacts and explicit trace telemetry retention; plan is read-only and run requires --confirm." },
|
||||
{ command: "server rebuild <backend-core|frontend|dev-frontend-proxy|provider-gateway|todo-note|code-queue-mgr|project-manager|baidu-netdisk|oa-event-flow>", description: "Maintenance-only local Compose rebuild for reviewed main-server services; frontend standard release must use CI artifact plus deploy apply dev/prod artifact consumers." },
|
||||
{ command: "provider attach <providerId> [--master-server URL] [--up] [--force] | provider triage <providerId> [--observed-error text] [--observed-scope scope] [--microservice id ...] [--full|--raw]", description: "Generate the minimal external provider-gateway env/compose bundle or run the low-noise read-only provider health triage contract." },
|
||||
{ command: "ssh <route> [operation args...]", description: "Open a Host SSH / WSL SSH maintenance session through the provider-gateway bridge; route syntax such as `G14:k3s` or `D601:win/c/test` only locates distributed targets." },
|
||||
@@ -263,7 +263,7 @@ function providerHelp(): unknown {
|
||||
|
||||
function gcHelp(): unknown {
|
||||
return {
|
||||
command: "gc plan|run|db-trace",
|
||||
command: "gc plan|run|db-trace|policy",
|
||||
output: "json",
|
||||
usage: [
|
||||
"bun scripts/cli.ts gc plan",
|
||||
@@ -273,6 +273,8 @@ function gcHelp(): unknown {
|
||||
"bun scripts/cli.ts gc run --confirm --include-browser-cache",
|
||||
"bun scripts/cli.ts gc db-trace plan --before-date 2026-05-25",
|
||||
"bun scripts/cli.ts gc db-trace run --confirm --before-date 2026-05-25 --vacuum-full",
|
||||
"bun scripts/cli.ts gc policy plan",
|
||||
"bun scripts/cli.ts gc policy install",
|
||||
"bun scripts/cli.ts gc plan --full",
|
||||
],
|
||||
description: "Plan or execute bounded one-time main-server disk relief for file logs, Docker json logs, systemd journal, Docker BuildKit cache, allowlisted /tmp artifacts and explicitly scoped database trace telemetry retention.",
|
||||
@@ -292,10 +294,12 @@ function gcHelp(): unknown {
|
||||
"--build-cache-all": "prune all Docker builder cache without an until filter",
|
||||
"--tmp-min-age-hours N": "delete allowlisted /tmp artifacts older than N hours; default 24",
|
||||
"--limit N": "number of candidates returned and executed by run when --full is not set; default 50",
|
||||
"--result-limit N": "number of per-candidate run results returned when --full is not set; default 50",
|
||||
"--full|--raw": "return and run against all candidates rather than the default bounded page",
|
||||
"--include-browser-cache": "also remove repo-local .state/playwright-browsers cache",
|
||||
"db-trace --before-date YYYY-MM-DD": "plan or delete default trace telemetry event types before the date",
|
||||
"db-trace run --vacuum-full": "rewrite public.oa_events after deletion so df can reclaim disk; requires maintenance window",
|
||||
"policy plan|install": "render or install journald caps and a daily file-log plus allowlisted /tmp low-risk gc systemd timer",
|
||||
"--no-file-logs|--no-docker-logs|--no-journal|--no-build-cache|--no-tmp|--no-db-summary": "disable one collector",
|
||||
},
|
||||
reference: "docs/reference/cli.md",
|
||||
|
||||
@@ -598,7 +598,7 @@ async function fetchTraceStats(executor: SqlExecutor, scopeId: string): Promise<
|
||||
return rows[0] === undefined ? null : statsRowToRecord(rows[0]);
|
||||
}
|
||||
|
||||
async function insertDerivedStatsEvent(executor: SqlExecutor, stats: TraceStatsRecord, sourceEvent: OaEventRecord): Promise<OaEventRecord | null> {
|
||||
function traceStatsEvent(stats: TraceStatsRecord, sourceEvent: OaEventRecord): OaEventRecord {
|
||||
const tags = ["stats", "trace", `service:${stats.serviceId}`, stats.scopeId];
|
||||
if (typeof stats.taskId === "string" && stats.taskId.length > 0) addTag(tags, `task:${stats.taskId}`);
|
||||
if (Number(stats.attemptIndex) > 0) addTag(tags, `attempt:${Number(stats.attemptIndex)}`);
|
||||
@@ -611,7 +611,7 @@ async function insertDerivedStatsEvent(executor: SqlExecutor, stats: TraceStatsR
|
||||
attemptIndex: Number(stats.attemptIndex) > 0 ? Number(stats.attemptIndex) : null,
|
||||
stats,
|
||||
};
|
||||
const derived = normalizeEvent({
|
||||
const event = normalizeEvent({
|
||||
eventId: `oa-event-flow:trace-stats-updated:${stats.scopeId}:${stats.statsRevision}`,
|
||||
type: "trace-stats-updated",
|
||||
sourceKind: "projection",
|
||||
@@ -623,13 +623,7 @@ async function insertDerivedStatsEvent(executor: SqlExecutor, stats: TraceStatsR
|
||||
tags,
|
||||
payload,
|
||||
});
|
||||
const rows = await executor<OaEventRow[]>`
|
||||
INSERT INTO oa_events (event_id, type, source_kind, source_id, aggregate_type, aggregate_id, correlation_id, causation_id, tags, payload, created_at)
|
||||
VALUES (${derived.eventId}, ${derived.type}, ${derived.sourceKind}, ${derived.sourceId}, ${derived.aggregateType}, ${derived.aggregateId}, ${derived.correlationId}, ${derived.causationId}, ${executor.json(derived.tags)}, ${executor.json(derived.payload)}, ${derived.createdAt})
|
||||
ON CONFLICT (event_id) DO NOTHING
|
||||
RETURNING *
|
||||
`;
|
||||
return rows[0] === undefined ? null : rowToEvent(rows[0]);
|
||||
return { ...event, sequence: sourceEvent.sequence, createdAt: sourceEvent.createdAt };
|
||||
}
|
||||
|
||||
async function applyTraceSnapshot(executor: SqlExecutor, event: OaEventRecord, scope: TraceScope): Promise<TraceStatsRecord | null> {
|
||||
@@ -804,8 +798,7 @@ async function publishEvents(inputs: OaEventInput[]): Promise<{ inserted: OaEven
|
||||
}
|
||||
inserted.push(stored);
|
||||
for (const stats of await applyTraceProjection(tx, stored)) {
|
||||
const statsEvent = await insertDerivedStatsEvent(tx, stats, stored);
|
||||
if (statsEvent !== null) derived.push(statsEvent);
|
||||
derived.push(traceStatsEvent(stats, stored));
|
||||
}
|
||||
}
|
||||
const lastSequence = Math.max(0, ...inserted.map((event) => event.sequence), ...derived.map((event) => event.sequence));
|
||||
|
||||
Reference in New Issue
Block a user