From 57402f28c07d54c5e2a649305d5b8d4b08c3e844 Mon Sep 17 00:00:00 2001 From: Codex Date: Sun, 17 May 2026 08:07:32 +0000 Subject: [PATCH] fix(cli): bound diagnostics and add swap management --- AGENTS.md | 7 +- TEST.md | 6 +- docs/reference/cli.md | 11 +- docs/reference/deployment.md | 8 + docs/reference/observability.md | 10 +- scripts/cli.ts | 25 ++- scripts/src/command.ts | 26 ++- scripts/src/docker.ts | 36 +++- scripts/src/jobs.ts | 70 +++++++- scripts/src/microservices.ts | 87 ++++++++- scripts/src/output.ts | 19 +- scripts/src/remote.ts | 60 ++++++- scripts/src/swap.ts | 303 ++++++++++++++++++++++++++++++++ 13 files changed, 618 insertions(+), 50 deletions(-) create mode 100644 scripts/src/swap.ts diff --git a/AGENTS.md b/AGENTS.md index 17c64a48..2f1c856a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -27,8 +27,9 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文 - `bun scripts/cli.ts config show`:校验并展示根目录 `config.json`,配置来源规则见 `docs/reference/config.md`。 - `bun scripts/cli.ts check`:运行配置、TypeScript、文件存在性和 Docker Compose 配置检查,测试入口见 `TEST.md`。 - `bun scripts/cli.ts server start`:以异步 job 启动 database、backend-core、frontend、provider-gateway 和主 server 用户服务,部署规则见 `docs/reference/deployment.md`。 -- `bun scripts/cli.ts server status`:查询固定端口、容器状态、健康检查和访问 URL,判定标准见 `docs/reference/deployment.md`。 -- `bun scripts/cli.ts server logs`:分页返回文件日志与 Docker 日志尾部,日志规则见 `docs/reference/observability.md`。 +- `bun scripts/cli.ts server status`:查询固定端口、swap 摘要、容器状态、健康检查和访问 URL,判定标准见 `docs/reference/deployment.md`。 +- `bun scripts/cli.ts server swap status|ensure [--path /swapfile] [--size 2GiB] [--dry-run]`:以 JSON 查看或幂等创建主 server swapfile,`ensure` 输出 before/after、动作、持久化状态和 degraded/failed 详情,规则见 `docs/reference/deployment.md`。 +- `bun scripts/cli.ts server logs [--tail-bytes N]`:只返回文件日志与 Docker 日志尾部并带截断元数据,日志规则见 `docs/reference/observability.md`。 - `bun scripts/cli.ts server rebuild `:以 build-first、Compose lock、no-deps force-recreate 和 post-up validation 的异步 job 重建主 server Compose 内单个服务;Code Queue 部署在 D601,规则见 `docs/reference/deployment.md`。 - `bun scripts/cli.ts provider attach [--master-server URL] [--up] [--force]`:在新增计算节点上生成两项配置的 provider-gateway 挂载包;默认只需要主 server URL(默认 `http://74.48.78.17/`)和唯一 Provider ID,生成的 Compose 固定 Docker socket、`pid: "host"`、`restart: always`、只读 `/workspace`、SSH 维护私钥挂载和 loopback egress proxy 端口,规则见 `docs/reference/provider-gateway.md`。 - `bun scripts/cli.ts ssh [ssh-like args...]`:通过 provider-gateway 的 Host SSH / WSL SSH 维护桥打开近似原生 ssh 的交互会话或远端命令,并在远端 PATH 注入 `apply_patch`、`glob` 与 `skill-discover`;`apply-patch`、`py`、`skills`、结构化 `find`、`glob` 和 `argv` 子命令用于避免远端补丁、Python stdin、skill 发现与常用只读命令的嵌套转义问题,使用规则见 `docs/reference/cli.md` 和 `docs/reference/provider-gateway.md`。 @@ -40,7 +41,7 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文 - `bun scripts/cli.ts codex task `:按 Code Queue 任务 ID 查询初始 prompt、最后 assistant message、工具调用摘要、attempt/judge/error 和耗时,便于新任务引用历史 session。 - `bun scripts/cli.ts codex judge --attempt [--dry-run]`:按指定 task/attempt 用与队列 worker 相同的上下文构建和 MiniMax judge 调用路径单步复现完成判定;`--dry-run` 只输出 prompt/payload 诊断。 - `bun scripts/cli.ts server stop`:以异步 job 停止固定 Compose 项目中的全部 UniDesk 服务,停止后用 `server status` 复核。 -- `bun scripts/cli.ts job list` / `bun scripts/cli.ts job status latest`:查询 `.state/jobs/` 中的异步任务状态,job 机制见 `docs/reference/cli.md`。 +- `bun scripts/cli.ts job list [--limit N]` / `bun scripts/cli.ts job status latest [--tail-bytes N]`:分页查询 `.state/jobs/` 中的异步任务状态,状态输出只读日志尾部并保留完整日志路径,job 机制见 `docs/reference/cli.md`。 - `bun scripts/cli.ts debug health` / `bun scripts/cli.ts debug dispatch` / `bun scripts/cli.ts debug task`:通过 Docker 内网 core、真实 HTTP、WebSocket、系统指标、Docker 状态和 Host SSH 维护桥流程调试健康检查、任务下发与任务结果,调试规则见 `docs/reference/cli.md`。 - `bun scripts/cli.ts e2e run [--only pattern[,pattern...]] [--skip pattern[,pattern...]]`:支持按 check/prefix/wildcard 选择性执行公网 frontend/provider ingress、内网 core/database、provider-gateway 自接入与 Playwright 验证;日常迭代先跑当前问题对应的最小检查集,最终交付再跑全量回归,验收规则见 `docs/reference/e2e.md`。 diff --git a/TEST.md b/TEST.md index 3e0d091d..cb4ef064 100644 --- a/TEST.md +++ b/TEST.md @@ -2,7 +2,7 @@ ## T1 CLI 可观测性与配置校验 -阅读 `AGENTS.md`(本项目 `AGENTS.md` 同时承担 `SKILL.md` 对 `scripts/cli.ts` 的解释职责),然后用 cli 手动测试以下内容:运行 `bun scripts/cli.ts help`、`bun scripts/cli.ts config show`、`bun scripts/cli.ts check`,确认每条命令都有 JSON 输出、失败时包含错误对象、`config.json` 是唯一配置来源,且 TypeScript 检查覆盖 `scripts/` 与 `src/components/`。 +阅读 `AGENTS.md`(本项目 `AGENTS.md` 同时承担 `SKILL.md` 对 `scripts/cli.ts` 的解释职责),然后用 cli 手动测试以下内容:运行 `bun scripts/cli.ts help`、`bun scripts/cli.ts config show`、`bun scripts/cli.ts check`,确认每条命令都有 JSON 输出、失败时包含错误对象、`config.json` 是唯一配置来源,且 TypeScript 检查覆盖 `scripts/` 与 `src/components/`;运行 `set -o pipefail; bun scripts/cli.ts server status | head -1`,确认下游 pipe 关闭时不会打印 Bun EPIPE stack trace。 ## T2 Docker 栈异步启动 @@ -10,7 +10,7 @@ ## T3 主 server 自接入 Provider Gateway -阅读 `AGENTS.md`(本项目 `AGENTS.md` 同时承担 `SKILL.md` 对 `scripts/cli.ts` 的解释职责),然后用 cli 手动测试以下内容:运行 `bun scripts/cli.ts server status` 和 `bun scripts/cli.ts debug health`,确认面向浏览器的公网入口只有 frontend 与 provider ingress,backend-core 显示为 Docker 内部端口,database/OA Event Flow 若因 D601 Code Queue 映射宿主端口也必须显示为受限宿主端口,且 `network.restrictedHostAccess.allowedSourceCidrs` 已生成来源限制,`/api/nodes` 中存在 `main-server` provider,状态为 `online`,`/api/nodes/system-status` 中存在 CPU/内存/硬盘采样,`/api/nodes/docker-status` 中存在 `main-server` 的 Docker 快照,且 provider 标签中能看到 Docker socket 可用性。 +阅读 `AGENTS.md`(本项目 `AGENTS.md` 同时承担 `SKILL.md` 对 `scripts/cli.ts` 的解释职责),然后用 cli 手动测试以下内容:运行 `bun scripts/cli.ts server status`、`bun scripts/cli.ts server swap status` 和 `bun scripts/cli.ts debug health`,确认 `server status` 包含 `swap` 摘要,`server swap status` 快速返回 total memory、active swaps、`/etc/fstab` 持久化状态和 warning;面向浏览器的公网入口只有 frontend 与 provider ingress,backend-core 显示为 Docker 内部端口,database/OA Event Flow 若因 D601 Code Queue 映射宿主端口也必须显示为受限宿主端口,且 `network.restrictedHostAccess.allowedSourceCidrs` 已生成来源限制,`/api/nodes` 中存在 `main-server` provider,状态为 `online`,`/api/nodes/system-status` 中存在 CPU/内存/硬盘采样,`/api/nodes/docker-status` 中存在 `main-server` 的 Docker 快照,且 provider 标签中能看到 Docker socket 可用性。若 `swap.warning` 非空,先运行 `bun scripts/cli.ts server swap ensure --dry-run` 审查动作,再谨慎执行 `bun scripts/cli.ts server swap ensure --size 2GiB`,确认输出包含 `before`/`after`、`actions`、`errors` 和 `status=ok|degraded`;已有 swap 时 ensure 必须 no-op。 ## T4 前端控制台连通 @@ -22,7 +22,7 @@ ## T6 日志第一现场验证 -阅读 `AGENTS.md`(本项目 `AGENTS.md` 同时承担 `SKILL.md` 对 `scripts/cli.ts` 的解释职责),然后用 cli 手动测试以下内容:运行 `bun scripts/cli.ts server logs --tail-bytes 20000`,实际读取输出中列出的 `logs/{YYYYMMDD}/` 文件,确认 backend-core、frontend、provider-gateway、database 都有实时日志;backend-core 与 Code Queue/Codex app-server 日志必须按 `logs/{YYYYMMDD}/{startStamp}_{YYYYMMDD}_{HH}_{service}.jsonl` 小时切片,默认日志族总量不得超过 `1GiB`,超过后会删除最旧切片;日志不得只有启动行,错误日志必须包含可定位的错误消息或 stack。 +阅读 `AGENTS.md`(本项目 `AGENTS.md` 同时承担 `SKILL.md` 对 `scripts/cli.ts` 的解释职责),然后用 cli 手动测试以下内容:运行 `bun scripts/cli.ts server logs --tail-bytes 20000`,确认输出包含 `policy`、每个日志文件的 `sizeBytes/tailBytes/truncated` 和 Docker logs 的 tail 元数据,实际读取输出中列出的 `logs/{YYYYMMDD}/` 文件,确认 backend-core、frontend、provider-gateway、database 都有实时日志;运行 `bun scripts/cli.ts job list --limit 5` 和 `bun scripts/cli.ts job status latest --tail-bytes 20000`,确认 job 列表分页、状态输出只含 stdout/stderr 尾部且保留完整日志路径;backend-core 与 Code Queue/Codex app-server 日志必须按 `logs/{YYYYMMDD}/{startStamp}_{YYYYMMDD}_{HH}_{service}.jsonl` 小时切片,默认日志族总量不得超过 `1GiB`,超过后会删除最旧切片;日志不得只有启动行,错误日志必须包含可定位的错误消息或 stack。 ## T7 停止与端口释放 diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 0165f35c..f2d3364c 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -10,8 +10,9 @@ UniDesk 的统一 CLI 入口是根目录 `scripts/cli.ts`,运行方式固定 - `check` 执行配置校验、文件存在性检查、`scripts/` TypeScript 检查、`src/components/` TypeScript 检查和 Docker Compose 配置检查。 - `server start` 创建异步 job,在后台执行 Docker 构建和启动;命令本身只负责返回 job id、日志路径和启动命令。 - `server stop` 创建异步 job,在后台停止固定 Compose project 中的全部 UniDesk 服务。 -- `server status` 查询公开端口、受限宿主端口、内部端口、Compose 容器、core/frontend/provider/database 健康检查和访问 URL;D601 Code Queue 使用的 PostgreSQL/OA Event Flow host mapping 必须出现在受限宿主端口而不是无条件公开入口中。 -- `server logs` 返回 `logs/` 文件日志和 Docker 容器日志的尾部,默认限制输出大小,避免日志爆炸。 +- `server status` 查询公开端口、受限宿主端口、内部端口、主机 swap 摘要、Compose 容器、core/frontend/provider/database 健康检查和访问 URL;D601 Code Queue 使用的 PostgreSQL/OA Event Flow host mapping 必须出现在受限宿主端口而不是无条件公开入口中。低内存主 server 上 `swap.warning` 非空时,先执行 `server swap status` 或 `server swap ensure`。 +- `server swap status|ensure [--path /swapfile] [--size 2GiB] [--dry-run]` 是主 server swap 管理入口。`status` 仅读 `/proc/meminfo`、`/proc/swaps` 和 `/etc/fstab` 并返回 JSON;`ensure` 在已有任何 active swap 时只报告 no-op,在无 active swap 时创建固定 swapfile、`chmod 600`、`mkswap`、`swapon` 并尽量写入 `/etc/fstab`。输出必须包含 `before`、`after`、total memory、active swap、持久化状态、关键动作和错误详情;若 swap 已启用但 fstab 写入失败,状态为 `degraded`,调用者需按返回的 detail 修复持久化。 +- `server logs` 返回 `logs/` 文件日志和 Docker 容器日志的尾部,默认限制输出大小,避免日志爆炸。实现必须只读取文件末尾字节,不得为了 tail 先把巨大日志完整读入 CLI 内存。 - `server rebuild ` 创建异步 job,先构建目标服务镜像,随后在 `.state/locks/server-compose.lock` 串行保护下用 `--no-deps --force-recreate` 替换目标 service 并等待容器 `healthy/running`;该命令用于替代手工删除容器的兜底流程,其中 `todo-note`、`project-manager`、`baidu-netdisk` 和 `oa-event-flow` 只重建主 server 承载的对应后端,不会重建或删除 database 命名卷。Code Queue 部署在 D601,不再由 `server rebuild` 管理。 - `provider attach [--master-server URL] [--up] [--force]` 在新计算节点生成两项配置的 provider-gateway 挂载包:`.state/provider-.env` 默认只包含 `UNIDESK_MASTER_SERVER` 与 `PROVIDER_ID`,`provider-.yml` 固定 Docker socket、`pid: "host"`、`restart: always`、只读 `/workspace` 和 SSH 维护私钥挂载;`--up` 会立即执行生成的 `docker compose up -d --build`。 - `ssh [ssh-like args...]` 通过 backend-core 内网 WebSocket broker 和 provider-gateway 的 Host SSH / WSL SSH 维护桥连接目标节点;无后续参数时进入远端登录 shell,有后续参数时按 ssh 远端命令体验执行并返回远端 exit code。 @@ -27,7 +28,7 @@ UniDesk 的统一 CLI 入口是根目录 `scripts/cli.ts`,运行方式固定 - `codex output --tail|--from-start|--after-seq N|--before-seq N --limit N [--full-text]` 按原始 output seq 分页读取底层记录;当 trace 行提示 `commandOmittedLines`、`bodyOmittedLines` 或 `rawSeqs` 时,用该命令按 seq 补取完整信息,默认仍有单条文本预览上限,显式 `--full-text` 才返回该页全文。 - `codex judge --attempt N [--dry-run] [--include-prompt]` 通过 Code Queue 私有代理按指定 attempt 单步复现 judge;后端会从 PostgreSQL task JSON 与 output 归档重建该 attempt 在真实队列 worker 中的 `QueueTask`/`CodexRunResult`,再调用同一套 judge prompt builder 和 MiniMax 请求路径。默认会真实调用 MiniMax,`--dry-run` 只返回 prompt/payload 大小、attempt 窗口和重建来源诊断,`--include-prompt` 仅用于本地深度排查。 - Code Queue 多队列 lane 由 `codex` 命令命名空间管理:`queues` 列表、`queue create ` 创建、`queue merge --into ` 合并、`move --queue ` 迁移;同一个 queue 内部串行执行,不同 queue 之间并行执行。迁移只允许尚未被 scheduler claim 的 `queued`/`retry_wait` 任务,必须满足 `startedAt=null`、`currentAttempt=0` 且没有 active thread/turn;已进入 `running`/`judging` 或已有 claim 标记的任务返回 409,不得被 move/merge 回写成 queued。合并会移动可迁移任务归属并自动删除源 queue 记录,只保留合并后的目标 queue;若 source 或 target queue 存在 active/claimed 任务,合并整体返回 409。合并后的目标 queue 按任务原 `queueEnteredAt`/`createdAt` 时间顺序串行,成功迁移 queued/retry_wait 任务后会立即调度目标 queue。 -- `job list` 与 `job status` 查询 `.state/jobs/` 文件系统状态,是异步命令的可观测入口。 +- `job list [--limit N] [--include-command]` 与 `job status [--tail-bytes N]` 查询 `.state/jobs/` 文件系统状态,是异步命令的可观测入口。`job list` 默认只返回最新 50 条摘要;`job status` 默认只返回 stdout/stderr 末尾 12000 字节,并带 `tailPolicy` 与完整日志路径。 - `debug health`、`debug dispatch` 与 `debug task` 走真实内部 core、WebSocket、数据库、provider、系统指标、Docker 状态和 Host SSH 维护桥流程,只用于开发调试,不写入 `TEST.md` 的正式验收步骤。 - `e2e run [--only pattern[,pattern...]] [--skip pattern[,pattern...]]` 使用 publicHost 派生的公开 frontend/provider ingress URL,并通过 Docker 内网验证 core API、PostgreSQL、provider self-connection、系统指标曲线、Docker 状态快照、provider.upgrade 预检和 Playwright 前端页面,是交付前的自动化 E2E 门禁;CLI 默认输出 check 状态摘要,完整诊断写入 `resultPath`,日常迭代应优先用 `--only` / `--skip` 跑最小必要集合。 @@ -43,7 +44,9 @@ UniDesk 的统一 CLI 入口是根目录 `scripts/cli.ts`,运行方式固定 每条命令的最外层 JSON 包含 `ok`、`command` 和 `data` 或 `error`。失败时 CLI 设置非零退出码,但仍然输出 JSON 错误对象;错误对象应包含 `name`、`message` 和可用的 `stack`。 -`microservice proxy` 是面向人工验证的私有后端读取入口。正式写入型用户服务操作由 frontend 同源代理或 E2E 直接调用 backend-core 完成,并由 config 中的 `allowedMethods` 限制;CLI `proxy` 默认仍作为 GET/HEAD 读取验证入口,必要时可显式加 `--method POST|PUT|PATCH|DELETE` 调用无需自定义请求体的受控调试/自测端点,例如 `bun scripts/cli.ts microservice proxy baidu-netdisk /api/self-test --method POST --raw`。为了避免 Pipeline snapshot 这类超大业务 JSON 造成 CLI 输出爆炸,响应 body 超过默认阈值时会返回 `bodyOmitted=true`、`bodyPreview`、`bodyBytes` 和 `rawHint`;需要完整 body 时显式添加 `--raw`,或用 `--max-body-bytes ` 调整预览阈值。正式 frontend 展示仍应优先使用业务控件和 `__unideskArrayLimit` 这类展示级裁剪参数,而不是默认倾倒完整 JSON。 +诊断命令默认采用渐进披露:`server logs`、`job list/status`、`codex task/trace/output` 和 `microservice proxy` 都必须有默认条数、字节数或文本预览上限;用户显式传 `--limit`、`--tail-bytes`、`--full-text` 或 `--full` 才扩大单次输出。CLI stdout 遇到下游 pipe 关闭的 `EPIPE` 必须安静退出,不得打印 Bun stack trace。 + +`microservice proxy` 是面向人工验证的私有后端读取入口。正式写入型用户服务操作由 frontend 同源代理或 E2E 直接调用 backend-core 完成,并由 config 中的 `allowedMethods` 限制;CLI `proxy` 默认仍作为 GET/HEAD 读取验证入口,必要时可显式加 `--method POST|PUT|PATCH|DELETE` 调用无需自定义请求体的受控调试/自测端点,例如 `bun scripts/cli.ts microservice proxy baidu-netdisk /api/self-test --method POST --raw`。为了避免 Pipeline snapshot 这类超大业务 JSON 造成 CLI 输出爆炸,响应 body 超过默认阈值时会返回 `bodyOmitted=true`、`bodyPreview`、`bodyBytes` 和 `rawHint`;`--raw` 仍受默认硬限额保护,需要完整 body 时显式添加 `--raw --full`,或用 `--max-body-bytes ` 调整预览阈值。正式 frontend 展示仍应优先使用业务控件和 `__unideskArrayLimit` 这类展示级裁剪参数,而不是默认倾倒完整 JSON。 `network perf` 用于生成组网性能前后对比数据。标准 Code Queue overview 读路径基准命令是 `bun scripts/cli.ts network perf --service code-queue --path /api/tasks/overview?limit=30 --count 30 --concurrency 1 --label before`,远程主 server 可用 `bun scripts/cli.ts --main-server-ip 74.48.78.17 network perf ...`。输出包含成功/失败数、状态码分布、`x-unidesk-cache`、`x-unidesk-proxy-mode`、`x-unidesk-upstream-proxy-mode` 分布和 min/p50/p90/p95/max;provider-gateway 长连接数据面验收应看到 `proxyModeCounts.provider-ws-http-tunnel`,adapter native Service 数据面验收应看到 upstream proxy mode 为 `kubernetes-native-service`,若出现 `kubernetes-api-service-proxy` 必须结合 `/api/control-plane.nativeServiceProxy.failedServices` 解释 fallback 原因。 diff --git a/docs/reference/deployment.md b/docs/reference/deployment.md index 5f50a2c8..04ad3f0c 100644 --- a/docs/reference/deployment.md +++ b/docs/reference/deployment.md @@ -28,6 +28,14 @@ Compose v2 安装后仍然必须遵守 UniDesk 的服务控制入口:全栈生 版本化用户服务部署优先使用 `bun scripts/cli.ts deploy apply`。`deploy.json` 只声明服务 `id`、`repo` 和 `commitId`;目标节点、Dockerfile、Compose、Kubernetes manifest、健康检查和代理路径继续来自 `config.json` 与现有 manifest。部署必须遵循 target-side build:服务部署到哪台 target,就在哪台 target 从 remote commit 导出源码、一次性代理构建镜像并部署;不得把中心构建镜像作为默认分发路径,也不得用 `docker commit` 或脏 worktree 作为部署输入。完整规则见 `docs/reference/deploy.md`。 +## Main Server Swap + +主 server 可能运行在约 2 GiB 内存的小规格机器上,短时 Docker build、Codex/control-plane 调查和日志读取会触发 global OOM。主 server 必须通过 `bun scripts/cli.ts server swap status` 暴露当前 memory/swap 状态,并在 `server status` 的 `swap` 字段中给出同一摘要。 + +缺少 active swap 时,正式修复入口是 `bun scripts/cli.ts server swap ensure [--path /swapfile] [--size 2GiB]`。该命令必须幂等:已有任何 active swap 时只返回 no-op 状态;无 swap 时创建固定 swapfile、设置 `0600`、执行 `mkswap` 与 `swapon`,并尽量把 ` none swap sw 0 0` 写入 `/etc/fstab`。如果当前环境允许 `swapon` 但不允许写 `/etc/fstab`,命令返回 `status=degraded`,并在 JSON 的 `errors`/`actions` 中说明下一步;不得静默假装持久化完成。 + +swap 管理不能被强塞进所有热路径。`server start/status` 可以暴露 warning 或摘要,但不会自动创建 swap;需要变更主机 swap 时必须显式运行 `server swap ensure`,并用返回的 `before`/`after` 和 `fstab.persisted` 作为验收记录。 + ## Start And Stop `bun scripts/cli.ts server start` 与 `bun scripts/cli.ts server stop` 都是异步 job。启动 job 只执行固定 Compose project 的 `up -d --build --remove-orphans`,不得先 `down`,避免在 provider-gateway 旧容器或网络冲突时把长驻控制面容器先删掉又启动失败;停止 job 才允许执行 `down --remove-orphans`。启动和停止流程都禁止删除 Docker named volume。所有会改变主 server Compose 状态的 job 必须通过 `.state/locks/server-compose.lock` 串行化;连续 `server rebuild` 命令只代表连续创建异步 job,不能代表第一个 job 已结束,实际容器变更仍必须由 Compose lock 串行执行。 diff --git a/docs/reference/observability.md b/docs/reference/observability.md index a108f6a4..e4edd79f 100644 --- a/docs/reference/observability.md +++ b/docs/reference/observability.md @@ -4,7 +4,7 @@ UniDesk 的可观测性优先级高于静默成功。CLI、服务日志、Docker ## CLI Logs -异步 job 的 stdout 和 stderr 位于 `.state/jobs/`。`job status` 会返回有限尾部,避免输出爆炸,同时保留完整日志文件路径便于继续排查。 +异步 job 的 stdout 和 stderr 位于 `.state/jobs/`。`job list` 默认只返回最新 50 条摘要;`job status` 会返回有限尾部,避免输出爆炸,同时保留完整日志文件路径便于继续排查。实现必须只读取日志尾部字节,不得先把完整 job 日志读入 CLI 内存。 ## Service Logs @@ -18,7 +18,13 @@ UniDesk 的可观测性优先级高于静默成功。CLI、服务日志、Docker ## Log Access -`bun scripts/cli.ts server logs` 同时读取文件日志和 Docker logs 尾部。文件日志是服务崩溃时的第一现场,Docker logs 是容器启动失败和 stdout/stderr 的辅助来源。 +`bun scripts/cli.ts server logs` 同时读取文件日志和 Docker logs 尾部。文件日志是服务崩溃时的第一现场,Docker logs 是容器启动失败和 stdout/stderr 的辅助来源。默认输出必须包含 tail 字节数、是否截断和完整文件路径;扩大读取范围只能通过显式 `--tail-bytes N`,且 CLI 会对单次 tail 设置硬上限。 + +## Diagnostic Output Limits + +所有诊断型 CLI 输出必须优先摘要化、尾部化或分页化,禁止默认倾倒大 JSON、全量日志、全量 trace 或 `.state`/`logs` 宽泛搜索结果。当前硬限额入口包括:`server logs` 默认 3000 bytes tail、`job list` 默认 50 条、`job status` 默认 12000 bytes tail、`codex task/trace/output` 默认分页与文本预览、`microservice proxy` 默认 body 预览且 `--raw` 仍受硬限额保护。确实需要完整响应时必须显式使用对应的 `--full`、`--full-text`、`--tail-bytes` 或 `--limit` 参数,并在验收记录中说明为什么需要扩大输出。 + +CLI 写 stdout/stderr 遇到下游 pipe 关闭的 `EPIPE` 必须安静退出,不能打印 Bun stack trace。常见验证命令是 `set -o pipefail; bun scripts/cli.ts server status | head -1`,应只看到第一行 JSON 而无额外错误噪声。 ## Task Liveness diff --git a/scripts/cli.ts b/scripts/cli.ts index 05441f06..3da35bc2 100644 --- a/scripts/cli.ts +++ b/scripts/cli.ts @@ -3,7 +3,7 @@ import { debugDispatch, debugHealth, debugTask, isDebugDispatchCommand, type Deb import { isRebuildableService, rebuildService, stackLogs, stackStatus, startStack, stopStack } from "./src/docker"; import { parseE2ERunOptions, runE2E } from "./src/e2e"; import { emitError, emitJson } from "./src/output"; -import { jobWithTail, listJobs, readJob, runJob } from "./src/jobs"; +import { jobWithTail, listJobs, listJobsSummary, readJob, runJob } from "./src/jobs"; import { runChecks } from "./src/check"; import { runSsh } from "./src/ssh"; import { extractRemoteCliOptions, runRemoteCli } from "./src/remote"; @@ -15,6 +15,7 @@ import { runProviderCommand } from "./src/provider-attach"; import { runScheduleCommand } from "./src/schedules"; import { parseNetworkPerfOptions, runNetworkPerf } from "./src/network-perf"; import { runCiCommand } from "./src/ci"; +import { runSwapCommand } from "./src/swap"; const remoteOptions = extractRemoteCliOptions(process.argv.slice(2)); const args = remoteOptions.args; @@ -32,6 +33,7 @@ function help(): unknown { { command: "server start", description: "Fire-and-forget build/start for database, backend-core, frontend, provider gateway, and managed main-server user services." }, { command: "server stop", description: "Fire-and-forget docker-compose down for the fixed UniDesk stack." }, { command: "server status", description: "Show fixed ports, containers, service health, and public URLs." }, + { command: "server swap status|ensure [--path /swapfile] [--size 2GiB] [--dry-run]", description: "Inspect or idempotently create host swap for low-memory main-server operation." }, { command: "server logs [--tail-bytes N]", description: "Return bounded tails from file logs and docker logs." }, { command: "server rebuild ", description: "Build first, then serialize, force-recreate, and validate one Compose service." }, { command: "provider attach [--master-server URL] [--up] [--force]", description: "Generate the minimal external provider-gateway env/compose bundle; only master server URL and provider id are required." }, @@ -47,7 +49,7 @@ function help(): unknown { { command: "microservice health ", description: "Probe one user service through backend-core -> provider-gateway HTTP proxy." }, { command: "microservice diagnostics ", description: "Split k3sctl-managed proxy health into provider-gateway, HTTP tunnel, adapter, Kubernetes API service proxy, and target Service checks." }, { command: "microservice tunnel-self-test ", description: "Trigger an expected provider HTTP tunnel failure and verify requestId/stage diagnostics are returned." }, - { command: "microservice proxy [--method GET|POST|PUT|PATCH|DELETE] [--raw] [--max-body-bytes N]", description: "Access a private user-service backend path through the same frontend-only proxy used by WebUI; large bodies are summarized unless --raw is set." }, + { command: "microservice proxy [--method GET|POST|PUT|PATCH|DELETE] [--raw] [--full] [--max-body-bytes N]", description: "Access a private user-service backend path through the same frontend-only proxy used by WebUI; large bodies are summarized unless --full is explicit." }, { command: "decision upload [--title text] [--type meeting|decision] [--level G0|G1|G2|G3|P0|P1|P2|P3|none] [--status active|blocked|parked|done] [--linked-goal-id id] [--evidence url]", description: "Upload a meeting note or decision record through backend-core -> decision-center user-service proxy." }, { command: "decision list [--type ...] [--status ...] [--level ...] [--linked-goal-id id] [--limit N]", description: "List Decision Center records through the user-service proxy." }, { command: "decision show ", description: "Show one Decision Center record." }, @@ -59,7 +61,7 @@ function help(): unknown { { command: "codex output [--tail|--from-start|--after-seq N|--before-seq N --limit N] [--full-text]", description: "Fetch paged raw Code Queue output records by seq when a trace row has omitted command/output text." }, { command: "codex judge --attempt N [--dry-run] [--include-prompt]", description: "Replay one stored Code Queue attempt through the same judge context builder and MiniMax judge call path used by the live queue worker." }, { command: "codex (queues | queue create | queue merge --into | move --queue )", description: "List/create/merge Code Queue lanes and move a queued task; merge preserves task queue time order and deletes the source queue record." }, - { command: "job list", description: "List async jobs from .state/jobs." }, + { command: "job list [--limit N] [--include-command]", description: "List async jobs from .state/jobs with a bounded default page." }, { command: "job status [--tail-bytes N]", description: "Show job state with bounded stdout/stderr tails." }, { command: "debug health", description: "Probe internal core, nodes, system/Docker status, frontend, provider ingress, and public boundary." }, { command: "debug dispatch [providerId] [docker.ps|provider.upgrade|host.ssh|microservice.http|echo] [--wait-ms N]", description: "Submit a real internal-core dispatch request for CLI debugging." }, @@ -80,6 +82,10 @@ function numberOption(name: string, defaultValue: number): number { return value; } +function boundedNumberOption(name: string, defaultValue: number, maxValue: number): number { + return Math.min(numberOption(name, defaultValue), maxValue); +} + function stringOption(name: string): string | undefined { const index = args.indexOf(name); if (index === -1) return undefined; @@ -172,8 +178,15 @@ async function main(): Promise { emitJson(commandName, await stackStatus(config)); return; } + if (sub === "swap") { + const result = runSwapCommand(args.slice(2)); + const ok = (result as { ok?: unknown }).ok !== false; + emitJson(commandName, result, ok); + if (!ok) process.exitCode = 1; + return; + } if (sub === "logs") { - emitJson(commandName, stackLogs(config, numberOption("--tail-bytes", 3000))); + emitJson(commandName, stackLogs(config, boundedNumberOption("--tail-bytes", 3000, 500_000))); return; } if (sub === "rebuild") { @@ -227,12 +240,12 @@ async function main(): Promise { if (top === "job") { if (sub === "list") { - emitJson(commandName, { jobs: listJobs() }); + emitJson(commandName, listJobsSummary({ limit: boundedNumberOption("--limit", 50, 500), includeCommand: args.includes("--include-command") })); return; } if (sub === "status") { const id = third === "latest" || third === undefined ? latestJobId() : third; - emitJson(commandName, { job: jobWithTail(readJob(id), numberOption("--tail-bytes", 12000)) }); + emitJson(commandName, { job: jobWithTail(readJob(id), boundedNumberOption("--tail-bytes", 12000, 500_000)) }); return; } } diff --git a/scripts/src/command.ts b/scripts/src/command.ts index 468992dd..5f7a2c1b 100644 --- a/scripts/src/command.ts +++ b/scripts/src/command.ts @@ -1,5 +1,5 @@ import { spawn, spawnSync } from "node:child_process"; -import { createWriteStream, existsSync, readFileSync } from "node:fs"; +import { closeSync, createWriteStream, existsSync, openSync, readSync, statSync } from "node:fs"; export interface CommandResult { command: string[]; @@ -7,20 +7,26 @@ export interface CommandResult { exitCode: number | null; stdout: string; stderr: string; + signal: NodeJS.Signals | null; + timedOut: boolean; } -export function runCommand(command: string[], cwd: string): CommandResult { +export function runCommand(command: string[], cwd: string, options: { timeoutMs?: number } = {}): CommandResult { const result = spawnSync(command[0], command.slice(1), { cwd, encoding: "utf8", maxBuffer: 1024 * 1024 * 8, + timeout: options.timeoutMs, }); + const error = result.error as (Error & { code?: string }) | undefined; return { command, cwd, exitCode: result.status, stdout: result.stdout ?? "", - stderr: result.stderr ?? result.error?.message ?? "", + stderr: result.stderr ?? error?.message ?? "", + signal: result.signal, + timedOut: error?.code === "ETIMEDOUT", }; } @@ -50,6 +56,16 @@ export async function runCommandToFiles(command: string[], cwd: string, stdoutFi export function tailFile(path: string, maxBytes = 8192): string { if (!existsSync(path)) return ""; - const content = readFileSync(path); - return content.subarray(Math.max(0, content.length - maxBytes)).toString("utf8"); + const safeMaxBytes = Math.max(0, Math.floor(maxBytes)); + if (safeMaxBytes === 0) return ""; + const size = statSync(path).size; + const bytesToRead = Math.min(size, safeMaxBytes); + const buffer = Buffer.alloc(bytesToRead); + const fd = openSync(path, "r"); + try { + readSync(fd, buffer, 0, bytesToRead, size - bytesToRead); + } finally { + closeSync(fd); + } + return buffer.toString("utf8"); } diff --git a/scripts/src/docker.ts b/scripts/src/docker.ts index 29631cf0..70bd7bd6 100644 --- a/scripts/src/docker.ts +++ b/scripts/src/docker.ts @@ -1,8 +1,9 @@ -import { chmodSync, existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs"; +import { chmodSync, existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from "node:fs"; import { basename, dirname, join, resolve } from "node:path"; import { commandOk, runCommand, tailFile } from "./command"; import { type UniDeskConfig, repoRoot, rootPath } from "./config"; import { startJob } from "./jobs"; +import { swapStatus } from "./swap"; export interface ComposeRuntimeEnv { envFile: string; @@ -414,6 +415,7 @@ export async function stackStatus(config: UniDeskConfig): Promise { const overview = dockerExecJson("unidesk-backend-core", "fetch('http://127.0.0.1:8080/api/overview').then(r=>r.json()).then(j=>console.log(JSON.stringify({ok:true,status:200,body:j}))).catch(e=>{console.log(JSON.stringify({ok:false,error:String(e)}));process.exit(1)})"); return { runtimeEnv, + swap: swapStatus(), publicPorts: fixedPorts(config), blockedPublicPorts: [ { name: "backend-core-rest", port: config.network.core.port, listening: isPortListening(config.network.core.port), expected: "not-listening" }, @@ -478,11 +480,37 @@ export function stackLogs(config: UniDeskConfig, tailBytes: number): unknown { const allFiles = listLogFiles(logRoot); const currentFiles = allFiles.filter((path) => basename(path).startsWith(runtimeEnv.logPrefix)); const selectedFiles = (currentFiles.length > 0 ? currentFiles : allFiles.slice(-12)).slice(-12); - const files = selectedFiles.map((path) => ({ path, name: basename(path), tail: tailFile(path, tailBytes) })); + const files = selectedFiles.map((path) => { + const sizeBytes = existsSync(path) ? statSync(path).size : 0; + const truncated = sizeBytes > tailBytes; + return { path, name: basename(path), sizeBytes, tailBytes, truncated, tail: tailFile(path, tailBytes) }; + }); const containerNames = ["unidesk-database", "unidesk-backend-core", "unidesk-frontend", "unidesk-provider-gateway-main", "todo-note-backend", "project-manager-backend", "baidu-netdisk-backend", "oa-event-flow-backend"]; const docker = containerNames.map((name) => { const result = runCommand(["docker", "logs", "--tail", "40", name], repoRoot); - return { name, exitCode: result.exitCode, stdoutTail: result.stdout.slice(-tailBytes), stderrTail: result.stderr.slice(-tailBytes) }; + return { + name, + exitCode: result.exitCode, + tailBytes, + stdoutBytes: Buffer.byteLength(result.stdout, "utf8"), + stderrBytes: Buffer.byteLength(result.stderr, "utf8"), + stdoutTruncated: Buffer.byteLength(result.stdout, "utf8") > tailBytes, + stderrTruncated: Buffer.byteLength(result.stderr, "utf8") > tailBytes, + stdoutTail: result.stdout.slice(-tailBytes), + stderrTail: result.stderr.slice(-tailBytes), + }; }); - return { logRoot, runtimeEnv, files, docker }; + return { + logRoot, + runtimeEnv, + policy: { + defaultTailBytes: 3000, + requestedTailBytes: tailBytes, + selectedFileLimit: 12, + dockerTailLines: 40, + disclosure: "server logs returns tails only; increase with --tail-bytes for a larger bounded tail, and inspect listed paths directly for full logs.", + }, + files, + docker, + }; } diff --git a/scripts/src/jobs.ts b/scripts/src/jobs.ts index ed87cc73..101ffcf4 100644 --- a/scripts/src/jobs.ts +++ b/scripts/src/jobs.ts @@ -1,5 +1,5 @@ import { spawn, spawnSync } from "node:child_process"; -import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs"; +import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { repoRoot, rootPath } from "./config"; import { runCommandToFiles, tailFile } from "./command"; @@ -141,6 +141,70 @@ export async function runJob(id: string): Promise { return job; } -export function jobWithTail(job: JobRecord, maxBytes = 12000): JobRecord & { stdoutTail: string; stderrTail: string } { - return { ...job, stdoutTail: tailFile(job.stdoutFile, maxBytes), stderrTail: tailFile(job.stderrFile, maxBytes) }; +export function jobWithTail(job: JobRecord, maxBytes = 12000): JobRecord & { + tailPolicy: { + requestedTailBytes: number; + stdoutBytes: number; + stderrBytes: number; + stdoutTruncated: boolean; + stderrTruncated: boolean; + fullLogPaths: { stdoutFile: string; stderrFile: string }; + }; + stdoutTail: string; + stderrTail: string; +} { + const stdoutBytes = existsSync(job.stdoutFile) ? statSync(job.stdoutFile).size : 0; + const stderrBytes = existsSync(job.stderrFile) ? statSync(job.stderrFile).size : 0; + return { + ...job, + tailPolicy: { + requestedTailBytes: maxBytes, + stdoutBytes, + stderrBytes, + stdoutTruncated: stdoutBytes > maxBytes, + stderrTruncated: stderrBytes > maxBytes, + fullLogPaths: { stdoutFile: job.stdoutFile, stderrFile: job.stderrFile }, + }, + stdoutTail: tailFile(job.stdoutFile, maxBytes), + stderrTail: tailFile(job.stderrFile, maxBytes), + }; +} + +export interface JobListOptions { + limit?: number; + includeCommand?: boolean; +} + +export function listJobsSummary(options: JobListOptions = {}): unknown { + const limit = Math.max(1, Math.floor(options.limit ?? 50)); + const jobs = listJobs(); + const returned = jobs.slice(0, limit).map((job) => ({ + id: job.id, + name: job.name, + status: job.status, + runner: job.runner, + runnerPid: job.runnerPid ?? null, + runnerContainer: job.runnerContainer ?? null, + createdAt: job.createdAt, + startedAt: job.startedAt, + finishedAt: job.finishedAt, + exitCode: job.exitCode, + note: job.note, + stdoutFile: job.stdoutFile, + stderrFile: job.stderrFile, + ...(options.includeCommand === true ? { command: job.command, cwd: job.cwd } : {}), + })); + return { + jobs: returned, + total: jobs.length, + returned: returned.length, + limit, + truncated: jobs.length > returned.length, + disclosure: { + defaultLimit: 50, + nextCommand: jobs.length > returned.length ? `bun scripts/cli.ts job list --limit ${Math.min(jobs.length, limit * 2)}` : null, + includeCommandCommand: "bun scripts/cli.ts job list --include-command", + statusCommand: "bun scripts/cli.ts job status --tail-bytes 12000", + }, + }; } diff --git a/scripts/src/microservices.ts b/scripts/src/microservices.ts index 24954060..e0932f27 100644 --- a/scripts/src/microservices.ts +++ b/scripts/src/microservices.ts @@ -2,18 +2,51 @@ import { runCommand } from "./command"; import { type UniDeskConfig, repoRoot } from "./config"; import { jsonByteLength, previewJson } from "./preview"; -export function coreInternalFetch(path: string, init?: { method?: string; body?: unknown }): unknown { +export function coreInternalFetch(path: string, init?: { method?: string; body?: unknown; maxResponseBytes?: number }): unknown { if (!path.startsWith("/")) throw new Error("core internal path must start with /"); + const maxResponseBytes = Math.max(1024, Math.floor(init?.maxResponseBytes ?? 5_000_000)); const code = ` const res = await fetch(${JSON.stringify(`http://127.0.0.1:8080${path}`)}, ${JSON.stringify({ method: init?.method ?? "GET", headers: init?.body === undefined ? undefined : { "content-type": "application/json" }, body: init?.body === undefined ? undefined : JSON.stringify(init.body), })}); - const text = await res.text(); + const maxResponseBytes = ${JSON.stringify(maxResponseBytes)}; + const reader = res.body?.getReader(); + const chunks = []; + let bytes = 0; + let responseTruncated = false; + if (reader) { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + if (bytes + value.byteLength > maxResponseBytes) { + const keep = Math.max(0, maxResponseBytes - bytes); + if (keep > 0) { + chunks.push(value.slice(0, keep)); + bytes += keep; + } + responseTruncated = true; + try { await reader.cancel(); } catch {} + break; + } + chunks.push(value); + bytes += value.byteLength; + } + } + const buffer = new Uint8Array(bytes); + let offset = 0; + for (const chunk of chunks) { + buffer.set(chunk, offset); + offset += chunk.byteLength; + } + const text = new TextDecoder().decode(buffer); let body = null; - try { body = text ? JSON.parse(text) : null; } catch { body = { text }; } - console.log(JSON.stringify({ ok: res.ok, status: res.status, body })); + try { body = text && !responseTruncated ? JSON.parse(text) : null; } catch { body = { text }; } + if (responseTruncated) { + body = { _unideskResponseTruncated: true, maxResponseBytes, bytesRead: bytes, contentLength: res.headers.get("content-length"), textPreview: text }; + } + console.log(JSON.stringify({ ok: res.ok, status: res.status, responseTruncated, responseBytesRead: bytes, responseContentLength: res.headers.get("content-length"), body })); `; const result = runCommand(["docker", "exec", "unidesk-backend-core", "bun", "-e", code], repoRoot); if (result.exitCode !== 0) { @@ -50,6 +83,11 @@ function numberOption(args: string[], name: string, defaultValue: number): numbe return value; } +function cappedNumberOption(args: string[], name: string, defaultValue: number, maxValue: number): number { + const value = numberOption(args, name, defaultValue); + return Math.min(value, maxValue); +} + function stringOption(args: string[], name: string): string | undefined { const index = args.indexOf(name); if (index === -1) return undefined; @@ -58,6 +96,10 @@ function stringOption(args: string[], name: string): string | undefined { return raw; } +function hasFlag(args: string[], name: string): boolean { + return args.includes(name); +} + function methodOption(args: string[]): string { const method = (stringOption(args, "--method") ?? "GET").toUpperCase(); if (!["GET", "HEAD", "POST", "DELETE", "PUT", "PATCH"].includes(method)) throw new Error(`unsupported --method ${method}`); @@ -65,13 +107,34 @@ function methodOption(args: string[]): string { } export function summarizeMicroserviceProxyResponse(response: unknown, args: string[]): unknown { - if (args.includes("--raw")) return response; - const maxBodyBytes = numberOption(args, "--max-body-bytes", 60_000); + const full = args.includes("--full"); + const raw = args.includes("--raw"); + const maxBodyBytes = full ? numberOption(args, "--max-body-bytes", 5_000_000) : cappedNumberOption(args, "--max-body-bytes", raw ? 120_000 : 60_000, 500_000); if (typeof response !== "object" || response === null || Array.isArray(response)) return response; const record = response as Record; if (!("body" in record)) return response; + if (record.responseTruncated === true) { + return { + ...record, + bodyOmitted: true, + bodyMaxBytes: maxBodyBytes, + rawHint: "The upstream response exceeded the CLI collection cap before JSON parsing; re-run with --raw --full and a specific --max-body-bytes only when the full body is required.", + }; + } const bodyBytes = jsonByteLength(record.body); - if (bodyBytes <= maxBodyBytes) return response; + if (bodyBytes <= maxBodyBytes) { + if (!raw || full) return response; + return { + ...record, + outputPolicy: { + rawRequested: true, + bounded: true, + maxBodyBytes, + bodyBytes, + fullCommand: "Re-run with --raw --full to allow the complete body.", + }, + }; + } const rest = { ...record }; delete rest.body; return { @@ -80,7 +143,9 @@ export function summarizeMicroserviceProxyResponse(response: unknown, args: stri bodyBytes, bodyMaxBytes: maxBodyBytes, bodyPreview: previewJson(record.body, { maxDepth: 3, maxArrayItems: 3, maxObjectKeys: 16, maxStringLength: 320 }), - rawHint: "Re-run with --raw for the full body, or add/tighten __unideskArrayLimit=: in the proxied path.", + rawHint: raw && !full + ? "The --raw response exceeded the default hard limit; re-run with --raw --full for the complete body, or add/tighten __unideskArrayLimit=: in the proxied path." + : "Re-run with --raw --full for the complete body, or add/tighten __unideskArrayLimit=: in the proxied path.", }; } @@ -106,7 +171,11 @@ export async function runMicroserviceCommand(_config: UniDeskConfig, args: strin if (action === "proxy") { const id = requireId(idArg, "microservice proxy"); const path = requireProxyPath(pathArg); - return summarizeMicroserviceProxyResponse(coreInternalFetch(`/api/microservices/${encodeId(id)}/proxy${path}`, { method: methodOption(args) }), args); + const full = hasFlag(args, "--full"); + const raw = hasFlag(args, "--raw"); + const maxBodyBytes = full ? numberOption(args, "--max-body-bytes", 5_000_000) : cappedNumberOption(args, "--max-body-bytes", raw ? 120_000 : 60_000, 500_000); + const maxResponseBytes = full ? Math.min(Math.max(maxBodyBytes, 120_000), 5_000_000) : Math.min(Math.max(maxBodyBytes * 3, 240_000), 1_500_000); + return summarizeMicroserviceProxyResponse(coreInternalFetch(`/api/microservices/${encodeId(id)}/proxy${path}`, { method: methodOption(args), maxResponseBytes }), args); } throw new Error("microservice command must be one of: list, status, health, diagnostics, tunnel-self-test, proxy"); } diff --git a/scripts/src/output.ts b/scripts/src/output.ts index addfde5c..f3427535 100644 --- a/scripts/src/output.ts +++ b/scripts/src/output.ts @@ -5,6 +5,20 @@ export interface JsonEnvelope { error?: unknown; } +function isEpipe(error: unknown): boolean { + return typeof error === "object" && error !== null && "code" in error && (error as { code?: unknown }).code === "EPIPE"; +} + +process.stdout.on("error", (error) => { + if (isEpipe(error)) process.exit(0); + throw error; +}); + +process.stderr.on("error", (error) => { + if (isEpipe(error)) process.exit(0); + throw error; +}); + export function emitJson(command: string, data: T, ok = true): void { const envelope: JsonEnvelope = { ok, command, data }; safeStdoutWrite(`${JSON.stringify(envelope, null, 2)}\n`); @@ -22,10 +36,7 @@ function safeStdoutWrite(text: string): void { try { process.stdout.write(text); } catch (error) { - if (typeof error === "object" && error !== null && "code" in error && (error as { code?: unknown }).code === "EPIPE") { - process.exitCode = 0; - return; - } + if (isEpipe(error)) process.exit(0); throw error; } } diff --git a/scripts/src/remote.ts b/scripts/src/remote.ts index d09b52a1..bfccb177 100644 --- a/scripts/src/remote.ts +++ b/scripts/src/remote.ts @@ -27,6 +27,9 @@ interface FetchJsonResult { status?: number; body?: unknown; error?: string; + responseTruncated?: boolean; + responseBytesRead?: number; + responseContentLength?: string | null; } const hostOptions = new Set(["--main-server-ip", "--main-server", "--server"]); @@ -172,19 +175,54 @@ function frontendBaseUrl(host: string, config: UniDeskConfig): string { return `http://${host}:${config.network.frontend.port}`; } -async function readJson(url: string, init?: RequestInit, timeoutMs = 8000): Promise { +async function readJson(url: string, init?: RequestInit, timeoutMs = 8000, maxResponseBytes = 5_000_000): Promise { const controller = new AbortController(); const timer = setTimeout(() => controller.abort(), timeoutMs); try { const res = await fetch(url, { ...init, signal: controller.signal }); - const text = await res.text(); + const reader = res.body?.getReader(); + const chunks: Uint8Array[] = []; + let bytes = 0; + let responseTruncated = false; + if (reader !== undefined) { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + if (bytes + value.byteLength > maxResponseBytes) { + const keep = Math.max(0, maxResponseBytes - bytes); + if (keep > 0) { + chunks.push(value.slice(0, keep)); + bytes += keep; + } + responseTruncated = true; + try { + await reader.cancel(); + } catch { + // Ignore cancel failures after the bounded preview has been collected. + } + break; + } + chunks.push(value); + bytes += value.byteLength; + } + } + const buffer = new Uint8Array(bytes); + let offset = 0; + for (const chunk of chunks) { + buffer.set(chunk, offset); + offset += chunk.byteLength; + } + const text = new TextDecoder().decode(buffer); let body: unknown = null; try { - body = text.length > 0 ? JSON.parse(text) : null; + body = text.length > 0 && !responseTruncated ? JSON.parse(text) : null; } catch { body = { text }; } - return { ok: res.ok, status: res.status, body }; + if (responseTruncated) { + body = { _unideskResponseTruncated: true, maxResponseBytes, bytesRead: bytes, contentLength: res.headers.get("content-length"), textPreview: text }; + } + return { ok: res.ok, status: res.status, body, responseTruncated, responseBytesRead: bytes, responseContentLength: res.headers.get("content-length") }; } catch (error) { return { ok: false, error: error instanceof Error ? error.message : String(error) }; } finally { @@ -208,11 +246,11 @@ async function loginFrontend(host: string, config: UniDeskConfig): Promise { +async function frontendJson(session: FrontendSession, path: string, init?: RequestInit, timeoutMs = 8000, maxResponseBytes = 5_000_000): Promise { const headers = new Headers(init?.headers); headers.set("cookie", session.cookie); if (init?.body !== undefined && !headers.has("content-type")) headers.set("content-type", "application/json"); - return readJson(`${session.baseUrl}${path}`, { ...init, headers }, timeoutMs); + return readJson(`${session.baseUrl}${path}`, { ...init, headers }, timeoutMs, maxResponseBytes); } function stringOption(args: string[], name: string): string | undefined { @@ -231,6 +269,10 @@ function numberOption(args: string[], name: string, defaultValue: number): numbe return value; } +function cappedNumberOption(args: string[], name: string, defaultValue: number, maxValue: number): number { + return Math.min(numberOption(args, name, defaultValue), maxValue); +} + function jsonOption(args: string[], name: string): Record | undefined { const raw = stringOption(args, name); if (raw === undefined) return undefined; @@ -462,7 +504,11 @@ async function remoteMicroservice(session: FrontendSession, args: string[]): Pro }; } if (action === "proxy" && id !== undefined && path !== undefined && path.startsWith("/")) { - const response = await frontendJson(session, `/api/microservices/${encodeURIComponent(id)}/proxy${path}`, undefined, 24_000); + const full = args.includes("--full"); + const raw = args.includes("--raw"); + const maxBodyBytes = full ? numberOption(args, "--max-body-bytes", 5_000_000) : cappedNumberOption(args, "--max-body-bytes", raw ? 120_000 : 60_000, 500_000); + const maxResponseBytes = full ? Math.min(Math.max(maxBodyBytes, 120_000), 5_000_000) : Math.min(Math.max(maxBodyBytes * 3, 240_000), 1_500_000); + const response = await frontendJson(session, `/api/microservices/${encodeURIComponent(id)}/proxy${path}`, undefined, 24_000, maxResponseBytes); return { transport: "frontend", response: summarizeMicroserviceProxyResponse(response, args), diff --git a/scripts/src/swap.ts b/scripts/src/swap.ts new file mode 100644 index 00000000..390c86db --- /dev/null +++ b/scripts/src/swap.ts @@ -0,0 +1,303 @@ +import { accessSync, constants, existsSync, readFileSync, statSync } from "node:fs"; +import { runCommand } from "./command"; +import { repoRoot } from "./config"; + +const defaultSwapPath = "/swapfile"; +const defaultSwapSizeBytes = 2 * 1024 * 1024 * 1024; + +export interface SwapArea { + filename: string; + type: string; + sizeBytes: number; + usedBytes: number; + priority: number | null; +} + +export interface SwapMemoryStatus { + totalBytes: number; + availableBytes: number | null; + swapTotalBytes: number; + swapFreeBytes: number; +} + +export interface SwapStatus { + memory: SwapMemoryStatus; + activeSwaps: SwapArea[]; + configuredPath: string; + configuredPathExists: boolean; + configuredPathMode: string | null; + configuredPathSizeBytes: number | null; + configuredPathActive: boolean; + fstab: { + path: string; + writable: boolean; + persisted: boolean; + matchingLine: string | null; + error: string | null; + }; + warning: string | null; +} + +export interface SwapEnsureResult { + ok: boolean; + status: "ok" | "degraded" | "failed"; + requested: { + path: string; + sizeBytes: number; + }; + before: SwapStatus; + after: SwapStatus; + actions: Array<{ action: string; ok: boolean; detail?: unknown }>; + errors: Array<{ action: string; message: string; detail?: unknown }>; +} + +function shellQuote(value: string): string { + return `'${value.replace(/'/g, `'\\''`)}'`; +} + +function parseByteCount(value: string): number { + const raw = value.trim(); + if (/^\d+$/u.test(raw)) return Number(raw); + const match = raw.match(/^([0-9]+(?:\.[0-9]+)?)([KMGTPE]?i?B?)$/iu); + if (!match) return 0; + const amount = Number(match[1]); + const unit = match[2].toUpperCase(); + const powers: Record = { + K: 1, + KB: 1, + KIB: 1, + M: 2, + MB: 2, + MIB: 2, + G: 3, + GB: 3, + GIB: 3, + T: 4, + TB: 4, + TIB: 4, + P: 5, + PB: 5, + PIB: 5, + E: 6, + EB: 6, + EIB: 6, + }; + return Math.round(amount * (1024 ** (powers[unit] ?? 0))); +} + +function parseMeminfo(): SwapMemoryStatus { + const raw = readFileSync("/proc/meminfo", "utf8"); + const values = new Map(); + for (const line of raw.split("\n")) { + const match = line.match(/^([^:]+):\s+(\d+)\s+kB/u); + if (match) values.set(match[1], Number(match[2]) * 1024); + } + return { + totalBytes: values.get("MemTotal") ?? 0, + availableBytes: values.get("MemAvailable") ?? null, + swapTotalBytes: values.get("SwapTotal") ?? 0, + swapFreeBytes: values.get("SwapFree") ?? 0, + }; +} + +function parseSwaps(): SwapArea[] { + if (!existsSync("/proc/swaps")) return []; + const lines = readFileSync("/proc/swaps", "utf8").trim().split("\n").slice(1); + return lines.map((line) => line.trim().split(/\s+/u)).filter((parts) => parts.length >= 5).map(([filename, type, sizeKiB, usedKiB, priority]) => ({ + filename, + type, + sizeBytes: Number(sizeKiB) * 1024, + usedBytes: Number(usedKiB) * 1024, + priority: Number.isFinite(Number(priority)) ? Number(priority) : null, + })); +} + +function fileMode(path: string): string | null { + if (!existsSync(path)) return null; + return (statSync(path).mode & 0o777).toString(8).padStart(3, "0"); +} + +function fstabStatus(path: string): SwapStatus["fstab"] { + const fstabPath = "/etc/fstab"; + try { + const raw = existsSync(fstabPath) ? readFileSync(fstabPath, "utf8") : ""; + let writable = false; + try { + accessSync(fstabPath, constants.W_OK); + writable = true; + } catch { + writable = false; + } + const matchingLine = raw.split("\n").find((line) => { + const trimmed = line.trim(); + if (trimmed.length === 0 || trimmed.startsWith("#")) return false; + const parts = trimmed.split(/\s+/u); + return parts[0] === path && parts[2] === "swap"; + }) ?? null; + return { + path: fstabPath, + writable, + persisted: matchingLine !== null, + matchingLine, + error: null, + }; + } catch (error) { + return { + path: fstabPath, + writable: false, + persisted: false, + matchingLine: null, + error: error instanceof Error ? error.message : String(error), + }; + } +} + +export function swapStatus(path = defaultSwapPath): SwapStatus { + const memory = parseMeminfo(); + const activeSwaps = parseSwaps(); + const configuredPathExists = existsSync(path); + const configuredPathSizeBytes = configuredPathExists ? statSync(path).size : null; + const configuredPathActive = activeSwaps.some((swap) => swap.filename === path); + const warning = memory.swapTotalBytes > 0 ? null : "swap is not active; low-memory main servers are at risk of global OOM during builds or diagnostics"; + return { + memory, + activeSwaps, + configuredPath: path, + configuredPathExists, + configuredPathMode: fileMode(path), + configuredPathSizeBytes, + configuredPathActive, + fstab: fstabStatus(path), + warning, + }; +} + +function pushAction( + actions: SwapEnsureResult["actions"], + errors: SwapEnsureResult["errors"], + action: string, + command: string[], +): boolean { + const result = runCommand(command, repoRoot, { timeoutMs: 120_000 }); + const ok = result.exitCode === 0; + const detail = { + command, + exitCode: result.exitCode, + stdoutTail: result.stdout.slice(-1200), + stderrTail: result.stderr.slice(-1200), + timedOut: result.timedOut, + }; + actions.push({ action, ok, detail }); + if (!ok) { + errors.push({ + action, + message: result.stderr.trim() || result.stdout.trim() || `command failed with exit code ${result.exitCode}`, + detail, + }); + } + return ok; +} + +function ensureFstabLine(path: string): { ok: boolean; action: string; detail: unknown } { + const line = `${path} none swap sw 0 0`; + const script = [ + "set -euo pipefail", + "touch /etc/fstab", + `grep -Eq '^${path.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}[[:space:]]+[^[:space:]]+[[:space:]]+swap[[:space:]]' /etc/fstab || printf '%s\\n' ${shellQuote(line)} >> /etc/fstab`, + ].join("\n"); + const result = runCommand(["bash", "-lc", script], repoRoot, { timeoutMs: 30_000 }); + return { + ok: result.exitCode === 0, + action: "persist-fstab", + detail: { + command: ["bash", "-lc", script], + exitCode: result.exitCode, + stdoutTail: result.stdout.slice(-1200), + stderrTail: result.stderr.slice(-1200), + timedOut: result.timedOut, + }, + }; +} + +function parseSizeOption(args: string[], defaultBytes: number): number { + const index = args.indexOf("--size"); + const raw = index === -1 ? undefined : args[index + 1]; + if (raw === undefined) return defaultBytes; + const bytes = parseByteCount(raw); + if (!Number.isFinite(bytes) || bytes <= 0) throw new Error("--size must be a positive byte count such as 2GiB or 4096M"); + return bytes; +} + +function parsePathOption(args: string[], defaultPath: string): string { + const index = args.indexOf("--path"); + if (index === -1) return defaultPath; + const raw = args[index + 1]; + if (raw === undefined || !raw.startsWith("/")) throw new Error("--path must be an absolute path"); + return raw; +} + +function hasFlag(args: string[], name: string): boolean { + return args.includes(name); +} + +export function runSwapCommand(args: string[]): unknown { + const [action = "status"] = args; + const path = parsePathOption(args, defaultSwapPath); + if (action === "status") return swapStatus(path); + if (action === "ensure") { + const sizeBytes = parseSizeOption(args, defaultSwapSizeBytes); + const dryRun = hasFlag(args, "--dry-run"); + const before = swapStatus(path); + const actions: SwapEnsureResult["actions"] = []; + const errors: SwapEnsureResult["errors"] = []; + if (before.memory.swapTotalBytes > 0) { + actions.push({ action: "noop-existing-swap", ok: true, detail: { activeSwaps: before.activeSwaps } }); + const after = swapStatus(path); + return { ok: true, status: "ok", requested: { path, sizeBytes }, before, after, actions, errors } satisfies SwapEnsureResult; + } + if (dryRun) { + actions.push({ action: "dry-run", ok: true, detail: { wouldCreate: path, sizeBytes, wouldPersistFstab: true } }); + const after = swapStatus(path); + return { ok: true, status: "degraded", requested: { path, sizeBytes }, before, after, actions, errors } satisfies SwapEnsureResult; + } + if (!existsSync(path)) { + const sizeMiB = Math.ceil(sizeBytes / 1024 / 1024); + const allocated = pushAction(actions, errors, "allocate-swapfile", ["fallocate", "-l", `${sizeMiB}M`, path]); + if (!allocated) pushAction(actions, errors, "allocate-swapfile-dd-fallback", ["dd", "if=/dev/zero", `of=${path}`, "bs=1M", `count=${sizeMiB}`, "status=none"]); + } else { + const existingBytes = statSync(path).size; + if (existingBytes < sizeBytes) { + const sizeMiB = Math.ceil(sizeBytes / 1024 / 1024); + const resized = pushAction(actions, errors, "resize-existing-swapfile", ["fallocate", "-l", `${sizeMiB}M`, path]); + if (!resized) pushAction(actions, errors, "resize-existing-swapfile-dd-fallback", ["dd", "if=/dev/zero", `of=${path}`, "bs=1M", `count=${sizeMiB}`, "status=none"]); + } else { + actions.push({ action: "reuse-existing-swapfile-path", ok: true, detail: { path, sizeBytes: existingBytes } }); + } + } + pushAction(actions, errors, "chmod-600", ["chmod", "600", path]); + pushAction(actions, errors, "mkswap", ["mkswap", path]); + pushAction(actions, errors, "swapon", ["swapon", path]); + const persist = ensureFstabLine(path); + actions.push({ action: persist.action, ok: persist.ok, detail: persist.detail }); + if (!persist.ok) { + errors.push({ + action: persist.action, + message: "swap is active but /etc/fstab could not be updated; rerun ensure as root or add the returned fstab line manually", + detail: persist.detail, + }); + } + const after = swapStatus(path); + const swapActive = after.memory.swapTotalBytes > 0; + const status = swapActive && after.fstab.persisted ? "ok" : swapActive ? "degraded" : "failed"; + return { + ok: status !== "failed", + status, + requested: { path, sizeBytes }, + before, + after, + actions, + errors, + } satisfies SwapEnsureResult; + } + throw new Error("server swap command must be one of: status, ensure"); +}