fix: stream runner tran ssh output

This commit is contained in:
Codex
2026-05-25 11:41:56 +00:00
parent 230c13efe4
commit 4a03052747
12 changed files with 381 additions and 152 deletions
+9
View File
@@ -26,6 +26,15 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文
- D601 上 HWLAB 验证必须通过 UniDesk 透传在 D601 执行,禁止回到 master server 跑 HWLAB check、Playwright/browser smoke、镜像构建或其他重型验证。k3s 操作使用 route 语法 `D601:k3s ...`,默认 D601 原生 k3s kubeconfig。
- 长期细节见 `docs/reference/hwlab.md`
## Critical D601 UniDesk Workspace Rule
- P0: `D601:UniDesk` 的固定开发 workspace 是 D601 节点上的 `/home/ubuntu/workspace/unidesk-dev`,固定使用 `master` 分支和 `origin git@github.com:pikasTech/unidesk.git`;所有需要在 D601 上改 UniDesk 代码、跑轻量合同测试、做分布式敏捷实验补丁收敛或验证 Code Queue runner/tran 的工作,都必须优先使用这个目录。
- P0: 每次开始 `D601:UniDesk` 分布式开发、切换任务、恢复中断或上下文压缩后,必须重新读取目标 workspace 的 `/home/ubuntu/workspace/unidesk-dev/AGENTS.md`,并以该文件和其引用的 UniDesk repo 内规则为当前任务约束;禁止只凭压缩摘要或主 server 记忆继续改代码。
- `/home/ubuntu/cq-deploy``/root/unidesk``/app`、Code Queue pod 内 `/root/unidesk``/tmp/unidesk-*` 都是运行副本、部署副本或一次性实验面,不是 `D601:UniDesk` 日常开发 source truth;运行面热修可以直接作用于 pod/容器,但必须随后把持久化修复落回 fixed workspace 和 Git remote。
- 每次开始 `D601:UniDesk` 工作前必须通过 UniDesk SSH 桥执行 `cd /home/ubuntu/workspace/unidesk-dev && git status --short --branch && git remote -v`;若路径、分支、remote 或权限不符合预期,必须先修正固定 workspace,再继续开发、测试或发布。
- D601 上 UniDesk 验证必须通过 UniDesk 透传在 D601 执行,禁止回到 master server 跑仓库级 check、Playwright/browser smoke、镜像构建或其他重型验证。Code Queue/tran 运行面验证应优先直达 D601 原生 k3s podk3s 操作使用 route 语法 `D601:k3s ...`
- 长期细节见 `docs/reference/dev-environment.md`
## Critical D601 Kubernetes Control-Plane Rule
- P0: D601 上的 Kubernetes 运行面只能以自部署原生 k3s 为准;Docker Desktop Kubernetes 已经停用并清理数据,任何人不得重新启用或把它作为 UniDesk/HWLAB 部署、CI/CD、诊断或验收目标。跟踪 issue: [pikasTech/unidesk#138](https://github.com/pikasTech/unidesk/issues/138),热修复背景见 [pikasTech/unidesk#118](https://github.com/pikasTech/unidesk/issues/118)。
+1
View File
@@ -368,6 +368,7 @@ services:
PROVIDER_INGRESS_PUBLIC_URL: "ws://${UNIDESK_PUBLIC_HOST}:${UNIDESK_PROVIDER_INGRESS_PORT}/ws/provider"
AUTH_USERNAME: "${UNIDESK_AUTH_USERNAME}"
AUTH_PASSWORD: "${UNIDESK_AUTH_PASSWORD}"
PROVIDER_TOKEN: "${UNIDESK_PROVIDER_TOKEN}"
SESSION_SECRET: "${UNIDESK_SESSION_SECRET}"
SESSION_TTL_SECONDS: "${UNIDESK_SESSION_TTL_SECONDS}"
UNIDESK_DEPLOY_REF: "${UNIDESK_FRONTEND_DEPLOY_REF:-deploy.json#environments.prod.services.frontend}"
+5 -4
View File
@@ -24,7 +24,7 @@ CLI 可以从 `master` 快速演进,但必须兼容 `deploy.json` 固定的 CI
- `ssh <providerId> py [script-args...] < script.py` 把本地 stdin 落到远端临时 `.py` 文件后再以 `python3 -u` 执行并自动清理,避免再手写 `'python3 -'`、heredoc 或多层引号;`script-args` 会按 argv 安全透传给远端脚本。
- `ssh <providerId> skills [--scope all|wsl|windows] [--limit N]` 发现目标节点上的 WSL/Linux skill 根目录;当 provider 是 WSL 时同一次调用还会扫描 Windows 用户目录下的 `.agents/skills``.codex/skills`
- `ssh <providerId>:k3s[:namespace:workload[:container]] <operation> ...` 是原生 k3s 结构化 route 入口,route 只定位控制面或 workload`kubectl``logs``exec``script``apply-patch` 和普通容器命令作为 operation 放在 route 之后;CLI 固定注入 `KUBECONFIG=/etc/rancher/k3s/k3s.yaml` 并把 kubectl、workload exec 和 logs 参数组装成 argv,避免在 Host SSH、bash、kubectl exec 和容器 shell 之间反复手写多层引号;D601 与 G14 都有 provider-specific guard,分别校验 `d601` 和 G14 k3s 节点身份。
- Code Queue runner 镜像必须在 PATH 上提供 `/usr/local/bin/tran`。runner 内的 `tran` 检测到 `CODE_QUEUE_*``KUBERNETES_SERVICE_HOST` 后,默认执行 `bun /root/unidesk/scripts/cli.ts --main-server-ip <public-frontend> ssh ...`,其中 `<public-frontend>` 优先来自 `UNIDESK_MAIN_SERVER_IP` / `UNIDESK_MAIN_SERVER_HOST` / `CODE_QUEUE_DEV_CONTAINER_MASTER_HOST`。runner remote frontend HTTP 客户端默认使用 `curl` 后端,降低 Bun 在部分 runner 内读取 HTTP response body 时触发 native crash 的风险;显式 `UNIDESK_REMOTE_HTTP_CLIENT=fetch` 可用于诊断。runner 内跨 D601/G14 的分布式访问应优先使用无 stdin 的 `tran D601 argv ...``tran G14 argv ...``tran D601:k3s kubectl ...``tran D601:k3s:<namespace>:<workload> argv ...``script``apply-patch``py` 等 stdin helper 需要在主 server/host 侧 `tran` 或显式 `--main-server-transport ssh` 中执行,直到 frontend dispatch 支持 stdin 流
- Code Queue runner 镜像必须在 PATH 上提供 `/usr/local/bin/tran`。runner 内的 `tran` 检测到 `CODE_QUEUE_*``KUBERNETES_SERVICE_HOST` 后,默认执行 `bun /root/unidesk/scripts/cli.ts --main-server-ip <public-frontend> ssh ...`,其中 `<public-frontend>` 优先来自 `UNIDESK_MAIN_SERVER_IP` / `UNIDESK_MAIN_SERVER_HOST` / `CODE_QUEUE_DEV_CONTAINER_MASTER_HOST`。runner remote frontend HTTP 客户端默认使用 `curl` 后端,降低 Bun 在部分 runner 内读取非 SSH HTTP response body 时触发 native crash 的风险;显式 `UNIDESK_REMOTE_HTTP_CLIENT=fetch` 可用于诊断。runner 内跨 D601/G14 的分布式访问应优先使用结构化 route/operation,例如 `tran D601 argv ...``tran G14 argv ...``tran D601:k3s kubectl ...``tran D601:k3s:<namespace>:<workload> argv ...``script``apply-patch``py` 等 stdin helper 通过 frontend `/ws/ssh` 流式通道执行,stdout/stderr 也必须完整直通,不得退回 `/api/dispatch` task JSON
- `microservice list/status/health/diagnostics/tunnel-self-test/proxy` 通过 backend-core 内网 API 管理挂载在计算节点 Docker 或 k3s 控制面中的用户服务(底层命令名仍为 microservice);`health``status``diagnostics` 默认返回 compact summary、body 字节数和 `--full|--raw` 展开命令,只有小 body 或无法抽取 summary 时才带有界 body preview,避免 Code Queue/k3s 诊断一次性输出爆炸;`tunnel-self-test``proxy` 会走真实 backend-core -> provider-gateway 或 k3sctl-adapter -> 节点服务链路。`microservice health code-queue` 使用 commander-safe 专用摘要,必须保留 ok/status、service id、running count、queue count、heartbeat freshness/risk、split-brain/live/degraded 解释和 raw drill-down 命令;需要完整健康 JSON 时显式加 `--raw``--full`,等价深挖路径是 `microservice proxy code-queue /health --raw --full``proxy` 支持受控 JSON 请求体并对超大响应 body 默认输出有界预览,规则见 `docs/reference/microservices.md`
- `decision upload/list/show/health` 通过 backend-core 用户服务代理访问 D601 k3s Decision Center,用于上传会议记录/决议 Markdown、列出权威记录、查看详情和健康检查;`decision list` 默认只返回摘要并省略完整 Markdown body,需要排查大正文时显式加 `--include-body`。正式文书字段通过 records 模型一等字段返回和查询:`--doc-no DC-...``--doc-type DCSN|GOAL|PLAN|RPRT|ACTN|ISSU|RETR|RQST|RESP|MINS``--doc-priority P0|P1|P2|P3``--year YYYY``--signer``--issued-at``--effective-scope``--supersedes``--superseded-by``show``requirement update` 可使用 `id``docNo``decision requirement list/create/upsert/update/show` 在同一 records 模型上管理 `goal|decision|blocker|debt|experiment` 需求记录,`docNo` 唯一,未传 `--doc-no` 但提供 `--doc-type/--doc-priority/--year` 时由服务分配下一个序号。它们不得直连 D601 Service、NodePort 或 provider-gateway 业务 HTTP。
- `decision diary import <markdown-file>` 将带 `# YYYY年M月D日``# YYYY-MM-DD``# YYYY/M/D` 标题的工作日志拆成每天一篇 Markdown 日记,按 `YYYY-MM/YYYY-MM-DD.md` 虚拟路径写入 Decision Center PostgreSQL`decision diary list/history` 默认只返回摘要,需要完整 Markdown 时显式加 `--include-body``decision diary show <YYYY-MM-DD|id> [--source-file path]` 查看单日正文,`--source-file` 用于同一天存在多个导入来源时精确选择;`decision diary edit|upsert <YYYY-MM-DD|id> --body-file <path> [--title text] [--source-file path] [--tag tag]` 通过 `PUT /api/diary/entries/:idOrDate` 创建当天或历史条目并编辑既有条目。
@@ -210,6 +210,7 @@ bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api logs --tail 80
bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api node -e 'console.log(process.version)'
bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api/app pwd
printf 'printf "pod=%s\n" "$HOSTNAME"\n' | bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api script
tar -C /tmp/patched-files -cf - . | bun scripts/cli.ts ssh D601:k3s:unidesk:code-queue/root/unidesk exec --stdin -- tar -xf - -C /root/unidesk
bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api apply-patch <<'PATCH'
*** Begin Patch
*** Update File: /tmp/example.txt
@@ -220,9 +221,9 @@ bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api apply-patch <<'PATCH'
PATCH
```
`logs` operation 默认是有界读取;`--follow`/`-f` 会被拒绝,防止 CLI 长时间占用维护桥。目标 route 后面直接跟普通命令时,CLI 会把 argv 放到 `kubectl exec --` 后;显式 `exec` operation 可用于让命令边界更清晰。如果命令本身需要复杂 shell 语法优先改用 `script` operation,把脚本走 stdin,而不是把 `kubectl exec ... -- sh -c ...` 放进远端命令字符串。pod 内 `apply-patch` operation 使用同一个 sh helper,不要求目标容器自带 `python3``node` 或仓库里的工具脚本;它面向文本热修复,不用于大文件或二进制改写。
`logs` operation 默认是有界读取;`--follow`/`-f` 会被拒绝,防止 CLI 长时间占用维护桥。目标 route 后面直接跟普通命令时,CLI 会把 argv 放到 `kubectl exec --` 后;显式 `exec` operation 可用于让命令边界更清晰。`exec --stdin -- <command> ...` 是 workload route 的通用 stdin 流入口,适合把 tar、patch 以外的任意字节流直接送进容器命令;operation 选项必须放在 `--` 前,容器命令从 `--` 后开始。需要 shell 语法优先改用 `script` operation,把脚本走 stdin,而不是把 `kubectl exec ... -- sh -c ...` 放进远端命令字符串。pod 内 `apply-patch` operation 使用同一个 sh helper,不要求目标容器自带 `python3``node` 或仓库里的工具脚本;它面向文本热修复,不用于大文件或二进制改写。
`ssh <providerId> argv <command> [args...]` 是通用 argv 安全拼接入口;`exec` 是同义入口。它是非交互远端单进程命令的默认成功路径,不需要 shell 管道时直接传命令和参数,例如 `bun scripts/cli.ts ssh D601 argv true`。需要管道、重定向、变量展开或多条命令时,优先改用 `ssh <providerId> script <<'SCRIPT'``find``glob``apply-patch` 有专用入口;`rg``grep``sed``nl``stat``du``ls``cat``head``tail``wc``pwd` 可以直接作为 `ssh` 子命令使用,CLI 会对每个 argv token 做 shell quoting。旧的自由 ssh-like 远端命令入口只保留为近似原生 ssh 的人工兼容路径。
`ssh <providerId> argv <command> [args...]` 是通用 argv 安全拼接入口;`exec` 是同义入口。它是非交互远端单进程命令的默认成功路径,不需要 shell 管道时直接传命令和参数,例如 `bun scripts/cli.ts ssh D601 argv true`。需要管道、重定向、变量展开或多条命令时,优先改用 `ssh <providerId> script <<'SCRIPT'``find``glob``apply-patch` 有专用入口;`git``rg``grep``sed``nl``stat``du``ls``cat``head``tail``wc``pwd` 可以直接作为 `ssh` 子命令使用,CLI 会对每个 argv token 做 shell quoting。旧的自由 ssh-like 远端命令入口只保留为近似原生 ssh 的人工兼容路径。
通过 `ssh <providerId>` 执行多行脚本时,优先使用结构化 helper,例如 `bun scripts/cli.ts ssh G14 py < script.py``bun scripts/cli.ts ssh G14 script <<'SCRIPT'``bun scripts/cli.ts ssh G14:k3s script <<'SCRIPT'`。不要在远端命令字符串里再嵌套 heredoc、复杂引号或 `ssh 'python3 - <<EOF ...'` 形态;多层 shell 解析容易把 stdin 绑定到错误进程,结果会打开远端交互解释器并留下悬挂的 broker/SSH 会话。长脚本需要复用时,优先提交到 repo 或通过 stdin 传输到目标节点执行。
@@ -230,7 +231,7 @@ PATCH
`--main-server-ip` 是一个全局前缀,必须放在需要透传的命令同一次调用中,例如 `bun scripts/cli.ts --main-server-ip 74.48.78.17 debug health`。默认传输是公网 frontend:本地 CLI 读取本仓库 `config.json` 中的 frontend 登录账号密码,登录 `http://<ip>:<frontendPort>/` 获取 HttpOnly session cookie,然后通过 frontend 的 `/api/*` 同源代理访问 backend-core 内网 API;因此计算节点只需要能访问公网 frontend,不需要主 server SSH key,也不需要打开 backend-core REST API 或 PostgreSQL 端口。
默认 frontend 传输支持 `debug health``debug dispatch``debug task``artifact-registry status|health``ci publish-user-service --dry-run``microservice list/status/health/diagnostics/tunnel-self-test/proxy``decision upload/list/show/health``decision requirement list/upsert``decision diary import/list/history/months/show/edit/upsert``codex task <taskId>``codex tasks``codex unread``codex queues``codex output <taskId>``codex judge <taskId> --attempt N``ssh <PROVIDER_ID> <remote-command>``microservice status/health/diagnostics` 经 frontend 远程传输时也复用本地 CLI 的默认 compact summary`microservice health code-queue` 只有显式 `--raw``--full` 才返回完整健康 body。运行中纠偏 `codex steer` 属于 active run write control,应在主 server 本机 CLI 或显式 SSH 传输上执行,避免公网 frontend 透传限制 stdin/body 审计语义。其中 `ssh` 的 remote frontend 传输使用 `host.ssh` dispatch 执行有界远端命令,非交互单进程命令优先 `ssh D601 argv true`需要 stdin script、`py` `apply-patch` 这类 stdin-backed helper 时必须在主 server 本机运行,或显式切换到 `--main-server-transport ssh`。交互式登录 shell 仍应在主 server 本机 CLI 使用,或显式切换到旧 SSH 传输后在主 server 上执行。当 backend-core、database、provider-dispatch 或 provider-host-ssh 缺失时,这些 read-only 预检必须返回结构化 `runnerDisposition=infra-blocked` 和缺失通道列表,而不是裸 `No such container`。若确实需要旧行为,可使用 `--main-server-key <key>``--main-server-transport ssh`,这时 CLI 会通过 SSH 登录主 server 的 `--main-server-root` 目录执行同一个 `bun scripts/cli.ts <command>`
默认 frontend 传输支持 `debug health``debug dispatch``debug task``artifact-registry status|health``ci publish-user-service --dry-run``microservice list/status/health/diagnostics/tunnel-self-test/proxy``decision upload/list/show/health``decision requirement list/upsert``decision diary import/list/history/months/show/edit/upsert``codex task <taskId>``codex tasks``codex unread``codex queues``codex output <taskId>``codex judge <taskId> --attempt N``ssh <PROVIDER_ID> <remote-command>``microservice status/health/diagnostics` 经 frontend 远程传输时也复用本地 CLI 的默认 compact summary`microservice health code-queue` 只有显式 `--raw``--full` 才返回完整健康 body。运行中纠偏 `codex steer` 属于 active run write control,应在主 server 本机 CLI 或显式 SSH 传输上执行,避免公网 frontend 透传限制 stdin/body 审计语义。其中 `ssh` 的 remote frontend 传输使用 authenticated frontend `/ws/ssh` WebSocket 代理接入 backend-core SSH bridgestdout/stderr 按字节流直通到调用端,不经过 `/api/dispatch``/api/tasks` 或 task JSON compactfrontend 运行时必须通过 `PROVIDER_TOKEN`/`UNIDESK_PROVIDER_TOKEN``PROVIDER_TOKEN_FILE`/`UNIDESK_PROVIDER_TOKEN_FILE` 读取 provider token,并且不能把 token 下发给 runner。因此 D601 Code Queue runner 内的 `tran G14 ...` 应与主 server 本机 `tran G14 ...` 在输出完整性上保持同一语义。非交互单进程命令优先 `ssh D601 argv true`stdin script、`py` `apply-patch` 这类 stdin-backed helper 也走同一条 `/ws/ssh` 流式通道。交互式登录 shell 仍应在主 server 本机 CLI 使用,或显式切换到旧 SSH 传输后在主 server 上执行。当 backend-core、database、provider-dispatch 或 provider-host-ssh 缺失时,这些 read-only 预检必须返回结构化 `runnerDisposition=infra-blocked` 和缺失通道列表,而不是裸 `No such container`。若确实需要旧行为,可使用 `--main-server-key <key>``--main-server-transport ssh`,这时 CLI 会通过 SSH 登录主 server 的 `--main-server-root` 目录执行同一个 `bun scripts/cli.ts <command>`
计算节点可以用该入口测试自身的远程升级闭环,而不需要在计算节点公开 core REST API 或 database。标准顺序是:先运行 `bun scripts/cli.ts --main-server-ip 74.48.78.17 debug health` 确认主 server 看到当前 Provider 在线,且该 Provider labels 中 `unideskCapabilities` 包含 `host.ssh``hostSshConfigured=true``hostSshKeyPresent=true`;再运行 `bun scripts/cli.ts --main-server-ip 74.48.78.17 debug dispatch <PROVIDER_ID> provider.upgrade --mode schedule --wait-ms 15000` 触发真实 `provider.upgrade`;随后再次运行 `debug health` 确认节点重新上线;最后运行 `bun scripts/cli.ts --main-server-ip 74.48.78.17 debug dispatch <PROVIDER_ID> host.ssh --wait-ms 15000``bun scripts/cli.ts --main-server-ip 74.48.78.17 ssh <PROVIDER_ID> hostname` 验证 SSH 透传能力。provider-gateway 新部署或升级后没有完成这组 remote CLI 自测,不能视为交付完成。
+1 -1
View File
@@ -415,7 +415,7 @@ Code Queue task 不是只要 push 代码就算完成。
如果缺陷只存在于 Code Queue 执行环境,且服务可以在 dev 中安全热修而不触碰 prod,应先做最小临时 live remedy。然后把修复持久化到相关 Dockerfile、容器镜像或凭证传播路径,并在 dev 验证持久化修复后再关闭问题。
Code Queue runner 的分布式访问能力必须通过镜像内 `/usr/local/bin/tran` 固化,而不是依赖临时拷贝脚本或手工记忆命令前缀。runner 内 `tran` 走公网 frontend 控制面`/api/dispatch`,不要求 runner pod 持有 provider token,也不要求 runner pod 能解析 backend-core 集群内 DNS因此当 runner-local DNS/Secret 缺失而 frontend dispatch 仍可用时,应判为可恢复的 runner-local observation gap。runner 环境中的 remote frontend HTTP 读写默认使用 `curl` 后端,以降低部分 runner 上 Bun HTTP body 读取 native crash 的风险;这不是绕过控制面,route parser 和 dispatch payload 仍由 UniDesk CLI 生成。当前必需验收目标是 D601 的真实 Code Queue pod:至少执行 `tran D601 argv ...``tran G14 argv ...` 以及一个 `tran <provider>:k3s ...` 只读命令,证明 D601 runner 能跨 provider host 与 k3s route 透传。G14 runner 可作为后续兼容性观察,不作为该合同当前阻塞条件。
Code Queue runner 的分布式访问能力必须通过镜像内 `/usr/local/bin/tran` 固化,而不是依赖临时拷贝脚本或手工记忆命令前缀。runner 内 `tran` 走公网 frontend 控制面;其中 `ssh`/provider route 通过 authenticated frontend `/ws/ssh` WebSocket 代理接入 backend-core SSH bridgestdout/stderr 必须按字节流直通到 runner,不得再通过 `/api/dispatch``/api/tasks` 或 task JSON compact 返回。runner 不要求持有 provider token,也不要求能解析 backend-core 集群内 DNSfrontend 是唯一对 runner 暴露的控制面,provider token 只存在于 frontend/backend-core 运行侧。runner 环境中的非 SSH remote frontend HTTP 读写默认使用 `curl` 后端,以降低部分 runner 上 Bun HTTP body 读取 native crash 的风险;这不是绕过控制面,route parser、WebSocket open payload 和 API request 仍由 UniDesk CLI 生成。当前必需验收目标是 D601 的真实 Code Queue pod:至少执行 `tran D601 argv ...``tran G14 argv ...` 以及一个 `tran <provider>:k3s ...` 只读命令,并用长 stdout 的长度/哈希证明没有 `...<truncated:N>` 标记,证明 D601 runner 能跨 provider host 与 k3s route 透传。G14 runner 可作为后续兼容性观察,不作为该合同当前阻塞条件。
如果业务任务发现缺少工具、Secret/env、DNS、egress 或凭证路径,指挥官应把它拆成独立 infra task,并标记为 `runnerDisposition=infra-blocked` 或等价基础设施阻塞,而不是埋在业务任务 prompt 中。业务 runner 不应自行摸索 live Secret、打印 env/token、复制凭证命令、扩大网络出口或通过反复 rollout 猜测问题;它只能提交脱敏证据、说明缺失能力和等待指挥官或 infra lane 处理。业务任务在 bridge 存在时应继续推进。
+15
View File
@@ -12,6 +12,21 @@ The dev environment lets users experience the next UniDesk version without inter
- Dev backend-core and frontend rollout use pushed Git commits from `origin/master:deploy.json#environments.dev`, not dirty local worktrees.
- Rust backend-core check/build must run on D601 CI through `ci publish-backend-core`; dev CD consumes the published image and must not compile Rust.
## D601 UniDesk Workspace
`D601:UniDesk` 的固定开发 workspace 是 D601 节点上的 `/home/ubuntu/workspace/unidesk-dev`,固定使用 `master` 分支和 `origin git@github.com:pikasTech/unidesk.git`。所有需要在 D601 上改 UniDesk 代码、跑轻量合同测试、验证 `tran`/Code Queue runner、收敛分布式敏捷实验补丁的工作,都应先进入这个目录。
每次开始 D601 UniDesk 分布式开发、切换任务、恢复中断或上下文压缩后,先通过 UniDesk SSH 维护桥执行:
```bash
tran D601:/home/ubuntu/workspace/unidesk-dev git status --short --branch
tran D601:/home/ubuntu/workspace/unidesk-dev git remote -v
```
若路径、分支或 remote 不符合预期,先修正 fixed workspace,再继续。`/home/ubuntu/cq-deploy`、Code Queue pod 内 `/root/unidesk`/`/app`、D601 上的 `/root/unidesk``/tmp/unidesk-*` 只作为部署副本、运行副本或临时实验面;运行面热修可以直接作用在 pod/容器,但必须随后把持久化修复提交到 Git remote,并在 fixed workspace 中复验。
Master server 不作为 UniDesk 重型验证机。仓库级 check、Playwright/browser smoke、镜像构建、Rust/Go 编译和 Code Queue runner 实测必须放到 D601、CI runner 或其他获批执行面;master server 只做轻量源码编辑、Git 操作、状态观察和受控调度。
## Public Dev Frontend Port
The main server owns one extra public entrypoint for dev UI:
+2 -2
View File
@@ -42,7 +42,7 @@ UniDesk 用户服务是挂载到 UniDesk 核心服务上的、面向用户使用
业务仓库由业务系统自己维护,包括源码、Dockerfile、docker-compose、配置模板和业务测试。UniDesk 只引用业务仓库 URL、commit id、Dockerfile/docker-compose 路径和运行容器名;不得把业务全量代码复制到 `src/components/microservices/` 形成双维护。`src/components/microservices/` 只能放通用示例或 UniDesk 自有示例,不作为业务仓库镜像。
Code Queue runner 也是分布式开发执行面。runner 镜像必须内置 `tran`,让 runner 在执行任务时能通过公网 frontend 控制面访问 D601、G14、host workspace、k3s 控制面和目标 pod。runner 内应优先使用 `tran <provider> argv ...``tran <provider>:k3s kubectl ...``tran <provider>:k3s:<namespace>:<workload> argv ...` 这类无 stdin 的结构化命令;需要 stdin 的 `script``apply-patch``py` 操作仍由主 server/host 侧 `tran` 或显式 SSH transport 执行。这个边界避免把 provider token、backend-core 内网 DNS 或长命令多层引号作为 runner 可用性的前提。
Code Queue runner 也是分布式开发执行面。runner 镜像必须内置 `tran`,让 runner 在执行任务时能通过公网 frontend 控制面访问 D601、G14、host workspace、k3s 控制面和目标 pod。runner 内应优先使用 `tran <provider> argv ...``tran <provider>:k3s kubectl ...``tran <provider>:k3s:<namespace>:<workload> argv ...` 这类结构化命令;需要 stdin 的 `script``apply-patch``py` 操作同样通过 frontend `/ws/ssh` 流式通道执行,不应退回 `/api/dispatch` task polling。这个边界避免把 provider token、backend-core 内网 DNS 或长命令多层引号作为 runner 可用性的前提,也避免大 stdout 被 task JSON compact 截断
## Main Server User Services
@@ -221,7 +221,7 @@ D601 上必须显式使用原生 k3s kubeconfig`KUBECONFIG=/etc/rancher/k3s/k
- Skill 注入边界:DEV Code Queue scheduler/read/write Pod 必须把宿主 `/home/ubuntu/.agents/skills` 只读挂载到容器 `/root/.agents/skills`,并设置 `UNIDESK_SKILLS_PATH=/root/.agents/skills`,让执行任务能读取 `cli-spec` 等技能;只允许挂载 skill 目录本身,不得把宿主 `~/.agents``~/.codex`、token、auth JSON 或其他隐私配置整体暴露给任务容器。`/health``/api/dev-ready` 必须暴露非敏感 `skills` 状态:路径、exists、available、readonly、skillCount、`cliSpecAvailable` 和修复建议;CLI `codex dev-ready` 可读取该摘要。当前交付只要求 DEV manifest 和旧 direct Compose 诊断路径具备只读 skill 注入;PROD Code Queue 发布前必须单独审查隔离级别,不能把 DEV 桥接模式直接推广为生产默认。
- Develop-ready 镜像:Code Queue 镜像必须在启动前预装 UniDesk/Pipeline 调试所需工具,至少包含 `codex``bun``node``npm`/`npx``git``rg``curl``python3`/`pip3``docker``docker compose``docker-compose``jq``ssh``rsync``make``gcc`/`g++``iptables``tar``gzip``unzip`;不得依赖 Codex 任务运行时再 `apt-get install` 这些基础环境。
- 远程开发容器与任务执行 ProviderCode Queue 必须能通过 live API 拉起 D601 等计算节点上的开发容器,入口为 `POST /api/dev-containers/<providerId>/start`,默认 Provider 为 `D601`。该流程由 Code Queue 调用 UniDesk `ssh <providerId>` 维护桥在目标节点创建 `unidesk-codex-dev-<providerId>`,并在 Code Queue 所在节点与开发容器之间建立 `ssh -w` TUN 点对点链路;服务所在节点负责对开发容器的 TUN 源地址做 NAT/MASQUERADE,开发容器默认路由和 DNS 改走该 TUN,从而让 `ping google.com`、DNS、HTTP(S) 等出网都经主 server 全局代理,而不是依赖 D601 本地网络。提交 Code Queue 任务时必须支持选择执行 Provider:`D601` 在 D601 原生 k3s 的 active Code Queue scheduler/runner Pod 中本机执行,默认工作目录为 `/workspace`,并且 `/workspace` 必须映射 D601 WSL host 的 `/home/ubuntu`;同一个 hostPath 还必须挂载到容器内 `/home/ubuntu`,让 WSL home 里的绝对 symlink(例如 `/workspace/cq-deploy -> /home/ubuntu/unidesk-code-queue-deploy`)在任务中可解析,不能只看到 symlink 名而无法进入目标目录。`/root/unidesk``/app` 必须单独映射 `/home/ubuntu/cq-deploy` 作为服务部署仓库;其他 Provider 在对应 `unidesk-codex-dev-<providerId>` 容器中执行,默认工作目录为 `/home/ubuntu`,可按任务覆盖 `cwd`。远程任务启动前必须自动复用或拉起该 Provider 的开发容器、同步 Codex 配置和允许的运行时 provider 环境变量,并通过同一 master TUN/NAT 链路出网;目标 host 存在 `/mnt` 时,开发容器必须挂载 host `/mnt:/mnt`,确保 D601 这类 WSL 节点的 Windows 盘符路径如 `/mnt/f/Work/ConStart` 在任务容器内可见,避免 agent 因缺少真实工作区而搜索到无关项目。TUN 建立必须幂等处理 stale 状态:启动前清理旧 `tun<id>`、默认路由、旧 tunnel SSH 进程和旧 OUTPUT 跳转,缺失旧设备不能导致失败,冷启动运行时准备要有有界但足够的 timeout。TUN 建立后必须创建 `UD-CQ-EGRESS-<provider>` OUTPUT 链,规则只允许 loopback、既有连接、`tun<id>` 出口以及到 master server 的 SSH tunnel 控制连接,随后 reject 其他 IPv4/IPv6 出站包;这条网络层封口是开发/执行容器的权威外网边界,不能用 `HTTP_PROXY`/`NO_PROXY` 环境变量替代,容器镜像也必须使用已解析出的唯一 `unidesk-code-queue:<provider>` 或显式 `image`,缺失时直接失败,禁止 provider-gateway image、`latest` 或其他隐式镜像 fallback。验收必须保留三类日志:容器建隧道后 `ping google.com` 成功、强制指定原 Docker 网卡直连外网被 `sealed_direct_ping=blocked_expected` 拦截、服务所在节点上对应 `UNIDESK-CODEX-DEV-<providerId>` NAT 链或 `tun<id>` 计数在 ping 前后增长;涉及 WSL 工作区任务时还必须在开发容器内验证目标 `/mnt/...` 路径可读。`GET /api/dev-containers/<providerId>/status` 必须展示默认路由、`route_8_8_8_8``egressFirewallChain` 和 OUTPUT 链跳转。开发容器代理密钥只生成到 `.state/code-queue/dev-proxy/` 与目标节点用户目录,不得提交到仓库。
- 远程维护桥调用:Code Queue 已迁移到 D601 后,Code Queue 后端 Pod 内没有主 server 的 `unidesk-backend-core` 容器,不能再把 `bun scripts/cli.ts ssh ...` 实现为本地 `docker exec unidesk-backend-core`。Code Queue 后端发起的 provider 维护命令必须通过主 server frontend `/api/dispatch` 进入 backend-core,再由目标 provider-gateway 执行 `host.ssh`;需要传递脚本时必须使用 base64 临时文件,超过 Host SSH 单命令长度上限时分块上传到目标 `/tmp` 后再执行,避免恢复到本地 Docker broker、交互 stdin 或手工 shell fallback。
- 远程维护桥调用:Code Queue 已迁移到 D601 后,Code Queue 后端 Pod 内没有主 server 的 `unidesk-backend-core` 容器,不能再把 `bun scripts/cli.ts ssh ...` 实现为本地 `docker exec unidesk-backend-core`。Code Queue runner 发起的 provider 维护命令必须通过主 server frontend authenticated `/ws/ssh` 流式代理进入 backend-core SSH bridge,再由目标 provider-gateway 执行 Host SSH/WSL SSHstdout/stderr 直接流回 runner,不能经过 `/api/dispatch` task polling 或 JSON compact。需要传递脚本、`py``apply-patch` 时也使用同一条 stdin 流式通道,避免恢复到本地 Docker broker、手工 base64 分块上传、交互 shell fallback 或多层引号
- 远程 Provider 准备不得阻塞控制面:Code Queue 在请求处理、队列调度、远程开发容器准备、Host SSH/WSL SSH 透传、Codex/OpenCode 启动和日志导出路径中,禁止使用会长时间占用 Bun event loop 的同步子进程调用,例如针对远程 Provider 的 `spawnSync``execSync``execFileSync`。远程命令必须通过异步子进程执行,带显式 timeout、超时 kill、stdout/stderr 上限和任务 output 进度记录;远程准备失败只能让对应任务进入失败或 retry,不能让 `POST /api/tasks`、SSE `/api/events``/health`、overview 或 frontend/core 用户服务代理等控制面请求等待远程 SSH 结束。凡是改动 D601/远程 Provider 准备、`api/dev-containers/*`、任务入队启动或 `runCodeQueueSsh` 等路径,验收必须在一个远程 SSH/status/start 探针运行期间并发验证容器直连 `/health``/api/tasks/overview` 仍能在 1s 内返回,证明远程超时不会复发为全站刷新卡死。
- OpenCode 远程执行:`minimax-m2.7` 走 OpenCode JSON event port 时,本地和远程命令都必须显式执行 `opencode run ...`;远程 Docker exec 不得退化成 `exec run ...`,否则会在目标容器内变成 `bash: exec: run: not found`。OpenCode JSON stream 的终态判定以“当前进程退出码 + 当前 attempt 的最终 assistant response”为准:`exit=0` 且当前 attempt 产生非空最终回复时,即使上游没有发 `step_finish` 事件,也应视为正常 terminal;非零退出、无当前最终回复或传输关闭才进入 retry。每个 attempt 的 `finalResponse` 必须只来自当前 OpenCode/Codex turn,禁止在当前 turn 未产出最终回复时回退复用 task 上一次 `finalResponse`,否则会把旧任务内容误判为本轮完成。
- Codex 控制:服务内部启动 `codex app-server --listen stdio://`,用 JSON-RPC 调用 `thread/start``turn/start``turn/steer``turn/interrupt`,并监听 `turn/completed`、assistant delta、reasoning delta、command output delta、file diff delta 等通知生成前端可轮询的 transcript。
+2
View File
@@ -162,6 +162,7 @@ export function sshHelp(): unknown {
"bun scripts/cli.ts ssh D601:k3s script <<'SCRIPT'",
"bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api/app pwd",
"bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api apply-patch <<'PATCH'",
"tar -C /path/to/files -cf - . | bun scripts/cli.ts ssh D601:k3s:unidesk:code-queue/root/unidesk exec --stdin -- tar -xf - -C /root/unidesk",
"bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api node -e 'console.log(process.version)'",
"bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api script <<'SCRIPT'",
"bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api logs --tail 80",
@@ -169,6 +170,7 @@ export function sshHelp(): unknown {
notes: [
"ssh --help and ssh <route> --help print this JSON help and never open an interactive session.",
"For non-interactive remote commands, prefer argv for a single process and script/stdin for shell logic.",
"For arbitrary stdin streams into a workload command, use a workload route plus `exec --stdin -- <command> ...`; this keeps the route as location-only and avoids heredoc/base64/tar shell wrapping.",
"apply-patch rejects low-context update hunks by default, reports the matched file:line for each hunk on stderr, and only accepts --allow-loose when the caller has manually reviewed an intentionally ambiguous insertion.",
"script defaults to target /bin/sh and inherits provider proxy variables such as HTTP_PROXY/HTTPS_PROXY/ALL_PROXY/NO_PROXY; use --shell bash only for bash syntax such as pipefail, arrays, or [[ ... ]], not as a proxy workaround.",
"Route syntax is `{provider}:{plane}[:{scope...}] {operation} [operation-args...]`: the first argv token locates a distributed target only, and every following token belongs to the operation parser. Host workspace routes use `<provider>:/absolute/workspace`; native k3s providers such as D601 and G14 use <provider>:k3s for the control plane, <provider>:k3s:<namespace>:<workload> for a workload, and <provider>:k3s:<namespace>:<workload>/<pod-workspace> for a pod workspace.",
+168 -138
View File
@@ -6,7 +6,7 @@ import { type UniDeskConfig } from "./config";
import { type DebugDispatchCommand, isDebugDispatchCommand } from "./debug";
import { summarizeMicroserviceHealthResponse, summarizeMicroserviceObservation, summarizeMicroserviceProxyResponse } from "./microservices";
import { parseNetworkPerfOptions, runNetworkPerf } from "./network-perf";
import { formatSshFailureHint, isSshSkillDiscoveryArgs, parseSshInvocation, sshFailureHint } from "./ssh";
import { formatSshFailureHint, parseSshInvocation, sshFailureHint, wrapSshRemoteCommand } from "./ssh";
import { codexJudgeQueryAsync, codexOutputQueryAsync, codexPrPreflightQueryAsync, codexQueuesQueryAsync, codexTaskQueryAsync, codexTasksQueryAsync, codexUnreadTriageAsync } from "./code-queue";
import { runDecisionCenterCommandAsync } from "./decision-center";
import {
@@ -536,95 +536,6 @@ function jsonOption(args: string[], name: string): Record<string, unknown> | und
return parsed as Record<string, unknown>;
}
const compactSkillDiscoverPython = String.raw`import os,json,socket,platform,getpass
from pathlib import Path as P
S=os.getenv('S','all');L=int(os.getenv('L','0'));D=int(os.getenv('D','4'));skip={'node_modules','.git','.state','logs','references','__pycache__'}
def isw():
try:r=P('/proc/sys/kernel/osrelease').read_text(errors='ignore').lower()
except Exception:r=''
return 'microsoft' in r or 'wsl' in r or 'WSL_INTEROP' in os.environ
def wp(p):
s=str(p)
return s[5].upper()+':\\'+s[7:].replace('/','\\') if s.startswith('/mnt/') and len(s)>6 and s[5].isalpha() and s[6]=='/' else None
def md(f):
n=f.parent.name;d=''
try:ls=f.read_text(errors='replace')[:8192].splitlines()
except Exception:ls=[]
if ls and ls[0].strip()=='---':
for l in ls[1:]:
if l.strip()=='---':break
if ':' in l:
k,v=l.split(':',1);k=k.strip().lower();v=v.strip().strip('"\' ')
if k=='name' and v:n=v
if k=='description' and v:d=v
if not d:
for l in ls:
x=l.strip()
if x and not x.startswith('---') and not x.startswith('#'):
d=x;break
return n,d
def scan(sc,root):
rec={'scope':sc,'path':str(root),'windowsPath':wp(root),'exists':False,'skillCount':0,'error':None};out=[]
try:rec['exists']=root.exists()
except Exception as e:rec['error']=str(e)
if not rec['exists']:return rec,out
try:
for f in root.rglob('SKILL.md'):
rel=f.relative_to(root).parts[:-1]
if not rel or len(rel)>D or any(x in skip for x in rel):continue
n,d=md(f);out.append({'scope':sc,'name':n,'description':d,'path':str(f.parent),'skillMd':str(f),'windowsPath':wp(f.parent),'root':str(root)})
except Exception as e:rec['error']=str(e)
rec['skillCount']=len(out);return rec,out
roots=[];h=P.home()
if S!='windows':roots += [('wsl',h/'.agents/skills'),('wsl',h/'.codex/skills'),('wsl',P('/root/.agents/skills')),('wsl',P('/root/.codex/skills'))]
if S!='wsl' and isw():
try:users=list(P('/mnt/c/Users').iterdir())
except Exception:users=[]
for u in users:
if u.is_dir() and u.name.lower() not in {'all users','default','default user','public'}:roots += [('windows',u/'.agents/skills'),('windows',u/'.codex/skills')]
seen=set();rr=[];ss=[]
for sc,r in roots:
k=(sc,str(r))
if k in seen:continue
seen.add(k);a,b=scan(sc,r);rr.append(a);ss+=b
ss.sort(key=lambda x:(0 if x['scope']=='wsl' else 1,x['name'].lower(),x['path']));tb=len(ss)
if L>0:ss=ss[:L]
c={'total':len(ss),'totalBeforeLimit':tb,'wsl':sum(1 for x in ss if x['scope']=='wsl'),'windows':sum(1 for x in ss if x['scope']=='windows')}
print(json.dumps({'ok':True,'command':'unidesk ssh skills','node':{'hostname':socket.gethostname(),'user':getpass.getuser(),'home':str(P.home()),'platform':platform.platform(),'isWsl':isw()},'counts':c,'roots':rr,'skills':ss},ensure_ascii=False,indent=2))`;
function remoteFrontendSkillDiscoverCommand(args: string[]): string {
let scope = "all";
let limit = 0;
let maxDepth = 4;
const start = args[0] === "skill" ? 2 : 1;
for (let index = start; index < args.length; index += 1) {
const arg = args[index] ?? "";
const next = args[index + 1];
if (arg === "--scope") {
if (next !== "all" && next !== "wsl" && next !== "windows") throw new Error("ssh skills --scope must be one of: all, wsl, windows");
scope = next;
index += 1;
continue;
}
if (arg === "--limit") {
const value = Number(next);
if (!Number.isInteger(value) || value < 0) throw new Error("ssh skills --limit must be >= 0");
limit = value;
index += 1;
continue;
}
if (arg === "--max-depth") {
const value = Number(next);
if (!Number.isInteger(value) || value <= 0) throw new Error("ssh skills --max-depth must be positive");
maxDepth = value;
index += 1;
continue;
}
throw new Error(`remote frontend ssh skills does not support option: ${arg}`);
}
return `S=${shellQuote(scope)} L=${shellQuote(String(limit))} D=${shellQuote(String(maxDepth))} python3 -c ${shellQuote(compactSkillDiscoverPython)}`;
}
function dispatchPayload(args: string[], command: DebugDispatchCommand): Record<string, unknown> {
const explicit = jsonOption(args, "--payload-json") ?? {};
if (command === "provider.upgrade") {
@@ -951,12 +862,178 @@ async function remoteNetworkPerf(options: RemoteCliOptions, config: UniDeskConfi
};
}
function frontendSshWebSocketUrl(session: FrontendSession): string {
const url = new URL("/ws/ssh", session.baseUrl);
url.protocol = url.protocol === "https:" ? "wss:" : "ws:";
return url.toString();
}
function webSocketDataText(data: unknown): string {
if (typeof data === "string") return data;
if (data instanceof ArrayBuffer) return Buffer.from(data).toString("utf8");
if (ArrayBuffer.isView(data)) return Buffer.from(data.buffer, data.byteOffset, data.byteLength).toString("utf8");
return String(data);
}
function openFrontendSshWebSocket(session: FrontendSession): WebSocket {
const WebSocketWithHeaders = WebSocket as unknown as new (
url: string,
options?: { headers?: Record<string, string> },
) => WebSocket;
return new WebSocketWithHeaders(frontendSshWebSocketUrl(session), { headers: { cookie: session.cookie } });
}
async function runRemoteSshWebSocket(
session: FrontendSession,
invocation: ReturnType<typeof parseSshInvocation>,
): Promise<number> {
const parsed = invocation.parsed;
const size = {
cols: Number(process.stdout.columns) > 0 ? Number(process.stdout.columns) : 100,
rows: Number(process.stdout.rows) > 0 ? Number(process.stdout.rows) : 30,
};
const openTimeoutMs = Math.max(15000, Number(process.env.UNIDESK_SSH_OPEN_TIMEOUT_MS || 60000));
const payload = {
providerId: invocation.providerId,
command: wrapSshRemoteCommand(parsed.remoteCommand),
cwd: invocation.route.plane === "host" ? invocation.route.workspace ?? undefined : undefined,
tty: parsed.remoteCommand === null,
stdinEotOnEnd: parsed.remoteCommand !== null,
openTimeoutMs,
cols: size.cols,
rows: size.rows,
};
const ws = openFrontendSshWebSocket(session);
let exitCode = 255;
let settled = false;
let canSend = false;
let sessionReady = false;
const pending: string[] = [];
const pendingSessionMessages: string[] = [];
const send = (value: unknown): void => {
const text = JSON.stringify(value);
if (!canSend || ws.readyState !== WebSocket.OPEN) {
pending.push(text);
return;
}
ws.send(text);
};
const flush = (): void => {
while (pending.length > 0 && ws.readyState === WebSocket.OPEN) ws.send(pending.shift()!);
};
const sendInput = (value: Buffer | string): void => {
sendWhenSessionReady({ type: "ssh.input", data: Buffer.from(value).toString("base64"), encoding: "base64" });
};
const sendWhenSessionReady = (value: unknown): void => {
const text = JSON.stringify(value);
if (!sessionReady || ws.readyState !== WebSocket.OPEN) {
pendingSessionMessages.push(text);
return;
}
ws.send(text);
};
const flushSessionMessages = (): void => {
if (!sessionReady || ws.readyState !== WebSocket.OPEN) return;
while (pendingSessionMessages.length > 0) ws.send(pendingSessionMessages.shift()!);
};
return await new Promise<number>((resolve) => {
const rawMode = parsed.remoteCommand === null && process.stdin.isTTY && typeof process.stdin.setRawMode === "function";
const openTimer = setTimeout(() => {
if (sessionReady || settled) return;
process.stderr.write("unidesk remote frontend ssh bridge timed out waiting for provider session\n");
exitCode = 255;
try {
ws.close();
} catch {
// Ignore close failures while resolving the timeout path.
}
}, openTimeoutMs);
const restore = (): void => {
clearTimeout(openTimer);
process.stdin.off("data", onStdinData);
process.stdin.off("end", onStdinEnd);
if (rawMode) process.stdin.setRawMode(false);
};
const finish = (code: number): void => {
if (settled) return;
settled = true;
restore();
const hint = sshFailureHint(invocation.providerId, parsed, code, "");
if (hint !== null) process.stderr.write(formatSshFailureHint(hint));
resolve(code);
};
const onStdinData = (chunk: Buffer): void => {
sendInput(chunk);
};
const onStdinEnd = (): void => {
if (parsed.stdinSuffix) sendInput(parsed.stdinSuffix);
if (payload.stdinEotOnEnd === true) sendInput(Buffer.from([4]));
sendWhenSessionReady({ type: "ssh.eof" });
};
ws.addEventListener("open", () => {
canSend = true;
send({ type: "ssh.open", ...payload });
flush();
});
ws.addEventListener("message", (event) => {
const text = webSocketDataText(event.data);
let message: Record<string, unknown>;
try {
message = JSON.parse(text) as Record<string, unknown>;
} catch {
process.stderr.write(`${text}\n`);
return;
}
if (message.type === "ssh.dispatched") return;
if (message.type === "ssh.opened") {
sessionReady = true;
clearTimeout(openTimer);
flushSessionMessages();
return;
}
if (message.type === "ssh.data") {
const chunk = Buffer.from(String(message.data ?? ""), message.encoding === "base64" ? "base64" : "utf8");
if (message.stream === "stderr") process.stderr.write(chunk);
else process.stdout.write(chunk);
return;
}
if (message.type === "ssh.error") {
process.stderr.write(`${String(message.message || "ssh bridge error")}\n`);
exitCode = 255;
ws.close();
return;
}
if (message.type === "ssh.exit") {
exitCode = Number.isInteger(message.exitCode) ? Number(message.exitCode) : 255;
ws.close();
}
});
ws.addEventListener("close", () => finish(exitCode));
ws.addEventListener("error", () => {
process.stderr.write("unidesk remote frontend ssh bridge websocket error\n");
finish(255);
});
if (rawMode) process.stdin.setRawMode(true);
process.stdin.resume();
if (parsed.stdinPrefix) sendInput(parsed.stdinPrefix);
process.stdin.on("data", onStdinData);
process.stdin.on("end", onStdinEnd);
});
}
export function remoteSshFrontendPlanForTest(target: string, args: string[]): Record<string, unknown> {
const invocation = parseSshInvocation(target, args);
return {
transport: "frontend-websocket",
providerId: invocation.providerId,
route: invocation.route,
remoteCommand: invocation.parsed.remoteCommand,
wrappedRemoteCommand: wrapSshRemoteCommand(invocation.parsed.remoteCommand),
requiresStdin: invocation.parsed.requiresStdin,
invocationKind: invocation.parsed.invocationKind,
payloadCwd: invocation.route.plane === "host" ? invocation.route.workspace : null,
@@ -966,54 +1043,7 @@ export function remoteSshFrontendPlanForTest(target: string, args: string[]): Re
async function runRemoteSshOverFrontend(session: FrontendSession, target: string | undefined, args: string[]): Promise<number> {
if (!target) throw new Error("remote ssh requires a route, for example: bun scripts/cli.ts --main-server-ip 74.48.78.17 ssh D601 hostname");
const invocation = parseSshInvocation(target, args);
const parsed = invocation.parsed;
if (parsed.requiresStdin) {
process.stderr.write("remote frontend transport does not stream stdin for ssh helper subcommands such as script, apply-patch or py; run the command on the main server, use --main-server-transport ssh, or use an argv/pod-route operation that does not need stdin\n");
return 255;
}
if (parsed.remoteCommand === null) {
process.stderr.write("remote frontend transport supports ssh remote commands only; pass a command such as: ssh D601 hostname\n");
return 255;
}
if (args[0] === "glob") {
process.stderr.write("remote frontend transport does not support the ssh glob helper because host.ssh exec has a short command-length limit; run it on the main server CLI instead\n");
return 255;
}
const remoteCommand = isSshSkillDiscoveryArgs(args) ? remoteFrontendSkillDiscoverCommand(args) : parsed.remoteCommand;
const dispatch = await frontendJson(session, "/api/dispatch", {
method: "POST",
body: JSON.stringify({
providerId: invocation.providerId,
command: "host.ssh",
payload: {
source: "cli-remote-ssh",
mode: "exec",
command: remoteCommand,
...(invocation.route.plane === "host" && invocation.route.workspace !== null ? { cwd: invocation.route.workspace } : {}),
timeoutMs: isSshSkillDiscoveryArgs(args) ? 30000 : 15000,
},
}),
});
const taskId = (dispatch as { body?: { taskId?: string } }).body?.taskId ?? "";
if (!dispatch.ok || taskId.length === 0) {
process.stderr.write(`${JSON.stringify(dispatch, null, 2)}\n`);
return 255;
}
const wait = await waitForFrontendTask(session, taskId, 20_000);
const task = (wait as { task?: { status?: string; result?: Record<string, unknown>; message?: string } }).task;
const result = task?.result ?? {};
const stdout = typeof result.stdout === "string" ? result.stdout : "";
const stderr = typeof result.stderr === "string" ? result.stderr : "";
if (stdout.length > 0) process.stdout.write(stdout);
if (stderr.length > 0) process.stderr.write(stderr);
if (task?.status !== "succeeded") {
if (stdout.length === 0 && stderr.length === 0) process.stderr.write(`${JSON.stringify({ taskId, task }, null, 2)}\n`);
const exitCode = typeof result.exitCode === "number" ? result.exitCode : 255;
const hint = sshFailureHint(invocation.providerId, parsed, exitCode, stderr.length > 0 ? stderr : String(task?.message ?? ""));
if (hint !== null) process.stderr.write(formatSshFailureHint(hint));
return exitCode;
}
return typeof result.exitCode === "number" ? result.exitCode : 0;
return runRemoteSshWebSocket(session, invocation);
}
async function runRemoteCliOverFrontend(options: RemoteCliOptions, config: UniDeskConfig): Promise<number> {
+32 -2
View File
@@ -39,7 +39,7 @@ export interface SshFailureHint {
note: string;
}
const argvQuotedSshSubcommands = new Set(["rg", "grep", "sed", "nl", "stat", "du", "ls", "cat", "head", "tail", "wc", "pwd"]);
const argvQuotedSshSubcommands = new Set(["git", "rg", "grep", "sed", "nl", "stat", "du", "ls", "cat", "head", "tail", "wc", "pwd"]);
const nativeK3sKubeconfig = "/etc/rancher/k3s/k3s.yaml";
const k3sResourceKindAliases = new Set(["pod", "po", "pods", "deployment", "deploy", "deployments", "statefulset", "sts", "daemonset", "ds", "job", "jobs"]);
const legacyK3sOperationRouteSegments = new Set([
@@ -1018,7 +1018,12 @@ function parseK3sTargetOperation(route: ParsedSshRoute, args: string[]): ParsedS
}
if (operation === "kubectl") throw new Error(`ssh k3s kubectl is a control-plane operation; use ssh ${route.providerId}:k3s kubectl ...`);
if (operation === "exec") {
return { remoteCommand: buildK3sExecCommand([...targetArgs, ...k3sRouteCommandArgs(operationArgs)]), requiresStdin: false, invocationKind: "helper" };
const execArgs = k3sRouteExecOperationArgs(operationArgs);
return {
remoteCommand: buildK3sExecCommand([...targetArgs, ...execArgs]),
requiresStdin: execArgs.includes("--stdin") || execArgs.includes("-i"),
invocationKind: "helper",
};
}
return { remoteCommand: buildK3sExecCommand([...targetArgs, ...k3sRouteCommandArgs(args)]), requiresStdin: false, invocationKind: "helper" };
}
@@ -1045,6 +1050,31 @@ function k3sRouteCommandArgs(args: string[]): string[] {
return args[0] === "--" ? args : ["--", ...args];
}
function k3sRouteExecOperationArgs(args: string[]): string[] {
if (args.length === 0) throw new Error("ssh k3s target exec operation requires a command to exec");
const result: string[] = [];
for (let index = 0; index < args.length; index += 1) {
const arg = args[index] ?? "";
if (arg === "--") {
if (index === args.length - 1) throw new Error("ssh k3s target exec operation requires a command after --");
return [...result, ...args.slice(index)];
}
if (arg === "--stdin" || arg === "-i" || arg === "--tty" || arg === "-t") {
result.push(arg);
continue;
}
if (arg === "--container" || arg === "-c" || arg === "--workdir" || arg === "--cwd") {
const value = args[index + 1];
if (value === undefined || value.length === 0) throw new Error(`ssh k3s target exec ${arg} requires a value`);
result.push(arg, value);
index += 1;
continue;
}
return [...result, "--", ...args.slice(index)];
}
throw new Error("ssh k3s target exec operation requires a command to exec");
}
function buildK3sCommand(providerId: string, args: string[]): string {
const action = args[0] ?? "";
if (action.length === 0 || action === "--help" || action === "-h" || action === "help") {
+24 -1
View File
@@ -249,6 +249,10 @@ export function runSshArgvGuidanceContract(): JsonRecord {
assertCondition(routeApplyPatchWorkspace.parsed.requiresStdin === true, "pod workspace apply-patch must still stream patch stdin", routeApplyPatchWorkspace);
assertCondition(routeApplyPatchWorkspace.parsed.remoteCommand === "'env' 'KUBECONFIG=/etc/rancher/k3s/k3s.yaml' 'kubectl' 'exec' '-i' '-n' 'hwlab-dev' 'deployment/hwlab-cloud-api' '--' 'sh' '-c' 'cd \"$1\" || exit; shift; exec \"$@\"' 'unidesk-cwd' '/app' 'sh' '-s' '--'", "pod workspace apply-patch must set cwd before injecting the sh helper", routeApplyPatchWorkspace);
const routeExecStdin = parseSshInvocation("D601:k3s:unidesk:code-queue/root/unidesk", ["exec", "--stdin", "--", "tar", "-xf", "-", "-C", "/root/unidesk"]);
assertCondition(routeExecStdin.parsed.requiresStdin === true, "pod route exec --stdin must stream local stdin into kubectl exec", routeExecStdin);
assertCondition(routeExecStdin.parsed.remoteCommand === "'env' 'KUBECONFIG=/etc/rancher/k3s/k3s.yaml' 'kubectl' 'exec' '-n' 'unidesk' 'deployment/code-queue' '-i' '--' 'sh' '-c' 'cd \"$1\" || exit; shift; exec \"$@\"' 'unidesk-cwd' '/root/unidesk' 'tar' '-xf' '-' '-C' '/root/unidesk'", "pod route exec --stdin must keep exec flags before -- and command argv after --", routeExecStdin);
const sshLike = parseSshArgs(["echo hello"]);
const hint = sshFailureHint("D601", sshLike, 255, "kex_exchange_identification: Connection closed by remote host");
assertCondition(hint !== null, "ssh-like kex failure must produce a hint", sshLike);
@@ -271,6 +275,7 @@ export function runSshArgvGuidanceContract(): JsonRecord {
assertCondition(helpText.includes("ssh D601:k3s:hwlab-dev:hwlab-cloud-api/app pwd"), "ssh help must document k3s pod workspace route", helpText);
assertCondition(helpText.includes("ssh D601:k3s script <<'SCRIPT'"), "ssh help must document k3s control-plane script operation", helpText);
assertCondition(helpText.includes("ssh D601:k3s:hwlab-dev:hwlab-cloud-api apply-patch <<'PATCH'"), "ssh help must document k3s pod apply-patch operation", helpText);
assertCondition(helpText.includes("ssh D601:k3s:unidesk:code-queue/root/unidesk exec --stdin -- tar -xf - -C /root/unidesk"), "ssh help must document one-step stdin file streaming into pod exec", helpText);
assertCondition(helpText.includes("apply-patch [--allow-loose]") && helpText.includes("low-context update hunks"), "ssh help must document apply-patch loose-context guard", helpText);
assertCondition(helpText.includes("ssh D601:k3s:hwlab-dev:hwlab-cloud-api script <<'SCRIPT'"), "ssh help must document k3s script operation", helpText);
assertCondition(helpText.includes("UNIDESK_SSH_HINT"), "ssh help must document structured failure hint", helpText);
@@ -279,8 +284,10 @@ export function runSshArgvGuidanceContract(): JsonRecord {
assertCondition(crossChecks.includes("bun scripts/cli.ts ssh D601 argv true"), "provider triage cross-checks must keep argv true", crossChecks);
const frontendRemoteK3sPlan = remoteSshFrontendPlanForTest("D601:k3s", ["kubectl", "get", "nodes", "-o", "name"]);
assertCondition(frontendRemoteK3sPlan.transport === "frontend-websocket", "remote frontend ssh must use the streaming websocket bridge", frontendRemoteK3sPlan);
assertCondition(frontendRemoteK3sPlan.providerId === "D601", "remote frontend ssh must dispatch route target to the provider id", frontendRemoteK3sPlan);
assertCondition(frontendRemoteK3sPlan.remoteCommand === "'env' 'KUBECONFIG=/etc/rancher/k3s/k3s.yaml' 'kubectl' 'get' 'nodes' '-o' 'name'", "remote frontend ssh must preserve k3s route command construction", frontendRemoteK3sPlan);
assertCondition(String(frontendRemoteK3sPlan.wrappedRemoteCommand ?? "").includes("UNIDESK_SSH_TOOL_DIR=/tmp/unidesk-ssh-tools"), "remote frontend ssh must use the same remote tool bootstrap as local ssh", frontendRemoteK3sPlan);
const frontendRemotePodArgvPlan = remoteSshFrontendPlanForTest("G14:k3s:unidesk:code-queue", ["argv", "sh", "-c", "command -v tran"]);
assertCondition(frontendRemotePodArgvPlan.providerId === "G14", "remote frontend pod route must dispatch through G14 provider", frontendRemotePodArgvPlan);
@@ -296,6 +303,20 @@ export function runSshArgvGuidanceContract(): JsonRecord {
const remoteSource = readFileSync(new URL("./src/remote.ts", import.meta.url), "utf8");
assertCondition(remoteSource.includes("UNIDESK_REMOTE_HTTP_CLIENT") && remoteSource.includes("isCodeQueueRunnerEnv(env) ? \"curl\" : \"fetch\""), "remote frontend transport must default to curl HTTP in Code Queue runner environments", remoteSource);
assertCondition(remoteSource.includes("frontendSshWebSocketUrl") && remoteSource.includes("runRemoteSshWebSocket"), "remote frontend ssh must go through the streaming websocket implementation", remoteSource);
assertCondition(!remoteSource.includes("remote frontend transport does not stream stdin"), "remote frontend ssh must not reject stdin-backed helpers", remoteSource);
assertCondition(!remoteSource.includes("source: \"cli-remote-ssh\""), "remote frontend ssh must not use host.ssh dispatch task polling", remoteSource);
const frontendSource = readFileSync(new URL("../src/components/frontend/src/index.ts", import.meta.url), "utf8");
assertCondition(frontendSource.includes('url.pathname === "/ws/ssh"') && frontendSource.includes("proxySshWebSocket"), "frontend must expose an authenticated /ws/ssh proxy", frontendSource);
assertCondition(frontendSource.includes("coreSshWebSocketUrl") && frontendSource.includes('url.searchParams.set("token"'), "frontend /ws/ssh proxy must connect to backend-core ssh bridge with the provider token", frontendSource);
assertCondition(frontendSource.includes("PROVIDER_TOKEN_FILE") && frontendSource.includes("/run/secrets/unidesk_provider_token"), "frontend ssh proxy must support file-based provider token injection for runtime hotfix and secret mounts", frontendSource);
const composeSource = readFileSync(new URL("../docker-compose.yml", import.meta.url), "utf8");
assertCondition(composeSource.includes('PROVIDER_TOKEN: "${UNIDESK_PROVIDER_TOKEN}"'), "frontend compose service must receive provider token for the ssh proxy", composeSource);
const devCoreManifest = readFileSync(new URL("../src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-core.k8s.yaml", import.meta.url), "utf8");
assertCondition(devCoreManifest.includes("name: frontend-dev") && devCoreManifest.includes("name: PROVIDER_TOKEN"), "dev frontend manifest must receive provider token for the ssh proxy", devCoreManifest);
const codeQueueDockerfile = readFileSync(new URL("../src/components/microservices/code-queue/Dockerfile", import.meta.url), "utf8");
assertCondition(codeQueueDockerfile.includes("COPY scripts/tran /usr/local/bin/tran") && codeQueueDockerfile.includes("chmod 755 /usr/local/bin/tran"), "Code Queue runner image must install tran on PATH", codeQueueDockerfile);
@@ -306,6 +327,7 @@ export function runSshArgvGuidanceContract(): JsonRecord {
"argv form is classified and quoted as the success path for non-interactive commands",
"stdin script form removes shell-command strings for host and k3s workload scripts",
"pod apply-patch operation injects helper and forwards patch stdin",
"pod exec --stdin streams arbitrary local stdin through workload routes without shell wrapping",
"apply-patch uses one sh helper for host and pod paths and rejects low-context hunks unless --allow-loose is explicit",
"legacy operation-in-route forms are rejected in any k3s route segment with canonical route-plus-operation guidance",
"post-provider k3s shorthand is rejected so location and operation stay separated",
@@ -314,8 +336,9 @@ export function runSshArgvGuidanceContract(): JsonRecord {
"help text documents stdin script passthrough and UNIDESK_SSH_HINT",
"provider triage recommendedCrossChecks keeps ssh D601 argv true",
"remote frontend ssh uses the same structured route parser for host, k3s and pod argv routes",
"remote frontend ssh uses authenticated /ws/ssh streaming instead of host.ssh dispatch task polling",
"Code Queue runner image installs the tran wrapper and runner tran auto-selects remote frontend transport",
"Code Queue runner remote frontend HTTP uses curl by default to avoid Bun response-body native crashes",
"Code Queue runner remote frontend HTTP uses curl by default for non-ssh API calls to avoid Bun response-body native crashes",
],
};
}
+117 -4
View File
@@ -1,6 +1,7 @@
import { readFileSync } from "node:fs";
import { createHmac, randomBytes, timingSafeEqual } from "node:crypto";
import { join } from "node:path";
import type { Server, ServerWebSocket } from "bun";
import { createHourlyJsonlWriter, logRetentionBytesForService } from "../../shared/src/rotating-jsonl";
import { notificationStyles } from "./notification-styles";
@@ -11,6 +12,7 @@ interface RuntimeConfig {
providerIngressPublicUrl: string;
authUsername: string;
authPassword: string;
providerToken: string | null;
sessionSecret: string;
sessionTtlSeconds: number;
logFile: string;
@@ -61,6 +63,13 @@ interface OperationPerformanceSample {
detail: string;
}
interface FrontendWsData {
channel: "ssh-proxy";
upstream: WebSocket | null;
upstreamOpen: boolean;
pendingToUpstream: string[];
}
const sessionCookieName = "unidesk_session";
const config = readConfig();
const logger = createLogger("frontend", config.logFile);
@@ -142,6 +151,25 @@ function requiredEnv(name: string): string {
return value;
}
function optionalEnv(name: string): string | null {
const value = process.env[name]?.trim() ?? "";
return value.length > 0 ? value : null;
}
function optionalFile(path: string): string | null {
try {
const value = readFileSync(path, "utf8").trim();
return value.length > 0 ? value : null;
} catch {
return null;
}
}
function optionalFileEnv(name: string, fallbackPath: string | null = null): string | null {
const path = optionalEnv(name) ?? fallbackPath;
return path === null ? null : optionalFile(path);
}
function readNumberEnv(name: string): number {
const raw = requiredEnv(name);
const parsed = Number(raw);
@@ -169,6 +197,10 @@ function readConfig(): RuntimeConfig {
providerIngressPublicUrl: requiredEnv("PROVIDER_INGRESS_PUBLIC_URL"),
authUsername: requiredEnv("AUTH_USERNAME"),
authPassword: requiredEnv("AUTH_PASSWORD"),
providerToken: optionalEnv("PROVIDER_TOKEN")
?? optionalEnv("UNIDESK_PROVIDER_TOKEN")
?? optionalFileEnv("PROVIDER_TOKEN_FILE", "/run/secrets/unidesk_provider_token")
?? optionalFileEnv("UNIDESK_PROVIDER_TOKEN_FILE", "/tmp/unidesk-provider-token"),
sessionSecret: requiredEnv("SESSION_SECRET"),
sessionTtlSeconds: readNumberEnv("SESSION_TTL_SECONDS"),
logFile: requiredEnv("LOG_FILE"),
@@ -744,6 +776,68 @@ async function proxyApi(req: Request, url: URL): Promise<Response> {
return new Response(await upstream.arrayBuffer(), { status: upstream.status, headers: responseHeaders });
}
function coreSshWebSocketUrl(): string {
const url = new URL("/ws/ssh", config.coreInternalUrl);
url.protocol = url.protocol === "https:" ? "wss:" : "ws:";
url.searchParams.set("token", config.providerToken ?? "");
return url.toString();
}
function webSocketDataText(message: string | Buffer): string {
return typeof message === "string" ? message : Buffer.from(message).toString("utf8");
}
function flushSshProxyPending(ws: ServerWebSocket<FrontendWsData>): void {
const upstream = ws.data.upstream;
if (upstream === null || !ws.data.upstreamOpen || upstream.readyState !== WebSocket.OPEN) return;
while (ws.data.pendingToUpstream.length > 0) upstream.send(ws.data.pendingToUpstream.shift()!);
}
function closeSshProxy(ws: ServerWebSocket<FrontendWsData>, code = 1000, reason = "ssh proxy closed"): void {
if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {
ws.close(code, reason);
}
}
function openSshProxyUpstream(ws: ServerWebSocket<FrontendWsData>): void {
const upstream = new WebSocket(coreSshWebSocketUrl());
ws.data.upstream = upstream;
upstream.addEventListener("open", () => {
ws.data.upstreamOpen = true;
flushSshProxyPending(ws);
});
upstream.addEventListener("message", (event) => {
if (ws.readyState === WebSocket.OPEN) ws.send(event.data as string | Buffer);
});
upstream.addEventListener("close", (event) => {
closeSshProxy(ws, event.code || 1000, event.reason || "ssh upstream closed");
});
upstream.addEventListener("error", () => {
if (ws.readyState === WebSocket.OPEN) {
ws.send(JSON.stringify({ type: "ssh.error", message: "frontend ssh upstream websocket error" }));
}
closeSshProxy(ws, 1011, "ssh upstream websocket error");
});
}
function proxySshWebSocket(req: Request, server: Server<FrontendWsData>): Response | undefined {
if (sessionFromRequest(req) === null) {
return jsonResponse({ ok: false, error: "authentication required" }, 401);
}
if (config.providerToken === null) {
return jsonResponse({ ok: false, error: "frontend ssh proxy is missing provider token" }, 503);
}
const upgraded = server.upgrade(req, {
data: {
channel: "ssh-proxy",
upstream: null,
upstreamOpen: false,
pendingToUpstream: [],
} satisfies FrontendWsData,
});
return upgraded ? undefined : jsonResponse({ ok: false, error: "websocket upgrade failed" }, 400);
}
function vendorPath(pathname: string): string | null {
if (pathname === "/vendor/react.production.min.js") return join(vendorDir, "react", "umd", "react.production.min.js");
if (pathname === "/vendor/react-dom.production.min.js") return join(vendorDir, "react-dom", "umd", "react-dom.production.min.js");
@@ -768,7 +862,7 @@ function isStaticAssetPath(pathname: string): boolean {
return /\/[^/]+\.[a-z0-9]+$/iu.test(pathname);
}
async function handleRequest(req: Request): Promise<Response> {
async function handleRequest(req: Request, server: Server<FrontendWsData>): Promise<Response | undefined> {
const url = new URL(req.url);
logger("debug", "request", { path: url.pathname });
try {
@@ -779,6 +873,7 @@ async function handleRequest(req: Request): Promise<Response> {
if (url.pathname === "/logout" && req.method === "POST") return logout();
if (url.pathname === "/api/session") return sessionResponse(req);
if (url.pathname === "/api/frontend-performance") return frontendPerformanceResponse(req);
if (url.pathname === "/ws/ssh") return proxySshWebSocket(req, server);
if (url.pathname.startsWith("/api/") || url.pathname === "/logs") return proxyApi(req, url);
if (url.pathname === "/docs" || url.pathname.startsWith("/docs/")) return docsResponse(req, url);
if (url.pathname === "/" || url.pathname === "/index.html") {
@@ -798,21 +893,39 @@ async function handleRequest(req: Request): Promise<Response> {
}
}
const server = Bun.serve({
const server = Bun.serve<FrontendWsData>({
port: config.port,
hostname: "0.0.0.0",
idleTimeout: 120,
async fetch(req) {
async fetch(req, server) {
const started = performance.now();
const url = new URL(req.url);
let response: Response | undefined;
try {
response = await handleRequest(req);
response = await handleRequest(req, server);
return response;
} finally {
recordRequestPerformance(req, url.pathname, response, performance.now() - started);
}
},
websocket: {
open(ws) {
if (ws.data.channel === "ssh-proxy") openSshProxyUpstream(ws);
},
message(ws, message) {
if (ws.data.channel !== "ssh-proxy") return;
ws.data.pendingToUpstream.push(webSocketDataText(message));
flushSshProxyPending(ws);
},
close(ws) {
if (ws.data.channel !== "ssh-proxy") return;
try {
ws.data.upstream?.close();
} catch {
// Closing an already-closed upstream socket is harmless.
}
},
},
});
logger("info", "server_listening", {
@@ -233,6 +233,11 @@ spec:
secretKeyRef:
name: unidesk-dev-runtime-secrets
key: AUTH_PASSWORD
- name: PROVIDER_TOKEN
valueFrom:
secretKeyRef:
name: unidesk-dev-runtime-secrets
key: PROVIDER_TOKEN
- name: SESSION_SECRET
valueFrom:
secretKeyRef: