fix: stream runner tran ssh output
This commit is contained in:
@@ -26,6 +26,15 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文
|
||||
- D601 上 HWLAB 验证必须通过 UniDesk 透传在 D601 执行,禁止回到 master server 跑 HWLAB check、Playwright/browser smoke、镜像构建或其他重型验证。k3s 操作使用 route 语法 `D601:k3s ...`,默认 D601 原生 k3s kubeconfig。
|
||||
- 长期细节见 `docs/reference/hwlab.md`。
|
||||
|
||||
## Critical D601 UniDesk Workspace Rule
|
||||
|
||||
- P0: `D601:UniDesk` 的固定开发 workspace 是 D601 节点上的 `/home/ubuntu/workspace/unidesk-dev`,固定使用 `master` 分支和 `origin git@github.com:pikasTech/unidesk.git`;所有需要在 D601 上改 UniDesk 代码、跑轻量合同测试、做分布式敏捷实验补丁收敛或验证 Code Queue runner/tran 的工作,都必须优先使用这个目录。
|
||||
- P0: 每次开始 `D601:UniDesk` 分布式开发、切换任务、恢复中断或上下文压缩后,必须重新读取目标 workspace 的 `/home/ubuntu/workspace/unidesk-dev/AGENTS.md`,并以该文件和其引用的 UniDesk repo 内规则为当前任务约束;禁止只凭压缩摘要或主 server 记忆继续改代码。
|
||||
- `/home/ubuntu/cq-deploy`、`/root/unidesk`、`/app`、Code Queue pod 内 `/root/unidesk` 和 `/tmp/unidesk-*` 都是运行副本、部署副本或一次性实验面,不是 `D601:UniDesk` 日常开发 source truth;运行面热修可以直接作用于 pod/容器,但必须随后把持久化修复落回 fixed workspace 和 Git remote。
|
||||
- 每次开始 `D601:UniDesk` 工作前必须通过 UniDesk SSH 桥执行 `cd /home/ubuntu/workspace/unidesk-dev && git status --short --branch && git remote -v`;若路径、分支、remote 或权限不符合预期,必须先修正固定 workspace,再继续开发、测试或发布。
|
||||
- D601 上 UniDesk 验证必须通过 UniDesk 透传在 D601 执行,禁止回到 master server 跑仓库级 check、Playwright/browser smoke、镜像构建或其他重型验证。Code Queue/tran 运行面验证应优先直达 D601 原生 k3s pod,k3s 操作使用 route 语法 `D601:k3s ...`。
|
||||
- 长期细节见 `docs/reference/dev-environment.md`。
|
||||
|
||||
## Critical D601 Kubernetes Control-Plane Rule
|
||||
|
||||
- P0: D601 上的 Kubernetes 运行面只能以自部署原生 k3s 为准;Docker Desktop Kubernetes 已经停用并清理数据,任何人不得重新启用或把它作为 UniDesk/HWLAB 部署、CI/CD、诊断或验收目标。跟踪 issue: [pikasTech/unidesk#138](https://github.com/pikasTech/unidesk/issues/138),热修复背景见 [pikasTech/unidesk#118](https://github.com/pikasTech/unidesk/issues/118)。
|
||||
|
||||
@@ -368,6 +368,7 @@ services:
|
||||
PROVIDER_INGRESS_PUBLIC_URL: "ws://${UNIDESK_PUBLIC_HOST}:${UNIDESK_PROVIDER_INGRESS_PORT}/ws/provider"
|
||||
AUTH_USERNAME: "${UNIDESK_AUTH_USERNAME}"
|
||||
AUTH_PASSWORD: "${UNIDESK_AUTH_PASSWORD}"
|
||||
PROVIDER_TOKEN: "${UNIDESK_PROVIDER_TOKEN}"
|
||||
SESSION_SECRET: "${UNIDESK_SESSION_SECRET}"
|
||||
SESSION_TTL_SECONDS: "${UNIDESK_SESSION_TTL_SECONDS}"
|
||||
UNIDESK_DEPLOY_REF: "${UNIDESK_FRONTEND_DEPLOY_REF:-deploy.json#environments.prod.services.frontend}"
|
||||
|
||||
@@ -24,7 +24,7 @@ CLI 可以从 `master` 快速演进,但必须兼容 `deploy.json` 固定的 CI
|
||||
- `ssh <providerId> py [script-args...] < script.py` 把本地 stdin 落到远端临时 `.py` 文件后再以 `python3 -u` 执行并自动清理,避免再手写 `'python3 -'`、heredoc 或多层引号;`script-args` 会按 argv 安全透传给远端脚本。
|
||||
- `ssh <providerId> skills [--scope all|wsl|windows] [--limit N]` 发现目标节点上的 WSL/Linux skill 根目录;当 provider 是 WSL 时同一次调用还会扫描 Windows 用户目录下的 `.agents/skills` 与 `.codex/skills`。
|
||||
- `ssh <providerId>:k3s[:namespace:workload[:container]] <operation> ...` 是原生 k3s 结构化 route 入口,route 只定位控制面或 workload,`kubectl`、`logs`、`exec`、`script`、`apply-patch` 和普通容器命令作为 operation 放在 route 之后;CLI 固定注入 `KUBECONFIG=/etc/rancher/k3s/k3s.yaml` 并把 kubectl、workload exec 和 logs 参数组装成 argv,避免在 Host SSH、bash、kubectl exec 和容器 shell 之间反复手写多层引号;D601 与 G14 都有 provider-specific guard,分别校验 `d601` 和 G14 k3s 节点身份。
|
||||
- Code Queue runner 镜像必须在 PATH 上提供 `/usr/local/bin/tran`。runner 内的 `tran` 检测到 `CODE_QUEUE_*` 或 `KUBERNETES_SERVICE_HOST` 后,默认执行 `bun /root/unidesk/scripts/cli.ts --main-server-ip <public-frontend> ssh ...`,其中 `<public-frontend>` 优先来自 `UNIDESK_MAIN_SERVER_IP` / `UNIDESK_MAIN_SERVER_HOST` / `CODE_QUEUE_DEV_CONTAINER_MASTER_HOST`。runner remote frontend HTTP 客户端默认使用 `curl` 后端,降低 Bun 在部分 runner 内读取 HTTP response body 时触发 native crash 的风险;显式 `UNIDESK_REMOTE_HTTP_CLIENT=fetch` 可用于诊断。runner 内跨 D601/G14 的分布式访问应优先使用无 stdin 的 `tran D601 argv ...`、`tran G14 argv ...`、`tran D601:k3s kubectl ...` 和 `tran D601:k3s:<namespace>:<workload> argv ...`;`script`、`apply-patch`、`py` 等 stdin helper 需要在主 server/host 侧 `tran` 或显式 `--main-server-transport ssh` 中执行,直到 frontend dispatch 支持 stdin 流。
|
||||
- Code Queue runner 镜像必须在 PATH 上提供 `/usr/local/bin/tran`。runner 内的 `tran` 检测到 `CODE_QUEUE_*` 或 `KUBERNETES_SERVICE_HOST` 后,默认执行 `bun /root/unidesk/scripts/cli.ts --main-server-ip <public-frontend> ssh ...`,其中 `<public-frontend>` 优先来自 `UNIDESK_MAIN_SERVER_IP` / `UNIDESK_MAIN_SERVER_HOST` / `CODE_QUEUE_DEV_CONTAINER_MASTER_HOST`。runner remote frontend HTTP 客户端默认使用 `curl` 后端,降低 Bun 在部分 runner 内读取非 SSH HTTP response body 时触发 native crash 的风险;显式 `UNIDESK_REMOTE_HTTP_CLIENT=fetch` 可用于诊断。runner 内跨 D601/G14 的分布式访问应优先使用结构化 route/operation,例如 `tran D601 argv ...`、`tran G14 argv ...`、`tran D601:k3s kubectl ...` 和 `tran D601:k3s:<namespace>:<workload> argv ...`;`script`、`apply-patch`、`py` 等 stdin helper 通过 frontend `/ws/ssh` 流式通道执行,stdout/stderr 也必须完整直通,不得退回 `/api/dispatch` task JSON。
|
||||
- `microservice list/status/health/diagnostics/tunnel-self-test/proxy` 通过 backend-core 内网 API 管理挂载在计算节点 Docker 或 k3s 控制面中的用户服务(底层命令名仍为 microservice);`health`、`status` 和 `diagnostics` 默认返回 compact summary、body 字节数和 `--full|--raw` 展开命令,只有小 body 或无法抽取 summary 时才带有界 body preview,避免 Code Queue/k3s 诊断一次性输出爆炸;`tunnel-self-test` 和 `proxy` 会走真实 backend-core -> provider-gateway 或 k3sctl-adapter -> 节点服务链路。`microservice health code-queue` 使用 commander-safe 专用摘要,必须保留 ok/status、service id、running count、queue count、heartbeat freshness/risk、split-brain/live/degraded 解释和 raw drill-down 命令;需要完整健康 JSON 时显式加 `--raw` 或 `--full`,等价深挖路径是 `microservice proxy code-queue /health --raw --full`。`proxy` 支持受控 JSON 请求体并对超大响应 body 默认输出有界预览,规则见 `docs/reference/microservices.md`。
|
||||
- `decision upload/list/show/health` 通过 backend-core 用户服务代理访问 D601 k3s Decision Center,用于上传会议记录/决议 Markdown、列出权威记录、查看详情和健康检查;`decision list` 默认只返回摘要并省略完整 Markdown body,需要排查大正文时显式加 `--include-body`。正式文书字段通过 records 模型一等字段返回和查询:`--doc-no DC-...`、`--doc-type DCSN|GOAL|PLAN|RPRT|ACTN|ISSU|RETR|RQST|RESP|MINS`、`--doc-priority P0|P1|P2|P3`、`--year YYYY`、`--signer`、`--issued-at`、`--effective-scope`、`--supersedes`、`--superseded-by`;`show` 和 `requirement update` 可使用 `id` 或 `docNo`。`decision requirement list/create/upsert/update/show` 在同一 records 模型上管理 `goal|decision|blocker|debt|experiment` 需求记录,`docNo` 唯一,未传 `--doc-no` 但提供 `--doc-type/--doc-priority/--year` 时由服务分配下一个序号。它们不得直连 D601 Service、NodePort 或 provider-gateway 业务 HTTP。
|
||||
- `decision diary import <markdown-file>` 将带 `# YYYY年M月D日`、`# YYYY-MM-DD` 或 `# YYYY/M/D` 标题的工作日志拆成每天一篇 Markdown 日记,按 `YYYY-MM/YYYY-MM-DD.md` 虚拟路径写入 Decision Center PostgreSQL;`decision diary list/history` 默认只返回摘要,需要完整 Markdown 时显式加 `--include-body`;`decision diary show <YYYY-MM-DD|id> [--source-file path]` 查看单日正文,`--source-file` 用于同一天存在多个导入来源时精确选择;`decision diary edit|upsert <YYYY-MM-DD|id> --body-file <path> [--title text] [--source-file path] [--tag tag]` 通过 `PUT /api/diary/entries/:idOrDate` 创建当天或历史条目并编辑既有条目。
|
||||
@@ -210,6 +210,7 @@ bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api logs --tail 80
|
||||
bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api node -e 'console.log(process.version)'
|
||||
bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api/app pwd
|
||||
printf 'printf "pod=%s\n" "$HOSTNAME"\n' | bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api script
|
||||
tar -C /tmp/patched-files -cf - . | bun scripts/cli.ts ssh D601:k3s:unidesk:code-queue/root/unidesk exec --stdin -- tar -xf - -C /root/unidesk
|
||||
bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api apply-patch <<'PATCH'
|
||||
*** Begin Patch
|
||||
*** Update File: /tmp/example.txt
|
||||
@@ -220,9 +221,9 @@ bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api apply-patch <<'PATCH'
|
||||
PATCH
|
||||
```
|
||||
|
||||
`logs` operation 默认是有界读取;`--follow`/`-f` 会被拒绝,防止 CLI 长时间占用维护桥。目标 route 后面直接跟普通命令时,CLI 会把 argv 放到 `kubectl exec --` 后;显式 `exec` operation 可用于让命令边界更清晰。如果命令本身需要复杂 shell 语法,优先改用 `script` operation,把脚本走 stdin,而不是把 `kubectl exec ... -- sh -c ...` 放进远端命令字符串。pod 内 `apply-patch` operation 使用同一个 sh helper,不要求目标容器自带 `python3`、`node` 或仓库里的工具脚本;它面向文本热修复,不用于大文件或二进制改写。
|
||||
`logs` operation 默认是有界读取;`--follow`/`-f` 会被拒绝,防止 CLI 长时间占用维护桥。目标 route 后面直接跟普通命令时,CLI 会把 argv 放到 `kubectl exec --` 后;显式 `exec` operation 可用于让命令边界更清晰。`exec --stdin -- <command> ...` 是 workload route 的通用 stdin 流入口,适合把 tar、patch 以外的任意字节流直接送进容器命令;operation 选项必须放在 `--` 前,容器命令从 `--` 后开始。需要 shell 语法时优先改用 `script` operation,把脚本走 stdin,而不是把 `kubectl exec ... -- sh -c ...` 放进远端命令字符串。pod 内 `apply-patch` operation 使用同一个 sh helper,不要求目标容器自带 `python3`、`node` 或仓库里的工具脚本;它面向文本热修复,不用于大文件或二进制改写。
|
||||
|
||||
`ssh <providerId> argv <command> [args...]` 是通用 argv 安全拼接入口;`exec` 是同义入口。它是非交互远端单进程命令的默认成功路径,不需要 shell 管道时直接传命令和参数,例如 `bun scripts/cli.ts ssh D601 argv true`。需要管道、重定向、变量展开或多条命令时,优先改用 `ssh <providerId> script <<'SCRIPT'`。`find`、`glob` 和 `apply-patch` 有专用入口;`rg`、`grep`、`sed`、`nl`、`stat`、`du`、`ls`、`cat`、`head`、`tail`、`wc` 和 `pwd` 可以直接作为 `ssh` 子命令使用,CLI 会对每个 argv token 做 shell quoting。旧的自由 ssh-like 远端命令入口只保留为近似原生 ssh 的人工兼容路径。
|
||||
`ssh <providerId> argv <command> [args...]` 是通用 argv 安全拼接入口;`exec` 是同义入口。它是非交互远端单进程命令的默认成功路径,不需要 shell 管道时直接传命令和参数,例如 `bun scripts/cli.ts ssh D601 argv true`。需要管道、重定向、变量展开或多条命令时,优先改用 `ssh <providerId> script <<'SCRIPT'`。`find`、`glob` 和 `apply-patch` 有专用入口;`git`、`rg`、`grep`、`sed`、`nl`、`stat`、`du`、`ls`、`cat`、`head`、`tail`、`wc` 和 `pwd` 可以直接作为 `ssh` 子命令使用,CLI 会对每个 argv token 做 shell quoting。旧的自由 ssh-like 远端命令入口只保留为近似原生 ssh 的人工兼容路径。
|
||||
|
||||
通过 `ssh <providerId>` 执行多行脚本时,优先使用结构化 helper,例如 `bun scripts/cli.ts ssh G14 py < script.py`、`bun scripts/cli.ts ssh G14 script <<'SCRIPT'` 或 `bun scripts/cli.ts ssh G14:k3s script <<'SCRIPT'`。不要在远端命令字符串里再嵌套 heredoc、复杂引号或 `ssh 'python3 - <<EOF ...'` 形态;多层 shell 解析容易把 stdin 绑定到错误进程,结果会打开远端交互解释器并留下悬挂的 broker/SSH 会话。长脚本需要复用时,优先提交到 repo 或通过 stdin 传输到目标节点执行。
|
||||
|
||||
@@ -230,7 +231,7 @@ PATCH
|
||||
|
||||
`--main-server-ip` 是一个全局前缀,必须放在需要透传的命令同一次调用中,例如 `bun scripts/cli.ts --main-server-ip 74.48.78.17 debug health`。默认传输是公网 frontend:本地 CLI 读取本仓库 `config.json` 中的 frontend 登录账号密码,登录 `http://<ip>:<frontendPort>/` 获取 HttpOnly session cookie,然后通过 frontend 的 `/api/*` 同源代理访问 backend-core 内网 API;因此计算节点只需要能访问公网 frontend,不需要主 server SSH key,也不需要打开 backend-core REST API 或 PostgreSQL 端口。
|
||||
|
||||
默认 frontend 传输支持 `debug health`、`debug dispatch`、`debug task`、`artifact-registry status|health`、`ci publish-user-service --dry-run`、`microservice list/status/health/diagnostics/tunnel-self-test/proxy`、`decision upload/list/show/health`、`decision requirement list/upsert`、`decision diary import/list/history/months/show/edit/upsert`、`codex task <taskId>`、`codex tasks`、`codex unread`、`codex queues`、`codex output <taskId>`、`codex judge <taskId> --attempt N` 和 `ssh <PROVIDER_ID> <remote-command>`。`microservice status/health/diagnostics` 经 frontend 远程传输时也复用本地 CLI 的默认 compact summary,`microservice health code-queue` 只有显式 `--raw` 或 `--full` 才返回完整健康 body。运行中纠偏 `codex steer` 属于 active run write control,应在主 server 本机 CLI 或显式 SSH 传输上执行,避免公网 frontend 透传限制 stdin/body 审计语义。其中 `ssh` 的 remote frontend 传输使用 `host.ssh` dispatch 执行有界远端命令,非交互单进程命令优先 `ssh D601 argv true`;需要 stdin script、`py` 或 `apply-patch` 这类 stdin-backed helper 时必须在主 server 本机运行,或显式切换到 `--main-server-transport ssh`。交互式登录 shell 仍应在主 server 本机 CLI 使用,或显式切换到旧 SSH 传输后在主 server 上执行。当 backend-core、database、provider-dispatch 或 provider-host-ssh 缺失时,这些 read-only 预检必须返回结构化 `runnerDisposition=infra-blocked` 和缺失通道列表,而不是裸 `No such container`。若确实需要旧行为,可使用 `--main-server-key <key>` 或 `--main-server-transport ssh`,这时 CLI 会通过 SSH 登录主 server 的 `--main-server-root` 目录执行同一个 `bun scripts/cli.ts <command>`。
|
||||
默认 frontend 传输支持 `debug health`、`debug dispatch`、`debug task`、`artifact-registry status|health`、`ci publish-user-service --dry-run`、`microservice list/status/health/diagnostics/tunnel-self-test/proxy`、`decision upload/list/show/health`、`decision requirement list/upsert`、`decision diary import/list/history/months/show/edit/upsert`、`codex task <taskId>`、`codex tasks`、`codex unread`、`codex queues`、`codex output <taskId>`、`codex judge <taskId> --attempt N` 和 `ssh <PROVIDER_ID> <remote-command>`。`microservice status/health/diagnostics` 经 frontend 远程传输时也复用本地 CLI 的默认 compact summary,`microservice health code-queue` 只有显式 `--raw` 或 `--full` 才返回完整健康 body。运行中纠偏 `codex steer` 属于 active run write control,应在主 server 本机 CLI 或显式 SSH 传输上执行,避免公网 frontend 透传限制 stdin/body 审计语义。其中 `ssh` 的 remote frontend 传输使用 authenticated frontend `/ws/ssh` WebSocket 代理接入 backend-core SSH bridge,stdout/stderr 按字节流直通到调用端,不经过 `/api/dispatch`、`/api/tasks` 或 task JSON compact;frontend 运行时必须通过 `PROVIDER_TOKEN`/`UNIDESK_PROVIDER_TOKEN` 或 `PROVIDER_TOKEN_FILE`/`UNIDESK_PROVIDER_TOKEN_FILE` 读取 provider token,并且不能把 token 下发给 runner。因此 D601 Code Queue runner 内的 `tran G14 ...` 应与主 server 本机 `tran G14 ...` 在输出完整性上保持同一语义。非交互单进程命令优先 `ssh D601 argv true`;stdin script、`py` 和 `apply-patch` 这类 stdin-backed helper 也走同一条 `/ws/ssh` 流式通道。交互式登录 shell 仍应在主 server 本机 CLI 使用,或显式切换到旧 SSH 传输后在主 server 上执行。当 backend-core、database、provider-dispatch 或 provider-host-ssh 缺失时,这些 read-only 预检必须返回结构化 `runnerDisposition=infra-blocked` 和缺失通道列表,而不是裸 `No such container`。若确实需要旧行为,可使用 `--main-server-key <key>` 或 `--main-server-transport ssh`,这时 CLI 会通过 SSH 登录主 server 的 `--main-server-root` 目录执行同一个 `bun scripts/cli.ts <command>`。
|
||||
|
||||
计算节点可以用该入口测试自身的远程升级闭环,而不需要在计算节点公开 core REST API 或 database。标准顺序是:先运行 `bun scripts/cli.ts --main-server-ip 74.48.78.17 debug health` 确认主 server 看到当前 Provider 在线,且该 Provider labels 中 `unideskCapabilities` 包含 `host.ssh`、`hostSshConfigured=true`、`hostSshKeyPresent=true`;再运行 `bun scripts/cli.ts --main-server-ip 74.48.78.17 debug dispatch <PROVIDER_ID> provider.upgrade --mode schedule --wait-ms 15000` 触发真实 `provider.upgrade`;随后再次运行 `debug health` 确认节点重新上线;最后运行 `bun scripts/cli.ts --main-server-ip 74.48.78.17 debug dispatch <PROVIDER_ID> host.ssh --wait-ms 15000` 和 `bun scripts/cli.ts --main-server-ip 74.48.78.17 ssh <PROVIDER_ID> hostname` 验证 SSH 透传能力。provider-gateway 新部署或升级后没有完成这组 remote CLI 自测,不能视为交付完成。
|
||||
|
||||
|
||||
@@ -415,7 +415,7 @@ Code Queue task 不是只要 push 代码就算完成。
|
||||
|
||||
如果缺陷只存在于 Code Queue 执行环境,且服务可以在 dev 中安全热修而不触碰 prod,应先做最小临时 live remedy。然后把修复持久化到相关 Dockerfile、容器镜像或凭证传播路径,并在 dev 验证持久化修复后再关闭问题。
|
||||
|
||||
Code Queue runner 的分布式访问能力必须通过镜像内 `/usr/local/bin/tran` 固化,而不是依赖临时拷贝脚本或手工记忆命令前缀。runner 内 `tran` 走公网 frontend 控制面和 `/api/dispatch`,不要求 runner pod 持有 provider token,也不要求 runner pod 能解析 backend-core 集群内 DNS;因此当 runner-local DNS/Secret 缺失而 frontend dispatch 仍可用时,应判为可恢复的 runner-local observation gap。runner 环境中的 remote frontend HTTP 读写默认使用 `curl` 后端,以降低部分 runner 上 Bun HTTP body 读取 native crash 的风险;这不是绕过控制面,route parser 和 dispatch payload 仍由 UniDesk CLI 生成。当前必需验收目标是 D601 的真实 Code Queue pod:至少执行 `tran D601 argv ...`、`tran G14 argv ...` 以及一个 `tran <provider>:k3s ...` 只读命令,证明 D601 runner 能跨 provider host 与 k3s route 透传。G14 runner 可作为后续兼容性观察,不作为该合同当前阻塞条件。
|
||||
Code Queue runner 的分布式访问能力必须通过镜像内 `/usr/local/bin/tran` 固化,而不是依赖临时拷贝脚本或手工记忆命令前缀。runner 内 `tran` 走公网 frontend 控制面;其中 `ssh`/provider route 通过 authenticated frontend `/ws/ssh` WebSocket 代理接入 backend-core SSH bridge,stdout/stderr 必须按字节流直通到 runner,不得再通过 `/api/dispatch`、`/api/tasks` 或 task JSON compact 返回。runner 不要求持有 provider token,也不要求能解析 backend-core 集群内 DNS;frontend 是唯一对 runner 暴露的控制面,provider token 只存在于 frontend/backend-core 运行侧。runner 环境中的非 SSH remote frontend HTTP 读写默认使用 `curl` 后端,以降低部分 runner 上 Bun HTTP body 读取 native crash 的风险;这不是绕过控制面,route parser、WebSocket open payload 和 API request 仍由 UniDesk CLI 生成。当前必需验收目标是 D601 的真实 Code Queue pod:至少执行 `tran D601 argv ...`、`tran G14 argv ...` 以及一个 `tran <provider>:k3s ...` 只读命令,并用长 stdout 的长度/哈希证明没有 `...<truncated:N>` 标记,证明 D601 runner 能跨 provider host 与 k3s route 透传。G14 runner 可作为后续兼容性观察,不作为该合同当前阻塞条件。
|
||||
|
||||
如果业务任务发现缺少工具、Secret/env、DNS、egress 或凭证路径,指挥官应把它拆成独立 infra task,并标记为 `runnerDisposition=infra-blocked` 或等价基础设施阻塞,而不是埋在业务任务 prompt 中。业务 runner 不应自行摸索 live Secret、打印 env/token、复制凭证命令、扩大网络出口或通过反复 rollout 猜测问题;它只能提交脱敏证据、说明缺失能力和等待指挥官或 infra lane 处理。业务任务在 bridge 存在时应继续推进。
|
||||
|
||||
|
||||
@@ -12,6 +12,21 @@ The dev environment lets users experience the next UniDesk version without inter
|
||||
- Dev backend-core and frontend rollout use pushed Git commits from `origin/master:deploy.json#environments.dev`, not dirty local worktrees.
|
||||
- Rust backend-core check/build must run on D601 CI through `ci publish-backend-core`; dev CD consumes the published image and must not compile Rust.
|
||||
|
||||
## D601 UniDesk Workspace
|
||||
|
||||
`D601:UniDesk` 的固定开发 workspace 是 D601 节点上的 `/home/ubuntu/workspace/unidesk-dev`,固定使用 `master` 分支和 `origin git@github.com:pikasTech/unidesk.git`。所有需要在 D601 上改 UniDesk 代码、跑轻量合同测试、验证 `tran`/Code Queue runner、收敛分布式敏捷实验补丁的工作,都应先进入这个目录。
|
||||
|
||||
每次开始 D601 UniDesk 分布式开发、切换任务、恢复中断或上下文压缩后,先通过 UniDesk SSH 维护桥执行:
|
||||
|
||||
```bash
|
||||
tran D601:/home/ubuntu/workspace/unidesk-dev git status --short --branch
|
||||
tran D601:/home/ubuntu/workspace/unidesk-dev git remote -v
|
||||
```
|
||||
|
||||
若路径、分支或 remote 不符合预期,先修正 fixed workspace,再继续。`/home/ubuntu/cq-deploy`、Code Queue pod 内 `/root/unidesk`/`/app`、D601 上的 `/root/unidesk` 和 `/tmp/unidesk-*` 只作为部署副本、运行副本或临时实验面;运行面热修可以直接作用在 pod/容器,但必须随后把持久化修复提交到 Git remote,并在 fixed workspace 中复验。
|
||||
|
||||
Master server 不作为 UniDesk 重型验证机。仓库级 check、Playwright/browser smoke、镜像构建、Rust/Go 编译和 Code Queue runner 实测必须放到 D601、CI runner 或其他获批执行面;master server 只做轻量源码编辑、Git 操作、状态观察和受控调度。
|
||||
|
||||
## Public Dev Frontend Port
|
||||
|
||||
The main server owns one extra public entrypoint for dev UI:
|
||||
|
||||
@@ -42,7 +42,7 @@ UniDesk 用户服务是挂载到 UniDesk 核心服务上的、面向用户使用
|
||||
|
||||
业务仓库由业务系统自己维护,包括源码、Dockerfile、docker-compose、配置模板和业务测试。UniDesk 只引用业务仓库 URL、commit id、Dockerfile/docker-compose 路径和运行容器名;不得把业务全量代码复制到 `src/components/microservices/` 形成双维护。`src/components/microservices/` 只能放通用示例或 UniDesk 自有示例,不作为业务仓库镜像。
|
||||
|
||||
Code Queue runner 也是分布式开发执行面。runner 镜像必须内置 `tran`,让 runner 在执行任务时能通过公网 frontend 控制面访问 D601、G14、host workspace、k3s 控制面和目标 pod。runner 内应优先使用 `tran <provider> argv ...`、`tran <provider>:k3s kubectl ...`、`tran <provider>:k3s:<namespace>:<workload> argv ...` 这类无 stdin 的结构化命令;需要 stdin 的 `script`、`apply-patch`、`py` 操作仍由主 server/host 侧 `tran` 或显式 SSH transport 执行。这个边界避免把 provider token、backend-core 内网 DNS 或长命令多层引号作为 runner 可用性的前提。
|
||||
Code Queue runner 也是分布式开发执行面。runner 镜像必须内置 `tran`,让 runner 在执行任务时能通过公网 frontend 控制面访问 D601、G14、host workspace、k3s 控制面和目标 pod。runner 内应优先使用 `tran <provider> argv ...`、`tran <provider>:k3s kubectl ...`、`tran <provider>:k3s:<namespace>:<workload> argv ...` 这类结构化命令;需要 stdin 的 `script`、`apply-patch`、`py` 操作同样通过 frontend `/ws/ssh` 流式通道执行,不应退回 `/api/dispatch` task polling。这个边界避免把 provider token、backend-core 内网 DNS 或长命令多层引号作为 runner 可用性的前提,也避免大 stdout 被 task JSON compact 截断。
|
||||
|
||||
## Main Server User Services
|
||||
|
||||
@@ -221,7 +221,7 @@ D601 上必须显式使用原生 k3s kubeconfig:`KUBECONFIG=/etc/rancher/k3s/k
|
||||
- Skill 注入边界:DEV Code Queue scheduler/read/write Pod 必须把宿主 `/home/ubuntu/.agents/skills` 只读挂载到容器 `/root/.agents/skills`,并设置 `UNIDESK_SKILLS_PATH=/root/.agents/skills`,让执行任务能读取 `cli-spec` 等技能;只允许挂载 skill 目录本身,不得把宿主 `~/.agents`、`~/.codex`、token、auth JSON 或其他隐私配置整体暴露给任务容器。`/health` 和 `/api/dev-ready` 必须暴露非敏感 `skills` 状态:路径、exists、available、readonly、skillCount、`cliSpecAvailable` 和修复建议;CLI `codex dev-ready` 可读取该摘要。当前交付只要求 DEV manifest 和旧 direct Compose 诊断路径具备只读 skill 注入;PROD Code Queue 发布前必须单独审查隔离级别,不能把 DEV 桥接模式直接推广为生产默认。
|
||||
- Develop-ready 镜像:Code Queue 镜像必须在启动前预装 UniDesk/Pipeline 调试所需工具,至少包含 `codex`、`bun`、`node`、`npm`/`npx`、`git`、`rg`、`curl`、`python3`/`pip3`、`docker`、`docker compose`、`docker-compose`、`jq`、`ssh`、`rsync`、`make`、`gcc`/`g++`、`iptables`、`tar`、`gzip` 和 `unzip`;不得依赖 Codex 任务运行时再 `apt-get install` 这些基础环境。
|
||||
- 远程开发容器与任务执行 Provider:Code Queue 必须能通过 live API 拉起 D601 等计算节点上的开发容器,入口为 `POST /api/dev-containers/<providerId>/start`,默认 Provider 为 `D601`。该流程由 Code Queue 调用 UniDesk `ssh <providerId>` 维护桥在目标节点创建 `unidesk-codex-dev-<providerId>`,并在 Code Queue 所在节点与开发容器之间建立 `ssh -w` TUN 点对点链路;服务所在节点负责对开发容器的 TUN 源地址做 NAT/MASQUERADE,开发容器默认路由和 DNS 改走该 TUN,从而让 `ping google.com`、DNS、HTTP(S) 等出网都经主 server 全局代理,而不是依赖 D601 本地网络。提交 Code Queue 任务时必须支持选择执行 Provider:`D601` 在 D601 原生 k3s 的 active Code Queue scheduler/runner Pod 中本机执行,默认工作目录为 `/workspace`,并且 `/workspace` 必须映射 D601 WSL host 的 `/home/ubuntu`;同一个 hostPath 还必须挂载到容器内 `/home/ubuntu`,让 WSL home 里的绝对 symlink(例如 `/workspace/cq-deploy -> /home/ubuntu/unidesk-code-queue-deploy`)在任务中可解析,不能只看到 symlink 名而无法进入目标目录。`/root/unidesk` 与 `/app` 必须单独映射 `/home/ubuntu/cq-deploy` 作为服务部署仓库;其他 Provider 在对应 `unidesk-codex-dev-<providerId>` 容器中执行,默认工作目录为 `/home/ubuntu`,可按任务覆盖 `cwd`。远程任务启动前必须自动复用或拉起该 Provider 的开发容器、同步 Codex 配置和允许的运行时 provider 环境变量,并通过同一 master TUN/NAT 链路出网;目标 host 存在 `/mnt` 时,开发容器必须挂载 host `/mnt:/mnt`,确保 D601 这类 WSL 节点的 Windows 盘符路径如 `/mnt/f/Work/ConStart` 在任务容器内可见,避免 agent 因缺少真实工作区而搜索到无关项目。TUN 建立必须幂等处理 stale 状态:启动前清理旧 `tun<id>`、默认路由、旧 tunnel SSH 进程和旧 OUTPUT 跳转,缺失旧设备不能导致失败,冷启动运行时准备要有有界但足够的 timeout。TUN 建立后必须创建 `UD-CQ-EGRESS-<provider>` OUTPUT 链,规则只允许 loopback、既有连接、`tun<id>` 出口以及到 master server 的 SSH tunnel 控制连接,随后 reject 其他 IPv4/IPv6 出站包;这条网络层封口是开发/执行容器的权威外网边界,不能用 `HTTP_PROXY`/`NO_PROXY` 环境变量替代,容器镜像也必须使用已解析出的唯一 `unidesk-code-queue:<provider>` 或显式 `image`,缺失时直接失败,禁止 provider-gateway image、`latest` 或其他隐式镜像 fallback。验收必须保留三类日志:容器建隧道后 `ping google.com` 成功、强制指定原 Docker 网卡直连外网被 `sealed_direct_ping=blocked_expected` 拦截、服务所在节点上对应 `UNIDESK-CODEX-DEV-<providerId>` NAT 链或 `tun<id>` 计数在 ping 前后增长;涉及 WSL 工作区任务时还必须在开发容器内验证目标 `/mnt/...` 路径可读。`GET /api/dev-containers/<providerId>/status` 必须展示默认路由、`route_8_8_8_8`、`egressFirewallChain` 和 OUTPUT 链跳转。开发容器代理密钥只生成到 `.state/code-queue/dev-proxy/` 与目标节点用户目录,不得提交到仓库。
|
||||
- 远程维护桥调用:Code Queue 已迁移到 D601 后,Code Queue 后端 Pod 内没有主 server 的 `unidesk-backend-core` 容器,不能再把 `bun scripts/cli.ts ssh ...` 实现为本地 `docker exec unidesk-backend-core`。Code Queue 后端发起的 provider 维护命令必须通过主 server frontend `/api/dispatch` 进入 backend-core,再由目标 provider-gateway 执行 `host.ssh`;需要传递脚本时必须使用 base64 临时文件,超过 Host SSH 单命令长度上限时分块上传到目标 `/tmp` 后再执行,避免恢复到本地 Docker broker、交互 stdin 或手工 shell fallback。
|
||||
- 远程维护桥调用:Code Queue 已迁移到 D601 后,Code Queue 后端 Pod 内没有主 server 的 `unidesk-backend-core` 容器,不能再把 `bun scripts/cli.ts ssh ...` 实现为本地 `docker exec unidesk-backend-core`。Code Queue runner 发起的 provider 维护命令必须通过主 server frontend authenticated `/ws/ssh` 流式代理进入 backend-core SSH bridge,再由目标 provider-gateway 执行 Host SSH/WSL SSH;stdout/stderr 直接流回 runner,不能经过 `/api/dispatch` task polling 或 JSON compact。需要传递脚本、`py` 或 `apply-patch` 时也使用同一条 stdin 流式通道,避免恢复到本地 Docker broker、手工 base64 分块上传、交互 shell fallback 或多层引号。
|
||||
- 远程 Provider 准备不得阻塞控制面:Code Queue 在请求处理、队列调度、远程开发容器准备、Host SSH/WSL SSH 透传、Codex/OpenCode 启动和日志导出路径中,禁止使用会长时间占用 Bun event loop 的同步子进程调用,例如针对远程 Provider 的 `spawnSync`、`execSync` 或 `execFileSync`。远程命令必须通过异步子进程执行,带显式 timeout、超时 kill、stdout/stderr 上限和任务 output 进度记录;远程准备失败只能让对应任务进入失败或 retry,不能让 `POST /api/tasks`、SSE `/api/events`、`/health`、overview 或 frontend/core 用户服务代理等控制面请求等待远程 SSH 结束。凡是改动 D601/远程 Provider 准备、`api/dev-containers/*`、任务入队启动或 `runCodeQueueSsh` 等路径,验收必须在一个远程 SSH/status/start 探针运行期间并发验证容器直连 `/health` 和 `/api/tasks/overview` 仍能在 1s 内返回,证明远程超时不会复发为全站刷新卡死。
|
||||
- OpenCode 远程执行:`minimax-m2.7` 走 OpenCode JSON event port 时,本地和远程命令都必须显式执行 `opencode run ...`;远程 Docker exec 不得退化成 `exec run ...`,否则会在目标容器内变成 `bash: exec: run: not found`。OpenCode JSON stream 的终态判定以“当前进程退出码 + 当前 attempt 的最终 assistant response”为准:`exit=0` 且当前 attempt 产生非空最终回复时,即使上游没有发 `step_finish` 事件,也应视为正常 terminal;非零退出、无当前最终回复或传输关闭才进入 retry。每个 attempt 的 `finalResponse` 必须只来自当前 OpenCode/Codex turn,禁止在当前 turn 未产出最终回复时回退复用 task 上一次 `finalResponse`,否则会把旧任务内容误判为本轮完成。
|
||||
- Codex 控制:服务内部启动 `codex app-server --listen stdio://`,用 JSON-RPC 调用 `thread/start`、`turn/start`、`turn/steer` 和 `turn/interrupt`,并监听 `turn/completed`、assistant delta、reasoning delta、command output delta、file diff delta 等通知生成前端可轮询的 transcript。
|
||||
|
||||
@@ -162,6 +162,7 @@ export function sshHelp(): unknown {
|
||||
"bun scripts/cli.ts ssh D601:k3s script <<'SCRIPT'",
|
||||
"bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api/app pwd",
|
||||
"bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api apply-patch <<'PATCH'",
|
||||
"tar -C /path/to/files -cf - . | bun scripts/cli.ts ssh D601:k3s:unidesk:code-queue/root/unidesk exec --stdin -- tar -xf - -C /root/unidesk",
|
||||
"bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api node -e 'console.log(process.version)'",
|
||||
"bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api script <<'SCRIPT'",
|
||||
"bun scripts/cli.ts ssh D601:k3s:hwlab-dev:hwlab-cloud-api logs --tail 80",
|
||||
@@ -169,6 +170,7 @@ export function sshHelp(): unknown {
|
||||
notes: [
|
||||
"ssh --help and ssh <route> --help print this JSON help and never open an interactive session.",
|
||||
"For non-interactive remote commands, prefer argv for a single process and script/stdin for shell logic.",
|
||||
"For arbitrary stdin streams into a workload command, use a workload route plus `exec --stdin -- <command> ...`; this keeps the route as location-only and avoids heredoc/base64/tar shell wrapping.",
|
||||
"apply-patch rejects low-context update hunks by default, reports the matched file:line for each hunk on stderr, and only accepts --allow-loose when the caller has manually reviewed an intentionally ambiguous insertion.",
|
||||
"script defaults to target /bin/sh and inherits provider proxy variables such as HTTP_PROXY/HTTPS_PROXY/ALL_PROXY/NO_PROXY; use --shell bash only for bash syntax such as pipefail, arrays, or [[ ... ]], not as a proxy workaround.",
|
||||
"Route syntax is `{provider}:{plane}[:{scope...}] {operation} [operation-args...]`: the first argv token locates a distributed target only, and every following token belongs to the operation parser. Host workspace routes use `<provider>:/absolute/workspace`; native k3s providers such as D601 and G14 use <provider>:k3s for the control plane, <provider>:k3s:<namespace>:<workload> for a workload, and <provider>:k3s:<namespace>:<workload>/<pod-workspace> for a pod workspace.",
|
||||
|
||||
+168
-138
@@ -6,7 +6,7 @@ import { type UniDeskConfig } from "./config";
|
||||
import { type DebugDispatchCommand, isDebugDispatchCommand } from "./debug";
|
||||
import { summarizeMicroserviceHealthResponse, summarizeMicroserviceObservation, summarizeMicroserviceProxyResponse } from "./microservices";
|
||||
import { parseNetworkPerfOptions, runNetworkPerf } from "./network-perf";
|
||||
import { formatSshFailureHint, isSshSkillDiscoveryArgs, parseSshInvocation, sshFailureHint } from "./ssh";
|
||||
import { formatSshFailureHint, parseSshInvocation, sshFailureHint, wrapSshRemoteCommand } from "./ssh";
|
||||
import { codexJudgeQueryAsync, codexOutputQueryAsync, codexPrPreflightQueryAsync, codexQueuesQueryAsync, codexTaskQueryAsync, codexTasksQueryAsync, codexUnreadTriageAsync } from "./code-queue";
|
||||
import { runDecisionCenterCommandAsync } from "./decision-center";
|
||||
import {
|
||||
@@ -536,95 +536,6 @@ function jsonOption(args: string[], name: string): Record<string, unknown> | und
|
||||
return parsed as Record<string, unknown>;
|
||||
}
|
||||
|
||||
const compactSkillDiscoverPython = String.raw`import os,json,socket,platform,getpass
|
||||
from pathlib import Path as P
|
||||
S=os.getenv('S','all');L=int(os.getenv('L','0'));D=int(os.getenv('D','4'));skip={'node_modules','.git','.state','logs','references','__pycache__'}
|
||||
def isw():
|
||||
try:r=P('/proc/sys/kernel/osrelease').read_text(errors='ignore').lower()
|
||||
except Exception:r=''
|
||||
return 'microsoft' in r or 'wsl' in r or 'WSL_INTEROP' in os.environ
|
||||
def wp(p):
|
||||
s=str(p)
|
||||
return s[5].upper()+':\\'+s[7:].replace('/','\\') if s.startswith('/mnt/') and len(s)>6 and s[5].isalpha() and s[6]=='/' else None
|
||||
def md(f):
|
||||
n=f.parent.name;d=''
|
||||
try:ls=f.read_text(errors='replace')[:8192].splitlines()
|
||||
except Exception:ls=[]
|
||||
if ls and ls[0].strip()=='---':
|
||||
for l in ls[1:]:
|
||||
if l.strip()=='---':break
|
||||
if ':' in l:
|
||||
k,v=l.split(':',1);k=k.strip().lower();v=v.strip().strip('"\' ')
|
||||
if k=='name' and v:n=v
|
||||
if k=='description' and v:d=v
|
||||
if not d:
|
||||
for l in ls:
|
||||
x=l.strip()
|
||||
if x and not x.startswith('---') and not x.startswith('#'):
|
||||
d=x;break
|
||||
return n,d
|
||||
def scan(sc,root):
|
||||
rec={'scope':sc,'path':str(root),'windowsPath':wp(root),'exists':False,'skillCount':0,'error':None};out=[]
|
||||
try:rec['exists']=root.exists()
|
||||
except Exception as e:rec['error']=str(e)
|
||||
if not rec['exists']:return rec,out
|
||||
try:
|
||||
for f in root.rglob('SKILL.md'):
|
||||
rel=f.relative_to(root).parts[:-1]
|
||||
if not rel or len(rel)>D or any(x in skip for x in rel):continue
|
||||
n,d=md(f);out.append({'scope':sc,'name':n,'description':d,'path':str(f.parent),'skillMd':str(f),'windowsPath':wp(f.parent),'root':str(root)})
|
||||
except Exception as e:rec['error']=str(e)
|
||||
rec['skillCount']=len(out);return rec,out
|
||||
roots=[];h=P.home()
|
||||
if S!='windows':roots += [('wsl',h/'.agents/skills'),('wsl',h/'.codex/skills'),('wsl',P('/root/.agents/skills')),('wsl',P('/root/.codex/skills'))]
|
||||
if S!='wsl' and isw():
|
||||
try:users=list(P('/mnt/c/Users').iterdir())
|
||||
except Exception:users=[]
|
||||
for u in users:
|
||||
if u.is_dir() and u.name.lower() not in {'all users','default','default user','public'}:roots += [('windows',u/'.agents/skills'),('windows',u/'.codex/skills')]
|
||||
seen=set();rr=[];ss=[]
|
||||
for sc,r in roots:
|
||||
k=(sc,str(r))
|
||||
if k in seen:continue
|
||||
seen.add(k);a,b=scan(sc,r);rr.append(a);ss+=b
|
||||
ss.sort(key=lambda x:(0 if x['scope']=='wsl' else 1,x['name'].lower(),x['path']));tb=len(ss)
|
||||
if L>0:ss=ss[:L]
|
||||
c={'total':len(ss),'totalBeforeLimit':tb,'wsl':sum(1 for x in ss if x['scope']=='wsl'),'windows':sum(1 for x in ss if x['scope']=='windows')}
|
||||
print(json.dumps({'ok':True,'command':'unidesk ssh skills','node':{'hostname':socket.gethostname(),'user':getpass.getuser(),'home':str(P.home()),'platform':platform.platform(),'isWsl':isw()},'counts':c,'roots':rr,'skills':ss},ensure_ascii=False,indent=2))`;
|
||||
|
||||
function remoteFrontendSkillDiscoverCommand(args: string[]): string {
|
||||
let scope = "all";
|
||||
let limit = 0;
|
||||
let maxDepth = 4;
|
||||
const start = args[0] === "skill" ? 2 : 1;
|
||||
for (let index = start; index < args.length; index += 1) {
|
||||
const arg = args[index] ?? "";
|
||||
const next = args[index + 1];
|
||||
if (arg === "--scope") {
|
||||
if (next !== "all" && next !== "wsl" && next !== "windows") throw new Error("ssh skills --scope must be one of: all, wsl, windows");
|
||||
scope = next;
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (arg === "--limit") {
|
||||
const value = Number(next);
|
||||
if (!Number.isInteger(value) || value < 0) throw new Error("ssh skills --limit must be >= 0");
|
||||
limit = value;
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (arg === "--max-depth") {
|
||||
const value = Number(next);
|
||||
if (!Number.isInteger(value) || value <= 0) throw new Error("ssh skills --max-depth must be positive");
|
||||
maxDepth = value;
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
throw new Error(`remote frontend ssh skills does not support option: ${arg}`);
|
||||
}
|
||||
return `S=${shellQuote(scope)} L=${shellQuote(String(limit))} D=${shellQuote(String(maxDepth))} python3 -c ${shellQuote(compactSkillDiscoverPython)}`;
|
||||
}
|
||||
|
||||
function dispatchPayload(args: string[], command: DebugDispatchCommand): Record<string, unknown> {
|
||||
const explicit = jsonOption(args, "--payload-json") ?? {};
|
||||
if (command === "provider.upgrade") {
|
||||
@@ -951,12 +862,178 @@ async function remoteNetworkPerf(options: RemoteCliOptions, config: UniDeskConfi
|
||||
};
|
||||
}
|
||||
|
||||
function frontendSshWebSocketUrl(session: FrontendSession): string {
|
||||
const url = new URL("/ws/ssh", session.baseUrl);
|
||||
url.protocol = url.protocol === "https:" ? "wss:" : "ws:";
|
||||
return url.toString();
|
||||
}
|
||||
|
||||
function webSocketDataText(data: unknown): string {
|
||||
if (typeof data === "string") return data;
|
||||
if (data instanceof ArrayBuffer) return Buffer.from(data).toString("utf8");
|
||||
if (ArrayBuffer.isView(data)) return Buffer.from(data.buffer, data.byteOffset, data.byteLength).toString("utf8");
|
||||
return String(data);
|
||||
}
|
||||
|
||||
function openFrontendSshWebSocket(session: FrontendSession): WebSocket {
|
||||
const WebSocketWithHeaders = WebSocket as unknown as new (
|
||||
url: string,
|
||||
options?: { headers?: Record<string, string> },
|
||||
) => WebSocket;
|
||||
return new WebSocketWithHeaders(frontendSshWebSocketUrl(session), { headers: { cookie: session.cookie } });
|
||||
}
|
||||
|
||||
async function runRemoteSshWebSocket(
|
||||
session: FrontendSession,
|
||||
invocation: ReturnType<typeof parseSshInvocation>,
|
||||
): Promise<number> {
|
||||
const parsed = invocation.parsed;
|
||||
const size = {
|
||||
cols: Number(process.stdout.columns) > 0 ? Number(process.stdout.columns) : 100,
|
||||
rows: Number(process.stdout.rows) > 0 ? Number(process.stdout.rows) : 30,
|
||||
};
|
||||
const openTimeoutMs = Math.max(15000, Number(process.env.UNIDESK_SSH_OPEN_TIMEOUT_MS || 60000));
|
||||
const payload = {
|
||||
providerId: invocation.providerId,
|
||||
command: wrapSshRemoteCommand(parsed.remoteCommand),
|
||||
cwd: invocation.route.plane === "host" ? invocation.route.workspace ?? undefined : undefined,
|
||||
tty: parsed.remoteCommand === null,
|
||||
stdinEotOnEnd: parsed.remoteCommand !== null,
|
||||
openTimeoutMs,
|
||||
cols: size.cols,
|
||||
rows: size.rows,
|
||||
};
|
||||
const ws = openFrontendSshWebSocket(session);
|
||||
let exitCode = 255;
|
||||
let settled = false;
|
||||
let canSend = false;
|
||||
let sessionReady = false;
|
||||
const pending: string[] = [];
|
||||
const pendingSessionMessages: string[] = [];
|
||||
|
||||
const send = (value: unknown): void => {
|
||||
const text = JSON.stringify(value);
|
||||
if (!canSend || ws.readyState !== WebSocket.OPEN) {
|
||||
pending.push(text);
|
||||
return;
|
||||
}
|
||||
ws.send(text);
|
||||
};
|
||||
const flush = (): void => {
|
||||
while (pending.length > 0 && ws.readyState === WebSocket.OPEN) ws.send(pending.shift()!);
|
||||
};
|
||||
const sendInput = (value: Buffer | string): void => {
|
||||
sendWhenSessionReady({ type: "ssh.input", data: Buffer.from(value).toString("base64"), encoding: "base64" });
|
||||
};
|
||||
const sendWhenSessionReady = (value: unknown): void => {
|
||||
const text = JSON.stringify(value);
|
||||
if (!sessionReady || ws.readyState !== WebSocket.OPEN) {
|
||||
pendingSessionMessages.push(text);
|
||||
return;
|
||||
}
|
||||
ws.send(text);
|
||||
};
|
||||
const flushSessionMessages = (): void => {
|
||||
if (!sessionReady || ws.readyState !== WebSocket.OPEN) return;
|
||||
while (pendingSessionMessages.length > 0) ws.send(pendingSessionMessages.shift()!);
|
||||
};
|
||||
|
||||
return await new Promise<number>((resolve) => {
|
||||
const rawMode = parsed.remoteCommand === null && process.stdin.isTTY && typeof process.stdin.setRawMode === "function";
|
||||
const openTimer = setTimeout(() => {
|
||||
if (sessionReady || settled) return;
|
||||
process.stderr.write("unidesk remote frontend ssh bridge timed out waiting for provider session\n");
|
||||
exitCode = 255;
|
||||
try {
|
||||
ws.close();
|
||||
} catch {
|
||||
// Ignore close failures while resolving the timeout path.
|
||||
}
|
||||
}, openTimeoutMs);
|
||||
|
||||
const restore = (): void => {
|
||||
clearTimeout(openTimer);
|
||||
process.stdin.off("data", onStdinData);
|
||||
process.stdin.off("end", onStdinEnd);
|
||||
if (rawMode) process.stdin.setRawMode(false);
|
||||
};
|
||||
const finish = (code: number): void => {
|
||||
if (settled) return;
|
||||
settled = true;
|
||||
restore();
|
||||
const hint = sshFailureHint(invocation.providerId, parsed, code, "");
|
||||
if (hint !== null) process.stderr.write(formatSshFailureHint(hint));
|
||||
resolve(code);
|
||||
};
|
||||
const onStdinData = (chunk: Buffer): void => {
|
||||
sendInput(chunk);
|
||||
};
|
||||
const onStdinEnd = (): void => {
|
||||
if (parsed.stdinSuffix) sendInput(parsed.stdinSuffix);
|
||||
if (payload.stdinEotOnEnd === true) sendInput(Buffer.from([4]));
|
||||
sendWhenSessionReady({ type: "ssh.eof" });
|
||||
};
|
||||
|
||||
ws.addEventListener("open", () => {
|
||||
canSend = true;
|
||||
send({ type: "ssh.open", ...payload });
|
||||
flush();
|
||||
});
|
||||
ws.addEventListener("message", (event) => {
|
||||
const text = webSocketDataText(event.data);
|
||||
let message: Record<string, unknown>;
|
||||
try {
|
||||
message = JSON.parse(text) as Record<string, unknown>;
|
||||
} catch {
|
||||
process.stderr.write(`${text}\n`);
|
||||
return;
|
||||
}
|
||||
if (message.type === "ssh.dispatched") return;
|
||||
if (message.type === "ssh.opened") {
|
||||
sessionReady = true;
|
||||
clearTimeout(openTimer);
|
||||
flushSessionMessages();
|
||||
return;
|
||||
}
|
||||
if (message.type === "ssh.data") {
|
||||
const chunk = Buffer.from(String(message.data ?? ""), message.encoding === "base64" ? "base64" : "utf8");
|
||||
if (message.stream === "stderr") process.stderr.write(chunk);
|
||||
else process.stdout.write(chunk);
|
||||
return;
|
||||
}
|
||||
if (message.type === "ssh.error") {
|
||||
process.stderr.write(`${String(message.message || "ssh bridge error")}\n`);
|
||||
exitCode = 255;
|
||||
ws.close();
|
||||
return;
|
||||
}
|
||||
if (message.type === "ssh.exit") {
|
||||
exitCode = Number.isInteger(message.exitCode) ? Number(message.exitCode) : 255;
|
||||
ws.close();
|
||||
}
|
||||
});
|
||||
ws.addEventListener("close", () => finish(exitCode));
|
||||
ws.addEventListener("error", () => {
|
||||
process.stderr.write("unidesk remote frontend ssh bridge websocket error\n");
|
||||
finish(255);
|
||||
});
|
||||
|
||||
if (rawMode) process.stdin.setRawMode(true);
|
||||
process.stdin.resume();
|
||||
if (parsed.stdinPrefix) sendInput(parsed.stdinPrefix);
|
||||
process.stdin.on("data", onStdinData);
|
||||
process.stdin.on("end", onStdinEnd);
|
||||
});
|
||||
}
|
||||
|
||||
export function remoteSshFrontendPlanForTest(target: string, args: string[]): Record<string, unknown> {
|
||||
const invocation = parseSshInvocation(target, args);
|
||||
return {
|
||||
transport: "frontend-websocket",
|
||||
providerId: invocation.providerId,
|
||||
route: invocation.route,
|
||||
remoteCommand: invocation.parsed.remoteCommand,
|
||||
wrappedRemoteCommand: wrapSshRemoteCommand(invocation.parsed.remoteCommand),
|
||||
requiresStdin: invocation.parsed.requiresStdin,
|
||||
invocationKind: invocation.parsed.invocationKind,
|
||||
payloadCwd: invocation.route.plane === "host" ? invocation.route.workspace : null,
|
||||
@@ -966,54 +1043,7 @@ export function remoteSshFrontendPlanForTest(target: string, args: string[]): Re
|
||||
async function runRemoteSshOverFrontend(session: FrontendSession, target: string | undefined, args: string[]): Promise<number> {
|
||||
if (!target) throw new Error("remote ssh requires a route, for example: bun scripts/cli.ts --main-server-ip 74.48.78.17 ssh D601 hostname");
|
||||
const invocation = parseSshInvocation(target, args);
|
||||
const parsed = invocation.parsed;
|
||||
if (parsed.requiresStdin) {
|
||||
process.stderr.write("remote frontend transport does not stream stdin for ssh helper subcommands such as script, apply-patch or py; run the command on the main server, use --main-server-transport ssh, or use an argv/pod-route operation that does not need stdin\n");
|
||||
return 255;
|
||||
}
|
||||
if (parsed.remoteCommand === null) {
|
||||
process.stderr.write("remote frontend transport supports ssh remote commands only; pass a command such as: ssh D601 hostname\n");
|
||||
return 255;
|
||||
}
|
||||
if (args[0] === "glob") {
|
||||
process.stderr.write("remote frontend transport does not support the ssh glob helper because host.ssh exec has a short command-length limit; run it on the main server CLI instead\n");
|
||||
return 255;
|
||||
}
|
||||
const remoteCommand = isSshSkillDiscoveryArgs(args) ? remoteFrontendSkillDiscoverCommand(args) : parsed.remoteCommand;
|
||||
const dispatch = await frontendJson(session, "/api/dispatch", {
|
||||
method: "POST",
|
||||
body: JSON.stringify({
|
||||
providerId: invocation.providerId,
|
||||
command: "host.ssh",
|
||||
payload: {
|
||||
source: "cli-remote-ssh",
|
||||
mode: "exec",
|
||||
command: remoteCommand,
|
||||
...(invocation.route.plane === "host" && invocation.route.workspace !== null ? { cwd: invocation.route.workspace } : {}),
|
||||
timeoutMs: isSshSkillDiscoveryArgs(args) ? 30000 : 15000,
|
||||
},
|
||||
}),
|
||||
});
|
||||
const taskId = (dispatch as { body?: { taskId?: string } }).body?.taskId ?? "";
|
||||
if (!dispatch.ok || taskId.length === 0) {
|
||||
process.stderr.write(`${JSON.stringify(dispatch, null, 2)}\n`);
|
||||
return 255;
|
||||
}
|
||||
const wait = await waitForFrontendTask(session, taskId, 20_000);
|
||||
const task = (wait as { task?: { status?: string; result?: Record<string, unknown>; message?: string } }).task;
|
||||
const result = task?.result ?? {};
|
||||
const stdout = typeof result.stdout === "string" ? result.stdout : "";
|
||||
const stderr = typeof result.stderr === "string" ? result.stderr : "";
|
||||
if (stdout.length > 0) process.stdout.write(stdout);
|
||||
if (stderr.length > 0) process.stderr.write(stderr);
|
||||
if (task?.status !== "succeeded") {
|
||||
if (stdout.length === 0 && stderr.length === 0) process.stderr.write(`${JSON.stringify({ taskId, task }, null, 2)}\n`);
|
||||
const exitCode = typeof result.exitCode === "number" ? result.exitCode : 255;
|
||||
const hint = sshFailureHint(invocation.providerId, parsed, exitCode, stderr.length > 0 ? stderr : String(task?.message ?? ""));
|
||||
if (hint !== null) process.stderr.write(formatSshFailureHint(hint));
|
||||
return exitCode;
|
||||
}
|
||||
return typeof result.exitCode === "number" ? result.exitCode : 0;
|
||||
return runRemoteSshWebSocket(session, invocation);
|
||||
}
|
||||
|
||||
async function runRemoteCliOverFrontend(options: RemoteCliOptions, config: UniDeskConfig): Promise<number> {
|
||||
|
||||
+32
-2
@@ -39,7 +39,7 @@ export interface SshFailureHint {
|
||||
note: string;
|
||||
}
|
||||
|
||||
const argvQuotedSshSubcommands = new Set(["rg", "grep", "sed", "nl", "stat", "du", "ls", "cat", "head", "tail", "wc", "pwd"]);
|
||||
const argvQuotedSshSubcommands = new Set(["git", "rg", "grep", "sed", "nl", "stat", "du", "ls", "cat", "head", "tail", "wc", "pwd"]);
|
||||
const nativeK3sKubeconfig = "/etc/rancher/k3s/k3s.yaml";
|
||||
const k3sResourceKindAliases = new Set(["pod", "po", "pods", "deployment", "deploy", "deployments", "statefulset", "sts", "daemonset", "ds", "job", "jobs"]);
|
||||
const legacyK3sOperationRouteSegments = new Set([
|
||||
@@ -1018,7 +1018,12 @@ function parseK3sTargetOperation(route: ParsedSshRoute, args: string[]): ParsedS
|
||||
}
|
||||
if (operation === "kubectl") throw new Error(`ssh k3s kubectl is a control-plane operation; use ssh ${route.providerId}:k3s kubectl ...`);
|
||||
if (operation === "exec") {
|
||||
return { remoteCommand: buildK3sExecCommand([...targetArgs, ...k3sRouteCommandArgs(operationArgs)]), requiresStdin: false, invocationKind: "helper" };
|
||||
const execArgs = k3sRouteExecOperationArgs(operationArgs);
|
||||
return {
|
||||
remoteCommand: buildK3sExecCommand([...targetArgs, ...execArgs]),
|
||||
requiresStdin: execArgs.includes("--stdin") || execArgs.includes("-i"),
|
||||
invocationKind: "helper",
|
||||
};
|
||||
}
|
||||
return { remoteCommand: buildK3sExecCommand([...targetArgs, ...k3sRouteCommandArgs(args)]), requiresStdin: false, invocationKind: "helper" };
|
||||
}
|
||||
@@ -1045,6 +1050,31 @@ function k3sRouteCommandArgs(args: string[]): string[] {
|
||||
return args[0] === "--" ? args : ["--", ...args];
|
||||
}
|
||||
|
||||
function k3sRouteExecOperationArgs(args: string[]): string[] {
|
||||
if (args.length === 0) throw new Error("ssh k3s target exec operation requires a command to exec");
|
||||
const result: string[] = [];
|
||||
for (let index = 0; index < args.length; index += 1) {
|
||||
const arg = args[index] ?? "";
|
||||
if (arg === "--") {
|
||||
if (index === args.length - 1) throw new Error("ssh k3s target exec operation requires a command after --");
|
||||
return [...result, ...args.slice(index)];
|
||||
}
|
||||
if (arg === "--stdin" || arg === "-i" || arg === "--tty" || arg === "-t") {
|
||||
result.push(arg);
|
||||
continue;
|
||||
}
|
||||
if (arg === "--container" || arg === "-c" || arg === "--workdir" || arg === "--cwd") {
|
||||
const value = args[index + 1];
|
||||
if (value === undefined || value.length === 0) throw new Error(`ssh k3s target exec ${arg} requires a value`);
|
||||
result.push(arg, value);
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
return [...result, "--", ...args.slice(index)];
|
||||
}
|
||||
throw new Error("ssh k3s target exec operation requires a command to exec");
|
||||
}
|
||||
|
||||
function buildK3sCommand(providerId: string, args: string[]): string {
|
||||
const action = args[0] ?? "";
|
||||
if (action.length === 0 || action === "--help" || action === "-h" || action === "help") {
|
||||
|
||||
@@ -249,6 +249,10 @@ export function runSshArgvGuidanceContract(): JsonRecord {
|
||||
assertCondition(routeApplyPatchWorkspace.parsed.requiresStdin === true, "pod workspace apply-patch must still stream patch stdin", routeApplyPatchWorkspace);
|
||||
assertCondition(routeApplyPatchWorkspace.parsed.remoteCommand === "'env' 'KUBECONFIG=/etc/rancher/k3s/k3s.yaml' 'kubectl' 'exec' '-i' '-n' 'hwlab-dev' 'deployment/hwlab-cloud-api' '--' 'sh' '-c' 'cd \"$1\" || exit; shift; exec \"$@\"' 'unidesk-cwd' '/app' 'sh' '-s' '--'", "pod workspace apply-patch must set cwd before injecting the sh helper", routeApplyPatchWorkspace);
|
||||
|
||||
const routeExecStdin = parseSshInvocation("D601:k3s:unidesk:code-queue/root/unidesk", ["exec", "--stdin", "--", "tar", "-xf", "-", "-C", "/root/unidesk"]);
|
||||
assertCondition(routeExecStdin.parsed.requiresStdin === true, "pod route exec --stdin must stream local stdin into kubectl exec", routeExecStdin);
|
||||
assertCondition(routeExecStdin.parsed.remoteCommand === "'env' 'KUBECONFIG=/etc/rancher/k3s/k3s.yaml' 'kubectl' 'exec' '-n' 'unidesk' 'deployment/code-queue' '-i' '--' 'sh' '-c' 'cd \"$1\" || exit; shift; exec \"$@\"' 'unidesk-cwd' '/root/unidesk' 'tar' '-xf' '-' '-C' '/root/unidesk'", "pod route exec --stdin must keep exec flags before -- and command argv after --", routeExecStdin);
|
||||
|
||||
const sshLike = parseSshArgs(["echo hello"]);
|
||||
const hint = sshFailureHint("D601", sshLike, 255, "kex_exchange_identification: Connection closed by remote host");
|
||||
assertCondition(hint !== null, "ssh-like kex failure must produce a hint", sshLike);
|
||||
@@ -271,6 +275,7 @@ export function runSshArgvGuidanceContract(): JsonRecord {
|
||||
assertCondition(helpText.includes("ssh D601:k3s:hwlab-dev:hwlab-cloud-api/app pwd"), "ssh help must document k3s pod workspace route", helpText);
|
||||
assertCondition(helpText.includes("ssh D601:k3s script <<'SCRIPT'"), "ssh help must document k3s control-plane script operation", helpText);
|
||||
assertCondition(helpText.includes("ssh D601:k3s:hwlab-dev:hwlab-cloud-api apply-patch <<'PATCH'"), "ssh help must document k3s pod apply-patch operation", helpText);
|
||||
assertCondition(helpText.includes("ssh D601:k3s:unidesk:code-queue/root/unidesk exec --stdin -- tar -xf - -C /root/unidesk"), "ssh help must document one-step stdin file streaming into pod exec", helpText);
|
||||
assertCondition(helpText.includes("apply-patch [--allow-loose]") && helpText.includes("low-context update hunks"), "ssh help must document apply-patch loose-context guard", helpText);
|
||||
assertCondition(helpText.includes("ssh D601:k3s:hwlab-dev:hwlab-cloud-api script <<'SCRIPT'"), "ssh help must document k3s script operation", helpText);
|
||||
assertCondition(helpText.includes("UNIDESK_SSH_HINT"), "ssh help must document structured failure hint", helpText);
|
||||
@@ -279,8 +284,10 @@ export function runSshArgvGuidanceContract(): JsonRecord {
|
||||
assertCondition(crossChecks.includes("bun scripts/cli.ts ssh D601 argv true"), "provider triage cross-checks must keep argv true", crossChecks);
|
||||
|
||||
const frontendRemoteK3sPlan = remoteSshFrontendPlanForTest("D601:k3s", ["kubectl", "get", "nodes", "-o", "name"]);
|
||||
assertCondition(frontendRemoteK3sPlan.transport === "frontend-websocket", "remote frontend ssh must use the streaming websocket bridge", frontendRemoteK3sPlan);
|
||||
assertCondition(frontendRemoteK3sPlan.providerId === "D601", "remote frontend ssh must dispatch route target to the provider id", frontendRemoteK3sPlan);
|
||||
assertCondition(frontendRemoteK3sPlan.remoteCommand === "'env' 'KUBECONFIG=/etc/rancher/k3s/k3s.yaml' 'kubectl' 'get' 'nodes' '-o' 'name'", "remote frontend ssh must preserve k3s route command construction", frontendRemoteK3sPlan);
|
||||
assertCondition(String(frontendRemoteK3sPlan.wrappedRemoteCommand ?? "").includes("UNIDESK_SSH_TOOL_DIR=/tmp/unidesk-ssh-tools"), "remote frontend ssh must use the same remote tool bootstrap as local ssh", frontendRemoteK3sPlan);
|
||||
|
||||
const frontendRemotePodArgvPlan = remoteSshFrontendPlanForTest("G14:k3s:unidesk:code-queue", ["argv", "sh", "-c", "command -v tran"]);
|
||||
assertCondition(frontendRemotePodArgvPlan.providerId === "G14", "remote frontend pod route must dispatch through G14 provider", frontendRemotePodArgvPlan);
|
||||
@@ -296,6 +303,20 @@ export function runSshArgvGuidanceContract(): JsonRecord {
|
||||
|
||||
const remoteSource = readFileSync(new URL("./src/remote.ts", import.meta.url), "utf8");
|
||||
assertCondition(remoteSource.includes("UNIDESK_REMOTE_HTTP_CLIENT") && remoteSource.includes("isCodeQueueRunnerEnv(env) ? \"curl\" : \"fetch\""), "remote frontend transport must default to curl HTTP in Code Queue runner environments", remoteSource);
|
||||
assertCondition(remoteSource.includes("frontendSshWebSocketUrl") && remoteSource.includes("runRemoteSshWebSocket"), "remote frontend ssh must go through the streaming websocket implementation", remoteSource);
|
||||
assertCondition(!remoteSource.includes("remote frontend transport does not stream stdin"), "remote frontend ssh must not reject stdin-backed helpers", remoteSource);
|
||||
assertCondition(!remoteSource.includes("source: \"cli-remote-ssh\""), "remote frontend ssh must not use host.ssh dispatch task polling", remoteSource);
|
||||
|
||||
const frontendSource = readFileSync(new URL("../src/components/frontend/src/index.ts", import.meta.url), "utf8");
|
||||
assertCondition(frontendSource.includes('url.pathname === "/ws/ssh"') && frontendSource.includes("proxySshWebSocket"), "frontend must expose an authenticated /ws/ssh proxy", frontendSource);
|
||||
assertCondition(frontendSource.includes("coreSshWebSocketUrl") && frontendSource.includes('url.searchParams.set("token"'), "frontend /ws/ssh proxy must connect to backend-core ssh bridge with the provider token", frontendSource);
|
||||
assertCondition(frontendSource.includes("PROVIDER_TOKEN_FILE") && frontendSource.includes("/run/secrets/unidesk_provider_token"), "frontend ssh proxy must support file-based provider token injection for runtime hotfix and secret mounts", frontendSource);
|
||||
|
||||
const composeSource = readFileSync(new URL("../docker-compose.yml", import.meta.url), "utf8");
|
||||
assertCondition(composeSource.includes('PROVIDER_TOKEN: "${UNIDESK_PROVIDER_TOKEN}"'), "frontend compose service must receive provider token for the ssh proxy", composeSource);
|
||||
|
||||
const devCoreManifest = readFileSync(new URL("../src/components/microservices/k3sctl-adapter/k3s/dev/unidesk-dev-core.k8s.yaml", import.meta.url), "utf8");
|
||||
assertCondition(devCoreManifest.includes("name: frontend-dev") && devCoreManifest.includes("name: PROVIDER_TOKEN"), "dev frontend manifest must receive provider token for the ssh proxy", devCoreManifest);
|
||||
|
||||
const codeQueueDockerfile = readFileSync(new URL("../src/components/microservices/code-queue/Dockerfile", import.meta.url), "utf8");
|
||||
assertCondition(codeQueueDockerfile.includes("COPY scripts/tran /usr/local/bin/tran") && codeQueueDockerfile.includes("chmod 755 /usr/local/bin/tran"), "Code Queue runner image must install tran on PATH", codeQueueDockerfile);
|
||||
@@ -306,6 +327,7 @@ export function runSshArgvGuidanceContract(): JsonRecord {
|
||||
"argv form is classified and quoted as the success path for non-interactive commands",
|
||||
"stdin script form removes shell-command strings for host and k3s workload scripts",
|
||||
"pod apply-patch operation injects helper and forwards patch stdin",
|
||||
"pod exec --stdin streams arbitrary local stdin through workload routes without shell wrapping",
|
||||
"apply-patch uses one sh helper for host and pod paths and rejects low-context hunks unless --allow-loose is explicit",
|
||||
"legacy operation-in-route forms are rejected in any k3s route segment with canonical route-plus-operation guidance",
|
||||
"post-provider k3s shorthand is rejected so location and operation stay separated",
|
||||
@@ -314,8 +336,9 @@ export function runSshArgvGuidanceContract(): JsonRecord {
|
||||
"help text documents stdin script passthrough and UNIDESK_SSH_HINT",
|
||||
"provider triage recommendedCrossChecks keeps ssh D601 argv true",
|
||||
"remote frontend ssh uses the same structured route parser for host, k3s and pod argv routes",
|
||||
"remote frontend ssh uses authenticated /ws/ssh streaming instead of host.ssh dispatch task polling",
|
||||
"Code Queue runner image installs the tran wrapper and runner tran auto-selects remote frontend transport",
|
||||
"Code Queue runner remote frontend HTTP uses curl by default to avoid Bun response-body native crashes",
|
||||
"Code Queue runner remote frontend HTTP uses curl by default for non-ssh API calls to avoid Bun response-body native crashes",
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { readFileSync } from "node:fs";
|
||||
import { createHmac, randomBytes, timingSafeEqual } from "node:crypto";
|
||||
import { join } from "node:path";
|
||||
import type { Server, ServerWebSocket } from "bun";
|
||||
import { createHourlyJsonlWriter, logRetentionBytesForService } from "../../shared/src/rotating-jsonl";
|
||||
import { notificationStyles } from "./notification-styles";
|
||||
|
||||
@@ -11,6 +12,7 @@ interface RuntimeConfig {
|
||||
providerIngressPublicUrl: string;
|
||||
authUsername: string;
|
||||
authPassword: string;
|
||||
providerToken: string | null;
|
||||
sessionSecret: string;
|
||||
sessionTtlSeconds: number;
|
||||
logFile: string;
|
||||
@@ -61,6 +63,13 @@ interface OperationPerformanceSample {
|
||||
detail: string;
|
||||
}
|
||||
|
||||
interface FrontendWsData {
|
||||
channel: "ssh-proxy";
|
||||
upstream: WebSocket | null;
|
||||
upstreamOpen: boolean;
|
||||
pendingToUpstream: string[];
|
||||
}
|
||||
|
||||
const sessionCookieName = "unidesk_session";
|
||||
const config = readConfig();
|
||||
const logger = createLogger("frontend", config.logFile);
|
||||
@@ -142,6 +151,25 @@ function requiredEnv(name: string): string {
|
||||
return value;
|
||||
}
|
||||
|
||||
function optionalEnv(name: string): string | null {
|
||||
const value = process.env[name]?.trim() ?? "";
|
||||
return value.length > 0 ? value : null;
|
||||
}
|
||||
|
||||
function optionalFile(path: string): string | null {
|
||||
try {
|
||||
const value = readFileSync(path, "utf8").trim();
|
||||
return value.length > 0 ? value : null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function optionalFileEnv(name: string, fallbackPath: string | null = null): string | null {
|
||||
const path = optionalEnv(name) ?? fallbackPath;
|
||||
return path === null ? null : optionalFile(path);
|
||||
}
|
||||
|
||||
function readNumberEnv(name: string): number {
|
||||
const raw = requiredEnv(name);
|
||||
const parsed = Number(raw);
|
||||
@@ -169,6 +197,10 @@ function readConfig(): RuntimeConfig {
|
||||
providerIngressPublicUrl: requiredEnv("PROVIDER_INGRESS_PUBLIC_URL"),
|
||||
authUsername: requiredEnv("AUTH_USERNAME"),
|
||||
authPassword: requiredEnv("AUTH_PASSWORD"),
|
||||
providerToken: optionalEnv("PROVIDER_TOKEN")
|
||||
?? optionalEnv("UNIDESK_PROVIDER_TOKEN")
|
||||
?? optionalFileEnv("PROVIDER_TOKEN_FILE", "/run/secrets/unidesk_provider_token")
|
||||
?? optionalFileEnv("UNIDESK_PROVIDER_TOKEN_FILE", "/tmp/unidesk-provider-token"),
|
||||
sessionSecret: requiredEnv("SESSION_SECRET"),
|
||||
sessionTtlSeconds: readNumberEnv("SESSION_TTL_SECONDS"),
|
||||
logFile: requiredEnv("LOG_FILE"),
|
||||
@@ -744,6 +776,68 @@ async function proxyApi(req: Request, url: URL): Promise<Response> {
|
||||
return new Response(await upstream.arrayBuffer(), { status: upstream.status, headers: responseHeaders });
|
||||
}
|
||||
|
||||
function coreSshWebSocketUrl(): string {
|
||||
const url = new URL("/ws/ssh", config.coreInternalUrl);
|
||||
url.protocol = url.protocol === "https:" ? "wss:" : "ws:";
|
||||
url.searchParams.set("token", config.providerToken ?? "");
|
||||
return url.toString();
|
||||
}
|
||||
|
||||
function webSocketDataText(message: string | Buffer): string {
|
||||
return typeof message === "string" ? message : Buffer.from(message).toString("utf8");
|
||||
}
|
||||
|
||||
function flushSshProxyPending(ws: ServerWebSocket<FrontendWsData>): void {
|
||||
const upstream = ws.data.upstream;
|
||||
if (upstream === null || !ws.data.upstreamOpen || upstream.readyState !== WebSocket.OPEN) return;
|
||||
while (ws.data.pendingToUpstream.length > 0) upstream.send(ws.data.pendingToUpstream.shift()!);
|
||||
}
|
||||
|
||||
function closeSshProxy(ws: ServerWebSocket<FrontendWsData>, code = 1000, reason = "ssh proxy closed"): void {
|
||||
if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {
|
||||
ws.close(code, reason);
|
||||
}
|
||||
}
|
||||
|
||||
function openSshProxyUpstream(ws: ServerWebSocket<FrontendWsData>): void {
|
||||
const upstream = new WebSocket(coreSshWebSocketUrl());
|
||||
ws.data.upstream = upstream;
|
||||
upstream.addEventListener("open", () => {
|
||||
ws.data.upstreamOpen = true;
|
||||
flushSshProxyPending(ws);
|
||||
});
|
||||
upstream.addEventListener("message", (event) => {
|
||||
if (ws.readyState === WebSocket.OPEN) ws.send(event.data as string | Buffer);
|
||||
});
|
||||
upstream.addEventListener("close", (event) => {
|
||||
closeSshProxy(ws, event.code || 1000, event.reason || "ssh upstream closed");
|
||||
});
|
||||
upstream.addEventListener("error", () => {
|
||||
if (ws.readyState === WebSocket.OPEN) {
|
||||
ws.send(JSON.stringify({ type: "ssh.error", message: "frontend ssh upstream websocket error" }));
|
||||
}
|
||||
closeSshProxy(ws, 1011, "ssh upstream websocket error");
|
||||
});
|
||||
}
|
||||
|
||||
function proxySshWebSocket(req: Request, server: Server<FrontendWsData>): Response | undefined {
|
||||
if (sessionFromRequest(req) === null) {
|
||||
return jsonResponse({ ok: false, error: "authentication required" }, 401);
|
||||
}
|
||||
if (config.providerToken === null) {
|
||||
return jsonResponse({ ok: false, error: "frontend ssh proxy is missing provider token" }, 503);
|
||||
}
|
||||
const upgraded = server.upgrade(req, {
|
||||
data: {
|
||||
channel: "ssh-proxy",
|
||||
upstream: null,
|
||||
upstreamOpen: false,
|
||||
pendingToUpstream: [],
|
||||
} satisfies FrontendWsData,
|
||||
});
|
||||
return upgraded ? undefined : jsonResponse({ ok: false, error: "websocket upgrade failed" }, 400);
|
||||
}
|
||||
|
||||
function vendorPath(pathname: string): string | null {
|
||||
if (pathname === "/vendor/react.production.min.js") return join(vendorDir, "react", "umd", "react.production.min.js");
|
||||
if (pathname === "/vendor/react-dom.production.min.js") return join(vendorDir, "react-dom", "umd", "react-dom.production.min.js");
|
||||
@@ -768,7 +862,7 @@ function isStaticAssetPath(pathname: string): boolean {
|
||||
return /\/[^/]+\.[a-z0-9]+$/iu.test(pathname);
|
||||
}
|
||||
|
||||
async function handleRequest(req: Request): Promise<Response> {
|
||||
async function handleRequest(req: Request, server: Server<FrontendWsData>): Promise<Response | undefined> {
|
||||
const url = new URL(req.url);
|
||||
logger("debug", "request", { path: url.pathname });
|
||||
try {
|
||||
@@ -779,6 +873,7 @@ async function handleRequest(req: Request): Promise<Response> {
|
||||
if (url.pathname === "/logout" && req.method === "POST") return logout();
|
||||
if (url.pathname === "/api/session") return sessionResponse(req);
|
||||
if (url.pathname === "/api/frontend-performance") return frontendPerformanceResponse(req);
|
||||
if (url.pathname === "/ws/ssh") return proxySshWebSocket(req, server);
|
||||
if (url.pathname.startsWith("/api/") || url.pathname === "/logs") return proxyApi(req, url);
|
||||
if (url.pathname === "/docs" || url.pathname.startsWith("/docs/")) return docsResponse(req, url);
|
||||
if (url.pathname === "/" || url.pathname === "/index.html") {
|
||||
@@ -798,21 +893,39 @@ async function handleRequest(req: Request): Promise<Response> {
|
||||
}
|
||||
}
|
||||
|
||||
const server = Bun.serve({
|
||||
const server = Bun.serve<FrontendWsData>({
|
||||
port: config.port,
|
||||
hostname: "0.0.0.0",
|
||||
idleTimeout: 120,
|
||||
async fetch(req) {
|
||||
async fetch(req, server) {
|
||||
const started = performance.now();
|
||||
const url = new URL(req.url);
|
||||
let response: Response | undefined;
|
||||
try {
|
||||
response = await handleRequest(req);
|
||||
response = await handleRequest(req, server);
|
||||
return response;
|
||||
} finally {
|
||||
recordRequestPerformance(req, url.pathname, response, performance.now() - started);
|
||||
}
|
||||
},
|
||||
websocket: {
|
||||
open(ws) {
|
||||
if (ws.data.channel === "ssh-proxy") openSshProxyUpstream(ws);
|
||||
},
|
||||
message(ws, message) {
|
||||
if (ws.data.channel !== "ssh-proxy") return;
|
||||
ws.data.pendingToUpstream.push(webSocketDataText(message));
|
||||
flushSshProxyPending(ws);
|
||||
},
|
||||
close(ws) {
|
||||
if (ws.data.channel !== "ssh-proxy") return;
|
||||
try {
|
||||
ws.data.upstream?.close();
|
||||
} catch {
|
||||
// Closing an already-closed upstream socket is harmless.
|
||||
}
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
logger("info", "server_listening", {
|
||||
|
||||
@@ -233,6 +233,11 @@ spec:
|
||||
secretKeyRef:
|
||||
name: unidesk-dev-runtime-secrets
|
||||
key: AUTH_PASSWORD
|
||||
- name: PROVIDER_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: unidesk-dev-runtime-secrets
|
||||
key: PROVIDER_TOKEN
|
||||
- name: SESSION_SECRET
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
|
||||
Reference in New Issue
Block a user