From 5dba1a06bedd4d99c2e8778f1a0d98248d4f99da Mon Sep 17 00:00:00 2001
From: Codex
Date: Tue, 2 Jun 2026 08:24:54 +0000
Subject: [PATCH] fix: avoid ssh stream timeout in artifact deploy

---
Review notes (free-form section, ignored by git am / git apply):
- contract test: the "no docker-save stream" needle now contains the
  backslash-escaped quotes that the TypeScript source text actually holds;
  without them the assertion could never fail.
- runRemoteScriptBackground: an empty or non-numeric .done file is no longer
  parsed with Number() (Number("") is 0, which reported a false success);
  polling continues until a numeric exit code is read or the timeout hits.
- pullArtifactFromD601: the remote archive is also removed when the remote
  save fails or times out; cleanup is shared by all exit paths.
- Layout was rebuilt from a whitespace-collapsed copy: indentation assumes the
  two-space style, and the "index" blob ids are those of the original patch,
  so they do not describe the post-image below.

 ...fact-registry-ssh-timeout-contract-test.ts |  28 +++
 scripts/src/artifact-registry.ts              | 178 +++++++++++++++---
 2 files changed, 186 insertions(+), 20 deletions(-)
 create mode 100644 scripts/artifact-registry-ssh-timeout-contract-test.ts

diff --git a/scripts/artifact-registry-ssh-timeout-contract-test.ts b/scripts/artifact-registry-ssh-timeout-contract-test.ts
new file mode 100644
index 00000000..76a7ebb6
--- /dev/null
+++ b/scripts/artifact-registry-ssh-timeout-contract-test.ts
@@ -0,0 +1,28 @@
+import { readFileSync } from "node:fs";
+import { rootPath } from "./src/config";
+
+// Source-text contract test: checks artifact-registry.ts for the call shapes
+// that keep long-running remote work off a single streaming ssh session.
+const source = readFileSync(rootPath("scripts/src/artifact-registry.ts"), "utf8");
+
+function assertCondition(condition: unknown, message: string): void {
+  if (!condition) throw new Error(message);
+}
+
+// The shell line sits inside a double-quoted TS string, so the file text holds
+// backslash-escaped quotes (\"$image\"); the needle needs them in order to match.
+assertCondition(!source.includes('docker save \\"$image\\" | gzip -1"'), "artifact registry must not stream docker save over ssh stdout");
+assertCondition(source.includes("downloadRemoteFile(options, remoteArchive, localArchive"), "compose artifact pull must use verified ssh download");
+assertCondition(source.includes("runRemoteScriptBackground(options, remoteScript"), "remote docker save must run as a background job");
+assertCondition(source.includes('runRemoteScriptBackground(options, deployScript, Math.max(options.timeoutMs, 420_000), "d601-k3s-deploy")'), "D601 k3s deploy must use background polling");
+assertCondition(source.includes('"ssh",\n    options.providerId,\n    "download"'), "download helper must route through UniDesk ssh download");
+
+console.log(JSON.stringify({
+  ok: true,
+  test: "artifact-registry-ssh-timeout-contract",
+  assertions: [
+    "no docker-save stdout stream over ssh",
+    "compose artifact uses verified ssh download",
+    "remote docker save and k3s deploy use background polling"
+  ]
+}, null, 2));
diff --git a/scripts/src/artifact-registry.ts b/scripts/src/artifact-registry.ts
index 14cdf5a9..95013de3 100644
--- a/scripts/src/artifact-registry.ts
+++ b/scripts/src/artifact-registry.ts
@@ -1,5 +1,5 @@
 import { createHash } from "node:crypto";
-import { existsSync, readFileSync, writeFileSync } from "node:fs";
+import { existsSync, readFileSync, rmSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
 import { runCommand, type CommandResult } from "./command";
 import { readConfig, type UniDeskConfig, repoRoot, rootPath } from "./config";
@@ -1656,6 +1656,124 @@ function runRemoteScript(options: ArtifactRegistryOptions, script: string, timeo
   return runCommand(command, repoRoot, { timeoutMs });
 }
 
+/**
+ * Folds several step results into one CommandResult reported under `command`.
+ * Exit code and signal come from the first step that failed or timed out, or
+ * from the last step when every step succeeded; stdout and stderr are the
+ * non-empty per-step outputs joined with newlines.
+ */
+function combineCommandResults(command: string[], parts: CommandResult[]): CommandResult {
+  const failed = parts.find((part) => part.exitCode !== 0 || part.timedOut) ?? parts.at(-1) ?? null;
+  return {
+    command,
+    cwd: repoRoot,
+    exitCode: failed === null ? 0 : failed.exitCode,
+    signal: failed?.signal ?? null,
+    timedOut: parts.some((part) => part.timedOut),
+    stdout: parts.map((part) => part.stdout).filter((part) => part.length > 0).join("\n"),
+    stderr: parts.map((part) => part.stderr).filter((part) => part.length > 0).join("\n"),
+  };
+}
+
+/** Runs the repo CLI as `ssh <providerId> download <remotePath> <localPath>` from the repo root. */
+function downloadRemoteFile(options: ArtifactRegistryOptions, remotePath: string, localPath: string, timeoutMs = options.timeoutMs): CommandResult {
+  return runCommand([
+    process.execPath,
+    "scripts/cli.ts",
+    "ssh",
+    options.providerId,
+    "download",
+    remotePath,
+    localPath,
+  ], repoRoot, { timeoutMs });
+}
+
+/**
+ * Starts `script` on the remote host as a detached `nohup` job and polls for
+ * completion, so no single ssh session has to stay open for the whole run.
+ *
+ * The job writes its output to a `.log` file and its exit code to a `.done`
+ * file under /tmp; every poll reads the done marker and the last log lines.
+ *
+ * @returns the launch result when the job could not be started; otherwise the
+ *   job's exit code with the log tail as stdout, or exit code 124 with
+ *   `timedOut: true` when no exit code was read within `timeoutMs`.
+ */
+async function runRemoteScriptBackground(
+  options: ArtifactRegistryOptions,
+  script: string,
+  timeoutMs: number,
+  name: string,
+): Promise<CommandResult> {
+  const token = `${Date.now().toString(36)}-${Math.random().toString(16).slice(2, 8)}`;
+  const prefix = `/tmp/unidesk-artifact-${safeName(options.serviceId ?? "service")}-${safeName(name)}-${token}`;
+  const logFile = `${prefix}.log`;
+  const doneFile = `${prefix}.done`;
+  const wrapped = [
+    `bash -lc ${shellQuote(script)}`,
+    "code=$?",
+    `printf '%s\\n' "$code" > ${shellQuote(doneFile)}`,
+    "exit \"$code\"",
+  ].join("; ");
+  const launchScript = [
+    `rm -f ${shellQuote(logFile)} ${shellQuote(doneFile)}`,
+    `nohup bash -lc ${shellQuote(wrapped)} > ${shellQuote(logFile)} 2>&1 < /dev/null & echo $!`,
+  ].join("\n");
+  const launch = runRemoteScript(options, launchScript, 30_000);
+  const pid = launch.stdout.trim().split(/\n/u).pop()?.trim() ?? "";
+  if (launch.exitCode !== 0 || launch.timedOut || !/^\d+$/u.test(pid)) {
+    return combineCommandResults(["artifact-registry", "remote-background", name], [launch]);
+  }
+
+  const startedAt = Date.now();
+  let latest: CommandResult | null = null;
+  while (Date.now() - startedAt < timeoutMs) {
+    await Bun.sleep(2_000);
+    const poll = runRemoteScript(options, [
+      `if [ -f ${shellQuote(doneFile)} ]; then printf 'SENTINEL:%s\\n' "$(cat ${shellQuote(doneFile)} 2>/dev/null || true)"; else echo RUNNING; fi`,
+      `tail -n 160 ${shellQuote(logFile)} 2>/dev/null | tr -d '\\000' | LC_ALL=C sed 's/[^[:print:]\t]//g' || true`,
+    ].join("; "), 30_000);
+    latest = poll;
+    if (poll.exitCode !== 0 || poll.timedOut) continue;
+    const stdout = poll.stdout.trimEnd();
+    const [head = "", ...rest] = stdout.split("\n");
+    if (!head.startsWith("SENTINEL:")) continue;
+    const rawCode = head.slice("SENTINEL:".length).trim();
+    // The done file exists before its content is written, and a failed read
+    // yields ""; Number("") is 0, so an unparsed marker would look like success.
+    if (!/^\d+$/u.test(rawCode)) continue;
+    return {
+      command: ["artifact-registry", "remote-background", name],
+      cwd: repoRoot,
+      exitCode: Number(rawCode),
+      signal: null,
+      timedOut: false,
+      stdout: [
+        `remote_background_name=${name}`,
+        `remote_background_pid=${pid}`,
+        `remote_background_log=${logFile}`,
+        ...rest,
+      ].join("\n"),
+      stderr: poll.stderr,
+    };
+  }
+
+  return {
+    command: ["artifact-registry", "remote-background", name],
+    cwd: repoRoot,
+    exitCode: 124,
+    signal: null,
+    timedOut: true,
+    stdout: [
+      `remote_background_name=${name}`,
+      `remote_background_pid=${pid}`,
+      `remote_background_log=${logFile}`,
+      latest?.stdout ?? "",
+    ].join("\n"),
+    stderr: latest?.stderr ?? `remote background script ${name} did not finish within ${timeoutMs}ms`,
+  };
+}
+
 function readonlyCommandFailureResult(
   options: ArtifactRegistryOptions,
   command: CommandResult,
@@ -2268,32 +2386,52 @@ function upsertEnvFileValues(path: string, values: Record<string, string>): void
   writeFileSync(path, `${lines.join("\n")}\n`, "utf8");
 }
 
-function pullArtifactFromD601(options: ArtifactRegistryOptions, sourceImage: string): CommandResult {
+/**
+ * Saves `sourceImage` to a gzip archive on the remote host as a background
+ * job, downloads that archive, and pipes it into the local `docker load`.
+ * The remote archive and the local copy are removed (best effort) on every
+ * exit path, including a failed or timed-out remote save.
+ */
+async function pullArtifactFromD601(options: ArtifactRegistryOptions, sourceImage: string): Promise<CommandResult> {
+  const localArchive = `/tmp/unidesk-artifact-${safeName(options.serviceId ?? "service")}-${safeName(options.commit ?? "commit")}-${Date.now().toString(36)}.tar.gz`;
+  const remoteArchive = `/tmp/unidesk-artifact-${safeName(options.serviceId ?? "service")}-${safeName(options.commit ?? "commit")}-${Date.now().toString(36)}.tar.gz`;
   const remoteScript = [
     "set -euo pipefail",
     `image=${shellQuote(sourceImage)}`,
+    `archive=${shellQuote(remoteArchive)}`,
+    "rm -f \"$archive\"",
     "export DOCKER_CONFIG=$(mktemp -d /tmp/unidesk-artifact-docker-config.XXXXXX)",
     "trap 'rm -rf \"$DOCKER_CONFIG\"' EXIT",
     "printf '{}\\n' > \"$DOCKER_CONFIG/config.json\"",
     "docker pull -q \"$image\" >/dev/null",
     "docker image inspect \"$image\" --format 'remote_source={{ index .Config.Labels \"unidesk.ai/source-commit\" }} remote_service={{ index .Config.Labels \"unidesk.ai/service-id\" }} remote_dockerfile={{ index .Config.Labels \"unidesk.ai/dockerfile\" }}' >&2",
-    "docker save \"$image\" | gzip -1",
+    "docker save \"$image\" | gzip -1 > \"$archive\"",
+    "remote_archive_bytes=$(wc -c < \"$archive\" | tr -d '[:space:]')",
+    "remote_archive_sha256=$(sha256sum \"$archive\" | awk '{print $1}')",
+    "printf 'remote_archive=%s\\nremote_archive_bytes=%s\\nremote_archive_sha256=%s\\n' \"$archive\" \"$remote_archive_bytes\" \"$remote_archive_sha256\"",
   ].join("\n");
-  const sshCommand = [
-    process.execPath,
-    rootPath("scripts", "cli.ts"),
-    "ssh",
-    options.providerId,
-    "argv",
-    "bash",
-    "-lc",
-    remoteScript,
-  ].map(shellQuote).join(" ");
-  const pipeline = [
-    "set -euo pipefail",
-    `${sshCommand} | gzip -dc | docker load`,
-  ].join("\n");
-  return runCommand(["bash", "-lc", pipeline], repoRoot, { timeoutMs: Math.max(options.timeoutMs, 900_000) });
+  // Best-effort removal of the remote archive and the local copy.
+  const cleanup = (): void => {
+    runRemoteScript(options, `rm -f ${shellQuote(remoteArchive)}`, 30_000);
+    try {
+      rmSync(localArchive, { force: true });
+    } catch {
+      // Best-effort cleanup only.
+    }
+  };
+  const archive = await runRemoteScriptBackground(options, remoteScript, Math.max(options.timeoutMs, 900_000), "docker-save");
+  if (archive.exitCode !== 0 || archive.timedOut) {
+    cleanup();
+    return archive;
+  }
+  const download = downloadRemoteFile(options, remoteArchive, localArchive, Math.max(options.timeoutMs, 900_000));
+  if (download.exitCode !== 0 || download.timedOut) {
+    cleanup();
+    return combineCommandResults(["artifact-registry", "pull-artifact-from-d601"], [archive, download]);
+  }
+  const load = runCommand(["bash", "-lc", `set -euo pipefail; gzip -dc ${shellQuote(localArchive)} | docker load`], repoRoot, { timeoutMs: Math.max(options.timeoutMs, 900_000) });
+  cleanup();
+  return combineCommandResults(["artifact-registry", "pull-artifact-from-d601"], [archive, download, load]);
 }
 
 function registryArtifactProbeScript(options: ArtifactRegistryOptions, spec: ArtifactConsumerSpec, commit: string): string {
@@ -2490,7 +2628,7 @@ async function deployComposeArtifactNow(options: ArtifactRegistryOptions, spec:
       registryProbe: commandTail(registryProbe),
     };
   }
-  const pull = pullArtifactFromD601(options, sourceImage);
+  const pull = await pullArtifactFromD601(options, sourceImage);
   if (pull.exitCode !== 0 || pull.timedOut) {
     return {
       ok: false,
@@ -3269,7 +3407,7 @@ async function deployD601K3sArtifactNow(options: ArtifactRegistryOptions, spec:
     };
   }
   const deployScript = d601K3sArtifactDeployScript(options, spec, target, commit);
-  const deploy = runRemoteScript(options, deployScript, Math.max(options.timeoutMs, 420_000));
+  const deploy = await runRemoteScriptBackground(options, deployScript, Math.max(options.timeoutMs, 420_000), "d601-k3s-deploy");
   if (deploy.exitCode !== 0 || deploy.timedOut) {
     return {
       ok: false,