From 263b0cf3b2d1081d555b674619c9ba8dd89bd711 Mon Sep 17 00:00:00 2001 From: Codex Date: Tue, 2 Jun 2026 09:13:11 +0000 Subject: [PATCH] fix: stabilize ssh artifact downloads --- ...fact-registry-ssh-timeout-contract-test.ts | 8 ++- scripts/src/artifact-registry.ts | 2 - scripts/src/ssh-file-transfer.ts | 61 ++++++++++++++++++- scripts/ssh-argv-guidance-contract-test.ts | 14 ++++- 4 files changed, 78 insertions(+), 7 deletions(-) diff --git a/scripts/artifact-registry-ssh-timeout-contract-test.ts b/scripts/artifact-registry-ssh-timeout-contract-test.ts index b29ac5ef..ac12e5fe 100644 --- a/scripts/artifact-registry-ssh-timeout-contract-test.ts +++ b/scripts/artifact-registry-ssh-timeout-contract-test.ts @@ -2,6 +2,10 @@ import { readFileSync } from "node:fs"; import { rootPath } from "./src/config"; const source = readFileSync(rootPath("scripts/src/artifact-registry.ts"), "utf8"); +const downloadRemoteFileSource = source.slice( + source.indexOf("function downloadRemoteFile("), + source.indexOf("async function runRemoteScriptBackground("), +); function assertCondition(condition: unknown, message: string): void { if (!condition) throw new Error(message); @@ -12,7 +16,7 @@ assertCondition(source.includes("downloadRemoteFile(options, remoteArchive, loca assertCondition(source.includes("runRemoteScriptBackground(options, remoteScript"), "remote docker save must run as a background job"); assertCondition(source.includes('runRemoteScriptBackground(options, deployScript, Math.max(options.timeoutMs, 420_000), "d601-k3s-deploy")'), "D601 k3s deploy must use background polling"); assertCondition(source.includes('"ssh",\n options.providerId,\n "download"'), "download helper must route through UniDesk ssh download"); -assertCondition(source.includes('"--chunk-bytes",\n "96000"'), "artifact ssh download must use the largest bounded chunk size"); +assertCondition(!downloadRemoteFileSource.includes('"--chunk-bytes"'), "artifact ssh download must use the stable default bounded chunk size, not the largest chunk"); assertCondition(source.includes("UNIDESK_SSH_CLIENT_TOKEN") && source.includes("UNIDESK_SSH_CLIENT_ROUTE_ALLOWLIST"), "dev frontend artifact deploy must sync scoped ssh runtime keys"); console.log(JSON.stringify({ @@ -22,7 +26,7 @@ console.log(JSON.stringify({ "no docker-save stdout stream over ssh", "compose artifact uses verified ssh download", "remote docker save and k3s deploy use background polling", - "artifact downloads use the largest bounded ssh chunk size", + "artifact downloads use the stable default bounded ssh chunk size", "dev frontend artifact deploy syncs scoped ssh runtime keys" ] }, null, 2)); diff --git a/scripts/src/artifact-registry.ts b/scripts/src/artifact-registry.ts index e8a701db..576b0123 100644 --- a/scripts/src/artifact-registry.ts +++ b/scripts/src/artifact-registry.ts @@ -1677,8 +1677,6 @@ function downloadRemoteFile(options: ArtifactRegistryOptions, remotePath: string "ssh", options.providerId, "download", - "--chunk-bytes", - "96000", remotePath, localPath, ], repoRoot, { timeoutMs }); diff --git a/scripts/src/ssh-file-transfer.ts b/scripts/src/ssh-file-transfer.ts index d9c73b3c..1c45e082 100644 --- a/scripts/src/ssh-file-transfer.ts +++ b/scripts/src/ssh-file-transfer.ts @@ -53,7 +53,9 @@ class SshFileTransferError extends Error { const fileTransferReadBlockBytes = 45_000; const fileTransferWriteB64ArgvLimit = 48_000; const fileTransferWriteB64ChunkChars = 12_000; -const fileTransferReadBlockMaxAttempts = 3; +const fileTransferReadBlockMaxAttempts = 12; +const fileTransferReadEmptyRetryDelayMs = 250; +const fileTransferProgressEveryChunks = 64; export function isSshFileTransferOperation(args: string[]): boolean { const subcommand = args[0] ?? ""; @@ -210,6 +212,7 @@ async function readRemoteFileVerified( chunks.push(chunk); actualBytes += chunk.length; chunkCount += 1; + emitDownloadProgress(invocation, remotePath, blockIndex, chunkCount, actualBytes, remote.bytes, chunk.length); } const content = Buffer.concat(chunks); const actual = { bytes: content.length, sha256: sha256HexBuffer(content) }; @@ -234,11 +237,19 @@ async function readRemoteBase64BlockWithRetry( const encoded = read.stdout.replace(/\s+/gu, ""); if (encoded.length > 0) return encoded; attemptErrors.push({ attempt, exitCode: read.exitCode, stdoutBytes: read.stdout.length, stderrTail: read.stderr.slice(-500) }); + if (attempt < fileTransferReadBlockMaxAttempts) { + emitEmptyReadRetryProgress(invocation, remotePath, blockIndex, attempt, expectedBytes, actualBytes); + await delayMs(fileTransferReadEmptyRetryDelayMs); + } } catch (error) { attemptErrors.push({ attempt, error: error instanceof Error ? error.message : String(error), }); + if (attempt < fileTransferReadBlockMaxAttempts) { + emitEmptyReadRetryProgress(invocation, remotePath, blockIndex, attempt, expectedBytes, actualBytes); + await delayMs(fileTransferReadEmptyRetryDelayMs); + } } } throw new SshFileTransferError("remote download returned an empty block before EOF after retries", { @@ -252,6 +263,54 @@ async function readRemoteBase64BlockWithRetry( }); } +function emitDownloadProgress( + invocation: ParsedSshInvocation, + remotePath: string, + blockIndex: number, + chunkCount: number, + actualBytes: number, + expectedBytes: number, + lastChunkBytes: number, +): void { + if (chunkCount !== 1 && actualBytes < expectedBytes && chunkCount % fileTransferProgressEveryChunks !== 0) return; + process.stderr.write(`${JSON.stringify({ + event: "unidesk.ssh.download.progress", + route: invocation.route.raw, + providerId: invocation.providerId, + remotePath, + blockIndex, + chunks: chunkCount, + actualBytes, + expectedBytes, + lastChunkBytes, + })}\n`); +} + +function emitEmptyReadRetryProgress( + invocation: ParsedSshInvocation, + remotePath: string, + blockIndex: number, + attempt: number, + expectedBytes: number, + actualBytes: number, +): void { + process.stderr.write(`${JSON.stringify({ + event: "unidesk.ssh.download.empty-read-retry", + route: invocation.route.raw, + providerId: invocation.providerId, + remotePath, + blockIndex, + attempt, + maxAttempts: fileTransferReadBlockMaxAttempts, + actualBytes, + expectedBytes, + })}\n`); +} + +function delayMs(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + async function statRemoteFile( invocation: ParsedSshInvocation, executor: SshRemoteCommandExecutor, diff --git a/scripts/ssh-argv-guidance-contract-test.ts b/scripts/ssh-argv-guidance-contract-test.ts index 58343896..cc878f4f 100644 --- a/scripts/ssh-argv-guidance-contract-test.ts +++ b/scripts/ssh-argv-guidance-contract-test.ts @@ -54,20 +54,29 @@ function sshShellScriptPrelude(): string { return `${sshUserToolPathPrelude}\n${sshShellCompatibilityPrelude}`; } -async function captureStdout(fn: () => Promise): Promise<{ exitCode: number; stdout: string }> { +async function captureStdout(fn: () => Promise): Promise<{ exitCode: number; stdout: string; stderr: string }> { const originalWrite = process.stdout.write; + const originalStderrWrite = process.stderr.write; let stdout = ""; + let stderr = ""; process.stdout.write = ((chunk: unknown, ...args: unknown[]) => { stdout += Buffer.isBuffer(chunk) ? chunk.toString("utf8") : String(chunk); const callback = args.find((arg): arg is () => void => typeof arg === "function"); if (callback) callback(); return true; }) as typeof process.stdout.write; + process.stderr.write = ((chunk: unknown, ...args: unknown[]) => { + stderr += Buffer.isBuffer(chunk) ? chunk.toString("utf8") : String(chunk); + const callback = args.find((arg): arg is () => void => typeof arg === "function"); + if (callback) callback(); + return true; + }) as typeof process.stderr.write; try { const exitCode = await fn(); - return { exitCode, stdout }; + return { exitCode, stdout, stderr }; } finally { process.stdout.write = originalWrite; + process.stderr.write = originalStderrWrite; } } @@ -462,6 +471,7 @@ export async function runSshArgvGuidanceContract(): Promise { const retryReadBlocks = retryTransfer.commands.filter((item) => item.operation === "read-b64-block"); assertCondition(retryResult.exitCode === 0 && retryJson.sha256 === sha256BufferHex(retryPayload), "download should retry a transient empty block and keep sha256 verification", retryResult); assertCondition(retryReadBlocks.length === Number(retryJson.transfer && typeof retryJson.transfer === "object" ? (retryJson.transfer as JsonRecord).chunks : 0) + 1, "transient empty block should add exactly one repeated read without counting as a chunk", retryTransfer.commands); + assertCondition(retryResult.stderr.includes("unidesk.ssh.download.progress") && retryResult.stderr.includes("unidesk.ssh.download.empty-read-retry"), "download should emit bounded progress and retry events to stderr", retryResult.stderr); assertCondition(readFileSync(retryDownload).equals(retryPayload), "retry download must preserve complete content after transient empty block", { commands: retryTransfer.commands }); } finally { rmSync(transferRoot, { recursive: true, force: true });