diff --git a/AGENTS.md b/AGENTS.md index 1a8e967d..a4a2ac93 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -30,6 +30,7 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文 - P0: 对 G14/D601/远端 worktree 做文本源码修改时,必须优先使用 UniDesk SSH workspace route 的 `apply-patch` 透传入口;不要优先用远端 Python/Perl/sed heredoc 或复杂 shell quoting 拼接大段文本补丁。 - P0: 只有在 `apply-patch` 本身不可用或需要处理非文本/批量机械生成文件时,才使用其他受控方式;使用前必须说明原因,并在修改后立即用 `git diff`、语法检查或文件尾部检查确认没有截断或污染。 - P0: `apply-patch` 一旦出现误删、尾部截断、匹配漂移或其他正确性问题,必须立即优先修复 UniDesk `apply-patch` 本身;算法必须按 Codex 开源 `apply_patch` 源码语义做 1:1 对齐,不能用局部护栏、兼容绕行、分支开关或改用其他 patch 入口掩盖基础链路缺陷。 +- P0: Codex 开源 `apply_patch` 参考源码已固定缓存到 `/tmp/codex-apply-patch/codex/codex-rs/apply-patch/`,core 侧相关文件和 commit 记录在 `/tmp/codex-apply-patch/`;排查或对齐 `apply-patch` 算法时必须先读该本地缓存,只有缓存缺失或明确需要更新时才重新联网拉取。 ## Critical Distributed Agile Validation Rule diff --git a/scripts/src/apply-patch-v2.ts b/scripts/src/apply-patch-v2.ts index 0cc1d9a5..8d989a25 100644 --- a/scripts/src/apply-patch-v2.ts +++ b/scripts/src/apply-patch-v2.ts @@ -1,5 +1,5 @@ import path from "node:path"; -import { createHash } from "node:crypto"; +import { createHash, randomBytes } from "node:crypto"; import type { Readable, Writable } from "node:stream"; export type PatchHunk = @@ -261,9 +261,53 @@ async function executePlannedOperation(executor: ApplyPatchV2Executor, operation await checkedRemoteV2(executor, "delete", [operation.path]); } +const readBlockBytes = 45_000; +const writeB64ArgvLimit = 48_000; +const writeB64ChunkChars = 12_000; + async function readRemoteText(executor: ApplyPatchV2Executor, target: string): Promise { - const read = await checkedRemoteV2(executor, "read", [target]); - return read.stdout; + const stat = await checkedRemoteV2(executor, "stat", [target]); + const [bytesText, expectedSha256] = stat.stdout.trim().split(/\s+/u); + const expectedBytes = Number(bytesText); + if (!Number.isSafeInteger(expectedBytes) || expectedBytes < 0 || !/^[0-9a-f]{64}$/u.test(expectedSha256 ?? "")) { + throw new ApplyPatchV2Error("remote apply-patch v2 stat returned invalid metadata", { path: target, stdout: stat.stdout.slice(0, 500) }); + } + + const chunks: Buffer[] = []; + let actualBytes = 0; + for (let blockIndex = 0; actualBytes < expectedBytes; blockIndex += 1) { + const read = await checkedRemoteV2(executor, "read-b64-block", [target, String(blockIndex), String(readBlockBytes)]); + const encoded = read.stdout.replace(/\s+/gu, ""); + const chunk = encoded.length === 0 ? Buffer.alloc(0) : Buffer.from(encoded, "base64"); + if (chunk.length === 0) { + throw new ApplyPatchV2Error("remote apply-patch v2 read returned an empty block before EOF", { + path: target, + blockIndex, + expectedBytes, + actualBytes, + }); + } + chunks.push(chunk); + actualBytes += chunk.length; + } + + const contentBuffer = Buffer.concat(chunks); + if (contentBuffer.length !== expectedBytes) { + throw new ApplyPatchV2Error("remote apply-patch v2 read byte count mismatch", { + path: target, + expectedBytes, + actualBytes: contentBuffer.length, + }); + } + const actualSha256 = sha256Hex(contentBuffer); + if (actualSha256 !== expectedSha256) { + throw new ApplyPatchV2Error("remote apply-patch v2 read sha256 mismatch", { + path: target, + expectedSha256, + actualSha256, + }); + } + return contentBuffer.toString("utf8"); } async function writeRemoteText(executor: ApplyPatchV2Executor, target: string, content: string): Promise { @@ -271,14 +315,38 @@ async function writeRemoteText(executor: ApplyPatchV2Executor, target: string, c const encoded = contentBuffer.toString("base64"); const expectedBytes = String(contentBuffer.length); const expectedSha256 = sha256Hex(contentBuffer); - if (encoded.length <= 48_000) { - await checkedRemoteV2(executor, "write-b64-argv", [target, expectedBytes, expectedSha256, ...chunkString(encoded, 12_000)]); + if (encoded.length <= writeB64ArgvLimit) { + await checkedRemoteV2(executor, "write-b64-argv", [target, expectedBytes, expectedSha256, ...chunkString(encoded, writeB64ChunkChars)]); return; } - await checkedRemoteV2(executor, "write-b64-stdin", [target, expectedBytes, expectedSha256], encoded); + try { + await checkedRemoteV2(executor, "write-b64-stdin", [target, expectedBytes, expectedSha256], encoded); + return; + } catch { + // Some SSH/websocket bridges cap stdin payloads without a stable public + // contract. The stdin path is still the fast path; fall back to small argv + // chunks only after the remote sha/byte guard proves no partial write moved. + } + const token = `${process.pid}-${Date.now()}-${randomBytes(4).toString("hex")}-${expectedSha256.slice(0, 12)}`; + await checkedRemoteV2(executor, "write-b64-begin", [target, token]); + for (const chunk of chunkString(encoded, writeB64ChunkChars)) { + await checkedRemoteV2(executor, "write-b64-append", [target, token, chunk]); + } + await checkedRemoteV2(executor, "write-b64-commit", [target, token, expectedBytes, expectedSha256]); } -async function checkedRemoteV2(executor: ApplyPatchV2Executor, operation: "read" | "write-b64-argv" | "write-b64-stdin" | "delete" | "move", args: string[], input?: string): Promise<{ stdout: string }> { +type RemoteV2Operation = + | "stat" + | "read-b64-block" + | "write-b64-argv" + | "write-b64-stdin" + | "write-b64-begin" + | "write-b64-append" + | "write-b64-commit" + | "delete" + | "move"; + +async function checkedRemoteV2(executor: ApplyPatchV2Executor, operation: RemoteV2Operation, args: string[], input?: string): Promise<{ stdout: string }> { const result = await executor.run(remoteV2Script(operation, args), input); if (result.exitCode === 0) return result; throw new ApplyPatchV2Error("remote apply-patch v2 operation failed", { @@ -290,7 +358,7 @@ async function checkedRemoteV2(executor: ApplyPatchV2Executor, operation: "read" }); } -function remoteV2Script(operation: "read" | "write-b64-argv" | "write-b64-stdin" | "delete" | "move", args: string[]): string[] { +function remoteV2Script(operation: RemoteV2Operation, args: string[]): string[] { const script = [ "set -eu", "sha256_file() {", @@ -318,11 +386,31 @@ function remoteV2Script(operation: "read" | "write-b64-argv" | "write-b64-stdin" " exit 24", " fi", "}", + "set_tmp_paths() {", + " target=$1", + " token=$2", + " case \"$token\" in ''|*[!a-zA-Z0-9_.-]*) printf 'invalid v2 temp token\\n' >&2; exit 2;; esac", + " base=${target##*/}", + " dir=.", + " case \"$target\" in */*) dir=${target%/*};; esac", + " tmp=\"$dir/.${base}.unidesk-v2-${token}.tmp\"", + " tmp_b64=\"$tmp.b64\"", + "}", "op=$1", "shift", "case \"$op\" in", - " read)", - " cat -- \"$1\"", + " stat)", + " target=$1", + " bytes=$(wc -c < \"$target\" | tr -d '[:space:]')", + " digest=$(sha256_file \"$target\")", + " printf '%s %s\\n' \"$bytes\" \"$digest\"", + " ;;", + " read-b64-block)", + " target=$1", + " block_index=$2", + " block_size=$3", + " case \"$block_index:$block_size\" in *[!0-9:]*|:*) printf 'invalid read block args\\n' >&2; exit 2;; esac", + " dd if=\"$target\" bs=\"$block_size\" skip=\"$block_index\" count=1 2>/dev/null | base64 | tr -d '\\n'", " ;;", " write-b64-argv)", " target=$1", @@ -352,7 +440,30 @@ function remoteV2Script(operation: "read" | "write-b64-argv" | "write-b64-stdin" " dir=.", " case \"$target\" in */*) dir=${target%/*};; esac", " tmp=\"$dir/.${base}.unidesk-v2-$$.tmp\"", - " base64 -d > \"$tmp\"", + " if ! base64 -d > \"$tmp\"; then rm -f -- \"$tmp\"; printf 'v2 base64 decode failed for %s\\n' \"$target\" >&2; exit 22; fi", + " verify_tmp \"$target\" \"$tmp\" \"$expected_bytes\" \"$expected_sha256\"", + " mv -f -- \"$tmp\" \"$target\"", + " actual_sha256=$(sha256_file \"$target\")", + " if [ \"$actual_sha256\" != \"$expected_sha256\" ]; then printf 'v2 final sha256 mismatch for %s\\n' \"$target\" >&2; exit 25; fi", + " ;;", + " write-b64-begin)", + " target=$1", + " case \"$target\" in */*) parent=${target%/*}; mkdir -p -- \"$parent\";; esac", + " set_tmp_paths \"$target\" \"$2\"", + " : > \"$tmp_b64\"", + " ;;", + " write-b64-append)", + " target=$1", + " set_tmp_paths \"$target\" \"$2\"", + " printf '%s' \"$3\" >> \"$tmp_b64\"", + " ;;", + " write-b64-commit)", + " target=$1", + " set_tmp_paths \"$target\" \"$2\"", + " expected_bytes=$3", + " expected_sha256=$4", + " if ! base64 -d < \"$tmp_b64\" > \"$tmp\"; then rm -f -- \"$tmp\" \"$tmp_b64\"; printf 'v2 base64 decode failed for %s\\n' \"$target\" >&2; exit 22; fi", + " rm -f -- \"$tmp_b64\"", " verify_tmp \"$target\" \"$tmp\" \"$expected_bytes\" \"$expected_sha256\"", " mv -f -- \"$tmp\" \"$target\"", " actual_sha256=$(sha256_file \"$target\")", diff --git a/scripts/ssh-argv-guidance-contract-test.ts b/scripts/ssh-argv-guidance-contract-test.ts index bd836ccf..61cb785c 100644 --- a/scripts/ssh-argv-guidance-contract-test.ts +++ b/scripts/ssh-argv-guidance-contract-test.ts @@ -80,6 +80,7 @@ function applyPatchFixture(args: string[], patch: string, files: Record): Promise<{ stdout: string; files: Record; commands: string[]; error: unknown | null }> { const state = new Map(Object.entries(files)); + const pendingWrites = new Map(); const commands: string[] = []; const stdin = new PassThrough(); stdin.end(patch); @@ -100,9 +101,21 @@ async function applyPatchV2FixtureAttempt(patch: string, files: Record, mutateInput?: (operation: string, input: string | undefined) => string | undefined, + mutateResult?: (operation: string, result: { exitCode: number; stdout: string; stderr: string }) => { exitCode: number; stdout: string; stderr: string }, ): Promise<{ stdout: string; files: Record; commands: string[]; error: unknown | null }> { const root = mkdtempSync(path.join(os.tmpdir(), "unidesk-apply-patch-v2-shell-")); const commands: string[] = []; @@ -179,11 +215,12 @@ async function applyPatchV2ActualShellFixtureAttempt( input: mutateInput ? mutateInput(operation, input) : input, encoding: "utf8", }); - return { + const result = { exitCode: run.status ?? 255, stdout: run.stdout, stderr: run.stderr, }; + return mutateResult ? mutateResult(operation, result) : result; }, }, }); @@ -445,6 +482,7 @@ export async function runSshArgvGuidanceContract(): Promise { }); assertCondition(longChineseReplace.files["novel.md"]?.includes("等待他重新命名"), "v2 should replace long Chinese lines without remote shell search blocks", longChineseReplace); + const largeOriginal = `${"0123456789abcdef\n".repeat(4096)}`; const largeV2 = await applyPatchV2Fixture([ "*** Begin Patch", "*** Update File: large.txt", @@ -453,9 +491,11 @@ export async function runSshArgvGuidanceContract(): Promise { "*** End Patch", "", ].join("\n"), { - "large.txt": `${"0123456789abcdef\n".repeat(4096)}`, + "large.txt": largeOriginal, }); assertCondition(largeV2.commands.some((command) => command.includes("write-b64-stdin")), "v2 should use stdin write path for large remote files to avoid E2BIG", largeV2.commands); + assertCondition(!largeV2.commands.some((command) => command.includes("write-b64-append")), "v2 should keep the single stdin write as the normal large-file fast path", largeV2.commands); + assertCondition(largeV2.commands.filter((command) => command.startsWith("read-b64-block")).length <= 2, "v2 large-file verified read should use coarse chunks, not many tiny SSH calls", largeV2.commands); const multiChunkTailV2 = await applyPatchV2ActualShellFixtureAttempt([ "*** Begin Patch", @@ -474,6 +514,40 @@ export async function runSshArgvGuidanceContract(): Promise { assertCondition(multiChunkTailV2.error === null, "v2 should apply explicit multi-chunk patches through the real shell writer", multiChunkTailV2); assertCondition(multiChunkTailV2.files["two_chunks.txt"] === "a\nB\nc\nD\ne\nf\n", "v2 must preserve untouched tail lines when applying multiple chunks", multiChunkTailV2); + const largeTailV2 = await applyPatchV2ActualShellFixtureAttempt([ + "*** Begin Patch", + "*** Update File: large-tail.txt", + "@@ LINE-2048 tail-preserve", + "-LINE-2049 keep middle", + "+LINE-2049 patched middle", + "*** End Patch", + "", + ].join("\n"), { + "large-tail.txt": Array.from({ length: 5000 }, (_, index) => `LINE-${String(index).padStart(4, "0")} ${index === 2049 ? "keep middle" : "tail-preserve"}`).join("\n") + "\n", + }); + assertCondition(largeTailV2.error === null, "v2 should patch a large file through the real shell writer", largeTailV2); + assertCondition(largeTailV2.files["large-tail.txt"]?.includes("LINE-2049 patched middle"), "v2 large-file patch should update the target line", largeTailV2); + assertCondition(largeTailV2.files["large-tail.txt"]?.endsWith("LINE-4999 tail-preserve\n"), "v2 must preserve the untouched tail of large files", largeTailV2); + assertCondition(largeTailV2.commands.some((command) => command.startsWith("write-b64-stdin")), "v2 large-file real shell path should use stdin fast path before any fallback", largeTailV2.commands); + assertCondition(!largeTailV2.commands.some((command) => command.startsWith("write-b64-append")), "v2 large-file real shell path should not use slower chunk fallback unless stdin integrity fails", largeTailV2.commands); + + const truncatedLargeReadV2 = await applyPatchV2ActualShellFixtureAttempt([ + "*** Begin Patch", + "*** Update File: large-read.txt", + "@@", + "+this write must not happen after a truncated read", + "*** End Patch", + "", + ].join("\n"), { + "large-read.txt": largeOriginal, + }, undefined, (operation, result) => { + if (operation !== "read-b64-block" || result.exitCode !== 0) return result; + return { ...result, stdout: result.stdout.slice(0, Math.max(0, result.stdout.length - 32)) }; + }); + assertCondition(truncatedLargeReadV2.error !== null, "v2 should reject a truncated remote read before planning writes", truncatedLargeReadV2); + assertCondition(truncatedLargeReadV2.files["large-read.txt"] === largeOriginal, "v2 must keep the original file when bridge stdout truncates a read block", truncatedLargeReadV2); + assertCondition(!truncatedLargeReadV2.commands.some((command) => command.startsWith("write-b64")), "v2 must not write after read integrity fails", truncatedLargeReadV2.commands); + const truncatedLargeWriteV2 = await applyPatchV2ActualShellFixtureAttempt([ "*** Begin Patch", "*** Update File: large.txt", @@ -482,18 +556,19 @@ export async function runSshArgvGuidanceContract(): Promise { "*** End Patch", "", ].join("\n"), { - "large.txt": `${"0123456789abcdef\n".repeat(4096)}`, + "large.txt": largeOriginal, }, (operation, input) => { if (operation !== "write-b64-stdin" || input === undefined) return input; return input.slice(0, Math.max(0, input.length - 32)); }); - assertCondition(truncatedLargeWriteV2.error !== null, "v2 should reject truncated stdin write payloads", truncatedLargeWriteV2); - assertCondition(truncatedLargeWriteV2.files["large.txt"] === `${"0123456789abcdef\n".repeat(4096)}`, "v2 must keep the original file when decoded payload integrity fails", truncatedLargeWriteV2); - assertCondition( - String((truncatedLargeWriteV2.error as Error | null)?.message ?? "").includes("remote apply-patch v2 operation failed"), - "v2 truncated payload failure should be visible to the caller", - truncatedLargeWriteV2, - ); + assertCondition(truncatedLargeWriteV2.error === null, "v2 should fall back to bounded argv chunks when the stdin write path is truncated", truncatedLargeWriteV2); + assertCondition(truncatedLargeWriteV2.files["large.txt"]?.includes("large insert that forces a rewritten full-file payload"), "v2 fallback write should still apply the patch", truncatedLargeWriteV2); + assertCondition(truncatedLargeWriteV2.files["large.txt"]?.startsWith(largeOriginal), "v2 fallback write must preserve the original large-file content before appending the inserted line", { + commands: truncatedLargeWriteV2.commands.map((command) => command.split(" ").slice(0, 2).join(" ")), + outputBytes: Buffer.byteLength(truncatedLargeWriteV2.files["large.txt"] ?? "", "utf8"), + }); + assertCondition(truncatedLargeWriteV2.commands.some((command) => command.startsWith("write-b64-stdin")), "v2 should attempt the stdin fast path first", truncatedLargeWriteV2.commands); + assertCondition(truncatedLargeWriteV2.commands.some((command) => command.startsWith("write-b64-commit")), "v2 should commit the chunked fallback after stdin integrity failure", truncatedLargeWriteV2.commands); const failedCompoundV2 = await applyPatchV2FixtureAttempt([ "*** Begin Patch",