fix: align apply-patch v2 with codex semantics

This commit is contained in:
Codex
2026-05-29 18:30:43 +00:00
parent 0817540a01
commit 4c8926fd7f
4 changed files with 170 additions and 48 deletions
+1 -1
View File
@@ -159,7 +159,7 @@ ssh-like 远端命令如果出现 `kex_exchange_identification`、`Connection cl
`ssh <providerId>` 只在当前 operation 需要 helper 时才注入 `/tmp/unidesk-ssh-tools`,普通 `argv``script``kubectl``logs` 和默认 `apply-patch` 等路径不得传输无关工具源码。`apply-patch-v1` 只注入 `apply_patch``glob` 只注入 `glob``skills`/`skill discover` 只注入 `skill-discover``apply_patch` 接受标准 `*** Begin Patch` / `*** End Patch` patch 格式,便于通过 SSH 透传编辑远端仓库文件;远端存在 `perl` 时必须走快速精确匹配路径,避免大文件 hunk 被 sh 模式匹配拖成几十秒,缺少 `perl` 时才退回 sh-only 实现。`glob``skill-discover` 需要远端 `python3`。注入工具只写 `/tmp/unidesk-ssh-tools`,不修改目标仓库。
远端文本 patch 默认使用 `apply-patch` 的 v2 引擎:它不把 hunk 解析交给远端 shell/perl helper,而是在本地按行序列匹配,支持长中文/Unicode 行、纯新增 hunk、低上下文插入和 `@@` 上下文定位,再把完整新内容写回远端。`apply_patch` 旧 helper 默认拒绝低上下文 update hunk:空搜索/纯插入无锚点、只在插入点前有上下文而没有插入点后上下文、或同一 hunk search 在目标文件中匹配多个位置时,都会结构化失败并提示补充上下文。成功应用时每个 hunk 会在 stderr 输出 `apply_patch: hunk N matched path:line`,用于复核实际落点;只有人工确认确实需要旧 helper 行为或 `--allow-loose` 时,才显式调用 `apply-patch-v1 --allow-loose`
远端文本 patch 默认使用 `apply-patch` 的 v2 引擎:它不把 hunk 解析交给远端 shell/perl helper,而是在本地按行序列匹配,支持长中文/Unicode 行、纯新增 hunk、低上下文插入和 `@@` 上下文定位,再把完整新内容写回远端。v2 的文件操作提交顺序按 Codex 标准 `apply_patch` 语义执行:空 patch 会失败;删除不存在的文件会失败;`Add File` 可覆盖已有文件;`Move to` 可覆盖目标文件;当大 patch 后续 hunk 不匹配时,已成功提交的前序文件操作会保留,并在错误详情中记录 `partialChanges`,调用方应基于当前文件内容继续补一个更小的 patch,而不是期待全量事务回滚。`apply_patch` 旧 helper 默认拒绝低上下文 update hunk:空搜索/纯插入无锚点、只在插入点前有上下文而没有插入点后上下文、或同一 hunk search 在目标文件中匹配多个位置时,都会结构化失败并提示补充上下文。成功应用时每个 hunk 会在 stderr 输出 `apply_patch: hunk N matched path:line`,用于复核实际落点;只有人工确认确实需要旧 helper 行为或 `--allow-loose` 时,才显式调用 `apply-patch-v1 --allow-loose`
如果只是远端打文本补丁,不需要再手写 `ssh D601 'apply_patch' < patch.diff` 这种命令拼接;正式默认入口是 `bun scripts/cli.ts ssh D601:/absolute/workspace apply-patch < patch.diff``bun scripts/cli.ts ssh D601:k3s:<namespace>:<workload>/<workspace> apply-patch < patch.diff``bun scripts/cli.ts ssh D601:win/c/test apply-patch < patch.diff`。旧 helper 只有 `apply-patch-v1` 一个入口,附加参数会原样透传给远端 `apply_patch`,例如 `bun scripts/cli.ts ssh D601 apply-patch-v1 --help``bun scripts/cli.ts ssh D601 apply-patch-v1 --allow-loose < reviewed.patch`。标准单命令用法如下,不需要先创建本地 patch 临时文件:
+62 -43
View File
@@ -1,4 +1,3 @@
import path from "node:path";
import { createHash, randomBytes } from "node:crypto";
import type { Readable, Writable } from "node:stream";
@@ -66,12 +65,12 @@ type PlannedOperation = { kind: "write"; path: string; content: string } | { kin
type Replacement = [start: number, oldLength: number, newLines: string[]];
interface ApplyPatchV2Plan {
operations: PlannedOperation[];
changed: string[];
}
const beginMarker = "*** Begin Patch";
const endMarker = "*** End Patch";
const environmentIdMarker = "*** Environment ID: ";
const addFileMarker = "*** Add File: ";
const deleteFileMarker = "*** Delete File: ";
const updateFileMarker = "*** Update File: ";
@@ -113,6 +112,12 @@ export function parseApplyPatchV2(patchText: string): PatchParseResult {
const hunks: PatchHunk[] = [];
let index = 1;
const environmentLine = lines[index]?.trimStart() ?? "";
if (environmentLine.startsWith(environmentIdMarker)) {
const environmentId = environmentLine.slice(environmentIdMarker.length).trim();
if (environmentId.length === 0) throw new ApplyPatchV2Error("apply_patch environment_id cannot be empty", { line: index + 1 });
index += 1;
}
while (index < lines.length - 1) {
const line = lines[index]?.trim() ?? "";
if (line.length === 0) {
@@ -205,18 +210,16 @@ export async function runApplyPatchV2(options: ApplyPatchV2Options): Promise<num
return 2;
}
const parsed = parseApplyPatchV2(patchText);
const plan = await planApplyPatchV2(options.executor, parsed.hunks);
for (const operation of plan.operations) {
await executePlannedOperation(options.executor, operation);
}
const plan = await applyPatchV2Hunks(options.executor, parsed.hunks);
options.stdout.write("Success. Updated the following files:\n");
for (const item of plan.changed) options.stdout.write(`${item}\n`);
return 0;
}
async function planApplyPatchV2(executor: ApplyPatchV2Executor, hunks: PatchHunk[]): Promise<ApplyPatchV2Plan> {
async function applyPatchV2Hunks(executor: ApplyPatchV2Executor, hunks: PatchHunk[]): Promise<ApplyPatchV2Plan> {
if (hunks.length === 0) throw new ApplyPatchV2Error("No files were modified.");
const states = new Map<string, PlannedFileState>();
const operations: PlannedOperation[] = [];
const changed: string[] = [];
async function readPlannedText(filePath: string): Promise<string> {
@@ -230,40 +233,62 @@ async function planApplyPatchV2(executor: ApplyPatchV2Executor, hunks: PatchHunk
return content;
}
function planWrite(filePath: string, content: string): void {
async function applyWrite(filePath: string, content: string): Promise<void> {
await executePlannedOperation(executor, { kind: "write", path: filePath, content });
states.set(filePath, { exists: true, content });
operations.push({ kind: "write", path: filePath, content });
}
function planDelete(filePath: string): void {
async function applyDelete(filePath: string): Promise<void> {
const state = states.get(filePath);
if (state === undefined) {
await ensureRemoteFileExists(executor, filePath);
} else if (!state.exists) {
throw new ApplyPatchV2Error("cannot delete a file deleted earlier in this patch", { path: filePath });
}
await executePlannedOperation(executor, { kind: "delete", path: filePath });
states.set(filePath, { exists: false, content: "" });
operations.push({ kind: "delete", path: filePath });
}
for (const hunk of hunks) {
if (hunk.kind === "add") {
planWrite(hunk.path, hunk.content);
changed.push(hunk.path);
continue;
try {
for (const hunk of hunks) {
if (hunk.kind === "add") {
await applyWrite(hunk.path, hunk.content);
changed.push(`A ${hunk.path}`);
continue;
}
if (hunk.kind === "delete") {
await applyDelete(hunk.path);
changed.push(`D ${hunk.path}`);
continue;
}
const originalContent = await readPlannedText(hunk.path);
const update = deriveUpdatedContent(hunk.path, originalContent, hunk.chunks);
if (hunk.movePath !== null && hunk.movePath !== hunk.path) {
await applyWrite(hunk.movePath, update.newContent);
changed.push(`M ${hunk.movePath}`);
await applyDelete(hunk.path);
continue;
}
await applyWrite(hunk.path, update.newContent);
changed.push(`M ${hunk.path}`);
}
if (hunk.kind === "delete") {
planDelete(hunk.path);
changed.push(hunk.path);
continue;
}
const originalContent = await readPlannedText(hunk.path);
const update = deriveUpdatedContent(hunk.path, originalContent, hunk.chunks);
if (hunk.movePath !== null && hunk.movePath !== hunk.path) {
planWrite(hunk.movePath, update.newContent);
planDelete(hunk.path);
changed.push(`${hunk.path} -> ${hunk.movePath}`);
continue;
}
planWrite(hunk.path, update.newContent);
changed.push(hunk.path);
} catch (error) {
if (changed.length === 0) throw error;
throw new ApplyPatchV2Error(error instanceof Error ? error.message : String(error), {
partialChanges: changed,
cause: error instanceof ApplyPatchV2Error ? error.details : undefined,
});
}
return { operations, changed };
return { changed };
}
async function ensureRemoteFileExists(executor: ApplyPatchV2Executor, target: string): Promise<void> {
if (executor.fs) {
await executor.fs.stat(target);
return;
}
await checkedRemoteV2(executor, "stat", [target]);
}
async function executePlannedOperation(executor: ApplyPatchV2Executor, operation: PlannedOperation): Promise<void> {
@@ -448,8 +473,7 @@ type RemoteV2Operation =
| "write-b64-begin"
| "write-b64-append"
| "write-b64-commit"
| "delete"
| "move";
| "delete";
async function checkedRemoteV2(executor: ApplyPatchV2Executor, operation: RemoteV2Operation, args: string[], input?: string): Promise<{ stdout: string }> {
if (!executor.run) {
@@ -585,12 +609,9 @@ function remoteV2Script(operation: RemoteV2Operation, args: string[]): string[]
" if [ \"$actual_sha256\" != \"$expected_sha256\" ]; then printf 'v2 final sha256 mismatch for %s\\n' \"$target\" >&2; exit 25; fi",
" ;;",
" delete)",
" rm -f -- \"$1\"",
" ;;",
" move)",
" target=$2",
" case \"$target\" in */*) parent=${target%/*}; mkdir -p -- \"$parent\";; esac",
" mv -f -- \"$1\" \"$2\"",
" target=$1",
" if [ ! -e \"$target\" ]; then printf 'file not found: %s\\n' \"$target\" >&2; exit 1; fi",
" rm -- \"$target\"",
" ;;",
" *)",
" printf 'unsupported op: %s\\n' \"$op\" >&2",
@@ -637,8 +658,6 @@ function stripLenientHeredoc(text: string): string {
function validatePatchPath(value: string, line: number): string {
const filePath = value.trim();
if (filePath.length === 0) throw new ApplyPatchV2Error("patch path cannot be empty", { line });
if (path.isAbsolute(filePath)) throw new ApplyPatchV2Error("patch paths must be relative", { line, path: filePath });
if (filePath.split(/[\\/]+/u).includes("..")) throw new ApplyPatchV2Error("patch paths cannot contain ..", { line, path: filePath });
return filePath;
}
+1
View File
@@ -436,6 +436,7 @@ export class GhVirtualFileExecutor implements ApplyPatchV2Executor {
}
if (operation === "delete") {
const path = this.canonical(args[0] ?? "");
if (!this.files.has(path)) return { exitCode: 1, stdout: "", stderr: `virtual file not found: ${path}\n` };
this.files.delete(path);
return { exitCode: 0, stdout: "", stderr: "" };
}
+106 -4
View File
@@ -1,7 +1,7 @@
import { PassThrough, Writable } from "node:stream";
import { spawnSync } from "node:child_process";
import { createHash } from "node:crypto";
import { chmodSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { chmodSync, existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import os from "node:os";
import path from "node:path";
import { sshHelp } from "./src/help";
@@ -251,7 +251,8 @@ async function applyPatchV2ActualShellFixtureAttempt(
}
const outputFiles: Record<string, string> = {};
for (const relativePath of Object.keys(files)) {
outputFiles[relativePath] = readFileSync(path.join(root, relativePath), "utf8");
const target = path.join(root, relativePath);
outputFiles[relativePath] = existsSync(target) ? readFileSync(target, "utf8") : "";
}
return { stdout, files: outputFiles, commands, error };
} finally {
@@ -714,9 +715,33 @@ export async function runSshArgvGuidanceContract(): Promise<JsonRecord> {
"second.txt": "old second\n",
});
assertCondition(failedCompoundV2.error !== null, "v2 compound patch should fail when a later hunk does not match", failedCompoundV2);
assertCondition(failedCompoundV2.files["first.txt"] === "old first\n", "v2 must not partially write earlier files when a later hunk fails", failedCompoundV2);
assertCondition(failedCompoundV2.files["first.txt"] === "new first\n", "v2 should match Codex apply_patch by preserving earlier committed changes when a later hunk fails", failedCompoundV2);
assertCondition(failedCompoundV2.files["second.txt"] === "old second\n", "v2 must leave later failed files unchanged", failedCompoundV2);
assertCondition(!failedCompoundV2.commands.some((command) => command.startsWith("write-b64") || command.startsWith("delete")), "v2 must finish all local planning before any remote mutation", failedCompoundV2.commands);
assertCondition(failedCompoundV2.commands.some((command) => command.startsWith("write-b64")), "v2 should commit preceding operations in patch order like Codex apply_patch", failedCompoundV2.commands);
assertCondition(
Array.isArray((failedCompoundV2.error as { details?: { partialChanges?: unknown } })?.details?.partialChanges)
&& ((failedCompoundV2.error as { details?: { partialChanges?: string[] } }).details?.partialChanges ?? []).includes("M first.txt"),
"v2 failure should expose partialChanges for already committed operations",
failedCompoundV2.error,
);
const addBeforeFailedUpdateV2 = await applyPatchV2FixtureAttempt([
"*** Begin Patch",
"*** Add File: hwpod",
"+#!/bin/sh",
"+exec node /app/skills/device-pod-cli/scripts/device-pod-cli.mjs \"$@\"",
"*** Update File: scripts/artifact-publish.mjs",
"@@",
"-missing artifact line",
"+patched artifact line",
"*** End Patch",
"",
].join("\n"), {
"scripts/artifact-publish.mjs": "actual artifact line\n",
});
assertCondition(addBeforeFailedUpdateV2.error !== null, "v2 should still fail when a later file hunk misses", addBeforeFailedUpdateV2);
assertCondition(addBeforeFailedUpdateV2.files.hwpod?.includes("device-pod-cli.mjs"), "v2 should preserve an earlier Add File from a large patch when a later hunk misses", addBeforeFailedUpdateV2);
assertCondition(addBeforeFailedUpdateV2.files["scripts/artifact-publish.mjs"] === "actual artifact line\n", "v2 must leave the failed later file unchanged", addBeforeFailedUpdateV2);
const sequentialCompoundV2 = await applyPatchV2Fixture([
"*** Begin Patch",
@@ -735,6 +760,83 @@ export async function runSshArgvGuidanceContract(): Promise<JsonRecord> {
});
assertCondition(sequentialCompoundV2.files["sequence.txt"] === "gamma\n", "v2 should plan later hunks against earlier planned edits before remote writes", sequentialCompoundV2);
const emptyPatchV2 = await applyPatchV2FixtureAttempt([
"*** Begin Patch",
"*** End Patch",
"",
].join("\n"), {
"empty.txt": "kept\n",
});
assertCondition(emptyPatchV2.error !== null, "v2 should reject empty patches like Codex apply_patch", emptyPatchV2);
assertCondition(!emptyPatchV2.commands.some((command) => command.startsWith("write-b64") || command.startsWith("delete")), "empty v2 patches must not touch remote files", emptyPatchV2.commands);
const environmentPreambleV2 = await applyPatchV2Fixture([
"*** Begin Patch",
"*** Environment ID: remote",
"*** Update File: env.txt",
"@@",
"-before",
"+after",
"*** End Patch",
"",
].join("\n"), {
"env.txt": "before\n",
});
assertCondition(environmentPreambleV2.files["env.txt"] === "after\n", "v2 should accept Codex apply_patch environment_id preamble", environmentPreambleV2);
const absolutePathV2 = await applyPatchV2Fixture([
"*** Begin Patch",
"*** Update File: /tmp/absolute.txt",
"@@",
"-old",
"+new",
"*** End Patch",
"",
].join("\n"), {
"/tmp/absolute.txt": "old\n",
});
assertCondition(absolutePathV2.files["/tmp/absolute.txt"] === "new\n", "v2 should accept absolute paths like Codex apply_patch when the executor route supports them", absolutePathV2);
const parentSegmentPathV2 = await applyPatchV2Fixture([
"*** Begin Patch",
"*** Update File: ../outside.txt",
"@@",
"-old",
"+new",
"*** End Patch",
"",
].join("\n"), {
"../outside.txt": "old\n",
});
assertCondition(parentSegmentPathV2.files["../outside.txt"] === "new\n", "v2 should leave path containment policy to the executor like Codex apply_patch", parentSegmentPathV2);
const missingDeleteV2 = await applyPatchV2FixtureAttempt([
"*** Begin Patch",
"*** Delete File: missing.txt",
"*** End Patch",
"",
].join("\n"), {
"keep.txt": "kept\n",
});
assertCondition(missingDeleteV2.error !== null, "v2 should reject deleting a missing file like Codex apply_patch", missingDeleteV2);
assertCondition(missingDeleteV2.files["keep.txt"] === "kept\n", "missing delete must leave unrelated files unchanged", missingDeleteV2);
const moveOverwriteV2 = await applyPatchV2Fixture([
"*** Begin Patch",
"*** Update File: old-name.txt",
"*** Move to: new-name.txt",
"@@",
"-old content",
"+new content",
"*** End Patch",
"",
].join("\n"), {
"old-name.txt": "old content\n",
"new-name.txt": "existing content\n",
});
assertCondition(!Object.prototype.hasOwnProperty.call(moveOverwriteV2.files, "old-name.txt"), "v2 rename should remove the source file", moveOverwriteV2);
assertCondition(moveOverwriteV2.files["new-name.txt"] === "new content\n", "v2 rename should overwrite the destination like Codex apply_patch", moveOverwriteV2);
const safePatch = applyPatchFixture([], [
"*** Begin Patch",
"*** Update File: sample.txt",