fix: support MiniMax large multi-hunk patches

2026-06-03 10:12:16 +00:00
parent 1d542d82b5
commit d012fe9a5e
2 changed files with 116 additions and 12 deletions
@@ -8,6 +8,7 @@ export type PatchHunk =

 export interface UpdateChunk {
  changeContext: string | null;
+  sourceStartLine: number | null;
  oldLines: string[];
  newLines: string[];
  isEndOfFile: boolean;
@@ -119,7 +120,7 @@ export function applyPatchV2HelpPayload() {
    rules: [
      "Add File has no @@ hunk marker: put content immediately after `*** Add File: <path>` and prefix every content line with +.",
      "A blank line in Add File is a line containing only +.",
-      "Update File uses @@ or @@ context markers, followed by context lines starting with space and changed lines starting with - or +.",
+      "Update File uses @@ or @@ context markers, followed by context lines starting with space and changed lines starting with - or +; unified-diff line-range headers are accepted with hints for MiniMax compatibility.",
      "Prefer `trans <route> apply-patch < /tmp/patch.diff` for long patches, Windows paths, or quoting-sensitive content.",
      "MiniMax compatibility: stray @@ or unprefixed content inside Add File, and extra hunk/body lines after Delete File, are accepted with stderr hints."
    ],
@@ -145,7 +146,7 @@ export function applyPatchV2HelpPayload() {
    },
    commonPitfalls: [
      "Do not put @@ after `*** Add File:`; @@ is only for Update File.",
-      "Do not paste unified diff headers such as `@@ -1,3 +1,4 @@`.",
+      "Prefer canonical @@ or @@ context over unified diff headers such as `@@ -1,3 +1,4 @@`; v2 accepts those headers with a hint.",
      "Do not use remote Python/Perl/sed heredocs for text patches when `trans <route> apply-patch` is available."
    ],
    note: "apply-patch reads patch text from stdin and uses the v2 engine by default. Use `apply-patch-v1` only for the legacy helper."
@@ -238,7 +239,7 @@ export function parseApplyPatchV2(patchText: string): PatchParseResult {
          index += 1;
          continue;
        }
-        const parsed = parseUpdateChunk(lines, index, chunks.length === 0);
+        const parsed = parseUpdateChunk(lines, index, chunks.length === 0, filePath, hints);
        chunks.push(parsed.chunk);
        index = parsed.nextIndex;
      }
@@ -805,17 +806,25 @@ function isFileHeader(line: string): boolean {
  return trimmed.startsWith(addFileMarker) || trimmed.startsWith(deleteFileMarker) || trimmed.startsWith(updateFileMarker) || trimmed === endMarker;
 }

-function parseUpdateChunk(lines: string[], startIndex: number, allowMissingContext: boolean): { chunk: UpdateChunk; nextIndex: number } {
+function parseUpdateChunk(lines: string[], startIndex: number, allowMissingContext: boolean, filePath: string, hints: string[]): { chunk: UpdateChunk; nextIndex: number } {
  let index = startIndex;
  let changeContext: string | null = null;
+  let sourceStartLine: number | null = null;
  const first = lines[index] ?? "";
  if (first === emptyChangeContextMarker) {
    index += 1;
-  } else if (first.startsWith(changeContextMarker)) {
-    changeContext = first.slice(changeContextMarker.length);
-    index += 1;
-  } else if (!allowMissingContext) {
-    throw new ApplyPatchV2Error("expected update chunk to start with @@ context marker", { line: startIndex + 1, text: first });
+  } else {
+    const unifiedHeader = parseUnifiedDiffHunkHeader(first);
+    if (unifiedHeader !== null) {
+      sourceStartLine = unifiedHeader.oldStart;
+      hints.push(`apply-patch hint: accepted unified-diff hunk header in ${filePath} on line ${startIndex + 1}; canonical apply_patch uses @@ or @@ context without line ranges.`);
+      index += 1;
+    } else if (first.startsWith(changeContextMarker)) {
+      changeContext = first.slice(changeContextMarker.length);
+      index += 1;
+    } else if (!allowMissingContext) {
+      throw new ApplyPatchV2Error("expected update chunk to start with @@ context marker", { line: startIndex + 1, text: first });
+    }
  }

  const oldLines: string[] = [];
@@ -851,7 +860,15 @@ function parseUpdateChunk(lines: string[], startIndex: number, allowMissingConte
    index += 1;
  }
  if (parsed === 0) throw new ApplyPatchV2Error("update chunk does not contain any lines", { line: startIndex + 1 });
-  return { chunk: { changeContext, oldLines, newLines, isEndOfFile }, nextIndex: index };
+  return { chunk: { changeContext, sourceStartLine, oldLines, newLines, isEndOfFile }, nextIndex: index };
+}
+
+function parseUnifiedDiffHunkHeader(line: string): { oldStart: number } | null { // Recognizes a unified-diff hunk header such as "@@ -1,3 +1,4 @@" and returns its old-side start line; returns null when the line is not such a header.
+  const match = /^@@\s+-(\d+)(?:,\d+)?\s+\+\d+(?:,\d+)?\s+@@(?:\s+.*)?$/u.exec(line); // only the old-side start is captured; both counts, the new-side start and any text after the closing @@ are matched but discarded
+  if (match === null) return null; // not a line-range header; the caller decides how to treat the line
+  const oldStart = Number(match[1] ?? "0"); // group 1 is mandatory in the pattern; the ?? "0" fallback presumably only satisfies strict indexed-access typing (confirm against tsconfig)
+  if (!Number.isSafeInteger(oldStart) || oldStart < 0) return null; // rejects digit runs too large to be a safe integer; \d+ cannot produce a negative value, so the < 0 test is purely defensive
+  return { oldStart }; // value is passed through unchanged (no 0-based conversion here)
 }

 function splitContentLines(content: string): string[] {
@@ -883,12 +900,13 @@ function computeReplacements(filePath: string, originalLines: string[], chunks:
    }

    let pattern = chunk.oldLines;
-    let found = seekSequence(originalLines, pattern, lineIndex, chunk.isEndOfFile);
+    const preferredStart = chunk.sourceStartLine === null ? lineIndex : Math.max(lineIndex, chunk.sourceStartLine - 1);
+    let found = seekSequenceWithFallback(originalLines, pattern, preferredStart, lineIndex, chunk.isEndOfFile);
    let newLines = chunk.newLines;
    if (found === null && pattern[pattern.length - 1] === "") {
      pattern = pattern.slice(0, -1);
      newLines = newLines[newLines.length - 1] === "" ? newLines.slice(0, -1) : newLines;
-      found = seekSequence(originalLines, pattern, lineIndex, chunk.isEndOfFile);
+      found = seekSequenceWithFallback(originalLines, pattern, preferredStart, lineIndex, chunk.isEndOfFile);
    }
    if (found === null) {
      throw new ApplyPatchV2Error("failed to find expected lines", {
@@ -904,6 +922,12 @@ function computeReplacements(filePath: string, originalLines: string[], chunks:
  return replacements;
 }

+function seekSequenceWithFallback(lines: string[], pattern: string[], preferredStart: number, fallbackStart: number, eof: boolean): number | null { // Searches for pattern with seekSequence starting at preferredStart and, if that finds nothing, retries from fallbackStart; returns seekSequence's result (null when both attempts fail).
+  const preferred = seekSequence(lines, pattern, preferredStart, eof); // first attempt: start at the position the caller prefers
+  if (preferred !== null || preferredStart === fallbackStart) return preferred; // done on a hit, or when both starts coincide and a retry would just repeat the same call
+  return seekSequence(lines, pattern, fallbackStart, eof); // NOTE(review): assuming seekSequence scans forward from its start index, a hit found from preferredStart always wins over an earlier occurrence between fallbackStart and preferredStart - confirm this is the intended tie-break
+}
+
 function applyReplacements(lines: string[], replacements: Replacement[]): string[] {
  const result = [...lines];
  for (const [start, oldLen, newSegment] of [...replacements].reverse()) {
@@ -734,6 +734,86 @@ export async function runSshArgvGuidanceContract(): Promise<JsonRecord> {
  assertCondition(!largeV2.commands.some((command) => command.includes("write-b64-append")), "v2 should keep the single stdin write as the normal large-file fast path", largeV2.commands);
  assertCondition(largeV2.commands.filter((command) => command.startsWith("read-b64-block")).length <= 2, "v2 large-file verified read should use coarse chunks, not many tiny SSH calls", largeV2.commands);

+  const repeatedLargeLines = Array.from({ length: 1200 }, (_, index) => `repeat target ${String(index).padStart(4, "0")}`);
+  repeatedLargeLines[899] = "same marker";
+  repeatedLargeLines[1099] = "same marker";
+  const unifiedHeaderLineRangeV2 = await applyPatchV2FixtureAttempt([
+    "*** Begin Patch",
+    "*** Update File: repeated-large.txt",
+    "@@ -1100,1 +1100,1 @@",
+    "-same marker",
+    "+same marker patched",
+    "*** End Patch",
+    "",
+  ].join("\n"), {
+    "repeated-large.txt": `${repeatedLargeLines.join("\n")}\n`,
+  }, { stderrOutput: true });
+  assertCondition(unifiedHeaderLineRangeV2.exitCode === 0 && unifiedHeaderLineRangeV2.error === null, "v2 should accept unified-diff hunk headers with a hint", unifiedHeaderLineRangeV2);
+  const unifiedLines = unifiedHeaderLineRangeV2.files["repeated-large.txt"]?.split("\n") ?? [];
+  assertCondition(unifiedLines[899] === "same marker" && unifiedLines[1099] === "same marker patched", "v2 should use unified header old line number to avoid patching the first repeated match", {
+    line900: unifiedLines[899],
+    line1100: unifiedLines[1099],
+    stderr: unifiedHeaderLineRangeV2.stderr,
+  });
+  assertCondition(unifiedHeaderLineRangeV2.stderr.includes("accepted unified-diff hunk header in repeated-large.txt"), "v2 unified header compatibility should emit a canonical syntax hint", unifiedHeaderLineRangeV2.stderr);
+
+  const manyLines = Array.from({ length: 6200 }, (_, index) => {
+    if (index === 4) return "HEAD old";
+    if (index === 3099) return "MIDDLE old";
+    if (index === 6194) return "TAIL old";
+    return `ROW-${String(index + 1).padStart(5, "0")} keep`;
+  });
+  const largeMultiMixedV2 = await applyPatchV2FixtureAttempt([
+    "*** Begin Patch",
+    "*** Update File: big-multi.txt",
+    "@@ -5,1 +5,1 @@",
+    "-HEAD old",
+    "+HEAD new",
+    "@@ -3100,1 +3100,2 @@",
+    "-MIDDLE old",
+    "+MIDDLE new",
+    "+MIDDLE inserted",
+    "@@ -6195,1 +6196,1 @@",
+    "-TAIL old",
+    "+TAIL new",
+    "*** Add File: nested/compat-created.txt",
+    "@@",
+    "first",
+    "+ ",
+    "+last",
+    "*** Delete File: stale.txt",
+    "@@",
+    "-stale",
+    "*** End Patch",
+    "",
+  ].join("\n"), {
+    "big-multi.txt": `${manyLines.join("\n")}\n`,
+    "stale.txt": "stale\n",
+  }, { stderrOutput: true });
+  assertCondition(largeMultiMixedV2.exitCode === 0 && largeMultiMixedV2.error === null, "v2 should apply large multi-hunk mixed compatibility patches", largeMultiMixedV2);
+  const bigMulti = largeMultiMixedV2.files["big-multi.txt"]?.split("\n") ?? [];
+  assertCondition(
+    bigMulti[4] === "HEAD new"
+      && bigMulti[3099] === "MIDDLE new"
+      && bigMulti[3100] === "MIDDLE inserted"
+      && bigMulti[6195] === "TAIL new"
+      && bigMulti[0] === "ROW-00001 keep"
+      && bigMulti[6200] === "ROW-06200 keep"
+      && bigMulti.length === 6202,
+    "v2 should preserve distant untouched lines while applying multiple large-file hunks",
+    { head: bigMulti.slice(0, 6), middle: bigMulti.slice(3098, 3102), tail: bigMulti.slice(6194, 6201) },
+  );
+  assertCondition(largeMultiMixedV2.files["nested/compat-created.txt"] === "first\n\nlast\n", "v2 mixed compatibility Add File should preserve intended blank line", largeMultiMixedV2);
+  assertCondition(largeMultiMixedV2.files["stale.txt"] === undefined, "v2 mixed compatibility Delete File should delete stale files", largeMultiMixedV2);
+  assertCondition(
+    largeMultiMixedV2.stderr.includes("accepted unified-diff hunk header in big-multi.txt")
+      && largeMultiMixedV2.stderr.includes("accepted MiniMax-style @@ inside Add File nested/compat-created.txt")
+      && largeMultiMixedV2.stderr.includes("ignored extra MiniMax-style hunk/body lines after Delete File stale.txt"),
+    "v2 mixed compatibility patch should emit hints for every non-canonical form",
+    largeMultiMixedV2.stderr,
+  );
+  assertCondition(largeMultiMixedV2.commands.some((command) => command.startsWith("write-b64-stdin big-multi.txt")), "v2 large multi-hunk file should use stdin write path", largeMultiMixedV2.commands);
+
  const multiChunkTailV2 = await applyPatchV2ActualShellFixtureAttempt([
    "*** Begin Patch",
    "*** Update File: two_chunks.txt",