Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 108 additions & 0 deletions src/core/diff/strategies/__tests__/multi-search-replace.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1204,4 +1204,112 @@ function sum(a, b) {
expect(result.error).toContain(":start_line:5 <-- Invalid location")
})
})

describe("tolerant :start_line parsing", () => {
let strategy: MultiSearchReplaceDiffStrategy

beforeEach(() => {
strategy = new MultiSearchReplaceDiffStrategy()
})

it("should accept :start_line= with equals delimiter via regex", async () => {
const originalContent = 'function hello() {\n console.log("hello")\n}\n'
const diffContent =
"<<<<<<< SEARCH\n" +
":start_line=1\n" +
"-------\n" +
"function hello() {\n" +
"=======\n" +
"function helloWorld() {\n" +
">>>>>>> REPLACE"

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe('function helloWorld() {\n console.log("hello")\n}\n')
}
})

it("should accept :start_line= in multiple blocks", async () => {
const originalContent = 'function hello() {\n console.log("hello")\n}\n'
const diffContent =
"<<<<<<< SEARCH\n" +
":start_line=1\n" +
"-------\n" +
"function hello() {\n" +
"=======\n" +
"function helloWorld() {\n" +
">>>>>>> REPLACE\n" +
"<<<<<<< SEARCH\n" +
":start_line=2\n" +
"-------\n" +
' console.log("hello")\n' +
"=======\n" +
' console.log("hello world")\n' +
">>>>>>> REPLACE"

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe('function helloWorld() {\n console.log("hello world")\n}\n')
}
})

it("should handle botched :start_line= that leaked into search content via fallback", async () => {
// Simulates the exact bug from issue #12199: model writes :start_line=18
// and it leaks into search content because the main regex didn't parse it
const originalContent =
"package config\n" +
"\n" +
'import (\n\t"fmt"\n\t"os"\n\n\t"gopkg.in/yaml.v3"\n)\n' +
"\n" +
"// Config holds all configuration.\n" +
"type Config struct {\n" +
'\tMatrix MatrixConfig `yaml:"matrix"`\n' +
"}\n" +
"\n" +
"// MatrixConfig holds Matrix connection settings.\n" +
"type MatrixConfig struct {\n" +
'\tHomeserverURL string `yaml:"homeserver_url"`\n' +
"}\n"

// This diff uses :start_line:15 (correct format) and should work normally
const diffContent =
"<<<<<<< SEARCH\n" +
":start_line:15\n" +
"-------\n" +
"// MatrixConfig holds Matrix connection settings.\n" +
"type MatrixConfig struct {\n" +
'\tHomeserverURL string `yaml:"homeserver_url"`\n' +
"}\n" +
"=======\n" +
"// MatrixConfig holds Matrix connection settings.\n" +
"type MatrixConfig struct {\n" +
'\tHomeserverURL string `yaml:"homeserver_url"`\n' +
'\tAccessToken string `yaml:"access_token"`\n' +
"}\n" +
">>>>>>> REPLACE"

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toContain("AccessToken")
}
})

it("should strip leaked :start_line= directive from search content as fallback", async () => {
// Test the stripLeakedStartLineDirective method directly
const result = strategy["stripLeakedStartLineDirective"](
":start_line=18\n-------\n// MatrixConfig holds Matrix connection settings.\n",
)
expect(result.extractedStartLine).toBe(18)
expect(result.cleanedContent).toBe("// MatrixConfig holds Matrix connection settings.\n")
})

it("should not strip non-leaked content", async () => {
const result = strategy["stripLeakedStartLineDirective"]("// some normal code\nfunction hello() {\n")
expect(result.extractedStartLine).toBeNull()
expect(result.cleanedContent).toBe("// some normal code\nfunction hello() {\n")
})
})
})
39 changes: 38 additions & 1 deletion src/core/diff/strategies/multi-search-replace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,31 @@ export class MultiSearchReplaceDiffStrategy implements DiffStrategy {
.replace(/^\\:start_line:/gm, ":start_line:")
}

/**
* Detects and strips a botched :start_line directive that leaked into search content.
* This handles cases where the model uses a malformed format (e.g., `:start_line=18`)
* that the main regex couldn't parse, causing the directive and separator to become
* part of the search content.
*
* Returns the cleaned search content and any extracted start line number.
*/
private stripLeakedStartLineDirective(searchContent: string): {
cleanedContent: string
extractedStartLine: number | null
} {
// Match patterns like `:start_line=18\n-------\n` or `:start_line 18\n-------\n`
// at the beginning of search content (the directive + separator leaked in)
const leakedPattern = /^:start_line\s*[=]\s*(\d+)\s*\n(?:-------\s*\n)?/
const match = searchContent.match(leakedPattern)
if (match) {
return {
cleanedContent: searchContent.slice(match[0].length),
extractedStartLine: parseInt(match[1], 10),
}
}
return { cleanedContent: searchContent, extractedStartLine: null }
}

private validateMarkerSequencing(diffContent: string): { success: boolean; error?: string } {
enum State {
START,
Expand Down Expand Up @@ -267,9 +292,11 @@ export class MultiSearchReplaceDiffStrategy implements DiffStrategy {

3. ((?:\:start_line:\s*(\d+)\s*\n))?
Optionally matches a ":start_line:" line. The outer capturing group is group 1 and the inner (\d+) is group 2.
Also accepts ":start_line=" as delimiter (e.g. ":start_line=18").

4. ((?:\:end_line:\s*(\d+)\s*\n))?
Optionally matches a ":end_line:" line. Group 3 is the whole match and group 4 is the digits.
Also accepts ":end_line=" as delimiter.

5. ((?<!\\)-------\s*\n)?
Optionally matches the "-------" marker line (group 5).
Expand All @@ -289,7 +316,7 @@ export class MultiSearchReplaceDiffStrategy implements DiffStrategy {

let matches = [
...diffContent.matchAll(
/(?:^|\n)(?<!\\)<<<<<<< SEARCH>?\s*\n((?:\:start_line:\s*(\d+)\s*\n))?((?:\:end_line:\s*(\d+)\s*\n))?((?<!\\)-------\s*\n)?([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)=======\s*\n)([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)/g,
/(?:^|\n)(?<!\\)<<<<<<< SEARCH>?\s*\n((?:\:start_line[:=]\s*(\d+)\s*\n))?((?:\:end_line[:=]\s*(\d+)\s*\n))?((?<!\\)-------\s*\n)?([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)=======\s*\n)([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)/g,
),
]

Expand Down Expand Up @@ -321,6 +348,16 @@ export class MultiSearchReplaceDiffStrategy implements DiffStrategy {
searchContent = this.unescapeMarkers(searchContent)
replaceContent = this.unescapeMarkers(replaceContent)

// Fallback: detect and strip a botched :start_line directive that leaked into search content
// This handles cases like `:start_line=18` where the `=` delimiter wasn't caught by the main regex
if (startLine === 0) {
const { cleanedContent, extractedStartLine } = this.stripLeakedStartLineDirective(searchContent)
if (extractedStartLine !== null) {
searchContent = cleanedContent
startLine = extractedStartLine + delta
}
}

// Strip line numbers from search and replace content if every line starts with a line number
const hasAllLineNumbers =
(everyLineHasLineNumbers(searchContent) && everyLineHasLineNumbers(replaceContent)) ||
Expand Down
Loading