Skip to content

Commit 288eb04

Browse files
kitlangton and natewill authored
perf(opencode): batch snapshot diffFull blob reads (anomalyco#20752)
Co-authored-by: Nate Williams <50088025+natewill@users.noreply.github.com>
1 parent 59ca454 commit 288eb04

2 files changed

Lines changed: 270 additions & 23 deletions

File tree

packages/opencode/src/snapshot/index.ts

Lines changed: 178 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -437,6 +437,146 @@ export namespace Snapshot {
437437
const diffFull = Effect.fnUntraced(function* (from: string, to: string) {
438438
return yield* locked(
439439
Effect.gen(function* () {
440+
// A single changed file parsed from one numstat line, before its blob contents are loaded.
type Row = {
441+
// Path of the changed file (third tab-separated numstat field).
file: string
442+
// Change kind looked up in the `status` map; files without an entry default to "modified".
status: "added" | "deleted" | "modified"
443+
// True when numstat reported "-" for both counts; blob text is never loaded for these rows.
binary: boolean
444+
// Added / deleted line counts; 0 for binary rows or when the count does not parse as a number.
additions: number
445+
deletions: number
446+
}
447+
448+
// One blob lookup sent to `git cat-file --batch`: the file it belongs to, which side of
// the diff the content fills, and the object name written to the process's stdin.
type Ref = {
449+
file: string
450+
side: "before" | "after"
451+
// Built as `${from}:${file}` for the "before" side and `${to}:${file}` for the "after" side.
ref: string
452+
}
453+
454+
// Per-file loader: reads one row's contents with individual `git show` calls.
// Returns a `[before, after]` pair of texts. The diff loop uses it only when the
// batched `load` returned `undefined` for the row's batch.
const show = Effect.fnUntraced(function* (row: Row) {
455+
// Binary files are reported without text on either side.
if (row.binary) return ["", ""]
456+
if (row.status === "added") {
457+
// Added file: only the `to` side is read; "before" stays empty.
return [
458+
"",
459+
yield* git([...cfg, ...args(["show", `${to}:${row.file}`])]).pipe(
460+
Effect.map((item) => item.text),
461+
),
462+
]
463+
}
464+
if (row.status === "deleted") {
465+
// Deleted file: only the `from` side is read; "after" stays empty.
return [
466+
yield* git([...cfg, ...args(["show", `${from}:${row.file}`])]).pipe(
467+
Effect.map((item) => item.text),
468+
),
469+
"",
470+
]
471+
}
472+
// Modified file: read both sides, running the two `git show` calls concurrently.
return yield* Effect.all(
473+
[
474+
git([...cfg, ...args(["show", `${from}:${row.file}`])]).pipe(Effect.map((item) => item.text)),
475+
git([...cfg, ...args(["show", `${to}:${row.file}`])]).pipe(Effect.map((item) => item.text)),
476+
],
477+
{ concurrency: 2 },
478+
)
479+
})
480+
481+
// Batched loader: fetches the before/after text for a group of rows with a single
// `git cat-file --batch` process instead of one `git show` per blob.
//
// Returns a Map keyed by file path with `{ before, after }` text (sides that were not
// requested or were reported missing stay ""), an empty Map when no blob is needed, or
// `undefined` when the batch could not be run or parsed. Every `undefined` path logs a
// message saying the caller falls back to per-file `git show`.
const load = Effect.fnUntraced(
482+
function* (rows: Row[]) {
483+
// Expand rows into the blob references to request. The order of `refs` matters:
// the output parser below consumes one response per ref, in this same order.
const refs = rows.flatMap((row) => {
484+
// Binary rows contribute no refs; their text is never read.
if (row.binary) return []
485+
if (row.status === "added")
486+
return [{ file: row.file, side: "after", ref: `${to}:${row.file}` } satisfies Ref]
487+
if (row.status === "deleted") {
488+
return [{ file: row.file, side: "before", ref: `${from}:${row.file}` } satisfies Ref]
489+
}
490+
// Modified rows need both sides, so they produce two refs for the same file.
return [
491+
{ file: row.file, side: "before", ref: `${from}:${row.file}` } satisfies Ref,
492+
{ file: row.file, side: "after", ref: `${to}:${row.file}` } satisfies Ref,
493+
]
494+
})
495+
// Nothing to read (e.g. every row is binary): return an empty result without spawning git.
if (!refs.length) return new Map<string, { before: string; after: string }>()
496+
497+
// Spawn `git cat-file --batch` and feed it one object name per line on stdin.
const proc = ChildProcess.make("git", [...cfg, ...args(["cat-file", "--batch"])], {
498+
cwd: state.directory,
499+
extendEnv: true,
500+
stdin: Stream.make(new TextEncoder().encode(refs.map((item) => item.ref).join("\n") + "\n")),
501+
})
502+
const handle = yield* spawner.spawn(proc)
503+
// Collect stdout as raw bytes (blob sizes in the headers are byte counts) and stderr
// as text, reading both streams concurrently.
const [out, err] = yield* Effect.all(
504+
[Stream.mkUint8Array(handle.stdout), Stream.mkString(Stream.decodeText(handle.stderr))],
505+
{ concurrency: 2 },
506+
)
507+
const code = yield* handle.exitCode
508+
if (code !== 0) {
509+
log.info("git cat-file --batch failed during snapshot diff, falling back to per-file git show", {
510+
stderr: err,
511+
refs: refs.length,
512+
})
513+
// Implicit `undefined`: signals the caller to fall back.
return
514+
}
515+
516+
// Logs a parse failure (with the ref count for context) and yields `undefined`,
// so each error branch below can simply `return fail(...)`.
const fail = (msg: string, extra?: Record<string, string>) => {
517+
log.info(msg, { ...extra, refs: refs.length })
518+
return undefined
519+
}
520+
521+
const map = new Map<string, { before: string; after: string }>()
522+
const dec = new TextDecoder()
523+
// Byte offset of the next unread position in `out`.
let i = 0
524+
// Parse the default `git cat-file --batch` stream: one header line,
525+
// then exactly `size` bytes of blob content, then a trailing newline.
526+
for (const ref of refs) {
527+
// Scan forward to the end of the header line (byte 10 is "\n").
let end = i
528+
while (end < out.length && out[end] !== 10) end += 1
529+
if (end >= out.length) {
530+
return fail(
531+
"git cat-file --batch returned a truncated header during snapshot diff, falling back to per-file git show",
532+
)
533+
}
534+
535+
const head = dec.decode(out.slice(i, end))
536+
i = end + 1
537+
// Reuse the entry created by this file's other side, if any, so a modified
// file ends up with both `before` and `after` filled in.
const hit = map.get(ref.file) ?? { before: "", after: "" }
538+
// A header ending in " missing" has no content section; keep the side as "".
if (head.endsWith(" missing")) {
539+
map.set(ref.file, hit)
540+
continue
541+
}
542+
543+
// Only `<hex id> blob <size>` headers are accepted; any other header aborts the batch.
const match = head.match(/^[0-9a-f]+ blob (\d+)$/)
544+
if (!match) {
545+
return fail(
546+
"git cat-file --batch returned an unexpected header during snapshot diff, falling back to per-file git show",
547+
{ head },
548+
)
549+
}
550+
551+
const size = Number(match[1])
552+
// The content must be fully present and followed by a newline byte at `i + size`.
if (!Number.isInteger(size) || size < 0 || i + size >= out.length || out[i + size] !== 10) {
553+
return fail(
554+
"git cat-file --batch returned truncated content during snapshot diff, falling back to per-file git show",
555+
{ head },
556+
)
557+
}
558+
559+
const text = dec.decode(out.slice(i, i + size))
560+
if (ref.side === "before") hit.before = text
561+
if (ref.side === "after") hit.after = text
562+
map.set(ref.file, hit)
563+
// Skip the content plus its trailing newline.
i += size + 1
564+
}
565+
566+
// Every ref has been consumed; leftover bytes mean the stream did not match expectations.
if (i !== out.length) {
567+
return fail(
568+
"git cat-file --batch returned trailing data during snapshot diff, falling back to per-file git show",
569+
)
570+
}
571+
572+
return map
573+
},
574+
// NOTE(review): presumably `Effect.scoped` ties the spawned process's resources to
// this call — confirm against the spawner's contract.
Effect.scoped,
575+
// Convert a failure of the effect above into `undefined`, the same
// "fall back to per-file git show" signal used by the parse errors.
Effect.catch(() =>
576+
Effect.succeed<Map<string, { before: string; after: string }> | undefined>(undefined),
577+
),
578+
)
579+
440580
const result: Snapshot.FileDiff[] = []
441581
const status = new Map<string, "added" | "deleted" | "modified">()
442582

@@ -459,30 +599,45 @@ export namespace Snapshot {
459599
},
460600
)
461601

462-
for (const line of numstat.text.trim().split("\n")) {
463-
if (!line) continue
464-
const [adds, dels, file] = line.split("\t")
465-
if (!file) continue
466-
const binary = adds === "-" && dels === "-"
467-
const [before, after] = binary
468-
? ["", ""]
469-
: yield* Effect.all(
470-
[
471-
git([...cfg, ...args(["show", `${from}:${file}`])]).pipe(Effect.map((item) => item.text)),
472-
git([...cfg, ...args(["show", `${to}:${file}`])]).pipe(Effect.map((item) => item.text)),
473-
],
474-
{ concurrency: 2 },
475-
)
476-
const additions = binary ? 0 : parseInt(adds)
477-
const deletions = binary ? 0 : parseInt(dels)
478-
result.push({
479-
file,
480-
before,
481-
after,
482-
additions: Number.isFinite(additions) ? additions : 0,
483-
deletions: Number.isFinite(deletions) ? deletions : 0,
484-
status: status.get(file) ?? "modified",
602+
// Parse the numstat output into `Row`s, one per non-empty line, keeping the
// order in which the lines appear in the output.
const rows = numstat.text
603+
.trim()
604+
.split("\n")
605+
.filter(Boolean)
606+
.flatMap((line) => {
607+
// Each line is split on tabs into `adds`, `dels`, `file`.
const [adds, dels, file] = line.split("\t")
608+
// Lines without a third field are dropped.
if (!file) return []
609+
// "-" for both counts marks the row as binary; its counts are reported as 0.
const binary = adds === "-" && dels === "-"
610+
const additions = binary ? 0 : parseInt(adds)
611+
const deletions = binary ? 0 : parseInt(dels)
612+
return [
613+
{
614+
file,
615+
// Files absent from the `status` map are treated as "modified".
status: status.get(file) ?? "modified",
616+
binary,
617+
// Guard against NaN from `parseInt` on an unparseable count.
additions: Number.isFinite(additions) ? additions : 0,
618+
deletions: Number.isFinite(deletions) ? deletions : 0,
619+
} satisfies Row,
620+
]
485621
})
622+
// Maximum number of rows handed to a single `load` call.
const step = 100
623+
624+
// Keep batches bounded so a large diff does not buffer every blob at once.
625+
for (let i = 0; i < rows.length; i += step) {
626+
const run = rows.slice(i, i + step)
627+
// `text` is undefined when the batched read failed for this slice.
const text = yield* load(run)
628+
629+
// Emit results row by row so `result` keeps the order of `rows`.
for (const row of run) {
630+
const hit = text?.get(row.file) ?? { before: "", after: "" }
631+
// Binary rows carry no text; otherwise use the batched contents when available,
// and fall back to per-file `show` only when the whole batch failed.
const [before, after] = row.binary ? ["", ""] : text ? [hit.before, hit.after] : yield* show(row)
632+
result.push({
633+
file: row.file,
634+
before,
635+
after,
636+
additions: row.additions,
637+
deletions: row.deletions,
638+
status: row.status,
639+
})
640+
}
486641
}
487642

488643
return result

packages/opencode/test/snapshot/snapshot.test.ts

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -982,6 +982,98 @@ test("diffFull with new file additions", async () => {
982982
})
983983
})
984984

985+
// Exercises diffFull on a diff that mixes four kinds of change for each of 60 ids:
// a modified text file, a deleted text file, an added text file, and a rewritten
// file of raw bytes. Expects 240 entries with the exact before/after text and status.
test("diffFull with a large interleaved mixed diff", async () => {
986+
await using tmp = await bootstrap()
987+
await Instance.provide({
988+
directory: tmp.path,
989+
fn: async () => {
990+
// Zero-padded ids ("000".."059") shared by all four file kinds.
const ids = Array.from({ length: 60 }, (_, i) => i.toString().padStart(3, "0"))
991+
const mod = ids.map((id) => fwd(tmp.path, "mix", `${id}-mod.txt`))
992+
const del = ids.map((id) => fwd(tmp.path, "mix", `${id}-del.txt`))
993+
const add = ids.map((id) => fwd(tmp.path, "mix", `${id}-add.txt`))
994+
const bin = ids.map((id) => fwd(tmp.path, "mix", `${id}-bin.bin`))
995+
996+
await $`mkdir -p ${tmp.path}/mix`.quiet()
997+
// Initial state: files to be modified, files to be deleted, and raw-byte files.
// Text contents include non-ASCII characters — presumably so that byte length
// and character length differ.
await Promise.all([
998+
...mod.map((file, i) => Filesystem.write(file, `before-${ids[i]}-é\n🙂\nline`)),
999+
...del.map((file, i) => Filesystem.write(file, `gone-${ids[i]}\n你好`)),
1000+
...bin.map((file, i) => Filesystem.write(file, new Uint8Array([0, i, 255, i % 251]))),
1001+
])
1002+
1003+
const before = await Snapshot.track()
1004+
expect(before).toBeTruthy()
1005+
1006+
// Second state: rewrite the modified and raw-byte files, add new files, remove the rest.
await Promise.all([
1007+
...mod.map((file, i) => Filesystem.write(file, `after-${ids[i]}-é\n🚀\nline`)),
1008+
...add.map((file, i) => Filesystem.write(file, `new-${ids[i]}\nこんにちは`)),
1009+
...bin.map((file, i) => Filesystem.write(file, new Uint8Array([9, i, 8, i % 251]))),
1010+
...del.map((file) => fs.rm(file)),
1011+
])
1012+
1013+
const after = await Snapshot.track()
1014+
expect(after).toBeTruthy()
1015+
1016+
const diffs = await Snapshot.diffFull(before!, after!)
1017+
// One diff entry per file: 4 kinds x 60 ids.
expect(diffs).toHaveLength(ids.length * 4)
1018+
1019+
// Index by path so each expectation is independent of result order.
const map = new Map(diffs.map((item) => [item.file, item]))
1020+
for (let i = 0; i < ids.length; i++) {
1021+
const m = map.get(fwd("mix", `${ids[i]}-mod.txt`))
1022+
expect(m).toBeDefined()
1023+
expect(m!.before).toBe(`before-${ids[i]}-é\n🙂\nline`)
1024+
expect(m!.after).toBe(`after-${ids[i]}-é\n🚀\nline`)
1025+
expect(m!.status).toBe("modified")
1026+
1027+
const d = map.get(fwd("mix", `${ids[i]}-del.txt`))
1028+
expect(d).toBeDefined()
1029+
expect(d!.before).toBe(`gone-${ids[i]}\n你好`)
1030+
expect(d!.after).toBe("")
1031+
expect(d!.status).toBe("deleted")
1032+
1033+
const a = map.get(fwd("mix", `${ids[i]}-add.txt`))
1034+
expect(a).toBeDefined()
1035+
expect(a!.before).toBe("")
1036+
expect(a!.after).toBe(`new-${ids[i]}\nこんにちは`)
1037+
expect(a!.status).toBe("added")
1038+
1039+
// Raw-byte files are expected to come back with no text and zero line counts.
const b = map.get(fwd("mix", `${ids[i]}-bin.bin`))
1040+
expect(b).toBeDefined()
1041+
expect(b!.before).toBe("")
1042+
expect(b!.after).toBe("")
1043+
expect(b!.additions).toBe(0)
1044+
expect(b!.deletions).toBe(0)
1045+
expect(b!.status).toBe("modified")
1046+
}
1047+
},
1048+
})
1049+
})
1050+
1051+
// Modifies 140 files between two snapshots and checks that diffFull returns them in
// ascending path order. 140 is more than the 100 rows diffFull loads per batch, so the
// result spans more than one batch.
test("diffFull preserves git diff order across batch boundaries", async () => {
1052+
await using tmp = await bootstrap()
1053+
await Instance.provide({
1054+
directory: tmp.path,
1055+
fn: async () => {
1056+
// Zero-padded ids ("000".."139"), so array order and lexical path order agree.
const ids = Array.from({ length: 140 }, (_, i) => i.toString().padStart(3, "0"))
1057+
1058+
await $`mkdir -p ${tmp.path}/order`.quiet()
1059+
await Promise.all(ids.map((id) => Filesystem.write(`${tmp.path}/order/${id}.txt`, `before-${id}`)))
1060+
1061+
const before = await Snapshot.track()
1062+
expect(before).toBeTruthy()
1063+
1064+
// Rewrite every file so each one shows up in the diff.
await Promise.all(ids.map((id) => Filesystem.write(`${tmp.path}/order/${id}.txt`, `after-${id}`)))
1065+
1066+
const after = await Snapshot.track()
1067+
expect(after).toBeTruthy()
1068+
1069+
const expected = ids.map((id) => `order/${id}.txt`)
1070+
1071+
const diffs = await Snapshot.diffFull(before!, after!)
1072+
// Compare the full ordered list of paths, not just membership.
expect(diffs.map((item) => item.file)).toEqual(expected)
1073+
},
1074+
})
1075+
})
1076+
9851077
test("diffFull with file modifications", async () => {
9861078
await using tmp = await bootstrap()
9871079
await Instance.provide({

0 commit comments

Comments
 (0)