@@ -437,6 +437,146 @@ export namespace Snapshot {
437437 const diffFull = Effect . fnUntraced ( function * ( from : string , to : string ) {
438438 return yield * locked (
439439 Effect . gen ( function * ( ) {
/**
 * One changed file, as parsed from a single numstat line.
 *
 * - `file`: path of the changed file.
 * - `status`: kind of change; rows without a recorded status are treated as "modified".
 * - `binary`: true when numstat reported "-" for both counts.
 * - `additions` / `deletions`: line counts, 0 for binary files or unparsable values.
 */
type Row = {
  file: string
  status: "added" | "deleted" | "modified"
  binary: boolean
  additions: number
  deletions: number
}
447+
/**
 * One blob to fetch for a changed file.
 *
 * - `file`: path of the changed file the blob belongs to.
 * - `side`: whether the blob is the "before" or the "after" content.
 * - `ref`: git object name in `<rev>:<path>` form.
 */
type Ref = {
  file: string
  side: "before" | "after"
  ref: string
}
453+
/**
 * Per-file loader: reads the before/after text of one changed file with `git show`.
 *
 * Yields a `[before, after]` pair:
 * - binary rows produce two empty strings without invoking git;
 * - added rows only read the `to` side, deleted rows only read the `from` side;
 * - every other row reads both sides concurrently.
 */
const show = Effect.fnUntraced(function* (row: Row) {
  // Single `git show <rev>:<path>` call, reduced to the `text` field of its result.
  const read = (rev: string) =>
    git([...cfg, ...args(["show", `${rev}:${row.file}`])]).pipe(Effect.map((item) => item.text))

  if (row.binary) return ["", ""]
  if (row.status === "added") return ["", yield* read(to)]
  if (row.status === "deleted") return [yield* read(from), ""]
  return yield* Effect.all([read(from), read(to)], { concurrency: 2 })
})
480+
/**
 * Batch loader: fetches the blobs needed for `rows` through one
 * `git cat-file --batch` process instead of one `git show` per file side.
 *
 * Yields a map from file path to `{ before, after }` text. A side that was not
 * requested, or that git reports as missing, stays "". Yields `undefined` when
 * the process exits non-zero, when the output cannot be parsed as expected, or
 * when the effect fails, so the caller can fall back to per-file `git show`.
 */
const load = Effect.fnUntraced(
  function* (rows: Row[]) {
    // Work out which blobs are needed: binary rows need none, added/deleted
    // rows need one side, everything else needs both.
    const refs = rows.flatMap((row) => {
      if (row.binary) return []
      if (row.status === "added")
        return [{ file: row.file, side: "after", ref: `${to}:${row.file}` } satisfies Ref]
      if (row.status === "deleted") {
        return [{ file: row.file, side: "before", ref: `${from}:${row.file}` } satisfies Ref]
      }
      return [
        { file: row.file, side: "before", ref: `${from}:${row.file}` } satisfies Ref,
        { file: row.file, side: "after", ref: `${to}:${row.file}` } satisfies Ref,
      ]
    })
    if (!refs.length) return new Map<string, { before: string; after: string }>()

    // Object names go in on stdin, one per line.
    const proc = ChildProcess.make("git", [...cfg, ...args(["cat-file", "--batch"])], {
      cwd: state.directory,
      extendEnv: true,
      stdin: Stream.make(new TextEncoder().encode(refs.map((item) => item.ref).join("\n") + "\n")),
    })
    const handle = yield* spawner.spawn(proc)
    // Drain stdout and stderr together before asking for the exit code.
    const [out, err] = yield* Effect.all(
      [Stream.mkUint8Array(handle.stdout), Stream.mkString(Stream.decodeText(handle.stderr))],
      { concurrency: 2 },
    )
    const code = yield* handle.exitCode
    if (code !== 0) {
      log.info("git cat-file --batch failed during snapshot diff, falling back to per-file git show", {
        stderr: err,
        refs: refs.length,
      })
      return
    }

    // Log why parsing was abandoned and signal the caller to fall back.
    const fail = (msg: string, extra?: Record<string, string>) => {
      log.info(msg, { ...extra, refs: refs.length })
      return undefined
    }

    const map = new Map<string, { before: string; after: string }>()
    const dec = new TextDecoder()
    // Expected header for a blob: "<hex object id> blob <size>".
    const header = /^[0-9a-f]+ blob (\d+)$/
    let i = 0
    // Parse the default `git cat-file --batch` stream: one header line,
    // then exactly `size` bytes of blob content, then a trailing newline.
    // Responses are consumed in the same order the refs were written.
    for (const ref of refs) {
      const end = out.indexOf(10, i)
      if (end === -1) {
        return fail(
          "git cat-file --batch returned a truncated header during snapshot diff, falling back to per-file git show",
        )
      }

      const head = dec.decode(out.subarray(i, end))
      i = end + 1
      const hit = map.get(ref.file) ?? { before: "", after: "" }
      if (head.endsWith(" missing")) {
        // No content follows a "missing" header; leave this side empty.
        map.set(ref.file, hit)
        continue
      }

      const match = head.match(header)
      if (!match) {
        return fail(
          "git cat-file --batch returned an unexpected header during snapshot diff, falling back to per-file git show",
          { head },
        )
      }

      const size = Number(match[1])
      // The content must fit in the buffer and be followed by a newline byte.
      if (!Number.isInteger(size) || size < 0 || i + size >= out.length || out[i + size] !== 10) {
        return fail(
          "git cat-file --batch returned truncated content during snapshot diff, falling back to per-file git show",
          { head },
        )
      }

      const text = dec.decode(out.subarray(i, i + size))
      if (ref.side === "before") hit.before = text
      if (ref.side === "after") hit.after = text
      map.set(ref.file, hit)
      i += size + 1
    }

    // Every byte should have been consumed; leftovers mean the parse drifted.
    if (i !== out.length) {
      return fail(
        "git cat-file --batch returned trailing data during snapshot diff, falling back to per-file git show",
      )
    }

    return map
  },
  Effect.scoped,
  // Any failure is turned into `undefined` rather than failing the whole diff.
  Effect.catch(() =>
    Effect.succeed<Map<string, { before: string; after: string }> | undefined>(undefined),
  ),
)
579+
440580 const result : Snapshot . FileDiff [ ] = [ ]
441581 const status = new Map < string , "added" | "deleted" | "modified" > ( )
442582
@@ -459,30 +599,45 @@ export namespace Snapshot {
459599 } ,
460600 )
461601
462- for ( const line of numstat . text . trim ( ) . split ( "\n" ) ) {
463- if ( ! line ) continue
464- const [ adds , dels , file ] = line . split ( "\t" )
465- if ( ! file ) continue
466- const binary = adds === "-" && dels === "-"
467- const [ before , after ] = binary
468- ? [ "" , "" ]
469- : yield * Effect . all (
470- [
471- git ( [ ...cfg , ...args ( [ "show" , `${ from } :${ file } ` ] ) ] ) . pipe ( Effect . map ( ( item ) => item . text ) ) ,
472- git ( [ ...cfg , ...args ( [ "show" , `${ to } :${ file } ` ] ) ] ) . pipe ( Effect . map ( ( item ) => item . text ) ) ,
473- ] ,
474- { concurrency : 2 } ,
475- )
476- const additions = binary ? 0 : parseInt ( adds )
477- const deletions = binary ? 0 : parseInt ( dels )
478- result . push ( {
479- file,
480- before,
481- after,
482- additions : Number . isFinite ( additions ) ? additions : 0 ,
483- deletions : Number . isFinite ( deletions ) ? deletions : 0 ,
484- status : status . get ( file ) ?? "modified" ,
602+ const rows = numstat . text
603+ . trim ( )
604+ . split ( "\n" )
605+ . filter ( Boolean )
606+ . flatMap ( ( line ) => {
607+ const [ adds , dels , file ] = line . split ( "\t" )
608+ if ( ! file ) return [ ]
609+ const binary = adds === "-" && dels === "-"
610+ const additions = binary ? 0 : parseInt ( adds )
611+ const deletions = binary ? 0 : parseInt ( dels )
612+ return [
613+ {
614+ file,
615+ status : status . get ( file ) ?? "modified" ,
616+ binary,
617+ additions : Number . isFinite ( additions ) ? additions : 0 ,
618+ deletions : Number . isFinite ( deletions ) ? deletions : 0 ,
619+ } satisfies Row ,
620+ ]
485621 } )
622+ const step = 100
623+
624+ // Keep batches bounded so a large diff does not buffer every blob at once.
625+ for ( let i = 0 ; i < rows . length ; i += step ) {
626+ const run = rows . slice ( i , i + step )
627+ const text = yield * load ( run )
628+
629+ for ( const row of run ) {
630+ const hit = text ?. get ( row . file ) ?? { before : "" , after : "" }
631+ const [ before , after ] = row . binary ? [ "" , "" ] : text ? [ hit . before , hit . after ] : yield * show ( row )
632+ result . push ( {
633+ file : row . file ,
634+ before,
635+ after,
636+ additions : row . additions ,
637+ deletions : row . deletions ,
638+ status : row . status ,
639+ } )
640+ }
486641 }
487642
488643 return result
0 commit comments