11import z from "zod"
22import * as fs from "fs"
33import * as path from "path"
4+ import { createInterface } from "readline"
45import { Tool } from "./tool"
56import { LSP } from "../lsp"
67import { FileTime } from "../file/time"
@@ -11,7 +12,9 @@ import { InstructionPrompt } from "../session/instruction"
1112
1213const DEFAULT_READ_LIMIT = 2000
1314const MAX_LINE_LENGTH = 2000
15+ const MAX_LINE_SUFFIX = `... (line truncated to ${ MAX_LINE_LENGTH } chars)`
1416const MAX_BYTES = 50 * 1024
17+ const MAX_BYTES_LABEL = `${ MAX_BYTES / 1024 } KB`
1518
1619export const ReadTool = Tool . define ( "read" , {
1720 description : DESCRIPTION ,
@@ -134,27 +137,53 @@ export const ReadTool = Tool.define("read", {
134137 }
135138 }
136139
137- const isBinary = await isBinaryFile ( filepath , file )
140+ const isBinary = await isBinaryFile ( filepath , stat . size )
138141 if ( isBinary ) throw new Error ( `Cannot read binary file: ${ filepath } ` )
139142
143+ const stream = fs . createReadStream ( filepath , { encoding : "utf8" } )
144+ const rl = createInterface ( {
145+ input : stream ,
146+ // Note: crlfDelay: Infinity makes readline treat every CR LF
147+ // ('\r\n') sequence in the file as a single line break.
148+ crlfDelay : Infinity ,
149+ } )
150+
140151 const limit = params . limit ?? DEFAULT_READ_LIMIT
141152 const offset = params . offset ?? 1
142153 const start = offset - 1
143- const lines = await file . text ( ) . then ( ( text ) => text . split ( "\n" ) )
144- if ( start >= lines . length ) throw new Error ( `Offset ${ offset } is out of range for this file (${ lines . length } lines)` )
145-
146154 const raw : string [ ] = [ ]
147155 let bytes = 0
156+ let lines = 0
148157 let truncatedByBytes = false
149- for ( let i = start ; i < Math . min ( lines . length , start + limit ) ; i ++ ) {
150- const line = lines [ i ] . length > MAX_LINE_LENGTH ? lines [ i ] . substring ( 0 , MAX_LINE_LENGTH ) + "..." : lines [ i ]
151- const size = Buffer . byteLength ( line , "utf-8" ) + ( raw . length > 0 ? 1 : 0 )
152- if ( bytes + size > MAX_BYTES ) {
153- truncatedByBytes = true
154- break
158+ let hasMoreLines = false
159+ try {
160+ for await ( const text of rl ) {
161+ lines += 1
162+ if ( lines <= start ) continue
163+
164+ if ( raw . length >= limit ) {
165+ hasMoreLines = true
166+ continue
167+ }
168+
169+ const line = text . length > MAX_LINE_LENGTH ? text . substring ( 0 , MAX_LINE_LENGTH ) + MAX_LINE_SUFFIX : text
170+ const size = Buffer . byteLength ( line , "utf-8" ) + ( raw . length > 0 ? 1 : 0 )
171+ if ( bytes + size > MAX_BYTES ) {
172+ truncatedByBytes = true
173+ hasMoreLines = true
174+ break
175+ }
176+
177+ raw . push ( line )
178+ bytes += size
155179 }
156- raw . push ( line )
157- bytes += size
180+ } finally {
181+ rl . close ( )
182+ stream . destroy ( )
183+ }
184+
185+ if ( lines < offset && ! ( lines === 0 && offset === 1 ) ) {
186+ throw new Error ( `Offset ${ offset } is out of range for this file (${ lines } lines)` )
158187 }
159188
160189 const content = raw . map ( ( line , index ) => {
@@ -165,15 +194,15 @@ export const ReadTool = Tool.define("read", {
165194 let output = [ `<path>${ filepath } </path>` , `<type>file</type>` , "<content>" ] . join ( "\n" )
166195 output += content . join ( "\n" )
167196
168- const totalLines = lines . length
197+ const totalLines = lines
169198 const lastReadLine = offset + raw . length - 1
170- const hasMoreLines = totalLines > lastReadLine
199+ const nextOffset = lastReadLine + 1
171200 const truncated = hasMoreLines || truncatedByBytes
172201
173202 if ( truncatedByBytes ) {
174- output += `\n\n(Output truncated at ${ MAX_BYTES } bytes. Use ' offset' parameter to read beyond line ${ lastReadLine } )`
203+ output += `\n\n(Output capped at ${ MAX_BYTES_LABEL } . Showing lines ${ offset } - ${ lastReadLine } . Use offset= ${ nextOffset } to continue. )`
175204 } else if ( hasMoreLines ) {
176- output += `\n\n(File has more lines. Use ' offset' parameter to read beyond line ${ lastReadLine } )`
205+ output += `\n\n(Showing lines ${ offset } - ${ lastReadLine } of ${ totalLines } . Use offset= ${ nextOffset } to continue. )`
177206 } else {
178207 output += `\n\n(End of file - total ${ totalLines } lines)`
179208 }
@@ -199,7 +228,7 @@ export const ReadTool = Tool.define("read", {
199228 } ,
200229} )
201230
202- async function isBinaryFile ( filepath : string , file : Bun . BunFile ) : Promise < boolean > {
231+ async function isBinaryFile ( filepath : string , fileSize : number ) : Promise < boolean > {
203232 const ext = path . extname ( filepath ) . toLowerCase ( )
204233 // binary check for common non-text extensions
205234 switch ( ext ) {
@@ -236,22 +265,25 @@ async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolea
236265 break
237266 }
238267
239- const stat = await file . stat ( )
240- const fileSize = stat . size
241268 if ( fileSize === 0 ) return false
242269
243- const bufferSize = Math . min ( 4096 , fileSize )
244- const buffer = await file . arrayBuffer ( )
245- if ( buffer . byteLength === 0 ) return false
246- const bytes = new Uint8Array ( buffer . slice ( 0 , bufferSize ) )
270+ const fh = await fs . promises . open ( filepath , "r" )
271+ try {
272+ const sampleSize = Math . min ( 4096 , fileSize )
273+ const bytes = Buffer . alloc ( sampleSize )
274+ const result = await fh . read ( bytes , 0 , sampleSize , 0 )
275+ if ( result . bytesRead === 0 ) return false
247276
248- let nonPrintableCount = 0
249- for ( let i = 0 ; i < bytes . length ; i ++ ) {
250- if ( bytes [ i ] === 0 ) return true
251- if ( bytes [ i ] < 9 || ( bytes [ i ] > 13 && bytes [ i ] < 32 ) ) {
252- nonPrintableCount ++
277+ let nonPrintableCount = 0
278+ for ( let i = 0 ; i < result . bytesRead ; i ++ ) {
279+ if ( bytes [ i ] === 0 ) return true
280+ if ( bytes [ i ] < 9 || ( bytes [ i ] > 13 && bytes [ i ] < 32 ) ) {
281+ nonPrintableCount ++
282+ }
253283 }
284+ // If >30% non-printable characters, consider it binary
285+ return nonPrintableCount / result . bytesRead > 0.3
286+ } finally {
287+ await fh . close ( )
254288 }
255- // If >30% non-printable characters, consider it binary
256- return nonPrintableCount / bytes . length > 0.3
257289}
0 commit comments