Skip to content

Commit c1b03b7

Browse files
authored
fix: make read tool more mem efficient (anomalyco#14009)
1 parent 2a2437b commit c1b03b7

2 files changed

Lines changed: 142 additions & 34 deletions

File tree

packages/opencode/src/tool/read.ts

Lines changed: 62 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import z from "zod"
22
import * as fs from "fs"
33
import * as path from "path"
4+
import { createInterface } from "readline"
45
import { Tool } from "./tool"
56
import { LSP } from "../lsp"
67
import { FileTime } from "../file/time"
@@ -11,7 +12,9 @@ import { InstructionPrompt } from "../session/instruction"
1112

1213
const DEFAULT_READ_LIMIT = 2000
1314
const MAX_LINE_LENGTH = 2000
15+
const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`
1416
const MAX_BYTES = 50 * 1024
17+
const MAX_BYTES_LABEL = `${MAX_BYTES / 1024} KB`
1518

1619
export const ReadTool = Tool.define("read", {
1720
description: DESCRIPTION,
@@ -134,27 +137,53 @@ export const ReadTool = Tool.define("read", {
134137
}
135138
}
136139

137-
const isBinary = await isBinaryFile(filepath, file)
140+
const isBinary = await isBinaryFile(filepath, stat.size)
138141
if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)
139142

143+
const stream = fs.createReadStream(filepath, { encoding: "utf8" })
144+
const rl = createInterface({
145+
input: stream,
146+
// Note: we use the crlfDelay option to recognize all instances of CR LF
147+
// ('\r\n') in file as a single line break.
148+
crlfDelay: Infinity,
149+
})
150+
140151
const limit = params.limit ?? DEFAULT_READ_LIMIT
141152
const offset = params.offset ?? 1
142153
const start = offset - 1
143-
const lines = await file.text().then((text) => text.split("\n"))
144-
if (start >= lines.length) throw new Error(`Offset ${offset} is out of range for this file (${lines.length} lines)`)
145-
146154
const raw: string[] = []
147155
let bytes = 0
156+
let lines = 0
148157
let truncatedByBytes = false
149-
for (let i = start; i < Math.min(lines.length, start + limit); i++) {
150-
const line = lines[i].length > MAX_LINE_LENGTH ? lines[i].substring(0, MAX_LINE_LENGTH) + "..." : lines[i]
151-
const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0)
152-
if (bytes + size > MAX_BYTES) {
153-
truncatedByBytes = true
154-
break
158+
let hasMoreLines = false
159+
try {
160+
for await (const text of rl) {
161+
lines += 1
162+
if (lines <= start) continue
163+
164+
if (raw.length >= limit) {
165+
hasMoreLines = true
166+
continue
167+
}
168+
169+
const line = text.length > MAX_LINE_LENGTH ? text.substring(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX : text
170+
const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0)
171+
if (bytes + size > MAX_BYTES) {
172+
truncatedByBytes = true
173+
hasMoreLines = true
174+
break
175+
}
176+
177+
raw.push(line)
178+
bytes += size
155179
}
156-
raw.push(line)
157-
bytes += size
180+
} finally {
181+
rl.close()
182+
stream.destroy()
183+
}
184+
185+
if (lines < offset && !(lines === 0 && offset === 1)) {
186+
throw new Error(`Offset ${offset} is out of range for this file (${lines} lines)`)
158187
}
159188

160189
const content = raw.map((line, index) => {
@@ -165,15 +194,15 @@ export const ReadTool = Tool.define("read", {
165194
let output = [`<path>${filepath}</path>`, `<type>file</type>`, "<content>"].join("\n")
166195
output += content.join("\n")
167196

168-
const totalLines = lines.length
197+
const totalLines = lines
169198
const lastReadLine = offset + raw.length - 1
170-
const hasMoreLines = totalLines > lastReadLine
199+
const nextOffset = lastReadLine + 1
171200
const truncated = hasMoreLines || truncatedByBytes
172201

173202
if (truncatedByBytes) {
174-
output += `\n\n(Output truncated at ${MAX_BYTES} bytes. Use 'offset' parameter to read beyond line ${lastReadLine})`
203+
output += `\n\n(Output capped at ${MAX_BYTES_LABEL}. Showing lines ${offset}-${lastReadLine}. Use offset=${nextOffset} to continue.)`
175204
} else if (hasMoreLines) {
176-
output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${lastReadLine})`
205+
output += `\n\n(Showing lines ${offset}-${lastReadLine} of ${totalLines}. Use offset=${nextOffset} to continue.)`
177206
} else {
178207
output += `\n\n(End of file - total ${totalLines} lines)`
179208
}
@@ -199,7 +228,7 @@ export const ReadTool = Tool.define("read", {
199228
},
200229
})
201230

202-
async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
231+
async function isBinaryFile(filepath: string, fileSize: number): Promise<boolean> {
203232
const ext = path.extname(filepath).toLowerCase()
204233
// binary check for common non-text extensions
205234
switch (ext) {
@@ -236,22 +265,25 @@ async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolea
236265
break
237266
}
238267

239-
const stat = await file.stat()
240-
const fileSize = stat.size
241268
if (fileSize === 0) return false
242269

243-
const bufferSize = Math.min(4096, fileSize)
244-
const buffer = await file.arrayBuffer()
245-
if (buffer.byteLength === 0) return false
246-
const bytes = new Uint8Array(buffer.slice(0, bufferSize))
270+
const fh = await fs.promises.open(filepath, "r")
271+
try {
272+
const sampleSize = Math.min(4096, fileSize)
273+
const bytes = Buffer.alloc(sampleSize)
274+
const result = await fh.read(bytes, 0, sampleSize, 0)
275+
if (result.bytesRead === 0) return false
247276

248-
let nonPrintableCount = 0
249-
for (let i = 0; i < bytes.length; i++) {
250-
if (bytes[i] === 0) return true
251-
if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
252-
nonPrintableCount++
277+
let nonPrintableCount = 0
278+
for (let i = 0; i < result.bytesRead; i++) {
279+
if (bytes[i] === 0) return true
280+
if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
281+
nonPrintableCount++
282+
}
253283
}
284+
// If >30% non-printable characters, consider it binary
285+
return nonPrintableCount / result.bytesRead > 0.3
286+
} finally {
287+
await fh.close()
254288
}
255-
// If >30% non-printable characters, consider it binary
256-
return nonPrintableCount / bytes.length > 0.3
257289
}

packages/opencode/test/tool/read.test.ts

Lines changed: 80 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,8 @@ describe("tool.read truncation", () => {
211211
const read = await ReadTool.init()
212212
const result = await read.execute({ filePath: path.join(tmp.path, "large.json") }, ctx)
213213
expect(result.metadata.truncated).toBe(true)
214-
expect(result.output).toContain("Output truncated at")
215-
expect(result.output).toContain("bytes")
214+
expect(result.output).toContain("Output capped at")
215+
expect(result.output).toContain("Use offset=")
216216
},
217217
})
218218
})
@@ -230,7 +230,8 @@ describe("tool.read truncation", () => {
230230
const read = await ReadTool.init()
231231
const result = await read.execute({ filePath: path.join(tmp.path, "many-lines.txt"), limit: 10 }, ctx)
232232
expect(result.metadata.truncated).toBe(true)
233-
expect(result.output).toContain("File has more lines")
233+
expect(result.output).toContain("Showing lines 1-10 of 100")
234+
expect(result.output).toContain("Use offset=11")
234235
expect(result.output).toContain("line0")
235236
expect(result.output).toContain("line9")
236237
expect(result.output).not.toContain("line10")
@@ -267,6 +268,10 @@ describe("tool.read truncation", () => {
267268
fn: async () => {
268269
const read = await ReadTool.init()
269270
const result = await read.execute({ filePath: path.join(tmp.path, "offset.txt"), offset: 10, limit: 5 }, ctx)
271+
expect(result.output).toContain("10: line10")
272+
expect(result.output).toContain("14: line14")
273+
expect(result.output).not.toContain("9: line10")
274+
expect(result.output).not.toContain("15: line15")
270275
expect(result.output).toContain("line10")
271276
expect(result.output).toContain("line14")
272277
expect(result.output).not.toContain("line0")
@@ -293,6 +298,40 @@ describe("tool.read truncation", () => {
293298
})
294299
})
295300

301+
test("allows reading empty file at default offset", async () => {
302+
await using tmp = await tmpdir({
303+
init: async (dir) => {
304+
await Bun.write(path.join(dir, "empty.txt"), "")
305+
},
306+
})
307+
await Instance.provide({
308+
directory: tmp.path,
309+
fn: async () => {
310+
const read = await ReadTool.init()
311+
const result = await read.execute({ filePath: path.join(tmp.path, "empty.txt") }, ctx)
312+
expect(result.metadata.truncated).toBe(false)
313+
expect(result.output).toContain("End of file - total 0 lines")
314+
},
315+
})
316+
})
317+
318+
test("throws when offset > 1 for empty file", async () => {
319+
await using tmp = await tmpdir({
320+
init: async (dir) => {
321+
await Bun.write(path.join(dir, "empty.txt"), "")
322+
},
323+
})
324+
await Instance.provide({
325+
directory: tmp.path,
326+
fn: async () => {
327+
const read = await ReadTool.init()
328+
await expect(read.execute({ filePath: path.join(tmp.path, "empty.txt"), offset: 2 }, ctx)).rejects.toThrow(
329+
"Offset 2 is out of range for this file (0 lines)",
330+
)
331+
},
332+
})
333+
})
334+
296335
test("does not mark final directory page as truncated", async () => {
297336
await using tmp = await tmpdir({
298337
init: async (dir) => {
@@ -324,7 +363,7 @@ describe("tool.read truncation", () => {
324363
fn: async () => {
325364
const read = await ReadTool.init()
326365
const result = await read.execute({ filePath: path.join(tmp.path, "long-line.txt") }, ctx)
327-
expect(result.output).toContain("...")
366+
expect(result.output).toContain("(line truncated to 2000 chars)")
328367
expect(result.output.length).toBeLessThan(3000)
329368
},
330369
})
@@ -425,3 +464,40 @@ describe("tool.read loaded instructions", () => {
425464
})
426465
})
427466
})
467+
468+
describe("tool.read binary detection", () => {
469+
test("rejects text extension files with null bytes", async () => {
470+
await using tmp = await tmpdir({
471+
init: async (dir) => {
472+
const bytes = Buffer.from([0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00, 0x77, 0x6f, 0x72, 0x6c, 0x64])
473+
await Bun.write(path.join(dir, "null-byte.txt"), bytes)
474+
},
475+
})
476+
await Instance.provide({
477+
directory: tmp.path,
478+
fn: async () => {
479+
const read = await ReadTool.init()
480+
await expect(read.execute({ filePath: path.join(tmp.path, "null-byte.txt") }, ctx)).rejects.toThrow(
481+
"Cannot read binary file",
482+
)
483+
},
484+
})
485+
})
486+
487+
test("rejects known binary extensions", async () => {
488+
await using tmp = await tmpdir({
489+
init: async (dir) => {
490+
await Bun.write(path.join(dir, "module.wasm"), "not really wasm")
491+
},
492+
})
493+
await Instance.provide({
494+
directory: tmp.path,
495+
fn: async () => {
496+
const read = await ReadTool.init()
497+
await expect(read.execute({ filePath: path.join(tmp.path, "module.wasm") }, ctx)).rejects.toThrow(
498+
"Cannot read binary file",
499+
)
500+
},
501+
})
502+
})
503+
})

0 commit comments

Comments
 (0)