Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions env.example
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
# BUB_MODEL=openrouter:openrouter/free
# BUB_MAX_STEPS=50
# BUB_MAX_TOKENS=16384
# Max UTF-8 bytes of a single tool result kept inline; larger output is
# truncated and spilled to <bub.home>/tool-output (0 disables). Guards against
# 413 Request Entity Too Large from oversized tool output.
# BUB_MAX_TOOL_RESULT_BYTES=131072
# BUB_MODEL_TIMEOUT_SECONDS=300
# BUB_HOME=~/.bub

Expand Down
25 changes: 21 additions & 4 deletions src/bub/builtin/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@

from bub.builtin.settings import AgentSettings, ModelCandidate
from bub.builtin.tape import Tape
from bub.builtin.tool_output import cap_tool_result
from bub.runtime import AsyncStreamEvents, BubError, ErrorKind, StreamEvent, StreamState
from bub.tools import Tool, ToolContext, ToolExecutor

CONTEXT_LENGTH_PATTERNS = re.compile(
r"context.{0,20}(?:length|window)|maximum.{0,20}context|token.{0,10}limit|prompt.{0,10}too long|tokens? > \d+ maximum",
r"context.{0,20}(?:length|window)|maximum.{0,20}context|token.{0,10}limit|prompt.{0,10}too long|tokens? > \d+ maximum|request entity too large",
re.IGNORECASE,
)
TOOL_ARGUMENTS_ADAPTER = TypeAdapter(dict[str, Any])
Expand Down Expand Up @@ -130,21 +131,22 @@ async def iterator() -> AsyncGenerator[StreamEvent, None]:
tool_invocations,
context=context,
)
tool_results = self._cap_tool_results(execution.tool_results, run_id=run_id)
await self.record_chat(
tape=tape,
run_id=run_id,
system_prompt=system_prompt,
new_messages=new_messages,
response_text=None,
tool_calls=serialized_tool_calls,
tool_results=execution.tool_results,
tool_results=tool_results,
response=output.response,
model=model,
usage=state.usage,
)
yield StreamEvent("tool_result", {"tool_results": execution.tool_results})
yield StreamEvent("tool_result", {"tool_results": tool_results})
yield StreamEvent(
"final", {"ok": True, "tool_calls": serialized_tool_calls, "tool_results": execution.tool_results}
"final", {"ok": True, "tool_calls": serialized_tool_calls, "tool_results": tool_results}
)
return

Expand All @@ -163,6 +165,21 @@ async def iterator() -> AsyncGenerator[StreamEvent, None]:

return AsyncStreamEvents(iterator(), state=state)

def _cap_tool_results(self, results: list[Any], *, run_id: str) -> list[Any]:
    """Return ``results`` with every entry passed through ``cap_tool_result``.

    The byte limit comes from ``self.settings.max_tool_result_bytes`` and the
    spill directory is ``<bub.home>/tool-output``.  Each entry's position in
    ``results`` is forwarded as its ``index``, together with ``run_id``.
    """
    import bub

    byte_limit = self.settings.max_tool_result_bytes
    overflow_dir = bub.home / "tool-output"

    capped: list[Any] = []
    for position, item in enumerate(results):
        capped.append(
            cap_tool_result(
                item,
                run_id=run_id,
                index=position,
                limit=byte_limit,
                spill_dir=overflow_dir,
            )
        )
    return capped

@staticmethod
def generate_run_id() -> str:
return f"run-{datetime.now(UTC).strftime('%Y%m%dT%H%M%S%fZ')}"
Expand Down
9 changes: 9 additions & 0 deletions src/bub/builtin/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict

from bub import Settings, config, ensure_config
from bub.builtin.tool_output import DEFAULT_MAX_TOOL_RESULT_BYTES

DEFAULT_MODEL = "openrouter:openrouter/free"
DEFAULT_MAX_TOKENS = 16384
Expand Down Expand Up @@ -58,6 +59,14 @@ class AgentSettings(Settings):
api_base: str | dict[str, str] | None = None
max_steps: int = 50
max_tokens: int = DEFAULT_MAX_TOKENS
max_tool_result_bytes: int = Field(
default=DEFAULT_MAX_TOOL_RESULT_BYTES,
description=(
"Max UTF-8 bytes of a single tool result kept inline. Larger results are "
"truncated and spilled to <bub.home>/tool-output to avoid 413 errors. "
"Set to 0 to disable."
),
)
model_timeout_seconds: int | None = None
client_args: dict[str, Any] = Field(default_factory=dict)
verbose: int = Field(default=0, description="Verbosity level for logging. Higher means more verbose.", ge=0, le=2)
Expand Down
76 changes: 76 additions & 0 deletions src/bub/builtin/tool_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""Bound tool-result size before it reaches the tape and the next model request.

A single oversized tool result -- e.g. ``grep`` over a minified bundle or source
map under ``node_modules`` -- can blow past the provider's request-body limit and
fail the whole turn with a ``413 Request Entity Too Large``. ``head -50`` is no
protection when individual lines are megabytes long.

We cap each result at a byte budget and spill the full output to a file, so the
agent keeps a bounded inline view *and* the complete output for follow-up
inspection with ``tail`` / ``rg``. The cap is enforced at the result boundary, so
it protects the tape, the trace, the streamed event, and the next request alike.
"""

from __future__ import annotations

import shlex
from pathlib import Path

DEFAULT_MAX_TOOL_RESULT_BYTES = 128 * 1024


def _human_bytes(num_bytes: int) -> str:
size = float(num_bytes)
for unit in ("B", "KB", "MB", "GB", "TB"):
if size < 1024:
return f"{int(size)} {unit}" if unit == "B" else f"{size:.1f} {unit}"
size /= 1024
return f"{size:.1f} PB"


def _spill_to_file(data: bytes, *, run_id: str, index: int, spill_dir: Path) -> Path:
spill_dir.mkdir(parents=True, exist_ok=True)
path = spill_dir / f"{run_id}-call-{index}.txt"
path.write_bytes(data)
return path


def _truncation_footer(*, original_bytes: int, limit: int, spill_path: Path) -> str:
    """Compose the notice appended to a truncated tool result.

    The notice starts with a blank line and has three bracketed lines: the
    original and allowed sizes (both rendered via ``_human_bytes``), the spill
    file path written verbatim, and a hint with ``tail`` / ``rg`` commands in
    which the path is shell-quoted.
    """
    shell_path = shlex.quote(str(spill_path))
    original_size = _human_bytes(original_bytes)
    limit_size = _human_bytes(limit)
    lines = [
        "",
        "",
        f"[output truncated: original {original_size} exceeded {limit_size} limit]",
        f"[full output saved to: {spill_path}]",
        f"[hint: inspect the end with `tail -c 4096 {shell_path}` "
        f"or search it with `rg <pattern> {shell_path}`]",
    ]
    return "\n".join(lines)


def cap_tool_result(
    result: object,
    *,
    run_id: str,
    index: int,
    limit: int,
    spill_dir: Path,
) -> object:
    """Cap one tool result at ``limit`` UTF-8 bytes, spilling the overflow to disk.

    Returned unchanged: any non-``str`` result, any result when ``limit`` is
    zero or negative, and any string whose UTF-8 encoding is at most ``limit``
    bytes.

    Otherwise the complete encoded output is written under ``spill_dir`` and
    the return value is a leading slice of the text followed by a footer that
    names the spill file.  The footer's own bytes are deducted from the budget,
    so the returned text fits in ``limit`` whenever the footer itself does.
    When ``limit`` is smaller than the footer, the head is empty and the footer
    is still returned whole, which means the result is then longer than
    ``limit``.
    """
    if not isinstance(result, str):
        return result
    if limit <= 0:
        return result

    raw = result.encode("utf-8")
    total = len(raw)
    if total <= limit:
        return result

    saved_to = _spill_to_file(raw, run_id=run_id, index=index, spill_dir=spill_dir)
    notice = _truncation_footer(original_bytes=total, limit=limit, spill_path=saved_to)

    room_for_head = limit - len(notice.encode("utf-8"))
    if room_for_head < 0:
        room_for_head = 0

    # The byte slice may end in the middle of a multi-byte character;
    # errors="ignore" discards that incomplete tail instead of raising.
    kept = raw[:room_for_head].decode("utf-8", errors="ignore")
    return kept + notice
149 changes: 149 additions & 0 deletions tests/test_tool_output_cap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
from __future__ import annotations

import re
from collections.abc import AsyncIterator, Iterator
from pathlib import Path
from typing import Any

import pytest
from any_llm.constants import LLMProvider
from any_llm.providers.openai.base import BaseOpenAIProvider
from any_llm.types.completion import ChatCompletionChunk

from bub.builtin.context import default_tape_context
from bub.builtin.model_runner import ModelRunner, is_context_length_error
from bub.builtin.settings import AgentSettings, ModelCandidate
from bub.builtin.tape import Tape
from bub.builtin.tool_output import cap_tool_result
from bub.tape import AsyncTapeStoreAdapter, InMemoryTapeStore
from bub.tools import Tool

SPILL_PATH_RE = re.compile(r"\[full output saved to: (?P<path>.+)\]")


def test_cap_tool_result_passes_through_small_and_non_string(tmp_path: Path) -> None:
    """Inputs that need no capping come back unchanged and nothing is spilled."""
    short_text = "small"
    assert cap_tool_result(short_text, run_id="r", index=0, limit=1024, spill_dir=tmp_path) == "small"

    # Non-string results are not capped, even with a tiny limit.
    mapping = {"k": "v"}
    assert cap_tool_result(mapping, run_id="r", index=0, limit=8, spill_dir=tmp_path) == {"k": "v"}

    # A limit of zero switches capping off, even for text far above any budget.
    oversized = "x" * 5000
    untouched = cap_tool_result(oversized, run_id="r", index=0, limit=0, spill_dir=tmp_path)
    assert untouched == oversized

    # None of the calls above may have written a spill file.
    assert not any(tmp_path.iterdir())


def test_cap_tool_result_truncates_and_spills_full_output(tmp_path: Path) -> None:
    """An oversized string is cut to the limit and its full text lands in the spill file."""
    byte_limit = 1024
    # One long line with no newlines, the shape a minified bundle produces.
    payload = "A" * 8000

    result = cap_tool_result(payload, run_id="run-abc", index=2, limit=byte_limit, spill_dir=tmp_path)

    assert isinstance(result, str)
    assert len(result.encode("utf-8")) <= byte_limit
    assert "[output truncated:" in result

    # The footer advertises where the untruncated output was written.
    found = SPILL_PATH_RE.search(result)
    assert found is not None
    saved = Path(found.group("path"))
    assert saved.read_text(encoding="utf-8") == payload


def test_cap_tool_result_does_not_split_multibyte_chars(tmp_path: Path) -> None:
    """Truncation must cut on a character boundary, never inside a UTF-8 sequence.

    The input is a run of a single 3-byte character, so any byte budget that is
    not a multiple of three would land mid-character.  The inline head (the
    text before the truncation footer) must therefore consist solely of whole
    copies of that character, with no replacement characters or other residue.
    """
    original = "你" * 4000  # 3 bytes each in UTF-8
    capped = cap_tool_result(original, run_id="run-utf8", index=0, limit=1024, spill_dir=tmp_path)

    assert isinstance(capped, str)
    assert len(capped.encode("utf-8")) <= 1024

    # Inspect the head itself: a lenient decode never raises, so only its
    # content can show whether a character was split or mangled.
    head, marker, _footer = capped.partition("\n\n[output truncated:")
    assert marker, "truncation footer missing"
    assert set(head) <= {"你"}
    assert "\ufffd" not in capped

    match = SPILL_PATH_RE.search(capped)
    assert match is not None
    assert Path(match.group("path")).read_text(encoding="utf-8") == original


class _FakeToolCallProvider(BaseOpenAIProvider):
    """Provider stub whose completion stream is one chunk carrying a tool call.

    The single chunk has ``finish_reason`` ``"tool_calls"`` and requests the
    tool named at construction time with empty (``"{}"``) arguments.
    """

    SUPPORTS_COMPLETION_STREAMING = True

    def __init__(self, tool_name: str) -> None:
        # The base-class initializer is not called; only the tool name is stored.
        self._tool_name = tool_name

    async def acompletion(self, **_kwargs: Any) -> AsyncIterator[ChatCompletionChunk]:
        """Ignore all keyword arguments and return the one-chunk stream."""

        async def single_chunk() -> AsyncIterator[ChatCompletionChunk]:
            requested_call = {
                "index": 0,
                "id": "call_1",
                "type": "function",
                "function": {"name": self._tool_name, "arguments": "{}"},
            }
            choice = {
                "index": 0,
                "finish_reason": "tool_calls",
                "delta": {"role": "assistant", "tool_calls": [requested_call]},
            }
            payload = {
                "id": "chatcmpl_test",
                "object": "chat.completion.chunk",
                "created": 0,
                "model": "gpt-test",
                "choices": [choice],
            }
            yield ChatCompletionChunk.model_validate(payload)

        return single_chunk()


class _FakeOpenAIModelRunner(ModelRunner):
    """``ModelRunner`` that always hands out the injected fake provider."""

    def __init__(self, settings: AgentSettings, llm: _FakeToolCallProvider) -> None:
        super().__init__(settings)
        self._llm = llm

    def iter_llm_clients(self, model: str) -> Iterator[tuple[ModelCandidate, _FakeToolCallProvider]]:
        """Yield exactly one OpenAI-flavoured candidate paired with the fake provider."""
        candidate = ModelCandidate(
            provider=LLMProvider.OPENAI,
            model_id=model,
            name=f"openai:{model}",
        )
        yield candidate, self._llm


@pytest.mark.asyncio
async def test_oversized_tool_result_is_bounded_in_next_request(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """End to end: a 4 MB tool result is capped in the stream and on the tape.

    Runs a fake model that calls ``bigtool`` once, then checks that both the
    streamed ``tool_result`` event and the tool message read back from the tape
    stay within the configured byte limit, while the spill file named in the
    footer holds the complete original output.
    """
    monkeypatch.setenv("BUB_HOME", str(tmp_path / "home"))
    byte_limit = 1024
    huge_output = "B" * (4 * 1024 * 1024)  # 4 MB on a single line

    def bigtool() -> str:
        return huge_output

    tool = Tool.from_callable(bigtool, name="bigtool")
    store = InMemoryTapeStore()
    tape = Tape(tmp_path, AsyncTapeStoreAdapter(store), default_tape_context()).scoped("test-tape")
    settings = AgentSettings.model_construct(
        model="openai:gpt-test",
        max_tokens=100,
        model_timeout_seconds=None,
        max_tool_result_bytes=byte_limit,
    )
    runner = _FakeOpenAIModelRunner(settings, _FakeToolCallProvider(tool_name="bigtool"))

    await tape.ensure_bootstrap_anchor()
    events = []
    async for event in runner.run(tape=tape, model="gpt-test", tools=[tool], system_prompt=None, prompt="go"):
        events.append(event)

    # The streamed event must already carry the capped text.
    tool_result_events = [event for event in events if event.kind == "tool_result"]
    assert len(tool_result_events) == 1
    streamed = tool_result_events[0].data["tool_results"][0]
    assert len(streamed.encode("utf-8")) <= byte_limit

    # What the tape replays as the next request must be bounded too, not 4 MB.
    messages = await tape.read_messages()
    tool_messages = [message for message in messages if message.get("role") == "tool"]
    assert len(tool_messages) == 1
    content = tool_messages[0]["content"]
    assert len(content.encode("utf-8")) <= byte_limit
    assert "[output truncated:" in content

    # The footer's path must lead to the untruncated output.
    found = SPILL_PATH_RE.search(content)
    assert found is not None
    spilled = Path(found.group("path"))
    assert spilled.read_text(encoding="utf-8") == huge_output


def test_413_is_treated_as_a_context_overflow_for_auto_handoff() -> None:
    """An HTML 413 error page is classified as a context-length error."""
    html_413_page = "<html><title>413 Request Entity Too Large</title></html>"
    assert is_context_length_error(html_413_page) is True
2 changes: 2 additions & 0 deletions website/src/content/docs/docs/reference/settings.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class AgentSettings(Settings):
api_format: Literal["completion", "responses", "messages"] = "completion"
max_steps: int = 50
max_tokens: int = DEFAULT_MAX_TOKENS # 16384
max_tool_result_bytes: int = DEFAULT_MAX_TOOL_RESULT_BYTES # 131072
model_timeout_seconds: int | None = None
client_args: dict[str, Any] | None = None
verbose: int = Field(default=0, ge=0, le=2)
Expand All @@ -65,6 +66,7 @@ Loaded under the YAML root section.
| `BUB_API_FORMAT` | `completion` | `api_format` | One of `completion`, `responses`, `messages`. |
| `BUB_MAX_STEPS` | `50` | `max_steps` | Maximum agent loop iterations per turn. |
| `BUB_MAX_TOKENS` | `16384` | `max_tokens` | Maximum tokens per model call. |
| `BUB_MAX_TOOL_RESULT_BYTES` | `131072` | `max_tool_result_bytes` | Max UTF-8 bytes of a single tool result kept inline. Larger output is truncated and spilled to `<bub.home>/tool-output` to avoid `413` errors. `0` disables. |
| `BUB_MODEL_TIMEOUT_SECONDS` | `null` | `model_timeout_seconds` | Per-call timeout in seconds. |
| `BUB_CLIENT_ARGS` | `null` | `client_args` | Extra kwargs passed to the underlying model client (JSON / dict). |
| `BUB_VERBOSE` | `0` | `verbose` | Logging verbosity level (`0`–`2`). |
Expand Down
2 changes: 2 additions & 0 deletions website/src/content/docs/zh-cn/docs/reference/settings.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class AgentSettings(Settings):
api_format: Literal["completion", "responses", "messages"] = "completion"
max_steps: int = 50
max_tokens: int = DEFAULT_MAX_TOKENS # 16384
max_tool_result_bytes: int = DEFAULT_MAX_TOOL_RESULT_BYTES # 131072
model_timeout_seconds: int | None = None
client_args: dict[str, Any] | None = None
verbose: int = Field(default=0, ge=0, le=2)
Expand All @@ -65,6 +66,7 @@ class AgentSettings(Settings):
| `BUB_API_FORMAT` | `completion` | `api_format` | `completion`、`responses`、`messages` 之一。 |
| `BUB_MAX_STEPS` | `50` | `max_steps` | 单次 turn 内 agent 循环的最大步数。 |
| `BUB_MAX_TOKENS` | `16384` | `max_tokens` | 单次模型调用的最大 token 数。 |
| `BUB_MAX_TOOL_RESULT_BYTES` | `131072` | `max_tool_result_bytes` | 单个工具结果保留为 inline 的最大 UTF-8 字节数。超出部分会被截断并落盘到 `<bub.home>/tool-output`,避免触发 `413`。设为 `0` 关闭。 |
| `BUB_MODEL_TIMEOUT_SECONDS` | `null` | `model_timeout_seconds` | 单次调用的超时秒数。 |
| `BUB_CLIENT_ARGS` | `null` | `client_args` | 传递给底层模型 client 的额外 kwargs(JSON / dict)。 |
| `BUB_VERBOSE` | `0` | `verbose` | 日志详细级别(`0`–`2`)。 |
Expand Down
Loading