From 9b5aa16fe2d05c624f977f99604ae9855faa258b Mon Sep 17 00:00:00 2001 From: Bharat Kunwar Date: Mon, 22 Jun 2026 15:20:51 +0100 Subject: [PATCH] feat: add `ccs prune` to losslessly shrink bloated conversations Conversation JSONL files grow large over time. `ccs prune` rewrites them removing only data that duplicates content kept elsewhere, so pruned conversations still resume with full dialogue: - toolUseResult fields (a copy of the tool_result already in message.content) - file-history-snapshot lines (rewind/checkpoint backups; pruning loses rewind history, not the conversation) User and assistant messages are never modified. Each file is rewritten to .pruned and atomically renamed only if its conversation line count is unchanged, so a failed/partial prune never clobbers the original. Dry run by default - `ccs prune` only previews savings; pass --apply to actually rewrite. Dry-run on real files reclaims ~25% (e.g. a 75MB conversation -> 53MB) with no dialogue loss. CLI: `ccs prune [--apply] [--min-size=N] [--no-tool-results] [--no-snapshots] [-y]`. Tests cover the stream transform, per-category options, atomic file replace + integrity check, and dry-run leaving files untouched. 
--- AGENTS.md | 3 + README.md | 23 ++++- main.go | 283 +++++++++++++++++++++++++++++++++++++++++++++++++++ main_test.go | 111 ++++++++++++++++++++ 4 files changed, 419 insertions(+), 1 deletion(-) diff --git a/AGENTS.md b/AGENTS.md index 404469a..a376b1a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -29,6 +29,7 @@ go test -v -cover ./ccs --max-age=7 # last 7 days only ./ccs --all # include everything ./ccs -- --plan # pass flags to claude +./ccs prune --dry-run # preview lossless size reduction of large files ``` ## Release Process @@ -91,6 +92,8 @@ go test -v -cover - `formatListItem()` - Formats a single list row - `deleteConversation()` - Removes conversation file and updates UI state - `getTopic()` - Extracts first user message as topic +- `pruneFile()` / `pruneStream()` - Shrink a conversation by dropping duplicate/redundant data (never touches user/assistant lines) +- `runPrune()` - `ccs prune` subcommand driver ### TUI Layout diff --git a/README.md b/README.md index c2c2599..fda13f7 100644 --- a/README.md +++ b/README.md @@ -13,9 +13,10 @@ Globally search and resume [Claude Code](https://claude.ai/claude-code) conversa - Search through all your Claude Code conversations - See session names (your custom titles or Claude's auto-generated ones) in the list - Preview conversation context with search term highlighting -- See message counts and hit counts per conversation +- See message counts, hit counts, and file size per conversation - Resume conversations directly from the search interface - Delete conversations with confirmation prompt +- Prune bloated conversations losslessly (`ccs prune`) - Pass flags through to `claude` (e.g., `--plan`) - Mouse wheel scrolling support @@ -84,6 +85,26 @@ ccs buyer -- --plan - `Ctrl+U` - Clear search - `Esc` / `Ctrl+C` - Quit +## Pruning + +Conversation files grow large over time. 
`ccs prune` shrinks them by removing data that duplicates content kept elsewhere, so pruned conversations still resume with their full dialogue intact: + +- `toolUseResult` fields - a copy of the tool result already present in `message.content` +- `file-history-snapshot` lines - rewind/checkpoint backups (pruning loses rewind history, not the conversation) + +User and assistant messages are never modified, and a file is only rewritten if its conversation line count is unchanged. + +`ccs prune` is a dry-run preview by default - it only reports what it would reclaim. Pass `--apply` to actually rewrite the files. + +```bash +ccs prune # preview savings, change nothing (files >= 50MB) +ccs prune --apply # prune after a confirmation prompt +ccs prune --apply --min-size=200 # only files >= 200MB +ccs prune --apply --no-tool-results # keep tool results, only drop snapshot backups +``` + +Run `ccs prune --help` for all flags. + ## How it works ccs reads conversation history from `~/.claude/projects/` and presents them in an interactive TUI. When you select a conversation, it changes to the original project directory and runs `claude --resume `. diff --git a/main.go b/main.go index a494a8b..e419def 100644 --- a/main.go +++ b/main.go @@ -4,6 +4,7 @@ import ( "bufio" "encoding/json" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -910,12 +911,289 @@ func buildItems(conversations []Conversation) []listItem { return items } +// ============================================================================ +// Prune - shrink conversation files by removing duplicate / redundant data +// ============================================================================ + +// pruneOpts selects which categories of redundant data to remove. Conversation +// (user/assistant) messages are never touched. 
type pruneOpts struct {
	dropSnapshots    bool // drop file-history-snapshot lines (rewind/checkpoint backups)
	stripToolResults bool // remove the toolUseResult field (a copy of the tool_result already in message.content)
}

// pruneStats accumulates what a prune pass read, wrote and removed.
type pruneStats struct {
	bytesIn          int64 // bytes read, counting one newline per line
	bytesOut         int64 // bytes that are (or would be) written, counting one newline per kept line
	droppedSnapshots int   // file-history-snapshot lines removed
	strippedResults  int   // lines whose toolUseResult field was removed
	convLinesIn      int   // user/assistant lines seen
	convLinesOut     int   // ... and kept (invariant: must equal convLinesIn)
}

// pruneMarshal re-encodes a decoded JSONL object on a single line.
//
// It deliberately avoids json.Marshal: Marshal HTML-escapes '<', '>' and '&'
// (and U+2028/U+2029) even inside json.RawMessage values, which would rewrite
// the bytes of the message payload we promise to keep and inflate lines that
// contain code or markup. With escaping disabled the raw values are only
// compacted. Object keys are emitted in sorted order.
func pruneMarshal(obj map[string]json.RawMessage) ([]byte, error) {
	var sb strings.Builder
	enc := json.NewEncoder(&sb)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(obj); err != nil {
		return nil, err
	}
	// Encode terminates the value with a newline; callers add their own.
	return []byte(strings.TrimSuffix(sb.String(), "\n")), nil
}

// pruneLine applies the transforms to one JSONL line. It returns the output
// bytes (nil = drop the line), the line's "type", and whether it was
// dropped/stripped. Unparseable lines pass through verbatim, as do lines that
// need no change (out then aliases line, so use it before line is reused).
func pruneLine(line []byte, opts pruneOpts) (out []byte, typ string, dropped, stripped bool) {
	var obj map[string]json.RawMessage
	if err := json.Unmarshal(line, &obj); err != nil {
		return line, "", false, false
	}
	if raw, ok := obj["type"]; ok {
		// A non-string "type" simply leaves typ empty; the line is then
		// treated as a non-conversation line and kept.
		_ = json.Unmarshal(raw, &typ)
	}
	if opts.dropSnapshots && typ == "file-history-snapshot" {
		return nil, typ, true, false
	}
	if opts.stripToolResults {
		if _, ok := obj["toolUseResult"]; ok {
			delete(obj, "toolUseResult")
			b, err := pruneMarshal(obj)
			if err != nil {
				return line, typ, false, false // keep original on marshal error
			}
			return b, typ, false, true
		}
	}
	return line, typ, false, false
}

// pruneStream reads JSONL from r and writes the pruned version to w (w may be
// nil to only measure). User/assistant lines are never dropped; the only
// change they can receive is removal of their toolUseResult field.
//
// Lines longer than 64 MiB make the scanner fail with bufio.ErrTooLong, which
// is returned together with the stats gathered so far.
func pruneStream(r io.Reader, w io.Writer, opts pruneOpts) (pruneStats, error) {
	var st pruneStats
	scanner := bufio.NewScanner(r)
	scanner.Buffer(make([]byte, 1024*1024), 64*1024*1024)
	for scanner.Scan() {
		line := scanner.Bytes()
		st.bytesIn += int64(len(line)) + 1
		out, typ, dropped, stripped := pruneLine(line, opts)
		isConv := typ == "user" || typ == "assistant"
		if isConv {
			st.convLinesIn++
		}
		if dropped {
			st.droppedSnapshots++
			continue
		}
		if stripped {
			st.strippedResults++
		}
		if isConv {
			st.convLinesOut++
		}
		if w != nil {
			// out may alias the scanner's buffer, so it must be written
			// before the next Scan call.
			if _, err := w.Write(out); err != nil {
				return st, err
			}
			if _, err := w.Write([]byte{'\n'}); err != nil {
				return st, err
			}
		}
		st.bytesOut += int64(len(out)) + 1
	}
	return st, scanner.Err()
}

// pruneFile prunes one conversation file. With write=true it streams to
// <path>.pruned and atomically replaces path, aborting (no replace) if the
// conversation line count would change. With write=false it only measures.
//
// The replacement keeps the original file's permission bits, is synced to
// disk before the rename, and the temp file is removed on every failure path
// (including a failed rename), so an unsuccessful prune leaves path untouched
// and no stray .pruned file behind. The returned stats are meaningful even
// when an integrity error is reported.
func pruneFile(path string, write bool, opts pruneOpts) (pruneStats, error) {
	in, err := os.Open(path)
	if err != nil {
		return pruneStats{}, err
	}
	defer in.Close()

	if !write {
		return pruneStream(in, nil, opts)
	}

	info, err := in.Stat()
	if err != nil {
		return pruneStats{}, err
	}
	mode := info.Mode().Perm()

	tmpPath := path + ".pruned"
	tmp, err := os.OpenFile(tmpPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, mode)
	if err != nil {
		return pruneStats{}, err
	}
	bw := bufio.NewWriter(tmp)
	st, err := pruneStream(in, bw, opts)
	if err == nil {
		err = bw.Flush()
	}
	if err == nil {
		// The umask (or a stale temp file from an earlier crash) may have
		// produced different bits than requested; force the original mode so
		// a private conversation never becomes more widely readable.
		err = tmp.Chmod(mode)
	}
	if err == nil {
		// Make the new contents durable before they replace the original.
		err = tmp.Sync()
	}
	if cerr := tmp.Close(); err == nil {
		err = cerr
	}
	if err == nil && st.convLinesIn != st.convLinesOut {
		err = fmt.Errorf("integrity check failed: %d conversation lines in, %d out", st.convLinesIn, st.convLinesOut)
	}
	if err == nil {
		// Release the source handle first: some platforms refuse to rename
		// over a file that is still open. The deferred Close then becomes a
		// harmless no-op error.
		in.Close()
		err = os.Rename(tmpPath, path)
	}
	if err != nil {
		os.Remove(tmpPath)
		return st, err
	}
	return st, nil
}

// findPrunableFiles returns .jsonl files at or above minSize, largest first.
+func findPrunableFiles(minSize int64) ([]string, error) { + type fileSize struct { + path string + size int64 + } + var found []fileSize + err := filepath.Walk(getProjectsDir(), func(path string, info os.FileInfo, err error) error { + if err != nil { + return nil + } + if !info.IsDir() && strings.HasSuffix(path, ".jsonl") && info.Size() >= minSize { + found = append(found, fileSize{path, info.Size()}) + } + return nil + }) + sort.Slice(found, func(i, j int) bool { return found[i].size > found[j].size }) + paths := make([]string, len(found)) + for i, f := range found { + paths[i] = f.path + } + return paths, err +} + +// shortPath shows the project dir + filename for readable reporting. +func shortPath(p string) string { + return filepath.Join(filepath.Base(filepath.Dir(p)), filepath.Base(p)) +} + +func runPrune(args []string) { + apply, yes := false, false // dry run by default; --apply to actually rewrite + minSizeMB := int64(50) + opts := pruneOpts{dropSnapshots: true, stripToolResults: true} + for _, a := range args { + switch { + case a == "-h" || a == "--help": + printPruneHelp() + return + case a == "--apply": + apply = true + case a == "--dry-run": + apply = false // explicit; this is already the default + case a == "-y" || a == "--yes": + yes = true + case a == "--no-snapshots": + opts.dropSnapshots = false + case a == "--no-tool-results": + opts.stripToolResults = false + case strings.HasPrefix(a, "--min-size="): + fmt.Sscanf(strings.TrimPrefix(a, "--min-size="), "%d", &minSizeMB) + default: + fmt.Fprintf(os.Stderr, "unknown prune flag: %s (try ccs prune --help)\n", a) + os.Exit(2) + } + } + if !opts.dropSnapshots && !opts.stripToolResults { + fmt.Fprintln(os.Stderr, "nothing to prune: both categories disabled") + os.Exit(2) + } + + files, err := findPrunableFiles(minSizeMB * 1024 * 1024) + if err != nil { + fmt.Fprintf(os.Stderr, "error scanning conversations: %v\n", err) + os.Exit(1) + } + if len(files) == 0 { + fmt.Printf("No conversations >= %dMB to 
prune.\n", minSizeMB) + return + } + + report := func(st pruneStats, path string) { + saved := st.bytesIn - st.bytesOut + fmt.Printf(" %-46s %8s -> %8s (-%s)\n", shortPath(path), formatBytes(st.bytesIn), formatBytes(st.bytesOut), formatBytes(saved)) + } + + if !apply { + fmt.Printf("Dry run (no changes). Conversations >= %dMB:\n\n", minSizeMB) + var in, out int64 + for _, f := range files { + st, err := pruneFile(f, false, opts) + if err != nil { + fmt.Printf(" %-46s error: %v\n", shortPath(f), err) + continue + } + report(st, f) + in += st.bytesIn + out += st.bytesOut + } + fmt.Printf("\nWould reclaim %s across %d files. Re-run with --apply to prune.\n", formatBytes(in-out), len(files)) + return + } + + if !yes { + var total int64 + for _, f := range files { + if info, e := os.Stat(f); e == nil { + total += info.Size() + } + } + fmt.Printf("Prune %d conversations (%s)? Rewrites them in place, removing duplicate tool\nresults and snapshot backups - dialogue is preserved. [y/N] ", len(files), formatBytes(total)) + var resp string + fmt.Scanln(&resp) + if resp != "y" && resp != "Y" { + fmt.Println("Aborted.") + return + } + } + + var in, out int64 + for _, f := range files { + st, err := pruneFile(f, true, opts) + if err != nil { + fmt.Printf(" %-46s FAILED: %v\n", shortPath(f), err) + continue + } + report(st, f) + in += st.bytesIn + out += st.bytesOut + } + fmt.Printf("\nReclaimed %s across %d files.\n", formatBytes(in-out), len(files)) +} + +func printPruneHelp() { + fmt.Print(`ccs prune - shrink conversation files by removing redundant data + +Removes data that duplicates content kept elsewhere, so pruned conversations +still resume with full dialogue: + - toolUseResult fields (a copy of the tool_result already in message.content) + - file-history-snapshot lines (rewind/checkpoint backups; pruning loses + rewind history, not the conversation) + +User and assistant messages are never modified. 
Each file is rewritten only if +its conversation line count is unchanged. + +By default this is a dry run that only previews savings - pass --apply to +actually rewrite the files. + +Usage: ccs prune [flags] + +Flags: + --apply Actually rewrite files (default is a dry-run preview) + --min-size=N Only consider files >= N MB (default: 50) + --no-tool-results Keep toolUseResult fields + --no-snapshots Keep file-history-snapshot lines + -y, --yes Skip the confirmation prompt (with --apply) + -h, --help Show this help + +Examples: + ccs prune Preview savings across files >= 50MB + ccs prune --apply Prune files >= 50MB (after confirmation) + ccs prune --apply --min-size=200 Prune files >= 200MB + ccs prune --apply --no-tool-results -y Only drop snapshot backups, no prompt +`) +} + func printHelp() { fmt.Printf(`ccs v%s - Claude Code Search Search and resume Claude Code conversations. Usage: ccs [filter] [-- claude-flags...] + ccs prune [flags] Shrink large conversations (see ccs prune --help) Arguments: filter Initial search query (optional) @@ -953,6 +1231,11 @@ Key bindings: func main() { args := os.Args[1:] + if len(args) > 0 && args[0] == "prune" { + runPrune(args[1:]) + return + } + for _, arg := range args { if arg == "-h" || arg == "--help" { printHelp() diff --git a/main_test.go b/main_test.go index 4606a33..65effe8 100644 --- a/main_test.go +++ b/main_test.go @@ -1,6 +1,7 @@ package main import ( + "bytes" "encoding/json" "os" "path/filepath" @@ -1335,3 +1336,113 @@ func TestPrintHelp(t *testing.T) { printHelp() } + +func TestPruneStreamRemovesDuplicatesKeepsDialogue(t *testing.T) { + input := strings.Join([]string{ + `{"type":"user","message":{"content":"hello"},"uuid":"u1"}`, + `{"type":"assistant","message":{"content":[{"type":"text","text":"hi"}]},"uuid":"a1"}`, + `{"type":"file-history-snapshot","snapshot":{"trackedFileBackups":{"big":"xxxxxxxxxx"}}}`, + `{"type":"user","message":{"content":[{"type":"tool_result","content":"the real 
output"}]},"toolUseResult":{"type":"text","text":"the real output dup"},"uuid":"u2"}`, + }, "\n") + "\n" + + var out bytes.Buffer + st, err := pruneStream(strings.NewReader(input), &out, pruneOpts{dropSnapshots: true, stripToolResults: true}) + if err != nil { + t.Fatalf("pruneStream: %v", err) + } + if st.droppedSnapshots != 1 { + t.Errorf("droppedSnapshots = %d, want 1", st.droppedSnapshots) + } + if st.strippedResults != 1 { + t.Errorf("strippedResults = %d, want 1", st.strippedResults) + } + if st.convLinesIn != 3 || st.convLinesOut != 3 { + t.Errorf("conv lines in/out = %d/%d, want 3/3", st.convLinesIn, st.convLinesOut) + } + o := out.String() + if strings.Contains(o, "trackedFileBackups") { + t.Error("file-history-snapshot should be dropped") + } + if strings.Contains(o, "toolUseResult") { + t.Error("toolUseResult field should be stripped") + } + for _, want := range []string{"hello", "the real output", "u2", "a1"} { + if !strings.Contains(o, want) { + t.Errorf("output should preserve %q", want) + } + } + for _, line := range strings.Split(strings.TrimSpace(o), "\n") { + var v map[string]json.RawMessage + if json.Unmarshal([]byte(line), &v) != nil { + t.Errorf("output line is not valid JSON: %s", line) + } + } +} + +func TestPruneOptsRespected(t *testing.T) { + input := `{"type":"file-history-snapshot","snapshot":{}}` + "\n" + + `{"type":"user","toolUseResult":{"x":1},"message":{"content":"hi"}}` + "\n" + var a bytes.Buffer + pruneStream(strings.NewReader(input), &a, pruneOpts{dropSnapshots: false, stripToolResults: true}) + if !strings.Contains(a.String(), "file-history-snapshot") { + t.Error("--no-snapshots should keep snapshot lines") + } + var b bytes.Buffer + pruneStream(strings.NewReader(input), &b, pruneOpts{dropSnapshots: true, stripToolResults: false}) + if !strings.Contains(b.String(), "toolUseResult") { + t.Error("--no-tool-results should keep toolUseResult") + } +} + +func TestPruneFileReplacesAndShrinks(t *testing.T) { + dir := t.TempDir() + path 
:= filepath.Join(dir, "big.jsonl") + content := `{"type":"user","message":{"content":"keep me"},"uuid":"u1"}` + "\n" + + `{"type":"file-history-snapshot","snapshot":{"data":"` + strings.Repeat("x", 5000) + `"}}` + "\n" + + `{"type":"assistant","message":{"content":"keep me too"},"uuid":"a1"}` + "\n" + if err := os.WriteFile(path, []byte(content), 0644); err != nil { + t.Fatal(err) + } + before, _ := os.Stat(path) + st, err := pruneFile(path, true, pruneOpts{dropSnapshots: true, stripToolResults: true}) + if err != nil { + t.Fatalf("pruneFile: %v", err) + } + if st.convLinesIn != st.convLinesOut { + t.Fatalf("integrity broken: %d != %d", st.convLinesIn, st.convLinesOut) + } + after, _ := os.Stat(path) + if after.Size() >= before.Size() { + t.Errorf("file should shrink: before %d, after %d", before.Size(), after.Size()) + } + if _, err := os.Stat(path + ".pruned"); !os.IsNotExist(err) { + t.Error(".pruned temp should be gone after atomic replace") + } + data, _ := os.ReadFile(path) + s := string(data) + if strings.Contains(s, "file-history-snapshot") { + t.Error("snapshot not removed from file") + } + if !strings.Contains(s, "keep me") || !strings.Contains(s, "keep me too") { + t.Error("dialogue not preserved in file") + } +} + +func TestPruneFileDryRunLeavesFileUnchanged(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "x.jsonl") + content := `{"type":"file-history-snapshot","snapshot":{}}` + "\n" + `{"type":"user","message":{"content":"hi"}}` + "\n" + os.WriteFile(path, []byte(content), 0644) + orig, _ := os.ReadFile(path) + st, err := pruneFile(path, false, pruneOpts{dropSnapshots: true, stripToolResults: true}) + if err != nil { + t.Fatalf("pruneFile dry: %v", err) + } + if st.bytesOut >= st.bytesIn { + t.Error("dry run should still report savings") + } + now, _ := os.ReadFile(path) + if string(now) != string(orig) { + t.Error("dry run must not modify the file") + } +}