diff --git a/AGENTS.md b/AGENTS.md index 404469a..a376b1a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -29,6 +29,7 @@ go test -v -cover ./ccs --max-age=7 # last 7 days only ./ccs --all # include everything ./ccs -- --plan # pass flags to claude +./ccs prune --dry-run # preview lossless size reduction of large files ``` ## Release Process @@ -91,6 +92,8 @@ go test -v -cover - `formatListItem()` - Formats a single list row - `deleteConversation()` - Removes conversation file and updates UI state - `getTopic()` - Extracts first user message as topic +- `pruneFile()` / `pruneStream()` - Shrink a conversation by dropping duplicate/redundant data (never touches user/assistant lines) +- `runPrune()` - `ccs prune` subcommand driver ### TUI Layout diff --git a/README.md b/README.md index c2c2599..fda13f7 100644 --- a/README.md +++ b/README.md @@ -13,9 +13,10 @@ Globally search and resume [Claude Code](https://claude.ai/claude-code) conversa - Search through all your Claude Code conversations - See session names (your custom titles or Claude's auto-generated ones) in the list - Preview conversation context with search term highlighting -- See message counts and hit counts per conversation +- See message counts, hit counts, and file size per conversation - Resume conversations directly from the search interface - Delete conversations with confirmation prompt +- Prune bloated conversations losslessly (`ccs prune`) - Pass flags through to `claude` (e.g., `--plan`) - Mouse wheel scrolling support @@ -84,6 +85,26 @@ ccs buyer -- --plan - `Ctrl+U` - Clear search - `Esc` / `Ctrl+C` - Quit +## Pruning + +Conversation files grow large over time. `ccs prune` shrinks them by removing data that duplicates content kept elsewhere, so pruned conversations still resume with their full dialogue intact: + +- `toolUseResult` fields - a copy of the tool result already present in `message.content` +- `file-history-snapshot` lines - rewind/checkpoint backups (pruning loses rewind history, not the conversation) + +User and assistant messages are never modified, and a file is only rewritten if its conversation line count is unchanged. + +`ccs prune` is a dry-run preview by default - it only reports what it would reclaim. Pass `--apply` to actually rewrite the files. + +```bash +ccs prune # preview savings, change nothing (files >= 50MB) +ccs prune --apply # prune after a confirmation prompt +ccs prune --apply --min-size=200 # only files >= 200MB +ccs prune --apply --no-tool-results # keep tool results, only drop snapshot backups +``` + +Run `ccs prune --help` for all flags. + ## How it works ccs reads conversation history from `~/.claude/projects/` and presents them in an interactive TUI. When you select a conversation, it changes to the original project directory and runs `claude --resume `. diff --git a/main.go b/main.go index a494a8b..e419def 100644 --- a/main.go +++ b/main.go @@ -4,6 +4,7 @@ import ( "bufio" "encoding/json" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -910,12 +911,289 @@ func buildItems(conversations []Conversation) []listItem { return items } +// ============================================================================ +// Prune - shrink conversation files by removing duplicate / redundant data +// ============================================================================ + +// pruneOpts selects which categories of redundant data to remove. Conversation +// (user/assistant) messages are never touched. +type pruneOpts struct { + dropSnapshots bool // drop file-history-snapshot lines (rewind/checkpoint backups) + stripToolResults bool // remove the toolUseResult field (a copy of the tool_result already in message.content) +} + +type pruneStats struct { + bytesIn int64 + bytesOut int64 + droppedSnapshots int + strippedResults int + convLinesIn int // user/assistant lines seen + convLinesOut int // ... and kept (invariant: must equal convLinesIn) +} + +// pruneLine applies the transforms to one JSONL line. It returns the output +// bytes (nil = drop the line), the line's "type", and whether it was +// dropped/stripped. Unparseable lines pass through verbatim. +func pruneLine(line []byte, opts pruneOpts) (out []byte, typ string, dropped, stripped bool) { + var obj map[string]json.RawMessage + if err := json.Unmarshal(line, &obj); err != nil { + return line, "", false, false + } + if raw, ok := obj["type"]; ok { + _ = json.Unmarshal(raw, &typ) + } + if opts.dropSnapshots && typ == "file-history-snapshot" { + return nil, typ, true, false + } + if opts.stripToolResults { + if _, ok := obj["toolUseResult"]; ok { + delete(obj, "toolUseResult") + b, err := json.Marshal(obj) + if err != nil { + return line, typ, false, false // keep original on marshal error + } + return b, typ, false, true + } + } + return line, typ, false, false +} + +// pruneStream reads JSONL from r and writes the pruned version to w (w may be +// nil to only measure). It never drops or modifies user/assistant lines. +func pruneStream(r io.Reader, w io.Writer, opts pruneOpts) (pruneStats, error) { + var st pruneStats + scanner := bufio.NewScanner(r) + scanner.Buffer(make([]byte, 1024*1024), 64*1024*1024) + for scanner.Scan() { + line := scanner.Bytes() + st.bytesIn += int64(len(line)) + 1 + out, typ, dropped, stripped := pruneLine(line, opts) + isConv := typ == "user" || typ == "assistant" + if isConv { + st.convLinesIn++ + } + if dropped { + st.droppedSnapshots++ + continue + } + if stripped { + st.strippedResults++ + } + if isConv { + st.convLinesOut++ + } + if w != nil { + if _, err := w.Write(out); err != nil { + return st, err + } + if _, err := w.Write([]byte{'\n'}); err != nil { + return st, err + } + } + st.bytesOut += int64(len(out)) + 1 + } + return st, scanner.Err() +} + +// pruneFile prunes one conversation file. With write=true it streams to +// .pruned and atomically replaces path, aborting (no replace) if the +// conversation line count would change. With write=false it only measures. +func pruneFile(path string, write bool, opts pruneOpts) (pruneStats, error) { + in, err := os.Open(path) + if err != nil { + return pruneStats{}, err + } + defer in.Close() + + if !write { + return pruneStream(in, nil, opts) + } + + tmpPath := path + ".pruned" + tmp, err := os.Create(tmpPath) + if err != nil { + return pruneStats{}, err + } + bw := bufio.NewWriter(tmp) + st, err := pruneStream(in, bw, opts) + if err == nil { + err = bw.Flush() + } + if cerr := tmp.Close(); err == nil { + err = cerr + } + if err == nil && st.convLinesIn != st.convLinesOut { + err = fmt.Errorf("integrity check failed: %d conversation lines in, %d out", st.convLinesIn, st.convLinesOut) + } + if err != nil { + os.Remove(tmpPath) + return st, err + } + return st, os.Rename(tmpPath, path) +} + +// findPrunableFiles returns .jsonl files at or above minSize, largest first. +func findPrunableFiles(minSize int64) ([]string, error) { + type fileSize struct { + path string + size int64 + } + var found []fileSize + err := filepath.Walk(getProjectsDir(), func(path string, info os.FileInfo, err error) error { + if err != nil { + return nil + } + if !info.IsDir() && strings.HasSuffix(path, ".jsonl") && info.Size() >= minSize { + found = append(found, fileSize{path, info.Size()}) + } + return nil + }) + sort.Slice(found, func(i, j int) bool { return found[i].size > found[j].size }) + paths := make([]string, len(found)) + for i, f := range found { + paths[i] = f.path + } + return paths, err +} + +// shortPath shows the project dir + filename for readable reporting. +func shortPath(p string) string { + return filepath.Join(filepath.Base(filepath.Dir(p)), filepath.Base(p)) +} + +func runPrune(args []string) { + apply, yes := false, false // dry run by default; --apply to actually rewrite + minSizeMB := int64(50) + opts := pruneOpts{dropSnapshots: true, stripToolResults: true} + for _, a := range args { + switch { + case a == "-h" || a == "--help": + printPruneHelp() + return + case a == "--apply": + apply = true + case a == "--dry-run": + apply = false // explicit; this is already the default + case a == "-y" || a == "--yes": + yes = true + case a == "--no-snapshots": + opts.dropSnapshots = false + case a == "--no-tool-results": + opts.stripToolResults = false + case strings.HasPrefix(a, "--min-size="): + fmt.Sscanf(strings.TrimPrefix(a, "--min-size="), "%d", &minSizeMB) + default: + fmt.Fprintf(os.Stderr, "unknown prune flag: %s (try ccs prune --help)\n", a) + os.Exit(2) + } + } + if !opts.dropSnapshots && !opts.stripToolResults { + fmt.Fprintln(os.Stderr, "nothing to prune: both categories disabled") + os.Exit(2) + } + + files, err := findPrunableFiles(minSizeMB * 1024 * 1024) + if err != nil { + fmt.Fprintf(os.Stderr, "error scanning conversations: %v\n", err) + os.Exit(1) + } + if len(files) == 0 { + fmt.Printf("No conversations >= %dMB to prune.\n", minSizeMB) + return + } + + report := func(st pruneStats, path string) { + saved := st.bytesIn - st.bytesOut + fmt.Printf(" %-46s %8s -> %8s (-%s)\n", shortPath(path), formatBytes(st.bytesIn), formatBytes(st.bytesOut), formatBytes(saved)) + } + + if !apply { + fmt.Printf("Dry run (no changes). Conversations >= %dMB:\n\n", minSizeMB) + var in, out int64 + for _, f := range files { + st, err := pruneFile(f, false, opts) + if err != nil { + fmt.Printf(" %-46s error: %v\n", shortPath(f), err) + continue + } + report(st, f) + in += st.bytesIn + out += st.bytesOut + } + fmt.Printf("\nWould reclaim %s across %d files. Re-run with --apply to prune.\n", formatBytes(in-out), len(files)) + return + } + + if !yes { + var total int64 + for _, f := range files { + if info, e := os.Stat(f); e == nil { + total += info.Size() + } + } + fmt.Printf("Prune %d conversations (%s)? Rewrites them in place, removing duplicate tool\nresults and snapshot backups - dialogue is preserved. [y/N] ", len(files), formatBytes(total)) + var resp string + fmt.Scanln(&resp) + if resp != "y" && resp != "Y" { + fmt.Println("Aborted.") + return + } + } + + var in, out int64 + for _, f := range files { + st, err := pruneFile(f, true, opts) + if err != nil { + fmt.Printf(" %-46s FAILED: %v\n", shortPath(f), err) + continue + } + report(st, f) + in += st.bytesIn + out += st.bytesOut + } + fmt.Printf("\nReclaimed %s across %d files.\n", formatBytes(in-out), len(files)) +} + +func printPruneHelp() { + fmt.Print(`ccs prune - shrink conversation files by removing redundant data + +Removes data that duplicates content kept elsewhere, so pruned conversations +still resume with full dialogue: + - toolUseResult fields (a copy of the tool_result already in message.content) + - file-history-snapshot lines (rewind/checkpoint backups; pruning loses + rewind history, not the conversation) + +User and assistant messages are never modified. Each file is rewritten only if +its conversation line count is unchanged. + +By default this is a dry run that only previews savings - pass --apply to +actually rewrite the files. + +Usage: ccs prune [flags] + +Flags: + --apply Actually rewrite files (default is a dry-run preview) + --min-size=N Only consider files >= N MB (default: 50) + --no-tool-results Keep toolUseResult fields + --no-snapshots Keep file-history-snapshot lines + -y, --yes Skip the confirmation prompt (with --apply) + -h, --help Show this help + +Examples: + ccs prune Preview savings across files >= 50MB + ccs prune --apply Prune files >= 50MB (after confirmation) + ccs prune --apply --min-size=200 Prune files >= 200MB + ccs prune --apply --no-tool-results -y Only drop snapshot backups, no prompt +`) +} + func printHelp() { fmt.Printf(`ccs v%s - Claude Code Search Search and resume Claude Code conversations. Usage: ccs [filter] [-- claude-flags...] + ccs prune [flags] Shrink large conversations (see ccs prune --help) Arguments: filter Initial search query (optional) @@ -953,6 +1231,11 @@ Key bindings: func main() { args := os.Args[1:] + if len(args) > 0 && args[0] == "prune" { + runPrune(args[1:]) + return + } + for _, arg := range args { if arg == "-h" || arg == "--help" { printHelp() diff --git a/main_test.go b/main_test.go index 4606a33..65effe8 100644 --- a/main_test.go +++ b/main_test.go @@ -1,6 +1,7 @@ package main import ( + "bytes" "encoding/json" "os" "path/filepath" @@ -1335,3 +1336,113 @@ func TestPrintHelp(t *testing.T) { printHelp() } + +func TestPruneStreamRemovesDuplicatesKeepsDialogue(t *testing.T) { + input := strings.Join([]string{ + `{"type":"user","message":{"content":"hello"},"uuid":"u1"}`, + `{"type":"assistant","message":{"content":[{"type":"text","text":"hi"}]},"uuid":"a1"}`, + `{"type":"file-history-snapshot","snapshot":{"trackedFileBackups":{"big":"xxxxxxxxxx"}}}`, + `{"type":"user","message":{"content":[{"type":"tool_result","content":"the real output"}]},"toolUseResult":{"type":"text","text":"the real output dup"},"uuid":"u2"}`, + }, "\n") + "\n" + + var out bytes.Buffer + st, err := pruneStream(strings.NewReader(input), &out, pruneOpts{dropSnapshots: true, stripToolResults: true}) + if err != nil { + t.Fatalf("pruneStream: %v", err) + } + if st.droppedSnapshots != 1 { + t.Errorf("droppedSnapshots = %d, want 1", st.droppedSnapshots) + } + if st.strippedResults != 1 { + t.Errorf("strippedResults = %d, want 1", st.strippedResults) + } + if st.convLinesIn != 3 || st.convLinesOut != 3 { + t.Errorf("conv lines in/out = %d/%d, want 3/3", st.convLinesIn, st.convLinesOut) + } + o := out.String() + if strings.Contains(o, "trackedFileBackups") { + t.Error("file-history-snapshot should be dropped") + } + if strings.Contains(o, "toolUseResult") { + t.Error("toolUseResult field should be stripped") + } + for _, want := range []string{"hello", "the real output", "u2", "a1"} { + if !strings.Contains(o, want) { + t.Errorf("output should preserve %q", want) + } + } + for _, line := range strings.Split(strings.TrimSpace(o), "\n") { + var v map[string]json.RawMessage + if json.Unmarshal([]byte(line), &v) != nil { + t.Errorf("output line is not valid JSON: %s", line) + } + } +} + +func TestPruneOptsRespected(t *testing.T) { + input := `{"type":"file-history-snapshot","snapshot":{}}` + "\n" + + `{"type":"user","toolUseResult":{"x":1},"message":{"content":"hi"}}` + "\n" + var a bytes.Buffer + pruneStream(strings.NewReader(input), &a, pruneOpts{dropSnapshots: false, stripToolResults: true}) + if !strings.Contains(a.String(), "file-history-snapshot") { + t.Error("--no-snapshots should keep snapshot lines") + } + var b bytes.Buffer + pruneStream(strings.NewReader(input), &b, pruneOpts{dropSnapshots: true, stripToolResults: false}) + if !strings.Contains(b.String(), "toolUseResult") { + t.Error("--no-tool-results should keep toolUseResult") + } +} + +func TestPruneFileReplacesAndShrinks(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "big.jsonl") + content := `{"type":"user","message":{"content":"keep me"},"uuid":"u1"}` + "\n" + + `{"type":"file-history-snapshot","snapshot":{"data":"` + strings.Repeat("x", 5000) + `"}}` + "\n" + + `{"type":"assistant","message":{"content":"keep me too"},"uuid":"a1"}` + "\n" + if err := os.WriteFile(path, []byte(content), 0644); err != nil { + t.Fatal(err) + } + before, _ := os.Stat(path) + st, err := pruneFile(path, true, pruneOpts{dropSnapshots: true, stripToolResults: true}) + if err != nil { + t.Fatalf("pruneFile: %v", err) + } + if st.convLinesIn != st.convLinesOut { + t.Fatalf("integrity broken: %d != %d", st.convLinesIn, st.convLinesOut) + } + after, _ := os.Stat(path) + if after.Size() >= before.Size() { + t.Errorf("file should shrink: before %d, after %d", before.Size(), after.Size()) + } + if _, err := os.Stat(path + ".pruned"); !os.IsNotExist(err) { + t.Error(".pruned temp should be gone after atomic replace") + } + data, _ := os.ReadFile(path) + s := string(data) + if strings.Contains(s, "file-history-snapshot") { + t.Error("snapshot not removed from file") + } + if !strings.Contains(s, "keep me") || !strings.Contains(s, "keep me too") { + t.Error("dialogue not preserved in file") + } +} + +func TestPruneFileDryRunLeavesFileUnchanged(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "x.jsonl") + content := `{"type":"file-history-snapshot","snapshot":{}}` + "\n" + `{"type":"user","message":{"content":"hi"}}` + "\n" + os.WriteFile(path, []byte(content), 0644) + orig, _ := os.ReadFile(path) + st, err := pruneFile(path, false, pruneOpts{dropSnapshots: true, stripToolResults: true}) + if err != nil { + t.Fatalf("pruneFile dry: %v", err) + } + if st.bytesOut >= st.bytesIn { + t.Error("dry run should still report savings") + } + now, _ := os.ReadFile(path) + if string(now) != string(orig) { + t.Error("dry run must not modify the file") + } +}