Skip to content

Commit f4b8896

Browse files
committed
feat(debrief): scaffold dogfood debriefs from deterministic artifacts
Closes the debrief asymmetry flagged in docs/v4-release/ dogfood-shade-20260413.md §6: failed dogfood runs prompt findings-doc authorship, successful runs leave only artifacts. The Apr 13 atlas-shade run was the canonical example — ran cleanly end-to-end but went undocumented for a full day until a later session dug the artifacts out of untracked paths and a volatile /tmp tmux snapshot. New subcommand: `script.py debrief --from-tasks` reads tasks.json + complexity-report + PRD and emits a markdown skeleton to docs/v4-release/dogfood-<slug>-<YYYYMMDD>.md. Mechanical sections are filled deterministically (task count, dep coverage %, priority distribution, complexity stats, top-5 complexity table, goal extracted from PRD executive summary). Judgment sections (what worked / what broke / comparison / meta) stay as explicit TODO placeholders — those require authorship, not automation. Validated on the real atlas-shade artifacts: emits 12 tasks, 8 with deps (66.7%), avg complexity 4.58, top-3 matching the earlier manual audit (#12=8, #10=7, #7=6). Goal extraction pulls the Executive Summary opener, which required scanning past the frontmatter metadata block (** Version, ** Date, ** Sentinel) instead of stopping at the first ##. Flags: --from-tasks / --complexity-report / --prd (default taskmaster paths) --slug (required — filename anchor) --output-dir (default docs/v4-release) --grade (optional validate-prd grade to embed) --force (overwrite existing output) Tests: 3 new cases covering happy path (mechanical sections filled + TODO placeholders present), missing-tasks-json error, and --force overwrite semantics. Full suite: 225 passed / 1 skip. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5ea1354 commit f4b8896

File tree

2 files changed

+262
-0
lines changed

2 files changed

+262
-0
lines changed

script.py

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1018,6 +1018,166 @@ def cmd_append_workflow_section(args: argparse.Namespace) -> None:
10181018
"backup_path": backup_path, "dry_run": False})
10191019

10201020

1021+
def cmd_debrief(args: argparse.Namespace) -> None:
    """Emit a dogfood-run debrief skeleton from deterministic artifacts.

    Closes the debrief asymmetry documented in docs/v4-release/
    dogfood-shade-20260413.md: failed runs prompt authorship, successful
    runs leave only artifacts. This subcommand makes success-case debrief
    authorship a single deterministic command.

    Reads tasks.json + complexity-report + optional PRD and emits a
    markdown skeleton with the mechanically-derivable sections filled and
    the judgment-requiring sections (what worked / what broke / meta)
    left as TODO placeholders for a human.

    Args (via argparse):
        tasks_json: path to tasks.json (required to exist).
        complexity_report: path to the complexity report (optional file).
        prd: optional path to the source PRD, used for goal extraction.
        slug: filename anchor for the output document.
        output_dir: directory for the emitted skeleton.
        grade: optional validate-prd grade string to embed verbatim.
        force: overwrite an existing output file.

    Side effects: creates output_dir if needed, writes the skeleton file,
    and emits a JSON summary. Exits via fail() on missing/corrupt inputs
    or an existing output without --force.
    """
    tasks_path = Path(args.tasks_json)
    complexity_path = Path(args.complexity_report)
    prd_path = Path(args.prd) if args.prd else None
    output_dir = Path(args.output_dir)
    slug = args.slug
    # Snapshot the clock ONCE so the filename stamp, the **Date:** header,
    # and the footer timestamp can never disagree (previously three separate
    # datetime.now() calls could straddle a midnight/minute boundary).
    now = datetime.now()
    date_stamp = now.strftime("%Y%m%d")

    if not tasks_path.is_file():
        fail(f"tasks.json not found: {tasks_path}")

    # Go through fail() on corrupt JSON like every other error path,
    # instead of surfacing a raw traceback.
    try:
        tasks_data = json.loads(tasks_path.read_text())
    except json.JSONDecodeError as e:
        fail(f"tasks.json is not valid JSON: {tasks_path}: {e}")
    # tasks.json has two shapes: {"master": {"tasks": [...]}} or {"tasks": [...]}
    tasks_root = tasks_data.get("master", tasks_data) if isinstance(tasks_data, dict) else {}
    tasks = tasks_root.get("tasks", []) if isinstance(tasks_root, dict) else []
    if not isinstance(tasks, list):
        fail("tasks.json has unexpected shape — expected a list under .master.tasks or .tasks")

    task_count = len(tasks)
    with_deps = sum(1 for t in tasks if t.get("dependencies"))
    dep_coverage_pct = round(100 * with_deps / task_count, 1) if task_count else 0
    # NOTE: only these three buckets are counted; tasks with a missing or
    # non-standard priority are not represented, so the distribution may
    # sum to less than task_count.
    priorities = {p: sum(1 for t in tasks if t.get("priority") == p)
                  for p in ("high", "medium", "low")}

    complexity_rows = []
    complexity_stats = None
    if complexity_path.is_file():
        try:
            comp_data = json.loads(complexity_path.read_text())
        except json.JSONDecodeError as e:
            fail(f"complexity report is not valid JSON: {complexity_path}: {e}")
        analysis = comp_data.get("complexityAnalysis", []) if isinstance(comp_data, dict) else []
        if analysis:
            scores = [a.get("complexityScore", 0) for a in analysis]
            threshold = comp_data.get("meta", {}).get("thresholdScore", 5)
            complexity_stats = {
                "threshold": threshold,
                "avg": round(sum(scores) / len(scores), 2),
                "min": min(scores), "max": max(scores),
                # >= threshold counts the boundary score as "above".
                "above_threshold": sum(1 for s in scores if s >= threshold),
            }
            complexity_rows = sorted(analysis, key=lambda a: -a.get("complexityScore", 0))[:5]

    goal_line = ""
    if prd_path and prd_path.is_file():
        prd_text = prd_path.read_text()
        # Scan the first ~60 lines for the first sentence-like paragraph.
        # Skip headings, frontmatter metadata (**Key:** value lines), and horizontal rules.
        # Covers both PRD shapes we see: (a) goal in preamble before first ##,
        # (b) goal in an "Executive Summary" section after a metadata block.
        head = "\n".join(prd_text.splitlines()[:60])
        for para in re.split(r"\n\s*\n", head):
            para = para.strip()
            if not para or para.startswith("#") or para.startswith("---"):
                continue
            # A paragraph that's only metadata lines (every line starts with `**`) is frontmatter.
            non_meta_lines = [ln for ln in para.split("\n")
                              if ln.strip() and not ln.strip().startswith("**")]
            if not non_meta_lines:
                continue
            # lstrip("> ") drops a leading blockquote marker; cap at 300 chars.
            goal_line = non_meta_lines[0].strip().lstrip("> ")[:300]
            break

    top_rows = "\n".join(
        f"| {r.get('taskId')} | {r.get('taskTitle','').strip()} | {r.get('complexityScore')} |"
        for r in complexity_rows
    ) or "| — | (no complexity report found) | — |"

    prio_line = " / ".join(f"{k}:{v}" for k, v in priorities.items())
    complexity_line = (
        f"threshold {complexity_stats['threshold']}, avg {complexity_stats['avg']}, "
        f"range [{complexity_stats['min']}, {complexity_stats['max']}]"
        if complexity_stats else "(no complexity report)"
    )
    grade_line = (
        f"**Validation grade:** {args.grade}" if args.grade else
        "**Validation grade:** TODO — re-run `validate-prd` and fill in (e.g. `EXCELLENT 56/57`)"
    )

    skeleton = f"""# Dogfood Run: {slug}

**Date:** {now.strftime("%Y-%m-%d")}
**Target project:** {slug}
**Artifacts:**
- PRD: `{prd_path or 'TODO'}`
- Tasks: `{tasks_path}`
- Complexity: `{complexity_path if complexity_path.is_file() else 'TODO — not found at ' + str(complexity_path)}`

---

## 1. Goal that went in

{goal_line or 'TODO — summarise the user-facing goal (extract from PRD executive summary).'}

## 2. What v4 produced (mechanical)

- **Tasks generated:** {task_count}
- **Tasks with dependencies:** {with_deps} ({dep_coverage_pct}%)
- **Priority distribution:** {prio_line}
- **Complexity:** {complexity_line}
- {grade_line}

### Top complexity tasks

| ID | Title | Score |
|---|---|---|
{top_rows}

## 3. What worked

TODO — one or two paragraphs. Look for: zero-config behaviour, domain fit,
requirement traceability, non-goal enforcement, validation pass.

## 4. What broke

TODO — one or two paragraphs. Look for: anything the user / operator had
to work around, hook conflicts, session retirement, flags that didn't
work, naming mismatches. If the run was clean, say so explicitly and
note what you tried to break.

## 5. Comparison / context

TODO — reference related runs (self-dogfood, sibling projects). What does
this run tell you that the others didn't?

## 6. Meta-findings

TODO — patterns worth capturing for next time. Leave blank if none.

---

_Scaffolded by `script.py debrief --from-tasks` on {now.strftime("%Y-%m-%d %H:%M")}. TODO sections need human authorship — the mechanical sections are load-bearing; the judgment sections are what you actually have to write._
"""

    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"dogfood-{slug}-{date_stamp}.md"
    if output_path.exists() and not args.force:
        fail(f"{output_path} exists. Pass --force to overwrite.")
    output_path.write_text(skeleton)

    placeholders = skeleton.count("TODO")
    emit({
        "ok": True,
        "output_path": str(output_path),
        "task_count": task_count,
        "tasks_with_deps": with_deps,
        "priority_distribution": priorities,
        "complexity_stats": complexity_stats,
        "placeholders_remaining": placeholders,
        # Heuristic: a fully-mechanical fill leaves fewer than 8 TODOs
        # (the judgment sections plus optional grade/goal fallbacks).
        "mechanical_sections_filled": placeholders < 8,
    })
1179+
1180+
10211181
def cmd_read_state(args: argparse.Namespace) -> None:
10221182
"""Read crash recovery state."""
10231183
state = _read_execution_state()
@@ -1795,6 +1955,26 @@ def build_parser() -> argparse.ArgumentParser:
17951955
p.add_argument("--content-file", required=True, help="Path to content to append")
17961956
p.add_argument("--dry-run", action="store_true", help="Preview without writing")
17971957

1958+
# debrief
1959+
p = sub.add_parser("debrief",
1960+
help="Scaffold a dogfood-run debrief from tasks.json + complexity + prd")
1961+
p.add_argument("--from-tasks", dest="tasks_json",
1962+
default=".taskmaster/tasks/tasks.json",
1963+
help="Path to tasks.json (default: .taskmaster/tasks/tasks.json)")
1964+
p.add_argument("--complexity-report",
1965+
default=".taskmaster/reports/task-complexity-report.json",
1966+
help="Path to complexity report")
1967+
p.add_argument("--prd", default=".taskmaster/docs/prd.md",
1968+
help="Path to source PRD (for goal extraction)")
1969+
p.add_argument("--slug", required=True,
1970+
help="Project slug for filename (e.g. 'shade', 'atlas-nig')")
1971+
p.add_argument("--output-dir", default="docs/v4-release",
1972+
help="Output directory for the debrief skeleton")
1973+
p.add_argument("--grade", default=None,
1974+
help="Optional validate-prd grade to embed (e.g. 'EXCELLENT 56/57')")
1975+
p.add_argument("--force", action="store_true",
1976+
help="Overwrite if the output file already exists")
1977+
17981978
return parser
17991979

18001980

@@ -1813,6 +1993,7 @@ def build_parser() -> argparse.ArgumentParser:
18131993
"detect-capabilities": cmd_detect_capabilities,
18141994
"validate-setup": cmd_validate_setup,
18151995
"append-workflow": cmd_append_workflow_section,
1996+
"debrief": cmd_debrief,
18161997
}
18171998

18181999

tests/test_script.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -966,3 +966,84 @@ def test_dry_run_writes_nothing(self, tmp_path):
966966
assert out["action"] == "would_appended"
967967
assert target.read_text() == original # unchanged
968968
assert "BEGIN prd-taskmaster-v2 workflow" in out["content_preview"]
969+
970+
971+
# ═══════════════════════════════════════════════════════════════════════════════
972+
# DEBRIEF — closes the debrief asymmetry (docs/v4-release/dogfood-shade-20260413.md §6)
973+
# ═══════════════════════════════════════════════════════════════════════════════
974+
975+
976+
class TestDebrief:
    """Scaffolds a success-case debrief from tasks.json + complexity + PRD."""

    def _write_artifacts(self, tmp_path):
        # Build the three fixture payloads up front, then write them out.
        tasks_payload = {
            "master": {"tasks": [
                {"id": 1, "title": "Setup n8n", "priority": "high", "dependencies": []},
                {"id": 2, "title": "Build workflow", "priority": "high", "dependencies": [1]},
                {"id": 3, "title": "Polish docs", "priority": "low", "dependencies": []},
            ]}
        }
        complexity_payload = {
            "meta": {"thresholdScore": 5},
            "complexityAnalysis": [
                {"taskId": 1, "taskTitle": "Setup n8n", "complexityScore": 4},
                {"taskId": 2, "taskTitle": "Build workflow", "complexityScore": 7},
                {"taskId": 3, "taskTitle": "Polish docs", "complexityScore": 2},
            ],
        }
        tasks = tmp_path / "tasks.json"
        complexity = tmp_path / "complexity.json"
        prd = tmp_path / "prd.md"
        tasks.write_text(json.dumps(tasks_payload))
        complexity.write_text(json.dumps(complexity_payload))
        prd.write_text("# My Project PRD\n\nBuild a nice thing that does X, Y, and Z reliably.\n\n## Next section\n")
        return tasks, complexity, prd

    def test_scaffold_includes_mechanical_stats_and_todo_placeholders(self, tmp_path):
        tasks, complexity, prd = self._write_artifacts(tmp_path)
        out_dir = tmp_path / "docs"
        cli_args = [
            "debrief",
            "--from-tasks", str(tasks),
            "--complexity-report", str(complexity),
            "--prd", str(prd),
            "--slug", "demo",
            "--output-dir", str(out_dir),
            "--grade", "EXCELLENT 50/51",
        ]
        rc, out = run_script(SCRIPT_PY, cli_args)
        assert rc == 0
        assert out["ok"] is True
        # Mechanical stats surfaced in the JSON summary.
        assert out["task_count"] == 3
        assert out["tasks_with_deps"] == 1
        assert out["priority_distribution"] == {"high": 2, "medium": 0, "low": 1}
        assert out["complexity_stats"]["above_threshold"] == 1
        debrief_file = Path(out["output_path"])
        assert debrief_file.exists()
        body = debrief_file.read_text()
        # Mechanical sections filled
        assert "Tasks generated:** 3" in body
        assert "EXCELLENT 50/51" in body
        assert "Build workflow" in body  # top complexity row
        assert "Build a nice thing that does X, Y, and Z" in body  # goal extracted from PRD
        # Judgment sections still TODO
        assert body.count("TODO") >= 4  # worked/broke/comparison/meta + maybe more
    def test_missing_tasks_json_fails_loudly(self, tmp_path):
        missing = tmp_path / "nope.json"
        rc, out = run_script(SCRIPT_PY, [
            "debrief",
            "--from-tasks", str(missing),
            "--slug", "x",
            "--output-dir", str(tmp_path),
        ])
        assert rc != 0

    def test_force_required_to_overwrite(self, tmp_path):
        tasks, complexity, prd = self._write_artifacts(tmp_path)
        out_dir = tmp_path / "out"
        base_args = [
            "debrief",
            "--from-tasks", str(tasks),
            "--complexity-report", str(complexity),
            "--prd", str(prd),
            "--slug", "demo",
            "--output-dir", str(out_dir),
        ]
        first_rc, _ = run_script(SCRIPT_PY, base_args)
        assert first_rc == 0
        # Second run without --force must refuse to clobber.
        second_rc, _ = run_script(SCRIPT_PY, base_args)
        assert second_rc != 0
        forced_rc, _ = run_script(SCRIPT_PY, base_args + ["--force"])
        assert forced_rc == 0

0 commit comments

Comments
 (0)