Skip to content

Commit f4b8896

Browse files
committed
feat(debrief): scaffold dogfood debriefs from deterministic artifacts
Closes the debrief asymmetry flagged in docs/v4-release/ dogfood-shade-20260413.md §6: failed dogfood runs prompt findings-doc authorship, successful runs leave only artifacts. The Apr 13 atlas-shade run was the canonical example — ran cleanly end-to-end but went undocumented for a full day until a later session dug the artifacts out of untracked paths and a volatile /tmp tmux snapshot. New subcommand: `script.py debrief --from-tasks` reads tasks.json + complexity-report + PRD and emits a markdown skeleton to docs/v4-release/dogfood-<slug>-<YYYYMMDD>.md. Mechanical sections are filled deterministically (task count, dep coverage %, priority distribution, complexity stats, top-5 complexity table, goal extracted from PRD executive summary). Judgment sections (what worked / what broke / comparison / meta) stay as explicit TODO placeholders — those require authorship, not automation. Validated on the real atlas-shade artifacts: emits 12 tasks, 8 with deps (66.7%), avg complexity 4.58, top-3 matching the earlier manual audit (#12=8, #10=7, #7=6). Goal extraction pulls the Executive Summary opener, which required scanning past the frontmatter metadata block (** Version, ** Date, ** Sentinel) instead of stopping at the first ##. Flags: --from-tasks / --complexity-report / --prd (default taskmaster paths) --slug (required — filename anchor) --output-dir (default docs/v4-release) --grade (optional validate-prd grade to embed) --force (overwrite existing output) Tests: 3 new cases covering happy path (mechanical sections filled + TODO placeholders present), missing-tasks-json error, and --force overwrite semantics. Full suite: 225 passed / 1 skip. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5ea1354 commit f4b8896

File tree

2 files changed

+262
-0
lines changed

2 files changed

+262
-0
lines changed

script.py

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1018,6 +1018,166 @@ def cmd_append_workflow_section(args: argparse.Namespace) -> None:
10181018
"backup_path": backup_path, "dry_run": False})
10191019

10201020

1021+
def cmd_debrief(args: argparse.Namespace) -> None:
    """Emit a dogfood-run debrief skeleton from deterministic artifacts.

    Closes the debrief asymmetry documented in docs/v4-release/
    dogfood-shade-20260413.md: failed runs prompt authorship, successful
    runs leave only artifacts. This subcommand makes success-case debrief
    authorship a single deterministic command.

    Reads tasks.json + complexity-report + optional PRD and emits a
    markdown skeleton with the mechanically-derivable sections filled and
    the judgment-requiring sections (what worked / what broke / meta)
    left as TODO placeholders for a human.

    Args (via argparse):
        tasks_json: path to tasks.json (required to exist).
        complexity_report: path to the complexity report (optional file).
        prd: optional path to the source PRD, used for goal extraction.
        slug: filename anchor for the output document.
        output_dir: directory for the emitted skeleton.
        grade: optional validate-prd grade string to embed verbatim.
        force: overwrite an existing output file.

    Side effects: creates output_dir if needed, writes the skeleton file,
    and emits a JSON summary. Exits via fail() on missing/corrupt inputs
    or an existing output without --force.
    """
    tasks_path = Path(args.tasks_json)
    complexity_path = Path(args.complexity_report)
    prd_path = Path(args.prd) if args.prd else None
    output_dir = Path(args.output_dir)
    slug = args.slug
    # Snapshot the clock ONCE so the filename stamp, the **Date:** header,
    # and the footer timestamp can never disagree (previously three separate
    # datetime.now() calls could straddle a midnight/minute boundary).
    now = datetime.now()
    date_stamp = now.strftime("%Y%m%d")

    if not tasks_path.is_file():
        fail(f"tasks.json not found: {tasks_path}")

    # Go through fail() on corrupt JSON like every other error path,
    # instead of surfacing a raw traceback.
    try:
        tasks_data = json.loads(tasks_path.read_text())
    except json.JSONDecodeError as e:
        fail(f"tasks.json is not valid JSON: {tasks_path}: {e}")
    # tasks.json has two shapes: {"master": {"tasks": [...]}} or {"tasks": [...]}
    tasks_root = tasks_data.get("master", tasks_data) if isinstance(tasks_data, dict) else {}
    tasks = tasks_root.get("tasks", []) if isinstance(tasks_root, dict) else []
    if not isinstance(tasks, list):
        fail("tasks.json has unexpected shape — expected a list under .master.tasks or .tasks")

    task_count = len(tasks)
    with_deps = sum(1 for t in tasks if t.get("dependencies"))
    dep_coverage_pct = round(100 * with_deps / task_count, 1) if task_count else 0
    # NOTE: only these three buckets are counted; tasks with a missing or
    # non-standard priority are not represented, so the distribution may
    # sum to less than task_count.
    priorities = {p: sum(1 for t in tasks if t.get("priority") == p)
                  for p in ("high", "medium", "low")}

    complexity_rows = []
    complexity_stats = None
    if complexity_path.is_file():
        try:
            comp_data = json.loads(complexity_path.read_text())
        except json.JSONDecodeError as e:
            fail(f"complexity report is not valid JSON: {complexity_path}: {e}")
        analysis = comp_data.get("complexityAnalysis", []) if isinstance(comp_data, dict) else []
        if analysis:
            scores = [a.get("complexityScore", 0) for a in analysis]
            threshold = comp_data.get("meta", {}).get("thresholdScore", 5)
            complexity_stats = {
                "threshold": threshold,
                "avg": round(sum(scores) / len(scores), 2),
                "min": min(scores), "max": max(scores),
                # >= threshold counts the boundary score as "above".
                "above_threshold": sum(1 for s in scores if s >= threshold),
            }
            complexity_rows = sorted(analysis, key=lambda a: -a.get("complexityScore", 0))[:5]

    goal_line = ""
    if prd_path and prd_path.is_file():
        prd_text = prd_path.read_text()
        # Scan the first ~60 lines for the first sentence-like paragraph.
        # Skip headings, frontmatter metadata (**Key:** value lines), and horizontal rules.
        # Covers both PRD shapes we see: (a) goal in preamble before first ##,
        # (b) goal in an "Executive Summary" section after a metadata block.
        head = "\n".join(prd_text.splitlines()[:60])
        for para in re.split(r"\n\s*\n", head):
            para = para.strip()
            if not para or para.startswith("#") or para.startswith("---"):
                continue
            # A paragraph that's only metadata lines (every line starts with `**`) is frontmatter.
            non_meta_lines = [ln for ln in para.split("\n")
                              if ln.strip() and not ln.strip().startswith("**")]
            if not non_meta_lines:
                continue
            # lstrip("> ") drops a leading blockquote marker; cap at 300 chars.
            goal_line = non_meta_lines[0].strip().lstrip("> ")[:300]
            break

    top_rows = "\n".join(
        f"| {r.get('taskId')} | {r.get('taskTitle','').strip()} | {r.get('complexityScore')} |"
        for r in complexity_rows
    ) or "| — | (no complexity report found) | — |"

    prio_line = " / ".join(f"{k}:{v}" for k, v in priorities.items())
    complexity_line = (
        f"threshold {complexity_stats['threshold']}, avg {complexity_stats['avg']}, "
        f"range [{complexity_stats['min']}, {complexity_stats['max']}]"
        if complexity_stats else "(no complexity report)"
    )
    grade_line = (
        f"**Validation grade:** {args.grade}" if args.grade else
        "**Validation grade:** TODO — re-run `validate-prd` and fill in (e.g. `EXCELLENT 56/57`)"
    )

    skeleton = f"""# Dogfood Run: {slug}

**Date:** {now.strftime("%Y-%m-%d")}
**Target project:** {slug}
**Artifacts:**
- PRD: `{prd_path or 'TODO'}`
- Tasks: `{tasks_path}`
- Complexity: `{complexity_path if complexity_path.is_file() else 'TODO — not found at ' + str(complexity_path)}`

---

## 1. Goal that went in

{goal_line or 'TODO — summarise the user-facing goal (extract from PRD executive summary).'}

## 2. What v4 produced (mechanical)

- **Tasks generated:** {task_count}
- **Tasks with dependencies:** {with_deps} ({dep_coverage_pct}%)
- **Priority distribution:** {prio_line}
- **Complexity:** {complexity_line}
- {grade_line}

### Top complexity tasks

| ID | Title | Score |
|---|---|---|
{top_rows}

## 3. What worked

TODO — one or two paragraphs. Look for: zero-config behaviour, domain fit,
requirement traceability, non-goal enforcement, validation pass.

## 4. What broke

TODO — one or two paragraphs. Look for: anything the user / operator had
to work around, hook conflicts, session retirement, flags that didn't
work, naming mismatches. If the run was clean, say so explicitly and
note what you tried to break.

## 5. Comparison / context

TODO — reference related runs (self-dogfood, sibling projects). What does
this run tell you that the others didn't?

## 6. Meta-findings

TODO — patterns worth capturing for next time. Leave blank if none.

---

_Scaffolded by `script.py debrief --from-tasks` on {now.strftime("%Y-%m-%d %H:%M")}. TODO sections need human authorship — the mechanical sections are load-bearing; the judgment sections are what you actually have to write._
"""

    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"dogfood-{slug}-{date_stamp}.md"
    if output_path.exists() and not args.force:
        fail(f"{output_path} exists. Pass --force to overwrite.")
    output_path.write_text(skeleton)

    placeholders = skeleton.count("TODO")
    emit({
        "ok": True,
        "output_path": str(output_path),
        "task_count": task_count,
        "tasks_with_deps": with_deps,
        "priority_distribution": priorities,
        "complexity_stats": complexity_stats,
        "placeholders_remaining": placeholders,
        # Heuristic: a fully-mechanical fill leaves fewer than 8 TODOs
        # (the judgment sections plus optional grade/goal fallbacks).
        "mechanical_sections_filled": placeholders < 8,
    })
1179+
1180+
10211181
def cmd_read_state(args: argparse.Namespace) -> None:
10221182
"""Read crash recovery state."""
10231183
state = _read_execution_state()
@@ -1795,6 +1955,26 @@ def build_parser() -> argparse.ArgumentParser:
17951955
p.add_argument("--content-file", required=True, help="Path to content to append")
17961956
p.add_argument("--dry-run", action="store_true", help="Preview without writing")
17971957

1958+
# debrief
1959+
p = sub.add_parser("debrief",
1960+
help="Scaffold a dogfood-run debrief from tasks.json + complexity + prd")
1961+
p.add_argument("--from-tasks", dest="tasks_json",
1962+
default=".taskmaster/tasks/tasks.json",
1963+
help="Path to tasks.json (default: .taskmaster/tasks/tasks.json)")
1964+
p.add_argument("--complexity-report",
1965+
default=".taskmaster/reports/task-complexity-report.json",
1966+
help="Path to complexity report")
1967+
p.add_argument("--prd", default=".taskmaster/docs/prd.md",
1968+
help="Path to source PRD (for goal extraction)")
1969+
p.add_argument("--slug", required=True,
1970+
help="Project slug for filename (e.g. 'shade', 'atlas-nig')")
1971+
p.add_argument("--output-dir", default="docs/v4-release",
1972+
help="Output directory for the debrief skeleton")
1973+
p.add_argument("--grade", default=None,
1974+
help="Optional validate-prd grade to embed (e.g. 'EXCELLENT 56/57')")
1975+
p.add_argument("--force", action="store_true",
1976+
help="Overwrite if the output file already exists")
1977+
17981978
return parser
17991979

18001980

@@ -1813,6 +1993,7 @@ def build_parser() -> argparse.ArgumentParser:
18131993
"detect-capabilities": cmd_detect_capabilities,
18141994
"validate-setup": cmd_validate_setup,
18151995
"append-workflow": cmd_append_workflow_section,
1996+
"debrief": cmd_debrief,
18161997
}
18171998

18181999

tests/test_script.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -966,3 +966,84 @@ def test_dry_run_writes_nothing(self, tmp_path):
966966
assert out["action"] == "would_appended"
967967
assert target.read_text() == original # unchanged
968968
assert "BEGIN prd-taskmaster-v2 workflow" in out["content_preview"]
969+
970+
971+
# ═══════════════════════════════════════════════════════════════════════════════
972+
# DEBRIEF — closes the debrief asymmetry (docs/v4-release/dogfood-shade-20260413.md §6)
973+
# ═══════════════════════════════════════════════════════════════════════════════
974+
975+
976+
class TestDebrief:
    """Scaffolds a success-case debrief from tasks.json + complexity + PRD."""

    def _write_artifacts(self, tmp_path):
        # Build the three fixture payloads up front, then write them out.
        tasks_payload = {
            "master": {"tasks": [
                {"id": 1, "title": "Setup n8n", "priority": "high", "dependencies": []},
                {"id": 2, "title": "Build workflow", "priority": "high", "dependencies": [1]},
                {"id": 3, "title": "Polish docs", "priority": "low", "dependencies": []},
            ]}
        }
        complexity_payload = {
            "meta": {"thresholdScore": 5},
            "complexityAnalysis": [
                {"taskId": 1, "taskTitle": "Setup n8n", "complexityScore": 4},
                {"taskId": 2, "taskTitle": "Build workflow", "complexityScore": 7},
                {"taskId": 3, "taskTitle": "Polish docs", "complexityScore": 2},
            ],
        }
        tasks = tmp_path / "tasks.json"
        complexity = tmp_path / "complexity.json"
        prd = tmp_path / "prd.md"
        tasks.write_text(json.dumps(tasks_payload))
        complexity.write_text(json.dumps(complexity_payload))
        prd.write_text("# My Project PRD\n\nBuild a nice thing that does X, Y, and Z reliably.\n\n## Next section\n")
        return tasks, complexity, prd

    def test_scaffold_includes_mechanical_stats_and_todo_placeholders(self, tmp_path):
        tasks, complexity, prd = self._write_artifacts(tmp_path)
        out_dir = tmp_path / "docs"
        cli_args = [
            "debrief",
            "--from-tasks", str(tasks),
            "--complexity-report", str(complexity),
            "--prd", str(prd),
            "--slug", "demo",
            "--output-dir", str(out_dir),
            "--grade", "EXCELLENT 50/51",
        ]
        rc, out = run_script(SCRIPT_PY, cli_args)
        assert rc == 0
        assert out["ok"] is True
        # Mechanical stats surfaced in the JSON summary.
        assert out["task_count"] == 3
        assert out["tasks_with_deps"] == 1
        assert out["priority_distribution"] == {"high": 2, "medium": 0, "low": 1}
        assert out["complexity_stats"]["above_threshold"] == 1
        debrief_file = Path(out["output_path"])
        assert debrief_file.exists()
        body = debrief_file.read_text()
        # Mechanical sections filled
        assert "Tasks generated:** 3" in body
        assert "EXCELLENT 50/51" in body
        assert "Build workflow" in body  # top complexity row
        assert "Build a nice thing that does X, Y, and Z" in body  # goal extracted from PRD
        # Judgment sections still TODO
        assert body.count("TODO") >= 4  # worked/broke/comparison/meta + maybe more
    def test_missing_tasks_json_fails_loudly(self, tmp_path):
        missing = tmp_path / "nope.json"
        rc, out = run_script(SCRIPT_PY, [
            "debrief",
            "--from-tasks", str(missing),
            "--slug", "x",
            "--output-dir", str(tmp_path),
        ])
        assert rc != 0

    def test_force_required_to_overwrite(self, tmp_path):
        tasks, complexity, prd = self._write_artifacts(tmp_path)
        out_dir = tmp_path / "out"
        base_args = [
            "debrief",
            "--from-tasks", str(tasks),
            "--complexity-report", str(complexity),
            "--prd", str(prd),
            "--slug", "demo",
            "--output-dir", str(out_dir),
        ]
        first_rc, _ = run_script(SCRIPT_PY, base_args)
        assert first_rc == 0
        # Second run without --force must refuse to clobber.
        second_rc, _ = run_script(SCRIPT_PY, base_args)
        assert second_rc != 0
        forced_rc, _ = run_script(SCRIPT_PY, base_args + ["--force"])
        assert forced_rc == 0

0 commit comments

Comments
 (0)