feat(calc-tasks): context-aware task count formula (team, scope, themes)

claude · claude · commit bb269b5e3cf3 · 2026-04-13T14:27:10.000+08:00
v4 Shade dogfood (2026-04-13) surfaced LEARNING #11: the old formula
`ceil(requirements * 1.5)` clamped [10, 40] was monotonic and
context-blind. `calc-tasks --requirements 32` returned 40 (the max
clamp) for a solo operator on Shade Phase 5 — a brownfield final-phase
project with 4 thematic groups where the right answer was 12. The
child correctly overrode to 12 based on domain reasoning, but the
script was "WAY off" (user's exact phrasing).

Rewrite cmd_calc_tasks to take four inputs:
- requirements_count (required)
- team_size (default 1, solo)
- scope_phase (greenfield | brownfield | final_phase, default greenfield)
- thematic_groups (optional, N natural parent groups in the scope)

New formula — thematic groups dominate when set:
  if thematic_groups > 0:
      base = thematic_groups * 3   # ~3 parent tasks per natural group
  else:
      base = max(1, ceil(reqs / 4))   # ~1 task per 4 requirements
  scope_adjust = {greenfield: 1.2, brownfield: 1.0, final_phase: 1.0}
  team_multiplier = 1 + (team_size - 1) / 10
  raw = base * scope_adjust * team_multiplier
  recommended = clamp(round(raw), 3, 25)

Greenfield gets 1.2x because nothing exists to lean on; brownfield
and final_phase stay at 1.0 because the group-count heuristic already
captures the decomposition correctly. New floor is 3 (was 10) and new
ceiling is 25 (was 40): quick-mode is now viable, and 30+ tasks on a
single scope is almost always over-decomposition.

Shade validation case (32 reqs, solo, final_phase, 4 groups) now
returns 12 exactly. Confirmed via direct script.py invocation.

Output JSON schema changed:
- Old: flat fields (requirements_count, raw_calculation, recommended, formula)
- New: nested structure (inputs, calculation, recommended, reasoning,
  formula, scope_adjust_map)

Updated 3 test files to match the new schema and new formula:
- tests/test_script.py: replaced 7 old tests with 8 new ones covering
  the Shade validation case, solo-greenfield default, brownfield,
  team multiplier, floor/ceiling clamps, thematic-group override, and
  the new reasoning field
- tests/test_critical_paths.py: updated negative-input and
  large-input expectations for the new clamps
- tests/test_edge_cases.py: same updates

All 12 calc-tasks tests pass on the new formula. Note that both the
recommended values and the output JSON shape change for every caller
of calc-tasks; the old flat JSON schema was purely internal to the
skill, so no consumers outside the skill are expected to be affected.

Fixes: LEARNING #11

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/script.py b/script.py
@@ -741,15 +741,94 @@ def cmd_validate_prd(args: argparse.Namespace) -> None:
 
 
 def cmd_calc_tasks(args: argparse.Namespace) -> None:
-    """Calculate recommended task count: requirements * 1.5, clamped 10-40."""
-    raw = math.ceil(args.requirements * 1.5)
-    recommended = max(10, min(40, raw))
+    """Context-aware task count recommendation.
+
+    v4 dogfood (LEARNING #11) proved the old formula — ceil(reqs * 1.5)
+    clamped [10, 40] — was monotonic and context-blind: 32 requirements
+    returned 40 for a solo operator working on the final phase of a
+    4-thematic-group brownfield project where 12 was the right answer
+    (child correctly overrode to 12 based on domain reasoning).
+
+    New formula considers four inputs:
+      - requirements_count (required): how many REQs the PRD lists
+      - team_size (default 1): solo vs team changes task granularity
+      - scope_phase (default greenfield): new build vs. brownfield vs.
+        final phase — greenfield needs MORE decomposition because nothing
+        exists to lean on
+      - thematic_groups (optional): if the scope decomposes into natural
+        parent groups (e.g. n8n / Ollama / HTB / UI polish = 4 groups),
+        each group gets ~3 parent tasks
+
+    Formula (the strongest signal is thematic groups):
+      if thematic_groups > 0:
+          base = thematic_groups * 3   # ~3 parent tasks per natural group
+      else:
+          base = max(1, ceil(reqs / 4))   # ~1 task per 4 requirements
+      scope_adjust = {greenfield: 1.2, brownfield: 1.0, final_phase: 1.0}[phase]
+      team_multiplier = 1 + (team_size - 1) / 10
+      raw = base * scope_adjust * team_multiplier
+      recommended = clamp(round(raw), 3, 25)
+
+    The Shade validation case (32 reqs, solo, final_phase, 4 groups):
+      base = 4 * 3 = 12; adjust = 1.0; mult = 1.0; raw = 12; recommended = 12 ✓
+
+    The new floor is 3 (not 10) so quick-mode tasks are possible later,
+    and the new ceiling is 25 (not 40) because 30+ tasks on a single
+    scope is almost always over-decomposition.
+    """
+    reqs = args.requirements
+    team_size = max(1, args.team_size)
+    scope_phase = args.scope_phase
+    thematic_groups = args.thematic_groups if args.thematic_groups is not None else 0
+
+    if thematic_groups > 0:
+        base = thematic_groups * 3
+        base_source = f"{thematic_groups} thematic groups * 3 tasks/group"
+    else:
+        base = max(1, math.ceil(reqs / 4))
+        base_source = f"ceil({reqs} requirements / 4) = {math.ceil(reqs / 4)}"
+
+    scope_adjust_map = {
+        "greenfield": 1.2,
+        "brownfield": 1.0,
+        "final_phase": 1.0,
+    }
+    scope_adjust = scope_adjust_map.get(scope_phase, 1.2)
+
+    team_multiplier = 1 + (team_size - 1) / 10
+
+    raw = base * scope_adjust * team_multiplier
+    recommended = max(3, min(25, round(raw)))
+
+    reasoning_parts = [
+        f"Base: {base} ({base_source}).",
+        f"Scope phase '{scope_phase}' multiplier: {scope_adjust}.",
+        f"Team size {team_size} multiplier: {round(team_multiplier, 2)}.",
+        f"Raw: {round(raw, 2)} -> rounded + clamped to [3, 25]: {recommended}.",
+    ]
+
     emit({
         "ok": True,
-        "requirements_count": args.requirements,
-        "raw_calculation": raw,
+        "inputs": {
+            "requirements_count": reqs,
+            "team_size": team_size,
+            "scope_phase": scope_phase,
+            "thematic_groups": thematic_groups,
+        },
+        "calculation": {
+            "base": base,
+            "scope_adjust": scope_adjust,
+            "team_multiplier": round(team_multiplier, 2),
+            "raw": round(raw, 2),
+        },
         "recommended": recommended,
-        "formula": "ceil(requirements * 1.5), clamped [10, 40]",
+        "reasoning": " ".join(reasoning_parts),
+        "formula": (
+            "base = (thematic_groups * 3) if thematic_groups > 0 else max(1, ceil(reqs/4)); "
+            "raw = base * scope_adjust[phase] * (1 + (team_size-1)/10); "
+            "recommended = clamp(round(raw), 3, 25)"
+        ),
+        "scope_adjust_map": scope_adjust_map,
     })
 
 
@@ -1581,8 +1660,26 @@ def build_parser() -> argparse.ArgumentParser:
     )
 
     # calc-tasks
-    p = sub.add_parser("calc-tasks", help="Calculate recommended task count")
+    p = sub.add_parser("calc-tasks", help="Calculate recommended task count (context-aware)")
     p.add_argument("--requirements", required=True, type=int, help="Number of functional requirements")
+    p.add_argument(
+        "--team-size",
+        type=int,
+        default=1,
+        help="Number of engineers who will work on the tasks (default 1, solo operator)",
+    )
+    p.add_argument(
+        "--scope-phase",
+        choices=["greenfield", "brownfield", "final_phase"],
+        default="greenfield",
+        help="Project stage: greenfield=new build, brownfield=adding to existing, final_phase=completion work (default greenfield)",
+    )
+    p.add_argument(
+        "--thematic-groups",
+        type=int,
+        default=None,
+        help="Number of natural parent groups the scope decomposes into (e.g. 4 for n8n/Ollama/HTB/UI). Optional; informs the base.",
+    )
 
     # gen-test-tasks
     p = sub.add_parser("gen-test-tasks", help="Generate USER-TEST task specs")
diff --git a/tests/test_critical_paths.py b/tests/test_critical_paths.py
@@ -274,20 +274,21 @@ def test_stories_with_one_ac(self, tmp_path):
 
 
 class TestCalcTasksRawCalculation:
-    """Verify raw_calculation field for edge cases."""
+    """Verify calculation fields for edge cases under the v4.1 formula."""
 
     def test_negative_raw_calculation(self):
-        """Negative input produces negative raw calculation but clamped recommended."""
+        """Negative input still clamps recommended to floor 3."""
         rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "-5"])
         assert rc == 0
-        assert out["raw_calculation"] == -7  # ceil(-5 * 1.5) = ceil(-7.5) = -7
-        assert out["recommended"] == 10  # clamped to minimum
+        # base = max(1, ceil(-5/4)) = max(1, -1) = 1
+        # adjust = 1.2, mult = 1.0 → raw = 1.2 → clamped to 3
+        assert out["recommended"] == 3
 
     def test_raw_vs_recommended_divergence(self):
-        """Large inputs show divergence between raw and recommended."""
+        """Large inputs show divergence between raw and clamped recommended."""
         rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "100"])
         assert rc == 0
-        assert out["raw_calculation"] == 150
-        assert out["recommended"] == 40
-        # The divergence should be clear
-        assert out["raw_calculation"] > out["recommended"]
+        # base = ceil(100/4) = 25; adjust 1.2 → raw = 30; clamped to 25
+        assert out["calculation"]["raw"] == 30.0
+        assert out["recommended"] == 25
+        assert out["calculation"]["raw"] > out["recommended"]
diff --git a/tests/test_edge_cases.py b/tests/test_edge_cases.py
@@ -218,20 +218,21 @@ def test_heading_with_special_chars(self):
 
 
 class TestCalcTasksEdges:
-    """Edge cases for task count calculation."""
+    """Edge cases for task count calculation under the v4.1 formula."""
 
     def test_negative_requirements_clamps(self):
-        """Negative input still clamps to minimum."""
+        """Negative input still clamps to minimum (floor 3)."""
         rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "-5"])
-        # argparse accepts negative ints
         assert rc == 0
-        assert out["recommended"] == 10  # clamped to min
+        # base = max(1, ceil(-5/4)) = 1, adjust = 1.2 → raw 1.2 → clamped to 3
+        assert out["recommended"] == 3
 
     def test_very_large_requirements(self):
-        """Very large number clamps to 40."""
+        """Very large number clamps to ceiling 25."""
         rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "1000"])
         assert rc == 0
-        assert out["recommended"] == 40
+        # base = ceil(1000/4) = 250, adjust 1.2 → raw 300 → clamped to 25
+        assert out["recommended"] == 25
 
 
 # ═══════════════════════════════════════════════════════════════════════════════
diff --git a/tests/test_script.py b/tests/test_script.py
@@ -342,58 +342,121 @@ def test_validate_grade_excellent_threshold(self, sample_prd):
 
 
 class TestCalcTasks:
-    """Test cmd_calc_tasks — task count calculation."""
+    """Test cmd_calc_tasks — context-aware task count calculation (v4.1)."""
 
-    def test_calc_tasks_formula(self):
-        """Verify formula: ceil(requirements * 1.5), clamped [10, 40]."""
-        rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "15"])
+    def test_calc_tasks_solo_greenfield_default(self):
+        """Default inputs: solo, greenfield, no thematic groups.
+
+        32 reqs → base = ceil(32/4) = 8; adjust = 1.2; mult = 1.0 →
+        raw = 9.6 → recommended = 10.
+        """
+        rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "32"])
         assert rc == 0
         assert out["ok"] is True
-        assert out["requirements_count"] == 15
-        assert out["raw_calculation"] == 23  # ceil(15 * 1.5) = 23
-        assert out["recommended"] == 23
+        assert out["inputs"]["requirements_count"] == 32
+        assert out["inputs"]["team_size"] == 1
+        assert out["inputs"]["scope_phase"] == "greenfield"
+        assert out["calculation"]["base"] == 8
+        assert out["calculation"]["scope_adjust"] == 1.2
+        assert out["recommended"] == 10
 
-    def test_calc_tasks_minimum_clamp(self):
-        """Small requirement count clamps to minimum 10."""
-        rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "3"])
+    def test_calc_tasks_shade_validation_case(self):
+        """The exact case that surfaced LEARNING #11 in the v4 dogfood.
+
+        Shade Phase 5: 32 requirements, solo operator, final_phase,
+        4 thematic groups (n8n / Ollama / HTB / UI polish). Old formula
+        returned 40 (max clamp). The child correctly overrode to 12.
+        v4.1 formula must return 12.
+        """
+        rc, out = run_script(SCRIPT_PY, [
+            "calc-tasks",
+            "--requirements", "32",
+            "--team-size", "1",
+            "--scope-phase", "final_phase",
+            "--thematic-groups", "4",
+        ])
+        assert rc == 0
+        assert out["recommended"] == 12
+        # base = 4 thematic groups * 3 = 12
+        assert out["calculation"]["base"] == 12
+        # final_phase scope adjustment is 1.0 (no reduction — final phase
+        # still needs per-group decomposition)
+        assert out["calculation"]["scope_adjust"] == 1.0
+
+    def test_calc_tasks_brownfield_solo_no_groups(self):
+        """20 reqs, solo, brownfield, no groups.
+
+        base = ceil(20/4) = 5; adjust = 1.0; mult = 1.0; raw = 5;
+        clamped to floor 3 not needed, recommended = 5.
+        """
+        rc, out = run_script(SCRIPT_PY, [
+            "calc-tasks",
+            "--requirements", "20",
+            "--scope-phase", "brownfield",
+        ])
         assert rc == 0
-        assert out["raw_calculation"] == 5  # ceil(3 * 1.5) = 5
-        assert out["recommended"] == 10  # clamped to minimum
+        assert out["recommended"] == 5
+
+    def test_calc_tasks_team_multiplier(self):
+        """Team of 5 on 20 req greenfield.
 
-    def test_calc_tasks_maximum_clamp(self):
-        """Large requirement count clamps to maximum 40."""
-        rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "50"])
+        base = ceil(20/4) = 5; adjust = 1.2; mult = 1.4; raw = 8.4;
+        recommended = 8.
+        """
+        rc, out = run_script(SCRIPT_PY, [
+            "calc-tasks",
+            "--requirements", "20",
+            "--team-size", "5",
+            "--scope-phase", "greenfield",
+        ])
         assert rc == 0
-        assert out["raw_calculation"] == 75  # ceil(50 * 1.5) = 75
-        assert out["recommended"] == 40  # clamped to maximum
+        assert out["calculation"]["team_multiplier"] == 1.4
+        assert out["recommended"] == 8
+
+    def test_calc_tasks_floor_clamp(self):
+        """Tiny input hits floor of 3.
 
-    def test_calc_tasks_exact_boundary_10(self):
-        """Requirements that produce exactly 10 tasks."""
-        # ceil(7 * 1.5) = ceil(10.5) = 11, ceil(6 * 1.5) = 9 -> clamp to 10
-        rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "6"])
+        1 req, solo, greenfield, no groups. base = 1; adjust = 1.2;
+        raw = 1.2 → clamped to floor 3.
+        """
+        rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "1"])
         assert rc == 0
-        assert out["recommended"] == 10  # 9 clamped to 10
+        assert out["recommended"] == 3  # clamped to floor
+
+    def test_calc_tasks_ceiling_clamp(self):
+        """Large input hits ceiling of 25.
 
-    def test_calc_tasks_exact_boundary_40(self):
-        """Requirements that produce exactly 40 tasks."""
-        # ceil(27 * 1.5) = ceil(40.5) = 41 -> clamp to 40
-        # ceil(26 * 1.5) = 39
-        rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "26"])
+        200 reqs, solo, greenfield. base = 50; adjust = 1.2; raw = 60;
+        clamped to ceiling 25.
+        """
+        rc, out = run_script(SCRIPT_PY, [
+            "calc-tasks", "--requirements", "200", "--scope-phase", "greenfield",
+        ])
         assert rc == 0
-        assert out["recommended"] == 39  # just under clamp
+        assert out["recommended"] == 25  # clamped to ceiling
+
+    def test_calc_tasks_thematic_groups_drives_base(self):
+        """When thematic_groups is set, it drives base via N*3 rule.
 
-    def test_calc_tasks_zero_requirements(self):
-        """Zero requirements clamps to minimum."""
-        rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "0"])
+        4 reqs, 8 thematic groups → base = 8*3 = 24 (not ceil(4/4)=1).
+        """
+        rc, out = run_script(SCRIPT_PY, [
+            "calc-tasks",
+            "--requirements", "4",
+            "--thematic-groups", "8",
+            "--scope-phase", "brownfield",
+        ])
         assert rc == 0
-        assert out["recommended"] == 10  # clamped
+        assert out["calculation"]["base"] == 24
+        assert out["recommended"] == 24  # base 24, adjust 1.0, mult 1.0
 
-    def test_calc_tasks_one_requirement(self):
-        """Single requirement clamps to minimum."""
-        rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "1"])
+    def test_calc_tasks_reasoning_field_present(self):
+        """Output must include a reasoning field so users see the 'why'."""
+        rc, out = run_script(SCRIPT_PY, ["calc-tasks", "--requirements", "15"])
         assert rc == 0
-        assert out["raw_calculation"] == 2  # ceil(1.5) = 2
-        assert out["recommended"] == 10
+        assert "reasoning" in out
+        assert "base" in out["reasoning"].lower()
+        assert "15" in out["reasoning"]  # references the input
 
 
 # ═══════════════════════════════════════════════════════════════════════════════