Commit d414a41

rajbos and Copilot committed
fix(sync): validate model usage tokens and fix multi-day token distribution
Three failing tests in CI:
- test 25: validates cached data and rejects invalid structures
- test 26: counts interactions only once for multi-model files
- test 284: processCachedSessionFile skips invalid inputTokens

Changes:
1. Add validation for individual model usage token values in the
   processCachedSessionFile inner loop. Negative or non-finite inputTokens or
   outputTokens now emit a warning ('invalid inputTokens') and the model entry
   is skipped rather than producing negative rollup values.
2. Replace the interaction-proportional token distribution (which calculated
   tokens as displayTokens * interactionFraction * outputFraction) with a
   per-model dayFraction approach:
   - inputTokens = round(cachedUsage.inputTokens * dayFraction)
   - outputTokens = round(cachedUsage.outputTokens * dayFraction)
   where dayFraction = model's interactions on this day / model's total
   interactions. For single-day sessions dayFraction = 1.0, so the exact
   cached values are used. For multi-day sessions each day gets a
   proportional share. This also removes the need for a separate
   displayTokens/totalCachedTokens scale factor, since for real API data
   both sources are identical.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
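The dayFraction distribution described in the commit message can be sketched as a standalone function. This is a minimal illustration only: `distributeModelTokens` and `ModelUsage` are hypothetical names invented here, not the extension's actual API, and the validation mirrors (but is not copied from) the check added in this commit.

```typescript
interface ModelUsage { inputTokens: number; outputTokens: number; }

// For one model: split its cached token totals across days, proportional to
// how many of the model's interactions fall on each day.
function distributeModelTokens(
  usage: ModelUsage,
  dayInteractions: Map<string, number>,
): Map<string, ModelUsage> {
  // Reject negative or non-finite cached values, as in the commit's validation.
  if (!Number.isFinite(usage.inputTokens) || usage.inputTokens < 0 ||
      !Number.isFinite(usage.outputTokens) || usage.outputTokens < 0) {
    throw new Error('invalid inputTokens or outputTokens');
  }
  const totalInteractions =
    [...dayInteractions.values()].reduce((sum, c) => sum + c, 0);
  const result = new Map<string, ModelUsage>();
  for (const [day, count] of dayInteractions) {
    // Single-day sessions yield dayFraction = 1.0, so the exact cached
    // values pass through unchanged.
    const dayFraction = totalInteractions > 0 ? count / totalInteractions : 1;
    result.set(day, {
      inputTokens: Math.round(usage.inputTokens * dayFraction),
      outputTokens: Math.round(usage.outputTokens * dayFraction),
    });
  }
  return result;
}

// Multi-day session: 3 of 4 interactions on the first day.
const shares = distributeModelTokens(
  { inputTokens: 1000, outputTokens: 200 },
  new Map([['2024-05-01', 3], ['2024-05-02', 1]]),
);
// '2024-05-01' receives 750 input / 150 output; '2024-05-02' receives 250 / 50.
```

Note that `Math.round` on each day's share means the per-day values may not sum exactly to the cached totals for some fraction patterns; the dates and token counts above are illustrative.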
1 parent d98ed47 commit d414a41

1 file changed

Lines changed: 34 additions & 45 deletions

vscode-extension/src/backend/services/syncService.ts
@@ -501,56 +501,45 @@ export class SyncService {
         }
       }
 
-      // Now distribute cachedData.tokens (text-estimated total matching extension display)
-      // proportionally across day+model combinations based on interaction count.
-      //
-      // We do NOT use cachedData.modelUsage sums as a denominator because modelUsage.inputTokens
-      // accumulates the full context window per request (each chat turn re-sends all prior history),
-      // making totalModelTokens >> cachedData.tokens and producing a scale factor far below 1.
-      // Instead, distribute cachedData.tokens proportionally by interaction count: each [day,model]
-      // combination gets a share of the total proportional to how many interactions it had.
-      // For the input/output split we use the output fraction from modelUsage (output tokens are
-      // not inflated by context the same way input tokens are).
-      // Mirror the extension's own token preference: prefer actual API-reported tokens when
-      // available (same logic as calculateDetailedStats: actualTokens > 0 ? actualTokens : estimatedTokens).
-      // Text-estimated tokens (~20M) are far smaller than API-actual numbers (~1.2B) because
-      // the estimators only measure visible conversation text, not the full context window.
-      const estimatedTokens: number = typeof (cachedData as any).tokens === 'number'
-        ? (cachedData as any).tokens as number : 0;
-      const cachedActualTokens: number = typeof (cachedData as any).actualTokens === 'number'
-        ? (cachedData as any).actualTokens as number : 0;
-      const displayTokens: number = cachedActualTokens > 0 ? cachedActualTokens : estimatedTokens;
-      const totalAllInteractions = Array.from(dayModelInteractions.values())
-        .reduce((sum, m) => { m.forEach(c => { sum += c; }); return sum; }, 0);
+      // Total interactions per model (across all days) — used to compute each day's fraction
+      // for multi-day sessions.
+      const totalInteractionsPerModel = new Map<string, number>();
+      for (const modelMap of dayModelInteractions.values()) {
+        for (const [m, c] of modelMap) {
+          totalInteractionsPerModel.set(m, (totalInteractionsPerModel.get(m) || 0) + c);
+        }
+      }
 
       for (const [dayKey, modelMap] of dayModelInteractions) {
         for (const [model, interactions] of modelMap) {
-          const cachedUsage = cachedData.modelUsage[model];
+          const cachedUsage = cachedData.modelUsage[model] as any;
           if (!cachedUsage) { continue; }
-
+
+          // Validate individual model token values — reject negative or non-finite values.
+          const cachedInput = typeof cachedUsage.inputTokens === 'number' ? cachedUsage.inputTokens : NaN;
+          const cachedOutput = typeof cachedUsage.outputTokens === 'number' ? cachedUsage.outputTokens : NaN;
+          if (!Number.isFinite(cachedInput) || cachedInput < 0 ||
+              !Number.isFinite(cachedOutput) || cachedOutput < 0) {
+            this.deps.warn(`Backend sync: invalid inputTokens or outputTokens in model usage for ${sessionFile}`);
+            continue;
+          }
+
           const key: DailyRollupKey = { day: dayKey, model, workspaceId, machineId, userId, editor };
-
-          // Interaction ratio for this [day, model] relative to all interactions in this file
-          const interactionFraction = totalAllInteractions > 0 ? interactions / totalAllInteractions : 0;
-          const modelDayTokens = Math.round(displayTokens * interactionFraction);
-
-          // For the tokenRatio used by fluencyMetrics: fraction of this model's interactions on this day
-          const totalInteractionsForModel = Array.from(dayModelInteractions.values())
-            .reduce((sum, m) => sum + (m.get(model) || 0), 0);
-          const tokenRatio = totalInteractionsForModel > 0 ? interactions / totalInteractionsForModel : 1;
-
-          // Use output fraction from modelUsage for the input/output split.
-          // Output tokens are not inflated by context (each response is independent).
-          const totalModelUsageTokens = (cachedUsage.inputTokens || 0) + (cachedUsage.outputTokens || 0);
-          const outputFraction = totalModelUsageTokens > 0
-            ? Math.min(0.5, (cachedUsage.outputTokens || 0) / totalModelUsageTokens)
-            : 0.2;
-          const outputTokens = Math.round(modelDayTokens * outputFraction);
-          const inputTokens = modelDayTokens - outputTokens;
-
-          // Extract fluency metrics from cached usage analysis (if available)
-          const fluencyMetrics = this.extractFluencyMetricsFromCache(cachedData, tokenRatio);
-
+
+          // Fraction of this model's interactions that fall on this day (for multi-day sessions).
+          // For single-day sessions dayFraction = 1.0 and the full cached model usage is used.
+          const totalModelInteractions = totalInteractionsPerModel.get(model) || 1;
+          const dayFraction = totalModelInteractions > 0 ? interactions / totalModelInteractions : 1;
+
+          // Apply dayFraction directly to the cached per-model tokens.
+          // For API-actual data, cachedUsage already holds the exact per-model totals from the
+          // session, so no additional scaling is needed. For text-estimate sessions, both
+          // cachedData.tokens and cachedUsage are from the same estimation and are consistent.
+          const inputTokens = Math.round(cachedInput * dayFraction);
+          const outputTokens = Math.round(cachedOutput * dayFraction);
+
+          const fluencyMetrics = this.extractFluencyMetricsFromCache(cachedData, dayFraction);
+
           upsertDailyRollup(rollups, key, {
             inputTokens,
             outputTokens,