chore: refresh model pricing data (#666)

rajbos · Copilot · web-flow · commit d825d3bf9a34 · 2026-04-24T21:19:16.000+02:00
- update OpenAI pricing for GPT-4.1 and GPT-5.4 models
- add cached input rates where provider docs expose them
- align README caching note with per-model pricing

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/vscode-extension/src/README.md b/vscode-extension/src/README.md
@@ -65,8 +65,8 @@ When these fields are absent, the full `inputCostPerMillion` rate is applied to
 - Cache reads: **10% of input rate** (e.g. $0.30/M for Claude Sonnet 4 at $3.00/M input)
 - Cache creation: **125% of input rate** (e.g. $3.75/M for Claude Sonnet 4)
 
-**OpenAI prompt caching rates** (automatic prefix matching):
-- Cache reads: **50% of input rate** (e.g. $1.25/M for GPT-4o at $2.50/M input)
+**OpenAI prompt caching rates** (automatic prefix matching) vary by model family:
+- Cache reads use the explicit per-model `cachedInputCostPerMillion` values in `modelPricing.json` (for example: GPT-4o = 50% of the input rate, GPT-4.1 = 25%, GPT-5.4 = 10%)
 - Note: OpenAI cache creation does not incur an extra fee, so `cacheCreationCostPerMillion` is not set for OpenAI models.
 
 ### Which data sources provide cache token breakdowns?
diff --git a/vscode-extension/src/modelPricing.json b/vscode-extension/src/modelPricing.json
@@ -2,12 +2,12 @@
   "$schema": "http://json-schema.org/draft-07/schema#",
   "description": "Model pricing data - costs per million tokens for input and output",
   "metadata": {
-    "lastUpdated": "2026-03-30",
+    "lastUpdated": "2026-04-24",
     "sources": [
       {
         "name": "OpenAI API Pricing",
-        "url": "https://openai.com/api/pricing/",
-        "retrievedDate": "2026-03-30"
+        "url": "https://developers.openai.com/api/docs/pricing",
+        "retrievedDate": "2026-04-24"
       },
       {
         "name": "Anthropic Claude Pricing",
@@ -127,15 +127,17 @@
     },
     "gpt-5.4": {
       "inputCostPerMillion": 2.50,
+      "cachedInputCostPerMillion": 0.25,
       "outputCostPerMillion": 15.0,
       "category": "GPT-5 models",
       "tier": "premium",
       "multiplier": 1,
       "displayNames": ["GPT-5.4"]
     },
     "gpt-5.4-mini": {
-      "inputCostPerMillion": 0.25,
-      "outputCostPerMillion": 2.0,
+      "inputCostPerMillion": 0.75,
+      "cachedInputCostPerMillion": 0.075,
+      "outputCostPerMillion": 4.5,
       "category": "GPT-5 models",
       "tier": "standard",
       "multiplier": 0,
@@ -150,23 +152,26 @@
       "displayNames": ["GPT-4"]
     },
     "gpt-4.1": {
-      "inputCostPerMillion": 3.0,
-      "outputCostPerMillion": 12.0,
+      "inputCostPerMillion": 2.0,
+      "cachedInputCostPerMillion": 0.5,
+      "outputCostPerMillion": 8.0,
       "category": "GPT-4 models",
       "tier": "standard",
       "multiplier": 0,
       "displayNames": ["GPT-4.1"]
     },
     "gpt-4.1-mini": {
-      "inputCostPerMillion": 0.8,
-      "outputCostPerMillion": 3.2,
+      "inputCostPerMillion": 0.4,
+      "cachedInputCostPerMillion": 0.1,
+      "outputCostPerMillion": 1.6,
       "category": "GPT-4 models",
       "tier": "standard",
       "multiplier": 0
     },
     "gpt-4.1-nano": {
-      "inputCostPerMillion": 0.2,
-      "outputCostPerMillion": 0.8,
+      "inputCostPerMillion": 0.1,
+      "cachedInputCostPerMillion": 0.025,
+      "outputCostPerMillion": 0.4,
       "category": "GPT-4 models",
       "tier": "standard",
       "multiplier": 0