From b8784e4bb32baed1da8f825da1f24e15b69bdb73 Mon Sep 17 00:00:00 2001 From: Csaba Toth Date: Wed, 10 Jun 2026 15:08:29 +0200 Subject: [PATCH] =?UTF-8?q?feat(benchmark):=20Supabase=20store=20hardening?= =?UTF-8?q?=20=E2=80=94=20verify=20command,=20offline=20fallback,=20secret?= =?UTF-8?q?s=20auto-load?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 'python3 -m archie.benchmark verify': checks creds, probe insert into both tables, cascade cleanup; exits 1 when the store would fall back to offline - store_results no longer loses results on a Supabase error: falls back to the offline JSON and reports the error - .archie-bench/secrets.env auto-loads when SUPABASE_* not in env (env wins; template placeholders ignored) - schema.sql: enable RLS on both tables (matches deployed benchmark_harness_v1 migration; service-role writes unaffected) Co-Authored-By: Claude Fable 5 --- archie/benchmark/README.md | 4 +- archie/benchmark/cli.py | 17 ++++ archie/benchmark/schema.sql | 5 ++ archie/benchmark/secrets.env.example | 9 +- archie/benchmark/store.py | 129 +++++++++++++++++++++++---- tests/benchmark/test_store.py | 98 ++++++++++++++++++++ 6 files changed, 242 insertions(+), 20 deletions(-) diff --git a/archie/benchmark/README.md b/archie/benchmark/README.md index 0ec7f187..1fe54bdc 100644 --- a/archie/benchmark/README.md +++ b/archie/benchmark/README.md @@ -13,7 +13,7 @@ detail). In short: ```bash cp archie/benchmark/secrets.env.example .archie-bench/secrets.env # edit .archie-bench/secrets.env: real SUPABASE_URL + service_role key -set -a; source .archie-bench/secrets.env; set +a +python3 -m archie.benchmark verify # auto-loads .archie-bench/secrets.env; checks tables + creds ``` This must be filled in **before** you benchmark if you want results in Supabase. If @@ -59,7 +59,7 @@ Copy the credentials template and fill it in (the copy lives in gitignored ```bash cp archie/benchmark/secrets.env.example .archie-bench/secrets.env # edit .archie-bench/secrets.env: real SUPABASE_URL + service_role key -set -a; source .archie-bench/secrets.env; set +a +python3 -m archie.benchmark verify # auto-loads .archie-bench/secrets.env; checks tables + creds ``` `store.py` reads `SUPABASE_URL` and `SUPABASE_SERVICE_KEY` from the environment. diff --git a/archie/benchmark/cli.py b/archie/benchmark/cli.py index f9d6be6f..509aada1 100644 --- a/archie/benchmark/cli.py +++ b/archie/benchmark/cli.py @@ -74,6 +74,20 @@ def _interactive_deep_scan(cfg): sys.exit(1) +def _cmd_verify(args): + from .store import verify + result = verify() + for name, ok, detail in result["checks"]: + mark = "OK " if ok else "FAIL" + print(f" [{mark}] {name}" + (f" — {detail}" if detail else "")) + if result["ok"]: + print("Supabase store ready — benchmark runs will be stored online.") + else: + print("Supabase store NOT ready — runs would fall back to offline JSON.", + file=sys.stderr) + sys.exit(1) + + def main(argv=None): parser = argparse.ArgumentParser(prog="archie-benchmark", description="Measure Archie effectiveness (control vs treatment).") @@ -94,6 +108,9 @@ def main(argv=None): p_auto.add_argument("--model", default="claude-sonnet-4-6") p_auto.set_defaults(func=_cmd_auto) + p_verify = sub.add_parser("verify", help="self-test the Supabase store connection") + p_verify.set_defaults(func=_cmd_verify) + args = parser.parse_args(argv) args.func(args) diff --git a/archie/benchmark/schema.sql b/archie/benchmark/schema.sql index 62f117e0..97aca1a9 100644 --- a/archie/benchmark/schema.sql +++ b/archie/benchmark/schema.sql @@ -40,6 +40,11 @@ create table if not exists benchmark_samples ( create index if not exists benchmark_samples_run_id_idx on benchmark_samples(run_id); +-- Service-role writes bypass RLS; enabling RLS with no policies keeps +-- anon/authenticated locked out (same posture as the other Archie tables). +alter table benchmark_runs enable row level security; +alter table benchmark_samples enable row level security; + -- Per-run, per-arm rollup the website reads (separate spec). create or replace view benchmark_summary as select diff --git a/archie/benchmark/secrets.env.example b/archie/benchmark/secrets.env.example index 13c722a5..eee5a0d1 100644 --- a/archie/benchmark/secrets.env.example +++ b/archie/benchmark/secrets.env.example @@ -6,9 +6,14 @@ # cp archie/benchmark/secrets.env.example .archie-bench/secrets.env # # then edit .archie-bench/secrets.env with your real URL + service_role key # -# Load it before a run (exports the vars into the environment): +# No sourcing needed: the harness auto-loads .archie-bench/secrets.env from +# the current working directory when SUPABASE_URL / SUPABASE_SERVICE_KEY are +# not already in the environment. (Manual `set -a; source ...; set +a` still +# works and takes precedence, as do real env vars.) # -# set -a; source .archie-bench/secrets.env; set +a +# Check the connection any time with: +# +# python3 -m archie.benchmark verify # # .archie-bench/ is gitignored — keep real keys there, never commit a filled copy. # diff --git a/archie/benchmark/store.py b/archie/benchmark/store.py index 3df6adae..ff6a87b0 100644 --- a/archie/benchmark/store.py +++ b/archie/benchmark/store.py @@ -1,43 +1,140 @@ # archie/benchmark/store.py import json import os +import sys +import urllib.error import urllib.request from pathlib import Path +# Default location of the gitignored credentials file (see secrets.env.example). +SECRETS_PATH = Path(".archie-bench/secrets.env") + + +def _load_secrets(path: Path | None = None): + """Populate SUPABASE_* env vars from the secrets file when not already set. + + Template placeholders (REPLACE-WITH-...) are ignored so an unfilled copy + behaves exactly like a missing file. Never overrides real env vars. + """ + path = path or SECRETS_PATH # module attr resolved at call time (testable) + if not path.is_file(): + return + for line in path.read_text().splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, _, value = line.partition("=") + key, value = key.strip(), value.strip().strip("'\"") + if key in ("SUPABASE_URL", "SUPABASE_SERVICE_KEY") and value \ + and "REPLACE-WITH" not in value and not os.environ.get(key): + os.environ[key] = value + def _env(): + _load_secrets() return os.environ.get("SUPABASE_URL"), os.environ.get("SUPABASE_SERVICE_KEY") -def _post(url, key, table, rows): - data = json.dumps(rows).encode("utf-8") +def _request(url, key, table, *, method="POST", body=None, query=""): req = urllib.request.Request( - f"{url}/rest/v1/{table}", - data=data, + f"{url}/rest/v1/{table}{query}", + data=json.dumps(body).encode("utf-8") if body is not None else None, headers={ "apikey": key, "Authorization": f"Bearer {key}", "Content-Type": "application/json", "Prefer": "return=representation", }, - method="POST", + method=method, ) with urllib.request.urlopen(req, timeout=30) as resp: - return json.loads(resp.read().decode("utf-8")) + raw = resp.read().decode("utf-8") + return json.loads(raw) if raw else None + + +def _post(url, key, table, rows): + return _request(url, key, table, body=rows) + + +def _write_offline(run_row, sample_rows, offline_path): + path = Path(offline_path) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps({"run": run_row, "samples": sample_rows}, indent=2)) + return str(path) def store_results(run_row, sample_rows, offline_path, _poster=None): url, key = _env() if not url or not key: - path = Path(offline_path) - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(json.dumps({"run": run_row, "samples": sample_rows}, indent=2)) - return {"mode": "offline", "path": str(path)} + return {"mode": "offline", "path": _write_offline(run_row, sample_rows, offline_path)} poster = _poster or _post - created = poster(url, key, "benchmark_runs", [run_row]) - run_id = created[0]["id"] - for r in sample_rows: - r["run_id"] = run_id - poster(url, key, "benchmark_samples", sample_rows) - return {"mode": "online", "run_id": run_id} + try: + created = poster(url, key, "benchmark_runs", [run_row]) + run_id = created[0]["id"] + for r in sample_rows: + r["run_id"] = run_id + poster(url, key, "benchmark_samples", sample_rows) + return {"mode": "online", "run_id": run_id} + except Exception as e: # never lose results over a storage error + detail = getattr(e, "reason", None) or e + if isinstance(e, urllib.error.HTTPError): + try: + detail = f"HTTP {e.code}: {e.read().decode('utf-8')[:200]}" + except Exception: + detail = f"HTTP {e.code}" + print(f" Supabase write failed ({detail}) — falling back to offline store.", + file=sys.stderr) + return { + "mode": "offline-fallback", + "error": str(detail), + "path": _write_offline(run_row, sample_rows, offline_path), + } + + +def verify(_requester=None): + """Connection self-test for `python3 -m archie.benchmark verify`. + + Checks credentials, reachability, and that both tables accept a + service-role insert (probe row inserted then deleted). Returns + {"ok": bool, "checks": [(name, ok, detail), ...]}. + """ + requester = _requester or _request + checks = [] + + url, key = _env() + creds_ok = bool(url and key) + if creds_ok: + detail = url + else: + missing = [n for n, v in (("SUPABASE_URL", url), ("SUPABASE_SERVICE_KEY", key)) if not v] + detail = f"{' + '.join(missing)} not set (env or .archie-bench/secrets.env)" + checks.append(("credentials", creds_ok, detail)) + if not creds_ok: + return {"ok": False, "checks": checks} + + probe_id = None + try: + created = requester(url, key, "benchmark_runs", + body=[{"name": "verify-probe", "repo_name": "verify"}]) + probe_id = created[0]["id"] + checks.append(("insert benchmark_runs", True, probe_id)) + except Exception as e: + checks.append(("insert benchmark_runs", False, str(e))) + return {"ok": False, "checks": checks} + + try: + requester(url, key, "benchmark_samples", + body=[{"run_id": probe_id, "arm": "control", "repetition": 0}]) + checks.append(("insert benchmark_samples", True, "")) + except Exception as e: + checks.append(("insert benchmark_samples", False, str(e))) + + try: + # Cascade removes the probe sample with the run. + requester(url, key, "benchmark_runs", method="DELETE", query=f"?id=eq.{probe_id}") + checks.append(("cleanup probe", True, "")) + except Exception as e: + checks.append(("cleanup probe", False, f"delete probe {probe_id} manually: {e}")) + + return {"ok": all(ok for _, ok, _ in checks), "checks": checks} diff --git a/tests/benchmark/test_store.py b/tests/benchmark/test_store.py index e266fd49..429fb4f3 100644 --- a/tests/benchmark/test_store.py +++ b/tests/benchmark/test_store.py @@ -1,8 +1,17 @@ # tests/benchmark/test_store.py import json + +import pytest + from archie.benchmark import store +@pytest.fixture(autouse=True) +def _isolate_secrets(tmp_path, monkeypatch): + """Keep tests independent of a developer's real .archie-bench/secrets.env.""" + monkeypatch.setattr(store, "SECRETS_PATH", tmp_path / "no-secrets.env") + + def test_offline_fallback_when_env_missing(tmp_path, monkeypatch): monkeypatch.delenv("SUPABASE_URL", raising=False) monkeypatch.delenv("SUPABASE_SERVICE_KEY", raising=False) @@ -33,3 +42,92 @@ def fake_poster(url, key, table, rows): assert calls[1][0] == "benchmark_samples" # run_id stamped onto every sample row assert all(r["run_id"] == "run-123" for r in calls[1][1]) + + +def test_offline_fallback_on_post_failure(tmp_path, monkeypatch): + monkeypatch.setenv("SUPABASE_URL", "https://x.supabase.co") + monkeypatch.setenv("SUPABASE_SERVICE_KEY", "secret") + + def failing_poster(url, key, table, rows): + raise OSError("connection refused") + + out = tmp_path / "results.json" + res = store.store_results({"name": "x"}, [{"arm": "control"}], out, + _poster=failing_poster) + assert res["mode"] == "offline-fallback" + assert "connection refused" in res["error"] + assert json.loads(out.read_text())["run"]["name"] == "x" + + +def test_secrets_file_autoload(tmp_path, monkeypatch): + monkeypatch.delenv("SUPABASE_URL", raising=False) + monkeypatch.delenv("SUPABASE_SERVICE_KEY", raising=False) + secrets = tmp_path / "secrets.env" + secrets.write_text( + "# comment\n" + "SUPABASE_URL=https://real.supabase.co\n" + "SUPABASE_SERVICE_KEY='sk-real'\n" + ) + monkeypatch.setattr(store, "SECRETS_PATH", secrets) + assert store._env() == ("https://real.supabase.co", "sk-real") + + +def test_secrets_file_placeholders_ignored(tmp_path, monkeypatch): + monkeypatch.delenv("SUPABASE_URL", raising=False) + monkeypatch.delenv("SUPABASE_SERVICE_KEY", raising=False) + secrets = tmp_path / "secrets.env" + secrets.write_text( + "SUPABASE_URL=https://REPLACE-WITH-PROJECT-REF.supabase.co\n" + "SUPABASE_SERVICE_KEY=REPLACE-WITH-SERVICE-ROLE-KEY\n" + ) + monkeypatch.setattr(store, "SECRETS_PATH", secrets) + assert store._env() == (None, None) + + +def test_env_vars_take_precedence_over_secrets_file(tmp_path, monkeypatch): + monkeypatch.setenv("SUPABASE_URL", "https://env.supabase.co") + monkeypatch.setenv("SUPABASE_SERVICE_KEY", "sk-env") + secrets = tmp_path / "secrets.env" + secrets.write_text( + "SUPABASE_URL=https://file.supabase.co\nSUPABASE_SERVICE_KEY=sk-file\n" + ) + monkeypatch.setattr(store, "SECRETS_PATH", secrets) + assert store._env() == ("https://env.supabase.co", "sk-env") + + +def test_verify_reports_missing_credentials(monkeypatch): + monkeypatch.delenv("SUPABASE_URL", raising=False) + monkeypatch.delenv("SUPABASE_SERVICE_KEY", raising=False) + res = store.verify() + assert res["ok"] is False + assert res["checks"][0][0] == "credentials" + assert res["checks"][0][1] is False + + +def test_verify_probe_roundtrip(monkeypatch): + monkeypatch.setenv("SUPABASE_URL", "https://x.supabase.co") + monkeypatch.setenv("SUPABASE_SERVICE_KEY", "secret") + calls = [] + + def fake_requester(url, key, table, *, method="POST", body=None, query=""): + calls.append((method, table, query)) + if method == "POST" and table == "benchmark_runs": + return [{"id": "probe-1"}] + return [] + + res = store.verify(_requester=fake_requester) + assert res["ok"] is True + assert ("POST", "benchmark_samples", "") in calls + assert ("DELETE", "benchmark_runs", "?id=eq.probe-1") in calls + + +def test_verify_fails_on_insert_error(monkeypatch): + monkeypatch.setenv("SUPABASE_URL", "https://x.supabase.co") + monkeypatch.setenv("SUPABASE_SERVICE_KEY", "secret") + + def fake_requester(url, key, table, *, method="POST", body=None, query=""): + raise OSError("relation benchmark_runs does not exist") + + res = store.verify(_requester=fake_requester) + assert res["ok"] is False + assert any(name == "insert benchmark_runs" and not ok for name, ok, _ in res["checks"])