Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions archie/benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ detail). In short:
```bash
cp archie/benchmark/secrets.env.example .archie-bench/secrets.env
# edit .archie-bench/secrets.env: real SUPABASE_URL + service_role key
set -a; source .archie-bench/secrets.env; set +a
python3 -m archie.benchmark verify # auto-loads .archie-bench/secrets.env; checks tables + creds
```

This must be filled in **before** you benchmark if you want results in Supabase. If
Expand Down Expand Up @@ -59,7 +59,7 @@ Copy the credentials template and fill it in (the copy lives in gitignored
```bash
cp archie/benchmark/secrets.env.example .archie-bench/secrets.env
# edit .archie-bench/secrets.env: real SUPABASE_URL + service_role key
set -a; source .archie-bench/secrets.env; set +a
python3 -m archie.benchmark verify # auto-loads .archie-bench/secrets.env; checks tables + creds
```

`store.py` reads `SUPABASE_URL` and `SUPABASE_SERVICE_KEY` from the environment.
Expand Down
17 changes: 17 additions & 0 deletions archie/benchmark/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,20 @@ def _interactive_deep_scan(cfg):
sys.exit(1)


def _cmd_verify(args):
    """Run the Supabase store self-test and print one status line per check.

    Each check is printed as ``[OK ]`` or ``[FAIL]`` followed by its name and,
    when present, its detail text. When the overall result is not OK a note is
    written to stderr and the process exits with status 1.

    ``args`` is accepted to match the sub-command dispatch signature; it is
    not read here.
    """
    from .store import verify

    report = verify()
    for check_name, passed, info in report["checks"]:
        status = "OK " if passed else "FAIL"
        suffix = f" — {info}" if info else ""
        print(f" [{status}] {check_name}" + suffix)

    if not report["ok"]:
        print("Supabase store NOT ready — runs would fall back to offline JSON.",
              file=sys.stderr)
        sys.exit(1)
    print("Supabase store ready — benchmark runs will be stored online.")


def main(argv=None):
parser = argparse.ArgumentParser(prog="archie-benchmark",
description="Measure Archie effectiveness (control vs treatment).")
Expand All @@ -94,6 +108,9 @@ def main(argv=None):
p_auto.add_argument("--model", default="claude-sonnet-4-6")
p_auto.set_defaults(func=_cmd_auto)

p_verify = sub.add_parser("verify", help="self-test the Supabase store connection")
p_verify.set_defaults(func=_cmd_verify)

args = parser.parse_args(argv)
args.func(args)

Expand Down
5 changes: 5 additions & 0 deletions archie/benchmark/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ create table if not exists benchmark_samples (

create index if not exists benchmark_samples_run_id_idx on benchmark_samples(run_id);

-- Service-role writes bypass RLS; enabling RLS with no policies keeps
-- anon/authenticated locked out (same posture as the other Archie tables).
alter table benchmark_runs enable row level security;
alter table benchmark_samples enable row level security;

-- Per-run, per-arm rollup the website reads (separate spec).
create or replace view benchmark_summary as
select
Expand Down
9 changes: 7 additions & 2 deletions archie/benchmark/secrets.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,14 @@
# cp archie/benchmark/secrets.env.example .archie-bench/secrets.env
# # then edit .archie-bench/secrets.env with your real URL + service_role key
#
# Load it before a run (exports the vars into the environment):
# No sourcing needed: the harness auto-loads .archie-bench/secrets.env from
# the current working directory when SUPABASE_URL / SUPABASE_SERVICE_KEY are
# not already in the environment. (Manual `set -a; source ...; set +a` still
# works and takes precedence, as do real env vars.)
#
# set -a; source .archie-bench/secrets.env; set +a
# Check the connection any time with:
#
# python3 -m archie.benchmark verify
#
# .archie-bench/ is gitignored — keep real keys there, never commit a filled copy.
#
Expand Down
129 changes: 113 additions & 16 deletions archie/benchmark/store.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,140 @@
# archie/benchmark/store.py
import json
import os
import sys
import urllib.error
import urllib.request
from pathlib import Path

# Default location of the gitignored credentials file (see secrets.env.example).
# The path is relative, so it is resolved against the current working
# directory at the moment the file is read.
SECRETS_PATH = Path(".archie-bench/secrets.env")


def _load_secrets(path: Path | None = None):
"""Populate SUPABASE_* env vars from the secrets file when not already set.

Template placeholders (REPLACE-WITH-...) are ignored so an unfilled copy
behaves exactly like a missing file. Never overrides real env vars.
"""
path = path or SECRETS_PATH # module attr resolved at call time (testable)
if not path.is_file():
return
for line in path.read_text().splitlines():
line = line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
key, _, value = line.partition("=")
key, value = key.strip(), value.strip().strip("'\"")
if key in ("SUPABASE_URL", "SUPABASE_SERVICE_KEY") and value \
and "REPLACE-WITH" not in value and not os.environ.get(key):
os.environ[key] = value


def _env():
    """Return the ``(SUPABASE_URL, SUPABASE_SERVICE_KEY)`` pair.

    The secrets file is given the chance to fill in unset variables first;
    an element is ``None`` when its variable is still absent afterwards.
    """
    _load_secrets()
    supabase_url = os.environ.get("SUPABASE_URL")
    service_key = os.environ.get("SUPABASE_SERVICE_KEY")
    return supabase_url, service_key


def _build_request(url, key, table, method, body, query):
    """Assemble the Supabase REST request for ``table`` without sending it."""
    payload = json.dumps(body).encode("utf-8") if body is not None else None
    return urllib.request.Request(
        # Tolerate a trailing slash on the configured URL so the path never
        # becomes `//rest/v1/...`.
        f"{url.rstrip('/')}/rest/v1/{table}{query}",
        data=payload,
        headers={
            "apikey": key,
            "Authorization": f"Bearer {key}",
            "Content-Type": "application/json",
            # Callers read the created row (e.g. its id) from the response.
            "Prefer": "return=representation",
        },
        method=method,
    )


def _request(url, key, table, *, method="POST", body=None, query=""):
    """Send one request to the Supabase REST endpoint for ``table``.

    Args:
        url: Project base URL (a trailing slash is ignored).
        key: API key, sent both as ``apikey`` and as a bearer token.
        table: Table name appended after ``/rest/v1/``.
        method: HTTP method; defaults to ``"POST"``.
        body: JSON-serialisable payload, or ``None`` to send no body.
        query: Raw query string (including the leading ``?``) appended to the
            URL as-is.

    Returns:
        The decoded JSON response, or ``None`` when the response body is empty.

    Raises:
        Whatever ``urllib.request.urlopen`` raises (the call uses a 30 second
        timeout), and ``json.JSONDecodeError`` for a non-JSON response body.
    """
    req = _build_request(url, key, table, method, body, query)
    with urllib.request.urlopen(req, timeout=30) as resp:
        raw = resp.read().decode("utf-8")
    return json.loads(raw) if raw else None


def _post(url, key, table, rows):
    """Insert ``rows`` into ``table`` and return the decoded response."""
    return _request(url, key, table, method="POST", body=rows)


def _write_offline(run_row, sample_rows, offline_path):
    """Write the run and its samples to ``offline_path`` as indented JSON.

    Missing parent directories are created. The file holds one object with
    the keys ``"run"`` and ``"samples"``. Returns the path as a string.
    """
    target = Path(offline_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = {"run": run_row, "samples": sample_rows}
    target.write_text(json.dumps(payload, indent=2))
    return str(target)


def store_results(run_row, sample_rows, offline_path, _poster=None):
    """Store one benchmark run online, falling back to a local JSON file.

    Args:
        run_row: Row for ``benchmark_runs``.
        sample_rows: Rows for ``benchmark_samples``. Each row is stamped in
            place with the ``run_id`` of the newly created run before it is
            sent.
        offline_path: Where to write the JSON copy when online storage is
            unavailable or fails.
        _poster: Replacement for ``_post`` with the same
            ``(url, key, table, rows)`` signature; intended for tests.

    Returns:
        ``{"mode": "offline", "path": ...}`` when credentials are missing,
        ``{"mode": "online", "run_id": ...}`` when both inserts succeed, or
        ``{"mode": "offline-fallback", "error": ..., "path": ...}`` when any
        step of the online write raises.
    """
    url, key = _env()
    if not url or not key:
        return {"mode": "offline", "path": _write_offline(run_row, sample_rows, offline_path)}

    poster = _poster or _post
    try:
        created = poster(url, key, "benchmark_runs", [run_row])
        run_id = created[0]["id"]
        for r in sample_rows:
            r["run_id"] = run_id
        poster(url, key, "benchmark_samples", sample_rows)
        return {"mode": "online", "run_id": run_id}
    except Exception as e:  # never lose results over a storage error
        detail = getattr(e, "reason", None) or e
        if isinstance(e, urllib.error.HTTPError):
            # The response body usually carries the server's explanation;
            # keep a short prefix of it when it can be read.
            try:
                detail = f"HTTP {e.code}: {e.read().decode('utf-8')[:200]}"
            except Exception:
                detail = f"HTTP {e.code}"
        print(f" Supabase write failed ({detail}) — falling back to offline store.",
              file=sys.stderr)
        return {
            "mode": "offline-fallback",
            "error": str(detail),
            "path": _write_offline(run_row, sample_rows, offline_path),
        }


def _probe_error_detail(exc):
    """Describe a failed probe request, keeping the HTTP body when available."""
    if isinstance(exc, urllib.error.HTTPError):
        # The response body usually names the actual problem; keep a short
        # prefix of it.
        try:
            return f"HTTP {exc.code}: {exc.read().decode('utf-8')[:200]}"
        except Exception:
            return f"HTTP {exc.code}"
    return str(getattr(exc, "reason", None) or exc)


def verify(_requester=None):
    """Connection self-test for `python3 -m archie.benchmark verify`.

    Checks credentials, reachability, and that both tables accept a
    service-role insert (probe row inserted then deleted). Returns
    {"ok": bool, "checks": [(name, ok, detail), ...]}.

    Args:
        _requester: Replacement for ``_request`` with the same signature;
            intended for tests.

    The run stops after the first check when credentials are missing, and
    after the second when the probe run cannot be inserted. A failed sample
    insert does not stop it: the probe run is still cleaned up.
    """
    requester = _requester or _request
    checks = []

    url, key = _env()
    creds_ok = bool(url and key)
    if creds_ok:
        detail = url
    else:
        missing = [n for n, v in (("SUPABASE_URL", url), ("SUPABASE_SERVICE_KEY", key)) if not v]
        detail = f"{' + '.join(missing)} not set (env or .archie-bench/secrets.env)"
    checks.append(("credentials", creds_ok, detail))
    if not creds_ok:
        return {"ok": False, "checks": checks}

    probe_id = None
    try:
        created = requester(url, key, "benchmark_runs",
                            body=[{"name": "verify-probe", "repo_name": "verify"}])
        probe_id = created[0]["id"]
        checks.append(("insert benchmark_runs", True, probe_id))
    except Exception as e:
        checks.append(("insert benchmark_runs", False, _probe_error_detail(e)))
        return {"ok": False, "checks": checks}

    try:
        requester(url, key, "benchmark_samples",
                  body=[{"run_id": probe_id, "arm": "control", "repetition": 0}])
        checks.append(("insert benchmark_samples", True, ""))
    except Exception as e:
        checks.append(("insert benchmark_samples", False, _probe_error_detail(e)))

    try:
        # Cascade removes the probe sample with the run.
        requester(url, key, "benchmark_runs", method="DELETE", query=f"?id=eq.{probe_id}")
        checks.append(("cleanup probe", True, ""))
    except Exception as e:
        checks.append(("cleanup probe", False,
                       f"delete probe {probe_id} manually: {_probe_error_detail(e)}"))

    return {"ok": all(ok for _, ok, _ in checks), "checks": checks}
98 changes: 98 additions & 0 deletions tests/benchmark/test_store.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
# tests/benchmark/test_store.py
import json

import pytest

from archie.benchmark import store


@pytest.fixture(autouse=True)
def _isolate_secrets(tmp_path, monkeypatch):
    """Redirect the store's secrets path into the per-test temp directory.

    Applied to every test automatically so that a developer's real
    .archie-bench/secrets.env is never picked up.
    """
    placeholder_path = tmp_path / "no-secrets.env"
    monkeypatch.setattr(store, "SECRETS_PATH", placeholder_path)


def test_offline_fallback_when_env_missing(tmp_path, monkeypatch):
monkeypatch.delenv("SUPABASE_URL", raising=False)
monkeypatch.delenv("SUPABASE_SERVICE_KEY", raising=False)
Expand Down Expand Up @@ -33,3 +42,92 @@ def fake_poster(url, key, table, rows):
assert calls[1][0] == "benchmark_samples"
# run_id stamped onto every sample row
assert all(r["run_id"] == "run-123" for r in calls[1][1])


def test_offline_fallback_on_post_failure(tmp_path, monkeypatch):
    """A poster that raises yields an offline-fallback result with data on disk."""
    for var, value in (("SUPABASE_URL", "https://x.supabase.co"),
                       ("SUPABASE_SERVICE_KEY", "secret")):
        monkeypatch.setenv(var, value)

    def broken_poster(url, key, table, rows):
        raise OSError("connection refused")

    target = tmp_path / "results.json"
    outcome = store.store_results(
        {"name": "x"}, [{"arm": "control"}], target, _poster=broken_poster
    )

    assert outcome["mode"] == "offline-fallback"
    assert "connection refused" in outcome["error"]
    written = json.loads(target.read_text())
    assert written["run"]["name"] == "x"


def test_secrets_file_autoload(tmp_path, monkeypatch):
    """Unset SUPABASE_* variables are filled in from the secrets file."""
    for var in ("SUPABASE_URL", "SUPABASE_SERVICE_KEY"):
        # setenv first so monkeypatch records the pre-test state even when the
        # variable is absent. delenv(raising=False) on a missing key records
        # nothing, so the value the store writes straight into os.environ
        # would otherwise outlive this test and leak into later ones.
        monkeypatch.setenv(var, "placeholder")
        monkeypatch.delenv(var)
    secrets = tmp_path / "secrets.env"
    secrets.write_text(
        "# comment\n"
        "SUPABASE_URL=https://real.supabase.co\n"
        "SUPABASE_SERVICE_KEY='sk-real'\n"
    )
    monkeypatch.setattr(store, "SECRETS_PATH", secrets)
    assert store._env() == ("https://real.supabase.co", "sk-real")


def test_secrets_file_placeholders_ignored(tmp_path, monkeypatch):
    """An unfilled template copy must leave both variables unset."""
    for var in ("SUPABASE_URL", "SUPABASE_SERVICE_KEY"):
        monkeypatch.delenv(var, raising=False)

    template_lines = (
        "SUPABASE_URL=https://REPLACE-WITH-PROJECT-REF.supabase.co",
        "SUPABASE_SERVICE_KEY=REPLACE-WITH-SERVICE-ROLE-KEY",
    )
    secrets_file = tmp_path / "secrets.env"
    secrets_file.write_text("\n".join(template_lines) + "\n")
    monkeypatch.setattr(store, "SECRETS_PATH", secrets_file)

    loaded = store._env()
    assert loaded == (None, None)


def test_env_vars_take_precedence_over_secrets_file(tmp_path, monkeypatch):
    """Values already in the environment win over the secrets file."""
    from_env = {
        "SUPABASE_URL": "https://env.supabase.co",
        "SUPABASE_SERVICE_KEY": "sk-env",
    }
    for var, value in from_env.items():
        monkeypatch.setenv(var, value)

    secrets_file = tmp_path / "secrets.env"
    secrets_file.write_text(
        "SUPABASE_URL=https://file.supabase.co\nSUPABASE_SERVICE_KEY=sk-file\n"
    )
    monkeypatch.setattr(store, "SECRETS_PATH", secrets_file)

    loaded = store._env()
    assert loaded == ("https://env.supabase.co", "sk-env")


def test_verify_reports_missing_credentials(monkeypatch):
    """Without credentials, verify fails and its first check says so."""
    for var in ("SUPABASE_URL", "SUPABASE_SERVICE_KEY"):
        monkeypatch.delenv(var, raising=False)

    report = store.verify()
    assert report["ok"] is False

    first_check = report["checks"][0]
    assert first_check[0] == "credentials"
    assert first_check[1] is False


def test_verify_probe_roundtrip(monkeypatch):
    """verify inserts a probe run and sample, then deletes the probe run."""
    for var, value in (("SUPABASE_URL", "https://x.supabase.co"),
                       ("SUPABASE_SERVICE_KEY", "secret")):
        monkeypatch.setenv(var, value)
    seen = []

    def recording_requester(url, key, table, *, method="POST", body=None, query=""):
        seen.append((method, table, query))
        is_run_insert = method == "POST" and table == "benchmark_runs"
        return [{"id": "probe-1"}] if is_run_insert else []

    report = store.verify(_requester=recording_requester)

    assert report["ok"] is True
    assert ("POST", "benchmark_samples", "") in seen
    assert ("DELETE", "benchmark_runs", "?id=eq.probe-1") in seen


def test_verify_fails_on_insert_error(monkeypatch):
    """A requester that always raises makes the run-insert check fail."""
    for var, value in (("SUPABASE_URL", "https://x.supabase.co"),
                       ("SUPABASE_SERVICE_KEY", "secret")):
        monkeypatch.setenv(var, value)

    def raising_requester(url, key, table, *, method="POST", body=None, query=""):
        raise OSError("relation benchmark_runs does not exist")

    report = store.verify(_requester=raising_requester)

    assert report["ok"] is False
    failed_names = [name for name, ok, _ in report["checks"] if not ok]
    assert "insert benchmark_runs" in failed_names
Loading