diff --git a/.gitignore b/.gitignore index 7da9a8f6..b991fd35 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,7 @@ tks-trigger-cache.json selector-cache.txt *.cache .uv/ + +# File-backed disk caches (utils/disk_cache.py) +source-cache/ +label-cache/ diff --git a/tests/conftest.py b/tests/conftest.py index 9bdb0a53..5a36116a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,12 +9,13 @@ """ import os +from pathlib import Path import pytest @pytest.fixture(autouse=True) -def _isolate_from_live_apis(monkeypatch: pytest.MonkeyPatch) -> None: +def _isolate_from_live_apis(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: """Block accidental live API/RPC calls and reset cross-test singletons. Strips `ETHERSCAN_TOKEN` and every `PROVIDER_URL_*` so a missing mock @@ -23,11 +24,16 @@ def _isolate_from_live_apis(monkeypatch: pytest.MonkeyPatch) -> None: those code paths opt back in via @patch.dict. Also clears `ChainManager._instances` so a real client object cached by - one test can't leak into the next. + one test can't leak into the next, and points `CACHE_DIR` at a per-test + temp dir so the file-backed disk caches (utils.disk_cache) never litter the + repo and never leak entries between tests. """ for key in list(os.environ): if key == "ETHERSCAN_TOKEN" or key.startswith("PROVIDER_URL_"): monkeypatch.delenv(key, raising=False) + # `cache_path` reads this module global at call time, so the redirect takes + # effect for caches created at import (they resolve their dir lazily). + monkeypatch.setattr("utils.cache.CACHE_DIR", str(tmp_path)) try: from utils.web3_wrapper import ChainManager diff --git a/tests/test_disk_cache.py b/tests/test_disk_cache.py new file mode 100644 index 00000000..b6f35271 --- /dev/null +++ b/tests/test_disk_cache.py @@ -0,0 +1,114 @@ +"""Tests for utils/disk_cache.py. + +These rely on the autouse conftest fixture that redirects CACHE_DIR to a +per-test temp dir, so every DiskCache here writes under an isolated location. 
+""" + +import os +import unittest +from unittest.mock import patch + +from utils.disk_cache import MISS, DiskCache + + +class TestDiskCacheRoundtrip(unittest.TestCase): + def test_positive_roundtrip(self) -> None: + cache = DiskCache(namespace="rt") + cache.set_positive("k", {"a": 1, "b": ["x", "y"]}) + self.assertEqual(cache.get("k"), {"a": 1, "b": ["x", "y"]}) + + def test_absent_key_returns_miss(self) -> None: + cache = DiskCache(namespace="rt") + self.assertIs(cache.get("nope"), MISS) + + def test_negative_value_none_is_distinct_from_miss(self) -> None: + cache = DiskCache(namespace="neg") + cache.set_negative("k") # stores value None + self.assertIsNone(cache.get("k")) # a cached negative, not MISS + self.assertIs(cache.get("other"), MISS) + + def test_empty_list_negative_roundtrips(self) -> None: + cache = DiskCache(namespace="neg") + cache.set_negative("k", []) + self.assertEqual(cache.get("k"), []) + + def test_clear_removes_entries(self) -> None: + cache = DiskCache(namespace="clr") + cache.set_positive("a", 1) + cache.set_positive("b", 2) + cache.clear() + self.assertIs(cache.get("a"), MISS) + self.assertIs(cache.get("b"), MISS) + + +class TestDiskCacheTTL(unittest.TestCase): + def test_negative_entry_expires(self) -> None: + cache = DiskCache(namespace="ttl", negative_ttl=10) + with patch("utils.disk_cache.time.time") as mock_time: + mock_time.return_value = 1000.0 + cache.set_negative("k") + mock_time.return_value = 1005.0 # within TTL + self.assertIsNone(cache.get("k")) + mock_time.return_value = 1011.0 # past TTL + self.assertIs(cache.get("k"), MISS) + + def test_positive_entry_never_expires(self) -> None: + cache = DiskCache(namespace="ttl") + with patch("utils.disk_cache.time.time") as mock_time: + mock_time.return_value = 1000.0 + cache.set_positive("k", "v") + mock_time.return_value = 1000.0 + 10**9 # far future + self.assertEqual(cache.get("k"), "v") + + +class TestDiskCacheEviction(unittest.TestCase): + def 
test_evicts_to_max_entries_keeping_newest(self) -> None: + cache = DiskCache(namespace="evict", max_entries=2) + cache.set_positive("a", 1) + cache.set_positive("b", 2) + # Force deterministic mtime ordering (a oldest) regardless of FS resolution. + os.utime(cache._path("a"), (100, 100)) + os.utime(cache._path("b"), (200, 200)) + cache.set_positive("c", 3) # fresh mtime; eviction drops the oldest ("a") + + self.assertIs(cache.get("a"), MISS) # evicted + self.assertEqual(cache.get("b"), 2) + self.assertEqual(cache.get("c"), 3) + + def test_read_refreshes_lru_recency(self) -> None: + # Reading "a" before inserting "c" must keep "a" and evict the unread "b", + # even though "a" was written first. (Guards against FIFO-by-write-time.) + cache = DiskCache(namespace="lru", max_entries=2) + cache.set_positive("a", 1) + cache.set_positive("b", 2) + os.utime(cache._path("a"), (100, 100)) # a oldest-written + os.utime(cache._path("b"), (200, 200)) # b newer + cache.get("a") # LRU touch bumps "a" above the unread "b" + cache.set_positive("c", 3) # over cap → evict least-recently-used ("b") + + self.assertEqual(cache.get("a"), 1) # kept: recently read + self.assertIs(cache.get("b"), MISS) # evicted: never read, oldest use + self.assertEqual(cache.get("c"), 3) + + def test_evicts_to_max_bytes(self) -> None: + big = "x" * 2000 + cache = DiskCache(namespace="bytes", max_bytes=3000) + cache.set_positive("a", big) + os.utime(cache._path("a"), (100, 100)) # mark "a" oldest before "b" triggers eviction + cache.set_positive("b", big) # two ~2KB entries exceed the 3KB cap → "a" dropped + self.assertIs(cache.get("a"), MISS) + self.assertEqual(cache.get("b"), big) + + +class TestDiskCacheResilience(unittest.TestCase): + def test_corrupt_file_is_a_miss(self) -> None: + cache = DiskCache(namespace="corrupt") + cache.set_positive("k", "v") + # Overwrite with garbage. 
+ with open(cache._path("k"), "w") as f: + f.write("{not json") + self.assertIs(cache.get("k"), MISS) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_source_context.py b/tests/test_source_context.py index 1ce552c6..993905ad 100644 --- a/tests/test_source_context.py +++ b/tests/test_source_context.py @@ -191,6 +191,49 @@ def test_caches_per_address(self, mock_fetch: object) -> None: # Two calls — Etherscan should be hit only once self.assertEqual(mock_fetch.call_count, 1) # type: ignore[attr-defined] + @patch.dict("os.environ", {"ETHERSCAN_TOKEN": "test-key"}) + @patch("utils.source_context.fetch_json") + def test_verified_source_persists_across_process_restart(self, mock_fetch: object) -> None: + # A persistent disk cache should serve the same address after the in-memory + # layer is dropped (reset_cache simulates a fresh cron process). + mock_fetch.return_value = { # type: ignore[attr-defined] + "status": "1", + "result": [{"SourceCode": INFINIFI_FARM_SOURCE, "ContractName": "Farm"}], + } + self.assertIsNotNone(get_source_context(1, "0xabc", "setMaxSlippage")) + reset_cache() # clears in-memory only; disk cache survives + ctx = get_source_context(1, "0xabc", "setMaxSlippage") + self.assertIsNotNone(ctx) + self.assertEqual(mock_fetch.call_count, 1) # type: ignore[attr-defined] # served from disk + + @patch.dict("os.environ", {"ETHERSCAN_TOKEN": "test-key"}) + @patch("utils.source_context.fetch_json") + def test_unverified_negative_persists_across_process_restart(self, mock_fetch: object) -> None: + mock_fetch.return_value = { # type: ignore[attr-defined] + "status": "1", + "result": [{"SourceCode": "", "ContractName": ""}], + } + self.assertIsNone(get_source_context(1, "0xabc", "setMaxSlippage")) + reset_cache() + self.assertIsNone(get_source_context(1, "0xabc", "setMaxSlippage")) + self.assertEqual(mock_fetch.call_count, 1) # type: ignore[attr-defined] # negative cached on disk + + @patch.dict("os.environ", {"ETHERSCAN_TOKEN": 
"test-key"}) + @patch("utils.source_context.fetch_json") + def test_transient_error_is_not_persisted(self, mock_fetch: object) -> None: + # A request failure (fetch_json -> None) must not be cached as "unverified". + mock_fetch.return_value = None # type: ignore[attr-defined] + self.assertIsNone(get_source_context(1, "0xabc", "setMaxSlippage")) + reset_cache() + # Etherscan recovers: a later run should re-fetch and succeed, proving the + # blip was never persisted. + mock_fetch.return_value = { # type: ignore[attr-defined] + "status": "1", + "result": [{"SourceCode": INFINIFI_FARM_SOURCE, "ContractName": "Farm"}], + } + self.assertIsNotNone(get_source_context(1, "0xabc", "setMaxSlippage")) + self.assertEqual(mock_fetch.call_count, 2) # type: ignore[attr-defined] + @patch.dict("os.environ", {"ETHERSCAN_TOKEN": "test-key"}) @patch("utils.proxy.get_current_implementation") @patch("utils.source_context.fetch_json") diff --git a/tests/test_swiss_knife.py b/tests/test_swiss_knife.py index 199b6ca2..1409cd1e 100644 --- a/tests/test_swiss_knife.py +++ b/tests/test_swiss_knife.py @@ -45,6 +45,38 @@ def test_caches_repeat_lookups(self, mock_fetch: object) -> None: fetch_swiss_knife_labels(addr, 1) self.assertEqual(mock_fetch.call_count, 1) # type: ignore[attr-defined] + @patch("utils.swiss_knife.fetch_json") + def test_labels_persist_across_process_restart(self, mock_fetch: object) -> None: + # Disk cache should serve labels after the in-memory layer is dropped. 
+ mock_fetch.return_value = ["Curve.fi: 3pool"] # type: ignore[attr-defined] + addr = "0x" + "d0" * 20 + fetch_swiss_knife_labels(addr, 1) + reset_cache() # clears in-memory only + self.assertEqual(fetch_swiss_knife_labels(addr, 1), ["Curve.fi: 3pool"]) + self.assertEqual(mock_fetch.call_count, 1) # type: ignore[attr-defined] # served from disk + + @patch("utils.swiss_knife.fetch_json") + def test_empty_negative_persists_across_process_restart(self, mock_fetch: object) -> None: + # An unknown address (dict error body = a real 200 response) is cached as + # an empty negative so we don't re-query it every run. + mock_fetch.return_value = {"error": "Error fetching data"} # type: ignore[attr-defined] + addr = "0x" + "e0" * 20 + self.assertEqual(fetch_swiss_knife_labels(addr, 1), []) + reset_cache() + self.assertEqual(fetch_swiss_knife_labels(addr, 1), []) + self.assertEqual(mock_fetch.call_count, 1) # type: ignore[attr-defined] # negative cached on disk + + @patch("utils.swiss_knife.fetch_json") + def test_transient_error_is_not_persisted(self, mock_fetch: object) -> None: + # fetch_json -> None is a network/HTTP failure, not "no labels"; never persist. + mock_fetch.return_value = None # type: ignore[attr-defined] + addr = "0x" + "f0" * 20 + self.assertEqual(fetch_swiss_knife_labels(addr, 1), []) + reset_cache() + mock_fetch.return_value = ["Aave: Pool"] # type: ignore[attr-defined] + self.assertEqual(fetch_swiss_knife_labels(addr, 1), ["Aave: Pool"]) + self.assertEqual(mock_fetch.call_count, 2) # type: ignore[attr-defined] + class TestPickDisplayName(unittest.TestCase): """Sanity-check that we only use Swiss Knife's first label when it looks like a name.""" diff --git a/utils/disk_cache.py b/utils/disk_cache.py new file mode 100644 index 00000000..61414a84 --- /dev/null +++ b/utils/disk_cache.py @@ -0,0 +1,203 @@ +"""File-backed JSON cache with per-entry TTL and LRU eviction. + +Stores one small JSON file per key under ``{CACHE_DIR}/{namespace}/``. 
Built to +lift the previously process-lifetime in-memory caches in +:mod:`utils.source_context` and :mod:`utils.swiss_knife` onto disk now that +monitoring runs on a persistent VPS rather than ephemeral CI runners — the same +verified contract source / address labels were otherwise re-fetched from +Etherscan / Swiss Knife on every cron run. + +Positive (found) entries are written with ``ttl=None`` and live until evicted, +since verified source and curated labels are effectively immutable for a given +address. Negative (miss) entries are written with a short TTL (see +:data:`DEFAULT_NEGATIVE_TTL_SECONDS`) so a contract that gets verified — or an +address that later gains a label — is not cached as missing forever. + +Concurrency: writes go through a temp file + :func:`os.replace` (atomic on +POSIX), so a reader never observes a half-written entry even when the hourly and +multisig cron profiles overlap. Eviction and reads are best-effort: any +filesystem error degrades to a cache miss rather than raising. + +Sizing: each cache is bounded by ``max_entries`` and/or ``max_bytes``. When a +write pushes a namespace over either cap, least-recently-used entries are evicted +until both caps are satisfied. Recency is tracked by file mtime: a write sets it +and a successful :meth:`DiskCache.get` touches it (:func:`os.utime`), so an entry +re-read every cron run is kept even if it was written long ago. TTL is computed +from the stored write time, not mtime, so touching on read never extends a +negative entry's lifetime. +""" + +import json +import os +import time +from typing import Any + +from utils.cache import cache_path +from utils.logging import get_logger + +logger = get_logger("utils.disk_cache") + +# Default time-to-live for negative (miss) entries: 1 day. Overridable via env so +# the cadence can be tuned without a code change. 
+DEFAULT_NEGATIVE_TTL_SECONDS: float = float(os.getenv("CACHE_NEGATIVE_TTL_SECONDS", "86400")) + +# Sentinel returned by ``DiskCache.get`` when a key is absent or expired. +# Distinct from a stored value of ``None`` (a cached negative entry). +MISS: Any = object() + +_SAFE_CHARS = frozenset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._-") + + +def _safe_filename(key: str) -> str: + """Map a cache key to a filesystem-safe filename stem. + + Replaces any character outside ``[A-Za-z0-9._-]`` with ``_``. Callers pass + already-safe keys (e.g. ``"1-0xabc…"``), so this is a guard rather than a + collision-resistant hash. + """ + cleaned = "".join(c if c in _SAFE_CHARS else "_" for c in key) + return cleaned or "_" + + +class DiskCache: + """A namespaced, file-backed JSON cache with TTL and size-bounded eviction.""" + + def __init__( + self, + namespace: str, + *, + max_entries: int | None = None, + max_bytes: int | None = None, + negative_ttl: float = DEFAULT_NEGATIVE_TTL_SECONDS, + ) -> None: + """Initialise the cache. + + Args: + namespace: Subdirectory under ``CACHE_DIR`` that holds this cache's files. + max_entries: Evict oldest entries once the file count exceeds this. None disables. + max_bytes: Evict oldest entries once total bytes exceed this. None disables. + negative_ttl: Default TTL (seconds) used by :meth:`set_negative`. + """ + self.namespace = namespace + self.max_entries = max_entries + self.max_bytes = max_bytes + self.negative_ttl = negative_ttl + + def _dir(self) -> str: + # Resolved lazily (not at import) so an env/`CACHE_DIR` change — e.g. the + # tests' temp-dir redirect — is always honoured. + return cache_path(self.namespace) + + def _path(self, key: str) -> str: + return os.path.join(self._dir(), _safe_filename(key) + ".json") + + def get(self, key: str) -> Any: + """Return the cached value for ``key``, or :data:`MISS` if absent/expired. + + A return value of ``None`` is a genuine cached negative, distinct from + ``MISS``. 
Expired negative entries are removed best-effort on read. + """ + path = self._path(key) + try: + with open(path) as f: + entry = json.load(f) + except (OSError, json.JSONDecodeError): + return MISS + if not isinstance(entry, dict) or "v" not in entry: + return MISS + + ttl = entry.get("ttl") + if ttl is not None and (time.time() - float(entry.get("t", 0))) > float(ttl): + try: + os.remove(path) + except OSError: + pass + return MISS + + # LRU: bump the file mtime so eviction (which sorts by mtime) treats this + # as recently used — an entry re-read every cron run survives even if it + # was written long ago. TTL is unaffected: expiry keys off the JSON "t" + # field, not mtime, so a read never extends a negative entry's lifetime. + # Best-effort; a failed touch only means slightly staler eviction order. + try: + os.utime(path, None) + except OSError: + pass + return entry["v"] + + def set(self, key: str, value: Any, *, ttl: float | None) -> None: + """Write ``value`` under ``key`` with an optional ``ttl`` (seconds). + + ``ttl=None`` never expires. Failures are swallowed (best-effort cache). 
+ """ + directory = self._dir() + path = self._path(key) + tmp = f"{path}.{os.getpid()}.tmp" + try: + os.makedirs(directory, exist_ok=True) + with open(tmp, "w") as f: + json.dump({"v": value, "t": time.time(), "ttl": ttl}, f) + os.replace(tmp, path) + except OSError as e: + logger.debug("disk cache write failed for %s/%s: %s", self.namespace, key, e) + try: + os.remove(tmp) + except OSError: + pass + return + self._evict_if_needed(directory) + + def set_positive(self, key: str, value: Any) -> None: + """Cache a found value that never expires (immutable per key).""" + self.set(key, value, ttl=None) + + def set_negative(self, key: str, value: Any = None) -> None: + """Cache a miss for ``negative_ttl`` seconds so it's retried later.""" + self.set(key, value, ttl=self.negative_ttl) + + def clear(self) -> None: + """Remove every entry in this namespace (best-effort).""" + try: + for entry in os.scandir(self._dir()): + if entry.name.endswith(".json"): + try: + os.remove(entry.path) + except OSError: + pass + except OSError: + pass + + def _evict_if_needed(self, directory: str) -> None: + """Drop least-recently-used (oldest mtime) entries until size caps hold. + + ``get`` refreshes mtime on read, so oldest-mtime is least-recently-used + rather than merely oldest-written. 
+ """ + if self.max_entries is None and self.max_bytes is None: + return + try: + items = [(e.path, e.stat()) for e in os.scandir(directory) if e.name.endswith(".json")] + except OSError: + return + + count = len(items) + total_bytes = sum(st.st_size for _, st in items) + + def over_cap() -> bool: + return (self.max_entries is not None and count > self.max_entries) or ( + self.max_bytes is not None and total_bytes > self.max_bytes + ) + + if not over_cap(): + return + + items.sort(key=lambda item: item[1].st_mtime) # oldest first + for path, st in items: + if not over_cap(): + break + try: + os.remove(path) + count -= 1 + total_bytes -= st.st_size + except OSError: + pass diff --git a/utils/source_context.py b/utils/source_context.py index bbe3cef9..0ad08fd2 100644 --- a/utils/source_context.py +++ b/utils/source_context.py @@ -12,6 +12,7 @@ import re from dataclasses import dataclass +from utils.disk_cache import MISS, DiskCache from utils.http import fetch_json from utils.logging import get_logger @@ -25,11 +26,28 @@ MAX_SNIPPET_CHARS = 4000 # Per-process cache: (chain_id, address_lower) -> (contract_name, source, abi_json_string) -# or None for miss. Workflows are short-lived so a process-lifetime dict is sufficient. +# or None for miss. Backed by an on-disk cache (below) so the same verified source is +# not re-fetched from Etherscan on every cron run; the in-memory dict still serves repeat +# lookups within a single process for free. # The ABI is stored as the raw JSON string from Etherscan and parsed lazily by callers # that need it — keeps the cache small for the common case where only source is read. _source_cache: dict[tuple[int, str], tuple[str, str, str] | None] = {} +# On-disk layer keyed by "chain_id-address". Verified source is immutable per address, so +# positive entries never expire; "unverified" misses get the short negative TTL so a +# contract verified later is picked up. 
Source can be large (~500KB) — bound the namespace +# by total bytes as well as entry count. All tunable via env. +_source_disk_cache = DiskCache( + namespace="source-cache", + max_entries=int(os.getenv("SOURCE_CACHE_MAX_ENTRIES", "5000")), + max_bytes=int(os.getenv("SOURCE_CACHE_MAX_BYTES", str(256 * 1024 * 1024))), +) + + +def _disk_key(chain_id: int, address: str) -> str: + return f"{chain_id}-{address.lower()}" + + _NATSPEC_LINE = r"(?:[ \t]*///.*\n|[ \t]*\*[^/].*\n|[ \t]*/\*\*[\s\S]*?\*/[ \t]*\n)" _NATSPEC_BLOCK = rf"(?:(?:{_NATSPEC_LINE})+)?" @@ -83,6 +101,19 @@ def _fetch_etherscan_contract(chain_id: int, address: str) -> tuple[str, str, st if cache_key in _source_cache: return _source_cache[cache_key] + disk_key = _disk_key(chain_id, address) + disk_val = _source_disk_cache.get(disk_key) + if disk_val is not MISS: + if disk_val is None: + # Cached negative: a prior run saw this contract unverified. + _source_cache[cache_key] = None + return None + if isinstance(disk_val, (list, tuple)) and len(disk_val) == 3: + record = (disk_val[0], disk_val[1], disk_val[2]) + _source_cache[cache_key] = record + return record + # Unexpected shape — fall through to a live fetch. + params = { "chainid": str(chain_id), "module": "contract", @@ -91,12 +122,18 @@ def _fetch_etherscan_contract(chain_id: int, address: str) -> tuple[str, str, st "apikey": api_key, } data = fetch_json(ETHERSCAN_V2_API_URL, params=params) - results = (data or {}).get("result") or [] if (data or {}).get("status") == "1" else [] + # A clean response is status "1" with a result array; anything else (None on a + # request error, or status "0") is treated as transient and not persisted, so an + # Etherscan blip can't poison the disk cache as a day-long "unverified". 
+ status_ok = data is not None and data.get("status") == "1" + results = (data or {}).get("result") or [] if status_ok else [] entry = results[0] if results else {} raw_source = entry.get("SourceCode") or "" if not raw_source: _source_cache[cache_key] = None + if status_ok: + _source_disk_cache.set_negative(disk_key) return None result = ( @@ -105,6 +142,7 @@ def _fetch_etherscan_contract(chain_id: int, address: str) -> tuple[str, str, st entry.get("ABI") or "", ) _source_cache[cache_key] = result + _source_disk_cache.set_positive(disk_key, list(result)) return result diff --git a/utils/swiss_knife.py b/utils/swiss_knife.py index c18f16b1..089b3531 100644 --- a/utils/swiss_knife.py +++ b/utils/swiss_knife.py @@ -8,6 +8,9 @@ of a bare address. """ +import os + +from utils.disk_cache import DiskCache from utils.http import fetch_json from utils.logging import get_logger @@ -17,8 +20,22 @@ _REQUEST_TIMEOUT_S = 5 # Per-process cache: (chain_id, address_lower) -> labels (possibly empty). +# Backed by the on-disk cache below so labels survive across cron runs. _label_cache: dict[tuple[int, str], list[str]] = {} +# On-disk layer keyed by "chain_id-address". Labels are tiny and effectively stable, so a +# found label never expires; an empty result (no curated label) gets the short negative TTL +# so an address that later gains a label is picked up. Entry-count bounded (no byte cap +# needed — each entry is well under a KB). Tunable via env. +_label_disk_cache = DiskCache( + namespace="label-cache", + max_entries=int(os.getenv("LABEL_CACHE_MAX_ENTRIES", "50000")), +) + + +def _disk_key(chain_id: int, address: str) -> str: + return f"{chain_id}-{address.lower()}" + def fetch_swiss_knife_labels(address: str, chain_id: int) -> list[str]: """Return Swiss Knife labels for ``address`` on ``chain_id`` (empty on miss). 
@@ -35,16 +52,33 @@ def fetch_swiss_knife_labels(address: str, chain_id: int) -> list[str]: if cached is not None: return cached + disk_key = _disk_key(chain_id, address) + disk_val = _label_disk_cache.get(disk_key) + if isinstance(disk_val, list): + cached_labels = [s for s in disk_val if isinstance(s, str) and s] + _label_cache[cache_key] = cached_labels + return cached_labels + url = f"{_API_URL}/{address}" data = fetch_json(url, params={"chainId": chain_id}, timeout=_REQUEST_TIMEOUT_S) # Swiss Knife returns a JSON array directly. fetch_json's type hint is - # `dict | None` but it returns whatever `resp.json()` parses to. + # `dict | None` but it returns whatever `resp.json()` parses to. A non-None + # body is a real 200 response — either a label array or a `{"error": ...}` + # dict for unknown addresses, both of which we persist (the dict as an empty + # negative). `None` means an HTTP/network error, which we leave unpersisted so + # a transient blip is not cached as "no labels". + got_response = data is not None labels: list[str] = [] if isinstance(data, list): labels = [s for s in data if isinstance(s, str) and s] _label_cache[cache_key] = labels + if got_response: + if labels: + _label_disk_cache.set_positive(disk_key, labels) + else: + _label_disk_cache.set_negative(disk_key, []) return labels