From 4c9e843aa6c5b7dafed114b68e1a01c5868749de Mon Sep 17 00:00:00 2001 From: Nikita Kovalev Date: Mon, 22 Jun 2026 14:23:57 +0200 Subject: [PATCH 1/8] Use the SDK version as User-Agent in calls to Connect Backend --- supertab_connect/customer/token.py | 2 ++ supertab_connect/merchant/jwks.py | 3 ++- tests/customer/test_tokens.py | 8 ++++++++ tests/merchant/test_jwks.py | 13 +++++++++++++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/supertab_connect/customer/token.py b/supertab_connect/customer/token.py index 2776cb9..a4a65ff 100644 --- a/supertab_connect/customer/token.py +++ b/supertab_connect/customer/token.py @@ -15,6 +15,7 @@ from cryptography.hazmat.primitives.asymmetric import ec, rsa from cryptography.hazmat.primitives.serialization import load_pem_private_key +from supertab_connect._version import _get_sdk_user_agent from supertab_connect.common import debug_log, error_log from supertab_connect.exceptions import SupertabConnectError from supertab_connect.customer.content_matcher import _find_best_matching_content @@ -96,6 +97,7 @@ def _evict_expired_license_xml() -> None: def _create_async_client(**kwargs: Any) -> httpx.AsyncClient: kwargs.setdefault("follow_redirects", True) kwargs.setdefault("timeout", httpx.Timeout(_DEFAULT_HTTP_TIMEOUT_SECONDS)) + kwargs.setdefault("headers", {"User-Agent": _get_sdk_user_agent()}) return httpx.AsyncClient(**kwargs) diff --git a/supertab_connect/merchant/jwks.py b/supertab_connect/merchant/jwks.py index 1115aa6..dff9563 100644 --- a/supertab_connect/merchant/jwks.py +++ b/supertab_connect/merchant/jwks.py @@ -5,6 +5,7 @@ import httpx +from supertab_connect._version import _get_sdk_user_agent from supertab_connect.common import debug_log, error_log from supertab_connect.exceptions import JwksKeyNotFoundError @@ -17,7 +18,7 @@ def _get_http_client() -> httpx.AsyncClient: global _http_client if _http_client is None or _http_client.is_closed: - _http_client = httpx.AsyncClient() + _http_client = 
httpx.AsyncClient(headers={"User-Agent": _get_sdk_user_agent()}) return _http_client diff --git a/tests/customer/test_tokens.py b/tests/customer/test_tokens.py index ff4ceaf..8309eef 100644 --- a/tests/customer/test_tokens.py +++ b/tests/customer/test_tokens.py @@ -34,6 +34,14 @@ def create_mock_client(**kwargs: Any) -> httpx.AsyncClient: ) +async def test_create_async_client_sets_sdk_user_agent() -> None: + """The customer HTTP client (license.xml + token calls) carries the SDK User-Agent.""" + from supertab_connect._version import _get_sdk_user_agent + + async with _create_async_client() as client: + assert client.headers["User-Agent"] == _get_sdk_user_agent() + + def test_obtain_license_token_fetches_and_caches_token( monkeypatch: pytest.MonkeyPatch, ) -> None: diff --git a/tests/merchant/test_jwks.py b/tests/merchant/test_jwks.py index 705dcd8..b442632 100644 --- a/tests/merchant/test_jwks.py +++ b/tests/merchant/test_jwks.py @@ -30,6 +30,19 @@ async def test_fetch_platform_jwks(jwks_response): assert route.call_count == 1 +async def test_fetch_platform_jwks_sends_sdk_user_agent(jwks_response): + """JWKS requests carry the SDK User-Agent header.""" + from supertab_connect._version import _get_sdk_user_agent + + with respx.mock: + route = respx.get(JWKS_URL).respond(json=jwks_response) + + clear_jwks_cache() + await fetch_platform_jwks(SUPERTAB_BASE_URL) + + assert route.calls[0].request.headers["User-Agent"] == _get_sdk_user_agent() + + async def test_fetch_platform_jwks_caches_result(jwks_response): """Second call returns cached JWKS without a network request.""" with respx.mock: From 076dd2a8bab23f9de01cc65e8da71f2df40e8342 Mon Sep 17 00:00:00 2001 From: Nikita Kovalev Date: Mon, 22 Jun 2026 17:53:20 +0200 Subject: [PATCH 2/8] Update enforcement mode enum --- examples/merchant_handle_request.py | 2 +- examples/merchant_verify_and_record_event.py | 2 +- supertab_connect/merchant/client.py | 4 ++-- supertab_connect/types.py | 6 +++--- 
tests/merchant/test_client.py | 16 ++++++++-------- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/examples/merchant_handle_request.py b/examples/merchant_handle_request.py index f3708de..1f8d7d8 100644 --- a/examples/merchant_handle_request.py +++ b/examples/merchant_handle_request.py @@ -16,7 +16,7 @@ async def main() -> None: client = SupertabConnect( SupertabConnectConfig( api_key="your_api_key", - enforcement=EnforcementMode.STRICT, + enforcement=EnforcementMode.ENFORCE, debug=True, ) ) diff --git a/examples/merchant_verify_and_record_event.py b/examples/merchant_verify_and_record_event.py index 2d727a2..7dc91c4 100644 --- a/examples/merchant_verify_and_record_event.py +++ b/examples/merchant_verify_and_record_event.py @@ -14,7 +14,7 @@ async def main() -> None: client = SupertabConnect( SupertabConnectConfig( api_key="your_api_key", - enforcement=EnforcementMode.SOFT, + enforcement=EnforcementMode.OBSERVE, debug=True, ) ) diff --git a/supertab_connect/merchant/client.py b/supertab_connect/merchant/client.py index 0e9f91a..504f276 100644 --- a/supertab_connect/merchant/client.py +++ b/supertab_connect/merchant/client.py @@ -171,12 +171,12 @@ async def handle_request(self, request: Request) -> HandlerResult: if not self._detect_bot(request): return {"action": HandlerAction.ALLOW} - if self.enforcement is EnforcementMode.STRICT: + if self.enforcement is EnforcementMode.ENFORCE: return build_block_result( reason=LicenseTokenInvalidReason.MISSING_TOKEN, error="Authorization header missing or malformed", request_url=url, ) - if self.enforcement is EnforcementMode.SOFT: + if self.enforcement is EnforcementMode.OBSERVE: return build_signal_result(url) return {"action": HandlerAction.ALLOW} diff --git a/supertab_connect/types.py b/supertab_connect/types.py index 54a0bf4..7a32994 100644 --- a/supertab_connect/types.py +++ b/supertab_connect/types.py @@ -10,8 +10,8 @@ class EnforcementMode(StrEnum): DISABLED = "disabled" - SOFT = "soft" - STRICT = 
"strict" + OBSERVE = "observe" + ENFORCE = "enforce" class LicenseTokenInvalidReason(StrEnum): @@ -46,7 +46,7 @@ class UsageType(StrEnum): @dataclass(frozen=True) class SupertabConnectConfig: api_key: str - enforcement: EnforcementMode = EnforcementMode.SOFT + enforcement: EnforcementMode = EnforcementMode.OBSERVE supertab_base_url: str | None = None bot_detector: BotDetector | None = None debug: bool = False diff --git a/tests/merchant/test_client.py b/tests/merchant/test_client.py index 9832de6..3868db2 100644 --- a/tests/merchant/test_client.py +++ b/tests/merchant/test_client.py @@ -33,13 +33,13 @@ def _reset_supertab_connect_singleton(): def test_supertab_connect_returns_existing_instance_for_same_api_key(): - first = SupertabConnect(SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.STRICT)) + first = SupertabConnect(SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.ENFORCE)) second = SupertabConnect( - SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.SOFT, debug=True) + SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.OBSERVE, debug=True) ) assert first is second - assert second.enforcement is EnforcementMode.STRICT + assert second.enforcement is EnforcementMode.ENFORCE assert second.debug is False @@ -154,7 +154,7 @@ async def stub_verify_and_record_event(**kwargs): monkeypatch.setattr("supertab_connect.merchant.client.verify_and_record_event", stub_verify_and_record_event) - client = SupertabConnect(SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.STRICT)) + client = SupertabConnect(SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.ENFORCE)) result = await client.handle_request( _make_request( { @@ -232,7 +232,7 @@ async def close_jwks(): async def test_handle_request_allows_missing_token_without_bot_detector(): - client = SupertabConnect(SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.STRICT)) 
+ client = SupertabConnect(SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.ENFORCE)) result = await client.handle_request(_make_request({"User-Agent": "Browser/1.0"})) @@ -243,7 +243,7 @@ async def test_handle_request_allows_missing_token_for_non_bot(): client = SupertabConnect( SupertabConnectConfig( api_key="sk_test_123", - enforcement=EnforcementMode.STRICT, + enforcement=EnforcementMode.ENFORCE, bot_detector=lambda request: False, ) ) @@ -257,7 +257,7 @@ async def test_handle_request_blocks_bot_in_strict_mode(): client = SupertabConnect( SupertabConnectConfig( api_key="sk_test_123", - enforcement=EnforcementMode.STRICT, + enforcement=EnforcementMode.ENFORCE, bot_detector=lambda request: True, ) ) @@ -273,7 +273,7 @@ async def test_handle_request_signals_bot_in_soft_mode(): client = SupertabConnect( SupertabConnectConfig( api_key="sk_test_123", - enforcement=EnforcementMode.SOFT, + enforcement=EnforcementMode.OBSERVE, bot_detector=lambda request: True, ) ) From 653d4ac17209f3eceb319fd3ef1b5590c6616dea Mon Sep 17 00:00:00 2001 From: Nikita Kovalev Date: Wed, 24 Jun 2026 12:48:47 +0200 Subject: [PATCH 3/8] First alignment with PR #31 from TS SDK This is not yet including the changes merged to it from #33 and #34 --- README.md | 56 +++++- supertab_connect/__init__.py | 5 + supertab_connect/analytics/__init__.py | 41 ++++ .../analytics/build_analytics_event.py | 83 ++++++++ supertab_connect/analytics/ip.py | 27 +++ supertab_connect/analytics/transport.py | 103 ++++++++++ supertab_connect/analytics/types.py | 86 ++++++++ supertab_connect/merchant/client.py | 119 ++++++++++- supertab_connect/types.py | 25 ++- tests/analytics/__init__.py | 0 tests/analytics/conftest.py | 13 ++ tests/analytics/test_build_analytics_event.py | 163 +++++++++++++++ tests/analytics/test_ip.py | 28 +++ tests/analytics/test_transport.py | 132 ++++++++++++ tests/merchant/test_client_analytics.py | 190 ++++++++++++++++++ 15 files changed, 1067 insertions(+), 4 
deletions(-) create mode 100644 supertab_connect/analytics/__init__.py create mode 100644 supertab_connect/analytics/build_analytics_event.py create mode 100644 supertab_connect/analytics/ip.py create mode 100644 supertab_connect/analytics/transport.py create mode 100644 supertab_connect/analytics/types.py create mode 100644 tests/analytics/__init__.py create mode 100644 tests/analytics/conftest.py create mode 100644 tests/analytics/test_build_analytics_event.py create mode 100644 tests/analytics/test_ip.py create mode 100644 tests/analytics/test_transport.py create mode 100644 tests/merchant/test_client_analytics.py diff --git a/README.md b/README.md index 2163465..e049e41 100644 --- a/README.md +++ b/README.md @@ -85,8 +85,60 @@ asyncio.run(main()) ``` For request-level enforcement, use `SupertabConnect.handle_request()` with an -`httpx.Request`. See the `examples` directory for complete merchant and customer -examples. +`httpx.Request`. It extracts the license token from the `Authorization` header, +verifies it, optionally emits a relay analytics event, and applies bot detection +and enforcement mode when no token is present. It returns either +`{"action": HandlerAction.ALLOW, ...}` or +`{"action": HandlerAction.BLOCK, "status": ..., "body": ..., "headers": ...}`. + +`handle_request()` accepts an optional second argument, a `HandleRequestContext`, +which carries per-request signals supplied by an upstream CDN/proxy +(`source_cdn`, `client_ip`, `request_id`, `request_country`, `request_asn`, +`tls_fingerprint`). These are recorded on the analytics event when present; for +direct SDK use the context can be omitted. + +See the `examples` directory for complete merchant and customer examples. + +## Analytics + +The SDK can emit one analytics event per request to the Supertab Connect +**relay** endpoint at `{base_url}/ingest/events`. 
This is **off by default** — +enable it by passing `analytics_enabled=True`: + +```python +from supertab_connect import SupertabConnect, SupertabConnectConfig + +client = SupertabConnect( + SupertabConnectConfig( + api_key="stc_live_your_api_key", + analytics_enabled=True, + ) +) +``` + +**No extra credentials are required.** Analytics requests are authenticated with +your configured merchant `api_key` using `Authorization: Bearer {api_key}`. The +backend derives merchant identity from the API key, so the SDK sends **no +merchant identifier** in the analytics payload. + +Each `AnalyticsEvent` captures the request id, source CDN, a normalized client +IP, the request path (with percent-encoding preserved), method, and selected +headers — plus, when an upstream CDN exposes them via `HandleRequestContext`, the +request country, ASN, TLS fingerprint, and HTTP Message Signature headers — along +with the verification/enforcement decision for the request. + +**Fail-open:** analytics emission is fire-and-forget and can never block, slow, +or alter request handling. If emission fails, the error is swallowed and the +request proceeds exactly as it would with analytics disabled. Analytics is sent +only to the relay at `/ingest/events`, independent of billing event recording. + +Point analytics at another environment by setting `supertab_base_url` on the +config (or `SupertabConnect.set_base_url(...)`). + +For advanced use, the `AnalyticsTransport` protocol lets you inject a custom +transport (for example, an in-memory recorder in tests) via the internal +`analytics_transport` config field; `AnalyticsEvent` and `HandleRequestContext` +are exported from the package root. 
## Error Handling diff --git a/supertab_connect/__init__.py b/supertab_connect/__init__.py index 6c12aca..2789e67 100644 --- a/supertab_connect/__init__.py +++ b/supertab_connect/__init__.py @@ -1,5 +1,6 @@ """Supertab Connect SDK.""" +from supertab_connect.analytics.types import AnalyticsEvent, AnalyticsTransport from supertab_connect.customer.token import obtain_license_token from supertab_connect.exceptions import SupertabConnectError from supertab_connect.merchant.bots import default_bot_detector @@ -7,6 +8,7 @@ from supertab_connect.merchant.license import verify_license_token from supertab_connect.types import ( EnforcementMode, + HandleRequestContext, HandlerAction, HandlerResult, RSLVerificationResult, @@ -15,7 +17,10 @@ ) __all__ = [ + "AnalyticsEvent", + "AnalyticsTransport", "EnforcementMode", + "HandleRequestContext", "HandlerAction", "HandlerResult", "RSLVerificationResult", diff --git a/supertab_connect/analytics/__init__.py b/supertab_connect/analytics/__init__.py new file mode 100644 index 0000000..3d20233 --- /dev/null +++ b/supertab_connect/analytics/__init__.py @@ -0,0 +1,41 @@ +"""Relay analytics for Supertab Connect (mirrors the TS SDK `analytics/` module).""" + +from supertab_connect.analytics.build_analytics_event import ( + BuildAnalyticsEventContext, + build_analytics_event, +) +from supertab_connect.analytics.ip import normalize_client_ip +from supertab_connect.analytics.transport import ( + ANALYTICS_EVENTS_PATH, + HttpAnalyticsTransport, + NoopAnalyticsTransport, + aclose_http_client as aclose_analytics_http_client, +) +from supertab_connect.analytics.types import ( + SCHEMA_VERSION, + TOKEN_OUTCOME_BY_REASON, + AnalyticsEvent, + AnalyticsTransport, + Decision, + FinalAction, + SourceCdn, + TokenOutcome, +) + +__all__ = [ + "ANALYTICS_EVENTS_PATH", + "SCHEMA_VERSION", + "TOKEN_OUTCOME_BY_REASON", + "AnalyticsEvent", + "AnalyticsTransport", + "BuildAnalyticsEventContext", + "Decision", + "FinalAction", + "HttpAnalyticsTransport", + 
"NoopAnalyticsTransport", + "SourceCdn", + "TokenOutcome", + "aclose_analytics_http_client", + "build_analytics_event", + "normalize_client_ip", +] diff --git a/supertab_connect/analytics/build_analytics_event.py b/supertab_connect/analytics/build_analytics_event.py new file mode 100644 index 0000000..b922e2c --- /dev/null +++ b/supertab_connect/analytics/build_analytics_event.py @@ -0,0 +1,83 @@ +"""Build a relay AnalyticsEvent from a request + decision (mirrors TS `buildAnalyticsEvent.ts`).""" + +import uuid +from dataclasses import dataclass +from datetime import datetime, timezone + +from httpx import Request + +from supertab_connect.analytics.ip import normalize_client_ip +from supertab_connect.analytics.types import ( + SCHEMA_VERSION, + AnalyticsEvent, + Decision, + EnforcementWire, + SourceCdn, +) +from supertab_connect.types import EnforcementMode + + +@dataclass(frozen=True) +class BuildAnalyticsEventContext: + # Omitted (None) when the request did not pass through a CDN (e.g. invoked directly via the SDK). + source_cdn: SourceCdn | None = None + request_id: str | None = None + client_ip: str | None = None + timestamp: datetime | None = None + request_country: str | None = None + request_asn: int | None = None + tls_fingerprint: str | None = None + + +def _iso_utc(value: datetime) -> str: + """Format as ``YYYY-MM-DDTHH:MM:SS.mmmZ`` to match the TS `Date.toISOString()` wire form.""" + return value.astimezone(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z") + + +def _safe_pathname(request: Request) -> str: + """Return the request path with percent-encoding preserved. + + ``request.url.path`` percent-*decodes* (``/a%2Fb`` → ``/a/b``), which loses encoded path + semantics. We read ``raw_path`` (``path[?query]`` bytes), drop the query, and decode without + URL-decoding — matching the TS SDK's ``new URL(request.url).pathname``. 
+ """ + path_bytes = request.url.raw_path.split(b"?", 1)[0] + return path_bytes.decode("utf-8", "replace") + + +def _enforcement_to_wire(mode: EnforcementMode) -> EnforcementWire: + # EnforcementMode values are already the wire strings ("observe"/"enforce"/"disabled"). + return mode.value # type: ignore[return-value] + + +def build_analytics_event( + request: Request, + decision: Decision, + context: BuildAnalyticsEventContext, +) -> AnalyticsEvent: + headers = request.headers + timestamp = context.timestamp if context.timestamp is not None else datetime.now(timezone.utc) + request_id = context.request_id if context.request_id is not None else str(uuid.uuid4()) + + return AnalyticsEvent( + timestamp=_iso_utc(timestamp), + request_id=request_id, + schema_version=SCHEMA_VERSION, + source_cdn=context.source_cdn, + user_agent=headers.get("user-agent", ""), + client_ip=normalize_client_ip(context.client_ip), + path=_safe_pathname(request), + method=request.method, + referer=headers.get("referer", ""), + accept_language=headers.get("accept-language", ""), + request_country=context.request_country, + request_asn=context.request_asn, + tls_fingerprint=context.tls_fingerprint, + has_token=decision.has_token, + token_outcome=decision.token_outcome, + final_action=decision.final_action, + enforcement_mode=_enforcement_to_wire(decision.enforcement_mode), + signature_agent=headers.get("signature-agent"), + signature_input=headers.get("signature-input"), + signature=headers.get("signature"), + ) diff --git a/supertab_connect/analytics/ip.py b/supertab_connect/analytics/ip.py new file mode 100644 index 0000000..417f2c3 --- /dev/null +++ b/supertab_connect/analytics/ip.py @@ -0,0 +1,27 @@ +"""Client-IP normalization (mirrors TS `analytics/ip.ts`). + +IPv4 addresses are mapped to their IPv6-mapped form (``::ffff:``); valid IPv6 +addresses pass through unchanged; anything else collapses to the unspecified address. 
+""" + +import ipaddress + +UNSPECIFIED = "::" + + +def normalize_client_ip(raw: str | None) -> str: + if not raw: + return UNSPECIFIED + trimmed = raw.strip() + if not trimmed: + return UNSPECIFIED + + try: + parsed = ipaddress.ip_address(trimmed) + except ValueError: + return UNSPECIFIED + + if parsed.version == 4: + return f"::ffff:{trimmed}" + # IPv6 passes through unchanged (the original textual form, not a re-compressed one). + return trimmed diff --git a/supertab_connect/analytics/transport.py b/supertab_connect/analytics/transport.py new file mode 100644 index 0000000..15b8a05 --- /dev/null +++ b/supertab_connect/analytics/transport.py @@ -0,0 +1,103 @@ +"""Analytics transports (mirrors TS `analytics/transport.ts`). + +The HTTP transport is fire-and-forget: ``emit`` schedules the POST on the running +event loop and returns immediately, never blocking the request path or raising. +""" + +import asyncio +from dataclasses import asdict + +import httpx + +from supertab_connect._version import _get_sdk_user_agent +from supertab_connect.analytics.types import AnalyticsEvent, AnalyticsTransport +from supertab_connect.common import debug_log, error_log + +ANALYTICS_EVENTS_PATH = "/ingest/events" + +# Hold strong references to in-flight emit tasks so they are not garbage-collected +# before they finish (asyncio only keeps weak references to scheduled tasks). +_background_tasks: set[asyncio.Task] = set() + +_http_client: httpx.AsyncClient | None = None + + +def _get_http_client() -> httpx.AsyncClient: + global _http_client + if _http_client is None or _http_client.is_closed: + _http_client = httpx.AsyncClient(headers={"User-Agent": _get_sdk_user_agent()}) + return _http_client + + +async def aclose_http_client() -> None: + global _http_client + if _http_client is not None and not _http_client.is_closed: + await _http_client.aclose() + _http_client = None + + +class NoopAnalyticsTransport: + """A transport that discards every event. 
Used when analytics is disabled.""" + + def emit(self, event: AnalyticsEvent) -> None: + # intentional no-op + return None + + +class HttpAnalyticsTransport: + """Posts events to the Supertab Connect relay, fire-and-forget.""" + + def __init__(self, *, url: str, api_key: str, debug: bool = False) -> None: + self._url = url + self._api_key = api_key + self._debug = debug + + def emit(self, event: AnalyticsEvent) -> None: + try: + loop = asyncio.get_running_loop() + except RuntimeError: + # No running event loop to schedule onto; analytics is best-effort, so skip. + debug_log(self._debug, "Skipping analytics emit: no running event loop") + return None + + task = loop.create_task(self._send(event)) + _background_tasks.add(task) + task.add_done_callback(self._on_task_done) + return None + + def _on_task_done(self, task: asyncio.Task) -> None: + # Backstop: drop the reference and retrieve any exception so it never surfaces as an + # "exception was never retrieved" warning, even if _send's own guard is somehow bypassed. + _background_tasks.discard(task) + if task.cancelled(): + return + error = task.exception() + if error is not None: + error_log(self._debug, f"analytics emit task error: {error}") + + async def _send(self, event: AnalyticsEvent) -> None: + # Fail-open: analytics must never block, slow, or alter request handling, so every error + # (transport, serialization, anything) is swallowed here rather than propagating. + try: + response = await _get_http_client().post( + self._url, + json=asdict(event), + headers={ + "Authorization": f"Bearer {self._api_key}", + "Content-Type": "application/json", + }, + ) + if not response.is_success: + debug_log(self._debug, f"analytics emit failed: {response.status_code}") + except Exception as error: # noqa: BLE001 — fail-open guarantee, see comment above + error_log(self._debug, f"analytics emit error: {error}") + + +# Re-exported so callers can rely on structural typing without importing from `types`. 
+__all__ = [ + "ANALYTICS_EVENTS_PATH", + "AnalyticsTransport", + "HttpAnalyticsTransport", + "NoopAnalyticsTransport", + "aclose_http_client", +] diff --git a/supertab_connect/analytics/types.py b/supertab_connect/analytics/types.py new file mode 100644 index 0000000..0ab431e --- /dev/null +++ b/supertab_connect/analytics/types.py @@ -0,0 +1,86 @@ +"""Analytics event schema and transport protocol (mirrors TS `analytics/types.ts`).""" + +from dataclasses import dataclass +from typing import Literal, Protocol, runtime_checkable + +from supertab_connect.types import EnforcementMode, LicenseTokenInvalidReason + +SCHEMA_VERSION = 1 + +SourceCdn = Literal["cloudflare", "fastly", "cloudfront"] + +TokenOutcome = Literal[ + "absent", + "valid", + "expired", + "invalid_signature", + "invalid_audience", + "invalid_resource", + "invalid_issuer", + "malformed", + "server_error", + "not_validated", +] + +FinalAction = Literal["allow", "observe", "block"] + +EnforcementWire = Literal["observe", "enforce", "disabled"] + + +@dataclass(frozen=True) +class Decision: + has_token: bool + token_outcome: TokenOutcome + final_action: FinalAction + enforcement_mode: EnforcementMode + + +@dataclass(frozen=True) +class AnalyticsEvent: + timestamp: str + request_id: str + schema_version: int + # None when the request did not pass through a CDN (e.g. invoked directly via the SDK). + source_cdn: SourceCdn | None + + user_agent: str + client_ip: str + path: str + method: str + referer: str + accept_language: str + + # Classification signals — supplied by the CDN layer (platform-specific). None when not exposed. + request_country: str | None + request_asn: int | None + tls_fingerprint: str | None + + has_token: bool + token_outcome: TokenOutcome + final_action: FinalAction + enforcement_mode: EnforcementWire + + # HTTP Message Signature headers — platform-agnostic, read directly from request headers. 
+ signature_agent: str | None + signature_input: str | None + signature: str | None + + +@runtime_checkable +class AnalyticsTransport(Protocol): + def emit(self, event: AnalyticsEvent) -> None: + """Emit an analytics event. Implementations must never block the request path or raise.""" + ... + + +TOKEN_OUTCOME_BY_REASON: dict[LicenseTokenInvalidReason, TokenOutcome] = { + LicenseTokenInvalidReason.MISSING_TOKEN: "absent", + LicenseTokenInvalidReason.EXPIRED: "expired", + LicenseTokenInvalidReason.SIGNATURE_VERIFICATION_FAILED: "invalid_signature", + LicenseTokenInvalidReason.INVALID_AUDIENCE: "invalid_audience", + LicenseTokenInvalidReason.INVALID_ISSUER: "invalid_issuer", + LicenseTokenInvalidReason.INVALID_HEADER: "malformed", + LicenseTokenInvalidReason.INVALID_PAYLOAD: "malformed", + LicenseTokenInvalidReason.INVALID_ALG: "malformed", + LicenseTokenInvalidReason.SERVER_ERROR: "server_error", +} diff --git a/supertab_connect/merchant/client.py b/supertab_connect/merchant/client.py index 504f276..4df6526 100644 --- a/supertab_connect/merchant/client.py +++ b/supertab_connect/merchant/client.py @@ -5,6 +5,24 @@ from httpx import Request +from supertab_connect.analytics.build_analytics_event import ( + BuildAnalyticsEventContext, + build_analytics_event, +) +from supertab_connect.analytics.transport import ( + ANALYTICS_EVENTS_PATH, + HttpAnalyticsTransport, + NoopAnalyticsTransport, +) +from supertab_connect.analytics.transport import aclose_http_client as aclose_analytics_http_client +from supertab_connect.analytics.types import ( + TOKEN_OUTCOME_BY_REASON, + AnalyticsTransport, + Decision, + FinalAction, + TokenOutcome, +) +from supertab_connect.common import error_log from supertab_connect.merchant.events import aclose_http_client as aclose_events_http_client from supertab_connect.merchant.license import ( build_block_result, @@ -16,6 +34,7 @@ from supertab_connect.types import ( BotDetector, EnforcementMode, + HandleRequestContext, HandlerAction, 
HandlerResult, InvalidLicenseToken, @@ -57,9 +76,21 @@ def __init__(self, config: SupertabConnectConfig, reset: bool = False) -> None: self.bot_detector = config.bot_detector self.debug = config.debug self._base_url_override = config.supertab_base_url + self._analytics_transport = self._build_analytics_transport(config) self._initialized = True type(self)._instance = self + def _build_analytics_transport(self, config: SupertabConnectConfig) -> AnalyticsTransport: + if config.analytics_transport is not None: + return config.analytics_transport + if not config.analytics_enabled: + return NoopAnalyticsTransport() + return HttpAnalyticsTransport( + url=f"{self.base_url.rstrip('/')}{ANALYTICS_EVENTS_PATH}", + api_key=config.api_key, + debug=config.debug, + ) + @classmethod def reset_instance(cls) -> None: cls._instance = None @@ -79,6 +110,7 @@ def base_url(self) -> str: async def aclose(self) -> None: await aclose_events_http_client() await aclose_jwks_http_client() + await aclose_analytics_http_client() async def __aenter__(self) -> "SupertabConnect": return self @@ -138,17 +170,60 @@ def _detect_bot(self, request: Request) -> bool: return detector(request) - async def handle_request(self, request: Request) -> HandlerResult: + def _emit_analytics( + self, + request: Request, + context: HandleRequestContext | None, + *, + has_token: bool, + token_outcome: TokenOutcome, + final_action: FinalAction, + ) -> None: + try: + event = build_analytics_event( + request, + Decision( + has_token=has_token, + token_outcome=token_outcome, + final_action=final_action, + enforcement_mode=self.enforcement, + ), + BuildAnalyticsEventContext( + source_cdn=context.source_cdn if context else None, + request_id=context.request_id if context else None, + client_ip=context.client_ip if context else None, + request_country=context.request_country if context else None, + request_asn=context.request_asn if context else None, + tls_fingerprint=context.tls_fingerprint if context else None, + ), + 
) + self._analytics_transport.emit(event) + except Exception as error: # noqa: BLE001 — analytics must never break request handling + error_log(self.debug, f"failed to build/emit analytics event: {error}") + + async def handle_request(self, request: Request, context: HandleRequestContext | None = None) -> HandlerResult: auth = request.headers.get("authorization", "") token = None auth_parts = auth.split(None, 1) if len(auth_parts) == 2 and auth_parts[0].lower() == "license": token = auth_parts[1] + has_token = token is not None url = str(request.url) user_agent = request.headers.get("user-agent", "unknown") + # Token present → validate, regardless of bot detection — except in DISABLED + # mode, which short-circuits to ALLOW without verification. if token: if self.enforcement is EnforcementMode.DISABLED: + # DISABLED short-circuits to ALLOW without verifying the token, so we cannot + # honestly claim "valid"; emit "not_validated" so it is not counted as licensed. + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome="not_validated", + final_action="allow", + ) return {"action": HandlerAction.ALLOW} verification = await verify_and_record_event( @@ -161,22 +236,64 @@ async def handle_request(self, request: Request) -> HandlerResult: request_headers=dict(request.headers.items()), ) if isinstance(verification, InvalidLicenseToken): + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome=TOKEN_OUTCOME_BY_REASON.get(verification.reason, "malformed"), + final_action="block", + ) return build_block_result( reason=verification.reason, error=verification.error, request_url=url, ) + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome="valid", + final_action="allow", + ) return {"action": HandlerAction.ALLOW} if not self._detect_bot(request): + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome="absent", + final_action="allow", + ) return {"action": 
HandlerAction.ALLOW} if self.enforcement is EnforcementMode.ENFORCE: + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome="absent", + final_action="block", + ) return build_block_result( reason=LicenseTokenInvalidReason.MISSING_TOKEN, error="Authorization header missing or malformed", request_url=url, ) if self.enforcement is EnforcementMode.OBSERVE: + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome="absent", + final_action="observe", + ) return build_signal_result(url) + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome="absent", + final_action="allow", + ) return {"action": HandlerAction.ALLOW} diff --git a/supertab_connect/types.py b/supertab_connect/types.py index 7a32994..86683cf 100644 --- a/supertab_connect/types.py +++ b/supertab_connect/types.py @@ -3,10 +3,13 @@ from collections.abc import Callable from dataclasses import dataclass, field from enum import StrEnum -from typing import Any, Literal, NotRequired, TypeAlias, TypedDict +from typing import TYPE_CHECKING, Any, Literal, NotRequired, TypeAlias, TypedDict from httpx import Request +if TYPE_CHECKING: + from supertab_connect.analytics.types import AnalyticsTransport + class EnforcementMode(StrEnum): DISABLED = "disabled" @@ -50,6 +53,26 @@ class SupertabConnectConfig: supertab_base_url: str | None = None bot_detector: BotDetector | None = None debug: bool = False + # Enables analytics emission to the Supertab Connect relay. Default: False. + analytics_enabled: bool = False + # Internal dependency-injection seam: overrides the default HttpAnalyticsTransport when provided. + # Used by tests to inject in-memory transports. Not a merchant-facing option. + analytics_transport: "AnalyticsTransport | None" = None + + +@dataclass(frozen=True) +class HandleRequestContext: + """Optional CDN-supplied request context for `handle_request`. 
+ + All fields are omitted (None) for direct SDK invocation that did not pass through a CDN. + """ + + source_cdn: Literal["cloudflare", "fastly", "cloudfront"] | None = None + client_ip: str | None = None + request_id: str | None = None + request_country: str | None = None + request_asn: int | None = None + tls_fingerprint: str | None = None class AllowHandlerResult(TypedDict): diff --git a/tests/analytics/__init__.py b/tests/analytics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/analytics/conftest.py b/tests/analytics/conftest.py new file mode 100644 index 0000000..becf33a --- /dev/null +++ b/tests/analytics/conftest.py @@ -0,0 +1,13 @@ +"""Shared fixtures for analytics tests.""" + +import pytest + +from supertab_connect.analytics import transport as transport_module + + +@pytest.fixture(autouse=True) +async def _reset_analytics_http_client(): + """Reset the module-level analytics http client around each test.""" + await transport_module.aclose_http_client() + yield + await transport_module.aclose_http_client() diff --git a/tests/analytics/test_build_analytics_event.py b/tests/analytics/test_build_analytics_event.py new file mode 100644 index 0000000..b1c3e3b --- /dev/null +++ b/tests/analytics/test_build_analytics_event.py @@ -0,0 +1,163 @@ +"""Tests for building relay analytics events.""" + +from dataclasses import asdict, replace +from datetime import datetime, timezone + +import httpx +import pytest + +from supertab_connect.analytics.build_analytics_event import ( + BuildAnalyticsEventContext, + build_analytics_event, +) +from supertab_connect.analytics.types import SCHEMA_VERSION, Decision +from supertab_connect.types import EnforcementMode + +FIXED_TIME = datetime(2026, 4, 29, 12, 0, 0, tzinfo=timezone.utc) +REQUEST_ID = "req-123" + +BASE_DECISION = Decision( + has_token=False, + token_outcome="absent", + final_action="allow", + enforcement_mode=EnforcementMode.OBSERVE, +) + + +def _make_request( + *, + url: str = 
"https://example.com/articles/foo?x=1", + method: str = "GET", + headers: dict[str, str] | None = None, +) -> httpx.Request: + return httpx.Request(method, url, headers=headers or {}) + + +def _ctx(**extra) -> BuildAnalyticsEventContext: + base = BuildAnalyticsEventContext(request_id=REQUEST_ID, source_cdn="cloudflare", timestamp=FIXED_TIME) + return replace(base, **extra) + + +def test_returns_event_matching_relay_shape(): + request = _make_request( + headers={ + "user-agent": "Mozilla/5.0", + "referer": "https://example.com/", + "accept-language": "en-US,en;q=0.9", + } + ) + + event = build_analytics_event(request, BASE_DECISION, _ctx(client_ip="1.2.3.4")) + + assert asdict(event) == { + "timestamp": "2026-04-29T12:00:00.000Z", + "request_id": REQUEST_ID, + "schema_version": SCHEMA_VERSION, + "source_cdn": "cloudflare", + "user_agent": "Mozilla/5.0", + "client_ip": "::ffff:1.2.3.4", + "path": "/articles/foo", + "method": "GET", + "referer": "https://example.com/", + "accept_language": "en-US,en;q=0.9", + "request_country": None, + "request_asn": None, + "tls_fingerprint": None, + "has_token": False, + "token_outcome": "absent", + "final_action": "allow", + "enforcement_mode": "observe", + "signature_agent": None, + "signature_input": None, + "signature": None, + } + + +def test_passes_through_classification_signals(): + event = build_analytics_event( + _make_request(), + BASE_DECISION, + _ctx(request_country="DE", request_asn=3320, tls_fingerprint="abc123"), + ) + assert event.request_country == "DE" + assert event.request_asn == 3320 + assert event.tls_fingerprint == "abc123" + + +def test_classification_signals_default_to_none(): + event = build_analytics_event(_make_request(), BASE_DECISION, _ctx()) + assert event.request_country is None + assert event.request_asn is None + assert event.tls_fingerprint is None + + +def test_reads_signature_headers_from_request(): + request = _make_request( + headers={ + "signature-agent": "https://agent.example", + 
"signature-input": "sig1=(...)", + "signature": "sig1=:abc:", + } + ) + event = build_analytics_event(request, BASE_DECISION, _ctx()) + assert event.signature_agent == "https://agent.example" + assert event.signature_input == "sig1=(...)" + assert event.signature == "sig1=:abc:" + + +def test_signature_headers_default_to_none(): + event = build_analytics_event(_make_request(), BASE_DECISION, _ctx()) + assert event.signature_agent is None + assert event.signature_input is None + assert event.signature is None + + +@pytest.mark.parametrize("final_action", ["allow", "observe", "block"]) +def test_passes_through_final_action(final_action): + decision = replace(BASE_DECISION, final_action=final_action) + event = build_analytics_event(_make_request(), decision, _ctx()) + assert event.final_action == final_action + + +@pytest.mark.parametrize( + ("mode", "wire"), + [ + (EnforcementMode.OBSERVE, "observe"), + (EnforcementMode.ENFORCE, "enforce"), + (EnforcementMode.DISABLED, "disabled"), + ], +) +def test_serializes_enforcement_mode_to_wire(mode, wire): + decision = replace(BASE_DECISION, enforcement_mode=mode) + event = build_analytics_event(_make_request(), decision, _ctx()) + assert event.enforcement_mode == wire + + +def test_source_cdn_is_none_for_direct_sdk_invocation(): + event = build_analytics_event(_make_request(), BASE_DECISION, BuildAnalyticsEventContext()) + assert event.source_cdn is None + + +def test_generates_request_id_when_absent(): + event = build_analytics_event(_make_request(), BASE_DECISION, BuildAnalyticsEventContext(timestamp=FIXED_TIME)) + assert event.request_id # a uuid4 string + + +def test_path_preserves_percent_encoding(): + # request.url.path would decode %2F->"/" and %20->" "; the event must keep encoded semantics. 
+ request = _make_request(url="https://example.com/a%2Fb/c%20d?x=1") + event = build_analytics_event(request, BASE_DECISION, _ctx()) + assert event.path == "/a%2Fb/c%20d" + + +def test_path_drops_query_string(): + request = _make_request(url="https://example.com/articles/foo?x=1&y=2") + event = build_analytics_event(request, BASE_DECISION, _ctx()) + assert event.path == "/articles/foo" + + +def test_missing_headers_default_to_empty_strings(): + event = build_analytics_event(_make_request(), BASE_DECISION, _ctx()) + assert event.user_agent == "" + assert event.referer == "" + assert event.accept_language == "" diff --git a/tests/analytics/test_ip.py b/tests/analytics/test_ip.py new file mode 100644 index 0000000..a0dbc83 --- /dev/null +++ b/tests/analytics/test_ip.py @@ -0,0 +1,28 @@ +"""Tests for client-IP normalization.""" + +import pytest + +from supertab_connect.analytics.ip import normalize_client_ip + + +def test_maps_ipv4_to_ipv6_mapped_form(): + assert normalize_client_ip("1.2.3.4") == "::ffff:1.2.3.4" + assert normalize_client_ip("192.0.2.1") == "::ffff:192.0.2.1" + + +def test_trims_surrounding_whitespace_before_mapping_ipv4(): + assert normalize_client_ip(" 1.2.3.4 ") == "::ffff:1.2.3.4" + + +def test_passes_ipv6_through_unchanged(): + assert normalize_client_ip("2001:db8::1") == "2001:db8::1" + assert normalize_client_ip("::1") == "::1" + + +@pytest.mark.parametrize("value", [None, "", " "]) +def test_returns_unspecified_for_empty(value): + assert normalize_client_ip(value) == "::" + + +def test_returns_unspecified_for_unrecognized_value(): + assert normalize_client_ip("not-an-ip") == "::" diff --git a/tests/analytics/test_transport.py b/tests/analytics/test_transport.py new file mode 100644 index 0000000..d7362c1 --- /dev/null +++ b/tests/analytics/test_transport.py @@ -0,0 +1,132 @@ +"""Tests for analytics transports.""" + +import asyncio +import json + +import httpx +import respx + +from supertab_connect._version import _get_sdk_user_agent +from 
supertab_connect.analytics.transport import ( + ANALYTICS_EVENTS_PATH, + HttpAnalyticsTransport, + NoopAnalyticsTransport, +) +from supertab_connect.analytics.transport import _background_tasks +from supertab_connect.analytics.types import AnalyticsEvent + +RELAY_URL = "https://relay.test/ingest/events" + +FIXTURE_EVENT = AnalyticsEvent( + timestamp="2026-04-29T12:00:00.000Z", + request_id="req-1", + schema_version=1, + source_cdn="cloudflare", + user_agent="ua", + client_ip="::ffff:1.2.3.4", + path="/p", + method="GET", + referer="", + accept_language="en", + request_country="US", + request_asn=13335, + tls_fingerprint="ja3hash", + has_token=False, + token_outcome="absent", + final_action="allow", + enforcement_mode="observe", + signature_agent=None, + signature_input=None, + signature=None, +) + + +async def _flush() -> None: + """Await all in-flight background emit tasks.""" + while _background_tasks: + await asyncio.gather(*list(_background_tasks), return_exceptions=True) + + +def test_analytics_events_path_targets_the_relay_events_route(): + assert ANALYTICS_EVENTS_PATH == "/ingest/events" + + +async def test_posts_json_body_with_bearer_api_key_to_relay_url(): + with respx.mock: + route = respx.post(RELAY_URL).respond(status_code=202) + transport = HttpAnalyticsTransport(url=RELAY_URL, api_key="merchant-api-key") + + transport.emit(FIXTURE_EVENT) + await _flush() + + assert route.called + request = route.calls[0].request + assert request.method == "POST" + assert request.headers["authorization"] == "Bearer merchant-api-key" + assert request.headers["content-type"] == "application/json" + assert request.headers["user-agent"] == _get_sdk_user_agent() + assert json.loads(request.content) == { + "timestamp": "2026-04-29T12:00:00.000Z", + "request_id": "req-1", + "schema_version": 1, + "source_cdn": "cloudflare", + "user_agent": "ua", + "client_ip": "::ffff:1.2.3.4", + "path": "/p", + "method": "GET", + "referer": "", + "accept_language": "en", + "request_country": 
"US", + "request_asn": 13335, + "tls_fingerprint": "ja3hash", + "has_token": False, + "token_outcome": "absent", + "final_action": "allow", + "enforcement_mode": "observe", + "signature_agent": None, + "signature_input": None, + "signature": None, + } + + +async def test_does_not_raise_when_request_fails(): + with respx.mock: + respx.post(RELAY_URL).mock(side_effect=httpx.ConnectError("network down")) + transport = HttpAnalyticsTransport(url=RELAY_URL, api_key="t") + + transport.emit(FIXTURE_EVENT) # must not raise + await _flush() + + +async def test_does_not_raise_on_non_2xx_responses(): + with respx.mock: + respx.post(RELAY_URL).respond(status_code=500, text="err") + transport = HttpAnalyticsTransport(url=RELAY_URL, api_key="t") + + transport.emit(FIXTURE_EVENT) # must not raise + await _flush() + + +async def test_does_not_raise_on_non_http_errors(): + # A non-HTTPError raised on the request path (e.g. unexpected runtime error) must still + # be swallowed so the fire-and-forget task never surfaces an unhandled exception. + with respx.mock: + respx.post(RELAY_URL).mock(side_effect=ValueError("unexpected boom")) + transport = HttpAnalyticsTransport(url=RELAY_URL, api_key="t") + + transport.emit(FIXTURE_EVENT) # must not raise + await _flush() + + # No task should retain an unretrieved exception. + assert not _background_tasks + + +def test_emit_without_running_loop_is_a_noop(): + # No running event loop here (sync test) → emit silently skips scheduling. 
+ transport = HttpAnalyticsTransport(url=RELAY_URL, api_key="t") + transport.emit(FIXTURE_EVENT) + + +def test_noop_transport_emit_never_throws(): + transport = NoopAnalyticsTransport() + assert transport.emit(FIXTURE_EVENT) is None diff --git a/tests/merchant/test_client_analytics.py b/tests/merchant/test_client_analytics.py new file mode 100644 index 0000000..e37ab02 --- /dev/null +++ b/tests/merchant/test_client_analytics.py @@ -0,0 +1,190 @@ +"""Tests for analytics emission wired into the high-level merchant client.""" + +import httpx +import pytest + +from supertab_connect.analytics.types import AnalyticsEvent, AnalyticsTransport +from supertab_connect.merchant.client import SupertabConnect +from supertab_connect.types import ( + EnforcementMode, + HandleRequestContext, + HandlerAction, + InvalidLicenseToken, + LicenseTokenInvalidReason, + SupertabConnectConfig, + ValidLicenseToken, +) + +from tests.merchant.constants import REQUEST_URL, SUPERTAB_BASE_URL + + +class RecordingTransport: + def __init__(self) -> None: + self.events: list[AnalyticsEvent] = [] + + def emit(self, event: AnalyticsEvent) -> None: + self.events.append(event) + + +class ThrowingTransport: + def emit(self, event: AnalyticsEvent) -> None: + raise RuntimeError("transport blew up") + + +@pytest.fixture(autouse=True) +def _reset_singleton(): + SupertabConnect.reset_instance() + SupertabConnect.set_base_url(SUPERTAB_BASE_URL) + yield + SupertabConnect.reset_instance() + SupertabConnect.set_base_url(SUPERTAB_BASE_URL) + + +def _request(headers: dict[str, str] | None = None) -> httpx.Request: + return httpx.Request("GET", REQUEST_URL, headers=headers or {}) + + +def _client(transport: AnalyticsTransport, **config_kwargs) -> SupertabConnect: + return SupertabConnect( + SupertabConnectConfig(api_key="sk_test_123", analytics_transport=transport, **config_kwargs) + ) + + +def test_constructs_with_only_api_key(): + # Default transport is the Noop transport; construction must not require analytics 
config. + SupertabConnect(SupertabConnectConfig(api_key="sk_test_123")) + + +async def test_emits_observe_event_for_bot_without_token(): + transport = RecordingTransport() + client = _client(transport, enforcement=EnforcementMode.OBSERVE, bot_detector=lambda request: True) + + result = await client.handle_request( + _request({"User-Agent": "curl/8.0"}), HandleRequestContext(source_cdn="cloudflare") + ) + + assert result["action"] is HandlerAction.ALLOW + assert len(transport.events) == 1 + event = transport.events[0] + assert event.source_cdn == "cloudflare" + assert event.final_action == "observe" + assert event.enforcement_mode == "observe" + assert event.has_token is False + assert event.token_outcome == "absent" + + +async def test_emits_block_event_for_bot_without_token_in_enforce(): + transport = RecordingTransport() + client = _client(transport, enforcement=EnforcementMode.ENFORCE, bot_detector=lambda request: True) + + result = await client.handle_request(_request({"User-Agent": "curl/8.0"})) + + assert result["action"] is HandlerAction.BLOCK + assert transport.events[0].final_action == "block" + assert transport.events[0].token_outcome == "absent" + + +async def test_emits_allow_event_for_non_bot_without_token(): + transport = RecordingTransport() + client = _client(transport, enforcement=EnforcementMode.ENFORCE, bot_detector=lambda request: False) + + result = await client.handle_request(_request({"User-Agent": "Browser/1.0"})) + + assert result == {"action": HandlerAction.ALLOW} + assert transport.events[0].final_action == "allow" + assert transport.events[0].token_outcome == "absent" + + +async def test_emits_not_validated_for_token_in_disabled_mode(): + transport = RecordingTransport() + client = _client(transport, enforcement=EnforcementMode.DISABLED) + + result = await client.handle_request(_request({"Authorization": "License some-token"})) + + assert result == {"action": HandlerAction.ALLOW} + event = transport.events[0] + assert event.has_token is 
True + assert event.token_outcome == "not_validated" + assert event.final_action == "allow" + assert event.enforcement_mode == "disabled" + + +async def test_emits_valid_for_verified_token(monkeypatch): + async def stub_verify_and_record_event(**kwargs): + return ValidLicenseToken(license_id="lic_test_123", payload={}) + + monkeypatch.setattr("supertab_connect.merchant.client.verify_and_record_event", stub_verify_and_record_event) + transport = RecordingTransport() + client = _client(transport, enforcement=EnforcementMode.ENFORCE) + + result = await client.handle_request(_request({"Authorization": "License signed.jwt"})) + + assert result == {"action": HandlerAction.ALLOW} + assert transport.events[0].has_token is True + assert transport.events[0].token_outcome == "valid" + assert transport.events[0].final_action == "allow" + + +async def test_emits_mapped_outcome_for_invalid_token(monkeypatch): + async def stub_verify_and_record_event(**kwargs): + return InvalidLicenseToken( + reason=LicenseTokenInvalidReason.EXPIRED, + error="License token expired", + license_id="lic_test_123", + ) + + monkeypatch.setattr("supertab_connect.merchant.client.verify_and_record_event", stub_verify_and_record_event) + transport = RecordingTransport() + client = _client(transport, enforcement=EnforcementMode.ENFORCE) + + result = await client.handle_request(_request({"Authorization": "License signed.jwt"})) + + assert result["action"] is HandlerAction.BLOCK + assert transport.events[0].token_outcome == "expired" + assert transport.events[0].final_action == "block" + + +async def test_forwards_classification_signals_from_context(): + transport = RecordingTransport() + client = _client(transport, bot_detector=lambda request: True) + + await client.handle_request( + _request({"User-Agent": "curl/8.0"}), + HandleRequestContext( + source_cdn="fastly", + client_ip="1.2.3.4", + request_id="req-xyz", + request_country="DE", + request_asn=3320, + tls_fingerprint="abc123", + ), + ) + + event = 
transport.events[0] + assert event.source_cdn == "fastly" + assert event.client_ip == "::ffff:1.2.3.4" + assert event.request_id == "req-xyz" + assert event.request_country == "DE" + assert event.request_asn == 3320 + assert event.tls_fingerprint == "abc123" + + +async def test_analytics_failure_does_not_break_request_handling(): + client = _client(ThrowingTransport(), bot_detector=lambda request: True) + + # A throwing transport must not propagate out of handle_request. + result = await client.handle_request(_request({"User-Agent": "curl/8.0"})) + + assert result["action"] is HandlerAction.ALLOW + + +async def test_no_event_emitted_without_context_still_works(): + transport = RecordingTransport() + client = _client(transport, bot_detector=lambda request: True) + + await client.handle_request(_request({"User-Agent": "curl/8.0"})) + + # Direct SDK invocation (no context) → source_cdn is None, request_id auto-generated. + event = transport.events[0] + assert event.source_cdn is None + assert event.request_id From 44790547580fa6b0ae0e71c966d1175fece0f88f Mon Sep 17 00:00:00 2001 From: Nikita Kovalev Date: Wed, 24 Jun 2026 13:13:42 +0200 Subject: [PATCH 4/8] feat(analytics): capture-v2 spoof-detection signals (schema_version 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aligns with TS SDK PR #34 (capture-v2). Emits the richer spoof-detection signals on the analytics event as schema_version 2; the warehouse keeps doing classification at query time — the SDK emits raw signals only. - Portable header signals (read from the httpx Request): sec_fetch_*, sec_ch_ua*, accept, host, has_cookies, and header_names (lowercased, deduped, sorted; edge-injected cf-*/fastly-*/cloudfront-*/x-forwarded-*/ x-real-ip/x-original-request-url stripped — plus the synthesized Host, which httpx adds on construction but the JS fetch Request hides, so the cross-SDK header-name set stays consistent). 
- Query-string derived signals: query_length, query_param_count, query_suspicious. The raw query is never stored. - CDN plumbing not derivable from the portable Request is supplied by the caller via a new CdnRequestSignals object threaded through HandleRequestContext.cdn_signals (mirrors TS's cdnSignals handler-context field; Python takes the signals from the caller rather than porting the edge handlers). - 512-char truncation on accept / sec_ch_ua / as_organization. Co-Authored-By: Claude Opus 4.8 (1M context) --- supertab_connect/__init__.py | 7 +- supertab_connect/analytics/__init__.py | 2 + .../analytics/build_analytics_event.py | 89 ++++++++ supertab_connect/analytics/types.py | 60 +++++- supertab_connect/merchant/client.py | 1 + supertab_connect/types.py | 4 +- tests/analytics/test_build_analytics_event.py | 200 +++++++++++++++++- tests/analytics/test_transport.py | 54 ++++- tests/merchant/test_client_analytics.py | 22 +- 9 files changed, 432 insertions(+), 7 deletions(-) diff --git a/supertab_connect/__init__.py b/supertab_connect/__init__.py index 2789e67..5b7c670 100644 --- a/supertab_connect/__init__.py +++ b/supertab_connect/__init__.py @@ -1,6 +1,10 @@ """Supertab Connect SDK.""" -from supertab_connect.analytics.types import AnalyticsEvent, AnalyticsTransport +from supertab_connect.analytics.types import ( + AnalyticsEvent, + AnalyticsTransport, + CdnRequestSignals, +) from supertab_connect.customer.token import obtain_license_token from supertab_connect.exceptions import SupertabConnectError from supertab_connect.merchant.bots import default_bot_detector @@ -19,6 +23,7 @@ __all__ = [ "AnalyticsEvent", "AnalyticsTransport", + "CdnRequestSignals", "EnforcementMode", "HandleRequestContext", "HandlerAction", diff --git a/supertab_connect/analytics/__init__.py b/supertab_connect/analytics/__init__.py index 3d20233..45933eb 100644 --- a/supertab_connect/analytics/__init__.py +++ b/supertab_connect/analytics/__init__.py @@ -16,6 +16,7 @@ 
TOKEN_OUTCOME_BY_REASON, AnalyticsEvent, AnalyticsTransport, + CdnRequestSignals, Decision, FinalAction, SourceCdn, @@ -29,6 +30,7 @@ "AnalyticsEvent", "AnalyticsTransport", "BuildAnalyticsEventContext", + "CdnRequestSignals", "Decision", "FinalAction", "HttpAnalyticsTransport", diff --git a/supertab_connect/analytics/build_analytics_event.py b/supertab_connect/analytics/build_analytics_event.py index b922e2c..25275b1 100644 --- a/supertab_connect/analytics/build_analytics_event.py +++ b/supertab_connect/analytics/build_analytics_event.py @@ -3,6 +3,7 @@ import uuid from dataclasses import dataclass from datetime import datetime, timezone +from urllib.parse import unquote from httpx import Request @@ -10,12 +11,38 @@ from supertab_connect.analytics.types import ( SCHEMA_VERSION, AnalyticsEvent, + CdnRequestSignals, Decision, EnforcementWire, SourceCdn, ) from supertab_connect.types import EnforcementMode +# Defensive cap on client-controlled free-form strings, applied at the edge (mirrored by the relay). +MAX_FIELD_LENGTH = 512 + +# Edge-injected headers are CDN artifacts, not client signals — strip them so ``header_names`` +# reflects only what the client actually sent. Covers all three CDNs: Cloudflare (``cf-*``), +# Fastly (``fastly-*``), CloudFront (``cloudfront-*``), the shared ``x-forwarded-*`` / ``x-real-ip``, +# and the SDK's own routing header ``x-original-request-url``. +_EDGE_HEADER_PREFIXES = ("cf-", "fastly-", "cloudfront-", "x-forwarded-") +# ``host`` is included here because httpx synthesizes a Host header on Request construction; the JS +# fetch ``Request`` hides it as a forbidden header, so the TS SDK never emits it in ``header_names``. +# Stripping it keeps the cross-SDK header-name set consistent (host is captured in its own field). +_EDGE_HEADER_NAMES = frozenset({"x-real-ip", "x-original-request-url", "host"}) + +# Mechanical exploit markers for the query-string heuristic, matched case-insensitively against the +# raw and URL-decoded query. 
A coarse signal only — real classification stays query-time in the +# warehouse. +_SUSPICIOUS_QUERY_MARKERS = ( + "../", + "..\\", + "union select", + " str: @@ -50,6 +79,35 @@ def _enforcement_to_wire(mode: EnforcementMode) -> EnforcementWire: return mode.value # type: ignore[return-value] +def _truncate(value: str | None, max_length: int = MAX_FIELD_LENGTH) -> str | None: + if value is None: + return None + return value[:max_length] if len(value) > max_length else value + + +def _is_edge_header(name: str) -> bool: + if name in _EDGE_HEADER_NAMES: + return True + return any(name.startswith(prefix) for prefix in _EDGE_HEADER_PREFIXES) + + +def _collect_header_names(request: Request) -> list[str]: + names = {name.lower() for name in request.headers.keys()} + return sorted(name for name in names if not _is_edge_header(name)) + + +def _query_signals(request: Request) -> tuple[int, int, bool]: + # request.url.query is the raw, percent-encoded query bytes (no leading "?"), matching the + # TS SDK's ``url.search.slice(1)``. The raw query itself is never stored on the event. 
+ raw = request.url.query.decode("utf-8", "replace") + params = [p for p in raw.split("&") if p] if raw else [] + + haystack = raw.lower() + "\n" + unquote(raw).lower() + suspicious = any(marker in haystack for marker in _SUSPICIOUS_QUERY_MARKERS) + + return len(raw), len(params), suspicious + + def build_analytics_event( request: Request, decision: Decision, @@ -58,6 +116,8 @@ def build_analytics_event( headers = request.headers timestamp = context.timestamp if context.timestamp is not None else datetime.now(timezone.utc) request_id = context.request_id if context.request_id is not None else str(uuid.uuid4()) + query_length, query_param_count, query_suspicious = _query_signals(request) + cdn = context.cdn_signals if context.cdn_signals is not None else CdnRequestSignals() return AnalyticsEvent( timestamp=_iso_utc(timestamp), @@ -80,4 +140,33 @@ def build_analytics_event( signature_agent=headers.get("signature-agent"), signature_input=headers.get("signature-input"), signature=headers.get("signature"), + # --- Capture v2: portable header signals --- + sec_fetch_mode=headers.get("sec-fetch-mode"), + sec_fetch_site=headers.get("sec-fetch-site"), + sec_fetch_dest=headers.get("sec-fetch-dest"), + sec_fetch_user=headers.get("sec-fetch-user"), + sec_ch_ua=_truncate(headers.get("sec-ch-ua")), + sec_ch_ua_mobile=headers.get("sec-ch-ua-mobile"), + sec_ch_ua_platform=headers.get("sec-ch-ua-platform"), + accept=_truncate(headers.get("accept")), + # httpx synthesizes the Host header from the URL, so this is effectively the parsed host. + host=headers.get("host") or request.url.host or None, + has_cookies="cookie" in headers, + header_names=_collect_header_names(request), + # Query-string derived signals (raw query never stored). 
+ query_length=query_length, + query_param_count=query_param_count, + query_suspicious=query_suspicious, + # --- Capture v2: CDN plumbing (passthrough from the handler context) --- + accept_encoding=cdn.accept_encoding, + http_protocol=cdn.http_protocol, + tls_version=cdn.tls_version, + tls_cipher=cdn.tls_cipher, + tls_client_hello_length=cdn.tls_client_hello_length, + tls_client_extensions_sha1=cdn.tls_client_extensions_sha1, + as_organization=_truncate(cdn.as_organization), + client_tcp_rtt=cdn.client_tcp_rtt, + cdn_verified_bot_category=cdn.cdn_verified_bot_category, + request_priority=cdn.request_priority, + tls_fingerprint_ja4=cdn.tls_fingerprint_ja4, ) diff --git a/supertab_connect/analytics/types.py b/supertab_connect/analytics/types.py index 0ab431e..07bce4b 100644 --- a/supertab_connect/analytics/types.py +++ b/supertab_connect/analytics/types.py @@ -5,7 +5,7 @@ from supertab_connect.types import EnforcementMode, LicenseTokenInvalidReason -SCHEMA_VERSION = 1 +SCHEMA_VERSION = 2 SourceCdn = Literal["cloudflare", "fastly", "cloudfront"] @@ -65,6 +65,64 @@ class AnalyticsEvent: signature_input: str | None signature: str | None + # --- Capture v2 (schema_version 2): spoof-detection signals --- + # Portable header signals — read directly from request headers (every CDN). + sec_fetch_mode: str | None + sec_fetch_site: str | None + sec_fetch_dest: str | None + sec_fetch_user: str | None + sec_ch_ua: str | None + sec_ch_ua_mobile: str | None + sec_ch_ua_platform: str | None + accept: str | None + host: str | None + has_cookies: bool | None + # Lowercased, deduped, sorted request-header names with edge-injected headers + # (cf-*, x-forwarded-*, x-real-ip, …) and the synthesized Host stripped. Non-nullable: [] when none. + header_names: list[str] + + # Query-string derived signals. The raw query is NEVER stored (PII gate → option b); + # only these mechanical derivations are emitted. 
+ query_length: int | None + query_param_count: int | None + query_suspicious: bool | None + + # CDN plumbing — not derivable from the portable Request. Supplied per platform by the + # caller via HandleRequestContext; null when not exposed. + accept_encoding: str | None + http_protocol: str | None + tls_version: str | None + tls_cipher: str | None + tls_client_hello_length: int | None + tls_client_extensions_sha1: str | None + as_organization: str | None + client_tcp_rtt: int | None + cdn_verified_bot_category: str | None + request_priority: str | None + tls_fingerprint_ja4: str | None + + +@dataclass(frozen=True) +class CdnRequestSignals: + """CDN-supplied request signals that cannot be read from the portable httpx ``Request``. + + Extracted per platform by the caller (Cloudflare ``request.cf``, Fastly headers, …) and + threaded through ``HandleRequestContext``. Field names match the wire (snake_case) contract, + so they pass straight through onto the event. + """ + + accept_encoding: str | None = None + http_protocol: str | None = None + tls_version: str | None = None + tls_cipher: str | None = None + tls_client_hello_length: int | None = None + tls_client_extensions_sha1: str | None = None + as_organization: str | None = None + client_tcp_rtt: int | None = None + cdn_verified_bot_category: str | None = None + request_priority: str | None = None + tls_fingerprint_ja4: str | None = None + @runtime_checkable class AnalyticsTransport(Protocol): diff --git a/supertab_connect/merchant/client.py b/supertab_connect/merchant/client.py index 4df6526..870f6d1 100644 --- a/supertab_connect/merchant/client.py +++ b/supertab_connect/merchant/client.py @@ -195,6 +195,7 @@ def _emit_analytics( request_country=context.request_country if context else None, request_asn=context.request_asn if context else None, tls_fingerprint=context.tls_fingerprint if context else None, + cdn_signals=context.cdn_signals if context else None, ), ) self._analytics_transport.emit(event) diff --git 
a/supertab_connect/types.py b/supertab_connect/types.py index 86683cf..f9e0f9d 100644 --- a/supertab_connect/types.py +++ b/supertab_connect/types.py @@ -8,7 +8,7 @@ from httpx import Request if TYPE_CHECKING: - from supertab_connect.analytics.types import AnalyticsTransport + from supertab_connect.analytics.types import AnalyticsTransport, CdnRequestSignals class EnforcementMode(StrEnum): @@ -73,6 +73,8 @@ class HandleRequestContext: request_country: str | None = None request_asn: int | None = None tls_fingerprint: str | None = None + # Capture-v2 CDN plumbing not derivable from the portable Request (e.g. Cloudflare request.cf). + cdn_signals: "CdnRequestSignals | None" = None class AllowHandlerResult(TypedDict): diff --git a/tests/analytics/test_build_analytics_event.py b/tests/analytics/test_build_analytics_event.py index b1c3e3b..3af1124 100644 --- a/tests/analytics/test_build_analytics_event.py +++ b/tests/analytics/test_build_analytics_event.py @@ -10,7 +10,7 @@ BuildAnalyticsEventContext, build_analytics_event, ) -from supertab_connect.analytics.types import SCHEMA_VERSION, Decision +from supertab_connect.analytics.types import SCHEMA_VERSION, CdnRequestSignals, Decision from supertab_connect.types import EnforcementMode FIXED_TIME = datetime(2026, 4, 29, 12, 0, 0, tzinfo=timezone.utc) @@ -70,6 +70,33 @@ def test_returns_event_matching_relay_shape(): "signature_agent": None, "signature_input": None, "signature": None, + # Capture v2 — portable header signals (none of these headers were sent). + "sec_fetch_mode": None, + "sec_fetch_site": None, + "sec_fetch_dest": None, + "sec_fetch_user": None, + "sec_ch_ua": None, + "sec_ch_ua_mobile": None, + "sec_ch_ua_platform": None, + "accept": None, + "host": "example.com", + "has_cookies": False, + "header_names": ["accept-language", "referer", "user-agent"], + "query_length": 3, + "query_param_count": 1, + "query_suspicious": False, + # Capture v2 — CDN plumbing (no cdn_signals in context → None). 
+ "accept_encoding": None, + "http_protocol": None, + "tls_version": None, + "tls_cipher": None, + "tls_client_hello_length": None, + "tls_client_extensions_sha1": None, + "as_organization": None, + "client_tcp_rtt": None, + "cdn_verified_bot_category": None, + "request_priority": None, + "tls_fingerprint_ja4": None, } @@ -161,3 +188,174 @@ def test_missing_headers_default_to_empty_strings(): assert event.user_agent == "" assert event.referer == "" assert event.accept_language == "" + + +# --- Capture v2 ------------------------------------------------------------------------------- + +BROWSER_HEADERS = { + "user-agent": "Mozilla/5.0", + "sec-fetch-mode": "navigate", + "sec-fetch-site": "none", + "sec-fetch-dest": "document", + "sec-fetch-user": "?1", + "sec-ch-ua": '"Chromium";v="120", "Not(A:Brand";v="24"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"macOS"', + "accept": "text/html", + "cookie": "session=abc", +} + + +def test_captures_sec_fetch_and_client_hints_from_browser_request(): + event = build_analytics_event(_make_request(headers=BROWSER_HEADERS), BASE_DECISION, _ctx()) + assert event.sec_fetch_mode == "navigate" + assert event.sec_fetch_site == "none" + assert event.sec_fetch_dest == "document" + assert event.sec_fetch_user == "?1" + assert event.sec_ch_ua == '"Chromium";v="120", "Not(A:Brand";v="24"' + assert event.sec_ch_ua_mobile == "?0" + assert event.sec_ch_ua_platform == '"macOS"' + assert event.accept == "text/html" + assert event.has_cookies is True + + +def test_curl_like_request_carries_no_browser_signals(): + event = build_analytics_event(_make_request(headers={"user-agent": "curl/8.0"}), BASE_DECISION, _ctx()) + assert event.sec_fetch_mode is None + assert event.sec_fetch_site is None + assert event.sec_fetch_dest is None + assert event.sec_fetch_user is None + assert event.sec_ch_ua is None + assert event.sec_ch_ua_mobile is None + assert event.sec_ch_ua_platform is None + assert event.has_cookies is False + + +def 
test_host_falls_back_to_url_host(): + event = build_analytics_event(_make_request(url="https://pub.example.com/a"), BASE_DECISION, _ctx()) + assert event.host == "pub.example.com" + + +def test_truncates_accept_and_sec_ch_ua_to_512_chars(): + long = "a" * 600 + event = build_analytics_event(_make_request(headers={"accept": long, "sec-ch-ua": long}), BASE_DECISION, _ctx()) + assert event.accept == "a" * 512 + assert event.sec_ch_ua == "a" * 512 + + +def test_header_names_lowercased_deduped_sorted(): + event = build_analytics_event( + _make_request(headers={"User-Agent": "x", "Accept": "y", "Referer": "z"}), + BASE_DECISION, + _ctx(), + ) + assert event.header_names == ["accept", "referer", "user-agent"] + + +def test_header_names_strips_edge_injected_headers_across_all_cdns(): + event = build_analytics_event( + _make_request( + headers={ + "user-agent": "x", + # Cloudflare + "cf-connecting-ip": "1.2.3.4", + "cf-ray": "abc", + # Fastly + "fastly-client-ip": "1.2.3.4", + "fastly-client-ja3": "deadbeef", + # CloudFront + "cloudfront-viewer-country": "DE", + "cloudfront-viewer-ja3-fingerprint": "abc", + # shared / SDK routing / synthesized + "x-forwarded-for": "1.2.3.4", + "x-real-ip": "1.2.3.4", + "x-original-request-url": "https://pub.example.com/a", + } + ), + BASE_DECISION, + _ctx(), + ) + # host is stripped too (httpx synthesizes it; the TS SDK never emits it). + assert event.header_names == ["user-agent"] + + +def test_query_signals_derived_without_storing_raw_query(): + event = build_analytics_event(_make_request(url="https://x.test/p?a=1&b=2&c=3"), BASE_DECISION, _ctx()) + assert event.query_length == len("a=1&b=2&c=3") + assert event.query_param_count == 3 + assert event.query_suspicious is False + # The raw query string must never appear on the event. 
+ assert "a=1&b=2&c=3" not in str(asdict(event)) + + +def test_query_signals_are_zero_for_query_less_url(): + event = build_analytics_event(_make_request(url="https://x.test/p"), BASE_DECISION, _ctx()) + assert event.query_length == 0 + assert event.query_param_count == 0 + assert event.query_suspicious is False + + +@pytest.mark.parametrize( + "url", + [ + "https://x.test/?f=../../etc/passwd", + "https://x.test/?q=UNION%20SELECT%201", + "https://x.test/?x=%3Cscript%3E", + ], +) +def test_query_suspicious_flags_exploit_markers_raw_and_encoded(url): + event = build_analytics_event(_make_request(url=url), BASE_DECISION, _ctx()) + assert event.query_suspicious is True + + +def test_cdn_signals_passthrough_with_truncation(): + event = build_analytics_event( + _make_request(), + BASE_DECISION, + _ctx( + cdn_signals=CdnRequestSignals( + accept_encoding="gzip, br", + http_protocol="HTTP/2", + tls_version="TLSv1.3", + tls_cipher="AEAD-AES128-GCM-SHA256", + tls_client_hello_length=1811, + tls_client_extensions_sha1="4cFD...", + as_organization="o" * 600, + client_tcp_rtt=50, + cdn_verified_bot_category="Search Engine Crawler", + request_priority="weight=256;exclusive=1", + tls_fingerprint_ja4=None, + ) + ), + ) + assert event.accept_encoding == "gzip, br" + assert event.http_protocol == "HTTP/2" + assert event.tls_version == "TLSv1.3" + assert event.tls_cipher == "AEAD-AES128-GCM-SHA256" + assert event.tls_client_hello_length == 1811 + assert event.tls_client_extensions_sha1 == "4cFD..." 
+ assert event.as_organization == "o" * 512 + assert event.client_tcp_rtt == 50 + assert event.cdn_verified_bot_category == "Search Engine Crawler" + assert event.request_priority == "weight=256;exclusive=1" + assert event.tls_fingerprint_ja4 is None + + +def test_cdn_signals_default_to_none_when_absent(): + event = build_analytics_event(_make_request(), BASE_DECISION, _ctx()) + assert event.accept_encoding is None + assert event.http_protocol is None + assert event.tls_version is None + assert event.tls_cipher is None + assert event.tls_client_hello_length is None + assert event.tls_client_extensions_sha1 is None + assert event.as_organization is None + assert event.client_tcp_rtt is None + assert event.cdn_verified_bot_category is None + assert event.request_priority is None + assert event.tls_fingerprint_ja4 is None + + +def test_schema_version_is_2(): + event = build_analytics_event(_make_request(), BASE_DECISION, _ctx()) + assert event.schema_version == 2 diff --git a/tests/analytics/test_transport.py b/tests/analytics/test_transport.py index d7362c1..0c6ed9b 100644 --- a/tests/analytics/test_transport.py +++ b/tests/analytics/test_transport.py @@ -20,7 +20,7 @@ FIXTURE_EVENT = AnalyticsEvent( timestamp="2026-04-29T12:00:00.000Z", request_id="req-1", - schema_version=1, + schema_version=2, source_cdn="cloudflare", user_agent="ua", client_ip="::ffff:1.2.3.4", @@ -38,6 +38,31 @@ signature_agent=None, signature_input=None, signature=None, + sec_fetch_mode=None, + sec_fetch_site=None, + sec_fetch_dest=None, + sec_fetch_user=None, + sec_ch_ua=None, + sec_ch_ua_mobile=None, + sec_ch_ua_platform=None, + accept=None, + host="example.com", + has_cookies=False, + header_names=["user-agent"], + query_length=0, + query_param_count=0, + query_suspicious=False, + accept_encoding=None, + http_protocol=None, + tls_version=None, + tls_cipher=None, + tls_client_hello_length=None, + tls_client_extensions_sha1=None, + as_organization=None, + client_tcp_rtt=None, + 
cdn_verified_bot_category=None, + request_priority=None, + tls_fingerprint_ja4=None, ) @@ -68,7 +93,7 @@ async def test_posts_json_body_with_bearer_api_key_to_relay_url(): assert json.loads(request.content) == { "timestamp": "2026-04-29T12:00:00.000Z", "request_id": "req-1", - "schema_version": 1, + "schema_version": 2, "source_cdn": "cloudflare", "user_agent": "ua", "client_ip": "::ffff:1.2.3.4", @@ -86,6 +111,31 @@ async def test_posts_json_body_with_bearer_api_key_to_relay_url(): "signature_agent": None, "signature_input": None, "signature": None, + "sec_fetch_mode": None, + "sec_fetch_site": None, + "sec_fetch_dest": None, + "sec_fetch_user": None, + "sec_ch_ua": None, + "sec_ch_ua_mobile": None, + "sec_ch_ua_platform": None, + "accept": None, + "host": "example.com", + "has_cookies": False, + "header_names": ["user-agent"], + "query_length": 0, + "query_param_count": 0, + "query_suspicious": False, + "accept_encoding": None, + "http_protocol": None, + "tls_version": None, + "tls_cipher": None, + "tls_client_hello_length": None, + "tls_client_extensions_sha1": None, + "as_organization": None, + "client_tcp_rtt": None, + "cdn_verified_bot_category": None, + "request_priority": None, + "tls_fingerprint_ja4": None, } diff --git a/tests/merchant/test_client_analytics.py b/tests/merchant/test_client_analytics.py index e37ab02..d8d3fc3 100644 --- a/tests/merchant/test_client_analytics.py +++ b/tests/merchant/test_client_analytics.py @@ -3,7 +3,7 @@ import httpx import pytest -from supertab_connect.analytics.types import AnalyticsEvent, AnalyticsTransport +from supertab_connect.analytics.types import AnalyticsEvent, AnalyticsTransport, CdnRequestSignals from supertab_connect.merchant.client import SupertabConnect from supertab_connect.types import ( EnforcementMode, @@ -169,6 +169,26 @@ async def test_forwards_classification_signals_from_context(): assert event.tls_fingerprint == "abc123" +async def test_forwards_cdn_signals_from_context(): + transport = 
RecordingTransport() + client = _client(transport, bot_detector=lambda request: True) + + await client.handle_request( + _request({"User-Agent": "curl/8.0"}), + HandleRequestContext( + source_cdn="cloudflare", + cdn_signals=CdnRequestSignals( + tls_version="TLSv1.3", + cdn_verified_bot_category="AI Assistant", + ), + ), + ) + + event = transport.events[0] + assert event.tls_version == "TLSv1.3" + assert event.cdn_verified_bot_category == "AI Assistant" + + async def test_analytics_failure_does_not_break_request_handling(): client = _client(ThrowingTransport(), bot_detector=lambda request: True) From 7ec60dc948a69e60597afefc5a8af141090d7d98 Mon Sep 17 00:00:00 2001 From: Nikita Kovalev Date: Wed, 24 Jun 2026 13:14:16 +0200 Subject: [PATCH 5/8] docs(analytics): note Fastly native logging is N/A for the Python SDK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aligns with TS SDK PR #33 (FastlyLogTransport / logEndpoint). The native Fastly Compute logging transport is intentionally not ported: Python does not run on Fastly Compute (no fastly:logger equivalent), and the Python SDK does not embed CDN edge handlers — it receives CDN signals via HandleRequestContext. Documents the gap and points to the AnalyticsTransport protocol for custom, non-relay delivery. Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index e049e41..22933e0 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,20 @@ transport (for example, an in-memory recorder in tests) via the internal `analytics_transport` config field; `AnalyticsEvent` and `HandleRequestContext` are exported from the package root. 
+### Native Fastly logging (not applicable to the Python SDK) + +The TypeScript SDK can deliver analytics through a **native Fastly Compute +logging endpoint** (`FastlyLogTransport` / the `logEndpoint` option on +`fastlyHandleRequests`) instead of the HTTP relay, letting Fastly ship events +off-path to S3. That path is intentionally **not ported here**: Python does not +run on Fastly Compute (the `fastly:logger` built-in has no Python equivalent), +and — consistent with this SDK's design — the Python SDK does not embed CDN edge +handlers, receiving CDN-derived signals through `HandleRequestContext` instead. + +If you need to deliver analytics somewhere other than the relay (for example, to +a log shipper that forwards to S3/Tinybird), implement the `AnalyticsTransport` +protocol and pass it via the `analytics_transport` config field. + ## Error Handling Customer-side token retrieval raises `SupertabConnectError` when `license.xml` From 8428b79cea76d91052fe278c8bb3ccac9611bee1 Mon Sep 17 00:00:00 2001 From: Nikita Kovalev Date: Wed, 24 Jun 2026 13:14:46 +0200 Subject: [PATCH 6/8] docs(analytics): document capture-v2 signals and cdn_signals context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document the schema_version 2 analytics fields added in the previous commit: the portable header signals (sec_fetch_*, client hints, header_names, …), the query-string derived signals, and the CdnRequestSignals plumbing passed through HandleRequestContext.cdn_signals — mirroring TS SDK PR #34's README. Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 22933e0..e01da8f 100644 --- a/README.md +++ b/README.md @@ -94,8 +94,15 @@ and enforcement mode when no token is present. 
It returns either `handle_request()` accepts an optional second argument, a `HandleRequestContext`, which carries per-request signals supplied by an upstream CDN/proxy (`source_cdn`, `client_ip`, `request_id`, `request_country`, `request_asn`, -`tls_fingerprint`). These are recorded on the analytics event when present; for -direct SDK use the context can be omitted. +`tls_fingerprint`, and `cdn_signals`). These are recorded on the analytics event +when present; for direct SDK use the context can be omitted. + +`cdn_signals` is a `CdnRequestSignals` object carrying the richer +spoof-detection signals that cannot be read from the portable request — TLS +fingerprinting fields, the verified-bot category, the negotiated protocol, and +so on. These are platform-specific (for example, Cloudflare exposes them on +`request.cf`), so the SDK takes them from the caller rather than extracting them +itself. Everything left unset stays `null` on the event. See the `examples` directory for complete merchant and customer examples. @@ -127,6 +134,28 @@ headers — plus, when an upstream CDN exposes them via `HandleRequestContext`, request country, ASN, TLS fingerprint, and HTTP Message Signature headers — along with the verification/enforcement decision for the request. +Events are emitted at **`schema_version: 2`** ("capture v2"), which adds raw +spoof-detection signals for query-time classification in the warehouse (the SDK +never classifies — it emits raw signals only): + +- **Portable header signals**, read directly from the request: `sec_fetch_*`, + the `sec_ch_ua*` client hints, `accept`, `host`, `has_cookies`, and + `header_names` — the lowercased, deduped, sorted set of request-header names + with edge-injected headers (`cf-*`, `fastly-*`, `cloudfront-*`, + `x-forwarded-*`, `x-real-ip`, the synthesized `Host`, …) stripped so it + reflects only what the client sent. 
+- **Query-string derived signals**: `query_length`, `query_param_count`, and + `query_suspicious` (a coarse exploit-marker heuristic). The raw query string + is **never** stored. +- **CDN plumbing** supplied via `HandleRequestContext.cdn_signals`: + `accept_encoding`, `http_protocol`, `tls_version`, `tls_cipher`, + `tls_client_hello_length`, `tls_client_extensions_sha1`, `as_organization`, + `client_tcp_rtt`, `cdn_verified_bot_category`, `request_priority`, and + `tls_fingerprint_ja4`. + +`accept`, `sec_ch_ua`, and `as_organization` are truncated to 512 characters. +Every capture-v2 field is fail-open: anything unavailable is emitted as `null`. + **Fail-open:** analytics emission is fire-and-forget and can never block, slow, or alter request handling. If emission fails, the error is swallowed and the request proceeds exactly as it would with analytics disabled. Analytics is sent From 798ffecbfc08550dcdd1ca3dac30ed909122643b Mon Sep 17 00:00:00 2001 From: Nikita Kovalev Date: Wed, 24 Jun 2026 15:38:22 +0200 Subject: [PATCH 7/8] Set the User-Agent header in all paths Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- supertab_connect/customer/token.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/supertab_connect/customer/token.py b/supertab_connect/customer/token.py index a4a65ff..69841f2 100644 --- a/supertab_connect/customer/token.py +++ b/supertab_connect/customer/token.py @@ -97,7 +97,15 @@ def _evict_expired_license_xml() -> None: def _create_async_client(**kwargs: Any) -> httpx.AsyncClient: kwargs.setdefault("follow_redirects", True) kwargs.setdefault("timeout", httpx.Timeout(_DEFAULT_HTTP_TIMEOUT_SECONDS)) - kwargs.setdefault("headers", {"User-Agent": _get_sdk_user_agent()}) + + headers = kwargs.pop("headers", None) + if headers is None: + headers = {"User-Agent": _get_sdk_user_agent()} + else: + headers = dict(headers) + headers.setdefault("User-Agent", _get_sdk_user_agent()) + 
kwargs["headers"] = headers + return httpx.AsyncClient(**kwargs) From 42defd33dcaaa9654c517459cb3b058c55ee8277 Mon Sep 17 00:00:00 2001 From: Nikita Kovalev Date: Wed, 24 Jun 2026 15:39:40 +0200 Subject: [PATCH 8/8] Rename enforcement mode tests to match new modes --- tests/merchant/test_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/merchant/test_client.py b/tests/merchant/test_client.py index 3868db2..abf0180 100644 --- a/tests/merchant/test_client.py +++ b/tests/merchant/test_client.py @@ -253,7 +253,7 @@ async def test_handle_request_allows_missing_token_for_non_bot(): assert result == {"action": HandlerAction.ALLOW} -async def test_handle_request_blocks_bot_in_strict_mode(): +async def test_handle_request_blocks_bot_in_enforce_mode(): client = SupertabConnect( SupertabConnectConfig( api_key="sk_test_123", @@ -269,7 +269,7 @@ async def test_handle_request_blocks_bot_in_strict_mode(): assert block_result["status"] == 401 -async def test_handle_request_signals_bot_in_soft_mode(): +async def test_handle_request_signals_bot_in_observe_mode(): client = SupertabConnect( SupertabConnectConfig( api_key="sk_test_123",