diff --git a/README.md b/README.md index 2163465..e01da8f 100644 --- a/README.md +++ b/README.md @@ -85,8 +85,103 @@ asyncio.run(main()) ``` For request-level enforcement, use `SupertabConnect.handle_request()` with an -`httpx.Request`. See the `examples` directory for complete merchant and customer -examples. +`httpx.Request`. It extracts the license token from the `Authorization` header, +verifies it, optionally emits a relay analytics event, and applies bot detection +and enforcement mode when no token is present. It returns either +`{"action": HandlerAction.ALLOW, ...}` or +`{"action": HandlerAction.BLOCK, "status": ..., "body": ..., "headers": ...}`. + +`handle_request()` accepts an optional second argument, a `HandleRequestContext`, +which carries per-request signals supplied by an upstream CDN/proxy +(`source_cdn`, `client_ip`, `request_id`, `request_country`, `request_asn`, +`tls_fingerprint`, and `cdn_signals`). These are recorded on the analytics event +when present; for direct SDK use the context can be omitted. + +`cdn_signals` is a `CdnRequestSignals` object carrying the richer +spoof-detection signals that cannot be read from the portable request — TLS +fingerprinting fields, the verified-bot category, the negotiated protocol, and +so on. These are platform-specific (for example, Cloudflare exposes them on +`request.cf`), so the SDK takes them from the caller rather than extracting them +itself. Everything left unset stays `null` on the event. + +See the `examples` directory for complete merchant and customer examples. + +## Analytics + +The SDK can emit one analytics event per request to the Supertab Connect +**relay** endpoint at `{base_url}/ingest/events`. This is **off by default** — +enable it by passing `analytics_enabled=True`: + +```python +from supertab_connect import SupertabConnect, SupertabConnectConfig + +client = SupertabConnect( + SupertabConnectConfig( + api_key="stc_live_your_api_key", + analytics_enabled=True, + ) +) +``` + +**No extra credentials are required.** Analytics requests are authenticated with +your configured merchant `api_key` using `Authorization: Bearer `. The +backend derives merchant identity from the API key, so the SDK sends **no +merchant identifier** in the analytics payload. + +Each `AnalyticsEvent` captures the request id, source CDN, a normalized client +IP, the request path (with percent-encoding preserved), method, and selected +headers — plus, when an upstream CDN exposes them via `HandleRequestContext`, the +request country, ASN, TLS fingerprint, and HTTP Message Signature headers — along +with the verification/enforcement decision for the request. + +Events emit at **`schema_version: 2`** ("capture v2"), which adds raw +spoof-detection signals for query-time classification in the warehouse (the SDK +never classifies — it emits raw signals only): + +- **Portable header signals**, read directly from the request: `sec_fetch_*`, + the `sec_ch_ua*` client hints, `accept`, `host`, `has_cookies`, and + `header_names` — the lowercased, deduped, sorted set of request-header names + with edge-injected headers (`cf-*`, `fastly-*`, `cloudfront-*`, + `x-forwarded-*`, `x-real-ip`, the synthesized `Host`, …) stripped so it + reflects only what the client sent. +- **Query-string derived signals**: `query_length`, `query_param_count`, and + `query_suspicious` (a coarse exploit-marker heuristic). The raw query string + is **never** stored. +- **CDN plumbing** supplied via `HandleRequestContext.cdn_signals`: + `accept_encoding`, `http_protocol`, `tls_version`, `tls_cipher`, + `tls_client_hello_length`, `tls_client_extensions_sha1`, `as_organization`, + `client_tcp_rtt`, `cdn_verified_bot_category`, `request_priority`, and + `tls_fingerprint_ja4`. + +`accept`, `sec_ch_ua`, and `as_organization` are truncated to 512 characters. +Every capture-v2 field is fail-open: anything unavailable is emitted as `null`. + +**Fail-open:** analytics emission is fire-and-forget and can never block, slow, +or alter request handling. If emission fails, the error is swallowed and the +request proceeds exactly as it would with analytics disabled. Analytics is sent +only to the relay at `/ingest/events`, independent of billing event recording. + +Point analytics at another environment by setting `supertab_base_url` on the +config (or `SupertabConnect.set_base_url(...)`). + +For advanced use, the `AnalyticsTransport` protocol lets you inject a custom +transport (for example, an in-memory recorder in tests) via the internal +`analytics_transport` config field; `AnalyticsEvent` and `HandleRequestContext` +are exported from the package root. + +### Native Fastly logging (not applicable to the Python SDK) + +The TypeScript SDK can deliver analytics through a **native Fastly Compute +logging endpoint** (`FastlyLogTransport` / the `logEndpoint` option on +`fastlyHandleRequests`) instead of the HTTP relay, letting Fastly ship events +off-path to S3. That path is intentionally **not ported here**: Python does not +run on Fastly Compute (the `fastly:logger` built-in has no Python equivalent), +and — consistent with this SDK's design — the Python SDK does not embed CDN edge +handlers, receiving CDN-derived signals through `HandleRequestContext` instead. + +If you need to deliver analytics somewhere other than the relay (for example, to +a log shipper that forwards to S3/Tinybird), implement the `AnalyticsTransport` +protocol and pass it via the `analytics_transport` config field. ## Error Handling diff --git a/examples/merchant_handle_request.py b/examples/merchant_handle_request.py index f3708de..1f8d7d8 100644 --- a/examples/merchant_handle_request.py +++ b/examples/merchant_handle_request.py @@ -16,7 +16,7 @@ async def main() -> None: client = SupertabConnect( SupertabConnectConfig( api_key="your_api_key", - enforcement=EnforcementMode.STRICT, + enforcement=EnforcementMode.ENFORCE, debug=True, ) ) diff --git a/examples/merchant_verify_and_record_event.py b/examples/merchant_verify_and_record_event.py index 2d727a2..7dc91c4 100644 --- a/examples/merchant_verify_and_record_event.py +++ b/examples/merchant_verify_and_record_event.py @@ -14,7 +14,7 @@ async def main() -> None: client = SupertabConnect( SupertabConnectConfig( api_key="your_api_key", - enforcement=EnforcementMode.SOFT, + enforcement=EnforcementMode.OBSERVE, debug=True, ) ) diff --git a/supertab_connect/__init__.py b/supertab_connect/__init__.py index 6c12aca..5b7c670 100644 --- a/supertab_connect/__init__.py +++ b/supertab_connect/__init__.py @@ -1,5 +1,10 @@ """Supertab Connect SDK.""" +from supertab_connect.analytics.types import ( + AnalyticsEvent, + AnalyticsTransport, + CdnRequestSignals, +) from supertab_connect.customer.token import obtain_license_token from supertab_connect.exceptions import SupertabConnectError from supertab_connect.merchant.bots import default_bot_detector @@ -7,6 +12,7 @@ from supertab_connect.merchant.license import verify_license_token from supertab_connect.types import ( EnforcementMode, + HandleRequestContext, HandlerAction, HandlerResult, RSLVerificationResult, @@ -15,7 +21,11 @@ ) __all__ = [ + "AnalyticsEvent", + "AnalyticsTransport", + "CdnRequestSignals", "EnforcementMode", + "HandleRequestContext", "HandlerAction", "HandlerResult", "RSLVerificationResult", diff --git a/supertab_connect/analytics/__init__.py b/supertab_connect/analytics/__init__.py new file mode 100644 index 0000000..45933eb --- /dev/null +++ b/supertab_connect/analytics/__init__.py @@ -0,0 +1,43 @@ +"""Relay analytics for Supertab Connect (mirrors the TS SDK `analytics/` module).""" + +from supertab_connect.analytics.build_analytics_event import ( + BuildAnalyticsEventContext, + build_analytics_event, +) +from supertab_connect.analytics.ip import normalize_client_ip +from supertab_connect.analytics.transport import ( + ANALYTICS_EVENTS_PATH, + HttpAnalyticsTransport, + NoopAnalyticsTransport, + aclose_http_client as aclose_analytics_http_client, +) +from supertab_connect.analytics.types import ( + SCHEMA_VERSION, + TOKEN_OUTCOME_BY_REASON, + AnalyticsEvent, + AnalyticsTransport, + CdnRequestSignals, + Decision, + FinalAction, + SourceCdn, + TokenOutcome, +) + +__all__ = [ + "ANALYTICS_EVENTS_PATH", + "SCHEMA_VERSION", + "TOKEN_OUTCOME_BY_REASON", + "AnalyticsEvent", + "AnalyticsTransport", + "BuildAnalyticsEventContext", + "CdnRequestSignals", + "Decision", + "FinalAction", + "HttpAnalyticsTransport", + "NoopAnalyticsTransport", + "SourceCdn", + "TokenOutcome", + "aclose_analytics_http_client", + "build_analytics_event", + "normalize_client_ip", +] diff --git a/supertab_connect/analytics/build_analytics_event.py b/supertab_connect/analytics/build_analytics_event.py new file mode 100644 index 0000000..25275b1 --- /dev/null +++ b/supertab_connect/analytics/build_analytics_event.py @@ -0,0 +1,172 @@ +"""Build a relay AnalyticsEvent from a request + decision (mirrors TS `buildAnalyticsEvent.ts`).""" + +import uuid +from dataclasses import dataclass +from datetime import datetime, timezone +from urllib.parse import unquote + +from httpx import Request + +from supertab_connect.analytics.ip import normalize_client_ip +from supertab_connect.analytics.types import ( + SCHEMA_VERSION, + AnalyticsEvent, + CdnRequestSignals, + Decision, + EnforcementWire, + SourceCdn, +) +from supertab_connect.types import EnforcementMode + +# Defensive cap on client-controlled free-form strings, applied at the edge (mirrored by the relay). +MAX_FIELD_LENGTH = 512 + +# Edge-injected headers are CDN artifacts, not client signals — strip them so ``header_names`` +# reflects only what the client actually sent. Covers all three CDNs: Cloudflare (``cf-*``), +# Fastly (``fastly-*``), CloudFront (``cloudfront-*``), the shared ``x-forwarded-*`` / ``x-real-ip``, +# and the SDK's own routing header ``x-original-request-url``. +_EDGE_HEADER_PREFIXES = ("cf-", "fastly-", "cloudfront-", "x-forwarded-") +# ``host`` is included here because httpx synthesizes a Host header on Request construction; the JS +# fetch ``Request`` hides it as a forbidden header, so the TS SDK never emits it in ``header_names``. +# Stripping it keeps the cross-SDK header-name set consistent (host is captured in its own field). +_EDGE_HEADER_NAMES = frozenset({"x-real-ip", "x-original-request-url", "host"}) + +# Mechanical exploit markers for the query-string heuristic, matched case-insensitively against the +# raw and URL-decoded query. A coarse signal only — real classification stays query-time in the +# warehouse. +_SUSPICIOUS_QUERY_MARKERS = ( + "../", + "..\\", + "union select", + " str: + """Format as ``YYYY-MM-DDTHH:MM:SS.mmmZ`` to match the TS `Date.toISOString()` wire form.""" + return value.astimezone(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z") + + +def _safe_pathname(request: Request) -> str: + """Return the request path with percent-encoding preserved. + + ``request.url.path`` percent-*decodes* (``/a%2Fb`` → ``/a/b``), which loses encoded path + semantics. We read ``raw_path`` (``path[?query]`` bytes), drop the query, and decode without + URL-decoding — matching the TS SDK's ``new URL(request.url).pathname``. + """ + path_bytes = request.url.raw_path.split(b"?", 1)[0] + return path_bytes.decode("utf-8", "replace") + + +def _enforcement_to_wire(mode: EnforcementMode) -> EnforcementWire: + # EnforcementMode values are already the wire strings ("observe"/"enforce"/"disabled"). + return mode.value # type: ignore[return-value] + + +def _truncate(value: str | None, max_length: int = MAX_FIELD_LENGTH) -> str | None: + if value is None: + return None + return value[:max_length] if len(value) > max_length else value + + +def _is_edge_header(name: str) -> bool: + if name in _EDGE_HEADER_NAMES: + return True + return any(name.startswith(prefix) for prefix in _EDGE_HEADER_PREFIXES) + + +def _collect_header_names(request: Request) -> list[str]: + names = {name.lower() for name in request.headers.keys()} + return sorted(name for name in names if not _is_edge_header(name)) + + +def _query_signals(request: Request) -> tuple[int, int, bool]: + # request.url.query is the raw, percent-encoded query bytes (no leading "?"), matching the + # TS SDK's ``url.search.slice(1)``. The raw query itself is never stored on the event. + raw = request.url.query.decode("utf-8", "replace") + params = [p for p in raw.split("&") if p] if raw else [] + + haystack = raw.lower() + "\n" + unquote(raw).lower() + suspicious = any(marker in haystack for marker in _SUSPICIOUS_QUERY_MARKERS) + + return len(raw), len(params), suspicious + + +def build_analytics_event( + request: Request, + decision: Decision, + context: BuildAnalyticsEventContext, +) -> AnalyticsEvent: + headers = request.headers + timestamp = context.timestamp if context.timestamp is not None else datetime.now(timezone.utc) + request_id = context.request_id if context.request_id is not None else str(uuid.uuid4()) + query_length, query_param_count, query_suspicious = _query_signals(request) + cdn = context.cdn_signals if context.cdn_signals is not None else CdnRequestSignals() + + return AnalyticsEvent( + timestamp=_iso_utc(timestamp), + request_id=request_id, + schema_version=SCHEMA_VERSION, + source_cdn=context.source_cdn, + user_agent=headers.get("user-agent", ""), + client_ip=normalize_client_ip(context.client_ip), + path=_safe_pathname(request), + method=request.method, + referer=headers.get("referer", ""), + accept_language=headers.get("accept-language", ""), + request_country=context.request_country, + request_asn=context.request_asn, + tls_fingerprint=context.tls_fingerprint, + has_token=decision.has_token, + token_outcome=decision.token_outcome, + final_action=decision.final_action, + enforcement_mode=_enforcement_to_wire(decision.enforcement_mode), + signature_agent=headers.get("signature-agent"), + signature_input=headers.get("signature-input"), + signature=headers.get("signature"), + # --- Capture v2: portable header signals --- + sec_fetch_mode=headers.get("sec-fetch-mode"), + sec_fetch_site=headers.get("sec-fetch-site"), + sec_fetch_dest=headers.get("sec-fetch-dest"), + sec_fetch_user=headers.get("sec-fetch-user"), + sec_ch_ua=_truncate(headers.get("sec-ch-ua")), + sec_ch_ua_mobile=headers.get("sec-ch-ua-mobile"), + sec_ch_ua_platform=headers.get("sec-ch-ua-platform"), + accept=_truncate(headers.get("accept")), + # httpx synthesizes the Host header from the URL, so this is effectively the parsed host. + host=headers.get("host") or request.url.host or None, + has_cookies="cookie" in headers, + header_names=_collect_header_names(request), + # Query-string derived signals (raw query never stored). + query_length=query_length, + query_param_count=query_param_count, + query_suspicious=query_suspicious, + # --- Capture v2: CDN plumbing (passthrough from the handler context) --- + accept_encoding=cdn.accept_encoding, + http_protocol=cdn.http_protocol, + tls_version=cdn.tls_version, + tls_cipher=cdn.tls_cipher, + tls_client_hello_length=cdn.tls_client_hello_length, + tls_client_extensions_sha1=cdn.tls_client_extensions_sha1, + as_organization=_truncate(cdn.as_organization), + client_tcp_rtt=cdn.client_tcp_rtt, + cdn_verified_bot_category=cdn.cdn_verified_bot_category, + request_priority=cdn.request_priority, + tls_fingerprint_ja4=cdn.tls_fingerprint_ja4, + ) diff --git a/supertab_connect/analytics/ip.py b/supertab_connect/analytics/ip.py new file mode 100644 index 0000000..417f2c3 --- /dev/null +++ b/supertab_connect/analytics/ip.py @@ -0,0 +1,27 @@ +"""Client-IP normalization (mirrors TS `analytics/ip.ts`). + +IPv4 addresses are mapped to their IPv6-mapped form (``::ffff:``); valid IPv6 +addresses pass through unchanged; anything else collapses to the unspecified address. +""" + +import ipaddress + +UNSPECIFIED = "::" + + +def normalize_client_ip(raw: str | None) -> str: + if not raw: + return UNSPECIFIED + trimmed = raw.strip() + if not trimmed: + return UNSPECIFIED + + try: + parsed = ipaddress.ip_address(trimmed) + except ValueError: + return UNSPECIFIED + + if parsed.version == 4: + return f"::ffff:{trimmed}" + # IPv6 passes through unchanged (the original textual form, not a re-compressed one). + return trimmed diff --git a/supertab_connect/analytics/transport.py b/supertab_connect/analytics/transport.py new file mode 100644 index 0000000..15b8a05 --- /dev/null +++ b/supertab_connect/analytics/transport.py @@ -0,0 +1,103 @@ +"""Analytics transports (mirrors TS `analytics/transport.ts`). + +The HTTP transport is fire-and-forget: ``emit`` schedules the POST on the running +event loop and returns immediately, never blocking the request path or raising. +""" + +import asyncio +from dataclasses import asdict + +import httpx + +from supertab_connect._version import _get_sdk_user_agent +from supertab_connect.analytics.types import AnalyticsEvent, AnalyticsTransport +from supertab_connect.common import debug_log, error_log + +ANALYTICS_EVENTS_PATH = "/ingest/events" + +# Hold strong references to in-flight emit tasks so they are not garbage-collected +# before they finish (asyncio only keeps weak references to scheduled tasks). +_background_tasks: set[asyncio.Task] = set() + +_http_client: httpx.AsyncClient | None = None + + +def _get_http_client() -> httpx.AsyncClient: + global _http_client + if _http_client is None or _http_client.is_closed: + _http_client = httpx.AsyncClient(headers={"User-Agent": _get_sdk_user_agent()}) + return _http_client + + +async def aclose_http_client() -> None: + global _http_client + if _http_client is not None and not _http_client.is_closed: + await _http_client.aclose() + _http_client = None + + +class NoopAnalyticsTransport: + """A transport that discards every event. Used when analytics is disabled.""" + + def emit(self, event: AnalyticsEvent) -> None: + # intentional no-op + return None + + +class HttpAnalyticsTransport: + """Posts events to the Supertab Connect relay, fire-and-forget.""" + + def __init__(self, *, url: str, api_key: str, debug: bool = False) -> None: + self._url = url + self._api_key = api_key + self._debug = debug + + def emit(self, event: AnalyticsEvent) -> None: + try: + loop = asyncio.get_running_loop() + except RuntimeError: + # No running event loop to schedule onto; analytics is best-effort, so skip. + debug_log(self._debug, "Skipping analytics emit: no running event loop") + return None + + task = loop.create_task(self._send(event)) + _background_tasks.add(task) + task.add_done_callback(self._on_task_done) + return None + + def _on_task_done(self, task: asyncio.Task) -> None: + # Backstop: drop the reference and retrieve any exception so it never surfaces as an + # "exception was never retrieved" warning, even if _send's own guard is somehow bypassed. + _background_tasks.discard(task) + if task.cancelled(): + return + error = task.exception() + if error is not None: + error_log(self._debug, f"analytics emit task error: {error}") + + async def _send(self, event: AnalyticsEvent) -> None: + # Fail-open: analytics must never block, slow, or alter request handling, so every error + # (transport, serialization, anything) is swallowed here rather than propagating. + try: + response = await _get_http_client().post( + self._url, + json=asdict(event), + headers={ + "Authorization": f"Bearer {self._api_key}", + "Content-Type": "application/json", + }, + ) + if not response.is_success: + debug_log(self._debug, f"analytics emit failed: {response.status_code}") + except Exception as error: # noqa: BLE001 — fail-open guarantee, see comment above + error_log(self._debug, f"analytics emit error: {error}") + + +# Re-exported so callers can rely on structural typing without importing from `types`. +__all__ = [ + "ANALYTICS_EVENTS_PATH", + "AnalyticsTransport", + "HttpAnalyticsTransport", + "NoopAnalyticsTransport", + "aclose_http_client", +] diff --git a/supertab_connect/analytics/types.py b/supertab_connect/analytics/types.py new file mode 100644 index 0000000..07bce4b --- /dev/null +++ b/supertab_connect/analytics/types.py @@ -0,0 +1,144 @@ +"""Analytics event schema and transport protocol (mirrors TS `analytics/types.ts`).""" + +from dataclasses import dataclass +from typing import Literal, Protocol, runtime_checkable + +from supertab_connect.types import EnforcementMode, LicenseTokenInvalidReason + +SCHEMA_VERSION = 2 + +SourceCdn = Literal["cloudflare", "fastly", "cloudfront"] + +TokenOutcome = Literal[ + "absent", + "valid", + "expired", + "invalid_signature", + "invalid_audience", + "invalid_resource", + "invalid_issuer", + "malformed", + "server_error", + "not_validated", +] + +FinalAction = Literal["allow", "observe", "block"] + +EnforcementWire = Literal["observe", "enforce", "disabled"] + + +@dataclass(frozen=True) +class Decision: + has_token: bool + token_outcome: TokenOutcome + final_action: FinalAction + enforcement_mode: EnforcementMode + + +@dataclass(frozen=True) +class AnalyticsEvent: + timestamp: str + request_id: str + schema_version: int + # None when the request did not pass through a CDN (e.g. invoked directly via the SDK). + source_cdn: SourceCdn | None + + user_agent: str + client_ip: str + path: str + method: str + referer: str + accept_language: str + + # Classification signals — supplied by the CDN layer (platform-specific). None when not exposed. + request_country: str | None + request_asn: int | None + tls_fingerprint: str | None + + has_token: bool + token_outcome: TokenOutcome + final_action: FinalAction + enforcement_mode: EnforcementWire + + # HTTP Message Signature headers — platform-agnostic, read directly from request headers. + signature_agent: str | None + signature_input: str | None + signature: str | None + + # --- Capture v2 (schema_version 2): spoof-detection signals --- + # Portable header signals — read directly from request headers (every CDN). + sec_fetch_mode: str | None + sec_fetch_site: str | None + sec_fetch_dest: str | None + sec_fetch_user: str | None + sec_ch_ua: str | None + sec_ch_ua_mobile: str | None + sec_ch_ua_platform: str | None + accept: str | None + host: str | None + has_cookies: bool | None + # Lowercased, deduped, sorted request-header names with edge-injected headers + # (cf-*, x-forwarded-*, x-real-ip, …) and the synthesized Host stripped. Non-nullable: [] when none. + header_names: list[str] + + # Query-string derived signals. The raw query is NEVER stored (PII gate → option b); + # only these mechanical derivations are emitted. + query_length: int | None + query_param_count: int | None + query_suspicious: bool | None + + # CDN plumbing — not derivable from the portable Request. Supplied per platform by the + # caller via HandleRequestContext; null when not exposed. + accept_encoding: str | None + http_protocol: str | None + tls_version: str | None + tls_cipher: str | None + tls_client_hello_length: int | None + tls_client_extensions_sha1: str | None + as_organization: str | None + client_tcp_rtt: int | None + cdn_verified_bot_category: str | None + request_priority: str | None + tls_fingerprint_ja4: str | None + + +@dataclass(frozen=True) +class CdnRequestSignals: + """CDN-supplied request signals that cannot be read from the portable httpx ``Request``. + + Extracted per platform by the caller (Cloudflare ``request.cf``, Fastly headers, …) and + threaded through ``HandleRequestContext``. Field names match the wire (snake_case) contract, + so they pass straight through onto the event. + """ + + accept_encoding: str | None = None + http_protocol: str | None = None + tls_version: str | None = None + tls_cipher: str | None = None + tls_client_hello_length: int | None = None + tls_client_extensions_sha1: str | None = None + as_organization: str | None = None + client_tcp_rtt: int | None = None + cdn_verified_bot_category: str | None = None + request_priority: str | None = None + tls_fingerprint_ja4: str | None = None + + +@runtime_checkable +class AnalyticsTransport(Protocol): + def emit(self, event: AnalyticsEvent) -> None: + """Emit an analytics event. Implementations must never block the request path or raise.""" + ... + + +TOKEN_OUTCOME_BY_REASON: dict[LicenseTokenInvalidReason, TokenOutcome] = { + LicenseTokenInvalidReason.MISSING_TOKEN: "absent", + LicenseTokenInvalidReason.EXPIRED: "expired", + LicenseTokenInvalidReason.SIGNATURE_VERIFICATION_FAILED: "invalid_signature", + LicenseTokenInvalidReason.INVALID_AUDIENCE: "invalid_audience", + LicenseTokenInvalidReason.INVALID_ISSUER: "invalid_issuer", + LicenseTokenInvalidReason.INVALID_HEADER: "malformed", + LicenseTokenInvalidReason.INVALID_PAYLOAD: "malformed", + LicenseTokenInvalidReason.INVALID_ALG: "malformed", + LicenseTokenInvalidReason.SERVER_ERROR: "server_error", +} diff --git a/supertab_connect/customer/token.py b/supertab_connect/customer/token.py index 2776cb9..69841f2 100644 --- a/supertab_connect/customer/token.py +++ b/supertab_connect/customer/token.py @@ -15,6 +15,7 @@ from cryptography.hazmat.primitives.asymmetric import ec, rsa from cryptography.hazmat.primitives.serialization import load_pem_private_key +from supertab_connect._version import _get_sdk_user_agent from supertab_connect.common import debug_log, error_log from supertab_connect.exceptions import SupertabConnectError from supertab_connect.customer.content_matcher import _find_best_matching_content @@ -96,6 +97,15 @@ def _evict_expired_license_xml() -> None: def _create_async_client(**kwargs: Any) -> httpx.AsyncClient: kwargs.setdefault("follow_redirects", True) kwargs.setdefault("timeout", httpx.Timeout(_DEFAULT_HTTP_TIMEOUT_SECONDS)) + + headers = kwargs.pop("headers", None) + if headers is None: + headers = {"User-Agent": _get_sdk_user_agent()} + else: + headers = dict(headers) + headers.setdefault("User-Agent", _get_sdk_user_agent()) + kwargs["headers"] = headers + return httpx.AsyncClient(**kwargs) diff --git a/supertab_connect/merchant/client.py b/supertab_connect/merchant/client.py index 0e9f91a..870f6d1 100644 --- a/supertab_connect/merchant/client.py +++ b/supertab_connect/merchant/client.py @@ -5,6 +5,24 @@ from httpx import Request +from supertab_connect.analytics.build_analytics_event import ( + BuildAnalyticsEventContext, + build_analytics_event, +) +from supertab_connect.analytics.transport import ( + ANALYTICS_EVENTS_PATH, + HttpAnalyticsTransport, + NoopAnalyticsTransport, +) +from supertab_connect.analytics.transport import aclose_http_client as aclose_analytics_http_client +from supertab_connect.analytics.types import ( + TOKEN_OUTCOME_BY_REASON, + AnalyticsTransport, + Decision, + FinalAction, + TokenOutcome, +) +from supertab_connect.common import error_log from supertab_connect.merchant.events import aclose_http_client as aclose_events_http_client from supertab_connect.merchant.license import ( build_block_result, @@ -16,6 +34,7 @@ from supertab_connect.types import ( BotDetector, EnforcementMode, + HandleRequestContext, HandlerAction, HandlerResult, InvalidLicenseToken, @@ -57,9 +76,21 @@ def __init__(self, config: SupertabConnectConfig, reset: bool = False) -> None: self.bot_detector = config.bot_detector self.debug = config.debug self._base_url_override = config.supertab_base_url + self._analytics_transport = self._build_analytics_transport(config) self._initialized = True type(self)._instance = self + def _build_analytics_transport(self, config: SupertabConnectConfig) -> AnalyticsTransport: + if config.analytics_transport is not None: + return config.analytics_transport + if not config.analytics_enabled: + return NoopAnalyticsTransport() + return HttpAnalyticsTransport( + url=f"{self.base_url.rstrip('/')}{ANALYTICS_EVENTS_PATH}", + api_key=config.api_key, + debug=config.debug, + ) + @classmethod def reset_instance(cls) -> None: cls._instance = None @@ -79,6 +110,7 @@ def base_url(self) -> str: async def aclose(self) -> None: await aclose_events_http_client() await aclose_jwks_http_client() + await aclose_analytics_http_client() async def __aenter__(self) -> "SupertabConnect": return self @@ -138,17 +170,61 @@ def _detect_bot(self, request: Request) -> bool: return detector(request) - async def handle_request(self, request: Request) -> HandlerResult: + def _emit_analytics( + self, + request: Request, + context: HandleRequestContext | None, + *, + has_token: bool, + token_outcome: TokenOutcome, + final_action: FinalAction, + ) -> None: + try: + event = build_analytics_event( + request, + Decision( + has_token=has_token, + token_outcome=token_outcome, + final_action=final_action, + enforcement_mode=self.enforcement, + ), + BuildAnalyticsEventContext( + source_cdn=context.source_cdn if context else None, + request_id=context.request_id if context else None, + client_ip=context.client_ip if context else None, + request_country=context.request_country if context else None, + request_asn=context.request_asn if context else None, + tls_fingerprint=context.tls_fingerprint if context else None, + cdn_signals=context.cdn_signals if context else None, + ), + ) + self._analytics_transport.emit(event) + except Exception as error: # noqa: BLE001 — analytics must never break request handling + error_log(self.debug, f"failed to build/emit analytics event: {error}") + + async def handle_request(self, request: Request, context: HandleRequestContext | None = None) -> HandlerResult: auth = request.headers.get("authorization", "") token = None auth_parts = auth.split(None, 1) if len(auth_parts) == 2 and auth_parts[0].lower() == "license": token = auth_parts[1] + has_token = token is not None url = str(request.url) user_agent = request.headers.get("user-agent", "unknown") + # Token present → validate, regardless of bot detection — except in DISABLED + # mode, which short-circuits to ALLOW without verification. if token: if self.enforcement is EnforcementMode.DISABLED: + # DISABLED short-circuits to ALLOW without verifying the token, so we cannot + # honestly claim "valid"; emit "not_validated" so it is not counted as licensed. + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome="not_validated", + final_action="allow", + ) return {"action": HandlerAction.ALLOW} verification = await verify_and_record_event( @@ -161,22 +237,64 @@ async def handle_request(self, request: Request) -> HandlerResult: request_headers=dict(request.headers.items()), ) if isinstance(verification, InvalidLicenseToken): + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome=TOKEN_OUTCOME_BY_REASON.get(verification.reason, "malformed"), + final_action="block", + ) return build_block_result( reason=verification.reason, error=verification.error, request_url=url, ) + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome="valid", + final_action="allow", + ) return {"action": HandlerAction.ALLOW} if not self._detect_bot(request): + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome="absent", + final_action="allow", + ) return {"action": HandlerAction.ALLOW} - if self.enforcement is EnforcementMode.STRICT: + if self.enforcement is EnforcementMode.ENFORCE: + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome="absent", + final_action="block", + ) return build_block_result( reason=LicenseTokenInvalidReason.MISSING_TOKEN, error="Authorization header missing or malformed", request_url=url, ) - if self.enforcement is EnforcementMode.SOFT: + if self.enforcement is EnforcementMode.OBSERVE: + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome="absent", + final_action="observe", + ) return build_signal_result(url) + self._emit_analytics( + request, + context, + has_token=has_token, + token_outcome="absent", + final_action="allow", + ) return {"action": HandlerAction.ALLOW} diff --git a/supertab_connect/merchant/jwks.py b/supertab_connect/merchant/jwks.py index 1115aa6..dff9563 100644 --- a/supertab_connect/merchant/jwks.py +++ b/supertab_connect/merchant/jwks.py @@ -5,6 +5,7 @@ import httpx +from supertab_connect._version import _get_sdk_user_agent from supertab_connect.common import debug_log, error_log from supertab_connect.exceptions import JwksKeyNotFoundError @@ -17,7 +18,7 @@ def _get_http_client() -> httpx.AsyncClient: global _http_client if _http_client is None or _http_client.is_closed: - _http_client = httpx.AsyncClient() + _http_client = httpx.AsyncClient(headers={"User-Agent": _get_sdk_user_agent()}) return _http_client diff --git a/supertab_connect/types.py b/supertab_connect/types.py index 54a0bf4..f9e0f9d 100644 --- a/supertab_connect/types.py +++ b/supertab_connect/types.py @@ -3,15 +3,18 @@ from collections.abc import Callable from dataclasses import dataclass, field from enum import StrEnum -from typing import Any, Literal, NotRequired, TypeAlias, TypedDict +from typing import TYPE_CHECKING, Any, Literal, NotRequired, TypeAlias, TypedDict from httpx import Request +if TYPE_CHECKING: + from supertab_connect.analytics.types import AnalyticsTransport, CdnRequestSignals + class EnforcementMode(StrEnum): DISABLED = "disabled" - SOFT = "soft" - STRICT = "strict" + OBSERVE = "observe" + ENFORCE = "enforce" class LicenseTokenInvalidReason(StrEnum): @@ -46,10 +49,32 @@ class UsageType(StrEnum): @dataclass(frozen=True) class SupertabConnectConfig: api_key: str - enforcement: EnforcementMode = EnforcementMode.SOFT + enforcement: EnforcementMode = EnforcementMode.OBSERVE supertab_base_url: str | None = None bot_detector: BotDetector | None = None debug: bool = False + # Enables analytics emission to the Supertab Connect relay. Default: False. + analytics_enabled: bool = False + # Internal dependency-injection seam: overrides the default HttpAnalyticsTransport when provided. + # Used by tests to inject in-memory transports. Not a merchant-facing option. + analytics_transport: "AnalyticsTransport | None" = None + + +@dataclass(frozen=True) +class HandleRequestContext: + """Optional CDN-supplied request context for `handle_request`. + + All fields are omitted (None) for direct SDK invocation that did not pass through a CDN. + """ + + source_cdn: Literal["cloudflare", "fastly", "cloudfront"] | None = None + client_ip: str | None = None + request_id: str | None = None + request_country: str | None = None + request_asn: int | None = None + tls_fingerprint: str | None = None + # Capture-v2 CDN plumbing not derivable from the portable Request (e.g. Cloudflare request.cf). + cdn_signals: "CdnRequestSignals | None" = None class AllowHandlerResult(TypedDict): diff --git a/tests/analytics/__init__.py b/tests/analytics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/analytics/conftest.py b/tests/analytics/conftest.py new file mode 100644 index 0000000..becf33a --- /dev/null +++ b/tests/analytics/conftest.py @@ -0,0 +1,13 @@ +"""Shared fixtures for analytics tests.""" + +import pytest + +from supertab_connect.analytics import transport as transport_module + + +@pytest.fixture(autouse=True) +async def _reset_analytics_http_client(): + """Reset the module-level analytics http client around each test.""" + await transport_module.aclose_http_client() + yield + await transport_module.aclose_http_client() diff --git a/tests/analytics/test_build_analytics_event.py b/tests/analytics/test_build_analytics_event.py new file mode 100644 index 0000000..3af1124 --- /dev/null +++ b/tests/analytics/test_build_analytics_event.py @@ -0,0 +1,361 @@ +"""Tests for building relay analytics events.""" + +from dataclasses import asdict, replace +from datetime import datetime, timezone + +import httpx +import pytest + +from supertab_connect.analytics.build_analytics_event import ( + BuildAnalyticsEventContext, + build_analytics_event, +) +from supertab_connect.analytics.types import SCHEMA_VERSION, CdnRequestSignals, Decision +from supertab_connect.types import EnforcementMode + +FIXED_TIME = datetime(2026, 4, 29, 12, 0, 0, tzinfo=timezone.utc) +REQUEST_ID = "req-123" + +BASE_DECISION = Decision( + has_token=False, + token_outcome="absent", + final_action="allow", + enforcement_mode=EnforcementMode.OBSERVE, +) + + +def _make_request( + *, + url: str = "https://example.com/articles/foo?x=1", + method: str = "GET", + headers: dict[str, str] | None = None, +) -> httpx.Request: + return httpx.Request(method, url, headers=headers or {}) + + +def _ctx(**extra) -> BuildAnalyticsEventContext: + base = BuildAnalyticsEventContext(request_id=REQUEST_ID, source_cdn="cloudflare", timestamp=FIXED_TIME) + return replace(base, **extra) + + +def test_returns_event_matching_relay_shape(): + request = _make_request( + headers={ + "user-agent": "Mozilla/5.0", + "referer": "https://example.com/", + "accept-language": "en-US,en;q=0.9", + } + ) + + event = build_analytics_event(request, BASE_DECISION, _ctx(client_ip="1.2.3.4")) + + assert asdict(event) == { + "timestamp": "2026-04-29T12:00:00.000Z", + "request_id": REQUEST_ID, + "schema_version": SCHEMA_VERSION, + "source_cdn": "cloudflare", + "user_agent": "Mozilla/5.0", + "client_ip": "::ffff:1.2.3.4", + "path": "/articles/foo", + "method": "GET", + "referer": "https://example.com/", + "accept_language": "en-US,en;q=0.9", + "request_country": None, + "request_asn": None, + "tls_fingerprint": None, + "has_token": False, + "token_outcome": "absent", + "final_action": "allow", + "enforcement_mode": "observe", + "signature_agent": None, + "signature_input": None, + "signature": None, + # Capture v2 — portable header signals (none of these headers were sent). + "sec_fetch_mode": None, + "sec_fetch_site": None, + "sec_fetch_dest": None, + "sec_fetch_user": None, + "sec_ch_ua": None, + "sec_ch_ua_mobile": None, + "sec_ch_ua_platform": None, + "accept": None, + "host": "example.com", + "has_cookies": False, + "header_names": ["accept-language", "referer", "user-agent"], + "query_length": 3, + "query_param_count": 1, + "query_suspicious": False, + # Capture v2 — CDN plumbing (no cdn_signals in context → None). + "accept_encoding": None, + "http_protocol": None, + "tls_version": None, + "tls_cipher": None, + "tls_client_hello_length": None, + "tls_client_extensions_sha1": None, + "as_organization": None, + "client_tcp_rtt": None, + "cdn_verified_bot_category": None, + "request_priority": None, + "tls_fingerprint_ja4": None, + } + + +def test_passes_through_classification_signals(): + event = build_analytics_event( + _make_request(), + BASE_DECISION, + _ctx(request_country="DE", request_asn=3320, tls_fingerprint="abc123"), + ) + assert event.request_country == "DE" + assert event.request_asn == 3320 + assert event.tls_fingerprint == "abc123" + + +def test_classification_signals_default_to_none(): + event = build_analytics_event(_make_request(), BASE_DECISION, _ctx()) + assert event.request_country is None + assert event.request_asn is None + assert event.tls_fingerprint is None + + +def test_reads_signature_headers_from_request(): + request = _make_request( + headers={ + "signature-agent": "https://agent.example", + "signature-input": "sig1=(...)", + "signature": "sig1=:abc:", + } + ) + event = build_analytics_event(request, BASE_DECISION, _ctx()) + assert event.signature_agent == "https://agent.example" + assert event.signature_input == "sig1=(...)" + assert event.signature == "sig1=:abc:" + + +def test_signature_headers_default_to_none(): + event = build_analytics_event(_make_request(), BASE_DECISION, _ctx()) + assert event.signature_agent is None + assert event.signature_input is None + assert event.signature is None + + +@pytest.mark.parametrize("final_action", ["allow", "observe", "block"]) +def test_passes_through_final_action(final_action): + decision = replace(BASE_DECISION, final_action=final_action) + event = build_analytics_event(_make_request(), decision, _ctx()) + assert event.final_action == final_action + + +@pytest.mark.parametrize( + ("mode", "wire"), + [ + (EnforcementMode.OBSERVE, "observe"), + (EnforcementMode.ENFORCE, "enforce"), + (EnforcementMode.DISABLED, "disabled"), + ], +) +def test_serializes_enforcement_mode_to_wire(mode, wire): + decision = replace(BASE_DECISION, enforcement_mode=mode) + event = build_analytics_event(_make_request(), decision, _ctx()) + assert event.enforcement_mode == wire + + +def test_source_cdn_is_none_for_direct_sdk_invocation(): + event = build_analytics_event(_make_request(), BASE_DECISION, BuildAnalyticsEventContext()) + assert event.source_cdn is None + + +def test_generates_request_id_when_absent(): + event = build_analytics_event(_make_request(), BASE_DECISION, BuildAnalyticsEventContext(timestamp=FIXED_TIME)) + assert event.request_id # a uuid4 string + + +def test_path_preserves_percent_encoding(): + # request.url.path would decode %2F->"/" and %20->" "; the event must keep encoded semantics. + request = _make_request(url="https://example.com/a%2Fb/c%20d?x=1") + event = build_analytics_event(request, BASE_DECISION, _ctx()) + assert event.path == "/a%2Fb/c%20d" + + +def test_path_drops_query_string(): + request = _make_request(url="https://example.com/articles/foo?x=1&y=2") + event = build_analytics_event(request, BASE_DECISION, _ctx()) + assert event.path == "/articles/foo" + + +def test_missing_headers_default_to_empty_strings(): + event = build_analytics_event(_make_request(), BASE_DECISION, _ctx()) + assert event.user_agent == "" + assert event.referer == "" + assert event.accept_language == "" + + +# --- Capture v2 ------------------------------------------------------------------------------- + +BROWSER_HEADERS = { + "user-agent": "Mozilla/5.0", + "sec-fetch-mode": "navigate", + "sec-fetch-site": "none", + "sec-fetch-dest": "document", + "sec-fetch-user": "?1", + "sec-ch-ua": '"Chromium";v="120", "Not(A:Brand";v="24"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"macOS"', + "accept": "text/html", + "cookie": "session=abc", +} + + +def test_captures_sec_fetch_and_client_hints_from_browser_request(): + event = build_analytics_event(_make_request(headers=BROWSER_HEADERS), BASE_DECISION, _ctx()) + assert event.sec_fetch_mode == "navigate" + assert event.sec_fetch_site == "none" + assert event.sec_fetch_dest == "document" + assert event.sec_fetch_user == "?1" + assert event.sec_ch_ua == '"Chromium";v="120", "Not(A:Brand";v="24"' + assert event.sec_ch_ua_mobile == "?0" + assert event.sec_ch_ua_platform == '"macOS"' + assert event.accept == "text/html" + assert event.has_cookies is True + + +def test_curl_like_request_carries_no_browser_signals(): + event = build_analytics_event(_make_request(headers={"user-agent": "curl/8.0"}), BASE_DECISION, _ctx()) + assert event.sec_fetch_mode is None + assert event.sec_fetch_site is None + assert event.sec_fetch_dest is None + assert event.sec_fetch_user is None + assert event.sec_ch_ua is None + assert event.sec_ch_ua_mobile is None + assert event.sec_ch_ua_platform is None + assert event.has_cookies is False + + +def test_host_falls_back_to_url_host(): + event = build_analytics_event(_make_request(url="https://pub.example.com/a"), BASE_DECISION, _ctx()) + assert event.host == "pub.example.com" + + +def test_truncates_accept_and_sec_ch_ua_to_512_chars(): + long = "a" * 600 + event = build_analytics_event(_make_request(headers={"accept": long, "sec-ch-ua": long}), BASE_DECISION, _ctx()) + assert event.accept == "a" * 512 + assert event.sec_ch_ua == "a" * 512 + + +def test_header_names_lowercased_deduped_sorted(): + event = build_analytics_event( + _make_request(headers={"User-Agent": "x", "Accept": "y", "Referer": "z"}), + BASE_DECISION, + _ctx(), + ) + assert event.header_names == ["accept", "referer", "user-agent"] + + +def test_header_names_strips_edge_injected_headers_across_all_cdns(): + event = build_analytics_event( + _make_request( + headers={ + "user-agent": "x", + # Cloudflare + "cf-connecting-ip": "1.2.3.4", + "cf-ray": "abc", + # Fastly + "fastly-client-ip": "1.2.3.4", + "fastly-client-ja3": "deadbeef", + # CloudFront + "cloudfront-viewer-country": "DE", + "cloudfront-viewer-ja3-fingerprint": "abc", + # shared / SDK routing / synthesized + "x-forwarded-for": "1.2.3.4", + "x-real-ip": "1.2.3.4", + "x-original-request-url": "https://pub.example.com/a", + } + ), + BASE_DECISION, + _ctx(), + ) + # host is stripped too (httpx synthesizes it; the TS SDK never emits it). + assert event.header_names == ["user-agent"] + + +def test_query_signals_derived_without_storing_raw_query(): + event = build_analytics_event(_make_request(url="https://x.test/p?a=1&b=2&c=3"), BASE_DECISION, _ctx()) + assert event.query_length == len("a=1&b=2&c=3") + assert event.query_param_count == 3 + assert event.query_suspicious is False + # The raw query string must never appear on the event. + assert "a=1&b=2&c=3" not in str(asdict(event)) + + +def test_query_signals_are_zero_for_query_less_url(): + event = build_analytics_event(_make_request(url="https://x.test/p"), BASE_DECISION, _ctx()) + assert event.query_length == 0 + assert event.query_param_count == 0 + assert event.query_suspicious is False + + +@pytest.mark.parametrize( + "url", + [ + "https://x.test/?f=../../etc/passwd", + "https://x.test/?q=UNION%20SELECT%201", + "https://x.test/?x=%3Cscript%3E", + ], +) +def test_query_suspicious_flags_exploit_markers_raw_and_encoded(url): + event = build_analytics_event(_make_request(url=url), BASE_DECISION, _ctx()) + assert event.query_suspicious is True + + +def test_cdn_signals_passthrough_with_truncation(): + event = build_analytics_event( + _make_request(), + BASE_DECISION, + _ctx( + cdn_signals=CdnRequestSignals( + accept_encoding="gzip, br", + http_protocol="HTTP/2", + tls_version="TLSv1.3", + tls_cipher="AEAD-AES128-GCM-SHA256", + tls_client_hello_length=1811, + tls_client_extensions_sha1="4cFD...", + as_organization="o" * 600, + client_tcp_rtt=50, + cdn_verified_bot_category="Search Engine Crawler", + request_priority="weight=256;exclusive=1", + tls_fingerprint_ja4=None, + ) + ), + ) + assert event.accept_encoding == "gzip, br" + assert event.http_protocol == "HTTP/2" + assert event.tls_version == "TLSv1.3" + assert event.tls_cipher == "AEAD-AES128-GCM-SHA256" + assert event.tls_client_hello_length == 1811 + assert event.tls_client_extensions_sha1 == "4cFD..." + assert event.as_organization == "o" * 512 + assert event.client_tcp_rtt == 50 + assert event.cdn_verified_bot_category == "Search Engine Crawler" + assert event.request_priority == "weight=256;exclusive=1" + assert event.tls_fingerprint_ja4 is None + + +def test_cdn_signals_default_to_none_when_absent(): + event = build_analytics_event(_make_request(), BASE_DECISION, _ctx()) + assert event.accept_encoding is None + assert event.http_protocol is None + assert event.tls_version is None + assert event.tls_cipher is None + assert event.tls_client_hello_length is None + assert event.tls_client_extensions_sha1 is None + assert event.as_organization is None + assert event.client_tcp_rtt is None + assert event.cdn_verified_bot_category is None + assert event.request_priority is None + assert event.tls_fingerprint_ja4 is None + + +def test_schema_version_is_2(): + event = build_analytics_event(_make_request(), BASE_DECISION, _ctx()) + assert event.schema_version == 2 diff --git a/tests/analytics/test_ip.py b/tests/analytics/test_ip.py new file mode 100644 index 0000000..a0dbc83 --- /dev/null +++ b/tests/analytics/test_ip.py @@ -0,0 +1,28 @@ +"""Tests for client-IP normalization.""" + +import pytest + +from supertab_connect.analytics.ip import normalize_client_ip + + +def test_maps_ipv4_to_ipv6_mapped_form(): + assert normalize_client_ip("1.2.3.4") == "::ffff:1.2.3.4" + assert normalize_client_ip("192.0.2.1") == "::ffff:192.0.2.1" + + +def test_trims_surrounding_whitespace_before_mapping_ipv4(): + assert normalize_client_ip(" 1.2.3.4 ") == "::ffff:1.2.3.4" + + +def test_passes_ipv6_through_unchanged(): + assert normalize_client_ip("2001:db8::1") == "2001:db8::1" + assert normalize_client_ip("::1") == "::1" + + +@pytest.mark.parametrize("value", [None, "", " "]) +def test_returns_unspecified_for_empty(value): + assert normalize_client_ip(value) == "::" + + +def test_returns_unspecified_for_unrecognized_value(): + assert normalize_client_ip("not-an-ip") == "::" diff --git a/tests/analytics/test_transport.py b/tests/analytics/test_transport.py new file mode 100644 index 0000000..0c6ed9b --- /dev/null +++ b/tests/analytics/test_transport.py @@ -0,0 +1,182 @@ +"""Tests for analytics transports.""" + +import asyncio +import json + +import httpx +import respx + +from supertab_connect._version import _get_sdk_user_agent +from supertab_connect.analytics.transport import ( + ANALYTICS_EVENTS_PATH, + HttpAnalyticsTransport, + NoopAnalyticsTransport, +) +from supertab_connect.analytics.transport import _background_tasks +from supertab_connect.analytics.types import AnalyticsEvent + +RELAY_URL = "https://relay.test/ingest/events" + +FIXTURE_EVENT = AnalyticsEvent( + timestamp="2026-04-29T12:00:00.000Z", + request_id="req-1", + schema_version=2, + source_cdn="cloudflare", + user_agent="ua", + client_ip="::ffff:1.2.3.4", + path="/p", + method="GET", + referer="", + accept_language="en", + request_country="US", + request_asn=13335, + tls_fingerprint="ja3hash", + has_token=False, + token_outcome="absent", + final_action="allow", + enforcement_mode="observe", + signature_agent=None, + signature_input=None, + signature=None, + sec_fetch_mode=None, + sec_fetch_site=None, + sec_fetch_dest=None, + sec_fetch_user=None, + sec_ch_ua=None, + sec_ch_ua_mobile=None, + sec_ch_ua_platform=None, + accept=None, + host="example.com", + has_cookies=False, + header_names=["user-agent"], + query_length=0, + query_param_count=0, + query_suspicious=False, + accept_encoding=None, + http_protocol=None, + tls_version=None, + tls_cipher=None, + tls_client_hello_length=None, + tls_client_extensions_sha1=None, + as_organization=None, + client_tcp_rtt=None, + cdn_verified_bot_category=None, + request_priority=None, + tls_fingerprint_ja4=None, +) + + +async def _flush() -> None: + """Await all in-flight background emit tasks.""" + while _background_tasks: + await asyncio.gather(*list(_background_tasks), return_exceptions=True) + + +def test_analytics_events_path_targets_the_relay_events_route(): + assert ANALYTICS_EVENTS_PATH == "/ingest/events" + + +async def test_posts_json_body_with_bearer_api_key_to_relay_url(): + with respx.mock: + route = respx.post(RELAY_URL).respond(status_code=202) + transport = HttpAnalyticsTransport(url=RELAY_URL, api_key="merchant-api-key") + + transport.emit(FIXTURE_EVENT) + await _flush() + + assert route.called + request = route.calls[0].request + assert request.method == "POST" + assert request.headers["authorization"] == "Bearer merchant-api-key" + assert request.headers["content-type"] == "application/json" + assert request.headers["user-agent"] == _get_sdk_user_agent() + assert json.loads(request.content) == { + "timestamp": "2026-04-29T12:00:00.000Z", + "request_id": "req-1", + "schema_version": 2, + "source_cdn": "cloudflare", + "user_agent": "ua", + "client_ip": "::ffff:1.2.3.4", + "path": "/p", + "method": "GET", + "referer": "", + "accept_language": "en", + "request_country": "US", + "request_asn": 13335, + "tls_fingerprint": "ja3hash", + "has_token": False, + "token_outcome": "absent", + "final_action": "allow", + "enforcement_mode": "observe", + "signature_agent": None, + "signature_input": None, + "signature": None, + "sec_fetch_mode": None, + "sec_fetch_site": None, + "sec_fetch_dest": None, + "sec_fetch_user": None, + "sec_ch_ua": None, + "sec_ch_ua_mobile": None, + "sec_ch_ua_platform": None, + "accept": None, + "host": "example.com", + "has_cookies": False, + "header_names": ["user-agent"], + "query_length": 0, + "query_param_count": 0, + "query_suspicious": False, + "accept_encoding": None, + "http_protocol": None, + "tls_version": None, + "tls_cipher": None, + "tls_client_hello_length": None, + "tls_client_extensions_sha1": None, + "as_organization": None, + "client_tcp_rtt": None, + "cdn_verified_bot_category": None, + "request_priority": None, + "tls_fingerprint_ja4": None, + } + + +async def test_does_not_raise_when_request_fails(): + with respx.mock: + respx.post(RELAY_URL).mock(side_effect=httpx.ConnectError("network down")) + transport = HttpAnalyticsTransport(url=RELAY_URL, api_key="t") + + transport.emit(FIXTURE_EVENT) # must not raise + await _flush() + + +async def test_does_not_raise_on_non_2xx_responses(): + with respx.mock: + respx.post(RELAY_URL).respond(status_code=500, text="err") + transport = HttpAnalyticsTransport(url=RELAY_URL, api_key="t") + + transport.emit(FIXTURE_EVENT) # must not raise + await _flush() + + +async def test_does_not_raise_on_non_http_errors(): + # A non-HTTPError raised on the request path (e.g. unexpected runtime error) must still + # be swallowed so the fire-and-forget task never surfaces an unhandled exception. + with respx.mock: + respx.post(RELAY_URL).mock(side_effect=ValueError("unexpected boom")) + transport = HttpAnalyticsTransport(url=RELAY_URL, api_key="t") + + transport.emit(FIXTURE_EVENT) # must not raise + await _flush() + + # No task should retain an unretrieved exception. + assert not _background_tasks + + +def test_emit_without_running_loop_is_a_noop(): + # No running event loop here (sync test) → emit silently skips scheduling. + transport = HttpAnalyticsTransport(url=RELAY_URL, api_key="t") + transport.emit(FIXTURE_EVENT) + + +def test_noop_transport_emit_never_throws(): + transport = NoopAnalyticsTransport() + assert transport.emit(FIXTURE_EVENT) is None diff --git a/tests/customer/test_tokens.py b/tests/customer/test_tokens.py index ff4ceaf..8309eef 100644 --- a/tests/customer/test_tokens.py +++ b/tests/customer/test_tokens.py @@ -34,6 +34,14 @@ def create_mock_client(**kwargs: Any) -> httpx.AsyncClient: ) +async def test_create_async_client_sets_sdk_user_agent() -> None: + """The customer HTTP client (license.xml + token calls) carries the SDK User-Agent.""" + from supertab_connect._version import _get_sdk_user_agent + + async with _create_async_client() as client: + assert client.headers["User-Agent"] == _get_sdk_user_agent() + + def test_obtain_license_token_fetches_and_caches_token( monkeypatch: pytest.MonkeyPatch, ) -> None: diff --git a/tests/merchant/test_client.py b/tests/merchant/test_client.py index 9832de6..abf0180 100644 --- a/tests/merchant/test_client.py +++ b/tests/merchant/test_client.py @@ -33,13 +33,13 @@ def _reset_supertab_connect_singleton(): def test_supertab_connect_returns_existing_instance_for_same_api_key(): - first = SupertabConnect(SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.STRICT)) + first = SupertabConnect(SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.ENFORCE)) second = SupertabConnect( - SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.SOFT, debug=True) + SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.OBSERVE, debug=True) ) assert first is second - assert second.enforcement is EnforcementMode.STRICT + assert second.enforcement is EnforcementMode.ENFORCE assert second.debug is False @@ -154,7 +154,7 @@ async def stub_verify_and_record_event(**kwargs): monkeypatch.setattr("supertab_connect.merchant.client.verify_and_record_event", stub_verify_and_record_event) - client = SupertabConnect(SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.STRICT)) + client = SupertabConnect(SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.ENFORCE)) result = await client.handle_request( _make_request( { @@ -232,7 +232,7 @@ async def close_jwks(): async def test_handle_request_allows_missing_token_without_bot_detector(): - client = SupertabConnect(SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.STRICT)) + client = SupertabConnect(SupertabConnectConfig(api_key="sk_test_123", enforcement=EnforcementMode.ENFORCE)) result = await client.handle_request(_make_request({"User-Agent": "Browser/1.0"})) @@ -243,7 +243,7 @@ async def test_handle_request_allows_missing_token_for_non_bot(): client = SupertabConnect( SupertabConnectConfig( api_key="sk_test_123", - enforcement=EnforcementMode.STRICT, + enforcement=EnforcementMode.ENFORCE, bot_detector=lambda request: False, ) ) @@ -253,11 +253,11 @@ async def test_handle_request_allows_missing_token_for_non_bot(): assert result == {"action": HandlerAction.ALLOW} -async def test_handle_request_blocks_bot_in_strict_mode(): +async def test_handle_request_blocks_bot_in_enforce_mode(): client = SupertabConnect( SupertabConnectConfig( api_key="sk_test_123", - enforcement=EnforcementMode.STRICT, + enforcement=EnforcementMode.ENFORCE, bot_detector=lambda request: True, ) ) @@ -269,11 +269,11 @@ async def test_handle_request_blocks_bot_in_strict_mode(): assert block_result["status"] == 401 -async def test_handle_request_signals_bot_in_soft_mode(): +async def test_handle_request_signals_bot_in_observe_mode(): client = SupertabConnect( SupertabConnectConfig( api_key="sk_test_123", - enforcement=EnforcementMode.SOFT, + enforcement=EnforcementMode.OBSERVE, bot_detector=lambda request: True, ) ) diff --git a/tests/merchant/test_client_analytics.py b/tests/merchant/test_client_analytics.py new file mode 100644 index 0000000..d8d3fc3 --- /dev/null +++ b/tests/merchant/test_client_analytics.py @@ -0,0 +1,210 @@ +"""Tests for analytics emission wired into the high-level merchant client.""" + +import httpx +import pytest + +from supertab_connect.analytics.types import AnalyticsEvent, AnalyticsTransport, CdnRequestSignals +from supertab_connect.merchant.client import SupertabConnect +from supertab_connect.types import ( + EnforcementMode, + HandleRequestContext, + HandlerAction, + InvalidLicenseToken, + LicenseTokenInvalidReason, + SupertabConnectConfig, + ValidLicenseToken, +) + +from tests.merchant.constants import REQUEST_URL, SUPERTAB_BASE_URL + + +class RecordingTransport: + def __init__(self) -> None: + self.events: list[AnalyticsEvent] = [] + + def emit(self, event: AnalyticsEvent) -> None: + self.events.append(event) + + +class ThrowingTransport: + def emit(self, event: AnalyticsEvent) -> None: + raise RuntimeError("transport blew up") + + +@pytest.fixture(autouse=True) +def _reset_singleton(): + SupertabConnect.reset_instance() + SupertabConnect.set_base_url(SUPERTAB_BASE_URL) + yield + SupertabConnect.reset_instance() + SupertabConnect.set_base_url(SUPERTAB_BASE_URL) + + +def _request(headers: dict[str, str] | None = None) -> httpx.Request: + return httpx.Request("GET", REQUEST_URL, headers=headers or {}) + + +def _client(transport: AnalyticsTransport, **config_kwargs) -> SupertabConnect: + return SupertabConnect( + SupertabConnectConfig(api_key="sk_test_123", analytics_transport=transport, **config_kwargs) + ) + + +def test_constructs_with_only_api_key(): + # Default transport is the Noop transport; construction must not require analytics config. + SupertabConnect(SupertabConnectConfig(api_key="sk_test_123")) + + +async def test_emits_observe_event_for_bot_without_token(): + transport = RecordingTransport() + client = _client(transport, enforcement=EnforcementMode.OBSERVE, bot_detector=lambda request: True) + + result = await client.handle_request( + _request({"User-Agent": "curl/8.0"}), HandleRequestContext(source_cdn="cloudflare") + ) + + assert result["action"] is HandlerAction.ALLOW + assert len(transport.events) == 1 + event = transport.events[0] + assert event.source_cdn == "cloudflare" + assert event.final_action == "observe" + assert event.enforcement_mode == "observe" + assert event.has_token is False + assert event.token_outcome == "absent" + + +async def test_emits_block_event_for_bot_without_token_in_enforce(): + transport = RecordingTransport() + client = _client(transport, enforcement=EnforcementMode.ENFORCE, bot_detector=lambda request: True) + + result = await client.handle_request(_request({"User-Agent": "curl/8.0"})) + + assert result["action"] is HandlerAction.BLOCK + assert transport.events[0].final_action == "block" + assert transport.events[0].token_outcome == "absent" + + +async def test_emits_allow_event_for_non_bot_without_token(): + transport = RecordingTransport() + client = _client(transport, enforcement=EnforcementMode.ENFORCE, bot_detector=lambda request: False) + + result = await client.handle_request(_request({"User-Agent": "Browser/1.0"})) + + assert result == {"action": HandlerAction.ALLOW} + assert transport.events[0].final_action == "allow" + assert transport.events[0].token_outcome == "absent" + + +async def test_emits_not_validated_for_token_in_disabled_mode(): + transport = RecordingTransport() + client = _client(transport, enforcement=EnforcementMode.DISABLED) + + result = await client.handle_request(_request({"Authorization": "License some-token"})) + + assert result == {"action": HandlerAction.ALLOW} + event = transport.events[0] + assert event.has_token is True + assert event.token_outcome == "not_validated" + assert event.final_action == "allow" + assert event.enforcement_mode == "disabled" + + +async def test_emits_valid_for_verified_token(monkeypatch): + async def stub_verify_and_record_event(**kwargs): + return ValidLicenseToken(license_id="lic_test_123", payload={}) + + monkeypatch.setattr("supertab_connect.merchant.client.verify_and_record_event", stub_verify_and_record_event) + transport = RecordingTransport() + client = _client(transport, enforcement=EnforcementMode.ENFORCE) + + result = await client.handle_request(_request({"Authorization": "License signed.jwt"})) + + assert result == {"action": HandlerAction.ALLOW} + assert transport.events[0].has_token is True + assert transport.events[0].token_outcome == "valid" + assert transport.events[0].final_action == "allow" + + +async def test_emits_mapped_outcome_for_invalid_token(monkeypatch): + async def stub_verify_and_record_event(**kwargs): + return InvalidLicenseToken( + reason=LicenseTokenInvalidReason.EXPIRED, + error="License token expired", + license_id="lic_test_123", + ) + + monkeypatch.setattr("supertab_connect.merchant.client.verify_and_record_event", stub_verify_and_record_event) + transport = RecordingTransport() + client = _client(transport, enforcement=EnforcementMode.ENFORCE) + + result = await client.handle_request(_request({"Authorization": "License signed.jwt"})) + + assert result["action"] is HandlerAction.BLOCK + assert transport.events[0].token_outcome == "expired" + assert transport.events[0].final_action == "block" + + +async def test_forwards_classification_signals_from_context(): + transport = RecordingTransport() + client = _client(transport, bot_detector=lambda request: True) + + await client.handle_request( + _request({"User-Agent": "curl/8.0"}), + HandleRequestContext( + source_cdn="fastly", + client_ip="1.2.3.4", + request_id="req-xyz", + request_country="DE", + request_asn=3320, + tls_fingerprint="abc123", + ), + ) + + event = transport.events[0] + assert event.source_cdn == "fastly" + assert event.client_ip == "::ffff:1.2.3.4" + assert event.request_id == "req-xyz" + assert event.request_country == "DE" + assert event.request_asn == 3320 + assert event.tls_fingerprint == "abc123" + + +async def test_forwards_cdn_signals_from_context(): + transport = RecordingTransport() + client = _client(transport, bot_detector=lambda request: True) + + await client.handle_request( + _request({"User-Agent": "curl/8.0"}), + HandleRequestContext( + source_cdn="cloudflare", + cdn_signals=CdnRequestSignals( + tls_version="TLSv1.3", + cdn_verified_bot_category="AI Assistant", + ), + ), + ) + + event = transport.events[0] + assert event.tls_version == "TLSv1.3" + assert event.cdn_verified_bot_category == "AI Assistant" + + +async def test_analytics_failure_does_not_break_request_handling(): + client = _client(ThrowingTransport(), bot_detector=lambda request: True) + + # A throwing transport must not propagate out of handle_request. + result = await client.handle_request(_request({"User-Agent": "curl/8.0"})) + + assert result["action"] is HandlerAction.ALLOW + + +async def test_no_event_emitted_without_context_still_works(): + transport = RecordingTransport() + client = _client(transport, bot_detector=lambda request: True) + + await client.handle_request(_request({"User-Agent": "curl/8.0"})) + + # Direct SDK invocation (no context) → source_cdn is None, request_id auto-generated. + event = transport.events[0] + assert event.source_cdn is None + assert event.request_id diff --git a/tests/merchant/test_jwks.py b/tests/merchant/test_jwks.py index 705dcd8..b442632 100644 --- a/tests/merchant/test_jwks.py +++ b/tests/merchant/test_jwks.py @@ -30,6 +30,19 @@ async def test_fetch_platform_jwks(jwks_response): assert route.call_count == 1 +async def test_fetch_platform_jwks_sends_sdk_user_agent(jwks_response): + """JWKS requests carry the SDK User-Agent header.""" + from supertab_connect._version import _get_sdk_user_agent + + with respx.mock: + route = respx.get(JWKS_URL).respond(json=jwks_response) + + clear_jwks_cache() + await fetch_platform_jwks(SUPERTAB_BASE_URL) + + assert route.calls[0].request.headers["User-Agent"] == _get_sdk_user_agent() + + async def test_fetch_platform_jwks_caches_result(jwks_response): """Second call returns cached JWKS without a network request.""" with respx.mock: