Skip to content

Commit 3cfc49d

Browse files
committed
feat(harvesters): add ExternalHarvester for real-world APIs
- USGS earthquake data (seismic activity) - Cryptocurrency prices (Bitcoin, Ethereum) - Brings entropy from outside the computer
1 parent e354bf3 commit 3cfc49d

1 file changed

Lines changed: 363 additions & 0 deletions

File tree

Lines changed: 363 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,363 @@
1+
# =============================================================================
2+
# TrueEntropy - External API Harvester
3+
# =============================================================================
4+
#
5+
# This harvester collects entropy from external APIs that provide real-world
6+
# data - bringing randomness from outside the computer.
7+
#
8+
# Data Sources:
9+
# 1. USGS Earthquake API - Latest seismic activity
10+
# 2. Cryptocurrency Prices - Bitcoin/ETH prices with high precision
11+
#
12+
# Why External Data is Random:
13+
# - Earthquake magnitude/location are natural phenomena
14+
# - Crypto prices change every second based on global trading
15+
# - These values are determined by chaotic real-world systems
16+
#
17+
# Entropy Estimate:
18+
# - High: ~16-32 bits per successful API call
19+
# - External sources provide true real-world entropy
20+
#
21+
# =============================================================================
22+
23+
"""
24+
External API-based entropy harvester.
25+
26+
Collects entropy from real-world data sources via public APIs:
27+
- USGS earthquake data (seismic activity)
28+
- Cryptocurrency prices (market chaos)
29+
"""
30+
31+
from __future__ import annotations
32+
33+
import hashlib
34+
import struct
35+
import time
36+
from typing import Any, Dict, List, Optional
37+
38+
from trueentropy.harvesters.base import BaseHarvester, HarvestResult
39+
40+
41+
class ExternalHarvester(BaseHarvester):
    """
    Harvests entropy from external real-world APIs.

    This harvester fetches data from public APIs that provide
    information about chaotic real-world systems:

    1. **USGS Earthquake feed**: Seismic events from the last hour
    2. **CoinGecko (Coinbase as fallback)**: Bitcoin/Ethereum USD prices

    These sources bring entropy from outside the computer - data that
    is determined by physical and social phenomena.

    Attributes:
        timeout: Request timeout in seconds (default: 5.0)
        enable_earthquake: Whether to fetch earthquake data (default: True)
        enable_crypto: Whether to fetch crypto prices (default: True)

    Example:
        >>> harvester = ExternalHarvester()
        >>> result = harvester.collect()
        >>> if result.success:
        ...     print(f"Collected {result.entropy_bits} bits from the world!")
    """

    # -------------------------------------------------------------------------
    # API Endpoints
    # -------------------------------------------------------------------------

    # USGS GeoJSON summary feed - Returns earthquakes from the last hour
    # Documentation: https://earthquake.usgs.gov/earthquakes/feed/v1.0/geojson.php
    USGS_EARTHQUAKE_URL = (
        "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_hour.geojson"
    )

    # CoinGecko API - Free cryptocurrency price data
    # Documentation: https://www.coingecko.com/en/api/documentation
    COINGECKO_URL = (
        "https://api.coingecko.com/api/v3/simple/price"
        "?ids=bitcoin,ethereum&vs_currencies=usd&precision=18"
    )

    # Coinbase API - Alternative crypto price source
    COINBASE_BTC_URL = "https://api.coinbase.com/v2/prices/BTC-USD/spot"
    COINBASE_ETH_URL = "https://api.coinbase.com/v2/prices/ETH-USD/spot"

    # -------------------------------------------------------------------------
    # Entropy Estimates
    # -------------------------------------------------------------------------

    # Bits credited per contribution. These are the harvester's fixed
    # estimates, not measurements of the fetched data.
    _EARTHQUAKE_ENTROPY_BITS = 24
    _CRYPTO_ENTROPY_BITS = 32
    _TIMESTAMP_ENTROPY_BITS = 8

    # Only the most recent events of the feed are packed into the output.
    _MAX_EARTHQUAKES = 5

    # -------------------------------------------------------------------------
    # Initialization
    # -------------------------------------------------------------------------

    def __init__(
        self,
        timeout: float = 5.0,
        enable_earthquake: bool = True,
        enable_crypto: bool = True,
    ) -> None:
        """
        Initialize the external harvester.

        Args:
            timeout: Request timeout in seconds (must be positive)
            enable_earthquake: Whether to fetch earthquake data
            enable_crypto: Whether to fetch cryptocurrency prices

        Raises:
            ValueError: If ``timeout`` is not positive.
        """
        # Same rule as the ``timeout`` setter, so an invalid value is
        # rejected up front instead of silently failing every request.
        if timeout <= 0:
            raise ValueError("timeout must be positive")

        self._timeout = timeout
        self._enable_earthquake = enable_earthquake
        self._enable_crypto = enable_crypto

    # -------------------------------------------------------------------------
    # BaseHarvester Implementation
    # -------------------------------------------------------------------------

    @property
    def name(self) -> str:
        """Return harvester name."""
        return "external"

    def collect(self) -> HarvestResult:
        """
        Collect entropy from external APIs.

        Process:
        1. Fetch data from enabled sources (earthquake, crypto)
        2. Append a nanosecond timestamp to the fetched data
        3. Return the combined bytes followed by their SHA-256 digest

        Note: This harvester gracefully handles API failures.
        It succeeds if at least one source returns data; failures of the
        other source are reported in ``error`` of the successful result.

        Returns:
            HarvestResult containing external API entropy. The result is
            unsuccessful (empty data, 0 bits) when the ``requests`` library
            is missing, when no source is enabled, or when every enabled
            source failed.
        """
        # requests is imported lazily so a missing optional dependency
        # becomes a failed harvest rather than an import error.
        try:
            import requests
        except ImportError:
            return HarvestResult(
                data=b"",
                entropy_bits=0,
                source=self.name,
                success=False,
                error="requests library not available",
            )

        collected_data: List[bytes] = []
        total_entropy_bits = 0
        errors: List[str] = []

        # Collect earthquake data
        if self._enable_earthquake:
            quake_bytes = self._fetch_earthquake_data(requests)
            if quake_bytes is not None:
                collected_data.append(quake_bytes)
                total_entropy_bits += self._EARTHQUAKE_ENTROPY_BITS
            else:
                errors.append("earthquake API failed")

        # Collect cryptocurrency data
        if self._enable_crypto:
            price_bytes = self._fetch_crypto_data(requests)
            if price_bytes is not None:
                collected_data.append(price_bytes)
                total_entropy_bits += self._CRYPTO_ENTROPY_BITS
            else:
                errors.append("crypto API failed")

        # Decide success BEFORE the timestamp is added: the timestamp is
        # always available, so checking afterwards could never fail and a
        # harvest with no external data would be reported as successful.
        if not collected_data:
            return HarvestResult(
                data=b"",
                entropy_bits=0,
                source=self.name,
                success=False,
                error=(
                    "All external sources failed"
                    if errors
                    else "No external sources enabled"
                ),
            )

        # Add timestamp so two harvests of identical API data still differ
        collected_data.append(struct.pack("!Q", time.time_ns()))
        total_entropy_bits += self._TIMESTAMP_ENTROPY_BITS

        combined = b"".join(collected_data)

        # The digest is appended to (not substituted for) the raw bytes,
        # so it adds no entropy of its own and does not hide the inputs.
        hashed = hashlib.sha256(combined).digest()

        return HarvestResult(
            data=combined + hashed,
            entropy_bits=total_entropy_bits,
            source=self.name,
            success=True,
            error="; ".join(errors) if errors else None,
        )

    # -------------------------------------------------------------------------
    # Earthquake Data
    # -------------------------------------------------------------------------

    def _fetch_earthquake_data(self, requests: Any) -> Optional[bytes]:
        """
        Fetch earthquake data from USGS.

        For up to five events from the start of the feed, packs:
        - Magnitude
        - Location (up to three coordinate values)
        - Timestamp

        Fields that are missing or null are packed as 0 so one incomplete
        event does not discard the whole response. When the feed lists no
        events, the string form of its ``metadata`` object is returned
        instead.

        Args:
            requests: The imported requests module

        Returns:
            Bytes containing earthquake data, or None if failed
        """
        try:
            response = requests.get(
                self.USGS_EARTHQUAKE_URL,
                timeout=self._timeout,
            )
            response.raise_for_status()

            data = response.json()
            features = data.get("features") or []

            if not features:
                # No earthquakes in the last hour - use metadata instead
                metadata = data.get("metadata", {})
                return str(metadata).encode()

            values: List[float] = []

            for eq in features[: self._MAX_EARTHQUAKES]:
                # ``or`` also covers keys that are present but null.
                props = eq.get("properties") or {}
                geom = eq.get("geometry") or {}
                coords = geom.get("coordinates") or [0, 0, 0]

                # Magnitude (can be negative for very small quakes)
                values.append(float(props.get("mag") or 0))

                # Location: longitude, latitude, depth
                values.extend(float(c or 0) for c in coords[:3])

                # Timestamp
                values.append(float(props.get("time") or 0))

            # Pack as network-order double-precision floats
            return struct.pack(f"!{len(values)}d", *values)

        except Exception:
            # Deliberately broad: any network, HTTP, or parsing problem
            # means "this source is unavailable", never a crash.
            return None

    # -------------------------------------------------------------------------
    # Cryptocurrency Data
    # -------------------------------------------------------------------------

    def _fetch_crypto_data(self, requests: Any) -> Optional[bytes]:
        """
        Fetch cryptocurrency prices.

        Gets current USD prices for Bitcoin and Ethereum. CoinGecko is
        tried first (one request for both coins); Coinbase is used as a
        fallback whenever CoinGecko yields no usable price, whether it
        raised or merely returned an unexpected payload.

        Args:
            requests: The imported requests module

        Returns:
            Bytes containing crypto price data, or None if failed
        """
        prices = self._fetch_coingecko_prices(requests)

        if not prices:
            # Fall back to Coinbase API (one request per coin)
            for url in (self.COINBASE_BTC_URL, self.COINBASE_ETH_URL):
                price = self._fetch_coinbase_price(requests, url)
                if price is not None:
                    prices.append(price)

        if not prices:
            return None

        # Pack prices as network-order double-precision floats
        return struct.pack(f"!{len(prices)}d", *prices)

    def _fetch_coingecko_prices(self, requests: Any) -> List[float]:
        """Return the USD prices CoinGecko reported, or [] if unavailable."""
        try:
            response = requests.get(
                self.COINGECKO_URL,
                timeout=self._timeout,
            )
            response.raise_for_status()
            data = response.json()
        except Exception:
            # Deliberately broad: any failure just triggers the fallback.
            return []

        prices: List[float] = []
        for coin in ("bitcoin", "ethereum"):
            try:
                prices.append(float(data[coin]["usd"]))
            except (KeyError, TypeError, ValueError):
                # Skip a missing or malformed price instead of packing a
                # constant 0 that would be credited as entropy.
                continue
        return prices

    def _fetch_coinbase_price(self, requests: Any, url: str) -> Optional[float]:
        """Return the spot price from one Coinbase URL, or None if it failed."""
        try:
            response = requests.get(url, timeout=self._timeout)
            response.raise_for_status()
            return float(response.json()["data"]["amount"])
        except Exception:
            # Deliberately broad: a failed coin is simply left out.
            return None

    # -------------------------------------------------------------------------
    # Configuration Properties
    # -------------------------------------------------------------------------

    @property
    def timeout(self) -> float:
        """Get the request timeout in seconds."""
        return self._timeout

    @timeout.setter
    def timeout(self, value: float) -> None:
        """Set the request timeout; raises ValueError unless positive."""
        if value <= 0:
            raise ValueError("timeout must be positive")
        self._timeout = value

    @property
    def enable_earthquake(self) -> bool:
        """Check if earthquake data collection is enabled."""
        return self._enable_earthquake

    @enable_earthquake.setter
    def enable_earthquake(self, value: bool) -> None:
        """Enable or disable earthquake data collection."""
        self._enable_earthquake = value

    @property
    def enable_crypto(self) -> bool:
        """Check if crypto data collection is enabled."""
        return self._enable_crypto

    @enable_crypto.setter
    def enable_crypto(self, value: bool) -> None:
        """Enable or disable crypto data collection."""
        self._enable_crypto = value

0 commit comments

Comments
 (0)