Skip to content

Commit e354bf3

Browse files
committed
feat(harvesters): add SystemHarvester for OS state entropy
- Samples RAM, CPU, process counts, disk/network I/O - Uses psutil for cross-platform compatibility - Volatile metrics that change constantly
1 parent 248418a commit e354bf3

1 file changed

Lines changed: 331 additions & 0 deletions

File tree

Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
# =============================================================================
2+
# TrueEntropy - System Harvester
3+
# =============================================================================
4+
#
5+
# This harvester collects entropy from the system state - volatile information
6+
# about the computer's current condition.
7+
#
8+
# Why System State is Random:
9+
# - RAM allocation changes constantly as programs run
10+
# - CPU usage fluctuates with system activity
11+
# - Process counts change as programs start/stop
12+
# - Disk and network I/O vary with application behavior
13+
#
14+
# Collection Method:
15+
# 1. Sample various system metrics using psutil
16+
# 2. Pack the values into bytes
17+
# 3. The exact values at any microsecond are unpredictable
18+
#
19+
# Entropy Estimate:
20+
# - Conservative: ~4-8 bits per metric
21+
# - The least significant bits of each value are the most random
22+
#
23+
# =============================================================================
24+
25+
"""
26+
System state-based entropy harvester.
27+
28+
Collects entropy from volatile system metrics like RAM usage,
29+
CPU load, process counts, and disk and network I/O activity.
30+
"""
31+
32+
from __future__ import annotations
33+
34+
import struct
35+
import time
36+
from typing import List, Tuple
37+
38+
from trueentropy.harvesters.base import BaseHarvester, HarvestResult
39+
40+
41+
class SystemHarvester(BaseHarvester):
    """
    Harvests entropy from system state metrics.

    This harvester samples volatile system information using the psutil
    library. The values change constantly due to:

    - Memory allocation/deallocation
    - CPU scheduling
    - Process creation/termination
    - I/O operations

    Metrics collected (in this order):
        - Available RAM, used RAM and RAM usage percentage
        - Used swap
        - CPU usage percentage (per-core)
        - Cumulative user and system CPU time
        - Number of processes and the sum of the first 20 PIDs
        - Disk I/O counters (bytes and operation counts)
        - Network I/O counters (bytes and packet counts)
        - System boot time (constant; mixed in but not credited as entropy)
        - Current timestamps (``perf_counter_ns`` and ``time_ns``)

    Any metric that cannot be read on the current platform is skipped.

    Example:
        >>> harvester = SystemHarvester()
        >>> result = harvester.collect()
        >>> print(f"Collected {result.entropy_bits} bits from system state")
    """

    # Entropy credited per volatile metric: the midpoint of the conservative
    # 4-8 bit range.
    _BITS_PER_METRIC = 6

    # Metrics that are packed into the output but do not change between
    # samples, so they must not inflate the entropy estimate.
    _CONSTANT_METRICS = frozenset({"boot_time"})

    # -------------------------------------------------------------------------
    # BaseHarvester Implementation
    # -------------------------------------------------------------------------

    @property
    def name(self) -> str:
        """Return harvester name."""
        return "system"

    def collect(self) -> HarvestResult:
        """
        Collect entropy from system state.

        Process:
            1. Import psutil (if available)
            2. Sample various system metrics
            3. Pack all values into bytes
            4. Estimate entropy from the number of volatile metrics

        Returns:
            HarvestResult containing system state entropy. When psutil is
            not installed the result has ``success=False``, empty data and
            zero entropy bits.
        """
        # psutil is an optional dependency, so it is imported lazily here.
        try:
            import psutil
        except ImportError:
            return HarvestResult(
                data=b"",
                entropy_bits=0,
                source=self.name,
                success=False,
                error="psutil library not available",
            )

        metrics = self._collect_metrics(psutil)
        data = self._metrics_to_bytes(metrics)

        # Only volatile metrics are credited. Constant values such as the
        # boot time still take part in mixing but carry no fresh entropy.
        credited = sum(
            1
            for metric_name, _ in metrics
            if metric_name not in self._CONSTANT_METRICS
        )
        entropy_bits = credited * self._BITS_PER_METRIC

        return HarvestResult(
            data=data,
            entropy_bits=entropy_bits,
            source=self.name,
            success=True,
        )

    # -------------------------------------------------------------------------
    # Private Methods
    # -------------------------------------------------------------------------

    def _collect_metrics(self, psutil: object) -> List[Tuple[str, int | float]]:
        """
        Collect various system metrics.

        Each metric group is sampled inside its own ``try`` block so that a
        metric which is unsupported or fails on this platform is skipped
        without affecting the others (best-effort collection).

        Args:
            psutil: The imported psutil module. The module is re-imported
                locally under the name ``ps`` so that its attributes resolve
                for type checkers; the argument itself is not used.

        Returns:
            List of (metric_name, value) tuples, in a fixed collection order.
        """
        import psutil as ps  # For type hints

        metrics: List[Tuple[str, int | float]] = []

        # =====================================================================
        # Memory Metrics
        # =====================================================================

        try:
            # available: bytes available for new allocations (very volatile)
            mem = ps.virtual_memory()
            metrics.append(("mem_available", mem.available))
            metrics.append(("mem_used", mem.used))
            # Scale so the fractional part of the percentage is kept.
            metrics.append(("mem_percent", int(mem.percent * 1000)))
        except Exception:
            pass

        try:
            swap = ps.swap_memory()
            metrics.append(("swap_used", swap.used))
        except Exception:
            pass

        # =====================================================================
        # CPU Metrics
        # =====================================================================

        try:
            # Per-CPU usage percentages; these fluctuate with running
            # processes. NOTE: per the psutil documentation, a non-blocking
            # call measures usage since the previous call, and the first
            # call returns a meaningless 0.0 for every core.
            cpu_percents = ps.cpu_percent(percpu=True)
            for i, pct in enumerate(cpu_percents):
                # Multiply by 10000 to preserve fractional precision
                metrics.append((f"cpu_{i}", int(pct * 10000)))
        except Exception:
            pass

        try:
            # Cumulative CPU times, converted from seconds to microseconds.
            cpu_times = ps.cpu_times()
            metrics.append(("cpu_user", int(cpu_times.user * 1000000)))
            metrics.append(("cpu_system", int(cpu_times.system * 1000000)))
        except Exception:
            pass

        # =====================================================================
        # Process Metrics
        # =====================================================================

        try:
            # The process count changes as programs start and stop.
            pids = ps.pids()
            metrics.append(("process_count", len(pids)))

            # Sum of (at most) the first 20 PIDs as a fingerprint. Slicing
            # already copes with lists shorter than 20 entries.
            metrics.append(("pid_sum", sum(pids[:20])))
        except Exception:
            pass

        # =====================================================================
        # Disk Metrics
        # =====================================================================

        try:
            # Disk I/O counters change with every disk read/write.
            disk_io = ps.disk_io_counters()
            if disk_io is not None:
                metrics.append(("disk_read_bytes", disk_io.read_bytes))
                metrics.append(("disk_write_bytes", disk_io.write_bytes))
                metrics.append(("disk_read_count", disk_io.read_count))
                metrics.append(("disk_write_count", disk_io.write_count))
        except Exception:
            pass

        # =====================================================================
        # Network I/O Metrics
        # =====================================================================

        try:
            net_io = ps.net_io_counters()
            if net_io is not None:
                metrics.append(("net_bytes_sent", net_io.bytes_sent))
                metrics.append(("net_bytes_recv", net_io.bytes_recv))
                metrics.append(("net_packets_sent", net_io.packets_sent))
                metrics.append(("net_packets_recv", net_io.packets_recv))
        except Exception:
            pass

        # =====================================================================
        # Time Metrics
        # =====================================================================

        try:
            # System boot time (constant but adds to mixing)
            boot_time = ps.boot_time()
            metrics.append(("boot_time", int(boot_time * 1000000)))
        except Exception:
            pass

        # Nanosecond timestamps need no psutil support, so they are always
        # present in the result.
        metrics.append(("timestamp_ns", time.perf_counter_ns()))
        metrics.append(("time_ns", time.time_ns()))

        return metrics

    def _metrics_to_bytes(
        self,
        metrics: List[Tuple[str, int | float]]
    ) -> bytes:
        """
        Convert system metrics to bytes.

        Each metric is packed as one big-endian unsigned 64-bit value, so
        the result is ``8 * len(metrics)`` bytes long. Metric names are not
        part of the output.

        Float values are scaled by 1,000,000 and truncated to an integer.
        Non-finite floats (NaN, infinity) cannot be converted to an integer,
        so their raw IEEE-754 bit pattern is packed instead.

        Args:
            metrics: List of (name, value) tuples

        Returns:
            Bytes representation of the metrics
        """
        chunks: List[bytes] = []

        for _name, value in metrics:
            if isinstance(value, float):
                try:
                    int_value = int(value * 1000000)
                except (ValueError, OverflowError):
                    # NaN / +-inf: fall back to the 8 raw bytes of the double
                    # rather than aborting the whole harvest.
                    chunks.append(struct.pack("!d", value))
                    continue
            else:
                int_value = value

            # Masking keeps the low 64 bits, which also maps negative values
            # onto their two's-complement representation.
            chunks.append(struct.pack("!Q", int_value & 0xFFFFFFFFFFFFFFFF))

        # Join once instead of growing a bytes object inside the loop.
        return b"".join(chunks)

    # -------------------------------------------------------------------------
    # Utility Methods
    # -------------------------------------------------------------------------

    def list_available_metrics(self) -> List[str]:
        """
        List which metrics are available on this system.

        This is useful for debugging and understanding what
        entropy sources are being used. Each metric group is probed by
        calling the corresponding psutil function; a group whose probe
        raises (or returns ``None`` for the I/O counters) is left out.

        Returns:
            List of metric name strings. Per-core CPU usage is reported as
            the single entry ``"cpu_percpu"``, and the disk/network counters
            are reported as grouped labels. The two timestamp metrics are
            always included, even when psutil is not installed.
        """
        always_available = ["timestamp_ns", "time_ns"]

        try:
            import psutil as ps
        except ImportError:
            return always_available

        available: List[str] = []

        try:
            ps.virtual_memory()
            available.extend(["mem_available", "mem_used", "mem_percent"])
        except Exception:
            pass

        try:
            ps.swap_memory()
            available.append("swap_used")
        except Exception:
            pass

        try:
            ps.cpu_percent(percpu=True)
            available.append("cpu_percpu")
        except Exception:
            pass

        try:
            ps.cpu_times()
            available.extend(["cpu_user", "cpu_system"])
        except Exception:
            pass

        try:
            ps.pids()
            available.extend(["process_count", "pid_sum"])
        except Exception:
            pass

        try:
            if ps.disk_io_counters() is not None:
                available.extend(["disk_read", "disk_write"])
        except Exception:
            pass

        try:
            if ps.net_io_counters() is not None:
                available.extend(["net_bytes", "net_packets"])
        except Exception:
            pass

        try:
            ps.boot_time()
            available.append("boot_time")
        except Exception:
            pass

        available.extend(always_available)

        return available

0 commit comments

Comments
 (0)