diff --git a/tests/README.md b/tests/README.md
index 49685ec..adb6bd9 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -84,9 +84,55 @@ endurance is worth more than a long Release soak for finding ordering bugs.
 ```bash
 scripts/configure.sh build_tsan --with=unit_tests --with=tsan
 scripts/setup_build.sh build_tsan && cmake --build build_tsan -j
-TSAN_OPTIONS="suppressions=$PWD/tests/tsan.supp" \
+TSAN_OPTIONS="suppressions=$PWD/tests/tsan.supp:halt_on_error=1" \
   tests/endurance.sh build_tsan/kickmsg_stress_test 14400
 ```
+`halt_on_error=1` makes TSAN stop at the first race with the report intact;
+without it TSAN reports and *continues*, and the run still exits cleanly. The
+endurance harness also greps each run for `ThreadSanitizer`, `AddressSanitizer`,
+or `runtime error:` and counts a hit as a failure (`san=` column), so a race is caught either way.
+
+Rungs 3-4 above are the **hands-on** soaks: pick one binary and a duration and
+hammer it (e.g. crash recovery for an hour before a PR, or TSAN stress for an
+afternoon). The rig below is the **unattended** counterpart -- it just cycles
+all of them for you. It is built *on top of* `endurance.sh`, not a replacement;
+keep using the direct form for targeted runs.
+
+### 5. Unattended long-horizon rig -- cycles all profiles
+`tests/soak_all.sh` loops weighted profiles until a wall-clock deadline, each a
+time-sliced `endurance.sh` pass: **TSAN takes half the slices** (rarest,
+highest-value signal), crash fuzz a quarter, plain stress the rest as periodic
+sanity at oversub 150/200. It records a per-slice verdict, persists failing
+runs, and survives any single slice failing. It self-detaches (survives
+logout), runs under `caffeinate` on macOS, and prints a watch/stop dashboard.
+```bash
+# build the plain + crash binaries (build/) and optionally build_tsan/
+tests/soak_all.sh 604800 1800            # 1 week, 30-min slices; returns at once
+# absent build_tsan/ is skipped, not fatal. SOAK_FOREGROUND=1 to run inline.
+```
+Each launch isolates its run under `soak_logs/run_<timestamp>/` (also
+`soak_logs/latest`) with the master log, per-slice logs, and `fails/`. Check
+progress at any time with:
+```bash
+tests/soak_status.sh                     # state, elapsed/remaining, per-profile tally
+```
+The launch dashboard also prints the exact `tail`/stop commands. Interrupting a
+run (Ctrl-C or `kill`) unlinks the active segment, so it leaves no stale
+`/dev/shm` behind (`kill -9` is the exception, covered by unlink-before-create).
+
+Slice length (arg 2) is operational, not a coverage knob: a profile's total
+iterations over the run depend only on its *share* of the cycle, not on how
+that time is chunked. Shorter slices just give finer failure-attribution
+checkpoints. To shift priority between profiles, edit the weighting in the
+`PROFILES` list -- don't lengthen slices.
+
+The **crash test fuzzes its kill timing** (seeded random per round) so a long
+soak explores new crash windows instead of re-hitting a fixed schedule:
+```bash
+build/kickmsg_crash_test                       # seed from clock -- logged at startup
+KICKMSG_CRASH_SEED=12345 build/kickmsg_crash_test   # replay an exact schedule
+KICKMSG_CRASH_ROUNDS=200 build/kickmsg_crash_test   # more kills per invocation
+```
 
 ## Contention scales to your machine
 
@@ -107,6 +153,8 @@ bound it explicitly.
 
 ## Detached long soak (survives logout; keeps the machine awake)
 
+For a *single-profile* `endurance.sh` run (rungs 3-4); the rig in rung 5
+self-detaches and handles `caffeinate` on its own.
 ```bash
 # macOS: caffeinate prevents idle sleep mid-soak
 nohup caffeinate -i tests/endurance.sh build/kickmsg_stress_test 43200 > soak.log 2>&1 &
diff --git a/tests/crash_test.cc b/tests/crash_test.cc
index b4fccd3..f253711 100644
--- a/tests/crash_test.cc
+++ b/tests/crash_test.cc
@@ -24,6 +24,7 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#include "shm_cleanup.h"
 #include "kickmsg/os/Time.h"
 #include "kickmsg/Publisher.h"
 #include "kickmsg/Subscriber.h"
@@ -45,6 +46,48 @@ static uint32_t compute_checksum(CrashPayload const& p)
     return p.magic ^ p.seq ^ 0xBAADF00D;
 }
 
+// --- Seeded kill-timing fuzzer ---------------------------------------------
+// Each kill fires at a random instant so a long soak explores new crash
+// windows instead of re-hitting a fixed schedule.  The seed is logged at
+// startup; set KICKMSG_CRASH_SEED to replay a specific run.
+namespace
+{
+    uint64_t g_rng_state = 0; // PRNG state: set by seed_fuzzer(), advanced by next_rand()
+
+    uint64_t next_rand() // splitmix64
+    {
+        g_rng_state += 0x9E3779B97F4A7C15ull;
+        uint64_t z = g_rng_state;
+        z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull;
+        z = (z ^ (z >> 27)) * 0x94D049BB133111EBull;
+        return z ^ (z >> 31);
+    }
+
+    // Sleep a random duration in [lo_us, hi_us] inclusive (microsecond grain).
+    // Inverted bounds sleep exactly lo_us rather than wrapping the span; one
+    // value is always drawn so a seeded replay keeps the same sequence.
+    void sleep_rand(uint64_t lo_us, uint64_t hi_us)
+    {
+        uint64_t const width = (hi_us > lo_us) ? (hi_us - lo_us) : 0;
+        uint64_t const span  = width + 1; // wraps to 0 only for the full 64-bit range
+        uint64_t const draw  = next_rand();
+        uint64_t const extra = (span != 0) ? (draw % span) : draw;
+        kickmsg::sleep(microseconds{static_cast<int64_t>(lo_us + extra)});
+    }
+
+    // Seed from KICKMSG_CRASH_SEED when set (replay), otherwise from the
+    // monotonic clock mixed with the pid.  Returns the seed for logging.
+    uint64_t seed_fuzzer()
+    {
+        char const* const env = std::getenv("KICKMSG_CRASH_SEED");
+        g_rng_state = (env != nullptr)
+                    ? std::strtoull(env, nullptr, 0)
+                    : (static_cast<uint64_t>(monotonic_ns().count())
+                       ^ (static_cast<uint64_t>(::getpid()) << 32));
+        return g_rng_state;
+    }
+}
+
 /// Child publisher: publishes as fast as possible using allocate() + publish()
 /// to maximize the window where a kill can orphan a slot.
 static void child_publisher_main(int /*round*/)
@@ -128,6 +171,7 @@ struct RoundResult
     bool recovered_entries;
     bool recovered_rings;
     bool recovered_slots;
+    bool repair_ok;
     bool subscriber_ok;
 };
 
@@ -156,8 +200,8 @@ static RoundResult run_one_round(int round)
         _exit(0); // never reached
     }
 
-    // Let publisher run for 20-50ms
-    kickmsg::sleep(milliseconds{20 + (round % 30)});
+    // Kill at a fuzzed instant in the publisher's lifecycle (0.2-50ms).
+    sleep_rand(200, 50000);
 
     // Kill publisher mid-flight
     kill(pub_pid, SIGKILL);
@@ -186,7 +230,8 @@ static RoundResult run_one_round(int round)
 
     // Verify clean after repair
     auto post = region.diagnose();
-    if (post.locked_entries > 0 or post.retired_rings > 0)
+    result.repair_ok = (post.locked_entries == 0 and post.retired_rings == 0);
+    if (not result.repair_ok)
     {
         std::fprintf(stderr, "  [FAIL] Round %d: repair incomplete "
                      "(locked=%u, retired=%u)\n",
@@ -382,7 +427,7 @@ static bool test_multi_publisher_crash()
         }
     }
 
-    kickmsg::sleep(30ms);
+    sleep_rand(2000, 50000);
 
     for (int i = 0; i < N_PUBS; ++i)
     {
@@ -452,7 +497,17 @@ static bool test_multi_publisher_crash()
 
 int main()
 {
-    std::printf("=== Kickmsg Multi-Process Crash Test ===\n\n");
+    std::printf("=== Kickmsg Multi-Process Crash Test ===\n");
+    // Clean up segments if interrupted (Ctrl-C / kill) before the test's own
+    // unlink runs.  Installed before forking so children inherit it too.
+    kickmsg_test::register_cleanup_shm("/kickmsg_crash_test");
+    kickmsg_test::register_cleanup_shm("/kickmsg_crash_test_sub");
+    kickmsg_test::register_cleanup_shm("/kickmsg_crash_test_multi");
+    kickmsg_test::install_signal_cleanup();
+
+    uint64_t const seed = seed_fuzzer();
+    std::printf("crash fuzz seed=%llu (set KICKMSG_CRASH_SEED to replay)\n\n",
+                static_cast<unsigned long long>(seed));
 
     kickmsg::SharedMemory::unlink(SHM_NAME);
 
@@ -490,7 +545,15 @@ int main()
     // Let subscriber attach
     kickmsg::sleep(50ms);
 
-    constexpr int NUM_ROUNDS = 10;
+    int NUM_ROUNDS = 30;
+    if (char const* r = std::getenv("KICKMSG_CRASH_ROUNDS"))
+    {
+        int v = std::atoi(r);
+        if (v > 0)
+        {
+            NUM_ROUNDS = v;
+        }
+    }
     int any_recovery = 0;
     bool all_ok = true;
 
@@ -501,6 +564,10 @@ int main()
         {
             ++any_recovery;
         }
+        if (not result.repair_ok)
+        {
+            all_ok = false;
+        }
     }
 
     // Signal subscriber to exit
diff --git a/tests/endurance.sh b/tests/endurance.sh
index 43af316..b6b796d 100755
--- a/tests/endurance.sh
+++ b/tests/endurance.sh
@@ -38,8 +38,10 @@ while [ "$(date +%s)" -lt "$END_TIME" ]; do
     fi
     SUMMARY=$(echo "$OUTPUT" | grep "Summary:" | tail -1 || true)
     if [ -n "$SUMMARY" ]; then
-        RUN_PASS=$(echo "$SUMMARY" | grep -oE '[0-9]+ passed' | grep -oE '[0-9]+')
-        RUN_FAIL=$(echo "$SUMMARY" | grep -oE '[0-9]+ failed' | grep -oE '[0-9]+')
+        # Guarded: a garbled/interleaved Summary line (possible under heavy
+        # sanitizer contention) must not let set -e kill the whole soak.
+        RUN_PASS=$(echo "$SUMMARY" | grep -oE '[0-9]+ passed' | grep -oE '[0-9]+' || true)
+        RUN_FAIL=$(echo "$SUMMARY" | grep -oE '[0-9]+ failed' | grep -oE '[0-9]+' || true)
     else
         # No summary line (e.g. crash test): tally by exit code.
         if [ "$RC" -eq 0 ]; then
@@ -53,15 +55,25 @@ while [ "$(date +%s)" -lt "$END_TIME" ]; do
     RUN_PASS=${RUN_PASS:-0}
     RUN_FAIL=${RUN_FAIL:-0}
     RUN_REORDER=$(echo "$OUTPUT" | { grep -c "REORDER" || true; })
+    # Sanitizer reports (TSAN/ASAN/UBSAN) go to stderr and do NOT bump the
+    # suite's "failed" count -- detect them explicitly or they get swallowed.
+    RUN_SANITIZER=$(echo "$OUTPUT" | { grep -c -E "ThreadSanitizer|AddressSanitizer|runtime error:" || true; })
+    if [ "$RUN_SANITIZER" -gt 0 ] && [ "$RUN_FAIL" -eq 0 ]; then
+        RUN_FAIL=1
+    fi
     PASS=$((PASS + RUN_PASS))
     FAIL=$((FAIL + RUN_FAIL))
     REORDERS=$((REORDERS + RUN_REORDER))
     ELAPSED=$(($(date +%s) - END_TIME + DURATION_SECS))
-    printf "\r[%ds/%ds] runs=%d pass=%d fail=%d reorders=%d" \
-           "$ELAPSED" "$DURATION_SECS" "$RUNS" "$PASS" "$FAIL" "$REORDERS"
-    if [ "$RUN_FAIL" -gt 0 ]; then
+    printf "\r[%ds/%ds] runs=%d pass=%d fail=%d reorders=%d san=%d" \
+           "$ELAPSED" "$DURATION_SECS" "$RUNS" "$PASS" "$FAIL" "$REORDERS" "$RUN_SANITIZER"
+    if [ "$RUN_FAIL" -gt 0 ] || [ "$RC" -ne 0 ]; then
+        # Persist the full run output -- the evidence is otherwise lost.
+        FAILDIR="${FAILDIR:-endurance_fails}"
+        mkdir -p "$FAILDIR"
+        printf '%s\n' "$OUTPUT" > "$FAILDIR/run_${RUNS}_rc${RC}.log"
         echo ""
-        echo "$OUTPUT" | grep -E "REORDER|FAIL|WARN" || true
+        echo "$OUTPUT" | grep -E "REORDER|FAIL|WARN|ThreadSanitizer|runtime error:" || true
     fi
 done
 echo ""
diff --git a/tests/shm_cleanup.h b/tests/shm_cleanup.h
new file mode 100644
index 0000000..fb472c7
--- /dev/null
+++ b/tests/shm_cleanup.h
@@ -0,0 +1,61 @@
+#ifndef KICKMSG_TESTS_SHM_CLEANUP_H
+#define KICKMSG_TESTS_SHM_CLEANUP_H
+
+// Best-effort shm cleanup for the test binaries: on SIGINT/SIGTERM, unlink the
+// registered segments so an interrupted run leaves no stale /dev/shm entry for
+// the next run to trip over.  Names must be string literals (static storage).
+// The handler only calls shm_unlink + _exit.  NOTE(review): _exit is on POSIX's
+// async-signal-safe list; shm_unlink appears not to be -- confirm per target libc.
+// SIGKILL (-9) cannot be caught; each scenario's unlink-before-create covers it.
+
+#ifndef _WIN32
+#include <csignal>
+#include <cstring>
+#include <sys/mman.h>
+#include <unistd.h>
+
+namespace kickmsg_test
+{
+    inline constexpr int MAX_CLEANUP = 32;
+    // volatile: slots and count are both read from the signal handler.
+    inline char const* volatile g_cleanup_names[MAX_CLEANUP] = {};
+    inline volatile sig_atomic_t g_cleanup_count = 0;
+
+    // Signal handler: unlink every registered name, then exit with 128 + sig.
+    inline void shm_cleanup_handler(int sig)
+    {
+        for (sig_atomic_t slot = 0; slot < g_cleanup_count; ++slot)
+        {
+            ::shm_unlink(g_cleanup_names[slot]);
+        }
+        ::_exit(128 + sig);
+    }
+
+    inline void register_cleanup_shm(char const* name)
+    {
+        sig_atomic_t const used = g_cleanup_count;
+        if (used < MAX_CLEANUP) // registrations beyond capacity are dropped
+        {
+            g_cleanup_names[used] = name; // fill the slot before exposing it
+            g_cleanup_count = used + 1;
+        }
+    }
+
+    inline void install_signal_cleanup()
+    {
+        struct sigaction action = {};
+        action.sa_handler = shm_cleanup_handler;
+        sigemptyset(&action.sa_mask);
+        ::sigaction(SIGINT, &action, nullptr);
+        ::sigaction(SIGTERM, &action, nullptr);
+    }
+}
+#else
+namespace kickmsg_test
+{
+    inline void register_cleanup_shm(char const*) {}
+    inline void install_signal_cleanup() {}
+}
+#endif
+
+#endif
diff --git a/tests/soak_all.sh b/tests/soak_all.sh
new file mode 100755
index 0000000..06b329c
--- /dev/null
+++ b/tests/soak_all.sh
@@ -0,0 +1,185 @@
+#!/bin/bash
+# Cycles weighted test profiles until a deadline, each a time-sliced endurance
+# pass; aggregates results, persists failing runs, survives any slice failing.
+# Self-detaches (survives logout), runs under caffeinate on macOS, and prints
+# a watch/stop dashboard before returning.
+#
+# Usage: soak_all.sh [total_secs] [slice_secs]
+#   total_secs : wall-clock budget (default 604800 = 1 week)
+#   slice_secs : seconds per profile slice (default 1800 = 30 min)
+#   SOAK_FOREGROUND=1   run inline instead of detaching
+#   PLAIN_BIN / CRASH_BIN / TSAN_BIN   override binaries (TSAN optional)
+#
+# NOT 'set -e': a failing slice records and continues.
+set -uo pipefail
+
+TOTAL_SECS="${1:-604800}"
+SLICE_SECS="${2:-1800}"
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+SELF="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")"
+BASE_LOGDIR="${SOAK_LOGDIR:-$ROOT/soak_logs}"
+
+# --- Launcher: detach + caffeinate, print dashboard, return -----------------
+if [ -z "${KICKMSG_SOAK_RUNDIR:-}" ] && [ -z "${SOAK_FOREGROUND:-}" ]; then
+    RUNDIR="$BASE_LOGDIR/run_$(date +%Y%m%d_%H%M%S)"
+    mkdir -p "$RUNDIR"
+    ln -sfn "$RUNDIR" "$BASE_LOGDIR/latest" 2>/dev/null || true
+    export KICKMSG_SOAK_RUNDIR="$RUNDIR"
+    caf=""
+    caf_label="off (non-macOS)"
+    if [ "$(uname)" = "Darwin" ] && command -v caffeinate >/dev/null 2>&1; then
+        caf="caffeinate -i"
+        caf_label="on (idle-sleep blocked)"
+    fi
+    nohup $caf "$SELF" "$TOTAL_SECS" "$SLICE_SECS" </dev/null >"$RUNDIR/stdout.log" 2>&1 &
+    pid=$!
+    echo "$pid" > "$RUNDIR/soak.pid"
+    M="$RUNDIR/soak_all.log"
+    cat <<EOF
+
+=== kickmsg soak launched (detached; survives logout) ===
+  pid        : $pid
+  budget     : ${TOTAL_SECS}s total, ${SLICE_SECS}s slices
+  caffeinate : $caf_label
+  run dir    : $RUNDIR   (also: $BASE_LOGDIR/latest)
+
+  watch      : tail -f "$M"
+  rollup     : grep -E "VERDICT|SLICE |slices=" "$M"
+  failures   : ls "$RUNDIR/fails/"           # empty == clean
+  stop       : pkill -f soak_all.sh ; pkill -f endurance.sh
+
+EOF
+    exit 0
+fi
+
+# --- Worker -----------------------------------------------------------------
+LOGDIR="${KICKMSG_SOAK_RUNDIR:-$BASE_LOGDIR/run_fg}"
+SLICEDIR="$LOGDIR/slices"
+MASTER="$LOGDIR/soak_all.log"
+mkdir -p "$SLICEDIR" "$LOGDIR/fails"
+# Overwrite the launcher's pid (caffeinate's wrapper) with the real worker pid.
+echo "$$" > "$LOGDIR/soak.pid"
+
+PLAIN="${PLAIN_BIN:-$ROOT/build/kickmsg_stress_test}"
+CRASH="${CRASH_BIN:-$ROOT/build/kickmsg_crash_test}"
+TSAN="${TSAN_BIN:-$ROOT/build_tsan/kickmsg_stress_test}"
+TSAN_SUPP="$ROOT/tests/tsan.supp"
+ENDURANCE="$ROOT/tests/endurance.sh"
+
+# Weighted cycle: repeats encode priority.  TSAN is the rarest, highest-value
+# signal, so it takes half the slices; crash fuzz a quarter; plain stress the
+# rest as periodic sanity at two contention levels.  Oversub is kept <=200 on
+# purpose -- a single oversub-300 run can take tens of minutes and blow past a
+# slice boundary (endurance.sh only checks the clock between runs).
+# A profile whose binary is absent is skipped, not fatal.
+PROFILES=(
+  "tsan-150|$TSAN|--oversub 150"
+  "crash|$CRASH|"
+  "tsan-150|$TSAN|--oversub 150"
+  "stress-150|$PLAIN|--oversub 150"
+  "tsan-150|$TSAN|--oversub 150"
+  "crash|$CRASH|"
+  "tsan-150|$TSAN|--oversub 150"
+  "stress-200|$PLAIN|--oversub 200"
+)
+
+TOTAL_SLICES=0
+TOTAL_PASS=0
+TOTAL_FAIL=0
+START=$(date +%s)
+DEADLINE=$((START + TOTAL_SECS))
+
+log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$MASTER"; }
+
+summary() {
+    echo "" | tee -a "$MASTER"
+    log "=== SOAK SUMMARY ==="
+    log "slices=$TOTAL_SLICES pass_scenarios=$TOTAL_PASS fail_scenarios=$TOTAL_FAIL"
+    # Per-profile breakdown via awk over the recorded slice lines (no
+    # associative arrays -- this must run on macOS bash 3.2).
+    echo "--- per-profile ---" | tee -a "$MASTER"
+    grep -hE "^SLICE " "$MASTER" 2>/dev/null \
+        | awk '{p=$0; sub(/.*profile=/,"",p); sub(/ .*/,"",p);
+                f=$0; sub(/.*fail=/,"",f); sub(/ .*/,"",f);
+                n[p]++; ff[p]+=f}
+               END{for(k in n) printf "  %-12s slices=%d fail=%d\n", k, n[k], ff[k]}' \
+        | tee -a "$MASTER"
+    if [ "$TOTAL_FAIL" -gt 0 ]; then
+        log "VERDICT: FAILURES DETECTED -- evidence under $LOGDIR/fails/"
+    else
+        log "VERDICT: ALL CLEAN"
+    fi
+}
+
+trap 'summary; exit 130' INT TERM
+
+log "soak start: total=${TOTAL_SECS}s slice=${SLICE_SECS}s deadline_epoch=$DEADLINE"
+log "binaries: plain=$([ -x "$PLAIN" ] && echo yes || echo NO) crash=$([ -x "$CRASH" ] && echo yes || echo NO) tsan=$([ -x "$TSAN" ] && echo yes || echo NO)"
+
+i=0
+skipped=0  # consecutive profiles skipped because their binary is absent
+while [ "$(date +%s)" -lt "$DEADLINE" ]; do
+    n=${#PROFILES[@]}
+    entry="${PROFILES[$((i % n))]}"
+    i=$((i + 1))
+    label="${entry%%|*}"
+    rest="${entry#*|}"
+    bin="${rest%%|*}"
+    extra="${rest#*|}"
+    [ "$extra" = "$bin" ] && extra=""  # entry had no third field
+
+    if [ ! -x "$bin" ]; then
+        log "skip profile=$label (binary absent: $bin)"
+        skipped=$((skipped + 1))
+        [ "$skipped" -lt "$n" ] && continue
+        log "no runnable profile in a full cycle -- stopping instead of spinning to the deadline"
+        break
+    fi
+    skipped=0
+
+    remain=$((DEADLINE - $(date +%s)))
+    if [ "$remain" -lt 5 ]; then
+        break
+    fi
+    this=$((remain < SLICE_SECS ? remain : SLICE_SECS))  # clip the last slice to the deadline
+
+    TOTAL_SLICES=$((TOTAL_SLICES + 1))
+    tag="$(printf '%04d' "$TOTAL_SLICES")_${label}"
+    slog="$SLICEDIR/$tag.log"
+    log "slice $TOTAL_SLICES profile=$label dur=${this}s -> $slog"
+
+    # Evidence dir is per *slice* (not per label): slices that share a profile
+    # could otherwise write same-named run_<n>_rc<rc>.log files over each other.
+    export FAILDIR="$LOGDIR/fails/$tag"
+    case "$label" in
+        tsan*)  # sanitizer options only for TSAN slices
+            supp=""
+            if [ -f "$TSAN_SUPP" ]; then
+                supp="suppressions=$TSAN_SUPP:"
+            fi
+            export TSAN_OPTIONS="${supp}halt_on_error=1:exitcode=66"
+            ;;
+        *)
+            unset TSAN_OPTIONS 2>/dev/null || true
+            ;;
+    esac
+
+    "$ENDURANCE" "$bin" "$this" $extra > "$slog" 2>&1 || true
+
+    p=$(grep -E "Scenarios passed:" "$slog" | grep -oE '[0-9]+' | tail -1 || true)
+    f=$(grep -E "Scenarios failed:" "$slog" | grep -oE '[0-9]+' | tail -1 || true)
+    v=$(grep -E "VERDICT:" "$slog" | tail -1 || true)
+    p="${p:-0}"
+    f="${f:-0}"
+    TOTAL_PASS=$((TOTAL_PASS + p))
+    TOTAL_FAIL=$((TOTAL_FAIL + f))
+    # Machine-greppable record for the per-profile breakdown above.
+    echo "SLICE $TOTAL_SLICES profile=$label pass=$p fail=$f" >> "$MASTER"
+    log "  done profile=$label pass=$p fail=$f ${v:-(no verdict line)}"
+done
+
+summary
+if [ "$TOTAL_FAIL" -gt 0 ]; then
+    exit 1
+fi
+exit 0
diff --git a/tests/soak_status.sh b/tests/soak_status.sh
new file mode 100755
index 0000000..7c3e0fe
--- /dev/null
+++ b/tests/soak_status.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Compact live summary of a soak_all.sh run.
+# Usage: soak_status.sh [run_dir]   (defaults to soak_logs/latest)
+set -uo pipefail
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+RUNDIR="${1:-${SOAK_LOGDIR:-$ROOT/soak_logs}/latest}"
+M="$RUNDIR/soak_all.log"
+if [ ! -f "$M" ]; then
+    echo "no soak log at $M -- run tests/soak_all.sh first"
+    exit 1
+fi
+
+fmt() { # render a seconds count as "Hh MMm SSs"
+    local hrs=$(($1 / 3600)) mins=$((($1 % 3600) / 60)) secs=$(($1 % 60))
+    printf '%dh %02dm %02ds' "$hrs" "$mins" "$secs"
+}
+
+pid=""
+[ -f "$RUNDIR/soak.pid" ] && pid="$(cat "$RUNDIR/soak.pid" 2>/dev/null)"
+state="finished"
+if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
+    state="RUNNING (pid $pid)"
+fi
+
+start_line="$(grep -m1 'soak start' "$M" 2>/dev/null || true)"
+total="$(echo "$start_line" | grep -oE 'total=[0-9]+' | grep -oE '[0-9]+' || true)"
+deadline="$(echo "$start_line" | grep -oE 'deadline_epoch=[0-9]+' | grep -oE '[0-9]+' || true)"
+total="${total:-0}"
+deadline="${deadline:-0}"
+now="$(date +%s)"
+elapsed=0
+remain=0
+if [ "$deadline" -gt 0 ]; then
+    elapsed=$((now - (deadline - total)))
+    remain=$((deadline - now))
+    [ "$remain" -lt 0 ] && remain=0
+fi
+
+# grep -c always prints a count but exits 1 when zero -- don't add `|| echo 0`
+# or it double-prints. set -e is off, so a non-zero exit here is harmless.
+slices="$(grep -c '^SLICE ' "$M" 2>/dev/null)"
+slices="${slices:-0}"
+pass="$(grep '^SLICE ' "$M" 2>/dev/null | grep -oE 'pass=[0-9]+' | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}')"
+fail="$(grep '^SLICE ' "$M" 2>/dev/null | grep -oE 'fail=[0-9]+' | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}')"
+nfail="$(find "$RUNDIR/fails" -type f 2>/dev/null | wc -l | tr -d ' ')"
+
+echo "=== kickmsg soak status ==="
+echo "  state    : $state"
+echo "  run dir  : $RUNDIR"
+echo "  elapsed  : $(fmt "$elapsed") of $(fmt "$total")   (remaining $(fmt "$remain"))"
+echo "  slices   : $slices done   pass_scenarios=$pass   fail_scenarios=$fail   fail_logs=$nfail"
+echo "  per-profile:"
+grep '^SLICE ' "$M" 2>/dev/null \
+    | awk '{p=$0; sub(/.*profile=/,"",p); sub(/ .*/,"",p);
+            f=$0; sub(/.*fail=/,"",f); sub(/ .*/,"",f);
+            n[p]++; ff[p]+=f}
+           END{for(k in n) printf "    %-12s slices=%d fail=%d\n", k, n[k], ff[k]}'
+
+# Current/last slice and its latest live tally (\r-separated -> last field).
+cur="$(grep -E ' slice [0-9]+ profile=' "$M" 2>/dev/null | tail -1 || true)"
+if [ -n "$cur" ]; then
+    label="$(echo "$cur" | sed -E 's/.* profile=([^ ]+).*/\1/')"
+    slog="$(echo "$cur" | sed -E 's/.*-> //')"
+    tally=""
+    [ -f "$slog" ] && tally="$(tr '\r' '\n' < "$slog" | grep -E '^\[[0-9]+s/' | tail -1 || true)"
+    echo "  current  : $label   ${tally:-(starting)}"
+fi
+
+if [ "$nfail" -gt 0 ] || [ "$fail" -gt 0 ]; then
+    echo "  !! failures recorded -- see $RUNDIR/fails/"
+fi
diff --git a/tests/stress/main.cc b/tests/stress/main.cc
index ed9679e..d57ccb3 100644
--- a/tests/stress/main.cc
+++ b/tests/stress/main.cc
@@ -1,4 +1,5 @@
 #include "common.h"
+#include "../shm_cleanup.h"
 #include "kickmsg/version.h"
 
 #include <argparse/argparse.hpp>
@@ -56,6 +57,18 @@ int main(int argc, char** argv)
                 std::thread::hardware_concurrency(),
                 static_cast<unsigned>(contention_count()));
 
+    // Unlink the scenario segments if interrupted (Ctrl-C / kill), so a killed
+    // run leaves a clean /dev/shm.  Keep in sync with the scenarios' shm_name.
+    for (char const* name : {"/kickmsg_treiber_stress", "/kickmsg_churn_test",
+                             "/kickmsg_gc_test", "/kickmsg_fairness_test",
+                             "/kickmsg_stress_test", "/kickmsg_pool_exhaustion",
+                             "/kickmsg_live_repair", "/kickmsg_single_slot_ring",
+                             "/kickmsg_sub_saturation"})
+    {
+        kickmsg_test::register_cleanup_shm(name);
+    }
+    kickmsg_test::install_signal_cleanup();
+
     TestRunner runner;
 
     runner.run("treiber_stress",       run_treiber_stress);