diff --git a/tests/README.md b/tests/README.md index 49685ec..adb6bd9 100644 --- a/tests/README.md +++ b/tests/README.md @@ -84,9 +84,55 @@ endurance is worth more than a long Release soak for finding ordering bugs. ```bash scripts/configure.sh build_tsan --with=unit_tests --with=tsan scripts/setup_build.sh build_tsan && cmake --build build_tsan -j -TSAN_OPTIONS="suppressions=$PWD/tests/tsan.supp" \ +TSAN_OPTIONS="suppressions=$PWD/tests/tsan.supp:halt_on_error=1" \ tests/endurance.sh build_tsan/kickmsg_stress_test 14400 ``` +`halt_on_error=1` makes TSAN stop at the first race with the report intact; +without it TSAN reports and *continues*, and the run still exits cleanly. The +endurance harness also greps each run for `ThreadSanitizer`/`runtime error:` +and counts it as a failure (`san=` column), so a race is caught either way. + +Rungs 3-4 above are the **hands-on** soaks: pick one binary and a duration and +hammer it (e.g. crash recovery for an hour before a PR, or TSAN stress for an +afternoon). The rig below is the **unattended** counterpart -- it just cycles +all of them for you. It is built *on top of* `endurance.sh`, not a replacement; +keep using the direct form for targeted runs. + +### 5. Unattended long-horizon rig -- cycles all profiles +`tests/soak_all.sh` loops weighted profiles until a wall-clock deadline, each a +time-sliced `endurance.sh` pass: **TSAN takes half the slices** (rarest, +highest-value signal), crash fuzz a quarter, plain stress the rest as periodic +sanity at oversub 150/200. It records a per-slice verdict, persists failing +runs, and survives any single slice failing. It self-detaches (survives +logout), runs under `caffeinate` on macOS, and prints a watch/stop dashboard. +```bash +# build the plain + crash binaries (build/) and optionally build_tsan/ +tests/soak_all.sh 604800 1800 # 1 week, 30-min slices; returns at once +# absent build_tsan/ is skipped, not fatal. SOAK_FOREGROUND=1 to run inline. 
+``` +Each launch isolates its run under `soak_logs/run_<YYYYmmdd_HHMMSS>/` (also +`soak_logs/latest`) with the master log, per-slice logs, and `fails/`. Check +progress at any time with: +```bash +tests/soak_status.sh # state, elapsed/remaining, per-profile tally +``` +The launch dashboard also prints the exact `tail`/stop commands. Interrupting a +run (Ctrl-C or `kill`) unlinks the active segment, so it leaves no stale +`/dev/shm` behind (`kill -9` is the exception, covered by unlink-before-create). + +Slice length (arg 2) is operational, not a coverage knob: a profile's total +iterations over the run depend only on its *share* of the cycle, not on how +that time is chunked. Shorter slices just give finer failure-attribution +checkpoints. To shift priority between profiles, edit the weighting in the +`PROFILES` list -- don't lengthen slices. + +The **crash test fuzzes its kill timing** (seeded random per round) so a long +soak explores new crash windows instead of re-hitting a fixed schedule: +```bash +build/kickmsg_crash_test # seed from clock -- logged at startup +KICKMSG_CRASH_SEED=12345 build/kickmsg_crash_test # replay an exact schedule +KICKMSG_CRASH_ROUNDS=200 build/kickmsg_crash_test # more kills per invocation +``` ## Contention scales to your machine @@ -107,6 +153,8 @@ bound it explicitly. ## Detached long soak (survives logout; keeps the machine awake) +For a *single-profile* `endurance.sh` run (rungs 3-4); the rig in rung 5 +self-detaches and handles `caffeinate` on its own. 
```bash # macOS: caffeinate prevents idle sleep mid-soak nohup caffeinate -i tests/endurance.sh build/kickmsg_stress_test 43200 > soak.log 2>&1 & diff --git a/tests/crash_test.cc b/tests/crash_test.cc index b4fccd3..f253711 100644 --- a/tests/crash_test.cc +++ b/tests/crash_test.cc @@ -24,6 +24,7 @@ #include #include +#include "shm_cleanup.h" #include "kickmsg/os/Time.h" #include "kickmsg/Publisher.h" #include "kickmsg/Subscriber.h" @@ -45,6 +46,48 @@ static uint32_t compute_checksum(CrashPayload const& p) return p.magic ^ p.seq ^ 0xBAADF00D; } +// --- Seeded kill-timing fuzzer --------------------------------------------- +// Each kill fires at a random instant so a long soak explores new crash +// windows instead of re-hitting a fixed schedule. The seed is logged at +// startup; set KICKMSG_CRASH_SEED to replay a specific run. +namespace +{ + uint64_t g_rng_state = 0; + + uint64_t next_rand() // splitmix64 + { + g_rng_state += 0x9E3779B97F4A7C15ull; + uint64_t z = g_rng_state; + z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull; + z = (z ^ (z >> 27)) * 0x94D049BB133111EBull; + return z ^ (z >> 31); + } + + // Sleep a random duration in [lo_us, hi_us] inclusive (microsecond grain). + void sleep_rand(uint64_t lo_us, uint64_t hi_us) + { + uint64_t span = hi_us - lo_us + 1; + kickmsg::sleep(microseconds{static_cast(lo_us + next_rand() % span)}); + } + + uint64_t seed_fuzzer() + { + uint64_t seed; + char const* env = std::getenv("KICKMSG_CRASH_SEED"); + if (env != nullptr) + { + seed = std::strtoull(env, nullptr, 0); + } + else + { + seed = static_cast(monotonic_ns().count()) + ^ (static_cast(::getpid()) << 32); + } + g_rng_state = seed; + return seed; + } +} + /// Child publisher: publishes as fast as possible using allocate() + publish() /// to maximize the window where a kill can orphan a slot. 
static void child_publisher_main(int /*round*/) @@ -128,6 +171,7 @@ struct RoundResult bool recovered_entries; bool recovered_rings; bool recovered_slots; + bool repair_ok; bool subscriber_ok; }; @@ -156,8 +200,8 @@ static RoundResult run_one_round(int round) _exit(0); // never reached } - // Let publisher run for 20-50ms - kickmsg::sleep(milliseconds{20 + (round % 30)}); + // Kill at a fuzzed instant in the publisher's lifecycle (0.2-50ms). + sleep_rand(200, 50000); // Kill publisher mid-flight kill(pub_pid, SIGKILL); @@ -186,7 +230,8 @@ static RoundResult run_one_round(int round) // Verify clean after repair auto post = region.diagnose(); - if (post.locked_entries > 0 or post.retired_rings > 0) + result.repair_ok = (post.locked_entries == 0 and post.retired_rings == 0); + if (not result.repair_ok) { std::fprintf(stderr, " [FAIL] Round %d: repair incomplete " "(locked=%u, retired=%u)\n", @@ -382,7 +427,7 @@ static bool test_multi_publisher_crash() } } - kickmsg::sleep(30ms); + sleep_rand(2000, 50000); for (int i = 0; i < N_PUBS; ++i) { @@ -452,7 +497,17 @@ static bool test_multi_publisher_crash() int main() { - std::printf("=== Kickmsg Multi-Process Crash Test ===\n\n"); + std::printf("=== Kickmsg Multi-Process Crash Test ===\n"); + // Clean up segments if interrupted (Ctrl-C / kill) before the test's own + // unlink runs. Installed before forking so children inherit it too. 
+ kickmsg_test::register_cleanup_shm("/kickmsg_crash_test"); + kickmsg_test::register_cleanup_shm("/kickmsg_crash_test_sub"); + kickmsg_test::register_cleanup_shm("/kickmsg_crash_test_multi"); + kickmsg_test::install_signal_cleanup(); + + uint64_t const seed = seed_fuzzer(); + std::printf("crash fuzz seed=%llu (set KICKMSG_CRASH_SEED to replay)\n\n", + static_cast(seed)); kickmsg::SharedMemory::unlink(SHM_NAME); @@ -490,7 +545,15 @@ int main() // Let subscriber attach kickmsg::sleep(50ms); - constexpr int NUM_ROUNDS = 10; + int NUM_ROUNDS = 30; + if (char const* r = std::getenv("KICKMSG_CRASH_ROUNDS")) + { + int v = std::atoi(r); + if (v > 0) + { + NUM_ROUNDS = v; + } + } int any_recovery = 0; bool all_ok = true; @@ -501,6 +564,10 @@ int main() { ++any_recovery; } + if (not result.repair_ok) + { + all_ok = false; + } } // Signal subscriber to exit diff --git a/tests/endurance.sh b/tests/endurance.sh index 43af316..b6b796d 100755 --- a/tests/endurance.sh +++ b/tests/endurance.sh @@ -38,8 +38,10 @@ while [ "$(date +%s)" -lt "$END_TIME" ]; do fi SUMMARY=$(echo "$OUTPUT" | grep "Summary:" | tail -1 || true) if [ -n "$SUMMARY" ]; then - RUN_PASS=$(echo "$SUMMARY" | grep -oE '[0-9]+ passed' | grep -oE '[0-9]+') - RUN_FAIL=$(echo "$SUMMARY" | grep -oE '[0-9]+ failed' | grep -oE '[0-9]+') + # Guarded: a garbled/interleaved Summary line (possible under heavy + # sanitizer contention) must not let set -e kill the whole soak. + RUN_PASS=$(echo "$SUMMARY" | grep -oE '[0-9]+ passed' | grep -oE '[0-9]+' || true) + RUN_FAIL=$(echo "$SUMMARY" | grep -oE '[0-9]+ failed' | grep -oE '[0-9]+' || true) else # No summary line (e.g. crash test): tally by exit code. 
if [ "$RC" -eq 0 ]; then @@ -53,15 +55,25 @@ while [ "$(date +%s)" -lt "$END_TIME" ]; do RUN_PASS=${RUN_PASS:-0} RUN_FAIL=${RUN_FAIL:-0} RUN_REORDER=$(echo "$OUTPUT" | { grep -c "REORDER" || true; }) + # Sanitizer reports (TSAN/ASAN/UBSAN) go to stderr and do NOT bump the + # suite's "failed" count -- detect them explicitly or they get swallowed. + RUN_SANITIZER=$(echo "$OUTPUT" | { grep -c -E "ThreadSanitizer|AddressSanitizer|runtime error:" || true; }) + if [ "$RUN_SANITIZER" -gt 0 ] && [ "$RUN_FAIL" -eq 0 ]; then + RUN_FAIL=1 + fi PASS=$((PASS + RUN_PASS)) FAIL=$((FAIL + RUN_FAIL)) REORDERS=$((REORDERS + RUN_REORDER)) ELAPSED=$(($(date +%s) - END_TIME + DURATION_SECS)) - printf "\r[%ds/%ds] runs=%d pass=%d fail=%d reorders=%d" \ - "$ELAPSED" "$DURATION_SECS" "$RUNS" "$PASS" "$FAIL" "$REORDERS" - if [ "$RUN_FAIL" -gt 0 ]; then + printf "\r[%ds/%ds] runs=%d pass=%d fail=%d reorders=%d san=%d" \ + "$ELAPSED" "$DURATION_SECS" "$RUNS" "$PASS" "$FAIL" "$REORDERS" "$RUN_SANITIZER" + if [ "$RUN_FAIL" -gt 0 ] || [ "$RC" -ne 0 ]; then + # Persist the full run output -- the evidence is otherwise lost. + FAILDIR="${FAILDIR:-endurance_fails}" + mkdir -p "$FAILDIR" + printf '%s\n' "$OUTPUT" > "$FAILDIR/run_${RUNS}_rc${RC}.log" echo "" - echo "$OUTPUT" | grep -E "REORDER|FAIL|WARN" || true + echo "$OUTPUT" | grep -E "REORDER|FAIL|WARN|ThreadSanitizer|runtime error:" || true fi done echo "" diff --git a/tests/shm_cleanup.h b/tests/shm_cleanup.h new file mode 100644 index 0000000..fb472c7 --- /dev/null +++ b/tests/shm_cleanup.h @@ -0,0 +1,61 @@ +#ifndef KICKMSG_TESTS_SHM_CLEANUP_H +#define KICKMSG_TESTS_SHM_CLEANUP_H + +// Best-effort shm cleanup for the test binaries: on SIGINT/SIGTERM, unlink the +// registered segments so an interrupted run leaves no stale /dev/shm entry for +// the next run to trip over. Names must be string literals (static storage); +// the handler only calls shm_unlink + _exit, both async-signal-safe. 
// SIGKILL (-9) cannot be caught -- those leftovers are handled by each
// scenario's unlink-before-create instead.

#ifndef _WIN32
#include <csignal>
#include <cstring>
#include <sys/mman.h>
#include <unistd.h>

namespace kickmsg_test
{
    /// Capacity of the cleanup table; registrations beyond it are dropped.
    inline constexpr int MAX_CLEANUP = 32;
    // volatile: the elements are read from a signal handler.
    inline char const* volatile g_cleanup_names[MAX_CLEANUP] = {};
    inline volatile sig_atomic_t g_cleanup_count = 0;

    /// Signal handler: unlink every registered segment, then terminate with
    /// the conventional 128+signal status. Only shm_unlink and _exit are
    /// called here.
    inline void shm_cleanup_handler(int sig)
    {
        // Read the count once and clamp it, so the loop bound cannot move
        // mid-iteration or index past the table.
        sig_atomic_t const published = g_cleanup_count;
        sig_atomic_t const count = (published > MAX_CLEANUP) ? MAX_CLEANUP : published;
        for (sig_atomic_t i = 0; i < count; ++i)
        {
            char const* const name = g_cleanup_names[i];
            if (name != nullptr)
            {
                ::shm_unlink(name);
            }
        }
        ::_exit(128 + sig);
    }

    /// Register a segment name to unlink on SIGINT/SIGTERM.
    /// @param name  Pointer that must stay valid for the life of the process
    ///              (e.g. a string literal): only the pointer is stored.
    /// A null name, a name that is already registered, or a registration
    /// past MAX_CLEANUP entries is ignored.
    inline void register_cleanup_shm(char const* name)
    {
        if (name == nullptr)
        {
            return; // would otherwise reach shm_unlink(nullptr) in the handler
        }

        sig_atomic_t const count = g_cleanup_count;
        for (sig_atomic_t i = 0; i < count; ++i)
        {
            if (std::strcmp(g_cleanup_names[i], name) == 0)
            {
                return; // already registered: don't burn a second slot
            }
        }
        if (count >= MAX_CLEANUP)
        {
            return;
        }

        // Store the name before publishing the new count, so a handler that
        // fires in between never reads an unset slot.
        g_cleanup_names[count] = name;
        g_cleanup_count = count + 1;
    }

    /// Install shm_cleanup_handler for SIGINT and SIGTERM. Best effort: the
    /// sigaction() results are not checked.
    inline void install_signal_cleanup()
    {
        struct sigaction sa;
        std::memset(&sa, 0, sizeof(sa));
        sa.sa_handler = shm_cleanup_handler;
        sigemptyset(&sa.sa_mask);
        sa.sa_flags = 0;
        ::sigaction(SIGINT, &sa, nullptr);
        ::sigaction(SIGTERM, &sa, nullptr);
    }
}
#else
namespace kickmsg_test
{
    // No-op stand-ins so callers compile unchanged on Windows.
    inline void register_cleanup_shm(char const*) {}
    inline void install_signal_cleanup() {}
}
#endif
+# +# Usage: soak_all.sh [total_secs] [slice_secs] +# total_secs : wall-clock budget (default 604800 = 1 week) +# slice_secs : seconds per profile slice (default 1800 = 30 min) +# SOAK_FOREGROUND=1 run inline instead of detaching +# PLAIN_BIN / CRASH_BIN / TSAN_BIN override binaries (TSAN optional) +# +# NOT 'set -e': a failing slice records and continues. +set -uo pipefail + +TOTAL_SECS="${1:-604800}" +SLICE_SECS="${2:-1800}" +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +SELF="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")" +BASE_LOGDIR="${SOAK_LOGDIR:-$ROOT/soak_logs}" + +# --- Launcher: detach + caffeinate, print dashboard, return ----------------- +if [ -z "${KICKMSG_SOAK_RUNDIR:-}" ] && [ -z "${SOAK_FOREGROUND:-}" ]; then + RUNDIR="$BASE_LOGDIR/run_$(date +%Y%m%d_%H%M%S)" + mkdir -p "$RUNDIR" + ln -sfn "$RUNDIR" "$BASE_LOGDIR/latest" 2>/dev/null || true + export KICKMSG_SOAK_RUNDIR="$RUNDIR" + caf="" + caf_label="off (non-macOS)" + if [ "$(uname)" = "Darwin" ] && command -v caffeinate >/dev/null 2>&1; then + caf="caffeinate -i" + caf_label="on (idle-sleep blocked)" + fi + nohup $caf "$SELF" "$TOTAL_SECS" "$SLICE_SECS" "$RUNDIR/stdout.log" 2>&1 & + pid=$! + echo "$pid" > "$RUNDIR/soak.pid" + M="$RUNDIR/soak_all.log" + cat < "$LOGDIR/soak.pid" + +PLAIN="${PLAIN_BIN:-$ROOT/build/kickmsg_stress_test}" +CRASH="${CRASH_BIN:-$ROOT/build/kickmsg_crash_test}" +TSAN="${TSAN_BIN:-$ROOT/build_tsan/kickmsg_stress_test}" +TSAN_SUPP="$ROOT/tests/tsan.supp" +ENDURANCE="$ROOT/tests/endurance.sh" + +# Weighted cycle: repeats encode priority. TSAN is the rarest, highest-value +# signal, so it takes half the slices; crash fuzz a quarter; plain stress the +# rest as periodic sanity at two contention levels. Oversub is kept <=200 on +# purpose -- a single oversub-300 run can take tens of minutes and blow past a +# slice boundary (endurance.sh only checks the clock between runs). +# A profile whose binary is absent is skipped, not fatal. 
PROFILES=(
    "tsan-150|$TSAN|--oversub 150"
    "crash|$CRASH|"
    "tsan-150|$TSAN|--oversub 150"
    "stress-150|$PLAIN|--oversub 150"
    "tsan-150|$TSAN|--oversub 150"
    "crash|$CRASH|"
    "tsan-150|$TSAN|--oversub 150"
    "stress-200|$PLAIN|--oversub 200"
)

TOTAL_SLICES=0
TOTAL_PASS=0
TOTAL_FAIL=0
START=$(date +%s)
DEADLINE=$((START + TOTAL_SECS))
# Pause used when a whole cycle of profiles had no executable binary, so the
# loop cannot spin and flood the master log.
IDLE_BACKOFF_SECS=60

# Timestamped line to stdout and the master log.
log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$MASTER"; }

# Final report: totals, per-profile breakdown, verdict.
summary() {
    echo "" | tee -a "$MASTER"
    log "=== SOAK SUMMARY ==="
    log "slices=$TOTAL_SLICES pass_scenarios=$TOTAL_PASS fail_scenarios=$TOTAL_FAIL"
    # Per-profile breakdown via awk over the recorded slice lines (no
    # associative arrays -- this must run on macOS bash 3.2).
    echo "--- per-profile ---" | tee -a "$MASTER"
    grep -hE "^SLICE " "$MASTER" 2>/dev/null \
        | awk '{p=$0; sub(/.*profile=/,"",p); sub(/ .*/,"",p);
                f=$0; sub(/.*fail=/,"",f); sub(/ .*/,"",f);
                n[p]++; ff[p]+=f}
               END{for(k in n) printf " %-12s slices=%d fail=%d\n", k, n[k], ff[k]}' \
        | tee -a "$MASTER"
    if [ "$TOTAL_FAIL" -gt 0 ]; then
        log "VERDICT: FAILURES DETECTED -- evidence under $LOGDIR/fails/"
    else
        log "VERDICT: ALL CLEAN"
    fi
}

trap 'summary; exit 130' INT TERM

log "soak start: total=${TOTAL_SECS}s slice=${SLICE_SECS}s deadline_epoch=$DEADLINE"
log "binaries: plain=$([ -x "$PLAIN" ] && echo yes || echo NO) crash=$([ -x "$CRASH" ] && echo yes || echo NO) tsan=$([ -x "$TSAN" ] && echo yes || echo NO)"

# Pre-flight: with no executable profile binary the loop below would only
# ever take its skip path, so fail fast instead of running to the deadline.
runnable=0
for entry in "${PROFILES[@]}"; do
    rest="${entry#*|}"
    if [ -x "${rest%%|*}" ]; then
        runnable=$((runnable + 1))
    fi
done
if [ "$runnable" -eq 0 ]; then
    log "FATAL: no profile binary is executable -- build first or set PLAIN_BIN / CRASH_BIN / TSAN_BIN"
    exit 2
fi

n=${#PROFILES[@]}
i=0
skipped=0
while [ "$(date +%s)" -lt "$DEADLINE" ]; do
    idx=$((i % n))
    i=$((i + 1))
    # Entry format: label|binary|extra-args (extra may be empty).
    entry="${PROFILES[$idx]}"
    label="${entry%%|*}"
    rest="${entry#*|}"
    bin="${rest%%|*}"
    extra="${rest#*|}"
    if [ "$extra" = "$bin" ]; then
        extra=""
    fi

    if [ ! -x "$bin" ]; then
        log "skip profile=$label (binary absent: $bin)"
        skipped=$((skipped + 1))
        if [ "$skipped" -ge "$n" ]; then
            # Every profile in the cycle was skipped (binaries disappeared
            # mid-run, e.g. during a rebuild): back off instead of spinning.
            log "no runnable profile in a full cycle -- retrying in ${IDLE_BACKOFF_SECS}s"
            sleep "$IDLE_BACKOFF_SECS"
            skipped=0
        fi
        continue
    fi
    skipped=0

    now=$(date +%s)
    remain=$((DEADLINE - now))
    if [ "$remain" -lt 5 ]; then
        break
    fi
    this="$SLICE_SECS"
    if [ "$remain" -lt "$this" ]; then
        this="$remain"
    fi

    TOTAL_SLICES=$((TOTAL_SLICES + 1))
    slice_tag="$(printf '%04d' "$TOTAL_SLICES")_${label}"
    slog="$SLICEDIR/${slice_tag}.log"
    log "slice $TOTAL_SLICES profile=$label dur=${this}s -> $slog"

    # Per-slice evidence dir. endurance.sh names its files run_<RUNS>_rc<RC>.log
    # with a counter that restarts on every invocation, so a directory shared
    # by all slices of a profile would let a later slice overwrite earlier
    # evidence. Sanitizer options only for TSAN slices.
    export FAILDIR="$LOGDIR/fails/$slice_tag"
    case "$label" in
        tsan*)
            supp=""
            if [ -f "$TSAN_SUPP" ]; then
                supp="suppressions=$TSAN_SUPP:"
            fi
            export TSAN_OPTIONS="${supp}halt_on_error=1:exitcode=66"
            ;;
        *)
            unset TSAN_OPTIONS 2>/dev/null || true
            ;;
    esac

    # $extra is intentionally unquoted: it carries several words of arguments.
    "$ENDURANCE" "$bin" "$this" $extra > "$slog" 2>&1 || true

    p=$(grep -E "Scenarios passed:" "$slog" | grep -oE '[0-9]+' | tail -1 || true)
    f=$(grep -E "Scenarios failed:" "$slog" | grep -oE '[0-9]+' | tail -1 || true)
    v=$(grep -E "VERDICT:" "$slog" | tail -1 || true)
    p="${p:-0}"
    f="${f:-0}"
    TOTAL_PASS=$((TOTAL_PASS + p))
    TOTAL_FAIL=$((TOTAL_FAIL + f))
    # Machine-greppable record for the per-profile breakdown above.
    echo "SLICE $TOTAL_SLICES profile=$label pass=$p fail=$f" >> "$MASTER"
    log " done profile=$label pass=$p fail=$f ${v:-(no verdict line)}"
done

summary
if [ "$TOTAL_FAIL" -gt 0 ]; then
    exit 1
fi
exit 0
-f "$M" ]; then + echo "no soak log at $M -- run tests/soak_all.sh first" + exit 1 +fi + +fmt() { # seconds -> "Hh MMm SSs" + local s="$1" + printf '%dh %02dm %02ds' "$((s / 3600))" "$(((s % 3600) / 60))" "$((s % 60))" +} + +pid="" +[ -f "$RUNDIR/soak.pid" ] && pid="$(cat "$RUNDIR/soak.pid" 2>/dev/null)" +state="finished" +if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then + state="RUNNING (pid $pid)" +fi + +start_line="$(grep -m1 'soak start' "$M" 2>/dev/null || true)" +total="$(echo "$start_line" | grep -oE 'total=[0-9]+' | grep -oE '[0-9]+' || true)" +deadline="$(echo "$start_line" | grep -oE 'deadline_epoch=[0-9]+' | grep -oE '[0-9]+' || true)" +total="${total:-0}" +deadline="${deadline:-0}" +now="$(date +%s)" +elapsed=0 +remain=0 +if [ "$deadline" -gt 0 ]; then + elapsed=$((now - (deadline - total))) + remain=$((deadline - now)) + [ "$remain" -lt 0 ] && remain=0 +fi + +# grep -c always prints a count but exits 1 when zero -- don't add `|| echo 0` +# or it double-prints. set -e is off, so a non-zero exit here is harmless. 
+slices="$(grep -c '^SLICE ' "$M" 2>/dev/null)" +slices="${slices:-0}" +pass="$(grep '^SLICE ' "$M" 2>/dev/null | grep -oE 'pass=[0-9]+' | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}')" +fail="$(grep '^SLICE ' "$M" 2>/dev/null | grep -oE 'fail=[0-9]+' | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}')" +nfail="$(find "$RUNDIR/fails" -type f 2>/dev/null | wc -l | tr -d ' ')" + +echo "=== kickmsg soak status ===" +echo " state : $state" +echo " run dir : $RUNDIR" +echo " elapsed : $(fmt "$elapsed") of $(fmt "$total") (remaining $(fmt "$remain"))" +echo " slices : $slices done pass_scenarios=$pass fail_scenarios=$fail fail_logs=$nfail" +echo " per-profile:" +grep '^SLICE ' "$M" 2>/dev/null \ + | awk '{p=$0; sub(/.*profile=/,"",p); sub(/ .*/,"",p); + f=$0; sub(/.*fail=/,"",f); sub(/ .*/,"",f); + n[p]++; ff[p]+=f} + END{for(k in n) printf " %-12s slices=%d fail=%d\n", k, n[k], ff[k]}' + +# Current/last slice and its latest live tally (\r-separated -> last field). +cur="$(grep -E ' slice [0-9]+ profile=' "$M" 2>/dev/null | tail -1 || true)" +if [ -n "$cur" ]; then + label="$(echo "$cur" | sed -E 's/.* profile=([^ ]+).*/\1/')" + slog="$(echo "$cur" | sed -E 's/.*-> //')" + tally="" + [ -f "$slog" ] && tally="$(tr '\r' '\n' < "$slog" | grep -E '^\[[0-9]+s/' | tail -1 || true)" + echo " current : $label ${tally:-(starting)}" +fi + +if [ "$nfail" -gt 0 ] || [ "$fail" -gt 0 ]; then + echo " !! failures recorded -- see $RUNDIR/fails/" +fi diff --git a/tests/stress/main.cc b/tests/stress/main.cc index ed9679e..d57ccb3 100644 --- a/tests/stress/main.cc +++ b/tests/stress/main.cc @@ -1,4 +1,5 @@ #include "common.h" +#include "../shm_cleanup.h" #include "kickmsg/version.h" #include @@ -56,6 +57,18 @@ int main(int argc, char** argv) std::thread::hardware_concurrency(), static_cast(contention_count())); + // Unlink the scenario segments if interrupted (Ctrl-C / kill), so a killed + // run leaves a clean /dev/shm. Keep in sync with the scenarios' shm_name. 
+ for (char const* name : {"/kickmsg_treiber_stress", "/kickmsg_churn_test", + "/kickmsg_gc_test", "/kickmsg_fairness_test", + "/kickmsg_stress_test", "/kickmsg_pool_exhaustion", + "/kickmsg_live_repair", "/kickmsg_single_slot_ring", + "/kickmsg_sub_saturation"}) + { + kickmsg_test::register_cleanup_shm(name); + } + kickmsg_test::install_signal_cleanup(); + TestRunner runner; runner.run("treiber_stress", run_treiber_stress);