From 48f40b4edeaeccb4dc31404dd9d46473c22116c8 Mon Sep 17 00:00:00 2001
From: bdchatham <bdchatham@gmail.com>
Date: Mon, 22 Jun 2026 17:53:06 -0700
Subject: [PATCH 1/2] feat(test): TestBenchmark drives seiload (load suite)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Completes the load suite: after provisioning the chain + RPC fleet, render
the platform seiload profile (from the seiload-profiles ConfigMap) with the
fleet's EVM endpoints, apply seiload's own Job manifest as a decoupled unit,
wait for it to run the full load, and assert the chain stayed live under it.

- seiload runs from its own manifest (embedded template, parameterized) — its
  Job spec is not constructed in Go. Profile is read from the platform CM, not
  vendored.
- Pass/fail = Job completion + post-load chain liveness. A throughput/regression
  gate belongs in telemetry (a PromQL query over the run's metrics); the Job
  carries the metrics scrape label so that gate can be added later.
- Per-run profile CM + seiload Job carry sei.io/harness-run for the GC sweep;
  t.Cleanup deletes them on normal exit.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 test/integration/benchmark_test.go     |  44 +++---
 test/integration/harness_test.go       |  21 +++
 test/integration/seiload_job.yaml.tmpl |  66 +++++++++
 test/integration/seiload_test.go       | 191 +++++++++++++++++++++++++
 4 files changed, 302 insertions(+), 20 deletions(-)
 create mode 100644 test/integration/seiload_job.yaml.tmpl
 create mode 100644 test/integration/seiload_test.go
diff --git a/test/integration/benchmark_test.go b/test/integration/benchmark_test.go
index 5bc2c20..282c25e 100644
--- a/test/integration/benchmark_test.go
+++ b/test/integration/benchmark_test.go
@@ -10,15 +10,20 @@ import (
 	"time"
 )
 
-// TestBenchmark provisions a validator chain + RPC fleet for the load suite.
-// seiload drive + report upload are not yet wired (see TODO below).
+// TestBenchmark provisions a validator chain + RPC fleet, drives seiload against
+// the fleet for the configured duration, and asserts the chain stayed live under
+// load. The load suite.
 //
 // Inputs (env, mirroring k8s_nightly.yml):
 //
-//	SEI_CHAIN_ID   per-run chain id (e.g. bench-<run-id>)   [required]
-//	SEID_IMAGE     seid image under test                    [required]
-//	SEI_RUN_ID     unique run id (sei.io/harness-run)       [default: SEI_CHAIN_ID]
-//	SEI_NAMESPACE  shared nightly namespace                 [default: SDK default]
+//	SEI_CHAIN_ID      per-run chain id (e.g. bench-<run-id>)   [required]
+//	SEID_IMAGE        seid image under test                    [required]
+//	SEILOAD_IMAGE     sei-load benchmark image                 [required]
+//	SEI_RUN_ID        unique run id (sei.io/harness-run)       [default: SEI_CHAIN_ID]
+//	SEI_NAMESPACE     shared nightly namespace                 [default: SDK default]
+//	SEILOAD_PROFILE   profile name in seiload-profiles         [default: nightly_evm_transfer]
+//	DURATION_MINUTES  seiload run length                       [default: 10]
+//	SEILOAD_COMMIT_ID sei-chain commit label for metrics       [default: ""]
 //
 // Deadlines: the CronJob MUST run this with `-test.timeout 0` (or safely above
 // the scenario timeout). A -test.timeout breach panics and bypasses t.Cleanup,
@@ -30,13 +35,17 @@ func TestBenchmark(t *testing.T) {
 
 	chainID := mustEnv(t, "SEI_CHAIN_ID")
 	s := spec{
-		chainID:    chainID,
-		runID:      envOr("SEI_RUN_ID", chainID),
-		namespace:  envOr("SEI_NAMESPACE", ""),
-		seidImage:  mustEnv(t, "SEID_IMAGE"),
-		validators: 4,
-		rpcNodes:   2, // seiload fans across both via the EVM endpoint list
-		timeout:    90 * time.Minute,
+		chainID:        chainID,
+		runID:          envOr("SEI_RUN_ID", chainID),
+		namespace:      envOr("SEI_NAMESPACE", ""),
+		seidImage:      mustEnv(t, "SEID_IMAGE"),
+		validators:     4,
+		rpcNodes:       2, // seiload fans across both via the EVM endpoint list
+		timeout:        90 * time.Minute,
+		seiloadImage:   mustEnv(t, "SEILOAD_IMAGE"),
+		seiloadProfile: envOr("SEILOAD_PROFILE", "nightly_evm_transfer"),
+		seiloadCommit:  envOr("SEILOAD_COMMIT_ID", ""),
+		durationMin:    envInt(t, "DURATION_MINUTES", 10),
 	}
 
 	ctx, cancel := context.WithTimeout(context.Background(), s.timeout)
@@ -49,20 +58,15 @@ func TestBenchmark(t *testing.T) {
 	defer stopSignals()
 
 	c := openClient(ctx, t)
+	cs := clientset(t)
 
 	ch, err := provision(ctx, t, c, s)
 	cleanupChain(t, ch)
 	if err != nil {
 		t.Fatalf("provision: %v", err)
 	}
-
 	t.Logf("provisioned %s: %d validators + %d RPC followers; EVM endpoints=%v",
 		s.chainID, s.validators, len(ch.rpcNodes), ch.evmEndpoints())
 
-	// TODO: drive seiload as a decoupled unit — apply its own manifest
-	// parameterized with ch.evmEndpoints(), stamped sei.io/harness-run; wait,
-	// read the report from S3, assert TPS/receipts. seiload's Job spec is not
-	// constructed here.
-	t.Skipf("provisioned %s (%d validators + %d followers); seiload drive + report not yet wired — tearing down",
-		s.chainID, s.validators, len(ch.rpcNodes))
+	runSeiload(ctx, t, cs, ch, s)
 }
diff --git a/test/integration/harness_test.go b/test/integration/harness_test.go
index c4460c4..176a601 100644
--- a/test/integration/harness_test.go
+++ b/test/integration/harness_test.go
@@ -20,6 +20,7 @@ import (
 	"fmt"
 	"net/http"
 	"os"
+	"strconv"
 	"testing"
 	"time"
 
@@ -46,6 +47,12 @@ type spec struct {
 	validators int           // genesis validator count (>= 1)
 	rpcNodes   int           // standalone RPC followers; named <chain>-rpc-0..N-1
 	timeout    time.Duration // overall scenario deadline (drives ctx, kept < CronJob activeDeadlineSeconds)
+
+	// seiload inputs (load suite)
+	seiloadImage   string // sei-load benchmark image
+	seiloadProfile string // profile name in the seiload-profiles ConfigMap
+	seiloadCommit  string // sei-chain commit label for the run's metrics
+	durationMin    int    // seiload run length, minutes
 }
 
 // chain is the live provisioned topology a suite runs load against and asserts
@@ -214,3 +221,17 @@ func mustEnv(t *testing.T, key string) string {
 	}
 	return v
 }
+
+// envInt reads an integer env var or a fallback; a non-integer value fails fast.
+func envInt(t *testing.T, key string, fallback int) int {
+	t.Helper()
+	v := os.Getenv(key)
+	if v == "" {
+		return fallback
+	}
+	n, err := strconv.Atoi(v)
+	if err != nil {
+		t.Fatalf("integration suite: env %s=%q is not an integer: %v", key, v, err)
+	}
+	return n
+}
diff --git a/test/integration/seiload_job.yaml.tmpl b/test/integration/seiload_job.yaml.tmpl
new file mode 100644
index 0000000..94a1ad3
--- /dev/null
+++ b/test/integration/seiload_job.yaml.tmpl
@@ -0,0 +1,66 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: seiload-{{.RunID}}
+  labels:
+    app.kubernetes.io/name: seiload
+    sei.io/harness-run: "{{.RunID}}"
+spec:
+  backoffLimit: 0
+  ttlSecondsAfterFinished: 86400
+  template:
+    metadata:
+      labels:
+        # Scrape label for a Prometheus podMonitor to select (metrics continuity).
+        app.kubernetes.io/name: seiload
+        sei.io/harness-run: "{{.RunID}}"
+    spec:
+      restartPolicy: Never
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 65532
+        runAsGroup: 65532
+        seccompProfile:
+          type: RuntimeDefault
+      containers:
+        - name: seiload
+          image: {{.Image}}
+          args:
+            - --config
+            - /etc/seiload/profile.json
+            - --duration={{.DurationMinutes}}m
+            - --post-summary-flush-delay=45s
+            - --track-receipts=true
+          ports:
+            - name: metrics
+              containerPort: 9090
+              protocol: TCP
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop: ["ALL"]
+            readOnlyRootFilesystem: true
+          env:
+            - name: SEILOAD_RUN_ID
+              value: "{{.RunID}}"
+            - name: SEILOAD_CHAIN_ID
+              value: "{{.ChainID}}"
+            - name: SEILOAD_COMMIT_ID
+              value: "{{.Commit}}"
+            - name: SEILOAD_WORKLOAD
+              value: nightly
+          volumeMounts:
+            - name: profile
+              mountPath: /etc/seiload
+              readOnly: true
+          resources:
+            requests:
+              cpu: "2"
+              memory: "4Gi"
+            limits:
+              cpu: "4"
+              memory: "8Gi"
+      volumes:
+        - name: profile
+          configMap:
+            name: {{.ProfileCM}}
diff --git a/test/integration/seiload_test.go b/test/integration/seiload_test.go
new file mode 100644
index 0000000..f6d155a
--- /dev/null
+++ b/test/integration/seiload_test.go
@@ -0,0 +1,191 @@
+//go:build integration
+
+package integration
+
+import (
+	"bytes"
+	"context"
+	_ "embed"
+	"net/http"
+	"strconv"
+	"strings"
+	"testing"
+	"text/template"
+	"time"
+
+	batchv1 "k8s.io/api/batch/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/yaml"
+
+	"github.com/sei-protocol/sei-k8s-controller/sdk/sei"
+)
+
+//go:embed seiload_job.yaml.tmpl
+var seiloadJobTmpl string
+
+// seiloadProfilesCM is the platform-owned ConfigMap holding the profile
+// templates (placeholders __SEI_CHAIN_ID__ / __RPC_ENDPOINTS__). The harness
+// reads it from the cluster rather than vendoring the profile, so the load
+// shape stays owned by platform.
+const seiloadProfilesCM = "seiload-profiles"
+
+// seiloadParams are the per-run values templated into the seiload Job manifest.
+type seiloadParams struct {
+	RunID           string
+	ChainID         string
+	Commit          string
+	Image           string
+	DurationMinutes int
+	ProfileCM       string
+}
+
+// clientset builds a client-go clientset from the ambient config — the harness
+// uses it for the Job/ConfigMap operations the SDK does not cover.
+func clientset(t *testing.T) *kubernetes.Clientset {
+	t.Helper()
+	cfg, err := ctrl.GetConfig()
+	if err != nil {
+		t.Fatalf("load kubeconfig: %v", err)
+	}
+	cs, err := kubernetes.NewForConfig(cfg)
+	if err != nil {
+		t.Fatalf("build clientset: %v", err)
+	}
+	return cs
+}
+
+// renderProfile reads the platform profile template from seiload-profiles and
+// substitutes the chain id + EVM endpoints (strconv.Quote'd, comma-joined).
+func renderProfile(
+	ctx context.Context, t *testing.T, cs *kubernetes.Clientset,
+	ns, profile, chainID string, endpoints []string,
+) string {
+	t.Helper()
+	cm, err := cs.CoreV1().ConfigMaps(ns).Get(ctx, seiloadProfilesCM, metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("get %s/%s: %v", ns, seiloadProfilesCM, err)
+	}
+	tmpl, ok := cm.Data[profile+".json"]
+	if !ok {
+		t.Fatalf("profile %q.json absent from %s", profile, seiloadProfilesCM)
+	}
+	quoted := make([]string, len(endpoints))
+	for i, e := range endpoints {
+		quoted[i] = strconv.Quote(e)
+	}
+	tmpl = strings.ReplaceAll(tmpl, "__SEI_CHAIN_ID__", chainID)
+	tmpl = strings.ReplaceAll(tmpl, "__RPC_ENDPOINTS__", strings.Join(quoted, ","))
+	return tmpl
+}
+
+// createProfileCM writes the rendered profile to a per-run ConfigMap stamped
+// with the run label (for a future GC sweep); t.Cleanup deletes it at test end.
+func createProfileCM(ctx context.Context, t *testing.T, cs *kubernetes.Clientset, ns, name, runID, profileJSON string) {
+	t.Helper()
+	cm := &corev1.ConfigMap{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: ns,
+			Labels:    map[string]string{runLabelKey: runID},
+		},
+		Data: map[string]string{"profile.json": profileJSON},
+	}
+	if _, err := cs.CoreV1().ConfigMaps(ns).Create(ctx, cm, metav1.CreateOptions{}); err != nil {
+		t.Fatalf("create profile cm %q: %v", name, err)
+	}
+	t.Cleanup(func() {
+		ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
+		defer cancel()
+		_ = cs.CoreV1().ConfigMaps(ns).Delete(ctx, name, metav1.DeleteOptions{})
+	})
+}
+
+// renderJob templates the embedded seiload Job manifest with the per-run params.
+// The manifest owns seiload's shape; only per-run values are injected.
+func renderJob(t *testing.T, p seiloadParams) *batchv1.Job {
+	t.Helper()
+	var buf bytes.Buffer
+	if err := template.Must(template.New("job").Parse(seiloadJobTmpl)).Execute(&buf, p); err != nil {
+		t.Fatalf("render seiload job: %v", err)
+	}
+	var job batchv1.Job
+	if err := yaml.Unmarshal(buf.Bytes(), &job); err != nil {
+		t.Fatalf("unmarshal seiload job: %v", err)
+	}
+	return &job
+}
+
+// runSeiload drives seiload against the fleet as a decoupled unit: render the
+// platform profile, apply seiload's own Job manifest, wait for it to run the
+// full load, then assert the chain stayed live under it.
+//
+// The pass/fail signal is Job completion (seiload ran the load to the end
+// without erroring) plus post-load chain liveness. A throughput/regression gate
+// belongs in telemetry — a PromQL query over the run's metrics — not in this
+// harness; the Job carries the metrics scrape label so that gate can be added.
+func runSeiload(ctx context.Context, t *testing.T, cs *kubernetes.Clientset, ch *chain, s spec) {
+	t.Helper()
+	ns := envOr("SEI_NAMESPACE", ch.network.Namespace())
+
+	profileCM := "seiload-profile-" + s.runID
+	profileJSON := renderProfile(ctx, t, cs, ns, s.seiloadProfile, s.chainID, ch.evmEndpoints())
+	createProfileCM(ctx, t, cs, ns, profileCM, s.runID, profileJSON)
+
+	job := renderJob(t, seiloadParams{
+		RunID:           s.runID,
+		ChainID:         s.chainID,
+		Commit:          s.seiloadCommit,
+		Image:           s.seiloadImage,
+		DurationMinutes: s.durationMin,
+		ProfileCM:       profileCM,
+	})
+	job.Namespace = ns
+	if _, err := cs.BatchV1().Jobs(ns).Create(ctx, job, metav1.CreateOptions{}); err != nil {
+		t.Fatalf("create seiload job: %v", err)
+	}
+	t.Cleanup(func() {
+		ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+		defer cancel()
+		bg := metav1.DeletePropagationBackground
+		_ = cs.BatchV1().Jobs(ns).Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &bg})
+	})
+
+	waitJob(ctx, t, cs, ns, job.Name)
+
+	// Chain survived the load: node-0 still caught up.
+	hc := &http.Client{Timeout: 10 * time.Second}
+	n0 := ch.rpcNodes[0]
+	if err := sei.WaitCaughtUp(ctx, hc, n0.TendermintRPC()); err != nil {
+		t.Errorf("post-load %s not caught up: %v", n0.Name(), err)
+	}
+}
+
+// waitJob polls the seiload Job every 10s until it is Complete (returns) or
+// Failed (fatal). A Get error or ctx expiry is also fatal.
+func waitJob(ctx context.Context, t *testing.T, cs *kubernetes.Clientset, ns, name string) {
+	t.Helper()
+	tick := time.NewTicker(10 * time.Second)
+	defer tick.Stop()
+	for {
+		job, err := cs.BatchV1().Jobs(ns).Get(ctx, name, metav1.GetOptions{})
+		if err != nil {
+			t.Fatalf("get seiload job %q: %v", name, err)
+		}
+		for _, cond := range job.Status.Conditions {
+			if cond.Type == batchv1.JobComplete && cond.Status == corev1.ConditionTrue {
+				return
+			}
+			if cond.Type == batchv1.JobFailed && cond.Status == corev1.ConditionTrue {
+				t.Fatalf("seiload job %q failed: %s", name, cond.Message)
+			}
+		}
+		select {
+		case <-ctx.Done():
+			t.Fatalf("seiload job %q did not finish before deadline: %v", name, ctx.Err())
+		case <-tick.C:
+		}
+	}
+}

From 557897ee2319ffa2e3fdb9dea23ef54931f51b6d Mon Sep 17 00:00:00 2001
From: bdchatham <bdchatham@gmail.com>
Date: Mon, 22 Jun 2026 18:03:03 -0700
Subject: [PATCH 2/2] fix(test): address seiload-drive xreview (4 lenses)

- systems: capture the failed seiload pod log into the fatal (the failure-time
  signal a Job condition message can't give); add a self-terminating Job
  activeDeadlineSeconds independent of the harness ctx.
- sei-network: widen the post-load liveness check to every follower, not just
  node-0 (a half-dead fleet would otherwise ship green).
- k8s/dissenter + comment-register: soften the runLabelKey comment to stop
  claiming a label-GC sweep that isn't shipped yet (pending platform
  deliverable; DeletionPolicy cascade + t.Cleanup cover normal exit); use the
  chain's resolved namespace directly for the seiload Job (no env re-resolve);
  fix the stale namespace field comment.
- idiom: trim the runSeiload doc to present-state.

chainId 713714 confirmed correct (seid GetEVMChainID falls through to
DefaultChainID for bench-* chains). RBAC Role + podMonitor selector flip +
gc label sweep are platform prereqs, tracked separately.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 test/integration/harness_test.go       | 12 +++---
 test/integration/seiload_job.yaml.tmpl |  1 +
 test/integration/seiload_test.go       | 52 +++++++++++++++++++-------
 3 files changed, 46 insertions(+), 19 deletions(-)

diff --git a/test/integration/harness_test.go b/test/integration/harness_test.go
index 176a601..0b5df0a 100644
--- a/test/integration/harness_test.go
+++ b/test/integration/harness_test.go
@@ -31,10 +31,12 @@ import (
 	_ "github.com/sei-protocol/sei-k8s-controller/sdk/sei/provider/k8s"
 )
 
-// runLabelKey marks a run's resources for the nightly label-GC sweep — the only
-// reaper on abnormal exit (shared namespace), since t.Cleanup is skipped on
-// SIGKILL or a -test.timeout breach. provision stamps it on the network + every
-// node; a suite's directly-applied seiload Job and fault CRs must stamp it too.
+// runLabelKey marks a run's resources for the abnormal-exit reaper (t.Cleanup is
+// skipped on SIGKILL / a -test.timeout breach). provision stamps it on the
+// network + every node; a suite's directly-applied seiload Job + fault CRs stamp
+// it too. The matching nightly label-GC sweep is a pending platform deliverable;
+// until it ships, normal-exit teardown (t.Cleanup) + the SeiNetwork
+// DeletionPolicy cascade are the cleanup path.
 const runLabelKey = "sei.io/harness-run"
 
 // spec is the typed input shared by the suites — the local-Go-state replacement
@@ -42,7 +44,7 @@ const runLabelKey = "sei.io/harness-run"
 type spec struct {
 	chainID    string        // SeiNetwork name == genesis chain id; also the peer-selector value and per-run discriminator
 	runID      string        // unique per run; the sei.io/harness-run label value
-	namespace  string        // shared nightly namespace (D2); "" => SDK client default (SA namespace)
+	namespace  string        // shared nightly namespace; "" => the SDK client's resolved default
 	seidImage  string        // seid container image under test
 	validators int           // genesis validator count (>= 1)
 	rpcNodes   int           // standalone RPC followers; named <chain>-rpc-0..N-1
diff --git a/test/integration/seiload_job.yaml.tmpl b/test/integration/seiload_job.yaml.tmpl
index 94a1ad3..b1927bb 100644
--- a/test/integration/seiload_job.yaml.tmpl
+++ b/test/integration/seiload_job.yaml.tmpl
@@ -7,6 +7,7 @@ metadata:
     sei.io/harness-run: "{{.RunID}}"
 spec:
   backoffLimit: 0
+  activeDeadlineSeconds: {{.DeadlineSeconds}}
   ttlSecondsAfterFinished: 86400
   template:
     metadata:
diff --git a/test/integration/seiload_test.go b/test/integration/seiload_test.go
index f6d155a..b4270ce 100644
--- a/test/integration/seiload_test.go
+++ b/test/integration/seiload_test.go
@@ -6,6 +6,7 @@ import (
 	"bytes"
 	"context"
 	_ "embed"
+	"fmt"
 	"net/http"
 	"strconv"
 	"strings"
@@ -40,6 +41,7 @@ type seiloadParams struct {
 	Image           string
 	DurationMinutes int
 	ProfileCM       string
+	DeadlineSeconds int
 }
 
 // clientset builds a client-go clientset from the ambient config — the harness
@@ -118,17 +120,16 @@ func renderJob(t *testing.T, p seiloadParams) *batchv1.Job {
 	return &job
 }
 
-// runSeiload drives seiload against the fleet as a decoupled unit: render the
-// platform profile, apply seiload's own Job manifest, wait for it to run the
-// full load, then assert the chain stayed live under it.
-//
-// The pass/fail signal is Job completion (seiload ran the load to the end
-// without erroring) plus post-load chain liveness. A throughput/regression gate
-// belongs in telemetry — a PromQL query over the run's metrics — not in this
-// harness; the Job carries the metrics scrape label so that gate can be added.
+// runSeiload renders the platform profile, applies seiload's Job manifest, waits
+// for the Job to complete, and asserts every follower is still caught up.
+// Pass/fail is Job completion plus post-load liveness; throughput gating is a
+// PromQL query over the run's metrics (the Job carries a metrics scrape label,
+// pending a podMonitor that selects it).
 func runSeiload(ctx context.Context, t *testing.T, cs *kubernetes.Clientset, ch *chain, s spec) {
 	t.Helper()
-	ns := envOr("SEI_NAMESPACE", ch.network.Namespace())
+	// The seiload Job co-locates with the chain; the network's resolved
+	// namespace is authoritative (never re-resolve from env here).
+	ns := ch.network.Namespace()
 
 	profileCM := "seiload-profile-" + s.runID
 	profileJSON := renderProfile(ctx, t, cs, ns, s.seiloadProfile, s.chainID, ch.evmEndpoints())
@@ -141,6 +142,9 @@ func runSeiload(ctx context.Context, t *testing.T, cs *kubernetes.Clientset, ch
 		Image:           s.seiloadImage,
 		DurationMinutes: s.durationMin,
 		ProfileCM:       profileCM,
+		// Self-terminating cap independent of the harness ctx: the load plus
+		// generous slack for image pull + the post-summary flush.
+		DeadlineSeconds: (s.durationMin + 15) * 60,
 	})
 	job.Namespace = ns
 	if _, err := cs.BatchV1().Jobs(ns).Create(ctx, job, metav1.CreateOptions{}); err != nil {
@@ -155,11 +159,13 @@ func runSeiload(ctx context.Context, t *testing.T, cs *kubernetes.Clientset, ch
 
 	waitJob(ctx, t, cs, ns, job.Name)
 
-	// Chain survived the load: node-0 still caught up.
+	// Chain survived the load: every follower still caught up (a follower can't
+	// catch up to a halted chain, so this transitively covers validator quorum).
 	hc := &http.Client{Timeout: 10 * time.Second}
-	n0 := ch.rpcNodes[0]
-	if err := sei.WaitCaughtUp(ctx, hc, n0.TendermintRPC()); err != nil {
-		t.Errorf("post-load %s not caught up: %v", n0.Name(), err)
+	for _, n := range ch.rpcNodes {
+		if err := sei.WaitCaughtUp(ctx, hc, n.TendermintRPC()); err != nil {
+			t.Errorf("post-load %s not caught up: %v", n.Name(), err)
+		}
 	}
 }
 
@@ -179,7 +185,8 @@ func waitJob(ctx context.Context, t *testing.T, cs *kubernetes.Clientset, ns, na
 				return
 			}
 			if cond.Type == batchv1.JobFailed && cond.Status == corev1.ConditionTrue {
-				t.Fatalf("seiload job %q failed: %s", name, cond.Message)
+				t.Fatalf("seiload job %q failed: %s\n--- seiload pod log (tail) ---\n%s",
+					name, cond.Message, podLogTail(ctx, cs, ns, name))
 			}
 		}
 		select {
@@ -189,3 +196,20 @@ func waitJob(ctx context.Context, t *testing.T, cs *kubernetes.Clientset, ns, na
 		}
 	}
 }
+
+// podLogTail returns the last 50 log lines of the first pod listed for a Job,
+// best-effort — the failure-time signal a Job condition message cannot give.
+func podLogTail(ctx context.Context, cs *kubernetes.Clientset, ns, jobName string) string {
+	pods, err := cs.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{
+		LabelSelector: "batch.kubernetes.io/job-name=" + jobName,
+	})
+	if err != nil || len(pods.Items) == 0 {
+		return fmt.Sprintf("(no pod for job %q: %v)", jobName, err)
+	}
+	lines := int64(50)
+	raw, err := cs.CoreV1().Pods(ns).GetLogs(pods.Items[0].Name, &corev1.PodLogOptions{TailLines: &lines}).DoRaw(ctx)
+	if err != nil {
+		return fmt.Sprintf("(read logs failed: %v)", err)
+	}
+	return string(raw)
+}