diff --git a/.gitignore b/.gitignore
index 72a79d0..ac2c77c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,9 @@
 # Build output — populated by `go build ./cmd/tracebloc` at the repo
-# root. Release artifacts go through dist/ (see Phase 5 / release
-# workflow); local hacking puts a binary here.
+# root, or by `go build -o ./bin/tracebloc ./cmd/tracebloc` for the
+# convention some folks prefer. Release artifacts go through dist/
+# (see Phase 5 / release workflow); local hacking puts a binary here.
 /tracebloc
+/bin/
 
 # Cross-compiled binaries staged by the release workflow.
 /dist/
diff --git a/cmd/tracebloc/main.go b/cmd/tracebloc/main.go
index 8066305..978cde4 100644
--- a/cmd/tracebloc/main.go
+++ b/cmd/tracebloc/main.go
@@ -18,8 +18,11 @@
 package main
 
 import (
+	"context"
 	"fmt"
 	"os"
+	"os/signal"
+	"syscall"
 
 	"github.com/tracebloc/cli/internal/cli"
 )
@@ -35,11 +38,32 @@ var (
 )
 
 func main() {
+	// Wire SIGINT / SIGTERM into the cobra root command's context.
+	// Long-running operations (e.g. push.Stage in `dataset push`)
+	// propagate ctx down through every k8s API call, so cancelling
+	// here triggers ctx.Done() → in-flight HTTP cancels → defers
+	// fire in normal stack unwind → orphan Pod gets cleaned up.
+	//
+	// Without this wire, Ctrl-C goes straight through Go's runtime
+	// signal handler and exits the process WITHOUT running defers
+	// — which silently broke the SIGINT-safe cleanup contract in
+	// push.Stage's docstring. Bugbot flagged this on PR-b.
+	//
+	// signal.NotifyContext (Go 1.16+) is the stdlib pattern for
+	// this. The stop func unregisters the handler so a *second*
+	// SIGINT does the normal hard-kill — important for the case
+	// where the cleanup itself hangs (e.g. API server unreachable
+	// even past the 30s cleanup deadline). The customer's second
+	// Ctrl-C should always work.
+	ctx, stop := signal.NotifyContext(context.Background(),
+		syscall.SIGINT, syscall.SIGTERM)
+	defer stop()
+
 	err := cli.NewRootCmd(cli.BuildInfo{
 		Version:   version,
 		GitSHA:    gitSHA,
 		BuildDate: buildDate,
-	}).Execute()
+	}).ExecuteContext(ctx)
 	if err == nil {
 		return
 	}
diff --git a/go.mod b/go.mod
index c792eb3..56b4e7b 100644
--- a/go.mod
+++ b/go.mod
@@ -10,7 +10,9 @@ go 1.26.0
 
 require (
 	github.com/santhosh-tekuri/jsonschema/v6 v6.0.2
+	github.com/schollz/progressbar/v3 v3.19.0
 	github.com/spf13/cobra v1.8.1
+	golang.org/x/term v0.39.0
 	golang.org/x/text v0.33.0
 	gopkg.in/yaml.v3 v3.0.1
 	k8s.io/api v0.36.1
@@ -28,13 +30,17 @@ require (
 	github.com/go-openapi/swag v0.23.0 // indirect
 	github.com/google/gnostic-models v0.7.0 // indirect
 	github.com/google/uuid v1.6.0 // indirect
+	github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/mailru/easyjson v0.7.7 // indirect
+	github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
+	github.com/moby/spdystream v0.5.1 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+	github.com/rivo/uniseg v0.4.7 // indirect
 	github.com/spf13/pflag v1.0.9 // indirect
 	github.com/x448/float16 v0.8.4 // indirect
 	go.yaml.in/yaml/v2 v2.4.3 // indirect
@@ -42,13 +48,13 @@ require (
 	golang.org/x/net v0.49.0 // indirect
 	golang.org/x/oauth2 v0.34.0 // indirect
 	golang.org/x/sys v0.40.0 // indirect
-	golang.org/x/term v0.39.0 // indirect
 	golang.org/x/time v0.14.0 // indirect
 	google.golang.org/protobuf v1.36.12-0.20260120151049-f2248ac996af // indirect
 	gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	k8s.io/klog/v2 v2.140.0 // indirect
 	k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a // indirect
+	k8s.io/streaming v0.36.1 // indirect
 	k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2 // indirect
 	sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
 	sigs.k8s.io/randfill v1.0.0 // indirect
diff --git a/go.sum b/go.sum
index 9a05a37..a49a7f6 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,7 @@
+github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
+github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
+github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM=
+github.com/chengxilo/virtualterm v1.0.4/go.mod h1:DyxxBZz/x1iqJjFxTFcr6/x+jSpqN0iwWCOK1q10rlY=
 github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -27,6 +31,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo=
+github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
 github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
@@ -42,6 +48,12 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
+github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
+github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
+github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
+github.com/moby/spdystream v0.5.1 h1:9sNYeYZUcci9R6/w7KDaFWEWeV4LStVG78Mpyq/Zm/Y=
+github.com/moby/spdystream v0.5.1/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -53,11 +65,15 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8m
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
+github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
 github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
 github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEVZGK7IN2kJkjTuQ=
 github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU=
+github.com/schollz/progressbar/v3 v3.19.0 h1:Ea18xuIRQXLAUidVDox3AbwfUhD0/1IvohyTutOIFoc=
+github.com/schollz/progressbar/v3 v3.19.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec=
 github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
 github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
 github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
@@ -114,6 +130,8 @@ k8s.io/klog/v2 v2.140.0 h1:Tf+J3AH7xnUzZyVVXhTgGhEKnFqye14aadWv7bzXdzc=
 k8s.io/klog/v2 v2.140.0/go.mod h1:o+/RWfJ6PwpnFn7OyAG3QnO47BFsymfEfrz6XyYSSp0=
 k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a h1:xCeOEAOoGYl2jnJoHkC3hkbPJgdATINPMAxaynU2Ovg=
 k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a/go.mod h1:uGBT7iTA6c6MvqUvSXIaYZo9ukscABYi2btjhvgKGZ0=
+k8s.io/streaming v0.36.1 h1:L+K68n4Gg940BGNNYtUBvL1WTLL0YnKT3s+P1MNAmR4=
+k8s.io/streaming v0.36.1/go.mod h1:z6fV3D+NVkoeqRMtWwlUZK6U17SY/LqNzOxWL6GyR/s=
 k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2 h1:AZYQSJemyQB5eRxqcPky+/7EdBj0xi3g0ZcxxJ7vbWU=
 k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2/go.mod h1:xDxuJ0whA3d0I4mf/C4ppKHxXynQ+fxnkmQH0vTHnuk=
 sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg=
diff --git a/internal/cli/dataset.go b/internal/cli/dataset.go
index d740e0f..6034e03 100644
--- a/internal/cli/dataset.go
+++ b/internal/cli/dataset.go
@@ -15,11 +15,11 @@ import (
 )
 
 // newDatasetCmd wires the `tracebloc dataset` subtree. The dominant
-// verb is `push`, introduced in Phase 3 (tracebloc/client#151) and
-// landing across two PRs: PR-a (this one) implements the
-// no-op-safe pre-flight; PR-b adds the actual file streaming via
-// an ephemeral Pod + tar-over-exec. Future verbs (`dataset list`,
-// `dataset rm`) hang off this parent in v0.2.
+// verb is `push`, completed in Phase 3 (tracebloc/client#151) across
+// PR-a (pre-flight: spec synth, validation, layout walk, cluster
+// discovery) and PR-b (this one: ephemeral stage Pod + tar-over-
+// exec stream + progress bar + SIGINT-safe cleanup). Future verbs
+// (`dataset list`, `dataset rm`) hang off this parent in v0.2.
 func newDatasetCmd() *cobra.Command {
 	cmd := &cobra.Command{
 		Use:   "dataset",
@@ -27,9 +27,13 @@ func newDatasetCmd() *cobra.Command {
 		Long: `Commands for staging and managing datasets on the cluster's
 shared PVC.
 
-Today: ` + "`dataset push`" + ` stages a local directory and (in PR-b)
-submits an ingestion run. ` + "`tracebloc cluster info`" + ` is the
-pre-flight you'd typically run before the first push.`,
+Today: ` + "`dataset push`" + ` stages a local directory to the cluster's
+shared PVC. Submission to jobs-manager (so the ingestor Job actually
+runs) lands in Phase 4 (` + "`tracebloc/client#152`" + `); for now the staged
+files are picked up by the existing helm ` + "`tracebloc/ingestor`" + ` flow.
+
+` + "`tracebloc cluster info`" + ` is the pre-flight you'd typically run
+before the first push.`,
 	}
 	cmd.AddCommand(newDatasetPushCmd())
 	return cmd
@@ -37,19 +41,21 @@ pre-flight you'd typically run before the first push.`,
 
 // newDatasetPushCmd implements `tracebloc dataset push <local-path>`.
 //
-// PR-a scope (what this implements today):
+// Phase 3 scope (now complete across PR-a + PR-b):
 //
 //   - Synthesize the ingest spec from flags (`internal/push.SpecArgs.Build`)
 //   - Validate it against the embedded ingest.v1 schema
 //   - Walk the local directory + enforce v0.1 size caps
 //   - Discover cluster, parent release, and shared PVC
-//   - Print a single-screen "ready to stage" summary
-//   - --dry-run stops here (and so does this PR — actual staging
-//     errors out with "coming in PR-b" until #151 PR-b merges)
+//   - Print a single-screen pre-flight summary
+//   - Either --dry-run stop, OR create an ephemeral stage Pod
+//     (alpine 3.20 pinned by digest, PSA-restricted security
+//     context), tar local files into it via an SPDY exec stream
+//     with a progress bar, then defer-delete the Pod
 //
-// PR-b will add the ephemeral stage Pod, tar-over-exec stream,
-// progress bar, and SIGINT-safe cleanup — slotting into the
-// "TODO: PR-b" branch below without touching anything above it.
+// Phase 4 (`tracebloc/client#152`) hooks the submit-to-jobs-manager
+// step into the bottom of this command, replacing the "manually
+// kick off helm ingestor" workaround in the success message.
 func newDatasetPushCmd() *cobra.Command {
 	var (
 		// Kubeconfig flags — same conventions as `cluster info`.
@@ -71,10 +77,18 @@ func newDatasetPushCmd() *cobra.Command {
 		// Operations flags.
 		dryRun bool
 
-		// Ingestor SA name override (only matters once PR-b mints
-		// a token to talk to the future stage-pod-creation hook).
-		// Plumbed today so PR-b doesn't have to touch flag wiring.
+		// Ingestor SA name override. Used as the ServiceAccountName
+		// of the ephemeral stage Pod, so the Pod inherits whatever
+		// imagePullSecrets + PSA exemptions the admin already
+		// configured for that SA.
 		ingestorSAName string
+
+		// Stage Pod image override. Defaults to the digest-pinned
+		// alpine that ships with the CLI; air-gapped customers
+		// override this to an image their registry mirror serves.
+		// Pin by digest in your override too — tag-only references
+		// drift silently and break "all my pushes worked yesterday."
+		stagePodImage string
 	)
 
 	cmd := &cobra.Command{
@@ -99,12 +113,13 @@ datasets need the v0.2 cloud-source story (S3/GCS/HTTPS sources) —
 see tracebloc/client#147 non-goals.
 
 Exit codes:
-  0   staged + (in PR-b) submitted successfully
-  2   schema validation failed (synthesized spec rejected)
+  0   files staged successfully (Phase 4 will add: submitted + completed)
+  2   schema validation failed (synthesized spec rejected) or
+      v0.1-unsupported category passed
   3   local-layout or kubeconfig error
-  4   cluster reachable but parent release missing
-  6   pre-flight succeeded but the actual stage step isn't
-      implemented yet (PR-b for #151 will deliver it)`,
+  4   cluster reachable but parent release / shared PVC missing
+  7   pre-flight succeeded but staging the files failed
+      (Pod creation, image pull, exec stream, or remote tar error)`,
 		Args: cobra.ExactArgs(1),
 		RunE: func(cmd *cobra.Command, args []string) error {
 			return runDatasetPush(cmd.Context(), cmd.OutOrStdout(), cmd.ErrOrStderr(),
@@ -116,6 +131,7 @@ Exit codes:
 					Spec:           push.SpecArgs{Table: table, Category: category, Intent: intent, LabelColumn: labelColumn},
 					DryRun:         dryRun,
 					IngestorSAName: ingestorSAName,
+					StagePodImage:  stagePodImage,
 				})
 		},
 	}
@@ -146,6 +162,9 @@ Exit codes:
 	cmd.Flags().StringVar(&ingestorSAName, "ingestor-sa", "",
 		"override the ingestor ServiceAccount name (default: \"ingestor\"); "+
 			"set this if you customized ingestionAuthz.serviceAccountName in the parent client chart")
+	cmd.Flags().StringVar(&stagePodImage, "stage-pod-image", "",
+		"override the ephemeral stage Pod's image (default: digest-pinned alpine 3.20 baked into the CLI). "+
+			"Pin by digest in your override too — tag-only refs drift silently.")
 
 	return cmd
 }
@@ -162,12 +181,13 @@ type runDatasetPushArgs struct {
 	Spec           push.SpecArgs
 	DryRun         bool
 	IngestorSAName string
+	StagePodImage  string
 }
 
-// runDatasetPush is the PR-a slim implementation. It performs every
-// pre-flight check and prints a summary; the actual file staging
-// is gated behind a clear "not yet implemented" error so PR-a
-// merging doesn't silently advertise a feature it can't deliver.
+// runDatasetPush is the full Phase 3 implementation: pre-flight
+// checks, then either --dry-run stop or stage Pod + tar stream +
+// cleanup. Phase 4 (#152) will hook submit-to-jobs-manager after
+// the staging step.
 //
 // Step order is "fail fast, fail local" — every step that doesn't
 // need the cluster runs before any that does, so a customer with
@@ -177,7 +197,7 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
 	// 1. Validate the table name BEFORE anything else. It's both
 	//    the MySQL identifier and the /data/shared/<table>/ PVC
 	//    subdirectory — an unsanitized traversal name (../../etc)
-	//    would escape that subtree once PR-b's stage Pod writes to
+	//    would escape that subtree once the stage Pod writes to
 	//    it. The embedded schema only checks minLength on `table`,
 	//    so this CLI-side guard is the real fix. SpecArgs.Build()
 	//    below calls StagedPrefix, which panics on an unsafe name —
@@ -186,7 +206,23 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
 		return &exitError{code: 2, err: err}
 	}
 
-	// 2. Synthesize the spec from flags + validate against schema.
+	// 2. v0.1 category gate. Runs BEFORE schema validation because
+	//    schema-valid-but-unsupported categories (e.g.
+	//    tabular_classification) would otherwise fail with the
+	//    schema's "missing property 'schema'" message — confusing
+	//    for the customer who has no way to set --schema in v0.1.
+	//    Nonsense categories (typos) also hit this gate; the
+	//    "only image_classification in v0.1" message is more
+	//    actionable than the schema's 11-option enum list anyway.
+	//    Bugbot's review-on-self caught the missing gate on PR-a.
+	if a.Spec.Category != "" && a.Spec.Category != "image_classification" {
+		return &exitError{code: 2, err: fmt.Errorf(
+			"category %q is not supported in v0.1 (only image_classification). "+
+				"Other categories are one-PR additions in v0.2 — see "+
+				"tracebloc/client#147 non-goals.", a.Spec.Category)}
+	}
+
+	// 3. Synthesize the spec from flags + validate against schema.
 	//    Catches "bad category", "missing intent" etc. BEFORE we
 	//    touch the filesystem or the cluster. The error formatter
 	//    is the same one ingest validate uses, so a customer who
@@ -223,7 +259,7 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
 		return &exitError{code: 2, err: errors.New("synthesized spec failed schema validation; check the flag values above")}
 	}
 
-	// 3. Walk the local directory. Enforces layout + size caps;
+	// 4. Walk the local directory. Enforces layout + size caps;
 	//    customer sees a clear pointer to expected layout if they
 	//    pass the wrong directory.
 	layout, err := push.Discover(a.LocalPath)
@@ -231,7 +267,7 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
 		return &exitError{code: 3, err: err}
 	}
 
-	// 4. Cluster discovery — same kubeconfig path as `cluster info`.
+	// 5. Cluster discovery — same kubeconfig path as `cluster info`.
 	//    Errors mirror that command's exit-code contract (3 for
 	//    kubeconfig, 4 for missing release) so behaviour is
 	//    consistent across pre-flight commands.
@@ -255,36 +291,74 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
 		release.IngestorSAName = a.IngestorSAName
 	}
 
-	// 5. PVC discovery. New in this PR — confirms the chart's
-	//    shared-data PVC is Bound before we waste time provisioning
-	//    a Pod that can't mount it.
+	// 6. PVC discovery — confirms the chart's shared-data PVC is
+	//    Bound before we waste time provisioning a Pod that can't
+	//    mount it.
 	pvc, err := cluster.DiscoverSharedPVC(ctx, cs, resolved.Namespace)
 	if err != nil {
 		return &exitError{code: 4, err: err}
 	}
 
-	// 6. Print the pre-flight summary. The output is the same in
-	//    dry-run and (eventually) live mode — only the "what
-	//    happens next" line differs. Customers iterating on a
-	//    bad layout see this every attempt, so it's worth keeping
-	//    skimmable: one fact per line, aligned by column.
+	// 7. Print the pre-flight summary. The output is the same in
+	//    dry-run and live mode — only the "what happens next" line
+	//    differs. Customers iterating on a bad layout see this
+	//    every attempt, so it's worth keeping skimmable: one fact
+	//    per line, aligned by column.
 	printPushPreflight(out, layout, release, pvc, spec, a.DryRun)
 
-	// 7. Dry-run stop. Acknowledged success.
+	// 8. Dry-run stop. Acknowledged success.
 	if a.DryRun {
 		_, _ = fmt.Fprintln(out, "Dry-run complete — no cluster resources were created.")
 		return nil
 	}
 
-	// 8. The actual staging branch lands in PR-b. Failing here
-	//    rather than silently returning success means a customer
-	//    who pulled PR-a's binary and ran without --dry-run gets
-	//    a clear "wait for PR-b" signal instead of "0 files
-	//    transferred" confusion in Phase 4.
-	return &exitError{code: 6, err: errors.New(
-		"pre-flight succeeded but the actual file staging step isn't " +
-			"implemented yet — wait for tracebloc/client#151 PR-b. " +
-			"Re-run with --dry-run to validate without this error.")}
+	// 9. Stage the files: create ephemeral Pod → wait Ready → tar
+	//    stream → cleanup. The deferred cleanup inside push.Stage
+	//    runs on success and failure (including ctx cancellation
+	//    from a SIGINT handler), so no orphan Pod is left behind.
+	//
+	//    Exit code 7 ("staging failed") is distinct from the
+	//    pre-flight codes so customers can branch on whether the
+	//    failure was their environment vs the actual data transfer.
+	progress := push.NewProgress(out, layout.TotalBytes,
+		fmt.Sprintf("Staging %s", a.Spec.Table))
+	// Defer Finish so a failure path that returns BEFORE
+	// StreamLayout (e.g. CreateStagePod fails on PSA rejection,
+	// WaitForStagePodReady times out) still clears the TTY
+	// progress UI. push.StreamLayout's own deferred Finish would
+	// otherwise be unreachable. Calling Finish twice on the same
+	// schollz bar is a no-op, so the double-call on the happy
+	// path is safe. Bugbot flagged on PR-b round 5.
+	defer progress.Finish()
+	stageErr := push.Stage(ctx, push.StageOptions{
+		Client: cs,
+		Executor: &push.SPDYExecutor{
+			Config: resolved.RestConfig,
+			Client: cs,
+		},
+		Namespace:      resolved.Namespace,
+		IngestorSAName: release.IngestorSAName,
+		PVCClaimName:   pvc.ClaimName,
+		PVCMountPath:   pvc.MountPath,
+		Layout:         layout,
+		Table:          a.Spec.Table,
+		StagePodImage:  a.StagePodImage,
+		Progress:       progress,
+		Out:            out,
+	})
+	if stageErr != nil {
+		return &exitError{code: 7, err: stageErr}
+	}
+
+	// 10. Phase 4 (submit to jobs-manager + watch + summary) hooks
+	//     in here — tracebloc/client#152. For now PR-b leaves the
+	//     dataset staged on the PVC and exits 0, which the customer
+	//     can then chase manually via `helm install ingestor ...` if
+	//     they need the ingestion to actually run today.
+	_, _ = fmt.Fprintln(out)
+	_, _ = fmt.Fprintln(out, "Dataset staged. Submission to jobs-manager arrives in Phase 4 (#152);")
+	_, _ = fmt.Fprintln(out, "in the meantime, the existing helm ingestor flow can pick up the staged files.")
+	return nil
 }
 
 // printPushPreflight is the customer-facing summary. Mirrors
@@ -333,7 +407,7 @@ func printPushPreflight(
 	_, _ = fmt.Fprintln(out)
 
 	if !dryRun {
-		_, _ = fmt.Fprintf(out, "Next: stage %d files (%s) → %s (coming in PR-b for #151)\n",
+		_, _ = fmt.Fprintf(out, "Next: stage %d files (%s) → %s\n",
 			1+len(layout.Images), push.HumanBytes(layout.TotalBytes),
 			push.StagedPrefix(spec["table"].(string)))
 		_, _ = fmt.Fprintln(out)
diff --git a/internal/cli/dataset_test.go b/internal/cli/dataset_test.go
index 5a4ef69..96d7ea0 100644
--- a/internal/cli/dataset_test.go
+++ b/internal/cli/dataset_test.go
@@ -63,39 +63,33 @@ func execDatasetPush(t *testing.T, args []string) (exitCode int, stdout, stderr
 	return ExitCodeFromError(err), so.String(), se.String()
 }
 
-// TestDatasetPush_BadCategory_ExitsTwo: the synthesized spec gets
-// fed through the embedded schema before any cluster work; an
-// invalid category surfaces with exit 2 (the CLI-wide "schema
-// violation" code).
-func TestDatasetPush_BadCategory_ExitsTwo(t *testing.T) {
+// TestDatasetPush_UnsupportedCategory_ExitsTwo: v0.1 only supports
+// image_classification end-to-end (epic #147 non-goals); the
+// CLI-side gate runs before schema validation so a customer who
+// passes --category=tabular_classification gets the actionable
+// "v0.1 supports only image_classification" message instead of
+// the schema's confusing "missing property 'schema'" (which the
+// customer has no way to supply in v0.1). Bugbot review-on-self
+// caught the missing gate on PR-a; landing it here.
+func TestDatasetPush_UnsupportedCategory_ExitsTwo(t *testing.T) {
 	root := imgcLayout(t)
-	code, _, stderr := execDatasetPush(t, []string{
-		root,
-		"--table=t1",
-		"--category=definitely-not-a-category",
-		"--intent=train",
-		"--label-column=label",
-	})
-	if code != 2 {
-		t.Fatalf("expected exit 2 for schema failure, got %d", code)
-	}
-	// The same FormatErrors output ingest validate uses, routed
-	// to stderr so downstream pipes of stdout aren't polluted.
-	// Checking for "category" + "image_classification" surfacing
-	// means we're getting the JSON-pointer-anchored diagnostic +
-	// the enum-list expansion, not a generic message.
-	// Check three things: (1) the JSON-pointer-style "category"
-	// anchor surfaces, (2) the enum-list expansion happened (so
-	// "image_classification" appears as a valid option), (3) our
-	// "synthesized spec" framing is on the right output stream.
-	// The wording "synthesized spec" is intentionally distinct
-	// from ingest validate's "schema validation failed" — it tells
-	// the customer the CLI synthesized the YAML; they didn't
-	// author it.
-	for _, want := range []string{"category", "image_classification", "synthesized spec failed schema validation"} {
-		if !strings.Contains(stderr, want) {
-			t.Errorf("expected stderr to mention %q, got:\n%s", want, stderr)
-		}
+	for _, badCategory := range []string{
+		"tabular_classification",    // schema-valid but v0.1-unsupported
+		"object_detection",          // ditto
+		"definitely-not-a-category", // nonsense; gate catches this too
+	} {
+		t.Run(badCategory, func(t *testing.T) {
+			code, _, _ := execDatasetPush(t, []string{
+				root,
+				"--table=t1",
+				"--category=" + badCategory,
+				"--intent=train",
+				"--label-column=label",
+			})
+			if code != 2 {
+				t.Fatalf("expected exit 2 for unsupported category %q, got %d", badCategory, code)
+			}
+		})
 	}
 }
 
diff --git a/internal/push/orphan.go b/internal/push/orphan.go
new file mode 100644
index 0000000..65ab4c0
--- /dev/null
+++ b/internal/push/orphan.go
@@ -0,0 +1,196 @@
+package push
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+)
+
+// OrphanGracePeriod is how long a non-Running stage Pod has to live
+// before we flag it as an orphan. Running Pods are NEVER flagged
+// regardless of age (see FindOrphanStagePods's Phase=Running skip,
+// Bugbot r7) — Pods past pod.go's StagePodActiveDeadline of 30 min
+// are still legitimate "in-progress active push" candidates, and
+// any per-age cap would produce false positives for near-cap pushes.
+//
+// 5 minutes targets the genuinely-stuck shapes:
+//
+//   - Phase=Pending past 5 min: image pull failure that didn't
+//     resolve, scheduling stuck on missing nodes, PSA rejection
+//     that took kubelet a while to surface
+//   - Phase=Failed past 5 min: container crashed at startup or
+//     during stream, CLI didn't get a chance to clean up
+//   - Phase=Unknown past 5 min: node went away (network partition,
+//     kubelet crash) and the Pod metadata is stranded
+//
+// All three shapes are genuinely orphan — no still-running push
+// will ever progress past Pending/Failed/Unknown into Ready, so
+// 5 min is comfortable for "stuck enough to surface as a warning."
+const OrphanGracePeriod = 5 * time.Minute
+
+// Orphan describes a stage Pod found by the orphan scan. Carries
+// enough metadata for the warning surface to render an actionable
+// "delete with: kubectl delete pod X -n Y" hint.
+type Orphan struct {
+	// Name is the metadata.name.
+	Name string
+
+	// Namespace is the resolved namespace (not the chart's
+	// release-name, the actual API namespace).
+	Namespace string
+
+	// Table is the destination table the orphan was staging for —
+	// read from the StagePodTableLabel set in BuildStagePodSpec.
+	// May be empty for very old orphans (pre-label-convention),
+	// but every Pod the CLI ever created carries this label.
+	Table string
+
+	// Age is how long the Pod has existed (now - creationTimestamp).
+	// Surfaces in the warning so customers can spot stale Pods at
+	// a glance ("3 hours old" → almost certainly safe to delete;
+	// "6 minutes old" → maybe wait, might be from a slow parallel
+	// push).
+	Age time.Duration
+}
+
+// FindOrphanStagePods lists every stage Pod the CLI has ever
+// created in the namespace and returns the ones past OrphanGrace
+// Period. Pods younger than that are silently filtered — they
+// might be the CURRENT invocation's own Pod (the orphan scan runs
+// before CreateStagePod, but in a tight enough loop the previous
+// invocation's just-created Pod could plausibly still be < grace).
+//
+// Returns nil + nil if there are no orphans. Doesn't return an
+// error for the API call failing — orphan scanning is best-effort
+// (an RBAC denial on `list pods` shouldn't block the push) — the
+// caller logs the scan failure but proceeds.
+//
+// Hence the signature returns ([]Orphan, error): the slice is
+// empty on success-with-no-orphans, the error is non-nil only on
+// API failures the caller might want to surface for debugging.
+func FindOrphanStagePods(ctx context.Context, cs kubernetes.Interface, namespace string) ([]Orphan, error) {
+	selector := fmt.Sprintf("%s=%s,%s=%s",
+		StagePodManagedByLabel, StagePodManagedByValue,
+		StagePodComponentLabel, StagePodComponentValue)
+
+	pods, err := cs.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
+		LabelSelector: selector,
+	})
+	if err != nil {
+		// Don't wrap — the caller (cli.runDatasetPush) will either
+		// log this for diagnostic value and proceed, or ignore it
+		// entirely. Either way, the orphan scan isn't on the
+		// critical path.
+		return nil, fmt.Errorf("listing stage Pods in %s: %w", namespace, err)
+	}
+
+	now := time.Now()
+	var orphans []Orphan
+	for i := range pods.Items {
+		p := &pods.Items[i]
+
+		// Phase=Running means an active push (this workstation or
+		// another's) is still doing real work. activeDeadlineSeconds
+		// is its safety net at the cluster level. Flagging it as
+		// orphan would produce false positives for legitimate
+		// slow/near-cap pushes — pod.go budgets ~8.5 minutes for
+		// the 1 GiB-cap transfer alone, which exceeds the 5-min
+		// grace below. Bugbot flagged the false-positive risk on
+		// PR-b round 7.
+		//
+		// Pods in non-Running phases (Pending/Failed/Unknown) past
+		// the grace period are still flagged — those are the
+		// genuine orphan shapes (stuck on image pull, crashed
+		// during stream, network-partitioned).
+		if p.Status.Phase == corev1.PodRunning {
+			continue
+		}
+
+		age := now.Sub(p.CreationTimestamp.Time)
+		if age < OrphanGracePeriod {
+			continue
+		}
+		orphans = append(orphans, Orphan{
+			Name:      p.Name,
+			Namespace: p.Namespace,
+			Table:     p.Labels[StagePodTableLabel],
+			Age:       age,
+		})
+	}
+	return orphans, nil
+}
+
+// FormatOrphansWarning renders the orphan list as a multi-line
+// customer-facing warning. Returns empty string when orphans is
+// empty — caller can blind-print without an "if len > 0" check.
+//
+// The "delete with" hint is the actionable part. v0.1 doesn't
+// auto-delete because:
+//
+//  1. We can't tell from labels alone whether the Pod is stuck
+//     mid-transfer for a still-live parallel push from another
+//     workstation, vs truly orphaned from a crash.
+//  2. Deleting someone else's in-progress push silently is bad
+//     UX. A warn-only approach respects the principle of least
+//     surprise.
+//
+// v0.2 can layer on `--cleanup-orphans` for ops folks who want it
+// automated.
+func FormatOrphansWarning(orphans []Orphan) string {
+	if len(orphans) == 0 {
+		return ""
+	}
+	var s string
+	s += fmt.Sprintf("WARNING: %d orphan stage Pod%s detected in this namespace — likely "+
+		"leftover from a previously crashed `dataset push`:\n",
+		len(orphans), pluralS(len(orphans)))
+	names := make([]string, 0, len(orphans))
+	for _, o := range orphans {
+		tableHint := ""
+		if o.Table != "" {
+			tableHint = fmt.Sprintf(" (table: %s)", o.Table)
+		}
+		s += fmt.Sprintf("  - %s, age %s%s\n", o.Name, humanDuration(o.Age), tableHint)
+		names = append(names, o.Name)
+	}
+	// Use SPECIFIC Pod names in the delete command, not the
+	// label selector. The selector would match every stage Pod
+	// in the namespace, including legitimate running ones from
+	// parallel pushes (this workstation or another's) — copy-
+	// pasting a label-based delete could silently kill someone
+	// else's in-progress push. Bugbot flagged the over-broad
+	// delete on PR-b round 7.
+	s += "Delete with: kubectl delete pod -n " + orphans[0].Namespace + " " +
+		strings.Join(names, " ") + "\n"
+	return s
+}
+
+// pluralS returns "s" for n != 1, else "". Local helper to keep
+// internal/push self-contained (the cli package has its own copy
+// for the same reason — see cli/ingest.go's plural()).
+func pluralS(n int) string {
+	if n == 1 {
+		return ""
+	}
+	return "s"
+}
+
+// humanDuration is a coarse "X hours" / "X minutes" / "X seconds"
+// formatter for orphan warnings. time.Duration.String() returns
+// "2h13m45s" which is technically more precise but harder to read
+// in a one-line warning.
+func humanDuration(d time.Duration) string {
+	switch {
+	case d >= time.Hour:
+		return fmt.Sprintf("%dh", int(d.Hours()))
+	case d >= time.Minute:
+		return fmt.Sprintf("%dm", int(d.Minutes()))
+	default:
+		return fmt.Sprintf("%ds", int(d.Seconds()))
+	}
+}
diff --git a/internal/push/orphan_test.go b/internal/push/orphan_test.go
new file mode 100644
index 0000000..271f086
--- /dev/null
+++ b/internal/push/orphan_test.go
@@ -0,0 +1,220 @@
+package push
+
+import (
+	"context"
+	"strings"
+	"testing"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes/fake"
+)
+
+// stagePodWithAge constructs a stage-labeled Pod whose
+// creationTimestamp is `age` ago. Used to seed the fake clientset
+// for each orphan-test variant. Default phase is "" (unscheduled) —
+// helpers below construct Running/etc explicitly.
+func stagePodWithAge(name, table string, age time.Duration) *corev1.Pod {
+	return &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:              name,
+			Namespace:         "tracebloc",
+			CreationTimestamp: metav1.NewTime(time.Now().Add(-age)),
+			Labels: map[string]string{
+				StagePodManagedByLabel: StagePodManagedByValue,
+				StagePodComponentLabel: StagePodComponentValue,
+				StagePodTableLabel:     table,
+			},
+		},
+	}
+}
+
+// runningStagePodWithAge constructs a stage-labeled Pod in
+// Phase=Running. Used to assert the Bugbot-r7 false-positive fix:
+// a Running Pod is presumed to be an active push (this or
+// another workstation's) and must NOT be flagged as an orphan
+// regardless of age.
+func runningStagePodWithAge(name, table string, age time.Duration) *corev1.Pod {
+	p := stagePodWithAge(name, table, age)
+	p.Status.Phase = corev1.PodRunning
+	return p
+}
+
+// TestFindOrphanStagePods_NoOrphans: empty cluster returns empty,
+// no error.
+func TestFindOrphanStagePods_NoOrphans(t *testing.T) {
+	cs := fake.NewClientset()
+	got, err := FindOrphanStagePods(context.Background(), cs, "tracebloc")
+	if err != nil {
+		t.Fatalf("FindOrphanStagePods: %v", err)
+	}
+	if len(got) != 0 {
+		t.Errorf("len(orphans) = %d, want 0", len(got))
+	}
+}
+
+// TestFindOrphanStagePods_RecentPodFiltered: a Pod just created
+// (well within OrphanGracePeriod) is silently filtered out — it
+// might be the current invocation's own Pod from a parallel
+// workstation push.
+func TestFindOrphanStagePods_RecentPodFiltered(t *testing.T) {
+	cs := fake.NewClientset(stagePodWithAge("fresh", "t1", 30*time.Second))
+	got, err := FindOrphanStagePods(context.Background(), cs, "tracebloc")
+	if err != nil {
+		t.Fatalf("FindOrphanStagePods: %v", err)
+	}
+	if len(got) != 0 {
+		t.Errorf("recent Pod surfaced as orphan; got %d, want 0", len(got))
+	}
+}
+
+// TestFindOrphanStagePods_StaleSurfaces: a Pod past the grace
+// period surfaces with its metadata intact (name, table, age).
+func TestFindOrphanStagePods_StaleSurfaces(t *testing.T) {
+	cs := fake.NewClientset(stagePodWithAge("stale-cats", "cats_dogs", 30*time.Minute))
+	got, err := FindOrphanStagePods(context.Background(), cs, "tracebloc")
+	if err != nil {
+		t.Fatalf("FindOrphanStagePods: %v", err)
+	}
+	if len(got) != 1 {
+		t.Fatalf("len(orphans) = %d, want 1", len(got))
+	}
+	o := got[0]
+	if o.Name != "stale-cats" {
+		t.Errorf("Name = %q, want stale-cats", o.Name)
+	}
+	if o.Table != "cats_dogs" {
+		t.Errorf("Table = %q, want cats_dogs", o.Table)
+	}
+	if o.Age < 29*time.Minute {
+		t.Errorf("Age = %v, want at least 29m", o.Age)
+	}
+}
+
+// TestFindOrphanStagePods_IgnoresNonStagePods: the chart's own
+// Pods (managed-by=Helm) and arbitrary user Pods must NOT show
+// up — the label selector is the safety boundary.
+func TestFindOrphanStagePods_IgnoresNonStagePods(t *testing.T) {
+	chartPod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:              "jobs-manager-abc",
+			Namespace:         "tracebloc",
+			CreationTimestamp: metav1.NewTime(time.Now().Add(-1 * time.Hour)),
+			Labels: map[string]string{
+				"app.kubernetes.io/managed-by": "Helm",
+				"app.kubernetes.io/name":       "client",
+			},
+		},
+	}
+	customerPod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:              "their-app",
+			Namespace:         "tracebloc",
+			CreationTimestamp: metav1.NewTime(time.Now().Add(-1 * time.Hour)),
+		},
+	}
+	cs := fake.NewClientset(chartPod, customerPod)
+	got, err := FindOrphanStagePods(context.Background(), cs, "tracebloc")
+	if err != nil {
+		t.Fatalf("FindOrphanStagePods: %v", err)
+	}
+	if len(got) != 0 {
+		t.Errorf("non-stage Pods surfaced as orphans; got %d, want 0; names=%v", len(got), got)
+	}
+}
+
+// TestFormatOrphansWarning_Empty returns the empty string so the
+// caller can blind-print without conditionals.
+func TestFormatOrphansWarning_Empty(t *testing.T) {
+	if got := FormatOrphansWarning(nil); got != "" {
+		t.Errorf("FormatOrphansWarning(nil) = %q, want empty", got)
+	}
+}
+
+// TestFormatOrphansWarning_ActionableHint: the warning must contain
+// (a) the count, (b) the per-pod name + age + table, (c) a
+// kubectl delete command using SPECIFIC POD NAMES (not the label
+// selector — Bugbot r7 flagged that the label selector would
+// match every stage Pod including parallel-push's still-running
+// ones).
+func TestFormatOrphansWarning_ActionableHint(t *testing.T) {
+	orphans := []Orphan{
+		{Name: "stage-a", Namespace: "tracebloc", Table: "cats_dogs", Age: 30 * time.Minute},
+		{Name: "stage-b", Namespace: "tracebloc", Table: "xrays", Age: 2 * time.Hour},
+	}
+	got := FormatOrphansWarning(orphans)
+	for _, want := range []string{
+		"2 orphan stage Pods",
+		"stage-a", "cats_dogs", "30m",
+		"stage-b", "xrays", "2h",
+		// Targeted delete — specific names, not the label
+		// selector. The label-selector version would nuke
+		// running pods from parallel pushes.
+		"kubectl delete pod -n tracebloc stage-a stage-b",
+	} {
+		if !strings.Contains(got, want) {
+			t.Errorf("warning missing %q in:\n%s", want, got)
+		}
+	}
+	// Regression check: the label selector MUST NOT appear in
+	// the delete command. If a refactor reintroduces it,
+	// customers could accidentally nuke their parallel-push Pod.
+	if strings.Contains(got, "kubectl delete pod -n tracebloc -l ") {
+		t.Errorf("warning uses label-selector delete (would catch parallel pushes):\n%s", got)
+	}
+}
+
+// TestFindOrphanStagePods_SkipsRunningPods is the Bugbot-r7
+// false-positive fix: a Pod in Phase=Running is presumed to be an
+// active push (this workstation or another's still doing work),
+// and must NOT be flagged as orphan regardless of age. Near-cap
+// 1 GiB pushes legitimately take >5 min (pod.go budgets ~8.5 min
+// for the stream alone), so without this guard the next concurrent
+// push would warn about Pods that are still actively transferring.
+func TestFindOrphanStagePods_SkipsRunningPods(t *testing.T) {
+	// 30-minute-old Running Pod — well past OrphanGracePeriod,
+	// but Running means it's an active push.
+	cs := fake.NewClientset(runningStagePodWithAge("active-slow-push",
+		"big_table", 30*time.Minute))
+	got, err := FindOrphanStagePods(context.Background(), cs, "tracebloc")
+	if err != nil {
+		t.Fatalf("FindOrphanStagePods: %v", err)
+	}
+	if len(got) != 0 {
+		t.Errorf("Running Pod flagged as orphan; got %d, want 0", len(got))
+	}
+}
+
+// TestFindOrphanStagePods_FlagsNonRunningPastGrace: complement to
+// the Running-skip — a Failed or Pending Pod past the grace period
+// IS still an orphan (crashed-at-startup case, stuck-on-image-pull
+// case). The Running-skip should narrow the false positives, not
+// eliminate the warning entirely.
+func TestFindOrphanStagePods_FlagsNonRunningPastGrace(t *testing.T) {
+	failedPod := stagePodWithAge("crashed", "t1", 30*time.Minute)
+	failedPod.Status.Phase = corev1.PodFailed
+	cs := fake.NewClientset(failedPod)
+	got, err := FindOrphanStagePods(context.Background(), cs, "tracebloc")
+	if err != nil {
+		t.Fatalf("FindOrphanStagePods: %v", err)
+	}
+	if len(got) != 1 {
+		t.Errorf("Failed Pod past grace not flagged; got %d, want 1", len(got))
+	}
+}
+
+// TestFormatOrphansWarning_NoTableLabel: very old orphans from
+// before we added StagePodTableLabel might not have it. The
+// formatter must not crash or render "(table: )".
+func TestFormatOrphansWarning_NoTableLabel(t *testing.T) {
+	got := FormatOrphansWarning([]Orphan{
+		{Name: "old-orphan", Namespace: "tracebloc", Age: 1 * time.Hour},
+	})
+	if strings.Contains(got, "(table: )") {
+		t.Errorf("warning rendered empty table parenthetical:\n%s", got)
+	}
+	if !strings.Contains(got, "old-orphan") {
+		t.Errorf("warning missing Pod name:\n%s", got)
+	}
+}
diff --git a/internal/push/pod.go b/internal/push/pod.go
new file mode 100644
index 0000000..fb28e6d
--- /dev/null
+++ b/internal/push/pod.go
@@ -0,0 +1,528 @@
+package push
+
+import (
+	"context"
+	"crypto/rand"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"strings"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/kubernetes"
+)
+
+// DefaultStagePodImage is the alpine image the ephemeral stage Pod
+// runs. Pinned by digest at CLI build time so a customer pulling
+// v0.1.x at any future date gets bit-for-bit identical behavior —
+// no "alpine just shipped a glibc-on-musl regression and now my
+// stage Pod crashloops" surprises.
+//
+// alpine:3.20 was the current 3.x stable when Phase 3 PR-b landed
+// (May 2026). The image is tiny (~8 MiB), has tar/sh/busybox built
+// in, and is one of the most-mirrored images on the planet — air-
+// gapped customers can almost always pull it from their internal
+// mirror without extra config.
+//
+// Override via `tracebloc dataset push --stage-pod-image=...` for:
+//
+//   - Customers on registries that don't proxy docker.io
+//   - Customers running a curated base image with extra audit
+//     instrumentation
+//   - Custom forks that need a specific busybox build
+//
+// Bumping the pinned digest is a v0.2 task; track it alongside
+// the kube-deps refresh.
+const DefaultStagePodImage = "alpine:3.20@sha256:d9e853e87e55526f6b2917df91a2115c36dd7c696a35be12163d44e6e2a4b6bc"
+
+// StagePodLabel{Key,Value} mark every Pod the CLI creates so the
+// orphan-scan logic (orphan.go) can find leftover Pods from a
+// previously crashed `dataset push` invocation. Using the
+// kubernetes.io/managed-by label means the chart's own resources
+// (managed-by=Helm) don't get caught in our scan.
+const (
+	StagePodManagedByLabel = "app.kubernetes.io/managed-by"
+	StagePodManagedByValue = "tracebloc-cli"
+
+	// StagePodComponentLabel narrows the orphan scan to just stage
+	// Pods (not, e.g., a future `tracebloc cluster doctor` Pod).
+	StagePodComponentLabel = "tracebloc.io/component"
+	StagePodComponentValue = "stage-pod"
+
+	// StagePodTableLabel records which table the Pod was staging
+	// for. Useful in the orphan-warning output so the customer
+	// knows which `dataset push` invocation it came from.
+	StagePodTableLabel = "tracebloc.io/table"
+)
+
+// StagePodActiveDeadline is the in-cluster self-kill timer. The CLI
+// also deletes the Pod via defer + SIGINT handler, but those only
+// fire if the CLI process is still alive — a hard kill (`kill -9`,
+// OS crash, network partition between laptop and cluster) leaves
+// the Pod stranded. activeDeadlineSeconds means the kubelet kills
+// it even if the CLI never gets the chance.
+//
+// 30 minutes covers the full lifecycle, not just the stream:
+//
+//	~60s   image pull on a fresh node + scheduler back-pressure
+//	~60s   WaitForStagePodReady ceiling (StagePodReadyTimeout)
+//	~8.5m  1 GiB transfer at a conservative 2 MB/s
+//	+ comfortable margin for variance
+//
+// activeDeadlineSeconds starts the clock at Pod CREATION (not at
+// streaming start), so we have to budget for the pre-stream
+// readiness portion too — the earlier 10-minute value cut it too
+// fine for near-cap customers on slow uplinks (kubelet would
+// terminate mid-transfer). Bugbot flagged the squeeze as High on
+// PR-b round 6.
+//
+// 30 min is generous but cheap — an idle alpine Pod with `sleep`
+// consumes ~5 MiB RAM and zero CPU on the cluster. v0.2 should
+// make this configurable per push.
+const StagePodActiveDeadline = 1800
+
+// StagePodReadyTimeout is how long we wait for the Pod to become
+// Running + Ready after CREATE. Most clusters spawn an alpine Pod
+// in 5-15 seconds; 60 seconds covers image-pull from a slow mirror
+// + scheduler back-pressure on busy clusters. Beyond that, something
+// is wrong (no image-pull access, no schedulable node, PSP/PSA
+// rejection) and the customer wants the diagnostic, not a longer
+// wait.
+const StagePodReadyTimeout = 60 * time.Second
+
+// PodSpecOptions controls the ephemeral stage Pod construction.
+// Fields are intentionally narrow — every knob is one a customer
+// could plausibly need to turn for an air-gapped or hardened-
+// security setup. Adding fields should require a real use case.
+type PodSpecOptions struct {
+	// Namespace is where the Pod gets created — always the
+	// discovered parent-release namespace.
+	Namespace string
+
+	// PVCClaimName is the shared PVC to mount at /data/shared.
+	// Discovered by cluster.DiscoverSharedPVC (always "client-pvc"
+	// today, but routed through a field so a future per-customer
+	// override doesn't require touching this signature).
+	PVCClaimName string
+
+	// PVCMountPath is where to mount the PVC inside the Pod —
+	// "/data/shared" by chart convention.
+	PVCMountPath string
+
+	// Table is the destination table name. Used to compose the Pod
+	// name and the on-PVC subdirectory. MUST have already passed
+	// ValidateTableName; pod-name composition relies on the same
+	// character-class restrictions.
+	Table string
+
+	// Image overrides DefaultStagePodImage. Empty = use default.
+	Image string
+
+	// ServiceAccountName is the SA the Pod runs as. Phase 2's
+	// discovery surfaces this as `ingestor` (the chart default) or
+	// whatever the customer's `--ingestor-sa` flag overrides to.
+	// Using the chart's existing SA means the Pod inherits any
+	// imagePullSecrets and PSA exemptions the admin already
+	// configured for it.
+	ServiceAccountName string
+}
+
+// BuildStagePodSpec produces the corev1.Pod for an ephemeral stage
+// Pod, fully parametrized but with no cluster side-effects.
+// Separated from CreateStagePod so unit tests can assert the spec
+// shape without needing a fake clientset for every assertion.
+//
+// Returns an error only when crypto/rand fails — which is rare but
+// possible on systems with exhausted entropy. Earlier versions
+// swallowed that error and produced a Pod name ending with a bare
+// trailing hyphen (DNS-1123 violation → opaque API server
+// rejection). Bugbot flagged that as Low on PR-b; surfacing the
+// error here turns "weird API error message" into "clear local
+// diagnostic at the call site."
+//
+// Security context follows the Kubernetes Pod Security Standards
+// "restricted" profile — the strictest preset, accepted on every
+// PSA-enabled namespace including the chart's recommended config.
+// This is intentional: the stage Pod runs in the customer's
+// namespace and writes to their PVC, so being a model citizen for
+// PSA defaults reduces "the Pod won't even start on my cluster"
+// surface area.
+func BuildStagePodSpec(opts PodSpecOptions) (*corev1.Pod, error) {
+	image := opts.Image
+	if image == "" {
+		image = DefaultStagePodImage
+	}
+
+	suffix, err := randomSuffix(4) // 4 bytes → 8 hex chars
+	if err != nil {
+		return nil, fmt.Errorf("generating Pod-name random suffix: %w", err)
+	}
+	// Transform the table name into a DNS-1123 subdomain-safe
+	// segment for the Pod name. ValidateTableName accepts
+	// [A-Za-z0-9_]+ (MySQL identifier rules), but Kubernetes Pod
+	// names follow DNS-1123 — lowercase + alphanumeric + hyphen
+	// only, must start/end with alphanumeric. Without this
+	// transform, the dominant canonical example (cats_dogs_train,
+	// snake_case throughout the tracebloc docs) would fail Pod
+	// creation post-pre-flight, which is a worst-of-both-worlds
+	// UX (the pre-flight summary says "we're good!" then the
+	// create fails). Bugbot flagged the gap as High on PR-b.
+	//
+	// The original (un-transformed) table name is preserved
+	// verbatim in the tracebloc.io/table label below, so orphan
+	// warnings still surface the customer-facing identifier.
+	podName := fmt.Sprintf("tracebloc-stage-%s-%s",
+		dns1123SafeTableSegment(opts.Table), suffix)
+
+	// Pod-level security context: runAsNonRoot is the only field
+	// PSA's restricted profile *requires* at the Pod level (the
+	// rest are container-level). Setting it here means every
+	// future container (if we ever add one) inherits the
+	// non-root constraint.
+	runAsNonRoot := true
+	runAsUser := int64(65532) // distroless's "nonroot" UID; works on any cluster
+
+	// Container-level security context: every PSA-restricted
+	// requirement. Reads top-to-bottom: capabilities dropped,
+	// read-only root FS, no privilege escalation, no privileged,
+	// seccomp default. None of these prevent tar from working —
+	// tar reads stdin + writes to the PVC mount (which is RW),
+	// and doesn't need any caps to run as a non-root user.
+	allowPrivEsc := false
+	privileged := false
+	readOnlyRootFS := true
+
+	activeDeadline := int64(StagePodActiveDeadline)
+
+	return &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      podName,
+			Namespace: opts.Namespace,
+			Labels: map[string]string{
+				StagePodManagedByLabel: StagePodManagedByValue,
+				StagePodComponentLabel: StagePodComponentValue,
+				StagePodTableLabel:     opts.Table,
+			},
+			Annotations: map[string]string{
+				// Annotations are searchable via `kubectl describe`
+				// but don't constrain scheduling — perfect for
+				// breadcrumbs that help post-mortem an orphan.
+				"tracebloc.io/created-at": time.Now().UTC().Format(time.RFC3339),
+				"tracebloc.io/created-by": "tracebloc-cli",
+			},
+		},
+		Spec: corev1.PodSpec{
+			ServiceAccountName: opts.ServiceAccountName,
+			RestartPolicy:      corev1.RestartPolicyNever,
+
+			// 10-min in-cluster self-kill — see comment on the const.
+			ActiveDeadlineSeconds: &activeDeadline,
+
+			SecurityContext: &corev1.PodSecurityContext{
+				RunAsNonRoot: &runAsNonRoot,
+				RunAsUser:    &runAsUser,
+				FSGroup:      &runAsUser, // PVC writes need this group
+				SeccompProfile: &corev1.SeccompProfile{
+					Type: corev1.SeccompProfileTypeRuntimeDefault,
+				},
+			},
+
+			Containers: []corev1.Container{{
+				Name:  "stage",
+				Image: image,
+				// `sleep` keeps the Pod alive long enough for the
+				// CLI to open an exec stream and tar files in.
+				// activeDeadlineSeconds caps the worst case; the
+				// CLI deletes the Pod the moment the stream
+				// finishes (or fails).
+				//
+				// Why not the busybox `sleep infinity` idiom: it
+				// causes some PSA configurations to flag the Pod
+				// because the container holds the SIGTERM until
+				// activeDeadlineSeconds fires (sleep traps signals
+				// imperfectly). A finite sleep matched to the
+				// deadline is gentler.
+				Command: []string{"/bin/sleep", fmt.Sprintf("%d", StagePodActiveDeadline)},
+
+				SecurityContext: &corev1.SecurityContext{
+					AllowPrivilegeEscalation: &allowPrivEsc,
+					Privileged:               &privileged,
+					ReadOnlyRootFilesystem:   &readOnlyRootFS,
+					RunAsNonRoot:             &runAsNonRoot,
+					RunAsUser:                &runAsUser,
+					Capabilities: &corev1.Capabilities{
+						// PSA restricted requires ALL capabilities
+						// dropped, then optionally add back
+						// NET_BIND_SERVICE. tar doesn't need it.
+						Drop: []corev1.Capability{"ALL"},
+					},
+					SeccompProfile: &corev1.SeccompProfile{
+						Type: corev1.SeccompProfileTypeRuntimeDefault,
+					},
+				},
+
+				VolumeMounts: []corev1.VolumeMount{{
+					Name:      "shared",
+					MountPath: opts.PVCMountPath,
+				}, {
+					// tar needs a writable working dir for its
+					// temporary state; with ReadOnlyRootFilesystem
+					// it can't use /tmp on the root FS. An
+					// emptyDir at /tmp is the standard pattern.
+					Name:      "tmp",
+					MountPath: "/tmp",
+				}},
+
+				// Conservative resource requests: tar of typical
+				// image_classification data uses <100 MiB RAM and
+				// negligible CPU. Setting requests lets the
+				// scheduler place us; setting limits keeps a
+				// runaway tar (e.g. corrupted archive triggering
+				// a busy loop) from impacting the cluster.
+				Resources: corev1.ResourceRequirements{
+					Requests: corev1.ResourceList{
+						corev1.ResourceCPU:    resource.MustParse("50m"),
+						corev1.ResourceMemory: resource.MustParse("64Mi"),
+					},
+					Limits: corev1.ResourceList{
+						corev1.ResourceCPU:    resource.MustParse("500m"),
+						corev1.ResourceMemory: resource.MustParse("256Mi"),
+					},
+				},
+			}},
+
+			Volumes: []corev1.Volume{{
+				Name: "shared",
+				VolumeSource: corev1.VolumeSource{
+					PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
+						ClaimName: opts.PVCClaimName,
+					},
+				},
+			}, {
+				Name: "tmp",
+				VolumeSource: corev1.VolumeSource{
+					EmptyDir: &corev1.EmptyDirVolumeSource{},
+				},
+			}},
+		},
+	}, nil
+}
+
+// CreateStagePod creates the Pod in the cluster and returns the
+// metadata.name the API server assigned. The returned name is what
+// callers pass to WaitForStagePodReady + DeleteStagePod.
+//
+// Take-name-from-API-response is deliberate: even though
+// BuildStagePodSpec pre-computes a name, the API server can in
+// principle rewrite it (e.g. via a mutating admission webhook
+// renaming with a prefix). Reading back from the response is the
+// safe contract.
+func CreateStagePod(ctx context.Context, cs kubernetes.Interface, opts PodSpecOptions) (string, error) {
+	pod, err := BuildStagePodSpec(opts)
+	if err != nil {
+		return "", err
+	}
+	created, err := cs.CoreV1().Pods(opts.Namespace).Create(ctx, pod, metav1.CreateOptions{})
+	if err != nil {
+		// Most-common failure path: PSA rejects the Pod because
+		// the customer's namespace policy is stricter than even
+		// "restricted." We surface the raw API error so the customer
+		// sees the PSA violation list verbatim — that's the most
+		// actionable thing.
+		return "", fmt.Errorf("creating stage Pod in namespace %q: %w", opts.Namespace, err)
+	}
+	return created.Name, nil
+}
+
+// WaitForStagePodReady polls until the Pod's status reports
+// Ready=True (the canonical "containers started and not yet
+// terminated" signal). Times out per StagePodReadyTimeout with a
+// diagnostic that tries to help — image-pull failures and
+// scheduler back-pressure are the dominant slow paths and have
+// distinct symptom strings.
+//
+// Returns nil on Ready, the pod object on success so callers don't
+// have to re-Get for it.
+func WaitForStagePodReady(ctx context.Context, cs kubernetes.Interface, namespace, podName string) (*corev1.Pod, error) {
+	var lastObserved *corev1.Pod
+
+	// Poll every 1s — image-pull failures surface in seconds, and
+	// the Ready transition itself is instant once kubelet has
+	// pulled+started. Tighter polling burns API server CPU for no
+	// benefit; looser polling delays the user.
+	err := wait.PollUntilContextTimeout(ctx, 1*time.Second, StagePodReadyTimeout, true,
+		func(ctx context.Context) (bool, error) {
+			p, err := cs.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
+			if err != nil {
+				// Distinguish terminal vs transient. Terminal
+				// errors (Pod deleted out-of-band, RBAC revoked
+				// to read Pods) MUST short-circuit the poll —
+				// otherwise the customer waits the full 60s
+				// timeout for a condition that won't change.
+				// Bugbot flagged the prior "everything transient"
+				// version as Medium on PR-b.
+				if apierrors.IsNotFound(err) || apierrors.IsForbidden(err) {
+					return false, err
+				}
+				// Transient (network blip, brief API unavail).
+				// Keep polling; last-observed context survives.
+				return false, nil
+			}
+			lastObserved = p
+			// Positive terminal: Ready=True.
+			for _, c := range p.Status.Conditions {
+				if c.Type == corev1.PodReady && c.Status == corev1.ConditionTrue {
+					return true, nil
+				}
+			}
+			// Negative terminal: Phase=Failed (container crashed
+			// at startup — PSA rejection, image crashloop, OOM)
+			// or Phase=Succeeded (sleep exited unexpectedly — the
+			// stage container shouldn't terminate by itself before
+			// we exec into it, so this is also a failure mode).
+			// Without these checks the poll waits the full 60s
+			// for a Pod that will never become Ready. Bugbot
+			// flagged on PR-b round 4 as Medium; this is the
+			// counterpart to the NotFound/Forbidden short-circuit
+			// landed in the previous commit.
+			if p.Status.Phase == corev1.PodFailed || p.Status.Phase == corev1.PodSucceeded {
+				return false, fmt.Errorf(
+					"stage Pod %s/%s terminated in phase %q before becoming Ready%s",
+					namespace, podName, p.Status.Phase, podReadyTimeoutHint(p))
+			}
+			return false, nil
+		})
+
+	if err == nil {
+		return lastObserved, nil
+	}
+
+	// Differentiate "actual timeout expired" from "poll terminated
+	// early" (NotFound, Forbidden, Failed phase, ctx-canceled).
+	// The earlier blanket "did not become Ready within 60s"
+	// wording was misleading — Bugbot flagged on PR-b round 7.
+	hint := podReadyTimeoutHint(lastObserved)
+	if errors.Is(err, context.DeadlineExceeded) {
+		return nil, fmt.Errorf(
+			"stage Pod %s/%s did not become Ready within %s%s",
+			namespace, podName, StagePodReadyTimeout, hint)
+	}
+	// Early-exit error (terminal API error, terminal phase,
+	// SIGINT). Surface it as-is so the customer sees the actual
+	// cause without the wrong "ran out the timer" framing.
+	return nil, fmt.Errorf(
+		"stage Pod %s/%s did not reach Ready state: %w%s",
+		namespace, podName, err, hint)
+}
+
+// podReadyTimeoutHint extracts the most useful diagnostic from a
+// last-observed Pod status. Designed to match against the two
+// dominant slow-path scenarios:
+//
+//  1. Image pull is slow or failed (ImagePullBackOff,
+//     ErrImagePull) — surface the registry message.
+//  2. Pod is unschedulable (PodScheduled=False with reason) —
+//     surface the scheduler's message.
+func podReadyTimeoutHint(p *corev1.Pod) string {
+	if p == nil {
+		return " (no Pod status observed — check API server connectivity)"
+	}
+	for _, cs := range p.Status.ContainerStatuses {
+		if cs.State.Waiting != nil && cs.State.Waiting.Reason != "" {
+			return fmt.Sprintf(" (last container state: %s — %s)",
+				cs.State.Waiting.Reason, cs.State.Waiting.Message)
+		}
+	}
+	for _, c := range p.Status.Conditions {
+		if c.Type == corev1.PodScheduled && c.Status == corev1.ConditionFalse {
+			return fmt.Sprintf(" (scheduling: %s — %s)", c.Reason, c.Message)
+		}
+	}
+	return fmt.Sprintf(" (Pod phase: %s)", p.Status.Phase)
+}
+
+// DeleteStagePod removes the Pod. Called via defer in the orchestrator
+// so it runs on both success and failure (including SIGINT, which
+// is wired up to cancel the parent context and let the defer fire
+// in normal stack unwind).
+//
+// Uses background propagation + a tiny grace period because we
+// don't care about giving sleep a graceful shutdown — we just want
+// the Pod gone so the next push doesn't see an orphan.
+func DeleteStagePod(ctx context.Context, cs kubernetes.Interface, namespace, podName string) error {
+	gracePeriod := int64(0)
+	propagation := metav1.DeletePropagationBackground
+	err := cs.CoreV1().Pods(namespace).Delete(ctx, podName, metav1.DeleteOptions{
+		GracePeriodSeconds: &gracePeriod,
+		PropagationPolicy:  &propagation,
+	})
+	if err != nil && !apierrors.IsNotFound(err) {
+		// Not-found is fine: the Pod might have already self-killed
+		// via activeDeadlineSeconds, or a parallel `kubectl delete`
+		// got there first. Either way, our goal (no orphan) is
+		// achieved. Returning the error here would mask the
+		// upstream cause of failure that triggered the defer.
+		return fmt.Errorf("deleting stage Pod %s/%s: %w", namespace, podName, err)
+	}
+	return nil
+}
+
+// dns1123SafeTableSegment transforms a ValidateTableName-passed
+// table name into a Kubernetes-Pod-name-compatible path segment.
+// The Pod's full name is then `tracebloc-stage-<segment>-<8hex>`,
+// which must satisfy DNS-1123 subdomain rules:
+//
+//	[a-z0-9]([-a-z0-9]*[a-z0-9])?
+//
+// (lowercase + digit + hyphen, start/end alphanumeric).
+//
+// Our input alphabet is [A-Za-z0-9_], so the transform is:
+//
+//  1. Lowercase (DNS-1123 forbids uppercase)
+//  2. Replace '_' with '-' (DNS-1123 forbids underscore)
+//  3. Strip leading/trailing hyphens (a name like "_leading"
+//     would otherwise become "-leading" → tracebloc-stage--leading
+//     which is OK in the middle but ugly)
+//  4. Cap length at 30 chars — Pod names are bounded at 63 total,
+//     and "tracebloc-stage-" + 8-hex-suffix already consumes ~25
+//     of those, leaving ~38 for the segment. 30 gives margin.
+//  5. Fallback to "tbl" if the transform leaves an empty string
+//     (the pathological "_"-only-name case).
+func dns1123SafeTableSegment(table string) string {
+	s := strings.ToLower(strings.ReplaceAll(table, "_", "-"))
+	s = strings.Trim(s, "-")
+	if len(s) > 30 {
+		s = s[:30]
+		// Truncation could leave a trailing hyphen — re-trim.
+		s = strings.TrimRight(s, "-")
+	}
+	if s == "" {
+		// Pathological all-underscore name. The label still
+		// carries the original, so customers can still trace
+		// orphan-Pod warnings back to their push.
+		s = "tbl"
+	}
+	return s
+}
+
+// randomSuffix returns a hex string of length 2*n. Used to make
+// stage Pod names unique across parallel `dataset push` invocations
+// for the same table — without this, two CLI runs would race on the
+// same Pod name and one would fail the Create with AlreadyExists.
+//
+// Cryptographic randomness is overkill for collision avoidance
+// (8 hex chars = 32 bits of entropy is way more than needed) but
+// crypto/rand is the simpler import compared to math/rand which
+// needs explicit seeding.
+func randomSuffix(n int) (string, error) {
+	b := make([]byte, n)
+	if _, err := rand.Read(b); err != nil {
+		return "", err
+	}
+	return hex.EncodeToString(b), nil
+}
diff --git a/internal/push/pod_test.go b/internal/push/pod_test.go
new file mode 100644
index 0000000..6e58715
--- /dev/null
+++ b/internal/push/pod_test.go
@@ -0,0 +1,560 @@
+package push
+
+import (
+	"context"
+	"errors"
+	"strings"
+	"testing"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/kubernetes/fake"
+	k8stesting "k8s.io/client-go/testing"
+)
+
+// stageOpts is the minimal PodSpecOptions for happy-path tests —
+// individual cases mutate one field to exercise their specific
+// branch.
+func stageOpts() PodSpecOptions {
+	return PodSpecOptions{
+		Namespace:          "tracebloc",
+		PVCClaimName:       "client-pvc",
+		PVCMountPath:       "/data/shared",
+		Table:              "cats_dogs",
+		ServiceAccountName: "ingestor",
+	}
+}
+
+// mustBuildStagePod wraps BuildStagePodSpec for tests that don't
+// care about the (rare) crypto/rand failure path — failing the
+// test if it ever fires is the right escape from "if err != nil"
+// noise in every assertion. The dedicated TestBuildStagePodSpec_
+// PropagatesRandError below covers the error path directly.
+func mustBuildStagePod(t *testing.T, opts PodSpecOptions) *corev1.Pod {
+	t.Helper()
+	p, err := BuildStagePodSpec(opts)
+	if err != nil {
+		t.Fatalf("BuildStagePodSpec: %v", err)
+	}
+	return p
+}
+
+// TestDNS1123SafeTableSegment is the High-severity regression pin
+// for the Bugbot finding on PR-b. Every realistic table name in
+// tracebloc docs uses snake_case (cats_dogs_train, chest_xrays_train);
+// without the transform these would produce Pod names K8s rejects
+// post-pre-flight, which is the worst-of-both-worlds UX. The
+// transform's job: take any ValidateTableName-passed name and
+// produce a DNS-1123 subdomain-safe segment for the Pod name.
+func TestDNS1123SafeTableSegment(t *testing.T) {
+	cases := []struct {
+		in, want string
+	}{
+		// Canonical happy path: snake_case → kebab-case.
+		{"cats_dogs", "cats-dogs"},
+		{"chest_xrays_train", "chest-xrays-train"},
+		// Uppercase: must lowercase.
+		{"MyTable", "mytable"},
+		{"ABC", "abc"},
+		// Already lowercase no-underscore: identity.
+		{"single", "single"},
+		// Mixed: lowercase + underscore → hyphen.
+		{"Table_123", "table-123"},
+		// Edge: leading underscore must be stripped (would
+		// otherwise produce "-leading", and while the full Pod
+		// name `tracebloc-stage--leading-<hex>` is valid DNS-1123
+		// the consecutive hyphens are ugly).
+		{"_leading_underscore", "leading-underscore"},
+		// Edge: trailing underscore stripped.
+		{"trailing_", "trailing"},
+		// Edge: digit-led is valid (DNS-1123 allows it).
+		{"9starts_with_digit", "9starts-with-digit"},
+		// Truncation: input > 30 chars caps at 30, then trims
+		// any trailing hyphen the cut might have left.
+		{strings.Repeat("a", 50), strings.Repeat("a", 30)},
+		// Pathological: all-underscores → fallback "tbl".
+		{"_", "tbl"},
+		{"___", "tbl"},
+	}
+	for _, c := range cases {
+		t.Run(c.in, func(t *testing.T) {
+			got := dns1123SafeTableSegment(c.in)
+			if got != c.want {
+				t.Errorf("dns1123SafeTableSegment(%q) = %q, want %q", c.in, got, c.want)
+			}
+			// Defensive: the result must satisfy DNS-1123
+			// subdomain rules for a Pod-name segment. Cheap
+			// regex check covers the contract.
+			if !isDNS1123SafeSegment(got) {
+				t.Errorf("dns1123SafeTableSegment(%q) = %q, which violates DNS-1123",
+					c.in, got)
+			}
+		})
+	}
+}
+
+// isDNS1123SafeSegment is a test-only helper: returns true iff s
+// matches the DNS-1123 subdomain segment regex (lowercase alnum +
+// hyphen, must start AND end with alnum). Matches what the K8s
+// validation library checks for Pod names.
+func isDNS1123SafeSegment(s string) bool {
+	if s == "" {
+		return false
+	}
+	for i, r := range s {
+		switch {
+		case r >= 'a' && r <= 'z':
+		case r >= '0' && r <= '9':
+		case r == '-':
+			// Hyphen forbidden at boundaries.
+			if i == 0 || i == len(s)-1 {
+				return false
+			}
+		default:
+			return false
+		}
+	}
+	return true
+}
+
+// TestBuildStagePodSpec_Defaults pins the spec fields PR-b's
+// post-create logic depends on: the SA name, the PVC mount, the
+// activeDeadline, and the labels orphan.go keys off.
+func TestBuildStagePodSpec_Defaults(t *testing.T) {
+	p := mustBuildStagePod(t, stageOpts())
+
+	if p.Namespace != "tracebloc" {
+		t.Errorf("Namespace = %q, want tracebloc", p.Namespace)
+	}
+	// Pod name uses the DNS-1123-safe transform of the table name:
+	// "cats_dogs" → "cats-dogs". Bugbot flagged the raw-name version
+	// on PR-b as High severity (snake_case is the canonical naming
+	// style in tracebloc docs but K8s Pod names reject underscores).
+	if !strings.HasPrefix(p.Name, "tracebloc-stage-cats-dogs-") {
+		t.Errorf("Name = %q, want prefix tracebloc-stage-cats-dogs-", p.Name)
+	}
+	if got, wantLen := len(p.Name), len("tracebloc-stage-cats-dogs-")+8; got != wantLen {
+		t.Errorf("len(Name) = %d, want %d (8-hex-char random suffix)", got, wantLen)
+	}
+	if p.Spec.ServiceAccountName != "ingestor" {
+		t.Errorf("ServiceAccountName = %q, want ingestor", p.Spec.ServiceAccountName)
+	}
+	if p.Spec.RestartPolicy != corev1.RestartPolicyNever {
+		t.Errorf("RestartPolicy = %q, want Never", p.Spec.RestartPolicy)
+	}
+	if p.Spec.ActiveDeadlineSeconds == nil || *p.Spec.ActiveDeadlineSeconds != int64(StagePodActiveDeadline) {
+		t.Errorf("ActiveDeadlineSeconds = %v, want %d", p.Spec.ActiveDeadlineSeconds, StagePodActiveDeadline)
+	}
+}
+
+// TestStagePodActiveDeadline_CoversFullLifecycle pins the floor on
+// activeDeadlineSeconds. The earlier 600s value was too tight: the
+// timer starts at Pod CREATION, but image pull (up to 60s) +
+// readiness wait (up to 60s) + the worst-case stream (1 GiB at
+// 2 MB/s = ~8.5 min) leaves no margin, so the kubelet could
+// terminate near-cap pushes mid-transfer. Bugbot flagged as High
+// on PR-b round 6. This test pins the floor at 1500s (25 min) so
+// a future regression bumping it back near 600 gets caught.
+func TestStagePodActiveDeadline_CoversFullLifecycle(t *testing.T) {
+	const minDeadline = 1500
+	if StagePodActiveDeadline < minDeadline {
+		t.Errorf("StagePodActiveDeadline = %d, want at least %d "+
+			"(must cover image-pull + readiness + worst-case 1 GiB stream "+
+			"+ margin; activeDeadline starts at Pod creation not stream start)",
+			StagePodActiveDeadline, minDeadline)
+	}
+}
+
+// TestBuildStagePodSpec_DefaultImage pins the digest-pinned image —
+// if this ever drifts to a tag-only reference, air-gapped customers
+// would silently get whatever alpine:3.20 resolved to that day.
+func TestBuildStagePodSpec_DefaultImage(t *testing.T) {
+	p := mustBuildStagePod(t, stageOpts())
+	got := p.Spec.Containers[0].Image
+	if got != DefaultStagePodImage {
+		t.Errorf("Image = %q, want %q (default)", got, DefaultStagePodImage)
+	}
+	if !strings.Contains(got, "@sha256:") {
+		t.Errorf("Image = %q, want digest-pinned (@sha256:...); air-gapped customers depend on this", got)
+	}
+}
+
+// TestBuildStagePodSpec_OverrideImage pins the --stage-pod-image
+// flag's contract end-to-end at the spec layer.
+func TestBuildStagePodSpec_OverrideImage(t *testing.T) {
+	opts := stageOpts()
+	opts.Image = "internal-mirror.example.com/alpine:3.20@sha256:abc123"
+	p := mustBuildStagePod(t, opts)
+	if got := p.Spec.Containers[0].Image; got != opts.Image {
+		t.Errorf("Image = %q, want override %q", got, opts.Image)
+	}
+}
+
+// TestBuildStagePodSpec_LabelsForOrphanScan pins the labels orphan.go
+// will key off. If these ever drift, orphan-pod detection silently
+// misses leftover Pods from crashed pushes.
+func TestBuildStagePodSpec_LabelsForOrphanScan(t *testing.T) {
+	p := mustBuildStagePod(t, stageOpts())
+	wantLabels := map[string]string{
+		StagePodManagedByLabel: StagePodManagedByValue,
+		StagePodComponentLabel: StagePodComponentValue,
+		StagePodTableLabel:     "cats_dogs",
+	}
+	for k, want := range wantLabels {
+		if got := p.Labels[k]; got != want {
+			t.Errorf("Label %s = %q, want %q", k, got, want)
+		}
+	}
+}
+
+// TestBuildStagePodSpec_RestrictedPSA is the security-regression
+// pin: every PSA-restricted requirement must hold, otherwise the
+// stage Pod gets rejected on hardened namespaces (which is the
+// majority of production tracebloc deployments).
+func TestBuildStagePodSpec_RestrictedPSA(t *testing.T) {
+	p := mustBuildStagePod(t, stageOpts())
+
+	// Pod-level: runAsNonRoot, seccomp RuntimeDefault.
+	psc := p.Spec.SecurityContext
+	if psc == nil {
+		t.Fatal("Pod has no SecurityContext")
+	}
+	if psc.RunAsNonRoot == nil || !*psc.RunAsNonRoot {
+		t.Errorf("Pod.SecurityContext.RunAsNonRoot = %v, want true", psc.RunAsNonRoot)
+	}
+	if psc.SeccompProfile == nil || psc.SeccompProfile.Type != corev1.SeccompProfileTypeRuntimeDefault {
+		t.Errorf("Pod.SeccompProfile = %v, want RuntimeDefault", psc.SeccompProfile)
+	}
+
+	// Container-level: every PSA restricted constraint.
+	if len(p.Spec.Containers) != 1 {
+		t.Fatalf("len(Containers) = %d, want 1", len(p.Spec.Containers))
+	}
+	c := p.Spec.Containers[0]
+	sc := c.SecurityContext
+	if sc == nil {
+		t.Fatal("Container has no SecurityContext")
+	}
+	if sc.AllowPrivilegeEscalation == nil || *sc.AllowPrivilegeEscalation {
+		t.Errorf("AllowPrivilegeEscalation = %v, want false", sc.AllowPrivilegeEscalation)
+	}
+	if sc.Privileged == nil || *sc.Privileged {
+		t.Errorf("Privileged = %v, want false", sc.Privileged)
+	}
+	if sc.ReadOnlyRootFilesystem == nil || !*sc.ReadOnlyRootFilesystem {
+		t.Errorf("ReadOnlyRootFilesystem = %v, want true", sc.ReadOnlyRootFilesystem)
+	}
+	if sc.Capabilities == nil || len(sc.Capabilities.Drop) == 0 {
+		t.Fatalf("Capabilities.Drop = %v, want [ALL]", sc.Capabilities)
+	}
+	if sc.Capabilities.Drop[0] != "ALL" {
+		t.Errorf("Capabilities.Drop = %v, want [ALL]", sc.Capabilities.Drop)
+	}
+	if sc.SeccompProfile == nil || sc.SeccompProfile.Type != corev1.SeccompProfileTypeRuntimeDefault {
+		t.Errorf("Container SeccompProfile = %v, want RuntimeDefault", sc.SeccompProfile)
+	}
+}
+
+// TestBuildStagePodSpec_PVCMount pins the volume + mountPath that
+// the tar stream writes into. If the mount path here ever drifts
+// from cluster.SharedPVCMountPath, the tar would write to the
+// wrong location and Phase 4's ingestor Job would see "no files."
+func TestBuildStagePodSpec_PVCMount(t *testing.T) {
+	p := mustBuildStagePod(t, stageOpts())
+
+	// Volume side: the PVC reference.
+	var foundVol bool
+	for _, v := range p.Spec.Volumes {
+		if v.Name == "shared" {
+			foundVol = true
+			if v.PersistentVolumeClaim == nil {
+				t.Errorf("shared volume has no PVC source")
+			} else if v.PersistentVolumeClaim.ClaimName != "client-pvc" {
+				t.Errorf("PVC ClaimName = %q, want client-pvc", v.PersistentVolumeClaim.ClaimName)
+			}
+		}
+	}
+	if !foundVol {
+		t.Error("Pod has no shared volume")
+	}
+
+	// Mount side: where the container sees it.
+	var foundMount bool
+	for _, m := range p.Spec.Containers[0].VolumeMounts {
+		if m.Name == "shared" {
+			foundMount = true
+			if m.MountPath != "/data/shared" {
+				t.Errorf("MountPath = %q, want /data/shared", m.MountPath)
+			}
+		}
+	}
+	if !foundMount {
+		t.Error("stage container has no shared mount")
+	}
+}
+
+// TestBuildStagePodSpec_TmpEmptyDir pins the writable /tmp emptyDir
+// that tar needs (since the root FS is read-only). Without this,
+// tar would fail to create its working state and the stream would
+// die mid-transfer.
+func TestBuildStagePodSpec_TmpEmptyDir(t *testing.T) {
+	p := mustBuildStagePod(t, stageOpts())
+	var foundEmptyDir, foundMount bool
+	for _, v := range p.Spec.Volumes {
+		if v.Name == "tmp" {
+			foundEmptyDir = true
+			if v.EmptyDir == nil {
+				t.Error("tmp volume is not an EmptyDir")
+			}
+		}
+	}
+	for _, m := range p.Spec.Containers[0].VolumeMounts {
+		if m.Name == "tmp" && m.MountPath == "/tmp" {
+			foundMount = true
+		}
+	}
+	if !foundEmptyDir || !foundMount {
+		t.Errorf("tmp emptyDir mount missing (vol=%v, mount=%v)", foundEmptyDir, foundMount)
+	}
+}
+
+// TestBuildStagePodSpec_RandomSuffixCollisionAvoidance: two specs
+// built back-to-back must have distinct names so parallel pushes
+// don't race on Create.
+func TestBuildStagePodSpec_RandomSuffixCollisionAvoidance(t *testing.T) {
+	a := mustBuildStagePod(t, stageOpts())
+	b := mustBuildStagePod(t, stageOpts())
+	if a.Name == b.Name {
+		t.Errorf("back-to-back BuildStagePodSpec produced identical name %q; "+
+			"random-suffix collision avoidance is broken", a.Name)
+	}
+}
+
+// TestCreateStagePod_HappyPath: the fake clientset accepts a Create
+// and returns the created object. We pin that CreateStagePod
+// surfaces the assigned name back to the caller.
+func TestCreateStagePod_HappyPath(t *testing.T) {
+	cs := fake.NewClientset()
+	name, err := CreateStagePod(context.Background(), cs, stageOpts())
+	if err != nil {
+		t.Fatalf("CreateStagePod: %v", err)
+	}
+	if !strings.HasPrefix(name, "tracebloc-stage-cats-dogs-") {
+		t.Errorf("returned name = %q, want prefix tracebloc-stage-cats-dogs-", name)
+	}
+	// Cross-check: the Pod actually exists in the fake cluster.
+	if _, err := cs.CoreV1().Pods("tracebloc").Get(context.Background(), name, metav1.GetOptions{}); err != nil {
+		t.Errorf("Pod not found after CreateStagePod: %v", err)
+	}
+}
+
+// TestCreateStagePod_APIErrorSurfaces: PSA rejections, RBAC denials,
+// etc. surface verbatim so the customer sees the actionable cluster-
+// side message.
+func TestCreateStagePod_APIErrorSurfaces(t *testing.T) {
+	cs := fake.NewClientset()
+	cs.PrependReactor("create", "pods",
+		func(_ k8stesting.Action) (bool, runtime.Object, error) {
+			return true, nil, apierrors.NewForbidden(
+				corev1.Resource("pods"), "",
+				errors.New("user cannot create pods"))
+		})
+
+	_, err := CreateStagePod(context.Background(), cs, stageOpts())
+	if err == nil {
+		t.Fatal("CreateStagePod returned nil error on forbidden Create")
+	}
+	if !strings.Contains(err.Error(), "creating stage Pod") {
+		t.Errorf("error missing CLI framing: %v", err)
+	}
+}
+
+// TestWaitForStagePodReady_HappyPath: a Pod that immediately reports
+// Ready=True passes through.
+func TestWaitForStagePodReady_HappyPath(t *testing.T) {
+	cs := fake.NewClientset(&corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "tracebloc-stage-cats_dogs-abc12345",
+			Namespace: "tracebloc",
+		},
+		Status: corev1.PodStatus{
+			Phase: corev1.PodRunning,
+			Conditions: []corev1.PodCondition{{
+				Type:   corev1.PodReady,
+				Status: corev1.ConditionTrue,
+			}},
+		},
+	})
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	p, err := WaitForStagePodReady(ctx, cs, "tracebloc", "tracebloc-stage-cats_dogs-abc12345")
+	if err != nil {
+		t.Fatalf("WaitForStagePodReady: %v", err)
+	}
+	if p == nil {
+		t.Fatal("returned nil Pod on success")
+	}
+}
+
+// TestWaitForStagePodReady_TimeoutHint: when the Pod is stuck (e.g.
+// ImagePullBackOff), the timeout error should surface the waiting-
+// state reason from container statuses. This is the dominant slow
+// path — air-gapped customers without the right pull secret.
+func TestWaitForStagePodReady_TimeoutHint(t *testing.T) {
+	cs := fake.NewClientset(&corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "stuck-pod",
+			Namespace: "tracebloc",
+		},
+		Status: corev1.PodStatus{
+			Phase: corev1.PodPending,
+			ContainerStatuses: []corev1.ContainerStatus{{
+				Name: "stage",
+				State: corev1.ContainerState{
+					Waiting: &corev1.ContainerStateWaiting{
+						Reason:  "ImagePullBackOff",
+						Message: "Back-off pulling image \"alpine:3.20@sha256:...\"",
+					},
+				},
+			}},
+		},
+	})
+
+	// Tight ctx timeout so the test doesn't actually wait 60s.
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
+	defer cancel()
+
+	_, err := WaitForStagePodReady(ctx, cs, "tracebloc", "stuck-pod")
+	if err == nil {
+		t.Fatal("WaitForStagePodReady returned nil on stuck Pod")
+	}
+	if !strings.Contains(err.Error(), "ImagePullBackOff") {
+		t.Errorf("error missing ImagePullBackOff hint: %v", err)
+	}
+}
+
+// TestWaitForStagePodReady_NotFoundIsTerminal: if the Pod gets
+// deleted out-of-band mid-wait (admin cleanup, parallel test
+// teardown, etc.), the poll must short-circuit instead of waiting
+// the full timeout. Bugbot flagged the prior "everything transient"
+// behavior as Medium on PR-b.
+func TestWaitForStagePodReady_NotFoundIsTerminal(t *testing.T) {
+	cs := fake.NewClientset() // empty — Get returns NotFound
+
+	// Generous overall ctx; the test should return WAY before
+	// StagePodReadyTimeout (60s) because NotFound terminates the
+	// poll immediately. If the fix regresses, this test takes the
+	// full 60s and gets caught by go test's -timeout.
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	start := time.Now()
+	_, err := WaitForStagePodReady(ctx, cs, "tracebloc", "ghost-pod")
+	elapsed := time.Since(start)
+	if err == nil {
+		t.Fatal("WaitForStagePodReady returned nil on NotFound")
+	}
+	if elapsed > 3*time.Second {
+		t.Errorf("WaitForStagePodReady waited %s for NotFound; expected immediate return", elapsed)
+	}
+}
+
+// TestWaitForStagePodReady_ForbiddenIsTerminal: same as NotFound but
+// for RBAC denial — the customer's kubeconfig might lose `get pods`
+// permission mid-push (token rotation, RBAC change). The poll must
+// surface that immediately, not spin until timeout.
+func TestWaitForStagePodReady_ForbiddenIsTerminal(t *testing.T) {
+	cs := fake.NewClientset()
+	cs.PrependReactor("get", "pods",
+		func(_ k8stesting.Action) (bool, runtime.Object, error) {
+			return true, nil, apierrors.NewForbidden(
+				corev1.Resource("pods"), "test-pod",
+				errors.New("user cannot get pods"))
+		})
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	start := time.Now()
+	_, err := WaitForStagePodReady(ctx, cs, "tracebloc", "test-pod")
+	elapsed := time.Since(start)
+	if err == nil {
+		t.Fatal("WaitForStagePodReady returned nil on Forbidden")
+	}
+	if elapsed > 3*time.Second {
+		t.Errorf("WaitForStagePodReady waited %s for Forbidden; expected immediate return", elapsed)
+	}
+}
+
+// TestWaitForStagePodReady_FailedPhaseIsTerminal: a Pod that
+// crashes at startup (PSA rejection, ImagePullBackOff that escalates
+// to ErrImagePull, OOMKilled during container start) lands in
+// Phase=Failed and will never become Ready. The poll must
+// short-circuit instead of waiting the full 60s. Bugbot flagged
+// the missing check as Medium on PR-b round 4.
+func TestWaitForStagePodReady_FailedPhaseIsTerminal(t *testing.T) {
+	cs := fake.NewClientset(&corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "crashed-pod",
+			Namespace: "tracebloc",
+		},
+		Status: corev1.PodStatus{
+			Phase: corev1.PodFailed,
+			ContainerStatuses: []corev1.ContainerStatus{{
+				Name: "stage",
+				State: corev1.ContainerState{
+					Terminated: &corev1.ContainerStateTerminated{
+						Reason:   "OOMKilled",
+						ExitCode: 137,
+					},
+				},
+			}},
+		},
+	})
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	start := time.Now()
+	_, err := WaitForStagePodReady(ctx, cs, "tracebloc", "crashed-pod")
+	elapsed := time.Since(start)
+	if err == nil {
+		t.Fatal("WaitForStagePodReady returned nil on Phase=Failed")
+	}
+	if elapsed > 3*time.Second {
+		t.Errorf("WaitForStagePodReady waited %s for Phase=Failed; expected immediate return", elapsed)
+	}
+	if !strings.Contains(err.Error(), "Failed") {
+		t.Errorf("error missing Phase=Failed signal: %v", err)
+	}
+}
+
+// TestDeleteStagePod_NotFoundIsOK: the Pod might be gone already
+// (activeDeadlineSeconds fired, or someone kubectl-deleted it).
+// Not-found shouldn't error — our goal is "Pod doesn't exist," which
+// is satisfied.
+func TestDeleteStagePod_NotFoundIsOK(t *testing.T) {
+	cs := fake.NewClientset() // empty
+	if err := DeleteStagePod(context.Background(), cs, "tracebloc", "nope"); err != nil {
+		t.Errorf("DeleteStagePod on absent Pod = %v, want nil", err)
+	}
+}
+
+// TestDeleteStagePod_HappyPath: delete a real Pod, confirm it's gone.
+func TestDeleteStagePod_HappyPath(t *testing.T) {
+	cs := fake.NewClientset(&corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{Name: "to-delete", Namespace: "tracebloc"},
+	})
+	if err := DeleteStagePod(context.Background(), cs, "tracebloc", "to-delete"); err != nil {
+		t.Fatalf("DeleteStagePod: %v", err)
+	}
+	_, err := cs.CoreV1().Pods("tracebloc").Get(context.Background(), "to-delete", metav1.GetOptions{})
+	if !apierrors.IsNotFound(err) {
+		t.Errorf("Pod still exists after delete (err=%v)", err)
+	}
+}
diff --git a/internal/push/progress.go b/internal/push/progress.go
new file mode 100644
index 0000000..f5df7b3
--- /dev/null
+++ b/internal/push/progress.go
@@ -0,0 +1,132 @@
+package push
+
+import (
+	"io"
+	"os"
+
+	"github.com/schollz/progressbar/v3"
+	"golang.org/x/term"
+)
+
+// Progress is the narrow interface push.Stream + the stage
+// orchestrator use to surface per-byte transfer progress to the
+// customer. The real implementation wraps schollz/progressbar/v3
+// (NewTTYProgress); tests pass a no-op (NoOpProgress) since CI
+// runs aren't TTYs and decoupling the test layer from any
+// progressbar internals keeps test output clean.
+//
+// The interface is deliberately byte-oriented (not file-count
+// oriented). Tar header bytes flow through too — they're a few
+// hundred bytes per file, which makes the progress bar slightly
+// over-count the "useful" bytes, but they're real bytes on the
+// wire that the customer pays for anyway. The alternative
+// (counting only file-body bytes) requires the progress sink to
+// know what a tar header looks like, which couples it to the
+// streaming format.
+type Progress interface {
+	// Add records that n bytes have been transferred. Safe to
+	// call from any goroutine.
+	Add(n int)
+
+	// Finish marks the transfer done. Calling Finish before all
+	// bytes have been added is fine — the bar just won't fill to
+	// 100%, which is a useful visual signal that something cut
+	// the stream short.
+	Finish()
+}
+
+// NewProgress returns a Progress sink appropriate for the given
+// output writer:
+//
+//   - If out is a TTY (interactive terminal), return a
+//     progressbar/v3 that renders an animated bar with bytes/sec
+//   - ETA + percentage.
+//   - Otherwise return a no-op (CI, redirected output, piped to
+//     a file). A spinning bar in a CI log is noise, not a
+//     feature.
+//
+// totalBytes is the expected transfer size (from LocalLayout's
+// TotalBytes plus a small tar-overhead margin). The bar normalizes
+// against this; if actual transferred bytes overshoot, the bar
+// caps at 100% rather than wrapping.
+//
+// `description` shows up to the left of the bar; "Staging cats_dogs"
+// is a typical value.
+func NewProgress(out io.Writer, totalBytes int64, description string) Progress {
+	if !isTTY(out) {
+		return NoOpProgress{}
+	}
+	return &ttyProgress{
+		bar: progressbar.NewOptions64(totalBytes,
+			progressbar.OptionSetWriter(out),
+			progressbar.OptionSetDescription(description),
+			progressbar.OptionShowBytes(true),
+			progressbar.OptionShowCount(),
+			progressbar.OptionThrottle(100), // ms — keep CPU low
+			progressbar.OptionSetRenderBlankState(true),
+			progressbar.OptionSetWidth(40),
+			progressbar.OptionClearOnFinish(),
+		),
+	}
+}
+
+// isTTY reports whether out points at a real terminal. We test for
+// *os.File AND golang.org/x/term IsTerminal — a *bytes.Buffer in
+// tests isn't *os.File so we return false fast. Wrappers like
+// io.MultiWriter that aren't *os.File also return false, which is
+// the conservative choice.
+func isTTY(out io.Writer) bool {
+	f, ok := out.(*os.File)
+	if !ok {
+		return false
+	}
+	return term.IsTerminal(int(f.Fd()))
+}
+
+// ttyProgress is the real-bar implementation. The progressbar
+// library is itself concurrency-safe under Add(), so the wrapper
+// can be a thin pass-through.
+type ttyProgress struct {
+	bar *progressbar.ProgressBar
+}
+
+func (p *ttyProgress) Add(n int) {
+	// Explicit-discard the error — progressbar's Add returns an
+	// error if the write to the underlying writer fails, which we
+	// can't do anything about mid-stream (the customer's terminal
+	// going away during a push doesn't affect the staging
+	// outcome). Same rationale as the Fprintf discards elsewhere.
+	_ = p.bar.Add(n)
+}
+
+func (p *ttyProgress) Finish() {
+	_ = p.bar.Finish()
+}
+
+// NoOpProgress satisfies Progress without doing anything. Used in
+// tests and when output is non-TTY. Exported so tests in OTHER
+// packages (internal/cli) can also use it.
+type NoOpProgress struct{}
+
+func (NoOpProgress) Add(_ int) {}
+func (NoOpProgress) Finish()   {}
+
+// progressWriter is an io.Writer that funnels write counts into a
+// Progress. Used to wrap the stdin pipe-writer so every byte
+// piped to the exec stream counts toward the bar.
+//
+// Not exported — it's an implementation detail of Stream. Tests
+// validate the contract end-to-end (transferred bytes match
+// expected) rather than poking the wrapper directly.
+type progressWriter struct {
+	w io.Writer
+	p Progress
+}
+
+func (pw *progressWriter) Write(b []byte) (int, error) {
+	n, err := pw.w.Write(b)
+	if n > 0 {
+		pw.p.Add(n)
+	}
+	return n, err
+}
diff --git a/internal/push/spec.go b/internal/push/spec.go
index ec727b0..8aabb0a 100644
--- a/internal/push/spec.go
+++ b/internal/push/spec.go
@@ -40,18 +40,33 @@ import (
 // table-naming style anyway.
 var tableNamePattern = regexp.MustCompile(`^[A-Za-z0-9_]+$`)
 
+// MaxTableNameLength caps `--table` at 63 chars. Two hard limits
+// agree on this:
+//
+//  1. MySQL identifier limit: 64 chars (MySQL Reference 9.2 Schema
+//     Object Names). We use 63 to leave one char of headroom for
+//     any future "ingestion_run_id" suffix the chart might want to
+//     append.
+//  2. Kubernetes label value limit: 63 chars (DNS-1123 label rules).
+//     The stage Pod's tracebloc.io/table label carries the
+//     untransformed table name verbatim — a longer value fails Pod
+//     creation post-pre-flight, which Bugbot flagged on PR-b
+//     round 5.
+//
+// 63 is a coincidence that lets both checks share the same constant.
+const MaxTableNameLength = 63
+
 // ValidateTableName rejects table names that aren't safe as both a
 // MySQL identifier and a single PVC path segment.
 //
 // Why a CLI-side check rather than the schema: the embedded
 // ingest.v1.json only enforces `minLength: 1` on `table` — no
-// `pattern`. Without this guard, --table=../../etc would flow into
-// the /data/shared/<table>/ PVC path; PR-b's stage Pod would then
-// write outside the intended subtree and could clobber another
-// table's data. Tightening the upstream schema with a `pattern`
-// is the proper long-term fix (it would protect the helm flow +
-// jobs-manager too) but needs a change to tracebloc/data-ingestors'
-// schema, which the schema-drift CI check pins — filed as
+// `pattern`, no `maxLength`. Without this guard, --table=../../etc
+// would flow into the /data/shared/<table>/ PVC path, and a 100-char
+// name would make the Pod-label assignment fail post-pre-flight.
+// Tightening the upstream schema is the proper long-term fix
+// (it would protect the helm flow + jobs-manager too) but needs
+// a change to tracebloc/data-ingestors' schema — filed as
 // tracebloc/data-ingestors#116. Once that lands and we re-sync,
 // this guard can collapse to a thin "schema says so" wrapper.
 //
@@ -61,6 +76,14 @@ func ValidateTableName(table string) error {
 	if table == "" {
 		return fmt.Errorf("table name is required (set --table)")
 	}
+	if len(table) > MaxTableNameLength {
+		return fmt.Errorf(
+			"table name is %d characters; the max is %d "+
+				"(matches both the MySQL identifier limit and the "+
+				"Kubernetes label-value limit, which the stage Pod's "+
+				"tracebloc.io/table label is bound by). Use a shorter name.",
+			len(table), MaxTableNameLength)
+	}
 	if !tableNamePattern.MatchString(table) {
 		return fmt.Errorf(
 			"table name %q is invalid: must match [A-Za-z0-9_]+ "+
diff --git a/internal/push/spec_test.go b/internal/push/spec_test.go
index 8f19716..05c7961 100644
--- a/internal/push/spec_test.go
+++ b/internal/push/spec_test.go
@@ -1,6 +1,7 @@
 package push
 
 import (
+	"strings"
 	"testing"
 
 	"gopkg.in/yaml.v3"
@@ -126,6 +127,24 @@ func TestValidateTableName_Accepts(t *testing.T) {
 	}
 }
 
+// TestValidateTableName_RejectsTooLong: K8s label values are
+// capped at 63 chars, and the stage Pod carries the raw table
+// name as the tracebloc.io/table label. Without this rejection,
+// a 100-char table name passes pre-flight then fails Pod create
+// with an opaque label-validation error. Bugbot flagged on PR-b
+// round 5.
+func TestValidateTableName_RejectsTooLong(t *testing.T) {
+	tooLong := strings.Repeat("a", MaxTableNameLength+1)
+	if err := ValidateTableName(tooLong); err == nil {
+		t.Fatalf("ValidateTableName(%d-char name) = nil, want length-cap error", len(tooLong))
+	}
+	// Right at the boundary: 63 chars must be accepted.
+	atCap := strings.Repeat("a", MaxTableNameLength)
+	if err := ValidateTableName(atCap); err != nil {
+		t.Errorf("ValidateTableName(%d-char name) = %v, want nil (at exact cap)", len(atCap), err)
+	}
+}
+
 // TestValidateTableName_RejectsUnsafe is the security-regression
 // pin. The path-traversal cases (../../etc, ../foo) are the ones
 // Bugbot flagged on PR #8 — if this test ever goes green with
diff --git a/internal/push/stage.go b/internal/push/stage.go
new file mode 100644
index 0000000..f8e9472
--- /dev/null
+++ b/internal/push/stage.go
@@ -0,0 +1,153 @@
+package push
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"time"
+
+	"k8s.io/client-go/kubernetes"
+)
+
+// StageOptions packages every dependency Stage needs into a single
+// struct so callers (cli.runDatasetPush) can build it incrementally
+// without juggling 10 positional args.
+//
+// Required fields are pinned in the doc; the nil-able Progress + Out
+// fields get default behaviors (NoOpProgress / io.Discard) so the
+// happy path is short.
+type StageOptions struct {
+	// Client is the discovered kubernetes.Interface.
+	Client kubernetes.Interface
+
+	// Executor is how the tar stream actually reaches the Pod.
+	// Production: a *SPDYExecutor backed by the rest.Config.
+	// Tests: a fakeExecutor (see stream_test.go).
+	Executor Executor
+
+	// Namespace + IngestorSAName come from Phase 2's parent-release
+	// discovery (with optional --ingestor-sa override).
+	Namespace      string
+	IngestorSAName string
+
+	// PVCClaimName + PVCMountPath come from Phase 3 PR-a's PVC
+	// discovery (always "client-pvc" + "/data/shared" today).
+	PVCClaimName string
+	PVCMountPath string
+
+	// Layout is the validated local files-to-stage.
+	Layout *LocalLayout
+
+	// Table is the destination table name. MUST have already
+	// passed ValidateTableName.
+	Table string
+
+	// StagePodImage overrides the alpine default. Empty = default.
+	StagePodImage string
+
+	// Progress is the customer-facing transfer bar. Nil = no-op
+	// (use this in tests / non-TTY output).
+	Progress Progress
+
+	// Out is where Stage prints non-progress diagnostic output
+	// (orphan warnings, "Created stage Pod X" etc.). Nil = io.Discard.
+	// Progress bar output is separate — schollz writes to its own
+	// configured writer (typically the same one).
+	Out io.Writer
+}
+
+// Stage is Phase 3 PR-b's top-level entrypoint. Does the full
+// dance: orphan scan → create stage Pod → wait Ready → tar-over-
+// exec stream → defer-delete (SIGINT-safe).
+//
+// On success, the layout's files are on the cluster's PVC at
+// /data/shared/<table>/{labels.csv, images/...} and the stage Pod
+// has been cleaned up. Caller proceeds to Phase 4's submit step.
+//
+// On error, the deferred cleanup still fires — no orphan Pod left
+// behind even if the stream fails partway. Phase 4's idempotency
+// key handles the "retry the whole push" scenario; for v0.1 we
+// don't try to resume a partial transfer.
+//
+// SIGINT handling: the caller is expected to wire a signal handler
+// that cancels the passed-in ctx (cli/main.go pattern). When ctx
+// is cancelled, the in-flight Exec stream returns ctx.Err(), the
+// deferred DeleteStagePod runs with a FRESH context (so it can
+// reach the API even after our parent ctx is cancelled), and the
+// error propagates with the SIGINT signal preserved.
+func Stage(ctx context.Context, opts StageOptions) error {
+	if opts.Out == nil {
+		opts.Out = io.Discard
+	}
+	if opts.Progress == nil {
+		opts.Progress = NoOpProgress{}
+	}
+
+	// 1. Orphan scan — best-effort. We log either the warning or
+	//    the scan-failure and proceed regardless. Customers who
+	//    care about orphans get to see them; customers whose RBAC
+	//    forbids List Pods just don't get the warning (no impact
+	//    on the push itself).
+	if orphans, err := FindOrphanStagePods(ctx, opts.Client, opts.Namespace); err != nil {
+		_, _ = fmt.Fprintf(opts.Out, "(orphan-scan skipped: %v)\n", err)
+	} else if warning := FormatOrphansWarning(orphans); warning != "" {
+		_, _ = fmt.Fprint(opts.Out, warning)
+	}
+
+	// 2. Create the stage Pod. From here on, cleanup matters.
+	podName, err := CreateStagePod(ctx, opts.Client, PodSpecOptions{
+		Namespace:          opts.Namespace,
+		PVCClaimName:       opts.PVCClaimName,
+		PVCMountPath:       opts.PVCMountPath,
+		Table:              opts.Table,
+		Image:              opts.StagePodImage,
+		ServiceAccountName: opts.IngestorSAName,
+	})
+	if err != nil {
+		return err
+	}
+	_, _ = fmt.Fprintf(opts.Out, "Created stage Pod %s/%s\n", opts.Namespace, podName)
+
+	// 3. Defer cleanup. The deferred call uses a FRESH context with
+	//    its own deadline — if the parent ctx is cancelled (SIGINT,
+	//    timeout), we still want the Pod deleted. Without this, a
+	//    Ctrl-C right after pod-create would leak an orphan.
+	//
+	//    30s deadline on the cleanup is a safety net for the case
+	//    where the API server itself is unreachable — better to
+	//    print "failed to clean up stage Pod" than hang forever.
+	defer func() {
+		cleanupCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancel()
+		if delErr := DeleteStagePod(cleanupCtx, opts.Client, opts.Namespace, podName); delErr != nil {
+			_, _ = fmt.Fprintf(opts.Out,
+				"WARNING: failed to delete stage Pod %s/%s (it will be activeDeadline-killed in <=%ds): %v\n",
+				opts.Namespace, podName, StagePodActiveDeadline, delErr)
+		}
+	}()
+
+	// 4. Wait for the Pod to be Ready (containers up, ready to
+	//    accept exec). Times out at StagePodReadyTimeout with
+	//    diagnostic hints from container statuses (image pull,
+	//    scheduling) when we have them.
+	_, _ = fmt.Fprintf(opts.Out, "Waiting for stage Pod to be Ready (timeout %s)...\n", StagePodReadyTimeout)
+	if _, err := WaitForStagePodReady(ctx, opts.Client, opts.Namespace, podName); err != nil {
+		return err
+	}
+
+	// 5. Stream the tar. This is where actual bytes flow. The
+	//    progress bar (if TTY) renders during this call.
+	_, _ = fmt.Fprintf(opts.Out, "Streaming %d files (%s) to %s...\n",
+		1+len(opts.Layout.Images), HumanBytes(opts.Layout.TotalBytes), StagedPrefix(opts.Table))
+
+	if err := StreamLayout(ctx, opts.Executor,
+		opts.Namespace, podName, "stage",
+		opts.Layout, opts.Table, opts.Progress); err != nil {
+		return err
+	}
+
+	// 6. Print "done" message. The deferred cleanup runs after this.
+	_, _ = fmt.Fprintf(opts.Out, "Staged %d files to %s\n",
+		1+len(opts.Layout.Images), StagedPrefix(opts.Table))
+	return nil
+}
diff --git a/internal/push/stage_test.go b/internal/push/stage_test.go
new file mode 100644
index 0000000..b46e0cb
--- /dev/null
+++ b/internal/push/stage_test.go
@@ -0,0 +1,288 @@
+package push
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"strings"
+	"testing"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/kubernetes/fake"
+	k8stesting "k8s.io/client-go/testing"
+)
+
+// readyOnNextGet adds a reactor that marks any new stage Pod as
+// Ready=True the moment after it's created. Without this, the fake
+// clientset creates the Pod in default (empty) status and
+// WaitForStagePodReady spins until its timeout. We can't easily
+// patch the Pod from inside StageOptions construction, so the
+// reactor is the standard pattern.
+func readyOnNextGet(cs *fake.Clientset) {
+	cs.PrependReactor("get", "pods",
+		func(action k8stesting.Action) (bool, runtime.Object, error) {
+			ga, ok := action.(k8stesting.GetAction)
+			if !ok {
+				return false, nil, nil
+			}
+			// Let the existing tracker handle the get to find the
+			// real object, then patch it before returning. The
+			// safest way: do the get ourselves, then synthesize a
+			// Ready condition onto the result.
+			pod, err := cs.Tracker().Get(action.GetResource(), action.GetNamespace(), ga.GetName())
+			if err != nil {
+				return false, nil, nil
+			}
+			p := pod.(*corev1.Pod)
+			p.Status.Phase = corev1.PodRunning
+			p.Status.Conditions = []corev1.PodCondition{{
+				Type:   corev1.PodReady,
+				Status: corev1.ConditionTrue,
+			}}
+			return true, p, nil
+		})
+}
+
+// TestStage_HappyPath: orphan scan (empty) → create → wait ready
+// → stream → delete. The fake clientset + fake executor let us
+// verify the full orchestration without a real cluster.
+func TestStage_HappyPath(t *testing.T) {
+	root := imgcDir(t)
+	layout, err := Discover(root)
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+
+	cs := fake.NewClientset()
+	readyOnNextGet(cs)
+	fe := &fakeExecutor{}
+	var out bytes.Buffer
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err = Stage(ctx, StageOptions{
+		Client:         cs,
+		Executor:       fe,
+		Namespace:      "tracebloc",
+		IngestorSAName: "ingestor",
+		PVCClaimName:   "client-pvc",
+		PVCMountPath:   "/data/shared",
+		Layout:         layout,
+		Table:          "cats_dogs",
+		Out:            &out,
+	})
+	if err != nil {
+		t.Fatalf("Stage: %v\nout:\n%s", err, out.String())
+	}
+
+	// Diagnostic output: customer should see the lifecycle
+	// breadcrumbs. Pin a few key phrases so a regression that
+	// silences them is caught.
+	for _, want := range []string{"Created stage Pod", "Waiting for stage Pod", "Streaming", "Staged"} {
+		if !strings.Contains(out.String(), want) {
+			t.Errorf("output missing %q:\n%s", want, out.String())
+		}
+	}
+
+	// Executor must have been called with the right ns + a tar
+	// stream on stdin (assert by inspecting captured tar).
+	if fe.gotNS != "tracebloc" {
+		t.Errorf("executor ns = %q, want tracebloc", fe.gotNS)
+	}
+	if len(fe.gotStdin) == 0 {
+		t.Error("executor stdin was empty; expected tar archive bytes")
+	}
+}
+
+// TestStage_CleanupRunsOnError: deferred DeleteStagePod must fire
+// even when StreamLayout fails. Otherwise every failed push leaks
+// an orphan Pod that can't be reused for retry (the random suffix
+// avoids name collisions, but it's still cluster pollution).
+func TestStage_CleanupRunsOnError(t *testing.T) {
+	root := imgcDir(t)
+	layout, err := Discover(root)
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+
+	cs := fake.NewClientset()
+	readyOnNextGet(cs)
+	fe := &fakeExecutor{
+		errToReturn:       errors.New("simulated stream failure"),
+		drainBeforeReturn: true,
+	}
+	var out bytes.Buffer
+
+	err = Stage(context.Background(), StageOptions{
+		Client:         cs,
+		Executor:       fe,
+		Namespace:      "tracebloc",
+		IngestorSAName: "ingestor",
+		PVCClaimName:   "client-pvc",
+		PVCMountPath:   "/data/shared",
+		Layout:         layout,
+		Table:          "t1",
+		Out:            &out,
+	})
+	if err == nil {
+		t.Fatal("Stage returned nil on stream failure")
+	}
+
+	// Cleanup contract: NO stage Pods should remain after Stage
+	// returns. The list-by-label query is the same one orphan.go
+	// uses, so if any leaks it's user-visible in the next push.
+	pods, listErr := cs.CoreV1().Pods("tracebloc").List(context.Background(),
+		metav1.ListOptions{LabelSelector: StagePodManagedByLabel + "=" + StagePodManagedByValue})
+	if listErr != nil {
+		t.Fatalf("post-Stage list: %v", listErr)
+	}
+	if len(pods.Items) != 0 {
+		var names []string
+		for _, p := range pods.Items {
+			names = append(names, p.Name)
+		}
+		t.Errorf("Stage leaked %d Pod(s) after stream failure: %v", len(pods.Items), names)
+	}
+}
+
+// TestStage_OrphanWarningSurfaces: stale stage Pods from a
+// previous crash get surfaced as a warning at the start of the
+// next push. Validates the integration: orphan scan → format →
+// print to Out.
+func TestStage_OrphanWarningSurfaces(t *testing.T) {
+	root := imgcDir(t)
+	layout, err := Discover(root)
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+
+	cs := fake.NewClientset(stagePodWithAge("stale-from-crash", "cats_dogs", 30*time.Minute))
+	readyOnNextGet(cs)
+	fe := &fakeExecutor{}
+	var out bytes.Buffer
+
+	err = Stage(context.Background(), StageOptions{
+		Client:         cs,
+		Executor:       fe,
+		Namespace:      "tracebloc",
+		IngestorSAName: "ingestor",
+		PVCClaimName:   "client-pvc",
+		PVCMountPath:   "/data/shared",
+		Layout:         layout,
+		Table:          "new_push",
+		Out:            &out,
+	})
+	if err != nil {
+		t.Fatalf("Stage: %v\nout:\n%s", err, out.String())
+	}
+
+	if !strings.Contains(out.String(), "stale-from-crash") {
+		t.Errorf("orphan warning didn't surface; output:\n%s", out.String())
+	}
+	if !strings.Contains(out.String(), "kubectl delete pod") {
+		t.Errorf("orphan warning missing actionable hint; output:\n%s", out.String())
+	}
+}
+
+// TestStage_IngestorSANameFlowsToPod: the --ingestor-sa override
+// MUST land on the stage Pod's ServiceAccountName — otherwise the
+// flag is silently ignored and customers who renamed the chart's
+// ingestor SA get pods running as the default SA (no PVC write
+// access). Pin the integration here at the Stage layer since the
+// flag wiring's effect is only observable end-to-end. Bugbot
+// flagged the missing test coverage on PR-a.
+func TestStage_IngestorSANameFlowsToPod(t *testing.T) {
+	root := imgcDir(t)
+	layout, err := Discover(root)
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+
+	cs := fake.NewClientset()
+	readyOnNextGet(cs)
+	fe := &fakeExecutor{}
+	var out bytes.Buffer
+
+	const customSA = "my-renamed-ingestor"
+	_ = Stage(context.Background(), StageOptions{
+		Client:         cs,
+		Executor:       fe,
+		Namespace:      "tracebloc",
+		IngestorSAName: customSA,
+		PVCClaimName:   "client-pvc",
+		PVCMountPath:   "/data/shared",
+		Layout:         layout,
+		Table:          "t1",
+		Out:            &out,
+	})
+
+	// List the stage Pods created (we deleted them in Stage's
+	// defer, so we need to inspect the tracker BEFORE the test
+	// process finishes — but the cleanup happens within Stage).
+	// Workaround: assert via the fake clientset's action log —
+	// look at the Create action's object directly.
+	var seenSA string
+	for _, action := range cs.Actions() {
+		if action.GetVerb() == "create" && action.GetResource().Resource == "pods" {
+			pod := action.(k8stesting.CreateAction).GetObject().(*corev1.Pod)
+			seenSA = pod.Spec.ServiceAccountName
+			break
+		}
+	}
+	if seenSA != customSA {
+		t.Errorf("created Pod's ServiceAccountName = %q, want %q (the --ingestor-sa override)",
+			seenSA, customSA)
+	}
+}
+
+// TestStage_CancelledContext_StillCleansUp: when the parent ctx
+// is cancelled (SIGINT scenario), the deferred cleanup MUST still
+// run — using its own context with a fresh deadline. Without
+// this, every Ctrl-C leaves an orphan Pod.
+func TestStage_CancelledContext_StillCleansUp(t *testing.T) {
+	root := imgcDir(t)
+	layout, err := Discover(root)
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+
+	cs := fake.NewClientset()
+	readyOnNextGet(cs)
+	fe := &fakeExecutor{
+		// Simulate the SPDY stream returning ctx.Err() because the
+		// caller cancelled mid-stream.
+		errToReturn: context.Canceled,
+	}
+	var out bytes.Buffer
+
+	// Use a context we cancel immediately. Stage's create still
+	// succeeds (fake clientset doesn't honor ctx on Create), but
+	// the executor sees Canceled.
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+
+	_ = Stage(ctx, StageOptions{
+		Client:         cs,
+		Executor:       fe,
+		Namespace:      "tracebloc",
+		IngestorSAName: "ingestor",
+		PVCClaimName:   "client-pvc",
+		PVCMountPath:   "/data/shared",
+		Layout:         layout,
+		Table:          "interrupted",
+		Out:            &out,
+	})
+
+	// Whether Stage returns an error or not, the cluster MUST be
+	// clean. This is the SIGINT-safety contract.
+	pods, _ := cs.CoreV1().Pods("tracebloc").List(context.Background(),
+		metav1.ListOptions{LabelSelector: StagePodManagedByLabel + "=" + StagePodManagedByValue})
+	if len(pods.Items) != 0 {
+		t.Errorf("Stage leaked %d Pod(s) on context-cancel; SIGINT cleanup contract broken",
+			len(pods.Items))
+	}
+}
diff --git a/internal/push/stream.go b/internal/push/stream.go
new file mode 100644
index 0000000..baba3cb
--- /dev/null
+++ b/internal/push/stream.go
@@ -0,0 +1,462 @@
+package push
+
+import (
+	"archive/tar"
+	"bytes"
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"path"
+	"path/filepath"
+
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/client-go/rest"
+	"k8s.io/client-go/tools/remotecommand"
+)
+
+// Executor abstracts "exec a command in a Pod with stdin/stdout/
+// stderr." The interface exists for one reason: client-go's
+// fake.Clientset (the cornerstone of every test in this package)
+// doesn't support the exec subresource. Without an interface
+// boundary here, Stream's lifecycle couldn't be unit-tested —
+// every assertion would need a real cluster.
+//
+// Production uses SPDYExecutor (this file). Tests construct a
+// FakeExecutor (stream_test.go) that captures stdin into a buffer
+// + records the command + can return a synthetic error.
+type Executor interface {
+	// Exec runs cmd in the named container of ns/pod, wiring
+	// stdin/stdout/stderr to the provided streams. Returns when
+	// the remote command terminates or the context is cancelled.
+	//
+	// stdin, stdout, stderr may be nil — Exec must tolerate that
+	// (a stage Pod with no stderr to capture is a legitimate
+	// case for the "just delete me" command after the tar is
+	// done).
+	Exec(ctx context.Context, ns, pod, container string,
+		cmd []string, stdin io.Reader, stdout, stderr io.Writer) error
+}
+
+// SPDYExecutor is the production Executor. Wraps client-go's
+// remotecommand.NewSPDYExecutor — the same machinery `kubectl exec`
+// uses internally.
+//
+// Holds both Config (needed by NewSPDYExecutor for auth/transport)
+// and Client (for building the exec URL via the REST client). Both
+// come from cluster.Load + cluster.NewClientset.
+type SPDYExecutor struct {
+	Config *rest.Config
+	Client kubernetes.Interface
+}
+
+func (e *SPDYExecutor) Exec(
+	ctx context.Context,
+	ns, pod, container string,
+	cmd []string,
+	stdin io.Reader, stdout, stderr io.Writer,
+) error {
+	// Build the exec URL the way kubectl does — POST against the
+	// /pods/<name>/exec subresource, parametrized via PodExecOptions
+	// encoded through the scheme's ParameterCodec.
+	req := e.Client.CoreV1().RESTClient().Post().
+		Resource("pods").
+		Name(pod).
+		Namespace(ns).
+		SubResource("exec").
+		VersionedParams(&corev1.PodExecOptions{
+			Container: container,
+			Command:   cmd,
+			// Toggle the streams that the caller actually wants.
+			// PodExecOptions ignores the unused ones, but being
+			// explicit here matches kubectl's behavior and means
+			// "stdin requested but caller passed nil" surfaces as
+			// a clearer error from the executor.
+			Stdin:  stdin != nil,
+			Stdout: stdout != nil,
+			Stderr: stderr != nil,
+		}, scheme.ParameterCodec)
+
+	exec, err := remotecommand.NewSPDYExecutor(e.Config, "POST", req.URL())
+	if err != nil {
+		return fmt.Errorf("creating SPDY executor for %s/%s: %w", ns, pod, err)
+	}
+
+	err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{
+		Stdin:  stdin,
+		Stdout: stdout,
+		Stderr: stderr,
+		// Tty=false: we're not running an interactive shell, we're
+		// piping bytes. With Tty=true the tar stream would get
+		// line-buffered/CR-translated/etc, which corrupts the
+		// archive.
+		Tty: false,
+	})
+	if err != nil {
+		return fmt.Errorf("exec stream against %s/%s: %w", ns, pod, err)
+	}
+	return nil
+}
+
+// StreamLayout pipes a tar of the layout's files into the stage
+// Pod's `tar -xf - -C /data/shared/<table>` so the files land at
+// /data/shared/<table>/{labels.csv, images/...}.
+//
+// On success, the stage Pod's PVC mount now contains the files
+// jobs-manager's ingestor will read in Phase 4.
+//
+// On failure, the partial-write state on the PVC is whatever tar
+// managed before the error — possibly a few image files but
+// likely not all. Phase 4's idempotency_key handles "retry the
+// whole push"; for v0.1, customers re-run the command and the
+// shell's `mkdir -p` is idempotent so re-creating the destination
+// is fine. Truly-recovering a failed mid-stream push (resume from
+// last-written file) is a v0.2 optimization.
+//
+// Why `tar` (rather than a more modern protocol like a custom REST
+// upload endpoint on jobs-manager): tar-over-exec works against
+// any cluster running the existing chart, with zero server-side
+// changes. A custom upload endpoint would require a coordinated
+// jobs-manager release before the CLI could ship.
+func StreamLayout(
+	ctx context.Context,
+	exec Executor,
+	namespace, podName, containerName string,
+	layout *LocalLayout,
+	table string,
+	progress Progress,
+) error {
+	if progress == nil {
+		progress = NoOpProgress{}
+	}
+	defer progress.Finish()
+
+	// Generate the staging-dir suffix + compose the remote command
+	// BEFORE spawning the tar goroutine. Earlier versions did the
+	// suffix after, which meant a crypto/rand failure would return
+	// from this function without closing the pipe or waiting on
+	// the goroutine — the goroutine would block once the ~64 KB
+	// pipe buffer filled, leaking forever. Bugbot flagged on r9.
+	//
+	// Staging dir gets a fresh 8-hex-char suffix per invocation so
+	// two concurrent `dataset push` runs for the same table don't
+	// race on the same .staging path (r8). Without this, push A's
+	// `rm -rf .staging` could wipe push B's in-progress tar
+	// extraction (or vice versa), producing an interleaved
+	// corrupt dataset on the PVC.
+	stagingSuffix, err := randomSuffix(4) // 4 bytes → 8 hex chars
+	if err != nil {
+		return fmt.Errorf("generating staging-dir suffix: %w", err)
+	}
+	dest := StagedPrefix(table)
+	staging := dest + ".staging-" + stagingSuffix
+
+	// Backup path for the swap. Same unique suffix as staging so
+	// two parallel pushes don't collide on each other's .old-,
+	// AND so the find pattern below catches both .staging-* and
+	// .old-* siblings as orphan candidates.
+	backup := dest + ".old-" + stagingSuffix
+
+	parentDir := path.Dir(dest)
+	tableBase := path.Base(dest)
+
+	// Remote command pipeline. Crosses three semantic guarantees:
+	//
+	//   - HERMETIC (r5): old files don't survive into a re-push
+	//   - TRANSACTIONAL (r7+r10): the previously-staged dataset
+	//     stays intact if THIS push fails mid-transfer; backup-
+	//     and-rollback restores on mv failure
+	//   - PARALLEL-SAFE (r8): unique .staging-<hex>/.old-<hex>
+	//     suffix per invocation so concurrent pushes don't
+	//     interleave each other's tar extraction
+	//   - EXIT-FAITHFUL (r11): set -e propagates any non-zero
+	//     status; the prior `&&-chain ; find || true` shape
+	//     could silently return success on actual failures
+	//
+	// Pipeline steps:
+	//   1. set -e — any unhandled non-zero status aborts the script
+	//   2. rm -rf $STAGING (clean prior failed attempt)
+	//   3. mkdir + tar (extract to staging)
+	//   4. If $DEST exists, mv $DEST → $DEST.old-<hex>  (backup)
+	//   5. mv $STAGING → $DEST                          (commit)
+	//      On failure: restore backup, exit 1
+	//   6. rm -rf $BACKUP                                (success)
+	//   7. find ... orphan cleanup (best-effort, || true)
+	//
+	// The earlier `&&-chained ... ; find ... || true` shape had a
+	// hidden correctness bug (Bugbot r11): POSIX `;` runs the
+	// next command regardless of prior status, and `|| true` then
+	// forces exit 0. A failed tar mid-script would silently
+	// return success to the exec subprocess, and the CLI would
+	// report "Staged N files" on what was actually a failed push.
+	//
+	// set -e fixes that: any unguarded non-zero exits the script
+	// with that status. The find's `|| true` is still fine
+	// because under set -e a `cmd || fallback` is treated as
+	// "cmd may fail" and doesn't trigger the immediate exit.
+	//
+	// Failure modes (recap):
+	//   - tar fails: set -e aborts; $DEST untouched (transactional from r7)
+	//   - backup mv fails: same — aborts before any change to $DEST
+	//   - main mv fails: explicit if/then rolls back the backup
+	//     then `exit 1` (set -e doesn't preempt the explicit
+	//     handler because of the `||`)
+	//   - final rm fails: set -e exits, but the customer's new
+	//     data is already in place — the leftover .old-<hex>
+	//     gets picked up by the find sweep on next invocation
+	//   - find fails: `|| true` suppresses (cleanup is best-effort)
+	remoteCmd := []string{
+		"/bin/sh", "-c",
+		fmt.Sprintf(
+			"set -e\n"+
+				"rm -rf %q\n"+
+				"mkdir -p %q\n"+
+				"/bin/tar -xf - -C %q\n"+
+				"if [ -e %q ]; then mv %q %q; fi\n"+
+				"if ! mv %q %q; then\n"+
+				"  if [ -e %q ]; then mv %q %q; fi\n"+
+				"  exit 1\n"+
+				"fi\n"+
+				"rm -rf %q\n"+
+				"find %q -maxdepth 1 \\( -name %q -o -name %q \\) -mmin +60 -exec rm -rf {} + 2>/dev/null || true\n",
+			staging,            // rm
+			staging,            // mkdir
+			staging,            // tar -C
+			dest, dest, backup, // if exists, backup
+			staging, dest, // main mv
+			backup, backup, dest, // rollback
+			backup,                                                 // success cleanup
+			parentDir, tableBase+".staging-*", tableBase+".old-*"), // find
+	}
+
+	// Use a synchronous pipe so the tar Writer's writes block on
+	// the exec stream's reads — no in-memory buffering of the
+	// whole archive. For a 1 GiB dataset this is the difference
+	// between using 1 GiB of laptop RAM and using a few KB.
+	pr, pw := io.Pipe()
+
+	// Wire the progress sink on the WRITE side so every byte the
+	// tar Writer emits (headers + bodies) counts toward the bar.
+	countedPW := &progressWriter{w: pw, p: progress}
+
+	// Capture stderr from the in-cluster tar so failures surface
+	// with the actual remote message.
+	var stderrBuf bytes.Buffer
+
+	// Kick off the tar build in a goroutine. The Pipe.Writer MUST
+	// be closed when we're done — without that, the Pipe.Reader
+	// side blocks forever waiting for more bytes.
+	//
+	// CloseWithError vs Close: CloseWithError preserves the tar
+	// error across the pipe so the exec side sees the cause.
+	tarErrCh := make(chan error, 1)
+	go func() {
+		err := writeLayoutTar(countedPW, layout)
+		if err != nil {
+			_ = pw.CloseWithError(err)
+		} else {
+			_ = pw.Close()
+		}
+		tarErrCh <- err
+	}()
+
+	streamErr := exec.Exec(ctx, namespace, podName, containerName,
+		remoteCmd, pr, nil, &stderrBuf)
+
+	// Close the reader side. If the executor returned without
+	// fully draining stdin (early exit, ctx cancellation, remote
+	// tar dying), the tar-write goroutine is blocked on its next
+	// pipe.Write — closing the reader unblocks it with
+	// io.ErrClosedPipe so it can finish and report on tarErrCh.
+	// Without this, every error path deadlocks.
+	_ = pr.Close()
+
+	// Drain the tar goroutine — even on stream error, it must
+	// finish or we leak. Buffered channel size 1 means this
+	// receive won't block (the goroutine already sent or will
+	// send imminently after CloseWithError or the pr.Close above).
+	tarErr := <-tarErrCh
+
+	// Order matters here. Bugbot r11 caught the previous logic:
+	// when the LOCAL tar build fails (e.g. r8's stream-time size
+	// cap recheck rejecting a file that grew on disk), the
+	// CloseWithError on the pipe causes exec to see "broken pipe"
+	// and return a generic stream error. If we report streamErr
+	// first, the customer sees "streaming failed" instead of the
+	// actually-actionable "dataset exceeded v0.1 cap" diagnostic
+	// from the tar side.
+	//
+	// Check tarErr first — when both are non-nil, the tar side
+	// is usually the upstream cause. streamErr-only (no tarErr)
+	// is the genuine network/RBAC/remote-tar-failed case where
+	// the exec wording is the right surface.
+	if tarErr != nil {
+		return fmt.Errorf("building tar archive: %w", tarErr)
+	}
+	if streamErr != nil {
+		hint := ""
+		if remote := stderrBuf.String(); remote != "" {
+			hint = fmt.Sprintf(" (remote tar stderr: %s)", remote)
+		}
+		return fmt.Errorf("streaming files to %s/%s: %w%s", namespace, podName, streamErr, hint)
+	}
+	return nil
+}
+
+// writeLayoutTar packages the layout's files into the writer as a
+// flat tar archive with this structure:
+//
+//	labels.csv          (file)
+//	images/<basename>   (file, per layout.Images)
+//
+// The destination tar is unpacked with `tar -xf - -C /data/shared/<table>`,
+// so file paths are RELATIVE to that root — that's why we write
+// "labels.csv" not "/data/shared/<table>/labels.csv".
+//
+// Mode is 0644 on all files (read-only for the ingestor SA). Tar's
+// type flag is TypeReg (regular file). No symlinks, no hard links,
+// no extended attrs — the layout we accept doesn't have any.
+//
+// Uses a named return + deferred Close so a tar trailer-write error
+// (truncated archive — GNU tar refuses to extract these) propagates
+// even on the happy path. errcheck would otherwise flag the bare
+// `defer tw.Close()`, and rightly: silently dropping that error
+// means a truncated stream looks identical to a successful one
+// from this function's caller.
+func writeLayoutTar(w io.Writer, layout *LocalLayout) (err error) {
+	tw := tar.NewWriter(w)
+	defer func() {
+		// If we already have an error, preserve it; the close error
+		// is less useful than whatever caused the early return.
+		// Otherwise surface the close error so a failed trailer
+		// write doesn't silently corrupt the archive.
+		if cerr := tw.Close(); cerr != nil && err == nil {
+			err = fmt.Errorf("closing tar writer: %w", cerr)
+		}
+	}()
+
+	// Running total enforces the v0.1 MaxTotalBytes cap at STREAM
+	// time, not just at Discover time. A file that grew between
+	// pre-flight and now (TOCTOU) — or just a sum miscount — would
+	// otherwise sneak past the cap silently. Bugbot flagged on
+	// PR-b r8 as Medium.
+	var totalBytes int64
+
+	// labels.csv first (small, sanity-checks the stream quickly).
+	n, err := writeTarFile(tw, layout.LabelsCSV, "labels.csv")
+	if err != nil {
+		return fmt.Errorf("packaging labels.csv: %w", err)
+	}
+	totalBytes += n
+	if totalBytes > MaxTotalBytes {
+		return fmt.Errorf(
+			"dataset exceeded v0.1 total cap of %s during stream "+
+				"(labels.csv alone is %s; pre-flight likely raced with "+
+				"a file growing on disk)",
+			HumanBytes(MaxTotalBytes), HumanBytes(totalBytes))
+	}
+
+	// Then each image. We write them in the order Discover returned
+	// (filesystem-walk order, not sorted) — sorting would make the
+	// stream deterministic across runs but adds zero customer value.
+	for _, abs := range layout.Images {
+		// The destination filename inside images/ is the file's
+		// basename — strips the customer's local path so a push
+		// from /home/alice/datasets/cats_dogs/images/001.jpg
+		// becomes images/001.jpg in the tar (and on the PVC).
+		//
+		// Use path.Join (always forward-slash) NOT filepath.Join
+		// (OS-native separator) for the tar HEADER name. USTAR/POSIX
+		// tar requires forward slashes; a Windows-built CLI using
+		// filepath.Join would emit `images\001.jpg` headers that the
+		// Linux stage Pod's `tar -xf` either rejects or extracts as
+		// a flat-named file. Bugbot flagged on PR-b round 6.
+		dst := path.Join("images", filepath.Base(abs))
+		n, err := writeTarFile(tw, abs, dst)
+		if err != nil {
+			return fmt.Errorf("packaging %s: %w", abs, err)
+		}
+		totalBytes += n
+		if totalBytes > MaxTotalBytes {
+			return fmt.Errorf(
+				"dataset exceeded v0.1 total cap of %s after streaming %s "+
+					"(reached %s; file growth between pre-flight and stream)",
+				HumanBytes(MaxTotalBytes), dst, HumanBytes(totalBytes))
+		}
+	}
+
+	// tw.Close() in the defer above writes the tar footer
+	// (two zero blocks). Without that, GNU tar treats the archive
+	// as truncated and refuses to extract.
+	return nil
+}
+
+// writeTarFile writes one file from `src` into tw under the
+// archive-relative name `dst`. Streams the file body — no full-
+// read into memory — so a single 500 MiB image doesn't balloon
+// the CLI's memory.
+//
+// Returns the file's declared size (from os.Lstat) so the caller
+// can maintain a running total against MaxTotalBytes — closes the
+// stream-time half of the TOCTOU window Bugbot flagged on r8.
+//
+// Uses os.Lstat (not Stat) + an explicit symlink reject as a
+// defense-in-depth second layer behind walk.go's rejectSymlink
+// (Bugbot r4). Also re-checks the single-file size cap at stream
+// time: a file that grew between Discover and now would otherwise
+// upload past the advertised 500 MiB cap (Bugbot r8).
+func writeTarFile(tw *tar.Writer, src, dst string) (int64, error) {
+	st, err := os.Lstat(src)
+	if err != nil {
+		return 0, err
+	}
+	if st.Mode()&os.ModeSymlink != 0 {
+		// Reaching here means Discover let a symlink through —
+		// shouldn't happen in production, but the explicit error
+		// keeps the security property locally enforceable.
+		return 0, fmt.Errorf("refusing to stream symbolic link %q (defense-in-depth; should have been rejected by Discover)", src)
+	}
+	if st.Size() > MaxSingleFileBytes {
+		// Stream-time recheck of the single-file cap. Discover
+		// caught this in pre-flight; if we see it again here, the
+		// file grew between then and now.
+		return 0, sizeError(dst, st.Size(), MaxSingleFileBytes)
+	}
+	hdr := &tar.Header{
+		Name:     dst,
+		Size:     st.Size(),
+		Mode:     0o644,
+		Typeflag: tar.TypeReg,
+		// We don't carry ModTime forward — the customer's local
+		// mtime has no useful semantic on the PVC, and zeroing it
+		// keeps the tar bit-for-bit reproducible across runs
+		// (helps tests, makes future content-hash checks easier).
+	}
+	if err := tw.WriteHeader(hdr); err != nil {
+		return 0, err
+	}
+	f, err := os.Open(src)
+	if err != nil {
+		return 0, err
+	}
+	// Read-only file Close errors are not meaningful — there's
+	// nothing the caller can do, and the underlying file descriptor
+	// gets returned to the OS regardless. Explicit-discard pattern,
+	// same as the Fprintf writers elsewhere.
+	defer func() { _ = f.Close() }()
+	// io.CopyN caps the actual stream at the declared header size.
+	// Without the cap, a file that grew between Lstat above and
+	// the Copy here would overflow the header — the tar would be
+	// corrupted (header says N bytes, body has > N). Cap-and-trust
+	// is the safe choice: if the file shrank, the tar trailer
+	// surfaces the mismatch; if the file grew, we just stop at
+	// the declared size and let the new bytes get re-pushed next
+	// invocation. Closes the body-side half of the TOCTOU window.
+	if _, err := io.CopyN(tw, f, st.Size()); err != nil && !errors.Is(err, io.EOF) {
+		return 0, err
+	}
+	return st.Size(), nil
+}
diff --git a/internal/push/stream_test.go b/internal/push/stream_test.go
new file mode 100644
index 0000000..62bde45
--- /dev/null
+++ b/internal/push/stream_test.go
@@ -0,0 +1,428 @@
+package push
+
+import (
+	"archive/tar"
+	"bytes"
+	"context"
+	"errors"
+	"io"
+	"regexp"
+	"sort"
+	"strings"
+	"testing"
+)
+
+// fakeExecutor is the stand-in for SPDYExecutor in tests. Captures
+// every Exec call's parameters into fields the test can assert on,
+// and reads the stdin stream into a buffer (so we can parse the tar
+// archive back out and verify its contents).
+type fakeExecutor struct {
+	// Captured call parameters.
+	gotNS, gotPod, gotContainer string
+	gotCmd                      []string
+
+	// Captured stdin (the tar archive).
+	gotStdin []byte
+
+	// What to write back to the caller as stderr (simulates a
+	// "tar: no space left on device" style remote-side failure).
+	stderrToReturn []byte
+
+	// What to return as the exec error (simulates a 4xx/5xx from
+	// the apiserver, a network drop, etc.).
+	errToReturn error
+
+	// drainBeforeReturn: if false, errToReturn fires BEFORE the
+	// stdin pipe is drained — simulates a fast failure where the
+	// remote tar dies on the first byte. Test infrastructure for
+	// broken-pipe coverage.
+	drainBeforeReturn bool
+}
+
+func (f *fakeExecutor) Exec(
+	_ context.Context,
+	ns, pod, container string,
+	cmd []string,
+	stdin io.Reader, _ io.Writer, stderr io.Writer,
+) error {
+	f.gotNS, f.gotPod, f.gotContainer = ns, pod, container
+	f.gotCmd = cmd
+
+	if stdin != nil && (f.drainBeforeReturn || f.errToReturn == nil) {
+		b, _ := io.ReadAll(stdin)
+		f.gotStdin = b
+	}
+	if len(f.stderrToReturn) > 0 && stderr != nil {
+		_, _ = stderr.Write(f.stderrToReturn)
+	}
+	return f.errToReturn
+}
+
+// TestStreamLayout_TarPathsAreForwardSlash pins the cross-platform
+// tar header convention. On Windows, filepath.Join uses '\' as
+// separator — using it for the tar HEADER name would produce
+// archive entries the Linux stage Pod's `tar -xf` either rejects
+// or extracts as flat-named files (collapsing the images/ subdir).
+// The fix is path.Join (always forward-slash); this test asserts
+// the portable property regardless of which OS the test runs on.
+// Bugbot flagged the Windows bug on PR-b round 6.
+func TestStreamLayout_TarPathsAreForwardSlash(t *testing.T) {
+	root := imgcDir(t, "a.jpg", "b.png")
+	layout, err := Discover(root)
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+	fe := &fakeExecutor{}
+	if err := StreamLayout(context.Background(), fe, "tracebloc", "p", "stage",
+		layout, "t", NoOpProgress{}); err != nil {
+		t.Fatalf("StreamLayout: %v", err)
+	}
+
+	tr := tar.NewReader(bytes.NewReader(fe.gotStdin))
+	for {
+		hdr, err := tr.Next()
+		if errors.Is(err, io.EOF) {
+			break
+		}
+		if err != nil {
+			t.Fatalf("reading tar: %v", err)
+		}
+		if strings.Contains(hdr.Name, `\`) {
+			t.Errorf("tar entry %q contains backslash; "+
+				"Linux stage Pod's `tar -xf` requires forward-slash paths", hdr.Name)
+		}
+	}
+}
+
+// TestStreamLayout_TarContents end-to-end: the bytes the executor
+// sees on stdin MUST be a valid tar with the layout's files at the
+// expected paths (labels.csv at the root, images/<basename> under
+// images/). If this drifts, the in-cluster `tar -xf - -C
+// /data/shared/<table>/` lands files in the wrong place and
+// jobs-manager's ingestor in Phase 4 silently sees zero rows.
+func TestStreamLayout_TarContents(t *testing.T) {
+	root := imgcDir(t, "a.jpg", "b.png")
+	layout, err := Discover(root)
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+
+	fe := &fakeExecutor{}
+	err = StreamLayout(context.Background(), fe, "tracebloc", "pod-x", "stage",
+		layout, "cats_dogs", NoOpProgress{})
+	if err != nil {
+		t.Fatalf("StreamLayout: %v", err)
+	}
+
+	// Parse the captured tar back out and collect (name, size).
+	type entry struct {
+		name string
+		size int64
+	}
+	var got []entry
+	tr := tar.NewReader(bytes.NewReader(fe.gotStdin))
+	for {
+		hdr, err := tr.Next()
+		if errors.Is(err, io.EOF) {
+			break
+		}
+		if err != nil {
+			t.Fatalf("reading tar: %v", err)
+		}
+		got = append(got, entry{name: hdr.Name, size: hdr.Size})
+	}
+
+	// Sort for deterministic comparison — the producer walks
+	// images/ in OS order which isn't stable across filesystems.
+	sort.Slice(got, func(i, j int) bool { return got[i].name < got[j].name })
+
+	// Three entries: labels.csv + 2 images.
+	if len(got) != 3 {
+		t.Fatalf("tar entry count = %d, want 3 (labels.csv + 2 images); got=%v", len(got), got)
+	}
+	want := []entry{
+		{name: "images/a.jpg", size: 100}, // imgcDir writes 100-byte images
+		{name: "images/b.png", size: 100},
+		{name: "labels.csv", size: 39}, // "image_id,label\n001.jpg,cat\n002.jpg,dog\n"
+	}
+	for i := range want {
+		if got[i] != want[i] {
+			t.Errorf("entry[%d] = %v, want %v", i, got[i], want[i])
+		}
+	}
+}
+
+// TestStreamLayout_RemoteCommand pins the in-cluster command. If
+// this ever drifts (e.g. someone changes the destination path
+// helper), the tar gets extracted to the wrong PVC subdirectory
+// and Phase 4 silently fails. The mkdir -p is what makes re-push
+// overwrite semantics work.
+func TestStreamLayout_RemoteCommand(t *testing.T) {
+	layout, err := Discover(imgcDir(t))
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+	fe := &fakeExecutor{}
+	_ = StreamLayout(context.Background(), fe, "tracebloc", "pod-x", "stage",
+		layout, "my_table", NoOpProgress{})
+
+	if got, want := len(fe.gotCmd), 3; got != want {
+		t.Fatalf("len(cmd) = %d, want %d", got, want)
+	}
+	if fe.gotCmd[0] != "/bin/sh" || fe.gotCmd[1] != "-c" {
+		t.Errorf("cmd[:2] = %v, want [/bin/sh -c]", fe.gotCmd[:2])
+	}
+	script := fe.gotCmd[2]
+	// The remote command must be:
+	//   1. HERMETIC (Bugbot r5): old files don't survive a re-push
+	//   2. TRANSACTIONAL (Bugbot r7): tar failure preserves prev
+	//      dataset
+	//   3. RACE-SAFE under parallel-push (Bugbot r8): unique
+	//      .staging-<hex> suffix per invocation so concurrent pushes
+	//      don't interleave each other's tar extraction
+	//
+	// Pattern: extract to <dest>.staging-<hex>, then on tar SUCCESS
+	// rm the old dest + mv staging → dest. The order matters:
+	//
+	//   - tar BEFORE any rm of $DEST  (preserves on tar failure)
+	//   - mv AFTER tar succeeds       (atomic-ish swap)
+	//
+	// Extract the staging path with a regex so the random hex
+	// suffix doesn't pin us to a specific invocation's bytes.
+	stagingRE := regexp.MustCompile(`/data/shared/my_table\.staging-[0-9a-f]{8}`)
+	stagingPaths := stagingRE.FindAllString(script, -1)
+	if len(stagingPaths) == 0 {
+		t.Fatalf("remote script has no /data/shared/my_table.staging-<8hex> path (race-safety regression):\n%s", script)
+	}
+	// All staging mentions must refer to the SAME suffix in a single
+	// invocation. If we see two distinct suffixes that's a bug:
+	// rm/mkdir/tar/mv would target different dirs.
+	for i := 1; i < len(stagingPaths); i++ {
+		if stagingPaths[i] != stagingPaths[0] {
+			t.Errorf("remote script mixes staging suffixes %q and %q:\n%s",
+				stagingPaths[0], stagingPaths[i], script)
+		}
+	}
+	staging := stagingPaths[0]
+
+	// Pin the four key operations.
+	for _, want := range []string{
+		`mkdir -p "` + staging + `"`,
+		`tar -xf - -C "` + staging + `"`,
+		`mv "` + staging + `" "/data/shared/my_table"`,
+	} {
+		if !strings.Contains(script, want) {
+			t.Errorf("remote script missing %q: %s", want, script)
+		}
+	}
+	// Transactional property: tar must come BEFORE any mutation
+	// of the real destination. r5+r7 used `rm $DEST` for the
+	// pre-mv step; r10 swapped that to `mv $DEST $DEST.old-<hex>`
+	// (backup-and-swap, so a mv failure can be rolled back).
+	// The contract is the same: tar runs while $DEST is intact.
+	tarIdx := strings.Index(script, `tar -xf - -C "`+staging+`"`)
+	destBackupMvIdx := strings.Index(script, `mv "/data/shared/my_table" "`)
+	if tarIdx < 0 || destBackupMvIdx < 0 {
+		t.Fatalf("remote script missing tar or destination-backup mv: %s", script)
+	}
+	if tarIdx >= destBackupMvIdx {
+		t.Errorf("remote script touches $DEST BEFORE tar succeeds — partial-transfer could destroy previous data:\n%s", script)
+	}
+	// Single-segment guarantee: both rm targets must end with
+	// /my_table or /my_table.staging-* — never just /data/shared.
+	if strings.Contains(script, `rm -rf "/data/shared"`) ||
+		strings.Contains(script, `rm -rf "/data/shared/"`) {
+		t.Errorf("remote script rm-rfs the parent /data/shared (would nuke sibling tables):\n%s", script)
+	}
+
+	// Bugbot r9 + r10: orphan cleanup for previously-failed pushes
+	// must catch BOTH .staging-* and .old-* siblings. The .old-*
+	// dirs are created by the round-10 backup-and-rollback swap;
+	// without including them in the find pattern, failed-mid-swap
+	// pushes would leak .old-* dirs.
+	for _, wantName := range []string{`-name "my_table.staging-*"`, `-name "my_table.old-*"`} {
+		if !strings.Contains(script, wantName) {
+			t.Errorf("remote script missing %s in orphan cleanup:\n%s", wantName, script)
+		}
+	}
+	if !strings.Contains(script, `-mmin +60 -exec rm -rf {} +`) {
+		t.Errorf("remote script missing -mmin+60 orphan window:\n%s", script)
+	}
+
+	// Bugbot r10: backup-and-swap pattern. The previous dataset
+	// must be backed up to .old-<hex> BEFORE the new dataset
+	// arrives, and restored if the main mv fails. Pin the key
+	// shape pieces — backup mv, primary mv, rollback mv, cleanup.
+	backupRE := regexp.MustCompile(`/data/shared/my_table\.old-[0-9a-f]{8}`)
+	backupPaths := backupRE.FindAllString(script, -1)
+	if len(backupPaths) == 0 {
+		t.Fatalf("remote script has no .old-<hex> backup path (r10 rollback regression):\n%s", script)
+	}
+	// All .old-<hex> mentions in a single invocation must agree —
+	// otherwise the rollback would target a different path than
+	// the backup created.
+	for i := 1; i < len(backupPaths); i++ {
+		if backupPaths[i] != backupPaths[0] {
+			t.Errorf("remote script mixes backup suffixes %q vs %q:\n%s",
+				backupPaths[0], backupPaths[i], script)
+		}
+	}
+	backup := backupPaths[0]
+	// Backup must use the SAME suffix as staging — different
+	// suffixes would defeat the "find -name ...staging-* ...old-*"
+	// orphan-cleanup symmetry, AND would risk collision with a
+	// concurrent push's .old-<hex>.
+	if strings.TrimPrefix(backup, "/data/shared/my_table.old-") !=
+		strings.TrimPrefix(staging, "/data/shared/my_table.staging-") {
+		t.Errorf("backup and staging suffixes diverge: %q vs %q", backup, staging)
+	}
+	// Backup mv (DEST → .old) must appear BEFORE primary mv
+	// (.staging → DEST), or rollback wouldn't have anything to
+	// restore.
+	backupMvIdx := strings.Index(script, `mv "/data/shared/my_table" "`+backup+`"`)
+	primaryMvIdx := strings.Index(script, `mv "`+staging+`" "/data/shared/my_table"`)
+	if backupMvIdx < 0 || primaryMvIdx < 0 {
+		t.Fatalf("remote script missing backup or primary mv:\n%s", script)
+	}
+	if backupMvIdx >= primaryMvIdx {
+		t.Errorf("backup mv (DEST→.old) must come BEFORE primary mv (.staging→DEST); rollback contract broken:\n%s", script)
+	}
+}
+
+// TestStreamLayout_StagingSuffixIsUniquePerInvocation: two back-to-
+// back StreamLayout calls for the same table MUST produce different
+// staging-dir suffixes. Without this, concurrent `dataset push` runs
+// for the same table would race on the same .staging path and
+// corrupt each other's tar extraction. Bugbot r8 fix.
+func TestStreamLayout_StagingSuffixIsUniquePerInvocation(t *testing.T) {
+	layout, err := Discover(imgcDir(t))
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+	stagingRE := regexp.MustCompile(`/data/shared/t\.staging-[0-9a-f]{8}`)
+
+	collect := func() string {
+		fe := &fakeExecutor{}
+		if err := StreamLayout(context.Background(), fe, "tracebloc", "p", "stage",
+			layout, "t", NoOpProgress{}); err != nil {
+			t.Fatalf("StreamLayout: %v", err)
+		}
+		m := stagingRE.FindString(fe.gotCmd[2])
+		if m == "" {
+			t.Fatalf("no staging path in: %s", fe.gotCmd[2])
+		}
+		return m
+	}
+	a, b := collect(), collect()
+	if a == b {
+		t.Errorf("back-to-back StreamLayout produced identical staging path %q; race-safety regression", a)
+	}
+}
+
+// TestStreamLayout_TargetingFromArgs pins that namespace + pod name
+// flow through to the executor unchanged. Catches a refactor that
+// hardcodes the namespace or builds the pod name in the wrong layer.
+func TestStreamLayout_TargetingFromArgs(t *testing.T) {
+	layout, err := Discover(imgcDir(t))
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+	fe := &fakeExecutor{}
+	_ = StreamLayout(context.Background(), fe, "custom-ns", "weird-pod-name", "container-x",
+		layout, "t", NoOpProgress{})
+	if fe.gotNS != "custom-ns" {
+		t.Errorf("namespace = %q, want custom-ns", fe.gotNS)
+	}
+	if fe.gotPod != "weird-pod-name" {
+		t.Errorf("pod = %q, want weird-pod-name", fe.gotPod)
+	}
+	if fe.gotContainer != "container-x" {
+		t.Errorf("container = %q, want container-x", fe.gotContainer)
+	}
+}
+
+// TestStreamLayout_ExecErrorWithRemoteStderr: when the in-cluster
+// tar fails (e.g. read-only FS, no-space), the remote stderr must
+// surface in the customer-facing error. This is the actionable
+// signal — without it, customers see "exec stream failed" with no
+// way to diagnose what went wrong server-side.
+func TestStreamLayout_ExecErrorWithRemoteStderr(t *testing.T) {
+	layout, err := Discover(imgcDir(t))
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+	fe := &fakeExecutor{
+		stderrToReturn:    []byte("tar: write error: No space left on device"),
+		errToReturn:       errors.New("command terminated with exit code 1"),
+		drainBeforeReturn: true,
+	}
+	err = StreamLayout(context.Background(), fe, "tracebloc", "p", "stage",
+		layout, "t", NoOpProgress{})
+	if err == nil {
+		t.Fatal("StreamLayout returned nil on exec error")
+	}
+	for _, want := range []string{"streaming files", "No space left on device", "exit code 1"} {
+		if !strings.Contains(err.Error(), want) {
+			t.Errorf("error missing %q: %v", want, err)
+		}
+	}
+}
+
+// TestStreamLayout_ProgressBytes: the Progress sink receives byte
+// counts during the stream. This is the contract the schollz bar
+// keys off — if the wrapper ever stops calling Add(), the bar would
+// freeze at 0%.
+func TestStreamLayout_ProgressBytes(t *testing.T) {
+	layout, err := Discover(imgcDir(t))
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+	fp := &fakeProgress{}
+	err = StreamLayout(context.Background(), &fakeExecutor{}, "tracebloc", "p", "stage",
+		layout, "t", fp)
+	if err != nil {
+		t.Fatalf("StreamLayout: %v", err)
+	}
+	// At minimum we expect the file bodies (200 + 39 = 239) plus
+	// tar headers (~512 per file = ~1536 bytes overhead minimum).
+	// Assert "some bytes flowed" rather than an exact count
+	// because the tar overhead is implementation-defined.
+	if fp.added < 239 {
+		t.Errorf("progress.Add total = %d, want at least 239 (file body bytes)", fp.added)
+	}
+	if !fp.finished {
+		t.Errorf("progress.Finish() never called")
+	}
+}
+
+// TestWriteLayoutTar_LabelsCSVFirst: ordering matters for fail-fast
+// diagnostic — labels.csv at the start means a corrupt tar is more
+// likely to surface before we've shipped a hundred image bytes.
+func TestWriteLayoutTar_LabelsCSVFirst(t *testing.T) {
+	layout, err := Discover(imgcDir(t))
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+	var buf bytes.Buffer
+	if err := writeLayoutTar(&buf, layout); err != nil {
+		t.Fatalf("writeLayoutTar: %v", err)
+	}
+	tr := tar.NewReader(&buf)
+	hdr, err := tr.Next()
+	if err != nil {
+		t.Fatalf("reading first entry: %v", err)
+	}
+	if hdr.Name != "labels.csv" {
+		t.Errorf("first entry name = %q, want labels.csv (ordering pins this)", hdr.Name)
+	}
+}
+
+// fakeProgress: a Progress that records total bytes added and
+// whether Finish was called.
+type fakeProgress struct {
+	added    int
+	finished bool
+}
+
+func (p *fakeProgress) Add(n int) { p.added += n }
+func (p *fakeProgress) Finish()   { p.finished = true }
diff --git a/internal/push/walk.go b/internal/push/walk.go
index dd95571..bb7b15f 100644
--- a/internal/push/walk.go
+++ b/internal/push/walk.go
@@ -106,9 +106,17 @@ func Discover(rootDir string) (*LocalLayout, error) {
 
 	layout := &LocalLayout{Root: abs}
 
-	// labels.csv (required).
+	// labels.csv (required). Use Lstat — NOT Stat — so a symlink
+	// shows up as a symlink (mode includes ModeSymlink) rather
+	// than being silently followed. v0.1 rejects symlinks entirely
+	// (see rejectSymlink); without Lstat the size cap below would
+	// see the symlink's own ~100-byte size while writeTarFile
+	// (which uses os.Stat → follows symlinks) would happily stream
+	// the target's full contents — a size-cap bypass and an
+	// arbitrary-local-file disclosure to the cluster PVC. Bugbot
+	// flagged this as Medium-severity security on PR-b round 4.
 	labelsPath := filepath.Join(abs, "labels.csv")
-	labelsStat, err := os.Stat(labelsPath)
+	labelsStat, err := os.Lstat(labelsPath)
 	if err != nil {
 		if errors.Is(err, os.ErrNotExist) {
 			return nil, fmt.Errorf(
@@ -120,6 +128,9 @@ func Discover(rootDir string) (*LocalLayout, error) {
 		}
 		return nil, fmt.Errorf("stat labels.csv: %w", err)
 	}
+	if err := rejectSymlink(labelsStat, "labels.csv"); err != nil {
+		return nil, err
+	}
 	if labelsStat.IsDir() {
 		// A directory literally named "labels.csv" passes the
 		// os.Stat above — without this check the pre-flight would
@@ -138,9 +149,14 @@ func Discover(rootDir string) (*LocalLayout, error) {
 	layout.TotalBytes += labelsStat.Size()
 
 	// images/ subdir (required, must contain at least one
-	// image-extension file).
+	// image-extension file). Use os.Lstat — NOT Stat — so a
+	// symlinked-directory case (e.g. `ln -s /etc images`) gets
+	// caught here, not silently followed into the linked path.
+	// Without Lstat, the symlink-image fixes from the previous
+	// commit don't matter: the whole directory could be a link.
+	// Bugbot flagged the missing dir-level Lstat on PR-b round 5.
 	imagesDir := filepath.Join(abs, "images")
-	imagesStat, err := os.Stat(imagesDir)
+	imagesStat, err := os.Lstat(imagesDir)
 	if err != nil {
 		if errors.Is(err, os.ErrNotExist) {
 			return nil, fmt.Errorf(
@@ -150,6 +166,9 @@ func Discover(rootDir string) (*LocalLayout, error) {
 		}
 		return nil, fmt.Errorf("stat images/: %w", err)
 	}
+	if err := rejectSymlink(imagesStat, "images"); err != nil {
+		return nil, err
+	}
 	if !imagesStat.IsDir() {
 		return nil, fmt.Errorf("%q exists but is not a directory", imagesDir)
 	}
@@ -175,10 +194,19 @@ func Discover(rootDir string) (*LocalLayout, error) {
 		if _, ok := imageExtensions[ext]; !ok {
 			continue
 		}
+		// entry.Info() returns Lstat-like metadata for the
+		// directory entry (the symlink's own mode if it's a
+		// symlink, not the target's). That's exactly what we
+		// want here — combined with rejectSymlink it closes the
+		// symlink-bypass-size-caps hole Bugbot flagged on PR-b
+		// round 4.
 		info, err := entry.Info()
 		if err != nil {
 			return nil, fmt.Errorf("stat %q: %w", entry.Name(), err)
 		}
+		if err := rejectSymlink(info, filepath.Join("images", entry.Name())); err != nil {
+			return nil, err
+		}
 		if info.Size() > MaxSingleFileBytes {
 			return nil, sizeError(filepath.Join("images", entry.Name()),
 				info.Size(), MaxSingleFileBytes)
@@ -207,6 +235,54 @@ func Discover(rootDir string) (*LocalLayout, error) {
 	return layout, nil
 }
 
+// rejectSymlink returns a non-nil error if info describes a symlink.
+// v0.1 refuses symlinks under <root>/{labels.csv, images, images/*}
+// entirely because:
+//
+//   - SECURITY: writeTarFile uses os.Open (which follows symlinks).
+//     Discover sized entries via DirEntry.Info() (which does not).
+//     A symlink whose target is a multi-GB local file would pass
+//     Discover's size cap (the symlink itself is ~100 bytes) yet
+//     stream the target's full contents to the cluster PVC — a
+//     size-cap bypass and arbitrary-local-file disclosure to the
+//     cluster admin. Bugbot caught this on PR-b round 4.
+//   - UX: legitimate image_classification datasets don't use
+//     symlinks. A clear "symlinks not supported" error is better
+//     than the alternative fixes (resolve + re-stat the target,
+//     blanket Stat() everywhere) — both of which would let the
+//     security hole creep back in on a future refactor.
+//
+// Customers with symlink-heavy layouts (rare; usually means their
+// data lives on another filesystem) can `cp -L` to materialize
+// the files before pushing.
+//
+// Known limitation: HARD LINKS are NOT rejected. The filesystem
+// doesn't expose a Mode bit for hard links the way ModeSymlink
+// distinguishes symlinks, and a high-Nlink check has too many
+// false positives (legitimate hard-linked datasets where the
+// dataset dir is the only entry point). The implicit security
+// boundary is the CLI's process-level read permissions: a
+// customer can only hard-link files they already have read
+// access to, so the cluster admin reading a hard-linked
+// /etc/shadow via the PVC isn't a privilege escalation — they'd
+// need the CLI to be running as root for that to be exploitable,
+// and the docs already recommend running as a low-privileged
+// user. v0.2 may add openat(O_NOFOLLOW)-based sandboxing if a
+// real customer needs harder isolation; tracked alongside the
+// cloud-source story (#147 non-goals).
+func rejectSymlink(info os.FileInfo, relPath string) error {
+	if info.Mode()&os.ModeSymlink == 0 {
+		return nil
+	}
+	return fmt.Errorf(
+		"%q is a symbolic link, which v0.1 does not allow in the dataset "+
+			"layout (security: a symlink could escape the dataset tree or "+
+			"bypass size caps). Materialize the link target (e.g. `cp -L`) "+
+			"and re-run, or wait for v0.2's cloud-source story if the data "+
+			"lives elsewhere.",
+		relPath)
+}
+
 // sizeError builds the over-the-single-file-cap error with the same
 // human-readable framing as the total-cap branch above. Centralized
 // so the message stays consistent if we tune the wording later.
diff --git a/internal/push/walk_test.go b/internal/push/walk_test.go
index 8095415..441400c 100644
--- a/internal/push/walk_test.go
+++ b/internal/push/walk_test.go
@@ -3,6 +3,7 @@ package push
 import (
 	"os"
 	"path/filepath"
+	"runtime"
 	"strings"
 	"testing"
 )
@@ -191,6 +192,124 @@ func TestDiscover_LabelsCSVIsDirectory(t *testing.T) {
 	}
 }
 
+// TestDiscover_RejectsSymlinkedImage is the security regression pin
+// for the Bugbot-Medium finding on PR-b round 4. A symlink in
+// images/ whose target points outside the dataset tree (or at a
+// big local file) used to pass Discover's size cap (because
+// DirEntry.Info() returns the link's own ~100-byte size) yet
+// stream the target's full contents into the cluster PVC via
+// writeTarFile's os.Stat+os.Open path. That's both a size-cap
+// bypass AND an arbitrary-local-file disclosure to the cluster
+// admin. v0.1 rejects symlinks outright.
+func TestDiscover_RejectsSymlinkedImage(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("symlinks require admin privileges on Windows")
+	}
+	root := t.TempDir()
+	if err := os.WriteFile(filepath.Join(root, "labels.csv"),
+		[]byte("image_id,label\n001.jpg,cat\n"), 0o644); err != nil {
+		t.Fatalf("write labels.csv: %v", err)
+	}
+	imagesDir := filepath.Join(root, "images")
+	if err := os.MkdirAll(imagesDir, 0o755); err != nil {
+		t.Fatalf("mkdir images: %v", err)
+	}
+	// Create a real image file outside the dataset tree to be the
+	// symlink target. The symlink inside images/ points at it.
+	target := filepath.Join(t.TempDir(), "outside.jpg")
+	if err := os.WriteFile(target, make([]byte, 100), 0o644); err != nil {
+		t.Fatalf("write outside target: %v", err)
+	}
+	link := filepath.Join(imagesDir, "evil.jpg")
+	if err := os.Symlink(target, link); err != nil {
+		t.Fatalf("symlink: %v", err)
+	}
+
+	_, err := Discover(root)
+	if err == nil {
+		t.Fatal("Discover accepted a symlinked image — security regression; v0.1 must reject")
+	}
+	for _, want := range []string{"symbolic link", "security", "cp -L"} {
+		if !strings.Contains(err.Error(), want) {
+			t.Errorf("error missing %q in: %v", want, err)
+		}
+	}
+}
+
+// TestDiscover_RejectsSymlinkedImagesDir is the round-5 follow-up
+// to the symlinked-FILE fix from round 4. The DIRECTORY itself
+// could also be a symlink (e.g. `ln -s /etc images`), which the
+// previous fix didn't catch because it only Lstat'd the entries
+// INSIDE images/. Bugbot flagged it; this test pins the
+// dir-level Lstat.
+func TestDiscover_RejectsSymlinkedImagesDir(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("symlinks require admin privileges on Windows")
+	}
+	root := t.TempDir()
+	if err := os.WriteFile(filepath.Join(root, "labels.csv"),
+		[]byte("image_id,label\n001.jpg,cat\n"), 0o644); err != nil {
+		t.Fatalf("write labels.csv: %v", err)
+	}
+	// Create a real images dir somewhere ELSE that the symlink
+	// will point at. The symlink in `root` is what should be
+	// rejected.
+	realDir := filepath.Join(t.TempDir(), "real-images")
+	if err := os.MkdirAll(realDir, 0o755); err != nil {
+		t.Fatalf("mkdir real-images: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(realDir, "a.jpg"),
+		make([]byte, 100), 0o644); err != nil {
+		t.Fatalf("write img: %v", err)
+	}
+	if err := os.Symlink(realDir, filepath.Join(root, "images")); err != nil {
+		t.Fatalf("symlink images: %v", err)
+	}
+
+	_, err := Discover(root)
+	if err == nil {
+		t.Fatal("Discover accepted a symlinked images/ directory — security regression")
+	}
+	if !strings.Contains(err.Error(), "symbolic link") {
+		t.Errorf("error missing symbolic-link rejection: %v", err)
+	}
+}
+
+// TestDiscover_RejectsSymlinkedLabelsCSV: same hole, different
+// file. The labels.csv path can also be a symlink (e.g. pointing
+// at a huge local file). Lstat + reject covers both cases
+// symmetrically.
+func TestDiscover_RejectsSymlinkedLabelsCSV(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("symlinks require admin privileges on Windows")
+	}
+	root := t.TempDir()
+	target := filepath.Join(t.TempDir(), "outside-labels.csv")
+	if err := os.WriteFile(target, []byte("image_id,label\n001.jpg,cat\n"), 0o644); err != nil {
+		t.Fatalf("write outside target: %v", err)
+	}
+	link := filepath.Join(root, "labels.csv")
+	if err := os.Symlink(target, link); err != nil {
+		t.Fatalf("symlink: %v", err)
+	}
+	imagesDir := filepath.Join(root, "images")
+	if err := os.MkdirAll(imagesDir, 0o755); err != nil {
+		t.Fatalf("mkdir images: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(imagesDir, "a.jpg"),
+		make([]byte, 100), 0o644); err != nil {
+		t.Fatalf("write img: %v", err)
+	}
+
+	_, err := Discover(root)
+	if err == nil {
+		t.Fatal("Discover accepted a symlinked labels.csv — security regression")
+	}
+	if !strings.Contains(err.Error(), "symbolic link") {
+		t.Errorf("error missing symbolic-link rejection: %v", err)
+	}
+}
+
 func TestDiscover_NotADirectory(t *testing.T) {
 	// Customer passes a path to a single file instead of a dir.
 	// This is a common autocomplete-mistake (tab-completing