tracebloc · aptracebloc · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/internal/cli/coverage_test.go b/internal/cli/coverage_test.go
@@ -50,7 +50,8 @@ func TestPrintPushPreflight_RendersKeyFacts(t *testing.T) {
 
 	var buf bytes.Buffer
 	p := ui.New(&buf, ui.WithColor(false))
-	printPushPreflight(p, layout, release, pvc, spec, false)
+	printLocalSummary(p, layout, spec)
+	printClusterSummary(p, release, pvc)
 	out := buf.String()
 
 	for _, want := range []string{

diff --git a/internal/cli/dataset.go b/internal/cli/dataset.go
@@ -341,6 +341,19 @@ func expandHome(path string) string {
 // a bad label-column or oversized dataset gets the diagnostic in
 // milliseconds without a kubeconfig round-trip.
 func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPushArgs) error {
+	// Intro header: brand + a plain-English explainer of what a push
+	// does, so a first-time user understands it before any prompts.
+	// Routed through a.Printer, so --output-json keeps it on stderr and
+	// --plain/non-TTY degrade cleanly. (#31)
+	a.Printer.Banner("tracebloc", "dataset push")
+	a.Printer.Para(strings.TrimSpace(`
+This uploads a dataset from your machine into your tracebloc workspace so models
+can be trained on it. Your files are sent to the Kubernetes cluster your
+workspace was installed on — tracebloc checks them and loads them into a table
+your training runs read from. Your data stays on that cluster the whole time;
+contributors train against it without ever seeing the raw files.`))
+	a.Printer.Hintf("Learn more: https://docs.tracebloc.io")
+
 	// 0. Guided mode: prompt for any missing core inputs before
 	//    validation. Flags already provided win; non-TTY / --no-input
 	//    leaves Prompter nil and skips straight to the flag-only path.
@@ -436,6 +449,9 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
 		return &exitError{code: 3, err: err}
 	}
 
+	a.Printer.Step(1, 4, "Check your dataset")
+	a.Printer.Hintf("Reading your files locally first — nothing has touched the cluster yet — so a layout or settings problem shows up right away.")
+
 	// 3a. Per-category spec resolution from the local data, so the
 	//     synthesized spec carries the right fields before validation.
 	switch {
@@ -539,10 +555,14 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
 		return &exitError{code: 2, err: errors.New("synthesized spec failed schema validation; check the flag values above")}
 	}
 
+	printLocalSummary(a.Printer, layout, spec)
+
 	// 5. Cluster discovery — same kubeconfig path as `cluster info`.
 	//    Errors mirror that command's exit-code contract (3 for
 	//    kubeconfig, 4 for missing release) so behaviour is
 	//    consistent across pre-flight commands.
+	a.Printer.Step(2, 4, "Connect to your workspace's cluster")
+	a.Printer.Hintf("Using your kubeconfig to find the tracebloc release in your workspace and the shared storage your dataset will live on.")
 	resolved, err := cluster.Load(cluster.KubeconfigOptions{
 		Path:      a.Kubeconfig,
 		Context:   a.Context,
@@ -571,16 +591,15 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
 		return &exitError{code: 4, err: err}
 	}
 
-	// 7. Print the pre-flight summary. The output is the same in
-	//    dry-run and live mode — only the "what happens next" line
-	//    differs. Customers iterating on a bad layout see this
-	//    every attempt, so it's worth keeping skimmable: one fact
-	//    per line, aligned by column.
-	printPushPreflight(a.Printer, layout, release, pvc, spec, a.DryRun)
+	// 7. Show what we found on the cluster — the customer's last look
+	//    before any bytes move.
+	printClusterSummary(a.Printer, release, pvc)
 
-	// 8. Dry-run stop. Acknowledged success.
+	// 8. Dry-run stop. Acknowledged success, plus a reminder of the
+	//    live-only steps (stage + ingest) the customer just skipped.
 	if a.DryRun {
-		_, _ = fmt.Fprintln(out, "Dry-run complete — no cluster resources were created.")
+		a.Printer.Successf("Dry-run complete — your dataset and cluster check out; nothing was created.")
+		a.Printer.Hintf("A real run continues with step 3 (stage your files) and step 4 (run the ingestion).")
 		if a.OutputJSON {
 			writePushJSON(a.JSONOut, "dry-run", spec, nil, "", "")
 		}
@@ -595,6 +614,8 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
 	//    Exit code 7 ("staging failed") is distinct from the
 	//    pre-flight codes so customers can branch on whether the
 	//    failure was their environment vs the actual data transfer.
+	a.Printer.Step(3, 4, "Stage your files")
+	a.Printer.Hintf("A short-lived helper pod mounts the shared storage and your files stream into it — like `kubectl cp`, but set up and cleaned up for you.")
 	progress := push.NewProgress(out, layout.TotalBytes,
 		fmt.Sprintf("Staging %s", a.Spec.Table))
 	// Defer Finish so a failure path that returns BEFORE
@@ -630,7 +651,8 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
 	//     min) because the full Phase 4 lifecycle — submit + watch
 	//     + log stream — can run that long for large ingestions.
 	//     The chart's helm flow uses the same token-mint code path.
-	_, _ = fmt.Fprintln(out)
+	a.Printer.Step(4, 4, "Run the ingestion")
+	a.Printer.Hintf("Submitting the run to your workspace, then watching as it validates your data and loads it into the table — progress streams below.")
 	tok, err := cluster.MintIngestorToken(ctx, cs, resolved.Namespace,
 		release.IngestorSAName, 3600, nil)
 	if err != nil {
@@ -750,19 +772,11 @@ func classifyPushOutcome(res *submit.Result, err error) (string, *exitError) {
 	return "unknown", nil
 }
 
-// printPushPreflight is the customer-facing summary. Mirrors
-// `cluster info`'s layout for consistency: section header,
-// indented key:value rows. Kept here (not on the layout/release/pvc
-// types) because the formatting is policy and lives with the CLI,
-// not the data.
-func printPushPreflight(
-	p *ui.Printer,
-	layout *push.LocalLayout,
-	release *cluster.ParentRelease,
-	pvc *cluster.SharedPVC,
-	spec map[string]any,
-	dryRun bool,
-) {
+// printLocalSummary shows what the CLI found on disk plus the ingest
+// settings it assembled — the detail under step 1 ("Check your
+// dataset"). Split from the cluster summary so each sits under its own
+// numbered step. Mirrors `cluster info`'s section/Field layout.
+func printLocalSummary(p *ui.Printer, layout *push.LocalLayout, spec map[string]any) {
 	cat, _ := spec["category"].(string)
 
 	p.Section("Local dataset")
@@ -789,17 +803,7 @@ func printPushPreflight(
 	}
 	p.Field("total size", push.HumanBytes(layout.TotalBytes))
 
-	p.Section("Target cluster")
-	p.Field("release", fmt.Sprintf("%s (chart %s)", release.ReleaseName, release.ChartVersion))
-	p.Field("jobs-manager", release.JobsManagerService)
-	p.Field("shared PVC", fmt.Sprintf("%s (%s)", pvc.ClaimName, pvc.Phase))
-	if !pvc.IsReadWriteMany() {
-		// Warn but don't block — RWO clusters still work; the scheduler
-		// co-locates the stage Pod with the existing mounter.
-		p.Warnf("PVC is %v, not ReadWriteMany — the stage Pod will co-locate with the existing mounter", pvc.AccessModes)
-	}
-
-	p.Section("Synthesized ingest spec")
+	p.Section("Ingest settings")
 	p.Field("table", fmt.Sprintf("%v", spec["table"]))
 	p.Field("category", fmt.Sprintf("%v", spec["category"]))
 	p.Field("intent", fmt.Sprintf("%v", spec["intent"]))
@@ -813,10 +817,19 @@ func printPushPreflight(
 		p.Field("time column", tc)
 	}
 	p.Field("destination", push.FinalDestPrefix(spec["table"].(string)))
+}
 
-	if !dryRun {
-		p.Infof("Next: stage %d files (%s) for table %q",
-			layout.FileCount(), push.HumanBytes(layout.TotalBytes), spec["table"])
+// printClusterSummary renders the "Target cluster" section printed under step 2 ("Connect to your
+// workspace's cluster"). Presentational only: a non-ReadWriteMany PVC yields a warning, not an error.
+func printClusterSummary(p *ui.Printer, release *cluster.ParentRelease, pvc *cluster.SharedPVC) {
+	p.Section("Target cluster")
+	p.Field("release", fmt.Sprintf("%s (chart %s)", release.ReleaseName, release.ChartVersion))
+	p.Field("jobs-manager", release.JobsManagerService)
+	p.Field("shared PVC", fmt.Sprintf("%s (%s)", pvc.ClaimName, pvc.Phase))
+	if !pvc.IsReadWriteMany() {
+		// Warn but don't block — RWO clusters still work; the scheduler
+		// co-locates the stage Pod with the existing mounter.
+		p.Warnf("PVC is %v, not ReadWriteMany — the stage Pod will co-locate with the existing mounter", pvc.AccessModes)
 	}
 }
 

diff --git a/internal/cli/dataset_rm.go b/internal/cli/dataset_rm.go
@@ -145,7 +145,7 @@ func runDatasetRm(ctx context.Context, a runDatasetRmArgs) error {
 	for _, path := range plan.PVCPaths {
 		p.Field("pvc path", path)
 	}
-	p.Warnf("Destructive and cannot be undone. The central backend catalog entry is NOT removed (tracebloc/cli#39).")
+	p.Warnf("Destructive and cannot be undone.")
 
 	// 5. Dry-run stop.
 	if a.DryRun {

diff --git a/internal/cli/interactive.go b/internal/cli/interactive.go
@@ -124,6 +124,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS
 	prompted := false
 
 	if a.LocalPath == "" {
+		p.PromptHint("The folder holding your dataset — a single .csv for tabular, or labels.csv + an images/ folder for images.  e.g. ~/datasets/churn")
 		ans, err := pr.Input("Path to your dataset directory", "e.g. ./my-data", "", nil)
 		if err != nil {
 			return err
@@ -133,6 +134,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS
 	}
 
 	if !categorySet {
+		p.PromptHint("What kind of task your data is for — this drives how it's validated and loaded.")
 		ans, err := pr.Select("Task category", "what kind of data this is",
 			promptCategories, a.Spec.Category)
 		if err != nil {
@@ -143,6 +145,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS
 	}
 
 	if a.Spec.Table == "" {
+		p.PromptHint("Names the table created on the cluster (and its folder on the shared storage). Letters, digits, underscores only.  e.g. churn_train")
 		ans, err := pr.Input("Destination table name",
 			"MySQL identifier + PVC subdir; letters, digits, underscore only", "",
 			push.ValidateTableName)
@@ -154,6 +157,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS
 	}
 
 	if a.Spec.Intent == "" {
+		p.PromptHint("Whether this split is used to train the model or to evaluate it.")
 		ans, err := pr.Select("Intent", "which split this data is",
 			[]string{"train", "test"}, "train")
 		if err != nil {
@@ -165,6 +169,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS
 
 	// masked_language_modeling is self-supervised — no label column.
 	if a.Spec.LabelColumn == "" && a.Spec.Category != "masked_language_modeling" {
+		p.PromptHint("The column in your CSV holding the value to predict (the target).  e.g. label, target, churned")
 		ans, err := pr.Input("Label column",
 			"the column in labels.csv that holds the label", "label", nil)
 		if err != nil {
@@ -174,7 +179,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS
 		prompted = true
 	}
 
-	cp, err := promptCategorySpecific(pr, a)
+	cp, err := promptCategorySpecific(p, pr, a)
 	if err != nil {
 		return err
 	}
@@ -198,12 +203,13 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS
 // promptCategorySpecific prompts for the inputs a particular category
 // needs beyond the core fields, filling only the gaps. Returns whether
 // it prompted anything (so the caller knows to show the confirm).
-func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) {
+func promptCategorySpecific(p *ui.Printer, pr prompter, a *runDatasetPushArgs) (bool, error) {
 	cat := a.Spec.Category
 	prompted := false
 	switch {
 	case push.IsImage(cat):
 		if cat == "keypoint_detection" && a.Spec.NumberOfKeypoints <= 0 {
+			p.PromptHint("How many keypoints each sample is annotated with — dataset-specific, no default.  e.g. 17 for COCO human pose")
 			ans, err := pr.Input("Number of keypoints per sample",
 				"e.g. 17 for COCO pose", "", validatePositiveInt)
 			if err != nil {
@@ -214,6 +220,7 @@ func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) {
 			prompted = true
 		}
 		if a.TargetSizeFlag == "" {
+			p.PromptHint("All images must share one resolution; the ingestor checks it (it won't resize). Blank = auto-detect from the first image.  e.g. 224x224")
 			ans, err := pr.Input("Image resolution as WxH (blank = auto-detect from the first image)",
 				"all images must share it; the ingestor validates, it doesn't resize", "",
 				validateOptionalTargetSize)
@@ -225,6 +232,7 @@ func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) {
 		}
 	case push.IsTabular(cat):
 		if a.SchemaFlag == "" {
+			p.PromptHint("Override the column types the CLI would infer. Blank = infer from the CSV.  e.g. age:INT,price:FLOAT,city:VARCHAR")
 			ans, err := pr.Input("Column schema as col:TYPE,... (blank = infer from the CSV)",
 				"e.g. age:INT,price:FLOAT", "", validateOptionalSchema)
 			if err != nil {
@@ -234,6 +242,7 @@ func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) {
 			prompted = true
 		}
 		if push.IsRegressionClass(cat) && a.Spec.LabelPolicy == "" {
+			p.PromptHint("Regression targets are continuous. 'bucket' groups them into ranges before they leave the cluster; 'passthrough' keeps raw values.")
 			ans, err := pr.Select("Label policy",
 				"bucket bins the target before it leaves the cluster",
 				[]string{"bucket", "passthrough"}, "bucket")
@@ -244,6 +253,7 @@ func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) {
 			prompted = true
 		}
 		if cat == "time_to_event_prediction" && a.Spec.TimeColumn == "" {
+			p.PromptHint("The column holding the duration / time-to-event.  e.g. time, tenure_days")
 			ans, err := pr.Input("Time column", "the duration/time column name", "time", nil)
 			if err != nil {
 				return prompted, err

diff --git a/internal/cli/interactive_test.go b/internal/cli/interactive_test.go
@@ -3,6 +3,7 @@ package cli
 import (
 	"bytes"
 	"errors"
+	"strings"
 	"testing"
 
 	"github.com/tracebloc/cli/internal/push"
@@ -82,6 +83,35 @@ func TestRunInteractive_FillsAllWhenEmpty(t *testing.T) {
 	}
 }
 
+// TestRunInteractive_ShowsExampleHints runs the guided flow against a
+// buffer-backed Printer and checks that the table-name, label-column,
+// schema and label-policy hint texts all reach the captured output.
+func TestRunInteractive_ShowsExampleHints(t *testing.T) {
+	var sink bytes.Buffer
+	printer := ui.New(&sink, ui.WithColor(false))
+	args := &runDatasetPushArgs{Spec: push.SpecArgs{Category: "tabular_regression"}}
+	fake := &fakePrompter{answers: map[string]string{
+		"Path to your dataset directory": "./d",
+		"Destination table name":         "churn_train",
+	}}
+
+	if err := runInteractive(printer, fake, args, true /*categorySet*/); err != nil {
+		t.Fatalf("runInteractive: %v", err)
+	}
+	wantHints := []string{
+		"e.g. churn_train",   // table-name example
+		"e.g. label, target", // label-column example
+		"age:INT",            // tabular schema example
+		"keeps raw values",   // label-policy explanation
+	}
+	got := sink.String()
+	for _, hint := range wantHints {
+		if !strings.Contains(got, hint) {
+			t.Errorf("interactive output missing hint %q:\n%s", hint, got)
+		}
+	}
+}
+
 // TestRunInteractive_SkipsProvidedValues: flags already set (and an
 // explicit --category) mean nothing is prompted.
 func TestRunInteractive_SkipsProvidedValues(t *testing.T) {

diff --git a/internal/ui/ui.go b/internal/ui/ui.go
@@ -14,6 +14,7 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"strings"
 
 	"github.com/fatih/color"
 	"golang.org/x/term"
@@ -108,6 +109,16 @@ func (p *Printer) Banner(title, subtitle string) {
 	p.out("\n")
 }
 
+// Para writes text as plain (unpainted) prose: the input is split on
+// "\n" and every resulting line is emitted with the same two-space
+// indent Hintf uses, so multi-line paragraphs stay aligned.
+func (p *Printer) Para(text string) {
+	lines := strings.Split(text, "\n")
+	for i := range lines {
+		p.out("  %s\n", lines[i])
+	}
+}
+
 // Step prints a major-step header: "Step n/total  label" in bold cyan.
 // Mirrors common.sh step().
 func (p *Printer) Step(n, total int, label string) {
@@ -144,6 +155,14 @@ func (p *Printer) Hintf(format string, a ...any) {
 	p.out("  %s\n", p.paint(fmt.Sprintf(format, a...), color.Faint))
 }
 
+// PromptHint formats guidance for an interactive prompt and prints it
+// in cyan after a blank separator line, so it sits right above the
+// prompt and reads as more prominent than the faint Hintf output.
+func (p *Printer) PromptHint(format string, a ...any) {
+	msg := fmt.Sprintf(format, a...)
+	p.out("\n  %s\n", p.paint(msg, color.FgCyan))
+}
+
 // PromptHeader prints a bold-white label before a user-input prompt.
 func (p *Printer) PromptHeader(label string) {
 	p.out("\n  %s\n", p.paint(label, color.Bold, color.FgWhite))