diff --git a/internal/cli/coverage_test.go b/internal/cli/coverage_test.go index 8c80760..9269d00 100644 --- a/internal/cli/coverage_test.go +++ b/internal/cli/coverage_test.go @@ -50,7 +50,8 @@ func TestPrintPushPreflight_RendersKeyFacts(t *testing.T) { var buf bytes.Buffer p := ui.New(&buf, ui.WithColor(false)) - printPushPreflight(p, layout, release, pvc, spec, false) + printLocalSummary(p, layout, spec) + printClusterSummary(p, release, pvc) out := buf.String() for _, want := range []string{ diff --git a/internal/cli/dataset.go b/internal/cli/dataset.go index 7019c21..1188cc5 100644 --- a/internal/cli/dataset.go +++ b/internal/cli/dataset.go @@ -341,6 +341,19 @@ func expandHome(path string) string { // a bad label-column or oversized dataset gets the diagnostic in // milliseconds without a kubeconfig round-trip. func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPushArgs) error { + // Intro header: brand + a plain-English explainer of what a push + // does, so a first-time user understands it before any prompts. + // Routed through a.Printer, so --output-json keeps it on stderr and + // --plain/non-TTY degrade cleanly. (#31) + a.Printer.Banner("tracebloc", "dataset push") + a.Printer.Para(strings.TrimSpace(` +This uploads a dataset from your machine into your tracebloc workspace so models +can be trained on it. Your files are sent to the Kubernetes cluster your +workspace was installed on — tracebloc checks them and loads them into a table +your training runs read from. Your data stays on that cluster the whole time; +contributors train against it without ever seeing the raw files.`)) + a.Printer.Hintf("Learn more: https://docs.tracebloc.io") + // 0. Guided mode: prompt for any missing core inputs before // validation. Flags already provided win; non-TTY / --no-input // leaves Prompter nil and skips straight to the flag-only path. @@ -436,6 +449,9 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush return &exitError{code: 3, err: err} } + a.Printer.Step(1, 4, "Check your dataset") + a.Printer.Hintf("Reading your files locally first — nothing has touched the cluster yet — so a layout or settings problem shows up right away.") + // 3a. Per-category spec resolution from the local data, so the // synthesized spec carries the right fields before validation. switch { @@ -539,10 +555,14 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush return &exitError{code: 2, err: errors.New("synthesized spec failed schema validation; check the flag values above")} } + printLocalSummary(a.Printer, layout, spec) + // 5. Cluster discovery — same kubeconfig path as `cluster info`. // Errors mirror that command's exit-code contract (3 for // kubeconfig, 4 for missing release) so behaviour is // consistent across pre-flight commands. + a.Printer.Step(2, 4, "Connect to your workspace's cluster") + a.Printer.Hintf("Using your kubeconfig to find the tracebloc release in your workspace and the shared storage your dataset will live on.") resolved, err := cluster.Load(cluster.KubeconfigOptions{ Path: a.Kubeconfig, Context: a.Context, @@ -571,16 +591,15 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush return &exitError{code: 4, err: err} } - // 7. Print the pre-flight summary. The output is the same in - // dry-run and live mode — only the "what happens next" line - // differs. Customers iterating on a bad layout see this - // every attempt, so it's worth keeping skimmable: one fact - // per line, aligned by column. - printPushPreflight(a.Printer, layout, release, pvc, spec, a.DryRun) + // 7. Show what we found on the cluster — the customer's last look + // before any bytes move. + printClusterSummary(a.Printer, release, pvc) - // 8. Dry-run stop. Acknowledged success. + // 8. Dry-run stop. Acknowledged success, plus a reminder of the + // live-only steps (stage + ingest) the customer just skipped. if a.DryRun { - _, _ = fmt.Fprintln(out, "Dry-run complete — no cluster resources were created.") + a.Printer.Successf("Dry-run complete — your dataset and cluster check out; nothing was created.") + a.Printer.Hintf("A real run continues with step 3 (stage your files) and step 4 (run the ingestion).") if a.OutputJSON { writePushJSON(a.JSONOut, "dry-run", spec, nil, "", "") } @@ -595,6 +614,8 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush // Exit code 7 ("staging failed") is distinct from the // pre-flight codes so customers can branch on whether the // failure was their environment vs the actual data transfer. + a.Printer.Step(3, 4, "Stage your files") + a.Printer.Hintf("A short-lived helper pod mounts the shared storage and your files stream into it — like `kubectl cp`, but set up and cleaned up for you.") progress := push.NewProgress(out, layout.TotalBytes, fmt.Sprintf("Staging %s", a.Spec.Table)) // Defer Finish so a failure path that returns BEFORE @@ -630,7 +651,8 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush // min) because the full Phase 4 lifecycle — submit + watch // + log stream — can run that long for large ingestions. // The chart's helm flow uses the same token-mint code path. - _, _ = fmt.Fprintln(out) + a.Printer.Step(4, 4, "Run the ingestion") + a.Printer.Hintf("Submitting the run to your workspace, then watching as it validates your data and loads it into the table — progress streams below.") tok, err := cluster.MintIngestorToken(ctx, cs, resolved.Namespace, release.IngestorSAName, 3600, nil) if err != nil { @@ -750,19 +772,11 @@ func classifyPushOutcome(res *submit.Result, err error) (string, *exitError) { return "unknown", nil } -// printPushPreflight is the customer-facing summary. Mirrors -// `cluster info`'s layout for consistency: section header, -// indented key:value rows. Kept here (not on the layout/release/pvc -// types) because the formatting is policy and lives with the CLI, -// not the data. -func printPushPreflight( - p *ui.Printer, - layout *push.LocalLayout, - release *cluster.ParentRelease, - pvc *cluster.SharedPVC, - spec map[string]any, - dryRun bool, -) { +// printLocalSummary shows what the CLI found on disk plus the ingest +// settings it assembled — the detail under step 1 ("Check your +// dataset"). Split from the cluster summary so each sits under its own +// numbered step. Mirrors `cluster info`'s section/Field layout. +func printLocalSummary(p *ui.Printer, layout *push.LocalLayout, spec map[string]any) { cat, _ := spec["category"].(string) p.Section("Local dataset") @@ -789,17 +803,7 @@ func printPushPreflight( } p.Field("total size", push.HumanBytes(layout.TotalBytes)) - p.Section("Target cluster") - p.Field("release", fmt.Sprintf("%s (chart %s)", release.ReleaseName, release.ChartVersion)) - p.Field("jobs-manager", release.JobsManagerService) - p.Field("shared PVC", fmt.Sprintf("%s (%s)", pvc.ClaimName, pvc.Phase)) - if !pvc.IsReadWriteMany() { - // Warn but don't block — RWO clusters still work; the scheduler - // co-locates the stage Pod with the existing mounter. - p.Warnf("PVC is %v, not ReadWriteMany — the stage Pod will co-locate with the existing mounter", pvc.AccessModes) - } - - p.Section("Synthesized ingest spec") + p.Section("Ingest settings") p.Field("table", fmt.Sprintf("%v", spec["table"])) p.Field("category", fmt.Sprintf("%v", spec["category"])) p.Field("intent", fmt.Sprintf("%v", spec["intent"])) @@ -813,10 +817,19 @@ func printPushPreflight( p.Field("time column", tc) } p.Field("destination", push.FinalDestPrefix(spec["table"].(string))) +} - if !dryRun { - p.Infof("Next: stage %d files (%s) for table %q", - layout.FileCount(), push.HumanBytes(layout.TotalBytes), spec["table"]) +// printClusterSummary shows the discovered workspace cluster target — +// the detail under step 2 ("Connect to your workspace's cluster"). +func printClusterSummary(p *ui.Printer, release *cluster.ParentRelease, pvc *cluster.SharedPVC) { + p.Section("Target cluster") + p.Field("release", fmt.Sprintf("%s (chart %s)", release.ReleaseName, release.ChartVersion)) + p.Field("jobs-manager", release.JobsManagerService) + p.Field("shared PVC", fmt.Sprintf("%s (%s)", pvc.ClaimName, pvc.Phase)) + if !pvc.IsReadWriteMany() { + // Warn but don't block — RWO clusters still work; the scheduler + // co-locates the stage Pod with the existing mounter. + p.Warnf("PVC is %v, not ReadWriteMany — the stage Pod will co-locate with the existing mounter", pvc.AccessModes) } } diff --git a/internal/cli/dataset_rm.go b/internal/cli/dataset_rm.go index 5d78c01..feb7f8e 100644 --- a/internal/cli/dataset_rm.go +++ b/internal/cli/dataset_rm.go @@ -145,7 +145,7 @@ func runDatasetRm(ctx context.Context, a runDatasetRmArgs) error { for _, path := range plan.PVCPaths { p.Field("pvc path", path) } - p.Warnf("Destructive and cannot be undone. The central backend catalog entry is NOT removed (tracebloc/cli#39).") + p.Warnf("Destructive and cannot be undone.") // 5. Dry-run stop. if a.DryRun { diff --git a/internal/cli/interactive.go b/internal/cli/interactive.go index aaff559..3c567bf 100644 --- a/internal/cli/interactive.go +++ b/internal/cli/interactive.go @@ -124,6 +124,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS prompted := false if a.LocalPath == "" { + p.PromptHint("The folder holding your dataset — a single .csv for tabular, or labels.csv + an images/ folder for images. e.g. ~/datasets/churn") ans, err := pr.Input("Path to your dataset directory", "e.g. ./my-data", "", nil) if err != nil { return err @@ -133,6 +134,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS } if !categorySet { + p.PromptHint("What kind of task your data is for — this drives how it's validated and loaded.") ans, err := pr.Select("Task category", "what kind of data this is", promptCategories, a.Spec.Category) if err != nil { @@ -143,6 +145,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS } if a.Spec.Table == "" { + p.PromptHint("Names the table created on the cluster (and its folder on the shared storage). Letters, digits, underscores only. e.g. churn_train") ans, err := pr.Input("Destination table name", "MySQL identifier + PVC subdir; letters, digits, underscore only", "", push.ValidateTableName) @@ -154,6 +157,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS } if a.Spec.Intent == "" { + p.PromptHint("Whether this split is used to train the model or to evaluate it.") ans, err := pr.Select("Intent", "which split this data is", []string{"train", "test"}, "train") if err != nil { @@ -165,6 +169,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS // masked_language_modeling is self-supervised — no label column. if a.Spec.LabelColumn == "" && a.Spec.Category != "masked_language_modeling" { + p.PromptHint("The column in your CSV holding the value to predict (the target). e.g. label, target, churned") ans, err := pr.Input("Label column", "the column in labels.csv that holds the label", "label", nil) if err != nil { @@ -174,7 +179,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS prompted = true } - cp, err := promptCategorySpecific(pr, a) + cp, err := promptCategorySpecific(p, pr, a) if err != nil { return err } @@ -198,12 +203,13 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS // promptCategorySpecific prompts for the inputs a particular category // needs beyond the core fields, filling only the gaps. Returns whether // it prompted anything (so the caller knows to show the confirm). -func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) { +func promptCategorySpecific(p *ui.Printer, pr prompter, a *runDatasetPushArgs) (bool, error) { cat := a.Spec.Category prompted := false switch { case push.IsImage(cat): if cat == "keypoint_detection" && a.Spec.NumberOfKeypoints <= 0 { + p.PromptHint("How many keypoints each sample is annotated with — dataset-specific, no default. e.g. 17 for COCO human pose") ans, err := pr.Input("Number of keypoints per sample", "e.g. 17 for COCO pose", "", validatePositiveInt) if err != nil { @@ -214,6 +220,7 @@ func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) { prompted = true } if a.TargetSizeFlag == "" { + p.PromptHint("All images must share one resolution; the ingestor checks it (it won't resize). Blank = auto-detect from the first image. e.g. 224x224") ans, err := pr.Input("Image resolution as WxH (blank = auto-detect from the first image)", "all images must share it; the ingestor validates, it doesn't resize", "", validateOptionalTargetSize) @@ -225,6 +232,7 @@ func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) { } case push.IsTabular(cat): if a.SchemaFlag == "" { + p.PromptHint("Override the column types the CLI would infer. Blank = infer from the CSV. e.g. age:INT,price:FLOAT,city:VARCHAR") ans, err := pr.Input("Column schema as col:TYPE,... (blank = infer from the CSV)", "e.g. age:INT,price:FLOAT", "", validateOptionalSchema) if err != nil { @@ -234,6 +242,7 @@ func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) { prompted = true } if push.IsRegressionClass(cat) && a.Spec.LabelPolicy == "" { + p.PromptHint("Regression targets are continuous. 'bucket' groups them into ranges before they leave the cluster; 'passthrough' keeps raw values.") ans, err := pr.Select("Label policy", "bucket bins the target before it leaves the cluster", []string{"bucket", "passthrough"}, "bucket") @@ -244,6 +253,7 @@ func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) { prompted = true } if cat == "time_to_event_prediction" && a.Spec.TimeColumn == "" { + p.PromptHint("The column holding the duration / time-to-event. e.g. time, tenure_days") ans, err := pr.Input("Time column", "the duration/time column name", "time", nil) if err != nil { return prompted, err diff --git a/internal/cli/interactive_test.go b/internal/cli/interactive_test.go index 262e950..d2c9295 100644 --- a/internal/cli/interactive_test.go +++ b/internal/cli/interactive_test.go @@ -3,6 +3,7 @@ package cli import ( "bytes" "errors" + "strings" "testing" "github.com/tracebloc/cli/internal/push" @@ -82,6 +83,35 @@ func TestRunInteractive_FillsAllWhenEmpty(t *testing.T) { } } +// TestRunInteractive_ShowsExampleHints: each input prompt is preceded +// by a visible hint with an example, so the guided flow teaches as it +// goes. Drives runInteractive with a real (buffer-backed) Printer and +// asserts the example text lands in the output. +func TestRunInteractive_ShowsExampleHints(t *testing.T) { + f := &fakePrompter{answers: map[string]string{ + "Path to your dataset directory": "./d", + "Destination table name": "churn_train", + }} + a := &runDatasetPushArgs{Spec: push.SpecArgs{Category: "tabular_regression"}} + + var buf bytes.Buffer + p := ui.New(&buf, ui.WithColor(false)) + if err := runInteractive(p, f, a, true /*categorySet*/); err != nil { + t.Fatalf("runInteractive: %v", err) + } + out := buf.String() + for _, want := range []string{ + "e.g. churn_train", // table-name example + "e.g. label, target", // label-column example + "age:INT", // tabular schema example + "keeps raw values", // label-policy explanation + } { + if !strings.Contains(out, want) { + t.Errorf("interactive output missing hint %q:\n%s", want, out) + } + } +} + // TestRunInteractive_SkipsProvidedValues: flags already set (and an // explicit --category) mean nothing is prompted. func TestRunInteractive_SkipsProvidedValues(t *testing.T) { diff --git a/internal/ui/ui.go b/internal/ui/ui.go index 81082c9..68b7219 100644 --- a/internal/ui/ui.go +++ b/internal/ui/ui.go @@ -14,6 +14,7 @@ import ( "fmt" "io" "os" + "strings" "github.com/fatih/color" "golang.org/x/term" @@ -108,6 +109,16 @@ func (p *Printer) Banner(title, subtitle string) { p.out("\n") } +// Para prints a normal-weight paragraph, each line indented to match +// Banner/Section bodies. It splits on embedded newlines so multi-line +// prose keeps the indent. Use for explanatory prose — distinct from +// Hintf (dim one-liners) and Infof (· bullets). +func (p *Printer) Para(text string) { + for _, line := range strings.Split(text, "\n") { + p.out(" %s\n", line) + } +} + // Step prints a major-step header: "Step n/total label" in bold cyan. // Mirrors common.sh step(). func (p *Printer) Step(n, total int, label string) { @@ -144,6 +155,14 @@ func (p *Printer) Hintf(format string, a ...any) { p.out(" %s\n", p.paint(fmt.Sprintf(format, a...), color.Faint)) } +// PromptHint prints guidance for an interactive prompt: a leading blank +// line for separation, then the hint in cyan so it stands out directly +// above the prompt. Distinct from Hintf (dim) — prompt guidance is meant +// to be read, not skimmed past. +func (p *Printer) PromptHint(format string, a ...any) { + p.out("\n %s\n", p.paint(fmt.Sprintf(format, a...), color.FgCyan)) +} + // PromptHeader prints a bold-white label before a user-input prompt. func (p *Printer) PromptHeader(label string) { p.out("\n %s\n", p.paint(label, color.Bold, color.FgWhite))