Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion internal/cli/coverage_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ func TestPrintPushPreflight_RendersKeyFacts(t *testing.T) {

var buf bytes.Buffer
p := ui.New(&buf, ui.WithColor(false))
printPushPreflight(p, layout, release, pvc, spec, false)
printLocalSummary(p, layout, spec)
printClusterSummary(p, release, pvc)
out := buf.String()

for _, want := range []string{
Expand Down
85 changes: 49 additions & 36 deletions internal/cli/dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,19 @@ func expandHome(path string) string {
// a bad label-column or oversized dataset gets the diagnostic in
// milliseconds without a kubeconfig round-trip.
func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPushArgs) error {
// Intro header: brand + a plain-English explainer of what a push
// does, so a first-time user understands it before any prompts.
// Routed through a.Printer, so --output-json keeps it on stderr and
// --plain/non-TTY degrade cleanly. (#31)
a.Printer.Banner("tracebloc", "dataset push")
a.Printer.Para(strings.TrimSpace(`
This uploads a dataset from your machine into your tracebloc workspace so models
can be trained on it. Your files are sent to the Kubernetes cluster your
workspace was installed on — tracebloc checks them and loads them into a table
your training runs read from. Your data stays on that cluster the whole time;
contributors train against it without ever seeing the raw files.`))
a.Printer.Hintf("Learn more: https://docs.tracebloc.io")

// 0. Guided mode: prompt for any missing core inputs before
// validation. Flags already provided win; non-TTY / --no-input
// leaves Prompter nil and skips straight to the flag-only path.
Expand Down Expand Up @@ -436,6 +449,9 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
return &exitError{code: 3, err: err}
}

a.Printer.Step(1, 4, "Check your dataset")
a.Printer.Hintf("Reading your files locally first — nothing has touched the cluster yet — so a layout or settings problem shows up right away.")

// 3a. Per-category spec resolution from the local data, so the
// synthesized spec carries the right fields before validation.
switch {
Expand Down Expand Up @@ -539,10 +555,14 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
return &exitError{code: 2, err: errors.New("synthesized spec failed schema validation; check the flag values above")}
}

printLocalSummary(a.Printer, layout, spec)

// 5. Cluster discovery — same kubeconfig path as `cluster info`.
// Errors mirror that command's exit-code contract (3 for
// kubeconfig, 4 for missing release) so behaviour is
// consistent across pre-flight commands.
a.Printer.Step(2, 4, "Connect to your workspace's cluster")
a.Printer.Hintf("Using your kubeconfig to find the tracebloc release in your workspace and the shared storage your dataset will live on.")
resolved, err := cluster.Load(cluster.KubeconfigOptions{
Path: a.Kubeconfig,
Context: a.Context,
Expand Down Expand Up @@ -571,16 +591,15 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
return &exitError{code: 4, err: err}
}

// 7. Print the pre-flight summary. The output is the same in
// dry-run and live mode — only the "what happens next" line
// differs. Customers iterating on a bad layout see this
// every attempt, so it's worth keeping skimmable: one fact
// per line, aligned by column.
printPushPreflight(a.Printer, layout, release, pvc, spec, a.DryRun)
// 7. Show what we found on the cluster — the customer's last look
// before any bytes move.
printClusterSummary(a.Printer, release, pvc)

// 8. Dry-run stop. Acknowledged success.
// 8. Dry-run stop. Acknowledged success, plus a reminder of the
// live-only steps (stage + ingest) the customer just skipped.
if a.DryRun {
_, _ = fmt.Fprintln(out, "Dry-run complete — no cluster resources were created.")
a.Printer.Successf("Dry-run complete — your dataset and cluster check out; nothing was created.")
a.Printer.Hintf("A real run continues with step 3 (stage your files) and step 4 (run the ingestion).")
if a.OutputJSON {
writePushJSON(a.JSONOut, "dry-run", spec, nil, "", "")
}
Expand All @@ -595,6 +614,8 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
// Exit code 7 ("staging failed") is distinct from the
// pre-flight codes so customers can branch on whether the
// failure was their environment vs the actual data transfer.
a.Printer.Step(3, 4, "Stage your files")
a.Printer.Hintf("A short-lived helper pod mounts the shared storage and your files stream into it — like `kubectl cp`, but set up and cleaned up for you.")
progress := push.NewProgress(out, layout.TotalBytes,
fmt.Sprintf("Staging %s", a.Spec.Table))
// Defer Finish so a failure path that returns BEFORE
Expand Down Expand Up @@ -630,7 +651,8 @@ func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPush
// min) because the full Phase 4 lifecycle — submit + watch
// + log stream — can run that long for large ingestions.
// The chart's helm flow uses the same token-mint code path.
_, _ = fmt.Fprintln(out)
a.Printer.Step(4, 4, "Run the ingestion")
a.Printer.Hintf("Submitting the run to your workspace, then watching as it validates your data and loads it into the table — progress streams below.")
tok, err := cluster.MintIngestorToken(ctx, cs, resolved.Namespace,
release.IngestorSAName, 3600, nil)
if err != nil {
Expand Down Expand Up @@ -750,19 +772,11 @@ func classifyPushOutcome(res *submit.Result, err error) (string, *exitError) {
return "unknown", nil
}

// printPushPreflight is the customer-facing summary. Mirrors
// `cluster info`'s layout for consistency: section header,
// indented key:value rows. Kept here (not on the layout/release/pvc
// types) because the formatting is policy and lives with the CLI,
// not the data.
func printPushPreflight(
p *ui.Printer,
layout *push.LocalLayout,
release *cluster.ParentRelease,
pvc *cluster.SharedPVC,
spec map[string]any,
dryRun bool,
) {
// printLocalSummary shows what the CLI found on disk plus the ingest
// settings it assembled — the detail under step 1 ("Check your
// dataset"). Split from the cluster summary so each sits under its own
// numbered step. Mirrors `cluster info`'s section/Field layout.
func printLocalSummary(p *ui.Printer, layout *push.LocalLayout, spec map[string]any) {
cat, _ := spec["category"].(string)

p.Section("Local dataset")
Expand All @@ -789,17 +803,7 @@ func printPushPreflight(
}
p.Field("total size", push.HumanBytes(layout.TotalBytes))

p.Section("Target cluster")
p.Field("release", fmt.Sprintf("%s (chart %s)", release.ReleaseName, release.ChartVersion))
p.Field("jobs-manager", release.JobsManagerService)
p.Field("shared PVC", fmt.Sprintf("%s (%s)", pvc.ClaimName, pvc.Phase))
if !pvc.IsReadWriteMany() {
// Warn but don't block — RWO clusters still work; the scheduler
// co-locates the stage Pod with the existing mounter.
p.Warnf("PVC is %v, not ReadWriteMany — the stage Pod will co-locate with the existing mounter", pvc.AccessModes)
}

p.Section("Synthesized ingest spec")
p.Section("Ingest settings")
p.Field("table", fmt.Sprintf("%v", spec["table"]))
p.Field("category", fmt.Sprintf("%v", spec["category"]))
p.Field("intent", fmt.Sprintf("%v", spec["intent"]))
Expand All @@ -813,10 +817,19 @@ func printPushPreflight(
p.Field("time column", tc)
}
p.Field("destination", push.FinalDestPrefix(spec["table"].(string)))
}

if !dryRun {
p.Infof("Next: stage %d files (%s) for table %q",
layout.FileCount(), push.HumanBytes(layout.TotalBytes), spec["table"])
// printClusterSummary renders the "Target cluster" section: the release
// name with its chart version, the jobs-manager service, and the shared
// PVC claim with its phase. It is the detail printed under step 2
// ("Connect to your workspace's cluster").
//
// A PVC that is not ReadWriteMany produces a warning only; nothing here
// blocks the push.
func printClusterSummary(p *ui.Printer, release *cluster.ParentRelease, pvc *cluster.SharedPVC) {
	p.Section("Target cluster")

	releaseDesc := fmt.Sprintf("%s (chart %s)", release.ReleaseName, release.ChartVersion)
	p.Field("release", releaseDesc)
	p.Field("jobs-manager", release.JobsManagerService)

	claimDesc := fmt.Sprintf("%s (%s)", pvc.ClaimName, pvc.Phase)
	p.Field("shared PVC", claimDesc)

	if pvc.IsReadWriteMany() {
		return
	}
	// Warn rather than fail: RWO clusters still work because the
	// scheduler co-locates the stage Pod with the existing mounter.
	p.Warnf("PVC is %v, not ReadWriteMany — the stage Pod will co-locate with the existing mounter", pvc.AccessModes)
}

Expand Down
2 changes: 1 addition & 1 deletion internal/cli/dataset_rm.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ func runDatasetRm(ctx context.Context, a runDatasetRmArgs) error {
for _, path := range plan.PVCPaths {
p.Field("pvc path", path)
}
p.Warnf("Destructive and cannot be undone. The central backend catalog entry is NOT removed (tracebloc/cli#39).")
p.Warnf("Destructive and cannot be undone.")

// 5. Dry-run stop.
if a.DryRun {
Expand Down
14 changes: 12 additions & 2 deletions internal/cli/interactive.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS
prompted := false

if a.LocalPath == "" {
p.PromptHint("The folder holding your dataset — a single .csv for tabular, or labels.csv + an images/ folder for images. e.g. ~/datasets/churn")
ans, err := pr.Input("Path to your dataset directory", "e.g. ./my-data", "", nil)
if err != nil {
return err
Expand All @@ -133,6 +134,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS
}

if !categorySet {
p.PromptHint("What kind of task your data is for — this drives how it's validated and loaded.")
ans, err := pr.Select("Task category", "what kind of data this is",
promptCategories, a.Spec.Category)
if err != nil {
Expand All @@ -143,6 +145,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS
}

if a.Spec.Table == "" {
p.PromptHint("Names the table created on the cluster (and its folder on the shared storage). Letters, digits, underscores only. e.g. churn_train")
ans, err := pr.Input("Destination table name",
"MySQL identifier + PVC subdir; letters, digits, underscore only", "",
push.ValidateTableName)
Expand All @@ -154,6 +157,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS
}

if a.Spec.Intent == "" {
p.PromptHint("Whether this split is used to train the model or to evaluate it.")
ans, err := pr.Select("Intent", "which split this data is",
[]string{"train", "test"}, "train")
if err != nil {
Expand All @@ -165,6 +169,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS

// masked_language_modeling is self-supervised — no label column.
if a.Spec.LabelColumn == "" && a.Spec.Category != "masked_language_modeling" {
p.PromptHint("The column in your CSV holding the value to predict (the target). e.g. label, target, churned")
ans, err := pr.Input("Label column",
"the column in labels.csv that holds the label", "label", nil)
if err != nil {
Expand All @@ -174,7 +179,7 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS
prompted = true
}

cp, err := promptCategorySpecific(pr, a)
cp, err := promptCategorySpecific(p, pr, a)
if err != nil {
return err
}
Expand All @@ -198,12 +203,13 @@ func runInteractive(p *ui.Printer, pr prompter, a *runDatasetPushArgs, categoryS
// promptCategorySpecific prompts for the inputs a particular category
// needs beyond the core fields, filling only the gaps. Returns whether
// it prompted anything (so the caller knows to show the confirm).
func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) {
func promptCategorySpecific(p *ui.Printer, pr prompter, a *runDatasetPushArgs) (bool, error) {
cat := a.Spec.Category
prompted := false
switch {
case push.IsImage(cat):
if cat == "keypoint_detection" && a.Spec.NumberOfKeypoints <= 0 {
p.PromptHint("How many keypoints each sample is annotated with — dataset-specific, no default. e.g. 17 for COCO human pose")
ans, err := pr.Input("Number of keypoints per sample",
"e.g. 17 for COCO pose", "", validatePositiveInt)
if err != nil {
Expand All @@ -214,6 +220,7 @@ func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) {
prompted = true
}
if a.TargetSizeFlag == "" {
p.PromptHint("All images must share one resolution; the ingestor checks it (it won't resize). Blank = auto-detect from the first image. e.g. 224x224")
ans, err := pr.Input("Image resolution as WxH (blank = auto-detect from the first image)",
"all images must share it; the ingestor validates, it doesn't resize", "",
validateOptionalTargetSize)
Expand All @@ -225,6 +232,7 @@ func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) {
}
case push.IsTabular(cat):
if a.SchemaFlag == "" {
p.PromptHint("Override the column types the CLI would infer. Blank = infer from the CSV. e.g. age:INT,price:FLOAT,city:VARCHAR")
ans, err := pr.Input("Column schema as col:TYPE,... (blank = infer from the CSV)",
"e.g. age:INT,price:FLOAT", "", validateOptionalSchema)
if err != nil {
Expand All @@ -234,6 +242,7 @@ func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) {
prompted = true
}
if push.IsRegressionClass(cat) && a.Spec.LabelPolicy == "" {
p.PromptHint("Regression targets are continuous. 'bucket' groups them into ranges before they leave the cluster; 'passthrough' keeps raw values.")
ans, err := pr.Select("Label policy",
"bucket bins the target before it leaves the cluster",
[]string{"bucket", "passthrough"}, "bucket")
Expand All @@ -244,6 +253,7 @@ func promptCategorySpecific(pr prompter, a *runDatasetPushArgs) (bool, error) {
prompted = true
}
if cat == "time_to_event_prediction" && a.Spec.TimeColumn == "" {
p.PromptHint("The column holding the duration / time-to-event. e.g. time, tenure_days")
ans, err := pr.Input("Time column", "the duration/time column name", "time", nil)
if err != nil {
return prompted, err
Expand Down
30 changes: 30 additions & 0 deletions internal/cli/interactive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cli
import (
"bytes"
"errors"
"strings"
"testing"

"github.com/tracebloc/cli/internal/push"
Expand Down Expand Up @@ -82,6 +83,35 @@ func TestRunInteractive_FillsAllWhenEmpty(t *testing.T) {
}
}

// TestRunInteractive_ShowsExampleHints runs the guided flow against a
// buffer-backed Printer and checks that the example/explanation text
// for the table name, label column, tabular schema and label policy
// appears in the captured output. The category is preset (and passed as
// already chosen), so the category prompt itself is not exercised here.
func TestRunInteractive_ShowsExampleHints(t *testing.T) {
	answers := map[string]string{
		"Path to your dataset directory": "./d",
		"Destination table name":         "churn_train",
	}
	prompts := &fakePrompter{answers: answers}
	args := &runDatasetPushArgs{Spec: push.SpecArgs{Category: "tabular_regression"}}

	var sink bytes.Buffer
	printer := ui.New(&sink, ui.WithColor(false))

	const categorySet = true
	if err := runInteractive(printer, prompts, args, categorySet); err != nil {
		t.Fatalf("runInteractive: %v", err)
	}

	got := sink.String()
	hints := []string{
		"e.g. churn_train",   // table-name example
		"e.g. label, target", // label-column example
		"age:INT",            // tabular schema example
		"keeps raw values",   // label-policy explanation
	}
	for _, hint := range hints {
		if strings.Contains(got, hint) {
			continue
		}
		t.Errorf("interactive output missing hint %q:\n%s", hint, got)
	}
}

// TestRunInteractive_SkipsProvidedValues: flags already set (and an
// explicit --category) mean nothing is prompted.
func TestRunInteractive_SkipsProvidedValues(t *testing.T) {
Expand Down
19 changes: 19 additions & 0 deletions internal/ui/ui.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"fmt"
"io"
"os"
"strings"

"github.com/fatih/color"
"golang.org/x/term"
Expand Down Expand Up @@ -108,6 +109,16 @@ func (p *Printer) Banner(title, subtitle string) {
p.out("\n")
}

// Para writes text as plain, unpainted prose. The text is broken on
// every "\n" and each piece is emitted on its own line with the same
// leading indent Hintf uses, so multi-line paragraphs stay aligned.
//
// Every piece is written, including empty ones: an empty text produces
// a single indented blank line, and a trailing newline produces one
// extra. Use it for explanatory prose rather than Hintf's dim
// one-liners.
func (p *Printer) Para(text string) {
	remaining := text
	for {
		line, rest, more := strings.Cut(remaining, "\n")
		p.out(" %s\n", line)
		if !more {
			return
		}
		remaining = rest
	}
}

// Step prints a major-step header: "Step n/total label" in bold cyan.
// Mirrors common.sh step().
func (p *Printer) Step(n, total int, label string) {
Expand Down Expand Up @@ -144,6 +155,14 @@ func (p *Printer) Hintf(format string, a ...any) {
p.out(" %s\n", p.paint(fmt.Sprintf(format, a...), color.Faint))
}

// PromptHint formats guidance for an interactive prompt and writes it
// in cyan, preceded by a blank line so it is visually separated from
// whatever came before. Hintf renders faint text; this one is coloured
// because prompt guidance is meant to be read, not skimmed past.
//
// format and a follow fmt.Sprintf conventions.
func (p *Printer) PromptHint(format string, a ...any) {
	hint := fmt.Sprintf(format, a...)
	p.out("\n %s\n", p.paint(hint, color.FgCyan))
}

// PromptHeader prints a bold-white label before a user-input prompt.
func (p *Printer) PromptHeader(label string) {
p.out("\n %s\n", p.paint(label, color.Bold, color.FgWhite))
Expand Down
Loading