diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 82ca229..2174836 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -63,14 +63,44 @@ jobs: go-version-file: go.mod cache: true - - name: golangci-lint - uses: golangci/golangci-lint-action@v6 - with: - version: v1.61.0 - # Pin the cache key per Go version so a Go bump invalidates - # cleanly — stale caches across Go versions cause noisy - # "invalid pkg" failures. - args: --timeout=5m + # golangci-lint-action@v6 reproducibly time-budgeted out at + # ~2 minutes on this module's k8s.io-heavy dep tree (3 runs + # in a row in early 2026 — see #6). Stand-alone tools run + # against the same code in <30 seconds and catch the bugs + # we've actually hit this week (unchecked Fprintf errors + # were errcheck-catchable; gofmt -s drift was a recurring + # nit). Going back to the action when #6 has a strategy. + + # Tool versions are unpinned (@latest) because older tagged + # releases pulled stale golang.org/x/tools that fail to build + # against current Go ("invalid array length -delta * delta"). + # Pinning is preferable for reproducibility but waiting on the + # upstream tools to ship newer tags. Track in #6. + + - name: errcheck + run: | + go install github.com/kisielk/errcheck@latest + errcheck ./... + + - name: gofmt -s + run: | + drift="$(gofmt -s -l .)" + if [ -n "$drift" ]; then + echo "::error::gofmt -s drift in:" + echo "$drift" | sed 's/^/ /' + echo "::error::run \`make fmt\` to fix" + exit 1 + fi + + - name: ineffassign + run: | + go install github.com/gordonklaus/ineffassign@latest + ineffassign ./... + + - name: misspell + run: | + go install github.com/client9/misspell/cmd/misspell@latest + misspell -error . build: name: Build (${{ matrix.os }}/${{ matrix.arch }}) diff --git a/.golangci.yml b/.golangci.yml index 7763fa4..a6d52e7 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -7,17 +7,32 @@ run: timeout: 5m - go: "1.22" + # Track go.mod's `go` directive. Go's release cadence + `go mod + # tidy`'s aggressive bumping (especially when k8s.io/* deps want + # newer Go) keep dragging go.mod's minimum up; pinning a stale + # version here just produces typecheck errors instead of real + # findings. Currently 1.26 to match `go 1.26.0` in go.mod (the + # earlier "1.24" pin drifted when k8s.io/* v0.36 bumped the floor; + # Bugbot caught the drift on PR #2). + go: "1.26" linters: disable-all: true enable: - # Standard set — these catch real bugs and have very low false-positive rates. + # The set is trimmed to the linters that DON'T do full whole-program + # SSA analysis. `staticcheck` and `unused` were in the original set + # and reproducibly caused the GitHub-hosted runner to time-budget + # the job (~2 min then shutdown signal) on this module — k8s.io/* + # transitive deps inflate the analysis graph enough to OOM-or-stall + # the standard 4-CPU/16GB runner. Re-enabling them is a v0.2 + # follow-up that needs either a larger runner, a much narrower + # scope (e.g. only `./internal/...`), or a faster successor like + # govulncheck for the security-only subset. + + # Cheap, per-file checks — catch real bugs without SSA. - errcheck # unchecked error returns - govet # `go vet` - ineffassign # assignments that go nowhere - - staticcheck # SA-series checks (well-vetted) - - unused # dead code # Style / hygiene that pays for itself in code review time. - gofmt diff --git a/cmd/tracebloc/main.go b/cmd/tracebloc/main.go index a38a446..8066305 100644 --- a/cmd/tracebloc/main.go +++ b/cmd/tracebloc/main.go @@ -57,7 +57,11 @@ func main() { // "silent" by returning an exitError with a nil inner — see // cli.IsSilentError for the contract. if !cli.IsSilentError(err) { - fmt.Fprintln(os.Stderr, "Error:", err) + // Explicit-discard the writer error: if stderr itself is + // gone (closed pipe, redirected to /dev/full, etc.) we + // still need to exit non-zero — the error message is + // best-effort, the exit code is the contract. + _, _ = fmt.Fprintln(os.Stderr, "Error:", err) } // Map command-defined exit codes through. Handlers that want a diff --git a/go.mod b/go.mod index 8c5a9db..c792eb3 100644 --- a/go.mod +++ b/go.mod @@ -1,15 +1,57 @@ module github.com/tracebloc/cli -go 1.22 +// Minimum Go is what `go 1.26.0` says below — this used to claim +// "1.22" but the k8s.io/* v0.36.x deps dragged the floor up to 1.26 +// (transitively via `go mod tidy`), and the stale comment misled +// Bugbot and humans alike. The lint config in .golangci.yml's `go:` +// key tracks this directive; bump both together when the minimum +// moves again. +go 1.26.0 require ( github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 github.com/spf13/cobra v1.8.1 - golang.org/x/text v0.16.0 + golang.org/x/text v0.33.0 gopkg.in/yaml.v3 v3.0.1 + k8s.io/api v0.36.1 + k8s.io/apimachinery v0.36.1 + k8s.io/client-go v0.36.1 ) require ( + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.13.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.23.0 // indirect + github.com/google/gnostic-models v0.7.0 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/spf13/pflag v1.0.5 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/spf13/pflag v1.0.9 // indirect + github.com/x448/float16 v0.8.4 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/net v0.49.0 // indirect + golang.org/x/oauth2 v0.34.0 // indirect + golang.org/x/sys v0.40.0 // indirect + golang.org/x/term v0.39.0 // indirect + golang.org/x/time v0.14.0 // indirect + google.golang.org/protobuf v1.36.12-0.20260120151049-f2248ac996af // indirect + gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + k8s.io/klog/v2 v2.140.0 // indirect + k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a // indirect + k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2 // indirect + sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.2 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect ) diff --git a/go.sum b/go.sum index 3cf027a..9a05a37 100644 --- a/go.sum +++ b/go.sum @@ -1,18 +1,126 @@ github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= +github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEVZGK7IN2kJkjTuQ= github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU= github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= +golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= +golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= +golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= +golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.39.0 h1:RclSuaJf32jOqZz74CkPA9qFuVTX7vhLlpfj/IGWlqY= +golang.org/x/term v0.39.0/go.mod h1:yxzUCTP/U+FzoxfdKmLaA0RV1WgE0VY7hXBwKtY/4ww= +golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= +golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= +golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= +google.golang.org/protobuf v1.36.12-0.20260120151049-f2248ac996af h1:+5/Sw3GsDNlEmu7TfklWKPdQ0Ykja5VEmq2i817+jbI= +google.golang.org/protobuf v1.36.12-0.20260120151049-f2248ac996af/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.36.1 h1:XbL/EMj8K2aJpJtePmqUyQMsM0D4QI2pvl7YKJ20FTY= +k8s.io/api v0.36.1/go.mod h1:KOWo4ey3TINlXjeHVuwB3i+tXXnu+UcwFBHlI/9dvEo= +k8s.io/apimachinery v0.36.1 h1:G63Gjx2W+q0YD+72Vo8oY0nDnePVwnuzTmmy5ENrVSA= +k8s.io/apimachinery v0.36.1/go.mod h1:ibYOR00vW/I1kzvi5SF0dRuJ52BvKtfvRdOn35GPQ+8= +k8s.io/client-go v0.36.1 h1:FN/K8QIT2CEDt+2WB2HnWrUANZ50AP5GII43/SP2JR0= +k8s.io/client-go v0.36.1/go.mod h1:s6rAnCtTGYDQnpNjEhSaISV+2O8jwruZ6m3QOYBFbtU= +k8s.io/klog/v2 v2.140.0 h1:Tf+J3AH7xnUzZyVVXhTgGhEKnFqye14aadWv7bzXdzc= +k8s.io/klog/v2 v2.140.0/go.mod h1:o+/RWfJ6PwpnFn7OyAG3QnO47BFsymfEfrz6XyYSSp0= +k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a h1:xCeOEAOoGYl2jnJoHkC3hkbPJgdATINPMAxaynU2Ovg= +k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a/go.mod h1:uGBT7iTA6c6MvqUvSXIaYZo9ukscABYi2btjhvgKGZ0= +k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2 h1:AZYQSJemyQB5eRxqcPky+/7EdBj0xi3g0ZcxxJ7vbWU= +k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2/go.mod h1:xDxuJ0whA3d0I4mf/C4ppKHxXynQ+fxnkmQH0vTHnuk= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.2 h1:kwVWMx5yS1CrnFWA/2QHyRVJ8jM6dBA80uLmm0wJkk8= +sigs.k8s.io/structured-merge-diff/v6 v6.3.2/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/internal/cli/cluster.go b/internal/cli/cluster.go new file mode 100644 index 0000000..80a7172 --- /dev/null +++ b/internal/cli/cluster.go @@ -0,0 +1,202 @@ +package cli + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "time" + + "github.com/spf13/cobra" + + "github.com/tracebloc/cli/internal/cluster" +) + +// newClusterCmd wires the `tracebloc cluster` subtree. Today it has +// a single verb — `info` — which is the customer's "is the CLI +// pointing at the right cluster?" pre-flight before running +// `dataset push`. Future verbs (e.g. `cluster doctor` for +// diagnostics, `cluster contexts` for switching) hang off this +// parent in later phases. +func newClusterCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "cluster", + Short: "Inspect the cluster the CLI is currently targeting", + Long: `Commands for inspecting the Kubernetes cluster the CLI is +configured to talk to. + +Use ` + "`cluster info`" + ` to verify which cluster, namespace, and parent +tracebloc release the next ` + "`dataset push`" + ` will target. Useful as a +pre-flight before doing anything destructive (e.g. ingesting into +the wrong cluster).`, + } + + cmd.AddCommand(newClusterInfoCmd()) + return cmd +} + +// newClusterInfoCmd implements `tracebloc cluster info`. Discovers +// the parent client release in the configured namespace, mints (or +// looks up) an ingestor SA token, and prints a single-screen +// summary the customer can sanity-check before running a real +// ingestion. +// +// Three kubeconfig flags follow kubectl conventions: +// +// --kubeconfig=PATH override KUBECONFIG / ~/.kube/config +// --context=NAME override the kubeconfig's current-context +// --namespace=NAME override the context's default namespace +// +// All three are zero-value-safe — running `tracebloc cluster info` +// with no flags uses the customer's normal kubectl defaults. +func newClusterInfoCmd() *cobra.Command { + var ( + kubeconfigPath string + contextOverride string + nsOverride string + ingestorSAName string + tokenExpiry int64 + ) + + cmd := &cobra.Command{ + Use: "info", + Short: "Show the cluster, namespace, parent release, and ingestor SA token state", + Long: `Discovers the tracebloc parent client release in the configured +cluster + namespace and prints: + + • Which kubeconfig context the CLI used + • The namespace it resolved to + • The parent release name + chart version + appVersion + • The jobs-manager Service the next dataset push would POST to + • The ingestor ServiceAccount the post-install hook would auth as + • The cluster's configured INGESTOR_IMAGE_DIGEST default + • Whether the user's kubeconfig can mint short-lived SA tokens + via TokenRequest, or has to fall back to a static + service-account-token Secret + +The actual token bytes are never printed; the diagnostic shows +SHA256(token)[:8] so the customer can verify "yes, that's the +token I expect" without leaking it to terminal scrollback. + +Exit codes: + 0 cluster discovered + token mintable; CLI is ready + 4 cluster reachable but no tracebloc parent release found + 5 cluster reachable + release found but no usable SA token`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + return runClusterInfo( + cmd.Context(), + cmd.OutOrStdout(), + kubeconfigPath, contextOverride, nsOverride, + ingestorSAName, + tokenExpiry, + ) + }, + } + + cmd.Flags().StringVar(&kubeconfigPath, "kubeconfig", "", + "path to the kubeconfig file (default: $KUBECONFIG, then ~/.kube/config)") + cmd.Flags().StringVar(&contextOverride, "context", "", + "name of the kubeconfig context to use (default: kubeconfig's current-context)") + cmd.Flags().StringVarP(&nsOverride, "namespace", "n", "", + "namespace where the parent tracebloc/client release is installed (default: the context's namespace, or 'default')") + cmd.Flags().StringVar(&ingestorSAName, "ingestor-sa", "", + "override the ingestor ServiceAccount name (default: \"ingestor\", the chart default; "+ + "set this if you customized `ingestionAuthz.serviceAccountName` in the parent client chart)") + cmd.Flags().Int64Var(&tokenExpiry, "token-expiry-seconds", 600, + "requested SA token expiration in seconds (default 600 = 10 min; ignored for static-secret fallback)") + + return cmd +} + +func runClusterInfo( + ctx context.Context, + out interface{ Write([]byte) (int, error) }, + kubeconfigPath, contextOverride, nsOverride string, + ingestorSAOverride string, + tokenExpiry int64, +) error { + resolved, err := cluster.Load(cluster.KubeconfigOptions{ + Path: kubeconfigPath, + Context: contextOverride, + Namespace: nsOverride, + }) + if err != nil { + // Kubeconfig errors are exit-code-3 territory (file/parse + // problem, same conceptual class as `ingest validate`'s + // unreadable-input). + return &exitError{code: 3, err: fmt.Errorf("loading kubeconfig: %w", err)} + } + + cs, err := cluster.NewClientset(resolved) + if err != nil { + return &exitError{code: 3, err: err} + } + + // Explicit-discard the writer errors: same rationale as + // internal/cli/ingest.go — the exit code is the contract, and a + // pipe-write failure shouldn't convert success into failure. If + // the downstream consumer disappears mid-write we still finish + // cleanly. errcheck wants this acknowledged. + _, _ = fmt.Fprintf(out, "Kubeconfig:\n") + _, _ = fmt.Fprintf(out, " context: %s\n", resolved.Context) + _, _ = fmt.Fprintf(out, " server: %s\n", resolved.ServerURL) + _, _ = fmt.Fprintf(out, " namespace: %s\n", resolved.Namespace) + _, _ = fmt.Fprintln(out) + + // Discover the parent release. + release, err := cluster.DiscoverParentRelease(ctx, cs, resolved.Namespace) + if err != nil { + // 4 = "cluster reachable, but no tracebloc release here." + // Distinct from the kubeconfig error (3) so callers can + // branch: 3 means "fix your kubeconfig", 4 means "install + // the parent chart first". + return &exitError{code: 4, err: err} + } + + // Apply the SA-name override here. Discovery doesn't read the + // name from the cluster (see #7); customers with a non-default + // name pass --ingestor-sa. + if ingestorSAOverride != "" { + release.IngestorSAName = ingestorSAOverride + } + + _, _ = fmt.Fprintf(out, "Parent release:\n") + _, _ = fmt.Fprintf(out, " name: %s\n", release.ReleaseName) + _, _ = fmt.Fprintf(out, " chart version: %s\n", release.ChartVersion) + _, _ = fmt.Fprintf(out, " app version: %s\n", release.AppVersion) + _, _ = fmt.Fprintf(out, " jobs-manager: %s\n", release.JobsManagerService) + _, _ = fmt.Fprintf(out, " ingestor SA: %s/%s\n", resolved.Namespace, release.IngestorSAName) + digest := release.IngestorImageDigest + if digest == "" { + digest = "" + } + _, _ = fmt.Fprintf(out, " ingestor img: %s\n", digest) + _, _ = fmt.Fprintln(out) + + // Mint a token (or fall back). The audience is intentionally + // nil today — jobs-manager's TokenReview accepts the default + // API-server audience. A future jobs-manager audience plugs in + // here. + tok, err := cluster.MintIngestorToken(ctx, cs, resolved.Namespace, release.IngestorSAName, tokenExpiry, nil) + if err != nil { + // 5 = "release found but no usable token." Distinct from + // 4 (no release) so customers can RBAC-debug separately + // from install issues. + return &exitError{code: 5, err: err} + } + + hash := sha256.Sum256([]byte(tok.Token)) + _, _ = fmt.Fprintf(out, "Ingestor SA token:\n") + _, _ = fmt.Fprintf(out, " source: %s\n", tok.Source) + _, _ = fmt.Fprintf(out, " sha256[:8]: %s\n", hex.EncodeToString(hash[:8])) + if tok.ExpirationSeconds > 0 { + exp := time.Duration(tok.ExpirationSeconds) * time.Second + _, _ = fmt.Fprintf(out, " expires in: ~%s (server may cap shorter)\n", exp) + } else { + _, _ = fmt.Fprintf(out, " expires in: never (static-secret fallback)\n") + } + _, _ = fmt.Fprintln(out) + _, _ = fmt.Fprintln(out, "Ready for `tracebloc dataset push` (coming in Phase 3).") + return nil +} diff --git a/internal/cli/root.go b/internal/cli/root.go index 5fcf634..0d790ae 100644 --- a/internal/cli/root.go +++ b/internal/cli/root.go @@ -65,6 +65,7 @@ roadmap. Subsequent phases land subcommands incrementally.`, // Subcommands. New phases append here. root.AddCommand(newVersionCmd(info)) root.AddCommand(newIngestCmd()) + root.AddCommand(newClusterCmd()) return root } diff --git a/internal/cluster/discover.go b/internal/cluster/discover.go new file mode 100644 index 0000000..7ee34c7 --- /dev/null +++ b/internal/cluster/discover.go @@ -0,0 +1,211 @@ +package cluster + +import ( + "context" + "fmt" + "strings" + + appsv1 "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +// ParentRelease describes the tracebloc parent client chart release +// discovered in the customer's cluster. The information comes from +// the helm-managed Deployment's labels (and a few of its env vars), +// not from the helm release secret — parsing those secrets requires +// either pulling in helm.sh/helm/v3 (massive dep) or re-implementing +// the gzip+base64+JSON unwrap. The chart's Deployment labels carry +// everything we actually need, so we use those instead. +type ParentRelease struct { + // ReleaseName is the helm release name. The chart's + // jobs-manager Deployment is named "-jobs-manager" by + // convention and labelled `app.kubernetes.io/instance=`. + ReleaseName string + + // ChartVersion is the parent chart's version, e.g. "1.3.5". + // Comes from the `helm.sh/chart=client-1.3.5` label. + ChartVersion string + + // AppVersion mirrors Chart.yaml's appVersion. Useful for + // detecting whether the customer is on a chart that supports + // the declarative ingestor flow at all. + AppVersion string + + // JobsManagerService is the in-cluster DNS name of the + // jobs-manager Service, e.g. + // "-jobs-manager..svc.cluster.local:8080". + // Used as the POST target for ingestion submissions. + JobsManagerService string + + // IngestorSAName is the name of the ServiceAccount the chart's + // hook pods run as. Today this is always the chart's default + // "ingestor". Customers who set `ingestionAuthz.serviceAccountName` + // to a non-default name in the parent client chart need to + // override via `tracebloc cluster info --ingestor-sa=` + // (and similarly for the future `dataset push` command). Reading + // the name from the cluster's ingestionAuthz ConfigMap so this + // flag becomes unnecessary is a v0.2 follow-up (see #7). + IngestorSAName string + + // IngestorImageDigest is the canonical digest the cluster's + // chart will spawn ingestor Jobs with. Comes from + // `INGESTOR_IMAGE_DIGEST` env on jobs-manager. Empty when the + // admin hasn't set images.ingestor.digest yet. + IngestorImageDigest string +} + +// DiscoverParentRelease finds the tracebloc parent client chart +// release in the given namespace by looking for a Deployment with +// the chart's hallmark labels. Returns a friendly error if none +// found, or if multiple candidates exist (so customers can pick). +// +// Why labels on a Deployment, not Helm's release secrets: +// +// - The release-secret path needs gzip + base64 + JSON parsing +// (helm v3's storage format), or pulls in helm.sh/helm/v3 as +// a dep (which transitively pulls in ~80MB of Go modules, +// including kube-runtime). +// - The Deployment is what the chart actually deploys; if it's +// not there, the chart isn't installed (or the install +// failed). Using its labels for discovery means "the +// discovered release is the running release" by construction. +// - The chart's labels are part of its public contract via +// `_helpers.tpl`; they're stable across minor versions. +// +// The chart's labels share `app.kubernetes.io/name=client` across +// EVERY resource it creates (mysql-client, jobs-manager, +// requests-proxy, etc.) — that's the helm convention where the +// chart's name is the label, not the component's. To distinguish +// jobs-manager from its siblings we filter by Deployment name +// suffix matching the chart's naming convention +// "-jobs-manager" (or plain "jobs-manager" for unprefixed +// installs). +func DiscoverParentRelease(ctx context.Context, cs kubernetes.Interface, namespace string) (*ParentRelease, error) { + deps, err := cs.AppsV1().Deployments(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: "app.kubernetes.io/name=client,app.kubernetes.io/managed-by=Helm", + }) + if err != nil { + return nil, fmt.Errorf("listing chart-managed deployments in namespace %s: %w", namespace, err) + } + + // Filter to just the jobs-manager Deployment(s). The chart pins + // the name to either "-jobs-manager" or "jobs-manager" + // depending on chart version + values; either way it ends in + // "-jobs-manager" or equals "jobs-manager". + var jmDeps []appsv1.Deployment + for _, d := range deps.Items { + if d.Name == "jobs-manager" || strings.HasSuffix(d.Name, "-jobs-manager") { + jmDeps = append(jmDeps, d) + } + } + + switch len(jmDeps) { + case 0: + return nil, fmt.Errorf( + "no tracebloc parent client release found in namespace %q "+ + "(no chart-managed Deployment named *-jobs-manager). "+ + "Install with `helm install tracebloc/client --namespace %s` first, "+ + "or pass --namespace to point at the namespace where it's running.", + namespace, namespace, + ) + case 1: + // happy path + default: + names := make([]string, 0, len(jmDeps)) + for _, d := range jmDeps { + names = append(names, d.Name) + } + return nil, fmt.Errorf( + "found %d tracebloc parent releases in namespace %q (%s); "+ + "this CLI doesn't yet support disambiguating between multiple. "+ + "Pass --namespace to target a namespace with exactly one release.", + len(jmDeps), namespace, strings.Join(names, ", "), + ) + } + + d := jmDeps[0] + release := &ParentRelease{ + ReleaseName: d.Labels["app.kubernetes.io/instance"], + ChartVersion: chartVersionFromLabel(d.Labels["helm.sh/chart"]), + AppVersion: d.Labels["app.kubernetes.io/version"], + } + + // The Service shares the Deployment's release labels by + // convention. Construct the FQDN — we don't need to actually + // hit the API to find it; the chart's helper templates pin the + // service name to "-jobs-manager" (or just + // "jobs-manager" for older chart versions where the release + // prefix wasn't included). + // + // We probe both names to be liberal: if the chart pinned just + // "jobs-manager", that wins; otherwise fall back to the + // release-prefixed form. Customers can always override via the + // ingestor subchart's `jobsManager.endpoint` value. + svc := pickJobsManagerService(ctx, cs, namespace, release.ReleaseName) + release.JobsManagerService = fmt.Sprintf("http://%s.%s.svc.cluster.local:8080", svc, namespace) + + // Read INGESTOR_IMAGE_DIGEST from jobs-manager's pod-spec env. + // The chart pipes images.ingestor.digest through to here. + // + // SA name is NOT discovered today — the chart doesn't surface + // `ingestionAuthz.serviceAccountName` through the jobs-manager + // env, and reading the ingestionAuthz ConfigMap to learn it is a + // v0.2 follow-up (see #7). We default to "ingestor" (the chart + // default); customers who renamed it pass --ingestor-sa from + // the CLI. Bugbot caught the earlier version that incorrectly + // claimed to read the SA name from env. + release.IngestorSAName = "ingestor" + if len(d.Spec.Template.Spec.Containers) > 0 { + for _, env := range d.Spec.Template.Spec.Containers[0].Env { + if env.Name == "INGESTOR_IMAGE_DIGEST" { + release.IngestorImageDigest = env.Value + } + } + } + + return release, nil +} + +// pickJobsManagerService probes for the chart's jobs-manager +// Service. The chart's helper templates have used both names over +// chart history: +// +// - "jobs-manager" (chart 1.3.x and earlier) +// - "-jobs-manager" (some prefixing variants) +// +// We try the unprefixed name first because that's the dominant +// shipped behavior; fall back if it doesn't exist. Probe failures +// (timeouts, RBAC denials) fall through to the prefixed name too — +// the customer gets a clear DNS error later if neither resolves, +// which is more actionable than "couldn't enumerate services." +func pickJobsManagerService(ctx context.Context, cs kubernetes.Interface, namespace, release string) string { + candidates := []string{ + "jobs-manager", + release + "-jobs-manager", + } + for _, name := range candidates { + _, err := cs.CoreV1().Services(namespace).Get(ctx, name, metav1.GetOptions{}) + if err == nil { + return name + } + } + // Last-ditch: return the unprefixed candidate; the caller will + // hit a DNS error at POST time which surfaces with a clearer + // message than us guessing. + return "jobs-manager" +} + +// chartVersionFromLabel extracts "1.3.5" from helm.sh/chart="client-1.3.5". +// Labels in this format are standard Helm output; we strip the +// chart-name prefix to expose just the version. Returns the raw +// label if it doesn't match the expected pattern (defensive +// fallback for unusual chart-name formats). +func chartVersionFromLabel(label string) string { + // Expected: "-"; strip the "client-" prefix. + const prefix = "client-" + if strings.HasPrefix(label, prefix) { + return label[len(prefix):] + } + return label +} diff --git a/internal/cluster/discover_test.go b/internal/cluster/discover_test.go new file mode 100644 index 0000000..34f4433 --- /dev/null +++ b/internal/cluster/discover_test.go @@ -0,0 +1,222 @@ +package cluster + +import ( + "context" + "strings" + "testing" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" +) + +// jobsManagerDeployment builds the minimal Deployment the chart +// creates for jobs-manager — labels mirror the chart's +// _helpers.tpl output as of client 1.3.5. Tests use this to seed +// the fake clientset with realistic data; if the chart's label +// contract ever changes, these tests fail loudly and we update both +// here and the discovery logic in lockstep. +// +// Note: app.kubernetes.io/name is "client" (the chart name) on +// EVERY resource the chart creates, not "jobs-manager". This is +// the helm convention. The discovery logic distinguishes +// jobs-manager from its mysql / requests-proxy siblings by name +// suffix matching ("-jobs-manager") since the chart-level labels +// don't disambiguate. +func jobsManagerDeployment(release, namespace, chartLabel, appVersion, digest string) *appsv1.Deployment { + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: release + "-jobs-manager", + Namespace: namespace, + Labels: map[string]string{ + "app.kubernetes.io/name": "client", + "app.kubernetes.io/instance": release, + "app.kubernetes.io/managed-by": "Helm", + "app.kubernetes.io/version": appVersion, + "helm.sh/chart": chartLabel, + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "api", + Env: []corev1.EnvVar{ + {Name: "INGESTOR_IMAGE_DIGEST", Value: digest}, + }, + }}, + }, + }, + }, + } +} + +func jobsManagerService(name, namespace string) *corev1.Service { + return &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + } +} + +func TestDiscoverParentRelease_HappyPath(t *testing.T) { + const ns = "tracebloc" + cs := fake.NewClientset( + jobsManagerDeployment("tracebloc", ns, + "client-1.3.5", "1.3.5", + "sha256:463e236748708a5e3564569eec9173ea8cb3bcf515992d4939c5b610f3807a4a"), + jobsManagerService("jobs-manager", ns), + ) + + release, err := DiscoverParentRelease(context.Background(), cs, ns) + if err != nil { + t.Fatalf("DiscoverParentRelease: %v", err) + } + + want := ParentRelease{ + ReleaseName: "tracebloc", + ChartVersion: "1.3.5", + AppVersion: "1.3.5", + JobsManagerService: "http://jobs-manager." + ns + ".svc.cluster.local:8080", + IngestorSAName: "ingestor", + IngestorImageDigest: "sha256:463e236748708a5e3564569eec9173ea8cb3bcf515992d4939c5b610f3807a4a", + } + if *release != want { + t.Errorf("mismatch.\ngot: %+v\nwant: %+v", *release, want) + } +} + +func TestDiscoverParentRelease_NoReleaseFound(t *testing.T) { + cs := fake.NewClientset() // empty cluster + + _, err := DiscoverParentRelease(context.Background(), cs, "tracebloc") + if err == nil { + t.Fatal("expected error when no jobs-manager deployment exists") + } + // The error message has to be customer-actionable. Pin the + // key remediation phrase so a future refactor that loses it + // (or worse, replaces it with a stack trace) fails this test. + for _, want := range []string{"no tracebloc parent client release found", "helm install"} { + if !strings.Contains(err.Error(), want) { + t.Errorf("expected error to mention %q, got: %s", want, err) + } + } +} + +func TestDiscoverParentRelease_MultipleReleases(t *testing.T) { + const ns = "tracebloc" + cs := fake.NewClientset( + jobsManagerDeployment("rel-a", ns, "client-1.3.5", "1.3.5", ""), + jobsManagerDeployment("rel-b", ns, "client-1.3.4", "1.3.4", ""), + ) + + _, err := DiscoverParentRelease(context.Background(), cs, ns) + if err == nil { + t.Fatal("expected error when multiple releases exist") + } + // Customer needs to see which releases the CLI found so they + // can pick (or clean up). + for _, want := range []string{"found 2", "rel-a", "rel-b"} { + if !strings.Contains(err.Error(), want) { + t.Errorf("expected error to mention %q, got: %s", want, err) + } + } +} + +// Regression for the real-cluster discovery bug. Pre-fix, the +// selector was `app.kubernetes.io/name=jobs-manager` which never +// matches because the chart's convention is `name=` +// — so the real-cluster smoke test returned "no parent release +// found" even though one was clearly running. The fix is two-part: +// match on `name=client` (the chart name) AND filter the result +// set by Deployment-name suffix to pick out jobs-manager from its +// mysql/requests-proxy siblings, all of which share the +// chart-level labels. +func TestDiscoverParentRelease_FiltersOutSiblingDeployments(t *testing.T) { + const ns = "tracebloc" + // All three deployments share the chart-level labels — only + // the name suffix distinguishes them. + mysqlClient := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mysql-client", + Namespace: ns, + Labels: map[string]string{ + "app.kubernetes.io/name": "client", + "app.kubernetes.io/instance": "tracebloc", + "app.kubernetes.io/managed-by": "Helm", + "helm.sh/chart": "client-1.3.5", + }, + }, + } + requestsProxy := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tracebloc-requests-proxy", + Namespace: ns, + Labels: map[string]string{ + "app.kubernetes.io/name": "client", + "app.kubernetes.io/instance": "tracebloc", + "app.kubernetes.io/managed-by": "Helm", + "helm.sh/chart": "client-1.3.5", + }, + }, + } + + cs := fake.NewClientset( + mysqlClient, + requestsProxy, + jobsManagerDeployment("tracebloc", ns, "client-1.3.5", "1.3.5", ""), + jobsManagerService("jobs-manager", ns), + ) + + release, err := DiscoverParentRelease(context.Background(), cs, ns) + if err != nil { + t.Fatalf("DiscoverParentRelease: %v", err) + } + // Should pick jobs-manager, NOT mysql-client or requests-proxy. + if release.ReleaseName != "tracebloc" { + t.Errorf("ReleaseName = %q, want %q (siblings should be filtered out by name suffix)", + release.ReleaseName, "tracebloc") + } +} + +func TestDiscoverParentRelease_FallsBackToReleasePrefixedService(t *testing.T) { + // Some older chart versions exposed the Service as + // "-jobs-manager" rather than the unprefixed + // "jobs-manager". The discover code probes both names and + // picks whichever resolves. This test seeds ONLY the + // release-prefixed Service and asserts the FQDN reflects it. + const ns = "tracebloc" + cs := fake.NewClientset( + jobsManagerDeployment("custom", ns, "client-1.3.5", "1.3.5", ""), + jobsManagerService("custom-jobs-manager", ns), + // NOTE: no "jobs-manager" service + ) + + release, err := DiscoverParentRelease(context.Background(), cs, ns) + if err != nil { + t.Fatalf("DiscoverParentRelease: %v", err) + } + wantFQDN := "http://custom-jobs-manager." + ns + ".svc.cluster.local:8080" + if release.JobsManagerService != wantFQDN { + t.Errorf("JobsManagerService = %q, want %q", release.JobsManagerService, wantFQDN) + } +} + +func TestChartVersionFromLabel(t *testing.T) { + cases := map[string]string{ + "client-1.3.5": "1.3.5", + "client-2.0.0-rc": "2.0.0-rc", + "client": "client", // unexpected shape — return as-is + "other-1.0.0": "other-1.0.0", // non-client chart — return as-is + "": "", + } + for in, want := range cases { + t.Run(in, func(t *testing.T) { + if got := chartVersionFromLabel(in); got != want { + t.Errorf("chartVersionFromLabel(%q) = %q, want %q", in, got, want) + } + }) + } +} diff --git a/internal/cluster/kubeconfig.go b/internal/cluster/kubeconfig.go new file mode 100644 index 0000000..2a94380 --- /dev/null +++ b/internal/cluster/kubeconfig.go @@ -0,0 +1,164 @@ +// Package cluster handles everything the CLI needs to talk to a +// Kubernetes cluster running the tracebloc parent client chart: +// +// - Locating the customer's kubeconfig + picking a context/namespace +// - Discovering the parent release (jobs-manager Deployment + its +// labels) +// - Minting an ingestor ServiceAccount token via TokenRequest +// +// Everything in this package is exercise-able without a real cluster +// thanks to client-go's fake clientset; see *_test.go. +package cluster + +import ( + "fmt" + "os" + "path/filepath" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" +) + +// KubeconfigOptions captures every knob the customer can turn when +// telling the CLI which cluster + namespace to talk to. Defaults +// mirror kubectl: $KUBECONFIG > ~/.kube/config; current-context if +// --context unset; the context's default namespace if --namespace +// unset; "default" if even the context doesn't pin one. +// +// All fields are zero-value-safe — an empty Options{} produces the +// same behaviour as kubectl run with no flags. +type KubeconfigOptions struct { + // Path overrides $KUBECONFIG and ~/.kube/config when set. + // Honors path expansion (e.g. "~/.kube/config" → absolute). + Path string + + // Context picks a non-current-context from the loaded + // kubeconfig. Empty = use whatever current-context points at. + Context string + + // Namespace overrides the context's default. Empty = pull from + // the context's namespace, falling back to "default". + Namespace string +} + +// ResolvedConfig is what Load() returns: a usable rest.Config plus +// the source-of-truth metadata that the rest of the CLI needs (the +// namespace it resolved to, the context name it used). The +// metadata is separate from rest.Config because rest.Config doesn't +// carry the context name out of the box — it's discarded after the +// merge — and we want to show the customer "you're talking to +// context X" in diagnostic output. +type ResolvedConfig struct { + RestConfig *rest.Config + Context string + Namespace string + + // ServerURL is the cluster's API server URL (the value from + // the context's cluster.server field). Exposed for `tracebloc + // cluster info` to show "you're targeting " so the + // customer can sanity-check before doing anything destructive. + ServerURL string +} + +// Load resolves the kubeconfig + flags into a rest.Config ready for +// kubernetes.NewForConfig(). It does NOT make any network calls — +// pure parsing of local files + env. Network errors only happen +// when the caller subsequently uses the returned RestConfig. +func Load(opts KubeconfigOptions) (*ResolvedConfig, error) { + // NewDefaultClientConfigLoadingRules() wires up the full + // kubectl-style lookup chain: ExplicitPath > $KUBECONFIG + // (multi-file merging) > ~/.kube/config. Constructing + // ClientConfigLoadingRules{} directly with an empty + // ExplicitPath does NOT fall back to those defaults — it + // just refuses to load anything ("no configuration has been + // provided" was the symptom). Surfaced during the first + // real-cluster smoke test of `tracebloc cluster info`. + loadingRules := clientcmd.NewDefaultClientConfigLoadingRules() + if explicit := expandPath(opts.Path); explicit != "" { + // Setting ExplicitPath bypasses the env-var merge + // (intentional — when a customer says "use THIS file", + // we don't mix it with their env-var-listed others). + loadingRules.ExplicitPath = explicit + } + + overrides := &clientcmd.ConfigOverrides{} + if opts.Context != "" { + overrides.CurrentContext = opts.Context + } + if opts.Namespace != "" { + // Setting Context.Namespace overrides the kubeconfig's + // per-context default. This matches `kubectl --namespace` + // semantics. + overrides.Context.Namespace = opts.Namespace + } + + loader := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, overrides) + + restConfig, err := loader.ClientConfig() + if err != nil { + return nil, fmt.Errorf("building rest config from kubeconfig: %w", err) + } + + // Pull the namespace the way kubectl does: explicit override + // wins; otherwise the context's namespace; otherwise "default". + // loader.Namespace() handles that fallback chain. + ns, _, err := loader.Namespace() + if err != nil { + return nil, fmt.Errorf("resolving namespace from kubeconfig: %w", err) + } + + // rest.Config doesn't carry the context name out of the box — + // fish it back out of the raw kubeconfig so diagnostics can + // show it. RawConfig() does a fresh read each call; cache if + // this ever becomes hot (it isn't). + rawCfg, err := loader.RawConfig() + if err != nil { + return nil, fmt.Errorf("reading raw kubeconfig: %w", err) + } + contextName := overrides.CurrentContext + if contextName == "" { + contextName = rawCfg.CurrentContext + } + + return &ResolvedConfig{ + RestConfig: restConfig, + Context: contextName, + Namespace: ns, + ServerURL: restConfig.Host, + }, nil +} + +// NewClientset is a tiny wrapper that constructs a kubernetes +// clientset from a ResolvedConfig. Exists so callers don't have to +// import k8s.io/client-go/kubernetes themselves for the common case +// of "I have a ResolvedConfig, I want a clientset." +func NewClientset(rc *ResolvedConfig) (kubernetes.Interface, error) { + cs, err := kubernetes.NewForConfig(rc.RestConfig) + if err != nil { + return nil, fmt.Errorf("constructing kubernetes clientset: %w", err) + } + return cs, nil +} + +// expandPath handles `~/.kube/config` → `/home/user/.kube/config` +// since clientcmd's ExplicitPath wants an absolute path. Empty +// strings pass through unchanged (they signal "use defaults" to +// clientcmd). +func expandPath(p string) string { + if p == "" { + return "" + } + if p[0] != '~' { + return p + } + home, err := os.UserHomeDir() + if err != nil { + // Best-effort: return the unexpanded path and let + // clientcmd's error surface mention it. Failing here would + // hide the more useful "tried to read ~/.kube/config and + // got X" error downstream. + return p + } + return filepath.Join(home, p[1:]) +} diff --git a/internal/cluster/kubeconfig_test.go b/internal/cluster/kubeconfig_test.go new file mode 100644 index 0000000..39709e2 --- /dev/null +++ b/internal/cluster/kubeconfig_test.go @@ -0,0 +1,84 @@ +package cluster + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestExpandPath(t *testing.T) { + home, err := os.UserHomeDir() + if err != nil { + t.Skipf("os.UserHomeDir failed in this environment, skipping: %v", err) + } + + cases := []struct { + in string + wantFunc func(string) bool + }{ + { + in: "", + wantFunc: func(got string) bool { return got == "" }, + }, + { + in: "/absolute/path/to/kubeconfig", + wantFunc: func(got string) bool { return got == "/absolute/path/to/kubeconfig" }, + }, + { + in: "relative/path/kubeconfig", + wantFunc: func(got string) bool { return got == "relative/path/kubeconfig" }, + }, + { + in: "~/.kube/config", + wantFunc: func(got string) bool { + want := filepath.Join(home, ".kube/config") + return got == want + }, + }, + { + in: "~/some/file", + wantFunc: func(got string) bool { + want := filepath.Join(home, "some/file") + return got == want + }, + }, + } + + for _, c := range cases { + t.Run(c.in, func(t *testing.T) { + got := expandPath(c.in) + if !c.wantFunc(got) { + t.Errorf("expandPath(%q) = %q (failed predicate)", c.in, got) + } + }) + } +} + +// Load() involves real filesystem + clientcmd parsing; it's covered +// indirectly by the cluster-info integration path. A unit test +// would require writing a temp kubeconfig and feeding it back — +// useful but lower-priority than the discover/token tests since +// the clientcmd library is well-trodden. Pin the smallest contract: +// an empty Options{} doesn't panic. +func TestLoad_EmptyOptionsDoesNotPanic(t *testing.T) { + // May succeed or fail depending on whether the test runner has + // a kubeconfig available; either is fine. The point of this + // test is to ensure the function returns rather than crashes. + _, err := Load(KubeconfigOptions{}) + if err != nil && !looksLikeKubeconfigError(err.Error()) { + // Defense-in-depth: if Load() ever starts wrapping + // errors in a way that loses the "kubeconfig" context, + // flag it so the customer-facing diagnostic stays clear. + t.Logf("Load() failed with unfamiliar error: %v", err) + } +} + +func looksLikeKubeconfigError(s string) bool { + for _, marker := range []string{"kubeconfig", "config", "context", "namespace"} { + if strings.Contains(strings.ToLower(s), marker) { + return true + } + } + return false +} diff --git a/internal/cluster/token.go b/internal/cluster/token.go new file mode 100644 index 0000000..0c9a962 --- /dev/null +++ b/internal/cluster/token.go @@ -0,0 +1,214 @@ +package cluster + +import ( + "context" + "fmt" + + authenticationv1 "k8s.io/api/authentication/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +// IngestorToken is what the customer (well, the CLI) authenticates +// to jobs-manager with. Bearer-token over HTTPS to +// /internal/submit-ingestion-run. The token is short-lived (10 +// minutes default) so it can be regenerated per `tracebloc dataset +// push` invocation without long-term-credential concerns. +type IngestorToken struct { + // Token is the raw bearer token string. Treat as sensitive; + // don't log it. Diagnostics print SHA256(Token)[:8] instead. + Token string + + // ExpirationSeconds matches the request — actual server-side + // expiration may be capped by cluster policy + // (--service-account-max-token-expiration on kube-apiserver). + // We don't try to parse the JWT to read its `exp` claim + // because the customer's diagnostic ("token expires in ~N + // min") is accurate enough using the requested value. + ExpirationSeconds int64 + + // Source records how the token was obtained — TokenRequest + // (the modern path) or a static secret (the fallback for + // clusters where the user can't call TokenRequest). Surfaced + // in `tracebloc cluster info` so the customer can see which + // path their RBAC allowed. + Source TokenSource +} + +// TokenSource enumerates the two ways the CLI can obtain an +// ingestor SA token. Adding a third (e.g. a customer-managed +// long-lived secret reference) is a v0.2 extension. +type TokenSource int + +const ( + // TokenSourceTokenRequest means we called the + // /serviceaccounts/{name}/token subresource. The modern, + // rotation-friendly path. Requires `create` permission on + // "serviceaccounts/token" for the user's kubeconfig. + TokenSourceTokenRequest TokenSource = iota + + // TokenSourceStaticSecret means we fell back to reading a + // pre-existing Secret of type + // kubernetes.io/service-account-token that references the + // ingestor SA. Older k8s (pre-1.24 default) auto-created + // these; on 1.24+ admins must create them by hand. Tokens + // from this source are long-lived (no expiration), which is + // less ideal but unblocks customers whose RBAC denies + // TokenRequest. + TokenSourceStaticSecret +) + +func (s TokenSource) String() string { + switch s { + case TokenSourceTokenRequest: + return "TokenRequest" + case TokenSourceStaticSecret: + return "static secret" + default: + return "unknown" + } +} + +// MintIngestorToken obtains a bearer token for the ingestor +// ServiceAccount in the given namespace. Tries TokenRequest first; +// if that's denied (RBAC) or unavailable (older API server) falls +// back to looking for a pre-existing service-account-token Secret. +// Returns an error with a clear remediation message if neither +// works. +// +// audiences may be nil/empty for the default API-server audience. +// Today jobs-manager's TokenReview-based validation accepts the +// default audience, so callers usually pass nil; future audiences +// (e.g. a forthcoming jobs-manager-specific audience) plug in here +// without API breakage. +func MintIngestorToken( + ctx context.Context, + cs kubernetes.Interface, + namespace, saName string, + expirationSeconds int64, + audiences []string, +) (*IngestorToken, error) { + // Try TokenRequest first. + req := &authenticationv1.TokenRequest{ + Spec: authenticationv1.TokenRequestSpec{ + ExpirationSeconds: &expirationSeconds, + Audiences: audiences, + }, + } + tr, err := cs.CoreV1().ServiceAccounts(namespace). + CreateToken(ctx, saName, req, metav1.CreateOptions{}) + + if err == nil { + return &IngestorToken{ + Token: tr.Status.Token, + ExpirationSeconds: expirationSeconds, + Source: TokenSourceTokenRequest, + }, nil + } + + // TokenRequest didn't work. Distinguish the cases we can fall + // back from (permission denied / not-found / unsupported) + // from cases we can't (e.g. context cancelled, network down). + if !isTokenRequestRecoverable(err) { + return nil, fmt.Errorf( + "minting token for ServiceAccount %s/%s via TokenRequest: %w", + namespace, saName, err, + ) + } + + // Fallback: look for a Secret of type + // kubernetes.io/service-account-token in the same namespace + // whose annotations point at our SA. + tok, fallbackErr := findStaticTokenSecret(ctx, cs, namespace, saName) + if fallbackErr != nil { + // Surface BOTH the original TokenRequest failure and the + // fallback failure — they give the customer different + // remediation options. + return nil, fmt.Errorf( + "no usable ingestor token. TokenRequest failed: %v. "+ + "Fallback to static secret also failed: %w. "+ + "Remediation: either grant your user the `create` verb on "+ + "`serviceaccounts/token` (RBAC), or have an admin create a "+ + "long-lived Secret of type kubernetes.io/service-account-token "+ + "that references the %s ServiceAccount in namespace %s.", + err, fallbackErr, saName, namespace, + ) + } + return tok, nil +} + +// isTokenRequestRecoverable reports whether a failed TokenRequest +// call is one we can fall back from (permission denied, API not +// available, SA missing) vs one we can't (context cancelled, +// network errors, etc.). +// +// Falling back on a network error would mask the network problem; +// the customer would see a less useful "static secret not found" +// instead of "couldn't reach the API server." +// +// Defers to k8s.io/apimachinery/pkg/api/errors for the classification +// rather than re-implementing the HTTP-status checks. The stdlib +// helpers key off Status.Reason (the typed enum) — that's the +// canonical interpretation, and our test file already constructs its +// fake errors via apierrors.NewForbidden, which sets both Code and +// Reason. Earlier versions of this file rolled their own helpers +// that read Status.Code numerically; Bugbot correctly flagged the +// divergence as a future-bug-waiting-to-happen for non-standard +// status errors. +func isTokenRequestRecoverable(err error) bool { + if err == nil { + return false + } + // Permission denied (StatusForbidden), SA missing + // (StatusNotFound), API not available on this cluster — typically + // pre-1.22 (StatusMethodNotAllowed/MethodNotSupported). + return apierrors.IsForbidden(err) || + apierrors.IsNotFound(err) || + apierrors.IsMethodNotSupported(err) +} + +// findStaticTokenSecret looks for a long-lived +// service-account-token Secret bound to the given SA. The legacy +// pre-k8s-1.24 way of authenticating an SA: kubectl auto-created +// these on SA creation. On modern clusters admins create them by +// hand when needed. +func findStaticTokenSecret(ctx context.Context, cs kubernetes.Interface, namespace, saName string) (*IngestorToken, error) { + secrets, err := cs.CoreV1().Secrets(namespace).List(ctx, metav1.ListOptions{ + FieldSelector: "type=kubernetes.io/service-account-token", + }) + if err != nil { + // If the user can't even list Secrets, surface that + // directly — they may be missing more RBAC than just + // TokenRequest, and "static secret fallback failed" is + // less informative than the underlying error. + return nil, fmt.Errorf("listing service-account-token secrets in %s: %w", namespace, err) + } + + for _, s := range secrets.Items { + // The Secret is bound to an SA via the + // `kubernetes.io/service-account.name` annotation. (The + // `.uid` annotation pins it to that exact SA instance — + // we don't check that because we want any historically + // created token-secret for the named SA to work.) + if s.Annotations[corev1.ServiceAccountNameKey] != saName { + continue + } + token, ok := s.Data["token"] + if !ok || len(token) == 0 { + continue // empty token field — skip + } + return &IngestorToken{ + Token: string(token), + ExpirationSeconds: 0, // static secrets don't expire client-side + Source: TokenSourceStaticSecret, + }, nil + } + + return nil, fmt.Errorf( + "no Secret of type kubernetes.io/service-account-token bound to "+ + "ServiceAccount %s found in namespace %s", + saName, namespace, + ) +} diff --git a/internal/cluster/token_test.go b/internal/cluster/token_test.go new file mode 100644 index 0000000..64e1c60 --- /dev/null +++ b/internal/cluster/token_test.go @@ -0,0 +1,186 @@ +package cluster + +import ( + "context" + "errors" + "strings" + "testing" + + authenticationv1 "k8s.io/api/authentication/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" +) + +func TestMintIngestorToken_TokenRequest_HappyPath(t *testing.T) { + const ns = "tracebloc" + cs := fake.NewClientset(&corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{Name: "ingestor", Namespace: ns}, + }) + + // The fake clientset doesn't implement the TokenRequest + // subresource by default — we have to inject a reactor. This + // mirrors what the real API server does: returns a stamped + // Status.Token. + cs.PrependReactor("create", "serviceaccounts", + func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + ca, ok := action.(k8stesting.CreateAction) + if !ok { + return false, nil, nil + } + if ca.GetSubresource() != "token" { + return false, nil, nil + } + tr := ca.GetObject().(*authenticationv1.TokenRequest) + tr.Status.Token = "fake-token-deadbeef" + return true, tr, nil + }) + + tok, err := MintIngestorToken(context.Background(), cs, ns, "ingestor", 600, nil) + if err != nil { + t.Fatalf("MintIngestorToken: %v", err) + } + if tok.Token != "fake-token-deadbeef" { + t.Errorf("Token = %q, want %q", tok.Token, "fake-token-deadbeef") + } + if tok.Source != TokenSourceTokenRequest { + t.Errorf("Source = %v, want TokenSourceTokenRequest", tok.Source) + } + if tok.ExpirationSeconds != 600 { + t.Errorf("ExpirationSeconds = %d, want 600", tok.ExpirationSeconds) + } +} + +func TestMintIngestorToken_FallsBackToStaticSecret(t *testing.T) { + const ns = "tracebloc" + + // Pre-seed a service-account-token Secret bound to the + // ingestor SA. This is what older clusters auto-created on SA + // creation; modern clusters require admins to create it by + // hand. + staticSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ingestor-token-abc123", + Namespace: ns, + Annotations: map[string]string{ + corev1.ServiceAccountNameKey: "ingestor", + }, + }, + Type: corev1.SecretTypeServiceAccountToken, + Data: map[string][]byte{ + "token": []byte("static-token-cafebabe"), + }, + } + cs := fake.NewClientset( + &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{Name: "ingestor", Namespace: ns}, + }, + staticSecret, + ) + + // Make TokenRequest fail with a recoverable error + // (Forbidden = the user lacks RBAC, which is the dominant + // reason customers fall through to the static-secret path). + cs.PrependReactor("create", "serviceaccounts", + func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + if ca, ok := action.(k8stesting.CreateAction); ok && ca.GetSubresource() == "token" { + return true, nil, apierrors.NewForbidden( + corev1.Resource("serviceaccounts/token"), + "ingestor", + errors.New("user cannot create serviceaccounts/token"), + ) + } + return false, nil, nil + }) + + tok, err := MintIngestorToken(context.Background(), cs, ns, "ingestor", 600, nil) + if err != nil { + t.Fatalf("MintIngestorToken: %v", err) + } + if tok.Token != "static-token-cafebabe" { + t.Errorf("Token = %q, want fallback static-token-cafebabe", tok.Token) + } + if tok.Source != TokenSourceStaticSecret { + t.Errorf("Source = %v, want TokenSourceStaticSecret", tok.Source) + } + if tok.ExpirationSeconds != 0 { + t.Errorf("ExpirationSeconds = %d, want 0 (static secrets don't expire client-side)", tok.ExpirationSeconds) + } +} + +func TestMintIngestorToken_NoFallbackOnUnrecoverableError(t *testing.T) { + // Network errors (or any non-403/404/405 error) shouldn't + // trigger the static-secret fallback — they'd mask the real + // underlying problem. Pin that behavior. + const ns = "tracebloc" + cs := fake.NewClientset() + + cs.PrependReactor("create", "serviceaccounts", + func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + if ca, ok := action.(k8stesting.CreateAction); ok && ca.GetSubresource() == "token" { + return true, nil, errors.New("simulated network failure: i/o timeout") + } + return false, nil, nil + }) + + _, err := MintIngestorToken(context.Background(), cs, ns, "ingestor", 600, nil) + if err == nil { + t.Fatal("expected non-recoverable error to propagate, got nil") + } + if !strings.Contains(err.Error(), "i/o timeout") { + t.Errorf("expected the network-error message to surface, got: %v", err) + } +} + +func TestMintIngestorToken_BothPathsFailWithRemediation(t *testing.T) { + // Worst case: TokenRequest forbidden AND no static secret. The + // error must surface both failures plus an actionable + // remediation hint. + const ns = "tracebloc" + cs := fake.NewClientset(&corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{Name: "ingestor", Namespace: ns}, + }) + + cs.PrependReactor("create", "serviceaccounts", + func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + if ca, ok := action.(k8stesting.CreateAction); ok && ca.GetSubresource() == "token" { + return true, nil, apierrors.NewForbidden( + corev1.Resource("serviceaccounts/token"), + "ingestor", + errors.New("forbidden"), + ) + } + return false, nil, nil + }) + + _, err := MintIngestorToken(context.Background(), cs, ns, "ingestor", 600, nil) + if err == nil { + t.Fatal("expected error when both TokenRequest + static fallback fail") + } + for _, want := range []string{ + "TokenRequest failed", + "static secret also failed", + "Remediation", + "create", // remediation mentions granting the verb + } { + if !strings.Contains(err.Error(), want) { + t.Errorf("expected combined error to mention %q, got: %s", want, err) + } + } +} + +func TestTokenSource_String(t *testing.T) { + cases := map[TokenSource]string{ + TokenSourceTokenRequest: "TokenRequest", + TokenSourceStaticSecret: "static secret", + TokenSource(99): "unknown", + } + for s, want := range cases { + if got := s.String(); got != want { + t.Errorf("TokenSource(%d).String() = %q, want %q", s, got, want) + } + } +}