Skip to content

Commit 698475e

Browse files
authored
wazero: fix guard shutdown leak, logging namespace, and typed trap detection (#3790)
Three actionable improvements to the wazero WASM guard integration identified in a Go module review: WASM runtimes were never closed on shutdown, `registry.go` was emitting logs under the wrong namespace (`guard:context` instead of `guard:registry`), and trap detection relied on fragile string matching. ## Changes ### `Registry.Close()` + shutdown wiring - Added `Close(ctx context.Context)` to `Registry` — iterates guards and calls `Close(ctx)` on those that implement it - Called from both `UnifiedServer.Close()` and `InitiateShutdown()` (nil-safe) to release WASM JIT runtime resources on shutdown - Previously `WithCloseOnContextDone(true)` was effectively inert since guards were created with `context.Background()` ### Fix logging namespace in `registry.go` - `registry.go` was calling `log.Printf(...)` which resolved to the package-level `log` var declared in `context.go` with namespace `"guard:context"` — meaning `DEBUG=guard:registry` silently dropped those messages - Replaced with `logger.LogInfo("guard", ...)` for operational events; retained `debugLog.Printf` for debug-only paths - Removed a duplicate debug log line introduced in the fix ### Typed `sys.ExitError` check in `isWasmTrap` - Old implementation: `strings.Contains(err.Error(), "wasm error:")` — fragile against wazero message format changes, and would incorrectly poison a guard on a clean `exit(0)` (e.g. TinyGo init) - New implementation checks `*sys.ExitError` via `errors.As` first; only non-zero exit codes are treated as traps: ```go func isWasmTrap(err error) bool { if err == nil { return false } var exitErr *sys.ExitError if errors.As(err, &exitErr) { return exitErr.ExitCode() != 0 } return strings.Contains(err.Error(), "wasm error:") } ``` > [!WARNING] > > <details> > <summary>Firewall rules blocked me from connecting to one or more addresses (expand for details)</summary> > > #### I tried to connect to the following addresses, but was blocked by firewall rules: > > - `example.com` > - Triggering command: `/tmp/go-build900194802/b514/launcher.test /tmp/go-build900194802/b514/launcher.test -test.testlogfile=/tmp/go-build900194802/b514/testlog.txt -test.paniconexit0 -test.timeout=10m0s -w o4Cr-wAQW cfg x_amd64/vet -c -I /tmp/go-build185-bool x_amd64/vet 1085�� elemetry.io/otel-errorsas cfg x_amd64/vet --gdwarf-5 ions =0 x_amd64/vet` (dns block) > - `invalid-host-that-does-not-exist-12345.com` > - Triggering command: `/tmp/go-build900194802/b496/config.test /tmp/go-build900194802/b496/config.test -test.testlogfile=/tmp/go-build900194802/b496/testlog.txt -test.paniconexit0 -test.timeout=10m0s /tmp/go-build900194802/b389/vet.cfg g_.a ache/go/1.25.8/x64/src/bufio/bufio.go x_amd64/vet --gdwarf-5 1085977/b140/_cg-atomic -o x_amd64/vet -W _.a /tmp/go-build185-ifaceassert x_amd64/vet . .io/otel/attribu-atomic --64 x_amd64/vet` (dns block) > - `nonexistent.local` > - Triggering command: `/tmp/go-build900194802/b514/launcher.test /tmp/go-build900194802/b514/launcher.test -test.testlogfile=/tmp/go-build900194802/b514/testlog.txt -test.paniconexit0 -test.timeout=10m0s -w o4Cr-wAQW cfg x_amd64/vet -c -I /tmp/go-build185-bool x_amd64/vet 1085�� elemetry.io/otel-errorsas cfg x_amd64/vet --gdwarf-5 ions =0 x_amd64/vet` (dns block) > - `slow.example.com` > - Triggering command: `/tmp/go-build900194802/b514/launcher.test /tmp/go-build900194802/b514/launcher.test -test.testlogfile=/tmp/go-build900194802/b514/testlog.txt -test.paniconexit0 -test.timeout=10m0s -w o4Cr-wAQW cfg x_amd64/vet -c -I /tmp/go-build185-bool x_amd64/vet 1085�� elemetry.io/otel-errorsas cfg x_amd64/vet --gdwarf-5 ions =0 x_amd64/vet` (dns block) > - `this-host-does-not-exist-12345.com` > - Triggering command: `/tmp/go-build900194802/b523/mcp.test /tmp/go-build900194802/b523/mcp.test -test.testlogfile=/tmp/go-build900194802/b523/testlog.txt -test.paniconexit0 -test.timeout=10m0s -o 1085977/b432/_pkg_.a -trimpath x_amd64/vet us.pb.go t/unicode/bidi -lang=go1.25 x_amd64/vet` (dns block) > > If you need me to access, download, or install something from one of these locations, you can either: > > - Configure [Actions setup steps](https://gh.io/copilot/actions-setup-steps) to set up my environment, which run before the firewall is enabled > - Add the appropriate URLs or hosts to the custom allowlist in this repository's [Copilot coding agent settings](https://github.com/github/gh-aw-mcpg/settings/copilot/coding_agent) (admins only) > > </details>
2 parents 73b27e4 + 13ddd06 commit 698475e

5 files changed

Lines changed: 153 additions & 8 deletions

File tree

internal/guard/guard_test.go

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package guard
22

33
import (
44
"context"
5+
"errors"
56
"sync"
67
"testing"
78

@@ -17,6 +18,20 @@ type mockGuard struct {
1718
id string
1819
}
1920

21+
// mockClosableGuard is a guard that tracks whether Close was called
22+
type mockClosableGuard struct {
23+
mockGuard
24+
closed bool
25+
closeCount int
26+
closeErr error
27+
}
28+
29+
func (m *mockClosableGuard) Close(ctx context.Context) error {
30+
m.closed = true
31+
m.closeCount++
32+
return m.closeErr
33+
}
34+
2035
func (m *mockGuard) Name() string { return "mock-" + m.id }
2136
func (m *mockGuard) LabelAgent(ctx context.Context, policy interface{}, backend BackendCaller, caps *difc.Capabilities) (*LabelAgentResult, error) {
2237
return &LabelAgentResult{DIFCMode: difc.ModeStrict}, nil
@@ -450,6 +465,70 @@ func TestGuardRegistry_HasNonNoopGuard(t *testing.T) {
450465
})
451466
}
452467

468+
func TestGuardRegistry_Close(t *testing.T) {
469+
t.Run("close calls Close on guards that implement it", func(t *testing.T) {
470+
registry := NewRegistry()
471+
g := &mockClosableGuard{mockGuard: mockGuard{id: "wasm"}}
472+
registry.Register("server1", g)
473+
474+
registry.Close(context.Background())
475+
476+
assert.True(t, g.closed, "expected guard Close to be called")
477+
})
478+
479+
t.Run("close skips guards that do not implement Close", func(t *testing.T) {
480+
registry := NewRegistry()
481+
registry.Register("server1", NewNoopGuard())
482+
483+
// Should not panic
484+
registry.Close(context.Background())
485+
})
486+
487+
t.Run("close on empty registry is safe", func(t *testing.T) {
488+
registry := NewRegistry()
489+
// Should not panic
490+
registry.Close(context.Background())
491+
})
492+
493+
t.Run("close calls Close on all closable guards", func(t *testing.T) {
494+
registry := NewRegistry()
495+
g1 := &mockClosableGuard{mockGuard: mockGuard{id: "wasm1"}}
496+
g2 := &mockClosableGuard{mockGuard: mockGuard{id: "wasm2"}}
497+
registry.Register("server1", g1)
498+
registry.Register("server2", g2)
499+
500+
registry.Close(context.Background())
501+
502+
assert.True(t, g1.closed, "expected guard 1 Close to be called")
503+
assert.True(t, g2.closed, "expected guard 2 Close to be called")
504+
})
505+
506+
t.Run("close continues when one guard returns an error", func(t *testing.T) {
507+
registry := NewRegistry()
508+
g1 := &mockClosableGuard{mockGuard: mockGuard{id: "failing"}, closeErr: errors.New("close failed")}
509+
g2 := &mockClosableGuard{mockGuard: mockGuard{id: "ok"}}
510+
registry.Register("server1", g1)
511+
registry.Register("server2", g2)
512+
513+
// Should not panic even when one guard returns an error
514+
registry.Close(context.Background())
515+
516+
assert.True(t, g1.closed, "expected failing guard Close to be called")
517+
assert.True(t, g2.closed, "expected ok guard Close to be called")
518+
})
519+
520+
t.Run("double close is safe", func(t *testing.T) {
521+
registry := NewRegistry()
522+
g := &mockClosableGuard{mockGuard: mockGuard{id: "wasm"}}
523+
registry.Register("server1", g)
524+
525+
registry.Close(context.Background())
526+
registry.Close(context.Background())
527+
528+
assert.Equal(t, 2, g.closeCount, "Close should be called twice without panic")
529+
})
530+
}
531+
453532
func TestCreateGuard(t *testing.T) {
454533
tests := []struct {
455534
name string

internal/guard/registry.go

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package guard
22

33
import (
4+
"context"
45
"fmt"
56
"sync"
67

@@ -30,7 +31,7 @@ func (r *Registry) Register(serverID string, guard Guard) {
3031
defer r.mu.Unlock()
3132

3233
r.guards[serverID] = guard
33-
log.Printf("[Guard] Registered guard '%s' for server '%s'", guard.Name(), serverID)
34+
logger.LogInfo("guard", "Registered guard '%s' for server '%s'", guard.Name(), serverID)
3435
}
3536

3637
// Get retrieves the guard for a server, or returns a noop guard if not found
@@ -46,7 +47,6 @@ func (r *Registry) Get(serverID string) Guard {
4647

4748
// Return noop guard as default
4849
debugLog.Printf("No guard registered for serverID=%s, returning noop guard", serverID)
49-
log.Printf("[Guard] No guard registered for server '%s', using noop guard", serverID)
5050
return NewNoopGuard()
5151
}
5252

@@ -76,7 +76,7 @@ func (r *Registry) Remove(serverID string) {
7676
r.mu.Lock()
7777
defer r.mu.Unlock()
7878
delete(r.guards, serverID)
79-
log.Printf("[Guard] Removed guard for server '%s'", serverID)
79+
logger.LogInfo("guard", "Removed guard for server '%s'", serverID)
8080
}
8181

8282
// List returns all registered server IDs
@@ -103,6 +103,33 @@ func (r *Registry) GetGuardInfo() map[string]string {
103103
return info
104104
}
105105

106+
// Close closes all registered guards that implement Close(context.Context) error.
107+
// It should be called during server shutdown to release WASM runtime resources.
108+
func (r *Registry) Close(ctx context.Context) {
109+
type closableGuard struct {
110+
id string
111+
c interface{ Close(context.Context) error }
112+
}
113+
114+
r.mu.RLock()
115+
closers := make([]closableGuard, 0, len(r.guards))
116+
for id, g := range r.guards {
117+
if c, ok := g.(interface{ Close(context.Context) error }); ok {
118+
closers = append(closers, closableGuard{id: id, c: c})
119+
}
120+
}
121+
r.mu.RUnlock()
122+
123+
for _, guard := range closers {
124+
if err := guard.c.Close(ctx); err != nil {
125+
logger.LogWarn("guard", "Failed to close guard for server %s: %v", guard.id, err)
126+
}
127+
}
128+
if len(closers) > 0 {
129+
logger.LogInfo("guard", "Closed %d guard(s)", len(closers))
130+
}
131+
}
132+
106133
// GuardFactory is a function that creates a guard instance
107134
type GuardFactory func() (Guard, error)
108135

@@ -116,7 +143,7 @@ func RegisterGuardType(name string, factory GuardFactory) {
116143
registeredGuardsMu.Lock()
117144
defer registeredGuardsMu.Unlock()
118145
registeredGuards[name] = factory
119-
log.Printf("[Guard] Registered guard type: %s", name)
146+
logger.LogInfo("guard", "Registered guard type: %s", name)
120147
}
121148

122149
// CreateGuard creates a guard instance by name using registered factories

internal/guard/wasm.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"github.com/tetratelabs/wazero"
1616
"github.com/tetratelabs/wazero/api"
1717
"github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1"
18+
"github.com/tetratelabs/wazero/sys"
1819
)
1920

2021
var logWasm = logger.New("guard:wasm")
@@ -830,10 +831,22 @@ func parsePathLabeledResponse(responseJSON []byte, originalData interface{}) (di
830831
return pld.ToCollectionLabeledData(), nil
831832
}
832833

833-
// isWasmTrap reports whether err is a WASM execution trap such as the
834-
// "wasm error: unreachable" produced when a Rust-compiled guard panics.
834+
// isWasmTrap reports whether err represents a WASM execution trap that should
835+
// permanently poison the guard. Normal process exits (exit code 0, e.g. TinyGo
836+
// init) are NOT considered traps. A non-zero exit code is treated as a trap.
837+
// As a fallback for wazero execution faults (e.g. Rust panic → unreachable),
838+
// the function also matches on wazero's "wasm error:" message prefix.
835839
func isWasmTrap(err error) bool {
836-
return err != nil && strings.Contains(err.Error(), "wasm error:")
840+
if err == nil {
841+
return false
842+
}
843+
// A normal WASI process exit (exit code 0) is not a trap — don't poison the guard.
844+
var exitErr *sys.ExitError
845+
if errors.As(err, &exitErr) {
846+
return exitErr.ExitCode() != 0
847+
}
848+
// Fallback for wazero execution traps (e.g. Rust panic → unreachable).
849+
return strings.Contains(err.Error(), "wasm error:")
837850
}
838851

839852
// callWasmFunction calls an exported function in the WASM module.

internal/guard/wasm_test.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616

1717
"github.com/github/gh-aw-mcpg/internal/difc"
1818
"github.com/tetratelabs/wazero"
19+
"github.com/tetratelabs/wazero/sys"
1920
)
2021

2122
func TestMain(m *testing.M) {
@@ -1152,6 +1153,26 @@ func TestIsWasmTrap(t *testing.T) {
11521153
err := errors.New("wasm error: out of bounds memory access")
11531154
assert.True(t, isWasmTrap(err))
11541155
})
1156+
1157+
t.Run("sys.ExitError with exit code 0 is not a trap", func(t *testing.T) {
1158+
err := sys.NewExitError(0)
1159+
assert.False(t, isWasmTrap(err))
1160+
})
1161+
1162+
t.Run("sys.ExitError with non-zero exit code is a trap", func(t *testing.T) {
1163+
err := sys.NewExitError(1)
1164+
assert.True(t, isWasmTrap(err))
1165+
})
1166+
1167+
t.Run("wrapped sys.ExitError with exit code 0 is not a trap", func(t *testing.T) {
1168+
err := fmt.Errorf("wrapper: %w", sys.NewExitError(0))
1169+
assert.False(t, isWasmTrap(err))
1170+
})
1171+
1172+
t.Run("wrapped sys.ExitError with non-zero exit code is a trap", func(t *testing.T) {
1173+
err := fmt.Errorf("wrapper: %w", sys.NewExitError(2))
1174+
assert.True(t, isWasmTrap(err))
1175+
})
11551176
}
11561177

11571178
func TestWasmGuardFailedState(t *testing.T) {

internal/server/unified.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,7 @@ func (us *UnifiedServer) GetToolHandler(backendID string, toolName string) func(
718718

719719
// Close cleans up resources
720720
func (us *UnifiedServer) Close() error {
721-
us.launcher.Close()
721+
us.InitiateShutdown()
722722
return nil
723723
}
724724

@@ -753,6 +753,11 @@ func (us *UnifiedServer) InitiateShutdown() int {
753753
logger.LogInfo("shutdown", "Terminating %d backend servers", serversTerminated)
754754
us.launcher.Close()
755755

756+
// Release WASM runtime resources held by guards
757+
if us.guardRegistry != nil {
758+
us.guardRegistry.Close(context.Background())
759+
}
760+
756761
logger.LogInfo("shutdown", "Backend servers terminated successfully")
757762
})
758763
return serversTerminated

0 commit comments

Comments
 (0)