Skip to content

Commit 53c24f3

Browse files
authored
Rate-limit circuit breaker for GitHub MCP backend tool calls (#3799)
Four tools simultaneously hit GitHub's 15k req/reset limit with no gateway-level protection — rate-limited responses propagated directly to agents, which retried immediately and worsened the storm. ## Changes ### Phase 1: Rate-limit detection + proxy backoff - **Gateway mode** (`unified.go`): After each `executeBackendToolCall`, inspects the tool result for GitHub MCP rate-limit error patterns (`isError: true` + text matching "rate limit exceeded", "secondary rate limit", "too many requests", etc.) using `isRateLimitToolResult()`. Extracts reset time from `[rate reset in Ns]` in the error text. - **Proxy mode** (`handler.go`): `injectRetryAfterIfRateLimited()` checks `HTTP 429` or `X-Ratelimit-Remaining: 0` after each upstream forward, injects a `Retry-After` header derived from `X-Ratelimit-Reset`, and logs at ERROR level. ### Phase 2: Per-backend circuit breaker (`circuit_breaker.go`) Classic three-state machine per backend server ID: ``` CLOSED ──(N consecutive rate-limit errors)──▶ OPEN ▲ │ │ (probe succeeds) (cooldown/reset elapsed) │ ▼ └────────────────────────────────── HALF-OPEN ``` - **OPEN** rejects requests immediately with `ErrCircuitOpen` (includes reset timestamp). - **HALF-OPEN** allows one probe; re-opens on another rate-limit, closes on success. - Transport errors (connection failures) leave the counter unchanged — only actual rate-limit tool results affect it. 
### Configuration Two new optional per-server fields in TOML/JSON: ```toml [servers.github] rate_limit_threshold = 3 # consecutive 429s before opening (default: 3) rate_limit_cooldown = 60 # seconds OPEN before probing (default: 60) ``` > [!WARNING] > > <details> > <summary>Firewall rules blocked me from connecting to one or more addresses (expand for details)</summary> > > #### I tried to connect to the following addresses, but was blocked by firewall rules: > > - `example.com` > - Triggering command: `/tmp/go-build151504929/b510/launcher.test /tmp/go-build151504929/b510/launcher.test -test.testlogfile=/tmp/go-build151504929/b510/testlog.txt -test.paniconexit0 -test.timeout=10m0s -test.v=true 64/src/net -trimpath x_amd64/link -p 5607675/b085/ -lang=go1.25 x_amd64/link -I /opt/hostedtoolc-errorsas 5607675/b151/ x_amd64/vet --gdwarf-5 rk/XFoNDgDe6LSlTdocker-cli-plugin-metadata` (dns block) > - Triggering command: `/tmp/go-build1093949210/b510/launcher.test /tmp/go-build1093949210/b510/launcher.test -test.testlogfile=/tmp/go-build1093949210/b510/testlog.txt -test.paniconexit0 -test.timeout=10m0s -test.v=true /version/version.go /version/version_test.go x_amd64/vet 64/src/runtime/cbash ernal/oidc ache/go/1.25.8/x--noprofile x_amd64/vet -ato�� UQYbtZDox -buildtags x_amd64/vet -errorsas -ifaceassert -nilfunc x_amd64/vet` (dns block) > - Triggering command: `/tmp/go-build1807685167/b514/launcher.test /tmp/go-build1807685167/b514/launcher.test -test.testlogfile=/tmp/go-build1807685167/b514/testlog.txt -test.paniconexit0 -test.timeout=10m0s` (dns block) > - `invalid-host-that-does-not-exist-12345.com` > - Triggering command: `/tmp/go-build151504929/b492/config.test /tmp/go-build151504929/b492/config.test -test.testlogfile=/tmp/go-build151504929/b492/testlog.txt -test.paniconexit0 -test.timeout=10m0s -test.v=true ache/go/1.25.8/x-errorsas andler.go x_amd64/compile` (dns block) > - Triggering command: `/tmp/go-build1093949210/b492/config.test 
/tmp/go-build1093949210/b492/config.test -test.testlogfile=/tmp/go-build1093949210/b492/testlog.txt -test.paniconexit0 -test.timeout=10m0s -test.v=true t/config.go t/driver.go x_amd64/compile -p runtime/pprof -lang=go1.25 x_amd64/compile -uns�� -unreachable=false /tmp/go-build151504929/b092/vet.cfg x_amd64/vet -c=4 -nolocalimports -importcfg x_amd64/vet` (dns block) > - Triggering command: `/tmp/go-build1807685167/b496/config.test /tmp/go-build1807685167/b496/config.test -test.testlogfile=/tmp/go-build1807685167/b496/testlog.txt -test.paniconexit0 -test.timeout=10m0s go1.25.8 -c=4 -nolocalimports -importcfg /tmp/go-build1747915822/b123/importcfg -embedcfg /tmp/go-build1747915822/b123/embedcfg -pack` (dns block) > - `nonexistent.local` > - Triggering command: `/tmp/go-build151504929/b510/launcher.test /tmp/go-build151504929/b510/launcher.test -test.testlogfile=/tmp/go-build151504929/b510/testlog.txt -test.paniconexit0 -test.timeout=10m0s -test.v=true 64/src/net -trimpath x_amd64/link -p 5607675/b085/ -lang=go1.25 x_amd64/link -I /opt/hostedtoolc-errorsas 5607675/b151/ x_amd64/vet --gdwarf-5 rk/XFoNDgDe6LSlTdocker-cli-plugin-metadata` (dns block) > - Triggering command: `/tmp/go-build1093949210/b510/launcher.test /tmp/go-build1093949210/b510/launcher.test -test.testlogfile=/tmp/go-build1093949210/b510/testlog.txt -test.paniconexit0 -test.timeout=10m0s -test.v=true /version/version.go /version/version_test.go x_amd64/vet 64/src/runtime/cbash ernal/oidc ache/go/1.25.8/x--noprofile x_amd64/vet -ato�� UQYbtZDox -buildtags x_amd64/vet -errorsas -ifaceassert -nilfunc x_amd64/vet` (dns block) > - Triggering command: `/tmp/go-build1807685167/b514/launcher.test /tmp/go-build1807685167/b514/launcher.test -test.testlogfile=/tmp/go-build1807685167/b514/testlog.txt -test.paniconexit0 -test.timeout=10m0s` (dns block) > - `slow.example.com` > - Triggering command: `/tmp/go-build151504929/b510/launcher.test /tmp/go-build151504929/b510/launcher.test 
-test.testlogfile=/tmp/go-build151504929/b510/testlog.txt -test.paniconexit0 -test.timeout=10m0s -test.v=true 64/src/net -trimpath x_amd64/link -p 5607675/b085/ -lang=go1.25 x_amd64/link -I /opt/hostedtoolc-errorsas 5607675/b151/ x_amd64/vet --gdwarf-5 rk/XFoNDgDe6LSlTdocker-cli-plugin-metadata` (dns block) > - Triggering command: `/tmp/go-build1093949210/b510/launcher.test /tmp/go-build1093949210/b510/launcher.test -test.testlogfile=/tmp/go-build1093949210/b510/testlog.txt -test.paniconexit0 -test.timeout=10m0s -test.v=true /version/version.go /version/version_test.go x_amd64/vet 64/src/runtime/cbash ernal/oidc ache/go/1.25.8/x--noprofile x_amd64/vet -ato�� UQYbtZDox -buildtags x_amd64/vet -errorsas -ifaceassert -nilfunc x_amd64/vet` (dns block) > - Triggering command: `/tmp/go-build1807685167/b514/launcher.test /tmp/go-build1807685167/b514/launcher.test -test.testlogfile=/tmp/go-build1807685167/b514/testlog.txt -test.paniconexit0 -test.timeout=10m0s` (dns block) > - `this-host-does-not-exist-12345.com` > - Triggering command: `/tmp/go-build151504929/b519/mcp.test /tmp/go-build151504929/b519/mcp.test -test.testlogfile=/tmp/go-build151504929/b519/testlog.txt -test.paniconexit0 -test.timeout=10m0s -test.v=true 1.80.0/metadata/metadata.go 5607675/b151/ x_amd64/vet --gdwarf-5 --64 -o x_amd64/vet 5607�� 1.10.2/active_help.go 1.10.2/args.go x_amd64/vet --gdwarf-5 --64 -o x_amd64/vet` (dns block) > - Triggering command: `/tmp/go-build1093949210/b519/mcp.test /tmp/go-build1093949210/b519/mcp.test -test.testlogfile=/tmp/go-build1093949210/b519/testlog.txt -test.paniconexit0 -test.timeout=10m0s -test.v=true submodules | hea-p -buildtags x_amd64/vet -errorsas -ifaceassert -nilfunc x_amd64/vet n-me�� g_.a -buildtags 64/pkg/tool/linu-nolocalimports -errorsas ernal/config -nilfunc 64/pkg/tool/linu/tmp/go-build1093949210/b226/_testmain.go` (dns block) > - Triggering command: `/tmp/go-build1807685167/b523/mcp.test /tmp/go-build1807685167/b523/mcp.test 
-test.testlogfile=/tmp/go-build1807685167/b523/testlog.txt -test.paniconexit0 -test.timeout=10m0s 9560�� g_.a /sys/fs/cgroup ache/go/1.25.8/x64/pkg/tool/linux_amd64/vet JwJi0zHsZ ernal/config x_amd64/vet ache/go/1.25.8/x-buildtags 9560�� ZjxfQrmK0 x_amd64/vet x_amd64/vet rg/x/net@v0.52.0bash cfg 64/pkg/tool/linu--version x_amd64/vet` (dns block) > > If you need me to access, download, or install something from one of these locations, you can either: > > - Configure [Actions setup steps](https://gh.io/copilot/actions-setup-steps) to set up my environment, which run before the firewall is enabled > - Add the appropriate URLs or hosts to the custom allowlist in this repository's [Copilot coding agent settings](https://github.com/github/gh-aw-mcpg/settings/copilot/coding_agent) (admins only) > > </details>
2 parents 698475e + fea1da4 commit 53c24f3

7 files changed

Lines changed: 1081 additions & 1 deletion

File tree

internal/config/config_core.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,18 @@ type ServerConfig struct {
215215
// fallback uses the HTTP client's request timeout instead. Increase this for backends that are
216216
// slow to initialize. Only applies to HTTP server types. Default: 30 seconds.
217217
ConnectTimeout int `toml:"connect_timeout" json:"connect_timeout,omitempty"`
218+
219+
// RateLimitThreshold is the number of consecutive rate-limit errors from this backend
220+
// that will trip the circuit breaker (transition CLOSED → OPEN). When OPEN, requests
221+
// are immediately rejected until the cooldown period elapses. Default: 3.
222+
// Supported in file-based config (TOML/JSON); stdin JSON config does not currently accept this field.
223+
RateLimitThreshold int `toml:"rate_limit_threshold" json:"rate_limit_threshold,omitempty"`
224+
225+
// RateLimitCooldown is the number of seconds the circuit breaker stays OPEN before
226+
// allowing a single probe request (transition OPEN → HALF-OPEN). If the probe
227+
// succeeds the circuit closes; if rate-limited again it re-opens. Default: 60.
228+
// Supported in file-based config (TOML/JSON); stdin JSON config does not currently accept this field.
229+
RateLimitCooldown int `toml:"rate_limit_cooldown" json:"rate_limit_cooldown,omitempty"`
218230
}
219231

220232
// GuardConfig represents a guard configuration for DIFC enforcement.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package config
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
"github.com/stretchr/testify/require"
8+
)
9+
10+
func TestServerConfig_RateLimitFields(t *testing.T) {
11+
t.Parallel()
12+
toml := `
13+
[servers.github]
14+
command = "docker"
15+
args = ["run", "--rm", "-i", "ghcr.io/github/github-mcp-server:latest"]
16+
rate_limit_threshold = 5
17+
rate_limit_cooldown = 120
18+
`
19+
path := writeTempTOML(t, toml)
20+
cfg, err := LoadFromFile(path)
21+
require.NoError(t, err)
22+
srv := cfg.Servers["github"]
23+
assert.Equal(t, 5, srv.RateLimitThreshold)
24+
assert.Equal(t, 120, srv.RateLimitCooldown)
25+
}
26+
27+
func TestServerConfig_RateLimitFieldsDefaultToZero(t *testing.T) {
28+
t.Parallel()
29+
toml := validDockerServerTOML
30+
path := writeTempTOML(t, toml)
31+
cfg, err := LoadFromFile(path)
32+
require.NoError(t, err)
33+
srv := cfg.Servers["github"]
34+
assert.Equal(t, 0, srv.RateLimitThreshold)
35+
assert.Equal(t, 0, srv.RateLimitCooldown)
36+
}

internal/proxy/handler.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import (
77
"fmt"
88
"io"
99
"net/http"
10+
"strconv"
11+
"time"
1012

1113
"go.opentelemetry.io/otel/attribute"
1214
"go.opentelemetry.io/otel/codes"
@@ -350,8 +352,11 @@ func (h *proxyHandler) passthrough(w http.ResponseWriter, r *http.Request, path
350352
}
351353

352354
// writeResponse writes an upstream response to the client.
355+
// When the upstream signals rate-limiting (HTTP 429 or X-RateLimit-Remaining == 0),
356+
// it injects a Retry-After header and logs the event at ERROR level.
353357
func (h *proxyHandler) writeResponse(w http.ResponseWriter, resp *http.Response, body []byte) {
354358
copyResponseHeaders(w, resp)
359+
injectRetryAfterIfRateLimited(w, resp)
355360
w.WriteHeader(resp.StatusCode)
356361
w.Write(body)
357362
}
@@ -416,6 +421,66 @@ func copyResponseHeaders(w http.ResponseWriter, resp *http.Response) {
416421
}
417422
}
418423

424+
// injectRetryAfterIfRateLimited inspects the upstream response for rate-limit signals
425+
// (HTTP 429 or X-Ratelimit-Remaining == 0). When detected it:
426+
// 1. Injects a Retry-After header so the client knows when to retry.
427+
// 2. Logs the event at ERROR level so operators can monitor rate-limit incidents.
428+
func injectRetryAfterIfRateLimited(w http.ResponseWriter, resp *http.Response) {
429+
is429 := resp.StatusCode == http.StatusTooManyRequests
430+
// Use Go's canonical header key form (textproto.CanonicalMIMEHeaderKey produces
431+
// "X-Ratelimit-Remaining", matching GitHub's actual response headers).
432+
remaining := resp.Header.Get("X-Ratelimit-Remaining")
433+
resetHeader := resp.Header.Get("X-Ratelimit-Reset")
434+
435+
isRateLimited := is429 || remaining == "0"
436+
if !isRateLimited {
437+
return
438+
}
439+
440+
resetAt := parseRateLimitReset(resetHeader)
441+
retryAfter := computeRetryAfter(resetAt)
442+
443+
w.Header().Set("Retry-After", strconv.Itoa(retryAfter))
444+
445+
logger.LogError("client",
446+
"upstream rate limit hit: status=%d X-Ratelimit-Remaining=%s X-Ratelimit-Reset=%s retry-after=%ds",
447+
resp.StatusCode, remaining, resetHeader, retryAfter)
448+
}
449+
450+
// parseRateLimitReset parses the X-RateLimit-Reset Unix-timestamp header.
// Returns zero time when absent or malformed.
func parseRateLimitReset(value string) time.Time {
	// strconv.ParseInt rejects the empty string, so one error check covers
	// both the "header absent" and "header malformed" cases.
	unix, err := strconv.ParseInt(value, 10, 64)
	if err != nil {
		return time.Time{}
	}
	return time.Unix(unix, 0)
}
462+
463+
// computeRetryAfter returns the number of seconds to wait before retrying.
464+
// When resetAt is in the future the delay is clamped to [1, 3600] seconds.
465+
// When resetAt is zero or in the past a default of 60 seconds is returned.
466+
func computeRetryAfter(resetAt time.Time) int {
467+
const (
468+
defaultDelay = 60
469+
maxDelay = 3600
470+
)
471+
if resetAt.IsZero() {
472+
return defaultDelay
473+
}
474+
secs := int(time.Until(resetAt).Seconds()) + 1 // add 1s buffer
475+
if secs < 1 {
476+
return defaultDelay
477+
}
478+
if secs > maxDelay {
479+
return maxDelay
480+
}
481+
return secs
482+
}
483+
419484
// rewrapSearchResponse re-wraps filtered items into the original search response
420485
// envelope. GitHub search endpoints return {"total_count": N, "items": [...]};
421486
// ToResult() returns a bare array, so we rebuild the wrapper.

internal/proxy/rate_limit_test.go

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
package proxy
2+
3+
import (
4+
"net/http"
5+
"net/http/httptest"
6+
"strconv"
7+
"testing"
8+
"time"
9+
10+
"github.com/stretchr/testify/assert"
11+
)
12+
13+
// TestInjectRetryAfterIfRateLimited verifies Retry-After injection and logging for
14+
// rate-limited upstream responses.
15+
func TestInjectRetryAfterIfRateLimited(t *testing.T) {
16+
t.Parallel()
17+
18+
t.Run("HTTP 429 injects Retry-After", func(t *testing.T) {
19+
t.Parallel()
20+
w := httptest.NewRecorder()
21+
future := time.Now().Add(30 * time.Second)
22+
resp := &http.Response{
23+
StatusCode: http.StatusTooManyRequests,
24+
Header: http.Header{
25+
"X-Ratelimit-Reset": []string{strconv.FormatInt(future.Unix(), 10)},
26+
},
27+
}
28+
injectRetryAfterIfRateLimited(w, resp)
29+
retryAfter := w.Header().Get("Retry-After")
30+
assert.NotEmpty(t, retryAfter, "Retry-After should be set on 429")
31+
secs, err := strconv.Atoi(retryAfter)
32+
assert.NoError(t, err)
33+
assert.Greater(t, secs, 0, "Retry-After should be positive")
34+
})
35+
36+
t.Run("X-Ratelimit-Remaining 0 injects Retry-After", func(t *testing.T) {
37+
t.Parallel()
38+
w := httptest.NewRecorder()
39+
future := time.Now().Add(60 * time.Second)
40+
resp := &http.Response{
41+
StatusCode: http.StatusOK,
42+
Header: http.Header{
43+
"X-Ratelimit-Remaining": []string{"0"},
44+
"X-Ratelimit-Reset": []string{strconv.FormatInt(future.Unix(), 10)},
45+
},
46+
}
47+
injectRetryAfterIfRateLimited(w, resp)
48+
assert.NotEmpty(t, w.Header().Get("Retry-After"), "Retry-After should be set when remaining=0")
49+
})
50+
51+
t.Run("non-zero remaining does not inject Retry-After", func(t *testing.T) {
52+
t.Parallel()
53+
w := httptest.NewRecorder()
54+
resp := &http.Response{
55+
StatusCode: http.StatusOK,
56+
Header: http.Header{
57+
"X-Ratelimit-Remaining": []string{"100"},
58+
},
59+
}
60+
injectRetryAfterIfRateLimited(w, resp)
61+
assert.Empty(t, w.Header().Get("Retry-After"))
62+
})
63+
64+
t.Run("200 with no rate-limit headers does not inject Retry-After", func(t *testing.T) {
65+
t.Parallel()
66+
w := httptest.NewRecorder()
67+
resp := &http.Response{
68+
StatusCode: http.StatusOK,
69+
Header: make(http.Header),
70+
}
71+
injectRetryAfterIfRateLimited(w, resp)
72+
assert.Empty(t, w.Header().Get("Retry-After"))
73+
})
74+
75+
t.Run("429 without reset header uses default delay", func(t *testing.T) {
76+
t.Parallel()
77+
w := httptest.NewRecorder()
78+
resp := &http.Response{
79+
StatusCode: http.StatusTooManyRequests,
80+
Header: make(http.Header),
81+
}
82+
injectRetryAfterIfRateLimited(w, resp)
83+
retryAfter := w.Header().Get("Retry-After")
84+
assert.Equal(t, "60", retryAfter, "default delay should be 60 seconds")
85+
})
86+
}
87+
88+
// TestParseRateLimitReset verifies the Unix-timestamp header parser.
89+
func TestParseRateLimitReset(t *testing.T) {
90+
t.Parallel()
91+
92+
t.Run("empty string returns zero", func(t *testing.T) {
93+
t.Parallel()
94+
assert.True(t, parseRateLimitReset("").IsZero())
95+
})
96+
97+
t.Run("invalid string returns zero", func(t *testing.T) {
98+
t.Parallel()
99+
assert.True(t, parseRateLimitReset("not-a-number").IsZero())
100+
})
101+
102+
t.Run("valid unix timestamp parses correctly", func(t *testing.T) {
103+
t.Parallel()
104+
ts := time.Now().Add(60 * time.Second)
105+
got := parseRateLimitReset(strconv.FormatInt(ts.Unix(), 10))
106+
assert.False(t, got.IsZero())
107+
assert.Equal(t, ts.Unix(), got.Unix())
108+
})
109+
}
110+
111+
// TestComputeRetryAfter verifies the retry-after calculation.
112+
func TestComputeRetryAfter(t *testing.T) {
113+
t.Parallel()
114+
115+
t.Run("zero time returns default", func(t *testing.T) {
116+
t.Parallel()
117+
assert.Equal(t, 60, computeRetryAfter(time.Time{}))
118+
})
119+
120+
t.Run("past time returns default", func(t *testing.T) {
121+
t.Parallel()
122+
assert.Equal(t, 60, computeRetryAfter(time.Now().Add(-time.Minute)))
123+
})
124+
125+
t.Run("future time returns seconds until reset", func(t *testing.T) {
126+
t.Parallel()
127+
future := time.Now().Add(30 * time.Second)
128+
secs := computeRetryAfter(future)
129+
// Allow ±2s for timing jitter.
130+
assert.GreaterOrEqual(t, secs, 29)
131+
assert.LessOrEqual(t, secs, 32)
132+
})
133+
134+
t.Run("very far future is clamped to max", func(t *testing.T) {
135+
t.Parallel()
136+
farFuture := time.Now().Add(24 * time.Hour)
137+
assert.Equal(t, 3600, computeRetryAfter(farFuture))
138+
})
139+
}

0 commit comments

Comments
 (0)