Skip to content

Commit 82fd9c8

Browse files
authored
fix: increase HTTP backend connect timeout from 5s to 30s and make configurable (#3782)
## Problem When an HTTP MCP backend is slow to initialize (e.g. cold-starting LiteLLM proxy taking 21s), the gateway's hardcoded **5-second per-transport connect timeout** causes all three SDK transport attempts to fail. The tool registration silently drops the backend's tools and the agent starts without them. From the issue logs: ``` [2026-04-13T16:37:36Z] [DEBUG] GetOrLaunch called for server: opslevel [2026-04-13T16:37:57Z] [INFO] Successfully registered tools from opslevel (took 21.213743169s) ``` The 21s is consumed by the transport fallback chain: streamable HTTP (5s timeout) → SSE (5s timeout) → plain JSON (~11s). If the backend only supports streamable HTTP, all three transports fail and tools are silently dropped. ## Fix ### 1. Configurable per-transport connect timeout (default now 30s, previously 5s) New `connect_timeout` field on server config: **TOML:** ```toml [servers.opslevel] type = "http" url = "http://opslevel-proxy:8080/mcp" connect_timeout = 60 # seconds per transport attempt ``` **JSON stdin:** ```json { "mcpServers": { "opslevel": { "type": "http", "url": "http://opslevel-proxy:8080/mcp", "connect_timeout": 60 } } } ``` ### 2. Better error visibility - Failed backend tool registration promoted from **WARN** to **ERROR** - Summary error log lists which backends failed and impact on agents: `Tool registration incomplete: 1 of 3 backends failed: [opslevel] — agents will not see tools from these servers` ### 3. Reconnect uses stored timeout The connect timeout is stored on the Connection struct so `reconnectSDKTransport` reuses the same value instead of a hardcoded 10s.
## Changes | File | Change | |------|--------| | `config_core.go` | Add `ConnectTimeout` field, `HTTPConnectTimeout()` helper, `DefaultConnectTimeout` constant | | `config_stdin.go` | Add `ConnectTimeout` to `StdinServerConfig`, map it during conversion | | `connection.go` | Add `connectTimeout` to `Connection`, apply default of 30s when ≤ 0 | | `http_transport.go` | Pass configurable timeout through `trySDKTransport` and `newHTTPConnection` | | `launcher.go` | Pass `serverCfg.HTTPConnectTimeout()` to `NewHTTPConnection` | | `tool_registry.go` | Promote failed backends to ERROR, add summary log | | Tests | Updated all `NewHTTPConnection` call sites with new parameter | Fixes #3718
2 parents 6a5cf86 + 98c92c7 commit 82fd9c8

14 files changed

Lines changed: 201 additions & 81 deletions

internal/config/config_core.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ const (
4141
DefaultStartupTimeout = 30 // seconds (per spec §4.1.3)
4242
DefaultToolTimeout = 60 // seconds (per spec §4.1.3)
4343
DefaultKeepaliveInterval = 1500 // seconds (25 minutes) — keeps HTTP backend sessions alive
44+
DefaultConnectTimeout = 30 // seconds — per-transport timeout for HTTP backend connect
4445
)
4546

4647
// Config represents the internal gateway configuration.
@@ -150,6 +151,15 @@ func (c *Config) GetAPIKey() string {
150151
return c.Gateway.APIKey
151152
}
152153

154+
// HTTPConnectTimeout returns the per-transport connect timeout as a Duration.
155+
// Returns the default (DefaultConnectTimeout seconds) when the receiver is nil or the field is zero or negative.
156+
func (s *ServerConfig) HTTPConnectTimeout() time.Duration {
157+
if s == nil || s.ConnectTimeout <= 0 {
158+
return time.Duration(DefaultConnectTimeout) * time.Second
159+
}
160+
return time.Duration(s.ConnectTimeout) * time.Second
161+
}
162+
153163
// AuthConfig configures upstream authentication for HTTP MCP servers.
154164
type AuthConfig struct {
155165
// Type is the authentication type. Currently only "github-oidc" is supported.
@@ -198,6 +208,13 @@ type ServerConfig struct {
198208

199209
// Guard is the name of the guard to use for this server (requires DIFC)
200210
Guard string `toml:"guard" json:"guard,omitempty"`
211+
212+
// ConnectTimeout is the timeout (in seconds) used for SDK-managed HTTP transport connect attempts.
213+
// The gateway tries multiple transports in sequence (streamable HTTP → SSE → plain JSON-RPC).
214+
// This timeout applies to the streamable HTTP and SSE connection attempts; for the plain JSON-RPC
215+
// fallback it bounds the dial and response-header wait. Increase this for backends that are
216+
// slow to initialize. Only applies to HTTP server types. Default: 30 seconds.
217+
ConnectTimeout int `toml:"connect_timeout" json:"connect_timeout,omitempty"`
201218
}
202219

203220
// GuardConfig represents a guard configuration for DIFC enforcement.

internal/config/config_core_test.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,24 @@ func TestLoadFromFile_HTTPServerValid(t *testing.T) {
114114
assert.Equal(t, "http://localhost:9090/mcp", server.URL)
115115
}
116116

117+
// TestLoadFromFile_HTTPServerWithConnectTimeout verifies that connect_timeout
118+
// is parsed from TOML and returned correctly via HTTPConnectTimeout().
119+
func TestLoadFromFile_HTTPServerWithConnectTimeout(t *testing.T) {
120+
path := writeTempTOML(t, `
121+
[servers.slowservice]
122+
type = "http"
123+
url = "http://localhost:9090/mcp"
124+
connect_timeout = 60
125+
`)
126+
cfg, err := LoadFromFile(path)
127+
require.NoError(t, err)
128+
require.NotNil(t, cfg)
129+
server, ok := cfg.Servers["slowservice"]
130+
require.True(t, ok)
131+
assert.Equal(t, 60, server.ConnectTimeout)
132+
assert.Equal(t, 60*time.Second, server.HTTPConnectTimeout())
133+
}
134+
117135
// TestLoadFromFile_AppliesGatewayDefaults verifies that when no [gateway] section
118136
// is present, default values are applied for port, startup timeout, and tool timeout.
119137
func TestLoadFromFile_AppliesGatewayDefaults(t *testing.T) {
@@ -471,6 +489,48 @@ func TestHTTPKeepaliveInterval(t *testing.T) {
471489
}
472490
}
473491

492+
// TestHTTPConnectTimeout tests all branches of the ServerConfig.HTTPConnectTimeout method.
493+
func TestHTTPConnectTimeout(t *testing.T) {
494+
tests := []struct {
495+
name string
496+
server *ServerConfig
497+
expected time.Duration
498+
}{
499+
{
500+
name: "nil receiver returns default",
501+
server: nil,
502+
expected: time.Duration(DefaultConnectTimeout) * time.Second,
503+
},
504+
{
505+
name: "zero value returns default",
506+
server: &ServerConfig{},
507+
expected: time.Duration(DefaultConnectTimeout) * time.Second,
508+
},
509+
{
510+
name: "negative value returns default",
511+
server: &ServerConfig{ConnectTimeout: -5},
512+
expected: time.Duration(DefaultConnectTimeout) * time.Second,
513+
},
514+
{
515+
name: "positive value returns correct duration",
516+
server: &ServerConfig{ConnectTimeout: 60},
517+
expected: 60 * time.Second,
518+
},
519+
{
520+
name: "default value returns 30 seconds",
521+
server: &ServerConfig{ConnectTimeout: DefaultConnectTimeout},
522+
expected: time.Duration(DefaultConnectTimeout) * time.Second,
523+
},
524+
}
525+
526+
for _, tt := range tests {
527+
t.Run(tt.name, func(t *testing.T) {
528+
got := tt.server.HTTPConnectTimeout()
529+
assert.Equal(t, tt.expected, got)
530+
})
531+
}
532+
}
533+
474534
// TestIsDynamicTOMLPath verifies the branching logic of isDynamicTOMLPath,
475535
// which guards the unknown-field check by exempting map-valued sections
476536
// whose keys are not known at decode time.

internal/config/config_stdin.go

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,10 @@ type StdinServerConfig struct {
123123
// Auth configures upstream authentication for HTTP MCP servers.
124124
Auth *AuthConfig `json:"auth,omitempty"`
125125

126+
// ConnectTimeout is the per-transport timeout (in seconds) for connecting to HTTP backends.
127+
// Only applies to HTTP server types. Default: 30 seconds.
128+
ConnectTimeout *int `json:"connect_timeout,omitempty"`
129+
126130
// AdditionalProperties stores any extra fields for custom server types
127131
// This allows custom schemas to define their own fields beyond the standard ones
128132
AdditionalProperties map[string]interface{} `json:"-"`
@@ -151,20 +155,21 @@ func (s *StdinServerConfig) UnmarshalJSON(data []byte) error {
151155

152156
// Known fields in the struct
153157
knownFields := map[string]bool{
154-
"type": true,
155-
"container": true,
156-
"entrypoint": true,
157-
"entrypointArgs": true,
158-
"args": true,
159-
"mounts": true,
160-
"env": true,
161-
"url": true,
162-
"headers": true,
163-
"tools": true,
164-
"registry": true,
165-
"guard-policies": true,
166-
"guard": true,
167-
"auth": true,
158+
"type": true,
159+
"container": true,
160+
"entrypoint": true,
161+
"entrypointArgs": true,
162+
"args": true,
163+
"mounts": true,
164+
"env": true,
165+
"url": true,
166+
"headers": true,
167+
"tools": true,
168+
"registry": true,
169+
"guard-policies": true,
170+
"guard": true,
171+
"auth": true,
172+
"connect_timeout": true,
168173
}
169174

170175
// Store additional properties (fields not in the struct)
@@ -413,6 +418,9 @@ func convertStdinServerConfig(name string, server *StdinServerConfig, customSche
413418
GuardPolicies: server.GuardPolicies,
414419
Guard: server.Guard,
415420
}
421+
if server.ConnectTimeout != nil {
422+
serverCfg.ConnectTimeout = *server.ConnectTimeout
423+
}
416424
if server.Auth != nil {
417425
serverCfg.Auth = &AuthConfig{
418426
Type: server.Auth.Type,

internal/launcher/launcher.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ func GetOrLaunch(l *Launcher, serverID string) (*mcp.Connection, error) {
135135
}
136136

137137
// Create an HTTP connection
138-
conn, err := mcp.NewHTTPConnection(l.ctx, serverID, serverCfg.URL, serverCfg.Headers, oidcProvider, oidcAudience, l.config.Gateway.HTTPKeepaliveInterval())
138+
conn, err := mcp.NewHTTPConnection(l.ctx, serverID, serverCfg.URL, serverCfg.Headers, oidcProvider, oidcAudience, l.config.Gateway.HTTPKeepaliveInterval(), serverCfg.HTTPConnectTimeout())
139139
if err != nil {
140140
log.Printf("FAILED to create HTTP connection for server %q: %v", serverID, err)
141141
logger.LogErrorWithServer(serverID, "backend", "Failed to create HTTP connection: %s, error=%v", serverID, err)

internal/mcp/connection.go

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"encoding/json"
88
"fmt"
99
"io"
10+
"net"
1011
"net/http"
1112
"os/exec"
1213
"strings"
@@ -70,6 +71,7 @@ type Connection struct {
7071
httpSessionID string // Session ID returned by the HTTP backend
7172
httpTransportType HTTPTransportType // Type of HTTP transport in use
7273
keepAliveInterval time.Duration // Keepalive interval for SDK transports (0 = disabled)
74+
connectTimeout time.Duration // Per-transport connect timeout for SDK transports
7375
// sessionMu protects the mutable session fields: httpSessionID, session, and client.
7476
// Always use getHTTPSessionID() or getSDKSession() to read these fields; the
7577
// reconnect functions (reconnectPlainJSON, reconnectSDKTransport) hold the full Lock.
@@ -194,17 +196,29 @@ func NewConnection(ctx context.Context, serverID, command string, args []string,
194196
// Authorization header from the headers map.
195197
//
196198
// This ensures compatibility with all types of HTTP MCP servers.
197-
func NewHTTPConnection(ctx context.Context, serverID, url string, headers map[string]string, oidcProvider *oidc.Provider, oidcAudience string, keepAlive time.Duration) (*Connection, error) {
198-
logger.LogInfo("backend", "Creating HTTP MCP connection with transport fallback, url=%s", url)
199+
func NewHTTPConnection(ctx context.Context, serverID, url string, headers map[string]string, oidcProvider *oidc.Provider, oidcAudience string, keepAlive time.Duration, connectTimeout time.Duration) (*Connection, error) {
200+
// Apply default connect timeout when not specified
201+
if connectTimeout <= 0 {
202+
connectTimeout = 30 * time.Second
203+
}
204+
logger.LogInfo("backend", "Creating HTTP MCP connection with transport fallback, url=%s, connectTimeout=%v", url, connectTimeout)
199205
ctx, cancel := context.WithCancel(ctx)
200206

201-
// Create an HTTP client with appropriate timeouts
207+
// Create an HTTP client with appropriate timeouts.
208+
// Keep the existing overall request timeout, but also apply connectTimeout to
209+
// the underlying HTTP transport so plain JSON-RPC fallback attempts honor the
210+
// configured per-attempt connection timeout instead of waiting for the full
211+
// client timeout.
202212
httpClient := &http.Client{
203213
Timeout: 120 * time.Second, // Overall request timeout
204214
Transport: &http.Transport{
205-
MaxIdleConns: 10,
206-
IdleConnTimeout: 90 * time.Second,
207-
TLSHandshakeTimeout: 10 * time.Second,
215+
DialContext: (&net.Dialer{
216+
Timeout: connectTimeout,
217+
}).DialContext,
218+
MaxIdleConns: 10,
219+
IdleConnTimeout: 90 * time.Second,
220+
TLSHandshakeTimeout: 10 * time.Second,
221+
ResponseHeaderTimeout: connectTimeout,
208222
},
209223
}
210224

@@ -232,7 +246,7 @@ func NewHTTPConnection(ctx context.Context, serverID, url string, headers map[st
232246

233247
// Try 1: Streamable HTTP (2025-03-26 spec)
234248
logConn.Printf("Attempting streamable HTTP transport for %s", url)
235-
conn, err := tryStreamableHTTPTransport(ctx, cancel, serverID, url, headers, headerClient, keepAlive)
249+
conn, err := tryStreamableHTTPTransport(ctx, cancel, serverID, url, headers, headerClient, keepAlive, connectTimeout)
236250
if err == nil {
237251
logger.LogInfo("backend", "Successfully connected using streamable HTTP transport, url=%s", url)
238252
return conn, nil
@@ -241,7 +255,7 @@ func NewHTTPConnection(ctx context.Context, serverID, url string, headers map[st
241255

242256
// Try 2: SSE (2024-11-05 spec)
243257
logConn.Printf("Attempting SSE transport for %s", url)
244-
conn, err = trySSETransport(ctx, cancel, serverID, url, headers, headerClient, keepAlive)
258+
conn, err = trySSETransport(ctx, cancel, serverID, url, headers, headerClient, keepAlive, connectTimeout)
245259
if err == nil {
246260
logger.LogWarn("backend", "⚠️ MCP over SSE (2024-11-05 spec) is DEPRECATED for url=%s. Please migrate to streamable HTTP transport (2025-03-26 spec).", url)
247261
logger.LogInfo("backend", "Configured HTTP MCP server with SSE transport: %s", url)
@@ -366,7 +380,11 @@ func (c *Connection) reconnectSDKTransport() error {
366380
return fmt.Errorf("cannot reconnect: unsupported transport type %s", c.httpTransportType)
367381
}
368382

369-
connectCtx, cancel := context.WithTimeout(c.ctx, 10*time.Second)
383+
timeout := c.connectTimeout
384+
if timeout == 0 {
385+
timeout = 30 * time.Second
386+
}
387+
connectCtx, cancel := context.WithTimeout(c.ctx, timeout)
370388
defer cancel()
371389

372390
session, err := client.Connect(connectCtx, transport, nil)

internal/mcp/connection_arguments_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ func TestCallTool_ArgumentsPassed(t *testing.T) {
159159
// Create connection
160160
conn, err := NewHTTPConnection(context.Background(), "test-server", testServer.URL, map[string]string{
161161
"Authorization": "test-token",
162-
}, nil, "", 0)
162+
}, nil, "", 0, 0)
163163
require.NoError(t, err, "Failed to create HTTP connection")
164164
defer conn.Close()
165165

@@ -224,7 +224,7 @@ func TestCallTool_MissingArguments(t *testing.T) {
224224

225225
conn, err := NewHTTPConnection(context.Background(), "test-server", testServer.URL, map[string]string{
226226
"Authorization": "test-token",
227-
}, nil, "", 0)
227+
}, nil, "", 0, 0)
228228
require.NoError(t, err)
229229
defer conn.Close()
230230

internal/mcp/connection_stderr_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ func TestConnection_SendRequest(t *testing.T) {
3737

3838
conn, err := NewHTTPConnection(context.Background(), "test-server", srv.URL, map[string]string{
3939
"Authorization": "test-token",
40-
}, nil, "", 0)
40+
}, nil, "", 0, 0)
4141
require.NoError(t, err)
4242
defer conn.Close()
4343

internal/mcp/connection_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ func TestHTTPRequest_SessionIDHeader(t *testing.T) {
4343
// Create an HTTP connection
4444
conn, err := NewHTTPConnection(context.Background(), "test-server", testServer.URL, map[string]string{
4545
"Authorization": "test-auth-token",
46-
}, nil, "", 0)
46+
}, nil, "", 0, 0)
4747
require.NoError(t, err, "Failed to create HTTP connection")
4848

4949
// Create a context with session ID
@@ -80,7 +80,7 @@ func TestHTTPRequest_NoSessionID(t *testing.T) {
8080
// Create an HTTP connection
8181
conn, err := NewHTTPConnection(context.Background(), "test-server", testServer.URL, map[string]string{
8282
"Authorization": "test-auth-token",
83-
}, nil, "", 0)
83+
}, nil, "", 0, 0)
8484
require.NoError(t, err, "Failed to create HTTP connection")
8585

8686
// Send a request without session ID in context
@@ -118,7 +118,7 @@ func TestHTTPRequest_ConfiguredHeaders(t *testing.T) {
118118
authToken := "configured-auth-token"
119119
conn, err := NewHTTPConnection(context.Background(), "test-server", testServer.URL, map[string]string{
120120
"Authorization": authToken,
121-
}, nil, "", 0)
121+
}, nil, "", 0, 0)
122122
require.NoError(t, err, "Failed to create HTTP connection")
123123

124124
// Create a context with session ID
@@ -377,7 +377,7 @@ func TestHTTPRequest_ErrorResponses(t *testing.T) {
377377
// Create connection with custom headers to use plain JSON transport
378378
conn, err := NewHTTPConnection(context.Background(), "test-server", testServer.URL, map[string]string{
379379
"Authorization": "test-token",
380-
}, nil, "", 0)
380+
}, nil, "", 0, 0)
381381
if err != nil {
382382
require.True(t, tt.expectError, "Unexpected error creating connection: %v", err)
383383
if tt.errorSubstring != "" {
@@ -423,7 +423,7 @@ func TestConnection_IsHTTP(t *testing.T) {
423423
"X-Custom": "custom-value",
424424
}
425425

426-
conn, err := NewHTTPConnection(context.Background(), "test-server", testServer.URL, headers, nil, "", 0)
426+
conn, err := NewHTTPConnection(context.Background(), "test-server", testServer.URL, headers, nil, "", 0, 0)
427427
require.NoError(t, err, "Failed to create HTTP connection")
428428
defer conn.Close()
429429

@@ -466,7 +466,7 @@ func TestHTTPConnection_InvalidURL(t *testing.T) {
466466

467467
for _, tt := range tests {
468468
t.Run(tt.name, func(t *testing.T) {
469-
_, err := NewHTTPConnection(context.Background(), "test-server", tt.url, tt.headers, nil, "", 0)
469+
_, err := NewHTTPConnection(context.Background(), "test-server", tt.url, tt.headers, nil, "", 0, 0)
470470

471471
if tt.expectError {
472472
require.Error(t, err, "Expected an error but got none")
@@ -523,7 +523,7 @@ func TestNewHTTPConnectionStoresKeepalive(t *testing.T) {
523523
headers := map[string]string{}
524524
httpClient := &http.Client{}
525525

526-
conn := newHTTPConnection(ctx, cancel, client, nil, url, headers, httpClient, HTTPTransportStreamable, "test-server", keepAlive)
526+
conn := newHTTPConnection(ctx, cancel, client, nil, url, headers, httpClient, HTTPTransportStreamable, "test-server", keepAlive, 0)
527527

528528
require.NotNil(t, conn)
529529
assert.Equal(t, keepAlive, conn.keepAliveInterval,
@@ -610,7 +610,7 @@ func TestNewHTTPConnection(t *testing.T) {
610610
headers := map[string]string{"Authorization": "test"}
611611
httpClient := &http.Client{}
612612

613-
conn := newHTTPConnection(ctx, cancel, client, nil, url, headers, httpClient, HTTPTransportStreamable, "test-server", 0)
613+
conn := newHTTPConnection(ctx, cancel, client, nil, url, headers, httpClient, HTTPTransportStreamable, "test-server", 0, 0)
614614

615615
require.NotNil(t, conn, "Connection should not be nil")
616616
assert.Equal(t, client, conn.client, "Client should match")

0 commit comments

Comments
 (0)