Skip to content

Commit ef05bd1

Browse files
DavertMik and claude committed
refactor(mcp): pause is a follow-up to run_test, not standalone
run_test now spawns its subprocess in pause yield mode and returns early with {status:"paused"} when the test hits pause(). The agent then drives the REPL through the new "pause" tool, which only takes a code string. Drops the standalone pause_session.start action — pause only makes sense when a test is already running. Resume / step / exit are just code values (matching the TTY pause REPL conventions). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 15b89d6 commit ef05bd1

2 files changed

Lines changed: 112 additions & 129 deletions

File tree

bin/mcp-server.js

Lines changed: 66 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
380380
},
381381
{
382382
name: 'run_test',
383-
description: 'Run a specific test.',
383+
description: 'Run a specific test. If the test calls pause(), this tool returns early with status "paused" — call the "pause" tool to interact, then send code:"resume" to let the test finish. Otherwise returns when the test completes with the json reporter result.',
384384
inputSchema: {
385385
type: 'object',
386386
properties: {
@@ -426,18 +426,14 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
426426
},
427427
},
428428
{
429-
name: 'pause_session',
430-
description: 'Run code inside a paused test, mirroring the human pause() REPL. Two actions: "start" spawns a test and waits for it to hit pause(); "run" sends a code line (same syntax as the TTY pause REPL — empty string steps to the next test step, "resume" continues the test, "exit" aborts; any other input is treated as I.<expr> unless prefixed with "=>"). Each run returns the value plus an artifact bundle (URL, ARIA, HTML, screenshot, console, storage), like run_code.',
429+
name: 'pause',
430+
description: 'Send a single line of code to a paused test (one that called pause() during run_test). Same syntax as the TTY pause REPL: an expression like "click(\'Save\')" runs as I.click(\'Save\'); prefix "=>" for raw JS; empty string steps to the next test step; "resume" continues the test to completion; "exit" aborts. Returns the next protocol message — typically {event:"result", ok, value, artifacts, error}, or {event:"paused"} after a step, or {event:"exited", exitInfo} if the test ended.',
431431
inputSchema: {
432432
type: 'object',
433433
properties: {
434-
action: { type: 'string', enum: ['start', 'run'] },
435-
test: { type: 'string' },
436434
code: { type: 'string' },
437-
config: { type: 'string' },
438435
timeout: { type: 'number' },
439436
},
440-
required: ['action'],
441437
},
442438
},
443439
],
@@ -552,78 +548,13 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
552548
}
553549
}
554550

555-
case 'pause_session': {
556-
const action = args?.action
557-
if (!action) throw new Error('pause_session requires "action" parameter')
558-
559-
if (action === 'start') {
560-
if (pauseChild && pauseChild.exitCode == null) {
561-
throw new Error('pause_session already running. Send code: "exit" via action: "run" first.')
562-
}
563-
const { test, config: configPathArg, timeout = 60000 } = args
564-
if (!test) throw new Error('pause_session start requires "test" parameter')
565-
566-
const { configPath, configDir } = resolveConfigPath(configPathArg)
567-
const { cli, root } = findCodeceptCliUpwards(configDir)
568-
const isNodeScript = cli.endsWith('.js')
569-
570-
const resolvedFile = await resolveTestToFile({ cli, root, configPath, test })
571-
const runArgs = ['run', '--config', configPath]
572-
if (resolvedFile) runArgs.push(resolvedFile)
573-
else if (looksLikePath(test)) runArgs.push(test)
574-
else runArgs.push('--grep', String(test))
575-
576-
pauseLogs = []
577-
pauseStdoutBuf = ''
578-
pauseExitInfo = null
579-
pauseProtocolWaiters = []
580-
581-
const env = {
582-
...process.env,
583-
CODECEPTJS_MCP: '1',
584-
CODECEPTJS_MCP_PAUSE: '1',
585-
NODE_ENV: process.env.NODE_ENV || 'test',
586-
}
587-
588-
const cmd = isNodeScript ? process.execPath : cli
589-
const cmdArgs = isNodeScript ? [cli, ...runArgs] : runArgs
590-
591-
pauseChild = spawn(cmd, cmdArgs, { cwd: root, env, stdio: ['pipe', 'pipe', 'pipe'] })
592-
let stderrBuf = ''
593-
pauseChild.stdout.on('data', d => { pauseStdoutBuf = pauseProcessChunk(pauseStdoutBuf, d, 'stdout') })
594-
pauseChild.stderr.on('data', d => { stderrBuf = pauseProcessChunk(stderrBuf, d, 'stderr') })
595-
pauseChild.on('exit', (code, signal) => {
596-
pauseExitInfo = { code, signal }
597-
pauseTeardown()
598-
})
599-
600-
let pausedMsg
601-
try {
602-
pausedMsg = await pauseAwaitProtocol({ timeout })
603-
} catch (err) {
604-
try { pauseChild?.kill('SIGKILL') } catch {}
605-
const stderr = pauseLogs.filter(l => l.stream === 'stderr').map(l => l.line).join('\n')
606-
throw new Error(`pause_session start: ${err.message}. stderr=${stderr.slice(0, 2000)}`)
607-
}
608-
609-
return {
610-
content: [{
611-
type: 'text',
612-
text: JSON.stringify({ status: 'paused', resolvedFile: resolvedFile || null, paused: pausedMsg }, null, 2),
613-
}],
614-
}
615-
}
616-
617-
if (action === 'run') {
618-
if (!pauseChild) throw new Error('No active pause_session. Call action: "start" first.')
619-
if (pauseChild.exitCode != null) throw new Error('pause_session subprocess has exited')
620-
const { code = '', timeout = 60000 } = args
621-
pauseChild.stdin.write(code + '\n')
622-
const resp = await pauseAwaitProtocol({ timeout })
623-
return { content: [{ type: 'text', text: JSON.stringify(resp, null, 2) }] }
624-
}
625-
626-
throw new Error(`pause_session unknown action: ${action}`)
551+
case 'pause': {
552+
if (!pauseChild) throw new Error('No paused test. Run a test first via run_test; if it calls pause(), this tool becomes available.')
553+
if (pauseChild.exitCode != null) throw new Error('Test subprocess has already exited.')
554+
const { code = '', timeout = 60000 } = args || {}
555+
pauseChild.stdin.write(code + '\n')
556+
const resp = await pauseAwaitProtocol({ timeout })
557+
return { content: [{ type: 'text', text: JSON.stringify(resp, null, 2) }] }
627558
}
628559

629560
case 'run_code': {
@@ -724,6 +655,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
724655

725656
case 'run_test': {
726657
return await withLock(async () => {
658+
if (pauseChild && pauseChild.exitCode == null) {
659+
throw new Error('A previous run_test is still paused. Send code:"resume" or code:"exit" via the "pause" tool first.')
660+
}
727661
const { test, timeout = 60000, config: configPathArg } = args || {}
728662
const { configPath, configDir } = resolveConfigPath(configPathArg)
729663

@@ -737,27 +671,70 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
737671
else if (looksLikePath(test)) runArgs.push(test)
738672
else runArgs.push('--grep', String(test))
739673

740-
const res = isNodeScript
741-
? await runCmd(process.execPath, [cli, ...runArgs], { cwd: root, timeout })
742-
: await runCmd(cli, runArgs, { cwd: root, timeout })
674+
pauseLogs = []
675+
pauseStdoutBuf = ''
676+
pauseExitInfo = null
677+
pauseProtocolWaiters = []
678+
679+
const env = {
680+
...process.env,
681+
CODECEPTJS_MCP: '1',
682+
CODECEPTJS_MCP_PAUSE: '1',
683+
NODE_ENV: process.env.NODE_ENV || 'test',
684+
}
685+
686+
const cmd = isNodeScript ? process.execPath : cli
687+
const cmdArgs = isNodeScript ? [cli, ...runArgs] : runArgs
743688

744-
const { code, out, err } = res
689+
pauseChild = spawn(cmd, cmdArgs, { cwd: root, env, stdio: ['pipe', 'pipe', 'pipe'] })
690+
let stderrBuf = ''
691+
pauseChild.stdout.on('data', d => { pauseStdoutBuf = pauseProcessChunk(pauseStdoutBuf, d, 'stdout') })
692+
pauseChild.stderr.on('data', d => { stderrBuf = pauseProcessChunk(stderrBuf, d, 'stderr') })
693+
pauseChild.on('exit', (code, signal) => {
694+
pauseExitInfo = { code, signal }
695+
pauseTeardown()
696+
})
697+
698+
let first
699+
try {
700+
first = await pauseAwaitProtocol({ timeout })
701+
} catch (err) {
702+
try { pauseChild?.kill('SIGKILL') } catch {}
703+
throw err
704+
}
705+
706+
if (first.event === 'paused') {
707+
return {
708+
content: [{
709+
type: 'text',
710+
text: JSON.stringify({
711+
status: 'paused',
712+
resolvedFile: resolvedFile || null,
713+
paused: first,
714+
note: 'Test hit pause(). Use the "pause" tool to send code; send code:"resume" to let the test finish.',
715+
}, null, 2),
716+
}],
717+
}
718+
}
745719

720+
// Subprocess exited without pausing — collect normal reporter output
721+
const stdoutText = pauseLogs.filter(l => l.stream === 'stdout').map(l => l.line).join('\n')
722+
const stderrText = pauseLogs.filter(l => l.stream === 'stderr').map(l => l.line).join('\n')
746723
let parsed = null
747-
const jsonStart = out.indexOf('{')
748-
const jsonEnd = out.lastIndexOf('}')
724+
const jsonStart = stdoutText.indexOf('{')
725+
const jsonEnd = stdoutText.lastIndexOf('}')
749726
if (jsonStart !== -1 && jsonEnd !== -1 && jsonEnd > jsonStart) {
750-
try { parsed = JSON.parse(out.slice(jsonStart, jsonEnd + 1)) } catch {}
727+
try { parsed = JSON.parse(stdoutText.slice(jsonStart, jsonEnd + 1)) } catch {}
751728
}
752729

753730
return {
754731
content: [{
755732
type: 'text',
756733
text: JSON.stringify({
757-
meta: { exitCode: code, cli, root, configPath, args: runArgs, resolvedFile: resolvedFile || null },
734+
meta: { exitCode: first.exitInfo?.code ?? null, cli, root, configPath, args: runArgs, resolvedFile: resolvedFile || null },
758735
reporterJson: parsed,
759-
stderr: err ? err.slice(0, 20000) : '',
760-
rawStdout: parsed ? '' : out.slice(0, 20000),
736+
stderr: stderrText.slice(0, 20000),
737+
rawStdout: parsed ? '' : stdoutText.slice(0, 20000),
761738
}, null, 2),
762739
}],
763740
}

docs/mcp.md

Lines changed: 46 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -235,80 +235,86 @@ Capture the current state of the browser without performing any action. Useful f
235235
}
236236
```
237237

238-
### pause_session
238+
### pause
239239

240-
Mirrors the human `pause()` REPL for an AI agent: send a code string, get a result with artifacts (same shape as `run_code`).
240+
Send one line of input to a test that's currently paused at `pause()`. Mirrors the human pause REPL — send code, get a result with the same artifact bundle as `run_code`.
241241

242-
Two actions:
242+
`pause` is only valid after a `run_test` call has returned `status: "paused"` and while that test's subprocess is still alive. The flow is:
243243

244-
| Action | Params | Effect |
245-
|---|---|---|
246-
| `start` | `test`, `config?`, `timeout?` | Spawn the test subprocess in pause yield mode. Resolves when the test hits `pause()` and emits `{event:"paused"}`. |
247-
| `run` | `code`, `timeout?` | Send one line of input — same syntax as the TTY REPL. Returns the next protocol message from the subprocess. |
244+
1. Agent calls `run_test`. If the test reaches `pause()`, `run_test` returns `{status:"paused", paused:{event:"paused"}}` and keeps the subprocess alive.
245+
2. Agent calls `pause` with `code` strings to drive the REPL.
246+
3. Agent sends `code:"resume"` to let the test run to completion, or `code:"exit"` to abort it; in both cases the subprocess exits and pause state is cleared.
248247

249-
`code` follows the TTY pause REPL conventions:
250-
- An expression like `click('Save')` runs as `I.click('Save')` and returns `{event:"result", ok, value, artifacts, error}`.
251-
- Prefix `=>` to evaluate raw JS: `=> myVar.id`.
252-
- `""` (empty) → step to the next test step. The subprocess re-pauses; response is `{event:"step"}` followed by `{event:"paused"}` on the next `run` call.
253-
- `"resume"` → continue the test to completion. Response is `{event:"resumed"}`; the subprocess will exit on its own.
254-
- `"exit"` → abort the paused test. Same `{event:"resumed"}` response, then exit.
248+
`code` syntax (same as the TTY pause REPL):
255249

256-
Each result includes the artifact bundle (URL, ARIA, HTML, screenshot, console, storage), like `run_code`. If the subprocess exits during a `run`, the response is `{event:"exited", exitInfo:{code, signal}}`.
250+
| Input | Effect |
251+
|---|---|
252+
| `"click('Save')"` | Runs as `I.click('Save')`. Returns `{event:"result", ok, value, artifacts, error}`. |
253+
| `"=> myVar.id"` | Evaluates raw JS in the paused scope. Returns `{event:"result", ...}`. |
254+
| `""` (empty) | Step to the next test step. Returns `{event:"step"}`; the subprocess re-pauses, and the next `pause` call returns `{event:"paused"}` again. |
255+
| `"resume"` | Continue the test to completion. Returns `{event:"resumed"}`; the subprocess will exit on its own. |
256+
| `"exit"` | Abort the paused test. Returns `{event:"resumed"}`, then the subprocess exits. |
257257

258-
**Lifecycle example:**
258+
If the subprocess exits during a call, the response is `{event:"exited", exitInfo:{code, signal}}` and pause state is cleared.
259+
260+
**Parameters:**
261+
- `code` (optional, default `""`): the line to send.
262+
- `timeout` (optional): ms to wait for the response (default 60000).
263+
264+
**Example:**
259265

260266
```json
261-
{ "name": "pause_session", "arguments": { "action": "start", "test": "checkout_test" } }
262-
{ "name": "pause_session", "arguments": { "action": "run", "code": "grabCurrentUrl()" } }
263-
{ "name": "pause_session", "arguments": { "action": "run", "code": "click('Save')" } }
264-
{ "name": "pause_session", "arguments": { "action": "run", "code": "resume" } }
265-
```
267+
{ "name": "run_test", "arguments": { "test": "checkout_test" } }
268+
// → { "status": "paused", "paused": { "event": "paused" }, ... }
269+
270+
{ "name": "pause", "arguments": { "code": "grabCurrentUrl()" } }
271+
// → { "event": "result", "ok": true, "value": "http://...", "artifacts": { ... } }
266272

267-
A single `pause_session` instance owns one subprocess. Concurrent `start` calls are rejected — send `code: "exit"` (or `"resume"`) first.
273+
{ "name": "pause", "arguments": { "code": "resume" } }
274+
// → { "event": "resumed" }
275+
```
268276

269277
**Notes:**
270-
- The subprocess is spawned with `CODECEPTJS_MCP=1` and `CODECEPTJS_MCP_PAUSE=1` so `pause()` calls in the test land in yield mode.
271-
- `pause()` calls running under `CODECEPTJS_MCP=1` *without* `CODECEPTJS_MCP_PAUSE=1` print a notice and return immediately so leftover `pause()` calls don't deadlock CI runs invoked through MCP.
278+
- `run_test` always spawns its subprocess with `CODECEPTJS_MCP=1` and `CODECEPTJS_MCP_PAUSE=1`, so any `pause()` call in the test lands in yield mode.
279+
- A `pause()` call running with `CODECEPTJS_MCP=1` set but `CODECEPTJS_MCP_PAUSE` unset (e.g., a different MCP-aware caller, or future tooling) prints a notice and returns immediately, so leftover `pause()` calls don't deadlock.
272280
- TTY behaviour (`npx codeceptjs run --debug` at a terminal) is unchanged — the readline REPL is used whenever `process.stdin.isTTY` is true.
273281

274282
### run_test
275283

276-
Run a specific test by name or file path. Uses subprocess to run tests with isolation.
284+
Run a specific test by name or file path. The subprocess is spawned in pause yield mode — if the test calls `pause()`, this tool returns early and the agent drives the REPL via the [`pause`](#pause) tool.
277285

278286
**Parameters:**
279287
- `test` (required): Test name or file path
280288
- `timeout` (optional): Timeout in milliseconds (default: 60000)
281289
- `config` (optional): Path to codecept.conf.js
282290

283-
**Returns:**
291+
**Returns (test completed normally):**
284292
```json
285293
{
286-
"meta": {
287-
"exitCode": 0,
288-
"cli": "/path/to/codecept.js",
289-
"root": "/project/root",
290-
"configPath": "/path/to/codecept.conf.js",
291-
"args": ["run", "--config", "...", "--reporter", "json", "test_file.js"],
292-
"resolvedFile": "/full/path/to/test_file.js"
293-
},
294-
"reporterJson": {
295-
"stats": {
296-
"tests": 3,
297-
"passes": 2,
298-
"failures": 1
299-
}
300-
},
294+
"meta": { "exitCode": 0, "cli": "...", "root": "...", "configPath": "...", "args": [...], "resolvedFile": "..." },
295+
"reporterJson": { "stats": { "tests": 3, "passes": 2, "failures": 1 } },
301296
"stderr": "",
302297
"rawStdout": ""
303298
}
304299
```
305300

301+
**Returns (test reached `pause()`):**
302+
```json
303+
{
304+
"status": "paused",
305+
"resolvedFile": "/path/to/test.js",
306+
"paused": { "__mcpPause": true, "event": "paused" },
307+
"note": "Test hit pause(). Use the \"pause\" tool to send code; send code:\"resume\" to let the test finish."
308+
}
309+
```
310+
306311
**Features:**
307312
- Automatically resolves test names to file paths
308313
- Supports partial test name matching
309314
- Uses json reporter for structured output
310315
- Executes in subprocess for isolation
311316
- Includes stderr for debugging
317+
- Yields on `pause()` so an agent can drive the REPL through the `pause` tool
312318

313319
**Example:**
314320
```json

0 commit comments

Comments
 (0)