refactor(mcp): pause is a follow-up to run_test, not standalone

DavertMik · claude · DavertMik · commit ef05bd10c6a9 · 2026-04-30T03:13:06.000+03:00
run_test now spawns its subprocess in pause yield mode and returns early
with {status:"paused"} when the test hits pause(). The agent then drives
the REPL through the new "pause" tool, which only takes a code string.

Drops the standalone pause_session.start action — pause only makes sense
when a test is already running. Resume / step / exit are just code values
(matching the TTY pause REPL conventions).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/bin/mcp-server.js b/bin/mcp-server.js
@@ -380,7 +380,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
     },
     {
       name: 'run_test',
-      description: 'Run a specific test.',
+      description: 'Run a specific test. If the test calls pause(), this tool returns early with status "paused" — call the "pause" tool to interact, then send code:"resume" to let the test finish. Otherwise returns when the test completes with the json reporter result.',
       inputSchema: {
         type: 'object',
         properties: {
@@ -426,18 +426,14 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
       },
     },
     {
-      name: 'pause_session',
-      description: 'Run code inside a paused test, mirroring the human pause() REPL. Two actions: "start" spawns a test and waits for it to hit pause(); "run" sends a code line (same syntax as the TTY pause REPL — empty string steps to the next test step, "resume" continues the test, "exit" aborts; any other input is treated as I.<expr> unless prefixed with "=>"). Each run returns the value plus an artifact bundle (URL, ARIA, HTML, screenshot, console, storage), like run_code.',
+      name: 'pause',
+      description: 'Send a single line of code to a paused test (one that called pause() during run_test). Same syntax as the TTY pause REPL: an expression like "click(\'Save\')" runs as I.click(\'Save\'); prefix "=>" for raw JS; empty string steps to the next test step; "resume" continues the test to completion; "exit" aborts. Returns the next protocol message — typically {event:"result", ok, value, artifacts, error}, or {event:"paused"} after a step, or {event:"exited", exitInfo} if the test ended.',
       inputSchema: {
         type: 'object',
         properties: {
-          action: { type: 'string', enum: ['start', 'run'] },
-          test: { type: 'string' },
           code: { type: 'string' },
-          config: { type: 'string' },
           timeout: { type: 'number' },
         },
-        required: ['action'],
       },
     },
   ],
@@ -552,78 +548,13 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
         }
       }
 
-      case 'pause_session': {
-        const action = args?.action
-        if (!action) throw new Error('pause_session requires "action" parameter')
-
-        if (action === 'start') {
-          if (pauseChild && pauseChild.exitCode == null) {
-            throw new Error('pause_session already running. Send code: "exit" via action: "run" first.')
-          }
-          const { test, config: configPathArg, timeout = 60000 } = args
-          if (!test) throw new Error('pause_session start requires "test" parameter')
-
-          const { configPath, configDir } = resolveConfigPath(configPathArg)
-          const { cli, root } = findCodeceptCliUpwards(configDir)
-          const isNodeScript = cli.endsWith('.js')
-
-          const resolvedFile = await resolveTestToFile({ cli, root, configPath, test })
-          const runArgs = ['run', '--config', configPath]
-          if (resolvedFile) runArgs.push(resolvedFile)
-          else if (looksLikePath(test)) runArgs.push(test)
-          else runArgs.push('--grep', String(test))
-
-          pauseLogs = []
-          pauseStdoutBuf = ''
-          pauseExitInfo = null
-          pauseProtocolWaiters = []
-
-          const env = {
-            ...process.env,
-            CODECEPTJS_MCP: '1',
-            CODECEPTJS_MCP_PAUSE: '1',
-            NODE_ENV: process.env.NODE_ENV || 'test',
-          }
-
-          const cmd = isNodeScript ? process.execPath : cli
-          const cmdArgs = isNodeScript ? [cli, ...runArgs] : runArgs
-
-          pauseChild = spawn(cmd, cmdArgs, { cwd: root, env, stdio: ['pipe', 'pipe', 'pipe'] })
-          let stderrBuf = ''
-          pauseChild.stdout.on('data', d => { pauseStdoutBuf = pauseProcessChunk(pauseStdoutBuf, d, 'stdout') })
-          pauseChild.stderr.on('data', d => { stderrBuf = pauseProcessChunk(stderrBuf, d, 'stderr') })
-          pauseChild.on('exit', (code, signal) => {
-            pauseExitInfo = { code, signal }
-            pauseTeardown()
-          })
-
-          let pausedMsg
-          try {
-            pausedMsg = await pauseAwaitProtocol({ timeout })
-          } catch (err) {
-            try { pauseChild?.kill('SIGKILL') } catch {}
-            const stderr = pauseLogs.filter(l => l.stream === 'stderr').map(l => l.line).join('\n')
-            throw new Error(`pause_session start: ${err.message}. stderr=${stderr.slice(0, 2000)}`)
-          }
-
-          return {
-            content: [{
-              type: 'text',
-              text: JSON.stringify({ status: 'paused', resolvedFile: resolvedFile || null, paused: pausedMsg }, null, 2),
-            }],
-          }
-        }
-
-        if (action === 'run') {
-          if (!pauseChild) throw new Error('No active pause_session. Call action: "start" first.')
-          if (pauseChild.exitCode != null) throw new Error('pause_session subprocess has exited')
-          const { code = '', timeout = 60000 } = args
-          pauseChild.stdin.write(code + '\n')
-          const resp = await pauseAwaitProtocol({ timeout })
-          return { content: [{ type: 'text', text: JSON.stringify(resp, null, 2) }] }
-        }
-
-        throw new Error(`pause_session unknown action: ${action}`)
+      case 'pause': {
+        if (!pauseChild) throw new Error('No paused test. Run a test first via run_test; if it calls pause(), this tool becomes available.')
+        if (pauseChild.exitCode != null) throw new Error('Test subprocess has already exited.')
+        const { code = '', timeout = 60000 } = args || {}
+        pauseChild.stdin.write(code + '\n')
+        const resp = await pauseAwaitProtocol({ timeout })
+        return { content: [{ type: 'text', text: JSON.stringify(resp, null, 2) }] }
       }
 
       case 'run_code': {
@@ -724,6 +655,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
 
       case 'run_test': {
         return await withLock(async () => {
+          if (pauseChild && pauseChild.exitCode == null) {
+            throw new Error('A previous run_test is still paused. Send code:"resume" or code:"exit" via the "pause" tool first.')
+          }
           const { test, timeout = 60000, config: configPathArg } = args || {}
           const { configPath, configDir } = resolveConfigPath(configPathArg)
 
@@ -737,27 +671,70 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
           else if (looksLikePath(test)) runArgs.push(test)
           else runArgs.push('--grep', String(test))
 
-          const res = isNodeScript
-            ? await runCmd(process.execPath, [cli, ...runArgs], { cwd: root, timeout })
-            : await runCmd(cli, runArgs, { cwd: root, timeout })
+          pauseLogs = []
+          pauseStdoutBuf = ''
+          pauseExitInfo = null
+          pauseProtocolWaiters = []
+
+          const env = {
+            ...process.env,
+            CODECEPTJS_MCP: '1',
+            CODECEPTJS_MCP_PAUSE: '1',
+            NODE_ENV: process.env.NODE_ENV || 'test',
+          }
+
+          const cmd = isNodeScript ? process.execPath : cli
+          const cmdArgs = isNodeScript ? [cli, ...runArgs] : runArgs
 
-          const { code, out, err } = res
+          pauseChild = spawn(cmd, cmdArgs, { cwd: root, env, stdio: ['pipe', 'pipe', 'pipe'] })
+          let stderrBuf = ''
+          pauseChild.stdout.on('data', d => { pauseStdoutBuf = pauseProcessChunk(pauseStdoutBuf, d, 'stdout') })
+          pauseChild.stderr.on('data', d => { stderrBuf = pauseProcessChunk(stderrBuf, d, 'stderr') })
+          pauseChild.on('exit', (code, signal) => {
+            pauseExitInfo = { code, signal }
+            pauseTeardown()
+          })
+
+          let first
+          try {
+            first = await pauseAwaitProtocol({ timeout })
+          } catch (err) {
+            try { pauseChild?.kill('SIGKILL') } catch {}
+            throw err
+          }
+
+          if (first.event === 'paused') {
+            return {
+              content: [{
+                type: 'text',
+                text: JSON.stringify({
+                  status: 'paused',
+                  resolvedFile: resolvedFile || null,
+                  paused: first,
+                  note: 'Test hit pause(). Use the "pause" tool to send code; send code:"resume" to let the test finish.',
+                }, null, 2),
+              }],
+            }
+          }
 
+          // Subprocess exited without pausing — collect normal reporter output
+          const stdoutText = pauseLogs.filter(l => l.stream === 'stdout').map(l => l.line).join('\n')
+          const stderrText = pauseLogs.filter(l => l.stream === 'stderr').map(l => l.line).join('\n')
           let parsed = null
-          const jsonStart = out.indexOf('{')
-          const jsonEnd = out.lastIndexOf('}')
+          const jsonStart = stdoutText.indexOf('{')
+          const jsonEnd = stdoutText.lastIndexOf('}')
           if (jsonStart !== -1 && jsonEnd !== -1 && jsonEnd > jsonStart) {
-            try { parsed = JSON.parse(out.slice(jsonStart, jsonEnd + 1)) } catch {}
+            try { parsed = JSON.parse(stdoutText.slice(jsonStart, jsonEnd + 1)) } catch {}
           }
 
           return {
             content: [{
               type: 'text',
               text: JSON.stringify({
-                meta: { exitCode: code, cli, root, configPath, args: runArgs, resolvedFile: resolvedFile || null },
+                meta: { exitCode: first.exitInfo?.code ?? null, cli, root, configPath, args: runArgs, resolvedFile: resolvedFile || null },
                 reporterJson: parsed,
-                stderr: err ? err.slice(0, 20000) : '',
-                rawStdout: parsed ? '' : out.slice(0, 20000),
+                stderr: stderrText.slice(0, 20000),
+                rawStdout: parsed ? '' : stdoutText.slice(0, 20000),
               }, null, 2),
             }],
           }
diff --git a/docs/mcp.md b/docs/mcp.md
@@ -235,80 +235,86 @@ Capture the current state of the browser without performing any action. Useful f
 }
 ```
 
-### pause_session
+### pause
 
-Mirrors the human `pause()` REPL for an AI agent: send a code string, get a result with artifacts (same shape as `run_code`).
+Send one line of input to a test that's currently paused at `pause()`. Mirrors the human pause REPL — send code, get a result with the same artifact bundle as `run_code`.
 
-Two actions:
+`pause` is only valid after a `run_test` call has returned `status: "paused"` and its subprocess is still alive. The flow is:
 
-| Action | Params | Effect |
-|---|---|---|
-| `start` | `test`, `config?`, `timeout?` | Spawn the test subprocess in pause yield mode. Resolves when the test hits `pause()` and emits `{event:"paused"}`. |
-| `run` | `code`, `timeout?` | Send one line of input — same syntax as the TTY REPL. Returns the next protocol message from the subprocess. |
+1. Agent calls `run_test`. If the test reaches `pause()`, `run_test` returns `{status:"paused", paused:{event:"paused"}}` and keeps the subprocess alive.
+2. Agent calls `pause` with `code` strings to drive the REPL.
+3. Agent sends `code:"resume"` to let the test finish, or `code:"exit"` to abort it; either way the subprocess exits and pause state is cleared.
 
-`code` follows the TTY pause REPL conventions:
-- An expression like `click('Save')` runs as `I.click('Save')` and returns `{event:"result", ok, value, artifacts, error}`.
-- Prefix `=>` to evaluate raw JS: `=> myVar.id`.
-- `""` (empty) → step to the next test step. The subprocess re-pauses; response is `{event:"step"}` followed by `{event:"paused"}` on the next `run` call.
-- `"resume"` → continue the test to completion. Response is `{event:"resumed"}`; the subprocess will exit on its own.
-- `"exit"` → abort the paused test. Same `{event:"resumed"}` response, then exit.
+`code` syntax (same as the TTY pause REPL):
 
-Each result includes the artifact bundle (URL, ARIA, HTML, screenshot, console, storage), like `run_code`. If the subprocess exits during a `run`, the response is `{event:"exited", exitInfo:{code, signal}}`.
+| Input | Effect |
+|---|---|
+| `"click('Save')"` | Runs as `I.click('Save')`. Returns `{event:"result", ok, value, artifacts, error}`. |
+| `"=> myVar.id"` | Evaluates raw JS in the paused scope. Returns `{event:"result", ...}`. |
+| `""` (empty) | Step to the next test step. Returns `{event:"step"}`; the subprocess re-pauses, and the next `pause` call returns `{event:"paused"}` again. |
+| `"resume"` | Continue the test to completion. Returns `{event:"resumed"}`; the subprocess will exit on its own. |
+| `"exit"` | Abort the paused test. Returns `{event:"resumed"}`, then the subprocess exits. |
 
-**Lifecycle example:**
+If the subprocess exits during a call, the response is `{event:"exited", exitInfo:{code, signal}}` and pause state is cleared.
+
+**Parameters:**
+- `code` (optional, default `""`): the line to send.
+- `timeout` (optional): ms to wait for the response (default 60000).
+
+**Example:**
 
 ```json
-{ "name": "pause_session", "arguments": { "action": "start", "test": "checkout_test" } }
-{ "name": "pause_session", "arguments": { "action": "run", "code": "grabCurrentUrl()" } }
-{ "name": "pause_session", "arguments": { "action": "run", "code": "click('Save')" } }
-{ "name": "pause_session", "arguments": { "action": "run", "code": "resume" } }
-```
+{ "name": "run_test", "arguments": { "test": "checkout_test" } }
+// → { "status": "paused", "paused": { "event": "paused" }, ... }
+
+{ "name": "pause", "arguments": { "code": "grabCurrentUrl()" } }
+// → { "event": "result", "ok": true, "value": "http://...", "artifacts": { ... } }
 
-A single `pause_session` instance owns one subprocess. Concurrent `start` calls are rejected — send `code: "exit"` (or `"resume"`) first.
+{ "name": "pause", "arguments": { "code": "resume" } }
+// → { "event": "resumed" }
+```
 
 **Notes:**
-- The subprocess is spawned with `CODECEPTJS_MCP=1` and `CODECEPTJS_MCP_PAUSE=1` so `pause()` calls in the test land in yield mode.
-- `pause()` calls running under `CODECEPTJS_MCP=1` *without* `CODECEPTJS_MCP_PAUSE=1` print a notice and return immediately so leftover `pause()` calls don't deadlock CI runs invoked through MCP.
+- `run_test` always spawns its subprocess with `CODECEPTJS_MCP=1` and `CODECEPTJS_MCP_PAUSE=1`, so any `pause()` call in the test lands in yield mode.
+- A `pause()` call running with `CODECEPTJS_MCP=1` set but `CODECEPTJS_MCP_PAUSE` unset (e.g., a different MCP-aware caller, or future tooling) prints a notice and returns immediately, so leftover `pause()` calls don't deadlock.
 - TTY behaviour (`npx codeceptjs run --debug` at a terminal) is unchanged — the readline REPL is used whenever `process.stdin.isTTY` is true.
 
 ### run_test
 
-Run a specific test by name or file path. Uses subprocess to run tests with isolation.
+Run a specific test by name or file path. The subprocess is spawned with pause yield mode enabled — if the test calls `pause()`, this tool returns early and the agent drives the REPL via the [`pause`](#pause) tool.
 
 **Parameters:**
 - `test` (required): Test name or file path
 - `timeout` (optional): Timeout in milliseconds (default: 60000)
 - `config` (optional): Path to codecept.conf.js
 
-**Returns:**
+**Returns (test completed normally):**
 ```json
 {
-  "meta": {
-    "exitCode": 0,
-    "cli": "/path/to/codecept.js",
-    "root": "/project/root",
-    "configPath": "/path/to/codecept.conf.js",
-    "args": ["run", "--config", "...", "--reporter", "json", "test_file.js"],
-    "resolvedFile": "/full/path/to/test_file.js"
-  },
-  "reporterJson": {
-    "stats": {
-      "tests": 3,
-      "passes": 2,
-      "failures": 1
-    }
-  },
+  "meta": { "exitCode": 0, "cli": "...", "root": "...", "configPath": "...", "args": [...], "resolvedFile": "..." },
+  "reporterJson": { "stats": { "tests": 3, "passes": 2, "failures": 1 } },
   "stderr": "",
   "rawStdout": ""
 }
 ```
 
+**Returns (test reached `pause()`):**
+```json
+{
+  "status": "paused",
+  "resolvedFile": "/path/to/test.js",
+  "paused": { "__mcpPause": true, "event": "paused" },
+  "note": "Test hit pause(). Use the \"pause\" tool to send code; send code:\"resume\" to let the test finish."
+}
+```
+
 **Features:**
 - Automatically resolves test names to file paths
 - Supports partial test name matching
 - Uses json reporter for structured output
 - Executes in subprocess for isolation
 - Includes stderr for debugging
+- Yields on `pause()` so an agent can drive the REPL through the `pause` tool
 
 **Example:**
 ```json