Skip to content

Commit d4d725e

Browse files
DavertMik and claude committed
refactor(mcp): drop subprocess for pause — run in-process via shared container
Previously pause yield mode spawned a test subprocess and shuttled JSON-line messages through stdin/stdout. That was a lot of plumbing for something the existing run_step_by_step tool already does cleanly: run codecept in-process in the MCP server itself. Now lib/pause.js exposes setPauseHandler/setNextStep. The MCP server installs a handler at startup that turns pause() into a Promise the agent controls. run_test races bootstrap+run() vs that paused promise; on pause it returns {status:"paused"} with the test promise stashed at module level. The pause tool drives the REPL by running code through the same I that the test is using, no IPC. resume/exit await the test promise and return the final reporter result. Drops: pauseChild, pauseProtocolWaiters, pauseProcessChunk, mcpYieldSession, emitMcpProtocol, ensureMcpReadline, the CODECEPTJS_MCP* env detection in lib/pause.js. The TTY readline path is unchanged. Net: 270 added, 526 removed across pause/mcp files. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent ef05bd1 commit d4d725e

6 files changed

Lines changed: 270 additions & 526 deletions

File tree

bin/mcp-server.js

Lines changed: 201 additions & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import {
1414
writeTraceMarkdown,
1515
} from '../lib/utils/trace.js'
1616
import event from '../lib/event.js'
17+
import { setPauseHandler, setNextStep } from '../lib/pause.js'
18+
import { EventEmitter } from 'events'
1719
import { fileURLToPath, pathToFileURL } from 'url'
1820
import { dirname, resolve as resolvePath } from 'path'
1921
import path from 'path'
@@ -235,81 +237,59 @@ function outputBaseDir() {
235237
return global.output_dir || resolvePath(process.cwd(), 'output')
236238
}
237239

238-
let pauseChild = null
239-
let pauseLogs = []
240-
let pauseStdoutBuf = ''
241-
let pauseProtocolWaiters = []
242-
let pauseExitInfo = null
243-
244-
function pauseProcessStdoutLine(line) {
245-
if (!line) return
246-
if (line.trim().startsWith('{')) {
247-
try {
248-
const msg = JSON.parse(line.trim())
249-
if (msg && msg.__mcpPause) {
250-
const waiter = pauseProtocolWaiters.shift()
251-
if (waiter) waiter(msg)
252-
else pauseLogs.push({ stream: 'protocol-unwaited', line })
253-
return
254-
}
255-
} catch {}
256-
}
257-
pauseLogs.push({ stream: 'stdout', line })
258-
if (pauseLogs.length > 500) pauseLogs.splice(0, pauseLogs.length - 500)
259-
}
260-
261-
function pauseProcessChunk(buf, chunk, stream) {
262-
buf += chunk.toString('utf8')
263-
let idx
264-
while ((idx = buf.indexOf('\n')) !== -1) {
265-
const line = buf.slice(0, idx)
266-
buf = buf.slice(idx + 1)
267-
if (stream === 'stdout') pauseProcessStdoutLine(line)
268-
else {
269-
pauseLogs.push({ stream: 'stderr', line })
270-
if (pauseLogs.length > 500) pauseLogs.splice(0, pauseLogs.length - 500)
271-
}
272-
}
273-
return buf
274-
}
275-
276-
function pauseAwaitProtocol({ timeout = 60000 } = {}) {
277-
return new Promise((resolve, reject) => {
278-
if (!pauseChild) return reject(new Error('No active pause_session. Call action: "start" first.'))
279-
let done = false
280-
const timer = setTimeout(() => {
281-
if (done) return
282-
done = true
283-
const i = pauseProtocolWaiters.indexOf(receiver)
284-
if (i >= 0) pauseProtocolWaiters.splice(i, 1)
285-
pauseChild?.removeListener('exit', onExit)
286-
reject(new Error(`Timeout waiting for pause_session response after ${timeout}ms`))
287-
}, timeout)
288-
const cleanup = () => {
289-
done = true
290-
clearTimeout(timer)
291-
pauseChild?.removeListener('exit', onExit)
292-
}
293-
const receiver = msg => {
294-
if (done) return
295-
cleanup()
296-
resolve(msg)
297-
}
298-
const onExit = () => {
299-
if (done) return
300-
const i = pauseProtocolWaiters.indexOf(receiver)
301-
if (i >= 0) pauseProtocolWaiters.splice(i, 1)
302-
cleanup()
303-
resolve({ event: 'exited', exitInfo: pauseExitInfo })
240+
// In-process pause coordination. When a test running through run_test calls
241+
// pause(), the handler registered via setPauseHandler resolves a "paused"
242+
// promise that run_test is racing against test completion. The "pause" tool
243+
// then drives the REPL by calling setNextStep() and resolving the controller.
244+
let pausedController = null // { resolveContinue, registeredVariables }
245+
let pendingRunPromise = null // run_test's run() promise while paused
246+
let pendingRunResults = null // results array being collected while paused
247+
let pendingRunCleanup = null // cleanup callback to detach test.after listener
248+
let pendingRunIO = null // saved stdout/stderr handles to restore after run completes
249+
const pauseEvents = new EventEmitter()
250+
251+
setPauseHandler(({ registeredVariables }) => {
252+
return new Promise(resolve => {
253+
pausedController = {
254+
registeredVariables,
255+
resolveContinue: () => {
256+
pausedController = null
257+
resolve()
258+
},
304259
}
305-
pauseProtocolWaiters.push(receiver)
306-
pauseChild.once('exit', onExit)
260+
pauseEvents.emit('paused')
307261
})
262+
})
263+
264+
async function captureLiveArtifacts(prefix = 'pause') {
265+
const helper = pickActingHelper(container.helpers())
266+
if (!helper) return {}
267+
const dir = snapshotDirFor(outputBaseDir())
268+
mkdirp.sync(dir)
269+
const captured = await captureSnapshot(helper, { dir, prefix })
270+
return artifactsToFileUrls(captured, dir)
308271
}
309272

310-
function pauseTeardown() {
311-
pauseProtocolWaiters = []
312-
pauseChild = null
273+
function collectRunCompletion(errorMessage) {
274+
const results = pendingRunResults || []
275+
const stats = {
276+
tests: results.length,
277+
passes: results.filter(r => r.status === 'passed').length,
278+
failures: results.filter(r => r.status === 'failed').length,
279+
}
280+
if (typeof pendingRunCleanup === 'function') pendingRunCleanup()
281+
if (pendingRunIO) {
282+
process.stdout.write = pendingRunIO.origOut
283+
process.stderr.write = pendingRunIO.origErr
284+
pendingRunIO = null
285+
}
286+
pendingRunPromise = null
287+
pendingRunResults = null
288+
return {
289+
status: 'completed',
290+
reporterJson: { stats, tests: results },
291+
error: errorMessage,
292+
}
313293
}
314294

315295
async function initCodecept(configPath) {
@@ -549,12 +529,78 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
549529
}
550530

551531
case 'pause': {
552-
if (!pauseChild) throw new Error('No paused test. Run a test first via run_test; if it calls pause(), this tool becomes available.')
553-
if (pauseChild.exitCode != null) throw new Error('Test subprocess has already exited.')
532+
if (!pausedController) throw new Error('No paused test. Run a test first via run_test; if it calls pause(), this tool becomes available.')
554533
const { code = '', timeout = 60000 } = args || {}
555-
pauseChild.stdin.write(code + '\n')
556-
const resp = await pauseAwaitProtocol({ timeout })
557-
return { content: [{ type: 'text', text: JSON.stringify(resp, null, 2) }] }
534+
const I = container.support('I')
535+
if (!I) throw new Error('I object not available. Make sure helpers are configured.')
536+
537+
// Mirror TTY parseInput: empty -> step; resume/exit -> end pause
538+
if (code === '' || code === 'resume' || code === 'exit') {
539+
setNextStep(code === '')
540+
const ctrl = pausedController
541+
ctrl.resolveContinue()
542+
543+
if (code === '') {
544+
// Wait for the next paused event (test runs one step then re-pauses)
545+
// or for the test to finish.
546+
const finished = pendingRunPromise
547+
? pendingRunPromise.then(() => ({ event: 'completed' }), err => ({ event: 'completed', error: err.message }))
548+
: new Promise(() => {})
549+
const next = await Promise.race([
550+
new Promise(r => pauseEvents.once('paused', () => r({ event: 'paused' }))),
551+
finished,
552+
new Promise(r => setTimeout(() => r({ event: 'step', note: 'Test did not re-pause within timeout' }), timeout)),
553+
])
554+
555+
if (next.event === 'completed') {
556+
const final = collectRunCompletion(next.error)
557+
return { content: [{ type: 'text', text: JSON.stringify(final, null, 2) }] }
558+
}
559+
return { content: [{ type: 'text', text: JSON.stringify(next, null, 2) }] }
560+
}
561+
562+
// resume / exit — let the test run to completion and return the final reporter result
563+
if (!pendingRunPromise) {
564+
return { content: [{ type: 'text', text: JSON.stringify({ event: 'resumed' }, null, 2) }] }
565+
}
566+
let runError = null
567+
try { await pendingRunPromise } catch (err) { runError = err }
568+
const final = collectRunCompletion(runError?.message)
569+
return { content: [{ type: 'text', text: JSON.stringify(final, null, 2) }] }
570+
}
571+
572+
// Run code via the same I container that the test is using
573+
const registeredVariables = pausedController.registeredVariables || {}
574+
let cmd = code
575+
if (cmd.trim().startsWith('=>')) cmd = cmd.trim().substring(2)
576+
else cmd = `I.${cmd}`
577+
578+
let value
579+
let error = null
580+
try {
581+
for (const k of Object.keys(registeredVariables)) {
582+
// eslint-disable-next-line no-eval
583+
eval(`var ${k} = registeredVariables['${k}'];`)
584+
}
585+
// Bind locate locally so the eval'd snippet below can reference locate(...)
586+
const locate = global.locate
587+
// eslint-disable-next-line no-eval
588+
value = await Promise.race([
589+
// eslint-disable-next-line no-eval
590+
eval(`(async () => (${cmd}))()`),
591+
new Promise((_, reject) => setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout)),
592+
])
593+
} catch (err) {
594+
error = err.message
595+
}
596+
597+
const artifacts = await captureLiveArtifacts('pause')
598+
const result = { event: 'result', ok: !error, artifacts }
599+
if (error) result.error = error
600+
if (value !== undefined) {
601+
try { result.value = JSON.parse(JSON.stringify(value)) } catch { result.value = String(value) }
602+
}
603+
return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] }
558604
}
559605

560606
case 'run_code': {
@@ -655,88 +701,98 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
655701

656702
case 'run_test': {
657703
return await withLock(async () => {
658-
if (pauseChild && pauseChild.exitCode == null) {
704+
if (pausedController) {
659705
throw new Error('A previous run_test is still paused. Send code:"resume" or code:"exit" via the "pause" tool first.')
660706
}
661707
const { test, timeout = 60000, config: configPathArg } = args || {}
662-
const { configPath, configDir } = resolveConfigPath(configPathArg)
708+
await initCodecept(configPathArg)
663709

664-
const { cli, root } = findCodeceptCliUpwards(configDir)
665-
const isNodeScript = cli.endsWith('.js')
710+
// Silence stdout/stderr for the duration of the test (and across any
711+
// pause window). Restored in collectRunCompletion or on early throw.
712+
const origOut = process.stdout.write.bind(process.stdout)
713+
const origErr = process.stderr.write.bind(process.stderr)
714+
process.stdout.write = () => true
715+
process.stderr.write = () => true
716+
pendingRunIO = { origOut, origErr }
666717

667-
const resolvedFile = await resolveTestToFile({ cli, root, configPath, test })
668-
const runArgs = ['run', '--config', configPath, '--reporter', 'json']
718+
try {
719+
codecept.loadTests()
720+
721+
let testFiles = codecept.testFiles
722+
if (test) {
723+
const testName = normalizePath(test).toLowerCase()
724+
testFiles = codecept.testFiles.filter(f => {
725+
const filePath = normalizePath(f).toLowerCase()
726+
return filePath.includes(testName) || filePath.endsWith(testName)
727+
})
728+
}
669729

670-
if (resolvedFile) runArgs.push(resolvedFile)
671-
else if (looksLikePath(test)) runArgs.push(test)
672-
else runArgs.push('--grep', String(test))
730+
if (!testFiles.length) throw new Error(`No tests found matching: ${test}`)
731+
const testFile = testFiles[0]
732+
733+
pendingRunResults = []
734+
const onAfter = t => {
735+
pendingRunResults.push({
736+
title: t.title,
737+
file: t.file,
738+
status: t.err ? 'failed' : 'passed',
739+
error: t.err?.message,
740+
duration: t.duration,
741+
})
742+
}
743+
event.dispatcher.on(event.test.after, onAfter)
744+
pendingRunCleanup = () => {
745+
try { event.dispatcher.removeListener(event.test.after, onAfter) } catch {}
746+
pendingRunCleanup = null
747+
}
673748

674-
pauseLogs = []
675-
pauseStdoutBuf = ''
676-
pauseExitInfo = null
677-
pauseProtocolWaiters = []
749+
let runError = null
750+
const runPromise = (async () => {
751+
try {
752+
await codecept.bootstrap()
753+
await codecept.run(testFile)
754+
} catch (err) {
755+
runError = err
756+
throw err
757+
}
758+
})()
678759

679-
const env = {
680-
...process.env,
681-
CODECEPTJS_MCP: '1',
682-
CODECEPTJS_MCP_PAUSE: '1',
683-
NODE_ENV: process.env.NODE_ENV || 'test',
684-
}
760+
const pausedPromise = new Promise(resolve => pauseEvents.once('paused', () => resolve('paused')))
761+
const completedPromise = runPromise.then(() => 'completed', () => 'completed')
685762

686-
const cmd = isNodeScript ? process.execPath : cli
687-
const cmdArgs = isNodeScript ? [cli, ...runArgs] : runArgs
763+
const which = await Promise.race([
764+
completedPromise,
765+
pausedPromise,
766+
new Promise((_, reject) => setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout)),
767+
])
688768

689-
pauseChild = spawn(cmd, cmdArgs, { cwd: root, env, stdio: ['pipe', 'pipe', 'pipe'] })
690-
let stderrBuf = ''
691-
pauseChild.stdout.on('data', d => { pauseStdoutBuf = pauseProcessChunk(pauseStdoutBuf, d, 'stdout') })
692-
pauseChild.stderr.on('data', d => { stderrBuf = pauseProcessChunk(stderrBuf, d, 'stderr') })
693-
pauseChild.on('exit', (code, signal) => {
694-
pauseExitInfo = { code, signal }
695-
pauseTeardown()
696-
})
769+
if (which === 'paused') {
770+
pendingRunPromise = runPromise
771+
return {
772+
content: [{
773+
type: 'text',
774+
text: JSON.stringify({
775+
status: 'paused',
776+
file: testFile,
777+
note: 'Test hit pause(). Use the "pause" tool to send code; send code:"resume" to let the test finish.',
778+
}, null, 2),
779+
}],
780+
}
781+
}
697782

698-
let first
699-
try {
700-
first = await pauseAwaitProtocol({ timeout })
783+
const final = collectRunCompletion(runError?.message)
784+
return { content: [{ type: 'text', text: JSON.stringify({ ...final, file: testFile }, null, 2) }] }
701785
} catch (err) {
702-
try { pauseChild?.kill('SIGKILL') } catch {}
703-
throw err
704-
}
705-
706-
if (first.event === 'paused') {
707-
return {
708-
content: [{
709-
type: 'text',
710-
text: JSON.stringify({
711-
status: 'paused',
712-
resolvedFile: resolvedFile || null,
713-
paused: first,
714-
note: 'Test hit pause(). Use the "pause" tool to send code; send code:"resume" to let the test finish.',
715-
}, null, 2),
716-
}],
786+
// Restore IO if we're throwing out of run_test before collectRunCompletion
787+
if (pendingRunIO) {
788+
process.stdout.write = pendingRunIO.origOut
789+
process.stderr.write = pendingRunIO.origErr
790+
pendingRunIO = null
717791
}
718-
}
719-
720-
// Subprocess exited without pausing — collect normal reporter output
721-
const stdoutText = pauseLogs.filter(l => l.stream === 'stdout').map(l => l.line).join('\n')
722-
const stderrText = pauseLogs.filter(l => l.stream === 'stderr').map(l => l.line).join('\n')
723-
let parsed = null
724-
const jsonStart = stdoutText.indexOf('{')
725-
const jsonEnd = stdoutText.lastIndexOf('}')
726-
if (jsonStart !== -1 && jsonEnd !== -1 && jsonEnd > jsonStart) {
727-
try { parsed = JSON.parse(stdoutText.slice(jsonStart, jsonEnd + 1)) } catch {}
728-
}
729-
730-
return {
731-
content: [{
732-
type: 'text',
733-
text: JSON.stringify({
734-
meta: { exitCode: first.exitInfo?.code ?? null, cli, root, configPath, args: runArgs, resolvedFile: resolvedFile || null },
735-
reporterJson: parsed,
736-
stderr: stderrText.slice(0, 20000),
737-
rawStdout: parsed ? '' : stdoutText.slice(0, 20000),
738-
}, null, 2),
739-
}],
792+
if (typeof pendingRunCleanup === 'function') pendingRunCleanup()
793+
pendingRunPromise = null
794+
pendingRunResults = null
795+
throw err
740796
}
741797
})
742798
}

0 commit comments

Comments
 (0)