@@ -202,20 +202,99 @@ export class Memory {
202202 const systemMessages = messages . filter ( ( msg ) => msg . role === 'system' )
203203 const conversationMessages = messages . filter ( ( msg ) => msg . role !== 'system' )
204204
205- const recentMessages = conversationMessages . slice ( - limit )
205+ // Group messages into conversation turns
206+ // A turn = user message + any tool calls/results + assistant response
207+ const turns = this . groupMessagesIntoTurns ( conversationMessages )
208+
209+ // Take the last N turns
210+ const recentTurns = turns . slice ( - limit )
211+
212+ // Flatten back to messages
213+ const recentMessages = recentTurns . flat ( )
206214
207215 const firstSystemMessage = systemMessages . length > 0 ? [ systemMessages [ 0 ] ] : [ ]
208216
209217 return [ ...firstSystemMessage , ...recentMessages ]
210218 }
211219
/**
 * Partitions a flat message list into conversation turns.
 *
 * Every message whose role is 'user' opens a new turn; each following
 * non-user message (assistant, tool, etc.) is appended to the turn that is
 * currently open. Messages that appear before the first user message are
 * collected into a leading turn of their own, so no message is dropped.
 *
 * @param messages - Messages in conversation order
 * @returns Turns in the same order; flattening them yields the input sequence
 */
private groupMessagesIntoTurns(messages: Message[]): Message[][] {
  const grouped: Message[][] = []

  for (const message of messages) {
    const opensNewTurn = message.role === 'user'

    if (opensNewTurn || grouped.length === 0) {
      // Either a user message, or the very first message of the list:
      // both cases need a fresh turn to hold the message.
      grouped.push([message])
    } else {
      // Anything else belongs to the most recently opened turn.
      grouped[grouped.length - 1].push(message)
    }
  }

  return grouped
}
249+
/**
 * Drops tool messages whose tool_call_id is not declared by any assistant
 * message in the same list. This prevents errors like
 * "tool_result without corresponding tool_use".
 *
 * Matching is by ID only: the IDs are gathered from the entire list before
 * filtering, so the relative position of a tool message and the assistant
 * message that declares its call does not matter. Tool messages with a
 * missing/falsy tool_call_id are dropped as well. Messages of any other role
 * are always kept, including assistant messages whose tool_calls have no
 * tool message in the list.
 *
 * @param messages - Messages to clean
 * @returns A new array with the surviving messages in their original order
 */
private removeOrphanedToolMessages(messages: Message[]): Message[] {
  // Pass 1: gather every tool call ID declared by an assistant message.
  const declaredCallIds = new Set<string>()
  for (const candidate of messages) {
    if (candidate.role !== 'assistant') {
      continue
    }
    if (!candidate.tool_calls || !Array.isArray(candidate.tool_calls)) {
      continue
    }
    for (const call of candidate.tool_calls) {
      if (call.id) {
        declaredCallIds.add(call.id)
      }
    }
  }

  // Pass 2: keep non-tool messages, and tool messages with a declared ID.
  return messages.filter((candidate) => {
    if (candidate.role !== 'tool') {
      return true
    }

    const toolCallId = (candidate as any).tool_call_id
    if (toolCallId && declaredCallIds.has(toolCallId)) {
      return true
    }

    logger.debug('Removing orphaned tool message', { toolCallId })
    return false
  })
}
285+
212286 /**
213287 * Apply token-based sliding window to limit conversation by token count
214288 *
215289 * System message handling:
216290 * - For consistency with message-based sliding window, the first system message is preserved
217291 * - System messages are excluded from the token count
218292 * - This ensures system prompts are always available while limiting conversation history
293+ *
294+ * Turn handling:
295+ * - Messages are grouped into turns (user + tool calls/results + assistant response)
296+ * - Complete turns are added to stay within token limit
297+ * - This prevents breaking tool call/result pairs
219298 */
220299 private applySlidingWindowByTokens (
221300 messages : Message [ ] ,
@@ -233,43 +312,52 @@ export class Memory {
233312 const systemMessages = messages . filter ( ( msg ) => msg . role === 'system' )
234313 const conversationMessages = messages . filter ( ( msg ) => msg . role !== 'system' )
235314
315+ // Group into turns to keep tool call/result pairs together
316+ const turns = this . groupMessagesIntoTurns ( conversationMessages )
317+
236318 const result : Message [ ] = [ ]
237319 let currentTokenCount = 0
238320
239- // Add conversation messages from most recent backwards
240- for ( let i = conversationMessages . length - 1 ; i >= 0 ; i -- ) {
241- const message = conversationMessages [ i ]
242- const messageTokens = getAccurateTokenCount ( message . content , model )
321+ // Add turns from most recent backwards
322+ for ( let i = turns . length - 1 ; i >= 0 ; i -- ) {
323+ const turn = turns [ i ]
324+ const turnTokens = turn . reduce (
325+ ( sum , msg ) => sum + getAccurateTokenCount ( msg . content || '' , model ) ,
326+ 0
327+ )
243328
244- if ( currentTokenCount + messageTokens <= tokenLimit ) {
245- result . unshift ( message )
246- currentTokenCount += messageTokens
329+ if ( currentTokenCount + turnTokens <= tokenLimit ) {
330+ result . unshift ( ... turn )
331+ currentTokenCount += turnTokens
247332 } else if ( result . length === 0 ) {
248- logger . warn ( 'Single message exceeds token limit, including anyway' , {
249- messageTokens ,
333+ logger . warn ( 'Single turn exceeds token limit, including anyway' , {
334+ turnTokens ,
250335 tokenLimit,
251- messageRole : message . role ,
336+ turnMessages : turn . length ,
252337 } )
253- result . unshift ( message )
254- currentTokenCount += messageTokens
338+ result . unshift ( ... turn )
339+ currentTokenCount += turnTokens
255340 break
256341 } else {
257342 // Token limit reached, stop processing
258343 break
259344 }
260345 }
261346
347+ // No need to remove orphaned messages - turns are already complete
348+ const cleanedResult = result
349+
262350 logger . debug ( 'Applied token-based sliding window' , {
263351 totalMessages : messages . length ,
264352 conversationMessages : conversationMessages . length ,
265- includedMessages : result . length ,
353+ includedMessages : cleanedResult . length ,
266354 totalTokens : currentTokenCount ,
267355 tokenLimit,
268356 } )
269357
270358 // Preserve first system message and prepend to results (consistent with message-based window)
271359 const firstSystemMessage = systemMessages . length > 0 ? [ systemMessages [ 0 ] ] : [ ]
272- return [ ...firstSystemMessage , ...result ]
360+ return [ ...firstSystemMessage , ...cleanedResult ]
273361 }
274362
275363 /**
@@ -324,7 +412,7 @@ export class Memory {
324412 // Count tokens used by system messages first
325413 let systemTokenCount = 0
326414 for ( const msg of systemMessages ) {
327- systemTokenCount += getAccurateTokenCount ( msg . content , model )
415+ systemTokenCount += getAccurateTokenCount ( msg . content || '' , model )
328416 }
329417
330418 // Calculate remaining tokens available for conversation messages
@@ -339,30 +427,36 @@ export class Memory {
339427 return systemMessages
340428 }
341429
430+ // Group into turns to keep tool call/result pairs together
431+ const turns = this . groupMessagesIntoTurns ( conversationMessages )
432+
342433 const result : Message [ ] = [ ]
343434 let currentTokenCount = 0
344435
345- for ( let i = conversationMessages . length - 1 ; i >= 0 ; i -- ) {
346- const message = conversationMessages [ i ]
347- const messageTokens = getAccurateTokenCount ( message . content , model )
436+ for ( let i = turns . length - 1 ; i >= 0 ; i -- ) {
437+ const turn = turns [ i ]
438+ const turnTokens = turn . reduce (
439+ ( sum , msg ) => sum + getAccurateTokenCount ( msg . content || '' , model ) ,
440+ 0
441+ )
348442
349- if ( currentTokenCount + messageTokens <= remainingTokens ) {
350- result . unshift ( message )
351- currentTokenCount += messageTokens
443+ if ( currentTokenCount + turnTokens <= remainingTokens ) {
444+ result . unshift ( ... turn )
445+ currentTokenCount += turnTokens
352446 } else if ( result . length === 0 ) {
353- logger . warn ( 'Single message exceeds remaining context window, including anyway' , {
354- messageTokens ,
447+ logger . warn ( 'Single turn exceeds remaining context window, including anyway' , {
448+ turnTokens ,
355449 remainingTokens,
356450 systemTokenCount,
357- messageRole : message . role ,
451+ turnMessages : turn . length ,
358452 } )
359- result . unshift ( message )
360- currentTokenCount += messageTokens
453+ result . unshift ( ... turn )
454+ currentTokenCount += turnTokens
361455 break
362456 } else {
363457 logger . info ( 'Auto-trimmed conversation history to fit context window' , {
364- originalMessages : conversationMessages . length ,
365- trimmedMessages : result . length ,
458+ originalTurns : turns . length ,
459+ trimmedTurns : turns . length - i - 1 ,
366460 conversationTokens : currentTokenCount ,
367461 systemTokens : systemTokenCount ,
368462 totalTokens : currentTokenCount + systemTokenCount ,
@@ -372,6 +466,7 @@ export class Memory {
372466 }
373467 }
374468
469+ // No need to remove orphaned messages - turns are already complete
375470 return [ ...systemMessages , ...result ]
376471 }
377472
@@ -638,7 +733,7 @@ export class Memory {
638733 /**
639734 * Validate inputs to prevent malicious data or performance issues
640735 */
641- private validateInputs ( conversationId ?: string , content ?: string ) : void {
736+ private validateInputs ( conversationId ?: string , content ?: string | null ) : void {
642737 if ( conversationId ) {
643738 if ( conversationId . length > 255 ) {
644739 throw new Error ( 'Conversation ID too long (max 255 characters)' )
0 commit comments