Skip to content

Commit bf8f278

Browse files
Fix(lsp): Make server stable with async context loading
1 parent 05c793c commit bf8f278

2 files changed

Lines changed: 156 additions & 47 deletions

File tree

sqlmesh/lsp/main.py

Lines changed: 156 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"""A Language Server Protocol (LSP) server for SQL with SQLMesh integration, refactored without globals."""
33

44
from itertools import chain
5+
import asyncio
56
import logging
67
import typing as t
78
from pathlib import Path
@@ -205,7 +206,7 @@ def function_call(ls: LanguageServer, params: t.Any) -> t.Dict[str, t.Any]:
205206
self.server.feature(name)(create_function_call(method))
206207

207208
@self.server.feature(types.INITIALIZE)
208-
def initialize(ls: LanguageServer, params: types.InitializeParams) -> None:
209+
async def initialize(ls: LanguageServer, params: types.InitializeParams) -> None:
209210
"""Initialize the server when the client connects."""
210211
try:
211212
# Check if the client supports pull diagnostics
@@ -232,7 +233,7 @@ def initialize(ls: LanguageServer, params: types.InitializeParams) -> None:
232233
for ext in ("py", "yml", "yaml"):
233234
config_path = folder_path / f"config.{ext}"
234235
if config_path.exists():
235-
if self._create_lsp_context([folder_path]):
236+
if await self._create_lsp_context([folder_path]):
236237
loaded_sqlmesh_message(ls, folder_path)
237238
return # Exit after successfully loading any config
238239
except Exception as e:
@@ -254,14 +255,60 @@ def did_open(ls: LanguageServer, params: types.DidOpenTextDocumentParams) -> Non
254255
)
255256

256257
@self.server.feature(types.TEXT_DOCUMENT_DID_SAVE)
257-
def did_save(ls: LanguageServer, params: types.DidSaveTextDocumentParams) -> None:
258+
async def did_save(ls: LanguageServer, params: types.DidSaveTextDocumentParams) -> None:
258259
uri = URI(params.text_document.uri)
259260
if self.lsp_context is None:
260261
return
261262

262263
context = self.lsp_context.context
263-
context.load()
264-
self.lsp_context = LSPContext(context)
264+
retry_count = 1 # Single retry for save operations to avoid blocking UI
265+
last_error = None
266+
267+
for attempt in range(retry_count):
268+
try:
269+
# Run context.load() in a separate thread with asyncio timeout
270+
loop = asyncio.get_event_loop()
271+
272+
# Run the blocking operation in a thread pool
273+
try:
274+
await asyncio.wait_for(
275+
loop.run_in_executor(None, context.load), timeout=30.0
276+
)
277+
self.lsp_context = LSPContext(context)
278+
break # Success, exit retry loop
279+
except asyncio.TimeoutError:
280+
ls.log_trace(
281+
f"Context.load() timed out after 30 seconds (attempt {attempt + 1}/{retry_count})"
282+
)
283+
if attempt < retry_count - 1:
284+
# Wait before retrying
285+
wait_time = 2**attempt # 1s, 2s
286+
ls.log_trace(f"Retrying in {wait_time} seconds...")
287+
await asyncio.sleep(wait_time)
288+
continue
289+
else:
290+
ls.show_message(
291+
"SQLMesh: Model reload timed out after multiple attempts. The LSP server will continue with the previous state.",
292+
types.MessageType.Warning,
293+
)
294+
return
295+
except Exception as e:
296+
last_error = e
297+
ls.log_trace(
298+
f"Error reloading context (attempt {attempt + 1}/{retry_count}): {e}"
299+
)
300+
301+
if attempt < retry_count - 1:
302+
# Wait before retrying
303+
wait_time = 2**attempt # 1s, 2s
304+
ls.log_trace(f"Retrying in {wait_time} seconds...")
305+
await asyncio.sleep(wait_time)
306+
else:
307+
ls.show_message(
308+
f"SQLMesh: Error reloading models after {retry_count} attempts: {str(last_error)}. The LSP server will continue with the previous state.",
309+
types.MessageType.Warning,
310+
)
311+
return
265312

266313
# Only publish diagnostics if client doesn't support pull diagnostics
267314
if not self.client_supports_pull_diagnostics:
@@ -616,13 +663,37 @@ def _get_diagnostics_for_uri(self, uri: URI) -> t.Tuple[t.List[types.Diagnostic]
616663
return [], 0
617664

618665
def _context_get_or_load(self, document_uri: t.Optional[URI] = None) -> LSPContext:
666+
"""Synchronous wrapper for async context loading.
667+
668+
Always attempts to create context if it doesn't exist, regardless of previous failures.
669+
"""
670+
if self.lsp_context is None:
671+
# Always try to load context when it's needed
672+
self.server.log_trace("Context not loaded, attempting to create...")
673+
# Run the async method in a new event loop if needed
674+
import asyncio
675+
676+
try:
677+
loop = asyncio.get_running_loop()
678+
# We're already in an async context, can't use asyncio.run
679+
raise RuntimeError("Cannot load context synchronously from async context")
680+
except RuntimeError:
681+
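# NOTE: reached both when no loop is running AND when the RuntimeError above was raised from inside a running loop; asyncio.run only succeeds in the former case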
682+
asyncio.run(self._ensure_context_for_document(document_uri))
683+
if self.lsp_context is None:
684+
# If we still don't have a context after trying to load, raise an error
685+
# But don't prevent future attempts
686+
raise RuntimeError("No context found able to get or load")
687+
return self.lsp_context
688+
689+
async def _context_get_or_load_async(self, document_uri: t.Optional[URI] = None) -> LSPContext:
619690
if self.lsp_context is None:
620-
self._ensure_context_for_document(document_uri)
691+
await self._ensure_context_for_document(document_uri)
621692
if self.lsp_context is None:
622693
raise RuntimeError("No context found able to get or load")
623694
return self.lsp_context
624695

625-
def _ensure_context_for_document(
696+
async def _ensure_context_for_document(
626697
self,
627698
document_uri: t.Optional[URI] = None,
628699
) -> None:
@@ -635,12 +706,12 @@ def _ensure_context_for_document(
635706
if document_path.is_file() and document_path.suffix in (".sql", ".py"):
636707
document_folder = document_path.parent
637708
if document_folder.is_dir():
638-
self._ensure_context_in_folder(document_folder)
709+
await self._ensure_context_in_folder(document_folder)
639710
return
640711

641-
return self._ensure_context_in_folder()
712+
return await self._ensure_context_in_folder()
642713

643-
def _ensure_context_in_folder(self, folder_path: t.Optional[Path] = None) -> None:
714+
async def _ensure_context_in_folder(self, folder_path: t.Optional[Path] = None) -> None:
644715
if self.lsp_context is not None:
645716
return
646717

@@ -649,7 +720,7 @@ def _ensure_context_in_folder(self, folder_path: t.Optional[Path] = None) -> Non
649720
for ext in ("py", "yml", "yaml"):
650721
config_path = workspace_folder / f"config.{ext}"
651722
if config_path.exists():
652-
if self._create_lsp_context([workspace_folder]):
723+
if await self._create_lsp_context([workspace_folder]):
653724
return
654725

655726
# Then, check the provided folder recursively
@@ -660,7 +731,7 @@ def _ensure_context_in_folder(self, folder_path: t.Optional[Path] = None) -> Non
660731
for ext in ("py", "yml", "yaml"):
661732
config_path = path / f"config.{ext}"
662733
if config_path.exists():
663-
if self._create_lsp_context([path]):
734+
if await self._create_lsp_context([path]):
664735
return
665736

666737
path = path.parent
@@ -672,37 +743,91 @@ def _ensure_context_in_folder(self, folder_path: t.Optional[Path] = None) -> Non
672743
+ (f" or in {folder_path}" if folder_path else "")
673744
)
674745

675-
def _create_lsp_context(self, paths: t.List[Path]) -> t.Optional[LSPContext]:
746+
async def _create_lsp_context(
747+
self, paths: t.List[Path], retry_count: int = 1
748+
) -> t.Optional[LSPContext]:
676749
"""Create a new LSPContext instance using the configured context class.
677750
678751
On success, sets self.lsp_context and returns the created context.
679752
680753
Args:
681754
paths: List of paths to pass to the context constructor
755+
retry_count: Number of times to retry on failure (default: 1)
682756
683757
Returns:
684758
A new LSPContext instance wrapping the created context, or None if creation fails
685759
"""
686-
try:
687-
if self.lsp_context is None:
688-
context = self.context_class(paths=paths)
689-
loaded_sqlmesh_message(self.server, paths[0])
690-
else:
691-
self.lsp_context.context.load()
692-
context = self.lsp_context.context
693-
self.lsp_context = LSPContext(context)
694-
return self.lsp_context
695-
except Exception as e:
696-
# Only show the error message once
697-
if not self.has_raised_loading_error:
698-
self.server.show_message(
699-
f"Error creating context: {e}",
700-
types.MessageType.Error,
760+
# Always attempt to create context when requested
761+
if self.has_raised_loading_error:
762+
self.server.log_trace("Retrying context creation after previous failure...")
763+
# Give a small delay to allow file system changes to be registered
764+
await asyncio.sleep(0.5)
765+
766+
last_error = None
767+
768+
for attempt in range(retry_count):
769+
try:
770+
if self.lsp_context is None:
771+
context = self.context_class(paths=paths)
772+
# Show the success message; if recovering from a previous error, also show a recovery notice first
773+
if self.has_raised_loading_error:
774+
self.server.show_message(
775+
f"Successfully loaded SQLMesh context from {paths[0]} (recovered from previous error)",
776+
types.MessageType.Info,
777+
)
778+
loaded_sqlmesh_message(self.server, paths[0])
779+
# Reset error flag on successful load
780+
self.has_raised_loading_error = False
781+
else:
782+
# Run context.load() with asyncio timeout
783+
loop = asyncio.get_event_loop()
784+
785+
try:
786+
await asyncio.wait_for(
787+
loop.run_in_executor(None, self.lsp_context.context.load), timeout=30.0
788+
)
789+
context = self.lsp_context.context
790+
# Reset error flag on successful load
791+
self.has_raised_loading_error = False
792+
except asyncio.TimeoutError:
793+
self.server.log_trace(
794+
f"Context.load() timed out after 30 seconds in _create_lsp_context (attempt {attempt + 1}/{retry_count})"
795+
)
796+
if attempt < retry_count - 1:
797+
# Wait before retrying, using exponential backoff (1s, 2s, 4s)
798+
wait_time = 2**attempt
799+
self.server.log_trace(f"Retrying in {wait_time} seconds...")
800+
await asyncio.sleep(wait_time)
801+
continue
802+
return None
803+
804+
self.lsp_context = LSPContext(context)
805+
return self.lsp_context
806+
807+
except Exception as e:
808+
last_error = e
809+
self.server.log_trace(
810+
f"Error creating context (attempt {attempt + 1}/{retry_count}): {e}"
701811
)
702-
self.has_raised_loading_error = True
703812

704-
self.server.log_trace(f"Error creating context: {e}")
705-
return None
813+
if attempt < retry_count - 1:
814+
# Wait before retrying (exponential backoff)
815+
wait_time = 2**attempt # 1s, 2s, 4s
816+
self.server.log_trace(f"Retrying in {wait_time} seconds...")
817+
await asyncio.sleep(wait_time)
818+
else:
819+
# Show error message only if we haven't shown it recently
820+
if not self.has_raised_loading_error:
821+
self.server.show_message(
822+
f"Error creating context: {last_error}",
823+
types.MessageType.Error,
824+
)
825+
self.has_raised_loading_error = True
826+
else:
827+
# Log the error but don't show notification to avoid spamming
828+
self.server.log_trace(f"Context creation failed again: {last_error}")
829+
830+
return None
706831

707832
@staticmethod
708833
def _diagnostic_to_lsp_diagnostic(

vscode/extension/src/lsp/lsp.ts

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,6 @@ export class LSPClient implements Disposable {
8181
transport: TransportKind.stdio,
8282
options: {
8383
cwd: workspacePath,
84-
// TODO: This is a temporary fix to avoid the issue with the LSP server
85-
// crashing when the number of workers is too high. This is a workaround
86-
// to avoid the issue. Once fixed, we should remove the whole env block.
87-
env: {
88-
MAX_FORK_WORKERS: '1',
89-
...process.env,
90-
...sqlmesh.value.env,
91-
},
9284
},
9385
args: sqlmesh.value.args,
9486
},
@@ -97,14 +89,6 @@ export class LSPClient implements Disposable {
9789
transport: TransportKind.stdio,
9890
options: {
9991
cwd: workspacePath,
100-
env: {
101-
// TODO: This is a temporary fix to avoid the issue with the LSP server
102-
// crashing when the number of workers is too high. This is a workaround
103-
// to avoid the issue. Once fixed, we should remove the whole env block.
104-
MAX_FORK_WORKERS: '1',
105-
...process.env,
106-
...sqlmesh.value.env,
107-
},
10892
},
10993
args: sqlmesh.value.args,
11094
},

0 commit comments

Comments
 (0)