55from pathlib import Path
66from typing import TYPE_CHECKING , Any , ClassVar
77
8+ from sqlalchemy import event
89from sqlalchemy .exc import IntegrityError , OperationalError
910from sqlalchemy .ext .asyncio import AsyncEngine , async_sessionmaker , create_async_engine
10- from sqlalchemy .sql import insert , select , text
11+ from sqlalchemy .sql import insert , select
1112from typing_extensions import override
1213
1314from crawlee ._utils .docs import docs_group
2223if TYPE_CHECKING :
2324 from types import TracebackType
2425
26+ from sqlalchemy .engine .interfaces import DBAPIConnection
2527 from sqlalchemy .ext .asyncio import AsyncSession
28+ from sqlalchemy .pool import ConnectionPoolEntry
2629
2730
2831logger = getLogger (__name__ )
@@ -72,8 +75,7 @@ def __init__(
7275 self ._initialized = False
7376 self .session_maker : None | async_sessionmaker [AsyncSession ] = None
7477
75- # Flag needed to apply optimizations only for default database
76- self ._default_flag = self ._engine is None and self ._connection_string is None
78+ self ._listeners_registered = False
7779 self ._dialect_name : str | None = None
7880
7981 # Call the notification only once
@@ -115,9 +117,10 @@ async def initialize(self, configuration: Configuration) -> None:
115117 """
116118 if not self ._initialized :
117119 engine = self ._get_or_create_engine (configuration )
118- async with engine .begin () as conn :
119- self ._dialect_name = engine .dialect .name
120120
121+ self ._dialect_name = engine .dialect .name
122+
123+ async with engine .begin () as conn :
121124 if self ._dialect_name not in self ._SUPPORTED_DIALECTS :
122125 raise ValueError (
123126 f'Unsupported database dialect: { self ._dialect_name } . Supported: '
@@ -128,16 +131,8 @@ async def initialize(self, configuration: Configuration) -> None:
128131 # Rollback the transaction when an exception occurs.
129132 # This is likely an attempt to create a database from several parallel processes.
130133 try :
131- # Set SQLite pragmas for performance and consistency
132- if self ._default_flag :
133- await conn .execute (text ('PRAGMA journal_mode=WAL' )) # Better concurrency
134- await conn .execute (text ('PRAGMA synchronous=NORMAL' )) # Balanced safety/speed
135- await conn .execute (text ('PRAGMA cache_size=100000' )) # 100MB cache
136- await conn .execute (text ('PRAGMA temp_store=MEMORY' )) # Memory temp storage
137- await conn .execute (text ('PRAGMA mmap_size=268435456' )) # 256MB memory mapping
138- await conn .execute (text ('PRAGMA foreign_keys=ON' )) # Enforce constraints
139- await conn .execute (text ('PRAGMA busy_timeout=30000' )) # 30s busy timeout
140134 await conn .run_sync (Base .metadata .create_all , checkfirst = True )
135+
141136 from crawlee import __version__ # Noqa: PLC0415
142137
143138 db_version = (await conn .execute (select (VersionDb ))).scalar_one_or_none ()
@@ -153,6 +148,7 @@ async def initialize(self, configuration: Configuration) -> None:
153148 )
154149 elif not db_version :
155150 await conn .execute (insert (VersionDb ).values (version = __version__ ))
151+
156152 except (IntegrityError , OperationalError ):
157153 await conn .rollback ()
158154
@@ -161,6 +157,10 @@ async def initialize(self, configuration: Configuration) -> None:
async def close(self) -> None:
    """Dispose of the engine's connection pool and forget the engine.

    Does nothing when no engine is currently held. If the 'connect' listener was
    registered on the engine, it is detached before the pool is disposed.
    """
    engine = self._engine
    if engine is None:
        return

    # Detach the listener first so the instance is no longer referenced by the engine's events.
    if self._listeners_registered:
        event.remove(engine.sync_engine, 'connect', self._on_connect)
        self._listeners_registered = False

    await engine.dispose()
    self._engine = None
166166
@@ -285,4 +285,21 @@ def _get_or_create_engine(self, configuration: Configuration) -> AsyncEngine:
285285 connect_args = connect_args ,
286286 ** kwargs ,
287287 )
288+
289+ event .listen (self ._engine .sync_engine , 'connect' , self ._on_connect )
290+ self ._listeners_registered = True
291+
288292 return self ._engine
293+
def _on_connect(self, dbapi_conn: DBAPIConnection, _connection_record: ConnectionPoolEntry) -> None:
    """Event listener for new database connections that applies SQLite pragmas.

    The pragmas are executed only when the stored dialect name is 'sqlite'; for any
    other dialect the connection is left untouched.

    Args:
        dbapi_conn: The raw DBAPI connection that was just opened.
        _connection_record: The pool entry owning the connection; not used.
    """
    if self._dialect_name != 'sqlite':
        return

    pragmas = (
        'PRAGMA journal_mode=WAL',  # Better concurrency
        'PRAGMA synchronous=NORMAL',  # Balanced safety/speed
        'PRAGMA cache_size=100000',  # 100000 cache pages (a positive value is a page count, not KiB)
        'PRAGMA temp_store=MEMORY',  # Memory temp storage
        'PRAGMA mmap_size=268435456',  # 256MB memory mapping
        'PRAGMA foreign_keys=ON',  # Enforce constraints
        'PRAGMA busy_timeout=30000',  # 30s busy timeout
    )

    cursor = dbapi_conn.cursor()
    try:
        for statement in pragmas:
            cursor.execute(statement)
    finally:
        # Close the cursor even if a pragma fails, so the error propagates without leaking it.
        cursor.close()
0 commit comments