Skip to content

Commit 5f20eb4

Browse files
committed
Merge branch 'master' into storages-retry
2 parents cd64f6f + f241ead commit 5f20eb4

14 files changed

Lines changed: 231 additions & 78 deletions

File tree

CHANGELOG.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,24 @@
33
All notable changes to this project will be documented in this file.
44

55
<!-- git-cliff-unreleased-start -->
6-
## 1.6.2 - **not yet released**
6+
## 1.6.3 - **not yet released**
7+
8+
### 🐛 Bug Fixes
9+
10+
- Fix potential deadlocks in `SitemapRequestLoader` and `RequestManagerTandem` ([#1843](https://github.com/apify/crawlee-python/pull/1843)) ([6226d93](https://github.com/apify/crawlee-python/commit/6226d93f4d25a63f3c88b0f6ec3d2c5431165197)) by [@Mantisus](https://github.com/Mantisus)
11+
12+
13+
<!-- git-cliff-unreleased-end -->
14+
## [1.6.2](https://github.com/apify/crawlee-python/releases/tag/v1.6.2) (2026-04-08)
715

816
### 🐛 Bug Fixes
917

1018
- **file-system:** Reclaim orphaned in-progress requests on RQ recovery ([#1825](https://github.com/apify/crawlee-python/pull/1825)) ([e86794a](https://github.com/apify/crawlee-python/commit/e86794a6e5605432c9331c7cd99edf885527a3eb)) by [@vdusek](https://github.com/vdusek)
1119
- Prevent premature `EventManager` shutdown when multiple crawlers share it ([#1810](https://github.com/apify/crawlee-python/pull/1810)) ([2efb668](https://github.com/apify/crawlee-python/commit/2efb668ad54fb3e8d740066446563d1e8a39d2e8)) by [@Mantisus](https://github.com/Mantisus), closes [#1805](https://github.com/apify/crawlee-python/issues/1805), [#1808](https://github.com/apify/crawlee-python/issues/1808)
1220
- Apply SQLite optimizations to the custom `connection_string` in `SqlStorageClient` ([#1837](https://github.com/apify/crawlee-python/pull/1837)) ([8b53e27](https://github.com/apify/crawlee-python/commit/8b53e273067e27b4ef4b2b4bb40277b15ef6b058)) by [@Mantisus](https://github.com/Mantisus)
21+
- Apply `SharedTimeout` to post-navigation hooks ([#1839](https://github.com/apify/crawlee-python/pull/1839)) ([88bd05a](https://github.com/apify/crawlee-python/commit/88bd05a2127ebfe3cd4eb78c514a63fc9e2cd079)) by [@vdusek](https://github.com/vdusek)
1322

1423

15-
<!-- git-cliff-unreleased-end -->
1624
## [1.6.1](https://github.com/apify/crawlee-python/releases/tag/v1.6.1) (2026-03-30)
1725

1826
### 🐛 Bug Fixes

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "crawlee"
7-
version = "1.6.2"
7+
version = "1.6.3"
88
description = "Crawlee for Python"
99
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
1010
license = { file = "LICENSE" }

src/crawlee/_utils/crypto.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,5 @@ def compute_short_hash(data: bytes, *, length: int = 8) -> str:
2020

2121
def crypto_random_object_id(length: int = 17) -> str:
2222
"""Generate a random object ID."""
23-
chars = 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'
23+
chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
2424
return ''.join(secrets.choice(chars) for _ in range(length))

src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ def _create_static_content_crawler_pipeline(self) -> ContextPipeline[ParsedHttpC
119119
"""Create static content crawler context pipeline with expected pipeline steps."""
120120
return (
121121
ContextPipeline()
122+
.compose(self._manage_shared_navigation_timeout)
122123
.compose(self._execute_pre_navigation_hooks)
123124
.compose(self._make_http_request)
124125
.compose(self._execute_post_navigation_hooks)
@@ -127,26 +128,37 @@ def _create_static_content_crawler_pipeline(self) -> ContextPipeline[ParsedHttpC
127128
.compose(self._handle_blocked_request_by_content)
128129
)
129130

130-
async def _execute_pre_navigation_hooks(
131+
async def _manage_shared_navigation_timeout(
131132
self, context: BasicCrawlingContext
132133
) -> AsyncGenerator[BasicCrawlingContext, None]:
133-
context_id = id(context)
134-
self._shared_navigation_timeouts[context_id] = SharedTimeout(self._navigation_timeout)
134+
"""Initialize and clean up the shared navigation timeout for the current request."""
135+
request_id = id(context.request)
136+
self._shared_navigation_timeouts[request_id] = SharedTimeout(self._navigation_timeout)
135137

136138
try:
137-
for hook in self._pre_navigation_hooks:
138-
async with self._shared_navigation_timeouts[context_id]:
139-
await hook(context)
140-
141139
yield context
142140
finally:
143-
self._shared_navigation_timeouts.pop(context_id, None)
141+
self._shared_navigation_timeouts.pop(request_id, None)
142+
143+
async def _execute_pre_navigation_hooks(
144+
self, context: BasicCrawlingContext
145+
) -> AsyncGenerator[BasicCrawlingContext, None]:
146+
request_id = id(context.request)
147+
148+
for hook in self._pre_navigation_hooks:
149+
async with self._shared_navigation_timeouts[request_id]:
150+
await hook(context)
151+
152+
yield context
144153

145154
async def _execute_post_navigation_hooks(
146155
self, context: HttpCrawlingContext
147156
) -> AsyncGenerator[HttpCrawlingContext, None]:
157+
request_id = id(context.request)
158+
148159
for hook in self._post_navigation_hooks:
149-
await hook(context)
160+
async with self._shared_navigation_timeouts[request_id]:
161+
await hook(context)
150162

151163
yield context
152164

@@ -262,7 +274,7 @@ async def _make_http_request(self, context: BasicCrawlingContext) -> AsyncGenera
262274
Yields:
263275
The original crawling context enhanced by HTTP response.
264276
"""
265-
async with self._shared_navigation_timeouts[id(context)] as remaining_timeout:
277+
async with self._shared_navigation_timeouts[id(context.request)] as remaining_timeout:
266278
result = await self._http_client.crawl(
267279
request=context.request,
268280
session=context.session,

src/crawlee/crawlers/_playwright/_playwright_crawler.py

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ def __init__(
193193
# Compose the context pipeline with the Playwright-specific context enhancer.
194194
kwargs['_context_pipeline'] = (
195195
ContextPipeline()
196+
.compose(self._manage_shared_navigation_timeout)
196197
.compose(self._open_page)
197198
.compose(self._navigate)
198199
.compose(self._execute_post_navigation_hooks)
@@ -216,6 +217,18 @@ def __init__(
216217

217218
super().__init__(**kwargs)
218219

220+
async def _manage_shared_navigation_timeout(
221+
self, context: BasicCrawlingContext
222+
) -> AsyncGenerator[BasicCrawlingContext, None]:
223+
"""Initialize and clean up the shared navigation timeout for the current request."""
224+
request_id = id(context.request)
225+
self._shared_navigation_timeouts[request_id] = SharedTimeout(self._navigation_timeout)
226+
227+
try:
228+
yield context
229+
finally:
230+
self._shared_navigation_timeouts.pop(request_id, None)
231+
219232
async def _open_page(
220233
self,
221234
context: BasicCrawlingContext,
@@ -242,21 +255,17 @@ async def _open_page(
242255
goto_options=GotoOptions(**self._goto_options),
243256
)
244257

245-
context_id = id(pre_navigation_context)
246-
self._shared_navigation_timeouts[context_id] = SharedTimeout(self._navigation_timeout)
258+
request_id = id(pre_navigation_context.request)
247259

248-
try:
249-
# Only use the page context manager here — it sets the current page in a context variable,
250-
# making it accessible to PlaywrightHttpClient in subsequent pipeline steps.
251-
async with browser_page_context(crawlee_page.page):
252-
for hook in self._pre_navigation_hooks:
253-
async with self._shared_navigation_timeouts[context_id]:
254-
await hook(pre_navigation_context)
255-
256-
# Yield should be inside the browser_page_context.
257-
yield pre_navigation_context
258-
finally:
259-
self._shared_navigation_timeouts.pop(context_id, None)
260+
# Only use the page context manager here — it sets the current page in a context variable,
261+
# making it accessible to PlaywrightHttpClient in subsequent pipeline steps.
262+
async with browser_page_context(crawlee_page.page):
263+
for hook in self._pre_navigation_hooks:
264+
async with self._shared_navigation_timeouts[request_id]:
265+
await hook(pre_navigation_context)
266+
267+
# Yield should be inside the browser_page_context.
268+
yield pre_navigation_context
260269

261270
def _prepare_request_interceptor(
262271
self,
@@ -329,7 +338,7 @@ async def _navigate(
329338
await context.page.route(context.request.url, route_handler)
330339

331340
try:
332-
async with self._shared_navigation_timeouts[id(context)] as remaining_timeout:
341+
async with self._shared_navigation_timeouts[id(context.request)] as remaining_timeout:
333342
response = await context.page.goto(
334343
context.request.url, timeout=remaining_timeout.total_seconds() * 1000, **context.goto_options
335344
)
@@ -496,8 +505,12 @@ async def _handle_blocked_request_by_content(
496505
async def _execute_post_navigation_hooks(
497506
self, context: PlaywrightPostNavCrawlingContext
498507
) -> AsyncGenerator[PlaywrightPostNavCrawlingContext, None]:
508+
request_id = id(context.request)
509+
499510
for hook in self._post_navigation_hooks:
500-
await hook(context)
511+
async with self._shared_navigation_timeouts[request_id]:
512+
await hook(context)
513+
501514
yield context
502515

503516
async def _create_crawling_context(

src/crawlee/request_loaders/_request_manager_tandem.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,12 @@ async def fetch_next_request(self) -> Request | None:
8989
'Adding request from the RequestLoader to the RequestManager failed, the request has been dropped',
9090
extra={'url': request.url, 'unique_key': request.unique_key},
9191
)
92-
return None
9392

94-
await self._read_only_loader.mark_request_as_handled(request)
93+
return None
94+
finally:
95+
# Mark it as processed so that the `request` doesn't get stuck in the `in_progress` status
96+
# in `RequestLoader`
97+
await self._read_only_loader.mark_request_as_handled(request)
9598

9699
return await self._read_write_manager.fetch_next_request()
97100

src/crawlee/request_loaders/_sitemap_request_loader.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,11 @@ def __init__(
160160

161161
async def _get_state(self) -> SitemapRequestLoaderState:
162162
"""Initialize and return the current state."""
163+
if self._state.is_initialized:
164+
return self._state.current_value
165+
163166
async with self._queue_lock:
167+
# Re-check if state got initialized while waiting for the lock
164168
if self._state.is_initialized:
165169
return self._state.current_value
166170

@@ -260,7 +264,6 @@ async def _load_sitemaps(self) -> None:
260264
# Check if we have capacity in the queue
261265
await self._queue_has_capacity.wait()
262266

263-
state = await self._get_state()
264267
async with self._queue_lock:
265268
state.url_queue.append(url)
266269
state.current_sitemap_processed_urls.add(url)
@@ -318,19 +321,26 @@ async def fetch_next_request(self) -> Request | None:
318321
continue
319322

320323
async with self._queue_lock:
324+
# Double-check if the queue is still not empty after acquiring the lock
325+
if not state.url_queue:
326+
continue
327+
321328
url = state.url_queue.popleft()
322329
request_option = RequestOptions(url=url)
330+
331+
if len(state.url_queue) < self._max_buffer_size:
332+
self._queue_has_capacity.set()
333+
323334
if self._transform_request_function:
324335
transform_request_option = self._transform_request_function(request_option)
325336
if transform_request_option == 'skip':
326337
state.total_count -= 1
327338
continue
328339
if transform_request_option != 'unchanged':
329340
request_option = transform_request_option
341+
330342
request = Request.from_url(**request_option)
331343
state.in_progress.add(request.url)
332-
if len(state.url_queue) < self._max_buffer_size:
333-
self._queue_has_capacity.set()
334344

335345
return request
336346

src/crawlee/storage_clients/_sql/_request_queue_client.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from typing import TYPE_CHECKING, Any, cast
99

1010
from sqlalchemy import CursorResult, exists, func, or_, select, update
11-
from sqlalchemy import func as sql_func
1211
from sqlalchemy.exc import SQLAlchemyError
1312
from sqlalchemy.orm import load_only
1413
from typing_extensions import NotRequired, Self, override
@@ -783,22 +782,20 @@ def _prepare_buffer_data(
783782
@override
784783
async def _apply_buffer_updates(self, session: AsyncSession, max_buffer_id: int) -> None:
785784
aggregations: list[ColumnElement[Any]] = [
786-
sql_func.max(self._BUFFER_TABLE.accessed_at).label('max_accessed_at'),
787-
sql_func.max(self._BUFFER_TABLE.modified_at).label('max_modified_at'),
788-
sql_func.sum(self._BUFFER_TABLE.delta_handled_count).label('delta_handled_count'),
789-
sql_func.sum(self._BUFFER_TABLE.delta_pending_count).label('delta_pending_count'),
790-
sql_func.sum(self._BUFFER_TABLE.delta_total_count).label('delta_total_count'),
785+
func.max(self._BUFFER_TABLE.accessed_at).label('max_accessed_at'),
786+
func.max(self._BUFFER_TABLE.modified_at).label('max_modified_at'),
787+
func.sum(self._BUFFER_TABLE.delta_handled_count).label('delta_handled_count'),
788+
func.sum(self._BUFFER_TABLE.delta_pending_count).label('delta_pending_count'),
789+
func.sum(self._BUFFER_TABLE.delta_total_count).label('delta_total_count'),
791790
]
792791

793792
if not self._had_multiple_clients:
794-
aggregations.append(
795-
sql_func.count(sql_func.distinct(self._BUFFER_TABLE.client_id)).label('unique_clients_count')
796-
)
793+
aggregations.append(func.count(func.distinct(self._BUFFER_TABLE.client_id)).label('unique_clients_count'))
797794

798795
if self._storage_client.get_dialect_name() == 'postgresql':
799-
aggregations.append(sql_func.bool_or(self._BUFFER_TABLE.need_recalc).label('need_recalc'))
796+
aggregations.append(func.bool_or(self._BUFFER_TABLE.need_recalc).label('need_recalc'))
800797
else:
801-
aggregations.append(sql_func.max(self._BUFFER_TABLE.need_recalc).label('need_recalc'))
798+
aggregations.append(func.max(self._BUFFER_TABLE.need_recalc).label('need_recalc'))
802799

803800
aggregation_stmt = select(*aggregations).where(
804801
self._BUFFER_TABLE.storage_id == self._id, self._BUFFER_TABLE.id <= max_buffer_id

tests/unit/_autoscaling/test_autoscaled_pool.py

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import asyncio
66
from contextlib import suppress
7-
from datetime import datetime, timedelta, timezone
7+
from datetime import timedelta
88
from itertools import chain, repeat
99
from typing import TYPE_CHECKING, TypeVar, cast
1010
from unittest.mock import Mock
@@ -15,19 +15,19 @@
1515
from crawlee._autoscaling._types import LoadRatioInfo, SystemInfo
1616
from crawlee._types import ConcurrencySettings
1717
from crawlee._utils.time import measure_time
18+
from tests.unit.utils import wait_for_condition
1819

1920
if TYPE_CHECKING:
2021
from collections.abc import Awaitable
2122

23+
T = TypeVar('T')
24+
2225

2326
@pytest.fixture
2427
def system_status() -> SystemStatus | Mock:
2528
return Mock(spec=SystemStatus)
2629

2730

28-
T = TypeVar('T')
29-
30-
3131
def future(value: T, /) -> Awaitable[T]:
3232
f = asyncio.Future[T]()
3333
f.set_result(value)
@@ -145,10 +145,6 @@ async def run() -> None:
145145
await pool.run()
146146

147147

148-
@pytest.mark.flaky(
149-
rerun=3,
150-
reason='Test is flaky on Windows and MacOS, see https://github.com/apify/crawlee-python/issues/1655.',
151-
)
152148
async def test_autoscales(
153149
monkeypatch: pytest.MonkeyPatch,
154150
system_status: SystemStatus | Mock,
@@ -160,7 +156,7 @@ async def run() -> None:
160156
nonlocal done_count
161157
done_count += 1
162158

163-
start = datetime.now(timezone.utc)
159+
overload_active = False
164160

165161
def get_historical_system_info() -> SystemInfo:
166162
result = SystemInfo(
@@ -170,8 +166,7 @@ def get_historical_system_info() -> SystemInfo:
170166
client_info=LoadRatioInfo(limit_ratio=0.9, actual_ratio=0.3),
171167
)
172168

173-
# 0.5 seconds after the start of the test, pretend the CPU became overloaded
174-
if result.created_at - start >= timedelta(seconds=0.5):
169+
if overload_active:
175170
result.cpu_info = LoadRatioInfo(limit_ratio=0.9, actual_ratio=1.0)
176171

177172
return result
@@ -196,24 +191,21 @@ def get_historical_system_info() -> SystemInfo:
196191
pool_run_task = asyncio.create_task(pool.run(), name='pool run task')
197192

198193
try:
199-
# After 0.2s, there should be an increase in concurrency
200-
await asyncio.sleep(0.2)
201-
assert pool.desired_concurrency > 1
194+
# Wait until concurrency scales up above 1.
195+
await wait_for_condition(lambda: pool.desired_concurrency > 1, timeout=5.0)
202196

203-
# After 0.5s, the concurrency should reach max concurrency
204-
await asyncio.sleep(0.3)
205-
assert pool.desired_concurrency == 4
197+
# Wait until concurrency reaches maximum.
198+
await wait_for_condition(lambda: pool.desired_concurrency == 4, timeout=5.0)
206199

207-
# The concurrency should guarantee completion of more than 10 tasks (a single worker would complete ~5)
208-
assert done_count > 10
200+
# Multiple concurrent workers should have completed more tasks than a single worker could.
201+
await wait_for_condition(lambda: done_count > 10, timeout=5.0)
209202

210-
# After 0.7s, the pretend overload should have kicked in and there should be a drop in desired concurrency
211-
await asyncio.sleep(0.2)
212-
assert pool.desired_concurrency < 4
203+
# Simulate CPU overload and wait for the pool to scale down.
204+
overload_active = True
205+
await wait_for_condition(lambda: pool.desired_concurrency < 4, timeout=5.0)
213206

214-
# After a full second, the pool should scale down all the way to 1
215-
await asyncio.sleep(0.3)
216-
assert pool.desired_concurrency == 1
207+
# Wait until the pool scales all the way down to minimum.
208+
await wait_for_condition(lambda: pool.desired_concurrency == 1, timeout=5.0)
217209
finally:
218210
pool_run_task.cancel()
219211
with suppress(asyncio.CancelledError):

0 commit comments

Comments (0)