Skip to content

Commit d121873

Browse files
authored
feat: Expose use_state in BasicCrawler (#1799)
### Description

- Expose `use_state` as a public method of `BasicCrawler`. This is useful for passing data initialized during crawler setup into a request handler.
1 parent b50b9f2 commit d121873

2 files changed

Lines changed: 16 additions & 2 deletions

File tree

src/crawlee/crawlers/_basic/_basic_crawler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -840,7 +840,7 @@ async def add_requests(
840840
wait_for_all_requests_to_be_added_timeout=wait_for_all_requests_to_be_added_timeout,
841841
)
842842

843-
async def _use_state(
843+
async def use_state(
844844
self,
845845
default_value: dict[str, JsonSerializable] | None = None,
846846
) -> dict[str, JsonSerializable]:
@@ -1421,7 +1421,7 @@ async def __run_task_function(self) -> None:
14211421
add_requests=result.add_requests,
14221422
push_data=result.push_data,
14231423
get_key_value_store=result.get_key_value_store,
1424-
use_state=self._use_state,
1424+
use_state=self.use_state,
14251425
log=self._logger,
14261426
)
14271427
self._context_result_map[context] = result

tests/unit/crawlers/_basic/test_basic_crawler.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,20 @@ async def handler(context: BasicCrawlingContext) -> None:
815815
assert value == {'hello': 'world'}
816816

817817

818+
async def test_crawler_use_state() -> None:
819+
crawler = BasicCrawler()
820+
821+
await crawler.use_state({'hello': 'world'})
822+
823+
@crawler.router.default_handler
824+
async def handler(context: BasicCrawlingContext) -> None:
825+
# The state set by the crawler must be available in the context of the request handler
826+
state = await context.use_state()
827+
assert state == {'hello': 'world'}
828+
829+
await crawler.run(['https://hello.world'])
830+
831+
818832
async def test_context_use_state_crawlers_share_state() -> None:
819833
async def handler(context: BasicCrawlingContext) -> None:
820834
state = await context.use_state({'urls': []})

0 commit comments

Comments
 (0)