Commit 33503cb

add docs and fingerprint headers
1 parent bd45915 commit 33503cb

9 files changed

Lines changed: 385 additions & 73 deletions

docs/guides/architecture_overview.mdx

Lines changed: 34 additions & 1 deletion
@@ -53,6 +53,8 @@ class PlaywrightCrawler
 
 class AdaptivePlaywrightCrawler
 
+class StagehandCrawler
+
 %% ========================
 %% Inheritance arrows
 %% ========================

@@ -63,6 +65,7 @@ BasicCrawler --|> AdaptivePlaywrightCrawler
 AbstractHttpCrawler --|> HttpCrawler
 AbstractHttpCrawler --|> ParselCrawler
 AbstractHttpCrawler --|> BeautifulSoupCrawler
+PlaywrightCrawler --|> StagehandCrawler
 ```
 
 ### HTTP crawlers

@@ -79,7 +82,19 @@ You can learn more about HTTP crawlers in the [HTTP crawlers guide](./http-crawl
 
 ### Browser crawlers
 
-Browser crawlers use a real browser to render pages, enabling scraping of sites that require JavaScript. They manage browser instances, pages, and context lifecycles. Currently, the only browser crawler is <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink>, which utilizes the [Playwright](https://playwright.dev/) library. Playwright provides a high-level API for controlling and navigating browsers. You can learn more about <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink>, its features, and how it internally manages browser instances in the [Playwright crawler guide](./playwright-crawler).
+Browser crawlers use a real browser to render pages, enabling scraping of sites that require
+JavaScript. They manage browser instances, pages, and context lifecycles. Crawlee provides
+two browser crawlers:
+
+- <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> utilizes the
+  [Playwright](https://playwright.dev/) library and provides a high-level API for controlling
+  and navigating browsers. You can learn more about it in the
+  [Playwright crawler guide](./playwright-crawler).
+- <ApiLink to="class/StagehandCrawler">`StagehandCrawler`</ApiLink> extends
+  `PlaywrightCrawler` with AI-powered browser automation via
+  [Stagehand](https://github.com/browserbase/stagehand). It adds natural-language methods
+  (`act`, `extract`, `observe`, `execute`) directly on the page object. You can learn more
+  about it in the [Stagehand crawler guide](./stagehand-crawler).
 
 ### Adaptive crawler

@@ -122,6 +137,12 @@ class AdaptivePlaywrightPreNavCrawlingContext
 
 class AdaptivePlaywrightCrawlingContext
 
+class StagehandPreNavCrawlingContext
+
+class StagehandPostNavCrawlingContext
+
+class StagehandCrawlingContext
+
 %% ========================
 %% Inheritance arrows
 %% ========================

@@ -143,6 +164,12 @@ PlaywrightPreNavCrawlingContext --|> PlaywrightCrawlingContext
 BasicCrawlingContext --|> AdaptivePlaywrightPreNavCrawlingContext
 
 ParsedHttpCrawlingContext --|> AdaptivePlaywrightCrawlingContext
+
+PlaywrightPreNavCrawlingContext --|> StagehandPreNavCrawlingContext
+
+StagehandPreNavCrawlingContext --|> StagehandPostNavCrawlingContext
+
+StagehandPostNavCrawlingContext --|> StagehandCrawlingContext
 ```
 
 They have a similar inheritance structure as the crawlers, with the base class being <ApiLink to="class/BasicCrawlingContext">`BasicCrawlingContext`</ApiLink>. The specific crawling contexts are:

@@ -154,6 +181,12 @@ They have a similar inheritance structure as the crawlers, with the base class b
 - <ApiLink to="class/PlaywrightCrawlingContext">`PlaywrightCrawlingContext`</ApiLink> for Playwright crawlers.
 - <ApiLink to="class/AdaptivePlaywrightPreNavCrawlingContext">`AdaptivePlaywrightPreNavCrawlingContext`</ApiLink> for Adaptive Playwright crawlers before the page is navigated.
 - <ApiLink to="class/AdaptivePlaywrightCrawlingContext">`AdaptivePlaywrightCrawlingContext`</ApiLink> for Adaptive Playwright crawlers.
+- <ApiLink to="class/StagehandPreNavCrawlingContext">`StagehandPreNavCrawlingContext`</ApiLink>
+  for Stagehand crawlers before the page is navigated.
+- <ApiLink to="class/StagehandPostNavCrawlingContext">`StagehandPostNavCrawlingContext`</ApiLink>
+  for Stagehand crawlers after the page is navigated.
+- <ApiLink to="class/StagehandCrawlingContext">`StagehandCrawlingContext`</ApiLink>
+  for Stagehand crawlers.
 
 ## Storages
 
docs/guides/code_examples/stagehand_crawler/basic_example.py

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
import asyncio
from typing import cast

from crawlee.browsers import StagehandOptions
from crawlee.crawlers import StagehandCrawler, StagehandCrawlingContext


async def main() -> None:
    crawler = StagehandCrawler(
        stagehand_options=StagehandOptions(
            model_api_key='your-openai-api-key',
            model='openai/gpt-4.1-mini',
        ),
        max_requests_per_crawl=5,
    )

    @crawler.router.default_handler
    async def handler(context: StagehandCrawlingContext) -> None:
        context.log.info(f'Processing {context.request.url} ...')

        # Dismiss overlays or interact with the page using natural language.
        await context.page.act(instruction='Click the accept cookies button if present')

        # Extract data from the page using AI.
        extracted = await context.page.extract(
            instruction='Get the page title and the main heading text',
            schema={
                'type': 'object',
                'properties': {
                    'title': {'type': 'string'},
                    'heading': {'type': 'string'},
                },
            },
        )

        extract_result = extracted.data.result

        if isinstance(extract_result, dict):
            # Push extracted data to the dataset.
            # Use `cast()` to provide a more specific type hint for the extracted data.
            await context.push_data(cast('dict[str, str | None]', extract_result))

    await crawler.run(['https://example.com'])


if __name__ == '__main__':
    asyncio.run(main())
docs/guides/code_examples/stagehand_crawler/browserbase_example.py

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
import asyncio
from typing import cast

from crawlee.browsers import StagehandOptions
from crawlee.crawlers import StagehandCrawler, StagehandCrawlingContext


async def main() -> None:
    # Use Browserbase cloud browser instead of a local Chromium instance.
    crawler = StagehandCrawler(
        stagehand_options=StagehandOptions(
            env='BROWSERBASE',
            browserbase_api_key='your-browserbase-api-key',
            project_id='your-project-id',
            model_api_key='your-openai-api-key',
            model='openai/gpt-4.1-mini',
        ),
        max_requests_per_crawl=5,
    )

    @crawler.router.default_handler
    async def handler(context: StagehandCrawlingContext) -> None:
        context.log.info(f'Processing {context.request.url} ...')

        extracted = await context.page.extract(
            instruction='Get the main content of the page',
        )

        extract_result = extracted.data.result

        await context.push_data(cast('dict[str, str | None]', extract_result))

    await crawler.run(['https://example.com'])


if __name__ == '__main__':
    asyncio.run(main())

docs/guides/stagehand_crawler.mdx

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
---
2+
id: stagehand-crawler
3+
title: Stagehand crawler
4+
description: Learn how to use StagehandCrawler for AI-powered browser automation and data extraction.
5+
---
6+
7+
import ApiLink from '@site/src/components/ApiLink';
8+
import CodeBlock from '@theme/CodeBlock';
9+
10+
import BasicExample from '!!raw-loader!./code_examples/stagehand_crawler/basic_example.py';
11+
import BrowserbaseExample from '!!raw-loader!./code_examples/stagehand_crawler/browserbase_example.py';
12+
13+
A <ApiLink to="class/StagehandCrawler">`StagehandCrawler`</ApiLink> extends <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> with AI-powered browser automation via [Stagehand](https://www.browserbase.com/stagehand). Instead of writing CSS selectors or XPath expressions, you describe what you want in plain English and the AI model takes care of the rest.
14+
15+
Each page in the crawling context is a <ApiLink to="class/StagehandPage">`StagehandPage`</ApiLink> - a drop-in replacement for the standard Playwright `Page` that adds four AI methods:
16+
17+
- `page.act(**kwargs)` - perform an action using a natural language instruction
18+
- `page.extract(**kwargs)` - extract structured data from the page using AI
19+
- `page.observe(**kwargs)` - get a list of AI-suggested actions available on the page
20+
- `page.execute(**kwargs)` - run an autonomous multi-step agent on the page
21+
22+
All standard Playwright methods remain available alongside these AI methods.
23+
24+
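For example, a single request handler can mix both freely. A minimal sketch (the handler registration mirrors the basic example below; the instruction text is illustrative):

```python
@crawler.router.default_handler
async def handler(context: StagehandCrawlingContext) -> None:
    # Standard Playwright API: read the document title directly.
    title = await context.page.title()
    context.log.info(f'Title: {title}')

    # Stagehand AI method on the same page object.
    await context.page.act(instruction='Close the newsletter popup if it appears')
```
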
## When to use StagehandCrawler

Use <ApiLink to="class/StagehandCrawler">`StagehandCrawler`</ApiLink> when:

- **Selectors are brittle or unknown** - the AI can locate elements by their visual role or label rather than a specific CSS class.
- **Interactions are complex** - multi-step forms, dynamic menus, or context-dependent flows that are hard to script.
- **Rapid prototyping** - you want to build a scraper quickly without spending time reverse-engineering the page structure.

For straightforward scraping tasks where the page structure is stable and well-known, <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> is more efficient; you can read more in the [Playwright crawler guide](./playwright-crawler).

## Installation

`StagehandCrawler` requires the `stagehand` optional dependency group:

```bash
pip install 'crawlee[stagehand]'
```

or with uv:

```bash
uv add 'crawlee[stagehand]'
```

## Basic usage

The example below demonstrates the typical usage pattern: dismiss cookie banners with `act()` and extract structured data with `extract()`.

<CodeBlock className="language-python">
  {BasicExample}
</CodeBlock>

## StagehandOptions configuration

Stagehand-specific settings are provided via <ApiLink to="class/StagehandOptions">`StagehandOptions`</ApiLink>. Pass the instance to the `stagehand_options` argument of <ApiLink to="class/StagehandCrawler">`StagehandCrawler`</ApiLink>.

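A minimal sketch, using only the options that appear in this guide's examples (`model_api_key` and `model` for a local run):

```python
from crawlee.browsers import StagehandOptions
from crawlee.crawlers import StagehandCrawler

# Configure the AI model, then hand the options to the crawler.
crawler = StagehandCrawler(
    stagehand_options=StagehandOptions(
        model_api_key='your-openai-api-key',
        model='openai/gpt-4.1-mini',
    ),
)
```
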
## AI page operations

### `act` - perform actions

Use `act()` to interact with the page using a natural language instruction:

```python
await context.page.act(instruction='Click the "Sign in" button')
```

### `extract` - structured data extraction

Use `extract()` to pull structured data from the page. Pass a JSON Schema via the `schema` parameter to enforce the output shape:

```python
data = await context.page.extract(
    instruction='Extract the top comment on this page',
    schema={
        'type': 'object',
        'properties': {
            'comment_text': {'type': 'string'},
            'author': {'type': 'string'},
        },
        'required': ['comment_text'],
    },
)
```

### `observe` - inspect available actions

Use `observe()` to get AI-suggested actions currently available on the page. Useful for debugging or building adaptive workflows:

```python
actions = await context.page.observe(
    instruction='What actions are available in the navigation menu?'
)
```
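
The shape of the returned value isn't shown in this guide; as a hypothetical follow-up, assuming `observe()` wraps its results in the same `.data.result` envelope that `extract()` uses in the examples above:

```python
# Assumption: observe() results use the same envelope as extract() results here.
suggested = actions.data.result
context.log.info(f'AI-suggested actions: {suggested}')
```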

### `execute` - autonomous multi-step agent

Use `execute()` for longer autonomous tasks that span multiple interactions:

```python
result = await context.page.execute(
    instruction='Search for "web scraping" and return the titles of the first five results',
)
```

## Browserbase integration

By default, Stagehand launches a local Chromium browser. To use [Browserbase](https://www.browserbase.com/) - a managed cloud browser service - set `env='BROWSERBASE'` in <ApiLink to="class/StagehandOptions">`StagehandOptions`</ApiLink> and supply the required credentials:

<CodeBlock className="language-python">
  {BrowserbaseExample}
</CodeBlock>

Browserbase credentials (`browserbase_api_key`, `project_id`) can also be provided via the `BROWSERBASE_API_KEY` and `BROWSERBASE_PROJECT_ID` environment variables.
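
For example, in your shell (placeholder values):

```bash
export BROWSERBASE_API_KEY='your-browserbase-api-key'
export BROWSERBASE_PROJECT_ID='your-project-id'
```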

## Browser configuration limitations

Because Stagehand manages the browser session internally via CDP, only Chromium is supported. Browser settings are limited to the subset accepted by Stagehand's `BrowserLaunchOptions` - `headless`, `args`, `viewport`, `proxy`, `locale`, `executable_path`, and a few others. Features like fingerprint generation and incognito pages are not supported.
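
As an illustrative, hypothetical sketch of that subset (the setting names come from the list above; how they are wired into the crawler is an assumption to verify against the API reference, not confirmed here):

```python
# Hypothetical values for the supported launch settings listed above.
# The exact parameter path into StagehandCrawler is an assumption, not confirmed API.
browser_launch_settings = {
    'headless': True,                            # run without a visible window
    'args': ['--disable-dev-shm-usage'],         # extra Chromium flags
    'viewport': {'width': 1280, 'height': 720},  # page viewport size
    'locale': 'en-US',                           # browser locale
}
```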

## Conclusion

This guide introduced <ApiLink to="class/StagehandCrawler">`StagehandCrawler`</ApiLink> and its AI page operations: `act()`, `extract()`, `observe()`, and `execute()`. You learned how to configure Stagehand via <ApiLink to="class/StagehandOptions">`StagehandOptions`</ApiLink> and switch to Browserbase for cloud browser execution. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/crawlee-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -80,6 +80,8 @@ sql_postgres = [
 stagehand = [
     "stagehand>=3.19.0",
     "playwright>=1.27.0",
+    "apify_fingerprint_datapoints>=0.0.2",
+    "browserforge>=1.2.3",
 ]
 sql_sqlite = [
     "sqlalchemy[asyncio]>=2.0.0,<3.0.0",
