Skip to content

Commit 476aa4f

Browse files
committed
fixes
1 parent 4115ae2 commit 476aa4f

3 files changed

Lines changed: 25 additions & 23 deletions

File tree

docs/guides/stagehand_crawler.mdx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,12 @@ Browserbase credentials (`browserbase_api_key`, `project_id`) can also be provid
117117

118118
## Browser configuration limitations
119119

120-
Because Stagehand manages the browser session internally via CDP, only Chromium is supported. Browser settings are limited to the subset accepted by Stagehand's `BrowserLaunchOptions` - `headless`, `args`, `viewport`, `proxy`, `locale`, `executable_path`, and a few others. Features like fingerprint generation and incognito pages are not supported.
120+
Because Stagehand manages the browser session internally via CDP, only Chromium is supported.
121+
Browser settings are limited to the subset accepted by Stagehand's `BrowserLaunchOptions` -
122+
`headless`, `args`, `viewport`, `proxy`, `locale`, `executable_path`, and a few others.
123+
Features like full browser fingerprinting (canvas, WebGL, screen properties) and incognito
124+
pages are not supported. Fingerprint-consistent HTTP headers (`User-Agent`, `Accept`, `sec-ch-ua`)
125+
are still injected automatically.
121126

122127
## Conclusion
123128

src/crawlee/browsers/_stagehand_browser_controller.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,8 @@ async def new_page(
126126
"""Create a new page in the Stagehand-managed browser.
127127
128128
On the first call, starts the Stagehand session with the provided options. On subsequent
129-
calls, ``browser_new_context_options`` are ignored with a warning because the browser
130-
context cannot be reconfigured once the session is running.
129+
calls, ``browser_new_context_options`` and ``proxy_info`` are ignored because the browser
130+
context cannot be reconfigured once the session is running.
131131
132132
Args:
133133
browser_new_context_options: Options merged on top of the plugin's launch options
@@ -173,19 +173,21 @@ async def close(self, *, force: bool = False) -> None:
173173
if self.pages_count > 0 and not force:
174174
raise ValueError('Cannot close the browser while there are open pages.')
175175

176-
if self._session is None:
176+
if self._session is None and self._browser is None:
177177
return
178178

179179
try:
180-
await self._session.end()
181-
except Exception:
182-
logger.warning('Failed to end Stagehand session gracefully.', exc_info=True)
183-
184-
if self._browser is not None and self._browser.is_connected():
185-
await self._browser.close()
186-
187-
self._session = None
188-
self._browser_context = None
180+
if self._session is not None:
181+
try:
182+
await self._session.end()
183+
except Exception:
184+
logger.warning('Failed to end Stagehand session gracefully.', exc_info=True)
185+
186+
if self._browser is not None and self._browser.is_connected():
187+
await self._browser.close()
188+
finally:
189+
self._session = None
190+
self._browser_context = None
189191

190192
def _on_page_close(self, page: StagehandPage) -> None:
191193
self._pages.remove(page)
@@ -194,7 +196,7 @@ def _get_extra_http_headers(
194196
self,
195197
extra_http_headers: Mapping[str, str] | None = None,
196198
) -> dict[str, str]:
197-
if extra_http_headers:
199+
if extra_http_headers is not None:
198200
return dict(extra_http_headers)
199201

200202
if self._header_generator:

src/crawlee/crawlers/_stagehand/_stagehand_crawler.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,9 @@ class StagehandCrawler(
4444
Because Stagehand relies on CDP, only Chromium is supported. Not all Playwright browser and
4545
context configuration options are available - browser settings are limited to the subset accepted
4646
by Stagehand's ``BrowserLaunchOptions`` (such as ``headless``, ``args``, ``viewport``, ``proxy``,
47-
``locale``, and ``executable_path``). Features like fingerprint generation and incognito pages
48-
are not supported.
47+
``locale``, and ``executable_path``). Full browser fingerprinting (canvas, WebGL, screen
48+
properties) and incognito pages are not supported; fingerprint-consistent HTTP headers
49+
(``User-Agent``, ``Accept``, ``sec-ch-ua``) are still injected automatically.
4950
5051
Each page in the crawling context is a `StagehandPage`, which extends the standard Playwright
5152
`Page` with the following AI methods:
@@ -102,7 +103,6 @@ def __init__(
102103
browser_new_context_options: dict[str, Any] | None = None,
103104
goto_options: GotoOptions | None = None,
104105
navigation_timeout: timedelta | None = None,
105-
max_open_pages_per_browser: int | None = None,
106106
**kwargs: Unpack[BasicCrawlerOptions[StagehandCrawlingContext, StatisticsState]],
107107
) -> None:
108108
"""Initialize a new instance.
@@ -129,8 +129,6 @@ def __init__(
129129
option is not supported - use `navigation_timeout` instead.
130130
navigation_timeout: Timeout for the navigation phase (from opening the page to calling
131131
the request handler). Defaults to one minute.
132-
max_open_pages_per_browser: Maximum number of pages open per browser instance.
133-
Cannot be specified if `browser_pool` is provided.
134132
kwargs: Additional keyword arguments forwarded to `BasicCrawler`.
135133
"""
136134
if browser_pool is not None:
@@ -143,13 +141,11 @@ def __init__(
143141
headless,
144142
browser_launch_options,
145143
browser_new_context_options,
146-
max_open_pages_per_browser,
147144
)
148145
):
149146
raise ValueError(
150147
'Cannot specify `stagehand_options`, `user_data_dir`, `headless`, '
151-
'`browser_launch_options`, `browser_new_context_options` or '
152-
'`max_open_pages_per_browser` when `browser_pool` is provided.'
148+
'`browser_launch_options` or `browser_new_context_options` when `browser_pool` is provided.'
153149
)
154150
else:
155151
launch_options = dict(browser_launch_options or {})
@@ -163,7 +159,6 @@ def __init__(
163159
user_data_dir=user_data_dir,
164160
browser_launch_options=launch_options or None,
165161
browser_new_context_options=browser_new_context_options,
166-
max_open_pages_per_browser=max_open_pages_per_browser or 20,
167162
)
168163
]
169164
)

0 commit comments

Comments
 (0)