Skip to content

Commit bd45915

Browse files
committed
add tests
1 parent 0424afc commit bd45915

7 files changed

Lines changed: 667 additions & 17 deletions

File tree

src/crawlee/browsers/_stagehand_browser_controller.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -179,9 +179,12 @@ async def _ensure_session(
179179
browser_new_context_options: Mapping[str, Any] | None = None,
180180
proxy_info: ProxyInfo | None = None,
181181
) -> None:
182+
182183
if self._session is not None:
183184
return
185+
184186
async with self._session_init_lock:
187+
# Double-check if the session was created while waiting for the lock.
185188
if self._session is not None:
186189
return
187190

@@ -231,7 +234,5 @@ async def _ensure_session(
231234
)
232235

233236
self._browser = await self._playwright.chromium.connect_over_cdp(cdp_url)
234-
self._browser_context = (
235-
self._browser.contexts[0] if self._browser.contexts else await self._browser.new_context()
236-
)
237+
self._browser_context = self._browser.contexts[0]
237238
self._session = session

src/crawlee/browsers/_stagehand_browser_plugin.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,9 @@ def __init__(
7171
config = service_locator.get_configuration()
7272

7373
self._max_open_pages_per_browser = max_open_pages_per_browser
74-
self.stagehand_options = stagehand_options or StagehandOptions()
74+
self._stagehand_options = stagehand_options or StagehandOptions()
7575

76-
is_local = self.stagehand_options.env == 'LOCAL'
76+
is_local = self._stagehand_options.env == 'LOCAL'
7777

7878
# browser_launch_options take priority over browser_new_context_options for shared keys.
7979
self._browser_launch_options: dict[str, Any] = {
@@ -93,14 +93,14 @@ def __init__(
9393
self._stagehand_init_params: dict[str, Any] = {
9494
'server': 'local' if is_local else 'remote',
9595
'local_headless': self._browser_launch_options['headless'],
96-
'local_ready_timeout_s': self.stagehand_options.local_ready_timeout_s,
96+
'local_ready_timeout_s': self._stagehand_options.local_ready_timeout_s,
9797
}
98-
if self.stagehand_options.model_api_key is not None:
99-
self._stagehand_init_params['model_api_key'] = self.stagehand_options.model_api_key
98+
if self._stagehand_options.model_api_key is not None:
99+
self._stagehand_init_params['model_api_key'] = self._stagehand_options.model_api_key
100100

101101
if not is_local:
102-
self._stagehand_init_params['browserbase_api_key'] = self.stagehand_options.browserbase_api_key
103-
self._stagehand_init_params['browserbase_project_id'] = self.stagehand_options.project_id
102+
self._stagehand_init_params['browserbase_api_key'] = self._stagehand_options.browserbase_api_key
103+
self._stagehand_init_params['browserbase_project_id'] = self._stagehand_options.project_id
104104

105105
self._stagehand_client: AsyncStagehand | None = None
106106
self._playwright_context_manager = async_playwright()
@@ -145,6 +145,11 @@ def browser_new_context_options(self) -> Mapping[str, Any]:
145145
def max_open_pages_per_browser(self) -> int:
146146
return self._max_open_pages_per_browser
147147

148+
@property
149+
def stagehand_options(self) -> StagehandOptions:
150+
"""Return the Stagehand-specific configuration options."""
151+
return self._stagehand_options
152+
148153
@override
149154
async def __aenter__(self) -> StagehandBrowserPlugin:
150155
if self._active:
@@ -153,7 +158,7 @@ async def __aenter__(self) -> StagehandBrowserPlugin:
153158
self._active = True
154159
self._playwright = await self._playwright_context_manager.__aenter__()
155160

156-
if self.stagehand_options.env == 'LOCAL':
161+
if self._stagehand_options.env == 'LOCAL':
157162
if 'executable_path' not in self._browser_launch_options:
158163
chrome_path = self._playwright.chromium.executable_path
159164
self._browser_launch_options['executable_path'] = chrome_path
@@ -195,6 +200,6 @@ async def new_browser(self) -> BrowserController:
195200
return StagehandBrowserController(
196201
playwright=self._playwright,
197202
stagehand_client=self._stagehand_client,
198-
stagehand_options=self.stagehand_options,
203+
stagehand_options=self._stagehand_options,
199204
max_open_pages_per_browser=self._max_open_pages_per_browser,
200205
)

src/crawlee/browsers/_stagehand_types.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def __init__(self, page: Page, session: AsyncSession) -> None:
7979
self._session = session
8080

8181
async def act(self, **kwargs: Unpack[SessionActParams]) -> SessionActResponse:
82-
"""Perform an action on the page using natural language.
82+
"""Perform an action on the page using natural language. Argument `page` is automatically set.
8383
8484
Args:
8585
**kwargs: Parameters passed to ``AsyncSession.act()``.
@@ -92,7 +92,7 @@ async def act(self, **kwargs: Unpack[SessionActParams]) -> SessionActResponse:
9292
return await self._session.act(page=self, **kwargs)
9393

9494
async def observe(self, **kwargs: Unpack[SessionObserveParams]) -> SessionObserveResponse:
95-
"""Observe the page and get AI-suggested actions.
95+
"""Observe the page and get AI-suggested actions. Argument `page` is automatically set.
9696
9797
Args:
9898
**kwargs: Parameters passed to ``AsyncSession.observe()``.
@@ -104,7 +104,7 @@ async def observe(self, **kwargs: Unpack[SessionObserveParams]) -> SessionObserv
104104
return await self._session.observe(page=self, **kwargs)
105105

106106
async def extract(self, **kwargs: Unpack[SessionExtractParams]) -> SessionExtractResponse:
107-
"""Extract structured data from the page using natural language.
107+
"""Extract structured data from the page using natural language. Argument `page` is automatically set.
108108
109109
Args:
110110
**kwargs: Parameters passed to ``AsyncSession.extract()``.
@@ -116,7 +116,7 @@ async def extract(self, **kwargs: Unpack[SessionExtractParams]) -> SessionExtrac
116116
return await self._session.extract(page=self, **kwargs)
117117

118118
async def execute(self, **kwargs: Unpack[SessionExecuteParams]) -> SessionExecuteResponse:
119-
"""Execute arbitrary code on the page via natural language instructions.
119+
"""Execute arbitrary code on the page via natural language instructions. Argument `page` is automatically set.
120120
121121
Args:
122122
**kwargs: Parameters passed to ``AsyncSession.execute()``.

src/crawlee/crawlers/_stagehand/_stagehand_crawler.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import logging
34
from typing import TYPE_CHECKING, Any
45

56
from crawlee._utils.docs import docs_group
@@ -67,7 +68,7 @@ class StagehandCrawler(
6768
6869
crawler = StagehandCrawler(
6970
stagehand_options=StagehandOptions(
70-
api_key='sk-...',
71+
model_api_key='sk-...',
7172
model='openai/gpt-4.1-mini',
7273
),
7374
)
@@ -168,6 +169,8 @@ def __init__(
168169
]
169170
)
170171

172+
kwargs.setdefault('_logger', logging.getLogger(__name__))
173+
171174
super().__init__(
172175
browser_pool=browser_pool,
173176
goto_options=goto_options,

0 commit comments

Comments
 (0)