22
33from __future__ import annotations
44
5+ import inspect
56from asyncio import Lock
67from datetime import datetime , timedelta , timezone
8+ from functools import lru_cache
79from typing import TYPE_CHECKING , Any , cast
810
911from browserforge .injectors .playwright import AsyncNewContext
1012from playwright .async_api import Browser , BrowserContext , Page , ProxySettings
13+ from playwright .async_api import BrowserType as PlaywrightBrowserType
1114from typing_extensions import override
1215
1316from crawlee ._utils .docs import docs_group
2831logger = getLogger (__name__ )
2932
3033
# Signature inspection is memoized so that it is not repeated every time a browser context is created.
@lru_cache(maxsize=1)
def _get_context_params_cache() -> dict[str, set[str]]:
    """Group Playwright context option names by the context-creation call that accepts them.

    Parameter names are read with `inspect.signature` from `BrowserType.launch_persistent_context`
    and `Browser.new_context`. The result is cached, so later calls return the same mapping object
    without inspecting the signatures again.

    Returns:
        A mapping with three keys:
            - `common`: names accepted by both calls.
            - `persistent_unique`: names accepted only by `launch_persistent_context`.
            - `incognito_unique`: names accepted only by `new_context`.
    """
    persistent_names = set(inspect.signature(PlaywrightBrowserType.launch_persistent_context).parameters)
    incognito_names = set(inspect.signature(Browser.new_context).parameters)

    groups: dict[str, set[str]] = {}
    groups['common'] = persistent_names.intersection(incognito_names)
    groups['persistent_unique'] = persistent_names.difference(incognito_names)
    groups['incognito_unique'] = incognito_names.difference(persistent_names)
    return groups
44+
45+
3146@docs_group ('Browser management' )
3247class PlaywrightBrowserController (BrowserController ):
3348 """Controller for managing Playwright browser instances and their pages.
@@ -209,6 +224,31 @@ def _on_page_close(self, page: Page) -> None:
209224 """Handle actions after a page is closed."""
210225 self ._pages .remove (page )
211226
def _filter_context_options(self, options: dict[str, Any]) -> dict[str, Any]:
    """Drop browser context options that belong to the other context mode.

    The mode is taken from `self._use_incognito_pages`. An option that only the other mode accepts is
    skipped and a warning is logged for it. Every other option, including one that neither mode
    recognizes, is copied to the result unchanged, so that Playwright itself can raise an appropriate
    error for it.

    Args:
        options: Browser context options keyed by option name.

    Returns:
        A new dictionary holding the options that were kept; `options` itself is not modified.
    """
    param_groups = _get_context_params_cache()
    kept: dict[str, Any] = {}

    for name, option_value in options.items():
        if self._use_incognito_pages:
            # Incognito mode: reject options accepted only by the persistent context call.
            if name in param_groups['persistent_unique']:
                logger.warning(
                    f'Option "{name}" is only supported in persistent context mode '
                    '(use_incognito_pages=False) and will be ignored.'
                )
                continue
        elif name in param_groups['incognito_unique']:
            # Persistent mode: reject options accepted only by the incognito context call.
            logger.warning(
                f'Option "{name}" is only supported in incognito context mode '
                '(use_incognito_pages=True) and will be ignored.'
            )
            continue

        kept[name] = option_value

    return kept
251+
212252 async def _create_browser_context (
213253 self ,
214254 browser_new_context_options : Mapping [str , Any ] | None = None ,
@@ -222,11 +262,14 @@ async def _create_browser_context(
222262 `self._fingerprint_generator` is available.
223263 """
224264 browser_new_context_options = dict (browser_new_context_options ) if browser_new_context_options else {}
265+
266+ filtered_options = self ._filter_context_options (browser_new_context_options )
267+
225268 if proxy_info :
226- if browser_new_context_options .get ('proxy' ):
269+ if filtered_options .get ('proxy' ):
227270 logger .warning ("browser_new_context_options['proxy'] overridden by explicit `proxy_info` argument." )
228271
229- browser_new_context_options ['proxy' ] = ProxySettings (
272+ filtered_options ['proxy' ] = ProxySettings (
230273 server = f'{ proxy_info .scheme } ://{ proxy_info .hostname } :{ proxy_info .port } ' ,
231274 username = proxy_info .username ,
232275 password = proxy_info .password ,
@@ -236,7 +279,7 @@ async def _create_browser_context(
236279 return await AsyncNewContext (
237280 browser = self ._browser ,
238281 fingerprint = self ._fingerprint_generator .generate (),
239- ** browser_new_context_options ,
282+ ** filtered_options ,
240283 )
241284
242285 if self ._header_generator :
@@ -256,7 +299,5 @@ async def _create_browser_context(
256299 else :
257300 extra_http_headers = None
258301
259- browser_new_context_options ['extra_http_headers' ] = browser_new_context_options .get (
260- 'extra_http_headers' , extra_http_headers
261- )
262- return await self ._browser .new_context (** browser_new_context_options )
302+ filtered_options ['extra_http_headers' ] = filtered_options .get ('extra_http_headers' , extra_http_headers )
303+ return await self ._browser .new_context (** filtered_options )
0 commit comments