Skip to content

Commit be25c71

Browse files
committed
Add Cloudflare Turnstile captcha support
Add TurnstileTaskProxyless and TurnstileTask classes for solving Cloudflare Turnstile challenges, with optional action, cData, and chlPageData parameters. Includes sync/async examples, unit tests, and documentation updates. https://claude.ai/code/session_01KKZjR7gkNfk4vUhJb1eJr6
1 parent a4cc29e commit be25c71

9 files changed

Lines changed: 322 additions & 0 deletions

File tree

CHANGELOG.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Unreleased
77
Added
88
#####
99

10+
- Add ``TurnstileTaskProxyless`` and ``TurnstileTask`` for solving Cloudflare Turnstile captchas (with optional ``action``, ``cdata``, and ``chl_page_data`` parameters)
1011
- Add ``AsyncAnticaptchaClient`` and ``AsyncJob`` for async/await usage with ``httpx`` (``pip install python-anticaptcha[async]``)
1112
- Rename ``base.py`` → ``sync_client.py`` for symmetry with ``async_client.py``; backward-compatible ``base.py`` shim preserved
1213
- Rename sync example files with ``sync_`` prefix to match ``async_`` examples

README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,32 @@ job.join()
134134
print(job.get_captcha_text())
135135
```
136136

137+
### Solve Cloudflare Turnstile
138+
139+
Example snippet for Cloudflare Turnstile captcha:
140+
141+
```python
142+
from python_anticaptcha import AnticaptchaClient, TurnstileTaskProxyless
143+
144+
api_key = '174faff8fbc769e94a5862391ecfd010'
145+
site_key = '0x4AAAAAAABS7vwvV6VFfMcD' # grab from site
146+
url = 'https://example.com'
147+
148+
client = AnticaptchaClient(api_key)
149+
task = TurnstileTaskProxyless(url, site_key)
150+
job = client.create_task(task)
151+
job.join()
152+
print(job.get_token_response())
153+
```
154+
155+
The full integration example is available in file `examples/sync_turnstile_request.py`.
156+
157+
To pass optional Turnstile parameters (for example `action` and `cdata`, which is sent to the API as `cData`):
158+
159+
```python
160+
task = TurnstileTaskProxyless(url, site_key, action="managed", cdata="token")
161+
```
162+
137163
### Solve funcaptcha
138164

139165
Example snippet for funcaptcha:

docs/usage.rst

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,33 @@ Example snippet for text captcha:
116116
job.join()
117117
print(job.get_captcha_text())
118118
119+
Solve Cloudflare Turnstile
120+
##########################
121+
122+
Example snippet for Cloudflare Turnstile captcha:
123+
124+
.. code:: python
125+
126+
from python_anticaptcha import AnticaptchaClient, TurnstileTaskProxyless
127+
128+
api_key = '174faff8fbc769e94a5862391ecfd010'
129+
site_key = '0x4AAAAAAABS7vwvV6VFfMcD' # grab from site
130+
url = 'https://example.com'
131+
132+
client = AnticaptchaClient(api_key)
133+
task = TurnstileTaskProxyless(url, site_key)
134+
job = client.create_task(task)
135+
job.join()
136+
print(job.get_token_response())
137+
138+
The full integration example is available in file ``examples/sync_turnstile_request.py``.
139+
140+
To pass optional Turnstile parameters (for example ``action`` and ``cdata``, which is sent to the API as ``cData``):
141+
142+
.. code:: python
143+
144+
task = TurnstileTaskProxyless(url, site_key, action="managed", cdata="token")
145+
119146
Solve funcaptcha
120147
################
121148

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""Async example: solve a Cloudflare Turnstile challenge and submit the form.

Reads the Anti-Captcha API key from the ``KEY`` environment variable.
"""
import asyncio
import re
from os import environ

import httpx

from python_anticaptcha import AsyncAnticaptchaClient, TurnstileTaskProxyless

api_key = environ["KEY"]
site_key_pattern = r'data-sitekey="(.+?)"'
url = "https://example.com"  # replace with target URL


async def get_form_html(session: httpx.AsyncClient) -> str:
    """Fetch the page that embeds the Turnstile widget and return its HTML."""
    return (await session.get(url)).text


async def get_token(client: AsyncAnticaptchaClient, form_html: str) -> str:
    """Extract the site key from *form_html* and have it solved.

    :param client: Anti-Captcha client used to create and await the task.
    :param form_html: HTML of the page containing the Turnstile widget.
    :returns: The solved Turnstile token.
    :raises ValueError: If no ``data-sitekey`` attribute is present in the HTML.
    """
    match = re.search(site_key_pattern, form_html)
    if match is None:
        # Without this guard a page lacking the widget fails with an opaque
        # "'NoneType' object has no attribute 'group'" error.
        raise ValueError(f"No Turnstile data-sitekey found in the page at {url}")
    task = TurnstileTaskProxyless(website_url=url, website_key=match.group(1))
    job = await client.create_task(task)
    await job.join()
    return job.get_token_response()


async def form_submit(session: httpx.AsyncClient, token: str) -> str:
    """POST the solved token back to the page and return the response body."""
    return (await session.post(url, data={"cf-turnstile-response": token})).text


async def process():
    """Run the whole flow: fetch the form, solve the captcha, submit the token."""
    async with AsyncAnticaptchaClient(api_key) as client, httpx.AsyncClient() as session:
        html = await get_form_html(session)
        token = await get_token(client, html)
        return await form_submit(session, token)


if __name__ == "__main__":
    print(asyncio.run(process()))

examples/sync_turnstile_request.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""Sync example: solve a Cloudflare Turnstile challenge and submit the form.

Reads the Anti-Captcha API key from the ``KEY`` environment variable.
"""
import re
from os import environ

import requests

from python_anticaptcha import AnticaptchaClient, TurnstileTaskProxyless

api_key = environ["KEY"]
site_key_pattern = r'data-sitekey="(.+?)"'
url = "https://example.com"  # replace with target URL
client = AnticaptchaClient(api_key)
session = requests.Session()


def get_form_html():
    """Fetch the page that embeds the Turnstile widget and return its HTML."""
    return session.get(url).text


def get_token(form_html):
    """Extract the site key from *form_html* and have it solved.

    :param form_html: HTML of the page containing the Turnstile widget.
    :returns: The solved Turnstile token.
    :raises ValueError: If no ``data-sitekey`` attribute is present in the HTML.
    """
    match = re.search(site_key_pattern, form_html)
    if match is None:
        # Without this guard a page lacking the widget fails with an opaque
        # "'NoneType' object has no attribute 'group'" error.
        raise ValueError(f"No Turnstile data-sitekey found in the page at {url}")
    task = TurnstileTaskProxyless(website_url=url, website_key=match.group(1))
    job = client.create_task(task)
    job.join()
    return job.get_token_response()


def form_submit(token):
    """POST the solved token back to the page and return the response body."""
    # Use the shared session (not a bare requests.post) so cookies received
    # with the initial GET are sent along with the form submission.
    return session.post(url, data={"cf-turnstile-response": token}).text


def process():
    """Run the whole flow: fetch the form, solve the captcha, submit the token."""
    html = get_form_html()
    token = get_token(html)
    return form_submit(token)


if __name__ == "__main__":
    print(process())
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""Sync example: solve a Cloudflare Turnstile challenge through a proxy.

Reads the Anti-Captcha API key from ``KEY`` and the proxy URL from
``PROXY_URL``.
"""
import re
from os import environ

import requests

from python_anticaptcha import AnticaptchaClient, Proxy, TurnstileTask

api_key = environ["KEY"]
proxy_url = environ["PROXY_URL"]  # eg. socks5://user:password@123.123.123.123:8888
site_key_pattern = r'data-sitekey="(.+?)"'
url = "https://example.com"  # replace with target URL
client = AnticaptchaClient(api_key)
session = requests.Session()

UA = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
)
# Present the same User-Agent to the target site that the solver is told to
# use, so the token is not submitted by a visibly different client.
session.headers.update({"User-Agent": UA})


def get_form_html():
    """Fetch the page that embeds the Turnstile widget and return its HTML."""
    return session.get(url).text


def get_token(form_html):
    """Extract the site key from *form_html* and have it solved via the proxy.

    :param form_html: HTML of the page containing the Turnstile widget.
    :returns: The solved Turnstile token.
    :raises ValueError: If no ``data-sitekey`` attribute is present in the HTML.
    """
    match = re.search(site_key_pattern, form_html)
    if match is None:
        # Without this guard a page lacking the widget fails with an opaque
        # "'NoneType' object has no attribute 'group'" error.
        raise ValueError(f"No Turnstile data-sitekey found in the page at {url}")
    proxy = Proxy.parse_url(proxy_url)
    task = TurnstileTask(
        website_url=url,
        website_key=match.group(1),
        user_agent=UA,
        **proxy.to_kwargs(),
    )
    job = client.create_task(task)
    job.join()
    return job.get_token_response()


def form_submit(token):
    """POST the solved token back to the page and return the response body."""
    # Use the shared session (not a bare requests.post) so cookies received
    # with the initial GET are sent along with the form submission.
    return session.post(url, data={"cf-turnstile-response": token}).text


def process():
    """Run the whole flow: fetch the form, solve the captcha, submit the token."""
    html = get_form_html()
    token = get_token(html)
    return form_submit(token)


if __name__ == "__main__":
    print(process())

python_anticaptcha/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
RecaptchaV2Task,
4242
RecaptchaV2TaskProxyless,
4343
RecaptchaV3TaskProxyless,
44+
TurnstileTask,
45+
TurnstileTaskProxyless,
4446
)
4547

4648
AnticatpchaException = AnticaptchaException
@@ -79,6 +81,8 @@ def __getattr__(name: str) -> type:
7981
"GeeTestTask",
8082
"AntiGateTaskProxyless",
8183
"AntiGateTask",
84+
"TurnstileTaskProxyless",
85+
"TurnstileTask",
8286
"AnticaptchaException",
8387
"AnticatpchaException",
8488
"AsyncAnticaptchaClient",

python_anticaptcha/tasks.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,85 @@ def serialize(self, **result: Any) -> dict[str, Any]:
599599
return data
600600

601601

602+
class TurnstileTaskProxyless(BaseTask):
    """Task describing a Cloudflare Turnstile challenge solved without a proxy.

    Once the job has finished, read the resulting token with
    :meth:`Job.get_token_response`.

    :param website_url: Full URL of the page where the Turnstile widget appears.
    :param website_key: The Turnstile ``sitekey`` from the page source.
    :param action: Optional action value; serialized as ``action``.
    :param cdata: Optional token; serialized as ``cData``.
    :param chl_page_data: Optional token; serialized as ``chlPageData``.

    Optional values left as ``None`` are omitted from the serialized payload.

    Example::

        task = TurnstileTaskProxyless(
            website_url="https://example.com",
            website_key="0x4AAAAAAABS7vwvV6VFfMcD",
        )
    """

    type = "TurnstileTaskProxyless"
    websiteURL = None
    websiteKey = None

    def __init__(
        self,
        website_url: str,
        website_key: str,
        action: str | None = None,
        cdata: str | None = None,
        chl_page_data: str | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        # Attributes are stored under the names used as payload keys.
        self.websiteURL, self.websiteKey = website_url, website_key
        self.action, self.cData, self.chlPageData = action, cdata, chl_page_data
        super().__init__(*args, **kwargs)

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Return the task payload, adding only the optional fields that are set."""
        payload = super().serialize(**result)
        payload["websiteURL"] = self.websiteURL
        payload["websiteKey"] = self.websiteKey
        for field_name in ("action", "cData", "chlPageData"):
            value = getattr(self, field_name)
            if value is not None:
                payload[field_name] = value
        return payload
658+
659+
660+
class TurnstileTask(ProxyMixin, UserAgentMixin, CookieMixin, TurnstileTaskProxyless):
    """Task describing a Cloudflare Turnstile challenge solved through a proxy.

    Takes the same arguments as :class:`TurnstileTaskProxyless`, plus the
    proxy, user-agent, and cookie arguments listed below.

    Note that the proxy-based approach is slower and requires high-quality,
    self-hosted proxies.

    :param user_agent: Browser User-Agent string.
    :param cookies: Optional cookie string (default: ``""``).
    :param proxy_type: Proxy protocol (``"http"``, ``"socks4"``, ``"socks5"``).
    :param proxy_address: Proxy server hostname or IP.
    :param proxy_port: Proxy server port.
    :param proxy_login: Proxy username (empty string if none).
    :param proxy_password: Proxy password (empty string if none).
    """

    type = "TurnstileTask"
679+
680+
602681
class AntiGateTask(ProxyMixin, AntiGateTaskProxyless):
603682
"""Solve a custom AntiGate task through a proxy.
604683

tests/test_tasks.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
RecaptchaV2Task,
1818
RecaptchaV2TaskProxyless,
1919
RecaptchaV3TaskProxyless,
20+
TurnstileTask,
21+
TurnstileTaskProxyless,
2022
)
2123

2224
PROXY_KWARGS = dict(
@@ -353,6 +355,63 @@ def test_proxy_login_omitted_when_falsy(self):
353355
assert data["proxyPort"] == 8080
354356

355357

358+
class TestTurnstileTaskProxyless:
    """Serialization checks for the proxyless Turnstile task."""

    def test_required_fields(self):
        """Type, URL and site key always appear in the payload."""
        payload = TurnstileTaskProxyless(
            website_url="https://example.com",
            website_key="tkey",
        ).serialize()
        assert payload["type"] == "TurnstileTaskProxyless"
        assert payload["websiteURL"] == "https://example.com"
        assert payload["websiteKey"] == "tkey"

    def test_optional_fields_omitted(self):
        """Optional keys are absent when their arguments are not supplied."""
        payload = TurnstileTaskProxyless(
            website_url="https://example.com",
            website_key="tkey",
        ).serialize()
        for key in ("action", "cData", "chlPageData"):
            assert key not in payload

    def test_optional_fields_included(self):
        """Supplied optional arguments are serialized under their API names."""
        payload = TurnstileTaskProxyless(
            website_url="https://example.com",
            website_key="tkey",
            action="managed",
            cdata="cdata_token",
            chl_page_data="chl_token",
        ).serialize()
        expected = {
            "action": "managed",
            "cData": "cdata_token",
            "chlPageData": "chl_token",
        }
        for key, value in expected.items():
            assert payload[key] == value
385+
386+
387+
class TestTurnstileTask:
    """Serialization checks for the proxy-based Turnstile task."""

    def test_type_and_proxy(self):
        """Task type, proxy settings and user agent reach the payload."""
        payload = TurnstileTask(
            website_url="https://example.com",
            website_key="tkey",
            **USER_AGENT_KWARGS,
            **PROXY_KWARGS,
        ).serialize()
        assert payload["type"] == "TurnstileTask"
        assert payload["proxyType"] == "http"
        assert payload["proxyAddress"] == "1.2.3.4"
        assert payload["proxyPort"] == 8080
        assert payload["userAgent"] == "Mozilla/5.0"

    def test_optional_fields_with_proxy(self):
        """Optional Turnstile fields still serialize when a proxy is configured."""
        payload = TurnstileTask(
            website_url="https://example.com",
            website_key="tkey",
            action="managed",
            **USER_AGENT_KWARGS,
            **PROXY_KWARGS,
        ).serialize()
        assert payload["type"] == "TurnstileTask"
        assert payload["action"] == "managed"
413+
414+
356415
class TestCookieMixin:
357416
def test_cookies_omitted_when_empty(self):
358417
task = NoCaptchaTask(

0 commit comments

Comments
 (0)