Skip to content

Commit c87b904

Browse files
committed
fix tests
1 parent d09828c commit c87b904

8 files changed

Lines changed: 107 additions & 17 deletions

File tree

src/crawlee/_utils/retry.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
import asyncio
3+
from asyncio import sleep as _retry_sleep # Using alias for testing purposes
44
from datetime import timedelta
55
from functools import wraps
66
from typing import TYPE_CHECKING, ParamSpec, TypeVar
@@ -24,12 +24,16 @@ def retry_on_error(
2424
max_attempts: Maximum number of attempts including the first one.
2525
base_delay: Base delay between retries; doubles on each subsequent attempt.
2626
"""
27+
if max_attempts < 1:
28+
raise ValueError('max_attempts must be at least 1')
2729

28-
def decorator(func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:
30+
if base_delay < timedelta(0):
31+
raise ValueError('base_delay must be a non-negative timedelta')
2932

30-
if max_attempts < 1:
31-
raise ValueError('max_attempts must be at least 1')
33+
if not exception_types:
34+
raise ValueError('At least one exception type must be specified')
3235

36+
def decorator(func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:
3337
@wraps(func)
3438
async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
3539
base_delay_seconds = base_delay.total_seconds()
@@ -39,7 +43,7 @@ async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
3943
except Exception as exc: # noqa: PERF203
4044
if not isinstance(exc, exception_types) or attempt >= max_attempts - 1:
4145
raise
42-
await asyncio.sleep(base_delay_seconds * (2**attempt))
46+
await _retry_sleep(base_delay_seconds * (2**attempt))
4347
raise RuntimeError('Unreachable')
4448

4549
return wrapper

tests/unit/_utils/test_retry.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ async def func() -> bool:
3232
raise ValueError('transient')
3333
return True
3434

35-
with patch('crawlee._utils.retry.asyncio.sleep', new_callable=AsyncMock) as mock_sleep:
35+
with patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep:
3636
result = await func()
3737

3838
assert result is True
@@ -46,7 +46,7 @@ async def func() -> None:
4646
raise ValueError('persistent')
4747

4848
with (
49-
patch('crawlee._utils.retry.asyncio.sleep', new_callable=AsyncMock),
49+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock),
5050
pytest.raises(ValueError, match='persistent'),
5151
):
5252
await func()
@@ -61,7 +61,7 @@ async def func() -> None:
6161
raise TypeError('not retryable')
6262

6363
with (
64-
patch('crawlee._utils.retry.asyncio.sleep', new_callable=AsyncMock) as mock_sleep,
64+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
6565
pytest.raises(TypeError),
6666
):
6767
await func()
@@ -76,7 +76,7 @@ async def func() -> None:
7676
raise ValueError('test backoff')
7777

7878
with (
79-
patch('crawlee._utils.retry.asyncio.sleep', new_callable=AsyncMock) as mock_sleep,
79+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
8080
pytest.raises(ValueError, match='test backoff'),
8181
):
8282
await func()

tests/unit/storage_clients/_redis/test_redis_dataset_client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ async def test_error_handling_on_push_failure(dataset_client: RedisDatasetClient
158158
mock_pipeline_ctx.__aexit__ = AsyncMock(return_value=None)
159159

160160
with (
161-
patch('crawlee._utils.retry.asyncio.sleep', new_callable=AsyncMock) as mock_sleep,
161+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
162162
patch.object(dataset_client.redis, 'pipeline', return_value=mock_pipeline_ctx),
163163
pytest.raises(RedisError),
164164
):
@@ -178,7 +178,7 @@ async def test_push_data_does_not_retry_on_unexpected_exception(dataset_client:
178178
mock_pipeline_ctx.__aexit__ = AsyncMock(return_value=None)
179179

180180
with (
181-
patch('crawlee._utils.retry.asyncio.sleep', new_callable=AsyncMock) as mock_sleep,
181+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
182182
patch.object(dataset_client.redis, 'pipeline', return_value=mock_pipeline_ctx),
183183
pytest.raises(ValueError, match='unexpected error'),
184184
):

tests/unit/storage_clients/_redis/test_redis_kvs_client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ async def test_error_handling_on_set_failure(kvs_client: RedisKeyValueStoreClien
229229
mock_pipeline_ctx.__aexit__ = AsyncMock(return_value=None)
230230

231231
with (
232-
patch('crawlee._utils.retry.asyncio.sleep', new_callable=AsyncMock) as mock_sleep,
232+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
233233
patch.object(kvs_client.redis, 'pipeline', return_value=mock_pipeline_ctx),
234234
pytest.raises(RedisError),
235235
):
@@ -249,7 +249,7 @@ async def test_set_value_does_not_retry_on_unexpected_exception(kvs_client: Redi
249249
mock_pipeline_ctx.__aexit__ = AsyncMock(return_value=None)
250250

251251
with (
252-
patch('crawlee._utils.retry.asyncio.sleep', new_callable=AsyncMock) as mock_sleep,
252+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
253253
patch.object(kvs_client.redis, 'pipeline', return_value=mock_pipeline_ctx),
254254
pytest.raises(ValueError, match='unexpected error'),
255255
):

tests/unit/storage_clients/_redis/test_redis_rq_client.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
import asyncio
44
import json
55
from typing import TYPE_CHECKING
6+
from unittest.mock import AsyncMock, MagicMock, patch
67

78
import pytest
9+
from redis.exceptions import RedisError
810

911
from crawlee import Request
1012
from crawlee.storage_clients import RedisStorageClient
@@ -250,3 +252,43 @@ async def test_deduplication(rq_client: RedisRequestQueueClient) -> None:
250252
# Verify no more requests are available
251253
request3 = await rq_client.fetch_next_request()
252254
assert request3 is None
255+
256+
257+
async def test_error_handling_on_get_metadata_failure(rq_client: RedisRequestQueueClient) -> None:
258+
"""Test that get_metadata properly handles Redis errors and retries."""
259+
mock_pipe = MagicMock()
260+
mock_pipe.execute = AsyncMock(side_effect=RedisError('connection lost'))
261+
262+
mock_pipeline_ctx = MagicMock()
263+
mock_pipeline_ctx.__aenter__ = AsyncMock(return_value=mock_pipe)
264+
mock_pipeline_ctx.__aexit__ = AsyncMock(return_value=None)
265+
266+
with (
267+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
268+
patch.object(rq_client.redis, 'pipeline', return_value=mock_pipeline_ctx),
269+
pytest.raises(RedisError),
270+
):
271+
await rq_client.get_metadata()
272+
273+
# Verify that retry logic was attempted
274+
assert mock_sleep.call_count == 2 # Assuming default max_attempts=3
275+
276+
277+
async def test_get_metadata_does_not_retry_on_unexpected_exception(rq_client: RedisRequestQueueClient) -> None:
278+
"""Test that get_metadata does not retry on unexpected exceptions."""
279+
mock_pipe = MagicMock()
280+
mock_pipe.execute = AsyncMock(side_effect=ValueError('unexpected error'))
281+
282+
mock_pipeline_ctx = MagicMock()
283+
mock_pipeline_ctx.__aenter__ = AsyncMock(return_value=mock_pipe)
284+
mock_pipeline_ctx.__aexit__ = AsyncMock(return_value=None)
285+
286+
with (
287+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
288+
patch.object(rq_client.redis, 'pipeline', return_value=mock_pipeline_ctx),
289+
pytest.raises(ValueError, match='unexpected error'),
290+
):
291+
await rq_client.get_metadata()
292+
293+
# Verify that retry logic was not attempted
294+
assert mock_sleep.call_count == 0

tests/unit/storage_clients/_sql/test_sql_dataset_client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ async def test_error_handling_on_push_failure(dataset_client: SqlDatasetClient)
247247
mock_get_session.return_value = mock_session
248248

249249
with (
250-
patch('crawlee._utils.retry.asyncio.sleep', new_callable=AsyncMock) as mock_sleep,
250+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
251251
pytest.raises(SQLAlchemyError),
252252
):
253253
await dataset_client.push_data({'test': 'data'})
@@ -268,7 +268,7 @@ async def test_push_data_does_not_retry_on_unexpected_exception(dataset_client:
268268
mock_get_session.return_value = mock_session
269269

270270
with (
271-
patch('crawlee._utils.retry.asyncio.sleep', new_callable=AsyncMock) as mock_sleep,
271+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
272272
pytest.raises(ValueError, match='unexpected error'),
273273
):
274274
await dataset_client.push_data({'test': 'data'})

tests/unit/storage_clients/_sql/test_sql_kvs_client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ async def test_error_handling_on_set_failure(kvs_client: SqlKeyValueStoreClient)
297297
mock_get_session.return_value = mock_session
298298

299299
with (
300-
patch('crawlee._utils.retry.asyncio.sleep', new_callable=AsyncMock) as mock_sleep,
300+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
301301
pytest.raises(SQLAlchemyError),
302302
):
303303
await kvs_client.set_value(key='test', value='test-value')
@@ -318,7 +318,7 @@ async def test_set_value_does_not_retry_on_unexpected_exception(kvs_client: SqlK
318318
mock_get_session.return_value = mock_session
319319

320320
with (
321-
patch('crawlee._utils.retry.asyncio.sleep', new_callable=AsyncMock) as mock_sleep,
321+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
322322
pytest.raises(ValueError, match='unexpected error'),
323323
):
324324
await kvs_client.set_value(key='test', value='test-value')

tests/unit/storage_clients/_sql/test_sql_rq_client.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
import asyncio
44
import json
55
from typing import TYPE_CHECKING
6+
from unittest.mock import AsyncMock, patch
67

78
import pytest
89
from sqlalchemy import inspect, select
10+
from sqlalchemy.exc import SQLAlchemyError
911
from sqlalchemy.ext.asyncio import create_async_engine
1012

1113
from crawlee import Request
@@ -233,3 +235,45 @@ async def test_data_persistence_across_reopens(configuration: Configuration) ->
233235
assert {request1.url, request2.url} == {'https://example.com/1', 'https://example.com/2'}
234236

235237
await reopened_client.drop()
238+
239+
240+
async def test_error_handling_on_get_metadata_failure(rq_client: SqlRequestQueueClient) -> None:
241+
"""Test that get_metadata properly handles SQL errors and retries."""
242+
with patch(
243+
'crawlee.storage_clients._sql._request_queue_client.SqlRequestQueueClient.get_session',
244+
) as mock_get_session:
245+
mock_session = AsyncMock()
246+
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
247+
mock_session.__aexit__ = AsyncMock(return_value=None)
248+
mock_session.execute = AsyncMock(side_effect=SQLAlchemyError('db error'))
249+
mock_get_session.return_value = mock_session
250+
251+
with (
252+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
253+
pytest.raises(SQLAlchemyError),
254+
):
255+
await rq_client.get_metadata()
256+
257+
# Verify that retry logic was attempted
258+
assert mock_sleep.call_count == 2 # Assuming default max_attempts=3
259+
260+
261+
async def test_get_metadata_does_not_retry_on_unexpected_exception(rq_client: SqlRequestQueueClient) -> None:
262+
"""Test that get_metadata does not retry on unexpected exceptions."""
263+
with patch(
264+
'crawlee.storage_clients._sql._request_queue_client.SqlRequestQueueClient.get_session',
265+
) as mock_get_session:
266+
mock_session = AsyncMock()
267+
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
268+
mock_session.__aexit__ = AsyncMock(return_value=None)
269+
mock_session.execute = AsyncMock(side_effect=ValueError('unexpected error'))
270+
mock_get_session.return_value = mock_session
271+
272+
with (
273+
patch('crawlee._utils.retry._retry_sleep', new_callable=AsyncMock) as mock_sleep,
274+
pytest.raises(ValueError, match='unexpected error'),
275+
):
276+
await rq_client.get_metadata()
277+
278+
# Verify that retry logic was not attempted
279+
assert mock_sleep.call_count == 0

0 commit comments

Comments
 (0)