Skip to content

Commit 2d6e3ad

Browse files
committed
add tests
1 parent 11f9e34 commit 2d6e3ad

8 files changed

Lines changed: 125 additions & 9 deletions

File tree

src/crawlee/storage_clients/_redis/_dataset_client.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,8 @@ async def push_data(self, data: list[dict[str, Any]] | dict[str, Any]) -> None:
126126
if isinstance(data, dict):
127127
data = [data]
128128

129-
async with self._get_pipeline() as pipe:
130-
try:
129+
try:
130+
async with self._get_pipeline() as pipe:
131131
pipe.json().arrappend(self._items_key, '$', *data)
132132
await self._update_metadata(
133133
pipe,
@@ -136,8 +136,8 @@ async def push_data(self, data: list[dict[str, Any]] | dict[str, Any]) -> None:
136136
),
137137
)
138138

139-
except RedisError as e:
140-
raise StorageWriteError(e) from e
139+
except RedisError as e:
140+
raise StorageWriteError(e) from e
141141

142142
@override
143143
async def get_data(

src/crawlee/storage_clients/_redis/_key_value_store_client.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,8 @@ async def set_value(self, *, key: str, value: Any, content_type: str | None = No
143143
content_type=content_type,
144144
size=size,
145145
)
146-
147-
async with self._get_pipeline() as pipe:
148-
try:
146+
try:
147+
async with self._get_pipeline() as pipe:
149148
# redis-py typing issue
150149
await await_redis_response(pipe.hset(self._items_key, key, value_bytes)) # ty: ignore[invalid-argument-type]
151150

@@ -160,8 +159,8 @@ async def set_value(self, *, key: str, value: Any, content_type: str | None = No
160159
pipe, **MetadataUpdateParams(update_accessed_at=True, update_modified_at=True)
161160
)
162161

163-
except RedisError as e:
164-
raise StorageWriteError(e) from e
162+
except RedisError as e:
163+
raise StorageWriteError(e) from e
165164

166165
@override
167166
async def get_value(self, *, key: str) -> KeyValueStoreRecord | None:

tests/unit/storage_clients/_file_system/test_fs_dataset_client.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
import json
55
from pathlib import Path
66
from typing import TYPE_CHECKING
7+
from unittest.mock import patch
78

89
import pytest
910

1011
from crawlee._consts import METADATA_FILENAME
1112
from crawlee.configuration import Configuration
13+
from crawlee.errors import StorageWriteError
1214
from crawlee.storage_clients import FileSystemStorageClient
1315

1416
if TYPE_CHECKING:
@@ -151,3 +153,15 @@ async def test_data_persistence_across_reopens() -> None:
151153
assert data.items[0] == test_data
152154

153155
await reopened_client.drop()
156+
157+
158+
async def test_error_handling_on_push_failure(dataset_client: FileSystemDatasetClient) -> None:
159+
"""Test that StorageWriteError is raised when file writing fails."""
160+
with (
161+
patch('crawlee.storage_clients._file_system._dataset_client.atomic_write', side_effect=OSError('disk full')),
162+
pytest.raises(StorageWriteError) as exc_info,
163+
):
164+
await dataset_client.push_data({'test': 'data'})
165+
166+
assert isinstance(exc_info.value.cause, OSError)
167+
assert str(exc_info.value.cause) == 'disk full'

tests/unit/storage_clients/_file_system/test_fs_kvs_client.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
import asyncio
44
import json
55
from typing import TYPE_CHECKING
6+
from unittest.mock import patch
67

78
import pytest
89

910
from crawlee._consts import METADATA_FILENAME
1011
from crawlee.configuration import Configuration
12+
from crawlee.errors import StorageWriteError
1113
from crawlee.storage_clients import FileSystemStorageClient
1214

1315
if TYPE_CHECKING:
@@ -199,3 +201,18 @@ async def test_data_persistence_across_reopens(configuration: Configuration) ->
199201
assert record.value == test_value
200202

201203
await reopened_client.drop()
204+
205+
206+
async def test_error_handling_on_set_failure(kvs_client: FileSystemKeyValueStoreClient) -> None:
207+
"""Test that StorageWriteError is raised when file writing fails."""
208+
with (
209+
patch(
210+
'crawlee.storage_clients._file_system._key_value_store_client.atomic_write',
211+
side_effect=OSError('disk full'),
212+
),
213+
pytest.raises(StorageWriteError) as exc_info,
214+
):
215+
await kvs_client.set_value(key='test', value='data')
216+
217+
assert isinstance(exc_info.value.cause, OSError)
218+
assert str(exc_info.value.cause) == 'disk full'

tests/unit/storage_clients/_redis/test_redis_dataset_client.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@
22

33
import asyncio
44
from typing import TYPE_CHECKING
5+
from unittest.mock import AsyncMock, MagicMock, patch
56

67
import pytest
8+
from redis.exceptions import RedisError
79

10+
from crawlee.errors import StorageWriteError
811
from crawlee.storage_clients import RedisStorageClient
912
from crawlee.storage_clients._redis._utils import await_redis_response
1013

@@ -144,3 +147,22 @@ async def test_metadata_record_updates(dataset_client: RedisDatasetClient) -> No
144147
assert metadata.created_at == initial_created
145148
assert metadata.modified_at > initial_modified
146149
assert metadata.accessed_at > accessed_after_get
150+
151+
152+
async def test_error_handling_on_push_failure(dataset_client: RedisDatasetClient) -> None:
153+
"""Test that StorageWriteError is raised when Redis writing fails."""
154+
mock_pipe = MagicMock()
155+
mock_pipe.execute = AsyncMock(side_effect=RedisError('connection lost'))
156+
157+
mock_pipeline_ctx = MagicMock()
158+
mock_pipeline_ctx.__aenter__ = AsyncMock(return_value=mock_pipe)
159+
mock_pipeline_ctx.__aexit__ = AsyncMock(return_value=None)
160+
161+
with (
162+
patch.object(dataset_client.redis, 'pipeline', return_value=mock_pipeline_ctx),
163+
pytest.raises(StorageWriteError) as exc_info,
164+
):
165+
await dataset_client.push_data({'test': 'data'})
166+
167+
assert isinstance(exc_info.value.cause, RedisError)
168+
assert str(exc_info.value.cause) == 'connection lost'

tests/unit/storage_clients/_redis/test_redis_kvs_client.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@
33
import asyncio
44
import json
55
from typing import TYPE_CHECKING
6+
from unittest.mock import AsyncMock, MagicMock, patch
67

78
import pytest
9+
from redis.exceptions import RedisError
810

11+
from crawlee.errors import StorageWriteError
912
from crawlee.storage_clients import RedisStorageClient
1013
from crawlee.storage_clients._redis._utils import await_redis_response
1114

@@ -215,3 +218,22 @@ async def test_metadata_record_updates(kvs_client: RedisKeyValueStoreClient) ->
215218
assert metadata.created_at == initial_created
216219
assert metadata.modified_at > initial_modified
217220
assert metadata.accessed_at > accessed_after_read
221+
222+
223+
async def test_error_handling_on_set_failure(kvs_client: RedisKeyValueStoreClient) -> None:
224+
"""Test that StorageWriteError is raised when Redis writing fails."""
225+
mock_pipe = MagicMock()
226+
mock_pipe.execute = AsyncMock(side_effect=RedisError('connection lost'))
227+
228+
mock_pipeline_ctx = MagicMock()
229+
mock_pipeline_ctx.__aenter__ = AsyncMock(return_value=mock_pipe)
230+
mock_pipeline_ctx.__aexit__ = AsyncMock(return_value=None)
231+
232+
with (
233+
patch.object(kvs_client.redis, 'pipeline', return_value=mock_pipeline_ctx),
234+
pytest.raises(StorageWriteError) as exc_info,
235+
):
236+
await kvs_client.set_value(key='test', value='test-value')
237+
238+
assert isinstance(exc_info.value.cause, RedisError)
239+
assert str(exc_info.value.cause) == 'connection lost'

tests/unit/storage_clients/_sql/test_sql_dataset_client.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@
22

33
import asyncio
44
from typing import TYPE_CHECKING
5+
from unittest.mock import AsyncMock, patch
56

67
import pytest
78
from sqlalchemy import inspect, select
9+
from sqlalchemy.exc import SQLAlchemyError
810
from sqlalchemy.ext.asyncio import create_async_engine
911

1012
from crawlee.configuration import Configuration
13+
from crawlee.errors import StorageWriteError
1114
from crawlee.storage_clients import SqlStorageClient
1215
from crawlee.storage_clients._sql._db_models import DatasetItemDb, DatasetMetadataDb
1316

@@ -231,3 +234,21 @@ async def test_data_persistence_across_reopens(configuration: Configuration) ->
231234
assert data.items[0] == test_data
232235

233236
await reopened_client.drop()
237+
238+
239+
async def test_error_handling_on_push_failure(dataset_client: SqlDatasetClient) -> None:
240+
"""Test that StorageWriteError is raised when SQL writing fails."""
241+
with patch(
242+
'crawlee.storage_clients._sql._dataset_client.SqlDatasetClient.get_session',
243+
) as mock_get_session:
244+
mock_session = AsyncMock()
245+
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
246+
mock_session.__aexit__ = AsyncMock(return_value=None)
247+
mock_session.execute = AsyncMock(side_effect=SQLAlchemyError('db error'))
248+
mock_get_session.return_value = mock_session
249+
250+
with pytest.raises(StorageWriteError) as exc_info:
251+
await dataset_client.push_data({'key': 'value'})
252+
253+
assert isinstance(exc_info.value.cause, SQLAlchemyError)
254+
assert str(exc_info.value.cause) == 'db error'

tests/unit/storage_clients/_sql/test_sql_kvs_client.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,15 @@
33
import asyncio
44
import json
55
from typing import TYPE_CHECKING
6+
from unittest.mock import AsyncMock, patch
67

78
import pytest
89
from sqlalchemy import inspect, select
10+
from sqlalchemy.exc import SQLAlchemyError
911
from sqlalchemy.ext.asyncio import create_async_engine
1012

1113
from crawlee.configuration import Configuration
14+
from crawlee.errors import StorageWriteError
1215
from crawlee.storage_clients import SqlStorageClient
1316
from crawlee.storage_clients._sql._db_models import KeyValueStoreMetadataDb, KeyValueStoreRecordDb
1417
from crawlee.storage_clients.models import KeyValueStoreMetadata
@@ -281,3 +284,21 @@ async def test_data_persistence_across_reopens(configuration: Configuration) ->
281284
assert record.value == test_value
282285

283286
await reopened_client.drop()
287+
288+
289+
async def test_error_handling_on_set_failure(kvs_client: SqlKeyValueStoreClient) -> None:
290+
"""Test that StorageWriteError is raised when SQL writing fails."""
291+
with patch(
292+
'crawlee.storage_clients._sql._key_value_store_client.SqlKeyValueStoreClient.get_session',
293+
) as mock_get_session:
294+
mock_session = AsyncMock()
295+
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
296+
mock_session.__aexit__ = AsyncMock(return_value=None)
297+
mock_session.execute = AsyncMock(side_effect=SQLAlchemyError('db error'))
298+
mock_get_session.return_value = mock_session
299+
300+
with pytest.raises(StorageWriteError) as exc_info:
301+
await kvs_client.set_value(key='test', value='test-value')
302+
303+
assert isinstance(exc_info.value.cause, SQLAlchemyError)
304+
assert str(exc_info.value.cause) == 'db error'

0 commit comments

Comments
 (0)