11from __future__ import annotations
22
3+ import asyncio
34import logging
5+ from datetime import timedelta
46from io import StringIO
57from typing import TYPE_CHECKING , overload
68
911from crawlee import service_locator
1012from crawlee ._utils .docs import docs_group
1113from crawlee ._utils .file import export_csv_to_stream , export_json_to_stream
14+ from crawlee .errors import StorageWriteError
1215
1316from ._base import Storage
1417from ._key_value_store import KeyValueStore
@@ -134,7 +137,13 @@ async def drop(self) -> None:
async def purge(self) -> None:
    """Purge the dataset by delegating to the underlying storage client's `purge()`."""
    storage_client = self._client
    await storage_client.purge()
136139
async def push_data(
    self,
    data: list[dict[str, Any]] | dict[str, Any],
    *,
    max_attempts: int = 5,
    wait_time_between_retries: timedelta = timedelta(seconds=1),
) -> None:
    """Store a single item or a list of items in the dataset.

    The size of the data is limited by the receiving API; the JSON representation of each item
    must be smaller than 9MB.

    A write that fails with `StorageWriteError` is retried, pausing between consecutive attempts.
    When every attempt fails, a warning is logged and the method returns without raising.

    Args:
        data: A JSON serializable data structure to be stored in the dataset. The JSON representation
            of each item must be smaller than 9MB.
        max_attempts: The maximum number of attempts to push data in case of failure.
        wait_time_between_retries: The time to wait between retries in case of failure.

    Raises:
        ValueError: If `max_attempts` is lower than 1.
    """
    if max_attempts < 1:
        raise ValueError('max_attempts must be at least 1')

    retry_delay_seconds = wait_time_between_retries.total_seconds()
    failure: StorageWriteError | None = None

    for attempt_number in range(1, max_attempts + 1):
        try:
            await self._client.push_data(data=data)
        except StorageWriteError as error:
            # Keep a reference: the `as` name is unbound once the except block ends.
            failure = error
            # No point in waiting after the final attempt.
            if attempt_number < max_attempts:
                await asyncio.sleep(retry_delay_seconds)
        else:
            # The write succeeded, nothing left to do.
            return

    # Reaching this point means every attempt failed.
    if failure:
        logger.warning(f'Failed to push data after {max_attempts} attempts with error: {failure.cause}')
149176
150177 async def get_data (
151178 self ,
0 commit comments