3737
3838
# Query templates filled in with ``str.format``. The first placeholder is the
# object name; the second (limited variant only) is the maximum row count.
SQL_QUERY_TEMPLATE: Final[str] = "SELECT * FROM {} LIMIT {}"
SQL_QUERY_TEMPLATE_NO_LIMIT: Final[str] = "SELECT * FROM {}"
4041
4142
4243def create_cdp_connection (
@@ -122,6 +123,7 @@ def __init__(
122123 credentials_profile : str = "default" ,
123124 dataspace : Optional [str ] = None ,
124125 sf_cli_org : Optional [str ] = None ,
126+ default_row_limit : Optional [int ] = None ,
125127 ) -> None :
126128 """Initialize QueryAPIDataCloudReader.
127129
@@ -137,8 +139,12 @@ def __init__(
137139 reader delegates to :class:`SFCLIDataCloudReader` which calls
138140 the Data Cloud REST API directly using the token obtained from
139141 ``sf org display``, bypassing the CDP token-exchange flow.
142+ default_row_limit: Default maximum number of rows to fetch when
143+ ``row_limit`` is not explicitly passed to read methods. When
144+ ``None``, no limit is applied (all rows are returned).
140145 """
141146 self .spark = spark
147+ self ._default_row_limit = default_row_limit
142148 if sf_cli_org :
143149 logger .debug (
144150 f"Initializing QueryAPIDataCloudReader with SF CLI org '{ sf_cli_org } '"
@@ -147,6 +153,7 @@ def __init__(
147153 spark = spark ,
148154 sf_cli_org = sf_cli_org ,
149155 dataspace = dataspace ,
156+ default_row_limit = default_row_limit ,
150157 )
151158 self ._conn = None
152159 else :
@@ -158,19 +165,37 @@ def __init__(
158165 )
159166 self ._conn = create_cdp_connection (credentials , dataspace )
160167
def _build_query(self, name: str, row_limit: Optional[int]) -> str:
    """Build the ``SELECT *`` query for an object, honoring the row limit.

    An explicit ``row_limit`` takes precedence over the reader's
    configured default. When neither is set, the returned query has no
    ``LIMIT`` clause.

    Args:
        name: Object name to query. It is interpolated into the SQL text
            as-is.
        row_limit: Explicit row limit, or ``None`` to use the configured
            default.

    Returns:
        SQL query string.
    """
    # Compare against None (not truthiness) so an explicit limit of 0 is
    # still honored instead of falling through to the default.
    effective_limit = (
        row_limit if row_limit is not None else self._default_row_limit
    )
    if effective_limit is not None:
        return SQL_QUERY_TEMPLATE.format(name, effective_limit)
    return SQL_QUERY_TEMPLATE_NO_LIMIT.format(name)
184+
161185 def read_dlo (
162186 self ,
163187 name : str ,
164188 schema : Union [AtomicType , StructType , str , None ] = None ,
165- row_limit : int = 1000 ,
189+ row_limit : Optional [ int ] = None ,
166190 ) -> PySparkDataFrame :
167191 """
168- Read a Data Lake Object (DLO) from the Data Cloud, limited to a number of rows .
192+ Read a Data Lake Object (DLO) from the Data Cloud.
169193
170194 Args:
171195 name (str): The name of the DLO.
172196 schema (Optional[Union[AtomicType, StructType, str]]): Schema of the DLO.
173- row_limit (int): Maximum number of rows to fetch.
197+ row_limit (Optional[int]): Maximum number of rows to fetch.
198+ When ``None``, the configured ``default_row_limit`` is used.
174199
175200 Returns:
176201 PySparkDataFrame: The PySpark DataFrame.
@@ -181,7 +206,7 @@ def read_dlo(
181206 if sf_cli_reader is not None :
182207 return sf_cli_reader .read_dlo (name , schema , row_limit )
183208
184- query = SQL_QUERY_TEMPLATE . format (name , row_limit )
209+ query = self . _build_query (name , row_limit )
185210
186211 assert self ._conn is not None
187212 pandas_df = self ._conn .get_pandas_dataframe (query )
@@ -197,15 +222,16 @@ def read_dmo(
197222 self ,
198223 name : str ,
199224 schema : Union [AtomicType , StructType , str , None ] = None ,
200- row_limit : int = 1000 ,
225+ row_limit : Optional [ int ] = None ,
201226 ) -> PySparkDataFrame :
202227 """
203- Read a Data Model Object (DMO) from the Data Cloud, limited to a number of rows .
228+ Read a Data Model Object (DMO) from the Data Cloud.
204229
205230 Args:
206231 name (str): The name of the DMO.
207232 schema (Optional[Union[AtomicType, StructType, str]]): Schema of the DMO.
208- row_limit (int): Maximum number of rows to fetch.
233+ row_limit (Optional[int]): Maximum number of rows to fetch.
234+ When ``None``, the configured ``default_row_limit`` is used.
209235
210236 Returns:
211237 PySparkDataFrame: The PySpark DataFrame.
@@ -216,7 +242,7 @@ def read_dmo(
216242 if sf_cli_reader is not None :
217243 return sf_cli_reader .read_dmo (name , schema , row_limit )
218244
219- query = SQL_QUERY_TEMPLATE . format (name , row_limit )
245+ query = self . _build_query (name , row_limit )
220246
221247 assert self ._conn is not None
222248 pandas_df = self ._conn .get_pandas_dataframe (query )
0 commit comments