
Commit 11ff105

remove row_limit parameter completely

1 parent: d481ccc
9 files changed: 52 additions & 161 deletions

src/datacustomcode/client.py

Lines changed: 4 additions & 14 deletions
```diff
@@ -185,39 +185,29 @@ def _new_function_client(cls) -> Client:
         )
         return cls._instance

-    def read_dlo(
-        self, name: str, row_limit: Optional[int] = None
-    ) -> PySparkDataFrame:
+    def read_dlo(self, name: str) -> PySparkDataFrame:
         """Read a DLO from Data Cloud.

         Args:
             name: The name of the DLO to read.
-            row_limit: Maximum number of rows to fetch. When ``None``, the
-                reader's configured ``default_row_limit`` is used (1000 for
-                local development, no limit when deployed).

         Returns:
             A PySpark DataFrame containing the DLO data.
         """
         self._record_dlo_access(name)
-        return self._reader.read_dlo(name, row_limit=row_limit)
+        return self._reader.read_dlo(name)

-    def read_dmo(
-        self, name: str, row_limit: Optional[int] = None
-    ) -> PySparkDataFrame:
+    def read_dmo(self, name: str) -> PySparkDataFrame:
         """Read a DMO from Data Cloud.

         Args:
             name: The name of the DMO to read.
-            row_limit: Maximum number of rows to fetch. When ``None``, the
-                reader's configured ``default_row_limit`` is used (1000 for
-                local development, no limit when deployed).

         Returns:
             A PySpark DataFrame containing the DMO data.
         """
         self._record_dmo_access(name)
-        return self._reader.read_dmo(name, row_limit=row_limit)
+        return self._reader.read_dmo(name)

     def write_to_dlo(
         self, name: str, dataframe: PySparkDataFrame, write_mode: WriteMode, **kwargs
```
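
With `row_limit` gone from the public client, callers cap rows on the returned DataFrame instead. A minimal sketch of the new call pattern; the diff shows `Client` is a singleton, so direct construction here is an assumption and `"My_DLO"` is a placeholder name:

```python
from datacustomcode.client import Client

# Hypothetical setup: adjust to however your environment actually obtains
# the client instance.
client = Client()

# Before this commit: client.read_dlo("My_DLO", row_limit=100)
# After: read under the reader's configured default, then cap on the DataFrame.
df = client.read_dlo("My_DLO").limit(100)
df.show()
```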

src/datacustomcode/io/reader/base.py

Lines changed: 1 addition & 3 deletions
```diff
@@ -15,7 +15,7 @@
 from __future__ import annotations

 from abc import abstractmethod
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING, Union

 from datacustomcode.io.base import BaseDataAccessLayer

@@ -33,13 +33,11 @@ def read_dlo(
         self,
         name: str,
         schema: Union[AtomicType, StructType, str, None] = None,
-        row_limit: Optional[int] = None,
     ) -> PySparkDataFrame: ...

     @abstractmethod
     def read_dmo(
         self,
         name: str,
         schema: Union[AtomicType, StructType, str, None] = None,
-        row_limit: Optional[int] = None,
     ) -> PySparkDataFrame: ...
```
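
For illustration, a toy class conforming to the trimmed abstract interface. The abstract base's own name is not visible in this hunk, so the sketch stands alone rather than subclassing it:

```python
from __future__ import annotations

from typing import TYPE_CHECKING, Union

if TYPE_CHECKING:
    from pyspark.sql import DataFrame as PySparkDataFrame
    from pyspark.sql.types import AtomicType, StructType


class ToyReader:
    """Mirrors the post-commit signatures: no row_limit anywhere."""

    def read_dlo(
        self,
        name: str,
        schema: Union[AtomicType, StructType, str, None] = None,
    ) -> PySparkDataFrame:
        raise NotImplementedError

    def read_dmo(
        self,
        name: str,
        schema: Union[AtomicType, StructType, str, None] = None,
    ) -> PySparkDataFrame:
        raise NotImplementedError
```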

src/datacustomcode/io/reader/query_api.py

Lines changed: 11 additions & 21 deletions
```diff
@@ -139,9 +139,9 @@ def __init__(
                 reader delegates to :class:`SFCLIDataCloudReader` which calls
                 the Data Cloud REST API directly using the token obtained from
                 ``sf org display``, bypassing the CDP token-exchange flow.
-            default_row_limit: Default maximum number of rows to fetch when
-                ``row_limit`` is not explicitly passed to read methods. When
-                ``None``, no limit is applied (all rows are returned).
+            default_row_limit: Maximum number of rows to fetch automatically.
+                When ``None``, no limit is applied (all rows are returned).
+                Set via ``default_row_limit`` in ``config.yaml`` reader options.
         """
         self.spark = spark
         self._default_row_limit = default_row_limit
@@ -165,37 +165,30 @@ def __init__(
         )
         self._conn = create_cdp_connection(credentials, dataspace)

-    def _build_query(self, name: str, row_limit: Optional[int]) -> str:
-        """Build a SQL query, applying the default row limit when needed.
+    def _build_query(self, name: str) -> str:
+        """Build a SQL query, applying the configured default row limit.

         Args:
             name: Object name to query.
-            row_limit: Explicit row limit, or ``None`` to use the configured default.

         Returns:
             SQL query string.
         """
-        effective_limit = (
-            row_limit if row_limit is not None else self._default_row_limit
-        )
-        if effective_limit is not None:
-            return SQL_QUERY_TEMPLATE.format(name, effective_limit)
+        if self._default_row_limit is not None:
+            return SQL_QUERY_TEMPLATE.format(name, self._default_row_limit)
         return SQL_QUERY_TEMPLATE_NO_LIMIT.format(name)

     def read_dlo(
         self,
         name: str,
         schema: Union[AtomicType, StructType, str, None] = None,
-        row_limit: Optional[int] = None,
     ) -> PySparkDataFrame:
         """
         Read a Data Lake Object (DLO) from the Data Cloud.

         Args:
             name (str): The name of the DLO.
             schema (Optional[Union[AtomicType, StructType, str]]): Schema of the DLO.
-            row_limit (Optional[int]): Maximum number of rows to fetch.
-                When ``None``, the configured ``default_row_limit`` is used.

         Returns:
             PySparkDataFrame: The PySpark DataFrame.
@@ -204,9 +197,9 @@ def read_dlo(
             self, "_sf_cli_reader", None
         )
         if sf_cli_reader is not None:
-            return sf_cli_reader.read_dlo(name, schema, row_limit)
+            return sf_cli_reader.read_dlo(name, schema)

-        query = self._build_query(name, row_limit)
+        query = self._build_query(name)

         assert self._conn is not None
         pandas_df = self._conn.get_pandas_dataframe(query)
@@ -222,16 +215,13 @@ def read_dmo(
         self,
         name: str,
         schema: Union[AtomicType, StructType, str, None] = None,
-        row_limit: Optional[int] = None,
     ) -> PySparkDataFrame:
         """
         Read a Data Model Object (DMO) from the Data Cloud.

         Args:
             name (str): The name of the DMO.
             schema (Optional[Union[AtomicType, StructType, str]]): Schema of the DMO.
-            row_limit (Optional[int]): Maximum number of rows to fetch.
-                When ``None``, the configured ``default_row_limit`` is used.

         Returns:
             PySparkDataFrame: The PySpark DataFrame.
@@ -240,9 +230,9 @@ def read_dmo(
             self, "_sf_cli_reader", None
         )
         if sf_cli_reader is not None:
-            return sf_cli_reader.read_dmo(name, schema, row_limit)
+            return sf_cli_reader.read_dmo(name, schema)

-        query = self._build_query(name, row_limit)
+        query = self._build_query(name)

         assert self._conn is not None
         pandas_df = self._conn.get_pandas_dataframe(query)
```
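
The two SQL templates are constants in query_api.py whose exact text is not part of this diff; assuming conventional shapes, `_build_query` now resolves purely from the reader's configuration. A standalone sketch:

```python
from typing import Optional

# Assumed template shapes, for illustration only; the real constants live in
# src/datacustomcode/io/reader/query_api.py and may differ.
SQL_QUERY_TEMPLATE = "SELECT * FROM {} LIMIT {}"
SQL_QUERY_TEMPLATE_NO_LIMIT = "SELECT * FROM {}"


def build_query(name: str, default_row_limit: Optional[int]) -> str:
    # Mirrors the post-commit _build_query: only the configured default applies.
    if default_row_limit is not None:
        return SQL_QUERY_TEMPLATE.format(name, default_row_limit)
    return SQL_QUERY_TEMPLATE_NO_LIMIT.format(name)


print(build_query("My_DLO", 1000))  # local dev -> "SELECT * FROM My_DLO LIMIT 1000"
print(build_query("My_DLO", None))  # deployed  -> "SELECT * FROM My_DLO"

# The docstring points at config.yaml for the knob; a plausible (unverified)
# reader options snippet:
#   reader:
#     options:
#       default_row_limit: 1000
```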

src/datacustomcode/io/reader/sf_cli.py

Lines changed: 11 additions & 16 deletions
```diff
@@ -65,9 +65,9 @@ def __init__(
                 (e.g. the alias given to ``sf org login web --alias dev1``).
             dataspace: Optional dataspace identifier. If ``None`` or
                 ``"default"`` the query runs against the default dataspace.
-            default_row_limit: Default maximum number of rows to fetch when
-                ``row_limit`` is not explicitly passed to read methods. When
-                ``None``, no limit is applied (all rows are returned).
+            default_row_limit: Maximum number of rows to fetch automatically.
+                When ``None``, no limit is applied (all rows are returned).
+                Set via ``default_row_limit`` in ``config.yaml`` reader options.
         """
         self.spark = spark
         self.sf_cli_org = sf_cli_org
@@ -137,12 +137,14 @@ def _get_token(self) -> tuple[str, str]:
         logger.debug(f"Fetched token from SF CLI for org '{self.sf_cli_org}'")
         return access_token, instance_url

-    def _execute_query(self, sql: str, row_limit: Optional[int]) -> pd.DataFrame:
+    def _execute_query(self, sql: str) -> pd.DataFrame:
         """Execute *sql* against the Data Cloud REST endpoint.

+        The configured ``default_row_limit`` is automatically appended as a
+        ``LIMIT`` clause when set (typically for local development).
+
         Args:
             sql: Base SQL query (no ``LIMIT`` clause).
-            row_limit: Maximum rows to return, or ``None`` for no limit.

         Returns:
             Pandas DataFrame with query results.
@@ -152,14 +154,11 @@ def _execute_query(self, sql: str, row_limit: Optional[int]) -> pd.DataFrame:
         """
         access_token, instance_url = self._get_token()

-        effective_limit = (
-            row_limit if row_limit is not None else self._default_row_limit
-        )
         url = f"{instance_url}/services/data/{API_VERSION}/ssot/query-sql"
         headers = {"Authorization": f"Bearer {access_token}"}
         params = {"dataspace": self.dataspace}
-        if effective_limit is not None:
-            body = {"sql": f"{sql} LIMIT {effective_limit}"}
+        if self._default_row_limit is not None:
+            body = {"sql": f"{sql} LIMIT {self._default_row_limit}"}
         else:
             body = {"sql": sql}

@@ -201,19 +200,17 @@ def read_dlo(
         self,
         name: str,
         schema: Union[AtomicType, StructType, str, None] = None,
-        row_limit: Optional[int] = None,
     ) -> PySparkDataFrame:
         """Read a Data Lake Object (DLO) from Data Cloud.

         Args:
             name: DLO name.
             schema: Optional explicit schema.
-            row_limit: Maximum rows to fetch, or ``None`` to use the configured default.

         Returns:
             PySpark DataFrame.
         """
-        pandas_df = self._execute_query(f"SELECT * FROM {name}", row_limit)
+        pandas_df = self._execute_query(f"SELECT * FROM {name}")
         if not schema:
             schema = _pandas_to_spark_schema(pandas_df)
         return self.spark.createDataFrame(pandas_df, schema)
@@ -222,19 +219,17 @@ def read_dmo(
         self,
         name: str,
         schema: Union[AtomicType, StructType, str, None] = None,
-        row_limit: Optional[int] = None,
     ) -> PySparkDataFrame:
         """Read a Data Model Object (DMO) from Data Cloud.

         Args:
             name: DMO name.
             schema: Optional explicit schema.
-            row_limit: Maximum rows to fetch, or ``None`` to use the configured default.

         Returns:
             PySpark DataFrame.
         """
-        pandas_df = self._execute_query(f"SELECT * FROM {name}", row_limit)
+        pandas_df = self._execute_query(f"SELECT * FROM {name}")
         if not schema:
             schema = _pandas_to_spark_schema(pandas_df)
         return self.spark.createDataFrame(pandas_df, schema)
```
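
For context, a sketch of the request `_execute_query` now issues. The endpoint path, headers, params, and body shape are taken from the diff above; the use of `requests.post`, the placeholder values, and the API version are assumptions:

```python
import requests

# Placeholders: in the real reader these come from _get_token() and config.
access_token = "<token from sf org display>"
instance_url = "https://example.my.salesforce.com"
API_VERSION = "v63.0"  # assumed value; the constant is defined in sf_cli.py

sql = "SELECT * FROM My_DLO"
default_row_limit = 1000  # None when deployed

url = f"{instance_url}/services/data/{API_VERSION}/ssot/query-sql"
headers = {"Authorization": f"Bearer {access_token}"}
params = {"dataspace": "default"}
if default_row_limit is not None:
    # The configured default is the only row cap left after this commit.
    body = {"sql": f"{sql} LIMIT {default_row_limit}"}
else:
    body = {"sql": sql}

response = requests.post(url, headers=headers, params=params, json=body)
```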

src/datacustomcode/io/writer/print.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -90,7 +90,7 @@ def validate_dataframe_columns_against_dlo(
             schema.
         """
         # Get DLO schema (no data, just schema)
-        dlo_df = self.reader.read_dlo(dlo_name, row_limit=0)
+        dlo_df = self.reader.read_dlo(dlo_name).limit(0)
         dlo_columns = set(dlo_df.columns)
         df_columns = set(dataframe.columns)
```
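
`.limit(0)` preserves column metadata while returning no rows, which is all the column validation needs. One nuance worth noting: the limit is now applied after `read_dlo` runs, so the underlying query is capped only by the reader's `default_row_limit` (if any) rather than by a server-side `LIMIT 0`. A quick illustration of the schema-preserving part:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

df = spark.createDataFrame([("a", 1), ("b", 2)], ["name", "value"])
empty = df.limit(0)

assert empty.columns == ["name", "value"]  # schema kept
assert empty.count() == 0                  # rows dropped
```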

tests/io/reader/test_query_api.py

Lines changed: 0 additions & 33 deletions
```diff
@@ -296,30 +296,6 @@ def test_read_dmo_with_schema(
         args, _ = reader_without_init.spark.createDataFrame.call_args
         assert args[1] is custom_schema

-    def test_read_dlo_with_custom_row_limit(
-        self, reader_without_init, mock_connection, mock_pandas_dataframe
-    ):
-        """Test read_dlo method with custom row_limit."""
-        reader_without_init._conn = mock_connection
-
-        reader_without_init.read_dlo("test_dlo", row_limit=50)
-
-        mock_connection.get_pandas_dataframe.assert_called_once_with(
-            SQL_QUERY_TEMPLATE.format("test_dlo", 50)
-        )
-
-    def test_read_dmo_with_custom_row_limit(
-        self, reader_without_init, mock_connection, mock_pandas_dataframe
-    ):
-        """Test read_dmo method with custom row_limit."""
-        reader_without_init._conn = mock_connection
-
-        reader_without_init.read_dmo("test_dmo", row_limit=25)
-
-        mock_connection.get_pandas_dataframe.assert_called_once_with(
-            SQL_QUERY_TEMPLATE.format("test_dmo", 25)
-        )
-
     def test_read_dlo_schema_is_lowercase(
         self, reader_without_init, mock_connection, mock_pandas_dataframe
     ):
@@ -414,12 +390,3 @@ def test_read_dmo_no_limit_when_deployed(
             SQL_QUERY_TEMPLATE_NO_LIMIT.format("test_dmo")
         )

-    def test_read_dlo_explicit_limit_still_applied_when_deployed(
-        self, reader_no_limit, mock_connection, mock_pandas_dataframe
-    ):
-        """An explicit row_limit always applies, even without a default."""
-        reader_no_limit._conn = mock_connection
-        reader_no_limit.read_dlo("test_dlo", row_limit=500)
-        mock_connection.get_pandas_dataframe.assert_called_once_with(
-            SQL_QUERY_TEMPLATE.format("test_dlo", 500)
-        )
```
