
Commit 4287a56

Fixed some lint and test problems.

Parent: 06e71b3
4 files changed: 36 additions & 22 deletions

src/datacustomcode/io/reader/query_api.py (14 additions & 4 deletions)

@@ -85,7 +85,10 @@ def __init__(self, spark: SparkSession) -> None:
         )
 
     def read_dlo(
-        self, name: str, schema: Union[AtomicType, StructType, str, None] = None, row_limit: int = 1000
+        self,
+        name: str,
+        schema: Union[AtomicType, StructType, str, None] = None,
+        row_limit: int = 1000,
     ) -> PySparkDataFrame:
         """
         Read a Data Lake Object (DLO) from the Data Cloud, limited to a number of rows.
@@ -98,17 +101,24 @@ def read_dlo(
         Returns:
             PySparkDataFrame: The PySpark DataFrame.
         """
-        pandas_df = self._conn.get_pandas_dataframe(SQL_QUERY_TEMPLATE.format(name, row_limit))
+        pandas_df = self._conn.get_pandas_dataframe(
+            SQL_QUERY_TEMPLATE.format(name, row_limit)
+        )
         if not schema:
             # auto infer schema
             schema = _pandas_to_spark_schema(pandas_df)
         spark_dataframe = self.spark.createDataFrame(pandas_df, schema)
         return spark_dataframe
 
     def read_dmo(
-        self, name: str, schema: Union[AtomicType, StructType, str, None] = None, row_limit: int = 1000
+        self,
+        name: str,
+        schema: Union[AtomicType, StructType, str, None] = None,
+        row_limit: int = 1000,
     ) -> PySparkDataFrame:
-        pandas_df = self._conn.get_pandas_dataframe(SQL_QUERY_TEMPLATE.format(name, row_limit))
+        pandas_df = self._conn.get_pandas_dataframe(
+            SQL_QUERY_TEMPLATE.format(name, row_limit)
+        )
         if not schema:
             # auto infer schema
             schema = _pandas_to_spark_schema(pandas_df)
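
Both signature reflows here are behavior-preserving: row_limit still defaults to 1000 and is interpolated into the query template together with the object name. A minimal sketch of the query construction, assuming a two-placeholder template along the lines of SELECT * FROM {} LIMIT {} (the real SQL_QUERY_TEMPLATE constant is defined elsewhere in query_api.py and is not part of this diff):

    # Hypothetical stand-in; the actual SQL_QUERY_TEMPLATE is not shown in this commit.
    SQL_QUERY_TEMPLATE = "SELECT * FROM {} LIMIT {}"

    # reader.read_dlo("Account_DLO", row_limit=500) would issue:
    print(SQL_QUERY_TEMPLATE.format("Account_DLO", 500))
    # SELECT * FROM Account_DLO LIMIT 500

    # reader.read_dlo("Account_DLO") keeps the default limit:
    print(SQL_QUERY_TEMPLATE.format("Account_DLO", 1000))
    # SELECT * FROM Account_DLO LIMIT 1000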

src/datacustomcode/io/writer/print.py (16 additions & 13 deletions)

@@ -15,17 +15,19 @@
 
 
 from pyspark.sql import DataFrame as PySparkDataFrame
-from pyspark.sql import SparkSession
 
-from datacustomcode.io.writer.base import BaseDataCloudWriter, WriteMode
 from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader
+from datacustomcode.io.writer.base import BaseDataCloudWriter, WriteMode
 
 
 class PrintDataCloudWriter(BaseDataCloudWriter):
     CONFIG_NAME = "PrintDataCloudWriter"
 
     def validate_dataframe_columns_against_dlo(
-        self, dataframe: PySparkDataFrame, dlo_name: str, reader: QueryAPIDataCloudReader
+        self,
+        dataframe: PySparkDataFrame,
+        dlo_name: str,
+        reader: QueryAPIDataCloudReader,
     ) -> None:
         """
         Validates that all columns in the given dataframe exist in the DLO schema.
@@ -36,7 +38,8 @@ def validate_dataframe_columns_against_dlo(
             reader (QueryAPIDataCloudReader): The reader to use for schema retrieval.
 
         Raises:
-            ValueError: If any columns in the dataframe are not present in the DLO schema.
+            ValueError: If any columns in the dataframe are not present in the DLO
+                schema.
         """
         # Get DLO schema (no data, just schema)
         dlo_df = reader.read_dlo(dlo_name, row_limit=0)
@@ -47,23 +50,22 @@ def validate_dataframe_columns_against_dlo(
         extra_columns = df_columns - dlo_columns
         if extra_columns:
             raise ValueError(
-                f"The following columns are not present in the DLO '{dlo_name}': {sorted(extra_columns)}.\n"
+                "The following columns are not present in the \n"
+                f"DLO '{dlo_name}': {sorted(extra_columns)}.\n"
                 "To fix this error, you can either:\n"
                 " - Drop these columns from your DataFrame before writing, e.g.,\n"
                 "   dataframe = dataframe.drop({cols})\n"
-                " - Or, add these columns to the DLO schema in Data Cloud."
-                .format(cols=sorted(extra_columns))
+                " - Or, add these columns to the DLO schema in Data Cloud.".format(
+                    cols=sorted(extra_columns)
+                )
             )
 
-
     def write_to_dlo(
         self, name: str, dataframe: PySparkDataFrame, write_mode: WriteMode
     ) -> None:
-        # Create SparkSession if not already created
-        spark = SparkSession.builder.appName("YourAppName").getOrCreate()
 
         # Instantiate the reader
-        reader = QueryAPIDataCloudReader(spark)
+        reader = QueryAPIDataCloudReader(self.spark)
 
         # Validate columns before proceeding
         self.validate_dataframe_columns_against_dlo(dataframe, name, reader)
@@ -73,7 +75,8 @@ def write_to_dlo(
     def write_to_dmo(
         self, name: str, dataframe: PySparkDataFrame, write_mode: WriteMode
    ) -> None:
-        #The way its validating for DLO and dataframes columns,
-        # its not going to work for DMO because DMO may not exists, so just show the dataframe.
+        # The way its validating for DLO and dataframes columns,
+        # its not going to work for DMO because DMO may not exists,
+        # so just show the dataframe.
 
         dataframe.show()
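
Beyond the line-length cleanups, the substantive fix in this file is that write_to_dlo now reuses the writer's own Spark session (self.spark) instead of building a throwaway session named "YourAppName". The validation itself is a plain set difference against a zero-row read of the DLO. A self-contained sketch of that check, with plain column lists standing in for the DataFrame and the DLO schema (all names illustrative):

    def validate_columns(df_columns, dlo_columns, dlo_name):
        # Mirrors the set difference in validate_dataframe_columns_against_dlo.
        extra = set(df_columns) - set(dlo_columns)
        if extra:
            raise ValueError(
                f"The following columns are not present in the DLO "
                f"'{dlo_name}': {sorted(extra)}."
            )

    try:
        validate_columns(["id", "amount", "typo_col"], ["id", "amount"], "Sales_DLO")
    except ValueError as exc:
        print(exc)  # ... not present in the DLO 'Sales_DLO': ['typo_col'].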

src/datacustomcode/scan.py (2 additions & 1 deletion)

@@ -16,6 +16,7 @@
 
 import ast
 import os
+import sys
 from typing import (
     Any,
     ClassVar,
@@ -25,7 +26,6 @@
 )
 
 import pydantic
-import sys
 
 from datacustomcode.version import get_version
 
@@ -43,6 +43,7 @@
 
 STANDARD_LIBS = set(sys.stdlib_module_names)
 
+
 class DataAccessLayerCalls(pydantic.BaseModel):
     read_dlo: frozenset[str]
     read_dmo: frozenset[str]
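
The scan.py edits are pure import hygiene: import sys moves into the standard-library group above third-party imports like pydantic, and a second blank line lands before the class definition, both typical isort/PEP 8 lint fixes. For context, sys.stdlib_module_names, which STANDARD_LIBS is built from, is a frozenset of standard-library module names available on Python 3.10 and later:

    import sys

    STANDARD_LIBS = set(sys.stdlib_module_names)

    print("os" in STANDARD_LIBS)        # True: standard library
    print("pydantic" in STANDARD_LIBS)  # False: third-party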

tests/io/reader/test_query_api.py (4 additions & 4 deletions)

@@ -143,7 +143,7 @@ def test_read_dlo(
 
     # Verify get_pandas_dataframe was called with the right SQL
     mock_connection.get_pandas_dataframe.assert_called_once_with(
-        SQL_QUERY_TEMPLATE.format("test_dlo")
+        SQL_QUERY_TEMPLATE.format("test_dlo", 1000)
     )
 
     # Verify DataFrame was created with auto-inferred schema
@@ -172,7 +172,7 @@ def test_read_dlo_with_schema(
 
     # Verify get_pandas_dataframe was called with the right SQL
     mock_connection.get_pandas_dataframe.assert_called_once_with(
-        SQL_QUERY_TEMPLATE.format("test_dlo")
+        SQL_QUERY_TEMPLATE.format("test_dlo", 1000)
     )
 
     # Verify DataFrame was created with provided schema
@@ -192,7 +192,7 @@ def test_read_dmo(
 
     # Verify get_pandas_dataframe was called with the right SQL
     mock_connection.get_pandas_dataframe.assert_called_once_with(
-        SQL_QUERY_TEMPLATE.format("test_dmo")
+        SQL_QUERY_TEMPLATE.format("test_dmo", 1000)
     )
 
     # Verify DataFrame was created
@@ -220,7 +220,7 @@ def test_read_dmo_with_schema(
 
     # Verify get_pandas_dataframe was called with the right SQL
     mock_connection.get_pandas_dataframe.assert_called_once_with(
-        SQL_QUERY_TEMPLATE.format("test_dmo")
+        SQL_QUERY_TEMPLATE.format("test_dmo", 1000)
     )
 
     # Verify DataFrame was created with provided schema
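
These test updates follow directly from the reader change: read_dlo and read_dmo always format the template with both the name and the row_limit, so the mocked connection sees a two-argument query even when callers rely on the default of 1000. A sketch of the assertion pattern, assuming the suite uses unittest.mock and the same hypothetical template as above:

    from unittest.mock import MagicMock

    SQL_QUERY_TEMPLATE = "SELECT * FROM {} LIMIT {}"  # hypothetical stand-in

    mock_connection = MagicMock()
    mock_connection.get_pandas_dataframe(SQL_QUERY_TEMPLATE.format("test_dlo", 1000))

    # Passes only if the expected SQL includes the default row_limit:
    mock_connection.get_pandas_dataframe.assert_called_once_with(
        SQL_QUERY_TEMPLATE.format("test_dlo", 1000)
    )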
