
Commit 4287a56

Fixed some lint and test problems.

Parent: 06e71b3
4 files changed: 36 additions & 22 deletions

src/datacustomcode/io/reader/query_api.py (14 additions & 4 deletions)

@@ -85,7 +85,10 @@ def __init__(self, spark: SparkSession) -> None:
         )
 
     def read_dlo(
-        self, name: str, schema: Union[AtomicType, StructType, str, None] = None, row_limit: int = 1000
+        self,
+        name: str,
+        schema: Union[AtomicType, StructType, str, None] = None,
+        row_limit: int = 1000,
     ) -> PySparkDataFrame:
         """
         Read a Data Lake Object (DLO) from the Data Cloud, limited to a number of rows.
@@ -98,17 +101,24 @@ def read_dlo(
         Returns:
             PySparkDataFrame: The PySpark DataFrame.
         """
-        pandas_df = self._conn.get_pandas_dataframe(SQL_QUERY_TEMPLATE.format(name, row_limit))
+        pandas_df = self._conn.get_pandas_dataframe(
+            SQL_QUERY_TEMPLATE.format(name, row_limit)
+        )
         if not schema:
             # auto infer schema
             schema = _pandas_to_spark_schema(pandas_df)
         spark_dataframe = self.spark.createDataFrame(pandas_df, schema)
         return spark_dataframe
 
     def read_dmo(
-        self, name: str, schema: Union[AtomicType, StructType, str, None] = None, row_limit: int = 1000
+        self,
+        name: str,
+        schema: Union[AtomicType, StructType, str, None] = None,
+        row_limit: int = 1000,
     ) -> PySparkDataFrame:
-        pandas_df = self._conn.get_pandas_dataframe(SQL_QUERY_TEMPLATE.format(name, row_limit))
+        pandas_df = self._conn.get_pandas_dataframe(
+            SQL_QUERY_TEMPLATE.format(name, row_limit)
+        )
         if not schema:
             # auto infer schema
             schema = _pandas_to_spark_schema(pandas_df)
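
Both signature reflows here are behavior-preserving: row_limit still defaults to 1000 and is interpolated into the query template together with the object name. A minimal sketch of the query construction, assuming a two-placeholder template along the lines of SELECT * FROM {} LIMIT {} (the real SQL_QUERY_TEMPLATE constant is defined elsewhere in query_api.py and is not part of this diff):

    # Hypothetical stand-in; the actual SQL_QUERY_TEMPLATE is not shown in this commit.
    SQL_QUERY_TEMPLATE = "SELECT * FROM {} LIMIT {}"

    # reader.read_dlo("Account_DLO", row_limit=500) would issue:
    print(SQL_QUERY_TEMPLATE.format("Account_DLO", 500))
    # SELECT * FROM Account_DLO LIMIT 500

    # reader.read_dlo("Account_DLO") keeps the default limit:
    print(SQL_QUERY_TEMPLATE.format("Account_DLO", 1000))
    # SELECT * FROM Account_DLO LIMIT 1000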

src/datacustomcode/io/writer/print.py (16 additions & 13 deletions)

@@ -15,17 +15,19 @@
 
 
 from pyspark.sql import DataFrame as PySparkDataFrame
-from pyspark.sql import SparkSession
 
-from datacustomcode.io.writer.base import BaseDataCloudWriter, WriteMode
 from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader
+from datacustomcode.io.writer.base import BaseDataCloudWriter, WriteMode
 
 
 class PrintDataCloudWriter(BaseDataCloudWriter):
     CONFIG_NAME = "PrintDataCloudWriter"
 
     def validate_dataframe_columns_against_dlo(
-        self, dataframe: PySparkDataFrame, dlo_name: str, reader: QueryAPIDataCloudReader
+        self,
+        dataframe: PySparkDataFrame,
+        dlo_name: str,
+        reader: QueryAPIDataCloudReader,
     ) -> None:
         """
         Validates that all columns in the given dataframe exist in the DLO schema.
@@ -36,7 +38,8 @@ def validate_dataframe_columns_against_dlo(
             reader (QueryAPIDataCloudReader): The reader to use for schema retrieval.
 
         Raises:
-            ValueError: If any columns in the dataframe are not present in the DLO schema.
+            ValueError: If any columns in the dataframe are not present in the DLO
+                schema.
         """
         # Get DLO schema (no data, just schema)
         dlo_df = reader.read_dlo(dlo_name, row_limit=0)
@@ -47,23 +50,22 @@ def validate_dataframe_columns_against_dlo(
         extra_columns = df_columns - dlo_columns
         if extra_columns:
             raise ValueError(
-                f"The following columns are not present in the DLO '{dlo_name}': {sorted(extra_columns)}.\n"
+                "The following columns are not present in the \n"
+                f"DLO '{dlo_name}': {sorted(extra_columns)}.\n"
                 "To fix this error, you can either:\n"
                 " - Drop these columns from your DataFrame before writing, e.g.,\n"
                 "   dataframe = dataframe.drop({cols})\n"
-                " - Or, add these columns to the DLO schema in Data Cloud."
-                .format(cols=sorted(extra_columns))
+                " - Or, add these columns to the DLO schema in Data Cloud.".format(
+                    cols=sorted(extra_columns)
+                )
             )
 
-
     def write_to_dlo(
         self, name: str, dataframe: PySparkDataFrame, write_mode: WriteMode
     ) -> None:
-        # Create SparkSession if not already created
-        spark = SparkSession.builder.appName("YourAppName").getOrCreate()
 
         # Instantiate the reader
-        reader = QueryAPIDataCloudReader(spark)
+        reader = QueryAPIDataCloudReader(self.spark)
 
         # Validate columns before proceeding
         self.validate_dataframe_columns_against_dlo(dataframe, name, reader)
@@ -73,7 +75,8 @@ def write_to_dlo(
     def write_to_dmo(
         self, name: str, dataframe: PySparkDataFrame, write_mode: WriteMode
    ) -> None:
-        #The way its validating for DLO and dataframes columns,
-        # its not going to work for DMO because DMO may not exists, so just show the dataframe.
+        # The way its validating for DLO and dataframes columns,
+        # its not going to work for DMO because DMO may not exists,
+        # so just show the dataframe.
 
         dataframe.show()
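
Beyond the line-length cleanups, the substantive fix in this file is that write_to_dlo now reuses the writer's own Spark session (self.spark) instead of building a throwaway session named "YourAppName". The validation itself is a plain set difference against a zero-row read of the DLO. A self-contained sketch of that check, with plain column lists standing in for the DataFrame and the DLO schema (all names illustrative):

    def validate_columns(df_columns, dlo_columns, dlo_name):
        # Mirrors the set difference in validate_dataframe_columns_against_dlo.
        extra = set(df_columns) - set(dlo_columns)
        if extra:
            raise ValueError(
                f"The following columns are not present in the DLO "
                f"'{dlo_name}': {sorted(extra)}."
            )

    try:
        validate_columns(["id", "amount", "typo_col"], ["id", "amount"], "Sales_DLO")
    except ValueError as exc:
        print(exc)  # ... not present in the DLO 'Sales_DLO': ['typo_col'].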

src/datacustomcode/scan.py (2 additions & 1 deletion)

@@ -16,6 +16,7 @@
 
 import ast
 import os
+import sys
 from typing import (
     Any,
     ClassVar,
@@ -25,7 +26,6 @@
 )
 
 import pydantic
-import sys
 
 from datacustomcode.version import get_version
 
@@ -43,6 +43,7 @@
 
 STANDARD_LIBS = set(sys.stdlib_module_names)
 
+
 class DataAccessLayerCalls(pydantic.BaseModel):
     read_dlo: frozenset[str]
     read_dmo: frozenset[str]
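
The scan.py edits are pure import hygiene: import sys moves into the standard-library group above third-party imports like pydantic, and a second blank line lands before the class definition, both typical isort/PEP 8 lint fixes. For context, sys.stdlib_module_names, which STANDARD_LIBS is built from, is a frozenset of standard-library module names available on Python 3.10 and later:

    import sys

    STANDARD_LIBS = set(sys.stdlib_module_names)

    print("os" in STANDARD_LIBS)        # True: standard library
    print("pydantic" in STANDARD_LIBS)  # False: third-party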

tests/io/reader/test_query_api.py (4 additions & 4 deletions)

@@ -143,7 +143,7 @@ def test_read_dlo(
 
     # Verify get_pandas_dataframe was called with the right SQL
     mock_connection.get_pandas_dataframe.assert_called_once_with(
-        SQL_QUERY_TEMPLATE.format("test_dlo")
+        SQL_QUERY_TEMPLATE.format("test_dlo", 1000)
     )
 
     # Verify DataFrame was created with auto-inferred schema
@@ -172,7 +172,7 @@ def test_read_dlo_with_schema(
 
     # Verify get_pandas_dataframe was called with the right SQL
     mock_connection.get_pandas_dataframe.assert_called_once_with(
-        SQL_QUERY_TEMPLATE.format("test_dlo")
+        SQL_QUERY_TEMPLATE.format("test_dlo", 1000)
     )
 
     # Verify DataFrame was created with provided schema
@@ -192,7 +192,7 @@ def test_read_dmo(
 
     # Verify get_pandas_dataframe was called with the right SQL
     mock_connection.get_pandas_dataframe.assert_called_once_with(
-        SQL_QUERY_TEMPLATE.format("test_dmo")
+        SQL_QUERY_TEMPLATE.format("test_dmo", 1000)
     )
 
     # Verify DataFrame was created
@@ -220,7 +220,7 @@ def test_read_dmo_with_schema(
 
     # Verify get_pandas_dataframe was called with the right SQL
     mock_connection.get_pandas_dataframe.assert_called_once_with(
-        SQL_QUERY_TEMPLATE.format("test_dmo")
+        SQL_QUERY_TEMPLATE.format("test_dmo", 1000)
     )
 
     # Verify DataFrame was created with provided schema
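
These test updates follow directly from the reader change: read_dlo and read_dmo always format the template with both the name and the row_limit, so the mocked connection sees a two-argument query even when callers rely on the default of 1000. A sketch of the assertion pattern, assuming the suite uses unittest.mock and the same hypothetical template as above:

    from unittest.mock import MagicMock

    SQL_QUERY_TEMPLATE = "SELECT * FROM {} LIMIT {}"  # hypothetical stand-in

    mock_connection = MagicMock()
    mock_connection.get_pandas_dataframe(SQL_QUERY_TEMPLATE.format("test_dlo", 1000))

    # Passes only if the expected SQL includes the default row_limit:
    mock_connection.get_pandas_dataframe.assert_called_once_with(
        SQL_QUERY_TEMPLATE.format("test_dlo", 1000)
    )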
