Skip to content

Commit 984dada

Browse files
committed
refactoring
Updating README
1 parent 4d6a37d commit 984dada

7 files changed

Lines changed: 246 additions & 59 deletions

File tree

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ Your Python dependencies can be packaged as .py files, .zip archives (containing
124124
Your entry point script will define logic using the `Client` object which wraps data access layers.
125125

126126
You should only need the following methods:
127+
* `file_open(file_name)` – Open a file by name and return a file handle for reading
127128
* `read_dlo(name)` – Read from a Data Lake Object by name
128129
* `read_dmo(name)` – Read from a Data Model Object by name
129130
* `write_to_dlo(name, spark_dataframe, write_mode)` – Write to a Data Lake Object by name with a Spark dataframe

src/datacustomcode/client.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,19 @@
1414
# limitations under the License.
1515
from __future__ import annotations
1616

17-
import io
18-
1917
from enum import Enum
18+
import io
2019
from typing import (
2120
TYPE_CHECKING,
2221
ClassVar,
2322
Optional,
2423
)
24+
2525
from pyspark.sql import SparkSession
26+
2627
from datacustomcode.config import SparkConfig, config
28+
from datacustomcode.file.reader.default import DefaultFileReader
2729
from datacustomcode.io.reader.base import BaseDataCloudReader
28-
from datacustomcode.file.reader.base import BaseFileReader
2930

3031
if TYPE_CHECKING:
3132
from pyspark.sql import DataFrame as PySparkDataFrame
@@ -113,7 +114,7 @@ class Client:
113114
_instance: ClassVar[Optional[Client]] = None
114115
_reader: BaseDataCloudReader
115116
_writer: BaseDataCloudWriter
116-
_file: BaseFileReader
117+
_file: DefaultFileReader
117118
_data_layer_history: dict[DataCloudObjectType, set[str]]
118119

119120
def __new__(
@@ -156,7 +157,7 @@ def __new__(
156157
writer_init = writer
157158
cls._instance._reader = reader_init
158159
cls._instance._writer = writer_init
159-
cls._instance._file = BaseFileReader()
160+
cls._instance._file = DefaultFileReader()
160161
cls._instance._data_layer_history = {
161162
DataCloudObjectType.DLO: set(),
162163
DataCloudObjectType.DMO: set(),
@@ -216,8 +217,7 @@ def write_to_dmo(
216217
return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs)
217218

218219
def file_open(self, file_name: str) -> io.TextIOWrapper:
    """Open a file by name through the client's file reader.

    This method only delegates to ``self._file.file_open``; where the file
    is looked up and how it is opened is decided by that reader.

    Args:
        file_name: Name of the file to open.

    Returns:
        The file handle returned by the underlying file reader.
    """

    return self._file.file_open(file_name)
223223

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Copyright (c) 2025, Salesforce, Inc.
2+
# SPDX-License-Identifier: Apache-2
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.

src/datacustomcode/file/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@
1818

1919

2020
class BaseDataAccessLayer(ABC):
    """Common ABC-derived base class for data access layers.

    The class body is empty: it declares no attributes and no abstract
    methods, so it only serves as a shared ancestor for subclasses.
    """

    pass
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Copyright (c) 2025, Salesforce, Inc.
2+
# SPDX-License-Identifier: Apache-2
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.

src/datacustomcode/file/reader/base.py

Lines changed: 0 additions & 51 deletions
This file was deleted.
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
# Copyright (c) 2025, Salesforce, Inc.
2+
# SPDX-License-Identifier: Apache-2
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
from __future__ import annotations
16+
17+
import builtins
import io
import os
from pathlib import Path
from typing import Optional

from datacustomcode.file.base import BaseDataAccessLayer
23+
24+
25+
class FileReaderError(Exception):
    """Base exception for file reader operations."""


class FileNotFoundError(FileReaderError, builtins.FileNotFoundError):
    """Raised when a file cannot be found.

    This class has the same name as the builtin ``FileNotFoundError`` and
    therefore shadows it inside this module. It derives from both
    ``FileReaderError`` and the builtin, so handlers written against either
    one (or against ``OSError``) catch it, regardless of which
    ``FileNotFoundError`` the calling module has in scope.
    """


class FileAccessError(FileReaderError):
    """Raised when there's an error accessing a file."""
35+
36+
37+
class DefaultFileReader(BaseDataAccessLayer):
    """Default file reader: locates a file by name and opens it as text.

    A file name is looked up in two places, in this order:

    1. ``<cwd>/<code_package>/<file_folder>/<file_name>`` -- tried only when
       the code package path exists.
    2. ``<config dir>/<file_folder>/<file_name>`` -- where ``<config dir>``
       is the directory holding the first ``config_file`` match found by a
       recursive search below the current working directory.
    """

    # Fallbacks used when the matching constructor argument is falsy.
    DEFAULT_CODE_PACKAGE = "payload"
    DEFAULT_FILE_FOLDER = "files"
    DEFAULT_CONFIG_FILE = "config.json"

    def __init__(
        self,
        code_package: Optional[str] = None,
        file_folder: Optional[str] = None,
        config_file: Optional[str] = None,
    ):
        """Store the search configuration, applying class defaults.

        Args:
            code_package: Code package directory searched first
            file_folder: Folder holding the files, relative to the code
                package (or to the config file's directory)
            config_file: Name of the config file used for the fallback
                lookup
        """
        self.code_package = (
            code_package if code_package else self.DEFAULT_CODE_PACKAGE
        )
        self.file_folder = file_folder if file_folder else self.DEFAULT_FILE_FOLDER
        self.config_file = config_file if config_file else self.DEFAULT_CONFIG_FILE

    def file_open(self, file_name: str) -> io.TextIOWrapper:
        """Locate ``file_name`` and return it opened for reading.

        Args:
            file_name: The name of the file to open

        Returns:
            A file handle for reading

        Raises:
            ValueError: If ``file_name`` is empty
            FileNotFoundError: This module's ``FileNotFoundError``, when no
                search location contains the file
            FileAccessError: If opening the resolved path raises ``OSError``
        """
        if not file_name:
            raise ValueError("file_name cannot be empty")

        resolved = self._resolve_file_path(file_name)
        if not resolved:
            raise FileNotFoundError(
                f"File '{file_name}' not found in any search location"
            )

        # ``IOError`` is the same class as ``OSError`` in Python 3, so one
        # name covers both.
        try:
            handle = self._open_file(resolved)
        except OSError as exc:
            raise FileAccessError(f"Error opening file '{resolved}': {exc}") from exc
        return handle

    def _resolve_file_path(self, file_name: str) -> Optional[Path]:
        """Return the first existing candidate path for ``file_name``.

        Args:
            file_name: The name of the file to resolve

        Returns:
            The full path to the file, or None if not found
        """
        # Preferred location: inside the code package.
        if self._code_package_exists():
            packaged = self._get_code_package_file_path(file_name)
            if packaged.exists():
                return packaged

        # Fallback: next to the config file. The config search only runs
        # when the code package lookup did not produce a hit.
        config_path = self._find_config_file()
        if not config_path:
            return None

        beside_config = self._get_config_based_file_path(file_name, config_path)
        return beside_config if beside_config.exists() else None

    def _code_package_exists(self) -> bool:
        """Tell whether the configured code package path exists.

        Returns:
            True if the code package directory exists
        """
        package_present = os.path.exists(self.code_package)
        return package_present

    def _get_code_package_file_path(self, file_name: str) -> Path:
        """Build the candidate path for ``file_name`` inside the code package.

        Args:
            file_name: The name of the file

        Returns:
            The full path to the file
        """
        # Joined as one relative string (not part by part) before being
        # appended to the working directory.
        inside_package = f"{self.code_package}/{self.file_folder}/{file_name}"
        return Path.cwd() / inside_package

    def _find_config_file(self) -> Optional[Path]:
        """Search the tree below the working directory for the config file.

        Returns:
            The path to the config file, or None if not found
        """
        start_dir = Path.cwd()
        return self._find_file_in_tree(self.config_file, start_dir)

    def _get_config_based_file_path(self, file_name: str, config_path: Path) -> Path:
        """Build the candidate path for ``file_name`` beside the config file.

        Args:
            file_name: The name of the file
            config_path: The path to the config file

        Returns:
            The full path to the file
        """
        beside_config = f"{self.file_folder}/{file_name}"
        return config_path.parent / beside_config

    def _find_file_in_tree(self, filename: str, search_path: Path) -> Optional[Path]:
        """Return the first recursive match for ``filename``, if any.

        Args:
            filename: The name of the file to find
            search_path: The root directory to search from

        Returns:
            The full path to the file, or None if not found
        """
        # Only the first hit produced by ``rglob`` is consumed.
        return next(iter(search_path.rglob(filename)), None)

    def _open_file(self, file_path: Path) -> io.TextIOWrapper:
        """Open ``file_path`` in text read mode, decoded as UTF-8.

        Args:
            file_path: The path to the file

        Returns:
            A file handle for reading
        """
        return open(file_path, mode="r", encoding="utf-8")

    def get_search_locations(self) -> list[Path]:
        """List the folders that are currently candidates for file lookup.

        Returns:
            A list of paths where files might be found
        """
        candidates = []

        # Folder inside the code package, when that package exists.
        if self._code_package_exists():
            candidates.append(Path.cwd() / self.code_package / self.file_folder)

        # Folder beside the config file, when one is found.
        config_path = self._find_config_file()
        if config_path:
            candidates.append(config_path.parent / self.file_folder)

        return candidates
202+
203+
204+
# NOTE(review): this definition rebinds the name ``BaseDataAccessLayer`` that
# this module imports from ``datacustomcode.file.base``, and its docstring
# describes a file reader rather than a data access layer. It looks like
# residue from a rename -- confirm whether it should be removed or renamed.
class BaseDataAccessLayer:
    """Default implementation of the file reader.

    This class provides the standard file reading behavior and can be
    easily mocked or subclassed for testing.
    """

0 commit comments

Comments
 (0)