Skip to content

Commit 93bdd81

Browse files
Merge pull request #85 from forcedotcom/SDK_refactoring
SDK refactoring
2 parents 243fbb5 + 128d252 commit 93bdd81

23 files changed

Lines changed: 1976 additions & 1078 deletions

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ repos:
5353
exclude: \.py$
5454

5555
- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
56-
rev: v2.13.0
56+
rev: v2.14.0
5757
hooks:
5858
- id: pretty-format-toml
5959
args: [--autofix]
@@ -69,7 +69,7 @@ repos:
6969
hooks:
7070
- id: mypy
7171
name: mypy
72-
entry: mypy src/datacustomcode
72+
entry: mypy --explicit-package-bases src/datacustomcode
7373
language: system
7474
pass_filenames: false
7575
types: [python]

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,28 @@
11
# Changelog
22

3+
## 3.0.0
4+
5+
### Breaking Changes
6+
7+
- **Added** `runtime: datacustomcode.function.runtime.Runtime` to the function contract for codeType `function`.
8+
9+
Functions now require the runtime as an argument.
10+
11+
**Why:** `runTime` allows access to resources (llm_gateway / file) available during function execution.
12+
13+
**Migration:** use `function(request: dict, runTime: Runtime)` instead of `function(request: dict)`
14+
15+
```python
16+
# Before
17+
def function(request: dict):
18+
pass
19+
20+
# After
21+
def function(request: dict, runTime: Runtime):
22+
pass
23+
```
24+
25+
326
## 2.0.0
427

528
### Breaking Changes

poetry.lock

Lines changed: 1071 additions & 1050 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ use_parentheses = true
7272

7373
[tool.mypy]
7474
check_untyped_defs = false
75+
explicit_package_bases = true
7576
ignore_missing_imports = true
77+
mypy_path = "src"
7678
no_implicit_optional = true
7779
plugins = [
7880
'pydantic.mypy'
@@ -99,7 +101,7 @@ click = "^8.1.8"
99101
loguru = "^0.7.3"
100102
numpy = "*"
101103
pandas = "*"
102-
pydantic = "^1.8.2 || ^2.0.0"
104+
pydantic = "2.13.1"
103105
pyspark = "3.5.1"
104106
python = ">=3.10,<3.12"
105107
pyyaml = "^6.0"

src/__init__.py

Whitespace-only changes.

src/datacustomcode/client.py

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,6 @@ def __new__(
119119
spark_provider: Optional["BaseSparkSessionProvider"] = None,
120120
code_type: str = "script",
121121
) -> Client:
122-
if "function" in code_type:
123-
return cls._new_function_client()
124122

125123
if cls._instance is None:
126124
cls._instance = super().__new__(cls)
@@ -175,16 +173,6 @@ def __new__(
175173
raise ValueError("Cannot set reader or writer after client is initialized")
176174
return cls._instance
177175

178-
@classmethod
179-
def _new_function_client(cls) -> Client:
180-
cls._instance = super().__new__(cls)
181-
cls._instance._proxy = (
182-
config.proxy_config.to_object() # type: ignore
183-
if config.proxy_config is not None
184-
else None
185-
)
186-
return cls._instance
187-
188176
def read_dlo(self, name: str) -> PySparkDataFrame:
189177
"""Read a DLO from Data Cloud.
190178
@@ -195,7 +183,7 @@ def read_dlo(self, name: str) -> PySparkDataFrame:
195183
A PySpark DataFrame containing the DLO data.
196184
"""
197185
self._record_dlo_access(name)
198-
return self._reader.read_dlo(name)
186+
return self._reader.read_dlo(name) # type: ignore[no-any-return]
199187

200188
def read_dmo(self, name: str) -> PySparkDataFrame:
201189
"""Read a DMO from Data Cloud.
@@ -207,7 +195,7 @@ def read_dmo(self, name: str) -> PySparkDataFrame:
207195
A PySpark DataFrame containing the DMO data.
208196
"""
209197
self._record_dmo_access(name)
210-
return self._reader.read_dmo(name)
198+
return self._reader.read_dmo(name) # type: ignore[no-any-return]
211199

212200
def write_to_dlo(
213201
self, name: str, dataframe: PySparkDataFrame, write_mode: WriteMode, **kwargs
@@ -220,7 +208,7 @@ def write_to_dlo(
220208
write_mode: The write mode to use for writing to the DLO.
221209
"""
222210
self._validate_data_layer_history_does_not_contain(DataCloudObjectType.DMO)
223-
return self._writer.write_to_dlo(name, dataframe, write_mode, **kwargs)
211+
return self._writer.write_to_dlo(name, dataframe, write_mode, **kwargs) # type: ignore[no-any-return]
224212

225213
def write_to_dmo(
226214
self, name: str, dataframe: PySparkDataFrame, write_mode: WriteMode, **kwargs
@@ -233,17 +221,17 @@ def write_to_dmo(
233221
write_mode: The write mode to use for writing to the DMO.
234222
"""
235223
self._validate_data_layer_history_does_not_contain(DataCloudObjectType.DLO)
236-
return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs)
224+
return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs) # type: ignore[no-any-return]
237225

238226
def call_llm_gateway(self, LLM_MODEL_ID: str, prompt: str, maxTokens: int) -> str:
239227
if self._proxy is None:
240228
raise ValueError("No proxy configured; set proxy or proxy_config")
241-
return self._proxy.call_llm_gateway(LLM_MODEL_ID, prompt, maxTokens)
229+
return self._proxy.call_llm_gateway(LLM_MODEL_ID, prompt, maxTokens) # type: ignore[no-any-return]
242230

243231
def find_file_path(self, file_name: str) -> Path:
244232
"""Return a file path"""
245233

246-
return self._file.find_file_path(file_name)
234+
return self._file.find_file_path(file_name) # type: ignore[no-any-return]
247235

248236
def _validate_data_layer_history_does_not_contain(
249237
self, data_cloud_object_type: DataCloudObjectType

src/datacustomcode/config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@
3636
# This lets all readers and writers to be findable via config
3737
from datacustomcode.io import * # noqa: F403
3838
from datacustomcode.io.base import BaseDataAccessLayer
39-
from datacustomcode.io.reader.base import BaseDataCloudReader # noqa: TCH001
40-
from datacustomcode.io.writer.base import BaseDataCloudWriter # noqa: TCH001
39+
from datacustomcode.io.reader.base import BaseDataCloudReader # noqa: TCH002
40+
from datacustomcode.io.writer.base import BaseDataCloudWriter # noqa: TCH002
4141
from datacustomcode.proxy.base import BaseProxyAccessLayer
42-
from datacustomcode.proxy.client.base import BaseProxyClient # noqa: TCH001
42+
from datacustomcode.proxy.client.base import BaseProxyClient # noqa: TCH002
4343
from datacustomcode.spark.base import BaseSparkSessionProvider
4444

4545
DEFAULT_CONFIG_NAME = "config.yaml"
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright (c) 2025, Salesforce, Inc.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Function runtime for Data Cloud Custom Code."""
17+
18+
from datacustomcode.function.runtime import Runtime
19+
20+
__all__ = ["Runtime"]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright (c) 2025, Salesforce, Inc.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
17+
class BaseRuntime:
18+
"""Base class for datacustomcode run time"""
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright (c) 2025, Salesforce, Inc.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""
17+
Pydantic models for byoc-function-proto (uds_chunking.proto)
18+
Auto-generated - validation rules from buf.validate
19+
"""
20+
21+
from typing import (
22+
Any,
23+
Dict,
24+
List,
25+
Literal,
26+
)
27+
28+
from pydantic import BaseModel, Field
29+
30+
31+
class DocElement(BaseModel):
32+
"""Document element to be chunked"""
33+
34+
text: str = Field(..., description="Text content to be chunked")
35+
metadata: Dict[str, Any] = Field(
36+
default_factory=dict, description="Source document metadata"
37+
)
38+
39+
40+
class ChunkOutput(BaseModel):
41+
"""Output chunk from the chunking process"""
42+
43+
chunk_id: str = Field(..., description="UUID for this chunk")
44+
chunk_type: str = Field(..., description="Type: 'text'")
45+
text: str = Field(..., description="Chunk text content")
46+
seq_no: int = Field(..., description="Sequential chunk number (1-based)")
47+
metadata: Dict[str, str] = Field(
48+
default_factory=dict, description="Metadata from source (DMO fields)"
49+
)
50+
tag_metadata: Dict[str, Any] = Field(
51+
default_factory=dict, description="Additional tags"
52+
)
53+
citations: Dict[str, Any] = Field(
54+
default_factory=dict, description="Citation information"
55+
)
56+
57+
58+
class StatusResponse(BaseModel):
59+
"""Status response for operation"""
60+
61+
status_type: str = Field(..., description="'success' or 'error'")
62+
status_message: str = Field(..., description="Human-readable status")
63+
64+
65+
class UdsChunkingV1BatchRequest(BaseModel):
66+
"""Batch request for UDS chunking"""
67+
68+
version: Literal["v1"] = Field(
69+
default="v1", description="API version, must be 'v1'"
70+
)
71+
input: List[DocElement] = Field(
72+
..., min_length=1, description="List of documents (min 1)"
73+
)
74+
max_characters: int = Field(..., description="Max chars per chunk (default: 100)")
75+
additional_params: Dict[str, Any] = Field(
76+
default_factory=dict, description="Future extension point"
77+
)
78+
79+
80+
class UdsChunkingV1BatchResponse(BaseModel):
81+
"""Batch response for UDS chunking"""
82+
83+
version: Literal["v1"] = Field(
84+
default="v1", description="API version, must be 'v1'"
85+
)
86+
output: List[ChunkOutput] = Field(
87+
default_factory=list, description="Flat list of chunks from all docs"
88+
)
89+
status: StatusResponse = Field(..., description="Overall operation status")

0 commit comments

Comments
 (0)