Skip to content

Commit 04e9861

Browse files
committed
Merge branch 'main' into Add-llm_gateway-wrapper-function
2 parents 76c3f3f + b3e84ee commit 04e9861

7 files changed

Lines changed: 162 additions & 90 deletions

File tree

CHANGELOG.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Changelog
2+
3+
## 1.0.0
4+
5+
### Breaking Changes
6+
7+
- **`read_dlo()` and `read_dmo()` now return DataFrames with all-lowercase column names.**
8+
9+
Column names returned by both `QueryAPIDataCloudReader` and `SFCLIDataCloudReader` are now lowercased to match the column names produced by the deployed Data Cloud environment (e.g., `unitprice__c` instead of `UnitPrice__c`).
10+
11+
**Why:** In the deployed environment, column names are normalized to lowercase by the underlying Iceberg metadata layer. The local SDK previously returned the original API casing, causing "column does not exist" errors when scripts were deployed. This change aligns local behavior with the cloud.
12+
13+
**Migration:** Update any column references in your local scripts to use lowercase:
14+
15+
```python
16+
# Before
17+
df.withColumn("Description__c", upper(col("Description__c")))
18+
df.drop("KQ_Id__c")
19+
df["UnitPrice__c"]
20+
21+
# After
22+
df.withColumn("description__c", upper(col("description__c")))
23+
df.drop("kq_id__c")
24+
df["unitprice__c"]
25+
```
26+
27+
Scripts already running in Data Cloud are unaffected — the cloud has always returned lowercase column names.

CONTRIBUTING.md

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,84 @@ Use GitHub Issues page to submit issues, enhancement requests and discuss ideas.
2121
- Issues that have been identified as a feature request will be labelled `enhancement`.
2222

2323

24-
# Issues
24+
### Issues
2525
We use GitHub issues to track public bugs. Please ensure your description is
2626
clear and has sufficient instructions for us to reproduce the issue.
2727

2828
# Code of Conduct
2929
Please follow our [Code of Conduct](CODE_OF_CONDUCT.md).
30+
31+
# Development
32+
33+
## Quick Start
34+
35+
### Prerequisites
36+
37+
See the [Prerequisites section in README.md](./README.md#prerequisites) for complete setup requirements.
38+
39+
### Initial Setup
40+
41+
1. **Clone the repository**
42+
```bash
43+
git clone <repository-url>
44+
cd datacloud-customcode-python-sdk
45+
```
46+
47+
2. **Set up virtual environment and install dependencies**
48+
49+
**Note**: If you need to set a specific Python version, use `pyenv local 3.11.x` in the project directory.
50+
51+
```bash
52+
python3.11 -m venv .venv
53+
source .venv/bin/activate
54+
pip install poetry
55+
make develop
56+
```
57+
58+
3. **Verify installation**
59+
```bash
60+
datacustomcode version
61+
```
62+
63+
4. **Initialize a project for development work verification**
64+
65+
**Note**: To test your changes and develop new features, initialize a sample project:
66+
67+
```bash
68+
# Create a new directory for your test project
69+
mkdir my-test-project
70+
cd my-test-project
71+
72+
# Initialize a new Data Cloud custom code project
73+
datacustomcode init .
74+
75+
# Test your SDK modifications against the sample project with:
76+
datacustomcode run ./payload/entrypoint.py
77+
```
78+
79+
**Tip**: See the [README.md](./README.md) for additional `datacustomcode` commands (`scan`, `deploy`, `zip`) to test specific code paths and validate your SDK changes thoroughly.
80+
81+
## Makefile Commands
82+
83+
```bash
84+
# Clean build artifacts, caches and temporary files
85+
make clean
86+
87+
# Build package distribution
88+
make package
89+
90+
# Install main dependencies only
91+
make install
92+
93+
# Install dependencies for full development setup
94+
make develop
95+
96+
# Run code quality checks
97+
make lint
98+
99+
# Perform static type checking
100+
make mypy
101+
102+
# Run complete test suite
103+
make test
104+
```

FOR_CONTRIBUTORS.md

Lines changed: 0 additions & 82 deletions
This file was deleted.

src/datacustomcode/io/reader/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,5 @@ def _pandas_to_spark_schema(
4949
spark_type = TimestampType()
5050
else:
5151
spark_type = PANDAS_TYPE_MAPPING.get(str(dtype), StringType())
52-
fields.append(StructField(column, spark_type, nullable))
52+
fields.append(StructField(column.lower(), spark_type, nullable))
5353
return StructType(fields)

src/datacustomcode/templates/script/payload/entrypoint.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ def main():
1010
df = client.read_dlo("Account_std__dll")
1111

1212
# Perform transformations on the DataFrame
13-
df_upper1 = df.withColumn("Description__c", upper(col("Description__c")))
13+
df_upper1 = df.withColumn("description__c", upper(col("description__c")))
1414

1515
# Drop specific columns related to relationships
16-
df_upper1 = df_upper1.drop("SfdcOrganizationId__c")
17-
df_upper1 = df_upper1.drop("KQ_Id__c")
16+
df_upper1 = df_upper1.drop("sfdcorganizationid__c")
17+
df_upper1 = df_upper1.drop("kq_id__c")
1818

1919
# Save the transformed DataFrame
2020
dlo_name = "Account_std_copy__dll"

tests/io/reader/test_query_api.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,22 @@ def test_pandas_to_spark_schema_nullable(self):
6060
schema = _pandas_to_spark_schema(df, nullable=False)
6161
assert not schema.fields[0].nullable
6262

63+
def test_pandas_to_spark_schema_lowercases_columns(self):
64+
"""Column names from the API are lowercased to match Data Cloud."""
65+
df = pd.DataFrame({"UnitPrice__c": [1.0], "Quantity__c": [2], "Name__c": ["a"]})
66+
schema = _pandas_to_spark_schema(df)
67+
assert [f.name for f in schema.fields] == [
68+
"unitprice__c",
69+
"quantity__c",
70+
"name__c",
71+
]
72+
73+
def test_pandas_to_spark_schema_already_lowercase_is_idempotent(self):
74+
"""Already-lowercase column names are returned unchanged."""
75+
df = pd.DataFrame({"unitprice__c": [1.0], "quantity__c": [2]})
76+
schema = _pandas_to_spark_schema(df)
77+
assert [f.name for f in schema.fields] == ["unitprice__c", "quantity__c"]
78+
6379
def test_pandas_to_spark_schema_datetime_types(self):
6480
"""Test conversion of pandas datetime types to Spark TimestampType."""
6581

@@ -147,8 +163,8 @@ def mock_spark_session(self):
147163

148164
@pytest.fixture
149165
def mock_pandas_dataframe(self):
150-
"""Create a sample pandas DataFrame for testing."""
151-
return pd.DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
166+
"""Sample pandas DataFrame with PascalCase columns, as the QueryAPI returns."""
167+
return pd.DataFrame({"Col1__c": [1, 2], "Col2__c": ["a", "b"]})
152168

153169
@pytest.fixture
154170
def mock_connection(self, mock_pandas_dataframe):
@@ -301,3 +317,27 @@ def test_read_dmo_with_custom_row_limit(
301317
mock_connection.get_pandas_dataframe.assert_called_once_with(
302318
SQL_QUERY_TEMPLATE.format("test_dmo", 25)
303319
)
320+
321+
def test_read_dlo_schema_is_lowercase(
322+
self, reader_without_init, mock_connection, mock_pandas_dataframe
323+
):
324+
"""read_dlo returns a schema with all-lowercase field names even when the
325+
QueryAPI returns PascalCase column names."""
326+
reader_without_init._conn = mock_connection
327+
328+
reader_without_init.read_dlo("test_dlo")
329+
330+
_, schema_arg = reader_without_init.spark.createDataFrame.call_args[0]
331+
assert all(f.name == f.name.lower() for f in schema_arg.fields)
332+
333+
def test_read_dmo_schema_is_lowercase(
334+
self, reader_without_init, mock_connection, mock_pandas_dataframe
335+
):
336+
"""read_dmo returns a schema with all-lowercase field names even when the
337+
QueryAPI returns PascalCase column names."""
338+
reader_without_init._conn = mock_connection
339+
340+
reader_without_init.read_dmo("test_dmo")
341+
342+
_, schema_arg = reader_without_init.spark.createDataFrame.call_args[0]
343+
assert all(f.name == f.name.lower() for f in schema_arg.fields)

tests/io/reader/test_sf_cli.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,8 @@ def reader(self):
311311

312312
@pytest.fixture
313313
def sample_df(self):
314-
return pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
314+
"""DataFrame with PascalCase columns, as the REST API metadata returns."""
315+
return pd.DataFrame({"Id__c": [1, 2], "Name__c": ["a", "b"]})
315316

316317
@pytest.mark.parametrize(
317318
"method,obj_name",
@@ -348,6 +349,17 @@ def test_auto_infers_schema_when_none_given(self, reader, sample_df, method):
348349
_, schema_arg = reader.spark.createDataFrame.call_args[0]
349350
assert isinstance(schema_arg, StructType)
350351

352+
@pytest.mark.parametrize("method", ["read_dlo", "read_dmo"])
353+
def test_auto_infers_schema_lowercases_pascal_case_columns(
354+
self, reader, sample_df, method
355+
):
356+
"""Schema is lowercased so local results match Data Cloud column names."""
357+
with patch.object(reader, "_execute_query", return_value=sample_df):
358+
getattr(reader, method)("SomeObj")
359+
360+
_, schema_arg = reader.spark.createDataFrame.call_args[0]
361+
assert all(f.name == f.name.lower() for f in schema_arg.fields)
362+
351363
@pytest.mark.parametrize("method", ["read_dlo", "read_dmo"])
352364
def test_uses_provided_schema(self, reader, sample_df, method):
353365
from pyspark.sql.types import (

0 commit comments

Comments
 (0)