Skip to content

Commit 04e9861

Browse files
committed
Merge branch 'main' into Add-llm_gateway-wrapper-function
2 parents 76c3f3f + b3e84ee commit 04e9861

7 files changed

Lines changed: 162 additions & 90 deletions

File tree

CHANGELOG.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Changelog
2+
3+
## 1.0.0
4+
5+
### Breaking Changes
6+
7+
- **`read_dlo()` and `read_dmo()` now return DataFrames with all-lowercase column names.**
8+
9+
Column names returned by both `QueryAPIDataCloudReader` and `SFCLIDataCloudReader` are now lowercased to match the column names produced by the deployed Data Cloud environment (e.g., `unitprice__c` instead of `UnitPrice__c`).
10+
11+
**Why:** In the deployed environment, column names are normalized to lowercase by the underlying Iceberg metadata layer. The local SDK previously returned the original API casing, causing "column does not exist" errors when scripts were deployed. This change aligns local behavior with the cloud.
12+
13+
**Migration:** Update any column references in your local scripts to use lowercase:
14+
15+
```python
16+
# Before
17+
df.withColumn("Description__c", upper(col("Description__c")))
18+
df.drop("KQ_Id__c")
19+
df["UnitPrice__c"]
20+
21+
# After
22+
df.withColumn("description__c", upper(col("description__c")))
23+
df.drop("kq_id__c")
24+
df["unitprice__c"]
25+
```
26+
27+
Scripts already running in Data Cloud are unaffected — the cloud has always returned lowercase column names.

CONTRIBUTING.md

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,84 @@ Use GitHub Issues page to submit issues, enhancement requests and discuss ideas.
2121
- Issues that have been identified as a feature request will be labelled `enhancement`.
2222

2323

24-
# Issues
24+
### Issues
2525
We use GitHub issues to track public bugs. Please ensure your description is
2626
clear and has sufficient instructions for us to reproduce the issue.
2727

2828
# Code of Conduct
2929
Please follow our [Code of Conduct](CODE_OF_CONDUCT.md).
30+
31+
# Development
32+
33+
## Quick Start
34+
35+
### Prerequisites
36+
37+
See the [Prerequisites section in README.md](./README.md#prerequisites) for complete setup requirements.
38+
39+
### Initial Setup
40+
41+
1. **Clone the repository**
42+
```bash
43+
git clone <repository-url>
44+
cd datacloud-customcode-python-sdk
45+
```
46+
47+
2. **Set up virtual environment and install dependencies**
48+
49+
**Note**: If you need to set a specific Python version, use `pyenv local 3.11.x` in the project directory.
50+
51+
```bash
52+
python3.11 -m venv .venv
53+
source .venv/bin/activate
54+
pip install poetry
55+
make develop
56+
```
57+
58+
3. **Verify installation**
59+
```bash
60+
datacustomcode version
61+
```
62+
63+
4. **Initialize a project for development work verification**
64+
65+
**Note**: To test your changes and develop new features, initialize a sample project:
66+
67+
```bash
68+
# Create a new directory for your test project
69+
mkdir my-test-project
70+
cd my-test-project
71+
72+
# Initialize a new Data Cloud custom code project
73+
datacustomcode init .
74+
75+
# Test your SDK modifications against the sample project with:
76+
datacustomcode run ./payload/entrypoint.py
77+
```
78+
79+
**Tip**: See the [README.md](./README.md) for additional `datacustomcode` commands (`scan`, `deploy`, `zip`) to test specific code paths and validate your SDK changes thoroughly.
80+
81+
## Makefile Commands
82+
83+
```bash
84+
# Clean build artifacts, caches and temporary files
85+
make clean
86+
87+
# Build package distribution
88+
make package
89+
90+
# Install main dependencies only
91+
make install
92+
93+
# Install dependencies for full development setup
94+
make develop
95+
96+
# Run code quality checks
97+
make lint
98+
99+
# Perform static type checking
100+
make mypy
101+
102+
# Run complete test suite
103+
make test
104+
```

FOR_CONTRIBUTORS.md

Lines changed: 0 additions & 82 deletions
This file was deleted.

src/datacustomcode/io/reader/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,5 @@ def _pandas_to_spark_schema(
4949
spark_type = TimestampType()
5050
else:
5151
spark_type = PANDAS_TYPE_MAPPING.get(str(dtype), StringType())
52-
fields.append(StructField(column, spark_type, nullable))
52+
fields.append(StructField(column.lower(), spark_type, nullable))
5353
return StructType(fields)

src/datacustomcode/templates/script/payload/entrypoint.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ def main():
1010
df = client.read_dlo("Account_std__dll")
1111

1212
# Perform transformations on the DataFrame
13-
df_upper1 = df.withColumn("Description__c", upper(col("Description__c")))
13+
df_upper1 = df.withColumn("description__c", upper(col("description__c")))
1414

1515
# Drop specific columns related to relationships
16-
df_upper1 = df_upper1.drop("SfdcOrganizationId__c")
17-
df_upper1 = df_upper1.drop("KQ_Id__c")
16+
df_upper1 = df_upper1.drop("sfdcorganizationid__c")
17+
df_upper1 = df_upper1.drop("kq_id__c")
1818

1919
# Save the transformed DataFrame
2020
dlo_name = "Account_std_copy__dll"

tests/io/reader/test_query_api.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,22 @@ def test_pandas_to_spark_schema_nullable(self):
6060
schema = _pandas_to_spark_schema(df, nullable=False)
6161
assert not schema.fields[0].nullable
6262

63+
def test_pandas_to_spark_schema_lowercases_columns(self):
64+
"""Column names from the API are lowercased to match Data Cloud."""
65+
df = pd.DataFrame({"UnitPrice__c": [1.0], "Quantity__c": [2], "Name__c": ["a"]})
66+
schema = _pandas_to_spark_schema(df)
67+
assert [f.name for f in schema.fields] == [
68+
"unitprice__c",
69+
"quantity__c",
70+
"name__c",
71+
]
72+
73+
def test_pandas_to_spark_schema_already_lowercase_is_idempotent(self):
74+
"""Already-lowercase column names are returned unchanged."""
75+
df = pd.DataFrame({"unitprice__c": [1.0], "quantity__c": [2]})
76+
schema = _pandas_to_spark_schema(df)
77+
assert [f.name for f in schema.fields] == ["unitprice__c", "quantity__c"]
78+
6379
def test_pandas_to_spark_schema_datetime_types(self):
6480
"""Test conversion of pandas datetime types to Spark TimestampType."""
6581

@@ -147,8 +163,8 @@ def mock_spark_session(self):
147163

148164
@pytest.fixture
149165
def mock_pandas_dataframe(self):
150-
"""Create a sample pandas DataFrame for testing."""
151-
return pd.DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
166+
"""Sample pandas DataFrame with PascalCase columns, as the QueryAPI returns."""
167+
return pd.DataFrame({"Col1__c": [1, 2], "Col2__c": ["a", "b"]})
152168

153169
@pytest.fixture
154170
def mock_connection(self, mock_pandas_dataframe):
@@ -301,3 +317,27 @@ def test_read_dmo_with_custom_row_limit(
301317
mock_connection.get_pandas_dataframe.assert_called_once_with(
302318
SQL_QUERY_TEMPLATE.format("test_dmo", 25)
303319
)
320+
321+
def test_read_dlo_schema_is_lowercase(
322+
self, reader_without_init, mock_connection, mock_pandas_dataframe
323+
):
324+
"""read_dlo returns a schema with all-lowercase field names even when the
325+
QueryAPI returns PascalCase column names."""
326+
reader_without_init._conn = mock_connection
327+
328+
reader_without_init.read_dlo("test_dlo")
329+
330+
_, schema_arg = reader_without_init.spark.createDataFrame.call_args[0]
331+
assert all(f.name == f.name.lower() for f in schema_arg.fields)
332+
333+
def test_read_dmo_schema_is_lowercase(
334+
self, reader_without_init, mock_connection, mock_pandas_dataframe
335+
):
336+
"""read_dmo returns a schema with all-lowercase field names even when the
337+
QueryAPI returns PascalCase column names."""
338+
reader_without_init._conn = mock_connection
339+
340+
reader_without_init.read_dmo("test_dmo")
341+
342+
_, schema_arg = reader_without_init.spark.createDataFrame.call_args[0]
343+
assert all(f.name == f.name.lower() for f in schema_arg.fields)

tests/io/reader/test_sf_cli.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,8 @@ def reader(self):
311311

312312
@pytest.fixture
313313
def sample_df(self):
314-
return pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
314+
"""DataFrame with PascalCase columns, as the REST API metadata returns."""
315+
return pd.DataFrame({"Id__c": [1, 2], "Name__c": ["a", "b"]})
315316

316317
@pytest.mark.parametrize(
317318
"method,obj_name",
@@ -348,6 +349,17 @@ def test_auto_infers_schema_when_none_given(self, reader, sample_df, method):
348349
_, schema_arg = reader.spark.createDataFrame.call_args[0]
349350
assert isinstance(schema_arg, StructType)
350351

352+
@pytest.mark.parametrize("method", ["read_dlo", "read_dmo"])
353+
def test_auto_infers_schema_lowercases_pascal_case_columns(
354+
self, reader, sample_df, method
355+
):
356+
"""Schema is lowercased so local results match Data Cloud column names."""
357+
with patch.object(reader, "_execute_query", return_value=sample_df):
358+
getattr(reader, method)("SomeObj")
359+
360+
_, schema_arg = reader.spark.createDataFrame.call_args[0]
361+
assert all(f.name == f.name.lower() for f in schema_arg.fields)
362+
351363
@pytest.mark.parametrize("method", ["read_dlo", "read_dmo"])
352364
def test_uses_provided_schema(self, reader, sample_df, method):
353365
from pyspark.sql.types import (

0 commit comments

Comments
 (0)