Commit 016b4f8

Lowercase columns for local, to match Data Cloud
1 parent: f914a25

4 files changed: 59 additions & 7 deletions

src/datacustomcode/io/reader/utils.py

Lines changed: 1 addition & 1 deletion

@@ -49,5 +49,5 @@ def _pandas_to_spark_schema(
             spark_type = TimestampType()
         else:
             spark_type = PANDAS_TYPE_MAPPING.get(str(dtype), StringType())
-        fields.append(StructField(column, spark_type, nullable))
+        fields.append(StructField(column.lower(), spark_type, nullable))
     return StructType(fields)
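
For context, a minimal runnable sketch of the conversion this hunk changes. The PANDAS_TYPE_MAPPING contents below are illustrative stand-ins, not the repository's exact table:

import pandas as pd
from pyspark.sql.types import (
    DoubleType,
    LongType,
    StringType,
    StructField,
    StructType,
    TimestampType,
)

# Illustrative stand-in for the module's dtype table; the real mapping may differ.
PANDAS_TYPE_MAPPING = {
    "int64": LongType(),
    "float64": DoubleType(),
    "object": StringType(),
}

def _pandas_to_spark_schema(df: pd.DataFrame, nullable: bool = True) -> StructType:
    fields = []
    for column, dtype in df.dtypes.items():
        if str(dtype).startswith("datetime64"):
            spark_type = TimestampType()
        else:
            spark_type = PANDAS_TYPE_MAPPING.get(str(dtype), StringType())
        # The change: lowercase the name so local schemas match Data Cloud.
        fields.append(StructField(column.lower(), spark_type, nullable))
    return StructType(fields)

df = pd.DataFrame({"UnitPrice__c": [1.0], "Name__c": ["a"]})
print([f.name for f in _pandas_to_spark_schema(df).fields])
# ['unitprice__c', 'name__c']

Lowercasing at schema-construction time normalizes names in one place rather than in each caller.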

src/datacustomcode/templates/script/payload/entrypoint.py

Lines changed: 3 additions & 3 deletions

@@ -10,11 +10,11 @@ def main():
     df = client.read_dlo("Account_std__dll")

     # Perform transformations on the DataFrame
-    df_upper1 = df.withColumn("Description__c", upper(col("Description__c")))
+    df_upper1 = df.withColumn("description__c", upper(col("description__c")))

     # Drop specific columns related to relationships
-    df_upper1 = df_upper1.drop("SfdcOrganizationId__c")
-    df_upper1 = df_upper1.drop("KQ_Id__c")
+    df_upper1 = df_upper1.drop("sfdcorganizationid__c")
+    df_upper1 = df_upper1.drop("kq_id__c")

     # Save the transformed DataFrame
     dlo_name = "Account_std_copy__dll"
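
A small local illustration of the template's transformation against the now-lowercase schema (sample data invented here; needs only a local pyspark session, no Data Cloud connection):

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, upper

spark = SparkSession.builder.master("local[1]").getOrCreate()

# Simulates what read_dlo now returns: all-lowercase column names.
df = spark.createDataFrame(
    [("acme corp", "00D123", "kq-1")],
    ["description__c", "sfdcorganizationid__c", "kq_id__c"],
)

df_upper1 = (
    df.withColumn("description__c", upper(col("description__c")))
    .drop("sfdcorganizationid__c")
    .drop("kq_id__c")
)

print(df_upper1.columns)  # ['description__c']
df_upper1.show()          # description__c: ACME CORP
spark.stop()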

tests/io/reader/test_query_api.py

Lines changed: 42 additions & 2 deletions

@@ -60,6 +60,22 @@ def test_pandas_to_spark_schema_nullable(self):
         schema = _pandas_to_spark_schema(df, nullable=False)
         assert not schema.fields[0].nullable

+    def test_pandas_to_spark_schema_lowercases_columns(self):
+        """Column names from the API are lowercased to match Data Cloud."""
+        df = pd.DataFrame({"UnitPrice__c": [1.0], "Quantity__c": [2], "Name__c": ["a"]})
+        schema = _pandas_to_spark_schema(df)
+        assert [f.name for f in schema.fields] == [
+            "unitprice__c",
+            "quantity__c",
+            "name__c",
+        ]
+
+    def test_pandas_to_spark_schema_already_lowercase_is_idempotent(self):
+        """Already-lowercase column names are returned unchanged."""
+        df = pd.DataFrame({"unitprice__c": [1.0], "quantity__c": [2]})
+        schema = _pandas_to_spark_schema(df)
+        assert [f.name for f in schema.fields] == ["unitprice__c", "quantity__c"]
+
     def test_pandas_to_spark_schema_datetime_types(self):
         """Test conversion of pandas datetime types to Spark TimestampType."""

@@ -147,8 +163,8 @@ def mock_spark_session(self):

     @pytest.fixture
     def mock_pandas_dataframe(self):
-        """Create a sample pandas DataFrame for testing."""
-        return pd.DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
+        """Sample pandas DataFrame with PascalCase columns, as the QueryAPI returns."""
+        return pd.DataFrame({"Col1__c": [1, 2], "Col2__c": ["a", "b"]})

     @pytest.fixture
     def mock_connection(self, mock_pandas_dataframe):

@@ -301,3 +317,27 @@ def test_read_dmo_with_custom_row_limit(
         mock_connection.get_pandas_dataframe.assert_called_once_with(
             SQL_QUERY_TEMPLATE.format("test_dmo", 25)
         )
+
+    def test_read_dlo_schema_is_lowercase(
+        self, reader_without_init, mock_connection, mock_pandas_dataframe
+    ):
+        """read_dlo returns a schema with all-lowercase field names even when the
+        QueryAPI returns PascalCase column names."""
+        reader_without_init._conn = mock_connection
+
+        reader_without_init.read_dlo("test_dlo")
+
+        _, schema_arg = reader_without_init.spark.createDataFrame.call_args[0]
+        assert all(f.name == f.name.lower() for f in schema_arg.fields)
+
+    def test_read_dmo_schema_is_lowercase(
+        self, reader_without_init, mock_connection, mock_pandas_dataframe
+    ):
+        """read_dmo returns a schema with all-lowercase field names even when the
+        QueryAPI returns PascalCase column names."""
+        reader_without_init._conn = mock_connection
+
+        reader_without_init.read_dmo("test_dmo")
+
+        _, schema_arg = reader_without_init.spark.createDataFrame.call_args[0]
+        assert all(f.name == f.name.lower() for f in schema_arg.fields)
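
A note on the pattern these tests rely on: reader_without_init.spark is a MagicMock, so calls to createDataFrame are recorded rather than executed, and the schema argument can be pulled back out of call_args. A standalone sketch of just that mechanism (names here are illustrative):

from unittest.mock import MagicMock
from pyspark.sql.types import StringType, StructField, StructType

spark = MagicMock()
schema = StructType([StructField("name__c", StringType(), True)])

# The call is recorded on the mock; no Spark session is involved.
spark.createDataFrame([("a",)], schema)

# call_args[0] holds the positional arguments of the most recent call.
_, schema_arg = spark.createDataFrame.call_args[0]
assert all(f.name == f.name.lower() for f in schema_arg.fields)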

tests/io/reader/test_sf_cli.py

Lines changed: 13 additions & 1 deletion

@@ -311,7 +311,8 @@ def reader(self):

     @pytest.fixture
     def sample_df(self):
-        return pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
+        """DataFrame with PascalCase columns, as the REST API metadata returns."""
+        return pd.DataFrame({"Id__c": [1, 2], "Name__c": ["a", "b"]})

     @pytest.mark.parametrize(
         "method,obj_name",

@@ -348,6 +349,17 @@ def test_auto_infers_schema_when_none_given(self, reader, sample_df, method):
         _, schema_arg = reader.spark.createDataFrame.call_args[0]
         assert isinstance(schema_arg, StructType)

+    @pytest.mark.parametrize("method", ["read_dlo", "read_dmo"])
+    def test_auto_infers_schema_lowercases_pascal_case_columns(
+        self, reader, sample_df, method
+    ):
+        """Schema is lowercased so local results match Data Cloud column names."""
+        with patch.object(reader, "_execute_query", return_value=sample_df):
+            getattr(reader, method)("SomeObj")
+
+        _, schema_arg = reader.spark.createDataFrame.call_args[0]
+        assert all(f.name == f.name.lower() for f in schema_arg.fields)
+
     @pytest.mark.parametrize("method", ["read_dlo", "read_dmo"])
     def test_uses_provided_schema(self, reader, sample_df, method):
         from pyspark.sql.types import (
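
The new sf_cli test leans on patch.object to stub out the reader's _execute_query with the PascalCase fixture. A condensed, standalone sketch of that pattern, with FakeReader as an invented stand-in for the real reader class:

import pandas as pd
from unittest.mock import patch

class FakeReader:
    """Illustrative stand-in for the SF CLI reader."""

    def _execute_query(self, query):
        raise RuntimeError("would call the real REST API")

    def read_dlo(self, name):
        return self._execute_query(f"SELECT * FROM {name}")

reader = FakeReader()
sample_df = pd.DataFrame({"Id__c": [1, 2], "Name__c": ["a", "b"]})

# patch.object swaps _execute_query for a stub returning the fixture,
# so read_dlo runs without any network access.
with patch.object(reader, "_execute_query", return_value=sample_df):
    df = reader.read_dlo("SomeObj")

assert list(df.columns) == ["Id__c", "Name__c"]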
