Skip to content

Commit 641b6f8

Browse files
committed
Add missing datetime64[ms*] pandas type mappings
1 parent 9e3da9c commit 641b6f8

2 files changed

Lines changed: 36 additions & 0 deletions

File tree

src/datacustomcode/io/reader/query_api.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@
5050
"float64": DoubleType(),
5151
"datetime64[ns]": TimestampType(),
5252
"datetime64[ns, UTC]": TimestampType(),
53+
"datetime64[ms]": TimestampType(),
54+
"datetime64[ms, UTC]": TimestampType(),
5355
"bool": BooleanType(),
5456
}
5557

tests/io/reader/test_query_api.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
StringType,
1515
StructField,
1616
StructType,
17+
TimestampType,
1718
)
1819
import pytest
1920

@@ -59,6 +60,39 @@ def test_pandas_to_spark_schema_nullable(self):
5960
schema = _pandas_to_spark_schema(df, nullable=False)
6061
assert not schema.fields[0].nullable
6162

63+
def test_pandas_to_spark_schema_datetime_types(self):
    """Test conversion of pandas datetime types to Spark TimestampType.

    Builds one column per supported datetime dtype string
    (``datetime64[ns]``, ``datetime64[ns, UTC]``, ``datetime64[ms]`` and
    ``datetime64[ms, UTC]``) and checks that each is converted to a
    nullable ``TimestampType`` field.
    """
    timestamps = ["2023-01-01 10:00:00", "2023-01-02 11:00:00"]
    naive = pd.to_datetime(timestamps)
    aware = pd.to_datetime(timestamps, utc=True)

    # Column name -> pandas dtype string the column must carry.
    expected_dtypes = {
        "datetime_ns": "datetime64[ns]",
        "datetime_ns_utc": "datetime64[ns, UTC]",
        "datetime_ms": "datetime64[ms]",
        "datetime_ms_utc": "datetime64[ms, UTC]",
    }

    # as_unit() changes the resolution but keeps the timezone, so the
    # tz-aware millisecond column really is "datetime64[ms, UTC]" instead
    # of being stripped down to a naive "datetime64[ms]". Pinning the unit
    # on every column also avoids depending on to_datetime's default
    # resolution.
    df = pd.DataFrame(
        {
            "datetime_ns": naive.as_unit("ns"),
            "datetime_ns_utc": aware.as_unit("ns"),
            "datetime_ms": naive.as_unit("ms"),
            "datetime_ms_utc": aware.as_unit("ms"),
        }
    )

    # Verify the actual pandas dtypes first, so a failure here points at
    # the test data rather than at the conversion under test.
    for column, dtype_name in expected_dtypes.items():
        assert str(df[column].dtype) == dtype_name

    # Convert to Spark schema
    schema = _pandas_to_spark_schema(df)

    # Verify the schema
    assert isinstance(schema, StructType)
    assert len(schema.fields) == 4

    # Check that all datetime columns map to a nullable TimestampType
    field_dict = {field.name: field for field in schema.fields}
    for field_name in expected_dtypes:
        assert isinstance(field_dict[field_name].dataType, TimestampType), \
            f"Field {field_name} should be TimestampType, got {type(field_dict[field_name].dataType)}"
        assert field_dict[field_name].nullable
95+
6296

6397
# Completely isolated test class for QueryAPIDataCloudReader
6498
@pytest.mark.usefixtures("patch_all_requests")

0 commit comments

Comments
 (0)