|
22 | 22 | Union, |
23 | 23 | ) |
24 | 24 |
|
25 | | -import pandas.api.types as pd_types |
26 | | -from pyspark.sql.types import ( |
27 | | - BooleanType, |
28 | | - DoubleType, |
29 | | - LongType, |
30 | | - StringType, |
31 | | - StructField, |
32 | | - StructType, |
33 | | - TimestampType, |
34 | | -) |
35 | 25 | from salesforcecdpconnector.connection import SalesforceCDPConnection |
36 | 26 |
|
37 | 27 | from datacustomcode.credentials import AuthType, Credentials |
38 | 28 | from datacustomcode.io.reader.base import BaseDataCloudReader |
39 | 29 | from datacustomcode.io.reader.sf_cli import SFCLIDataCloudReader |
| 30 | +from datacustomcode.io.reader.utils import _pandas_to_spark_schema |
40 | 31 |
|
41 | 32 | if TYPE_CHECKING: |
42 | | - import pandas |
43 | 33 | from pyspark.sql import DataFrame as PySparkDataFrame, SparkSession |
44 | | - from pyspark.sql.types import AtomicType |
| 34 | + from pyspark.sql.types import AtomicType, StructType |
45 | 35 |
|
46 | 36 | logger = logging.getLogger(__name__) |
47 | 37 |
|
48 | 38 |
|
49 | 39 | SQL_QUERY_TEMPLATE: Final = "SELECT * FROM {} LIMIT {}" |
50 | | -PANDAS_TYPE_MAPPING = { |
51 | | - "object": StringType(), |
52 | | - "int64": LongType(), |
53 | | - "float64": DoubleType(), |
54 | | - "bool": BooleanType(), |
55 | | -} |
56 | | - |
57 | | - |
58 | | -def _pandas_to_spark_schema( |
59 | | - pandas_df: pandas.DataFrame, nullable: bool = True |
60 | | -) -> StructType: |
61 | | - fields = [] |
62 | | - for column, dtype in pandas_df.dtypes.items(): |
63 | | - spark_type: AtomicType |
64 | | - if pd_types.is_datetime64_any_dtype(dtype): |
65 | | - spark_type = TimestampType() |
66 | | - else: |
67 | | - spark_type = PANDAS_TYPE_MAPPING.get(str(dtype), StringType()) |
68 | | - fields.append(StructField(column, spark_type, nullable)) |
69 | | - return StructType(fields) |
70 | 40 |
|
71 | 41 |
|
72 | 42 | def create_cdp_connection( |
|
0 commit comments