Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit c1d7e1e

Browse files
author
Ehsan Totoni
committed
Refactor Series type inference to reuse the DataFrame column dtype inference (`_infer_series_dtype`)
1 parent f69dbe2 commit c1d7e1e

2 files changed

Lines changed: 8 additions & 12 deletions

File tree

hpat/hiframes/boxing.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,7 @@ def typeof_pd_dataframe(val, c):
4646
# register series types for import
4747
@typeof_impl.register(pd.Series)
4848
def typeof_pd_str_series(val, c):
49-
# TODO: handle NA as 1st value
50-
if len(val) > 0 and isinstance(val.values[0], str): # and isinstance(val[-1], str):
51-
arr_typ = string_array_type
52-
elif len(val) > 0 and isinstance(val.values[0], datetime.date):
53-
# XXX: using .values to check date type since DatetimeIndex returns
54-
# Timestamp which is subtype of datetime.date
55-
return SeriesType(datetime_date_type)
56-
else:
57-
arr_typ = numba.typing.typeof._typeof_ndarray(val.values, c)
58-
59-
return arr_to_series_type(arr_typ)
49+
return SeriesType(_infer_series_dtype(val))
6050

6151

6252
@typeof_impl.register(pd.Index)
@@ -113,11 +103,16 @@ def get_hiframes_dtypes(df):
113103
def _infer_series_dtype(S):
114104
if S.dtype == np.dtype('O'):
115105
# XXX: assumes the whole column holds strings if the first value is a string
106+
# TODO: handle NA as 1st value
116107
first_val = S.iloc[0]
117108
if isinstance(first_val, list):
118109
return _infer_series_list_dtype(S)
119110
elif isinstance(first_val, str):
120111
return string_type
112+
elif isinstance(S.values[0], datetime.date):
113+
# XXX: using .values to check date type since DatetimeIndex returns
114+
# Timestamp which is subtype of datetime.date
115+
return datetime_date_type
121116
else:
122117
raise ValueError(
123118
"data type for column {} not supported".format(S.name))

hpat/hiframes/pd_series_ext.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,11 @@ def iterator_type(self):
9999

100100

101101
def _get_series_array_type(dtype):
102-
"""get underlying array type of series based on its dtype
102+
"""get underlying default array type of series based on its dtype
103103
"""
104104
# list(list(str))
105105
if dtype == types.List(string_type):
106+
# default data layout is list but split view is used if possible
106107
return list_string_array_type
107108
# string array
108109
elif dtype == string_type:

0 commit comments

Comments
 (0)