Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit e7bb1ea

Browse files
author
Ehsan Totoni
committed
split view fixes
1 parent 179b0f0 commit e7bb1ea

4 files changed

Lines changed: 20 additions & 2 deletions

File tree

hpat/hiframes/boxing.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,10 @@ def codegen(context, builder, sig, args):
234234

235235
if data_typ == string_array_type:
236236
native_val = unbox_str_series(string_array_type, arr_obj, c)
237+
elif data_typ == string_array_split_view_type:
238+
# XXX dummy unboxing to avoid errors in _get_dataframe_data(): creates a fresh split view struct without reading anything from arr_obj
239+
out_view = context.make_helper(builder, string_array_split_view_type)
240+
native_val = NativeValue(out_view._getvalue())
237241
elif data_typ == list_string_array_type:
238242
native_val = _unbox_array_list_str(arr_obj, c)
239243
else:

hpat/hiframes/filter.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from hpat.distributed_analysis import Distribution
1010
from hpat.utils import debug_prints
1111
from hpat.str_arr_ext import string_array_type
12+
from hpat.hiframes.split_impl import string_array_split_view_type
1213

1314

1415
class Filter(ir.Stmt):
@@ -49,7 +50,8 @@ def filter_array_analysis(filter_node, equiv_set, typemap, array_analysis):
4950
for _, col_var in filter_node.df_in_vars.items():
5051
typ = typemap[col_var.name]
5152
# TODO handle list_string_array_type in other nodes
52-
if typ in (string_array_type, list_string_array_type):
53+
if typ in (string_array_type, list_string_array_type,
54+
string_array_split_view_type):
5355
continue
5456
col_shape = equiv_set.get_shape(col_var)
5557
all_shapes.append(col_shape[0])
@@ -63,7 +65,8 @@ def filter_array_analysis(filter_node, equiv_set, typemap, array_analysis):
6365
all_shapes = []
6466
for _, col_var in filter_node.df_out_vars.items():
6567
typ = typemap[col_var.name]
66-
if typ in (string_array_type, list_string_array_type):
68+
if typ in (string_array_type, list_string_array_type,
69+
string_array_split_view_type):
6770
continue
6871
(shape, c_post) = array_analysis._gen_shape_call(
6972
equiv_set, col_var, typ.ndim, None)

hpat/hiframes/hiframes_typed.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2003,6 +2003,8 @@ def _handle_df_dropna(self, assign, lhs, rhs):
20032003
if is_str_arr_typ(t)]
20042004
list_str_colnames = [in_names[i] for i, t in enumerate(in_typ.types)
20052005
if t == list_string_array_type]
2006+
split_view_colnames = [in_names[i] for i, t in enumerate(in_typ.types)
2007+
if t == string_array_split_view_type]
20062008
isna_calls = ['hpat.hiframes.api.isna({}, i)'.format(v) for v in in_names]
20072009

20082010
func_text = "def _dropna_impl(arr_tup, inplace):\n"
@@ -2021,12 +2023,17 @@ def _handle_df_dropna(self, assign, lhs, rhs):
20212023
func_text += " {} = hpat.str_arr_ext.pre_alloc_string_array(new_len, num_chars_{})\n".format(out, v)
20222024
elif v in list_str_colnames:
20232025
func_text += " {} = hpat.str_ext.alloc_list_list_str(new_len)\n".format(out)
2026+
elif v in split_view_colnames:
2027+
# TODO support dropna() for split view; for now the input column is passed through as-is (NA rows are not dropped from it)
2028+
func_text += " {} = {}\n".format(out, v)
20242029
else:
20252030
func_text += " {} = np.empty(new_len, {}.dtype)\n".format(out, v)
20262031
func_text += " curr_ind = 0\n"
20272032
func_text += " for i in numba.parfor.internal_prange(old_len):\n"
20282033
func_text += " if not ({}):\n".format(' or '.join(isna_calls))
20292034
for v, out in zip(in_names, out_names):
2035+
if v in split_view_colnames:
2036+
continue
20302037
func_text += " {}[curr_ind] = {}[i]\n".format(out, v)
20312038
func_text += " curr_ind += 1\n"
20322039
func_text += " return ({},)\n".format(", ".join(out_names))

hpat/hiframes/pd_series_ext.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,9 @@ def arr_to_series_type(arr):
173173
series_type = SeriesType(string_type)
174174
elif arr == list_string_array_type:
175175
series_type = SeriesType(types.List(string_type))
176+
elif arr == string_array_split_view_type:
177+
series_type = SeriesType(types.List(string_type),
178+
string_array_split_view_type)
176179
return series_type
177180

178181

@@ -781,6 +784,7 @@ def generic(self, args, kws):
781784
else:
782785
sig = GetItemStringArray.generic(self, (in_arr, in_idx), kws)
783786
elif in_arr == list_string_array_type:
787+
# TODO: support string_array_split_view_type (split view)
784788
# mimic array indexing for list
785789
if (isinstance(in_idx, types.Array) and in_idx.ndim == 1
786790
and isinstance(

0 commit comments

Comments
 (0)