Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 179b0f0

Browse files
author
Ehsan Totoni
committed
Fix string split-view array handling in DataFrame boxing, isna, the distributed passes, etc.
1 parent 1b46def commit 179b0f0

7 files changed

Lines changed: 23 additions & 4 deletions

File tree

hpat/distributed.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -584,6 +584,13 @@ def f(arr, bag, start, count): # pragma: no cover
584584
self._array_counts[lhs] = self._array_counts[in_arr]
585585
self._array_sizes[lhs] = self._array_sizes[in_arr]
586586

587+
if (fdef == ('compute_split_view', 'hpat.hiframes.split_impl')
588+
and self._is_1D_arr(rhs.args[0].name)):
589+
in_arr = rhs.args[0].name
590+
self._array_starts[lhs] = self._array_starts[in_arr]
591+
self._array_counts[lhs] = self._array_counts[in_arr]
592+
self._array_sizes[lhs] = self._array_sizes[in_arr]
593+
587594
if fdef == ('isna', 'hpat.hiframes.api') and self._is_1D_arr(rhs.args[0].name):
588595
# fix index in call to isna
589596
arr = rhs.args[0]

hpat/distributed_analysis.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -379,6 +379,10 @@ def _analyze_call(self, lhs, rhs, func_var, args, array_dists):
379379
self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
380380
return
381381

382+
if fdef == ('compute_split_view', 'hpat.hiframes.split_impl'):
383+
self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
384+
return
385+
382386
# np.fromfile()
383387
if fdef == ('file_read', 'hpat.io.np_io'):
384388
return

hpat/hiframes/api.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
update_shuffle_meta, alloc_pre_shuffle_metadata,
3737
)
3838
from hpat.hiframes.join import write_send_buff
39+
from hpat.hiframes.split_impl import string_array_split_view_type
3940

4041
# XXX: used in agg func output to avoid mutating filter, agg, join, etc.
4142
# TODO: fix type inferrer and remove this
@@ -446,6 +447,8 @@ def isna_overload(arr, i):
446447
# TODO: support NaN in list(list(str))
447448
if arr == list_string_array_type:
448449
return lambda arr, i: False
450+
if arr == string_array_split_view_type:
451+
return lambda arr, i: False
449452
# TODO: extend to other types
450453
assert isinstance(arr, types.Array)
451454
dtype = arr.dtype

hpat/hiframes/boxing.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -189,6 +189,8 @@ def box_dataframe(typ, val, c):
189189
elif isinstance(dtype, PDCategoricalDtype):
190190
arr_obj = box_categorical_array(arr_typ, arr, c)
191191
# context.nrt.incref(builder, arr_typ, arr)
192+
elif arr_typ == string_array_split_view_type:
193+
arr_obj = box_str_arr_split_view(arr_typ, arr, c)
192194
elif dtype == types.List(string_type):
193195
arr_obj = box_list(list_string_array_type, arr, c)
194196
# context.nrt.incref(builder, arr_typ, arr) # TODO required?

hpat/hiframes/hiframes_typed.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,8 @@
3636
from hpat.hiframes.aggregate import Aggregate
3737
from hpat.hiframes import series_kernels, split_impl
3838
from hpat.hiframes.series_kernels import series_replace_funcs
39-
from hpat.hiframes.split_impl import string_array_split_view_type
39+
from hpat.hiframes.split_impl import (string_array_split_view_type,
40+
StringArraySplitViewType)
4041

4142

4243
_dt_index_binops = ('==', '!=', '>=', '>', '<=', '<', '-',
@@ -2399,7 +2400,7 @@ def _get_arg(self, f_name, args, kws, arg_no, arg_name, default=None,
23992400
def _fix_typ_undefs(new_typ, old_typ):
24002401
if isinstance(old_typ, (types.Array, SeriesType)):
24012402
assert isinstance(new_typ, (types.Array, SeriesType, StringArrayType,
2402-
types.List))
2403+
types.List, StringArraySplitViewType))
24032404
if new_typ.dtype == types.undefined:
24042405
return new_typ.copy(old_typ.dtype)
24052406
if isinstance(old_typ, (types.Tuple, types.UniTuple)):

hpat/hiframes/pd_series_ext.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -157,7 +157,8 @@ def __init__(self, dmm, fe_type):
157157

158158

159159
def series_to_array_type(typ, replace_boxed=False):
160-
return _get_series_array_type(typ.dtype)
160+
return typ.data
161+
# return _get_series_array_type(typ.dtype)
161162

162163

163164
def is_series_type(typ):

hpat/utils.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -289,7 +289,8 @@ def get_slice_step(typemap, func_ir, var):
289289
def is_array(typemap, varname):
290290
return (varname in typemap
291291
and (is_np_array(typemap, varname)
292-
or typemap[varname] in (string_array_type, list_string_array_type)
292+
or typemap[varname] in (string_array_type, list_string_array_type,
293+
hpat.hiframes.split_impl.string_array_split_view_type)
293294
or isinstance(typemap[varname], hpat.hiframes.pd_series_ext.SeriesType)))
294295

295296
def is_np_array(typemap, varname):

0 commit comments

Comments
 (0)