Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit dde87ff

Browse files
author
Ehsan Totoni
committed
fix str arr dropna() kernel
1 parent ee7ef6e commit dde87ff

2 files changed

Lines changed: 28 additions & 4 deletions

File tree

hpat/distributed_analysis.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,18 @@ def _analyze_call(self, lhs, rhs, func_var, args, array_dists):
418418
if fdef == ('num_total_chars', 'hpat.str_arr_ext'):
419419
return
420420

421+
if fdef == ('_series_dropna_str_alloc_impl_inner', 'hpat.hiframes.series_kernels'):
422+
if lhs not in array_dists:
423+
array_dists[lhs] = Distribution.OneD_Var
424+
in_dist = array_dists[rhs.args[0].name]
425+
out_dist = array_dists[lhs]
426+
out_dist = Distribution(min(out_dist.value, in_dist.value))
427+
array_dists[lhs] = out_dist
428+
# a non-1D_Var output distribution (e.g. REP) is propagated back to the input
429+
if out_dist != Distribution.OneD_Var:
430+
array_dists[rhs.args[0].name] = out_dist
431+
return
432+
421433
if (fdef == ('copy_non_null_offsets', 'hpat.str_arr_ext')
422434
or fdef == ('copy_data', 'hpat.str_arr_ext')):
423435
out_arrname = rhs.args[0].name

hpat/hiframes/series_kernels.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,19 +70,31 @@ def _series_dropna_float_impl(S, name): # pragma: no cover
7070

7171
return hpat.hiframes.api.init_series(A, None, name)
7272

73-
def _series_dropna_str_alloc_impl(B, name): # pragma: no cover
74-
# local_len to enable 1D_Var dist
73+
74+
# use a separate njit function since 1D_Var is broken for allocations whose length is computed
75+
@numba.njit(no_cpython_wrapper=True)
76+
def _series_dropna_str_alloc_impl_inner(B): # pragma: no cover
7577
# TODO: test
7678
# TODO: generalize
77-
old_len = hpat.distributed_api.local_len(B)
79+
old_len = len(B)
80+
na_count = 0
81+
for i in range(len(B)):
82+
if hpat.str_arr_ext.str_arr_is_na(B, i):
83+
na_count += 1
7884
# TODO: more efficient null counting
79-
new_len = old_len - hpat.hiframes.api.init_series(B).isna().sum()
85+
new_len = old_len - na_count
8086
num_chars = hpat.str_arr_ext.num_total_chars(B)
8187
A = hpat.str_arr_ext.pre_alloc_string_array(new_len, num_chars)
8288
hpat.str_arr_ext.copy_non_null_offsets(A, B)
8389
hpat.str_arr_ext.copy_data(A, B)
90+
return A
91+
92+
93+
def _series_dropna_str_alloc_impl(B, name): # pragma: no cover
94+
A = hpat.hiframes.series_kernels._series_dropna_str_alloc_impl_inner(B)
8495
return hpat.hiframes.api.init_series(A, None, name)
8596

97+
8698
# return the nan value for the type (handle dt64)
8799
def _get_nan(val):
88100
return np.nan

0 commit comments

Comments
 (0)