7272from sdc .hiframes .api import isna
7373from sdc .datatypes .hpat_pandas_groupby_functions import init_series_groupby
7474from sdc .utilities .prange_utils import parallel_chunks
75+ from sdc .set_ext import build_set
7576
7677from .pandas_series_functions import apply
7778from .pandas_series_functions import map as _map
@@ -652,7 +653,7 @@ def sdc_pandas_series_setitem_idx_bool_array_align_impl(self, idx, value):
652653 # and filtered indexes are looked up in value.index, and if found, the corresponding value is set
653654 if value_is_series == True : # noqa
654655 value_index , self_index = value .index , self .index
655- unique_value_indices , unique_self_indices = set (value_index ), set (self_index )
656+ unique_value_indices , unique_self_indices = build_set (value_index ), build_set (self_index )
656657
657658 # pandas behaves differently if value.index has duplicates and if it has none
658659 # in case of duplicates in value.index assignment is made via positions
@@ -702,7 +703,7 @@ def sdc_pandas_series_setitem_idx_bool_series_align_impl(self, idx, value):
702703 # and filtered indexes are either looked up in value.index (if value is a Series)
703704 # or in self.index (if value is scalar or array)
704705 filtered_idx_indices = idx_index [idx ._data ]
705- filtered_idx_indices_set = set (filtered_idx_indices )
706+ filtered_idx_indices_set = build_set (filtered_idx_indices )
706707 if value_is_series == True : # noqa
707708
708709 if len (filtered_idx_indices_set ) != len (filtered_idx_indices ):
@@ -2074,7 +2075,7 @@ def hpat_pandas_series_isin_impl(self, values):
20742075 # return pandas.Series (np.isin (self._data, values))
20752076
20762077 values = str_list_to_array (list (values ))
2077- values = set (values )
2078+ values = build_set (values )
20782079 data_len = len (self ._data )
20792080 result = numpy .empty (data_len , dtype = numpy .bool_ )
20802081 for i in prange (data_len ):
@@ -2086,7 +2087,7 @@ def hpat_pandas_series_isin_impl(self, values):
20862087 # TODO: replace with below line when Numba supports np.isin in nopython mode
20872088 # return pandas.Series (np.isin (self._data, values))
20882089
2089- values = set (values )
2090+ values = build_set (values )
20902091 data_len = len (self ._data )
20912092 result = numpy .empty (data_len , dtype = numpy .bool_ )
20922093 for i in prange (data_len ):
@@ -3447,7 +3448,7 @@ def hpat_pandas_series_unique_str_impl(self):
34473448 Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_unique_str
34483449 '''
34493450
3450- str_set = set (self ._data )
3451+ str_set = build_set (self ._data )
34513452 return to_array (str_set )
34523453
34533454 return hpat_pandas_series_unique_str_impl
@@ -3579,7 +3580,7 @@ def hpat_pandas_series_nunique_str_impl(self, dropna=True):
35793580 if dropna :
35803581 nan_mask = self .isna ()
35813582 data = self ._data [~ nan_mask ._data ]
3582- unique_values = set (data )
3583+ unique_values = build_set (data )
35833584 return len (unique_values )
35843585
35853586 return hpat_pandas_series_nunique_str_impl
@@ -3592,7 +3593,7 @@ def hpat_pandas_series_nunique_impl(self, dropna=True):
35923593 data_mask_for_nan = numpy .isnan (self ._data )
35933594 nan_exists = numpy .any (data_mask_for_nan )
35943595 data_no_nan = self ._data [~ data_mask_for_nan ]
3595- data_set = set (data_no_nan )
3596+ data_set = build_set (data_no_nan )
35963597 if dropna or not nan_exists :
35973598 return len (data_set )
35983599 else :
0 commit comments