4848from sdc .functions import numpy_like
4949from sdc .str_arr_type import string_array_type , StringArrayType
5050from sdc .datatypes .range_index_type import RangeIndexType
51- from sdc .datatypes .int64_index_type import Int64IndexType
5251from sdc .str_arr_ext import (num_total_chars , append_string_array_to ,
5352 str_arr_is_na , pre_alloc_string_array , str_arr_set_na , string_array_type ,
5453 cp_str_list_to_array , create_str_arr_from_list , get_utf8_size ,
55- str_arr_set_na_by_mask , str_arr_stable_argosort )
54+ str_arr_set_na_by_mask )
5655from sdc .utilities .prange_utils import parallel_chunks
5756from sdc .utilities .utils import sdc_overload , sdc_register_jitable
58- from sdc .utilities .sdc_typing_utils import (
59- find_common_dtype_from_numpy_dtypes ,
60- TypeChecker )
61- from sdc .utilities .sdc_typing_utils import sdc_pandas_index_types
57+ from sdc .utilities .sdc_typing_utils import (find_common_dtype_from_numpy_dtypes ,
58+ TypeChecker )
6259
6360
6461class SDCLimitation (Exception ):
@@ -74,20 +71,18 @@ def hpat_arrays_append(A, B):
7471def hpat_arrays_append_overload (A , B ):
7572 """Function for appending underlying arrays (A and B) or list/tuple of arrays B to an array A"""
7673
77- use_A_array = isinstance (A , ( RangeIndexType , Int64IndexType ) )
78- use_B_array = isinstance (B , ( RangeIndexType , Int64IndexType ) )
79- if isinstance (A , (types .Array , RangeIndexType , Int64IndexType )):
80- if isinstance (B , (types .Array , RangeIndexType , Int64IndexType )):
74+ A_is_range_index = isinstance (A , RangeIndexType )
75+ B_is_range_index = isinstance (B , RangeIndexType )
76+ if isinstance (A , (types .Array , RangeIndexType )):
77+ if isinstance (B , (types .Array , RangeIndexType )):
8178 def _append_single_numeric_impl (A , B ):
82- _A = A .values if use_A_array == True else A # noqa
83- _B = B .values if use_B_array == True else B # noqa
79+ _A = A .values if A_is_range_index == True else A # noqa
80+ _B = B .values if B_is_range_index == True else B # noqa
8481 return numpy .concatenate ((_A , _B ,))
8582
8683 return _append_single_numeric_impl
87-
88- elif (isinstance (B , (types .UniTuple , types .List ))
89- and isinstance (B .dtype , (types .Array , RangeIndexType , Int64IndexType ))):
90- B_dtype_is_index = isinstance (B .dtype , (RangeIndexType , Int64IndexType ))
84+ elif isinstance (B , (types .UniTuple , types .List )) and isinstance (B .dtype , (types .Array , RangeIndexType )):
85+ B_dtype_is_range_index = isinstance (B .dtype , RangeIndexType )
9186 numba_common_dtype = find_common_dtype_from_numpy_dtypes ([A .dtype , B .dtype .dtype ], [])
9287
9388 # TODO: refactor to use numpy.concatenate when Numba supports building a tuple at runtime
@@ -97,10 +92,10 @@ def _append_list_numeric_impl(A, B):
9792 new_data = numpy .empty (total_length , numba_common_dtype )
9893
9994 stop = len (A )
100- _A = numpy .array (A ) if use_A_array == True else A # noqa
95+ _A = numpy .array (A ) if A_is_range_index == True else A # noqa
10196 new_data [:stop ] = _A
10297 for arr in B :
103- _arr = arr . values if B_dtype_is_index == True else arr # noqa
98+ _arr = numpy . array ( arr ) if B_dtype_is_range_index == True else arr # noqa
10499 start = stop
105100 stop = start + len (_arr )
106101 new_data [start :stop ] = _arr
@@ -223,13 +218,12 @@ def sdc_join_series_indexes_overload(left, right):
223218 """Function for joining arrays left and right in a way similar to pandas.join 'outer' algorithm"""
224219
225220 # check that both operands are of types used for representing Pandas indexes
226- if not (isinstance (left , sdc_pandas_index_types ) and isinstance (right , sdc_pandas_index_types )
227- and not isinstance (left , types .NoneType )
228- and not isinstance (right , types .NoneType )):
221+ if not (isinstance (left , (types .Array , StringArrayType , RangeIndexType ))
222+ and isinstance (right , (types .Array , StringArrayType , RangeIndexType ))):
229223 return None
230224
231- convert_left = isinstance (left , ( RangeIndexType , Int64IndexType ) )
232- convert_right = isinstance (right , ( RangeIndexType , Int64IndexType ) )
225+ convert_left = isinstance (left , RangeIndexType )
226+ convert_right = isinstance (right , RangeIndexType )
233227
234228 def _convert_to_arrays_impl (left , right ):
235229 _left = left .values if convert_left == True else left # noqa
@@ -249,9 +243,10 @@ def sdc_join_range_indexes_impl(left, right):
249243
250244 return sdc_join_range_indexes_impl
251245
252- elif (isinstance (left , (RangeIndexType , Int64IndexType , types .Array ))
253- and isinstance (right , (RangeIndexType , Int64IndexType , types .Array ))
254- and not (isinstance (left , types .Array ) and isinstance (right , types .Array ))):
246+ elif isinstance (left , RangeIndexType ) and isinstance (right , types .Array ):
247+ return _convert_to_arrays_impl
248+
249+ elif isinstance (left , types .Array ) and isinstance (right , RangeIndexType ):
255250 return _convert_to_arrays_impl
256251
257252 # TODO: remove code duplication below and merge numeric and StringArray impls into one
@@ -518,39 +513,41 @@ def sdc_arrays_argsort(A, kind='quicksort'):
518513
519514
520515@sdc_overload (sdc_arrays_argsort , jit_options = {'parallel' : False })
521- def sdc_arrays_argsort_overload (A , kind = 'quicksort' , ascending = True ):
516+ def sdc_arrays_argsort_overload (A , kind = 'quicksort' ):
522517 """Function providing pandas argsort implementation for different 1D array types"""
523518
524519 # kind is not known at compile time, so get this function here and use in impl if needed
525520 quicksort_func = quicksort .make_jit_quicksort ().run_quicksort
526521
527522 kind_is_default = isinstance (kind , str )
528523 if isinstance (A , types .Array ):
529- def _sdc_arrays_argsort_array_impl (A , kind = 'quicksort' , ascending = True ):
524+ def _sdc_arrays_argsort_array_impl (A , kind = 'quicksort' ):
530525 _kind = 'quicksort' if kind_is_default == True else kind # noqa
531- return numpy_like .argsort (A , kind = _kind , ascending = ascending )
526+ return numpy_like .argsort (A , kind = _kind )
532527
533528 return _sdc_arrays_argsort_array_impl
534529
535530 elif A == string_array_type :
536- def _sdc_arrays_argsort_str_arr_impl (A , kind = 'quicksort' , ascending = True ):
531+ def _sdc_arrays_argsort_str_arr_impl (A , kind = 'quicksort' ):
537532
533+ nan_mask = sdc .hiframes .api .get_nan_mask (A )
534+ idx = numpy .arange (len (A ))
535+ old_nan_positions = idx [nan_mask ]
536+
537+ data = A [~ nan_mask ]
538+ keys = idx [~ nan_mask ]
538539 if kind == 'quicksort' :
539- indexes = numpy .arange (len (A ))
540- data_index_pairs = list (zip (list (A ), list (indexes )))
541- zipped = quicksort_func (data_index_pairs )
542- argsorted = [zipped [i ][1 ] for i in indexes ]
543- res = numpy .array (argsorted , dtype = numpy .int64 )
544- # for non-stable sort the order within groups does not matter
545- # so just reverse the result when sorting in descending order
546- if not ascending :
547- res = res [::- 1 ]
540+ zipped = list (zip (list (data ), list (keys )))
541+ zipped = quicksort_func (zipped )
542+ argsorted = [zipped [i ][1 ] for i in numpy .arange (len (data ))]
548543 elif kind == 'mergesort' :
549- res = str_arr_stable_argosort (A , ascending = ascending )
544+ sdc .hiframes .sort .local_sort ((data , ), (keys , ))
545+ argsorted = list (keys )
550546 else :
551547 raise ValueError ("Unrecognized kind of sort in sdc_arrays_argsort" )
552548
553- return res
549+ argsorted .extend (old_nan_positions )
550+ return numpy .asarray (argsorted , dtype = numpy .int32 )
554551
555552 return _sdc_arrays_argsort_str_arr_impl
556553
@@ -621,16 +618,13 @@ def _sdc_take(data, indexes):
621618@sdc_overload (_sdc_take )
622619def _sdc_take_overload (data , indexes ):
623620
624- valid_data_types = (types .Array ,) + sdc_pandas_index_types
625- if not (isinstance (data , valid_data_types ) and not isinstance (data , types .NoneType )):
621+ if not isinstance (data , (types .Array , StringArrayType , RangeIndexType )):
626622 return None
627-
628- if not (isinstance (indexes , (types .Array , types .List , Int64IndexType ))
623+ if not (isinstance (indexes , (types .Array , types .List ))
629624 and isinstance (indexes .dtype , (types .Integer , types .ListType ))):
630625 return None
631626
632- if (isinstance (indexes .dtype , types .ListType )
633- and isinstance (data , (types .Array , types .List , RangeIndexType , Int64IndexType ))):
627+ if isinstance (indexes .dtype , types .ListType ) and isinstance (data , (types .Array , types .List , RangeIndexType )):
634628 arr_dtype = data .dtype
635629
636630 def _sdc_take_list_impl (data , indexes ):
@@ -683,7 +677,7 @@ def _sdc_take_list_str_impl(data, indexes):
683677
684678 return _sdc_take_list_str_impl
685679
686- elif isinstance (data , (types .Array , RangeIndexType , Int64IndexType )):
680+ elif isinstance (data , (types .Array , RangeIndexType )):
687681 arr_dtype = data .dtype
688682
689683 def _sdc_take_array_impl (data , indexes ):
@@ -746,7 +740,6 @@ def sdc_reindex_series_overload(arr, index, name, by_index):
746740 """ Reindexes series data by new index following the logic of pandas.core.indexing.check_bool_indexer """
747741
748742 range_indexes = isinstance (index , RangeIndexType ) and isinstance (by_index , RangeIndexType )
749- int64_indexes = isinstance (index , Int64IndexType ) and isinstance (by_index , Int64IndexType )
750743 data_dtype , index_dtype = arr .dtype , index .dtype
751744 data_is_str_arr = isinstance (arr .dtype , types .UnicodeType )
752745
@@ -755,8 +748,6 @@ def sdc_reindex_series_impl(arr, index, name, by_index):
755748 # no reindexing is needed if indexes are equal
756749 if range_indexes == True : # noqa
757750 equal_indexes = numpy_like .array_equal (index , by_index )
758- elif int64_indexes == True : # noqa
759- equal_indexes = numpy_like .array_equal (index , by_index )
760751 else :
761752 equal_indexes = False
762753 if (index is by_index or equal_indexes ):
@@ -781,10 +772,10 @@ def sdc_reindex_series_impl(arr, index, name, by_index):
781772 map_index_to_position [value ] = i
782773
783774 index_mismatch = 0
784- for i in numba . prange ( len ( by_index )):
785- val = by_index [ i ]
786- if val in map_index_to_position :
787- pos_in_self = map_index_to_position [val ]
775+ # FIXME: TypingError in parfor step (wrong promotion to float64?) if prange is used
776+ for i in numpy . arange ( len ( by_index )):
777+ if by_index [ i ] in map_index_to_position :
778+ pos_in_self = map_index_to_position [by_index [ i ] ]
788779 _res_data [i ] = arr [pos_in_self ]
789780 if data_is_str_arr == True : # noqa
790781 res_data_nan_mask [i ] = isna (arr , i )
0 commit comments