4848from sdc .functions import numpy_like
4949from sdc .str_arr_type import string_array_type , StringArrayType
5050from sdc .datatypes .range_index_type import RangeIndexType
51+ from sdc .datatypes .int64_index_type import Int64IndexType
5152from sdc .str_arr_ext import (num_total_chars , append_string_array_to ,
5253 str_arr_is_na , pre_alloc_string_array , str_arr_set_na , string_array_type ,
5354 cp_str_list_to_array , create_str_arr_from_list , get_utf8_size ,
54- str_arr_set_na_by_mask )
55+ str_arr_set_na_by_mask , str_arr_stable_argosort )
5556from sdc .utilities .prange_utils import parallel_chunks
5657from sdc .utilities .utils import sdc_overload , sdc_register_jitable
57- from sdc .utilities .sdc_typing_utils import (find_common_dtype_from_numpy_dtypes ,
58- TypeChecker )
58+ from sdc .utilities .sdc_typing_utils import (
59+ find_common_dtype_from_numpy_dtypes ,
60+ TypeChecker )
61+ from sdc .utilities .sdc_typing_utils import sdc_pandas_index_types
5962
6063
6164class SDCLimitation (Exception ):
@@ -71,18 +74,20 @@ def hpat_arrays_append(A, B):
7174def hpat_arrays_append_overload (A , B ):
7275 """Function for appending underlying arrays (A and B) or list/tuple of arrays B to an array A"""
7376
74- A_is_range_index = isinstance (A , RangeIndexType )
75- B_is_range_index = isinstance (B , RangeIndexType )
76- if isinstance (A , (types .Array , RangeIndexType )):
77- if isinstance (B , (types .Array , RangeIndexType )):
77+ use_A_array = isinstance (A , ( RangeIndexType , Int64IndexType ) )
78+ use_B_array = isinstance (B , ( RangeIndexType , Int64IndexType ) )
79+ if isinstance (A , (types .Array , RangeIndexType , Int64IndexType )):
80+ if isinstance (B , (types .Array , RangeIndexType , Int64IndexType )):
7881 def _append_single_numeric_impl (A , B ):
79- _A = A .values if A_is_range_index == True else A # noqa
80- _B = B .values if B_is_range_index == True else B # noqa
82+ _A = A .values if use_A_array == True else A # noqa
83+ _B = B .values if use_B_array == True else B # noqa
8184 return numpy .concatenate ((_A , _B ,))
8285
8386 return _append_single_numeric_impl
84- elif isinstance (B , (types .UniTuple , types .List )) and isinstance (B .dtype , (types .Array , RangeIndexType )):
85- B_dtype_is_range_index = isinstance (B .dtype , RangeIndexType )
87+
88+ elif (isinstance (B , (types .UniTuple , types .List ))
89+ and isinstance (B .dtype , (types .Array , RangeIndexType , Int64IndexType ))):
90+ B_dtype_is_index = isinstance (B .dtype , (RangeIndexType , Int64IndexType ))
8691 numba_common_dtype = find_common_dtype_from_numpy_dtypes ([A .dtype , B .dtype .dtype ], [])
8792
8893 # TODO: refactor to use numpy.concatenate when Numba supports building a tuple at runtime
@@ -92,10 +97,10 @@ def _append_list_numeric_impl(A, B):
9297 new_data = numpy .empty (total_length , numba_common_dtype )
9398
9499 stop = len (A )
95- _A = numpy .array (A ) if A_is_range_index == True else A # noqa
100+ _A = numpy .array (A ) if use_A_array == True else A # noqa
96101 new_data [:stop ] = _A
97102 for arr in B :
98- _arr = numpy . array ( arr ) if B_dtype_is_range_index == True else arr # noqa
103+ _arr = arr . values if B_dtype_is_index == True else arr # noqa
99104 start = stop
100105 stop = start + len (_arr )
101106 new_data [start :stop ] = _arr
@@ -218,12 +223,13 @@ def sdc_join_series_indexes_overload(left, right):
218223 """Function for joining arrays left and right in a way similar to pandas.join 'outer' algorithm"""
219224
220225 # check that both operands are of types used for representing Pandas indexes
221- if not (isinstance (left , (types .Array , StringArrayType , RangeIndexType ))
222- and isinstance (right , (types .Array , StringArrayType , RangeIndexType ))):
226+ if not (isinstance (left , sdc_pandas_index_types ) and isinstance (right , sdc_pandas_index_types )
227+ and not isinstance (left , types .NoneType )
228+ and not isinstance (right , types .NoneType )):
223229 return None
224230
225- convert_left = isinstance (left , RangeIndexType )
226- convert_right = isinstance (right , RangeIndexType )
231+ convert_left = isinstance (left , ( RangeIndexType , Int64IndexType ) )
232+ convert_right = isinstance (right , ( RangeIndexType , Int64IndexType ) )
227233
228234 def _convert_to_arrays_impl (left , right ):
229235 _left = left .values if convert_left == True else left # noqa
@@ -243,10 +249,9 @@ def sdc_join_range_indexes_impl(left, right):
243249
244250 return sdc_join_range_indexes_impl
245251
246- elif isinstance (left , RangeIndexType ) and isinstance (right , types .Array ):
247- return _convert_to_arrays_impl
248-
249- elif isinstance (left , types .Array ) and isinstance (right , RangeIndexType ):
252+ elif (isinstance (left , (RangeIndexType , Int64IndexType , types .Array ))
253+ and isinstance (right , (RangeIndexType , Int64IndexType , types .Array ))
254+ and not (isinstance (left , types .Array ) and isinstance (right , types .Array ))):
250255 return _convert_to_arrays_impl
251256
252257 # TODO: remove code duplication below and merge numeric and StringArray impls into one
@@ -513,41 +518,39 @@ def sdc_arrays_argsort(A, kind='quicksort'):
513518
514519
515520@sdc_overload (sdc_arrays_argsort , jit_options = {'parallel' : False })
516- def sdc_arrays_argsort_overload (A , kind = 'quicksort' ):
521+ def sdc_arrays_argsort_overload (A , kind = 'quicksort' , ascending = True ):
517522 """Function providing pandas argsort implementation for different 1D array types"""
518523
519524 # kind is not known at compile time, so get this function here and use in impl if needed
520525 quicksort_func = quicksort .make_jit_quicksort ().run_quicksort
521526
522527 kind_is_default = isinstance (kind , str )
523528 if isinstance (A , types .Array ):
524- def _sdc_arrays_argsort_array_impl (A , kind = 'quicksort' ):
529+ def _sdc_arrays_argsort_array_impl (A , kind = 'quicksort' , ascending = True ):
525530 _kind = 'quicksort' if kind_is_default == True else kind # noqa
526- return numpy_like .argsort (A , kind = _kind )
531+ return numpy_like .argsort (A , kind = _kind , ascending = ascending )
527532
528533 return _sdc_arrays_argsort_array_impl
529534
530535 elif A == string_array_type :
531- def _sdc_arrays_argsort_str_arr_impl (A , kind = 'quicksort' ):
536+ def _sdc_arrays_argsort_str_arr_impl (A , kind = 'quicksort' , ascending = True ):
532537
533- nan_mask = sdc .hiframes .api .get_nan_mask (A )
534- idx = numpy .arange (len (A ))
535- old_nan_positions = idx [nan_mask ]
536-
537- data = A [~ nan_mask ]
538- keys = idx [~ nan_mask ]
539538 if kind == 'quicksort' :
540- zipped = list (zip (list (data ), list (keys )))
541- zipped = quicksort_func (zipped )
542- argsorted = [zipped [i ][1 ] for i in numpy .arange (len (data ))]
539+ indexes = numpy .arange (len (A ))
540+ data_index_pairs = list (zip (list (A ), list (indexes )))
541+ zipped = quicksort_func (data_index_pairs )
542+ argsorted = [zipped [i ][1 ] for i in indexes ]
543+ res = numpy .array (argsorted , dtype = numpy .int64 )
544+ # for non-stable sort the order within groups does not matter
545+ # so just reverse the result when sorting in descending order
546+ if not ascending :
547+ res = res [::- 1 ]
543548 elif kind == 'mergesort' :
544- sdc .hiframes .sort .local_sort ((data , ), (keys , ))
545- argsorted = list (keys )
549+ res = str_arr_stable_argosort (A , ascending = ascending )
546550 else :
547551 raise ValueError ("Unrecognized kind of sort in sdc_arrays_argsort" )
548552
549- argsorted .extend (old_nan_positions )
550- return numpy .asarray (argsorted , dtype = numpy .int32 )
553+ return res
551554
552555 return _sdc_arrays_argsort_str_arr_impl
553556
@@ -618,13 +621,16 @@ def _sdc_take(data, indexes):
618621@sdc_overload (_sdc_take )
619622def _sdc_take_overload (data , indexes ):
620623
621- if not isinstance (data , (types .Array , StringArrayType , RangeIndexType )):
624+ valid_data_types = (types .Array ,) + sdc_pandas_index_types
625+ if not (isinstance (data , valid_data_types ) and not isinstance (data , types .NoneType )):
622626 return None
623- if not (isinstance (indexes , (types .Array , types .List ))
627+
628+ if not (isinstance (indexes , (types .Array , types .List , Int64IndexType ))
624629 and isinstance (indexes .dtype , (types .Integer , types .ListType ))):
625630 return None
626631
627- if isinstance (indexes .dtype , types .ListType ) and isinstance (data , (types .Array , types .List , RangeIndexType )):
632+ if (isinstance (indexes .dtype , types .ListType )
633+ and isinstance (data , (types .Array , types .List , RangeIndexType , Int64IndexType ))):
628634 arr_dtype = data .dtype
629635
630636 def _sdc_take_list_impl (data , indexes ):
@@ -677,7 +683,7 @@ def _sdc_take_list_str_impl(data, indexes):
677683
678684 return _sdc_take_list_str_impl
679685
680- elif isinstance (data , (types .Array , RangeIndexType )):
686+ elif isinstance (data , (types .Array , RangeIndexType , Int64IndexType )):
681687 arr_dtype = data .dtype
682688
683689 def _sdc_take_array_impl (data , indexes ):
@@ -740,6 +746,7 @@ def sdc_reindex_series_overload(arr, index, name, by_index):
740746 """ Reindexes series data by new index following the logic of pandas.core.indexing.check_bool_indexer """
741747
742748 range_indexes = isinstance (index , RangeIndexType ) and isinstance (by_index , RangeIndexType )
749+ int64_indexes = isinstance (index , Int64IndexType ) and isinstance (by_index , Int64IndexType )
743750 data_dtype , index_dtype = arr .dtype , index .dtype
744751 data_is_str_arr = isinstance (arr .dtype , types .UnicodeType )
745752
@@ -748,6 +755,8 @@ def sdc_reindex_series_impl(arr, index, name, by_index):
748755 # no reindexing is needed if indexes are equal
749756 if range_indexes == True : # noqa
750757 equal_indexes = numpy_like .array_equal (index , by_index )
758+ elif int64_indexes == True : # noqa
759+ equal_indexes = numpy_like .array_equal (index , by_index )
751760 else :
752761 equal_indexes = False
753762 if (index is by_index or equal_indexes ):
@@ -772,10 +781,10 @@ def sdc_reindex_series_impl(arr, index, name, by_index):
772781 map_index_to_position [value ] = i
773782
774783 index_mismatch = 0
775- # FIXME: TypingError in parfor step (wrong promotion to float64?) if prange is used
776- for i in numpy . arange ( len ( by_index )):
777- if by_index [ i ] in map_index_to_position :
778- pos_in_self = map_index_to_position [by_index [ i ] ]
784+ for i in numba . prange ( len ( by_index )):
785+ val = by_index [ i ]
786+ if val in map_index_to_position :
787+ pos_in_self = map_index_to_position [val ]
779788 _res_data [i ] = arr [pos_in_self ]
780789 if data_is_str_arr == True : # noqa
781790 res_data_nan_mask [i ] = isna (arr , i )
0 commit comments