4040
4141import sdc
4242import sdc .datatypes .common_functions as common_functions
43+ from sdc .datatypes .common_functions import TypeChecker
4344from sdc .datatypes .hpat_pandas_stringmethods_types import StringMethodsType
4445from sdc .hiframes .pd_series_ext import SeriesType
4546from sdc .str_arr_ext import (StringArrayType , cp_str_list_to_array , num_total_chars )
4647from sdc .utils import to_array
4748
class TypeChecker:
    """
    Validate object type and raise TypingError if the type is invalid, e.g.:
        Method nsmallest(). The object n
         given: bool
         expected: int

    ``accepted_type`` may be a single type or a (possibly nested) tuple of
    types, exactly as accepted by :func:`isinstance`.
    """
    msg_template = '{} The object {}\n given: {}\n expected: {}'

    def __init__(self, func_name):
        """
        Parameters
        ----------
        func_name: :obj:`str`
            name of the function where types checking
        """
        self.func_name = func_name

    @staticmethod
    def _expected_name(accepted_type):
        """
        Build the human readable name used in the ``expected:`` part of the message.

        Parameters
        ----------
        accepted_type: :obj:`type` or :obj:`tuple` of :obj:`type`
            type specification in the form accepted by isinstance()

        Returns
        -------
        :obj:`str`
            ``__name__`` of a single type, or a comma separated list of names for a tuple
        """
        if isinstance(accepted_type, tuple):
            # isinstance() allows nested tuples, so flatten them recursively
            return ', '.join(TypeChecker._expected_name(item) for item in accepted_type)
        # Fall back to str() so that an unusual spec never masks the real typing error
        return getattr(accepted_type, '__name__', str(accepted_type))

    def raise_exc(self, data, expected_types, name=''):
        """
        Raise exception with unified message

        Parameters
        ----------
        data: :obj:`any`
            real type of the data
        expected_types: :obj:`str`
            expected types inserting directly to the exception
        name: :obj:`str`
            name of the parameter

        Raises
        ------
        TypingError
            always; the message is built from ``msg_template``
        """
        # NOTE(review): TypingError is not imported in the visible part of the file;
        # presumably it is imported elsewhere in the module - confirm.
        msg = self.msg_template.format(self.func_name, name, data, expected_types)
        raise TypingError(msg)

    def check(self, data, accepted_type, name=''):
        """
        Check data type belongs to specified type

        Parameters
        ----------
        data: :obj:`any`
            real type of the data
        accepted_type: :obj:`type` or :obj:`tuple` of :obj:`type`
            accepted type(s)
        name: :obj:`str`
            name of the parameter

        Raises
        ------
        TypingError
            if ``data`` is not an instance of ``accepted_type``
        """
        if not isinstance(data, accepted_type):
            # A tuple has no __name__, so the name is rendered by the helper instead of
            # reading accepted_type.__name__ directly.
            self.raise_exc(data, self._expected_name(accepted_type), name=name)
95-
9649
9750@overload (operator .getitem )
9851def hpat_pandas_series_getitem (self , idx ):
@@ -1207,7 +1160,7 @@ def hpat_pandas_series_corr(self, other, method='pearson', min_periods=None):
12071160 if not isinstance (other .data .dtype , types .Number ):
12081161 ty_checker .raise_exc (other .data , 'number' , 'other.data' )
12091162
1210- if not isinstance (min_periods , (types .Integer , types .Omitted , types .NoneType )):
1163+ if not isinstance (min_periods , (int , types .Integer , types .Omitted , types .NoneType )) and min_periods is not None :
12111164 ty_checker .raise_exc (min_periods , 'int64' , 'min_periods' )
12121165
12131166 def hpat_pandas_series_corr_impl (self , other , method = 'pearson' , min_periods = None ):
@@ -1229,7 +1182,20 @@ def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None
12291182 if len (self_arr ) < min_periods :
12301183 return numpy .nan
12311184
1232- return numpy .corrcoef (self_arr , other_arr )[0 , 1 ]
1185+ new_self = pandas .Series (self_arr )
1186+ new_other = pandas .Series (other_arr )
1187+
1188+ n = new_self .count ()
1189+ ma = new_self .sum ()
1190+ mb = new_other .sum ()
1191+ a = n * (self_arr * other_arr ).sum () - ma * mb
1192+ b1 = n * (self_arr * self_arr ).sum () - ma * ma
1193+ b2 = n * (other_arr * other_arr ).sum () - mb * mb
1194+
1195+ if b1 == 0 or b2 == 0 :
1196+ return numpy .nan
1197+
1198+ return a / numpy .sqrt (b1 * b2 )
12331199
12341200 return hpat_pandas_series_corr_impl
12351201
@@ -2163,6 +2129,77 @@ def hpat_pandas_series_quantile_impl(self, q=0.5, interpolation='linear'):
21632129 return hpat_pandas_series_quantile_impl
21642130
21652131
@overload_method(SeriesType, 'rename')
def hpat_pandas_series_rename(self, index=None, copy=True, inplace=False, level=None):
    """
    Pandas Series method :meth:`pandas.Series.rename` implementation.
    Alter Series name.

    .. only:: developer
       Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_rename

    Parameters
    -----------
    index : :obj:`scalar` or `hashable sequence` or `dict` or `function`
        Dict-like or functions are transformations to apply to the index.
        Scalar or hashable sequence-like will alter the Series.name attribute.
        Only scalar value is supported: it is used as the name of the result.
    copy : :obj:`bool`, default :obj:`True`
        Whether to copy underlying data (and the index, when the Series has one).
    inplace : :obj:`bool`, default :obj:`False`
        Only type-checked; the implementation does not read it and always
        builds and returns a new Series.
    level : :obj:`int` or `str`
        In case of a MultiIndex, only rename labels in the specified level.
        *Not supported*: only type-checked, the implementation does not read it.

    Returns
    -------
    :obj:`pandas.Series`
        returns :obj:`pandas.Series` with the name altered.

    Raises
    ------
    TypingError
        if any argument has a type that is not accepted by the checks below
    """

    ty_checker = TypeChecker('Method rename().')
    ty_checker.check(self, SeriesType)

    # `index is not None` lets a plain Python None default through in addition to types.Omitted
    if not isinstance(index, (types.Omitted, types.UnicodeType,
                              types.StringLiteral, str,
                              types.Integer, types.Boolean,
                              types.Hashable, types.Float,
                              types.NPDatetime, types.NPTimedelta,
                              types.Number)) and index is not None:
        ty_checker.raise_exc(index, 'string', 'index')

    if not isinstance(copy, (types.Omitted, types.Boolean, bool)):
        ty_checker.raise_exc(copy, 'boolean', 'copy')

    if not isinstance(inplace, (types.Omitted, types.Boolean, bool)):
        ty_checker.raise_exc(inplace, 'boolean', 'inplace')

    if not isinstance(level, (types.Omitted, types.UnicodeType,
                              types.StringLiteral, types.Integer)) and level is not None:
        ty_checker.raise_exc(level, 'Integer or string', 'level')

    def hpat_pandas_series_rename_idx_impl(self, index=None, copy=True, inplace=False, level=None):
        # Series with an explicit index: copy both data and index when requested
        if copy is True:
            series_data = self._data.copy()
            series_index = self._index.copy()
        else:
            series_data = self._data
            series_index = self._index

        return pandas.Series(data=series_data, index=series_index, name=index)

    def hpat_pandas_series_rename_noidx_impl(self, index=None, copy=True, inplace=False, level=None):
        # Series index is typed as None, so only the data can be copied
        if copy is True:
            series_data = self._data.copy()
        else:
            series_data = self._data

        return pandas.Series(data=series_data, index=self._index, name=index)

    # Pick the implementation at typing time depending on whether the Series has an index
    if isinstance(self.index, types.NoneType):
        return hpat_pandas_series_rename_noidx_impl
    return hpat_pandas_series_rename_idx_impl
2201+
2202+
21662203@overload_method (SeriesType , 'min' )
21672204def hpat_pandas_series_min (self , axis = None , skipna = True , level = None , numeric_only = None ):
21682205 """
@@ -2925,11 +2962,12 @@ def hpat_pandas_series_nunique_str_impl(self, dropna=True):
29252962 It is better to merge with Numeric branch
29262963 """
29272964
2928- str_set = set (self ._data )
2929- if dropna == False :
2930- return len (str_set ) - 1
2931- else :
2932- return len (str_set )
2965+ data = self ._data
2966+ if dropna :
2967+ nan_mask = self .isna ()
2968+ data = self ._data [~ nan_mask ._data ]
2969+ unique_values = set (data )
2970+ return len (unique_values )
29332971
29342972 return hpat_pandas_series_nunique_str_impl
29352973
@@ -2983,7 +3021,8 @@ def hpat_pandas_series_count(self, level=None):
29833021 if isinstance (self .data , StringArrayType ):
29843022 def hpat_pandas_series_count_str_impl (self , level = None ):
29853023
2986- return len (self ._data )
3024+ nan_mask = self .isna ()
3025+ return numpy .sum (nan_mask ._data == 0 )
29873026
29883027 return hpat_pandas_series_count_str_impl
29893028
@@ -3133,10 +3172,10 @@ def hpat_pandas_series_argsort_idx_impl(self, axis=0, kind='quicksort', order=No
31333172 sort_nona = numpy .argsort (self ._data [~ na_data_arr ])
31343173 q = 0
31353174 for id , i in enumerate (sort ):
3136- if id not in list (sort [len (self ._data ) - na :]):
3137- result [id ] = sort_nona [id - q ]
3138- else :
3175+ if id in set (sort [len (self ._data ) - na :]):
31393176 q += 1
3177+ else :
3178+ result [id ] = sort_nona [id - q ]
31403179 for i in sort [len (self ._data ) - na :]:
31413180 result [i ] = - 1
31423181
@@ -3160,10 +3199,10 @@ def hpat_pandas_series_argsort_noidx_impl(self, axis=0, kind='quicksort', order=
31603199 sort_nona = numpy .argsort (self ._data [~ na_data_arr ])
31613200 q = 0
31623201 for id , i in enumerate (sort ):
3163- if id not in list (sort [len (self ._data ) - na :]):
3164- result [id ] = sort_nona [id - q ]
3165- else :
3202+ if id in set (sort [len (self ._data ) - na :]):
31663203 q += 1
3204+ else :
3205+ result [id ] = sort_nona [id - q ]
31673206 for i in sort [len (self ._data ) - na :]:
31683207 result [i ] = - 1
31693208
@@ -3570,7 +3609,15 @@ def hpat_pandas_series_cov_impl(self, other, min_periods=None):
35703609 if len (self_arr ) < min_periods :
35713610 return numpy .nan
35723611
3573- return numpy .cov (self_arr , other_arr )[0 , 1 ]
3612+ new_self = pandas .Series (self_arr )
3613+
3614+ ma = new_self .mean ()
3615+ mb = other .mean ()
3616+
3617+ if numpy .isinf (mb ):
3618+ return numpy .nan
3619+
3620+ return ((self_arr - ma ) * (other_arr - mb )).sum () / (new_self .count () - 1.0 )
35743621
35753622 return hpat_pandas_series_cov_impl
35763623
0 commit comments