4040
4141import sdc
4242import sdc .datatypes .common_functions as common_functions
43- from sdc .datatypes .common_functions import TypeChecker
4443from sdc .datatypes .hpat_pandas_stringmethods_types import StringMethodsType
4544from sdc .hiframes .pd_series_ext import SeriesType
4645from sdc .str_arr_ext import (StringArrayType , cp_str_list_to_array , num_total_chars )
4746from sdc .utils import to_array
4847
class TypeChecker:
    """
    Validate object type and raise TypingError if the type is invalid, e.g.:
        Method nsmallest(). The object n
         given: bool
         expected: int
    """
    # Positional fields, in order: function name, parameter name,
    # the given data, the expected type description.
    msg_template = '{} The object {}\n given: {}\n expected: {}'

    def __init__(self, func_name):
        """
        Parameters
        ----------
        func_name: :obj:`str`
            name of the function whose arguments are being checked;
            it is used as the prefix of every error message
        """
        self.func_name = func_name

    @staticmethod
    def _expected_names(accepted_type):
        """
        Build a readable description of the accepted type(s).

        Parameters
        ----------
        accepted_type: :obj:`type` or :obj:`tuple` of types
            anything accepted as the second argument of ``isinstance``

        Returns
        -------
        :obj:`str`
            ``__name__`` of a single type, or the comma-separated names
            of every type in a (possibly nested) tuple
        """
        # isinstance() accepts (nested) tuples of types, and a tuple has
        # no ``__name__`` attribute, so tuples are expanded recursively.
        if isinstance(accepted_type, tuple):
            return ', '.join(TypeChecker._expected_names(item) for item in accepted_type)

        return accepted_type.__name__

    def raise_exc(self, data, expected_types, name=''):
        """
        Raise exception with unified message

        Parameters
        ----------
        data: :obj:`any`
            real type of the data
        expected_types: :obj:`str`
            expected types inserting directly to the exception
        name: :obj:`str`
            name of the parameter

        Raises
        ------
        TypingError
            always; the message is built from ``msg_template``
        """
        # NOTE(review): TypingError is expected to be available at module level;
        # its import is outside the visible part of the file.
        msg = self.msg_template.format(self.func_name, name, data, expected_types)
        raise TypingError(msg)

    def check(self, data, accepted_type, name=''):
        """
        Check data type belongs to specified type

        Parameters
        ----------
        data: :obj:`any`
            real type of the data
        accepted_type: :obj:`type` or :obj:`tuple` of types
            accepted type, or a tuple of accepted types
        name: :obj:`str`
            name of the parameter

        Raises
        ------
        TypingError
            if ``data`` is not an instance of ``accepted_type``
        """
        if not isinstance(data, accepted_type):
            self.raise_exc(data, self._expected_names(accepted_type), name=name)
95+
4996
5097@overload (operator .getitem )
5198def hpat_pandas_series_getitem (self , idx ):
5299 """
100+ Intel Scalable Dataframe Compiler User Guide
101+ ********************************************
102+ Pandas API: pandas.Series.get
103+
104+ Limitations
105+ -----------
106+ Supported ``key`` can be one of the following:
107+ - Integer scalar, e.g. :obj:`series[0]`
108+ - A slice, e.g. :obj:`series[2:5]`
109+ - Another series
110+
111+ Examples
112+ --------
113+ .. literalinclude:: ../../../examples/series_getitem.py
114+ :language: python
115+ :lines: 27-
116+ :caption: Getting Pandas Series elements
117+ :name: ex_series_getitem
118+
119+ .. code-block:: console
120+
121+ > python ./series_getitem.py
122+ 55
123+
124+ .. todo:: Fix SDC behavior and add the expected output of the > python ./series_getitem.py to the docstring
125+
126+ Intel Scalable Dataframe Compiler Developer Guide
127+ *************************************************
128+
53129 Pandas Series operator :attr:`pandas.Series.get` implementation
54130 **Algorithm**: result = series[idx]
55131
@@ -1131,7 +1207,7 @@ def hpat_pandas_series_corr(self, other, method='pearson', min_periods=None):
11311207 if not isinstance (other .data .dtype , types .Number ):
11321208 ty_checker .raise_exc (other .data , 'number' , 'other.data' )
11331209
1134- if not isinstance (min_periods , (int , types .Integer , types .Omitted , types .NoneType )) and min_periods is not None :
1210+ if not isinstance (min_periods , (types .Integer , types .Omitted , types .NoneType )):
11351211 ty_checker .raise_exc (min_periods , 'int64' , 'min_periods' )
11361212
11371213 def hpat_pandas_series_corr_impl (self , other , method = 'pearson' , min_periods = None ):
@@ -1153,20 +1229,7 @@ def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None
11531229 if len (self_arr ) < min_periods :
11541230 return numpy .nan
11551231
1156- new_self = pandas .Series (self_arr )
1157- new_other = pandas .Series (other_arr )
1158-
1159- n = new_self .count ()
1160- ma = new_self .sum ()
1161- mb = new_other .sum ()
1162- a = n * (self_arr * other_arr ).sum () - ma * mb
1163- b1 = n * (self_arr * self_arr ).sum () - ma * ma
1164- b2 = n * (other_arr * other_arr ).sum () - mb * mb
1165-
1166- if b1 == 0 or b2 == 0 :
1167- return numpy .nan
1168-
1169- return a / numpy .sqrt (b1 * b2 )
1232+ return numpy .corrcoef (self_arr , other_arr )[0 , 1 ]
11701233
11711234 return hpat_pandas_series_corr_impl
11721235
@@ -2100,77 +2163,6 @@ def hpat_pandas_series_quantile_impl(self, q=0.5, interpolation='linear'):
21002163 return hpat_pandas_series_quantile_impl
21012164
21022165
@overload_method(SeriesType, 'rename')
def hpat_pandas_series_rename(self, index=None, copy=True, inplace=False, level=None):
    """
    Pandas Series method :meth:`pandas.Series.rename` implementation.
    Alter Series index labels or name.

    .. only:: developer
       Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_rename

    Parameters
    -----------
    index : :obj:`scalar` or `hashable sequence` or `dict` or `function`
        Dict-like or functions are transformations to apply to the index.
        Scalar or hashable sequence-like will alter the Series.name attribute.
        Only scalar value is supported: it is passed as ``name`` of the result.
    copy : :obj:`bool`, default :obj:`True`
        Whether to copy underlying data.
    inplace : :obj:`bool`, default :obj:`False`
        Whether to return a new Series. If True then value of copy is ignored.
        Only type-checked here; the implementations below never read it and
        always build a new Series.
    level : :obj:`int` or `str`
        In case of a MultiIndex, only rename labels in the specified level.
        *Not supported*: only type-checked, never read by the implementations.

    Returns
    -------
    :obj:`pandas.Series`
         returns :obj:`pandas.Series` with index labels or name altered.

    Raises
    ------
    TypingError
        (via ``TypeChecker``) if any argument has an unsupported type.
    """

    ty_checker = TypeChecker('Method rename().')
    ty_checker.check(self, SeriesType)

    if not isinstance(index, (types.Omitted, types.UnicodeType,
                              types.StringLiteral, str,
                              types.Integer, types.Boolean,
                              types.Hashable, types.Float,
                              types.NPDatetime, types.NPTimedelta,
                              types.Number)) and index is not None:
        ty_checker.raise_exc(index, 'string', 'index')

    if not isinstance(copy, (types.Omitted, types.Boolean, bool)):
        ty_checker.raise_exc(copy, 'boolean', 'copy')

    if not isinstance(inplace, (types.Omitted, types.Boolean, bool)):
        ty_checker.raise_exc(inplace, 'boolean', 'inplace')

    if not isinstance(level, (types.Omitted, types.UnicodeType,
                              types.StringLiteral, types.Integer)) and level is not None:
        # Message typo fixed: was 'Integer or srting'.
        ty_checker.raise_exc(level, 'Integer or string', 'level')

    # Variant for a Series with an explicit index: the index is copied
    # together with the data when ``copy`` is True.
    def hpat_pandas_series_rename_idx_impl(self, index=None, copy=True, inplace=False, level=None):
        if copy is True:
            series_data = self._data.copy()
            series_index = self._index.copy()
        else:
            series_data = self._data
            series_index = self._index

        return pandas.Series(data=series_data, index=series_index, name=index)

    # Variant for a Series whose index type is NoneType: only the data
    # is copied and ``self._index`` is passed through unchanged.
    def hpat_pandas_series_rename_noidx_impl(self, index=None, copy=True, inplace=False, level=None):
        if copy is True:
            series_data = self._data.copy()
        else:
            series_data = self._data

        return pandas.Series(data=series_data, index=self._index, name=index)

    # Pick the implementation by the index type of the Series being typed.
    if isinstance(self.index, types.NoneType):
        return hpat_pandas_series_rename_noidx_impl
    return hpat_pandas_series_rename_idx_impl
2172-
2173-
21742166@overload_method (SeriesType , 'min' )
21752167def hpat_pandas_series_min (self , axis = None , skipna = True , level = None , numeric_only = None ):
21762168 """
@@ -2933,12 +2925,11 @@ def hpat_pandas_series_nunique_str_impl(self, dropna=True):
29332925 It is better to merge with Numeric branch
29342926 """
29352927
2936- data = self ._data
2937- if dropna :
2938- nan_mask = self .isna ()
2939- data = self ._data [~ nan_mask ._data ]
2940- unique_values = set (data )
2941- return len (unique_values )
2928+ str_set = set (self ._data )
2929+ if dropna == False :
2930+ return len (str_set ) - 1
2931+ else :
2932+ return len (str_set )
29422933
29432934 return hpat_pandas_series_nunique_str_impl
29442935
@@ -2992,8 +2983,7 @@ def hpat_pandas_series_count(self, level=None):
29922983 if isinstance (self .data , StringArrayType ):
29932984 def hpat_pandas_series_count_str_impl (self , level = None ):
29942985
2995- nan_mask = self .isna ()
2996- return numpy .sum (nan_mask ._data == 0 )
2986+ return len (self ._data )
29972987
29982988 return hpat_pandas_series_count_str_impl
29992989
@@ -3143,10 +3133,10 @@ def hpat_pandas_series_argsort_idx_impl(self, axis=0, kind='quicksort', order=No
31433133 sort_nona = numpy .argsort (self ._data [~ na_data_arr ])
31443134 q = 0
31453135 for id , i in enumerate (sort ):
3146- if id in set (sort [len (self ._data ) - na :]):
3147- q += 1
3136+ if id not in list (sort [len (self ._data ) - na :]):
3137+ result [ id ] = sort_nona [ id - q ]
31483138 else :
3149- result [ id ] = sort_nona [ id - q ]
3139+ q += 1
31503140 for i in sort [len (self ._data ) - na :]:
31513141 result [i ] = - 1
31523142
@@ -3170,10 +3160,10 @@ def hpat_pandas_series_argsort_noidx_impl(self, axis=0, kind='quicksort', order=
31703160 sort_nona = numpy .argsort (self ._data [~ na_data_arr ])
31713161 q = 0
31723162 for id , i in enumerate (sort ):
3173- if id in set (sort [len (self ._data ) - na :]):
3174- q += 1
3175- else :
3163+ if id not in list (sort [len (self ._data ) - na :]):
31763164 result [id ] = sort_nona [id - q ]
3165+ else :
3166+ q += 1
31773167 for i in sort [len (self ._data ) - na :]:
31783168 result [i ] = - 1
31793169
@@ -3580,15 +3570,7 @@ def hpat_pandas_series_cov_impl(self, other, min_periods=None):
35803570 if len (self_arr ) < min_periods :
35813571 return numpy .nan
35823572
3583- new_self = pandas .Series (self_arr )
3584-
3585- ma = new_self .mean ()
3586- mb = other .mean ()
3587-
3588- if numpy .isinf (mb ):
3589- return numpy .nan
3590-
3591- return ((self_arr - ma ) * (other_arr - mb )).sum () / (new_self .count () - 1.0 )
3573+ return numpy .cov (self_arr , other_arr )[0 , 1 ]
35923574
35933575 return hpat_pandas_series_cov_impl
35943576
0 commit comments