Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit a6e7a09

Browse files
Merge branch 'master' into benchamrk_ci_integration
2 parents eee8fbb + 7e8af7d commit a6e7a09

23 files changed

Lines changed: 699 additions & 558 deletions

sdc/datatypes/hpat_pandas_dataframe_pass.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,8 @@ def run_pass(self, state):
6565
out_nodes = [inst]
6666

6767
if isinstance(inst, ir.Assign):
68-
self.state.func_ir._definitions[inst.target.name].remove(inst.value)
68+
if inst.value in self.state.func_ir._definitions[inst.target.name]:
69+
self.state.func_ir._definitions[inst.target.name].remove(inst.value)
6970
out_nodes = self._run_assign(inst)
7071

7172
if isinstance(out_nodes, list):

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 30 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1178,7 +1178,7 @@ def hpat_pandas_series_corr(self, other, method='pearson', min_periods=None):
11781178
if not isinstance(other.data.dtype, types.Number):
11791179
ty_checker.raise_exc(other.data, 'number', 'other.data')
11801180

1181-
if not isinstance(min_periods, (types.Integer, types.Omitted, types.NoneType)):
1181+
if not isinstance(min_periods, (int, types.Integer, types.Omitted, types.NoneType)) and min_periods is not None:
11821182
ty_checker.raise_exc(min_periods, 'int64', 'min_periods')
11831183

11841184
def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None):
@@ -1200,7 +1200,20 @@ def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None
12001200
if len(self_arr) < min_periods:
12011201
return numpy.nan
12021202

1203-
return numpy.corrcoef(self_arr, other_arr)[0, 1]
1203+
new_self = pandas.Series(self_arr)
1204+
new_other = pandas.Series(other_arr)
1205+
1206+
n = new_self.count()
1207+
ma = new_self.sum()
1208+
mb = new_other.sum()
1209+
a = n * (self_arr * other_arr).sum() - ma * mb
1210+
b1 = n * (self_arr * self_arr).sum() - ma * ma
1211+
b2 = n * (other_arr * other_arr).sum() - mb * mb
1212+
1213+
if b1 == 0 or b2 == 0:
1214+
return numpy.nan
1215+
1216+
return a / numpy.sqrt(b1 * b2)
12041217

12051218
return hpat_pandas_series_corr_impl
12061219

@@ -3106,10 +3119,10 @@ def hpat_pandas_series_argsort_idx_impl(self, axis=0, kind='quicksort', order=No
31063119
sort_nona = numpy.argsort(self._data[~na_data_arr])
31073120
q = 0
31083121
for id, i in enumerate(sort):
3109-
if id not in list(sort[len(self._data) - na:]):
3110-
result[id] = sort_nona[id-q]
3111-
else:
3122+
if id in set(sort[len(self._data) - na:]):
31123123
q += 1
3124+
else:
3125+
result[id] = sort_nona[id - q]
31133126
for i in sort[len(self._data) - na:]:
31143127
result[i] = -1
31153128

@@ -3133,10 +3146,10 @@ def hpat_pandas_series_argsort_noidx_impl(self, axis=0, kind='quicksort', order=
31333146
sort_nona = numpy.argsort(self._data[~na_data_arr])
31343147
q = 0
31353148
for id, i in enumerate(sort):
3136-
if id not in list(sort[len(self._data) - na:]):
3137-
result[id] = sort_nona[id - q]
3138-
else:
3149+
if id in set(sort[len(self._data) - na:]):
31393150
q += 1
3151+
else:
3152+
result[id] = sort_nona[id - q]
31403153
for i in sort[len(self._data) - na:]:
31413154
result[i] = -1
31423155

@@ -3543,7 +3556,15 @@ def hpat_pandas_series_cov_impl(self, other, min_periods=None):
35433556
if len(self_arr) < min_periods:
35443557
return numpy.nan
35453558

3546-
return numpy.cov(self_arr, other_arr)[0, 1]
3559+
new_self = pandas.Series(self_arr)
3560+
3561+
ma = new_self.mean()
3562+
mb = other.mean()
3563+
3564+
if numpy.isinf(mb):
3565+
return numpy.nan
3566+
3567+
return ((self_arr - ma) * (other_arr - mb)).sum() / (new_self.count() - 1.0)
35473568

35483569
return hpat_pandas_series_cov_impl
35493570

sdc/datatypes/hpat_pandas_stringmethods_functions.py

Lines changed: 38 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,7 @@ def hpat_pandas_stringmethods_upper_impl(self):
8383

8484
import numba
8585
from numba.extending import overload_method
86+
from numba.errors import TypingError
8687

8788
from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
8889

@@ -186,6 +187,42 @@ def hpat_pandas_stringmethods_{methodname}_impl(self{methodparams}):
186187
"""
187188

188189

190+
@overload_method(StringMethodsType, 'len')
def hpat_pandas_stringmethods_len(self):
    """
    Pandas Series method :meth:`pandas.core.strings.StringMethods.len()` implementation.

    Note: Only elements of Unicode string type are supported. Numpy.NaN elements are not supported.

    .. only:: developer

    Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_str_len1

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg

    Returns
    -------
    :obj:`pandas.Series`
         returns :obj:`pandas.Series` object holding the length of every element
    """

    def hpat_pandas_stringmethods_len_impl(self):
        # One int64 slot per element of the wrapped series.
        n_items = len(self._data)
        lengths = numpy.empty(n_items, numba.types.int64)
        for pos, text in enumerate(self._data._data):
            lengths[pos] = len(text)

        # The result keeps the name of the series the accessor wraps.
        return pandas.Series(lengths, name=self._data._name)

    # Typing-time guard: the implementation is handed out only for StringMethodsType.
    if isinstance(self, StringMethodsType):
        return hpat_pandas_stringmethods_len_impl

    raise TypingError('Method len(). The object must be a pandas.core.strings. Given: {}'.format(self))
224+
225+
189226
def _hpat_pandas_stringmethods_autogen(method_name):
190227
""""
191228
The function generates a function for 'method_name' from source text that is created on the fly.
@@ -231,7 +268,7 @@ def _hpat_pandas_stringmethods_autogen(method_name):
231268
This is the list of function which are autogenerated to be used from Numba directly.
232269
"""
233270

234-
_hpat_pandas_stringmethods_autogen_exceptions = ['split', 'len', 'get', 'replace']
271+
_hpat_pandas_stringmethods_autogen_exceptions = ['split', 'get', 'replace']
235272

236273
for method_name in _hpat_pandas_stringmethods_autogen_methods:
237274
if not (method_name.startswith('__') or method_name in _hpat_pandas_stringmethods_autogen_exceptions):

sdc/hiframes/dataframe_pass.py

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,18 @@ def __init__(self, state):
7676
self.state = state
7777

7878
def run_pass(self):
    """
    Run the IR transformation and report whether it completed.

    The work is delegated to ``run_pass_throw``. A ``ValueError`` raised there
    means that the IR transformation can not be completed. This is acceptable
    behaviour, so the exception is not propagated.

    Returns
    -------
    bool
        True when ``run_pass_throw`` finished, False when it raised ``ValueError``.
    """

    try:
        self.run_pass_throw()
    except ValueError:
        return False
    else:
        return True
89+
90+
def run_pass_throw(self):
7991
blocks = self.state.func_ir.blocks
8092
# topo_order necessary so DataFrame data replacement optimization can
8193
# be performed in one pass
@@ -109,8 +121,7 @@ def run_pass(self):
109121
# TODO: add this to dead_branch_prune pass
110122
for inst in self.state.func_ir.blocks[dead_label].body:
111123
if is_assign(inst):
112-
self.state.func_ir._definitions[inst.target.name].remove(
113-
inst.value)
124+
self.state.func_ir._definitions[inst.target.name].remove(inst.value)
114125

115126
del self.state.func_ir.blocks[dead_label]
116127
else:
@@ -124,9 +135,7 @@ def run_pass(self):
124135
used_vars = set()
125136
new_body = []
126137
for inst in reversed(block.body):
127-
if (is_assign(inst)
128-
and inst.target.name not in used_vars
129-
and inst.target.name in jmp_defs):
138+
if (is_assign(inst) and inst.target.name not in used_vars and inst.target.name in jmp_defs):
130139
self.state.func_ir._definitions[inst.target.name].remove(inst.value)
131140
continue
132141
used_vars.update(v.name for v in inst.list_vars())
@@ -140,7 +149,8 @@ def run_pass(self):
140149
out_nodes = [inst]
141150

142151
if isinstance(inst, ir.Assign):
143-
self.state.func_ir._definitions[inst.target.name].remove(inst.value)
152+
if inst.value in self.state.func_ir._definitions[inst.target.name]:
153+
self.state.func_ir._definitions[inst.target.name].remove(inst.value)
144154
out_nodes = self._run_assign(inst)
145155
elif isinstance(inst, (ir.SetItem, ir.StaticSetItem)):
146156
out_nodes = self._run_setitem(inst)

sdc/hiframes/pd_series_ext.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -756,7 +756,7 @@ def resolve_rename(self, ary, args, kws):
756756
Functions which are still overloaded by HPAT compiler pipeline
757757
"""
758758

759-
str2str_methods_excluded = ['upper', 'lower', 'lstrip', 'rstrip', 'strip']
759+
str2str_methods_excluded = ['upper', 'len', 'lower', 'lstrip', 'rstrip', 'strip']
760760
"""
761761
Functions which are used from Numba directly by calling from StringMethodsType
762762

sdc/tests/test_base.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
import unittest
2+
import warnings
3+
4+
5+
class TestCase(unittest.TestCase):
    """Base class for all tests.

    Offers ``jit`` helpers so a test can compile a function either through the
    SDC pipeline or through plain Numba, selected by
    ``sdc.config.config_pipeline_hpat_default``.
    """

    @staticmethod
    def _forced_numba_kwargs(kwargs, stacklevel=3):
        """Return a copy of *kwargs* with ``nopython`` and ``parallel`` forced to True.

        A RuntimeWarning is issued for every forced option the caller supplied,
        because the supplied value is discarded. The default ``stacklevel``
        attributes the warning to the caller of ``numba_jit`` rather than to
        this helper module. The caller's dictionary is left untouched.
        """
        forced = {'nopython': True, 'parallel': True}
        for option in forced:
            if option in kwargs:
                warnings.warn('{} is set to True and is ignored'.format(option),
                              RuntimeWarning, stacklevel=stacklevel)

        merged = dict(kwargs)
        merged.update(forced)
        return merged

    def numba_jit(self, *args, **kwargs):
        """Call ``numba.jit`` with ``nopython=True`` and ``parallel=True`` enforced.

        Any ``nopython``/``parallel`` value passed by the caller is overridden
        and reported with a RuntimeWarning.
        """
        # Imported lazily so that this module can be imported without Numba.
        import numba
        return numba.jit(*args, **self._forced_numba_kwargs(kwargs))

    def sdc_jit(self, *args, **kwargs):
        """Call ``sdc.jit`` with the arguments passed through unchanged."""
        import sdc
        return sdc.jit(*args, **kwargs)

    def jit(self, *args, **kwargs):
        """Dispatch to ``sdc_jit`` or ``numba_jit`` according to the SDC configuration."""
        from sdc import config
        if config.config_pipeline_hpat_default:
            return self.sdc_jit(*args, **kwargs)
        return self.numba_jit(*args, **kwargs)

0 commit comments

Comments
 (0)