Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 3ca4d5a

Browse files
committed
Merge branch 'master' of https://github.com/IntelPython/sdc into testperf
2 parents e008d2f + c7c5e65 commit 3ca4d5a

9 files changed

Lines changed: 78 additions & 93 deletions

File tree

sdc/__init__.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,22 +66,15 @@
6666

6767
if not sdc.config.config_pipeline_hpat_default:
6868
"""
69-
Overload Numba functions to allow call SDC pass in Numba compiler pipeline
69+
Overload a Numba function so that SDC passes can be called in the Numba compiler pipeline
7070
Functions are:
71-
- AnnotateTypes run_pass()
72-
- InlineClosureLikes run_pass()
71+
- Numba DefaultPassBuilder define_nopython_pipeline()
7372
7473
TODO: Needs to detect 'import Pandas' and align initialization according to it
7574
"""
7675

77-
# Need more work since Series tests failed
78-
# Test: SDC_CONFIG_PIPELINE_SDC=0 python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_sort_values1
79-
80-
# sdc.config.numba_typed_passes_annotatetypes_orig = numba.typed_passes.AnnotateTypes.run_pass
81-
# numba.typed_passes.AnnotateTypes.run_pass = sdc.datatypes.hpat_pandas_dataframe_pass.sdc_dataframepassimpl_overload
82-
83-
# sdc.config.numba_untyped_passes_inlineclosurelikes_orig = numba.untyped_passes.InlineClosureLikes.run_pass
84-
# numba.untyped_passes.InlineClosureLikes.run_pass = sdc.datatypes.hpat_pandas_dataframe_pass.sdc_hiframespassimpl_overload
76+
sdc.config.numba_compiler_define_nopython_pipeline_orig = numba.compiler.DefaultPassBuilder.define_nopython_pipeline
77+
numba.compiler.DefaultPassBuilder.define_nopython_pipeline = sdc.datatypes.hpat_pandas_dataframe_pass.sdc_nopython_pipeline_lite_register
8578

8679
def _init_extension():
8780
'''Register Pandas classes and functions with Numba.

sdc/config.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -87,12 +87,7 @@
8787
Default value used to select compiler pipeline in a function decorator
8888
'''
8989

90-
numba_typed_passes_annotatetypes_orig = None
90+
numba_compiler_define_nopython_pipeline_orig = None
9191
'''
92-
Default value for a pointer intended to use as Numba AnnotateTypes run_pass() in overloaded function
93-
'''
94-
95-
numba_untyped_passes_inlineclosurelikes_orig = None
96-
'''
97-
Default value for a pointer intended to use as Numba InlineClosureLikes run_pass() in overloaded function
92+
Default value for the reference that holds the original numba.compiler.DefaultPassBuilder.define_nopython_pipeline() so that the overloading function can call it
9893
'''

sdc/datatypes/hpat_pandas_dataframe_pass.py

Lines changed: 19 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -28,71 +28,37 @@
2828
| Procedures are required for SDC DataFrameType handling in Numba
2929
'''
3030

31-
import sdc
32-
33-
34-
def sdc_dataframepassimpl_overload(*args, **kwargs):
35-
"""
36-
This is a pointer intended to use as Numba AnnotateTypes run_pass() function
37-
A hook made to overload Numba function and:
38-
- call original function
39-
- call hiframes.dataframe_pass.DataFramePass
40-
- call compiler.PostprocessorPass
41-
- call hiframes.hiframes_typed.HiFramesTypedPass
42-
43-
return True if any passes mutated original Numba IR
44-
45-
This function needs to be removed if SDC DataFrame support
46-
no more needs Numba IR transformations via DataFramePass
47-
"""
48-
49-
if sdc.config.numba_typed_passes_annotatetypes_orig is None:
50-
"""
51-
Unexpected usage of this function
52-
"""
53-
54-
return False
55-
56-
status_numba_pass = sdc.config.numba_typed_passes_annotatetypes_orig(*args, **kwargs)
31+
from numba.untyped_passes import InlineClosureLikes
32+
from numba.typed_passes import AnnotateTypes
5733

58-
numba_state_var = args[1]
59-
60-
status_dataframe_pass = sdc.hiframes.dataframe_pass.DataFramePassImpl(numba_state_var).run_pass()
61-
status_postprocess_pass = sdc.compiler.PostprocessorPass().run_pass(numba_state_var)
62-
status_dataframe_typed_pass = sdc.hiframes.hiframes_typed.HiFramesTypedPassImpl(numba_state_var).run_pass()
63-
64-
is_ir_mutated = status_numba_pass or status_dataframe_pass or status_postprocess_pass or status_dataframe_typed_pass
34+
import sdc
6535

66-
return is_ir_mutated
6736

68-
def sdc_hiframespassimpl_overload(*args, **kwargs):
37+
def sdc_nopython_pipeline_lite_register(state, name='nopython'):
6938
"""
70-
This is a pointer intended to use as Numba InlineClosureLikes run_pass() function
71-
A hook made to overload Numba function and:
72-
- call compiler.InlinePass
73-
- call hiframes.hiframes_untyped.HiFramesPass
74-
- call original function
39+
Register a subset of Intel SDC compiler passes in the Numba NoPython pipeline.
40+
Each pass enabled here is expected to be called many times on every decorated function, including
41+
functions which are not related to Pandas.
7542
76-
return True if any passes mutated original Numba IR
43+
Test: SDC_CONFIG_PIPELINE_SDC=0 python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_sort_values1
7744
7845
This function needs to be removed if SDC DataFrame support
7946
no longer needs Numba IR transformations via DataFramePass
8047
"""
8148

82-
if sdc.config.numba_untyped_passes_inlineclosurelikes_orig is None:
83-
"""
84-
Unexpected usage of this function
85-
"""
86-
87-
return False
49+
if sdc.config.numba_compiler_define_nopython_pipeline_orig is None:
50+
raise ValueError("Intel SDC. Unexpected usage of DataFrame passes registration function.")
8851

89-
numba_state_var = args[1]
52+
numba_pass_manager = sdc.config.numba_compiler_define_nopython_pipeline_orig(state, name)
9053

91-
status_inlinepass_pass = sdc.compiler.InlinePass().run_pass(numba_state_var)
92-
status_hiframespass_pass = sdc.hiframes.hiframes_untyped.HiFramesPassImpl(numba_state_var).run_pass()
54+
# numba_pass_manager.add_pass_after(sdc.compiler.InlinePass, InlineClosureLikes)
55+
# numba_pass_manager.add_pass_after(sdc.hiframes.hiframes_untyped.HiFramesPass, sdc.compiler.InlinePass)
56+
numba_pass_manager.add_pass_after(sdc.hiframes.hiframes_untyped.HiFramesPass, InlineClosureLikes)
9357

94-
status_numba_pass = sdc.config.numba_untyped_passes_inlineclosurelikes_orig(*args, **kwargs)
58+
numba_pass_manager.add_pass_after(sdc.hiframes.dataframe_pass.DataFramePass, AnnotateTypes)
59+
numba_pass_manager.add_pass_after(sdc.compiler.PostprocessorPass, AnnotateTypes)
60+
# numba_pass_manager.add_pass_after(sdc.hiframes.hiframes_typed.HiFramesTypedPass, sdc.hiframes.dataframe_pass.DataFramePass)
9561

96-
is_ir_mutated = status_inlinepass_pass or status_hiframespass_pass or status_numba_pass
62+
numba_pass_manager.finalize()
9763

98-
return is_ir_mutated
64+
return numba_pass_manager

sdc/datatypes/hpat_pandas_stringmethods_functions.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,9 @@ def hpat_pandas_stringmethods_upper_impl(self):
8282
import pandas
8383

8484
import numba
85-
from numba import types
8685
from numba.extending import overload_method
87-
from numba.errors import TypingError
8886

8987
from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
90-
from sdc.str_arr_ext import to_string_list
9188

9289

9390
_hpat_pandas_stringmethods_autogen_global_dict = {
@@ -229,7 +226,7 @@ def _hpat_pandas_stringmethods_autogen(method_name):
229226

230227

231228
# _hpat_pandas_stringmethods_autogen_methods = sorted(dir(numba.types.misc.UnicodeType.__getattribute__.__qualname__))
232-
_hpat_pandas_stringmethods_autogen_methods = ['upper', 'lower']
229+
_hpat_pandas_stringmethods_autogen_methods = ['upper', 'lower', 'lstrip', 'rstrip', 'strip']
233230
"""
234231
This is the list of functions which are autogenerated to be used from Numba directly.
235232
"""

sdc/hiframes/pd_series_ext.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -752,12 +752,12 @@ def resolve_rename(self, ary, args, kws):
752752

753753
# return typer
754754

755-
str2str_methods = ['capitalize', 'lstrip', 'rstrip', 'strip', 'swapcase', 'title']
755+
str2str_methods = ['capitalize', 'swapcase', 'title']
756756
"""
757757
Functions which are still overloaded by HPAT compiler pipeline
758758
"""
759759

760-
str2str_methods_excluded = ['upper', 'lower']
760+
str2str_methods_excluded = ['upper', 'lower', 'lstrip', 'rstrip', 'strip']
761761
"""
762762
Functions which are used from Numba directly by calling from StringMethodsType
763763

sdc/tests/test_series.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2320,8 +2320,8 @@ def test_impl(S):
23202320
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))
23212321

23222322
def test_series_str2str(self):
2323-
common_methods = ['lower', 'upper']
2324-
sdc_methods = ['capitalize', 'lstrip', 'rstrip', 'strip', 'swapcase', 'title']
2323+
common_methods = ['lower', 'upper', 'lstrip', 'rstrip', 'strip']
2324+
sdc_methods = ['capitalize', 'swapcase', 'title']
23252325
str2str_methods = common_methods[:]
23262326
if sdc.config.config_pipeline_hpat_default:
23272327
str2str_methods += sdc_methods
@@ -2341,8 +2341,7 @@ def test_series_str2str(self):
23412341
@unittest.skipIf(sdc.config.config_pipeline_hpat_default,
23422342
'Series.str.<method>() unsupported')
23432343
def test_series_str2str_unsupported(self):
2344-
unsupported_methods = ['capitalize', 'lstrip', 'rstrip',
2345-
'strip', 'swapcase', 'title']
2344+
unsupported_methods = ['capitalize', 'swapcase', 'title']
23462345
for method in unsupported_methods:
23472346
func_lines = ['def test_impl(S):',
23482347
' return S.str.{}()'.format(method)]

sdc/tests/test_utils.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
# *****************************************************************************
23
# Copyright (c) 2019, Intel Corporation All rights reserved.
34
#
@@ -46,15 +47,21 @@
4647

4748

4849
def count_array_REPs():
49-
from sdc.distributed import Distribution
50-
vals = sdc.distributed.dist_analysis.array_dists.values()
51-
return sum([v == Distribution.REP for v in vals])
50+
if sdc.config.config_pipeline_hpat_default:
51+
from sdc.distributed import Distribution
52+
vals = sdc.distributed.dist_analysis.array_dists.values()
53+
return sum([v == Distribution.REP for v in vals])
54+
else:
55+
return 0
5256

5357

5458
def count_parfor_REPs():
55-
from sdc.distributed import Distribution
56-
vals = sdc.distributed.dist_analysis.parfor_dists.values()
57-
return sum([v == Distribution.REP for v in vals])
59+
if sdc.config.config_pipeline_hpat_default:
60+
from sdc.distributed import Distribution
61+
vals = sdc.distributed.dist_analysis.parfor_dists.values()
62+
return sum([v == Distribution.REP for v in vals])
63+
else:
64+
return 0
5865

5966

6067
def count_parfor_OneDs():

sdc/tests/tests_perf/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
### Performance testing
22
based on Python unit testing framework where typical test suite looks like:
3-
```
3+
```python
44
class TestSuite(unittest.TestCase):
55
# how many times function will be executed for more accurate measurements
66
iter_number = 5

sdc/tests/tests_perf/test_perf_utils.py

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
# *****************************************************************************
2929

3030
import gc
31+
import logging
3132
import sys
3233
import sdc
3334
import time
@@ -52,6 +53,19 @@
5253
"""
5354

5455

56+
def setup_logging():
57+
"""Setup logger"""
58+
stream_handler = logging.StreamHandler()
59+
stream_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
60+
61+
logger = logging.getLogger(__name__)
62+
logger.setLevel(level=logging.INFO)
63+
logger.addHandler(stream_handler)
64+
65+
return logger
66+
67+
68+
5569
def is_true(input_string):
5670
if isinstance(input_string, str):
5771
input_string = input_string.lower()
@@ -186,6 +200,7 @@ class TestResults:
186200
raw_perf_results_xlsx = 'raw_perf_results.xlsx'
187201
index = ['name', 'N', 'type', 'size']
188202
test_results_data = pandas.DataFrame(index=index)
203+
logger = setup_logging()
189204

190205
@property
191206
def grouped_data(self):
@@ -258,11 +273,20 @@ def dump(self):
258273
Dump performance testing results from global data storage to excel
259274
"""
260275
# openpyxl needs to be installed
261-
with pandas.ExcelWriter(self.perf_results_xlsx) as writer:
262-
self.grouped_data.to_excel(writer)
263276

264-
with pandas.ExcelWriter(self.raw_perf_results_xlsx) as writer:
265-
self.test_results_data.to_excel(writer, index=False)
277+
try:
278+
with pandas.ExcelWriter(self.perf_results_xlsx) as writer:
279+
self.grouped_data.to_excel(writer)
280+
except ModuleNotFoundError as e:
281+
msg = 'Could not dump the results to "%s": %s'
282+
self.logger.warning(msg, self.perf_results_xlsx, e)
283+
284+
try:
285+
with pandas.ExcelWriter(self.raw_perf_results_xlsx) as writer:
286+
self.test_results_data.to_excel(writer, index=False)
287+
except ModuleNotFoundError as e:
288+
msg = 'Could not dump raw results to "%s": %s'
289+
self.logger.warning(msg, self.raw_perf_results_xlsx, e)
266290

267291
def load(self):
268292
"""
@@ -272,7 +296,11 @@ def load(self):
272296
if raw_perf_results_xlsx.exists():
273297
with raw_perf_results_xlsx.open('rb') as fd:
274298
# xlrd needs to be installed
275-
self.test_results_data = pandas.read_excel(fd)
299+
try:
300+
self.test_results_data = pandas.read_excel(fd)
301+
except ModuleNotFoundError as e:
302+
msg = 'Could not load previous results from %s: %s'
303+
self.logger.warning(msg, raw_perf_results_xlsx, e)
276304

277305

278306
class TestResultsStr(TestResults):

0 commit comments

Comments
 (0)