Merge branch 'master' of https://github.com/IntelPython/sdc into testperf

densmirn · densmirn · commit 3ca4d5afae1d · 2019-11-21T16:35:09.000+03:00
diff --git a/sdc/__init__.py b/sdc/__init__.py
@@ -66,22 +66,15 @@
 
 if not sdc.config.config_pipeline_hpat_default:
     """
-    Overload Numba functions to allow call SDC pass in Numba compiler pipeline
+    Overload a Numba function to allow SDC passes to be called in the Numba compiler pipeline
     Functions are:
-    - AnnotateTypes run_pass()
-    - InlineClosureLikes run_pass()
+    - Numba DefaultPassBuilder define_nopython_pipeline()
 
     TODO: Needs to detect 'import Pandas' and align initialization according to it
     """
 
-# Need more work since Series tests failed
-# Test: SDC_CONFIG_PIPELINE_SDC=0 python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_sort_values1
-
-#    sdc.config.numba_typed_passes_annotatetypes_orig = numba.typed_passes.AnnotateTypes.run_pass
-#    numba.typed_passes.AnnotateTypes.run_pass = sdc.datatypes.hpat_pandas_dataframe_pass.sdc_dataframepassimpl_overload
-
-#    sdc.config.numba_untyped_passes_inlineclosurelikes_orig = numba.untyped_passes.InlineClosureLikes.run_pass
-#    numba.untyped_passes.InlineClosureLikes.run_pass = sdc.datatypes.hpat_pandas_dataframe_pass.sdc_hiframespassimpl_overload
+    sdc.config.numba_compiler_define_nopython_pipeline_orig = numba.compiler.DefaultPassBuilder.define_nopython_pipeline
+    numba.compiler.DefaultPassBuilder.define_nopython_pipeline = sdc.datatypes.hpat_pandas_dataframe_pass.sdc_nopython_pipeline_lite_register
 
 def _init_extension():
     '''Register Pandas classes and functions with Numba.
diff --git a/sdc/config.py b/sdc/config.py
@@ -87,12 +87,7 @@
 Default value used to select compiler pipeline in a function decorator
 '''
 
-numba_typed_passes_annotatetypes_orig = None
+numba_compiler_define_nopython_pipeline_orig = None
 '''
-Default value for a pointer intended to use as Numba AnnotateTypes run_pass() in overloaded function
-'''
-
-numba_untyped_passes_inlineclosurelikes_orig = None
-'''
-Default value for a pointer intended to use as Numba InlineClosureLikes run_pass() in overloaded function
+Default value for the reference to the original numba.compiler.DefaultPassBuilder.define_nopython_pipeline(), called from the overloading function
 '''
diff --git a/sdc/datatypes/hpat_pandas_dataframe_pass.py b/sdc/datatypes/hpat_pandas_dataframe_pass.py
@@ -28,71 +28,37 @@
 | Procedures are required for SDC DataFrameType handling in Numba
 '''
 
-import sdc
-
-
-def sdc_dataframepassimpl_overload(*args, **kwargs):
-    """
-    This is a pointer intended to use as Numba AnnotateTypes run_pass() function
-    A hook made to overload Numba function and:
-    - call original function
-    - call hiframes.dataframe_pass.DataFramePass
-    - call compiler.PostprocessorPass
-    - call hiframes.hiframes_typed.HiFramesTypedPass
-
-    return True if any passes mutated original Numba IR
-
-    This function needs to be removed if SDC DataFrame support
-    no more needs Numba IR transformations via DataFramePass
-    """
-
-    if sdc.config.numba_typed_passes_annotatetypes_orig is None:
-        """
-        Unexpected usage of this function
-        """
-
-        return False
-
-    status_numba_pass = sdc.config.numba_typed_passes_annotatetypes_orig(*args, **kwargs)
+from numba.untyped_passes import InlineClosureLikes
+from numba.typed_passes import AnnotateTypes
 
-    numba_state_var = args[1]
-
-    status_dataframe_pass = sdc.hiframes.dataframe_pass.DataFramePassImpl(numba_state_var).run_pass()
-    status_postprocess_pass = sdc.compiler.PostprocessorPass().run_pass(numba_state_var)
-    status_dataframe_typed_pass = sdc.hiframes.hiframes_typed.HiFramesTypedPassImpl(numba_state_var).run_pass()
-
-    is_ir_mutated = status_numba_pass or status_dataframe_pass or status_postprocess_pass or status_dataframe_typed_pass
+import sdc
 
-    return is_ir_mutated
 
-def sdc_hiframespassimpl_overload(*args, **kwargs):
+def sdc_nopython_pipeline_lite_register(state, name='nopython'):
     """
-    This is a pointer intended to use as Numba InlineClosureLikes run_pass() function
-    A hook made to overload Numba function and:
-    - call compiler.InlinePass
-    - call hiframes.hiframes_untyped.HiFramesPass
-    - call original function
+    Register a subset of Intel SDC compiler passes in the Numba nopython pipeline.
+    Each pass enabled here is expected to be called many times, on every decorated function, including
+    functions which are not related to Pandas.
 
-    return True if any passes mutated original Numba IR
+    Test: SDC_CONFIG_PIPELINE_SDC=0 python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_sort_values1
 
     This function needs to be removed if SDC DataFrame support
     no more needs Numba IR transformations via DataFramePass
     """
 
-    if sdc.config.numba_untyped_passes_inlineclosurelikes_orig is None:
-        """
-        Unexpected usage of this function
-        """
-
-        return False
+    if sdc.config.numba_compiler_define_nopython_pipeline_orig is None:
+        raise ValueError("Intel SDC. Unexpected usage of DataFrame passes registration function.")
 
-    numba_state_var = args[1]
+    numba_pass_manager = sdc.config.numba_compiler_define_nopython_pipeline_orig(state, name)
 
-    status_inlinepass_pass = sdc.compiler.InlinePass().run_pass(numba_state_var)
-    status_hiframespass_pass = sdc.hiframes.hiframes_untyped.HiFramesPassImpl(numba_state_var).run_pass()
+    # numba_pass_manager.add_pass_after(sdc.compiler.InlinePass, InlineClosureLikes)
+    # numba_pass_manager.add_pass_after(sdc.hiframes.hiframes_untyped.HiFramesPass, sdc.compiler.InlinePass)
+    numba_pass_manager.add_pass_after(sdc.hiframes.hiframes_untyped.HiFramesPass, InlineClosureLikes)
 
-    status_numba_pass = sdc.config.numba_untyped_passes_inlineclosurelikes_orig(*args, **kwargs)
+    numba_pass_manager.add_pass_after(sdc.hiframes.dataframe_pass.DataFramePass, AnnotateTypes)
+    numba_pass_manager.add_pass_after(sdc.compiler.PostprocessorPass, AnnotateTypes)
+    # numba_pass_manager.add_pass_after(sdc.hiframes.hiframes_typed.HiFramesTypedPass, sdc.hiframes.dataframe_pass.DataFramePass)
 
-    is_ir_mutated = status_inlinepass_pass or status_hiframespass_pass or status_numba_pass
+    numba_pass_manager.finalize()
 
-    return is_ir_mutated
+    return numba_pass_manager
diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py
@@ -82,12 +82,9 @@ def hpat_pandas_stringmethods_upper_impl(self):
 import pandas
 
 import numba
-from numba import types
 from numba.extending import overload_method
-from numba.errors import TypingError
 
 from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
-from sdc.str_arr_ext import to_string_list
 
 
 _hpat_pandas_stringmethods_autogen_global_dict = {
@@ -229,7 +226,7 @@ def _hpat_pandas_stringmethods_autogen(method_name):
 
 
 # _hpat_pandas_stringmethods_autogen_methods = sorted(dir(numba.types.misc.UnicodeType.__getattribute__.__qualname__))
-_hpat_pandas_stringmethods_autogen_methods = ['upper', 'lower']
+_hpat_pandas_stringmethods_autogen_methods = ['upper', 'lower', 'lstrip', 'rstrip', 'strip']
 """
     This is the list of function which are autogenerated to be used from Numba directly.
 """
diff --git a/sdc/hiframes/pd_series_ext.py b/sdc/hiframes/pd_series_ext.py
@@ -752,12 +752,12 @@ def resolve_rename(self, ary, args, kws):
 
 #     return typer
 
-str2str_methods = ['capitalize', 'lstrip', 'rstrip', 'strip', 'swapcase', 'title']
+str2str_methods = ['capitalize', 'swapcase', 'title']
 """
     Functions which are still overloaded by HPAT compiler pipeline
 """
 
-str2str_methods_excluded = ['upper', 'lower']
+str2str_methods_excluded = ['upper', 'lower', 'lstrip', 'rstrip', 'strip']
 """
     Functions which are used from Numba directly by calling from StringMethodsType
 
diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py
@@ -2320,8 +2320,8 @@ def test_impl(S):
         pd.testing.assert_series_equal(hpat_func(S), test_impl(S))
 
     def test_series_str2str(self):
-        common_methods = ['lower', 'upper']
-        sdc_methods = ['capitalize', 'lstrip', 'rstrip', 'strip', 'swapcase', 'title']
+        common_methods = ['lower', 'upper', 'lstrip', 'rstrip', 'strip']
+        sdc_methods = ['capitalize', 'swapcase', 'title']
         str2str_methods = common_methods[:]
         if sdc.config.config_pipeline_hpat_default:
             str2str_methods += sdc_methods
@@ -2341,8 +2341,7 @@ def test_series_str2str(self):
     @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
                      'Series.str.<method>() unsupported')
     def test_series_str2str_unsupported(self):
-        unsupported_methods = ['capitalize', 'lstrip', 'rstrip',
-                               'strip', 'swapcase', 'title']
+        unsupported_methods = ['capitalize', 'swapcase', 'title']
         for method in unsupported_methods:
             func_lines = ['def test_impl(S):',
                           '  return S.str.{}()'.format(method)]
diff --git a/sdc/tests/test_utils.py b/sdc/tests/test_utils.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # *****************************************************************************
 # Copyright (c) 2019, Intel Corporation All rights reserved.
 #
@@ -46,15 +47,21 @@
 
 
 def count_array_REPs():
-    from sdc.distributed import Distribution
-    vals = sdc.distributed.dist_analysis.array_dists.values()
-    return sum([v == Distribution.REP for v in vals])
+    if sdc.config.config_pipeline_hpat_default:
+        from sdc.distributed import Distribution
+        vals = sdc.distributed.dist_analysis.array_dists.values()
+        return sum([v == Distribution.REP for v in vals])
+    else:
+        return 0
 
 
 def count_parfor_REPs():
-    from sdc.distributed import Distribution
-    vals = sdc.distributed.dist_analysis.parfor_dists.values()
-    return sum([v == Distribution.REP for v in vals])
+    if sdc.config.config_pipeline_hpat_default:
+        from sdc.distributed import Distribution
+        vals = sdc.distributed.dist_analysis.parfor_dists.values()
+        return sum([v == Distribution.REP for v in vals])
+    else:
+        return 0
 
 
 def count_parfor_OneDs():
diff --git a/sdc/tests/tests_perf/README.md b/sdc/tests/tests_perf/README.md
@@ -1,6 +1,6 @@
 ### Performance testing
 based on Python unit testing framework where typical test suite looks like:
-```
+```python
 class TestSuite(unittest.TestCase):
     # how many times function will be executed for more accurate measurements
     iter_number = 5
diff --git a/sdc/tests/tests_perf/test_perf_utils.py b/sdc/tests/tests_perf/test_perf_utils.py
@@ -28,6 +28,7 @@
 # *****************************************************************************
 
 import gc
+import logging
 import sys
 import sdc
 import time
@@ -52,6 +53,19 @@
 """
 
 
+def setup_logging():
+    """Set up and return a logger that writes timestamped messages to a stream handler"""
+    stream_handler = logging.StreamHandler()
+    stream_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+
+    logger = logging.getLogger(__name__)
+    logger.setLevel(level=logging.INFO)
+    logger.addHandler(stream_handler)
+
+    return logger
+
+
+
 def is_true(input_string):
     if isinstance(input_string, str):
         input_string = input_string.lower()
@@ -186,6 +200,7 @@ class TestResults:
     raw_perf_results_xlsx = 'raw_perf_results.xlsx'
     index = ['name', 'N', 'type', 'size']
     test_results_data = pandas.DataFrame(index=index)
+    logger = setup_logging()
 
     @property
     def grouped_data(self):
@@ -258,11 +273,20 @@ def dump(self):
         Dump performance testing results from global data storage to excel
         """
         # openpyxl need to be installed
-        with pandas.ExcelWriter(self.perf_results_xlsx) as writer:
-            self.grouped_data.to_excel(writer)
 
-        with pandas.ExcelWriter(self.raw_perf_results_xlsx) as writer:
-            self.test_results_data.to_excel(writer, index=False)
+        try:
+            with pandas.ExcelWriter(self.perf_results_xlsx) as writer:
+                self.grouped_data.to_excel(writer)
+        except ModuleNotFoundError as e:
+            msg = 'Could not dump the results to "%s": %s'
+            self.logger.warning(msg, self.perf_results_xlsx, e)
+
+        try:
+            with pandas.ExcelWriter(self.raw_perf_results_xlsx) as writer:
+                self.test_results_data.to_excel(writer, index=False)
+        except ModuleNotFoundError as e:
+            msg = 'Could not dump raw results to "%s": %s'
+            self.logger.warning(msg, self.raw_perf_results_xlsx, e)
 
     def load(self):
         """
@@ -272,7 +296,11 @@ def load(self):
         if raw_perf_results_xlsx.exists():
             with raw_perf_results_xlsx.open('rb') as fd:
                 # xlrd need to be installed
-                self.test_results_data = pandas.read_excel(fd)
+                try:
+                    self.test_results_data = pandas.read_excel(fd)
+                except ModuleNotFoundError as e:
+                    msg = 'Could not load previous results from %s: %s'
+                    self.logger.warning(msg, raw_perf_results_xlsx, e)
 
 
 class TestResultsStr(TestResults):