Move to Numba 0.52 (#939)

kozlov-alexey · web-flow · commit 7524c5982d45 · 2021-02-18T03:38:06.000+03:00
* Taking numba from master

* Moving to Numba 0.52

commit 3182540b127268ace11cf4042cd87f044875d9fa
Author: Kozlov, Alexey <alexey.kozlov@intel.com>
Date:   Wed Oct 21 19:49:58 2020 +0300

    Cleaning up before squash

commit 895668116542fe3057f73fcb276c441cbde66747
Author: Kozlov, Alexey <alexey.kozlov@intel.com>
Date:   Tue Oct 13 17:31:34 2020 +0300

    Workaround for set from str_arr problem

* Fixing correct NUMBA_VERSION

* Remove intel/label/beta channel from Azure CI builds
diff --git a/README.rst b/README.rst
@@ -82,7 +82,7 @@ Building on Linux with setuptools
 
     export PYVER=<3.6 or 3.7>
     export NUMPYVER=<1.16 or 1.17>
-    conda create -n sdc-env -q -y -c intel/label/beta -c defaults -c intel -c conda-forge python=$PYVER numpy=$NUMPYVER tbb-devel tbb4py numba=0.49 pandas=1.0.5 pyarrow=0.17.0 gcc_linux-64 gxx_linux-64
+    conda create -n sdc-env -q -y -c intel/label/beta -c defaults -c intel -c conda-forge python=$PYVER numpy=$NUMPYVER tbb-devel tbb4py numba=0.52 pandas=1.0.5 pyarrow=0.17.0 gcc_linux-64 gxx_linux-64
     source activate sdc-env
     git clone https://github.com/IntelPython/sdc.git
     cd sdc
@@ -120,7 +120,7 @@ Building on Windows with setuptools
 
     set PYVER=<3.6 or 3.7>
     set NUMPYVER=<1.16 or 1.17>
-    conda create -n sdc-env -c intel/label/beta -c defaults -c intel -c conda-forge python=%PYVER% numpy=%NUMPYVER% tbb-devel tbb4py numba=0.49 pandas=1.0.5 pyarrow=0.17.0
+    conda create -n sdc-env -c intel/label/beta -c defaults -c intel -c conda-forge python=%PYVER% numpy=%NUMPYVER% tbb-devel tbb4py numba=0.52 pandas=1.0.5 pyarrow=0.17.0
     conda activate sdc-env
     set INCLUDE=%INCLUDE%;%CONDA_PREFIX%\Library\include
     set LIB=%LIB%;%CONDA_PREFIX%\Library\lib
diff --git a/buildscripts/utilities.py b/buildscripts/utilities.py
@@ -52,7 +52,7 @@ def __init__(self, python, sdc_local_channel=None):
         self.line_single = '-'*80
 
         # Set channels
-        self.channel_list = ['-c', 'intel/label/beta', '-c', 'defaults', '-c', 'conda-forge']
+        self.channel_list = ['-c', 'defaults', '-c', 'conda-forge']
         if sdc_local_channel:
             sdc_local_channel = Path(sdc_local_channel).resolve().as_uri()
             self.channel_list = ['-c', sdc_local_channel] + self.channel_list
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
@@ -1,4 +1,4 @@
-{% set NUMBA_VERSION = "==0.51.2" %}
+{% set NUMBA_VERSION = "==0.52.0" %}
 {% set PANDAS_VERSION = "==1.0.5" %}
 {% set PYARROW_VERSION = "==0.17.0" %}
 
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,6 @@
 numpy>=1.16
-pandas==0.25.3
+pandas==1.0.5
 pyarrow==0.17.0
-numba==0.51.2
+numba==0.52.0
 tbb
 tbb-devel
diff --git a/sdc/__init__.py b/sdc/__init__.py
@@ -28,7 +28,7 @@
 
 # re-export from Numba
 from numba import (typeof, prange, pndindex, gdb, gdb_breakpoint, gdb_init,
-                   stencil, threading_layer, jitclass, objmode)
+                   stencil, threading_layer, objmode)
 
 import sdc.config
 import sdc.set_ext
diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -72,6 +72,7 @@
 from sdc.hiframes.api import isna
 from sdc.datatypes.hpat_pandas_groupby_functions import init_series_groupby
 from sdc.utilities.prange_utils import parallel_chunks
+from sdc.set_ext import build_set
 
 from .pandas_series_functions import apply
 from .pandas_series_functions import map as _map
@@ -652,7 +653,7 @@ def sdc_pandas_series_setitem_idx_bool_array_align_impl(self, idx, value):
             # and filtered indexes are looked in value.index, and if found corresponding value is set
             if value_is_series == True:  # noqa
                 value_index, self_index = value.index, self.index
-                unique_value_indices, unique_self_indices = set(value_index), set(self_index)
+                unique_value_indices, unique_self_indices = build_set(value_index), build_set(self_index)
 
                 # pandas behaves differently if value.index has duplicates and if it has no
                 # in case of duplicates in value.index assignment is made via positions
@@ -702,7 +703,7 @@ def sdc_pandas_series_setitem_idx_bool_series_align_impl(self, idx, value):
             # and filtered indexes are either looked in value.index (if value is a Series)
             # or in self.index (if value is scalar or array)
             filtered_idx_indices = idx_index[idx._data]
-            filtered_idx_indices_set = set(filtered_idx_indices)
+            filtered_idx_indices_set = build_set(filtered_idx_indices)
             if value_is_series == True:  # noqa
 
                 if len(filtered_idx_indices_set) != len(filtered_idx_indices):
@@ -2074,7 +2075,7 @@ def hpat_pandas_series_isin_impl(self, values):
             # return pandas.Series (np.isin (self._data, values))
 
             values = str_list_to_array(list(values))
-            values = set(values)
+            values = build_set(values)
             data_len = len(self._data)
             result = numpy.empty(data_len, dtype=numpy.bool_)
             for i in prange(data_len):
@@ -2086,7 +2087,7 @@ def hpat_pandas_series_isin_impl(self, values):
             # TODO: replace with below line when Numba supports np.isin in nopython mode
             # return pandas.Series (np.isin (self._data, values))
 
-            values = set(values)
+            values = build_set(values)
             data_len = len(self._data)
             result = numpy.empty(data_len, dtype=numpy.bool_)
             for i in prange(data_len):
@@ -3447,7 +3448,7 @@ def hpat_pandas_series_unique_str_impl(self):
             Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_unique_str
             '''
 
-            str_set = set(self._data)
+            str_set = build_set(self._data)
             return to_array(str_set)
 
         return hpat_pandas_series_unique_str_impl
@@ -3579,7 +3580,7 @@ def hpat_pandas_series_nunique_str_impl(self, dropna=True):
             if dropna:
                 nan_mask = self.isna()
                 data = self._data[~nan_mask._data]
-            unique_values = set(data)
+            unique_values = build_set(data)
             return len(unique_values)
 
         return hpat_pandas_series_nunique_str_impl
@@ -3592,7 +3593,7 @@ def hpat_pandas_series_nunique_impl(self, dropna=True):
         data_mask_for_nan = numpy.isnan(self._data)
         nan_exists = numpy.any(data_mask_for_nan)
         data_no_nan = self._data[~data_mask_for_nan]
-        data_set = set(data_no_nan)
+        data_set = build_set(data_no_nan)
         if dropna or not nan_exists:
             return len(data_set)
         else:
diff --git a/sdc/set_ext.py b/sdc/set_ext.py
@@ -128,6 +128,7 @@ def _build_str_set_impl(A):
         str_set.add(_str)
     return str_set
 
+
 # TODO: remove since probably unused
 @overload(set)
 def init_set_string_array(A):
diff --git a/sdc/tests/test_hpat_jit.py b/sdc/tests/test_hpat_jit.py
@@ -36,6 +36,7 @@
 from sdc import *
 from sdc.tests.test_base import TestCase
 from sdc.tests.test_utils import skip_numba_jit
+from numba.experimental import jitclass
 
 
 class TestHpatJitIssues(TestCase):
diff --git a/setup.py b/setup.py
@@ -377,7 +377,7 @@ def run(self):
           'numpy>=1.16',
           'pandas>=1.0',
           'pyarrow==0.17.0',
-          'numba>=0.51.2,<0.52',
+          'numba>=0.52.0,<0.53',
           'tbb'
           ],
       cmdclass=sdc_build_commands,

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-{% set NUMBA_VERSION = "==0.51.2" %}`
	`1`	`+{% set NUMBA_VERSION = "==0.52.0" %}`
`2`	`2`	`{% set PANDAS_VERSION = "==1.0.5" %}`
`3`	`3`	`{% set PYARROW_VERSION = "==0.17.0" %}`
`4`	`4`