Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 4410e92

Browse files
densmirn authored and shssf committed
Add series.str.len() in new style (#321)
* Add series.str.len() in new style
* Workaround issue with named series
1 parent 69db986 commit 4410e92

3 files changed

Lines changed: 41 additions & 3 deletions

File tree

sdc/datatypes/hpat_pandas_stringmethods_functions.py

Lines changed: 38 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,7 @@ def hpat_pandas_stringmethods_upper_impl(self):
8383

8484
import numba
8585
from numba.extending import overload_method
86+
from numba.errors import TypingError
8687

8788
from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
8889

@@ -186,6 +187,42 @@ def hpat_pandas_stringmethods_{methodname}_impl(self{methodparams}):
186187
"""
187188

188189

190+
@overload_method(StringMethodsType, 'len')
191+
def hpat_pandas_stringmethods_len(self):
192+
"""
193+
Pandas Series method :meth:`pandas.core.strings.StringMethods.len()` implementation.
194+
195+
Note: Only list elements of Unicode type are supported. Numpy.NaN elements are not supported.
196+
197+
.. only:: developer
198+
199+
Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_str_len1
200+
201+
Parameters
202+
----------
203+
self: :class:`pandas.core.strings.StringMethods`
204+
input arg
205+
206+
Returns
207+
-------
208+
:obj:`pandas.Series`
209+
returns :obj:`pandas.Series` object
210+
"""
211+
212+
if not isinstance(self, StringMethodsType):
213+
raise TypingError('Method len(). The object must be a pandas.core.strings. Given: {}'.format(self))
214+
215+
def hpat_pandas_stringmethods_len_impl(self):
216+
item_count = len(self._data)
217+
result = numpy.empty(item_count, numba.types.int64)
218+
for idx, item in enumerate(self._data._data):
219+
result[idx] = len(item)
220+
221+
return pandas.Series(result, name=self._data._name)
222+
223+
return hpat_pandas_stringmethods_len_impl
224+
225+
189226
def _hpat_pandas_stringmethods_autogen(method_name):
190227
""""
191228
The function generates a function for 'method_name' from source text that is created on the fly.
@@ -231,7 +268,7 @@ def _hpat_pandas_stringmethods_autogen(method_name):
231268
This is the list of function which are autogenerated to be used from Numba directly.
232269
"""
233270

234-
_hpat_pandas_stringmethods_autogen_exceptions = ['split', 'len', 'get', 'replace']
271+
_hpat_pandas_stringmethods_autogen_exceptions = ['split', 'get', 'replace']
235272

236273
for method_name in _hpat_pandas_stringmethods_autogen_methods:
237274
if not (method_name.startswith('__') or method_name in _hpat_pandas_stringmethods_autogen_exceptions):

sdc/hiframes/pd_series_ext.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -756,7 +756,7 @@ def resolve_rename(self, ary, args, kws):
756756
Functions which are still overloaded by HPAT compiler pipeline
757757
"""
758758

759-
str2str_methods_excluded = ['upper', 'lower', 'lstrip', 'rstrip', 'strip']
759+
str2str_methods_excluded = ['upper', 'len', 'lower', 'lstrip', 'rstrip', 'strip']
760760
"""
761761
Functions which are used from Numba directly by calling from StringMethodsType
762762

sdc/tests/test_series.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2347,7 +2347,8 @@ def test_impl(S):
23472347
return S.str.len()
23482348
hpat_func = self.jit(test_impl)
23492349

2350-
S = pd.Series(['aa', 'abc', 'c', 'cccd'])
2350+
# TODO: fix the issue that occurs if the series name is not assigned
2351+
S = pd.Series(['aa', 'abc', 'c', 'cccd'], name='A')
23512352
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))
23522353

23532354
@skip_numba_jit

0 commit comments

Comments
 (0)