Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit dedb0a9

Browse files
densmirn authored and
AlexanderKalistratov committed
Implement Series.str.find() (#382)
* Implement Series.str.find()
* Remove IntegerLiteral from the start/end type check, since IntegerLiteral inherits from Integer
1 parent 86c8701 commit dedb0a9

3 files changed

Lines changed: 114 additions & 1 deletion

File tree

sdc/datatypes/hpat_pandas_stringmethods_functions.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ def hpat_pandas_stringmethods_upper_impl(self):
8282

8383
import numba
8484
from numba.extending import overload_method
85+
from numba.types import (Integer, NoneType, Omitted, StringLiteral, UnicodeType)
8586

8687
from sdc.datatypes.common_functions import TypeChecker
8788
from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
@@ -187,6 +188,65 @@ def hpat_pandas_stringmethods_{methodname}_impl(self{methodparams}):
187188
"""
188189

189190

191+
@overload_method(StringMethodsType, 'find')
def hpat_pandas_stringmethods_find(self, sub, start=0, end=None):
    """
    Pandas Series method :meth:`pandas.core.strings.StringMethods.find()` implementation.

    Note: Only unicode string elements are supported. Numpy.NaN elements are not supported.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_str_find

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    sub: :obj:`str`
        Substring to search for in every element
    start: :obj:`int`
        Left edge index
        *unsupported*: any value other than the default 0 raises ValueError at run time
    end: :obj:`int`
        Right edge index
        *unsupported*: any value other than the default None raises ValueError at run time

    Returns
    -------
    :obj:`pandas.Series`
        returns int64 :obj:`pandas.Series` holding ``element.find(sub)`` for every element,
        carrying the index and the name of the original series
    """

    checker = TypeChecker('Method find().')
    checker.check(self, StringMethodsType)

    if not isinstance(sub, (StringLiteral, UnicodeType)):
        checker.raise_exc(sub, 'str', 'sub')

    # Both edges accept the same families of types; the extra comparison
    # lets the plain default value through when it is not one of them.
    edge_types = (Integer, NoneType, Omitted)

    start_rejected = not isinstance(start, edge_types) and start != 0
    if start_rejected:
        checker.raise_exc(start, 'None, int', 'start')

    end_rejected = not isinstance(end, edge_types) and end is not None
    if end_rejected:
        checker.raise_exc(end, 'None, int', 'end')

    def hpat_pandas_stringmethods_find_impl(self, sub, start=0, end=None):
        # Only the default edges are implemented, so anything else is refused.
        if start != 0:
            raise ValueError('Method find(). The object start\n expected: 0')
        if end is not None:
            raise ValueError('Method find(). The object end\n expected: None')

        series = self._data
        positions = numpy.empty(len(series), numba.types.int64)
        for pos, text in enumerate(series._data):
            positions[pos] = text.find(sub)

        return pandas.Series(positions, series._index, name=series._name)

    return hpat_pandas_stringmethods_find_impl
248+
249+
190250
@overload_method(StringMethodsType, 'isupper')
191251
def hpat_pandas_stringmethods_isupper(self):
192252
"""

sdc/hiframes/pd_series_ext.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -758,7 +758,7 @@ def resolve_head(self, ary, args, kws):
758758
Functions which are still overloaded by HPAT compiler pipeline
759759
"""
760760

761-
str2str_methods_excluded = ['upper', 'isupper', 'len', 'lower',
761+
str2str_methods_excluded = ['upper', 'find', 'isupper', 'len', 'lower',
762762
'lstrip', 'rstrip', 'strip']
763763
"""
764764
Functions which are used from Numba directly by calling from StringMethodsType

sdc/tests/test_series.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2424,6 +2424,59 @@ def test_impl(S1, S2):
24242424
hpat_func(S1, S2), test_impl(S1, S2),
24252425
err_msg='S1={}\nS2={}'.format(S1, S2))
24262426

2427+
def test_series_str_find(self):
    """Compare jitted Series.str.find() with pandas over several substrings, indices and names."""
    def test_impl(series, sub):
        return series.str.find(sub)
    hpat_func = self.jit(test_impl)

    data = test_global_input_data_unicode_kind4
    # Length of the shortest element; computed once instead of once per
    # element. default=0 keeps empty data working as before (no prefixes).
    prefix_len = min((len(s) for s in data), default=0)
    # Substrings under test: the empty string, a prefix of every element
    # cut to the shortest element's length, and every whole element.
    subs = [''] + [s[:prefix_len] for s in data] + data
    # Default index, reversed integer positions, and reversed data as labels.
    indices = [None, list(range(len(data)))[::-1], data[::-1]]
    names = [None, 'A']
    for index, name in product(indices, names):
        series = pd.Series(data, index, name=name)
        for sub in subs:
            pd.testing.assert_series_equal(hpat_func(series, sub),
                                           test_impl(series, sub))
2441+
2442+
def test_series_str_find_exception_unsupported_start(self):
    """Check the errors raised by jitted Series.str.find() for a bad `start` argument."""
    def test_impl(series, sub, start):
        return series.str.find(sub, start)
    hpat_func = self.jit(test_impl)

    series = pd.Series(test_global_input_data_unicode_kind4)
    msg_tmpl = 'Method {}(). The object {}\n {}'

    # A string `start` is expected to be refused with TypingError.
    wrong_type_msg = msg_tmpl.format(
        'find', 'start', 'given: unicode_type\n expected: None, int')
    with self.assertRaises(TypingError) as caught:
        hpat_func(series, '', '0')
    self.assertIn(wrong_type_msg, str(caught.exception))

    # An integer `start` other than 0 is expected to be refused with ValueError.
    wrong_value_msg = msg_tmpl.format('find', 'start', 'expected: 0')
    with self.assertRaises(ValueError) as caught:
        hpat_func(series, '', 1)
    self.assertIn(wrong_value_msg, str(caught.exception))
2460+
2461+
def test_series_str_find_exception_unsupported_end(self):
    """Check the errors raised by jitted Series.str.find() for a bad `end` argument."""
    def test_impl(series, sub, start, end):
        return series.str.find(sub, start, end)
    hpat_func = self.jit(test_impl)

    series = pd.Series(test_global_input_data_unicode_kind4)
    msg_tmpl = 'Method {}(). The object {}\n {}'

    # A string `end` is expected to be refused with TypingError.
    wrong_type_msg = msg_tmpl.format(
        'find', 'end', 'given: unicode_type\n expected: None, int')
    with self.assertRaises(TypingError) as caught:
        hpat_func(series, '', 0, 'None')
    self.assertIn(wrong_type_msg, str(caught.exception))

    # An integer `end` (anything but None) is expected to be refused with ValueError.
    wrong_value_msg = msg_tmpl.format('find', 'end', 'expected: None')
    with self.assertRaises(ValueError) as caught:
        hpat_func(series, '', 0, 0)
    self.assertIn(wrong_value_msg, str(caught.exception))
2479+
24272480
def test_series_str_len1(self):
24282481
def test_impl(S):
24292482
return S.str.len()

0 commit comments

Comments
 (0)