Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit efa5a89

Browse files
authored
Implement Series.str.startswith() (#383)
* Implement Series.str.startswith() * Minor fixes in tests for Series.str.find()
1 parent 99abf61 commit efa5a89

3 files changed

Lines changed: 93 additions & 9 deletions

File tree

sdc/datatypes/hpat_pandas_stringmethods_functions.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,57 @@ def hpat_pandas_stringmethods_len_impl(self):
371371
return hpat_pandas_stringmethods_len_impl
372372

373373

374+
@overload_method(StringMethodsType, 'startswith')
def hpat_pandas_stringmethods_startswith(self, pat, na=None):
    """
    Pandas Series method :meth:`pandas.core.strings.StringMethods.startswith()` implementation.

    Note: Only Unicode string elements are supported. Numpy.NaN is not supported as an element.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_str_startswith

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    pat: :obj:`str`
        Character sequence
    na: :obj:`bool`
        Object shown if element tested is not a string
        *unsupported*

    Returns
    -------
    :obj:`pandas.Series`
        returns :obj:`pandas.Series` object of booleans, one per element,
        built with the index and name of the original series

    Raises
    ------
    ValueError
        Raised by the generated implementation when `na` is anything other than None.

    Arguments of an unexpected type are reported through ``TypeChecker.raise_exc``
    before the implementation is generated.
    """

    ty_checker = TypeChecker('Method startswith().')
    ty_checker.check(self, StringMethodsType)

    if not isinstance(pat, (StringLiteral, UnicodeType)):
        ty_checker.raise_exc(pat, 'str', 'pat')

    # `na` is accepted as a Boolean/NoneType/Omitted type instance or as a plain None.
    if not isinstance(na, (Boolean, NoneType, Omitted)) and na is not None:
        ty_checker.raise_exc(na, 'bool', 'na')

    def hpat_pandas_stringmethods_startswith_impl(self, pat, na=None):
        # A bool `na` passes the type check above but is not implemented,
        # so it is rejected here with an explicit error.
        if na is not None:
            msg = 'Method startswith(). The object na\n expected: None'
            raise ValueError(msg)

        item_count = len(self._data)
        result = numpy.empty(item_count, numba.types.boolean)
        for idx, item in enumerate(self._data._data):
            result[idx] = item.startswith(pat)

        return pandas.Series(result, self._data._index, name=self._data._name)

    return hpat_pandas_stringmethods_startswith_impl
423+
424+
374425
def _hpat_pandas_stringmethods_autogen(method_name):
375426
""""
376427
The function generates a function for 'method_name' from source text that is created on the fly.

sdc/hiframes/pd_series_ext.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,7 @@ def resolve_head(self, ary, args, kws):
759759
"""
760760

761761
str2str_methods_excluded = ['upper', 'endswith', 'find', 'isupper', 'len',
762-
'lower', 'lstrip', 'rstrip', 'strip']
762+
'lower', 'lstrip', 'rstrip', 'startswith', 'strip']
763763
"""
764764
Functions which are used from Numba directly by calling from StringMethodsType
765765

sdc/tests/test_series.py

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2480,17 +2480,16 @@ def test_impl(series, sub, start):
24802480
hpat_func = self.jit(test_impl)
24812481

24822482
series = pd.Series(test_global_input_data_unicode_kind4)
2483-
msg_tmpl = 'Method {}(). The object {}\n {}'
2483+
msg_tmpl = 'Method find(). The object start\n {}'
24842484

24852485
with self.assertRaises(TypingError) as raises:
24862486
hpat_func(series, '', '0')
2487-
msg = msg_tmpl.format('find', 'start', 'given: unicode_type\n '
2488-
'expected: None, int')
2487+
msg = msg_tmpl.format('given: unicode_type\n expected: None, int')
24892488
self.assertIn(msg, str(raises.exception))
24902489

24912490
with self.assertRaises(ValueError) as raises:
24922491
hpat_func(series, '', 1)
2493-
msg = msg_tmpl.format('find', 'start', 'expected: 0')
2492+
msg = msg_tmpl.format('expected: 0')
24942493
self.assertIn(msg, str(raises.exception))
24952494

24962495
def test_series_str_find_exception_unsupported_end(self):
@@ -2499,17 +2498,16 @@ def test_impl(series, sub, start, end):
24992498
hpat_func = self.jit(test_impl)
25002499

25012500
series = pd.Series(test_global_input_data_unicode_kind4)
2502-
msg_tmpl = 'Method {}(). The object {}\n {}'
2501+
msg_tmpl = 'Method find(). The object end\n {}'
25032502

25042503
with self.assertRaises(TypingError) as raises:
25052504
hpat_func(series, '', 0, 'None')
2506-
msg = msg_tmpl.format('find', 'end', 'given: unicode_type\n '
2507-
'expected: None, int')
2505+
msg = msg_tmpl.format('given: unicode_type\n expected: None, int')
25082506
self.assertIn(msg, str(raises.exception))
25092507

25102508
with self.assertRaises(ValueError) as raises:
25112509
hpat_func(series, '', 0, 0)
2512-
msg = msg_tmpl.format('find', 'end', 'expected: None')
2510+
msg = msg_tmpl.format('expected: None')
25132511
self.assertIn(msg, str(raises.exception))
25142512

25152513
def test_series_str_len1(self):
@@ -2524,6 +2522,41 @@ def test_impl(S):
25242522
S = pd.Series(data, index, name=name)
25252523
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))
25262524

2525+
def test_series_str_startswith(self):
    """Check jitted Series.str.startswith() against pandas.

    Every pattern (the empty string, a prefix of each input string, and each
    full input string) is tried on series built with each combination of
    index and name.
    """
    def test_impl(series, pat):
        return series.str.startswith(pat)

    hpat_func = self.jit(test_impl)

    data = test_global_input_data_unicode_kind4
    # Length of the shortest input string. It does not depend on the element
    # being sliced, so compute it once rather than once per element.
    # default=0 keeps empty input from raising, matching the previous behaviour.
    shortest_len = min(map(len, data), default=0)
    pats = [''] + [s[:shortest_len] for s in data] + data
    indices = [None, list(range(len(data)))[::-1], data[::-1]]
    names = [None, 'A']
    for index, name in product(indices, names):
        series = pd.Series(data, index, name=name)
        for pat in pats:
            pd.testing.assert_series_equal(hpat_func(series, pat),
                                           test_impl(series, pat))
2540+
2541+
def test_series_str_startswith_exception_unsupported_na(self):
    """Check the errors raised by jitted Series.str.startswith() for bad `na` values.

    A string `na` must produce a TypingError and a bool `na` must produce a
    ValueError; both messages are matched by substring.
    """
    def test_impl(series, pat, na):
        return series.str.startswith(pat, na)

    jitted = self.jit(test_impl)
    strings = pd.Series(test_global_input_data_unicode_kind4)
    template = 'Method startswith(). The object na\n {}'

    # Expected message fragments, built up front from the shared template.
    typing_msg = template.format('given: unicode_type\n expected: bool')
    value_msg = template.format('expected: None')

    # A string passed as `na` has the wrong type.
    with self.assertRaises(TypingError) as typing_ctx:
        jitted(strings, '', 'None')
    self.assertIn(typing_msg, str(typing_ctx.exception))

    # A bool passed as `na` has an accepted type but an unsupported value.
    with self.assertRaises(ValueError) as value_ctx:
        jitted(strings, '', False)
    self.assertIn(value_msg, str(value_ctx.exception))
2559+
25272560
def test_series_str2str(self):
25282561
common_methods = ['lower', 'upper', 'isupper']
25292562
sdc_methods = ['capitalize', 'swapcase', 'title',

0 commit comments

Comments
 (0)