Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit defb961

Browse files
authored
Merge pull request #391 from IntelPython/apiref_generator_style_fixes
apiref_generator.py style fixes
2 parents 529cdd6 + 7766524 commit defb961

6 files changed

Lines changed: 97 additions & 116 deletions

File tree

-154 Bytes
Binary file not shown.
Binary file not shown.
-10.1 KB
Binary file not shown.
Binary file not shown.

docs/source/buildscripts/apiref_generator.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import pandas
2929
from sdc_object_utils import init_pandas_structure, init_sdc_structure, init_pandas_sdc_dict, get_sdc_object, get_obj
3030
from sdc_object_utils import get_class_methods, get_class_attributes, get_fully_qualified_name
31-
from sdc_doc_utils import is_sdc_user_guide_header, get_indent, reindent,get_short_description
31+
from sdc_doc_utils import is_sdc_user_guide_header, get_indent, reindent, get_short_description
3232
from sdc_doc_utils import split_in_sections, get_docstring, create_heading_str, cut_sdc_dev_guide
3333
import os
3434

@@ -43,7 +43,7 @@ def reformat(text):
4343
:param text: Original text with warnings
4444
:return: Modified text that fixes warnings
4545
"""
46-
text = reformat_replace_star_list_with_dash_list(text) # Must be called before :func:`reformat_asterisks`
46+
text = reformat_replace_star_list_with_dash_list(text) # Must be called before :func:`reformat_asterisks`
4747
text = reformat_asterisks(text) # Fix for * and ** symbols
4848
text = reformat_explicit_markup(text) # Fix for explicit markup without a blank line
4949
text = reformat_bullet_list(text) # Fix bullet list indentation issues
@@ -214,7 +214,7 @@ def reformat_asterisks(text):
214214
if idx2 == -1:
215215
# Only one single asterisk in the line - Reformat to `\*`
216216
line = line.replace('*', '\\*')
217-
idx = len(line) # Parsed the line. Go to another line
217+
idx = len(line) # Parsed the line. Go to another line
218218
elif idx2 == idx1+1:
219219
# First double asterisk met in the line
220220
idx2 = line.find('**', idx1+2)
@@ -278,7 +278,6 @@ def _get_param_text(title, param):
278278
elif title == 'Raises':
279279
return ':raises:'
280280

281-
282281
# Internal function. Returns correct markup for Parameters section
283282
def _reformat_parameters(title, text):
284283
lines = text.split('\n')
@@ -608,15 +607,15 @@ def parse_templ_rst(fname_templ):
608607
doc.pop(0) # Skipping ``.. sdc_toctree``
609608

610609
# Parsing the list of APIs
611-
while len(doc) >0 and doc[0].strip() != '':
610+
while len(doc) > 0 and doc[0].strip() != '':
612611
line = doc[0]
613612
indent = get_indent(line)
614613
line = line.strip()
615614
full_name = current_module_name + '.' + line
616615
obj = get_obj(full_name)
617616
short_description = generate_simple_object_doc(obj, short_doc_flag=True).strip()
618617
new_line = reindent(':ref:`', indent) + line + ' <' + full_name + '>`\n' + \
619-
reindent(short_description, indent+4) + '\n'
618+
reindent(short_description, indent+4) + '\n'
620619
fout.write(new_line)
621620
doc.pop(0)
622621

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 92 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,92 @@
4040

4141
import sdc
4242
import sdc.datatypes.common_functions as common_functions
43-
from sdc.datatypes.common_functions import TypeChecker
4443
from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
4544
from sdc.hiframes.pd_series_ext import SeriesType
4645
from sdc.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars)
4746
from sdc.utils import to_array
4847

48+
class TypeChecker:
    """
    Validate an object's type and raise TypingError if the type is invalid, e.g.:
        Method nsmallest(). The object n
         given: bool
         expected: int
    """
    # Positional fields: function name, parameter name, given type, expected type(s).
    msg_template = '{} The object {}\n given: {}\n expected: {}'

    def __init__(self, func_name):
        """
        Parameters
        ----------
        func_name: :obj:`str`
            name of the function in which the types are checked;
            it is placed at the start of every error message
        """
        self.func_name = func_name

    def raise_exc(self, data, expected_types, name=''):
        """
        Raise exception with unified message

        Parameters
        ----------
        data: :obj:`any`
            real type of the data
        expected_types: :obj:`str`
            expected types, inserted verbatim into the exception message
        name: :obj:`str`
            name of the parameter

        Raises
        ------
        TypingError
            always, with a message built from ``msg_template``
        """
        msg = self.msg_template.format(self.func_name, name, data, expected_types)
        # NOTE(review): TypingError is not defined in this block; presumably it is
        # imported at module level (numba) -- confirm.
        raise TypingError(msg)

    def check(self, data, accepted_type, name=''):
        """
        Check that the data type belongs to the specified type

        Parameters
        ----------
        data: :obj:`any`
            real type of the data
        accepted_type: :obj:`type` or :obj:`tuple` of :obj:`type`
            accepted type, or a (possibly nested) tuple of accepted types,
            exactly as :func:`isinstance` allows
        name: :obj:`str`
            name of the parameter

        Raises
        ------
        TypingError
            if ``data`` is not an instance of ``accepted_type``
        """
        if not isinstance(data, accepted_type):
            self.raise_exc(data, self._expected_names(accepted_type), name=name)

    @staticmethod
    def _expected_names(accepted_type):
        """
        Build the human-readable "expected" text for ``accepted_type``.

        A single type yields its ``__name__``. A tuple (which ``isinstance``
        accepts, but which has no ``__name__``) yields the comma-separated
        names of its members, flattening nested tuples.
        """
        if isinstance(accepted_type, tuple):
            return ', '.join(TypeChecker._expected_names(item) for item in accepted_type)
        # Fall back to str() for type-like objects that carry no __name__.
        return getattr(accepted_type, '__name__', str(accepted_type))
95+
4996

5097
@overload(operator.getitem)
5198
def hpat_pandas_series_getitem(self, idx):
5299
"""
100+
Intel Scalable Dataframe Compiler User Guide
101+
********************************************
102+
Pandas API: pandas.Series.get
103+
104+
Limitations
105+
-----------
106+
Supported ``key`` can be one of the following:
107+
- Integer scalar, e.g. :obj:`series[0]`
108+
- A slice, e.g. :obj:`series[2:5]`
109+
- Another series
110+
111+
Examples
112+
--------
113+
.. literalinclude:: ../../../examples/series_getitem.py
114+
:language: python
115+
:lines: 27-
116+
:caption: Getting Pandas Series elements
117+
:name: ex_series_getitem
118+
119+
.. code-block:: console
120+
121+
> python ./series_getitem.py
122+
55
123+
124+
.. todo:: Fix SDC behavior and add the expected output of the > python ./series_getitem.py to the docstring
125+
126+
Intel Scalable Dataframe Compiler Developer Guide
127+
*************************************************
128+
53129
Pandas Series operator :attr:`pandas.Series.get` implementation
54130
**Algorithm**: result = series[idx]
55131
@@ -1131,7 +1207,7 @@ def hpat_pandas_series_corr(self, other, method='pearson', min_periods=None):
11311207
if not isinstance(other.data.dtype, types.Number):
11321208
ty_checker.raise_exc(other.data, 'number', 'other.data')
11331209

1134-
if not isinstance(min_periods, (int, types.Integer, types.Omitted, types.NoneType)) and min_periods is not None:
1210+
if not isinstance(min_periods, (types.Integer, types.Omitted, types.NoneType)):
11351211
ty_checker.raise_exc(min_periods, 'int64', 'min_periods')
11361212

11371213
def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None):
@@ -1153,20 +1229,7 @@ def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None
11531229
if len(self_arr) < min_periods:
11541230
return numpy.nan
11551231

1156-
new_self = pandas.Series(self_arr)
1157-
new_other = pandas.Series(other_arr)
1158-
1159-
n = new_self.count()
1160-
ma = new_self.sum()
1161-
mb = new_other.sum()
1162-
a = n * (self_arr * other_arr).sum() - ma * mb
1163-
b1 = n * (self_arr * self_arr).sum() - ma * ma
1164-
b2 = n * (other_arr * other_arr).sum() - mb * mb
1165-
1166-
if b1 == 0 or b2 == 0:
1167-
return numpy.nan
1168-
1169-
return a / numpy.sqrt(b1 * b2)
1232+
return numpy.corrcoef(self_arr, other_arr)[0, 1]
11701233

11711234
return hpat_pandas_series_corr_impl
11721235

@@ -2100,77 +2163,6 @@ def hpat_pandas_series_quantile_impl(self, q=0.5, interpolation='linear'):
21002163
return hpat_pandas_series_quantile_impl
21012164

21022165

2103-
@overload_method(SeriesType, 'rename')
2104-
def hpat_pandas_series_rename(self, index=None, copy=True, inplace=False, level=None):
2105-
"""
2106-
Pandas Series method :meth:`pandas.Series.rename` implementation.
2107-
Alter Series index labels or name.
2108-
.. only:: developer
2109-
Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_rename
2110-
2111-
Parameters
2112-
-----------
2113-
index : :obj:`scalar` or `hashable sequence` or `dict` or `function`
2114-
Dict-like or functions are transformations to apply to the index.
2115-
Scalar or hashable sequence-like will alter the Series.name attribute.
2116-
Only scalar value is supported.
2117-
copy : :obj:`bool`, default :obj:`True`
2118-
Whether to copy underlying data.
2119-
inplace : :obj:`bool`, default :obj:`False`
2120-
Whether to return a new Series. If True then value of copy is ignored.
2121-
level : :obj:`int` or `str`
2122-
In case of a MultiIndex, only rename labels in the specified level.
2123-
*Not supported*
2124-
Returns
2125-
-------
2126-
:obj:`pandas.Series`
2127-
returns :obj:`pandas.Series` with index labels or name altered.
2128-
"""
2129-
2130-
ty_checker = TypeChecker('Method rename().')
2131-
ty_checker.check(self, SeriesType)
2132-
2133-
if not isinstance(index, (types.Omitted, types.UnicodeType,
2134-
types.StringLiteral, str,
2135-
types.Integer, types.Boolean,
2136-
types.Hashable, types.Float,
2137-
types.NPDatetime, types.NPTimedelta,
2138-
types.Number)) and index is not None:
2139-
ty_checker.raise_exc(index, 'string', 'index')
2140-
2141-
if not isinstance(copy, (types.Omitted, types.Boolean, bool)):
2142-
ty_checker.raise_exc(copy, 'boolean', 'copy')
2143-
2144-
if not isinstance(inplace, (types.Omitted, types.Boolean, bool)):
2145-
ty_checker.raise_exc(inplace, 'boolean', 'inplace')
2146-
2147-
if not isinstance(level, (types.Omitted, types.UnicodeType,
2148-
types.StringLiteral, types.Integer)) and level is not None:
2149-
ty_checker.raise_exc(level, 'Integer or srting', 'level')
2150-
2151-
def hpat_pandas_series_rename_idx_impl(self, index=None, copy=True, inplace=False, level=None):
2152-
if copy is True:
2153-
series_data = self._data.copy()
2154-
series_index = self._index.copy()
2155-
else:
2156-
series_data = self._data
2157-
series_index = self._index
2158-
2159-
return pandas.Series(data=series_data, index=series_index, name=index)
2160-
2161-
def hpat_pandas_series_rename_noidx_impl(self, index=None, copy=True, inplace=False, level=None):
2162-
if copy is True:
2163-
series_data = self._data.copy()
2164-
else:
2165-
series_data = self._data
2166-
2167-
return pandas.Series(data=series_data, index=self._index, name=index)
2168-
2169-
if isinstance(self.index, types.NoneType):
2170-
return hpat_pandas_series_rename_noidx_impl
2171-
return hpat_pandas_series_rename_idx_impl
2172-
2173-
21742166
@overload_method(SeriesType, 'min')
21752167
def hpat_pandas_series_min(self, axis=None, skipna=True, level=None, numeric_only=None):
21762168
"""
@@ -2933,12 +2925,11 @@ def hpat_pandas_series_nunique_str_impl(self, dropna=True):
29332925
It is better to merge with Numeric branch
29342926
"""
29352927

2936-
data = self._data
2937-
if dropna:
2938-
nan_mask = self.isna()
2939-
data = self._data[~nan_mask._data]
2940-
unique_values = set(data)
2941-
return len(unique_values)
2928+
str_set = set(self._data)
2929+
if dropna == False:
2930+
return len(str_set) - 1
2931+
else:
2932+
return len(str_set)
29422933

29432934
return hpat_pandas_series_nunique_str_impl
29442935

@@ -2992,8 +2983,7 @@ def hpat_pandas_series_count(self, level=None):
29922983
if isinstance(self.data, StringArrayType):
29932984
def hpat_pandas_series_count_str_impl(self, level=None):
29942985

2995-
nan_mask = self.isna()
2996-
return numpy.sum(nan_mask._data == 0)
2986+
return len(self._data)
29972987

29982988
return hpat_pandas_series_count_str_impl
29992989

@@ -3143,10 +3133,10 @@ def hpat_pandas_series_argsort_idx_impl(self, axis=0, kind='quicksort', order=No
31433133
sort_nona = numpy.argsort(self._data[~na_data_arr])
31443134
q = 0
31453135
for id, i in enumerate(sort):
3146-
if id in set(sort[len(self._data) - na:]):
3147-
q += 1
3136+
if id not in list(sort[len(self._data) - na:]):
3137+
result[id] = sort_nona[id-q]
31483138
else:
3149-
result[id] = sort_nona[id - q]
3139+
q += 1
31503140
for i in sort[len(self._data) - na:]:
31513141
result[i] = -1
31523142

@@ -3170,10 +3160,10 @@ def hpat_pandas_series_argsort_noidx_impl(self, axis=0, kind='quicksort', order=
31703160
sort_nona = numpy.argsort(self._data[~na_data_arr])
31713161
q = 0
31723162
for id, i in enumerate(sort):
3173-
if id in set(sort[len(self._data) - na:]):
3174-
q += 1
3175-
else:
3163+
if id not in list(sort[len(self._data) - na:]):
31763164
result[id] = sort_nona[id - q]
3165+
else:
3166+
q += 1
31773167
for i in sort[len(self._data) - na:]:
31783168
result[i] = -1
31793169

@@ -3580,15 +3570,7 @@ def hpat_pandas_series_cov_impl(self, other, min_periods=None):
35803570
if len(self_arr) < min_periods:
35813571
return numpy.nan
35823572

3583-
new_self = pandas.Series(self_arr)
3584-
3585-
ma = new_self.mean()
3586-
mb = other.mean()
3587-
3588-
if numpy.isinf(mb):
3589-
return numpy.nan
3590-
3591-
return ((self_arr - ma) * (other_arr - mb)).sum() / (new_self.count() - 1.0)
3573+
return numpy.cov(self_arr, other_arr)[0, 1]
35923574

35933575
return hpat_pandas_series_cov_impl
35943576

0 commit comments

Comments
 (0)