Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit c788989

Browse files
SAT-2978 - SDC Series binops corrupt operands if fill_value is used (#891)
* SAT-2978 - SDC Series binops corrupt operands if fill_value is used

  This PR solves multiple issues in the implementation of Series arithmetic and comparison methods:
  * fixes wrong filling behavior (e.g. np.nan + np.nan was 2 * fill_value)
  * removes inplace=True operations that corrupt operands
  * fixes broken scalar + series use cases
  * fixes a sub-case of the common case where series indexes are equal, and hence no alignment should happen
  * adds performance tests for all the different implementations
  * moves operators and binop methods unit tests to a separate test suite
  * unskips many tests incorrectly decorated with skip_parallel

* Fixing PEP and other remarks
1 parent 7d176c8 commit c788989

15 files changed

Lines changed: 2491 additions & 2057 deletions

buildscripts/autogen_sources.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -117,9 +117,17 @@
117117
imports_start_line, import_end_line = min(imports_line_numbers), max(imports_line_numbers)
118118
import_section_text = ''.join(module_text_lines[imports_start_line: import_end_line + 1])
119119

120-
# read function templates for arithmetic and comparison operators from templates module
120+
# templates for arithmetic methods
121+
template_func_binop_def = inspect.getsource(templates_module.sdc_binop)
122+
template_func_binop_ovld = inspect.getsource(templates_module.sdc_binop_ovld)
121123
template_series_binop = inspect.getsource(templates_module.sdc_pandas_series_binop)
124+
125+
# templates for comparison methods
126+
template_func_comp_binop_def = inspect.getsource(templates_module.sdc_comp_binop)
127+
template_func_comp_binop_ovld = inspect.getsource(templates_module.sdc_comp_binop_ovld)
122128
template_series_comp_binop = inspect.getsource(templates_module.sdc_pandas_series_comp_binop)
129+
130+
# templates for operators
123131
template_series_operator = inspect.getsource(templates_module.sdc_pandas_series_operator_binop)
124132
template_series_comp_operator = inspect.getsource(templates_module.sdc_pandas_series_operator_comp_binop)
125133
template_str_arr_comp_binop = inspect.getsource(templates_module.sdc_str_arr_operator_comp_binop)
@@ -136,27 +144,35 @@
136144
# certain modifications need to be applied to the templates, so
137145
# verify correctness of produced code manually
138146
for name in arithmetic_binops_symbols:
139-
func_text = template_series_binop.replace('binop', name)
147+
func_text = template_func_binop_def.replace('binop', name)
148+
file.write(f'\n\n{func_text}')
149+
func_text = template_func_binop_ovld.replace('def ', f"@sdc_overload(sdc_{name})\ndef ", 1)
150+
func_text = func_text.replace('binop', name)
140151
func_text = func_text.replace(' + ', f' {arithmetic_binops_symbols[name]} ')
152+
file.write(f'\n\n{func_text}')
153+
func_text = template_series_binop.replace('binop', name)
141154
func_text = func_text.replace('def ', f"@sdc_overload_method(SeriesType, '{name}')\ndef ", 1)
142155
file.write(f'\n\n{func_text}')
143156

144157
for name in comparison_binops_symbols:
145-
func_text = template_series_comp_binop.replace('comp_binop', name)
158+
func_text = template_func_comp_binop_def.replace('comp_binop', name)
159+
file.write(f'\n\n{func_text}')
160+
func_text = template_func_comp_binop_ovld.replace('def ', f"@sdc_overload(sdc_{name})\ndef ", 1)
161+
func_text = func_text.replace('comp_binop', name)
146162
func_text = func_text.replace(' < ', f' {comparison_binops_symbols[name]} ')
163+
file.write(f'\n\n{func_text}')
164+
func_text = template_series_comp_binop.replace('comp_binop', name)
147165
func_text = func_text.replace('def ', f"@sdc_overload_method(SeriesType, '{name}')\ndef ", 1)
148166
file.write(f'\n\n{func_text}')
149167

150168
for name in arithmetic_binops_symbols:
151169
if name != "div":
152170
func_text = template_series_operator.replace('binop', name)
153-
func_text = func_text.replace(' + ', f' {arithmetic_binops_symbols[name]} ')
154171
func_text = func_text.replace('def ', f'@sdc_overload(operator.{name})\ndef ', 1)
155172
file.write(f'\n\n{func_text}')
156173

157174
for name in comparison_binops_symbols:
158175
func_text = template_series_comp_operator.replace('comp_binop', name)
159-
func_text = func_text.replace(' < ', f' {comparison_binops_symbols[name]} ')
160176
func_text = func_text.replace('def ', f'@sdc_overload(operator.{name})\ndef ', 1)
161177
file.write(f'\n\n{func_text}')
162178

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4137,8 +4137,6 @@ def hpat_pandas_series_fillna_impl(self, value=None, method=None, axis=None, inp
41374137
else:
41384138
# non inplace implementations, copy array, fill the NA/NaN and return a new Series
41394139
if isinstance(self.dtype, types.UnicodeType):
4140-
# For StringArrayType implementation is taken from _series_fillna_str_alloc_impl
4141-
# (can be called directly when it's index handling is fixed)
41424140
def hpat_pandas_series_str_fillna_impl(self, value=None, method=None, axis=None,
41434141
inplace=False, limit=None, downcast=None):
41444142
return pandas.Series(data=numpy_like.fillna(self._data, inplace=inplace, value=value),

sdc/functions/numpy_like.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,8 @@ def sdc_fillna_str_impl(self, inplace=False, value=None):
602602
num_chars += get_utf8_size(s)
603603

604604
filled_data = pre_alloc_string_array(n, num_chars)
605-
for i in prange(n):
605+
# StringArray doesn't support parallel setitem, thus no prange here
606+
for i in numpy.arange(n):
606607
if sdc.hiframes.api.isna(self, i):
607608
filled_data[i] = value
608609
else:

0 commit comments

Comments
 (0)