|
37 | 37 | from hpat.hiframes import series_kernels, split_impl |
38 | 38 | from hpat.hiframes.series_kernels import series_replace_funcs |
39 | 39 | from hpat.hiframes.split_impl import (string_array_split_view_type, |
40 | | - StringArraySplitViewType) |
| 40 | + StringArraySplitViewType, getitem_c_arr, get_array_ctypes_ptr) |
41 | 41 |
|
42 | 42 |
|
43 | 43 | _dt_index_binops = ('==', '!=', '>=', '>', '<=', '<', '-', |
@@ -1734,9 +1734,50 @@ def _str_get_impl(str_arr, ind): |
1734 | 1734 | out_arr[i] = _str |
1735 | 1735 | return hpat.hiframes.api.init_series(out_arr) |
1736 | 1736 |
|
| 1737 | + if arr_typ == string_array_split_view_type: |
| 1738 | + # TODO: refactor (the two loops below repeat the same offset computation) and enable distributed execution |
| 1739 | + def _str_get_impl(arr, ind): |
| 1740 | + numba.parfor.init_prange() |
| 1741 | + n = len(arr) |
| 1742 | + n_total_chars = 0 |
| 1743 | + for i in numba.parfor.internal_prange(n): |
| 1744 | + start_index = getitem_c_arr(arr._index_offsets, i) |
| 1745 | + # TODO: check ind against the number of strings in row i and support NaN |
| 1746 | + # end_index = getitem_c_arr(arr._index_offsets, i+1) |
| 1747 | + data_start = getitem_c_arr( |
| 1748 | + arr._data_offsets, start_index + ind) |
| 1749 | + data_start += 1 |
| 1750 | + # work around -1 not being storable in uint32: the very first data offset is forced to 0 |
| 1751 | + if start_index + ind == 0: |
| 1752 | + data_start = 0 |
| 1753 | + data_end = getitem_c_arr( |
| 1754 | + arr._data_offsets, start_index + ind + 1) |
| 1755 | + length = data_end - data_start |
| 1756 | + n_total_chars += length |
| 1757 | + numba.parfor.init_prange() |
| 1758 | + out_arr = pre_alloc_string_array(n, n_total_chars) |
| 1759 | + for i in numba.parfor.internal_prange(n): |
| 1760 | + start_index = getitem_c_arr(arr._index_offsets, i) |
| 1761 | + # TODO: check ind against the number of strings in row i and support NaN |
| 1762 | + # end_index = getitem_c_arr(arr._index_offsets, i+1) |
| 1763 | + data_start = getitem_c_arr( |
| 1764 | + arr._data_offsets, start_index + ind) |
| 1765 | + data_start += 1 |
| 1766 | + # work around -1 not being storable in uint32: the very first data offset is forced to 0 |
| 1767 | + if start_index + ind == 0: |
| 1768 | + data_start = 0 |
| 1769 | + data_end = getitem_c_arr( |
| 1770 | + arr._data_offsets, start_index + ind + 1) |
| 1771 | + length = data_end - data_start |
| 1772 | + ptr = get_array_ctypes_ptr(arr._data, data_start) |
| 1773 | + hpat.str_arr_ext.setitem_str_arr_ptr(out_arr, i, ptr, length) |
| 1774 | + return hpat.hiframes.api.init_series(out_arr) |
| 1775 | + |
1737 | 1776 | return self._replace_func(_str_get_impl, [arr, ind_var], |
1738 | 1777 | pre_nodes=nodes, |
1739 | | - extra_globals={'pre_alloc_string_array': pre_alloc_string_array}) |
| 1778 | + extra_globals={'pre_alloc_string_array': pre_alloc_string_array, |
| 1779 | + 'get_array_ctypes_ptr': get_array_ctypes_ptr, |
| 1780 | + 'getitem_c_arr': getitem_c_arr}) |
1740 | 1781 |
|
1741 | 1782 | def _is_dt_index_binop(self, rhs): |
1742 | 1783 | if rhs.op != 'binop': |
|
0 commit comments