Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 0b73620

Browse files
author
Ehsan Totoni
committed
fix split view boxing
1 parent 57926a5 commit 0b73620

2 files changed

Lines changed: 52 additions & 15 deletions

File tree

hpat/_str_ext.cpp

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ int64_t hash_str(std::string* in_str);
9292
void c_glob(uint32_t **offsets, char **data, uint8_t **null_bitmap, int64_t* num_strings, char* path);
9393
npy_intp array_size(PyArrayObject* arr);
9494
void* array_getptr1(PyArrayObject* arr, npy_intp ind);
95+
void array_setitem(PyArrayObject* arr, char* p, PyObject *s);
96+
9597

9698
PyMODINIT_FUNC PyInit_hstr_ext(void) {
9799
PyObject *m;
@@ -192,6 +194,8 @@ PyMODINIT_FUNC PyInit_hstr_ext(void) {
192194
PyLong_FromVoidPtr((void*)(&array_size)));
193195
PyObject_SetAttrString(m, "array_getptr1",
194196
PyLong_FromVoidPtr((void*)(&array_getptr1)));
197+
PyObject_SetAttrString(m, "array_setitem",
198+
PyLong_FromVoidPtr((void*)(&array_setitem)));
195199
return m;
196200
}
197201

@@ -249,11 +253,14 @@ void dtor_str_arr_split_view(str_arr_split_view_payload* in_str_arr, int64_t siz
249253
return;
250254
}
251255

256+
// example: ['AB,CC', 'C,ABB,D', 'G', '', 'g,f']
257+
// offsets [0, 5, 12, 13, 13, 16]
258+
// data_offsets [-1, 2, 5, 4, 6, 10, 12, 11, 13, 12, 13, 12, 14, 16]
259+
// index_offsets [0, 3, 7, 9, 11, 14]
252260
void str_arr_split_view_impl(str_arr_split_view_payload* out_view, int64_t n_strs, uint32_t* offsets, char* data, char sep)
253261
{
254262
uint32_t total_chars = offsets[n_strs];
255-
printf("n_strs %d sep %c total chars:%d\n", n_strs, sep, total_chars);
256-
//return;
263+
// printf("n_strs %d sep %c total chars:%d\n", n_strs, sep, total_chars);
257264
uint32_t* index_offsets = new uint32_t[n_strs+1];
258265
std::vector<uint32_t> data_offs;
259266

@@ -273,6 +280,8 @@ void str_arr_split_view_impl(str_arr_split_view_payload* out_view, int64_t n_str
273280
index_offsets[str_ind+1] = data_offs.size();
274281
str_ind++;
275282
if (str_ind == n_strs) break; // all finished
283+
// start new string
284+
data_offs.push_back(data_ind-1);
276285
continue; // stay on same data_ind for start of next string
277286
}
278287
if (data[data_ind] == sep)
@@ -286,14 +295,14 @@ void str_arr_split_view_impl(str_arr_split_view_payload* out_view, int64_t n_str
286295
// TODO: avoid copy
287296
std::copy(data_offs.cbegin(), data_offs.cend(), out_view->data_offsets);
288297

289-
printf("index_offsets: ");
290-
for (int i=0; i<=n_strs; i++)
291-
printf("%d ", index_offsets[i]);
292-
printf("\n");
293-
printf("data_offsets: ");
294-
for (int i=0; i<data_offs.size(); i++)
295-
printf("%d ", data_offs[i]);
296-
printf("\n");
298+
// printf("index_offsets: ");
299+
// for (int i=0; i<=n_strs; i++)
300+
// printf("%d ", index_offsets[i]);
301+
// printf("\n");
302+
// printf("data_offsets: ");
303+
// for (int i=0; i<data_offs.size(); i++)
304+
// printf("%d ", data_offs[i]);
305+
// printf("\n");
297306
return;
298307
}
299308

@@ -704,6 +713,16 @@ void* array_getptr1(PyArrayObject* arr, npy_intp ind)
704713
return PyArray_GETPTR1(arr, ind);
705714
}
706715

716+
// Stores the Python object `s` into NumPy array `arr` at the item address `p`
// by calling PyArray_SETITEM. Returns nothing: if the call reports a nonzero
// result, a message is written to std::cerr and the function returns.
// NOTE(review): `p` is presumably an element pointer obtained from
// array_getptr1 for the same array -- confirm against the caller.
void array_setitem(PyArrayObject* arr, char* p, PyObject *s)
717+
{
718+
// Function-local helper macro: print `msg` to std::cerr and return early when `expr` is false.
#define CHECK(expr, msg) if(!(expr)){std::cerr << msg << std::endl; return;}
719+
// NOTE(review): the disabled debug line below refers to `ind`, which is not a parameter of this function.
// std::cout << "get array ptr " << ind << '\n';
720+
int err = PyArray_SETITEM(arr, p, s);
721+
// The failure is only logged here; no status is passed back to the caller.
CHECK(err==0, "setting item in numpy array failed");
722+
return;
723+
// Undefine the helper so it stays scoped to this function.
#undef CHECK
724+
}
725+
707726
// glob support
708727
void c_glob(uint32_t **offsets, char **data, uint8_t **null_bitmap, int64_t* num_strings, char* path)
709728
{

hpat/hiframes/split_impl.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
import llvmlite.llvmpy.core as lc
2020
from llvmlite import ir as lir
2121
import llvmlite.binding as ll
22+
from llvmlite.llvmpy.core import Type as LLType
23+
from .. import hstr_ext
24+
ll.add_symbol('array_setitem', hstr_ext.array_setitem)
25+
ll.add_symbol('array_getptr1', hstr_ext.array_getptr1)
2226

2327
from .. import hstr_ext
2428
ll.add_symbol('dtor_str_arr_split_view', hstr_ext.dtor_str_arr_split_view)
@@ -187,6 +191,15 @@ def box_str_arr_split_view(typ, val, c):
187191
out_arr = c.pyapi.call_method(
188192
np_class_obj, "ndarray", (num_items_obj, dtype))
189193

194+
# Array setitem call
195+
arr_get_fnty = LLType.function(
196+
lir.IntType(8).as_pointer(), [c.pyapi.pyobj, c.pyapi.py_ssize_t])
197+
arr_get_fn = c.pyapi._get_function(arr_get_fnty, name="array_getptr1")
198+
arr_setitem_fnty = LLType.function(
199+
lir.VoidType(),
200+
[c.pyapi.pyobj, lir.IntType(8).as_pointer(), c.pyapi.pyobj])
201+
arr_setitem_fn = c.pyapi._get_function(
202+
arr_setitem_fnty, name="array_setitem")
190203

191204
# for each string
192205
with cgutils.for_range(builder, sp_view.num_items) as loop:
@@ -200,7 +213,8 @@ def box_str_arr_split_view(typ, val, c):
200213

201214
# Build a new Python list
202215
nitems = builder.sub(list_end_offset, list_start_offset)
203-
cgutils.printf(builder, "str %lld n %lld\n", str_ind, nitems)
216+
nitems = builder.sub(nitems, nitems.type(1))
217+
# cgutils.printf(builder, "str %lld n %lld\n", str_ind, nitems)
204218
list_obj = c.pyapi.list_new(nitems)
205219
with c.builder.if_then(cgutils.is_not_null(c.builder, list_obj),
206220
likely=True):
@@ -211,10 +225,14 @@ def box_str_arr_split_view(typ, val, c):
211225
# add 1 since starts from -1
212226
data_start = builder.add(data_start, data_start.type(1))
213227
data_end = builder.load(builder.gep(sp_view.data_offsets, [builder.add(start_index, start_index.type(1))]))
214-
cgutils.printf(builder, "ind %lld %lld\n", data_start, data_end)
215-
#itemobj =
216-
#c.pyapi.list_setitem(obj, loop.index, itemobj)
217-
228+
# cgutils.printf(builder, "ind %lld %lld\n", data_start, data_end)
229+
data_ptr = builder.gep(builder.extract_value(sp_view.data, 0), [data_start])
230+
str_size = builder.sext(builder.sub(data_end, data_start), lir.IntType(64))
231+
str_obj = c.pyapi.string_from_string_and_size(data_ptr, str_size)
232+
c.pyapi.list_setitem(list_obj, loop.index, str_obj)
233+
234+
arr_ptr = builder.call(arr_get_fn, [out_arr, str_ind])
235+
builder.call(arr_setitem_fn, [out_arr, arr_ptr, list_obj])
218236

219237

220238
c.pyapi.decref(np_class_obj)

0 commit comments

Comments
 (0)