2727from .. import hstr_ext
# Register the native split-view helpers exported by hstr_ext with LLVM under
# fixed symbol names; generated code looks functions up by these names
# (e.g. "str_arr_split_view_alloc").
ll.add_symbol(
    'dtor_str_arr_split_view', hstr_ext.dtor_str_arr_split_view)
ll.add_symbol(
    'str_arr_split_view_impl', hstr_ext.str_arr_split_view_impl)
ll.add_symbol(
    'str_arr_split_view_alloc', hstr_ext.str_arr_split_view_alloc)

# Numba element types for character data and for offset entries.
char_typ = types.uint8
offset_typ = types.uint32
@@ -167,6 +168,7 @@ def codegen(context, builder, sig, args):
167168 out_view .num_items = in_str_arr .num_items
168169 out_view .index_offsets = view_payload .index_offsets
169170 out_view .data_offsets = view_payload .data_offsets
171+ # TODO: incref?
170172 out_view .data = context .compile_internal (
171173 builder , lambda S : get_data_ptr (S ),
172174 data_ctypes_type (string_array_type ), [str_arr ])
@@ -245,6 +247,58 @@ def box_str_arr_split_view(typ, val, c):
245247 return out_arr
246248
247249
@intrinsic
def pre_alloc_str_arr_view(typingctx, num_items_t, num_offsets_t, data_t=None):
    """Build a new string-array split view that reuses an existing data pointer.

    Typing: both size arguments must be ``types.intp``; the result type is
    ``string_array_split_view_type``.

    Lowering: constructs a fresh split-view payload, calls the native
    ``str_arr_split_view_alloc`` with the payload pointer and the two sizes,
    then fills a view struct whose offset pointers come from that payload and
    whose ``data`` field is the pointer passed in by the caller.
    """
    assert num_items_t == types.intp and num_offsets_t == types.intp

    def codegen(context, builder, sig, args):
        n_items, n_offsets, shared_data = args
        meminfo, payload_ptr = construct_str_arr_split_view(context, builder)

        # Native signature:
        #   void str_arr_split_view_alloc(str_arr_split_view_payload* out_view,
        #                                 int64_t num_items,
        #                                 int64_t num_offsets)
        alloc_fnty = lir.FunctionType(
            lir.VoidType(),
            [payload_ptr.type, lir.IntType(64), lir.IntType(64)],
        )
        alloc_fn = builder.module.get_or_insert_function(
            alloc_fnty, name="str_arr_split_view_alloc")
        builder.call(alloc_fn, [payload_ptr, n_items, n_offsets])

        # Re-load the payload after the native call so the proxy sees the
        # offset pointers the call stored into it.
        payload = cgutils.create_struct_proxy(
            str_arr_split_view_payload_type)(
                context, builder, value=builder.load(payload_ptr))

        view = context.make_helper(builder, string_array_split_view_type)
        view.num_items = n_items
        view.index_offsets = payload.index_offsets
        view.data_offsets = payload.data_offsets
        # TODO: incref? The data pointer is stored as-is, with no reference
        # taken on whatever owns it.
        view.data = shared_data
        # NOTE: the null bitmap is not copied from the payload here.
        view.meminfo = meminfo

        return impl_ret_new_ref(
            context, builder, string_array_split_view_type, view._getvalue())

    signature = string_array_split_view_type(types.intp, types.intp, data_t)
    return signature, codegen
290+
291+
@intrinsic
def get_c_arr_ptr(typingctx, c_arr, ind_t=None):
    """Return the address of element ``ind`` of a C array as a ``voidptr``.

    The element address is computed with a GEP on the array pointer and then
    bitcast to an ``i8*`` to match the ``types.voidptr`` return type.
    """
    def codegen(context, builder, sig, args):
        base_ptr, index = args
        elem_ptr = builder.gep(base_ptr, [index])
        byte_ptr_ty = lir.IntType(8).as_pointer()
        return builder.bitcast(elem_ptr, byte_ptr_ty)

    signature = types.voidptr(c_arr, ind_t)
    return signature, codegen
300+
301+
248302@intrinsic
249303def getitem_c_arr (typingctx , c_arr , ind_t = None ):
250304 def codegen (context , builder , sig , args ):
@@ -254,6 +308,16 @@ def codegen(context, builder, sig, args):
254308 return c_arr .dtype (c_arr , ind_t ), codegen
255309
256310
@intrinsic
def setitem_c_arr(typingctx, c_arr, ind_t, item_t=None):
    """Store ``item`` into element ``ind`` of a C array; returns nothing.

    The signature declares the stored value as ``c_arr.dtype`` rather than
    ``item_t``, so the value argument is typed as the array's element type.
    """
    def codegen(context, builder, sig, args):
        base_ptr, index, value = args
        # Address of the target element, then a plain store into it.
        builder.store(value, builder.gep(base_ptr, [index]))

    signature = types.void(c_arr, ind_t, c_arr.dtype)
    return signature, codegen
319+
320+
257321@intrinsic
258322def get_array_ctypes_ptr (typingctx , arr_ctypes_t , ind_t = None ):
259323 def codegen (context , builder , sig , args ):
@@ -286,7 +350,6 @@ def _impl(A, ind):
286350 end_index = getitem_c_arr (A ._index_offsets , ind + 1 )
287351 n = end_index - start_index - 1
288352
289-
290353 str_list = hpat .str_ext .alloc_str_list (n )
291354 for i in range (n ):
292355 data_start = getitem_c_arr (
@@ -306,3 +369,41 @@ def _impl(A, ind):
306369 return str_list
307370
308371 return _impl
372+
373+ if A == string_array_split_view_type and ind == types .Array (types .bool_ , 1 , 'C' ):
def _impl(A, ind):
    # Boolean-mask getitem for a split view: builds a new view containing
    # only the rows of A whose mask entry is true. The new view shares A's
    # data pointer; only the index/data offset tables are rebuilt.
    total = len(A)
    if len(ind) != total:
        raise IndexError("boolean index did not match indexed array"
                         " along dimension 0")

    # Pass 1: count the selected rows and the data-offset entries they own,
    # so the output can be allocated in one go.
    n_selected = 0
    n_offsets_total = 0
    for row in range(total):
        if not ind[row]:
            continue
        n_selected += 1
        first = getitem_c_arr(A._index_offsets, row)
        last = getitem_c_arr(A._index_offsets, row + 1)
        n_offsets_total += last - first

    result = pre_alloc_str_arr_view(n_selected, n_offsets_total, A._data)

    # Pass 2: for every selected row, record where its offsets start in the
    # output and copy its slice of the data-offset table across.
    out_row = 0
    out_off = 0
    for row in range(total):
        if not ind[row]:
            continue
        first = getitem_c_arr(A._index_offsets, row)
        last = getitem_c_arr(A._index_offsets, row + 1)
        span = last - first

        setitem_c_arr(result._index_offsets, out_row, out_off)
        src = get_c_arr_ptr(A._data_offsets, first)
        dst = get_c_arr_ptr(result._data_offsets, out_off)
        # Last argument 4: presumably the byte size of one offset entry
        # (offset_typ is uint32) -- confirm against _memcpy's definition.
        _memcpy(dst, src, span, 4)
        out_row += 1
        out_off += span

    # Terminating entry: one past the final row's offsets.
    setitem_c_arr(result._index_offsets, out_row, out_off)
    return result
408+
409+ return _impl
0 commit comments