@@ -92,6 +92,8 @@ int64_t hash_str(std::string* in_str);
9292void c_glob (uint32_t **offsets, char **data, uint8_t **null_bitmap, int64_t * num_strings, char * path);
9393npy_intp array_size (PyArrayObject* arr);
9494void * array_getptr1 (PyArrayObject* arr, npy_intp ind);
95+ void array_setitem (PyArrayObject* arr, char * p, PyObject *s);
96+
9597
9698PyMODINIT_FUNC PyInit_hstr_ext (void ) {
9799 PyObject *m;
@@ -192,6 +194,8 @@ PyMODINIT_FUNC PyInit_hstr_ext(void) {
192194 PyLong_FromVoidPtr ((void *)(&array_size)));
193195 PyObject_SetAttrString (m, " array_getptr1" ,
194196 PyLong_FromVoidPtr ((void *)(&array_getptr1)));
197+ PyObject_SetAttrString (m, " array_setitem" ,
198+ PyLong_FromVoidPtr ((void *)(&array_setitem)));
195199 return m;
196200}
197201
@@ -249,11 +253,14 @@ void dtor_str_arr_split_view(str_arr_split_view_payload* in_str_arr, int64_t siz
249253 return ;
250254}
251255
256+ // example: ['AB,CC', 'C,ABB,D', 'G', '', 'g,f']
257+ // offsets [0, 5, 12, 13, 13, 16]
258+ // data_offsets [-1, 2, 5, 4, 6, 10, 12, 11, 13, 12, 13, 12, 14, 16]
259+ // index_offsets [0, 3, 7, 9, 11, 14]
252260void str_arr_split_view_impl (str_arr_split_view_payload* out_view, int64_t n_strs, uint32_t * offsets, char * data, char sep)
253261{
254262 uint32_t total_chars = offsets[n_strs];
255- printf (" n_strs %d sep %c total chars:%d\n " , n_strs, sep, total_chars);
256- // return;
263+ // printf("n_strs %d sep %c total chars:%d\n", n_strs, sep, total_chars);
257264 uint32_t * index_offsets = new uint32_t [n_strs+1 ];
258265 std::vector<uint32_t > data_offs;
259266
@@ -273,6 +280,8 @@ void str_arr_split_view_impl(str_arr_split_view_payload* out_view, int64_t n_str
273280 index_offsets[str_ind+1 ] = data_offs.size ();
274281 str_ind++;
275282 if (str_ind == n_strs) break ; // all finished
283+ // start new string
284+ data_offs.push_back (data_ind-1 );
276285 continue ; // stay on same data_ind for start of next string
277286 }
278287 if (data[data_ind] == sep)
@@ -286,14 +295,14 @@ void str_arr_split_view_impl(str_arr_split_view_payload* out_view, int64_t n_str
286295 // TODO: avoid copy
287296 std::copy (data_offs.cbegin (), data_offs.cend (), out_view->data_offsets );
288297
289- printf (" index_offsets: " );
290- for (int i=0 ; i<=n_strs; i++)
291- printf (" %d " , index_offsets[i]);
292- printf (" \n " );
293- printf (" data_offsets: " );
294- for (int i=0 ; i<data_offs.size (); i++)
295- printf (" %d " , data_offs[i]);
296- printf (" \n " );
298+ // printf("index_offsets: ");
299+ // for (int i=0; i<=n_strs; i++)
300+ // printf("%d ", index_offsets[i]);
301+ // printf("\n");
302+ // printf("data_offsets: ");
303+ // for (int i=0; i<data_offs.size(); i++)
304+ // printf("%d ", data_offs[i]);
305+ // printf("\n");
297306 return ;
298307}
299308
@@ -704,6 +713,16 @@ void* array_getptr1(PyArrayObject* arr, npy_intp ind)
704713 return PyArray_GETPTR1 (arr, ind);
705714}
706715
716+ void array_setitem (PyArrayObject* arr, char * p, PyObject *s)
717+ {
718+ #define CHECK (expr, msg ) if (!(expr)){std::cerr << msg << std::endl; return ;}
719+ // std::cout << "get array ptr " << ind << '\n';
720+ int err = PyArray_SETITEM (arr, p, s);
721+ CHECK (err==0 , " setting item in numpy array failed" );
722+ return ;
723+ #undef CHECK
724+ }
725+
707726// glob support
708727void c_glob (uint32_t **offsets, char **data, uint8_t **null_bitmap, int64_t * num_strings, char * path)
709728{
0 commit comments