5252from sdc .datatypes .range_index_type import RangeIndexType
5353
5454from sdc .hiframes .pd_dataframe_type import DataFrameType
55+ from sdc .hiframes .pd_dataframe_ext import init_dataframe_internal , get_structure_maps
5556from sdc .hiframes .pd_series_type import SeriesType
5657
5758from sdc .datatypes .hpat_pandas_dataframe_getitem_types import (DataFrameGetitemAccessorType ,
@@ -1337,40 +1338,69 @@ def isna_overload(df):
13371338 return sdc_pandas_dataframe_isna_codegen (df , 'isna' )
13381339
13391340
def sdc_pandas_dataframe_drop_codegen(func_name, func_args, df, drop_col_names):
    """
    Generate source text and globals for an implementation of DataFrame.drop(columns=...).

    Parameters
    ----------
    func_name : str
        Name given to the generated function.
    func_args : list of str
        Already formatted parameter strings, joined with ', ' into the generated signature.
    df : DataFrameType
        Type of the dataframe the column(s) are dropped from.
    drop_col_names : tuple of str
        Names of the columns to drop. Names not present in ``df.columns`` do not change the
        resulting data; the first such name makes the generated code raise ValueError when
        ``errors == "raise"``.

    Returns
    -------
    (func_def, global_vars) : (str, dict)
        Source text of the generated function and the globals dict it has to be exec'ed with.

    Example of generated implementation:
    def sdc_pandas_dataframe_drop_impl(df, labels=None, axis=0, index=None, columns=None,
                                       level=None, inplace=False, errors="raise"):
        list_0 = df._data[0].copy()
        for col_id in old_scheme_drop_idxs_0[::-1]:
            list_0.pop(col_id)
        list_1 = df._data[1].copy()
        new_data = (list_1, list_0, )
        return init_dataframe_internal(new_data, df._index, df_type)
    """
    indent = 4 * ' '
    func_definition = [f'def {func_name}({", ".join(func_args)}):']
    func_text = []

    # NOTE(review): get_structure_maps is assumed to return (column_loc, data_typs_map, types_order),
    # where data_typs_map[type][1] holds positions of the columns of that type - confirm against
    # its definition in sdc.hiframes.pd_dataframe_ext
    _, old_data_typs_map, old_types_order = get_structure_maps(df.data, df.columns)

    new_data_typs = tuple(typ for col_idx, typ in enumerate(df.data)
                          if df.columns[col_idx] not in drop_col_names)
    new_column_names = tuple(name for name in df.columns if name not in drop_col_names)
    new_column_loc, _, new_types_order = get_structure_maps(new_data_typs, new_column_names)

    # for every type kept in the new df find the index of its list in the old df data tuple
    old_types_idxs_map = {typ: idx for idx, typ in enumerate(old_types_order)}
    reorder_scheme = tuple(old_types_idxs_map[typ] for typ in new_types_order)
    df_type = DataFrameType(new_data_typs, df.index, new_column_names, column_loc=new_column_loc)

    # per old type list: positions (inside that list) of the columns to drop, or None if nothing is dropped
    old_scheme_drop_idxs = []
    for type_key in old_types_order:
        positions = tuple(pos for pos, col_idx in enumerate(old_data_typs_map[type_key][1])
                          if df.columns[col_idx] in drop_col_names)
        old_scheme_drop_idxs.append(positions or None)

    # only the first label missing from df.columns is reported
    for label in drop_col_names:
        if label not in df.columns:
            # the message is emitted via repr() so that labels containing quotes or backslashes
            # still produce valid source for the generated function
            message = f'The label {label} is not found in the selected axis'
            func_text.append('if errors == "raise":')
            func_text.append(indent + f'raise ValueError({message!r})')
            break

    old_ntypes = len(old_types_order)
    for type_id in range(old_ntypes):
        func_text.append(f'list_{type_id} = df._data[{type_id}].copy()')
        if old_scheme_drop_idxs[type_id]:
            # pop in reverse order, so that positions of not yet removed columns stay valid
            func_text.append(f'for col_id in old_scheme_drop_idxs_{type_id}[::-1]:')
            func_text.append(indent + f'list_{type_id}.pop(col_id)')

    # in new df the order of array lists (i.e. types_order) can be different, so
    # making a new tuple of lists reordered as needed
    data_lists_reordered = ', '.join(f'list_{old_idx}' for old_idx in reorder_scheme)
    data_val = f'({data_lists_reordered}, )' if reorder_scheme else '()'

    data, index = 'new_data', 'df._index'
    func_text.append(f'{data} = {data_val}')
    func_text.append(f"return init_dataframe_internal({data}, {index}, df_type)\n")
    func_definition.extend(indent + func_line for func_line in func_text)
    func_def = '\n'.join(func_definition)

    global_vars = {
        'pandas': pandas,
        'init_dataframe_internal': init_dataframe_internal,
        'df_type': df_type,
    }

    # tuples of positions to drop are referenced by name from the generated code
    global_vars.update({f'old_scheme_drop_idxs_{type_id}': drop_idxs
                        for type_id, drop_idxs in enumerate(old_scheme_drop_idxs)})

    return func_def, global_vars
13761406
@@ -1387,7 +1417,8 @@ def sdc_pandas_dataframe_drop(df, labels=None, axis=0, index=None, columns=None,
13871417 -----------
13881418 - Parameters ``labels``, ``axis``, ``index``, ``level`` and ``inplace`` are currently unsupported.
13891419 - Parameter ``columns`` is required and is expected to be a Literal value with one column name
1390- or Tuple with columns names.
1420+ or List with column names. Mutating a list of column names after it was defined and then passing it as the
1421+ ``columns`` argument results in an SDCLimitation exception at runtime.
13911422 - Supported ``errors`` can be {``raise``, ``ignore``}, default ``raise``. If ``ignore``, suppress error and only
13921423 existing labels are dropped.
13931424
@@ -1420,36 +1451,66 @@ def sdc_pandas_dataframe_drop(df, labels=None, axis=0, index=None, columns=None,
14201451
14211452 """
14221453
1423- _func_name = ' drop'
1454+ method_name = f'Method drop(). '
14241455
1425- ty_checker = TypeChecker (f'Method { _func_name } ().' )
1456+ ty_checker = TypeChecker (method_name )
14261457 ty_checker .check (df , DataFrameType )
14271458
1428- if not isinstance (labels , types .Omitted ) and labels is not None :
1459+ if not isinstance (labels , ( types .Omitted , types . NoneType ) ) and labels is not None :
14291460 ty_checker .raise_exc (labels , 'None' , 'labels' )
14301461
1431- if not isinstance (axis , (int , types .Omitted )) :
1462+ if not isinstance (axis , (types . Omitted , types .Integer )) and axis != 0 :
14321463 ty_checker .raise_exc (axis , 'int' , 'axis' )
14331464
1434- if not isinstance (index , types .Omitted ) and index is not None :
1465+ if not isinstance (index , ( types .Omitted , types . NoneType ) ) and index is not None :
14351466 ty_checker .raise_exc (index , 'None' , 'index' )
14361467
1437- if not isinstance (columns , (types .Omitted , types .Tuple , types .Literal )):
1438- ty_checker .raise_exc (columns , 'str, tuple of str' , 'columns' )
1468+ if not (isinstance (columns , (types .Omitted , types .StringLiteral ))
1469+ or (isinstance (columns , types .Tuple )
1470+ and all (isinstance (c , types .StringLiteral ) for c in columns ))
1471+ or (isinstance (columns , types .UniTuple ) and isinstance (columns .dtype , types .StringLiteral ))
1472+ or isinstance (columns , types .List ) and isinstance (columns .dtype , types .UnicodeType )
1473+ ):
1474+ ty_checker .raise_exc (columns , 'str, list of const str' , 'columns' )
14391475
1440- if not isinstance (level , (types .Omitted , types .Literal )) and level is not None :
1476+ if not isinstance (level , (types .Omitted , types .NoneType , types . Literal )) and level is not None :
14411477 ty_checker .raise_exc (level , 'None' , 'level' )
14421478
1443- if not isinstance (inplace , (bool , types .Omitted )) and inplace :
1479+ if not isinstance (inplace , (types . Omitted , types .NoneType , types . Boolean )) and inplace :
14441480 ty_checker .raise_exc (inplace , 'bool' , 'inplace' )
14451481
1446- if not isinstance (errors , (str , types .Omitted , types .Literal )) :
1482+ if not isinstance (errors , (types . Omitted , types .UnicodeType , types .StringLiteral )) and errors != "raise" :
14471483 ty_checker .raise_exc (errors , 'str' , 'errors' )
14481484
1485+ if isinstance (columns , types .List ):
1486+ if columns .initial_value is None :
1487+ raise TypingError ('{} Unsupported use of parameter columns:'
1488+ ' expected list of constant strings. Given: {}' .format (method_name , columns ))
1489+ else :
1490+ # this works because global tuple of strings is captured as Tuple of StringLiterals
1491+ columns_as_tuple = tuple (columns .initial_value )
1492+ def _sdc_pandas_dataframe_drop_wrapper_impl (df , labels = None , axis = 0 , index = None ,
1493+ columns = None , level = None , inplace = False , errors = "raise" ):
1494+
1495+ # if at runtime columns list differs from its initial value (known at compile time)
1496+ # we cannot tell which columns to drop and what is the resulting DataFrameType, so raise exception
1497+ if list (columns_as_tuple ) != columns :
1498+ raise SDCLimitation ("Unsupported use of parameter columns: non-const list was used." )
1499+
1500+ return df .drop (labels = labels ,
1501+ axis = axis ,
1502+ index = index ,
1503+ columns = columns_as_tuple ,
1504+ level = level ,
1505+ inplace = inplace ,
1506+ errors = errors )
1507+
1508+ return _sdc_pandas_dataframe_drop_wrapper_impl
1509+
14491510 args = {'labels' : None , 'axis' : 0 , 'index' : None , 'columns' : None , 'level' : None , 'inplace' : False ,
14501511 'errors' : f'"raise"' }
14511512
1452- def sdc_pandas_dataframe_drop_impl (df , _func_name , args , columns ):
1513+ def sdc_pandas_dataframe_drop_impl (df , args , columns ):
14531514 func_args = ['df' ]
14541515 for key , value in args .items ():
14551516 if key not in func_args :
@@ -1459,18 +1520,19 @@ def sdc_pandas_dataframe_drop_impl(df, _func_name, args, columns):
14591520
14601521 if isinstance (columns , types .StringLiteral ):
14611522 drop_cols = (columns .literal_value ,)
1462- elif isinstance (columns , types .Tuple ):
1523+ elif isinstance (columns , ( types .Tuple , types . UniTuple ) ):
14631524 drop_cols = tuple (column .literal_value for column in columns )
14641525 else :
14651526 raise ValueError ('Only drop by one column or tuple of columns is currently supported in df.drop()' )
14661527
1467- func_def , global_vars = sdc_pandas_dataframe_drop_codegen (_func_name , func_args , df , drop_cols )
1528+ func_name = 'sdc_pandas_dataframe_drop_impl'
1529+ func_def , global_vars = sdc_pandas_dataframe_drop_codegen (func_name , func_args , df , drop_cols )
14681530 loc_vars = {}
14691531 exec (func_def , global_vars , loc_vars )
1470- _drop_impl = loc_vars ['sdc_pandas_dataframe_drop_impl' ]
1532+ _drop_impl = loc_vars [func_name ]
14711533 return _drop_impl
14721534
1473- return sdc_pandas_dataframe_drop_impl (df , _func_name , args , columns )
1535+ return sdc_pandas_dataframe_drop_impl (df , args , columns )
14741536
14751537
14761538def df_length_expr (self ):
0 commit comments