11import numpy as np
22import matplotlib .pyplot as plt
3- # %matplotlib inline
43
# Benchmark timings, keyed by benchmark id (plotted on the 'Time, s' axis).
# Entries whose plot_params flag `is_compared` is True hold a pair of series,
# unpacked by Plotter.plot_performance as (pandas, sdc). 'read_csv' is plotted
# without a Pandas baseline, so it holds a single flat series.
data_performance = {
    'read_parquet': ((0.804, 2.939, 6.235, 20.91),
                     (0.51, 1.662, 3.151, 8.555)),
    # Flat 4-tuple on purpose: wrapping it in an extra pair of parentheses
    # would not create a nested tuple, it would only look like one.
    'read_csv': (19.732, 10.466, 6.277, 6.947),
    'describe': ((0.823, 1.713, 3.324, 7.103),
                 (0.704, 1.396, 2.89, 6.485)),
    'v_counts': ((0.867, 1.777, 3.447, 6.799),
                 (0.699, 1.401, 2.914, 6.276)),
}
1513
# Per-benchmark plot settings. Plotter.__init__ unpacks each entry positionally
# into exactly five fields:
#   (x tick labels, x-axis title, plot title, is_compared, legend location)
# so every entry must have exactly five elements.
plot_params = {
    'read_parquet': (('1m', '2m', '4m', '8m'), 'Data size',
                     'Performance: Pandas vs SDC', True, 'upper left'),
    'read_csv': (('1 node', '2 nodes', '4 nodes', '8 nodes'), 'Number of processes',
                 'SDC Scalability', False, 'upper right'),
    'describe': (('1m', '2m', '4m', '8m'), 'Data size',
                 'Performance: Pandas vs SDC', True, 'upper left'),
    'v_counts': (('1m', '2m', '4m', '8m'), 'Data size',
                 'Performance: Pandas vs SDC', True, 'upper left'),
}
2220
2321
2422class Plotter :
2523 def __init__ (self , func_id = 'read_parquet' ):
2624 self .func_id = func_id
27- self .x_labels , self .x_title = plot_params [self .func_id ]
25+ self .x_labels , self .x_title , self . title , self . is_compared , self . label_position = plot_params [self .func_id ]
2826 self .ngroups = len (self .x_labels )
2927
3028 def autolabel (self , rects , ax ):
@@ -39,30 +37,37 @@ def autolabel(self, rects, ax):
3937 def plot_performance (self ):
4038
4139 plt .figure (figsize = (16 , 8 ))
42-
43- means_pandas , means_sdc = data_performance [self .func_id ]
4440 # create plot
4541 index = np .arange (self .ngroups )
4642 bar_width = 0.35
4743 opacity = 0.8
4844
49- rects1 = plt .bar (index , means_pandas , bar_width ,
50- alpha = opacity ,
51- label = 'Pandas' )
45+ plt .xlabel (self .x_title , fontsize = 16 )
46+ plt .ylabel ('Time, s' , fontsize = 16 )
47+ plt .title (self .title , fontsize = 18 )
48+
49+ plt .tick_params (labelsize = 12 )
5250
53- rects2 = plt .bar (index + bar_width , means_sdc , bar_width ,
51+ if self .is_compared :
52+ data_pandas , data_sdc = data_performance [self .func_id ]
53+ rects_pandas = plt .bar (index + bar_width , data_pandas , bar_width ,
54+ alpha = opacity ,
55+ label = 'Pandas' )
56+
57+ plt .xticks (index + bar_width , self .x_labels )
58+ else :
59+ data_sdc = data_performance [self .func_id ]
60+ plt .xticks (index , self .x_labels )
61+
62+ rects_sdc = plt .bar (index , data_sdc , bar_width ,
5463 alpha = opacity ,
5564 label = 'SDC' )
5665
57- plt .xlabel (self .x_title , fontsize = 16 )
58- plt .ylabel ('Time, s' , fontsize = 16 )
59- plt .title ('Performance: Pandas vs SDC' , fontsize = 18 )
60- plt .xticks (index + bar_width , self .x_labels )
61- plt .tick_params (labelsize = 12 )
62- plt .legend (fontsize = 16 ,loc = "upper left" )
66+ if self .is_compared :
67+ self .autolabel (rects_pandas , plt )
6368
64- self . autolabel ( rects1 , plt )
65- self .autolabel (rects2 , plt )
69+ plt . legend ( fontsize = 16 , loc = self . label_position )
70+ self .autolabel (rects_sdc , plt )
6671
6772 plt .tight_layout ()
6873 plt .show ()
0 commit comments