import matplotlib.pyplot as plt
import numpy as np

# Benchmark timings keyed by function id. Each value is a pair of series,
# (pandas_times, sdc_times) -- the order in which Plotter.plot_performance
# unpacks them -- with one measurement per x-axis label of the matching
# plot_params entry. Values are plotted on the 'Time, s' axis.
data_performance = {
    'read_csv': (
        (10.324, 20.793, 41.413, 84.63),
        (0.914, 1.925, 3.988, 8.149),
    ),
    'describe': (
        (0.374, 0.681, 1.426, 2.875),
        (0.235, 0.597, 1.192, 2.264),
    ),
    'v_counts': (
        (0.867, 1.777, 3.447, 6.799),
        (0.699, 1.401, 2.914, 6.276),
    ),
    'statistics': (
        (2.1, 4.6, 8.3, 20.2),
        (0.3, 0.7, 1.6, 3.2),
    ),
    'sum': (
        (0.9, 1.2, 2.7, 5.9),
        (0.1, 0.4, 0.8, 1.9),
    ),
}


# Per-function plot configuration. Each value is a 5-tuple unpacked by
# Plotter.__init__ in this order:
#   (x tick labels, x-axis title, plot title, is_compared, legend location)
# is_compared=True means the matching data_performance entry holds two
# series (Pandas and SDC) that are drawn side by side.
plot_params = {
    'read_csv': (
        ('1m', '2m', '4m', '8m'),
        'Data size', 'Performance: Pandas vs SDC', True, 'upper left',
    ),
    'describe': (
        ('1m', '2m', '4m', '8m'),
        'Data size', 'Performance: Pandas vs SDC', True, 'upper left',
    ),
    # The x-axis title used to be listed twice here, which made the tuple six
    # elements long and broke the five-name unpacking in Plotter.__init__.
    'v_counts': (
        ('1m', '2m', '4m', '8m'),
        'Data size', 'Performance: Pandas vs SDC', True, 'upper left',
    ),
    'statistics': (
        ('10m', '20m', '40m', '80m'),
        'Data size', 'Performance: Pandas vs SDC', True, 'upper left',
    ),
    'sum': (
        ('10m', '20m', '40m', '80m'),
        'Data size', 'Performance: Pandas vs SDC', True, 'upper left',
    ),
}


class Plotter:
    """Draw a bar chart of the recorded timings for one benchmarked function.

    The plot configuration is looked up in the module-level ``plot_params``
    dict and the timings in the module-level ``data_performance`` dict, both
    keyed by ``func_id``.
    """

    def __init__(self, func_id='read_parquet'):
        """Load the plot configuration for ``func_id``.

        Args:
            func_id: key into ``plot_params`` / ``data_performance``.
                NOTE(review): the default 'read_parquet' is kept for
                interface compatibility; it must exist in ``plot_params``
                for a no-argument construction to succeed -- confirm.

        Raises:
            KeyError: if ``func_id`` has no entry in ``plot_params``.
        """
        if func_id not in plot_params:
            raise KeyError(
                'unknown func_id {!r}; available ids: {}'.format(
                    func_id, sorted(plot_params)))
        self.func_id = func_id
        (self.x_labels, self.x_title, self.title,
         self.is_compared, self.label_position) = plot_params[func_id]
        self.ngroups = len(self.x_labels)

    def autolabel(self, rects, ax):
        """Write each bar's height just above the bar.

        Args:
            rects: iterable of bar objects exposing ``get_height()``,
                ``get_x()`` and ``get_width()``.
            ax: object exposing ``annotate(...)``; ``plot_performance``
                passes the ``plt`` module itself.
        """
        for rect in rects:
            height = rect.get_height()
            ax.annotate('{}'.format(height),
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom', fontsize=12)

    def plot_performance(self):
        """Build and show the bar chart for ``self.func_id``.

        When ``self.is_compared`` is true the data entry is unpacked as
        (pandas, sdc) and both series are drawn; otherwise the entry is
        drawn as a single SDC series. Ends with ``plt.show()``.
        """
        plt.figure(figsize=(16, 8))
        # create plot
        index = np.arange(self.ngroups)
        bar_width = 0.35
        opacity = 0.8

        plt.xlabel(self.x_title, fontsize=16)
        plt.ylabel('Time, s', fontsize=16)
        plt.title(self.title, fontsize=18)

        plt.tick_params(labelsize=12)

        if self.is_compared:
            data_pandas, data_sdc = data_performance[self.func_id]
            # Pandas bars are shifted right by one bar width so they sit
            # next to the SDC bars drawn at `index` below.
            rects_pandas = plt.bar(index + bar_width, data_pandas, bar_width,
                                   alpha=opacity,
                                   label='Pandas')

            plt.xticks(index + bar_width, self.x_labels)
        else:
            data_sdc = data_performance[self.func_id]
            plt.xticks(index, self.x_labels)

        rects_sdc = plt.bar(index, data_sdc, bar_width,
                            alpha=opacity,
                            label='SDC')

        if self.is_compared:
            self.autolabel(rects_pandas, plt)

        plt.legend(fontsize=16, loc=self.label_position)
        self.autolabel(rects_sdc, plt)

        plt.tight_layout()
        plt.show()
0 commit comments