Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 0369309

Browse files
akharcheshssf
authored and committed
demo refactoring (#299)
1 parent 21366ff commit 0369309

2 files changed

Lines changed: 90 additions & 207 deletions

File tree

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,28 @@
11
import numpy as np
22
import matplotlib.pyplot as plt
3-
# %matplotlib inline
43

54
data_performance = {
65
'read_parquet': ((0.804, 2.939, 6.235, 20.91),
76
(0.51, 1.662, 3.151, 8.555)),
8-
'read_csv': ((5.526, 5.806, 7.008, 10.46),
9-
(19.732, 10.466, 6.277, 6.947)),
7+
'read_csv': ((19.732, 10.466, 6.277, 6.947)),
108
'describe': ((0.823, 1.713, 3.324, 7.103),
119
(0.704, 1.396, 2.89, 6.485)),
1210
'v_counts': ((0.867, 1.777, 3.447, 6.799),
1311
(0.699, 1.401, 2.914, 6.276)),
1412
}
1513

1614
plot_params = {
17-
'read_parquet':(('1m', '2m', '4m', '8m'), 'Data size'),
18-
'read_csv':(('1 node', '2 nodes', '4 nodes', '8 nodes'), 'Number of processes'),
19-
'describe':(('1m', '2m', '4m', '8m'), 'Data size'),
20-
'v_counts':(('1m', '2m', '4m', '8m'), 'Data size'),
15+
'read_parquet':(('1m', '2m', '4m', '8m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
16+
'read_csv':(('1 node', '2 nodes', '4 nodes', '8 nodes'), 'Number of processes', 'SDC Scalability', False, 'upper right'),
17+
'describe':(('1m', '2m', '4m', '8m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
18+
'v_counts':(('1m', '2m', '4m', '8m'), 'Data size', 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
2119
}
2220

2321

2422
class Plotter:
2523
def __init__(self, func_id='read_parquet'):
2624
self.func_id = func_id
27-
self.x_labels, self.x_title = plot_params[self.func_id]
25+
self.x_labels, self.x_title, self.title, self.is_compared, self.label_position = plot_params[self.func_id]
2826
self.ngroups = len(self.x_labels)
2927

3028
def autolabel(self, rects, ax):
@@ -39,30 +37,37 @@ def autolabel(self, rects, ax):
3937
def plot_performance(self):
4038

4139
plt.figure(figsize = (16, 8))
42-
43-
means_pandas, means_sdc = data_performance[self.func_id]
4440
# create plot
4541
index = np.arange(self.ngroups)
4642
bar_width = 0.35
4743
opacity = 0.8
4844

49-
rects1 = plt.bar(index, means_pandas, bar_width,
50-
alpha=opacity,
51-
label='Pandas')
45+
plt.xlabel(self.x_title, fontsize=16)
46+
plt.ylabel('Time, s', fontsize=16)
47+
plt.title(self.title, fontsize=18)
48+
49+
plt.tick_params(labelsize=12)
5250

53-
rects2 = plt.bar(index + bar_width, means_sdc, bar_width,
51+
if self.is_compared:
52+
data_pandas, data_sdc = data_performance[self.func_id]
53+
rects_pandas = plt.bar(index + bar_width, data_pandas, bar_width,
54+
alpha=opacity,
55+
label='Pandas')
56+
57+
plt.xticks(index + bar_width, self.x_labels)
58+
else:
59+
data_sdc = data_performance[self.func_id]
60+
plt.xticks(index, self.x_labels)
61+
62+
rects_sdc = plt.bar(index, data_sdc, bar_width,
5463
alpha=opacity,
5564
label='SDC')
5665

57-
plt.xlabel(self.x_title, fontsize=16)
58-
plt.ylabel('Time, s', fontsize=16)
59-
plt.title('Performance: Pandas vs SDC', fontsize=18)
60-
plt.xticks(index + bar_width, self.x_labels)
61-
plt.tick_params(labelsize=12)
62-
plt.legend(fontsize=16,loc="upper left")
66+
if self.is_compared:
67+
self.autolabel(rects_pandas, plt)
6368

64-
self.autolabel(rects1, plt)
65-
self.autolabel(rects2, plt)
69+
plt.legend(fontsize=16, loc=self.label_position)
70+
self.autolabel(rects_sdc, plt)
6671

6772
plt.tight_layout()
6873
plt.show()

0 commit comments

Comments
 (0)