Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 59f707e

Browse files
akharche authored and shssf committed
Load final version of SDC demo (#315)
1 parent 62032db commit 59f707e

2 files changed

Lines changed: 216 additions & 320 deletions

File tree

docs/demos/plotting.py

Lines changed: 55 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -2,72 +2,76 @@
22
import matplotlib.pyplot as plt
33

44
data_performance = {
5-
'read_parquet': ((0.804, 2.939, 6.235, 20.91),
6-
(0.51, 1.662, 3.151, 8.555)),
7-
'read_csv': ((19.732, 10.466, 6.277, 6.947)),
8-
'describe': ((0.823, 1.713, 3.324, 7.103),
9-
(0.704, 1.396, 2.89, 6.485)),
5+
'read_csv': ((10.324, 20.793, 41.413, 84.63),
6+
(0.914, 1.925, 3.988, 8.149)),
7+
'describe': ((0.374, 0.681, 1.426, 2.875),
8+
(0.235, 0.597, 1.192, 2.264)),
109
'v_counts': ((0.867, 1.777, 3.447, 6.799),
11-
(0.699, 1.401, 2.914, 6.276)),
10+
(0.699, 1.401, 2.914, 6.276)),
11+
'statistics': ((2.1, 4.6, 8.3, 20.2),
12+
(0.3, 0.7, 1.6, 3.2)),
13+
'sum': ((0.9, 1.2, 2.7, 5.9),
14+
(0.1, 0.4, 0.8, 1.9)),
1215
}
1316

1417
plot_params = {
15-
'read_parquet':(('1m', '2m', '4m', '8m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
16-
'read_csv':(('1 node', '2 nodes', '4 nodes', '8 nodes'), 'Number of processes', 'SDC Scalability', False, 'upper right'),
17-
'describe':(('1m', '2m', '4m', '8m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
18-
'v_counts':(('1m', '2m', '4m', '8m'), 'Data size', 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
18+
'read_csv':(('1m', '2m', '4m', '8m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
19+
'describe':(('1m', '2m', '4m', '8m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
20+
'v_counts':(('1m', '2m', '4m', '8m'), 'Data size', 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
21+
'statistics':(('10m', '20m', '40m', '80m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
22+
'sum':(('10m', '20m', '40m', '80m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
1923
}
2024

2125

2226
class Plotter:
23-
def __init__(self, func_id='read_parquet'):
24-
self.func_id = func_id
25-
self.x_labels, self.x_title, self.title, self.is_compared, self.label_position = plot_params[self.func_id]
26-
self.ngroups = len(self.x_labels)
27+
def __init__(self, func_id='read_parquet'):
28+
self.func_id = func_id
29+
self.x_labels, self.x_title, self.title, self.is_compared, self.label_position = plot_params[self.func_id]
30+
self.ngroups = len(self.x_labels)
2731

28-
def autolabel(self, rects, ax):
29-
for rect in rects:
30-
height = rect.get_height()
31-
ax.annotate('{}'.format(height),
32-
xy=(rect.get_x() + rect.get_width() / 2, height),
33-
xytext=(0, 3), # 3 points vertical offset
34-
textcoords="offset points",
35-
ha='center', va='bottom', fontsize=12)
32+
def autolabel(self, rects, ax):
33+
for rect in rects:
34+
height = rect.get_height()
35+
ax.annotate('{}'.format(height),
36+
xy=(rect.get_x() + rect.get_width() / 2, height),
37+
xytext=(0, 3), # 3 points vertical offset
38+
textcoords="offset points",
39+
ha='center', va='bottom', fontsize=12)
3640

37-
def plot_performance(self):
41+
def plot_performance(self):
3842

39-
plt.figure(figsize = (16, 8))
40-
# create plot
41-
index = np.arange(self.ngroups)
42-
bar_width = 0.35
43-
opacity = 0.8
43+
plt.figure(figsize = (16, 8))
44+
# create plot
45+
index = np.arange(self.ngroups)
46+
bar_width = 0.35
47+
opacity = 0.8
4448

45-
plt.xlabel(self.x_title, fontsize=16)
46-
plt.ylabel('Time, s', fontsize=16)
47-
plt.title(self.title, fontsize=18)
48-
49-
plt.tick_params(labelsize=12)
49+
plt.xlabel(self.x_title, fontsize=16)
50+
plt.ylabel('Time, s', fontsize=16)
51+
plt.title(self.title, fontsize=18)
52+
53+
plt.tick_params(labelsize=12)
5054

51-
if self.is_compared:
52-
data_pandas, data_sdc = data_performance[self.func_id]
53-
rects_pandas = plt.bar(index + bar_width, data_pandas, bar_width,
54-
alpha=opacity,
55-
label='Pandas')
55+
if self.is_compared:
56+
data_pandas, data_sdc = data_performance[self.func_id]
57+
rects_pandas = plt.bar(index + bar_width, data_pandas, bar_width,
58+
alpha=opacity,
59+
label='Pandas')
5660

57-
plt.xticks(index + bar_width, self.x_labels)
58-
else:
59-
data_sdc = data_performance[self.func_id]
60-
plt.xticks(index, self.x_labels)
61+
plt.xticks(index + bar_width, self.x_labels)
62+
else:
63+
data_sdc = data_performance[self.func_id]
64+
plt.xticks(index, self.x_labels)
6165

62-
rects_sdc = plt.bar(index, data_sdc, bar_width,
63-
alpha=opacity,
64-
label='SDC')
66+
rects_sdc = plt.bar(index, data_sdc, bar_width,
67+
alpha=opacity,
68+
label='SDC')
6569

66-
if self.is_compared:
67-
self.autolabel(rects_pandas, plt)
70+
if self.is_compared:
71+
self.autolabel(rects_pandas, plt)
6872

69-
plt.legend(fontsize=16, loc=self.label_position)
70-
self.autolabel(rects_sdc, plt)
73+
plt.legend(fontsize=16, loc=self.label_position)
74+
self.autolabel(rects_sdc, plt)
7175

72-
plt.tight_layout()
73-
plt.show()
76+
plt.tight_layout()
77+
plt.show()

0 commit comments

Comments (0)