Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit ea0ffed

Browse files
Merge pull request #33 from IntelPython/master
Merge changes from SDC master
2 parents b5a052e + 10dd390 commit ea0ffed

96 files changed

Lines changed: 2853 additions & 6569 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.coveragerc

Lines changed: 0 additions & 15 deletions
This file was deleted.

.gitignore

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1 @@
1-
# Compiled python modules.
2-
*.pyc
3-
4-
# Setuptools distribution folder.
5-
/dist
6-
7-
# cache
8-
hpat/__pycache__
9-
hpat/hiframes/__pycache__
10-
11-
# Python egg metadata, regenerated from source files by setuptools.
12-
/*.egg-info
13-
/*.egg
14-
15-
*.o
16-
*.so
17-
181
build/
19-
docs/_build/
20-
docs/gh-pages/
21-
*.DS_Store
22-
23-
# data files for testing
24-
*.pq
25-
*.csv
26-
*.parquet
27-
*.hdf5
28-
tmp_tests/

README.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,15 @@ Intel® Scalable Dataframe Compiler
44

55
.. image:: https://travis-ci.com/IntelPython/sdc.svg?branch=master
66
:target: https://travis-ci.com/IntelPython/sdc
7+
:alt: Travis CI
8+
9+
.. image:: https://dev.azure.com/IntelPython/HPAT/_apis/build/status/IntelPython.sdc?branchName=master
10+
:target: https://dev.azure.com/IntelPython/HPAT/_build/latest?definitionId=2&branchName=master
11+
:alt: Azure Pipelines
712

813
.. image:: https://coveralls.io/repos/github/IntelPython/sdc/badge.svg?branch=master
914
:target: https://coveralls.io/github/IntelPython/sdc?branch=master
15+
:alt: Coveralls
1016

1117
A compiler-based framework for big data in Python
1218
#################################################
@@ -162,6 +168,5 @@ Running unit tests
162168
------------------
163169
::
164170

165-
conda install h5py
166171
python sdc/tests/gen_test_data.py
167172
python -m unittest
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
echo on
22

3-
set HDF5_DIR="%LIBRARY_PREFIX%"
43
"%PYTHON%" setup.py build install --single-version-externally-managed --record=record.txt
54
if errorlevel 1 exit 1
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
set -ex
22

3-
HDF5_DIR="${PREFIX}" MACOSX_DEPLOYMENT_TARGET=10.9 \
3+
MACOSX_DEPLOYMENT_TARGET=10.9 \
44
$PYTHON setup.py build install --single-version-externally-managed --record=record.txt

buildscripts/sdc-conda-recipe/meta.yaml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ requirements:
3333
- pyarrow {{ PYARROW_VERSION }}
3434
- arrow-cpp {{ ARROW_CPP_VERSION }}
3535
- boost
36-
- hdf5
37-
- h5py
3836
- mpich # [not win]
3937
- impi_rt # [win]
4038
- impi-devel # [win]
@@ -55,7 +53,6 @@ requirements:
5553

5654
test:
5755
requires:
58-
- h5py
5956
- scipy
6057
imports:
6158
- sdc
@@ -78,8 +75,6 @@ outputs:
7875
- pyarrow {{ PYARROW_VERSION }}
7976
- arrow-cpp {{ PYARROW_VERSION }}
8077
- boost
81-
- hdf5
82-
- h5py
8378
- mpich # [not win]
8479
- impi_rt # [win]
8580
- impi-devel # [win]

buildscripts/test.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,17 +108,21 @@
108108

109109

110110
if run_coverage == 'True':
111+
if platform.system() == 'Windows':
112+
format_print('Coverage can be run only on Linux or mac for now')
113+
sys.exit(0)
114+
115+
coverage_omit = './sdc/tests/*'
116+
coverage_cmd = ' && '.join(['coverage erase',
117+
f'coverage run --source=./sdc --omit {coverage_omit} ./sdc/runtests.py',
118+
'coveralls -v'])
119+
111120
format_print('Run coverage')
112121
format_print(f'Assume that SDC is installed in develop build-mode to {develop_env} environment', new_block=False)
113122
format_print('Install scipy and coveralls')
114123
run_command(f'{develop_env_activate} && conda install -q -y scipy coveralls')
115-
os.environ['PYTHONPATH'] = '.'
116-
os.environ['HDF5_DIR'] = conda_prefix
117-
try:
118-
run_command(f'{develop_env_activate} && python -m sdc.tests.gen_test_data && coverage erase && coverage run -m sdc.runtests && coveralls -v')
119-
except:
120-
format_print('Coverage fails')
121-
print(traceback.format_exc())
124+
run_command(f'{develop_env_activate} && python -m sdc.tests.gen_test_data')
125+
run_command(f'{develop_env_activate} && {coverage_cmd}')
122126
sys.exit(0)
123127

124128
if test_mode == 'develop':

docs/demos/plotting.py

Lines changed: 55 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,72 +2,76 @@
22
import matplotlib.pyplot as plt
33

44
data_performance = {
5-
'read_parquet': ((0.804, 2.939, 6.235, 20.91),
6-
(0.51, 1.662, 3.151, 8.555)),
7-
'read_csv': ((19.732, 10.466, 6.277, 6.947)),
8-
'describe': ((0.823, 1.713, 3.324, 7.103),
9-
(0.704, 1.396, 2.89, 6.485)),
5+
'read_csv': ((10.324, 20.793, 41.413, 84.63),
6+
(0.914, 1.925, 3.988, 8.149)),
7+
'describe': ((0.374, 0.681, 1.426, 2.875),
8+
(0.235, 0.597, 1.192, 2.264)),
109
'v_counts': ((0.867, 1.777, 3.447, 6.799),
11-
(0.699, 1.401, 2.914, 6.276)),
10+
(0.699, 1.401, 2.914, 6.276)),
11+
'statistics': ((2.1, 4.6, 8.3, 20.2),
12+
(0.3, 0.7, 1.6, 3.2)),
13+
'sum': ((0.9, 1.2, 2.7, 5.9),
14+
(0.1, 0.4, 0.8, 1.9)),
1215
}
1316

1417
plot_params = {
15-
'read_parquet':(('1m', '2m', '4m', '8m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
16-
'read_csv':(('1 node', '2 nodes', '4 nodes', '8 nodes'), 'Number of processes', 'SDC Scalability', False, 'upper right'),
17-
'describe':(('1m', '2m', '4m', '8m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
18-
'v_counts':(('1m', '2m', '4m', '8m'), 'Data size', 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
18+
'read_csv':(('1m', '2m', '4m', '8m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
19+
'describe':(('1m', '2m', '4m', '8m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
20+
'v_counts':(('1m', '2m', '4m', '8m'), 'Data size', 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
21+
'statistics':(('10m', '20m', '40m', '80m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
22+
'sum':(('10m', '20m', '40m', '80m'), 'Data size', 'Performance: Pandas vs SDC', True, 'upper left'),
1923
}
2024

2125

2226
class Plotter:
23-
def __init__(self, func_id='read_parquet'):
24-
self.func_id = func_id
25-
self.x_labels, self.x_title, self.title, self.is_compared, self.label_position = plot_params[self.func_id]
26-
self.ngroups = len(self.x_labels)
27+
def __init__(self, func_id='read_parquet'):
28+
self.func_id = func_id
29+
self.x_labels, self.x_title, self.title, self.is_compared, self.label_position = plot_params[self.func_id]
30+
self.ngroups = len(self.x_labels)
2731

28-
def autolabel(self, rects, ax):
29-
for rect in rects:
30-
height = rect.get_height()
31-
ax.annotate('{}'.format(height),
32-
xy=(rect.get_x() + rect.get_width() / 2, height),
33-
xytext=(0, 3), # 3 points vertical offset
34-
textcoords="offset points",
35-
ha='center', va='bottom', fontsize=12)
32+
def autolabel(self, rects, ax):
33+
for rect in rects:
34+
height = rect.get_height()
35+
ax.annotate('{}'.format(height),
36+
xy=(rect.get_x() + rect.get_width() / 2, height),
37+
xytext=(0, 3), # 3 points vertical offset
38+
textcoords="offset points",
39+
ha='center', va='bottom', fontsize=12)
3640

37-
def plot_performance(self):
41+
def plot_performance(self):
3842

39-
plt.figure(figsize = (16, 8))
40-
# create plot
41-
index = np.arange(self.ngroups)
42-
bar_width = 0.35
43-
opacity = 0.8
43+
plt.figure(figsize = (16, 8))
44+
# create plot
45+
index = np.arange(self.ngroups)
46+
bar_width = 0.35
47+
opacity = 0.8
4448

45-
plt.xlabel(self.x_title, fontsize=16)
46-
plt.ylabel('Time, s', fontsize=16)
47-
plt.title(self.title, fontsize=18)
48-
49-
plt.tick_params(labelsize=12)
49+
plt.xlabel(self.x_title, fontsize=16)
50+
plt.ylabel('Time, s', fontsize=16)
51+
plt.title(self.title, fontsize=18)
52+
53+
plt.tick_params(labelsize=12)
5054

51-
if self.is_compared:
52-
data_pandas, data_sdc = data_performance[self.func_id]
53-
rects_pandas = plt.bar(index + bar_width, data_pandas, bar_width,
54-
alpha=opacity,
55-
label='Pandas')
55+
if self.is_compared:
56+
data_pandas, data_sdc = data_performance[self.func_id]
57+
rects_pandas = plt.bar(index + bar_width, data_pandas, bar_width,
58+
alpha=opacity,
59+
label='Pandas')
5660

57-
plt.xticks(index + bar_width, self.x_labels)
58-
else:
59-
data_sdc = data_performance[self.func_id]
60-
plt.xticks(index, self.x_labels)
61+
plt.xticks(index + bar_width, self.x_labels)
62+
else:
63+
data_sdc = data_performance[self.func_id]
64+
plt.xticks(index, self.x_labels)
6165

62-
rects_sdc = plt.bar(index, data_sdc, bar_width,
63-
alpha=opacity,
64-
label='SDC')
66+
rects_sdc = plt.bar(index, data_sdc, bar_width,
67+
alpha=opacity,
68+
label='SDC')
6569

66-
if self.is_compared:
67-
self.autolabel(rects_pandas, plt)
70+
if self.is_compared:
71+
self.autolabel(rects_pandas, plt)
6872

69-
plt.legend(fontsize=16, loc=self.label_position)
70-
self.autolabel(rects_sdc, plt)
73+
plt.legend(fontsize=16, loc=self.label_position)
74+
self.autolabel(rects_sdc, plt)
7175

72-
plt.tight_layout()
73-
plt.show()
76+
plt.tight_layout()
77+
plt.show()

0 commit comments

Comments
 (0)