-
Notifications
You must be signed in to change notification settings - Fork 253
Expand file tree
/
Copy pathbundle-entrypoint.sh
More file actions
executable file
·202 lines (167 loc) · 9.03 KB
/
bundle-entrypoint.sh
File metadata and controls
executable file
·202 lines (167 loc) · 9.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#!/bin/sh
# Benchmark bundle entry point: runs tract benchmarks and appends
# "<key> <value>" lines to ./metrics.
# -e aborts on the first failing command, -x traces every command.
set -ex

# Wall-clock start (seconds since the epoch); the last lines of the script
# subtract it from the end time to report the total runtime.
start=$(date +%s)
# Directory the script was launched from. Nothing in the visible part of this
# script reads it (NOTE(review): possibly used by the sourced private script).
ROOT=$(pwd)

# Pick the tract command, in order of preference:
#   1. $TRACT_RUN when the caller provides a non-empty value,
#   2. an executable ./tract in the current directory,
#   3. a fresh release build of the tract-cli crate.
TRACT=$TRACT_RUN
if [ -z "$TRACT" ]
then
    if [ -x tract ]
    then
        TRACT="./tract"
    else
        cargo build -p tract-cli -q --release
        TRACT="./target/release/tract"
    fi
fi
# Where the benchmark models live. CACHEDIR may be preset by the caller to
# either a local directory or an http(s) URL prefix; it defaults to a
# per-user cache directory.
# Sets PRIVATE to the path of the private benchmark script sourced later on.
CACHEDIR=${CACHEDIR:-$HOME/.cache/tract-ci-minion-models}
case "$CACHEDIR" in
    "http"*)
        # Remote cache: only the private benchmark script is fetched here.
        # -O pins the output name, so a rerun overwrites the previous copy
        # instead of saving the download as private-benches.sh.1 and leaving
        # PRIVATE pointing at a stale script.
        PRIVATE=$(pwd)/private-benches.sh
        wget -O "$PRIVATE" "$CACHEDIR/private/private-benches.sh"
        ;;
    *)
        # Local cache: -p also creates missing parents (e.g. ~/.cache on a
        # fresh account), where a plain mkdir would fail.
        [ -d "$CACHEDIR" ] || mkdir -p "$CACHEDIR"
        PATH=$PATH:/usr/local/bin # for aws command on darwin
        # Best effort: a failed sync only warns, the run continues with
        # whatever is already in the cache.
        aws s3 sync s3://tract-ci-builds/model "$CACHEDIR" || echo "Warning: aws s3 sync failed, continuing with cached models"
        # Unpack the two tarball-packaged models once; the subshell keeps the
        # directory change local.
        (
            cd "$CACHEDIR"
            [ -d en_libri_real ] || tar zxf en_libri_real.tar.gz
            [ -d en_tdnn_lstm_bn_q7 ] || tar zxf en_tdnn_lstm_bn_q7.tar.gz
        )
        PRIVATE=$CACHEDIR/private/private-benches.sh
        ;;
esac
# Make sure ./metrics exists, then seed it with the contents of ./sizes when
# such a file is present in the current directory.
touch metrics
[ ! -e sizes ] || cat sizes >> metrics
# On Linux, when cpu0 runs the "userspace" cpufreq governor, set cpu0 to the
# highest frequency it advertises before benchmarking. Other CPUs and other
# governors are left alone.
if [ "$(uname)" = "Linux" ]
then
    cpufreq_dir=/sys/devices/system/cpu/cpu0/cpufreq
    # Two tests joined by &&: the governor file is only read once it is known
    # to be readable. With a single `[ -r f -a $(cat f) = ... ]` the cat runs
    # regardless, prints an error on machines without cpufreq and hands `[`
    # a malformed expression.
    if [ -r "$cpufreq_dir/scaling_governor" ] && [ "$(cat "$cpufreq_dir/scaling_governor")" = "userspace" ]
    then
        # The inner $(cat ...) is left unquoted on purpose: word splitting
        # turns the list into one argument per frequency, so printf emits one
        # value per line for the numeric sort.
        F=$(printf "%s\n" $(cat "$cpufreq_dir/scaling_available_frequencies") | sort -n | tail -n 1)
        echo "$F" > "$cpufreq_dir/scaling_setspeed"
    fi
fi
# net_bench NET PB TRACT_ARGS...
#
# Benchmarks one model with tract and appends the results to ./metrics.
#
# Arguments:
#   $1   net name, used in the metric keys
#   $2   problem / variant label, used as the last component of the keys
#   $3.. passed verbatim to tract, ahead of the bench options
# Globals read:
#   TRACT       command used to run tract; expanded unquoted on purpose so a
#               value made of several words is split into command + arguments
#   BENCH_OPTS  extra options appended to the bench subcommand (word-split)
# Files:
#   ./tract.out     overwritten with tract's stdout on each run
#   ./readings.out  read after the --readings run (expected to be produced by
#                   that run -- TODO confirm it is written to the cwd)
#   ./metrics       appended to
# Metrics appended:
#   net.NET.evaltime.PB            field 2 of the "real" line of tract.out
#   net.NET.time_to_STAGE.PB       column 1 of the STAGE row of readings.out
#   net.NET.rsz_at_STAGE.PB        column 4 of that row
#   net.NET.active_at_STAGE.PB     column 10 minus column 11 of that row
# for STAGE in model_ready, before_optimize.
# A failing tract run is not caught here: under `set -e` it aborts the script.
net_bench() {
    net=$1
    pb=$2
    shift 2

    # Run 1: plain benchmark. Any run of more than nine digits in the
    # reported value is cut down to its first nine digits.
    $TRACT "$@" --machine-friendly -O bench --allow-random-input $BENCH_OPTS > tract.out
    v=$(grep -a real tract.out | cut -f 2 -d ' ' | sed 's/\([0-9]\{9,9\}\)[0-9]*/\1/')
    echo net.$net.evaltime.$pb $v >> metrics

    # Run 2: same benchmark with readings enabled; its stdout is not used.
    $TRACT "$@" --readings --readings-heartbeat 1000 --machine-friendly -O bench --allow-random-input $BENCH_OPTS > tract.out
    for stage in model_ready before_optimize
    do
        # "_" / "-" become "." so the grep pattern accepts any separator.
        pattern=$(echo $stage | sed 's/[_-]/./g')
        # Normalise the row once: collapse each run of spaces to one space and
        # drop the leading one so `cut -d ' '` sees real columns. The interval
        # \{1,\} is essential: ' *' also matches the empty string, which makes
        # sed insert a space between every character.
        row=$(grep -a "$pattern" readings.out | sed 's/ \{1,\}/ /g;s/^ //')
        v=$(printf '%s\n' "$row" | cut -f 1 -d ' ')
        echo net.$net.time_to_$stage.$pb $v >> metrics
        v=$(printf '%s\n' "$row" | cut -f 4 -d ' ')
        echo net.$net.rsz_at_$stage.$pb $v >> metrics
        f=$(printf '%s\n' "$row" | cut -f 11 -d ' ')
        a=$(printf '%s\n' "$row" | cut -f 10 -d ' ')
        # Same guard as llm_bench: an empty operand would make the arithmetic
        # expansion a syntax error and kill the whole run.
        if [ -n "$a" ] && [ -n "$f" ]
        then
            echo net.$net.active_at_$stage.$pb $(($a-$f)) >> metrics
        fi
    done
}
# llm_bench NET PB TRACT_ARGS...
#
# Runs tract's llm-bench on one model and appends the results to ./metrics.
#
# Arguments:
#   $1   model name, used in the metric keys
#   $2   variant label (the callers pass the backend name), last key component
#   $3.. passed verbatim to tract, ahead of the llm-bench options
# Globals read:
#   TRACT       command used to run tract (expanded unquoted on purpose)
#   BENCH_OPTS  extra options appended to the llm-bench subcommand (word-split)
# Files:
#   ./tract.out     overwritten with the stdout of the first run
#   ./readings.out  read after the --readings run (expected to be produced by
#                   that run -- TODO confirm it is written to the cwd)
#   ./metrics       appended to
# Metrics appended:
#   llm.NET.pp512.PB / llm.NET.tg128.PB   field 2 of the PP512 / TG128 lines
#   llm.NET.time_to_STAGE.PB              column 1 of the STAGE readings row
#   llm.NET.rsz_at_STAGE.PB               column 4 of that row
#   llm.NET.active_at_STAGE.PB            column 10 minus column 11
# for STAGE in model_ready, before_optimize.
# Both tract runs are `if` conditions, so a failing run does not trip
# `set -e`; its metrics are simply not recorded.
llm_bench() {
    net=$1
    pb=$2
    shift 2

    # Run 1: throughput figures. The raw output is echoed to stdout as well.
    if $TRACT "$@" --llm --machine-friendly -O llm-bench $BENCH_OPTS > tract.out
    then
        cat tract.out
        echo llm.$net.pp512.$pb $(grep -a PP512 tract.out | cut -f 2 -d ' ') >> metrics
        echo llm.$net.tg128.$pb $(grep -a TG128 tract.out | cut -f 2 -d ' ') >> metrics
    fi

    # Run 2: same benchmark with readings enabled; stdout is discarded.
    if $TRACT "$@" --readings --readings-heartbeat 1000 --llm --machine-friendly -O llm-bench $BENCH_OPTS > /dev/null
    then
        for stage in model_ready before_optimize
        do
            # "_" / "-" become "." so the grep pattern accepts any separator.
            pattern=$(echo $stage | sed 's/[_-]/./g')
            # Normalise the row once: collapse each run of spaces to one space
            # and drop the leading one so `cut -d ' '` sees real columns. The
            # interval \{1,\} is essential: ' *' also matches the empty
            # string, which makes sed insert a space between every character.
            row=$(grep -a "$pattern" readings.out | sed 's/ \{1,\}/ /g;s/^ //')
            v=$(printf '%s\n' "$row" | cut -f 1 -d ' ')
            echo llm.$net.time_to_$stage.$pb $v >> metrics
            v=$(printf '%s\n' "$row" | cut -f 4 -d ' ')
            echo llm.$net.rsz_at_$stage.$pb $v >> metrics
            f=$(printf '%s\n' "$row" | cut -f 11 -d ' ')
            a=$(printf '%s\n' "$row" | cut -f 10 -d ' ')
            # Skip the difference when either column is missing: an empty
            # operand would be an arithmetic syntax error.
            if [ -n "$a" ] && [ -n "$f" ]
            then
                echo llm.$net.active_at_$stage.$pb $(($a-$f)) >> metrics
            fi
        done
    fi
}
# Benchmark suite. Every line has the shape
#     net_bench <net> <pb> <tract arguments...>
# where <net> and <pb> only shape the metric keys (net.<net>.<metric>.<pb>)
# and everything after them is handed to tract unchanged. Model paths are
# resolved against $CACHEDIR.
net_bench arm_ml_kws_cnn_m pass $CACHEDIR/ARM-ML-KWS-CNN-M.pb -i 49,10,f32 --partial --input-node Mfcc
net_bench hey_snips_v1 400ms $CACHEDIR/hey_snips_v1.pb -i 80,40,f32
net_bench hey_snips_v31 400ms $CACHEDIR/hey_snips_v3.1.pb -i 40,40,f32
# hey_snips_v4_model17 is measured three ways: fixed input, pulsed (-i S,... --pulse 8),
# and from a separate .alpha1.tar file loaded with --nnef-tract-pulse.
net_bench hey_snips_v4_model17 2sec $CACHEDIR/hey_snips_v4_model17.pb -i 200,20,f32
net_bench hey_snips_v4_model17 pulse8 $CACHEDIR/hey_snips_v4_model17.pb -i S,20,f32 --pulse 8
net_bench hey_snips_v4_model17_nnef pulse8 --nnef-tract-pulse $CACHEDIR/hey_snips_v4_model17.alpha1.tar
net_bench mobilenet_v1_1 pass $CACHEDIR/mobilenet_v1_1.0_224_frozen.pb -i 1,224,224,3,f32
net_bench mobilenet_v2_1 pass $CACHEDIR/mobilenet_v2_1.4_224_frozen.pb -i 1,224,224,3,f32
net_bench inceptionv1q pass $CACHEDIR/inceptionv1_quant.nnef.tar.gz --nnef-tract-core
net_bench inceptionv3 pass $CACHEDIR/inception_v3_2016_08_28_frozen.pb -i 1,299,299,3,f32
# The next four read model.onnx from the en_libri_real and en_tdnn_lstm_bn_q7
# directories, which the cache setup extracts from their .tar.gz archives
# when CACHEDIR is a local directory.
net_bench mdl-en-2019-Q3-librispeech_onnx 2600ms $CACHEDIR/en_libri_real/model.onnx --output-node output -i 264,40
net_bench mdl-en-2019-Q3-librispeech_onnx pulse_240ms $CACHEDIR/en_libri_real/model.onnx --output-node output -i S,40 --pulse 24
net_bench en_tdnn_lstm_bn_q7 2600ms $CACHEDIR/en_tdnn_lstm_bn_q7/model.onnx --output-node output -i 264,40
net_bench en_tdnn_lstm_bn_q7 pulse_240ms $CACHEDIR/en_tdnn_lstm_bn_q7/model.onnx --output-node output -i S,40 --pulse 24
# en_tdnn_8M: one fixed-size run, then the same file at --pulse 24, 18 and 12,
# then a separate .alpha1.a.tar file loaded with --nnef-tract-pulse.
net_bench en_tdnn_8M 2600ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.onnx --output-node output -i 264,40
net_bench en_tdnn_8M pulse_240ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.onnx --output-node output -i S,40 --pulse 24
net_bench en_tdnn_8M pulse_180ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.onnx --output-node output -i S,40 --pulse 18
net_bench en_tdnn_8M pulse_120ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.onnx --output-node output -i S,40 --pulse 12
net_bench en_tdnn_8M_nnef pulse_240ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.alpha1.a.tar --nnef-tract-pulse
# en_tdnn_15M: same pattern (fixed size, --pulse 24 and 12, then a .alpha1.tar file).
net_bench en_tdnn_15M 2600ms $CACHEDIR/en_tdnn_15M.onnx --output-node output -i 264,40
net_bench en_tdnn_15M pulse_240ms $CACHEDIR/en_tdnn_15M.onnx --output-node output -i S,40 --pulse 24
net_bench en_tdnn_15M pulse_120ms $CACHEDIR/en_tdnn_15M.onnx --output-node output -i S,40 --pulse 12
net_bench en_tdnn_15M_nnef pulse_240ms $CACHEDIR/en_tdnn_15M.alpha1.tar --nnef-tract-pulse
net_bench dummy-conmer-12M pulse_120ms $CACHEDIR/dummy-conmer-12M.nnef.tar --nnef-tract-core --pulse 12
net_bench en_tdnn_pyt_15M pulse_120ms $CACHEDIR/mdl-en-2023-03-27-allen-17h11m50s.nnef.tar --nnef-tract-core --pulse 12
net_bench speaker_id pulse8 $CACHEDIR/speaker-id-2019-03.onnx -i 1,S,40,f32 --output-node 257 --partial --pulse 8
net_bench voicecom_fake_quant 2sec $CACHEDIR/snips-voice-commands-cnn-fake-quant.pb -i 200,10,f32
net_bench voicecom_float 2sec $CACHEDIR/snips-voice-commands-cnn-float.pb -i 200,10,f32
# Same trunet file twice; the second run adds the f32_to_f16 transform (-t).
net_bench trunet pulse1_f32 $CACHEDIR/trunet_dummy.nnef.tgz --nnef-tract-core --pulse 1
net_bench trunet pulse1_f16 $CACHEDIR/trunet_dummy.nnef.tgz --nnef-tract-core -t f32_to_f16 --pulse 1
# Source the private benchmark script located during cache setup. It runs in
# this shell, so it sees net_bench, llm_bench, CACHEDIR and TRACT
# (NOTE(review): presumably it adds further net_bench calls -- its content is
# not visible here). Under `set -e` the script stops if it cannot be sourced.
. $PRIVATE
# LLM / transformer benchmarks, run once per backend in LLM_BACKENDS.
# LLM_BACKENDS is taken from the environment unless one of the probes below
# overrides it: "cpu metal" on Darwin, "cpu cuda" when nvidia-smi is found
# (the nvidia-smi probe runs last and therefore wins). When it ends up empty
# the whole section is skipped.
if [ "$(uname)" = "Darwin" ]
then
    LLM_BACKENDS="cpu metal"
fi
# `command -v` is the POSIX way to probe for a program: unlike `which` it is
# a shell builtin (always available) and its output is silenced here.
if command -v nvidia-smi > /dev/null 2>&1
then
    LLM_BACKENDS="cpu cuda"
fi
if [ -n "$LLM_BACKENDS" ]
then
    # Remember the caller's BENCH_OPTS so that the override applied for the
    # GPU backends does not leak into whichever backend runs after them.
    caller_bench_opts=$BENCH_OPTS
    for backend in $LLM_BACKENDS
    do
        # Start every backend from a clean slate; a backend name not listed
        # below gets no extra flags and the caller's BENCH_OPTS.
        extra=
        BENCH_OPTS=$caller_bench_opts
        case $backend in
            cpu)
                extra="--timeout 180"
                ;;
            metal)
                extra="--metal --timeout 60"
                BENCH_OPTS="--warmup-loops 1"
                ;;
            cuda)
                extra="--cuda --timeout 60"
                BENCH_OPTS="--warmup-loops 1"
                ;;
        esac
        # $extra stays unquoted: it holds several flags that must be split.
        llm_bench llama-3_2-1B-q40ef32-516 "$backend" "$CACHEDIR/Llama-3.2-1B-q40ef32.516.nnef.tgz" $extra
        llm_bench openelm-270M-q40ef16-516 "$backend" "$CACHEDIR/OpenELM-270M-q40ef16.516.nnef.tgz" $extra
        llm_bench llama-3_2-1B-instruct-q40ef16-541 "$backend" "$CACHEDIR/Llama-3.2-1B-Instruct-q40ef16.541.nnef.tgz" $extra
        llm_bench openelm-270M-q40ef16-541 "$backend" "$CACHEDIR/OpenELM-270M-q40ef16.541.nnef.tgz" $extra
        # The four parakeet components go through net_bench, with the backend
        # name used as the metric label.
        net_bench parakeet-tdt-600m-v3-f32f32-preprocessor_1s "$backend" "$CACHEDIR/parakeet-tdt-0.6b-v3-f32f32.608.preprocessor.nnef.tgz" \
            -t transformers_detect_all --nnef-tract-transformers --set B=1 --set A=16000 $extra
        net_bench parakeet-tdt-600m-v3-f32f32-encoder_1s "$backend" "$CACHEDIR/parakeet-tdt-0.6b-v3-f32f32.608.encoder.p1.nnef.tgz" \
            -t transformers_detect_all --nnef-tract-transformers --set B=1 --set S=100 $extra
        net_bench parakeet-tdt-600m-v3-f32f32-decoder_pass "$backend" "$CACHEDIR/parakeet-tdt-0.6b-v3-f32f32.608.decoder.nnef.tgz" \
            -t transformers_detect_all --nnef-tract-transformers --set B=1 --set T=1 $extra
        net_bench parakeet-tdt-600m-v3-f32f32-joint_pass "$backend" "$CACHEDIR/parakeet-tdt-0.6b-v3-f32f32.608.joint.nnef.tgz" \
            -t transformers_detect_all --nnef-tract-transformers --set B=1 --set R=1 --set U=1 $extra
        # These four models are benchmarked on every backend except cpu.
        if [ "$backend" != "cpu" ]
        then
            llm_bench llama-3_2-3B-q40ef32-516 "$backend" "$CACHEDIR/Llama-3.2-3B-q40ef32.516.nnef.tgz" $extra
            llm_bench llama-3_1-8B-instruct-q40ef16-541 "$backend" "$CACHEDIR/Llama-3.1-8B-Instruct-q40ef16.541.nnef.tgz" $extra
            llm_bench llama-3_2-3B-instruct-q40ef16-541 "$backend" "$CACHEDIR/Llama-3.2-3B-Instruct-q40ef16.541.nnef.tgz" $extra
            llm_bench qwen3-1_7B-q40ef16-541 "$backend" "$CACHEDIR/Qwen3-1.7B-q40ef16.541.nnef.tgz" $extra
        fi
    done
fi
# Record the total wall-clock duration of the run, in seconds, computed from
# the `start` timestamp taken at the top of the script.
end=$(date +%s)
printf '%s %s\n' bundle.bench-runtime "$(($end - $start))" >> metrics