Skip to content

Commit cb183f8

Browse files
authored
Merge pull request #429 from mdboom/benchmark_longitudinal
Add a by-benchmark longitudinal plot
2 parents 1b05216 + c816981 commit cb183f8

5 files changed

Lines changed: 151 additions & 4 deletions

File tree

README.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ For each runner in your `bench_runner.toml`, you can specify a `plot` table with
140140
- `marker`: A [matplotlib marker](https://matplotlib.org/stable/api/markers_api.html#module-matplotlib.markers)
141141
- `color`: A [matplotlib color](https://matplotlib.org/stable/users/explain/colors/colors.html#colors-def)
142142

143-
#### Longitudinal plot configuration
143+
##### Longitudinal plot configuration
144144

145145
The longitudinal plot shows the change of a version branch over time against a specified base version. It is made up of multiple subplots, each with its own head and base, and optionally configuration flags.
146146

@@ -162,7 +162,7 @@ subplots = [
162162
]
163163
```
164164

165-
#### Flag effect plot configuration
165+
##### Flag effect plot configuration
166166

167167
The flag effect plot shows the effect of specified configuration flags against a base with the same commit hash, but different configuration flags.
168168

@@ -195,6 +195,16 @@ head_flags = ["TAILCALL"]
195195
runner_map = { linux_clang = "linux" }
196196
```
197197

198+
##### Benchmark longitudinal plot configuration
199+
200+
The benchmark longitudinal plot shows how the performance of each individual benchmark changes over time, relative to a fixed base version. Its configuration consists of the following keys:
201+
202+
- `base`: The base version to compare against (must exactly match the version of an existing result)
203+
- `version`: The version to track (matched as a prefix, so `3.11` includes every `3.11.x` result)
204+
- `runners`: The nicknames of the runners to show
205+
- `head_flags`: (optional) The flags to use for the head commits
206+
- `base_flags`: (optional) The flags to use for the base commits
207+
198208
#### Purging old data
199209

200210
With a local checkout of your results repository you can perform some maintenance tasks.

bench_runner/plot.py

Lines changed: 126 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33

44
import argparse
5+
from collections import defaultdict
56
import datetime
67
import functools
78
import json
@@ -118,6 +119,25 @@ def get_flag_effect_plot_config():
118119
return plots
119120

120121

122+
@functools.cache
def get_benchmark_longitudinal_plot_config():
    """Return the normalized ``benchmark_longitudinal_plot`` configuration.

    The table is read from the bench runner configuration.  ``base``,
    ``version`` and ``runners`` are required.  ``head_flags`` and
    ``base_flags`` are optional; they default to an empty list and are
    otherwise de-duplicated and sorted so they compare equal regardless of
    the order in which they were written.

    The result is memoized, so every caller receives the same dict object.

    Raises:
        ValueError: If any of the required keys is missing (including when
            the whole table is absent from the configuration).
    """
    cfg = mconfig.get_bench_runner_config()

    # Work on a copy so the normalization below does not write back into the
    # configuration mapping shared with the rest of the program.
    plot = dict(cfg.get("benchmark_longitudinal_plot", {}))

    # Explicit check rather than `assert`: assertions are stripped under
    # `python -O`, and a bad config file deserves a readable message.
    missing = [key for key in ("base", "version", "runners") if key not in plot]
    if missing:
        raise ValueError(
            "benchmark_longitudinal_plot configuration is missing required "
            f"key(s): {', '.join(missing)}"
        )

    for key in ("head_flags", "base_flags"):
        plot[key] = sorted(set(plot.get(key, [])))

    return plot
139+
140+
121141
def plot_diff_pair(ax, data):
122142
if not len(data):
123143
return []
@@ -477,11 +497,11 @@ def get_comparison_value(ref, r, force_valid):
477497
if subplot["runner_map"] and not runner_is_mapped:
478498
continue
479499
head_results = commits.get(runner.nickname, {}).get(
480-
tuple(subplot["head_flags"]), {}
500+
tuple(sorted(subplot["head_flags"])), {}
481501
)
482502
base_results = commits.get(
483503
subplot["runner_map"].get(runner.nickname, runner.nickname), {}
484-
).get(tuple(subplot["base_flags"]), {})
504+
).get(tuple(sorted(subplot["base_flags"])), {})
485505

486506
line = []
487507
for cpython_hash, r in head_results.items():
@@ -525,6 +545,110 @@ def get_comparison_value(ref, r, force_valid):
525545
json.dump(data, fd, indent=2)
526546

527547

548+
def benchmark_longitudinal_plot(
    results: Iterable[result.Result], output_filename: PathLike
):
    """Plot the change over time of each benchmark against a base version.

    Does nothing when the bench runner configuration has no
    ``benchmark_longitudinal_plot`` table.

    Only results from the ``python`` fork whose runner nickname is listed in
    the configured ``runners`` are considered.  The first such result whose
    version equals the configured ``base`` (with matching ``base_flags``) is
    used as the reference; every result whose version starts with the
    configured ``version`` (with matching ``head_flags``) is compared to it.

    Per-result comparison values are cached in a JSON file next to the output
    (same path, ``.json`` suffix), keyed by the result's file name, so each
    result is only compared once across runs.

    One subplot is drawn per benchmark, with one line per runner.  Benchmarks
    where no runner has more than two data points are omitted.  If no
    benchmark remains, no figure is written.

    Args:
        results: The results to select from.
        output_filename: Where to save the figure.

    Raises:
        ValueError: If no result matches the configured base version.
    """
    if "benchmark_longitudinal_plot" not in mconfig.get_bench_runner_config():
        return

    output_filename = Path(output_filename)

    cache_filename = output_filename.with_suffix(".json")
    if cache_filename.is_file():
        with cache_filename.open() as fd:
            cache = json.load(fd)
    else:
        cache = {}

    cfg = get_benchmark_longitudinal_plot_config()

    results = [
        r for r in results if r.fork == "python" and r.nickname in cfg["runners"]
    ]

    # NOTE(review): the first matching base is used for every runner, even if
    # it was produced on a different runner than the head result -- confirm
    # that cross-runner comparison is intended.
    for r in results:
        if r.version == cfg["base"] and r.flags == cfg["base_flags"]:
            base = r
            break
    else:
        raise ValueError(f"Base version {cfg['base']} not found")

    results = [
        r
        for r in results
        if r.version.startswith(cfg["version"]) and r.flags == cfg["head_flags"]
    ]

    by_benchmark = defaultdict(lambda: defaultdict(list))
    for r in results:
        cache_key = r.filename.name
        if cache_key not in cache:
            comparison = result.BenchmarkComparison(base, r, "")
            # Always store an entry, even an empty one: otherwise a result
            # with no significant benchmarks would be missing from the cache
            # and the lookup below would raise KeyError.
            cache[cache_key] = {
                name: [r.commit_date, mean, r.cpython_hash]
                for name, _diff, mean in comparison.get_timing_diff()
                # Don't include insignificant results
                if mean > 0.0
            }

        for name, value in cache[cache_key].items():
            by_benchmark[name][r.nickname].append(value)

    with cache_filename.open("w") as fd:
        json.dump(cache, fd, indent=2)

    # Exclude any benchmarks where we don't have enough data to make a
    # meaningful plot
    by_benchmark = {
        k: v for k, v in by_benchmark.items() if any(len(x) > 2 for x in v.values())
    }

    # A figure with zero rows cannot be created, so there is nothing to draw.
    if not by_benchmark:
        return

    # squeeze=False always yields a 2-D array of axes, so the single-benchmark
    # case needs no special handling.
    _, axs = plt.subplots(
        len(by_benchmark),
        1,
        figsize=(10, len(by_benchmark)),
        layout="constrained",
        squeeze=False,
    )

    plt.suptitle(
        f"Performance change by benchmark on {cfg['version']} vs. {cfg['base']}"
    )

    for index, ((benchmark, runners), ax) in enumerate(
        zip(sorted(by_benchmark.items()), axs[:, 0])
    ):
        for runner_name, timings in runners.items():
            runner = mrunners.get_runner_by_nickname(runner_name)
            # Parse each date once, then order the points chronologically.
            points = sorted(
                (
                    (datetime.datetime.fromisoformat(date), mean)
                    for date, mean, _cpython_hash in timings
                ),
                key=lambda point: point[0],
            )
            ax.plot(
                [date for date, _ in points],
                [mean for _, mean in points],
                label=runner.plot.name,
                color=runner.plot.color,
                linestyle=runner.plot.style,
                marker=runner.plot.marker,
                markersize=2,
            )
        ax.set_xticks([])
        ax.set_ylabel(benchmark, rotation=0, horizontalalignment="right")
        ax.yaxis.set_major_formatter(formatter)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.grid(True, axis="y")
        # Reference line at 1.0
        ax.axhline(1.0, color="#666", linestyle="-")
        ax.set_facecolor("#f0f0f0")
        # The runners are the same in every subplot; one legend is enough.
        if index == 0:
            ax.legend(loc="upper left")

    savefig(output_filename, dpi=150)
650+
651+
528652
if __name__ == "__main__":
529653
parser = argparse.ArgumentParser(
530654
"Compare two benchmark .json files",

bench_runner/scripts/generate_results.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,11 @@ def _main(repo_dir: PathLike, force: bool = False, bases: Sequence[str] | None =
419419
title="Memory usage change by configuration",
420420
),
421421
),
422+
(
423+
plot.benchmark_longitudinal_plot,
424+
(benchmarking_results, repo_dir / "benchmarks.svg"),
425+
{},
426+
),
422427
],
423428
"Generating plots",
424429
):

bench_runner/templates/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Here are some recent and important revisions. 👉 [Complete list of results](RE
99
[Currently failing benchmarks](failures.md).
1010

1111
**Key:** 📄: table, 📈: time plot, 🧠: memory plot
12+
1213
<!-- START table -->
1314

1415
<!-- END table -->
@@ -27,6 +28,8 @@ The results have a resolution of 0.01 (1%).
2728

2829
![Configuration speed improvement](/configs.svg)
2930

31+
There is also a [longitudinal plot by benchmark](/benchmarks.svg).
32+
3033
## Documentation
3134

3235
### Running benchmarks from the GitHub web UI

tests/data/bench_runner.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ subplots = [
2121
{ name = "JIT", version = "3.14", head_flags = ["JIT"] },
2222
]
2323

24+
[benchmark_longitudinal_plot]
25+
base = "3.10.4"
26+
version = "3.11"
27+
runners = ["linux"]
28+
2429
[publish_mirror]
2530
skip = false
2631

0 commit comments

Comments
 (0)