
Commit d4f283f

Add a by-benchmark longitudinal plot
1 parent aa2ac8c commit d4f283f

4 files changed · 122 additions & 4 deletions

File tree:
- README.md
- bench_runner/plot.py
- bench_runner/scripts/generate_results.py
- bench_runner/templates/README.md

README.md

Lines changed: 12 additions & 2 deletions
@@ -140,7 +140,7 @@ For each runner in your `bench_runner.toml`, you can specify a `plot` table with
 - `marker`: A [matplotlib marker](https://matplotlib.org/stable/api/markers_api.html#module-matplotlib.markers)
 - `color`: A [matplotlib color](https://matplotlib.org/stable/users/explain/colors/colors.html#colors-def)
 
-#### Longitudinal plot configuration
+##### Longitudinal plot configuration
 
 The longitudinal plot shows the change of a version branch over time against a specified base version. It is made up of multiple subplots, each with its own head and base, and optionally configuration flags.

@@ -162,7 +162,7 @@ subplots = [
 ]
 ```
 
-#### Flag effect plot configuration
+##### Flag effect plot configuration
 
 The flag effect plot shows the effect of specified configuration flags against a base with the same commit hash, but different configuration flags.

@@ -186,6 +186,16 @@ head_flags = ["TAILCALL"]
 runner_map = { linux_clang = "linux" }
 ```
 
+##### Benchmark longitudinal plot configuration
+
+The benchmark longitudinal plot shows the change over time, per benchmark. The configuration consists of the following keys:
+
+- `base`: The base version
+- `version`: The version to track
+- `runner`: The runner to use
+- `head_flags`: (optional) The flags to use for the head commits
+- `base_flags`: (optional) The flags to use for the base commits
+
 #### Purging old data
 
 With a local checkout of your results repository you can perform some maintenance tasks.
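For reference, here is a minimal sketch of how the new table might look in `bench_runner.toml`, mirroring the keys documented in the diff above. The versions, runner nickname, and flags below are hypothetical placeholders, not values taken from this commit:

```toml
# Hypothetical example values -- substitute the runner nicknames, versions,
# and flags used in your own results repository.
[benchmark_longitudinal_plot]
base = "3.12.0"            # base version every head result is compared against
version = "3.13"           # version prefix to track over time
runner = "linux"           # runner nickname whose results are plotted
head_flags = ["TAILCALL"]  # optional; flags required on head results
base_flags = []            # optional; flags required on the base result
```

Since the config loader normalizes the flag lists with `sorted(set(...))`, the order in which flags are written here should not matter.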

bench_runner/plot.py

Lines changed: 102 additions & 2 deletions
@@ -2,6 +2,7 @@
 
 
 import argparse
+from collections import defaultdict
 import datetime
 import functools
 import json
@@ -117,6 +118,25 @@ def get_flag_effect_plot_config():
     return plots
 
 
+@functools.cache
+def get_benchmark_longitudinal_plot_config():
+    cfg = mconfig.get_bench_runner_config()
+
+    plot = cfg.get("benchmark_longitudinal_plot", {})
+    assert "base" in plot
+    assert "version" in plot
+    assert "runner" in plot
+    if "head_flags" not in plot:
+        plot["head_flags"] = []
+    else:
+        plot["head_flags"] = sorted(set(plot["head_flags"]))
+    if "base_flags" not in plot:
+        plot["base_flags"] = []
+    else:
+        plot["base_flags"] = sorted(set(plot["base_flags"]))
+    return plot
+
+
 def plot_diff_pair(ax, data):
     if not len(data):
         return []
@@ -467,11 +487,11 @@ def get_comparison_value(ref, r):
 
     for runner in mrunners.get_runners():
         head_results = commits.get(runner.nickname, {}).get(
-            tuple(subplot["head_flags"]), {}
+            tuple(sorted(subplot["head_flags"])), {}
         )
         base_results = commits.get(
             subplot["runner_map"].get(runner.nickname, runner.nickname), {}
-        ).get(tuple(subplot["base_flags"]), {})
+        ).get(tuple(sorted(subplot["base_flags"])), {})
 
         line = []
         for cpython_hash, r in head_results.items():
@@ -511,6 +531,86 @@ def get_comparison_value(ref, r):
         json.dump(data, fd, indent=2)
 
 
+def benchmark_longitudinal_plot(
+    results: Iterable[result.Result], output_filename: PathLike
+):
+    output_filename = Path(output_filename)
+
+    cache_filename = output_filename.with_suffix(".json")
+    if cache_filename.is_file():
+        with cache_filename.open() as fd:
+            cache = json.load(fd)
+    else:
+        cache = {}
+
+    cfg = get_benchmark_longitudinal_plot_config()
+
+    results = [r for r in results if r.fork == "python" and r.nickname == cfg["runner"]]
+
+    base = None
+    for r in results:
+        if r.version == cfg["base"] and r.flags == cfg["base_flags"]:
+            base = r
+            break
+    else:
+        raise ValueError(f"Base version {cfg['base']} not found")
+
+    results = [
+        r
+        for r in results
+        if r.version.startswith(cfg["version"]) and r.flags == cfg["head_flags"]
+    ]
+
+    by_benchmark = defaultdict(list)
+    for r in results:
+        if r.filename.name not in cache:
+            comparison = result.BenchmarkComparison(base, r, "")
+            timing = comparison.get_timing_diff()
+
+            for name, _diff, mean in timing:
+                if mean > 0.01:
+                    value = [r.commit_date, mean, r.cpython_hash]
+                    if r.filename.name not in cache:
+                        cache[r.filename.name] = {}
+                    cache[r.filename.name][name] = value
+
+        for name, value in cache[r.filename.name].items():
+            by_benchmark[name].append(value)
+
+    with cache_filename.open("w") as fd:
+        json.dump(cache, fd, indent=2)
+
+    by_benchmark = {k: v for k, v in by_benchmark.items() if len(v) > 2}
+
+    fig, axs = plt.subplots(
+        len(by_benchmark),
+        1,
+        figsize=(10, len(by_benchmark)),
+        layout="constrained",
+    )
+    if len(by_benchmark) == 1:
+        axs = [axs]
+
+    plt.suptitle(
+        f"Performance change by benchmark on {cfg['version']} vs. {cfg['base']}"
+    )
+
+    for (benchmark, timings), ax in zip(sorted(by_benchmark.items()), axs):
+        timings.sort(key=lambda x: datetime.datetime.fromisoformat(x[0]))
+        dates = [datetime.datetime.fromisoformat(x[0]) for x in timings]
+        ax.plot(dates, [x[1] for x in timings])
+        ax.set_xticks([])
+        ax.set_ylabel(benchmark, rotation=0, horizontalalignment="right")
+        ax.yaxis.set_major_formatter(formatter)
+        for spine in ax.spines.values():
+            spine.set_visible(False)
+        ax.grid(True, axis="y")
+        ax.axhline(1.0, color="#666", linestyle="-")
+        ax.set_facecolor("#f0f0f0")
+
+    savefig(output_filename, dpi=150)
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         "Compare two benchmark .json files",

bench_runner/scripts/generate_results.py

Lines changed: 5 additions & 0 deletions
@@ -419,6 +419,11 @@ def _main(repo_dir: PathLike, force: bool = False, bases: Sequence[str] | None =
                     title="Memory usage change by configuration",
                 ),
             ),
+            (
+                plot.benchmark_longitudinal_plot,
+                (benchmarking_results, repo_dir / "benchmarks.svg"),
+                {},
+            ),
         ],
         "Generating plots",
     ):
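The new entry follows the existing pattern in `_main`: each plot job is a `(callable, args, kwargs)` tuple collected in a list and, presumably, invoked in turn while progress is reported. A minimal sketch of that dispatch idea, assuming plain functions and a simple loop; the helper names below are invented for illustration, while the real script passes functions from `bench_runner.plot` and wraps the iteration in its own progress handling:

```python
# Sketch of the (callable, args, kwargs) job-list pattern suggested by the diff.
from pathlib import Path


def longitudinal_plot(results: list, output: Path) -> None:
    print(f"would write longitudinal plot to {output}")


def benchmark_longitudinal_plot(results: list, output: Path) -> None:
    print(f"would write per-benchmark plot to {output}")


def generate_plots(results: list, repo_dir: Path) -> None:
    jobs = [
        (longitudinal_plot, (results, repo_dir / "longitudinal.svg"), {}),
        # The commit appends an entry shaped like this one, with empty kwargs:
        (benchmark_longitudinal_plot, (results, repo_dir / "benchmarks.svg"), {}),
    ]
    for func, args, kwargs in jobs:
        func(*args, **kwargs)


if __name__ == "__main__":
    generate_plots([], Path("."))
```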

bench_runner/templates/README.md

Lines changed: 3 additions & 0 deletions
@@ -9,6 +9,7 @@ Here are some recent and important revisions. 👉 [Complete list of results](RE
 [Currently failing benchmarks](failures.md).
 
 **Key:** 📄: table, 📈: time plot, 🧠: memory plot
+
 <!-- START table -->
 
 <!-- END table -->
@@ -27,6 +28,8 @@ The results have a resolution of 0.01 (1%).
 
 ![Configuration speed improvement](/configs.svg)
 
+There is also a [longitudinal plot by benchmark](/benchmarks.svg).
+
 ## Documentation
 
 ### Running benchmarks from the GitHub web UI