|
2 | 2 |
|
3 | 3 |
|
4 | 4 | import argparse |
| 5 | +from collections import defaultdict |
5 | 6 | import datetime |
6 | 7 | import functools |
7 | 8 | import json |
@@ -117,6 +118,25 @@ def get_flag_effect_plot_config(): |
117 | 118 | return plots |
118 | 119 |
|
119 | 120 |
|
@functools.cache
def get_benchmark_longitudinal_plot_config():
    """Load and validate the ``benchmark_longitudinal_plot`` configuration.

    Returns the ``benchmark_longitudinal_plot`` section of the bench runner
    config, guaranteed to contain the required "base", "version" and
    "runner" keys, with "head_flags"/"base_flags" normalized to
    deduplicated, sorted lists (defaulting to ``[]`` when absent).

    Cached so the config is read and validated once per process.

    Raises:
        ValueError: If a required key is missing from the config section.
    """
    cfg = mconfig.get_bench_runner_config()

    plot = cfg.get("benchmark_longitudinal_plot", {})
    # Raise rather than assert so the validation survives `python -O`,
    # which strips assert statements.
    for key in ("base", "version", "runner"):
        if key not in plot:
            raise ValueError(
                f"benchmark_longitudinal_plot config is missing {key!r}"
            )
    # Normalize both flag lists the same way: deduplicate and sort so they
    # compare reliably against other sorted flag tuples elsewhere.
    for key in ("head_flags", "base_flags"):
        plot[key] = sorted(set(plot.get(key, [])))
    return plot
| 138 | + |
| 139 | + |
120 | 140 | def plot_diff_pair(ax, data): |
121 | 141 | if not len(data): |
122 | 142 | return [] |
@@ -467,11 +487,11 @@ def get_comparison_value(ref, r): |
467 | 487 |
|
468 | 488 | for runner in mrunners.get_runners(): |
469 | 489 | head_results = commits.get(runner.nickname, {}).get( |
470 | | - tuple(subplot["head_flags"]), {} |
| 490 | + tuple(sorted(subplot["head_flags"])), {} |
471 | 491 | ) |
472 | 492 | base_results = commits.get( |
473 | 493 | subplot["runner_map"].get(runner.nickname, runner.nickname), {} |
474 | | - ).get(tuple(subplot["base_flags"]), {}) |
| 494 | + ).get(tuple(sorted(subplot["base_flags"])), {}) |
475 | 495 |
|
476 | 496 | line = [] |
477 | 497 | for cpython_hash, r in head_results.items(): |
@@ -511,6 +531,86 @@ def get_comparison_value(ref, r): |
511 | 531 | json.dump(data, fd, indent=2) |
512 | 532 |
|
513 | 533 |
|
def benchmark_longitudinal_plot(
    results: Iterable[result.Result], output_filename: PathLike
):
    """Plot per-benchmark performance over time for the configured runner.

    Each result matching the configured head version prefix and flags is
    compared against a single configured base result; every benchmark whose
    mean timing change exceeds 1% gets one small time-series subplot.

    Comparison values are memoized in a JSON file next to *output_filename*
    (same stem, ``.json`` suffix) so result files already compared in a
    previous run are not re-processed.

    Args:
        results: All known results; filtered to fork "python" and the
            configured runner nickname.
        output_filename: Path of the plot image to write.

    Raises:
        ValueError: If the configured base version is not among *results*.
    """
    output_filename = Path(output_filename)

    # Load the memoized comparisons, keyed by result filename.
    cache_filename = output_filename.with_suffix(".json")
    if cache_filename.is_file():
        with cache_filename.open() as fd:
            cache = json.load(fd)
    else:
        cache = {}

    cfg = get_benchmark_longitudinal_plot_config()

    results = [r for r in results if r.fork == "python" and r.nickname == cfg["runner"]]

    base = None
    for r in results:
        if r.version == cfg["base"] and r.flags == cfg["base_flags"]:
            base = r
            break
    else:
        raise ValueError(f"Base version {cfg['base']} not found")

    results = [
        r
        for r in results
        if r.version.startswith(cfg["version"]) and r.flags == cfg["head_flags"]
    ]

    by_benchmark = defaultdict(list)
    for r in results:
        if r.filename.name not in cache:
            # Create the cache entry up front, even when no benchmark ends
            # up passing the threshold below.  Creating it lazily inside the
            # `mean > 0.01` branch (as a naive version would) both raises
            # KeyError at the lookup just after this block and defeats the
            # cache, re-running the comparison on every invocation.
            entry = cache[r.filename.name] = {}
            comparison = result.BenchmarkComparison(base, r, "")
            # Keep only benchmarks whose mean change exceeds 1%.
            for name, _diff, mean in comparison.get_timing_diff():
                if mean > 0.01:
                    entry[name] = [r.commit_date, mean, r.cpython_hash]

        for name, value in cache[r.filename.name].items():
            by_benchmark[name].append(value)

    # Persist the (possibly extended) cache for the next run.
    with cache_filename.open("w") as fd:
        json.dump(cache, fd, indent=2)

    # Only plot benchmarks with enough data points to show a trend.
    by_benchmark = {k: v for k, v in by_benchmark.items() if len(v) > 2}

    fig, axs = plt.subplots(
        len(by_benchmark),
        1,
        figsize=(10, len(by_benchmark)),
        layout="constrained",
    )
    if len(by_benchmark) == 1:
        # plt.subplots returns a bare Axes (not an array) when nrows == 1.
        axs = [axs]

    plt.suptitle(
        f"Performance change by benchmark on {cfg['version']} vs. {cfg['base']}"
    )

    for (benchmark, timings), ax in zip(sorted(by_benchmark.items()), axs):
        # Each entry is [iso_date, mean, cpython_hash]; order by commit date.
        timings.sort(key=lambda x: datetime.datetime.fromisoformat(x[0]))
        dates = [datetime.datetime.fromisoformat(x[0]) for x in timings]
        ax.plot(dates, [x[1] for x in timings])
        ax.set_xticks([])
        ax.set_ylabel(benchmark, rotation=0, horizontalalignment="right")
        ax.yaxis.set_major_formatter(formatter)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.grid(True, axis="y")
        ax.axhline(1.0, color="#666", linestyle="-")
        ax.set_facecolor("#f0f0f0")

    savefig(output_filename, dpi=150)
| 612 | + |
| 613 | + |
514 | 614 | if __name__ == "__main__": |
515 | 615 | parser = argparse.ArgumentParser( |
516 | 616 | "Compare two benchmark .json files", |
|
0 commit comments