|
2 | 2 |
|
3 | 3 |
|
4 | 4 | import argparse |
| 5 | +from collections import defaultdict |
5 | 6 | import datetime |
6 | 7 | import functools |
7 | 8 | import json |
@@ -118,6 +119,25 @@ def get_flag_effect_plot_config(): |
118 | 119 | return plots |
119 | 120 |
|
120 | 121 |
|
@functools.cache
def get_benchmark_longitudinal_plot_config():
    """Return the validated ``benchmark_longitudinal_plot`` config section.

    The section must define ``base``, ``version`` and ``runners``; a
    ``KeyError`` is raised when any of them is missing.  The optional
    ``head_flags``/``base_flags`` entries are normalized (in place) to
    sorted, de-duplicated lists so that flag comparisons don't depend on
    the order or duplication used in the config file.
    """
    cfg = mconfig.get_bench_runner_config()

    plot = cfg.get("benchmark_longitudinal_plot", {})
    # Raise an informative error rather than using a bare `assert`, which
    # is silently stripped when running under `python -O`.
    for required in ("base", "version", "runners"):
        if required not in plot:
            raise KeyError(
                f"benchmark_longitudinal_plot config is missing {required!r}"
            )
    # One loop instead of two copy-pasted if/else branches.
    for key in ("head_flags", "base_flags"):
        plot[key] = sorted(set(plot.get(key, [])))
    return plot
| 139 | + |
| 140 | + |
121 | 141 | def plot_diff_pair(ax, data): |
122 | 142 | if not len(data): |
123 | 143 | return [] |
@@ -477,11 +497,11 @@ def get_comparison_value(ref, r, force_valid): |
477 | 497 | if subplot["runner_map"] and not runner_is_mapped: |
478 | 498 | continue |
479 | 499 | head_results = commits.get(runner.nickname, {}).get( |
480 | | - tuple(subplot["head_flags"]), {} |
| 500 | + tuple(sorted(subplot["head_flags"])), {} |
481 | 501 | ) |
482 | 502 | base_results = commits.get( |
483 | 503 | subplot["runner_map"].get(runner.nickname, runner.nickname), {} |
484 | | - ).get(tuple(subplot["base_flags"]), {}) |
| 504 | + ).get(tuple(sorted(subplot["base_flags"])), {}) |
485 | 505 |
|
486 | 506 | line = [] |
487 | 507 | for cpython_hash, r in head_results.items(): |
@@ -525,6 +545,110 @@ def get_comparison_value(ref, r, force_valid): |
525 | 545 | json.dump(data, fd, indent=2) |
526 | 546 |
|
527 | 547 |
|
def benchmark_longitudinal_plot(
    results: Iterable[result.Result], output_filename: PathLike
):
    """Plot per-benchmark performance over time for a head version vs. a base.

    Selects results from the configured runners on the ``python`` fork,
    compares each against the single configured base result, and writes one
    horizontal strip of axes per benchmark to ``output_filename``.  Does
    nothing when no ``benchmark_longitudinal_plot`` section is configured.

    Comparison values are memoized in a ``.json`` file next to the output
    so repeated runs only compute comparisons for new result files.
    """
    if "benchmark_longitudinal_plot" not in mconfig.get_bench_runner_config():
        return

    output_filename = Path(output_filename)

    # Comparisons are expensive, so persist them alongside the plot output.
    cache_filename = output_filename.with_suffix(".json")
    if cache_filename.is_file():
        with cache_filename.open() as fd:
            cache = json.load(fd)
    else:
        cache = {}

    cfg = get_benchmark_longitudinal_plot_config()

    results = [
        r for r in results if r.fork == "python" and r.nickname in cfg["runners"]
    ]

    # NOTE(review): cfg flag lists are sorted/de-duplicated; this assumes
    # `r.flags` is normalized the same way -- confirm against result.Result.
    base = None
    for r in results:
        if r.version == cfg["base"] and r.flags == cfg["base_flags"]:
            base = r
            break
    else:
        raise ValueError(f"Base version {cfg['base']} not found")

    results = [
        r
        for r in results
        if r.version.startswith(cfg["version"]) and r.flags == cfg["head_flags"]
    ]

    # benchmark name -> runner nickname -> [commit_date, mean, hash] entries.
    by_benchmark = defaultdict(lambda: defaultdict(list))
    for r in results:
        if r.filename.name not in cache:
            # Create the cache entry up front.  Previously it was only
            # created when the first significant timing was seen, so a
            # result with no significant benchmarks raised KeyError below
            # (and was never cached, forcing recomputation every run).
            entry = cache[r.filename.name] = {}
            comparison = result.BenchmarkComparison(base, r, "")
            for name, _diff, mean in comparison.get_timing_diff():
                # Don't include insignificant results
                if mean > 0.0:
                    entry[name] = [r.commit_date, mean, r.cpython_hash]

        for name, value in cache[r.filename.name].items():
            by_benchmark[name][r.nickname].append(value)

    with cache_filename.open("w") as fd:
        json.dump(cache, fd, indent=2)

    # Exclude any benchmarks where we don't have enough data to make a
    # meaningful plot
    by_benchmark = {
        k: v for k, v in by_benchmark.items() if any(len(x) > 2 for x in v.values())
    }

    fig, axs = plt.subplots(
        len(by_benchmark),
        1,
        figsize=(10, len(by_benchmark)),
        layout="constrained",
    )
    # With a single row, subplots() returns a bare Axes rather than an array.
    if len(by_benchmark) == 1:
        axs = [axs]

    plt.suptitle(
        f"Performance change by benchmark on {cfg['version']} vs. {cfg['base']}"
    )

    first = True
    for (benchmark, runners), ax in zip(sorted(by_benchmark.items()), axs):
        for runner_name, timings in runners.items():
            runner = mrunners.get_runner_by_nickname(runner_name)
            timings.sort(key=lambda x: datetime.datetime.fromisoformat(x[0]))
            dates = [datetime.datetime.fromisoformat(x[0]) for x in timings]
            ax.plot(
                dates,
                [x[1] for x in timings],
                label=runner.plot.name,
                color=runner.plot.color,
                linestyle=runner.plot.style,
                marker=runner.plot.marker,
                markersize=2,
            )
        ax.set_xticks([])
        ax.set_ylabel(benchmark, rotation=0, horizontalalignment="right")
        ax.yaxis.set_major_formatter(formatter)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.grid(True, axis="y")
        # Reference line at 1.0 == "no change vs. base".
        ax.axhline(1.0, color="#666", linestyle="-")
        ax.set_facecolor("#f0f0f0")
        # Only the top subplot carries the legend to avoid repetition.
        if first:
            ax.legend(loc="upper left")
            first = False

    savefig(output_filename, dpi=150)
| 650 | + |
| 651 | + |
528 | 652 | if __name__ == "__main__": |
529 | 653 | parser = argparse.ArgumentParser( |
530 | 654 | "Compare two benchmark .json files", |
|
0 commit comments