|
| 1 | +import argparse |
| 2 | +import contextlib |
| 3 | +from pathlib import Path |
| 4 | +import subprocess |
| 5 | +import sys |
| 6 | +import traceback |
| 7 | + |
| 8 | + |
| 9 | +import numpy as np |
| 10 | +import rich_argparse |
| 11 | + |
| 12 | + |
| 13 | +from bench_runner import flags as mflags |
| 14 | +from bench_runner import git |
| 15 | +from bench_runner import result |
| 16 | +from bench_runner.scripts import run_benchmarks |
| 17 | +from bench_runner.scripts import workflow |
| 18 | +from bench_runner.util import PathLike, format_seconds |
| 19 | + |
| 20 | + |
| 21 | +def _get_result_commandline( |
| 22 | + benchmark: str, |
| 23 | + good_val: float, |
| 24 | + bad_val: float, |
| 25 | + pgo: bool, |
| 26 | + flags: str, |
| 27 | + repo: PathLike, |
| 28 | +) -> list[str]: |
| 29 | + repo = Path(repo) |
| 30 | + |
| 31 | + return [ |
| 32 | + sys.executable, |
| 33 | + "-m", |
| 34 | + "bench_runner.scripts.bisect", |
| 35 | + benchmark, |
| 36 | + str(good_val), |
| 37 | + str(bad_val), |
| 38 | + str(pgo), |
| 39 | + str(flags), |
| 40 | + str(repo.absolute()), |
| 41 | + ] |
| 42 | + |
| 43 | + |
def parse_result(benchmark_json: PathLike) -> float:
    """Return the overall mean timing across all benchmarks in *benchmark_json*."""
    # The name of the benchmark inside the JSON file may differ from the
    # name used to select it, so rather than looking one up by name we
    # average over every timing series present in the file.
    res = result.Result.from_arbitrary_filename(benchmark_json)
    per_benchmark_means = [series.mean() for series in res.get_timing_data().values()]
    return float(np.mean(per_benchmark_means))
| 51 | + |
| 52 | + |
def get_result(
    benchmark: str,
    pgo: bool = False,
    flags: str = "",
    cpython: PathLike = Path("cpython"),
    reconfigure: bool = False,
) -> float:
    """Build CPython at the current checkout, run *benchmark*, return mean timing."""
    cpython = Path(cpython)

    # Jumping around through commits with PGO can leave stale PGO data
    # behind, so clean before every PGO build.  We also clean on the very
    # first run (reconfigure=True) in case the *last* bisect session used
    # PGO.
    if pgo or reconfigure:
        subprocess.run(["make", "clean"], cwd=cpython)

    workflow.compile_unix(cpython, mflags.parse_flags(flags), pgo, False, reconfigure)
    run_benchmarks.run_benchmarks(cpython / "python", benchmark)
    return parse_result(run_benchmarks.BENCHMARK_JSON)
| 73 | + |
| 74 | + |
def get_log_file() -> Path:
    """Path of the bisect log, relative to the current working directory."""
    return Path() / "bisect_log.txt"
| 77 | + |
| 78 | + |
def delete_log() -> None:
    """Remove the bisect log file; a missing file is not an error.

    Uses ``unlink(missing_ok=True)`` instead of the original
    ``is_file()``-then-``unlink()`` sequence, which had a check-then-act
    race and required two filesystem calls.
    """
    get_log_file().unlink(missing_ok=True)
| 83 | + |
| 84 | + |
def show_log() -> None:
    """Print the accumulated bisect log to stdout.

    Fixes two small defects in the original: the local ``bisect_log``
    was assigned but never used (``get_log_file()`` was called a second
    time for the ``open``), and ``readlines()`` materialized the whole
    file when iterating the file object directly suffices.
    """
    print()
    print("Bisect log:")

    with get_log_file().open("r") as f:
        for line in f:
            print(line.strip())
| 94 | + |
| 95 | + |
def log(message: str) -> None:
    """Append *message* as a single line to the bisect log file."""
    with get_log_file().open("a") as f:
        print(message, file=f)
| 99 | + |
| 100 | + |
def _main(
    benchmark: str,
    good: str,
    bad: str,
    pgo: bool = False,
    flags: str = "",
    repo: PathLike = Path("."),
    cpython: PathLike = Path("cpython"),
):
    """Drive a full ``git bisect`` session over a CPython checkout.

    Measures *benchmark* at the known-good and known-bad commits first,
    then hands control to ``git bisect run``, which re-invokes this module
    (see the ``__main__`` block) once per candidate commit with the two
    endpoint timings on its command line.

    Args:
        benchmark: Benchmark to run at each commit.
        good: Commit-ish known to have the good (faster) timing.
        bad: Commit-ish known to have the bad (slower) timing.
        pgo: Build CPython with PGO at every step.
        flags: Extra build flags (parsed by ``bench_runner.flags``).
        repo: Root of the working directory passed back to the child runs.
        cpython: Location of the CPython clone; cloned from GitHub if missing.
    """
    repo = Path(repo).absolute()
    cpython = Path(cpython).absolute()

    # Start each session with a fresh log file.
    delete_log()

    if not cpython.is_dir():
        git.clone(
            cpython, "https://github.com/python/cpython.git", branch="main", depth=None
        )

    # Measure the two endpoints up front; each bisect step classifies its
    # timing against the midpoint of these values.  reconfigure=True on the
    # first build clears state left over from any previous session.
    git.checkout(cpython, good)
    good_timing = get_result(benchmark, pgo, flags, cpython=cpython, reconfigure=True)
    log(f"KNOWN GOOD ({good[:7]}): {format_seconds(good_timing)}")

    git.checkout(cpython, bad)
    bad_timing = get_result(benchmark, pgo, flags, cpython=cpython)
    log(f"KNOWN BAD ({bad[:7]}): {format_seconds(bad_timing)}")

    try:
        with contextlib.chdir(cpython):
            subprocess.run(["git", "bisect", "start"])
            subprocess.run(["git", "bisect", "bad", bad])
            subprocess.run(["git", "bisect", "good", good])
            # `git bisect run` calls back into this module; see
            # _get_result_commandline for the argument order.
            subprocess.run(
                ["git", "bisect", "run"]
                + _get_result_commandline(
                    benchmark, good_timing, bad_timing, pgo, flags, repo
                )
            )
    finally:
        # Always dump and clean up the log, even if bisect was interrupted.
        show_log()
        delete_log()
| 142 | + |
| 143 | + |
def main():
    """User-facing entry point: parse arguments and start a bisect session.

    (The ``__main__`` block below is a *different* entry point, used when
    ``git bisect run`` re-invokes this module per candidate commit.)

    Improvement over the original: ``--pgo`` and ``--flags`` previously had
    no ``help`` text, so ``--help`` output was uninformative for them.
    """
    parser = argparse.ArgumentParser(
        description="""
    Run bisect on a benchmark to find the first regressing commit.

    A full checkout of CPython should be in the cpython directory.
    If it doesn't exist, it will be cloned.
    """,
        formatter_class=rich_argparse.ArgumentDefaultsRichHelpFormatter,
    )
    parser.add_argument(
        "benchmark",
        type=str,
        help="The benchmark to run bisect on.",
    )
    parser.add_argument(
        "good",
        type=str,
        help="The good commit hash for the bisect.",
    )
    parser.add_argument(
        "bad",
        type=str,
        help="The bad commit hash for the bisect.",
    )
    parser.add_argument(
        "--pgo",
        action="store_true",
        help="Build CPython with PGO (profile-guided optimization) at each step.",
    )
    parser.add_argument(
        "--flags",
        type=str,
        default="",
        help="Additional build flags (parsed by bench_runner).",
    )

    args = parser.parse_args()

    _main(args.benchmark, args.good, args.bad, args.pgo, args.flags)
| 184 | + |
| 185 | + |
if __name__ == "__main__":
    # This is the entry point when we are called from `git bisect run` itself.
    # The positional arguments match _get_result_commandline's order exactly.

    parser = argparse.ArgumentParser()
    parser.add_argument("benchmark", type=str)
    parser.add_argument("good_val", type=float)
    parser.add_argument("bad_val", type=float)
    # pgo arrives as the string "True"/"False" because it was serialized
    # with str() when the parent built the command line.
    parser.add_argument("pgo", type=str)
    parser.add_argument("flags", type=str)
    parser.add_argument("repo", type=str)
    args = parser.parse_args()

    # A timing above the midpoint of the known endpoints is classified as
    # "bad", at or below as "good".
    mid_point = (args.good_val + args.bad_val) / 2.0

    repo = Path(args.repo)
    cpython = repo / "cpython"

    try:
        with contextlib.chdir(repo):
            timing = get_result(
                args.benchmark, args.pgo == "True", args.flags, cpython=cpython
            )
    except Exception as e:
        # If there was any exception, display that exception and traceback and
        # then abort the git bisect with -1 (an exit status outside 0-127
        # tells `git bisect run` to abort the whole session).
        traceback.print_exception(e)
        sys.exit(-1)

    # The confidence is 0.0 at the mid-point, 1.0 at the good and bad values,
    # and > 1.0 outside of that.
    confidence = abs((timing - mid_point) / ((args.bad_val - args.good_val) / 2.0))

    with contextlib.chdir(repo):
        if timing > mid_point:
            log(
                f"BISECT BAD ({git.get_git_hash(cpython)[:7]}): "
                f"{format_seconds(timing)} (confidence {confidence:.02f})"
            )
            print(f"BAD: {timing} vs. ({args.good_val}, {args.bad_val})")
            # Exit code 1 marks the current commit as bad for `git bisect run`.
            sys.exit(1)
        else:
            log(
                f"BISECT GOOD ({git.get_git_hash(cpython)[:7]}): "
                f"{format_seconds(timing)} (confidence {confidence:.02f})"
            )
            print(f"GOOD: {timing} vs. ({args.good_val}, {args.bad_val})")
            # Exit code 0 marks the current commit as good.
            sys.exit(0)
0 commit comments