Skip to content

Commit 09760ca

Browse files
committed
Add a helper script to bisect a regression
1 parent 1b05216 commit 09760ca

5 files changed

Lines changed: 270 additions & 2 deletions

File tree

bench_runner/__main__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
COMMANDS = {
99
"backfill": "Schedule benchmarking a number of commits",
10+
"bisect": "Run a bisect to find the commit that caused a regression",
1011
"compare": "Compare a matrix of specific results",
1112
"find_failures": "Find the benchmarks that failed in the last weekly run",
1213
"generate_results": "Create all of the derived artifacts from raw data",

bench_runner/git.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,3 +168,12 @@ def clone(
168168
if depth is not None:
169169
args += ["--depth", str(depth)]
170170
subprocess.check_call(args)
171+
172+
173+
def checkout(dirname: PathLike, ref: str) -> None:
    """Check out *ref* in the existing git working tree at *dirname*."""
    subprocess.check_call(["git", "checkout", ref], cwd=Path(dirname))

bench_runner/scripts/bisect.py

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
import argparse
2+
import contextlib
3+
from pathlib import Path
4+
import subprocess
5+
import sys
6+
import traceback
7+
8+
9+
import numpy as np
10+
import rich_argparse
11+
12+
13+
from bench_runner import flags as mflags
14+
from bench_runner import git
15+
from bench_runner import result
16+
from bench_runner.scripts import run_benchmarks
17+
from bench_runner.scripts import workflow
18+
from bench_runner.util import PathLike, format_seconds
19+
20+
21+
def _get_result_commandline(
22+
benchmark: str,
23+
good_val: float,
24+
bad_val: float,
25+
pgo: bool,
26+
flags: str,
27+
repo: PathLike,
28+
) -> list[str]:
29+
repo = Path(repo)
30+
31+
return [
32+
sys.executable,
33+
"-m",
34+
"bench_runner.scripts.bisect",
35+
benchmark,
36+
str(good_val),
37+
str(bad_val),
38+
str(pgo),
39+
str(flags),
40+
str(repo.absolute()),
41+
]
42+
43+
44+
def parse_result(benchmark_json: PathLike) -> float:
    """Return a single timing (seconds) summarizing *benchmark_json*.

    The benchmark name recorded inside the JSON file may differ from the
    name used to select the benchmark, so we simply average the means of
    every benchmark present in the file.
    """
    parsed = result.Result.from_arbitrary_filename(benchmark_json)
    means = [series.mean() for series in parsed.get_timing_data().values()]
    return float(np.mean(means))
51+
52+
53+
def get_result(
    benchmark: str,
    pgo: bool = False,
    flags: str = "",
    cpython: PathLike = Path("cpython"),
    reconfigure: bool = False,
) -> float:
    """Build the CPython checkout in place, run *benchmark* on it, and
    return the mean timing parsed from the benchmark JSON output."""
    cpython = Path(cpython)

    # Jumping around through commits with PGO can leave stale PGO data
    # around, so it must be cleaned each time.  Cleaning also happens the
    # first time through (reconfigure=True) in case the *last* bisect run
    # used pgo.
    if pgo or reconfigure:
        subprocess.run(["make", "clean"], cwd=cpython)

    workflow.compile_unix(cpython, mflags.parse_flags(flags), pgo, False, reconfigure)
    run_benchmarks.run_benchmarks(cpython / "python", benchmark)
    return parse_result(run_benchmarks.BENCHMARK_JSON)
73+
74+
75+
def get_log_file() -> Path:
    """Path of the human-readable bisect log, relative to the current directory."""
    return Path("bisect_log.txt")
77+
78+
79+
def delete_log() -> None:
    """Remove the bisect log if it exists; a no-op otherwise."""
    # missing_ok collapses the is_file()/unlink() check-then-act pair (and
    # its race window) into a single call.
    get_log_file().unlink(missing_ok=True)
83+
84+
85+
def show_log() -> None:
    """Print the accumulated bisect log to stdout."""
    bisect_log = get_log_file()

    print()
    print("Bisect log:")

    # The log may not exist yet if something failed before the first
    # measurement was logged.  show_log() runs inside a `finally:` in
    # _main(), so raising FileNotFoundError here would mask the original
    # error -- just show nothing instead.
    if not bisect_log.is_file():
        return

    with bisect_log.open("r") as f:
        # Iterate the file directly rather than materializing readlines().
        for line in f:
            print(line.strip())
94+
95+
96+
def log(message: str) -> None:
    """Append *message* as a single line to the bisect log file."""
    with get_log_file().open("a") as log_file:
        print(message, file=log_file)
99+
100+
101+
def _main(
    benchmark: str,
    good: str,
    bad: str,
    pgo: bool = False,
    flags: str = "",
    repo: PathLike = Path("."),
    cpython: PathLike = Path("cpython"),
):
    """Drive `git bisect` to find the first commit that regressed *benchmark*.

    Measures the known-good and known-bad commits first, then lets
    `git bisect run` re-invoke this module (via _get_result_commandline) on
    each candidate commit between them.
    """
    repo = Path(repo).absolute()
    cpython = Path(cpython).absolute()

    delete_log()

    # A full-history clone (depth=None) is required: the bisect may land on
    # any commit in the good..bad range.
    if not cpython.is_dir():
        git.clone(
            cpython, "https://github.com/python/cpython.git", branch="main", depth=None
        )

    # Measure both endpoints up front; each candidate commit is later
    # classified by which endpoint timing it is closer to.  The first build
    # passes reconfigure=True so ./configure runs at least once.
    git.checkout(cpython, good)
    good_timing = get_result(benchmark, pgo, flags, cpython=cpython, reconfigure=True)
    log(f"KNOWN GOOD ({good[:7]}): {format_seconds(good_timing)}")

    git.checkout(cpython, bad)
    bad_timing = get_result(benchmark, pgo, flags, cpython=cpython)
    log(f"KNOWN BAD ({bad[:7]}): {format_seconds(bad_timing)}")

    try:
        with contextlib.chdir(cpython):
            subprocess.run(["git", "bisect", "start"])
            subprocess.run(["git", "bisect", "bad", bad])
            subprocess.run(["git", "bisect", "good", good])
            # `git bisect run` re-executes this module as a script; the
            # script's exit status classifies each commit (see __main__).
            subprocess.run(
                ["git", "bisect", "run"]
                + _get_result_commandline(
                    benchmark, good_timing, bad_timing, pgo, flags, repo
                )
            )
    finally:
        # Always show and then clear the log, even if the bisect blew up.
        show_log()
        delete_log()
142+
143+
144+
def main():
    # This is the entry point for the user

    parser = argparse.ArgumentParser(
        description="""
        Run bisect on a benchmark to find the first regressing commit.

        A full checkout of CPython should be in the cpython directory.
        If it doesn't exist, it will be cloned.
        """,
        formatter_class=rich_argparse.ArgumentDefaultsRichHelpFormatter,
    )
    parser.add_argument(
        "benchmark",
        type=str,
        help="The benchmark to run bisect on.",
    )
    parser.add_argument(
        "good",
        type=str,
        help="The good commit hash for the bisect.",
    )
    parser.add_argument(
        "bad",
        type=str,
        help="The bad commit hash for the bisect.",
    )
    # Build with profile-guided optimization (get_result cleans stale PGO
    # data between commits when this is set).
    parser.add_argument(
        "--pgo",
        action="store_true",
    )
    # Extra build flags; parsed downstream by bench_runner.flags.parse_flags.
    parser.add_argument(
        "--flags",
        type=str,
        default="",
    )

    args = parser.parse_args()

    _main(args.benchmark, args.good, args.bad, args.pgo, args.flags)
184+
185+
186+
if __name__ == "__main__":
    # This is the entry point when we are called from `git bisect run` itself
    # (argv layout must match _get_result_commandline).

    parser = argparse.ArgumentParser()
    parser.add_argument("benchmark", type=str)
    parser.add_argument("good_val", type=float)
    parser.add_argument("bad_val", type=float)
    # pgo arrives as the string "True"/"False" (str() of a bool upstream).
    parser.add_argument("pgo", type=str)
    parser.add_argument("flags", type=str)
    parser.add_argument("repo", type=str)
    args = parser.parse_args()

    # Timings above the midpoint are classified "bad", below it "good".
    mid_point = (args.good_val + args.bad_val) / 2.0

    repo = Path(args.repo)
    cpython = repo / "cpython"

    try:
        with contextlib.chdir(repo):
            timing = get_result(
                args.benchmark, args.pgo == "True", args.flags, cpython=cpython
            )
    except Exception as e:
        # If there was any exception, display that exception and traceback and
        # then abort the git bisect with -1 (exit status 255, which tells
        # `git bisect run` to abort the whole bisect rather than classify
        # this commit).
        traceback.print_exception(e)
        sys.exit(-1)

    # The confidence is 0.0 at the mid-point, 1.0 at the good and bad values,
    # and > 1.0 outside of that.
    confidence = abs((timing - mid_point) / ((args.bad_val - args.good_val) / 2.0))

    with contextlib.chdir(repo):
        if timing > mid_point:
            log(
                f"BISECT BAD ({git.get_git_hash(cpython)[:7]}): "
                f"{format_seconds(timing)} (confidence {confidence:.02f})"
            )
            print(f"BAD: {timing} vs. ({args.good_val}, {args.bad_val})")
            # Exit status 1 marks this commit "bad" for `git bisect run`.
            sys.exit(1)
        else:
            log(
                f"BISECT GOOD ({git.get_git_hash(cpython)[:7]}): "
                f"{format_seconds(timing)} (confidence {confidence:.02f})"
            )
            print(f"GOOD: {timing} vs. ({args.good_val}, {args.bad_val})")
            # Exit status 0 marks this commit "good".
            sys.exit(0)

bench_runner/scripts/workflow.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,13 @@ def checkout_benchmarks():
130130
)
131131

132132

133-
def compile_unix(cpython: PathLike, flags: list[str], pgo: bool, pystats: bool) -> None:
133+
def compile_unix(
134+
cpython: PathLike,
135+
flags: list[str],
136+
pgo: bool,
137+
pystats: bool,
138+
reconfigure: bool = True,
139+
) -> None:
134140
cpython = Path(cpython)
135141
cfg = config.get_config_for_current_runner()
136142

@@ -166,7 +172,8 @@ def compile_unix(cpython: PathLike, flags: list[str], pgo: bool, pystats: bool)
166172
make_args.extend(["-j"])
167173

168174
with contextlib.chdir(cpython):
169-
subprocess.check_call(["./configure", *args], env=env)
175+
if reconfigure:
176+
subprocess.check_call(["./configure", *args], env=env)
170177
subprocess.check_call(["make", *make_args], env=env)
171178

172179

bench_runner/util.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,25 @@ def get_simple_platform() -> Literal["linux", "macos", "windows"]:
8787
raise RuntimeError(f"Unsupported platform {sys.platform}.")
8888

8989

90+
def format_seconds(value: float) -> str:
    """
    Format a duration given in seconds as a human-readable string, choosing
    the unit (sec/ms/us/ns) and the precision from the magnitude of *value*.
    """
    units = ("sec", "ms", "us", "ns")

    # Find the largest power of ten not exceeding the value, clamped to
    # [-9, 2]; values below 10 ns (including 0) fall through to -9.
    exponent = -9
    for candidate in range(2, -9, -1):
        if value >= 10.0**candidate:
            exponent = candidate
            break

    # Show three digits in total, e.g. 1.23, 12.3, 123.
    precision = 2 - exponent % 3
    unit_index = -(exponent // 3) if exponent < 0 else 0
    scaled = value * 10 ** (unit_index * 3)
    return f"{scaled:.{precision}f} {units[unit_index]}"
107+
108+
90109
if os.getenv("GITHUB_ACTIONS") == "true":
91110

92111
@contextlib.contextmanager

0 commit comments

Comments
 (0)