Skip to content

Commit 4383b2a

Browse files
authored
Merge branch 'main' into runners_list
2 parents 57ea5a9 + dfaf4ea commit 4383b2a

6 files changed

Lines changed: 290 additions & 16 deletions

File tree

bench_runner/__main__.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import importlib
2+
import os
23
import sys
34

45

@@ -7,6 +8,7 @@
78

89
COMMANDS = {
910
"backfill": "Schedule benchmarking a number of commits",
11+
"bisect": "Run a bisect to find the commit that caused a regression",
1012
"compare": "Compare a matrix of specific results",
1113
"find_failures": "Find the benchmarks that failed in the last weekly run",
1214
"generate_results": "Create all of the derived artifacts from raw data",
@@ -24,6 +26,15 @@
2426
}
2527

2628
if __name__ == "__main__":
29+
# This lets pytest-cov collect coverage data in a subprocess
30+
if "COV_CORE_SOURCE" in os.environ:
31+
try:
32+
from pytest_cov.embed import init
33+
34+
init()
35+
except Exception:
36+
sys.stderr.write("pytest-cov: Failed to setup subprocess coverage.")
37+
2738
command = len(sys.argv) >= 2 and sys.argv[1] or ""
2839

2940
if command not in COMMANDS:
@@ -36,5 +47,5 @@
3647
sys.exit(1)
3748

3849
sys.argv = [sys.argv[0], *sys.argv[2:]]
39-
mod = importlib.import_module(f"bench_runner.scripts.{command}")
50+
mod = importlib.import_module(f".{command}", "bench_runner.scripts")
4051
mod.main()

bench_runner/git.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def clone(
138138
url: str,
139139
*,
140140
branch: str | None = None,
141-
depth: int = 1,
141+
depth: int | None = 1,
142142
) -> None:
143143
is_hash = re.match(r"^[0-9a-f]{40}$", branch) if branch else False
144144

@@ -168,3 +168,12 @@ def clone(
168168
if depth is not None:
169169
args += ["--depth", str(depth)]
170170
subprocess.check_call(args)
171+
172+
173+
def checkout(dirname: PathLike, ref: str) -> None:
    """
    Run ``git checkout <ref>`` inside the repository at *dirname*.

    Raises ``subprocess.CalledProcessError`` if the checkout fails.
    """
    repo_dir = Path(dirname)

    subprocess.check_call(
        ["git", "checkout", ref],
        cwd=repo_dir,
    )

bench_runner/scripts/bisect.py

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
import argparse
2+
import contextlib
3+
from pathlib import Path
4+
import subprocess
5+
import sys
6+
import traceback
7+
8+
9+
import numpy as np
10+
import rich_argparse
11+
12+
13+
from bench_runner import flags as mflags
14+
from bench_runner import git
15+
from bench_runner import result
16+
from bench_runner.scripts import run_benchmarks
17+
from bench_runner.scripts import workflow
18+
from bench_runner.util import PathLike, format_seconds
19+
20+
21+
def _get_result_commandline(
22+
benchmark: str,
23+
good_val: float,
24+
bad_val: float,
25+
pgo: bool,
26+
flags: str,
27+
repo: PathLike,
28+
) -> list[str]:
29+
repo = Path(repo)
30+
31+
return [
32+
sys.executable,
33+
"-m",
34+
"bench_runner.scripts.bisect",
35+
benchmark,
36+
str(good_val),
37+
str(bad_val),
38+
str(pgo),
39+
str(flags),
40+
str(repo.absolute()),
41+
]
42+
43+
44+
def parse_result(benchmark_json: PathLike) -> float:
    """
    Return a single mean timing (in seconds) for the results in *benchmark_json*.

    The name of the benchmark in the JSON file may be different from the one
    used to select the benchmark.  Therefore, rather than looking a benchmark
    up by name, the mean of the means of every benchmark in the file is used.
    """
    loaded = result.Result.from_arbitrary_filename(benchmark_json)
    per_benchmark_means = [samples.mean() for samples in loaded.get_timing_data().values()]
    return float(np.mean(per_benchmark_means))
51+
52+
53+
def get_result(
    benchmark: str,
    pgo: bool = False,
    flags: str = "",
    cpython: PathLike = Path("cpython"),
    reconfigure: bool = False,
) -> float:
    """
    Build CPython at its currently checked-out commit and run *benchmark*,
    returning a single mean timing in seconds (see ``parse_result``).

    ``benchmark``: the benchmark selector passed to ``run_benchmarks``.
    ``pgo``: build with profile-guided optimization.
    ``flags``: comma-style build flag string, parsed by ``mflags.parse_flags``.
    ``cpython``: path to the CPython checkout to build.
    ``reconfigure``: re-run ``./configure`` before building (passed through to
    ``workflow.compile_unix``).
    """
    cpython = Path(cpython)

    if pgo or reconfigure:
        # Jumping around through commits with PGO can leave stale PGO data
        # around, so we need to clean it each time. We also always do it the
        # first time in case the *last* bisect run used pgo.
        # NOTE(review): deliberately not check_call — on a fresh clone there is
        # no Makefile yet and `make clean` is expected to fail harmlessly.
        subprocess.run(["make", "clean"], cwd=cpython)

    # pystats is always False here; reconfigure is forwarded so subsequent
    # bisect steps can skip ./configure and only rebuild.
    workflow.compile_unix(cpython, mflags.parse_flags(flags), pgo, False, reconfigure)
    run_benchmarks.run_benchmarks(cpython / "python", benchmark)
    # run_benchmarks writes its output to the fixed BENCHMARK_JSON path.
    timing = parse_result(run_benchmarks.BENCHMARK_JSON)

    return timing
73+
74+
75+
def get_log_file() -> Path:
    """Return the path of the bisect log file, relative to the current directory."""
    log_name = "bisect_log.txt"
    return Path(log_name)
77+
78+
79+
def delete_log() -> None:
    """Delete the bisect log file, if one exists."""
    log_path = get_log_file()
    if log_path.is_file():
        log_path.unlink()
83+
84+
85+
def show_log() -> None:
    """Print the contents of the bisect log to stdout."""
    print()
    print("Bisect log:")

    with get_log_file().open("r") as log_file:
        for entry in log_file:
            print(entry.strip())
92+
93+
94+
def log(message: str) -> None:
    """Append *message*, followed by a newline, to the bisect log file."""
    with get_log_file().open("a") as log_file:
        log_file.write(f"{message}\n")
97+
98+
99+
def _main(
    benchmark: str,
    good: str,
    bad: str,
    pgo: bool = False,
    flags: str = "",
    repo: PathLike = Path("."),
    cpython: PathLike = Path("cpython"),
):
    """
    Drive a full ``git bisect`` session over a CPython checkout.

    Measures the known-good and known-bad commits first (logging both), then
    lets ``git bisect run`` re-invoke this module as a script for each
    candidate commit.  The bisect log is always shown and removed at the end,
    even if the bisect itself fails.

    ``good``/``bad``: commit refs bracketing the regression.
    ``repo``: the bench_runner working directory (passed to the child runs).
    ``cpython``: the CPython checkout; cloned here if it does not exist.
    """
    repo = Path(repo).absolute()
    cpython = Path(cpython).absolute()

    # Start from a clean log; stale entries from a previous run would be
    # mixed into this run's output otherwise.
    delete_log()

    if not cpython.is_dir():
        # depth=None means no --depth argument, i.e. a full-history clone --
        # bisect needs to be able to check out arbitrary old commits.
        git.clone(
            cpython, "https://github.com/python/cpython.git", branch="main", depth=None
        )

    # Measure the endpoints first; reconfigure=True on the first build to make
    # sure ./configure has run at least once in this checkout.
    git.checkout(cpython, good)
    good_timing = get_result(benchmark, pgo, flags, cpython=cpython, reconfigure=True)
    log(f"KNOWN GOOD ({good[:7]}): {format_seconds(good_timing)}")

    git.checkout(cpython, bad)
    bad_timing = get_result(benchmark, pgo, flags, cpython=cpython)
    log(f"KNOWN BAD ({bad[:7]}): {format_seconds(bad_timing)}")

    try:
        # NOTE: contextlib.chdir requires Python 3.11+.
        # Return codes of the bisect commands are deliberately not checked;
        # the log shown in the finally block is the real output of interest.
        with contextlib.chdir(cpython):
            subprocess.run(["git", "bisect", "start"])
            subprocess.run(["git", "bisect", "bad", bad])
            subprocess.run(["git", "bisect", "good", good])
            subprocess.run(
                ["git", "bisect", "run"]
                + _get_result_commandline(
                    benchmark, good_timing, bad_timing, pgo, flags, repo
                )
            )
    finally:
        show_log()
        delete_log()
140+
141+
142+
def main():
    # This is the entry point for the user (the child processes spawned by
    # `git bisect run` enter through the __main__ block below instead).

    parser = argparse.ArgumentParser(
        description="""
        Run bisect on a benchmark to find the first regressing commit.

        A full checkout of CPython should be in the cpython directory.
        If it doesn't exist, it will be cloned.
        """,
        formatter_class=rich_argparse.ArgumentDefaultsRichHelpFormatter,
    )
    parser.add_argument(
        "benchmark",
        type=str,
        help="The benchmark to run bisect on.",
    )
    parser.add_argument(
        "good",
        type=str,
        help="The good commit hash for the bisect.",
    )
    parser.add_argument(
        "bad",
        type=str,
        help="The bad commit hash for the bisect.",
    )
    # NOTE(review): --pgo and --flags have no help text, so --help shows only
    # their defaults.
    parser.add_argument(
        "--pgo",
        action="store_true",
    )
    parser.add_argument(
        "--flags",
        type=str,
        default="",
    )

    args = parser.parse_args()

    # repo/cpython are left at their defaults: the current directory and
    # ./cpython respectively.
    _main(args.benchmark, args.good, args.bad, args.pgo, args.flags)
182+
183+
184+
if __name__ == "__main__":
    # This is the entry point when we are called from `git bisect run` itself.
    # The arguments here must match what _get_result_commandline() produces.

    parser = argparse.ArgumentParser()
    parser.add_argument("benchmark", type=str)
    parser.add_argument("good_val", type=float)
    parser.add_argument("bad_val", type=float)
    # pgo arrives as the string "True"/"False" (str() of a bool in
    # _get_result_commandline), hence the string comparison below.
    parser.add_argument("pgo", type=str)
    parser.add_argument("flags", type=str)
    parser.add_argument("repo", type=str)
    args = parser.parse_args()

    # Commits timing faster than the mid-point are classified "good",
    # slower ones "bad".
    mid_point = (args.good_val + args.bad_val) / 2.0

    repo = Path(args.repo)
    cpython = repo / "cpython"

    try:
        with contextlib.chdir(repo):
            timing = get_result(
                args.benchmark, args.pgo == "True", args.flags, cpython=cpython
            )
    except Exception as e:
        # If there was any exception, display that exception and traceback and
        # then abort the git bisect with -1 (exit status 255, which is outside
        # the 0-127 range `git bisect run` treats as a good/bad verdict).
        traceback.print_exception(e)
        sys.exit(-1)

    # The confidence is 0.0 at the mid-point, 1.0 at the good and bad values,
    # and > 1.0 outside of that.
    confidence = abs((timing - mid_point) / ((args.bad_val - args.good_val) / 2.0))

    # chdir back to the repo so the log file lands next to the parent
    # process's log, then report the verdict via the exit code:
    # 1 tells `git bisect run` this commit is bad, 0 that it is good.
    with contextlib.chdir(repo):
        if timing > mid_point:
            log(
                f"BISECT BAD ({git.get_git_hash(cpython)[:7]}): "
                f"{format_seconds(timing)} (confidence {confidence:.02f})"
            )
            print(f"BAD: {timing} vs. ({args.good_val}, {args.bad_val})")
            sys.exit(1)
        else:
            log(
                f"BISECT GOOD ({git.get_git_hash(cpython)[:7]}): "
                f"{format_seconds(timing)} (confidence {confidence:.02f})"
            )
            print(f"GOOD: {timing} vs. ({args.good_val}, {args.bad_val})")
            sys.exit(0)

bench_runner/scripts/workflow.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,13 @@ def checkout_benchmarks():
130130
)
131131

132132

133-
def compile_unix(cpython: PathLike, flags: list[str], pgo: bool, pystats: bool) -> None:
133+
def compile_unix(
134+
cpython: PathLike,
135+
flags: list[str],
136+
pgo: bool,
137+
pystats: bool,
138+
reconfigure: bool = True,
139+
) -> None:
134140
cpython = Path(cpython)
135141
cfg = config.get_config_for_current_runner()
136142

@@ -166,7 +172,8 @@ def compile_unix(cpython: PathLike, flags: list[str], pgo: bool, pystats: bool)
166172
make_args.extend(["-j"])
167173

168174
with contextlib.chdir(cpython):
169-
subprocess.check_call(["./configure", *args], env=env)
175+
if reconfigure:
176+
subprocess.check_call(["./configure", *args], env=env)
170177
subprocess.check_call(["make", *make_args], env=env)
171178

172179

@@ -391,13 +398,4 @@ def main():
391398

392399

393400
if __name__ == "__main__":
394-
# This lets pytest-cov collect coverage data in a subprocess
395-
if "COV_CORE_SOURCE" in os.environ:
396-
try:
397-
from pytest_cov.embed import init
398-
399-
init()
400-
except Exception:
401-
sys.stderr.write("pytest-cov: Failed to setup subprocess coverage.")
402-
403401
main()

bench_runner/util.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,26 @@ def get_simple_platform() -> Literal["linux", "macos", "windows"]:
8787
raise RuntimeError(f"Unsupported platform {sys.platform}.")
8888

8989

90+
def format_seconds(value: float) -> str:
    """
    Given a float value in seconds, formats it into a human-readable string with
    the appropriate precision.
    """
    units = ("sec", "ms", "us", "ns")

    # Find the largest power of ten, from 10**2 down to 10**-8, that does not
    # exceed the value; anything smaller (or non-positive) falls through to
    # the nanosecond bucket at 10**-9.
    exponent = -9
    for candidate in range(2, -9, -1):
        if value >= 10.0**candidate:
            exponent = candidate
            break

    # Keep three significant digits: 0-2 decimals depending on where the
    # value falls within its unit's three decades.
    digits = 2 - exponent % 3
    unit_index = 0 if exponent >= 0 else -(exponent // 3)
    scaled = value * 10 ** (unit_index * 3)
    return f"{scaled:.{digits}f} {units[unit_index]}"
108+
109+
90110
if os.getenv("GITHUB_ACTIONS") == "true":
91111

92112
@contextlib.contextmanager

tests/test_run_benchmarks.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,9 @@ def test_run_benchmarks(benchmarks_checkout, monkeypatch):
8383
subprocess.check_call(
8484
[
8585
venv_python,
86-
run_benchmarks.__file__,
86+
"-m",
87+
"bench_runner",
88+
"run_benchmarks",
8789
"benchmark",
8890
sys.executable,
8991
"python",
@@ -132,7 +134,9 @@ def test_run_benchmarks(benchmarks_checkout, monkeypatch):
132134
returncode = subprocess.call(
133135
[
134136
venv_python,
135-
run_benchmarks.__file__,
137+
"-m",
138+
"bench_runner",
139+
"run_benchmarks",
136140
"benchmark",
137141
sys.executable,
138142
"python",
@@ -164,7 +168,9 @@ def test_run_benchmarks_flags(benchmarks_checkout):
164168
subprocess.check_call(
165169
[
166170
venv_python,
167-
run_benchmarks.__file__,
171+
"-m",
172+
"bench_runner",
173+
"run_benchmarks",
168174
"benchmark",
169175
sys.executable,
170176
"python",

0 commit comments

Comments
 (0)