Skip to content

Commit 297f2aa

Browse files
committed
unified API and discord submit paths
1 parent 02f413c commit 297f2aa

9 files changed

Lines changed: 109 additions & 139 deletions

File tree

src/discord-cluster-manager/api/main.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -227,7 +227,6 @@ async def cli_auth(auth_provider: str, code: str, state: str, db_context=Depends
227227

228228
async def _stream_submission_response(
229229
submission_request: SubmissionRequest,
230-
user_info: dict,
231230
submission_mode_enum: SubmissionMode,
232231
backend: KernelBackend,
233232
):
@@ -237,7 +236,6 @@ async def _stream_submission_response(
237236
task = asyncio.create_task(
238237
_run_submission(
239238
submission_request,
240-
user_info,
241239
submission_mode_enum,
242240
backend,
243241
)
@@ -398,6 +396,7 @@ async def run_submission( # noqa: C901
398396
code=submission_code,
399397
file_name=file.filename or "submission.py",
400398
user_id=user_id,
399+
user_name=user_name,
401400
gpus=[gpu_type],
402401
leaderboard=leaderboard_name,
403402
)
@@ -412,7 +411,6 @@ async def run_submission( # noqa: C901
412411

413412
generator = _stream_submission_response(
414413
submission_request=submission_request,
415-
user_info={"user_id": user_id, "user_name": user_name},
416414
submission_mode_enum=submission_mode_enum,
417415
backend=backend_instance,
418416
)

src/discord-cluster-manager/api/utils.py

Lines changed: 17 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,6 @@
1-
import asyncio
2-
from datetime import datetime
3-
from typing import List
4-
51
import requests
62
from backend import KernelBackend
7-
from consts import SubmissionMode, get_gpu_by_name
3+
from consts import SubmissionMode
84
from env import (
95
CLI_DISCORD_CLIENT_ID,
106
CLI_DISCORD_CLIENT_SECRET,
@@ -13,7 +9,7 @@
139
CLI_TOKEN_URL,
1410
)
1511
from fastapi import HTTPException
16-
from report import Log, RunProgressReporter, RunResultReport, Text
12+
from report import Log, MultiProgressReporter, RunProgressReporter, RunResultReport, Text
1713
from submission import SubmissionRequest, prepare_submission
1814

1915

@@ -140,73 +136,35 @@ async def _handle_github_oauth(code: str, redirect_uri: str) -> tuple[str, str]:
140136

141137

142138
async def _run_submission(
143-
submission: SubmissionRequest, user_info: dict, mode: SubmissionMode, backend: KernelBackend
139+
submission: SubmissionRequest, mode: SubmissionMode, backend: KernelBackend
144140
):
145141
try:
146142
req = prepare_submission(submission, backend.db)
147143
except Exception as e:
148144
raise HTTPException(status_code=400, detail=str(e)) from e
149145

150-
selected_gpus = [get_gpu_by_name(gpu) for gpu in req.gpus]
151-
if len(selected_gpus) > 1 or selected_gpus[0] is None:
146+
if len(req.gpus) != 1:
152147
raise HTTPException(status_code=400, detail="Invalid GPU type")
153148

154-
user_name = user_info["user_name"]
155-
user_id = user_info["user_id"]
156-
157-
with backend.db as db:
158-
sub_id = db.create_submission(
159-
leaderboard=req.leaderboard,
160-
file_name=submission.file_name,
161-
code=submission.code,
162-
user_id=user_id,
163-
time=datetime.now(),
164-
user_name=user_name,
165-
)
149+
reporter = MultiProgressReporterAPI()
150+
sub_id, results = await backend.submit_full(req, mode, reporter)
151+
return results, [rep.get_message() + "\n" + rep.long_report for rep in reporter.runs]
166152

167-
gpu = selected_gpus[0]
168153

169-
reporters: List[RunProgressReporterAPI] = []
154+
class MultiProgressReporterAPI(MultiProgressReporter):
155+
def __init__(self):
156+
self.runs = []
170157

171-
def add_reporter(title: str):
158+
async def show(self, title: str):
159+
return
160+
161+
def add_run(self, title: str) -> "RunProgressReporterAPI":
172162
rep = RunProgressReporterAPI(title)
173-
reporters.append(rep)
163+
self.runs.append(rep)
174164
return rep
175165

176-
try:
177-
tasks = [
178-
backend.submit_leaderboard(
179-
sub_id,
180-
submission.code,
181-
submission.file_name,
182-
gpu,
183-
add_reporter(f"{gpu.name} on {gpu.runner}"),
184-
req.task,
185-
mode,
186-
None,
187-
)
188-
]
189-
190-
if mode == SubmissionMode.LEADERBOARD:
191-
tasks += [
192-
backend.submit_leaderboard(
193-
sub_id,
194-
submission.code,
195-
submission.file_name,
196-
gpu,
197-
add_reporter(f"{gpu.name} on {gpu.runner} (secret)"),
198-
req.task,
199-
SubmissionMode.PRIVATE,
200-
req.secret_seed,
201-
)
202-
]
203-
204-
results = await asyncio.gather(*tasks)
205-
finally:
206-
with backend.db as db:
207-
db.mark_submission_done(sub_id)
208-
209-
return results, [rep.get_message() + "\n" + rep.long_report for rep in reporters]
166+
def make_message(self):
167+
return
210168

211169

212170
class RunProgressReporterAPI(RunProgressReporter):

src/discord-cluster-manager/backend.py

Lines changed: 59 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,16 @@
1+
import asyncio
12
import copy
23
import math
4+
from datetime import datetime
35
from typing import Optional
46

57
import env
6-
from consts import GPU, GPU_TO_SM, RankCriterion, SubmissionMode
8+
from consts import GPU, GPU_TO_SM, RankCriterion, SubmissionMode, get_gpu_by_name
79
from launchers import Launcher
810
from leaderboard_db import LeaderboardDB
9-
from report import RunProgressReporter, generate_report, make_short_report
11+
from report import MultiProgressReporter, RunProgressReporter, generate_report, make_short_report
1012
from run_eval import FullResult
13+
from submission import ProcessedSubmissionRequest
1114
from task import LeaderboardTask, build_task_config
1215
from utils import KernelBotError, setup_logging
1316

@@ -44,6 +47,60 @@ def register_launcher(self, launcher: Launcher):
4447
for gpu in launcher.gpus:
4548
self.launcher_map[gpu.value] = launcher
4649

50+
async def submit_full(
51+
self, req: ProcessedSubmissionRequest, mode: SubmissionMode, reporter: MultiProgressReporter
52+
):
53+
with self.db as db:
54+
sub_id = db.create_submission(
55+
leaderboard=req.leaderboard,
56+
file_name=req.file_name,
57+
code=req.code,
58+
user_id=req.user_id,
59+
time=datetime.now(),
60+
user_name=req.user_name,
61+
)
62+
63+
selected_gpus = [get_gpu_by_name(gpu) for gpu in req.gpus]
64+
65+
try:
66+
tasks = [
67+
self.submit_leaderboard(
68+
sub_id,
69+
req.code,
70+
req.file_name,
71+
gpu,
72+
reporter.add_run(f"{gpu.name} on {gpu.runner}"),
73+
req.task,
74+
mode,
75+
None,
76+
)
77+
for gpu in selected_gpus
78+
]
79+
80+
if mode == SubmissionMode.LEADERBOARD:
81+
tasks += [
82+
self.submit_leaderboard(
83+
sub_id,
84+
req.code,
85+
req.file_name,
86+
gpu,
87+
reporter.add_run(f"{gpu.name} on {gpu.runner} (secret)"),
88+
req.task,
89+
SubmissionMode.PRIVATE,
90+
req.secret_seed,
91+
)
92+
for gpu in selected_gpus
93+
]
94+
await reporter.show(
95+
f"Submission **{sub_id}**: `{req.file_name}` for `{req.leaderboard}`"
96+
)
97+
results = await asyncio.gather(*tasks)
98+
finally:
99+
with self.db as db:
100+
db.mark_submission_done(sub_id)
101+
102+
return sub_id, results
103+
47104
async def submit_leaderboard( # noqa: C901
48105
self,
49106
submission_id: int,

src/discord-cluster-manager/cogs/leaderboard_cog.py

Lines changed: 8 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,14 @@
1-
import asyncio
21
from datetime import datetime, timedelta
32
from io import StringIO
43
from typing import TYPE_CHECKING, List, Optional
54

65
import discord
76
from consts import (
87
SubmissionMode,
9-
get_gpu_by_name,
108
)
119
from discord import app_commands
1210
from discord.ext import commands
13-
from discord_reporter import MultiProgressReporter
11+
from discord_reporter import MultiProgressReporterDiscord
1412
from discord_utils import (
1513
get_user_from_id,
1614
leaderboard_name_autocomplete,
@@ -85,79 +83,25 @@ async def on_submit_hook( # noqa: C901
8583
)
8684
return -1
8785

86+
if not interaction.response.is_done():
87+
await interaction.response.defer(ephemeral=True)
88+
8889
req = SubmissionRequest(
8990
code=submission_content,
9091
file_name=script.filename,
9192
user_id=interaction.user.id,
93+
user_name=interaction.user.global_name or interaction.user.name,
9294
gpus=cmd_gpus,
9395
leaderboard=leaderboard_name,
9496
)
9597
req = prepare_submission(req, self.bot.leaderboard_db)
9698

97-
# if there is more than one candidate GPU, display UI to let user select,
98-
# otherwise just run on that GPU
99-
if not interaction.response.is_done():
100-
await interaction.response.defer(ephemeral=True)
101-
10299
if req.gpus is None:
103100
view = await self.select_gpu_view(interaction, leaderboard_name, req.task_gpus)
104-
selected_gpus = view.selected_gpus
105-
else:
106-
selected_gpus = req.gpus
107-
108-
selected_gpus = [get_gpu_by_name(gpu) for gpu in selected_gpus]
101+
req.gpus = view.selected_gpus
109102

110-
command = self.bot.backend.submit_leaderboard
111-
112-
user_name = interaction.user.global_name or interaction.user.name
113-
# Create a submission entry in the database
114-
with self.bot.leaderboard_db as db:
115-
sub_id = db.create_submission(
116-
leaderboard=req.leaderboard,
117-
file_name=script.filename,
118-
code=submission_content,
119-
user_id=interaction.user.id,
120-
time=datetime.now(),
121-
user_name=user_name,
122-
)
123-
124-
run_msg = f"Submission **{sub_id}**: `{script.filename}` for `{req.leaderboard}`"
125-
reporter = MultiProgressReporter(interaction, run_msg)
126-
try:
127-
tasks = [
128-
command(
129-
sub_id,
130-
submission_content,
131-
script.filename,
132-
gpu,
133-
reporter.add_run(f"{gpu.name} on {gpu.runner}"),
134-
req.task,
135-
mode,
136-
None,
137-
)
138-
for gpu in selected_gpus
139-
]
140-
141-
# also schedule secret run
142-
if mode == SubmissionMode.LEADERBOARD:
143-
tasks += [
144-
command(
145-
sub_id,
146-
submission_content,
147-
script.filename,
148-
gpu,
149-
reporter.add_run(f"{gpu.name} on {gpu.runner} (secret)"),
150-
req.task,
151-
SubmissionMode.PRIVATE,
152-
req.secret_seed,
153-
)
154-
for gpu in selected_gpus
155-
]
156-
await reporter.show()
157-
await asyncio.gather(*tasks)
158-
finally:
159-
with self.bot.leaderboard_db as db:
160-
db.mark_submission_done(sub_id)
103+
reporter = MultiProgressReporterDiscord(interaction)
104+
sub_id, results = await self.bot.backend.submit_full(req, mode, reporter)
161105

162106
if mode == SubmissionMode.LEADERBOARD:
163107
await self.post_submit_hook(interaction, sub_id)

src/discord-cluster-manager/cogs/submit_cog.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from consts import SubmissionMode, get_gpu_by_name
1010
from discord import app_commands
1111
from discord.ext import commands
12-
from discord_reporter import MultiProgressReporter
12+
from discord_reporter import MultiProgressReporterDiscord
1313
from discord_utils import send_discord_message, with_error_handling
1414
from utils import (
1515
setup_logging,
@@ -70,7 +70,7 @@ async def run_script(
7070
"""
7171
Function invoked by the `run` command to run a single script.
7272
"""
73-
reporter = MultiProgressReporter(interaction, "Script run")
73+
reporter = MultiProgressReporterDiscord(interaction, "Script run")
7474
rep = reporter.add_run(f"{gpu_type.name}")
7575
await reporter.show()
7676
gpu_type = get_gpu_by_name(gpu_type.name)

src/discord-cluster-manager/cogs/verify_run_cog.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
from discord import app_commands
1414
from discord.app_commands import Choice
1515
from discord.ext import commands
16-
from discord_reporter import MultiProgressReporter
16+
from discord_reporter import MultiProgressReporterDiscord
1717
from discord_utils import send_discord_message, with_error_handling
1818
from leaderboard_db import RunItem, SubmissionItem
1919
from task import make_task
@@ -291,7 +291,7 @@ async def verify_runs(self, interaction: discord.Interaction):
291291
amd = get_gpu_by_name("mi300")
292292
t4 = get_gpu_by_name("T4")
293293

294-
reporter = MultiProgressReporter("Verifying")
294+
reporter = MultiProgressReporterDiscord("Verifying")
295295
await reporter.show(interaction)
296296

297297
results = await asyncio.gather(

src/discord-cluster-manager/discord_reporter.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,19 @@
11
import discord
22
from discord_utils import _send_split_log
3-
from report import Log, RunProgressReporter, RunResultReport, Text
3+
from report import Log, MultiProgressReporter, RunProgressReporter, RunResultReport, Text
44

55

6-
class MultiProgressReporter:
7-
def __init__(self, interaction: discord.Interaction, header: str):
8-
self.header = header
6+
class MultiProgressReporterDiscord(MultiProgressReporter):
7+
def __init__(self, interaction: discord.Interaction):
8+
self.header = ""
99
self.runs = []
1010
self.interaction = interaction
1111

12-
async def show(self):
12+
async def show(self, title: str):
13+
self.header = title
1314
await self._update_message()
1415

15-
def add_run(self, title: str) -> "RunProgressReporter":
16+
def add_run(self, title: str) -> "RunProgressReporterDiscord":
1617
rpr = RunProgressReporterDiscord(self, self.interaction, title)
1718
self.runs.append(rpr)
1819
return rpr
@@ -34,7 +35,7 @@ async def _update_message(self):
3435
class RunProgressReporterDiscord(RunProgressReporter):
3536
def __init__(
3637
self,
37-
root: MultiProgressReporter,
38+
root: MultiProgressReporterDiscord,
3839
interaction: discord.Interaction,
3940
title: str,
4041
):

0 commit comments

Comments
 (0)