Skip to content

Commit e413950

Browse files
author
Mark Saroufim
authored
Use Modal for CI (#311)
* Update runner_ci.yml * update * update * update * update * update * update * lint * update * update test files * Trigger CI * push * Trigger CI * update * ci
1 parent 3898c38 commit e413950

5 files changed

Lines changed: 195 additions & 51 deletions

File tree

.github/workflows/runner_ci.yml

Lines changed: 13 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -11,59 +11,25 @@ on:
1111
- dev
1212

1313
jobs:
14-
check-cuda:
15-
runs-on: [gpumode-nvidia-arc]
16-
timeout-minutes: 10
17-
container:
18-
image: nvidia/cuda:12.4.0-devel-ubuntu22.04
14+
check-modal:
15+
runs-on: ubuntu-latest
16+
timeout-minutes: 30
1917
steps:
2018
- uses: actions/checkout@v3
2119

2220
- name: Setup Python
2321
uses: actions/setup-python@v5
2422
with:
25-
python-version: '3.10'
23+
python-version: '3.13'
2624

27-
- name: Install pytest
28-
shell: bash
29-
run: pip install pytest
30-
31-
- name: Run script
32-
shell: bash
33-
run: pytest scripts/ci_test_cuda.py
34-
35-
env:
36-
CUDA_VISIBLE_DEVICES: 0
37-
38-
check-pytorch:
39-
runs-on: [gpumode-nvidia-arc]
40-
timeout-minutes: 10
41-
container:
42-
image: nvidia/cuda:12.4.0-devel-ubuntu22.04
43-
steps:
44-
- uses: actions/checkout@v3
45-
46-
- name: Setup Python
47-
uses: actions/setup-python@v5
48-
with:
49-
python-version: '3.10'
50-
51-
- name: Install uv
52-
uses: astral-sh/setup-uv@v3
53-
with:
54-
version: "latest"
55-
56-
- name: Setup Python environment
25+
- name: Install dependencies
5726
run: |
58-
uv venv .venv
59-
echo "VIRTUAL_ENV=$PWD/.venv" >> $GITHUB_ENV
60-
echo "$PWD/.venv/bin" >> $GITHUB_PATH
61-
uv pip install numpy torch setuptools ninja pytest
62-
63-
- name: Run script
64-
shell: bash
65-
run: pytest scripts/ci_test_python.py
66-
67-
env:
68-
CUDA_VISIBLE_DEVICES: 0
27+
pip install modal
28+
pip install -r requirements.txt
6929
30+
- name: Run Modal tests
31+
run: |
32+
python scripts/modal_ci_test.py
33+
env:
34+
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
35+
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ yoyo-migrations
1010
ruff
1111
pre-commit
1212
better_profanity
13+
pytest
1314

1415
# api
1516
fastapi[all] # install all to avoid random bugs

scripts/modal_ci_test.py

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Modal CI test runner - runs key test scenarios on Modal
4+
"""
5+
import os
6+
import sys
7+
from pathlib import Path
8+
9+
import modal
10+
11+
# Run from the repository root no matter where the script was launched from:
# the tests in this file open inputs through root-relative paths such as
# "examples/identity_cuda". This file lives in scripts/, so the root is the
# parent of the directory that contains it.
_REPO_ROOT = Path(__file__).resolve().parent.parent
os.chdir(_REPO_ROOT)

# Add the src directory to Python path for Modal deserialization.
# An absolute entry stays valid even if the working directory changes later.
sys.path.append(str(_REPO_ROOT / "src" / "discord-cluster-manager"))
17+
18+
from consts import SubmissionMode
19+
from task import build_task_config, make_task_definition
20+
21+
22+
def test_cuda_correct():
    """Check that the reference CUDA submission runs and passes on Modal.

    Looks up the deployed ``run_cuda_script_t4`` function of the
    ``discord-bot-runner`` Modal app, submits
    ``examples/identity_cuda/submission.cu`` in ``SubmissionMode.TEST`` and
    inspects the returned result.

    Raises:
        Exception: if the overall result is unsuccessful, if no run was
            actually executed, or if any executed run did not succeed or
            did not pass validation.
    """
    print("Testing CUDA correct submission...")

    func = modal.Function.from_name("discord-bot-runner", "run_cuda_script_t4")
    task = make_task_definition("examples/identity_cuda")
    submission_cu = Path("examples/identity_cuda/submission.cu").read_text()

    config = build_task_config(
        task=task.task,
        submission_content=submission_cu,
        arch=None,
        mode=SubmissionMode.TEST,
    )

    result = func.remote(config=config)

    if not result.success:
        raise Exception(f"CUDA test failed: {result.error}")

    # Check every run that was executed; entries whose `run` is unset are
    # skipped, so count the executed ones to avoid passing vacuously.
    executed = 0
    for run_name, run_result in result.runs.items():
        run = run_result.run
        if not run:
            continue
        executed += 1
        if not run.success:
            raise Exception(f"Test run {run_name} failed")
        # `passed` is the flag the validation-failure tests in this file rely
        # on, so a correct submission must have it set as well.
        if not run.passed:
            raise Exception(f"Test run {run_name} did not pass validation")

    if executed == 0:
        raise Exception("No test run found")

    print("✅ CUDA correct submission passed")
48+
49+
50+
def test_cuda_validation_fail():
    """Verify that a no-op CUDA kernel is rejected by validation.

    Sends a kernel that only resizes its output to the deployed
    ``run_cuda_script_t4`` Modal function in ``SubmissionMode.TEST``.

    Raises:
        Exception: if the remote call reports failure, if no executed
            "test" run is present, or if that run is marked as passed.
    """
    print("Testing CUDA validation failure...")

    runner = modal.Function.from_name("discord-bot-runner", "run_cuda_script_t4")
    definition = make_task_definition("examples/identity_cuda")

    # no-op submission that should fail validation
    broken_kernel = """
#include "task.h"

output_t custom_kernel(input_t data)
{
    output_t result;
    result.resize(data.size());
    return result;
}
"""

    outcome = runner.remote(
        config=build_task_config(
            task=definition.task,
            submission_content=broken_kernel,
            arch=None,
            mode=SubmissionMode.TEST,
        )
    )

    if not outcome.success:
        raise Exception(f"CUDA test failed to execute: {outcome.error}")

    # Should have a test run that fails validation
    test_entry = outcome.runs.get("test")
    if not (test_entry and test_entry.run):
        raise Exception("No test run found")

    if test_entry.run.passed:
        raise Exception("Expected validation failure but test passed")

    print("✅ CUDA validation failure test passed")
90+
91+
92+
def test_pytorch_correct():
    """Check that the reference PyTorch submission runs and passes on Modal.

    Looks up the deployed ``run_pytorch_script_t4`` function of the
    ``discord-bot-runner`` Modal app, submits
    ``examples/identity_py/submission.py`` in ``SubmissionMode.TEST`` and
    inspects the returned result.

    Raises:
        Exception: if the overall result is unsuccessful, if no run was
            actually executed, or if any executed run did not succeed or
            did not pass validation.
    """
    print("Testing PyTorch correct submission...")

    func = modal.Function.from_name("discord-bot-runner", "run_pytorch_script_t4")
    task = make_task_definition("examples/identity_py")
    submission_py = Path("examples/identity_py/submission.py").read_text()

    config = build_task_config(
        task=task.task,
        submission_content=submission_py,
        arch=None,
        mode=SubmissionMode.TEST,
    )

    result = func.remote(config=config)

    if not result.success:
        raise Exception(f"PyTorch test failed: {result.error}")

    # Check every run that was executed; entries whose `run` is unset are
    # skipped, so count the executed ones to avoid passing vacuously.
    executed = 0
    for run_name, run_result in result.runs.items():
        run = run_result.run
        if not run:
            continue
        executed += 1
        if not run.success:
            raise Exception(f"Test run {run_name} failed")
        # `passed` is the flag the validation-failure tests in this file rely
        # on, so a correct submission must have it set as well.
        if not run.passed:
            raise Exception(f"Test run {run_name} did not pass validation")

    if executed == 0:
        raise Exception("No test run found")

    print("✅ PyTorch correct submission passed")
118+
119+
120+
def test_pytorch_validation_fail():
    """Verify that an all-zeros PyTorch kernel is rejected by validation.

    Sends a kernel that returns ``torch.zeros_like(input)`` to the deployed
    ``run_pytorch_script_t4`` Modal function in ``SubmissionMode.TEST``.

    Raises:
        Exception: if the remote call reports failure, if no executed
            "test" run is present, or if that run is marked as passed.
    """
    print("Testing PyTorch validation failure...")

    runner = modal.Function.from_name("discord-bot-runner", "run_pytorch_script_t4")
    definition = make_task_definition("examples/identity_py")

    # no-op submission that should fail validation
    broken_kernel = """
import torch
def custom_kernel(input):
    return torch.zeros_like(input)
"""

    outcome = runner.remote(
        config=build_task_config(
            task=definition.task,
            submission_content=broken_kernel,
            arch=None,
            mode=SubmissionMode.TEST,
        )
    )

    if not outcome.success:
        raise Exception(f"PyTorch test failed to execute: {outcome.error}")

    # Should have a test run that fails validation
    test_entry = outcome.runs.get("test")
    if not (test_entry and test_entry.run):
        raise Exception("No test run found")

    if test_entry.run.passed:
        raise Exception("Expected validation failure but test passed")

    print("✅ PyTorch validation failure test passed")
155+
156+
157+
def main():
    """Run every Modal CI scenario in order.

    Stops at the first scenario that raises, prints its message and exits
    the process with status 1.
    """
    print("Running Modal CI tests...")

    scenarios = (
        test_cuda_correct,
        test_cuda_validation_fail,
        test_pytorch_correct,
        test_pytorch_validation_fail,
    )

    try:
        for scenario in scenarios:
            scenario()

        print("\n🎉 All Modal tests passed!")
    except Exception as err:
        print(f"\n❌ Test failed: {err}")
        sys.exit(1)


if __name__ == "__main__":
    main()

src/discord-cluster-manager/modal_runner.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,18 @@
2828
"requests~=2.32.4",
2929
"packaging~=25.0",
3030
"numpy~=2.3",
31+
"pytest",
32+
3133
)
3234
.pip_install(
3335
"torch~=2.7",
3436
"torchvision~=0.22",
3537
"torchaudio~=2.7",
36-
index_url="https://download.pytorch.org/whl/cu128"
38+
index_url="https://download.pytorch.org/whl/cu128",
3739
)
3840
# other frameworks
3941
.pip_install(
40-
"jax[cuda12]==0.5.3", # 0.6 want's cudnn 9.8 in conflict with torch 2.7
42+
"jax[cuda12]==0.5.3", # 0.6 want's cudnn 9.8 in conflict with torch 2.7
4143
"jax2torch==0.0.7",
4244
"tinygrad~=0.10",
4345
)
@@ -47,8 +49,8 @@
4749
"nvidia-cutlass-dsl~=4.0",
4850
"cuda-core[cu12]~=0.3",
4951
"cuda-python[all]==12.8",
50-
#"nvmath-python[cu12]~=0.4",
51-
#"numba-cuda[cu12]~=0.15",
52+
# "nvmath-python[cu12]~=0.4",
53+
# "numba-cuda[cu12]~=0.15",
5254
)
5355
)
5456

src/discord-cluster-manager/task.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,4 +191,5 @@ def build_task_config(
191191
"sources": sources,
192192
"headers": headers,
193193
"include_dirs": task.config.include_dirs,
194+
**common,
194195
}

0 commit comments

Comments
 (0)