Use Modal for CI (#311)

Mark Saroufim · web-flow · commit e413950f0dc7 · 2025-07-17T14:29:38.000-07:00
* Update runner_ci.yml

* update

* update

* update

* update

* update

* update

* lint

* update

* update test files

* Trigger CI

* push

* Trigger CI

* update

* ci
diff --git a/.github/workflows/runner_ci.yml b/.github/workflows/runner_ci.yml
@@ -11,59 +11,25 @@ on:
       - dev
 
 jobs:
-  check-cuda:
-    runs-on: [gpumode-nvidia-arc]
-    timeout-minutes: 10
-    container:
-      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
+  check-modal:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
     steps:
       - uses: actions/checkout@v3
 
       - name: Setup Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.13'
 
-      - name: Install pytest
-        shell: bash
-        run: pip install pytest
-
-      - name: Run script
-        shell: bash
-        run: pytest scripts/ci_test_cuda.py
-
-    env:
-      CUDA_VISIBLE_DEVICES: 0
-
-  check-pytorch:
-    runs-on: [gpumode-nvidia-arc]
-    timeout-minutes: 10
-    container:
-      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Setup Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.10'
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v3
-        with:
-          version: "latest"
-
-      - name: Setup Python environment
+      - name: Install dependencies
         run: |
-          uv venv .venv
-          echo "VIRTUAL_ENV=$PWD/.venv" >> $GITHUB_ENV
-          echo "$PWD/.venv/bin" >> $GITHUB_PATH
-          uv pip install numpy torch setuptools ninja pytest
-
-      - name: Run script
-        shell: bash
-        run: pytest scripts/ci_test_python.py
-
-    env:
-      CUDA_VISIBLE_DEVICES: 0
+          pip install modal
+          pip install -r requirements.txt
 
+      # Runs scripts/modal_ci_test.py on the GitHub-hosted runner.
+      # The Modal credentials come from repository secrets and are set in this step's env only.
+      - name: Run Modal tests
+        run: |
+          python scripts/modal_ci_test.py
+        env:
+          MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
+          MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
diff --git a/requirements.txt b/requirements.txt
@@ -10,6 +10,7 @@ yoyo-migrations
 ruff
 pre-commit
 better_profanity
+pytest
 
 # api
 fastapi[all] # install all to avoid random bugs
diff --git a/scripts/modal_ci_test.py b/scripts/modal_ci_test.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+"""
+Modal CI test runner - runs key test scenarios on Modal
+"""
+import os
+import sys
+from pathlib import Path
+
+import modal
+
+# Resolve the repository root from this file's own location (scripts/modal_ci_test.py)
+# instead of the current working directory, so the script behaves the same no matter
+# which directory it is launched from.
+_REPO_ROOT = Path(__file__).resolve().parent.parent
+
+# The tests below read "examples/..." through relative paths, so run from the repo root
+os.chdir(_REPO_ROOT)
+
+# Add the src directory to Python path for Modal deserialization
+sys.path.append(str(_REPO_ROOT / "src" / "discord-cluster-manager"))
+
+from consts import SubmissionMode
+from task import build_task_config, make_task_definition
+
+
+def test_cuda_correct():
+    """Test that correct CUDA submission passes.
+
+    Submits examples/identity_cuda/submission.cu to the deployed
+    "run_cuda_script_t4" Modal function in TEST mode and checks the result.
+
+    Raises:
+        Exception: if the remote call reports failure, if no "test" run was
+            executed, or if any executed run did not succeed or did not pass.
+    """
+    print("Testing CUDA correct submission...")
+
+    func = modal.Function.from_name("discord-bot-runner", "run_cuda_script_t4")
+    task = make_task_definition("examples/identity_cuda")
+    submission_cu = Path("examples/identity_cuda/submission.cu").read_text()
+
+    config = build_task_config(
+        task=task.task,
+        submission_content=submission_cu,
+        arch=None,
+        mode=SubmissionMode.TEST,
+    )
+
+    result = func.remote(config=config)
+
+    if not result.success:
+        raise Exception(f"CUDA test failed: {result.error}")
+
+    # A result without an executed "test" run must not count as a pass
+    test_run = result.runs.get("test")
+    if not test_run or not test_run.run:
+        raise Exception("No test run found")
+
+    # Check if any test runs failed
+    for run_name, run_result in result.runs.items():
+        if not run_result.run:
+            continue
+        if not run_result.run.success:
+            raise Exception(f"Test run {run_name} failed")
+        # `passed` is the flag the validation-failure scenarios in this file rely on;
+        # checking `success` alone would let a wrong result slip through here.
+        if not run_result.run.passed:
+            raise Exception(f"Test run {run_name} did not pass validation")
+
+    print("✅ CUDA correct submission passed")
+
+
+def test_cuda_validation_fail():
+    """Check that a deliberately wrong CUDA submission is rejected.
+
+    The kernel only resizes its output and returns it, so the remote call is
+    expected to report success while the "test" run reports `passed` as false.
+
+    Raises:
+        Exception: if the remote call reports failure, if there is no
+            executed "test" run, or if that run unexpectedly passed.
+    """
+    print("Testing CUDA validation failure...")
+
+    runner = modal.Function.from_name("discord-bot-runner", "run_cuda_script_t4")
+    definition = make_task_definition("examples/identity_cuda")
+
+    # Kernel that sizes the output but never copies the input into it
+    noop_kernel = """
+    #include "task.h"
+
+    output_t custom_kernel(input_t data)
+    {
+        output_t result;
+        result.resize(data.size());
+        return result;
+    }
+    """
+
+    outcome = runner.remote(
+        config=build_task_config(
+            task=definition.task,
+            submission_content=noop_kernel,
+            arch=None,
+            mode=SubmissionMode.TEST,
+        )
+    )
+
+    if not outcome.success:
+        raise Exception(f"CUDA test failed to execute: {outcome.error}")
+
+    # The "test" run has to exist and has to have been executed
+    validation = outcome.runs.get("test")
+    if not (validation and validation.run):
+        raise Exception("No test run found")
+
+    # ...and it has to report that validation did not pass
+    if validation.run.passed:
+        raise Exception("Expected validation failure but test passed")
+
+    print("✅ CUDA validation failure test passed")
+
+
+def test_pytorch_correct():
+    """Test that correct PyTorch submission passes.
+
+    Submits examples/identity_py/submission.py to the deployed
+    "run_pytorch_script_t4" Modal function in TEST mode and checks the result.
+
+    Raises:
+        Exception: if the remote call reports failure, if no "test" run was
+            executed, or if any executed run did not succeed or did not pass.
+    """
+    print("Testing PyTorch correct submission...")
+
+    func = modal.Function.from_name("discord-bot-runner", "run_pytorch_script_t4")
+    task = make_task_definition("examples/identity_py")
+    submission_py = Path("examples/identity_py/submission.py").read_text()
+
+    config = build_task_config(
+        task=task.task,
+        submission_content=submission_py,
+        arch=None,
+        mode=SubmissionMode.TEST,
+    )
+
+    result = func.remote(config=config)
+
+    if not result.success:
+        raise Exception(f"PyTorch test failed: {result.error}")
+
+    # A result without an executed "test" run must not count as a pass
+    test_run = result.runs.get("test")
+    if not test_run or not test_run.run:
+        raise Exception("No test run found")
+
+    # Check if any test runs failed
+    for run_name, run_result in result.runs.items():
+        if not run_result.run:
+            continue
+        if not run_result.run.success:
+            raise Exception(f"Test run {run_name} failed")
+        # `passed` is the flag the validation-failure scenarios in this file rely on;
+        # checking `success` alone would let a wrong result slip through here.
+        if not run_result.run.passed:
+            raise Exception(f"Test run {run_name} did not pass validation")
+
+    print("✅ PyTorch correct submission passed")
+
+
+def test_pytorch_validation_fail():
+    """Check that a deliberately wrong PyTorch submission is rejected.
+
+    The kernel returns `torch.zeros_like(input)`, so the remote call is
+    expected to report success while the "test" run reports `passed` as false.
+
+    Raises:
+        Exception: if the remote call reports failure, if there is no
+            executed "test" run, or if that run unexpectedly passed.
+    """
+    print("Testing PyTorch validation failure...")
+
+    runner = modal.Function.from_name("discord-bot-runner", "run_pytorch_script_t4")
+    definition = make_task_definition("examples/identity_py")
+
+    # Kernel that returns zeros instead of its input
+    zeros_kernel = """
+import torch
+def custom_kernel(input):
+    return torch.zeros_like(input)
+    """
+
+    outcome = runner.remote(
+        config=build_task_config(
+            task=definition.task,
+            submission_content=zeros_kernel,
+            arch=None,
+            mode=SubmissionMode.TEST,
+        )
+    )
+
+    if not outcome.success:
+        raise Exception(f"PyTorch test failed to execute: {outcome.error}")
+
+    # The "test" run has to exist and has to have been executed
+    validation = outcome.runs.get("test")
+    if not (validation and validation.run):
+        raise Exception("No test run found")
+
+    # ...and it has to report that validation did not pass
+    if validation.run.passed:
+        raise Exception("Expected validation failure but test passed")
+
+    print("✅ PyTorch validation failure test passed")
+
+
+def main():
+    """Run the Modal scenarios in order and stop at the first failure.
+
+    Prints a summary line on success. On any exception from a scenario, prints
+    the error and exits the process with status 1.
+    """
+    print("Running Modal CI tests...")
+
+    # Execution order matches the order listed here
+    scenarios = (
+        test_cuda_correct,
+        test_cuda_validation_fail,
+        test_pytorch_correct,
+        test_pytorch_validation_fail,
+    )
+
+    try:
+        for scenario in scenarios:
+            scenario()
+
+        print("\n🎉 All Modal tests passed!")
+    except Exception as exc:
+        # Top-level boundary: report the failure and signal it to CI via the exit code
+        print(f"\n❌ Test failed: {exc}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/discord-cluster-manager/modal_runner.py b/src/discord-cluster-manager/modal_runner.py
@@ -28,16 +28,18 @@
         "requests~=2.32.4",
         "packaging~=25.0",
         "numpy~=2.3",
+        "pytest",
+
     )
     .pip_install(
         "torch~=2.7",
         "torchvision~=0.22",
         "torchaudio~=2.7",
-        index_url="https://download.pytorch.org/whl/cu128"
+        index_url="https://download.pytorch.org/whl/cu128",
     )
     # other frameworks
     .pip_install(
-        "jax[cuda12]==0.5.3",   # 0.6 want's cudnn 9.8 in conflict with torch 2.7
+        "jax[cuda12]==0.5.3",  # 0.6 wants cudnn 9.8, which conflicts with torch 2.7
         "jax2torch==0.0.7",
         "tinygrad~=0.10",
     )
@@ -47,8 +49,8 @@
         "nvidia-cutlass-dsl~=4.0",
         "cuda-core[cu12]~=0.3",
         "cuda-python[all]==12.8",
-        #"nvmath-python[cu12]~=0.4",
-        #"numba-cuda[cu12]~=0.15",
+        # "nvmath-python[cu12]~=0.4",
+        # "numba-cuda[cu12]~=0.15",
     )
 )
 
diff --git a/src/discord-cluster-manager/task.py b/src/discord-cluster-manager/task.py
@@ -191,4 +191,5 @@ def build_task_config(
             "sources": sources,
             "headers": headers,
             "include_dirs": task.config.include_dirs,
+            **common,
         }

Original file line number	Diff line number	Diff line change
`@@ -191,4 +191,5 @@ def build_task_config(`
`191`	`191`	`"sources": sources,`
`192`	`192`	`"headers": headers,`
`193`	`193`	`"include_dirs": task.config.include_dirs,`
	`194`	`+ **common,`
`194`	`195`	`}`