Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2c763bc
Added Ci to test CUDA and OMP notebooks
HrisShterev May 21, 2026
2394f29
fix to ci and py script
HrisShterev May 28, 2026
335b747
fix to 10th notebook
HrisShterev May 29, 2026
d16511f
addedd prepare-dell to ci
HrisShterev May 30, 2026
5571788
Add prepare-dell job to check-notebooks workflow
HrisShterev May 30, 2026
ee30a7c
adding parsing milliseconds to the testing
HrisShterev May 30, 2026
0277a61
Disable micromamba download in workflow
HrisShterev May 30, 2026
e08266c
only activating the micromamba enviorment instead of setting it up
HrisShterev May 30, 2026
2abea07
Fix formatting in check-notebooks.yml
HrisShterev May 30, 2026
ca8aa68
Fix whitespace in check-notebooks.yml
HrisShterev May 30, 2026
6a62743
setting up a micromamba env manually
HrisShterev May 30, 2026
03eb186
fixed update env to create env
HrisShterev May 30, 2026
30fb804
fixed hardware cell error
HrisShterev May 30, 2026
ffbea56
run notebooks from their own directory
HrisShterev May 30, 2026
90fcd80
added missing comma
HrisShterev May 30, 2026
ede29fc
Update .github/workflows/check-notebooks.yml
HrisShterev May 31, 2026
8e12608
Update .github/workflows/check-notebooks.yml
HrisShterev May 31, 2026
4e70a90
Update .github/workflows/check-notebooks.yml
HrisShterev May 31, 2026
b30c5de
Update .github/workflows/check-notebooks.yml
HrisShterev May 31, 2026
de6feee
Error tests. Forced error for 06-CUDA and 05-OMP notebooks
HrisShterev May 31, 2026
39c45e2
Removed forced erros
HrisShterev May 31, 2026
d08331b
changes to cuda notebooks
HrisShterev Jun 1, 2026
f2628c8
fixed images + added credits to the cuda by example book
HrisShterev Jun 2, 2026
cf69657
removed cuda image + changes omp one
HrisShterev Jun 3, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 89 additions & 5 deletions .github/workflows/check-notebooks.yml
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
name: Check C++ Notebooks
name: CI

on:
push:
pull_request:
workflow_dispatch:

jobs:
execute-notebooks:
name: Execute C++ Notebooks
execute-notebooks-cpp:
name: C++
runs-on: ubuntu-latest
defaults:
run:
shell: bash -el {0}

steps:
- name: Checkout repository
uses: actions/checkout@v4
Expand All @@ -29,7 +28,7 @@ jobs:
- name: Run C++ Tests via Pytest
run: |
mkdir -p executed
$CONDA_PREFIX/bin/pytest tests/test_notebooks.py -sv
$CONDA_PREFIX/bin/pytest tests/test_notebooks.py::CppNotebookTests -sv

- name: Upload executed notebooks as artifact
if: always()
Expand All @@ -38,3 +37,88 @@ jobs:
name: executed-cpp-notebooks
path: executed/
if-no-files-found: ignore

# Job: executes the OpenMP notebooks on a GitHub-hosted Ubuntu runner and
# uploads whatever was written to executed/ as an artifact.
execute-notebooks-openmp:
name: OpenMP
runs-on: ubuntu-latest
defaults:
run:
# Login shell with errexit for every `run` step in this job.
shell: bash -el {0}
steps:
- name: Checkout repository
uses: actions/checkout@v4

# Builds the environment described by environment.yml (cached between runs).
- name: Set up micromamba environment
uses: mamba-org/setup-micromamba@v2
with:
environment-file: environment.yml
cache-environment: true

- name: List available kernels
run: jupyter kernelspec list

# Runs only the OpenMPNotebookTests class from tests/test_notebooks.py, with
# the environment's libomp.so preloaded into the pytest process.
# NOTE(review): presumably the preload lets the notebook kernels resolve the
# OpenMP runtime symbols - confirm.
- name: Run OpenMP notebook tests
run: |
mkdir -p executed

# Force OpenMP to spawn 8 threads to match the reference .ipynb files
export OMP_NUM_THREADS=8

LD_PRELOAD="$CONDA_PREFIX/lib/libomp.so" \
$CONDA_PREFIX/bin/pytest tests/test_notebooks.py::OpenMPNotebookTests -sv

# `if: always()` keeps the upload running even when the test step failed.
- name: Upload executed OpenMP notebooks as artifact
if: always()
uses: actions/upload-artifact@v4
with:
name: executed-openmp-notebooks
path: executed/
if-no-files-found: ignore

# Job: runs on the self-hosted runner labelled "spotter" and invokes the
# wake-on-lan action for the machine identified by the MAC address and
# target host below. The CUDA job lists this job under `needs`, so it starts
# only after this job has succeeded.
# NOTE(review): the action is referenced at `@main`, a mutable ref - consider
# pinning it to a commit SHA.
prepare-dell:
runs-on: [self-hosted, spotter]
steps:
- uses: compiler-research/ci-workflows/actions/wake-on-lan@main
with:
mac: a4:bb:6d:51:d5:d2
target-host: 192.168.100.30

# Job: executes the CUDA notebooks on the self-hosted runner labelled "cuda",
# after the prepare-dell job has finished.
execute-notebooks-cuda:
name: CUDA
needs: prepare-dell
runs-on: [self-hosted, cuda]
defaults:
run:
# Login shell with errexit for every `run` step in this job.
shell: bash -el {0}
steps:
- name: Checkout repository
uses: actions/checkout@v4
# Creates the `livecpp-ci` environment with the micromamba binary already
# present on the runner, then exports CONDA_PREFIX through GITHUB_ENV so the
# following steps can locate the environment.
# NOTE(review): both paths are hard-coded under /root - confirm they match the
# runner's micromamba root prefix.
- name: Set up micromamba environment
run: |
/root/micromamba-bin/micromamba create -n livecpp-ci -f environment.yml --yes
echo "CONDA_PREFIX=/root/.local/share/mamba/envs/livecpp-ci" >> $GITHUB_ENV

# Clones the xcpp23 kernelspec directory to xcpp23-cuda and appends the
# `--cuda` and `--cuda-path=...` arguments to the copy's kernel.json argv.
- name: Install CUDA kernel
run: |
cp -r $CONDA_PREFIX/share/jupyter/kernels/xcpp23 $CONDA_PREFIX/share/jupyter/kernels/xcpp23-cuda
python3 -c "
import json
with open('$CONDA_PREFIX/share/jupyter/kernels/xcpp23-cuda/kernel.json') as f:
k = json.load(f)
k['argv'] += ['--cuda', '--cuda-path=$CONDA_PREFIX/targets/x86_64-linux']
with open('$CONDA_PREFIX/share/jupyter/kernels/xcpp23-cuda/kernel.json', 'w') as f:
json.dump(k, f, indent=2)
"

# Runs only the CudaNotebookTests class from tests/test_notebooks.py.
- name: Run CUDA Tests via Pytest
run: |
mkdir -p executed
$CONDA_PREFIX/bin/pytest tests/test_notebooks.py::CudaNotebookTests -sv

# `if: always()` keeps the upload running even when the test step failed.
- name: Upload executed notebooks as artifact
if: always()
uses: actions/upload-artifact@v4
with:
name: executed-cuda-notebooks
path: executed/
if-no-files-found: ignore
203 changes: 203 additions & 0 deletions cuda/01_Intoduction-to-cuda.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b",
"metadata": {},
"outputs": [],
"source": [
"#undef __noinline__"
]
},
{
"cell_type": "markdown",
"id": "a",
"metadata": {},
"source": [
"# Introduction to CUDA\n",
"\n",
"CUDA lets you run code directly on the GPU by writing special functions called **kernels**.\n",
"This notebook walks through the simplest possible examples to get you comfortable with the\n",
"three things every CUDA program has to do:\n",
"\n",
"1. Move data onto the GPU\n",
"2. Launch a kernel to process it\n",
"3. Move the result back to the CPU\n",
"\n",
"**Some examples in this series are inspired by concepts from CUDA by Example by Jason Sanders and Edward Kandrot. [Link to the book](https://books.google.bg/books?id=Om8JRAAACAAJ&redir_esc=y)**\n",
"\n",
"---\n",
"\n",
"## Part 1 — Adding two numbers on the GPU\n",
"\n",
"The `__global__` keyword tells the compiler that this function runs on the GPU but is called from the CPU.\n",
"A kernel must return `void`, so instead of returning the sum we write it through a pointer to GPU memory."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "bf2ceb5e",
"metadata": {},
"outputs": [],
"source": [
"// Kernel: stores a + b in the int that `result` points to.\n",
"// `result` is dereferenced on the GPU, so it must point to device memory.\n",
"__global__ void gpu_add(int a, int b, int *result) {\n",
"    const int sum = a + b;\n",
"    *result = sum;\n",
"}"
]
},
{
"cell_type": "markdown",
"id": "df01c907",
"metadata": {},
"source": [
"To call it we use the `<<<blocks, threads>>>` launch syntax — `<<<1,1>>>` means one block,\n",
"one thread. Before the call we need a place in GPU memory to hold the answer, which we get\n",
"with `cudaMalloc`. After the kernel finishes we pull the value back with `cudaMemcpy`."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "dce266d8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5 + 9 = 14\n"
]
}
],
"source": [
"#include <cstdio>\n",
"\n",
"// Report a failed CUDA runtime call on stderr. Nothing is written to stdout,\n",
"// so the cell's output is unchanged when every call succeeds. The guard keeps\n",
"// the macro from being redefined if it already exists.\n",
"#ifndef CHECK_CUDA\n",
"#define CHECK_CUDA(call)                                              \\\n",
"    do {                                                              \\\n",
"        cudaError_t status_ = (call);                                 \\\n",
"        if (status_ != cudaSuccess)                                   \\\n",
"            fprintf(stderr, \"CUDA error in %s: %s\\n\", #call,          \\\n",
"                    cudaGetErrorString(status_));                     \\\n",
"    } while (0)\n",
"#endif\n",
"\n",
"int host_result = 0;\n",
"int *dev_result = nullptr;\n",
"\n",
"// Reserve one int in GPU memory for the kernel to write into.\n",
"CHECK_CUDA(cudaMalloc((void**)&dev_result, sizeof(int)));\n",
"\n",
"gpu_add<<<1, 1>>>(5, 9, dev_result);\n",
"// A launch returns no status itself; launch errors are picked up here.\n",
"CHECK_CUDA(cudaGetLastError());\n",
"\n",
"// Copy the answer from the GPU back into the host variable.\n",
"CHECK_CUDA(cudaMemcpy(&host_result, dev_result, sizeof(int), cudaMemcpyDeviceToHost));\n",
"\n",
"printf(\"5 + 9 = %d\\n\", host_result);\n",
"CHECK_CUDA(cudaFree(dev_result));"
]
},
{
"cell_type": "markdown",
"id": "e67df804",
"metadata": {},
"source": [
"---\n",
"\n",
"## Part 2 — Adding two arrays in parallel\n",
"\n",
"A single GPU thread is no faster than the CPU. The power comes from launching **many threads at once**,\n",
"each one handling one element independently.\n",
"\n",
"Below, the kernel adds a single pair of elements. The index it operates on comes from\n",
"`blockIdx.x` — the block number — so launching N blocks gives us N simultaneous additions.\n",
"\n",
"![Vector_Add_Model](images/vectoradd.png)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "92148462",
"metadata": {},
"outputs": [],
"source": [
"#define N 10\n",
"\n",
"// Kernel: the block with index k computes c[k] = a[k] + b[k].\n",
"// Blocks whose index is N or larger return without touching memory.\n",
"__global__ void add_vectors(int *a, int *b, int *c) {\n",
"    const int idx = blockIdx.x;  // one element per block\n",
"    if (idx >= N)\n",
"        return;\n",
"    c[idx] = a[idx] + b[idx];\n",
"}"
]
},
{
"cell_type": "markdown",
"id": "458bea36",
"metadata": {},
"source": [
"We allocate three arrays on the GPU, copy the inputs across, launch `N` blocks, then bring\n",
"the result back."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "93d16f4d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 + 0 = 0\n",
"1 + 1 = 2\n",
"2 + 4 = 6\n",
"3 + 9 = 12\n",
"4 + 16 = 20\n",
"5 + 25 = 30\n",
"6 + 36 = 42\n",
"7 + 49 = 56\n",
"8 + 64 = 72\n",
"9 + 81 = 90\n"
]
}
],
"source": [
"// Report a failed CUDA runtime call on stderr. Nothing is written to stdout,\n",
"// so the cell's output is unchanged when every call succeeds. The guard keeps\n",
"// the macro from being redefined if it already exists.\n",
"#ifndef CHECK_CUDA\n",
"#define CHECK_CUDA(call)                                              \\\n",
"    do {                                                              \\\n",
"        cudaError_t status_ = (call);                                 \\\n",
"        if (status_ != cudaSuccess)                                   \\\n",
"            fprintf(stderr, \"CUDA error in %s: %s\\n\", #call,          \\\n",
"                    cudaGetErrorString(status_));                     \\\n",
"    } while (0)\n",
"#endif\n",
"\n",
"int h_in1[N], h_in2[N], h_out[N];\n",
"int *d_in1 = nullptr, *d_in2 = nullptr, *d_out = nullptr;\n",
"\n",
"// Three device buffers: two inputs and one output, N ints each.\n",
"CHECK_CUDA(cudaMalloc((void**)&d_in1, N * sizeof(int)));\n",
"CHECK_CUDA(cudaMalloc((void**)&d_in2, N * sizeof(int)));\n",
"CHECK_CUDA(cudaMalloc((void**)&d_out, N * sizeof(int)));\n",
"\n",
"// Fill the host inputs: h_in1[i] = i and h_in2[i] = i * i.\n",
"for (int i = 0; i < N; i++) {\n",
"    h_in1[i] = i;\n",
"    h_in2[i] = i * i;\n",
"}\n",
"\n",
"CHECK_CUDA(cudaMemcpy(d_in1, h_in1, N * sizeof(int), cudaMemcpyHostToDevice));\n",
"CHECK_CUDA(cudaMemcpy(d_in2, h_in2, N * sizeof(int), cudaMemcpyHostToDevice));\n",
"\n",
"// N blocks of one thread each: one block per array element.\n",
"add_vectors<<<N, 1>>>(d_in1, d_in2, d_out);\n",
"// A launch returns no status itself; launch errors are picked up here.\n",
"CHECK_CUDA(cudaGetLastError());\n",
"\n",
"CHECK_CUDA(cudaMemcpy(h_out, d_out, N * sizeof(int), cudaMemcpyDeviceToHost));\n",
"\n",
"for (int i = 0; i < N; i++)\n",
"    printf(\"%d + %d = %d\\n\", h_in1[i], h_in2[i], h_out[i]);\n",
"\n",
"CHECK_CUDA(cudaFree(d_in1));\n",
"CHECK_CUDA(cudaFree(d_in2));\n",
"CHECK_CUDA(cudaFree(d_out));"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "C++23 CUDA",
"language": "cpp",
"name": "xcpp23-cuda"
},
"language_info": {
"codemirror_mode": "text/x-c++src",
"file_extension": ".cpp",
"mimetype": "text/x-c++src",
"name": "CUDA",
"nbconvert_exporter": "",
"pygments_lexer": "",
"version": "cxx23"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading
Loading