Skip to content

Commit 5ae423f

Browse files
authored
[FEA]: Add pathfinder cudla support (.so, .h) (#1855)
* pathfinder: add cudla and nvcudla support Add pathfinder support for loading ``libcudla.so.1`` from the ``nvidia-cudla`` package and probing ``libnvcudla.so`` through the existing canary subprocess path. Use that probe in the cudla load test so hosts without the platform runtime are skipped, while real ``libcudla.so.1`` load failures still surface when ``libnvcudla.so`` is available. Made-with: Cursor * pathfinder: gate cudla support by machine architecture Mark cudla and nvcudla as aarch64-only descriptors and derive the supported library tables from the current machine as well as the current OS. This keeps those libraries known to pathfinder while reporting them as unavailable on linux-64, and updates the descriptor-registry tests to match the new current-platform filtering model. Made-with: Cursor * pathfinder: skip nvcudla tests when runtime is absent Skip the cudla and nvcudla load tests on aarch64 hosts when the nvcudla canary probe cannot resolve libnvcudla.so. This keeps non-Tegra linux-aarch64 systems from failing strict test runs while still exercising the real success path on Tegra platforms where the platform runtime is installed. Made-with: Cursor * pathfinder: rely on nvcudla runtime probe in tests Remove the machine-architecture gating for cudla and nvcudla so they remain part of the normal Linux descriptor tables. Let the nvcudla canary probe decide whether cudla and nvcudla tests should run, which keeps strict test runs green on hosts without the platform runtime while still exercising real load behavior where libnvcudla.so is available. Made-with: Cursor * pathfinder: share libnvcudla test skip helper Move the libnvcudla.so skip logic into conftest so cudla and nvcudla tests use one shared rule. Keeping the helper in the pytest support layer avoids duplicate test code while still deferring the pathfinder import until the helper runs. 
Made-with: Cursor * pathfinder: add cudla header lookup support Register cudla as a CTK header so locate_nvidia_header_directory() can find cudla.h in the standard cu13 wheel include directory. In strict header tests, skip cudla on hosts where libnvcudla.so is not available so Tegra setups still exercise the real path without making unsupported hosts fail. Made-with: Cursor * pathfinder: classify cudla as a CTK library Move cudla into the CTK descriptor block so its packaging classification matches how it is shipped in toolkit installs and the optional nvidia-cudla wheel. This keeps the catalog organization consistent with the current understanding of cudla as a CUDA Toolkit component rather than a third-party add-on. Made-with: Cursor * Undo Copyright date change left over after undoing all other intermediate changes.
1 parent c6aea12 commit 5ae423f

8 files changed

Lines changed: 54 additions & 3 deletions

File tree

cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,12 @@ class DescriptorSpec:
290290
anchor_rel_dirs_windows=("extras/CUPTI/lib64", "bin"),
291291
ctk_root_canary_anchor_libnames=("cudart",),
292292
),
293+
DescriptorSpec(
294+
name="cudla",
295+
packaged_with="ctk",
296+
linux_sonames=("libcudla.so.1",),
297+
site_packages_linux=("nvidia/cu13/lib",),
298+
),
293299
# -----------------------------------------------------------------------
294300
# Third-party / separately packaged libraries
295301
# -----------------------------------------------------------------------
@@ -386,6 +392,11 @@ class DescriptorSpec:
386392
linux_sonames=("libcuda.so.1",),
387393
windows_dlls=("nvcuda.dll",),
388394
),
395+
DescriptorSpec(
396+
name="nvcudla",
397+
packaged_with="driver",
398+
linux_sonames=("libnvcudla.so",),
399+
),
389400
DescriptorSpec(
390401
name="nvml",
391402
packaged_with="driver",

cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,14 +99,18 @@ def _raise_canary_probe_child_process_error(
9999

100100

101101
@functools.cache
102-
def _resolve_system_loaded_abs_path_in_subprocess(libname: str) -> str | None:
102+
def _resolve_system_loaded_abs_path_in_subprocess(
103+
libname: str,
104+
*,
105+
timeout: float = _CANARY_PROBE_TIMEOUT_SECONDS,
106+
) -> str | None:
103107
"""Resolve a canary library's absolute path in a fresh Python subprocess."""
104108
try:
105109
result = subprocess.run( # noqa: S603 - trusted argv: current interpreter + internal probe module
106110
build_dynamic_lib_subprocess_command(MODE_CANARY, libname),
107111
capture_output=True,
108112
text=True,
109-
timeout=_CANARY_PROBE_TIMEOUT_SECONDS,
113+
timeout=timeout,
110114
check=False,
111115
cwd=DYNAMIC_LIB_SUBPROCESS_CWD,
112116
)
@@ -127,6 +131,11 @@ def _resolve_system_loaded_abs_path_in_subprocess(libname: str) -> str | None:
127131
return None
128132

129133

134+
def _loadable_via_canary_subprocess(libname: str, *, timeout: float = _CANARY_PROBE_TIMEOUT_SECONDS) -> bool:
135+
"""Return True if the canary subprocess can resolve ``libname`` via system search."""
136+
return _resolve_system_loaded_abs_path_in_subprocess(libname, timeout=timeout) is not None
137+
138+
130139
def _try_ctk_root_canary(ctx: SearchContext) -> str | None:
131140
"""Try CTK-root canary fallback for descriptor-configured libraries."""
132141
for canary_libname in ctx.desc.ctk_root_canary_anchor_libnames:

cuda_pathfinder/cuda/pathfinder/_headers/header_descriptor_catalog.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,13 @@ class HeaderDescriptorSpec:
134134
site_packages_dirs=("nvidia/cu13/include", "nvidia/cuda_nvcc/nvvm/include"),
135135
anchor_include_rel_dirs=("nvvm/include",),
136136
),
137+
HeaderDescriptorSpec(
138+
name="cudla",
139+
packaged_with="ctk",
140+
header_basename="cudla.h",
141+
site_packages_dirs=("nvidia/cu13/include",),
142+
available_on_windows=False,
143+
),
137144
# -----------------------------------------------------------------------
138145
# Third-party / separately packaged headers
139146
# -----------------------------------------------------------------------

cuda_pathfinder/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ cu13 = [
3636
"cuda-toolkit[cufile]==13.*; sys_platform != 'win32'",
3737
"cutensor-cu13",
3838
"nvidia-cublasmp-cu13; sys_platform != 'win32'",
39+
"nvidia-cudla; platform_system == 'Linux' and platform_machine == 'aarch64'",
3940
"nvidia-cudss-cu13",
4041
"nvidia-cufftmp-cu13; sys_platform != 'win32'",
4142
"nvidia-cusolvermp-cu13; sys_platform != 'win32'",

cuda_pathfinder/tests/conftest.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,15 @@ def _append(message):
2929
request.config.custom_info.append(f"{request.node.name}: {message}")
3030

3131
return _append
32+
33+
34+
def skip_if_missing_libnvcudla_so(libname: str, *, timeout: float) -> None:
35+
if libname not in ("cudla", "nvcudla"):
36+
return
37+
# Keep the import inside the helper so unrelated import issues do not fail
38+
# pytest collection for the whole test suite.
39+
from cuda.pathfinder._dynamic_libs import load_nvidia_dynamic_lib as load_nvidia_dynamic_lib_module
40+
41+
if load_nvidia_dynamic_lib_module._loadable_via_canary_subprocess("nvcudla", timeout=timeout):
42+
return
43+
pytest.skip("libnvcudla.so is not loadable via canary subprocess on this host.")

cuda_pathfinder/tests/test_driver_lib_loading.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
run_load_nvidia_dynamic_lib_in_subprocess,
1717
)
1818

19+
from conftest import skip_if_missing_libnvcudla_so
1920
from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS
2021
from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError, LoadedDL
2122
from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import (
@@ -147,6 +148,7 @@ def raise_child_process_failed():
147148
error_label="Load subprocess child process",
148149
)
149150
if payload.status == STATUS_NOT_FOUND:
151+
skip_if_missing_libnvcudla_so(libname, timeout=timeout)
150152
if STRICTNESS == "all_must_work":
151153
raise_child_process_failed()
152154
info_summary_append(f"Not found: {libname=!r}")

cuda_pathfinder/tests/test_find_nvidia_headers.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import pytest
2222

2323
import cuda.pathfinder._headers.find_nvidia_headers as find_nvidia_headers_module
24+
from conftest import skip_if_missing_libnvcudla_so
2425
from cuda.pathfinder import LocatedHeaderDir, find_nvidia_header_directory, locate_nvidia_header_directory
2526
from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import (
2627
_resolve_system_loaded_abs_path_in_subprocess,
@@ -158,6 +159,8 @@ def test_locate_ctk_headers(info_summary_append, libname):
158159
h_filename = SUPPORTED_HEADERS_CTK[libname]
159160
assert os.path.isfile(os.path.join(hdr_dir, h_filename))
160161
if STRICTNESS == "all_must_work":
162+
if libname == "cudla":
163+
skip_if_missing_libnvcudla_so(libname, timeout=30)
161164
assert hdr_dir is not None
162165

163166

cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,14 @@
1111
)
1212
from local_helpers import have_distribution
1313

14+
from conftest import skip_if_missing_libnvcudla_so
1415
from cuda.pathfinder import DynamicLibNotAvailableError, DynamicLibUnknownError, load_nvidia_dynamic_lib
1516
from cuda.pathfinder._dynamic_libs import load_nvidia_dynamic_lib as load_nvidia_dynamic_lib_module
1617
from cuda.pathfinder._dynamic_libs import supported_nvidia_libs
17-
from cuda.pathfinder._dynamic_libs.subprocess_protocol import STATUS_NOT_FOUND, parse_dynamic_lib_subprocess_payload
18+
from cuda.pathfinder._dynamic_libs.subprocess_protocol import (
19+
STATUS_NOT_FOUND,
20+
parse_dynamic_lib_subprocess_payload,
21+
)
1822
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS, quote_for_shell
1923

2024
STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS", "see_what_works")
@@ -117,6 +121,7 @@ def raise_child_process_failed():
117121
raise RuntimeError(build_child_process_failed_for_libname_message(libname, result))
118122

119123
if result.returncode != 0:
124+
skip_if_missing_libnvcudla_so(libname, timeout=timeout)
120125
raise_child_process_failed()
121126
assert not result.stderr
122127
payload = parse_dynamic_lib_subprocess_payload(
@@ -125,6 +130,7 @@ def raise_child_process_failed():
125130
error_label="Load subprocess child process",
126131
)
127132
if payload.status == STATUS_NOT_FOUND:
133+
skip_if_missing_libnvcudla_so(libname, timeout=timeout)
128134
if STRICTNESS == "all_must_work" and not _is_expected_load_nvidia_dynamic_lib_failure(libname):
129135
raise_child_process_failed()
130136
info_summary_append(f"Not found: {libname=!r}")

0 commit comments

Comments (0)