Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
7f56f01
use pathfinder naively for dynamic libs
brandon-b-miller Jun 17, 2025
c2f7611
other needed cuda components
brandon-b-miller Jun 24, 2025
977c8c9
a comment
brandon-b-miller Jun 24, 2025
2765eae
merge/resolve/pass
brandon-b-miller Oct 15, 2025
738702c
deps
brandon-b-miller Oct 15, 2025
2bfa543
updates
brandon-b-miller Oct 16, 2025
a0756f7
simpler
brandon-b-miller Oct 16, 2025
03cddf2
clean
brandon-b-miller Oct 16, 2025
0ab6939
small fixes
brandon-b-miller Oct 17, 2025
ba99bbe
merge/resolve
brandon-b-miller Jan 9, 2026
64ab7a9
fixi
brandon-b-miller Jan 9, 2026
109e3e6
fallback to old logic for nvvm
brandon-b-miller Jan 9, 2026
6b13895
merge/resolve
brandon-b-miller Jan 12, 2026
074f63d
greptile
brandon-b-miller Jan 12, 2026
ab9c363
none case
brandon-b-miller Jan 12, 2026
aace79c
Merge branch 'main' into cuda-core-pathfinder
brandon-b-miller Jan 12, 2026
d657f9a
remove commented code
brandon-b-miller Jan 12, 2026
aa4d58a
Merge branch 'main' into cuda-core-pathfinder
brandon-b-miller Jan 12, 2026
a9d1c07
small changes
brandon-b-miller Jan 12, 2026
7251a21
Merge branch 'main' into cuda-core-pathfinder
brandon-b-miller Jan 20, 2026
f49647b
_get_nvvm from review/test
brandon-b-miller Jan 20, 2026
32178c0
fix awkward job
brandon-b-miller Jan 20, 2026
0b822ff
fix
brandon-b-miller Jan 20, 2026
8630f41
fix
brandon-b-miller Jan 20, 2026
7d84f93
minor updates
brandon-b-miller Jan 20, 2026
f88abb0
code cleanup
brandon-b-miller Jan 20, 2026
71b800a
suggestion from greptile
brandon-b-miller Jan 20, 2026
ae330ca
address reviews
brandon-b-miller Jan 20, 2026
7da093f
Update pyproject.toml
brandon-b-miller Jan 20, 2026
b87fab2
address reviews
brandon-b-miller Jan 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions ci/test_thirdparty_awkward.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
set -euo pipefail

CUDA_VER_MAJOR_MINOR=${CUDA_VER%.*}
CUDA_VER_MAJOR=${CUDA_VER%.*.*}
AWKWARD_VERSION="2.8.10"

rapids-logger "Install awkward and related libraries"
Expand All @@ -15,10 +16,7 @@ rapids-logger "Install wheel with test dependencies"
package=$(realpath "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl)
echo "Package path: ${package}"
python -m pip install \
"${package}" \
"cuda-python==${CUDA_VER_MAJOR_MINOR%.*}.*" \
"cuda-core" \
"nvidia-nvjitlink-cu12" \
"${package}[cu${CUDA_VER_MAJOR}]" \
--group test
Comment thread
brandon-b-miller marked this conversation as resolved.
Comment thread
brandon-b-miller marked this conversation as resolved.


Expand Down
285 changes: 108 additions & 177 deletions numba_cuda/numba/cuda/cuda_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
from numba.cuda.core.config import IS_WIN32
from numba.cuda.misc.findlib import find_lib
from numba.cuda import config
from cuda import pathfinder
Comment thread
brandon-b-miller marked this conversation as resolved.
import pathlib
from contextlib import contextmanager

_env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"])

Expand All @@ -20,6 +23,20 @@
]


@contextmanager
def temporary_env_var(key, value):
    """Set ``os.environ[key]`` to ``value`` for the body of a ``with`` block.

    When the block exits (normally or through an exception) the variable is
    put back to the value it had on entry, or removed again if it was not
    set on entry.
    """
    previous = os.environ.get(key)
    os.environ[key] = value
    try:
        yield
    finally:
        if previous is not None:
            os.environ[key] = previous
        else:
            # The variable did not exist on entry; tolerate the body having
            # already deleted it.
            os.environ.pop(key, None)


def _get_distribution(distribution_name):
"""Get the distribution path using importlib.metadata, returning None if not found."""
try:
Expand Down Expand Up @@ -83,112 +100,6 @@ def _get_libdevice_path_decision():
return _find_first_valid_lazy(options)


def _get_nvvm_path_decision():
    """Choose where the NVVM library comes from.

    The labelled lookup callables below are handed to ``_build_options`` and
    the result to ``_find_first_valid_lazy``; whatever that helper selects is
    returned (callers unpack it as a ``(by, path)`` pair).
    """
    providers = [
        ("Conda environment", _get_nvvm_conda_path),
        ("NVIDIA NVCC Wheel", _get_nvvm_wheel_path),
        ("CUDA_HOME", _get_nvvm_cuda_home_path),
        ("System", _get_nvvm_system_path),
    ]
    return _find_first_valid_lazy(_build_options(providers))


def _get_nvrtc_path_decision():
    """Choose the directory that NVRTC should be searched in.

    The labelled lookup callables below are handed to ``_build_options`` and
    the result to ``_find_first_valid_lazy``; whatever that helper selects is
    returned (callers unpack it as a ``(by, libdir)`` pair).
    """
    providers = [
        ("Conda environment", get_conda_ctk_libdir),
        ("NVIDIA NVCC Wheel", _get_nvrtc_wheel_libdir),
        ("CUDA_HOME", get_cuda_home_libdir),
        ("System", get_system_ctk_libdir),
    ]
    return _find_first_valid_lazy(_build_options(providers))


def _get_nvvm_wheel_path():
    """Return the path of the NVVM library installed from an NVIDIA wheel.

    The CUDA 12 layout (``nvidia-cuda-nvcc-cu12``) is checked first, then the
    CUDA 13 layout (``nvidia-nvvm`` with a ``13.*`` version). A candidate is
    accepted only if the library file actually exists, so an installed
    CUDA 12 distribution whose NVVM library is missing does not prevent the
    CUDA 13 wheel from being tried.

    Returns:
        The path of the library file, or ``None`` if no wheel provides it.
    """
    # CUDA 12
    cu12_dist = _get_distribution("nvidia-cuda-nvcc-cu12")
    if cu12_dist is not None:
        cu12_path = os.path.join(
            cu12_dist.locate_file(""),
            "nvidia",
            "cuda_nvcc",
            "nvvm",
            "bin" if IS_WIN32 else "lib64",
            "nvvm64_40_0.dll" if IS_WIN32 else "libnvvm.so",
        )
        if os.path.isfile(cu12_path):
            return cu12_path

    # CUDA 13
    cu13_dist = _get_distribution("nvidia-nvvm")
    if cu13_dist is not None and cu13_dist.version.startswith("13."):
        cu13_dir = os.path.join(
            cu13_dist.locate_file(""),
            "nvidia",
            "cu13",
            "bin" if IS_WIN32 else "lib",
            "x86_64" if IS_WIN32 else "",
        )
        cu13_path = os.path.join(
            cu13_dir, "nvvm64_40_0.dll" if IS_WIN32 else "libnvvm.so.4"
        )
        if os.path.isfile(cu13_path):
            return cu13_path

    return None


def _get_nvrtc_wheel_libdir():
    """Return the directory holding NVRTC as installed from an NVIDIA wheel.

    The CUDA 12 layout (``nvidia-cuda-nvrtc-cu12``) is checked first, then
    the CUDA 13 layout (``nvidia-cuda-nvrtc`` with a ``13.*`` version). A
    candidate is accepted only if the library file actually exists, so an
    installed CUDA 12 distribution whose NVRTC library is missing does not
    prevent the CUDA 13 wheel from being tried.

    Returns:
        The directory containing the library file, or ``None`` if no wheel
        provides it.
    """
    # CUDA 12
    cu12_dist = _get_distribution("nvidia-cuda-nvrtc-cu12")
    if cu12_dist is not None:
        cu12_path = os.path.join(
            cu12_dist.locate_file(""),
            "nvidia",
            "cuda_nvrtc",
            "bin" if IS_WIN32 else "lib",
            "nvrtc64_120_0.dll" if IS_WIN32 else "libnvrtc.so.12",
        )
        if os.path.isfile(cu12_path):
            return os.path.dirname(cu12_path)

    # CUDA 13
    cu13_dist = _get_distribution("nvidia-cuda-nvrtc")
    if cu13_dist is not None and cu13_dist.version.startswith("13."):
        cu13_dir = os.path.join(
            cu13_dist.locate_file(""),
            "nvidia",
            "cu13",
            "bin" if IS_WIN32 else "lib",
            "x86_64" if IS_WIN32 else "",
        )
        cu13_path = os.path.join(
            cu13_dir,
            "nvrtc64_130_0.dll" if IS_WIN32 else "libnvrtc.so.13",
        )
        if os.path.isfile(cu13_path):
            return os.path.dirname(cu13_path)

    return None


def _get_libdevice_path():
by, out = _get_libdevice_path_decision()
if not out:
Expand Down Expand Up @@ -321,22 +232,6 @@ def get_system_ctk_include():
return None


def _get_nvvm_system_path():
    """Build the expected NVVM library path under the system CUDA toolkit.

    Returns ``None`` when ``get_system_ctk("nvvm")`` yields ``None``.
    Otherwise the path is returned without checking that the file exists.
    """
    nvvm_root = get_system_ctk("nvvm")
    if nvvm_root is None:
        return None

    if IS_WIN32:
        lib_dir = os.path.join(nvvm_root, "bin")
        # Prefer the "x64" subdirectory when the install has one.
        x64_dir = os.path.join(lib_dir, "x64")
        if os.path.isdir(x64_dir):
            lib_dir = x64_dir
        dso_name = "nvvm64_40_0.dll"
    else:
        lib_dir = os.path.join(nvvm_root, "lib64")
        dso_name = "libnvvm.so.4"

    return os.path.join(lib_dir, dso_name)


def get_conda_ctk_libdir():
"""Return path to directory containing the shared libraries of cudatoolkit."""
is_conda_env = os.path.isdir(os.path.join(sys.prefix, "conda-meta"))
Expand Down Expand Up @@ -378,29 +273,6 @@ def get_libdevice_conda_path():
return None


def _get_nvvm_conda_path():
    """Return the path of the NVVM library inside the active conda env.

    ``None`` is returned when ``sys.prefix`` has no ``conda-meta`` directory
    or when the library file is not present at the expected location.
    """
    conda_meta = os.path.join(sys.prefix, "conda-meta")
    if not os.path.isdir(conda_meta):
        return None

    if IS_WIN32:
        lib_dir = os.path.join(sys.prefix, "Library", "nvvm", "bin")
        # Windows CUDA 13.0.0 puts the DLL in a "bin/x64" directory, while
        # 13.0.1+ just uses the "bin" directory.
        x64_dir = os.path.join(lib_dir, "x64")
        if os.path.isdir(x64_dir):
            lib_dir = x64_dir
        dso_name = "nvvm64_40_0.dll"
    else:
        lib_dir = os.path.join(sys.prefix, "nvvm", "lib64")
        dso_name = "libnvvm.so.4"

    candidate = os.path.join(lib_dir, dso_name)
    return candidate if os.path.isfile(candidate) else None


def get_wheel_static_libdir():
cuda_module_static_lib_dir = None
# CUDA 12
Expand Down Expand Up @@ -528,43 +400,13 @@ def get_cuda_home_include():
return None


def _get_nvvm_cuda_home_path():
    """Build the expected NVVM library path under ``get_cuda_home("nvvm")``.

    Returns ``None`` when ``get_cuda_home("nvvm")`` yields ``None``.
    Otherwise the path is returned without checking that the file exists.
    """
    nvvm_root = get_cuda_home("nvvm")
    if nvvm_root is None:
        return None

    if IS_WIN32:
        lib_dir = os.path.join(nvvm_root, "bin")
        # Prefer the "x64" subdirectory when the install has one.
        x64_dir = os.path.join(lib_dir, "x64")
        if os.path.isdir(x64_dir):
            lib_dir = x64_dir
        dso_name = "nvvm64_40_0.dll"
    else:
        lib_dir = os.path.join(nvvm_root, "lib64")
        dso_name = "libnvvm.so.4"

    return os.path.join(lib_dir, dso_name)


def _get_nvvm_path():
    """Wrap the NVVM path decision in an ``_env_path_tuple``.

    Any falsy location from ``_get_nvvm_path_decision`` is normalised to
    ``None`` in the ``info`` field.
    """
    source, location = _get_nvvm_path_decision()
    return _env_path_tuple(source, location if location else None)


def _get_nvrtc_path():
    """Locate NVRTC in the chosen library directory.

    ``find_lib`` is asked for ``nvrtc`` candidates in the directory picked by
    ``_get_nvrtc_path_decision``; the maximum candidate is reported, or
    ``None`` when there are no candidates.
    """
    source, libdir = _get_nvrtc_path_decision()
    matches = find_lib("nvrtc", libdir=libdir)
    if matches:
        return _env_path_tuple(source, max(matches))
    return _env_path_tuple(source, None)


def get_cuda_paths():
"""Returns a dictionary mapping component names to a 2-tuple
of (source_variable, info).

The returned dictionary will have the following keys and infos:
- "nvvm": file_path
- "nvrtc": file_path
- "nvvm": file_path
- "libdevice": file_path
- "cudalib_dir": directory_path
- "static_cudalib_dir": directory_path
Expand All @@ -578,8 +420,8 @@ def get_cuda_paths():
else:
# Not in cache
d = {
"nvvm": _get_nvvm_path(),
"nvrtc": _get_nvrtc_path(),
"nvvm": _get_nvvm_path(),
"libdevice": _get_libdevice_path(),
"cudalib_dir": _get_cudalib_dir(),
"static_cudalib_dir": _get_static_cudalib_dir(),
Expand Down Expand Up @@ -689,3 +531,92 @@ def _get_include_dir():
]
by, include_dir = _find_valid_path(options)
return _env_path_tuple(by, include_dir)


def _find_cuda_home_from_lib_path(lib_path):
"""
Walk up from a library path to find a directory containing 'nvvm' subdirectory.

For example, given /usr/local/cuda/lib64/libnvrtc.so.12,
this would find /usr/local/cuda (which contains nvvm/).

Returns the path if found, None otherwise.
"""
current = pathlib.Path(lib_path).resolve()

# Walk up the directory tree
for parent in current.parents:
nvvm_subdir = parent / "nvvm"
if nvvm_subdir.is_dir():
return str(parent)

return None


def _get_nvvm():
    """Load NVVM through pathfinder, inferring ``CUDA_HOME`` from NVRTC if needed.

    Steps:
      1. Ask pathfinder for ``nvvm`` directly and return it on success.
      2. If ``CUDA_HOME`` or ``CUDA_PATH`` is set, give up; per the code's own
         rationale, pathfinder would already have used them.
      3. Load NVRTC; it must have been found via ``"system-search"``.
      4. Walk up from NVRTC's path to a directory containing ``nvvm``.
      5. Retry pathfinder with ``CUDA_HOME`` temporarily set to that directory.

    Returns:
        The loaded-library object from pathfinder, with ``found_via`` set to
        ``"system-search"`` when it was obtained through the retry.

    Raises:
        pathfinder.DynamicLibNotFoundError: if NVVM cannot be located; the
            message always embeds the error from the first attempt.
    """
    try:
        return pathfinder.load_nvidia_dynamic_lib("nvvm")
    except pathfinder.DynamicLibNotFoundError as first_error:
        # Keep a reference: the ``as`` name is unbound when the clause ends.
        nvvm_exc = first_error

    def _raise_original(reason: str) -> None:
        message = f"{reason}; original nvvm error: {nvvm_exc}"
        raise pathfinder.DynamicLibNotFoundError(message) from nvvm_exc

    # With CUDA_HOME/CUDA_PATH set, pathfinder would have used them already,
    # so there is nothing further to try.
    if any(os.environ.get(name) for name in ("CUDA_HOME", "CUDA_PATH")):
        _raise_original("nvvm not found and CUDA_HOME/CUDA_PATH is set")

    # NVRTC is almost certainly needed whenever NVVM is (in the context of
    # numba-cuda), so its location is used as a hint.
    try:
        loaded_nvrtc = _get_nvrtc()
    except Exception as nvrtc_exc:
        raise pathfinder.DynamicLibNotFoundError(
            f"nvrtc load failed while inferring CUDA_HOME; original nvvm error: {nvvm_exc}"
        ) from nvrtc_exc

    # Only a system-search hit is treated as a reliable indication of the
    # CUDA installation layout.
    nvrtc_source = loaded_nvrtc.found_via
    if nvrtc_source != "system-search":
        _raise_original(
            f"nvrtc found via {nvrtc_source}, cannot infer CUDA_HOME"
        )

    nvrtc_location = loaded_nvrtc.abs_path
    cuda_home = _find_cuda_home_from_lib_path(nvrtc_location)
    if cuda_home is None:
        _raise_original(
            f"nvrtc path did not map to CUDA_HOME ({nvrtc_location})"
        )

    # Retry with the inferred root exposed through CUDA_HOME.
    with temporary_env_var("CUDA_HOME", cuda_home):
        try:
            library = pathfinder.load_nvidia_dynamic_lib("nvvm")
        except pathfinder.DynamicLibNotFoundError as retry_error:
            raise pathfinder.DynamicLibNotFoundError(
                f"nvvm not found after inferring CUDA_HOME={cuda_home}; "
                f"original nvvm error: {nvvm_exc}"
            ) from retry_error
    library.found_via = "system-search"
    return library

Comment on lines +556 to +607
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Logic error in exception handling: if path_or_none is not None but nvvm.exists() returns False (line 529), the function returns None implicitly by falling through. This will cause an AttributeError when _get_nvvm_path() (line 548) tries to access nvvm.found_via on the None value.

The exception should be raised in all failure cases:

Suggested change
def _get_nvvm():
try:
nvvm = pathfinder.load_nvidia_dynamic_lib("nvvm")
return nvvm
except pathfinder.DynamicLibNotFoundError:
# Try system search
# TODO: remove after cuda-python/1157 is resolved
path_or_none = _get_nvvm_system_path()
if path_or_none is not None:
nvvm = pathlib.Path(path_or_none)
if nvvm.exists():
dl = pathfinder._dynamic_libs.load_nvidia_dynamic_lib.load_with_abs_path(
"nvvm", nvvm, "system-search"
)
return dl
else:
raise pathfinder.DynamicLibNotFoundError("nvvm not found")
def _get_nvvm():
try:
nvvm = pathfinder.load_nvidia_dynamic_lib("nvvm")
return nvvm
except pathfinder.DynamicLibNotFoundError:
# Try system search
# TODO: remove after cuda-python/1157 is resolved
path_or_none = _get_nvvm_system_path()
if path_or_none is not None:
nvvm = pathlib.Path(path_or_none)
if nvvm.exists():
dl = pathfinder._dynamic_libs.load_nvidia_dynamic_lib.load_with_abs_path(
"nvvm", nvvm, "system-search"
)
return dl
raise pathfinder.DynamicLibNotFoundError("nvvm not found")


def _get_nvrtc():
    """Load NVRTC through pathfinder.

    No fallback is attempted here: whatever pathfinder raises propagates to
    the caller.
    """
    nvrtc = pathfinder.load_nvidia_dynamic_lib("nvrtc")
    return nvrtc
Comment on lines +609 to +610
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_get_nvrtc() lacks system path fallback that _get_nvvm() has. If pathfinder fails to find nvrtc (e.g., in some system installations), this will raise an exception with no fallback attempt. Consider adding similar system search fallback logic as in _get_nvvm() for consistency and robustness, or document why nvrtc doesn't need this fallback.

Comment on lines +609 to +610
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inconsistency: _get_nvrtc() lacks the system path fallback that _get_nvvm() has (lines 519-534). If pathfinder.load_nvidia_dynamic_lib("nvrtc") fails, this function will raise an exception, while _get_nvvm() has a fallback to _get_nvvm_system_path().

This creates inconsistent behavior between the two library lookup functions. Either:

  1. Add a similar system path fallback for nvrtc, OR
  2. Remove the fallback from nvvm to keep them consistent

The TODO comment in _get_nvvm() (line 525) suggests the system fallback is temporary until cuda-python issue #1157 is resolved. Consider whether nvrtc needs the same workaround.

Suggested change
def _get_nvrtc():
return pathfinder.load_nvidia_dynamic_lib("nvrtc")
def _get_nvrtc():
try:
return pathfinder.load_nvidia_dynamic_lib("nvrtc")
except pathfinder.DynamicLibNotFoundError:
# Try system search if pathfinder fails
# Consider adding _get_nvrtc_system_path() similar to _get_nvvm_system_path()
raise

Comment thread
brandon-b-miller marked this conversation as resolved.
Comment thread
brandon-b-miller marked this conversation as resolved.
Comment on lines +609 to +610
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inconsistent exception handling: _get_nvrtc() lacks the try-except wrapper that _get_nvvm() has (lines 520-535). If the nvrtc library is not found, pathfinder.load_nvidia_dynamic_lib("nvrtc") will raise pathfinder.DynamicLibNotFoundError, which will propagate through _get_nvrtc_path() → get_cuda_paths() → callers.

This is problematic because:

  1. Callers like nvvm.py line 154 only catch OSError, not pathfinder.DynamicLibNotFoundError
  2. _get_nvvm() has fallback logic to try system paths, but _get_nvrtc() does not

Add try-except wrapper with system search fallback to match _get_nvvm() pattern, or at minimum wrap the exception to ensure it's caught by existing error handlers.

Comment on lines +609 to +610
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

logic: unhandled exception breaks initialization. If nvrtc is not found, pathfinder.load_nvidia_dynamic_lib("nvrtc") raises DynamicLibNotFoundError, which propagates through _get_nvrtc_path() → get_cuda_paths(), crashing at module import time before any OSError handlers can catch it.

add try-except:

Suggested change
def _get_nvrtc():
return pathfinder.load_nvidia_dynamic_lib("nvrtc")
def _get_nvrtc():
try:
return pathfinder.load_nvidia_dynamic_lib("nvrtc")
except pathfinder.DynamicLibNotFoundError:
return None

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pathfinder should cover all cases for nvrtc, thus if it can't find it, we want to propagate the error. This includes the system path, so, there shouldn't be any need to fall back to the inbuilt mechanism for invoking a system search.



def _get_nvrtc_path():
    """Report how and where NVRTC was found as an ``_env_path_tuple``.

    ``_get_nvrtc`` either returns a loaded library or raises, so no ``None``
    handling is needed here.
    """
    loaded = _get_nvrtc()
    source, location = loaded.found_via, loaded.abs_path
    return _env_path_tuple(source, location)
Comment thread
brandon-b-miller marked this conversation as resolved.


def _get_nvvm_path():
    """Report how and where NVVM was found as an ``_env_path_tuple``.

    ``_get_nvvm`` either returns a loaded library or raises, so no ``None``
    handling is needed here.
    """
    loaded = _get_nvvm()
    source, location = loaded.found_via, loaded.abs_path
    return _env_path_tuple(source, location)
Comment thread
brandon-b-miller marked this conversation as resolved.
7 changes: 4 additions & 3 deletions numba_cuda/numba/cuda/cudadrv/libs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from numba.cuda.cuda_paths import get_cuda_paths
from numba.cuda.cudadrv.driver import locate_driver_and_loader, load_driver
from numba.cuda.cudadrv.error import CudaSupportError
from numba.cuda.core import config
from numba.cuda import config


if sys.platform == "win32":
Expand Down Expand Up @@ -53,7 +53,9 @@ def get_cudalib(lib, static=False):
loader's search mechanism.
"""
if lib in {"nvrtc", "nvvm"}:
return get_cuda_paths()[lib].info or _dllnamepattern % lib
# System search either invoked inside cuda-pathfinder
# or, for nvvm, using custom logic inside cuda-paths
return get_cuda_paths()[lib].info
Comment on lines 55 to +58
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P0] Removed fallback breaks library loading. Old code: get_cuda_paths()[lib].info or _dllnamepattern % lib provided fallback to generic library name when path not found, allowing system loader to search. New code returns None if pathfinder fails (when get_cuda_paths()[lib].info is None), causing ctypes.CDLL(None) to fail in open_cudalib() at line 85.

Comment on lines 55 to +58
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed fallback to generic library name pattern. The old code had return get_cuda_paths()[lib].info or _dllnamepattern % lib which provided a fallback to generic names like "libnvvm.so" or "libnvrtc.so", allowing the system loader to search for the library. The new code returns None if pathfinder can't find the library, which will cause ctypes.CDLL(None) to fail in open_cudalib() (line 85).

This breaks the documented behavior in the docstring (lines 49-53): "If the search fails, return a generic filename for the library...so that we may attempt to load it using the system loader's search mechanism."

Consider restoring the fallback or updating the implementation to handle None returns gracefully.

Comment thread
brandon-b-miller marked this conversation as resolved.
Comment thread
brandon-b-miller marked this conversation as resolved.

dir_type = "static_cudalib_dir" if static else "cudalib_dir"
libdir = get_cuda_paths()[dir_type].info
Expand All @@ -69,7 +71,6 @@ def get_cuda_include_dir():
Note that this does not list the `CUDA_INCLUDE_PATH` entry in user
configuration.
"""

return get_cuda_paths()["include_dir"].info


Expand Down
Loading
Loading