From 2d8c99a89e487942ec14ae440d4c68bd250b69c6 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 12 Jan 2026 13:54:50 -0800 Subject: [PATCH 01/20] initial localized test --- .../cuda/bindings/_internal/nvfatbin.pxd | 22 ++ .../bindings/_internal/nvfatbin_linux.pyx | 242 ++++++++++++++++++ .../bindings/_internal/nvfatbin_windows.pyx | 233 +++++++++++++++++ cuda_bindings/cuda/bindings/cynvfatbin.pxd | 53 ++++ cuda_bindings/cuda/bindings/cynvfatbin.pyx | 38 +++ cuda_bindings/cuda/bindings/nvfatbin.pxd | 37 +++ cuda_bindings/cuda/bindings/nvfatbin.pyx | 194 ++++++++++++++ cuda_bindings/tests/test_nvfatbin.py | 89 +++++++ 8 files changed, 908 insertions(+) create mode 100644 cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd create mode 100644 cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx create mode 100644 cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx create mode 100644 cuda_bindings/cuda/bindings/cynvfatbin.pxd create mode 100644 cuda_bindings/cuda/bindings/cynvfatbin.pyx create mode 100644 cuda_bindings/cuda/bindings/nvfatbin.pxd create mode 100644 cuda_bindings/cuda/bindings/nvfatbin.pyx create mode 100644 cuda_bindings/tests/test_nvfatbin.py diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd new file mode 100644 index 0000000000..14a8a6d608 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from ..cynvfatbin cimport * + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx new file mode 100644 index 0000000000..06143d9031 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -0,0 +1,242 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport intptr_t, uintptr_t + +import threading +from .utils import FunctionNotFoundError, NotSupportedError + +from cuda.pathfinder import load_nvidia_dynamic_lib + + +############################################################################### +# Extern +############################################################################### + +# You must 'from .utils import NotSupportedError' before using this template + +cdef extern from "" nogil: + void* dlopen(const char*, int) + char* dlerror() + void* dlsym(void*, const char*) + int dlclose(void*) + + enum: + RTLD_LAZY + RTLD_NOW + RTLD_GLOBAL + RTLD_LOCAL + + const void* RTLD_DEFAULT 'RTLD_DEFAULT' + +cdef int get_cuda_version(): + cdef void* handle = NULL + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + err_msg = dlerror() + raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') + cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in libcuda.so.1') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + + + +############################################################################### +# Wrapper init +############################################################################### + +cdef object __symbol_lock = threading.Lock() +cdef bint __py_nvfatbin_init = False + +cdef void* __nvFatbinCreate = NULL +cdef void* __nvFatbinDestroy = NULL +cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinSize = NULL +cdef void* __nvFatbinGet = NULL +cdef void* __nvFatbinVersion = NULL + + +cdef void* load_library() except* with gil: + cdef uintptr_t handle = load_nvidia_dynamic_lib("nvfatbin")._handle_uint + return handle + + +cdef int _init_nvfatbin() except -1 nogil: + global __py_nvfatbin_init + + cdef void* handle = NULL + + with gil, __symbol_lock: + # Recheck the flag after obtaining the locks + if __py_nvfatbin_init: + return 0 + + # Load function + global __nvFatbinCreate + __nvFatbinCreate = dlsym(RTLD_DEFAULT, 'nvFatbinCreate') + if __nvFatbinCreate == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinCreate = dlsym(handle, 'nvFatbinCreate') + + global __nvFatbinDestroy + __nvFatbinDestroy = dlsym(RTLD_DEFAULT, 'nvFatbinDestroy') + if __nvFatbinDestroy == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinDestroy = dlsym(handle, 'nvFatbinDestroy') + + global __nvFatbinAddPTX + __nvFatbinAddPTX = dlsym(RTLD_DEFAULT, 'nvFatbinAddPTX') + if __nvFatbinAddPTX == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddPTX = dlsym(handle, 'nvFatbinAddPTX') + + global __nvFatbinSize + __nvFatbinSize = dlsym(RTLD_DEFAULT, 'nvFatbinSize') + if __nvFatbinSize == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinSize = dlsym(handle, 'nvFatbinSize') + + global __nvFatbinGet + __nvFatbinGet = dlsym(RTLD_DEFAULT, 'nvFatbinGet') + if __nvFatbinGet == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinGet = dlsym(handle, 'nvFatbinGet') + + global __nvFatbinVersion + __nvFatbinVersion = dlsym(RTLD_DEFAULT, 'nvFatbinVersion') + if __nvFatbinVersion == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinVersion = dlsym(handle, 'nvFatbinVersion') + + __py_nvfatbin_init = True + return 0 + + +cdef inline int _check_or_init_nvfatbin() except -1 nogil: + if __py_nvfatbin_init: + return 0 + + return _init_nvfatbin() + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvfatbin() + cdef dict data = {} + + global __nvFatbinCreate + data["__nvFatbinCreate"] = __nvFatbinCreate + + global __nvFatbinDestroy + data["__nvFatbinDestroy"] = __nvFatbinDestroy + + global __nvFatbinAddPTX + data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + + global __nvFatbinSize + data["__nvFatbinSize"] = __nvFatbinSize + + global __nvFatbinGet + data["__nvFatbinGet"] = __nvFatbinGet + + global __nvFatbinVersion + data["__nvFatbinVersion"] = __nvFatbinVersion + + func_ptrs = data + return data + + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinCreate + _check_or_init_nvfatbin() + if __nvFatbinCreate == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinCreate is not found") + return (__nvFatbinCreate)( + handle_indirect, options, optionsCount) + + +cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinDestroy + _check_or_init_nvfatbin() + if __nvFatbinDestroy == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinDestroy is not found") + return (__nvFatbinDestroy)( + handle_indirect) + + +cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddPTX + _check_or_init_nvfatbin() + if __nvFatbinAddPTX == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddPTX is not found") + return (__nvFatbinAddPTX)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinSize + _check_or_init_nvfatbin() + if __nvFatbinSize == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinSize is not found") + return (__nvFatbinSize)( + handle, size) + + +cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinGet + _check_or_init_nvfatbin() + if __nvFatbinGet == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGet is not found") + return (__nvFatbinGet)( + handle, buffer) + + +cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinVersion + _check_or_init_nvfatbin() + if __nvFatbinVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinVersion is not found") + return (__nvFatbinVersion)( + major, minor) + + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx new file mode 100644 index 0000000000..cc1824bf43 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -0,0 +1,233 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport intptr_t + +import threading +from .utils import FunctionNotFoundError, NotSupportedError + +from cuda.pathfinder import load_nvidia_dynamic_lib + +from libc.stddef cimport wchar_t +from libc.stdint cimport uintptr_t +from cpython cimport PyUnicode_AsWideCharString, PyMem_Free + +# You must 'from .utils import NotSupportedError' before using this template + +cdef extern from "windows.h" nogil: + ctypedef void* HMODULE + ctypedef void* HANDLE + ctypedef void* FARPROC + ctypedef unsigned long DWORD + ctypedef const wchar_t *LPCWSTR + ctypedef const char *LPCSTR + + cdef DWORD LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 + cdef DWORD LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 + cdef DWORD LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 + + HMODULE _LoadLibraryExW "LoadLibraryExW"( + LPCWSTR lpLibFileName, + HANDLE hFile, + DWORD dwFlags + ) + + FARPROC _GetProcAddress "GetProcAddress"(HMODULE hModule, LPCSTR lpProcName) + +cdef inline uintptr_t LoadLibraryExW(str path, HANDLE hFile, DWORD dwFlags): + cdef uintptr_t result + cdef wchar_t* wpath = PyUnicode_AsWideCharString(path, NULL) + with nogil: + result = _LoadLibraryExW( + wpath, + hFile, + dwFlags + ) + PyMem_Free(wpath) + return result + +cdef inline void *GetProcAddress(uintptr_t hModule, const char* lpProcName) nogil: + return _GetProcAddress(hModule, lpProcName) + +cdef int get_cuda_version(): + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = LoadLibraryExW("nvcuda.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32) + if handle == 0: + raise NotSupportedError('CUDA driver is not found') + cuDriverGetVersion = GetProcAddress(handle, 'cuDriverGetVersion') + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in nvcuda.dll') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + + + +############################################################################### +# Wrapper init +############################################################################### + +cdef object __symbol_lock = threading.Lock() +cdef bint __py_nvfatbin_init = False + +cdef void* __nvFatbinCreate = NULL +cdef void* __nvFatbinDestroy = NULL +cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinSize = NULL +cdef void* __nvFatbinGet = NULL +cdef void* __nvFatbinVersion = NULL + + +cdef int _init_nvfatbin() except -1 nogil: + global __py_nvfatbin_init + + with gil, __symbol_lock: + # Recheck the flag after obtaining the locks + if __py_nvfatbin_init: + return 0 + + # Load library + handle = load_nvidia_dynamic_lib("nvfatbin")._handle_uint + + # Load function + global __nvFatbinCreate + __nvFatbinCreate = GetProcAddress(handle, 'nvFatbinCreate') + + global __nvFatbinDestroy + __nvFatbinDestroy = GetProcAddress(handle, 'nvFatbinDestroy') + + global __nvFatbinAddPTX + __nvFatbinAddPTX = GetProcAddress(handle, 'nvFatbinAddPTX') + + global __nvFatbinSize + __nvFatbinSize = GetProcAddress(handle, 'nvFatbinSize') + + global __nvFatbinGet + __nvFatbinGet = GetProcAddress(handle, 'nvFatbinGet') + + global __nvFatbinVersion + __nvFatbinVersion = GetProcAddress(handle, 'nvFatbinVersion') + + __py_nvfatbin_init = True + return 0 + + +cdef inline int _check_or_init_nvfatbin() except -1 nogil: + if __py_nvfatbin_init: + return 0 + + return _init_nvfatbin() + + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvfatbin() + cdef dict data = {} + + global __nvFatbinCreate + data["__nvFatbinCreate"] = __nvFatbinCreate + + global __nvFatbinDestroy + data["__nvFatbinDestroy"] = __nvFatbinDestroy + + global __nvFatbinAddPTX + data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + + global __nvFatbinSize + data["__nvFatbinSize"] = __nvFatbinSize + + global __nvFatbinGet + data["__nvFatbinGet"] = __nvFatbinGet + + global __nvFatbinVersion + data["__nvFatbinVersion"] = __nvFatbinVersion + + func_ptrs = data + return data + + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinCreate + _check_or_init_nvfatbin() + if __nvFatbinCreate == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinCreate is not found") + return (__nvFatbinCreate)( + handle_indirect, options, optionsCount) + + +cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinDestroy + _check_or_init_nvfatbin() + if __nvFatbinDestroy == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinDestroy is not found") + return (__nvFatbinDestroy)( + handle_indirect) + + +cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddPTX + _check_or_init_nvfatbin() + if __nvFatbinAddPTX == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddPTX is not found") + return (__nvFatbinAddPTX)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinSize + _check_or_init_nvfatbin() + if __nvFatbinSize == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinSize is not found") + return (__nvFatbinSize)( + handle, size) + + +cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinGet + _check_or_init_nvfatbin() + if __nvFatbinGet == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGet is not found") + return (__nvFatbinGet)( + handle, buffer) + + +cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinVersion + _check_or_init_nvfatbin() + if __nvFatbinVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinVersion is not found") + return (__nvFatbinVersion)( + major, minor) + + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd new file mode 100644 index 0000000000..651aa27152 --- /dev/null +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -0,0 +1,53 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport intptr_t, uint32_t + + +############################################################################### +# Types (structs, enums, ...) +############################################################################### + +# enums +ctypedef enum nvFatbinResult "nvFatbinResult": + NVFATBIN_SUCCESS "NVFATBIN_SUCCESS" = 0 + NVFATBIN_ERROR_INTERNAL "NVFATBIN_ERROR_INTERNAL" + NVFATBIN_ERROR_ELF_ARCH_MISMATCH "NVFATBIN_ERROR_ELF_ARCH_MISMATCH" + NVFATBIN_ERROR_ELF_SIZE_MISMATCH "NVFATBIN_ERROR_ELF_SIZE_MISMATCH" + NVFATBIN_ERROR_MISSING_PTX_VERSION "NVFATBIN_ERROR_MISSING_PTX_VERSION" + NVFATBIN_ERROR_NULL_POINTER "NVFATBIN_ERROR_NULL_POINTER" + NVFATBIN_ERROR_COMPRESSION_FAILED "NVFATBIN_ERROR_COMPRESSION_FAILED" + NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED "NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED" + NVFATBIN_ERROR_UNRECOGNIZED_OPTION "NVFATBIN_ERROR_UNRECOGNIZED_OPTION" + NVFATBIN_ERROR_INVALID_ARCH "NVFATBIN_ERROR_INVALID_ARCH" + NVFATBIN_ERROR_INVALID_NVVM "NVFATBIN_ERROR_INVALID_NVVM" + NVFATBIN_ERROR_EMPTY_INPUT "NVFATBIN_ERROR_EMPTY_INPUT" + NVFATBIN_ERROR_MISSING_PTX_ARCH "NVFATBIN_ERROR_MISSING_PTX_ARCH" + NVFATBIN_ERROR_PTX_ARCH_MISMATCH "NVFATBIN_ERROR_PTX_ARCH_MISMATCH" + NVFATBIN_ERROR_MISSING_FATBIN "NVFATBIN_ERROR_MISSING_FATBIN" + NVFATBIN_ERROR_INVALID_INDEX "NVFATBIN_ERROR_INVALID_INDEX" + NVFATBIN_ERROR_IDENTIFIER_REUSE "NVFATBIN_ERROR_IDENTIFIER_REUSE" + NVFATBIN_ERROR_INTERNAL_PTX_OPTION "NVFATBIN_ERROR_INTERNAL_PTX_OPTION" + _NVFATBINRESULT_INTERNAL_LOADING_ERROR "_NVFATBINRESULT_INTERNAL_LOADING_ERROR" = -42 + + +# types +ctypedef void* nvFatbinHandle 'nvFatbinHandle' + + +############################################################################### +# Functions +############################################################################### + +cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx new file mode 100644 index 0000000000..13c9ac2cc1 --- /dev/null +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from ._internal cimport nvfatbin as _nvfatbin + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinCreate(handle_indirect, options, optionsCount) + + +cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinDestroy(handle_indirect) + + +cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddPTX(handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinSize(handle, size) + + +cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinGet(handle, buffer) + + +cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinVersion(major, minor) + + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd new file mode 100644 index 0000000000..1350d0ed52 --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport intptr_t, uint32_t + +from .cynvfatbin cimport * + + +############################################################################### +# Types +############################################################################### + +ctypedef nvFatbinHandle Handle + + +############################################################################### +# Enum +############################################################################### + +ctypedef nvFatbinResult _Result + + +############################################################################### +# Functions +############################################################################### + +cpdef intptr_t create(options, size_t options_count) except -1 +cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) +cpdef size_t size(intptr_t handle) except? 0 +cpdef get(intptr_t handle, buffer) +cpdef tuple version() + + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx new file mode 100644 index 0000000000..dcc669797e --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -0,0 +1,194 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +cimport cython # NOQA + +from ._internal.utils cimport (get_resource_ptr, get_nested_resource_ptr, nested_resource, nullable_unique_ptr, + get_buffer_pointer, get_resource_ptrs) + +from enum import IntEnum as _IntEnum +from libcpp.vector cimport vector + + +############################################################################### +# Enum +############################################################################### + +class Result(_IntEnum): + """See `nvFatbinResult`.""" + SUCCESS = NVFATBIN_SUCCESS + ERROR_INTERNAL = NVFATBIN_ERROR_INTERNAL + ERROR_ELF_ARCH_MISMATCH = NVFATBIN_ERROR_ELF_ARCH_MISMATCH + ERROR_ELF_SIZE_MISMATCH = NVFATBIN_ERROR_ELF_SIZE_MISMATCH + ERROR_MISSING_PTX_VERSION = NVFATBIN_ERROR_MISSING_PTX_VERSION + ERROR_NULL_POINTER = NVFATBIN_ERROR_NULL_POINTER + ERROR_COMPRESSION_FAILED = NVFATBIN_ERROR_COMPRESSION_FAILED + ERROR_COMPRESSED_SIZE_EXCEEDED = NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED + ERROR_UNRECOGNIZED_OPTION = NVFATBIN_ERROR_UNRECOGNIZED_OPTION + ERROR_INVALID_ARCH = NVFATBIN_ERROR_INVALID_ARCH + ERROR_INVALID_NVVM = NVFATBIN_ERROR_INVALID_NVVM + ERROR_EMPTY_INPUT = NVFATBIN_ERROR_EMPTY_INPUT + ERROR_MISSING_PTX_ARCH = NVFATBIN_ERROR_MISSING_PTX_ARCH + ERROR_PTX_ARCH_MISMATCH = NVFATBIN_ERROR_PTX_ARCH_MISMATCH + ERROR_MISSING_FATBIN = NVFATBIN_ERROR_MISSING_FATBIN + ERROR_INVALID_INDEX = NVFATBIN_ERROR_INVALID_INDEX + ERROR_IDENTIFIER_REUSE = NVFATBIN_ERROR_IDENTIFIER_REUSE + ERROR_INTERNAL_PTX_OPTION = NVFATBIN_ERROR_INTERNAL_PTX_OPTION + + +############################################################################### +# Error handling +############################################################################### + +class nvfatbinError(Exception): + + def __init__(self, status): + self.status = status + s = Result(status) + cdef str err = f"{s.name} ({s.value})" + super(nvfatbinError, self).__init__(err) + + def __reduce__(self): + return (type(self), (self.status,)) + + +@cython.profile(False) +cdef int check_status(int status) except 1 nogil: + if status != 0: + with gil: + raise nvfatbinError(status) + return status + + +############################################################################### +# Wrapper functions +############################################################################### + +cpdef destroy(intptr_t handle): + """nvFatbinDestroy frees the memory associated with the given handle. + + Args: + handle (intptr_t): nvFatbin handle. + + .. seealso:: `nvFatbinDestroy` + """ + cdef Handle h = handle + with nogil: + status = nvFatbinDestroy(&h) + check_status(status) + + +cpdef intptr_t create(options, size_t options_count) except -1: + """nvFatbinCreate creates a new handle. + + Args: + options (object): An array of strings, each containing a single option. It can be: + + - an :class:`int` as the pointer address to the nested sequence, or + - a Python sequence of :class:`int`\s, each of which is a pointer address + to a valid sequence of 'char', or + - a nested Python sequence of ``str``. + + options_count (size_t): Number of options. + + Returns: + intptr_t: Address of nvFatbin handle. + + .. seealso:: `nvFatbinCreate` + """ + cdef nested_resource[ char ] _options_ + get_nested_resource_ptr[char](_options_, options, NULL) + cdef Handle handle_indirect + with nogil: + __status__ = nvFatbinCreate(&handle_indirect, (_options_.ptrs.data()), options_count) + check_status(__status__) + return handle_indirect + + +cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line): + """nvFatbinAddPTX adds PTX to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The PTX code. + size (size_t): The size of the PTX code. + arch (str): The numerical architecture that this PTX is for (the XX of any sm_XX, lto_XX, or compute_XX). + identifier (str): Name of the PTX, useful when extracting the fatbin with tools like cuobjdump. + options_cmd_line (str): Options used during JIT compilation. + + .. seealso:: `nvFatbinAddPTX` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(arch, str): + raise TypeError("arch must be a Python str") + cdef bytes _temp_arch_ = (arch).encode() + cdef char* _arch_ = _temp_arch_ + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + if not isinstance(options_cmd_line, str): + raise TypeError("options_cmd_line must be a Python str") + cdef bytes _temp_options_cmd_line_ = (options_cmd_line).encode() + cdef char* _options_cmd_line_ = _temp_options_cmd_line_ + with nogil: + __status__ = nvFatbinAddPTX(handle, _code_, size, _arch_, _identifier_, _options_cmd_line_) + check_status(__status__) + + +cpdef size_t size(intptr_t handle) except? 0: + """nvFatbinSize returns the fatbinary's size. + + Args: + handle (intptr_t): nvFatbin handle. + + Returns: + size_t: The fatbinary's size. + + .. seealso:: `nvFatbinSize` + """ + cdef size_t size + with nogil: + __status__ = nvFatbinSize(handle, &size) + check_status(__status__) + return size + + +cpdef get(intptr_t handle, buffer): + """nvFatbinGet returns the completed fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + buffer (bytes): memory to store fatbinary. + + .. seealso:: `nvFatbinGet` + """ + cdef void* _buffer_ = get_buffer_pointer(buffer, -1, readonly=False) + with nogil: + __status__ = nvFatbinGet(handle, _buffer_) + check_status(__status__) + + +cpdef tuple version(): + """nvFatbinVersion returns the current version of nvFatbin. + + Returns: + A 2-tuple containing: + + - unsigned int: The major version. + - unsigned int: The minor version. + + .. seealso:: `nvFatbinVersion` + """ + cdef unsigned int major + cdef unsigned int minor + with nogil: + __status__ = nvFatbinVersion(&major, &minor) + check_status(__status__) + return (major, minor) + + + diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py new file mode 100644 index 0000000000..627bd300e4 --- /dev/null +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -0,0 +1,89 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +from cuda.bindings import nvfatbin + +import pytest + +ARCHITECTURES = ["sm_75", "sm_80", "sm_90", "sm_100"] +PTX_VERSIONS = ["6.4", "7.0", "8.5", "8.8"] + +PTX_TEMPLATE = """ +.version {PTX_VERSION} +.target {ARCH} +.address_size 64 + + // .globl _Z6kernelPi + +.visible .entry _Z6kernelPi( + .param .u64 _Z6kernelPi_param_0 +) +{{ + .reg .b32 %r<7>; + .reg .b64 %rd<5>; + + + ld.param.u64 %rd1, [_Z6kernelPi_param_0]; + cvta.to.global.u64 %rd2, %rd1; + mov.u32 %r1, %tid.x; + mov.u32 %r2, %ctaid.x; + mov.u32 %r3, %ntid.x; + mad.lo.s32 %r4, %r2, %r3, %r1; + mul.wide.s32 %rd3, %r4, 4; + add.s64 %rd4, %rd2, %rd3; + ld.global.u32 %r5, [%rd4]; + add.s32 %r6, %r5, 1; + st.global.u32 [%rd4], %r6; + ret; + +}} +""" + +@pytest.fixture(params=ARCHITECTURES) +def arch(request): + return request.param + +@pytest.fixture(params=PTX_VERSIONS) +def ptx_version(request): + return request.param + +@pytest.fixture +def PTX(arch, ptx_version): + return PTX_TEMPLATE.format(PTX_VERSION=ptx_version, ARCH=arch) + +def test_nvfatbin_get_version(): + major, minor = nvfatbin.version() + assert major is not None + assert minor is not None + +def test_nvfatbin_empty_create_and_destroy(): + handle = nvfatbin.create([], 0) + assert handle is not None + nvfatbin.destroy(handle) + +def test_nvfatbin_invalid_input_create(): + with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_UNRECOGNIZED_OPTION"): + nvfatbin.create(["--unsupported_option"], 1) + + +def test_nvfatbin_get_empty(): + handle = nvfatbin.create([], 0) + size = nvfatbin.size(handle) + + buffer = bytearray(size) + nvfatbin.get(handle, buffer) + + nvfatbin.destroy(handle) + + +def test_nvfatbin_add_ptx(PTX, arch): + arch_numeric = arch.split("_")[1] + + handle = nvfatbin.create([], 0) + nvfatbin.add_ptx(handle, PTX.encode(), len(PTX), arch_numeric, "add", f"-arch={arch}") + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) + From 9b1a5590a1b4f34e91cbd78991154707615a7171 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 12 Jan 2026 20:08:30 -0800 Subject: [PATCH 02/20] add rest of APIs --- .../cuda/bindings/_internal/nvfatbin.pxd | 5 + .../bindings/_internal/nvfatbin_linux.pyx | 65 +++++++++ .../bindings/_internal/nvfatbin_windows.pyx | 53 ++++++++ cuda_bindings/cuda/bindings/cynvfatbin.pxd | 5 + cuda_bindings/cuda/bindings/cynvfatbin.pyx | 14 ++ cuda_bindings/cuda/bindings/nvfatbin.pxd | 5 + cuda_bindings/cuda/bindings/nvfatbin.pyx | 75 ++++++++++ cuda_bindings/tests/test_nvfatbin.py | 128 +++++++++++++++++- 8 files changed, 349 insertions(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd index 14a8a6d608..d421e8c21e 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -14,9 +14,14 @@ from ..cynvfatbin cimport * cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx index 06143d9031..097043f69a 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -62,6 +62,9 @@ cdef bint __py_nvfatbin_init = False cdef void* __nvFatbinCreate = NULL cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinAddCubin = NULL +cdef void* __nvFatbinAddLTOIR = NULL +cdef void* __nvFatbinAddReloc = NULL cdef void* __nvFatbinSize = NULL cdef void* __nvFatbinGet = NULL cdef void* __nvFatbinVersion = NULL @@ -104,6 +107,27 @@ cdef int _init_nvfatbin() except -1 nogil: handle = load_library() __nvFatbinAddPTX = dlsym(handle, 'nvFatbinAddPTX') + global __nvFatbinAddCubin + __nvFatbinAddCubin = dlsym(RTLD_DEFAULT, 'nvFatbinAddCubin') + if __nvFatbinAddCubin == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddCubin = dlsym(handle, 'nvFatbinAddCubin') + + global __nvFatbinAddLTOIR + __nvFatbinAddLTOIR = dlsym(RTLD_DEFAULT, 'nvFatbinAddLTOIR') + if __nvFatbinAddLTOIR == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddLTOIR = dlsym(handle, 'nvFatbinAddLTOIR') + + global __nvFatbinAddReloc + __nvFatbinAddReloc = dlsym(RTLD_DEFAULT, 'nvFatbinAddReloc') + if __nvFatbinAddReloc == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddReloc = dlsym(handle, 'nvFatbinAddReloc') + global __nvFatbinSize __nvFatbinSize = dlsym(RTLD_DEFAULT, 'nvFatbinSize') if __nvFatbinSize == NULL: @@ -155,6 +179,15 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinAddPTX data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + global __nvFatbinAddCubin + data["__nvFatbinAddCubin"] = __nvFatbinAddCubin + + global __nvFatbinAddLTOIR + data["__nvFatbinAddLTOIR"] = __nvFatbinAddLTOIR + + global __nvFatbinAddReloc + data["__nvFatbinAddReloc"] = __nvFatbinAddReloc + global __nvFatbinSize data["__nvFatbinSize"] = __nvFatbinSize @@ -209,6 +242,36 @@ cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, siz handle, code, size, arch, identifier, optionsCmdLine) +cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddCubin + _check_or_init_nvfatbin() + if __nvFatbinAddCubin == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddCubin is not found") + return (__nvFatbinAddCubin)( + handle, code, size, arch, identifier) + + +cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddLTOIR + _check_or_init_nvfatbin() + if __nvFatbinAddLTOIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddLTOIR is not found") + return (__nvFatbinAddLTOIR)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddReloc + _check_or_init_nvfatbin() + if __nvFatbinAddReloc == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddReloc is not found") + return (__nvFatbinAddReloc)( + handle, code, size) + + cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinSize _check_or_init_nvfatbin() @@ -240,3 +303,5 @@ cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) e + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx index cc1824bf43..a499637f0d 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -80,6 +80,9 @@ cdef bint __py_nvfatbin_init = False cdef void* __nvFatbinCreate = NULL cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinAddCubin = NULL +cdef void* __nvFatbinAddLTOIR = NULL +cdef void* __nvFatbinAddReloc = NULL cdef void* __nvFatbinSize = NULL cdef void* __nvFatbinGet = NULL cdef void* __nvFatbinVersion = NULL @@ -106,6 +109,15 @@ cdef int _init_nvfatbin() except -1 nogil: global __nvFatbinAddPTX __nvFatbinAddPTX = GetProcAddress(handle, 'nvFatbinAddPTX') + global __nvFatbinAddCubin + __nvFatbinAddCubin = GetProcAddress(handle, 'nvFatbinAddCubin') + + global __nvFatbinAddLTOIR + __nvFatbinAddLTOIR = GetProcAddress(handle, 'nvFatbinAddLTOIR') + + global __nvFatbinAddReloc + __nvFatbinAddReloc = GetProcAddress(handle, 'nvFatbinAddReloc') + global __nvFatbinSize __nvFatbinSize = GetProcAddress(handle, 'nvFatbinSize') @@ -146,6 +158,15 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinAddPTX data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + global __nvFatbinAddCubin + data["__nvFatbinAddCubin"] = __nvFatbinAddCubin + + global __nvFatbinAddLTOIR + data["__nvFatbinAddLTOIR"] = __nvFatbinAddLTOIR + + global __nvFatbinAddReloc + data["__nvFatbinAddReloc"] = __nvFatbinAddReloc + global __nvFatbinSize data["__nvFatbinSize"] = __nvFatbinSize @@ -200,6 +221,36 @@ cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, siz handle, code, size, arch, identifier, optionsCmdLine) +cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddCubin + _check_or_init_nvfatbin() + if __nvFatbinAddCubin == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddCubin is not found") + return (__nvFatbinAddCubin)( + handle, code, size, arch, identifier) + + +cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddLTOIR + _check_or_init_nvfatbin() + if __nvFatbinAddLTOIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddLTOIR is not found") + return (__nvFatbinAddLTOIR)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddReloc + _check_or_init_nvfatbin() + if __nvFatbinAddReloc == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddReloc is not found") + return (__nvFatbinAddReloc)( + handle, code, size) + + cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinSize _check_or_init_nvfatbin() @@ -231,3 +282,5 @@ cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) e + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd index 651aa27152..55d8c83c1a 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -45,9 +45,14 @@ ctypedef void* nvFatbinHandle 'nvFatbinHandle' cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx index 13c9ac2cc1..142f374c1b 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -23,6 +23,18 @@ cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size return _nvfatbin._nvFatbinAddPTX(handle, code, size, arch, identifier, optionsCmdLine) +cdef nvFatbinResult nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddCubin(handle, code, size, arch, identifier) + + +cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddLTOIR(handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddReloc(handle, code, size) + + cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: return _nvfatbin._nvFatbinSize(handle, size) @@ -36,3 +48,5 @@ cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) ex + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd index 1350d0ed52..54c793962b 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -29,9 +29,14 @@ ctypedef nvFatbinResult _Result cpdef intptr_t create(options, size_t options_count) except -1 cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) +cpdef add_cubin(intptr_t handle, code, size_t size, arch, identifier) +cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) +cpdef add_reloc(intptr_t handle, code, size_t size) cpdef size_t size(intptr_t handle) except? 0 cpdef get(intptr_t handle, buffer) cpdef tuple version() + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx index dcc669797e..92db285f8a 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -139,6 +139,79 @@ cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_ check_status(__status__) +cpdef add_cubin(intptr_t handle, code, size_t size, arch, identifier): + """nvFatbinAddCubin adds a CUDA binary to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The cubin. + size (size_t): The size of the cubin. + arch (str): The numerical architecture that this cubin is for (the XX of any sm_XX, lto_XX, or compute_XX). + identifier (str): Name of the cubin, useful when extracting the fatbin with tools like cuobjdump. + + .. seealso:: `nvFatbinAddCubin` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(arch, str): + raise TypeError("arch must be a Python str") + cdef bytes _temp_arch_ = (arch).encode() + cdef char* _arch_ = _temp_arch_ + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + with nogil: + __status__ = nvFatbinAddCubin(handle, _code_, size, _arch_, _identifier_) + check_status(__status__) + + +cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line): + """nvFatbinAddLTOIR adds LTOIR to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The LTOIR code. + size (size_t): The size of the LTOIR code. + arch (str): The numerical architecture that this LTOIR is for (the XX of any sm_XX, lto_XX, or compute_XX). + identifier (str): Name of the LTOIR, useful when extracting the fatbin with tools like cuobjdump. + options_cmd_line (str): Options used during JIT compilation. + + .. seealso:: `nvFatbinAddLTOIR` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(arch, str): + raise TypeError("arch must be a Python str") + cdef bytes _temp_arch_ = (arch).encode() + cdef char* _arch_ = _temp_arch_ + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + if not isinstance(options_cmd_line, str): + raise TypeError("options_cmd_line must be a Python str") + cdef bytes _temp_options_cmd_line_ = (options_cmd_line).encode() + cdef char* _options_cmd_line_ = _temp_options_cmd_line_ + with nogil: + __status__ = nvFatbinAddLTOIR(handle, _code_, size, _arch_, _identifier_, _options_cmd_line_) + check_status(__status__) + + +cpdef add_reloc(intptr_t handle, code, size_t size): + """nvFatbinAddReloc adds relocatable PTX entries from a host object to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The host object image. + size (size_t): The size of the host object image code. + + .. seealso:: `nvFatbinAddReloc` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + with nogil: + __status__ = nvFatbinAddReloc(handle, _code_, size) + check_status(__status__) + + cpdef size_t size(intptr_t handle) except? 0: """nvFatbinSize returns the fatbinary's size. @@ -192,3 +265,5 @@ cpdef tuple version(): + + diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 627bd300e4..3e893852b5 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -1,7 +1,9 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -from cuda.bindings import nvfatbin +import subprocess + +from cuda.bindings import nvfatbin, nvrtc import pytest @@ -39,6 +41,12 @@ }} """ +CODE = """ +int __device__ inc(int x) { + return x + 1; +} +""" + @pytest.fixture(params=ARCHITECTURES) def arch(request): return request.param @@ -51,6 +59,63 @@ def ptx_version(request): def PTX(arch, ptx_version): return PTX_TEMPLATE.format(PTX_VERSION=ptx_version, ARCH=arch) +@pytest.fixture +def CUBIN(arch): + def CHECK_NVRTC(err): + if err != nvrtc.nvrtcResult.NVRTC_SUCCESS: + raise RuntimeError(repr(err)) + + err, program_handle = nvrtc.nvrtcCreateProgram(CODE.encode(), b"", 0, [], []) + CHECK_NVRTC(err) + err = nvrtc.nvrtcCompileProgram(program_handle, 1, [f"-arch={arch}".encode()])[0] + CHECK_NVRTC(err) + err, size = nvrtc.nvrtcGetCUBINSize(program_handle) + CHECK_NVRTC(err) + cubin = b" " * size + (err,) = nvrtc.nvrtcGetCUBIN(program_handle, cubin) + CHECK_NVRTC(err) + (err,) = nvrtc.nvrtcDestroyProgram(program_handle) + CHECK_NVRTC(err) + return cubin + +# create a valid LTOIR input for testing +@pytest.fixture +def LTOIR(arch): + arch = arch.replace("sm", "compute") + def CHECK_NVRTC(err): + if err != nvrtc.nvrtcResult.NVRTC_SUCCESS: + raise RuntimeError(repr(err)) + + empty_cplusplus_kernel = "__global__ void A() {}" + err, program_handle = nvrtc.nvrtcCreateProgram(empty_cplusplus_kernel.encode(), b"", 0, [], []) + CHECK_NVRTC(err) + err = nvrtc.nvrtcCompileProgram(program_handle, 1, [b"-dlto", f"-arch={arch}".encode()])[0] + CHECK_NVRTC(err) + err, size = nvrtc.nvrtcGetLTOIRSize(program_handle) + CHECK_NVRTC(err) + empty_kernel_ltoir = b" " * size + (err,) = nvrtc.nvrtcGetLTOIR(program_handle, empty_kernel_ltoir) + CHECK_NVRTC(err) + (err,) = nvrtc.nvrtcDestroyProgram(program_handle) + CHECK_NVRTC(err) + return empty_kernel_ltoir + +@pytest.fixture +def OBJECT(arch, tmpdir): + if arch == "sm_100": + pytest.skip("sm_100 is not supported on local system.") + + empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" + with open(tmpdir / "object.cu", "w") as f: + f.write(empty_cplusplus_kernel) + + subprocess.check_output(["nvcc", "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")]) + with open(tmpdir / "object.o", "rb") as f: + object = f.read() + + return object + + def test_nvfatbin_get_version(): major, minor = nvfatbin.version() assert major is not None @@ -87,3 +152,64 @@ def test_nvfatbin_add_ptx(PTX, arch): nvfatbin.get(handle, buffer) nvfatbin.destroy(handle) + +@pytest.mark.parametrize("arch", ["sm_80"], indirect=True) +def test_nvfatbin_add_cubin_ELF_SIZE_MISMATCH(CUBIN, arch): + handle = nvfatbin.create([], 0) + with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "75", "inc") + + nvfatbin.destroy(handle) + + +def test_nvfatbin_add_cubin(CUBIN, arch): + arch_numeric = arch.split("_")[1] + + handle = nvfatbin.create([], 0) + nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), arch_numeric, "inc") + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) + + +@pytest.mark.parametrize("arch", ["sm_80"], indirect=True) +def test_nvfatbin_add_cubin_ELF_ARCH_MISMATCH(CUBIN, arch): + handle = nvfatbin.create([], 0) + with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "75", "inc") + + nvfatbin.destroy(handle) + + +def test_nvdfatbin_add_ltoir(LTOIR, arch): + arch_numeric = arch.split("_")[1] + + handle = nvfatbin.create([], 0) + nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), arch_numeric, "inc", "") + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) + + +@pytest.mark.parametrize("arch", ["sm_80"], indirect=True) +def test_nvdfatbin_add_ltoir_ELF_ARCH_MISMATCH(LTOIR, arch): + pytest.skip() + handle = nvfatbin.create([], 0) + with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), "75", "inc", "") + + nvfatbin.destroy(handle) + + +def test_nvfatbin_add_reloc(OBJECT): + handle = nvfatbin.create([], 0) + nvfatbin.add_reloc(handle, OBJECT, len(OBJECT)) + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) \ No newline at end of file From a979dd08988ab86efff816e139903c50e8081a11 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 13 Jan 2026 10:30:23 -0800 Subject: [PATCH 03/20] remove local skips --- cuda_bindings/tests/test_nvfatbin.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 3e893852b5..ae29b3c5ae 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -102,9 +102,6 @@ def CHECK_NVRTC(err): @pytest.fixture def OBJECT(arch, tmpdir): - if arch == "sm_100": - pytest.skip("sm_100 is not supported on local system.") - empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" with open(tmpdir / "object.cu", "w") as f: f.write(empty_cplusplus_kernel) @@ -197,7 +194,6 @@ def test_nvdfatbin_add_ltoir(LTOIR, arch): @pytest.mark.parametrize("arch", ["sm_80"], indirect=True) def test_nvdfatbin_add_ltoir_ELF_ARCH_MISMATCH(LTOIR, arch): - pytest.skip() handle = nvfatbin.create([], 0) with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), "75", "inc", "") From fc4203ec0060257bb9d136866f31db2f3fdeeb35 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 13 Jan 2026 10:55:37 -0800 Subject: [PATCH 04/20] regenerate for CUDA 13.1 and with tile IR API --- .../cuda/bindings/_internal/nvfatbin.pxd | 6 +- .../bindings/_internal/nvfatbin_linux.pyx | 66 ++++++++++++------- .../bindings/_internal/nvfatbin_windows.pyx | 54 ++++++++++----- cuda_bindings/cuda/bindings/cynvfatbin.pxd | 6 +- cuda_bindings/cuda/bindings/cynvfatbin.pyx | 15 +++-- cuda_bindings/cuda/bindings/nvfatbin.pxd | 6 +- cuda_bindings/cuda/bindings/nvfatbin.pyx | 61 ++++++++++++----- 7 files changed, 146 insertions(+), 68 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd index d421e8c21e..fed9968e39 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. from ..cynvfatbin cimport * @@ -16,10 +16,12 @@ cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NV cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil -cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx index 097043f69a..b78f15e73b 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. from libc.stdint cimport intptr_t, uintptr_t @@ -64,10 +64,11 @@ cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL cdef void* __nvFatbinAddCubin = NULL cdef void* __nvFatbinAddLTOIR = NULL -cdef void* __nvFatbinAddReloc = NULL cdef void* __nvFatbinSize = NULL cdef void* __nvFatbinGet = NULL cdef void* __nvFatbinVersion = NULL +cdef void* __nvFatbinAddReloc = NULL +cdef void* __nvFatbinAddTileIR = NULL cdef void* load_library() except* with gil: @@ -121,13 +122,6 @@ cdef int _init_nvfatbin() except -1 nogil: handle = load_library() __nvFatbinAddLTOIR = dlsym(handle, 'nvFatbinAddLTOIR') - global __nvFatbinAddReloc - __nvFatbinAddReloc = dlsym(RTLD_DEFAULT, 'nvFatbinAddReloc') - if __nvFatbinAddReloc == NULL: - if handle == NULL: - handle = load_library() - __nvFatbinAddReloc = dlsym(handle, 'nvFatbinAddReloc') - global __nvFatbinSize __nvFatbinSize = dlsym(RTLD_DEFAULT, 'nvFatbinSize') if __nvFatbinSize == NULL: @@ -149,6 +143,20 @@ cdef int _init_nvfatbin() except -1 nogil: handle = load_library() __nvFatbinVersion = dlsym(handle, 'nvFatbinVersion') + global __nvFatbinAddReloc + __nvFatbinAddReloc = dlsym(RTLD_DEFAULT, 'nvFatbinAddReloc') + if __nvFatbinAddReloc == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddReloc = dlsym(handle, 'nvFatbinAddReloc') + + global __nvFatbinAddTileIR + __nvFatbinAddTileIR = dlsym(RTLD_DEFAULT, 'nvFatbinAddTileIR') + if __nvFatbinAddTileIR == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddTileIR = dlsym(handle, 'nvFatbinAddTileIR') + __py_nvfatbin_init = True return 0 @@ -185,9 +193,6 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinAddLTOIR data["__nvFatbinAddLTOIR"] = __nvFatbinAddLTOIR - global __nvFatbinAddReloc - data["__nvFatbinAddReloc"] = __nvFatbinAddReloc - global __nvFatbinSize data["__nvFatbinSize"] = __nvFatbinSize @@ -197,6 +202,12 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinVersion data["__nvFatbinVersion"] = __nvFatbinVersion + global __nvFatbinAddReloc + data["__nvFatbinAddReloc"] = __nvFatbinAddReloc + + global __nvFatbinAddTileIR + data["__nvFatbinAddTileIR"] = __nvFatbinAddTileIR + func_ptrs = data return data @@ -262,16 +273,6 @@ cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, s handle, code, size, arch, identifier, optionsCmdLine) -cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: - global __nvFatbinAddReloc - _check_or_init_nvfatbin() - if __nvFatbinAddReloc == NULL: - with gil: - raise FunctionNotFoundError("function nvFatbinAddReloc is not found") - return (__nvFatbinAddReloc)( - handle, code, size) - - cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinSize _check_or_init_nvfatbin() @@ -302,6 +303,27 @@ cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) e major, minor) +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddReloc + _check_or_init_nvfatbin() + if __nvFatbinAddReloc == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddReloc is not found") + return (__nvFatbinAddReloc)( + handle, code, size) + + +cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddTileIR + _check_or_init_nvfatbin() + if __nvFatbinAddTileIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddTileIR is not found") + return (__nvFatbinAddTileIR)( + handle, code, size, identifier, optionsCmdLine) + + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx index a499637f0d..2a4ff825ab 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. from libc.stdint cimport intptr_t @@ -82,10 +82,11 @@ cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL cdef void* __nvFatbinAddCubin = NULL cdef void* __nvFatbinAddLTOIR = NULL -cdef void* __nvFatbinAddReloc = NULL cdef void* __nvFatbinSize = NULL cdef void* __nvFatbinGet = NULL cdef void* __nvFatbinVersion = NULL +cdef void* __nvFatbinAddReloc = NULL +cdef void* __nvFatbinAddTileIR = NULL cdef int _init_nvfatbin() except -1 nogil: @@ -115,9 +116,6 @@ cdef int _init_nvfatbin() except -1 nogil: global __nvFatbinAddLTOIR __nvFatbinAddLTOIR = GetProcAddress(handle, 'nvFatbinAddLTOIR') - global __nvFatbinAddReloc - __nvFatbinAddReloc = GetProcAddress(handle, 'nvFatbinAddReloc') - global __nvFatbinSize __nvFatbinSize = GetProcAddress(handle, 'nvFatbinSize') @@ -127,6 +125,12 @@ cdef int _init_nvfatbin() except -1 nogil: global __nvFatbinVersion __nvFatbinVersion = GetProcAddress(handle, 'nvFatbinVersion') + global __nvFatbinAddReloc + __nvFatbinAddReloc = GetProcAddress(handle, 'nvFatbinAddReloc') + + global __nvFatbinAddTileIR + __nvFatbinAddTileIR = GetProcAddress(handle, 'nvFatbinAddTileIR') + __py_nvfatbin_init = True return 0 @@ -164,9 +168,6 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinAddLTOIR data["__nvFatbinAddLTOIR"] = __nvFatbinAddLTOIR - global __nvFatbinAddReloc - data["__nvFatbinAddReloc"] = __nvFatbinAddReloc - global __nvFatbinSize data["__nvFatbinSize"] = __nvFatbinSize @@ -176,6 +177,12 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinVersion data["__nvFatbinVersion"] = __nvFatbinVersion + global __nvFatbinAddReloc + data["__nvFatbinAddReloc"] = __nvFatbinAddReloc + + global __nvFatbinAddTileIR + data["__nvFatbinAddTileIR"] = __nvFatbinAddTileIR + func_ptrs = data return data @@ -241,16 +248,6 @@ cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, s handle, code, size, arch, identifier, optionsCmdLine) -cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: - global __nvFatbinAddReloc - _check_or_init_nvfatbin() - if __nvFatbinAddReloc == NULL: - with gil: - raise FunctionNotFoundError("function nvFatbinAddReloc is not found") - return (__nvFatbinAddReloc)( - handle, code, size) - - cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinSize _check_or_init_nvfatbin() @@ -281,6 +278,27 @@ cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) e major, minor) +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddReloc + _check_or_init_nvfatbin() + if __nvFatbinAddReloc == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddReloc is not found") + return (__nvFatbinAddReloc)( + handle, code, size) + + +cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddTileIR + _check_or_init_nvfatbin() + if __nvFatbinAddTileIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddTileIR is not found") + return (__nvFatbinAddTileIR)( + handle, code, size, identifier, optionsCmdLine) + + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd index 55d8c83c1a..03262af56b 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. from libc.stdint cimport intptr_t, uint32_t @@ -47,10 +47,12 @@ cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVF cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil -cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx index 142f374c1b..98a2c63412 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. from ._internal cimport nvfatbin as _nvfatbin @@ -31,10 +31,6 @@ cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, si return _nvfatbin._nvFatbinAddLTOIR(handle, code, size, arch, identifier, optionsCmdLine) -cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: - return _nvfatbin._nvFatbinAddReloc(handle, code, size) - - cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: return _nvfatbin._nvFatbinSize(handle, size) @@ -47,6 +43,15 @@ cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) ex return _nvfatbin._nvFatbinVersion(major, minor) +cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddReloc(handle, code, size) + + +cdef nvFatbinResult nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddTileIR(handle, code, size, identifier, optionsCmdLine) + + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd index 54c793962b..ae987e3a7d 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. from libc.stdint cimport intptr_t, uint32_t @@ -31,10 +31,12 @@ cpdef intptr_t create(options, size_t options_count) except -1 cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) cpdef add_cubin(intptr_t handle, code, size_t size, arch, identifier) cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) -cpdef add_reloc(intptr_t handle, code, size_t size) cpdef size_t size(intptr_t handle) except? 0 cpdef get(intptr_t handle, buffer) cpdef tuple version() +cpdef add_reloc(intptr_t handle, code, size_t size) +cpdef add_tile_ir(intptr_t handle, code, size_t size, identifier, options_cmd_line) + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx index 92db285f8a..9470745dee 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. cimport cython # NOQA @@ -196,22 +196,6 @@ cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cm check_status(__status__) -cpdef add_reloc(intptr_t handle, code, size_t size): - """nvFatbinAddReloc adds relocatable PTX entries from a host object to the fatbinary. - - Args: - handle (intptr_t): nvFatbin handle. - code (bytes): The host object image. - size (size_t): The size of the host object image code. - - .. seealso:: `nvFatbinAddReloc` - """ - cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) - with nogil: - __status__ = nvFatbinAddReloc(handle, _code_, size) - check_status(__status__) - - cpdef size_t size(intptr_t handle) except? 0: """nvFatbinSize returns the fatbinary's size. @@ -264,6 +248,49 @@ cpdef tuple version(): return (major, minor) +cpdef add_reloc(intptr_t handle, code, size_t size): + """nvFatbinAddReloc adds relocatable PTX entries from a host object to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The host object image. + size (size_t): The size of the host object image code. + + .. seealso:: `nvFatbinAddReloc` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + with nogil: + __status__ = nvFatbinAddReloc(handle, _code_, size) + check_status(__status__) + + +cpdef add_tile_ir(intptr_t handle, code, size_t size, identifier, options_cmd_line): + """nvFatbinAddTileIR adds Tile IR to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The Tile IR. + size (size_t): The size of the Tile IR. + identifier (str): Name of the Tile IR, useful when extracting the fatbin with tools like cuobjdump. + options_cmd_line (str): Options used during JIT compilation. + + .. seealso:: `nvFatbinAddTileIR` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + if not isinstance(options_cmd_line, str): + raise TypeError("options_cmd_line must be a Python str") + cdef bytes _temp_options_cmd_line_ = (options_cmd_line).encode() + cdef char* _options_cmd_line_ = _temp_options_cmd_line_ + with nogil: + __status__ = nvFatbinAddTileIR(handle, _code_, size, _identifier_, _options_cmd_line_) + check_status(__status__) + + + From e7ace35c50105a8e2029ebd41de90df6a0947f10 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 13 Jan 2026 13:21:34 -0800 Subject: [PATCH 05/20] nvfatbinError -> nvFatbinError --- cuda_bindings/cuda/bindings/nvfatbin.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx index 9470745dee..527b7846b0 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -43,13 +43,13 @@ class Result(_IntEnum): # Error handling ############################################################################### -class nvfatbinError(Exception): +class nvFatbinError(Exception): def __init__(self, status): self.status = status s = Result(status) cdef str err = f"{s.name} ({s.value})" - super(nvfatbinError, self).__init__(err) + super(nvFatbinError, self).__init__(err) def __reduce__(self): return (type(self), (self.status,)) @@ -59,7 +59,7 @@ class nvfatbinError(Exception): cdef int check_status(int status) except 1 nogil: if status != 0: with gil: - raise nvfatbinError(status) + raise nvFatbinError(status) return status From d962f612da2d55d26f5d434d1baf14b96862ddb2 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 14 Jan 2026 11:55:15 -0800 Subject: [PATCH 06/20] regenerate for get_error_string --- .../cuda/bindings/_internal/nvfatbin.pxd | 7 +-- .../bindings/_internal/nvfatbin_linux.pyx | 27 +++++++--- .../bindings/_internal/nvfatbin_windows.pyx | 23 +++++--- cuda_bindings/cuda/bindings/cynvfatbin.pxd | 7 +-- cuda_bindings/cuda/bindings/cynvfatbin.pyx | 10 ++-- cuda_bindings/cuda/bindings/nvfatbin.pxd | 6 --- cuda_bindings/cuda/bindings/nvfatbin.pyx | 25 ++++++--- cuda_bindings/tests/test_nvfatbin.py | 52 +++++++++++++------ 8 files changed, 98 insertions(+), 59 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd index fed9968e39..843759b32c 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -11,6 +11,7 @@ from ..cynvfatbin cimport * # Wrapper functions ############################################################################### +cdef const char* _nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil @@ -21,9 +22,3 @@ cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NV cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil - - - - - - diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx index b78f15e73b..b50efe02b7 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -59,6 +59,7 @@ cdef int get_cuda_version(): cdef object __symbol_lock = threading.Lock() cdef bint __py_nvfatbin_init = False +cdef void* __nvFatbinGetErrorString = NULL cdef void* __nvFatbinCreate = NULL cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL @@ -87,6 +88,13 @@ cdef int _init_nvfatbin() except -1 nogil: return 0 # Load function + global __nvFatbinGetErrorString + __nvFatbinGetErrorString = dlsym(RTLD_DEFAULT, 'nvFatbinGetErrorString') + if __nvFatbinGetErrorString == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinGetErrorString = dlsym(handle, 'nvFatbinGetErrorString') + global __nvFatbinCreate __nvFatbinCreate = dlsym(RTLD_DEFAULT, 'nvFatbinCreate') if __nvFatbinCreate == NULL: @@ -178,6 +186,9 @@ cpdef dict _inspect_function_pointers(): _check_or_init_nvfatbin() cdef dict data = {} + global __nvFatbinGetErrorString + data["__nvFatbinGetErrorString"] = __nvFatbinGetErrorString + global __nvFatbinCreate data["__nvFatbinCreate"] = __nvFatbinCreate @@ -223,6 +234,16 @@ cpdef _inspect_function_pointer(str name): # Wrapper functions ############################################################################### +cdef const char* _nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil: + global __nvFatbinGetErrorString + _check_or_init_nvfatbin() + if __nvFatbinGetErrorString == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGetErrorString is not found") + return (__nvFatbinGetErrorString)( + result) + + cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinCreate _check_or_init_nvfatbin() @@ -321,9 +342,3 @@ cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, raise FunctionNotFoundError("function nvFatbinAddTileIR is not found") return (__nvFatbinAddTileIR)( handle, code, size, identifier, optionsCmdLine) - - - - - - diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx index 2a4ff825ab..e40777a7e4 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -77,6 +77,7 @@ cdef int get_cuda_version(): cdef object __symbol_lock = threading.Lock() cdef bint __py_nvfatbin_init = False +cdef void* __nvFatbinGetErrorString = NULL cdef void* __nvFatbinCreate = NULL cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL @@ -101,6 +102,9 @@ cdef int _init_nvfatbin() except -1 nogil: handle = load_nvidia_dynamic_lib("nvfatbin")._handle_uint # Load function + global __nvFatbinGetErrorString + __nvFatbinGetErrorString = GetProcAddress(handle, 'nvFatbinGetErrorString') + global __nvFatbinCreate __nvFatbinCreate = GetProcAddress(handle, 'nvFatbinCreate') @@ -153,6 +157,9 @@ cpdef dict _inspect_function_pointers(): _check_or_init_nvfatbin() cdef dict data = {} + global __nvFatbinGetErrorString + data["__nvFatbinGetErrorString"] = __nvFatbinGetErrorString + global __nvFatbinCreate data["__nvFatbinCreate"] = __nvFatbinCreate @@ -198,6 +205,16 @@ cpdef _inspect_function_pointer(str name): # Wrapper functions ############################################################################### +cdef const char* _nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil: + global __nvFatbinGetErrorString + _check_or_init_nvfatbin() + if __nvFatbinGetErrorString == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGetErrorString is not found") + return (__nvFatbinGetErrorString)( + result) + + cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinCreate _check_or_init_nvfatbin() @@ -296,9 +313,3 @@ cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, raise FunctionNotFoundError("function nvFatbinAddTileIR is not found") return (__nvFatbinAddTileIR)( handle, code, size, identifier, optionsCmdLine) - - - - - - diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd index 03262af56b..a01a3ea1d9 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -42,6 +42,7 @@ ctypedef void* nvFatbinHandle 'nvFatbinHandle' # Functions ############################################################################### +cdef const char* nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil @@ -52,9 +53,3 @@ cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVF cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil - - - - - - diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx index 98a2c63412..7cd77c2d75 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -11,6 +11,10 @@ from ._internal cimport nvfatbin as _nvfatbin # Wrapper functions ############################################################################### +cdef const char* nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil: + return _nvfatbin._nvFatbinGetErrorString(result) + + cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: return _nvfatbin._nvFatbinCreate(handle_indirect, options, optionsCount) @@ -49,9 +53,3 @@ cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, si cdef nvFatbinResult nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: return _nvfatbin._nvFatbinAddTileIR(handle, code, size, identifier, optionsCmdLine) - - - - - - diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd index ae987e3a7d..545d30dc0e 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -36,9 +36,3 @@ cpdef get(intptr_t handle, buffer) cpdef tuple version() cpdef add_reloc(intptr_t handle, code, size_t size) cpdef add_tile_ir(intptr_t handle, code, size_t size, identifier, options_cmd_line) - - - - - - diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx index 527b7846b0..701d2dd502 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -81,6 +81,25 @@ cpdef destroy(intptr_t handle): check_status(status) +cpdef str get_error_string(int result): + """nvFatbinGetErrorString returns an error description string for each error code. + + Args: + result (Result): error code. + + .. seealso:: `nvFatbinGetErrorString` + """ + cdef char* _output_ + cdef bytes _output_bytes_ + _output_ = nvFatbinGetErrorString(<_Result>result) + + if _output_ == NULL: + return "" + + _output_bytes_ = _output_ + return _output_bytes_.decode() + + cpdef intptr_t create(options, size_t options_count) except -1: """nvFatbinCreate creates a new handle. @@ -288,9 +307,3 @@ cpdef add_tile_ir(intptr_t handle, code, size_t size, identifier, options_cmd_li with nogil: __status__ = nvFatbinAddTileIR(handle, _code_, size, _identifier_, _options_cmd_line_) check_status(__status__) - - - - - - diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index ae29b3c5ae..440305d51b 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -1,11 +1,9 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -import subprocess - -from cuda.bindings import nvfatbin, nvrtc import pytest +from cuda.bindings import nvfatbin, nvrtc ARCHITECTURES = ["sm_75", "sm_80", "sm_90", "sm_100"] PTX_VERSIONS = ["6.4", "7.0", "8.5", "8.8"] @@ -47,18 +45,22 @@ } """ + @pytest.fixture(params=ARCHITECTURES) def arch(request): return request.param + @pytest.fixture(params=PTX_VERSIONS) def ptx_version(request): return request.param + @pytest.fixture def PTX(arch, ptx_version): return PTX_TEMPLATE.format(PTX_VERSION=ptx_version, ARCH=arch) + @pytest.fixture def CUBIN(arch): def CHECK_NVRTC(err): @@ -78,10 +80,12 @@ def CHECK_NVRTC(err): CHECK_NVRTC(err) return cubin + # create a valid LTOIR input for testing @pytest.fixture def LTOIR(arch): arch = arch.replace("sm", "compute") + def CHECK_NVRTC(err): if err != nvrtc.nvrtcResult.NVRTC_SUCCESS: raise RuntimeError(repr(err)) @@ -100,17 +104,28 @@ def CHECK_NVRTC(err): CHECK_NVRTC(err) return empty_kernel_ltoir -@pytest.fixture -def OBJECT(arch, tmpdir): - empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" - with open(tmpdir / "object.cu", "w") as f: - f.write(empty_cplusplus_kernel) - subprocess.check_output(["nvcc", "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")]) - with open(tmpdir / "object.o", "rb") as f: - object = f.read() +# @pytest.fixture +# def OBJECT(arch, tmpdir): +# empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" +# with open(tmpdir / "object.cu", "w") as f: +# f.write(empty_cplusplus_kernel) + +# subprocess.check_output(["nvcc", "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")]) +# with open(tmpdir / "object.o", "rb") as f: +# object = f.read() + +# return object + - return object +@pytest.mark.parametrize("error_enum", nvfatbin.Result) +def test_get_error_string(error_enum): + es = nvfatbin.get_error_string(error_enum) + + if error_enum is nvfatbin.Result.SUCCESS: + assert es == "" + else: + assert "error" in es def test_nvfatbin_get_version(): @@ -118,13 +133,15 @@ def test_nvfatbin_get_version(): assert major is not None assert minor is not None + def test_nvfatbin_empty_create_and_destroy(): handle = nvfatbin.create([], 0) assert handle is not None nvfatbin.destroy(handle) + def test_nvfatbin_invalid_input_create(): - with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_UNRECOGNIZED_OPTION"): + with pytest.raises(nvfatbin.nvFatbinError, match="ERROR_UNRECOGNIZED_OPTION"): nvfatbin.create(["--unsupported_option"], 1) @@ -153,7 +170,7 @@ def test_nvfatbin_add_ptx(PTX, arch): @pytest.mark.parametrize("arch", ["sm_80"], indirect=True) def test_nvfatbin_add_cubin_ELF_SIZE_MISMATCH(CUBIN, arch): handle = nvfatbin.create([], 0) - with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + with pytest.raises(nvfatbin.nvFatbinError, match="ERROR_ELF_ARCH_MISMATCH"): nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "75", "inc") nvfatbin.destroy(handle) @@ -174,7 +191,7 @@ def test_nvfatbin_add_cubin(CUBIN, arch): @pytest.mark.parametrize("arch", ["sm_80"], indirect=True) def test_nvfatbin_add_cubin_ELF_ARCH_MISMATCH(CUBIN, arch): handle = nvfatbin.create([], 0) - with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + with pytest.raises(nvfatbin.nvFatbinError, match="ERROR_ELF_ARCH_MISMATCH"): nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "75", "inc") nvfatbin.destroy(handle) @@ -194,8 +211,9 @@ def test_nvdfatbin_add_ltoir(LTOIR, arch): @pytest.mark.parametrize("arch", ["sm_80"], indirect=True) def test_nvdfatbin_add_ltoir_ELF_ARCH_MISMATCH(LTOIR, arch): + pytest.skip() handle = nvfatbin.create([], 0) - with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + with pytest.raises(nvfatbin.nvFatbinError, match="ERROR_ELF_ARCH_MISMATCH"): nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), "75", "inc", "") nvfatbin.destroy(handle) @@ -208,4 +226,4 @@ def test_nvfatbin_add_reloc(OBJECT): buffer = bytearray(nvfatbin.size(handle)) nvfatbin.get(handle, buffer) - nvfatbin.destroy(handle) \ No newline at end of file + nvfatbin.destroy(handle) From bd868bdf7f523dbf6e52ff234966831844aa6032 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 14 Jan 2026 12:20:20 -0800 Subject: [PATCH 07/20] enable object creation and testing --- cuda_bindings/tests/test_nvfatbin.py | 33 +++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 440305d51b..1779c8ab43 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -2,6 +2,9 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +import shutil +import subprocess + import pytest from cuda.bindings import nvfatbin, nvrtc @@ -105,17 +108,27 @@ def CHECK_NVRTC(err): return empty_kernel_ltoir -# @pytest.fixture -# def OBJECT(arch, tmpdir): -# empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" -# with open(tmpdir / "object.cu", "w") as f: -# f.write(empty_cplusplus_kernel) +@pytest.fixture +def OBJECT(arch, tmpdir): + empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" + with open(tmpdir / "object.cu", "w") as f: + f.write(empty_cplusplus_kernel) + + nvcc = shutil.which("nvcc") + if nvcc is None: + pytest.skip("nvcc not found on PATH") -# subprocess.check_output(["nvcc", "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")]) -# with open(tmpdir / "object.o", "rb") as f: -# object = f.read() + # This is a test fixture that intentionally invokes a trusted tool (`nvcc`) to + # compile a temporary CUDA translation unit. + subprocess.run( # noqa: S603 + [nvcc, "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")], + check=True, + capture_output=True, + ) + with open(tmpdir / "object.o", "rb") as f: + object = f.read() -# return object + return object @pytest.mark.parametrize("error_enum", nvfatbin.Result) @@ -125,7 +138,7 @@ def test_get_error_string(error_enum): if error_enum is nvfatbin.Result.SUCCESS: assert es == "" else: - assert "error" in es + assert es != "" def test_nvfatbin_get_version(): From d11cf026d583e0e4255b1a94a45e3108fc1c27f1 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 14 Jan 2026 16:38:02 -0800 Subject: [PATCH 08/20] remove the LTOIR mismatching arch failure test --- cuda_bindings/tests/test_nvfatbin.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 1779c8ab43..07118032f4 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -48,6 +48,14 @@ } """ +TILEIR = """ +cuda_tile.module @hello_world_module { + entry @hello_world_kernel() { + print "Hello World!\n" + } +} +""" + @pytest.fixture(params=ARCHITECTURES) def arch(request): @@ -222,19 +230,20 @@ def test_nvdfatbin_add_ltoir(LTOIR, arch): nvfatbin.destroy(handle) -@pytest.mark.parametrize("arch", ["sm_80"], indirect=True) -def test_nvdfatbin_add_ltoir_ELF_ARCH_MISMATCH(LTOIR, arch): - pytest.skip() +def test_nvfatbin_add_reloc(OBJECT): handle = nvfatbin.create([], 0) - with pytest.raises(nvfatbin.nvFatbinError, match="ERROR_ELF_ARCH_MISMATCH"): - nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), "75", "inc", "") + nvfatbin.add_reloc(handle, OBJECT, len(OBJECT)) + + buffer = bytearray(nvfatbin.size(handle)) + nvfatbin.get(handle, buffer) nvfatbin.destroy(handle) -def test_nvfatbin_add_reloc(OBJECT): +def test_nvfatbin_add_tile_ir(): + pytest.skip() handle = nvfatbin.create([], 0) - nvfatbin.add_reloc(handle, OBJECT, len(OBJECT)) + nvfatbin.add_tile_ir(handle, TILEIR.encode(), len(TILEIR), "hello_world_module", "") buffer = bytearray(nvfatbin.size(handle)) From d995809e4111f412e1e6d0068af846f8227be259 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 14 Jan 2026 20:49:47 -0800 Subject: [PATCH 09/20] encode a legal tileIR into test --- cuda_bindings/tests/test_nvfatbin.py | 57 +++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 07118032f4..826279d832 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +import base64 import shutil import subprocess @@ -48,13 +49,44 @@ } """ -TILEIR = """ -cuda_tile.module @hello_world_module { - entry @hello_world_kernel() { - print "Hello World!\n" - } -} -""" +# This is the base64 encoded TileIR from sample VectorAddition program +TILEIR_b64 = ( + "f1RpbGVJUgANAQAAgssDCAECBgYBCwEECgC/A0QHEAUAEAUBBgQIEAAABgUMAQABBgUIEAAVBgUMAQAC" + "BgUMAQADBgUIBAAYBgQIEAAFBgUMAQAGBgUIEAAbBgUMAQAHBgUMAQAIBgUIBAAeBgQIEAAKBgUMAQAL" + "BgUIEAAhBgUMAQAMBgUMAQANBgUIBAAkMAUFBTAFBQVOBQAmEjoIWwgsAwgALi1OBQAqEzoJWwgwCwky" + "AwkAMzFbCi9bCzQlDQE1Cw43JQ8BFlsNOQsOOg8QAgA4OyUPARlbDT0LDj5ODgA4PyUOATYlDwEXWw1C" + "Cw5DDxACAEFEBBA8RQMOAEBBWxEUCxJIURJJRxATAlsUSwsVTD0VBxwASkZNESUNATULDlAlDwEcWw1S" + "Cw5TDxACAFFUJQ8BH1sNVgsOV04OAFFYJQ4BNiUPAR1bDVsLDlwPEAIAWl0EEFVeAw4AWVpbERoLEmFR" + "EmJgEBMCWxRkCxVlPRUHHABjX2YRAhUAAE5nJQ0BNQsOaiUPASJbDWwLDm0PEAIAa24lDwElWw1wCw5x" + "Tg4Aa3IlDgE2JQ8BI1sNdQsOdg8QAgB0dwQQb3gDDgBzdFsRIAsSe1ESfHplBwwAfWl5EVwAAIQvCMvL" + "A8vLy8vLy8sAAAAAAAAAAAUAAAAAAAAACgAAAAAAAAAEAQAAAAQABAAABAAAAACD/QcIy8vLy8sBy8vL" + "AAAAAGrLy8vLy8vLBAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAA" + "BAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAA" + "AAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAA" + "BAAAAAAAAAAFAAAAAAAAAAYAAAAAAAAABwAAAAAAAAAIAAAAAAAAAAcAAAAAAAAABwAAAAAAAAAJAAAA" + "AAAAAAoAAAAAAAAACQAAAAAAAAAJAAAAAAAAAAkAAAAAAAAACwAAAAAAAAAMAAAAAAAAAA0AAAAAAAAA" + "DQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAA" + "AAAAAA0AAAAAAAAADQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAAAAAAAA0AAAAAAAAA" + "DQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAA" + "AAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAA" + "DgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAADgAAAAAAAAAOAAAA" + "AAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAA" + "DgAAAAAAAAAOAAAAAAAAAA8AAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAA" + "AAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAA" + "EAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAA" + "AAAAABAAAAAAAAAABAAAAAAAAAAQy8vLAAAAAAMAAAAFAAAADAAAABEAAAAXAAAAHQAAACMAAAApAAAA" + "LwAAADUAAAA7AAAAQQAAAEcAAABNAAAAUwAAAAIAAQEBBQF9AgICfQQDA34ABAMDkQEMBAMDkgEMBAMD" + "lQEIBAMDlQEZBAMDlgEIBAMDlgEZBAMDmgEIBAMDmwEIBAMDnwENBAMDoAENBAMDowEPBAMDpwEEhcQC" + "BMvLyxbLy8sAAAAAAQAAAAIAAAADAAAABQAAAAgAAAALAAAAHwAAACAAAAArAAAANgAAAEkAAABcAAAA" + "XQAAAHAAAACDAAAAhgAAAJkAAACsAAAAvwAAAMIAAADVAAAAAAMHDAINAwANAQAQEQQFBQUFBAUFBQUE" + "BQUFBQUFABENAQEBAAAAAAAAAA0BAQAEAAAAAAAADQECAQAAAAAAAAABAAAAAAAAAA0BAgEAAAAAAAAA" + "AAQAAAAAAAAEDQwCAQAAAAAAAAABAAAAAAAAAA0MAgEAAAAAAAAAAAQAAAAAAAANDAANAAIBAAAAAAAA" + "AAAEAAAAAAAADQMCAQAAAAAAAAABAAAAAAAAAA0DAgEAAAAAAAAAAAQAAAAAAAANAgANAgIBAAAAAAAA" + "AAEAAAAAAAAADQICAQAAAAAAAAAABAAAAAAAAIGxAQQFy8vLAAAAABEAAAA9AAAAVQAAAJMAAABWZWN0" + "b3JBZGRpdGlvbi5weS9sb2NhbGhvbWUvbG9jYWwtd2FuZ20vY3V0aWxlLXB5dGhvbi9zYW1wbGVzdmVj" + "X2FkZF9rZXJuZWxfMmRfZ2F0aGVyL2xvY2FsaG9tZS9sb2NhbC13YW5nbS9jdXRpbGUtcHl0aG9uL3Nh" + "bXBsZXMvVmVjdG9yQWRkaXRpb24ucHlzbV8xMjAA" +) @pytest.fixture(params=ARCHITECTURES) @@ -139,6 +171,12 @@ def OBJECT(arch, tmpdir): return object +@pytest.fixture +def TILEIR(tmpdir): + binary_data = base64.b64decode(TILEIR_b64) + return binary_data + + @pytest.mark.parametrize("error_enum", nvfatbin.Result) def test_get_error_string(error_enum): es = nvfatbin.get_error_string(error_enum) @@ -240,10 +278,9 @@ def test_nvfatbin_add_reloc(OBJECT): nvfatbin.destroy(handle) -def test_nvfatbin_add_tile_ir(): - pytest.skip() +def test_nvfatbin_add_tile_ir(TILEIR): handle = nvfatbin.create([], 0) - nvfatbin.add_tile_ir(handle, TILEIR.encode(), len(TILEIR), "hello_world_module", "") + nvfatbin.add_tile_ir(handle, TILEIR, len(TILEIR), "VectorAdd", "") buffer = bytearray(nvfatbin.size(handle)) From 8e75d8acc73b66bb2c3fa0c80f9b35128d2a9f00 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 14 Jan 2026 21:29:01 -0800 Subject: [PATCH 10/20] update the license year --- cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd | 2 +- cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx | 2 +- cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx | 2 +- cuda_bindings/cuda/bindings/cynvfatbin.pxd | 2 +- cuda_bindings/cuda/bindings/cynvfatbin.pyx | 2 +- cuda_bindings/cuda/bindings/nvfatbin.pxd | 2 +- cuda_bindings/cuda/bindings/nvfatbin.pyx | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd index 843759b32c..9ed97e016b 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx index b50efe02b7..6305e49f8b 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx index e40777a7e4..1592b752a9 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd index a01a3ea1d9..b75f866ae4 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx index 7cd77c2d75..028440a444 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd index 545d30dc0e..d21cca3508 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx index 701d2dd502..92b571a26f 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # From 22bf2ce3565d67d111687c3c6f6f043188012225 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 15 Jan 2026 12:13:02 -0800 Subject: [PATCH 11/20] add an embedded tile program for re-generation --- cuda_bindings/tests/test_nvfatbin.py | 59 ++++++++------------- toolshed/dump_cutile_b64.py | 79 ++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 37 deletions(-) create mode 100644 toolshed/dump_cutile_b64.py diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 826279d832..89fc3a1b32 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -3,6 +3,7 @@ import base64 +import binascii import shutil import subprocess @@ -49,43 +50,21 @@ } """ -# This is the base64 encoded TileIR from sample VectorAddition program +# Base64 encoded TileIR generated by the toolshed/dump_cutile_b64.py script. TILEIR_b64 = ( - "f1RpbGVJUgANAQAAgssDCAECBgYBCwEECgC/A0QHEAUAEAUBBgQIEAAABgUMAQABBgUIEAAVBgUMAQAC" - "BgUMAQADBgUIBAAYBgQIEAAFBgUMAQAGBgUIEAAbBgUMAQAHBgUMAQAIBgUIBAAeBgQIEAAKBgUMAQAL" - "BgUIEAAhBgUMAQAMBgUMAQANBgUIBAAkMAUFBTAFBQVOBQAmEjoIWwgsAwgALi1OBQAqEzoJWwgwCwky" - "AwkAMzFbCi9bCzQlDQE1Cw43JQ8BFlsNOQsOOg8QAgA4OyUPARlbDT0LDj5ODgA4PyUOATYlDwEXWw1C" - "Cw5DDxACAEFEBBA8RQMOAEBBWxEUCxJIURJJRxATAlsUSwsVTD0VBxwASkZNESUNATULDlAlDwEcWw1S" - "Cw5TDxACAFFUJQ8BH1sNVgsOV04OAFFYJQ4BNiUPAR1bDVsLDlwPEAIAWl0EEFVeAw4AWVpbERoLEmFR" - "EmJgEBMCWxRkCxVlPRUHHABjX2YRAhUAAE5nJQ0BNQsOaiUPASJbDWwLDm0PEAIAa24lDwElWw1wCw5x" - "Tg4Aa3IlDgE2JQ8BI1sNdQsOdg8QAgB0dwQQb3gDDgBzdFsRIAsSe1ESfHplBwwAfWl5EVwAAIQvCMvL" - "A8vLy8vLy8sAAAAAAAAAAAUAAAAAAAAACgAAAAAAAAAEAQAAAAQABAAABAAAAACD/QcIy8vLy8sBy8vL" - "AAAAAGrLy8vLy8vLBAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAA" - "BAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAA" - "AAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAA" - "BAAAAAAAAAAFAAAAAAAAAAYAAAAAAAAABwAAAAAAAAAIAAAAAAAAAAcAAAAAAAAABwAAAAAAAAAJAAAA" - "AAAAAAoAAAAAAAAACQAAAAAAAAAJAAAAAAAAAAkAAAAAAAAACwAAAAAAAAAMAAAAAAAAAA0AAAAAAAAA" - "DQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAA" - "AAAAAA0AAAAAAAAADQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAAAAAAAA0AAAAAAAAA" - "DQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAA" - "AAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAA" - "DgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAADgAAAAAAAAAOAAAA" - "AAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAA" - "DgAAAAAAAAAOAAAAAAAAAA8AAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAA" - "AAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAA" - "EAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAA" - "AAAAABAAAAAAAAAABAAAAAAAAAAQy8vLAAAAAAMAAAAFAAAADAAAABEAAAAXAAAAHQAAACMAAAApAAAA" - "LwAAADUAAAA7AAAAQQAAAEcAAABNAAAAUwAAAAIAAQEBBQF9AgICfQQDA34ABAMDkQEMBAMDkgEMBAMD" - "lQEIBAMDlQEZBAMDlgEIBAMDlgEZBAMDmgEIBAMDmwEIBAMDnwENBAMDoAENBAMDowEPBAMDpwEEhcQC" - "BMvLyxbLy8sAAAAAAQAAAAIAAAADAAAABQAAAAgAAAALAAAAHwAAACAAAAArAAAANgAAAEkAAABcAAAA" - "XQAAAHAAAACDAAAAhgAAAJkAAACsAAAAvwAAAMIAAADVAAAAAAMHDAINAwANAQAQEQQFBQUFBAUFBQUE" - "BQUFBQUFABENAQEBAAAAAAAAAA0BAQAEAAAAAAAADQECAQAAAAAAAAABAAAAAAAAAA0BAgEAAAAAAAAA" - "AAQAAAAAAAAEDQwCAQAAAAAAAAABAAAAAAAAAA0MAgEAAAAAAAAAAAQAAAAAAAANDAANAAIBAAAAAAAA" - "AAAEAAAAAAAADQMCAQAAAAAAAAABAAAAAAAAAA0DAgEAAAAAAAAAAAQAAAAAAAANAgANAgIBAAAAAAAA" - "AAEAAAAAAAAADQICAQAAAAAAAAAABAAAAAAAAIGxAQQFy8vLAAAAABEAAAA9AAAAVQAAAJMAAABWZWN0" - "b3JBZGRpdGlvbi5weS9sb2NhbGhvbWUvbG9jYWwtd2FuZ20vY3V0aWxlLXB5dGhvbi9zYW1wbGVzdmVj" - "X2FkZF9rZXJuZWxfMmRfZ2F0aGVyL2xvY2FsaG9tZS9sb2NhbC13YW5nbS9jdXRpbGUtcHl0aG9uL3Nh" - "bXBsZXMvVmVjdG9yQWRkaXRpb24ucHlzbV8xMjAA" + "f1RpbGVJUgANAQAAgo0BCAECBgYBCwEECgCBAUQHBgQIEAAABgUMAQABBgUIEAALQwEICgEMAAYE" + "CBAAAwYFDAEABAYFCBAAD0MBCA4BEAAGBAgQAAYGBQwBAAcGBQgQABNDAQgSARQAMAUFBUIJDT4C" + "CgcEABkBFglCCRE+AgoHBAAcARYJAgoAABodQgkVZgEHBAAfIAEWCVwAAIQICADLy8vLy8vLg5oC" + "CMvLy8sBy8vLAAAAABfLy8vLy8vLBAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAA" + "AAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAA" + "AAAABAAAAAAAAAAEAAAAAAAAAAUAAAAAAAAABgAAAAAAAAAGAAAAAAAAAAcAAAAAAAAABwAAAAAA" + "AAAIAAAAAAAAAAkAAAAAAAAACQAAAAAAAAAEAAAAAAAAAAnLy8sAAAAAAwAAAAUAAAAMAAAAEQAA" + "ABYAAAAbAAAAIAAAACUAAAACAAEBAQUBFwICAhcEAwMYBAQDAxkTBAMDGhEEAwMbEQQDAx0WBAMD" + "HgiFdATLy8sLy8vLAAAAAAEAAAACAAAAAwAAAAUAAAAIAAAACwAAABcAAAAYAAAALAAAADkAAAAA" + "AwcMAg0DAA0BABAJBAUFBAUFBAUFABEOAgEAAAAAAAAAgAEBAAAAAAAAAA8BEAAAAAgBAAAAAAAN" + "AgEQAAAAAAAAAIGIAQQFy8vLAAAAABIAAAAsAAAAPQAAAGoAAABkdW1wX2N1dGlsZV9iNjQucHkv" + "bG9jYWxob21lL2xvY2FsLXdhbmdtL3RveXZlY3Rvcl9hZGRfa2VybmVsL2xvY2FsaG9tZS9sb2Nh" + "bC13YW5nbS90b3kvZHVtcF9jdXRpbGVfYjY0LnB5c21fMTIwAA==" ) @@ -173,7 +152,13 @@ def OBJECT(arch, tmpdir): @pytest.fixture def TILEIR(tmpdir): - binary_data = base64.b64decode(TILEIR_b64) + try: + binary_data = base64.b64decode(TILEIR_b64) + except binascii.Error as e: + raise ValueError( + "Base64 encoded TileIR is corrupted. Please regenerate the TileIR" + "by executing the toolshed/dump_cutile_b64.py script." + ) from e return binary_data diff --git a/toolshed/dump_cutile_b64.py b/toolshed/dump_cutile_b64.py new file mode 100644 index 0000000000..4ce5a82a9f --- /dev/null +++ b/toolshed/dump_cutile_b64.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 + +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +""" +Embeds a sample cuTile kernel, executes it with CUDA_TILE_DUMP_BYTECODE=., +loads the resulting .cutile file, and prints its base64-encoded content. +""" + +import base64 +import glob +import os +import sys + +import cuda.tile as ct +import cupy + + +def _run_sample_cutile_kernel() -> None: + # Import after env var setup so CUDA_TILE_DUMP_BYTECODE is honored. + TILE_SIZE = 16 + + @ct.kernel + def vector_add_kernel(a, b, result): + block_id = ct.bid(0) + a_tile = ct.load(a, index=(block_id,), shape=(TILE_SIZE,)) + b_tile = ct.load(b, index=(block_id,), shape=(TILE_SIZE,)) + + result_tile = a_tile + b_tile + ct.store(result, index=(block_id,), tile=result_tile) + + a = cupy.arange(128, dtype="float32") + b = cupy.arange(128, dtype="float32") + result = cupy.zeros_like(a) + + grid = (ct.cdiv(a.shape[0], TILE_SIZE), 1, 1) + ct.launch(cupy.cuda.get_current_stream(), grid, vector_add_kernel, (a, b, result)) + + cupy.cuda.get_current_stream().synchronize() + + assert result[-1] == 254 + + +def main(): + # CUDA_TILE_DUMP_BYTECODE=. means dump to current directory + os.environ["CUDA_TILE_DUMP_BYTECODE"] = "." + + try: + _run_sample_cutile_kernel() + except Exception as e: + print(f"Sample kernel execution failed: {e}", file=sys.stderr) + raise + + # Find the .cutile file in current directory + cutile_files = glob.glob("./*.cutile") + if not cutile_files: + print("No .cutile file found in current directory", file=sys.stderr) + sys.exit(1) + + # Use the most recently modified one if multiple exist + cutile_path = max(cutile_files, key=os.path.getmtime) + + # Read the binary content + with open(cutile_path, "rb") as f: + binary_content = f.read() + + # Encode with base64 in ASCII mode + b64_encoded = base64.b64encode(binary_content).decode("ascii") + + # Print with lines less than 79 characters, wrapped with quotes + line_width = 76 # 78 - 2 for the quotes on both sides + for i in range(0, len(b64_encoded), line_width): + chunk = b64_encoded[i : i + line_width] + print(f'"{chunk}"') + + +if __name__ == "__main__": + main() From cda0e2981d00ddf26e2636c91d42f9a145353da6 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 15 Jan 2026 12:23:06 -0800 Subject: [PATCH 12/20] license date: 2026 --- cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd | 2 +- cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx | 2 +- cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx | 2 +- cuda_bindings/cuda/bindings/cynvfatbin.pxd | 2 +- cuda_bindings/cuda/bindings/cynvfatbin.pyx | 2 +- cuda_bindings/cuda/bindings/nvfatbin.pxd | 2 +- cuda_bindings/cuda/bindings/nvfatbin.pyx | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd index 9ed97e016b..9f3b187560 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx index 6305e49f8b..1e1e8d9a12 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx index 1592b752a9..b0b3f94e5a 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd index b75f866ae4..5969fafee1 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx index 028440a444..f0f8300cb0 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd index d21cca3508..e0744efbd7 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx index 92b571a26f..0e6c95c893 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # From 2e53f4689289a19dd86101bafd2f0ae72052c314 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 15 Jan 2026 13:20:28 -0800 Subject: [PATCH 13/20] cdef const char * --- cuda_bindings/cuda/bindings/nvfatbin.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx index 0e6c95c893..32cdeb300b 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -89,7 +89,7 @@ cpdef str get_error_string(int result): .. seealso:: `nvFatbinGetErrorString` """ - cdef char* _output_ + cdef const char* _output_ cdef bytes _output_bytes_ _output_ = nvFatbinGetErrorString(<_Result>result) From d7dfdb81f53bdbb7116577a3eba90174fee2b09a Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 15 Jan 2026 13:32:11 -0800 Subject: [PATCH 14/20] add dependency for CI --- .github/actions/fetch_ctk/action.yml | 2 +- cuda_bindings/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml index 559c6d5a8b..001e3a84d8 100644 --- a/.github/actions/fetch_ctk/action.yml +++ b/.github/actions/fetch_ctk/action.yml @@ -14,7 +14,7 @@ inputs: cuda-components: description: "A list of the CTK components to install as a comma-separated list. e.g. 'cuda_nvcc,cuda_nvrtc,cuda_cudart'" required: false - default: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_profiler_api,cuda_cccl,libnvjitlink,libcufile" + default: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_profiler_api,cuda_cccl,libnvjitlink,libcufile,libnvfatbin" cuda-path: description: "where the CTK components will be installed to, relative to $PWD" required: false diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml index 7c4bddb434..614f7bb63a 100644 --- a/cuda_bindings/pyproject.toml +++ b/cuda_bindings/pyproject.toml @@ -34,7 +34,7 @@ dependencies = ["cuda-pathfinder ~=1.1"] [project.optional-dependencies] all = [ - "cuda-toolkit[nvrtc,nvjitlink,nvvm]==13.*", + "cuda-toolkit[nvrtc,nvjitlink,nvvm,nvfatbin]==13.*", "cuda-toolkit[cufile]==13.*; sys_platform == 'linux'", ] From ff8f6a2f804505ea9c1353d0134d321e71ea9c92 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 15 Jan 2026 13:48:52 -0800 Subject: [PATCH 15/20] add TODO to locate nvcc file --- cuda_bindings/tests/test_nvfatbin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 89fc3a1b32..f1a6cbda47 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -133,6 +133,7 @@ def OBJECT(arch, tmpdir): with open(tmpdir / "object.cu", "w") as f: f.write(empty_cplusplus_kernel) + # TODO: Use cuda-pathfinder to locate nvcc on system. nvcc = shutil.which("nvcc") if nvcc is None: pytest.skip("nvcc not found on PATH") From 965c53b984d6f7575d1da01f7621966bf54f6938 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 15 Jan 2026 13:52:40 -0800 Subject: [PATCH 16/20] add documentation and release notes --- cuda_bindings/docs/source/api.rst | 1 + cuda_bindings/docs/source/release/13.1.X-notes.rst | 2 ++ 2 files changed, 3 insertions(+) diff --git a/cuda_bindings/docs/source/api.rst b/cuda_bindings/docs/source/api.rst index 4277bc745b..e6ee4b99dd 100644 --- a/cuda_bindings/docs/source/api.rst +++ b/cuda_bindings/docs/source/api.rst @@ -14,5 +14,6 @@ CUDA Python API Reference module/nvrtc module/nvjitlink module/nvvm + module/nvfatbin module/cufile module/utils diff --git a/cuda_bindings/docs/source/release/13.1.X-notes.rst b/cuda_bindings/docs/source/release/13.1.X-notes.rst index 21323682b1..92a3f8dbcb 100644 --- a/cuda_bindings/docs/source/release/13.1.X-notes.rst +++ b/cuda_bindings/docs/source/release/13.1.X-notes.rst @@ -9,6 +9,8 @@ Highlights ---------- +* Add ``nvFatbin`` bindings. (PR #1467 _) + Experimental ------------ From 2b54279649b02e676492236264b714596fbc8285 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 15 Jan 2026 15:00:49 -0800 Subject: [PATCH 17/20] skip tileIR test for <13.1 installation --- cuda_bindings/tests/test_nvfatbin.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index f1a6cbda47..5a3b00df6b 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -68,6 +68,10 @@ ) +def get_version() -> tuple[int, int]: + return nvfatbin.version() + + @pytest.fixture(params=ARCHITECTURES) def arch(request): return request.param @@ -264,6 +268,7 @@ def test_nvfatbin_add_reloc(OBJECT): nvfatbin.destroy(handle) +@pytest.skipIf(get_version() < (13, 1), reason="TileIR API is not supported in CUDA < 13.1") def test_nvfatbin_add_tile_ir(TILEIR): handle = nvfatbin.create([], 0) nvfatbin.add_tile_ir(handle, TILEIR, len(TILEIR), "VectorAdd", "") From 4baeecda2e2808a486bff06f4cfe63413f445d74 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 15 Jan 2026 15:36:48 -0800 Subject: [PATCH 18/20] add an nvcc smoke test to better determine compiler usability --- cuda_bindings/tests/test_nvfatbin.py | 64 +++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 11 deletions(-) diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 5a3b00df6b..a4535b8045 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -87,6 +87,39 @@ def PTX(arch, ptx_version): return PTX_TEMPLATE.format(PTX_VERSION=ptx_version, ARCH=arch) +@pytest.fixture +def nvcc_smoke(tmpdir) -> str: + # TODO: Use cuda-pathfinder to locate nvcc on system. + nvcc = shutil.which("nvcc") + if nvcc is None: + pytest.skip("nvcc not found on PATH") + + # Smoke test: make sure nvcc is actually usable (toolkit + host compiler are set up), + # not merely present on PATH. + src = tmpdir / "nvcc_smoke.cu" + out = tmpdir / "nvcc_smoke.o" + with open(src, "w") as f: + f.write("") + try: + subprocess.run( # noqa: S603 + [nvcc, "-c", str(src), "-o", str(out)], + check=True, + capture_output=True, + ) + except subprocess.CalledProcessError as e: + stdout = (e.stdout or b"").decode(errors="replace") + stderr = (e.stderr or b"").decode(errors="replace") + pytest.skip( + "nvcc found on PATH but failed to compile a trivial input.\n" + f"command: {[nvcc, '-c', str(src), '-o', str(out)]!r}\n" + f"exit_code: {e.returncode}\n" + f"stdout:\n{stdout}\n" + f"stderr:\n{stderr}\n" + ) + + return nvcc + + @pytest.fixture def CUBIN(arch): def CHECK_NVRTC(err): @@ -132,23 +165,32 @@ def CHECK_NVRTC(err): @pytest.fixture -def OBJECT(arch, tmpdir): - empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" +def OBJECT(arch, tmpdir, nvcc_smoke): + empty_cplusplus_kernel = "__global__ void A() {}" with open(tmpdir / "object.cu", "w") as f: f.write(empty_cplusplus_kernel) - # TODO: Use cuda-pathfinder to locate nvcc on system. - nvcc = shutil.which("nvcc") - if nvcc is None: - pytest.skip("nvcc not found on PATH") + nvcc = nvcc_smoke # This is a test fixture that intentionally invokes a trusted tool (`nvcc`) to # compile a temporary CUDA translation unit. - subprocess.run( # noqa: S603 - [nvcc, "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")], - check=True, - capture_output=True, - ) + cmd = [nvcc, "-c", "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")] + try: + subprocess.run( # noqa: S603 + cmd, + check=True, + capture_output=True, + ) + except subprocess.CalledProcessError as e: + stdout = (e.stdout or b"").decode(errors="replace") + stderr = (e.stderr or b"").decode(errors="replace") + raise RuntimeError( + "nvcc smoke test passed, but nvcc failed while compiling the test object.\n" + f"command: {cmd!r}\n" + f"exit_code: {e.returncode}\n" + f"stdout:\n{stdout}\n" + f"stderr:\n{stderr}\n" + ) from e with open(tmpdir / "object.o", "rb") as f: object = f.read() From 4c2f3f372f5f85760b3d6a72050dc4421eaab8da Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 15 Jan 2026 16:49:24 -0800 Subject: [PATCH 19/20] fix typo --- cuda_bindings/tests/test_nvfatbin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index a4535b8045..c3b25db2f2 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -310,7 +310,7 @@ def test_nvfatbin_add_reloc(OBJECT): nvfatbin.destroy(handle) -@pytest.skipIf(get_version() < (13, 1), reason="TileIR API is not supported in CUDA < 13.1") +@pytest.mark.skipif(get_version() < (13, 1), reason="TileIR API is not supported in CUDA < 13.1") def test_nvfatbin_add_tile_ir(TILEIR): handle = nvfatbin.create([], 0) nvfatbin.add_tile_ir(handle, TILEIR, len(TILEIR), "VectorAdd", "") From 3e071e27aed4676d397a17198238bc5d14c6e3e4 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 15 Jan 2026 22:42:16 -0800 Subject: [PATCH 20/20] add nvfatbin.rst --- cuda_bindings/docs/source/module/nvfatbin.rst | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 cuda_bindings/docs/source/module/nvfatbin.rst diff --git a/cuda_bindings/docs/source/module/nvfatbin.rst b/cuda_bindings/docs/source/module/nvfatbin.rst new file mode 100644 index 0000000000..297d4baa85 --- /dev/null +++ b/cuda_bindings/docs/source/module/nvfatbin.rst @@ -0,0 +1,89 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +.. default-role:: cpp:any + +nvfatbin +======== + +Note +---- + +The nvfatbin bindings are not supported on nvFatbin installations <12.4. Ensure the installed CUDA toolkit's nvFatbin version is >=12.4. + +The Tile IR API (:func:`cuda.bindings.nvfatbin.add_tile_ir`) is only available in CUDA 13.1+. + +Functions +--------- + +NvFatbin defines the following functions for creating and populating fatbinaries. + +.. autofunction:: cuda.bindings.nvfatbin.create +.. autofunction:: cuda.bindings.nvfatbin.destroy +.. autofunction:: cuda.bindings.nvfatbin.add_ptx +.. autofunction:: cuda.bindings.nvfatbin.add_cubin +.. autofunction:: cuda.bindings.nvfatbin.add_ltoir +.. autofunction:: cuda.bindings.nvfatbin.add_reloc +.. autofunction:: cuda.bindings.nvfatbin.add_tile_ir +.. autofunction:: cuda.bindings.nvfatbin.size +.. autofunction:: cuda.bindings.nvfatbin.get +.. autofunction:: cuda.bindings.nvfatbin.get_error_string +.. autofunction:: cuda.bindings.nvfatbin.version + +Types +--------- +.. autoclass:: cuda.bindings.nvfatbin.Result + + .. autoattribute:: cuda.bindings.nvfatbin.Result.SUCCESS + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_INTERNAL + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_ELF_ARCH_MISMATCH + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_ELF_SIZE_MISMATCH + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_MISSING_PTX_VERSION + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_NULL_POINTER + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_COMPRESSION_FAILED + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_COMPRESSED_SIZE_EXCEEDED + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_UNRECOGNIZED_OPTION + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_INVALID_ARCH + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_INVALID_NVVM + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_EMPTY_INPUT + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_MISSING_PTX_ARCH + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_PTX_ARCH_MISMATCH + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_MISSING_FATBIN + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_INVALID_INDEX + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_IDENTIFIER_REUSE + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_INTERNAL_PTX_OPTION +