NVIDIA · gmarkall · Jan 22, 2026 · Dec 19, 2025 · Dec 30, 2025 · Dec 30, 2025
diff --git a/numba_cuda/numba/cuda/cudadrv/nvvm.py b/numba_cuda/numba/cuda/cudadrv/nvvm.py
@@ -75,6 +75,9 @@ def is_available():
 class NVVM(object):
     """Process-wide singleton."""
 
+    _libnvvm_cuda_version = None
+    _libnvvm_cuda_version_attempted = False
+
     _PROTOTYPES = {
         # nvvmResult nvvmVersion(int *major, int *minor)
         "nvvmVersion": (nvvm_result, POINTER(c_int), POINTER(c_int)),
@@ -195,6 +198,115 @@ def get_ir_version(self):
         self.check_error(err, "Failed to get IR version.")
         return majorIR.value, minorIR.value, majorDbg.value, minorDbg.value
 
+    def get_cuda_version(self):
+        """
+        Detect the libNVVM CUDA version by compiling dummy IR and analyzing the PTX output.
+
+        Workaround for the lack of a direct CUDA version API (nvbugs 5312315).
+        The approach:
+        - Compile a small dummy NVVM IR to PTX
+        - Use the PTX version analysis APIs, if available, to infer the CUDA version
+        - Cache the result for future use
+        """
+
+        if self._libnvvm_cuda_version_attempted:
+            return self._libnvvm_cuda_version
+        self._libnvvm_cuda_version_attempted = True
+
+        try:
+            from cuda.bindings.utils import (
+                get_minimal_required_cuda_ver_from_ptx_ver,
+                get_ptx_ver,
+            )
+        except ImportError:
+            return None
+
+        precheck_nvvm_ir = """target triple = "nvptx64-unknown-cuda"
+        target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+        define void @dummy_kernel() {{
+        entry:
+        ret void
+        }}
+
+        !nvvm.annotations = !{{!0}}
+        !0 = !{{void ()* @dummy_kernel, !"kernel", i32 1}}
+
+        !nvvmir.version = !{{!1}}
+        !1 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}}
+        """
+
+        # Create a test program to compile in order to determine
+        # the CUDA Toolkit version based on the PTX version that
+        # is generated by libnvvm.
+        program = c_void_p()
+        try:
+            # Create the NVVM program
+            err = self.nvvmCreateProgram(byref(program))
+            self.check_error(err, "Failed to create test program.")
+
+            # Add the test program to the compilation unit
+            precheck_nvvm_ir = precheck_nvvm_ir.format(
+                major=self._majorIR,
+                minor=self._minorIR,
+                debug_major=self._majorDbg,
+                debug_minor=self._minorDbg,
+            )
+            precheck_ir_bytes = precheck_nvvm_ir.encode("utf-8")
+            err = self.nvvmAddModuleToProgram(
+                program,
+                precheck_ir_bytes,
+                len(precheck_ir_bytes),
+                "precheck.ll".encode("utf-8"),
+            )
+            self.check_error(err, "Failed to add test module.")
+
+            # Compile the test program
+            options = ["-arch=compute_90"]
+            option_ptrs = (c_char_p * len(options))(
+                *[c_char_p(x.encode("utf-8")) for x in options]
+            )
+            err = self.nvvmVerifyProgram(program, len(options), option_ptrs)
+            self.check_error(err, "Failed to verify test program.")
+            err = self.nvvmCompileProgram(program, len(options), option_ptrs)
+            self.check_error(err, "Failed to compile test program.")
+
+            # Retrieve the PTX from the compiled program
+            ptx_size = c_size_t()
+            err = self.nvvmGetCompiledResultSize(program, byref(ptx_size))
+            self.check_error(
+                err, "Failed to get test program compiled result size."
+            )
+            ptx_data = (c_char * ptx_size.value)()
+            err = self.nvvmGetCompiledResult(program, ptx_data)
+            self.check_error(err, "Failed to get test program compiled result.")
+        except Exception as exception:
+            print(f"Exception compiling test program: {exception}")
-            print(f"Exception compiling test program: {exception}")
+            warnings.warn(
+                f"Exception compiling test program: {exception}",
+                category=NvvmWarning
+            )
-            print(f"Exception compiling test program: {exception}")
+            warnings.warn(
+                f"Exception compiling test program: {exception}",
+                category=NvvmWarning
+            )
+            raise exception
-            raise exception
-        except Exception as exception:
-            print(f"Exception compiling test program: {exception}")
-            raise exception
+        except Exception:
+            pass
-            raise exception
-        except Exception as exception:
-            print(f"Exception compiling test program: {exception}")
-            raise exception
+        except Exception:
+            pass
+        finally:
+            if program.value:
+                # Destroy the NVVM program; a failure here is not fatal
+                err = self.nvvmDestroyProgram(byref(program))
+                try:
+                    self.check_error(err, "Failed to destroy test program.")
+                except Exception:
+                    pass
-                try:
-                    self.check_error(err, "Failed to destroy test program.")
-                except Exception:
-                    pass
-                try:
-                    self.check_error(err, "Failed to destroy test program.")
-                except Exception:
-                    pass
+
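+        # Extract the PTX version and look up the corresponding
+        # CUDA Toolkit version. If this fails, the CUDA Toolkit version
+        # cannot be determined and self._libnvvm_cuda_version will remain None
+        # as expected.
+        # NOTE(review): ptx_data is only bound if the compile step above
+        # succeeded - confirm a swallowed compile failure cannot reach here.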
+        try:
+            ptx_version = get_ptx_ver(ptx_data[:].decode("utf-8"))
+            self._libnvvm_cuda_version = (
+                get_minimal_required_cuda_ver_from_ptx_ver(ptx_version)
+            )
+        except Exception:
-        except Exception:
+        except ValueError:
-        except Exception:
+        except ValueError:
+            pass
+
+        # Return the CUDA Toolkit version or None if it could not be determined
+        return self._libnvvm_cuda_version
+
     def check_error(self, error, msg, exit=False):
         if error:
             exc = NvvmError(msg, RESULT_CODE_NAMES[error])
@@ -243,6 +355,15 @@ def stringify_option(k, v):
 
             return f"-{k}={v}".encode("utf-8")
 
+        # Starting with CUDA 13.1 (r13.1), we must pass the -numba-debug flag
+        # to the compiler when compiling with debug info (the "g" option). If
+        # the CUDA version cannot be determined, assume that a newer version is
+        # being used and pass in the -numba-debug flag.
+        if "g" in options:
+            ctk_version = self.driver.get_cuda_version()
+            if ctk_version is None or ctk_version >= (13, 1):
-            if ctk_version is None or ctk_version >= (13, 1):
+            if ctk_version is not None and ctk_version >= (13, 1):
-            if ctk_version is None or ctk_version >= (13, 1):
+            if ctk_version is not None and ctk_version >= (13, 1):
+                options["numba-debug"] = None
+
         options = [stringify_option(k, v) for k, v in options.items()]
         option_ptrs = (c_char_p * len(options))(*[c_char_p(x) for x in options])