diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index 915f38f1a2..227f7bec5c 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -100,6 +100,8 @@ class LinkerOptions: Attributes ---------- + name : str, optional + Name of the linker. If the linking succeeds, the name is passed down to the generated `ObjectCode`. arch : str, optional Pass the SM architecture value, such as ``sm_`` (for generating CUBIN) or ``compute_`` (for generating PTX). If not provided, the current device's architecture @@ -161,6 +163,7 @@ class LinkerOptions: Default: False. """ + name: Optional[str] = "" arch: Optional[str] = None max_register_count: Optional[int] = None time: Optional[bool] = None @@ -184,6 +187,7 @@ class LinkerOptions: def __post_init__(self): _lazy_init() + self._name = self.name.encode() self.formatted_options = [] if _nvjitlink: self._init_nvjitlink() @@ -393,7 +397,7 @@ def _add_code_object(self, object_code: ObjectCode): data = object_code._module assert_type(data, bytes) with _exception_manager(self): - name_str = f"{object_code._handle}_{object_code._code_type}" + name_str = f"{object_code.name}" if _nvjitlink: _nvjitlink.add_data( self._mnff.handle, @@ -455,7 +459,7 @@ def link(self, target_type) -> ObjectCode: addr, size = handle_return(_driver.cuLinkComplete(self._mnff.handle)) code = (ctypes.c_char * size).from_address(addr) - return ObjectCode._init(bytes(code), target_type) + return ObjectCode._init(bytes(code), target_type, name=self._options.name) def get_error_log(self) -> str: """Get the error log generated by the linker. diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 9c80c687b7..8bfc74e4a8 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -449,7 +449,7 @@ class ObjectCode: context. """ - __slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map") + __slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map", "_name") _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin", "object", "library") def __new__(self, *args, **kwargs): @@ -459,7 +459,7 @@ def __new__(self, *args, **kwargs): ) @classmethod - def _init(cls, module, code_type, *, symbol_mapping: Optional[dict] = None): + def _init(cls, module, code_type, *, name: str = "", symbol_mapping: Optional[dict] = None): self = super().__new__(cls) assert code_type in self._supported_code_type, f"{code_type=} is not supported" _lazy_init() @@ -473,19 +473,20 @@ def _init(cls, module, code_type, *, symbol_mapping: Optional[dict] = None): self._code_type = code_type self._module = module self._sym_map = {} if symbol_mapping is None else symbol_mapping + self._name = name return self @classmethod - def _reduce_helper(self, module, code_type, symbol_mapping): + def _reduce_helper(self, module, code_type, name, symbol_mapping): # just for forwarding kwargs - return ObjectCode._init(module, code_type, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, code_type, name=name, symbol_mapping=symbol_mapping) def __reduce__(self): - return ObjectCode._reduce_helper, (self._module, self._code_type, self._sym_map) + return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map) @staticmethod - def from_cubin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing cubin. Parameters @@ -493,15 +494,17 @@ def from_cubin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = No module : Union[bytes, str] Either a bytes object containing the in-memory cubin to load, or a file path string pointing to the on-disk cubin to load. + name : Optional[str] + A human-readable identifier representing this code object. symbol_mapping : Optional[dict] A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "cubin", symbol_mapping=symbol_mapping) + return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_ptx(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing PTX. Parameters @@ -509,15 +512,17 @@ def from_ptx(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None module : Union[bytes, str] Either a bytes object containing the in-memory ptx code to load, or a file path string pointing to the on-disk ptx file to load. + name : Optional[str] + A human-readable identifier representing this code object. symbol_mapping : Optional[dict] A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "ptx", symbol_mapping=symbol_mapping) + return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_ltoir(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing LTOIR. Parameters @@ -525,15 +530,19 @@ def from_ltoir(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = No module : Union[bytes, str] Either a bytes object containing the in-memory ltoir code to load, or a file path string pointing to the on-disk ltoir file to load. + name : Optional[str] + A human-readable identifier representing this code object. symbol_mapping : Optional[dict] A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "ltoir", symbol_mapping=symbol_mapping) + return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_fatbin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_fatbin( + module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None + ) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing fatbin. Parameters @@ -541,15 +550,19 @@ def from_fatbin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = N module : Union[bytes, str] Either a bytes object containing the in-memory fatbin to load, or a file path string pointing to the on-disk fatbin to load. + name : Optional[str] + A human-readable identifier representing this code object. symbol_mapping : Optional[dict] A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "fatbin", symbol_mapping=symbol_mapping) + return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_object(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_object( + module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None + ) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing object code. Parameters @@ -557,15 +570,19 @@ def from_object(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = N module : Union[bytes, str] Either a bytes object containing the in-memory object code to load, or a file path string pointing to the on-disk object code to load. + name : Optional[str] + A human-readable identifier representing this code object. symbol_mapping : Optional[dict] A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "object", symbol_mapping=symbol_mapping) + return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_library(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_library( + module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None + ) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing library. Parameters @@ -573,12 +590,14 @@ def from_library(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = module : Union[bytes, str] Either a bytes object containing the in-memory library to load, or a file path string pointing to the on-disk library to load. + name : Optional[str] + A human-readable identifier representing this code object. symbol_mapping : Optional[dict] A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "library", symbol_mapping=symbol_mapping) + return ObjectCode._init(module, "library", name=name, symbol_mapping=symbol_mapping) # TODO: do we want to unload in a finalizer? Probably not.. @@ -632,6 +651,11 @@ def code(self) -> CodeTypeT: """Return the underlying code object.""" return self._module + @property + def name(self) -> str: + """Return a human-readable name of this code object.""" + return self._name + @property @precondition(_lazy_load_module) def handle(self): diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index dd86bd250d..e82b33a981 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -57,6 +57,8 @@ class ProgramOptions: Attributes ---------- + name : str, optional + Name of the program. If the compilation succeeds, the name is passed down to the generated `ObjectCode`. arch : str, optional Pass the SM architecture value, such as ``sm_`` (for generating CUBIN) or ``compute_`` (for generating PTX). If not provided, the current device's architecture @@ -180,6 +182,7 @@ class ProgramOptions: Default: False """ + name: Optional[str] = "" arch: Optional[str] = None relocatable_device_code: Optional[bool] = None extensible_whole_program: Optional[bool] = None @@ -222,6 +225,8 @@ class ProgramOptions: minimal: Optional[bool] = None def __post_init__(self): + self._name = self.name.encode() + self._formatted_options = [] if self.arch is not None: self._formatted_options.append(f"--gpu-architecture={self.arch}") @@ -396,7 +401,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): # TODO: support pre-loaded headers & include names # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved - self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], [])) + self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), options._name, 0, [], [])) self._backend = "NVRTC" self._linker = None @@ -413,6 +418,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions: return LinkerOptions( + name=options.name, arch=options.arch, max_register_count=options.max_register_count, time=options.time, @@ -505,7 +511,7 @@ def compile(self, target_type, name_expressions=(), logs=None): handle_return(nvrtc.nvrtcGetProgramLog(self._mnff.handle, log), handle=self._mnff.handle) logs.write(log.decode("utf-8", errors="backslashreplace")) - return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping) + return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping, name=self._options.name) supported_backends = ("nvJitLink", "driver") if self._backend not in supported_backends: diff --git a/cuda_core/docs/source/release/0.3.0-notes.rst b/cuda_core/docs/source/release/0.3.0-notes.rst index 2d32a38906..0f8cc77aed 100644 --- a/cuda_core/docs/source/release/0.3.0-notes.rst +++ b/cuda_core/docs/source/release/0.3.0-notes.rst @@ -20,9 +20,12 @@ Breaking Changes New features ------------ -- :class:`Kernel` adds :property:`Kernel.num_arguments` and :property:`Kernel.arguments_info` for introspection of kernel arguments. (#612) -- Add pythonic access to kernel occupancy calculation functions via :property:`Kernel.occupancy`. (#648) +- :class:`Kernel` adds :attr:`Kernel.num_arguments` and :attr:`Kernel.arguments_info` for introspection of kernel arguments. (#612) +- Add pythonic access to kernel occupancy calculation functions via :attr:`Kernel.occupancy`. (#648) - Support launching cooperative kernels by setting :property:`LaunchConfig.cooperative_launch` to `True`. +- A name can be assigned to :class:`ObjectCode` instances generated by both :class:`Program` and :class:`Linker` through their respective + options. + New examples ------------ diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py index f15e98a422..9b96d68473 100644 --- a/cuda_core/tests/test_linker.py +++ b/cuda_core/tests/test_linker.py @@ -109,6 +109,7 @@ def test_linker_link_ptx_nvjitlink(compile_ltoir_functions): linker = Linker(*compile_ltoir_functions, options=options) linked_code = linker.link("ptx") assert isinstance(linked_code, ObjectCode) + assert linked_code.name == options.name @pytest.mark.skipif(not is_culink_backend, reason="nvjitlink requires lto for ptx linking") @@ -117,6 +118,7 @@ def test_linker_link_ptx_culink(compile_ptx_functions): linker = Linker(*compile_ptx_functions, options=options) linked_code = linker.link("ptx") assert isinstance(linked_code, ObjectCode) + assert linked_code.name == options.name def test_linker_link_cubin(compile_ptx_functions): @@ -124,6 +126,7 @@ def test_linker_link_cubin(compile_ptx_functions): linker = Linker(*compile_ptx_functions, options=options) linked_code = linker.link("cubin") assert isinstance(linked_code, ObjectCode) + assert linked_code.name == options.name def test_linker_link_ptx_multiple(compile_ptx_functions): @@ -132,6 +135,7 @@ def test_linker_link_ptx_multiple(compile_ptx_functions): linker = Linker(*ptxes, options=options) linked_code = linker.link("cubin") assert isinstance(linked_code, ObjectCode) + assert linked_code.name == options.name def test_linker_link_invalid_target_type(compile_ptx_functions): @@ -144,14 +148,16 @@ def test_linker_link_invalid_target_type(compile_ptx_functions): # this test causes an API error when using the culink API @skipif_testing_with_compute_sanitizer def test_linker_get_error_log(compile_ptx_functions): - options = LinkerOptions(arch=ARCH) + options = LinkerOptions(name="ABC", arch=ARCH) replacement_kernel = """ extern __device__ int Z(); extern __device__ int C(int a, int b); __global__ void A() { int result = C(Z(), 1);} """ - dummy_program = Program(replacement_kernel, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx") + dummy_program = Program( + replacement_kernel, "c++", ProgramOptions(name="CBA", relocatable_device_code=True) + ).compile("ptx") linker = Linker(dummy_program, *(compile_ptx_functions[1:]), options=options) try: linker.link("cubin") @@ -160,8 +166,9 @@ def test_linker_get_error_log(compile_ptx_functions): log = linker.get_error_log() assert isinstance(log, str) # TODO when 4902246 is addressed, we can update this to cover nvjitlink as well + # The error is coming from the input object that's being linked (CBA), not the output object (ABC). if is_culink_backend: - assert log.rstrip("\x00") == "error : Undefined reference to '_Z1Zv' in 'None_ptx'" + assert log.rstrip("\x00") == "error : Undefined reference to '_Z1Zv' in 'CBA'" def test_linker_get_info_log(compile_ptx_functions): diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 56cddb1355..7d818657e8 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -24,6 +24,7 @@ def ptx_code_object(): @pytest.mark.parametrize( "options", [ + ProgramOptions(name="abc"), ProgramOptions(device_code_optimize=True, debug=True), ProgramOptions(relocatable_device_code=True, max_register_count=32), ProgramOptions(ftz=True, prec_sqrt=False, prec_div=False), @@ -105,21 +106,23 @@ def test_program_init_invalid_code_format(): # This is tested against the current device's arch def test_program_compile_valid_target_type(init_cuda): code = 'extern "C" __global__ void my_kernel() {}' - program = Program(code, "c++") + program = Program(code, "c++", options={"name": "42"}) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") ptx_object_code = program.compile("ptx") + assert isinstance(ptx_object_code, ObjectCode) + assert ptx_object_code.name == "42" if any("The CUDA driver version is older than the backend version" in str(warning.message) for warning in w): pytest.skip("PTX version too new for current driver") + ptx_kernel = ptx_object_code.get_kernel("my_kernel") + assert isinstance(ptx_kernel, Kernel) - program = Program(ptx_object_code._module.decode(), "ptx") + program = Program(ptx_object_code._module.decode(), "ptx", options={"name": "24"}) cubin_object_code = program.compile("cubin") - ptx_kernel = ptx_object_code.get_kernel("my_kernel") - cubin_kernel = cubin_object_code.get_kernel("my_kernel") - assert isinstance(ptx_object_code, ObjectCode) assert isinstance(cubin_object_code, ObjectCode) - assert isinstance(ptx_kernel, Kernel) + assert cubin_object_code.name == "24" + cubin_kernel = cubin_object_code.get_kernel("my_kernel") assert isinstance(cubin_kernel, Kernel)