Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions cuda_core/cuda/core/experimental/_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ class LinkerOptions:

Attributes
----------
name : str, optional
Name of the linker. If the linking succeeds, the name is passed down to the generated :class:`ObjectCode`.
Default: ``"<default linker>"``.
arch : str, optional
Pass the SM architecture value, such as ``sm_<CC>`` (for generating CUBIN) or
``compute_<CC>`` (for generating PTX). If not provided, the current device's architecture
Expand Down Expand Up @@ -161,6 +163,7 @@ class LinkerOptions:
Default: False.
"""

name: Optional[str] = "<default linker>"
arch: Optional[str] = None
max_register_count: Optional[int] = None
time: Optional[bool] = None
Expand All @@ -184,6 +187,7 @@ class LinkerOptions:

def __post_init__(self):
_lazy_init()
self._name = self.name.encode()
self.formatted_options = []
if _nvjitlink:
self._init_nvjitlink()
Expand Down Expand Up @@ -393,7 +397,7 @@ def _add_code_object(self, object_code: ObjectCode):
data = object_code._module
assert_type(data, bytes)
with _exception_manager(self):
name_str = f"{object_code._handle}_{object_code._code_type}"
name_str = f"{object_code.name}"
if _nvjitlink:
_nvjitlink.add_data(
self._mnff.handle,
Expand Down Expand Up @@ -455,7 +459,7 @@ def link(self, target_type) -> ObjectCode:
addr, size = handle_return(_driver.cuLinkComplete(self._mnff.handle))
code = (ctypes.c_char * size).from_address(addr)

return ObjectCode._init(bytes(code), target_type)
return ObjectCode._init(bytes(code), target_type, name=self._options.name)

def get_error_log(self) -> str:
"""Get the error log generated by the linker.
Expand Down
58 changes: 41 additions & 17 deletions cuda_core/cuda/core/experimental/_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ class ObjectCode:
context.
"""

__slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map")
__slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map", "_name")
_supported_code_type = ("cubin", "ptx", "ltoir", "fatbin", "object", "library")

def __new__(self, *args, **kwargs):
Expand All @@ -459,7 +459,7 @@ def __new__(self, *args, **kwargs):
)

@classmethod
def _init(cls, module, code_type, *, symbol_mapping: Optional[dict] = None):
def _init(cls, module, code_type, *, name: str = "", symbol_mapping: Optional[dict] = None):
self = super().__new__(cls)
assert code_type in self._supported_code_type, f"{code_type=} is not supported"
_lazy_init()
Expand All @@ -473,112 +473,131 @@ def _init(cls, module, code_type, *, symbol_mapping: Optional[dict] = None):
self._code_type = code_type
self._module = module
self._sym_map = {} if symbol_mapping is None else symbol_mapping
self._name = name

return self

@classmethod
def _reduce_helper(self, module, code_type, symbol_mapping):
def _reduce_helper(self, module, code_type, name, symbol_mapping):
# just for forwarding kwargs
return ObjectCode._init(module, code_type, symbol_mapping=symbol_mapping)
return ObjectCode._init(module, code_type, name=name, symbol_mapping=symbol_mapping)

def __reduce__(self):
return ObjectCode._reduce_helper, (self._module, self._code_type, self._sym_map)
return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map)

@staticmethod
def from_cubin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing cubin.

Parameters
----------
module : Union[bytes, str]
Either a bytes object containing the in-memory cubin to load, or
a file path string pointing to the on-disk cubin to load.
name : str, optional
A human-readable identifier representing this code object (default to an empty string).
symbol_mapping : Optional[dict]
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
"""
return ObjectCode._init(module, "cubin", symbol_mapping=symbol_mapping)
return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_ptx(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing PTX.

Parameters
----------
module : Union[bytes, str]
Either a bytes object containing the in-memory ptx code to load, or
a file path string pointing to the on-disk ptx file to load.
name : str, optional
A human-readable identifier representing this code object (default to an empty string).
symbol_mapping : Optional[dict]
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
"""
return ObjectCode._init(module, "ptx", symbol_mapping=symbol_mapping)
return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_ltoir(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing LTOIR.

Parameters
----------
module : Union[bytes, str]
Either a bytes object containing the in-memory ltoir code to load, or
a file path string pointing to the on-disk ltoir file to load.
name : str, optional
A human-readable identifier representing this code object (default to an empty string).
symbol_mapping : Optional[dict]
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
"""
return ObjectCode._init(module, "ltoir", symbol_mapping=symbol_mapping)
return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_fatbin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_fatbin(
module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing fatbin.

Parameters
----------
module : Union[bytes, str]
Either a bytes object containing the in-memory fatbin to load, or
a file path string pointing to the on-disk fatbin to load.
name : str, optional
A human-readable identifier representing this code object (default to an empty string).
symbol_mapping : Optional[dict]
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
"""
return ObjectCode._init(module, "fatbin", symbol_mapping=symbol_mapping)
return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_object(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_object(
module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing object code.

Parameters
----------
module : Union[bytes, str]
Either a bytes object containing the in-memory object code to load, or
a file path string pointing to the on-disk object code to load.
name : str, optional
A human-readable identifier representing this code object (default to an empty string).
symbol_mapping : Optional[dict]
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
"""
return ObjectCode._init(module, "object", symbol_mapping=symbol_mapping)
return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_library(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_library(
module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing library.

Parameters
----------
module : Union[bytes, str]
Either a bytes object containing the in-memory library to load, or
a file path string pointing to the on-disk library to load.
name : str, optional
A human-readable identifier representing this code object (default to an empty string).
symbol_mapping : Optional[dict]
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
"""
return ObjectCode._init(module, "library", symbol_mapping=symbol_mapping)
return ObjectCode._init(module, "library", name=name, symbol_mapping=symbol_mapping)

# TODO: do we want to unload in a finalizer? Probably not..

Expand Down Expand Up @@ -632,6 +651,11 @@ def code(self) -> CodeTypeT:
"""Return the underlying code object."""
return self._module

@property
def name(self) -> str:
"""Return a human-readable name of this code object."""
return self._name

@property
@precondition(_lazy_load_module)
def handle(self):
Expand Down
10 changes: 8 additions & 2 deletions cuda_core/cuda/core/experimental/_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ class ProgramOptions:

Attributes
----------
name : str, optional
Name of the program. If the compilation succeeds, the name is passed down to the generated :class:`ObjectCode`.
Default: ``"<default program>"``.
arch : str, optional
Pass the SM architecture value, such as ``sm_<CC>`` (for generating CUBIN) or
``compute_<CC>`` (for generating PTX). If not provided, the current device's architecture
Expand Down Expand Up @@ -180,6 +182,7 @@ class ProgramOptions:
Default: False
"""

name: Optional[str] = "<default program>"
arch: Optional[str] = None
relocatable_device_code: Optional[bool] = None
extensible_whole_program: Optional[bool] = None
Expand Down Expand Up @@ -222,6 +225,8 @@ class ProgramOptions:
minimal: Optional[bool] = None

def __post_init__(self):
self._name = self.name.encode()

self._formatted_options = []
if self.arch is not None:
self._formatted_options.append(f"--gpu-architecture={self.arch}")
Expand Down Expand Up @@ -396,7 +401,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
# TODO: support pre-loaded headers & include names
# TODO: allow tuples once NVIDIA/cuda-python#72 is resolved

self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], []))
self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), options._name, 0, [], []))
self._backend = "NVRTC"
self._linker = None

Expand All @@ -413,6 +418,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None):

def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions:
return LinkerOptions(
name=options.name,
arch=options.arch,
max_register_count=options.max_register_count,
time=options.time,
Expand Down Expand Up @@ -505,7 +511,7 @@ def compile(self, target_type, name_expressions=(), logs=None):
handle_return(nvrtc.nvrtcGetProgramLog(self._mnff.handle, log), handle=self._mnff.handle)
logs.write(log.decode("utf-8", errors="backslashreplace"))

return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping)
return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping, name=self._options.name)

supported_backends = ("nvJitLink", "driver")
if self._backend not in supported_backends:
Expand Down
2 changes: 2 additions & 0 deletions cuda_core/docs/source/release/0.3.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ New features

- :class:`Kernel` adds :property:`Kernel.num_arguments` and :property:`Kernel.arguments_info` for introspection of kernel arguments. (#612)
- Add pythonic access to kernel occupancy calculation functions via :property:`Kernel.occupancy`. (#648)
- A name can be assigned to :class:`ObjectCode` instances generated by both :class:`Program` and :class:`Linker` through their respective options.

New examples
------------
Expand Down
13 changes: 10 additions & 3 deletions cuda_core/tests/test_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def test_linker_link_ptx_nvjitlink(compile_ltoir_functions):
linker = Linker(*compile_ltoir_functions, options=options)
linked_code = linker.link("ptx")
assert isinstance(linked_code, ObjectCode)
assert linked_code.name == options.name


@pytest.mark.skipif(not is_culink_backend, reason="nvjitlink requires lto for ptx linking")
Expand All @@ -117,13 +118,15 @@ def test_linker_link_ptx_culink(compile_ptx_functions):
linker = Linker(*compile_ptx_functions, options=options)
linked_code = linker.link("ptx")
assert isinstance(linked_code, ObjectCode)
assert linked_code.name == options.name


def test_linker_link_cubin(compile_ptx_functions):
options = LinkerOptions(arch=ARCH)
linker = Linker(*compile_ptx_functions, options=options)
linked_code = linker.link("cubin")
assert isinstance(linked_code, ObjectCode)
assert linked_code.name == options.name


def test_linker_link_ptx_multiple(compile_ptx_functions):
Expand All @@ -132,6 +135,7 @@ def test_linker_link_ptx_multiple(compile_ptx_functions):
linker = Linker(*ptxes, options=options)
linked_code = linker.link("cubin")
assert isinstance(linked_code, ObjectCode)
assert linked_code.name == options.name


def test_linker_link_invalid_target_type(compile_ptx_functions):
Expand All @@ -144,14 +148,16 @@ def test_linker_link_invalid_target_type(compile_ptx_functions):
# this test causes an API error when using the culink API
@skipif_testing_with_compute_sanitizer
def test_linker_get_error_log(compile_ptx_functions):
options = LinkerOptions(arch=ARCH)
options = LinkerOptions(name="ABC", arch=ARCH)

replacement_kernel = """
extern __device__ int Z();
extern __device__ int C(int a, int b);
__global__ void A() { int result = C(Z(), 1);}
"""
dummy_program = Program(replacement_kernel, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
dummy_program = Program(
replacement_kernel, "c++", ProgramOptions(name="CBA", relocatable_device_code=True)
).compile("ptx")
linker = Linker(dummy_program, *(compile_ptx_functions[1:]), options=options)
try:
linker.link("cubin")
Expand All @@ -160,8 +166,9 @@ def test_linker_get_error_log(compile_ptx_functions):
log = linker.get_error_log()
assert isinstance(log, str)
# TODO when 4902246 is addressed, we can update this to cover nvjitlink as well
# The error is coming from the input object that's being linked (CBA), not the output object (ABC).
if is_culink_backend:
assert log.rstrip("\x00") == "error : Undefined reference to '_Z1Zv' in 'None_ptx'"
assert log.rstrip("\x00") == "error : Undefined reference to '_Z1Zv' in 'CBA'"


def test_linker_get_info_log(compile_ptx_functions):
Expand Down
15 changes: 9 additions & 6 deletions cuda_core/tests/test_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def ptx_code_object():
@pytest.mark.parametrize(
"options",
[
ProgramOptions(name="abc"),
ProgramOptions(device_code_optimize=True, debug=True),
ProgramOptions(relocatable_device_code=True, max_register_count=32),
ProgramOptions(ftz=True, prec_sqrt=False, prec_div=False),
Expand Down Expand Up @@ -105,21 +106,23 @@ def test_program_init_invalid_code_format():
# This is tested against the current device's arch
def test_program_compile_valid_target_type(init_cuda):
code = 'extern "C" __global__ void my_kernel() {}'
program = Program(code, "c++")
program = Program(code, "c++", options={"name": "42"})

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
ptx_object_code = program.compile("ptx")
assert isinstance(ptx_object_code, ObjectCode)
assert ptx_object_code.name == "42"
if any("The CUDA driver version is older than the backend version" in str(warning.message) for warning in w):
pytest.skip("PTX version too new for current driver")
ptx_kernel = ptx_object_code.get_kernel("my_kernel")
assert isinstance(ptx_kernel, Kernel)

program = Program(ptx_object_code._module.decode(), "ptx")
program = Program(ptx_object_code._module.decode(), "ptx", options={"name": "24"})
cubin_object_code = program.compile("cubin")
ptx_kernel = ptx_object_code.get_kernel("my_kernel")
cubin_kernel = cubin_object_code.get_kernel("my_kernel")
assert isinstance(ptx_object_code, ObjectCode)
assert isinstance(cubin_object_code, ObjectCode)
assert isinstance(ptx_kernel, Kernel)
assert cubin_object_code.name == "24"
cubin_kernel = cubin_object_code.get_kernel("my_kernel")
assert isinstance(cubin_kernel, Kernel)


Expand Down
Loading