Skip to content

Commit 1f1f479

Browse files
authored
refactor: privatize StridedLayout as _StridedLayout (#1373)
* revert: revert `StridedLayout` (#1370)
  This reverts commit d8e9317.
* chore: remove strided layout from docs
* chore: privatize stridedlayout
* chore: remove unused cimports
1 parent 7b29de3 commit 1f1f479

15 files changed

Lines changed: 3896 additions & 83 deletions

File tree

cuda_core/cuda/core/experimental/_layout.pxd

Lines changed: 693 additions & 0 deletions
Large diffs are not rendered by default.

cuda_core/cuda/core/experimental/_layout.pyx

Lines changed: 1323 additions & 0 deletions
Large diffs are not rendered by default.

cuda_core/cuda/core/experimental/_memory/_buffer.pxd

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,23 @@ from libc.stdint cimport uintptr_t
77
from cuda.core.experimental._stream cimport Stream
88

99

10+
# Memory attributes of a raw pointer, as filled in by ``query_memory_attrs``
# and cached on ``Buffer._mem_attrs``.
cdef struct _MemAttrs:
11+
int device_id  # device ordinal reported by the driver; -1 for an unregistered host pointer
12+
bint is_device_accessible  # True when the pointer is classified as accessible from the GPU
13+
bint is_host_accessible  # True when the pointer is classified as accessible from the CPU
14+
15+
1016
# C-level attribute layout of ``Buffer``; the implementation lives in ``_buffer.pyx``.
cdef class Buffer:
1117
cdef:
1218
uintptr_t _ptr  # pointer value as an integer; reset to 0 by Buffer_close
1319
size_t _size  # size given at construction; passed to deallocate() on close
1420
MemoryResource _memory_resource  # resource used to deallocate on close; may be None
1521
object _ipc_data  # IPCDataForBuffer when an IPC descriptor was given, else None
22+
object _owner  # object holding the external allocation; cannot be combined with a memory resource
1623
object _ptr_obj  # the original ``ptr`` object the buffer was created from
1724
Stream _alloc_stream  # stream used for deallocation when close is given no stream
25+
_MemAttrs _mem_attrs  # pointer attributes, filled lazily by Buffer_init_mem_attrs
26+
bint _mem_attrs_inited  # True once ``_mem_attrs`` has been queried
1827

1928

2029
cdef class MemoryResource:

cuda_core/cuda/core/experimental/_memory/_buffer.pyx

Lines changed: 100 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from __future__ import annotations
66

7+
cimport cython
78
from libc.stdint cimport uintptr_t
89

910
from cuda.bindings cimport cydriver
@@ -24,6 +25,7 @@ else:
2425

2526
from cuda.core.experimental._dlpack import DLDeviceType, make_py_capsule
2627
from cuda.core.experimental._utils.cuda_utils import driver
28+
from cuda.core.experimental._device import Device
2729

2830
__all__ = ['Buffer', 'MemoryResource']
2931

@@ -53,6 +55,8 @@ cdef class Buffer:
5355
self._ipc_data = None
5456
self._ptr_obj = None
5557
self._alloc_stream = None
58+
self._owner = None
59+
self._mem_attrs_inited = False
5660

5761
def __init__(self, *args, **kwargs):
5862
raise RuntimeError("Buffer objects cannot be instantiated directly. "
@@ -61,15 +65,19 @@ cdef class Buffer:
6165
@classmethod
def _init(
    cls,
    ptr: DevicePointerT,
    size_t size,
    mr: MemoryResource | None = None,
    stream: Stream | None = None,
    ipc_descriptor: IPCBufferDescriptor | None = None,
    owner: object | None = None,
):
    """Internal constructor: build a :class:`Buffer` around an existing pointer.

    Parameters
    ----------
    ptr : DevicePointerT
        Pointer object; it is stored as given and also converted with
        ``int(ptr)`` to obtain the raw address.
    size : int
        Memory size of the buffer.
    mr : MemoryResource, optional
        Memory resource associated with the buffer.
    stream : Stream, optional
        Stream recorded as the buffer's allocation stream.
    ipc_descriptor : IPCBufferDescriptor, optional
        When given, it is wrapped in ``IPCDataForBuffer``.
    owner : object, optional
        Object holding the external allocation that ``ptr`` points to.
        Cannot be combined with ``mr``.

    Raises
    ------
    ValueError
        If both ``mr`` and ``owner`` are specified.
    """
    cdef Buffer self = Buffer.__new__(cls)
    self._ptr = <uintptr_t>(int(ptr))
    self._ptr_obj = ptr
    self._size = size

    # A buffer is released either through its memory resource or by dropping
    # the reference to its owner, never both.
    if not (mr is None or owner is None):
        raise ValueError("owner and memory resource cannot be both specified together")
    self._memory_resource = mr

    if ipc_descriptor is None:
        self._ipc_data = None
    else:
        self._ipc_data = IPCDataForBuffer(ipc_descriptor, True)

    if stream is None:
        self._alloc_stream = None
    else:
        self._alloc_stream = <Stream>(stream)

    self._owner = owner
    return self
7482

7583
def __dealloc__(self):
@@ -81,7 +89,8 @@ cdef class Buffer:
8189

8290
@staticmethod
8391
def from_handle(
84-
ptr: DevicePointerT, size_t size, mr: MemoryResource | None = None
92+
ptr: DevicePointerT, size_t size, mr: MemoryResource | None = None,
93+
owner: object | None = None,
8594
) -> Buffer:
8695
"""Create a new :class:`Buffer` object from a pointer.
8796

@@ -93,9 +102,13 @@ cdef class Buffer:
93102
Memory size of the buffer
94103
mr : :obj:`~_memory.MemoryResource`, optional
95104
Memory resource associated with the buffer
105+
owner : object, optional
106+
An object holding external allocation that the ``ptr`` points to.
107+
The reference is kept as long as the buffer is alive.
108+
The ``owner`` and ``mr`` cannot be specified together.
96109
"""
97110
# TODO: It is better to take a stream for latter deallocation
98-
return Buffer._init(ptr, size, mr=mr)
111+
return Buffer._init(ptr, size, mr=mr, owner=owner)
99112

100113
@classmethod
101114
def from_ipc_descriptor(
@@ -330,7 +343,9 @@ cdef class Buffer:
330343
"""Return the device ordinal of this buffer."""
331344
if self._memory_resource is not None:
332345
return self._memory_resource.device_id
333-
raise NotImplementedError("WIP: Currently this property only supports buffers with associated MemoryResource")
346+
else:
347+
Buffer_init_mem_attrs(self)
348+
return self._mem_attrs.device_id
334349

335350
@property
336351
def handle(self) -> DevicePointerT:
@@ -354,14 +369,18 @@ cdef class Buffer:
354369
"""Return True if this buffer can be accessed by the GPU, otherwise False."""
355370
if self._memory_resource is not None:
356371
return self._memory_resource.is_device_accessible
357-
raise NotImplementedError("WIP: Currently this property only supports buffers with associated MemoryResource")
372+
else:
373+
Buffer_init_mem_attrs(self)
374+
return self._mem_attrs.is_device_accessible
358375

359376
@property
def is_host_accessible(self) -> bool:
    """Return True if this buffer can be accessed by the CPU, otherwise False.

    The answer comes from the associated memory resource when there is one;
    otherwise it is taken from the pointer attributes cached on the buffer.
    """
    if self._memory_resource is None:
        # No memory resource to ask: fall back to the (lazily queried)
        # attributes of the raw pointer.
        Buffer_init_mem_attrs(self)
        return self._mem_attrs.is_host_accessible
    return self._memory_resource.is_host_accessible
365384

366385
@property
367386
def is_mapped(self) -> bool:
@@ -379,20 +398,92 @@ cdef class Buffer:
379398
"""Return the memory size of this buffer."""
380399
return self._size
381400

401+
@property
402+
def owner(self) -> object:
403+
"""Return the object holding the external allocation, if any.

This is the ``owner`` the buffer was created with. It is ``None`` when no
owner was supplied, and also after ``Buffer_close`` has released the buffer.
"""
404+
return self._owner
405+
382406

383407
# Buffer Implementation
384408
# ---------------------
385409
cdef inline void Buffer_close(Buffer self, stream):
    # Release the buffer and drop every reference it holds.
    #
    # If the buffer has a memory resource, the memory is handed back to it via
    # ``deallocate`` on ``stream`` (or on the allocation stream when ``stream``
    # is None). Buffers without a memory resource only drop their references,
    # including the one to ``_owner``.
    cdef Stream dealloc_stream

    if not self._ptr:
        # Pointer is already 0: nothing to release.
        return

    if self._memory_resource is not None:
        if stream is not None:
            dealloc_stream = Stream_accept(stream)
        else:
            dealloc_stream = self._alloc_stream
        self._memory_resource.deallocate(self._ptr, self._size, dealloc_stream)

    self._ptr = 0
    self._memory_resource = None
    self._owner = None
    self._ptr_obj = None
    self._alloc_stream = None
394420

395421

422+
cdef Buffer_init_mem_attrs(Buffer self):
    # Populate ``self._mem_attrs`` from the driver on first use; later calls
    # are no-ops because the result is cached on the buffer.
    if self._mem_attrs_inited:
        return
    query_memory_attrs(self._mem_attrs, self._ptr)
    self._mem_attrs_inited = True
426+
427+
428+
cdef int query_memory_attrs(_MemAttrs &out, uintptr_t ptr) except -1 nogil:
    # Classify ``ptr`` and write the result into ``out``.
    #
    # The memory type, managed flag and device ordinal are fetched with
    # ``_query_memory_attrs`` and mapped as follows:
    #   * memory type 0          -> host only, device_id = -1
    #   * managed or HOST type   -> host and device accessible
    #   * DEVICE type            -> device accessible only
    # Any other memory type raises ValueError. Returns 0 on success.
    cdef unsigned int memory_type = 0
    cdef int is_managed = 0
    cdef int device_id = 0
    cdef bint host_visible
    _query_memory_attrs(memory_type, is_managed, device_id, <cydriver.CUdeviceptr>ptr)

    if memory_type == 0:
        # unregistered host pointer
        out.device_id = -1
        out.is_device_accessible = False
        out.is_host_accessible = True
        return 0

    # For managed memory, the memory type can be CU_MEMORYTYPE_DEVICE, so the
    # managed flag is consulted before the memory type in order not to falsely
    # claim the memory is not host accessible.
    #
    # For pinned memory allocated with cudaMallocHost or page-locked with
    # cudaHostRegister, the memory_type is
    # cydriver.CUmemorytype.CU_MEMORYTYPE_HOST.
    # TODO(ktokarski): In some cases, the registered memory requires
    # using different ptr for device and host, we could check
    # cuMemHostGetDevicePointer and
    # CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM
    # to double check the device accessibility.
    host_visible = is_managed != 0 or memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_HOST

    if not host_visible and memory_type != cydriver.CUmemorytype.CU_MEMORYTYPE_DEVICE:
        raise ValueError(f"Unsupported memory type: {memory_type}")

    # Both remaining cases (managed/host and plain device memory) are device
    # accessible and carry the queried ordinal; they differ only in host access.
    out.device_id = device_id
    out.is_device_accessible = True
    out.is_host_accessible = host_visible
    return 0
464+
465+
466+
cdef inline int _query_memory_attrs(
    unsigned int& memory_type,
    int& is_managed,
    int& device_id,
    cydriver.CUdeviceptr ptr,
) except -1 nogil:
    # Fetch memory type, managed flag and device ordinal of ``ptr`` with a
    # single ``cuPointerGetAttributes`` call, writing the results into the
    # three reference arguments. If the driver reports that it is not
    # initialized, ``Device()`` is constructed and the query is repeated once.
    # The final status goes through HANDLE_RETURN; returns 0 on success.
    cdef cydriver.CUpointer_attribute attr_ids[3]
    cdef void* out_slots[3]
    cdef cydriver.CUresult status

    # Slot i of ``out_slots`` receives the value of attribute i.
    attr_ids[0] = cydriver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MEMORY_TYPE
    out_slots[0] = <void*>&memory_type
    attr_ids[1] = cydriver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_MANAGED
    out_slots[1] = <void*>&is_managed
    attr_ids[2] = cydriver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL
    out_slots[2] = <void*>&device_id

    status = cydriver.cuPointerGetAttributes(3, attr_ids, out_slots, ptr)
    if status == cydriver.CUresult.CUDA_ERROR_NOT_INITIALIZED:
        with cython.gil:
            # Device class handles the cuInit call internally
            Device()
        status = cydriver.cuPointerGetAttributes(3, attr_ids, out_slots, ptr)
    HANDLE_RETURN(status)
    return 0
485+
486+
396487
cdef class MemoryResource:
397488
"""Abstract base class for memory resources that manage allocation and
398489
deallocation of buffers.

0 commit comments

Comments
 (0)