44
55from __future__ import annotations
66
7+ cimport cython
78from libc.stdint cimport uintptr_t
89
910from cuda.bindings cimport cydriver
2425
2526from cuda.core.experimental._dlpack import DLDeviceType, make_py_capsule
2627from cuda.core.experimental._utils.cuda_utils import driver
28+ from cuda.core.experimental._device import Device
2729
# Public names exported by ``from <module> import *``. Entries must match the
# class names exactly (no surrounding whitespace).
__all__ = ['Buffer', 'MemoryResource']
2931
@@ -53,6 +55,8 @@ cdef class Buffer:
5355 self ._ipc_data = None
5456 self ._ptr_obj = None
5557 self ._alloc_stream = None
58+ self ._owner = None
59+ self ._mem_attrs_inited = False
5660
5761 def __init__ (self , *args , **kwargs ):
5862 raise RuntimeError (" Buffer objects cannot be instantiated directly. "
@@ -61,15 +65,19 @@ cdef class Buffer:
@classmethod
def _init(
    cls, ptr: DevicePointerT, size_t size, mr: MemoryResource | None = None,
    stream: Stream | None = None, ipc_descriptor: IPCBufferDescriptor | None = None,
    owner: object | None = None
):
    """Build a :class:`Buffer` around an existing pointer (internal factory).

    Parameters
    ----------
    ptr : DevicePointerT
        Pointer-like handle. It is converted with ``int(ptr)`` for the raw
        address and also stored unchanged.
    size : int
        Memory size of the buffer.
    mr : MemoryResource, optional
        Memory resource associated with the buffer.
    stream : Stream, optional
        Stream stored as the buffer's allocation stream.
    ipc_descriptor : IPCBufferDescriptor, optional
        When given, it is wrapped in ``IPCDataForBuffer`` and stored on the
        buffer.
    owner : object, optional
        Object stored on the buffer so that a reference to it is held.
        Cannot be combined with ``mr``.

    Raises
    ------
    ValueError
        If both ``mr`` and ``owner`` are provided.
    """
    cdef Buffer self
    # Reject the invalid combination before any instance is created, so a
    # failing call never leaves a half-initialized Buffer behind.
    if mr is not None and owner is not None:
        raise ValueError("owner and memory resource cannot be both specified together")
    self = Buffer.__new__(cls)
    self._ptr = <uintptr_t>(int(ptr))
    self._ptr_obj = ptr
    self._size = size
    self._memory_resource = mr
    self._ipc_data = IPCDataForBuffer(ipc_descriptor, True) if ipc_descriptor is not None else None
    self._alloc_stream = <Stream>(stream) if stream is not None else None
    self._owner = owner
    return self
7482
7583 def __dealloc__ (self ):
@@ -81,7 +89,8 @@ cdef class Buffer:
8189
8290 @staticmethod
8391 def from_handle (
84- ptr: DevicePointerT , size_t size , mr: MemoryResource | None = None
92+ ptr: DevicePointerT , size_t size , mr: MemoryResource | None = None ,
93+ owner: object | None = None ,
8594 ) -> Buffer:
8695 """Create a new :class:`Buffer` object from a pointer.
8796
@@ -93,9 +102,13 @@ cdef class Buffer:
93102 Memory size of the buffer
94103 mr : :obj:`~_memory.MemoryResource`, optional
95104 Memory resource associated with the buffer
105+ owner : object , optional
106+ An object holding external allocation that the ``ptr`` points to.
107+ The reference is kept as long as the buffer is alive.
108+ The ``owner`` and ``mr`` cannot be specified together.
96109 """
97110 # TODO: It is better to take a stream for later deallocation
98- return Buffer._init(ptr , size , mr = mr)
111+ return Buffer._init(ptr , size , mr = mr, owner = owner )
99112
100113 @classmethod
101114 def from_ipc_descriptor(
@@ -330,7 +343,9 @@ cdef class Buffer:
330343 """Return the device ordinal of this buffer."""
331344 if self._memory_resource is not None:
332345 return self._memory_resource.device_id
333- raise NotImplementedError("WIP: Currently this property only supports buffers with associated MemoryResource")
346+ else:
347+ Buffer_init_mem_attrs(self )
348+ return self._mem_attrs.device_id
334349
335350 @property
336351 def handle(self ) -> DevicePointerT:
@@ -354,14 +369,18 @@ cdef class Buffer:
354369 """Return True if this buffer can be accessed by the GPU , otherwise False."""
355370 if self._memory_resource is not None:
356371 return self._memory_resource.is_device_accessible
357- raise NotImplementedError("WIP: Currently this property only supports buffers with associated MemoryResource")
372+ else:
373+ Buffer_init_mem_attrs(self )
374+ return self._mem_attrs.is_device_accessible
358375
359376 @property
360377 def is_host_accessible(self ) -> bool:
361378 """Return True if this buffer can be accessed by the CPU , otherwise False."""
362379 if self._memory_resource is not None:
363380 return self._memory_resource.is_host_accessible
364- raise NotImplementedError("WIP: Currently this property only supports buffers with associated MemoryResource")
381+ else:
382+ Buffer_init_mem_attrs(self )
383+ return self._mem_attrs.is_host_accessible
365384
366385 @property
367386 def is_mapped(self ) -> bool:
@@ -379,20 +398,92 @@ cdef class Buffer:
379398 """Return the memory size of this buffer."""
380399 return self._size
381400
@property
def owner(self) -> object:
    """The object holding the external allocation, as stored on this buffer.

    This is whatever was passed as ``owner`` at construction; it reads back
    as ``None`` when no owner is set.
    """
    return self._owner
405+
382406
383407# Buffer Implementation
384408# ---------------------
# Hand the memory back to the memory resource (if the buffer has one) and
# drop the references the buffer holds.
# NOTE(review): the attribute resets are placed inside the ``if self._ptr``
# branch; the exact nesting is not recoverable from the flattened diff this
# was reconstructed from -- confirm against the real file.
cdef inline void Buffer_close(Buffer self, stream):
    cdef Stream s
    if self._ptr:
        if self._memory_resource is not None:
            # An explicitly passed stream takes precedence over the stream
            # recorded at allocation time.
            if stream is not None:
                s = Stream_accept(stream)
            else:
                s = self._alloc_stream
            self._memory_resource.deallocate(self._ptr, self._size, s)
        # Zero the pointer and release every held reference.
        self._ptr = 0
        self._memory_resource = None
        self._owner = None
        self._ptr_obj = None
        self._alloc_stream = None
395421
# Fill ``self._mem_attrs`` by querying the pointer's attributes, unless a
# previous call already did so successfully.
cdef Buffer_init_mem_attrs(Buffer self):
    if self._mem_attrs_inited:
        return
    query_memory_attrs(self._mem_attrs, self._ptr)
    # The flag is set only after the query returned without raising.
    self._mem_attrs_inited = True
426+
427+
# Classify the memory behind ``ptr`` and write the result into ``out``:
# host/device accessibility flags and the device ordinal (-1 for plain,
# unregistered host memory).
#
# Returns 0 on success. Raises ValueError for a memory type that is neither
# unregistered host, managed, host, nor device memory; errors from the
# underlying driver query propagate as well.
cdef int query_memory_attrs(_MemAttrs &out, uintptr_t ptr) except -1 nogil:
    cdef unsigned int memory_type = 0
    cdef int is_managed = 0
    cdef int device_id = 0
    _query_memory_attrs(memory_type, is_managed, device_id, <cydriver.CUdeviceptr>ptr)

    if memory_type == 0:
        # unregistered host pointer
        out.is_host_accessible = True
        out.is_device_accessible = False
        out.device_id = -1
    # for managed memory, the memory type can be CU_MEMORYTYPE_DEVICE,
    # so we need to check it first not to falsely claim it is not
    # host accessible.
    elif (
        is_managed
        or memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_HOST
    ):
        # For pinned memory allocated with cudaMallocHost or page-locked
        # with cudaHostRegister, the memory_type is
        # cydriver.CUmemorytype.CU_MEMORYTYPE_HOST.
        # TODO(ktokarski): In some cases, the registered memory requires
        # using different ptr for device and host, we could check
        # cuMemHostGetDevicePointer and
        # CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM
        # to double check the device accessibility.
        out.is_host_accessible = True
        out.is_device_accessible = True
        out.device_id = device_id
    elif memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_DEVICE:
        out.is_host_accessible = False
        out.is_device_accessible = True
        out.device_id = device_id
    else:
        raise ValueError(f"Unsupported memory type: {memory_type}")
    return 0
464+
465+
# Fetch three pointer attributes (memory type, managed flag, device ordinal)
# for ``ptr`` in a single cuPointerGetAttributes call, writing them into the
# reference arguments. If the driver reports that it is not initialized, a
# Device object is created and the query is issued a second time.
# Returns 0 on success; a failing driver status is passed to HANDLE_RETURN.
cdef inline int _query_memory_attrs(unsigned int& memory_type, int& is_managed, int& device_id, cydriver.CUdeviceptr ptr) except -1 nogil:
    cdef cydriver.CUpointer_attribute attr_ids[3]
    cdef uintptr_t out_addrs[3]
    cdef cydriver.CUresult status

    # Each requested attribute is paired with the address of the variable
    # that receives its value.
    attr_ids[0] = cydriver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MEMORY_TYPE
    out_addrs[0] = <uintptr_t><void*>&memory_type
    attr_ids[1] = cydriver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_MANAGED
    out_addrs[1] = <uintptr_t><void*>&is_managed
    attr_ids[2] = cydriver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL
    out_addrs[2] = <uintptr_t><void*>&device_id

    status = cydriver.cuPointerGetAttributes(3, attr_ids, <void**>out_addrs, ptr)
    if status == cydriver.CUresult.CUDA_ERROR_NOT_INITIALIZED:
        with cython.gil:
            # Device class handles the cuInit call internally
            Device()
        status = cydriver.cuPointerGetAttributes(3, attr_ids, <void**>out_addrs, ptr)
    HANDLE_RETURN(status)
    return 0
485+
486+
396487cdef class MemoryResource:
397488 """ Abstract base class for memory resources that manage allocation and
398489 deallocation of buffers.
0 commit comments