diff --git a/cuda_core/cuda/core/experimental/_event.py b/cuda_core/cuda/core/experimental/_event.py
index bd97305a60..10d52dda64 100644
--- a/cuda_core/cuda/core/experimental/_event.py
+++ b/cuda_core/cuda/core/experimental/_event.py
@@ -47,8 +47,21 @@ class Event:
     the last recorded stream.
 
     Events can be used to monitor device's progress, query completion
-    of work up to event's record, and help establish dependencies
-    between GPU work submissions.
+    of work up to event's record, help establish dependencies
+    between GPU work submissions, and record the elapsed time (in milliseconds)
+    on the GPU:
+
+    .. code-block:: python
+
+        # To create events and record the timing:
+        s = Device().create_stream()
+        e1 = Device().create_event({"enable_timing": True})
+        e2 = Device().create_event({"enable_timing": True})
+        s.record(e1)
+        # ... run some GPU work ...
+        s.record(e2)
+        e2.sync()
+        print(f"time = {e2 - e1} milliseconds")
 
     Directly creating an :obj:`~_event.Event` is not supported due to ambiguity,
     and they should instead be created through a :obj:`~_stream.Stream` object.
@@ -96,6 +109,37 @@ def close(self):
         """Destroy the event."""
         self._mnff.close()
 
+    def __isub__(self, other):
+        return NotImplemented
+
+    def __rsub__(self, other):
+        return NotImplemented
+
+    def __sub__(self, other):
+        """Return the elapsed time from ``other`` to ``self``, in milliseconds.
+
+        Both events must have been created with timing enabled. A
+        :obj:`RuntimeError` chained to the underlying CUDA error is raised
+        when the driver cannot compute the elapsed time.
+        """
+        if not isinstance(other, Event):
+            # Let Python raise the usual TypeError for unsupported operands.
+            return NotImplemented
+        try:
+            timing = handle_return(driver.cuEventElapsedTime(other.handle, self.handle))
+        except CUDAError as e:
+            # The driver also fails for reasons unrelated to the timing flag (e.g. an
+            # event that was never recorded), so only blame the flag when it is off.
+            if self.is_timing_disabled or other.is_timing_disabled:
+                reason = (
+                    "Timing capability must be enabled in order to subtract two Events; "
+                    "timing is disabled by default."
+                )
+            else:
+                reason = "Unable to compute the elapsed time between the two Events; see the chained CUDA error."
+            raise RuntimeError(reason) from e
+        return timing
+
     @property
     def is_timing_disabled(self) -> bool:
         """Return True if the event does not record timing data, otherwise False."""
diff --git a/cuda_core/docs/source/release/0.2.0-notes.rst b/cuda_core/docs/source/release/0.2.0-notes.rst
index 9719377a35..02f586d58f 100644
--- a/cuda_core/docs/source/release/0.2.0-notes.rst
+++ b/cuda_core/docs/source/release/0.2.0-notes.rst
@@ -27,7 +27,8 @@ New features
 - Expose :class:`ObjectCode` as a public API, which allows loading cubins from memory or disk. For loading other kinds of code types, please continue using :class:`Program`.
 - A C++ helper function ``get_cuda_native_handle()`` is provided in the new ``include/utility.cuh`` header to retrive the underlying CUDA C objects (ex: ``CUstream``) from a Python object returned by the ``.handle`` attribute (ex: :attr:`Stream.handle`).
 - For objects such as :class:`Program` and :class:`Linker` that could dispatch to different backends, a new ``.backend`` attribute is provided to query this information.
-- An :class:`~_event.Event` may now be created without recording it to a :class:`Stream` using the :meth:`Device.create_event`` method.
+- Support CUDA event timing.
+- An :class:`~_event.Event` may now be created without recording it to a :class:`~_stream.Stream` using the :meth:`Device.create_event` method.
 
 Limitations
 -----------
diff --git a/cuda_core/tests/test_event.py b/cuda_core/tests/test_event.py
index 3cae72bc30..384cf45867 100644
--- a/cuda_core/tests/test_event.py
+++ b/cuda_core/tests/test_event.py
@@ -6,6 +6,8 @@
 # this software and related documentation outside the terms of the EULA
 # is strictly prohibited.
 
+import time
+
 import pytest
 
 import cuda.core.experimental
@@ -21,8 +23,29 @@ def test_event_init_disabled():
 def test_timing(init_cuda, enable_timing):
     options = EventOptions(enable_timing=enable_timing)
     stream = Device().create_stream()
-    event = stream.record(options=options)
-    assert event.is_timing_disabled == (not enable_timing if enable_timing is not None else True)
+    delay_seconds = 0.5
+    host_start = time.perf_counter()
+    e1 = stream.record(options=options)
+    time.sleep(delay_seconds)
+    e2 = stream.record(options=options)
+    e2.sync()
+    host_elapsed_ms = (time.perf_counter() - host_start) * 1000
+    for event in (e1, e2):
+        assert event.is_timing_disabled == (True if enable_timing is None else not enable_timing)
+    if enable_timing:
+        elapsed_time_ms = e2 - e1
+        assert isinstance(elapsed_time_ms, float)
+        # time.sleep() may overshoot by an arbitrary amount, so bound the GPU time by
+        # the host-measured interval rather than by the requested delay.
+        assert delay_seconds * 1000 <= elapsed_time_ms < host_elapsed_ms + 2  # tolerance 2 ms
+    else:
+        with pytest.raises(RuntimeError) as excinfo:
+            elapsed_time_ms = e2 - e1
+        # Inspect the exception only after the block exits; statements placed after
+        # the raising line inside the ``with`` body never run.
+        msg = str(excinfo.value)
+        assert "disabled by default" in msg
+        assert "CUDA_ERROR_INVALID_HANDLE" in str(excinfo.value.__cause__)
 
 
 def test_is_sync_busy_waited(init_cuda):