Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 45 additions & 25 deletions cuda_core/examples/strided_memory_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
# ################################################################################

import importlib
import shutil
import string
import sys
import tempfile

try:
from cffi import FFI
Expand Down Expand Up @@ -73,8 +75,18 @@
source_extension=".cpp",
extra_compile_args=["-std=c++11"],
)
cpu_prog.compile()
cpu_func = getattr(importlib.import_module("_cpu_obj.lib"), func_name)
temp_dir = tempfile.mkdtemp()
try:
cpu_prog.compile(tmpdir=temp_dir)
finally:
shutil.rmtree(temp_dir)
saved_sys_path = sys.path
try:
sys.path.append(temp_dir)
cpu_func = getattr(importlib.import_module("_cpu_obj.lib"), func_name)
Comment thread
leofang marked this conversation as resolved.
finally:
sys.path = saved_sys_path
shutil.rmtree(temp_dir)

# Here is a concrete (again, very naive!) implementation on GPU:
if cp:
Expand Down Expand Up @@ -138,31 +150,39 @@ def my_func(arr, work_stream):
cpu_func(cpu_prog.cast("int*", view.ptr), size)


# This takes the CPU path
if FFI:
# Create input array on CPU
arr_cpu = np.zeros(1024, dtype=np.int32)
print(f"before: {arr_cpu[:10]=}")

# Run the workload
my_func(arr_cpu, None)

# Check the result
print(f"after: {arr_cpu[:10]=}")
assert np.allclose(arr_cpu, np.arange(1024, dtype=np.int32))


# This takes the GPU path
if cp:
s = dev.create_stream()
# Create input array on GPU
arr_gpu = cp.ones(1024, dtype=cp.int32)
print(f"before: {arr_gpu[:10]=}")
try:
# Create input array on GPU
arr_gpu = cp.ones(1024, dtype=cp.int32)
print(f"before: {arr_gpu[:10]=}")

# Run the workload
my_func(arr_gpu, s)

# Run the workload
my_func(arr_gpu, s)
# Check the result
print(f"after: {arr_gpu[:10]=}")
assert cp.allclose(arr_gpu, 1 + cp.arange(1024, dtype=cp.int32))
finally:
s.close()

# Check the result
print(f"after: {arr_gpu[:10]=}")
assert cp.allclose(arr_gpu, 1 + cp.arange(1024, dtype=cp.int32))
s.close()
# This takes the CPU path
if FFI:
try:
# Create input array on CPU
arr_cpu = np.zeros(1024, dtype=np.int32)
print(f"before: {arr_cpu[:10]=}")

# Run the workload
my_func(arr_cpu, None)

# Check the result
print(f"after: {arr_cpu[:10]=}")
assert np.allclose(arr_cpu, np.arange(1024, dtype=np.int32))
finally:
# to allow the FFI module to unload, we delete the references
# to cpu_func
del cpu_func, my_func
# clean up temp directory
shutil.rmtree(temp_dir)
17 changes: 0 additions & 17 deletions cuda_core/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
# Copyright 2024 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import glob
import os
import sys

try:
from cuda.bindings import driver
Expand Down Expand Up @@ -67,21 +65,6 @@ def pop_all_contexts():
return pop_all_contexts


# samples relying on cffi could fail as the modules cannot be imported
sys.path.append(os.getcwd())


@pytest.fixture(scope="session", autouse=True)
def clean_up_cffi_files():
yield
files = glob.glob(os.path.join(os.getcwd(), "_cpu_obj*"))
for f in files:
try: # noqa: SIM105
os.remove(f)
except FileNotFoundError:
pass # noqa: SIM105


skipif_testing_with_compute_sanitizer = pytest.mark.skipif(
os.environ.get("CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER", "0") == "1",
reason="The compute-sanitizer is running, and this test causes an API error.",
Expand Down