55# this software. Any use, reproduction, disclosure, or distribution of
66# this software and related documentation outside the terms of the EULA
77# is strictly prohibited.
8- import pytest
98import ctypes
109
10+ import pytest
11+
# Always skip since cupy is not CTK 12.x yet.
# The flag starts out True, so the guarded import below is currently never
# attempted; it is kept so the probe can be re-enabled by flipping the flag.
skip_tests = True
if not skip_tests:
    try:
        import cupy

        skip_tests = False
    except ImportError:
        # cupy is unavailable: keep every benchmark in this module skipped.
        skip_tests = True
1921
2022from .kernels import kernel_string
2123
24+
def launch(kernel, args=()):
    """Call ``kernel`` once with ``(1,)``, ``(1,)`` and ``args``.

    Args:
        kernel: Callable invoked as ``kernel((1,), (1,), args)``. In this
            module it is the object returned by ``RawModule.get_function``,
            so the two leading tuples are presumably the grid and block
            dimensions -- confirm against the cupy ``RawKernel`` call
            signature.
        args: Sequence of kernel arguments, forwarded unchanged. Defaults to
            an empty tuple.
    """
    kernel((1,), (1,), args)
2427
28+
2529# Measure launch latency with no parameters
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
@pytest.mark.benchmark(group="cupy")
def test_launch_latency_empty_kernel(benchmark):
    """Benchmark ``launch`` on the ``empty_kernel`` function with no arguments.

    Args:
        benchmark: Callable fixture; it is handed ``launch`` and the kernel
            and is responsible for invoking and timing them.
    """
    module = cupy.RawModule(code=kernel_string)
    kernel = module.get_function("empty_kernel")

    stream = cupy.cuda.stream.Stream(non_blocking=True)

    # Launches issued inside the block go to the non-blocking stream created above.
    with stream:
        benchmark(launch, kernel)
    # Wait for all queued launches to finish before the test returns.
    stream.synchronize()
3741
42+
3843# Measure launch latency with a single parameter
3944@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
4045@pytest .mark .benchmark (group = "cupy" )
4146def test_launch_latency_small_kernel (benchmark ):
4247 module = cupy .RawModule (code = kernel_string )
43- kernel = module .get_function (' small_kernel' )
48+ kernel = module .get_function (" small_kernel" )
4449 cupy .cuda .set_allocator ()
4550 arg = cupy .cuda .alloc (ctypes .sizeof (ctypes .c_float ))
4651
@@ -50,12 +55,13 @@ def test_launch_latency_small_kernel(benchmark):
5055 benchmark (launch , kernel , (arg ,))
5156 stream .synchronize ()
5257
58+
5359# Measure launch latency with many parameters using builtin parameter packing
5460@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
5561@pytest .mark .benchmark (group = "cupy" )
5662def test_launch_latency_small_kernel_512_args (benchmark ):
5763 module = cupy .RawModule (code = kernel_string )
58- kernel = module .get_function (' small_kernel_512_args' )
64+ kernel = module .get_function (" small_kernel_512_args" )
5965 cupy .cuda .set_allocator ()
6066
6167 args = []
@@ -69,12 +75,13 @@ def test_launch_latency_small_kernel_512_args(benchmark):
6975 benchmark (launch , kernel , args )
7076 stream .synchronize ()
7177
78+
7279# Measure launch latency with many parameters using builtin parameter packing
7380@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
7481@pytest .mark .benchmark (group = "cupy" )
7582def test_launch_latency_small_kernel_512_bools (benchmark ):
7683 module = cupy .RawModule (code = kernel_string )
77- kernel = module .get_function (' small_kernel_512_bools' )
84+ kernel = module .get_function (" small_kernel_512_bools" )
7885 cupy .cuda .set_allocator ()
7986
8087 args = [True ] * 512
@@ -86,12 +93,13 @@ def test_launch_latency_small_kernel_512_bools(benchmark):
8693 benchmark (launch , kernel , args )
8794 stream .synchronize ()
8895
96+
8997# Measure launch latency with many parameters using builtin parameter packing
9098@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
9199@pytest .mark .benchmark (group = "cupy" )
92100def test_launch_latency_small_kernel_512_doubles (benchmark ):
93101 module = cupy .RawModule (code = kernel_string )
94- kernel = module .get_function (' small_kernel_512_doubles' )
102+ kernel = module .get_function (" small_kernel_512_doubles" )
95103 cupy .cuda .set_allocator ()
96104
97105 args = [1.2345 ] * 512
@@ -103,12 +111,13 @@ def test_launch_latency_small_kernel_512_doubles(benchmark):
103111 benchmark (launch , kernel , args )
104112 stream .synchronize ()
105113
114+
106115# Measure launch latency with many parameters using builtin parameter packing
107116@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
108117@pytest .mark .benchmark (group = "cupy" )
109118def test_launch_latency_small_kernel_512_ints (benchmark ):
110119 module = cupy .RawModule (code = kernel_string )
111- kernel = module .get_function (' small_kernel_512_ints' )
120+ kernel = module .get_function (" small_kernel_512_ints" )
112121 cupy .cuda .set_allocator ()
113122
114123 args = [123 ] * 512
@@ -120,12 +129,13 @@ def test_launch_latency_small_kernel_512_ints(benchmark):
120129 benchmark (launch , kernel , args )
121130 stream .synchronize ()
122131
132+
123133# Measure launch latency with many parameters using builtin parameter packing
124134@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
125135@pytest .mark .benchmark (group = "cupy" )
126136def test_launch_latency_small_kernel_512_bytes (benchmark ):
127137 module = cupy .RawModule (code = kernel_string )
128- kernel = module .get_function (' small_kernel_512_chars' )
138+ kernel = module .get_function (" small_kernel_512_chars" )
129139 cupy .cuda .set_allocator ()
130140
131141 args = [127 ] * 512
@@ -137,12 +147,13 @@ def test_launch_latency_small_kernel_512_bytes(benchmark):
137147 benchmark (launch , kernel , args )
138148 stream .synchronize ()
139149
150+
140151# Measure launch latency with many parameters using builtin parameter packing
141152@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
142153@pytest .mark .benchmark (group = "cupy" )
143154def test_launch_latency_small_kernel_512_longlongs (benchmark ):
144155 module = cupy .RawModule (code = kernel_string )
145- kernel = module .get_function (' small_kernel_512_longlongs' )
156+ kernel = module .get_function (" small_kernel_512_longlongs" )
146157 cupy .cuda .set_allocator ()
147158
148159 args = [9223372036854775806 ] * 512
@@ -154,12 +165,13 @@ def test_launch_latency_small_kernel_512_longlongs(benchmark):
154165 benchmark (launch , kernel , args )
155166 stream .synchronize ()
156167
168+
157169# Measure launch latency with many parameters using builtin parameter packing
158170@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
159171@pytest .mark .benchmark (group = "cupy" )
160172def test_launch_latency_small_kernel_256_args (benchmark ):
161173 module = cupy .RawModule (code = kernel_string )
162- kernel = module .get_function (' small_kernel_256_args' )
174+ kernel = module .get_function (" small_kernel_256_args" )
163175 cupy .cuda .set_allocator ()
164176
165177 args = []
@@ -173,12 +185,13 @@ def test_launch_latency_small_kernel_256_args(benchmark):
173185 benchmark (launch , kernel , args )
174186 stream .synchronize ()
175187
188+
176189# Measure launch latency with many parameters using builtin parameter packing
177190@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
178191@pytest .mark .benchmark (group = "cupy" )
179192def test_launch_latency_small_kernel_16_args (benchmark ):
180193 module = cupy .RawModule (code = kernel_string )
181- kernel = module .get_function (' small_kernel_16_args' )
194+ kernel = module .get_function (" small_kernel_16_args" )
182195 cupy .cuda .set_allocator ()
183196
184197 args = []
0 commit comments