Skip to content

Commit 2060af5

Browse files
committed
Queue up 'skip reasons' (#1352)
1 parent 7db313b commit 2060af5

5 files changed

Lines changed: 88 additions & 72 deletions

File tree

cuda_bindings/tests/nvml/conftest.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,9 @@ def get_devices(device_info):
7272

7373

7474
@pytest.fixture
75-
def for_all_devices(device_info):
75+
def all_devices(device_info):
7676
with NVMLInitializer():
77-
unique_devices = set()
78-
for device_id in get_devices(device_info):
79-
if device_id not in unique_devices:
80-
unique_devices.add(device_id)
81-
yield device_id
82-
# RestoreDefaultEnvironment.restore()
77+
yield sorted(list(set(get_devices(device_info))))
8378

8479

8580
@pytest.fixture

cuda_bindings/tests/nvml/test_compute_mode.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,19 @@
1616

1717

1818
@pytest.mark.skipif(sys.platform == "win32", reason="Test not supported on Windows")
19-
def test_compute_mode_supported_nonroot(for_all_devices):
20-
device = for_all_devices
21-
22-
try:
23-
original_compute_mode = nvml.device_get_compute_mode(device)
24-
except nvml.NotSupportedError:
25-
pytest.skip("nvmlDeviceGetComputeMode not supported")
26-
27-
for cm in COMPUTE_MODES:
28-
with pytest.raises(nvml.NoPermissionError):
29-
nvml.device_set_compute_mode(device, cm)
30-
assert original_compute_mode == nvml.device_get_compute_mode(device), "Compute mode shouldn't have changed"
19+
def test_compute_mode_supported_nonroot(all_devices):
20+
skip_reasons = set()
21+
for device in all_devices:
22+
try:
23+
original_compute_mode = nvml.device_get_compute_mode(device)
24+
except nvml.NotSupportedError:
25+
skip_reasons.add(f"nvmlDeviceGetComputeMode not supported for device {device}")
26+
continue
27+
28+
for cm in COMPUTE_MODES:
29+
with pytest.raises(nvml.NoPermissionError):
30+
nvml.device_set_compute_mode(device, cm)
31+
assert original_compute_mode == nvml.device_get_compute_mode(device), "Compute mode shouldn't have changed"
32+
33+
if skip_reasons:
34+
pytest.skip(" ; ".join(skip_reasons))

cuda_bindings/tests/nvml/test_gpu.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,24 @@ def test_gpu_get_module_id(nvml_init):
2323
assert isinstance(module_id, int)
2424

2525

26-
def test_gpu_get_platform_info(for_all_devices):
27-
device = for_all_devices
26+
def test_gpu_get_platform_info(all_devices):
27+
skip_reasons = set()
28+
for device in all_devices:
29+
if util.is_vgpu(device):
30+
skip_reasons.add(f"Not supported on vGPU device {device}")
31+
continue
2832

29-
if util.is_vgpu(device):
30-
pytest.skip("Not supported on vGPU device")
33+
# TODO
34+
# if device.feature_dict.board.chip < board_class.Architecture.Blackwell:
35+
# test_utils.skip_test("Not supported on chip before Blackwell")
3136

32-
# TODO
33-
# if device.feature_dict.board.chip < board_class.Architecture.Blackwell:
34-
# test_utils.skip_test("Not supported on chip before Blackwell")
37+
try:
38+
platform_info = nvml.device_get_platform_info(device)
39+
except nvml.NotSupportedError:
40+
skip_reasons.add(f"Not supported returned, likely NVLink is disabled for {device}")
41+
continue
3542

36-
try:
37-
platform_info = nvml.device_get_platform_info(device)
38-
except nvml.NotSupportedError:
39-
pytest.skip("Not supported returned, likely NVLink is disabled.")
43+
assert isinstance(platform_info, nvml.PlatformInfo_v2)
4044

41-
assert isinstance(platform_info, nvml.PlatformInfo_v2)
45+
if skip_reasons:
46+
pytest.skip(" ; ".join(skip_reasons))

cuda_bindings/tests/nvml/test_nvlink.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,25 @@
55
from cuda.bindings import _nvml as nvml
66

77

8-
def test_nvlink_get_link_count(for_all_devices):
8+
def test_nvlink_get_link_count(all_devices):
99
"""
1010
Checks that the link count of the device is same.
1111
"""
12-
device = for_all_devices
12+
for device in all_devices:
13+
fields = nvml.FieldValue(1)
14+
fields[0].field_id = nvml.FI.DEV_NVLINK_LINK_COUNT
15+
value = nvml.device_get_field_values(device, fields)[0]
16+
assert value.nvml_return == nvml.Return.SUCCESS or value.nvml_return == nvml.Return.ERROR_NOT_SUPPORTED, (
17+
f"Unexpected return {value.nvml_return} for link count field query"
18+
)
1319

14-
fields = nvml.FieldValue(1)
15-
fields[0].field_id = nvml.FI.DEV_NVLINK_LINK_COUNT
16-
value = nvml.device_get_field_values(device, fields)[0]
17-
assert value.nvml_return == nvml.Return.SUCCESS or value.nvml_return == nvml.Return.ERROR_NOT_SUPPORTED, (
18-
f"Unexpected return {value.nvml_return} for link count field query"
19-
)
20+
# Use the alternative argument to device_get_field_values
21+
value = nvml.device_get_field_values(device, [nvml.FI.DEV_NVLINK_LINK_COUNT])[0]
22+
assert value.nvml_return == nvml.Return.SUCCESS or value.nvml_return == nvml.Return.ERROR_NOT_SUPPORTED, (
23+
f"Unexpected return {value.nvml_return} for link count field query"
24+
)
2025

21-
# Use the alternative argument to device_get_field_values
22-
value = nvml.device_get_field_values(device, [nvml.FI.DEV_NVLINK_LINK_COUNT])[0]
23-
assert value.nvml_return == nvml.Return.SUCCESS or value.nvml_return == nvml.Return.ERROR_NOT_SUPPORTED, (
24-
f"Unexpected return {value.nvml_return} for link count field query"
25-
)
26-
27-
# The feature_nvlink_supported detection is not robust, so we
28-
# can't be more specific about how many links we should find.
29-
if value.nvml_return == nvml.Return.SUCCESS:
30-
assert value.value.ui_val <= nvml.NVLINK_MAX_LINKS, f"Unexpected link count {value.value.ui_val}"
26+
# The feature_nvlink_supported detection is not robust, so we
27+
# can't be more specific about how many links we should find.
28+
if value.nvml_return == nvml.Return.SUCCESS:
29+
assert value.value.ui_val <= nvml.NVLINK_MAX_LINKS, f"Unexpected link count {value.value.ui_val}"

cuda_bindings/tests/nvml/test_page_retirement.py

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -21,42 +21,55 @@ def supports_page_retirement(device):
2121
return False
2222

2323

24-
def test_page_retirement_notsupported(for_all_devices):
24+
def test_page_retirement_notsupported(all_devices):
2525
"""
2626
Verifies that on platforms that don't support page retirement, APIs will return Not Supported
2727
"""
28-
device = for_all_devices
28+
skip_reasons = set()
2929

30-
if supports_page_retirement(device):
31-
pytest.skip("page_retirement not supported")
30+
for device in all_devices:
31+
if supports_page_retirement(device):
32+
skip_reasons.add(f"page_retirement is supported for {device}")
33+
continue
3234

33-
if not util.supports_ecc(device):
34-
pytest.skip("device doesn't support ECC")
35+
if not util.supports_ecc(device):
36+
skip_reasons.add(f"device doesn't support ECC for {device}")
37+
continue
3538

36-
with pytest.raises(nvml.NotSupportedError):
37-
for source in PAGE_RETIREMENT_PUBLIC_CAUSE_TYPES:
38-
nvml.device_get_retired_pages(device, source)
39+
with pytest.raises(nvml.NotSupportedError):
40+
for source in PAGE_RETIREMENT_PUBLIC_CAUSE_TYPES:
41+
nvml.device_get_retired_pages(device, source)
3942

40-
with pytest.raises(nvml.NotSupportedError):
41-
nvml.device_get_retired_pages_pending_status(device)
43+
with pytest.raises(nvml.NotSupportedError):
44+
nvml.device_get_retired_pages_pending_status(device)
4245

46+
if skip_reasons:
47+
pytest.skip(" ; ".join(skip_reasons))
4348

44-
def test_page_retirement_supported(for_all_devices):
49+
50+
def test_page_retirement_supported(all_devices):
4551
"""
4652
Verifies that on platforms that support page_retirement, APIs will return success
4753
"""
48-
device = for_all_devices
54+
skip_reasons = set()
4955

50-
if not supports_page_retirement(device):
51-
pytest.skip("page_retirement not supported")
56+
for device in all_devices:
57+
if not supports_page_retirement(device):
58+
skip_reasons.add(f"page_retirement not supported for {device}")
59+
continue
5260

53-
if not util.supports_ecc(device):
54-
pytest.skip("device doesn't support ECC")
61+
if not util.supports_ecc(device):
62+
skip_reasons.add(f"device doesn't support ECC for {device}")
63+
continue
5564

56-
try:
57-
for source in PAGE_RETIREMENT_PUBLIC_CAUSE_TYPES:
58-
nvml.device_get_retired_pages(device, source)
59-
except nvml.NotSupportedError:
60-
pytest.skip("Exception case: Page retirment is not supported in this GPU")
65+
try:
66+
for source in PAGE_RETIREMENT_PUBLIC_CAUSE_TYPES:
67+
nvml.device_get_retired_pages(device, source)
68+
except nvml.NotSupportedError:
69+
skip_reasons.add(f"Exception case: Page retirement is not supported in this GPU {device}")
70+
continue
71+
72+
nvml.device_get_retired_pages_pending_status(device)
6173

62-
nvml.device_get_retired_pages_pending_status(device)
74+
if skip_reasons:
75+
pytest.skip(" ; ".join(skip_reasons))

0 commit comments

Comments
 (0)