Skip to content

Commit eea9d7d

Browse files
yzh119 authored and junrushao committed
[Runtime] Device API to query L2 cache size (apache#15332)
Follow-up of apache#15305: this PR adds an API to query the device L2 cache size in bytes. Currently, the supported devices include CUDA, OpenCL, and ROCM. Note that OpenCL's API does not return the accurate device L2 cache size. I cannot find a Vulkan API that returns the L2 texture cache size, but the `vkCmdPipelineBarrier` call flushes the L2 texture cache automatically (https://zeux.io/2020/02/27/writing-an-efficient-vulkan-renderer/), thus we return 0 by default.
1 parent f04a640 commit eea9d7d

File tree

8 files changed

+47
-1
lines changed

8 files changed

+47
-1
lines changed

include/tvm/runtime/device_api.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ enum DeviceAttrKind : int {
4848
kMaxRegistersPerBlock = 9,
4949
kGcnArch = 10,
5050
kApiVersion = 11,
51-
kDriverVersion = 12
51+
kDriverVersion = 12,
52+
kL2CacheSizeBytes = 13,
5253
};
5354

5455
#ifdef TVM_KALLOC_ALIGNMENT

python/tvm/_ffi/runtime_ctypes.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,24 @@ def driver_version(self):
488488
"""
489489
return self._GetDeviceAttr(self.device_type, self.device_id, 12)
490490

491+
@property
492+
def l2_cache_size_bytes(self):
493+
"""Return the size of the device L2 cache in bytes
494+
495+
Supported devices include CUDA/ROCM/OpenCL.
496+
497+
Returns
498+
-------
499+
l2_cache_size_bytes : int or None
500+
The size of the device L2 cache in bytes returned by device runtime API.
501+
Return None if the device does not support this feature.
502+
503+
Note
504+
----
505+
The value returned by opencl's API is smaller than actual device L2 cache size.
506+
"""
507+
return self._GetDeviceAttr(self.device_type, self.device_id, 13)
508+
491509
def texture_spatial_limit(self):
492510
"""Returns limits for textures by spatial dimensions
493511

python/tvm/target/target.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,10 @@ def supports_cooperative_matrix(self):
244244
def features(self):
245245
return TargetFeatures(self)
246246

247+
@property
248+
def l2_cache_size_bytes(self):
249+
return int(self.attrs.get("l2_cache_size_bytes", 0))
250+
247251
def get_kind_attr(self, attr_name):
248252
"""Get additional attribute about the target kind.
249253

src/runtime/cuda/cuda_device_api.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,12 @@ class CUDADeviceAPI final : public DeviceAPI {
105105
}
106106
case kDriverVersion:
107107
return;
108+
case kL2CacheSizeBytes:
109+
// Get size of device l2 cache size in bytes.
110+
int l2_size = 0;
111+
CUDA_CALL(cudaDeviceGetAttribute(&l2_size, cudaDevAttrL2CacheSize, dev.device_id));
112+
*rv = l2_size;
113+
return;
108114
}
109115
*rv = value;
110116
}

src/runtime/metal/metal_device_api.mm

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@
8181
return;
8282
case kDriverVersion:
8383
return;
84+
case kL2CacheSizeBytes:
85+
return;
8486
}
8587
};
8688
}

src/runtime/opencl/opencl_device_api.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,13 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
198198
*rv = std::string(value);
199199
break;
200200
}
201+
case kL2CacheSizeBytes:
202+
// NOTE(Zihao): this API cannot reflect the real L2 cache size in both CUDA/AMD GPUs.
203+
cl_ulong value;
204+
OPENCL_CALL(clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(value), &value,
205+
nullptr));
206+
*rv = static_cast<int64_t>(value);
207+
break;
201208
}
202209
}
203210

src/runtime/rocm/rocm_device_api.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,11 @@ class ROCMDeviceAPI final : public DeviceAPI {
122122
}
123123
case kDriverVersion:
124124
return;
125+
case kL2CacheSizeBytes:
126+
// Get size of device l2 cache size in bytes.
127+
int l2_size;
128+
ROCM_CALL(hipDeviceGetAttribute(&l2_size, hipDeviceAttributeL2CacheSize, device.device_id));
129+
*rv = l2_size;
125130
}
126131
*rv = value;
127132
}

src/runtime/vulkan/vulkan_device_api.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,9 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
160160
*rv = os.str();
161161
break;
162162
}
163+
164+
case kL2CacheSizeBytes:
165+
break;
163166
}
164167
}
165168

0 commit comments

Comments
 (0)