Skip to content

Commit f18d8d4

Browse files
avulishaavulisha
authored andcommitted
[Runtime][Vulkan] Add RGP support to TVM for vulkan device
RGP (Radeon GPU Profiler) is a tool used to analyze applications running on AMD GPUs. RGP captures data based on VKPresent and provides hardware-specific information, allowing the developer to optimize the application. To add RGP support to TVM, the debug labels "AmdFrameBegin" and "AmdFrameEnd" need to be inserted into the Vulkan queue. These labels help the RGP tool identify the start and end of a frame when no present is available, thus enabling the RGP tool to capture and analyze the data. At runtime, set the environment variable "TVM_USE_AMD_RGP=1" to start inserting the debug labels into the Vulkan queue. Signed-off-by: Wilkin Chau <Wing-Ki.ChauWilkin@amd.com> Signed-off-by: Anurag Kumar Vulisha <AnuragKumar.Vulisha@amd.com>
1 parent c5bd181 commit f18d8d4

File tree

9 files changed

+183
-0
lines changed

9 files changed

+183
-0
lines changed
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "vulkan_device.h"
21+
22+
namespace tvm {
23+
namespace runtime {
24+
namespace vulkan {
25+
26+
VulkanStreamProfiler::VulkanStreamProfiler(const VulkanDevice* device)
27+
: device_(device), curr_state_(READY), available_(device->UseDebugUtilsLabel()) {}
28+
29+
void AmdRgpProfiler::capture() {
30+
if (!available_) {
31+
return;
32+
}
33+
34+
// Trigger RGP capture by using dummy present and switch state from READY to RUNNING
35+
if (curr_state_ == READY) {
36+
VkDebugUtilsLabelEXT frame_end_label = {
37+
VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, NULL, "AmdFrameEnd", {0.0f, 0.0f, 0.0f, 0.0f}};
38+
device_->queue_insert_debug_utils_label_functions->vkQueueInsertDebugUtilsLabelEXT(
39+
device_->Queue(), &frame_end_label);
40+
41+
VkDebugUtilsLabelEXT frame_begin_label = {
42+
VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, NULL, "AmdFrameBegin", {0.0f, 0.0f, 0.0f, 0.0f}};
43+
device_->queue_insert_debug_utils_label_functions->vkQueueInsertDebugUtilsLabelEXT(
44+
device_->Queue(), &frame_begin_label);
45+
46+
// Set state as RUNNING
47+
curr_state_ = RUNNING;
48+
}
49+
}
50+
51+
} // namespace vulkan
52+
} // namespace runtime
53+
} // namespace tvm

src/runtime/vulkan/vulkan_amdrgp.h

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#ifndef TVM_RUNTIME_VULKAN_VULKAN_AMDRGP_H_
21+
#define TVM_RUNTIME_VULKAN_VULKAN_AMDRGP_H_
22+
23+
namespace tvm {
24+
namespace runtime {
25+
namespace vulkan {
26+
27+
class VulkanDevice;
28+
29+
class VulkanStreamProfiler {
30+
public:
31+
enum state { READY = 0, RUNNING, RESET };
32+
33+
explicit VulkanStreamProfiler(const VulkanDevice* device);
34+
35+
virtual ~VulkanStreamProfiler() {}
36+
37+
virtual void reset() { curr_state_ = RESET; }
38+
39+
virtual void ready() {
40+
if (curr_state_ == RESET) {
41+
curr_state_ = READY;
42+
}
43+
}
44+
45+
virtual void capture() = 0;
46+
47+
protected:
48+
const VulkanDevice* device_;
49+
state curr_state_;
50+
bool available_;
51+
};
52+
53+
class AmdRgpProfiler : public VulkanStreamProfiler {
54+
public:
55+
explicit AmdRgpProfiler(const VulkanDevice* device) : VulkanStreamProfiler(device) {}
56+
57+
void capture();
58+
};
59+
60+
} // namespace vulkan
61+
} // namespace runtime
62+
} // namespace tvm
63+
#endif // TVM_RUNTIME_VULKAN_VULKAN_AMDRGP_H_

src/runtime/vulkan/vulkan_device.cc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,12 @@ VulkanGetBufferMemoryRequirements2Functions::VulkanGetBufferMemoryRequirements2F
228228
vkGetDeviceProcAddr(device, "vkGetBufferMemoryRequirements2KHR"));
229229
}
230230

231+
/*!
 * \brief Resolve vkQueueInsertDebugUtilsLabelEXT for the given instance.
 *
 * The lookup result is checked with ICHECK_NOTNULL before being stored.
 *
 * \param instance The Vulkan instance used for the lookup.
 */
VulkanQueueInsertDebugUtilsLabelFunctions::VulkanQueueInsertDebugUtilsLabelFunctions(
    VkInstance instance) {
  vkQueueInsertDebugUtilsLabelEXT = reinterpret_cast<PFN_vkQueueInsertDebugUtilsLabelEXT>(
      ICHECK_NOTNULL(vkGetInstanceProcAddr(instance, "vkQueueInsertDebugUtilsLabelEXT")));
}
236+
231237
VulkanDevice::VulkanDevice(const VulkanInstance& instance, VkPhysicalDevice phy_device)
232238
: physical_device_(phy_device) {
233239
queue_family_index = SelectComputeQueueFamily();
@@ -325,6 +331,11 @@ VulkanDevice::VulkanDevice(const VulkanInstance& instance, VkPhysicalDevice phy_
325331
get_buffer_memory_requirements_2_functions =
326332
std::make_unique<VulkanGetBufferMemoryRequirements2Functions>(device_);
327333
}
334+
335+
if (instance.HasExtension("VK_EXT_debug_utils")) {
336+
queue_insert_debug_utils_label_functions =
337+
std::make_unique<VulkanQueueInsertDebugUtilsLabelFunctions>(instance);
338+
}
328339
}
329340

330341
VulkanDevice::~VulkanDevice() {
@@ -363,6 +374,8 @@ void VulkanDevice::do_swap(VulkanDevice&& other) {
363374
std::swap(descriptor_template_khr_functions, other.descriptor_template_khr_functions);
364375
std::swap(get_buffer_memory_requirements_2_functions,
365376
other.get_buffer_memory_requirements_2_functions);
377+
std::swap(queue_insert_debug_utils_label_functions,
378+
other.queue_insert_debug_utils_label_functions);
366379
std::swap(compute_mtype_index, other.compute_mtype_index);
367380
std::swap(queue, other.queue);
368381
std::swap(queue_family_index, other.queue_family_index);

src/runtime/vulkan/vulkan_device.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ struct VulkanGetBufferMemoryRequirements2Functions {
5757
PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR{nullptr};
5858
};
5959

60+
struct VulkanQueueInsertDebugUtilsLabelFunctions {
61+
explicit VulkanQueueInsertDebugUtilsLabelFunctions(VkInstance instance);
62+
63+
PFN_vkQueueInsertDebugUtilsLabelEXT vkQueueInsertDebugUtilsLabelEXT{nullptr};
64+
};
65+
6066
/*!
6167
* \brief Stores the capabilities/limits queried from the physical device.
6268
*
@@ -212,6 +218,8 @@ class VulkanDevice {
212218
std::unique_ptr<VulkanDescriptorTemplateKHRFunctions> descriptor_template_khr_functions{nullptr};
213219
std::unique_ptr<VulkanGetBufferMemoryRequirements2Functions>
214220
get_buffer_memory_requirements_2_functions{nullptr};
221+
std::unique_ptr<VulkanQueueInsertDebugUtilsLabelFunctions>
222+
queue_insert_debug_utils_label_functions{nullptr};
215223
// Memory type index for compute
216224
uint32_t compute_mtype_index{0};
217225

@@ -220,6 +228,10 @@ class VulkanDevice {
220228

221229
bool UseImmediate() const { return descriptor_template_khr_functions != nullptr; }
222230

231+
bool UseDebugUtilsLabel() const { return queue_insert_debug_utils_label_functions != nullptr; }
232+
233+
VkQueue Queue() const { return queue; }
234+
223235
private:
224236
/*! \brief Helper function for move assignment/construction
225237
*

src/runtime/vulkan/vulkan_device_api.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ void VulkanDeviceAPI::CopyDataFromTo(const void* from, size_t from_offset, void*
367367
&copy_info);
368368
});
369369
stream.Synchronize();
370+
stream.ProfilerReset();
370371
if (!device.coherent_staging) {
371372
VkMappedMemoryRange mrange;
372373
mrange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
@@ -413,6 +414,8 @@ void VulkanDeviceAPI::CopyDataFromTo(const void* from, size_t from_offset, void*
413414
vkCmdCopyBuffer(state->cmd_buffer_, staging_buffer.vk_buf.buffer, to_buf->buffer, 1,
414415
&copy_info);
415416
});
417+
418+
stream.ProfilerReady();
416419
// TODO(tulloch): should we instead make the staging buffer a property of the
417420
// Stream? This would allow us to elide synchronizations here.
418421
stream.Synchronize();

src/runtime/vulkan/vulkan_instance.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@ VulkanInstance::VulkanInstance() {
5959
std::vector<const char*> required_extensions{};
6060
std::vector<const char*> optional_extensions{"VK_KHR_get_physical_device_properties2"};
6161

62+
// Check if RGP support is needed. If needed, enable VK_EXT_debug_utils extension for
63+
// inserting debug labels into the queue.
64+
const char* val = getenv("TVM_USE_AMD_RGP");
65+
if (val != nullptr && atoi(val) == 1) {
66+
LOG(INFO) << "Push VK_EXT_debug_utils";
67+
required_extensions.push_back("VK_EXT_debug_utils");
68+
}
69+
6270
uint32_t inst_extension_prop_count;
6371
VULKAN_CALL(
6472
vkEnumerateInstanceExtensionProperties(nullptr, &inst_extension_prop_count, nullptr));

src/runtime/vulkan/vulkan_stream.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,15 @@ VulkanStream::VulkanStream(const VulkanDevice* device)
5555
cb_begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
5656
cb_begin.pInheritanceInfo = 0;
5757
VULKAN_CALL(vkBeginCommandBuffer(state_->cmd_buffer_, &cb_begin));
58+
59+
profiler_ = new AmdRgpProfiler(device_);
5860
}
5961

6062
/*!
 * \brief Destroy the stream's fence and command pool, then delete the profiler.
 */
VulkanStream::~VulkanStream() {
  // Vulkan objects owned by the stream.
  vkDestroyFence(*device_, state_->fence_, nullptr);
  vkDestroyCommandPool(*device_, cmd_pool_, nullptr);

  // Profiler held through a raw owning pointer.
  delete profiler_;
}
6468

6569
void VulkanStream::Launch(const std::function<void(VulkanStreamState*)>& kernel) {
@@ -132,6 +136,8 @@ void VulkanStream::Synchronize() {
132136
cb_submit.signalSemaphoreCount = 0;
133137
cb_submit.pSignalSemaphores = nullptr;
134138

139+
profiler_->capture();
140+
135141
device_->QueueSubmit(cb_submit, state_->fence_);
136142

137143
uint64_t timeout = 1UL << 30UL;

src/runtime/vulkan/vulkan_stream.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <unordered_map>
2626
#include <vector>
2727

28+
#include "vulkan_amdrgp.h"
2829
#include "vulkan_common.h"
2930

3031
namespace tvm {
@@ -99,6 +100,20 @@ class VulkanStream {
99100
const std::function<void(VulkanStreamState*)>& deferred_kernel,
100101
const VulkanStreamToken& deferred_token);
101102

103+
// reset profiler state
104+
void ProfilerReset() {
105+
if (profiler_) {
106+
profiler_->reset();
107+
}
108+
}
109+
110+
// set profiler to READY state after reset
111+
void ProfilerReady() {
112+
if (profiler_) {
113+
profiler_->ready();
114+
}
115+
}
116+
102117
// Synchronize the current stream `state_` with respect to the host.
103118
void Synchronize();
104119

@@ -110,6 +125,7 @@ class VulkanStream {
110125
std::unordered_map<VkDescriptorSet, std::vector<VulkanStreamToken>> deferred_tokens_;
111126
std::vector<std::function<void(VulkanStreamState*)>> deferred_kernels_;
112127
VkCommandPool cmd_pool_;
128+
VulkanStreamProfiler* profiler_;
113129
};
114130

115131
} // namespace vulkan

src/runtime/vulkan/vulkan_wrapped_func.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,15 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv,
164164
deferred_token.buffers_[i] = descriptor_buffers[i].buffer;
165165
}
166166
device.ThreadLocalStream().LaunchDeferred(deferred_initializer, deferred_kernel, deferred_token);
167+
168+
if (device.UseDebugUtilsLabel()) {
169+
VkDebugUtilsLabelEXT dispatch_label = {VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
170+
NULL,
171+
func_name_.c_str(),
172+
{0.0f, 0.0f, 0.0f, 0.0f}};
173+
device.queue_insert_debug_utils_label_functions->vkQueueInsertDebugUtilsLabelEXT(
174+
device.Queue(), &dispatch_label);
175+
}
167176
}
168177

169178
VulkanModuleNode::~VulkanModuleNode() {

0 commit comments

Comments
 (0)