Skip to content

Commit 0a126bd

Browse files
committed
[GraphRuntime] Debug graph runtime
1 parent 29cfac6 commit 0a126bd

File tree

4 files changed

+57
-66
lines changed

4 files changed

+57
-66
lines changed

python/tvm/contrib/debugger/debug_result.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,8 @@ def dump_graph_json(self, graph):
207207
def display_debug_result(self):
208208
"""Displays the debugger result"
209209
"""
210-
header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Start Time", \
211-
"End Time", "Shape", "Inputs", "Outputs"]
212-
lines = ["---------", "---", "--------", "-------", "----------", \
213-
"--------", "-----", "------", "-------"]
210+
header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Shape", "Inputs", "Outputs"]
211+
lines = ["---------", "---", "--------", "-------", "-----", "------", "-------"]
214212
eid = 0
215213
data = []
216214
total_time = sum(time[0] for time in self._time_list)
@@ -223,12 +221,11 @@ def display_debug_result(self):
223221
continue
224222
name = node['name']
225223
shape = str(self._output_tensor_list[eid].shape)
226-
time_us = round(time[0] * 1000000, 2)
227-
time_percent = round(((time[0] / total_time) * 100), 2)
224+
time_us = round(time[0] * 1000000, 3)
225+
time_percent = round(((time[0] / total_time) * 100), 3)
228226
inputs = str(node['attrs']['num_inputs'])
229227
outputs = str(node['attrs']['num_outputs'])
230-
node_data = [name, op, time_us, time_percent, str(time[1]), str(time[2]), \
231-
shape, inputs, outputs]
228+
node_data = [name, op, time_us, time_percent, shape, inputs, outputs]
232229
data.append(node_data)
233230
eid += 1
234231
fmt = ""

python/tvm/contrib/debugger/debug_runtime.py

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
_DUMP_ROOT_PREFIX = "tvmdbg_"
3131
_DUMP_PATH_PREFIX = "_tvmdbg_"
3232

33+
3334
def create(graph_json_str, libmod, ctx, dump_root=None):
3435
"""Create a runtime executor module given a graph and module.
3536
@@ -62,17 +63,23 @@ def create(graph_json_str, libmod, ctx, dump_root=None):
6263
try:
6364
fcreate = get_global_func("tvm.graph_runtime_debug.create")
6465
except ValueError:
65-
raise ValueError("Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " \
66-
"config.cmake and rebuild TVM to enable debug mode")
66+
raise ValueError(
67+
"Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in "
68+
"config.cmake and rebuild TVM to enable debug mode"
69+
)
6770

6871
ctx, num_rpc_ctx, device_type_id = graph_runtime.get_device_ctx(libmod, ctx)
6972
if num_rpc_ctx == len(ctx):
7073
libmod = rpc_base._ModuleHandle(libmod)
7174
try:
72-
fcreate = ctx[0]._rpc_sess.get_function("tvm.graph_runtime_debug.remote_create")
75+
fcreate = ctx[0]._rpc_sess.get_function(
76+
"tvm.graph_runtime_debug.remote_create"
77+
)
7378
except ValueError:
74-
raise ValueError("Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " \
75-
"config.cmake and rebuild TVM to enable debug mode")
79+
raise ValueError(
80+
"Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in "
81+
"config.cmake and rebuild TVM to enable debug mode"
82+
)
7683
func_obj = fcreate(graph_json_str, libmod, *device_type_id)
7784
return GraphModuleDebug(func_obj, ctx, graph_json_str, dump_root)
7885

@@ -100,10 +107,10 @@ class GraphModuleDebug(graph_runtime.GraphModule):
100107
To select which folder the outputs should be kept.
101108
None will make a temp folder in /tmp/tvmdbg<rand_string> and does the dumping
102109
"""
110+
103111
def __init__(self, module, ctx, graph_json_str, dump_root):
104112
self._dump_root = dump_root
105113
self._dump_path = None
106-
self._debug_run = module["debug_run"]
107114
self._get_output_by_layer = module["get_output_by_layer"]
108115
self._run_individual = module["run_individual"]
109116
graph_runtime.GraphModule.__init__(self, module)
@@ -181,13 +188,10 @@ def _run_debug(self):
181188
Time consumed for each execution will be set as debug output.
182189
183190
"""
184-
self.debug_datum._time_list = []
185-
191+
self.debug_datum._time_list = [
192+
[float(t) * 1e-6] for t in self.run_individual(10, 1, 1)
193+
]
186194
for i, node in enumerate(self.debug_datum.get_graph_nodes()):
187-
start_time = datetime.now().time()
188-
time_stamp = self._debug_run(i)
189-
end_time = datetime.now().time()
190-
self.debug_datum._time_list.append([time_stamp, start_time, end_time])
191195
num_outputs = self.debug_datum.get_graph_node_output_num(node)
192196
for j in range(num_outputs):
193197
out_tensor = self._get_output_by_layer(i, j)
@@ -212,8 +216,13 @@ def debug_get_output(self, node, out):
212216
ret = output_tensors[node]
213217
except:
214218
node_list = output_tensors.keys()
215-
raise RuntimeError("Node " + node + " not found, available nodes are: "
216-
+ str(node_list) + ".")
219+
raise RuntimeError(
220+
"Node "
221+
+ node
222+
+ " not found, available nodes are: "
223+
+ str(node_list)
224+
+ "."
225+
)
217226
elif isinstance(node, int):
218227
output_tensors = self.debug_datum._output_tensor_list
219228
ret = output_tensors[node]
@@ -242,7 +251,11 @@ def run(self, **input_dict):
242251
self.debug_datum.display_debug_result()
243252

244253
def run_individual(self, number, repeat=1, min_repeat_ms=0):
245-
self._run_individual(number, repeat, min_repeat_ms)
254+
ret = self._run_individual(number, repeat, min_repeat_ms)
255+
if ret:
256+
return ret.strip(",").split(",")
257+
else:
258+
return []
246259

247260
def exit(self):
248261
"""Exits the dump folder and all its contents"""

src/runtime/graph/debug/graph_runtime_debug.cc

Lines changed: 24 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
* to you under the Apache License, Version 2.0 (the
77
* "License"); you may not use this file except in compliance
88
* with the License. You may obtain a copy of the License at
9-
*
9+
*
1010
* http://www.apache.org/licenses/LICENSE-2.0
11-
*
11+
*
1212
* Unless required by applicable law or agreed to in writing,
1313
* software distributed under the License is distributed on an
1414
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -24,7 +24,9 @@
2424
#include <tvm/runtime/packed_func.h>
2525
#include <tvm/runtime/registry.h>
2626
#include <tvm/runtime/ndarray.h>
27+
2728
#include <chrono>
29+
#include <sstream>
2830
#include "../graph_runtime.h"
2931

3032
namespace tvm {
@@ -39,40 +41,23 @@ namespace runtime {
3941
class GraphRuntimeDebug : public GraphRuntime {
4042
public:
4143
/*!
42-
* \brief Run each operation and get the output.
43-
* \param index The index of op which needs to be run.
44-
* \return the elapsed time.
45-
*/
46-
double DebugRun(size_t index) {
47-
CHECK(index < op_execs_.size());
48-
TVMContext ctx = data_entry_[entry_id(index, 0)]->ctx;
49-
auto tbegin = std::chrono::high_resolution_clock::now();
50-
if (op_execs_[index]) {
51-
op_execs_[index]();
52-
}
53-
TVMSynchronize(ctx.device_type, ctx.device_id, nullptr);
54-
auto tend = std::chrono::high_resolution_clock::now();
55-
double time = std::chrono::duration_cast<std::chrono::duration<double> >(
56-
tend - tbegin).count();
57-
return time;
58-
}
59-
60-
/*!
61-
* \brief Run each operation in the graph and print out the runtime per op.
44+
* \brief Run each operation in the graph and get the time per op for all ops.
6245
* \param number The number of times to run this function for taking average.
6346
* \param repeat The number of times to repeat the measurement.
64-
In total, the function will be invoked (1 + number x repeat) times,
65-
where the first one is warmed up and will be discarded in case
66-
there is lazy initialization.
47+
* In total, the function will be invoked (1 + number x repeat) times,
48+
* where the first one is warmed up and will be discarded in case
49+
* there is lazy initialization.
6750
* \param min_repeat_ms The minimum duration of one `repeat` in milliseconds.
68-
By default, one `repeat` contains `number` runs. If this parameter is set,
69-
the parameters `number` will be dynamically adjusted to meet the
70-
minimum duration requirement of one `repeat`.
51+
* By default, one `repeat` contains `number` runs. If this parameter is set,
52+
* the parameters `number` will be dynamically adjusted to meet the
53+
* minimum duration requirement of one `repeat`.
54+
* \return Comma separated string containing the elapsed time per op for the last
55+
* iteration only, because returning a long string over rpc can be expensive.
7156
*/
72-
void RunIndividual(int number, int repeat, int min_repeat_ms) {
57+
std::string RunIndividual(int number, int repeat, int min_repeat_ms) {
7358
// warmup run
7459
GraphRuntime::Run();
75-
60+
std::ostringstream os;
7661
std::vector<double> time_per_op(op_execs_.size(), 0);
7762
for (int i = 0; i < repeat; ++i) {
7863
std::chrono::time_point<
@@ -96,7 +81,7 @@ class GraphRuntimeDebug : public GraphRuntime {
9681
auto op_tend = std::chrono::high_resolution_clock::now();
9782
double op_duration = std::chrono::duration_cast<
9883
std::chrono::duration<double> >(op_tend - op_tbegin).count();
99-
time_per_op[index] += op_duration * 1000; // ms
84+
time_per_op[index] += op_duration * 1e6; // us
10085
}
10186
}
10287
}
@@ -105,16 +90,20 @@ class GraphRuntimeDebug : public GraphRuntime {
10590
(tend - tbegin).count() * 1000;
10691
} while (duration_ms < min_repeat_ms);
10792

108-
LOG(INFO) << "Repeat: " << i;
93+
LOG(INFO) << "Iteration: " << i;
10994
int op = 0;
11095
for (size_t index = 0; index < time_per_op.size(); index++) {
11196
if (op_execs_[index]) {
11297
time_per_op[index] /= number;
11398
LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": "
114-
<< time_per_op[index] << " ms/iter";
99+
<< time_per_op[index] << " us/iter";
115100
}
116101
}
117102
}
103+
for (size_t index = 0; index < time_per_op.size(); index++) {
104+
os << time_per_op[index] << ",";
105+
}
106+
return os.str();
118107
}
119108

120109
/*!
@@ -182,11 +171,7 @@ PackedFunc GraphRuntimeDebug::GetFunction(
182171
const std::string& name,
183172
const std::shared_ptr<ModuleNode>& sptr_to_self) {
184173
// return member functions during query.
185-
if (name == "debug_run") {
186-
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
187-
*rv = this->DebugRun(static_cast<size_t>(args[0].operator int64_t()));
188-
});
189-
} else if (name == "get_output_by_layer") {
174+
if (name == "get_output_by_layer") {
190175
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
191176
*rv = this->GetOutputByLayer(args[0], args[1]);
192177
});
@@ -206,7 +191,7 @@ PackedFunc GraphRuntimeDebug::GetFunction(
206191
CHECK_GT(number, 0);
207192
CHECK_GT(repeat, 0);
208193
CHECK_GE(min_repeat_ms, 0);
209-
this->RunIndividual(number, repeat, min_repeat_ms);
194+
*rv = this->RunIndividual(number, repeat, min_repeat_ms);
210195
});
211196
} else {
212197
return GraphRuntime::GetFunction(name, sptr_to_self);

tests/python/unittest/test_runtime_graph_debug.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,6 @@ def check_verify():
100100
out = mod.get_output(0, tvm.nd.empty((n,)))
101101
np.testing.assert_equal(out.asnumpy(), a + 1)
102102

103-
#test individual run
104-
mod.run_individual(20, 2, 1)
105-
106103
mod.exit()
107104
#verify dump root delete after cleanup
108105
assert(not os.path.exists(directory))
@@ -129,7 +126,6 @@ def check_remote():
129126
mod.run(x=tvm.nd.array(a, ctx))
130127
out = tvm.nd.empty((n,), ctx=ctx)
131128
out = mod.get_output(0, out)
132-
mod.run_individual(20, 2, 1)
133129
np.testing.assert_equal(out.asnumpy(), a + 1)
134130

135131
check_verify()

0 commit comments

Comments
 (0)