@@ -222,13 +222,18 @@ def _run_per_layer(self):
222222 output_tensors .append (self ._get_node_output (i , j ))
223223 self .debug_datum .update_output_tensors (output_tensors )
224224
225- def _run_debug (self ):
225+ def _run_debug (self , number , repeat , min_repeat_ms , cooldown_interval_ms ):
226226 """Execute the node specified with index will be executed.
227227 Each debug output will be copied to the buffer
228228 Time consumed for each execution will be set as debug output.
229229 """
230230 # Get timing.
231- self .debug_datum ._time_list = [[float (t )] for t in self .run_individual (10 , 1 , 1 )]
231+ self .debug_datum ._time_list = self .run_individual (
232+ number = number ,
233+ repeat = repeat ,
234+ min_repeat_ms = min_repeat_ms ,
235+ cooldown_interval_ms = cooldown_interval_ms ,
236+ )
232237
233238 # Get outputs.
234239 self ._run_per_layer ()
@@ -259,31 +264,98 @@ def debug_get_output(self, node, out=None):
259264
260265 self ._debug_get_output (node_index , out )
261266
262- def run (self , ** input_dict ):
267+ # pylint: disable=arguments-differ
268+ def run (self , number = 10 , repeat = 1 , min_repeat_ms = 1 , cooldown_interval_ms = 0 , ** input_dict ):
263269 """Run forward execution of the graph with debug
264270
265271 Parameters
266272 ----------
273+ number: int, optional
274+ The number of times to run this function for taking average.
275+ Together, these runs make up one `repeat` of measurement.
276+
277+ repeat: int, optional
278+ The number of times to repeat the measurement.
279+ In total, the function will be invoked (1 + number x repeat) times,
280+ where the first one is warm up and will be discarded.
281+ The returned result contains `repeat` costs,
282+ each of which is an average of `number` costs.
283+
284+ min_repeat_ms: int, optional
285+ The minimum duration of one `repeat` in milliseconds.
286+ By default, one `repeat` contains `number` runs. If this parameter is set,
287+ the parameter `number` will be dynamically adjusted to meet the
288+ minimum duration requirement of one `repeat`.
289+ i.e., when the run time of one `repeat` falls below this time, the `number` parameter
290+ will be automatically increased.
291+
292+ cooldown_interval_ms: int, optional
293+ The cool down interval between two measurements in milliseconds.
294+
267295 input_dict : dict of str to NDArray
268296 List of input values to be feed to
269297 """
270298 if input_dict :
271299 self .set_input (** input_dict )
272300
273301 # Step 1. Execute the graph
274- self ._run_debug ()
302+ self ._run_debug (
303+ number = number ,
304+ repeat = repeat ,
305+ min_repeat_ms = min_repeat_ms ,
306+ cooldown_interval_ms = cooldown_interval_ms ,
307+ )
275308 # Step 2. Dump the output tensors to the dump folder
276309 self .debug_datum .dump_output_tensor ()
277310 # Step 3. Dump the Chrome trace to the dump folder
278311 self .debug_datum .dump_chrome_trace ()
279312 # Step 4. Display the collected information
280313 self .debug_datum .display_debug_result ()
281314
282- def run_individual (self , number , repeat = 1 , min_repeat_ms = 0 ):
283- ret = self ._run_individual (number , repeat , min_repeat_ms )
284- return ret .strip ("," ).split ("," ) if ret else []
315+ def run_individual (self , number , repeat = 1 , min_repeat_ms = 0 , cooldown_interval_ms = 0 ):
316+ """Run each operation in the graph and get the time per op for all ops.
317+
318+ number: int
319+ The number of times to run this function for taking average.
320+ Together, these runs make up one `repeat` of measurement.
321+
322+ repeat: int, optional
323+ The number of times to repeat the measurement.
324+ In total, the function will be invoked (1 + number x repeat) times,
325+ where the first one is warm up and will be discarded.
326+ The returned result contains `repeat` costs,
327+ each of which is an average of `number` costs.
285328
286- def run_individual_node (self , index , number = 10 , repeat = 1 , min_repeat_ms = 0 ):
329+ min_repeat_ms: int, optional
330+ The minimum duration of one `repeat` in milliseconds.
331+ By default, one `repeat` contains `number` runs. If this parameter is set,
332+ the parameter `number` will be dynamically adjusted to meet the
333+ minimum duration requirement of one `repeat`.
334+ i.e., when the run time of one `repeat` falls below this time, the `number` parameter
335+ will be automatically increased.
336+
337+ cooldown_interval_ms: int, optional
338+ The cool down interval between two measurements in milliseconds.
339+
340+ Returns
341+ -------
342+ A 3-dimensional nested list of floats indexed, in order, by the index of the
343+ operation, the repeat, and the number of the measurement within that repeat.
344+ """
345+ ret = self ._run_individual (number , repeat , min_repeat_ms , cooldown_interval_ms )
346+ measurements = []
347+ for node_data in ret .strip (":" ).split (":" ):
348+ measurements .append ([])
349+ for repeat_data in node_data .strip (";" ).split (";" ):
350+ measurements [- 1 ].append ([])
351+ for number_data in repeat_data .strip ("," ).split ("," ):
352+ if number_data :
353+ measurements [- 1 ][- 1 ].append (float (number_data ))
354+ return measurements
355+
356+ def run_individual_node (
357+ self , index , number = 10 , repeat = 1 , min_repeat_ms = 0 , cooldown_interval_ms = 0
358+ ):
287359 """Benchmark a single node in the serialized graph.
288360
289361 This does not do any data transfers and uses arrays already on the device.
@@ -304,27 +376,30 @@ def run_individual_node(self, index, number=10, repeat=1, min_repeat_ms=0):
304376 The returned result contains `repeat` costs,
305377 each of which is an average of `number` costs.
306378
307- min_repeat_ms: int, optional
379+ min_repeat_ms : int, optional
308380 The minimum duration of one `repeat` in milliseconds.
309381 By default, one `repeat` contains `number` runs. If this parameter is set,
310382 the parameters `number` will be dynamically adjusted to meet the
311383 minimum duration requirement of one `repeat`.
312384 i.e., When the run time of one `repeat` falls below this time, the `number` parameter
313385 will be automatically increased.
314386
387+ cooldown_interval_ms : int, optional
388+ The cool down interval between two measurements in milliseconds.
389+
315390 Returns
316391 -------
317392 A module BenchmarkResult
318393 """
319394 # Results are returned as serialized strings which we deserialize
320- ret = self ._run_individual_node (index , number , repeat , min_repeat_ms )
321- answer = []
322- for value in ret .split ( ", " ):
323- if value . strip () == "" :
324- continue
325- answer . append ( float ( value ))
326-
327- return BenchmarkResult (answer )
395+ ret = self ._run_individual_node (index , number , repeat , min_repeat_ms , cooldown_interval_ms )
396+ measurements = []
397+ for repeat_data in ret .replace ( " " , "" ). strip ( ";" ). split ( "; " ):
398+ measurements . append ([])
399+ for number_data in repeat_data . strip ( "," ). split ( "," ):
400+ if number_data :
401+ measurements [ - 1 ]. append ( float ( number_data ))
402+ return BenchmarkResult (measurements )
328403
329404 def profile (self , collectors = None , ** input_dict ):
330405 """Run forward execution of the graph and collect overall and per-op
0 commit comments