@@ -222,13 +222,19 @@ def _run_per_layer(self):
222222 output_tensors .append (self ._get_node_output (i , j ))
223223 self .debug_datum .update_output_tensors (output_tensors )
224224
225- def _run_debug (self ):
225+ def _run_debug (self , number , repeat , min_repeat_ms , cooldown_interval_ms , repeats_to_cooldown ):
226226 """Execute the node specified with index will be executed.
227227 Each debug output will be copied to the buffer
228228 Time consumed for each execution will be set as debug output.
229229 """
230230 # Get timing.
231- self .debug_datum ._time_list = [[float (t )] for t in self .run_individual (10 , 1 , 1 )]
231+ self .debug_datum ._time_list = self .run_individual (
232+ number = number ,
233+ repeat = repeat ,
234+ min_repeat_ms = min_repeat_ms ,
235+ cooldown_interval_ms = cooldown_interval_ms ,
236+ repeats_to_cooldown = repeats_to_cooldown ,
237+ )
232238
233239 # Get outputs.
234240 self ._run_per_layer ()
@@ -259,31 +265,123 @@ def debug_get_output(self, node, out=None):
259265
260266 self ._debug_get_output (node_index , out )
261267
# pylint: disable=arguments-differ
def run(
    self,
    number=10,
    repeat=1,
    min_repeat_ms=1,
    cooldown_interval_ms=0,
    repeats_to_cooldown=1,
    **input_dict,
):
    """Run forward execution of the graph with debug instrumentation.

    Parameters
    ----------
    number: int, optional
        The number of times to run this function for taking average.
        We call these runs as one `repeat` of measurement.

    repeat: int, optional
        The number of times to repeat the measurement.
        In total, the function will be invoked (1 + number x repeat) times,
        where the first one is warm up and will be discarded.
        The returned result contains `repeat` costs,
        each of which is an average of `number` costs.

    min_repeat_ms: int, optional
        The minimum duration of one `repeat` in milliseconds.
        By default, one `repeat` contains `number` runs. If this parameter is set,
        `number` will be dynamically adjusted to meet the minimum duration
        requirement of one `repeat`: when the run time of one `repeat` falls
        below this time, `number` is automatically increased.

    cooldown_interval_ms: int, optional
        The cooldown interval in milliseconds between the number of repeats
        defined by `repeats_to_cooldown`.

    repeats_to_cooldown: int, optional
        The number of repeats before the cooldown is activated.

    input_dict : dict of str to NDArray
        Input values to be fed to the graph before execution.
    """
    # Feed any provided inputs first so the debug run sees them.
    if input_dict:
        self.set_input(**input_dict)

    timing_kwargs = {
        "number": number,
        "repeat": repeat,
        "min_repeat_ms": min_repeat_ms,
        "cooldown_interval_ms": cooldown_interval_ms,
        "repeats_to_cooldown": repeats_to_cooldown,
    }
    # Step 1. Execute the graph, collecting per-node timings and outputs.
    self._run_debug(**timing_kwargs)
    # Step 2. Dump the output tensors to the dump folder.
    self.debug_datum.dump_output_tensor()
    # Step 3. Dump the Chrome trace to the dump folder.
    self.debug_datum.dump_chrome_trace()
    # Step 4. Display the collected information.
    self.debug_datum.display_debug_result()
281328
282- def run_individual (self , number , repeat = 1 , min_repeat_ms = 0 ):
283- ret = self ._run_individual (number , repeat , min_repeat_ms )
284- return ret .strip ("," ).split ("," ) if ret else []
329+ def run_individual (
330+ self , number , repeat = 1 , min_repeat_ms = 0 , cooldown_interval_ms = 0 , repeats_to_cooldown = 1
331+ ):
332+ """Run each operation in the graph and get the time per op for all ops.
333+
334+ number: int
335+ The number of times to run this function for taking average.
336+ We call these runs as one `repeat` of measurement.
285337
286- def run_individual_node (self , index , number = 10 , repeat = 1 , min_repeat_ms = 0 ):
338+ repeat: int, optional
339+ The number of times to repeat the measurement.
340+ In total, the function will be invoked (1 + number x repeat) times,
341+ where the first one is warm up and will be discarded.
342+ The returned result contains `repeat` costs,
343+ each of which is an average of `number` costs.
344+
345+ min_repeat_ms: int, optional
346+ The minimum duration of one `repeat` in milliseconds.
347+ By default, one `repeat` contains `number` runs. If this parameter is set,
348+ the parameters `number` will be dynamically adjusted to meet the
349+ minimum duration requirement of one `repeat`.
350+ i.e., When the run time of one `repeat` falls below this time, the `number` parameter
351+ will be automatically increased.
352+
353+ cooldown_interval_ms: int, optional
354+ The cooldown interval in milliseconds between the number of repeats defined by
355+ `repeats_to_cooldown`.
356+
357+ repeats_to_cooldown: int, optional
358+ The number of repeats before the cooldown is activated.
359+
360+ Returns
361+ -------
362+ A 2-dimensional array where the dimensions are: the index of the operation and
363+ the repeat of the measurement.
364+ """
365+ ret = self ._run_individual (
366+ number , repeat , min_repeat_ms , cooldown_interval_ms , repeats_to_cooldown
367+ )
368+ results = []
369+ for node_data in ret .strip (";" ).split (";" ):
370+ results .append ([])
371+ for repeat_data in node_data .strip ("," ).split ("," ):
372+ if repeat_data :
373+ results [- 1 ].append (float (repeat_data ))
374+ return results
375+
def run_individual_node(
    self,
    index,
    number=10,
    repeat=1,
    min_repeat_ms=0,
    cooldown_interval_ms=0,
    repeats_to_cooldown=1,
):
    """Benchmark a single node in the serialized graph.

    This does not do any data transfers and uses arrays already on the device.

    Parameters
    ----------
    index : int
        The index of the node to benchmark.

    number: int, optional
        The number of times to run this function for taking average.
        We call these runs as one `repeat` of measurement.

    repeat: int, optional
        The number of times to repeat the measurement.
        In total, the function will be invoked (1 + number x repeat) times,
        where the first one is warm up and will be discarded.
        The returned result contains `repeat` costs,
        each of which is an average of `number` costs.

    min_repeat_ms : int, optional
        The minimum duration of one `repeat` in milliseconds.
        By default, one `repeat` contains `number` runs. If this parameter is set,
        `number` will be dynamically adjusted to meet the minimum duration
        requirement of one `repeat`: when the run time of one `repeat` falls
        below this time, `number` is automatically increased.

    cooldown_interval_ms: int, optional
        The cooldown interval in milliseconds between the number of repeats
        defined by `repeats_to_cooldown`.

    repeats_to_cooldown: int, optional
        The number of repeats before the cooldown is activated.

    Returns
    -------
    A module BenchmarkResult
    """
    # The runtime reports the per-repeat costs as a ","-separated string;
    # deserialize it into floats, ignoring whitespace and blank fields.
    raw = self._run_individual_node(
        index, number, repeat, min_repeat_ms, cooldown_interval_ms, repeats_to_cooldown
    )
    repeat_costs = [float(field) for field in raw.replace(" ", "").split(",") if field]
    return BenchmarkResult(repeat_costs)
328433
329434 def profile (self , collectors = None , ** input_dict ):
330435 """Run forward execution of the graph and collect overall and per-op
0 commit comments