mlcommons · arav-agarwal2 · Feb 2, 2026 · Feb 3, 2026 · Feb 9, 2026 · Feb 9, 2026
@@ -1,17 +1,10 @@
 import json
-import os
 import io
 
-
-def _make_r_io_base(f, mode: str):
-    if not isinstance(f, io.IOBase):
-        f = open(f, mode=mode)
-    return f
-
-
 def jload(f, mode="r"):
     """Load a .json file into a dictionary."""
-    f = _make_r_io_base(f, mode)
-    jdict = json.load(f)
-    f.close()
-    return jdict
+    if not isinstance(f, io.IOBase):
+        with open(f, mode=mode) as f:
+            return json.load(f)
+    else:
+        return json.load(f)
@@ -46,7 +46,8 @@ def load_prompts(dataset_path):
 
 
 class Model:
-    def __init__(self, model_path, device, config, prompts, fixed_latent=None, rank=0):
+    def __init__(self, model_path, device, config,
+                 prompts, fixed_latent=None, rank=0):
         self.device = device
         self.rank = rank
         self.height = config["height"]
@@ -106,7 +107,8 @@ def flush_queries(self):
 
 
 class DebugModel:
-    def __init__(self, model_path, device, config, prompts, fixed_latent=None, rank=0):
+    def __init__(self, model_path, device, config,
+                 prompts, fixed_latent=None, rank=0):
         self.prompts = prompts
 
     def issue_queries(self, query_samples):
@@ -186,7 +188,8 @@ def get_args():
     parser.add_argument(
         "--scenario",
         default="SingleStream",
-        help="mlperf benchmark scenario, one of " + str(list(SCENARIO_MAP.keys())),
+        help="mlperf benchmark scenario, one of " +
+        str(list(SCENARIO_MAP.keys())),
     )
     parser.add_argument(
         "--user_conf",
@@ -202,7 +205,10 @@ def get_args():
         help="performance sample count",
         default=5000,
     )
-    parser.add_argument("--accuracy", action="store_true", help="enable accuracy pass")
+    parser.add_argument(
+        "--accuracy",
+        action="store_true",
+        help="enable accuracy pass")
     # Dont overwrite these for official submission
     parser.add_argument("--count", type=int, help="dataset items to use")
     parser.add_argument("--time", type=int, help="time to scan in seconds")
@@ -271,7 +277,10 @@ def run_mlperf(args, config):
 
         audit_config = os.path.abspath(args.audit_conf)
         if os.path.exists(audit_config):
-            settings.FromConfig(audit_config, "wan-2.2-t2v-a14b", args.scenario)
+            settings.FromConfig(
+                audit_config,
+                "wan-2.2-t2v-a14b",
+                args.scenario)
         settings.scenario = SCENARIO_MAP[args.scenario]
 
         settings.mode = lg.TestMode.PerformanceOnly
@@ -297,8 +306,10 @@ def run_mlperf(args, config):
         if args.samples_per_query:
             settings.multi_stream_samples_per_query = args.samples_per_query
         if args.max_latency:
-            settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
-            settings.multi_stream_expected_latency_ns = int(args.max_latency * NANO_SEC)
+            settings.server_target_latency_ns = int(
+                args.max_latency * NANO_SEC)
+            settings.multi_stream_expected_latency_ns = int(
+                args.max_latency * NANO_SEC)
 
         performance_sample_count = (
             args.performance_sample_count
@@ -311,7 +322,8 @@ def run_mlperf(args, config):
             count, performance_sample_count, load_query_samples, unload_query_samples
         )
 
-        lg.StartTestWithLogSettings(sut, qsl, settings, log_settings, audit_config)
+        lg.StartTestWithLogSettings(
+            sut, qsl, settings, log_settings, audit_config)
 
         lg.DestroyQSL(qsl)
         lg.DestroySUT(sut)

@@ -101,11 +101,11 @@ def main():
         "singlestream": "SingleStream",
         "multistream": "MultiStream",
         "server": "Server",
-        "interactive":"Interactive",
+        "interactive": "Interactive",
         "offline": "Offline",
     }
-    df["Scenario"] = df["Scenario"].apply(lambda x: scenario_map.get(str(x).lower(), x))
-
+    df["Scenario"] = df["Scenario"].apply(
+        lambda x: scenario_map.get(str(x).lower(), x))
 
     output = args.input[:-4]
     writer = pd.ExcelWriter(output + ".xlsx", engine="xlsxwriter")

@@ -85,13 +85,15 @@ def missing_check(self):
             self.log.error("Performance log missing at %s", self.path)
             return False
         return True
-    
+
     def scenarios_check(self):
         if self.submission_logs.loader_data.get("check_scenarios", False):
             return True
         else:
-            missing_scenarios = self.submission_logs.loader_data.get("missing_scenarios", [])
-            unknown_scenarios = self.submission_logs.loader_data.get("unknown_scenarios", [])
+            missing_scenarios = self.submission_logs.loader_data.get(
+                "missing_scenarios", [])
+            unknown_scenarios = self.submission_logs.loader_data.get(
+                "unknown_scenarios", [])
             if len(missing_scenarios) > 0:
                 self.log.error(
                     "%s does not have all required scenarios, missing %s",
@@ -116,7 +118,8 @@ def loadgen_errors_check(self):
             bool: True if no blocking Loadgen errors are present,
                 False otherwise.
         """
-        compliance_skip = self.submission_logs.loader_data.get("compliance_skip", False)
+        compliance_skip = self.submission_logs.loader_data.get(
+            "compliance_skip", False)
         if self.mlperf_log.has_error():
             has_critical_errors = False
             if self.config.ignore_uncommited:
@@ -127,7 +130,7 @@ def loadgen_errors_check(self):
                     ):
                         has_critical_errors = True
                     if (
-                        not compliance_skip 
+                        not compliance_skip
                         and "Multiple conf files are used" in error["value"]
                     ):
                         has_critical_errors = True
@@ -454,7 +457,7 @@ def inferred_check(self):
                 ("singlestream", "offline")
             ]
             if (self.scenario.lower(), self.scenario_fixed.lower()
-                ) not in list_inferred:
+                    ) not in list_inferred:
                 self.log.error(
                     "Result for scenario %s can not be inferred from %s for: %s",
                     self.scenario_fixed,
@@ -543,12 +546,12 @@ def get_inferred_result(self, res):
             res = qps_wo_loadgen_overhead
 
         if (self.scenario_fixed in ["Offline"]
-            ) and self.scenario in ["MultiStream"]:
+                ) and self.scenario in ["MultiStream"]:
             inferred = True
             res = samples_per_query * S_TO_MS / (latency_mean / MS_TO_NS)
 
         if (self.scenario_fixed in ["MultiStream"]
-            ) and self.scenario in ["SingleStream"]:
+                ) and self.scenario in ["SingleStream"]:
             inferred = True
             # samples_per_query does not match with the one reported in the logs
             # when inferring MultiStream from SingleStream
@@ -565,6 +568,6 @@ def get_inferred_result(self, res):
             else:
                 res = (latency_99_percentile * samples_per_query) / MS_TO_NS
         if (self.scenario_fixed in ["Interactive"]
-            ) and self.scenario not in ["Server"]:
+                ) and self.scenario not in ["Server"]:
             is_valid = False
         return res, is_valid
@@ -107,17 +107,19 @@ def is_number(s):
         return True
     except ValueError:
         return False
-
+
+
 def lower_list(l):
     return [str(e).lower() for e in l]
 
+
 def contains_list(l1, l2):
     # Check if l1 contains all elements of l2
     missing = []
     for e in l2:
         if e not in l1:
             missing.append(e)
-    return missing, len(missing) == 0 
+    return missing, len(missing) == 0
 
 
 def get_performance_metric(
@@ -317,7 +319,7 @@ def get_power_metric(config, scenario_fixed, log_path, is_valid, res):
                 samples_per_query = 8
 
             if (scenario_fixed in ["MultiStream"]
-                ) and scenario in ["SingleStream"]:
+                    ) and scenario in ["SingleStream"]:
                 power_metric = (
                     avg_power * power_duration * samples_per_query * 1000 / num_queries
                 )