Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,37 @@ See if the Hugging Face version of GPT2 is vulnerable to DAN 11.0
python3 -m garak --target_type huggingface --target_name gpt2 --probes dan.Dan_11_0
```

## Resumable Scans

`garak` supports resumable scans that allow you to continue interrupted scans without starting from scratch. This is useful for:
- Long-running scans that may be interrupted by network issues, rate limits, or system crashes
- Saving API costs by avoiding redundant prompts
- Enabling flexible scan scheduling (pause/resume)

### Basic Usage

```bash
# Start a resumable scan
python3 -m garak --target_type openai --target_name gpt-4 --probes all

# List saved runs (newest first); copy the run_id you need
python3 -m garak --list_runs

# Resume with the same probe list and target as the original scan
python3 -m garak --resume 550e8400-e29b-41d4-a716-446655440000

# Delete old run state
python3 -m garak --delete_run 550e8400-e29b-41d4-a716-446655440000
```

### Resume Notes
Resume is probe-level: probes that are fully completed are skipped on resume.
Each resume writes a new timestamped report file; the original report is left untouched.

Resuming verifies that your **probe list** and **generator target** (`--target_type` / `--target_name`) match the original run; if they differ, the resume aborts with an error so that the completed-probe bookkeeping remains valid.

Saved run state is stored under the XDG data directory in a `runs/` subdirectory (for example: `<xdg_data_home>/garak/runs/<run_id>/state.json`).
You can manage saved state with `--list_runs` and `--delete_run`.

## Reading the results

Expand Down
2 changes: 2 additions & 0 deletions garak/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class TransientConfig(GarakSubConfig):
hitlogfile = None
args = None # only access this when determining what was passed on CLI
run_id = None
resume_run_id = None
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is simply not needed.

package_dir = pathlib.Path(__file__).parents[0]
config_dir = xdg_config_home() / project_dir_name
data_dir = xdg_data_home() / project_dir_name
Expand Down Expand Up @@ -122,6 +123,7 @@ def _nested_dict():
run.soft_probe_prompt_cap = 64
run.target_lang = "en"
run.langproviders = []
# Note: resume configuration is provided only via transient.resume_run_id
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is simply not needed.


# placeholder
# generator, probe, detector, buff = {}, {}, {}, {}
Expand Down
6 changes: 3 additions & 3 deletions garak/analyze/bootstrap_ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ def _bootstrap_calculation(

n = len(results)
corrected_asrs = np.empty(num_iterations)

# No correction needed when denominator ≈ 1.0
# This occurs when: (1) perfect detector (Se=Sp=1.0), or (2) fallback triggered above (Se+Sp-1 < 0.01)
is_perfect_detector = np.isclose(denominator, 1.0)

for i in range(num_iterations):
resampled_results = np.random.choice(results, size=n, replace=True)
p_obs = resampled_results.mean()

if is_perfect_detector:
corrected_asrs[i] = p_obs
else:
Expand Down
106 changes: 52 additions & 54 deletions garak/analyze/ci_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,7 @@ def _extract_reporting_config_from_setup(report_path: str) -> dict:
entry = json.loads(first_line)
if entry.get("entry_type") != "start_run setup":
return {}
return {
k: v for k, v in entry.items()
if k.startswith("reporting.")
}
return {k: v for k, v in entry.items() if k.startswith("reporting.")}


def _reconstruct_binary_from_aggregates(passed: int, failed: int) -> List[int]:
Expand All @@ -68,109 +65,109 @@ def calculate_ci_from_report(
report_path: str,
probe_detector_pairs: Optional[List[Tuple[str, str]]] = None,
num_iterations: Optional[int] = None,
confidence_level: Optional[float] = None
confidence_level: Optional[float] = None,
) -> Dict[Tuple[str, str], Tuple[float, float]]:
"""Calculate bootstrap CIs for probe/detector pairs using report digest aggregates"""
report_file = Path(report_path)

if not report_file.exists():
raise FileNotFoundError(
f"Report file not found at: {report_file}. "
f"Expected to find garak report JSONL file."
)

# Pull defaults from config
if num_iterations is None:
num_iterations = _config.reporting.bootstrap_num_iterations
if confidence_level is None:
confidence_level = _config.reporting.bootstrap_confidence_level

# Read digest entry from report
digest = _get_report_digest(str(report_file))

if digest is None:
raise ValueError(
f"Report {report_file} missing 'digest' entry. "
f"Digest is required for CI calculation from aggregates. "
f"Ensure report was generated with garak v0.11.0 or later."
)

eval_data = digest.get("eval", {})
if not eval_data:
logging.warning("No evaluation data found in digest for %s", report_file)
return {}

# Load detector metrics for Se/Sp correction
detector_metrics = get_detector_metrics()
min_sample_size = _config.reporting.bootstrap_min_sample_size

ci_results = {}

# Iterate through digest structure: probe_group -> probe_class -> detector
for probe_group in eval_data:
for probe_key in eval_data[probe_group]:
if probe_key == "_summary":
continue

# Parse probe module and class from key (format: "module.class")
if "." not in probe_key:
continue

probe_name = probe_key

for detector_key in eval_data[probe_group][probe_key]:
if detector_key == "_summary":
continue

detector_name = detector_key

# Skip if not in requested pairs (if specified)
if probe_detector_pairs is not None:
if (probe_name, detector_name) not in probe_detector_pairs:
continue

detector_result = eval_data[probe_group][probe_key][detector_key]

# Extract aggregates
total = detector_result.get("total_evaluated", 0)
passed = detector_result.get("passed", 0)

if total == 0:
logging.warning(
"No evaluated samples for probe=%s, detector=%s",
probe_name,
detector_name
detector_name,
)
continue

# Check minimum sample size
if total < min_sample_size:
logging.warning(
"Insufficient samples for CI calculation: probe=%s, detector=%s, n=%d (minimum: %d)",
probe_name,
detector_name,
total,
min_sample_size
min_sample_size,
)
continue

# Reconstruct binary data from aggregates
# Order irrelevant: bootstrap resamples randomly with replacement
failed = total - passed
binary_results = _reconstruct_binary_from_aggregates(passed, failed)

# Get detector Se/Sp for correction
se, sp = detector_metrics.get_detector_se_sp(detector_key)

# Calculate bootstrap CI
ci_result = calculate_bootstrap_ci(
results=binary_results,
sensitivity=se,
specificity=sp,
num_iterations=num_iterations,
confidence_level=confidence_level
confidence_level=confidence_level,
)

if ci_result is not None:
ci_results[(probe_name, detector_name)] = ci_result
logging.debug(
Expand All @@ -179,9 +176,9 @@ def calculate_ci_from_report(
detector_name,
ci_result[0],
ci_result[1],
total
total,
)

return ci_results


Expand All @@ -190,86 +187,87 @@ def update_eval_entries_with_ci(
ci_results: Dict[Tuple[str, str], Tuple[float, float]],
output_path: Optional[str] = None,
confidence_method: Optional[str] = None,
confidence_level: Optional[float] = None
confidence_level: Optional[float] = None,
) -> None:
"""Update eval entries in report JSONL with new CI values, overwrites if output_path is None"""
if confidence_method is None:
confidence_method = _config.reporting.confidence_interval_method
if confidence_level is None:
confidence_level = _config.reporting.bootstrap_confidence_level
report_file = Path(report_path)

if not report_file.exists():
raise FileNotFoundError(
f"Report file not found at: {report_file}. "
f"Cannot update eval entries."
f"Report file not found at: {report_file}. " f"Cannot update eval entries."
)

# Use pathlib.Path for output handling
if output_path is None:
output_file = report_file.with_suffix(".tmp")
overwrite = True
else:
output_file = Path(output_path)
overwrite = False

try:
with open(report_file, "r", encoding="utf-8") as infile, \
open(output_file, "w", encoding="utf-8") as outfile:

with (
open(report_file, "r", encoding="utf-8") as infile,
open(output_file, "w", encoding="utf-8") as outfile,
):

for line_num, line in enumerate(infile, 1):
try:
entry = json.loads(line.strip())
except json.JSONDecodeError as e:
raise json.JSONDecodeError(
f"Malformed JSON at line {line_num} in {report_file}: {e.msg}",
e.doc,
e.pos
e.pos,
) from e

if entry.get("entry_type") == "digest":
logging.debug("Stripping stale digest entry (will be recalculated)")
continue

if entry.get("entry_type") == "start_run setup":
for param in _config.reporting_params:
entry[f"reporting.{param}"] = getattr(
_config.reporting, param
)
entry[f"reporting.{param}"] = getattr(_config.reporting, param)

if entry.get("entry_type") == "eval":
probe = entry.get("probe")
detector = entry.get("detector")

if probe is None or detector is None:
outfile.write(json.dumps(entry, ensure_ascii=False) + "\n")
continue

key = (probe, detector)

if key in ci_results:
ci_lower, ci_upper = ci_results[key]
entry["confidence_method"] = confidence_method
entry["confidence"] = str(confidence_level)
entry["confidence_lower"] = ci_lower / 100.0 # Store as 0-1 scale
entry["confidence_lower"] = (
ci_lower / 100.0
) # Store as 0-1 scale
entry["confidence_upper"] = ci_upper / 100.0

logging.debug(
"Updated CI for %s / %s: [%.2f, %.2f]",
probe,
detector,
ci_lower,
ci_upper
ci_upper,
)

outfile.write(json.dumps(entry, ensure_ascii=False) + "\n")

if overwrite:
output_file.replace(report_file)
logging.info("Updated report file: %s", report_file)
else:
logging.info("Wrote updated report to: %s", output_file)

except OSError as e:
if overwrite and output_file.exists():
output_file.unlink()
Expand Down
8 changes: 6 additions & 2 deletions garak/analyze/rebuild_cis.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,18 @@ def rebuild_cis_for_report(
existing_method = existing.get("confidence_method", "unknown")
existing_level = existing.get("confidence_level")
if existing_method != ci_method:
print(f"📊 Report used '{existing_method}' method. Rebuilding with '{ci_method}'.")
print(
f"📊 Report used '{existing_method}' method. Rebuilding with '{ci_method}'."
)
if existing_level is not None and abs(existing_level - active_level) > 1e-9:
print(
f"📊 Report has existing CIs at {existing_level * 100:.1f}% confidence. "
f"Rebuilding with {active_level * 100:.1f}% confidence."
)
else:
print(f"📊 Rebuilding CIs at {active_level * 100:.1f}% confidence for {report_file}")
print(
f"📊 Rebuilding CIs at {active_level * 100:.1f}% confidence for {report_file}"
)
else:
print(
f"📊 No existing CIs found in report. "
Expand Down
Loading
Loading