@@ -311,15 +311,15 @@ def test_write_performance_comparison_no_average_regression(self, comparator):
311311 output = StringIO ()
312312 regression_found = comparator ._write_performance_comparison (output , current_results , baseline_results )
313313
314- # Average change should be: (20 + (-2) + (-15)) / 3 = 1 %
314+ # Average change using geometric mean: (1.20 * 0.98 * 0.85) ** (1/3) - 1 ≈ -0.01%, which is displayed as "-0.0%"
315315 # This is less than 5% threshold, so no overall regression
316316 assert not regression_found
317317
318318 result = output .getvalue ()
319319 assert "SUMMARY" in result
320320 assert "Total benchmarks compared: 3" in result
321321 assert "Individual regressions (>5.0%): 1" in result # Only the +20% one
322- assert "Average time change: 1 .0%" in result
322+ assert "Average time change: -0 .0%" in result
323323 assert "✅ OVERALL OK" in result
324324
325325 def test_write_performance_comparison_with_average_regression (self , comparator ):
@@ -344,15 +344,15 @@ def test_write_performance_comparison_with_average_regression(self, comparator):
344344 output = StringIO ()
345345 regression_found = comparator ._write_performance_comparison (output , current_results , baseline_results )
346346
347- # Average change should be: (10 + 8 + (-1)) / 3 = 5.67 %
347+ # Average change using geometric mean: (1.10 * 1.08 * 0.99) ** (1/3) - 1 ≈ 5.6%
348348 # This exceeds 5% threshold, so overall regression found
349349 assert regression_found
350350
351351 result = output .getvalue ()
352352 assert "SUMMARY" in result
353353 assert "Total benchmarks compared: 3" in result
354354 assert "Individual regressions (>5.0%): 2" in result # The +10% and +8% ones
355- assert "Average time change: 5.7 %" in result
355+ assert "Average time change: 5.6 %" in result
356356 assert "🚨 OVERALL REGRESSION" in result
357357
358358 def test_write_performance_comparison_with_average_improvement (self , comparator ):
@@ -377,15 +377,15 @@ def test_write_performance_comparison_with_average_improvement(self, comparator)
377377 output = StringIO ()
378378 regression_found = comparator ._write_performance_comparison (output , current_results , baseline_results )
379379
380- # Average change should be: (-10 + (-8) + 2) / 3 = -5.33 %
380+ # Average change using geometric mean: (0.90 * 0.92 * 1.02) ** (1/3) - 1 ≈ -5.5%
381381 # This is significant improvement, so no regression found
382382 assert not regression_found
383383
384384 result = output .getvalue ()
385385 assert "SUMMARY" in result
386386 assert "Total benchmarks compared: 3" in result
387387 assert "Individual regressions (>5.0%): 0" in result
388- assert "Average time change: -5.3 %" in result
388+ assert "Average time change: -5.5 %" in result
389389 assert "🎉 OVERALL IMPROVEMENT" in result
390390
391391 def test_write_performance_comparison_missing_baseline (self , comparator ):
@@ -501,14 +501,14 @@ def test_realistic_mixed_performance_scenario(self, comparator):
501501 output = StringIO ()
502502 regression_found = comparator ._write_performance_comparison (output , current_results , baseline_results )
503503
504- # Average change: (3 + 7 + (-2) + (-12) + 4) / 5 = 0 %
504+ # Average change using geometric mean: (1.03 * 1.07 * 0.98 * 0.88 * 1.04) ** (1/5) - 1 ≈ -0.2%
505505 # No overall regression should be detected
506506 assert not regression_found
507507
508508 result = output .getvalue ()
509509 assert "Total benchmarks compared: 5" in result
510510 assert "Individual regressions (>5.0%): 1" in result # Only the 7% one
511- assert "Average time change: 0.0 %" in result
511+ assert "Average time change: -0.2 %" in result
512512 assert "✅ OVERALL OK" in result
513513
514514 def test_gradual_performance_degradation_scenario (self , comparator ):
@@ -571,15 +571,15 @@ def test_noisy_benchmarks_scenario(self, comparator):
571571 output = StringIO ()
572572 regression_found = comparator ._write_performance_comparison (output , current_results , baseline_results )
573573
574- # Average change: (2 + (-4) + 3 + 40 + (-10)) / 5 = 6.2 %
575- # Despite the one big outlier, overall regression should be detected
576- assert regression_found
574+ # Average change using geometric mean: (1.02 * 0.96 * 1.03 * 1.40 * 0.90) ** (1/5) - 1 ≈ 4.9%
575+ # Despite the one big outlier, no overall regression should be detected (4.9% < 5.0% threshold)
576+ assert not regression_found
577577
578578 result = output .getvalue ()
579579 assert "Total benchmarks compared: 5" in result
580580 assert "Individual regressions (>5.0%): 1" in result # Only the 40% outlier
581- assert "Average time change: 6.2 %" in result
582- assert "🚨 OVERALL REGRESSION " in result
581+ assert "Average time change: 4.9 %" in result
582+ assert "✅ OVERALL OK " in result
583583
584584
585585class TestEdgeCases :
@@ -652,6 +652,9 @@ def test_mixed_valid_invalid_baselines(self, comparator):
652652 output = StringIO ()
653653 regression_found = comparator ._write_performance_comparison (output , current_results , baseline_results )
654654
655+ # Should find regression due to the 10% change in the valid comparison
656+ assert regression_found
657+
655658 result = output .getvalue ()
656659 assert "Total benchmarks compared: 1" in result # Only one valid comparison
657660 assert "N/A (baseline mean is 0)" in result
0 commit comments