From 2f43c667762198041cdad301e7af55beee3d692d Mon Sep 17 00:00:00 2001 From: Bas van Dijk Date: Fri, 30 Jan 2026 17:59:04 +0000 Subject: [PATCH 1/4] chore: //ci/githubstats:query add 'impact' column --- ci/githubstats/query.py | 17 +++++---- ci/githubstats/top.sql | 83 +++++++++++++++++++++++------------------ 2 files changed, 56 insertions(+), 44 deletions(-) diff --git a/ci/githubstats/query.py b/ci/githubstats/query.py index 914f0e519644..69f04b448503 100755 --- a/ci/githubstats/query.py +++ b/ci/githubstats/query.py @@ -146,6 +146,7 @@ def top(args): headers = [desc[0] for desc in cursor.description] df = pd.DataFrame(cursor, columns=headers) + df["impact"] = df["impact"].apply(normalize_duration) df["duration_p90"] = df["duration_p90"].apply(normalize_duration) # Find the CODEOWNERS for each test target: @@ -171,13 +172,14 @@ def top(args): "left", # label "decimal", # total "decimal", # non_success - "decimal", # non_success% "decimal", # flaky - "decimal", # flaky% "decimal", # timeout - "decimal", # timeout% "decimal", # fail - "decimal", # fail% + "decimal", # non_success% + "decimal", # flaky% + "decimal", # timeout% + "decimal", # fail% + "right", # impact "right", # duration_p90 "left", # owners ] @@ -317,13 +319,14 @@ def main(): choices=[ "total", "non_success", - "non_success%", "flaky", - "flaky%", "timeout", - "timeout%", "fail", + "non_success%", + "flaky%", + "timeout%", "fail%", + "impact", "duration_p90", ], help="COLUMN to order by and have the condition flags like --gt, --ge, etc. apply to", diff --git a/ci/githubstats/top.sql b/ci/githubstats/top.sql index 920c496fb49e..fb3023ed1290 100644 --- a/ci/githubstats/top.sql +++ b/ci/githubstats/top.sql @@ -1,38 +1,47 @@ -WITH "top" AS ( - SELECT - label, - - COUNT(*) AS "total", - - SUM(CASE WHEN overall_status <> 1 THEN 1 ELSE 0 END) AS "non_success", - ROUND((SUM(CASE WHEN overall_status <> 1 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1) AS "non_success%", - - SUM(CASE WHEN overall_status = 2 THEN 1 ELSE 0 END) AS "flaky", - ROUND((SUM(CASE WHEN overall_status = 2 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1) AS "flaky%", - - SUM(CASE WHEN overall_status = 3 THEN 1 ELSE 0 END) AS "timeout", - ROUND((SUM(CASE WHEN overall_status = 3 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1) AS "timeout%", - - SUM(CASE WHEN overall_status = 4 THEN 1 ELSE 0 END) AS "fail", - ROUND((SUM(CASE WHEN overall_status = 4 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1) AS "fail%", - - percentile_disc(0.9) WITHIN GROUP (ORDER BY total_run_duration) * INTERVAL '1 second' AS "duration_p90" - - FROM - workflow_runs AS wr JOIN - bazel_invocations AS bi ON wr.id = bi.run_id JOIN - bazel_tests AS bt ON bi.build_id = bt.build_id - - WHERE - ({hide} = '' OR bt.label NOT LIKE {hide}) - AND ('{period}' = '' OR bt.first_start_time > now() - ('1 {period}'::interval)) - AND (NOT {only_prs} OR wr.event_type = 'pull_request') - AND ({branch} = '' OR wr.head_branch LIKE {branch}) - - GROUP BY label - - ORDER BY {order_by} DESC - - LIMIT {N} -) +WITH + "core" AS ( + SELECT + label, + COUNT(*) AS "total", + SUM(CASE WHEN overall_status <> 1 THEN 1 ELSE 0 END) AS "non_success", + SUM(CASE WHEN overall_status = 2 THEN 1 ELSE 0 END) AS "flaky", + SUM(CASE WHEN overall_status = 3 THEN 1 ELSE 0 END) AS "timeout", + SUM(CASE WHEN overall_status = 4 THEN 1 ELSE 0 END) AS "fail", + percentile_disc(0.9) WITHIN GROUP (ORDER BY total_run_duration) * INTERVAL '1 second' AS "duration_p90" + + FROM + workflow_runs AS wr JOIN + bazel_invocations AS bi ON wr.id = bi.run_id JOIN + bazel_tests AS bt ON bi.build_id = bt.build_id + + WHERE + ({hide} = '' OR bt.label NOT LIKE {hide}) + AND ('{period}' = '' OR bt.first_start_time > now() - ('1 {period}'::interval)) + AND (NOT {only_prs} OR wr.event_type = 'pull_request') + AND ({branch} = '' OR wr.head_branch LIKE {branch}) + + GROUP BY label + ), + "top" AS ( + SELECT + label, + "total", + "non_success", + "flaky", + "timeout", + "fail", + ROUND(("non_success" * 100.0) / "total", 1) AS "non_success%", + ROUND(("flaky" * 100.0) / "total", 1) AS "flaky%", + ROUND(("timeout" * 100.0) / "total", 1) AS "timeout%", + ROUND(("fail" * 100.0) / "total", 1) AS "fail%", + "non_success" * "duration_p90" AS "impact", + "duration_p90" + + FROM + "core" + + ORDER BY {order_by} DESC + + LIMIT {N} + ) SELECT * FROM "top" WHERE {condition} From db1cf0d82b70fc2ccd996ddba4779387f8b4512a Mon Sep 17 00:00:00 2001 From: Bas van Dijk Date: Fri, 30 Jan 2026 18:48:32 +0000 Subject: [PATCH 2/4] include docs --- ci/githubstats/query.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/ci/githubstats/query.py b/ci/githubstats/query.py index 69f04b448503..5020a582f20c 100755 --- a/ci/githubstats/query.py +++ b/ci/githubstats/query.py @@ -266,6 +266,11 @@ def direct_url_to_buildbuddy(url): print(tabulate(df[columns], headers="keys", tablefmt=args.tablefmt, colalign=colalignments)) +# argparse formatter to allow newlines in --help. +class RawDefaultsFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawTextHelpFormatter): + pass + + def main(): parser = argparse.ArgumentParser(prog="bazel run //ci/githubstats:query --") @@ -307,7 +312,7 @@ def main(): "top", parents=[common_parser, filter_parser], help="Get the top non-successful / flaky / failed / timed-out tests in the last period", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, + formatter_class=RawDefaultsFormatter, ) top_parser.add_argument( "N", type=int, nargs="?", default=10, help="If specified, limits the number of tests to show" @@ -329,7 +334,19 @@ def main(): "impact", "duration_p90", ], - help="COLUMN to order by and have the condition flags like --gt, --ge, etc. apply to", + help="""COLUMN to order by and have the condition flags like --gt, --ge, etc. apply to. + +total:\t\tTotal runs in the specified period +non_success:\tNumber of non-successful runs in the specified period +flaky:\t\tNumber of flaky runs in the specified period +timeout:\tNumber of timed-out runs in the specified period +fail:\t\tNumber of failed runs in the specified period +non_success%%:\tPercentage of non-successful runs in the specified period +flaky%%:\t\tPercentage of flaky runs in the specified period +timeout%%:\tPercentage of timed-out runs in the specified period +fail%%:\t\tPercentage of failed runs in the specified period +impact:\t\tnon_success * duration_p90. A rough estimate on the impact of failures +duration_p90:\t90th percentile duration of all runs in the specified period""", ) condition_group = top_parser.add_mutually_exclusive_group() From 7893282ab23325c8747538d906a2876b8594c9e4 Mon Sep 17 00:00:00 2001 From: Bas van Dijk Date: Fri, 30 Jan 2026 18:59:56 +0000 Subject: [PATCH 3/4] add examples in --help --- ci/githubstats/query.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/ci/githubstats/query.py b/ci/githubstats/query.py index 5020a582f20c..eec30831265a 100755 --- a/ci/githubstats/query.py +++ b/ci/githubstats/query.py @@ -311,8 +311,19 @@ def main(): top_parser = subparsers.add_parser( "top", parents=[common_parser, filter_parser], - help="Get the top non-successful / flaky / failed / timed-out tests in the last period", formatter_class=RawDefaultsFormatter, + help="Get the top non-successful / flaky / failed / timed-out tests in the last period", + epilog=""" +Examples: + # Show the top 10 most flaky tests in the last week + bazel run //ci/githubstats:query -- top 10 flaky% --week + + # Show the top 5 tests on PRs where failures had the highest impact in the last week + bazel run //ci/githubstats:query -- top 5 impact --prs --week + + # Show the 100 slowest tests in the last month + bazel run //ci/githubstats:query -- top 100 duration_p90 --month +""" ) top_parser.add_argument( "N", type=int, nargs="?", default=10, help="If specified, limits the number of tests to show" @@ -369,8 +380,13 @@ def main(): last_runs_parser = subparsers.add_parser( "last", parents=[common_parser, filter_parser], + formatter_class=RawDefaultsFormatter, help="Get the last runs of the specified test in the given period", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, + epilog=""" +Examples: + # Show the last flaky runs of the rent_subnet_test in the last week + bazel run //ci/githubstats:query -- last --flaky //rs/tests/nns:rent_subnet_test --week +""" ) last_runs_parser.add_argument("--success", action="store_true", help="Include successful runs") last_runs_parser.add_argument("--flaky", action="store_true", help="Include flaky runs") From 211702daf9f25a52f58451d0d95e42dc1a257304 Mon Sep 17 00:00:00 2001 From: Bas van Dijk Date: Fri, 30 Jan 2026 19:01:55 +0000 Subject: [PATCH 4/4] bazel run :ruff-format --- ci/githubstats/query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/githubstats/query.py b/ci/githubstats/query.py index eec30831265a..45cf830dcec6 100755 --- a/ci/githubstats/query.py +++ b/ci/githubstats/query.py @@ -323,7 +323,7 @@ def main(): # Show the 100 slowest tests in the last month bazel run //ci/githubstats:query -- top 100 duration_p90 --month -""" +""", ) top_parser.add_argument( "N", type=int, nargs="?", default=10, help="If specified, limits the number of tests to show" @@ -386,7 +386,7 @@ def main(): Examples: # Show the last flaky runs of the rent_subnet_test in the last week bazel run //ci/githubstats:query -- last --flaky //rs/tests/nns:rent_subnet_test --week -""" +""", ) last_runs_parser.add_argument("--success", action="store_true", help="Include successful runs") last_runs_parser.add_argument("--flaky", action="store_true", help="Include flaky runs")