Skip to content

Commit 92919a7

Browse files
committed
docs: auto generate metrics documentation
1 parent 50d20dd commit 92919a7

File tree

21 files changed

+785
-104
lines changed

21 files changed

+785
-104
lines changed

.github/workflows/rust.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,11 @@ jobs:
708708
# If you encounter an error, run './dev/update_function_docs.sh' and commit
709709
./dev/update_function_docs.sh
710710
git diff --exit-code
711+
- name: Check if metrics.md has been modified
712+
run: |
713+
# If you encounter an error, run './dev/update_metric_docs.sh' and commit
714+
./dev/update_metric_docs.sh
715+
git diff --exit-code
711716
712717
# Verify MSRV for the crates which are directly used by other projects:
713718
# - datafusion

Cargo.lock

Lines changed: 17 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ datafusion-datasource-parquet = { workspace = true, optional = true }
129129
datafusion-execution = { workspace = true }
130130
datafusion-expr = { workspace = true, default-features = false }
131131
datafusion-expr-common = { workspace = true }
132+
datafusion-doc = { workspace = true }
132133
datafusion-functions = { workspace = true }
133134
datafusion-functions-aggregate = { workspace = true }
134135
datafusion-functions-nested = { workspace = true, default-features = false, optional = true }
@@ -167,7 +168,6 @@ ctor = { workspace = true }
167168
dashmap = "6.1.0"
168169
datafusion-doc = { workspace = true }
169170
datafusion-functions-window-common = { workspace = true }
170-
datafusion-macros = { workspace = true }
171171
datafusion-physical-optimizer = { workspace = true }
172172
doc-comment = { workspace = true }
173173
env_logger = { workspace = true }
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Generate metrics documentation collected via `DocumentedMetrics`/`DocumentedExec` and write it to disk.
19+
//! Called from doc generation scripts to refresh `docs/source/user-guide/metrics.md`.
20+
21+
use std::{collections::HashSet, fs, path::PathBuf};
22+
23+
use datafusion_doc::metric_doc_sections::{
24+
ExecDoc, MetricDoc, MetricDocPosition, exec_docs, metric_docs,
25+
};
26+
use datafusion_execution as _; // Link metrics defined in execution crate.
27+
use datafusion_physical_plan as _; // Link metrics and execs defined in physical plan.
28+
29+
/// Apache license notice wrapped in an HTML comment (`<!--- ... -->`); `main`
/// writes it as the very first thing in the generated markdown file.
const LICENSE_HEADER: &str = "<!---
30+
Licensed to the Apache Software Foundation (ASF) under one
31+
or more contributor license agreements. See the NOTICE file
32+
distributed with this work for additional information
33+
regarding copyright ownership. The ASF licenses this file
34+
to you under the Apache License, Version 2.0 (the
35+
\"License\"); you may not use this file except in compliance
36+
with the License. You may obtain a copy of the License at
37+
38+
http://www.apache.org/licenses/LICENSE-2.0
39+
40+
Unless required by applicable law or agreed to in writing,
41+
software distributed under the License is distributed on an
42+
\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
43+
KIND, either express or implied. See the License for the
44+
specific language governing permissions and limitations
45+
under the License.
46+
-->";
47+
48+
/// Introductory paragraph that `main` emits directly below the `# Metrics` heading.
const INTRO: &str = "DataFusion operators expose runtime metrics so you can understand where time is spent and how much data flows through the pipeline. See more in [EXPLAIN ANALYZE](sql/explain.md#explain-analyze).";
49+
50+
fn main() -> std::io::Result<()> {
51+
let mut content = String::new();
52+
content.push_str(LICENSE_HEADER);
53+
content.push_str("\n\n# Metrics\n\n");
54+
content.push_str(INTRO);
55+
content.push_str("\n\n");
56+
57+
let mut metrics: Vec<&MetricDoc> = metric_docs().collect();
58+
metrics.sort_by(|a, b| a.name.cmp(b.name));
59+
60+
let mut execs: Vec<&ExecDoc> = exec_docs().collect();
61+
execs.sort_by(|a, b| a.name.cmp(b.name));
62+
63+
let common: Vec<&MetricDoc> = metrics
64+
.iter()
65+
.copied()
66+
.filter(|m| m.position == MetricDocPosition::Common)
67+
.collect();
68+
69+
// Collect names of common metric types for filtering embedded fields
70+
let common_metric_names: HashSet<&str> = common.iter().map(|m| m.name).collect();
71+
72+
if !common.is_empty() {
73+
content.push_str("## Common Metrics\n\n");
74+
for metric in common {
75+
render_metric_doc(&mut content, metric, 3, &common_metric_names);
76+
}
77+
}
78+
79+
if !execs.is_empty() {
80+
content.push_str("## Operator-specific Metrics\n\n");
81+
for exec in execs {
82+
render_exec_doc(&mut content, exec, &common_metric_names);
83+
}
84+
}
85+
86+
let path = output_path();
87+
if let Some(parent) = path.parent() {
88+
fs::create_dir_all(parent)?;
89+
}
90+
fs::write(path, content)
91+
}
92+
93+
fn render_exec_doc(
94+
out: &mut String,
95+
exec: &ExecDoc,
96+
common_metric_names: &HashSet<&str>,
97+
) {
98+
out.push_str(&heading(3, exec.name));
99+
out.push_str("\n\n");
100+
101+
if let Some(doc) = summarize(exec.doc) {
102+
if !doc.is_empty() {
103+
out.push_str(&sanitize(doc));
104+
out.push_str("\n\n");
105+
}
106+
}
107+
108+
// Filter to operator-specific metrics only (common metrics are documented separately)
109+
let mut metrics: Vec<&MetricDoc> = exec
110+
.metrics
111+
.iter()
112+
.copied()
113+
.filter(|metric| metric.position != MetricDocPosition::Common)
114+
.collect();
115+
metrics.sort_by(|a, b| a.name.cmp(b.name));
116+
117+
if metrics.is_empty() {
118+
out.push_str("_No operator-specific metrics documented._\n\n");
119+
} else {
120+
for metric in metrics {
121+
render_metric_doc(out, metric, 4, common_metric_names);
122+
}
123+
}
124+
}
125+
126+
fn render_metric_doc(
127+
out: &mut String,
128+
metric: &MetricDoc,
129+
heading_level: usize,
130+
common_metric_names: &HashSet<&str>,
131+
) {
132+
out.push_str(&heading(heading_level, metric.name));
133+
out.push_str("\n\n");
134+
135+
if let Some(doc) = summarize(metric.doc) {
136+
if !doc.is_empty() {
137+
out.push_str(&sanitize(doc));
138+
out.push_str("\n\n");
139+
}
140+
}
141+
142+
// Filter out fields whose type is a common metric (documented separately)
143+
let fields: Vec<_> = metric
144+
.fields
145+
.iter()
146+
.filter(|field| !common_metric_names.contains(field.type_name))
147+
.collect();
148+
149+
if fields.is_empty() {
150+
out.push_str("_No metrics documented._\n\n");
151+
return;
152+
}
153+
154+
out.push_str("| Metric | Description |\n");
155+
out.push_str("| --- | --- |\n");
156+
for field in fields {
157+
out.push_str(&format!("| {} | {} |\n", field.name, sanitize(field.doc)));
158+
}
159+
out.push('\n');
160+
}
161+
162+
/// Formats a markdown heading line: `level` `#` characters, one space, then
/// `title`. No trailing newline is added.
fn heading(level: usize, title: &str) -> String {
    let mut line = String::with_capacity(level + 1 + title.len());
    line.extend(std::iter::repeat('#').take(level));
    line.push(' ');
    line.push_str(title);
    line
}
165+
166+
/// Returns the first paragraph of `doc`, trimmed, or `None` when `doc` is
/// empty or whitespace-only.
///
/// A paragraph ends at the first blank line. A line counts as blank when it
/// contains only whitespace, so separators such as `"\n  \n"` or CRLF
/// (`"\r\n\r\n"`) are recognized in addition to a bare `"\n\n"`.
fn summarize(doc: &str) -> Option<&str> {
    let trimmed = doc.trim();
    if trimmed.is_empty() {
        return None;
    }

    // Walk line by line, tracking the byte offset, and stop at the first
    // whitespace-only line. `trimmed` starts with a non-whitespace character,
    // so the first line is never blank and the result is never empty.
    let mut end = trimmed.len();
    let mut offset = 0;
    for line in trimmed.split_inclusive('\n') {
        if line.trim().is_empty() {
            end = offset;
            break;
        }
        offset += line.len();
    }

    Some(trimmed[..end].trim_end())
}
173+
174+
/// Collapses every run of whitespace (including newlines) in `doc` into a
/// single space and drops leading/trailing whitespace, yielding one line.
fn sanitize(doc: &str) -> String {
    let mut words = doc.split_whitespace();
    let mut flat = String::with_capacity(doc.len());
    if let Some(first) = words.next() {
        flat.push_str(first);
        for word in words {
            flat.push(' ');
            flat.push_str(word);
        }
    }
    flat
}
177+
178+
fn output_path() -> PathBuf {
179+
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
180+
manifest_dir
181+
.parent()
182+
.and_then(|p| p.parent())
183+
.unwrap_or(&manifest_dir)
184+
.join("docs/source/user-guide/metrics.md")
185+
}

datafusion/doc/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ license = { workspace = true }
2828
authors = { workspace = true }
2929
rust-version = { workspace = true }
3030

31+
[dependencies]
32+
inventory = "0.3.15"
33+
3134
[package.metadata.docs.rs]
3235
all-features = true
3336

datafusion/doc/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,12 @@
2323
)]
2424
#![cfg_attr(docsrs, feature(doc_cfg))]
2525

26+
mod metrics;
2627
mod udaf;
2728
mod udf;
2829
mod udwf;
2930

31+
pub use metrics::metric_doc_sections;
3032
pub use udaf::aggregate_doc_sections;
3133
pub use udf::scalar_doc_sections;
3234
pub use udwf::window_doc_sections;

0 commit comments

Comments
 (0)