perf: Optimize round scalar performance (#19831)

kumarUjjawal · web-flow · commit d90d0746d64b · 2026-01-19T08:09:25.000Z
## Which issue does this PR close?  - Part of apache/datafusion-comet#2986 ## Rationale for this change The round function currently converts scalar inputs to arrays before processing, even when both value and decimal_places are scalar values. This adds unnecessary overhead for constant folding scenarios like  ## What changes are included in this PR? - Add scalar fast path in RoundFunc::invoke_with_args for Float64 and Float32 inputs - Directly compute the result when both inputs are scalars, avoiding array conversion overhead - Add benchmark  ## Are these changes tested? Yes Type | Before | After | Speedup -- | -- | -- | -- round_f64_scalar | 570 ns | 195 ns | 2.9x round_f32_scalar | 564 ns | 192 ns | 2.9x  ## Are there any user-facing changes? No
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
@@ -320,3 +320,8 @@ required-features = ["math_expressions"]
 harness = false
 name = "floor_ceil"
 required-features = ["math_expressions"]
+
+[[bench]]
+harness = false
+name = "round"
+required-features = ["math_expressions"]
diff --git a/datafusion/functions/benches/round.rs b/datafusion/functions/benches/round.rs
@@ -0,0 +1,154 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use arrow::datatypes::{DataType, Field, Float32Type, Float64Type};
+use arrow::util::bench_util::create_primitive_array;
+use criterion::{Criterion, SamplingMode, criterion_group, criterion_main};
+use datafusion_common::ScalarValue;
+use datafusion_common::config::ConfigOptions;
+use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
+use datafusion_functions::math::round;
+use std::hint::black_box;
+use std::sync::Arc;
+use std::time::Duration;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let round_fn = round();
+    let config_options = Arc::new(ConfigOptions::default());
+
+    for size in [1024, 4096, 8192] {
+        let mut group = c.benchmark_group(format!("round size={size}"));
+        group.sampling_mode(SamplingMode::Flat);
+        group.sample_size(10);
+        group.measurement_time(Duration::from_secs(10));
+
+        // Float64 array benchmark
+        let f64_array = Arc::new(create_primitive_array::<Float64Type>(size, 0.1));
+        let batch_len = f64_array.len();
+        let f64_args = vec![
+            ColumnarValue::Array(f64_array),
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
+        ];
+
+        group.bench_function("round_f64_array", |b| {
+            b.iter(|| {
+                let args_cloned = f64_args.clone();
+                black_box(
+                    round_fn
+                        .invoke_with_args(ScalarFunctionArgs {
+                            args: args_cloned,
+                            arg_fields: vec![
+                                Field::new("a", DataType::Float64, true).into(),
+                                Field::new("b", DataType::Int32, false).into(),
+                            ],
+                            number_rows: batch_len,
+                            return_field: Field::new("f", DataType::Float64, true).into(),
+                            config_options: Arc::clone(&config_options),
+                        })
+                        .unwrap(),
+                )
+            })
+        });
+
+        // Float32 array benchmark
+        let f32_array = Arc::new(create_primitive_array::<Float32Type>(size, 0.1));
+        let f32_args = vec![
+            ColumnarValue::Array(f32_array),
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
+        ];
+
+        group.bench_function("round_f32_array", |b| {
+            b.iter(|| {
+                let args_cloned = f32_args.clone();
+                black_box(
+                    round_fn
+                        .invoke_with_args(ScalarFunctionArgs {
+                            args: args_cloned,
+                            arg_fields: vec![
+                                Field::new("a", DataType::Float32, true).into(),
+                                Field::new("b", DataType::Int32, false).into(),
+                            ],
+                            number_rows: batch_len,
+                            return_field: Field::new("f", DataType::Float32, true).into(),
+                            config_options: Arc::clone(&config_options),
+                        })
+                        .unwrap(),
+                )
+            })
+        });
+
+        // Scalar benchmark (the optimization we added)
+        let scalar_f64_args = vec![
+            ColumnarValue::Scalar(ScalarValue::Float64(Some(std::f64::consts::PI))),
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
+        ];
+
+        group.bench_function("round_f64_scalar", |b| {
+            b.iter(|| {
+                let args_cloned = scalar_f64_args.clone();
+                black_box(
+                    round_fn
+                        .invoke_with_args(ScalarFunctionArgs {
+                            args: args_cloned,
+                            arg_fields: vec![
+                                Field::new("a", DataType::Float64, false).into(),
+                                Field::new("b", DataType::Int32, false).into(),
+                            ],
+                            number_rows: 1,
+                            return_field: Field::new("f", DataType::Float64, false)
+                                .into(),
+                            config_options: Arc::clone(&config_options),
+                        })
+                        .unwrap(),
+                )
+            })
+        });
+
+        let scalar_f32_args = vec![
+            ColumnarValue::Scalar(ScalarValue::Float32(Some(std::f32::consts::PI))),
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
+        ];
+
+        group.bench_function("round_f32_scalar", |b| {
+            b.iter(|| {
+                let args_cloned = scalar_f32_args.clone();
+                black_box(
+                    round_fn
+                        .invoke_with_args(ScalarFunctionArgs {
+                            args: args_cloned,
+                            arg_fields: vec![
+                                Field::new("a", DataType::Float32, false).into(),
+                                Field::new("b", DataType::Int32, false).into(),
+                            ],
+                            number_rows: 1,
+                            return_field: Field::new("f", DataType::Float32, false)
+                                .into(),
+                            config_options: Arc::clone(&config_options),
+                        })
+                        .unwrap(),
+                )
+            })
+        });
+
+        group.finish();
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/functions/src/math/round.rs b/datafusion/functions/src/math/round.rs
@@ -31,7 +31,7 @@ use arrow::error::ArrowError;
 use datafusion_common::types::{
     NativeType, logical_float32, logical_float64, logical_int32,
 };
-use datafusion_common::{Result, ScalarValue, exec_err};
+use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
 use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
 use datafusion_expr::{
     Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
@@ -141,7 +141,67 @@ impl ScalarUDFImpl for RoundFunc {
             &default_decimal_places
         };
 
-        round_columnar(&args.args[0], decimal_places, args.number_rows)
+        // Scalar fast path for float and decimal types - avoid array conversion overhead
+        if let (ColumnarValue::Scalar(value_scalar), ColumnarValue::Scalar(dp_scalar)) =
+            (&args.args[0], decimal_places)
+        {
+            if value_scalar.is_null() || dp_scalar.is_null() {
+                return ColumnarValue::Scalar(ScalarValue::Null)
+                    .cast_to(args.return_type(), None);
+            }
+
+            let dp = if let ScalarValue::Int32(Some(dp)) = dp_scalar {
+                *dp
+            } else {
+                return internal_err!(
+                    "Unexpected datatype for decimal_places: {}",
+                    dp_scalar.data_type()
+                );
+            };
+
+            match value_scalar {
+                ScalarValue::Float32(Some(v)) => {
+                    let rounded = round_float(*v, dp)?;
+                    Ok(ColumnarValue::Scalar(ScalarValue::from(rounded)))
+                }
+                ScalarValue::Float64(Some(v)) => {
+                    let rounded = round_float(*v, dp)?;
+                    Ok(ColumnarValue::Scalar(ScalarValue::from(rounded)))
+                }
+                ScalarValue::Decimal128(Some(v), precision, scale) => {
+                    let rounded = round_decimal(*v, *scale, dp)?;
+                    let scalar =
+                        ScalarValue::Decimal128(Some(rounded), *precision, *scale);
+                    Ok(ColumnarValue::Scalar(scalar))
+                }
+                ScalarValue::Decimal256(Some(v), precision, scale) => {
+                    let rounded = round_decimal(*v, *scale, dp)?;
+                    let scalar =
+                        ScalarValue::Decimal256(Some(rounded), *precision, *scale);
+                    Ok(ColumnarValue::Scalar(scalar))
+                }
+                ScalarValue::Decimal64(Some(v), precision, scale) => {
+                    let rounded = round_decimal(*v, *scale, dp)?;
+                    let scalar =
+                        ScalarValue::Decimal64(Some(rounded), *precision, *scale);
+                    Ok(ColumnarValue::Scalar(scalar))
+                }
+                ScalarValue::Decimal32(Some(v), precision, scale) => {
+                    let rounded = round_decimal(*v, *scale, dp)?;
+                    let scalar =
+                        ScalarValue::Decimal32(Some(rounded), *precision, *scale);
+                    Ok(ColumnarValue::Scalar(scalar))
+                }
+                _ => {
+                    internal_err!(
+                        "Unexpected datatype for value: {}",
+                        value_scalar.data_type()
+                    )
+                }
+            }
+        } else {
+            round_columnar(&args.args[0], decimal_places, args.number_rows)
+        }
     }
 
     fn output_ordering(&self, input: &[ExprProperties]) -> Result<SortProperties> {