@@ -29,7 +29,8 @@ use arrow::datatypes::{Field, Schema, SchemaRef};
2929use datafusion_common:: stats:: { ColumnStatistics , Precision } ;
3030use datafusion_common:: tree_node:: { Transformed , TransformedResult , TreeNode } ;
3131use datafusion_common:: {
32- Result , ScalarValue , assert_or_internal_err, internal_datafusion_err, plan_err,
32+ Result , ScalarValue , Statistics , assert_or_internal_err, internal_datafusion_err,
33+ plan_err,
3334} ;
3435
3536use datafusion_physical_expr_common:: metrics:: ExecutionPlanMetricsSet ;
@@ -125,7 +126,8 @@ impl From<ProjectionExpr> for (Arc<dyn PhysicalExpr>, String) {
125126/// indices.
126127#[ derive( Debug , Clone , PartialEq , Eq ) ]
127128pub struct ProjectionExprs {
128- exprs : Vec < ProjectionExpr > ,
129+ /// [`Arc`] used for a cheap clone, which improves physical plan optimization performance.
130+ exprs : Arc < [ ProjectionExpr ] > ,
129131}
130132
131133impl std:: fmt:: Display for ProjectionExprs {
@@ -137,22 +139,24 @@ impl std::fmt::Display for ProjectionExprs {
137139
138140impl From < Vec < ProjectionExpr > > for ProjectionExprs {
139141 fn from ( value : Vec < ProjectionExpr > ) -> Self {
140- Self { exprs : value }
142+ Self {
143+ exprs : value. into ( ) ,
144+ }
141145 }
142146}
143147
144148impl From < & [ ProjectionExpr ] > for ProjectionExprs {
145149 fn from ( value : & [ ProjectionExpr ] ) -> Self {
146150 Self {
147- exprs : value. to_vec ( ) ,
151+ exprs : value. iter ( ) . cloned ( ) . collect ( ) ,
148152 }
149153 }
150154}
151155
152156impl FromIterator < ProjectionExpr > for ProjectionExprs {
153157 fn from_iter < T : IntoIterator < Item = ProjectionExpr > > ( exprs : T ) -> Self {
154158 Self {
155- exprs : exprs. into_iter ( ) . collect :: < Vec < _ > > ( ) ,
159+ exprs : exprs. into_iter ( ) . collect ( ) ,
156160 }
157161 }
158162}
@@ -164,12 +168,17 @@ impl AsRef<[ProjectionExpr]> for ProjectionExprs {
164168}
165169
166170impl ProjectionExprs {
167- pub fn new < I > ( exprs : I ) -> Self
168- where
169- I : IntoIterator < Item = ProjectionExpr > ,
170- {
171+ /// Make a new [`ProjectionExprs`] from an iterator of expressions.
172+ pub fn new ( exprs : impl IntoIterator < Item = ProjectionExpr > ) -> Self {
173+ Self {
174+ exprs : exprs. into_iter ( ) . collect ( ) ,
175+ }
176+ }
177+
178+ /// Make a new [`ProjectionExprs`] from expressions.
179+ pub fn from_expressions ( exprs : impl Into < Arc < [ ProjectionExpr ] > > ) -> Self {
171180 Self {
172- exprs : exprs. into_iter ( ) . collect :: < Vec < _ > > ( ) ,
181+ exprs : exprs. into ( ) ,
173182 }
174183 }
175184
@@ -285,13 +294,14 @@ impl ProjectionExprs {
285294 {
286295 let exprs = self
287296 . exprs
288- . into_iter ( )
297+ . iter ( )
298+ . cloned ( )
289299 . map ( |mut proj| {
290300 proj. expr = f ( proj. expr ) ?;
291301 Ok ( proj)
292302 } )
293- . collect :: < Result < Vec < _ > > > ( ) ?;
294- Ok ( Self :: new ( exprs) )
303+ . collect :: < Result < Arc < _ > > > ( ) ?;
304+ Ok ( Self :: from_expressions ( exprs) )
295305 }
296306
297307 /// Apply another projection on top of this projection, returning the combined projection.
@@ -361,7 +371,7 @@ impl ProjectionExprs {
361371 /// applied on top of this projection.
362372 pub fn try_merge ( & self , other : & ProjectionExprs ) -> Result < ProjectionExprs > {
363373 let mut new_exprs = Vec :: with_capacity ( other. exprs . len ( ) ) ;
364- for proj_expr in & other. exprs {
374+ for proj_expr in other. exprs . iter ( ) {
365375 let new_expr = update_expr ( & proj_expr. expr , & self . exprs , true ) ?
366376 . ok_or_else ( || {
367377 internal_datafusion_err ! (
@@ -602,12 +612,12 @@ impl ProjectionExprs {
602612 /// ```
603613 pub fn project_statistics (
604614 & self ,
605- mut stats : datafusion_common :: Statistics ,
615+ mut stats : Statistics ,
606616 output_schema : & Schema ,
607- ) -> Result < datafusion_common :: Statistics > {
617+ ) -> Result < Statistics > {
608618 let mut column_statistics = vec ! [ ] ;
609619
610- for proj_expr in & self . exprs {
620+ for proj_expr in self . exprs . iter ( ) {
611621 let expr = & proj_expr. expr ;
612622 let col_stats = if let Some ( col) = expr. as_any ( ) . downcast_ref :: < Column > ( ) {
613623 std:: mem:: take ( & mut stats. column_statistics [ col. index ( ) ] )
@@ -754,13 +764,52 @@ impl Projector {
754764 }
755765}
756766
757- impl IntoIterator for ProjectionExprs {
758- type Item = ProjectionExpr ;
759- type IntoIter = std:: vec:: IntoIter < ProjectionExpr > ;
767+ /// Describes an immutable reference counted projection.
768+ ///
769+ /// This structure represents projecting a set of columns by index.
770+ /// [`Arc`] is used to make it cheap to clone.
771+ pub type ProjectionRef = Arc < [ usize ] > ;
760772
761- fn into_iter ( self ) -> Self :: IntoIter {
762- self . exprs . into_iter ( )
763- }
773+ /// Combine two projections.
774+ ///
775+ /// If `p1` is [`None`], the result is [`None`], regardless of `p2`.
776+ /// Otherwise, if `p2` is [`None`], a clone of `p1` is returned as is.
777+ /// Otherwise, every index `i` in `p1` is replaced by `p2[i]`.
778+ ///
779+ /// # Example
780+ ///
781+ /// If `p1` is `[0, 2]` and `p2` is `[0, 2, 3]`, then each index in `p1` selects an
782+ /// entry of `p2`, so the resulting projection is `[p2[0], p2[2]]`, i.e. `[0, 3]`.
783+ ///
784+ /// # Errors
785+ ///
786+ /// Returns an internal error if `p1` contains an index that is greater than or equal to the length of `p2`.
787+ ///
788+ pub fn combine_projections (
789+ p1 : Option < & ProjectionRef > ,
790+ p2 : Option < & ProjectionRef > ,
791+ ) -> Result < Option < ProjectionRef > > {
792+ let Some ( p1) = p1 else {
793+ return Ok ( None ) ;
794+ } ;
795+ let Some ( p2) = p2 else {
796+ return Ok ( Some ( Arc :: clone ( p1) ) ) ;
797+ } ;
798+
799+ Ok ( Some (
800+ p1. iter ( )
801+ . map ( |i| {
802+ let idx = * i;
803+ assert_or_internal_err ! (
804+ idx < p2. len( ) ,
805+ "unable to apply projection: index {} is greater than new projection len {}" ,
806+ idx,
807+ p2. len( ) ,
808+ ) ;
809+ Ok ( p2[ * i] )
810+ } )
811+ . collect :: < Result < Arc < [ usize ] > > > ( ) ?,
812+ ) )
764813}
765814
766815/// The function operates in two modes:
0 commit comments