From ee5c89cf8471c2b22b1a8250f069064612a04b05 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 29 Jan 2026 12:34:33 -0500 Subject: [PATCH 1/7] feat: add ExpressionPlacement enum for optimizer expression placement decisions This extracts the ExpressionPlacement enum from PR #20036 to provide a mechanism for expressions to indicate where they should be placed in the query plan for optimal execution. Changes: - Add ExpressionPlacement enum with variants: Literal, Column, PlaceAtLeaves, PlaceAtRoot - Add placement() method to Expr, ScalarUDF, ScalarUDFImpl traits - Add placement() method to PhysicalExpr trait and implementations - Implement placement() for GetFieldFunc to return PlaceAtLeaves when accessing struct fields with literal keys - Replace is_expr_trivial() checks with placement() in optimizer and physical-plan projection code Co-Authored-By: Claude Opus 4.5 --- datafusion/expr-common/src/lib.rs | 3 ++ datafusion/expr-common/src/placement.rs | 36 +++++++++++++++++++ datafusion/expr/src/expr.rs | 18 ++++++++++ datafusion/expr/src/lib.rs | 1 + datafusion/expr/src/udf.rs | 26 ++++++++++++++ datafusion/functions/src/core/getfield.rs | 30 ++++++++++++++-- .../optimizer/src/optimize_projections/mod.rs | 18 ++++------ .../physical-expr-common/src/physical_expr.rs | 11 ++++++ .../physical-expr/src/expressions/column.rs | 5 +++ .../physical-expr/src/expressions/literal.rs | 5 +++ .../physical-expr/src/scalar_function.rs | 9 +++-- datafusion/physical-plan/src/projection.rs | 27 +++++++------- 12 files changed, 161 insertions(+), 28 deletions(-) create mode 100644 datafusion/expr-common/src/placement.rs diff --git a/datafusion/expr-common/src/lib.rs b/datafusion/expr-common/src/lib.rs index 0018694d18eeb..c9a95fd294503 100644 --- a/datafusion/expr-common/src/lib.rs +++ b/datafusion/expr-common/src/lib.rs @@ -40,7 +40,10 @@ pub mod dyn_eq; pub mod groups_accumulator; pub mod interval_arithmetic; pub mod operator; +pub mod placement; pub mod signature; pub mod sort_properties; pub mod statistics; pub mod type_coercion; + +pub use placement::ExpressionPlacement; diff --git a/datafusion/expr-common/src/placement.rs b/datafusion/expr-common/src/placement.rs new file mode 100644 index 0000000000000..201ddea1db9be --- /dev/null +++ b/datafusion/expr-common/src/placement.rs @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Expression placement information for optimization decisions. + +/// Describes where an expression should be placed in the query plan for +/// optimal execution. This is used by optimizers to make decisions about +/// expression placement, such as whether to push expressions down through +/// projections. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ExpressionPlacement { + /// A constant literal value. + Literal, + /// A simple column reference. + Column, + /// A cheap expression that can be pushed to leaf nodes in the plan. + /// Examples include `get_field` for struct field access. + PlaceAtLeaves, + /// An expensive expression that should stay at the root of the plan. + /// This is the default for most expressions. + PlaceAtRoot, +} diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 8eae81bc5bc73..c22196376d262 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -31,6 +31,7 @@ use crate::{AggregateUDF, Volatility}; use crate::{ExprSchemable, Operator, Signature, WindowFrame, WindowUDF}; use arrow::datatypes::{DataType, Field, FieldRef}; +use datafusion_expr_common::placement::ExpressionPlacement; use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable}; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion, @@ -1536,6 +1537,23 @@ impl Expr { } } + /// Returns placement information for this expression. + /// + /// This is used by optimizers to make decisions about expression placement, + /// such as whether to push expressions down through projections. + pub fn placement(&self) -> ExpressionPlacement { + match self { + Expr::Column(_) => ExpressionPlacement::Column, + Expr::Literal(_, _) => ExpressionPlacement::Literal, + Expr::ScalarFunction(func) => { + let arg_placements: Vec<_> = + func.args.iter().map(|arg| arg.placement()).collect(); + func.func.placement(&arg_placements) + } + _ => ExpressionPlacement::PlaceAtRoot, + } + } + /// Return String representation of the variant represented by `self` /// Useful for non-rust based bindings pub fn variant_name(&self) -> &str { diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index 201f7a02515cc..cb136229bf88d 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -95,6 +95,7 @@ pub use datafusion_expr_common::accumulator::Accumulator; pub use datafusion_expr_common::columnar_value::ColumnarValue; pub use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator}; pub use datafusion_expr_common::operator::Operator; +pub use datafusion_expr_common::placement::ExpressionPlacement; pub use datafusion_expr_common::signature::{ ArrayFunctionArgument, ArrayFunctionSignature, Coercion, Signature, TIMEZONE_WILDCARD, TypeSignature, TypeSignatureClass, Volatility, diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 2183bdbea4d7d..2a32346d785f7 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -24,6 +24,7 @@ use crate::simplify::{ExprSimplifyResult, SimplifyContext}; use crate::sort_properties::{ExprProperties, SortProperties}; use crate::udf_eq::UdfEq; use crate::{ColumnarValue, Documentation, Expr, Signature}; +use datafusion_expr_common::placement::ExpressionPlacement; use arrow::datatypes::{DataType, Field, FieldRef}; #[cfg(debug_assertions)] use datafusion_common::assert_or_internal_err; @@ -361,6 +362,13 @@ impl ScalarUDF { pub fn as_async(&self) -> Option<&AsyncScalarUDF> { self.inner().as_any().downcast_ref::() } + + /// Returns placement information for this function. + /// + /// See [`ScalarUDFImpl::placement`] for more details. + pub fn placement(&self, args: &[ExpressionPlacement]) -> ExpressionPlacement { + self.inner.placement(args) + } } impl From for ScalarUDF @@ -964,6 +972,20 @@ pub trait ScalarUDFImpl: Debug + DynEq + DynHash + Send + Sync { fn documentation(&self) -> Option<&Documentation> { None } + + /// Returns placement information for this function. + /// + /// This is used by optimizers to make decisions about expression placement, + /// such as whether to push expressions down through projections. + /// + /// The default implementation returns [`ExpressionPlacement::PlaceAtRoot`], + /// meaning the expression should stay at the root of the plan. + /// + /// Override this method to indicate that the function can be pushed down + /// closer to the data source. + fn placement(&self, _args: &[ExpressionPlacement]) -> ExpressionPlacement { + ExpressionPlacement::PlaceAtRoot + } } /// ScalarUDF that adds an alias to the underlying function. It is better to @@ -1091,6 +1113,10 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { fn documentation(&self) -> Option<&Documentation> { self.inner.documentation() } + + fn placement(&self, args: &[ExpressionPlacement]) -> ExpressionPlacement { + self.inner.placement(args) + } } #[cfg(test)] diff --git a/datafusion/functions/src/core/getfield.rs b/datafusion/functions/src/core/getfield.rs index 47a903639dde5..dc02b55fa6f01 100644 --- a/datafusion/functions/src/core/getfield.rs +++ b/datafusion/functions/src/core/getfield.rs @@ -33,8 +33,8 @@ use datafusion_common::{ use datafusion_expr::expr::ScalarFunction; use datafusion_expr::simplify::ExprSimplifyResult; use datafusion_expr::{ - ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, - ScalarUDFImpl, Signature, Volatility, + ColumnarValue, Documentation, Expr, ExpressionPlacement, ReturnFieldArgs, + ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility, }; use datafusion_macros::user_doc; @@ -499,6 +499,32 @@ impl ScalarUDFImpl for GetFieldFunc { fn documentation(&self) -> Option<&Documentation> { self.doc() } + + fn placement(&self, args: &[ExpressionPlacement]) -> ExpressionPlacement { + // get_field can be pushed to leaves if: + // 1. The base (first arg) is a column or already placeable at leaves + // 2. All field keys (remaining args) are literals + if args.is_empty() { + return ExpressionPlacement::PlaceAtRoot; + } + + let base_placement = args[0]; + let base_is_pushable = matches!( + base_placement, + ExpressionPlacement::Column | ExpressionPlacement::PlaceAtLeaves + ); + + let all_keys_are_literals = args + .iter() + .skip(1) + .all(|p| matches!(p, ExpressionPlacement::Literal)); + + if base_is_pushable && all_keys_are_literals { + ExpressionPlacement::PlaceAtLeaves + } else { + ExpressionPlacement::PlaceAtRoot + } + } } #[cfg(test)] diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index f97b05ea68fbd..e1e88827d094a 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -30,8 +30,8 @@ use datafusion_common::{ }; use datafusion_expr::expr::Alias; use datafusion_expr::{ - Aggregate, Distinct, EmptyRelation, Expr, Projection, TableScan, Unnest, Window, - logical_plan::LogicalPlan, + Aggregate, Distinct, EmptyRelation, Expr, ExpressionPlacement, Projection, TableScan, + Unnest, Window, logical_plan::LogicalPlan, }; use crate::optimize_projections::required_indices::RequiredIndices; @@ -525,14 +525,15 @@ fn merge_consecutive_projections(proj: Projection) -> Result 1 - && !is_expr_trivial( - &prev_projection.expr - [prev_projection.schema.index_of_column(col).unwrap()], + && matches!( + prev_projection.expr[prev_projection.schema.index_of_column(col).unwrap()] + .placement(), + ExpressionPlacement::PlaceAtRoot ) }) { // no change @@ -586,11 +587,6 @@ fn merge_consecutive_projections(proj: Projection) -> Result bool { - matches!(expr, Expr::Column(_) | Expr::Literal(_, _)) -} - /// Rewrites a projection expression using the projection before it (i.e. its input) /// This is a subroutine to the `merge_consecutive_projections` function. /// diff --git a/datafusion/physical-expr-common/src/physical_expr.rs b/datafusion/physical-expr-common/src/physical_expr.rs index 2358a21940912..a4fe728c892ae 100644 --- a/datafusion/physical-expr-common/src/physical_expr.rs +++ b/datafusion/physical-expr-common/src/physical_expr.rs @@ -35,6 +35,7 @@ use datafusion_common::{ }; use datafusion_expr_common::columnar_value::ColumnarValue; use datafusion_expr_common::interval_arithmetic::Interval; +use datafusion_expr_common::placement::ExpressionPlacement; use datafusion_expr_common::sort_properties::ExprProperties; use datafusion_expr_common::statistics::Distribution; @@ -430,6 +431,16 @@ pub trait PhysicalExpr: Any + Send + Sync + Display + Debug + DynEq + DynHash { fn is_volatile_node(&self) -> bool { false } + + /// Returns placement information for this expression. + /// + /// This is used by optimizers to make decisions about expression placement, + /// such as whether to push expressions down through projections. + /// + /// The default implementation returns [`ExpressionPlacement::PlaceAtRoot`]. + fn placement(&self) -> ExpressionPlacement { + ExpressionPlacement::PlaceAtRoot + } } #[deprecated( diff --git a/datafusion/physical-expr/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/column.rs index 8c7e8c319fff4..cf844790a002e 100644 --- a/datafusion/physical-expr/src/expressions/column.rs +++ b/datafusion/physical-expr/src/expressions/column.rs @@ -30,6 +30,7 @@ use arrow::{ use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{Result, internal_err, plan_err}; use datafusion_expr::ColumnarValue; +use datafusion_expr_common::placement::ExpressionPlacement; /// Represents the column at a given index in a RecordBatch /// @@ -146,6 +147,10 @@ impl PhysicalExpr for Column { fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.name) } + + fn placement(&self) -> ExpressionPlacement { + ExpressionPlacement::Column + } } impl Column { diff --git a/datafusion/physical-expr/src/expressions/literal.rs b/datafusion/physical-expr/src/expressions/literal.rs index 1f3fefc60b7ad..9105297c96d61 100644 --- a/datafusion/physical-expr/src/expressions/literal.rs +++ b/datafusion/physical-expr/src/expressions/literal.rs @@ -33,6 +33,7 @@ use datafusion_common::{Result, ScalarValue}; use datafusion_expr::Expr; use datafusion_expr_common::columnar_value::ColumnarValue; use datafusion_expr_common::interval_arithmetic::Interval; +use datafusion_expr_common::placement::ExpressionPlacement; use datafusion_expr_common::sort_properties::{ExprProperties, SortProperties}; /// Represents a literal value @@ -134,6 +135,10 @@ impl PhysicalExpr for Literal { fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self, f) } + + fn placement(&self) -> ExpressionPlacement { + ExpressionPlacement::Literal + } } /// Create a literal expression diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index aa090743ad441..4ceaa28cfca2c 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -45,8 +45,8 @@ use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::sort_properties::ExprProperties; use datafusion_expr::type_coercion::functions::fields_with_udf; use datafusion_expr::{ - ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, Volatility, - expr_vec_fmt, + ColumnarValue, ExpressionPlacement, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, + Volatility, expr_vec_fmt, }; /// Physical expression of a scalar function @@ -362,6 +362,11 @@ impl PhysicalExpr for ScalarFunctionExpr { fn is_volatile_node(&self) -> bool { self.fun.signature().volatility == Volatility::Volatile } + + fn placement(&self) -> ExpressionPlacement { + let arg_placements: Vec<_> = self.args.iter().map(|arg| arg.placement()).collect(); + self.fun.placement(&arg_placements) + } } #[cfg(test)] diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index 8d4c775f87348..e96bb2864b6a3 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -20,7 +20,7 @@ //! of a projection on table `t1` where the expressions `a`, `b`, and `a+b` are the //! projection expressions. `SELECT` without `FROM` will only evaluate expressions. -use super::expressions::{Column, Literal}; +use super::expressions::Column; use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::{ DisplayAs, ExecutionPlanProperties, PlanProperties, RecordBatchStream, @@ -48,6 +48,7 @@ use datafusion_common::tree_node::{ }; use datafusion_common::{DataFusionError, JoinSide, Result, internal_err}; use datafusion_execution::TaskContext; +use datafusion_expr::ExpressionPlacement; use datafusion_physical_expr::equivalence::ProjectionMapping; use datafusion_physical_expr::projection::Projector; use datafusion_physical_expr::utils::{collect_columns, reassign_expr_columns}; @@ -285,10 +286,13 @@ impl ExecutionPlan for ProjectionExec { .as_ref() .iter() .all(|proj_expr| { - proj_expr.expr.as_any().is::() - || proj_expr.expr.as_any().is::() + !matches!( + proj_expr.expr.placement(), + ExpressionPlacement::PlaceAtRoot + ) }); - // If expressions are all either column_expr or Literal, then all computations in this projection are reorder or rename, + // If expressions are all either column_expr or Literal (or other cheap expressions), + // then all computations in this projection are reorder or rename, // and projection would not benefit from the repartition, benefits_from_input_partitioning will return false. vec![!all_simple_exprs] } @@ -1003,11 +1007,15 @@ fn try_unifying_projections( .unwrap(); }); // Merging these projections is not beneficial, e.g - // If an expression is not trivial and it is referred more than 1, unifies projections will be + // If an expression is not trivial (PlaceAtRoot) and it is referred more than 1, unifies projections will be // beneficial as caching mechanism for non-trivial computations. // See discussion in: https://github.com/apache/datafusion/issues/8296 if column_ref_map.iter().any(|(column, count)| { - *count > 1 && !is_expr_trivial(&Arc::clone(&child.expr()[column.index()].expr)) + *count > 1 + && matches!( + child.expr()[column.index()].expr.placement(), + ExpressionPlacement::PlaceAtRoot + ) }) { return Ok(None); } @@ -1117,13 +1125,6 @@ fn new_columns_for_join_on( (new_columns.len() == hash_join_on.len()).then_some(new_columns) } -/// Checks if the given expression is trivial. -/// An expression is considered trivial if it is either a `Column` or a `Literal`. -fn is_expr_trivial(expr: &Arc) -> bool { - expr.as_any().downcast_ref::().is_some() - || expr.as_any().downcast_ref::().is_some() -} - #[cfg(test)] mod tests { use super::*; From 1cb6effb09a50d2c9b5445a79b255d78ede1156d Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 29 Jan 2026 12:37:51 -0500 Subject: [PATCH 2/7] test: add placement unit tests for GetFieldFunc Add tests for GetFieldFunc::placement() covering: - Literal key access (leaf-pushable) - Column key access (not leaf-pushable) - PlaceAtRoot base expressions - Edge cases (empty args, literal base) Co-Authored-By: Claude Opus 4.5 --- datafusion/functions/src/core/getfield.rs | 76 +++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/datafusion/functions/src/core/getfield.rs b/datafusion/functions/src/core/getfield.rs index dc02b55fa6f01..9e42c888d4e60 100644 --- a/datafusion/functions/src/core/getfield.rs +++ b/datafusion/functions/src/core/getfield.rs @@ -568,4 +568,80 @@ mod tests { Ok(()) } + + #[test] + fn test_placement_literal_key() { + let func = GetFieldFunc::new(); + + // get_field(col, 'literal') -> leaf-pushable (static field access) + let args = vec![ExpressionPlacement::Column, ExpressionPlacement::Literal]; + assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtLeaves); + + // get_field(col, 'a', 'b') -> leaf-pushable (nested static field access) + let args = vec![ + ExpressionPlacement::Column, + ExpressionPlacement::Literal, + ExpressionPlacement::Literal, + ]; + assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtLeaves); + + // get_field(get_field(col, 'a'), 'b') represented as PlaceAtLeaves for base + let args = vec![ + ExpressionPlacement::PlaceAtLeaves, + ExpressionPlacement::Literal, + ]; + assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtLeaves); + } + + #[test] + fn test_placement_column_key() { + let func = GetFieldFunc::new(); + + // get_field(col, other_col) -> NOT leaf-pushable (dynamic per-row lookup) + let args = vec![ExpressionPlacement::Column, ExpressionPlacement::Column]; + assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtRoot); + + // get_field(col, 'a', other_col) -> NOT leaf-pushable (dynamic nested lookup) + let args = vec![ + ExpressionPlacement::Column, + ExpressionPlacement::Literal, + ExpressionPlacement::Column, + ]; + assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtRoot); + } + + #[test] + fn test_placement_root() { + let func = GetFieldFunc::new(); + + // get_field(root_expr, 'literal') -> NOT leaf-pushable + let args = vec![ + ExpressionPlacement::PlaceAtRoot, + ExpressionPlacement::Literal, + ]; + assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtRoot); + + // get_field(col, root_expr) -> NOT leaf-pushable + let args = vec![ + ExpressionPlacement::Column, + ExpressionPlacement::PlaceAtRoot, + ]; + assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtRoot); + } + + #[test] + fn test_placement_edge_cases() { + let func = GetFieldFunc::new(); + + // Empty args -> NOT leaf-pushable + assert_eq!(func.placement(&[]), ExpressionPlacement::PlaceAtRoot); + + // Just base, no key -> PlaceAtLeaves (not a valid call but should handle gracefully) + let args = vec![ExpressionPlacement::Column]; + assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtLeaves); + + // Literal base with literal key -> NOT leaf-pushable (would be constant-folded) + let args = vec![ExpressionPlacement::Literal, ExpressionPlacement::Literal]; + assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtRoot); + } } From 380117ca43a819d9baa0bd9cb2e36a8dcc59a196 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 29 Jan 2026 13:42:18 -0500 Subject: [PATCH 3/7] update tests --- datafusion/expr/src/expr.rs | 2 +- datafusion/expr/src/udf.rs | 2 +- .../optimizer/src/optimize_projections/mod.rs | 5 +++-- .../physical-expr/src/scalar_function.rs | 3 ++- .../test_files/projection_pushdown.slt | 22 +++++++++++++++---- datafusion/sqllogictest/test_files/unnest.slt | 4 ++-- 6 files changed, 27 insertions(+), 11 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index c22196376d262..6a795e5507de8 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -31,7 +31,6 @@ use crate::{AggregateUDF, Volatility}; use crate::{ExprSchemable, Operator, Signature, WindowFrame, WindowUDF}; use arrow::datatypes::{DataType, Field, FieldRef}; -use datafusion_expr_common::placement::ExpressionPlacement; use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable}; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion, @@ -39,6 +38,7 @@ use datafusion_common::tree_node::{ use datafusion_common::{ Column, DFSchema, HashMap, Result, ScalarValue, Spans, TableReference, }; +use datafusion_expr_common::placement::ExpressionPlacement; use datafusion_functions_window_common::field::WindowUDFFieldArgs; #[cfg(feature = "sql")] use sqlparser::ast::{ diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 2a32346d785f7..440478e3be68c 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -24,7 +24,6 @@ use crate::simplify::{ExprSimplifyResult, SimplifyContext}; use crate::sort_properties::{ExprProperties, SortProperties}; use crate::udf_eq::UdfEq; use crate::{ColumnarValue, Documentation, Expr, Signature}; -use datafusion_expr_common::placement::ExpressionPlacement; use arrow::datatypes::{DataType, Field, FieldRef}; #[cfg(debug_assertions)] use datafusion_common::assert_or_internal_err; @@ -32,6 +31,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::{ExprSchema, Result, ScalarValue, not_impl_err}; use datafusion_expr_common::dyn_eq::{DynEq, DynHash}; use datafusion_expr_common::interval_arithmetic::Interval; +use datafusion_expr_common::placement::ExpressionPlacement; use std::any::Any; use std::cmp::Ordering; use std::fmt::Debug; diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index e1e88827d094a..0d08def854438 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -531,8 +531,9 @@ fn merge_consecutive_projections(proj: Projection) -> Result 1 && matches!( - prev_projection.expr[prev_projection.schema.index_of_column(col).unwrap()] - .placement(), + prev_projection.expr + [prev_projection.schema.index_of_column(col).unwrap()] + .placement(), ExpressionPlacement::PlaceAtRoot ) }) { diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index 4ceaa28cfca2c..dab4153fa6828 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -364,7 +364,8 @@ impl PhysicalExpr for ScalarFunctionExpr { } fn placement(&self) -> ExpressionPlacement { - let arg_placements: Vec<_> = self.args.iter().map(|arg| arg.placement()).collect(); + let arg_placements: Vec<_> = + self.args.iter().map(|arg| arg.placement()).collect(); self.fun.placement(&arg_placements) } } diff --git a/datafusion/sqllogictest/test_files/projection_pushdown.slt b/datafusion/sqllogictest/test_files/projection_pushdown.slt index 4be83589495e7..3c148561d9ead 100644 --- a/datafusion/sqllogictest/test_files/projection_pushdown.slt +++ b/datafusion/sqllogictest/test_files/projection_pushdown.slt @@ -932,6 +932,21 @@ SELECT id, id + 100 as computed FROM simple_struct ORDER BY id LIMIT 3; # plan extracts the shared get_field for efficient computation ### +query TT +EXPLAIN SELECT (id + s['value']) * (id + s['value']) as id_and_value FROM simple_struct WHERE id > 2; +---- +logical_plan +01)Projection: __common_expr_1 * __common_expr_1 AS id_and_value +02)--Projection: simple_struct.id + get_field(simple_struct.s, Utf8("value")) AS __common_expr_1 +03)----Filter: simple_struct.id > Int64(2) +04)------TableScan: simple_struct projection=[id, s], partial_filters=[simple_struct.id > Int64(2)] +physical_plan +01)ProjectionExec: expr=[__common_expr_1@0 * __common_expr_1@0 as id_and_value] +02)--ProjectionExec: expr=[id@0 + get_field(s@1, value) as __common_expr_1] +03)----FilterExec: id@0 > 2 +04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/simple.parquet]]}, projection=[id, s], file_type=parquet, predicate=id@0 > 2, pruning_predicate=id_null_count@1 != row_count@2 AND id_max@0 > 2, required_guarantees=[] + + query TT EXPLAIN SELECT s['value'] + s['value'] as doubled FROM simple_struct WHERE id > 2; ---- @@ -941,10 +956,9 @@ logical_plan 03)----Filter: simple_struct.id > Int64(2) 04)------TableScan: simple_struct projection=[id, s], partial_filters=[simple_struct.id > Int64(2)] physical_plan -01)ProjectionExec: expr=[__common_expr_1@0 + __common_expr_1@0 as doubled] -02)--ProjectionExec: expr=[get_field(s@0, value) as __common_expr_1] -03)----FilterExec: id@0 > 2, projection=[s@1] -04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/simple.parquet]]}, projection=[id, s], file_type=parquet, predicate=id@0 > 2, pruning_predicate=id_null_count@1 != row_count@2 AND id_max@0 > 2, required_guarantees=[] +01)ProjectionExec: expr=[get_field(s@0, value) + get_field(s@0, value) as doubled] +02)--FilterExec: id@0 > 2, projection=[s@1] +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/simple.parquet]]}, projection=[id, s], file_type=parquet, predicate=id@0 > 2, pruning_predicate=id_null_count@1 != row_count@2 AND id_max@0 > 2, required_guarantees=[] # Verify correctness query I diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index f939cd0154a82..1a6b82020c667 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -673,8 +673,8 @@ logical_plan physical_plan 01)ProjectionExec: expr=[__unnest_placeholder(UNNEST(recursive_unnest_table.column3)[c1],depth=2)@0 as UNNEST(UNNEST(UNNEST(recursive_unnest_table.column3)[c1])), column3@1 as column3] 02)--UnnestExec -03)----ProjectionExec: expr=[get_field(__unnest_placeholder(recursive_unnest_table.column3,depth=1)@0, c1) as __unnest_placeholder(UNNEST(recursive_unnest_table.column3)[c1]), column3@1 as column3] -04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +04)------ProjectionExec: expr=[get_field(__unnest_placeholder(recursive_unnest_table.column3,depth=1)@0, c1) as __unnest_placeholder(UNNEST(recursive_unnest_table.column3)[c1]), column3@1 as column3] 05)--------UnnestExec 06)----------ProjectionExec: expr=[column3@0 as __unnest_placeholder(recursive_unnest_table.column3), column3@0 as column3] 07)------------DataSourceExec: partitions=1, partition_sizes=[1] From 9432e87c7ccb0664b88663cf0e19f99a742a0e8b Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:10:42 -0500 Subject: [PATCH 4/7] wrap up logic into function --- datafusion/expr-common/src/placement.rs | 23 +++++++++++++++++++ .../optimizer/src/optimize_projections/mod.rs | 13 ++++------- datafusion/physical-plan/src/projection.rs | 8 +++---- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/datafusion/expr-common/src/placement.rs b/datafusion/expr-common/src/placement.rs index 201ddea1db9be..0bc30ac184101 100644 --- a/datafusion/expr-common/src/placement.rs +++ b/datafusion/expr-common/src/placement.rs @@ -34,3 +34,26 @@ pub enum ExpressionPlacement { /// This is the default for most expressions. PlaceAtRoot, } + +impl ExpressionPlacement { + /// Returns true if the expression can be pushed down to leaf nodes + /// in the query plan. + /// + /// This returns true for: + /// - `Column`: Simple column references can be pushed down. They do no compute and do not increase or + /// decrease the amount of data being processed. + /// A projection that reduces the number of columns can eliminate unnecessary data early, + /// but this method only considers one expression at a time, not a projection as a whole. + /// - `PlaceAtLeaves`: Cheap expressions can be pushed down to leaves to take advantage of + /// early computation and potential optimizations at the data source level. + /// For example `struct_col['field']` is cheap to compute (just an Arc clone of the nested array for `'field'`) + /// and thus can reduce data early in the plan at very low compute cost. + /// It may even be possible to eliminate the expression entirely if the data source can project only the needed field + /// (as e.g. Parquet can). + pub fn should_push_to_leaves(&self) -> bool { + matches!( + self, + ExpressionPlacement::Column | ExpressionPlacement::PlaceAtLeaves + ) + } +} diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 0d08def854438..d618e3680dd7d 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -30,8 +30,8 @@ use datafusion_common::{ }; use datafusion_expr::expr::Alias; use datafusion_expr::{ - Aggregate, Distinct, EmptyRelation, Expr, ExpressionPlacement, Projection, TableScan, - Unnest, Window, logical_plan::LogicalPlan, + Aggregate, Distinct, EmptyRelation, Expr, Projection, TableScan, Unnest, Window, + logical_plan::LogicalPlan, }; use crate::optimize_projections::required_indices::RequiredIndices; @@ -530,12 +530,9 @@ fn merge_consecutive_projections(proj: Projection) -> Result 1 - && matches!( - prev_projection.expr - [prev_projection.schema.index_of_column(col).unwrap()] - .placement(), - ExpressionPlacement::PlaceAtRoot - ) + && !prev_projection.expr[prev_projection.schema.index_of_column(col).unwrap()] + .placement() + .should_push_to_leaves() }) { // no change return Projection::try_new_with_schema(expr, input, schema).map(Transformed::no); diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index e96bb2864b6a3..ec90213b1ec3e 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -1012,10 +1012,10 @@ fn try_unifying_projections( // See discussion in: https://github.com/apache/datafusion/issues/8296 if column_ref_map.iter().any(|(column, count)| { *count > 1 - && matches!( - child.expr()[column.index()].expr.placement(), - ExpressionPlacement::PlaceAtRoot - ) + && !child.expr()[column.index()] + .expr + .placement() + .should_push_to_leaves() }) { return Ok(None); } From bbce58e1f3c0655c500f7a94dd22d6beb20a0556 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 29 Jan 2026 15:11:03 -0500 Subject: [PATCH 5/7] update aggregate slt --- datafusion/sqllogictest/test_files/aggregate.slt | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 036bb93283cc6..2066c5d285062 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -7951,8 +7951,9 @@ logical_plan 02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] 03)----TableScan: t projection=[] physical_plan -01)ProjectionExec: expr=[2 as count(Int64(1)), 2 as count()] -02)--PlaceholderRowExec +01)ProjectionExec: expr=[count(Int64(1))@0 as count(Int64(1)), count(Int64(1))@0 as count()] +02)--ProjectionExec: expr=[2 as count(Int64(1))] +03)----PlaceholderRowExec query II select count(1), count(*) from t; @@ -7967,8 +7968,9 @@ logical_plan 02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] 03)----TableScan: t projection=[] physical_plan -01)ProjectionExec: expr=[2 as count(Int64(1)), 2 as count(*)] -02)--PlaceholderRowExec +01)ProjectionExec: expr=[count(Int64(1))@0 as count(Int64(1)), count(Int64(1))@0 as count(*)] +02)--ProjectionExec: expr=[2 as count(Int64(1))] +03)----PlaceholderRowExec query II select count(), count(*) from t; @@ -7983,8 +7985,9 @@ logical_plan 02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] 03)----TableScan: t projection=[] physical_plan -01)ProjectionExec: expr=[2 as count(), 2 as count(*)] -02)--PlaceholderRowExec +01)ProjectionExec: expr=[count(Int64(1))@0 as count(), count(Int64(1))@0 as count(*)] +02)--ProjectionExec: expr=[2 as count(Int64(1))] +03)----PlaceholderRowExec query TT explain select count(1) * count(2) from t; From e0b5e574709dcb23d0e1b39ab12551925483df91 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 29 Jan 2026 16:09:37 -0500 Subject: [PATCH 6/7] add comment about literals and cse: --- datafusion/optimizer/src/common_subexpr_eliminate.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index d9273a8f60fb2..5d29892a23252 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -702,6 +702,11 @@ impl CSEController for ExprCSEController<'_> { #[expect(deprecated)] let is_normal_minus_aggregates = matches!( node, + // TODO: there's an argument for removing `Literal` from here, + // maybe using `Expr::placemement().should_push_to_leaves()` instead + // so that we extract common literals and don't broadcast them to num_batch_rows multiple times. + // However that currently breaks things like `percentile_cont()` which expect literal arguments + // (and would instead be getting `col(__common_expr_n)`). Expr::Literal(..) | Expr::Column(..) | Expr::ScalarVariable(..) From f7a7155a20aae792c9a8dd78fa42b36d1b5105ae Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sat, 31 Jan 2026 12:56:00 -0500 Subject: [PATCH 7/7] rename --- datafusion/expr-common/src/placement.rs | 18 +++-- datafusion/expr/src/expr.rs | 2 +- datafusion/expr/src/udf.rs | 4 +- datafusion/functions/src/core/getfield.rs | 68 +++++++++++++------ .../optimizer/src/optimize_projections/mod.rs | 2 +- .../physical-expr-common/src/physical_expr.rs | 4 +- datafusion/physical-plan/src/projection.rs | 4 +- 7 files changed, 68 insertions(+), 34 deletions(-) diff --git a/datafusion/expr-common/src/placement.rs b/datafusion/expr-common/src/placement.rs index 0bc30ac184101..8c4ff108214d5 100644 --- a/datafusion/expr-common/src/placement.rs +++ b/datafusion/expr-common/src/placement.rs @@ -29,10 +29,14 @@ pub enum ExpressionPlacement { Column, /// A cheap expression that can be pushed to leaf nodes in the plan. /// Examples include `get_field` for struct field access. - PlaceAtLeaves, - /// An expensive expression that should stay at the root of the plan. - /// This is the default for most expressions. - PlaceAtRoot, + /// Pushing these expressions down in the plan can reduce data early + /// at low compute cost. + /// See [`ExpressionPlacement::should_push_to_leaves`] for details. + MoveTowardsLeafNodes, + /// An expensive expression that should stay where it is in the plan + /// or possibly be moved closer to the root nodes (this is not implemented yet). + /// Examples include complex scalar functions or UDFs. + MoveTowardsRootNodes, } impl ExpressionPlacement { @@ -40,11 +44,11 @@ impl ExpressionPlacement { /// in the query plan. /// /// This returns true for: - /// - `Column`: Simple column references can be pushed down. They do no compute and do not increase or + /// - [`ExpressionPlacement::Column`]: Simple column references can be pushed down. They do no compute and do not increase or /// decrease the amount of data being processed. /// A projection that reduces the number of columns can eliminate unnecessary data early, /// but this method only considers one expression at a time, not a projection as a whole. - /// - `PlaceAtLeaves`: Cheap expressions can be pushed down to leaves to take advantage of + /// - [`ExpressionPlacement::MoveTowardsLeafNodes`]: Cheap expressions can be pushed down to leaves to take advantage of /// early computation and potential optimizations at the data source level. /// For example `struct_col['field']` is cheap to compute (just an Arc clone of the nested array for `'field'`) /// and thus can reduce data early in the plan at very low compute cost. @@ -53,7 +57,7 @@ impl ExpressionPlacement { pub fn should_push_to_leaves(&self) -> bool { matches!( self, - ExpressionPlacement::Column | ExpressionPlacement::PlaceAtLeaves + ExpressionPlacement::Column | ExpressionPlacement::MoveTowardsLeafNodes ) } } diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 6a795e5507de8..ec6a378b3c069 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1550,7 +1550,7 @@ impl Expr { func.args.iter().map(|arg| arg.placement()).collect(); func.func.placement(&arg_placements) } - _ => ExpressionPlacement::PlaceAtRoot, + _ => ExpressionPlacement::MoveTowardsRootNodes, } } diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 440478e3be68c..01779b85568cd 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -978,13 +978,13 @@ pub trait ScalarUDFImpl: Debug + DynEq + DynHash + Send + Sync { /// This is used by optimizers to make decisions about expression placement, /// such as whether to push expressions down through projections. /// - /// The default implementation returns [`ExpressionPlacement::PlaceAtRoot`], + /// The default implementation returns [`ExpressionPlacement::MoveTowardsRootNodes`], /// meaning the expression should stay at the root of the plan. /// /// Override this method to indicate that the function can be pushed down /// closer to the data source. fn placement(&self, _args: &[ExpressionPlacement]) -> ExpressionPlacement { - ExpressionPlacement::PlaceAtRoot + ExpressionPlacement::MoveTowardsRootNodes } } diff --git a/datafusion/functions/src/core/getfield.rs b/datafusion/functions/src/core/getfield.rs index 9e42c888d4e60..d01ce82d522b6 100644 --- a/datafusion/functions/src/core/getfield.rs +++ b/datafusion/functions/src/core/getfield.rs @@ -505,13 +505,13 @@ impl ScalarUDFImpl for GetFieldFunc { // 1. The base (first arg) is a column or already placeable at leaves // 2. All field keys (remaining args) are literals if args.is_empty() { - return ExpressionPlacement::PlaceAtRoot; + return ExpressionPlacement::MoveTowardsRootNodes; } let base_placement = args[0]; let base_is_pushable = matches!( base_placement, - ExpressionPlacement::Column | ExpressionPlacement::PlaceAtLeaves + ExpressionPlacement::Column | ExpressionPlacement::MoveTowardsLeafNodes ); let all_keys_are_literals = args @@ -520,9 +520,9 @@ impl ScalarUDFImpl for GetFieldFunc { .all(|p| matches!(p, ExpressionPlacement::Literal)); if base_is_pushable && all_keys_are_literals { - ExpressionPlacement::PlaceAtLeaves + ExpressionPlacement::MoveTowardsLeafNodes } else { - ExpressionPlacement::PlaceAtRoot + ExpressionPlacement::MoveTowardsRootNodes } } } @@ -575,7 +575,10 @@ mod tests { // get_field(col, 'literal') -> leaf-pushable (static field access) let args = vec![ExpressionPlacement::Column, ExpressionPlacement::Literal]; - assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtLeaves); + assert_eq!( + func.placement(&args), + ExpressionPlacement::MoveTowardsLeafNodes + ); // get_field(col, 'a', 'b') -> leaf-pushable (nested static field access) let args = vec![ @@ -583,14 +586,20 @@ mod tests { ExpressionPlacement::Literal, ExpressionPlacement::Literal, ]; - assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtLeaves); + assert_eq!( + func.placement(&args), + ExpressionPlacement::MoveTowardsLeafNodes + ); - // get_field(get_field(col, 'a'), 'b') represented as PlaceAtLeaves for base + // get_field(get_field(col, 'a'), 'b') represented as MoveTowardsLeafNodes for base let args = vec![ - ExpressionPlacement::PlaceAtLeaves, + ExpressionPlacement::MoveTowardsLeafNodes, ExpressionPlacement::Literal, ]; - assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtLeaves); + assert_eq!( + func.placement(&args), + ExpressionPlacement::MoveTowardsLeafNodes + ); } #[test] @@ -599,7 +608,10 @@ mod tests { // get_field(col, other_col) -> NOT leaf-pushable (dynamic per-row lookup) let args = vec![ExpressionPlacement::Column, ExpressionPlacement::Column]; - assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtRoot); + assert_eq!( + func.placement(&args), + ExpressionPlacement::MoveTowardsRootNodes + ); // get_field(col, 'a', other_col) -> NOT leaf-pushable (dynamic nested lookup) let args = vec![ @@ -607,7 +619,10 @@ mod tests { ExpressionPlacement::Literal, ExpressionPlacement::Column, ]; - assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtRoot); + assert_eq!( + func.placement(&args), + ExpressionPlacement::MoveTowardsRootNodes + ); } #[test] @@ -616,17 +631,23 @@ mod tests { // get_field(root_expr, 'literal') -> NOT leaf-pushable let args = vec![ - ExpressionPlacement::PlaceAtRoot, + ExpressionPlacement::MoveTowardsRootNodes, ExpressionPlacement::Literal, ]; - assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtRoot); + assert_eq!( + func.placement(&args), + ExpressionPlacement::MoveTowardsRootNodes + ); // get_field(col, root_expr) -> NOT leaf-pushable let args = vec![ ExpressionPlacement::Column, - ExpressionPlacement::PlaceAtRoot, + ExpressionPlacement::MoveTowardsRootNodes, ]; - assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtRoot); + assert_eq!( + func.placement(&args), + ExpressionPlacement::MoveTowardsRootNodes + ); } #[test] @@ -634,14 +655,23 @@ mod tests { let func = GetFieldFunc::new(); // Empty args -> NOT leaf-pushable - assert_eq!(func.placement(&[]), ExpressionPlacement::PlaceAtRoot); + assert_eq!( + func.placement(&[]), + ExpressionPlacement::MoveTowardsRootNodes + ); - // Just base, no key -> PlaceAtLeaves (not a valid call but should handle gracefully) + // Just base, no key -> MoveTowardsLeafNodes (not a valid call but should handle gracefully) let args = vec![ExpressionPlacement::Column]; - assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtLeaves); + assert_eq!( + func.placement(&args), + ExpressionPlacement::MoveTowardsLeafNodes + ); // Literal base with literal key -> NOT leaf-pushable (would be constant-folded) let args = vec![ExpressionPlacement::Literal, ExpressionPlacement::Literal]; - assert_eq!(func.placement(&args), ExpressionPlacement::PlaceAtRoot); + assert_eq!( + func.placement(&args), + ExpressionPlacement::MoveTowardsRootNodes + ); } } diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index d618e3680dd7d..13df07a35144d 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -525,7 +525,7 @@ fn merge_consecutive_projections(proj: Projection) -> Result ExpressionPlacement { - ExpressionPlacement::PlaceAtRoot + ExpressionPlacement::MoveTowardsRootNodes } } diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index ec90213b1ec3e..d15dcd5612f7d 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -288,7 +288,7 @@ impl ExecutionPlan for ProjectionExec { .all(|proj_expr| { !matches!( proj_expr.expr.placement(), - ExpressionPlacement::PlaceAtRoot + ExpressionPlacement::MoveTowardsRootNodes ) }); // If expressions are all either column_expr or Literal (or other cheap expressions), @@ -1007,7 +1007,7 @@ fn try_unifying_projections( .unwrap(); }); // Merging these projections is not beneficial, e.g - // If an expression is not trivial (PlaceAtRoot) and it is referred more than 1, unifies projections will be + // If an expression is not trivial (MoveTowardsRootNodes) and it is referred more than 1, unifies projections will be // beneficial as caching mechanism for non-trivial computations. // See discussion in: https://github.com/apache/datafusion/issues/8296 if column_ref_map.iter().any(|(column, count)| {