From f3ece734f404962678ad93347272245cddbffb27 Mon Sep 17 00:00:00 2001 From: gstvg <28798827+gstvg@users.noreply.github.com> Date: Thu, 30 Apr 2026 03:38:18 +0000 Subject: [PATCH 1/3] minor: simplify coerce_values_for_lambdas usage --- datafusion/expr/src/higher_order_function.rs | 28 ++++--------------- .../expr/src/type_coercion/functions.rs | 25 ++++++++--------- 2 files changed, 17 insertions(+), 36 deletions(-) diff --git a/datafusion/expr/src/higher_order_function.rs b/datafusion/expr/src/higher_order_function.rs index 0e238ffc65f1e..aafdf898550ce 100644 --- a/datafusion/expr/src/higher_order_function.rs +++ b/datafusion/expr/src/higher_order_function.rs @@ -74,8 +74,6 @@ pub struct HigherOrderSignature { pub type_signature: HigherOrderTypeSignature, /// The volatility of the function. See [Volatility] for more information. pub volatility: Volatility, - /// Whether [HigherOrderUDF::coerce_values_for_lambdas] should be called - pub coerce_values_for_lambdas: bool, /// The max number of times to call [HigherOrderUDF::lambda_parameters] before raising an error. /// Used to guard against implementations that causes an infinite loop by endlessly returning /// [LambdaParametersProgress::Partial]. Defaults to 256 @@ -90,7 +88,6 @@ impl HigherOrderSignature { HigherOrderSignature { type_signature, volatility, - coerce_values_for_lambdas: false, lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS, } } @@ -100,7 +97,6 @@ impl HigherOrderSignature { Self { type_signature: HigherOrderTypeSignature::UserDefined, volatility, - coerce_values_for_lambdas: false, lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS, } } @@ -110,7 +106,6 @@ impl HigherOrderSignature { Self { type_signature: HigherOrderTypeSignature::VariadicAny, volatility, - coerce_values_for_lambdas: false, lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS, } } @@ -120,18 +115,9 @@ impl HigherOrderSignature { Self { type_signature: HigherOrderTypeSignature::Any(arg_count), volatility, - coerce_values_for_lambdas: false, lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS, } } - - /// Set [Self::coerce_values_for_lambdas] to true to indicate that [HigherOrderUDF::coerce_values_for_lambdas] - /// should be called - pub fn with_coerce_values_for_lambdas(mut self) -> Self { - self.coerce_values_for_lambdas = true; - - self - } } impl PartialEq for dyn HigherOrderUDF { @@ -518,7 +504,7 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any { /// /// The implementation can assume that some other part of the code has coerced /// the actual argument types to match [`Self::signature`], except the coercion defined by - /// [Self::coerce_values_for_lambdas], if applicable. + /// [Self::coerce_values_for_lambdas]. /// /// [`HigherOrderFunction`]: crate::expr::HigherOrderFunction /// [`HigherOrderFunction::lambda_parameters`]: crate::expr::HigherOrderFunction::lambda_parameters @@ -531,8 +517,7 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any { /// Coerce value arguments of a function call to types that the function can evaluate also taking into /// account the *output type of it's lambdas*. This differs from [HigherOrderUDF::coerce_value_types] /// that only has access to the type of it's value arguments because it's called before the output type - /// of lambdas are known. So that this method is called, the function must have it's - /// [HigherOrderSignature::coerce_values_for_lambdas] set to true + /// of lambdas are known. /// /// See the [type coercion module](crate::type_coercion) /// documentation for more details on type coercion @@ -552,18 +537,15 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any { fn coerce_values_for_lambdas( &self, _fields: &[ValueOrLambda], - ) -> Result> { - not_impl_err!( - "{} coerce_values_for_lambdas is not implemented", - self.name() - ) + ) -> Result>> { + Ok(None) } /// What type will be returned by this function, given the arguments? /// /// The implementation can assume that some other part of the code has coerced /// the actual argument types to match [`Self::signature`], including the coercion - /// defined by [Self::coerce_values_for_lambdas], if applicable. + /// defined by [Self::coerce_values_for_lambdas]. /// /// # Example creating `Field` /// diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 86616daf08c73..5e1c4f332161d 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -251,16 +251,16 @@ pub fn value_fields_with_higher_order_udf_and_lambdas( ) -> Result>> { let mut new_fields = value_fields_with_higher_order_udf(current_fields, func)?; - if func.signature().coerce_values_for_lambdas { - let new_types = new_fields - .iter() - .map(|f| match f { - ValueOrLambda::Value(f) => ValueOrLambda::Value(f.data_type().clone()), - ValueOrLambda::Lambda(f) => ValueOrLambda::Lambda(f.data_type().clone()), - }) - .collect::>(); + let new_types = new_fields + .iter() + .map(|f| match f { + ValueOrLambda::Value(f) => ValueOrLambda::Value(f.data_type().clone()), + ValueOrLambda::Lambda(f) => ValueOrLambda::Lambda(f.data_type().clone()), + }) + .collect::>(); - let mut new_value_types = func.coerce_values_for_lambdas(&new_types)?.into_iter(); + if let Some(new_value_types) = func.coerce_values_for_lambdas(&new_types)? { + let mut new_value_types = new_value_types.into_iter(); let value_types_count = new_types .iter() @@ -1851,7 +1851,7 @@ mod tests { fn coerce_values_for_lambdas( &self, fields: &[ValueOrLambda], - ) -> Result> { + ) -> Result>> { // thoerical impl of array_reduce without finish let [ ValueOrLambda::Value(list), @@ -1862,7 +1862,7 @@ mod tests { unreachable!() }; - Ok(vec![list.clone(), merge.clone()]) + Ok(Some(vec![list.clone(), merge.clone()])) } fn lambda_parameters( @@ -1925,8 +1925,7 @@ mod tests { #[test] fn test_higher_order_function_coerce_values_for_lambdas() { let fun = MockHigherOrderUDF { - signature: HigherOrderSignature::variadic_any(Volatility::Immutable) - .with_coerce_values_for_lambdas(), + signature: HigherOrderSignature::variadic_any(Volatility::Immutable), coerced_value_types: vec![], }; From 030c2fba2aa52ee2e4837a3e00da92f65f683b05 Mon Sep 17 00:00:00 2001 From: gstvg <28798827+gstvg@users.noreply.github.com> Date: Tue, 12 May 2026 18:14:02 -0300 Subject: [PATCH 2/3] fix doc comment --- datafusion/expr/src/higher_order_function.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/datafusion/expr/src/higher_order_function.rs b/datafusion/expr/src/higher_order_function.rs index aafdf898550ce..e17bf4a2e841c 100644 --- a/datafusion/expr/src/higher_order_function.rs +++ b/datafusion/expr/src/higher_order_function.rs @@ -526,12 +526,13 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any { /// * `fields`: The argument types of the value arguments of this function, or the output type of lambdas /// /// # Return value - /// A Vec with the same number of [ValueOrLambda::Value] in `fields`. DataFusion will `CAST` the - /// function call arguments to these specific types. + /// If `Some`, contains a Vec with the same number of [ValueOrLambda::Value] in `fields`. + /// DataFusion will `CAST` the function call arguments to these specific types. If `None`, no + /// coercion will be applied beyond the one defined by the function signature. /// /// For example, a flexible array_reduce implementation (see [Self::lambda_parameters] docs), when working /// with the expression below, may want to coerce it's initial value argument, the *integer* `0`, - /// to match the output it's merge function, which is a *float*: + /// to match the output of it's merge function, which is a *float*: /// /// `array_reduce([1.2, 2.1], 0, (acc, v) -> acc + v + 1.5, v -> v > 2.0)` fn coerce_values_for_lambdas( From ba110728414289a3ddf2dafac31e193e850abfb5 Mon Sep 17 00:00:00 2001 From: gstvg <28798827+gstvg@users.noreply.github.com> Date: Thu, 14 May 2026 07:51:52 -0300 Subject: [PATCH 3/3] fix docs to reflect new api --- datafusion/expr/src/higher_order_function.rs | 4 ++-- datafusion/expr/src/type_coercion/functions.rs | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/datafusion/expr/src/higher_order_function.rs b/datafusion/expr/src/higher_order_function.rs index e17bf4a2e841c..c5e5275926cc3 100644 --- a/datafusion/expr/src/higher_order_function.rs +++ b/datafusion/expr/src/higher_order_function.rs @@ -489,12 +489,12 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any { /// /// assert_eq!( /// coerce_to, - /// vec![ + /// Some(vec![ /// // return the same type for the array being reduced /// DataType::new_list(DataType::Float32, true), /// // coerce the initial value to the output of the merge lambda /// DataType::Float32, - /// ] + /// ]) /// ); /// /// ``` diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 5e1c4f332161d..8c26f23dafe81 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -158,9 +158,9 @@ pub fn fields_with_udf( /// argument must be coerced to match `signature`. /// For lambda arguments, returns a clone of the associated data /// -/// Note this does not invokes [HigherOrderUDF::coerce_values_for_lambdas] -/// if requested by the function signature. If that's required, use -/// [value_fields_with_higher_order_udf_and_lambdas] instead +/// Note this does not invokes [HigherOrderUDF::coerce_values_for_lambdas]. +/// If that's required, use [value_fields_with_higher_order_udf_and_lambdas] +/// instead /// /// For more details on coercion in general, please see the /// [`type_coercion`](crate::type_coercion) module. @@ -235,8 +235,8 @@ pub fn value_fields_with_higher_order_udf( /// Performs type coercion for higher order function arguments, /// including those defined by [HigherOrderUDF::coerce_values_for_lambdas], -/// if defined by the signature. Note that compared to -/// [value_fields_with_higher_order_udf], this function requires +/// if it returns `Some(...)` instead of the default `None`. Note that +/// compared to [value_fields_with_higher_order_udf], this function requires /// the [ValueOrLambda::Lambda] variant to contain the output field of the lambda. /// /// For value arguments, returns the field to which each