Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 3 additions & 19 deletions datafusion/expr/src/expr_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ use arrow::datatypes::{DataType, Field, FieldRef};
use datafusion_common::datatype::FieldExt;
use datafusion_common::metadata::FieldMetadata;
use datafusion_common::{
Column, DataFusionError, ExprSchema, Result, ScalarValue, Spans, TableReference,
not_impl_err, plan_datafusion_err, plan_err,
Column, ExprSchema, Result, ScalarValue, Spans, TableReference, not_impl_err,
plan_err,
};
use datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer;
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
Expand Down Expand Up @@ -628,23 +628,7 @@ fn verify_function_arguments<F: UDFCoercionExt>(
input_fields: &[FieldRef],
) -> Result<Vec<FieldRef>> {
fields_with_udf(input_fields, function).map_err(|err| {
let data_types = input_fields
.iter()
.map(|f| f.data_type())
.cloned()
.collect::<Vec<_>>();
plan_datafusion_err!(
"{} {}",
match err {
DataFusionError::Plan(msg) => msg,
err => err.to_string(),
},
utils::generate_signature_error_message(
function.name(),
function.signature(),
&data_types
)
)
utils::generate_signature_error_message(function, input_fields, err)
})
}

Expand Down
16 changes: 5 additions & 11 deletions datafusion/expr/src/type_coercion/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use datafusion_common::utils::{
ListCoercion, base_type, coerced_fixed_size_list_to_list,
};
use datafusion_common::{
Result, exec_err, internal_err, plan_err, types::NativeType, utils::list_ndims,
Result, internal_err, plan_err, types::NativeType, utils::list_ndims,
};
use datafusion_expr_common::signature::ArrayFunctionArgument;
use datafusion_expr_common::type_coercion::binary::type_union_resolution;
Expand Down Expand Up @@ -313,16 +313,10 @@ fn get_valid_types_with_udf<F: UDFCoercionExt>(
func: &F,
) -> Result<Vec<Vec<DataType>>> {
let valid_types = match signature {
TypeSignature::UserDefined => match func.coerce_types(current_types) {
Ok(coerced_types) => vec![coerced_types],
Err(e) => {
return exec_err!(
"Function '{}' user-defined coercion failed with {:?}",
func.name(),
e.strip_backtrace()
);
}
},
TypeSignature::UserDefined => {
let coerced_types = func.coerce_types(current_types)?;
vec![coerced_types]
}
TypeSignature::OneOf(signatures) => {
let mut res = vec![];
let mut errors = vec![];
Expand Down
131 changes: 104 additions & 27 deletions datafusion/expr/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,20 @@ use std::sync::Arc;

use crate::expr::{Alias, Sort, WildcardOptions, WindowFunctionParams};
use crate::expr_rewriter::strip_outer_reference;
use crate::type_coercion::functions::UDFCoercionExt;
use crate::{
BinaryExpr, Expr, ExprSchemable, Filter, GroupingSet, LogicalPlan, Operator, and,
};
use datafusion_expr_common::signature::{Signature, TypeSignature};

use arrow::datatypes::{DataType, Field, Schema};
use arrow::datatypes::{DataType, Field, FieldRef, Schema};
use datafusion_common::tree_node::{
Transformed, TransformedResult, TreeNode, TreeNodeRecursion,
};
use datafusion_common::utils::get_at_indices;
use datafusion_common::{
Column, DFSchema, DFSchemaRef, HashMap, Result, TableReference, internal_err,
plan_err,
Column, DFSchema, DFSchemaRef, DataFusionError, HashMap, Result, TableReference,
internal_err, plan_datafusion_err, plan_err,
};

#[cfg(not(feature = "sql"))]
Expand Down Expand Up @@ -971,13 +972,57 @@ pub fn generate_signature_error_msg(
/// round(Float64)
/// round(Float32)
/// ```
pub(crate) fn generate_signature_error_message(
func_name: &str,
func_signature: &Signature,
input_expr_types: &[DataType],
) -> String {
#[expect(deprecated)]
generate_signature_error_msg(func_name, func_signature.clone(), input_expr_types)
pub(crate) fn generate_signature_error_message<F: UDFCoercionExt>(
function: &F,
input_fields: &[FieldRef],
original_error: DataFusionError,
) -> DataFusionError {
let data_types = input_fields
.iter()
.map(|f| f.data_type())
.cloned()
.collect::<Vec<_>>();
let function_call_str = format!(
"{}({})",
function.name(),
TypeSignature::join_types(&data_types, ", "),
);

// UserDefined signatures are better served returning the original error, since
// we can't know what went wrong in the custom code
if function.signature().type_signature == TypeSignature::UserDefined {
let original_error = match original_error {
// Since we're returning a Plan error we don't want any double nesting.
// TODO: is there a better way to strip backtrace & the planning prefix?
err @ DataFusionError::Plan(_) => err
.strip_backtrace()
.strip_prefix("Error during planning: ")
.unwrap()
.to_string(),
err => err.strip_backtrace(),
};
return plan_datafusion_err!(
"User-defined coercion of function call '{function_call_str}' failed with:\n{original_error}"
);
}

// For other signature types, we don't rely on what specifically went wrong;
// we will only show the valid signatures to better display what is allowed
// (instead of enumerating everything that potentially went wrong).
drop(original_error);

let candidate_signatures = function
.signature()
.type_signature
.to_string_repr_with_names(function.signature().parameter_names.as_deref())
.iter()
.map(|args_str| format!("\t{}({args_str})", function.name()))
.collect::<Vec<String>>()
.join("\n");

plan_datafusion_err!(
"Failed to coerce function call '{function_call_str}'. You might need to add explicit type casts.\n\tCandidate functions:\n{candidate_signatures}"
)
}

/// Splits a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]`
Expand Down Expand Up @@ -1735,6 +1780,35 @@ mod tests {
assert!(!can_hash(&list_union_type));
}

struct MockUdf(Signature);

impl UDFCoercionExt for MockUdf {
fn name(&self) -> &str {
"mock"
}

fn signature(&self) -> &Signature {
&self.0
}

fn coerce_types(&self, _arg_types: &[DataType]) -> Result<Vec<DataType>> {
unimplemented!()
}
}

#[test]
fn test_generate_signature_error_msg_user_defined() {
let error = generate_signature_error_message(
&MockUdf(Signature::user_defined(Volatility::Immutable)),
&[Field::new("name", DataType::Int32, true).into()],
DataFusionError::Plan("Expected 'mock' to fail".to_string()),
);

let expected = "Error during planning: User-defined coercion of function call 'mock(Int32)' failed with:
Expected 'mock' to fail";
assert!(error.to_string().starts_with(expected));
}

#[test]
fn test_generate_signature_error_msg_with_parameter_names() {
let sig = Signature::one_of(
Expand All @@ -1755,18 +1829,17 @@ mod tests {
])
.expect("valid parameter names");

// Generate error message with only 1 argument provided
let error_msg =
generate_signature_error_message("substr", &sig, &[DataType::Utf8]);

assert!(
error_msg.contains("str: Utf8, start_pos: Int64"),
"Expected 'str: Utf8, start_pos: Int64' in error message, got: {error_msg}"
);
assert!(
error_msg.contains("str: Utf8, start_pos: Int64, length: Int64"),
"Expected 'str: Utf8, start_pos: Int64, length: Int64' in error message, got: {error_msg}"
let error = generate_signature_error_message(
&MockUdf(sig),
&[Field::new("name", DataType::Utf8, true).into()],
DataFusionError::Plan("".to_string()),
);

let expected = "Error during planning: Failed to coerce function call 'mock(Utf8)'. You might need to add explicit type casts.
\tCandidate functions:
\tmock(str: Utf8, start_pos: Int64)
\tmock(str: Utf8, start_pos: Int64, length: Int64)";
assert!(error.to_string().starts_with(expected));
}

#[test]
Expand All @@ -1776,12 +1849,16 @@ mod tests {
Volatility::Immutable,
);

let error_msg =
generate_signature_error_message("my_func", &sig, &[DataType::Int32]);

assert!(
error_msg.contains("Any, Any"),
"Expected 'Any, Any' without parameter names, got: {error_msg}"
let error = generate_signature_error_message(
&MockUdf(sig),
&[Field::new("name", DataType::Int32, true).into()],
DataFusionError::Plan("".to_string()),
);

let expected = "Error during planning: Failed to coerce function call 'mock(Int32)'. You might need to add explicit type casts.
\tCandidate functions:
\tmock(Any, Any)
\tmock(Any, Any, Any)";
assert!(error.to_string().starts_with(expected));
}
}
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/analyzer/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1830,7 +1830,7 @@ mod test {

let err = Projection::try_new(vec![udaf], empty).err().unwrap();
assert!(
err.strip_backtrace().starts_with("Error during planning: Failed to coerce arguments to satisfy a call to 'MY_AVG' function: coercion from Utf8 to the signature Uniform(1, [Float64]) failed")
err.strip_backtrace().starts_with("Error during planning: Failed to coerce function call 'MY_AVG(Utf8)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tMY_AVG(Float64)")
);
Ok(())
}
Expand Down Expand Up @@ -1891,7 +1891,7 @@ mod test {
.err()
.unwrap()
.strip_backtrace();
assert!(err.starts_with("Error during planning: Failed to coerce arguments to satisfy a call to 'avg' function: coercion from Utf8 to the signature Uniform(1, [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64]) failed"));
assert!(err.starts_with("Error during planning: Failed to coerce function call 'avg(Utf8)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tavg(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64)"));
Ok(())
}

Expand Down
15 changes: 9 additions & 6 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4744,41 +4744,44 @@ fn error_message_test(sql: &str, err_msg_starts_with: &str) {
fn test_error_message_invalid_scalar_function_signature() {
error_message_test(
"select sqrt()",
"Error during planning: 'sqrt' does not support zero arguments",
"Error during planning: Failed to coerce function call 'sqrt()'",
);
error_message_test(
"select sqrt(1, 2)",
"Error during planning: Failed to coerce arguments",
"Error during planning: Failed to coerce function call 'sqrt(Int64, Int64)'",
);
}

#[test]
fn test_error_message_invalid_aggregate_function_signature() {
error_message_test(
"select sum()",
"Error during planning: Execution error: Function 'sum' user-defined coercion failed with \"Execution error: sum function requires 1 argument, got 0\"",
"Error during planning: User-defined coercion of function call 'sum()' failed with:
Execution error: sum function requires 1 argument, got 0",
);
// We keep two different prefixes because they clarify each other.
// It might be incorrect, and we should consider keeping only one.
error_message_test(
"select max(9, 3)",
"Error during planning: Execution error: Function 'max' user-defined coercion failed",
"Error during planning: User-defined coercion of function call 'max(Int64, Int64)' failed with:
Execution error: min/max was called with 2 arguments. It requires only 1.",
);
}

#[test]
fn test_error_message_invalid_window_function_signature() {
error_message_test(
"select rank(1) over()",
"Error during planning: The function 'rank' expected zero argument but received 1",
"Error during planning: Failed to coerce function call 'rank(Int64)'",
);
}

#[test]
fn test_error_message_invalid_window_aggregate_function_signature() {
error_message_test(
"select sum() over()",
"Error during planning: Execution error: Function 'sum' user-defined coercion failed with \"Execution error: sum function requires 1 argument, got 0\"",
"Error during planning: User-defined coercion of function call 'sum()' failed with:
Execution error: sum function requires 1 argument, got 0",
);
}

Expand Down
12 changes: 6 additions & 6 deletions datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -144,26 +144,26 @@ statement error DataFusion error: Schema error: Schema contains duplicate unqual
SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100

# csv_query_approx_percentile_cont_with_weight
statement error Failed to coerce arguments to satisfy a call to 'approx_percentile_cont_with_weight' function
statement error Failed to coerce function call 'approx_percentile_cont_with_weight\(Utf8View, Int8, Float64\)'
SELECT approx_percentile_cont_with_weight(c2, 0.95) WITHIN GROUP (ORDER BY c1) FROM aggregate_test_100

statement error Failed to coerce arguments to satisfy a call to 'approx_percentile_cont_with_weight' function
statement error Failed to coerce function call 'approx_percentile_cont_with_weight\(Int16, Utf8View, Float64\)'
SELECT approx_percentile_cont_with_weight(c1, 0.95) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100

statement error Failed to coerce arguments to satisfy a call to 'approx_percentile_cont_with_weight' function
statement error Failed to coerce function call 'approx_percentile_cont_with_weight\(Int16, Int8, Utf8View\)'
SELECT approx_percentile_cont_with_weight(c2, c1) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100

# csv_query_approx_percentile_cont_with_histogram_bins
statement error DataFusion error: Error during planning: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be UInt > 0 literal \(got data type Int64\)\.
SELECT c1, approx_percentile_cont(0.95, -1000) WITHIN GROUP (ORDER BY c3) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1

statement error Failed to coerce arguments to satisfy a call to 'approx_percentile_cont' function
statement error Failed to coerce function call 'approx_percentile_cont\(Int16, Float64, Utf8View\)'
SELECT approx_percentile_cont(0.95, c1) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100

statement error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'approx_percentile_cont' function: coercion from Int16, Float64, Float64 to the signature OneOf(.*) failed(.|\n)*
statement error Failed to coerce function call 'approx_percentile_cont\(Int16, Float64, Float64\)'
SELECT approx_percentile_cont(0.95, 111.1) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100

statement error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'approx_percentile_cont' function: coercion from Float64, Float64, Float64 to the signature OneOf(.*) failed(.|\n)*
statement error Failed to coerce function call 'approx_percentile_cont\(Float64, Float64, Float64\)'
SELECT approx_percentile_cont(0.95, 111.1) WITHIN GROUP (ORDER BY c12) FROM aggregate_test_100

statement error DataFusion error: Error during planning: Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal
Expand Down
Loading