From e6716d74ddeaa023e9889ea3e4965737f69e05c3 Mon Sep 17 00:00:00 2001 From: buraksenn Date: Thu, 5 Mar 2026 15:12:55 +0300 Subject: [PATCH 01/10] attach span to cte and diagnostic --- datafusion/sql/src/cte.rs | 21 ++++-- datafusion/sql/src/planner.rs | 26 ++++++-- datafusion/sql/src/select.rs | 84 ++++++++++++++++++++---- datafusion/sql/tests/cases/diagnostic.rs | 54 +++++++++++++++ 4 files changed, 165 insertions(+), 20 deletions(-) diff --git a/datafusion/sql/src/cte.rs b/datafusion/sql/src/cte.rs index 18766d7056355..bbf76dd69bdb6 100644 --- a/datafusion/sql/src/cte.rs +++ b/datafusion/sql/src/cte.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::{ - Result, not_impl_err, plan_err, + Diagnostic, Result, Span, not_impl_err, plan_err, tree_node::{TreeNode, TreeNodeRecursion}, }; use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, TableSource}; @@ -37,10 +37,24 @@ impl SqlToRel<'_, S> { for cte in with.cte_tables { // A `WITH` block can't use the same name more than once let cte_name = self.ident_normalizer.normalize(cte.alias.name.clone()); + let cte_name_span = + Span::try_from_sqlparser_span(cte.alias.name.span); if planner_context.contains_cte(&cte_name) { + let first_span = planner_context.get_cte_span(&cte_name); + let mut diagnostic = Diagnostic::new_error( + format!( + "WITH query name {cte_name:?} specified more than once" + ), + cte_name_span, + ); + if let Some(first_span) = first_span { + diagnostic = + diagnostic.with_note("previously defined here", Some(first_span)); + } return plan_err!( "WITH query name {cte_name:?} specified more than once" - ); + ) + .map_err(|e| e.with_diagnostic(diagnostic)); } // Create a logical plan for the CTE @@ -53,8 +67,7 @@ impl SqlToRel<'_, S> { // Each `WITH` block can change the column names in the last // projection (e.g. "WITH table(t1, t2) AS SELECT 1, 2"). let final_plan = self.apply_table_alias(cte_plan, cte.alias)?; - // Export the CTE to the outer query - planner_context.insert_cte(cte_name, final_plan); + planner_context.insert_cte_with_span(cte_name, final_plan, cte_name_span); } Ok(()) } diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 307f28e8ff9ad..6c813073994d1 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -27,7 +27,7 @@ use datafusion_common::TableReference; use datafusion_common::config::SqlParserOptions; use datafusion_common::datatype::{DataTypeExt, FieldExt}; use datafusion_common::error::add_possible_columns_to_diag; -use datafusion_common::{DFSchema, DataFusionError, Result, not_impl_err, plan_err}; +use datafusion_common::{DFSchema, DataFusionError, Result, Span, not_impl_err, plan_err}; use datafusion_common::{ DFSchemaRef, Diagnostic, SchemaError, field_not_found, internal_err, plan_datafusion_err, @@ -258,9 +258,9 @@ pub struct PlannerContext { /// Data types for numbered parameters ($1, $2, etc), if supplied /// in `PREPARE` statement prepare_param_data_types: Arc>, - /// Map of CTE name to logical plan of the WITH clause. + /// Map of CTE name to logical plan of the WITH clause and optional span. /// Use `Arc` to allow cheap cloning - ctes: HashMap>, + ctes: HashMap, Option)>, /// The queries schemas of outer query relations, used to resolve the outer referenced /// columns in subquery (recursive aware) @@ -387,19 +387,35 @@ impl PlannerContext { /// Subquery for the specified name pub fn insert_cte(&mut self, cte_name: impl Into, plan: LogicalPlan) { let cte_name = cte_name.into(); - self.ctes.insert(cte_name, Arc::new(plan)); + self.ctes.insert(cte_name, (Arc::new(plan), None)); + } + + /// Inserts a LogicalPlan with an optional span for the CTE + pub(super) fn insert_cte_with_span( + &mut self, + cte_name: impl Into, + plan: LogicalPlan, + span: Option, + ) { + let cte_name = cte_name.into(); + self.ctes.insert(cte_name, (Arc::new(plan), span)); } /// Return a plan for the Common Table Expression (CTE) / Subquery for the /// specified name pub fn get_cte(&self, cte_name: &str) -> Option<&LogicalPlan> { - self.ctes.get(cte_name).map(|cte| cte.as_ref()) + self.ctes.get(cte_name).map(|(cte, _)| cte.as_ref()) } /// Remove the plan of CTE / Subquery for the specified name pub(super) fn remove_cte(&mut self, cte_name: &str) { self.ctes.remove(cte_name); } + + /// Get the span of a previously defined CTE name + pub(super) fn get_cte_span(&self, name: &str) -> Option { + self.ctes.get(name).and_then(|(_, span)| *span) + } } /// SQL query planner and binder diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index edf4b9ef79e83..b7074a7ae554c 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::ops::ControlFlow; use std::sync::Arc; @@ -29,7 +29,7 @@ use crate::utils::{ use datafusion_common::error::DataFusionErrorBuilder; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; -use datafusion_common::{Column, DFSchema, Result, not_impl_err, plan_err}; +use datafusion_common::{Column, DFSchema, Diagnostic, Result, Span, not_impl_err, plan_err}; use datafusion_common::{RecursionUnnestOption, UnnestOptions}; use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions}; use datafusion_expr::expr_rewriter::{ @@ -50,7 +50,9 @@ use sqlparser::ast::{ SelectItemQualifiedWildcardKind, WildcardAdditionalOptions, WindowType, visit_expressions_mut, }; -use sqlparser::ast::{NamedWindowDefinition, Select, SelectItem, TableWithJoins}; +use sqlparser::ast::{ + NamedWindowDefinition, Select, SelectItem, Spanned, TableFactor, TableWithJoins, +}; /// Result of the `aggregate` function, containing the aggregate plan and /// rewritten expressions that reference the aggregate output columns. @@ -690,21 +692,81 @@ impl SqlToRel<'_, S> { self.plan_table_with_joins(input, planner_context) } _ => { + let extract_table_name = + |t: &TableWithJoins| -> Option<(String, Option)> { + let span = + Span::try_from_sqlparser_span(t.relation.span()); + match &t.relation { + TableFactor::Table { alias: Some(a), .. } => { + let name = self + .ident_normalizer + .normalize(a.name.clone()); + Some((name, span)) + } + TableFactor::Table { name, alias: None, .. } => { + let table_name = name + .0 + .iter() + .filter_map(|p| p.as_ident()) + .map(|id| { + self.ident_normalizer + .normalize(id.clone()) + }) + .last()?; + Some((table_name, span)) + } + _ => None, + } + }; + + let mut alias_spans: HashMap> = + HashMap::new(); + let mut from = from.into_iter(); + let first = from.next().unwrap(); + + if let Some((name, span)) = extract_table_name(&first) { + alias_spans.entry(name).or_insert(span); + } + + let mut left = LogicalPlanBuilder::from( + self.plan_table_with_joins(first, planner_context)?, + ); - let mut left = LogicalPlanBuilder::from({ - let input = from.next().unwrap(); - self.plan_table_with_joins(input, planner_context)? - }); let old_outer_from_schema = { let left_schema = Some(Arc::clone(left.schema())); planner_context.set_outer_from_schema(left_schema) }; for input in from { - // Join `input` with the current result (`left`). - let right = self.plan_table_with_joins(input, planner_context)?; - left = left.cross_join(right)?; - // Update the outer FROM schema. + let current_span = + Span::try_from_sqlparser_span(input.relation.span()); + let current_name = extract_table_name(&input); + + if let Some((ref name, _)) = current_name { + alias_spans.entry(name.clone()).or_insert(current_span); + } + + let right = + self.plan_table_with_joins(input, planner_context)?; + + left = left.cross_join(right).map_err(|e| { + if let Some((ref name, _)) = current_name { + if let Some(prior_span) = + alias_spans.get(name).copied().flatten() + { + let mut diagnostic = Diagnostic::new_error( + "duplicate table alias in FROM clause", + current_span, + ); + diagnostic = diagnostic.with_note( + "first defined here", + Some(prior_span), + ); + return e.with_diagnostic(diagnostic); + } + } + e + })?; let left_schema = Some(Arc::clone(left.schema())); planner_context.set_outer_from_schema(left_schema); } diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index 7a729739469d3..96a3b18824422 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -390,3 +390,57 @@ fn test_syntax_error() -> Result<()> { }, } } + +#[test] +fn test_duplicate_cte_name() -> Result<()> { + let query = + "WITH /*a*/cte/*a*/ AS (SELECT 1 AS col), /*b*/cte/*b*/ AS (SELECT 2 AS col) SELECT 1"; + let spans = get_spans(query); + let diag = do_query(query); + assert_snapshot!(diag.message, @r#"WITH query name "cte" specified more than once"#); + assert_eq!(diag.span, Some(spans["b"])); + assert_eq!(diag.notes.len(), 1); + assert_snapshot!(diag.notes[0].message, @"previously defined here"); + assert_eq!(diag.notes[0].span, Some(spans["a"])); + Ok(()) +} + +#[test] +fn test_duplicate_table_alias() -> Result<()> { + let query = "SELECT * FROM /*a*/person a/*a*/, /*b*/person a/*b*/"; + let spans = get_spans(query); + let diag = do_query(query); + assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); + assert_eq!(diag.span, Some(spans["b"])); + assert_eq!(diag.notes.len(), 1); + assert_snapshot!(diag.notes[0].message, @"first defined here"); + assert_eq!(diag.notes[0].span, Some(spans["a"])); + Ok(()) +} + +#[test] +fn test_duplicate_table_alias_not_first() -> Result<()> { + let query = + "SELECT * FROM person a, /*b*/test_decimal b/*b*/, /*c*/person b/*c*/"; + let spans = get_spans(query); + let diag = do_query(query); + assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); + assert_eq!(diag.span, Some(spans["c"])); + assert_eq!(diag.notes.len(), 1); + assert_snapshot!(diag.notes[0].message, @"first defined here"); + assert_eq!(diag.notes[0].span, Some(spans["b"])); + Ok(()) +} + +#[test] +fn test_duplicate_bare_table_in_from() -> Result<()> { + let query = "SELECT * FROM /*a*/person/*a*/, /*b*/person/*b*/"; + let spans = get_spans(query); + let diag = do_query(query); + assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); + assert_eq!(diag.span, Some(spans["b"])); + assert_eq!(diag.notes.len(), 1); + assert_snapshot!(diag.notes[0].message, @"first defined here"); + assert_eq!(diag.notes[0].span, Some(spans["a"])); + Ok(()) +} From fbdbc6173b723722689da4a83fa04424511125e4 Mon Sep 17 00:00:00 2001 From: buraksenn Date: Thu, 5 Mar 2026 15:18:15 +0300 Subject: [PATCH 02/10] refactor: small adjustments --- datafusion/sql/src/cte.rs | 18 +++++++----------- datafusion/sql/src/select.rs | 16 +++++++--------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/datafusion/sql/src/cte.rs b/datafusion/sql/src/cte.rs index bbf76dd69bdb6..d7dc7b5fcc8ba 100644 --- a/datafusion/sql/src/cte.rs +++ b/datafusion/sql/src/cte.rs @@ -40,21 +40,17 @@ impl SqlToRel<'_, S> { let cte_name_span = Span::try_from_sqlparser_span(cte.alias.name.span); if planner_context.contains_cte(&cte_name) { - let first_span = planner_context.get_cte_span(&cte_name); - let mut diagnostic = Diagnostic::new_error( - format!( - "WITH query name {cte_name:?} specified more than once" - ), - cte_name_span, + let msg = format!( + "WITH query name {cte_name:?} specified more than once" ); - if let Some(first_span) = first_span { + let mut diagnostic = + Diagnostic::new_error(&msg, cte_name_span); + if let Some(first_span) = planner_context.get_cte_span(&cte_name) { diagnostic = diagnostic.with_note("previously defined here", Some(first_span)); } - return plan_err!( - "WITH query name {cte_name:?} specified more than once" - ) - .map_err(|e| e.with_diagnostic(diagnostic)); + return plan_err!("{msg}") + .map_err(|e| e.with_diagnostic(diagnostic)); } // Create a logical plan for the CTE diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index b7074a7ae554c..ba4c1f4080f12 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -738,27 +738,25 @@ impl SqlToRel<'_, S> { planner_context.set_outer_from_schema(left_schema) }; for input in from { - let current_span = - Span::try_from_sqlparser_span(input.relation.span()); let current_name = extract_table_name(&input); - if let Some((ref name, _)) = current_name { - alias_spans.entry(name.clone()).or_insert(current_span); + if let Some((ref name, ref span)) = current_name { + alias_spans.entry(name.clone()).or_insert(*span); } let right = self.plan_table_with_joins(input, planner_context)?; left = left.cross_join(right).map_err(|e| { - if let Some((ref name, _)) = current_name { + if let Some((ref name, ref current_span)) = current_name { if let Some(prior_span) = alias_spans.get(name).copied().flatten() { - let mut diagnostic = Diagnostic::new_error( + let diagnostic = Diagnostic::new_error( "duplicate table alias in FROM clause", - current_span, - ); - diagnostic = diagnostic.with_note( + *current_span, + ) + .with_note( "first defined here", Some(prior_span), ); From 10f69467fba0d95b21e1f4370a8322ac0202aabf Mon Sep 17 00:00:00 2001 From: buraksenn Date: Thu, 5 Mar 2026 15:55:49 +0300 Subject: [PATCH 03/10] cargo fmt --- datafusion/sql/src/cte.rs | 14 ++++------- datafusion/sql/src/planner.rs | 4 ++- datafusion/sql/src/select.rs | 32 ++++++++++-------------- datafusion/sql/tests/cases/diagnostic.rs | 6 ++--- 4 files changed, 23 insertions(+), 33 deletions(-) diff --git a/datafusion/sql/src/cte.rs b/datafusion/sql/src/cte.rs index d7dc7b5fcc8ba..63f0ba20cf7bd 100644 --- a/datafusion/sql/src/cte.rs +++ b/datafusion/sql/src/cte.rs @@ -37,20 +37,16 @@ impl SqlToRel<'_, S> { for cte in with.cte_tables { // A `WITH` block can't use the same name more than once let cte_name = self.ident_normalizer.normalize(cte.alias.name.clone()); - let cte_name_span = - Span::try_from_sqlparser_span(cte.alias.name.span); + let cte_name_span = Span::try_from_sqlparser_span(cte.alias.name.span); if planner_context.contains_cte(&cte_name) { - let msg = format!( - "WITH query name {cte_name:?} specified more than once" - ); - let mut diagnostic = - Diagnostic::new_error(&msg, cte_name_span); + let msg = + format!("WITH query name {cte_name:?} specified more than once"); + let mut diagnostic = Diagnostic::new_error(&msg, cte_name_span); if let Some(first_span) = planner_context.get_cte_span(&cte_name) { diagnostic = diagnostic.with_note("previously defined here", Some(first_span)); } - return plan_err!("{msg}") - .map_err(|e| e.with_diagnostic(diagnostic)); + return plan_err!("{msg}").map_err(|e| e.with_diagnostic(diagnostic)); } // Create a logical plan for the CTE diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 6c813073994d1..b816aa6f9e69c 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -27,7 +27,9 @@ use datafusion_common::TableReference; use datafusion_common::config::SqlParserOptions; use datafusion_common::datatype::{DataTypeExt, FieldExt}; use datafusion_common::error::add_possible_columns_to_diag; -use datafusion_common::{DFSchema, DataFusionError, Result, Span, not_impl_err, plan_err}; +use datafusion_common::{ + DFSchema, DataFusionError, Result, Span, not_impl_err, plan_err, +}; use datafusion_common::{ DFSchemaRef, Diagnostic, SchemaError, field_not_found, internal_err, plan_datafusion_err, diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index ba4c1f4080f12..468532a7fb964 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -29,7 +29,9 @@ use crate::utils::{ use datafusion_common::error::DataFusionErrorBuilder; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; -use datafusion_common::{Column, DFSchema, Diagnostic, Result, Span, not_impl_err, plan_err}; +use datafusion_common::{ + Column, DFSchema, Diagnostic, Result, Span, not_impl_err, plan_err, +}; use datafusion_common::{RecursionUnnestOption, UnnestOptions}; use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions}; use datafusion_expr::expr_rewriter::{ @@ -694,24 +696,21 @@ impl SqlToRel<'_, S> { _ => { let extract_table_name = |t: &TableWithJoins| -> Option<(String, Option)> { - let span = - Span::try_from_sqlparser_span(t.relation.span()); + let span = Span::try_from_sqlparser_span(t.relation.span()); match &t.relation { TableFactor::Table { alias: Some(a), .. } => { - let name = self - .ident_normalizer - .normalize(a.name.clone()); + let name = + self.ident_normalizer.normalize(a.name.clone()); Some((name, span)) } - TableFactor::Table { name, alias: None, .. } => { + TableFactor::Table { + name, alias: None, .. + } => { let table_name = name .0 .iter() .filter_map(|p| p.as_ident()) - .map(|id| { - self.ident_normalizer - .normalize(id.clone()) - }) + .map(|id| self.ident_normalizer.normalize(id.clone())) .last()?; Some((table_name, span)) } @@ -719,8 +718,7 @@ impl SqlToRel<'_, S> { } }; - let mut alias_spans: HashMap> = - HashMap::new(); + let mut alias_spans: HashMap> = HashMap::new(); let mut from = from.into_iter(); let first = from.next().unwrap(); @@ -744,8 +742,7 @@ impl SqlToRel<'_, S> { alias_spans.entry(name.clone()).or_insert(*span); } - let right = - self.plan_table_with_joins(input, planner_context)?; + let right = self.plan_table_with_joins(input, planner_context)?; left = left.cross_join(right).map_err(|e| { if let Some((ref name, ref current_span)) = current_name { @@ -756,10 +753,7 @@ impl SqlToRel<'_, S> { "duplicate table alias in FROM clause", *current_span, ) - .with_note( - "first defined here", - Some(prior_span), - ); + .with_note("first defined here", Some(prior_span)); return e.with_diagnostic(diagnostic); } } diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index 96a3b18824422..f3c455d446f29 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -393,8 +393,7 @@ fn test_syntax_error() -> Result<()> { #[test] fn test_duplicate_cte_name() -> Result<()> { - let query = - "WITH /*a*/cte/*a*/ AS (SELECT 1 AS col), /*b*/cte/*b*/ AS (SELECT 2 AS col) SELECT 1"; + let query = "WITH /*a*/cte/*a*/ AS (SELECT 1 AS col), /*b*/cte/*b*/ AS (SELECT 2 AS col) SELECT 1"; let spans = get_spans(query); let diag = do_query(query); assert_snapshot!(diag.message, @r#"WITH query name "cte" specified more than once"#); @@ -420,8 +419,7 @@ fn test_duplicate_table_alias() -> Result<()> { #[test] fn test_duplicate_table_alias_not_first() -> Result<()> { - let query = - "SELECT * FROM person a, /*b*/test_decimal b/*b*/, /*c*/person b/*c*/"; + let query = "SELECT * FROM person a, /*b*/test_decimal b/*b*/, /*c*/person b/*c*/"; let spans = get_spans(query); let diag = do_query(query); assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); From 17868149511fe5389234cc8f521c7ccc86873540 Mon Sep 17 00:00:00 2001 From: buraksenn Date: Thu, 5 Mar 2026 16:04:07 +0300 Subject: [PATCH 04/10] chore: address clippy --- datafusion/sql/src/select.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 468532a7fb964..e0206a9175f36 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -711,7 +711,7 @@ impl SqlToRel<'_, S> { .iter() .filter_map(|p| p.as_ident()) .map(|id| self.ident_normalizer.normalize(id.clone())) - .last()?; + .next_back()?; Some((table_name, span)) } _ => None, @@ -745,17 +745,16 @@ impl SqlToRel<'_, S> { let right = self.plan_table_with_joins(input, planner_context)?; left = left.cross_join(right).map_err(|e| { - if let Some((ref name, ref current_span)) = current_name { - if let Some(prior_span) = + if let Some((ref name, ref current_span)) = current_name + && let Some(prior_span) = alias_spans.get(name).copied().flatten() - { - let diagnostic = Diagnostic::new_error( - "duplicate table alias in FROM clause", - *current_span, - ) - .with_note("first defined here", Some(prior_span)); - return e.with_diagnostic(diagnostic); - } + { + let diagnostic = Diagnostic::new_error( + "duplicate table alias in FROM clause", + *current_span, + ) + .with_note("first defined here", Some(prior_span)); + return e.with_diagnostic(diagnostic); } e })?; From 308765676e0657d5a052eb6d628b55b86dcfaca8 Mon Sep 17 00:00:00 2001 From: buraksenn Date: Fri, 6 Mar 2026 16:23:03 +0300 Subject: [PATCH 05/10] address issue in the review --- datafusion/sql/src/select.rs | 31 ++++++++++++------------ datafusion/sql/tests/cases/diagnostic.rs | 26 ++++++++++++++++++++ 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index e0206a9175f36..a4192411ba735 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -738,26 +738,25 @@ impl SqlToRel<'_, S> { for input in from { let current_name = extract_table_name(&input); - if let Some((ref name, ref span)) = current_name { - alias_spans.entry(name.clone()).or_insert(*span); + if let Some((ref name, current_span)) = current_name { + if let Some(prior_span) = alias_spans.get(name) { + let mut diagnostic = Diagnostic::new_error( + "duplicate table alias in FROM clause", + current_span, + ); + if let Some(span) = *prior_span { + diagnostic = diagnostic + .with_note("first defined here", Some(span)); + } + return plan_err!("duplicate table alias in FROM clause") + .map_err(|e| e.with_diagnostic(diagnostic)); + } + alias_spans.insert(name.clone(), current_span); } let right = self.plan_table_with_joins(input, planner_context)?; - left = left.cross_join(right).map_err(|e| { - if let Some((ref name, ref current_span)) = current_name - && let Some(prior_span) = - alias_spans.get(name).copied().flatten() - { - let diagnostic = Diagnostic::new_error( - "duplicate table alias in FROM clause", - *current_span, - ) - .with_note("first defined here", Some(prior_span)); - return e.with_diagnostic(diagnostic); - } - e - })?; + left = left.cross_join(right)?; let left_schema = Some(Arc::clone(left.schema())); planner_context.set_outer_from_schema(left_schema); } diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index f3c455d446f29..6858a5709a82d 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -442,3 +442,29 @@ fn test_duplicate_bare_table_in_from() -> Result<()> { assert_eq!(diag.notes[0].span, Some(spans["a"])); Ok(()) } + +#[test] +fn test_duplicate_alias_non_overlapping_columns() -> Result<()> { + let query = "SELECT * FROM /*a*/j1 AS t/*a*/, /*b*/j2 AS t/*b*/"; + let spans = get_spans(query); + let diag = do_query(query); + assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); + assert_eq!(diag.span, Some(spans["b"])); + assert_eq!(diag.notes.len(), 1); + assert_snapshot!(diag.notes[0].message, @"first defined here"); + assert_eq!(diag.notes[0].span, Some(spans["a"])); + Ok(()) +} + +#[test] +fn test_duplicate_alias_non_overlapping_three_tables() -> Result<()> { + let query = "SELECT * FROM j1 AS x, /*a*/j2 AS t/*a*/, j3 AS y, /*b*/j1 AS t/*b*/"; + let spans = get_spans(query); + let diag = do_query(query); + assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); + assert_eq!(diag.span, Some(spans["b"])); + assert_eq!(diag.notes.len(), 1); + assert_snapshot!(diag.notes[0].message, @"first defined here"); + assert_eq!(diag.notes[0].span, Some(spans["a"])); + Ok(()) +} From 67a1e17e232797de829ef456a50c84870ea7d7e3 Mon Sep 17 00:00:00 2001 From: buraksenn Date: Mon, 9 Mar 2026 12:49:54 +0300 Subject: [PATCH 06/10] add derived,fucntion,nestedjoin etc and unit tests --- datafusion/sql/src/select.rs | 10 +++- datafusion/sql/tests/cases/diagnostic.rs | 64 ++++++++++++++++++++++-- 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index a4192411ba735..34bfff1b53c72 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -696,9 +696,13 @@ impl SqlToRel<'_, S> { _ => { let extract_table_name = |t: &TableWithJoins| -> Option<(String, Option)> { - let span = Span::try_from_sqlparser_span(t.relation.span()); match &t.relation { - TableFactor::Table { alias: Some(a), .. } => { + TableFactor::Table { alias: Some(a), .. } + | TableFactor::Derived { alias: Some(a), .. } + | TableFactor::Function { alias: Some(a), .. } + | TableFactor::UNNEST { alias: Some(a), .. } + | TableFactor::NestedJoin { alias: Some(a), .. } => { + let span = Span::try_from_sqlparser_span(a.name.span); let name = self.ident_normalizer.normalize(a.name.clone()); Some((name, span)) @@ -706,6 +710,8 @@ impl SqlToRel<'_, S> { TableFactor::Table { name, alias: None, .. } => { + let span = + Span::try_from_sqlparser_span(t.relation.span()); let table_name = name .0 .iter() diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index 6858a5709a82d..d514f4542f1e4 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -406,7 +406,7 @@ fn test_duplicate_cte_name() -> Result<()> { #[test] fn test_duplicate_table_alias() -> Result<()> { - let query = "SELECT * FROM /*a*/person a/*a*/, /*b*/person a/*b*/"; + let query = "SELECT * FROM person /*a*/a/*a*/, person /*b*/a/*b*/"; let spans = get_spans(query); let diag = do_query(query); assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); @@ -419,7 +419,7 @@ fn test_duplicate_table_alias() -> Result<()> { #[test] fn test_duplicate_table_alias_not_first() -> Result<()> { - let query = "SELECT * FROM person a, /*b*/test_decimal b/*b*/, /*c*/person b/*c*/"; + let query = "SELECT * FROM person a, test_decimal /*b*/b/*b*/, person /*c*/b/*c*/"; let spans = get_spans(query); let diag = do_query(query); assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); @@ -445,7 +445,7 @@ fn test_duplicate_bare_table_in_from() -> Result<()> { #[test] fn test_duplicate_alias_non_overlapping_columns() -> Result<()> { - let query = "SELECT * FROM /*a*/j1 AS t/*a*/, /*b*/j2 AS t/*b*/"; + let query = "SELECT * FROM j1 AS /*a*/t/*a*/, j2 AS /*b*/t/*b*/"; let spans = get_spans(query); let diag = do_query(query); assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); @@ -458,7 +458,63 @@ fn test_duplicate_alias_non_overlapping_columns() -> Result<()> { #[test] fn test_duplicate_alias_non_overlapping_three_tables() -> Result<()> { - let query = "SELECT * FROM j1 AS x, /*a*/j2 AS t/*a*/, j3 AS y, /*b*/j1 AS t/*b*/"; + let query = "SELECT * FROM j1 AS x, j2 AS /*a*/t/*a*/, j3 AS y, j1 AS /*b*/t/*b*/"; + let spans = get_spans(query); + let diag = do_query(query); + assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); + assert_eq!(diag.span, Some(spans["b"])); + assert_eq!(diag.notes.len(), 1); + assert_snapshot!(diag.notes[0].message, @"first defined here"); + assert_eq!(diag.notes[0].span, Some(spans["a"])); + Ok(()) +} + +#[test] +fn test_duplicate_derived_subquery_alias() -> Result<()> { + let query = + "SELECT * FROM (SELECT 1) AS /*a*/t/*a*/, (SELECT 2) AS /*b*/t/*b*/"; + let spans = get_spans(query); + let diag = do_query(query); + assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); + assert_eq!(diag.span, Some(spans["b"])); + assert_eq!(diag.notes.len(), 1); + assert_snapshot!(diag.notes[0].message, @"first defined here"); + assert_eq!(diag.notes[0].span, Some(spans["a"])); + Ok(()) +} + +#[test] +fn test_duplicate_alias_table_and_derived() -> Result<()> { + let query = + "SELECT * FROM person AS /*a*/t/*a*/, (SELECT 1) AS /*b*/t/*b*/"; + let spans = get_spans(query); + let diag = do_query(query); + assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); + assert_eq!(diag.span, Some(spans["b"])); + assert_eq!(diag.notes.len(), 1); + assert_snapshot!(diag.notes[0].message, @"first defined here"); + assert_eq!(diag.notes[0].span, Some(spans["a"])); + Ok(()) +} + +#[test] +fn test_duplicate_alias_derived_and_table() -> Result<()> { + let query = + "SELECT * FROM (SELECT 1) AS /*a*/t/*a*/, person AS /*b*/t/*b*/"; + let spans = get_spans(query); + let diag = do_query(query); + assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); + assert_eq!(diag.span, Some(spans["b"])); + assert_eq!(diag.notes.len(), 1); + assert_snapshot!(diag.notes[0].message, @"first defined here"); + assert_eq!(diag.notes[0].span, Some(spans["a"])); + Ok(()) +} + +#[test] +fn test_duplicate_nested_join_alias() -> Result<()> { + let query = + "SELECT * FROM (person CROSS JOIN j1) AS /*a*/t/*a*/, (person CROSS JOIN j2) AS /*b*/t/*b*/"; let spans = get_spans(query); let diag = do_query(query); assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); From db79b6dbf75c9c7ebd16c8eebb28466662a0c60e Mon Sep 17 00:00:00 2001 From: buraksenn Date: Mon, 9 Mar 2026 13:05:28 +0300 Subject: [PATCH 07/10] cargo fmt --- datafusion/sql/tests/cases/diagnostic.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index d514f4542f1e4..4c0781f37ce5b 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -471,8 +471,7 @@ fn test_duplicate_alias_non_overlapping_three_tables() -> Result<()> { #[test] fn test_duplicate_derived_subquery_alias() -> Result<()> { - let query = - "SELECT * FROM (SELECT 1) AS /*a*/t/*a*/, (SELECT 2) AS /*b*/t/*b*/"; + let query = "SELECT * FROM (SELECT 1) AS /*a*/t/*a*/, (SELECT 2) AS /*b*/t/*b*/"; let spans = get_spans(query); let diag = do_query(query); assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); @@ -485,8 +484,7 @@ fn test_duplicate_derived_subquery_alias() -> Result<()> { #[test] fn test_duplicate_alias_table_and_derived() -> Result<()> { - let query = - "SELECT * FROM person AS /*a*/t/*a*/, (SELECT 1) AS /*b*/t/*b*/"; + let query = "SELECT * FROM person AS /*a*/t/*a*/, (SELECT 1) AS /*b*/t/*b*/"; let spans = get_spans(query); let diag = do_query(query); assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); @@ -499,8 +497,7 @@ fn test_duplicate_alias_table_and_derived() -> Result<()> { #[test] fn test_duplicate_alias_derived_and_table() -> Result<()> { - let query = - "SELECT * FROM (SELECT 1) AS /*a*/t/*a*/, person AS /*b*/t/*b*/"; + let query = "SELECT * FROM (SELECT 1) AS /*a*/t/*a*/, person AS /*b*/t/*b*/"; let spans = get_spans(query); let diag = do_query(query); assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); @@ -513,8 +510,7 @@ fn test_duplicate_alias_derived_and_table() -> Result<()> { #[test] fn test_duplicate_nested_join_alias() -> Result<()> { - let query = - "SELECT * FROM (person CROSS JOIN j1) AS /*a*/t/*a*/, (person CROSS JOIN j2) AS /*b*/t/*b*/"; + let query = "SELECT * FROM (person CROSS JOIN j1) AS /*a*/t/*a*/, (person CROSS JOIN j2) AS /*b*/t/*b*/"; let spans = get_spans(query); let diag = do_query(query); assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); From 048005df186495cbd9ca0f6fc8a9efe875f2e364 Mon Sep 17 00:00:00 2001 From: buraksenn Date: Thu, 16 Apr 2026 17:03:57 +0300 Subject: [PATCH 08/10] detect duplicate table aliases in explicit JOINs and use full table reference for unaliased tables and refactor tests --- datafusion/sql/src/relation/join.rs | 40 ++- datafusion/sql/src/relation/mod.rs | 25 ++ datafusion/sql/src/select.rs | 64 ++--- datafusion/sql/tests/cases/diagnostic.rs | 299 +++++++++++++---------- 4 files changed, 258 insertions(+), 170 deletions(-) diff --git a/datafusion/sql/src/relation/join.rs b/datafusion/sql/src/relation/join.rs index 8e1a8817309f0..f5830ebc90118 100644 --- a/datafusion/sql/src/relation/join.rs +++ b/datafusion/sql/src/relation/join.rs @@ -16,12 +16,14 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{Column, Result, not_impl_err, plan_datafusion_err}; +use datafusion_common::{ + Column, Diagnostic, Result, Span, not_impl_err, plan_datafusion_err, plan_err, +}; use datafusion_expr::{JoinType, LogicalPlan, LogicalPlanBuilder}; use sqlparser::ast::{ Join, JoinConstraint, JoinOperator, ObjectName, TableFactor, TableWithJoins, }; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; impl SqlToRel<'_, S> { pub(crate) fn plan_table_with_joins( @@ -29,13 +31,39 @@ impl SqlToRel<'_, S> { t: TableWithJoins, planner_context: &mut PlannerContext, ) -> Result { - let mut left = if is_lateral(&t.relation) { - self.create_relation_subquery(t.relation, planner_context)? + let TableWithJoins { relation, joins } = t; + + let mut alias_spans: HashMap> = HashMap::new(); + + if let Some((name, span)) = self.extract_relation_name(&relation)? { + alias_spans.insert(name, span); + } + + let mut left = if is_lateral(&relation) { + self.create_relation_subquery(relation, planner_context)? } else { - self.create_relation(t.relation, planner_context)? + self.create_relation(relation, planner_context)? }; let old_outer_from_schema = planner_context.outer_from_schema(); - for join in t.joins { + for join in joins { + if let Some((ref name, current_span)) = + self.extract_relation_name(&join.relation)? + { + if let Some(prior_span) = alias_spans.get(name) { + let mut diagnostic = Diagnostic::new_error( + "duplicate table alias in FROM clause", + current_span, + ); + if let Some(span) = *prior_span { + diagnostic = + diagnostic.with_note("first defined here", Some(span)); + } + return plan_err!("duplicate table alias in FROM clause") + .map_err(|e| e.with_diagnostic(diagnostic)); + } + alias_spans.insert(name.clone(), current_span); + } + planner_context.extend_outer_from_schema(left.schema())?; left = self.parse_relation_join(left, join, planner_context)?; } diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 6558763ca4e42..cde68005345b8 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -80,6 +80,31 @@ impl<'a, 'b, S: ContextProvider> RelationPlannerContext } impl SqlToRel<'_, S> { + pub(crate) fn extract_relation_name( + &self, + relation: &TableFactor, + ) -> Result)>> { + match relation { + TableFactor::Table { alias: Some(a), .. } + | TableFactor::Derived { alias: Some(a), .. } + | TableFactor::Function { alias: Some(a), .. } + | TableFactor::UNNEST { alias: Some(a), .. } + | TableFactor::NestedJoin { alias: Some(a), .. } => { + let span = Span::try_from_sqlparser_span(a.name.span); + let name = self.ident_normalizer.normalize(a.name.clone()); + Ok(Some((name, span))) + } + TableFactor::Table { + name, alias: None, .. + } => { + let span = Span::try_from_sqlparser_span(relation.span()); + let table_ref = self.object_name_to_table_reference(name.clone())?; + Ok(Some((table_ref.to_string(), span))) + } + _ => Ok(None), + } + } + /// Create a `LogicalPlan` that scans the named relation. /// /// First tries any registered extension planners. If no extension handles diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 34bfff1b53c72..72a7d44a310e0 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -52,9 +52,7 @@ use sqlparser::ast::{ SelectItemQualifiedWildcardKind, WildcardAdditionalOptions, WindowType, visit_expressions_mut, }; -use sqlparser::ast::{ - NamedWindowDefinition, Select, SelectItem, Spanned, TableFactor, TableWithJoins, -}; +use sqlparser::ast::{NamedWindowDefinition, Select, SelectItem, TableWithJoins}; /// Result of the `aggregate` function, containing the aggregate plan and /// rewritten expressions that reference the aggregate output columns. @@ -694,44 +692,21 @@ impl SqlToRel<'_, S> { self.plan_table_with_joins(input, planner_context) } _ => { - let extract_table_name = - |t: &TableWithJoins| -> Option<(String, Option)> { - match &t.relation { - TableFactor::Table { alias: Some(a), .. } - | TableFactor::Derived { alias: Some(a), .. } - | TableFactor::Function { alias: Some(a), .. } - | TableFactor::UNNEST { alias: Some(a), .. } - | TableFactor::NestedJoin { alias: Some(a), .. } => { - let span = Span::try_from_sqlparser_span(a.name.span); - let name = - self.ident_normalizer.normalize(a.name.clone()); - Some((name, span)) - } - TableFactor::Table { - name, alias: None, .. - } => { - let span = - Span::try_from_sqlparser_span(t.relation.span()); - let table_name = name - .0 - .iter() - .filter_map(|p| p.as_ident()) - .map(|id| self.ident_normalizer.normalize(id.clone())) - .next_back()?; - Some((table_name, span)) - } - _ => None, - } - }; - let mut alias_spans: HashMap> = HashMap::new(); let mut from = from.into_iter(); let first = from.next().unwrap(); - if let Some((name, span)) = extract_table_name(&first) { + if let Some((name, span)) = self.extract_relation_name(&first.relation)? { alias_spans.entry(name).or_insert(span); } + for join in &first.joins { + if let Some((name, span)) = + self.extract_relation_name(&join.relation)? + { + alias_spans.entry(name).or_insert(span); + } + } let mut left = LogicalPlanBuilder::from( self.plan_table_with_joins(first, planner_context)?, @@ -742,13 +717,21 @@ impl SqlToRel<'_, S> { planner_context.set_outer_from_schema(left_schema) }; for input in from { - let current_name = extract_table_name(&input); + let mut current_names = Vec::new(); + if let Some(pair) = self.extract_relation_name(&input.relation)? { + current_names.push(pair); + } + for join in &input.joins { + if let Some(pair) = self.extract_relation_name(&join.relation)? { + current_names.push(pair); + } + } - if let Some((ref name, current_span)) = current_name { - if let Some(prior_span) = alias_spans.get(name) { + for (name, current_span) in ¤t_names { + if let Some(prior_span) = alias_spans.get(name.as_str()) { let mut diagnostic = Diagnostic::new_error( "duplicate table alias in FROM clause", - current_span, + *current_span, ); if let Some(span) = *prior_span { diagnostic = diagnostic @@ -757,7 +740,10 @@ impl SqlToRel<'_, S> { return plan_err!("duplicate table alias in FROM clause") .map_err(|e| e.with_diagnostic(diagnostic)); } - alias_spans.insert(name.clone(), current_span); + } + + for (name, span) in current_names { + alias_spans.insert(name, span); } let right = self.plan_table_with_joins(input, planner_context)?; diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index 4c0781f37ce5b..b8c5bf87b189f 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -28,6 +28,25 @@ use regex::Regex; use crate::{MockContextProvider, MockSessionState}; +fn expect_plan_ok(sql: &str) { + let statement = DFParserBuilder::new(sql) + .build() + .expect("unable to create parser") + .parse_statement() + .expect("unable to parse query"); + let options = ParserOptions { + collect_spans: true, + ..ParserOptions::default() + }; + let state = MockSessionState::default() + .with_scalar_function(Arc::new(string::concat().as_ref().clone())); + let context = MockContextProvider { state }; + let sql_to_rel = SqlToRel::new_with_options(&context, options); + sql_to_rel + .statement_to_plan(statement) + .expect("expected query to plan successfully"); +} + fn do_query(sql: &'static str) -> Diagnostic { let statement = DFParserBuilder::new(sql) .build() @@ -79,7 +98,7 @@ fn do_query(sql: &'static str) -> Diagnostic { /// dbg!(&spans["left"]); /// dbg!(&spans["right"]); /// ``` -fn get_spans(query: &'static str) -> HashMap { +fn get_spans(query: &str) -> HashMap { let mut spans = HashMap::new(); let mut bytes_per_line = vec![]; @@ -392,131 +411,161 @@ fn test_syntax_error() -> Result<()> { } #[test] -fn test_duplicate_cte_name() -> Result<()> { - let query = "WITH /*a*/cte/*a*/ AS (SELECT 1 AS col), /*b*/cte/*b*/ AS (SELECT 2 AS col) SELECT 1"; - let spans = get_spans(query); - let diag = do_query(query); - assert_snapshot!(diag.message, @r#"WITH query name "cte" specified more than once"#); - assert_eq!(diag.span, Some(spans["b"])); - assert_eq!(diag.notes.len(), 1); - assert_snapshot!(diag.notes[0].message, @"previously defined here"); - assert_eq!(diag.notes[0].span, Some(spans["a"])); - Ok(()) -} - -#[test] -fn test_duplicate_table_alias() -> Result<()> { - let query = "SELECT * FROM person /*a*/a/*a*/, person /*b*/a/*b*/"; - let spans = get_spans(query); - let diag = do_query(query); - assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); - assert_eq!(diag.span, Some(spans["b"])); - assert_eq!(diag.notes.len(), 1); - assert_snapshot!(diag.notes[0].message, @"first defined here"); - assert_eq!(diag.notes[0].span, Some(spans["a"])); - Ok(()) -} - -#[test] -fn test_duplicate_table_alias_not_first() -> Result<()> { - let query = "SELECT * FROM person a, test_decimal /*b*/b/*b*/, person /*c*/b/*c*/"; - let spans = get_spans(query); - let diag = do_query(query); - assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); - assert_eq!(diag.span, Some(spans["c"])); - assert_eq!(diag.notes.len(), 1); - assert_snapshot!(diag.notes[0].message, @"first defined here"); - assert_eq!(diag.notes[0].span, Some(spans["b"])); - Ok(()) -} - -#[test] -fn test_duplicate_bare_table_in_from() -> Result<()> { - let query = "SELECT * FROM /*a*/person/*a*/, /*b*/person/*b*/"; - let spans = get_spans(query); - let diag = do_query(query); - assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); - assert_eq!(diag.span, Some(spans["b"])); - assert_eq!(diag.notes.len(), 1); - assert_snapshot!(diag.notes[0].message, @"first defined here"); - assert_eq!(diag.notes[0].span, Some(spans["a"])); - Ok(()) -} - -#[test] -fn test_duplicate_alias_non_overlapping_columns() -> Result<()> { - let query = "SELECT * FROM j1 AS /*a*/t/*a*/, j2 AS /*b*/t/*b*/"; - let spans = get_spans(query); - let diag = do_query(query); - assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); - assert_eq!(diag.span, Some(spans["b"])); - assert_eq!(diag.notes.len(), 1); - assert_snapshot!(diag.notes[0].message, @"first defined here"); - assert_eq!(diag.notes[0].span, Some(spans["a"])); - Ok(()) -} - -#[test] -fn test_duplicate_alias_non_overlapping_three_tables() -> Result<()> { - let query = "SELECT * FROM j1 AS x, j2 AS /*a*/t/*a*/, j3 AS y, j1 AS /*b*/t/*b*/"; - let spans = get_spans(query); - let diag = do_query(query); - assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); - assert_eq!(diag.span, Some(spans["b"])); - assert_eq!(diag.notes.len(), 1); - assert_snapshot!(diag.notes[0].message, @"first defined here"); - assert_eq!(diag.notes[0].span, Some(spans["a"])); - Ok(()) -} - -#[test] -fn test_duplicate_derived_subquery_alias() -> Result<()> { - let query = "SELECT * FROM (SELECT 1) AS /*a*/t/*a*/, (SELECT 2) AS /*b*/t/*b*/"; - let spans = get_spans(query); - let diag = do_query(query); - assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); - assert_eq!(diag.span, Some(spans["b"])); - assert_eq!(diag.notes.len(), 1); - assert_snapshot!(diag.notes[0].message, @"first defined here"); - assert_eq!(diag.notes[0].span, Some(spans["a"])); - Ok(()) -} - -#[test] -fn test_duplicate_alias_table_and_derived() -> Result<()> { - let query = "SELECT * FROM person AS /*a*/t/*a*/, (SELECT 1) AS /*b*/t/*b*/"; - let spans = get_spans(query); - let diag = do_query(query); - assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); - assert_eq!(diag.span, Some(spans["b"])); - assert_eq!(diag.notes.len(), 1); - assert_snapshot!(diag.notes[0].message, @"first defined here"); - assert_eq!(diag.notes[0].span, Some(spans["a"])); - Ok(()) -} - -#[test] -fn test_duplicate_alias_derived_and_table() -> Result<()> { - let query = "SELECT * FROM (SELECT 1) AS /*a*/t/*a*/, person AS /*b*/t/*b*/"; - let spans = get_spans(query); - let diag = do_query(query); - assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); - assert_eq!(diag.span, Some(spans["b"])); - assert_eq!(diag.notes.len(), 1); - assert_snapshot!(diag.notes[0].message, @"first defined here"); - assert_eq!(diag.notes[0].span, Some(spans["a"])); - Ok(()) +fn test_duplicate_name_diagnostics() { + let cases: &[(&str, &str, &str, &str)] = &[ + // (label, query, expected_error, expected_note) + ( + "cte_duplicate", + "WITH /*a*/cte/*a*/ AS (SELECT 1 AS col), /*b*/cte/*b*/ AS (SELECT 2 AS col) SELECT 1", + r#"WITH query name "cte" specified more than once"#, + "previously defined here", + ), + ( + "comma_basic_alias", + "SELECT * FROM person /*a*/a/*a*/, person /*b*/a/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_alias_not_first", + "SELECT * FROM person a, test_decimal /*a*/b/*a*/, person /*b*/b/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_bare_table", + "SELECT * FROM /*a*/person/*a*/, /*b*/person/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_non_overlapping_columns", + "SELECT * FROM j1 AS /*a*/t/*a*/, j2 AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_non_overlapping_three_tables", + "SELECT * FROM j1 AS x, j2 AS /*a*/t/*a*/, j3 AS y, j1 AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_derived_subquery", + "SELECT * FROM (SELECT 1) AS /*a*/t/*a*/, (SELECT 2) AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_table_and_derived", + "SELECT * FROM person AS /*a*/t/*a*/, (SELECT 1) AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_derived_and_table", + "SELECT * FROM (SELECT 1) AS /*a*/t/*a*/, person AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_nested_join", + "SELECT * FROM (person CROSS JOIN j1) AS /*a*/t/*a*/, (person CROSS JOIN j2) AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_join_alias", + "SELECT 1 FROM j1 AS /*a*/t/*a*/ JOIN j2 AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_join_derived", + "SELECT 1 FROM (SELECT 1 AS a) AS /*a*/t/*a*/ JOIN (SELECT 2 AS b) AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_and_explicit_join", + "SELECT 1 FROM j1 AS /*a*/t/*a*/, j2 JOIN j3 AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_join_bare_table", + "SELECT 1 FROM /*a*/person/*a*/ JOIN /*b*/person/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_cross_join", + "SELECT 1 FROM j1 AS /*a*/t/*a*/ CROSS JOIN j2 AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_left_join", + "SELECT 1 FROM j1 AS /*a*/t/*a*/ LEFT JOIN j2 AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_three_way_join", + "SELECT 1 FROM j1 AS /*a*/x/*a*/ JOIN j2 AS y ON true JOIN j3 AS /*b*/x/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_between_joins", + "SELECT 1 FROM j1 AS x JOIN j2 AS /*a*/y/*a*/ ON true JOIN j3 AS /*b*/y/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_nested_join", + "SELECT 1 FROM (j1 CROSS JOIN j2) AS /*a*/t/*a*/ JOIN j3 AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "cross_entry_joins", + "SELECT 1 FROM j1 JOIN j2 AS /*a*/t/*a*/ ON true, j3 JOIN person AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "case_insensitive", + "SELECT 1 FROM j1 AS /*a*/T/*a*/ JOIN j2 AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ]; + + for &(label, query, expected_error, expected_note) in cases { + let spans = get_spans(query); + let diag = do_query(query); + + assert_eq!(diag.message, expected_error, "message mismatch in {label}"); + assert_eq!( + diag.span, + Some(spans["b"]), + "error span mismatch in {label}" + ); + assert_eq!(diag.notes.len(), 1, "expected 1 note in {label}"); + assert_eq!( + diag.notes[0].message, expected_note, + "note mismatch in {label}" + ); + assert_eq!( + diag.notes[0].span, + Some(spans["a"]), + "note span mismatch in {label}" + ); + } } #[test] -fn test_duplicate_nested_join_alias() -> Result<()> { - let query = "SELECT * FROM (person CROSS JOIN j1) AS /*a*/t/*a*/, (person CROSS JOIN j2) AS /*b*/t/*b*/"; - let spans = get_spans(query); - let diag = do_query(query); - assert_snapshot!(diag.message, @"duplicate table alias in FROM clause"); - assert_eq!(diag.span, Some(spans["b"])); - assert_eq!(diag.notes.len(), 1); - assert_snapshot!(diag.notes[0].message, @"first defined here"); - assert_eq!(diag.notes[0].span, Some(spans["a"])); - Ok(()) +fn test_schema_qualified_tables_not_duplicate() { + expect_plan_ok("SELECT * FROM schema1.person, schema2.person"); } From f683fdf5f1b0781b7503e69e562ae6885b45451b Mon Sep 17 00:00:00 2001 From: buraksenn Date: Thu, 16 Apr 2026 17:12:08 +0300 Subject: [PATCH 09/10] revert unrelated submodule change --- testing | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing b/testing index 0d60ccae40d0e..7df2b70baf4f0 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit 0d60ccae40d0e8f2d22c15fafb01c5d4be8c63a6 +Subproject commit 7df2b70baf4f081ebf8e0c6bd22745cf3cbfd824 From 4da4b948eb59a06f113e0297dc06bf2ad346a415 Mon Sep 17 00:00:00 2001 From: buraksenn Date: Thu, 16 Apr 2026 21:00:29 +0300 Subject: [PATCH 10/10] fix side effect failed test --- datafusion/sqllogictest/test_files/join.slt.part | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/sqllogictest/test_files/join.slt.part b/datafusion/sqllogictest/test_files/join.slt.part index b9d163d877596..551e40f83a6a1 100644 --- a/datafusion/sqllogictest/test_files/join.slt.part +++ b/datafusion/sqllogictest/test_files/join.slt.part @@ -1230,14 +1230,14 @@ drop table t2; statement ok create table t1(v1 int) as values(100); -## Query with Ambiguous column reference -query error DataFusion error: Schema error: Schema contains duplicate qualified field name t1\.v1 +## Query with duplicate table name in self-join (no aliases) +query error DataFusion error: Error during planning: duplicate table alias in FROM clause select count(*) from t1 right outer join t1 on t1.v1 > 0; -query error DataFusion error: Schema error: Schema contains duplicate qualified field name t1\.v1 +query error DataFusion error: Error during planning: duplicate table alias in FROM clause select t1.v1 from t1 join t1 using(v1) cross join (select struct('foo' as v1) as t1); statement ok