You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# left semi join should propagate constraint of left side as is.
3569
-
query IRR
3568
+
# left semi join with a nullable UNIQUE key cannot safely propagate the
3569
+
# constraint for expansion, because UNIQUE allows multiple NULLs.
3570
+
statement error DataFusion error: Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "l\.amount" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "l\.sn, sum\(l\.amount\)" appears in the SELECT clause satisfies this requirement
3570
3571
SELECT l.sn, l.amount, SUM(l.amount)
3571
3572
FROM (SELECT *
3572
3573
FROM sales_global_with_unique as l
3573
3574
LEFT SEMI JOIN sales_global_with_unique as r
3574
3575
ON l.amount >= r.amount + 10)
3575
3576
GROUP BY l.sn
3576
3577
ORDER BY l.sn
3577
-
----
3578
-
1 50 50
3579
-
2 75 75
3580
-
3 200 200
3581
-
4 100 100
3582
-
NULL 100 100
3583
3578
3584
-
# Similarly, left anti join should propagate constraint of left side as is.
3585
-
query IRR
3579
+
# Similarly, left anti join with a nullable UNIQUE key cannot safely propagate
3580
+
# the constraint for expansion.
3581
+
statement error DataFusion error: Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "l\.amount" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "l\.sn, sum\(l\.amount\)" appears in the SELECT clause satisfies this requirement
3586
3582
SELECT l.sn, l.amount, SUM(l.amount)
3587
3583
FROM (SELECT *
3588
3584
FROM sales_global_with_unique as l
3589
3585
LEFT ANTI JOIN sales_global_with_unique as r
3590
3586
ON l.amount >= r.amount + 10)
3591
3587
GROUP BY l.sn
3592
3588
ORDER BY l.sn
3593
-
----
3594
-
0 30 30
3595
3589
3596
3590
# Should support grouping by list column
3597
3591
query ?I
@@ -5641,3 +5635,32 @@ set datafusion.execution.target_partitions = 4;
5641
5635
5642
5636
statement count 0
5643
5637
drop table t;
5638
+
5639
+
# Test that GROUP BY with a UNIQUE constraint does not incorrectly collapse
5640
+
# NULL rows. UNIQUE allows multiple NULLs (NULLs are not equal in SQL), so
5641
+
# a nullable UNIQUE column cannot be used to eliminate other GROUP BY columns.
5642
+
# Regression test for https://github.com/apache/datafusion/issues/21507
5643
+
5644
+
statement ok
5645
+
CREATE TABLE t_unique_null(a INT, b INT, c INT, UNIQUE(a));
0 commit comments