diff --git a/_local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.reference b/_local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.reference new file mode 100644 index 000000000000..7b05cb1e81a0 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.reference @@ -0,0 +1,4 @@ +analyzer +x x +legacy +x x diff --git a/_local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.sql b/_local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.sql new file mode 100644 index 000000000000..b725acf38949 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS test_nested_alias_dist; +DROP TABLE IF EXISTS test_nested_alias_local; + +CREATE TABLE test_nested_alias_local +( + dt DateTime64(3), + base String, + a String ALIAS base, + b String ALIAS a +) +ENGINE = MergeTree() +ORDER BY dt; + +INSERT INTO test_nested_alias_local VALUES ('1999-03-29T01:15:33', 'x'); + +CREATE TABLE test_nested_alias_dist AS test_nested_alias_local +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_nested_alias_local, rand()); + +SELECT 'analyzer'; +SELECT a, b +FROM test_nested_alias_dist +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1; + +SELECT 'legacy'; +SELECT a, b +FROM test_nested_alias_dist +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 0; + +DROP TABLE test_nested_alias_dist; +DROP TABLE test_nested_alias_local; diff --git a/_local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.reference b/_local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.reference new file mode 100644 index 000000000000..325078d71cc1 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.reference @@ -0,0 +1,8 @@ +rewrite_in +1 +1 +rewrite_join +1 +1 +1 +1 diff --git 
a/_local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.sql b/_local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.sql new file mode 100644 index 000000000000..9bd95d72fd20 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS test_global_alias_chain_dist; +DROP TABLE IF EXISTS test_global_alias_chain_local; + +CREATE TABLE test_global_alias_chain_local +( + id UInt64, + base UInt64, + a UInt64 ALIAS base, + b UInt64 ALIAS a +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_global_alias_chain_local VALUES (1, 1); + +CREATE TABLE test_global_alias_chain_dist AS test_global_alias_chain_local +ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), test_global_alias_chain_local, rand()); + +SELECT 'rewrite_in'; +SELECT id +FROM test_global_alias_chain_dist +WHERE id IN (SELECT b FROM test_global_alias_chain_dist) +ORDER BY id +SETTINGS enable_analyzer = 1, distributed_product_mode = 'global'; + +SELECT 'rewrite_join'; +SELECT l.id +FROM test_global_alias_chain_dist AS l +INNER JOIN (SELECT b FROM test_global_alias_chain_dist) AS r ON l.id = r.b +ORDER BY l.id +SETTINGS enable_analyzer = 1, distributed_product_mode = 'global'; + +DROP TABLE test_global_alias_chain_dist; +DROP TABLE test_global_alias_chain_local; diff --git a/_local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.reference b/_local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.reference new file mode 100644 index 000000000000..9a3a29a69ce8 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.reference @@ -0,0 +1,2 @@ +global_in_collision_check +1 diff --git a/_local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.sql 
b/_local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.sql new file mode 100644 index 000000000000..d47e6a304ba1 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.sql @@ -0,0 +1,56 @@ +DROP TABLE IF EXISTS test_marker_collision_dist; +DROP TABLE IF EXISTS test_marker_collision_main; +DROP TABLE IF EXISTS test_marker_collision_left; +DROP TABLE IF EXISTS test_marker_collision_right; + +CREATE TABLE test_marker_collision_main +( + id UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_marker_collision_main VALUES (1); + +CREATE TABLE test_marker_collision_left +( + id UInt64, + x UInt64, + b UInt64 ALIAS x +) +ENGINE = MergeTree() +ORDER BY id; + +CREATE TABLE test_marker_collision_right +( + id UInt64, + y UInt64, + b UInt64 ALIAS y +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_marker_collision_left VALUES (1, 1); +INSERT INTO test_marker_collision_right VALUES (1, 20); + +CREATE TABLE test_marker_collision_dist AS test_marker_collision_main +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_marker_collision_main, rand()); + +SELECT 'global_in_collision_check'; +SELECT id +FROM test_marker_collision_dist +WHERE id GLOBAL IN +( + SELECT test_marker_collision_left.id + FROM test_marker_collision_left + INNER JOIN test_marker_collision_right + ON test_marker_collision_left.id = test_marker_collision_right.id + WHERE test_marker_collision_left.b + test_marker_collision_right.b = 21 +) +ORDER BY id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1; + +DROP TABLE test_marker_collision_dist; +DROP TABLE test_marker_collision_main; +DROP TABLE test_marker_collision_left; +DROP TABLE test_marker_collision_right; diff --git a/_local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.reference b/_local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.reference new file mode 100644 index 
000000000000..dcebc31194b1 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.reference @@ -0,0 +1,30 @@ +merge_tree_row_count +5 +iceberg_row_count +5 +hybrid_row_count +5 +true 8751 7291.267979503492 Frank 2024-01-01 06:00:00.000000 2024-01-01 43200000000 2024-01-01 12:00:00.000000 5313 8428.52 456.78 +false 3611 4492.090462838536 Isaac 2024-01-01 06:00:00.000000 2024-01-01 43200000000 2024-01-01 12:00:00.000000 4552 1554.795 456.78 +true 1519 3799.273006373374 Louis 2024-01-01 06:00:00.000000 2024-01-01 43200000000 2024-01-01 12:00:00.000000 8785 1248.2616 456.78 +true 1654 3801.2622503916614 Oliver 2024-01-01 06:00:00.000000 2024-01-01 43200000000 2024-01-01 12:00:00.000000 3432 6701.752 456.78 +true 9044 2931.782814070929 William 2024-01-01 06:00:00.000000 2024-01-01 43200000000 2024-01-01 12:00:00.000000 3733 7730.6836 456.78 +issue_1208_join_hybrid_mt_local +Frank 8751 8751 +Isaac 3611 3611 +Louis 1519 1519 +Oliver 1654 1654 +William 9044 9044 +issue_1208_join_hybrid_mt_allow +Frank 8751 8751 +Isaac 3611 3611 +Louis 1519 1519 +Oliver 1654 1654 +William 9044 9044 +issue_1208_join_hybrid_mt_iceberg_local +issue_1208_join_hybrid_mt_iceberg_allow +Frank 8751 8751 8751 +Isaac 3611 3611 3611 +Louis 1519 1519 1519 +Oliver 1654 1654 1654 +William 9044 9044 9044 diff --git a/_local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.sql b/_local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.sql new file mode 100644 index 000000000000..a37c991684b4 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.sql @@ -0,0 +1,329 @@ +SET allow_experimental_hybrid_table = 1, + enable_analyzer = 1, + prefer_localhost_replica = 0, + iceberg_delete_data_on_drop = 1; + +DROP TABLE IF EXISTS hybrid_table; +DROP TABLE IF EXISTS merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172; +DROP TABLE IF EXISTS iceberg_table_b4bd039e_d5d3_11f0_8208_e0c26496f172; 
+DROP TABLE IF EXISTS merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172; +DROP TABLE IF EXISTS hybrid_table_64293f1a_0cba_11f1_876b_de7b9eea3490; +DROP TABLE IF EXISTS merge_tree_table_640a9b6e_0cba_11f1_876b_de7b9eea3490; +DROP TABLE IF EXISTS database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172`; +DROP DATABASE IF EXISTS database_39afd42b_d5d6_11f0_b919_e0c26496f172; + +CREATE TABLE merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172 +( + boolean_col Nullable(Bool), + long_col Nullable(Int64), + double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col Nullable(Decimal(10, 2)) +) +ENGINE = MergeTree +PARTITION BY string_col +ORDER BY tuple() +SETTINGS index_granularity = 8192; + +INSERT INTO merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172 VALUES + (true, 9044, 2931.782814070929, 'William', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 3733, 7730.6836, 456.78), + (true, 1654, 3801.2622503916614, 'Oliver', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 3432, 6701.752, 456.78), + (true, 8751, 7291.267979503492, 'Frank', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 5313, 8428.52, 456.78), + (true, 1519, 3799.273006373374, 'Louis', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 8785, 1248.2616, 456.78), + (false, 3611, 4492.090462838536, 'Isaac', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 
4552, 1554.795, 456.78); + +SELECT 'merge_tree_row_count'; +SELECT count() FROM merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172; + +CREATE TABLE iceberg_table_b4bd039e_d5d3_11f0_8208_e0c26496f172 +( + boolean_col Nullable(Int32), + long_col Nullable(Int64), + double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col Nullable(Float64) +) +ENGINE = IcebergS3( + s3_conn, + filename = concat('hybrid_unknown_table_exact_schema_03924/', currentDatabase(), '/iceberg_table') +); + +INSERT INTO iceberg_table_b4bd039e_d5d3_11f0_8208_e0c26496f172 SETTINGS allow_experimental_insert_into_iceberg = 1, write_full_path_in_iceberg_metadata = 1 VALUES + (1, 9044, 2931.782814070929, 'William', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 3733, 7730.6836, 456.78), + (1, 1654, 3801.2622503916614, 'Oliver', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 3432, 6701.752, 456.78), + (1, 8751, 7291.267979503492, 'Frank', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 5313, 8428.52, 456.78), + (1, 1519, 3799.273006373374, 'Louis', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 8785, 1248.2616, 456.78), + (0, 3611, 4492.090462838536, 'Isaac', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 4552, 1554.795, 456.78); + +SELECT 'iceberg_row_count'; +SELECT count() FROM iceberg_table_b4bd039e_d5d3_11f0_8208_e0c26496f172; + +CREATE TABLE hybrid_table +( + boolean_col Nullable(Bool), + long_col Nullable(Int64), + 
double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col Nullable(Decimal(10, 2)) +) +ENGINE = Hybrid( + remote('localhost', currentDatabase(), 'merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172'), + date_col <= '2024-01-01', + icebergCluster( + 'test_cluster_one_shard_three_replicas_localhost', + concat('http://localhost:11111/test/hybrid_unknown_table_exact_schema_03924/', currentDatabase(), '/iceberg_table/'), + 'test', + 'testtest' + ), + date_col > '2024-01-01' +); + +SELECT 'hybrid_row_count'; +SELECT count() FROM hybrid_table; + +CREATE TABLE merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 +( + boolean_col Nullable(Bool), + long_col Nullable(Int64), + double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col Nullable(Decimal(10, 2)) +) +ENGINE = MergeTree +PARTITION BY string_col +ORDER BY tuple() +SETTINGS index_granularity = 8192; + +INSERT INTO merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 +SELECT * FROM merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172; + +CREATE DATABASE database_39afd42b_d5d6_11f0_b919_e0c26496f172; + +CREATE TABLE database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172` +( + boolean_col Nullable(Int32), + long_col Nullable(Int64), + double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col 
Nullable(Float64) +) +ENGINE = IcebergS3( + s3_conn, + filename = concat('hybrid_unknown_table_exact_schema_03924/', currentDatabase(), '/iceberg_table_39afe20a_d5d6_11f0_8208_e0c26496f172') +); + +INSERT INTO database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172` +SETTINGS allow_experimental_insert_into_iceberg = 1, write_full_path_in_iceberg_metadata = 1 +SELECT + toInt32(boolean_col), + long_col, + double_col, + string_col, + timestamp_col, + date_col, + time_col, + timestamptz_col, + integer_col, + float_col, + toFloat64(decimal_col) +FROM merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172; + +SELECT * +FROM hybrid_table +WHERE string_col IN +( + SELECT DISTINCT string_col + FROM hybrid_table + WHERE long_col > 1500 +) +ORDER BY string_col; + +SELECT 'issue_1208_join_hybrid_mt_local'; +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +ORDER BY h.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'local'; + +SELECT 'issue_1208_join_hybrid_mt_allow'; +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +ORDER BY h.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'allow'; + +SELECT 'issue_1208_join_hybrid_mt_iceberg_local'; +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long, + i.long_col AS iceberg_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +FULL OUTER JOIN database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172` AS i ON h.string_col = 
i.string_col +ORDER BY h.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'local'; -- { serverError UNKNOWN_IDENTIFIER } + +SELECT 'issue_1208_join_hybrid_mt_iceberg_allow'; +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long, + i.long_col AS iceberg_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +FULL OUTER JOIN database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172` AS i ON h.string_col = i.string_col +ORDER BY h.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'allow'; + +-- Exact issue-shape queries (no ORDER BY), deterministic output via FORMAT Null. +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'local' +FORMAT Null; + +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'allow' +FORMAT Null; + +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long, + i.long_col AS iceberg_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +FULL OUTER JOIN database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172` AS i ON h.string_col = i.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'local' +FORMAT Null; -- { serverError UNKNOWN_IDENTIFIER } + +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long, + 
i.long_col AS iceberg_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +FULL OUTER JOIN database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172` AS i ON h.string_col = i.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'allow' +FORMAT Null; + +CREATE TABLE merge_tree_table_640a9b6e_0cba_11f1_876b_de7b9eea3490 +( + boolean_col Nullable(Bool), + long_col Nullable(Int64), + double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col Nullable(Decimal(10, 2)) +) +ENGINE = MergeTree +PARTITION BY string_col +ORDER BY tuple() +SETTINGS index_granularity = 8192; + +INSERT INTO merge_tree_table_640a9b6e_0cba_11f1_876b_de7b9eea3490 +SELECT * FROM merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172; + +CREATE TABLE hybrid_table_64293f1a_0cba_11f1_876b_de7b9eea3490 +( + boolean_col Nullable(Bool), + long_col Nullable(Int64), + double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col Nullable(Decimal(10, 2)) +) +ENGINE = Hybrid( + remote('localhost', currentDatabase(), 'merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172'), + date_col <= '2024-01-01', + icebergCluster( + 'test_cluster_one_shard_three_replicas_localhost', + concat('http://localhost:11111/test/hybrid_unknown_table_exact_schema_03924/', currentDatabase(), '/iceberg_table/'), + 'test', + 'testtest' + ), + date_col > '2024-01-01' +); + +SELECT * +FROM hybrid_table_64293f1a_0cba_11f1_876b_de7b9eea3490 +WHERE 
string_col IN +( + SELECT DISTINCT string_col + FROM merge_tree_table_640a9b6e_0cba_11f1_876b_de7b9eea3490 + WHERE long_col > 1500 +) +FORMAT Null; + +DROP TABLE hybrid_table; +DROP TABLE merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172; +DROP TABLE iceberg_table_b4bd039e_d5d3_11f0_8208_e0c26496f172; +DROP TABLE merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172; +DROP TABLE hybrid_table_64293f1a_0cba_11f1_876b_de7b9eea3490; +DROP TABLE merge_tree_table_640a9b6e_0cba_11f1_876b_de7b9eea3490; +DROP TABLE database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172`; +DROP DATABASE database_39afd42b_d5d6_11f0_b919_e0c26496f172; diff --git a/_local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.reference b/_local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.reference new file mode 100644 index 000000000000..1e2e9b11750a --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.reference @@ -0,0 +1,56 @@ +prefer_localhost_replica_0_uint64 +x a_num inner_c +1 1 2 +1 1 2 +1 1 2 +1 1 2 +2 1 3 +2 1 3 +2 1 3 +2 1 3 +10 1 11 +10 1 11 +10 1 11 +10 1 11 +prefer_localhost_replica_0_string +x a_str inner_c +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 +prefer_localhost_replica_1_uint64 +x a_num inner_c +1 1 2 +1 1 2 +1 1 2 +1 1 2 +2 1 3 +2 1 3 +2 1 3 +2 1 3 +10 1 11 +10 1 11 +10 1 11 +10 1 11 +prefer_localhost_replica_1_string +x a_str inner_c +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 diff --git a/_local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.sql b/_local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.sql new file mode 100644 index 
000000000000..84ad3cf170d0 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.sql @@ -0,0 +1,98 @@ +-- Distributed over Distributed over MergeTree with enable_alias_marker = 0. The outer table adds constant ALIAS columns (a_num, a_str). +-- Every query must return x, the constant and inner_c (= x + 1) in that column order, for prefer_localhost_replica = 0 and 1. +DROP TABLE IF EXISTS test_dod_alias_swap_no_marker_outer; +DROP TABLE IF EXISTS test_dod_alias_swap_no_marker_inner; +DROP TABLE IF EXISTS test_dod_alias_swap_no_marker_local; + +CREATE TABLE test_dod_alias_swap_no_marker_local +( + x UInt64 +) +ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO test_dod_alias_swap_no_marker_local VALUES (1), (2), (10); + +CREATE TABLE test_dod_alias_swap_no_marker_inner +( + x UInt64, + inner_c UInt64 ALIAS x + 1 +) +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_alias_swap_no_marker_local); + +CREATE TABLE test_dod_alias_swap_no_marker_outer +( + x UInt64, + inner_c UInt64, + a_num UInt64 ALIAS 1, + a_str String ALIAS 'aaaa' +) +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_alias_swap_no_marker_inner); + +SELECT 'prefer_localhost_replica_0_uint64'; +SELECT + x, + a_num, + inner_c +FROM test_dod_alias_swap_no_marker_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_0_string'; +SELECT + x, + a_str, + inner_c +FROM test_dod_alias_swap_no_marker_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_uint64'; +SELECT + x, + a_num, + inner_c +FROM test_dod_alias_swap_no_marker_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 
0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_string'; +SELECT + x, + a_str, + inner_c +FROM test_dod_alias_swap_no_marker_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +DROP TABLE test_dod_alias_swap_no_marker_outer; +DROP TABLE test_dod_alias_swap_no_marker_inner; +DROP TABLE test_dod_alias_swap_no_marker_local; diff --git a/_local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.reference b/_local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.reference new file mode 100644 index 000000000000..228ac5f667f7 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.reference @@ -0,0 +1,20 @@ +no_pr_uint64 +x a_num inner_c +1 1 2 +2 1 3 +10 1 11 +no_pr_string +x a_str inner_c +1 aaaa 2 +2 aaaa 3 +10 aaaa 11 +pr_uint64 +x a_num inner_c +1 1 2 +2 1 3 +10 1 11 +pr_string +x a_str inner_c +1 aaaa 2 +2 aaaa 3 +10 aaaa 11 diff --git a/_local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.sql b/_local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.sql new file mode 100644 index 000000000000..070330e98826 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.sql @@ -0,0 +1,94 @@ +DROP TABLE IF EXISTS test_pr_dod_alias_swap_outer; +DROP TABLE IF EXISTS test_pr_dod_alias_swap_inner; +DROP TABLE IF EXISTS test_pr_dod_alias_swap_local; + +CREATE TABLE test_pr_dod_alias_swap_local +( + x UInt64 +) +ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO test_pr_dod_alias_swap_local VALUES (1), (2), (10); + +CREATE TABLE test_pr_dod_alias_swap_inner +( + x UInt64, + inner_c UInt64 ALIAS x + 1 +) +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, 
currentDatabase(), test_pr_dod_alias_swap_local); + +CREATE TABLE test_pr_dod_alias_swap_outer +( + x UInt64, + inner_c UInt64, + a_num UInt64 ALIAS 1, + a_str String ALIAS 'aaaa' +) +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test_pr_dod_alias_swap_inner); + +SELECT 'no_pr_uint64'; +SELECT x, a_num, inner_c +FROM test_pr_dod_alias_swap_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + enable_parallel_replicas = 0, + allow_experimental_parallel_reading_from_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0, + parallel_replicas_for_non_replicated_merge_tree = 1, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' +FORMAT TSVWithNames; + +SELECT 'no_pr_string'; +SELECT x, a_str, inner_c +FROM test_pr_dod_alias_swap_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + enable_parallel_replicas = 0, + allow_experimental_parallel_reading_from_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0, + parallel_replicas_for_non_replicated_merge_tree = 1, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' +FORMAT TSVWithNames; + +SELECT 'pr_uint64'; +SELECT x, a_num, inner_c +FROM test_pr_dod_alias_swap_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + enable_parallel_replicas = 2, + allow_experimental_parallel_reading_from_replicas = 2, + max_parallel_replicas = 3, + parallel_replicas_local_plan = 1, + parallel_replicas_for_non_replicated_merge_tree = 1, + parallel_replicas_min_number_of_rows_per_replica = 0, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' +FORMAT TSVWithNames; + +SELECT 'pr_string'; +SELECT x, a_str, inner_c +FROM test_pr_dod_alias_swap_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + enable_parallel_replicas = 2, + 
allow_experimental_parallel_reading_from_replicas = 2, + max_parallel_replicas = 3, + parallel_replicas_local_plan = 1, + parallel_replicas_for_non_replicated_merge_tree = 1, + parallel_replicas_min_number_of_rows_per_replica = 0, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' +FORMAT TSVWithNames; + +DROP TABLE test_pr_dod_alias_swap_outer; +DROP TABLE test_pr_dod_alias_swap_inner; +DROP TABLE test_pr_dod_alias_swap_local; diff --git a/_local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.reference b/_local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.reference new file mode 100644 index 000000000000..f3797cb0ce0e --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.reference @@ -0,0 +1,32 @@ +prefer_localhost_replica_0_uint64 +a inner_c +1 2 +1 2 +1 3 +1 3 +1 11 +1 11 +prefer_localhost_replica_0_string +a inner_c +aaaa 2 +aaaa 2 +aaaa 3 +aaaa 3 +aaaa 11 +aaaa 11 +prefer_localhost_replica_1_uint64 +a inner_c +1 2 +1 2 +1 3 +1 3 +1 11 +1 11 +prefer_localhost_replica_1_string +a inner_c +aaaa 2 +aaaa 2 +aaaa 3 +aaaa 3 +aaaa 11 +aaaa 11 diff --git a/_local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.sql b/_local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.sql new file mode 100644 index 000000000000..50085531b6dd --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.sql @@ -0,0 +1,82 @@ +DROP TABLE IF EXISTS test_dod_alias_swap_local; +DROP TABLE IF EXISTS test_dod_alias_swap_inner; + +CREATE TABLE test_dod_alias_swap_local +( + x UInt64 +) +ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO test_dod_alias_swap_local VALUES (1), (2), (10); + +CREATE TABLE test_dod_alias_swap_inner +( + x UInt64, + a UInt64 ALIAS 2, + inner_c UInt64 ALIAS x + 1 +) +ENGINE = 
Distributed(test_cluster_two_shards, currentDatabase(), test_dod_alias_swap_local); + +SELECT 'prefer_localhost_replica_0_uint64'; +SELECT + __aliasMarker(_CAST(1, 'UInt64'), '__table1.a') AS a, + __table1.inner_c AS inner_c +FROM test_dod_alias_swap_inner AS __table1 +ORDER BY __table1.x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_0_string'; +SELECT + __aliasMarker(_CAST('aaaa', 'String'), '__table1.a') AS a, + __table1.inner_c AS inner_c +FROM test_dod_alias_swap_inner AS __table1 +ORDER BY __table1.x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_uint64'; +SELECT + __aliasMarker(_CAST(1, 'UInt64'), '__table1.a') AS a, + __table1.inner_c AS inner_c +FROM test_dod_alias_swap_inner AS __table1 +ORDER BY __table1.x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_string'; +SELECT + __aliasMarker(_CAST('aaaa', 'String'), '__table1.a') AS a, + __table1.inner_c AS inner_c +FROM test_dod_alias_swap_inner AS __table1 +ORDER BY __table1.x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +DROP TABLE test_dod_alias_swap_inner; +DROP TABLE test_dod_alias_swap_local; diff --git a/_local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.reference 
b/_local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.reference new file mode 100644 index 000000000000..402cc360bae5 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.reference @@ -0,0 +1,15 @@ +local +11 12 +21 22 +dist_prefer0 +11 12 +21 22 +dist_prefer1 +11 12 +21 22 +dist_prefer0_plan +11 12 +21 22 +dist_prefer1_plan +11 12 +21 22 diff --git a/_local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.sql b/_local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.sql new file mode 100644 index 000000000000..848f35b0be14 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.sql @@ -0,0 +1,34 @@ +-- Plain Distributed (no Hybrid). Two nested ALIAS columns: a2 contains a1's subexpression, +-- so planner CSE may reorder the remote header. Correct result must equal the single-node +-- ('local') result across every transport variant. +DROP TABLE IF EXISTS t_local_03930; +DROP TABLE IF EXISTS t_dist_03930; + +CREATE TABLE t_local_03930 (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1) +ENGINE = MergeTree ORDER BY x; +INSERT INTO t_local_03930 VALUES (10), (20); + +CREATE TABLE t_dist_03930 AS t_local_03930 +ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_local_03930); + +SELECT 'local'; +SELECT a1, a2 FROM t_local_03930 ORDER BY a1; + +SELECT 'dist_prefer0'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0; + +SELECT 'dist_prefer1'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1; + +SELECT 'dist_prefer0_plan'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 1; + +SELECT 'dist_prefer1_plan'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS 
enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1, serialize_query_plan = 1; + +DROP TABLE t_dist_03930; +DROP TABLE t_local_03930; diff --git a/_local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.reference b/_local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.reference new file mode 100644 index 000000000000..14f9c770f714 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.reference @@ -0,0 +1,9 @@ +local +10 11 12 +20 21 22 +pr_ast +10 11 12 +20 21 22 +pr_plan +10 11 12 +20 21 22 diff --git a/_local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.sql b/_local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.sql new file mode 100644 index 000000000000..f669631889c2 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.sql @@ -0,0 +1,37 @@ +-- Plain Distributed + parallel replicas (no Hybrid). Exercises the findParallelReplicasQuery +-- header reconciliation path with nested ALIAS columns. Correct result equals the single-node +-- ('local') result for both AST and serialized-plan transport. +-- +-- Determinism note: parallel replicas over a small non-replicated table can read the same rows on +-- several replicas under some (randomized) settings, duplicating output. GROUP BY x, a1, a2 +-- deduplicates that and keeps x in the required columns for the ALIAS expansion; ORDER BY x over +-- distinct values gives a total order. The test still fails if a1/a2 are swapped or wrong. 
+DROP TABLE IF EXISTS t_local_03931; +DROP TABLE IF EXISTS t_dist_03931; + +CREATE TABLE t_local_03931 (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1) +ENGINE = MergeTree ORDER BY x; +INSERT INTO t_local_03931 VALUES (10), (20); + +CREATE TABLE t_dist_03931 AS t_local_03931 +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), t_local_03931); + +SELECT 'local'; +SELECT x, a1, a2 FROM t_local_03931 GROUP BY x, a1, a2 ORDER BY x; + +SELECT 'pr_ast'; +SELECT x, a1, a2 FROM t_dist_03931 GROUP BY x, a1, a2 ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, + allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', + serialize_query_plan = 0; + +SELECT 'pr_plan'; +SELECT x, a1, a2 FROM t_dist_03931 GROUP BY x, a1, a2 ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, + allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', + serialize_query_plan = 1; + +DROP TABLE t_dist_03931; +DROP TABLE t_local_03931; diff --git a/_local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.reference b/_local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.reference new file mode 100644 index 000000000000..cddf594d4e31 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.reference @@ -0,0 +1,9 @@ +local +12 11 23 +22 21 43 +dist +12 11 23 +22 21 43 +dist_plan +12 11 23 +22 21 43 diff --git a/_local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.sql b/_local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.sql new file mode 100644 index 000000000000..c094d28f01e0 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.sql @@ -0,0 +1,27 @@ +-- Plain 
Distributed (no Hybrid). Reorders alias columns and mixes a computed expression over +-- them. With strict name-based header reconciliation (positional fallback disabled), the result +-- must equal the single-node ('local') result for both AST and serialized-plan transport, and no +-- LOGICAL_ERROR must be raised. +DROP TABLE IF EXISTS t_local_03932; +DROP TABLE IF EXISTS t_dist_03932; + +CREATE TABLE t_local_03932 (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1) +ENGINE = MergeTree ORDER BY x; +INSERT INTO t_local_03932 VALUES (10), (20); + +CREATE TABLE t_dist_03932 AS t_local_03932 +ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_local_03932); + +SELECT 'local'; +SELECT a2, a1, a1 + a2 AS s FROM t_local_03932 ORDER BY x; + +SELECT 'dist'; +SELECT a2, a1, a1 + a2 AS s FROM t_dist_03932 ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0; + +SELECT 'dist_plan'; +SELECT a2, a1, a1 + a2 AS s FROM t_dist_03932 ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 1; + +DROP TABLE t_dist_03932; +DROP TABLE t_local_03932; diff --git a/_local_files_and_notes/dropped_tests/2026-05-28-bucket-d-redundant-with-pr-94644.md b/_local_files_and_notes/dropped_tests/2026-05-28-bucket-d-redundant-with-pr-94644.md new file mode 100644 index 000000000000..d4315c16c951 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/2026-05-28-bucket-d-redundant-with-pr-94644.md @@ -0,0 +1,67 @@ +# Bucket D — redundant with upstream PR #94644 + +**Context:** During the `__aliasMarker` upstream port (workspace branch +`alias_marker3`), a three-way cross-validation against `upstream/master` and PR +#105690 revealed that 6 of the 17 ported regression tests now pass on pure +master — they no longer reproduce any current regression. 
+ +**Cause:** [PR #94644](https://github.com/ClickHouse/ClickHouse/pull/94644) +"Preserve ALIAS column order for distributed reads" landed on upstream +2026-01-22, after the original `__aliasMarker` work began on the 26.3 +development branch. #94644 fixes the column-order regression at the +`PlannerJoinTree` / `TableExpressionData` insertion-order level — exactly the +shape these 6 tests were designed to catch — and ships its own test +`03726_distributed_alias_column_order.sql` covering it. + +## Shared DDL pattern + +All 6 dropped tests share the same nested-alias DDL: + +```sql +CREATE TABLE t (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1) +ENGINE = MergeTree ORDER BY x; +``` + +In this pattern both `a1` and `a2` need `x` as input. After #94644 preserves +insertion order, CSE doesn't reorder these expressions on the remote side, so +the initiator/shard header matches as expected by the time +`addConvertingActions` runs. The bug we were trying to reproduce no longer +manifests. + +## What's actually load-bearing on `alias_marker3` after the refactor + +The remaining real bugs (not fixed by #94644 and still failing on master) need +different patterns: + +- **Shared sub-expression across siblings:** `flag_zero ALIAS toBool(bitTest(f, 0))`, + `flag_one ALIAS toBool(bitTest(f, 1))` — CSE collapses to a single + `bitTest(f, ...)` output on the remote, returning fewer columns than the + initiator expects (`NUMBER_OF_COLUMNS_DOESNT_MATCH`). Covered by the kept + tests `04279_distributed_alias_planner_column_count` (single-hop and + multi-hop) and `04280_distributed_alias_column_order` (silent column swap + with `ORDER BY ... LIMIT`). +- **Multi-hop `Distributed`-over-`Distributed`, `Merge`-over-`Distributed`, + parallel-replicas follower, and `distributed_product_mode='local'` rewriting + of `GLOBAL IN`:** covered by the other 4 kept tests (`04281`, `04282`, `04283`). 
+ +## Dropped tests — file index in this directory + +All are preserved with their original SQL (copied from +`feature/antalya-26.3/alias_marker_fixes` tip), using original 26.3 slot +numbers: + +| File | Mapped to (now-deleted) slot in alias_marker3 | Status note | +|---|---|---| +| `03930_distributed_alias_swap_planner.sql` | was `04282` on alias_marker3 | Doesn't reproduce on master; #94644 already fixes column order. | +| `03844_distributed_nested_alias_marker.sql` | was `04285` | Doesn't reproduce; #94644 handles the chain. | +| `03845_distributed_global_in_join_alias_chain.sql` | was `04286` | Doesn't reproduce; the subquery has one column, so the column-count divergence is dodged. | +| `03846_distributed_global_in_alias_marker_collision.sql` | was `04287` | **Interesting variant.** Two source tables with alias `b` (one is `b ALIAS x`, the other `b ALIAS y`). The JOIN resolves on the shard side, so the marker collision is absorbed before the initiator sees it. The *real* collision scenario lives in `04283_distributed_alias_global_in_product_mode_local` (uses `distributed_product_mode='local'` which causes the analyzer to bind both `__table*.x` identifiers to the same alias `foo` on the initiator → `MULTIPLE_EXPRESSIONS_FOR_ALIAS`). | +| `03931_parallel_replicas_alias_swap.sql` | was `04293` | **Interesting variant.** Uses the canonical parallel-replicas-determinism workaround: `GROUP BY x, a1, a2 ORDER BY x` to dedupe non-deterministically distributed rows. Pattern worth recording in case future parallel-replicas tests need it. Underlying scenario doesn't reproduce after #94644. | +| `03932_distributed_alias_strict_name.sql` | was `04294` | Doesn't reproduce; #94644's insertion-order fix handles the reorder + computed column case. | + +## How to revisit + +If a future change ever undoes #94644's insertion-order guarantee, these tests +will start reproducing again. 
Resurrect them by copying back from this +directory into `tests/queries/0_stateless/` with fresh slot numbers via +`./tests/queries/0_stateless/add-test`. diff --git a/_local_files_and_notes/dropped_tests/README.md b/_local_files_and_notes/dropped_tests/README.md new file mode 100644 index 000000000000..e956b2ca1681 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/README.md @@ -0,0 +1,39 @@ +# Dropped tests register + +This directory preserves the rationale and SQL of tests that were dropped from +upstream PRs but might be informative for future contributors. Each entry +documents: + +1. **What was dropped** — file names, commit references. +2. **Why it was dropped** — and why the scenario it claimed to test isn't + load-bearing on the current upstream. +3. **The one interesting non-redundant variant** (if any) — so a future + contributor doesn't reinvent the same shape. +4. **What's actually load-bearing** — the test (if any) that does cover the + subsystem. + +## Convention + +- Dropped SQL files are copied here verbatim (preserving the original 26.3 + slot numbers so they can be located in the source branch). +- A dated rationale Markdown sits next to them with the same prefix. + +## Entries + +### 2026-05-28: bucket D — alias-marker regression tests redundant with PR #94644 + +See [`2026-05-28-bucket-d-redundant-with-pr-94644.md`](2026-05-28-bucket-d-redundant-with-pr-94644.md). + +Six alias-marker regression tests (slots 03844, 03845, 03846, 03930, 03931, +03932 on `feature/antalya-26.3/alias_marker_fixes`; renumbered to 0428x range +on the upstream port branch `alias_marker3` before being dropped) preserved +in this directory along with their `.reference` files. 
+ +### Older entries (pre-2026-05-28) + +Files in this directory that don't have a dated Markdown next to them +(`03924_hybrid_unknown_table_exact_schema`, `03925_distributed_alias_column_swap_without_marker`, +`03926_parallel_replicas_dod_alias_column_swap`, `03927_distributed_alias_marker_explicit_column_swap`) +were dropped during earlier iterations of the same `__aliasMarker` work, +without an accompanying note. Rationale for those lives in commit messages on +the source branch. diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 7b1193e92a28..5764702009e5 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -49,6 +49,7 @@ #include + #include namespace DB { @@ -979,6 +980,97 @@ void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const Stri function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } +namespace +{ + +/// Finalize __aliasMarker nodes right before distributed SQL boundaries. +/// This pass preserves nested markers and materializes arg2 to String constant +/// only when arg2 is ColumnNode. +class FinalizeAliasMarkersForDistributedSerializationVisitor : public InDepthQueryTreeVisitor +{ +public: + explicit FinalizeAliasMarkersForDistributedSerializationVisitor(ContextPtr context_) + : context(std::move(context_)) + {} + + bool shouldTraverseTopToBottom() const + { + return false; + } + + static bool needChildVisit(const QueryTreeNodePtr & parent, const QueryTreeNodePtr &) + { + /// Do not descend into lambda bodies. A marker inside a lambda (e.g. a user-written + /// `arrayMap(x -> __aliasMarker(x, x), ...)`) is a per-row identity computation, not a + /// distributed-serialization-boundary column; its argument column resolves to the lambda + /// parameter which has no table source to materialize. Visiting it would otherwise hit the + /// "unnamed source" path below and raise a user-triggerable LOGICAL_ERROR (see 03933). 
+ if (parent && parent->getNodeType() == QueryTreeNodeType::LAMBDA) + return false; + + /// Keep traversing marker payload recursively so nested chains are preserved + /// and each marker can materialize its own arg2 when needed. + return true; + } + + void visitImpl(QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || function_node->getFunctionName() != "__aliasMarker") + return; + + auto & arguments = function_node->getArguments().getNodes(); + if (arguments.size() != 2 || !arguments[0] || !arguments[1]) + return; + + String alias_id; + if (const auto * marker_column_node = arguments[1]->as()) + { + if (const auto & marker_source = marker_column_node->getColumnSourceOrNull(); + marker_source && marker_source->hasAlias()) + { + alias_id = marker_source->getAlias() + "." + marker_column_node->getColumnName(); + } + else + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "__aliasMarker expects the second argument to resolve to a column with a source alias before distributed serialization. " + "Column '{}' has an unnamed or missing source", + marker_column_node->getColumnName()); + } + } + else if (const auto * marker_id_node = arguments[1]->as(); + marker_id_node && isString(marker_id_node->getResultType())) + { + /// Already materialized marker id from a previous hop. Keep as is. + return; + } + + /// arg2 was neither a column with a source alias nor an already-materialized String id + /// (e.g. a user-supplied marker with an arbitrary second argument). Leave it untouched - + /// the function is a pass-through identity, so no materialization is the safe, non-throwing + /// behavior. Our own injected markers always carry an aliased column source, so this path + /// is not reachable for them. 
+ if (alias_id.empty()) + return; + + arguments[1] = std::make_shared(std::move(alias_id), std::make_shared()); + resolveOrdinaryFunctionNodeByName(*function_node, "__aliasMarker", context); + } + +private: + ContextPtr context; +}; + +} + +void finalizeAliasMarkersForDistributedSerialization(QueryTreeNodePtr & node, const ContextPtr & context) +{ + FinalizeAliasMarkersForDistributedSerializationVisitor visitor(context); + visitor.visit(node); +} + std::pair getExpressionSource(const QueryTreeNodePtr & node) { if (const auto * column = node->as()) diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 9a19af2b4e0d..2fd2fe85bbe8 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -157,6 +157,10 @@ void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const Strin /// Arguments and parameters are taken from the node. void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name); +/// Finalize __aliasMarker nodes before distributed SQL boundaries by materializing +/// marker ids in arg2 from ColumnNode to String ConstantNode when needed. +void finalizeAliasMarkersForDistributedSerialization(QueryTreeNodePtr & node, const ContextPtr & context); + /// Returns single source of expression node. /// First element of pair is source node, can be nullptr if there are no sources or multiple sources. /// Second element of pair is true if there is at most one source, false if there are multiple sources. diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 847ef382dcdf..2f83d2ab4ac2 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -2354,8 +2354,14 @@ Maximum length of step description in EXPLAIN PLAN. )", 0) \ \ DECLARE(Bool, enable_alias_marker, true, R"( -Enable __aliasMarker injection for ALIAS column expressions when using the analyzer. -This stabilizes action node names across planner/analyzer stages without changing query semantics. 
+Enable __aliasMarker injection for ALIAS column expressions when reading a Distributed table with the analyzer. +The marker preserves the identity of an inlined ALIAS expression across the initiator/shard boundary so columns are +reconciled by name instead of by position. This is a correctness fix: with it disabled, distributed queries over +ALIAS columns (especially distributed-over-distributed) can return swapped columns or fail with a type-mismatch error. + +The marker is sent to shards as the `__aliasMarker` function in the distributed SQL. On a mixed-version cluster whose +shards do not understand `__aliasMarker`, set this setting to `false` on the initiator: that disables marker injection +and falls back to the previous behavior (no negotiation/version handshake is performed). )", 0) \ \ DECLARE(UInt64, preferred_block_size_bytes, 1000000, R"( diff --git a/src/Functions/identity.cpp b/src/Functions/identity.cpp index 05d2ef870601..3e1c2903a3f9 100644 --- a/src/Functions/identity.cpp +++ b/src/Functions/identity.cpp @@ -38,7 +38,7 @@ REGISTER_FUNCTION(AliasMarker) { factory.registerFunction(FunctionDocumentation{ .description = R"( -Internal function that marks ALIAS column expressions for the analyzer. Not intended for direct use. +Internal function. Not for direct use. )", .syntax = {"__aliasMarker(expr, alias_name)"}, .arguments = { diff --git a/src/Functions/identity.h b/src/Functions/identity.h index f74b92280f65..77f90c6b6a31 100644 --- a/src/Functions/identity.h +++ b/src/Functions/identity.h @@ -108,6 +108,53 @@ struct AliasMarkerName static constexpr auto name = "__aliasMarker"; }; +/** + * __aliasMarker is an internal function used to enforce an alias-preserving projection step exactly + * where it appears in distributed SQL transport. + * + * It is injected only when a pushed-down expression must still behave like a real column from the + * initiator's point of view, rather than as an arbitrary expression produced on the initiator. 
This + * typically happens after expanding an ALIAS column to its underlying expression for distributed SQL + * transport. Conceptually, if the initiator has `SELECT foo AS bar FROM distr` and `foo` is an ALIAS + * column such as `1 + x`, the remote query should look like + * `SELECT __aliasMarker(1 + x, 'table1.foo') AS bar FROM local AS table1`. + * + * The user-facing SQL alias (`bar` in the example above) is separate and must stay untouched. + * __aliasMarker carries only the low-level column identity that says "treat this expression as the + * expanded form of that logical column". Preserving that identity is important because otherwise remote + * headers may diverge from initiator expectations, leading to header mismatch, wrong column association, + * or column-count mismatch. + * + * This must not be confused with normal SQL aliases that appear in the query text: those participate + * in user-visible query semantics and may or may not be materialized depending on the execution stage. + * A normal SQL alias is not enough here because it may interfere with user query logic, clash with + * existing names, and in the mergeable-state path the final projection step that normally assigns + * aliases is intentionally skipped (see the conditional createComputeAliasColumnsStep(...) path in + * PlannerJoinTree::buildQueryPlanForTableExpression()). + * + * This is also why __aliasMarker is not the same as __actionName. For this use case we need the + * wrapper to be consumed into an alias/projection boundary on top of the child expression, so the + * expression keeps behaving like a distinct logical column. __actionName would instead survive as a + * normal function node with a forced result name, which is a different semantic contract and a worse + * fit for distributed alias transport. + * + * The marker also prevents distinct logical columns with the same expression from collapsing into one + * transport column. 
For example, `SELECT 2 * x AS x, 2 * x AS y` must still travel as two columns; + * otherwise both expressions may collapse to a single `multiply(2, x)` output and break distributed + * header reconciliation. + * + * Lifecycle/invariants: + * 1) Injected around rewritten alias expressions that need stable transport identity. + * 2) Materialized before the query is sent to the shard in serialized form: the marker id is converted + * to a String alias identifier. + * 3) Consumed on the receiver by adding a projection step where it appears, so that identity is enforced + * in actions without changing the user-facing aliasing logic. + * 4) Preserved while forwarding to the next hop. Nested marker chains are allowed and each marker may + * contribute an alias step during actions construction. + * + * This is a temporary bridge while distributed plan transport still relies on SQL text in these paths. + * As query plan serialization fully replaces that boundary, this marker path should become unnecessary. 
+ */ class FunctionAliasMarker : public IFunction { public: @@ -116,7 +163,7 @@ class FunctionAliasMarker : public IFunction String getName() const override { return name; } size_t getNumberOfArguments() const override { return 2; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } bool isSuitableForConstantFolding() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } @@ -125,9 +172,6 @@ class FunctionAliasMarker : public IFunction if (arguments.size() != 2) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker expects 2 arguments"); - if (!WhichDataType(arguments[1]).isString()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker is internal and should not be used directly"); - return arguments.front(); } diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 7636b41d06a9..55a8a18f57e4 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -93,6 +93,17 @@ String calculateActionNodeNameWithCastIfNeeded(const ConstantNode & constant_nod return buffer.str(); } +String tryExtractAliasMarkerIdFromSecondArgument(const QueryTreeNodePtr & argument) +{ + if (const auto * second_argument_constant = argument->as(); + second_argument_constant && isString(second_argument_constant->getResultType())) + { + return second_argument_constant->getValue().safeGet(); + } + + return {}; +} + class ActionNodeNameHelper { public: @@ -189,14 +200,12 @@ class ActionNodeNameHelper { /// Perform sanity check, because user may call this function with unexpected arguments const auto & function_argument_nodes = function_node.getArguments().getNodes(); - if (function_argument_nodes.size() == 2) - { - if (const auto * second_argument = 
function_argument_nodes.at(1)->as()) - { - if (isString(second_argument->getResultType())) - result = second_argument->getValue().safeGet(); - } - } + if (function_argument_nodes.size() != 2) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker expects 2 arguments"); + + result = tryExtractAliasMarkerIdFromSecondArgument(function_argument_nodes.at(1)); + if (result.empty()) + result = calculateActionNodeName(function_argument_nodes.at(0)); /// Empty node name is not allowed and leads to logical errors if (result.empty()) @@ -1139,15 +1148,11 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi if (function_arguments.size() != 2) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker expects 2 arguments"); - const auto * alias_id_node = function_arguments.at(1)->as(); - if (!alias_id_node || !isString(alias_id_node->getResultType())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker is internal and should not be used directly"); - - const auto & alias_id = alias_id_node->getValue().safeGet(); + auto [child_name, levels] = visitImpl(function_arguments.at(0)); + auto alias_id = tryExtractAliasMarkerIdFromSecondArgument(function_arguments.at(1)); if (alias_id.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker is internal and should not be used directly"); + alias_id = child_name; - auto [child_name, levels] = visitImpl(function_arguments.at(0)); if (alias_id == child_name) return {child_name, levels}; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index a602426619b7..53797bb055cd 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -1573,11 +1573,13 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto expected_block = *expected_header; materializeBlockInplace(expected_block); - auto rename_actions_dag = ActionsDAG::makeConvertingActions( - 
query_plan.getCurrentHeader()->getColumnsWithTypeAndName(), - expected_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position, + const auto & source_columns = query_plan.getCurrentHeader()->getColumnsWithTypeAndName(); + const auto & result_columns = expected_block.getColumnsWithTypeAndName(); + auto rename_actions_dag = makeConvertingActionsPreferNameThenPosition( + source_columns, + result_columns, planner_context->getQueryContext(), + "PlannerJoinTree", true /*ignore_constant_values*/, false /*add_cast_columns*/, nullptr /*new_names*/); diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 5357e3d93dba..36f8cafa312d 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include #include @@ -51,6 +53,7 @@ #include #include #include +#include namespace DB { @@ -199,62 +202,13 @@ ASTPtr queryNodeToSelectQuery(const QueryTreeNodePtr & query_node, bool set_subq return result_ast; } -namespace -{ -class NormalizeAliasMarkerVisitor : public InDepthQueryTreeVisitor -{ -public: - void visitImpl(QueryTreeNodePtr & node) - { - auto * function_node = node->as(); - if (!function_node || function_node->getFunctionName() != "__aliasMarker") - return; - - auto & arguments = function_node->getArguments().getNodes(); - if (arguments.size() != 2) - return; - - while (true) - { - auto * inner_function = arguments.front()->as(); - if (!inner_function || inner_function->getFunctionName() != "__aliasMarker") - break; - - auto & inner_arguments = inner_function->getArguments().getNodes(); - if (inner_arguments.size() != 2) - break; - - arguments.front() = inner_arguments.front(); - } - } - - bool needChildVisit(QueryTreeNodePtr & parent, QueryTreeNodePtr & child) - { - auto * parent_function = parent->as(); - if (parent_function && parent_function->getFunctionName() == "__aliasMarker") - return false; - - auto child_node_type = child->getNodeType(); - return !(child_node_type == 
QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION); - } -}; - -void normalizeAliasMarkersInQueryTree(QueryTreeNodePtr & node) -{ - NormalizeAliasMarkerVisitor visitor; - visitor.visit(node); -} -} - ASTPtr queryNodeToDistributedSelectQuery(const QueryTreeNodePtr & query_node) { /// Remove CTEs information from distributed queries. /// Now, if cte_name is set for subquery node, AST -> String serialization will only print cte name. /// But CTE is defined only for top-level query part, so may not be sent. /// Removing cte_name forces subquery to be always printed. - auto query_node_to_convert = query_node->clone(); - normalizeAliasMarkersInQueryTree(query_node_to_convert); - auto ast = queryNodeToSelectQuery(query_node_to_convert, /*set_subquery_cte_name=*/false); + auto ast = queryNodeToSelectQuery(query_node, /*set_subquery_cte_name=*/false); return ast; } @@ -757,4 +711,59 @@ QueryPlanStepPtr projectOnlyUsedColumns( return step; } +static bool canMatchByNameWithoutAmbiguity( + const ColumnsWithTypeAndName & source, + const ColumnsWithTypeAndName & result) +{ + if (source.size() != result.size()) + return false; + + NameSet source_names; + NameSet result_names; + + for (const auto & source_column : source) + if (!source_names.insert(source_column.name).second) + return false; + + for (const auto & result_column : result) + if (!result_names.insert(result_column.name).second) + return false; + + return source_names == result_names; +} + +ActionsDAG makeConvertingActionsPreferNameThenPosition( + const ColumnsWithTypeAndName & source_columns, + const ColumnsWithTypeAndName & result_columns, + const ContextPtr & context, + std::string_view location, + bool ignore_constant_values, + bool add_cast_columns, + NameToNameMap * new_names) +{ + const auto mode = canMatchByNameWithoutAmbiguity(source_columns, result_columns) + ? 
ActionsDAG::MatchColumnsMode::Name + : ActionsDAG::MatchColumnsMode::Position; + + if (mode == ActionsDAG::MatchColumnsMode::Position) + { + static auto log = getLogger("Planner"); + LOG_TEST( + log, + "Position match at {} (names not matchable as a set): source=[{}] result=[{}]", + location, + Block(source_columns).dumpNames(), + Block(result_columns).dumpNames()); + } + + return ActionsDAG::makeConvertingActions( + source_columns, + result_columns, + mode, + context, + ignore_constant_values, + add_cast_columns, + new_names); +} + } diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index 97385e9f56fc..289704b0ff8a 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -23,6 +23,8 @@ #include +#include + namespace DB { @@ -124,4 +126,14 @@ QueryPlanStepPtr projectOnlyUsedColumns( const SharedHeader & stream_header, const ColumnIdentifiers & used_column_identifiers); +/// Try Name-based conversion first, fallback to Position with detailed trace report. +ActionsDAG makeConvertingActionsPreferNameThenPosition( + const ColumnsWithTypeAndName & source_columns, + const ColumnsWithTypeAndName & result_columns, + const ContextPtr & context, + std::string_view location, + bool ignore_constant_values, + bool add_cast_columns, + NameToNameMap * new_names = nullptr); + } diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index 93d006da59b5..a116e38909eb 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -534,11 +534,13 @@ JoinTreeQueryPlan buildQueryPlanForParallelReplicas( storage_limits, nullptr); - auto converting = ActionsDAG::makeConvertingActions( - header->getColumnsWithTypeAndName(), - initial_header->getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position, + const auto & source_columns = header->getColumnsWithTypeAndName(); + const auto & result_columns = initial_header->getColumnsWithTypeAndName(); + auto converting = 
makeConvertingActionsPreferNameThenPosition( + source_columns, + result_columns, context, + "findParallelReplicasQuery", false /*ignore_constant_values*/, false /*add_cast_columns*/, nullptr /*new_names*/); @@ -546,7 +548,8 @@ JoinTreeQueryPlan buildQueryPlanForParallelReplicas( /// initial_header is a header expected by initial query. /// header is a header which is returned by the follower. /// They are different because tables will have different aliases (e.g. _table1 or _table5). - /// Here we just rename columns by position, with the hope the types would match. + /// Reconcile by name first (matching the initiator's column identifiers) and fall back to + /// position only when name matching is not possible. auto step = std::make_unique(query_plan.getCurrentHeader(), std::move(converting)); step->setStepDescription("Convert distributed names"); query_plan.addStep(std::move(step)); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 6566aabdad97..991c2814ab7a 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -55,7 +55,6 @@ #include #include -#include #include #include #include @@ -840,6 +839,8 @@ StorageSnapshotPtr StorageDistributed::getStorageSnapshot(const StorageMetadataP namespace { +/// Rebuild alias ColumnNode references into expression nodes and optionally +/// wrap them with __aliasMarker for distributed SQL transport. class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitor { QueryTreeNodePtr getColumnNodeAliasExpression(const QueryTreeNodePtr & node) const @@ -855,40 +856,35 @@ class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitorgetExpression(); - const auto & column_name = column_node->getColumnName(); + const String output_alias = column_node->hasAlias() ? 
column_node->getAlias() : String{}; - if (!context->getSettingsRef()[Setting::enable_alias_marker]) + const auto & settings = context->getSettingsRef(); + const bool use_alias_marker = settings[Setting::enable_alias_marker]; + if (!use_alias_marker) { - column_expression->setAlias(column_name); - return column_expression; - } - - String alias_id; - const auto & source_alias = column_source->getAlias(); - if (!source_alias.empty()) - alias_id = source_alias + "." + column_name; - else - alias_id = column_name; - - if (auto * function_node = column_expression->as(); - function_node && function_node->getFunctionName() == "__aliasMarker") - { - auto & arguments = function_node->getArguments().getNodes(); - if (arguments.size() == 2) - arguments[1] = std::make_shared(alias_id, std::make_shared()); - - column_expression->setAlias(column_name); - return column_expression; + auto column_expression_with_alias = column_expression->clone(); + column_expression_with_alias->removeAlias(); + if (!output_alias.empty()) + column_expression_with_alias->setAlias(output_alias); + return column_expression_with_alias; } QueryTreeNodes arguments; arguments.reserve(2); - arguments.emplace_back(std::move(column_expression)); - arguments.emplace_back(std::make_shared(alias_id, std::make_shared())); + /// Preserve the original column reference in arg2 so normal analyzer passes + /// (alias/source uniquification) can still transform it consistently. + /// Before query is sent to shard this ColumnNode is materialized to String ConstantNode. + /// Clone the expression before mutating its alias below: getExpression() may return a node + /// shared elsewhere in the tree, and removeAlias() would otherwise be a side effect on it. 
+ arguments.emplace_back(column_expression->clone()); + arguments.emplace_back(std::make_shared(column_node->getColumn(), column_source)); auto alias_marker_node = std::make_shared("__aliasMarker"); - alias_marker_node->getArguments().getNodes() = std::move(arguments); - alias_marker_node->setAlias(column_name); + auto & nodes = alias_marker_node->getArguments().getNodes(); + nodes = std::move(arguments); + nodes[0]->removeAlias(); + if (!output_alias.empty()) + alias_marker_node->setAlias(output_alias); resolveOrdinaryFunctionNodeByName(*alias_marker_node, "__aliasMarker", context); return alias_marker_node; @@ -903,6 +899,22 @@ class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitoras(); + if (!function_node || function_node->getFunctionName() != "__aliasMarker") + return true; + + const auto & arguments = function_node->getArguments().getNodes(); + if (arguments.size() < 2) + return true; + + /// Do not recurse into __aliasMarker arg2. + /// It is an internal column-reference payload used only for later id materialization, + /// and visiting it here can re-expand aliases or create recursive rewrites. 
+ return child_node.get() != arguments[1].get(); + } + private: ContextPtr context; }; @@ -1215,7 +1227,9 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, rewriteJoinToGlobalJoinIfNeeded(query_node.getJoinTree()); } - return buildQueryTreeForShard(query_info.planner_context, query_tree_to_modify, /*allow_global_join_for_right_table*/ false); + auto shard_query_tree = buildQueryTreeForShard(query_info.planner_context, query_tree_to_modify, /*allow_global_join_for_right_table*/ false); + finalizeAliasMarkersForDistributedSerialization(shard_query_tree, query_context); + return shard_query_tree; } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 329882a13a1e..be02782ce62c 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -816,7 +817,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ /// Source tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. 
- convertAndFilterSourceStream(*common_header, modified_query_info, nested_storage_snapshot, aliases, row_policy_data_opt, context, child, is_smallest_column_requested); + convertAndFilterSourceStream(*common_header, modified_query_info, nested_storage_snapshot, merge_storage_snapshot->metadata->getColumns(), aliases, row_policy_data_opt, context, child, is_smallest_column_requested); for (const auto & filter_info : pushed_down_filters) { @@ -1557,6 +1558,7 @@ void ReadFromMerge::convertAndFilterSourceStream( const Block & header, SelectQueryInfo & modified_query_info, const StorageSnapshotPtr & snapshot, + const ColumnsDescription & merge_columns, const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, ContextPtr local_context, @@ -1567,12 +1569,129 @@ void ReadFromMerge::convertAndFilterSourceStream( auto pipe_columns = before_block_header->getNamesAndTypesList(); + /// TODO(storage-merge-alias): the analyzer branch below is a manual reproduction of what the + /// analyzer's standard column-alias resolution would do if it ran end-to-end on the child + /// plan. It exists because of a two-step design in `getModifiedQueryInfo` + here: + /// + /// Step 1 (in `getModifiedQueryInfo`): rewrite the query going to the child storage. + /// References to ALIAS columns at the Merge level are replaced by their resolved + /// expressions via `replaceColumns(query_tree, column_name_to_node)`. The child storage + /// receives a request for the PHYSICAL columns the expressions need; it does not see the + /// alias names at all. + /// + /// Step 2 (here, `convertAndFilterSourceStream`): re-compute the alias VALUES at the + /// Merge level from those physical columns by building a fresh ActionsDAG, running + /// `QueryAnalysisPass` on each alias expression, and visiting with `PlannerActionsVisitor`. + /// Emit each alias output under the alias's analyzer identifier so the Merge target + /// header (also using analyzer identifiers) can pick it up by name. 
+ /// + /// The structural awkwardness: alias values are computed AFTER the child's ReadFromMergeTree, + /// not before / inside it. This means predicates on ALIAS columns can only use the underlying + /// physical column for index analysis IF Step 1's `replaceColumns` happens to inline the + /// alias expression into the predicate too (which it does today), making KeyCondition see + /// `col*2 > 10` instead of `alias > 10`. Output-side aliases on the other hand are recomputed + /// here from scratch even when the child has already produced the same value (e.g. + /// Distributed children inline-evaluate alias expressions on the shard and return them as + /// expression-named output columns). The recompute is redundant for those cases. + /// + /// A natural unification would be to use `__aliasMarker(expr, identifier)` (the function + /// introduced elsewhere for distributed ALIAS-column header reconciliation) in Step 1: + /// replace each ALIAS reference with `__aliasMarker(, '')` + /// instead of the bare resolved expression. The child planner's `PlannerActionsVisitor` + /// resolves the marker at plan-build time -- the marker function call disappears from the + /// resulting ActionsDAG, leaving a normal action node that computes `` named + /// ``. So predicate / KeyCondition analysis is unaffected (it sees the underlying + /// computation graph, the marker is a planner-time naming device, not a runtime expression). + /// With this unification Step 2 here disappears entirely: pipe_columns would already carry + /// alias values under correct names, and the entire `if (allow_experimental_analyzer) { ... }` + /// block below could be deleted. + /// + /// Left as future work. The current design is correct (Step 1 + Step 2 together produce the + /// right values), just not minimal. if (local_context->getSettingsRef()[Setting::allow_experimental_analyzer]) { + /// The Merge table expects its columns under analyzer identifiers (e.g. 
`__table1.a`, + /// `__table1.\`n.a\``) while alias expressions and `alias.name` reference plain logical + /// names (e.g. `a`, `n.a`). + /// + /// At this point in the pipeline the planner's TableExpressionData for the Merge node + /// is NOT yet populated (column collection happens later in CollectTableExpressionData), + /// so we cannot look up the mapping via PlannerContext. Instead, build it ourselves by + /// matching each Merge-declared column name against the suffixes of the `header` / + /// `pipe_columns` identifier names: a header column named like `.` or + /// `.\`\`` corresponds to the Merge column `C`. + /// + /// This is robust to dotted column names (Nested, backtick-quoted) because the candidate + /// set is the actual declared Merge schema rather than a regex over the analyzer's + /// naming convention. + auto build_plain_to_identifier = [&](const auto & candidate_names) + { + std::unordered_map plain_to_identifier; + std::unordered_set ambiguous; + for (const auto & column : candidate_names) + { + /// First try exact match (no analyzer prefix at all). + if (merge_columns.has(column.name)) + { + if (!plain_to_identifier.emplace(column.name, column.name).second) + ambiguous.insert(column.name); + continue; + } + + /// Otherwise look for the `.` or `.\`\`` shape where C is + /// a declared Merge column name. Skip any column that doesn't match a known + /// Merge column (e.g. intermediate expression outputs of the child plan). + /// The analyzer's quoting of dotted column names varies between branches: some + /// produce `__tableN.\`n.a\`` (backtick-quoted), others `__tableN.n.a` (raw). + /// Try both. + for (const auto & merge_column : merge_columns.getAll()) + { + bool dotted = merge_column.name.find('.') != String::npos; + String want_raw = "." + merge_column.name; + String want_quoted = dotted ? ("." 
+ backQuote(merge_column.name)) : want_raw; + if (column.name.ends_with(want_quoted) || (dotted && column.name.ends_with(want_raw))) + { + if (!plain_to_identifier.emplace(merge_column.name, column.name).second) + ambiguous.insert(merge_column.name); + break; + } + } + } + for (const auto & a : ambiguous) + plain_to_identifier.erase(a); + return plain_to_identifier; + }; + + const auto header_plain_to_identifier = build_plain_to_identifier(header); + const auto pipe_plain_to_identifier = build_plain_to_identifier(pipe_columns); + for (const auto & alias : aliases) { ActionsDAG actions_dag(pipe_columns); + /// Bridge: alias expressions reference columns by plain logical name, but the child + /// stream's inputs are named with analyzer identifiers. For each Merge column with a + /// known identifier in `pipe_columns`, expose it under its plain name as well so + /// `buildQueryTree(alias.expression)` resolves references like `a` or `n.a` against + /// inputs named `__table1.a` / `__table1.\`n.a\``. Required for alias-of-alias + /// resolution (e.g. `b ALIAS a + 1`, see 04283). 
+ for (const auto & [plain, identifier] : pipe_plain_to_identifier) + { + if (plain == identifier) + continue; + const ActionsDAG::Node * input_node = nullptr; + for (const auto * candidate : actions_dag.getInputs()) + { + if (candidate->result_name == identifier) + { + input_node = candidate; + break; + } + } + if (input_node) + actions_dag.addAlias(*input_node, plain); + } + QueryTreeNodePtr query_tree = buildQueryTree(alias.expression, local_context); query_tree->setAlias(alias.name); @@ -1580,13 +1699,18 @@ void ReadFromMerge::convertAndFilterSourceStream( query_analysis_pass.run(query_tree, local_context); ColumnNodePtrWithHashSet empty_correlated_columns_set; - PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, empty_correlated_columns_set, false /*use_column_identifier_as_action_node_name*/); + PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, empty_correlated_columns_set, true /*use_column_identifier_as_action_node_name*/); const auto & [nodes, _] = actions_visitor.visit(actions_dag, query_tree); if (nodes.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected to have 1 output but got {}", nodes.size()); - actions_dag.addOrReplaceInOutputs(actions_dag.addAlias(*nodes.front(), alias.name)); + /// Emit the alias output under its analyzer identifier so the downstream + /// `addMissingDefaults` matches it by name (otherwise the expected + /// `__tableN.\`alias.name\`` column would be filled with type defaults). + auto it = header_plain_to_identifier.find(alias.name); + const String & output_name = it != header_plain_to_identifier.end() ? 
it->second : alias.name; + actions_dag.addOrReplaceInOutputs(actions_dag.addAlias(*nodes.front(), output_name)); auto expression_step = std::make_unique(child.plan.getCurrentHeader(), std::move(actions_dag)); child.plan.addStep(std::move(expression_step)); } @@ -1636,6 +1760,7 @@ void ReadFromMerge::convertAndFilterSourceStream( }; String smallest_column_name = ExpressionActions::getSmallestColumn(snapshot->metadata->getColumns().getAllPhysical()).name; + for (size_t i = 0; i < size; ++i) { const auto & source_elem = current_step_columns[i]; @@ -1660,11 +1785,14 @@ void ReadFromMerge::convertAndFilterSourceStream( } } - auto convert_actions_dag = ActionsDAG::makeConvertingActions( + auto convert_actions_dag = makeConvertingActionsPreferNameThenPosition( current_step_columns, converted_columns, - ActionsDAG::MatchColumnsMode::Position, - local_context); + local_context, + "StorageMerge", + false /*ignore_constant_values*/, + false /*add_cast_columns*/, + nullptr /*new_names*/); auto expression_step = std::make_unique(child.plan.getCurrentHeader(), std::move(convert_actions_dag)); child.plan.addStep(std::move(expression_step)); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index f23c7a750a78..5e57960d4862 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -298,6 +298,7 @@ class ReadFromMerge final : public SourceStepWithFilter const Block & header, SelectQueryInfo & modified_query_info, const StorageSnapshotPtr & snapshot, + const ColumnsDescription & merge_columns, const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, ContextPtr context, diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index efb7d426b4fe..dc27f983d43c 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -404,7 +404,10 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, ContextMutablePtr & mutable_context, size_t 
subquery_depth) { - const auto subquery_hash = subquery_node->getTreeHash(); + auto subquery_node_to_execute = subquery_node->clone(); + finalizeAliasMarkersForDistributedSerialization(subquery_node_to_execute, mutable_context); + + const auto subquery_hash = subquery_node_to_execute->getTreeHash(); const auto temporary_table_name = fmt::format("_data_{}", toString(subquery_hash)); const auto & external_tables = mutable_context->getExternalTables(); @@ -422,7 +425,7 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, auto context_copy = Context::createCopy(mutable_context); updateContextForSubqueryExecution(context_copy); - InterpreterSelectQueryAnalyzer interpreter(subquery_node, context_copy, subquery_options); + InterpreterSelectQueryAnalyzer interpreter(subquery_node_to_execute, context_copy, subquery_options); auto & query_plan = interpreter.getQueryPlan(); auto sample_block_with_unique_names = *query_plan.getCurrentHeader(); diff --git a/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.reference b/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.reference index 58bf6a7ec74b..5f061a829b23 100644 --- a/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.reference +++ b/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.reference @@ -2,6 +2,8 @@ Header: sum(foo) AggregateFunction(sum, Int64) ---- stage: with_mergeable_state (analyzer=0) ---- Expected error: Function __aliasMarker is internal and supported only with the analyzer +---- explicit __aliasMarker in user query (analyzer=1) ---- +Explicit __aliasMarker call is allowed ---- stage: complete (analyzer=1) ---- Header: x Int64 ---- stage: fetch_columns (analyzer=1) ---- diff --git a/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.sh b/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.sh index 66974be38517..fb0580e796f0 100755 --- 
a/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.sh +++ b/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh echo "---- stage: with_mergeable_state (analyzer=1, setting=enable_alias_marker=1) ----" -$CLICKHOUSE_CLIENT --enable_analyzer=1 --stage with_mergeable_state --multiquery 2>&1 <<'EOF' | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' +$CLICKHOUSE_CLIENT --enable_analyzer=1 --query_kind secondary_query --stage with_mergeable_state --multiquery 2>&1 <<'EOF' | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' SET enable_alias_marker=1; EXPLAIN header=1 SELECT sum(__aliasMarker(number*2-3,'foo')) AS x @@ -22,27 +22,36 @@ else echo "${alias_marker_error_output}" fi +echo "---- explicit __aliasMarker in user query (analyzer=1) ----" +if $CLICKHOUSE_CLIENT --enable_analyzer=1 --query \ + "SELECT __aliasMarker(number*2-3,'foo') FROM numbers(1)" >/dev/null 2>&1 +then + echo "Explicit __aliasMarker call is allowed" +else + echo "Unexpected error for explicit __aliasMarker call" +fi + echo "---- stage: complete (analyzer=1) ----" -$CLICKHOUSE_CLIENT --enable_analyzer=1 --stage complete --query \ +$CLICKHOUSE_CLIENT --enable_analyzer=1 --query_kind secondary_query --stage complete --query \ "EXPLAIN header=1 SELECT sum(__aliasMarker(number*2-3,'foo')) AS x FROM numbers(10)" \ 2>&1 | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' echo "---- stage: fetch_columns (analyzer=1) ----" -$CLICKHOUSE_CLIENT --enable_analyzer=1 --stage fetch_columns --query \ +$CLICKHOUSE_CLIENT --enable_analyzer=1 --query_kind secondary_query --stage fetch_columns --query \ "EXPLAIN header=1 SELECT sum(__aliasMarker(number*2-3,'foo')) AS x FROM numbers(10)" \ 2>&1 | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' echo "---- stage: with_mergeable_state (analyzer=1) ----" -$CLICKHOUSE_CLIENT --enable_analyzer=1 --stage with_mergeable_state --query \ +$CLICKHOUSE_CLIENT --enable_analyzer=1 
--query_kind secondary_query --stage with_mergeable_state --query \ "EXPLAIN header=1 SELECT sum(__aliasMarker(number*2-3,'foo')) AS x FROM numbers(10)" \ 2>&1 | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' echo "---- stage: with_mergeable_state_after_aggregation (analyzer=1) ----" -$CLICKHOUSE_CLIENT --enable_analyzer=1 --stage with_mergeable_state_after_aggregation --query \ +$CLICKHOUSE_CLIENT --enable_analyzer=1 --query_kind secondary_query --stage with_mergeable_state_after_aggregation --query \ "EXPLAIN header=1 SELECT sum(__aliasMarker(number*2-3,'foo')) AS x FROM numbers(10)" \ 2>&1 | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' echo "---- stage: with_mergeable_state_after_aggregation_and_limit (analyzer=1) ----" -$CLICKHOUSE_CLIENT --enable_analyzer=1 --stage with_mergeable_state_after_aggregation_and_limit --query \ +$CLICKHOUSE_CLIENT --enable_analyzer=1 --query_kind secondary_query --stage with_mergeable_state_after_aggregation_and_limit --query \ "EXPLAIN header=1 SELECT sum(__aliasMarker(number*2-3,'foo')) AS x FROM numbers(10) GROUP BY intDiv(number,10) AS y ORDER BY y LIMIT 10" \ 2>&1 | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' diff --git a/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.reference b/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.reference new file mode 100644 index 000000000000..6f78da4c4f59 --- /dev/null +++ b/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.reference @@ -0,0 +1,42 @@ +max in subquery +4294967294 +sum in subquery +-4921211434 +cte min with predicate +679772422 +cte with limit +-2147483648 -4294967296 +-1762862292 -574613778 +-1329695183 -1573638336 +-221724287 679772422 +0 0 +550067609 -3048000734 +1084637461 3417479706 +1169291374 -3082049462 +1899628504 -740161250 +2147483647 4294967294 +cte without limit +-2147483648 -4294967296 +-1762862292 -574613778 +-1329695183 -1573638336 +-221724287 679772422 +0 0 +550067609 -3048000734 +1084637461 3417479706 +1169291374 -3082049462 +1899628504 -740161250 
+2147483647 4294967294 +group by in subquery +10 10 +intersect with order by +-221724287 679772422 +1084637461 3417479706 +2147483647 4294967294 +intersect without order by +-221724287 679772422 +1084637461 3417479706 +2147483647 4294967294 +constant alias in subquery +9 7 32 +constant alias predicate +2 diff --git a/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.sql b/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.sql new file mode 100644 index 000000000000..8b9cf9182896 --- /dev/null +++ b/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.sql @@ -0,0 +1,202 @@ +SET allow_experimental_hybrid_table = 1, enable_analyzer = 1, enable_alias_marker = 1; + +DROP TABLE IF EXISTS test_hybrid_issue_1424; +DROP TABLE IF EXISTS test_hybrid_issue_1424_left; +DROP TABLE IF EXISTS test_hybrid_issue_1424_right; +DROP TABLE IF EXISTS test_hybrid_issue_1424_const; +DROP TABLE IF EXISTS test_hybrid_issue_1424_const_left; +DROP TABLE IF EXISTS test_hybrid_issue_1424_const_right; + +CREATE TABLE test_hybrid_issue_1424_left +( + id Int32, + value Int32, + date_col Date, + computed ALIAS value * 2 +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(date_col) +ORDER BY (date_col, id); + +INSERT INTO test_hybrid_issue_1424_left VALUES + (toInt32(2147483647), toInt32(2147483647), toDate('2149-06-06')), + (toInt32(-2147483648), toInt32(-2147483648), toDate('1970-01-01')), + (toInt32(0), toInt32(0), '1970-01-01'), + (toInt32(1084637461), toInt32(1708739853), toDate(1335613783)), + (toInt32(-221724287), toInt32(339886211), toDate(1294089763)), + (toInt32(-1762862292), toInt32(-287306889), toDate(1375707465)), + (toInt32(1169291374), toInt32(-1541024731), toDate(1082126480)), + (toInt32(-1329695183), toInt32(-786819168), toDate(1226000164)), + (toInt32(1899628504), toInt32(-370080625), toDate(1179050966)), + (toInt32(550067609), toInt32(-1524000367), toDate(1410654931)); + +CREATE TABLE test_hybrid_issue_1424_right +( + id Int32, + value Int32, + date_col Date, + computed 
ALIAS value * 2 +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(date_col) +ORDER BY (date_col, id); + +INSERT INTO test_hybrid_issue_1424_right VALUES + (toInt32(2147483647), toInt32(2147483647), toDate('2149-06-06')), + (toInt32(-2147483648), toInt32(-2147483648), toDate('1970-01-01')), + (toInt32(0), toInt32(0), '1970-01-01'), + (toInt32(1084637461), toInt32(1708739853), toDate(1335613783)), + (toInt32(-221724287), toInt32(339886211), toDate(1294089763)), + (toInt32(-1762862292), toInt32(-287306889), toDate(1375707465)), + (toInt32(1169291374), toInt32(-1541024731), toDate(1082126480)), + (toInt32(-1329695183), toInt32(-786819168), toDate(1226000164)), + (toInt32(1899628504), toInt32(-370080625), toDate(1179050966)), + (toInt32(550067609), toInt32(-1524000367), toDate(1410654931)); + +CREATE TABLE test_hybrid_issue_1424 +( + id Int32, + value Int32, + date_col Date, + computed Int64 +) +ENGINE = Hybrid( + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_issue_1424_left'), date_col >= '2025-01-15', + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_issue_1424_right'), date_col < '2025-01-15' +); + +SELECT 'max in subquery'; +SELECT max_computed FROM (SELECT max(computed) AS max_computed FROM test_hybrid_issue_1424); + +SELECT 'sum in subquery'; +SELECT sum_computed FROM (SELECT sum(computed) AS sum_computed FROM test_hybrid_issue_1424); + +SELECT 'cte min with predicate'; +WITH cte AS +( + SELECT min(computed) AS min_computed + FROM test_hybrid_issue_1424 + WHERE computed > 50 +) +SELECT * FROM cte; + +SELECT 'cte with limit'; +WITH ranked AS +( + SELECT id, computed + FROM test_hybrid_issue_1424 + LIMIT 10 +) +SELECT * +FROM ranked +ORDER BY id ASC; + +SELECT 'cte without limit'; +WITH ranked AS +( + SELECT id, computed + FROM test_hybrid_issue_1424 +) +SELECT * +FROM ranked +ORDER BY id ASC; + +SELECT 'group by in subquery'; +WITH monthly AS +( + SELECT count() AS cnt + FROM test_hybrid_issue_1424 + GROUP BY computed +) +SELECT sum(cnt), count() 
FROM monthly; + +SELECT 'intersect with order by'; +SELECT * +FROM +( + SELECT id, computed + FROM test_hybrid_issue_1424 + WHERE computed > 100 + INTERSECT + SELECT id, computed + FROM test_hybrid_issue_1424 + WHERE value > 50 +) +ORDER BY id; + +SELECT 'intersect without order by'; +SELECT * +FROM +( + SELECT id, computed + FROM test_hybrid_issue_1424 + WHERE computed > 100 + INTERSECT + SELECT id, computed + FROM test_hybrid_issue_1424 + WHERE value > 50 +) +ORDER BY id; + +CREATE TABLE test_hybrid_issue_1424_const_left +( + id Int32, + value Int32, + date_col Date, + computed ALIAS toInt64(7) +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(date_col) +ORDER BY (date_col, id); + +INSERT INTO test_hybrid_issue_1424_const_left VALUES + (1, 1, toDate('2025-01-15')), + (2, 2, toDate('2025-02-01')); + +CREATE TABLE test_hybrid_issue_1424_const_right +( + id Int32, + value Int32, + date_col Date, + computed ALIAS toInt64(9) +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(date_col) +ORDER BY (date_col, id); + +INSERT INTO test_hybrid_issue_1424_const_right VALUES + (3, 3, toDate('2024-12-31')), + (4, 4, toDate('2020-01-01')); + +CREATE TABLE test_hybrid_issue_1424_const +( + id Int32, + value Int32, + date_col Date, + computed Int64 +) +ENGINE = Hybrid( + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_issue_1424_const_left'), date_col >= '2025-01-15', + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_issue_1424_const_right'), date_col < '2025-01-15' +); + +SELECT 'constant alias in subquery'; +SELECT max_computed, min_computed, sum_computed +FROM +( + SELECT + max(computed) AS max_computed, + min(computed) AS min_computed, + sum(computed) AS sum_computed + FROM test_hybrid_issue_1424_const +); + +SELECT 'constant alias predicate'; +SELECT count() FROM test_hybrid_issue_1424_const WHERE computed = 9; + +DROP TABLE test_hybrid_issue_1424; +DROP TABLE test_hybrid_issue_1424_left; +DROP TABLE test_hybrid_issue_1424_right; +DROP TABLE 
test_hybrid_issue_1424_const; +DROP TABLE test_hybrid_issue_1424_const_left; +DROP TABLE test_hybrid_issue_1424_const_right; diff --git a/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference b/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference new file mode 100644 index 000000000000..279f11b9bf19 --- /dev/null +++ b/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference @@ -0,0 +1,15 @@ +first +1999-03-29 01:15:33.000 +second +1999-03-29 01:15:33.000 +third +1999-03-29 01:15:33.000 +fourth +fifth +1999-03-29 01:15:33.000 +sixth +query_alias_0 query_alias_1 + +seventh +alias_String_7_0 alias_String_7_1 + diff --git a/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql b/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql new file mode 100644 index 000000000000..8aa596998571 --- /dev/null +++ b/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql @@ -0,0 +1,67 @@ +-- Regression coverage for distributed ORDER BY + ALIAS columns with identical expressions. 
+-- Related issue: https://github.com/ClickHouse/ClickHouse/issues/79916 + +DROP TABLE IF EXISTS test_alias_same_expr_remote; + +CREATE TABLE test_alias_same_expr_remote +( + dt DateTime64(3), + String_7 String, + alias_String_7_0 String ALIAS String_7, + alias_String_7_1 String ALIAS String_7 +) +ENGINE = MergeTree() +ORDER BY dt; + +INSERT INTO test_alias_same_expr_remote VALUES ('1999-03-29T01:15:33', ''); + +SELECT 'first'; +SELECT dt, alias_String_7_0, alias_String_7_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +LIMIT 1; + +SELECT 'second'; +SELECT dt, alias_String_7_0, alias_String_7_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 0; + +SELECT 'third'; +SELECT dt, alias_String_7_0, alias_String_7_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1; + +SELECT 'fourth'; +SELECT dt, alias_String_7_0, alias_String_7_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 0; -- { serverError NUMBER_OF_COLUMNS_DOESNT_MATCH } + +SELECT 'fifth'; +SELECT dt, alias_String_7_0, alias_String_7_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, serialize_query_plan = 1; + +SELECT 'sixth'; +SELECT alias_String_7_0 AS query_alias_0, alias_String_7_1 AS query_alias_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1 +FORMAT TSVWithNames; + +SELECT 'seventh'; +SELECT alias_String_7_0, alias_String_7_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1 +FORMAT TSVWithNames; + 
+DROP TABLE test_alias_same_expr_remote; diff --git a/tests/queries/0_stateless/03844_distributed_nested_alias_marker.reference b/tests/queries/0_stateless/03844_distributed_nested_alias_marker.reference new file mode 100644 index 000000000000..7b05cb1e81a0 --- /dev/null +++ b/tests/queries/0_stateless/03844_distributed_nested_alias_marker.reference @@ -0,0 +1,4 @@ +analyzer +x x +legacy +x x diff --git a/tests/queries/0_stateless/03844_distributed_nested_alias_marker.sql b/tests/queries/0_stateless/03844_distributed_nested_alias_marker.sql new file mode 100644 index 000000000000..b725acf38949 --- /dev/null +++ b/tests/queries/0_stateless/03844_distributed_nested_alias_marker.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS test_nested_alias_dist; +DROP TABLE IF EXISTS test_nested_alias_local; + +CREATE TABLE test_nested_alias_local +( + dt DateTime64(3), + base String, + a String ALIAS base, + b String ALIAS a +) +ENGINE = MergeTree() +ORDER BY dt; + +INSERT INTO test_nested_alias_local VALUES ('1999-03-29T01:15:33', 'x'); + +CREATE TABLE test_nested_alias_dist AS test_nested_alias_local +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_nested_alias_local, rand()); + +SELECT 'analyzer'; +SELECT a, b +FROM test_nested_alias_dist +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1; + +SELECT 'legacy'; +SELECT a, b +FROM test_nested_alias_dist +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 0; + +DROP TABLE test_nested_alias_dist; +DROP TABLE test_nested_alias_local; diff --git a/tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.reference b/tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.reference new file mode 100644 index 000000000000..325078d71cc1 --- /dev/null +++ b/tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.reference @@ -0,0 +1,8 @@ +rewrite_in +1 +1 +rewrite_join +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.sql 
b/tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.sql new file mode 100644 index 000000000000..9bd95d72fd20 --- /dev/null +++ b/tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS test_global_alias_chain_dist; +DROP TABLE IF EXISTS test_global_alias_chain_local; + +CREATE TABLE test_global_alias_chain_local +( + id UInt64, + base UInt64, + a UInt64 ALIAS base, + b UInt64 ALIAS a +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_global_alias_chain_local VALUES (1, 1); + +CREATE TABLE test_global_alias_chain_dist AS test_global_alias_chain_local +ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), test_global_alias_chain_local, rand()); + +SELECT 'rewrite_in'; +SELECT id +FROM test_global_alias_chain_dist +WHERE id IN (SELECT b FROM test_global_alias_chain_dist) +ORDER BY id +SETTINGS enable_analyzer = 1, distributed_product_mode = 'global'; + +SELECT 'rewrite_join'; +SELECT l.id +FROM test_global_alias_chain_dist AS l +INNER JOIN (SELECT b FROM test_global_alias_chain_dist) AS r ON l.id = r.b +ORDER BY l.id +SETTINGS enable_analyzer = 1, distributed_product_mode = 'global'; + +DROP TABLE test_global_alias_chain_dist; +DROP TABLE test_global_alias_chain_local; diff --git a/tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.reference b/tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.reference new file mode 100644 index 000000000000..9a3a29a69ce8 --- /dev/null +++ b/tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.reference @@ -0,0 +1,2 @@ +global_in_collision_check +1 diff --git a/tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.sql b/tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.sql new file mode 100644 index 000000000000..d47e6a304ba1 --- /dev/null +++ 
b/tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.sql @@ -0,0 +1,56 @@ +DROP TABLE IF EXISTS test_marker_collision_dist; +DROP TABLE IF EXISTS test_marker_collision_main; +DROP TABLE IF EXISTS test_marker_collision_left; +DROP TABLE IF EXISTS test_marker_collision_right; + +CREATE TABLE test_marker_collision_main +( + id UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_marker_collision_main VALUES (1); + +CREATE TABLE test_marker_collision_left +( + id UInt64, + x UInt64, + b UInt64 ALIAS x +) +ENGINE = MergeTree() +ORDER BY id; + +CREATE TABLE test_marker_collision_right +( + id UInt64, + y UInt64, + b UInt64 ALIAS y +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_marker_collision_left VALUES (1, 1); +INSERT INTO test_marker_collision_right VALUES (1, 20); + +CREATE TABLE test_marker_collision_dist AS test_marker_collision_main +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_marker_collision_main, rand()); + +SELECT 'global_in_collision_check'; +SELECT id +FROM test_marker_collision_dist +WHERE id GLOBAL IN +( + SELECT test_marker_collision_left.id + FROM test_marker_collision_left + INNER JOIN test_marker_collision_right + ON test_marker_collision_left.id = test_marker_collision_right.id + WHERE test_marker_collision_left.b + test_marker_collision_right.b = 21 +) +ORDER BY id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1; + +DROP TABLE test_marker_collision_dist; +DROP TABLE test_marker_collision_main; +DROP TABLE test_marker_collision_left; +DROP TABLE test_marker_collision_right; diff --git a/tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.reference b/tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.reference new file mode 100644 index 000000000000..fbdae0d35623 --- /dev/null +++ b/tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.reference @@ -0,0 +1,4 @@ +single_replica_second_hop +1999-03-29 
01:15:33.000 x x +parallel_replicas_second_hop +1999-03-29 01:15:33.000 x x diff --git a/tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.sql b/tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.sql new file mode 100644 index 000000000000..5500c4904f82 --- /dev/null +++ b/tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.sql @@ -0,0 +1,51 @@ +-- Regression coverage for materialized __aliasMarker metadata across +-- remote -> Distributed -> parallel replicas fanout. + +DROP TABLE IF EXISTS test_alias_pr_second_hop_dist; +DROP TABLE IF EXISTS test_alias_pr_second_hop_local; + +CREATE TABLE test_alias_pr_second_hop_local +( + dt DateTime64(3), + base String, + alias_base_0 String ALIAS base, + alias_base_1 String ALIAS base +) +ENGINE = MergeTree() +ORDER BY dt; + +INSERT INTO test_alias_pr_second_hop_local VALUES + ('1999-03-29T01:15:33', 'x'), + ('1999-03-29T01:15:34', 'y'); + +CREATE TABLE test_alias_pr_second_hop_dist AS test_alias_pr_second_hop_local +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test_alias_pr_second_hop_local); + +SELECT 'single_replica_second_hop'; +SELECT dt, alias_base_0, alias_base_1 +FROM remote('127.0.0.2', currentDatabase(), test_alias_pr_second_hop_dist) +ORDER BY dt +LIMIT 1 +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + enable_parallel_replicas = 1, + max_parallel_replicas = 1, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', + parallel_replicas_for_non_replicated_merge_tree = 1; + +SELECT 'parallel_replicas_second_hop'; +SELECT dt, alias_base_0, alias_base_1 +FROM remote('127.0.0.2', currentDatabase(), test_alias_pr_second_hop_dist) +ORDER BY dt +LIMIT 1 +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + enable_parallel_replicas = 1, + max_parallel_replicas = 3, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', + 
parallel_replicas_for_non_replicated_merge_tree = 1; + +DROP TABLE test_alias_pr_second_hop_dist; +DROP TABLE test_alias_pr_second_hop_local; diff --git a/tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.reference b/tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.reference new file mode 100644 index 000000000000..8c2ebec53ff8 --- /dev/null +++ b/tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.reference @@ -0,0 +1,35 @@ +case1_global_in_unnamed_identical_derived_subqueries +1 +case2_global_join_unnamed_identical_derived_subqueries +id left_b0 right_b0 +1 10 20 +case3_global_join_unnamed_identical_derived_subqueries_serialize_query_plan +id left_b0 right_b0 +1 10 20 +case4_global_join_unnamed_remote_over_distributed_subqueries +id left_b0 right_b0 +1 10 20 +case5_global_join_unnamed_identical_dual_alias_columns +id left_b0 right_b1 +1 10 20 +case6_local_join_unnamed_identical_derived_subqueries +id left_b0 right_b0 +1 10 20 +case7_local_join_unnamed_identical_derived_subqueries_serialize_query_plan +id left_b0 right_b0 +1 10 20 +case8_global_join_direct_distributed_serialize_query_plan +id b0 b1 +1 10 10 +2 20 20 +case9_global_join_direct_remote_over_distributed_serialize_query_plan +id b0 b1 +1 10 10 +2 20 20 +case10_wrapper_alias_subquery_serialize_query_plan +id left_foo right_foo +1 1 20 +case11_wrapper_constant_alias_subquery_serialize_query_plan +id left_foo right_foo +1 foo foo +2 foo foo diff --git a/tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.sql b/tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.sql new file mode 100644 index 000000000000..2ac9a8a65fe3 --- /dev/null +++ b/tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.sql @@ -0,0 +1,297 @@ +DROP TABLE IF EXISTS test_marker_suite_main_dist; +DROP TABLE IF EXISTS test_marker_suite_side_dist; +DROP TABLE IF EXISTS test_marker_suite_main; +DROP TABLE IF EXISTS 
test_marker_suite_side; +DROP TABLE IF EXISTS test_wrapper_alias_a_dist; +DROP TABLE IF EXISTS test_wrapper_alias_b_dist; +DROP TABLE IF EXISTS test_wrapper_alias_a_local; +DROP TABLE IF EXISTS test_wrapper_alias_b_local; +DROP TABLE IF EXISTS test_wrapper_const_alias_a_dist; +DROP TABLE IF EXISTS test_wrapper_const_alias_b_dist; +DROP TABLE IF EXISTS test_wrapper_const_alias_a_local; +DROP TABLE IF EXISTS test_wrapper_const_alias_b_local; + +CREATE TABLE test_marker_suite_main +( + id UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_marker_suite_main VALUES (1), (2); + +CREATE TABLE test_marker_suite_side +( + id UInt64, + x UInt64, + b0 UInt64 ALIAS x, + b1 UInt64 ALIAS x +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_marker_suite_side VALUES (1, 10), (2, 20); + +CREATE TABLE test_marker_suite_main_dist AS test_marker_suite_main +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_marker_suite_main, rand()); + +CREATE TABLE test_marker_suite_side_dist AS test_marker_suite_side +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_marker_suite_side, rand()); + +SELECT 'case1_global_in_unnamed_identical_derived_subqueries'; +SELECT id +FROM test_marker_suite_main_dist +WHERE id GLOBAL IN +( + SELECT left_id + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM test_marker_suite_side_dist) + INNER JOIN + (SELECT id AS right_id, b0 AS right_b0 FROM test_marker_suite_side_dist) + ON left_id < right_id + WHERE left_b0 + right_b0 = 30 + SETTINGS joined_subquery_requires_alias = 0 +) +ORDER BY id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1; + +SELECT 'case2_global_join_unnamed_identical_derived_subqueries'; +SELECT m.id, j.left_b0, j.right_b0 +FROM test_marker_suite_main_dist AS m +GLOBAL INNER JOIN +( + SELECT left_id AS id, left_b0, right_b0 + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM test_marker_suite_side_dist) + INNER JOIN + (SELECT id AS right_id, b0 AS right_b0 FROM 
test_marker_suite_side_dist) + ON left_id < right_id + WHERE left_b0 + right_b0 = 30 + SETTINGS joined_subquery_requires_alias = 0 +) AS j +ON m.id = j.id +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1 +FORMAT TSVWithNames; + +SELECT 'case3_global_join_unnamed_identical_derived_subqueries_serialize_query_plan'; +SELECT m.id, j.left_b0, j.right_b0 +FROM test_marker_suite_main_dist AS m +GLOBAL INNER JOIN +( + SELECT left_id AS id, left_b0, right_b0 + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM test_marker_suite_side_dist) + INNER JOIN + (SELECT id AS right_id, b0 AS right_b0 FROM test_marker_suite_side_dist) + ON left_id < right_id + WHERE left_b0 + right_b0 = 30 + SETTINGS joined_subquery_requires_alias = 0 +) AS j +ON m.id = j.id +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, serialize_query_plan = 1 +FORMAT TSVWithNames; + +SELECT 'case4_global_join_unnamed_remote_over_distributed_subqueries'; +SELECT m.id, j.left_b0, j.right_b0 +FROM test_marker_suite_main_dist AS m +GLOBAL INNER JOIN +( + SELECT left_id AS id, left_b0, right_b0 + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM remote('127.0.0.2', currentDatabase(), test_marker_suite_side_dist)) + INNER JOIN + (SELECT id AS right_id, b0 AS right_b0 FROM remote('127.0.0.2', currentDatabase(), test_marker_suite_side_dist)) + ON left_id < right_id + WHERE left_b0 + right_b0 = 30 + SETTINGS joined_subquery_requires_alias = 0 +) AS j +ON m.id = j.id +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1 +FORMAT TSVWithNames; + +SELECT 'case5_global_join_unnamed_identical_dual_alias_columns'; +SELECT m.id, j.left_b0, j.right_b1 +FROM test_marker_suite_main_dist AS m +GLOBAL INNER JOIN +( + SELECT left_id AS id, left_b0, right_b1 + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM test_marker_suite_side_dist) + INNER JOIN + (SELECT id AS right_id, b1 AS right_b1 FROM test_marker_suite_side_dist) + ON left_id < right_id + WHERE left_b0 + right_b1 = 30 + 
SETTINGS joined_subquery_requires_alias = 0 +) AS j +ON m.id = j.id +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1 +FORMAT TSVWithNames; + +SELECT 'case6_local_join_unnamed_identical_derived_subqueries'; +SELECT m.id, j.left_b0, j.right_b0 +FROM test_marker_suite_main_dist AS m +INNER JOIN +( + SELECT left_id AS id, left_b0, right_b0 + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM test_marker_suite_side_dist) + INNER JOIN + (SELECT id AS right_id, b0 AS right_b0 FROM test_marker_suite_side_dist) + ON left_id < right_id + WHERE left_b0 + right_b0 = 30 + SETTINGS joined_subquery_requires_alias = 0 +) AS j +ON m.id = j.id +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, distributed_product_mode = 'local' +FORMAT TSVWithNames; + +SELECT 'case7_local_join_unnamed_identical_derived_subqueries_serialize_query_plan'; +SELECT m.id, j.left_b0, j.right_b0 +FROM test_marker_suite_main_dist AS m +INNER JOIN +( + SELECT left_id AS id, left_b0, right_b0 + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM test_marker_suite_side_dist) + INNER JOIN + (SELECT id AS right_id, b0 AS right_b0 FROM test_marker_suite_side_dist) + ON left_id < right_id + WHERE left_b0 + right_b0 = 30 + SETTINGS joined_subquery_requires_alias = 0 +) AS j +ON m.id = j.id +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, distributed_product_mode = 'local', serialize_query_plan = 1 +FORMAT TSVWithNames; + +SELECT 'case8_global_join_direct_distributed_serialize_query_plan'; +SELECT m.id, b0, b1 +FROM test_marker_suite_main_dist AS m +GLOBAL INNER JOIN test_marker_suite_side_dist USING (id) +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, asterisk_include_alias_columns = 1, serialize_query_plan = 1 +FORMAT TSVWithNames; + +SELECT 'case9_global_join_direct_remote_over_distributed_serialize_query_plan'; +SELECT m.id, b0, b1 +FROM test_marker_suite_main_dist AS m +GLOBAL INNER JOIN remote('127.0.0.2', currentDatabase(), 
test_marker_suite_side_dist) USING (id) +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, asterisk_include_alias_columns = 1, joined_subquery_requires_alias = 0, serialize_query_plan = 1 +FORMAT TSVWithNames; + +DROP TABLE test_marker_suite_main_dist; +DROP TABLE test_marker_suite_side_dist; +DROP TABLE test_marker_suite_main; +DROP TABLE test_marker_suite_side; + +CREATE TABLE test_wrapper_alias_a_local +( + id UInt64, + x UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +CREATE TABLE test_wrapper_alias_b_local +( + id UInt64, + x UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_wrapper_alias_a_local VALUES (1, 1), (2, 20); +INSERT INTO test_wrapper_alias_b_local VALUES (1, 1), (2, 20); + +CREATE TABLE test_wrapper_alias_a_dist +( + id UInt64, + x UInt64, + foo UInt64 ALIAS x +) +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_wrapper_alias_a_local, rand()); + +CREATE TABLE test_wrapper_alias_b_dist +( + id UInt64, + x UInt64, + foo UInt64 ALIAS x +) +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_wrapper_alias_b_local, rand()); + +SELECT 'case10_wrapper_alias_subquery_serialize_query_plan'; +SELECT a.id, j.left_foo, j.right_foo +FROM test_wrapper_alias_a_dist AS a +GLOBAL INNER JOIN +( + SELECT l.id, l.foo AS left_foo, r.foo AS right_foo + FROM test_wrapper_alias_a_dist AS l + INNER JOIN test_wrapper_alias_b_dist AS r ON l.id < r.id + WHERE l.foo + r.foo = 21 +) AS j +ON a.id = j.id +ORDER BY a.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, serialize_query_plan = 1 +FORMAT TSVWithNames; + +DROP TABLE test_wrapper_alias_a_dist; +DROP TABLE test_wrapper_alias_b_dist; +DROP TABLE test_wrapper_alias_a_local; +DROP TABLE test_wrapper_alias_b_local; + +CREATE TABLE test_wrapper_const_alias_a_local +( + id UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +CREATE TABLE test_wrapper_const_alias_b_local +( + id UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO 
test_wrapper_const_alias_a_local VALUES (1), (2); +INSERT INTO test_wrapper_const_alias_b_local VALUES (1), (2); + +CREATE TABLE test_wrapper_const_alias_a_dist +( + id UInt64, + foo String ALIAS 'foo' +) +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_wrapper_const_alias_a_local, rand()); + +CREATE TABLE test_wrapper_const_alias_b_dist +( + id UInt64, + foo String ALIAS 'foo' +) +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_wrapper_const_alias_b_local, rand()); + +SELECT 'case11_wrapper_constant_alias_subquery_serialize_query_plan'; +SELECT a.id, j.left_foo, j.right_foo +FROM test_wrapper_const_alias_a_dist AS a +GLOBAL INNER JOIN +( + SELECT l.id, l.foo AS left_foo, r.foo AS right_foo + FROM test_wrapper_const_alias_a_dist AS l + INNER JOIN test_wrapper_const_alias_b_dist AS r ON l.id = r.id + WHERE l.foo = 'foo' AND r.foo = 'foo' +) AS j +ON a.id = j.id +ORDER BY a.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, serialize_query_plan = 1 +FORMAT TSVWithNames; + +DROP TABLE test_wrapper_const_alias_a_dist; +DROP TABLE test_wrapper_const_alias_b_dist; +DROP TABLE test_wrapper_const_alias_a_local; +DROP TABLE test_wrapper_const_alias_b_local; diff --git a/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.reference b/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.reference new file mode 100644 index 000000000000..750abc85a605 --- /dev/null +++ b/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.reference @@ -0,0 +1,56 @@ +prefer_localhost_replica_0 +x a b c d inner_c inner_d +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +prefer_localhost_replica_1 +x a b c d inner_c inner_d +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 
1 1 3 3 3 3 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +prefer_localhost_replica_0_serialize_query_plan_1 +x a b c d inner_c inner_d +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +prefer_localhost_replica_1_serialize_query_plan_1 +x a b c d inner_c inner_d +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 diff --git a/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.sql b/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.sql new file mode 100644 index 000000000000..e4bbceb7e6cf --- /dev/null +++ b/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.sql @@ -0,0 +1,92 @@ +DROP TABLE IF EXISTS test_dod_double_alias_outer; +DROP TABLE IF EXISTS test_dod_double_alias_inner; +DROP TABLE IF EXISTS test_dod_double_alias_local; + +CREATE TABLE test_dod_double_alias_local +( + x UInt64 +) +ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO test_dod_double_alias_local VALUES (1), (2), (10); + +CREATE TABLE test_dod_double_alias_inner +( + x UInt64, + a UInt64 ALIAS 2, + b UInt64 ALIAS 2, + inner_c UInt64 ALIAS x + 1, + inner_d UInt64 ALIAS x + 1 +) +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_double_alias_local); + +CREATE TABLE test_dod_double_alias_outer +( + x UInt64, + inner_c UInt64, + a UInt64 ALIAS 1, + b UInt64 ALIAS 1, + c UInt64 ALIAS inner_c, + d UInt64 ALIAS inner_c, + inner_d UInt64 +) +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_double_alias_inner); + +SELECT 'prefer_localhost_replica_0'; +SELECT x, a, b, c, d, inner_c, inner_d +FROM test_dod_double_alias_outer +ORDER BY x +SETTINGS + 
enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1'; +SELECT x, a, b, c, d, inner_c, inner_d +FROM test_dod_double_alias_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_0_serialize_query_plan_1'; +SELECT x, a, b, c, d, inner_c, inner_d +FROM test_dod_double_alias_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0, + serialize_query_plan = 1 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_serialize_query_plan_1'; +SELECT x, a, b, c, d, inner_c, inner_d +FROM test_dod_double_alias_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0, + serialize_query_plan = 1 +FORMAT TSVWithNames; + +DROP TABLE test_dod_double_alias_outer; +DROP TABLE test_dod_double_alias_inner; +DROP TABLE test_dod_double_alias_local; diff --git a/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.reference b/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.reference new file mode 100644 index 000000000000..5155d27310c2 --- /dev/null +++ b/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.reference @@ -0,0 +1,8 @@ +issue_1208_self_in_subquery +5 +issue_1209_join_mode_local +6 +issue_1209_join_mode_allow +6 +issue_1422_hybrid_in_merge_tree_subquery +5 diff --git 
a/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.sql b/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.sql new file mode 100644 index 000000000000..c5cab11cee86 --- /dev/null +++ b/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.sql @@ -0,0 +1,118 @@ +SET allow_experimental_hybrid_table = 1, + enable_analyzer = 1, + prefer_localhost_replica = 0, + -- AST-path regression test for unknown-table issues #1208/#1209/#1422. Pin + -- serialize_query_plan=0 so the "distributed plan" CI flavor (which forces it on) does not + -- route these hybrid + IN-subquery queries through the plan path, which has a separate, + -- unrelated header-reconciliation gap. + serialize_query_plan = 0; + +DROP TABLE IF EXISTS test_hybrid_issue_1208_1209_1422; +DROP TABLE IF EXISTS test_hybrid_issue_1208_1209_1422_left; +DROP TABLE IF EXISTS test_hybrid_issue_1208_1209_1422_right; +DROP TABLE IF EXISTS test_hybrid_issue_1208_1209_1422_mt; + +CREATE TABLE test_hybrid_issue_1208_1209_1422_left +( + string_col String, + long_col Int64, + date_col Date +) +ENGINE = MergeTree +ORDER BY string_col; + +CREATE TABLE test_hybrid_issue_1208_1209_1422_right +( + string_col String, + long_col Int64, + date_col Date +) +ENGINE = MergeTree +ORDER BY string_col; + +CREATE TABLE test_hybrid_issue_1208_1209_1422_mt +( + string_col String, + long_col Int64, + date_col Date +) +ENGINE = MergeTree +ORDER BY string_col; + +INSERT INTO test_hybrid_issue_1208_1209_1422_left VALUES + ('William', 9044, toDate('2024-01-01')), + ('Oliver', 1654, toDate('2024-01-01')), + ('Frank', 8751, toDate('2024-01-01')); + +INSERT INTO test_hybrid_issue_1208_1209_1422_right VALUES + ('Louis', 1519, toDate('2024-01-02')), + ('Isaac', 3611, toDate('2024-01-02')); + +INSERT INTO test_hybrid_issue_1208_1209_1422_mt +SELECT * FROM test_hybrid_issue_1208_1209_1422_left +UNION ALL +SELECT * FROM test_hybrid_issue_1208_1209_1422_right; + +CREATE TABLE 
test_hybrid_issue_1208_1209_1422 +( + string_col String, + long_col Int64, + date_col Date +) +ENGINE = Hybrid( + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_issue_1208_1209_1422_left'), date_col <= '2024-01-01', + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_issue_1208_1209_1422_right'), date_col > '2024-01-01' +); + +SELECT 'issue_1208_self_in_subquery'; +SELECT count() +FROM +( + SELECT string_col + FROM test_hybrid_issue_1208_1209_1422 + WHERE string_col IN + ( + SELECT DISTINCT string_col + FROM test_hybrid_issue_1208_1209_1422 + WHERE long_col > 1500 + ) +); + +SELECT 'issue_1209_join_mode_local'; +SELECT uniqExact(coalesce(h_string_col, m_string_col)) +FROM +( + SELECT h.string_col AS h_string_col, m.string_col AS m_string_col, h.long_col AS hybrid_long, m.long_col AS mt_long + FROM test_hybrid_issue_1208_1209_1422 AS h + FULL OUTER JOIN test_hybrid_issue_1208_1209_1422_mt AS m ON h.string_col = m.string_col + SETTINGS object_storage_cluster_join_mode = 'local' +); + +SELECT 'issue_1209_join_mode_allow'; +SELECT uniqExact(coalesce(h_string_col, m_string_col)) +FROM +( + SELECT h.string_col AS h_string_col, m.string_col AS m_string_col, h.long_col AS hybrid_long, m.long_col AS mt_long + FROM test_hybrid_issue_1208_1209_1422 AS h + FULL OUTER JOIN test_hybrid_issue_1208_1209_1422_mt AS m ON h.string_col = m.string_col + SETTINGS object_storage_cluster_join_mode = 'allow' +); + +SELECT 'issue_1422_hybrid_in_merge_tree_subquery'; +SELECT count() +FROM +( + SELECT string_col + FROM test_hybrid_issue_1208_1209_1422 + WHERE string_col IN + ( + SELECT DISTINCT string_col + FROM test_hybrid_issue_1208_1209_1422_mt + WHERE long_col > 1500 + ) +); + +DROP TABLE test_hybrid_issue_1208_1209_1422; +DROP TABLE test_hybrid_issue_1208_1209_1422_left; +DROP TABLE test_hybrid_issue_1208_1209_1422_right; +DROP TABLE test_hybrid_issue_1208_1209_1422_mt; diff --git a/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.reference 
b/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.reference new file mode 100644 index 000000000000..f32381f38096 --- /dev/null +++ b/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.reference @@ -0,0 +1,20 @@ +local +1 2 3 +2 3 4 +10 11 12 +merge_prefer0 +1 2 3 +2 3 4 +10 11 12 +merge_prefer1 +1 2 3 +2 3 4 +10 11 12 +merge_prefer0_plan +1 2 3 +2 3 4 +10 11 12 +merge_prefer1_plan +1 2 3 +2 3 4 +10 11 12 diff --git a/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.sql b/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.sql new file mode 100644 index 000000000000..c5817fd07f34 --- /dev/null +++ b/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.sql @@ -0,0 +1,58 @@ +-- Plain Merge over Distributed over MergeTree (no Hybrid, no explicit __aliasMarker). +-- Nested ALIAS columns (b contains a's subexpression). Reading the alias columns through the +-- Merge table must reconcile the child (Distributed) header by name; a positional reconciliation +-- in StorageMerge::convertAndFilterSourceStream would swap the columns (or fill them with 0). +-- The correct result equals the single-node ('local') result. +-- +-- Determinism notes: `x` is kept in GROUP BY so the ALIAS expansion can resolve it (the alias +-- expressions are defined in terms of x); GROUP BY also deduplicates the rows the two shards +-- produce, and ORDER BY x (distinct values) gives a total order independent of the distributed +-- merge order. So every block - local and the distributed variants - yields the same rows. 
+DROP TABLE IF EXISTS test_merge_alias_swap_merge; +DROP TABLE IF EXISTS test_merge_alias_swap_dist; +DROP TABLE IF EXISTS test_merge_alias_swap_local; + +CREATE TABLE test_merge_alias_swap_local +( + x UInt64, + a UInt64 ALIAS x + 1, + b UInt64 ALIAS a + 1 +) +ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO test_merge_alias_swap_local VALUES (1), (2), (10); + +CREATE TABLE test_merge_alias_swap_dist AS test_merge_alias_swap_local +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_merge_alias_swap_local); + +CREATE TABLE test_merge_alias_swap_merge +( + x UInt64, + a UInt64, + b UInt64 +) +ENGINE = Merge(currentDatabase(), '^test_merge_alias_swap_dist$'); + +SELECT 'local'; +SELECT x, a, b FROM test_merge_alias_swap_local GROUP BY x, a, b ORDER BY x; + +SELECT 'merge_prefer0'; +SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0; + +SELECT 'merge_prefer1'; +SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1; + +SELECT 'merge_prefer0_plan'; +SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 1; + +SELECT 'merge_prefer1_plan'; +SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1, serialize_query_plan = 1; + +DROP TABLE test_merge_alias_swap_merge; +DROP TABLE test_merge_alias_swap_dist; +DROP TABLE test_merge_alias_swap_local; diff --git a/tests/queries/0_stateless/03930_distributed_alias_swap_planner.reference b/tests/queries/0_stateless/03930_distributed_alias_swap_planner.reference new file mode 100644 index 000000000000..402cc360bae5 --- /dev/null +++ 
b/tests/queries/0_stateless/03930_distributed_alias_swap_planner.reference @@ -0,0 +1,15 @@ +local +11 12 +21 22 +dist_prefer0 +11 12 +21 22 +dist_prefer1 +11 12 +21 22 +dist_prefer0_plan +11 12 +21 22 +dist_prefer1_plan +11 12 +21 22 diff --git a/tests/queries/0_stateless/03930_distributed_alias_swap_planner.sql b/tests/queries/0_stateless/03930_distributed_alias_swap_planner.sql new file mode 100644 index 000000000000..848f35b0be14 --- /dev/null +++ b/tests/queries/0_stateless/03930_distributed_alias_swap_planner.sql @@ -0,0 +1,34 @@ +-- Plain Distributed (no Hybrid). Two nested ALIAS columns: a2 contains a1's subexpression, +-- so planner CSE may reorder the remote header. Correct result must equal the single-node +-- ('local') result across every transport variant. +DROP TABLE IF EXISTS t_local_03930; +DROP TABLE IF EXISTS t_dist_03930; + +CREATE TABLE t_local_03930 (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1) +ENGINE = MergeTree ORDER BY x; +INSERT INTO t_local_03930 VALUES (10), (20); + +CREATE TABLE t_dist_03930 AS t_local_03930 +ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_local_03930); + +SELECT 'local'; +SELECT a1, a2 FROM t_local_03930 ORDER BY a1; + +SELECT 'dist_prefer0'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0; + +SELECT 'dist_prefer1'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1; + +SELECT 'dist_prefer0_plan'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 1; + +SELECT 'dist_prefer1_plan'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1, serialize_query_plan = 1; + +DROP TABLE t_dist_03930; +DROP TABLE t_local_03930; diff --git 
a/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.reference b/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.reference
new file mode 100644
index 000000000000..14f9c770f714
--- /dev/null
+++ b/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.reference
@@ -0,0 +1,9 @@
+local
+10	11	12
+20	21	22
+pr_ast
+10	11	12
+20	21	22
+pr_plan
+10	11	12
+20	21	22
diff --git a/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.sql b/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.sql
new file mode 100644
index 000000000000..f669631889c2
--- /dev/null
+++ b/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.sql
@@ -0,0 +1,37 @@
+-- Plain Distributed + parallel replicas (no Hybrid). Exercises the findParallelReplicasQuery
+-- header reconciliation path with nested ALIAS columns. Correct result equals the single-node
+-- ('local') result for both AST and serialized-plan transport.
+--
+-- Determinism note: parallel replicas over a small non-replicated table can read the same rows on
+-- several replicas under some (randomized) settings, duplicating output. GROUP BY x, a1, a2
+-- deduplicates that and keeps x in the required columns for the ALIAS expansion; ORDER BY x over
+-- distinct values gives a total order. The test still fails if a1/a2 are swapped or wrong.
+DROP TABLE IF EXISTS t_local_03931;
+DROP TABLE IF EXISTS t_dist_03931;
+
+CREATE TABLE t_local_03931 (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1)
+ENGINE = MergeTree ORDER BY x;
+INSERT INTO t_local_03931 VALUES (10), (20);
+
+CREATE TABLE t_dist_03931 AS t_local_03931
+ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), t_local_03931);
+
+SELECT 'local';
+SELECT x, a1, a2 FROM t_local_03931 GROUP BY x, a1, a2 ORDER BY x;
+
+SELECT 'pr_ast';
+SELECT x, a1, a2 FROM t_dist_03931 GROUP BY x, a1, a2 ORDER BY x
+SETTINGS enable_analyzer = 1, enable_alias_marker = 1,
+    allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3,
+    cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost',
+    serialize_query_plan = 0;
+
+SELECT 'pr_plan';
+SELECT x, a1, a2 FROM t_dist_03931 GROUP BY x, a1, a2 ORDER BY x
+SETTINGS enable_analyzer = 1, enable_alias_marker = 1,
+    allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3,
+    cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost',
+    serialize_query_plan = 1;
+
+DROP TABLE t_dist_03931;
+DROP TABLE t_local_03931;
diff --git a/tests/queries/0_stateless/03932_distributed_alias_strict_name.reference b/tests/queries/0_stateless/03932_distributed_alias_strict_name.reference
new file mode 100644
index 000000000000..cddf594d4e31
--- /dev/null
+++ b/tests/queries/0_stateless/03932_distributed_alias_strict_name.reference
@@ -0,0 +1,9 @@
+local
+12	11	23
+22	21	43
+dist
+12	11	23
+22	21	43
+dist_plan
+12	11	23
+22	21	43
diff --git a/tests/queries/0_stateless/03932_distributed_alias_strict_name.sql b/tests/queries/0_stateless/03932_distributed_alias_strict_name.sql
new file mode 100644
index 000000000000..c094d28f01e0
--- /dev/null
+++ b/tests/queries/0_stateless/03932_distributed_alias_strict_name.sql
@@ -0,0 +1,27 @@
+-- Plain Distributed (no Hybrid). Reorders alias columns and mixes a computed expression over
+-- them. With strict name-based header reconciliation (positional fallback disabled), the result
+-- must equal the single-node ('local') result for both AST and serialized-plan transport, and no
+-- LOGICAL_ERROR must be raised.
+DROP TABLE IF EXISTS t_local_03932;
+DROP TABLE IF EXISTS t_dist_03932;
+
+CREATE TABLE t_local_03932 (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1)
+ENGINE = MergeTree ORDER BY x;
+INSERT INTO t_local_03932 VALUES (10), (20);
+
+CREATE TABLE t_dist_03932 AS t_local_03932
+ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_local_03932);
+
+SELECT 'local';
+SELECT a2, a1, a1 + a2 AS s FROM t_local_03932 ORDER BY x;
+
+SELECT 'dist';
+SELECT a2, a1, a1 + a2 AS s FROM t_dist_03932 ORDER BY x
+SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0;
+
+SELECT 'dist_plan';
+SELECT a2, a1, a1 + a2 AS s FROM t_dist_03932 ORDER BY x
+SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 1;
+
+DROP TABLE t_dist_03932;
+DROP TABLE t_local_03932;
diff --git a/tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.reference b/tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.reference
new file mode 100644
index 000000000000..f3f736b7dea2
--- /dev/null
+++ b/tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.reference
@@ -0,0 +1,14 @@
+2arg_identity
+42
+lambda_local
+[1]
+[2]
+[3]
+lambda_over_distributed
+[1]
+[2]
+[3]
+lambda_over_distributed_plan
+[1]
+[2]
+[3]
diff --git a/tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.sql b/tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.sql
new file mode 100644
index 000000000000..e327c442397d
--- /dev/null
+++ b/tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.sql
@@ -0,0 +1,31 @@
+-- __aliasMarker is an internal pass-through identity function. Direct use from SQL must not
+-- raise a server-side LOGICAL_ERROR (which would abort under abort_on_logical_error / sanitizers),
+-- in particular inside a lambda over a Distributed table where the marker's column argument
+-- resolves to a lambda parameter with no table source.
+DROP TABLE IF EXISTS t_local_03933;
+DROP TABLE IF EXISTS t_dist_03933;
+
+CREATE TABLE t_local_03933 (x UInt64) ENGINE = MergeTree ORDER BY x;
+INSERT INTO t_local_03933 VALUES (1), (2), (3);
+
+CREATE TABLE t_dist_03933 AS t_local_03933
+ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_local_03933);
+
+SELECT '2arg_identity';
+SELECT __aliasMarker(42, 'anything');
+
+SELECT 'lambda_local';
+SELECT arrayMap(lx -> __aliasMarker(lx, lx), [x]) AS arr FROM t_local_03933 ORDER BY x;
+
+SELECT 'lambda_over_distributed';
+SELECT arrayMap(lx -> __aliasMarker(lx, lx), [x]) AS arr
+FROM t_dist_03933 ORDER BY x
+SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0;
+
+SELECT 'lambda_over_distributed_plan';
+SELECT arrayMap(lx -> __aliasMarker(lx, lx), [x]) AS arr
+FROM t_dist_03933 ORDER BY x
+SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 1;
+
+DROP TABLE t_dist_03933;
+DROP TABLE t_local_03933;
diff --git a/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.reference b/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.reference
new file mode 100644
index 000000000000..cc0e49a8fdf9
--- /dev/null
+++ b/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.reference
@@ -0,0 +1,15 @@
+marker_on
+x	a_str	inner_c
+1	aaaa	2
+1	aaaa	2
+1	aaaa	2
+1	aaaa	2
+2	aaaa	3
+2	aaaa	3
+2	aaaa	3
+2	aaaa	3
+10	aaaa	11
+10	aaaa	11
+10	aaaa	11
+10	aaaa	11
+marker_off_reintroduces_swap
diff --git a/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.sql b/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.sql
new file mode 100644
index 000000000000..4100828c9eb5
--- /dev/null
+++ b/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.sql
@@ -0,0 +1,39 @@
+-- Demonstrates that enable_alias_marker is a correctness toggle for distributed ALIAS columns.
+-- Distributed-over-distributed with a String ALIAS (`a_str`) and a UInt64 ALIAS (`inner_c`):
+--   * marker ON  -> columns reconciled by name, correct results.
+--   * marker OFF -> the inlined ALIAS expansion swaps columns; the String 'aaaa' is routed into
+--                   the UInt64 `inner_c` slot and the query fails with CANNOT_PARSE_TEXT.
+DROP TABLE IF EXISTS t_se_local;
+DROP TABLE IF EXISTS t_se_inner;
+DROP TABLE IF EXISTS t_se_outer;
+
+CREATE TABLE t_se_local (x UInt64) ENGINE = MergeTree() ORDER BY x;
+INSERT INTO t_se_local VALUES (1), (2), (10);
+
+CREATE TABLE t_se_inner (x UInt64, inner_c UInt64 ALIAS x + 1)
+ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), t_se_local);
+
+CREATE TABLE t_se_outer (x UInt64, inner_c UInt64, a_str String ALIAS 'aaaa')
+ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), t_se_inner);
+
+-- serialize_query_plan is pinned to 0 throughout: this test targets the AST-path alias marker.
+-- On the serialized-plan path the header is reconciled by name regardless of the marker, so the
+-- marker_off swap below does not occur there; the "distributed plan" CI flavor would otherwise
+-- force the plan path on and the marker_off query would succeed instead of erroring.
+SELECT 'marker_on';
+SELECT x, a_str, inner_c
+FROM t_se_outer
+ORDER BY x
+SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 0
+FORMAT TSVWithNames;
+
+SELECT 'marker_off_reintroduces_swap';
+-- No output format header here: the query errors mid-execution, so it must not stream a header.
+SELECT x, a_str, inner_c
+FROM t_se_outer
+ORDER BY x
+SETTINGS enable_analyzer = 1, enable_alias_marker = 0, prefer_localhost_replica = 0, serialize_query_plan = 0; -- { serverError CANNOT_PARSE_TEXT }
+
+DROP TABLE t_se_outer;
+DROP TABLE t_se_inner;
+DROP TABLE t_se_local;
diff --git a/tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.reference b/tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.reference
new file mode 100644
index 000000000000..f32381f38096
--- /dev/null
+++ b/tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.reference
@@ -0,0 +1,20 @@
+local
+1	2	3
+2	3	4
+10	11	12
+merge_prefer0
+1	2	3
+2	3	4
+10	11	12
+merge_prefer1
+1	2	3
+2	3	4
+10	11	12
+merge_prefer0_plan
+1	2	3
+2	3	4
+10	11	12
+merge_prefer1_plan
+1	2	3
+2	3	4
+10	11	12
diff --git a/tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.sql b/tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.sql
new file mode 100644
index 000000000000..e1ac59428d5e
--- /dev/null
+++ b/tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.sql
@@ -0,0 +1,58 @@
+-- Plain Merge over Distributed over MergeTree without an explicit __aliasMarker call.
+-- Nested ALIAS columns (b contains a's subexpression). Reading the alias columns through the
+-- Merge table must reconcile the child (Distributed) header by name; a positional reconciliation
+-- in StorageMerge::convertAndFilterSourceStream would swap the columns (or fill them with 0).
+-- The correct result equals the single-node ('local') result.
+--
+-- Determinism notes: `x` is kept in GROUP BY so the ALIAS expansion can resolve it (the alias
+-- expressions are defined in terms of x); GROUP BY also deduplicates the rows the two shards
+-- produce, and ORDER BY x (distinct values) gives a total order independent of the distributed
+-- merge order. So every block - local and the distributed variants - yields the same rows.
+DROP TABLE IF EXISTS test_merge_alias_swap_merge;
+DROP TABLE IF EXISTS test_merge_alias_swap_dist;
+DROP TABLE IF EXISTS test_merge_alias_swap_local;
+
+CREATE TABLE test_merge_alias_swap_local
+(
+    x UInt64,
+    a UInt64 ALIAS x + 1,
+    b UInt64 ALIAS a + 1
+)
+ENGINE = MergeTree()
+ORDER BY x;
+
+INSERT INTO test_merge_alias_swap_local VALUES (1), (2), (10);
+
+CREATE TABLE test_merge_alias_swap_dist AS test_merge_alias_swap_local
+ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_merge_alias_swap_local);
+
+CREATE TABLE test_merge_alias_swap_merge
+(
+    x UInt64,
+    a UInt64,
+    b UInt64
+)
+ENGINE = Merge(currentDatabase(), '^test_merge_alias_swap_dist$');
+
+SELECT 'local';
+SELECT x, a, b FROM test_merge_alias_swap_local GROUP BY x, a, b ORDER BY x;
+
+SELECT 'merge_prefer0';
+SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x
+SETTINGS enable_analyzer = 1, prefer_localhost_replica = 0;
+
+SELECT 'merge_prefer1';
+SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x
+SETTINGS enable_analyzer = 1, prefer_localhost_replica = 1;
+
+SELECT 'merge_prefer0_plan';
+SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x
+SETTINGS enable_analyzer = 1, prefer_localhost_replica = 0, serialize_query_plan = 1;
+
+SELECT 'merge_prefer1_plan';
+SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x
+SETTINGS enable_analyzer = 1, prefer_localhost_replica = 1, serialize_query_plan = 1;
+
+DROP TABLE test_merge_alias_swap_merge;
+DROP TABLE test_merge_alias_swap_dist;
+DROP TABLE test_merge_alias_swap_local;
diff --git a/tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.reference b/tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.reference
new file mode 100644
index 000000000000..97dd73615024
--- /dev/null
+++ b/tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.reference
@@ -0,0 +1,9 @@
+local
+1	10	100
+2	20	200
+merge_prefer0
+1	10	100
+2	20	200
+merge_prefer1
+1	10	100
+2	20	200
diff --git a/tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.sql b/tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.sql
new file mode 100644
index 000000000000..c42ad63adab4
--- /dev/null
+++ b/tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.sql
@@ -0,0 +1,64 @@
+-- Regression for the StorageMerge alias-output-naming fix.
+-- The bug: `Nested::splitName(name, reverse=true)` (used before this fix to strip the
+-- analyzer's `__tableN.` prefix from header column names) splits on the LAST dot, so for
+-- an analyzer identifier like `__table1.\`n.a\`` (a dotted column name wrapped in backticks
+-- by the analyzer) it returns the suffix `a\`` instead of `n.a`, leaving the
+-- `logical_name_to_header_name` map with broken keys. The lookup for `alias.name == "n.a"`
+-- then misses, the alias output falls back to the bare name `n.a`, and the downstream
+-- header-reconciliation step fills the expected `__table1.\`n.a\`` column with type
+-- defaults (zeros). Silent wrong data.
+--
+-- Repro shape: Merge declares dotted column names explicitly (typical when matching a
+-- schema with Nested-style names), underlying storage has those columns as ALIAS, and
+-- the Distributed routing forces analyzer-prefixed names in the Merge level. Using a
+-- two-shard cluster with prefer_localhost_replica=0 reliably reproduces.
+
+DROP TABLE IF EXISTS test_04286_dotted_alias_local;
+DROP TABLE IF EXISTS test_04286_dotted_alias_dist;
+DROP TABLE IF EXISTS test_04286_dotted_alias_merge;
+
+CREATE TABLE test_04286_dotted_alias_local
+(
+    id UInt32,
+    `n.a` UInt32 ALIAS id * 10,
+    `m.b` UInt32 ALIAS id * 100
+)
+ENGINE = MergeTree
+ORDER BY id;
+
+INSERT INTO test_04286_dotted_alias_local VALUES (1), (2);
+
+CREATE TABLE test_04286_dotted_alias_dist AS test_04286_dotted_alias_local
+ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_04286_dotted_alias_local);
+
+CREATE TABLE test_04286_dotted_alias_merge
+(
+    id UInt32,
+    `n.a` UInt32,
+    `m.b` UInt32
+)
+ENGINE = Merge(currentDatabase(), '^test_04286_dotted_alias_dist$');
+
+SELECT 'local';
+SELECT id, `n.a`, `m.b`
+FROM test_04286_dotted_alias_local
+GROUP BY id, `n.a`, `m.b`
+ORDER BY id;
+
+SELECT 'merge_prefer0';
+SELECT id, `n.a`, `m.b`
+FROM test_04286_dotted_alias_merge
+GROUP BY id, `n.a`, `m.b`
+ORDER BY id
+SETTINGS prefer_localhost_replica = 0;
+
+SELECT 'merge_prefer1';
+SELECT id, `n.a`, `m.b`
+FROM test_04286_dotted_alias_merge
+GROUP BY id, `n.a`, `m.b`
+ORDER BY id
+SETTINGS prefer_localhost_replica = 1;
+
+DROP TABLE test_04286_dotted_alias_merge;
+DROP TABLE test_04286_dotted_alias_dist;
+DROP TABLE test_04286_dotted_alias_local;