From d421dedc69e16d8966b45fd0c7675e6c56a147b5 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Mon, 30 Mar 2026 00:57:06 +0000 Subject: [PATCH] fix(ast): treat X'...' as binary literal (#19600) --- Cargo.lock | 1 + src/query/ast/Cargo.toml | 1 + src/query/ast/src/ast/expr.rs | 4 ++ src/query/ast/src/parser/expr.rs | 44 ++++++++++++------ src/query/ast/tests/it/parser.rs | 1 + .../ast/tests/it/testdata/expr-error.txt | 13 ++++++ src/query/ast/tests/it/testdata/expr.txt | 46 ++++++++++++++----- .../sql/src/planner/semantic/type_check.rs | 3 +- src/query/sql/test-support/src/expr_parser.rs | 1 + .../suites/query/functions/binary_format.test | 11 +++++ 10 files changed, 97 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0cd0fac823d55..67312aca775b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3382,6 +3382,7 @@ dependencies = [ "fast-float2", "fastrace", "goldenfile", + "hex", "indent", "itertools 0.13.0", "logos", diff --git a/src/query/ast/Cargo.toml b/src/query/ast/Cargo.toml index 9fd55aa247945..2f25f211a5e67 100644 --- a/src/query/ast/Cargo.toml +++ b/src/query/ast/Cargo.toml @@ -15,6 +15,7 @@ enum-as-inner = { workspace = true } ethnum = { workspace = true } fast-float2 = { workspace = true } fastrace = { workspace = true } +hex = { workspace = true } indent = { workspace = true } itertools = { workspace = true } logos = { workspace = true } diff --git a/src/query/ast/src/ast/expr.rs b/src/query/ast/src/ast/expr.rs index 6f82bcb6e1029..075a38bf1d6ab 100644 --- a/src/query/ast/src/ast/expr.rs +++ b/src/query/ast/src/ast/expr.rs @@ -1027,6 +1027,7 @@ pub enum Literal { precision: u8, scale: u8, }, + Binary(Vec), // Quoted string literal value String(String), Boolean(bool), @@ -1074,6 +1075,9 @@ impl Display for Literal { write!(f, "{s}") } } + Literal::Binary(val) => { + write!(f, "X'{}'", hex::encode_upper(val)) + } Literal::String(val) => { write!(f, "{}", QuotedString(val, '\'')) } diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index 99089dcdd3081..bdc276e72c100 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -1604,11 +1604,19 @@ pub fn expr_element(i: Input) -> IResult> { }) }, ); - let hex_uint = map_res(literal_hex_str, |str| { + let mysql_hex_uint = map_res(mysql_literal_hex_str, |str| { Ok(ExprElement::Literal { value: parse_uint(str, 16).map_err(nom::Err::Failure)?, }) }); + let pg_hex_binary = map_res(pg_literal_hex_str, |str| { + Ok(ExprElement::Literal { + value: Literal::Binary( + hex::decode(str) + .map_err(|_| nom::Err::Failure(ErrorKind::Other("invalid hex literal")))?, + ), + }) + }); let decimal_float = map_res( verify( rule! { @@ -1757,7 +1765,8 @@ pub fn expr_element(i: Input) -> IResult> { LiteralCodeString => with_span!(code_string).parse(i), LiteralInteger => with_span!(decimal_uint).parse(i), LiteralFloat => with_span!(rule!{ #decimal_float | #dot_number_map_access }).parse(i), - MySQLLiteralHex | PGLiteralHex => with_span!(hex_uint).parse(i), + MySQLLiteralHex => with_span!(mysql_hex_uint).parse(i), + PGLiteralHex => with_span!(pg_hex_binary).parse(i), TRUE | FALSE => with_span!(boolean).parse(i), NULL => with_span!(null).parse(i), ROW => with_span!(column_row).parse(i), @@ -1921,9 +1930,14 @@ pub fn literal(i: Input) -> IResult { }, |token| parse_uint(token.text(), 10).map_err(nom::Err::Failure), ); - let mut hex_uint = map_res(literal_hex_str, |str| { + let mut mysql_hex_uint = map_res(mysql_literal_hex_str, |str| { parse_uint(str, 16).map_err(nom::Err::Failure) }); + let mut pg_hex_binary = map_res(pg_literal_hex_str, |str| { + hex::decode(str) + .map(Literal::Binary) + .map_err(|_| nom::Err::Failure(ErrorKind::Other("invalid hex literal"))) + }); let mut decimal_float = map_res( rule! { LiteralFloat @@ -1936,7 +1950,8 @@ pub fn literal(i: Input) -> IResult { LiteralCodeString => code_string.parse(i), LiteralInteger => decimal_uint.parse(i), LiteralFloat => decimal_float.parse(i), - MySQLLiteralHex | PGLiteralHex => hex_uint(i), + MySQLLiteralHex => mysql_hex_uint.parse(i), + PGLiteralHex => pg_hex_binary.parse(i), TRUE | FALSE => boolean.parse(i), NULL => null.parse(i), ); @@ -1949,25 +1964,24 @@ pub fn literal(i: Input) -> IResult { ))) } -pub fn literal_hex_str(i: Input<'_>) -> IResult<'_, &str> { +pub fn mysql_literal_hex_str(i: Input<'_>) -> IResult<'_, &str> { // 0XFFFF - let mysql_hex = map( + map( rule! { MySQLLiteralHex }, |token| &token.text()[2..], - ); + ) + .parse(i) +} + +pub fn pg_literal_hex_str(i: Input<'_>) -> IResult<'_, &str> { // x'FFFF' - let pg_hex = map( + map( rule! { PGLiteralHex }, |token| &token.text()[2..token.text().len() - 1], - ); - - rule!( - #mysql_hex - | #pg_hex ) .parse(i) } @@ -1980,7 +1994,7 @@ pub fn literal_u64(i: Input) -> IResult { }, |token| u64::from_str_radix(token.text(), 10).map_err(|e| nom::Err::Failure(e.into())), ); - let hex = map_res(literal_hex_str, |lit| { + let hex = map_res(mysql_literal_hex_str, |lit| { u64::from_str_radix(lit, 16).map_err(|e| nom::Err::Failure(e.into())) }); @@ -1999,7 +2013,7 @@ pub fn literal_i64(i: Input) -> IResult { }, |token| i64::from_str_radix(token.text(), 10).map_err(|e| nom::Err::Failure(e.into())), ); - let hex = map_res(literal_hex_str, |lit| { + let hex = map_res(mysql_literal_hex_str, |lit| { i64::from_str_radix(lit, 16).map_err(|e| nom::Err::Failure(e.into())) }); diff --git a/src/query/ast/tests/it/parser.rs b/src/query/ast/tests/it/parser.rs index bdfc1ab930b80..3729806171794 100644 --- a/src/query/ast/tests/it/parser.rs +++ b/src/query/ast/tests/it/parser.rs @@ -1559,6 +1559,7 @@ fn test_expr_error() { "#, r#"CAST(1 AS STRING) ESCAPE '$'"#, r#"1 + 1 ESCAPE '$'"#, + r#"x'ABC'"#, ]; for case in cases { diff --git a/src/query/ast/tests/it/testdata/expr-error.txt b/src/query/ast/tests/it/testdata/expr-error.txt index 6c826ef40fffe..34cd6e4c9762b 100644 --- a/src/query/ast/tests/it/testdata/expr-error.txt +++ b/src/query/ast/tests/it/testdata/expr-error.txt @@ -134,3 +134,16 @@ error: | while parsing expression +---------- Input ---------- +x'ABC' +---------- Output --------- +error: + --> SQL:1:1 + | +1 | x'ABC' + | ^^^^^^ + | | + | expecting ``, '', '', '', 'TRUE', 'FALSE', or more ... + | while parsing expression + + diff --git a/src/query/ast/tests/it/testdata/expr.txt b/src/query/ast/tests/it/testdata/expr.txt index 16c8de88d6dec..efff6529dc27b 100644 --- a/src/query/ast/tests/it/testdata/expr.txt +++ b/src/query/ast/tests/it/testdata/expr.txt @@ -122,7 +122,7 @@ FunctionCall { ---------- Input ---------- [42, 3.5, 4., .001, 5e2, 1.925e-3, .38e+7, 1.e-01, 0xfff, x'deedbeef'] ---------- Output --------- -[42, 3.5, 4, 0.001, 500, 0.001925, 3800000, 0.1, 4095, 3740122863] +[42, 3.5, 4, 0.001, 500, 0.001925, 3800000, 0.1, 4095, X'DEEDBEEF'] ---------- AST ------------ Array { span: Some( @@ -219,8 +219,13 @@ Array { span: Some( 58..69, ), - value: UInt64( - 3740122863, + value: Binary( + [ + 222, + 237, + 190, + 239, + ], ), }, ], @@ -262,17 +267,31 @@ Literal { ---------- Input ---------- x'123456789012345678901234567890' ---------- Output --------- -94522879687365475552814062743484560 +X'123456789012345678901234567890' ---------- AST ------------ Literal { span: Some( 0..33, ), - value: Decimal256 { - value: 94522879687365475552814062743484560, - precision: 76, - scale: 0, - }, + value: Binary( + [ + 18, + 52, + 86, + 120, + 144, + 18, + 52, + 86, + 120, + 144, + 18, + 52, + 86, + 120, + 144, + ], + ), } @@ -895,7 +914,7 @@ BinaryOp { ---------- Input ---------- 0XFF + 0xff + 0xa + x'ffff' ---------- Output --------- -255 + 255 + 10 + 65535 +255 + 255 + 10 + X'FFFF' ---------- AST ------------ BinaryOp { span: Some( @@ -942,8 +961,11 @@ BinaryOp { span: Some( 20..27, ), - value: UInt64( - 65535, + value: Binary( + [ + 255, + 255, + ], ), }, } diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 6bae051bec815..c241c6175adbc 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -5471,6 +5471,7 @@ impl<'a> TypeChecker<'a> { DecimalSize::new_unchecked(*precision, *scale), )), Literal::Float64(float) => Scalar::Number(NumberScalar::Float64((*float).into())), + Literal::Binary(bytes) => Scalar::Binary(bytes.clone()), Literal::String(string) => Scalar::String(string.clone()), Literal::Boolean(boolean) => Scalar::Boolean(*boolean), Literal::Null => Scalar::Null, @@ -5506,7 +5507,7 @@ impl<'a> TypeChecker<'a> { )), Literal::Float64(v) => Scalar::Number(NumberScalar::Float64((-*v).into())), Literal::Null => Scalar::Null, - Literal::String(_) | Literal::Boolean(_) => { + Literal::Binary(_) | Literal::String(_) | Literal::Boolean(_) => { return Err(ErrorCode::InvalidArgument(format!( "Invalid minus operator for {}", literal diff --git a/src/query/sql/test-support/src/expr_parser.rs b/src/query/sql/test-support/src/expr_parser.rs index 1cb1317efa9bc..7d10e1f9ea878 100644 --- a/src/query/sql/test-support/src/expr_parser.rs +++ b/src/query/sql/test-support/src/expr_parser.rs @@ -732,6 +732,7 @@ fn transform_literal(lit: ASTLiteral) -> Scalar { i256(value), DecimalSize::new_unchecked(precision, scale), )), + ASTLiteral::Binary(b) => Scalar::Binary(b), ASTLiteral::String(s) => Scalar::String(s), ASTLiteral::Boolean(b) => Scalar::Boolean(b), ASTLiteral::Null => Scalar::Null, diff --git a/tests/sqllogictests/suites/query/functions/binary_format.test b/tests/sqllogictests/suites/query/functions/binary_format.test index 0fe6b471328fb..d9b0ca142859c 100644 --- a/tests/sqllogictests/suites/query/functions/binary_format.test +++ b/tests/sqllogictests/suites/query/functions/binary_format.test @@ -1,6 +1,17 @@ statement ok drop table if exists fmt_bin +statement ok +set binary_output_format = 'hex' + +query TT +select typeof(X'ABCD'), X'ABCD' +---- +Binary ABCD + +statement error +select X'0A' + 1 + statement ok create table fmt_bin(id int, v binary)