From 2b79498579cd02c807f090bdb0ea435684eab626 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Thu, 14 May 2026 16:41:57 +0800 Subject: [PATCH 1/2] feat: add integer scalar to character conversion in format_string function - Implemented integer_scalar_to_char(&ScalarValue) in format_string.rs - Routed all integer %c cases through the new helper function - Preserved null handling and support for other integer formats --- .../src/function/string/format_string.rs | 65 ++++++++++++------- 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/datafusion/spark/src/function/string/format_string.rs b/datafusion/spark/src/function/string/format_string.rs index 84e29b4713a3c..6205d646e98fe 100644 --- a/datafusion/spark/src/function/string/format_string.rs +++ b/datafusion/spark/src/function/string/format_string.rs @@ -891,6 +891,23 @@ fn unsigned_to_char(value: u64) -> Result { codepoint_to_char(codepoint) } +/// Convert a non-null integer scalar to a [`char`] for the `%c` conversion. +fn integer_scalar_to_char(value: &ScalarValue) -> Result { + match value { + ScalarValue::Int8(Some(value)) => signed_to_char(*value as i64), + ScalarValue::Int16(Some(value)) => signed_to_char(*value as i64), + ScalarValue::Int32(Some(value)) => signed_to_char(*value as i64), + ScalarValue::Int64(Some(value)) => signed_to_char(*value), + ScalarValue::UInt8(Some(value)) => unsigned_to_char(*value as u64), + ScalarValue::UInt16(Some(value)) => unsigned_to_char(*value as u64), + ScalarValue::UInt32(Some(value)) => unsigned_to_char(*value as u64), + ScalarValue::UInt64(Some(value)) => unsigned_to_char(*value), + _ => datafusion_common::internal_err!( + "integer_scalar_to_char expects a non-null integer scalar, got {value:?}" + ), + } +} + impl ConversionSpecifier { /// Validates that the grouping separator flag is not used with scientific /// notation conversions, matching Java/Spark behavior which throws @@ -923,7 +940,7 @@ impl ConversionSpecifier { _ => self.format_boolean(string, value), }, - ScalarValue::Int8(value) => match (self.conversion_type, value) { + scalar @ ScalarValue::Int8(value) => match (self.conversion_type, value) { (ConversionType::DecInt, Some(value)) => { self.format_signed(string, *value as i64) } @@ -933,8 +950,8 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, (*value as u8) as u64), - (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => { - self.format_char(string, signed_to_char(*value as i64)?) + (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { + self.format_char(string, integer_scalar_to_char(scalar)?) } ( ConversionType::StringLower | ConversionType::StringUpper, @@ -948,12 +965,12 @@ impl ConversionSpecifier { ) } }, - ScalarValue::Int16(value) => match (self.conversion_type, value) { + scalar @ ScalarValue::Int16(value) => match (self.conversion_type, value) { (ConversionType::DecInt, Some(value)) => { self.format_signed(string, *value as i64) } - (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => { - self.format_char(string, signed_to_char(*value as i64)?) + (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { + self.format_char(string, integer_scalar_to_char(scalar)?) } ( ConversionType::HexIntLower @@ -973,7 +990,7 @@ impl ConversionSpecifier { ) } }, - ScalarValue::Int32(value) => match (self.conversion_type, value) { + scalar @ ScalarValue::Int32(value) => match (self.conversion_type, value) { (ConversionType::DecInt, Some(value)) => { self.format_signed(string, *value as i64) } @@ -983,8 +1000,8 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, (*value as u32) as u64), - (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => { - self.format_char(string, signed_to_char(*value as i64)?) + (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { + self.format_char(string, integer_scalar_to_char(scalar)?) } ( ConversionType::StringLower | ConversionType::StringUpper, @@ -998,7 +1015,7 @@ impl ConversionSpecifier { ) } }, - ScalarValue::Int64(value) => match (self.conversion_type, value) { + scalar @ ScalarValue::Int64(value) => match (self.conversion_type, value) { (ConversionType::DecInt, Some(value)) => { self.format_signed(string, *value) } @@ -1008,8 +1025,8 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, *value as u64), - (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => { - self.format_char(string, signed_to_char(*value)?) + (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { + self.format_char(string, integer_scalar_to_char(scalar)?) } ( ConversionType::StringLower | ConversionType::StringUpper, @@ -1023,7 +1040,7 @@ impl ConversionSpecifier { ) } }, - ScalarValue::UInt8(value) => match (self.conversion_type, value) { + scalar @ ScalarValue::UInt8(value) => match (self.conversion_type, value) { ( ConversionType::DecInt | ConversionType::HexIntLower @@ -1031,8 +1048,8 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, *value as u64), - (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => { - self.format_char(string, unsigned_to_char(*value as u64)?) + (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { + self.format_char(string, integer_scalar_to_char(scalar)?) } ( ConversionType::StringLower | ConversionType::StringUpper, @@ -1046,7 +1063,7 @@ impl ConversionSpecifier { ) } }, - ScalarValue::UInt16(value) => match (self.conversion_type, value) { + scalar @ ScalarValue::UInt16(value) => match (self.conversion_type, value) { ( ConversionType::DecInt | ConversionType::HexIntLower @@ -1054,8 +1071,8 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, *value as u64), - (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => { - self.format_char(string, unsigned_to_char(*value as u64)?) + (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { + self.format_char(string, integer_scalar_to_char(scalar)?) } ( ConversionType::StringLower | ConversionType::StringUpper, @@ -1069,7 +1086,7 @@ impl ConversionSpecifier { ) } }, - ScalarValue::UInt32(value) => match (self.conversion_type, value) { + scalar @ ScalarValue::UInt32(value) => match (self.conversion_type, value) { ( ConversionType::DecInt | ConversionType::HexIntLower @@ -1077,8 +1094,8 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, *value as u64), - (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => { - self.format_char(string, unsigned_to_char(*value as u64)?) + (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { + self.format_char(string, integer_scalar_to_char(scalar)?) } ( ConversionType::StringLower | ConversionType::StringUpper, @@ -1092,7 +1109,7 @@ impl ConversionSpecifier { ) } }, - ScalarValue::UInt64(value) => match (self.conversion_type, value) { + scalar @ ScalarValue::UInt64(value) => match (self.conversion_type, value) { ( ConversionType::DecInt | ConversionType::HexIntLower @@ -1100,8 +1117,8 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, *value), - (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => { - self.format_char(string, unsigned_to_char(*value)?) + (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { + self.format_char(string, integer_scalar_to_char(scalar)?) } ( ConversionType::StringLower | ConversionType::StringUpper, From c1d261523b991e181abe357fb57dd683dd3c88b4 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Thu, 14 May 2026 16:48:01 +0800 Subject: [PATCH 2/2] feat: refactor integer_scalar_to_char function - Updated function signature from integer_scalar_to_char(value) to integer_scalar_to_char(scalar) - Added a centralized non-null integer %c dispatch arm - Removed 8 redundant %c dispatch arms - Removed 8 scalar @ bindings --- .../src/function/string/format_string.rs | 61 ++++++++----------- 1 file changed, 26 insertions(+), 35 deletions(-) diff --git a/datafusion/spark/src/function/string/format_string.rs b/datafusion/spark/src/function/string/format_string.rs index 6205d646e98fe..03fbf1cb2293c 100644 --- a/datafusion/spark/src/function/string/format_string.rs +++ b/datafusion/spark/src/function/string/format_string.rs @@ -892,8 +892,8 @@ fn unsigned_to_char(value: u64) -> Result { } /// Convert a non-null integer scalar to a [`char`] for the `%c` conversion. -fn integer_scalar_to_char(value: &ScalarValue) -> Result { - match value { +fn integer_scalar_to_char(scalar: &ScalarValue) -> Result { + match scalar { ScalarValue::Int8(Some(value)) => signed_to_char(*value as i64), ScalarValue::Int16(Some(value)) => signed_to_char(*value as i64), ScalarValue::Int32(Some(value)) => signed_to_char(*value as i64), @@ -903,7 +903,7 @@ fn integer_scalar_to_char(value: &ScalarValue) -> Result { ScalarValue::UInt32(Some(value)) => unsigned_to_char(*value as u64), ScalarValue::UInt64(Some(value)) => unsigned_to_char(*value), _ => datafusion_common::internal_err!( - "integer_scalar_to_char expects a non-null integer scalar, got {value:?}" + "integer_scalar_to_char expects a non-null integer scalar, got {scalar:?}" ), } } @@ -940,7 +940,22 @@ impl ConversionSpecifier { _ => self.format_boolean(string, value), }, - scalar @ ScalarValue::Int8(value) => match (self.conversion_type, value) { + ScalarValue::Int8(Some(_)) + | ScalarValue::Int16(Some(_)) + | ScalarValue::Int32(Some(_)) + | ScalarValue::Int64(Some(_)) + | ScalarValue::UInt8(Some(_)) + | ScalarValue::UInt16(Some(_)) + | ScalarValue::UInt32(Some(_)) + | ScalarValue::UInt64(Some(_)) + if matches!( + self.conversion_type, + ConversionType::CharLower | ConversionType::CharUpper + ) => + { + self.format_char(string, integer_scalar_to_char(value)?) + } + ScalarValue::Int8(value) => match (self.conversion_type, value) { (ConversionType::DecInt, Some(value)) => { self.format_signed(string, *value as i64) } @@ -950,9 +965,6 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, (*value as u8) as u64), - (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { - self.format_char(string, integer_scalar_to_char(scalar)?) - } ( ConversionType::StringLower | ConversionType::StringUpper, Some(value), @@ -965,13 +977,10 @@ impl ConversionSpecifier { ) } }, - scalar @ ScalarValue::Int16(value) => match (self.conversion_type, value) { + ScalarValue::Int16(value) => match (self.conversion_type, value) { (ConversionType::DecInt, Some(value)) => { self.format_signed(string, *value as i64) } - (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { - self.format_char(string, integer_scalar_to_char(scalar)?) - } ( ConversionType::HexIntLower | ConversionType::HexIntUpper @@ -990,7 +999,7 @@ impl ConversionSpecifier { ) } }, - scalar @ ScalarValue::Int32(value) => match (self.conversion_type, value) { + ScalarValue::Int32(value) => match (self.conversion_type, value) { (ConversionType::DecInt, Some(value)) => { self.format_signed(string, *value as i64) } @@ -1000,9 +1009,6 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, (*value as u32) as u64), - (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { - self.format_char(string, integer_scalar_to_char(scalar)?) - } ( ConversionType::StringLower | ConversionType::StringUpper, Some(value), @@ -1015,7 +1021,7 @@ impl ConversionSpecifier { ) } }, - scalar @ ScalarValue::Int64(value) => match (self.conversion_type, value) { + ScalarValue::Int64(value) => match (self.conversion_type, value) { (ConversionType::DecInt, Some(value)) => { self.format_signed(string, *value) } @@ -1025,9 +1031,6 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, *value as u64), - (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { - self.format_char(string, integer_scalar_to_char(scalar)?) - } ( ConversionType::StringLower | ConversionType::StringUpper, Some(value), @@ -1040,7 +1043,7 @@ impl ConversionSpecifier { ) } }, - scalar @ ScalarValue::UInt8(value) => match (self.conversion_type, value) { + ScalarValue::UInt8(value) => match (self.conversion_type, value) { ( ConversionType::DecInt | ConversionType::HexIntLower @@ -1048,9 +1051,6 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, *value as u64), - (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { - self.format_char(string, integer_scalar_to_char(scalar)?) - } ( ConversionType::StringLower | ConversionType::StringUpper, Some(value), @@ -1063,7 +1063,7 @@ impl ConversionSpecifier { ) } }, - scalar @ ScalarValue::UInt16(value) => match (self.conversion_type, value) { + ScalarValue::UInt16(value) => match (self.conversion_type, value) { ( ConversionType::DecInt | ConversionType::HexIntLower @@ -1071,9 +1071,6 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, *value as u64), - (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { - self.format_char(string, integer_scalar_to_char(scalar)?) - } ( ConversionType::StringLower | ConversionType::StringUpper, Some(value), @@ -1086,7 +1083,7 @@ impl ConversionSpecifier { ) } }, - scalar @ ScalarValue::UInt32(value) => match (self.conversion_type, value) { + ScalarValue::UInt32(value) => match (self.conversion_type, value) { ( ConversionType::DecInt | ConversionType::HexIntLower @@ -1094,9 +1091,6 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, *value as u64), - (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { - self.format_char(string, integer_scalar_to_char(scalar)?) - } ( ConversionType::StringLower | ConversionType::StringUpper, Some(value), @@ -1109,7 +1103,7 @@ impl ConversionSpecifier { ) } }, - scalar @ ScalarValue::UInt64(value) => match (self.conversion_type, value) { + ScalarValue::UInt64(value) => match (self.conversion_type, value) { ( ConversionType::DecInt | ConversionType::HexIntLower @@ -1117,9 +1111,6 @@ impl ConversionSpecifier { | ConversionType::OctInt, Some(value), ) => self.format_unsigned(string, *value), - (ConversionType::CharLower | ConversionType::CharUpper, Some(_)) => { - self.format_char(string, integer_scalar_to_char(scalar)?) - } ( ConversionType::StringLower | ConversionType::StringUpper, Some(value),