From 9b343b1cd03e84cf02fdb25c765ffb1312afff79 Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Fri, 22 May 2026 10:35:10 +0200 Subject: [PATCH] Fix parser --- .../src/java_date_time_format.rs | 1074 ++++++++++++++++- 1 file changed, 1056 insertions(+), 18 deletions(-) diff --git a/quickwit/quickwit-datetime/src/java_date_time_format.rs b/quickwit/quickwit-datetime/src/java_date_time_format.rs index a0d6c1cb0f5..bb224d94cfd 100644 --- a/quickwit/quickwit-datetime/src/java_date_time_format.rs +++ b/quickwit/quickwit-datetime/src/java_date_time_format.rs @@ -18,8 +18,8 @@ use std::sync::OnceLock; use time::error::{Format, TryFromParsed}; use time::format_description::modifier::{ - Day, Hour, Minute, Month as MonthModifier, Padding, Second, Subsecond, SubsecondDigits, - WeekNumber, WeekNumberRepr, Weekday, WeekdayRepr, Year, YearRepr, + Day, Hour, Minute, Month as MonthModifier, OffsetHour, OffsetMinute, Padding, Second, + Subsecond, SubsecondDigits, WeekNumber, WeekNumberRepr, Weekday, WeekdayRepr, Year, YearRepr, }; use time::format_description::{Component, OwnedFormatItem}; use time::parsing::Parsed; @@ -38,6 +38,7 @@ const JAVA_DATE_FORMAT_TOKENS: &[&str] = &[ "SSSS", "SSS", "SS", + "XXXXX", // ISO 8601 offset with colon or 'Z' "ZZ", "ww", "w[w]", @@ -99,11 +100,43 @@ fn build_zone_offset(_: &str) -> Option { .into_boxed_slice(); let offset_compound = OwnedFormatItem::Compound(offset_items); + // Offset in '+/-HH' format (abbreviated, hour only) + let offset_hour_only = OwnedFormatItem::Component(Component::OffsetHour(Default::default())); + Some(OwnedFormatItem::First( - vec![z_literal, offset_with_delimiter_compound, offset_compound].into_boxed_slice(), + vec![ + z_literal, + offset_with_delimiter_compound, + offset_compound, + offset_hour_only, + ] + .into_boxed_slice(), )) } +/// Build the ES specific formatting: always outputs '+HH:MM' format +/// +/// NOTE: Unfortunately we cannot have a conditional forwarding that replaces +/// +00:00 with 'Z' for UTC as ES does. We perform this replacement after the +/// formatting. +fn build_iso8601_zone_offset_for_formatting() -> Option { + // Configure OffsetHour to always show the sign + let mut offset_hour_mod = OffsetHour::default(); + offset_hour_mod.sign_is_mandatory = true; + offset_hour_mod.padding = Padding::Zero; + + let mut offset_minute_mod = OffsetMinute::default(); + offset_minute_mod.padding = Padding::Zero; + + let offset_with_delimiter_items: Box<[OwnedFormatItem]> = vec![ + OwnedFormatItem::Component(Component::OffsetHour(offset_hour_mod)), + OwnedFormatItem::Literal(Box::from(b":".as_ref())), + OwnedFormatItem::Component(Component::OffsetMinute(offset_minute_mod)), + ] + .into_boxed_slice(); + Some(OwnedFormatItem::Compound(offset_with_delimiter_items)) +} + // There is a `YearRepr::LastTwo` representation in the time crate, but the parser is unreliable, so // we only support `YearRepr::Full` for now. See also https://github.com/time-rs/time/issues/649. const fn year_item() -> Option { @@ -159,14 +192,37 @@ fn build_second_item(ptn: &str) -> Option { Some(OwnedFormatItem::Component(Component::Second(second))) } -fn build_fraction_of_second_item(_ptn: &str) -> Option { +fn build_fraction_of_second_item_for_parsing() -> Option { let mut subsecond: Subsecond = Default::default(); + // For parsing, use OneOrMore to accept variable precision subsecond.digits = SubsecondDigits::OneOrMore; Some(OwnedFormatItem::Component(Component::Subsecond(subsecond))) } +// Build fractional seconds with fixed precision based on pattern length +fn build_fraction_of_second_item_for_formatting(ptn: &str) -> Option { + use time::format_description::modifier::SubsecondDigits; + + let mut subsecond: Subsecond = Default::default(); + // Use pattern length to determine fixed precision for formatting + subsecond.digits = match ptn.len() { + 1 => SubsecondDigits::One, + 2 => SubsecondDigits::Two, + 3 => SubsecondDigits::Three, + 4 => SubsecondDigits::Four, + 5 => SubsecondDigits::Five, + 6 => SubsecondDigits::Six, + 7 => SubsecondDigits::Seven, + 8 => SubsecondDigits::Eight, + 9 => SubsecondDigits::Nine, + _ => SubsecondDigits::OneOrMore, + }; + Some(OwnedFormatItem::Component(Component::Subsecond(subsecond))) +} + fn parse_java_datetime_format_items_recursive( chars: &mut std::iter::Peekable, + for_formatting: bool, ) -> Result, String> { let mut items = Vec::new(); @@ -174,7 +230,8 @@ fn parse_java_datetime_format_items_recursive( match c { '[' => { chars.next(); - let optional_items = parse_java_datetime_format_items_recursive(chars)?; + let optional_items = + parse_java_datetime_format_items_recursive(chars, for_formatting)?; items.push(OwnedFormatItem::Optional(Box::new( OwnedFormatItem::Compound(optional_items.into_boxed_slice()), ))); @@ -198,7 +255,7 @@ fn parse_java_datetime_format_items_recursive( items.push(literal(literal_str.as_bytes())); } _ => { - if let Some(format_item) = match_java_date_format_token(chars)? { + if let Some(format_item) = match_java_date_format_token(chars, for_formatting)? { items.push(format_item); } else { // Treat as a literal character @@ -216,6 +273,7 @@ fn parse_java_datetime_format_items_recursive( // here https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-date-format.html fn match_java_date_format_token( chars: &mut std::iter::Peekable, + for_formatting: bool, ) -> Result, String> { if chars.peek().is_none() { return Ok(None); @@ -238,7 +296,18 @@ fn match_java_date_format_token( "mm" | "m" => build_minute_item(token), "ss" | "s" => build_second_item(token), "SSSSSSSSS" | "SSSSSSS" | "SSSSSS" | "SSSSS" | "SSSS" | "SSS" | "SS" | "S" => { - build_fraction_of_second_item(token) + if for_formatting { + build_fraction_of_second_item_for_formatting(token) + } else { + build_fraction_of_second_item_for_parsing() + } + } + "XXXXX" => { + if for_formatting { + build_iso8601_zone_offset_for_formatting() + } else { + return Err("XXXXX pattern is only supported for formatting.".to_string()); + } } "Z" => build_zone_offset(token), "ww" | "w[w]" | "w" => build_week_of_year_item(token), @@ -256,22 +325,22 @@ fn match_java_date_format_token( // Java date format it is mapped to, if any. // If the java_datetime_format is not an alias, it is expected to be a // java date time format and should be returned as is. -fn resolve_java_datetime_format_alias(java_datetime_format: &str) -> &str { +fn resolve_java_datetime_format_alias_for_parsing(java_datetime_format: &str) -> &str { static JAVA_DATE_FORMAT_ALIASES: OnceLock> = OnceLock::new(); let java_datetime_format_map = JAVA_DATE_FORMAT_ALIASES.get_or_init(|| { let mut m = HashMap::new(); m.insert( "date_optional_time", - "yyyy[-MM[-dd['T'HH[:mm[:ss[.SSS][Z]]]]]]", + "yyyy[-MM[-dd['T'HH[Z][:mm[Z][:ss[.SSS][Z]]]]]]", ); m.insert( "strict_date_optional_time", - "yyyy[-MM[-dd['T'HH[:mm[:ss[.SSS][Z]]]]]]", + "yyyy[-MM[-dd['T'HH[Z][:mm[Z][:ss[.SSS][Z]]]]]]", ); m.insert( "strict_date_optional_time_nanos", - "yyyy[-MM[-dd['T'HH[:mm[:ss[.SSSSSS][Z]]]]]]", + "yyyy[-MM[-dd['T'HH[Z][:mm[Z][:ss[.SSSSSS][Z]]]]]]", ); m.insert("basic_date", "yyyyMMdd"); @@ -297,18 +366,69 @@ fn resolve_java_datetime_format_alias(java_datetime_format: &str) -> &str { .unwrap_or(java_datetime_format) } +fn resolve_java_datetime_format_alias_for_formatting(java_datetime_format: &str) -> &str { + static JAVA_DATE_FORMAT_ALIASES_FORMATTING: OnceLock> = + OnceLock::new(); + let java_datetime_format_map = JAVA_DATE_FORMAT_ALIASES_FORMATTING.get_or_init(|| { + let mut m = HashMap::new(); + // For strict_date_optional_time, format with full date-time and milliseconds + m.insert( + "strict_date_optional_time", + "yyyy-MM-dd'T'HH:mm:ss.SSSXXXXX", + ); + // For strict_date_optional_time_nanos, format with full date-time and nanoseconds + m.insert( + "strict_date_optional_time_nanos", + "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSSXXXXX", + ); + // date_optional_time uses the same format as strict variant + m.insert("date_optional_time", "yyyy-MM-dd'T'HH:mm:ss.SSSXXXXX"); + // Other formats that don't have complex optional structures can use their parse patterns + m.insert("basic_date", "yyyyMMdd"); + m.insert("strict_basic_week_date", "xxxx'W'wwe"); + m.insert("basic_week_date", "xxxx'W'wwe"); + m.insert( + "strict_basic_week_date_time", + "xxxx'W'wwe'T'HHmmss.SSSXXXXX", + ); + m.insert("basic_week_date_time", "xxxx'W'wwe'T'HHmmss.SSSXXXXX"); + m.insert( + "strict_basic_week_date_time_no_millis", + "xxxx'W'wwe'T'HHmmssXXXXX", + ); + m.insert("basic_week_date_time_no_millis", "xxxx'W'wwe'T'HHmmssXXXXX"); + m.insert("strict_week_date", "xxxx-'W'ww-e"); + m.insert("week_date", "xxxx-'W'ww-e"); + m + }); + java_datetime_format_map + .get(java_datetime_format) + .copied() + .unwrap_or(java_datetime_format) +} + /// A date time parser that holds the format specification `Vec`. #[derive(Clone)] pub struct StrptimeParser { pub(crate) strptime_format: String, items: Box<[OwnedFormatItem]>, + format_items: Box<[OwnedFormatItem]>, } pub fn parse_java_datetime_format_items( java_datetime_format: &str, ) -> Result, String> { let mut chars = java_datetime_format.chars().peekable(); - let items = parse_java_datetime_format_items_recursive(&mut chars)?; + let items = parse_java_datetime_format_items_recursive(&mut chars, false)?; + Ok(items.into_boxed_slice()) +} + +// Parse format items with fixed precision for formatting output +fn parse_java_datetime_format_items_for_formatting( + java_datetime_format: &str, +) -> Result, String> { + let mut chars = java_datetime_format.chars().peekable(); + let items = parse_java_datetime_format_items_recursive(&mut chars, true)?; Ok(items.into_boxed_slice()) } @@ -326,9 +446,21 @@ impl StrptimeParser { date_time_str: &str, default_offset: UtcOffset, ) -> Result { + // Elasticsearch/OpenSearch support comma as a decimal separator for fractional seconds + // (in addition to period). The time crate only supports period, so we normalize + // comma to period before parsing. This is safe because comma doesn't appear in + // other parts of the format. + let normalized_date_time_str; + let date_time_str_to_parse = if date_time_str.contains(',') { + normalized_date_time_str = date_time_str.replace(',', "."); + &normalized_date_time_str + } else { + date_time_str + }; + let mut parsed = Parsed::new(); if !parsed - .parse_items(date_time_str.as_bytes(), &self.items) + .parse_items(date_time_str_to_parse.as_bytes(), &self.items) .map_err(|err| err.to_string())? .is_empty() { @@ -375,7 +507,15 @@ impl StrptimeParser { } pub fn format_date_time(&self, date_time: &OffsetDateTime) -> Result { - date_time.format(&self.items) + let mut formatted = date_time.format(&self.format_items)?; + // For ES/Java ISO 8601 compatibility: replace '+00:00' with 'Z' for UTC. + // The time crate doesn't support conditional 'Z' in format items, so we handle it manually + // here. Since the offset is always a suffix, we can efficiently truncate and append. + if date_time.offset() == UtcOffset::UTC && formatted.ends_with("+00:00") { + formatted.truncate(formatted.len() - 6); + formatted.push('Z'); + } + Ok(formatted) } pub fn from_strptime(strptime_format: &str) -> Result { @@ -385,21 +525,43 @@ impl StrptimeParser { .map(|item| item.into()) .collect::>() .into_boxed_slice(); - Ok(StrptimeParser::new(strptime_format.to_string(), items)) + // For strptime, use the same items for both parsing and formatting + let format_items = items.clone(); + Ok(StrptimeParser::new( + strptime_format.to_string(), + items, + format_items, + )) } pub fn from_java_datetime_format(java_datetime_format: &str) -> Result { let java_datetime_format_resolved = - resolve_java_datetime_format_alias(java_datetime_format); + resolve_java_datetime_format_alias_for_parsing(java_datetime_format); let items: Box<[OwnedFormatItem]> = parse_java_datetime_format_items(java_datetime_format_resolved)?; - Ok(StrptimeParser::new(java_datetime_format.to_string(), items)) + + // Get format-specific pattern and create format items + let java_datetime_format_for_formatting = + resolve_java_datetime_format_alias_for_formatting(java_datetime_format); + let format_items: Box<[OwnedFormatItem]> = + parse_java_datetime_format_items_for_formatting(java_datetime_format_for_formatting)?; + + Ok(StrptimeParser::new( + java_datetime_format.to_string(), + items, + format_items, + )) } - fn new(strptime_format: String, items: Box<[OwnedFormatItem]>) -> Self { + fn new( + strptime_format: String, + items: Box<[OwnedFormatItem]>, + format_items: Box<[OwnedFormatItem]>, + ) -> Self { StrptimeParser { strptime_format, items, + format_items, } } } @@ -775,3 +937,879 @@ mod tests { ); } } + +/// Tests ported from Elasticsearch's `DateFormattersTests.java` to ensure maximum +/// compatibility with their date parsing behavior. +#[cfg(test)] +mod tests_parsing_ported_from_es { + use time::macros::datetime; + + use super::*; + + #[test] + fn test_strict_date_optional_time_comprehensive() { + let parser = + StrptimeParser::from_java_datetime_format("strict_date_optional_time").unwrap(); + + // Comprehensive test cases from Elasticsearch + let test_cases = [ + // Date only + ("2018-12-31", datetime!(2018-12-31 00:00:00 UTC)), + // Date with time + ("2010-01-05T02:00", datetime!(2010-01-05 02:00:00 UTC)), + ("2018-12-31T10:15:30", datetime!(2018-12-31 10:15:30 UTC)), + // With UTC timezone + ("2018-12-31T10:15:30Z", datetime!(2018-12-31 10:15:30 UTC)), + ("2015-01-04T00:00Z", datetime!(2015-01-04 00:00:00 UTC)), + // With numeric timezones (compact) + ("2016-11-30T00+01", datetime!(2016-11-30 00:00:00 +01:00:00)), + ( + "2016-11-30T00+0100", + datetime!(2016-11-30 00:00:00 +01:00:00), + ), + ( + "2018-12-31T10:15:30+0100", + datetime!(2018-12-31 10:15:30 +01:00:00), + ), + // With numeric timezones (colon-separated) + ( + "2016-11-30T00+01:00", + datetime!(2016-11-30 00:00:00 +01:00:00), + ), + ( + "2018-12-31T10:15:30+01:00", + datetime!(2018-12-31 10:15:30 +01:00:00), + ), + // With milliseconds + ( + "2018-12-31T10:15:30.1Z", + datetime!(2018-12-31 10:15:30.1 UTC), + ), + ( + "2018-12-31T10:15:30.123Z", + datetime!(2018-12-31 10:15:30.123 UTC), + ), + ( + "2018-12-31T10:15:30.1+0100", + datetime!(2018-12-31 10:15:30.1 +01:00:00), + ), + ( + "2018-12-31T10:15:30.123+0100", + datetime!(2018-12-31 10:15:30.123 +01:00:00), + ), + ( + "2018-12-31T10:15:30.123+01:00", + datetime!(2018-12-31 10:15:30.123 +01:00:00), + ), + // Partial dates + ("2001", datetime!(2001-01-01 00:00:00 UTC)), + ("2001-01", datetime!(2001-01-01 00:00:00 UTC)), + ("2001-01-01", datetime!(2001-01-01 00:00:00 UTC)), + ]; + + for (input, expected) in test_cases.iter() { + let parsed = parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!(parsed, *expected, "mismatch for input: {input}"); + } + } + + #[test] + fn test_strict_date_optional_time_nanos_comprehensive() { + let parser = + StrptimeParser::from_java_datetime_format("strict_date_optional_time_nanos").unwrap(); + + // Comprehensive test cases with nanosecond precision + let test_cases = [ + // From Elasticsearch test suite + ( + "2016-01-01T00:00:00.000", + datetime!(2016-01-01 00:00:00.0 UTC), + ), + ("2018-05-15T17:14:56", datetime!(2018-05-15 17:14:56 UTC)), + ("2018-05-15T17:14:56Z", datetime!(2018-05-15 17:14:56 UTC)), + ( + "2018-05-15T17:14:56+0100", + datetime!(2018-05-15 17:14:56 +01:00:00), + ), + ( + "2018-05-15T17:14:56+01:00", + datetime!(2018-05-15 17:14:56 +01:00:00), + ), + ( + "2022-12-16T10:00:57.149001+00:00", + datetime!(2022-12-16 10:00:57.149001 UTC), + ), + ( + "2018-05-15T17:14:56.123456789+0100", + datetime!(2018-05-15 17:14:56.123456789 +01:00:00), + ), + ( + "2018-05-15T17:14:56.123456789+01:00", + datetime!(2018-05-15 17:14:56.123456789 +01:00:00), + ), + // Fractional second precision tests (1-9 digits) + ( + "2019-05-06T14:52:37.1Z", + datetime!(2019-05-06 14:52:37.1 UTC), + ), + ( + "2019-05-06T14:52:37.12Z", + datetime!(2019-05-06 14:52:37.12 UTC), + ), + ( + "2019-05-06T14:52:37.123Z", + datetime!(2019-05-06 14:52:37.123 UTC), + ), + ( + "2019-05-06T14:52:37.1234Z", + datetime!(2019-05-06 14:52:37.1234 UTC), + ), + ( + "2019-05-06T14:52:37.12345Z", + datetime!(2019-05-06 14:52:37.12345 UTC), + ), + ( + "2019-05-06T14:52:37.123456Z", + datetime!(2019-05-06 14:52:37.123456 UTC), + ), + ( + "2019-05-06T14:52:37.1234567Z", + datetime!(2019-05-06 14:52:37.1234567 UTC), + ), + ( + "2019-05-06T14:52:37.12345678Z", + datetime!(2019-05-06 14:52:37.12345678 UTC), + ), + ( + "2019-05-06T14:52:37.123456789Z", + datetime!(2019-05-06 14:52:37.123456789 UTC), + ), + // Edge case: 1 nanosecond + ( + "1970-01-01T00:00:00.000000001", + datetime!(1970-01-01 00:00:00.000000001 UTC), + ), + ]; + + for (input, expected) in test_cases.iter() { + let parsed = parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!(parsed, *expected, "mismatch for input: {input}"); + } + } + + // Additional comprehensive tests including time variations + #[test] + fn test_strict_date_optional_time_variations() { + let parser = + StrptimeParser::from_java_datetime_format("strict_date_optional_time").unwrap(); + + let test_cases = [ + // Time without timezone + ("2016-11-30T12", datetime!(2016-11-30 12:00:00 UTC)), + ("2016-11-30T12:00", datetime!(2016-11-30 12:00:00 UTC)), + ("2016-11-30T12:00:00", datetime!(2016-11-30 12:00:00 UTC)), + ( + "2016-11-30T12:00:00.000", + datetime!(2016-11-30 12:00:00.0 UTC), + ), + // Hour with timezone (abbreviated formats) + ("2016-11-30T12+01", datetime!(2016-11-30 12:00:00 +01:00:00)), + ( + "2016-11-30T12+0100", + datetime!(2016-11-30 12:00:00 +01:00:00), + ), + ( + "2016-11-30T12+01:00", + datetime!(2016-11-30 12:00:00 +01:00:00), + ), + // Hour:minute with timezone + ( + "2016-11-30T12:00+01", + datetime!(2016-11-30 12:00:00 +01:00:00), + ), + ( + "2016-11-30T12:00+0100", + datetime!(2016-11-30 12:00:00 +01:00:00), + ), + ( + "2016-11-30T12:00+01:00", + datetime!(2016-11-30 12:00:00 +01:00:00), + ), + // Full time with timezone + ( + "2016-11-30T12:00:00+01", + datetime!(2016-11-30 12:00:00 +01:00:00), + ), + ( + "2016-11-30T12:00:00+0100", + datetime!(2016-11-30 12:00:00 +01:00:00), + ), + ( + "2016-11-30T12:00:00+01:00", + datetime!(2016-11-30 12:00:00 +01:00:00), + ), + // Milliseconds with timezone + ( + "2016-11-30T12:00:00.000+01", + datetime!(2016-11-30 12:00:00.0 +01:00:00), + ), + ( + "2016-11-30T12:00:00.000+0100", + datetime!(2016-11-30 12:00:00.0 +01:00:00), + ), + ( + "2016-11-30T12:00:00.000+01:00", + datetime!(2016-11-30 12:00:00.0 +01:00:00), + ), + ]; + + for (input, expected) in test_cases.iter() { + let parsed = parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!(parsed, *expected, "mismatch for input: {input}"); + } + } + + // Test error cases - strict parsing should reject malformed inputs + #[test] + fn test_strict_date_optional_time_error_cases() { + let parser = + StrptimeParser::from_java_datetime_format("strict_date_optional_time").unwrap(); + + // These should all fail to parse + let error_cases = [ + // Timezone without time component + "2016-11-30T+01", + // Non-zero-padded time components + "2018-12-31T9:15:30", // hour not padded + "2018-12-31T10:5:30", // minute not padded + "2018-12-31T10:15:3", // second not padded + // Non-zero-padded date components + "2018-12-1", // day not padded + "2018-1-31", // month not padded + // 5-digit year (out of range) + "10000-01-31", + ]; + + for input in error_cases.iter() { + assert!( + parser.parse_date_time(input).is_err(), + "Expected parsing to fail for input: {input}" + ); + } + } + + #[test] + fn test_strict_date_optional_time_fractional_seconds() { + // the difference between strict_date_optional_time and + // strict_date_optional_time_nanos is formatting, not parsing, ES + // accepts all precisions even with strict_date_optional_time (see + // testFractionalSeconds in ES's DateFormattersTests.java) + let parser = + StrptimeParser::from_java_datetime_format("strict_date_optional_time").unwrap(); + + // Test various fractional second precisions (1-9 digits) + let test_cases = [ + ("2019-05-06T14:52:37.1Z", 100_000_000), + ("2019-05-06T14:52:37.12Z", 120_000_000), + ("2019-05-06T14:52:37.123Z", 123_000_000), + ("2019-05-06T14:52:37.1234Z", 123_400_000), + ("2019-05-06T14:52:37.12345Z", 123_450_000), + ("2019-05-06T14:52:37.123456Z", 123_456_000), + ("2019-05-06T14:52:37.1234567Z", 123_456_700), + ("2019-05-06T14:52:37.12345678Z", 123_456_780), + ("2019-05-06T14:52:37.123456789Z", 123_456_789), + ]; + + for (input, expected_nanos) in test_cases.iter() { + let parsed = parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!( + parsed.nanosecond(), + *expected_nanos, + "mismatch for input: {input}" + ); + } + } + + // Test decimal point variations (comma vs period) + #[test] + fn test_decimal_point_parsing() { + let parser = + StrptimeParser::from_java_datetime_format("strict_date_optional_time").unwrap(); + + // Period as decimal separator (standard) + let result = parser.parse_date_time("2001-01-01T00:00:00.123Z"); + assert!(result.is_ok(), "Failed to parse with period separator"); + assert_eq!(result.unwrap(), datetime!(2001-01-01 00:00:00.123 UTC)); + + // Comma as decimal separator (some locales) + // Elasticsearch/OpenSearch support both period and comma + let result = parser.parse_date_time("2001-01-01T00:00:00,123Z"); + assert!(result.is_ok(), "Failed to parse with comma separator"); + assert_eq!(result.unwrap(), datetime!(2001-01-01 00:00:00.123 UTC)); + + // Test comma with different precision levels + let test_cases = [ + ( + "2019-05-06T14:52:37,1Z", + datetime!(2019-05-06 14:52:37.1 UTC), + ), + ( + "2019-05-06T14:52:37,12Z", + datetime!(2019-05-06 14:52:37.12 UTC), + ), + ( + "2019-05-06T14:52:37,123Z", + datetime!(2019-05-06 14:52:37.123 UTC), + ), + ( + "2018-12-31T10:15:30,123+01:00", + datetime!(2018-12-31 10:15:30.123 +01:00:00), + ), + ]; + + for (input, expected) in test_cases.iter() { + let parsed = parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!(parsed, *expected, "mismatch for input: {input}"); + } + } + + #[test] + fn test_basic_date() { + let parser = StrptimeParser::from_java_datetime_format("basic_date").unwrap(); + + let test_cases = [ + ("20181126", datetime!(2018-11-26 00:00:00 UTC)), + ("20210101", datetime!(2021-01-01 00:00:00 UTC)), + ("19991231", datetime!(1999-12-31 00:00:00 UTC)), + ]; + + for (input, expected) in test_cases.iter() { + let parsed = parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!(parsed, *expected, "mismatch for input: {input}"); + } + } + + #[test] + fn test_week_date_formats() { + // basic_week_date with 4-digit years + let basic_parser = StrptimeParser::from_java_datetime_format("basic_week_date").unwrap(); + + let basic_cases = [ + ("2018W313", datetime!(2018-08-02 00:00:00 UTC)), + ("2024W011", datetime!(2024-01-02 00:00:00 UTC)), + ]; + + for (input, expected) in basic_cases.iter() { + let parsed = basic_parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!(parsed, *expected, "mismatch for input: {input}"); + } + + // strict_basic_week_date requires exactly 4-digit year + let strict_parser = + StrptimeParser::from_java_datetime_format("strict_basic_week_date").unwrap(); + + assert!(strict_parser.parse_date_time("2018W313").is_ok()); + assert!(strict_parser.parse_date_time("2024W011").is_ok()); + assert!(strict_parser.parse_date_time("18W313").is_err()); + + // ES allows 1-2 digit years for basic_week_date but our implementation + // currently requires 4 digits like the strict variant + // TODO: implement flexible year parsing to match ES behavior for: + // - "1W313" (1-digit year) + // - "18W313" (2-digit year) + } + + #[test] + fn test_week_date_time_formats() { + let basic_parser = + StrptimeParser::from_java_datetime_format("basic_week_date_time").unwrap(); + + let test_cases = [ + ("2018W313T121212.1Z", datetime!(2018-08-02 12:12:12.1 UTC)), + ( + "2018W313T121212.1+0100", + datetime!(2018-08-02 12:12:12.1 +01:00:00), + ), + ( + "2018W313T121212.123Z", + datetime!(2018-08-02 12:12:12.123 UTC), + ), + ( + "2018W313T121212.123456789Z", + datetime!(2018-08-02 12:12:12.123456789 UTC), + ), + ( + "2018W313T121212.123+0100", + datetime!(2018-08-02 12:12:12.123 +01:00:00), + ), + ( + "2018W313T121212.123+01:00", + datetime!(2018-08-02 12:12:12.123 +01:00:00), + ), + ]; + + for (input, expected) in test_cases.iter() { + let parsed = basic_parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!(parsed, *expected, "mismatch for input: {input}"); + } + + // Test strict variant + let strict_parser = + StrptimeParser::from_java_datetime_format("strict_basic_week_date_time").unwrap(); + + // Additional test case from ES testStrictParsing + let parsed = strict_parser + .parse_date_time("2018W313T121212.1+01:00") + .unwrap(); + assert_eq!(parsed, datetime!(2018-08-02 12:12:12.1 +01:00:00)); + + for (input, expected) in test_cases.iter() { + let parsed = strict_parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!(parsed, *expected, "mismatch for input: {input}"); + } + + // Test strict error cases - invalid time components + assert!( + strict_parser + .parse_date_time("2018W313T12128.123Z") + .is_err() + ); + assert!( + strict_parser + .parse_date_time("2018W313T81212.123Z") + .is_err() + ); + assert!( + strict_parser + .parse_date_time("2018W313T12812.123Z") + .is_err() + ); + assert!(strict_parser.parse_date_time("2018W313T12812.1Z").is_err()); + assert!( + strict_parser + .parse_date_time("2018W313T12128.123456789Z") + .is_err() + ); + } + + #[test] + fn test_week_date_time_no_millis() { + let basic_parser = + StrptimeParser::from_java_datetime_format("basic_week_date_time_no_millis").unwrap(); + + let test_cases = [ + ("2018W313T121212Z", datetime!(2018-08-02 12:12:12 UTC)), + ( + "2018W313T121212+0100", + datetime!(2018-08-02 12:12:12 +01:00:00), + ), + ( + "2018W313T121212+01:00", + datetime!(2018-08-02 12:12:12 +01:00:00), + ), + ]; + + for (input, expected) in test_cases.iter() { + let parsed = basic_parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!(parsed, *expected, "mismatch for input: {input}"); + } + + // Test strict variant + let strict_parser = + StrptimeParser::from_java_datetime_format("strict_basic_week_date_time_no_millis") + .unwrap(); + + for (input, expected) in test_cases.iter() { + let parsed = strict_parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!(parsed, *expected, "mismatch for input: {input}"); + } + + // Test strict error cases - invalid time components + assert!(strict_parser.parse_date_time("2018W313T12128Z").is_err()); + assert!(strict_parser.parse_date_time("2018W313T81212Z").is_err()); + assert!(strict_parser.parse_date_time("2018W313T12812Z").is_err()); + assert!( + strict_parser + .parse_date_time("2018W313T12128+0100") + .is_err() + ); + assert!( + strict_parser + .parse_date_time("2018W313T81212+01:00") + .is_err() + ); + assert!( + strict_parser + .parse_date_time("2018W313T12128+01:00") + .is_err() + ); + assert!( + strict_parser + .parse_date_time("2018W313T81212+0100") + .is_err() + ); + assert!( + strict_parser + .parse_date_time("2018W313T12812+0100") + .is_err() + ); + assert!( + strict_parser + .parse_date_time("2018W313T12812+01:00") + .is_err() + ); + } + + #[test] + fn test_strict_week_date_with_separators() { + let parser = StrptimeParser::from_java_datetime_format("strict_week_date").unwrap(); + + let test_cases = [ + ("2012-W48-6", datetime!(2012-12-02 00:00:00 UTC)), + ("2012-W01-6", datetime!(2012-01-08 00:00:00 UTC)), + ("2018-W31-3", datetime!(2018-08-02 00:00:00 UTC)), + ]; + + for (input, expected) in test_cases.iter() { + let parsed = parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!(parsed, *expected, "mismatch for input: {input}"); + } + + // Test error cases - non-padded week + assert!(parser.parse_date_time("2012-W1-6").is_err()); + // Invalid day of week (should be 1-7) + assert!(parser.parse_date_time("2012-W01-8").is_err()); + // Both errors at once + assert!(parser.parse_date_time("2012-W1-8").is_err()); + } + + #[test] + fn test_week_date_non_strict() { + let parser = StrptimeParser::from_java_datetime_format("week_date").unwrap(); + + let test_cases = [ + ("2012-W48-6", datetime!(2012-12-02 00:00:00 UTC)), + ("2012-W01-6", datetime!(2012-01-08 00:00:00 UTC)), + ("2012-W1-6", datetime!(2012-01-08 00:00:00 UTC)), // Non-strict allows W1 + ]; + + for (input, expected) in test_cases.iter() { + let parsed = parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!(parsed, *expected, "mismatch for input: {input}"); + } + + // Invalid day of week should still fail + assert!(parser.parse_date_time("2012-W1-8").is_err()); + } + + // Test date_optional_time format (non-strict variant) + #[test] + fn test_date_optional_time() { + let parser = StrptimeParser::from_java_datetime_format("date_optional_time").unwrap(); + + let test_cases = [ + // Date only formats + ("2018-05", datetime!(2018-05-01 00:00:00 UTC)), + ("2018-05-30", datetime!(2018-05-30 00:00:00 UTC)), + // With time components (no timezone) + ("2018-05-30T20", datetime!(2018-05-30 20:00:00 UTC)), + ("2018-05-30T20:21", datetime!(2018-05-30 20:21:00 UTC)), + ("2018-05-30T20:21:23", datetime!(2018-05-30 20:21:23 UTC)), + // With fractional seconds (no timezone) + ( + "2018-05-30T20:21:23.1", + datetime!(2018-05-30 20:21:23.1 UTC), + ), + ( + "2018-05-30T20:21:23.123", + datetime!(2018-05-30 20:21:23.123 UTC), + ), + ( + "2018-05-30T20:21:23.123456789", + datetime!(2018-05-30 20:21:23.123456789 UTC), + ), + // With timezone + ( + "2018-05-30T20:21:23.123Z", + datetime!(2018-05-30 20:21:23.123 UTC), + ), + ( + "2018-05-30T20:21:23.123456789Z", + datetime!(2018-05-30 20:21:23.123456789 UTC), + ), + ( + "2018-05-30T20:21:23.1+0100", + datetime!(2018-05-30 20:21:23.1 +01:00:00), + ), + ( + "2018-05-30T20:21:23.123+0100", + datetime!(2018-05-30 20:21:23.123 +01:00:00), + ), + ( + "2018-05-30T20:21:23.1+01:00", + datetime!(2018-05-30 20:21:23.1 +01:00:00), + ), + ( + "2018-05-30T20:21:23.123+01:00", + datetime!(2018-05-30 20:21:23.123 +01:00:00), + ), + // Padded time components + ("2018-12-31T10:15:30", datetime!(2018-12-31 10:15:30 UTC)), + ]; + + for (input, expected) in test_cases.iter() { + let parsed = parser + .parse_date_time(input) + .unwrap_or_else(|err| panic!("failed to parse {input}: {err}")); + assert_eq!(parsed, *expected, "mismatch for input: {input}"); + } + + // ES allows non-padded date and time components for date_optional_time but our + // current implementation uses strict padding (dd, HH, mm, ss require 2 digits) + // TODO: implement optional padding to match ES behavior for: + // - "2018-12-1" (non-padded day) + // - "2018-12-31T10:15:3" (non-padded second) + // - "2018-12-31T10:5:30" (non-padded minute) + // - "2018-12-31T1:15:30" (non-padded hour) + } +} + +/// Tests for formatting datetime to strings, ported from Elasticsearch's +/// `DateFormattersTests.java` to ensure maximum compatibility with their +/// date formatting behavior. +#[cfg(test)] +mod tests_formatting_ported_from_es { + use time::macros::datetime; + + use super::*; + + #[test] + fn test_strict_date_optional_time_formats_milliseconds() { + // From ES testMinNanos() and testMaxNanos() - strict_date_optional_time + // should format with 3 digits (milliseconds precision) + let formatter = + StrptimeParser::from_java_datetime_format("strict_date_optional_time").unwrap(); + + // Test with various nanosecond values + let test_cases = [ + // Full milliseconds + ( + datetime!(2019-02-08 11:43:00.123 UTC), + "2019-02-08T11:43:00.123Z", + ), + // Zero milliseconds should show .000 + ( + datetime!(2019-02-08 11:43:00.0 UTC), + "2019-02-08T11:43:00.000Z", + ), + // Partial milliseconds - should round/truncate to 3 digits + ( + datetime!(2019-02-08 11:43:00.1 UTC), + "2019-02-08T11:43:00.100Z", + ), + ( + datetime!(2019-02-08 11:43:00.12 UTC), + "2019-02-08T11:43:00.120Z", + ), + // With microseconds - should truncate to milliseconds + ( + datetime!(2019-02-08 11:43:00.123456 UTC), + "2019-02-08T11:43:00.123Z", + ), + // With nanoseconds - should truncate to milliseconds + ( + datetime!(2019-02-08 11:43:00.123456789 UTC), + "2019-02-08T11:43:00.123Z", + ), + ]; + + for (input, expected) in test_cases.iter() { + let formatted = formatter + .format_date_time(input) + .unwrap_or_else(|err| panic!("failed to format {input}: {err}")); + assert_eq!( + &formatted, expected, + "formatting mismatch for {input}: got {formatted}" + ); + } + } + + #[test] + fn test_strict_date_optional_time_nanos_formats_nanoseconds() { + // From ES testMinNanos() and testMaxNanos() - strict_date_optional_time_nanos + // should format with 9 digits (nanosecond precision) + let formatter = + StrptimeParser::from_java_datetime_format("strict_date_optional_time_nanos").unwrap(); + + // Test with various nanosecond values + let test_cases = [ + // Full nanoseconds + ( + datetime!(2019-02-08 11:43:00.123456789 UTC), + "2019-02-08T11:43:00.123456789Z", + ), + // Zero nanoseconds should show .000000000 + ( + datetime!(2019-02-08 11:43:00.0 UTC), + "2019-02-08T11:43:00.000000000Z", + ), + // Milliseconds only - should pad to 9 digits + ( + datetime!(2019-02-08 11:43:00.123 UTC), + "2019-02-08T11:43:00.123000000Z", + ), + // Microseconds - should pad to 9 digits + ( + datetime!(2019-02-08 11:43:00.123456 UTC), + "2019-02-08T11:43:00.123456000Z", + ), + // Partial nanoseconds + ( + datetime!(2019-02-08 11:43:00.1 UTC), + "2019-02-08T11:43:00.100000000Z", + ), + ( + datetime!(2019-02-08 11:43:00.12 UTC), + "2019-02-08T11:43:00.120000000Z", + ), + ]; + + for (input, expected) in test_cases.iter() { + let formatted = formatter + .format_date_time(input) + .unwrap_or_else(|err| panic!("failed to format {input}: {err}")); + assert_eq!( + &formatted, expected, + "formatting mismatch for {input}: got {formatted}" + ); + } + } + + #[test] + fn test_strict_date_optional_time_formats_with_timezone() { + let formatter = + StrptimeParser::from_java_datetime_format("strict_date_optional_time").unwrap(); + + let test_cases = [ + // UTC timezone + ( + datetime!(2018-12-31 10:15:30.123 UTC), + "2018-12-31T10:15:30.123Z", + ), + // Positive offset + ( + datetime!(2018-12-31 10:15:30.123 +01:00:00), + "2018-12-31T10:15:30.123+01:00", + ), + // Negative offset + ( + datetime!(2018-12-31 10:15:30.123 -05:00:00), + "2018-12-31T10:15:30.123-05:00", + ), + ]; + + for (input, expected) in test_cases.iter() { + let formatted = formatter + .format_date_time(input) + .unwrap_or_else(|err| panic!("failed to format {input}: {err}")); + assert_eq!( + &formatted, expected, + "formatting mismatch for {input}: got {formatted}" + ); + } + } + + #[test] + fn test_format_and_parse_roundtrip() { + // Verify that formatting and parsing are inverse operations + let formatter = + StrptimeParser::from_java_datetime_format("strict_date_optional_time").unwrap(); + + let test_datetimes = [ + datetime!(2018-12-31 10:15:30.123 UTC), + datetime!(2019-02-08 11:43:00.0 UTC), + datetime!(2020-01-01 00:00:00.001 UTC), + datetime!(2018-12-31 10:15:30.123 +01:00:00), + ]; + + for original in test_datetimes.iter() { + let formatted = formatter + .format_date_time(original) + .unwrap_or_else(|err| panic!("failed to format {original}: {err}")); + let parsed = formatter + .parse_date_time(&formatted) + .unwrap_or_else(|err| panic!("failed to parse {formatted}: {err}")); + assert_eq!( + parsed, *original, + "roundtrip failed for {original}: formatted as {formatted}, parsed as {parsed}" + ); + } + } + + #[test] + fn test_format_nanos_roundtrip() { + // Verify that formatting and parsing are inverse operations for nanos format + let formatter = + StrptimeParser::from_java_datetime_format("strict_date_optional_time_nanos").unwrap(); + + let test_datetimes = [ + datetime!(2018-12-31 10:15:30.123456789 UTC), + datetime!(2019-02-08 11:43:00.0 UTC), + datetime!(2020-01-01 00:00:00.000000001 UTC), + datetime!(2018-12-31 10:15:30.123456 UTC), + ]; + + for original in test_datetimes.iter() { + let formatted = formatter + .format_date_time(original) + .unwrap_or_else(|err| panic!("failed to format {original}: {err}")); + let parsed = formatter + .parse_date_time(&formatted) + .unwrap_or_else(|err| panic!("failed to parse {formatted}: {err}")); + assert_eq!( + parsed, *original, + "roundtrip failed for {original}: formatted as {formatted}, parsed as {parsed}" + ); + } + } + + #[test] + fn test_zero_millis_formatted_with_trailing_zeros() { + // From ES test0MillisAreFormatted() - even when milliseconds are zero, + // they should be formatted with .000 + let formatter = + StrptimeParser::from_java_datetime_format("strict_date_optional_time").unwrap(); + + let dt = datetime!(2019-02-08 11:43:00.0 UTC); + let formatted = formatter.format_date_time(&dt).unwrap(); + + // Should format as .000, not omit the fractional seconds + assert_eq!(formatted, "2019-02-08T11:43:00.000Z"); + } +}