From 7cb837a9b24887211de7db75acadefe703568fc8 Mon Sep 17 00:00:00 2001 From: comphead Date: Tue, 12 May 2026 18:31:03 -0700 Subject: [PATCH] feat: fix windows frame positive/neg overflows --- datafusion/expr/src/window_state.rs | 57 +++-- datafusion/sqllogictest/test_files/window.slt | 220 ++++++++++++++++++ 2 files changed, 254 insertions(+), 23 deletions(-) diff --git a/datafusion/expr/src/window_state.rs b/datafusion/expr/src/window_state.rs index d7da7a778b011..f8d4609d3690c 100644 --- a/datafusion/expr/src/window_state.rs +++ b/datafusion/expr/src/window_state.rs @@ -396,6 +396,11 @@ impl WindowFrameStateRange { length: usize, ) -> Result { let current_row_values = get_row_at_idx(range_columns, idx)?; + let search_start = if SIDE { + last_range.start + } else { + last_range.end + }; let end_range = if let Some(delta) = delta { let is_descending: bool = self .sort_options @@ -407,34 +412,40 @@ impl WindowFrameStateRange { })? .descending; - current_row_values - .iter() - .map(|value| { - if value.is_null() { - return Ok(value.clone()); + // On overflow the boundary exceeds the type's range and is + // effectively unbounded within the partition. Collapse to the + // partition edge rather than feeding `search_in_slice` a + // wrapped-around target: PRECEDING searches reach `search_start`, + // FOLLOWING searches reach `length`. + let unbounded_edge = if SEARCH_SIDE { search_start } else { length }; + let mut targets = Vec::with_capacity(current_row_values.len()); + for value in ¤t_row_values { + if value.is_null() { + targets.push(value.clone()); + continue; + } + let target = if SEARCH_SIDE == is_descending { + match value.add_checked(delta) { + Ok(v) => v, + Err(_) => return Ok(unbounded_edge), } - if SEARCH_SIDE == is_descending { - // TODO: Handle positive overflows. - value.add(delta) - } else if value.is_unsigned() && value < delta { - // NOTE: This gets a polymorphic zero without having long coercion code for ScalarValue. - // If we decide to implement a "default" construction mechanism for ScalarValue, - // change the following statement to use that. - value.sub(value) - } else { - // TODO: Handle negative overflows. - value.sub(delta) + } else if value.is_unsigned() && value < delta { + // NOTE: This gets a polymorphic zero without having long coercion code for ScalarValue. + // If we decide to implement a "default" construction mechanism for ScalarValue, + // change the following statement to use that. + value.sub(value)? + } else { + match value.sub_checked(delta) { + Ok(v) => v, + Err(_) => return Ok(unbounded_edge), } - }) - .collect::>>()? + }; + targets.push(target); + } + targets } else { current_row_values }; - let search_start = if SIDE { - last_range.start - } else { - last_range.end - }; let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| { let cmp = compare_rows(current, target, &self.sort_options)?; Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() }) diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 74c2e38baaad5..2a74660fe9fec 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -6236,6 +6236,226 @@ INNER JOIN issue_20194_t2 t2 ---- 6774502793 10040029 1 +# Regression tests for RANGE window frames whose value-offset boundary +# computation overflows the type's representable range. Previously these +# queries panicked in functions-window/src/nth_value.rs with +# "attempt to subtract with overflow" because the wrapped-around target +# produced a frame range where `end < start`. Both positive overflows +# (target above type MAX) and negative overflows (target below type MIN) +# must be treated as unbounded within the partition. + +############################################################################ +# Positive overflow: value + delta exceeds type MAX +############################################################################ + +# ASC + FOLLOWING: end bound wraps past i64::MAX. +query II +SELECT a, last_value(a) OVER (ORDER BY a RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING) +FROM ( + SELECT 9223372036854775804 AS a + UNION ALL SELECT 9223372036854775805 + UNION ALL SELECT 9223372036854775806 +); +---- +9223372036854775804 9223372036854775806 +9223372036854775805 9223372036854775806 +9223372036854775806 9223372036854775806 + +query II +SELECT a, first_value(a) OVER (ORDER BY a RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING) +FROM ( + SELECT 9223372036854775804 AS a + UNION ALL SELECT 9223372036854775805 + UNION ALL SELECT 9223372036854775806 +); +---- +9223372036854775804 9223372036854775804 +9223372036854775805 9223372036854775805 +9223372036854775806 9223372036854775806 + +# Symmetric PRECEDING/FOLLOWING where the FOLLOWING side overflows past MAX. +query II +SELECT a, last_value(a) OVER (ORDER BY a RANGE BETWEEN 4 PRECEDING AND 4 FOLLOWING) +FROM ( + SELECT 9223372036854775804 AS a + UNION ALL SELECT 9223372036854775805 + UNION ALL SELECT 9223372036854775806 +); +---- +9223372036854775804 9223372036854775806 +9223372036854775805 9223372036854775806 +9223372036854775806 9223372036854775806 + +# DESC + PRECEDING: "PRECEDING" walks toward larger values in DESC order, +# so offsetting past i64::MAX exercises the ADD-overflow path. +query II +SELECT a, first_value(a) OVER (ORDER BY a DESC RANGE BETWEEN 4 PRECEDING AND CURRENT ROW) +FROM ( + SELECT 9223372036854775804 AS a + UNION ALL SELECT 9223372036854775805 + UNION ALL SELECT 9223372036854775806 +); +---- +9223372036854775806 9223372036854775806 +9223372036854775805 9223372036854775806 +9223372036854775804 9223372036854775806 + +query II +SELECT a, last_value(a) OVER (ORDER BY a DESC RANGE BETWEEN 4 PRECEDING AND CURRENT ROW) +FROM ( + SELECT 9223372036854775804 AS a + UNION ALL SELECT 9223372036854775805 + UNION ALL SELECT 9223372036854775806 +); +---- +9223372036854775806 9223372036854775806 +9223372036854775805 9223372036854775805 +9223372036854775804 9223372036854775804 + +# Unsigned ordering column: add past u64::MAX must not wrap. +query II +SELECT a, last_value(a) OVER (ORDER BY a RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING) +FROM ( + SELECT arrow_cast(18446744073709551612, 'UInt64') AS a + UNION ALL SELECT arrow_cast(18446744073709551613, 'UInt64') + UNION ALL SELECT arrow_cast(18446744073709551614, 'UInt64') +); +---- +18446744073709551612 18446744073709551614 +18446744073709551613 18446744073709551614 +18446744073709551614 18446744073709551614 + +query II +SELECT a, first_value(a) OVER (ORDER BY a RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING) +FROM ( + SELECT arrow_cast(18446744073709551612, 'UInt64') AS a + UNION ALL SELECT arrow_cast(18446744073709551613, 'UInt64') + UNION ALL SELECT arrow_cast(18446744073709551614, 'UInt64') +); +---- +18446744073709551612 18446744073709551612 +18446744073709551613 18446744073709551613 +18446744073709551614 18446744073709551614 + +############################################################################ +# Negative overflow: value - delta falls below type MIN +############################################################################ + +# ASC + PRECEDING: start bound wraps below i64::MIN. +query II +SELECT a, first_value(a) OVER (ORDER BY a RANGE BETWEEN 4 PRECEDING AND CURRENT ROW) +FROM ( + SELECT -9223372036854775807 AS a + UNION ALL SELECT -9223372036854775806 + UNION ALL SELECT -9223372036854775805 +); +---- +-9223372036854775807 -9223372036854775807 +-9223372036854775806 -9223372036854775807 +-9223372036854775805 -9223372036854775807 + +query II +SELECT a, last_value(a) OVER (ORDER BY a RANGE BETWEEN 4 PRECEDING AND CURRENT ROW) +FROM ( + SELECT -9223372036854775807 AS a + UNION ALL SELECT -9223372036854775806 + UNION ALL SELECT -9223372036854775805 +); +---- +-9223372036854775807 -9223372036854775807 +-9223372036854775806 -9223372036854775806 +-9223372036854775805 -9223372036854775805 + +# Symmetric PRECEDING/FOLLOWING where the PRECEDING side underflows past MIN. +query II +SELECT a, first_value(a) OVER (ORDER BY a RANGE BETWEEN 4 PRECEDING AND 4 FOLLOWING) +FROM ( + SELECT -9223372036854775807 AS a + UNION ALL SELECT -9223372036854775806 + UNION ALL SELECT -9223372036854775805 +); +---- +-9223372036854775807 -9223372036854775807 +-9223372036854775806 -9223372036854775807 +-9223372036854775805 -9223372036854775807 + +# DESC + FOLLOWING: "FOLLOWING" walks toward smaller values in DESC order, +# so offsetting past i64::MIN exercises the SUB-underflow path. +query II +SELECT a, last_value(a) OVER (ORDER BY a DESC RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING) +FROM ( + SELECT -9223372036854775805 AS a + UNION ALL SELECT -9223372036854775806 + UNION ALL SELECT -9223372036854775807 +); +---- +-9223372036854775805 -9223372036854775807 +-9223372036854775806 -9223372036854775807 +-9223372036854775807 -9223372036854775807 + +query II +SELECT a, first_value(a) OVER (ORDER BY a DESC RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING) +FROM ( + SELECT -9223372036854775805 AS a + UNION ALL SELECT -9223372036854775806 + UNION ALL SELECT -9223372036854775807 +); +---- +-9223372036854775805 -9223372036854775805 +-9223372036854775806 -9223372036854775806 +-9223372036854775807 -9223372036854775807 + +# Unsigned ordering column: subtracting an offset that would go below 0 +# must saturate to 0, not wrap to u64::MAX. +query II +SELECT a, first_value(a) OVER (ORDER BY a RANGE BETWEEN 4 PRECEDING AND CURRENT ROW) +FROM ( + SELECT arrow_cast(1, 'UInt64') AS a + UNION ALL SELECT arrow_cast(2, 'UInt64') + UNION ALL SELECT arrow_cast(3, 'UInt64') +); +---- +1 1 +2 1 +3 1 + +query II +SELECT a, last_value(a) OVER (ORDER BY a RANGE BETWEEN 4 PRECEDING AND CURRENT ROW) +FROM ( + SELECT arrow_cast(1, 'UInt64') AS a + UNION ALL SELECT arrow_cast(2, 'UInt64') + UNION ALL SELECT arrow_cast(3, 'UInt64') +); +---- +1 1 +2 2 +3 3 + +############################################################################ +# ROWS frame regression guard: huge offsets already saturate via +# saturating_sub / min(length), verify we keep that behavior. +############################################################################ + +query II +SELECT a, last_value(a) OVER (ORDER BY a ROWS BETWEEN CURRENT ROW AND 9223372036854775807 FOLLOWING) +FROM ( + SELECT 1 AS a UNION ALL SELECT 2 UNION ALL SELECT 3 +); +---- +1 3 +2 3 +3 3 + +query II +SELECT a, first_value(a) OVER (ORDER BY a ROWS BETWEEN 9223372036854775807 PRECEDING AND CURRENT ROW) +FROM ( + SELECT 1 AS a UNION ALL SELECT 2 UNION ALL SELECT 3 +); +---- +1 1 +2 1 +3 1 + # Config reset statement ok reset datafusion.execution.batch_size;