From 2e9f27be2c5cce9a4ae923ccc0a048f54f0d7bbc Mon Sep 17 00:00:00 2001 From: Rustam Nassyrov Date: Thu, 16 Apr 2026 22:01:36 +0000 Subject: [PATCH 1/4] Fix EliasFanoVec::rank off-by-start_index_lower in bin-search fallback `search_element_in_block::`'s binary-search fallback sets `cursor = final_bound + direction`, but `final_bound` is an ABSOLUTE position in `lower_vec` while the subsequent fallthrough return `start_index_lower + cursor` treats `cursor` as RELATIVE. That double-counts `start_index_lower`, so `rank(v)` can return a value `>= len()` for a `v` that is actually in the vec. Trigger: the query equals the last element of an EF upper-bucket of size > BIN_SEARCH_THRESHOLD (= 4). Found via property testing; a minimal 14-element case is pinned as `test_rank_last_element_of_large_bucket`. Downstream, this bogus rank fed into a parallel EF vec's `get_unchecked` caused an OOB panic at `bit_vec/mod.rs:1187` in production. Fix: subtract `start_index_lower` when reassigning `cursor`, so the fallthrough return still yields the correct absolute index. --- src/elias_fano/mod.rs | 9 ++++++-- src/elias_fano/tests.rs | 47 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/src/elias_fano/mod.rs b/src/elias_fano/mod.rs index eddf5b3..054d6de 100644 --- a/src/elias_fano/mod.rs +++ b/src/elias_fano/mod.rs @@ -381,9 +381,14 @@ impl EliasFanoVec { } } - // update the cursor because we use it for the final index calculation + // update the cursor because we use it for the final index calculation. + // `final_bound` is an ABSOLUTE position in `lower_vec`, while the + // fallthrough return `start_index_lower + cursor` treats `cursor` + // as RELATIVE to `start_index_lower`, so we subtract it here to + // avoid double-counting. if INDEX { - cursor = final_bound as isize + direction; + cursor = final_bound as isize + direction + - start_index_lower as isize; } break; } diff --git a/src/elias_fano/tests.rs b/src/elias_fano/tests.rs index b7b0d42..4965084 100644 --- a/src/elias_fano/tests.rs +++ b/src/elias_fano/tests.rs @@ -524,3 +524,50 @@ fn test_empty_ef_vec() { assert_eq!(ef.rank(3), 0); assert_eq!(ef.delta(0), None); } + +/// Regression test for a `rank` off-by-`start_index_lower` bug in the binary-search +/// fallback of `search_element_in_block::`. +/// +/// When the query equals the last element of an EF upper-bucket whose size is +/// `> BIN_SEARCH_THRESHOLD` (= 4), the fallthrough path sets +/// `cursor = final_bound + direction` where `final_bound` is ABSOLUTE, but the +/// final return expression `start_index_lower + cursor` treats `cursor` as RELATIVE. +/// The returned rank is then off by `start_index_lower`, which can exceed `len()`. +/// +/// The 14-element input below was shrunk by proptest: term ids at indices 5..9 all +/// map to upper-bucket 4 (size 5). `rank(term_at_idx_9)` should return 9 but +/// returned 14 (= len) before the fix. A similar 368-element input caused an +/// out-of-bounds panic in downstream code that used the bogus rank to index a +/// parallel EF vec. +#[test] +fn test_rank_last_element_of_large_bucket() { + let term_ids: Vec = vec![ + 78_331_002, + 513_675_053, + 636_305_051, + 862_020_831, + 940_175_489, + 1_185_270_276, + 1_190_131_203, + 1_307_243_738, + 1_317_580_511, + 1_370_826_467, + 2_629_427_158, + 3_318_559_395, + 3_497_149_084, + 3_949_162_615, + ]; + let ef = EliasFanoVec::from_slice(&term_ids); + + // Every member's rank must equal its index in the input. + for (i, &t) in term_ids.iter().enumerate() { + assert_eq!( + ef.rank(t) as usize, + i, + "rank({t}) returned wrong index (expected {i})" + ); + } + + // Specifically: the last element of bucket 4 (idx 9) used to return 14. + assert_eq!(ef.rank(1_370_826_467), 9); +} From b48c261852bd8a90fd15abd6c9e8ba4708256853 Mon Sep 17 00:00:00 2001 From: Rustam Nassyrov Date: Thu, 16 Apr 2026 22:22:42 +0000 Subject: [PATCH 2/4] trim verbose comments --- src/elias_fano/mod.rs | 7 ++----- src/elias_fano/tests.rs | 28 ++++------------------------ 2 files changed, 6 insertions(+), 29 deletions(-) diff --git a/src/elias_fano/mod.rs b/src/elias_fano/mod.rs index 054d6de..91eb4b9 100644 --- a/src/elias_fano/mod.rs +++ b/src/elias_fano/mod.rs @@ -381,11 +381,8 @@ impl EliasFanoVec { } } - // update the cursor because we use it for the final index calculation. - // `final_bound` is an ABSOLUTE position in `lower_vec`, while the - // fallthrough return `start_index_lower + cursor` treats `cursor` - // as RELATIVE to `start_index_lower`, so we subtract it here to - // avoid double-counting. + // `final_bound` is absolute; the fallthrough return adds + // `start_index_lower`, so make `cursor` relative here. if INDEX { cursor = final_bound as isize + direction - start_index_lower as isize; diff --git a/src/elias_fano/tests.rs b/src/elias_fano/tests.rs index 4965084..5c55500 100644 --- a/src/elias_fano/tests.rs +++ b/src/elias_fano/tests.rs @@ -525,20 +525,9 @@ fn test_empty_ef_vec() { assert_eq!(ef.delta(0), None); } -/// Regression test for a `rank` off-by-`start_index_lower` bug in the binary-search -/// fallback of `search_element_in_block::`. -/// -/// When the query equals the last element of an EF upper-bucket whose size is -/// `> BIN_SEARCH_THRESHOLD` (= 4), the fallthrough path sets -/// `cursor = final_bound + direction` where `final_bound` is ABSOLUTE, but the -/// final return expression `start_index_lower + cursor` treats `cursor` as RELATIVE. -/// The returned rank is then off by `start_index_lower`, which can exceed `len()`. -/// -/// The 14-element input below was shrunk by proptest: term ids at indices 5..9 all -/// map to upper-bucket 4 (size 5). `rank(term_at_idx_9)` should return 9 but -/// returned 14 (= len) before the fix. A similar 368-element input caused an -/// out-of-bounds panic in downstream code that used the bogus rank to index a -/// parallel EF vec. +/// Regression for a bin-search-fallback off-by-`start_index_lower` in `rank` when +/// the query hits the last element of an upper-bucket of size > BIN_SEARCH_THRESHOLD. +/// Indices 5..9 share upper-bucket 4; `rank(ids[9])` used to return 14 (= len). #[test] fn test_rank_last_element_of_large_bucket() { let term_ids: Vec = vec![ @@ -558,16 +547,7 @@ fn test_rank_last_element_of_large_bucket() { 3_949_162_615, ]; let ef = EliasFanoVec::from_slice(&term_ids); - - // Every member's rank must equal its index in the input. for (i, &t) in term_ids.iter().enumerate() { - assert_eq!( - ef.rank(t) as usize, - i, - "rank({t}) returned wrong index (expected {i})" - ); + assert_eq!(ef.rank(t) as usize, i, "rank({t}) expected {i}"); } - - // Specifically: the last element of bucket 4 (idx 9) used to return 14. - assert_eq!(ef.rank(1_370_826_467), 9); } From 0f2eca1517b77dc13c2b449847474ac66bd066e1 Mon Sep 17 00:00:00 2001 From: Johannes Hengstler Date: Fri, 17 Apr 2026 10:54:53 +0200 Subject: [PATCH 3/4] simplify test case, and slightly clarify regression documentation. --- src/elias_fano/tests.rs | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/src/elias_fano/tests.rs b/src/elias_fano/tests.rs index 5c55500..ac48c42 100644 --- a/src/elias_fano/tests.rs +++ b/src/elias_fano/tests.rs @@ -526,26 +526,11 @@ fn test_empty_ef_vec() { } /// Regression for a bin-search-fallback off-by-`start_index_lower` in `rank` when -/// the query hits the last element of an upper-bucket of size > BIN_SEARCH_THRESHOLD. -/// Indices 5..9 share upper-bucket 4; `rank(ids[9])` used to return 14 (= len). +/// the query hits the last element of a bucket (in the upper array) and has already fallen back to binary search. +/// Indices 5..8 share upper-bucket 4, querying 1004 should return 8, but was off by the `start_index_lower` of the search. #[test] fn test_rank_last_element_of_large_bucket() { - let term_ids: Vec = vec![ - 78_331_002, - 513_675_053, - 636_305_051, - 862_020_831, - 940_175_489, - 1_185_270_276, - 1_190_131_203, - 1_307_243_738, - 1_317_580_511, - 1_370_826_467, - 2_629_427_158, - 3_318_559_395, - 3_497_149_084, - 3_949_162_615, - ]; + let term_ids: Vec = vec![0, 500, 600, 800, 900, 1_001, 1_002, 1_003, 1_004, 3_000]; let ef = EliasFanoVec::from_slice(&term_ids); for (i, &t) in term_ids.iter().enumerate() { assert_eq!(ef.rank(t) as usize, i, "rank({t}) expected {i}"); From 6fea4884cc44574ef5b4a2fe10553a3110df21cf Mon Sep 17 00:00:00 2001 From: Johannes Hengstler Date: Fri, 17 Apr 2026 10:58:47 +0200 Subject: [PATCH 4/4] slight clarification of comments --- src/elias_fano/mod.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/elias_fano/mod.rs b/src/elias_fano/mod.rs index 91eb4b9..0e0bd9a 100644 --- a/src/elias_fano/mod.rs +++ b/src/elias_fano/mod.rs @@ -381,11 +381,10 @@ impl EliasFanoVec { } } - // `final_bound` is absolute; the fallthrough return adds - // `start_index_lower`, so make `cursor` relative here. + // `final_bound` is absolute; the final return adds `start_index_lower`, + // so we have to subtract it here to match the relative cursor coming out of the linear search. if INDEX { - cursor = final_bound as isize + direction - - start_index_lower as isize; + cursor = final_bound as isize + direction - start_index_lower as isize; } break; } @@ -393,8 +392,7 @@ impl EliasFanoVec { return if INDEX { // the loop ended because the element at cursor has a larger upper index, - // so we return the previous element count - // (element at curser - 1, +1 because count is not 0 based) + // so we return the previous element count (element at curser - 1 + 1 because count is not 0 based) start_index_lower as u64 + cursor as u64 } else { (query_masked_upper | lower_candidate) + self.universe_zero