diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ccf812649..04108b22f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -204,28 +204,30 @@ jobs: # env: # CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - # miri: - # needs: basics - # name: miri-test - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v4 - # with: - # lfs: true + miri: + needs: basics + name: miri-test + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: actions/checkout@v4 + with: + lfs: true - # - name: Install Rust nightly with miri - # uses: dtolnay/rust-toolchain@stable - # with: - # toolchain: nightly - # components: miri + - name: Install Rust nightly with miri + uses: dtolnay/rust-toolchain@stable + with: + toolchain: nightly + components: miri - # - name: Install cargo-nextest - # uses: taiki-e/install-action@v2 - # with: - # tool: cargo-nextest + - name: Install cargo-nextest + uses: taiki-e/install-action@v2 + with: + tool: cargo-nextest - # - uses: Swatinem/rust-cache@v2 - # - name: miri - # run: cargo +nightly miri nextest run --package diskann-quantization - # env: - # MIRIFLAGS: -Zmiri-disable-isolation -Zmiri-strict-provenance + - uses: Swatinem/rust-cache@v2 + + - name: miri + run: cargo +nightly miri nextest run --package diskann-quantization + env: + MIRIFLAGS: -Zmiri-disable-isolation -Zmiri-strict-provenance diff --git a/diskann-quantization/src/algorithms/heap.rs b/diskann-quantization/src/algorithms/heap.rs index b3d1093fe..3943cdd30 100644 --- a/diskann-quantization/src/algorithms/heap.rs +++ b/diskann-quantization/src/algorithms/heap.rs @@ -390,11 +390,15 @@ mod tests { // Heap of size 2. fuzz_test_impl(2, 101, &mut rng); - // Heap size not power of two. - fuzz_test_impl(1000, 1000, &mut rng); - - // Heap size power of two. - fuzz_test_impl(128, 1000, &mut rng); + // Miri is extremely slow, so we skip the larger tests there. + #[cfg(not(miri))] + { + // Heap size not power of two. + fuzz_test_impl(1000, 1000, &mut rng); + + // Heap size power of two. + fuzz_test_impl(128, 1000, &mut rng); + } } #[test] diff --git a/diskann-quantization/src/algorithms/kmeans/common.rs b/diskann-quantization/src/algorithms/kmeans/common.rs index 3c095de0d..4808b6fa5 100644 --- a/diskann-quantization/src/algorithms/kmeans/common.rs +++ b/diskann-quantization/src/algorithms/kmeans/common.rs @@ -565,19 +565,35 @@ mod tests { #[test] fn test_block_transpose_16() { + #[cfg(not(miri))] for nrows in 0..128 { for ncols in 0..5 { test_block_transpose::<16>(nrows, ncols); } } + + #[cfg(miri)] + for nrows in 127..128 { + for ncols in 4..5 { + test_block_transpose::<16>(nrows, ncols); + } + } } #[test] fn test_block_transpose_8() { + #[cfg(not(miri))] for nrows in 0..128 { for ncols in 0..5 { test_block_transpose::<8>(nrows, ncols); } } + + #[cfg(miri)] + for nrows in 127..128 { + for ncols in 4..5 { + test_block_transpose::<8>(nrows, ncols); + } + } } } diff --git a/diskann-quantization/src/algorithms/kmeans/lloyds.rs b/diskann-quantization/src/algorithms/kmeans/lloyds.rs index 5c99d3d4e..f6b575fba 100644 --- a/diskann-quantization/src/algorithms/kmeans/lloyds.rs +++ b/diskann-quantization/src/algorithms/kmeans/lloyds.rs @@ -457,14 +457,15 @@ pub fn lloyds( #[cfg(test)] mod tests { - use diskann_utils::{lazy_format, views::Matrix}; + #[cfg(not(miri))] + use diskann_utils::lazy_format; + use diskann_utils::views::Matrix; use diskann_vector::{distance::SquaredL2, PureDistanceFunction}; - use rand::{ - distr::{Distribution, Uniform}, - rngs::StdRng, - seq::{IndexedRandom, SliceRandom}, - Rng, SeedableRng, - }; + #[cfg(not(miri))] + use rand::distr::{Distribution, Uniform}; + #[cfg(not(miri))] + use rand::seq::IndexedRandom; + use rand::{rngs::StdRng, seq::SliceRandom, Rng, SeedableRng}; use super::*; @@ -477,6 +478,7 @@ mod tests { // relatively quickly. // // Outside of rare validations, Miri tests go through a different path for speed purposes. + #[cfg(not(miri))] fn test_distances_in_place_impl( ndata: usize, ncenters: usize, @@ -556,15 +558,11 @@ mod tests { } } - cfg_if::cfg_if! { - if #[cfg(miri)] { - const TRIALS: usize = 1; - } else { - const TRIALS: usize = 100; - } - } + #[cfg(not(miri))] + const TRIALS: usize = 100; #[test] + #[cfg(not(miri))] fn test_distances_in_place() { let mut rng = StdRng::seed_from_u64(0xece88a9c6cd86a8a); for ndata in 1..=31 { @@ -719,12 +717,22 @@ mod tests { #[test] fn end_to_end_test() { let mut rng = StdRng::seed_from_u64(0xff22c38d0f0531bf); - let setup = EndToEndSetup { - ncenters: 11, - ndim: 4, - data_per_center: 8, - step_between_clusters: 20, - ntrials: 10, + let setup = if cfg!(miri) { + EndToEndSetup { + ncenters: 3, + ndim: 4, + data_per_center: 2, + step_between_clusters: 20, + ntrials: 2, + } + } else { + EndToEndSetup { + ncenters: 11, + ndim: 4, + data_per_center: 8, + step_between_clusters: 20, + ntrials: 10, + } }; end_to_end_test_impl(&setup, &mut rng); } diff --git a/diskann-quantization/src/algorithms/kmeans/plusplus.rs b/diskann-quantization/src/algorithms/kmeans/plusplus.rs index e34945ad4..efc2ab544 100644 --- a/diskann-quantization/src/algorithms/kmeans/plusplus.rs +++ b/diskann-quantization/src/algorithms/kmeans/plusplus.rs @@ -683,6 +683,11 @@ mod tests { fn test_update_distances() { let mut rng = StdRng::seed_from_u64(0x56c94b53c73e4fd9); for num_points in 0..48 { + #[cfg(miri)] + if num_points % 7 != 0 { + continue; + } + for dim in 1..4 { test_update_distances_impl(num_points, dim, &mut rng); } @@ -695,6 +700,7 @@ mod tests { // Kmeans++ sanity checks - if there are only `N` distinct and we want `N` centers, // then all `N` should be selected without repeats. + #[cfg(not(miri))] fn sanity_check_impl(ncenters: usize, dim: usize, rng: &mut R) { let repeats_per_center = 3; let context = lazy_format!( @@ -756,6 +762,7 @@ mod tests { // This test is like the sanity check - but instead of exact repeats, we use slightly // perturbed values to test that the proportionality is of distances is respected. + #[cfg(not(miri))] fn fuzzy_sanity_check_impl(ncenters: usize, dim: usize, rng: &mut R) { let repeats_per_center = 3; diff --git a/diskann-quantization/src/algorithms/transforms/double_hadamard.rs b/diskann-quantization/src/algorithms/transforms/double_hadamard.rs index 33ea3fbb8..c5740a988 100644 --- a/diskann-quantization/src/algorithms/transforms/double_hadamard.rs +++ b/diskann-quantization/src/algorithms/transforms/double_hadamard.rs @@ -387,6 +387,7 @@ where /////////// #[cfg(test)] +#[cfg(not(miri))] mod tests { use diskann_utils::lazy_format; use rand::{rngs::StdRng, SeedableRng}; diff --git a/diskann-quantization/src/algorithms/transforms/mod.rs b/diskann-quantization/src/algorithms/transforms/mod.rs index 867e1ebd2..4205a605e 100644 --- a/diskann-quantization/src/algorithms/transforms/mod.rs +++ b/diskann-quantization/src/algorithms/transforms/mod.rs @@ -28,6 +28,7 @@ crate::utils::features! { mod utils; #[cfg(test)] +#[cfg(not(miri))] mod test_utils; // reexports @@ -355,4 +356,5 @@ pub enum TargetDim { } #[cfg(test)] +#[cfg(not(miri))] test_utils::delegate_transformer!(Transform); diff --git a/diskann-quantization/src/algorithms/transforms/padding_hadamard.rs b/diskann-quantization/src/algorithms/transforms/padding_hadamard.rs index ed6d3b27f..535f5c624 100644 --- a/diskann-quantization/src/algorithms/transforms/padding_hadamard.rs +++ b/diskann-quantization/src/algorithms/transforms/padding_hadamard.rs @@ -366,14 +366,15 @@ where #[cfg(test)] mod tests { + #[cfg(not(miri))] use diskann_utils::lazy_format; use rand::{rngs::StdRng, SeedableRng}; use super::*; - use crate::{ - algorithms::transforms::{test_utils, Transform, TransformKind}, - alloc::GlobalAllocator, - }; + + #[cfg(not(miri))] + use crate::algorithms::transforms::{test_utils, Transform, TransformKind}; + use crate::alloc::GlobalAllocator; // Since we use a slightly non-obvious strategy for applying the +/-1 permutation, we // test its behavior explicitly. @@ -441,11 +442,13 @@ mod tests { assert_eq!(output[15], 0.0f32); } + #[cfg(not(miri))] test_utils::delegate_transformer!(PaddingHadamard); // This tests the natural hadamard transform where the output dimension is upgraded // to the next power of 2. #[test] + #[cfg(not(miri))] fn test_padding_hadamard() { // Inner product computations are more susceptible to floating point error. // Instead of using ULP here, we fall back to using absolute and relative error. diff --git a/diskann-quantization/src/algorithms/transforms/test_utils.rs b/diskann-quantization/src/algorithms/transforms/test_utils.rs index c853cc904..c38ba5ea0 100644 --- a/diskann-quantization/src/algorithms/transforms/test_utils.rs +++ b/diskann-quantization/src/algorithms/transforms/test_utils.rs @@ -172,7 +172,11 @@ fn within_ulp(mut got: f32, expected: f32, ulp: usize) -> bool { #[derive(Debug, Clone, Copy)] pub(super) enum Check { Ulp(usize), - AbsRel { abs: f32, rel: f32 }, + AbsRel { + abs: f32, + rel: f32, + }, + #[cfg(not(miri))] Skip, } @@ -185,6 +189,7 @@ impl Check { Self::AbsRel { abs, rel } } + #[cfg(not(miri))] pub(super) fn skip() -> Self { Self::Skip } @@ -219,6 +224,7 @@ impl Check { }) } } + #[cfg(not(miri))] Self::Skip => Ok(()), } } diff --git a/diskann-quantization/src/bits/distances.rs b/diskann-quantization/src/bits/distances.rs index 00ece2883..e888dccf0 100644 --- a/diskann-quantization/src/bits/distances.rs +++ b/diskann-quantization/src/bits/distances.rs @@ -2003,6 +2003,12 @@ mod tests { let dist = Uniform::new_inclusive(min, max).unwrap(); for dim in 0..dim_max { + // Only run the maximum dimension when running under miri. + #[cfg(miri)] + if dim != dim_max - 1 { + continue; + } + let mut x_reference: Vec = vec![0; dim]; let mut y_reference: Vec = vec![0; dim]; @@ -2092,7 +2098,7 @@ mod tests { cfg_if::cfg_if! { if #[cfg(miri)] { - const MAX_DIM: usize = 128; + const MAX_DIM: usize = 8; const TRIALS_PER_DIM: usize = 1; } else { const MAX_DIM: usize = 256; diff --git a/diskann-quantization/src/bits/slice.rs b/diskann-quantization/src/bits/slice.rs index fb77cefff..a08708f4c 100644 --- a/diskann-quantization/src/bits/slice.rs +++ b/diskann-quantization/src/bits/slice.rs @@ -1512,6 +1512,11 @@ mod tests { fn test_binary_dense() { let mut rng = StdRng::seed_from_u64(0xb3c95e8e19d3842e); for len in 0..MAX_DIM { + #[cfg(miri)] + if len != MAX_DIM - 1 { + continue; + } + test_send_and_sync::<1, Binary, Dense>(); test_empty::<1, Binary, Dense>(); test_construction_errors::<1, Binary, Dense>(); @@ -1558,6 +1563,11 @@ mod tests { fn test_4bit_bit_transpose() { let mut rng = StdRng::seed_from_u64(0xb3c95e8e19d3842e); for len in 0..MAX_DIM { + #[cfg(miri)] + if len != MAX_DIM - 1 { + continue; + } + test_send_and_sync::<4, Unsigned, BitTranspose>(); test_empty::<4, Unsigned, BitTranspose>(); test_construction_errors::<4, Unsigned, BitTranspose>(); diff --git a/diskann-quantization/src/minmax/quantizer.rs b/diskann-quantization/src/minmax/quantizer.rs index 13d624b0a..e9f10f651 100644 --- a/diskann-quantization/src/minmax/quantizer.rs +++ b/diskann-quantization/src/minmax/quantizer.rs @@ -337,6 +337,7 @@ impl_functor!(MinMaxCosineNormalized); // Tests // /////////// #[cfg(test)] +#[cfg(not(miri))] mod minmax_quantizer_tests { use std::num::NonZeroUsize; diff --git a/diskann-quantization/src/minmax/vectors.rs b/diskann-quantization/src/minmax/vectors.rs index b8aac8a1c..51510cf82 100644 --- a/diskann-quantization/src/minmax/vectors.rs +++ b/diskann-quantization/src/minmax/vectors.rs @@ -489,6 +489,7 @@ where /////////// #[cfg(test)] +#[cfg(not(miri))] mod minmax_vector_tests { use diskann_utils::Reborrow; use rand::{ @@ -752,7 +753,8 @@ mod minmax_vector_tests { #[test] fn $name() { let mut rng = StdRng::seed_from_u64($seed); - for dim in 1..(bit_scale::<$nbits>() as usize) { + const MAX_DIM: usize = (bit_scale::<$nbits>() as usize); + for dim in 1..=MAX_DIM { for _ in 0..TRIALS { test_minmax_compensated_vectors::<$nbits, _>(dim, &mut rng); } @@ -760,7 +762,7 @@ mod minmax_vector_tests { } }; } - test_minmax_compensated!(unsigned_minmax_compensated_test_u1, 1, 0xa32d5658097a1c35); + test_minmax_compensated!(unsigned_minmax_compensated_test_u1, 1, 0xa33d5658097a1c35); test_minmax_compensated!(unsigned_minmax_compensated_test_u2, 2, 0xaedf3d2a223b7b77); test_minmax_compensated!(unsigned_minmax_compensated_test_u4, 4, 0xf60c0c8d1aadc126); test_minmax_compensated!(unsigned_minmax_compensated_test_u8, 8, 0x09fa14c42a9d7d98); diff --git a/diskann-quantization/src/product/tables/test.rs b/diskann-quantization/src/product/tables/test.rs index 2c823c1eb..70732f65d 100644 --- a/diskann-quantization/src/product/tables/test.rs +++ b/diskann-quantization/src/product/tables/test.rs @@ -4,10 +4,13 @@ */ // A collection of test helpers to ensure uniformity across tables. -use diskann_utils::views::{Matrix, MatrixView, MutMatrixView}; +use diskann_utils::views::Matrix; +#[cfg(not(miri))] +use diskann_utils::views::{MatrixView, MutMatrixView}; +#[cfg(not(miri))] +use rand::seq::IndexedRandom; use rand::{ distr::{Distribution, Uniform}, - seq::IndexedRandom, Rng, SeedableRng, }; @@ -290,6 +293,7 @@ pub(super) fn check_pqtable_single_compression_errors( //////////////////////////////////////////////////////////////////// // A cantralized test for error handling in `CompressInto<[f32], [u8]>` +#[cfg(not(miri))] pub(super) fn check_pqtable_batch_compression_errors( build: &dyn Fn(Matrix, ChunkOffsets) -> T, context: &dyn std::fmt::Display, diff --git a/diskann-quantization/src/product/tables/transposed/pivots.rs b/diskann-quantization/src/product/tables/transposed/pivots.rs index a6d96c256..f4dcd9f36 100644 --- a/diskann-quantization/src/product/tables/transposed/pivots.rs +++ b/diskann-quantization/src/product/tables/transposed/pivots.rs @@ -1418,7 +1418,7 @@ mod tests { fn run_test_happy_path() { // Step dimensions by 1 to test all possible residual combinations. let dims: Vec = if cfg!(miri) { - (1..=8).collect() + (7..=8).collect() } else { (1..=16).collect() }; @@ -1583,12 +1583,22 @@ mod tests { #[test] fn test_process_into() { let mut rng = StdRng::seed_from_u64(0x21dfb5f35dfe5639); + + #[cfg(not(miri))] for total in 1..64 { for dim in 1..5 { println!("on ({}, {})", total, dim); test_process_into_impl(dim, total, &mut rng); } } + + #[cfg(miri)] + for total in 63..64 { + for dim in 4..5 { + println!("on ({}, {})", total, dim); + test_process_into_impl(dim, total, &mut rng); + } + } } #[test] diff --git a/diskann-quantization/src/product/tables/transposed/table.rs b/diskann-quantization/src/product/tables/transposed/table.rs index 2db1ac8a7..d20105e11 100644 --- a/diskann-quantization/src/product/tables/transposed/table.rs +++ b/diskann-quantization/src/product/tables/transposed/table.rs @@ -511,13 +511,14 @@ mod test_compression { }; use super::*; + #[cfg(not(miri))] + use crate::product::tables::test::{ + check_pqtable_batch_compression_errors, check_pqtable_single_compression_errors, + }; use crate::{ distances::{InnerProduct, SquaredL2}, error::format, - product::tables::test::{ - check_pqtable_batch_compression_errors, check_pqtable_single_compression_errors, - create_dataset, create_pivot_tables, - }, + product::tables::test::{create_dataset, create_pivot_tables}, }; use diskann_utils::lazy_format; @@ -618,7 +619,7 @@ mod test_compression { let mut rng = StdRng::seed_from_u64(0x88e3d3366501ad6c); let num_data = if cfg!(miri) { - vec![0, 8, 9, 10, 11] + vec![7, 8] } else { vec![0, 1, 2, 3, 4, 16, 17, 18, 19] }; @@ -915,11 +916,20 @@ mod test_compression { #[test] fn test_process_into() { let mut rng = StdRng::seed_from_u64(0x0e3cf3ba4b27e7f8); + + #[cfg(not(miri))] for num_chunks in 1..5 { for num_centers in 1..48 { test_process_into_impl(num_chunks, num_centers, 2, &mut rng); } } + + #[cfg(miri)] + for num_chunks in 4..5 { + for num_centers in 47..48 { + test_process_into_impl(num_chunks, num_centers, 2, &mut rng); + } + } } #[test] diff --git a/diskann-quantization/src/test_util.rs b/diskann-quantization/src/test_util.rs index a258dd12a..780471776 100644 --- a/diskann-quantization/src/test_util.rs +++ b/diskann-quantization/src/test_util.rs @@ -3,6 +3,7 @@ * Licensed under the MIT license. */ +#[cfg(not(miri))] use std::sync::{ atomic::{AtomicUsize, Ordering}, Arc, @@ -15,7 +16,9 @@ use rand::{ seq::SliceRandom, }; -use crate::alloc::{AllocatorCore, AllocatorError, GlobalAllocator}; +#[cfg(not(miri))] +use crate::alloc::GlobalAllocator; +use crate::alloc::{AllocatorCore, AllocatorError}; /// An allocator that always fails. #[derive(Debug, Clone, Copy)] @@ -36,11 +39,13 @@ unsafe impl AllocatorCore for AlwaysFails { /// An allocator that can only perform a limited number of allocations. /// /// Used to test interfaces for allocation reliability. +#[cfg(not(miri))] #[derive(Debug, Clone)] pub(crate) struct LimitedAllocator { remaining: Arc, } +#[cfg(not(miri))] impl LimitedAllocator { pub(crate) fn new(allocations: usize) -> Self { Self { @@ -49,6 +54,7 @@ impl LimitedAllocator { } } +#[cfg(not(miri))] /// SAFETY: This either forwards to the global allocator, or failed. unsafe impl AllocatorCore for LimitedAllocator { fn allocate(