Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions imspy_connector/src/py_dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,21 @@ impl PyTimsDataset {
/// # Create dataset with calibration (fast + accurate)
/// dataset = PyTimsDataset.with_calibration("sample.d", False, im_lookup.tolist())
/// ```
///
/// `bruker_lib_path` (optional) supplies the Bruker SDK shared library so an
/// accurate m/z calibration can be derived. When omitted (default "NO_SDK")
/// the converter falls back to the 2-point boundary m/z model, which can
/// have a large m/z error on some datasets.
#[staticmethod]
pub fn with_calibration(data_path: &str, in_memory: bool, im_lookup: Vec<f64>) -> Self {
let dataset = TimsDataset::new_with_calibration(data_path, in_memory, im_lookup);
#[pyo3(signature = (data_path, in_memory, im_lookup, bruker_lib_path = "NO_SDK"))]
pub fn with_calibration(
data_path: &str,
in_memory: bool,
im_lookup: Vec<f64>,
bruker_lib_path: &str,
) -> Self {
let dataset =
TimsDataset::new_with_calibration(data_path, in_memory, bruker_lib_path, im_lookup);
PyTimsDataset { inner: dataset }
}

Expand Down
16 changes: 14 additions & 2 deletions imspy_connector/src/py_dda.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,21 @@ impl PyTimsDatasetDDA {
/// # Create dataset with calibration (fast + accurate)
/// dataset = py_dda.PyTimsDatasetDDA.with_calibration("sample.d", False, im_lookup.tolist())
/// ```
///
/// `bruker_lib_path` (optional) supplies the Bruker SDK shared library so an
/// accurate m/z calibration can be derived. When omitted (default "NO_SDK")
/// the converter falls back to the 2-point boundary m/z model, which can
/// have a large m/z error on some datasets.
#[staticmethod]
pub fn with_calibration(data_path: &str, in_memory: bool, im_lookup: Vec<f64>) -> Self {
let dataset = TimsDatasetDDA::new_with_calibration(data_path, in_memory, im_lookup);
#[pyo3(signature = (data_path, in_memory, im_lookup, bruker_lib_path = "NO_SDK"))]
pub fn with_calibration(
data_path: &str,
in_memory: bool,
im_lookup: Vec<f64>,
bruker_lib_path: &str,
) -> Self {
let dataset =
TimsDatasetDDA::new_with_calibration(data_path, in_memory, bruker_lib_path, im_lookup);
PyTimsDatasetDDA { inner: dataset }
}

Expand Down
6 changes: 6 additions & 0 deletions rustdf/src/data/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,17 @@ impl TimsDataset {
/// # Arguments
/// * `data_path` - Path to the .d folder
/// * `in_memory` - Whether to load all data into memory
/// * `bruker_lib_path` - Path to the Bruker SDK shared library; used to
/// derive an accurate m/z calibration. Pass "NO_SDK" (or an empty
/// string) to skip and use the 2-point boundary m/z model.
/// * `im_lookup` - Pre-computed scan→1/K0 lookup table from Bruker SDK
///
/// # Returns
/// A new TimsDataset with LookupIndexConverter
pub fn new_with_calibration(
data_path: &str,
in_memory: bool,
bruker_lib_path: &str,
im_lookup: Vec<f64>,
) -> Self {
let global_meta_data = read_global_meta_sql(data_path).unwrap();
Expand All @@ -81,13 +85,15 @@ impl TimsDataset {
let loader = match in_memory {
true => TimsDataLoader::new_in_memory_with_calibration(
data_path,
bruker_lib_path,
tof_max_index,
mz_lower,
mz_upper,
im_lookup,
),
false => TimsDataLoader::new_lazy_with_calibration(
data_path,
bruker_lib_path,
tof_max_index,
mz_lower,
mz_upper,
Expand Down
6 changes: 6 additions & 0 deletions rustdf/src/data/dda.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,13 +195,17 @@ impl TimsDatasetDDA {
/// # Arguments
/// * `data_path` - Path to the .d folder
/// * `in_memory` - Whether to load all data into memory
/// * `bruker_lib_path` - Path to the Bruker SDK shared library; used to
/// derive an accurate m/z calibration. Pass "NO_SDK" (or an empty
/// string) to skip and use the 2-point boundary m/z model.
/// * `im_lookup` - Pre-computed scan→1/K0 lookup table from Bruker SDK
///
/// # Returns
/// A new TimsDatasetDDA with LookupIndexConverter (thread-safe, accurate)
pub fn new_with_calibration(
data_path: &str,
in_memory: bool,
bruker_lib_path: &str,
im_lookup: Vec<f64>,
) -> Self {
let global_meta_data = read_global_meta_sql(data_path).unwrap();
Expand All @@ -213,13 +217,15 @@ impl TimsDatasetDDA {
let loader = match in_memory {
true => TimsDataLoader::new_in_memory_with_calibration(
data_path,
bruker_lib_path,
tof_max_index,
mz_lower,
mz_upper,
im_lookup,
),
false => TimsDataLoader::new_lazy_with_calibration(
data_path,
bruker_lib_path,
tof_max_index,
mz_lower,
mz_upper,
Expand Down
79 changes: 75 additions & 4 deletions rustdf/src/data/handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,41 @@ fn derive_mz_calibration(
Some((intercept, slope))
}

/// Assemble the `LookupIndexConverter` used by the calibration-aware loaders.
///
/// Ion mobility always comes from the supplied scan→1/K0 table (`im_lookup`).
/// The m/z axis is chosen as follows:
///
/// * if `derive_mz_calibration` yields an `(intercept, slope)` pair, those
///   coefficients are handed to `LookupIndexConverter::with_mz_fit`;
/// * otherwise the 2-point boundary model (`LookupIndexConverter::new`) is
///   built from `mz_lower`, `mz_upper` and `tof_max_index`.
///
/// A warning is written to stderr only when the fallback is taken although the
/// caller supplied a library path, i.e. `bruker_lib_path` is neither empty nor
/// the "NO_SDK" sentinel.
fn build_lookup_converter(
    bruker_lib_path: &str,
    data_path: &str,
    tof_max_index: u32,
    mz_lower: f64,
    mz_upper: f64,
    im_lookup: Vec<f64>,
) -> LookupIndexConverter {
    // Preferred path: regression coefficients obtained through the SDK.
    if let Some((fit_intercept, fit_slope)) =
        derive_mz_calibration(bruker_lib_path, data_path, tof_max_index)
    {
        return LookupIndexConverter::with_mz_fit(fit_intercept, fit_slope, im_lookup);
    }

    // Stay silent when the caller opted out of the SDK on purpose.
    let sdk_was_requested = !matches!(bruker_lib_path, "" | "NO_SDK");
    if sdk_was_requested {
        eprintln!(
            "Warning: Could not derive m/z calibration from SDK at '{}'. \
             Falling back to the 2-point boundary model, which may have a \
             large m/z error on some datasets.",
            bruker_lib_path
        );
    }

    LookupIndexConverter::new(mz_lower, mz_upper, tof_max_index, im_lookup)
}

fn lzf_decompress(data: &[u8], max_output_size: usize) -> Result<Vec<u8>, Box<dyn Error>> {
let decompressed_data = lzf::decompress(data, max_output_size)
.map_err(|e| format!("LZF decompression failed: {}", e))?;
Expand Down Expand Up @@ -912,6 +947,9 @@ impl TimsDataLoader {
///
/// # Arguments
/// * `data_path` - Path to the .d folder
/// * `bruker_lib_path` - Path to the Bruker SDK shared library; used to
/// derive an accurate m/z calibration. Pass "NO_SDK" (or an empty
/// string) to skip and use the 2-point boundary m/z model.
/// * `tof_max_index` - Maximum TOF index (from GlobalMetaData)
/// * `mz_lower` - Minimum m/z value (from GlobalMetaData)
/// * `mz_upper` - Maximum m/z value (from GlobalMetaData)
Expand All @@ -921,17 +959,20 @@ impl TimsDataLoader {
/// A new TimsDataLoader with LookupIndexConverter
pub fn new_lazy_with_calibration(
data_path: &str,
bruker_lib_path: &str,
tof_max_index: u32,
mz_lower: f64,
mz_upper: f64,
im_lookup: Vec<f64>,
) -> Self {
let raw_data_layout = TimsRawDataLayout::new(data_path);

let index_converter = TimsIndexConverter::Lookup(LookupIndexConverter::new(
let index_converter = TimsIndexConverter::Lookup(build_lookup_converter(
bruker_lib_path,
data_path,
tof_max_index,
mz_lower,
mz_upper,
tof_max_index,
im_lookup,
));

Expand All @@ -948,6 +989,9 @@ impl TimsDataLoader {
///
/// # Arguments
/// * `data_path` - Path to the .d folder
/// * `bruker_lib_path` - Path to the Bruker SDK shared library; used to
/// derive an accurate m/z calibration. Pass "NO_SDK" (or an empty
/// string) to skip and use the 2-point boundary m/z model.
/// * `tof_max_index` - Maximum TOF index (from GlobalMetaData)
/// * `mz_lower` - Minimum m/z value (from GlobalMetaData)
/// * `mz_upper` - Maximum m/z value (from GlobalMetaData)
Expand All @@ -957,17 +1001,20 @@ impl TimsDataLoader {
/// A new TimsDataLoader with LookupIndexConverter
pub fn new_in_memory_with_calibration(
data_path: &str,
bruker_lib_path: &str,
tof_max_index: u32,
mz_lower: f64,
mz_upper: f64,
im_lookup: Vec<f64>,
) -> Self {
let raw_data_layout = TimsRawDataLayout::new(data_path);

let index_converter = TimsIndexConverter::Lookup(LookupIndexConverter::new(
let index_converter = TimsIndexConverter::Lookup(build_lookup_converter(
bruker_lib_path,
data_path,
tof_max_index,
mz_lower,
mz_upper,
tof_max_index,
im_lookup,
));

Expand Down Expand Up @@ -1349,6 +1396,30 @@ impl LookupIndexConverter {
im_max,
}
}

/// Construct a `LookupIndexConverter` from regression-derived m/z coefficients.
///
/// The coefficients describe `sqrt(mz) = tof_intercept + tof_slope * tof` and
/// are expected to come from the Bruker SDK fit (see `derive_mz_calibration`),
/// as opposed to the 2-point boundary model built by `new`.
///
/// # Arguments
/// * `tof_intercept` - Intercept of the sqrt(mz)-vs-tof regression
/// * `tof_slope` - Slope of the sqrt(mz)-vs-tof regression
/// * `im_lookup` - Pre-computed scan→1/K0 lookup table from Bruker SDK
///
/// # Returns
/// A converter that stores the coefficients and the table unchanged, plus the
/// smallest and largest table entry as `im_min` / `im_max`. For an empty table
/// these remain at `f64::INFINITY` and `f64::NEG_INFINITY` respectively.
pub fn with_mz_fit(tof_intercept: f64, tof_slope: f64, im_lookup: Vec<f64>) -> Self {
    // One sweep over the table collects both mobility bounds needed for the
    // inverse conversion.
    let (im_min, im_max) = im_lookup.iter().fold(
        (f64::INFINITY, f64::NEG_INFINITY),
        |(lowest, highest), &value| (lowest.min(value), highest.max(value)),
    );

    Self {
        tof_intercept,
        tof_slope,
        im_lookup,
        im_min,
        im_max,
    }
}
}

impl IndexConverter for LookupIndexConverter {
Expand Down
Loading