diff --git a/.gitignore b/.gitignore index 2d14f8e4..236c2c2e 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,5 @@ data/ *.ms2 *.fasta *.fas -**/*.DS_Store \ No newline at end of file +**/*.DS_Store +*.parquet \ No newline at end of file diff --git a/crates/sage-cli/src/input.rs b/crates/sage-cli/src/input.rs index 75b7b84c..1257bdd4 100644 --- a/crates/sage-cli/src/input.rs +++ b/crates/sage-cli/src/input.rs @@ -30,6 +30,8 @@ pub struct Search { pub min_matched_peaks: u16, pub report_psms: usize, pub predict_rt: bool, + pub predict_im: bool, + pub align_rt: bool, pub mzml_paths: Vec, pub output_paths: Vec, pub bruker_config: BrukerProcessingConfig, @@ -65,6 +67,8 @@ pub struct Input { pub deisotope: Option, pub quant: Option, pub predict_rt: Option, + pub predict_im: Option, + pub align_rt: Option, pub output_directory: Option, pub mzml_paths: Option>, pub bruker_config: Option, @@ -82,6 +86,7 @@ pub struct LfqOptions { pub ppm_tolerance: Option, pub mobility_pct_tolerance: Option, pub combine_charge_states: Option, + pub mbr: Option, } impl From for LfqSettings { @@ -98,6 +103,7 @@ impl From for LfqSettings { combine_charge_states: value .combine_charge_states .unwrap_or(default.combine_charge_states), + mbr: value.mbr.unwrap_or(true), }; if settings.ppm_tolerance > 20.0 { log::warn!("lfq_settings.ppm_tolerance is higher than expected"); @@ -285,13 +291,24 @@ impl Input { } } - if !self.predict_rt.unwrap_or(true) - && self.quant.as_ref().and_then(|q| q.lfq).unwrap_or(false) - { - log::warn!( - "`predict_rt: false` and `lfq: true` are incompatible. Setting `predict_rt: true`" - ); - self.predict_rt = Some(true); + if !self.align_rt.unwrap_or(true) { + if self.predict_rt.unwrap_or(true) { + log::warn!( + "`predict_rt: true` and `align_rt: false` are incompatible. Setting `align_rt: true`" + ); + self.align_rt = Some(true); + } else if let Some(quant) = &self.quant { + if quant.lfq.unwrap_or(false) + && quant + .lfq_options + .as_ref() + .and_then(|f| f.mbr) + .unwrap_or(true) + { + log::warn!("`mbr: true` and `align_rt: false` are incompatible. Setting `align_rt: true`"); + self.align_rt = Some(true); + } + } } let mzml_paths = self.mzml_paths.expect("'mzml_paths' must be provided!"); @@ -330,6 +347,8 @@ impl Input { chimera: self.chimera.unwrap_or(false), wide_window: self.wide_window.unwrap_or(false), predict_rt: self.predict_rt.unwrap_or(true), + predict_im: self.predict_im.unwrap_or(true), + align_rt: self.align_rt.unwrap_or(true), output_paths: Vec::new(), write_pin: self.write_pin.unwrap_or(false), bruker_config: self.bruker_config.unwrap_or_default(), diff --git a/crates/sage-cli/src/runner.rs b/crates/sage-cli/src/runner.rs index 40915112..ede3ea84 100644 --- a/crates/sage-cli/src/runner.rs +++ b/crates/sage-cli/src/runner.rs @@ -3,6 +3,7 @@ use super::output::SageResults; use super::telemetry; use anyhow::Context; use csv::ByteRecord; +use fnv::FnvHashMap; use log::info; use rayon::prelude::*; use sage_cloudpath::{CloudPath, FileFormat}; @@ -11,6 +12,7 @@ use sage_core::fasta::Fasta; use sage_core::ion_series::Kind; use sage_core::lfq::{Peak, PrecursorId}; use sage_core::mass::Tolerance; +use sage_core::ml::retention_alignment::Alignment; use sage_core::peptide::Peptide; use sage_core::scoring::Fragments; use sage_core::scoring::{Feature, Scorer}; @@ -436,7 +438,7 @@ impl Runner { let spectra = spectra .ms1 .into_iter() - .map(|x| sp.process_with_mobility(x)) + .map(|x| SpectrumProcessor::process_with_mobility(x)) .collect(); MS1Spectra::WithMobility(spectra) } else { @@ -482,26 +484,18 @@ impl Runner { //Collect all results into a single container let mut outputs = self.batch_files(&scorer, parallel); - let alignments = if self.parameters.predict_rt { - // Poisson probability is usually the best single feature for refining FDR. - // Take our set of 1% FDR filtered PSMs, and use them to train a linear - // regression model for predicting retention time - outputs - .features - .par_sort_unstable_by(|a, b| a.poisson.total_cmp(&b.poisson)); - sage_core::ml::qvalue::spectrum_q_value(&mut outputs.features); - - let alignments = sage_core::ml::retention_alignment::global_alignment( - &mut outputs.features, - self.parameters.mzml_paths.len(), - ); - let _ = sage_core::ml::retention_model::predict(&self.database, &mut outputs.features); - let _ = sage_core::ml::mobility_model::predict(&self.database, &mut outputs.features); - Some(alignments) - } else { - None - }; + // Poisson probability is usually the best single feature for refining FDR. + // Take our set of 1% FDR filtered PSMs, and use them to train a linear + // regression model for predicting retention time + outputs + .features + .par_sort_unstable_by(|a, b| a.poisson.total_cmp(&b.poisson)); + sage_core::ml::qvalue::spectrum_q_value(&mut outputs.features); + + let alignments = Self::align(&self.parameters, &mut outputs.features); + Self::predict(&self.parameters, &self.database, &mut outputs.features); + log::info!("Calculating FDR"); let q_spectrum = self.spectrum_fdr(&mut outputs.features); let q_peptide = sage_core::fdr::picked_peptide(&self.database, &mut outputs.features); let q_protein = sage_core::fdr::picked_protein(&self.database, &mut outputs.features); @@ -518,24 +512,13 @@ impl Runner { }) .collect::>(); - let areas = alignments.and_then(|alignments| { - if self.parameters.quant.lfq { - log::trace!("performing LFQ"); - let mut areas = sage_core::lfq::build_feature_map( - self.parameters.quant.lfq_settings, - self.parameters.precursor_charge, - &outputs.features, - ) - .quantify(&self.database, &outputs.ms1, &alignments); - - let q_precursor = sage_core::fdr::picked_precursor(&mut areas); - - log::info!("discovered {} target MS1 peaks at 5% FDR", q_precursor); - Some(areas) - } else { - None - } - }); + let areas = Self::quantify( + &self.parameters, + &self.database, + &outputs.features, + &outputs.ms1, + alignments, + ); log::info!( "discovered {} target peptide-spectrum matches at 1% FDR", @@ -627,6 +610,56 @@ impl Runner { Ok(telemetry) } + + pub fn align(parameters: &Search, features: &mut [Feature]) -> Vec { + let alignments = if parameters.align_rt { + log::info!("aligning retention times"); + sage_core::ml::retention_alignment::global_alignment( + features, + parameters.mzml_paths.len(), + ) + } else { + sage_core::ml::retention_alignment::no_alignment(features, parameters.mzml_paths.len()) + }; + alignments + } + + pub fn predict(parameters: &Search, database: &IndexedDatabase, features: &mut [Feature]) { + if parameters.predict_rt { + log::info!("predicting retention times "); + let _ = sage_core::ml::retention_model::predict(database, features); + }; + if parameters.predict_im { + log::info!("predicting mobility values"); + let _ = sage_core::ml::mobility_model::predict(database, features); + }; + } + + pub fn quantify( + parameters: &Search, + database: &IndexedDatabase, + features: &[Feature], + ms1_spectra: &MS1Spectra, + alignments: Vec, + ) -> Option)>> { + let areas = if parameters.quant.lfq { + let mut areas = sage_core::lfq::quantify( + ¶meters.quant.lfq_settings, + parameters.precursor_charge, + database, + features, + ms1_spectra, + alignments, + ); + let q_precursor = sage_core::fdr::picked_precursor(&mut areas); + log::info!("discovered {} target MS1 peaks at 5% FDR", q_precursor); + Some(areas) + } else { + None + }; + areas + } + pub fn serialize_feature(&self, feature: &Feature, filenames: &[String]) -> csv::ByteRecord { let mut record = csv::ByteRecord::new(); diff --git a/crates/sage-cloudpath/src/tdf.rs b/crates/sage-cloudpath/src/tdf.rs index 57290408..5c495c7d 100644 --- a/crates/sage-cloudpath/src/tdf.rs +++ b/crates/sage-cloudpath/src/tdf.rs @@ -11,14 +11,14 @@ use timsrust::readers::SpectrumReaderConfig as TimsrustSpectrumConfig; pub struct TdfReader; #[derive(Deserialize, Serialize, Debug, Clone, Copy)] -pub struct BrukerMS1CentoidingConfig { +pub struct BrukerMS1CentroidingConfig { pub mz_ppm: f32, pub ims_pct: f32, } -impl Default for BrukerMS1CentoidingConfig { +impl Default for BrukerMS1CentroidingConfig { fn default() -> Self { - BrukerMS1CentoidingConfig { + BrukerMS1CentroidingConfig { mz_ppm: 5.0, ims_pct: 3.0, } @@ -28,7 +28,7 @@ impl Default for BrukerMS1CentoidingConfig { #[derive(Default, Deserialize, Serialize, Debug, Clone, Copy)] pub struct BrukerProcessingConfig { pub ms2: TimsrustSpectrumConfig, - pub ms1: BrukerMS1CentoidingConfig, + pub ms1: BrukerMS1CentroidingConfig, } impl TdfReader { @@ -43,20 +43,19 @@ impl TdfReader { .with_path(path_name.as_ref()) .with_config(config.ms2.clone()) .finalize()?; - let mut spectra = self.read_msn_spectra(file_id, &spectrum_reader)?; + let mut spectra = Self::read_msn_spectra(file_id, &spectrum_reader)?; if requires_ms1 { - let ms1s = self.read_ms1_spectra(&path_name, file_id, config.ms1)?; + let ms1s = Self::read_ms1_spectra(&path_name, file_id, config.ms1)?; spectra.extend(ms1s); } Ok(spectra) } - fn read_ms1_spectra( - &self, + pub fn read_ms1_spectra( path_name: impl AsRef, file_id: usize, - config: BrukerMS1CentoidingConfig, + config: BrukerMS1CentroidingConfig, ) -> Result, timsrust::TimsRustError> { let start = std::time::Instant::now(); let frame_reader = timsrust::readers::FrameReader::new(path_name.as_ref())?; @@ -117,8 +116,7 @@ impl TdfReader { Ok(ms1_spectra) } - fn read_msn_spectra( - &self, + pub fn read_msn_spectra( file_id: usize, spectrum_reader: &SpectrumReader, ) -> Result, timsrust::TimsRustError> { @@ -221,7 +219,7 @@ impl PeakBuffer { // The "order" is sorted by intensity // This will be used later during the centroiding (for details check that implementation) - self.order.extend((0..self.len())); + self.order.extend(0..self.len()); self.order.sort_unstable_by(|&a, &b| { self.peaks[b] .intensity @@ -380,8 +378,7 @@ impl PeakBuffer { } } - self - .agg_buff + self.agg_buff .sort_unstable_by(|a, b| a.mz.partial_cmp(&b.mz).unwrap()); // println!("Centroiding: Start len: {}; end len: {};", arr_len, result.len()); // Ultra data is usually start: 40k end 10k, @@ -389,12 +386,10 @@ impl PeakBuffer { // rarely leaves peaks with intensity > 200 ... ive never seen // it happen. -JSP 2025-Jan - self - .agg_buff + self.agg_buff .drain(..) .into_iter() .map(|x| (x.mz, (x.intensity, x.im))) .unzip() } } - diff --git a/crates/sage/src/fdr.rs b/crates/sage/src/fdr.rs index 75658a38..554ee7b9 100644 --- a/crates/sage/src/fdr.rs +++ b/crates/sage/src/fdr.rs @@ -235,7 +235,7 @@ pub fn picked_precursor( .map(|score| ((score.ix, score.decoy), score.q)) .collect::>(); - peaks.par_iter_mut().for_each(|((ix), (peak, _))| { + peaks.par_iter_mut().for_each(|(ix, (peak, _))| { peak.q_value = scores[ix]; }); passing diff --git a/crates/sage/src/lfq.rs b/crates/sage/src/lfq.rs index 0a0ef60f..2f844957 100644 --- a/crates/sage/src/lfq.rs +++ b/crates/sage/src/lfq.rs @@ -1,9 +1,11 @@ use crate::database::{binary_search_slice, IndexedDatabase, PeptideIx}; +use crate::group_indices_by; use crate::mass::{composition, Composition, Tolerance, NEUTRON}; use crate::ml::{matrix::Matrix, retention_alignment::Alignment}; use crate::scoring::Feature; use crate::spectrum::MS1Spectra; use dashmap::DashMap; +use fnv::FnvHashMap; use rayon::prelude::*; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -50,6 +52,7 @@ pub struct LfqSettings { pub ppm_tolerance: f32, pub mobility_pct_tolerance: f32, pub combine_charge_states: bool, + pub mbr: bool, } impl Default for LfqSettings { @@ -61,6 +64,7 @@ impl Default for LfqSettings { ppm_tolerance: 5.0, mobility_pct_tolerance: 1.0, combine_charge_states: true, + mbr: true, } } } @@ -77,6 +81,7 @@ pub struct PrecursorRange { pub peptide: PeptideIx, pub file_id: usize, pub decoy: bool, + pub isotope_dist: [f32; 3], } /// Create a data structure analogous to [`IndexedDatabase`] - instaed of @@ -90,17 +95,18 @@ pub struct FeatureMap { } pub fn build_feature_map( - settings: LfqSettings, + settings: &LfqSettings, precursor_charge: (u8, u8), - features: &[Feature], + features: &[impl QuantifiableFeature], + db: &impl CompositionDatabase, ) -> FeatureMap { let map: DashMap = DashMap::default(); features .iter() - .filter(|feat| feat.peptide_q <= 0.01 && feat.label == 1) + .filter(|feat| feat.peptide_q() <= 0.01 && feat.label() == 1) .for_each(|feat| { // `features` is sorted by confidence, so just take the first entry - if !map.contains_key(&feat.peptide_idx) { + if !map.contains_key(&feat.peptide_idx()) { // let mass = if feat.isotope_error > 0.0 || feat.delta_mass >= settings.ppm_tolerance * 3.0 { // feat.expmass - feat.isotope_error // } else { @@ -110,20 +116,24 @@ pub fn build_feature_map( -settings.mobility_pct_tolerance, settings.mobility_pct_tolerance, ) - .bounds(feat.ims); + .bounds(feat.ims()); + let composition = feat.get_composition(db); + let dist = + crate::isotopes::peptide_isotopes(composition.carbon, composition.sulfur); map.insert( - feat.peptide_idx, + feat.peptide_idx(), PrecursorRange { - rt: feat.aligned_rt, - mass_lo: feat.calcmass, + rt: feat.aligned_rt(), + mass_lo: feat.calcmass(), mass_hi: 0.0, - peptide: feat.peptide_idx, - charge: feat.charge, + peptide: feat.peptide_idx(), + charge: feat.charge(), isotope: 0, - file_id: feat.file_id, + file_id: feat.file_id(), mobility_lo, mobility_hi, decoy: false, + isotope_dist: dist, }, ); } @@ -186,7 +196,7 @@ pub fn build_feature_map( ranges, min_rts, bin_size: 16 * 1024, - settings, + settings: settings.clone(), } } @@ -223,7 +233,6 @@ impl FeatureMap { /// Run label-free quantification module pub fn quantify( &self, - db: &IndexedDatabase, spectra: &MS1Spectra, alignments: &[Alignment], ) -> HashMap<(PrecursorId, bool), (Peak, Vec), fnv::FnvBuildHasher> { @@ -246,17 +255,13 @@ impl FeatureMap { }; let mut grid = scores.entry((id, entry.decoy)).or_insert_with(|| { - let p = &db[entry.peptide]; - let composition = p - .sequence - .iter() - .map(|r| composition(*r)) - .sum::(); - let dist = crate::isotopes::peptide_isotopes( - composition.carbon, - composition.sulfur, - ); - Grid::new(entry, RT_TOL, dist, alignments.len(), GRID_SIZE) + Grid::new( + entry, + RT_TOL, + entry.isotope_dist, + alignments.len(), + GRID_SIZE, + ) }); grid.add_entry(rt, entry.isotope, spectrum.file_id, peak.intensity); @@ -276,17 +281,13 @@ impl FeatureMap { }; let mut grid = scores.entry((id, entry.decoy)).or_insert_with(|| { - let p = &db[entry.peptide]; - let composition = p - .sequence - .iter() - .map(|r| composition(*r)) - .sum::(); - let dist = crate::isotopes::peptide_isotopes( - composition.carbon, - composition.sulfur, - ); - Grid::new(entry, RT_TOL, dist, alignments.len(), GRID_SIZE) + Grid::new( + entry, + RT_TOL, + entry.isotope_dist, + alignments.len(), + GRID_SIZE, + ) }); grid.add_entry(rt, entry.isotope, spectrum.file_id, peak.intensity); @@ -697,3 +698,117 @@ impl Query<'_> { }) } } + +pub fn quantify( + lfq_settings: &LfqSettings, + precursor_charge: (u8, u8), + database: &impl CompositionDatabase, + features: &[impl QuantifiableFeature], + ms1_spectra: &MS1Spectra, + alignments: Vec, +) -> FnvHashMap<(PrecursorId, bool), (Peak, Vec)> { + let areas = if lfq_settings.mbr { + log::info!("performing LFQ with MBR"); + build_feature_map(lfq_settings, precursor_charge, features, database) + .quantify(&ms1_spectra, &alignments) + } else { + log::info!("performing LFQ without MBR"); + let mut final_areas = FnvHashMap::default(); + // MS1Spectra, features and aligments are assumed to come from the same files and all be not empty + for (file_id, (local_ms1_spectra, local_features)) in ms1_spectra + .split_by_file() + .zip(split_features_by_file(features)) + .enumerate() + { + let mut file_alignment = alignments[file_id]; + file_alignment.file_id = 0; + let file_alignments = vec![file_alignment]; + let areas = + build_feature_map(lfq_settings, precursor_charge, &local_features, database) + .quantify(&local_ms1_spectra, &file_alignments); + areas.into_iter().for_each(|(entry, (peak, quants))| { + let quant = final_areas + .entry(entry) + .or_insert((peak, vec![-1.0; alignments.len()])); + quant.1[file_id] = quants[0]; + }); + } + final_areas + }; + areas +} + +fn split_features_by_file( + features: &[impl QuantifiableFeature], +) -> impl Iterator> + '_ { + group_indices_by(features, |f| f.file_id()) + .into_iter() + .map(|x| { + x.into_iter() + .map(|i| { + let mut feat = features[i].clone(); + feat.set_file_id(0); + feat + }) + .collect::>() + }) +} + +pub trait CompositionDatabase { + fn get_composition_for_feature(&self, feat: &impl QuantifiableFeature) -> Composition; +} + +impl CompositionDatabase for IndexedDatabase { + fn get_composition_for_feature(&self, feat: &impl QuantifiableFeature) -> Composition { + self[feat.peptide_idx()] + .sequence + .iter() + .map(|r| composition(*r)) + .sum::() + } +} + +pub trait QuantifiableFeature: Clone { + fn file_id(&self) -> usize; + fn peptide_idx(&self) -> PeptideIx; + fn peptide_q(&self) -> f32; + fn aligned_rt(&self) -> f32; + fn charge(&self) -> u8; + fn ims(&self) -> f32; + fn calcmass(&self) -> f32; + fn label(&self) -> i32; + fn set_file_id(&mut self, file_id: usize); + fn get_composition(&self, db: &impl CompositionDatabase) -> Composition { + db.get_composition_for_feature(self) + } +} + +impl QuantifiableFeature for Feature { + fn file_id(&self) -> usize { + self.file_id + } + fn peptide_idx(&self) -> PeptideIx { + self.peptide_idx + } + fn peptide_q(&self) -> f32 { + self.peptide_q + } + fn aligned_rt(&self) -> f32 { + self.aligned_rt + } + fn charge(&self) -> u8 { + self.charge + } + fn ims(&self) -> f32 { + self.ims + } + fn calcmass(&self) -> f32 { + self.calcmass + } + fn label(&self) -> i32 { + self.label + } + fn set_file_id(&mut self, file_id: usize) { + self.file_id = file_id; + } +} diff --git a/crates/sage/src/lib.rs b/crates/sage/src/lib.rs index 8d4e9b84..c4f4ae80 100644 --- a/crates/sage/src/lib.rs +++ b/crates/sage/src/lib.rs @@ -13,3 +13,14 @@ pub mod peptide; pub mod scoring; pub mod spectrum; pub mod tmt; + +fn group_indices_by K, K: Ord>(my_vec: &[X], key_fn: F) -> Vec> { + use itertools::Itertools; + (0..my_vec.len()) + .sorted_by_key(|&i| key_fn(&my_vec[i])) + .into_iter() + .group_by(|&i| key_fn(&my_vec[i])) + .into_iter() + .map(|(_, group)| group.into_iter().collect()) + .collect() +} diff --git a/crates/sage/src/ml/retention_alignment.rs b/crates/sage/src/ml/retention_alignment.rs index a6ddf85d..f8dc8fa9 100644 --- a/crates/sage/src/ml/retention_alignment.rs +++ b/crates/sage/src/ml/retention_alignment.rs @@ -23,14 +23,15 @@ use rayon::prelude::*; type FnvDashMap = DashMap>; -fn max_rt_by_file(features: &[Feature], n_files: usize) -> Vec { +fn max_rt_by_file(features: &[impl AlignableFeature], n_files: usize) -> Vec { let max_rt = (0..n_files) .map(|_| AtomicU32::new(0)) .map(|_| AtomicU32::new(0)) .collect::>(); features.par_iter().for_each(|feat| { - max_rt[feat.file_id].fetch_max(feat.rt.ceil() as u32, std::sync::atomic::Ordering::SeqCst); + max_rt[feat.file_id()] + .fetch_max(feat.rt().ceil() as u32, std::sync::atomic::Ordering::SeqCst); }); max_rt @@ -41,22 +42,27 @@ fn max_rt_by_file(features: &[Feature], n_files: usize) -> Vec { /// Return a map from PeptideIx to a map from File ID to average RT of the parent /// PeptideIX -fn mean_rt_by_file(features: &[Feature]) -> FnvDashMap> { +fn mean_rt_by_file( + features: &[impl AlignableFeature], +) -> FnvDashMap> { let rts: FnvDashMap> = DashMap::default(); features .par_iter() - .filter(|feat| feat.label == 1 && feat.spectrum_q <= 0.01) + .filter(|feat| feat.label() == 1 && feat.spectrum_q() <= 0.01) .for_each(|feat| { - rts.entry(feat.peptide_idx) + rts.entry(feat.peptide_idx()) .or_default() - .entry(feat.file_id) - .and_modify(|f| *f = f.min(feat.rt as f64)) - .or_insert(feat.rt as f64); + .entry(feat.file_id()) + .and_modify(|f| *f = f.min(feat.rt() as f64)) + .or_insert(feat.rt() as f64); }); rts } -fn rt_matrix(features: &[Feature], max_rt: &[f64]) -> (HashMap, Matrix) { +fn rt_matrix( + features: &[impl AlignableFeature], + max_rt: &[f64], +) -> (HashMap, Matrix) { let mean_rt = mean_rt_by_file(features); let (means, mat): (HashMap, Vec<_>) = mean_rt @@ -92,7 +98,7 @@ pub struct Alignment { pub intercept: f32, } -pub fn global_alignment(features: &mut [Feature], n_files: usize) -> Vec { +pub fn global_alignment(features: &mut [impl AlignableFeature], n_files: usize) -> Vec { let max_rt = max_rt_by_file(features, n_files); let (_, rt) = rt_matrix(features, &max_rt); @@ -160,14 +166,69 @@ pub fn global_alignment(features: &mut [Feature], n_files: usize) -> Vec Vec { + let max_rt = max_rt_by_file(features, n_files); + + let alignments = (0..n_files) + .into_par_iter() + .map(|file_id| Alignment { + file_id, + max_rt: max_rt[file_id] as f32, + slope: 1.0, + intercept: 0.0, + }) + .collect::>(); + + log::info!("aligned retention times across {} files", n_files); + + update_feature_aligned_rt(features, &alignments); + + alignments +} + +fn update_feature_aligned_rt(features: &mut [impl AlignableFeature], alignments: &[Alignment]) { features.par_iter_mut().for_each(|feature| { - let a = alignments[feature.file_id]; + let a = alignments[feature.file_id()]; // Calculate aligned RT // - Divide by maximum RT of this run // - Multiply by regression parameters - feature.aligned_rt = (feature.rt / a.max_rt) * a.slope + a.intercept; + let aligned_rt = (feature.rt() / a.max_rt) * a.slope + a.intercept; + feature.set_aligned_rt(aligned_rt); }); +} - alignments +pub trait AlignableFeature: Send + Sync { + fn rt(&self) -> f32; + fn file_id(&self) -> usize; + fn peptide_idx(&self) -> PeptideIx; + fn label(&self) -> i8; + fn spectrum_q(&self) -> f32; + fn set_aligned_rt(&mut self, aligned_rt: f32); +} + +impl AlignableFeature for Feature { + fn rt(&self) -> f32 { + self.rt + } + fn file_id(&self) -> usize { + self.file_id + } + fn peptide_idx(&self) -> PeptideIx { + self.peptide_idx + } + fn label(&self) -> i8 { + self.label as i8 + } + fn spectrum_q(&self) -> f32 { + self.spectrum_q + } + fn set_aligned_rt(&mut self, aligned_rt: f32) { + self.aligned_rt = aligned_rt; + } } diff --git a/crates/sage/src/modification.rs b/crates/sage/src/modification.rs index 5d025ea1..afdef9a4 100644 --- a/crates/sage/src/modification.rs +++ b/crates/sage/src/modification.rs @@ -4,7 +4,7 @@ use std::{ str::FromStr, }; -use serde::{de::Visitor, Deserialize, Serialize}; +use serde::Serialize; use crate::mass::VALID_AA; diff --git a/crates/sage/src/spectrum.rs b/crates/sage/src/spectrum.rs index e5f2849f..5c5d2f25 100644 --- a/crates/sage/src/spectrum.rs +++ b/crates/sage/src/spectrum.rs @@ -1,4 +1,5 @@ use crate::database::binary_search_slice; +use crate::group_indices_by; use crate::mass::{Tolerance, NEUTRON, PROTON}; /// A charge-less peak at monoisotopic mass @@ -161,6 +162,41 @@ impl Default for MS1Spectra { } } +impl MS1Spectra { + pub fn split_by_file(&self) -> impl Iterator + '_ { + let spectrum_indices = match self { + Self::NoMobility(spectra) => group_indices_by(spectra, |i| i.file_id), + Self::WithMobility(spectra) => group_indices_by(spectra, |i| i.file_id), + Self::Empty => vec![vec![]], + }; + spectrum_indices.into_iter().map(move |indices| match self { + Self::NoMobility(all_spectra) => { + let spectra = indices + .into_iter() + .map(|i| { + let mut spec = all_spectra[i].clone(); + spec.file_id = 0; + spec + }) + .collect::>(); + MS1Spectra::NoMobility(spectra) + } + Self::WithMobility(all_spectra) => { + let spectra = indices + .into_iter() + .map(|i| { + let mut spec = all_spectra[i].clone(); + spec.file_id = 0; + spec + }) + .collect::>(); + MS1Spectra::WithMobility(spectra) + } + Self::Empty => Self::Empty, + }) + } +} + /// Binary search followed by linear search to select the most intense peak within `tolerance` window /// * `offset` - this parameter allows for a static adjustment to the lower and upper bounds of the search window. /// Sage subtracts a proton (and assumes z=1) for all experimental peaks, and stores all fragments as monoisotopic @@ -394,7 +430,7 @@ impl SpectrumProcessor { } } - pub fn process_with_mobility(&self, spectrum: RawSpectrum) -> ProcessedSpectrum { + pub fn process_with_mobility(spectrum: RawSpectrum) -> ProcessedSpectrum { assert!( spectrum.ms_level == 1, "Logic error, mobility processing should only be used for MS1"