Skip to content

Commit

Permalink
MS3-TMT quant
Browse files Browse the repository at this point in the history
  • Loading branch information
lazear committed Sep 3, 2022
1 parent d1e5c17 commit 1364a08
Show file tree
Hide file tree
Showing 8 changed files with 492 additions and 321 deletions.
97 changes: 81 additions & 16 deletions src/bin/sage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ fn process_mzml_file<P: AsRef<Path>>(
let spectra = sage::mzml::MzMlReader::read(&p)?;
// let mut scores = sage::mzml::MzMlReader::read_ms2(&p)?
let mut scores = spectra
.par_iter()
.into_par_iter()
.filter(|spec| spec.mz.len() >= search.min_peaks)
.filter_map(|spec| sp.process(spec))
.map(|spec| sp.process(spec))
.flat_map(|spec| scorer.score(&spec, search.report_psms))
.collect::<Vec<_>>();

Expand Down Expand Up @@ -163,28 +163,93 @@ fn process_mzml_file_sps<P: AsRef<Path>>(
panic!("expecting .mzML files as input!")
}

let spectra = sage::mzml::MzMlReader::read(&p)?;
// let mut scores = sage::mzml::MzMlReader::read_ms2(&p)?
// let mut scores = spectra
// .par_iter()
// .filter(|spec| spec.mz.len() >= search.min_peaks)
// .filter_map(|spec| sp.process(spec))
// .flat_map(|spec| scorer.score(&spec, search.report_psms))
// .collect::<Vec<_>>();
let mut bad_scans = 0;
let mut total_scans = 0;
let spectra = sage::mzml::MzMlReader::read(&p)?
.into_par_iter()
.map(|spec| sp.process(spec))
.collect::<Vec<_>>();

let mut path = p.as_ref().to_path_buf();
path.set_extension("quant.csv");

let mut wtr = csv::WriterBuilder::default().from_path(&path)?;
wtr.write_record(&[
"scannr",
// "peptide",
// "sps_purity",
// "correct_precursors",
"tmt_1",
"tmt_2",
"tmt_3",
"tmt_4",
"tmt_5",
"tmt_6",
"tmt_7",
"tmt_8",
"tmt_9",
"tmt_10",
"tmt_11",
])?;

let mut scores = Vec::new();
for spectrum in &spectra {
if spectrum.ms_level == 3 {
let r = scorer.calculate_sps_purity(&sp, &spectra, &spectrum);
total_scans += 1;
if spectrum.level == 3 {
// if let Some(quant) = sage::tmt::quantify_sps(&scorer, &spectra, &spectrum) {
// let mut v = vec![
// spectrum.scan.to_string(),
// quant.hit.peptide.clone(),
// quant.hit_purity.ratio.to_string(),
// quant.hit_purity.correct_precursors.to_string(),
// ];
// v.extend(
// quant
// .intensities
// .iter()
// .map(|peak| peak.map(|p| p.intensity.to_string()).unwrap_or_default()),
// );
// wtr.write_record(v)?;
// // scores.push(quant.hit);

// // if let Some(chimera) = quant.chimera {
// // scores.push(chimera);
// // }
// }
} else if spectrum.level == 2 {
if let Some(hit) = scorer.score(spectrum, search.report_psms).first() {
scores.push(hit.clone());
}
}
}
wtr.flush()?;

eprintln!("{} {}", bad_scans, total_scans);
(&mut scores).par_sort_unstable_by(|a, b| a.poisson.total_cmp(&b.poisson));
let passing_psms = assign_q_values(&mut scores);

let mut path = p.as_ref().to_path_buf();
path.set_extension("sage.pin");

if let Some(mut directory) = search.output_directory.clone() {
directory.push(path.file_name().expect("BUG: should be a filename!"));
path = directory;
}

let mut writer = csv::WriterBuilder::new()
.delimiter(b'\t')
.from_path(&path)?;

let total_psms = scores.len();

for (idx, mut score) in scores.into_iter().enumerate() {
score.specid = idx;
writer.serialize(score)?;
}

info!(
"{:?}: assigned {} PSMs ({} with 1% FDR)",
p.as_ref(),
total_psms,
passing_psms,
);

Ok(path)
}

Expand Down
20 changes: 11 additions & 9 deletions src/database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,15 +251,16 @@ impl IndexedDatabase {
///
/// All matches returned by the query will be within the specified tolerance
/// parameters
pub fn query<'d, 'q>(
pub fn query<'d>(
&'d self,
query: &'q ProcessedSpectrum,
// query: &'q ProcessedSpectrum,
precursor_mass: f32,
precursor_tol: Tolerance,
fragment_tol: Tolerance,
min_isotope_err: i8,
max_isotope_err: i8,
) -> IndexedQuery<'d, 'q> {
let (precursor_lo, precursor_hi) = precursor_tol.bounds(query.monoisotopic_mass);
) -> IndexedQuery<'d> {
let (precursor_lo, precursor_hi) = precursor_tol.bounds(precursor_mass);

let (pre_idx_lo, pre_idx_hi) = binary_search_slice(
&self.peptides,
Expand All @@ -270,7 +271,8 @@ impl IndexedDatabase {

IndexedQuery {
db: self,
query,
// query,
precursor_mass,
precursor_tol,
fragment_tol,
min_isotope_err,
Expand All @@ -297,9 +299,9 @@ impl std::ops::Index<PeptideIx> for IndexedDatabase {
}
}

pub struct IndexedQuery<'d, 'q> {
pub struct IndexedQuery<'d> {
db: &'d IndexedDatabase,
query: &'q ProcessedSpectrum,
precursor_mass: f32,
precursor_tol: Tolerance,
fragment_tol: Tolerance,
min_isotope_err: i8,
Expand All @@ -308,11 +310,11 @@ pub struct IndexedQuery<'d, 'q> {
pub pre_idx_hi: usize,
}

impl<'d, 'q> IndexedQuery<'d, 'q> {
impl<'d> IndexedQuery<'d> {
/// Search for a specified `fragment_mz` within the database
pub fn page_search(&self, fragment_mz: f32) -> impl Iterator<Item = &Theoretical> {
let (fragment_lo, fragment_hi) = self.fragment_tol.bounds(fragment_mz);
let (precursor_lo, precursor_hi) = self.precursor_tol.bounds(self.query.monoisotopic_mass);
let (precursor_lo, precursor_hi) = self.precursor_tol.bounds(self.precursor_mass);

// Locate the left and right page indices that contain matching fragments
// Note that we need to multiply by `bucket_size` to transform these into
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ pub mod mzml;
pub mod peptide;
pub mod scoring;
pub mod spectrum;
pub mod tmt;
35 changes: 7 additions & 28 deletions src/mzml.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::spectrum::Precursor;
use quick_xml::events::Event;
use quick_xml::Reader;
use std::io::{BufRead, BufReader};
Expand All @@ -22,19 +23,11 @@ impl std::fmt::Display for MzMlError {

impl std::error::Error for MzMlError {}

#[derive(Default, Debug, Copy, Clone)]
pub struct Precursor {
pub mz: f32,
pub intensity: Option<f32>,
pub charge: Option<u8>,
pub scan: Option<usize>,
}

#[derive(Default, Debug, Clone)]
pub struct Spectrum {
pub ms_level: usize,
pub ms_level: u8,
pub scan_id: usize,
pub precursor: Vec<Precursor>,
pub precursors: Vec<Precursor>,
pub representation: Representation,

// Scan start time
Expand Down Expand Up @@ -103,21 +96,7 @@ const SELECTED_ION_CHARGE: &str = "MS:1000041";

#[derive(Default)]
pub struct MzMlReader {
ms_level: Option<usize>,
}

pub fn find_spectrum_by_id(spectra: &[Spectrum], scan_id: usize) -> Option<&Spectrum> {
// First try indexing by scan
if let Some(first) = spectra.get(scan_id.saturating_sub(1)) {
if first.scan_id == scan_id {
return Some(first);
}
}
// Fall back to binary search
let idx = spectra
.binary_search_by(|spec| spec.scan_id.cmp(&scan_id))
.ok()?;
spectra.get(idx)
ms_level: Option<u8>,
}

impl MzMlReader {
Expand All @@ -126,7 +105,7 @@ impl MzMlReader {
/// # Example
///
/// A minimum level of 2 will not parse or return MS1 scans
pub fn with_level_filter(ms_level: usize) -> Self {
pub fn with_level_filter(ms_level: u8) -> Self {
Self {
ms_level: Some(ms_level),
}
Expand Down Expand Up @@ -242,7 +221,7 @@ impl MzMlReader {
match accession {
MS_LEVEL => {
let level = extract!(ev, b"value");
let level = std::str::from_utf8(&level)?.parse::<usize>()?;
let level = std::str::from_utf8(&level)?.parse::<u8>()?;
if let Some(filter) = self.ms_level {
if level != filter {
spectrum = Spectrum::default();
Expand Down Expand Up @@ -337,7 +316,7 @@ impl MzMlReader {
(Some(State::BinaryDataArray), b"binaryDataArray") => Some(State::Spectrum),
(Some(State::SelectedIon), b"selectedIon") => {
if precursor.mz != 0.0 {
spectrum.precursor.push(precursor);
spectrum.precursors.push(precursor);
precursor = Precursor::default();
}
Some(State::Spectrum)
Expand Down
Loading

0 comments on commit 1364a08

Please sign in to comment.