diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 445fa2c..07c30b7 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -29,5 +29,6 @@ jobs: -o foo \ --annotate-matches \ --write-pin \ + --disable-telemetry-i-dont-want-to-improve-sage \ tests/config-cli.json \ tests/*.mzML diff --git a/CHANGELOG.md b/CHANGELOG.md index 834897c..2e425b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added - Support for semi-enzymatic digests (`database.enzyme.semi_enzymatic` parameter) +- Ability to directly export matched fragment ions (e.g. for spectral library generation or rescoring) with the `--annotate-matches` CLI option. This is compatible with the `--parquet` CLI option as well. Annotations will be written to `matched_fragments.sage.tsv` or `matched_fragments.sage.parquet`. +- Sage sends basic telemetry data (version of Sage, run time, OS, total memory, # of CPU cores, # of peptides and fragments in the database, # of input files, whether LFQ is used, which TMT tags (if any) are used, whether parquet output is written) to a remote server. No information about your actual data is sent - e.g. identifications, quantities, organism, or modifications are NOT tracked or reported. This data will be used to help focus efforts on improving Sage and figuring out which features are most used. Please take a look at `crates/sage-cli/src/telemetry.rs` to see exactly what is sent! You can disable sending telemetry data by using the `--disable-telemetry-i-dont-want-to-improve-sage` CLI flag. +### Changed +- Modified visibility on some crate internals to support the [sagepy project](https://github.com/theGreatHerrLebert/sagepy) +- Added `psm_id` field to various output files to match the new `--annotate-matches` option. 
+### Removed +- Removed the `ms1_intensity` field from CSV output, since it is essentially useless + ## [v0.14.4] ### Added diff --git a/Cargo.lock b/Cargo.lock index 5859a1c..a463e60 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -222,7 +222,7 @@ dependencies = [ "hex", "http", "hyper", - "ring", + "ring 0.16.20", "time", "tokio", "tower", @@ -446,7 +446,7 @@ dependencies = [ "http", "http-body", "hyper", - "hyper-rustls", + "hyper-rustls 0.23.2", "lazy_static", "pin-project-lite", "serde", @@ -943,6 +943,15 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" +[[package]] +name = "encoding_rs" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" +dependencies = [ + "cfg-if", +] + [[package]] name = "env_logger" version = "0.8.4" @@ -1286,10 +1295,24 @@ dependencies = [ "http", "hyper", "log", - "rustls", + "rustls 0.20.8", "rustls-native-certs", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", +] + +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http", + "hyper", + "rustls 0.21.9", + "tokio", + "tokio-rustls 0.24.1", ] [[package]] @@ -1350,6 +1373,12 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" +[[package]] +name = "ipnet" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" + [[package]] name = "itertools" version = "0.10.5" @@ -1555,6 +1584,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "mime" +version = "0.3.17" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + [[package]] name = "miniz_oxide" version = "0.5.4" @@ -1576,6 +1611,15 @@ dependencies = [ "windows-sys 0.42.0", ] +[[package]] +name = "ntapi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +dependencies = [ + "winapi", +] + [[package]] name = "num" version = "0.4.1" @@ -1993,6 +2037,45 @@ version = "0.6.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +[[package]] +name = "reqwest" +version = "0.11.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" +dependencies = [ + "base64 0.21.0", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-rustls 0.24.2", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls 0.21.9", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-rustls 0.24.1", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots", + "winreg", +] + [[package]] name = "ring" version = "0.16.20" @@ -2002,12 +2085,26 @@ dependencies = [ "cc", "libc", "once_cell", - "spin", - "untrusted", + "spin 0.5.2", + "untrusted 0.7.1", "web-sys", "winapi", ] +[[package]] +name = "ring" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e" +dependencies = [ + "cc", + "getrandom", + "libc", + "spin 0.9.8", + "untrusted 0.9.0", + "windows-sys 0.48.0", +] + [[package]] name = "roxmltree" version = "0.14.1" @@ 
-2047,11 +2144,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f" dependencies = [ "log", - "ring", + "ring 0.16.20", "sct", "webpki", ] +[[package]] +name = "rustls" +version = "0.21.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629648aced5775d558af50b2b4c7b02983a04b312126d45eeead26e7caa498b9" +dependencies = [ + "log", + "ring 0.17.3", + "rustls-webpki", + "sct", +] + [[package]] name = "rustls-native-certs" version = "0.6.2" @@ -2073,6 +2182,16 @@ dependencies = [ "base64 0.21.0", ] +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring 0.17.3", + "untrusted 0.9.0", +] + [[package]] name = "ryu" version = "1.0.11" @@ -2098,6 +2217,7 @@ dependencies = [ "sage-core", "serde", "serde_json", + "sysinfo", ] [[package]] @@ -2116,6 +2236,7 @@ dependencies = [ "parquet 44.0.0", "quick-xml", "rayon", + "reqwest", "sage-core", "serde", "serde_json", @@ -2161,8 +2282,8 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" dependencies = [ - "ring", - "untrusted", + "ring 0.16.20", + "untrusted 0.7.1", ] [[package]] @@ -2231,6 +2352,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa 1.0.4", + "ryu", + "serde", +] + [[package]] name = "sha1" version = "0.10.5" @@ -2290,6 +2423,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + [[package]] name = "static_assertions" version = "1.1.0" @@ -2319,6 +2458,21 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sysinfo" +version = "0.29.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd727fc423c2060f6c92d9534cef765c65a6ed3f428a03d7def74a8c4348e666" +dependencies = [ + "cfg-if", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "rayon", + "winapi", +] + [[package]] name = "termcolor" version = "1.1.3" @@ -2461,11 +2615,21 @@ version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" dependencies = [ - "rustls", + "rustls 0.20.8", "tokio", "webpki", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.9", + "tokio", +] + [[package]] name = "tokio-stream" version = "0.1.11" @@ -2616,6 +2780,12 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" version = "2.3.1" @@ -2692,6 +2862,18 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"23639446165ca5a5de86ae1d8896b737ae80319560fbaa4c2887b7da6e7ebd7d" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.83" @@ -2737,8 +2919,17 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd" dependencies = [ - "ring", - "untrusted", + "ring 0.16.20", + "untrusted 0.7.1", +] + +[[package]] +name = "webpki-roots" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87" +dependencies = [ + "webpki", ] [[package]] @@ -2818,6 +3009,15 @@ dependencies = [ "windows-targets 0.42.1", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-targets" version = "0.42.1" @@ -2962,6 +3162,15 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "winreg" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" +dependencies = [ + "winapi", +] + [[package]] name = "xmlparser" version = "0.13.5" diff --git a/crates/sage-cli/Cargo.toml b/crates/sage-cli/Cargo.toml index ec9ac45..26b0aa3 100644 --- a/crates/sage-cli/Cargo.toml +++ b/crates/sage-cli/Cargo.toml @@ -30,4 +30,5 @@ rayon = "1.5" regex = "1.0" ryu = "1.0" serde = { version="1.0", features = ["derive"] } -serde_json = "1.0" \ No newline at end of file +serde_json = "1.0" +sysinfo = "0.29" \ No newline at end of file diff --git a/crates/sage-cli/src/main.rs 
b/crates/sage-cli/src/main.rs index add4c5c..cb7ee14 100644 --- a/crates/sage-cli/src/main.rs +++ b/crates/sage-cli/src/main.rs @@ -14,6 +14,7 @@ use std::time::Instant; mod input; mod output; +mod telemetry; struct Runner { database: IndexedDatabase, @@ -238,7 +239,7 @@ impl Runner { .collect::() } - pub fn run(mut self, parallel: usize, parquet: bool) -> anyhow::Result<()> { + pub fn run(mut self, parallel: usize, parquet: bool) -> anyhow::Result { let scorer = Scorer { db: &self.database, precursor_tol: self.parameters.precursor_tol, @@ -393,7 +394,15 @@ impl Runner { info!("finished in {}s", run_time); info!("cite: \"Sage: An Open-Source Tool for Fast Proteomics Searching and Quantification at Scale\" https://doi.org/10.1021/acs.jproteome.3c00486"); - Ok(()) + let telemetry = telemetry::Telemetry::new( + self.parameters, + self.database.peptides.len(), + self.database.fragments.len(), + parquet, + run_time, + ); + + Ok(telemetry) } } @@ -471,6 +480,12 @@ fn main() -> anyhow::Result<()> { .action(clap::ArgAction::SetTrue) .help("Write percolator-compatible `.pin` output files"), ) + .arg( + Arg::new("disable-telemetry") + .long("disable-telemetry-i-dont-want-to-improve-sage") + .action(clap::ArgAction::SetFalse) + .help("Disable sending telemetry data"), + ) .help_template( "{usage-heading} {usage}\n\n\ {about-with-newline}\n\ @@ -485,12 +500,20 @@ fn main() -> anyhow::Result<()> { .unwrap_or_else(|| num_cpus::get() as u16 / 2) as usize; let parquet = matches.get_one::("parquet").copied().unwrap_or(false); + let send_telemetry = matches + .get_one::("disable-telemetry") + .copied() + .unwrap_or(true); let input = Input::from_arguments(matches)?; let runner = input.build().and_then(Runner::new)?; - runner.run(parallel, parquet)?; + let tel = runner.run(parallel, parquet)?; + + if send_telemetry { + tel.send(); + } Ok(()) } diff --git a/crates/sage-cli/src/telemetry.rs b/crates/sage-cli/src/telemetry.rs new file mode 100644 index 0000000..7f901da --- /dev/null +++ 
b/crates/sage-cli/src/telemetry.rs @@ -0,0 +1,77 @@ +//! Send a minimal telemetry report + +use sage_core::tmt::Isobaric; +use serde::Serialize; +use sysinfo::{System, SystemExt}; + +#[derive(Debug, Serialize)] +pub struct Telemetry { + // Which version of Sage? + version: String, + // How many peptides are in the fragment index? + peptides: usize, + // How many fragments are in the index? + fragments: usize, + // How many files are being processed? + files: usize, + // How long did analysis take? + runtime_secs: u64, + + // Is LFQ used? + lfq: bool, + // Which kind of TMT tags are used, if any? + tmt: Option, + // Are results written in parquet format? + parquet: bool, + + // Details about the operating system and computer: + // - Which OS? + // - Total memory available + // - Number of CPU cores + os_name: String, + total_memory: u64, + cpus: usize, +} + +impl Telemetry { + pub fn new( + settings: crate::input::Search, + peptides: usize, + fragments: usize, + parquet: bool, + runtime_secs: u64, + ) -> Telemetry { + let mut system = System::default(); + system.refresh_all(); + + Telemetry { + version: settings.version, + peptides, + fragments, + files: settings.mzml_paths.len(), + runtime_secs, + lfq: settings.quant.lfq, + tmt: settings.quant.tmt, + parquet, + os_name: system.long_os_version().unwrap_or_default(), + total_memory: system.total_memory(), + cpus: num_cpus::get(), + } + } + + pub fn send(self) { + log::trace!("sending telemetry..."); + // doesn't matter if it fails + match sage_cloudpath::util::send_data( + "https://pax3h44gubc6o5ci23knddnw2i0qnuaz.lambda-url.us-west-2.on.aws/", + &self, + ) { + Ok(_) => { + log::trace!("telemetry data sent successfully!") + } + Err(e) => { + log::trace!("error while sending telemetry: {}", e) + } + } + } +} diff --git a/crates/sage-cloudpath/Cargo.toml b/crates/sage-cloudpath/Cargo.toml index b275e79..a6a2f23 100644 --- a/crates/sage-cloudpath/Cargo.toml +++ b/crates/sage-cloudpath/Cargo.toml @@ -25,6 +25,7 @@ tokio 
= { version = "1.0", features = ["fs", "io-util", "rt", "macros"] } quick-xml = { version = "0.30.0", features = ["async-tokio"] } timsrust = "0.2.0" rayon = "1.5" +reqwest = { version = "0.11", features = ["json", "rustls-tls"], default-features = false } serde = { version="1.0", features = ["derive"] } serde_json = "1.0" diff --git a/crates/sage-cloudpath/src/util.rs b/crates/sage-cloudpath/src/util.rs index 1d6eb2f..e7ff209 100644 --- a/crates/sage-cloudpath/src/util.rs +++ b/crates/sage-cloudpath/src/util.rs @@ -1,5 +1,6 @@ use crate::{read_and_execute, Error}; use sage_core::spectrum::RawSpectrum; +use serde::Serialize; use tokio::io::AsyncReadExt; pub fn read_mzml>( @@ -55,3 +56,20 @@ where Ok(serde_json::from_str(&contents)?) }) } + +/// Send telemetry data +pub fn send_data(url: &str, data: &T) -> Result<(), Box> +where + T: Serialize, +{ + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()?; + + rt.block_on(async { + let client = reqwest::ClientBuilder::default().https_only(true).build()?; + let res = client.post(url).json(data).send().await?; + res.error_for_status()?; + Ok(()) + }) +}