Skip to main content

Quickstart: Rust

Add the umbrella crate and openproteo-core to your Cargo.toml:

[dependencies]
openproteo-io = { path = "../OpenProteo/crates/openproteo-io", features = ["all"] }
openproteo-core = { path = "../OpenProteoCore" }

The all feature pulls in every vendor. To trim binary size you can opt in to one vendor at a time: features = ["thermo"], features = ["bruker"], features = ["waters"].

Convert a file to mzML

use std::path::Path;

/// Detects the vendor format of `sample.raw` and converts it to `sample.mzML`.
///
/// Returns an error when the input is not recognized by `detect_format` or
/// when the conversion itself fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let raw_path = Path::new("sample.raw");
    let mzml_path = Path::new("sample.mzML");

    // `detect_format` yields `None` for files it does not recognize; surface
    // that as an error instead of continuing.
    let source = match openproteo_io::detect_format(raw_path) {
        Some(found) => found,
        None => return Err("not a recognized vendor format".into()),
    };

    // Third argument is the `indexed` flag of `convert_to_mzml`.
    let indexed = true;
    openproteo_io::convert_to_mzml(source, mzml_path, indexed)?;

    Ok(())
}

Iterate spectra without writing mzML

use openproteo_core::SpectrumSource;

/// Collects all spectra of `sample.raw`, prints a one-line summary, and then
/// prints a short description of at most the first five spectra.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let raw_path = std::path::Path::new("sample.raw");
    let source = match openproteo_io::detect_format(raw_path) {
        Some(found) => found,
        None => return Err("not a vendor format".into()),
    };

    // `collect` hands back the spectrum records together with run metadata.
    let (spectra, run_meta) = openproteo_io::collect(source)?;
    println!("{} spectra from {}", spectra.len(), run_meta.instrument.name);

    // Preview only the leading spectra to keep the output short.
    spectra.iter().take(5).for_each(|spec| {
        println!(
            "idx={} ms={} rt={:.2}s peaks={}",
            spec.index, spec.ms_level, spec.retention_time_sec, spec.mz.len()
        );
    });

    Ok(())
}

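For long runs you usually want the streaming variant. Open the vendor source directly and drive iter_spectra yourself — this is what convert_to_mzml does internally. The example below refers to the opentimstdf crate directly, so it must also be listed in your dependencies: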

use openproteo_core::SpectrumSource;

let mut src = opentimstdf::mzml::TdfSource::open("sample.d")?;
for s in src.iter_spectra() {
// process one spectrum at a time
}
# Ok::<(), Box<dyn std::error::Error>>(())

Validate

use openproteo_core::conformance::assert_iter_invariants;

let detected = openproteo_io::detect_format(std::path::Path::new("sample.raw")).unwrap();
let (records, _) = openproteo_io::collect(detected)?;
let n = assert_iter_invariants(records.iter().cloned())?;
println!("conformance ok: {n} spectra");
# Ok::<(), Box<dyn std::error::Error>>(())

Arrow

Enable the arrow feature on openproteo-core and build a record batch directly:

use openproteo_core::arrow::SpectrumBatchBuilder;
use openproteo_core::SpectrumSource;

let mut src = opentimstdf::mzml::TdfSource::open("sample.d")?;
let mut b = SpectrumBatchBuilder::new(None);
for s in src.iter_spectra() {
b.push(&s);
}
let batch = b.finish()?;
println!("{} rows x {} cols", batch.num_rows(), batch.num_columns());
# Ok::<(), Box<dyn std::error::Error>>(())