Skip to content

Commit

Permalink
Merge pull request #4 from colindaven/dev
Browse files Browse the repository at this point in the history
add argparse, improve debug, add trim
  • Loading branch information
colindaven authored May 31, 2021
2 parents e86867c + 37cf38c commit 4986dc4
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 17 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ edition = "2018"

[dependencies]
bio = "*"
argparse = "0.2.2"

16 changes: 16 additions & 0 deletions run_big_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# Colin Davenport, April 2021

oligo="ATCCA"
testfile="/ngsssd1/rcug/wochenende_test/COPD/COPD_D_MG000060_2020_R1.fastq"

echo "running command for oligo $oligo"
cargo build --release
cat $testfile | target/release/rs_demultiplex --remove --barcode $oligo > $oligo.txt

echo "produced this output - head"
head -n 12 $oligo.txt

echo "removing file"
#rm $oligo.txt

4 changes: 3 additions & 1 deletion run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

oligo="ATC"
echo "running command for oligo $oligo"
cargo run -q && cat test.fastq | target/debug/rs_demultiplex $oligo > $oligo.txt
cargo build
#cargo run -q && cat test.fastq | target/debug/rs_demultiplex --remove --barcode $oligo > $oligo.txt
cat test.fastq | target/debug/rs_demultiplex --remove --barcode $oligo > $oligo.txt

echo "produced this output - head"
head -n 12 $oligo.txt
Expand Down
116 changes: 100 additions & 16 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,33 @@
//Usage: cat Undetermined_S0_R1.fastq | target/release/rs_demultiplex AACTCCGC > AACTCCGC.txt

extern crate bio;
extern crate argparse;

use argparse::{ArgumentParser, Store, StoreTrue};
use std::env;
use std::str;
use std::fs::File;
//use std::io::prelude::*;
//use std::path::Path;
use std::collections::HashMap;
use std::io;
use std::io::Write;
use bio::io::fastq;
use bio::io::fastq::FastqRead;

// ## Changelog
//0.10 - now works for variable length oligos, was previously just 8bp oligos

// ## Changelog
// 0.14 - added bigger test script and debug parameters
// 0.13 - barcode removal
// 0.12 - add arg parsing
// 0.11 - add arg parsing basis
// 0.10 - now works for variable length oligos, was previously just 8bp oligos

fn version() -> String {
let version: String = str::to_string("0.13");
eprintln!("rs-demultiplex version: {}. Reads input from std in, eg cat x.fastq | rs_demultiplex --barcode AGAG --remove > barcode.fastq ", &version);
version
}


fn read_barcodes () -> Vec<String> {
Expand Down Expand Up @@ -47,12 +61,14 @@ fn build_file_map(barcodes: &[String]) -> HashMap<String, File> {
files
}

fn check_args ((local_args: &Vec<String>, version: &str), -> bool) {
/**
fn check_args (local_args: &Vec<String>, version: &str, -> bool) {
let mut args_ok: bool = false;
if local_args.len() < 2 {
eprintln!("Version: {}. Usage: supply an input oligo, and remember to pipe in data. eg cat in.fastq | rs_demultiplex AGAGAGAG > AGAGAGAG.fastq", version);
&args_ok = false;
//return;
//break/exit etc
}
else if local_args.len() >= 2 {
Expand All @@ -62,22 +78,63 @@ fn check_args ((local_args: &Vec<String>, version: &str), -> bool) {
}
return args_ok
}
*/


fn main() {


let version = "0.10";
//Warning - if you set debug to true, errors and warnings will be sent to stderr (previously stdout). Check FASTQ is conform!
let debug: bool = false;
version();


////////////////
// Parse input arguments

//let mut input_file = "test.fastq".to_string();
let mut input_oligo = "ATAT".to_string();
let mut remove_oligo = false;

{ // this block limits scope of borrows by parser.refer() method
let mut parser = ArgumentParser::new();

parser.refer(&mut input_oligo)
.add_option(&["-b", "--barcode"], Store,
"Oligo barcode eg AGGATTCC to search for. Any length.");

parser.refer(&mut remove_oligo)
.add_option(&["-r", "--remove"], StoreTrue,
"Set this to remove barcode sequence and quality from FASTQ file. Default: off");
/*
parser.refer(&mut input_file)
.add_option(&["-f", "--input_file"], Store,
"Input file FASTQ (not functional, better to just cat the file in!");
*/
parser.parse_args_or_exit();
}
//let input_csv: String = str::to_string(&input_file);
let input_barcode: String = str::to_string(&input_oligo);
let remove_oligo = remove_oligo;

if debug {
if remove_oligo {
eprintln!("Remove parameter has been set");
}
}

// Args
/*
let args: Vec<String> = env::args().collect();
let mut args_ok: bool = false;
args_ok = check_args(&args, &version);
if !args_ok {
eprintln("Args not ok, exiting");
return
}
*/

let barcode_from_args = &args[1];
let barcode_from_args = input_barcode;
//let barcodes_vector: Vec<String> = read_barcodes();

/*
Expand All @@ -102,15 +159,13 @@ fn main() {
let mut counts_vector: [i32; 30] = [0; 30];
//let file_map = build_file_map(&barcodes_vector);

//Warning - if you set debug to true, errors and warnings will be sent to stdout and fastq will not be format conform
let debug: bool = false;

while let Ok(()) = reader.read(&mut record) {

if record.is_empty() {
let _check = record.check();
if debug {
println!("Warning: Record empty {} ", record);
eprintln!("Warning: Record empty {} ", record);
}

break;
Expand All @@ -124,34 +179,63 @@ fn main() {

let barcode_from_args_length = barcode_from_args.len();
let sequence_oligo = &sequence_text[0..barcode_from_args_length];
//println!("barcode args {}, sequence_oligo {} ", &barcode_from_args, sequence_oligo);
//eprintln!("barcode args {}, sequence_oligo {} ", &barcode_from_args, sequence_oligo);

if sequence_oligo == barcode_from_args {
if debug {
println!("Hit ! Barcode {}, seq_oligo from read {} ", &barcode_from_args, sequence_oligo);
eprintln!("Hit ! Barcode {}, seq_oligo from read {} ", &barcode_from_args, sequence_oligo);
}
counts_vector[0] += 1;
//write to stdout
writer.write_record(&record);
if remove_oligo {
// Modify the fastq record seq and qual lines to remove the oligo/barcode
let id = record.id();
let sequence = record.seq();
let qual = record.qual();

// TODO Convert to TextSlice and u8 respectively.
let trim_seq = &sequence[barcode_from_args_length..sequence.len()];
let trim_qual = &qual[barcode_from_args_length..qual.len()];
let record_trimmed = fastq::Record::with_attrs(id, record.desc(), trim_seq, trim_qual);

if debug {
let mut sequence_text = str::from_utf8(sequence).unwrap();
let mut qual_text = str::from_utf8(qual).unwrap();
// now modify the oligo text and qual, convert
sequence_text = &sequence_text[barcode_from_args_length..sequence_text.len()];
qual_text = &qual_text[barcode_from_args_length..qual_text.len()];
println!("Hit ! Barcode {}, seq_oligo from read {}, seq text oligo removed {}", &barcode_from_args, sequence_oligo, sequence_text);
println!("Hit ! Barcode {}, seq_oligo from read {}, qual text oligo removed {}", &barcode_from_args, sequence_oligo, qual_text);
println!("sequence length: {}, qual length {}", sequence.len(), qual.len());
println!("trim_seq length: {}, trim_qual length {}", trim_seq.len(), trim_qual.len());
}
// Do not create a new record object - performance. Just write. write_fmt failed though.
//write_fmt!(writer, "@{}\n{}\n+\n{}\n", id, trim_seq, trim_qual);
writer.write_record(&record_trimmed);

}
else {
//write to stdout without changing
writer.write_record(&record);
}
}

// keep track of the total number of bases
n_bases += record.seq().len();
if n_bases % 1000000 == 0 {
if debug {
println!("Number of bases read: {} ", n_bases);
eprintln!("Number of bases read: {} ", n_bases);
}
}

line_counter += 1;
//println!("Line count:{}", line_counter);
//eprintln!("Line count:{}", line_counter);


}

if debug {
println!("Counts {}", counts_vector[0]);
println!("There are {} bases in your file.", n_bases);
eprintln!("Counts {}", counts_vector[0]);
eprintln!("There are {} bases in your file.", n_bases);
}

}

0 comments on commit 4986dc4

Please sign in to comment.