1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
//! Perform random operations on fastq files, using unix streaming.
//! Secure your analysis with Fasten!
//! # Synopsis
//! ## read metrics
//! ```text
//!
//! $ cat testdata/R1.fastq testdata/R2.fastq | \
//! fasten_shuffle | fasten_metrics | column -t
//! totalLength numReads avgReadLength avgQual
//! 800 8 100 19.53875
//! ```
//!
//! ## read cleaning
//!
//! ```text
//! $ cat testdata/R1.fastq testdata/R2.fastq | \
//! fasten_shuffle | \
//! fasten_clean --paired-end --min-length 2 | \
//! gzip -c > cleaned.shuffled.fastq.gz
//!
//! $ zcat cleaned.shuffled.fastq.gz | fasten_metrics | column -t
//! totalLength numReads avgReadLength avgQual
//! 800 8 100 19.53875
//! ```
//! _NOTE_: No reads were actually filtered with cleaning, with --min-length=2
//!
//! ## Kmer counting
//! ```text
//! $ cat testdata/R1.fastq | \
//! fasten_kmer -k 21 > 21mers.tsv
//! ```
//!
//! ## Read sampling
//! ```text
//! $ cat testdata/R1.fastq testdata/R2.fastq | \
//! fasten_shuffle | \
//! fasten_sample --paired-end --frequency 0.1 > 10percent.fastq
//! ```
//!
//! # Advanced
//! ## Set of downsampled reads
//! Create a set of downsampled reads for a titration experiment
//! and clean them
//! ```text
//! for frequency in 0.1 0.2 0.3 0.4 0.5; do
//! cat testdata/R1.fastq testdata/R2.fastq | \
//! fasten_shuffle | \
//! fasten_clean --paired-end --min-length 50 --min-trim-quality 25
//! fasten_sample --paired-end --frequency $frequency > cleaned.$frequency.fastq
//! done
//! ```
//!
//! ## Validate a whole directory of fastq reads
//! ```text
//! \ls *_1.fastq.gz | xargs -n 1 -P 4 bash -c '
//! echo -n "." >&2 # progress bar
//! R1=$0
//! R2=${0/_1.fastq.gz/_2.fastq.gz}
//! zcat $R1 $R2 | fasten_shuffle | fasten_validate --paired-end
//! '
//! ```
extern crate regex;
extern crate statistical;
extern crate getopts;
use std::env;
use std::path::Path;
use getopts::Options;
/// input/output methods
pub mod io;
/// Have some strings that can be printed which could be
/// used to propagate errors between piped scripts.
/// Invalid fastq ID (no @)
static INVALID_ID :&'static str= "invalid_id";
/// Invalid sequence (underscore)
static INVALID_SEQ :&'static str= "invalid_seq";
/// Invalid plus line (no +)
static INVALID_PLUS:&'static str= "invalid_plus";
/// Invalid qual line (~ is chr 126 when the normal max number is 40)
static INVALID_QUAL:&'static str= "invalid_qual";
/// Propagate an error by printing invalid read(s)
pub fn eexit() -> () {
println!("{}\n{}\n{}\n{}",INVALID_ID,INVALID_SEQ,INVALID_PLUS,INVALID_QUAL);
std::process::exit(1);
}
/// Rewrite print!() so that it doesn't panic on broken
/// pipe.
#[macro_export]
macro_rules! print (
// The extra scope is necessary so we don't leak imports
($($arg:tt)*) => ({
// The `write!()` macro is written so it can use `std::io::Write`
// or `std::fmt::Write`, this import sets which to use
use std::io::{self, Write};
if let Err(_) = write!(io::stdout(), $($arg)*) {
// Optionally write the error to stderr
::std::process::exit(0);
}
})
);
/// a function that reads an options object and adds fasten default options.
pub fn fasten_base_options() -> Options{
let mut opts = Options::new();
opts.optflag("h", "help", "Print this help menu.");
opts.optopt("n","numcpus","Number of CPUs (default: 1)","INT");
opts.optflag("p","paired-end","The input reads are interleaved paired-end");
opts.optflag("v","verbose","Print more status messages");
return opts;
}
/// Print a formatted message to stderr
pub fn logmsg<S: AsRef<str>>(stringlike: S) {
let args: Vec<_> = env::args().collect();
// is there a better way to get the basename of the program???
let program = Path::file_name(Path::new(&args[0])).unwrap().to_str().unwrap();
let str_ref = stringlike.as_ref();
eprintln!("{}: {}", &program, str_ref);
}
