1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
extern crate getopts;
extern crate fasten;
extern crate regex;
use std::fs::File;
use std::io::BufReader;
use std::io::BufRead;
use std::env;
use regex::Regex;
use fasten::fasten_base_options;
/// Validates a FASTQ stream read from `/dev/stdin`.
///
/// Every line is checked according to its position within a four-line record:
///
/// 1. the identifier line must start with `@`;
/// 2. the sequence line must contain only ASCII letters and, when
///    `--min-length` is given, be at least that many bytes long;
/// 3. the separator line must start with `+`;
/// 4. the quality line must not contain whitespace and, when
///    `--min-quality` is given, its average quality (character code minus 33)
///    must not fall below that value.
///
/// With `--print-reads` each input line is echoed to stdout before it is
/// checked. With `--paired-end` the total number of lines must be a multiple
/// of 8 instead of 4. Line numbers in error messages are zero-based.
///
/// # Panics
///
/// Panics with a descriptive message on the first invalid line, on an
/// incomplete trailing record, on options that cannot be parsed, and on read
/// errors.
fn main() {
    let args: Vec<String> = env::args().collect();
    let mut opts = fasten_base_options();

    opts.optopt("", "min-length", "Minimum read length allowed", "INT");
    opts.optopt("", "min-quality", "Minimum quality allowed", "FLOAT");
    opts.optflag("", "paired-end", "The reads are interleaved paired-end");
    opts.optflag("", "print-reads", "Print the reads as they are being validated (useful for unix pipes)");
    opts.optflag("v", "verbose", "");

    let matches = opts.parse(&args[1..]).expect("ERROR: could not parse parameters");

    if matches.opt_present("help") {
        println!("Validates your reads and makes you feel good about yourself!\n{}", opts.usage(&opts.short_usage(&args[0])));
        std::process::exit(0);
    }

    let my_file = File::open("/dev/stdin").expect("Could not open file");
    let my_buffer = BufReader::new(my_file);

    // An interleaved paired-end file carries two four-line records per pair.
    // u64 (shared with the line counter below) instead of the inferred i32,
    // so that very large inputs cannot overflow the counter.
    let lines_per_read: u64 = if matches.opt_present("paired-end") { 8 } else { 4 };

    // A missing option means "no minimum", encoded as zero.
    let min_length: usize = matches
        .opt_str("min-length")
        .map(|value| value.parse::<usize>().expect("ERROR parsing min-length as INT"))
        .unwrap_or(0);
    let min_qual: f32 = matches
        .opt_str("min-quality")
        .map(|value| value.parse::<f32>().expect("ERROR parsing min-quality as FLOAT"))
        .unwrap_or(0.0);

    let should_print = matches.opt_present("print-reads");

    // Compiled once, outside the per-line loop.
    let seq_regex = Regex::new(r"[^a-zA-Z]").expect("malformed seq regex");
    let qual_regex = Regex::new(r"\s").expect("malformed qual regex");

    // Zero-based index of the line being validated; after the loop it holds
    // the total number of lines read.
    let mut i: u64 = 0;
    for line in my_buffer.lines() {
        let line = line.expect("ERROR: did not get a line");

        if should_print {
            println!("{}", line);
        }

        match i % 4 {
            0 => {
                // `starts_with` also covers an empty line, which then gets the
                // descriptive error below instead of an opaque unwrap panic.
                if !line.starts_with('@') {
                    panic!("ERROR: first character of the identifier is not @ in the line {}. Contents are:\n  {}",i,line);
                }
            }
            1 => {
                if seq_regex.is_match(&line) {
                    panic!("ERROR: there are characters that are not in the alphabet in line {}. Contents are:\n  {}",i,line);
                }
                // Reject reads that are SHORTER than the requested minimum.
                // With the default of zero this can never trigger.
                if line.len() < min_length {
                    panic!("ERROR: sequence at line {} is less than the minimum sequence length",i);
                }
            }
            2 => {
                if !line.starts_with('+') {
                    panic!("ERROR: first character of the qual identifier is not + in the line {}. Contents are:\n  {}",i,line);
                }
            }
            3 => {
                if qual_regex.is_match(&line) {
                    // Report every offending character before aborting.
                    for cap in qual_regex.captures_iter(&line) {
                        eprintln!("Illegal qual character found: {}", &cap[0]);
                    }
                    panic!("ERROR: there are characters that are not qual characters in line {}. Contents are:\n  {}",i,line);
                }

                if min_qual > 0.0 {
                    let qual_total: usize = line.chars().map(|q| q as usize).sum();
                    // NOTE: an empty quality line yields NaN here (0/0), and
                    // NaN never compares below `min_qual`, so it passes.
                    let avg_qual: f32 = qual_total as f32 / line.len() as f32 - 33.0;
                    if avg_qual < min_qual {
                        panic!("ERROR: quality is less than min qual in line {}.\n  Avg qual is {}.\n  Min qual is {}\n  Contents are:\n  {}",i,avg_qual,min_qual,line);
                    }
                }
            }
            // `i % 4` on an unsigned counter is always in 0..=3.
            _ => unreachable!("INTERNAL ERROR"),
        }

        i += 1;
    }

    // A trailing partial record (or an unpaired read in paired-end mode)
    // leaves a remainder.
    if i % lines_per_read > 0 {
        panic!("ERROR: incomplete fastq entry. Num lines: {}",i);
    }

    if matches.opt_present("verbose") {
        fasten::logmsg("These reads have been validated!");
    }
}