3
|
1 #!/usr/bin/perl
|
|
2
|
|
3 ####################################################
|
|
4 ##
|
|
5 ## sum_fastqc.pl
|
|
6 ##
|
|
7 ## Errol Strain (estrain@gmail.com)
|
|
8 ##
|
|
9 ## Description: Takes raw FASTQC output and produces
|
|
10 ## simple table summary
|
|
11 ##
|
|
12 ####################################################
|
|
13
|
|
14 my($inname)=shift(@ARGV);
|
|
15 my($qscore)=shift(@ARGV);
|
|
16 $qscore=~s/\s+//g;
|
|
17 my(@qlist)=split(/\,/,$qscore);
|
|
18
|
4
|
19 print "Input\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC\tMeanQ";
|
3
|
20 foreach(@qlist) {
|
|
21 print "\tQ".$_;
|
|
22 }
|
|
23 print "\n";
|
|
24
|
|
25 foreach (@ARGV) {
|
|
26 print_stats($_);
|
|
27 }
|
|
28
|
|
29 sub print_stats {
|
|
30 $infile = shift;
|
|
31 # First 10 lines of raw FASTQC contain basic overview
|
|
32 @sumlines=`head -n 10 $infile`;
|
|
33 chomp(@sumlines);
|
|
34
|
|
35 # Sequence level Q scores are buried in the middle of the file
|
|
36 @qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
|
|
37 chomp(@qlines);
|
|
38
|
|
39 @fastqc = split(/[\n\t]/,shift(@sumlines));
|
|
40 @pass = split(/\t/,shift(@sumlines));
|
|
41 shift(@sumlines);
|
|
42 @fn = split(/\t/,shift(@sumlines));
|
|
43 shift(@sumlines);
|
|
44 shift(@sumlines);
|
|
45 @nreads = split(/\t/,shift(@sumlines));
|
|
46 @npoor = split(/\t/,shift(@sumlines));
|
|
47 shift(@sumlines);
|
|
48 @gc = split(/\t/,shift(@sumlines));
|
|
49
|
|
50 print $inname."\t";
|
|
51 print $fn[1]."\t";
|
|
52 print $fastqc[1]."\t";
|
|
53 print $pass[1]."\t";
|
|
54 print $nreads[1]."\t";
|
|
55 print $npoor[1]."\t";
|
4
|
56 print $gc[1]."\t";
|
|
57 print readmean($nreads[1],\@qlines);
|
3
|
58 foreach $qs (@qlist) {
|
|
59 print "\t";
|
|
60 print qcal($nreads[1],$qs,\@qlines);
|
|
61 }
|
|
62 print "\n";
|
|
63 }
|
|
64
|
|
65 # Sum reads w/ Q scores > cutoff and divide by number of reads
|
|
66 sub qcal {
|
|
67 $nreads=shift(@_);
|
|
68 $cutoff=shift(@_);
|
|
69 @qarray=@{$_[0]};
|
|
70 $sum = 0;
|
|
71
|
|
72 foreach $item (@qarray) {
|
|
73 my($qval,$q)=split(/\t/,$item);
|
|
74 if($qval>=$cutoff) {
|
|
75 $sum += $q;
|
|
76 }
|
|
77 }
|
|
78 $qmean = sprintf("%.2f", 100 * $sum / $nreads);
|
|
79 return $qmean;
|
|
80 }
|
4
|
81
|
|
82 sub readmean {
|
|
83 $nreads=shift(@_);
|
|
84 @qarray=@{$_[0]};
|
|
85 my($sum) = 0;
|
|
86
|
|
87 foreach $item (@qarray) {
|
|
88 my($qval,$q)=split(/\t/,$item);
|
|
89 $sum += $q*$qval;
|
|
90 }
|
|
91
|
|
92 $readq = sprintf("%.2f", $sum / $nreads);
|
|
93 return $readq;
|
|
94 }
|