Mercurial > repos > estrain > sum_fastqc
comparison sum_fastqc-b769c810924e/sum_fastqc.pl @ 0:17652c6a7517 draft
Uploaded
author | estrain |
---|---|
date | Wed, 17 Oct 2018 11:09:06 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:17652c6a7517 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 #################################################### | |
4 ## | |
5 ## sum_fastqc.pl | |
6 ## | |
7 ## Errol Strain (estrain@gmail.com) | |
8 ## | |
9 ## Description: Takes raw FASTQC output and produces | |
10 ## simple table summary | |
11 ## | |
12 #################################################### | |
13 | |
# Command line:
#   sum_fastqc.pl <input_name> <q_cutoffs> [<fastqc_data> ...]
#
#   input_name   label printed in the first column of every row
#   q_cutoffs    comma-separated quality cutoffs (e.g. "20,30"); one
#                "Q<cutoff>" column is printed per entry
#   fastqc_data  raw FastQC report files, one output row each
#
# Both leading arguments are required: without them the column layout is
# undefined, so fail loudly instead of printing a misleading header-only table.
if (@ARGV < 2) {
    die "Usage: $0 <input_name> <q_cutoffs> [<fastqc_data> ...]\n";
}

my($inname)=shift(@ARGV);
my($qscore)=shift(@ARGV);

# Whitespace inside the cutoff list ("20, 30") is not significant.
$qscore=~s/\s+//g;

# Ignore empty entries (",20" or "20,,30") so they do not become blank
# "Q" columns with a meaningless cutoff.
my(@qlist)=grep { length } split(/\,/,$qscore);

# Header row: seven fixed columns followed by one column per cutoff.
print join("\t",
    "Input", "File", "FastQC", "Pass-Fail", "Reads", "Poor_Reads", "GC",
    map { "Q".$_ } @qlist
), "\n";

# Every remaining argument is a FastQC report; emit one row for each.
foreach my $report (@ARGV) {
    print_stats($report);
}
28 | |
# print_stats($infile)
#
# Prints one tab-separated summary row for a single raw FastQC report.
#
# Parameters:
#   $infile  path to the raw FastQC report file
#
# Reads the file-level $inname (row label) and @qlist (quality cutoffs).
#
# Output columns, in order: $inname, Filename, FastQC version (second field
# of the first line), Basic Statistics status, Total Sequences, Sequences
# flagged as poor quality, %GC, then one qcal() percentage per cutoff in
# @qlist.  A value missing from the report is printed as an empty field.
#
# Dies if the report cannot be opened.
sub print_stats {
    my ($infile) = @_;

    # Read the report directly rather than through head/awk/tail pipelines:
    # the file name never reaches a shell, so spaces and shell metacharacters
    # in it are harmless, and no GNU-specific "head -n -1" is needed.
    open(my $fh, '<', $infile)
        or die "sum_fastqc: cannot open '$infile': $!\n";

    my $version;        # second field of the first line ("##FastQC<TAB>x.y.z")
    my $status;         # second field of the ">>Basic Statistics" line
    my %stat;           # Basic Statistics rows: measure name => value
    my @qlines;         # "quality<TAB>count" rows handed to qcal()
    my $in_basic   = 0; # inside the Basic Statistics module
    my $in_quality = 0; # inside the "#Quality<TAB>Count" table

    while (my $line = <$fh>) {
        chomp($line);

        if ($. == 1) {
            (undef, $version) = split(/\t/, $line);
            next;
        }

        if ($in_quality) {
            # The table runs up to, but not including, its >>END_MODULE line.
            if ($line =~ />>END_MODULE/) {
                $in_quality = 0;
            }
            else {
                push(@qlines, $line);
            }
            next;
        }

        if ($in_basic) {
            if ($line =~ /^>>END_MODULE/) {
                $in_basic = 0;
            }
            else {
                # Measures are looked up by name, not by line position, so an
                # additional row in this module (e.g. a "Total Bases" row,
                # presumably written by newer FastQC releases - verify) cannot
                # shift the values into the wrong columns.
                my ($measure, $value) = split(/\t/, $line);
                if (defined $measure && !exists $stat{$measure}) {
                    $stat{$measure} = $value;
                }
            }
            next;
        }

        if ($line =~ /^>>Basic Statistics\t/) {
            (undef, $status) = split(/\t/, $line);
            $in_basic = 1;
        }
        elsif ($line =~ /#Quality\tCount/) {
            # Header of the sequence-level quality table; rows start next line.
            $in_quality = 1;
        }
    }
    close($fh);

    # Missing values become empty fields instead of undef.
    my ($fn, $nreads, $npoor, $gc) =
        map { defined $stat{$_} ? $stat{$_} : '' }
        ('Filename', 'Total Sequences', 'Sequences flagged as poor quality', '%GC');
    $version = '' unless defined $version;
    $status  = '' unless defined $status;

    print join("\t", $inname, $fn, $version, $status, $nreads, $npoor, $gc);
    foreach my $qs (@qlist) {
        print "\t", qcal($nreads, $qs, \@qlines);
    }
    print "\n";
}
63 | |
# qcal($nreads, $cutoff, \@rows)
#
# Returns the percentage of reads whose quality is at or above $cutoff,
# formatted with two decimal places.
#
# Parameters:
#   $nreads  total number of reads (the denominator)
#   $cutoff  quality cutoff; rows with quality >= $cutoff are counted
#   \@rows   reference to an array of "quality<TAB>count" strings
#
# Returns "0.00" when $nreads is zero, empty or undefined, so that a report
# with no reads yields a row instead of a division-by-zero death that would
# abort the whole table.
sub qcal {
    my ($nreads, $cutoff, $rows) = @_;

    # Nothing to divide by: no reads means no reads at or above the cutoff.
    return sprintf("%.2f", 0) unless $nreads && $nreads > 0;

    my $sum = 0;
    foreach my $item (@{$rows}) {
        my ($qval, $count) = split(/\t/, $item);
        next unless defined $qval && defined $count;
        $sum += $count if $qval >= $cutoff;
    }

    return sprintf("%.2f", 100 * $sum / $nreads);
}