comparison sum_fastqc-b769c810924e/sum_fastqc.pl @ 0:17652c6a7517 draft

Uploaded
author estrain
date Wed, 17 Oct 2018 11:09:06 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:17652c6a7517
1 #!/usr/bin/perl
2
3 ####################################################
4 ##
5 ## sum_fastqc.pl
6 ##
7 ## Errol Strain (estrain@gmail.com)
8 ##
9 ## Description: Takes raw FASTQC output and produces
10 ## simple table summary
11 ##
12 ####################################################
13
# Command line:
#   sum_fastqc.pl <input_name> <q_cutoffs> <fastqc_data.txt> [<fastqc_data.txt> ...]
#
#   input_name - label printed in the "Input" column of every row
#   q_cutoffs  - comma-separated quality cutoffs (e.g. "20,30"); one
#                "Q<cutoff>" column is produced for each
#   remaining  - raw FastQC data files, one output row per file
#
# $inname and @qlist are file-scoped lexicals because print_stats reads them.
my $inname = shift(@ARGV);
my $qscore = shift(@ARGV);

# Fail loudly on a broken invocation instead of emitting a header-only table.
unless (defined $inname && defined $qscore) {
    die "Usage: sum_fastqc.pl <input_name> <q_cutoffs> <fastqc_data.txt> [...]\n";
}

# Whitespace inside the cutoff list is not significant ("20, 30" == "20,30").
$qscore =~ s/\s+//g;

# Drop empty entries (",20" or "20,,30") so no bare "Q" column is produced.
my @qlist = grep { length } split(/,/, $qscore);

# Header row: fixed columns followed by one column per cutoff.
print join("\t",
    qw(Input File FastQC Pass-Fail Reads Poor_Reads GC),
    map { "Q" . $_ } @qlist), "\n";

# One summary row per FastQC data file.
foreach my $fastqc_file (@ARGV) {
    print_stats($fastqc_file);
}
28
# print_stats($infile)
#
# Prints one tab-separated summary row for the raw FastQC data file $infile:
#   input label, file name, FastQC version, pass/fail, total reads,
#   poor-quality reads, %GC, then one qcal() percentage per cutoff in @qlist.
#
# Reads the file-scoped $inname and @qlist. Dies if $infile cannot be opened.
#
# The file is parsed directly in Perl rather than through shell pipelines
# (head/awk/tail), so file names containing spaces or shell metacharacters
# are safe and no GNU-specific tool options are needed.
sub print_stats {
    my ($infile) = @_;

    open(my $fh, '<', $infile)
        or die "sum_fastqc.pl: cannot open '$infile': $!\n";

    my @sumlines;        # first 10 lines: the basic-statistics overview
    my @qlines;          # data rows of the "#Quality<TAB>Count" table
    my $in_qtable  = 0;  # currently inside the quality/count table
    my $qtable_done = 0; # table has been read to its >>END_MODULE line
    my $lineno     = 0;

    while (my $line = <$fh>) {
        $lineno++;
        $line =~ s/\r?\n\z//;    # chomp; also tolerate CRLF line endings

        push(@sumlines, $line) if $lineno <= 10;

        if ($in_qtable) {
            if ($line =~ />>END_MODULE/) {
                $in_qtable   = 0;
                $qtable_done = 1;
            }
            else {
                push(@qlines, $line);
            }
        }
        elsif ($line =~ /#Quality\tCount/) {
            # Header line itself is not data; rows start on the next line.
            $in_qtable = 1;
        }

        # Nothing further in the file is needed once both parts are read.
        last if $qtable_done && $lineno >= 10;
    }
    close($fh);

    # Second tab-separated field of overview line $idx (0-based), or ''
    # when the line or the field is missing.
    my $value_of = sub {
        my ($idx) = @_;
        return '' unless defined $sumlines[$idx];
        my @fields = split(/\t/, $sumlines[$idx]);
        return defined $fields[1] ? $fields[1] : '';
    };

    # Positions within the overview block:
    #   0 FastQC version, 1 pass/fail, 3 file name,
    #   6 total reads, 7 poor-quality reads, 9 %GC
    my $nreads = $value_of->(6);

    my @row = (
        $inname,
        $value_of->(3),
        $value_of->(0),
        $value_of->(1),
        $nreads,
        $value_of->(7),
        $value_of->(9),
    );

    foreach my $qs (@qlist) {
        push(@row, qcal($nreads, $qs, \@qlines));
    }

    print join("\t", @row), "\n";
}
63
# qcal($nreads, $cutoff, \@qlines)
#
# Sums the counts of all rows whose quality value is >= $cutoff and returns
# that sum as a percentage of $nreads, formatted with two decimals.
#
#   $nreads - total number of reads (the denominator)
#   $cutoff - minimum quality value to include (inclusive)
#   \@qlines - reference to "quality<TAB>count" strings
#
# Returns "0.00" when $nreads is zero or undefined (e.g. an empty FASTQ)
# instead of dying with a division-by-zero error.
sub qcal {
    my ($nreads, $cutoff, $qlines_ref) = @_;

    my $sum = 0;
    foreach my $item (@{ $qlines_ref || [] }) {
        my ($qval, $count) = split(/\t/, $item);

        # Skip rows that do not have both a quality value and a count.
        next unless defined $qval && defined $count;

        $sum += $count if $qval >= $cutoff;
    }

    # No reads means no meaningful ratio; report zero rather than crash.
    return sprintf("%.2f", 0) if !defined $nreads || $nreads == 0;

    return sprintf("%.2f", 100 * $sum / $nreads);
}