Repository 'sum_fastqc'
hg clone https://toolshed.g2.bx.psu.edu/repos/estrain/sum_fastqc

Changeset 3:8256c1d0d63b (2018-10-17)
Previous changeset 2:bc939b04bb12 (2018-10-17) Next changeset 4:d47775122e78 (2018-10-17)
Commit message:
Uploaded
added:
sum_fastqc.pl
b
diff -r bc939b04bb12 -r 8256c1d0d63b sum_fastqc.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sum_fastqc.pl Wed Oct 17 11:10:03 2018 -0400
[
@@ -0,0 +1,79 @@
+#!/usr/bin/perl
+
+####################################################
+## 
+## sum_fastqc.pl
+## 
+## Errol Strain (estrain@gmail.com) 
+##
+## Description: Takes raw FASTQC output and produces
+## simple table summary
+##
+#################################################### 
+
+my($inname)=shift(@ARGV);
+my($qscore)=shift(@ARGV);
+$qscore=~s/\s+//g;
+my(@qlist)=split(/\,/,$qscore);
+
+print "Input\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC";
+foreach(@qlist) {
+  print "\tQ".$_;
+}
+print "\n";
+
+foreach (@ARGV) {
+  print_stats($_);
+}
+
+sub print_stats {
+  $infile = shift;
+  # First 10 lines of raw FASTQC contain basic overview
+  @sumlines=`head -n 10 $infile`;
+  chomp(@sumlines);
+
+  # Sequence level Q scores are buried in the middle of the file
+  @qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
+  chomp(@qlines);
+
+  @fastqc = split(/[\n\t]/,shift(@sumlines));
+  @pass = split(/\t/,shift(@sumlines));
+  shift(@sumlines);
+  @fn = split(/\t/,shift(@sumlines));
+  shift(@sumlines);
+  shift(@sumlines);
+  @nreads = split(/\t/,shift(@sumlines));
+  @npoor = split(/\t/,shift(@sumlines));
+  shift(@sumlines);
+  @gc = split(/\t/,shift(@sumlines));
+
+  print $inname."\t";
+  print $fn[1]."\t";
+  print $fastqc[1]."\t";
+  print $pass[1]."\t";
+  print $nreads[1]."\t";
+  print $npoor[1]."\t";
+  print $gc[1];
+  foreach $qs (@qlist) {
+    print "\t";
+    print qcal($nreads[1],$qs,\@qlines);
+  }
+  print "\n";
+}
+
+# Sum reads w/ Q scores > cutoff and divide by number of reads
+sub qcal {
+   $nreads=shift(@_);
+   $cutoff=shift(@_);
+   @qarray=@{$_[0]};
+   $sum = 0;
+  
+   foreach $item (@qarray) {
+      my($qval,$q)=split(/\t/,$item);
+      if($qval>=$cutoff) {
+        $sum += $q;
+      }
+   }
+   $qmean = sprintf("%.2f", 100 * $sum / $nreads);
+   return $qmean;
+}