# HG changeset patch # User estrain # Date 1539788946 14400 # Node ID 17652c6a7517255608d37b559799f4fff0608087 Uploaded diff -r 000000000000 -r 17652c6a7517 sum_fastqc-b769c810924e/sum_fastqc.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sum_fastqc-b769c810924e/sum_fastqc.pl Wed Oct 17 11:09:06 2018 -0400 @@ -0,0 +1,79 @@ +#!/usr/bin/perl + +#################################################### +## +## sum_fastqc.pl +## +## Errol Strain (estrain@gmail.com) +## +## Description: Takes raw FASTQC output and produces +## simple table summary +## +#################################################### + +my($inname)=shift(@ARGV); +my($qscore)=shift(@ARGV); +$qscore=~s/\s+//g; +my(@qlist)=split(/\,/,$qscore); + +print "Input\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC"; +foreach(@qlist) { + print "\tQ".$_; +} +print "\n"; + +foreach (@ARGV) { + print_stats($_); +} + +sub print_stats { + $infile = shift; + # First 10 lines of raw FASTQC contain basic overview + @sumlines=`head -n 10 $infile`; + chomp(@sumlines); + + # Sequence level Q scores are buried in the middle of the file + @qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`; + chomp(@qlines); + + @fastqc = split(/[\n\t]/,shift(@sumlines)); + @pass = split(/\t/,shift(@sumlines)); + shift(@sumlines); + @fn = split(/\t/,shift(@sumlines)); + shift(@sumlines); + shift(@sumlines); + @nreads = split(/\t/,shift(@sumlines)); + @npoor = split(/\t/,shift(@sumlines)); + shift(@sumlines); + @gc = split(/\t/,shift(@sumlines)); + + print $inname."\t"; + print $fn[1]."\t"; + print $fastqc[1]."\t"; + print $pass[1]."\t"; + print $nreads[1]."\t"; + print $npoor[1]."\t"; + print $gc[1]; + foreach $qs (@qlist) { + print "\t"; + print qcal($nreads[1],$qs,\@qlines); + } + print "\n"; +} + +# Sum reads w/ Q scores > cutoff and divide by number of reads +sub qcal { + $nreads=shift(@_); + $cutoff=shift(@_); + @qarray=@{$_[0]}; + $sum = 0; + + foreach $item (@qarray) { + my($qval,$q)=split(/\t/,$item); + if($qval>=$cutoff) { + $sum += $q; + } + } + $qmean = sprintf("%.2f", 100 * $sum / $nreads); + return $qmean; +} diff -r 000000000000 -r 17652c6a7517 sum_fastqc-b769c810924e/sum_fastqc.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sum_fastqc-b769c810924e/sum_fastqc.xml Wed Oct 17 11:09:06 2018 -0400 @@ -0,0 +1,55 @@ + + + + sum_fastqc.tab + #else if $jobtype.select == "col" + #set inname = $jobtype.coll.name + #set infile = $jobtype.coll.forward + #set infile2 = $jobtype.coll.reverse + perl $__tool_directory__/sum_fastqc.pl "$inname" $qset.qscore $infile $infile2 > sum_fastqc.tab; + #end if + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +