diff CoverageReport.pl @ 1:864d0ccfbe6f draft

Initial Uploaded
author geert-vandeweyer
date Thu, 05 Sep 2013 02:37:03 -0400
parents
children 39c374d4cba7
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CoverageReport.pl	Thu Sep 05 02:37:03 2013 -0400
@@ -0,0 +1,837 @@
+#!/usr/bin/perl
+
+# load modules
+use Getopt::Std;
+use File::Basename;
+use Number::Format;
+
+# number format
+my $de = new Number::Format(-thousands_sep =>',',-decimal_point => '.');
+
+##########
+## opts ##
+##########
+## input files
+# b : path to input (b)am file
+# t : path to input (t)arget regions in BED format
+## output files
+# o : report pdf (o)utput file
+# z : all plots and tables in tar.g(z) format
+## entries in the report
+# r : Coverage per (r)egion (boolean)
+# s : (s)ubregion coverage if average < specified (plots for positions along target region) (boolean)
+# S : (S)ubregion coverage for ALL failed exons  => use either s OR S or you will have double plots. 
+# A : (A)ll exons will be plotted. 
+# L : (L)ist failed exons instead of plotting
+# m : (m)inimal Coverage threshold
+# f : fraction of average as threshold
+# n : sample (n)ame.
+ 
+
+getopts('b:t:o:z:rsSALm:n:f:', \%opts) ;
+
+# make output directory in (tmp) working dir
+our $wd = "/tmp/Coverage.".int(rand(1000));
+while (-d $wd) {
+	$wd = "/tmp/Coverage.".int(rand(1000));
+}
+system("mkdir $wd");
+
+## variables
+our %commandsrun = ();
+
+if (!exists($opts{'b'}) || !-e $opts{'b'}) {
+	die('Bam File not found');
+}
+if (!exists($opts{'t'}) || !-e $opts{'t'}) {
+	die('Target File (BED) not found');
+}
+
+if (exists($opts{'m'})) {
+	$thresh = $opts{'m'};
+}
+else {
+	$thresh = 40;
+}
+
+if (exists($opts{'f'})) {
+	$frac = $opts{'f'};
+}
+else {
+	$frac = 0.2;
+}
+
+if (exists($opts{'o'})) {
+	$pdffile = $opts{'o'};
+}
+else {
+	$pdffile = "$wd/CoverageReport.pdf";
+}
+
+if (exists($opts{'z'})) {
+	$tarfile = $opts{'z'};
+}
+else {
+	$tarfile = "$wd/Results.tar.gz";
+}
+
+# 1. Global Summary => default
+&GlobalSummary($opts{'b'}, $opts{'t'});
+
+# 2. Coverage per position
+&SubRegionCoverage($opts{'b'}, $opts{'t'});
+our %filehash;
+if (exists($opts{'s'}) || exists($opts{'S'}) || exists($opts{'A'}) || exists($opts{'L'})) {
+	system("mkdir $wd/SplitFiles");
+	## get position coverages
+	## split input files
+	open IN, "$wd/Targets.Position.Coverage";
+	my $fileidx = 0;
+	my $currreg = '';
+	while (<IN>) {
+		my $line = $_;
+		chomp($line);
+		my @p = split(/\t/,$line);
+		my $reg = $p[0].'-'.$p[1].'-'.$p[2]; #.$p[3];
+		my $ex = $p[3];
+		if ($reg ne $currreg) {
+			## new exon open new outfile
+			if ($currreg ne '') {
+				## filehandle is open. close it
+				close OUT; 
+			}
+			if (!exists($filehash{$reg})) {
+				$fileidx++;
+				$filehash{$reg}{'idx'} = $fileidx;
+				$filehash{$reg}{'exon'} = $ex;
+				open OUT, ">> $wd/SplitFiles/File_$fileidx.txt";
+				$currreg = $reg;
+			}
+			else {
+				open OUT, ">> $wd/SplitFiles/File_".$filehash{$reg}{'idx'}.".txt";
+				$currreg = $reg;
+			}
+		}
+		## print the line to the open filehandle. 	
+		print OUT "$line\n";
+	}
+	close OUT;
+	close IN;
+
+}
+
+## sort output files according to targets file
+if (exists($opts{'r'}) ) {
+	my %hash = ();
+	open IN, "$wd/Targets.Global.Coverage";
+	while (<IN>) {
+		my @p = split(/\t/,$_) ;
+		$hash{$p[3]} = $_;
+	}
+	close IN;
+	open OUT, ">$wd/Targets.Global.Coverage";
+	open IN, $opts{'t'};
+	while (<IN>) {
+		my @p = split(/\t/,$_) ;
+		print OUT $hash{$p[3]};
+	}
+	close IN;
+	close OUT;
+}
+
+
+####################################
+## PROCESS RESULTS & CREATE PLOTS ##
+####################################
+system("mkdir $wd/Report");
+
+system("mkdir $wd/Rout");
+system("mkdir $wd/Plots");
+
+$samplename = $opts{'n'};
+$samplename =~ s/_/\\_/g;
+
+# 0. Preamble
+## compose preamble
+open OUT, ">$wd/Report/Report.tex";
+print OUT '\documentclass[a4paper,10pt]{article}'."\n";
+print OUT '\usepackage[left=2cm,top=1.5cm,right=1.5cm,bottom=2.5cm,nohead]{geometry}'."\n";
+print OUT '\usepackage{longtable}'."\n";
+print OUT '\usepackage[T1]{fontenc}'."\n";
+print OUT '\usepackage{fancyhdr}'."\n";
+print OUT '\usepackage[latin9]{inputenc}'."\n";
+print OUT '\usepackage{color}'."\n";
+print OUT '\usepackage[pdftex]{graphicx}'."\n";
+print OUT '\definecolor{grey}{RGB}{160,160,160}'."\n";
+print OUT '\definecolor{darkgrey}{RGB}{100,100,100}'."\n";
+print OUT '\definecolor{red}{RGB}{255,0,0}'."\n";
+print OUT '\definecolor{orange}{RGB}{238,118,0}'."\n";
+print OUT '\setlength\LTleft{0pt}'."\n";
+print OUT '\setlength\LTright{0pt}'."\n";
+print OUT '\begin{document}'."\n";
+print OUT '\pagestyle{fancy}'."\n";
+print OUT '\fancyhead{}'."\n";
+print OUT '\renewcommand{\footrulewidth}{0.4pt}'."\n";
+print OUT '\renewcommand{\headrulewidth}{0pt}'."\n";
+print OUT '\fancyfoot[R]{\today\hspace{2cm}\thepage\ of \pageref{endofdoc}}'."\n";
+print OUT '\fancyfoot[C]{}'."\n";
+print OUT '\fancyfoot[L]{Coverage Report for ``'.$samplename.'"}'."\n";
+print OUT '\let\oldsubsubsection=\subsubsection'."\n";
+print OUT '\renewcommand{\subsubsection}{%'."\n";
+print OUT '  \filbreak'."\n";
+print OUT '  \oldsubsubsection'."\n";
+print OUT '}'."\n";
+# main title
+print OUT '\section*{Coverage Report for ``'.$samplename.'"}'."\n";
+close OUT; 
+
+# 1. Summary Report
+# Get samtools flagstat summary of BAM file	
+my $flagstat = `samtools flagstat $opts{'b'}`;
+my @s = split(/\n/,$flagstat);
+# Get number of reads mapped in total 
+## updated on 2012-10-1 !!
+$totalmapped = $s[2];
+$totalmapped =~ s/^(\d+)(\s.+)/$1/;
+# count columns
+my $head = `head -n 1 $wd/Targets.Global.Coverage`;
+chomp($head);
+my @cols = split(/\t/,$head);
+my $nrcols = scalar(@cols);
+my $covcol = $nrcols - 3;
+# get min/max/median/average coverage => values
+my $covs = `cut -f $covcol $wd/Targets.Global.Coverage`;
+my @coverages = split(/\n/,$covs);
+my ($eavg,$med,$min,$max,$first,$third,$ontarget) = arraystats(@coverages);
+my $spec = sprintf("%.1f",($ontarget / $totalmapped)*100);
+# get min/max/median/average coverage => boxplot in R
+open OUT, ">$wd/Rout/boxplot.R";
+print OUT 'coverage <- read.table("../Targets.Global.Coverage",as.is=TRUE,sep="\t",header=FALSE)'."\n";
+print OUT 'coverage <- coverage[,'.$covcol.']'."\n";
+print OUT 'png(file="../Plots/CoverageBoxPlot.png", bg="white", width=240, height=480)'."\n";
+print OUT 'boxplot(coverage,range=1.5,main="Target Region Coverage")'."\n";
+print OUT 'graphics.off()'."\n";
+close OUT;
+system("cd $wd/Rout && Rscript boxplot.R");
+
+## global nt coverage plot
+## use perl to make histogram (lower memory)
+open IN, "$wd/Targets.Position.Coverage";
+my %dens;
+my $counter = 0;
+my $sum = 0;
+while (<IN>) {
+	chomp();
+	my @p = split(/\t/);
+	$sum += $p[-1];
+	$counter++;
+	if (defined($dens{$p[-1]})) {
+		$dens{$p[-1]}++;
+	}
+	else {
+		$dens{$p[-1]} = 1;
+	}
+}
+$avg = $sum/$counter;
+close IN;
+open OUT, ">$wd/Rout/hist.txt";
+foreach (keys(%dens)) {
+	print OUT "$_;$dens{$_}\n";
+}
+close OUT;
+if (!defined($dens{'0'})) {
+	$dens{'0'} = 0;
+}
+open OUT, ">$wd/Rout/ntplot.R";
+# read coverage hist in R to plot
+print OUT 'coverage <- read.table("hist.txt" , as.is = TRUE, header=FALSE,sep=";")'."\n";
+print OUT 'mincov <- '."$thresh \n";
+print OUT "avg <- round($avg)\n";
+print OUT "colnames(coverage) <- c('cov','count')\n";
+print OUT 'coverage$cov <- coverage$cov / avg'."\n";
+print OUT 'rep <- which(coverage$cov > 1)'."\n";
+print OUT 'coverage[coverage$cov > 1,1] <- 1'."\n";
+print OUT 'values <- coverage[coverage$cov < 1,]'."\n";
+print OUT 'values <- rbind(values,c(1,sum(coverage[coverage$cov == 1,"count"])))'."\n";
+print OUT 'values <- values[order(values$cov),]'."\n";
+print OUT 'prevcount <- 0'."\n";
+# make cumulative count data frame
+print OUT 'for (i in rev(values$cov)) {'."\n";
+print OUT '  values[values$cov == i,"count"] <- prevcount + values[values$cov == i,"count"]'."\n";
+print OUT '  prevcount <- values[values$cov == i,"count"]'."\n";
+print OUT '}'."\n";
+print OUT 'values$count <- values$count / (values[values$cov == 0,"count"] / 100)'."\n";
+# get some values to plot lines.
+print OUT 'mincov.x <- mincov/avg'."\n";
+print OUT 'if (mincov/avg <= 1) {'."\n";
+print OUT '  ii <- which(values$cov == mincov.x)'."\n";
+print OUT '  if (length(ii) == 1) {'."\n";
+print OUT '    mincov.y <- values[ii[1],"count"]'."\n";
+print OUT '  } else {'."\n";
+print OUT '    i1 <- max(which(values$cov < mincov.x))'."\n";
+print OUT '    i2 <- min(which(values$cov > mincov.x))'."\n";
+print OUT '    mincov.y <- ((values[i2,"count"] - values[i1,"count"])/(values[i2,"cov"] - values[i1,"cov"]))*(mincov.x - values[i1,"cov"]) + values[i1,"count"]'."\n";
+print OUT '  }'."\n";
+print OUT '}'."\n";
+# open output image and create plot
+print OUT 'png(file="../Plots/CoverageNtPlot.png", bg="white", width=540, height=480)'."\n";
+print OUT 'par(xaxs="i",yaxs="i")'."\n";
+print OUT 'plot(values$cov,values$count,ylim=c(0,100),pch=".",main="Cumulative Normalised Base-Coverage Plot",xlab="Normalizalised Coverage",ylab="Cumulative Nr. Of Bases")'."\n";
+print OUT 'lines(values$cov,values$count)'."\n";
+print OUT 'if (mincov.x <= 1) {'."\n";
+print OUT '  lines(c(mincov.x,mincov.x),c(0,mincov.y),lty=2,col="darkgreen")'."\n";
+print OUT '  lines(c(0,mincov.x),c(mincov.y,mincov.y),lty=2,col="darkgreen")'."\n";
+print OUT '  text(1,(95),pos=2,col="darkgreen",labels="Threshold: '.$thresh.'x")'."\n";
+print OUT '  text(1,(91),pos=2,col="darkgreen",labels=paste("%Bases: ",round(mincov.y,2),"%",sep=""))'."\n";
+print OUT '} else {'."\n";
+print OUT '  text(1,(95),pos=2,col="darkgreen",labels="Threshold ('.$thresh.'x) > Average")'."\n";
+print OUT '  text(1,(91),pos=2,col="darkgreen",labels="Plotting impossible")'."\n";
+print OUT '}'."\n";
+print OUT 'frac.x <- '."$frac\n";
+print OUT 'ii <- which(values$cov == frac.x)'."\n";
+print OUT 'if (length(ii) == 1) {'."\n";
+print OUT '  frac.y <- values[ii[1],"count"]'."\n";
+print OUT '} else {'."\n";
+print OUT '  i1 <- max(which(values$cov < frac.x))'."\n";
+print OUT '  i2 <- min(which(values$cov > frac.x))'."\n";
+print OUT '  frac.y <- ((values[i2,"count"] - values[i1,"count"])/(values[i2,"cov"] - values[i1,"cov"]))*(frac.x - values[i1,"cov"]) + values[i1,"count"]'."\n";
+print OUT '}'."\n";
+print OUT 'lines(c(frac.x,frac.x),c(0,frac.y),lty=2,col="red")'."\n";
+print OUT 'lines(c(0,frac.x),c(frac.y,frac.y),lty=2,col="red")'."\n";
+#iprint OUT 'text((frac.x+0.05),(frac.y - 2),pos=4,col="red",labels=paste(frac.x," x Avg.Cov : ",round(frac.x * avg,2),"x",sep="" ))'."\n";
+#print OUT 'text((frac.x+0.05),(frac.y-5),pos=4,col="red",labels=paste("%Bases: ",round(frac.y,2),"%",sep=""))'."\n";
+print OUT 'text(1,86,pos=2,col="red",labels=paste(frac.x," x Avg.Cov : ",round(frac.x * avg,2),"x",sep="" ))'."\n";
+print OUT 'text(1,82,pos=2,col="red",labels=paste("%Bases: ",round(frac.y,2),"%",sep=""))'."\n";
+
+print OUT 'graphics.off()'."\n";
+
+close OUT;
+system("cd $wd/Rout && Rscript ntplot.R");
+## PRINT TO .TEX FILE
+open OUT, ">>$wd/Report/Report.tex";
+# average coverage overviews
+print OUT '\subsection*{Overall Summary}'."\n";
+print OUT '{\small ';
+# left : boxplot
+print OUT '\begin{minipage}{0.3\linewidth}\centering'."\n";
+print OUT '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/CoverageBoxPlot.png}'."\n";
+print OUT '\end{minipage}'."\n";
+# right : cum.cov.plot
+print OUT '\hspace{0.6cm}'."\n";
+print OUT '\begin{minipage}{0.65\linewidth}\centering'."\n";
+print OUT '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/CoverageNtPlot.png}'."\n";
+print OUT '\end{minipage} \\\\'."\n";
+## next line
+print OUT '\begin{minipage}{0.48\linewidth}'."\n";
+print OUT '\vspace{-1.2em}'."\n";
+print OUT '\begin{tabular}{ll}'."\n";
+# bam statistics
+print OUT '\multicolumn{2}{l}{\textbf{\underline{Samtools Flagstat Summary}}} \\\\'."\n";
+foreach (@s) {
+	$_ =~ m/^(\d+)\s(.+)$/;
+	my $one = $1;
+	my $two = $2;
+	$two =~ s/\s\+\s0\s//;
+	$two = ucfirst($two);
+	$one =~ s/%/\\%/g;
+	# remove '+ 0 ' from front
+	$two =~ s/\+\s0\s//;
+	# remove trailing from end
+	$two =~ s/(\s\+.*)|(:.*)/\)/;
+	$two =~ s/%/\\%/g;
+	$two =~ s/>=/\$\\ge\$/g;
+	$two = ucfirst($two);
+	print OUT '\textbf{'.$two.'} & '.$one.' \\\\'."\n";
+}
+print OUT '\end{tabular}\end{minipage}'."\n";
+print OUT '\hspace{1.5cm}'."\n";
+# target coverage statistics
+print OUT '\begin{minipage}{0.4\linewidth}'."\n";
+#print OUT '\vspace{-4.8em}'."\n";
+print OUT '\begin{tabular}{ll}'."\n";
+print OUT '\multicolumn{2}{l}{\textbf{\underline{Target Region Coverage}}} \\\\'."\n";
+print OUT '\textbf{Number of Target Regions} & '.scalar(@coverages).' \\\\'."\n";
+print OUT '\textbf{Minimal Region Coverage} & '.$min.' \\\\'."\n";
+print OUT '\textbf{25\% Region Coverage} & '.$first.' \\\\'. "\n";
+print OUT '\textbf{50\% (Median) Region Coverage} & '.$med.' \\\\'. "\n";
+print OUT '\textbf{75\% Region Coverage} & '.$third.' \\\\'. "\n";
+print OUT '\textbf{Maximal Region Coverage} & '.$max.' \\\\'. "\n";
+print OUT '\textbf{Average Region Coverage} & '.int($eavg).' \\\\'. "\n";
+print OUT '\textbf{Mapped On Target} & '.$spec.' \\\\'."\n";
+print OUT '\multicolumn{2}{l}{\textbf{\underline{Target Base Coverage }}} \\\\'."\n";
+print OUT '\textbf{Number of Target Bases} & '.$counter.' \\\\'."\n";
+print OUT '\textbf{Average Base Coverage} & '.int($avg).' \\\\'. "\n";
+print OUT '\textbf{Non-Covered Bases} & '.$dens{'0'}.' \\\\'."\n";
+#print OUT '\textbf{Bases Covered $ge$ '.$frac.'xAvg.Cov} & '.
+print OUT '\end{tabular}\end{minipage}}'."\n";
+close OUT;
+
+# 2. GLOBAL COVERAGE OVERVIEW PER GENE
+@failedexons;
+@allexons;
+@allregions;
+@failedregions;
+if (exists($opts{'r'}) || exists($opts{'s'}) || exists($opts{'S'})) {
+	# count columns
+	my $head = `head -n 1 $wd/Targets.Global.Coverage`;
+	chomp($head);
+	my @cols = split(/\t/,$head);
+	my $nrcols = scalar(@cols);
+	my $covcol = $nrcols - 3;
+	# Coverage Plots for each gene => barplots in R, table here.
+	open IN, "$wd/Targets.Global.Coverage";
+	my $currgroup = '';
+	my $startline = 0;
+	my $stopline = 0;
+	$linecounter = 0;
+	while (<IN>) {
+		$linecounter++;
+		chomp($_);
+		my @c = split(/\t/,$_);
+		push(@allregions,$c[0].'-'.$c[1].'-'.$c[2]);
+		my $group = $c[3];
+		## coverage failure?
+		if ($c[$nrcol-1] < 1 || $c[$covcol-1] < $thresh) {
+			push(@failedexons,$group);
+			push(@failedregions,$c[0].'-'.$c[1].'-'.$c[2]);
+		}
+		## store exon
+		push(@allexons,$group);
+		## extract and check gene
+		$group =~ s/^(\S+)[\|\s](.+)/$1/;
+		if ($group ne $currgroup ) {
+		    if ($currgroup ne '') {
+			# new gene, make plot. 
+			open OUT, ">$wd/Rout/barplot.R";
+			print OUT 'coveragetable <- read.table("../Targets.Global.Coverage",as.is=TRUE,sep="\t",header=FALSE)'."\n";
+			print OUT 'coverage <- coveragetable[c('.$startline.':'.$stopline.'),'.$covcol.']'."\n";
+			print OUT 'entries <- coveragetable[c('.$startline.':'.$stopline.'),4]'."\n";
+			print OUT 'entries <- sub("\\\\S+\\\\|","",entries,perl=TRUE)'."\n";
+			print OUT 'coverage[coverage < 1] <- 1'."\n";
+			print OUT 'colors <- c(rep("grey",length(coverage)))'."\n";
+			# coverage not whole target region => orange
+			print OUT 'covperc <- coveragetable[c('.$startline.':'.$stopline.'),'.$nrcols.']'."\n";
+			print OUT 'colors[covperc<1] <- "orange"'."\n";
+			# coverage below threshold => red
+			print OUT 'colors[coverage<'.$thresh.'] <- "red"'."\n";
+
+			if ($stopline - $startline > 20) {
+				$scale = 2;
+			}
+			else {
+				$scale = 1;
+			}
+			my $width = 480 * $scale;
+			my $height = 240 * $scale;
+			print OUT 'png(file="../Plots/Coverage_'.$currgroup.'.png", bg="white", width='.$width.', height='.$height.')'."\n";
+			print OUT 'ylim = c(0,max(max(log10(coverage),log10('.($thresh+20).'))))'."\n";
+			print OUT 'mp <- barplot(log10(coverage),col=colors,main="Exon Coverage for '.$currgroup.'",ylab="Log10(Coverage)",ylim=ylim)'."\n";
+			print OUT 'text(mp, log10(coverage) + '.(0.4/$scale).',format(coverage),xpd = TRUE,srt=90)'."\n";
+			print OUT 'text(mp,par("usr")[3]-0.05,labels=entries,srt=45,adj=1,xpd=TRUE)'."\n";
+			print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n";
+			print OUT 'graphics.off()'."\n";
+			close OUT;
+			system("cd $wd/Rout && Rscript barplot.R");
+			if ($scale == 1) {
+				push(@small,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}');
+			}
+			else {
+				push(@large,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}');
+			}
+
+		    }
+		    $currgroup = $group;
+		    $startline = $linecounter;
+		}
+		$stopline = $linecounter;
+	}
+	close IN;
+	if ($currgroup ne '') {
+		# last gene, make plot. 
+		open OUT, ">$wd/Rout/barplot.R";
+		print OUT 'coveragetable <- read.table("../Targets.Global.Coverage",as.is=TRUE,sep="\t",header=FALSE)'."\n";
+		print OUT 'coverage <- coveragetable[c('.$startline.':'.$stopline.'),'.$covcol.']'."\n";
+		print OUT 'entries <- coveragetable[c('.$startline.':'.$stopline.'),4]'."\n";
+		print OUT 'entries <- sub("\\\\S+\\\\|","",entries,perl=TRUE)'."\n";
+		print OUT 'coverage[coverage < 1] <- 1'."\n";
+		print OUT 'colors <- c(rep("grey",length(coverage)))'."\n";
+		print OUT 'colors[coverage<'.$thresh.'] <- "red"'."\n";
+
+		if ($stopline - $startline > 20) {
+			$scale = 2;
+		}
+		else {
+			$scale = 1;
+		}
+		my $width = 480 * $scale;
+		my $height = 240 * $scale;
+		print OUT 'png(file="../Plots/Coverage_'.$currgroup.'.png", bg="white", width='.$width.', height='.$height.')'."\n";
+		print OUT 'ylim = c(0,max(max(log10(coverage),log10('.($thresh+20).'))))'."\n";
+		print OUT 'mp <- barplot(log10(coverage),col=colors,main="Exon Coverage for '.$currgroup.'",ylab="Log10(Coverage)", ylim=ylim)'."\n";
+		print OUT 'text(mp, log10(coverage) + log10(2),format(coverage),xpd = TRUE,srt=90)'."\n";
+		print OUT 'text(mp,par("usr")[3]-0.1,labels=entries,srt=45,adj=1,xpd=TRUE)'."\n";
+		print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n";
+		print OUT 'graphics.off()'."\n";
+		close OUT;
+		system("cd $wd/Rout && Rscript barplot.R");
+		if ($scale == 1) {
+			push(@small,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}');
+		}
+		else {
+			push(@large,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}');
+		}
+	}
+	## print to TEX
+	open OUT, ">>$wd/Report/Report.tex";
+	print OUT '\subsection*{Gene Summaries}'."\n";
+	print OUT '\underline{Legend:} \\\\'."\n";
+	print OUT '{\color{red}\textbf{RED:} Coverage did not reach set threshold of '.$thresh.'} \\\\'."\n";
+	print OUT '{\color{orange}\textbf{ORANGE:} Coverage was incomplete for the exon. Overruled by red.} \\\\' ."\n";
+	$col = 1;
+	foreach (@small) {
+		if ($col > 2) {
+			$col = 1;
+			print OUT "\n";
+		}
+		print OUT '\begin{minipage}{0.5\linewidth}\centering'."\n";
+		print OUT $_."\n";
+		print OUT '\end{minipage}'."\n";
+		$col++;
+	}
+	## new line 
+	if ($col == 2) {
+		print OUT '\\\\'." \n";
+	}
+	foreach(@large) {
+		print OUT $_."\n";
+	}
+	close OUT;
+	
+}
+
+# 3. Detailed overview of failed exons (globally failed)
+if (exists($opts{'s'})) {
+	# count columns
+	my $head = `head -n 1 $wd/Targets.Position.Coverage`;
+	chomp($head);
+	my @cols = split(/\t/,$head);
+	my $nrcols = scalar(@cols);
+	my $covcol = $nrcols;
+	my $poscol = $nrcols -1;
+	# tex section header
+	open TEX, ">>$wd/Report/Report.tex";
+	print TEX '\subsection*{Failed Exon Plots}'."\n";
+	$col = 1;
+	print TEX '\underline{NOTE:} Only exons with global coverage $<$'.$thresh.' or incomplete coverage were plotted \\\\'."\n";
+	foreach(@failedregions) {
+		if ($col > 2) {
+			$col = 1;
+			print TEX "\n";
+		}
+		# which exon
+		my $region = $_;
+		my $exon = $filehash{$region}{'exon'};
+		# link exon to tmp file
+		my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt";
+		## determine transcript orientation and location
+		my $firstline = `head -n 1 $exonfile`;
+		my @firstcols = split(/\t/,$firstline);
+		my $orient = $firstcols[5];
+		my $genomicchr = $firstcols[0];
+		my $genomicstart = $firstcols[1];
+		my $genomicstop = $firstcols[2];
+		if ($orient eq '+') {
+			$bps = $genomicstop - $genomicstart + 1;
+			$subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."+".$de->format_number($genomicstop);
+		}
+		else {
+			$bps = $genomicstop - $genomicstart + 1;
+			$subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."-".$de->format_number($genomicstop);
+		}
+		# print Rscript
+		open OUT, ">$wd/Rout/exonplot.R";
+		print OUT 'coveragetable <- read.table("'.$exonfile.'",as.is=TRUE,sep="\t",header=FALSE)'."\n";
+		print OUT 'coverage <- coveragetable[,'.$covcol.']'."\n";
+		print OUT 'coverage[coverage < 1] <- 1'."\n";
+		print OUT 'positions <- coveragetable[,'.$poscol.']'."\n";
+		
+		my $width = 480 ;
+		my $height = 240 ;
+		my $exonstr = $exon;
+		$exonstr =~ s/\s/_/g;
+		$exon =~ s/_/ /g;
+		$exon =~ s/\|/ /g;
+		print OUT 'png(file="../Plots/Coverage_'.$exonstr.'.png", bg="white", width='.$width.', height='.$height.')'."\n";
+		print OUT 'ylim = c(0,log10(max(max(coverage),'.($thresh+10).')))'."\n";
+		if ($orient eq '-') {
+			print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",xlim=rev(range(positions)),sub="(Transcribed from minus strand)")'."\n";
+			print OUT 'mtext("'.$subtitle.'")'."\n";
+		}
+		else {
+			print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",sub="(Transcribed from plus strand)")'."\n";
+			print OUT 'mtext("'.$subtitle.'")'."\n";
+		}
+		print OUT 'lines(positions,log10(coverage))'."\n";
+		print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n";
+		print OUT 'failedpos <- positions[coverage<'.$thresh.']'."\n";
+		print OUT 'failedcov <- coverage[coverage<'.$thresh.']'."\n";
+		print OUT 'points(failedpos,log10(failedcov),col="red",pch=19)'."\n";
+		print OUT 'graphics.off()'."\n";
+		close OUT;
+		# run R script
+		system("cd $wd/Rout && Rscript exonplot.R");
+		# Add to .TEX
+		print TEX '\begin{minipage}{0.5\linewidth}\centering'."\n";
+		print TEX '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$exonstr.'.png}'."\n";
+		print TEX '\end{minipage}'."\n";
+		$col++;
+	}
+}
+
+## plot failed (subregion) or all exons
+if (exists($opts{'S'}) || exists($opts{'A'})) {
+	# count columns
+	my $head = `head -n 1 $wd/Targets.Position.Coverage`;
+	chomp($head);
+	my @cols = split(/\t/,$head);
+	my $nrcols = scalar(@cols);
+	my $covcol = $nrcols;
+	my $poscol = $nrcols -1;
+	# tex section header
+	open TEX, ">>$wd/Report/Report.tex";
+	print TEX '\subsection*{Failed Exon Plots}'."\n";
+	if (exists($opts{'S'})) {
+		print TEX '\underline{NOTE:} ALL exons were tested for local coverage $<$'.$thresh.' \\\\'."\n";
+	}
+	elsif (exists($opts{'A'})) {
+		print TEX '\underline{NOTE:} ALL exons are plotted, regardless of coverage \\\\'."\n";
+	}
+	$col = 1;
+	foreach(@allregions) {
+		if ($col > 2) {
+			$col = 1;
+			print TEX "\n";
+		}
+		# which exon
+		my $region = $_;
+		my $exon = $filehash{$region}{'exon'};
+		# grep exon to tmp file
+		my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt";
+		## determine transcript orientation.
+                my $firstline = `head -n 1 $exonfile`;
+                my @firstcols = split(/\t/,$firstline);
+                my $orient = $firstcols[5];
+		my $genomicchr = $firstcols[0];
+		my $genomicstart = $firstcols[1];
+		my $genomicstop = $firstcols[2];
+
+		if ($orient eq '+') {
+			$bps = $genomicstop - $genomicstart + 1;
+			$subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."+".$de->format_number($genomicstop);
+
+		}
+		else {
+			$bps = $genomicstop - $genomicstart + 1;
+			$subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."-".$de->format_number($genomicstop);
+
+		}
+
+		# check if failed
+		if (exists($opts{'S'})) {
+			my $cs = `cut -f $covcol '$exonfile' `;
+			my @c = split(/\n/,$cs);
+			@c = sort { $a <=> $b } @c;
+			if ($c[0] >= $thresh) {
+				# lowest coverage > threshold => skip
+				next;
+			}
+		}
+		# print Rscript
+		open OUT, ">$wd/Rout/exonplot.R";
+		print OUT 'coveragetable <- read.table("'.$exonfile.'",as.is=TRUE,sep="\t",header=FALSE)'."\n";
+		print OUT 'coverage <- coveragetable[,'.$covcol.']'."\n";
+		print OUT 'coverage[coverage < 1] <- 1'."\n";
+		print OUT 'positions <- coveragetable[,'.$poscol.']'."\n";
+		my $width = 480 ;
+		my $height = 240 ;
+		my $exonstr = $exon;
+		$exonstr =~ s/\s/_/g;
+		$exon =~ s/_/ /g;
+		$exon =~ s/\|/ /g;
+		print OUT 'png(file="../Plots/Coverage_'.$exonstr.'.png", bg="white", width='.$width.', height='.$height.')'."\n";
+		print OUT 'ylim = c(0,log10(max(max(coverage),'.($thresh+10).')))'."\n";
+		if ($orient eq '-') {
+			print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",xlim=rev(range(positions)),sub="(Transcribed from minus strand)")'."\n";
+			print OUT 'mtext("'.$subtitle.'")'."\n";
+                }
+                else {
+			print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",sub="(Transcribed from plus strand)")'."\n";
+			print OUT 'mtext("'.$subtitle.'")'."\n";
+                }
+		
+		print OUT 'lines(positions,log10(coverage))'."\n";
+		print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n";
+		print OUT 'failedpos <- positions[coverage<'.$thresh.']'."\n";
+		print OUT 'failedcov <- coverage[coverage<'.$thresh.']'."\n";
+		print OUT 'points(failedpos,log10(failedcov),col="red",pch=19)'."\n";
+		print OUT 'graphics.off()'."\n";
+		close OUT;
+		# run R script
+		system("cd $wd/Rout && Rscript exonplot.R");
+		# Add to .TEX
+		print TEX '\begin{minipage}{0.5\linewidth}\centering'."\n";
+		print TEX '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$exonstr.'.png}'."\n";
+		print TEX '\end{minipage}'."\n";
+		$col++;
+	}
+}
+## list failed exons
+if (exists($opts{'L'})) {
+	# count columns
+	my $head = `head -n 1 $wd/Targets.Position.Coverage`;
+	chomp($head);
+	my @cols = split(/\t/,$head);
+	my $nrcols = scalar(@cols);
+	my $covcol = $nrcols;
+	my $poscol = $nrcols -1;
+	## hash to print
+	# tex section header
+	open TEX, ">>$wd/Report/Report.tex";
+	print TEX '\subsection*{List of Failed Exons}'."\n";
+	print TEX '\underline{NOTE:} ALL exons were tested for local coverage $<$'.$thresh.' \\\\'."\n";
+	print TEX '{\footnotesize\begin{longtable}[l]{@{\extracolsep{\fill}}llll}'."\n".'\hline'."\n";
+	print TEX '\textbf{Target Name} & \textbf{Genomic Position} & \textbf{Avg.Coverage} & \textbf{Min.Coverage} \\\\'."\n".'\hline'."\n";
+	print TEX '\endhead'."\n";
+	print TEX '\hline '."\n".'\multicolumn{4}{r}{{\textsl{\footnotesize Continued on next page}}} \\\\ '."\n".'\hline' ."\n". '\endfoot' . "\n". '\endlastfoot' . "\n";
+ 
+	$col = 1;
+	open IN, "$wd/Targets.Global.Coverage";
+	while (<IN>) {
+		chomp($_);
+		my @p = split(/\t/,$_);
+		my $region = $p[0].'-'.$p[1].'-'.$p[2];
+		my $exon = $filehash{$region}{'exon'};
+		# grep exon to tmp file
+		my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt";
+		## determine transcript orientation.
+                my $firstline = `head -n 1 $exonfile`;
+                my @firstcols = split(/\t/,$firstline);
+                my $orient = $firstcols[5];
+		my $genomicchr = $firstcols[0];
+		my $genomicstart = $firstcols[1];
+		my $genomicstop = $firstcols[2];
+
+		if ($orient eq '+') {
+			$bps = $genomicstop - $genomicstart + 1;
+			$subtitle = "$genomicchr:".$de->format_number($genomicstart)."+".$de->format_number($genomicstop);
+
+		}
+		else {
+			$bps = $genomicstop - $genomicstart + 1;
+			$subtitle = "$genomicchr:".$de->format_number($genomicstart)."-".$de->format_number($genomicstop);
+		}
+
+		# check if failed
+		my $cs = `cut -f $covcol '$exonfile' `;
+		my @c = split(/\n/,$cs);
+		my ($avg,$med,$min,$max,$first,$third,$ontarget) = arraystats(@c);
+		
+		if ($min >= $thresh) {
+			# lowest coverage > threshold => skip
+			next;
+		}
+		
+		# print to .tex table 
+		if (length($exon) > 30) {
+			$exon = substr($exon,0,27) . '...';
+		}
+		$exon =~ s/_/ /g;
+		$exon =~ s/\|/ /g;
+
+		print TEX "$exon & $subtitle & ".int($avg)." & $min ".'\\\\'."\n"; 
+	}
+	close IN;
+	print TEX '\hline'."\n";
+	print TEX '\end{longtable}}'."\n";
+	close TEX;
+}
+
+
+## Close document
+open OUT, ">>$wd/Report/Report.tex";
+print OUT '\label{endofdoc}'."\n";
+print OUT '\end{document}'."\n";
+close OUT;
+system("cd $wd/Report && pdflatex Report.tex > /dev/null 2>&1 && pdflatex Report.tex > /dev/null 2>&1 ");
+
+## mv report to output file
+system("cp -f $wd/Report/Report.pdf '$pdffile'");
+##create tar.gz file
+system("mkdir $wd/Results");
+system("cp -Rf $wd/Plots $wd/Results/");
+system("cp -Rf $wd/Report/ $wd/Results/");
+if (-e "$wd/Targets.Global.Coverage") {
+	system("cp -Rf $wd/Targets.Global.Coverage $wd/Results/");
+}
+if (-e "$wd/Targets.Position.Coverage") {
+	system("cp -Rf $wd/Targets.Position.Coverage $wd/Results/");
+}
+
+system("cd $wd  && tar czf '$tarfile' Results/");
+## clean up (galaxy stores outside wd)
+system("rm -Rf $wd");
+###############
+## FUNCTIONS ##
+###############
+sub arraystats{
+	my @array = @_;
+	my $count = scalar(@array);
+	@array = sort { $a <=> $b } @array;
+	# median
+	my $median = 0;
+	if ($count % 2) { 
+		$median = $array[int($count/2)]; 
+	} else { 
+		$median = ($array[$count/2] + $array[$count/2 - 1]) / 2; 
+	} 
+	# average 
+	my $sum = 0;
+	foreach (@array) { $sum += $_; } 
+	my $average = $sum / $count;
+	# quantiles (rounded)
+	my $quart = int($count/4) ;	
+	my $first = $array[$quart];
+	my $third = $array[($quart*3)];
+	my $min = $array[0];
+	my $max = $array[($count-1)];
+	return ($average,$median,$min,$max,$first,$third,$sum);
+}
+
+sub GlobalSummary {
+	my ($bam,$targets) = @_;
+
+	my $command = "cd $wd && coverageBed -abam $bam -b $targets > $wd/Targets.Global.Coverage";
+	if (exists($commandsrun{$command})) {
+		return;
+	}
+	system($command);
+	$commandsrun{$command} = 1;
+}
+
+sub CoveragePerRegion {
+	my ($bam,$targets) = @_;
+	my $command = "cd $wd && coverageBed -abam $bam -b $targets > $wd/Targets.Global.Coverage";
+	if (exists($commandsrun{$command})) {
+		return;
+	}
+	system($command);
+	$commandsrun{$command} = 1;
+}
+
+sub SubRegionCoverage {
+	my ($bam,$targets) = @_;
+	my $command = "cd $wd && coverageBed -abam $bam -b $targets -d > $wd/Targets.Position.Coverage";
+	system($command);
+	$commandsrun{$command} = 1;
+}
+