Mercurial > repos > bgruening > bismark

diff bismark_pretty_report/bismark2report @ 7:fcadce4d9a06 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
author: bgruening
date: Sat, 06 May 2017 13:18:09 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bismark_pretty_report/bismark2report	Sat May 06 13:18:09 2017 -0400
@@ -0,0 +1,1237 @@
+#!/usr/bin/env perl
+use warnings;
+use strict;
+use Getopt::Long;
+use FindBin qw($Bin);
+use lib "$Bin/../lib";
+
+## This program is Copyright (C) 2010-16, Felix Krueger (felix.krueger@babraham.ac.uk)
+
+## This program is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+
+## You should have received a copy of the GNU General Public License
+## along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+my $bismark2report_version = 'v0.16.3';
+my (@alignment_reports,@dedup_reports,@splitting_reports,@mbias_reports,@nuc_reports);
+
+my ($output_dir,$verbose,$manual_output_file) = process_commandline();
+
+# print join (",",@alignment_reports)."\n";
+# print join (",",@dedup_reports)."\n";
+# print join (",",@splitting_reports)."\n";
+# print join (",",@mbias_reports)."\n";
+# print join (",",@nuc_reports)."\n";
+
+while (@alignment_reports){
+
+  my $alignment_report = shift @alignment_reports;
+  my $dedup_report     = shift @dedup_reports;
+  my $splitting_report = shift @splitting_reports;
+  my $mbias_report     = shift @mbias_reports;
+  my $nuc_report       = shift @nuc_reports;
+
+  ### HTML OUTPUT FILE
+  my $report_output = $alignment_report;
+  $report_output =~ s/^.*\///; # deleting optional path information
+  $report_output =~ s/\.txt$//;
+  $report_output =~ s/$/.html/;
+
+  # if -o output_file was specified we are going to use that name preferentially. This may only happen if there is a single report in the folder, or if a single report has been specified manually
+  if ($manual_output_file){
+      warn "A specific output filename was specified: $manual_output_file. Using that one instead of deriving the filename\n"; sleep(1);
+      $report_output = $manual_output_file;
+  }
+
+  $report_output = $output_dir.$report_output;
+  warn "\nWriting Bismark HTML report to >> $report_output <<\n\n";
+
+  my $doc = read_report_template(); #reading and storing the entire report template
+
+  # BISMARK ALIGNMENT REPORT (mandatory)
+  warn "="x110,"\n";
+  warn "Using the following alignment report:\t\t> $alignment_report <\n";
+  # DEDUPLICATION REPORT (optional)
+  if ($dedup_report){
+      warn "Using the following deduplication report:\t> $dedup_report <\n";
+  }
+  else{
+      warn "No deduplication report file specified, skipping this step\n";
+  }
+
+  # SPLITTING REPORT (optional)
+  if ($splitting_report){
+    warn "Using the following splitting report:\t\t> $splitting_report <\n";
+  }
+  else{
+    warn "No splitting report file specified, skipping this step\n";
+  }
+
+  # M-BIAS REPORT (optional)
+  if ($mbias_report){
+      warn "Using the following M-bias report:\t\t> $mbias_report <\n";
+  }
+  else{
+      warn "No M-bias report file specified, skipping this step\n";
+  }
+  
+  # NUCLEOTIDE COVERAGE REPORT (optional)
+  if ($nuc_report){
+      warn "Using the following nucleotide coverage report:\t> $nuc_report <\n";
+  }
+  else{
+      warn "No nucleotide coverage report file specified, skipping this step\n";
+  } 
+  warn "="x110,"\n\n\n";
+  $verbose and sleep(3);
+
+  # creating timestamp
+  $doc = getLoggingTime($doc);
+
+  $doc = read_alignment_report($alignment_report,$doc); # mandatory
+
+  if ($dedup_report){ # optional
+    $doc = read_deduplication_report($dedup_report,$doc);
+
+    # removing the delete tags in the html template
+    $doc =~ s/\{\{start_deletion_duplication\}\}//g;
+    $doc =~ s/\{\{end_deletion_duplication\}\}//g;
+  }
+  else{
+    # removing the entire graph and table section for the deduplication part
+    warn "Removing the duplication section from the HTML report\n" if ($verbose);
+    $doc =~ s/(\{\{start_deletion_duplication.*?end_deletion_duplication\}\})//gs; # s includes newline chars; non-greedy
+    warn "Deleting the following content:\n$1\n\n" if ($verbose);
+    sleep(3) if ($verbose);
+  }
+
+  if ($nuc_report){ # optional
+      $doc = read_nucleotide_coverage_report($nuc_report,$doc);
+      
+      # removing the delete tags in the html template
+      $doc =~ s/\{\{start_deletion_nucleotide_coverage\}\}//g;
+      $doc =~ s/\{\{end_deletion_nucleotide_coverage\}\}//g;
+  }
+  else{
+      # removing the entire graph and table section for the nucleotide coverage part
+      warn "Removing the nucleotide coverage section from the HTML report\n" if ($verbose);
+      $doc =~ s/(\{\{start_deletion_nucleotide_coverage.*?end_deletion_nucleotide_coverage\}\})//gs; # s includes newline chars; non-greedy
+      warn "Deleting the following content:\n$1\n\n" if ($verbose);
+      sleep(3) if ($verbose);
+  }
+
+  if ($splitting_report){ # optional
+    $doc = read_splitting_report($splitting_report,$doc);
+
+    # removing the delete tags in the html template
+    $doc =~ s/\{\{start_deletion_splitting\}\}//g;
+    $doc =~ s/\{\{end_deletion_splitting\}\}//g;
+  }
+  else{
+    # removing the entire graph and table section for the deduplication part
+    warn "Removing the splitting report section from the HTML report\n" if ($verbose);
+    $doc =~ s/(\{\{start_deletion_splitting.*?end_deletion_splitting\}\})//gs; # s includes newline chars; non-greedy
+    warn "Deleting the following content:\n$1\n\n" if ($verbose);
+    sleep(3) if ($verbose);
+  }
+
+  if ($mbias_report){ # optional
+    (my $state, $doc) = read_mbias_report($mbias_report,$doc);
+
+    # removing the delete tags in the html template
+    $doc =~ s/\{\{start_deletion_mbias\}\}//g;
+    $doc =~ s/\{\{end_deletion_mbias\}\}//g;
+
+    warn "Reported read state: $state\n\n" if ($verbose);
+    # if the report was single-end we need to remove the second plot only
+    if ($state eq 'single'){
+      $doc =~ s/(\{\{start_deletion_mbias_2.*?end_deletion_mbias_2\}\})//gs; # s includes newline chars; non-greedy
+      warn "Deleting the following content:\n$1\n\n" if ($verbose);
+    }
+    else{
+      $doc =~ s/\{\{start_deletion_mbias_2\}\}//g;
+      $doc =~ s/\{\{end_deletion_mbias_2\}\}//g;
+    }
+  }
+  else{
+    # removing the entire graph and table section for the deduplication part
+    $doc =~ s/(\{\{start_deletion_mbias.*?end_deletion_mbias\}\})//gs; # s includes newline chars; non-greedy
+    warn "Deleting the following content:\n$1\n\n" if ($verbose);
+    sleep(3) if ($verbose);
+  }
+
+  write_out_report($report_output,$doc);
+
+}
+
+sub write_out_report{
+  my ($report_output,$doc) = @_;
+  open (OUT,'>',$report_output) or die "Failed to write to output file $report_output: $!\n\n";
+  print OUT $doc;
+}
+
+sub getLoggingTime {
+  my $doc = shift;
+  my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(time);
+
+  my $time = sprintf ("%02d:%02d", $hour,$min,$sec);
+  my $date = sprintf ("%04d-%02d-%02d", $year+1900,$mon+1,$mday);
+  warn "Using Time: $time, and date: $date\n\n" if ($verbose);
+
+  $doc =~ s/\{\{date\}\}/$date/g;
+  $doc =~ s/\{\{time\}\}/$time/g;
+
+  return $doc;
+}
+
+
+sub read_alignment_report{
+
+  my ($alignment_report,$doc) = @_;
+
+  warn "Processing alignment report $alignment_report ...\n";
+  open (ALN,$alignment_report) or die "Couldn't read from file $alignment_report: $!\n\n";
+
+  my $unique;
+  my $no_aln;
+  my $multiple;
+  my $no_genomic;
+  my $total_seqs;
+  my $bismark_version;
+  my $input_filename;
+
+  my $unique_text;
+  my $no_aln_text;
+  my $multiple_text;
+  my $total_seq_text;
+
+  my $total_C_count;
+  my ($meth_CpG,$meth_CHG,$meth_CHH,$meth_unknown);
+  my ($unmeth_CpG,$unmeth_CHG,$unmeth_CHH,$unmeth_unknown);
+  my ($perc_CpG,$perc_CHG,$perc_CHH,$perc_unknown);
+
+  my $number_OT;
+  my $number_CTOT;
+  my $number_CTOB;
+  my $number_OB;
+
+  while (<ALN>){
+    chomp;
+
+    ### General Alignment stats
+    if ($_ =~ /^Sequence pairs analysed in total:/ ){ ## Paired-end
+      (undef,$total_seqs) = split /\t/;
+       print "Total paired seqs: >> $total_seqs <<\n" if ($verbose);
+      $total_seq_text = 'Sequence pairs analysed in total';
+    }
+    elsif ($_ =~ /^Sequences analysed in total:/ ){   ## Single-end
+      (undef,$total_seqs) = split /\t/;
+      $total_seq_text = 'Sequences analysed in total';
+      print "total single-end seqs >> $total_seqs <<\n" if ($verbose);
+    }
+
+    elsif($_ =~ /^Bismark report for: (.*) \(version: (.*)\)/){
+      $input_filename = $1;
+      $bismark_version = $2;
+      print "Input filename(s) >> $input_filename <<\n" if ($verbose);
+      print "Bismark version >> $bismark_version <<\n" if ($verbose);
+    }
+
+    elsif($_ =~ /^Number of paired-end alignments with a unique best hit:/){ ## Paired-end
+      (undef,$unique) = split /\t/;
+      print "Unique PE>> $unique <<\n" if ($verbose);;
+      $unique_text = 'Paired-end alignments with a unique best hit';
+    }
+    elsif($_ =~ /^Number of alignments with a unique best hit from/){            ## Single-end
+      (undef,$unique) = split /\t/;
+      print "Unique SE>> $unique <<\n" if ($verbose);
+      $unique_text = 'Single-end alignments with a unique best hit';
+    }
+
+    elsif($_ =~ /^Sequence pairs with no alignments under any condition:/){  ## Paired-end
+      (undef,$no_aln) = split /\t/;
+      print "No alignment PE >> $no_aln <<\n" if ($verbose);
+      $no_aln_text = 'Pairs without alignments under any condition';
+    }
+    elsif($_ =~ /^Sequences with no alignments under any condition:/){  ## Single-end
+      (undef,$no_aln) = split /\t/;
+      print "No alignments SE>> $no_aln <<\n" if ($verbose);
+      $no_aln_text = 'Sequences without alignments under any condition';
+    }
+
+    elsif($_ =~ /^Sequence pairs did not map uniquely:/){ ## Paired-end
+      (undef,$multiple) = split /\t/;
+      print "Multiple alignments PE >> $multiple <<\n" if ($verbose);
+      $multiple_text = 'Pairs that did not map uniquely';
+    }
+    elsif($_ =~ /^Sequences did not map uniquely:/){ ## Single-end
+      (undef,$multiple) = split /\t/;
+      print "Multiple alignments SE >> $multiple <<\n" if ($verbose);
+      $multiple_text = 'Sequences that did not map uniquely';
+    }
+
+    elsif($_ =~ /^Sequence pairs which were discarded because genomic sequence could not be extracted:/){ ## Paired-end
+      (undef,$no_genomic) = split /\t/;
+      print "No genomic sequence PE >> $no_genomic <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Sequences which were discarded because genomic sequence could not be extracted:/){ ## Single-end
+      (undef,$no_genomic) = split /\t/;
+      print "No genomic sequence SE>> $no_genomic <<\n" if ($verbose);
+    }
+
+    ### Context Methylation
+    elsif($_ =~ /^Total number of C/ ){
+      (undef,$total_C_count) = split /\t/;
+      print "Total number C >> $total_C_count <<\n" if ($verbose);
+    }
+
+      elsif($_ =~ /^Total methylated C\'s in CpG context:/ ){
+      (undef,$meth_CpG) = split /\t/;
+      print "meth CpG >> $meth_CpG <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Total methylated C\'s in CHG context:/ ){
+      (undef,$meth_CHG) = split /\t/;
+      print "meth CHG >> $meth_CHG <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Total methylated C\'s in CHH context:/ ){
+      (undef,$meth_CHH) = split /\t/;
+      print "meth CHH >> $meth_CHH <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Total methylated C\'s in Unknown context:/ ){
+      (undef,$meth_unknown) = split /\t/;
+      print "meth Unknown >> $meth_unknown <<\n" if ($verbose);
+    }
+
+    elsif($_ =~ /^Total unmethylated C\'s in CpG context:/ or $_ =~ /^Total C to T conversions in CpG context:/){
+      (undef,$unmeth_CpG) = split /\t/;
+      print "unmeth CpG >> $unmeth_CpG <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Total unmethylated C\'s in CHG context:/ or $_ =~ /^Total C to T conversions in CHG context:/){
+      (undef,$unmeth_CHG) = split /\t/;
+      print "unmeth CHG >> $unmeth_CHG <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Total unmethylated C\'s in CHH context:/ or $_ =~ /^Total C to T conversions in CHH context:/){
+      (undef,$unmeth_CHH) = split /\t/;
+      print "unmeth CHH >> $unmeth_CHH <<\n"if ($verbose);
+    }
+    elsif($_ =~ /^Total unmethylated C\'s in Unknown context:/ or $_ =~ /^Total C to T conversions in Unknown context:/){
+      (undef,$unmeth_unknown) = split /\t/;
+      print "unmeth Unknown >> $unmeth_unknown <<\n" if ($verbose);
+    }
+
+    elsif($_ =~ /^C methylated in CpG context:/ ){
+      (undef,$perc_CpG) = split /\t/;
+      $perc_CpG =~ s/%//;
+      print "percentage CpG >> $perc_CpG <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^C methylated in CHG context:/ ){
+      (undef,$perc_CHG) = split /\t/;
+      $perc_CHG =~ s/%//;
+      print "percentage CHG >> $perc_CHG <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^C methylated in CHH context:/ ){
+      (undef,$perc_CHH) = split /\t/;
+      $perc_CHH =~ s/%//;
+      print "percentage CHH >> $perc_CHH <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^C methylated in Unknown context:/ ){
+      (undef,$perc_unknown) = split /\t/;
+      $perc_unknown =~ s/%//;
+      print "percentage Unknown >> $perc_unknown <<\n" if ($verbose);
+    }
+
+
+    ### Strand Origin
+
+    elsif($_ =~ /^CT\/GA\/CT:/ ){             ## Paired-end
+      (undef,$number_OT) = split /\t/;
+      print "Number OT PE>> $number_OT <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^CT\/CT:/ ){                 ## Single-end
+      (undef,$number_OT) = split /\t/;
+      print "Number OT SE>> $number_OT <<\n" if ($verbose);
+    }
+
+    elsif($_ =~ /^GA\/CT\/CT:/ ){             ## Paired-end
+      (undef,$number_CTOT) = split /\t/;
+      print "Number CTOT PE >> $number_CTOT <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^GA\/CT:/ ){                 ## Single-end
+      (undef,$number_CTOT) = split /\t/;
+      print "Number CTOT SE >> $number_CTOT <<\n" if ($verbose);
+    }
+
+    elsif($_ =~ /^GA\/CT\/GA:/ ){             ## Paired-end
+      (undef,$number_CTOB) = split /\t/;
+      print "Number CTOB PE >> $number_CTOB <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^GA\/GA:/ ){                 ## Single-end
+      (undef,$number_CTOB) = split /\t/;
+      print "Number CTOB SE >> $number_CTOB <<\n" if ($verbose);
+    }
+
+    elsif($_ =~ /^CT\/GA\/GA:/ ){             ## Paired-end
+      (undef,$number_OB) = split /\t/;
+      print "Number OB PE >> $number_OB <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^CT\/GA:/ ){                 ## Single-end
+      (undef,$number_OB) = split /\t/;
+      print "Number OB SE >> $number_OB <<\n" if ($verbose);
+    }
+
+
+  }
+
+  if (defined $unique and defined $no_aln and defined $multiple and defined $no_genomic and defined $total_seqs){
+    warn "Got all necessary information, editing HTML report\n"  if ($verbose);
+
+    ### General Alignment Stats
+    $doc =~ s/\{\{unique_seqs\}\}/$unique/g;
+    $doc =~ s/\{\{unique_seqs_text\}\}/$unique_text/g;
+
+    $doc =~ s/\{\{no_alignments\}\}/$no_aln/g;
+    $doc =~ s/\{\{no_alignments_text\}\}/$no_aln_text/g;
+
+    $doc =~ s/\{\{multiple_alignments\}\}/$multiple/g;
+    $doc =~ s/\{\{multiple_alignments_text\}\}/$multiple_text/g;
+
+    $doc =~ s/\{\{no_genomic\}\}/$no_genomic/g;
+
+    $doc =~ s/\{\{total_sequences_alignments\}\}/$total_seqs/g;
+    $doc =~ s/\{\{sequences_analysed_in_total\}\}/$total_seq_text/g;
+
+    $doc =~ s/\{\{filename\}\}/$input_filename/g;
+    $doc =~ s/\{\{bismark_version\}\}/$bismark_version/g;
+
+    ### Strand Origin
+    $doc =~ s/\{\{number_OT\}\}/$number_OT/g;
+    $doc =~ s/\{\{number_CTOT\}\}/$number_CTOT/g;
+    $doc =~ s/\{\{number_CTOB\}\}/$number_CTOB/g;
+    $doc =~ s/\{\{number_OB\}\}/$number_OB/g;
+
+    ### Context Methylation
+    $doc =~ s/\{\{total_C_count\}\}/$total_C_count/g;
+
+    unless (defined $perc_CpG){
+      $perc_CpG = 'N/A';
+    }
+    unless (defined $perc_CHG){
+      $perc_CHG = 'N/A';
+    }
+    unless (defined $perc_CHH){
+      $perc_CHH = 'N/A';
+    }
+    unless (defined $perc_unknown){
+      $perc_unknown = 'N/A';
+    }
+
+    ### Unknown sequence context, just for Bowtie 2 alignments
+    my $meth_unknown_inject;
+    my $unmeth_unknown_inject;
+    my $perc_unknown_inject;
+
+    if (defined $meth_unknown){ # if one Unknown context file is present, so should the others
+      $meth_unknown_inject = "     <tr>
+                                <th>Methylated C's in Unknown context</th>
+    				<td>$meth_unknown</td>
+    			</tr>";
+      $unmeth_unknown_inject = "     <tr>
+                                <th>Unmethylated C's in Unknown context</th>
+    				<td>$unmeth_unknown</td>
+    			</tr>";
+      $perc_unknown_inject = "     <tr>
+                                <th>Methylated C's in Unknown context</th>
+    				<td>$perc_unknown%</td>
+    			</tr>";
+    }
+    else{
+      $meth_unknown_inject = $unmeth_unknown_inject = $perc_unknown_inject = '';
+    }
+
+    ### injecting this into the table
+    $doc =~ s/\{\{meth_unknown\}\}/$meth_unknown_inject/g;
+    $doc =~ s/\{\{unmeth_unknown\}\}/$unmeth_unknown_inject/g;
+    $doc =~ s/\{\{perc_unknown\}\}/$perc_unknown_inject/g;
+
+
+    $doc =~ s/\{\{meth_CpG\}\}/$meth_CpG/g;
+    $doc =~ s/\{\{meth_CHG\}\}/$meth_CHG/g;
+    $doc =~ s/\{\{meth_CHH\}\}/$meth_CHH/g;
+
+    $doc =~ s/\{\{unmeth_CpG\}\}/$unmeth_CpG/g;
+    $doc =~ s/\{\{unmeth_CHG\}\}/$unmeth_CHG/g;
+    $doc =~ s/\{\{unmeth_CHH\}\}/$unmeth_CHH/g;
+
+    $doc =~ s/\{\{perc_CpG\}\}/$perc_CpG/g;
+    $doc =~ s/\{\{perc_CHG\}\}/$perc_CHG/g;
+    $doc =~ s/\{\{perc_CHH\}\}/$perc_CHH/g;
+
+    my ($perc_CpG_graph, $perc_CHG_graph,$perc_CHH_graph,$perc_unknown_graph);
+
+    if ($perc_CpG eq 'N/A'){
+      $perc_CpG_graph = 0; # values of 0 won't show in the graph and won't produce errors
+    }
+    else{
+      $perc_CpG_graph =  $perc_CpG;
+    }
+
+    if ($perc_CHG eq 'N/A'){
+      $perc_CHG_graph = 0; # values of 0 won't show in the graph and won't produce errors
+    }
+    else{
+      $perc_CHG_graph =  $perc_CHG;
+    }
+
+    if ($perc_CHH eq 'N/A'){
+      $perc_CHH_graph = 0; # values of 0 won't show in the graph and won't produce errors
+    }
+    else{
+      $perc_CHH_graph =  $perc_CHH;
+    }
+
+    if ($perc_unknown eq 'N/A'){
+      $perc_unknown_graph = 0; # values of 0 won't show in the graph and won't produce errors
+    }
+    else{
+      $perc_unknown_graph =  $perc_unknown;
+    }
+
+    $doc =~ s/\{\{perc_CpG_graph\}\}/$perc_CpG_graph/g;
+    $doc =~ s/\{\{perc_CHG_graph\}\}/$perc_CHG_graph/g;
+    $doc =~ s/\{\{perc_CHH_graph\}\}/$perc_CHH_graph/g;
+
+  }
+  else{
+    warn "Am I missing something?\n\n";
+  }
+
+  warn "Complete\n\n";
+  return $doc;
+}
+
+
+sub read_deduplication_report{
+
+  my ($dedup_report,$doc) = @_;
+
+  warn "Processing deduplication report $dedup_report ...\n";
+  open (DEDUP,$dedup_report) or die "Couldn't read from file $dedup_report: $!\n\n";
+
+  my $total_seqs;
+  my $dups;
+  my $diff_pos;
+  my $leftover;
+
+  while (<DEDUP>){
+    chomp;
+    if ($_ =~ /^Total number of alignments/){
+      (undef,$total_seqs) = split /\t/;
+      warn "Total number of seqs >> $total_seqs <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Total number duplicated/){
+      (undef,$dups) = split /\t/;
+      $dups =~ s/\s.*//; # just need the number, not the percentage
+      warn "Duplicated >> $dups <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Duplicated alignments were found at/){
+      (undef,$diff_pos) = split /\t/;
+      $diff_pos =~ s/\s.*//; # just need the number
+      warn "Different positions >> $diff_pos <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Total count of deduplicated leftover sequences: (\d+)/){
+      $leftover = $1;
+      warn "Leftover seqs >> $leftover <<\n" if ($verbose);
+    }
+  }
+
+  unless (defined $leftover){
+    if (defined $dups and defined $total_seqs){
+      $leftover = $total_seqs - $dups;
+    }
+  }
+
+  # Checking if we got all we need
+  if (defined $dups and defined $total_seqs and defined $diff_pos and defined $leftover){
+    # warn "Got all I need!\n\n";
+    $doc =~ s/\{\{seqs_total_duplicates\}\}/$total_seqs/g;
+    $doc =~ s/\{\{unique_alignments_duplicates\}\}/$leftover/g;
+    $doc =~ s/\{\{duplicate_alignments_duplicates\}\}/$dups/g;
+    $doc =~ s/\{\{different_positions_duplicates\}\}/$diff_pos/g;
+  }
+  else{
+    warn "Something went wrong... Use --verbose to get a clue...\n";
+    # skipping this plot entirely if values could not be extracted
+    return $doc;
+  }
+  warn "Complete\n\n";
+  return $doc;
+}
+
+
+sub read_nucleotide_coverage_report{
+    
+    my ($nuc_report,$doc) = @_;
+    
+    warn "Processing nucleotide coverage report '$nuc_report' ...\n";
+    open (NUC,$nuc_report) or die "Couldn't read from file $nuc_report: $!\n\n";
+    
+    my %nucs; # storing nucleotides and frequencies
+    my $linecount = 0;    
+    
+    while (<NUC>){
+	chomp;
+	$_ =~ s/\r//; # removing carriage returns
+	# warn "$_\n"; sleep(1);
+	my ($element,$count_obs,$observed,$count_exp,$expected,$coverage) = (split /\t/);
+	# warn "$element , $count_obs , $observed , $count_exp , $expected, $coverage\n"; sleep(1);
+	if ($linecount == 0){ # verifying that the data appears to be a Bismark nucleotide coverage report
+	    if ($observed eq 'percent sample'){
+		# warn "Fine, found '$observed'\n";
+	    }
+	    else{
+		die "Expected to find 'percent sample' as entry in line 1, column 3 but found '$observed'. This doesn't look like a Bismark nucleotide coverage report. Please respecify!\n";
+	    }
+	    
+	    if ($expected eq 'percent genomic'){
+		# warn "Fine, found '$expected'\n";
+	    }
+	    else{
+		die "Expected to find 'percent genomic' as entry in line 1, column 5 but found '$expected'. This doesn't look like a Bismark nucleotide coverage report. Please respecify!\n";
+	    }
+	}
+	else{
+	    $nucs{$element}->{obs}->{percent}  = $observed;
+	    $nucs{$element}->{exp}->{percent}  = $expected;
+	    $nucs{$element}->{obs}->{counts}   = $count_obs;
+	    $nucs{$element}->{exp}->{counts}   = $count_exp;
+	    $nucs{$element}->{obs}->{coverage} = $coverage; # coverage of that nucleotide in the sample
+	    warn "Element '$element' observed: $observed\n" if $verbose;
+	    warn "Element '$element' expected: $expected\n" if $verbose;
+	}
+
+	++$linecount;
+
+    }
+
+    # Checking if we got all we need
+    my $looksOK = 1;
+    foreach my $key (keys %nucs){ 
+	unless ( (defined $nucs{$key}->{obs}) and (defined $nucs{$key}->{exp})){
+	    $looksOK = 0;
+	}
+    }
+    
+    if ($looksOK){ 
+	warn "Got all necessary information, editing HTML report ...\n" if $verbose;
+	my $minmax = 0;
+	foreach my $key (sort {$a cmp $b} keys %nucs){
+	    my $nuc_obs = $nucs{$key}->{obs}->{percent};
+	    my $nuc_exp = $nucs{$key}->{exp}->{percent};
+	    my $counts_obs = $nucs{$key}->{obs}->{counts};
+	    my $counts_exp = $nucs{$key}->{exp}->{counts};
+	    my $cov = $nucs{$key}->{obs}->{coverage};
+
+	    # calculating log2 observed/expected
+	    my $ratio = $nuc_obs/$nuc_exp;
+	    #  my $logratio = sprintf ("%.2f",log($ratio)/log(2));
+	    # if (abs($logratio) > $minmax){
+	    # $minmax = abs($logratio);
+	    # } 
+	    warn "$key\tnuc_${key}_obs\t$nuc_obs\tnuc_${key}_exp\t$nuc_exp\tratio: $ratio\n" if $verbose;
+
+	    $doc =~ s/\{\{nuc_${key}_p_obs\}\}/$nuc_obs/g;
+	    $doc =~ s/\{\{nuc_${key}_p_exp\}\}/$nuc_exp/g;
+	    $doc =~ s/\{\{nuc_${key}_counts_obs\}\}/$counts_obs/g;
+	    $doc =~ s/\{\{nuc_${key}_counts_exp\}\}/$counts_exp/g;
+	    $doc =~ s/\{\{nuc_${key}_coverage\}\}/$cov/g;
+	}
+	# warn "Minimum/maxium ratio was: $minmax\n" if $verbose;
+	# $doc =~ s/\{\{nuc_minmax\}\}/$minmax/g;
+    }
+    else{
+	warn "Something went wrong, skipping this plot entirely... Use --verbose to get a clue...\n";
+	# skipping this plot entirely if values could not be extracted
+	return $doc;
+    }
+
+    warn "Complete\n\n";
+    return $doc;
+}
+
+
+sub read_splitting_report{
+
+  my ($splitting_report,$doc) = @_;
+
+  warn "Processing splitting report $splitting_report ...\n";
+  open (SPLIT,$splitting_report) or die "Couldn't read from file $splitting_report: $!\n\n";
+
+  my $total_seqs;
+
+  my $total_C_count;
+  my ($meth_CpG,$meth_CHG,$meth_CHH,$meth_unknown);
+  my ($unmeth_CpG,$unmeth_CHG,$unmeth_CHH,$unmeth_unknown);
+  my ($perc_CpG,$perc_CHG,$perc_CHH,$perc_unknown);
+
+  while (<SPLIT>){
+    chomp;
+
+    ### Context Methylation
+    if($_ =~ /^Total number of C/ ){
+      (undef,$total_C_count) = split /\t/;
+      print "total calls >> $total_C_count <<\n" if ($verbose);
+    }
+
+    elsif($_ =~ /^Total methylated C\'s in CpG context:/ ){
+      (undef,$meth_CpG) = split /\t/;
+      print "meth CpG >> $meth_CpG <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Total methylated C\'s in CHG context:/ ){
+      (undef,$meth_CHG) = split /\t/;
+      print "meth CHG>> $meth_CHG <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Total methylated C\'s in CHH context:/ ){
+      (undef,$meth_CHH) = split /\t/;
+      print "meth CHH >> $meth_CHH <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Total methylated C\'s in Unknown context:/ ){
+      (undef,$meth_unknown) = split /\t/;
+      print "meth Unknown >> $meth_unknown <<\n" if ($verbose);
+    }
+
+    elsif($_ =~ /^Total C to T conversions in CpG context:/ ){
+      (undef,$unmeth_CpG) = split /\t/;
+      print "unmeth CpG >> $unmeth_CpG <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Total C to T conversions in CHG context:/ ){
+      (undef,$unmeth_CHG) = split /\t/;
+      print "unmeth CHG >> $unmeth_CHG <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Total C to T conversions in CHH context:/ ){
+      (undef,$unmeth_CHH) = split /\t/;
+      print "unmeth CHH >> $unmeth_CHH <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^Total C to T conversions in Unknown context:/ ){
+      (undef,$unmeth_unknown) = split /\t/;
+      print "unmeth Unknown >> $unmeth_unknown <<\n" if ($verbose);
+    } 
+
+    elsif($_ =~ /^C methylated in CpG context:/ ){
+      (undef,$perc_CpG) = split /\t/;
+      $perc_CpG =~ s/%//;
+      print "percentage CpG >> $perc_CpG <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^C methylated in CHG context:/ ){
+      (undef,$perc_CHG) = split /\t/;
+      $perc_CHG =~ s/%//;
+      print "percentage CHG >> $perc_CHG <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^C methylated in CHH context:/ ){
+      (undef,$perc_CHH) = split /\t/;
+      $perc_CHH =~ s/%//;
+      print "percentage CHH >> $perc_CHH <<\n" if ($verbose);
+    }
+    elsif($_ =~ /^C methylated in Unknown context:/ ){
+      (undef,$perc_unknown) = split /\t/;
+      $perc_unknown =~ s/%//;
+      print "percentage unknown >> $perc_unknown <<\n" if ($verbose);
+    }
+  }
+
+  if (defined $meth_CpG and defined $meth_CHG and defined $meth_CHH and defined $unmeth_CpG and defined $unmeth_CHG and defined $unmeth_CHH){
+    warn "Got all necessary information, editing HTML report ...\n" if ($verbose);
+
+    ### Context Methylation
+    $doc =~ s/\{\{total_C_count_splitting\}\}/$total_C_count/g;
+
+    $doc =~ s/\{\{meth_CpG_splitting\}\}/$meth_CpG/g;
+    $doc =~ s/\{\{meth_CHG_splitting\}\}/$meth_CHG/g;
+    $doc =~ s/\{\{meth_CHH_splitting\}\}/$meth_CHH/g;
+
+    $doc =~ s/\{\{unmeth_CpG_splitting\}\}/$unmeth_CpG/g;
+    $doc =~ s/\{\{unmeth_CHG_splitting\}\}/$unmeth_CHG/g;
+    $doc =~ s/\{\{unmeth_CHH_splitting\}\}/$unmeth_CHH/g;
+
+    unless (defined $perc_CpG){
+      $perc_CpG = 'N/A';
+    }
+    unless (defined $perc_CHG){
+      $perc_CHG = 'N/A';
+    }
+    unless (defined $perc_CHH){
+      $perc_CHH = 'N/A';
+    }
+    unless (defined $perc_unknown){
+      $perc_unknown = 'N/A';
+    }
+
+    ### Unknown sequence context, just for Bowtie 2 alignments
+    my $meth_unknown_inject;
+    my $unmeth_unknown_inject;
+    my $perc_unknown_inject;
+
+    if (defined $meth_unknown){ # if one Unknown context file is present, so should the others
+      $meth_unknown_inject = "     <tr>
+                                <th>Methylated C's in Unknown context</th>
+    				<td>$meth_unknown</td>
+    			</tr>";
+      $unmeth_unknown_inject = "     <tr>
+                                <th>Unmethylated C's in Unknown context</th>
+    				<td>$unmeth_unknown</td>
+    			</tr>";
+      $perc_unknown_inject = "     <tr>
+                                <th>Methylated C's in Unknown context</th>
+    				<td>$perc_unknown%</td>
+    			</tr>";
+    }
+    else{
+      $meth_unknown_inject = $unmeth_unknown_inject = $perc_unknown_inject = '';
+    }
+
+    ### injecting this into the table
+    $doc =~ s/\{\{meth_unknown_splitting\}\}/$meth_unknown_inject/g;
+    $doc =~ s/\{\{unmeth_unknown_splitting\}\}/$unmeth_unknown_inject/g;
+    $doc =~ s/\{\{perc_unknown_splitting\}\}/$perc_unknown_inject/g;
+
+    # for the graph we need to take care that there are no N/A values in the percentage fields
+    my ($perc_CpG_graph, $perc_CHG_graph,$perc_CHH_graph,$perc_unknown_graph);
+
+    if ($perc_CpG eq 'N/A'){
+      $perc_CpG_graph = 0; # values of 0 won't show in the graph and won't produce errors
+    }
+    else{
+      $perc_CpG_graph =  $perc_CpG;
+    }
+
+    if ($perc_CHG eq 'N/A'){
+      $perc_CHG_graph = 0; # values of 0 won't show in the graph and won't produce errors
+    }
+    else{
+      $perc_CHG_graph =  $perc_CHG;
+    }
+
+    if ($perc_CHH eq 'N/A'){
+      $perc_CHH_graph = 0; # values of 0 won't show in the graph and won't produce errors
+    }
+    else{
+      $perc_CHH_graph =  $perc_CHH;
+    }
+
+    if ($perc_unknown eq 'N/A'){
+      $perc_unknown_graph = 0; # values of 0 won't show in the graph and won't produce errors
+    }
+    else{
+      $perc_unknown_graph =  $perc_unknown;
+    }
+
+    $doc =~ s/\{\{perc_CpG_graph_splitting\}\}/$perc_CpG_graph/g;
+    $doc =~ s/\{\{perc_CHG_graph_splitting\}\}/$perc_CHG_graph/g;
+    $doc =~ s/\{\{perc_CHH_graph_splitting\}\}/$perc_CHH_graph/g;
+
+    $doc =~ s/\{\{perc_CpG_splitting\}\}/$perc_CpG/g;
+    $doc =~ s/\{\{perc_CHG_splitting\}\}/$perc_CHG/g;
+    $doc =~ s/\{\{perc_CHH_splitting\}\}/$perc_CHH/g;
+  }
+  else{
+    warn "Am I missing something? Try using --verbose to get a clue...\n\n";
+  }
+  warn "Complete\n\n";
+
+  return $doc;
+
+}
+
+
+sub read_mbias_report{
+
+  my ($mbias_report,$doc) = @_;
+
+  warn "Processing M-bias report $mbias_report ...\n";
+  open (MBIAS,$mbias_report) or die "Couldn't read from file $mbias_report: $!\n\n";
+
+  my %mbias_1;
+  my %mbias_2;
+
+  my $context;
+  my $read_identity;
+  my $state = 'single'; # setting this to 'single' if there is no read 2
+
+  while (<MBIAS>){
+    chomp;
+    if ($_ =~ /^(C.{2}) context/){
+      $context = $1;
+
+      if ($_ =~ /R2/){
+	$read_identity = 2;
+	$state = 'paired';
+      }
+      else{
+	$read_identity = 1;
+      }
+
+      # warn "new context is: $context\n";
+      # warn "Read identity is: Read $read_identity\n";
+    }
+    if ($_ =~ /^\d/){
+      my ($pos,$meth,$unmeth,$perc,$coverage) = (split /\t/);
+      if ($read_identity == 1){
+	push @{$mbias_1{$context}->{coverage}}, "[$pos, $coverage]";
+	push @{$mbias_1{$context}->{perc}}, "[$pos, $perc]";
+      }
+      elsif ($read_identity == 2){
+	push @{$mbias_2{$context}->{coverage}}, "[$pos, $coverage]";
+	push @{$mbias_2{$context}->{perc}}, "[$pos, $perc]";
+      }
+      else{
+	warn "read identity was unknown : '$read_identity'\n\n";
+      }
+
+      # print join (" ",$pos,$meth,$unmeth,$perc,$coverage)."\n";
+    }
+  }
+
+  # Read 1 M-bias
+  my $r1_CpG_coverage = join (',',@{$mbias_1{'CpG'}->{coverage}});
+  my $r1_CpG_perc     = join (',',@{$mbias_1{'CpG'}->{perc}});
+
+  my $r1_CHG_coverage = join (',',@{$mbias_1{'CHG'}->{coverage}});
+  my $r1_CHG_perc     = join (',',@{$mbias_1{'CHG'}->{perc}});
+
+  my $r1_CHH_coverage = join (',',@{$mbias_1{'CHH'}->{coverage}});
+  my $r1_CHH_perc     = join (',',@{$mbias_1{'CHH'}->{perc}});
+
+  $doc =~ s/\{\{CpG_total_calls_R1\}\}/$r1_CpG_coverage/g;
+  $doc =~ s/\{\{CHG_total_calls_R1\}\}/$r1_CHG_coverage/g;
+  $doc =~ s/\{\{CHH_total_calls_R1\}\}/$r1_CHH_coverage/g;
+
+  $doc =~ s/\{\{CpG_methylation_R1\}\}/$r1_CpG_perc/g;
+  $doc =~ s/\{\{CHG_methylation_R1\}\}/$r1_CHG_perc/g;
+  $doc =~ s/\{\{CHH_methylation_R1\}\}/$r1_CHH_perc/g;
+
+  # Read 2 M-bias
+  if (%mbias_2){
+      my $r2_CpG_coverage = join (',',@{$mbias_2{'CpG'}->{coverage}});
+      my $r2_CpG_perc     = join (',',@{$mbias_2{'CpG'}->{perc}});
+      
+      my $r2_CHG_coverage = join (',',@{$mbias_2{'CHG'}->{coverage}});
+      my $r2_CHG_perc     = join (',',@{$mbias_2{'CHG'}->{perc}});
+      
+      my $r2_CHH_coverage = join (',',@{$mbias_2{'CHH'}->{coverage}});
+      my $r2_CHH_perc     = join (',',@{$mbias_2{'CHH'}->{perc}});
+      
+      $doc =~ s/\{\{CpG_total_calls_R2\}\}/$r2_CpG_coverage/g;
+      $doc =~ s/\{\{CHG_total_calls_R2\}\}/$r2_CHG_coverage/g;
+      $doc =~ s/\{\{CHH_total_calls_R2\}\}/$r2_CHH_coverage/g;
+      
+      $doc =~ s/\{\{CpG_methylation_R2\}\}/$r2_CpG_perc/g;
+      $doc =~ s/\{\{CHG_methylation_R2\}\}/$r2_CHG_perc/g;
+      $doc =~ s/\{\{CHH_methylation_R2\}\}/$r2_CHH_perc/g;
+  }
+  warn "Complete\n\n";
+
+  return ($state,$doc);
+
+}
+
+
+sub read_report_template{
+  my $doc;
+  warn "Attempting to open file from: $Bin/bismark_sitrep.tpl\n\n" if ($verbose);
+  open (DOC,"$Bin/bismark_sitrep.tpl") or die $!;
+  while(<DOC>){
+    chomp;
+    $_ =~ s/\r//g;
+    $doc .= $_."\n";
+  }
+
+  close DOC or die $!;
+  return $doc;
+}
+
+
+
+sub process_commandline{
+  my $help;
+  my $output_dir;
+  my $manual_output_file;
+  my $alignment_report;
+  my $dedup_report;
+  my $splitting_report;
+  my $mbias_report;
+  my $nucleotide_coverage_report;  # stores nucleotide coverage statistics
+  my $verbose;
+
+  my $version;
+
+  my $command_line = GetOptions ('help|man'              => \$help,
+				 'dir=s'                 => \$output_dir,
+				 'o|output=s'            => \$manual_output_file,
+				 'alignment_report=s'    => \$alignment_report,
+				 'dedup_report=s'        => \$dedup_report,
+				 'splitting_report=s'    => \$splitting_report,
+				 'mbias_report=s'        => \$mbias_report,
+				 'nucleotide_report=s'   => \$nucleotide_coverage_report, 
+				 'version'               => \$version,
+				 'verbose'               => \$verbose,
+				);
+
+  ### EXIT ON ERROR if there were errors with any of the supplied options
+  unless ($command_line){
+    die "Please respecify command line options\n";
+  }
+
+  ### HELPFILE
+  if ($help){
+    print_helpfile();
+    exit;
+  }
+
+  if ($version){
+    print << "VERSION";
+
+
+                              Bismark HTML Report Module
+
+                         bismark2report version: $bismark2report_version
+                            Copyright 2010-16 Felix Krueger
+                               Babraham Bioinformatics
+                www.bioinformatics.babraham.ac.uk/projects/bismark/
+
+
+VERSION
+    exit;
+  }
+
+  ### OUTPUT DIR PATH
+  if (defined $output_dir){
+    unless ($output_dir eq ''){ # if the output dir has been passed on by the methylation extractor and is an empty string we don't want to change it
+      unless ($output_dir =~ /\/$/){
+	$output_dir =~ s/$/\//;
+      }
+    }
+  }
+  else{
+    $output_dir = '';
+  }
+
+
+  ## First we are looking for alignment reports, and then look whether there are any optional plots with the same base name
+
+  if ($alignment_report){
+    ### we only process the one alignment report (and possibly the other ones as well) that was specified
+    push @alignment_reports, $alignment_report;
+  }
+  else{ ### no alignment report specified, looking in the current directory for files ending in *E_report.txt (SE or PE)
+
+    ### looking in the current directory for report files. Less than 1 report file is not allowed
+    @alignment_reports = <*E_report.txt>;
+
+    if (scalar @alignment_reports == 0){
+      warn "Found no potential alignment reports in the current directory. Please specify a single Bismark alignment report file using the option '--alignment_report FILE'\n\n";
+      print_helpfile();
+      exit;
+    }
+    else{
+      # there are Bismark alignment report(s) in the directory
+      warn "Found ",scalar @alignment_reports," alignment reports in current directory. Now trying to figure out whether there are corresponding optional reports\n";
+    }
+  }
+
+  ### Ensuring that there isn't more than 1 file in @alignment_reports if someone manually specified an output file.
+  if (scalar @alignment_reports > 1){
+    if (defined $manual_output_file){
+      die "You cannot run bismark2report on more than 1 file while specifying a single output file. Either lose the option -o to derive the output filenames automatically, or specify a single Bismark alignment report file using the option '--alignment_report FILE'\n\n";
+    }
+  }
+
+  foreach my $aln (@alignment_reports){
+
+    # warn "Figuring things out for:\n$aln\n";
+    $aln =~ /^(.+)_(P|S)E_report.txt$/;
+    my $basename = $1;
+    # warn "using this basename:\n$basename\n\n";
+
+    ### DEDUPLICATION REPORT (optional)
+    if ($dedup_report){
+	warn "User specifified dedup report: $dedup_report\n";sleep(3);
+      if (lc$dedup_report eq 'none'){
+	push @dedup_reports, ''; # user may wish to skip this step by specifying 'none'
+      }
+      else{
+	push @dedup_reports, $dedup_report;
+      }
+    }
+    else{
+      ### looking in the current directory for a report file with the same base name
+      my @dedup_report_files = <$basename*deduplication_report.txt>;
+      # warn "Number of deduplication reports found in current directory for basename $1: ", scalar @dedup_report_files , "\n";
+
+      if (scalar @dedup_report_files > 1){
+	die "Found ", scalar @dedup_report_files ," potential deduplication reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--dedup_report FILE' or otherwise provide filenames that are easier to figure out...\n\n";
+      }
+      elsif (scalar @dedup_report_files == 0){
+	push @dedup_reports, ''; # no corresponding deduplication report found
+      }
+      else{
+	# there is only a single deduplication report in the current directory, using this one
+	$dedup_report = shift @dedup_report_files;
+	push @dedup_reports, $dedup_report;
+	# warn "Going to use this dedup report: $dedup_report\n\n";
+      }
+    }
+
+    ### NUCLEOTIDE COVERAGE REPORT (optional)
+    if (defined $nucleotide_coverage_report){
+	# warn "User specified nucleotide coverage report: $nucleotide_coverage_report\n";sleep(1);
+	if (lc$nucleotide_coverage_report eq 'none'){
+	    push @nuc_reports, ''; # user may wish to skip this step by specifying 'none'
+	}
+	else{
+	    push @nuc_reports, $nucleotide_coverage_report;
+	}
+    }
+    else{
+	### looking in the current directory for a report file with the same base name
+	my @nucleotide_coverage_report_files = <$basename*nucleotide_stats.txt>;
+	# warn "Number of nucleotide coverage statistic reports found in current directory for basename $1: ", scalar @nucleotide_coverage_report_files , "\n";
+	
+	if (scalar @nucleotide_coverage_report_files > 1){
+	    die "Found ", scalar @nucleotide_coverage_report_files ," potential nucleotide coverage reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--nucleotide_report FILE' or otherwise provide filenames that are easier to figure out...\n\n";
+	}
+	elsif (scalar @nucleotide_coverage_report_files == 0){
+	    # warn "Found no corresponding nucleotide coverage file\n";
+	    push @nuc_reports, ''; # no corresponding nucleotide coverge file report found
+	}
+	else{
+	    # there is only a single nucleotide coverage report in the current directory, using this one
+	    $nucleotide_coverage_report = shift @nucleotide_coverage_report_files;
+	    push @nuc_reports, $nucleotide_coverage_report;
+	    # warn "Going to use this nucleotide coverage report: $nucleotide_coverage_report\n\n"; sleep(3);
+	}
+    }
+
+    
+    ### METHYLATION EXTRACTOR SPLITTING REPORT
+    if ($splitting_report){
+      if (lc$splitting_report eq 'none'){
+	push @splitting_reports, ''; # user may wish to skip this step by specifying 'none'
+      }
+      else{
+	push @splitting_reports, $splitting_report;
+      }
+    }
+    else{
+      ### looking in the current directory for a report file with the same basename
+      my @splitting_report_files = <$basename*splitting_report.txt>;
+      # warn "Number of splitting reports found in current directory: ", scalar @splitting_report_files , "\n";
+
+      if (scalar @splitting_report_files > 1){
+	die "Found ", scalar @splitting_report_files ," potential methylation extractor splitting reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--splitting_report FILE' or otherwise provide filenames that are easier to figure out...\n\n";
+      }
+      elsif (scalar @splitting_report_files == 0){
+	push @splitting_reports, ''; # no corresponding methylation extractor report found
+      }
+      else{
+	# there is only a single splitting report in the current directory, using this one
+	$splitting_report = shift @splitting_report_files;
+	push  @splitting_reports, $splitting_report;
+      }
+    }
+
+    ### M-BIAS REPORT
+    if ($mbias_report){
+      if (lc$mbias_report eq 'none'){
+	$mbias_report = ''; # user may wish to skip this step by specifying 'none'
+   	push @mbias_reports, $mbias_report;
+      }
+      else{
+	push @mbias_reports, $mbias_report;
+      }
+    }
+    else{
+      ### looking in the current directory for a single report file. More (or less) than 1 report file is not allowed
+      my @mbias_report_files = <$basename*M-bias.txt>;
+
+      # warn "Number of M-bias reports found in current directory: ", scalar @mbias_report_files , "\n";
+
+      if (scalar @mbias_report_files > 1){
+	die "Found ", scalar @mbias_report_files ," potential M-bias reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--mbias_report FILE'or otherwise provide filenames that are easier to figure out automatically ...\n\n";
+      }
+      elsif (scalar @mbias_report_files == 0){
+	push @mbias_reports, '';
+      }
+      else{
+	# there is only a single M-bias report in the current directory, using this one
+	$mbias_report = shift @mbias_report_files;
+	push @mbias_reports, $mbias_report;
+      }
+    }
+    $dedup_report = $splitting_report = $mbias_report = $nucleotide_coverage_report = undef;
+  }
+
+  return ($output_dir,$verbose,$manual_output_file);
+
+}
+
+sub print_helpfile{
+  print <<EOF
+
+  SYNOPSIS:
+
+  This script uses a Bismark alignment report to generate a graphical HTML report page. Optionally, further reports of
+  the Bismark suite such as deduplication, methylation extractor splitting or M-bias reports can be specified as well. If several
+  Bismark reports are found in the same folder, a separate report will be generated for each of these, whereby the output filename
+  will be derived from the Bismark alignment report file. Bismark2report attempts to find optional reports automatically based
+  on the file basename.
+
+
+  USAGE: bismark2report [options]
+
+
+-o/--output <filename>     Name of the output file (optional). If not specified explicitly, the output filename will be derived
+                           from the Bismark alignment report file. Specifying an output filename only works if the HTML report is
+                           to be generated for a single Bismark alignment report (and potentially additional reports).
+
+--dir                      Output directory. Output is written to the current directory if not specified explicitly.
+
+
+--alignment_report FILE    If not specified explicitly, bismark2report attempts to find Bismark report file(s) in the current
+                           directory and produces a separate HTML report for each mapping report file. Based on the basename of
+                           the Bismark mapping report, bismark2report will also attempt to find the other Bismark reports (see below)
+                           for inclusion into the HTML report. Specifying a Bismark alignment report file is mandatory.
+
+--dedup_report FILE        If not specified explicitly, bismark2report attempts to find a deduplication report file with the same
+                           basename as the Bismark mapping report (generated by deduplicate_bismark) in the
+                           current working directory. Including a deduplication report is optional, and using the FILE 'none'
+                           will skip this step entirely.
+
+--splitting_report FILE    If not specified explicitly, bismark2report attempts to find a splitting report file with the same
+                           basename as the Bismark mapping report (generated by the Bismark methylation extractor) in the current
+                           working directory. Including a splitting report is optional, and using the FILE 'none' will skip this
+                           step entirely.
+
+--mbias_report FILE        If not specified explicitly, bismark2report attempts to find a single M-bias report file with the same
+                           basename as the Bismark mapping report (generated by the Bismark methylation extractor) in the current
+                           working directory. Including an M-Bias report is optional, and using the FILE 'none' will skip this step
+                           entirely.
+
+--nucleotide_report FILE   If not specified explicitly, bismark2report attempts to find a single nucleotide coverage report file
+                           with the same basename as the Bismark mapping report (generated by Bismark with the option
+                           '--nucleotide_coverage') in the current working directory. Including a nucleotide coverage statistics
+                           report is optional, and using the FILE 'none' will skip this report entirely.
+
+                           Script last modified: 13 May 2016
+
+EOF
+    ;
+  exit 1;
+}
+
author	bgruening
date	Sat, 06 May 2017 13:18:09 -0400
parents
children