diff bismark_wrapper/bismark @ 1:183de9d00131 draft

add indices.loc files
author bjoern-gruening
date Tue, 25 Dec 2012 05:52:28 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bismark_wrapper/bismark	Tue Dec 25 05:52:28 2012 -0500
@@ -0,0 +1,6615 @@
+#!/usr/bin/perl --
+use strict;
+use warnings;
+use IO::Handle;
+use Cwd;
+$|++;
+use Getopt::Long;
+
+
+## This program is Copyright (C) 2010-12, Felix Krueger (felix.krueger@babraham.ac.uk)
+
+## This program is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+
+## You should have received a copy of the GNU General Public License
+## along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+### Start-up state. Note that the command line is recorded before the option renaming below takes place
+my $parent_dir      = getcwd;
+my $bismark_version = 'v0.7.7';
+my $command_line    = join (" ",@ARGV);
+
+### the '.' in --solexa1.3-quals will cause Getopt::Long to fail, so the option is swapped for --phred64-quals
+### before the command line is processed ($arg aliases the element of @ARGV, so the assignment edits @ARGV itself)
+for my $arg (@ARGV){
+  $arg = '--phred64-quals' if ($arg eq '--solexa1.3-quals');
+}
+
+my @filenames;   # will be populated by processing the command line
+
+my (
+  $genome_folder,
+  $CT_index_basename,
+  $GA_index_basename,
+  $path_to_bowtie,
+  $sequence_file_format,
+  $bowtie_options,
+  $directional,
+  $unmapped,
+  $ambiguous,
+  $phred64,
+  $solexa,
+  $output_dir,
+  $bowtie2,
+  $vanilla,
+  $sam_no_hd,
+  $skip,
+  $upto,
+  $temp_dir,
+) = process_command_line();
+
+my @fhs;         # stores alignment process names, bisulfite index location, bowtie filehandles and the number of times sequences produced an alignment
+my %chromosomes; # stores the chromosome sequences of the genome; once filled it is reused for all further input files
+my %counting;    # counting various events
+
+my $seqID_contains_tabs; # reset to 0 for every input file; a true value adds a warning to the single-end final report
+
+### Main loop: each entry of @filenames is processed in turn. An entry containing a comma is treated as a pair of
+### mate files (paired-end alignments), anything else as a single file (single-end alignments). For every entry
+### the reads are bisulfite-transformed, the Bowtie/Bowtie 2 alignments are started and the methylation call
+### procedure is launched.
+foreach my $filename (@filenames){
+
+  chdir $parent_dir or die "Unable to move to initial working directory $!\n";
+  ### resetting the counting hash and fhs
+  reset_counters_and_fhs($filename);
+  $seqID_contains_tabs = 0;
+
+  ### everything that is not explicitly FASTA is handled as FastQ
+  my $is_fastA    = ($sequence_file_format eq 'FASTA');
+  my $format_name = $is_fastA ? 'FastA' : 'FastQ';
+
+  ### PAIRED-END ALIGNMENTS
+  if ($filename =~ /,/){
+    $fhs[0]->{name} = 'CTread1GAread2CTgenome';
+    $fhs[1]->{name} = 'GAread1CTread2GAgenome';
+    $fhs[2]->{name} = 'GAread1CTread2CTgenome';
+    $fhs[3]->{name} = 'CTread1GAread2GAgenome';
+
+    print "\nPaired-end alignments will be performed\n",'='x39,"\n\n";
+
+    my ($filename_1,$filename_2) = (split (/,/,$filename));
+    print "The provided filenames for paired-end alignments are $filename_1 and $filename_2\n";
+
+    my ($C_to_T_infile_1,$G_to_A_infile_1); # to be made from mate1 file
+    my ($C_to_T_infile_2,$G_to_A_infile_2); # to be made from mate2 file
+
+    print "Input files are in $format_name format\n";
+
+    ### the FastA and FastQ code paths only differ in which subroutines get called, so these are picked up front
+    my $bi_transform = $is_fastA ? \&biTransformFastAFiles_paired_end
+                                 : \&biTransformFastQFiles_paired_end;
+
+    if ($directional){
+      ### only the first value returned for each mate is stored
+      ($C_to_T_infile_1) = $bi_transform->($filename_1,1); # also passing the read number
+      ($G_to_A_infile_2) = $bi_transform->($filename_2,2);
+    }
+    else{
+      ($C_to_T_infile_1,$G_to_A_infile_1) = $bi_transform->($filename_1,1); # also passing the read number
+      ($C_to_T_infile_2,$G_to_A_infile_2) = $bi_transform->($filename_2,2);
+    }
+
+    ### In --directional mode $G_to_A_infile_1 and $C_to_T_infile_2 have never been assigned and are thus still
+    ### undef, which leaves instances 1 and 2 with undef input files
+    $fhs[0]->{inputfile_1} = $C_to_T_infile_1;
+    $fhs[0]->{inputfile_2} = $G_to_A_infile_2;
+    $fhs[1]->{inputfile_1} = $G_to_A_infile_1;
+    $fhs[1]->{inputfile_2} = $C_to_T_infile_2;
+    $fhs[2]->{inputfile_1} = $G_to_A_infile_1;
+    $fhs[2]->{inputfile_2} = $C_to_T_infile_2;
+    $fhs[3]->{inputfile_1} = $C_to_T_infile_1;
+    $fhs[3]->{inputfile_2} = $G_to_A_infile_2;
+
+    my $align;
+    if ($is_fastA){
+      $align = $bowtie2 ? \&paired_end_align_fragments_to_bisulfite_genome_fastA_bowtie2
+                        : \&paired_end_align_fragments_to_bisulfite_genome_fastA;
+    }
+    else{
+      $align = $bowtie2 ? \&paired_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2
+                        : \&paired_end_align_fragments_to_bisulfite_genome_fastQ;
+    }
+    $align->($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+
+    start_methylation_call_procedure_paired_ends($filename_1,$filename_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+  }
+
+  ### Else we are performing SINGLE-END ALIGNMENTS
+  else{
+    print "\nSingle-end alignments will be performed\n",'='x39,"\n\n";
+
+    ### Initialising bisulfite conversion filenames
+    my ($C_to_T_infile,$G_to_A_infile);
+
+    print "Input file is in $format_name format\n";
+
+    my $bi_transform = $is_fastA ? \&biTransformFastAFiles
+                                 : \&biTransformFastQFiles;
+
+    if ($directional){
+      ### only the first returned value is stored; instances 2 and 3 are deliberately left untouched
+      ($C_to_T_infile) = $bi_transform->($filename);
+    }
+    else{
+      ($C_to_T_infile,$G_to_A_infile) = $bi_transform->($filename);
+      $fhs[2]->{inputfile} = $fhs[3]->{inputfile} = $G_to_A_infile;
+    }
+    $fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $C_to_T_infile;
+
+    ### Creating 4 different bowtie filehandles and storing the first entry
+    my $align;
+    if ($is_fastA){
+      $align = $bowtie2 ? \&single_end_align_fragments_to_bisulfite_genome_fastA_bowtie2
+                        : \&single_end_align_fragments_to_bisulfite_genome_fastA;
+    }
+    else{
+      $align = $bowtie2 ? \&single_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2
+                        : \&single_end_align_fragments_to_bisulfite_genome_fastQ;
+    }
+    $align->($C_to_T_infile,$G_to_A_infile);
+
+    start_methylation_call_procedure_single_ends($filename,$C_to_T_infile,$G_to_A_infile);
+
+  }
+}
+
+### Sets up everything needed for the methylation call of one single-end input file:
+###   - opens the alignment output (OUT) and the mapping report (REPORT) inside $output_dir
+###   - opens UNMAPPED and/or AMBIG if --unmapped / --ambiguous were requested
+###   - reads the genome into memory unless %chromosomes is already populated
+###   - writes the SAM header unless vanilla output or a header-less SAM file was requested
+### and finally hands over to the FastA or FastQ specific processing subroutine.
+### Arguments: the original sequence file and the names of its C->T and G->A converted versions.
+sub start_methylation_call_procedure_single_ends {
+  my ($sequence_file,$C_to_T_infile,$G_to_A_infile) = @_;
+
+  ### all output names are derived from the input file name without its leading directories
+  my $filename = $sequence_file;
+  if ($sequence_file =~ /\//){
+    (undef,$filename) = $sequence_file =~ m/(.*\/)(.*)$/;
+  }
+
+  ### printing all alignments to a results file
+  my $outfile_suffix = $bowtie2 ? '_bt2_bismark.sam'   # SAM format is the default for Bowtie 2
+                     : $vanilla ? '_bismark.txt'       # vanilla custom Bismark single-end output (like Bismark versions 0.5.X)
+                     :            '_bismark.sam';      # SAM is the default output
+  (my $outfile = $filename) =~ s/$/$outfile_suffix/;
+
+  print "Writing bisulfite mapping results to $output_dir$outfile\n\n";
+  open (OUT,'>',"$output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  print OUT "Bismark version: $bismark_version\n" if ($vanilla);
+
+  ### printing alignment and methylation call summary to a report file
+  my $report_suffix = $bowtie2 ? '_bt2_Bismark_mapping_report.txt' : '_Bismark_mapping_report.txt';
+  (my $reportfile = $filename) =~ s/$/$report_suffix/;
+
+  open (REPORT,'>',"$output_dir$reportfile") or die "Failed to write to $reportfile: $!\n";
+  print REPORT "Bismark report for: $sequence_file (version: $bismark_version)\n";
+
+  ### optional files collecting the reads that did not end up in the alignment output
+  if ($unmapped){
+    (my $unmapped_file = $filename) =~ s/$/_unmapped_reads.txt/;
+    open (UNMAPPED,'>',"$output_dir$unmapped_file") or die "Failed to write to $unmapped_file: $!\n";
+    print "Unmapped sequences will be written to $output_dir$unmapped_file\n";
+  }
+  if ($ambiguous){
+    (my $ambiguous_file = $filename) =~ s/$/_ambiguous_reads.txt/;
+    open (AMBIG,'>',"$output_dir$ambiguous_file") or die "Failed to write to $ambiguous_file: $!\n";
+    print "Ambiguously mapping sequences will be written to $output_dir$ambiguous_file\n";
+  }
+
+  print REPORT "Option '--directional' specified: alignments to complementary strands will be ignored (i.e. not performed!)\n" if ($directional);
+  print REPORT "Bowtie was run against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+
+  ### the genome only needs reading in for the first file; for any further file it is still held in memory
+  if (not %chromosomes){
+    my $cwd = getcwd; # storing the path of the current working directory
+    print "Current working directory is: $cwd\n\n";
+    read_genome_into_memory($cwd);
+  }
+
+  generate_SAM_header() unless ($vanilla or $sam_no_hd);
+
+  ### Input file is in FastA format
+  if ($sequence_file_format eq 'FASTA'){
+    process_single_end_fastA_file_for_methylation_call($sequence_file,$C_to_T_infile,$G_to_A_infile);
+  }
+  ### Input file is in FastQ format
+  else{
+    process_single_end_fastQ_file_for_methylation_call($sequence_file,$C_to_T_infile,$G_to_A_infile);
+  }
+}
+
+### Sets up everything needed for the methylation call of one pair of input files:
+###   - opens the alignment output (OUT) and the mapping report (REPORT) inside $output_dir, both named after mate 1
+###   - opens UNMAPPED_1/2 and/or AMBIG_1/2 if --unmapped / --ambiguous were requested
+###   - reads the genome into memory unless %chromosomes is already populated
+###   - writes the SAM header unless vanilla output or a header-less SAM file was requested
+### and finally hands over to the FastA or FastQ specific paired-end processing subroutine.
+### Arguments: the two original sequence files followed by the names of the C->T and G->A converted versions
+### of mate 1 and of mate 2.
+### The status messages report the full path ($output_dir included) of every file that is opened, the same way
+### the single-end subroutine does.
+sub start_methylation_call_procedure_paired_ends {
+  my ($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+
+  ### all output names are derived from the input file names without their leading directories
+  my $filename_1 = $sequence_file_1;
+  if ($sequence_file_1 =~ /\//){
+    (undef,$filename_1) = $sequence_file_1 =~ m/(.*\/)(.*)$/;
+  }
+
+  my $filename_2 = $sequence_file_2;
+  if ($sequence_file_2 =~ /\//){
+    (undef,$filename_2) = $sequence_file_2 =~ m/(.*\/)(.*)$/;
+  }
+
+  ### printing all alignments to a results file
+  my $outfile_suffix = $bowtie2 ? '_bismark_bt2_pe.sam'   # SAM format is the default Bowtie 2 output
+                     : $vanilla ? '_bismark_pe.txt'       # vanilla custom Bismark paired-end output (like Bismark versions 0.5.X)
+                     :            '_bismark_pe.sam';      # SAM format is the default Bowtie 1 output
+  (my $outfile = $filename_1) =~ s/$/$outfile_suffix/;
+
+  print "Writing bisulfite mapping results to $output_dir$outfile\n\n";
+  open (OUT,'>',"$output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  print OUT "Bismark version: $bismark_version\n" if ($vanilla);
+
+  ### printing alignment and methylation call summary to a report file
+  my $report_suffix = $bowtie2 ? '_Bismark_bt2_paired-end_mapping_report.txt' : '_Bismark_paired-end_mapping_report.txt';
+  (my $reportfile = $filename_1) =~ s/$/$report_suffix/;
+
+  open (REPORT,'>',"$output_dir$reportfile") or die "Failed to write to $reportfile: $!\n";
+  print REPORT "Bismark report for: $sequence_file_1 and $sequence_file_2 (version: $bismark_version)\n";
+  print REPORT "Bowtie was run against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+
+  ### Unmapped read output
+  if ($unmapped){
+    (my $unmapped_1 = $filename_1) =~ s/$/_unmapped_reads_1.txt/;
+    (my $unmapped_2 = $filename_2) =~ s/$/_unmapped_reads_2.txt/;
+    open (UNMAPPED_1,'>',"$output_dir$unmapped_1") or die "Failed to write to $unmapped_1: $!\n";
+    open (UNMAPPED_2,'>',"$output_dir$unmapped_2") or die "Failed to write to $unmapped_2: $!\n";
+    print "Unmapped sequences will be written to $output_dir$unmapped_1 and $output_dir$unmapped_2\n";
+  }
+
+  ### Ambiguously mapping read output
+  if ($ambiguous){
+    (my $amb_1 = $filename_1) =~ s/$/_ambiguous_reads_1.txt/;
+    (my $amb_2 = $filename_2) =~ s/$/_ambiguous_reads_2.txt/;
+    open (AMBIG_1,'>',"$output_dir$amb_1") or die "Failed to write to $amb_1: $!\n";
+    open (AMBIG_2,'>',"$output_dir$amb_2") or die "Failed to write to $amb_2: $!\n";
+    print "Ambiguously mapping sequences will be written to $output_dir$amb_1 and $output_dir$amb_2\n";
+  }
+
+  print REPORT "Option '--directional' specified: alignments to complementary strands will be ignored (i.e. not performed)\n" if ($directional);
+
+  ### if 2 or more files are provided we might still hold the genome in memory and don't need to read it in a second time
+  if (not %chromosomes){
+    my $cwd = getcwd; # storing the path of the current working directory
+    print "Current working directory is: $cwd\n\n";
+    read_genome_into_memory($cwd);
+  }
+
+  generate_SAM_header() unless ($vanilla or $sam_no_hd);
+
+  ### Input files are in FastA format
+  if ($sequence_file_format eq 'FASTA'){
+    process_fastA_files_for_paired_end_methylation_calls($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+  }
+  ### Input files are in FastQ format
+  else{
+    process_fastQ_files_for_paired_end_methylation_calls($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+  }
+}
+
+### Final step for a single-end input file: deletes the temporary bisulfite-converted read files from $temp_dir and
+### writes the alignment and cytosine methylation summary, taken from %counting, both to the screen
+### (STDOUT/STDERR) and to the REPORT filehandle, which has to be open already.
+### Arguments: the names of the C->T and G->A converted read files (the latter is not used in --directional mode).
+sub print_final_analysis_report_single_end{
+  my ($C_to_T_infile,$G_to_A_infile) = @_;
+
+  ### All sequences from the original sequence file have been analysed now
+  ### deleting temporary C->T or G->A infiles
+  if ($directional){
+    my $deletion_successful = unlink "$temp_dir$C_to_T_infile";
+    if ($deletion_successful == 1){
+      warn "\nSuccessfully deleted the temporary file $temp_dir$C_to_T_infile\n\n";
+    }
+    else{
+      warn "Could not delete temporary file $temp_dir$C_to_T_infile properly $!\n";
+    }
+  }
+  else{
+    my $deletion_successful = unlink "$temp_dir$C_to_T_infile","$temp_dir$G_to_A_infile";
+    if ($deletion_successful == 2){
+      warn "\nSuccessfully deleted the temporary files $temp_dir$C_to_T_infile and $temp_dir$G_to_A_infile\n\n";
+    }
+    else{
+      warn "Could not delete temporary files properly $!\n";
+    }
+  }
+
+  ### printing a final report for the alignment procedure
+  print REPORT "Final Alignment report\n",'='x22,"\n";
+  print "Final Alignment report\n",'='x22,"\n";
+
+  ### printing a final report for the methylation call procedure
+  warn "Sequences analysed in total:\t$counting{sequences_count}\n";
+  print REPORT "Sequences analysed in total:\t$counting{sequences_count}\n";
+
+  ### the mapping efficiency is reported as 0 if no sequence was analysed at all (avoids a division by zero)
+  my $percent_alignable_sequences = 0;
+  if ($counting{sequences_count} != 0){
+    $percent_alignable_sequences = sprintf ("%.1f",$counting{unique_best_alignment_count}*100/$counting{sequences_count});
+  }
+
+  my $mapping_efficiency = "Number of alignments with a unique best hit from the different alignments:\t$counting{unique_best_alignment_count}\nMapping efficiency:\t${percent_alignable_sequences}%\n";
+  warn $mapping_efficiency,"\n";   # the screen output is followed by an additional blank line
+  print REPORT $mapping_efficiency;
+
+  ### NOTE: $counting{low_complexity_alignments_overruled_count} is not reported; its percentage used to be
+  ### calculated here without ever being printed
+
+  ### fate of the sequences and strand origin of the unique best alignments; identical text for screen and report
+  my @alignment_outcome = (
+    "Sequences with no alignments under any condition:\t$counting{no_single_alignment_found}\n",
+    "Sequences did not map uniquely:\t$counting{unsuitable_sequence_count}\n",
+    "Sequences which were discarded because genomic sequence could not be extracted:\t$counting{genomic_sequence_could_not_be_extracted_count}\n\n",
+    "Number of sequences with unique best (first) alignment came from the bowtie output:\n",
+    "CT/CT:\t$counting{CT_CT_count}\t((converted) top strand)\n",
+    "CT/GA:\t$counting{CT_GA_count}\t((converted) bottom strand)\n",
+    "GA/CT:\t$counting{GA_CT_count}\t(complementary to (converted) top strand)\n",
+    "GA/GA:\t$counting{GA_GA_count}\t(complementary to (converted) bottom strand)\n\n",
+  );
+  print @alignment_outcome;
+  print REPORT @alignment_outcome;
+
+  if ($directional){
+    my $rejected = "Number of alignments to (merely theoretical) complementary strands being rejected in total:\t$counting{alignments_rejected_count}\n\n";
+    print $rejected;
+    print REPORT $rejected;
+  }
+
+  ### detailed information about Cs analysed
+  my $total_number_of_C = $counting{total_meCHH_count}+$counting{total_meCHG_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CpG_count};
+
+  ### every element is sent to STDERR with a warn call of its own and then printed to the report.
+  ### The report receives exactly the same text (no extra blank after the tab of the CpG line)
+  my @cytosine_report = (
+    "Final Cytosine Methylation Report\n".('='x33)."\n",
+    "Total number of C's analysed:\t$total_number_of_C\n\n",
+    "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n",
+    "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n",
+    "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n",
+    "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n",
+    "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n",
+    "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n",
+  );
+  foreach my $line (@cytosine_report){
+    warn $line;
+  }
+  print REPORT @cytosine_report;
+
+  ### printing the methylation percentage for each cytosine context if applicable, i.e. if at least one
+  ### cytosine was seen in that context (a percentage of 0.0 is a perfectly valid result and is printed)
+  foreach my $context ('CpG','CHG','CHH'){
+    my $methylated   = $counting{"total_me${context}_count"};
+    my $unmethylated = $counting{"total_unmethylated_${context}_count"};
+    my $spacer       = ($context eq 'CHH') ? "\n\n" : '';   # the last context is followed by two empty lines
+
+    my $line;
+    if (($methylated+$unmethylated) > 0){
+      my $percent_methylated = sprintf("%.1f",100*$methylated/($methylated+$unmethylated));
+      $line = "C methylated in $context context:\t${percent_methylated}%\n$spacer";
+    }
+    else{
+      $line = "Can't determine percentage of methylated Cs in $context context if value was 0\n$spacer";
+    }
+    warn $line;
+    print REPORT $line;
+  }
+
+  if ($seqID_contains_tabs){
+    warn "The sequence IDs in the provided file contain tab-stops which might prevent sequence alignments. If this happened, please replace all tab characters within the seqID field with spaces before running Bismark.\n\n";
+    print REPORT "The sequence IDs in the provided file contain tab-stops which might prevent sequence alignments. If this happened, please replace all tab characters within the seqID field with spaces before running Bismark.\n\n";
+  }
+}
+
+### Final step for a pair of input files: deletes the temporary bisulfite-converted read files from $temp_dir and
+### writes the alignment and cytosine methylation summary, taken from %counting, both to the screen
+### (STDOUT/STDERR) and to the REPORT filehandle, which has to be open already.
+### Arguments: the names of the C->T and G->A converted files of mate 1, followed by those of mate 2
+### (in --directional mode only the C->T file of mate 1 and the G->A file of mate 2 are deleted).
+sub print_final_analysis_report_paired_ends{
+  my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+
+  ### All sequences from the original sequence file have been analysed now, therefore deleting temporary C->T or G->A infiles
+  if ($directional){
+    my $deletion_successful = unlink "$temp_dir$C_to_T_infile_1","$temp_dir$G_to_A_infile_2";
+    if ($deletion_successful == 2){
+      warn "\nSuccessfully deleted the temporary files $temp_dir$C_to_T_infile_1 and $temp_dir$G_to_A_infile_2\n\n";
+    }
+    else{
+      warn "Could not delete temporary files $temp_dir$C_to_T_infile_1 and $temp_dir$G_to_A_infile_2 properly: $!\n";
+    }
+  }
+  else{
+    my $deletion_successful = unlink "$temp_dir$C_to_T_infile_1","$temp_dir$G_to_A_infile_1","$temp_dir$C_to_T_infile_2","$temp_dir$G_to_A_infile_2";
+    if ($deletion_successful == 4){
+      warn "\nSuccessfully deleted the temporary files $temp_dir$C_to_T_infile_1, $temp_dir$G_to_A_infile_1, $temp_dir$C_to_T_infile_2 and $temp_dir$G_to_A_infile_2\n\n";
+    }
+    else{
+      warn "Could not delete temporary files properly: $!\n";
+    }
+  }
+
+  ### printing a final report for the alignment procedure
+  warn "Final Alignment report\n",'='x22,"\n";
+  print REPORT "Final Alignment report\n",'='x22,"\n";
+
+  ### printing a final report for the methylation call procedure
+  warn "Sequence pairs analysed in total:\t$counting{sequences_count}\n";
+  print REPORT "Sequence pairs analysed in total:\t$counting{sequences_count}\n";
+
+  ### the mapping efficiency is reported as 0 if no sequence pair was analysed at all (avoids a division by zero)
+  my $percent_alignable_sequence_pairs = 0;
+  if ($counting{sequences_count} != 0){
+    $percent_alignable_sequence_pairs = sprintf ("%.1f",$counting{unique_best_alignment_count}*100/$counting{sequences_count});
+  }
+
+  ### the report gets the very same line as the screen (no blank after the % sign)
+  my $mapping_efficiency = "Number of paired-end alignments with a unique best hit:\t$counting{unique_best_alignment_count}\nMapping efficiency:\t${percent_alignable_sequence_pairs}%\n";
+  print $mapping_efficiency,"\n";   # the screen output is followed by an additional blank line
+  print REPORT $mapping_efficiency;
+
+  ### fate of the sequence pairs and strand origin of the unique best alignments; identical text for screen and report
+  my @alignment_outcome = (
+    "Sequence pairs with no alignments under any condition:\t$counting{no_single_alignment_found}\n",
+    "Sequence pairs did not map uniquely:\t$counting{unsuitable_sequence_count}\n",
+    "Sequence pairs which were discarded because genomic sequence could not be extracted:\t$counting{genomic_sequence_could_not_be_extracted_count}\n\n",
+    "Number of sequence pairs with unique best (first) alignment came from the bowtie output:\n",
+    "CT/GA/CT:\t$counting{CT_GA_CT_count}\t((converted) top strand)\n",
+    "GA/CT/CT:\t$counting{GA_CT_CT_count}\t(complementary to (converted) top strand)\n",
+    "GA/CT/GA:\t$counting{GA_CT_GA_count}\t(complementary to (converted) bottom strand)\n",
+    "CT/GA/GA:\t$counting{CT_GA_GA_count}\t((converted) bottom strand)\n\n",
+  );
+  print @alignment_outcome;
+  print REPORT @alignment_outcome;
+
+  if ($directional){
+    my $rejected = "Number of alignments to (merely theoretical) complementary strands being rejected in total:\t$counting{alignments_rejected_count}\n\n";
+    print $rejected;
+    print REPORT $rejected;
+  }
+
+  ### detailed information about Cs analysed
+  my $total_number_of_C = $counting{total_meCHG_count}+$counting{total_meCHH_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CpG_count};
+
+  ### every element is sent to STDERR with a warn call of its own and then printed to the report
+  my @cytosine_report = (
+    "Final Cytosine Methylation Report\n".('='x33)."\n",
+    "Total number of C's analysed:\t$total_number_of_C\n\n",
+    "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n",
+    "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n",
+    "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n",
+    "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n",
+    "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n",
+    "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n",
+  );
+  foreach my $line (@cytosine_report){
+    warn $line;
+  }
+  print REPORT @cytosine_report;
+
+  ### printing the methylation percentage for each cytosine context if applicable, i.e. if at least one
+  ### cytosine was seen in that context (a percentage of 0.0 is a perfectly valid result and is printed)
+  foreach my $context ('CpG','CHG','CHH'){
+    my $methylated   = $counting{"total_me${context}_count"};
+    my $unmethylated = $counting{"total_unmethylated_${context}_count"};
+    my $spacer       = ($context eq 'CHH') ? "\n\n" : '';   # the last context is followed by two empty lines
+
+    my $line;
+    if (($methylated+$unmethylated) > 0){
+      my $percent_methylated = sprintf("%.1f",100*$methylated/($methylated+$unmethylated));
+      $line = "C methylated in $context context:\t${percent_methylated}%\n$spacer";
+    }
+    else{
+      $line = "Can't determine percentage of methylated Cs in $context context if value was 0\n$spacer";
+    }
+    warn $line;
+    print REPORT $line;
+  }
+
+}
+
+### Reads a single-end FastA file record by record (2 lines per record: header and sequence) and hands every
+### read to the Bowtie 1 or Bowtie 2 result checker (depending on $bowtie2).
+###
+### Arguments: $sequence_file  - FastA file to read; it is decompressed through zcat if its name ends in .gz
+###            $C_to_T_infile,
+###            $G_to_A_infile  - only passed on to print_final_analysis_report_single_end()
+###
+### Honours the file-level options $skip, $upto, $ambiguous and $unmapped and counts the reads that were
+### actually processed in $counting{sequences_count}. Dies if the input cannot be opened.
+sub process_single_end_fastA_file_for_methylation_call{
+  my ($sequence_file,$C_to_T_infile,$G_to_A_infile) = @_;
+  ### this is a FastA sequence file; we need the actual sequence to compare it against the genomic sequence in order to make a methylation call.
+  ### Now reading in the sequence file sequence by sequence and see if the current sequence was mapped to one (or both) of the converted genomes in either
+  ### the C->T or G->A version
+
+  ### gzipped version of the infile
+  if ($sequence_file =~ /\.gz$/){
+    ### list form of the pipe open: the file name is handed to zcat as a single argument and is never parsed by a shell
+    open (IN,'-|','zcat',$sequence_file) or die "Failed to open zcat pipe to $sequence_file: $!\n";
+  }
+  else{
+    ### 3-argument open: the file is always opened for reading, whatever characters its name contains
+    open (IN,'<',$sequence_file) or die "Failed to read from $sequence_file: $!\n";
+  }
+
+  my $count = 0; # number of records read so far, including the ones skipped via --skip
+
+  warn "\nReading in the sequence file $sequence_file\n";
+  while (1) {
+    # last if ($counting{sequences_count} > 100);
+    my $identifier = <IN>;
+    my $sequence = <IN>;
+    last unless ($identifier and $sequence); # end of file (or incomplete last record)
+
+    $identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces
+
+    ++$count;
+
+    if ($skip){
+      next unless ($count > $skip);
+    }
+    if ($upto){
+      last if ($count > $upto);
+    }
+
+    $counting{sequences_count}++;
+    if ($counting{sequences_count}%100000==0) {
+      warn "Processed $counting{sequences_count} sequences so far\n";
+    }
+    chomp $sequence;
+    chomp $identifier;
+
+    $identifier =~ s/^>//; # deletes the > at the beginning of FastA headers
+
+    ### the sequence is passed on in upper case; no quality string exists for FastA reads
+    my $return;
+    if ($bowtie2){
+      $return = check_bowtie_results_single_end_bowtie2 (uc$sequence,$identifier);
+    }
+    else{
+      $return = check_bowtie_results_single_end(uc$sequence,$identifier); # default Bowtie 1
+    }
+
+    ### the checkers may return undef; treat that like 0 so that the numeric comparisons below do not warn
+    unless ($return){
+      $return = 0;
+    }
+
+    # print the sequence to ambiguous.out if --ambiguous was specified
+    if ($ambiguous and $return == 2){
+      print AMBIG ">$identifier\n";
+      print AMBIG "$sequence\n";
+    }
+
+    # print the sequence to <unmapped.out> file if --un was specified
+    elsif ($unmapped and $return == 1){
+      print UNMAPPED ">$identifier\n";
+      print UNMAPPED "$sequence\n";
+    }
+  }
+  print "Processed $counting{sequences_count} sequences in total\n\n";
+
+  print_final_analysis_report_single_end($C_to_T_infile,$G_to_A_infile);
+
+}
+
+### Reads a single-end FastQ file record by record (4 lines per record: header, sequence, second header and
+### quality string) and hands every read to the Bowtie 1 or Bowtie 2 result checker (depending on $bowtie2).
+###
+### Arguments: $sequence_file  - FastQ file to read; it is decompressed through zcat if its name ends in .gz
+###            $C_to_T_infile,
+###            $G_to_A_infile  - only passed on to print_final_analysis_report_single_end()
+###
+### Honours the file-level options $skip, $upto, $ambiguous and $unmapped and counts the reads that were
+### actually processed in $counting{sequences_count}. Dies if the input cannot be opened.
+sub process_single_end_fastQ_file_for_methylation_call{
+  my ($sequence_file,$C_to_T_infile,$G_to_A_infile) = @_;
+  ### this is the Illumina sequence file; we need the actual sequence to compare it against the genomic sequence in order to make a methylation call.
+  ### Now reading in the sequence file sequence by sequence and see if the current sequence was mapped to one (or both) of the converted genomes in either
+  ### the C->T or G->A version
+
+  ### gzipped version of the infile
+  if ($sequence_file =~ /\.gz$/){
+    ### list form of the pipe open: the file name is handed to zcat as a single argument and is never parsed by a shell
+    open (IN,'-|','zcat',$sequence_file) or die "Failed to open zcat pipe to $sequence_file: $!\n";
+  }
+  else{
+    ### 3-argument open: the file is always opened for reading, whatever characters its name contains
+    open (IN,'<',$sequence_file) or die "Failed to read from $sequence_file: $!\n";
+  }
+
+  my $count = 0; # number of records read so far, including the ones skipped via --skip
+
+  warn "\nReading in the sequence file $sequence_file\n";
+  while (1) {
+    my $identifier = <IN>;
+    my $sequence = <IN>;
+    my $identifier_2 = <IN>;
+    my $quality_value = <IN>;
+    last unless ($identifier and $sequence and $identifier_2 and $quality_value); # end of file (or incomplete last record)
+
+    $identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces
+
+    ++$count;
+
+    if ($skip){
+      next unless ($count > $skip);
+    }
+    if ($upto){
+      last if ($count > $upto);
+    }
+
+    $counting{sequences_count}++;
+
+    if ($counting{sequences_count}%1000000==0) {
+      warn "Processed $counting{sequences_count} sequences so far\n";
+    }
+    chomp $sequence;
+    chomp $identifier;
+    chomp $quality_value;
+    ### $identifier_2 is deliberately not chomped, it is printed back with its own line ending below
+
+    $identifier =~ s/^\@//;  # deletes the @ at the beginning of Illumina FastQ headers
+
+    ### the sequence is passed on in upper case
+    my $return;
+    if ($bowtie2){
+      $return = check_bowtie_results_single_end_bowtie2 (uc$sequence,$identifier,$quality_value);
+    }
+    else{
+      $return = check_bowtie_results_single_end(uc$sequence,$identifier,$quality_value); # default Bowtie 1
+    }
+
+    ### the checkers may return undef; treat that like 0 so that the numeric comparisons below do not warn
+    unless ($return){
+      $return = 0;
+    }
+
+    # print the sequence to ambiguous.out if --ambiguous was specified
+    if ($ambiguous and $return == 2){
+      print AMBIG "\@$identifier\n";
+      print AMBIG "$sequence\n";
+      print AMBIG $identifier_2;
+      print AMBIG "$quality_value\n";
+    }
+
+    # print the sequence to <unmapped.out> file if --un was specified
+    elsif ($unmapped and $return == 1){
+      print UNMAPPED "\@$identifier\n";
+      print UNMAPPED "$sequence\n";
+      print UNMAPPED $identifier_2;
+      print UNMAPPED "$quality_value\n";
+    }
+  }
+  print "Processed $counting{sequences_count} sequences in total\n\n";
+
+  print_final_analysis_report_single_end($C_to_T_infile,$G_to_A_infile);
+
+}
+
+### Reads two paired-end FastA files in lock-step (2 lines per record and file) and hands every read pair to the
+### Bowtie 1 or Bowtie 2 paired-end result checker (depending on $bowtie2).
+###
+### Arguments: $sequence_file_1, $sequence_file_2 - FastA files holding read 1 and read 2 of each pair; each file is
+###                                                 decompressed through zcat if its own name ends in .gz
+###            $C_to_T_infile_1, $G_to_A_infile_1,
+###            $C_to_T_infile_2, $G_to_A_infile_2 - only passed on to print_final_analysis_report_paired_ends()
+###
+### Honours the file-level options $skip, $upto, $ambiguous and $unmapped and counts the read pairs that were
+### actually processed in $counting{sequences_count}. Dies if one of the inputs cannot be opened.
+sub process_fastA_files_for_paired_end_methylation_calls{
+  my ($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+  ### Processing the two FastA sequence files; we need the actual sequences of both reads to compare them against the genomic sequence in order to
+  ### make a methylation call. The sequence identifier per definition needs to be the same for a sequence pair used for paired-end mapping.
+  ### Now reading in the sequence files sequence by sequence and see if the current sequences produced an alignment to one (or both) of the
+  ### converted genomes (either the C->T or G->A version)
+
+  ### Each infile is checked for a .gz ending on its own, so a pair made of one gzipped and one plain file is read correctly as well.
+  ### The list form of the pipe open hands the file name to zcat as a single argument (no shell involved), and the
+  ### 3-argument open always opens the file for reading, whatever characters its name contains.
+  if ($sequence_file_1 =~ /\.gz$/){
+    open (IN1,'-|','zcat',$sequence_file_1) or die "Failed to open zcat pipe to $sequence_file_1 $!\n";
+  }
+  else{
+    open (IN1,'<',$sequence_file_1) or die "Failed to read from $sequence_file_1: $!\n";
+  }
+
+  if ($sequence_file_2 =~ /\.gz$/){
+    open (IN2,'-|','zcat',$sequence_file_2) or die "Failed to open zcat pipe to $sequence_file_2 $!\n";
+  }
+  else{
+    open (IN2,'<',$sequence_file_2) or die "Failed to read from $sequence_file_2: $!\n";
+  }
+
+  warn "\nReading in the sequence files $sequence_file_1 and $sequence_file_2\n";
+  ### Both files are required to have the exact same number of sequences, therefore we can process the sequences jointly one by one
+
+  my $count = 0; # number of read pairs read so far, including the ones skipped via --skip
+
+  while (1) {
+    # reading from the first input file
+    my $identifier_1 = <IN1>;
+    my $sequence_1 = <IN1>;
+    # reading from the second input file
+    my $identifier_2 = <IN2>;
+    my $sequence_2 = <IN2>;
+    last unless ($identifier_1 and $sequence_1 and $identifier_2 and $sequence_2); # end of (one of) the files
+
+    $identifier_1 = fix_IDs($identifier_1); # this is to avoid problems with truncated read ID when they contain white spaces
+    $identifier_2 = fix_IDs($identifier_2);
+
+    ++$count;
+
+    if ($skip){
+      next unless ($count > $skip);
+    }
+    if ($upto){
+      last if ($count > $upto);
+    }
+
+    $counting{sequences_count}++;
+    if ($counting{sequences_count}%100000==0) {
+      warn "Processed $counting{sequences_count} sequences so far\n";
+    }
+    ### copies taken before chomp and before the leading > is removed; they are printed as they are to the ambiguous/unmapped files
+    my $orig_identifier_1 = $identifier_1;
+    my $orig_identifier_2 = $identifier_2;
+
+    chomp $sequence_1;
+    chomp $identifier_1;
+    chomp $sequence_2;
+    chomp $identifier_2;
+
+    $identifier_1 =~ s/^>//; # deletes the > at the beginning of FastA headers (only the ID of read 1 is passed on)
+
+    ### both sequences are passed on in upper case; no quality strings exist for FastA reads
+    my $return;
+    if ($bowtie2){
+      $return = check_bowtie_results_paired_ends_bowtie2 (uc$sequence_1,uc$sequence_2,$identifier_1);
+    }
+    else{
+      $return = check_bowtie_results_paired_ends (uc$sequence_1,uc$sequence_2,$identifier_1);
+    }
+
+    ### the checkers may return undef; treat that like 0 so that the numeric comparisons below do not warn
+    unless ($return){
+      $return = 0;
+    }
+
+    # print the sequences to ambiguous_1 and _2 if --ambiguous was specified
+    if ($ambiguous and $return == 2){
+      print AMBIG_1 $orig_identifier_1;
+      print AMBIG_1 "$sequence_1\n";
+      print AMBIG_2 $orig_identifier_2;
+      print AMBIG_2 "$sequence_2\n";
+    }
+
+    # print the sequences to unmapped_1.out and unmapped_2.out if --un was specified
+    elsif ($unmapped and $return == 1){
+      print UNMAPPED_1 $orig_identifier_1;
+      print UNMAPPED_1 "$sequence_1\n";
+      print UNMAPPED_2 $orig_identifier_2;
+      print UNMAPPED_2 "$sequence_2\n";
+    }
+  }
+
+  print "Processed $counting{sequences_count} sequences in total\n\n";
+
+  print_final_analysis_report_paired_ends($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+
+}
+
+### Reads two paired-end FastQ files in lock-step (4 lines per record and file) and hands every read pair to the
+### Bowtie 1 or Bowtie 2 paired-end result checker (depending on $bowtie2).
+###
+### Arguments: $sequence_file_1, $sequence_file_2 - FastQ files holding read 1 and read 2 of each pair; each file is
+###                                                 decompressed through zcat if its own name ends in .gz
+###            $C_to_T_infile_1, $G_to_A_infile_1,
+###            $C_to_T_infile_2, $G_to_A_infile_2 - only passed on to print_final_analysis_report_paired_ends()
+###
+### Honours the file-level options $skip, $upto, $ambiguous and $unmapped and counts the read pairs that were
+### actually processed in $counting{sequences_count}. Dies if one of the inputs cannot be opened.
+sub process_fastQ_files_for_paired_end_methylation_calls{
+  my ($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+  ### Processing the two Illumina sequence files; we need the actual sequence of both reads to compare them against the genomic sequence in order to
+  ### make a methylation call. The sequence identifier per definition needs to be same for a sequence pair used for paired-end alignments.
+  ### Now reading in the sequence files sequence by sequence and see if the current sequences produced a paired-end alignment to one (or both)
+  ### of the converted genomes (either C->T or G->A version)
+
+  ### Each infile is checked for a .gz ending on its own, so a pair made of one gzipped and one plain file is read correctly as well.
+  ### The list form of the pipe open hands the file name to zcat as a single argument (no shell involved), and the
+  ### 3-argument open always opens the file for reading, whatever characters its name contains.
+  if ($sequence_file_1 =~ /\.gz$/){
+    open (IN1,'-|','zcat',$sequence_file_1) or die "Failed to open zcat pipe to $sequence_file_1 $!\n";
+  }
+  else{
+    open (IN1,'<',$sequence_file_1) or die "Failed to read from $sequence_file_1: $!\n";
+  }
+
+  if ($sequence_file_2 =~ /\.gz$/){
+    open (IN2,'-|','zcat',$sequence_file_2) or die "Failed to open zcat pipe to $sequence_file_2 $!\n";
+  }
+  else{
+    open (IN2,'<',$sequence_file_2) or die "Failed to read from $sequence_file_2: $!\n";
+  }
+
+  my $count = 0; # number of read pairs read so far, including the ones skipped via --skip
+
+  warn "\nReading in the sequence files $sequence_file_1 and $sequence_file_2\n";
+  ### Both files are required to have the exact same number of sequences, therefore we can process the sequences jointly one by one
+  while (1) {
+    # reading from the first input file
+    my $identifier_1 = <IN1>;
+    my $sequence_1 = <IN1>;
+    my $ident_1 = <IN1>;         # second header line; only printed back to the ambiguous/unmapped files
+    my $quality_value_1 = <IN1>; # passed on to the alignment checker and printed back to the ambiguous/unmapped files
+    # reading from the second input file
+    my $identifier_2 = <IN2>;
+    my $sequence_2 = <IN2>;
+    my $ident_2 = <IN2>;         # second header line; only printed back to the ambiguous/unmapped files
+    my $quality_value_2 = <IN2>; # passed on to the alignment checker and printed back to the ambiguous/unmapped files
+    last unless ($identifier_1 and $sequence_1 and $quality_value_1 and $identifier_2 and $sequence_2 and $quality_value_2); # end of (one of) the files
+
+    $identifier_1 = fix_IDs($identifier_1); # this is to avoid problems with truncated read ID when they contain white spaces
+    $identifier_2 = fix_IDs($identifier_2);
+
+    ++$count;
+
+    if ($skip){
+      next unless ($count > $skip);
+    }
+    if ($upto){
+      last if ($count > $upto);
+    }
+
+    $counting{sequences_count}++;
+    if ($counting{sequences_count}%100000==0) {
+      warn "Processed $counting{sequences_count} sequences so far\n";
+    }
+
+    ### copies taken before chomp and before the leading @ is removed; they are printed as they are to the ambiguous/unmapped files
+    my $orig_identifier_1 = $identifier_1;
+    my $orig_identifier_2 = $identifier_2;
+
+    chomp $sequence_1;
+    chomp $identifier_1;
+    chomp $sequence_2;
+    chomp $identifier_2;
+    chomp $quality_value_1;
+    chomp $quality_value_2;
+    ### $ident_1 and $ident_2 are deliberately not chomped, they are printed back with their own line endings below
+
+    $identifier_1 =~ s/^\@//;  # deletes the @ at the beginning of the FastQ ID (only the ID of read 1 is passed on)
+
+    ### both sequences are passed on in upper case
+    my $return;
+    if ($bowtie2){
+      $return = check_bowtie_results_paired_ends_bowtie2 (uc$sequence_1,uc$sequence_2,$identifier_1,$quality_value_1,$quality_value_2);
+    }
+    else{
+      $return = check_bowtie_results_paired_ends (uc$sequence_1,uc$sequence_2,$identifier_1,$quality_value_1,$quality_value_2);
+    }
+
+    ### the checkers may return undef; treat that like 0 so that the numeric comparisons below do not warn
+    unless ($return){
+      $return = 0;
+    }
+
+    # print the sequences to ambiguous_1 and _2 if --ambiguous was specified
+    if ($ambiguous and $return == 2){
+      # seq_1
+      print AMBIG_1 $orig_identifier_1;
+      print AMBIG_1 "$sequence_1\n";
+      print AMBIG_1 $ident_1;
+      print AMBIG_1 "$quality_value_1\n";
+      # seq_2
+      print AMBIG_2 $orig_identifier_2;
+      print AMBIG_2 "$sequence_2\n";
+      print AMBIG_2 $ident_2;
+      print AMBIG_2 "$quality_value_2\n";
+    }
+
+    # print the sequences to unmapped_1.out and unmapped_2.out if --un was specified
+    elsif ($unmapped and $return == 1){
+      # seq_1
+      print UNMAPPED_1 $orig_identifier_1;
+      print UNMAPPED_1 "$sequence_1\n";
+      print UNMAPPED_1 $ident_1;
+      print UNMAPPED_1 "$quality_value_1\n";
+      # seq_2
+      print UNMAPPED_2 $orig_identifier_2;
+      print UNMAPPED_2 "$sequence_2\n";
+      print UNMAPPED_2 $ident_2;
+      print UNMAPPED_2 "$quality_value_2\n";
+    }
+  }
+
+  print "Processed $counting{sequences_count} sequences in total\n\n";
+
+  print_final_analysis_report_paired_ends($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+
+}
+
+### Checks whether the read $identifier produced alignments in any of the Bowtie 1 output streams held in @fhs, selects the
+### unique best alignment (lowest number of mismatches) and, if there is one, performs and prints the methylation call.
+###
+### Arguments: $sequence      - read sequence (the callers in this file pass it in upper case)
+###            $identifier    - read ID; compared with 'eq' against the ID stored as last_seq_id for each Bowtie output stream
+###            $quality_value - quality string; if it is false (e.g. not passed for FastA reads) it is replaced by 'I' x length($sequence)
+###
+### Return values:
+###   1     - no alignment at all, or no unique best alignment, while --un ($unmapped) was specified (and --ambiguous was not, in the latter case)
+###   2     - no unique best alignment while --ambiguous ($ambiguous) was specified
+###   undef - (bare return) no alignment at all and --un was not specified
+###   0     - all other cases: alignment rejected because of --directional, genomic sequence could not be extracted,
+###           or the methylation call was made and printed
+###
+### Side effects: reads ahead in the filehandles stored in @fhs (updating their last_seq_id/last_line entries), increments several
+### counters in %counting and dies on malformed Bowtie output (unexpected chromosome name, mismatch field or a third alignment in a row).
+sub check_bowtie_results_single_end{
+  my ($sequence,$identifier,$quality_value) = @_;
+
+  unless ($quality_value){ # FastA sequences get assigned a quality value of Phred 40 throughout
+    $quality_value = 'I'x(length$sequence);
+  }
+
+  ### alignments found for this read: $mismatches{number of mismatches}->{"chromosome:position"} = alignment details
+  my %mismatches = ();
+  ### reading from the bowtie output files to see if this sequence aligned to a bisulfite converted genome
+  foreach my $index (0..$#fhs){
+
+    ### skipping this index if the last alignment has been set to undefined already (i.e. end of bowtie output)
+    next unless ($fhs[$index]->{last_line} and defined $fhs[$index]->{last_seq_id});
+    ### if the sequence we are currently looking at produced an alignment we are doing various things with it
+    if ($fhs[$index]->{last_seq_id} eq $identifier) {
+      ###############################################################
+      ### STEP I Now processing the alignment stored in last_line ###
+      ###############################################################
+      my $valid_alignment_found_1 = decide_whether_single_end_alignment_is_valid($index,$identifier);
+      ### sequences can fail at this point if there was only 1 seq in the wrong orientation, or if there were 2 seqs, both in the wrong orientation
+      ### we only continue to extract useful information about this alignment if 1 was returned
+      if ($valid_alignment_found_1 == 1){
+	### Bowtie outputs which made it this far are in the correct orientation, so we can continue to analyse the alignment itself
+	### need to extract the chromosome name from the bowtie output (the reference name with its _CT_converted or _GA_converted suffix removed)
+	### the tab-separated fields 0,1,2,3,4 and 7 of the Bowtie output line are used; $strand is extracted but not used in this subroutine
+	my ($id,$strand,$mapped_chromosome,$position,$bowtie_sequence,$mismatch_info) = (split (/\t/,$fhs[$index]->{last_line},-1))[0,1,2,3,4,7];
+
+	unless($mismatch_info){
+	  $mismatch_info = '';
+	}
+
+	chomp $mismatch_info;
+	my $chromosome;
+	if ($mapped_chromosome =~ s/_(CT|GA)_converted$//){
+	  $chromosome = $mapped_chromosome;
+	}
+	else{
+	  die "Chromosome number extraction failed for $mapped_chromosome\n";
+	}
+	### Now extracting the number of mismatches to the converted genome
+	### (an empty field means no mismatches, otherwise the number of comma-separated entries is counted)
+	my $number_of_mismatches;
+	if ($mismatch_info eq ''){
+	  $number_of_mismatches = 0;
+	}
+	elsif ($mismatch_info =~ /^\d/){
+	  my @mismatches = split (/,/,$mismatch_info);
+	  $number_of_mismatches = scalar @mismatches;
+	}
+	else{
+	  die "Something weird is going on with the mismatch field:\t>>> $mismatch_info <<<\n";
+	}
+	### creating a composite location variable from $chromosome and $position and storing the alignment information in a temporary hash table
+	my $alignment_location = join (":",$chromosome,$position);
+	### If a sequence aligns to exactly the same location twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
+	### strand) were methylated and therefore protected. It is not needed to overwrite the same positional entry with a second entry for the same
+	### location (the genomic sequence extraction and methylation would not be affected by this, only the thing which would change is the index
+	### number for the found alignment)
+	unless (exists $mismatches{$number_of_mismatches}->{$alignment_location}){
+	  $mismatches{$number_of_mismatches}->{$alignment_location}->{seq_id}=$id;
+	  $mismatches{$number_of_mismatches}->{$alignment_location}->{bowtie_sequence}=$bowtie_sequence;
+	  $mismatches{$number_of_mismatches}->{$alignment_location}->{index}=$index;
+	  $mismatches{$number_of_mismatches}->{$alignment_location}->{chromosome}=$chromosome;
+	  $mismatches{$number_of_mismatches}->{$alignment_location}->{position}=$position;
+	}
+	$number_of_mismatches = undef;
+	##################################################################################################################################################
+	### STEP II Now reading in the next line from the bowtie filehandle. The next alignment can either be a second alignment of the same sequence or a
+	### a new sequence. In either case we will store the next line in @fhs ->{last_line}. In case the alignment is already the next entry, a 0 will
+	### be returned as $valid_alignment_found and it will then be processed in the next round only.
+	##################################################################################################################################################
+	my $newline = $fhs[$index]->{fh}-> getline();
+	if ($newline){
+	  my ($seq_id) = split (/\t/,$newline);
+	  $fhs[$index]->{last_seq_id} = $seq_id;
+	  $fhs[$index]->{last_line} = $newline;
+	}
+	else {
+	  # assigning undef to last_seq_id and last_line and jumping to the next index (end of bowtie output)
+	  $fhs[$index]->{last_seq_id} = undef;
+	  $fhs[$index]->{last_line} = undef;
+	  next;
+	}	
+	my $valid_alignment_found_2 = decide_whether_single_end_alignment_is_valid($index,$identifier);
+	### we only continue to extract useful information about this second alignment if 1 was returned
+	if ($valid_alignment_found_2 == 1){
+	  ### If the second Bowtie output made it this far it is in the correct orientation, so we can continue to analyse the alignment itself
+	  ### need to extract the chromosome name from the bowtie output (the reference name with its _CT_converted or _GA_converted suffix removed)
+	  my ($id,$strand,$mapped_chromosome,$position,$bowtie_sequence,$mismatch_info) = (split (/\t/,$fhs[$index]->{last_line},-1))[0,1,2,3,4,7];
+	  unless($mismatch_info){
+	    $mismatch_info = '';
+	  }	
+	  chomp $mismatch_info;
+
+	  my $chromosome;	
+	  if ($mapped_chromosome =~ s/_(CT|GA)_converted$//){
+	    $chromosome = $mapped_chromosome;
+	  }
+	  else{
+	    die "Chromosome number extraction failed for $mapped_chromosome\n";
+	  }
+
+	  ### Now extracting the number of mismatches to the converted genome
+	  my $number_of_mismatches;
+	  if ($mismatch_info eq ''){
+	    $number_of_mismatches = 0;
+	  }
+	  elsif ($mismatch_info =~ /^\d/){
+	    my @mismatches = split (/,/,$mismatch_info);
+	    $number_of_mismatches = scalar @mismatches;
+	  }
+	  else{
+	    die "Something weird is going on with the mismatch field\n";
+	  }
+	  ### creating a composite location variable from $chromosome and $position and storing the alignment information in a temporary hash table
+	  ### extracting the chromosome number from the bowtie output (see above)
+	  my $alignment_location = join (":",$chromosome,$position);
+	  ### In the special case that two differently converted sequences align against differently converted genomes, but to the same position
+	  ### with the same number of mismatches (or perfect matches), the chromosome, position and number of mismatches are the same. In this
+	  ### case we are not writing the same entry out a second time.
+	  unless (exists $mismatches{$number_of_mismatches}->{$alignment_location}){
+	    $mismatches{$number_of_mismatches}->{$alignment_location}->{seq_id}=$id;
+	    $mismatches{$number_of_mismatches}->{$alignment_location}->{bowtie_sequence}=$bowtie_sequence;
+	    $mismatches{$number_of_mismatches}->{$alignment_location}->{index}=$index;
+	    $mismatches{$number_of_mismatches}->{$alignment_location}->{chromosome}=$chromosome;
+	    $mismatches{$number_of_mismatches}->{$alignment_location}->{position}=$position;
+	  }
+	  ####################################################################################################################################
+	  #### STEP III Now reading in one more line which has to be the next alignment to be analysed. Adding it to @fhs ->{last_line}    ###
+	  ####################################################################################################################################
+	  $newline = $fhs[$index]->{fh}-> getline();
+	  if ($newline){
+	    my ($seq_id) = split (/\t/,$newline);
+	    die "The same seq ID occurred more than twice in a row\n" if ($seq_id eq $identifier);
+	    $fhs[$index]->{last_seq_id} = $seq_id;
+	    $fhs[$index]->{last_line} = $newline;
+	    next;
+	  }	
+	  else {
+	    # assigning undef to last_seq_id and last_line and jumping to the next index (end of bowtie output)
+	    $fhs[$index]->{last_seq_id} = undef;
+	    $fhs[$index]->{last_line} = undef;
+	    next;
+	  }
+	  ### not reached: both branches above jump to the next index (end of the block for the 2nd sequence in correct orientation)
+	}
+	### still within the 1st sequence in correct orientation found
+      }
+      ### still within the if (last_seq_id eq identifier) condition
+    }
+    ### still within foreach index loop
+  }
+  ### if there was not a single alignment found for a certain sequence we will continue with the next sequence in the sequence file
+  unless(%mismatches){
+    $counting{no_single_alignment_found}++;
+    if ($unmapped){
+      return 1; ### We will print this sequence out as unmapped sequence if --un unmapped.out has been specified
+    }
+    else{
+      return;
+    }
+  }
+  #######################################################################################################################################################
+  #######################################################################################################################################################
+  ### We are now looking if there is a unique best alignment for a certain sequence. This means we are sorting in ascending order and look at the     ###
+  ### sequence with the lowest amount of mismatches. If there is only one single best position we are going to store the alignment information in the ###
+  ### meth_call variables, if there are multiple hits with the same amount of (lowest) mismatches we are discarding the sequence altogether           ###
+  #######################################################################################################################################################
+  #######################################################################################################################################################
+  ### Going to use the variable $sequence_fails as a 'memory' if a sequence could not be aligned uniquely (set to 1 then)
+  my $sequence_fails = 0;
+  ### Declaring an empty hash reference which will store all information we need for the methylation call
+  my $methylation_call_params; # hash reference!
+  ### sorting in ascending order; only the first (= lowest) mismatch number is looked at, see the 'last' at the end of the loop body
+  foreach my $mismatch_number (sort {$a<=>$b} keys %mismatches){
+
+    ### if there is only 1 entry in the hash with the lowest number of mismatches we accept it as the best alignment
+    if (scalar keys %{$mismatches{$mismatch_number}} == 1){
+      for my $unique_best_alignment (keys %{$mismatches{$mismatch_number}}){
+	$methylation_call_params->{$identifier}->{bowtie_sequence} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{bowtie_sequence};
+	$methylation_call_params->{$identifier}->{chromosome} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{chromosome};
+	$methylation_call_params->{$identifier}->{position} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{position};
+	$methylation_call_params->{$identifier}->{index} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{index};
+    	$methylation_call_params->{$identifier}->{number_of_mismatches} = $mismatch_number;
+      }
+    }
+    elsif (scalar keys %{$mismatches{$mismatch_number}} == 3){
+      ### If there are 3 sequences with the same number of lowest mismatches we can discriminate 2 cases: (i) all 3 alignments are unique best hits and
+      ### come from different alignments processes (== indices) or (ii) one sequence alignment (== index) will give a unique best alignment, whereas a
+      ### second one will produce 2 (or potentially many) alignments for the same sequence but in a different conversion state or against a different genome
+      ### version (or both). This becomes especially relevant for highly converted sequences in which all Cs have been converted to Ts in the bisulfite
+      ### reaction. E.g.
+      ### CAGTCACGCGCGCGCG will become
+      ### TAGTTATGTGTGTGTG in the CT transformed version, which will ideally still give the correct alignment in the CT->CT alignment condition.
+      ### If the same read will then become G->A transformed as well however, the resulting sequence will look differently and potentially behave
+      ### differently in a GA->GA alignment and this depends on the methylation state of the original sequence!:
+      ### G->A conversion:
+      ### highly methylated: CAATCACACACACACA
+      ### highly converted : TAATTATATATATATA <== this sequence has a reduced complexity (only 2 bases left and not 3), and it is more likely to produce
+      ### an alignment with a low complexity genomic region than the one above. This would normally lead to the entire sequence being kicked out as
+      ### there will be 3 alignments with the same number of lowest mismatches!! This in turn means that highly methylated and thereby not converted
+      ### sequences are more likely to pass the alignment step, thereby creating a bias for methylated reads compared to their non-methylated counterparts.
+      ### We do not want any bias, whatsoever. Therefore if we have 1 sequence producing a unique best alignment and the second and third conditions
+      ### producing alignments only after performing an additional (theoretical) conversion we want to keep the best alignment with the lowest number of
+      ### additional transliterations performed. Thus we want to have a look at the level of complexity of the sequences producing the alignment.
+      ### In the above example the number of transliterations required to transform the actual sequence
+      ### to the C->T version would be TAGTTATGTGTGTGTG -> TAGTTATGTGTGTGTG = 0; (assuming this gives the correct alignment)
+      ### in the G->A case it would be TAGTTATGTGTGTGTG -> TAATTATATATATATA = 6; (assuming this gives multiple wrong alignments)
+      ### if the sequence giving a unique best alignment required a lower number of transliterations than the second best sequence yielding alignments
+      ### while requiring a much higher number of transliterations, we are going to accept the unique best alignment with the lowest number of performed
+      ### transliterations. As a threshold which does scale we will start with the number of transliterations of the lowest best match x 2 must still be
+      ### smaller than the number of transliterations of the second best sequence. Everything else will be flagged with $sequence_fails = 1 and discarded.
+      my @three_candidate_seqs;
+      foreach my $composite_location (keys (%{$mismatches{$mismatch_number}}) ){
+	my $transliterations_performed;
+	### alignments coming from index 0 or 1 are scored for a 'CT' conversion of the read, those from index 2 or 3 for a 'GA' conversion
+	if ($mismatches{$mismatch_number}->{$composite_location}->{index} == 0 or $mismatches{$mismatch_number}->{$composite_location}->{index} == 1){
+	  $transliterations_performed = determine_number_of_transliterations_performed($sequence,'CT');
+	}
+	elsif ($mismatches{$mismatch_number}->{$composite_location}->{index} == 2 or $mismatches{$mismatch_number}->{$composite_location}->{index} == 3){
+	  $transliterations_performed = determine_number_of_transliterations_performed($sequence,'GA');
+	}
+	else{
+	  die "unexpected index number range $!\n";
+	}
+	push @three_candidate_seqs,{
+				    index =>$mismatches{$mismatch_number}->{$composite_location}->{index},
+				    bowtie_sequence => $mismatches{$mismatch_number}->{$composite_location}->{bowtie_sequence},
+				    mismatch_number => $mismatch_number,
+				    chromosome => $mismatches{$mismatch_number}->{$composite_location}->{chromosome},
+				    position => $mismatches{$mismatch_number}->{$composite_location}->{position},
+				    seq_id => $mismatches{$mismatch_number}->{$composite_location}->{seq_id},
+				    transliterations_performed => $transliterations_performed,
+				   };
+      }
+      ### sorting in ascending order for the lowest number of transliterations performed
+      @three_candidate_seqs = sort {$a->{transliterations_performed} <=> $b->{transliterations_performed}} @three_candidate_seqs;
+      my $first_array_element = $three_candidate_seqs[0]->{transliterations_performed};
+      my $second_array_element = $three_candidate_seqs[1]->{transliterations_performed};
+      my $third_array_element = $three_candidate_seqs[2]->{transliterations_performed}; # only referenced by the commented-out debug output
+      # print "$first_array_element\t$second_array_element\t$third_array_element\n";
+      if (($first_array_element*2) < $second_array_element){
+	$counting{low_complexity_alignments_overruled_count}++;
+	### taking the index with the unique best hit and over ruling low complexity alignments with 2 hits
+	$methylation_call_params->{$identifier}->{bowtie_sequence} = $three_candidate_seqs[0]->{bowtie_sequence};
+	$methylation_call_params->{$identifier}->{chromosome} = $three_candidate_seqs[0]->{chromosome};
+	$methylation_call_params->{$identifier}->{position} = $three_candidate_seqs[0]->{position};
+	$methylation_call_params->{$identifier}->{index} = $three_candidate_seqs[0]->{index};
+	$methylation_call_params->{$identifier}->{number_of_mismatches} = $mismatch_number;
+	# print "Overruled low complexity alignments! Using $first_array_element and disregarding $second_array_element and $third_array_element\n";
+      }
+      else{
+	$sequence_fails = 1;
+      }
+    }
+    else{
+      ### any other number of equally good alignments (2, or more than 3) cannot be resolved
+      $sequence_fails = 1;
+    }
+    ### after processing the alignment with the lowest number of mismatches we exit
+    last;
+  }
+  ### skipping the sequence completely if there were multiple alignments with the same amount of lowest mismatches found at different positions
+  if ($sequence_fails == 1){
+    $counting{unsuitable_sequence_count}++;
+    if ($ambiguous){
+      return 2; # => exits to next sequence, and prints it out to multiple_alignments.out if --ambiguous has been specified
+    }
+    if ($unmapped){
+      return 1; # => exits to next sequence, and prints it out to unmapped.out if --un has been specified
+    }
+    else{
+      return 0; # => exits to next sequence (default)
+    }
+  }
+
+  ### --DIRECTIONAL
+  ### If the option --directional has been specified the user wants to consider only alignments to the original top strand or the original bottom strand. We will therefore
+  ### discard all alignments to strands complementary to the original strands, as they should not exist in reality due to the library preparation protocol
+  if ($directional){
+    if ( ($methylation_call_params->{$identifier}->{index} == 2) or ($methylation_call_params->{$identifier}->{index} == 3) ){
+      #    warn "Alignment rejected! (index was: $methylation_call_params->{$identifier}->{index})\n";
+      $counting{alignments_rejected_count}++;
+      return 0;
+    }
+  }
+
+  ### If the sequence has not been rejected so far it will have a unique best alignment
+  $counting{unique_best_alignment_count}++;
+  extract_corresponding_genomic_sequence_single_end($identifier,$methylation_call_params);
+  ### check test to see if the genomic sequence we extracted has the same length as the observed sequence+2, and only then we perform the methylation call
+  if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence}) != length($sequence)+2){
+    warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{position}\n";
+    $counting{genomic_sequence_could_not_be_extracted_count}++;
+    return 0;
+  }
+
+  ### otherwise we are set to perform the actual methylation call
+  $methylation_call_params->{$identifier}->{methylation_call} = methylation_call($identifier,$sequence,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence},$methylation_call_params->{$identifier}->{read_conversion});
+
+  print_bisulfite_mapping_result_single_end($identifier,$sequence,$methylation_call_params,$quality_value);
+  return 0; ## explicit 0; without it the value of the previous statement would be returned, and a return value of 1 makes the caller print the sequence to unmapped.out
+}
+
+sub check_bowtie_results_single_end_bowtie2{
+  ### Collects the Bowtie 2 (SAM) alignments of one single-end read from all alignment instances stored in @fhs, decides whether
+  ### the read has a unique best alignment and, if it does, performs the methylation call and prints the mapping result.
+  ###
+  ### Arguments: read sequence, read identifier, quality string of the read (may be empty for FastA input)
+  ### Returns:   0 => nothing else to do for this read (default)
+  ###            1 => the read is to be printed to the unmapped reads file (only if '--unmapped' was specified)
+  ###            2 => the read is to be printed to the ambiguous reads file (only if '--ambiguous' was specified)
+  ### Dies if a Bowtie 2 output line can not be interpreted (chromosome name, AS:i: or MD:Z: field), or if a read which
+  ### should have produced a single output line only is followed by another line with the same read ID
+  my ($sequence,$identifier,$quality_value) = @_;
+
+  unless ($quality_value){ # FastA sequences get assigned a quality value of Phred 40 throughout
+    $quality_value = 'I'x(length$sequence);
+  }
+
+  # as of version Bowtie 2 2.0.0 beta7, when input reads are unpaired, Bowtie 2 no longer removes the trailing /1 or /2 from the read name.
+  # $identifier =~ s/\/[1234567890]+$//; # some sequencers don't just have /1 or /2 at the end of read IDs
+
+  my $alignment_ambiguous = 0; # set to 1 as soon as one instance reports a second best alignment which is as good as the best one
+  my %alignments = ();         # best alignment per genomic location; the key is 'chromosome:position'
+
+  ### reading from the Bowtie 2 output filehandles
+  foreach my $index (0..$#fhs){
+
+    ### skipping this index if the last alignment has been set to undefined already (i.e. end of bowtie output)
+    next unless ($fhs[$index]->{last_line} and defined $fhs[$index]->{last_seq_id});
+
+    ### the alignment stored for this instance belongs to a different read already, so there is nothing to do for this instance
+    next unless ($fhs[$index]->{last_seq_id} eq $identifier);
+
+    #  SAM format fields of the Bowtie 2 output which are used below (1-based column numbers)
+    #  (1) Name of read that aligned
+    #  (2) Sum of all applicable flags. For unpaired reads a read without any reported alignment has the flag 4,
+    #      16 means that the alignment is to the reverse reference strand
+    #  (3) Name of reference sequence where alignment occurs (unmapped reads have a *)
+    #  (4) 1-based offset into the forward reference strand where leftmost character of the alignment occurs (0 for unmapped reads)
+    #  (6) CIGAR string representation of alignment (* if unavailable)
+    # (10) Read sequence (reverse-complemented if aligned to the reverse strand)
+    # (12) and following: optional fields, tab-separated. The ones we need are:
+    #      AS:i:<N> Alignment score. Only present if SAM record is for an aligned read.
+    #      XS:i:<N> Alignment score for second-best alignment. Only present if more than one alignment was found for the read.
+    #      MD:Z:<S> A string representation of the mismatched reference bases in the alignment. Only present for an aligned read.
+    my @fields = split (/\t/,$fhs[$index]->{last_line});
+    my ($id,$flag,$mapped_chromosome,$position,$cigar,$bowtie_sequence) = @fields[0,1,2,3,5,9];
+
+    ### If a sequence has no reported alignments there will be a single output line with a bit-wise flag value of 4. We can store the next alignment and move on to the next Bowtie 2 instance
+    if ($flag == 4){
+      ## reading in the next alignment, which must be the next sequence
+      my $next_seq_id = _bowtie2_se_read_next_alignment($index);
+      if (defined $next_seq_id and $next_seq_id eq $identifier){
+        die "Sequence with ID $identifier did not produce any alignment, but next seq-ID was also $fhs[$index]->{last_seq_id}!\n";
+      }
+      next; # next instance
+    }
+
+    # if there are one or more proper alignments we can extract the chromosome number
+    my $chromosome = $mapped_chromosome;
+    unless ($chromosome =~ s/_(CT|GA)_converted$//){
+      die "Chromosome number extraction failed for $mapped_chromosome\n";
+    }
+
+    ### We will use the optional fields to determine the best alignment. The tag names are anchored to the start of the field
+    ### so that they can not be picked up from the inside of some other (string) field by accident
+    my ($alignment_score,$second_best,$MD_tag);
+    foreach my $optional_field (@fields[11..$#fields]){
+      if ($optional_field =~ /^AS:i:(.*)/){
+        $alignment_score = $1;
+      }
+      elsif ($optional_field =~ /^XS:i:(.*)/){
+        $second_best = $1;
+      }
+      elsif ($optional_field =~ /^MD:Z:(.*)/){
+        $MD_tag = $1;
+      }
+    }
+
+    unless (defined $alignment_score and defined $MD_tag){
+      ### substituting a placeholder for the missing value(s), interpolating undef would only add warnings to the error message
+      my $reported_score  = defined $alignment_score ? $alignment_score : 'undefined';
+      my $reported_MD_tag = defined $MD_tag          ? $MD_tag          : 'undefined';
+      die "Failed to extract alignment score ($reported_score) and MD tag ($reported_MD_tag)!\n";
+    }
+
+    # If the first alignment score is the same as the alignment score of the second best hit we are going to boot this sequence altogether
+    if (defined $second_best and $alignment_score == $second_best){
+      $alignment_ambiguous = 1;
+      ## need to read and discard all additional ambiguous reads until we reach the next sequence
+      _bowtie2_se_skip_remaining_alignments($index,$identifier);
+      next;
+    }
+
+    ### either there is no second best hit, or it has a lower alignment score than the current one, so we can safely store the current alignment
+    _bowtie2_se_store_alignment(\%alignments,{
+                                              seq_id          => $id,
+                                              alignment_score => $alignment_score,
+                                              bowtie_sequence => $bowtie_sequence,
+                                              index           => $index,
+                                              chromosome      => $chromosome,
+                                              position        => $position,
+                                              CIGAR           => $cigar,
+                                              MD_tag          => $MD_tag,
+                                             });
+
+    if (defined $second_best){
+      ### now reading and discarding all (inferior) alignments of this sequencing read until we hit the next sequence
+      _bowtie2_se_skip_remaining_alignments($index,$identifier);
+    }
+    else{
+      ### there was no second best hit, so the next line has to belong to the next sequence
+      my $next_seq_id = _bowtie2_se_read_next_alignment($index);
+      if (defined $next_seq_id and $next_seq_id eq $identifier){
+        die "Sequence with ID $identifier did not have a second best alignment, but next seq-ID was also $fhs[$index]->{last_seq_id}!\n";
+      }
+    }
+  }
+
+  ### if the read produced several ambiguous alignments within one instance we can return already now. If --ambiguous or --unmapped was specified the read sequence will be printed out.
+  if ($alignment_ambiguous == 1){
+    $counting{unsuitable_sequence_count}++;
+    return 2 if ($ambiguous); # => exits to next sequence, and prints it out to _ambiguous_reads.txt if '--ambiguous' was specified
+    return 1 if ($unmapped);  # => exits to next sequence, and prints it out to _unmapped_reads.txt if '--unmapped' but not '--ambiguous' was specified
+    return 0;
+  }
+
+  ### if there was no alignment found for a certain sequence at all we continue with the next sequence in the sequence file
+  unless (%alignments){
+    $counting{no_single_alignment_found}++;
+    return 1 if ($unmapped); # => exits to next sequence, and prints it out to _unmapped_reads.txt if '--unmapped' was specified
+    return 0;                # default
+  }
+
+  #######################################################################################################################################################
+
+  ### If the sequence was not rejected so far we are now looking if there is a unique best alignment among all alignment instances. If there is only one
+  ### single best position we are going to store the alignment information in $methylation_call_params. If there are multiple hits with the same (highest)
+  ### alignment score we are discarding the sequence altogether.
+  ### For end-to-end alignments the maximum alignment score can be 0, each mismatch can receive penalties up to 6, and each gap receives penalties for
+  ### opening (5) and extending (3 per bp) the gap.
+
+  #######################################################################################################################################################
+
+  my $methylation_call_params; # hash reference which will store all information we need for the methylation call
+  my $sequence_fails = 0; # Going to use $sequence_fails as a 'memory' if a sequence could not be aligned uniquely (set to 1 then)
+
+  ### alignment locations, the one with the highest alignment score comes first
+  my @locations = sort {$alignments{$b}->{alignment_score} <=> $alignments{$a}->{alignment_score}} keys %alignments;
+
+  if (scalar @locations > 4){
+    die "There are too many potential hits for this sequence (1-4 expected, but found: ",scalar keys %alignments,")\n";
+  }
+
+  ### if the second best alignment has the same alignment score as the first one, the sequence will get booted
+  if (scalar @locations >= 2 and $alignments{$locations[0]}->{alignment_score} == $alignments{$locations[1]}->{alignment_score}){
+    $sequence_fails = 1;
+  }
+  ### else there is just a single alignment or a unique best one, which we are going to store for further processing
+  else{
+    my $best_alignment = $alignments{$locations[0]};
+    foreach my $key (qw(bowtie_sequence chromosome position index alignment_score MD_tag CIGAR)){
+      $methylation_call_params->{$identifier}->{$key} = $best_alignment->{$key};
+    }
+  }
+
+  ### skipping the sequence completely if there were multiple alignments with the same best alignment score at different positions
+  if ($sequence_fails == 1){
+    $counting{unsuitable_sequence_count}++;
+    return 2 if ($ambiguous); # => exits to next sequence, and prints it out (in FastQ format) to _ambiguous_reads.txt if '--ambiguous' was specified
+    return 1 if ($unmapped);  # => exits to next sequence, and prints it out (in FastQ format) to _unmapped_reads.txt if '--unmapped' but not '--ambiguous' was specified
+    return 0;                 # => exits to next sequence (default)
+  }
+
+  ### --DIRECTIONAL
+  ### If the option --directional has been specified the user wants to consider only alignments to the original top strand or the original bottom strand. We will therefore
+  ### discard all alignments to strands complementary to the original strands, as they should not exist in reality due to the library preparation protocol
+  if ($directional){
+    if ( ($methylation_call_params->{$identifier}->{index} == 2) or ($methylation_call_params->{$identifier}->{index} == 3) ){
+      $counting{alignments_rejected_count}++;
+      return 0;
+    }
+  }
+
+  ### If the sequence has not been rejected so far it has a unique best alignment
+  $counting{unique_best_alignment_count}++;
+
+  ### Now we need to extract a genomic sequence that exactly corresponds to the reported alignment. This potentially means that we need to deal with insertions or deletions as well
+  extract_corresponding_genomic_sequence_single_end_bowtie2 ($identifier,$methylation_call_params);
+
+  ### check test to see if the genomic sequence we extracted has the same length as the observed sequence+2, and only then we perform the methylation call
+  if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence}) != length($sequence)+2){
+    warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{position}\n";
+    $counting{genomic_sequence_could_not_be_extracted_count}++;
+    return 0;
+  }
+
+  ### otherwise we are set to perform the actual methylation call
+  $methylation_call_params->{$identifier}->{methylation_call} = methylation_call($identifier,$sequence,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence},$methylation_call_params->{$identifier}->{read_conversion});
+  print_bisulfite_mapping_result_single_end_bowtie2 ($identifier,$sequence,$methylation_call_params,$quality_value);
+  return 0; ## if a sequence got this far we do not want to print it to unmapped or ambiguous.out
+}
+
+### Helper of check_bowtie_results_single_end_bowtie2: reads the next line from the Bowtie 2 filehandle of instance $index and
+### stores the (chomped) line and its read ID as last_line and last_seq_id in @fhs. At the end of the Bowtie 2 output both
+### entries are set to undef. Returns the read ID of the line just read, or undef if there was no further line.
+sub _bowtie2_se_read_next_alignment{
+  my ($index) = @_;
+  my $newline = $fhs[$index]->{fh}->getline();
+  if ($newline){
+    chomp $newline;
+    my ($seq_id) = split (/\t/,$newline);
+    $fhs[$index]->{last_seq_id} = $seq_id;
+    $fhs[$index]->{last_line} = $newline;
+    return $seq_id;
+  }
+  # assigning undef to last_seq_id and last_line (end of Bowtie 2 output)
+  $fhs[$index]->{last_seq_id} = undef;
+  $fhs[$index]->{last_line} = undef;
+  return;
+}
+
+### Helper of check_bowtie_results_single_end_bowtie2: reads and discards output lines of instance $index until a line with a
+### read ID other than $identifier has been stored in @fhs, or until the end of the Bowtie 2 output has been reached.
+sub _bowtie2_se_skip_remaining_alignments{
+  my ($index,$identifier) = @_;
+  while (1){
+    my $seq_id = _bowtie2_se_read_next_alignment($index);
+    last unless (defined $seq_id);   # break free in case we have reached the end of the alignment output
+    last if ($seq_id ne $identifier); # the next sequence is stored in @fhs now
+  }
+  return;
+}
+
+### Helper of check_bowtie_results_single_end_bowtie2: stores $alignment (hash reference) in the hash referenced by $alignments
+### using 'chromosome:position' as the key, unless there is an entry for this location already.
+### If a sequence aligns to exactly the same location with a perfect match twice the sequence does either not contain any C or G, or
+### all the Cs (or Gs on the reverse strand) were methylated and therefore protected. Alternatively it will align better in one
+### condition than in the other. In any case, it is not needed to overwrite the same positional entry with a second entry for the
+### same location, as the genomic sequence extraction and methylation call would not be affected by this (the only thing which would
+### change is the index number for the found alignment). We will continue to assign these alignments to the first indexes 0 and 1, i.e. OT and OB
+sub _bowtie2_se_store_alignment{
+  my ($alignments,$alignment) = @_;
+  my $alignment_location = join (":",$alignment->{chromosome},$alignment->{position});
+  unless (exists $alignments->{$alignment_location}){
+    $alignments->{$alignment_location} = $alignment;
+  }
+  return;
+}
+
+
+sub determine_number_of_transliterations_performed{
+  ### Returns the number of bases of $sequence which are affected by a read conversion of the type $read_conversion,
+  ### i.e. the number of Cs for 'CT' and the number of Gs for 'GA'. Dies for any other (or a missing) conversion type.
+  my ($sequence,$read_conversion) = @_;
+
+  ### $! is not part of the error message: no system call is involved here, so it could only report a stale and unrelated error
+  unless (defined $read_conversion){
+    die "Read conversion mode of the read was not specified\n";
+  }
+
+  ### tr with an empty replacement list just counts the matching characters
+  if ($read_conversion eq 'CT'){
+    return ($sequence =~ tr/C//);
+  }
+  if ($read_conversion eq 'GA'){
+    return ($sequence =~ tr/G//);
+  }
+  die "Read conversion mode of the read was not specified\n";
+}
+
+sub decide_whether_single_end_alignment_is_valid{
+  ### Checks whether the Bowtie 1 alignment stored in @fhs for instance $index belongs to the read $identifier and is in a
+  ### sensible orientation. Up to two alignments of the same read are inspected; lines are read from the Bowtie filehandle
+  ### as needed, and last_seq_id/last_line in @fhs are updated accordingly (both are set to undef at the end of the output).
+  ###
+  ### Returns 1 if last_line now holds an alignment of $identifier in the correct orientation, 0 otherwise.
+  ### Dies if the orientation check returns anything else than 0 or 1, or if the same read ID turns up a third time in a row.
+  my ($index,$identifier) = @_;
+
+  # extracting from Bowtie 1 format
+  my ($id,$strand) = (split (/\t/,$fhs[$index]->{last_line}))[0,1];
+
+  ### the sequence stored in @fhs as last_line is already the next sequence to be analysed -> analyse next round
+  unless (($id eq $fhs[$index]->{last_seq_id}) and ($id eq $identifier)){
+    return 0;
+  }
+
+  ### checking the orientation of the alignment. We need to discriminate between 8 different conditions, however only 4 of them are theoretically
+  ### sensible alignments
+  my $orientation = ensure_sensical_alignment_orientation_single_end ($index,$strand);
+  if ($orientation == 1){
+    return 1; ### 1st possibility for a sequence to pass
+  }
+  unless ($orientation == 0){
+    die "The orientation of the alignment must be either correct or incorrect\n";
+  }
+
+  ### The alignment was in the wrong orientation, so we need to read in a new line
+  my $newline = $fhs[$index]->{fh}->getline();
+  unless ($newline){
+    # assigning undef to last_seq_id and last_line (end of bowtie output)
+    $fhs[$index]->{last_seq_id} = undef;
+    $fhs[$index]->{last_line} = undef;
+    return 0; # not processing anything as the alignment currently stored in last_line was in the wrong orientation
+  }
+  ($id,$strand) = (split (/\t/,$newline))[0,1];
+
+  ### the sequence we just read in is already the next sequence to be analysed -> store it in @fhs
+  unless ($id eq $identifier){
+    $fhs[$index]->{last_seq_id} = $id;
+    $fhs[$index]->{last_line} = $newline;
+    return 0; # processing the new alignment result only in the next round
+  }
+
+  ### the next entry is still the correct sequence, so we are checking the orientation again
+  $orientation = ensure_sensical_alignment_orientation_single_end ($index,$strand);
+  if ($orientation == 1){
+    $fhs[$index]->{last_seq_id} = $id;
+    $fhs[$index]->{last_line} = $newline;
+    return 1; ### 2nd possibility for a sequence to pass
+  }
+  unless ($orientation == 0){
+    die "The orientation of the alignment must be either correct or incorrect\n";
+  }
+
+  ### The alignment was in the wrong orientation again, so we need to read in yet another new line and store it in @fhs
+  $newline = $fhs[$index]->{fh}->getline();
+  unless ($newline){
+    # assigning undef to last_seq_id and last_line (end of bowtie output)
+    $fhs[$index]->{last_seq_id} = undef;
+    $fhs[$index]->{last_line} = undef;
+    return 0; # not processing anything as the alignment currently stored in last_line was in the wrong orientation
+  }
+
+  ### check if the next line still has the same seq ID (must not happen), and if not overwrite the current seq-ID and bowtie output with
+  ### the same fields of the just read next entry. $! is not part of the message as no system call failed here
+  my ($seq_id) = split (/\t/,$newline);
+  die "Same seq ID 3 or more times in a row!(should be 2 max)" if ($seq_id eq $identifier);
+  $fhs[$index]->{last_seq_id} = $seq_id;
+  $fhs[$index]->{last_line} = $newline;
+  return 0; # not processing anything this round as the alignment currently stored in last_line was in the wrong orientation
+}
+#########################
+### BOWTIE 1 | PAIRED-END
+#########################
+
+sub check_bowtie_results_paired_ends{
+  my ($sequence_1,$sequence_2,$identifier,$quality_value_1,$quality_value_2) = @_;
+
+  ### quality values are not given for FastA files, so they are initialised with a Phred quality of 40
+  unless ($quality_value_1){
+    $quality_value_1 = 'I'x(length$sequence_1);
+  }
+  unless ($quality_value_2){
+    $quality_value_2 = 'I'x(length$sequence_2);
+  }
+
+  #  print "$identifier\n$fhs[0]->{last_seq_id}\n$fhs[1]->{last_seq_id}\n$fhs[2]->{last_seq_id}\n$fhs[3]->{last_seq_id}\n\n";
+
+  my %mismatches = ();
+  ### reading from the bowtie output files to see if this sequence pair aligned to a bisulfite converted genome
+
+
+  ### for paired end reads we are reporting alignments to the OT strand first (index 0), then the OB strand (index 3!!), similar to the single end way.
+  ### alignments to the complementary strands are reported afterwards (CTOT got index 1, and CTOB got index 2).
+  ### This is needed so that alignments which either contain no single C or G or reads which contain only protected Cs are reported to the original strands (OT and OB)
+  ### Before the complementary strands. Remember that it does not make any difference for the methylation calls, but it will matter if alignment to the complementary
+  ### strands are not being reported by specifying --directional
+
+  foreach my $index (0,3,1,2){
+    ### skipping this index if the last alignment has been set to undefined already (i.e. end of bowtie output)
+    next unless ($fhs[$index]->{last_line_1} and $fhs[$index]->{last_line_2} and defined $fhs[$index]->{last_seq_id});
+    ### if the sequence pair we are currently looking at produced an alignment we are doing various things with it
+    if ($fhs[$index]->{last_seq_id} eq $identifier) {
+      # print "$identifier\n$fhs[$index]->{last_seq_id}\n\n";
+
+      ##################################################################################
+      ### STEP I Processing the entry which is stored in last_line_1 and last_line_2 ###
+      ##################################################################################
+      my $valid_alignment_found = decide_whether_paired_end_alignment_is_valid($index,$identifier);
+      ### sequences can fail at this point if there was only 1 alignment in the wrong orientation, or if there were 2 alignments both in the wrong
+      ### orientation. We only continue to extract useful information about this alignment if 1 was returned
+      if ($valid_alignment_found == 1){
+	### Bowtie outputs which made it this far are in the correct orientation, so we can continue to analyse the alignment itself.
+	### we store the useful information in %mismatches
+	my ($id_1,$strand_1,$mapped_chromosome_1,$position_1,$bowtie_sequence_1,$mismatch_info_1) = (split (/\t/,$fhs[$index]->{last_line_1},-1))[0,1,2,3,4,7];
+	my ($id_2,$strand_2,$mapped_chromosome_2,$position_2,$bowtie_sequence_2,$mismatch_info_2) = (split (/\t/,$fhs[$index]->{last_line_2},-1))[0,1,2,3,4,7];
+	chomp $mismatch_info_1;
+	chomp $mismatch_info_2;
+	
+	### need to extract the chromosome number from the bowtie output (which is either XY_CT_converted or XY_GA_converted
+	my ($chromosome_1,$chromosome_2);
+	if ($mapped_chromosome_1 =~ s/_(CT|GA)_converted$//){
+	  $chromosome_1 = $mapped_chromosome_1;
+	}	
+	else{
+	  die "Chromosome number extraction failed for $mapped_chromosome_1\n";
+	}
+	if ($mapped_chromosome_2 =~ s/_(CT|GA)_converted$//){
+	  $chromosome_2 = $mapped_chromosome_2;
+	}
+	else{
+	  die "Chromosome number extraction failed for $mapped_chromosome_2\n";
+	}
+	
+	### Now extracting the number of mismatches to the converted genome
+	my $number_of_mismatches_1;
+	my $number_of_mismatches_2;
+	if ($mismatch_info_1 eq ''){
+	  $number_of_mismatches_1 = 0;
+	}
+	elsif ($mismatch_info_1 =~ /^\d/){
+	  my @mismatches = split (/,/,$mismatch_info_1);
+	  $number_of_mismatches_1 = scalar @mismatches;
+	}
+	else{
+	  die "Something weird is going on with the mismatch field\n";
+	}
+	if ($mismatch_info_2 eq ''){
+	  $number_of_mismatches_2 = 0;
+	}
+	elsif ($mismatch_info_2 =~ /^\d/){
+	  my @mismatches = split (/,/,$mismatch_info_2);
+	  $number_of_mismatches_2 = scalar @mismatches;
+	}
+	else{
+	  die "Something weird is going on with the mismatch field\n";
+	}
+	### To decide whether a sequence pair has a unique best alignment we will look at the lowest sum of mismatches from both alignments
+	my $sum_of_mismatches = $number_of_mismatches_1+$number_of_mismatches_2;
+	### creating a composite location variable from $chromosome and $position and storing the alignment information in a temporary hash table
+	die "Position 1 is higher than position 2" if ($position_1 > $position_2);
+	die "Paired-end alignments need to be on the same chromosome\n" unless ($chromosome_1 eq $chromosome_2);
+	my $alignment_location = join(":",$chromosome_1,$position_1,$position_2);
+	### If a sequence aligns to exactly the same location twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
+	### strand) were methylated and therefore protected. It is not needed to overwrite the same positional entry with a second entry for the same
+	### location (the genomic sequence extraction and methylation would not be affected by this, only the thing which would change is the index
+	### number for the found alignment)
+	unless (exists $mismatches{$sum_of_mismatches}->{$alignment_location}){
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{seq_id}=$id_1; # either is fine
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{bowtie_sequence_1}=$bowtie_sequence_1;
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{bowtie_sequence_2}=$bowtie_sequence_2;
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{index}=$index;
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{chromosome}=$chromosome_1; # either is fine
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{start_seq_1}=$position_1;
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{start_seq_2}=$position_2;
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{number_of_mismatches_1} = $number_of_mismatches_1;
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{number_of_mismatches_2} = $number_of_mismatches_2;
+	}
+	###################################################################################################################################################
+	### STEP II Now reading in the next 2 lines from the bowtie filehandle. If there are 2 next lines in the alignments filehandle it can either    ###
+	### be a second alignment of the same sequence pair or a new sequence pair. In any case we will just add it to last_line_1 and last_line_2.     ###
+	### If it is the alignment of the next sequence pair, 0 will be returned as $valid_alignment_found, so it will not be processed any further in  ###
+	### this round                                                                                                                                  ###
+	###################################################################################################################################################
+	my $newline_1 = $fhs[$index]->{fh}-> getline();
+	my $newline_2 = $fhs[$index]->{fh}-> getline();
+
+	if ($newline_1 and $newline_2){
+	  my ($seq_id_1) = split (/\t/,$newline_1);
+	  my ($seq_id_2) = split (/\t/,$newline_2);
+	
+	  if ($seq_id_1 =~ s/\/1$//){ # removing the read /1 tag
+	    $fhs[$index]->{last_seq_id} = $seq_id_1;
+	  }
+	  elsif ($seq_id_2 =~ s/\/1$//){ # removing the read /1 tag
+	    $fhs[$index]->{last_seq_id} = $seq_id_2;
+	  }
+	  else{
+	    die "Either read 1 or read 2 needs to end on '/1'\n";
+	  }
+	
+	  $fhs[$index]->{last_line_1} = $newline_1;
+	  $fhs[$index]->{last_line_2} = $newline_2;
+	}
+	else {
+	  # assigning undef to last_seq_id and both last_lines and jumping to the next index (end of bowtie output)
+	  $fhs[$index]->{last_seq_id} = undef;
+	  $fhs[$index]->{last_line_1} = undef;
+	  $fhs[$index]->{last_line_2} = undef;
+	  next; # jumping to the next index
+	}
+	### Now processing the entry we just stored in last_line_1 and last_line_2
+	$valid_alignment_found = decide_whether_paired_end_alignment_is_valid($index,$identifier);
+	### only processing the alignment further if 1 was returned. 0 will be returned either if the alignment is already the next sequence pair to
+	### be analysed or if it was a second alignment of the current sequence pair but in the wrong orientation
+	if ($valid_alignment_found == 1){
+	  ### we store the useful information in %mismatches
+	  ($id_1,$strand_1,$mapped_chromosome_1,$position_1,$bowtie_sequence_1,$mismatch_info_1) = (split (/\t/,$fhs[$index]->{last_line_1}))[0,1,2,3,4,7];
+	  ($id_2,$strand_2,$mapped_chromosome_2,$position_2,$bowtie_sequence_2,$mismatch_info_2) = (split (/\t/,$fhs[$index]->{last_line_2}))[0,1,2,3,4,7];
+	  chomp $mismatch_info_1;
+	  chomp $mismatch_info_2;
+	  ### need to extract the chromosome number from the bowtie output (which is either _CT_converted or _GA_converted)
+	  if ($mapped_chromosome_1 =~ s/_(CT|GA)_converted$//){
+	    $chromosome_1 = $mapped_chromosome_1;
+	  }	
+	  else{
+	    die "Chromosome number extraction failed for $mapped_chromosome_1\n";
+	  }
+	  if ($mapped_chromosome_2 =~ s/_(CT|GA)_converted$//){
+	    $chromosome_2 = $mapped_chromosome_2;
+	  }
+	  else{
+	    die "Chromosome number extraction failed for $mapped_chromosome_2\n";
+	  }
+	
+	  $number_of_mismatches_1='';
+	  $number_of_mismatches_2='';
+	  ### Now extracting the number of mismatches to the converted genome
+	  if ($mismatch_info_1 eq ''){
+	    $number_of_mismatches_1 = 0;
+	  }
+	  elsif ($mismatch_info_1 =~ /^\d/){
+	    my @mismatches = split (/,/,$mismatch_info_1);
+	    $number_of_mismatches_1 = scalar @mismatches;
+	  }
+	  else{
+	    die "Something weird is going on with the mismatch field\n";
+	  }
+	  if ($mismatch_info_2 eq ''){
+	    $number_of_mismatches_2 = 0;
+	  }
+	  elsif ($mismatch_info_2 =~ /^\d/){
+	    my @mismatches = split (/,/,$mismatch_info_2);
+	    $number_of_mismatches_2 = scalar @mismatches;
+	  }
+	  else{
+	    die "Something weird is going on with the mismatch field\n";
+	  }
+	  ### To decide whether a sequence pair has a unique best alignment we will look at the lowest sum of mismatches from both alignments
+	  $sum_of_mismatches = $number_of_mismatches_1+$number_of_mismatches_2;
+	  ### creating a composite location variable from $chromosome and $position and storing the alignment information in a temporary hash table
+	  die "position 1 is greater than position 2" if ($position_1 > $position_2);
+	  die "Paired-end alignments need to be on the same chromosome\n" unless ($chromosome_1 eq $chromosome_2);
+	  $alignment_location = join(":",$chromosome_1,$position_1,$position_2);
+	  ### If a sequence aligns to exactly the same location twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
+	  ### strand) were methylated and therefore protected. It is not needed to overwrite the same positional entry with a second entry for the same
+	  ### location (the genomic sequence extraction and methylation would not be affected by this, only the thing which would change is the index
+	  ### number for the found alignment)
+	  unless (exists $mismatches{$sum_of_mismatches}->{$alignment_location}){
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{seq_id}=$id_1; # either is fine
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{bowtie_sequence_1}=$bowtie_sequence_1;
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{bowtie_sequence_2}=$bowtie_sequence_2;
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{index}=$index;
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{chromosome}=$chromosome_1; # either is fine
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{start_seq_1}=$position_1;
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{start_seq_2}=$position_2;
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{number_of_mismatches_1} = $number_of_mismatches_1;
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{number_of_mismatches_2} = $number_of_mismatches_2;
+	  }
+	  ###############################################################################################################################################
+	  ### STEP III Now reading in two more lines. These have to be the next entry and we will just assign them to last_line_1 and last_line_2     ###
+	  ###############################################################################################################################################
+	  $newline_1 = $fhs[$index]->{fh}-> getline();
+	  $newline_2 = $fhs[$index]->{fh}-> getline();
+
+	  if ($newline_1 and $newline_2){
+	    my ($seq_id_1) = split (/\t/,$newline_1);
+	    my ($seq_id_2) = split (/\t/,$newline_2);
+
+	    if ($seq_id_1 =~ s/\/1$//){ # removing the read /1 tag
+	      $fhs[$index]->{last_seq_id} = $seq_id_1;
+	    }
+	    if ($seq_id_2 =~ s/\/1$//){ # removing the read /1 tag
+	      $fhs[$index]->{last_seq_id} = $seq_id_2;
+	    }
+	    $fhs[$index]->{last_line_1} = $newline_1;
+	    $fhs[$index]->{last_line_2} = $newline_2;
+	  }
+	  else {
+	    # assigning undef to last_seq_id and both last_lines and jumping to the next index (end of bowtie output)
+	    $fhs[$index]->{last_seq_id} = undef;
+	    $fhs[$index]->{last_line_1} = undef;
+	    $fhs[$index]->{last_line_2} = undef;
+	    next; # jumping to the next index
+	  }
+	  ### within the 2nd sequence pair alignment in correct orientation found
+	}
+	### within the 1st sequence pair alignment in correct orientation found
+      }
+      ### still within the (last_seq_id eq identifier) condition
+    }
+    ### still within foreach index loop
+  }
+  ### if there was no single alignment found for a certain sequence we will continue with the next sequence in the sequence file
+  unless(%mismatches){
+    $counting{no_single_alignment_found}++;
+    return 1; ### We will print this sequence out as unmapped sequence if --un unmapped.out has been specified
+  }
+  ### Going to use the variable $sequence_pair_fails as a 'memory' if a sequence could not be aligned uniquely (set to 1 then)
+  my $sequence_pair_fails = 0;
+  ### Declaring an empty hash reference which will store all information we need for the methylation call
+  my $methylation_call_params; # hash reference!
+  ### We are now looking if there is a unique best alignment for a certain sequence. This means we are sorting in ascending order and look at the
+  ### sequence with the lowest amount of mismatches. If there is only one single best position we are going to store the alignment information in the
+  ### meth_call variables, if there are multiple hits with the same amount of (lowest) mismatches we are discarding the sequence altogether
+  foreach my $mismatch_number (sort {$a<=>$b} keys %mismatches){
+    #dev print "Number of mismatches: $mismatch_number\t$identifier\t$sequence_1\t$sequence_2\n";
+    foreach my $entry (keys (%{$mismatches{$mismatch_number}}) ){
+      #dev print "$mismatch_number\t$entry\t$mismatches{$mismatch_number}->{$entry}->{index}\n";
+      # print join("\t",$mismatch_number,$mismatches{$mismatch_number}->{$entry}->{seq_id},$sequence,$mismatches{$mismatch_number}->{$entry}->{bowtie_sequence},$mismatches{$mismatch_number}->{$entry}->{chromosome},$mismatches{$mismatch_number}->{$entry}->{position},$mismatches{$mismatch_number}->{$entry}->{index}),"\n";
+    }
+    if (scalar keys %{$mismatches{$mismatch_number}} == 1){
+      #  print "Unique best alignment for sequence pair $sequence_1\t$sequence_1\n";
+      for my $unique_best_alignment (keys %{$mismatches{$mismatch_number}}){
+	$methylation_call_params->{$identifier}->{seq_id} = $identifier;
+ 	$methylation_call_params->{$identifier}->{bowtie_sequence_1} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{bowtie_sequence_1};
+	$methylation_call_params->{$identifier}->{bowtie_sequence_2} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{bowtie_sequence_2};
+       	$methylation_call_params->{$identifier}->{chromosome} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{chromosome};
+      	$methylation_call_params->{$identifier}->{start_seq_1} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{start_seq_1};
+	$methylation_call_params->{$identifier}->{start_seq_2} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{start_seq_2};
+	$methylation_call_params->{$identifier}->{alignment_end} = ($mismatches{$mismatch_number}->{$unique_best_alignment}->{start_seq_2}+length($mismatches{$mismatch_number}->{$unique_best_alignment}->{bowtie_sequence_2}));
+	$methylation_call_params->{$identifier}->{index} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{index};
+     	$methylation_call_params->{$identifier}->{number_of_mismatches_1} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{number_of_mismatches_1};
+	$methylation_call_params->{$identifier}->{number_of_mismatches_2} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{number_of_mismatches_2};
+      }
+    }
+    else{
+      $sequence_pair_fails = 1;
+    }
+    ### after processing the alignment with the lowest number of mismatches we exit
+    last;
+  }
+  ### skipping the sequence completely if there were multiple alignments with the same amount of lowest mismatches found at different positions
+  if ($sequence_pair_fails == 1){
+    $counting{unsuitable_sequence_count}++;
+    if ($ambiguous){
+      return 2; # => exits to next sequence pair, and prints both seqs out to multiple_alignments_1 and -2 if --ambiguous has been specified
+    }
+    if ($unmapped){
+      return 1; # => exits to next sequence pair, and prints both seqs out to unmapped_1 and _2  if --un has been specified
+    }
+    else{
+      return 0; # => exits to next sequence (default)
+    }
+  }
+
+  ### --DIRECTIONAL
+  ### If the option --directional has been specified the user wants to consider only alignments to the original top strand or the original bottom strand. We will therefore
+  ### discard all alignments to strands complementary to the original strands, as they should not exist in reality due to the library preparation protocol
+  if ($directional){
+    if ( ($methylation_call_params->{$identifier}->{index} == 1) or ($methylation_call_params->{$identifier}->{index} == 2) ){
+      #    warn "Alignment rejected! (index was: $methylation_call_params->{$identifier}->{index})\n";
+      $counting{alignments_rejected_count}++;
+      return 0;
+    }
+  }
+
+  ### If the sequence has not been rejected so far it does have a unique best alignment
+  $counting{unique_best_alignment_count}++;
+  extract_corresponding_genomic_sequence_paired_ends($identifier,$methylation_call_params);
+
+  ### sanity check: the genomic sequences we extracted must have the same length as the observed sequences +2; only then do we perform the methylation call
+  if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence_1}) != length($sequence_1)+2){
+    warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{start_seq_1}\n";
+    $counting{genomic_sequence_could_not_be_extracted_count}++;
+    return 0;
+  }
+  if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2}) != length($sequence_2)+2){
+    warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{start_seq_2}\n";
+    $counting{genomic_sequence_could_not_be_extracted_count}++;
+    return 0;
+  }
+
+  ### otherwise we are set to perform the actual methylation call
+  $methylation_call_params->{$identifier}->{methylation_call_1} = methylation_call($identifier,$sequence_1,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_1},$methylation_call_params->{$identifier}->{read_conversion_1});
+  $methylation_call_params->{$identifier}->{methylation_call_2} = methylation_call($identifier,$sequence_2,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2},$methylation_call_params->{$identifier}->{read_conversion_2});
+
+  print_bisulfite_mapping_results_paired_ends($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2);
+  return 0; ## otherwise 1 will be returned by default, which would print the sequence pair to unmapped_1 and _2
+}
+
+#########################
+### BOWTIE 2 | PAIRED-END
+#########################
+
+sub check_bowtie_results_paired_ends_bowtie2{
+  my ($sequence_1,$sequence_2,$identifier,$quality_value_1,$quality_value_2) = @_;
+
+  ### quality values are not given for FastA files, so they are initialised with a Phred quality of 40
+  unless ($quality_value_1){
+    $quality_value_1 = 'I'x(length$sequence_1);
+  }
+
+  unless ($quality_value_2){
+    $quality_value_2 = 'I'x(length$sequence_2);
+  }
+
+
+  # print "$identifier\n$fhs[0]->{last_seq_id}\n$fhs[1]->{last_seq_id}\n$fhs[2]->{last_seq_id}\n$fhs[3]->{last_seq_id}\n\n";
+
+
+  my %alignments;
+  my $alignment_ambiguous = 0;
+
+  ### reading from the Bowtie 2 output filehandles
+
+  ### for paired end reads we are reporting alignments to the OT strand first (index 0), then the OB strand (index 3!!), similar to the single end way.
+  ### alignments to the complementary strands are reported afterwards (CTOT got index 1, and CTOB got index 2).
+  ### This is needed so that alignments which either contain no single C or G or reads which contain only protected Cs are reported to the original strands (OT and OB)
+  ### Before the complementary strands. Remember that it does not make any difference for the methylation calls, but it will matter if alignments to the complementary
+  ### strands are not being reported when '--directional' is specified
+
+  foreach my $index (0,3,1,2){
+    ### skipping this index if the last alignment has been set to undefined already (i.e. end of bowtie output)
+    next unless ($fhs[$index]->{last_line_1} and $fhs[$index]->{last_line_2} and defined $fhs[$index]->{last_seq_id});
+
+    ### if the sequence pair we are currently looking at produced an alignment we are doing various things with it
+    if ($fhs[$index]->{last_seq_id} eq $identifier) {
+
+      my ($id_1,$flag_1,$mapped_chromosome_1,$position_1,$mapping_quality_1,$cigar_1,$bowtie_sequence_1,$qual_1) = (split (/\t/,$fhs[$index]->{last_line_1}))[0,1,2,3,4,5,9,10];
+      my ($id_2,$flag_2,$mapped_chromosome_2,$position_2,$mapping_quality_2,$cigar_2,$bowtie_sequence_2,$qual_2) = (split (/\t/,$fhs[$index]->{last_line_2}))[0,1,2,3,4,5,9,10];
+      #  print "Index: $index\t$fhs[$index]->{last_line_1}\n";
+      #  print "Index: $index\t$fhs[$index]->{last_line_2}\n";	
+      #  print join ("\t",$id_1,$flag_1,$mapped_chromosome_1,$position_1,$mapping_quality_1,$cigar_1,$bowtie_sequence_1,$qual_1),"\n";
+      #  print join ("\t",$id_2,$flag_2,$mapped_chromosome_2,$position_2,$mapping_quality_2,$cigar_2,$bowtie_sequence_2,$qual_2),"\n";
+      $id_1 =~ s/\/1$//;
+      $id_2 =~ s/\/2$//;
+
+      #  SAM format specifications for Bowtie 2
+      #  (1) Name of read that aligned
+      #  (2) Sum of all applicable flags. Flags relevant to Bowtie are:
+      #        1 The read is one of a pair
+      #        2 The alignment is one end of a proper paired-end alignment
+      #        4 The read has no reported alignments
+      #        8 The read is one of a pair and has no reported alignments
+      #       16 The alignment is to the reverse reference strand
+      #       32 The other mate in the paired-end alignment is aligned to the reverse reference strand
+      #       64 The read is mate 1 in a pair
+      #      128 The read is mate 2 in a pair
+      #      256 The read has multiple mapping states
+      #  (3) Name of reference sequence where alignment occurs (unmapped reads have a *)
+      #  (4) 1-based offset into the forward reference strand where leftmost character of the alignment occurs (0 for unmapped reads)
+      #  (5) Mapping quality (255 means MAPQ is not available)
+      #  (6) CIGAR string representation of alignment (* if unavailable)
+      #  (7) Name of reference sequence where mate's alignment occurs. Set to = if the mate's reference sequence is the same as this alignment's, or * if there is no mate.
+      #  (8) 1-based offset into the forward reference strand where leftmost character of the mate's alignment occurs. Offset is 0 if there is no mate.
+      #  (9) Inferred fragment size. Size is negative if the mate's alignment occurs upstream of this alignment. Size is 0 if there is no mate.
+      # (10) Read sequence (reverse-complemented if aligned to the reverse strand)
+      # (11) ASCII-encoded read qualities (reverse-complemented if the read aligned to the reverse strand). The encoded quality values are on the Phred quality scale and the encoding is ASCII-offset by 33 (ASCII char !), similarly to a FASTQ file.
+      # (12) Optional fields. Fields are tab-separated. bowtie2 outputs zero or more of these optional fields for each alignment, depending on the type of the alignment:
+      # AS:i:<N> Alignment score. Can be negative. Can be greater than 0 in --local mode (but not in --end-to-end mode). Only present if SAM record is for an aligned read.
+      # XS:i:<N> Alignment score for second-best alignment. Can be negative. Can be greater than 0 in --local mode (but not in --end-to-end mode). Only present if the SAM record is for an aligned read and more than one alignment was found for the read.
+      # YS:i:<N> Alignment score for opposite mate in the paired-end alignment. Only present if the SAM record is for a read that aligned as part of a paired-end alignment.
+      # XN:i:<N> The number of ambiguous bases in the reference covering this alignment. Only present if SAM record is for an aligned read.
+      # XM:i:<N> The number of mismatches in the alignment. Only present if SAM record is for an aligned read.
+      # XO:i:<N> The number of gap opens, for both read and reference gaps, in the alignment. Only present if SAM record is for an aligned read.
+      # XG:i:<N> The number of gap extensions, for both read and reference gaps, in the alignment. Only present if SAM record is for an aligned read.
+      # NM:i:<N> The edit distance; that is, the minimal number of one-nucleotide edits (substitutions, insertions and deletions) needed to transform the read string into the reference string. Only present if SAM record is for an aligned read.
+      # YF:Z:<N> String indicating reason why the read was filtered out. See also: Filtering. Only appears for reads that were filtered out.
+      # MD:Z:<S> A string representation of the mismatched reference bases in the alignment. See SAM format specification for details. Only present if SAM record is for an aligned read.
+
+      ### If a sequence has no reported alignments there will be a single output line per sequence with a bit-wise flag value of 77 for read 1 (1+4+8+64), or 141 for read 2 (1+4+8+128).
+      ### We can store the next alignment and move on to the next Bowtie 2 instance
+      if ($flag_1 == 77 and $flag_2 == 141){
+	## reading in the next alignment, which must be the next sequence
+	my $newline_1 = $fhs[$index]->{fh}-> getline();
+	my $newline_2 = $fhs[$index]->{fh}-> getline();
+	
+	if ($newline_1 and $newline_2){
+	  chomp $newline_1;
+	  chomp $newline_2;
+	  my ($seq_id_1) = split (/\t/,$newline_1);
+	  my ($seq_id_2) = split (/\t/,$newline_2);
+	  $seq_id_1 =~ s/\/1$//;
+	  $seq_id_2 =~ s/\/2$//;
+	  $fhs[$index]->{last_seq_id} = $seq_id_1;
+	  $fhs[$index]->{last_line_1} = $newline_1;
+	  $fhs[$index]->{last_line_2} = $newline_2;
+
+	  #  print "current sequence ($identifier) did not map, reading in next sequence\n";
+	  #  print "$index\t$fhs[$index]->{last_seq_id}\n";
+	  #  print "$index\t$fhs[$index]->{last_line_1}\n";
+	  #  print "$index\t$fhs[$index]->{last_line_2}\n";
+	  next; # next instance
+	}
+	else{
+	  # assigning undef to last_seq_id and last_line and jumping to the next index (end of Bowtie 2 output)
+	  $fhs[$index]->{last_seq_id} = undef;
+	  $fhs[$index]->{last_line_1} = undef;
+	  $fhs[$index]->{last_line_2} = undef;
+	  next;
+	}
+      }
+
+      ### If there are one or more proper alignments we can extract the chromosome number
+      my ($chromosome_1,$chromosome_2);
+      if ($mapped_chromosome_1 =~ s/_(CT|GA)_converted$//){
+	$chromosome_1 = $mapped_chromosome_1;
+      }	
+      else{
+	die "Chromosome number extraction failed for $mapped_chromosome_1\n";
+      }
+      if ($mapped_chromosome_2 =~ s/_(CT|GA)_converted$//){
+	$chromosome_2 = $mapped_chromosome_2;
+      }
+      else{
+	die "Chromosome number extraction failed for $mapped_chromosome_2\n";
+      }
+
+      die "Paired-end alignments need to be on the same chromosome\n" unless ($chromosome_1 eq $chromosome_2);
+
+      ### We will use the optional fields to determine the best alignments. Later on we extract the number of mismatches and/or indels from the CIGAR string
+      my ($alignment_score_1,$alignment_score_2,$second_best_1,$second_best_2,$MD_tag_1,$MD_tag_2);
+
+      my @fields_1 = split (/\t/,$fhs[$index]->{last_line_1});
+      my @fields_2 = split (/\t/,$fhs[$index]->{last_line_2});
+
+      foreach (11..$#fields_1){
+	if ($fields_1[$_] =~ /AS:i:(.*)/){
+	  $alignment_score_1 = $1;
+	}
+	elsif ($fields_1[$_] =~ /XS:i:(.*)/){
+	  $second_best_1 = $1;
+	}
+	elsif ($fields_1[$_] =~ /MD:Z:(.*)/){
+	  $MD_tag_1 = $1;
+	}
+      }
+
+      foreach (11..$#fields_2){
+	if ($fields_2[$_] =~ /AS:i:(.*)/){
+	  $alignment_score_2 = $1;
+	}
+	elsif ($fields_2[$_] =~ /XS:i:(.*)/){
+	  $second_best_2 = $1;
+	}
+	elsif ($fields_2[$_] =~ /MD:Z:(.*)/){
+	  $MD_tag_2 = $1;
+	}
+      }
+
+      die "Failed to extract alignment score 1 ($alignment_score_1) and MD tag ($MD_tag_1)!\nlast alignment 1: $fhs[$index]->{last_line_1}\nlast alignment 2: $fhs[$index]->{last_line_2}\n" unless (defined $alignment_score_1 and defined $MD_tag_1);
+      die "Failed to extract alignment score 2 ($alignment_score_2) and MD tag ($MD_tag_2)!\nlast alignment 1: $fhs[$index]->{last_line_1}\nlast alignment 2: $fhs[$index]->{last_line_2}\n" unless (defined $alignment_score_2 and defined $MD_tag_2);
+
+      # warn "First read 1 alignment score is: '$alignment_score_1'\n";
+      # warn "First read 2 alignment score is: '$alignment_score_2'\n";
+      # warn "MD tag 1 is: '$MD_tag_1'\n";
+      # warn "MD tag 2 is: '$MD_tag_2'\n";
+
+      ### To decide whether a sequence pair has a unique best alignment we will look at the highest sum of alignment scores from both alignments
+      my $sum_of_alignment_scores_1 = $alignment_score_1 + $alignment_score_2 ;
+      # print "sum of alignment scores: $sum_of_alignment_scores_1\n\n";
+
+      if (defined $second_best_1 and defined $second_best_2){
+	my $sum_of_alignment_scores_second_best = $second_best_1 + $second_best_2;
+	# warn "Second best alignment_score_1 is: '$second_best_1'\n";
+	# warn "Second best alignment_score_2 is: '$second_best_2'\n";
+	# warn "Second best alignment sum of alignment scores is: '$sum_of_alignment_scores_second_best'\n";
+
+	# If the first alignment score for the first read pair is the same as the alignment score of the second best hit we are going to boot this sequence pair altogether
+	if ($sum_of_alignment_scores_1 == $sum_of_alignment_scores_second_best){
+	  $alignment_ambiguous = 1;
+	  # print "This read will be chucked (AS==XS detected)!\n";
+
+ 	  ## need to read and discard all additional ambiguous reads until we reach the next sequence
+ 	  until ($fhs[$index]->{last_seq_id} ne $identifier){
+ 	    my $newline_1 = $fhs[$index]->{fh}-> getline();
+	    my $newline_2 = $fhs[$index]->{fh}-> getline();
+	    if ($newline_1 and $newline_2){
+	      chomp $newline_1;
+	      chomp $newline_2;
+	      my ($seq_id_1) = split (/\t/,$newline_1);
+	      my ($seq_id_2) = split (/\t/,$newline_2);
+	      $seq_id_1 =~ s/\/1$//;
+	      $seq_id_2 =~ s/\/2$//;
+	      # print "New Seq IDs:\t$seq_id_1\t$seq_id_2\n";
+
+	      $fhs[$index]->{last_seq_id} = $seq_id_1;
+	      $fhs[$index]->{last_line_1} = $newline_1;
+	      $fhs[$index]->{last_line_2} = $newline_2;
+		}
+ 	    else{
+ 	      # assigning undef to last_seq_id and last_line and jumping to the next index (end of Bowtie 2 output)
+ 	      $fhs[$index]->{last_seq_id} = undef;
+ 	      $fhs[$index]->{last_line_1} = undef;
+	      $fhs[$index]->{last_line_2} = undef;
+	      last; # break free if the end of the alignment output was reached
+ 	    }
+ 	  }
+	  #  if ($fhs[$index]->{last_seq_id}){
+	  #    warn "Index: $index\tThis Seq-ID is $identifier, skipped all ambiguous sequences until the next ID which is: $fhs[$index]->{last_seq_id}\n";
+	  #  }
+	}
+ 	else{ # the next best alignment has a lower alignment score than the current read, so we can safely store the current alignment
+	
+	  my $alignment_location;
+	  if ($position_1 <= $position_2){
+	    $alignment_location = join(":",$chromosome_1,$position_1,$position_2);
+	  }
+	  elsif($position_2 < $position_1){	
+	    $alignment_location = join(":",$chromosome_1,$position_2,$position_1);
+	  }
+	
+ 	  ### If a sequence aligns to exactly the same location twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
+ 	  ### strand) were methylated and therefore protected. Alternatively it will align better in one condition than in the other. In any case, it is not needed to overwrite
+ 	  ### the same positional entry with a second entry for the same location, as the genomic sequence extraction and methylation call would not be affected by this. The only
+ 	  ### thing which would change is the index number for the found alignment). We will continue to assign these alignments to the first indexes 0 and 3, i.e. OT and OB 
+	
+	  unless (exists $alignments{$alignment_location}){
+	    $alignments{$alignment_location}->{seq_id} = $id_1;
+ 	    $alignments{$alignment_location}->{alignment_score_1} = $alignment_score_1;
+	    $alignments{$alignment_location}->{alignment_score_2} = $alignment_score_2;
+	    $alignments{$alignment_location}->{sum_of_alignment_scores} = $sum_of_alignment_scores_1;
+	    $alignments{$alignment_location}->{bowtie_sequence_1} = $bowtie_sequence_1;
+	    $alignments{$alignment_location}->{bowtie_sequence_2} = $bowtie_sequence_2;
+	    $alignments{$alignment_location}->{index} = $index;
+ 	    $alignments{$alignment_location}->{chromosome} = $chromosome_1; # either is fine
+ 	    $alignments{$alignment_location}->{position_1} = $position_1;
+	    $alignments{$alignment_location}->{position_2} = $position_2;
+ 	    $alignments{$alignment_location}->{mismatch_info_1} = $MD_tag_1;
+ 	    $alignments{$alignment_location}->{mismatch_info_2} = $MD_tag_2;
+	    $alignments{$alignment_location}->{CIGAR_1} = $cigar_1;
+	    $alignments{$alignment_location}->{CIGAR_2} = $cigar_2;
+	    $alignments{$alignment_location}->{flag_1} = $flag_1;
+	    $alignments{$alignment_location}->{flag_2} = $flag_2;
+ 	  }
+	  # warn "added best of several alignments to \%alignments hash\n";
+	
+ 	  ### now reading and discarding all (inferior) alignments of this read pair until we hit the next sequence
+	  until ($fhs[$index]->{last_seq_id} ne $identifier){
+ 	    my $newline_1 = $fhs[$index]->{fh}-> getline();
+	    my $newline_2 = $fhs[$index]->{fh}-> getline();
+	    if ($newline_1 and $newline_2){
+	      chomp $newline_1;
+	      chomp $newline_2;
+	      my ($seq_id_1) = split (/\t/,$newline_1);
+	      my ($seq_id_2) = split (/\t/,$newline_2);
+	      $seq_id_1 =~ s/\/1$//;
+	      $seq_id_2 =~ s/\/2$//;
+	      # print "New Seq IDs:\t$seq_id_1\t$seq_id_2\n";
+
+	      $fhs[$index]->{last_seq_id} = $seq_id_1;
+	      $fhs[$index]->{last_line_1} = $newline_1;
+	      $fhs[$index]->{last_line_2} = $newline_2;
+	    }
+ 	    else{
+ 	      # assigning undef to last_seq_id and last_line_1 and _2 and jumping to the next index (end of Bowtie 2 output)
+ 	      $fhs[$index]->{last_seq_id} = undef;
+ 	      $fhs[$index]->{last_line_1} = undef;
+	      $fhs[$index]->{last_line_2} = undef;
+	      last; # break free if the end of the alignment output was reached
+ 	    }
+	  }
+	  # if($fhs[$index]->{last_seq_id}){
+	  #   warn "Index: $index\tThis Seq-ID is $identifier, skipped all other alignments until the next ID was reached which is: $fhs[$index]->{last_seq_id}\n";
+	  # }
+	}	
+      }
+      else{ # there is no second best hit, so we can just store this one and read in the next sequence
+	
+	my $alignment_location = join(":",$chromosome_1,$position_1,$position_2);
+	# print "$alignment_location\n";
+	### If a sequence aligns to exactly the same location with a perfect match twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
+ 	### strand) were methylated and therefore protected. Alternatively it will align better in one condition than in the other. In any case, it is not needed to overwrite
+ 	### the same positional entry with a second entry for the same location, as the genomic sequence extraction and methylation call would not be affected by this. The only
+ 	### thing which would change is the index number for the found alignment). We will continue to assign these alignments to the first indexes 0 and 3, i.e. OT and OB 
+
+	unless (exists $alignments{$alignment_location}){
+	  $alignments{$alignment_location}->{seq_id} = $id_1;
+	  $alignments{$alignment_location}->{alignment_score_1} = $alignment_score_1;
+	  $alignments{$alignment_location}->{alignment_score_2} = $alignment_score_2;
+	  $alignments{$alignment_location}->{sum_of_alignment_scores} = $sum_of_alignment_scores_1;
+	  $alignments{$alignment_location}->{bowtie_sequence_1} = $bowtie_sequence_1;
+	  $alignments{$alignment_location}->{bowtie_sequence_2} = $bowtie_sequence_2;
+	  $alignments{$alignment_location}->{index} = $index;
+	  $alignments{$alignment_location}->{chromosome} = $chromosome_1; # either is fine
+	  $alignments{$alignment_location}->{position_1} = $position_1;
+	  $alignments{$alignment_location}->{position_2} = $position_2;
+	  $alignments{$alignment_location}->{mismatch_info_1} = $MD_tag_1;
+	  $alignments{$alignment_location}->{mismatch_info_2} = $MD_tag_2;
+	  $alignments{$alignment_location}->{CIGAR_1} = $cigar_1;
+	  $alignments{$alignment_location}->{CIGAR_2} = $cigar_2;
+	  $alignments{$alignment_location}->{flag_1} = $flag_1;
+	  $alignments{$alignment_location}->{flag_2} = $flag_2;
+	}
+	
+	# warn "added unique alignment to \%alignments hash\n";
+
+	# Now reading and storing the next read pair
+	my $newline_1 = $fhs[$index]->{fh}-> getline();
+	my $newline_2 = $fhs[$index]->{fh}-> getline();
+	if ($newline_1 and $newline_2){
+	  chomp $newline_1;
+	  chomp $newline_2;
+	  # print "$newline_1\n";
+	  # print "$newline_2\n";
+	  my ($seq_id_1) = split (/\t/,$newline_1);
+	  my ($seq_id_2) = split (/\t/,$newline_2);
+	  $seq_id_1 =~ s/\/1$//;
+	  $seq_id_2 =~ s/\/2$//;
+	  # print "New Seq IDs:\t$seq_id_1\t$seq_id_2\n";
+
+	  $fhs[$index]->{last_seq_id} = $seq_id_1;
+	  $fhs[$index]->{last_line_1} = $newline_1;
+	  $fhs[$index]->{last_line_2} = $newline_2;
+
+	  if ($seq_id_1 eq $identifier){
+ 	    die "Sequence with ID $identifier did not have a second best alignment, but next seq-ID was also $fhs[$index]->{last_seq_id}!\n";
+ 	  }
+  	}
+	else{
+	  # assigning undef to last_seq_id and last_line_1 and _2 and jumping to the next index (end of Bowtie 2 output)
+	  $fhs[$index]->{last_seq_id} = undef;
+	  $fhs[$index]->{last_line_1} = undef;
+	  $fhs[$index]->{last_line_2} = undef;
+	}
+      }
+    }
+  }
+
+  ### if the read produced several ambiguous alignments for a single instance of Bowtie 2 we can return already now. If --ambiguous was specified the read sequence will be printed out in FastQ format
+  if ($alignment_ambiguous == 1){
+    $counting{unsuitable_sequence_count}++;
+    ### report that the sequence pair has multiple hits with bitwise flag 256. We can print the sequence to the result file straight away and skip everything else
+    #  my $ambiguous_read_1 = join("\t",$identifier.'/1','256','*','0','0','*','*','0','0',$sequence_1,$quality_value_1);
+    #  my $ambiguous_read_2 = join("\t",$identifier.'/2','256','*','0','0','*','*','0','0',$sequence_2,$quality_value_2);
+    #  print "$ambiguous_read_1\n";
+    #  print "$ambiguous_read_2\n";
+
+    if ($ambiguous){
+      return 2; # => exits to next sequence pair, and prints it out to _ambiguous_reads_1.txt and _ambiguous_reads_2.txt if '--ambiguous' was specified
+    }
+    elsif ($unmapped){
+      return 1; # => exits to next sequence pair, and prints it out to _unmapped_reads_1.txt and _unmapped_reads_2.txt if '--unmapped' but not '--ambiguous' was specified
+    }
+    else{
+      return 0;
+    }
+  }
+
+  ### if no alignment was found for a certain sequence at all we continue with the next sequence in the sequence file
+  unless (%alignments){
+    $counting{no_single_alignment_found}++;
+
+    # my $unmapped_read_1 = join("\t",$identifier.'/1','77','*','0','0','*','*','0','0',$sequence_1,$quality_value_1);
+    # my $unmapped_read_2 = join("\t",$identifier.'/2','141','*','0','0','*','*','0','0',$sequence_2,$quality_value_2);
+    # print "$unmapped_read_1\n";
+    # print "$unmapped_read_2\n";
+    if ($unmapped){
+      return 1; # => exits to next sequence pair, and prints it out to _unmapped_reads_1.txt and _unmapped_read_2.txt if '--unmapped' was specified
+    }
+    else{
+      return 0;
+    }
+  }
+
+  #######################################################################################################################################################
+
+  ### If the sequence pair was not rejected so far we are now looking if there is a unique best alignment among all alignment instances. If there is only one
+  ### single best position we are going to store the alignment information in the $meth_call variable. If there are multiple hits with the same (highest)
+  ### alignment score we are discarding the sequence pair altogether.
+  ### For end-to-end alignments the maximum alignment score is 0, each mismatch receives a penalty of 6, and each gap receives penalties for opening (5)
+  ### and extending (3 per bp) the gap.
+
+  #######################################################################################################################################################
+
+  ### Declaring an empty hash reference which will store all information we need for the methylation call
+  my $methylation_call_params; # hash reference
+  my $sequence_pair_fails = 0; # using $sequence_pair_fails as a 'memory' if a sequence could not be aligned uniquely (set to 1 then)
+
+  ### print contents of %alignments for debugging
+  ##  if (scalar keys %alignments >= 1){
+  #     print "\n******\n";
+  #     foreach my $alignment_location (sort {$a cmp $b} keys %alignments){
+  #       print "Loc:  $alignment_location\n";
+  #       print "ID:      $alignments{$alignment_location}->{seq_id}\n";
+  #       print "AS_1:    $alignments{$alignment_location}->{alignment_score_1}\n";
+  #       print "AS_2:    $alignments{$alignment_location}->{alignment_score_2}\n";
+  #       print "Seq_1:   $alignments{$alignment_location}->{bowtie_sequence_1}\n";
+  #       print "Seq_2:   $alignments{$alignment_location}->{bowtie_sequence_2}\n";
+  #       print "Index    $alignments{$alignment_location}->{index}\n";
+  #       print "Chr:     $alignments{$alignment_location}->{chromosome}\n";
+  #       print "Pos_1:   $alignments{$alignment_location}->{position_1}\n";
+  #       print "Pos_2:   $alignments{$alignment_location}->{position_2}\n";
+  #       print "CIGAR_1: $alignments{$alignment_location}->{CIGAR_1}\n";
+  #       print "CIGAR_2: $alignments{$alignment_location}->{CIGAR_2}\n";
+  #       print "MD_1:    $alignments{$alignment_location}->{mismatch_info_1}\n";
+  #       print "MD_2:    $alignments{$alignment_location}->{mismatch_info_2}\n";
+  #       print "Flag 1:  $alignments{$alignment_location}->{flag_1}\n";
+  #       print "Flag 2:  $alignments{$alignment_location}->{flag_2}\n";
+  #    }
+  #    print "\n******\n";
+  #  }
+
+  ### if there is only 1 entry in the %alignments hash we accept it as the best alignment
+  if (scalar keys %alignments == 1){
+    for my $unique_best_alignment (keys %alignments){
+      $methylation_call_params->{$identifier}->{bowtie_sequence_1} = $alignments{$unique_best_alignment}->{bowtie_sequence_1};
+      $methylation_call_params->{$identifier}->{bowtie_sequence_2} = $alignments{$unique_best_alignment}->{bowtie_sequence_2};
+      $methylation_call_params->{$identifier}->{chromosome}        = $alignments{$unique_best_alignment}->{chromosome};
+      $methylation_call_params->{$identifier}->{position_1}        = $alignments{$unique_best_alignment}->{position_1};
+      $methylation_call_params->{$identifier}->{position_2}        = $alignments{$unique_best_alignment}->{position_2};
+      $methylation_call_params->{$identifier}->{index}             = $alignments{$unique_best_alignment}->{index};
+      $methylation_call_params->{$identifier}->{alignment_score_1} = $alignments{$unique_best_alignment}->{alignment_score_1};
+      $methylation_call_params->{$identifier}->{alignment_score_2} = $alignments{$unique_best_alignment}->{alignment_score_2};
+      $methylation_call_params->{$identifier}->{sum_of_alignment_scores} = $alignments{$unique_best_alignment}->{sum_of_alignment_scores};
+      $methylation_call_params->{$identifier}->{mismatch_info_1}   = $alignments{$unique_best_alignment}->{mismatch_info_1};
+      $methylation_call_params->{$identifier}->{mismatch_info_2}   = $alignments{$unique_best_alignment}->{mismatch_info_2};
+      $methylation_call_params->{$identifier}->{CIGAR_1}           = $alignments{$unique_best_alignment}->{CIGAR_1};
+      $methylation_call_params->{$identifier}->{CIGAR_2}           = $alignments{$unique_best_alignment}->{CIGAR_2};
+      $methylation_call_params->{$identifier}->{flag_1}            = $alignments{$unique_best_alignment}->{flag_1};
+      $methylation_call_params->{$identifier}->{flag_2}            = $alignments{$unique_best_alignment}->{flag_2};
+    }
+  }
+
+  ### otherwise we are going to find out if there is a best match among the multiple alignments, or whether there are 2 or more equally good alignments (in which case
+  ### we boot the sequence pair altogether)
+  elsif (scalar keys %alignments >= 2  and scalar keys %alignments <= 4){
+    my $best_sum_of_alignment_scores;
+    my $best_alignment_location;
+    foreach my $alignment_location (sort {$alignments{$b}->{sum_of_alignment_scores} <=> $alignments{$a}->{sum_of_alignment_scores}} keys %alignments){
+      # print "$alignments{$alignment_location}->{sum_of_alignment_scores}\n";
+      unless (defined $best_sum_of_alignment_scores){
+	$best_sum_of_alignment_scores = $alignments{$alignment_location}->{sum_of_alignment_scores};
+	$best_alignment_location = $alignment_location;
+	# print "setting best alignment score to: $best_sum_of_alignment_scores\n";
+      }
+      else{
+	### if the second best alignment has the same sum of alignment scores as the first one, the sequence pair will get booted
+	if ($alignments{$alignment_location}->{sum_of_alignment_scores} == $best_sum_of_alignment_scores){
+	  # warn "Same sum of alignment scores for 2 different alignments, the sequence pair will get booted!\n";
+	  $sequence_pair_fails = 1;
+	  last; # exiting since we know that the sequence has ambiguous alignments
+	}
+	### else we are going to store the best alignment for further processing
+	else{
+	  $methylation_call_params->{$identifier}->{bowtie_sequence_1} = $alignments{$best_alignment_location}->{bowtie_sequence_1};
+	  $methylation_call_params->{$identifier}->{bowtie_sequence_2} = $alignments{$best_alignment_location}->{bowtie_sequence_2};
+	  $methylation_call_params->{$identifier}->{chromosome}        = $alignments{$best_alignment_location}->{chromosome};
+	  $methylation_call_params->{$identifier}->{position_1}        = $alignments{$best_alignment_location}->{position_1};
+	  $methylation_call_params->{$identifier}->{position_2}        = $alignments{$best_alignment_location}->{position_2};
+	  $methylation_call_params->{$identifier}->{index}             = $alignments{$best_alignment_location}->{index};
+	  $methylation_call_params->{$identifier}->{alignment_score_1} = $alignments{$best_alignment_location}->{alignment_score_1};
+	  $methylation_call_params->{$identifier}->{alignment_score_2} = $alignments{$best_alignment_location}->{alignment_score_2};
+	  $methylation_call_params->{$identifier}->{sum_of_alignment_scores} = $alignments{$best_alignment_location}->{sum_of_alignment_scores};
+	  $methylation_call_params->{$identifier}->{mismatch_info_1}   = $alignments{$best_alignment_location}->{mismatch_info_1};
+	  $methylation_call_params->{$identifier}->{mismatch_info_2}   = $alignments{$best_alignment_location}->{mismatch_info_2};
+	  $methylation_call_params->{$identifier}->{CIGAR_1}           = $alignments{$best_alignment_location}->{CIGAR_1};
+	  $methylation_call_params->{$identifier}->{CIGAR_2}           = $alignments{$best_alignment_location}->{CIGAR_2};
+	  $methylation_call_params->{$identifier}->{flag_1}            = $alignments{$best_alignment_location}->{flag_1};
+	  $methylation_call_params->{$identifier}->{flag_2}            = $alignments{$best_alignment_location}->{flag_2};
+	  last; # exiting since the sequence produced a unique best alignment
+	}
+      }
+    }
+  }
+  else{
+    die "There are too many potential hits for this sequence pair (1-4 expected, but found: '",scalar keys %alignments,"')\n";;
+  }
+
+  ### skipping the sequence completely if there were multiple alignments with the same best sum of alignment scores at different positions
+  if ($sequence_pair_fails == 1){
+    $counting{unsuitable_sequence_count}++;
+
+    ### report that the sequence has multiple hits with bitwise flag 256. We can print the sequence to the result file straight away and skip everything else
+    # my $ambiguous_read_1 = join("\t",$identifier.'/1','256','*','0','0','*','*','0','0',$sequence_1,$quality_value_1);
+    # my $ambiguous_read_2 = join("\t",$identifier.'/2','256','*','0','0','*','*','0','0',$sequence_2,$quality_value_2);
+    # print "$ambiguous_read_1\n";
+    # print "$ambiguous_read_2\n";
+
+    if ($ambiguous){
+      return 2; # => exits to next sequence pair, and prints it out (in FastQ format) to _ambiguous_reads_1.txt and _ambiguous_reads_2.txt if '--ambiguous' was specified
+      }
+    elsif ($unmapped){
+      return 1; # => exits to next sequence pair, and prints it out (in FastQ format) to _unmapped_reads_1.txt and _unmapped_reads_2.txt if '--unmapped' but not '--ambiguous' was specified
+    }
+    else{
+      return 0; # => exits to next sequence pair (default)
+    }
+  }
+
+  ### --DIRECTIONAL
+  ### If the option --directional has been specified the user wants to consider only alignments to the original top strand or the original bottom strand. We will therefore
+  ### discard all alignments to strands complementary to the original strands, as they should not exist in reality due to the library preparation protocol
+  if ($directional){
+    if ( ($methylation_call_params->{$identifier}->{index} == 1) or ($methylation_call_params->{$identifier}->{index} == 2) ){
+      #    warn "Alignment rejected! (index was: $methylation_call_params->{$identifier}->{index})\n";
+      $counting{alignments_rejected_count}++;
+      return 0;
+    }
+  }
+
+  ### If the sequence pair has not been rejected so far it does have a unique best alignment
+  $counting{unique_best_alignment_count}++;
+  extract_corresponding_genomic_sequence_paired_ends_bowtie2($identifier,$methylation_call_params);
+
+  ### check to see if the genomic sequences we extracted has the same length as the observed sequences +2, and only then we perform the methylation call
+  if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence_1}) != length($sequence_1)+2){
+    warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{start_seq_1}\n";
+    $counting{genomic_sequence_could_not_be_extracted_count}++;
+    return 0;
+  }
+  if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2}) != length($sequence_2)+2){
+    warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{start_seq_2}\n";
+    $counting{genomic_sequence_could_not_be_extracted_count}++;
+    return 0;
+  }
+
+  ### now we are set to perform the actual methylation call
+  $methylation_call_params->{$identifier}->{methylation_call_1} = methylation_call($identifier,$sequence_1,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_1},$methylation_call_params->{$identifier}->{read_conversion_1});
+  $methylation_call_params->{$identifier}->{methylation_call_2} = methylation_call($identifier,$sequence_2,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2},$methylation_call_params->{$identifier}->{read_conversion_2});
+  # print "$methylation_call_params->{$identifier}->{read_conversion_2}\n";
+  # print "  $sequence_2\n";
+  # print "$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2}\n";
+  # print "  $methylation_call_params->{$identifier}->{methylation_call_2}\n";
+
+  print_bisulfite_mapping_results_paired_ends_bowtie2($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2);
+  return 0; ## otherwise 1 will be returned by default, which would print the sequence pair to unmapped_1 and _2
+}
+
+###
+
+sub decide_whether_paired_end_alignment_is_valid{
+  ### Checks whether the read pair buffered for alignment instance $index (in $fhs[$index]->{last_line_1} and ->{last_line_2})
+  ### is an alignment of the sequence pair $identifier in an acceptable orientation.
+  ###
+  ### Returns 1 if @fhs now holds a pair for $identifier that passed the orientation check, and 0 otherwise.
+  ### Up to two further line pairs may be read from $fhs[$index]->{fh} along the way; whenever lines were read, the pair read
+  ### last is left in @fhs (last_seq_id, last_line_1, last_line_2), or all three are set to undef once the output is exhausted.
+  ### Dies if neither read of a freshly read pair ends on /1, if the orientation check returns something other than 0 or 1,
+  ### or if the same sequence ID turns up for a third time in a row.
+  my ($index,$identifier) = @_;
+
+  my @fields_1 = split (/\t/,$fhs[$index]->{last_line_1},-1);
+  my @fields_2 = split (/\t/,$fhs[$index]->{last_line_2},-1);
+  my ($id_1,$strand_1) = @fields_1[0,1];
+  my ($id_2,$strand_2) = @fields_2[0,1];
+
+  ### column 8 (mismatch information) gets its line ending removed; the value is not used any further in this subroutine
+  my ($mismatch_info_1,$mismatch_info_2) = ($fields_1[7],$fields_2[7]);
+  chomp $mismatch_info_1;
+  chomp $mismatch_info_2;
+
+  ### either of the two buffered lines may be the one carrying the /1 tag, so the tag is removed from both IDs
+  my $seq_id_1 = $id_1;
+  my $seq_id_2 = $id_2;
+  $seq_id_1 =~ s/\/1$//;
+  $seq_id_2 =~ s/\/1$//;
+
+  ### the buffered pair already belongs to another sequence pair -> it will be analysed in a later round
+  return 0 unless ($seq_id_1 eq $identifier or $seq_id_2 eq $identifier);
+
+  ### removes the /1 tag (in place) from whichever of the two IDs carries it and returns that ID
+  my $take_read_1_id = sub {
+    my ($first_ref,$second_ref) = @_;
+    return $$first_ref  if ($$first_ref  =~ s/\/1$//);
+    return $$second_ref if ($$second_ref =~ s/\/1$//);
+    die "One of the two reads needs to end on /1!!";
+  };
+
+  my @buffer_keys = qw(last_seq_id last_line_1 last_line_2);
+
+  ### FIRST ATTEMPT: the buffered pair itself
+  my $orientation = ensure_sensical_alignment_orientation_paired_ends ($index,$id_1,$strand_1,$id_2,$strand_2);
+  return 1 if ($orientation == 1); ### 1st possibility for A SEQUENCE-PAIR TO PASS
+  die "The orientation of the alignment must be either correct or incorrect\n" unless ($orientation == 0);
+
+  ### the buffered pair was in the wrong orientation, so we need to look at the next two lines
+  my $newline_1 = $fhs[$index]->{fh}->getline();
+  my $newline_2 = $fhs[$index]->{fh}->getline();
+  unless ($newline_1 and $newline_2){
+    ### end of bowtie output: nothing is left to be processed for this instance
+    @{$fhs[$index]}{@buffer_keys} = (undef,undef,undef);
+    return 0;
+  }
+
+  ($id_1,$strand_1) = (split (/\t/,$newline_1))[0,1];
+  ($id_2,$strand_2) = (split (/\t/,$newline_2))[0,1];
+  $seq_id_1 = $id_1;
+  $seq_id_2 = $id_2;
+  my $seqid = $take_read_1_id->(\$seq_id_1,\$seq_id_2);
+
+  ### the pair we just read is already the next sequence pair -> buffer it and process it in the next round
+  unless ($seq_id_1 eq $identifier or $seq_id_2 eq $identifier){
+    @{$fhs[$index]}{@buffer_keys} = ($seqid,$newline_1,$newline_2);
+    return 0;
+  }
+
+  ### SECOND ATTEMPT: another alignment of the same sequence pair
+  $orientation = ensure_sensical_alignment_orientation_paired_ends ($index,$id_1,$strand_1,$id_2,$strand_2);
+  if ($orientation == 1){
+    @{$fhs[$index]}{@buffer_keys} = ($seqid,$newline_1,$newline_2);
+    return 1; ### 2nd possibility for a SEQUENCE-PAIR TO PASS
+  }
+  die "The orientation of the alignment must be either correct or incorrect\n" unless ($orientation == 0);
+
+  ### wrong orientation again: the following two lines have to belong to the next sequence pair and are only buffered
+  $newline_1 = $fhs[$index]->{fh}->getline();
+  $newline_2 = $fhs[$index]->{fh}->getline();
+  unless ($newline_1 and $newline_2){
+    ### end of bowtie output
+    @{$fhs[$index]}{@buffer_keys} = (undef,undef,undef);
+    return 0;
+  }
+
+  ($seq_id_1) = split (/\t/,$newline_1);
+  ($seq_id_2) = split (/\t/,$newline_2);
+  $seqid = $take_read_1_id->(\$seq_id_1,\$seq_id_2);
+
+  ### a third entry in a row for the same sequence ID must not happen
+  die "Same seq ID 3 or more times in a row!(should be 2 max)" if ($seqid eq $identifier);
+
+  @{$fhs[$index]}{@buffer_keys} = ($seqid,$newline_1,$newline_2);
+  return 0; # nothing to process this round, both alignments of $identifier were in the wrong orientation
+}
+
+### EXTRACT GENOMIC SEQUENCE | BOWTIE 1 | PAIRED-END
+
+sub extract_corresponding_genomic_sequence_paired_ends {
+  ### Extracts the genomic sequence underneath both reads of an aligned pair from %chromosomes and stores it, together with
+  ### the strand and conversion information implied by the alignment index, in $methylation_call_params->{$sequence_identifier}.
+  ###
+  ### Keys read:    index, chromosome, start_seq_1, start_seq_2, bowtie_sequence_1, bowtie_sequence_2
+  ### Keys written: alignment_read_1, alignment_read_2, genome_conversion, read_conversion_1, read_conversion_2,
+  ###               unmodified_genomic_sequence_1, unmodified_genomic_sequence_2
+  ### One of the four strand counters in %counting is incremented as well. Dies if index is not 0, 1, 2 or 3.
+  ###
+  ### Each genomic sequence is 2 bases longer than the read (at one of the ends), so that a CpG, CHG or CHH methylation call
+  ### can also be made if the C happens to be at the first or last position of the observed sequence. Where the position
+  ### check fails an empty string is stored instead.
+  my ($sequence_identifier,$methylation_call_params) = @_;
+
+  my $index   = $methylation_call_params->{$sequence_identifier}->{index};
+  my $chr     = $methylation_call_params->{$sequence_identifier}->{chromosome};
+  my $start_1 = $methylation_call_params->{$sequence_identifier}->{start_seq_1};
+  my $start_2 = $methylation_call_params->{$sequence_identifier}->{start_seq_2};
+  my $read_1  = $methylation_call_params->{$sequence_identifier}->{bowtie_sequence_1};
+  my $read_2  = $methylation_call_params->{$sequence_identifier}->{bowtie_sequence_2};
+
+  ### returns the stretch of the chromosome starting at $offset which is 2 bases longer than $read
+  my $genomic_window = sub {
+    my ($offset,$read) = @_;
+    my $window = substr ($chromosomes{$chr},$offset,length($read)+2);
+    return $window;
+  };
+
+  ### strand of the alignment of read 1 and read 2, and the conversions we expect to see (needed later for the methylation call)
+  my ($alignment_read_1,$alignment_read_2,$read_conversion_info_1,$read_conversion_info_2,$genome_conversion);
+
+  ### genomic sequences which will correspond to read 1 and read 2, respectively
+  my $non_bisulfite_sequence_1;
+  my $non_bisulfite_sequence_2;
+
+  ### [Index 0] CT converted read 1 plus GA converted read 2 vs. CT converted genome (+/- orientation)
+  if ($index == 0){
+    $counting{CT_GA_CT_count}++;
+    ($alignment_read_1,$alignment_read_2)             = ('+','-');
+    ($read_conversion_info_1,$read_conversion_info_2) = ('CT','GA');
+    $genome_conversion = 'CT';
+
+    ### forward hit (start_seq_1): the 2 extra bases are taken at the 3' end
+    $non_bisulfite_sequence_1 = $genomic_window->($start_1,$read_1);
+
+    ### reverse hit (start_seq_2): the extra bases are taken 3' and become 5' bases after reverse complementation.
+    ### Only extracted if the position check against the chromosome length passes
+    if (length($chromosomes{$chr}) > $start_2+length($read_2)+1){
+      $non_bisulfite_sequence_2 = reverse_complement($genomic_window->($start_2,$read_2));
+    }
+    else{
+      $non_bisulfite_sequence_2 = '';
+    }
+  }
+
+  ### [Index 1] GA converted read 1 plus CT converted read 2 vs. GA converted genome (+/- orientation)
+  elsif ($index == 1){
+    $counting{GA_CT_GA_count}++;
+    ($alignment_read_1,$alignment_read_2)             = ('+','-');
+    ($read_conversion_info_1,$read_conversion_info_2) = ('GA','CT');
+    $genome_conversion = 'GA';
+
+    ### forward hit (start_seq_1): GA conversion, so the 2 extra bases are taken at the 5' end (if the start position allows it)
+    if ($start_1-1 > 0){
+      $non_bisulfite_sequence_1 = $genomic_window->($start_1-2,$read_1);
+    }
+    else{
+      $non_bisulfite_sequence_1 = '';
+    }
+
+    ### reverse hit (start_seq_2): 2 extra bases at the 5' end which become 3' bases after reverse complementation
+    $non_bisulfite_sequence_2 = reverse_complement($genomic_window->($start_2-2,$read_2));
+  }
+
+  ### [Index 2] GA converted read 1 plus CT converted read 2 vs. CT converted genome (-/+ orientation)
+  elsif ($index == 2){
+    $counting{GA_CT_CT_count}++;
+    ($alignment_read_1,$alignment_read_2)             = ('-','+');
+    ($read_conversion_info_1,$read_conversion_info_2) = ('GA','CT');
+    $genome_conversion = 'CT';
+
+    ### Here the sequence information is switched round: the sequence for read 1 is taken from start_seq_2/bowtie_sequence_2
+    ### with 2 extra 3' bases, and is then reverse complemented
+    $non_bisulfite_sequence_1 = reverse_complement($genomic_window->($start_2,$read_2));
+
+    ### the sequence for read 2 is taken from start_seq_1/bowtie_sequence_1 with 2 extra 3' bases, but only if the
+    ### position check against the chromosome length passes
+    if (length($chromosomes{$chr}) > ($start_1)+length($read_1)+1){
+      $non_bisulfite_sequence_2 = $genomic_window->($start_1,$read_1);
+    }
+    else{
+      $non_bisulfite_sequence_2 = '';
+    }
+  }
+
+  ### [Index 3] CT converted read 1 plus GA converted read 2 vs. GA converted genome (-/+ orientation)
+  elsif ($index == 3){
+    $counting{CT_GA_GA_count}++;
+    ($alignment_read_1,$alignment_read_2)             = ('-','+');
+    ($read_conversion_info_1,$read_conversion_info_2) = ('CT','GA');
+    $genome_conversion = 'GA';
+
+    ### Here the sequence information is switched round as well: the sequence for read 1 is taken from
+    ### start_seq_2/bowtie_sequence_2 with 2 extra 5' bases (if the start position allows it), and is then reverse complemented
+    if ( ($start_2-1) > 0){
+      $non_bisulfite_sequence_1 = reverse_complement($genomic_window->($start_2-2,$read_2));
+    }
+    else{
+      $non_bisulfite_sequence_1 = '';
+    }
+
+    ### the sequence for read 2 is taken from start_seq_1/bowtie_sequence_1 with 2 extra 5' bases
+    $non_bisulfite_sequence_2 = $genomic_window->($start_1-2,$read_1);
+  }
+  else{
+    die "Too many bowtie result filehandles\n";
+  }
+
+  ### the alignment strand information is needed to determine which strand of the genomic sequence we are comparing the read against,
+  ### the read conversion information is needed to know whether we are looking for C->T or G->A substitutions
+  @{ $methylation_call_params->{$sequence_identifier} }{qw(alignment_read_1 alignment_read_2 genome_conversion read_conversion_1 read_conversion_2)}
+    = ($alignment_read_1,$alignment_read_2,$genome_conversion,$read_conversion_info_1,$read_conversion_info_2);
+
+  $methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence_1} = $non_bisulfite_sequence_1;
+  $methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence_2} = $non_bisulfite_sequence_2;
+}
+
+### EXTRACT GENOMIC SEQUENCE BOWTIE 2 | PAIRED-END
+
+sub extract_corresponding_genomic_sequence_paired_ends_bowtie2{
+  ### Extracts the genomic sequence underlying both reads of a Bowtie 2 paired-end alignment and stores it, together with the
+  ### 'memory' of the strand and conversion we are expecting (needed later for the methylation call), in
+  ### $methylation_call_params->{$sequence_identifier}.
+  ###
+  ### Values read:    CIGAR_1, CIGAR_2, position_1, position_2, chromosome and index (0-3)
+  ### Values written: alignment_read_1/2, read_conversion_1/2, genome_conversion, unmodified_genomic_sequence_1/2,
+  ###                 end_position_1/2 and indels_1/2
+  ###
+  ### If the 2 extra bases cannot be extracted because an alignment sits right at the edge of a chromosome we return early. In this
+  ### case only the two unmodified_genomic_sequence keys are written (empty, or lacking the 2 extra bases), none of the other values.
+  ### Dies if a CIGAR string has a non-matching number of lengths and operations, contains operations other than 'M', 'I' and 'D',
+  ### or if index is not one of 0-3.
+  my ($sequence_identifier,$methylation_call_params) = @_;
+  ### A bisulfite sequence pair for 1 location in the genome can theoretically be on any of the 4 possible converted strands. We are also giving the
+  ### sequence a 'memory' of the conversion we are expecting which we will need later for the methylation call
+
+  my $cigar_1 = $methylation_call_params->{$sequence_identifier}->{CIGAR_1};
+  my $params  = $methylation_call_params->{$sequence_identifier}; # shorthand for the entry of this sequence pair
+  my $cigar_2 = $params->{CIGAR_2};
+  my $index   = $params->{index};
+  my $chr     = $params->{chromosome};
+
+  ### We are now extracting the corresponding genomic sequence, +2 extra bases at the end (or start) so that we can also make a CpG methylation call and
+  ### in addition make differential calls for Cs in CHG or CHH context if the C happens to be at the last (or first)  position of the actually observed sequence
+  my $non_bisulfite_sequence_1 = '';
+  my $non_bisulfite_sequence_2 = '';
+
+  ### Positions in SAM format are 1 based, so we need to subtract 1 when getting substrings
+  my $pos_1 = $params->{position_1}-1;
+  my $pos_2 = $params->{position_2}-1;
+
+  # parsing CIGAR 1 string
+  my @len_1 = split (/\D+/,$cigar_1); # storing the length per operation
+  my @ops_1 = split (/\d+/,$cigar_1); # storing the operation
+  shift @ops_1; # remove the empty first element
+  die "CIGAR 1 string contained a non-matching number of lengths and operations\n" unless (scalar @len_1 == scalar @ops_1);
+  # parsing CIGAR 2 string
+  my @len_2 = split (/\D+/,$cigar_2); # storing the length per operation
+  my @ops_2 = split (/\d+/,$cigar_2); # storing the operation
+  shift @ops_2; # remove the empty first element
+  die "CIGAR 2 string contained a non-matching number of lengths and operations\n" unless (scalar @len_2 == scalar @ops_2);
+
+  ### For index 1 and 3 the 2 extra bases are taken in front of the alignment start, for index 0 and 2 behind the alignment end
+  my $extra_bases_at_start = ( ($index == 1) || ($index == 3) );
+  my $extra_bases_at_end   = ( ($index == 0) || ($index == 2) );
+
+  ### Extracting read 1 genomic sequence ###
+
+  # extracting 2 additional bp at the 5' end (read 1)
+  if ($extra_bases_at_start){
+    # checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome.
+    # An alignment starting at offset 2 still has 2 bases in front of it, hence >= (this is the same test as used for read 2)
+    unless ( ($pos_1-2) >= 0){ # exiting with an empty genomic sequence otherwise
+      $params->{unmodified_genomic_sequence_1} = $non_bisulfite_sequence_1;
+      $params->{unmodified_genomic_sequence_2} = ''; # defined but empty so that its length can be tested without warnings
+      return;
+    }
+    $non_bisulfite_sequence_1 .= substr ($chromosomes{$chr},$pos_1-2,2);
+  }
+
+  # $indels_1 is added to the Hamming distance value (needed for the NM field in the final SAM output)
+  my ($aligned_sequence_1,$indels_1);
+  ($aligned_sequence_1,$pos_1,$indels_1) = _genomic_sequence_for_cigar_paired_ends_bowtie2($chr,$pos_1,$cigar_1,\@len_1,\@ops_1,1);
+  $non_bisulfite_sequence_1 .= $aligned_sequence_1;
+
+  ### 3' end of read 1
+  if ($extra_bases_at_end){
+    ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
+    unless (length($chromosomes{$chr}) >= $pos_1+2){ # exiting with a genomic sequence lacking the 2 extra bases otherwise
+      $params->{unmodified_genomic_sequence_1} = $non_bisulfite_sequence_1;
+      $params->{unmodified_genomic_sequence_2} = ''; # defined but empty so that its length can be tested without warnings
+      return;
+    }
+    $non_bisulfite_sequence_1 .= substr ($chromosomes{$chr},$pos_1,2);
+  }
+
+
+  ### Extracting read 2 genomic sequence ###
+
+  ### 5' end of read 2
+  if ($extra_bases_at_start){
+    ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
+    unless ( ($pos_2-2) >= 0){ # exiting with an empty genomic sequence otherwise
+      # read 1 is deliberately stored as empty as well: the pair as a whole could not be extracted
+      $params->{unmodified_genomic_sequence_1} = '';
+      $params->{unmodified_genomic_sequence_2} = $non_bisulfite_sequence_2;
+      return;
+    }
+    $non_bisulfite_sequence_2 .= substr ($chromosomes{$chr},$pos_2-2,2);
+  }
+
+  # $indels_2 is added to the Hamming distance value (needed for the NM field in the final SAM output)
+  my ($aligned_sequence_2,$indels_2);
+  ($aligned_sequence_2,$pos_2,$indels_2) = _genomic_sequence_for_cigar_paired_ends_bowtie2($chr,$pos_2,$cigar_2,\@len_2,\@ops_2,2);
+  $non_bisulfite_sequence_2 .= $aligned_sequence_2;
+
+  ### 3' end of read 2
+  if ($extra_bases_at_end){
+    ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
+    unless (length($chromosomes{$chr}) >= $pos_2+2){ # exiting with a genomic sequence lacking the 2 extra bases otherwise
+      # read 1 is deliberately stored as empty as well: the pair as a whole could not be extracted
+      $params->{unmodified_genomic_sequence_1} = '';
+      $params->{unmodified_genomic_sequence_2} = $non_bisulfite_sequence_2;
+      return;
+    }
+    $non_bisulfite_sequence_2 .= substr ($chromosomes{$chr},$pos_2,2);
+  }
+
+  ### all paired-end alignments reported by Bowtie 2 have the Read 1 alignment first and the Read 2 alignment as the second one irrespective of whether read 1 or read 2 was
+  ### the + alignment. We also read in sequences read 1 then read 2 so they should correspond perfectly
+
+  ### the alignment_strand information is needed to determine which strand of the genomic sequence we are comparing the read against,
+  ### the read_conversion information is needed to know whether we are looking for C->T or G->A substitutions
+  my $alignment_read_1;
+  my $alignment_read_2;
+  my $read_conversion_info_1;
+  my $read_conversion_info_2;
+  my $genome_conversion;
+
+  ### results from CT converted read 1 plus GA converted read 2 vs. CT converted genome (+/- orientation alignments are reported only)
+  if ($index == 0){
+    ### [Index 0, sequence originated from (converted) forward strand]
+    $counting{CT_GA_CT_count}++;
+    $alignment_read_1 = '+';
+    $alignment_read_2 = '-';
+    $read_conversion_info_1 = 'CT';
+    $read_conversion_info_2 = 'GA';
+    $genome_conversion = 'CT';
+    ### Read 1 is always the forward hit
+    ### Read 2 will always be on the reverse strand, so it needs to be reverse complemented
+    $non_bisulfite_sequence_2 = reverse_complement($non_bisulfite_sequence_2);
+  }
+
+  ### results from GA converted read 1 plus CT converted read 2 vs. GA converted genome (+/- orientation alignments are reported only)
+  elsif ($index == 1){
+    ### [Index 1, sequence originated from complementary to (converted) bottom strand]
+    $counting{GA_CT_GA_count}++;
+    $alignment_read_1 = '+';
+    $alignment_read_2 = '-';
+    $read_conversion_info_1 = 'GA';
+    $read_conversion_info_2 = 'CT';
+    $genome_conversion = 'GA';
+    ### Read 1 is always the forward hit
+    ### Read 2 will always be on the reverse strand, so it needs to be reverse complemented
+    $non_bisulfite_sequence_2 = reverse_complement($non_bisulfite_sequence_2);
+  }
+
+  ### results from GA converted read 1 plus CT converted read 2 vs. CT converted genome (-/+ orientation alignments are reported only)
+  elsif ($index == 2){
+    ### [Index 2, sequence originated from the complementary to (converted) top strand]
+    $counting{GA_CT_CT_count}++;
+    $alignment_read_1 = '-';
+    $alignment_read_2 = '+';
+    $read_conversion_info_1 = 'GA';
+    $read_conversion_info_2 = 'CT';
+    $genome_conversion = 'CT';
+
+    ### Read 1 (the reverse strand) genomic sequence needs to be reverse complemented
+    $non_bisulfite_sequence_1 = reverse_complement($non_bisulfite_sequence_1);
+  }
+
+  ### results from CT converted read 1 plus GA converted read 2 vs. GA converted genome (-/+ orientation alignments are reported only)
+  elsif ($index == 3){
+    ### [Index 3, sequence originated from the (converted) reverse strand]
+    $counting{CT_GA_GA_count}++;
+    $alignment_read_1 = '-';
+    $alignment_read_2 = '+';
+    $read_conversion_info_1 = 'CT';
+    $read_conversion_info_2 = 'GA';
+    $genome_conversion = 'GA';
+    ### Read 1 (the reverse strand) genomic sequence needs to be reverse complemented
+    $non_bisulfite_sequence_1 = reverse_complement($non_bisulfite_sequence_1);
+  }
+  else{
+    die "Too many bowtie result filehandles\n";
+  }
+
+  $params->{alignment_read_1} = $alignment_read_1;
+  $params->{alignment_read_2} = $alignment_read_2;
+  $params->{genome_conversion} = $genome_conversion;
+  $params->{read_conversion_1} = $read_conversion_info_1;
+  $params->{read_conversion_2} = $read_conversion_info_2;
+  $params->{unmodified_genomic_sequence_1} = $non_bisulfite_sequence_1;
+  $params->{unmodified_genomic_sequence_2} = $non_bisulfite_sequence_2;
+  ## the end position of a read is stored in $pos
+  $params->{end_position_1} = $pos_1;
+  $params->{end_position_2} = $pos_2;
+  $params->{indels_1} = $indels_1;
+  $params->{indels_2} = $indels_2;
+}
+
+sub _genomic_sequence_for_cigar_paired_ends_bowtie2{
+  ### Private helper of extract_corresponding_genomic_sequence_paired_ends_bowtie2: walks along the operations of one parsed CIGAR string
+  ### and collects the genomic sequence of chromosome $chr covered by the alignment, starting at the 0-based position $pos.
+  ### $len and $ops are references to the lists of lengths and operations of $cigar, $read_number (1 or 2) is only used in error messages.
+  ### Returns a list of: the collected sequence, the position just behind the alignment and the number of inserted plus deleted bases.
+  ### Dies if an operation other than 'M', 'I' or 'D' is encountered.
+  my ($chr,$pos,$cigar,$len,$ops,$read_number) = @_;
+
+  my $sequence = '';
+  my $indels = 0; # single base insertions or deletions, added to the Hamming distance for the SAM output
+
+  foreach my $op_index (0..$#{$len}){
+    my $operation = $ops->[$op_index];
+    my $length    = $len->[$op_index];
+
+    if ($operation eq 'M'){
+      # extracting genomic sequence and adjusting the position
+      $sequence .= substr ($chromosomes{$chr},$pos,$length);
+      $pos += $length;
+    }
+    elsif ($operation eq 'I'){ # insertion in the read sequence
+      # we simply add padding Ns instead of finding genomic sequence. This will not be used to infer methylation calls
+      # the position doesn't need adjusting
+      $sequence .= 'N' x $length;
+      $indels += $length;
+    }
+    elsif ($operation eq 'D'){ # deletion in the read sequence
+      # we do not add any genomic sequence but only adjust the position
+      $pos += $length;
+      $indels += $length;
+    }
+    elsif($cigar =~ tr/[NSHPX=]//){ # if these (for standard mapping) illegal characters exist we die (tr counts, the brackets are taken literally)
+      die "The CIGAR $read_number string contained illegal CIGAR operations in addition to 'M', 'I' and 'D': $cigar\n";
+    }
+    else{
+      die "The CIGAR $read_number string contained undefined CIGAR operations in addition to 'M', 'I' and 'D': $cigar\n";
+    }
+  }
+
+  return ($sequence,$pos,$indels);
+}
+
+##########################################
+### PRINT SINGLE END RESULTS: Bowtie 1 ###
+##########################################
+
+sub print_bisulfite_mapping_result_single_end{
+  ### Prints a single-end Bowtie 1 alignment and its methylation call, either as a tab-delimited line to OUT (if $vanilla is set)
+  ### or via single_end_SAM_output. Note that the stored start position of the read is incremented by 1 in the process.
+  my ($identifier,$sequence,$methylation_call_params,$quality_value)= @_;
+
+  ### the FastQ quality is written out in Sanger encoding (Phred 33 scale), so other encodings have to be converted first
+  my $to_phred33 = $phred64 ? \&convert_phred64_quals_to_phred33
+                 : $solexa  ? \&convert_solexa_quals_to_phred33
+                 :            undef;
+  $quality_value = $to_phred33->($quality_value) if ($to_phred33);
+
+  ### Bowtie 1 reports the index and not the bp position, so the start position of single-end reads is shifted by +1 bp
+  $methylation_call_params->{$identifier}->{position} += 1;
+
+  ### every uniquely mapped read and its methylation call is written to the output file
+  unless ($vanilla){ # SAM output, default since Bismark v1.0.0
+    return single_end_SAM_output($identifier,$sequence,$methylation_call_params,$quality_value); # at the end of the script
+  }
+
+  my $alignment = $methylation_call_params->{$identifier};
+  my @columns = (
+		 $identifier,
+		 @{$alignment}{qw(alignment_strand chromosome position end_position)},
+		 $sequence,
+		 @{$alignment}{qw(unmodified_genomic_sequence methylation_call read_conversion genome_conversion)},
+		 $quality_value,
+		);
+  print OUT join("\t",@columns)."\n";
+}
+
+##########################################
+### PRINT SINGLE END RESULTS: Bowtie 2 ###
+##########################################
+
+sub print_bisulfite_mapping_result_single_end_bowtie2{
+  ### Hands a single-end Bowtie 2 alignment and its methylation call over to single_end_SAM_output,
+  ### after converting the quality string to Sanger encoding if needed
+  my ($identifier,$sequence,$methylation_call_params,$quality_value)= @_;
+
+  ### the FastQ quality is written out in Sanger encoding (Phred 33 scale), so other encodings have to be converted first
+  my $to_phred33 = $phred64 ? \&convert_phred64_quals_to_phred33
+                 : $solexa  ? \&convert_solexa_quals_to_phred33
+                 :            undef;
+  $quality_value = $to_phred33->($quality_value) if ($to_phred33);
+
+  ### every mapped read and its methylation call goes to the SAM output file (unmapped and ambiguous reads were already printed)
+  single_end_SAM_output($identifier,$sequence,$methylation_call_params,$quality_value); # at the end of the script
+}
+
+##########################################
+### PRINT PAIRED END RESULTS: Bowtie 1 ###
+##########################################
+
+sub print_bisulfite_mapping_results_paired_ends{
+  ### Prints a paired-end Bowtie 1 alignment and its methylation calls, either as a tab-delimited line to OUT (if $vanilla is set)
+  ### or via paired_end_SAM_output. Note that the stored start position of read 1 is incremented by 1 in the process.
+  my ($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2)= @_;
+
+  ### the FastQ qualities are written out in Sanger encoding (Phred 33 scale), so other encodings have to be converted first
+  my $to_phred33 = $phred64 ? \&convert_phred64_quals_to_phred33
+                 : $solexa  ? \&convert_solexa_quals_to_phred33
+                 :            undef;
+  if ($to_phred33){
+    $quality_value_1 = $to_phred33->($quality_value_1);
+    $quality_value_2 = $to_phred33->($quality_value_2);
+  }
+
+  ### Bowtie 1 reports the index and not the bp position, so the start position of paired-end reads is shifted by +1 bp. (End position is already 1-based)
+  $methylation_call_params->{$identifier}->{start_seq_1} += 1;
+
+  ### every single aligned read and its methylation call is written to the output file
+  unless ($vanilla){ # SAM output, default since Bismark v1.0.0
+    return paired_end_SAM_output($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2); # at the end of the script
+  }
+
+  my $alignment = $methylation_call_params->{$identifier};
+  my @columns = (
+		 $identifier,
+		 @{$alignment}{qw(alignment_read_1 chromosome start_seq_1 alignment_end)},
+		 $sequence_1,
+		 @{$alignment}{qw(unmodified_genomic_sequence_1 methylation_call_1)},
+		 $sequence_2,
+		 @{$alignment}{qw(unmodified_genomic_sequence_2 methylation_call_2 read_conversion_1 genome_conversion)},
+		 $quality_value_1,
+		 $quality_value_2,
+		);
+  print OUT join("\t",@columns)."\n";
+}
+
+##########################################
+### PRINT PAIRED END RESULTS: Bowtie 2 ###
+##########################################
+
+sub print_bisulfite_mapping_results_paired_ends_bowtie2{
+  ### Hands a paired-end Bowtie 2 alignment and its methylation calls over to paired_end_SAM_output,
+  ### after converting both quality strings to Sanger encoding if needed
+  my ($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2)= @_;
+
+  ### the FastQ qualities are written out in Sanger encoding (Phred 33 scale), so other encodings have to be converted first
+  my $to_phred33 = $phred64 ? \&convert_phred64_quals_to_phred33
+                 : $solexa  ? \&convert_solexa_quals_to_phred33
+                 :            undef;
+  if ($to_phred33){
+    $quality_value_1 = $to_phred33->($quality_value_1);
+    $quality_value_2 = $to_phred33->($quality_value_2);
+  }
+
+  ### every single aligned read and its methylation call goes to the output file (unmapped and ambiguous reads were already printed)
+  paired_end_SAM_output($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2); # at the end of the script
+}
+	
+	
+sub convert_phred64_quals_to_phred33{
+  ### Takes a quality string in Phred+64 encoding and returns it re-encoded, character by character, in Sanger (Phred+33) format
+  my ($phred64_quality) = @_;
+
+  return join ("", map {
+    convert_phred_score_into_phred33_quality_string (convert_phred64_quality_string_into_phred_score ($_))
+  } split (//,$phred64_quality));
+}
+
+sub convert_solexa_quals_to_phred33{
+  ### Takes a quality string in Solexa (pre 1.3) encoding and returns it re-encoded, character by character, in Sanger (Phred+33) format
+  my ($solexa_quality) = @_;
+
+  return join ("", map {
+    convert_phred_score_into_phred33_quality_string (convert_solexa_pre1_3_quality_string_into_phred_score ($_))
+  } split (//,$solexa_quality));
+}
+
+sub convert_phred_score_into_phred33_quality_string{
+  ### Takes a numerical Phred score and returns the character with the code point 'score + 33' (Sanger encoding)
+  my ($phred_score) = @_;
+  return chr($phred_score+33);
+}
+
+sub convert_phred64_quality_string_into_phred_score{
+  ### Takes a quality character in Phred+64 encoding and returns its numerical score (code point of the character - 64)
+  my ($quality_character) = @_;
+  return ord($quality_character)-64;
+}
+
+sub convert_solexa_pre1_3_quality_string_into_phred_score{
+  ### Takes a quality character in Solexa (pre 1.3) encoding and returns a numerical score (code point of the character - 59).
+  ### We will just use 59 as the offset here as all Phred Scores between 10 and 40 look exactly the same, there is only a minute difference for values between 0 and 10
+  my ($quality_character) = @_;
+  return ord($quality_character)-59;
+}
+
+
+sub extract_corresponding_genomic_sequence_single_end {
+  ### Extracts the genomic sequence underlying a single-end Bowtie 1 alignment, including 2 extra bases, and stores it together with
+  ### the alignment strand, the read and genome conversion and the end position in $methylation_call_params->{$sequence_identifier}.
+  ### The 2 extra bases allow CpG, CHG and CHH context calls for a C at the very end of the observed sequence. If they cannot be
+  ### extracted because the alignment is right at the edge of a chromosome the genomic sequence is stored as an empty string.
+  ### Dies if index is not one of 0-3.
+  my ($sequence_identifier,$methylation_call_params) = @_;
+
+  my $index = $methylation_call_params->{$sequence_identifier}->{index};
+  my $alignment = $methylation_call_params->{$sequence_identifier}; # shorthand for the entry of this sequence
+
+  ### A bisulfite sequence for 1 location in the genome can theoretically be any of the 4 possible converted strands. The strand
+  ### tells us which strand of the genomic sequence the read is compared against, the read conversion whether we are looking for
+  ### C->T or G->A substitutions
+  my ($alignment_strand,$read_conversion_info,$genome_conversion);
+  my $extra_bases_in_front; # true: the 2 extra bases are taken in front of the start position, false: behind the end of the read
+  my $needs_reverse_complement;
+
+  if ($index == 0){
+    ### CT converted read vs. CT converted genome [Index 0, sequence originated from (converted) forward strand]
+    $counting{CT_CT_count}++;
+    ($alignment_strand,$read_conversion_info,$genome_conversion) = ('+','CT','CT');
+    ($extra_bases_in_front,$needs_reverse_complement) = (0,0);
+  }
+  elsif ($index == 1){
+    ### CT converted read vs. GA converted genome [Index 1, sequence originated from (converted) reverse strand]
+    ### the 2 extra 5' bases on the forward strand will become 2 extra 3' bases after reverse complementation
+    $counting{CT_GA_count}++;
+    ($alignment_strand,$read_conversion_info,$genome_conversion) = ('-','CT','GA');
+    ($extra_bases_in_front,$needs_reverse_complement) = (1,1);
+  }
+  elsif ($index == 2){
+    ### GA converted read vs. CT converted genome [Index 2, sequence originated from complementary to (converted) forward strand]
+    ### the 2 extra 3' bases on the forward strand will become 2 extra 5' bases after reverse complementation
+    $counting{GA_CT_count}++;
+    ($alignment_strand,$read_conversion_info,$genome_conversion) = ('-','GA','CT');
+    ($extra_bases_in_front,$needs_reverse_complement) = (0,1);
+  }
+  elsif ($index == 3){
+    ### GA converted read vs. GA converted genome [Index 3, sequence originated from complementary to (converted) reverse strand]
+    ### 2 extra bases at the 5' end as we are nominally checking the converted reverse strand
+    $counting{GA_GA_count}++;
+    ($alignment_strand,$read_conversion_info,$genome_conversion) = ('+','GA','GA');
+    ($extra_bases_in_front,$needs_reverse_complement) = (1,0);
+  }
+  else{
+    die "Too many bowtie result filehandles\n";
+  }
+
+  my $chr = $alignment->{chromosome};
+  my $start = $alignment->{position};
+  my $read_length = length($alignment->{bowtie_sequence});
+
+  ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
+  my $offset;
+  my $substring_is_valid;
+  if ($extra_bases_in_front){
+    $offset = $start-2;
+    $substring_is_valid = ($offset >= 0);
+  }
+  else{
+    $offset = $start;
+    $substring_is_valid = (length($chromosomes{$chr}) > $start+$read_length+1);
+  }
+
+  my $non_bisulfite_sequence = '';
+  if ($substring_is_valid){
+    ### the observed sequence + 2 extra bases
+    $non_bisulfite_sequence = substr ($chromosomes{$chr},$offset,$read_length+2);
+    $non_bisulfite_sequence = reverse_complement($non_bisulfite_sequence) if ($needs_reverse_complement);
+  }
+
+  @{$alignment}{qw(alignment_strand read_conversion genome_conversion unmodified_genomic_sequence)} = ($alignment_strand,$read_conversion_info,$genome_conversion,$non_bisulfite_sequence);
+
+  ### at this point we can also determine the end position of a read
+  $alignment->{end_position} = $start+$read_length;
+}
+
+
+sub extract_corresponding_genomic_sequence_single_end_bowtie2{
+  my ($sequence_identifier,$methylation_call_params) = @_;
+
+  my $MD_tag = $methylation_call_params->{$sequence_identifier}->{mismatch_info};
+  my $cigar = $methylation_call_params->{$sequence_identifier}->{CIGAR};
+
+  ### A bisulfite sequence for 1 location in the genome can theoretically be any of the 4 possible converted strands. We are also giving the
+  ### sequence a 'memory' of the conversion we are expecting which we will need later for the methylation call
+
+  ### the alignment_strand information is needed to determine which strand of the genomic sequence we are comparing the read against,
+  ### the read_conversion information is needed to know whether we are looking for C->T or G->A substitutions
+  my $alignment_strand;
+  my $read_conversion_info;
+  my $genome_conversion;
+  ### We are now extracting the corresponding genomic sequence, +2 extra bases at the end (or start) so that we can also make a CpG methylation call and
+  ### in addition make differential calls for Cs in CHG or CHH context if the C happens to be at the last (or first)  position of the actually observed sequence
+  my $non_bisulfite_sequence = '';
+
+  ### Positions in SAM format are 1 based, so we need to subract 1 when getting substrings
+  my $pos = $methylation_call_params->{$sequence_identifier}->{position}-1;
+
+  # parsing CIGAR string
+  my @len = split (/\D+/,$cigar); # storing the length per operation
+  my @ops = split (/\d+/,$cigar); # storing the operation
+  shift @ops; # remove the empty first element
+  die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+
+  ### If the sequence aligns best as CT converted reads vs. GA converted genome (OB, index 1) or GA converted reads vs. GA converted genome (CTOB, index 3)
+  if ( ($methylation_call_params->{$sequence_identifier}->{index} == 1) or ($methylation_call_params->{$sequence_identifier}->{index} == 3) ){
+    ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
+    unless ( ($pos-2) >= 0){ # exiting with en empty genomic sequence otherwise
+      $methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence} = $non_bisulfite_sequence;
+      return;
+    }
+    $non_bisulfite_sequence .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos-2,2);
+  }
+  my $indels = 0;	
+
+  foreach (0..$#len){
+    if ($ops[$_] eq 'M'){
+      #extracting genomic sequence
+      $non_bisulfite_sequence .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos,$len[$_]);
+      # adjusting position
+      $pos += $len[$_];
+    }
+    elsif ($ops[$_] eq 'I'){ # insertion in the read sequence
+      # we simply add padding Ns instead of finding genomic sequence. This will not be used to infer methylation calls
+      $non_bisulfite_sequence .= 'N' x $len[$_];
+      # warn "$non_bisulfite_sequence\n";
+      # position doesn't need to be adjusting
+      $indels += $len[$_]; # adding this to $indels so we can determine the hemming distance for the SAM output (= single-base substitutions (mismatches, insertions, deletions)
+    }
+    elsif ($ops[$_] eq 'D'){ # deletion in the read sequence
+      # we do not add any genomic sequence but only adjust the position
+      $pos += $len[$_];
+      $indels += $len[$_]; # adding this to $indels so we can determine the hemming distance for the SAM output (= single-base substitutions (mismatches, insertions, deletions)
+    }
+    elsif($cigar =~ tr/[NSHPX=]//){ # if these (for standard mapping) illegal characters exist we die
+      die "The CIGAR string contained illegal CIGAR operations in addition to 'M', 'I' and 'D': $cigar\n";
+    }
+    else{
+      die "The CIGAR string contained undefined CIGAR operations in addition to 'M', 'I' and 'D': $cigar\n";
+    }
+  }
+
+  ### If the sequence aligns best as CT converted reads vs. CT converted genome (OT, index 0) or GA converted reads vs. CT converted genome (CTOT, index 2)
+  if ( ($methylation_call_params->{$sequence_identifier}->{index} == 0) or ($methylation_call_params->{$sequence_identifier}->{index} == 2) ){
+    ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
+    unless (length($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}}) >= $pos+2){ # exiting with en empty genomic sequence otherwise
+      $methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence} = $non_bisulfite_sequence;
+      return;
+    }
+    $non_bisulfite_sequence .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos,2);
+    # print "$methylation_call_params->{$sequence_identifier}->{bowtie_sequence}\n$non_bisulfite_sequence\n";
+  }
+
+
+
+  ### results from CT converted read vs. CT converted genome (+ orientation alignments are reported only)
+  if ($methylation_call_params->{$sequence_identifier}->{index} == 0){
+    ### [Index 0, sequence originated from (converted) forward strand]
+    $counting{CT_CT_count}++;
+    $alignment_strand = '+';
+    $read_conversion_info = 'CT';
+    $genome_conversion = 'CT';
+  }
+
+  ### results from CT converted reads vs. GA converted genome (- orientation alignments are reported only)
+  elsif ($methylation_call_params->{$sequence_identifier}->{index} == 1){
+    ### [Index 1, sequence originated from (converted) reverse strand]
+    $counting{CT_GA_count}++;
+    $alignment_strand = '-';
+    $read_conversion_info = 'CT';
+    $genome_conversion = 'GA';
+
+    ### reverse complement!
+    $non_bisulfite_sequence = reverse_complement($non_bisulfite_sequence);
+  }
+
+  ### results from GA converted reads vs. CT converted genome (- orientation alignments are reported only)
+  elsif ($methylation_call_params->{$sequence_identifier}->{index} == 2){
+    ### [Index 2, sequence originated from complementary to (converted) forward strand]
+    $counting{GA_CT_count}++;
+    $alignment_strand = '-';
+    $read_conversion_info = 'GA';
+    $genome_conversion = 'CT';
+
+    ### reverse complement!
+    $non_bisulfite_sequence = reverse_complement($non_bisulfite_sequence);
+  }
+
+  ### results from GA converted reads vs. GA converted genome (+ orientation alignments are reported only)
+  elsif ($methylation_call_params->{$sequence_identifier}->{index} == 3){
+    ### [Index 3, sequence originated from complementary to (converted) reverse strand]
+    $counting{GA_GA_count}++;
+    $alignment_strand = '+';
+    $read_conversion_info = 'GA';
+    $genome_conversion = 'GA';
+
+  }
+  else{
+    die "Too many Bowtie 2 result filehandles\n";
+  }
+
+  $methylation_call_params->{$sequence_identifier}->{alignment_strand} = $alignment_strand;
+  $methylation_call_params->{$sequence_identifier}->{read_conversion} = $read_conversion_info;
+  $methylation_call_params->{$sequence_identifier}->{genome_conversion} = $genome_conversion;
+  $methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence} = $non_bisulfite_sequence;
+
+  ### the end position of a read is stored in $pos
+  $methylation_call_params->{$sequence_identifier}->{end_position} = $pos;
+  $methylation_call_params->{$sequence_identifier}->{indels} = $indels;
+}
+
+### METHYLATION CALL
+
+sub methylation_call{
+  ### Performs the methylation call for a single read by comparing it base by base with the genomic sequence.
+  ###
+  ### Arguments: $identifier                 - read ID (not needed for the call itself)
+  ###            $sequence_actually_observed - the read sequence as it was observed
+  ###            $genomic_sequence           - the corresponding genomic sequence; it is expected to be 2 bp longer
+  ###                                          than the read so that the sequence context can be determined
+  ###            $read_conversion            - 'CT' or 'GA'
+  ### Returns the methylation call string (one character per read base) and adds the per-context counts of this
+  ### read to the global %counting hash. Dies if $read_conversion is neither 'CT' nor 'GA'.
+  my ($identifier,$sequence_actually_observed,$genomic_sequence,$read_conversion) = @_;
+  ### splitting both the actually observed sequence and the genomic sequence up into single bases so we can compare them one by one
+  my @seq = split(//,$sequence_actually_observed);
+  my @genomic = split(//,$genomic_sequence);
+
+  ### Creating a match-string with different characters for non-cytosine bases (disregarding mismatches here), methyl-Cs or non-methyl Cs in either
+  ### CpG, CHH or CHG context
+
+  #################################################################
+  ### . for bases not involving cytosines                       ###
+  ### X for methylated C in CHG context (was protected)         ###
+  ### x for not methylated C in CHG context (was converted)     ###
+  ### H for methylated C in CHH context (was protected)         ###
+  ### h for not methylated C in CHH context (was converted)     ###
+  ### Z for methylated C in CpG context (was protected)         ###
+  ### z for not methylated C in CpG context (was converted)     ###
+  #################################################################
+
+  ### the lengths are numbers, so they are compared numerically
+  warn "length of \@seq: ",scalar @seq,"\tlength of \@genomic: ",scalar @genomic,"\n" unless (scalar @seq == (scalar @genomic - 2)); ## CHH changed to -2
+
+  ### CT reads: read base i is compared with genomic base i, the context bases are found downstream (i+1, i+2).
+  ### GA reads: read base i is compared with genomic base i+2 and we look for the G of a cytosine on the opposing
+  ###           strand, the context bases are found upstream (i+1, i).
+  my ($offset,$step,$informative_base,$converted_base,$context_base);
+  if ($read_conversion eq 'CT'){
+    ($offset,$step,$informative_base,$converted_base,$context_base) = (0,1,'C','T','G');
+  }
+  elsif ($read_conversion eq 'GA'){
+    ($offset,$step,$informative_base,$converted_base,$context_base) = (2,-1,'G','A','C');
+  }
+  else{
+    die "Strand conversion info is required to perform a methylation call\n";
+  }
+
+  my %context_count = (Z => 0, z => 0, X => 0, x => 0, H => 0, h => 0);
+  my @match = ();
+
+  for my $index (0..$#seq) {
+    my $genomic_pos = $index + $offset;
+    ### a genomic sequence that is too short must not produce 'uninitialized' warnings, the position is simply not informative
+    my $genomic_base = defined $genomic[$genomic_pos] ? $genomic[$genomic_pos] : '';
+
+    ### only positions with a C (or a G for GA reads) in the genome can produce a call
+    unless ($genomic_base eq $informative_base){
+      push @match,'.';
+      next;
+    }
+
+    my $call;
+    if ($seq[$index] eq $informative_base){
+      ### the residue was not converted, i.e. protected by methylation
+      $call = _cytosine_context(\@genomic,$genomic_pos,$step,$context_base);
+    }
+    elsif ($seq[$index] eq $converted_base){
+      ### the residue was converted, i.e. not methylated
+      $call = lc _cytosine_context(\@genomic,$genomic_pos,$step,$context_base);
+    }
+    else{
+      ### all other mismatches are not of interest for a methylation call
+      push @match,'.';
+      next;
+    }
+
+    ++$context_count{$call};
+    push @match,$call;
+  }
+
+  my $methylation_call = join ("",@match);
+
+  $counting{total_meCHH_count} += $context_count{H};
+  $counting{total_meCHG_count} += $context_count{X};
+  $counting{total_meCpG_count} += $context_count{Z};
+  $counting{total_unmethylated_CHH_count} += $context_count{h};
+  $counting{total_unmethylated_CHG_count} += $context_count{x};
+  $counting{total_unmethylated_CpG_count} += $context_count{z};
+
+  return $methylation_call;
+}
+
+sub _cytosine_context{
+  ### Private helper of methylation_call: returns 'Z' (CpG), 'X' (CHG) or 'H' (CHH) for the position $genomic_pos
+  ### of the genomic base array (passed as reference). $step is +1 or -1 and gives the direction of the two
+  ### context bases, $context_base is the base which marks CpG/CHG context. Bases beyond the end of the
+  ### genomic sequence are treated as not matching.
+  my ($genomic,$genomic_pos,$step,$context_base) = @_;
+  my $first_neighbour  = $genomic->[$genomic_pos + $step];
+  my $second_neighbour = $genomic->[$genomic_pos + 2*$step];
+
+  return 'Z' if (defined $first_neighbour and $first_neighbour eq $context_base);
+  return 'X' if (defined $second_neighbour and $second_neighbour eq $context_base);
+  return 'H';
+}
+
+sub read_genome_into_memory{
+    ## Reads all FastA files (*.fa or, if there are none, *.fasta) found in $genome_folder and stores their
+    ## sequences, converted to upper case, in the %chromosomes hash using the chromosome name as key.
+    ## Multi-FastA files are supported. Dies if the folder contains no sequence files, if a file does not
+    ## start with a FastA header or if a chromosome name occurs more than once.
+    ## Argument: the working directory to return to once the genome has been read.
+    my $cwd = shift;
+    ## reading in and storing the specified genome in the %chromosomes hash
+    chdir ($genome_folder) or die "Can't move to $genome_folder: $!";
+    print "Now reading in and storing sequence information of the genome specified in: $genome_folder\n\n";
+
+    my @chromosome_filenames =  <*.fa>;
+
+    ### if there aren't any genomic files with the extension .fa we will look for files with the extension .fasta
+    unless (@chromosome_filenames){
+        @chromosome_filenames =  <*.fasta>;
+    }
+
+    unless (@chromosome_filenames){
+        die "The specified genome folder $genome_folder does not contain any sequence files in FastA format (with .fa or .fasta file extensions)\n";
+    }
+
+    foreach my $chromosome_filename (@chromosome_filenames){
+
+        ### 3-argument open with a lexical filehandle so that unusual file names are never interpreted as an open mode
+        open (my $chr_in,'<',$chromosome_filename) or die "Failed to read from sequence file $chromosome_filename $!\n";
+        ### first line needs to be a fastA header
+        my $first_line = <$chr_in>;
+        ### an empty file is passed on as an empty header so that it is rejected by extract_chromosome_name
+        $first_line = '' unless (defined $first_line);
+        chomp $first_line;
+        $first_line =~ s/\r//;
+
+        ### Extracting chromosome name from the FastA header
+        my $chromosome_name = extract_chromosome_name($first_line);
+
+        ### starting with an empty string so that entries without any sequence are reported with a length of 0
+        my $sequence = '';
+        while (<$chr_in>){
+            chomp;
+            $_ =~ s/\r//;
+            if ($_ =~ /^>/){
+                ### storing the previous chromosome in the %chromosomes hash, only relevant for Multi-Fasta-Files (MFA)
+                if (exists $chromosomes{$chromosome_name}){
+                    print "chr $chromosome_name (",length $sequence ," bp)\n";
+                    die "Exiting because chromosome name already exists. Please make sure all chromosomes have a unique name!\n";
+                }
+                else {
+                    if (length($sequence) == 0){
+                        warn "Chromosome $chromosome_name in the multi-fasta file $chromosome_filename did not contain any sequence information!\n";
+                    }
+                    print "chr $chromosome_name (",length $sequence ," bp)\n";
+                    $chromosomes{$chromosome_name} = $sequence;
+                }
+                ### resetting the sequence variable
+                $sequence = '';
+                ### setting new chromosome name
+                $chromosome_name = extract_chromosome_name($_);
+            }
+            else{
+                $sequence .= uc$_;
+            }
+        }
+        close ($chr_in);
+
+        ### storing the last (or only) chromosome of the file
+        if (exists $chromosomes{$chromosome_name}){
+            print "chr $chromosome_name (",length $sequence ," bp)\t";
+            die "Exiting because chromosome name already exists. Please make sure all chromosomes have a unique name.\n";
+        }
+        else{
+            if (length($sequence) == 0){
+                warn "Chromosome $chromosome_name in the file $chromosome_filename did not contain any sequence information!\n";
+            }
+            print "chr $chromosome_name (",length $sequence ," bp)\n";
+            $chromosomes{$chromosome_name} = $sequence;
+        }
+    }
+    print "\n";
+    chdir $cwd or die "Failed to move to directory $cwd\n";
+}
+
+sub extract_chromosome_name {
+    ## Returns the chromosome name of a FastA header line, i.e. everything between the initial > and the first
+    ## white space (Bowtie seems to use the same part of the header). Dies if the line does not start with >
+    my $fasta_header = shift;
+
+    unless ($fasta_header =~ s/^>//){
+	die "The specified chromosome ($fasta_header) file doesn't seem to be in FASTA format as required!\n";
+    }
+
+    ## only the first field is of interest
+    my $chromosome_name = (split (/\s+/,$fasta_header,2))[0];
+    return $chromosome_name;
+}
+
+sub reverse_complement{
+  ### Returns the reverse complement of a sequence. Only the upper case bases A, C, G and T are complemented,
+  ### all other characters are kept as they are (but change position).
+  my ($sequence) = @_;
+  my $revcomp = reverse $sequence;
+  $revcomp =~ tr/ACGT/TGCA/;
+  return $revcomp;
+}
+
+sub biTransformFastAFiles {
+  ### Creates bisulfite converted copies of a single-end FastA read file in $temp_dir: always a C -> T
+  ### converted version and, unless $directional is set, a G -> A converted version as well.
+  ### The input is read as pairs of lines (header, sequence) and may be gzipped (file name ending in .gz).
+  ### The global settings $skip and $upto restrict the reads which are written out.
+  ### Returns the names (without $temp_dir) of the C -> T and of the G -> A file. The G -> A name is returned
+  ### in directional mode as well although no such file is written then.
+  my $file = shift;
+
+  ### the converted files are named after the input file without its path
+  my $filename = $file;
+  if ($file =~ /\//){
+    ($filename) = $file =~ m/.*\/(.*)$/;
+  }
+
+  ### gzipped version of the infile. The list form of the pipe open hands the file name to zcat without involving a shell
+  if ($file =~ /\.gz$/){
+    open (IN,'-|','zcat',$file) or die "Couldn't read from file $file: $!\n";
+  }
+  else{
+    open (IN,'<',$file) or die "Couldn't read from file $file: $!\n";
+  }
+
+  if ($skip){
+    warn "Skipping the first $skip reads from $file\n";
+    sleep (1);
+  }
+  if ($upto){
+    warn "Processing reads up to sequence no. $upto from $file\n";
+    sleep (1);
+  }
+
+  my $C_to_T_infile = my $G_to_A_infile = $filename;
+  $C_to_T_infile =~ s/$/_C_to_T.fa/;
+  $G_to_A_infile =~ s/$/_G_to_A.fa/;
+  print "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+  open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
+
+  unless ($directional){
+    print "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+    open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+  }
+
+  ### $count is the number of reads read from the input file, this includes skipped reads
+  my $count = 0;
+  while (1){
+    my $header = <IN>;
+    my $sequence= <IN>;
+    last unless ($header and $sequence);
+
+    $header = fix_IDs($header); # this is to avoid problems with truncated read ID when they contain white spaces
+
+    ++$count;
+
+    if ($skip){
+      next unless ($count > $skip);
+    }
+    if ($upto){
+      last if ($count > $upto);
+    }
+
+    $sequence = uc$sequence; # make input file case insensitive
+
+    # detecting if the input file contains tab stops, as this is likely to result in no alignments
+    # NOTE: fix_IDs has already replaced tabs in the header by underscores at this point
+    if (index($header,"\t") != -1){
+      $seqID_contains_tabs++;
+    }
+
+    ### small check if the sequence seems to be in FastA format
+    die "Input file doesn't seem to be in FastA format at sequence $count: $!\n" unless ($header =~ /^>.*/);
+
+    my $sequence_C_to_T = $sequence;
+    $sequence_C_to_T =~ tr/C/T/;
+    print CTOT "$header$sequence_C_to_T";
+
+    unless ($directional){
+      my $sequence_G_to_A = $sequence;
+      $sequence_G_to_A =~ tr/G/A/;
+      print GTOA "$header$sequence_G_to_A";
+    }
+  }
+
+  ### the input may be a zcat pipe which we stopped reading early (--upto), so its exit status is not checked
+  close (IN);
+  ### write errors of buffered output only show up when the filehandle is closed
+  close (CTOT) or die "Failed to close filehandle for $temp_dir$C_to_T_infile: $!\n";
+
+  if ($directional){
+    print "\nCreated C -> T converted versions of the FastA file $filename ($count sequences in total)\n\n";
+  }
+  else{
+    close (GTOA) or die "Failed to close filehandle for $temp_dir$G_to_A_infile: $!\n";
+    print "\nCreated C -> T as well as G -> A converted versions of the FastA file $filename ($count sequences in total)\n\n";
+  }
+  return ($C_to_T_infile,$G_to_A_infile);
+}
+
+sub biTransformFastAFiles_paired_end {
+  ### Creates bisulfite converted copies of one FastA file of a paired-end data set in $temp_dir.
+  ### Arguments: the input file (may be gzipped, file name ending in .gz) and the read number (1 or 2).
+  ### Directional mode ($directional set): read 1 is written C -> T converted only, read 2 G -> A converted only.
+  ### Otherwise both a C -> T and a G -> A converted version are written.
+  ### /1 or /2 is appended to the read IDs (/1/1 or /2/2 if $bowtie2 is set). The global settings $skip and
+  ### $upto restrict the reads which are written out.
+  ### Returns the name(s) (without $temp_dir) of the file(s) which were written.
+  my ($file,$read_number) = @_;
+
+  ### the converted files are named after the input file without its path
+  my $filename = $file;
+  if ($file =~ /\//){
+    ($filename) = $file =~ m/.*\/(.*)$/;
+  }
+
+  ### gzipped version of the infile. The list form of the pipe open hands the file name to zcat without involving a shell
+  if ($file =~ /\.gz$/){
+    open (IN,'-|','zcat',$file) or die "Couldn't read from file $file: $!\n";
+  }
+  else{
+    open (IN,'<',$file) or die "Couldn't read from file $file: $!\n";
+  }
+
+  if ($skip){
+    warn "Skipping the first $skip reads from $file\n";
+    sleep (1);
+  }
+  if ($upto){
+    warn "Processing reads up to sequence no. $upto from $file\n";
+    sleep (1);
+  }
+
+  my $C_to_T_infile = my $G_to_A_infile = $filename;
+  $C_to_T_infile =~ s/$/_C_to_T.fa/;
+  $G_to_A_infile =~ s/$/_G_to_A.fa/;
+
+  if ($directional){
+    if ($read_number == 1){
+      print "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+      open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
+    }
+    elsif ($read_number == 2){
+      print "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+      open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+    }
+    else{
+      die "Read number needs to be 1 or 2, but was: $read_number\n\n";
+    }
+  }
+  else{ # all four strand output
+    print "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+    print "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+    open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
+    open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+  }
+
+  ### $count is the number of reads read from the input file, this includes skipped reads
+  my $count = 0;
+
+  while (1){
+    my $header = <IN>;
+    my $sequence= <IN>;
+    last unless ($header and $sequence);
+
+    $header = fix_IDs($header); # this is to avoid problems with truncated read ID when they contain white spaces
+
+    ++$count;
+
+    if ($skip){
+      next unless ($count > $skip);
+    }
+    if ($upto){
+      last if ($count > $upto);
+    }
+
+    $sequence = uc$sequence; # make input file case insensitive
+
+    # detecting if the input file contains tab stops, as this is likely to result in no alignments
+    # NOTE: fix_IDs has already replaced tabs in the header by underscores at this point
+    if (index($header,"\t") != -1){
+      $seqID_contains_tabs++;
+    }
+
+    ## small check if the sequence seems to be in FastA format
+    die "Input file doesn't seem to be in FastA format at sequence $count: $!\n" unless ($header =~ /^>.*/);
+
+    ### appending the read number to the read ID, the substitution inserts it in front of the line end
+    if ($read_number == 1){
+      if ($bowtie2){
+	$header =~ s/$/\/1\/1/;
+      }
+      else{
+	$header =~ s/$/\/1/;
+      }
+    }
+    elsif ($read_number == 2){
+      if ($bowtie2){
+	$header =~ s/$/\/2\/2/;
+      }
+      else{
+	$header =~ s/$/\/2/;
+      }
+    }
+    else{
+      die "Read number needs to be 1 or 2, but was: $read_number\n\n";
+    }
+    my $sequence_C_to_T = my $sequence_G_to_A = $sequence;
+
+    $sequence_C_to_T =~ tr/C/T/;
+    $sequence_G_to_A =~ tr/G/A/;
+
+    if ($directional){
+
+      if ($read_number == 1){
+	print CTOT "$header$sequence_C_to_T";
+      }
+      elsif ($read_number == 2){
+	print GTOA "$header$sequence_G_to_A";
+      }
+    }
+    else{
+      print CTOT "$header$sequence_C_to_T";
+      print GTOA "$header$sequence_G_to_A";
+    }
+  }
+
+  ### the input may be a zcat pipe which we stopped reading early (--upto), so its exit status is not checked
+  close (IN);
+
+  ### closing the output file(s) which were opened above, write errors of buffered output only show up now
+  if ($directional){
+    if ($read_number == 1){
+      close (CTOT) or die "Failed to close filehandle for $temp_dir$C_to_T_infile: $!\n";
+      print "\nCreated C -> T converted version of the FastA file $filename ($count sequences in total)\n\n";
+      return ($C_to_T_infile);
+    }
+    else{
+      close (GTOA) or die "Failed to close filehandle for $temp_dir$G_to_A_infile: $!\n";
+      print "\nCreated G -> A converted version of the FastA file $filename ($count sequences in total)\n\n";
+      return ($G_to_A_infile);
+    }
+  }
+  else{
+    close (CTOT) or die "Failed to close filehandle for $temp_dir$C_to_T_infile: $!\n";
+    close (GTOA) or die "Failed to close filehandle for $temp_dir$G_to_A_infile: $!\n";
+    print "\nCreated C -> T as well as G -> A converted versions of the FastA file $filename ($count sequences in total)\n\n";
+    return ($C_to_T_infile,$G_to_A_infile);
+  }
+}
+
+
+sub biTransformFastQFiles {
+  ### Creates bisulfite converted copies of a single-end FastQ read file in $temp_dir: always a C -> T
+  ### converted version and, unless $directional is set, a G -> A converted version as well.
+  ### The input is read as records of 4 lines (identifier, sequence, second identifier, quality scores) and
+  ### may be gzipped (file name ending in .gz). The global settings $skip and $upto restrict the reads which
+  ### are written out.
+  ### Returns the names (without $temp_dir) of the C -> T and of the G -> A file. In directional mode no G -> A
+  ### file is written and the second return value is the unchanged input file name.
+  my $file = shift;
+
+  ### the converted files are named after the input file without its path
+  my $filename = $file;
+  if ($file =~ /\//){
+    ($filename) = $file =~ m/.*\/(.*)$/;
+  }
+
+  ### gzipped version of the infile. The list form of the pipe open hands the file name to zcat without involving a shell
+  if ($file =~ /\.gz$/){
+    open (IN,'-|','zcat',$file) or die "Couldn't read from file $file: $!\n";
+  }
+  else{
+    open (IN,'<',$file) or die "Couldn't read from file $file: $!\n";
+  }
+
+  if ($skip){
+    warn "Skipping the first $skip reads from $file\n";
+    sleep (1);
+  }
+  if ($upto){
+    warn "Processing reads up to sequence no. $upto from $file\n";
+    sleep (1);
+  }
+
+  my $C_to_T_infile = my $G_to_A_infile = $filename;
+
+  $C_to_T_infile =~ s/$/_C_to_T.fastq/;
+  print "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+  open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
+
+  unless ($directional){
+    $G_to_A_infile =~ s/$/_G_to_A.fastq/;
+    print "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+    open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+  }
+
+  ### $count is the number of reads read from the input file, this includes skipped reads
+  my $count = 0;
+  while (1){
+    my $identifier = <IN>;
+    my $sequence = <IN>;
+    my $identifier2 = <IN>;
+    my $quality_score = <IN>;
+    last unless ($identifier and $sequence and $identifier2 and $quality_score);
+
+    $identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces
+
+    ++$count;
+
+    if ($skip){
+      next unless ($count > $skip);
+    }
+    if ($upto){
+      last if ($count > $upto);
+    }
+
+    $sequence = uc$sequence; # make input file case insensitive
+
+    # detecting if the input file contains tab stops, as this is likely to result in no alignments
+    # NOTE: fix_IDs has already replaced tabs in the identifier by underscores at this point
+    if (index($identifier,"\t") != -1){
+      $seqID_contains_tabs++;
+    }
+
+    ## small check if the sequence file appears to be a FastQ file
+    if ($identifier !~ /^\@/ or $identifier2 !~ /^\+/){
+      die "Input file doesn't seem to be in FastQ format at sequence $count: $!\n";
+    }
+
+    my $sequence_C_to_T = $sequence;
+    $sequence_C_to_T =~ tr/C/T/;
+    print CTOT join ('',$identifier,$sequence_C_to_T,$identifier2,$quality_score);
+
+    unless ($directional){
+      my $sequence_G_to_A = $sequence;
+      $sequence_G_to_A =~ tr/G/A/;
+      print GTOA join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
+    }
+  }
+
+  ### the input may be a zcat pipe which we stopped reading early (--upto), so its exit status is not checked
+  close (IN);
+  ### write errors of buffered output only show up when the filehandle is closed
+  close (CTOT) or die "Failed to close filehandle for $temp_dir$C_to_T_infile: $!\n";
+
+  if ($directional){
+    print "\nCreated C -> T converted versions of the FastQ file $filename ($count sequences in total)\n\n";
+  }
+  else{
+    close (GTOA) or die "Failed to close filehandle for $temp_dir$G_to_A_infile: $!\n";
+    print "\nCreated C -> T as well as G -> A converted versions of the FastQ file $filename ($count sequences in total)\n\n";
+  }
+
+  return ($C_to_T_infile,$G_to_A_infile);
+}
+
+sub biTransformFastQFiles_paired_end {
+  ### Creates bisulfite converted copies of one FastQ file of a paired-end data set in $temp_dir.
+  ### Arguments: the input file (may be gzipped, file name ending in .gz) and the read number (1 or 2).
+  ### Directional mode ($directional set): read 1 is written C -> T converted only, read 2 G -> A converted only.
+  ### Otherwise both a C -> T and a G -> A converted version are written.
+  ### /1 or /2 is appended to the read IDs (/1/1 or /2/2 if $bowtie2 is set). The global settings $skip and
+  ### $upto restrict the reads which are written out.
+  ### Returns the name(s) (without $temp_dir) of the file(s) which were written.
+  my ($file,$read_number) = @_;
+
+  ### the converted files are named after the input file without its path
+  my $filename = $file;
+  if ($file =~ /\//){
+    ($filename) = $file =~ m/.*\/(.*)$/;
+  }
+
+  ### gzipped version of the infile. The list form of the pipe open hands the file name to zcat without involving a shell
+  if ($file =~ /\.gz$/){
+    open (IN,'-|','zcat',$file) or die "Couldn't read from file $file: $!\n";
+  }
+  else{
+    open (IN,'<',$file) or die "Couldn't read from file $file: $!\n";
+  }
+
+  if ($skip){
+    warn "Skipping the first $skip reads from $file\n";
+    sleep (1);
+  }
+  if ($upto){
+    warn "Processing reads up to sequence no. $upto from $file\n";
+    sleep (1);
+  }
+
+  my $C_to_T_infile = my $G_to_A_infile = $filename;
+  $C_to_T_infile =~ s/$/_C_to_T.fastq/;
+  $G_to_A_infile =~ s/$/_G_to_A.fastq/;
+
+  if ($directional){
+    if ($read_number == 1){
+      print "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+      open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
+    }
+    elsif ($read_number == 2){
+      print "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+      open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+    }
+    else{
+      die "Read number needs to be 1 or 2, but was $read_number!\n\n";
+    }
+  }
+  else{
+    print "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+    print "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+    open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
+    open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+  }
+
+  ### $count is the number of reads read from the input file, this includes skipped reads
+  my $count = 0;
+
+  while (1){
+    my $identifier = <IN>;
+    my $sequence = <IN>;
+    my $identifier2 = <IN>;
+    my $quality_score = <IN>;
+    last unless ($identifier and $sequence and $identifier2 and $quality_score);
+    ++$count;
+
+    $identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces
+
+    if ($skip){
+      next unless ($count > $skip);
+    }
+    if ($upto){
+      last if ($count > $upto);
+    }
+
+    $sequence= uc$sequence; # make input file case insensitive
+
+    ## small check if the sequence file appears to be a FastQ file
+    if ($identifier !~ /^\@/ or $identifier2 !~ /^\+/){
+      die "Input file doesn't seem to be in FastQ format at sequence $count: $!\n";
+    }
+    my $sequence_C_to_T = my $sequence_G_to_A = $sequence;
+
+    ### appending the read number to the read ID, the substitution inserts it in front of the line end
+    if ($read_number == 1){
+      if ($bowtie2){
+	$identifier =~ s/$/\/1\/1/;
+      }
+      else{
+	$identifier =~ s/$/\/1/;
+      }
+    }
+    elsif ($read_number == 2){
+      if ($bowtie2){
+	$identifier =~ s/$/\/2\/2/;
+      }
+      else{
+	$identifier =~ s/$/\/2/;
+      }
+    }
+    else{
+      die "Read number needs to be 1 or 2\n";
+    }
+
+    $sequence_C_to_T =~ tr/C/T/;
+    $sequence_G_to_A =~ tr/G/A/;
+
+    if ($directional){
+      if ($read_number == 1){
+	print CTOT join ('',$identifier,$sequence_C_to_T,$identifier2,$quality_score);
+      }
+      else{
+	print GTOA join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
+      }
+    }
+    else{
+      print CTOT join ('',$identifier,$sequence_C_to_T,$identifier2,$quality_score);
+      print GTOA join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
+    }
+  }
+
+  ### the input may be a zcat pipe which we stopped reading early (--upto), so its exit status is not checked
+  close (IN);
+
+  ### closing the output file(s) which were opened above, write errors of buffered output only show up now
+  if ($directional){
+    if ($read_number == 1){
+      close (CTOT) or die "Failed to close filehandle for $temp_dir$C_to_T_infile: $!\n";
+      print "\nCreated C -> T converted version of the FastQ file $filename ($count sequences in total)\n\n";
+      return ($C_to_T_infile);
+    }
+    else{
+      close (GTOA) or die "Failed to close filehandle for $temp_dir$G_to_A_infile: $!\n";
+      print "\nCreated G -> A converted version of the FastQ file $filename ($count sequences in total)\n\n";
+      return ($G_to_A_infile);
+    }
+  }
+  else{
+    close (CTOT) or die "Failed to close filehandle for $temp_dir$C_to_T_infile: $!\n";
+    close (GTOA) or die "Failed to close filehandle for $temp_dir$G_to_A_infile: $!\n";
+    print "\nCreated C -> T as well as G -> A converted versions of the FastQ file $filename ($count sequences in total)\n\n";
+    return ($C_to_T_infile,$G_to_A_infile);
+  }
+}
+
+sub fix_IDs{
+  ### Returns a copy of the read ID in which every run of spaces and/or tabs is replaced by a single
+  ### underscore. Line ends are left untouched.
+  my ($read_id) = @_;
+  $read_id =~ s{[ \t]+}{_}g;
+  return $read_id;
+}
+
+sub ensure_sensical_alignment_orientation_single_end{
+  ### Checks whether a single-end alignment has the orientation which makes sense for the alignment process
+  ### it was found by.
+  ### Arguments: $index  - index into @fhs of the alignment process which produced the alignment
+  ###            $strand - orientation of the alignment ('+' or '-')
+  ### Returns 1 for the sensical orientation (the 'seen' counter of the process is incremented) and 0 for the
+  ### nonsensical one (the 'wrong_strand' counter is incremented). Any other strand value returns 0 as well
+  ### without touching the counters. Dies if the name of the alignment process is not one of the four known ones.
+  my $index = shift; # index number if the sequence produced an alignment
+  my $strand = shift;
+
+  ### the only orientation which is sensical for each of the alignment processes:
+  ### CTreadCTgenome: FORWARD converted read against FORWARD converted genome (read: C->T  genome: C->T)
+  ### CTreadGAgenome: FORWARD converted read against reverse converted genome (read: C->T  genome: G->A)
+  ### GAreadCTgenome: Reverse converted read against FORWARD converted genome (read: G->A  genome: C->T)
+  ### GAreadGAgenome: Reverse converted read against reverse converted genome (read: G->A  genome: G->A)
+  my %sensical_strand = (
+    CTreadCTgenome => '+',
+    CTreadGAgenome => '-',
+    GAreadCTgenome => '-',
+    GAreadGAgenome => '+',
+  );
+
+  my $name = $fhs[$index]->{name};
+  unless (defined $name and exists $sensical_strand{$name}){
+    die "One of the above conditions must be true\n";
+  }
+
+  ### if the alignment is in the expected orientation we count it, and return 1 for a correct orientation
+  if ($strand eq $sensical_strand{$name}){
+    $fhs[$index]->{seen}++;
+    return 1;
+  }
+
+  ### the opposite orientation is nonsensical; anything else than '+' or '-' is not counted at all
+  if ($strand eq '+' or $strand eq '-'){
+    $fhs[$index]->{wrong_strand}++;
+  }
+  return 0;
+}
+
+### Checks whether a paired-end alignment reported by the alignment instance $fhs[$index] has a read order that is
+### possible for that instance.
+###
+### Arguments: the index into @fhs, the ID and strand of the first reported mate, and the ID and strand of the
+### second reported mate. Read 1 and read 2 are told apart by the last character of their IDs (1 or 2).
+###
+### Returns 1 (and increments the 'seen' counter of the instance) for a sensible alignment, and 0 (and increments
+### 'wrong_strand') for the mirrored, nonsensical one. Dies on any other ID/strand combination and on an unknown
+### instance name.
+sub ensure_sensical_alignment_orientation_paired_ends{
+  my ($index,$id_1,$strand_1,$id_2,$strand_2) = @_;
+  my $instance_name = $fhs[$index]->{name};
+
+  ### Which mate has to be reported first for the alignment to make sense:
+  ###   CTread1GAread2CTgenome, GAread1CTread2GAgenome: read 1 (+) followed by read 2 (-)
+  ###   GAread1CTread2CTgenome, CTread1GAread2GAgenome: read 2 (+) followed by read 1 (-)
+  my $read_1_leads;
+  if ($instance_name eq 'CTread1GAread2CTgenome' or $instance_name eq 'GAread1CTread2GAgenome') {
+    $read_1_leads = 1;
+  }
+  elsif ($instance_name eq 'GAread1CTread2CTgenome' or $instance_name eq 'CTread1GAread2GAgenome') {
+    $read_1_leads = 0;
+  }
+  else{
+    die "One of the above conditions must be true\n";
+  }
+
+  ### the only strand combination handled is first mate (+) / second mate (-); what varies is which read came first
+  my $plus_then_minus = ($strand_1 eq '+' and $strand_2 eq '-');
+  my $read_1_first = ($plus_then_minus and $id_1 =~ /1$/ and $id_2 =~ /2$/);
+  my $read_2_first = ($plus_then_minus and $id_1 =~ /2$/ and $id_2 =~ /1$/);
+
+  unless ($read_1_first or $read_2_first) {
+    die "id1: $id_1\tid2: $id_2\tThis should be impossible\n";
+  }
+
+  ### the observed read order matches the one this instance can produce: count it and report success
+  my $sensical = $read_1_leads ? $read_1_first : $read_2_first;
+  if ($sensical) {
+    $fhs[$index]->{seen}++;
+    return 1;
+  }
+
+  ### mirrored read order: the alignment is nonsensical for this instance
+  $fhs[$index]->{wrong_strand}++;
+  return 0;
+}
+
+#####################################################################################################################################################
+
+### Bowtie 1 (default) | PAIRED-END | FASTA
+
+### Starts the Bowtie paired-end alignments for FastA input. For every instance in @fhs that is not skipped, a Bowtie
+### process is opened as a read pipe and its first two output lines are buffered in the instance (last_line_1 and
+### last_line_2), together with the ID of read 1 stripped of its trailing /1 (last_seq_id).
+### The four arguments are only used for the status message; the files that get aligned are taken from the
+### inputfile_1 and inputfile_2 entries of each instance.
+sub paired_end_align_fragments_to_bisulfite_genome_fastA {
+  my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+
+  my $input_summary = $directional
+    ? "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastA)\n"
+    : "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastA)\n";
+  print $input_summary;
+
+  my $launch_notice = $directional
+    ? "Now running 2 instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n"
+    : "Now running 4 individual instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+  warn $launch_notice;
+
+  foreach my $instance (@fhs) {
+
+    ### in directional mode, instances without a first input file are not run at all
+    if ($directional and not $instance->{inputfile_1}){
+      @{$instance}{qw(last_seq_id last_line_1 last_line_2)} = (undef) x 3;
+      next;
+    }
+
+    ### restrict Bowtie to the only strand that can yield a sensible alignment for this instance
+    my $aligner_options = $bowtie_options;
+    $aligner_options .= ($instance->{name} eq 'CTread1GAread2CTgenome' or $instance->{name} eq 'GAread1CTread2GAgenome')
+      ? ' --norc'
+      : ' --nofw';
+
+    warn "Now starting a Bowtie paired-end alignment for $instance->{name} (reading in sequences from $temp_dir$instance->{inputfile_1} and $temp_dir$instance->{inputfile_2}, with the options: $aligner_options)\n";
+    open ($instance->{fh},"$path_to_bowtie $aligner_options $instance->{bisulfiteIndex} -1 $temp_dir$instance->{inputfile_1} -2 $temp_dir$instance->{inputfile_2} |") or die "Can't open pipe to bowtie: $!";
+
+    my $first_mate  = $instance->{fh}->getline();
+    my $second_mate = $instance->{fh}->getline();
+
+    ### without a complete first pair of output lines there is nothing to buffer
+    unless ($first_mate and $second_mate) {
+      print "Found no alignment, assigning undef to last_seq_id and last_lines\n";
+      @{$instance}{qw(last_seq_id last_line_1 last_line_2)} = (undef) x 3;
+      next;
+    }
+
+    chomp $first_mate;
+    chomp $second_mate;
+    my $id_1 = (split(/\t/,$first_mate))[0];  # sequence identifier of the first output line
+    my $id_2 = (split(/\t/,$second_mate))[0]; # sequence identifier of the second output line
+
+    ### Either output line may belong to read 1. The ID that carries the /1 tag is stored, without the tag, as last_seq_id
+    if ($id_1 =~ s/\/1$//){
+      $instance->{last_seq_id} = $id_1;
+    }
+    elsif ($id_2 =~ s/\/1$//){
+      $instance->{last_seq_id} = $id_2;
+    }
+    else{
+      die "Either the first or the second id need to be read 1! ID1 was: $id_1; ID2 was: $id_2\n";
+    }
+
+    $instance->{last_line_1} = $first_mate;  # may hold read 1 or read 2
+    $instance->{last_line_2} = $second_mate; # may hold read 1 or read 2
+    warn "Found first alignment:\n$instance->{last_line_1}\n$instance->{last_line_2}\n";
+  }
+}
+
+### Bowtie 2 | PAIRED-END | FASTA
+
+### Starts the Bowtie 2 paired-end alignments for FastA input. For every instance in @fhs that is not skipped, a
+### Bowtie 2 process is opened as a read pipe, the SAM header lines are skipped, and the first two alignment lines
+### are buffered in the instance (last_line_1 and last_line_2), together with the ID of read 1 stripped of its
+### trailing /1 (last_seq_id).
+### The four arguments are only used for the status message; the files that get aligned are taken from the
+### inputfile_1 and inputfile_2 entries of each instance.
+sub paired_end_align_fragments_to_bisulfite_genome_fastA_bowtie2 {
+  my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+
+  my $input_summary = $directional
+    ? "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastA)\n"
+    : "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastA)\n";
+  print $input_summary;
+
+  my $launch_notice = $directional
+    ? "Now running 2 instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n"
+    : "Now running 4 individual instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+  warn $launch_notice;
+
+  foreach my $instance (@fhs) {
+
+    ### in directional mode, instances without a first input file are not run at all
+    if ($directional and not $instance->{inputfile_1}){
+      @{$instance}{qw(last_seq_id last_line_1 last_line_2)} = (undef) x 3;
+      next;
+    }
+
+    ### restrict Bowtie 2 to the only strand that can yield a sensible alignment for this instance
+    my $aligner_options = $bowtie_options;
+    $aligner_options .= ($instance->{name} eq 'CTread1GAread2CTgenome' or $instance->{name} eq 'GAread1CTread2GAgenome')
+      ? ' --norc'
+      : ' --nofw';
+
+    warn "Now starting a Bowtie 2 paired-end alignment for $instance->{name} (reading in sequences from $temp_dir$instance->{inputfile_1} and $temp_dir$instance->{inputfile_2}, with the options: $aligner_options))\n";
+    open ($instance->{fh},"$path_to_bowtie $aligner_options $instance->{bisulfiteIndex} -1 $temp_dir$instance->{inputfile_1} -2 $temp_dir$instance->{inputfile_2} |") or die "Can't open pipe to bowtie: $!";
+
+    ### Bowtie 2 writes SAM: keep reading until the first line that is not a header (headers start with @),
+    ### or until the output ends
+    do {
+      $_ = $instance->{fh}->getline();
+    } while ($_ and $_ =~ /^\@/);
+
+    my $first_mate  = $_;
+    my $second_mate = $instance->{fh}->getline();
+
+    ### without a complete first pair of output lines there is nothing to buffer
+    unless ($first_mate and $second_mate) {
+      print "Found no alignment, assigning undef to last_seq_id and last_lines\n";
+      @{$instance}{qw(last_seq_id last_line_1 last_line_2)} = (undef) x 3;
+      next;
+    }
+
+    chomp $first_mate;
+    chomp $second_mate;
+    my $id_1 = (split(/\t/,$first_mate))[0];  # sequence identifier of the first output line
+    my $id_2 = (split(/\t/,$second_mate))[0]; # sequence identifier of the second output line
+
+    ### Either output line may belong to read 1. The ID that carries the /1 tag is stored, without the tag, as last_seq_id
+    if ($id_1 =~ s/\/1$//){
+      $instance->{last_seq_id} = $id_1;
+    }
+    elsif ($id_2 =~ s/\/1$//){
+      $instance->{last_seq_id} = $id_2;
+    }
+    else{
+      ### NOTE(review): this is only a warning and last_seq_id is left untouched, whereas the other paired-end
+      ### alignment subs die at this point - confirm that this is intended
+      warn "Either the first or the second id need to be read 1! ID1 was: $id_1; ID2 was: $id_2\n";
+    }
+
+    $instance->{last_line_1} = $first_mate;  # may hold read 1 or read 2
+    $instance->{last_line_2} = $second_mate; # may hold read 1 or read 2
+    warn "Found first alignment:\n$instance->{last_line_1}\n$instance->{last_line_2}\n";
+  }
+}
+
+### Bowtie 1 (default) | PAIRED-END | FASTQ
+
+### Starts the Bowtie paired-end alignments for FastQ input. For every instance in @fhs that is not skipped, a Bowtie
+### process is opened as a read pipe and its first two output lines are buffered in the instance (last_line_1 and
+### last_line_2), together with the ID of read 1 stripped of its trailing /1 (last_seq_id).
+### The four arguments are only used for the status message; the files that get aligned are taken from the
+### inputfile_1 and inputfile_2 entries of each instance.
+sub paired_end_align_fragments_to_bisulfite_genome_fastQ {
+  my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+
+  my $input_summary = $directional
+    ? "Input files are $C_to_T_infile_1 $G_to_A_infile_2 (FastQ)\n"
+    : "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastQ)\n";
+  print $input_summary;
+
+  my $launch_notice = $directional
+    ? "Now running 2 instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n"
+    : "Now running 4 individual instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+  warn $launch_notice;
+
+  foreach my $instance (@fhs) {
+
+    ### in directional mode, instances without a first input file are not run at all
+    if ($directional and not $instance->{inputfile_1}){
+      @{$instance}{qw(last_seq_id last_line_1 last_line_2)} = (undef) x 3;
+      next;
+    }
+
+    ### restrict Bowtie to the only strand that can yield a sensible alignment for this instance
+    my $aligner_options = $bowtie_options;
+    $aligner_options .= ($instance->{name} eq 'CTread1GAread2CTgenome' or $instance->{name} eq 'GAread1CTread2GAgenome')
+      ? ' --norc'
+      : ' --nofw';
+
+    warn "Now starting a Bowtie paired-end alignment for $instance->{name} (reading in sequences from $temp_dir$instance->{inputfile_1} and $temp_dir$instance->{inputfile_2}, with the options: $aligner_options))\n";
+    open ($instance->{fh},"$path_to_bowtie $aligner_options $instance->{bisulfiteIndex} -1 $temp_dir$instance->{inputfile_1} -2 $temp_dir$instance->{inputfile_2} |") or die "Can't open pipe to bowtie: $!";
+
+    my $first_mate  = $instance->{fh}->getline();
+    my $second_mate = $instance->{fh}->getline();
+
+    ### without a complete first pair of output lines there is nothing to buffer
+    unless ($first_mate and $second_mate) {
+      print "Found no alignment, assigning undef to last_seq_id and last_lines\n";
+      @{$instance}{qw(last_seq_id last_line_1 last_line_2)} = (undef) x 3;
+      next;
+    }
+
+    chomp $first_mate;
+    chomp $second_mate;
+    my $id_1 = (split(/\t/,$first_mate))[0];  # sequence identifier of the first output line
+    my $id_2 = (split(/\t/,$second_mate))[0]; # sequence identifier of the second output line
+
+    ### Either output line may belong to read 1. The ID that carries the /1 tag is stored, without the tag, as last_seq_id
+    if ($id_1 =~ s/\/1$//){
+      $instance->{last_seq_id} = $id_1;
+    }
+    elsif ($id_2 =~ s/\/1$//){
+      $instance->{last_seq_id} = $id_2;
+    }
+    else{
+      die "Either the first or the second id need to be read 1! ID1 was: $id_1; ID2 was: $id_2\n";
+    }
+
+    $instance->{last_line_1} = $first_mate;  # may hold read 1 or read 2
+    $instance->{last_line_2} = $second_mate; # may hold read 1 or read 2
+    warn "Found first alignment:\n$instance->{last_line_1}\n$instance->{last_line_2}\n";
+  }
+}
+
+### Bowtie 2 | PAIRED-END | FASTQ
+
+### Starts the Bowtie 2 paired-end alignments for FastQ input. For every instance in @fhs that is not skipped, a
+### Bowtie 2 process is opened as a read pipe, the SAM header lines are skipped, and the first two alignment lines
+### are buffered in the instance (last_line_1 and last_line_2), together with the ID of read 1 stripped of its
+### trailing /1 (last_seq_id).
+### The four arguments are only used for the status message; the files that get aligned are taken from the
+### inputfile_1 and inputfile_2 entries of each instance.
+sub paired_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 {
+  my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+
+  my $input_summary = $directional
+    ? "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastQ)\n"
+    : "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastQ)\n";
+  print $input_summary;
+
+  my $launch_notice = $directional
+    ? "Now running 2 instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n"
+    : "Now running 4 individual instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+  warn $launch_notice;
+
+  foreach my $instance (@fhs) {
+
+    ### in directional mode, instances without a first input file are not run at all
+    if ($directional and not $instance->{inputfile_1}){
+      @{$instance}{qw(last_seq_id last_line_1 last_line_2)} = (undef) x 3;
+      next;
+    }
+
+    ### restrict Bowtie 2 to the only strand that can yield a sensible alignment for this instance
+    my $aligner_options = $bowtie_options;
+    $aligner_options .= ($instance->{name} eq 'CTread1GAread2CTgenome' or $instance->{name} eq 'GAread1CTread2GAgenome')
+      ? ' --norc'
+      : ' --nofw';
+
+    warn "Now starting a Bowtie 2 paired-end alignment for $instance->{name} (reading in sequences from $temp_dir$instance->{inputfile_1} and $temp_dir$instance->{inputfile_2}, with the options: $aligner_options))\n";
+    open ($instance->{fh},"$path_to_bowtie $aligner_options $instance->{bisulfiteIndex} -1 $temp_dir$instance->{inputfile_1} -2 $temp_dir$instance->{inputfile_2} |") or die "Can't open pipe to bowtie: $!";
+
+    ### Bowtie 2 writes SAM: keep reading until the first line that is not a header (headers start with @),
+    ### or until the output ends
+    do {
+      $_ = $instance->{fh}->getline();
+    } while ($_ and $_ =~ /^\@/);
+
+    my $first_mate  = $_;
+    my $second_mate = $instance->{fh}->getline();
+
+    ### without a complete first pair of output lines there is nothing to buffer
+    unless ($first_mate and $second_mate) {
+      print "Found no alignment, assigning undef to last_seq_id and last_lines\n";
+      @{$instance}{qw(last_seq_id last_line_1 last_line_2)} = (undef) x 3;
+      next;
+    }
+
+    chomp $first_mate;
+    chomp $second_mate;
+    my $id_1 = (split(/\t/,$first_mate))[0];  # sequence identifier of the first output line
+    my $id_2 = (split(/\t/,$second_mate))[0]; # sequence identifier of the second output line
+
+    ### Either output line may belong to read 1. The ID that carries the /1 tag is stored, without the tag, as last_seq_id
+    if ($id_1 =~ s/\/1$//){
+      $instance->{last_seq_id} = $id_1;
+    }
+    elsif ($id_2 =~ s/\/1$//){
+      $instance->{last_seq_id} = $id_2;
+    }
+    else{
+      die "Either the first or the second id need to be read 1! ID1 was: $id_1; ID2 was: $id_2\n";
+    }
+
+    $instance->{last_line_1} = $first_mate;  # may hold read 1 or read 2
+    $instance->{last_line_2} = $second_mate; # may hold read 1 or read 2
+    warn "Found first alignment:\n$instance->{last_line_1}\n$instance->{last_line_2}\n";
+  }
+}
+
+#####################################################################################################################################################
+
+### Bowtie 1 (default) | SINGLE-END | FASTA
+### Starts the Bowtie single-end alignments for FastA input. For every instance in @fhs a Bowtie process is opened
+### as a read pipe and its first output line is buffered in the instance (last_line), together with the sequence
+### identifier found in the first tab-separated column (last_seq_id).
+### The two arguments are only used for the status message; the file that gets aligned is taken from the inputfile
+### entry of each instance.
+sub single_end_align_fragments_to_bisulfite_genome_fastA {
+  my ($C_to_T_infile,$G_to_A_infile) = @_;
+
+  my $input_summary = $directional
+    ? "Input file is $C_to_T_infile (FastA)\n"
+    : "Input files are $C_to_T_infile and $G_to_A_infile (FastA)\n";
+  print $input_summary;
+
+  my $launch_notice = $directional
+    ? "Now running 2 instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n"
+    : "Now running 4 individual instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+  warn $launch_notice;
+
+  foreach my $instance (@fhs) {
+
+    ### restrict Bowtie to the only strand that can yield a sensible alignment for this instance
+    my $aligner_options = $bowtie_options;
+    $aligner_options .= ($instance->{name} eq 'CTreadCTgenome' or $instance->{name} eq 'GAreadGAgenome')
+      ? ' --norc'
+      : ' --nofw';
+
+    warn "Now starting the Bowtie aligner for $instance->{name} (reading in sequences from $temp_dir$instance->{inputfile} with options: $aligner_options)\n";
+    open ($instance->{fh},"$path_to_bowtie $aligner_options $instance->{bisulfiteIndex} $temp_dir$instance->{inputfile} |") or die "Can't open pipe to bowtie: $!";
+
+    $_ = $instance->{fh}->getline();
+
+    ### no output at all: there is nothing to buffer for this instance
+    unless ($_) {
+      print "Found no alignment, assigning undef to last_seq_id and last_line\n";
+      @{$instance}{qw(last_seq_id last_line)} = (undef) x 2;
+      next;
+    }
+
+    ### buffer the first output line and the sequence identifier from its first column
+    chomp;
+    $instance->{last_seq_id} = (split(/\t/))[0];
+    $instance->{last_line} = $_;
+    warn "Found first alignment:\t$instance->{last_line}\n";
+  }
+}
+
+### Bowtie 2 | SINGLE-END | FASTA
+### Starts the Bowtie 2 single-end alignments for FastA input. For every instance in @fhs a Bowtie 2 process is
+### opened as a read pipe, the SAM header lines are skipped, and the first remaining output line is buffered in the
+### instance (last_line), together with the sequence identifier found in the first tab-separated column
+### (last_seq_id).
+### The two arguments are only used for the status message; the file that gets aligned is taken from the inputfile
+### entry of each instance.
+sub single_end_align_fragments_to_bisulfite_genome_fastA_bowtie2 {
+  my ($C_to_T_infile,$G_to_A_infile) = @_;
+
+  my $input_summary = $directional
+    ? "Input file is $C_to_T_infile (FastA)\n"
+    : "Input files are $C_to_T_infile and $G_to_A_infile (FastA)\n";
+  print $input_summary;
+
+  my $launch_notice = $directional
+    ? "Now running 2 instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n"
+    : "Now running 4 individual instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+  warn $launch_notice;
+
+  foreach my $instance (@fhs) {
+
+    ### restrict Bowtie 2 to the only strand that can yield a sensible alignment for this instance
+    my $aligner_options = $bowtie_options;
+    $aligner_options .= ($instance->{name} eq 'CTreadCTgenome' or $instance->{name} eq 'GAreadGAgenome')
+      ? ' --norc'
+      : ' --nofw';
+
+    warn "Now starting the Bowtie 2 aligner for $instance->{name} (reading in sequences from $temp_dir$instance->{inputfile} with options: $aligner_options)\n";
+    open ($instance->{fh},"$path_to_bowtie $aligner_options $instance->{bisulfiteIndex} -U $temp_dir$instance->{inputfile} |") or die "Can't open pipe to bowtie: $!";
+
+    ### Bowtie 2 writes SAM: keep reading until the first line that is not a header (headers start with @),
+    ### or until the output ends
+    do {
+      $_ = $instance->{fh}->getline();
+    } while ($_ and $_ =~ /^\@/);
+
+    ### the output ended before any non-header line was seen: there is nothing to buffer for this instance
+    unless ($_) {
+      print "Found no alignment, assigning undef to last_seq_id and last_line\n";
+      @{$instance}{qw(last_seq_id last_line)} = (undef) x 2;
+      next;
+    }
+
+    ### Bowtie 2 outputs a result line even for sequences without any alignments, so the first non-header line
+    ### is buffered as it is, along with the sequence identifier from its first column
+    chomp;
+    $instance->{last_seq_id} = (split(/\t/))[0];
+    $instance->{last_line} = $_;
+    warn "Found first alignment:\t$instance->{last_line}\n";
+  }
+}
+
+
+### Bowtie 1 (default) | SINGLE-END | FASTQ
+### Starts the Bowtie single-end alignments for FastQ input. For every instance in @fhs a Bowtie process is opened
+### as a read pipe and its first output line is buffered in the instance (last_line), together with the sequence
+### identifier found in the first tab-separated column (last_seq_id). Both entries are set to undef if Bowtie
+### produced no output.
+### The two arguments are only used for the status message; the file that gets aligned is taken from the inputfile
+### entry of each instance.
+sub single_end_align_fragments_to_bisulfite_genome_fastQ {
+  my ($C_to_T_infile,$G_to_A_infile) = @_;
+  if ($directional){
+    print "Input file is $C_to_T_infile (FastQ)\n";
+  }
+  else{
+    print "Input files are $C_to_T_infile and $G_to_A_infile (FastQ)\n";
+  }
+
+  ## Now starting up to 4 instances of Bowtie feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in
+  ## the data structure above
+  if ($directional){
+    warn "Now running 2 instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+  }
+  else{
+    warn "Now running 4 individual instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+  }
+
+  foreach my $fh (@fhs) {
+    my $bt_options = $bowtie_options;
+    if ($fh->{name} eq 'CTreadCTgenome' or $fh->{name} eq 'GAreadGAgenome'){
+      $bt_options .= ' --norc'; ### ensuring the alignments are only reported in a sensible manner
+    }
+    else {
+      $bt_options .= ' --nofw';
+    }
+
+    warn "Now starting the Bowtie aligner for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile} with options: $bt_options)\n";
+    ### The instance-specific options ($bt_options) have to be passed on here, not the plain $bowtie_options. Otherwise
+    ### the --norc/--nofw restriction announced above never reaches Bowtie, unlike in the other single-end alignment subs
+    open ($fh->{fh},"$path_to_bowtie $bt_options $fh->{bisulfiteIndex} $temp_dir$fh->{inputfile} |") or die "Can't open pipe to bowtie: $!";
+
+    # if Bowtie produces an alignment we store the first line of the output
+    # (a lexical variable is used so that the global $_ of the caller is left alone)
+    my $first_line = $fh->{fh}->getline();
+    if ($first_line) {
+      chomp $first_line;
+      my $id = (split(/\t/,$first_line))[0]; # this is the first element of the Bowtie output (= the sequence identifier)
+      $fh->{last_seq_id} = $id;
+      $fh->{last_line} = $first_line;
+      warn "Found first alignment:\t$fh->{last_line}\n";
+    }
+    # otherwise we just initialise last_seq_id and last_line as undefined
+    else {
+      print "Found no alignment, assigning undef to last_seq_id and last_line\n";
+      $fh->{last_seq_id} = undef;
+      $fh->{last_line} = undef;
+    }
+  }
+}
+
+### Bowtie 2 | SINGLE-END | FASTQ
+### Starts the Bowtie 2 single-end alignments for FastQ input. For every instance in @fhs a Bowtie 2 process is
+### opened as a read pipe, the SAM header lines are skipped, and the first remaining output line is buffered in the
+### instance (last_line), together with the sequence identifier found in the first tab-separated column
+### (last_seq_id).
+### The two arguments are only used for the status message; the file that gets aligned is taken from the inputfile
+### entry of each instance.
+sub single_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 {
+  my ($C_to_T_infile,$G_to_A_infile) = @_;
+
+  my $input_summary = $directional
+    ? "Input file is $C_to_T_infile (FastQ)\n\n"
+    : "Input files are $C_to_T_infile and $G_to_A_infile (FastQ)\n\n";
+  print $input_summary;
+
+  my $launch_notice = $directional
+    ? "Now running 2 instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n"
+    : "Now running 4 individual instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+  warn $launch_notice;
+
+  foreach my $instance (@fhs) {
+
+    ### restrict Bowtie 2 to the only strand that can yield a sensible alignment for this instance
+    my $aligner_options = $bowtie_options;
+    $aligner_options .= ($instance->{name} eq 'CTreadCTgenome' or $instance->{name} eq 'GAreadGAgenome')
+      ? ' --norc'
+      : ' --nofw';
+
+    warn "Now starting the Bowtie 2 aligner for $instance->{name} (reading in sequences from $temp_dir$instance->{inputfile} with options $aligner_options)\n";
+    warn "Using Bowtie 2 index: $instance->{bisulfiteIndex}\n\n";
+
+    open ($instance->{fh},"$path_to_bowtie $aligner_options $instance->{bisulfiteIndex} -U $temp_dir$instance->{inputfile} |") or die "Can't open pipe to bowtie: $!";
+
+    ### Bowtie 2 writes SAM: keep reading until the first line that is not a header (headers start with @),
+    ### or until the output ends
+    do {
+      $_ = $instance->{fh}->getline();
+    } while ($_ and $_ =~ /^\@/);
+
+    ### the output ended before any non-header line was seen: there is nothing to buffer for this instance
+    unless ($_) {
+      print "Found no alignment, assigning undef to last_seq_id and last_line\n";
+      @{$instance}{qw(last_seq_id last_line)} = (undef) x 2;
+      next;
+    }
+
+    ### Bowtie 2 outputs a result line even for sequences without any alignments, so the first non-header line
+    ### is buffered as it is, along with the sequence identifier from its first column
+    chomp;
+    $instance->{last_seq_id} = (split(/\t/))[0];
+    $instance->{last_line} = $_;
+    warn "Found first alignment:\t$instance->{last_line}\n";
+  }
+}
+
+###########################################################################################################################################
+
+### Re-initialises the global %counting hash (all counters back to 0) and rebuilds the global @fhs list of alignment
+### instances. $filename is the file name entry about to be processed; if it contains a comma it is treated as
+### paired-end input.
+sub reset_counters_and_fhs{
+  my $filename = shift;
+
+  %counting = map { $_ => 0 } (
+    ### methylation call totals
+    qw(total_meCHH_count total_meCHG_count total_meCpG_count),
+    qw(total_unmethylated_CHH_count total_unmethylated_CHG_count total_unmethylated_CpG_count),
+    ### per-sequence bookkeeping
+    qw(sequences_count no_single_alignment_found unsuitable_sequence_count),
+    qw(genomic_sequence_could_not_be_extracted_count unique_best_alignment_count),
+    qw(low_complexity_alignments_overruled_count),
+    ### single-end strand counts (read conversion / genome conversion):
+    ### CT/CT original top, CT/GA original bottom, GA/CT complementary to original top,
+    ### GA/GA complementary to original bottom
+    qw(CT_CT_count CT_GA_count GA_CT_count GA_GA_count),
+    ### paired-end strand counts (read 1 conversion / read 2 conversion / genome conversion):
+    ### CT/GA/CT original top, GA/CT/GA complementary to original bottom,
+    ### GA/CT/CT complementary to original top, CT/GA/GA original bottom
+    qw(CT_GA_CT_count GA_CT_GA_count GA_CT_CT_count CT_GA_GA_count),
+    ### only relevant if --directional was specified
+    qw(alignments_rejected_count),
+  );
+
+  ### name, strand identity and bisulfite index of the alignment instances, in their fixed order
+  my @instance_templates = (
+    [ 'CTreadCTgenome', 'con ori forward',       $CT_index_basename ],
+    [ 'CTreadGAgenome', 'con ori reverse',       $GA_index_basename ],
+    [ 'GAreadCTgenome', 'compl ori con forward', $CT_index_basename ],
+    [ 'GAreadGAgenome', 'compl ori con reverse', $GA_index_basename ],
+  );
+
+  ### Directional single-end input only uses the first two instances. Paired-end input (directional) and
+  ### non-directional input get all four
+  my $instance_count = ($directional and not ($filename =~ ',')) ? 2 : 4;
+
+  @fhs = map {
+    +{ name            => $_->[0],
+       strand_identity => $_->[1],
+       bisulfiteIndex  => $_->[2],
+       seen            => 0,
+       wrong_strand    => 0,
+     }
+  } @instance_templates[0 .. $instance_count - 1];
+}
+
+
+### process_command_line()
+###
+### Parses @ARGV with Getopt::Long, validates the option combinations and turns them into
+### the settings used for the rest of the run.
+###
+### Side effects:
+###   - consumes @ARGV (options first, then the genome folder, then any single-end file names)
+###   - fills the file-level @filenames array: one entry per single-end file, or one
+###     "mate1,mate2" entry per pair of paired-end files
+###   - changes the working directory several times (genome folder, both index folders,
+###     $parent_dir, output directory, temp directory) and creates the output/temp
+###     directories if they do not exist yet. On return the working directory is
+###     $parent_dir, or the temp directory if --temp_dir was given
+###   - prints the help text or the version banner and exits if --help/--version was given
+###   - dies on invalid or incompatible options
+###
+### Returns the list
+###   ($genome_folder, $CT_index_basename, $GA_index_basename, $path_to_bowtie,
+###    $sequence_format, $bowtie_options, $directional, $unmapped, $multi_map, $phred64,
+###    $solexa, $output_dir, $bowtie2, $vanilla, $sam_no_hd, $skip, $qupto, $temp_dir)
+### where $bowtie_options is the space-joined string of options collected for Bowtie,
+### $genome_folder is absolute and ends in '/', and $output_dir/$temp_dir are either
+### absolute paths ending in '/' or the empty string if they were not specified.
+sub process_command_line{
+  my @bowtie_options;             # options that will be handed on to Bowtie 1/2
+  my $help;
+  my $mates1;
+  my $mates2;
+  my $path_to_bowtie;
+  my $fastq;
+  my $fasta;
+  my $skip;
+  my $qupto;
+  my $phred64;
+  my $phred33;
+  my $solexa;
+  my $mismatches;
+  my $seed_length;
+  my $best;                       # set by --no_best, i.e. true means "do NOT use --best"
+  my $sequence_format;
+  my $version;
+  my $quiet;
+  my $chunk;
+  my $non_directional;
+  my $ceiling;
+  my $maxins;
+  my $minins;
+  my $unmapped;
+  my $multi_map;
+  my $output_dir;
+  my $bowtie2;
+  my $vanilla;
+  my $sam_no_hd;
+  my $seed_extension_fails;
+  my $reseed_repetitive_seeds;
+  my $most_valid_alignments;
+  my $score_min;
+  my $parallel;
+  my $temp_dir;
+  my $rdg;
+  my $rfg;
+
+  ### GetOptions returns false if any option could not be parsed
+  my $command_line = GetOptions ('help|man' => \$help,
+                                 '1=s' => \$mates1,
+                                 '2=s' => \$mates2,
+                                 'path_to_bowtie=s' => \$path_to_bowtie,
+                                 'f|fasta' => \$fasta,
+                                 'q|fastq' => \$fastq,
+                                 's|skip=i' => \$skip,
+                                 'u|upto=i' => \$qupto,
+                                 'phred33-quals' => \$phred33,
+                                 'phred64-quals|solexa1' => \$phred64,
+                                 'solexa-quals' => \$solexa,
+                                 'n|seedmms=i' => \$mismatches,
+                                 'l|seedlen=i' => \$seed_length,
+                                 'no_best' => \$best,
+                                 'version' => \$version,
+                                 'quiet' => \$quiet,
+                                 'chunkmbs=i' => \$chunk,
+                                 'non_directional' => \$non_directional,
+                                 'I|minins=i' => \$minins,
+                                 'X|maxins=i' => \$maxins,
+                                 'e|maqerr=i' => \$ceiling,
+                                 'un|unmapped' => \$unmapped,
+                                 'ambiguous' => \$multi_map,
+                                 'o|output_dir=s' => \$output_dir,
+                                 'bowtie2' => \$bowtie2,
+                                 'vanilla' => \$vanilla,
+                                 'sam-no-hd' => \$sam_no_hd,
+                                 'D=i' => \$seed_extension_fails,
+                                 'R=i' => \$reseed_repetitive_seeds,
+                                 'score_min=s' => \$score_min,
+                                 'most_valid_alignments=i' => \$most_valid_alignments,
+                                 'p=i' => \$parallel,
+                                 'temp_dir=s' => \$temp_dir,
+                                 'rdg=s' => \$rdg,
+                                 'rfg=s' => \$rfg,
+                                );
+
+
+  ### EXIT ON ERROR if there were errors with any of the supplied options
+  unless ($command_line){
+    die "Please respecify command line options\n";
+  }
+  ### HELPFILE
+  if ($help){
+    print_helpfile();
+    exit;
+  }
+  if ($version){
+    print << "VERSION";
+
+
+          Bismark - Bisulfite Mapper and Methylation Caller.
+
+   Bismark Version: $bismark_version Copyright 2010-12 Felix Krueger, Babraham Bioinformatics
+              www.bioinformatics.babraham.ac.uk/projects/
+
+
+VERSION
+    exit;
+  }
+
+
+  ##########################
+  ### PROCESSING OPTIONS ###
+  ##########################
+
+  ### normalising the two flags to 0 so that they are always defined when returned
+  unless ($bowtie2){
+    $bowtie2 = 0;
+  }
+  unless ($sam_no_hd){
+    $sam_no_hd =0;
+  }
+
+  ### PATH TO BOWTIE
+  ### if a special path to Bowtie 1/2 was specified we will use that one, otherwise it is assumed that Bowtie 1/2 is in the PATH
+  if ($path_to_bowtie){
+    unless ($path_to_bowtie =~ /\/$/){
+      $path_to_bowtie =~ s/$/\//;
+    }
+    if (-d $path_to_bowtie){
+      if ($bowtie2){
+        $path_to_bowtie = "${path_to_bowtie}bowtie2";
+      }
+      else{
+        $path_to_bowtie = "${path_to_bowtie}bowtie";
+      }
+    }
+    else{
+      die "The path to bowtie provided ($path_to_bowtie) is invalid (not a directory)!\n";
+    }
+  }
+  else{
+    if ($bowtie2){
+      $path_to_bowtie = 'bowtie2';
+      warn "Path to Bowtie 2 specified as: $path_to_bowtie\n";
+    }
+    else{
+      $path_to_bowtie = 'bowtie';
+      warn "Path to Bowtie specified as: $path_to_bowtie\n";
+    }
+  }
+
+  ####################################
+  ### PROCESSING ARGUMENTS
+
+  ### GENOME FOLDER
+  my $genome_folder = shift @ARGV; # mandatory
+  unless ($genome_folder){
+    warn "Genome folder was not specified!\n";
+    print_helpfile();
+    exit 1; # this is an error condition, so we do not report success to the calling process
+  }
+
+  ### checking that the genome folder, all subfolders and the required bowtie index files exist
+  unless ($genome_folder =~/\/$/){
+    $genome_folder =~ s/$/\//;
+  }
+
+  if (chdir $genome_folder){
+    my $absolute_genome_folder = getcwd; ## making the genome folder path absolute
+    unless ($absolute_genome_folder =~/\/$/){
+      $absolute_genome_folder =~ s/$/\//;
+    }
+    warn "Reference genome folder provided is $genome_folder\t(absolute path is '$absolute_genome_folder')\n";
+    $genome_folder = $absolute_genome_folder;
+  }
+  else{
+    die "Failed to move to $genome_folder: $!\nUSAGE: Bismark.pl [options] <genome_folder> {-1 <mates1> -2 <mates2> | <singles>} [<hits>]    (--help for more details)\n";
+  }
+
+  my $CT_dir = "${genome_folder}Bisulfite_Genome/CT_conversion/";
+  my $GA_dir = "${genome_folder}Bisulfite_Genome/GA_conversion/";
+
+  if ($bowtie2){ ### Bowtie 2 (new)
+    ### checking the integrity of $CT_dir
+    chdir $CT_dir or die "Failed to move to directory $CT_dir: $!\n";
+    my @CT_bowtie_index = ('BS_CT.1.bt2','BS_CT.2.bt2','BS_CT.3.bt2','BS_CT.4.bt2','BS_CT.rev.1.bt2','BS_CT.rev.2.bt2');
+    foreach my $file(@CT_bowtie_index){
+      unless (-f $file){
+        die "The Bowtie 2 index of the C->T converted genome seems to be faulty ($file). Please run the bismark_genome_preparation before running Bismark.\n";
+      }
+    }
+    ### checking the integrity of $GA_dir
+    chdir $GA_dir or die "Failed to move to directory $GA_dir: $!\n";
+    my @GA_bowtie_index = ('BS_GA.1.bt2','BS_GA.2.bt2','BS_GA.3.bt2','BS_GA.4.bt2','BS_GA.rev.1.bt2','BS_GA.rev.2.bt2');
+    foreach my $file(@GA_bowtie_index){
+      unless (-f $file){
+        die "The Bowtie 2 index of the G->A converted genome seems to be faulty ($file). Please run bismark_genome_preparation before running Bismark.\n";
+      }
+    }
+  }
+
+  else{ ### Bowtie 1 (default)
+    ### checking the integrity of $CT_dir
+    chdir $CT_dir or die "Failed to move to directory $CT_dir: $!\n";
+    my @CT_bowtie_index = ('BS_CT.1.ebwt','BS_CT.2.ebwt','BS_CT.3.ebwt','BS_CT.4.ebwt','BS_CT.rev.1.ebwt','BS_CT.rev.2.ebwt');
+    foreach my $file(@CT_bowtie_index){
+      unless (-f $file){
+        die "The Bowtie index of the C->T converted genome seems to be faulty ($file). Please run bismark_genome_preparation before running Bismark.\n";
+      }
+    }
+    ### checking the integrity of $GA_dir
+    chdir $GA_dir or die "Failed to move to directory $GA_dir: $!\n";
+    my @GA_bowtie_index = ('BS_GA.1.ebwt','BS_GA.2.ebwt','BS_GA.3.ebwt','BS_GA.4.ebwt','BS_GA.rev.1.ebwt','BS_GA.rev.2.ebwt');
+    foreach my $file(@GA_bowtie_index){
+      unless (-f $file){
+        die "The Bowtie index of the G->A converted genome seems to be faulty ($file). Please run bismark_genome_preparation before running Bismark.\n";
+      }
+    }
+  }
+
+  my $CT_index_basename = "${CT_dir}BS_CT";
+  my $GA_index_basename = "${GA_dir}BS_GA";
+
+  ### INPUT OPTIONS
+
+  ### SEQUENCE FILE FORMAT
+  ### exits if both fastA and FastQ were specified
+  if ($fasta and $fastq){
+    die "Only one sequence filetype can be specified (fastA or fastQ)\n";
+  }
+
+  ### unless fastA is specified explicitly, fastQ sequence format is expected by default
+  if ($fasta){
+    print "FastA format specified\n";
+    $sequence_format = 'FASTA';
+    push @bowtie_options, '-f';
+  }
+  elsif ($fastq){
+    print "FastQ format specified\n";
+    $sequence_format = 'FASTQ';
+    push @bowtie_options, '-q';
+  }
+  else{
+    $fastq = 1; # the quality value checks further down test $fastq, so it needs to be set for the default case as well
+    print "FastQ format assumed (by default)\n";
+    $sequence_format = 'FASTQ';
+    push @bowtie_options, '-q';
+  }
+
+  ### SKIP
+  ### the value is not passed on to Bowtie here (the push is commented out), it is only returned to the caller
+  if ($skip){
+    warn "Skipping the first $skip reads from the input file\n";
+    # push @bowtie_options,"-s $skip";
+  }
+
+  ### UPTO
+  ### the value is not passed on to Bowtie here (the pushes are commented out), it is only returned to the caller
+  if ($qupto){
+    warn "Processing sequences up to read no. $qupto from the input file\n";
+    if ($bowtie2){
+      #      push @bowtie_options,"--upto $qupto"; ## slightly changed for Bowtie 2
+    }
+    else{
+      #     push @bowtie_options,"--qupto $qupto";
+    }
+  }
+
+  ### QUALITY VALUES
+  if (($phred33 and $phred64) or ($phred33 and $solexa) or ($phred64 and $solexa)){
+    die "You can only specify one type of quality value at a time! (--phred33-quals or --phred64-quals or --solexa-quals)";
+  }
+  if ($phred33){ ## if nothing else is specified $phred33 will be used as default by both Bowtie 1 and 2.
+    # Phred quality values work only when -q is specified
+    unless ($fastq){
+      die "Phred quality values works only when -q (FASTQ) is specified\n";
+    }
+    if ($bowtie2){
+      push @bowtie_options,"--phred33";
+    }
+    else{
+      push @bowtie_options,"--phred33-quals";
+    }
+  }
+  if ($phred64){
+    # Phred quality values work only when -q is specified
+    unless ($fastq){
+      die "Phred quality values work only when -q (FASTQ) is specified\n";
+    }
+    if ($bowtie2){
+      push @bowtie_options,"--phred64";
+    }
+    else{
+      push @bowtie_options,"--phred64-quals";
+    }
+  }
+  else{
+    $phred64 = 0;
+  }
+
+  if ($solexa){
+    if ($bowtie2){
+      die "The option '--solexa-quals' is not compatible with Bowtie 2. Please respecify!\n";
+    }
+    # Solexa to Phred value conversion works only when -q is specified
+    unless ($fastq){
+      die "Conversion from Solexa to Phred quality values works only when -q (FASTQ) is specified\n";
+    }
+    push @bowtie_options,"--solexa-quals";
+  }
+  else{
+    $solexa = 0;
+  }
+
+  ### ALIGNMENT OPTIONS
+
+  ### MISMATCHES
+  if (defined $mismatches){
+    if ($bowtie2){
+      if ($mismatches == 0 or $mismatches == 1){
+        push @bowtie_options,"-N $mismatches";
+      }
+      else{
+        die "Please set the number of multiseed mismatches for Bowtie 2 with '-N <int>' (where <int> can be 0 or 1)\n";
+      }
+    }
+    else{
+      if ($mismatches >= 0 and $mismatches <= 3){
+        push @bowtie_options,"-n $mismatches";
+      }
+      else{
+        die "Please set the number of seed mismatches for Bowtie 1 with '-n <int>' (where <int> can be 0,1,2 or 3)\n";
+      }
+    }
+  }
+  else{
+    unless ($bowtie2){
+      push @bowtie_options,"-n 1"; # setting -n to 1 by default (for use with Bowtie only) because it is much quicker than the default mode of -n 2
+    }
+  }
+
+  ### SEED LENGTH
+  if (defined $seed_length){
+    if ($bowtie2){
+      push @bowtie_options,"-L $seed_length";
+    }
+    else{
+      push @bowtie_options,"-l $seed_length";
+    }
+  }
+
+  ### MISMATCH CEILING
+  if (defined $ceiling){
+    die "The option '-e' is not compatible with Bowtie 2. Please respecify options\n" if ($bowtie2);
+    push @bowtie_options,"-e $ceiling";
+  }
+
+
+  ### BOWTIE 2 EFFORT OPTIONS
+
+  ### CONSECUTIVE SEED EXTENSION FAILS
+  if (defined $seed_extension_fails){
+    die "The option '-D <int>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
+    push @bowtie_options,"-D $seed_extension_fails";
+  }
+
+  ### RE-SEEDING REPETITIVE SEEDS
+  if (defined $reseed_repetitive_seeds){
+    die "The option '-R <int>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
+    push @bowtie_options,"-R $reseed_repetitive_seeds";
+  }
+
+
+  ### BOWTIE 2 SCORING OPTIONS
+  if ($score_min){
+    die "The option '--score_min <func>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
+    unless ($score_min =~ /^L,.+,.+$/){
+      die "The option '--score_min <func>' needs to be in the format <L,value,value> . Please consult \"setting up functions\" in the Bowtie 2 manual for further information\n\n";
+    }
+    push @bowtie_options,"--score-min $score_min";
+  }
+  else{
+    if ($bowtie2){
+      push @bowtie_options,"--score-min L,0,-0.2"; # default setting, more stringent than normal Bowtie2
+    }
+  }
+
+  ### BOWTIE 2 READ GAP OPTIONS
+  if ($rdg){
+    die "The option '--rdg <int1>,<int2>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
+    unless ($rdg =~ /^.+,.+$/){
+      die "The option '--rdg <int1>,<int2>' needs to be in the format <integer,integer> . Please consult \"setting up functions\" in the Bowtie 2 manual for further information\n\n";
+    }
+    push @bowtie_options,"--rdg $rdg";
+  }
+
+  ### BOWTIE 2 REFERENCE GAP OPTIONS
+  if ($rfg){
+    die "The option '--rfg <int1>,<int2>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
+    unless ($rfg =~ /^.+,.+$/){
+      die "The option '--rfg <int1>,<int2>' needs to be in the format <integer,integer> . Please consult \"setting up functions\" in the Bowtie 2 manual for further information\n\n";
+    }
+    push @bowtie_options,"--rfg $rfg";
+  }
+
+
+
+  ### BOWTIE 2 PARALLELIZATION OPTIONS
+  if (defined $parallel){
+    die "The parallelization switch '-p' only works for Bowtie 2. Please respecify!" unless ($bowtie2);
+  }
+  if ($bowtie2){
+    if ($parallel){
+      die "Please select a value for -p of 2 or more!\n" unless ($parallel > 1);
+      push @bowtie_options,"-p $parallel";
+      push @bowtie_options,'--reorder'; ## re-orders the bowtie 2 output so that it does match the input files. This is absolutely required for parallelization to work.
+      print "Each Bowtie 2 instance is going to be run with $parallel threads. Please monitor performance closely and tune down if needed!\n";
+      sleep (2);
+    }
+  }
+
+  ### REPORTING OPTIONS
+
+  if ($bowtie2){
+    push @bowtie_options,'--ignore-quals'; ## All mismatches will receive penalty for mismatches as if they were of high quality, which is 6 by default
+
+    ### Option -M is deprecated since Bowtie 2 version 2.0.0 beta7. I'll leave this option commented out for a while
+    if(defined $most_valid_alignments){
+
+      warn "\nThe option -M is now deprecated (as of Bowtie 2 version 2.0.0 beta7). What used to be called -M mode is still the default mode. Use the -D and -R options to adjust the effort expended to find valid alignments.\n\n";
+      #      push @bowtie_options,"-M $most_valid_alignments";sleep (5);
+    }
+    #  else{
+    #    push @bowtie_options,'-M 10';    # the default behavior for Bowtie 2 is to report (and sort) up to 500 alignments for a given sequence
+    #  }
+  }
+  else{ # Because of the way Bismark works we will always use the reporting option -k 2 (report up to 2 valid alignments) for Bowtie 1
+    push @bowtie_options,'-k 2';
+  }
+
+  ### --BEST
+  if ($bowtie2){
+    if ($best){    # Bowtie 2 does away with the concept of --best, so one can also not select --no-best when Bowtie 2 is to be used
+      die "The option '--no-best' is not compatible with Bowtie 2. Please respecify options\n";
+    }
+  }
+  else{
+    # --best is the default option for Bowtie 1, specifying --no-best can turn it off (e.g. to speed up alignment process)
+    unless ($best){
+      push @bowtie_options,'--best';
+    }
+  }
+
+  ### VANILLA BISMARK (BOWTIE 1) OUTPUT
+  if ($vanilla){
+    if ($bowtie2){
+      die "The options --bowtie2 and the --vanilla are not compatible. Please respecify!\n\n";
+    }
+  }
+  else{
+    $vanilla = 0;
+  }
+
+  ### PAIRED-END MAPPING
+  if ($mates1){
+    my @mates1 = (split (/,/,$mates1));
+    die "Paired-end mapping requires the format: -1 <mates1> -2 <mates2>, please respecify!\n" unless ($mates2);
+    my @mates2 = (split(/,/,$mates2));
+    unless (scalar @mates1 == scalar @mates2){
+      die "Paired-end mapping requires the same amounnt of mate1 and mate2 files, please respecify! (format: -1 <mates1> -2 <mates2>)\n";
+    }
+    ### storing the files pairwise as "mate1,mate2" in @filenames; the loop ends once either list is used up
+    while (1){
+      my $mate1 = shift @mates1;
+      my $mate2 = shift @mates2;
+      last unless ($mate1 and $mate2);
+      push @filenames,"$mate1,$mate2";
+    }
+    if ($bowtie2){
+      push @bowtie_options,'--no-mixed';     ## By default Bowtie 2 is not looking for single-end alignments if it can't find concordant or discordant alignments
+      push @bowtie_options,'--no-discordant';## By default Bowtie 2 is not looking for discordant alignments if it can't find concordant ones
+    }
+  }
+  elsif ($mates2){
+    die "Paired-end mapping requires the format: -1 <mates1> -2 <mates2>, please respecify!\n";
+  }
+
+  ### SINGLE-END MAPPING
+  # Single-end mapping will be performed if no mate pairs for paired-end mapping have been specified
+  my $singles; # stays undefined for paired-end runs, which is what the insert size checks below rely on
+  unless ($mates1 and $mates2){
+    $singles = join (',',@ARGV);
+    unless ($singles){
+      die "\nNo filename supplied! Please specify one or more files for single-end Bismark mapping!\n";
+    }
+    $singles =~ s/\s/,/g;
+    @filenames = (split(/,/,$singles));
+    warn "\nFiles to be analysed:\n";
+    warn "@filenames\n\n";
+    sleep (3);
+  }
+
+  ### MINIMUM INSERT SIZE (PAIRED-END ONLY)
+  if (defined $minins){
+    die "-I/--minins can only be used for paired-end mapping!\n\n" if ($singles);
+    push @bowtie_options,"--minins $minins";
+  }
+
+  ### MAXIMUM INSERT SIZE (PAIRED-END ONLY)
+  if (defined $maxins){
+    die "-X/--maxins can only be used for paired-end mapping!\n\n" if ($singles);
+    push @bowtie_options,"--maxins $maxins";
+  }
+  else{
+    unless ($singles){
+      push @bowtie_options,'--maxins 500';
+    }
+  }
+
+  ### QUIET prints nothing  besides alignments (suppresses warnings)
+  if ($quiet){
+    push @bowtie_options,'--quiet';
+  }
+
+  ### CHUNKMBS needed to be increased to avoid memory exhaustion warnings for Bowtie 1, particularly for --best (and paired-end) alignments
+  unless ($bowtie2){ # Bowtie 2 does not have a chunkmbs option
+    if (defined $chunk){
+      push @bowtie_options,"--chunkmbs $chunk";
+    }
+    else{
+      push @bowtie_options,'--chunkmbs 512'; ## setting the default to 512MB (up from 64 default)
+    }
+  }
+
+
+  ### SUMMARY OF ALL BOWTIE OPTIONS
+  my $bowtie_options = join (' ',@bowtie_options);
+
+
+  ### STRAND-SPECIFIC LIBRARIES
+  my $directional;
+  if ($non_directional){
+    print "Library was specified to be not strand-specific (non-directional), therefore alignments to all four possible bisulfite strands (OT, CTOT, OB and CTOB) will be reported.\n";
+    sleep (3);
+    $directional = 0;
+  }
+  else{
+    print "Library is assumed to be strand-specific (directional), alignments to strands complementary to the original top or bottom strands will be ignored (i.e. not performed!).\n";
+    sleep (3);
+    $directional = 1; # Changed this to being the default behaviour
+  }
+
+  ### UNMAPPED SEQUENCE OUTPUT
+  $unmapped = 0 unless ($unmapped);
+
+  ### AMBIGUOUS ALIGNMENT SEQUENCE OUTPUT
+  $multi_map = 0 unless ($multi_map);
+
+
+  ### OUTPUT DIRECTORY
+
+  ### moving back first so that a relative output directory is resolved against the directory Bismark was started from
+  chdir $parent_dir or die "Failed to move back to current working directory\n";
+  if ($output_dir){
+    unless ($output_dir =~ /\/$/){
+      $output_dir =~ s/$/\//;
+    }
+
+    if (chdir $output_dir){
+      $output_dir = getcwd; #  making the path absolute
+      unless ($output_dir =~ /\/$/){
+        $output_dir =~ s/$/\//;
+      }
+    }
+    else{
+      mkdir $output_dir or die "Unable to create directory $output_dir $!\n";
+      warn "Created output directory $output_dir!\n\n";
+      chdir $output_dir or die "Failed to move to $output_dir\n";
+      $output_dir = getcwd; #  making the path absolute
+      unless ($output_dir =~ /\/$/){
+        $output_dir =~ s/$/\//;
+      }
+    }
+    warn "Output will be written into the directory: $output_dir\n";
+  }
+  else{
+    $output_dir = '';
+  }
+
+  ### TEMPORARY DIRECTORY for C->T and G->A transcribed files
+
+  ### moving back again so that a relative temp directory is resolved against the start directory and not the output directory
+  chdir $parent_dir or die "Failed to move back to current working directory\n";
+  if ($temp_dir){
+    warn "\nUsing temp directory: $temp_dir\n";
+    unless ($temp_dir =~ /\/$/){
+      $temp_dir =~ s/$/\//;
+    }
+
+    if (chdir $temp_dir){
+      $temp_dir = getcwd; #  making the path absolute
+      unless ($temp_dir =~ /\/$/){
+        $temp_dir =~ s/$/\//;
+      }
+    }
+    else{
+      mkdir $temp_dir or die "Unable to create directory $temp_dir $!\n";
+      warn "Created temporary directory $temp_dir!\n\n";
+      chdir $temp_dir or die "Failed to move to $temp_dir\n";
+      $temp_dir = getcwd; #  making the path absolute
+      unless ($temp_dir =~ /\/$/){
+        $temp_dir =~ s/$/\//;
+      }
+    }
+    warn "Temporary files will be written into the directory: $temp_dir\n";
+  }
+  else{
+    $temp_dir = '';
+  }
+
+
+  return ($genome_folder,$CT_index_basename,$GA_index_basename,$path_to_bowtie,$sequence_format,$bowtie_options,$directional,$unmapped,$multi_map,$phred64,$solexa,$output_dir,$bowtie2,$vanilla,$sam_no_hd,$skip,$qupto,$temp_dir);
+}
+
+
+
+### Writes the SAM header to the OUT filehandle: one @HD line, one @SQ line for every
+### entry of the file-level %chromosomes hash (sequence name => sequence) and one @PG
+### line carrying the Bismark version and the command line Bismark was started with.
+### Takes no arguments; the return value is not meaningful.
+sub generate_SAM_header{
+  print OUT "\@HD\tVN:1.0\tSO:unsorted\n";          # @HD = header, VN = version, SO = sort order
+  # The order in which keys() returns hash keys is not stable between runs, so the names are
+  # sorted here to make the header (and thus the order of the @SQ lines) reproducible
+  foreach my $chr (sort keys %chromosomes){
+    my $length = length ($chromosomes{$chr});
+    print OUT "\@SQ\tSN:$chr\tLN:$length\n";        # @SQ = sequence, SN = seq name, LN = length
+  }
+  print OUT "\@PG\tID:Bismark\tVN:$bismark_version\tCL:\"bismark $command_line\"\n";        # @PG = program, ID = unique identifier, VN = program version, CL = command line
+}
+
+### I would like to thank the following individuals for their valuable contributions to the Bismark SAM output format:
+### O. Tam (Sep 2010), C. Whelan (2011), E. Vidal (2011), T. McBryan (2011), P. Hickey (2011)
+
+### Prints one SAM record for a single-end alignment to the OUT filehandle.
+###
+### Arguments:
+###   $id                      - read identifier, also the key into $methylation_call_params
+###   $actual_seq              - the read sequence
+###   $methylation_call_params - hash reference; the entry for $id supplies the alignment strand,
+###                              chromosome, position, genomic sequence, methylation call and
+###                              read/genome conversion (plus CIGAR and indels for Bowtie 2)
+###   $qual                    - quality string of the read
+###
+### Dies if the strand or the read/genome conversion combination is not one of the four
+### expected bisulfite strand combinations.
+sub single_end_SAM_output{
+  my ($id,$actual_seq,$methylation_call_params,$qual) = @_;
+
+  my $strand            = $methylation_call_params->{$id}{alignment_strand};
+  my $chr               = $methylation_call_params->{$id}{chromosome};
+  my $start             = $methylation_call_params->{$id}{position};
+  my $ref_seq           = $methylation_call_params->{$id}{unmodified_genomic_sequence};
+  my $methcall          = $methylation_call_params->{$id}{methylation_call};
+  my $read_conversion   = $methylation_call_params->{$id}{read_conversion};
+  my $genome_conversion = $methylation_call_params->{$id}{genome_conversion};
+
+  ### FLAG
+  ### Of the bitwise SAM FLAG field only the value 16 (0x10, SEQ being reverse complemented) is
+  ### used for single-end reads. The FLAG reflects the original strand the read yields
+  ### information for, which depends on the alignment strand and on the combination of read
+  ### and genome conversion:
+  ###    + strand, CT read, CT genome  ->   0  (OT)
+  ###    + strand, GA read, GA genome  ->  16  (CTOB, yields information for the original bottom strand)
+  ###    - strand, CT read, GA genome  ->  16  (OB)
+  ###    - strand, GA read, CT genome  ->   0  (CTOT, yields information for the original top strand)
+  my %flag_for = (
+                  '+' => { 'CT/CT' => 0,  'GA/GA' => 16 },
+                  '-' => { 'CT/GA' => 16, 'GA/CT' => 0  },
+                 );
+
+  unless (exists $flag_for{$strand}){
+    die "Unexpected strand information: $strand\n\n";
+  }
+
+  my $flag = $flag_for{$strand}{"$read_conversion/$genome_conversion"};
+  unless (defined $flag){
+    die "Unexpected strand and read/genome conversion: strand: $strand, read conversion: $read_conversion, genome_conversion: $genome_conversion\n\n";
+  }
+
+  ### MAPQ: 255 means that the mapping quality is not available
+  my $mapq = 255;
+
+  ### CIGAR: Bowtie 2 reports a CIGAR string itself. Bowtie 1 output does not contain indels,
+  ### so the whole read is written as matches/mismatches
+  my $cigar = $bowtie2 ? $methylation_call_params->{$id}{CIGAR} : length($actual_seq) . "M";
+
+  ### RNEXT, PNEXT and TLEN are paired-end fields and get their 'not available' values here
+  my $rnext = "*";
+  my $pnext = 0;
+  my $tlen  = 0;
+
+  ### The genomic sequence is 2 characters longer than needed here. For CT converted reads the
+  ### last 2 characters are dropped, otherwise (complementary strands in non-directional
+  ### libraries) the first 2
+  my $trimmed_length = length($ref_seq) - 2;
+  $ref_seq = ($read_conversion eq 'CT')
+    ? substr($ref_seq, 0, $trimmed_length)
+    : substr($ref_seq, 2, $trimmed_length);
+
+  ### Reverse strand alignments are written relative to the forward genomic strand, so the
+  ### read and the reference get reverse-complemented and the quality string gets reversed
+  my $on_reverse_strand = ($strand eq '-');
+  if ($on_reverse_strand){
+    $actual_seq = revcomp($actual_seq);
+    $ref_seq    = revcomp($ref_seq);
+    $qual       = reverse $qual;
+  }
+
+  ### NM tag: edit distance to the reference. hemming_dist() supplies the number of nucleotide
+  ### differences, for Bowtie 2 the number of inserted/deleted bases is added on top
+  my $edit_distance = hemming_dist($actual_seq,$ref_seq);
+  $edit_distance += $methylation_call_params->{$id}{indels} if ($bowtie2);
+  my $NM_tag = "NM:i:$edit_distance";
+
+  ### XX tag: string of the mismatched reference bases in the alignment (no indel information)
+  my $XX_tag = make_mismatch_string($actual_seq, $ref_seq);
+
+  ### XM tag: methylation call string, reversed as well if the read was reverse-complemented
+  my $XM_tag = 'XM:Z:' . ($on_reverse_strand ? scalar reverse($methcall) : $methcall);
+
+  ### XR tag: read conversion, XG tag: genome conversion
+  my $XR_tag = "XR:Z:$read_conversion";
+  my $XG_tag = "XG:Z:$genome_conversion";
+
+  # SAM format: QNAME, FLAG, RNAME, 1-based POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL, optional fields
+  my @sam_fields = ($id,$flag,$chr,$start,$mapq,$cigar,$rnext,$pnext,$tlen,$actual_seq,$qual,$NM_tag,$XX_tag,$XM_tag,$XR_tag,$XG_tag);
+  print OUT join("\t",@sam_fields),"\n";
+}
+
+
+sub paired_end_SAM_output{
+  my ($id,$actual_seq_1,$actual_seq_2,$methylation_call_params,$qual_1,$qual_2) = @_;
+  my $strand_1                = $methylation_call_params->{$id}->{alignment_read_1}; # Bowtie 1 only reports the read 1 alignment strand
+  my $strand_2                = $methylation_call_params->{$id}->{alignment_read_2};
+  my $chr                     = $methylation_call_params->{$id}->{chromosome};	
+  my $ref_seq_1               = $methylation_call_params->{$id}->{unmodified_genomic_sequence_1};
+  my $ref_seq_2               = $methylation_call_params->{$id}->{unmodified_genomic_sequence_2};
+  my $methcall_1              = $methylation_call_params->{$id}->{methylation_call_1};
+  my $methcall_2              = $methylation_call_params->{$id}->{methylation_call_2};
+  my $read_conversion_1       = $methylation_call_params->{$id}->{read_conversion_1};
+  my $read_conversion_2       = $methylation_call_params->{$id}->{read_conversion_2};
+  my $genome_conversion       = $methylation_call_params->{$id}->{genome_conversion};
+  my $number_of_mismatches_1  = $methylation_call_params->{$id}->{number_of_mismatches_1}; # only needed for custom allele-specific output, not the default!
+  my $number_of_mismatches_2  = $methylation_call_params->{$id}->{number_of_mismatches_2};
+
+  my $id_1 = $id.'/1';
+  my $id_2 = $id.'/2';
+
+  # Allows all degenerate nucleotide sequences in reference genome
+  die "Reference sequence ($ref_seq_1) contains invalid nucleotides!\n" if $ref_seq_1 =~ /[^ACTGNRYMKSWBDHV]/i;
+  die "Reference sequence ($ref_seq_2) contains invalid nucleotides!\n" if $ref_seq_2 =~ /[^ACTGNRYMKSWBDHV]/i;
+
+  my $index; # used to store the srand origin of the alignment in a less convoluted way
+
+  if ($read_conversion_1 eq 'CT' and $genome_conversion eq 'CT'){
+    $index = 0; ## this is OT   (original top strand)
+  }	
+  elsif ($read_conversion_1 eq 'GA' and $genome_conversion eq 'GA'){
+    $index = 1; ## this is CTOB (complementary to OB)
+  }
+  elsif ($read_conversion_1 eq 'GA' and $genome_conversion eq 'CT'){
+    $index = 2; ## this is CTOT (complementary to OT)
+  }
+  elsif ($read_conversion_1 eq 'CT' and $genome_conversion eq 'GA'){
+    $index = 3; ## this is OB   (original bottom)
+  }
+  else {
+    die "Unexpected combination of read 1 and genome conversion: $read_conversion_1 / $genome_conversion\n";
+  }
+	
+  ### we need to remove 2 bp of the genomic sequence as we were extracting read + 2bp long fragments to make a methylation call at the
+  ### first or last position.
+
+  if ($index == 0 or $index == 3){ # OT or OB
+    $ref_seq_1 = substr($ref_seq_1,0,length($ref_seq_1)-2);
+    $ref_seq_2 = substr($ref_seq_2,2,length($ref_seq_2)-2);
+  }
+  else{ # CTOT or CTOB
+    $ref_seq_1 = substr($ref_seq_1,2,length($ref_seq_1)-2);
+    $ref_seq_2 = substr($ref_seq_2,0,length($ref_seq_2)-2);
+  }
+
+  #####
+
+  my $start_read_1;
+  my $start_read_2;
+  # adjusting end positions
+
+  if ($bowtie2){
+    $start_read_1 = $methylation_call_params->{$id}->{position_1};
+    $start_read_2 = $methylation_call_params->{$id}->{position_2};
+  }
+  else{ # Bowtie 1 output. $strand_1 stores the alignment of Read 1
+    if ($strand_1 eq '+'){ # Read 1 aligns to the + strand
+      $start_read_1 = $methylation_call_params->{$id}->{start_seq_1};
+      $start_read_2 = $methylation_call_params->{$id}->{alignment_end} - length ($actual_seq_2) + 1;
+    }
+    else{ # read 1 is on the - strand
+      $start_read_1 = $methylation_call_params->{$id}->{alignment_end} - length ($actual_seq_1) + 1;
+      $start_read_2 = $methylation_call_params->{$id}->{start_seq_1};
+    }
+  }
+
+  #####
+
+  my $end_read_1;
+  my $end_read_2;
+  # adjusting end positions
+
+  if ($bowtie2){
+    $end_read_1 = $methylation_call_params->{$id}->{end_position_1};
+    $end_read_2 = $methylation_call_params->{$id}->{end_position_2};
+  }
+  else{ # Bowtie 1 output. $strand_1 stores the alignment of Read 1
+    if ($strand_1 eq '+'){ # Read 1 aligns to the + strand
+      $end_read_1 = $methylation_call_params->{$id}->{start_seq_1} + length ($actual_seq_1)-1;
+      $end_read_2 = $methylation_call_params->{$id}->{alignment_end};
+      }
+    else{
+      $end_read_1 = $methylation_call_params->{$id}->{alignment_end};
+      $end_read_2 = $methylation_call_params->{$id}->{start_seq_1} + length ($actual_seq_2)-1;
+    }
+  }
+
+  #####
+
+  ### This is a description of the bitwise FLAG field which needs to be set for the SAM file taken from: "The SAM Format Specification (v1.4-r985), September 7, 2011"
+  ## FLAG: bitwise FLAG. Each bit is explained in the following table:
+  ## Bit    Description                                                Comment                                Value
+  ## 0x1    template having multiple segments in sequencing            0: single-end 1: paired end            value: 2^^0 (  1)
+  ## 0x2    each segment properly aligned according to the aligner     true only for paired-end alignments    value: 2^^1 (  2)
+  ## 0x4    segment unmapped                                           ---                                           ---
+  ## 0x8    next segment in the template unmapped                      ---                                           ---
+  ## 0x10   SEQ being reverse complemented                             - strand alignment                     value: 2^^4 ( 16)
+  ## 0x20   SEQ of the next segment in the template being reversed     + strand alignment                     value: 2^^5 ( 32)
+  ## 0x40   the first segment in the template                          read 1                                 value: 2^^6 ( 64)
+  ## 0x80   the last segment in the template                           read 2                                 value: 2^^7 (128)
+  ## 0x100  secondary alignment                                        ---                                           ---
+  ## 0x200  not passing quality controls                               ---                                           ---
+  ## 0x400  PCR or optical duplicate                                   ---                                           ---
+
+  ### As the FLAG value do not consider that there might be 4 different bisulfite strands of DNA, we are trying to make FLAG tags which take the strand identity into account
+
+  # strands OT and CTOT will be treated as aligning to the top strand (both sequences are scored as aligning to the top strand)
+  # strands OB and CTOB will be treated as aligning to the bottom strand (both sequences are scored as reverse complemented sequences)
+
+  my $flag_1;                                                          # FLAG variable used for SAM format
+  my $flag_2;
+
+  if ($index == 0){       # OT
+    $flag_1 = 67;                                                      # Read 1 is on the + strand  (1+2+64) (Read 2 is technically reverse-complemented, but we do not score it)
+    $flag_2 = 131;                                                     # Read 2 is on - strand but informative for the OT        (1+2+128)
+  }
+  elsif ($index == 1){    # CTOB
+    $flag_1 = 115;                                                     # Read 1 is on the + strand, we score for OB  (1+2+16+32+64)
+    $flag_2 = 179;                                                     # Read 2 is on the - strand  (1+2+16+32+128)
+  }
+  elsif ($index == 2){    # CTOT
+    $flag_1 = 67;                                                      # Read 1 is on the - strand (CTOT) strand, but we score it for OT (1+2+64)
+    $flag_2 = 131;                                                     # Read 2 is on the + strand, score it for OT (1+2+128)
+  }
+  elsif ($index == 3){    # OB
+    $flag_1 = 115;                                                     # Read 1 is on the - strand, we score for OB  (1+2+16+32+64)
+    $flag_2 = 179;                                                     # Read 2 is on the + strand  (1+2+16+32+128)
+  }
+
+  #####
+
+  my $mapq = 255;                                                      # Mapping quality is unavailable
+
+  #####
+
+  my $cigar_1;
+  my $cigar_2;
+
+  if ($bowtie2){
+    $cigar_1 = $methylation_call_params->{$id}->{CIGAR_1};             # Actual CIGAR string reported by Bowtie 2
+    $cigar_2 = $methylation_call_params->{$id}->{CIGAR_2};
+  }
+  else{
+    $cigar_1 = length($actual_seq_1) . "M";                            # Assume no indels for Bowtie 1  mapping (only matches and mismatches)
+    $cigar_2 = length($actual_seq_2) . "M";
+  }
+
+  #####
+
+  my $rnext = '=';                                                     # Chromosome of mate; applies to both reads
+
+  #####
+
+  my $pnext_1 = $start_read_2;                                         # Leftmost position of mate
+  my $pnext_2 = $start_read_1;
+
+  #####
+
+  my $tlen_1;                                                          # signed observed Template LENgth (or inferred fragment size)
+  my $tlen_2;
+
+  if ($bowtie2){
+
+    if ($start_read_1 <= $start_read_2){
+
+      # Read 1 alignment is leftmost
+
+      if ($end_read_2 >= $end_read_1){
+	
+	# ------------------------->     read 1   reads overlapping
+	#  <-------------------------    read 2
+	#
+	# or
+	#
+	# ------------------------->     read 1
+	#   <-----------------------     read 2   read 2 contained within read 1
+	#
+	# or
+	#
+	# ------------------------->     read 1   reads 1 and 2 exactly overlapping
+	# <-------------------------     read 2
+	#
+
+	# dovetailing of reads is not enabled for Bowtie 2 alignments
+
+	$tlen_1 = $end_read_2 - $start_read_1 + 1;                         # Leftmost read has a + sign,
+	$tlen_2 = $start_read_1 - $end_read_2 - 1;                         # Rightmost read has a - sign
+      }
+      elsif ($end_read_2 < $end_read_1){
+
+	# ------------------------->     read 1
+	#       <-----------             read 2   read 2 contained within read 1
+	#
+	# or
+	#
+	# ------------------------->     read 1
+	# <-----------                   read 2   read 2 contained within read 1
+
+	# start and end of read 2  are fully contained within read 1
+	$tlen_1 = 0;                                                       # Set as 0 when the information is unavailable
+	$tlen_2 = 0;                                                       # Set as 0 when the information is unavailable
+      }
+
+    }
+
+    elsif ($start_read_2 < $start_read_1){
+
+      if ($end_read_1 >= $end_read_2){
+
+      # Read 2 alignment is leftmost
+
+	# ------------------------->     read 2   reads overlapping
+	#  <-------------------------    read 1
+	#
+	# or
+	#
+	# ------------------------->     read 2
+	#   <-----------------------     read 1   read 1 contained within read 2
+	#
+	#
+
+	$tlen_2 = $end_read_1 - $start_read_2 + 1;                         # Leftmost read has a + sign,
+	$tlen_1 = $start_read_2 - $end_read_1 - 1;                         # Rightmost read has a - sign
+      }
+      elsif ($end_read_1 < $end_read_2){
+
+	# ------------------------->     read 2
+	#       <-----------             read 1   read 1 contained within read 2
+	#
+	# or
+	#
+	# ------------------------->     read 2
+	# <-----------                   read 1   read 1 contained within read 2
+	
+	# start and end of read 1  are fully contained within read 2
+	$tlen_1 = 0;                                                       # Set as 0 when the information is unavailable
+	$tlen_2 = 0;                                                       # Set as 0 when the information is unavailable
+      }
+    }
+  }
+
+  else{ # Bowtie 1
+
+    if ($end_read_2 >= $end_read_1){
+      # Read 1 alignment is leftmost
+      # ------------------------->  read 1
+      #  <------------------------- read 2
+      # this is the most extreme case for Bowtie 1 alignments, reads do not contain each other, also no dovetailing
+
+      $tlen_1 = $end_read_2 - $start_read_1 + 1;                         # Leftmost read has a + sign,
+      $tlen_2 = $start_read_1 - $end_read_2 - 1;                         # Rightmost read has a - sign
+    }
+    else{
+      # Read 2 alignment is leftmost
+      # ------------------------->  read 2
+      #  <------------------------- read 1
+      # this is the most extreme case for Bowtie 1 alignments, reads do not contain each other, also no dovetailing
+
+      $tlen_2 = $end_read_1 - $start_read_2 + 1;                         # Leftmost read has a + sign,
+      $tlen_1 = $start_read_2 - $end_read_1 - 1;                         # Rightmost read has a - sign
+    }
+  }
+
+  #####
+
+  # adjusting the strand of the sequence before we use them to generate mismatch strings
+  if ($strand_1 eq '-'){
+    $actual_seq_1 = revcomp($actual_seq_1);                            # Sequence represented on the forward genomic strand
+    $ref_seq_1 = revcomp($ref_seq_1);                                  # Required for comparison with actual sequence
+    $qual_1 = reverse $qual_1;                                         # we need to reverse the quality string as well
+  }
+  if ($strand_2 eq '-'){
+    $actual_seq_2 = revcomp($actual_seq_2);                            # Mate sequence represented on the forward genomic strand
+    $ref_seq_2 = revcomp($ref_seq_2);                                  # Required for comparison with actual sequence
+    $qual_2 = reverse $qual_2;                                         # If the sequence gets reverse complemented we reverse the quality string as well
+  }
+
+  #  print "$actual_seq_1\n$ref_seq_1\n\n";
+  #  print "$actual_seq_2\n$ref_seq_2\n\n";
+
+  #####
+
+  my $hemming_dist_1 = hemming_dist($actual_seq_1,$ref_seq_1);         # Minimal number of one-nucleotide edits needed to transform the read string into the reference sequence
+  my $hemming_dist_2 = hemming_dist($actual_seq_2,$ref_seq_2);
+  if ($bowtie2){
+    $hemming_dist_1 += $methylation_call_params->{$id}->{indels_1};    # Adding the number of inserted/deleted bases which we parsed while getting the genomic sequence
+    $hemming_dist_2 += $methylation_call_params->{$id}->{indels_2};    # Adding the number of inserted/deleted bases which we parsed while getting the genomic sequence
+  }
+  my $NM_tag_1 = "NM:i:$hemming_dist_1";                               # Optional tag NM: edit distance based on nucleotide differences
+  my $NM_tag_2 = "NM:i:$hemming_dist_2";                               # Optional tag NM: edit distance based on nucleotide differences
+
+  #####
+
+  my $XX_tag_1 = make_mismatch_string($actual_seq_1,$ref_seq_1);       # Optional tag XX: String providing mismatched reference bases in the alignment (NO indel information!)
+  my $XX_tag_2 = make_mismatch_string($actual_seq_2,$ref_seq_2);
+
+  #####
+
+  my $XM_tag_1;                                                        # Optional tag XM: Methylation call string
+  my $XM_tag_2;
+
+  if ($strand_1 eq '-'){
+    $XM_tag_1 = 'XM:Z:'.reverse $methcall_1;                           # Needs to be reversed if the sequence was reverse complemented
+  }
+  else{
+    $XM_tag_1 = "XM:Z:$methcall_1";
+  }
+
+  if ($strand_2 eq '-'){
+    $XM_tag_2 = 'XM:Z:'.reverse $methcall_2;                           # Needs to be reversed if the sequence was reverse complemented
+  }
+  else{
+    $XM_tag_2 = "XM:Z:$methcall_2";
+  }
+
+  #####
+
+  my $XR_tag_1 = "XR:Z:$read_conversion_1";                            # Optional tag XR: Read 1 conversion state
+  my $XR_tag_2 = "XR:Z:$read_conversion_2";                            # Optional tag XR: Read 2 conversion state
+
+  #####
+
+  my $XG_tag = "XG:Z:$genome_conversion";                              # Optional tag XG: Genome Conversion state; valid for both reads
+
+  #####
+
+  # SAM format: QNAME, FLAG, RNAME, 1-based POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL, optional fields
+  print OUT join("\t", ($id_1, $flag_1, $chr, $start_read_1, $mapq, $cigar_1, $rnext, $pnext_1, $tlen_1, $actual_seq_1, $qual_1, $NM_tag_1, $XX_tag_1, $XM_tag_1,$XR_tag_1,$XG_tag)), "\n";
+  print OUT join("\t", ($id_2, $flag_2, $chr, $start_read_2, $mapq, $cigar_2, $rnext, $pnext_2, $tlen_2, $actual_seq_2, $qual_2, $NM_tag_2, $XX_tag_2, $XM_tag_2,$XR_tag_2,$XG_tag)), "\n";
+}
+
+### Returns the reverse complement of a nucleotide sequence.
+### A, C, G and T are complemented (lower case bases come back as upper case complements);
+### every other character, e.g. N, is passed through unchanged.
+### Dies if the sequence argument is missing or evaluates to false (empty string or '0').
+sub revcomp{
+  my ($sequence) = @_;
+  die "Missing seq to reverse complement\n" unless $sequence;
+
+  my $reverse_complement = scalar reverse $sequence;   # reverse the order of the bases first ...
+  $reverse_complement =~ tr/ACTGactg/TGACTGAC/;         # ... then swap each base for its complement
+  return $reverse_complement;
+}
+
+### Returns the Hamming distance between a read and its reference sequence, i.e. the number
+### of read positions at which the two strings differ (the comparison is case-sensitive).
+###
+### Arguments: (1) the actual read sequence, (2) the reference sequence.
+###
+### Only positions covered by the read are compared:
+###  - reference bases beyond the end of the read are ignored
+###  - read bases without a reference counterpart are counted as mismatches
+### Undefined arguments are treated as empty strings.
+sub hemming_dist{
+  my ($actual_seq, $ref_seq) = @_;
+  $actual_seq = '' unless defined $actual_seq;
+  $ref_seq    = '' unless defined $ref_seq;
+
+  # a reference that is longer than the read must not contribute additional mismatches
+  if (length($ref_seq) > length($actual_seq)){
+    $ref_seq = substr($ref_seq, 0, length($actual_seq));
+  }
+
+  # Bitwise string XOR yields a \0 byte wherever both strings carry the same character and a
+  # non-\0 byte everywhere else (a shorter reference is padded with \0 bytes). Both operands are
+  # stringified explicitly so that Perl never falls back to a numeric XOR.
+  my $difference = "$actual_seq" ^ "$ref_seq";
+
+  # tr with the /c modifier and an empty replacement list only counts the non-\0 bytes
+  my $mismatches = ($difference =~ tr/\0//c);
+  return $mismatches;
+}
+
+### Builds the optional SAM tag XX (XX:Z:...) describing the mismatches of a read against the
+### reference. The value alternates between the number of matching bases and the reference base
+### at each mismatched position, e.g. read ACGT vs. reference ACTT yields XX:Z:2T1. Runs of
+### zero matches are not printed, so adjacent mismatches appear as consecutive reference bases.
+### The string carries no indel information.
+###
+### Arguments: (1) the actual read sequence, (2) the reference sequence.
+### Dies if either argument is missing or evaluates to false.
+sub make_mismatch_string{
+  my $actual_seq = shift or die "Missing actual sequence";
+  my $ref_seq = shift or die "Missing reference sequence";
+
+  my $XX_tag = "XX:Z:";
+  my $ref_length = length($ref_seq);
+  my $difference = ($actual_seq ^ $ref_seq);             # Bitwise comparison: identical positions become \0
+  my $prev_mm_pos = 0;                                   # 1-based position of the previous mismatch
+
+  while ($difference =~ /[^\0]/g){                       # Where bitwise comparison showed a difference
+    my $mm_pos = pos($difference);                       # 1-based position of the current mismatch
+    my $nuc_match = $mm_pos - $prev_mm_pos - 1;          # Number of nucleotides that matched since the last mismatch
+
+    $XX_tag .= $nuc_match if $nuc_match > 0;             # Ignore if mismatches are adjacent to each other
+
+    # Report the reference nucleotide that differs from the read. Positions beyond the end of the
+    # reference have no reference base to report. The variable is deliberately not declared with
+    # 'my ... if ...', as the behaviour of such a declaration is undefined.
+    if ($mm_pos <= $ref_length){
+      $XX_tag .= substr($ref_seq, $mm_pos - 1, 1);
+    }
+
+    $prev_mm_pos = $mm_pos;                              # Position of last mismatch
+  }
+
+  my $end_matches = $ref_length - $prev_mm_pos;          # Number of matches from the last mismatch till the end of the sequence
+  $XX_tag .= $end_matches if $end_matches > 0;           # Ignore if the mismatch is at the end of the sequence
+  return $XX_tag;
+}
+
+
+
+sub print_helpfile{
+  print << "HOW_TO";
+
+
+     This program is free software: you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published by
+     the Free Software Foundation, either version 3 of the License, or
+     (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+     GNU General Public License for more details.
+     You should have received a copy of the GNU General Public License
+     along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+
+DESCRIPTION
+
+
+The following is a brief description of command line options and arguments to control the Bismark
+bisulfite mapper and methylation caller. Bismark takes in FastA or FastQ files and aligns the
+reads to a specified bisulfite genome. Sequence reads are transformed into a bisulfite converted forward strand
+version (C->T conversion) or into a bisulfite treated reverse strand (G->A conversion of the forward strand).
+Each of these reads are then aligned to bisulfite treated forward strand index of a reference genome
+(C->T converted) and a bisulfite treated reverse strand index of the genome (G->A conversion of the
+forward strand, by doing this alignments will produce the same positions). These 4 instances of Bowtie (1 or 2)
+are run in parallel. The sequence file(s) are then read in again sequence by sequence to pull out the original
+sequence from the genome and determine if there were any protected C's present or not.
+
+As of version 0.7.0 Bismark will only run 2 alignment threads for OT and OB in parallel, the 4 strand mode can be
+re-enabled by using --non_directional.
+
+The final output of Bismark is in SAM format by default. For Bowtie 1 one can also choose to report the old
+'vanilla' output format, which is a single tab delimited file with all sequences that have a unique best
+alignment to any of the 4 possible strands of a bisulfite PCR product. Both formats are described in more detail below.
+
+
+USAGE: bismark [options] <genome_folder> {-1 <mates1> -2 <mates2> | <singles>}
+
+
+ARGUMENTS:
+
+<genome_folder>          The path to the folder containing the unmodified reference genome
+                         as well as the subfolders created by the Bismark_Genome_Preparation
+                         script (/Bisulfite_Genome/CT_conversion/ and /Bisulfite_Genome/GA_conversion/).
+                         Bismark expects one or more fastA files in this folder (file extension: .fa
+                         or .fasta). The path can be relative or absolute.
+
+-1 <mates1>              Comma-separated list of files containing the #1 mates (filename usually includes
+                         "_1"), e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must
+                         correspond file-for-file and read-for-read with those specified in <mates2>.
+                         Reads may be a mix of different lengths. Bismark will produce one mapping result
+                         and one report file per paired-end input file pair.
+
+-2 <mates2>              Comma-separated list of files containing the #2 mates (filename usually includes
+                         "_2"), e.g. flyA_2.fq,flyB_2.fq). Sequences specified with this option must
+                         correspond file-for-file and read-for-read with those specified in <mates1>.
+                         Reads may be a mix of different lengths.
+
+<singles>                A comma- or space-separated list of files containing the reads to be aligned (e.g.
+                         lane1.fq,lane2.fq lane3.fq). Reads may be a mix of different lengths. Bismark will
+                         produce one mapping result and one report file per input file.
+
+
+OPTIONS:
+
+
+Input:
+
+-q/--fastq               The query input files (specified as <mate1>,<mate2> or <singles> are FASTQ
+                         files (usually having extension .fq or .fastq). This is the default. See also
+                         --solexa-quals.
+
+-f/--fasta               The query input files (specified as <mate1>,<mate2> or <singles> are FASTA
+                         files (usually having extension .fa, .mfa, .fna or similar). All quality values
+                         are assumed to be 40 on the Phred scale.
+
+-s/--skip <int>          Skip (i.e. do not align) the first <int> reads or read pairs from the input.
+
+-u/--upto <int>          Only aligns the first <int> reads or read pairs from the input. Default: no limit.
+
+--phred33-quals          FASTQ qualities are ASCII chars equal to the Phred quality plus 33. Default: on.
+
+--phred64-quals          FASTQ qualities are ASCII chars equal to the Phred quality plus 64. Default: off.
+
+--solexa-quals           Convert FASTQ qualities from solexa-scaled (which can be negative) to phred-scaled
+                         (which can't). The formula for conversion is: 
+                         phred-qual = 10 * log(1 + 10 ** (solexa-qual/10.0)) / log(10). Used with -q. This
+                         is usually the right option for use with (unconverted) reads emitted by the GA
+                         Pipeline versions prior to 1.3. Works only for Bowtie 1. Default: off.
+
+--solexa1.3-quals        Same as --phred64-quals. This is usually the right option for use with (unconverted)
+                         reads emitted by GA Pipeline version 1.3 or later. Default: off.
+
+--path_to_bowtie         The full path </../../> to the Bowtie (1 or 2) installation on your system. If not
+                         specified it is assumed that Bowtie (1 or 2) is in the PATH.
+
+
+Alignment:
+
+-n/--seedmms <int>       The maximum number of mismatches permitted in the "seed", i.e. the first L base pairs
+                         of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the 
+                         default is 1. This option is only available for Bowtie 1 (for Bowtie 2 see -N).
+
+-l/--seedlen             The "seed length"; i.e., the number of bases of the high quality end of the read to
+                         which the -n ceiling applies. The default is 28. Bowtie (and thus Bismark) is faster for
+                         larger values of -l. This option is only available for Bowtie 1 (for Bowtie 2 see -L).
+
+-e/--maqerr <int>        Maximum permitted total of quality values at all mismatched read positions throughout
+                         the entire alignment, not just in the "seed". The default is 70. Like Maq, bowtie rounds
+                         quality values to the nearest 10 and saturates at 30. This value is not relevant for
+                         Bowtie 2.
+
+--chunkmbs <int>         The number of megabytes of memory a given thread is given to store path descriptors in
+                         --best mode. Best-first search must keep track of many paths at once to ensure it is
+                         always extending the path with the lowest cumulative cost. Bowtie tries to minimize the
+                         memory impact of the descriptors, but they can still grow very large in some cases. If
+                         you receive an error message saying that chunk memory has been exhausted in --best mode,
+                         try adjusting this parameter up to dedicate more memory to the descriptors. This value
+                         is not relevant for Bowtie 2. Default: 512.
+
+-I/--minins <int>        The minimum insert size for valid paired-end alignments. E.g. if -I 60 is specified and
+                         a paired-end alignment consists of two 20-bp alignments in the appropriate orientation
+                         with a 20-bp gap between them, that alignment is considered valid (as long as -X is also
+                         satisfied). A 19-bp gap would not be valid in that case. Default: 0.
+
+-X/--maxins <int>        The maximum insert size for valid paired-end alignments. E.g. if -X 100 is specified and
+                         a paired-end alignment consists of two 20-bp alignments in the proper orientation with a
+                         60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied).
+                         A 61-bp gap would not be valid in that case. Default: 500.
+
+
+Bowtie 1 Reporting:
+
+-k <2>                   Due to the way Bismark works Bowtie will report up to 2 valid alignments. This option
+                         will be used by default.
+
+--best                   Make Bowtie guarantee that reported singleton alignments are "best" in terms of stratum
+                         (i.e. number of mismatches, or mismatches in the seed in the case of -n mode) and in
+                         terms of the quality; e.g. a 1-mismatch alignment where the mismatch position has Phred
+                         quality 40 is preferred over a 2-mismatch alignment where the mismatched positions both
+                         have Phred quality 10. When --best is not specified, Bowtie may report alignments that
+                         are sub-optimal in terms of stratum and/or quality (though an effort is made to report
+                         the best alignment). --best mode also removes all strand bias. Note that --best does not
+                         affect which alignments are considered "valid" by Bowtie, only which valid alignments
+                         are reported by Bowtie. Bowtie is about 1-2.5 times slower when --best is specified.
+                         Default: on.
+
+--no_best                Disables the --best option which is on by default. This can speed up the alignment process,
+                         e.g. for testing purposes, but for credible results it is not recommended to disable --best.
+
+
+Output:
+
+--non_directional        The sequencing library was constructed in a non strand-specific manner, alignments to all four
+                         bisulfite strands will be reported. Default: OFF.
+
+                         (The current Illumina protocol for BS-Seq is directional, in which case the strands complementary
+                         to the original strands are merely theoretical and should not exist in reality. Specifying directional
+                         alignments (which is the default) will only run 2 alignment threads to the original top (OT)
+                         or bottom (OB) strands in parallel and report these alignments. This is the recommended option
+                         for strand-specific libraries).
+
+--sam-no-hd              Suppress SAM header lines (starting with @). This might be useful when very large input files are
+                         split up into several smaller files to run concurrently and the output files are to be merged.
+
+--quiet                  Print nothing besides alignments.
+
+--vanilla                Performs bisulfite mapping with Bowtie 1 and prints the 'old' output (as in Bismark 0.5.X) instead
+                         of SAM format output.
+
+-un/--unmapped           Write all reads that could not be aligned to a file in the output directory. Written reads will
+                         appear as they did in the input, without any translation of quality values that may have
+                         taken place within Bowtie or Bismark. Paired-end reads will be written to two parallel files with _1
+                         and _2 inserted in their filenames, i.e. _unmapped_reads_1.txt and unmapped_reads_2.txt. Reads
+                         with more than one valid alignment with the same number of lowest mismatches (ambiguous mapping)
+                         are also written to _unmapped_reads.txt unless the option --ambiguous is specified as well.
+
+--ambiguous              Write all reads which produce more than one valid alignment with the same number of lowest
+                         mismatches or other reads that fail to align uniquely to a file in the output directory.
+                         Written reads will appear as they did in the input, without any of the translation of quality
+                         values that may have taken place within Bowtie or Bismark. Paired-end reads will be written to two
+                         parallel files with _1 and _2 inserted in their filenames, i.e. _ambiguous_reads_1.txt and
+                         _ambiguous_reads_2.txt. These reads are not written to the file specified with --un.
+
+-o/--output_dir <dir>    Write all output files into this directory. By default the output files will be written into
+                         the same folder as the input file(s). If the specified folder does not exist, Bismark will attempt
+                         to create it first. The path to the output folder can be either relative or absolute.
+
+--temp_dir <dir>         Write temporary files to this directory instead of into the same directory as the input files. If
+                         the specified folder does not exist, Bismark will attempt to create it first. The path to the
+                         temporary folder can be either relative or absolute.
+
+
+
+Other:
+
+-h/--help                Displays this help file.
+
+-v/--version             Displays version information.
+
+
+BOWTIE 2 SPECIFIC OPTIONS
+
+--bowtie2                Uses Bowtie 2 instead of Bowtie 1. Bismark limits Bowtie 2 to only perform end-to-end
+                         alignments, i.e. searches for alignments involving all read characters (also called 
+                         untrimmed or unclipped alignments). Bismark assumes that raw sequence data is adapter
+                         and/or quality trimmed where appropriate. Default: off.
+
+Bowtie 2 alignment options:
+
+-N <int>                 Sets the number of mismatches allowed in a seed alignment during multiseed alignment.
+                         Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower)
+                         but increases sensitivity. Default: 0. This option is only available for Bowtie 2 (for
+                         Bowtie 1 see -n).
+
+-L <int>                 Sets the length of the seed substrings to align during multiseed alignment. Smaller values
+                         make alignment slower but more sensitive. Default: the --sensitive preset of Bowtie 2 is
+                         used by default, which sets -L to 20. This option is only available for Bowtie 2 (for
+                         Bowtie 1 see -l).
+
+--ignore-quals           When calculating a mismatch penalty, always consider the quality value at the mismatched
+                         position to be the highest possible, regardless of the actual value. I.e. input is treated
+                         as though all quality values are high. This is also the default behavior when the input
+                         doesn't specify quality values (e.g. in -f mode). This option is invariable and on by default.
+
+
+Bowtie 2 paired-end options:
+
+--no-mixed               This option disables Bowtie 2's behavior to try to find alignments for the individual mates if
+                         it cannot find a concordant or discordant alignment for a pair. This option is invariable and
+                         on by default.
+
+--no-discordant          Normally, Bowtie 2 looks for discordant alignments if it cannot find any concordant alignments.
+                         A discordant alignment is an alignment where both mates align uniquely, but that does not
+                         satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior
+                         and it is on by default.
+
+
+Bowtie 2 effort options:
+
+-D <int>                 Up to <int> consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using
+                         the alignments found so far. A seed extension "fails" if it does not yield a new best or a
+                         new second-best alignment. Default: 15.
+
+-R <int>                 <int> is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds.
+                         When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of
+                         mismatches allowed) at different offsets and searches for more alignments. A read is considered
+                         to have repetitive seeds if the total number of seed hits divided by the number of seeds
+                         that aligned at least once is greater than 300. Default: 2.
+
+Bowtie 2 parallelization options:
+
+
+-p NTHREADS              Launch NTHREADS parallel search threads (default: 1). Threads will run on separate processors/cores
+                         and synchronize when parsing reads and outputting alignments. Searching for alignments is highly
+                         parallel, and speedup is close to linear. Increasing -p increases Bowtie 2's memory footprint.
+                         E.g. when aligning to a human genome index, increasing -p from 1 to 8 increases the memory footprint
+                         by a few hundred megabytes. This option is only available if bowtie is linked with the pthreads
+                         library (i.e. if BOWTIE_PTHREADS=0 is not specified at build time). In addition, this option will
+                         automatically use the option '--reorder', which guarantees that output SAM records are printed in
+                         an order corresponding to the order of the reads in the original input file, even when -p is set
+                         greater than 1 (Bismark requires the Bowtie 2 output to be this way). Specifying --reorder and
+                         setting -p greater than 1 causes Bowtie 2 to run somewhat slower and use somewhat more memory than
+                         if --reorder were not specified. Has no effect if -p is set to 1, since output order will naturally
+                         correspond to input order in that case.
+
+Bowtie 2 Scoring options:
+
+--score_min <func>       Sets a function governing the minimum alignment score needed for an alignment to be considered
+                         "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying
+                         L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length.
+                         See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is
+                         L,0,-0.2.
+
+--rdg <int1>,<int2>      Sets the read gap open (<int1>) and extend (<int2>) penalties. A read gap of length N gets a penalty
+                         of <int1> + N * <int2>. Default: 5, 3.
+
+--rfg <int1>,<int2>      Sets the reference gap open (<int1>) and extend (<int2>) penalties. A reference gap of length N gets
+                         a penalty of <int1> + N * <int2>. Default: 5, 3.
+
+
+Bowtie 2 Reporting options:
+
+--most_valid_alignments <int> This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is
+                         deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the
+                         default mode, but adjusting the -M setting is deprecated.  Use the -D and -R options to adjust the
+                         effort expended to find valid alignments.
+
+                         For reference, this used to be the old (now deprecated) description of -M:
+                         Bowtie 2 searches for at most <int>+1 distinct, valid alignments for each read. The search terminates when it
+                         can't find more distinct valid alignments, or when it finds <int>+1 distinct alignments, whichever
+                         happens first. Only the best alignment is reported. Information from the other alignments is used to
+                         estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes 
+                         Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that
+                         aligns many places. For reads that have more than <int>+1 distinct, valid alignments, Bowtie 2 does not
+                         guarantee that the alignment reported is the best possible in terms of alignment score. -M is
+                         always used and its default value is set to 10.
+
+
+'VANILLA' Bismark  OUTPUT:
+
+Single-end output format (tab-separated):
+
+ (1) <seq-ID>
+ (2) <read alignment strand>
+ (3) <chromosome>
+ (4) <start position>
+ (5) <end position>
+ (6) <observed bisulfite sequence>
+ (7) <equivalent genomic sequence>
+ (8) <methylation call>
+ (9) <read conversion>
+(10) <genome conversion>
+(11) <read quality score (Phred33)>
+
+
+Paired-end output format (tab-separated):
+ (1) <seq-ID>
+ (2) <read 1 alignment strand>
+ (3) <chromosome>
+ (4) <start position>
+ (5) <end position>
+ (6) <observed bisulfite sequence 1>
+ (7) <equivalent genomic sequence 1>
+ (8) <methylation call 1>
+ (9) <observed bisulfite sequence 2>
+(10) <equivalent genomic sequence 2>
+(11) <methylation call 2>
+(12) <read 1 conversion>
+(13) <genome conversion>
+(14) <read 1 quality score (Phred33)>
+(15) <read 2 quality score (Phred33)>
+
+
+Bismark SAM OUTPUT (default):
+
+ (1) QNAME  (seq-ID)
+ (2) FLAG   (this flag tries to take the strand a bisulfite read originated from into account (this is different from ordinary DNA alignment flags!))
+ (3) RNAME  (chromosome)
+ (4) POS    (start position)
+ (5) MAPQ   (always 255)
+ (6) CIGAR
+ (7) RNEXT
+ (8) PNEXT
+ (9) TLEN
+(10) SEQ
+(11) QUAL   (Phred33 scale)
+(12) NM-tag (edit distance to the reference)
+(13) XX-tag (base-by-base mismatches to the reference. This does not include indels)
+(14) XM-tag (methylation call string)
+(15) XR-tag (read conversion state for the alignment)
+(16) XG-tag (genome conversion state for the alignment)
+
+Each read of paired-end alignments is written out in a separate line in the above format.
+
+
+This script was last edited on 21 Aug 2012.
+
+HOW_TO
+}