Mercurial > repos > bgruening > trim_galore

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Sat Jul 06 09:52:23 2013 -0400
@@ -0,0 +1,43 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="cutadapt" version="1.1">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://cutadapt.googlecode.com/files/cutadapt-1.1.tar.gz</action>
+                <action type="move_directory_files">
+                    <source_directory>bin</source_directory>
+                    <destination_directory>$INSTALL_DIR/bin</destination_directory>
+                </action>
+                <action type="move_directory_files">
+                    <source_directory>cutadapt</source_directory>
+                    <destination_directory>$INSTALL_DIR/cutadapt</destination_directory>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+        </readme>
+    </package>
+    <package name="fastqc" version="0.10.1">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.10.1.zip</action>
+                <action type="move_directory_files">
+                    <source_directory>../FastQC/</source_directory>
+                    <destination_directory>$INSTALL_DIR/FastQC</destination_directory>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/FastQC</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+        FastQC needs a java Runtime Environment.
+        </readme>
+    </package>
+</tool_dependency>
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trim_galore	Sat Jul 06 09:52:23 2013 -0400
@@ -0,0 +1,1159 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Getopt::Long;
+use IPC::Open3;
+use File::Spec;
+use File::Basename;
+use Cwd;
+
+## This program is Copyright (C) 2012, Felix Krueger (felix.krueger@babraham.ac.uk)
+
+## This program is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+
+## You should have received a copy of the GNU General Public License
+## along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+
+## this script is taking in FastQ sequences and trims them with Cutadapt
+## last modified on 18 10 2012
+
+########################################################################
+
+# change these paths if needed
+
+my $path_to_cutadapt = 'cutadapt';
+my $path_to_fastqc = 'fastqc';
+
+########################################################################
+
+
+my $trimmer_version = '0.2.5';
+my $DOWARN = 1; # print on screen warning and text by default
+BEGIN { $SIG{'__WARN__'} = sub { warn $_[0] if $DOWARN } };
+
+my ($cutoff,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$a2,$error_rate,$output_dir,$no_report_file) = process_commandline();
+
+### SETTING DEFAULTS UNLESS THEY WERE SPECIFIED
+unless (defined $cutoff){
+  $cutoff = 20;
+}
+my $phred_score_cutoff = $cutoff; # only relevant for report
+
+unless (defined $adapter){
+  $adapter = 'AGATCGGAAGAGC';
+}
+unless (defined $a2){ # optional adapter for the second read in a pair. Only works for --paired trimming
+  $a2 = '';
+}
+
+unless (defined $stringency){
+  $stringency = 1;
+}
+
+unless (defined $length_cutoff){
+  $length_cutoff = 20;
+}
+
+if ($phred_encoding == 64){
+  $cutoff += 31;
+}
+
+my @filenames = @ARGV;
+
+my $file_1;
+my $file_2;
+
+foreach my $filename (@ARGV){
+  trim ($filename);
+}
+
+
+sub trim{
+  my $filename = shift;
+
+  my $output_filename = (split (/\//,$filename))[-1];
+  # warn "Here is the outputfile name: $output_filename\n";
+
+  my $report = $output_filename;
+  $report =~ s/$/_trimming_report.txt/;
+
+  if ($no_report_file) {
+    $report = File::Spec->devnull;
+    open (REPORT,'>',$report) or die "Failed to write to file: $!\n";
+    # warn "Redirecting report output to /dev/null\n";
+  }
+  else{
+    open (REPORT,'>',$output_dir.$report) or die "Failed to write to file: $!\n";
+    warn "Writing report to '$output_dir$report'\n";
+  }
+
+  warn "\nSUMMARISING RUN PARAMETERS\n==========================\nInput filename: $filename\n";
+  print REPORT "\nSUMMARISING RUN PARAMETERS\n==========================\nInput filename: $filename\n";
+
+  warn "Quality Phred score cutoff: $phred_score_cutoff\n";
+  print REPORT "Quality Phred score cutoff: $phred_score_cutoff\n";
+
+  warn "Quality encoding type selected: ASCII+$phred_encoding\n";
+  print REPORT "Quality encoding type selected: ASCII+$phred_encoding\n";
+
+  warn "Adapter sequence: '$adapter'\n";
+  print REPORT "Adapter sequence: '$adapter'\n";
+
+  if ($error_rate == 0.1){
+    warn "Maximum trimming error rate: $error_rate (default)\n";
+  }
+  else{
+    warn "Maximum trimming error rate: $error_rate\n";
+  }
+
+  print REPORT "Maximum trimming error rate: $error_rate";
+  if ($error_rate == 0.1){
+    print REPORT " (default)\n";
+  }
+  else{
+    print REPORT "\n";
+  }
+
+  if ($a2){
+    warn "Optional adapter 2 sequence (only used for read 2 of paired-end files): '$a2'\n";
+    print REPORT "Optional adapter 2 sequence (only used for read 2 of paired-end files): '$a2'\n";
+  }
+
+  warn "Minimum required adapter overlap (stringency): $stringency bp\n";
+  print REPORT "Minimum required adapter overlap (stringency): $stringency bp\n";
+
+  if ($validate){
+    warn "Minimum required sequence length for both reads before a sequence pair gets removed: $length_cutoff bp\n";
+    print REPORT "Minimum required sequence length for both reads before a sequence pair gets removed: $length_cutoff bp\n";
+  }
+  else{
+    warn "Minimum required sequence length before a sequence gets removed: $length_cutoff bp\n";
+    print REPORT "Minimum required sequence length before a sequence gets removed: $length_cutoff bp\n";
+  }
+
+  if ($validate){ # only for paired-end files
+
+    if ($retain){ # keeping single-end reads if only one end is long enough
+
+      if ($length_read_1 == 35){
+	warn "Length cut-off for read 1: $length_read_1 bp (default)\n";
+	print REPORT "Length cut-off for read 1: $length_read_1 bp (default)\n";
+      }
+      else{
+	warn "Length cut-off for read 1: $length_read_1 bp\n";
+	print REPORT "Length cut-off for read 1: $length_read_1 bp\n";
+      }
+
+      if ($length_read_2 == 35){
+	warn "Length cut-off for read 2: $length_read_2 b (default)\n";
+	print REPORT "Length cut-off for read 2: $length_read_2 bp (default)\n";
+      }
+      else{
+	warn "Length cut-off for read 2: $length_read_2 bp\n";
+	print REPORT "Length cut-off for read 2: $length_read_2 bp\n";
+      }
+    }
+  }
+
+  if ($rrbs){
+    warn "File was specified to be an MspI-digested RRBS sample. Sequences with adapter contamination will be trimmed a further 2 bp to remove potential methylation-biased bases from the end-repair reaction\n";
+    print REPORT "File was specified to be an MspI-digested RRBS sample. Sequences with adapter contamination will be trimmed a further 2 bp to remove potential methylation-biased bases from the end-repair reaction\n";
+  }
+
+  if ($non_directional){
+    warn "File was specified to be a non-directional MspI-digested RRBS sample. Sequences starting with either 'CAA' or 'CGA' will have the first 2 bp trimmed off to remove potential methylation-biased bases from the end-repair reaction\n";
+    print REPORT "File was specified to be a non-directional MspI-digested RRBS sample. Sequences starting with either 'CAA' or 'CGA' will have the first 2 bp trimmed off to remove potential methylation-biased bases from the end-repair reaction\n";
+  }
+
+  if ($trim){
+    warn "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n";
+    print REPORT "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n";
+  }
+
+  if ($fastqc){
+    warn "Running FastQC on the data once trimming has completed\n";
+    print REPORT "Running FastQC on the data once trimming has completed\n";
+
+    if ($fastqc_args){
+      warn "Running FastQC with the following extra arguments: '$fastqc_args'\n";
+      print REPORT  "Running FastQC with the following extra arguments: $fastqc_args\n";
+    }
+  }
+
+  if ($keep and $rrbs){
+    warn "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n";
+    print REPORT "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n";
+  }
+
+  if ($gzip or $filename =~ /\.gz$/){
+    warn "Output file will be GZIP compressed\n";
+    print REPORT "Output file will be GZIP compressed\n";
+  }
+
+  warn "\n";
+  print REPORT "\n";
+  sleep (3);
+
+  my $temp;
+
+  ### Proceeding differently for RRBS and other type of libraries
+  if ($rrbs){
+
+    ### Skipping quality filtering for RRBS libraries if a quality cutoff of 0 was specified
+    if ($cutoff == 0){
+      warn "Quality cutoff selected was 0    -    Skipping quality trimming altogether\n\n";
+      sleep (3);
+    }
+    else{
+
+      $temp = $filename;
+      $temp =~ s/$/_qual_trimmed.fastq/;
+      open (TEMP,'>',$output_dir.$temp) or die "Can't write to $temp: $!";
+
+      warn "  >>> Now performing adaptive quality trimming with a Phred-score cutoff of: $cutoff <<<\n\n";
+      sleep (3);
+
+      open (QUAL,"$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -a X $filename |") or die "Can't open pipe to Cutadapt: $!";
+
+      my $qual_count = 0;
+
+      while (1){
+	my $l1 = <QUAL>;
+	my $seq = <QUAL>;
+	my $l3 = <QUAL>;
+	my $qual = <QUAL>;
+	last unless (defined $qual);
+
+	$qual_count++;
+	if ($qual_count%10000000 == 0){
+	  warn "$qual_count sequences processed\n";
+	}
+	print TEMP "$l1$seq$l3$qual";
+      }
+
+      warn "\n  >>> Quality trimming completed <<<\n$qual_count sequences processed in total\n\n";
+      close QUAL or die "Unable to close filehandle: $!\n";
+      sleep (3);
+
+    }
+  }
+
+
+  if ($output_filename =~ /\.fastq$/){
+    $output_filename =~ s/\.fastq$/_trimmed.fq/;
+  }
+  elsif ($output_filename =~ /\.fastq\.gz$/){
+    $output_filename =~ s/\.fastq\.gz$/_trimmed.fq/;
+  }
+  elsif ($output_filename =~ /\.fq$/){
+    $output_filename =~ s/\.fq$/_trimmed.fq/;
+  }
+  elsif ($output_filename =~ /\.fq\.gz$/){
+    $output_filename =~ s/\.fq\.gz$/_trimmed.fq/;
+  }
+  else{
+    $output_filename =~ s/$/_trimmed.fq/;
+  }
+
+  warn "Writing final adapter and quality trimmed output to $output_filename\n\n";
+  open (OUT,'>',$output_dir.$output_filename) or die "Can't open $output_filename: $!\n";
+  sleep (2);
+
+  my $count = 0;
+  my $too_short = 0;
+  my $quality_trimmed = 0;
+  my $rrbs_trimmed = 0;
+  my $rrbs_trimmed_start = 0;
+  my $CAA = 0;
+  my $CGA = 0;
+
+  if ($rrbs and $cutoff != 0){
+
+    ### optionally using 2 different adapters for read 1 and read 2
+    if ($validate and $a2){
+      ### Figure out whether current file counts as read 1 or read 2 of paired-end files
+      if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
+	warn "\n  >>> Now performing adapter trimming for the adapter sequence: '$adapter' from file $temp <<< \n";
+	sleep (3);
+	open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $adapter $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";
+      }
+      else{                            # this is read 2 of a pair
+	warn "\n  >>> Now performing adapter trimming for the adapter sequence: '$a2' from file $temp <<< \n";
+	sleep (3);
+    	open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $a2 $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";
+      }
+    }
+    ### Using the same adapter for both read 1 and read 2
+    else{
+      warn "\n  >>> Now performing adapter trimming for the adapter sequence: '$adapter' from file $temp <<< \n";
+      sleep (3);
+      open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $adapter $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";
+    }
+
+    close WRITER or die $!; # not needed
+
+    open (QUAL,"$output_dir$temp") or die $!; # quality trimmed file
+
+    if ($filename =~ /\.gz$/){
+      open (IN,"zcat $filename |") or die $!; # original, untrimmed file
+    }
+    else{
+      open (IN,$filename) or die $!; # original, untrimmed file
+    }
+
+    while (1){
+
+      # we can process the output from Cutadapt and the original input 1 by 1 to decide if the adapter has been removed or not
+      my $l1 = <TRIM>;
+      my $seq = <TRIM>; # adapter trimmed sequence
+      my $l3 = <TRIM>;
+      my $qual = <TRIM>;
+
+      $_ = <IN>;   # irrelevant
+      my $original_seq = <IN>;
+      $_ = <IN>;   # irrelevant
+      $_ = <IN>;   # irrelevant
+
+      $_ = <QUAL>; # irrelevant
+      my $qual_trimmed_seq = <QUAL>;
+      $_ = <QUAL>; # irrelevant
+      my $qual_trimmed_qual = <QUAL>;
+
+      last unless (defined $qual and defined $qual_trimmed_qual); # could be empty strings
+
+      $count++;
+      if ($count%10000000 == 0){
+	warn "$count sequences processed\n";
+      }
+
+      chomp $seq;
+      chomp $qual;
+      chomp $qual_trimmed_seq;
+      chomp $original_seq;
+
+      my $quality_trimmed_seq_length = length $qual_trimmed_seq;
+
+      if (length $original_seq > length $qual_trimmed_seq){
+	++$quality_trimmed;
+      }
+
+      my $nd = 0;
+
+      ### NON-DIRECTIONAL RRBS
+      if ($non_directional){
+	if (length$seq > 2){
+	  if ($seq =~ /^CAA/){
+	    ++$CAA;
+	    $seq = substr ($seq,2,length($seq)-2);
+	    $qual = substr ($qual,2,length($qual)-2);
+	    ++$rrbs_trimmed_start;
+	    $nd = 1;
+	  }
+	  elsif ($seq =~ /^CGA/){
+	    $seq = substr ($seq,2,length($seq)-2);
+	    $qual = substr ($qual,2,length($qual)-2);
+	    ++$CGA;
+	    ++$rrbs_trimmed_start;
+	    $nd = 1;
+	  }
+	}
+      }
+
+      ### directional read
+      unless ($nd == 1){
+	if (length $seq >= 2 and length$seq < $quality_trimmed_seq_length){
+	  $seq = substr ($seq,0,length($seq)-2);
+	  $qual = substr ($qual,0,length($qual)-2);
+	  ++$rrbs_trimmed;
+	}
+      }
+
+      ### Shortening all sequences by 1 bp on the 3' end
+      if ($trim){
+	$seq = substr($seq,0,length($seq)-1);
+	$qual = substr($qual,0,length($qual)-1);
+      }
+
+      ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
+      if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
+	print OUT "$l1$seq\n$l3$qual\n";
+      }
+      else{ # single end
+	if (length $seq < $length_cutoff){
+	  ++$too_short;
+	  next;
+	}
+	else{
+	  print OUT "$l1$seq\n$l3$qual\n";
+	}
+      }
+    }
+
+    print REPORT "\n";
+    while (<ERROR>){
+      warn $_;
+      print REPORT $_;
+    }
+
+    close IN or die "Unable to close IN filehandle: $!";
+    close QUAL or die "Unable to close QUAL filehandle: $!";
+    close TRIM or die "Unable to close TRIM filehandle: $!";
+    close OUT or die  "Unable to close OUT filehandle: $!";
+  }
+  else{
+
+    ### optionally using 2 different adapters for read 1 and read 2
+    if ($validate and $a2){
+      ### Figure out whether current file counts as read 1 or read 2 of paired-end files
+      if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
+	warn "\n  >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$adapter' from file $filename <<< \n";
+	sleep (3);
+	open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $adapter $filename") or die "Failed to launch Cutadapt: $!";
+      }
+      else{                            # this is read 2 of a pair
+	warn "\n  >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$a2' from file $filename <<< \n";
+	sleep (3);
+	open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $a2 $filename") or die "Failed to launch Cutadapt: $!";
+      }
+    }
+    ### Using the same adapter for both read 1 and read 2
+    else{
+      warn "\n  >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$adapter' from file $filename <<< \n";
+      sleep (3);
+      open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $adapter $filename") or die "Failed to launch Cutadapt: $!";
+    }
+
+    close WRITER or die $!; # not needed
+
+    while (1){
+
+      my $l1 = <TRIM>;
+      my $seq = <TRIM>; # quality and/or adapter trimmed sequence
+      my $l3 = <TRIM>;
+      my $qual = <TRIM>;
+      # print "$l1$seq\n$l3$qual\n";
+      last unless (defined $qual); # could be an empty string
+
+      $count++;
+      if ($count%10000000 == 0){
+	warn "$count sequences processed\n";
+      }
+
+      chomp $seq;
+      chomp $qual;
+
+      ### Shortening all sequences by 1 bp on the 3' end
+      if ($trim){
+	$seq = substr($seq,0,length($seq)-1);
+	$qual = substr($qual,0,length($qual)-1);
+      }
+
+      ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
+      if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
+	print OUT "$l1$seq\n$l3$qual\n";
+      }
+      else{ # single end
+	if (length $seq < $length_cutoff){
+	  ++$too_short;
+	  next;
+	}
+	else{
+	  print OUT "$l1$seq\n$l3$qual\n";
+	}
+      }
+    }
+
+    print REPORT "\n";
+    while (<ERROR>){
+      warn $_;
+      print REPORT $_;
+    }
+
+    close TRIM or die "Unable to close TRIM filehandle: $!\n";
+    close ERROR or die "Unable to close ERROR filehandle: $!\n";
+    close OUT or die  "Unable to close OUT filehandle: $!\n";
+
+  }
+
+  if ($rrbs){
+    unless ($keep){ # keeping the quality trimmed intermediate file for RRBS files
+
+      # deleting temporary quality trimmed file
+      my $deleted = unlink "$output_dir$temp";
+
+      if ($deleted){
+	warn "Successfully deleted temporary file $temp\n\n";
+      }
+      else{
+	warn "Could not delete temporary file $temp";
+      }
+    }
+  }
+
+  warn "\nRUN STATISTICS FOR INPUT FILE: $filename\n";
+  print REPORT "\nRUN STATISTICS FOR INPUT FILE: $filename\n";
+
+  warn "="x 45,"\n";
+  print REPORT "="x 45,"\n";
+
+  warn "$count sequences processed in total\n";
+  print REPORT "$count sequences processed in total\n";
+
+  ###  only reporting this separately if quality and adapter trimming were performed separately
+  if ($rrbs){
+    my $percentage_shortened = sprintf ("%.1f",$quality_trimmed/$count*100);
+    warn "Sequences were truncated to a varying degree because of deteriorating qualities (Phred score quality cutoff: $cutoff):\t$quality_trimmed ($percentage_shortened%)\n";
+    print REPORT "Sequences were truncated to a varying degree because of deteriorating qualities (Phred score quality cutoff: $cutoff):\t$quality_trimmed ($percentage_shortened%)\n";
+  }
+
+  my $percentage_too_short = sprintf ("%.1f",$too_short/$count*100);
+  warn "Sequences removed because they became shorter than the length cutoff of $length_cutoff bp:\t$too_short ($percentage_too_short%)\n";
+  print REPORT "Sequences removed because they became shorter than the length cutoff of $length_cutoff bp:\t$too_short ($percentage_too_short%)\n";
+
+  if ($rrbs){
+    my $percentage_rrbs_trimmed = sprintf ("%.1f",$rrbs_trimmed/$count*100);
+    warn "RRBS reads trimmed by additional 2 bp when adapter contamination was detected:\t$rrbs_trimmed ($percentage_rrbs_trimmed%)\n";
+    print REPORT "RRBS reads trimmed by additional 2 bp when adapter contamination was detected:\t$rrbs_trimmed ($percentage_rrbs_trimmed%)\n";
+  }
+
+  if ($non_directional){
+    my $percentage_rrbs_trimmed_at_start = sprintf ("%.1f",$rrbs_trimmed_start/$count*100);
+    warn "RRBS reads trimmed by 2 bp at the start when read started with CAA ($CAA) or CGA ($CGA) in total:\t$rrbs_trimmed_start ($percentage_rrbs_trimmed_at_start%)\n";
+    print REPORT "RRBS reads trimmed by 2 bp at the start when read started with CAA ($CAA) or CGA ($CGA) in total:\t$rrbs_trimmed_start ($percentage_rrbs_trimmed_at_start%)\n";
+  }
+
+  warn "\n";
+  print REPORT "\n";
+
+  ### RUNNING FASTQC
+  if ($fastqc){
+    warn "\n  >>> Now running FastQC on the data <<<\n\n";
+    sleep (5);
+    if ($fastqc_args){
+      system ("$path_to_fastqc $fastqc_args $output_filename");
+    }
+    else{
+      system ("$path_to_fastqc $output_filename");
+    }
+  }
+
+  ### COMPRESSING OUTPUT FILE
+  unless ($validate){ # not gzipping intermediate files for paired-end files
+    if ($gzip or $filename =~ /\.gz$/){
+      warn "\n  >>> GZIP-ing the output file <<<\n\n";
+      system ("gzip -f $output_filename");
+      $output_filename = $output_filename.'.gz';
+    }
+  }
+
+  ### VALIDATE PAIRED-END FILES
+  if ($validate){
+
+    ### Figure out whether current file counts as read 1 or read 2 of paired-end files
+
+    if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
+      $file_1 = $output_filename;
+      shift @filenames;
+      # warn "This is read 1: $file_1\n\n";
+    }
+    else{                            # this is read 2 of a pair
+      $file_2 = $output_filename;
+      shift @filenames;
+      # warn "This is read 2: $file_2\n\n";
+    }
+
+    if ($file_1 and $file_2){
+      warn "Validate paired-end files $file_1 and $file_2\n";
+      sleep (1);
+
+      my ($val_1,$val_2,$un_1,$un_2) = validate_paired_end_files($file_1,$file_2);
+
+      ### RUNNING FASTQC
+      if ($fastqc){
+
+	warn "\n  >>> Now running FastQC on the validated data $val_1<<<\n\n";
+	sleep (3);
+
+	if ($fastqc_args){
+	  system ("$path_to_fastqc $fastqc_args $val_1");
+	}
+	else{
+	  system ("$path_to_fastqc $val_1");
+	}
+
+	warn "\n  >>> Now running FastQC on the validated data $val_2<<<\n\n";
+	sleep (3);
+
+	if ($fastqc_args){
+	  system ("$path_to_fastqc $fastqc_args $val_2");
+	}
+	else{
+	  system ("$path_to_fastqc $val_2");
+	}
+
+      }
+
+      if ($gzip or $filename =~ /\.gz$/){
+
+	# compressing main fastQ output files
+	warn "Compressing the validated output file $val_1 ...\n";
+	system ("gzip -f $val_1");
+
+	warn "Compressing the validated output file $val_2 ...\n";
+	system ("gzip -f $val_2");
+
+
+	if ($retain){ # compressing unpaired reads
+	  warn "Compressing the unpaired read output $un_1 ...\n";
+	  system ("gzip -f $un_1");
+
+	  warn "Compressing the unpaired read output $un_2 ...\n";
+	  system ("gzip -f $un_2");
+	}
+      }
+
+      warn "Deleting both intermediate output files $file_1 and $file_2\n";
+      unlink "$output_dir$file_1";
+      unlink "$output_dir$file_2";
+
+      warn "\n",'='x100,"\n\n";
+      sleep (3);
+
+      $file_1 = undef; # setting file_1 and file_2 to undef once validation is completed
+      $file_2 = undef;
+    }
+  }
+
+}
+
+sub validate_paired_end_files{
+
+  my $file_1 = shift;
+  my $file_2 = shift;
+
+  warn "file_1 $file_1 file_2 $file_2\n\n";
+
+  if ($file_1 =~ /\.gz$/){
+    open (IN1,"zcat $output_dir$file_1 |") or die "Couldn't read from file $file_1: $!\n";
+  }
+  else{
+    open (IN1, "$output_dir$file_1") or die "Couldn't read from file $file_1: $!\n";
+  }
+
+  if ($file_2 =~ /\.gz$/){
+    open (IN2,"zcat $output_dir$file_2 |") or die "Couldn't read from file $file_2: $!\n";
+  }
+  else{
+    open (IN2, "$output_dir$file_2") or die "Couldn't read from file $file_2: $!\n";
+  }
+
+  warn "\n>>>>> Now validing the length of the 2 paired-end infiles: $file_1 and $file_2 <<<<<\n";
+  sleep (3);
+
+  my $out_1 = $file_1;
+  my $out_2 = $file_2;
+
+  if ($out_1 =~ /gz$/){
+    $out_1 =~ s/trimmed\.fq\.gz$/val_1.fq/;
+  }
+  else{
+    $out_1 =~ s/trimmed\.fq$/val_1.fq/;
+  }
+
+  if ($out_2 =~ /gz$/){
+    $out_2 =~ s/trimmed\.fq\.gz$/val_2.fq/;
+  }
+  else{
+    $out_2 =~ s/trimmed\.fq$/val_2.fq/;
+  }
+
+  open (R1,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n";
+  open (R2,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n";
+  warn "Writing validated paired-end read 1 reads to $out_1\n";
+  warn "Writing validated paired-end read 2 reads to $out_2\n\n";
+
+  my $unpaired_1;
+  my $unpaired_2;
+
+  if ($retain){
+
+    $unpaired_1 = $file_1;
+    $unpaired_2 = $file_2;
+
+    if ($unpaired_1 =~ /gz$/){
+      $unpaired_1 =~ s/trimmed\.fq\.gz$/unpaired_1.fq/;
+    }
+    else{
+      $unpaired_1 =~ s/trimmed\.fq$/unpaired_1.fq/;
+    }
+
+    if ($unpaired_2 =~ /gz$/){
+      $unpaired_2 =~ s/trimmed\.fq\.gz$/unpaired_2.fq/;
+    }
+    else{
+      $unpaired_2 =~ s/trimmed\.fq$/unpaired_2.fq/;
+    }
+
+    open (UNPAIRED1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n";
+    open (UNPAIRED2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n";
+
+    warn "Writing unpaired read 1 reads to $unpaired_1\n";
+    warn "Writing unpaired read 2 reads to $unpaired_2\n\n";
+  }
+
+  my $sequence_pairs_removed = 0;
+  my $read_1_printed = 0;
+  my $read_2_printed = 0;
+
+  my $count = 0;
+
+  while (1){
+    my $id_1   = <IN1>;
+    my $seq_1  = <IN1>;
+    my $l3_1   = <IN1>;
+    my $qual_1 = <IN1>;
+    last unless ($id_1 and $seq_1 and $l3_1 and $qual_1);
+
+    my $id_2   = <IN2>;
+    my $seq_2  = <IN2>;
+    my $l3_2   = <IN2>;
+    my $qual_2 = <IN2>;
+    last unless ($id_2 and $seq_2 and $l3_2 and $qual_2);
+
+    ++$count;
+
+
+    ## small check if the sequence files appear to FastQ files
+    if ($count == 1){ # performed just once
+      if ($id_1 !~ /^\@/ or $l3_1 !~ /^\+/){
+	die "Input file doesn't seem to be in FastQ format at sequence $count\n";
+      }
+      if ($id_2 !~ /^\@/ or $l3_2 !~ /^\+/){
+	die "Input file doesn't seem to be in FastQ format at sequence $count\n";
+      }
+    }
+
+    chomp $seq_1;
+    chomp $seq_2;
+
+
+    ### making sure that the reads do have a sensible length
+    if ( (length($seq_1) < $length_cutoff) or (length($seq_2) < $length_cutoff) ){
+      ++$sequence_pairs_removed;
+      if ($retain){ # writing out single-end reads if they are longer than the cutoff
+
+	if ( length($seq_1) >= $length_read_1){ # read 1 is long enough
+	  print UNPAIRED1 $id_1;
+	  print UNPAIRED1 "$seq_1\n";
+	  print UNPAIRED1 $l3_1;
+	  print UNPAIRED1 $qual_1;
+	  ++$read_1_printed;
+	}
+
+	if ( length($seq_2) >= $length_read_2){ # read 2 is long enough
+	  print UNPAIRED2 $id_2;
+	  print UNPAIRED2 "$seq_2\n";
+	  print UNPAIRED2 $l3_2;
+	  print UNPAIRED2 $qual_2;
+	  ++$read_2_printed;
+	}
+
+      }
+    }
+    else{
+      print R1 $id_1;
+      print R1 "$seq_1\n";
+      print R1 $l3_1;
+      print R1 $qual_1;
+
+      print R2 $id_2;
+      print R2 "$seq_2\n";
+      print R2 $l3_2;
+      print R2 $qual_2;
+    }
+
+  }
+  my $percentage = sprintf("%.2f",$sequence_pairs_removed/$count*100);
+  warn "Total number of sequences analysed: $count\n\n";
+  warn "Number of sequence pairs removed: $sequence_pairs_removed ($percentage%)\n";
+
+  print REPORT "Total number of sequences analysed for the sequence pair length validation: $count\n\n";
+  print REPORT "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n";
+
+  if ($keep){
+    warn "Number of unpaired read 1 reads printed: $read_1_printed\n";
+    warn "Number of unpaired read 2 reads printed: $read_2_printed\n";
+  }
+
+  warn "\n";
+  if ($retain){
+    return ($out_1,$out_2,$unpaired_1,$unpaired_2);
+  }
+  else{
+    return ($out_1,$out_2);
+  }
+}
+
+sub process_commandline{
+  my $help;
+  my $quality;
+  my $adapter;
+  my $adapter2;
+  my $stringency;
+  my $report;
+  my $version;
+  my $rrbs;
+  my $length_cutoff;
+  my $keep;
+  my $fastqc;
+  my $non_directional;
+  my $phred33;
+  my $phred64;
+  my $fastqc_args;
+  my $trim;
+  my $gzip;
+  my $validate;
+  my $retain;
+  my $length_read_1;
+  my $length_read_2;
+  my $error_rate;
+  my $output_dir;
+  my $no_report_file;
+  my $suppress_warn;
+
+  my $command_line = GetOptions ('help|man' => \$help,
+				 'q|quality=i' => \$quality,
+				 'a|adapter=s' => \$adapter,
+				 'a2|adapter2=s' => \$adapter2,
+				 'report' => \$report,
+				 'version' => \$version,
+				 'stringency=i' => \$stringency,
+				 'fastqc' => \$fastqc,
+				 'RRBS' => \$rrbs,
+				 'keep' => \$keep,
+				 'length=i' => \$length_cutoff,
+				 'non_directional' => \$non_directional,
+				 'phred33' => \$phred33,
+				 'phred64' => \$phred64,
+				 'fastqc_args=s' => \$fastqc_args,
+				 'trim1' => \$trim,
+				 'gzip' => \$gzip,
+				 'paired_end' => \$validate,
+				 'retain_unpaired' => \$retain,
+				 'length_1|r1=i' => \$length_read_1,
+				 'length_2|r2=i' => \$length_read_2,
+				 'e|error_rate=s' => \$error_rate,
+				 'o|output_dir=s' => \$output_dir,
+				 'no_report_file' => \$no_report_file,
+				 'suppress_warn' => \$suppress_warn,
+				);
+
+  ### EXIT ON ERROR if there were errors with any of the supplied options
+  unless ($command_line){
+    die "Please respecify command line options\n";
+  }
+
+  ### HELPFILE
+  if ($help){
+    print_helpfile();
+    exit;
+  }
+
+
+
+
+  if ($version){
+    print << "VERSION";
+
+                          Quality-/Adapter-/RRBS-Trimming
+                               (powered by Cutadapt)
+                                  version $trimmer_version
+
+                             Last update: 18 10 2012
+
+VERSION
+    exit;
+  }
+
+  ### RRBS
+  unless ($rrbs){
+    $rrbs = 0;
+  }
+
+  ### SUPRESS WARNINGS
+  if (defined $suppress_warn){
+    $DOWARN = 0;
+  }
+
+  ### QUALITY SCORES
+  my $phred_encoding;
+  if ($phred33){
+    if ($phred64){
+      die "Please specify only a single quality encoding type (--phred33 or --phred64)\n\n";
+    }
+    $phred_encoding = 33;
+  }
+  elsif ($phred64){
+    $phred_encoding = 64;
+  }
+  unless ($phred33 or $phred64){
+    warn "No quality encoding type selected. Assuming that the data provided uses Sanger encoded Phred scores (default)\n\n";
+    $phred_encoding = 33;
+    sleep (3);
+  }
+
+  ### NON-DIRECTIONAL RRBS
+  if ($non_directional){
+    unless ($rrbs){
+      die "Option '--non_directional' requires '--rrbs' to be specified as well. Please re-specify!\n";
+    }
+  }
+  else{
+    $non_directional = 0;
+  }
+
+  if ($fastqc_args){
+    $fastqc = 1; # specifying fastqc extra arguments automatically means that FastQC will be executed
+  }
+  else{
+    $fastqc_args = 0;
+  }
+
+  ### CUSTOM ERROR RATE
+  if (defined $error_rate){
+    # make sure that the error rate is between 0 and 1
+    unless ($error_rate >= 0 and $error_rate <= 1){
+      die "Please specify an error rate between 0 and 1 (the default is 0.1)\n";
+    }
+  }
+  else{
+    $error_rate = 0.1; # (default)
+  }
+
+  if (defined $adapter){
+    unless ($adapter =~ /^[ACTGNactgn]+$/){
+      die "Adapter sequence must contain DNA characters only (A,C,T,G or N)!\n";
+    }
+    $adapter = uc$adapter;
+  }
+
+  if (defined $adapter2){
+    unless ($validate){
+      die "An optional adapter for read 2 of paired-end files requires '--paired' to be specified as well! Please re-specify\n";
+    }
+    unless ($adapter2 =~ /^[ACTGNactgn]+$/){
+      die "Optional adapter 2 sequence must contain DNA characters only (A,C,T,G or N)!\n";
+    }
+    $adapter2 = uc$adapter2;
+  }
+
+  ### files are supposed to be paired-end files
+  if ($validate){
+
+    # making sure that an even number of reads has been supplied
+    unless ((scalar@ARGV)%2 == 0){
+      die "Please provide an even number of input files for paired-end FastQ trimming! Aborting ...\n";
+    }
+
+    ## CUTOFF FOR VALIDATED READ-PAIRS
+
+    if (defined $length_read_1 or defined $length_read_2){
+      unless ($retain){
+	die "Please specify --keep_unpaired to alter the unpaired single-end read length cut off(s)\n\n";
+      }
+
+      if (defined $length_read_1){
+	unless ($length_read_1 >= 15 and $length_read_1 <= 100){
+	  die "Please select a sensible cutoff for when a read pair should be filtered out due to short length (allowed range: 15-100 bp)\n\n";
+	}
+	unless ($length_read_1 > $length_cutoff){
+	  die "The single-end unpaired read length needs to be longer than the paired-end cut-off value\n\n";
+	}
+      }
+
+      if (defined $length_read_2){
+	unless ($length_read_2 >= 15 and $length_read_2 <= 100){
+	  die "Please select a sensible cutoff for when a read pair should be filtered out due to short length (allowed range: 15-100 bp)\n\n";
+	}
+	unless ($length_read_2 > $length_cutoff){
+	  die "The single-end unpaired read length needs to be longer than the paired-end cut-off value\n\n";
+	}
+      }
+    }
+
+    if ($retain){
+      $length_read_1 = 35 unless (defined $length_read_1);
+      $length_read_2 = 35 unless (defined $length_read_2);
+    }
+  }
+
+  unless ($no_report_file){
+    $no_report_file = 0;
+  }
+
+  ### OUTPUT DIR PATH
+  if ($output_dir){
+    unless ($output_dir =~ /\/$/){
+      $output_dir =~ s/$/\//;
+    }
+  }
+  else{
+    $output_dir = '';
+  }
+
+  return ($quality,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$adapter2,$error_rate,$output_dir,$no_report_file);
+}
+
+
+
+
+sub print_helpfile{
+  print << "HELP";
+
+ USAGE:
+
+trim_galore [options] <filename(s)>
+
+
+-h/--help               Print this help message and exits.
+
+-v/--version            Print the version information and exits.
+
+-q/--quality <INT>      Trim low-quality ends from reads in addition to adapter removal. For
+                        RRBS samples, quality trimming will be performed first, and adapter
+                        trimming is carried in a second round. Other files are quality and adapter
+                        trimmed in a single pass. The algorithm is the same as the one used by BWA
+                        (Subtract INT from all qualities; compute partial sums from all indices
+                        to the end of the sequence; cut sequence at the index at which the sum is
+                        minimal). Default Phred score: 20.
+
+--phred33               Instructs Cutadapt to use ASCII+33 quality scores as Phred scores
+                        (Sanger/Illumina 1.9+ encoding) for quality trimming. Default: ON.
+
+--phred64               Instructs Cutadapt to use ASCII+64 quality scores as Phred scores
+                        (Illumina 1.5 encoding) for quality trimming.
+
+--fastqc                Run FastQC in the default mode on the FastQ file once trimming is complete.
+
+--fastqc_args "<ARGS>"  Passes extra arguments to FastQC. If more than one argument is to be passed
+                        to FastQC they must be in the form "arg1 arg2 etc.". An example would be:
+                        --fastqc_args "--nogroup --outdir /home/". Passing extra arguments will
+                        automatically invoke FastQC, so --fastqc does not have to be specified
+                        separately.
+
+-a/--adapter <STRING>   Adapter sequence to be trimmed. If not specified explicitely, the first 13
+                        bp of the Illumina adapter 'AGATCGGAAGAGC' will be used by default.
+
+-a2/--adapter2 <STRING> Optional adapter sequence to be trimmed off read 2 of paired-end files. This
+                        option requires '--paired' to be specified as well.
+
+
+-s/--stringency <INT>   Overlap with adapter sequence required to trim a sequence. Defaults to a
+                        very stringent setting of '1', i.e. even a single bp of overlapping sequence
+                        will be trimmed of the 3' end of any read.
+
+-e <ERROR RATE>         Maximum allowed error rate (no. of errors divided by the length of the matching
+                        region) (default: 0.1)
+
+--gzip                  Compress the output file with gzip. If the input files are gzip-compressed
+                        the output files will be automatically gzip compressed as well.
+
+--length <INT>          Discard reads that became shorter than length INT because of either
+                        quality or adapter trimming. A value of '0' effectively disables
+                        this behaviour. Default: 20 bp.
+
+                        For paired-end files, both reads of a read-pair need to be longer than
+                        <INT> bp to be printed out to validated paired-end files (see option --paired).
+                        If only one read became too short there is the possibility of keeping such
+                        unpaired single-end reads (see --retain_unpaired). Default pair-cutoff: 20 bp.
+
+-o/--output_dir <DIR>   If specified all output will be written to this directory instead of the current
+                        directory.
+
+--no_report_file        If specified no report file will be generated.
+
+--suppress_warn         If specified any output to STDOUT or STDERR will be suppressed.
+
+
+
+RRBS-specific options (MspI digested material):
+
+--rrbs                  Specifies that the input file was an MspI digested RRBS sample (recognition
+                        site: CCGG). Sequences which were adapter-trimmed will have a further 2 bp
+                        removed from their 3' end. This is to avoid that the filled-in C close to the
+                        second MspI site in a sequence is used for methylation calls. Sequences which
+                        were merely trimmed because of poor quality will not be shortened further.
+
+--non_directional       Selecting this option for non-directional RRBS libraries will screen
+                        quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read
+                        and, if found, removes the first two basepairs. Like with the option
+                        '--rrbs' this avoids using cytosine positions that were filled-in
+                        during the end-repair step. '--non_directional' requires '--rrbs' to
+                        be specified as well.
+
+--keep                  Keep the quality trimmed intermediate file. Default: off, which means
+                        the temporary file is being deleted after adapter trimming. Only has
+                        an effect for RRBS samples since other FastQ files are not trimmed
+                        for poor qualities separately.
+
+
+Note for RRBS using MseI:
+
+If your DNA material was digested with MseI (recognition motif: TTAA) instead of MspI it is NOT necessary
+to specify --rrbs or --non_directional since virtually all reads should start with the sequence
+'TAA', and this holds true for both directional and non-directional libraries. As the end-repair of 'TAA'
+restricted sites does not involve any cytosines it does not need to be treated especially. Instead, simply
+run Trim Galore! in the standard (i.e. non-RRBS) mode.
+
+
+Paired-end specific options:
+
+--paired                This option performs length trimming of quality/adapter/RRBS trimmed reads for
+                        paired-end files. To pass the validation test, both sequences of a sequence pair
+                        are required to have a certain minimum length which is governed by the option
+                        --length (see above). If only one read passes this length threshold the
+                        other read can be rescued (see option --retain_unpaired). Using this option lets
+                        you discard too short read pairs without disturbing the sequence-by-sequence order
+                        of FastQ files which is required by many aligners.
+
+                        Trim Galore! expects paired-end files to be supplied in a pairwise fashion, e.g.
+                        file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... .
+
+-t/--trim1              Trims 1 bp off every read from its 3' end. This may be needed for FastQ files that
+                        are to be aligned as paired-end data with Bowtie. This is because Bowtie (1) regards
+                        alignments like this:
+
+                          R1 --------------------------->     or this:    ----------------------->  R1
+                          R2 <---------------------------                       <-----------------  R2
+
+                        as invalid (whenever a start/end coordinate is contained within the other read).
+
+--retain_unpaired       If only one of the two paired-end reads became too short, the longer
+                        read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq'
+                        output files. The length cutoff for unpaired single-end reads is
+                        governed by the parameters -r1/--length_1 and -r2/--length_2. Default: OFF.
+
+-r1/--length_1 <INT>    Unpaired single-end read length cutoff needed for read 1 to be written to
+                        '.unpaired_1.fq' output file. These reads may be mapped in single-end mode.
+                        Default: 35 bp.
+
+-r2/--length_2 <INT>    Unpaired single-end read length cutoff needed for read 2 to be written to
+                        '.unpaired_2.fq' output file. These reads may be mapped in single-end mode.
+                        Default: 35 bp.
+
+
+Last modified on 18 Oct 2012.
+
+HELP
+  exit;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trim_galore_wrapper.xml	Sat Jul 06 09:52:23 2013 -0400
@@ -0,0 +1,577 @@
+<tool id="trim_galore" name="Trim Galore" version="0.2.4.1">
+    <!-- Wrapper compatible with Trim Galore version 0.2.4.0 -->
+    <description>adaptive quality and adapter trimmer</description>
+    <version_command interpreter="perl">trim_galore --version</version_command>
+    <requirements>
+        <requirement type="package" version="1.1">cutadapt</requirement>
+        <requirement type="package" version="0.10.1">fastqc</requirement>
+    </requirements>
+    <command interpreter="perl">
+        #from glob import glob
+        #import tempfile, os
+
+        trim_galore
+
+        ##
+        ##  Input parameters
+        ##
+
+
+        #if $params.settingsType == "custom":
+
+            $params.fastqc
+            ## default 20
+            --quality $params.quality
+            ## default 'AGATCGGAAGAGC'
+            #if $params.adapter.strip() != '':
+                --adapter $params.adapter
+            #end if
+            ## default 1
+            --stringency $params.stringency
+
+            ## default 0.1
+            -e $params.error_rate
+
+            ## default 20
+            --length $params.min_length
+
+            #if $params.retain_unpaired.settingsType == "retain_unpaired_output":
+                --retain_unpaired
+                --length_1 $params.retain_unpaired.length_1
+                --length_2 $params.retain_unpaired.length_2
+            #end if
+
+        #end if
+
+        ##
+        ## RBBS specific options.
+        ##
+
+        #if $rrbs.settingsType == "custom":
+
+            $rrbs.rrbs
+            $rrbs.non_directional
+
+        #end if
+
+        ##
+        ##  Creating a temporary directory where trim_galore will store all result files
+        ##
+
+        #set $temp_dir = os.path.abspath(tempfile.mkdtemp())
+
+        --output_dir $temp_dir
+        --suppress_warn
+
+
+        #if $singlePaired.sPaired == "single":
+
+            #if $singlePaired.input_singles.ext == "fastqillumina":
+                --phred64
+            #elif $singlePaired.input_singles.ext == "fastqsanger":
+                --phred33
+            #end if
+
+            #if $params.settingsType == "custom":
+                #if not $params.report:
+                    --no_report_file
+                #end if
+            #end if
+
+            ## input sequence
+            $singlePaired.input_singles
+        #else:
+            --paired
+            #if $singlePaired.input_mate1.ext == "fastqillumina":
+                --phred64
+            #elif $singlePaired.input_mate1.ext == "fastqsanger":
+                --phred33
+            #end if
+
+            $singlePaired.trim1
+            #if $singlePaired.adapter2.strip() != '':
+                --adapter2 $singlePaired.adapter2
+            #end if
+
+            #if $params.settingsType == "custom":
+                #if not $params.report:
+                    --no_report_file
+                #end if
+            #end if
+
+            ## input sequences
+            $singlePaired.input_mate1
+            $singlePaired.input_mate2
+
+        #end if
+
+        &amp;&amp;
+
+        ##
+        ##  Trim Galore! run is finished. Move the result files to the proper place
+        ##
+
+
+        #if $singlePaired.sPaired == "single":
+            #set $single_end_path =  os.path.join($temp_dir, os.path.basename(str($singlePaired.input_singles)) + '_trimmed.fq')
+            mv $single_end_path $trimmed_reads_single;
+
+            #if $params.settingsType == "custom":
+                #if $params.report:
+                    #set $report_path =  os.path.join($temp_dir, os.path.basename(str($singlePaired.input_singles)) + '_trimming_report.txt')
+                    mv $report_path $report_file;
+                #end if
+            #end if
+
+        #else:
+            #set $paired_end_path_1 =  os.path.join($temp_dir, os.path.basename(str($singlePaired.input_mate1)) + '_val_1.fq')
+            #set $paired_end_path_2 =  os.path.join($temp_dir, os.path.basename(str($singlePaired.input_mate2)) + '_val_2.fq')
+            mv $paired_end_path_1 $trimmed_reads_pair1;
+            mv $paired_end_path_2 $trimmed_reads_pair2;
+
+            #if $params.settingsType == "custom":
+                #if $params.retain_unpaired.settingsType == "retain_unpaired_output":
+                    #set $unpaired_path_1 =  os.path.join($temp_dir, os.path.basename(str($singlePaired.input_mate1)) + '_unpaired_1.fq')
+                    #set $unpaired_path_2 =  os.path.join($temp_dir, os.path.basename(str($singlePaired.input_mate2)) + '_unpaired_2.fq')
+                    mv $unpaired_path_1 $unpaired_reads_1;
+                    mv $unpaired_path_2 $unpaired_reads_2;
+                #end if
+
+                #if $params.report:
+                    #set $report_path =  os.path.join($temp_dir, os.path.basename(str($singlePaired.input_mate1)) + '_trimming_report.txt')
+                    mv $report_path $report_file;
+                #end if
+
+            #end if
+        #end if
+
+        ## delete the temp_dir
+        rm -rf $temp_dir
+
+    </command>
+    <inputs>
+
+        <!-- Input Parameters -->
+        <conditional name="singlePaired">
+            <param name="sPaired" type="select" label="Is this library mate-paired?">
+              <option value="single">Single-end</option>
+              <option value="paired">Paired-end</option>
+            </param>
+            <when value="single">
+                <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
+            </when>
+            <when value="paired">
+                <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
+                <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
+                <param name="trim1" type="boolean" truevalue="--trim1" falsevalue="" checked="False" label="Trims 1 bp off every read from its 3' end." help="" />
+                <param name="adapter2" type="text" value="" label="Optional adapter sequence to be trimmed off read 2">
+                    <validator type="regex" message="Adapter sequence must contain DNA characters only (A,C,T,G or N)">^[ACTGNactgn]*$</validator>
+                </param>
+            </when>
+        </conditional>
+
+
+        <conditional name="params">
+            <param name="settingsType" type="select" label="Trim galore! advanced settings" help="You can use the default settings or set custom values for any of Trim Galore's parameters.">
+              <option value="default">Use Defaults</option>
+              <option value="custom">Full parameter list</option>
+            </param>
+            <when value="default" />
+            <!-- Full/advanced params. -->
+            <when value="custom">
+                <param name="fastqc" type="boolean" truevalue="--fastqc" falsevalue="" checked="False" label="Run FastQC in the default mode on the FastQ file once trimming is complete" help="" />
+                <param name="quality" type="integer" value="20" label="Trim low-quality ends from reads in addition to adapter removal." help="For more information please see below." />
+                <param name="adapter" type="text" value="AGATCGGAAGAGC" label="Adapter sequence to be trimmed">
+                    <validator type="regex" message="Adapter sequence must contain DNA characters only (A,C,T,G or N)">^[ACTGNactgn]*$</validator>
+                </param>
+                <param name="stringency" type="integer" value="1" label="Overlap with adapter sequence required to trim a sequence" />
+                <param name="error_rate" type="float" value="0.1" label="Maximum allowed error rate" />
+                <param name="min_length" type="integer" value="20" label="Discard reads that became shorter than length INT" />
+
+                <param name="report" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Generate a report file" help="" />
+
+                <conditional name="retain_unpaired">
+                    <param name="settingsType" type="select" label="specify if you would like to retain unpaired reads">
+                      <option value="no_output">Do not output unpaired reads</option>
+                      <option value="retain_unpaired_output">Output unpaired reads</option>
+                    </param>
+                    <when value="no_output" />
+                    <!-- Output params. -->
+                    <when value="retain_unpaired_output">
+                        <param name="length_1" type="integer" value="35" label="Unpaired single-end read length cutoff needed for read 1 to be written" />
+                        <param name="length_2" type="integer" value="35" label="Unpaired single-end read length cutoff needed for read 2 to be written" />
+                    </when>  <!-- output -->
+                </conditional>  <!-- retain_unpaired -->
+
+            </when>  <!-- full -->
+        </conditional>  <!-- params -->
+
+        <conditional name="rrbs">
+            <param name="settingsType" type="select" label="RRBS specific settings">
+              <option value="default">Use Defaults (no RRBS)</option>
+              <option value="custom">Full parameter list</option>
+            </param>
+            <when value="default" />
+            <!-- Full/advanced params. -->
+            <when value="custom">
+                <param name="rrbs" type="boolean" truevalue="--rrbs" falsevalue="" checked="True" label="Specifies that the input file was an MspI digested RRBS sample" />
+                <param name="non_directional" type="boolean" truevalue="--non_directional" falsevalue="" checked="False" label="Selecting this option for non-directional RRBS libraries" />
+            </when>  <!-- full -->
+      </conditional>  <!-- params -->
+
+    </inputs>
+    <outputs>
+
+        <data format="fastq" name="trimmed_reads_single" label="${tool.name} on ${on_string}: trimmed reads">
+          <filter>singlePaired['sPaired'] == "single"</filter>
+          <actions>
+                <action type="format">
+                  <option type="from_param" name="singlePaired.input_singles" param_attribute="ext" />
+                </action>
+          </actions>
+        </data>
+
+        <data format="fastq" name="trimmed_reads_pair1" label="${tool.name} on ${on_string}: trimmed reads pair 1">
+            <filter>singlePaired['sPaired'] == "paired"</filter>
+            <actions>
+                <action type="format">
+                    <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
+                </action>
+          </actions>
+        </data>
+
+        <data format="fastq" name="trimmed_reads_pair2" label="${tool.name} on ${on_string}: trimmed reads pair 2">
+            <filter>singlePaired['sPaired'] == "paired"</filter>
+            <actions>
+                <action type="format">
+                    <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
+                </action>
+            </actions>
+        </data>
+
+        <data format="fastq" name="unpaired_reads_1" label="${tool.name} on ${on_string}: unpaired reads (1)">
+          <filter>
+            ((
+              params['settingsType'] == "custom" and
+              params['retain_unpaired']['settingsType'] == "retain_unpaired_output"
+            ))
+          </filter>
+          <actions>
+                <action type="format">
+                  <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
+                </action>
+          </actions>
+        </data>
+
+        <data format="fastq" name="unpaired_reads_2" label="${tool.name} on ${on_string}: unpaired reads (2)">
+          <filter>
+            ((
+              params['settingsType'] == "custom" and
+              params['retain_unpaired']['settingsType'] == "retain_unpaired_output"
+            ))
+          </filter>
+          <actions>
+                <action type="format">
+                  <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
+                </action>
+          </actions>
+        </data>
+
+        <data format="txt" name="report_file" label="${tool.name} on ${on_string}: report file">
+          <filter>
+            ((
+              params['settingsType'] == "custom" and
+              params['report'] == True
+            ))
+          </filter>
+        </data>
+
+    </outputs>
+    <tests>
+    </tests>
+
+    <help>
+
+**What it does**
+
+TrimGalore!_ is a wrapper script that makes use of the publically available
+adapter trimming tool Cutadapt and FastQC for optional quality control once
+the trimming process has completed.
+
+
+.. _TrimGalore!: http://www.bioinformatics.babraham.ac.uk/projects/trim_galore/
+
+
+It is developed by Krueger F at the Babraham Institute.
+
+------
+
+**Know what you are doing**
+
+.. class:: warningmark
+
+There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words = running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
+
+ .. __: http://www.bioinformatics.babraham.ac.uk/projects/bismark/
+
+------
+
+**Input formats**
+
+Bismark accepts files in either Sanger FASTQ format (galaxy type *fastqsanger*), Illumina FASTQ format (galaxy type *fastqillumina*) or FASTA format (galaxy type *fasta*). Use the FASTQ Groomer to prepare your files.
+
+------
+
+**A Note on Built-in Reference Genomes**
+
+The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY.
+
+------
+
+The final output of Bismark is in SAM format by default.
+
+**Outputs**
+
+The output is in SAM format, and has the following columns::
+
+    Column  Description
+  --------  --------------------------------------------------------
+  1  QNAME  seq-ID
+  2  FLAG   this flag tries to take the strand a bisulfite read
+            originated from into account
+            (this is different from ordinary DNA alignment flags!)
+  3  RNAME  chromosome
+  4  POS    start position
+  5  MAPQ   always 255
+  6  CIGAR  extended CIGAR string
+  7  MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
+  8  MPOS   1-based Mate POSition
+  9  ISIZE  Inferred insert SIZE
+  10 SEQ    query SEQuence on the same strand as the reference
+  11 QUAL   Phred33 scale
+  12 NM-tag edit distance to the reference)
+  13 XX-tag base-by-base mismatches to the reference.
+            This does not include indels.
+  14 XM-tag methylation call string
+  15 XR-tag read conversion state for the alignment
+  16 XG-tag genome conversion state for the alignment
+
+
+Each read of paired-end alignments is written out in a separate line in the above format.
+
+
+It looks like this (scroll sideways to see the entire example)::
+
+  QNAME	FLAG	RNAME	POS	MAPQ	CIAGR	MRNM	MPOS	ISIZE	SEQ	QUAL	OPT
+  HWI-EAS91_1_30788AAXX:1:1:1761:343	4	*	0	0	*	*	0	0	AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG	hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
+  HWI-EAS91_1_30788AAXX:1:1:1578:331	4	*	0	0	*	*	0	0	GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG	hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
+
+-------
+
+**Bismark settings**
+
+All of the options have a default value. You can change any of them. If any Bismark function is missing please contact the tool author or your Galaxy admin.
+
+------
+
+**Bismark parameter list**
+
+This is an exhaustive list of Bismark options:
+
+------
+
+**OPTIONS**
+
+
+Input::
+
+  --singles              A comma- or space-separated list of files containing the reads to be aligned (e.g.
+                         lane1.fq,lane2.fq lane3.fq). Reads may be a mix of different lengths. Bismark will
+                         produce one mapping result and one report file per input file.
+
+  -1 mates1              Comma-separated list of files containing the #1 mates (filename usually includes
+                         "_1"), e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must
+                         correspond file-for-file and read-for-read with those specified in mates2.
+                         Reads may be a mix of different lengths. Bismark will produce one mapping result
+                         and one report file per paired-end input file pair.
+
+  -2 mates2              Comma-separated list of files containing the #2 mates (filename usually includes
+                         "_2"), e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must
+                         correspond file-for-file and read-for-read with those specified in mates1.
+                         Reads may be a mix of different lengths.
+
+  -q/--fastq             The query input files (specified as mate1,mate2 or singles are FASTQ
+                         files (usually having extension .fg or .fastq). This is the default. See also
+                         --solexa-quals.
+
+  -f/--fasta             The query input files (specified as mate1,mate2 or singles are FASTA
+                         files (usually havin extension .fa, .mfa, .fna or similar). All quality values
+                         are assumed to be 40 on the Phred scale.
+
+  -s/--skip INT          Skip (i.e. do not align) the first INT reads or read pairs from the input.
+
+  -u/--upto INT          Only aligns the first INT reads or read pairs from the input. Default: no limit.
+
+  --phred33-quals        FASTQ qualities are ASCII chars equal to the Phred quality plus 33. Default: on.
+
+  --phred64-quals        FASTQ qualities are ASCII chars equal to the Phred quality plus 64. Default: off.
+
+  --solexa-quals         Convert FASTQ qualities from solexa-scaled (which can be negative) to phred-scaled
+                         (which can't). The formula for conversion is:
+                         phred-qual = 10 * log(1 + 10 ** (solexa-qual/10.0)) / log(10). Used with -q. This
+                         is usually the right option for use with (unconverted) reads emitted by the GA
+                         Pipeline versions prior to 1.3. Works only for Bowtie 1. Default: off.
+
+  --solexa1.3-quals      Same as --phred64-quals. This is usually the right option for use with (unconverted)
+                         reads emitted by GA Pipeline version 1.3 or later. Default: off.
+
+
+Alignment::
+
+  -n/--seedmms INT       The maximum number of mismatches permitted in the "seed", i.e. the first L base pairs
+                         of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the
+                         default is 1. This option is only available for Bowtie 1 (for Bowtie 2 see -N).
+
+  -l/--seedlen           The "seed length"; i.e., the number of bases of the high quality end of the read to
+                         which the -n ceiling applies. The default is 28. Bowtie (and thus Bismark) is faster for
+                         larger values of -l. This option is only available for Bowtie 1 (for Bowtie 2 see -L).
+
+  -e/--maqerr INT        Maximum permitted total of quality values at all mismatched read positions throughout
+                         the entire alignment, not just in the "seed". The default is 70. Like Maq, bowtie rounds
+                         quality values to the nearest 10 and saturates at 30. This value is not relevant for
+                         Bowtie 2.
+
+  --chunkmbs INT         The number of megabytes of memory a given thread is given to store path descriptors in
+                         --best mode. Best-first search must keep track of many paths at once to ensure it is
+                         always extending the path with the lowest cumulative cost. Bowtie tries to minimize the
+                         memory impact of the descriptors, but they can still grow very large in some cases. If
+                         you receive an error message saying that chunk memory has been exhausted in --best mode,
+                         try adjusting this parameter up to dedicate more memory to the descriptors. This value
+                         is not relevant for Bowtie 2. Default: 512.
+
+  -I/--minins INT        The minimum insert size for valid paired-end alignments. E.g. if -I 60 is specified and
+                         a paired-end alignment consists of two 20-bp alignments in the appropriate orientation
+                         with a 20-bp gap between them, that alignment is considered valid (as long as -X is also
+                         satisfied). A 19-bp gap would not be valid in that case. Default: 0.
+
+  -X/--maxins INT        The maximum insert size for valid paired-end alignments. E.g. if -X 100 is specified and
+                         a paired-end alignment consists of two 20-bp alignments in the proper orientation with a
+                         60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied).
+                         A 61-bp gap would not be valid in that case. Default: 500.
+
+
+
+Output::
+
+  --non_directional      The sequencing library was constructed in a non strand-specific manner, alignments to all four
+                         bisulfite strands will be reported. Default: OFF.
+
+                         (The current Illumina protocol for BS-Seq is directional, in which case the strands complementary
+                         to the original strands are merely theoretical and should not exist in reality. Specifying directional
+                         alignments (which is the default) will only run 2 alignment threads to the original top (OT)
+                         or bottom (OB) strands in parallel and report these alignments. This is the recommended option
+                         for sprand-specific libraries).
+
+  --sam-no-hd            Suppress SAM header lines (starting with @). This might be useful when very large input files are
+                         split up into several smaller files to run concurrently and the output files are to be merged.
+
+  --quiet                Print nothing besides alignments.
+
+  --vanilla              Performs bisulfite mapping with Bowtie 1 and prints the 'old' output (as in Bismark 0.5.X) instead
+                         of SAM format output.
+
+  -un/--unmapped         Write all reads that could not be aligned to a file in the output directory. Written reads will
+                         appear as they did in the input, without any translation of quality values that may have
+                         taken place within Bowtie or Bismark. Paired-end reads will be written to two parallel files with _1
+                         and _2 inserted in their filenames, i.e. _unmapped_reads_1.txt and unmapped_reads_2.txt. Reads
+                         with more than one valid alignment with the same number of lowest mismatches (ambiguous mapping)
+                         are also written to _unmapped_reads.txt unless the option --ambiguous is specified as well.
+
+  --ambiguous            Write all reads which produce more than one valid alignment with the same number of lowest
+                         mismatches or other reads that fail to align uniquely to a file in the output directory.
+                         Written reads will appear as they did in the input, without any of the translation of quality
+                         values that may have taken place within Bowtie or Bismark. Paired-end reads will be written to two
+                         parallel files with _1 and _2 inserted in theit filenames, i.e. _ambiguous_reads_1.txt and
+                         _ambiguous_reads_2.txt. These reads are not written to the file specified with --un.
+
+  -o/--output_dir DIR    Write all output files into this directory. By default the output files will be written into
+                         the same folder as the input file(s). If the specified folder does not exist, Bismark will attempt
+                         to create it first. The path to the output folder can be either relative or absolute.
+
+  --temp_dir DIR          Write temporary files to this directory instead of into the same directory as the input files. If
+                         the specified folder does not exist, Bismark will attempt to create it first. The path to the
+                         temporary folder can be either relative or absolute.
+
+------
+
+Bowtie 2 alignment options::
+
+  -N INT                 Sets the number of mismatches to allowed in a seed alignment during multiseed alignment.
+                         Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower)
+                         but increases sensitivity. Default: 0. This option is only available for Bowtie 2 (for
+                         Bowtie 1 see -n).
+
+  -L INT                   Sets the length of the seed substrings to align during multiseed alignment. Smaller values
+                         make alignment slower but more senstive. Default: the --sensitive preset of Bowtie 2 is
+                         used by default, which sets -L to 20. This option is only available for Bowtie 2 (for
+                         Bowtie 1 see -l).
+
+  --ignore-quals         When calculating a mismatch penalty, always consider the quality value at the mismatched
+                         position to be the highest possible, regardless of the actual value. I.e. input is treated
+                         as though all quality values are high. This is also the default behavior when the input
+                         doesn't specify quality values (e.g. in -f mode). This option is invariable and on by default.
+
+
+Bowtie 2 paired-end options::
+
+  --no-mixed             This option disables Bowtie 2's behavior to try to find alignments for the individual mates if
+                         it cannot find a concordant or discordant alignment for a pair. This option is invariable and
+                         and on by default.
+
+  --no-discordant        Normally, Bowtie 2 looks for discordant alignments if it cannot find any concordant alignments.
+                         A discordant alignment is an alignment where both mates align uniquely, but that does not
+                         satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior
+                         and it is on by default.
+
+
+Bowtie 2 effort options::
+
+  -D INT                 Up to INT consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using
+                         the alignments found so far. A seed extension "fails" if it does not yield a new best or a
+                         new second-best alignment. Default: 15.
+
+  -R INT                 INT is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds.
+                         When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of
+                         mismatches allowed) at different offsets and searches for more alignments. A read is considered
+                         to have repetitive seeds if the total number of seed hits divided by the number of seeds
+                         that aligned at least once is greater than 300. Default: 2.
+
+
+Bowtie 2 Scoring options::
+
+  --score_min "func"     Sets a function governing the minimum alignment score needed for an alignment to be considered
+                         "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying
+                         L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length.
+                         See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is
+                         L,0,-0.2.
+
+
+Bowtie 2 Reporting options::
+
+ --most_valid_alignments INT This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is
+                         deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the
+                         default mode, but adjusting the -M setting is deprecated.  Use the -D and -R options to adjust the
+                         effort expended to find valid alignments.
+
+                         For reference, this used to be the old (now deprecated) description of -M:
+                         Bowtie 2 searches for at most INT+1 distinct, valid alignments for each read. The search terminates when it
+                         can't find more distinct valid alignments, or when it finds INT+1 distinct alignments, whichever
+                         happens first. Only the best alignment is reported. Information from the other alignments is used to
+                         estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes
+                         Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that
+                         aligns many places. For reads that have more than INT+1 distinct, valid alignments, Bowtie 2 does not
+                         guarantee that the alignment reported is the best possible in terms of alignment score. -M is
+                         always used and its default value is set to 10.
+
+  </help>
+</tool>