# HG changeset patch
# User bgruening
# Date 1493058607 14400
# Node ID 80cd83b11214401e0bd26e774dab095020dbe1fe
# Parent b4e39d993fc8ce7506fc01d828e1d430c03cf070
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 78bee2b2efd36fe9399ce574159fc007cb6bdfbf
diff -r b4e39d993fc8 -r 80cd83b11214 test-data/paired_collection_example_results3.txt
--- a/test-data/paired_collection_example_results3.txt Thu Apr 20 09:14:30 2017 -0400
+++ b/test-data/paired_collection_example_results3.txt Mon Apr 24 14:30:07 2017 -0400
@@ -3,8 +3,8 @@
==========================
Input filename: input_1.fastq
Trimming mode: paired-end
-Trim Galore version: 0.4.0
-Cutadapt version: 1.8
+Trim Galore version: 0.4.3
+Cutadapt version: 1.13
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
@@ -15,10 +15,10 @@
Length cut-off for read 2: 35 bp (default)
-This is cutadapt 1.8 with Python 3.5.3
+This is cutadapt 1.13 with Python 3.5.3
Command line parameters: -f fastq -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA input_1.fastq
-Trimming 1 adapter(s) with at most 10.0% errors in single-end mode ...
-Finished in 0.10 s (1010 us/read; 0.06 M reads/minute).
+Trimming 1 adapter with at most 10.0% errors in single-end mode ...
+Finished in 0.01 s (101 us/read; 0.59 M reads/minute).
=== Summary ===
@@ -85,8 +85,8 @@
==========================
Input filename: input_2.fastq
Trimming mode: paired-end
-Trim Galore version: 0.4.0
-Cutadapt version: 1.8
+Trim Galore version: 0.4.3
+Cutadapt version: 1.13
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
@@ -97,10 +97,10 @@
Length cut-off for read 2: 35 bp (default)
-This is cutadapt 1.8 with Python 3.5.3
+This is cutadapt 1.13 with Python 3.5.3
Command line parameters: -f fastq -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA input_2.fastq
-Trimming 1 adapter(s) with at most 10.0% errors in single-end mode ...
-Finished in 0.10 s (1000 us/read; 0.06 M reads/minute).
+Trimming 1 adapter with at most 10.0% errors in single-end mode ...
+Finished in 0.01 s (100 us/read; 0.60 M reads/minute).
=== Summary ===
diff -r b4e39d993fc8 -r 80cd83b11214 test-data/paired_collection_example_results3gz.txt
--- a/test-data/paired_collection_example_results3gz.txt Thu Apr 20 09:14:30 2017 -0400
+++ b/test-data/paired_collection_example_results3gz.txt Mon Apr 24 14:30:07 2017 -0400
@@ -3,8 +3,8 @@
==========================
Input filename: input_1.fastq.gz
Trimming mode: paired-end
-Trim Galore version: 0.4.0
-Cutadapt version: 1.8
+Trim Galore version: 0.4.3
+Cutadapt version: 1.13
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
@@ -16,10 +16,10 @@
Output file will be GZIP compressed
-This is cutadapt 1.8 with Python 3.5.3
+This is cutadapt 1.13 with Python 3.5.3
Command line parameters: -f fastq -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA input_1.fastq.gz
-Trimming 1 adapter(s) with at most 10.0% errors in single-end mode ...
-Finished in 0.10 s (1010 us/read; 0.06 M reads/minute).
+Trimming 1 adapter with at most 10.0% errors in single-end mode ...
+Finished in 0.01 s (101 us/read; 0.59 M reads/minute).
=== Summary ===
@@ -86,8 +86,8 @@
==========================
Input filename: input_2.fastq.gz
Trimming mode: paired-end
-Trim Galore version: 0.4.0
-Cutadapt version: 1.8
+Trim Galore version: 0.4.3
+Cutadapt version: 1.13
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
@@ -99,10 +99,10 @@
Output file will be GZIP compressed
-This is cutadapt 1.8 with Python 3.5.3
+This is cutadapt 1.13 with Python 3.5.3
Command line parameters: -f fastq -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA input_2.fastq.gz
-Trimming 1 adapter(s) with at most 10.0% errors in single-end mode ...
-Finished in 0.10 s (1000 us/read; 0.06 M reads/minute).
+Trimming 1 adapter with at most 10.0% errors in single-end mode ...
+Finished in 0.01 s (100 us/read; 0.60 M reads/minute).
=== Summary ===
diff -r b4e39d993fc8 -r 80cd83b11214 test-data/paired_example_results2.txt
--- a/test-data/paired_example_results2.txt Thu Apr 20 09:14:30 2017 -0400
+++ b/test-data/paired_example_results2.txt Mon Apr 24 14:30:07 2017 -0400
@@ -3,8 +3,8 @@
==========================
Input filename: input_1.fastq
Trimming mode: paired-end
-Trim Galore version: 0.4.0
-Cutadapt version: 1.8
+Trim Galore version: 0.4.3
+Cutadapt version: 1.13
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
@@ -13,10 +13,10 @@
Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp
-This is cutadapt 1.8 with Python 3.5.3
+This is cutadapt 1.13 with Python 3.5.3
Command line parameters: -f fastq -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA input_1.fastq
-Trimming 1 adapter(s) with at most 10.0% errors in single-end mode ...
-Finished in 0.10 s (1010 us/read; 0.06 M reads/minute).
+Trimming 1 adapter with at most 10.0% errors in single-end mode ...
+Finished in 0.01 s (101 us/read; 0.59 M reads/minute).
=== Summary ===
@@ -83,8 +83,8 @@
==========================
Input filename: input_2.fastq
Trimming mode: paired-end
-Trim Galore version: 0.4.0
-Cutadapt version: 1.8
+Trim Galore version: 0.4.3
+Cutadapt version: 1.13
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
@@ -93,10 +93,10 @@
Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp
-This is cutadapt 1.8 with Python 3.5.3
+This is cutadapt 1.13 with Python 3.5.3
Command line parameters: -f fastq -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA input_2.fastq
-Trimming 1 adapter(s) with at most 10.0% errors in single-end mode ...
-Finished in 0.10 s (1000 us/read; 0.06 M reads/minute).
+Trimming 1 adapter with at most 10.0% errors in single-end mode ...
+Finished in 0.01 s (100 us/read; 0.60 M reads/minute).
=== Summary ===
diff -r b4e39d993fc8 -r 80cd83b11214 test-data/paired_example_results2gz.txt
--- a/test-data/paired_example_results2gz.txt Thu Apr 20 09:14:30 2017 -0400
+++ b/test-data/paired_example_results2gz.txt Mon Apr 24 14:30:07 2017 -0400
@@ -3,8 +3,8 @@
==========================
Input filename: input_1.fastq.gz
Trimming mode: paired-end
-Trim Galore version: 0.4.0
-Cutadapt version: 1.8
+Trim Galore version: 0.4.3
+Cutadapt version: 1.13
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
@@ -14,10 +14,10 @@
Output file will be GZIP compressed
-This is cutadapt 1.8 with Python 3.5.3
+This is cutadapt 1.13 with Python 3.5.3
Command line parameters: -f fastq -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA input_1.fastq.gz
-Trimming 1 adapter(s) with at most 10.0% errors in single-end mode ...
-Finished in 0.10 s (1010 us/read; 0.06 M reads/minute).
+Trimming 1 adapter with at most 10.0% errors in single-end mode ...
+Finished in 0.01 s (101 us/read; 0.59 M reads/minute).
=== Summary ===
@@ -84,8 +84,8 @@
==========================
Input filename: input_2.fastq.gz
Trimming mode: paired-end
-Trim Galore version: 0.4.0
-Cutadapt version: 1.8
+Trim Galore version: 0.4.3
+Cutadapt version: 1.13
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
@@ -95,10 +95,10 @@
Output file will be GZIP compressed
-This is cutadapt 1.8 with Python 3.5.3
+This is cutadapt 1.13 with Python 3.5.3
Command line parameters: -f fastq -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA input_2.fastq.gz
-Trimming 1 adapter(s) with at most 10.0% errors in single-end mode ...
-Finished in 0.10 s (1000 us/read; 0.06 M reads/minute).
+Trimming 1 adapter with at most 10.0% errors in single-end mode ...
+Finished in 0.01 s (100 us/read; 0.60 M reads/minute).
=== Summary ===
diff -r b4e39d993fc8 -r 80cd83b11214 test-data/sanger_full_range_report_results1.txt
--- a/test-data/sanger_full_range_report_results1.txt Thu Apr 20 09:14:30 2017 -0400
+++ b/test-data/sanger_full_range_report_results1.txt Mon Apr 24 14:30:07 2017 -0400
@@ -3,8 +3,8 @@
==========================
Input filename: input_1.fastq
Trimming mode: single-end
-Trim Galore version: 0.4.0
-Cutadapt version: 1.8
+Trim Galore version: 0.4.3
+Cutadapt version: 1.13
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Adapter sequence: 'AGATCGGAAGAGC' (Illumina TruSeq, Sanger iPCR; default (inconclusive auto-detection))
@@ -13,10 +13,10 @@
Minimum required sequence length before a sequence gets removed: 20 bp
-This is cutadapt 1.8 with Python 3.5.3
+This is cutadapt 1.13 with Python 3.5.3
Command line parameters: -f fastq -e 0.1 -q 20 -O 1 -a AGATCGGAAGAGC input_1.fastq
-Trimming 1 adapter(s) with at most 10.0% errors in single-end mode ...
-Finished in 0.10 s (50000 us/read; 0.00 M reads/minute).
+Trimming 1 adapter with at most 10.0% errors in single-end mode ...
+Finished in 0.01 s (5000 us/read; 0.01 M reads/minute).
=== Summary ===
diff -r b4e39d993fc8 -r 80cd83b11214 test-data/sanger_full_range_report_results1gz.txt
--- a/test-data/sanger_full_range_report_results1gz.txt Thu Apr 20 09:14:30 2017 -0400
+++ b/test-data/sanger_full_range_report_results1gz.txt Mon Apr 24 14:30:07 2017 -0400
@@ -3,8 +3,8 @@
==========================
Input filename: input_1.fastq.gz
Trimming mode: single-end
-Trim Galore version: 0.4.0
-Cutadapt version: 1.8
+Trim Galore version: 0.4.3
+Cutadapt version: 1.13
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Adapter sequence: 'AGATCGGAAGAGC' (Illumina TruSeq, Sanger iPCR; default (inconclusive auto-detection))
@@ -14,10 +14,10 @@
Output file will be GZIP compressed
-This is cutadapt 1.8 with Python 3.5.3
+This is cutadapt 1.13 with Python 3.5.3
Command line parameters: -f fastq -e 0.1 -q 20 -O 1 -a AGATCGGAAGAGC input_1.fastq.gz
-Trimming 1 adapter(s) with at most 10.0% errors in single-end mode ...
-Finished in 0.10 s (50000 us/read; 0.00 M reads/minute).
+Trimming 1 adapter with at most 10.0% errors in single-end mode ...
+Finished in 0.01 s (5000 us/read; 0.01 M reads/minute).
=== Summary ===
diff -r b4e39d993fc8 -r 80cd83b11214 tool_dependencies.xml
--- a/tool_dependencies.xml Thu Apr 20 09:14:30 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-
-
-
-
-
-
diff -r b4e39d993fc8 -r 80cd83b11214 trim_galore
--- a/trim_galore Thu Apr 20 09:14:30 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1609 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-use Getopt::Long;
-use IPC::Open3;
-use File::Spec;
-use File::Basename;
-use Cwd;
-
-## This program is Copyright (C) 2012-14, Felix Krueger (felix.krueger@babraham.ac.uk)
-
-## This program is free software: you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation, either version 3 of the License, or
-## (at your option) any later version.
-
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-
-## You should have received a copy of the GNU General Public License
-## along with this program. If not, see .
-
-
-## this script is taking in FastQ sequences and trims them using Cutadapt
-
-## last modified on 01 May 2015
-
-my $DOWARN = 1; # print on screen warning and text by default
-BEGIN { $SIG{'__WARN__'} = sub { warn $_[0] if $DOWARN } };
-
-my $trimmer_version = '0.4.0';
-
-
-my ($cutoff,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$a2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2,$three_prime_clip_r1,$three_prime_clip_r2,$nextera,$small_rna,$path_to_cutadapt,$illumina) = process_commandline();
-
-my @filenames = @ARGV;
-die "\nPlease provide the filename(s) of one or more FastQ file(s) to launch Trim Galore!\n
-USAGE: 'trim_galore [options] ' or 'trim_galore --help' for more options\n\n" unless (@filenames);
-file_sanity_check($filenames[0]);
-
-
-########################################################################
-
-my $path_to_fastqc = 'fastqc';
-
-# Before we start let's have quick look if Cutadapt seems to be working with the path information provided
-# To change the path to Cutadapt use --path_to_cutadapt /full/path/to/the/Cutadapt/executable
-
-if(defined $path_to_cutadapt){
- warn "Path to Cutadapt set as: '$path_to_cutadapt' (user defined)\n";
- # we'll simply use this
-}
-else{
- $path_to_cutadapt = 'cutadapt'; # default, assuming it is in the PATH
- warn "Path to Cutadapt set as: '$path_to_cutadapt' (default)\n";
-}
-my $cutadapt_version;
-my $return = system "$path_to_cutadapt --version"; #>/dev/null 2>&1";
-if ($return == -1){
- die "Failed to execute Cutadapt porperly. Please install Cutadapt first and make sure it is in the PATH, or specify the path to the Cutadapt executable using --path_to_cutadapt /path/to/cutadapt\n\n";
-}
-else{
- warn "Cutadapt seems to be working fine (tested command '$path_to_cutadapt --version')\n";
- $cutadapt_version = `$path_to_cutadapt --version`;
- chomp $cutadapt_version;
- # warn "Cutadapt version: $cutadapt_version\n";
-}
-
-
-########################################################################
-
-sub autodetect_adapter_type{
- warn "\n\nAUTO-DETECTING ADAPTER TYPE\n===========================\n";
- warn "Attempting to auto-detect adapter type from the first 1 million sequences of the first file (>> $ARGV[0] <<)\n\n";
-
- if ($ARGV[0] =~ /gz$/){
- open (AUTODETECT,"zcat $ARGV[0] |") or die "Failed to read from file $ARGV[0]\n";
- }
- else{
- open (AUTODETECT,$ARGV[0]) or die "Failed to read from file $ARGV[0]\n";
- }
-
- my %adapters;
-
- $adapters{'Illumina'} -> {seq} = 'AGATCGGAAGAGC';
- $adapters{'Illumina'} -> {count}= 0;
- $adapters{'Illumina'} -> {name}= 'Illumina TruSeq, Sanger iPCR; auto-detected';
-
- $adapters{'Nextera'} -> {seq} = 'CTGTCTCTTATA';
- $adapters{'Nextera'} -> {count}= 0;
- $adapters{'Nextera'} -> {name}= 'Nextera Transposase sequence; auto-detected';
-
- $adapters{'smallRNA'} -> {seq} = 'ATGGAATTCTCG';
- $adapters{'smallRNA'} -> {count}= 0;
- $adapters{'smallRNA'} -> {name}= 'Illumina small RNA adapter; auto-detected';
-
-
- # we will read the first 1 million sequences, or until the end of the file whatever comes first, and then use the adapter that for trimming which was found to occcur most often
- my $count = 0;
- while (1){
-
- my $line1 = ;
- my $line2 = ;
- my $line3 = ;
- my $line4 = ;
- last unless ($line4);
- $count++;
- last if ($count == 1000000);
-
- chomp $line2;
- $adapters{'Illumina'}->{count}++ unless (index($line2,'AGATCGGAAGAGC')== -1);
- $adapters{'Nextera'} ->{count}++ unless (index($line2,'CTGTCTCTTATA') == -1);
- $adapters{'smallRNA'}->{count}++ unless (index($line2,'ATGGAATTCTCG') == -1);
-
- }
-
- my $highest;
- my $second;
- my $seq;
- my $adapter_name;
-
- warn "Found perfect matches for the following adapter sequences:\nAdapter type\tCount\tSequence\tSequences analysed\tPercentage\n";
- foreach my $adapter (sort {$adapters{$b}->{count}<=>$adapters{$a}->{count}} keys %adapters){
-
- my $percentage = sprintf("%.2f",$adapters{$adapter}->{count}/$count*100);
-
- warn "$adapter\t$adapters{$adapter}->{count}\t$adapters{$adapter}->{seq}\t$count\t$percentage\n";
-
- unless (defined $highest){
- $highest = $adapter;
- $seq = $adapters{$adapter}->{seq};
- $adapter_name = $adapters{$adapter}->{name};
- next;
- }
- unless (defined $second){
- $second = $adapter;
- }
- }
-
-
- # using the highest occurrence as adapter to look out for
- if ($adapters{$highest}->{count} == $adapters{$second}->{count}){
- warn "Unable to auto-detect most prominent adapter from the first specified file (count $highest: $adapters{$highest}->{count}, count $second: $adapters{$second}->{second})\n";
-
- if ($adapters{$highest}->{count} == 0){
- warn "Defaulting to Illumina universal adapter ( AGATCGGAAGAGC ). Specify -a SEQUENCE to avoid this behavior).\n\n";
- $adapter_name = 'Illumina TruSeq, Sanger iPCR; default (inconclusive auto-detection)';
- $seq = 'AGATCGGAAGAGC';
- }
- else{
- warn "Using $highest adapter for trimming (count: $adapters{$highest}->{count}). Second best hit was $second (count: $adapters{$second}->{count})\n\n";
- }
- }
- else{
- warn "Using $highest adapter for trimming (count: $adapters{$highest}->{count}). Second best hit was $second (count: $adapters{$second}->{count})\n\n";
- }
-
- close AUTODETECT;
-
- return ($seq,$adapter_name);
-
-}
-
-
-
-### SETTING DEFAULTS UNLESS THEY WERE SPECIFIED
-unless (defined $cutoff){
- $cutoff = 20;
-}
-my $phred_score_cutoff = $cutoff; # only relevant for report
-my $adapter_name = '';
-unless (defined $adapter){
- if ($nextera){
- $adapter = 'CTGTCTCTTATA';
- $adapter_name = 'Nextera Transposase sequence; user defined';
- }
- elsif($small_rna){
- $adapter = 'ATGGAATTCTCG';
- $adapter_name = 'Illumina small RNA adapter; user defined';
- }
- elsif($illumina){
- $adapter = 'AGATCGGAAGAGC';
- $adapter_name = 'Illumina TruSeq, Sanger iPCR; user defined';
- }
- else{ # default
- ($adapter,$adapter_name) = autodetect_adapter_type();
- }
-}
-unless (defined $a2){ # optional adapter for the second read in a pair. Only works for --paired trimming
- $a2 = '';
-}
-
-unless (defined $stringency){
- $stringency = 1;
-}
-
-if ($phred_encoding == 64){
- $cutoff += 31;
-}
-
-my $file_1;
-my $file_2;
-
-foreach my $filename (@ARGV){
- trim ($filename);
-}
-
-
-sub trim{
- my $filename = shift;
-
- my $output_filename = (split (/\//,$filename))[-1];
-
- my $report = $output_filename;
- $report =~ s/$/_trimming_report.txt/;
-
- if ($no_report_file) {
- $report = File::Spec->devnull;
- open (REPORT,'>',$report) or die "Failed to write to file '$report': $!\n";
- # warn "Redirecting report output to /dev/null\n";
- }
- else{
- open (REPORT,'>',$output_dir.$report) or die "Failed to write to file '$report': $!\n";
- warn "Writing report to '$output_dir$report'\n";
- }
-
- warn "\nSUMMARISING RUN PARAMETERS\n==========================\nInput filename: $filename\n";
- print REPORT "\nSUMMARISING RUN PARAMETERS\n==========================\nInput filename: $filename\n";
-
- if ($validate){ # paired-end mode
- warn "Trimming mode: paired-end\n";
- print REPORT "Trimming mode: paired-end\n";
- }
- else{
- warn "Trimming mode: single-end\n";
- print REPORT "Trimming mode: single-end\n";
- }
-
-
- warn "Trim Galore version: $trimmer_version\n";
- print REPORT "Trim Galore version: $trimmer_version\n";
-
- warn "Cutadapt version: $cutadapt_version\n";
- print REPORT "Cutadapt version: $cutadapt_version\n";
-
- warn "Quality Phred score cutoff: $phred_score_cutoff\n";
- print REPORT "Quality Phred score cutoff: $phred_score_cutoff\n";
-
- warn "Quality encoding type selected: ASCII+$phred_encoding\n";
- print REPORT "Quality encoding type selected: ASCII+$phred_encoding\n";
-
- warn "Adapter sequence: '$adapter' ($adapter_name)\n";
- print REPORT "Adapter sequence: '$adapter' ($adapter_name)\n";
-
- if ($error_rate == 0.1){
- warn "Maximum trimming error rate: $error_rate (default)\n";
- }
- else{
- warn "Maximum trimming error rate: $error_rate\n";
- }
-
- print REPORT "Maximum trimming error rate: $error_rate";
- if ($error_rate == 0.1){
- print REPORT " (default)\n";
- }
- else{
- print REPORT "\n";
- }
-
- if ($a2){
- warn "Optional adapter 2 sequence (only used for read 2 of paired-end files): '$a2'\n";
- print REPORT "Optional adapter 2 sequence (only used for read 2 of paired-end files): '$a2'\n";
- }
-
- warn "Minimum required adapter overlap (stringency): $stringency bp\n";
- print REPORT "Minimum required adapter overlap (stringency): $stringency bp\n";
-
- if ($validate){
- warn "Minimum required sequence length for both reads before a sequence pair gets removed: $length_cutoff bp\n";
- print REPORT "Minimum required sequence length for both reads before a sequence pair gets removed: $length_cutoff bp\n";
- }
- else{
- warn "Minimum required sequence length before a sequence gets removed: $length_cutoff bp\n";
- print REPORT "Minimum required sequence length before a sequence gets removed: $length_cutoff bp\n";
- }
-
- if ($validate){ # only for paired-end files
-
- if ($retain){ # keeping single-end reads if only one end is long enough
-
- if ($length_read_1 == 35){
- warn "Length cut-off for read 1: $length_read_1 bp (default)\n";
- print REPORT "Length cut-off for read 1: $length_read_1 bp (default)\n";
- }
- else{
- warn "Length cut-off for read 1: $length_read_1 bp\n";
- print REPORT "Length cut-off for read 1: $length_read_1 bp\n";
- }
-
- if ($length_read_2 == 35){
- warn "Length cut-off for read 2: $length_read_2 bb (default)\n";
- print REPORT "Length cut-off for read 2: $length_read_2 bp (default)\n";
- }
- else{
- warn "Length cut-off for read 2: $length_read_2 bp\n";
- print REPORT "Length cut-off for read 2: $length_read_2 bp\n";
- }
- }
- }
-
- if ($rrbs){
- warn "File was specified to be an MspI-digested RRBS sample. Sequences with adapter contamination will be trimmed a further 2 bp to remove potential methylation-biased bases from the end-repair reaction\n";
- print REPORT "File was specified to be an MspI-digested RRBS sample. Sequences with adapter contamination will be trimmed a further 2 bp to remove potential methylation-biased bases from the end-repair reaction\n";
- }
-
- if ($non_directional){
- warn "File was specified to be a non-directional MspI-digested RRBS sample. Sequences starting with either 'CAA' or 'CGA' will have the first 2 bp trimmed off to remove potential methylation-biased bases from the end-repair reaction\n";
- print REPORT "File was specified to be a non-directional MspI-digested RRBS sample. Sequences starting with either 'CAA' or 'CGA' will have the first 2 bp trimmed off to remove potential methylation-biased bases from the end-repair reaction\n";
- }
-
- if ($trim){
- warn "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n";
- print REPORT "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n";
- }
-
- if ($clip_r1){
- warn "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5' end to avoid poor qualities or biases\n";
- print REPORT "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5' end to avoid poor qualities or biases\n";
- }
- if ($clip_r2){
- warn "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\n";
- print REPORT "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\n";
- }
-
- if ($three_prime_clip_r1){
- warn "All Read 1 sequences will be trimmed by $three_prime_clip_r1 bp from their 3' end to avoid poor qualities or biases\n";
- print REPORT "All Read 1 sequences will be trimmed by $three_prime_clip_r1 bp from their 3' end to avoid poor qualities or biases\n";
- }
- if ($three_prime_clip_r2){
- warn "All Read 2 sequences will be trimmed by $three_prime_clip_r2 bp from their 3' end to avoid poor qualities or biases\n";
- print REPORT "All Read 2 sequences will be trimmed by $three_prime_clip_r2 bp from their 3' end to avoid poor qualities or biases\n";
- }
-
- if ($fastqc){
- warn "Running FastQC on the data once trimming has completed\n";
- print REPORT "Running FastQC on the data once trimming has completed\n";
-
- if ($fastqc_args){
- warn "Running FastQC with the following extra arguments: '$fastqc_args'\n";
- print REPORT "Running FastQC with the following extra arguments: $fastqc_args\n";
- }
- }
-
- if ($keep and $rrbs){
- warn "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n";
- print REPORT "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n";
- }
-
-
- if ($gzip or $filename =~ /\.gz$/){
- $gzip = 1;
- unless ($dont_gzip){
- warn "Output file(s) will be GZIP compressed\n";
- print REPORT "Output file will be GZIP compressed\n";
- }
- }
-
- warn "\n";
- print REPORT "\n";
- sleep (3);
-
- my $temp;
-
- ### Proceeding differently for RRBS and other type of libraries
- if ($rrbs){
-
- ### Skipping quality filtering for RRBS libraries if a quality cutoff of 0 was specified
- if ($cutoff == 0){
- warn "Quality cutoff selected was 0 - Skipping quality trimming altogether\n\n";
- sleep (3);
- }
- else{
-
- $temp = $filename;
- $temp =~ s/^.*\///; # replacing optional file path information
- $temp =~ s/$/_qual_trimmed.fastq/;
- open (TEMP,'>',$output_dir.$temp) or die "Can't write to '$temp': $!";
-
- warn " >>> Now performing adaptive quality trimming with a Phred-score cutoff of: $cutoff <<<\n\n";
- sleep (3);
-
- open (QUAL,"$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -a X $filename |") or die "Can't open pipe to Cutadapt: $!";
-
- my $qual_count = 0;
-
- while (1){
- my $l1 = ;
- my $seq = ;
- my $l3 = ;
- my $qual = ;
- last unless (defined $qual);
-
- $qual_count++;
- if ($qual_count%10000000 == 0){
- warn "$qual_count sequences processed\n";
- }
- print TEMP "$l1$seq$l3$qual";
- }
-
- warn "\n >>> Quality trimming completed <<<\n$qual_count sequences processed in total\n\n";
- close QUAL or die "Unable to close QUAL filehandle: $!\n";
- sleep (3);
-
- }
- }
-
-
- if ($output_filename =~ /\.fastq$/){
- $output_filename =~ s/\.fastq$/_trimmed.fq/;
- }
- elsif ($output_filename =~ /\.fastq\.gz$/){
- $output_filename =~ s/\.fastq\.gz$/_trimmed.fq/;
- }
- elsif ($output_filename =~ /\.fq$/){
- $output_filename =~ s/\.fq$/_trimmed.fq/;
- }
- elsif ($output_filename =~ /\.fq\.gz$/){
- $output_filename =~ s/\.fq\.gz$/_trimmed.fq/;
- }
- else{
- $output_filename =~ s/$/_trimmed.fq/;
- }
-
- if ($gzip or $filename =~ /\.gz$/){
- if ($dont_gzip){
- open (OUT,'>',$output_dir.$output_filename) or die "Can't open '$output_filename': $!\n"; # don't need to gzip intermediate file
- }
- else{
- ### 6 Jan 2014: had a request to also gzip intermediate files to save disk space
- # if ($validate){
- # open (OUT,'>',$output_dir.$output_filename) or die "Can't open '$output_filename': $!\n"; # don't need to gzip intermediate file
- # }
- $output_filename .= '.gz';
- open (OUT,"| gzip -c - > ${output_dir}${output_filename}") or die "Can't write to '$output_filename': $!\n";
- }
- }
- else{
- open (OUT,'>',$output_dir.$output_filename) or die "Can't open '$output_filename': $!\n";
- }
- warn "Writing final adapter and quality trimmed output to $output_filename\n\n";
-
- my $count = 0;
- my $too_short = 0;
- my $quality_trimmed = 0;
- my $rrbs_trimmed = 0;
- my $rrbs_trimmed_start = 0;
- my $CAA = 0;
- my $CGA = 0;
-
- my $pid;
-
- if ($rrbs and $cutoff != 0){
-
- ### optionally using 2 different adapters for read 1 and read 2
- if ($validate and $a2){
- ### Figure out whether current file counts as read 1 or read 2 of paired-end files
- if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
- warn "\n >>> Now performing adapter trimming for the adapter sequence: '$adapter' from file $temp <<< \n";
- sleep (3);
- $pid = open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $adapter $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";
- }
- else{ # this is read 2 of a pair
- warn "\n >>> Now performing adapter trimming for the adapter sequence: '$a2' from file $temp <<< \n";
- sleep (3);
- $pid = open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $a2 $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";
- }
- }
- ### Using the same adapter for both read 1 and read 2
- else{
- warn "\n >>> Now performing adapter trimming for the adapter sequence: '$adapter' from file $temp <<< \n";
- sleep (3);
- $pid = open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $adapter $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";
- }
-
- close WRITER or die $!; # not needed
-
- open (QUAL,"$output_dir$temp") or die $!; # quality trimmed file
-
- if ($filename =~ /\.gz$/){
- open (IN,"zcat $filename |") or die $!; # original, untrimmed file
- }
- else{
- open (IN,$filename) or die $!; # original, untrimmed file
- }
-
- while (1){
-
- # we can process the output from Cutadapt and the original input 1 by 1 to decide if the adapter has been removed or not
- my $l1 = ;
- my $seq = ; # adapter trimmed sequence
- my $l3 = ;
- my $qual = ;
-
- $_ = ; # irrelevant
- my $original_seq = ;
- $_ = ; # irrelevant
- $_ = ; # irrelevant
-
- $_ = ; # irrelevant
- my $qual_trimmed_seq = ;
- $_ = ; # irrelevant
- my $qual_trimmed_qual = ;
-
- last unless (defined $qual and defined $qual_trimmed_qual); # could be empty strings
-
- $count++;
- if ($count%10000000 == 0){
- warn "$count sequences processed\n";
- }
-
- chomp $seq;
- chomp $qual;
- chomp $qual_trimmed_seq;
- chomp $original_seq;
-
- my $quality_trimmed_seq_length = length $qual_trimmed_seq;
-
- if (length $original_seq > length $qual_trimmed_seq){
- ++$quality_trimmed;
- }
-
- my $nd = 0;
-
- ### NON-DIRECTIONAL RRBS
- if ($non_directional){
- if (length$seq > 2){
- if ($seq =~ /^CAA/){
- ++$CAA;
- $seq = substr ($seq,2,length($seq)-2);
- $qual = substr ($qual,2,length($qual)-2);
- ++$rrbs_trimmed_start;
- $nd = 1;
- }
- elsif ($seq =~ /^CGA/){
- $seq = substr ($seq,2,length($seq)-2);
- $qual = substr ($qual,2,length($qual)-2);
- ++$CGA;
- ++$rrbs_trimmed_start;
- $nd = 1;
- }
- }
- }
-
- ### directional read
- unless ($nd == 1){
- if (length $seq >= 2 and length$seq < $quality_trimmed_seq_length){
- $seq = substr ($seq,0,length($seq)-2);
- $qual = substr ($qual,0,length($qual)-2);
- ++$rrbs_trimmed;
- }
- }
-
- ### Shortening all sequences by 1 bp on the 3' end
- if ($trim){
- $seq = substr($seq,0,length($seq)-1);
- $qual = substr($qual,0,length($qual)-1);
- }
-
- ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
- if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
- print OUT "$l1$seq\n$l3$qual\n";
- }
- else{ # single end
-
- if ($clip_r1){
- if (length $seq > $clip_r1){ # sequences that are already too short won't be clipped again
- $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
- $qual = substr($qual,$clip_r1);
- }
- }
-
- if ($three_prime_clip_r1){
-
- if (length $seq > $three_prime_clip_r1){ # sequences that are already too short won't be clipped again
- # warn "seq/qual before/after trimming:\n$seq\n$qual\n";
- $seq = substr($seq,0,(length($seq) - $three_prime_clip_r1)); # starting after the sequences to be trimmed until the end of the sequence
- $qual = substr($qual,0,(length($qual) - $three_prime_clip_r1 ));
- # warn "$seq\n$qual\n";
- }
-
- }
-
- if (length $seq < $length_cutoff){
- ++$too_short;
- next;
- }
- else{
- print OUT "$l1$seq\n$l3$qual\n";
- }
- }
- }
-
- print REPORT "\n";
- while (){
- warn $_;
- print REPORT $_;
- }
-
- close IN or die "Unable to close IN filehandle: $!";
- close QUAL or die "Unable to close QUAL filehandle: $!";
- close TRIM or die "Unable to close TRIM filehandle: $!";
- close OUT or die "Unable to close OUT filehandle: $!";
-
- }
- else{
-
- ### optionally using 2 different adapters for read 1 and read 2
- if ($validate and $a2){
- ### Figure out whether current file counts as read 1 or read 2 of paired-end files
- if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
- warn "\n >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$adapter' from file $filename <<< \n";
- sleep (3);
- $pid = open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $adapter $filename") or die "Failed to launch Cutadapt: $!";
- }
- else{ # this is read 2 of a pair
- warn "\n >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$a2' from file $filename <<< \n";
- sleep (3);
- $pid = open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $a2 $filename") or die "Failed to launch Cutadapt: $!";
- }
- }
- ### Using the same adapter for both read 1 and read 2
- else{
- warn "\n >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$adapter' from file $filename <<< \n";
- sleep (3);
- $pid = open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $adapter $filename") or die "Failed to launch Cutadapt: $!";
- }
-
- close WRITER or die $!; # not needed
-
- while (1){
-
- my $l1 = ;
- my $seq = ; # quality and/or adapter trimmed sequence
- my $l3 = ;
- my $qual = ;
- # print "$l1$seq\n$l3$qual\n";
- last unless (defined $qual); # could be an empty string
-
- $count++;
- if ($count%10000000 == 0){
- warn "$count sequences processed\n";
- }
-
- chomp $seq;
- chomp $qual;
-
- ### Shortening all sequences by 1 bp on the 3' end
- if ($trim){
- $seq = substr($seq,0,length($seq)-1);
- $qual = substr($qual,0,length($qual)-1);
- }
-
- ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
- if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
- print OUT "$l1$seq\n$l3$qual\n";
- }
- else{ # single end
-
- if ($clip_r1){
- if (length $seq > $clip_r1){ # sequences that are already too short won't be clipped again
- $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
- $qual = substr($qual,$clip_r1);
- }
- }
-
- if ($three_prime_clip_r1){
- if (length $seq > $three_prime_clip_r1){ # sequences that are already too short won't be clipped again
- # warn "seq/qual before/after trimming:\n$seq\n$qual\n";
- $seq = substr($seq,0,(length($seq) - $three_prime_clip_r1)); # starting after the sequences to be trimmed until the end of the sequence
- $qual = substr($qual,0,(length($qual) - $three_prime_clip_r1));
- # warn "$seq\n$qual\n";sleep(1);
- }
- }
-
- if (length $seq < $length_cutoff){
- ++$too_short;
- next;
- }
- else{
- print OUT "$l1$seq\n$l3$qual\n";
- }
- }
- }
-
- print REPORT "\n";
- while (){
- warn $_;
- print REPORT $_;
- }
-
- close TRIM or die "Unable to close TRIM filehandle: $!\n";
- close ERROR or die "Unable to close ERROR filehandle: $!\n";
- close OUT or die "Unable to close OUT filehandle: $!\n";
-
- }
-
-
- if ($rrbs){
- unless ($keep){ # keeping the quality trimmed intermediate file for RRBS files
-
- # deleting temporary quality trimmed file
- my $deleted = unlink "$output_dir$temp";
-
- if ($deleted){
- warn "Successfully deleted temporary file $temp\n\n";
- }
- else{
- warn "Could not delete temporary file $temp";
- }
- }
- }
-
- ### Wait and reap the child process (Cutadapt) so that it doesn't become a zombie process
- waitpid $pid, 0;
- unless ($? == 0){
- die "\n\nCutadapt terminated with exit signal: '$?'.\nTerminating Trim Galore run, please check error message(s) to get an idea what went wrong...\n\n";
- }
-
- warn "\nRUN STATISTICS FOR INPUT FILE: $filename\n";
- print REPORT "\nRUN STATISTICS FOR INPUT FILE: $filename\n";
-
- warn "="x 45,"\n";
- print REPORT "="x 45,"\n";
-
- warn "$count sequences processed in total\n";
- print REPORT "$count sequences processed in total\n";
-
- ### only reporting this separately if quality and adapter trimming were performed separately
- if ($rrbs){
- my $percentage_shortened;
- if ($count){
- $percentage_shortened = sprintf ("%.1f",$quality_trimmed/$count*100);
- warn "Sequences were truncated to a varying degree because of deteriorating qualities (Phred score quality cutoff: $cutoff):\t$quality_trimmed ($percentage_shortened%)\n";
- print REPORT "Sequences were truncated to a varying degree because of deteriorating qualities (Phred score quality cutoff: $cutoff):\t$quality_trimmed ($percentage_shortened%)\n";
- }
- else{
- warn "Unable to determine percentage of reads that were shortened because 0 lines were processed\n\n";
- print REPORT "Unable to determine percentage of reads that were shortened because 0 lines were processed\n\n";
- }
- }
-
- my $percentage_too_short;
- if ($count){
- $percentage_too_short = sprintf ("%.1f",$too_short/$count*100);
- }
- else{
- $percentage_too_short = 'N/A';
- }
-
- if ($validate){ ### only for paired-end files
- warn "The length threshold of paired-end sequences gets evaluated later on (in the validation step)\n";
- }
- else{ ### Single-end file
- warn "Sequences removed because they became shorter than the length cutoff of $length_cutoff bp:\t$too_short ($percentage_too_short%)\n";
- print REPORT "Sequences removed because they became shorter than the length cutoff of $length_cutoff bp:\t$too_short ($percentage_too_short%)\n";
- }
-
- if ($rrbs){
- my $percentage_rrbs_trimmed = sprintf ("%.1f",$rrbs_trimmed/$count*100);
- warn "RRBS reads trimmed by additional 2 bp when adapter contamination was detected:\t$rrbs_trimmed ($percentage_rrbs_trimmed%)\n";
- print REPORT "RRBS reads trimmed by additional 2 bp when adapter contamination was detected:\t$rrbs_trimmed ($percentage_rrbs_trimmed%)\n";
- }
-
- if ($non_directional){
- my $percentage_rrbs_trimmed_at_start = sprintf ("%.1f",$rrbs_trimmed_start/$count*100);
- warn "RRBS reads trimmed by 2 bp at the start when read started with CAA ($CAA) or CGA ($CGA) in total:\t$rrbs_trimmed_start ($percentage_rrbs_trimmed_at_start%)\n";
- print REPORT "RRBS reads trimmed by 2 bp at the start when read started with CAA ($CAA) or CGA ($CGA) in total:\t$rrbs_trimmed_start ($percentage_rrbs_trimmed_at_start%)\n";
- }
-
- warn "\n";
- print REPORT "\n";
-
- ### RUNNING FASTQC unless we are dealing with paired-end files
- unless($validate){
- if ($fastqc){
- warn "\n >>> Now running FastQC on the data <<<\n\n";
- sleep (5);
- if ($fastqc_args){
- system ("$path_to_fastqc $fastqc_args $output_dir$output_filename");
- }
- else{
- system ("$path_to_fastqc $output_dir$output_filename");
- }
- }
- }
-
- ### VALIDATE PAIRED-END FILES
- if ($validate){
-
- ### Figure out whether current file counts as read 1 or read 2 of paired-end files
-
- if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
- $file_1 = $output_filename;
- shift @filenames;
- # warn "This is read 1: $file_1\n\n";
- }
- else{ # this is read 2 of a pair
- $file_2 = $output_filename;
- shift @filenames;
- # warn "This is read 2: $file_2\n\n";
- }
-
- if ($file_1 and $file_2){
- warn "Validate paired-end files $file_1 and $file_2\n";
- sleep (1);
-
- my ($val_1,$val_2,$un_1,$un_2) = validate_paired_end_files($file_1,$file_2);
-
- ### RUNNING FASTQC
- if ($fastqc){
-
- warn "\n >>> Now running FastQC on the validated data $val_1<<<\n\n";
- sleep (3);
-
- if ($fastqc_args){
- system ("$path_to_fastqc $fastqc_args $output_dir$val_1");
- }
- else{
- system ("$path_to_fastqc $output_dir$val_1");
- }
-
- warn "\n >>> Now running FastQC on the validated data $val_2<<<\n\n";
- sleep (3);
-
- if ($fastqc_args){
- system ("$path_to_fastqc $fastqc_args $output_dir$val_2");
- }
- else{
- system ("$path_to_fastqc $output_dir$val_2");
- }
-
- }
-
- warn "Deleting both intermediate output files $file_1 and $file_2\n";
- unlink "$output_dir$file_1";
- unlink "$output_dir$file_2";
-
- warn "\n",'='x100,"\n\n";
- sleep (3);
-
- $file_1 = undef; # setting file_1 and file_2 to undef once validation is completed
- $file_2 = undef;
- }
- }
-
-}
-
-sub validate_paired_end_files{
-
- my $file_1 = shift;
- my $file_2 = shift;
-
- warn "file_1: $file_1, file_2: $file_2\n\n";
-
- if ($file_1 =~ /\.gz$/){
- open (IN1,"zcat $output_dir$file_1 |") or die "Couldn't read from file $file_1: $!\n";
- }
- else{
- open (IN1, "$output_dir$file_1") or die "Couldn't read from file $file_1: $!\n";
- }
-
- if ($file_2 =~ /\.gz$/){
- open (IN2,"zcat $output_dir$file_2 |") or die "Couldn't read from file $file_2: $!\n";
- }
- else{
- open (IN2, "$output_dir$file_2") or die "Couldn't read from file $file_2: $!\n";
- }
-
- warn "\n>>>>> Now validing the length of the 2 paired-end infiles: $file_1 and $file_2 <<<<<\n";
- sleep (3);
-
- my $out_1 = $file_1;
- my $out_2 = $file_2;
-
- if ($out_1 =~ /gz$/){
- $out_1 =~ s/trimmed\.fq\.gz$/val_1.fq/;
- }
- else{
- $out_1 =~ s/trimmed\.fq$/val_1.fq/;
- }
-
- if ($out_2 =~ /gz$/){
- $out_2 =~ s/trimmed\.fq\.gz$/val_2.fq/;
- }
- else{
- $out_2 =~ s/trimmed\.fq$/val_2.fq/;
- }
-
- if ($gzip){
- if ($dont_gzip){
- open (R1,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n";
- }
- else{
- $out_1 .= '.gz';
- open (R1,"| gzip -c - > ${output_dir}${out_1}") or die "Can't write to $out_1: $!\n";
- }
- }
- else{
- open (R1,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n";
- }
-
- if ($gzip){
- if ($dont_gzip){
- open (R2,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n";
- }
- else{
- $out_2 .= '.gz';
- open (R2,"| gzip -c - > ${output_dir}${out_2}") or die "Can't write to $out_2: $!\n";
- }
- }
- else{
- open (R2,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n";
- }
-
- warn "Writing validated paired-end read 1 reads to $out_1\n";
- warn "Writing validated paired-end read 2 reads to $out_2\n\n";
-
- my $unpaired_1;
- my $unpaired_2;
-
- if ($retain){
-
- $unpaired_1 = $file_1;
- $unpaired_2 = $file_2;
-
- if ($unpaired_1 =~ /gz$/){
- $unpaired_1 =~ s/trimmed\.fq\.gz$/unpaired_1.fq/;
- }
- else{
- $unpaired_1 =~ s/trimmed\.fq$/unpaired_1.fq/;
- }
-
- if ($unpaired_2 =~ /gz$/){
- $unpaired_2 =~ s/trimmed\.fq\.gz$/unpaired_2.fq/;
- }
- else{
- $unpaired_2 =~ s/trimmed\.fq$/unpaired_2.fq/;
- }
-
- if ($gzip){
- if ($dont_gzip){
- open (UNPAIRED1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n";
- }
- else{
- $unpaired_1 .= '.gz';
- open (UNPAIRED1,"| gzip -c - > ${output_dir}${unpaired_1}") or die "Can't write to $unpaired_1: $!\n";
- }
- }
- else{
- open (UNPAIRED1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n";
- }
-
- if ($gzip){
- if ($dont_gzip){
- open (UNPAIRED2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n";
- }
- else{
- $unpaired_2 .= '.gz';
- open (UNPAIRED2,"| gzip -c - > ${output_dir}${unpaired_2}") or die "Can't write to $unpaired_2: $!\n";
- }
- }
- else{
- open (UNPAIRED2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n";
- }
-
- warn "Writing unpaired read 1 reads to $unpaired_1\n";
- warn "Writing unpaired read 2 reads to $unpaired_2\n\n";
- }
-
- my $sequence_pairs_removed = 0;
- my $read_1_printed = 0;
- my $read_2_printed = 0;
-
- my $count = 0;
-
- while (1){
- my $id_1 = ;
- my $seq_1 = ;
- my $l3_1 = ;
- my $qual_1 = ;
- last unless ($id_1 and $seq_1 and $l3_1 and $qual_1);
-
- my $id_2 = ;
- my $seq_2 = ;
- my $l3_2 = ;
- my $qual_2 = ;
- last unless ($id_2 and $seq_2 and $l3_2 and $qual_2);
-
- ++$count;
-
-
- ## small check if the sequence files appear to be FastQ files
- if ($count == 1){ # performed just once
- if ($id_1 !~ /^\@/ or $l3_1 !~ /^\+/){
- die "Input file doesn't seem to be in FastQ format at sequence $count\n";
- }
- if ($id_2 !~ /^\@/ or $l3_2 !~ /^\+/){
- die "Input file doesn't seem to be in FastQ format at sequence $count\n";
- }
- }
-
- chomp $seq_1;
- chomp $seq_2;
- chomp $qual_1;
- chomp $qual_2;
-
- if ($clip_r1){
- if (length $seq_1 > $clip_r1){ # sequences that are already too short won't be trimmed again
- $seq_1 = substr($seq_1,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
- $qual_1 = substr($qual_1,$clip_r1);
- }
- }
- if ($clip_r2){
- if (length $seq_2 > $clip_r2){ # sequences that are already too short won't be trimmed again
- $seq_2 = substr($seq_2,$clip_r2); # starting after the sequences to be trimmed until the end of the sequence
- $qual_2 = substr($qual_2,$clip_r2);
- }
- }
-
- if ($three_prime_clip_r1){
- if (length $seq_1 > $three_prime_clip_r1){ # sequences that are already too short won't be clipped again
- $seq_1 = substr($seq_1,0,(length($seq_1) - $three_prime_clip_r1)); # starting after the sequences to be trimmed until the end of the sequence
- $qual_1 = substr($qual_1,0,(length($qual_1) - $three_prime_clip_r1));
- }
- }
- if ($three_prime_clip_r2){
- if (length $seq_2 > $three_prime_clip_r2){ # sequences that are already too short won't be clipped again
- $seq_2 = substr($seq_2,0,(length($seq_2) - $three_prime_clip_r2)); # starting after the sequences to be trimmed until the end of the sequence
- $qual_2 = substr($qual_2,0,(length($qual_2) - $three_prime_clip_r2));
- }
- }
-
-
-
- ### making sure that the reads do have a sensible length
- if ( (length($seq_1) < $length_cutoff) or (length($seq_2) < $length_cutoff) ){
- ++$sequence_pairs_removed;
- if ($retain){ # writing out single-end reads if they are longer than the cutoff
-
- if ( length($seq_1) >= $length_read_1){ # read 1 is long enough
- print UNPAIRED1 $id_1;
- print UNPAIRED1 "$seq_1\n";
- print UNPAIRED1 $l3_1;
- print UNPAIRED1 "$qual_1\n";
- ++$read_1_printed;
- }
-
- if ( length($seq_2) >= $length_read_2){ # read 2 is long enough
- print UNPAIRED2 $id_2;
- print UNPAIRED2 "$seq_2\n";
- print UNPAIRED2 $l3_2;
- print UNPAIRED2 "$qual_2\n";
- ++$read_2_printed;
- }
-
- }
- }
- else{
- print R1 $id_1;
- print R1 "$seq_1\n";
- print R1 $l3_1;
- print R1 "$qual_1\n";
-
- print R2 $id_2;
- print R2 "$seq_2\n";
- print R2 $l3_2;
- print R2 "$qual_2\n";
- }
-
- }
-
-
- my $percentage;
-
- if ($count){
- $percentage = sprintf("%.2f",$sequence_pairs_removed/$count*100);
- }
- else{
- $percentage = 'N/A';
- }
-
- warn "Total number of sequences analysed: $count\n\n";
- warn "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n";
-
- print REPORT "Total number of sequences analysed for the sequence pair length validation: $count\n\n";
- print REPORT "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n";
-
- if ($keep){
- warn "Number of unpaired read 1 reads printed: $read_1_printed\n";
- warn "Number of unpaired read 2 reads printed: $read_2_printed\n";
- }
-
- close R1 or die $!;
- close R2 or die $!;
-
- if ($retain){
- close UNPAIRED1 or die $!;
- close UNPAIRED2 or die $!;
- }
-
- warn "\n";
- if ($retain){
- return ($out_1,$out_2,$unpaired_1,$unpaired_2);
- }
- else{
- return ($out_1,$out_2);
- }
-}
-
-
-sub file_sanity_check{
-
- my $file = shift;
- open (SANITY,$file) or die "Failed to read from file '$file' to perform sanity check\n";
-
- # just processing a single FastQ entry
- my $id = ;
- my $seq = ;
- my $three = ;
- my $qual = ;
-
- unless ($id and $seq and $three and $qual){
- warn "Input file '$file' seems to be completely empty. Consider respecifying!\n\n";
- }
- return;
- chomp $seq;
-
- # testing if the file is a colorspace file in which case we bail
- if ($seq =~ /\d+/){
- die "File seems to be in SOLiD colorspace format which is not supported by Trim Galore (sequence is: '$seq')! Please use Cutadapt on colorspace files separately and check its documentation!\n\n";
- }
-
- close SANITY;
-}
-
-
-sub process_commandline{
- my $help;
- my $quality;
- my $adapter;
- my $adapter2;
- my $stringency;
- my $report;
- my $version;
- my $rrbs;
- my $length_cutoff;
- my $keep;
- my $fastqc;
- my $non_directional;
- my $phred33;
- my $phred64;
- my $fastqc_args;
- my $trim;
- my $gzip;
- my $validate;
- my $retain;
- my $length_read_1;
- my $length_read_2;
- my $error_rate;
- my $output_dir;
- my $no_report_file;
- my $suppress_warn;
- my $dont_gzip;
- my $clip_r1;
- my $clip_r2;
- my $three_prime_clip_r1;
- my $three_prime_clip_r2;
- my $nextera;
- my $small_rna;
- my $illumina;
- my $path_to_cutadapt;
-
- my $command_line = GetOptions ('help|man' => \$help,
- 'q|quality=i' => \$quality,
- 'a|adapter=s' => \$adapter,
- 'a2|adapter2=s' => \$adapter2,
- 'report' => \$report,
- 'version' => \$version,
- 'stringency=i' => \$stringency,
- 'fastqc' => \$fastqc,
- 'RRBS' => \$rrbs,
- 'keep' => \$keep,
- 'length=i' => \$length_cutoff,
- 'non_directional' => \$non_directional,
- 'phred33' => \$phred33,
- 'phred64' => \$phred64,
- 'fastqc_args=s' => \$fastqc_args,
- 'trim1' => \$trim,
- 'gzip' => \$gzip,
- 'paired_end' => \$validate,
- 'retain_unpaired' => \$retain,
- 'length_1|r1=i' => \$length_read_1,
- 'length_2|r2=i' => \$length_read_2,
- 'e|error_rate=s' => \$error_rate,
- 'o|output_dir=s' => \$output_dir,
- 'no_report_file' => \$no_report_file,
- 'suppress_warn' => \$suppress_warn,
- 'dont_gzip' => \$dont_gzip,
- 'clip_R1=i' => \$clip_r1,
- 'clip_R2=i' => \$clip_r2,
- 'three_prime_clip_R1=i' => \$three_prime_clip_r1,
- 'three_prime_clip_R2=i' => \$three_prime_clip_r2,
- 'illumina' => \$illumina,
- 'nextera' => \$nextera,
- 'small_rna' => \$small_rna,
- 'path_to_cutadapt=s' => \$path_to_cutadapt,
- );
-
- ### EXIT ON ERROR if there were errors with any of the supplied options
- unless ($command_line){
- die "Please respecify command line options\n";
- }
-
- ### HELPFILE
- if ($help){
- print_helpfile();
- exit;
- }
-
-
-
-
-
- if ($version){
- print << "VERSION";
-
- Quality-/Adapter-/RRBS-Trimming
- (powered by Cutadapt)
- version $trimmer_version
-
- Last update: 06 05 2015
-
-VERSION
- exit;
- }
-
- ### RRBS
- unless ($rrbs){
- $rrbs = 0;
- }
-
- ### SUPRESS WARNINGS
- if (defined $suppress_warn){
- $DOWARN = 0;
- }
-
- ### QUALITY SCORES
- my $phred_encoding;
- if ($phred33){
- if ($phred64){
- die "Please specify only a single quality encoding type (--phred33 or --phred64)\n\n";
- }
- $phred_encoding = 33;
- }
- elsif ($phred64){
- $phred_encoding = 64;
- }
- unless ($phred33 or $phred64){
- warn "No quality encoding type selected. Assuming that the data provided uses Sanger encoded Phred scores (default)\n\n";
- $phred_encoding = 33;
- sleep (1);
- }
-
- ### NON-DIRECTIONAL RRBS
- if ($non_directional){
- unless ($rrbs){
- die "Option '--non_directional' requires '--rrbs' to be specified as well. Please re-specify!\n";
- }
- }
- else{
- $non_directional = 0;
- }
-
- if ($fastqc_args){
- $fastqc = 1; # specifying fastqc extra arguments automatically means that FastQC will be executed
- }
- else{
- $fastqc_args = 0;
- }
-
- ### CUSTOM ERROR RATE
- if (defined $error_rate){
- # make sure that the error rate is between 0 and 1
- unless ($error_rate >= 0 and $error_rate <= 1){
- die "Please specify an error rate between 0 and 1 (the default is 0.1)\n";
- }
- }
- else{
- $error_rate = 0.1; # (default)
- }
-
- if ($nextera and $small_rna or $nextera and $illumina or $illumina and $small_rna ){
- die "You can't use several different adapter types at the same time. Make your choice or consider using -a and -a2\n\n";
- }
-
- if (defined $adapter){
- unless ($adapter =~ /^[ACTGNXactgnx]+$/){
- die "Adapter sequence must contain DNA characters only (A,C,T,G or N)!\n";
- }
- $adapter = uc$adapter;
-
- if ($illumina){
- die "You can't supply an adapter sequence AND use the Illumina universal adapter sequence. Make your choice.\n\n";
- }
- if ($nextera){
- die "You can't supply an adapter sequence AND use the Nextera transposase adapter sequence. Make your choice.\n\n";
- }
- if ($small_rna){
- die "You can't supply an adapter sequence AND use the Illumina small RNA adapter sequence. Make your choice.\n\n";
- }
- }
-
- if (defined $adapter2){
- unless ($validate){
- die "An optional adapter for read 2 of paired-end files requires '--paired' to be specified as well! Please re-specify\n";
- }
- unless ($adapter2 =~ /^[ACTGNactgn]+$/){
- die "Optional adapter 2 sequence must contain DNA characters only (A,C,T,G or N)!\n";
- }
- $adapter2 = uc$adapter2;
- }
-
- ### LENGTH CUTOFF
- unless (defined $length_cutoff){
- $length_cutoff = 20;
- }
-
- ### files are supposed to be paired-end files
- if ($validate){
-
- # making sure that an even number of reads has been supplied
- unless ((scalar@ARGV)%2 == 0){
- die "Please provide an even number of input files for paired-end FastQ trimming! Aborting ...\n";
- }
-
- ## CUTOFF FOR VALIDATED READ-PAIRS
- if (defined $length_read_1 or defined $length_read_2){
-
- unless ($retain){
- die "Please specify --keep_unpaired to alter the unpaired single-end read length cut off(s)\n\n";
- }
-
- if (defined $length_read_1){
- unless ($length_read_1 >= 15 and $length_read_1 <= 100){
- die "Please select a sensible cutoff for when a read pair should be filtered out due to short length (allowed range: 15-100 bp)\n\n";
- }
- unless ($length_read_1 > $length_cutoff){
- die "The single-end unpaired read length needs to be longer than the paired-end cut-off value ($length_cutoff bp)\n\n";
- }
- }
-
- if (defined $length_read_2){
- unless ($length_read_2 >= 15 and $length_read_2 <= 100){
- die "Please select a sensible cutoff for when a read pair should be filtered out due to short length (allowed range: 15-100 bp)\n\n";
- }
- unless ($length_read_2 > $length_cutoff){
- die "The single-end unpaired read length needs to be longer than the paired-end cut-off value ($length_cutoff bp)\n\n";
- }
- }
- }
-
- if ($retain){
- $length_read_1 = 35 unless (defined $length_read_1);
- $length_read_2 = 35 unless (defined $length_read_2);
- }
- }
-
- unless ($no_report_file){
- $no_report_file = 0;
- }
-
- ### OUTPUT DIR PATH
- if ($output_dir){
- unless ($output_dir =~ /\/$/){
- $output_dir =~ s/$/\//;
- }
- }
- else{
- $output_dir = '';
- }
-
- ### Trimming at the 5' end
- if (defined $clip_r2){ # trimming 5' bases of read 1
- die "Clipping the 5' end of read 2 is only allowed for paired-end files (--paired)\n" unless ($validate);
- }
-
- if (defined $clip_r1){ # trimming 5' bases of read 1
- unless ($clip_r1 > 0 and $clip_r1 < 100){
- die "The 5' clipping value for read 1 should have a sensible value (> 0 and < read length)\n\n";
- }
- }
-
- if (defined $clip_r2){ # trimming 5' bases of read 2
- unless ($clip_r2 > 0 and $clip_r2 < 100){
- die "The 5' clipping value for read 2 should have a sensible value (> 0 and < read length)\n\n";
- }
- }
-
- ### Trimming at the 3' end
- if (defined $three_prime_clip_r1){ # trimming 3' bases of read 1
- unless ($three_prime_clip_r1 > 0 and $three_prime_clip_r1 < 100){
- die "The 3' clipping value for read 1 should have a sensible value (> 0 and < read length)\n\n";
- }
- }
-
- if (defined $three_prime_clip_r2){ # trimming 3' bases of read 2
- unless ($three_prime_clip_r2 > 0 and $three_prime_clip_r2 < 100){
- die "The 3' clipping value for read 2 should have a sensible value (> 0 and < read length)\n\n";
- }
- }
-
-
- return ($quality,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$adapter2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2,$three_prime_clip_r1,$three_prime_clip_r2,$nextera,$small_rna,$path_to_cutadapt,$illumina);
-}
-
-
-
-
-sub print_helpfile{
- print << "HELP";
-
- USAGE:
-
-trim_galore [options]
-
-
--h/--help Print this help message and exits.
-
--v/--version Print the version information and exits.
-
--q/--quality Trim low-quality ends from reads in addition to adapter removal. For
- RRBS samples, quality trimming will be performed first, and adapter
- trimming is carried in a second round. Other files are quality and adapter
- trimmed in a single pass. The algorithm is the same as the one used by BWA
- (Subtract INT from all qualities; compute partial sums from all indices
- to the end of the sequence; cut sequence at the index at which the sum is
- minimal). Default Phred score: 20.
-
---phred33 Instructs Cutadapt to use ASCII+33 quality scores as Phred scores
- (Sanger/Illumina 1.9+ encoding) for quality trimming. Default: ON.
-
---phred64 Instructs Cutadapt to use ASCII+64 quality scores as Phred scores
- (Illumina 1.5 encoding) for quality trimming.
-
---fastqc Run FastQC in the default mode on the FastQ file once trimming is complete.
-
---fastqc_args "" Passes extra arguments to FastQC. If more than one argument is to be passed
- to FastQC they must be in the form "arg1 arg2 etc.". An example would be:
- --fastqc_args "--nogroup --outdir /home/". Passing extra arguments will
- automatically invoke FastQC, so --fastqc does not have to be specified
- separately.
-
--a/--adapter Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will
- try to auto-detect whether the Illumina universal, Nextera transposase or Illumina
- small RNA adapter sequence was used. Also see '--illumina', '--nextera' and
- '--small_rna'. If no adapter can be detected within the first 1 million sequences
- of the first file specified Trim Galore defaults to '--illumina'.
-
--a2/--adapter2 Optional adapter sequence to be trimmed off read 2 of paired-end files. This
- option requires '--paired' to be specified as well.
-
---illumina Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter
- 'AGATCGGAAGAGC' instead of the default auto-detection of adapter sequence.
-
---nextera Adapter sequence to be trimmed is the first 12bp of the Nextera adapter
- 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence.
-
---small_rna Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA Adapter
- 'ATGGAATTCTCG' instead of the default auto-detection of adapter sequence.
-
-
---stringency Overlap with adapter sequence required to trim a sequence. Defaults to a
- very stringent setting of 1, i.e. even a single bp of overlapping sequence
- will be trimmed off from the 3' end of any read.
-
--e Maximum allowed error rate (no. of errors divided by the length of the matching
- region) (default: 0.1)
-
---gzip Compress the output file with GZIP. If the input files are GZIP-compressed
- the output files will automatically be GZIP compressed as well. As of v0.2.8 the
- compression will take place on the fly.
-
---dont_gzip Output files won't be compressed with GZIP. This option overrides --gzip.
-
---length Discard reads that became shorter than length INT because of either
- quality or adapter trimming. A value of '0' effectively disables
- this behaviour. Default: 20 bp.
-
- For paired-end files, both reads of a read-pair need to be longer than
- bp to be printed out to validated paired-end files (see option --paired).
- If only one read became too short there is the possibility of keeping such
- unpaired single-end reads (see --retain_unpaired). Default pair-cutoff: 20 bp.
-
--o/--output_dir If specified all output will be written to this directory instead of the current
- directory.
-
---no_report_file If specified no report file will be generated.
-
---suppress_warn If specified any output to STDOUT or STDERR will be suppressed.
-
---clip_R1 Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end
- reads). This may be useful if the qualities were very poor, or if there is some
- sort of unwanted bias at the 5' end. Default: OFF.
-
---clip_R2 Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads
- only). This may be useful if the qualities were very poor, or if there is some sort
- of unwanted bias at the 5' end. For paired-end BS-Seq, it is recommended to remove
- the first few bp because the end-repair reaction may introduce a bias towards low
- methylation. Please refer to the M-bias plot section in the Bismark User Guide for
- some examples. Default: OFF.
-
---three_prime_clip_R1 Instructs Trim Galore to remove bp from the 3' end of read 1 (or single-end
- reads) AFTER adapter/quality trimming has been performed. This may remove some unwanted
- bias from the 3' end that is not directly related to adapter sequence or basecall quality.
- Default: OFF.
-
---three_prime_clip_R2 Instructs Trim Galore to remove bp from the 3' end of read 2 AFTER
- adapter/quality trimming has been performed. This may remove some unwanted bias from
- the 3' end that is not directly related to adapter sequence or basecall quality.
- Default: OFF.
-
---path_to_cutadapt You may use this option to specify a path to the Cutadapt executable,
- e.g. /my/home/cutadapt-1.7.1/bin/cutadapt. Else it is assumed that Cutadapt is in
- the PATH.
-
-
-RRBS-specific options (MspI digested material):
-
---rrbs Specifies that the input file was an MspI digested RRBS sample (recognition
- site: CCGG). Sequences which were adapter-trimmed will have a further 2 bp
- removed from their 3' end. This is to avoid that the filled-in C close to the
- second MspI site in a sequence is used for methylation calls. Sequences which
- were merely trimmed because of poor quality will not be shortened further.
-
---non_directional Selecting this option for non-directional RRBS libraries will screen
- quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read
- and, if found, removes the first two basepairs. Like with the option
- '--rrbs' this avoids using cytosine positions that were filled-in
- during the end-repair step. '--non_directional' requires '--rrbs' to
- be specified as well.
-
---keep Keep the quality trimmed intermediate file. Default: off, which means
- the temporary file is being deleted after adapter trimming. Only has
- an effect for RRBS samples since other FastQ files are not trimmed
- for poor qualities separately.
-
-
-Note for RRBS using MseI:
-
-If your DNA material was digested with MseI (recognition motif: TTAA) instead of MspI it is NOT necessary
-to specify --rrbs or --non_directional since virtually all reads should start with the sequence
-'TAA', and this holds true for both directional and non-directional libraries. As the end-repair of 'TAA'
-restricted sites does not involve any cytosines it does not need to be treated especially. Instead, simply
-run Trim Galore! in the standard (i.e. non-RRBS) mode.
-
-
-Paired-end specific options:
-
---paired This option performs length trimming of quality/adapter/RRBS trimmed reads for
- paired-end files. To pass the validation test, both sequences of a sequence pair
- are required to have a certain minimum length which is governed by the option
- --length (see above). If only one read passes this length threshold the
- other read can be rescued (see option --retain_unpaired). Using this option lets
- you discard too short read pairs without disturbing the sequence-by-sequence order
- of FastQ files which is required by many aligners.
-
- Trim Galore! expects paired-end files to be supplied in a pairwise fashion, e.g.
- file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... .
-
--t/--trim1 Trims 1 bp off every read from its 3' end. This may be needed for FastQ files that
- are to be aligned as paired-end data with Bowtie. This is because Bowtie (1) regards
- alignments like this:
-
- R1 ---------------------------> or this: -----------------------> R1
- R2 <--------------------------- <----------------- R2
-
- as invalid (whenever a start/end coordinate is contained within the other read).
-
---retain_unpaired If only one of the two paired-end reads became too short, the longer
- read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq'
- output files. The length cutoff for unpaired single-end reads is
- governed by the parameters -r1/--length_1 and -r2/--length_2. Default: OFF.
-
--r1/--length_1 Unpaired single-end read length cutoff needed for read 1 to be written to
- '.unpaired_1.fq' output file. These reads may be mapped in single-end mode.
- Default: 35 bp.
-
--r2/--length_2 Unpaired single-end read length cutoff needed for read 2 to be written to
- '.unpaired_2.fq' output file. These reads may be mapped in single-end mode.
- Default: 35 bp.
-
-
-Last modified on 06 May 2015.
-
-HELP
- exit;
-}
diff -r b4e39d993fc8 -r 80cd83b11214 trim_galore.xml
--- a/trim_galore.xml Thu Apr 20 09:14:30 2017 -0400
+++ b/trim_galore.xml Mon Apr 24 14:30:07 2017 -0400
@@ -1,5 +1,5 @@
-
-
+
+
Quality and adapter trimmer of reads
@@ -24,7 +24,6 @@
-
^[ACTGNactgn]*$
@@ -44,16 +43,12 @@
-
- cutadapt
- cutadapt
+ trim-galore
- perl '$__tool_directory__/trim_galore' --version
+ trim_galore --version
-
- $report_file;
+ cat ./*_trimming_report.txt > '$report_file'
#end if
-
-]]>
-
+ ]]>
@@ -283,8 +275,8 @@
label="Screen quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read and, if found, removes the first two basepairs" />
+
-
singlePaired['sPaired'] == "single"
@@ -331,8 +323,8 @@
params['settingsType'] == "custom"
params['report'] == True
+
-
@@ -449,8 +441,7 @@
-
-
| R2 <-----------------
|
@@ -600,7 +591,7 @@
| Selecting this option for non-directional RRBS libraries will screen quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read and, if found, removes the first two basepairs. Like with the option '--rrbs' this avoids using cytosine positions that were filled-in during the end-repair step. '--non_directional' requires '--rrbs' to be specified as well.
|
- | *option --non_directional*]]>
-
+ | *option --non_directional*
+ ]]>