Mercurial > repos > bgruening > trim_galore
annotate trim_galore @ 10:b4e39d993fc8 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit bbef69cc08154b5c156c25f9ca43df0915803856
author | bgruening |
---|---|
date | Thu, 20 Apr 2017 09:14:30 -0400 |
parents | 11962ce40855 |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/perl |
2 use strict; | |
3 use warnings; | |
4 use Getopt::Long; | |
5 use IPC::Open3; | |
6 use File::Spec; | |
7 use File::Basename; | |
8 use Cwd; | |
9 | |
4 | 10 ## This program is Copyright (C) 2012-14, Felix Krueger (felix.krueger@babraham.ac.uk) |
0 | 11 |
12 ## This program is free software: you can redistribute it and/or modify | |
13 ## it under the terms of the GNU General Public License as published by | |
14 ## the Free Software Foundation, either version 3 of the License, or | |
15 ## (at your option) any later version. | |
16 | |
17 ## This program is distributed in the hope that it will be useful, | |
18 ## but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 ## GNU General Public License for more details. | |
21 | |
22 ## You should have received a copy of the GNU General Public License | |
23 ## along with this program. If not, see <http://www.gnu.org/licenses/>. | |
24 | |
25 | |
6
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
## this script is taking in FastQ sequences and trims them using Cutadapt

## last modified on 01 May 2015

# Global switch for on-screen messages: the __WARN__ handler below only
# forwards a warning to STDERR while $DOWARN is true.
my $DOWARN = 1; # print on screen warning and text by default
BEGIN { $SIG{'__WARN__'} = sub { warn $_[0] if $DOWARN } };

my $trimmer_version = '0.4.0';


# All run-time options come back from process_commandline() as one flat list;
# the order of the variables here has to match the order it returns them in.
my ($cutoff,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$a2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2,$three_prime_clip_r1,$three_prime_clip_r2,$nextera,$small_rna,$path_to_cutadapt,$illumina) = process_commandline();

# Whatever is left in @ARGV at this point is treated as the input file names.
my @filenames = @ARGV;
die "\nPlease provide the filename(s) of one or more FastQ file(s) to launch Trim Galore!\n
USAGE: 'trim_galore [options] <filename(s)>' or 'trim_galore --help' for more options\n\n" unless (@filenames);
file_sanity_check($filenames[0]);


########################################################################

my $path_to_fastqc = 'fastqc';

# Before we start let's have quick look if Cutadapt seems to be working with the path information provided
# To change the path to Cutadapt use --path_to_cutadapt /full/path/to/the/Cutadapt/executable

if (defined $path_to_cutadapt){
    warn "Path to Cutadapt set as: '$path_to_cutadapt' (user defined)\n";
    # we'll simply use this
}
else{
    $path_to_cutadapt = 'cutadapt'; # default, assuming it is in the PATH
    warn "Path to Cutadapt set as: '$path_to_cutadapt' (default)\n";
}

my $cutadapt_version;

# NOTE(review): $path_to_cutadapt is interpolated into a command string, so a
# user-supplied value containing shell metacharacters is interpreted by the shell.
my $return = system "$path_to_cutadapt --version"; #>/dev/null 2>&1";

# system() returns -1 when the program could not be started at all and the
# (non-zero) wait status when it started but failed. Both cases mean Cutadapt
# is unusable, so any non-zero value is treated as a failure rather than -1 only.
if ($return != 0){
    die "Failed to execute Cutadapt porperly. Please install Cutadapt first and make sure it is in the PATH, or specify the path to the Cutadapt executable using --path_to_cutadapt /path/to/cutadapt\n\n";
}
else{
    warn "Cutadapt seems to be working fine (tested command '$path_to_cutadapt --version')\n";
    # second invocation captures the version string for later use
    $cutadapt_version = `$path_to_cutadapt --version`;
    chomp $cutadapt_version;
    # warn "Cutadapt version: $cutadapt_version\n";
}
4 | 70 |
0 | 71 |
72 ######################################################################## | |
73 | |
6
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
# Guess which adapter type is present in the first input file.
#
# Reads up to the first 1 million FastQ records (4 lines each) of $ARGV[0],
# decompressing through zcat when the file name ends in 'gz', and counts the
# reads that contain a perfect match to the start of the Illumina, Nextera or
# small RNA adapter. A table with the tallies is written via warn.
#
# Takes no arguments; the file name is taken from $ARGV[0].
#
# Returns the list ($seq, $adapter_name) of the adapter with the most hits.
# If no adapter was found at all (including an empty input file) the Illumina
# universal adapter is returned with a name marking it as the default.
#
# Dies if the input file cannot be opened.
sub autodetect_adapter_type{
    my $file        = $ARGV[0];
    my $max_records = 1_000_000;

    warn "\n\nAUTO-DETECTING ADAPTER TYPE\n===========================\n";
    warn "Attempting to auto-detect adapter type from the first 1 million sequences of the first file (>> $file <<)\n\n";

    # Lexical handle with explicit open modes: the file name is never handed to
    # a shell, so names containing spaces or metacharacters are read correctly.
    my $in;
    if ($file =~ /gz$/){
        open ($in, '-|', 'zcat', $file) or die "Failed to read from file $file\n";
    }
    else{
        open ($in, '<', $file) or die "Failed to read from file $file\n";
    }

    my %adapters = (
        'Illumina' => {
            seq   => 'AGATCGGAAGAGC',
            count => 0,
            name  => 'Illumina TruSeq, Sanger iPCR; auto-detected',
        },
        'Nextera' => {
            seq   => 'CTGTCTCTTATA',
            count => 0,
            name  => 'Nextera Transposase sequence; auto-detected',
        },
        'smallRNA' => {
            seq   => 'ATGGAATTCTCG',
            count => 0,
            name  => 'Illumina small RNA adapter; auto-detected',
        },
    );

    # Read the first 1 million sequences, or until the end of the file,
    # whichever comes first; the adapter that occurs most often is then used
    # for trimming.
    my $count = 0;
    while ($count < $max_records){
        my $line1 = <$in>;
        my $line2 = <$in>;
        my $line3 = <$in>;
        my $line4 = <$in>;
        last unless (defined $line4); # end of file or truncated final record
        $count++;

        # every counted record is also tallied, so the reported number of
        # sequences analysed matches the number actually inspected
        chomp $line2;
        foreach my $type (keys %adapters){
            $adapters{$type}->{count}++ unless (index($line2,$adapters{$type}->{seq}) == -1);
        }
    }

    # The status of close is deliberately ignored: stopping early on a zcat
    # pipe makes zcat exit abnormally, which is not an error for our purposes.
    close $in;

    # Highest count first; ties are broken by adapter type name so that the
    # outcome does not depend on hash ordering.
    my @ranked = sort { $adapters{$b}->{count} <=> $adapters{$a}->{count} or $a cmp $b } keys %adapters;

    warn "Found perfect matches for the following adapter sequences:\nAdapter type\tCount\tSequence\tSequences analysed\tPercentage\n";
    foreach my $type (@ranked){
        # an empty input yields 0 records; report 0.00 instead of dividing by zero
        my $percentage = sprintf("%.2f", $count ? $adapters{$type}->{count}/$count*100 : 0);
        warn "$type\t$adapters{$type}->{count}\t$adapters{$type}->{seq}\t$count\t$percentage\n";
    }

    my ($highest,$second) = @ranked;
    my $seq          = $adapters{$highest}->{seq};
    my $adapter_name = $adapters{$highest}->{name};

    # using the highest occurrence as adapter to look out for
    if ($adapters{$highest}->{count} == $adapters{$second}->{count}){
        warn "Unable to auto-detect most prominent adapter from the first specified file (count $highest: $adapters{$highest}->{count}, count $second: $adapters{$second}->{count})\n";

        if ($adapters{$highest}->{count} == 0){
            warn "Defaulting to Illumina universal adapter ( AGATCGGAAGAGC ). Specify -a SEQUENCE to avoid this behavior).\n\n";
            $adapter_name = 'Illumina TruSeq, Sanger iPCR; default (inconclusive auto-detection)';
            $seq = 'AGATCGGAAGAGC';
        }
        else{
            warn "Using $highest adapter for trimming (count: $adapters{$highest}->{count}). Second best hit was $second (count: $adapters{$second}->{count})\n\n";
        }
    }
    else{
        warn "Using $highest adapter for trimming (count: $adapters{$highest}->{count}). Second best hit was $second (count: $adapters{$second}->{count})\n\n";
    }

    return ($seq,$adapter_name);
}
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
165 |
1 | 166 |
0 | 167 |
### FALLING BACK TO DEFAULT VALUES FOR EVERY OPTION THE USER LEFT UNSET

# Quality cutoff defaults to 20
$cutoff = 20 unless (defined $cutoff);

# Copy taken before $cutoff is shifted for Phred+64 data further down; it is only used for the report
my $phred_score_cutoff = $cutoff;

# Human readable description of the adapter in use; stays empty if the adapter sequence was supplied directly
my $adapter_name = '';

unless (defined $adapter){

  # [ option switch, adapter sequence, adapter description ]; the first switch that is set wins
  my @adapter_presets = (
    [ $nextera,   'CTGTCTCTTATA',  'Nextera Transposase sequence; user defined' ],
    [ $small_rna, 'ATGGAATTCTCG',  'Illumina small RNA adapter; user defined' ],
    [ $illumina,  'AGATCGGAAGAGC', 'Illumina TruSeq, Sanger iPCR; user defined' ],
  );

  foreach my $preset (@adapter_presets){
    my ($selected,$preset_sequence,$preset_name) = @{$preset};
    next unless ($selected);
    $adapter      = $preset_sequence;
    $adapter_name = $preset_name;
    last;
  }

  # default: none of the adapter switches was given, so the adapter type is auto-detected
  unless (defined $adapter){
    ($adapter,$adapter_name) = autodetect_adapter_type();
  }
}

# optional adapter for the second read in a pair. Only works for --paired trimming
$a2 = '' unless (defined $a2);

$stringency = 1 unless (defined $stringency);

# Phred+64 data: the cutoff handed to Cutadapt is raised by 31
# (presumably the difference between the ASCII offsets 64 and 33 - TODO confirm)
$cutoff += 31 if ($phred_encoding == 64);

# Names of the trimmed read 1 / read 2 files of the pair currently being processed (set inside trim())
my ($file_1,$file_2);

# A named loop variable is used on purpose: trim() assigns to $_ while reading files
foreach my $filename (@ARGV){
  trim ($filename);
}
209 | |
210 | |
### trim ($filename)
###
### Quality- and adapter-trims a single FastQ file with Cutadapt and writes the result to
### "$output_dir<basename>_trimmed.fq" (optionally gzip compressed).
###
###  - A run summary and the text Cutadapt wrote to its STDERR stream are echoed to STDERR and
###    written to "<basename>_trimming_report.txt" (or to the null device if $no_report_file is set).
###  - For RRBS libraries ($rrbs) with a non-zero cutoff, quality trimming and adapter trimming are
###    run as two separate Cutadapt passes so that adapter-trimmed reads can be shortened by a further 2 bp.
###  - In single-end mode reads are additionally 5'/3' clipped and filtered by $length_cutoff here;
###    in paired-end mode ($validate) this is left to validate_paired_end_files(), which is called
###    once both files of a pair have been trimmed.
###
### Argument: path of the FastQ file to trim (plain or .gz).
### Returns:  nothing meaningful. Dies if a file cannot be opened/closed or if Cutadapt exits non-zero.
### Relies on the file-level option variables (e.g. $cutoff, $adapter, $a2, $output_dir, @filenames).
sub trim{
  my $filename = shift;

  my $output_filename = (split (/\//,$filename))[-1];

  my $report = $output_filename;
  $report =~ s/$/_trimming_report.txt/;

  if ($no_report_file) {
    $report = File::Spec->devnull;
    open (REPORT,'>',$report) or die "Failed to write to file '$report': $!\n";
    # warn "Redirecting report output to /dev/null\n";
  }
  else{
    open (REPORT,'>',$output_dir.$report) or die "Failed to write to file '$output_dir$report': $!\n";
    warn "Writing report to '$output_dir$report'\n";
  }

  # Sends the very same message to STDERR and to the report file
  my $announce = sub {
    my $message = join ('',@_);
    warn $message;
    print REPORT $message;
  };

  $announce->("\nSUMMARISING RUN PARAMETERS\n==========================\nInput filename: $filename\n");

  if ($validate){ # paired-end mode
    $announce->("Trimming mode: paired-end\n");
  }
  else{
    $announce->("Trimming mode: single-end\n");
  }

  $announce->("Trim Galore version: $trimmer_version\n");
  $announce->("Cutadapt version: $cutadapt_version\n");
  $announce->("Quality Phred score cutoff: $phred_score_cutoff\n");
  $announce->("Quality encoding type selected: ASCII+$phred_encoding\n");
  $announce->("Adapter sequence: '$adapter' ($adapter_name)\n");

  if ($error_rate == 0.1){
    $announce->("Maximum trimming error rate: $error_rate (default)\n");
  }
  else{
    $announce->("Maximum trimming error rate: $error_rate\n");
  }

  if ($a2){
    $announce->("Optional adapter 2 sequence (only used for read 2 of paired-end files): '$a2'\n");
  }

  $announce->("Minimum required adapter overlap (stringency): $stringency bp\n");

  if ($validate){
    $announce->("Minimum required sequence length for both reads before a sequence pair gets removed: $length_cutoff bp\n");
  }
  else{
    $announce->("Minimum required sequence length before a sequence gets removed: $length_cutoff bp\n");
  }

  if ($validate and $retain){ # only for paired-end files: keeping single-end reads if only one end is long enough
    my $read_1_default = ($length_read_1 == 35) ? ' (default)' : '';
    my $read_2_default = ($length_read_2 == 35) ? ' (default)' : '';
    $announce->("Length cut-off for read 1: $length_read_1 bp$read_1_default\n");
    $announce->("Length cut-off for read 2: $length_read_2 bp$read_2_default\n");
  }

  if ($rrbs){
    $announce->("File was specified to be an MspI-digested RRBS sample. Sequences with adapter contamination will be trimmed a further 2 bp to remove potential methylation-biased bases from the end-repair reaction\n");
  }

  if ($non_directional){
    $announce->("File was specified to be a non-directional MspI-digested RRBS sample. Sequences starting with either 'CAA' or 'CGA' will have the first 2 bp trimmed off to remove potential methylation-biased bases from the end-repair reaction\n");
  }

  if ($trim){
    $announce->("All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n");
  }

  if ($clip_r1){
    $announce->("All Read 1 sequences will be trimmed by $clip_r1 bp from their 5' end to avoid poor qualities or biases\n");
  }
  if ($clip_r2){
    $announce->("All Read 2 sequences will be trimmed by $clip_r2 bp from their 5' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\n");
  }

  if ($three_prime_clip_r1){
    $announce->("All Read 1 sequences will be trimmed by $three_prime_clip_r1 bp from their 3' end to avoid poor qualities or biases\n");
  }
  if ($three_prime_clip_r2){
    $announce->("All Read 2 sequences will be trimmed by $three_prime_clip_r2 bp from their 3' end to avoid poor qualities or biases\n");
  }

  if ($fastqc){
    $announce->("Running FastQC on the data once trimming has completed\n");

    if ($fastqc_args){
      # console and report wording differ slightly here; the report text is left untouched on purpose
      warn "Running FastQC with the following extra arguments: '$fastqc_args'\n";
      print REPORT "Running FastQC with the following extra arguments: $fastqc_args\n";
    }
  }

  if ($keep and $rrbs){
    $announce->("Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n");
  }

  # gzipped input switches on gzipped output (note: $gzip is a file-level variable, so this sticks for later files)
  if ($gzip or $filename =~ /\.gz$/){
    $gzip = 1;
    unless ($dont_gzip){
      warn "Output file(s) will be GZIP compressed\n";
      print REPORT "Output file will be GZIP compressed\n";
    }
  }

  warn "\n";
  print REPORT "\n";
  sleep (3);

  my $temp; # name of the quality trimmed intermediate file; only set for RRBS runs with a cutoff other than 0

  ### Proceeding differently for RRBS and other type of libraries
  if ($rrbs){

    ### Skipping quality filtering for RRBS libraries if a quality cutoff of 0 was specified
    if ($cutoff == 0){
      warn "Quality cutoff selected was 0 - Skipping quality trimming altogether\n\n";
      sleep (3);
    }
    else{

      $temp = $filename;
      $temp =~ s/^.*\///; # replacing optional file path information
      $temp =~ s/$/_qual_trimmed.fastq/;
      open (TEMP,'>',$output_dir.$temp) or die "Can't write to '$temp': $!";

      warn " >>> Now performing adaptive quality trimming with a Phred-score cutoff of: $cutoff <<<\n\n";
      sleep (3);

      # '-a X' supplies a dummy adapter so that this pass only performs quality trimming
      open (QUAL,"$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -a X $filename |") or die "Can't open pipe to Cutadapt: $!";

      my $qual_count = 0;

      while (1){
	my $l1 = <QUAL>;
	my $seq = <QUAL>;
	my $l3 = <QUAL>;
	my $qual = <QUAL>;
	last unless (defined $qual);

	$qual_count++;
	if ($qual_count%10000000 == 0){
	  warn "$qual_count sequences processed\n";
	}
	print TEMP "$l1$seq$l3$qual";
      }

      warn "\n >>> Quality trimming completed <<<\n$qual_count sequences processed in total\n\n";
      close QUAL or die "Unable to close QUAL filehandle: $!\n";
      # closing explicitly so that write errors surface and the file is complete on disk before it is read again below
      close TEMP or die "Unable to close TEMP filehandle: $!\n";
      sleep (3);

    }
  }

  ### Working out the name of the trimmed output file (known FastQ extensions get replaced)
  unless ($output_filename =~ s/\.(?:fastq|fq)(?:\.gz)?$/_trimmed.fq/){
    $output_filename =~ s/$/_trimmed.fq/;
  }

  if ($gzip or $filename =~ /\.gz$/){
    if ($dont_gzip){
      open (OUT,'>',$output_dir.$output_filename) or die "Can't open '$output_filename': $!\n"; # don't need to gzip intermediate file
    }
    else{
      ### 6 Jan 2014: had a request to also gzip intermediate files to save disk space
      $output_filename .= '.gz';
      open (OUT,"| gzip -c - > ${output_dir}${output_filename}") or die "Can't write to '$output_filename': $!\n";
    }
  }
  else{
    open (OUT,'>',$output_dir.$output_filename) or die "Can't open '$output_filename': $!\n";
  }
  warn "Writing final adapter and quality trimmed output to $output_filename\n\n";

  my $count = 0;
  my $too_short = 0;
  my $quality_trimmed = 0;
  my $rrbs_trimmed = 0;
  my $rrbs_trimmed_start = 0;
  my $CAA = 0;
  my $CGA = 0;

  my $pid;

  ### Optionally using 2 different adapters for read 1 and read 2: the second adapter is only used for
  ### paired-end runs when an odd number of entries is left in @filenames (i.e. this file is read 2 of a pair)
  my $is_read_2_with_own_adapter = ($validate and $a2 and scalar(@filenames)%2 != 0);
  my $current_adapter = $is_read_2_with_own_adapter ? $a2 : $adapter;

  if ($rrbs and $cutoff != 0){

    warn "\n >>> Now performing adapter trimming for the adapter sequence: '$current_adapter' from file $temp <<< \n";
    sleep (3);
    $pid = open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $current_adapter $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";

    close WRITER or die $!; # not needed

    open (QUAL,'<',"$output_dir$temp") or die $!; # quality trimmed file

    if ($filename =~ /\.gz$/){
      open (IN,"zcat $filename |") or die $!; # original, untrimmed file
    }
    else{
      open (IN,'<',$filename) or die $!; # original, untrimmed file
    }

    while (1){

      # we can process the output from Cutadapt and the original input 1 by 1 to decide if the adapter has been removed or not
      my $l1 = <TRIM>;
      my $seq = <TRIM>; # adapter trimmed sequence
      my $l3 = <TRIM>;
      my $qual = <TRIM>;

      $_ = <IN>; # irrelevant
      my $original_seq = <IN>;
      $_ = <IN>; # irrelevant
      $_ = <IN>; # irrelevant

      $_ = <QUAL>; # irrelevant
      my $qual_trimmed_seq = <QUAL>;
      $_ = <QUAL>; # irrelevant
      my $qual_trimmed_qual = <QUAL>;

      last unless (defined $qual and defined $qual_trimmed_qual); # could be empty strings

      $count++;
      if ($count%10000000 == 0){
	warn "$count sequences processed\n";
      }

      chomp $seq;
      chomp $qual;
      chomp $qual_trimmed_seq;
      chomp $original_seq;

      my $quality_trimmed_seq_length = length $qual_trimmed_seq;

      # the read got shorter during the quality trimming pass
      if (length $original_seq > length $qual_trimmed_seq){
	++$quality_trimmed;
      }

      my $nd = 0; # set to 1 once the read was trimmed at its start (non-directional case)

      ### NON-DIRECTIONAL RRBS
      if ($non_directional){
	if (length$seq > 2){
	  if ($seq =~ /^CAA/){
	    ++$CAA;
	    $seq = substr ($seq,2,length($seq)-2);
	    $qual = substr ($qual,2,length($qual)-2);
	    ++$rrbs_trimmed_start;
	    $nd = 1;
	  }
	  elsif ($seq =~ /^CGA/){
	    $seq = substr ($seq,2,length($seq)-2);
	    $qual = substr ($qual,2,length($qual)-2);
	    ++$CGA;
	    ++$rrbs_trimmed_start;
	    $nd = 1;
	  }
	}
      }

      ### directional read
      unless ($nd == 1){
	# a read that is shorter than its quality trimmed version had adapter sequence removed => trim 2 more bp
	if (length $seq >= 2 and length$seq < $quality_trimmed_seq_length){
	  $seq = substr ($seq,0,length($seq)-2);
	  $qual = substr ($qual,0,length($qual)-2);
	  ++$rrbs_trimmed;
	}
      }

      ### Shortening all sequences by 1 bp on the 3' end
      if ($trim){
	$seq = substr($seq,0,length($seq)-1);
	$qual = substr($qual,0,length($qual)-1);
      }

      ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
      if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
	print OUT "$l1$seq\n$l3$qual\n";
      }
      else{ # single end

	if ($clip_r1){
	  if (length $seq > $clip_r1){ # sequences that are already too short won't be clipped again
	    $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
	    $qual = substr($qual,$clip_r1);
	  }
	}

	if ($three_prime_clip_r1){
	  if (length $seq > $three_prime_clip_r1){ # sequences that are already too short won't be clipped again
	    $seq = substr($seq,0,(length($seq) - $three_prime_clip_r1)); # keeping everything up to the bases to be removed from the 3' end
	    $qual = substr($qual,0,(length($qual) - $three_prime_clip_r1 ));
	  }
	}

	if (length $seq < $length_cutoff){
	  ++$too_short;
	  next;
	}
	else{
	  print OUT "$l1$seq\n$l3$qual\n";
	}
      }
    }

    # passing on whatever Cutadapt wrote to its STDERR stream
    print REPORT "\n";
    while (<ERROR>){
      warn $_;
      print REPORT $_;
    }

    close IN or die "Unable to close IN filehandle: $!";
    close QUAL or die "Unable to close QUAL filehandle: $!";
    close TRIM or die "Unable to close TRIM filehandle: $!";
    close ERROR or die "Unable to close ERROR filehandle: $!";
    close OUT or die "Unable to close OUT filehandle: $!";

  }
  else{

    warn "\n >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$current_adapter' from file $filename <<< \n";
    sleep (3);
    $pid = open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $current_adapter $filename") or die "Failed to launch Cutadapt: $!";

    close WRITER or die $!; # not needed

    while (1){

      my $l1 = <TRIM>;
      my $seq = <TRIM>; # quality and/or adapter trimmed sequence
      my $l3 = <TRIM>;
      my $qual = <TRIM>;
      last unless (defined $qual); # could be an empty string

      $count++;
      if ($count%10000000 == 0){
	warn "$count sequences processed\n";
      }

      chomp $seq;
      chomp $qual;

      ### Shortening all sequences by 1 bp on the 3' end
      if ($trim){
	$seq = substr($seq,0,length($seq)-1);
	$qual = substr($qual,0,length($qual)-1);
      }

      ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
      if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
	print OUT "$l1$seq\n$l3$qual\n";
      }
      else{ # single end

	if ($clip_r1){
	  if (length $seq > $clip_r1){ # sequences that are already too short won't be clipped again
	    $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
	    $qual = substr($qual,$clip_r1);
	  }
	}

	if ($three_prime_clip_r1){
	  if (length $seq > $three_prime_clip_r1){ # sequences that are already too short won't be clipped again
	    $seq = substr($seq,0,(length($seq) - $three_prime_clip_r1)); # keeping everything up to the bases to be removed from the 3' end
	    $qual = substr($qual,0,(length($qual) - $three_prime_clip_r1));
	  }
	}

	if (length $seq < $length_cutoff){
	  ++$too_short;
	  next;
	}
	else{
	  print OUT "$l1$seq\n$l3$qual\n";
	}
      }
    }

    # passing on whatever Cutadapt wrote to its STDERR stream
    print REPORT "\n";
    while (<ERROR>){
      warn $_;
      print REPORT $_;
    }

    close TRIM or die "Unable to close TRIM filehandle: $!\n";
    close ERROR or die "Unable to close ERROR filehandle: $!\n";
    close OUT or die "Unable to close OUT filehandle: $!\n";

  }


  # $temp only exists if a separate quality trimming pass was run (RRBS with a cutoff other than 0)
  if ($rrbs and defined $temp){
    unless ($keep){ # keeping the quality trimmed intermediate file for RRBS files

      # deleting temporary quality trimmed file
      my $deleted = unlink "$output_dir$temp";

      if ($deleted){
	warn "Successfully deleted temporary file $temp\n\n";
      }
      else{
	warn "Could not delete temporary file $temp";
      }
    }
  }

  ### Wait and reap the child process (Cutadapt) so that it doesn't become a zombie process
  waitpid $pid, 0;
  unless ($? == 0){
    die "\n\nCutadapt terminated with exit signal: '$?'.\nTerminating Trim Galore run, please check error message(s) to get an idea what went wrong...\n\n";
  }

  $announce->("\nRUN STATISTICS FOR INPUT FILE: $filename\n");
  $announce->("="x 45,"\n");
  $announce->("$count sequences processed in total\n");

  ### only reporting this separately if quality and adapter trimming were performed separately
  if ($rrbs){
    if ($count){
      my $percentage_shortened = sprintf ("%.1f",$quality_trimmed/$count*100);
      $announce->("Sequences were truncated to a varying degree because of deteriorating qualities (Phred score quality cutoff: $cutoff):\t$quality_trimmed ($percentage_shortened%)\n");
    }
    else{
      $announce->("Unable to determine percentage of reads that were shortened because 0 lines were processed\n\n");
    }
  }

  # all percentages below fall back to 'N/A' for empty input instead of dividing by zero
  my $percentage_too_short = $count ? sprintf ("%.1f",$too_short/$count*100) : 'N/A';

  if ($validate){ ### only for paired-end files
    warn "The length threshold of paired-end sequences gets evaluated later on (in the validation step)\n";
  }
  else{ ### Single-end file
    $announce->("Sequences removed because they became shorter than the length cutoff of $length_cutoff bp:\t$too_short ($percentage_too_short%)\n");
  }

  if ($rrbs){
    my $percentage_rrbs_trimmed = $count ? sprintf ("%.1f",$rrbs_trimmed/$count*100) : 'N/A';
    $announce->("RRBS reads trimmed by additional 2 bp when adapter contamination was detected:\t$rrbs_trimmed ($percentage_rrbs_trimmed%)\n");
  }

  if ($non_directional){
    my $percentage_rrbs_trimmed_at_start = $count ? sprintf ("%.1f",$rrbs_trimmed_start/$count*100) : 'N/A';
    $announce->("RRBS reads trimmed by 2 bp at the start when read started with CAA ($CAA) or CGA ($CGA) in total:\t$rrbs_trimmed_start ($percentage_rrbs_trimmed_at_start%)\n");
  }

  warn "\n";
  print REPORT "\n";

  ### RUNNING FASTQC unless we are dealing with paired-end files
  unless($validate){
    if ($fastqc){
      warn "\n >>> Now running FastQC on the data <<<\n\n";
      sleep (5);
      if ($fastqc_args){
	system ("$path_to_fastqc $fastqc_args $output_dir$output_filename");
      }
      else{
	system ("$path_to_fastqc $output_dir$output_filename");
      }
    }
  }

  ### VALIDATE PAIRED-END FILES
  if ($validate){

    ### Figure out whether current file counts as read 1 or read 2 of paired-end files

    if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
      $file_1 = $output_filename;
      shift @filenames;
    }
    else{ # this is read 2 of a pair
      $file_2 = $output_filename;
      shift @filenames;
    }

    # both files of the pair have been trimmed => validating them together
    if ($file_1 and $file_2){
      warn "Validate paired-end files $file_1 and $file_2\n";
      sleep (1);

      my ($val_1,$val_2,$un_1,$un_2) = validate_paired_end_files($file_1,$file_2);

      ### RUNNING FASTQC
      if ($fastqc){

	warn "\n >>> Now running FastQC on the validated data $val_1<<<\n\n";
	sleep (3);

	if ($fastqc_args){
	  system ("$path_to_fastqc $fastqc_args $output_dir$val_1");
	}
	else{
	  system ("$path_to_fastqc $output_dir$val_1");
	}

	warn "\n >>> Now running FastQC on the validated data $val_2<<<\n\n";
	sleep (3);

	if ($fastqc_args){
	  system ("$path_to_fastqc $fastqc_args $output_dir$val_2");
	}
	else{
	  system ("$path_to_fastqc $output_dir$val_2");
	}

      }

      warn "Deleting both intermediate output files $file_1 and $file_2\n";
      unlink "$output_dir$file_1";
      unlink "$output_dir$file_2";

      warn "\n",'='x100,"\n\n";
      sleep (3);

      $file_1 = undef; # setting file_1 and file_2 to undef once validation is completed
      $file_2 = undef;
    }
  }

}
859 | |
### Validates a pair of trimmed FastQ files (read 1 and read 2) record by record.
###
### Both files are read in lock-step from $output_dir. Optional 5' and 3' clipping
### ($clip_r1/$clip_r2, $three_prime_clip_r1/$three_prime_clip_r2) is applied to each read,
### and a pair is only written to the validated output files (*val_1.fq / *val_2.fq) if both
### reads are at least $length_cutoff bp long. If $retain is set, reads of a discarded pair
### that still reach $length_read_1 / $length_read_2 are written to *unpaired_1.fq / *unpaired_2.fq.
### Output is piped through gzip when $gzip is set and $dont_gzip is not.
###
### Arguments: file names (relative to $output_dir) of the trimmed read 1 and read 2 files
### Returns:   ($val_1, $val_2) or, if $retain is set, ($val_1, $val_2, $unpaired_1, $unpaired_2)
### Dies if a file cannot be opened, if the first record of either file does not look like
### FastQ, or if an output file cannot be closed cleanly.
sub validate_paired_end_files{

  my $file_1 = shift;
  my $file_2 = shift;

  warn "file_1: $file_1, file_2: $file_2\n\n";

  ### Opening the two input files. Gzipped files are streamed through zcat using the list form
  ### of open so that the file name is never interpreted by a shell
  my ($in_1,$in_2);

  if ($file_1 =~ /\.gz$/){
    open ($in_1,'-|','zcat',"$output_dir$file_1") or die "Couldn't read from file $file_1: $!\n";
  }
  else{
    open ($in_1,'<',"$output_dir$file_1") or die "Couldn't read from file $file_1: $!\n";
  }

  if ($file_2 =~ /\.gz$/){
    open ($in_2,'-|','zcat',"$output_dir$file_2") or die "Couldn't read from file $file_2: $!\n";
  }
  else{
    open ($in_2,'<',"$output_dir$file_2") or die "Couldn't read from file $file_2: $!\n";
  }

  warn "\n>>>>> Now validing the length of the 2 paired-end infiles: $file_1 and $file_2 <<<<<\n";
  sleep (3);

  ### Deriving the names of the validated output files from the trimmed file names
  my $out_1 = $file_1;
  my $out_2 = $file_2;

  $out_1 =~ s/trimmed\.fq(?:\.gz)?$/val_1.fq/;
  $out_2 =~ s/trimmed\.fq(?:\.gz)?$/val_2.fq/;

  # output is only compressed if --gzip was requested and not overridden by --dont_gzip
  my $compress_output = ($gzip and not $dont_gzip);

  my ($r1_fh,$r2_fh);

  if ($compress_output){
    $out_1 .= '.gz';
    open ($r1_fh,'|-',"gzip -c - > ${output_dir}${out_1}") or die "Can't write to $out_1: $!\n";
  }
  else{
    open ($r1_fh,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n";
  }

  if ($compress_output){
    $out_2 .= '.gz';
    open ($r2_fh,'|-',"gzip -c - > ${output_dir}${out_2}") or die "Can't write to $out_2: $!\n";
  }
  else{
    open ($r2_fh,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n";
  }

  warn "Writing validated paired-end read 1 reads to $out_1\n";
  warn "Writing validated paired-end read 2 reads to $out_2\n\n";

  my $unpaired_1;
  my $unpaired_2;
  my ($unpaired_fh_1,$unpaired_fh_2);

  ### Files for reads whose mate was too short are only created with --retain_unpaired
  if ($retain){

    $unpaired_1 = $file_1;
    $unpaired_2 = $file_2;

    $unpaired_1 =~ s/trimmed\.fq(?:\.gz)?$/unpaired_1.fq/;
    $unpaired_2 =~ s/trimmed\.fq(?:\.gz)?$/unpaired_2.fq/;

    if ($compress_output){
      $unpaired_1 .= '.gz';
      open ($unpaired_fh_1,'|-',"gzip -c - > ${output_dir}${unpaired_1}") or die "Can't write to $unpaired_1: $!\n";
    }
    else{
      open ($unpaired_fh_1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n";
    }

    if ($compress_output){
      $unpaired_2 .= '.gz';
      open ($unpaired_fh_2,'|-',"gzip -c - > ${output_dir}${unpaired_2}") or die "Can't write to $unpaired_2: $!\n";
    }
    else{
      open ($unpaired_fh_2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n";
    }

    warn "Writing unpaired read 1 reads to $unpaired_1\n";
    warn "Writing unpaired read 2 reads to $unpaired_2\n\n";
  }

  my $sequence_pairs_removed = 0;
  my $read_1_printed = 0;
  my $read_2_printed = 0;

  my $count = 0;

  ### Reading one 4-line FastQ record from each file per iteration; the loop ends as soon as
  ### either file runs out of complete records
  while (1){
    my $id_1   = <$in_1>;
    my $seq_1  = <$in_1>;
    my $l3_1   = <$in_1>;
    my $qual_1 = <$in_1>;
    last unless ($id_1 and $seq_1 and $l3_1 and $qual_1);

    my $id_2   = <$in_2>;
    my $seq_2  = <$in_2>;
    my $l3_2   = <$in_2>;
    my $qual_2 = <$in_2>;
    last unless ($id_2 and $seq_2 and $l3_2 and $qual_2);

    ++$count;

    ## small check if the sequence files appear to be FastQ files
    if ($count == 1){ # performed just once
      if ($id_1 !~ /^\@/ or $l3_1 !~ /^\+/){
        die "Input file doesn't seem to be in FastQ format at sequence $count\n";
      }
      if ($id_2 !~ /^\@/ or $l3_2 !~ /^\+/){
        die "Input file doesn't seem to be in FastQ format at sequence $count\n";
      }
    }

    chomp $seq_1;
    chomp $seq_2;
    chomp $qual_1;
    chomp $qual_2;

    ### 5' clipping: removing the first $clip_r1/$clip_r2 bases and their qualities
    if ($clip_r1){
      if (length $seq_1 > $clip_r1){ # sequences that are already too short won't be trimmed again
        $seq_1  = substr($seq_1,$clip_r1);
        $qual_1 = substr($qual_1,$clip_r1);
      }
    }
    if ($clip_r2){
      if (length $seq_2 > $clip_r2){ # sequences that are already too short won't be trimmed again
        $seq_2  = substr($seq_2,$clip_r2);
        $qual_2 = substr($qual_2,$clip_r2);
      }
    }

    ### 3' clipping: keeping everything except the last N bases and their qualities
    if ($three_prime_clip_r1){
      if (length $seq_1 > $three_prime_clip_r1){ # sequences that are already too short won't be clipped again
        $seq_1  = substr($seq_1,0,(length($seq_1) - $three_prime_clip_r1));
        $qual_1 = substr($qual_1,0,(length($qual_1) - $three_prime_clip_r1));
      }
    }
    if ($three_prime_clip_r2){
      if (length $seq_2 > $three_prime_clip_r2){ # sequences that are already too short won't be clipped again
        $seq_2  = substr($seq_2,0,(length($seq_2) - $three_prime_clip_r2));
        $qual_2 = substr($qual_2,0,(length($qual_2) - $three_prime_clip_r2));
      }
    }

    ### making sure that the reads do have a sensible length
    if ( (length($seq_1) < $length_cutoff) or (length($seq_2) < $length_cutoff) ){
      ++$sequence_pairs_removed;
      if ($retain){ # writing out single-end reads if they are longer than the cutoff

        if ( length($seq_1) >= $length_read_1){ # read 1 is long enough
          print {$unpaired_fh_1} $id_1;
          print {$unpaired_fh_1} "$seq_1\n";
          print {$unpaired_fh_1} $l3_1;
          print {$unpaired_fh_1} "$qual_1\n";
          ++$read_1_printed;
        }

        if ( length($seq_2) >= $length_read_2){ # read 2 is long enough
          print {$unpaired_fh_2} $id_2;
          print {$unpaired_fh_2} "$seq_2\n";
          print {$unpaired_fh_2} $l3_2;
          print {$unpaired_fh_2} "$qual_2\n";
          ++$read_2_printed;
        }

      }
    }
    else{
      print {$r1_fh} $id_1;
      print {$r1_fh} "$seq_1\n";
      print {$r1_fh} $l3_1;
      print {$r1_fh} "$qual_1\n";

      print {$r2_fh} $id_2;
      print {$r2_fh} "$seq_2\n";
      print {$r2_fh} $l3_2;
      print {$r2_fh} "$qual_2\n";
    }

  }

  my $percentage;

  if ($count){
    $percentage = sprintf("%.2f",$sequence_pairs_removed/$count*100);
  }
  else{
    $percentage = 'N/A'; # no records were read, avoiding a division by zero
  }

  warn "Total number of sequences analysed: $count\n\n";
  warn "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n";

  print REPORT "Total number of sequences analysed for the sequence pair length validation: $count\n\n";
  print REPORT "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n";

  # the unpaired counters are only ever incremented when --retain_unpaired is active, so this
  # summary has to be tied to $retain (it used to be tied to the unrelated $keep option)
  if ($retain){
    warn "Number of unpaired read 1 reads printed: $read_1_printed\n";
    warn "Number of unpaired read 2 reads printed: $read_2_printed\n";
  }

  # write errors (and gzip failures) only surface when the handle is closed
  close ($r1_fh) or die $!;
  close ($r2_fh) or die $!;

  if ($retain){
    close ($unpaired_fh_1) or die $!;
    close ($unpaired_fh_2) or die $!;
  }

  # Releasing the input handles (and reaping zcat). The result is deliberately not checked:
  # zcat may be terminated early if the other file ran out of records first
  close ($in_1);
  close ($in_2);

  warn "\n";
  if ($retain){
    return ($out_1,$out_2,$unpaired_1,$unpaired_2);
  }
  else{
    return ($out_1,$out_2);
  }
}
1121 | |
6
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1122 |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
### Performs a quick sanity check on an input file by reading its first FastQ entry
### (4 lines) and warning if the entry is incomplete, i.e. the file appears to be empty.
###
### Argument: path of the file to check
### Returns:  nothing
### Dies if the file cannot be opened for reading.
sub file_sanity_check{

  my $file = shift;

  # 3-argument open: the file name can never be mistaken for an open mode or a pipe
  open (my $sanity_fh,'<',$file) or die "Failed to read from file '$file' to perform sanity check\n";

  # just processing a single FastQ entry
  my $id    = <$sanity_fh>;
  my $seq   = <$sanity_fh>;
  my $three = <$sanity_fh>;
  my $qual  = <$sanity_fh>;

  # nothing else is read from the file, so the handle is released right away
  close ($sanity_fh);

  unless ($id and $seq and $three and $qual){
    warn "Input file '$file' seems to be completely empty. Consider respecifying!\n\n";
  }

  # NOTE(review): this return is unconditional, so the colorspace test below is never
  # reached. It may have been meant to sit inside the 'unless' block above, or it may
  # deliberately disable the test (the file is read as-is, so compressed input could
  # presumably contain digits) - confirm before moving or removing it.
  return;

  chomp $seq;

  # testing if the file is a colorspace file in which case we bail
  if ($seq =~ /\d+/){
    die "File seems to be in SOLiD colorspace format which is not supported by Trim Galore (sequence is: '$seq')! Please use Cutadapt on colorspace files separately and check its documentation!\n\n";
  }
}
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1147 |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1148 |
### Parses the command line with Getopt::Long, validates the options against each other and
### fills in defaults.
###
### Prints the help text or the version banner and exits if --help or --version was given.
### Dies with an explanatory message if options are invalid or contradict each other.
### Sets the file-level $DOWARN flag to 0 if --suppress_warn was specified.
###
### Returns a flat list in this order:
###   $quality, $adapter, $stringency, $rrbs, $length_cutoff, $keep, $fastqc, $non_directional,
###   $phred_encoding, $fastqc_args, $trim, $gzip, $validate, $retain, $length_read_1,
###   $length_read_2, $adapter2, $error_rate, $output_dir, $no_report_file, $dont_gzip,
###   $clip_r1, $clip_r2, $three_prime_clip_r1, $three_prime_clip_r2, $nextera, $small_rna,
###   $path_to_cutadapt, $illumina
sub process_commandline{
  my $help;
  my $quality;
  my $adapter;
  my $adapter2;
  my $stringency;
  my $report;   # --report is accepted on the command line but its value is not used in this sub
  my $version;
  my $rrbs;
  my $length_cutoff;
  my $keep;
  my $fastqc;
  my $non_directional;
  my $phred33;
  my $phred64;
  my $fastqc_args;
  my $trim;
  my $gzip;
  my $validate;
  my $retain;
  my $length_read_1;
  my $length_read_2;
  my $error_rate;
  my $output_dir;
  my $no_report_file;
  my $suppress_warn;
  my $dont_gzip;
  my $clip_r1;
  my $clip_r2;
  my $three_prime_clip_r1;
  my $three_prime_clip_r2;
  my $nextera;
  my $small_rna;
  my $illumina;
  my $path_to_cutadapt;

  my $command_line = GetOptions ('help|man' => \$help,
                                 'q|quality=i' => \$quality,
                                 'a|adapter=s' => \$adapter,
                                 'a2|adapter2=s' => \$adapter2,
                                 'report' => \$report,
                                 'version' => \$version,
                                 'stringency=i' => \$stringency,
                                 'fastqc' => \$fastqc,
                                 'RRBS' => \$rrbs,
                                 'keep' => \$keep,
                                 'length=i' => \$length_cutoff,
                                 'non_directional' => \$non_directional,
                                 'phred33' => \$phred33,
                                 'phred64' => \$phred64,
                                 'fastqc_args=s' => \$fastqc_args,
                                 'trim1' => \$trim,
                                 'gzip' => \$gzip,
                                 'paired_end' => \$validate,
                                 'retain_unpaired' => \$retain,
                                 'length_1|r1=i' => \$length_read_1,
                                 'length_2|r2=i' => \$length_read_2,
                                 'e|error_rate=s' => \$error_rate,
                                 'o|output_dir=s' => \$output_dir,
                                 'no_report_file' => \$no_report_file,
                                 'suppress_warn' => \$suppress_warn,
                                 'dont_gzip' => \$dont_gzip,
                                 'clip_R1=i' => \$clip_r1,
                                 'clip_R2=i' => \$clip_r2,
                                 'three_prime_clip_R1=i' => \$three_prime_clip_r1,
                                 'three_prime_clip_R2=i' => \$three_prime_clip_r2,
                                 'illumina' => \$illumina,
                                 'nextera' => \$nextera,
                                 'small_rna' => \$small_rna,
                                 'path_to_cutadapt=s' => \$path_to_cutadapt,
                                );

  ### EXIT ON ERROR if there were errors with any of the supplied options
  unless ($command_line){
    die "Please respecify command line options\n";
  }

  ### HELPFILE
  if ($help){
    print_helpfile();
    exit;
  }

  if ($version){
    print << "VERSION";

Quality-/Adapter-/RRBS-Trimming
(powered by Cutadapt)
version $trimmer_version

Last update: 06 05 2015

VERSION
    exit;
  }

  ### RRBS
  unless ($rrbs){
    $rrbs = 0;
  }

  ### SUPRESS WARNINGS
  if (defined $suppress_warn){
    $DOWARN = 0;
  }

  ### QUALITY SCORES
  my $phred_encoding;
  if ($phred33){
    if ($phred64){
      die "Please specify only a single quality encoding type (--phred33 or --phred64)\n\n";
    }
    $phred_encoding = 33;
  }
  elsif ($phred64){
    $phred_encoding = 64;
  }
  unless ($phred33 or $phred64){
    warn "No quality encoding type selected. Assuming that the data provided uses Sanger encoded Phred scores (default)\n\n";
    $phred_encoding = 33;
    sleep (1);
  }

  ### NON-DIRECTIONAL RRBS
  if ($non_directional){
    unless ($rrbs){
      die "Option '--non_directional' requires '--rrbs' to be specified as well. Please re-specify!\n";
    }
  }
  else{
    $non_directional = 0;
  }

  if ($fastqc_args){
    $fastqc = 1; # specifying fastqc extra arguments automatically means that FastQC will be executed
  }
  else{
    $fastqc_args = 0;
  }

  ### CUSTOM ERROR RATE
  if (defined $error_rate){
    # make sure that the error rate is between 0 and 1
    unless ($error_rate >= 0 and $error_rate <= 1){
      die "Please specify an error rate between 0 and 1 (the default is 0.1)\n";
    }
  }
  else{
    $error_rate = 0.1; # (default)
  }

  ### ADAPTER SELECTION: the built-in adapter types are mutually exclusive ...
  if ($nextera and $small_rna or $nextera and $illumina or $illumina and $small_rna ){
    die "You can't use several different adapter types at the same time. Make your choice or consider using -a and -a2\n\n";
  }

  ### ... and none of them may be combined with a user-supplied adapter sequence
  if (defined $adapter){
    unless ($adapter =~ /^[ACTGNXactgnx]+$/){
      die "Adapter sequence must contain DNA characters only (A,C,T,G or N)!\n";
    }
    $adapter = uc$adapter;

    if ($illumina){
      die "You can't supply an adapter sequence AND use the Illumina universal adapter sequence. Make your choice.\n\n";
    }
    if ($nextera){
      die "You can't supply an adapter sequence AND use the Nextera transposase adapter sequence. Make your choice.\n\n";
    }
    if ($small_rna){
      die "You can't supply an adapter sequence AND use the Illumina small RNA adapter sequence. Make your choice.\n\n";
    }
  }

  if (defined $adapter2){
    unless ($validate){
      die "An optional adapter for read 2 of paired-end files requires '--paired' to be specified as well! Please re-specify\n";
    }
    unless ($adapter2 =~ /^[ACTGNactgn]+$/){
      die "Optional adapter 2 sequence must contain DNA characters only (A,C,T,G or N)!\n";
    }
    $adapter2 = uc$adapter2;
  }

  ### LENGTH CUTOFF
  unless (defined $length_cutoff){
    $length_cutoff = 20;
  }

  ### files are supposed to be paired-end files
  if ($validate){

    # making sure that an even number of reads has been supplied
    unless ((scalar@ARGV)%2 == 0){
      die "Please provide an even number of input files for paired-end FastQ trimming! Aborting ...\n";
    }

    ## CUTOFF FOR VALIDATED READ-PAIRS
    if (defined $length_read_1 or defined $length_read_2){

      # the unpaired length cutoffs only have an effect together with --retain_unpaired
      unless ($retain){
        die "Please specify --retain_unpaired to alter the unpaired single-end read length cut off(s)\n\n";
      }

      if (defined $length_read_1){
        unless ($length_read_1 >= 15 and $length_read_1 <= 100){
          die "Please select a sensible cutoff for when a read pair should be filtered out due to short length (allowed range: 15-100 bp)\n\n";
        }
        unless ($length_read_1 > $length_cutoff){
          die "The single-end unpaired read length needs to be longer than the paired-end cut-off value ($length_cutoff bp)\n\n";
        }
      }

      if (defined $length_read_2){
        unless ($length_read_2 >= 15 and $length_read_2 <= 100){
          die "Please select a sensible cutoff for when a read pair should be filtered out due to short length (allowed range: 15-100 bp)\n\n";
        }
        unless ($length_read_2 > $length_cutoff){
          die "The single-end unpaired read length needs to be longer than the paired-end cut-off value ($length_cutoff bp)\n\n";
        }
      }
    }

    # default unpaired read length cutoffs
    if ($retain){
      $length_read_1 = 35 unless (defined $length_read_1);
      $length_read_2 = 35 unless (defined $length_read_2);
    }
  }

  unless ($no_report_file){
    $no_report_file = 0;
  }

  ### OUTPUT DIR PATH
  if ($output_dir){
    # the directory is later prepended to file names verbatim, so it needs a trailing slash
    unless ($output_dir =~ /\/$/){
      $output_dir =~ s/$/\//;
    }
  }
  else{
    $output_dir = '';
  }

  ### Trimming at the 5' end
  if (defined $clip_r2){ # clipping read 2 only makes sense for paired-end files
    die "Clipping the 5' end of read 2 is only allowed for paired-end files (--paired)\n" unless ($validate);
  }

  if (defined $clip_r1){ # trimming 5' bases of read 1
    unless ($clip_r1 > 0 and $clip_r1 < 100){
      die "The 5' clipping value for read 1 should have a sensible value (> 0 and < read length)\n\n";
    }
  }

  if (defined $clip_r2){ # trimming 5' bases of read 2
    unless ($clip_r2 > 0 and $clip_r2 < 100){
      die "The 5' clipping value for read 2 should have a sensible value (> 0 and < read length)\n\n";
    }
  }

  ### Trimming at the 3' end
  if (defined $three_prime_clip_r1){ # trimming 3' bases of read 1
    unless ($three_prime_clip_r1 > 0 and $three_prime_clip_r1 < 100){
      die "The 3' clipping value for read 1 should have a sensible value (> 0 and < read length)\n\n";
    }
  }

  if (defined $three_prime_clip_r2){ # trimming 3' bases of read 2
    unless ($three_prime_clip_r2 > 0 and $three_prime_clip_r2 < 100){
      die "The 3' clipping value for read 2 should have a sensible value (> 0 and < read length)\n\n";
    }
  }

  return ($quality,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$adapter2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2,$three_prime_clip_r1,$three_prime_clip_r2,$nextera,$small_rna,$path_to_cutadapt,$illumina);
}
1427 | |
1428 | |
1429 | |
1430 | |
## print_helpfile: prints the usage/help text below and then terminates the program.
## Takes no arguments (@_ is not read). The text is written with a bare 'print', i.e. to the
## currently selected output handle (STDOUT unless it has been changed elsewhere).
## The heredoc terminator is double-quoted ("HELP"), so the text is interpolated like a
## double-quoted string: a literal \$ or \@ added to the help text would have to be escaped.
## This sub never returns to its caller, because the final 'exit' ends the script.
1431 sub print_helpfile{ | |
1432 print << "HELP"; | |
1433 | |
1434 USAGE: | |
1435 | |
1436 trim_galore [options] <filename(s)> | |
1437 | |
1438 | |
1439 -h/--help Print this help message and exits. | |
1440 | |
1441 -v/--version Print the version information and exits. | |
1442 | |
1443 -q/--quality <INT> Trim low-quality ends from reads in addition to adapter removal. For | |
1444 RRBS samples, quality trimming will be performed first, and adapter | |
1445 trimming is carried in a second round. Other files are quality and adapter | |
1446 trimmed in a single pass. The algorithm is the same as the one used by BWA | |
1447 (Subtract INT from all qualities; compute partial sums from all indices | |
1448 to the end of the sequence; cut sequence at the index at which the sum is | |
1449 minimal). Default Phred score: 20. | |
1450 | |
1451 --phred33 Instructs Cutadapt to use ASCII+33 quality scores as Phred scores | |
1452 (Sanger/Illumina 1.9+ encoding) for quality trimming. Default: ON. | |
1453 | |
1454 --phred64 Instructs Cutadapt to use ASCII+64 quality scores as Phred scores | |
1455 (Illumina 1.5 encoding) for quality trimming. | |
1456 | |
1457 --fastqc Run FastQC in the default mode on the FastQ file once trimming is complete. | |
1458 | |
1459 --fastqc_args "<ARGS>" Passes extra arguments to FastQC. If more than one argument is to be passed | |
1460 to FastQC they must be in the form "arg1 arg2 etc.". An example would be: | |
1461 --fastqc_args "--nogroup --outdir /home/". Passing extra arguments will | |
1462 automatically invoke FastQC, so --fastqc does not have to be specified | |
1463 separately. | |
1464 | |
6
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1465 -a/--adapter <STRING> Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1466 try to auto-detect whether the Illumina universal, Nextera transposase or Illumina |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1467 small RNA adapter sequence was used. Also see '--illumina', '--nextera' and |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1468 '--small_rna'. If no adapter can be detected within the first 1 million sequences |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1469 of the first file specified Trim Galore defaults to '--illumina'. |
0 | 1470 |
1471 -a2/--adapter2 <STRING> Optional adapter sequence to be trimmed off read 2 of paired-end files. This | |
1472 option requires '--paired' to be specified as well. | |
1473 | |
6
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1474 --illumina Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1475 'AGATCGGAAGAGC' instead of the default auto-detection of adapter sequence. |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1476 |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1477 --nextera Adapter sequence to be trimmed is the first 12bp of the Nextera adapter |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1478 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence. |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1479 |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1480 --small_rna Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA Adapter |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1481 'ATGGAATTCTCG' instead of the default auto-detection of adapter sequence. |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1482 |
0 | 1483 |
4 | 1484 --stringency <INT> Overlap with adapter sequence required to trim a sequence. Defaults to a |
1485 very stringent setting of 1, i.e. even a single bp of overlapping sequence | |
1486 will be trimmed off from the 3' end of any read. | |
0 | 1487 |
1488 -e <ERROR RATE> Maximum allowed error rate (no. of errors divided by the length of the matching | |
1489 region) (default: 0.1) | |
1490 | |
1 | 1491 --gzip Compress the output file with GZIP. If the input files are GZIP-compressed |
1492 the output files will automatically be GZIP compressed as well. As of v0.2.8 the | |
1493 compression will take place on the fly. | |
1494 | |
1495 --dont_gzip Output files won't be compressed with GZIP. This option overrides --gzip. | |
0 | 1496 |
1497 --length <INT> Discard reads that became shorter than length INT because of either | |
1498 quality or adapter trimming. A value of '0' effectively disables | |
1499 this behaviour. Default: 20 bp. | |
1500 | |
1501 For paired-end files, both reads of a read-pair need to be longer than | |
1502 <INT> bp to be printed out to validated paired-end files (see option --paired). | |
1503 If only one read became too short there is the possibility of keeping such | |
1504 unpaired single-end reads (see --retain_unpaired). Default pair-cutoff: 20 bp. | |
1505 | |
1506 -o/--output_dir <DIR> If specified all output will be written to this directory instead of the current | |
1507 directory. | |
1508 | |
1509 --no_report_file If specified no report file will be generated. | |
1510 | |
1511 --suppress_warn If specified any output to STDOUT or STDERR will be suppressed. | |
1512 | |
1 | 1513 --clip_R1 <int> Instructs Trim Galore to remove <int> bp from the 5' end of read 1 (or single-end |
1514 reads). This may be useful if the qualities were very poor, or if there is some | |
1515 sort of unwanted bias at the 5' end. Default: OFF. | |
1516 | |
1517 --clip_R2 <int> Instructs Trim Galore to remove <int> bp from the 5' end of read 2 (paired-end reads | |
1518 only). This may be useful if the qualities were very poor, or if there is some sort | |
1519 of unwanted bias at the 5' end. For paired-end BS-Seq, it is recommended to remove | |
1520 the first few bp because the end-repair reaction may introduce a bias towards low | |
1521 methylation. Please refer to the M-bias plot section in the Bismark User Guide for | |
1522 some examples. Default: OFF. | |
1523 | |
6
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1524 --three_prime_clip_R1 <int> Instructs Trim Galore to remove <int> bp from the 3' end of read 1 (or single-end |
4 | 1525 reads) AFTER adapter/quality trimming has been performed. This may remove some unwanted |
1526 bias from the 3' end that is not directly related to adapter sequence or basecall quality. | |
1527 Default: OFF. | |
1528 | |
6
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1529 --three_prime_clip_R2 <int> Instructs Trim Galore to remove <int> bp from the 3' end of read 2 AFTER |
4 | 1530 adapter/quality trimming has been performed. This may remove some unwanted bias from |
1531 the 3' end that is not directly related to adapter sequence or basecall quality. | |
1532 Default: OFF. | |
0 | 1533 |
6
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1534 --path_to_cutadapt </path/to/cutadapt> You may use this option to specify a path to the Cutadapt executable, |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1535 e.g. /my/home/cutadapt-1.7.1/bin/cutadapt. Else it is assumed that Cutadapt is in |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1536 the PATH. |
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1537 |
0 | 1538 |
1539 RRBS-specific options (MspI digested material): | |
1540 | |
1541 --rrbs Specifies that the input file was an MspI digested RRBS sample (recognition | |
1542 site: CCGG). Sequences which were adapter-trimmed will have a further 2 bp | |
1543 removed from their 3' end. This is to avoid that the filled-in C close to the | |
1544 second MspI site in a sequence is used for methylation calls. Sequences which | |
1545 were merely trimmed because of poor quality will not be shortened further. | |
1546 | |
1547 --non_directional Selecting this option for non-directional RRBS libraries will screen | |
1548 quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read | |
1549 and, if found, removes the first two basepairs. Like with the option | |
1550 '--rrbs' this avoids using cytosine positions that were filled-in | |
1551 during the end-repair step. '--non_directional' requires '--rrbs' to | |
1552 be specified as well. | |
1553 | |
1554 --keep Keep the quality trimmed intermediate file. Default: off, which means | |
1555 the temporary file is being deleted after adapter trimming. Only has | |
1556 an effect for RRBS samples since other FastQ files are not trimmed | |
1557 for poor qualities separately. | |
1558 | |
1559 | |
1560 Note for RRBS using MseI: | |
1561 | |
1562 If your DNA material was digested with MseI (recognition motif: TTAA) instead of MspI it is NOT necessary | |
1563 to specify --rrbs or --non_directional since virtually all reads should start with the sequence | |
1564 'TAA', and this holds true for both directional and non-directional libraries. As the end-repair of 'TAA' | |
1565 restricted sites does not involve any cytosines it does not need to be treated especially. Instead, simply | |
1566 run Trim Galore! in the standard (i.e. non-RRBS) mode. | |
1567 | |
1568 | |
1569 Paired-end specific options: | |
1570 | |
1571 --paired This option performs length trimming of quality/adapter/RRBS trimmed reads for | |
1572 paired-end files. To pass the validation test, both sequences of a sequence pair | |
1573 are required to have a certain minimum length which is governed by the option | |
1574 --length (see above). If only one read passes this length threshold the | |
1575 other read can be rescued (see option --retain_unpaired). Using this option lets | |
1576 you discard too short read pairs without disturbing the sequence-by-sequence order | |
1577 of FastQ files which is required by many aligners. | |
1578 | |
1579 Trim Galore! expects paired-end files to be supplied in a pairwise fashion, e.g. | |
1580 file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... . | |
1581 | |
1582 -t/--trim1 Trims 1 bp off every read from its 3' end. This may be needed for FastQ files that | |
1583 are to be aligned as paired-end data with Bowtie. This is because Bowtie (1) regards | |
1584 alignments like this: | |
1585 | |
1586 R1 ---------------------------> or this: -----------------------> R1 | |
1587 R2 <--------------------------- <----------------- R2 | |
1588 | |
1589 as invalid (whenever a start/end coordinate is contained within the other read). | |
1590 | |
1591 --retain_unpaired If only one of the two paired-end reads became too short, the longer | |
1592 read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq' | |
1593 output files. The length cutoff for unpaired single-end reads is | |
1594 governed by the parameters -r1/--length_1 and -r2/--length_2. Default: OFF. | |
1595 | |
1596 -r1/--length_1 <INT> Unpaired single-end read length cutoff needed for read 1 to be written to | |
1597 '.unpaired_1.fq' output file. These reads may be mapped in single-end mode. | |
1598 Default: 35 bp. | |
1599 | |
1600 -r2/--length_2 <INT> Unpaired single-end read length cutoff needed for read 2 to be written to | |
1601 '.unpaired_2.fq' output file. These reads may be mapped in single-end mode. | |
1602 Default: 35 bp. | |
1603 | |
1604 | |
6
11962ce40855
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents:
4
diff
changeset
|
1605 Last modified on 06 May 2015. |
0 | 1606 |
1607 HELP | |
## 'exit' is called without an argument, so the program ends with exit status 0
1608 exit; | |
1609 }