annotate trim_galore @ 10:b4e39d993fc8 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit bbef69cc08154b5c156c25f9ca43df0915803856
author bgruening
date Thu, 20 Apr 2017 09:14:30 -0400
parents 11962ce40855
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1 #!/usr/bin/perl
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
2 use strict;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
3 use warnings;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
4 use Getopt::Long;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
5 use IPC::Open3;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
6 use File::Spec;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
7 use File::Basename;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
8 use Cwd;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
9
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
10 ## This program is Copyright (C) 2012-14, Felix Krueger (felix.krueger@babraham.ac.uk)
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
11
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
12 ## This program is free software: you can redistribute it and/or modify
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
13 ## it under the terms of the GNU General Public License as published by
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
14 ## the Free Software Foundation, either version 3 of the License, or
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
15 ## (at your option) any later version.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
16
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
17 ## This program is distributed in the hope that it will be useful,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
18 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
19 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
20 ## GNU General Public License for more details.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
21
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
22 ## You should have received a copy of the GNU General Public License
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
23 ## along with this program. If not, see <http://www.gnu.org/licenses/>.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
24
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
25
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
26 ## this script is taking in FastQ sequences and trims them using Cutadapt
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
27
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
28 ## last modified on 01 May 2015
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
29
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
30 my $DOWARN = 1; # print on screen warning and text by default
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
31 BEGIN { $SIG{'__WARN__'} = sub { warn $_[0] if $DOWARN } };
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
32
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
33 my $trimmer_version = '0.4.0';
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
34
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
35
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
36 my ($cutoff,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$a2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2,$three_prime_clip_r1,$three_prime_clip_r2,$nextera,$small_rna,$path_to_cutadapt,$illumina) = process_commandline();
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
37
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
38 my @filenames = @ARGV;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
39 die "\nPlease provide the filename(s) of one or more FastQ file(s) to launch Trim Galore!\n
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
40 USAGE: 'trim_galore [options] <filename(s)>' or 'trim_galore --help' for more options\n\n" unless (@filenames);
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
41 file_sanity_check($filenames[0]);
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
42
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
43
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
44 ########################################################################
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
45
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
46 my $path_to_fastqc = 'fastqc';
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
47
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
48 # Before we start let's have quick look if Cutadapt seems to be working with the path information provided
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
49 # To change the path to Cutadapt use --path_to_cutadapt /full/path/to/the/Cutadapt/executable
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
50
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
51 if(defined $path_to_cutadapt){
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
52 warn "Path to Cutadapt set as: '$path_to_cutadapt' (user defined)\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
53 # we'll simply use this
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
54 }
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
55 else{
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
56 $path_to_cutadapt = 'cutadapt'; # default, assuming it is in the PATH
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
57 warn "Path to Cutadapt set as: '$path_to_cutadapt' (default)\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
58 }
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
59 my $cutadapt_version;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
60 my $return = system "$path_to_cutadapt --version"; #>/dev/null 2>&1";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
61 if ($return == -1){
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
62 die "Failed to execute Cutadapt properly. Please install Cutadapt first and make sure it is in the PATH, or specify the path to the Cutadapt executable using --path_to_cutadapt /path/to/cutadapt\n\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
63 }
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
64 else{
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
65 warn "Cutadapt seems to be working fine (tested command '$path_to_cutadapt --version')\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
66 $cutadapt_version = `$path_to_cutadapt --version`;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
67 chomp $cutadapt_version;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
68 # warn "Cutadapt version: $cutadapt_version\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
69 }
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
70
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
71
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
72 ########################################################################
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
73
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
74 sub autodetect_adapter_type{
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
75 warn "\n\nAUTO-DETECTING ADAPTER TYPE\n===========================\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
76 warn "Attempting to auto-detect adapter type from the first 1 million sequences of the first file (>> $ARGV[0] <<)\n\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
77
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
78 if ($ARGV[0] =~ /gz$/){
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
79 open (AUTODETECT,"zcat $ARGV[0] |") or die "Failed to read from file $ARGV[0]\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
80 }
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
81 else{
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
82 open (AUTODETECT,$ARGV[0]) or die "Failed to read from file $ARGV[0]\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
83 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
84
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
85 my %adapters;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
86
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
87 $adapters{'Illumina'} -> {seq} = 'AGATCGGAAGAGC';
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
88 $adapters{'Illumina'} -> {count}= 0;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
89 $adapters{'Illumina'} -> {name}= 'Illumina TruSeq, Sanger iPCR; auto-detected';
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
90
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
91 $adapters{'Nextera'} -> {seq} = 'CTGTCTCTTATA';
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
92 $adapters{'Nextera'} -> {count}= 0;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
93 $adapters{'Nextera'} -> {name}= 'Nextera Transposase sequence; auto-detected';
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
94
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
95 $adapters{'smallRNA'} -> {seq} = 'ATGGAATTCTCG';
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
96 $adapters{'smallRNA'} -> {count}= 0;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
97 $adapters{'smallRNA'} -> {name}= 'Illumina small RNA adapter; auto-detected';
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
98
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
99
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
100 # we will read the first 1 million sequences, or until the end of the file, whichever comes first, and then use the adapter that was found to occur most often for trimming
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
101 my $count = 0;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
102 while (1){
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
103
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
104 my $line1 = <AUTODETECT>;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
105 my $line2 = <AUTODETECT>;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
106 my $line3 = <AUTODETECT>;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
107 my $line4 = <AUTODETECT>;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
108 last unless ($line4);
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
109 $count++;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
110 last if ($count == 1000000);
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
111
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
112 chomp $line2;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
113 $adapters{'Illumina'}->{count}++ unless (index($line2,'AGATCGGAAGAGC')== -1);
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
114 $adapters{'Nextera'} ->{count}++ unless (index($line2,'CTGTCTCTTATA') == -1);
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
115 $adapters{'smallRNA'}->{count}++ unless (index($line2,'ATGGAATTCTCG') == -1);
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
116
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
117 }
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
118
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
119 my $highest;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
120 my $second;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
121 my $seq;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
122 my $adapter_name;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
123
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
124 warn "Found perfect matches for the following adapter sequences:\nAdapter type\tCount\tSequence\tSequences analysed\tPercentage\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
125 foreach my $adapter (sort {$adapters{$b}->{count}<=>$adapters{$a}->{count}} keys %adapters){
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
126
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
127 my $percentage = sprintf("%.2f",$adapters{$adapter}->{count}/$count*100);
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
128
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
129 warn "$adapter\t$adapters{$adapter}->{count}\t$adapters{$adapter}->{seq}\t$count\t$percentage\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
130
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
131 unless (defined $highest){
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
132 $highest = $adapter;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
133 $seq = $adapters{$adapter}->{seq};
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
134 $adapter_name = $adapters{$adapter}->{name};
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
135 next;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
136 }
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
137 unless (defined $second){
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
138 $second = $adapter;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
139 }
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
140 }
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
141
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
142
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
143 # using the highest occurrence as adapter to look out for
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
144 if ($adapters{$highest}->{count} == $adapters{$second}->{count}){
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
145 warn "Unable to auto-detect most prominent adapter from the first specified file (count $highest: $adapters{$highest}->{count}, count $second: $adapters{$second}->{count})\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
146
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
147 if ($adapters{$highest}->{count} == 0){
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
148 warn "Defaulting to Illumina universal adapter ( AGATCGGAAGAGC ). Specify -a SEQUENCE to avoid this behavior).\n\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
149 $adapter_name = 'Illumina TruSeq, Sanger iPCR; default (inconclusive auto-detection)';
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
150 $seq = 'AGATCGGAAGAGC';
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
151 }
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
152 else{
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
153 warn "Using $highest adapter for trimming (count: $adapters{$highest}->{count}). Second best hit was $second (count: $adapters{$second}->{count})\n\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
154 }
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
155 }
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
156 else{
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
157 warn "Using $highest adapter for trimming (count: $adapters{$highest}->{count}). Second best hit was $second (count: $adapters{$second}->{count})\n\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
158 }
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
159
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
160 close AUTODETECT;
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
161
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
162 return ($seq,$adapter_name);
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
163
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
164 }
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
165
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
166
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
167
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
### Fill in a default for every option that was not supplied, then process the input files

$cutoff = 20 unless defined $cutoff;

# Snapshot the cutoff before it is possibly shifted further down; this copy is only used for reporting
my $phred_score_cutoff = $cutoff;

my $adapter_name = '';
unless (defined $adapter) {
    # Library-type flags are checked in this fixed order; the first one that is set supplies the adapter
    my @adapter_presets = (
        [ $nextera,   'CTGTCTCTTATA',  'Nextera Transposase sequence; user defined' ],
        [ $small_rna, 'ATGGAATTCTCG',  'Illumina small RNA adapter; user defined' ],
        [ $illumina,  'AGATCGGAAGAGC', 'Illumina TruSeq, Sanger iPCR; user defined' ],
    );
    my ($preset) = grep { $_->[0] } @adapter_presets;

    if ($preset) {
        (undef, $adapter, $adapter_name) = @$preset;
    }
    else {
        # default: no flag given, so the adapter sequence and its name come from auto-detection
        ($adapter, $adapter_name) = autodetect_adapter_type();
    }
}

# optional adapter for the second read in a pair. Only works for --paired trimming
$a2 = '' unless defined $a2;

$stringency = 1 unless defined $stringency;

# With ASCII+64 encoded qualities the working cutoff is raised by 31
$cutoff += 31 if $phred_encoding == 64;

# NOTE(review): not used in this section; presumably filled in by the paired-end handling elsewhere -- confirm
my $file_1;
my $file_2;

# Trim every file named on the command line, in the order given
for my $input_file (@ARGV) {
    trim($input_file);
}
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
209
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
210
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
211 sub trim{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
212 my $filename = shift;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
213
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
214 my $output_filename = (split (/\//,$filename))[-1];
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
215
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
216 my $report = $output_filename;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
217 $report =~ s/$/_trimming_report.txt/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
218
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
219 if ($no_report_file) {
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
220 $report = File::Spec->devnull;
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
221 open (REPORT,'>',$report) or die "Failed to write to file '$report': $!\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
222 # warn "Redirecting report output to /dev/null\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
223 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
224 else{
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
225 open (REPORT,'>',$output_dir.$report) or die "Failed to write to file '$report': $!\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
226 warn "Writing report to '$output_dir$report'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
227 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
228
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
229 warn "\nSUMMARISING RUN PARAMETERS\n==========================\nInput filename: $filename\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
230 print REPORT "\nSUMMARISING RUN PARAMETERS\n==========================\nInput filename: $filename\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
231
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
232 if ($validate){ # paired-end mode
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
233 warn "Trimming mode: paired-end\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
234 print REPORT "Trimming mode: paired-end\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
235 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
236 else{
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
237 warn "Trimming mode: single-end\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
238 print REPORT "Trimming mode: single-end\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
239 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
240
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
241
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
242 warn "Trim Galore version: $trimmer_version\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
243 print REPORT "Trim Galore version: $trimmer_version\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
244
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
245 warn "Cutadapt version: $cutadapt_version\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
246 print REPORT "Cutadapt version: $cutadapt_version\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
247
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
248 warn "Quality Phred score cutoff: $phred_score_cutoff\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
249 print REPORT "Quality Phred score cutoff: $phred_score_cutoff\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
250
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
251 warn "Quality encoding type selected: ASCII+$phred_encoding\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
252 print REPORT "Quality encoding type selected: ASCII+$phred_encoding\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
253
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
254 warn "Adapter sequence: '$adapter' ($adapter_name)\n";
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
255 print REPORT "Adapter sequence: '$adapter' ($adapter_name)\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
256
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
257 if ($error_rate == 0.1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
258 warn "Maximum trimming error rate: $error_rate (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
259 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
260 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
261 warn "Maximum trimming error rate: $error_rate\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
262 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
263
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
264 print REPORT "Maximum trimming error rate: $error_rate";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
265 if ($error_rate == 0.1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
266 print REPORT " (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
267 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
268 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
269 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
270 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
271
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
272 if ($a2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
273 warn "Optional adapter 2 sequence (only used for read 2 of paired-end files): '$a2'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
274 print REPORT "Optional adapter 2 sequence (only used for read 2 of paired-end files): '$a2'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
275 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
276
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
277 warn "Minimum required adapter overlap (stringency): $stringency bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
278 print REPORT "Minimum required adapter overlap (stringency): $stringency bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
279
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
280 if ($validate){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
281 warn "Minimum required sequence length for both reads before a sequence pair gets removed: $length_cutoff bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
282 print REPORT "Minimum required sequence length for both reads before a sequence pair gets removed: $length_cutoff bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
283 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
284 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
285 warn "Minimum required sequence length before a sequence gets removed: $length_cutoff bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
286 print REPORT "Minimum required sequence length before a sequence gets removed: $length_cutoff bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
287 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
288
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
289 if ($validate){ # only for paired-end files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
290
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
291 if ($retain){ # keeping single-end reads if only one end is long enough
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
292
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
293 if ($length_read_1 == 35){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
294 warn "Length cut-off for read 1: $length_read_1 bp (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
295 print REPORT "Length cut-off for read 1: $length_read_1 bp (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
296 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
297 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
298 warn "Length cut-off for read 1: $length_read_1 bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
299 print REPORT "Length cut-off for read 1: $length_read_1 bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
300 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
301
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
302 if ($length_read_2 == 35){
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
303 warn "Length cut-off for read 2: $length_read_2 bp (default)\n"; # unit is bp (base pairs), as in the read 1 message
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
304 print REPORT "Length cut-off for read 2: $length_read_2 bp (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
305 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
306 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
307 warn "Length cut-off for read 2: $length_read_2 bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
308 print REPORT "Length cut-off for read 2: $length_read_2 bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
309 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
310 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
311 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
312
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
313 if ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
314 warn "File was specified to be an MspI-digested RRBS sample. Sequences with adapter contamination will be trimmed a further 2 bp to remove potential methylation-biased bases from the end-repair reaction\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
315 print REPORT "File was specified to be an MspI-digested RRBS sample. Sequences with adapter contamination will be trimmed a further 2 bp to remove potential methylation-biased bases from the end-repair reaction\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
316 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
317
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
318 if ($non_directional){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
319 warn "File was specified to be a non-directional MspI-digested RRBS sample. Sequences starting with either 'CAA' or 'CGA' will have the first 2 bp trimmed off to remove potential methylation-biased bases from the end-repair reaction\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
320 print REPORT "File was specified to be a non-directional MspI-digested RRBS sample. Sequences starting with either 'CAA' or 'CGA' will have the first 2 bp trimmed off to remove potential methylation-biased bases from the end-repair reaction\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
321 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
322
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
323 if ($trim){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
324 warn "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
325 print REPORT "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
326 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
327
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
328 if ($clip_r1){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
329 warn "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5' end to avoid poor qualities or biases\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
330 print REPORT "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5' end to avoid poor qualities or biases\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
331 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
332 if ($clip_r2){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
333 warn "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
334 print REPORT "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
335 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
336
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
337 if ($three_prime_clip_r1){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
338 warn "All Read 1 sequences will be trimmed by $three_prime_clip_r1 bp from their 3' end to avoid poor qualities or biases\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
339 print REPORT "All Read 1 sequences will be trimmed by $three_prime_clip_r1 bp from their 3' end to avoid poor qualities or biases\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
340 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
341 if ($three_prime_clip_r2){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
342 warn "All Read 2 sequences will be trimmed by $three_prime_clip_r2 bp from their 3' end to avoid poor qualities or biases\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
343 print REPORT "All Read 2 sequences will be trimmed by $three_prime_clip_r2 bp from their 3' end to avoid poor qualities or biases\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
344 }
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
345
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
346 if ($fastqc){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
347 warn "Running FastQC on the data once trimming has completed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
348 print REPORT "Running FastQC on the data once trimming has completed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
349
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
350 if ($fastqc_args){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
351 warn "Running FastQC with the following extra arguments: '$fastqc_args'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
352 print REPORT "Running FastQC with the following extra arguments: $fastqc_args\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
353 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
354 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
355
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
356 if ($keep and $rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
357 warn "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
358 print REPORT "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
359 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
360
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
361
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
362 if ($gzip or $filename =~ /\.gz$/){
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
363 $gzip = 1;
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
364 unless ($dont_gzip){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
365 warn "Output file(s) will be GZIP compressed\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
366 print REPORT "Output file will be GZIP compressed\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
367 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
368 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
369
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
370 warn "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
371 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
372 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
373
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
374 my $temp;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
375
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
376 ### Proceeding differently for RRBS and other type of libraries
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
377 if ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
378
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
379 ### Skipping quality filtering for RRBS libraries if a quality cutoff of 0 was specified
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
380 if ($cutoff == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
381 warn "Quality cutoff selected was 0 - Skipping quality trimming altogether\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
382 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
383 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
384 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
385
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
386 $temp = $filename;
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
387 $temp =~ s/^.*\///; # replacing optional file path information
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
388 $temp =~ s/$/_qual_trimmed.fastq/;
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
389 open (TEMP,'>',$output_dir.$temp) or die "Can't write to '$temp': $!";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
390
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
391 warn " >>> Now performing adaptive quality trimming with a Phred-score cutoff of: $cutoff <<<\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
392 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
393
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
394 open (QUAL,"$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -a X $filename |") or die "Can't open pipe to Cutadapt: $!";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
395
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
396 my $qual_count = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
397
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
398 while (1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
399 my $l1 = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
400 my $seq = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
401 my $l3 = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
402 my $qual = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
403 last unless (defined $qual);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
404
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
405 $qual_count++;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
406 if ($qual_count%10000000 == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
407 warn "$qual_count sequences processed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
408 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
409 print TEMP "$l1$seq$l3$qual";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
410 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
411
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
412 warn "\n >>> Quality trimming completed <<<\n$qual_count sequences processed in total\n\n";
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
413 close QUAL or die "Unable to close QUAL filehandle: $!\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
414 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
415
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
416 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
417 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
418
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
419
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
420 if ($output_filename =~ /\.fastq$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
421 $output_filename =~ s/\.fastq$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
422 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
423 elsif ($output_filename =~ /\.fastq\.gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
424 $output_filename =~ s/\.fastq\.gz$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
425 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
426 elsif ($output_filename =~ /\.fq$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
427 $output_filename =~ s/\.fq$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
428 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
429 elsif ($output_filename =~ /\.fq\.gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
430 $output_filename =~ s/\.fq\.gz$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
431 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
432 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
433 $output_filename =~ s/$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
434 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
435
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
### Open the OUT filehandle that receives the trimmed reads.
### The output is piped through gzip when $gzip is set or the input file name ends
### in .gz, unless $dont_gzip is set; in every other case a plain file is written.
### (6 Jan 2014: had a request to also gzip intermediate files to save disk space,
### which is why the paired-end intermediate files are compressed here as well.)
if ( ($gzip or $filename =~ /\.gz$/) and not $dont_gzip ){
    $output_filename .= '.gz';

    # The redirection target ends up inside a shell command line. Wrapping it in single
    # quotes (and escaping any single quote it contains) keeps directory or file names
    # with spaces or shell metacharacters from being split or interpreted by the shell.
    (my $shell_quoted_target = $output_dir.$output_filename) =~ s/'/'\\''/g;

    open (OUT,"| gzip -c - > '$shell_quoted_target'") or die "Can't write to '$output_filename': $!\n";
}
else{
    open (OUT,'>',$output_dir.$output_filename) or die "Can't open '$output_filename': $!\n";
}
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
452 warn "Writing final adapter and quality trimmed output to $output_filename\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
453
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
454 my $count = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
455 my $too_short = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
456 my $quality_trimmed = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
457 my $rrbs_trimmed = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
458 my $rrbs_trimmed_start = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
459 my $CAA = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
460 my $CGA = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
461
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
462 my $pid;
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
463
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
464 if ($rrbs and $cutoff != 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
465
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
466 ### optionally using 2 different adapters for read 1 and read 2
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
### Decide which adapter applies to the current file. $a2 is only used when
### paired-end validation is on, a second adapter was supplied, and the number of
### entries in @filenames is odd (the file then counts as read 2 of a pair).
### Read 1 files, and all files when no second adapter is in play, use $adapter.
my $adapter_for_this_file = $adapter;
if ($validate and $a2 and scalar(@filenames)%2 != 0){
    $adapter_for_this_file = $a2;
}

warn "\n >>> Now performing adapter trimming for the adapter sequence: '$adapter_for_this_file' from file $temp <<< \n";
sleep (3);

### Cutadapt works on the quality-trimmed intermediate file; its STDOUT is read via TRIM and its STDERR via ERROR
$pid = open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $adapter_for_this_file $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
486
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
487 close WRITER or die $!; # not needed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
488
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
### QUAL: the quality-trimmed intermediate file.
### Three-argument open is used so that the path is always treated as a plain file
### name, never as a mode or pipe specification.
open (QUAL,'<',"$output_dir$temp") or die "Failed to read quality-trimmed file '$output_dir$temp': $!";

### IN: the original, untrimmed file.
if ($filename =~ /\.gz$/){
    # List-form pipe open runs zcat directly without a shell, so file names containing
    # spaces or shell metacharacters are passed through unchanged.
    open (IN,'-|','zcat',$filename) or die "Failed to start zcat for '$filename': $!";
}
else{
    open (IN,'<',$filename) or die "Failed to read '$filename': $!";
}
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
497
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
498 while (1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
499
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
500 # we can process the output from Cutadapt and the original input 1 by 1 to decide if the adapter has been removed or not
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
501 my $l1 = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
502 my $seq = <TRIM>; # adapter trimmed sequence
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
503 my $l3 = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
504 my $qual = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
505
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
506 $_ = <IN>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
507 my $original_seq = <IN>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
508 $_ = <IN>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
509 $_ = <IN>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
510
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
511 $_ = <QUAL>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
512 my $qual_trimmed_seq = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
513 $_ = <QUAL>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
514 my $qual_trimmed_qual = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
515
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
516 last unless (defined $qual and defined $qual_trimmed_qual); # could be empty strings
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
517
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
518 $count++;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
519 if ($count%10000000 == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
520 warn "$count sequences processed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
521 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
522
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
523 chomp $seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
524 chomp $qual;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
525 chomp $qual_trimmed_seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
526 chomp $original_seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
527
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
528 my $quality_trimmed_seq_length = length $qual_trimmed_seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
529
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
530 if (length $original_seq > length $qual_trimmed_seq){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
531 ++$quality_trimmed;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
532 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
533
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
534 my $nd = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
535
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
536 ### NON-DIRECTIONAL RRBS
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
537 if ($non_directional){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
538 if (length$seq > 2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
539 if ($seq =~ /^CAA/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
540 ++$CAA;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
541 $seq = substr ($seq,2,length($seq)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
542 $qual = substr ($qual,2,length($qual)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
543 ++$rrbs_trimmed_start;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
544 $nd = 1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
545 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
546 elsif ($seq =~ /^CGA/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
547 $seq = substr ($seq,2,length($seq)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
548 $qual = substr ($qual,2,length($qual)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
549 ++$CGA;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
550 ++$rrbs_trimmed_start;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
551 $nd = 1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
552 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
553 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
554 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
555
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
556 ### directional read
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
557 unless ($nd == 1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
558 if (length $seq >= 2 and length$seq < $quality_trimmed_seq_length){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
559 $seq = substr ($seq,0,length($seq)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
560 $qual = substr ($qual,0,length($qual)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
561 ++$rrbs_trimmed;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
562 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
563 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
564
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
565 ### Shortening all sequences by 1 bp on the 3' end
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
566 if ($trim){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
567 $seq = substr($seq,0,length($seq)-1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
568 $qual = substr($qual,0,length($qual)-1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
569 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
570
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
571 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
572 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
573 print OUT "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
574 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
575 else{ # single end
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
576
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
577 if ($clip_r1){
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
578 if (length $seq > $clip_r1){ # sequences that are already too short won't be clipped again
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
579 $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
580 $qual = substr($qual,$clip_r1);
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
581 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
582 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
583
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
584 if ($three_prime_clip_r1){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
585
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
586 if (length $seq > $three_prime_clip_r1){ # sequences that are already too short won't be clipped again
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
587 # warn "seq/qual before/after trimming:\n$seq\n$qual\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
588 $seq = substr($seq,0,(length($seq) - $three_prime_clip_r1)); # starting after the sequences to be trimmed until the end of the sequence
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
589 $qual = substr($qual,0,(length($qual) - $three_prime_clip_r1 ));
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
590 # warn "$seq\n$qual\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
591 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
592
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
593 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
594
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
595 if (length $seq < $length_cutoff){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
596 ++$too_short;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
597 next;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
598 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
599 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
600 print OUT "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
601 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
602 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
603 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
604
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
605 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
606 while (<ERROR>){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
607 warn $_;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
608 print REPORT $_;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
609 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
610
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
### Release every handle used while processing this file. ERROR (Cutadapt's STDERR,
### read to the end just above) is closed here as well, so that no handle from this
### run is left open.
close IN or die "Unable to close IN filehandle: $!";
close QUAL or die "Unable to close QUAL filehandle: $!";
close TRIM or die "Unable to close TRIM filehandle: $!";
close ERROR or die "Unable to close ERROR filehandle: $!";
close OUT or die "Unable to close OUT filehandle: $!";
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
615
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
616 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
617 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
618
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
619 ### optionally using 2 different adapters for read 1 and read 2
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
### Decide which adapter applies to the current file. $a2 is only used when
### paired-end validation is on, a second adapter was supplied, and the number of
### entries in @filenames is odd (the file then counts as read 2 of a pair).
### Read 1 files, and all files when no second adapter is in play, use $adapter.
my $adapter_for_this_file = $adapter;
if ($validate and $a2 and scalar(@filenames)%2 != 0){
    $adapter_for_this_file = $a2;
}

warn "\n >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$adapter_for_this_file' from file $filename <<< \n";
sleep (3);

### Quality trimming (-q) and adapter removal (-a) are done by a single Cutadapt call on the original input file
$pid = open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $adapter_for_this_file $filename") or die "Failed to launch Cutadapt: $!";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
639
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
640 close WRITER or die $!; # not needed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
641
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
642 while (1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
643
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
644 my $l1 = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
645 my $seq = <TRIM>; # quality and/or adapter trimmed sequence
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
646 my $l3 = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
647 my $qual = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
648 # print "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
649 last unless (defined $qual); # could be an empty string
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
650
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
651 $count++;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
652 if ($count%10000000 == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
653 warn "$count sequences processed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
654 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
655
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
656 chomp $seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
657 chomp $qual;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
658
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
659 ### Shortening all sequences by 1 bp on the 3' end
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
660 if ($trim){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
661 $seq = substr($seq,0,length($seq)-1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
662 $qual = substr($qual,0,length($qual)-1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
663 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
664
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
665 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
666 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
667 print OUT "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
668 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
669 else{ # single end
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
670
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
671 if ($clip_r1){
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
672 if (length $seq > $clip_r1){ # sequences that are already too short won't be clipped again
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
673 $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
674 $qual = substr($qual,$clip_r1);
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
675 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
676 }
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
677
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
678 if ($three_prime_clip_r1){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
679 if (length $seq > $three_prime_clip_r1){ # sequences that are already too short won't be clipped again
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
680 # warn "seq/qual before/after trimming:\n$seq\n$qual\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
681 $seq = substr($seq,0,(length($seq) - $three_prime_clip_r1)); # starting after the sequences to be trimmed until the end of the sequence
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
682 $qual = substr($qual,0,(length($qual) - $three_prime_clip_r1));
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
683 # warn "$seq\n$qual\n";sleep(1);
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
684 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
685 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
686
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
687 if (length $seq < $length_cutoff){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
688 ++$too_short;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
689 next;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
690 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
691 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
692 print OUT "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
693 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
694 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
695 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
696
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
697 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
698 while (<ERROR>){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
699 warn $_;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
700 print REPORT $_;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
701 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
702
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
703 close TRIM or die "Unable to close TRIM filehandle: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
704 close ERROR or die "Unable to close ERROR filehandle: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
705 close OUT or die "Unable to close OUT filehandle: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
706
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
707 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
708
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
709
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
710 if ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
711 unless ($keep){ # keeping the quality trimmed intermediate file for RRBS files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
712
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
713 # deleting temporary quality trimmed file
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
714 my $deleted = unlink "$output_dir$temp";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
715
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
716 if ($deleted){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
717 warn "Successfully deleted temporary file $temp\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
718 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
719 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
720 warn "Could not delete temporary file $temp";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
721 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
722 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
723 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
724
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
725 ### Wait and reap the child process (Cutadapt) so that it doesn't become a zombie process
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
waitpid $pid, 0;
my $cutadapt_status = $?; # raw wait status of the Cutadapt child process
unless ($cutadapt_status == 0){
    # The raw status is not an exit code: the exit code sits in the high byte and the
    # number of a terminating signal in the low 7 bits. -1 means the child could not be
    # waited for at all. Reporting the decoded values makes the failure easier to interpret.
    my $status_detail;
    if ($cutadapt_status == -1){
        $status_detail = "child process could not be waited for: $!";
    }
    else{
        $status_detail = sprintf ("exit code: %d, signal: %d", $cutadapt_status >> 8, $cutadapt_status & 127);
    }
    die "\n\nCutadapt terminated with exit signal: '$cutadapt_status' ($status_detail).\nTerminating Trim Galore run, please check error message(s) to get an idea what went wrong...\n\n";
}
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
730
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
731 warn "\nRUN STATISTICS FOR INPUT FILE: $filename\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
732 print REPORT "\nRUN STATISTICS FOR INPUT FILE: $filename\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
733
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
734 warn "="x 45,"\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
735 print REPORT "="x 45,"\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
736
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
737 warn "$count sequences processed in total\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
738 print REPORT "$count sequences processed in total\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
739
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
740 ### only reporting this separately if quality and adapter trimming were performed separately
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
741 if ($rrbs){
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
742 my $percentage_shortened;
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
743 if ($count){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
744 $percentage_shortened = sprintf ("%.1f",$quality_trimmed/$count*100);
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
745 warn "Sequences were truncated to a varying degree because of deteriorating qualities (Phred score quality cutoff: $cutoff):\t$quality_trimmed ($percentage_shortened%)\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
746 print REPORT "Sequences were truncated to a varying degree because of deteriorating qualities (Phred score quality cutoff: $cutoff):\t$quality_trimmed ($percentage_shortened%)\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
747 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
748 else{
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
749 warn "Unable to determine percentage of reads that were shortened because 0 lines were processed\n\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
750 print REPORT "Unable to determine percentage of reads that were shortened because 0 lines were processed\n\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
751 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
752 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
753
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
754 my $percentage_too_short;
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
755 if ($count){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
756 $percentage_too_short = sprintf ("%.1f",$too_short/$count*100);
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
757 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
758 else{
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
759 $percentage_too_short = 'N/A';
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
760 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
761
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
762 if ($validate){ ### only for paired-end files
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
763 warn "The length threshold of paired-end sequences gets evaluated later on (in the validation step)\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
764 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
765 else{ ### Single-end file
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
766 warn "Sequences removed because they became shorter than the length cutoff of $length_cutoff bp:\t$too_short ($percentage_too_short%)\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
767 print REPORT "Sequences removed because they became shorter than the length cutoff of $length_cutoff bp:\t$too_short ($percentage_too_short%)\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
768 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
769
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
### RRBS-specific statistics, written to both STDERR and the REPORT filehandle.
### The percentages are only calculated when at least one sequence was processed:
### with a $count of 0 (e.g. an empty input file) the division would abort the run
### with "Illegal division by zero", so 'N/A' is reported instead.
if ($rrbs){
    my $percentage_rrbs_trimmed = $count ? sprintf ("%.1f",$rrbs_trimmed/$count*100) : 'N/A';
    warn "RRBS reads trimmed by additional 2 bp when adapter contamination was detected:\t$rrbs_trimmed ($percentage_rrbs_trimmed%)\n";
    print REPORT "RRBS reads trimmed by additional 2 bp when adapter contamination was detected:\t$rrbs_trimmed ($percentage_rrbs_trimmed%)\n";
}

if ($non_directional){
    my $percentage_rrbs_trimmed_at_start = $count ? sprintf ("%.1f",$rrbs_trimmed_start/$count*100) : 'N/A';
    warn "RRBS reads trimmed by 2 bp at the start when read started with CAA ($CAA) or CGA ($CGA) in total:\t$rrbs_trimmed_start ($percentage_rrbs_trimmed_at_start%)\n";
    print REPORT "RRBS reads trimmed by 2 bp at the start when read started with CAA ($CAA) or CGA ($CGA) in total:\t$rrbs_trimmed_start ($percentage_rrbs_trimmed_at_start%)\n";
}
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
781
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
782 warn "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
783 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
784
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
785 ### RUNNING FASTQC unless we are dealing with paired-end files
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
786 unless($validate){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
787 if ($fastqc){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
788 warn "\n >>> Now running FastQC on the data <<<\n\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
789 sleep (5);
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
790 if ($fastqc_args){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
791 system ("$path_to_fastqc $fastqc_args $output_dir$output_filename");
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
792 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
793 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
794 system ("$path_to_fastqc $output_dir$output_filename");
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
795 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
796 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
797 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
798
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
799 ### VALIDATE PAIRED-END FILES
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
800 if ($validate){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
801
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
802 ### Figure out whether current file counts as read 1 or read 2 of paired-end files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
803
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
804 if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
805 $file_1 = $output_filename;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
806 shift @filenames;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
807 # warn "This is read 1: $file_1\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
808 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
809 else{ # this is read 2 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
810 $file_2 = $output_filename;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
811 shift @filenames;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
812 # warn "This is read 2: $file_2\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
813 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
814
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
815 if ($file_1 and $file_2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
816 warn "Validate paired-end files $file_1 and $file_2\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
817 sleep (1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
818
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
819 my ($val_1,$val_2,$un_1,$un_2) = validate_paired_end_files($file_1,$file_2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
820
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
821 ### RUNNING FASTQC
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
822 if ($fastqc){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
823
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
824 warn "\n >>> Now running FastQC on the validated data $val_1<<<\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
825 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
826
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
827 if ($fastqc_args){
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
828 system ("$path_to_fastqc $fastqc_args $output_dir$val_1");
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
829 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
830 else{
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
831 system ("$path_to_fastqc $output_dir$val_1");
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
832 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
833
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
834 warn "\n >>> Now running FastQC on the validated data $val_2<<<\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
835 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
836
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
837 if ($fastqc_args){
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
838 system ("$path_to_fastqc $fastqc_args $output_dir$val_2");
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
839 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
840 else{
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
841 system ("$path_to_fastqc $output_dir$val_2");
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
842 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
843
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
844 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
845
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
846 warn "Deleting both intermediate output files $file_1 and $file_2\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
847 unlink "$output_dir$file_1";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
848 unlink "$output_dir$file_2";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
849
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
850 warn "\n",'='x100,"\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
851 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
852
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
853 $file_1 = undef; # setting file_1 and file_2 to undef once validation is completed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
854 $file_2 = undef;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
855 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
856 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
857
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
858 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
859
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
### validate_paired_end_files
###
### Walks through two trimmed FastQ files in lock-step (4 lines per record) and
### only keeps a sequence pair if BOTH reads are at least $length_cutoff bp long
### after optional 5' / 3' clipping.
###
### Arguments:
###   $file_1, $file_2 - names of the trimmed read 1 / read 2 files, relative to
###                      $output_dir. Names ending in .gz are read through zcat.
###
### Returns (list):
###   ($out_1, $out_2)                             unless $retain is set
###   ($out_1, $out_2, $unpaired_1, $unpaired_2)   if $retain is set
###   All names are relative to $output_dir and carry a '.gz' suffix when the
###   output was gzip compressed.
###
### Uses the following variables defined outside of this sub: $output_dir,
### $gzip, $dont_gzip, $retain, $clip_r1, $clip_r2, $three_prime_clip_r1,
### $three_prime_clip_r2, $length_cutoff, $length_read_1, $length_read_2 and the
### already opened REPORT filehandle.
###
### Dies if a file cannot be opened, if the first record of either file does not
### look like FastQ, or if closing one of the output handles fails.
sub validate_paired_end_files{

  my $file_1 = shift;
  my $file_2 = shift;

  warn "file_1: $file_1, file_2: $file_2\n\n";

  ### Opening the two input files. The list form of the pipe open and the 3-argument
  ### open for plain files both bypass the shell, so file names containing spaces or
  ### shell metacharacters are read correctly
  my ($in_1,$in_2);

  if ($file_1 =~ /\.gz$/){
    open ($in_1,'-|','zcat',$output_dir.$file_1) or die "Couldn't read from file $file_1: $!\n";
  }
  else{
    open ($in_1,'<',$output_dir.$file_1) or die "Couldn't read from file $file_1: $!\n";
  }

  if ($file_2 =~ /\.gz$/){
    open ($in_2,'-|','zcat',$output_dir.$file_2) or die "Couldn't read from file $file_2: $!\n";
  }
  else{
    open ($in_2,'<',$output_dir.$file_2) or die "Couldn't read from file $file_2: $!\n";
  }

  warn "\n>>>>> Now validing the length of the 2 paired-end infiles: $file_1 and $file_2 <<<<<\n";
  sleep (3);

  ### Deriving the names of the validated output files: 'trimmed.fq' or 'trimmed.fq.gz'
  ### at the end of the name is replaced by 'val_1.fq' / 'val_2.fq'
  my $out_1 = $file_1;
  my $out_2 = $file_2;

  $out_1 =~ s/trimmed\.fq(?:\.gz)?$/val_1.fq/;
  $out_2 =~ s/trimmed\.fq(?:\.gz)?$/val_2.fq/;

  ### Output is only compressed if $gzip is set and $dont_gzip is not set.
  ### The compressed writer still goes through the shell because it needs the '>' redirection
  my $compress_output = ($gzip and !$dont_gzip);

  my ($r1,$r2);

  if ($compress_output){
    $out_1 .= '.gz';
    open ($r1,"| gzip -c - > ${output_dir}${out_1}") or die "Can't write to $out_1: $!\n";
  }
  else{
    open ($r1,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n";
  }

  if ($compress_output){
    $out_2 .= '.gz';
    open ($r2,"| gzip -c - > ${output_dir}${out_2}") or die "Can't write to $out_2: $!\n";
  }
  else{
    open ($r2,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n";
  }

  warn "Writing validated paired-end read 1 reads to $out_1\n";
  warn "Writing validated paired-end read 2 reads to $out_2\n\n";

  ### Optional output files for reads whose mate became too short (only if $retain is set)
  my $unpaired_1;
  my $unpaired_2;
  my ($unpaired_fh_1,$unpaired_fh_2);

  if ($retain){

    $unpaired_1 = $file_1;
    $unpaired_2 = $file_2;

    $unpaired_1 =~ s/trimmed\.fq(?:\.gz)?$/unpaired_1.fq/;
    $unpaired_2 =~ s/trimmed\.fq(?:\.gz)?$/unpaired_2.fq/;

    if ($compress_output){
      $unpaired_1 .= '.gz';
      open ($unpaired_fh_1,"| gzip -c - > ${output_dir}${unpaired_1}") or die "Can't write to $unpaired_1: $!\n";
    }
    else{
      open ($unpaired_fh_1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n";
    }

    if ($compress_output){
      $unpaired_2 .= '.gz';
      open ($unpaired_fh_2,"| gzip -c - > ${output_dir}${unpaired_2}") or die "Can't write to $unpaired_2: $!\n";
    }
    else{
      open ($unpaired_fh_2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n";
    }

    warn "Writing unpaired read 1 reads to $unpaired_1\n";
    warn "Writing unpaired read 2 reads to $unpaired_2\n\n";
  }

  my $sequence_pairs_removed = 0;
  my $read_1_printed = 0;
  my $read_2_printed = 0;

  my $count = 0;

  while (1){
    my $id_1   = <$in_1>;
    my $seq_1  = <$in_1>;
    my $l3_1   = <$in_1>;
    my $qual_1 = <$in_1>;
    ### testing for definedness and not for truth: a last line consisting of a single '0'
    ### without a trailing newline is a valid line but evaluates to false
    last unless (defined $id_1 and defined $seq_1 and defined $l3_1 and defined $qual_1);

    my $id_2   = <$in_2>;
    my $seq_2  = <$in_2>;
    my $l3_2   = <$in_2>;
    my $qual_2 = <$in_2>;
    last unless (defined $id_2 and defined $seq_2 and defined $l3_2 and defined $qual_2);

    ++$count;

    ## small check if the sequence files appear to be FastQ files
    if ($count == 1){ # performed just once
      if ($id_1 !~ /^\@/ or $l3_1 !~ /^\+/){
        die "Input file doesn't seem to be in FastQ format at sequence $count\n";
      }
      if ($id_2 !~ /^\@/ or $l3_2 !~ /^\+/){
        die "Input file doesn't seem to be in FastQ format at sequence $count\n";
      }
    }

    chomp $seq_1;
    chomp $seq_2;
    chomp $qual_1;
    chomp $qual_2;

    ### 5' clipping: removing the first $clip_r1 / $clip_r2 bases and qualities
    if ($clip_r1){
      if (length $seq_1 > $clip_r1){ # sequences that are already too short won't be trimmed again
        $seq_1  = substr($seq_1,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
        $qual_1 = substr($qual_1,$clip_r1);
      }
    }
    if ($clip_r2){
      if (length $seq_2 > $clip_r2){ # sequences that are already too short won't be trimmed again
        $seq_2  = substr($seq_2,$clip_r2); # starting after the sequences to be trimmed until the end of the sequence
        $qual_2 = substr($qual_2,$clip_r2);
      }
    }

    ### 3' clipping: removing the last $three_prime_clip_r1 / $three_prime_clip_r2 bases and qualities
    if ($three_prime_clip_r1){
      if (length $seq_1 > $three_prime_clip_r1){ # sequences that are already too short won't be clipped again
        $seq_1  = substr($seq_1,0,(length($seq_1) - $three_prime_clip_r1)); # keeping everything except the last bases to be clipped
        $qual_1 = substr($qual_1,0,(length($qual_1) - $three_prime_clip_r1));
      }
    }
    if ($three_prime_clip_r2){
      if (length $seq_2 > $three_prime_clip_r2){ # sequences that are already too short won't be clipped again
        $seq_2  = substr($seq_2,0,(length($seq_2) - $three_prime_clip_r2)); # keeping everything except the last bases to be clipped
        $qual_2 = substr($qual_2,0,(length($qual_2) - $three_prime_clip_r2));
      }
    }

    ### making sure that the reads do have a sensible length
    if ( (length($seq_1) < $length_cutoff) or (length($seq_2) < $length_cutoff) ){
      ++$sequence_pairs_removed;
      if ($retain){ # writing out single-end reads if they are longer than the cutoff

        if ( length($seq_1) >= $length_read_1){ # read 1 is long enough
          print {$unpaired_fh_1} $id_1;
          print {$unpaired_fh_1} "$seq_1\n";
          print {$unpaired_fh_1} $l3_1;
          print {$unpaired_fh_1} "$qual_1\n";
          ++$read_1_printed;
        }

        if ( length($seq_2) >= $length_read_2){ # read 2 is long enough
          print {$unpaired_fh_2} $id_2;
          print {$unpaired_fh_2} "$seq_2\n";
          print {$unpaired_fh_2} $l3_2;
          print {$unpaired_fh_2} "$qual_2\n";
          ++$read_2_printed;
        }

      }
    }
    else{
      print {$r1} $id_1;
      print {$r1} "$seq_1\n";
      print {$r1} $l3_1;
      print {$r1} "$qual_1\n";

      print {$r2} $id_2;
      print {$r2} "$seq_2\n";
      print {$r2} $l3_2;
      print {$r2} "$qual_2\n";
    }

  }

  ### The return values are deliberately ignored here: if one file ran out of records before
  ### the other, zcat may still be writing and then terminates with a non-zero status
  close $in_1;
  close $in_2;

  my $percentage;

  if ($count){
    $percentage = sprintf("%.2f",$sequence_pairs_removed/$count*100);
  }
  else{
    $percentage = 'N/A'; # no sequence pairs at all, avoiding a division by zero
  }

  warn "Total number of sequences analysed: $count\n\n";
  warn "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n";

  print REPORT "Total number of sequences analysed for the sequence pair length validation: $count\n\n";
  print REPORT "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n";

  ### unpaired reads are only ever written (and counted) if $retain is set
  if ($retain){
    warn "Number of unpaired read 1 reads printed: $read_1_printed\n";
    warn "Number of unpaired read 2 reads printed: $read_2_printed\n";
  }

  close $r1 or die $!;
  close $r2 or die $!;

  if ($retain){
    close $unpaired_fh_1 or die $!;
    close $unpaired_fh_2 or die $!;
  }

  warn "\n";
  if ($retain){
    return ($out_1,$out_2,$unpaired_1,$unpaired_2);
  }
  else{
    return ($out_1,$out_2);
  }
}
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1121
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1122
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
# Performs a quick sanity check on the first FastQ record of an input file.
#
# Arguments:
#   $file - path of the file to inspect
#
# Behaviour:
#   - dies if the file cannot be opened for reading
#   - warns (does not die) and returns if fewer than four lines can be read
#   - returns without further checks if the file starts with the gzip magic
#     bytes, since compressed data cannot be inspected as text
#   - dies if the sequence line contains a digit, which is taken as a sign of
#     SOLiD colorspace data
#
# Returns nothing (bare return).
sub file_sanity_check{

    my $file = shift;

    # three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode or a pipe
    open (my $sanity_fh, '<', $file) or die "Failed to read from file '$file' to perform sanity check\n";

    # just processing a single FastQ entry
    my $id    = <$sanity_fh>;
    my $seq   = <$sanity_fh>;
    my $three = <$sanity_fh>;
    my $qual  = <$sanity_fh>;

    # everything needed has been read, so release the handle on every path
    close $sanity_fh;

    # 'defined' rather than truthiness: a final line consisting of a single
    # '0' without a trailing newline is still a line
    unless (defined $id and defined $seq and defined $three and defined $qual){
        warn "Input file '$file' seems to be completely empty. Consider respecifying!\n\n";
        return; # nothing to inspect in an empty/truncated file
    }

    # gzip streams start with the bytes 0x1f 0x8b; the text based check below
    # would be meaningless (and could fail spuriously) on compressed data
    return if (substr($id, 0, 2) eq "\x1f\x8b");

    chomp $seq;

    # testing if the file is a colorspace file in which case we bail
    if ($seq =~ /\d+/){
        die "File seems to be in SOLiD colorspace format which is not supported by Trim Galore (sequence is: '$seq')! Please use Cutadapt on colorspace files separately and check its documentation!\n\n";
    }

    return;
}
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1147
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1148
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
# Parses and validates the command line options of the script.
#
# Options are read from @ARGV via Getopt::Long; whatever GetOptions leaves in
# @ARGV is used below only to check that paired-end mode received an even
# number of arguments.
#
# Side effects:
#   - prints the help text (print_helpfile) or the version banner and exits
#     when --help/--man or --version is given
#   - sets $DOWARN to 0 when --suppress_warn is given ($DOWARN and
#     $trimmer_version are not declared in this sub; presumably file-level
#     variables -- confirm against the top of the file)
#   - dies with an explanatory message on any invalid option or combination
#
# Defaults applied here: Phred encoding 33, error rate 0.1, length cutoff 20,
# unpaired read length cutoffs 35 (only with --retain_unpaired in paired-end
# mode), and 0 / '' for several unset flags.
#
# Returns the list:
#   ($quality,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,
#    $non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,
#    $retain,$length_read_1,$length_read_2,$adapter2,$error_rate,$output_dir,
#    $no_report_file,$dont_gzip,$clip_r1,$clip_r2,$three_prime_clip_r1,
#    $three_prime_clip_r2,$nextera,$small_rna,$path_to_cutadapt,$illumina)
sub process_commandline{
    my $help;
    my $quality;
    my $adapter;
    my $adapter2;
    my $stringency;
    my $report;   # bound to --report so the option stays accepted; not used otherwise in this sub
    my $version;
    my $rrbs;
    my $length_cutoff;
    my $keep;
    my $fastqc;
    my $non_directional;
    my $phred33;
    my $phred64;
    my $fastqc_args;
    my $trim;
    my $gzip;
    my $validate;
    my $retain;
    my $length_read_1;
    my $length_read_2;
    my $error_rate;
    my $output_dir;
    my $no_report_file;
    my $suppress_warn;
    my $dont_gzip;
    my $clip_r1;
    my $clip_r2;
    my $three_prime_clip_r1;
    my $three_prime_clip_r2;
    my $nextera;
    my $small_rna;
    my $illumina;
    my $path_to_cutadapt;

    my $command_line = GetOptions ('help|man' => \$help,
                                   'q|quality=i' => \$quality,
                                   'a|adapter=s' => \$adapter,
                                   'a2|adapter2=s' => \$adapter2,
                                   'report' => \$report,
                                   'version' => \$version,
                                   'stringency=i' => \$stringency,
                                   'fastqc' => \$fastqc,
                                   'RRBS' => \$rrbs,
                                   'keep' => \$keep,
                                   'length=i' => \$length_cutoff,
                                   'non_directional' => \$non_directional,
                                   'phred33' => \$phred33,
                                   'phred64' => \$phred64,
                                   'fastqc_args=s' => \$fastqc_args,
                                   'trim1' => \$trim,
                                   'gzip' => \$gzip,
                                   'paired_end' => \$validate,
                                   'retain_unpaired' => \$retain,
                                   'length_1|r1=i' => \$length_read_1,
                                   'length_2|r2=i' => \$length_read_2,
                                   'e|error_rate=s' => \$error_rate,
                                   'o|output_dir=s' => \$output_dir,
                                   'no_report_file' => \$no_report_file,
                                   'suppress_warn' => \$suppress_warn,
                                   'dont_gzip' => \$dont_gzip,
                                   'clip_R1=i' => \$clip_r1,
                                   'clip_R2=i' => \$clip_r2,
                                   'three_prime_clip_R1=i' => \$three_prime_clip_r1,
                                   'three_prime_clip_R2=i' => \$three_prime_clip_r2,
                                   'illumina' => \$illumina,
                                   'nextera' => \$nextera,
                                   'small_rna' => \$small_rna,
                                   'path_to_cutadapt=s' => \$path_to_cutadapt,
                                  );

    ### EXIT ON ERROR if there were errors with any of the supplied options
    unless ($command_line){
        die "Please respecify command line options\n";
    }

    ### HELPFILE
    if ($help){
        print_helpfile();
        exit;
    }

    ### VERSION
    if ($version){
        print << "VERSION";

Quality-/Adapter-/RRBS-Trimming
(powered by Cutadapt)
version $trimmer_version

Last update: 06 05 2015

VERSION
        exit;
    }

    ### RRBS
    unless ($rrbs){
        $rrbs = 0;
    }

    ### SUPPRESS WARNINGS
    if (defined $suppress_warn){
        $DOWARN = 0;
    }

    ### QUALITY SCORES
    my $phred_encoding;
    if ($phred33){
        if ($phred64){
            die "Please specify only a single quality encoding type (--phred33 or --phred64)\n\n";
        }
        $phred_encoding = 33;
    }
    elsif ($phred64){
        $phred_encoding = 64;
    }
    unless ($phred33 or $phred64){
        warn "No quality encoding type selected. Assuming that the data provided uses Sanger encoded Phred scores (default)\n\n";
        $phred_encoding = 33;
        sleep (1);
    }

    ### NON-DIRECTIONAL RRBS
    if ($non_directional){
        unless ($rrbs){
            die "Option '--non_directional' requires '--rrbs' to be specified as well. Please re-specify!\n";
        }
    }
    else{
        $non_directional = 0;
    }

    if ($fastqc_args){
        $fastqc = 1; # specifying fastqc extra arguments automatically means that FastQC will be executed
    }
    else{
        $fastqc_args = 0;
    }

    ### CUSTOM ERROR RATE
    if (defined $error_rate){
        # The option is read as a string ('=s'), so make sure it really is a
        # number before comparing; otherwise a value such as 'abc' would be
        # treated as 0 and silently accepted.
        require Scalar::Util;
        # make sure that the error rate is between 0 and 1
        unless (Scalar::Util::looks_like_number($error_rate) and $error_rate >= 0 and $error_rate <= 1){
            die "Please specify an error rate between 0 and 1 (the default is 0.1)\n";
        }
    }
    else{
        $error_rate = 0.1; # (default)
    }

    ### ADAPTER SELECTION
    # the pre-defined adapter types are mutually exclusive
    if ($nextera and $small_rna or $nextera and $illumina or $illumina and $small_rna ){
        die "You can't use several different adapter types at the same time. Make your choice or consider using -a and -a2\n\n";
    }

    if (defined $adapter){
        unless ($adapter =~ /^[ACTGNXactgnx]+$/){
            die "Adapter sequence must contain DNA characters only (A,C,T,G or N)!\n";
        }
        $adapter = uc$adapter;

        # a user-supplied adapter may not be combined with a pre-defined adapter type
        if ($illumina){
            die "You can't supply an adapter sequence AND use the Illumina universal adapter sequence. Make your choice.\n\n";
        }
        if ($nextera){
            die "You can't supply an adapter sequence AND use the Nextera transposase adapter sequence. Make your choice.\n\n";
        }
        if ($small_rna){
            die "You can't supply an adapter sequence AND use the Illumina small RNA adapter sequence. Make your choice.\n\n";
        }
    }

    if (defined $adapter2){
        unless ($validate){
            die "An optional adapter for read 2 of paired-end files requires '--paired' to be specified as well! Please re-specify\n";
        }
        unless ($adapter2 =~ /^[ACTGNactgn]+$/){
            die "Optional adapter 2 sequence must contain DNA characters only (A,C,T,G or N)!\n";
        }
        $adapter2 = uc$adapter2;
    }

    ### LENGTH CUTOFF
    unless (defined $length_cutoff){
        $length_cutoff = 20;
    }

    ### files are supposed to be paired-end files
    if ($validate){

        # making sure that an even number of reads has been supplied
        unless ((scalar@ARGV)%2 == 0){
            die "Please provide an even number of input files for paired-end FastQ trimming! Aborting ...\n";
        }

        ## CUTOFF FOR VALIDATED READ-PAIRS
        if (defined $length_read_1 or defined $length_read_2){

            # the option that enables this is 'retain_unpaired' (see GetOptions above)
            unless ($retain){
                die "Please specify --retain_unpaired to alter the unpaired single-end read length cut off(s)\n\n";
            }

            if (defined $length_read_1){
                unless ($length_read_1 >= 15 and $length_read_1 <= 100){
                    die "Please select a sensible cutoff for when a read pair should be filtered out due to short length (allowed range: 15-100 bp)\n\n";
                }
                unless ($length_read_1 > $length_cutoff){
                    die "The single-end unpaired read length needs to be longer than the paired-end cut-off value ($length_cutoff bp)\n\n";
                }
            }

            if (defined $length_read_2){
                unless ($length_read_2 >= 15 and $length_read_2 <= 100){
                    die "Please select a sensible cutoff for when a read pair should be filtered out due to short length (allowed range: 15-100 bp)\n\n";
                }
                unless ($length_read_2 > $length_cutoff){
                    die "The single-end unpaired read length needs to be longer than the paired-end cut-off value ($length_cutoff bp)\n\n";
                }
            }
        }

        if ($retain){
            $length_read_1 = 35 unless (defined $length_read_1);
            $length_read_2 = 35 unless (defined $length_read_2);
        }
    }

    unless ($no_report_file){
        $no_report_file = 0;
    }

    ### OUTPUT DIR PATH
    if ($output_dir){
        # make sure the directory ends in a slash
        unless ($output_dir =~ /\/$/){
            $output_dir =~ s/$/\//;
        }
    }
    else{
        $output_dir = '';
    }

    ### Trimming at the 5' end
    if (defined $clip_r2){ # 5' clipping of read 2 only makes sense for paired-end files
        die "Clipping the 5' end of read 2 is only allowed for paired-end files (--paired)\n" unless ($validate);
    }

    if (defined $clip_r1){ # trimming 5' bases of read 1
        unless ($clip_r1 > 0 and $clip_r1 < 100){
            die "The 5' clipping value for read 1 should have a sensible value (> 0 and < read length)\n\n";
        }
    }

    if (defined $clip_r2){ # trimming 5' bases of read 2
        unless ($clip_r2 > 0 and $clip_r2 < 100){
            die "The 5' clipping value for read 2 should have a sensible value (> 0 and < read length)\n\n";
        }
    }

    ### Trimming at the 3' end
    if (defined $three_prime_clip_r1){ # trimming 3' bases of read 1
        unless ($three_prime_clip_r1 > 0 and $three_prime_clip_r1 < 100){
            die "The 3' clipping value for read 1 should have a sensible value (> 0 and < read length)\n\n";
        }
    }

    if (defined $three_prime_clip_r2){ # trimming 3' bases of read 2
        unless ($three_prime_clip_r2 > 0 and $three_prime_clip_r2 < 100){
            die "The 3' clipping value for read 2 should have a sensible value (> 0 and < read length)\n\n";
        }
    }

    return ($quality,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$adapter2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2,$three_prime_clip_r1,$three_prime_clip_r2,$nextera,$small_rna,$path_to_cutadapt,$illumina);
}
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1427
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1428
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1429
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1430
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1431 sub print_helpfile{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1432 print << "HELP";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1433
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1434 USAGE:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1435
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1436 trim_galore [options] <filename(s)>
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1437
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1438
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1439 -h/--help Print this help message and exit.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1440
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1441 -v/--version Print the version information and exit.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1442
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1443 -q/--quality <INT> Trim low-quality ends from reads in addition to adapter removal. For
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1444 RRBS samples, quality trimming will be performed first, and adapter
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1445 trimming is carried out in a second round. Other files are quality and adapter
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1446 trimmed in a single pass. The algorithm is the same as the one used by BWA
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1447 (Subtract INT from all qualities; compute partial sums from all indices
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1448 to the end of the sequence; cut sequence at the index at which the sum is
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1449 minimal). Default Phred score: 20.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1450
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1451 --phred33 Instructs Cutadapt to use ASCII+33 quality scores as Phred scores
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1452 (Sanger/Illumina 1.9+ encoding) for quality trimming. Default: ON.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1453
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1454 --phred64 Instructs Cutadapt to use ASCII+64 quality scores as Phred scores
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1455 (Illumina 1.5 encoding) for quality trimming.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1456
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1457 --fastqc Run FastQC in the default mode on the FastQ file once trimming is complete.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1458
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1459 --fastqc_args "<ARGS>" Passes extra arguments to FastQC. If more than one argument is to be passed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1460 to FastQC they must be in the form "arg1 arg2 etc.". An example would be:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1461 --fastqc_args "--nogroup --outdir /home/". Passing extra arguments will
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1462 automatically invoke FastQC, so --fastqc does not have to be specified
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1463 separately.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1464
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1465 -a/--adapter <STRING> Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1466 try to auto-detect whether the Illumina universal, Nextera transposase or Illumina
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1467 small RNA adapter sequence was used. Also see '--illumina', '--nextera' and
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1468 '--small_rna'. If no adapter can be detected within the first 1 million sequences
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1469 of the first file specified Trim Galore defaults to '--illumina'.
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1470
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1471 -a2/--adapter2 <STRING> Optional adapter sequence to be trimmed off read 2 of paired-end files. This
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1472 option requires '--paired' to be specified as well.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1473
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1474 --illumina Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1475 'AGATCGGAAGAGC' instead of the default auto-detection of adapter sequence.
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1476
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1477 --nextera Adapter sequence to be trimmed is the first 12bp of the Nextera adapter
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1478 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence.
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1479
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1480 --small_rna Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA Adapter
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1481 'ATGGAATTCTCG' instead of the default auto-detection of adapter sequence.
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1482
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1483
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1484 --stringency <INT> Overlap with adapter sequence required to trim a sequence. Defaults to a
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1485 very stringent setting of 1, i.e. even a single bp of overlapping sequence
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1486 will be trimmed off from the 3' end of any read.
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1487
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1488 -e <ERROR RATE> Maximum allowed error rate (no. of errors divided by the length of the matching
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1489 region) (default: 0.1)
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1490
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1491 --gzip Compress the output file with GZIP. If the input files are GZIP-compressed
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1492 the output files will automatically be GZIP compressed as well. As of v0.2.8 the
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1493 compression will take place on the fly.
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1494
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1495 --dont_gzip Output files won't be compressed with GZIP. This option overrides --gzip.
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1496
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1497 --length <INT> Discard reads that became shorter than length INT because of either
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1498 quality or adapter trimming. A value of '0' effectively disables
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1499 this behaviour. Default: 20 bp.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1500
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1501 For paired-end files, both reads of a read-pair need to be longer than
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1502 <INT> bp to be printed out to validated paired-end files (see option --paired).
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1503 If only one read became too short there is the possibility of keeping such
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1504 unpaired single-end reads (see --retain_unpaired). Default pair-cutoff: 20 bp.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1505
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1506 -o/--output_dir <DIR> If specified all output will be written to this directory instead of the current
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1507 directory.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1508
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1509 --no_report_file If specified no report file will be generated.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1510
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1511 --suppress_warn If specified any output to STDOUT or STDERR will be suppressed.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1512
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1513 --clip_R1 <int> Instructs Trim Galore to remove <int> bp from the 5' end of read 1 (or single-end
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1514 reads). This may be useful if the qualities were very poor, or if there is some
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1515 sort of unwanted bias at the 5' end. Default: OFF.
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1516
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1517 --clip_R2 <int> Instructs Trim Galore to remove <int> bp from the 5' end of read 2 (paired-end reads
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1518 only). This may be useful if the qualities were very poor, or if there is some sort
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1519 of unwanted bias at the 5' end. For paired-end BS-Seq, it is recommended to remove
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1520 the first few bp because the end-repair reaction may introduce a bias towards low
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1521 methylation. Please refer to the M-bias plot section in the Bismark User Guide for
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1522 some examples. Default: OFF.
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1523
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1524 --three_prime_clip_R1 <int> Instructs Trim Galore to remove <int> bp from the 3' end of read 1 (or single-end
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1525 reads) AFTER adapter/quality trimming has been performed. This may remove some unwanted
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1526 bias from the 3' end that is not directly related to adapter sequence or basecall quality.
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1527 Default: OFF.
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1528
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1529 --three_prime_clip_R2 <int> Instructs Trim Galore to remove <int> bp from the 3' end of read 2 AFTER
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1530 adapter/quality trimming has been performed. This may remove some unwanted bias from
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1531 the 3' end that is not directly related to adapter sequence or basecall quality.
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1532 Default: OFF.
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1533
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1534 --path_to_cutadapt </path/to/cutadapt> You may use this option to specify a path to the Cutadapt executable,
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1535 e.g. /my/home/cutadapt-1.7.1/bin/cutadapt. Else it is assumed that Cutadapt is in
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1536 the PATH.
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1537
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1538
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1539 RRBS-specific options (MspI digested material):
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1540
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1541 --rrbs Specifies that the input file was an MspI digested RRBS sample (recognition
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1542 site: CCGG). Sequences which were adapter-trimmed will have a further 2 bp
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1543 removed from their 3' end. This prevents the filled-in C close to the
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1544 second MspI site in a sequence from being used for methylation calls. Sequences which
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1545 were merely trimmed because of poor quality will not be shortened further.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1546
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1547 --non_directional Selecting this option for non-directional RRBS libraries will screen
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1548 quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1549 and, if found, removes the first two basepairs. Like with the option
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1550 '--rrbs' this avoids using cytosine positions that were filled-in
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1551 during the end-repair step. '--non_directional' requires '--rrbs' to
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1552 be specified as well.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1553
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1554 --keep Keep the quality trimmed intermediate file. Default: off, which means
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1555 the temporary file is being deleted after adapter trimming. Only has
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1556 an effect for RRBS samples since other FastQ files are not trimmed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1557 for poor qualities separately.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1558
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1559
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1560 Note for RRBS using MseI:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1561
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1562 If your DNA material was digested with MseI (recognition motif: TTAA) instead of MspI it is NOT necessary
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1563 to specify --rrbs or --non_directional since virtually all reads should start with the sequence
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1564 'TAA', and this holds true for both directional and non-directional libraries. As the end-repair of 'TAA'
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1565 restricted sites does not involve any cytosines it does not need to be treated specially. Instead, simply
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1566 run Trim Galore! in the standard (i.e. non-RRBS) mode.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1567
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1568
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1569 Paired-end specific options:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1570
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1571 --paired This option performs length trimming of quality/adapter/RRBS trimmed reads for
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1572 paired-end files. To pass the validation test, both sequences of a sequence pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1573 are required to have a certain minimum length which is governed by the option
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1574 --length (see above). If only one read passes this length threshold the
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1575 other read can be rescued (see option --retain_unpaired). Using this option lets
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1576 you discard too short read pairs without disturbing the sequence-by-sequence order
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1577 of FastQ files which is required by many aligners.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1578
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1579 Trim Galore! expects paired-end files to be supplied in a pairwise fashion, e.g.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1580 file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... .
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1581
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1582 -t/--trim1 Trims 1 bp off every read from its 3' end. This may be needed for FastQ files that
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1583 are to be aligned as paired-end data with Bowtie. This is because Bowtie (1) regards
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1584 alignments like this:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1585
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1586 R1 ---------------------------> or this: -----------------------> R1
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1587 R2 <--------------------------- <----------------- R2
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1588
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1589 as invalid (whenever a start/end coordinate is contained within the other read).
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1590
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1591 --retain_unpaired If only one of the two paired-end reads became too short, the longer
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1592 read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq'
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1593 output files. The length cutoff for unpaired single-end reads is
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1594 governed by the parameters -r1/--length_1 and -r2/--length_2. Default: OFF.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1595
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1596 -r1/--length_1 <INT> Unpaired single-end read length cutoff needed for read 1 to be written to
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1597 '.unpaired_1.fq' output file. These reads may be mapped in single-end mode.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1598 Default: 35 bp.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1599
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1600 -r2/--length_2 <INT> Unpaired single-end read length cutoff needed for read 2 to be written to
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1601 '.unpaired_2.fq' output file. These reads may be mapped in single-end mode.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1602 Default: 35 bp.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1603
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1604
6
11962ce40855 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit 9198b904ef37fe46007256f1734c33de6d23331b-dirty
bgruening
parents: 4
diff changeset
1605 Last modified on 06 May 2015.
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1606
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1607 HELP
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1608 exit;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1609 }