annotate trim_galore @ 4:2c1f0fe810f7 draft

Uploaded
author bgruening
date Wed, 15 Apr 2015 11:32:11 -0400
parents 898db63d2e84
children 11962ce40855
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1 #!/usr/bin/perl
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
2 use strict;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
3 use warnings;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
4 use Getopt::Long;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
5 use IPC::Open3;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
6 use File::Spec;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
7 use File::Basename;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
8 use Cwd;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
9
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
10 ## This program is Copyright (C) 2012-14, Felix Krueger (felix.krueger@babraham.ac.uk)
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
11
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
12 ## This program is free software: you can redistribute it and/or modify
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
13 ## it under the terms of the GNU General Public License as published by
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
14 ## the Free Software Foundation, either version 3 of the License, or
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
15 ## (at your option) any later version.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
16
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
17 ## This program is distributed in the hope that it will be useful,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
18 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
19 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
20 ## GNU General Public License for more details.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
21
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
22 ## You should have received a copy of the GNU General Public License
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
23 ## along with this program. If not, see <http://www.gnu.org/licenses/>.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
24
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
25
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
26
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
27 ## This script takes in FastQ sequences and trims them with Cutadapt
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
28
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
29 ## last modified on 16 July 2014
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
30
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
31
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
32
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
33 ########################################################################
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
34
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
35 # change these paths if needed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
36
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
37 my $path_to_cutadapt = 'cutadapt';
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
38 my $path_to_fastqc = 'fastqc';
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
39
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
40 ########################################################################
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
41
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
42 my $trimmer_version = '0.3.7';
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
43 my $DOWARN = 1; # print on screen warning and text by default
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
44 BEGIN { $SIG{'__WARN__'} = sub { warn $_[0] if $DOWARN } };
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
45
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
46 my ($cutoff,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$a2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2,$three_prime_clip_r1,$three_prime_clip_r2) = process_commandline();
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
47
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
48 my @filenames = @ARGV;
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
49
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
50
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
51 die "\nPlease provide the filename(s) of one or more FastQ file(s) to launch Trim Galore!\n
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
52 USAGE: 'trim_galore [options] <filename(s)>' or 'trim_galore --help' for more options\n\n" unless (@filenames);
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
53
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
54
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
55 ### SETTING DEFAULTS UNLESS THEY WERE SPECIFIED
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
56 unless (defined $cutoff){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
57 $cutoff = 20;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
58 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
59 my $phred_score_cutoff = $cutoff; # only relevant for report
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
60
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
61 unless (defined $adapter){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
62 $adapter = 'AGATCGGAAGAGC';
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
63 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
64 unless (defined $a2){ # optional adapter for the second read in a pair. Only works for --paired trimming
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
65 $a2 = '';
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
66 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
67
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
68 unless (defined $stringency){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
69 $stringency = 1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
70 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
71
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
72 if ($phred_encoding == 64){ # ASCII+64 quality encoding selected
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
73 $cutoff += 31; # shifts the cutoff later passed to Cutadapt via -q; $phred_score_cutoff keeps the unshifted value for the report. NOTE(review): 31 is presumably 64 - 33 - confirm
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
74 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
75
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
76 my $file_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
77 my $file_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
78
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
79 foreach my $filename (@ARGV){ # main driver: every remaining command-line argument is treated as an input file
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
80 trim ($filename); # trim() is defined further down in this file
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
81 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
82
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
83
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
84 sub trim{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
85 my $filename = shift;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
86
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
87 my $output_filename = (split (/\//,$filename))[-1];
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
88
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
89 my $report = $output_filename;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
90 $report =~ s/$/_trimming_report.txt/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
91
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
92 if ($no_report_file) {
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
93 $report = File::Spec->devnull;
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
94 open (REPORT,'>',$report) or die "Failed to write to file '$report': $!\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
95 # warn "Redirecting report output to /dev/null\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
96 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
97 else{
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
98 open (REPORT,'>',$output_dir.$report) or die "Failed to write to file '$report': $!\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
99 warn "Writing report to '$output_dir$report'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
100 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
101
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
102 warn "\nSUMMARISING RUN PARAMETERS\n==========================\nInput filename: $filename\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
103 print REPORT "\nSUMMARISING RUN PARAMETERS\n==========================\nInput filename: $filename\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
104
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
105 if ($validate){ # paired-end mode
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
106 warn "Trimming mode: paired-end\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
107 print REPORT "Trimming mode: paired-end\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
108 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
109 else{
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
110 warn "Trimming mode: single-end\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
111 print REPORT "Trimming mode: single-end\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
112 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
113
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
114
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
115 warn "Trim Galore version: $trimmer_version\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
116 print REPORT "Trim Galore version: $trimmer_version\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
117
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
118 warn "Quality Phred score cutoff: $phred_score_cutoff\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
119 print REPORT "Quality Phred score cutoff: $phred_score_cutoff\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
120
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
121 warn "Quality encoding type selected: ASCII+$phred_encoding\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
122 print REPORT "Quality encoding type selected: ASCII+$phred_encoding\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
123
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
124 warn "Adapter sequence: '$adapter'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
125 print REPORT "Adapter sequence: '$adapter'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
126
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
127 if ($error_rate == 0.1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
128 warn "Maximum trimming error rate: $error_rate (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
129 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
130 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
131 warn "Maximum trimming error rate: $error_rate\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
132 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
133
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
134 print REPORT "Maximum trimming error rate: $error_rate";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
135 if ($error_rate == 0.1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
136 print REPORT " (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
137 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
138 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
139 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
140 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
141
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
142 if ($a2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
143 warn "Optional adapter 2 sequence (only used for read 2 of paired-end files): '$a2'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
144 print REPORT "Optional adapter 2 sequence (only used for read 2 of paired-end files): '$a2'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
145 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
146
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
147 warn "Minimum required adapter overlap (stringency): $stringency bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
148 print REPORT "Minimum required adapter overlap (stringency): $stringency bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
149
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
150 if ($validate){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
151 warn "Minimum required sequence length for both reads before a sequence pair gets removed: $length_cutoff bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
152 print REPORT "Minimum required sequence length for both reads before a sequence pair gets removed: $length_cutoff bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
153 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
154 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
155 warn "Minimum required sequence length before a sequence gets removed: $length_cutoff bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
156 print REPORT "Minimum required sequence length before a sequence gets removed: $length_cutoff bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
157 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
158
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
159 if ($validate){ # only for paired-end files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
160
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
161 if ($retain){ # keeping single-end reads if only one end is long enough
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
162
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
163 if ($length_read_1 == 35){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
164 warn "Length cut-off for read 1: $length_read_1 bp (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
165 print REPORT "Length cut-off for read 1: $length_read_1 bp (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
166 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
167 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
168 warn "Length cut-off for read 1: $length_read_1 bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
169 print REPORT "Length cut-off for read 1: $length_read_1 bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
170 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
171
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
172 if ($length_read_2 == 35){
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
173 warn "Length cut-off for read 2: $length_read_2 bp (default)\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
174 print REPORT "Length cut-off for read 2: $length_read_2 bp (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
175 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
176 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
177 warn "Length cut-off for read 2: $length_read_2 bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
178 print REPORT "Length cut-off for read 2: $length_read_2 bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
179 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
180 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
181 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
182
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
183 if ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
184 warn "File was specified to be an MspI-digested RRBS sample. Sequences with adapter contamination will be trimmed a further 2 bp to remove potential methylation-biased bases from the end-repair reaction\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
185 print REPORT "File was specified to be an MspI-digested RRBS sample. Sequences with adapter contamination will be trimmed a further 2 bp to remove potential methylation-biased bases from the end-repair reaction\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
186 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
187
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
188 if ($non_directional){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
189 warn "File was specified to be a non-directional MspI-digested RRBS sample. Sequences starting with either 'CAA' or 'CGA' will have the first 2 bp trimmed off to remove potential methylation-biased bases from the end-repair reaction\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
190 print REPORT "File was specified to be a non-directional MspI-digested RRBS sample. Sequences starting with either 'CAA' or 'CGA' will have the first 2 bp trimmed off to remove potential methylation-biased bases from the end-repair reaction\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
191 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
192
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
193 if ($trim){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
194 warn "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
195 print REPORT "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
196 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
197
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
198 if ($clip_r1){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
199 warn "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5' end to avoid poor qualities or biases\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
200 print REPORT "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5' end to avoid poor qualities or biases\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
201 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
202 if ($clip_r2){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
203 warn "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
204 print REPORT "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
205 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
206
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
207 if ($three_prime_clip_r1){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
208 warn "All Read 1 sequences will be trimmed by $three_prime_clip_r1 bp from their 3' end to avoid poor qualities or biases\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
209 print REPORT "All Read 1 sequences will be trimmed by $three_prime_clip_r1 bp from their 3' end to avoid poor qualities or biases\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
210 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
211 if ($three_prime_clip_r2){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
212 warn "All Read 2 sequences will be trimmed by $three_prime_clip_r2 bp from their 3' end to avoid poor qualities or biases\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
213 print REPORT "All Read 2 sequences will be trimmed by $three_prime_clip_r2 bp from their 3' end to avoid poor qualities or biases\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
214 }
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
215
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
216 if ($fastqc){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
217 warn "Running FastQC on the data once trimming has completed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
218 print REPORT "Running FastQC on the data once trimming has completed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
219
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
220 if ($fastqc_args){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
221 warn "Running FastQC with the following extra arguments: '$fastqc_args'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
222 print REPORT "Running FastQC with the following extra arguments: $fastqc_args\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
223 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
224 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
225
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
226 if ($keep and $rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
227 warn "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
228 print REPORT "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
229 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
230
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
231
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
232 if ($gzip or $filename =~ /\.gz$/){
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
233 $gzip = 1;
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
234 unless ($dont_gzip){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
235 warn "Output file(s) will be GZIP compressed\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
236 print REPORT "Output file will be GZIP compressed\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
237 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
238 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
239
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
240 warn "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
241 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
242 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
243
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
244 my $temp;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
245
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
246 ### Proceeding differently for RRBS and other type of libraries
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
247 if ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
248
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
249 ### Skipping quality filtering for RRBS libraries if a quality cutoff of 0 was specified
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
250 if ($cutoff == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
251 warn "Quality cutoff selected was 0 - Skipping quality trimming altogether\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
252 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
253 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
254 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
255
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
256 $temp = $filename;
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
257 $temp =~ s/^.*\///; # replacing optional file path information
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
258 $temp =~ s/$/_qual_trimmed.fastq/;
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
259 open (TEMP,'>',$output_dir.$temp) or die "Can't write to '$temp': $!";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
260
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
261 warn " >>> Now performing adaptive quality trimming with a Phred-score cutoff of: $cutoff <<<\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
262 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
263
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
264 open (QUAL,"$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -a X $filename |") or die "Can't open pipe to Cutadapt: $!";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
265
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
266 my $qual_count = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
267
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
268 while (1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
269 my $l1 = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
270 my $seq = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
271 my $l3 = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
272 my $qual = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
273 last unless (defined $qual);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
274
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
275 $qual_count++;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
276 if ($qual_count%10000000 == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
277 warn "$qual_count sequences processed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
278 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
279 print TEMP "$l1$seq$l3$qual";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
280 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
281
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
282 warn "\n >>> Quality trimming completed <<<\n$qual_count sequences processed in total\n\n";
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
283 close QUAL or die "Unable to close QUAL filehandle: $!\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
284 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
285
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
286 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
287 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
288
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
289
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
290 if ($output_filename =~ /\.fastq$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
291 $output_filename =~ s/\.fastq$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
292 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
293 elsif ($output_filename =~ /\.fastq\.gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
294 $output_filename =~ s/\.fastq\.gz$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
295 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
296 elsif ($output_filename =~ /\.fq$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
297 $output_filename =~ s/\.fq$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
298 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
299 elsif ($output_filename =~ /\.fq\.gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
300 $output_filename =~ s/\.fq\.gz$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
301 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
302 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
303 $output_filename =~ s/$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
304 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
305
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
306 if ($gzip or $filename =~ /\.gz$/){
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
307 if ($dont_gzip){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
308 open (OUT,'>',$output_dir.$output_filename) or die "Can't open '$output_filename': $!\n"; # don't need to gzip intermediate file
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
309 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
310 else{
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
311 ### 6 Jan 2014: had a request to also gzip intermediate files to save disk space
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
312 # if ($validate){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
313 # open (OUT,'>',$output_dir.$output_filename) or die "Can't open '$output_filename': $!\n"; # don't need to gzip intermediate file
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
314 # }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
315 $output_filename .= '.gz';
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
316 open (OUT,"| gzip -c - > ${output_dir}${output_filename}") or die "Can't write to '$output_filename': $!\n";
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
317 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
318 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
319 else{
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
320 open (OUT,'>',$output_dir.$output_filename) or die "Can't open '$output_filename': $!\n";
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
321 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
322 warn "Writing final adapter and quality trimmed output to $output_filename\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
323
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
324 my $count = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
325 my $too_short = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
326 my $quality_trimmed = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
327 my $rrbs_trimmed = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
328 my $rrbs_trimmed_start = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
329 my $CAA = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
330 my $CGA = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
331
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
332 my $pid;
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
333
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
334 if ($rrbs and $cutoff != 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
335
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
336 ### optionally using 2 different adapters for read 1 and read 2
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
337 if ($validate and $a2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
338 ### Figure out whether current file counts as read 1 or read 2 of paired-end files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
339 if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
340 warn "\n >>> Now performing adapter trimming for the adapter sequence: '$adapter' from file $temp <<< \n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
341 sleep (3);
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
# Launch Cutadapt via IPC::Open3 so that the child's STDIN/STDOUT/STDERR are attached to
# WRITER/TRIM/ERROR and $pid holds the child PID that is reaped by waitpid further down.
# Plain open() does not accept this argument list; every sibling branch uses open3 as well.
$pid = open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $adapter $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
343 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
344 else{ # this is read 2 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
345 warn "\n >>> Now performing adapter trimming for the adapter sequence: '$a2' from file $temp <<< \n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
346 sleep (3);
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
347 $pid = open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $a2 $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
348 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
349 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
350 ### Using the same adapter for both read 1 and read 2
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
351 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
352 warn "\n >>> Now performing adapter trimming for the adapter sequence: '$adapter' from file $temp <<< \n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
353 sleep (3);
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
354 $pid = open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $adapter $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
355 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
356
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
357 close WRITER or die $!; # not needed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
358
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
359 open (QUAL,"$output_dir$temp") or die $!; # quality trimmed file
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
360
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
361 if ($filename =~ /\.gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
362 open (IN,"zcat $filename |") or die $!; # original, untrimmed file
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
363 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
364 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
365 open (IN,$filename) or die $!; # original, untrimmed file
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
366 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
367
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
368 while (1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
369
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
370 # we can process the output from Cutadapt and the original input 1 by 1 to decide if the adapter has been removed or not
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
371 my $l1 = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
372 my $seq = <TRIM>; # adapter trimmed sequence
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
373 my $l3 = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
374 my $qual = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
375
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
376 $_ = <IN>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
377 my $original_seq = <IN>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
378 $_ = <IN>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
379 $_ = <IN>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
380
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
381 $_ = <QUAL>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
382 my $qual_trimmed_seq = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
383 $_ = <QUAL>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
384 my $qual_trimmed_qual = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
385
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
386 last unless (defined $qual and defined $qual_trimmed_qual); # could be empty strings
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
387
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
388 $count++;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
389 if ($count%10000000 == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
390 warn "$count sequences processed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
391 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
392
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
393 chomp $seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
394 chomp $qual;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
395 chomp $qual_trimmed_seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
396 chomp $original_seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
397
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
398 my $quality_trimmed_seq_length = length $qual_trimmed_seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
399
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
400 if (length $original_seq > length $qual_trimmed_seq){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
401 ++$quality_trimmed;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
402 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
403
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
404 my $nd = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
405
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
406 ### NON-DIRECTIONAL RRBS
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
407 if ($non_directional){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
408 if (length$seq > 2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
409 if ($seq =~ /^CAA/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
410 ++$CAA;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
411 $seq = substr ($seq,2,length($seq)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
412 $qual = substr ($qual,2,length($qual)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
413 ++$rrbs_trimmed_start;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
414 $nd = 1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
415 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
416 elsif ($seq =~ /^CGA/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
417 $seq = substr ($seq,2,length($seq)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
418 $qual = substr ($qual,2,length($qual)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
419 ++$CGA;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
420 ++$rrbs_trimmed_start;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
421 $nd = 1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
422 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
423 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
424 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
425
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
426 ### directional read
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
427 unless ($nd == 1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
428 if (length $seq >= 2 and length$seq < $quality_trimmed_seq_length){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
429 $seq = substr ($seq,0,length($seq)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
430 $qual = substr ($qual,0,length($qual)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
431 ++$rrbs_trimmed;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
432 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
433 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
434
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
435 ### Shortening all sequences by 1 bp on the 3' end
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
436 if ($trim){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
437 $seq = substr($seq,0,length($seq)-1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
438 $qual = substr($qual,0,length($qual)-1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
439 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
440
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
441 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
442 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
443 print OUT "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
444 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
445 else{ # single end
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
446
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
447 if ($clip_r1){
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
448 if (length $seq > $clip_r1){ # sequences that are already too short won't be clipped again
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
449 $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
450 $qual = substr($qual,$clip_r1);
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
451 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
452 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
453
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
454 if ($three_prime_clip_r1){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
455
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
456 if (length $seq > $three_prime_clip_r1){ # sequences that are already too short won't be clipped again
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
457 # warn "seq/qual before/after trimming:\n$seq\n$qual\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
458 $seq = substr($seq,0,(length($seq) - $three_prime_clip_r1)); # starting after the sequences to be trimmed until the end of the sequence
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
459 $qual = substr($qual,0,(length($qual) - $three_prime_clip_r1 ));
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
460 # warn "$seq\n$qual\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
461 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
462
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
463 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
464
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
465 if (length $seq < $length_cutoff){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
466 ++$too_short;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
467 next;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
468 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
469 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
470 print OUT "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
471 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
472 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
473 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
474
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
475 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
476 while (<ERROR>){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
477 warn $_;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
478 print REPORT $_;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
479 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
480
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
# Release every handle used by the RRBS branch. ERROR has been read to EOF by the
# loop directly above; it is closed here as well so that this branch matches the
# non-RRBS branch and does not leave Cutadapt's STDERR pipe open.
close IN or die "Unable to close IN filehandle: $!";
close QUAL or die "Unable to close QUAL filehandle: $!";
close TRIM or die "Unable to close TRIM filehandle: $!";
close ERROR or die "Unable to close ERROR filehandle: $!";
close OUT or die "Unable to close OUT filehandle: $!";
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
485
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
486 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
487 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
488
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
489 ### optionally using 2 different adapters for read 1 and read 2
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
490 if ($validate and $a2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
491 ### Figure out whether current file counts as read 1 or read 2 of paired-end files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
492 if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
493 warn "\n >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$adapter' from file $filename <<< \n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
494 sleep (3);
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
495 $pid = open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $adapter $filename") or die "Failed to launch Cutadapt: $!";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
496 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
497 else{ # this is read 2 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
498 warn "\n >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$a2' from file $filename <<< \n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
499 sleep (3);
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
500 $pid = open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $a2 $filename") or die "Failed to launch Cutadapt: $!";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
501 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
502 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
503 ### Using the same adapter for both read 1 and read 2
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
504 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
505 warn "\n >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$adapter' from file $filename <<< \n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
506 sleep (3);
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
507 $pid = open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $adapter $filename") or die "Failed to launch Cutadapt: $!";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
508 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
509
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
510 close WRITER or die $!; # not needed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
511
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
512 while (1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
513
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
514 my $l1 = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
515 my $seq = <TRIM>; # quality and/or adapter trimmed sequence
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
516 my $l3 = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
517 my $qual = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
518 # print "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
519 last unless (defined $qual); # could be an empty string
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
520
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
521 $count++;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
522 if ($count%10000000 == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
523 warn "$count sequences processed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
524 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
525
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
526 chomp $seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
527 chomp $qual;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
528
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
529 ### Shortening all sequences by 1 bp on the 3' end
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
530 if ($trim){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
531 $seq = substr($seq,0,length($seq)-1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
532 $qual = substr($qual,0,length($qual)-1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
533 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
534
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
535 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
536 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
537 print OUT "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
538 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
539 else{ # single end
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
540
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
541 if ($clip_r1){
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
542 if (length $seq > $clip_r1){ # sequences that are already too short won't be clipped again
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
543 $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
544 $qual = substr($qual,$clip_r1);
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
545 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
546 }
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
547
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
548 if ($three_prime_clip_r1){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
549 if (length $seq > $three_prime_clip_r1){ # sequences that are already too short won't be clipped again
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
550 # warn "seq/qual before/after trimming:\n$seq\n$qual\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
551 $seq = substr($seq,0,(length($seq) - $three_prime_clip_r1)); # starting after the sequences to be trimmed until the end of the sequence
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
552 $qual = substr($qual,0,(length($qual) - $three_prime_clip_r1));
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
553 # warn "$seq\n$qual\n";sleep(1);
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
554 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
555 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
556
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
557 if (length $seq < $length_cutoff){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
558 ++$too_short;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
559 next;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
560 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
561 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
562 print OUT "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
563 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
564 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
565 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
566
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
567 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
568 while (<ERROR>){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
569 warn $_;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
570 print REPORT $_;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
571 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
572
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
573 close TRIM or die "Unable to close TRIM filehandle: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
574 close ERROR or die "Unable to close ERROR filehandle: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
575 close OUT or die "Unable to close OUT filehandle: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
576
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
577 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
578
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
579
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
580 if ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
581 unless ($keep){ # keeping the quality trimmed intermediate file for RRBS files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
582
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
583 # deleting temporary quality trimmed file
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
584 my $deleted = unlink "$output_dir$temp";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
585
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
586 if ($deleted){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
587 warn "Successfully deleted temporary file $temp\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
588 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
589 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
590 warn "Could not delete temporary file $temp";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
591 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
592 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
593 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
594
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
595 ### Wait and reap the child process (Cutadapt) so that it doesn't become a zombie process
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
596 waitpid $pid, 0;
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
597 unless ($? == 0){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
598 die "\n\nCutadapt terminated with exit signal: '$?'.\nTerminating Trim Galore run, please check error message(s) to get an idea what went wrong...\n\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
599 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
600
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
601 warn "\nRUN STATISTICS FOR INPUT FILE: $filename\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
602 print REPORT "\nRUN STATISTICS FOR INPUT FILE: $filename\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
603
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
604 warn "="x 45,"\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
605 print REPORT "="x 45,"\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
606
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
607 warn "$count sequences processed in total\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
608 print REPORT "$count sequences processed in total\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
609
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
610 ### only reporting this separately if quality and adapter trimming were performed separately
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
if ($rrbs){
    # Summarise how many reads were shortened by quality trimming.
    # The percentage can only be computed when at least one read was processed, so the
    # summary line is emitted exclusively inside the $count branch; emitting it again
    # afterwards duplicated the message and interpolated an undefined value for empty input.
    if ($count){
        my $percentage_shortened = sprintf ("%.1f",$quality_trimmed/$count*100);
        warn "Sequences were truncated to a varying degree because of deteriorating qualities (Phred score quality cutoff: $cutoff):\t$quality_trimmed ($percentage_shortened%)\n";
        print REPORT "Sequences were truncated to a varying degree because of deteriorating qualities (Phred score quality cutoff: $cutoff):\t$quality_trimmed ($percentage_shortened%)\n";
    }
    else{
        warn "Unable to determine percentage of reads that were shortened because 0 lines were processed\n\n";
        print REPORT "Unable to determine percentage of reads that were shortened because 0 lines were processed\n\n";
    }
}
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
625
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
626 my $percentage_too_short;
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
627 if ($count){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
628 $percentage_too_short = sprintf ("%.1f",$too_short/$count*100);
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
629 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
630 else{
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
631 $percentage_too_short = 'N/A';
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
632 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
633
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
634 if ($validate){ ### only for paired-end files
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
635 warn "The length threshold of paired-end sequences gets evaluated later on (in the validation step)\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
636 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
637 else{ ### Single-end file
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
638 warn "Sequences removed because they became shorter than the length cutoff of $length_cutoff bp:\t$too_short ($percentage_too_short%)\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
639 print REPORT "Sequences removed because they became shorter than the length cutoff of $length_cutoff bp:\t$too_short ($percentage_too_short%)\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
640 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
641
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
if ($rrbs){
    # Report the reads that lost an additional 2 bp at the 3' end in RRBS mode.
    # $count is 0 for an empty input file; dividing by it would abort the run with
    # "Illegal division by zero", so fall back to 'N/A' as done for $percentage_too_short.
    my $percentage_rrbs_trimmed = $count ? sprintf ("%.1f",$rrbs_trimmed/$count*100) : 'N/A';
    warn "RRBS reads trimmed by additional 2 bp when adapter contamination was detected:\t$rrbs_trimmed ($percentage_rrbs_trimmed%)\n";
    print REPORT "RRBS reads trimmed by additional 2 bp when adapter contamination was detected:\t$rrbs_trimmed ($percentage_rrbs_trimmed%)\n";
}
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
647
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
if ($non_directional){
    # Report the reads that were clipped by 2 bp at the 5' end because they started with CAA or CGA.
    # $count is 0 for an empty input file; dividing by it would abort the run with
    # "Illegal division by zero", so fall back to 'N/A' as done for $percentage_too_short.
    my $percentage_rrbs_trimmed_at_start = $count ? sprintf ("%.1f",$rrbs_trimmed_start/$count*100) : 'N/A';
    warn "RRBS reads trimmed by 2 bp at the start when read started with CAA ($CAA) or CGA ($CGA) in total:\t$rrbs_trimmed_start ($percentage_rrbs_trimmed_at_start%)\n";
    print REPORT "RRBS reads trimmed by 2 bp at the start when read started with CAA ($CAA) or CGA ($CGA) in total:\t$rrbs_trimmed_start ($percentage_rrbs_trimmed_at_start%)\n";
}
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
653
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
654 warn "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
655 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
656
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
657 ### RUNNING FASTQC unless we are dealing with paired-end files
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
658 unless($validate){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
659 if ($fastqc){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
660 warn "\n >>> Now running FastQC on the data <<<\n\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
661 sleep (5);
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
662 if ($fastqc_args){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
663 system ("$path_to_fastqc $fastqc_args $output_dir$output_filename");
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
664 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
665 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
666 system ("$path_to_fastqc $output_dir$output_filename");
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
667 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
668 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
669 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
670
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
671 ### VALIDATE PAIRED-END FILES
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
672 if ($validate){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
673
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
674 ### Figure out whether current file counts as read 1 or read 2 of paired-end files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
675
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
676 if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
677 $file_1 = $output_filename;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
678 shift @filenames;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
679 # warn "This is read 1: $file_1\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
680 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
681 else{ # this is read 2 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
682 $file_2 = $output_filename;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
683 shift @filenames;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
684 # warn "This is read 2: $file_2\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
685 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
686
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
687 if ($file_1 and $file_2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
688 warn "Validate paired-end files $file_1 and $file_2\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
689 sleep (1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
690
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
691 my ($val_1,$val_2,$un_1,$un_2) = validate_paired_end_files($file_1,$file_2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
692
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
693 ### RUNNING FASTQC
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
694 if ($fastqc){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
695
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
696 warn "\n >>> Now running FastQC on the validated data $val_1<<<\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
697 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
698
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
699 if ($fastqc_args){
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
700 system ("$path_to_fastqc $fastqc_args $output_dir$val_1");
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
701 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
702 else{
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
703 system ("$path_to_fastqc $output_dir$val_1");
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
704 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
705
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
706 warn "\n >>> Now running FastQC on the validated data $val_2<<<\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
707 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
708
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
709 if ($fastqc_args){
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
710 system ("$path_to_fastqc $fastqc_args $output_dir$val_2");
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
711 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
712 else{
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
713 system ("$path_to_fastqc $output_dir$val_2");
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
714 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
715
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
716 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
717
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
718 warn "Deleting both intermediate output files $file_1 and $file_2\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
719 unlink "$output_dir$file_1";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
720 unlink "$output_dir$file_2";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
721
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
722 warn "\n",'='x100,"\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
723 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
724
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
725 $file_1 = undef; # setting file_1 and file_2 to undef once validation is completed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
726 $file_2 = undef;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
727 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
728 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
729
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
730 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
731
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
### Validates a pair of trimmed FastQ files.
###
### Both files are read in lockstep, four lines (one FastQ record) at a time. Each
### read may be clipped at its 5' end ($clip_r1/$clip_r2) and 3' end
### ($three_prime_clip_r1/$three_prime_clip_r2). A pair is written to the validated
### output files only if BOTH reads are at least $length_cutoff bp long. If
### $retain is set, a read from a rejected pair is written to the matching
### 'unpaired' file when it is at least $length_read_1/$length_read_2 bp long.
###
### Arguments: the names of the read 1 and read 2 input files (without the
###            output directory, which is taken from $output_dir).
### Returns:   ($out_1,$out_2), followed by ($unpaired_1,$unpaired_2) if $retain
###            is set. All names are given without the output directory.
### Dies if a file can't be opened/closed or the first record isn't FastQ.
###
### Relies on file-level settings ($output_dir, $gzip, $dont_gzip, $retain,
### $length_cutoff, ...) and on the already opened REPORT filehandle.
sub validate_paired_end_files{

    my $file_1 = shift;
    my $file_2 = shift;

    warn "file_1: $file_1, file_2: $file_2\n\n";

    ### Opening the input files. The list form of the pipe open and the 3-argument
    ### open keep file names containing spaces or shell metacharacters from being
    ### misinterpreted by the shell or as an open mode.
    if ($file_1 =~ /\.gz$/){
        open (IN1,'-|','zcat',"$output_dir$file_1") or die "Couldn't read from file $file_1: $!\n";
    }
    else{
        open (IN1,'<',"$output_dir$file_1") or die "Couldn't read from file $file_1: $!\n";
    }

    if ($file_2 =~ /\.gz$/){
        open (IN2,'-|','zcat',"$output_dir$file_2") or die "Couldn't read from file $file_2: $!\n";
    }
    else{
        open (IN2,'<',"$output_dir$file_2") or die "Couldn't read from file $file_2: $!\n";
    }

    warn "\n>>>>> Now validing the length of the 2 paired-end infiles: $file_1 and $file_2 <<<<<\n";
    sleep (3);

    ### Output is only compressed if --gzip was requested and not overridden by --dont_gzip
    my $compress_output = ($gzip && !$dont_gzip);

    ### Deriving the names of the validated output files: *trimmed.fq(.gz) => *val_N.fq
    my $out_1 = $file_1;
    my $out_2 = $file_2;
    $out_1 =~ s/trimmed\.fq(?:\.gz)?$/val_1.fq/;
    $out_2 =~ s/trimmed\.fq(?:\.gz)?$/val_2.fq/;

    if ($compress_output){
        $out_1 .= '.gz';
        open (R1,"| gzip -c - > ${output_dir}${out_1}") or die "Can't write to $out_1: $!\n";
    }
    else{
        open (R1,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n";
    }

    if ($compress_output){
        $out_2 .= '.gz';
        open (R2,"| gzip -c - > ${output_dir}${out_2}") or die "Can't write to $out_2: $!\n";
    }
    else{
        open (R2,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n";
    }

    warn "Writing validated paired-end read 1 reads to $out_1\n";
    warn "Writing validated paired-end read 2 reads to $out_2\n\n";

    my $unpaired_1;
    my $unpaired_2;

    if ($retain){

        ### Deriving the names of the unpaired output files: *trimmed.fq(.gz) => *unpaired_N.fq
        $unpaired_1 = $file_1;
        $unpaired_2 = $file_2;
        $unpaired_1 =~ s/trimmed\.fq(?:\.gz)?$/unpaired_1.fq/;
        $unpaired_2 =~ s/trimmed\.fq(?:\.gz)?$/unpaired_2.fq/;

        if ($compress_output){
            $unpaired_1 .= '.gz';
            open (UNPAIRED1,"| gzip -c - > ${output_dir}${unpaired_1}") or die "Can't write to $unpaired_1: $!\n";
        }
        else{
            open (UNPAIRED1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n";
        }

        if ($compress_output){
            $unpaired_2 .= '.gz';
            open (UNPAIRED2,"| gzip -c - > ${output_dir}${unpaired_2}") or die "Can't write to $unpaired_2: $!\n";
        }
        else{
            open (UNPAIRED2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n";
        }

        warn "Writing unpaired read 1 reads to $unpaired_1\n";
        warn "Writing unpaired read 2 reads to $unpaired_2\n\n";
    }

    my $sequence_pairs_removed = 0;
    my $read_1_printed = 0;
    my $read_2_printed = 0;

    my $count = 0;

    while (1){
        my $id_1   = <IN1>;
        my $seq_1  = <IN1>;
        my $l3_1   = <IN1>;
        my $qual_1 = <IN1>;
        ### testing for definedness so that a final line of just '0' without a newline is not mistaken for EOF
        last unless (defined $id_1 and defined $seq_1 and defined $l3_1 and defined $qual_1);

        my $id_2   = <IN2>;
        my $seq_2  = <IN2>;
        my $l3_2   = <IN2>;
        my $qual_2 = <IN2>;
        last unless (defined $id_2 and defined $seq_2 and defined $l3_2 and defined $qual_2);

        ++$count;

        ## small check if the sequence files appear to be FastQ files
        if ($count == 1){ # performed just once
            if ($id_1 !~ /^\@/ or $l3_1 !~ /^\+/){
                die "Input file doesn't seem to be in FastQ format at sequence $count\n";
            }
            if ($id_2 !~ /^\@/ or $l3_2 !~ /^\+/){
                die "Input file doesn't seem to be in FastQ format at sequence $count\n";
            }
        }

        ### ID and '+' lines keep their line endings; sequence and quality are
        ### chomped so that they can be clipped, and are printed with "\n" below
        chomp $seq_1;
        chomp $seq_2;
        chomp $qual_1;
        chomp $qual_2;

        ### 5' clipping
        if ($clip_r1){
            if (length($seq_1) > $clip_r1){ # sequences that are already too short won't be trimmed again
                $seq_1  = substr($seq_1,$clip_r1); # starting after the bases to be clipped until the end of the sequence
                $qual_1 = substr($qual_1,$clip_r1);
            }
        }
        if ($clip_r2){
            if (length($seq_2) > $clip_r2){ # sequences that are already too short won't be trimmed again
                $seq_2  = substr($seq_2,$clip_r2); # starting after the bases to be clipped until the end of the sequence
                $qual_2 = substr($qual_2,$clip_r2);
            }
        }

        ### 3' clipping
        if ($three_prime_clip_r1){
            if (length($seq_1) > $three_prime_clip_r1){ # sequences that are already too short won't be clipped again
                $seq_1  = substr($seq_1,0,(length($seq_1) - $three_prime_clip_r1)); # keeping everything but the last N bases
                $qual_1 = substr($qual_1,0,(length($qual_1) - $three_prime_clip_r1));
            }
        }
        if ($three_prime_clip_r2){
            if (length($seq_2) > $three_prime_clip_r2){ # sequences that are already too short won't be clipped again
                $seq_2  = substr($seq_2,0,(length($seq_2) - $three_prime_clip_r2)); # keeping everything but the last N bases
                $qual_2 = substr($qual_2,0,(length($qual_2) - $three_prime_clip_r2));
            }
        }

        ### making sure that the reads do have a sensible length
        if ( (length($seq_1) < $length_cutoff) or (length($seq_2) < $length_cutoff) ){
            ++$sequence_pairs_removed;
            if ($retain){ # writing out single-end reads if they are longer than the cutoff

                if ( length($seq_1) >= $length_read_1){ # read 1 is long enough
                    print UNPAIRED1 $id_1;
                    print UNPAIRED1 "$seq_1\n";
                    print UNPAIRED1 $l3_1;
                    print UNPAIRED1 "$qual_1\n";
                    ++$read_1_printed;
                }

                if ( length($seq_2) >= $length_read_2){ # read 2 is long enough
                    print UNPAIRED2 $id_2;
                    print UNPAIRED2 "$seq_2\n";
                    print UNPAIRED2 $l3_2;
                    print UNPAIRED2 "$qual_2\n";
                    ++$read_2_printed;
                }

            }
        }
        else{
            print R1 $id_1;
            print R1 "$seq_1\n";
            print R1 $l3_1;
            print R1 "$qual_1\n";

            print R2 $id_2;
            print R2 "$seq_2\n";
            print R2 $l3_2;
            print R2 "$qual_2\n";
        }

    }

    ### Releasing the input handles (and reaping the zcat processes). The return
    ### value is deliberately ignored: if one file ended before the other, zcat
    ### may have been terminated by SIGPIPE, which is not an error worth dying for
    close IN1;
    close IN2;

    my $percentage;

    if ($count){
        $percentage = sprintf("%.2f",$sequence_pairs_removed/$count*100);
    }
    else{
        $percentage = 'N/A'; # no sequence pairs were read, avoiding a division by zero
    }

    warn "Total number of sequences analysed: $count\n\n";
    warn "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n";

    print REPORT "Total number of sequences analysed for the sequence pair length validation: $count\n\n";
    print REPORT "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n";

    ### The unpaired counters are only ever incremented in --retain_unpaired mode,
    ### so this is the option that decides whether they get reported (not --keep)
    if ($retain){
        warn "Number of unpaired read 1 reads printed: $read_1_printed\n";
        warn "Number of unpaired read 2 reads printed: $read_2_printed\n";
    }

    ### buffered write errors (and gzip failures) only surface when closing
    close R1 or die $!;
    close R2 or die $!;

    if ($retain){
        close UNPAIRED1 or die $!;
        close UNPAIRED2 or die $!;
    }

    warn "\n";
    if ($retain){
        return ($out_1,$out_2,$unpaired_1,$unpaired_2);
    }
    else{
        return ($out_1,$out_2);
    }
}
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
993
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
994 sub process_commandline{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
995 my $help;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
996 my $quality;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
997 my $adapter;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
998 my $adapter2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
999 my $stringency;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1000 my $report;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1001 my $version;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1002 my $rrbs;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1003 my $length_cutoff;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1004 my $keep;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1005 my $fastqc;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1006 my $non_directional;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1007 my $phred33;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1008 my $phred64;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1009 my $fastqc_args;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1010 my $trim;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1011 my $gzip;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1012 my $validate;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1013 my $retain;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1014 my $length_read_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1015 my $length_read_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1016 my $error_rate;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1017 my $output_dir;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1018 my $no_report_file;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1019 my $suppress_warn;
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1020 my $dont_gzip;
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1021 my $clip_r1;
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1022 my $clip_r2;
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1023 my $three_prime_clip_r1;
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1024 my $three_prime_clip_r2;
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1025
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1026 my $command_line = GetOptions ('help|man' => \$help,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1027 'q|quality=i' => \$quality,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1028 'a|adapter=s' => \$adapter,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1029 'a2|adapter2=s' => \$adapter2,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1030 'report' => \$report,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1031 'version' => \$version,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1032 'stringency=i' => \$stringency,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1033 'fastqc' => \$fastqc,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1034 'RRBS' => \$rrbs,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1035 'keep' => \$keep,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1036 'length=i' => \$length_cutoff,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1037 'non_directional' => \$non_directional,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1038 'phred33' => \$phred33,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1039 'phred64' => \$phred64,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1040 'fastqc_args=s' => \$fastqc_args,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1041 'trim1' => \$trim,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1042 'gzip' => \$gzip,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1043 'paired_end' => \$validate,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1044 'retain_unpaired' => \$retain,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1045 'length_1|r1=i' => \$length_read_1,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1046 'length_2|r2=i' => \$length_read_2,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1047 'e|error_rate=s' => \$error_rate,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1048 'o|output_dir=s' => \$output_dir,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1049 'no_report_file' => \$no_report_file,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1050 'suppress_warn' => \$suppress_warn,
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1051 'dont_gzip' => \$dont_gzip,
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1052 'clip_R1=i' => \$clip_r1,
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1053 'clip_R2=i' => \$clip_r2,
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1054 'three_prime_clip_R1=i' => \$three_prime_clip_r1,
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1055 'three_prime_clip_R2=i' => \$three_prime_clip_r2,
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1056 );
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1057
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1058 ### EXIT ON ERROR if there were errors with any of the supplied options
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1059 unless ($command_line){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1060 die "Please respecify command line options\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1061 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1062
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1063 ### HELPFILE
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1064 if ($help){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1065 print_helpfile();
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1066 exit;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1067 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1068
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1069
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1070
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1071
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1072 if ($version){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1073 print << "VERSION";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1074
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1075 Quality-/Adapter-/RRBS-Trimming
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1076 (powered by Cutadapt)
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1077 version $trimmer_version
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1078
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1079 Last update: 15 04 2014
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1080
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1081 VERSION
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1082 exit;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1083 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1084
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1085 ### RRBS
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1086 unless ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1087 $rrbs = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1088 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1089
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1090 ### SUPPRESS WARNINGS
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1091 if (defined $suppress_warn){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1092 $DOWARN = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1093 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1094
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1095 ### QUALITY SCORES
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1096 my $phred_encoding;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1097 if ($phred33){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1098 if ($phred64){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1099 die "Please specify only a single quality encoding type (--phred33 or --phred64)\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1100 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1101 $phred_encoding = 33;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1102 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1103 elsif ($phred64){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1104 $phred_encoding = 64;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1105 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1106 unless ($phred33 or $phred64){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1107 warn "No quality encoding type selected. Assuming that the data provided uses Sanger encoded Phred scores (default)\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1108 $phred_encoding = 33;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1109 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1110 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1111
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1112 ### NON-DIRECTIONAL RRBS
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1113 if ($non_directional){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1114 unless ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1115 die "Option '--non_directional' requires '--rrbs' to be specified as well. Please re-specify!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1116 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1117 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1118 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1119 $non_directional = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1120 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1121
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1122 if ($fastqc_args){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1123 $fastqc = 1; # specifying fastqc extra arguments automatically means that FastQC will be executed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1124 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1125 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1126 $fastqc_args = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1127 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1128
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1129 ### CUSTOM ERROR RATE
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1130 if (defined $error_rate){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1131 # make sure that the error rate is between 0 and 1
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1132 unless ($error_rate >= 0 and $error_rate <= 1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1133 die "Please specify an error rate between 0 and 1 (the default is 0.1)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1134 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1135 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1136 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1137 $error_rate = 0.1; # (default)
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1138 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1139
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1140 if (defined $adapter){
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1141 unless ($adapter =~ /^[ACTGNXactgnx]+$/){
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1142 die "Adapter sequence must contain DNA characters only (A,C,T,G or N)!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1143 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1144 $adapter = uc$adapter;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1145 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1146
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1147 if (defined $adapter2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1148 unless ($validate){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1149 die "An optional adapter for read 2 of paired-end files requires '--paired' to be specified as well! Please re-specify\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1150 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1151 unless ($adapter2 =~ /^[ACTGNactgn]+$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1152 die "Optional adapter 2 sequence must contain DNA characters only (A,C,T,G or N)!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1153 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1154 $adapter2 = uc$adapter2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1155 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1156
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1157 ### LENGTH CUTOFF
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1158 unless (defined $length_cutoff){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1159 $length_cutoff = 20;
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1160 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1161
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1162 ### files are supposed to be paired-end files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1163 if ($validate){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1164
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1165 # making sure that an even number of input files has been supplied
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1166 unless ((scalar@ARGV)%2 == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1167 die "Please provide an even number of input files for paired-end FastQ trimming! Aborting ...\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1168 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1169
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1170 ## CUTOFF FOR VALIDATED READ-PAIRS
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1171 if (defined $length_read_1 or defined $length_read_2){
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1172
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1173 unless ($retain){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1174 die "Please specify --keep_unpaired to alter the unpaired single-end read length cut off(s)\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1175 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1176
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1177 if (defined $length_read_1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1178 unless ($length_read_1 >= 15 and $length_read_1 <= 100){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1179 die "Please select a sensible cutoff for when a read pair should be filtered out due to short length (allowed range: 15-100 bp)\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1180 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1181 unless ($length_read_1 > $length_cutoff){
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1182 die "The single-end unpaired read length needs to be longer than the paired-end cut-off value ($length_cutoff bp)\n\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1183 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1184 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1185
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1186 if (defined $length_read_2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1187 unless ($length_read_2 >= 15 and $length_read_2 <= 100){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1188 die "Please select a sensible cutoff for when a read pair should be filtered out due to short length (allowed range: 15-100 bp)\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1189 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1190 unless ($length_read_2 > $length_cutoff){
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1191 die "The single-end unpaired read length needs to be longer than the paired-end cut-off value ($length_cutoff bp)\n\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1192 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1193 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1194 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1195
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1196 if ($retain){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1197 $length_read_1 = 35 unless (defined $length_read_1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1198 $length_read_2 = 35 unless (defined $length_read_2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1199 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1200 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1201
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1202 unless ($no_report_file){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1203 $no_report_file = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1204 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1205
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1206 ### OUTPUT DIR PATH
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1207 if ($output_dir){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1208 unless ($output_dir =~ /\/$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1209 $output_dir =~ s/$/\//;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1210 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1211 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1212 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1213 $output_dir = '';
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1214 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1215
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1216 ### Trimming at the 5' end
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1217 if (defined $clip_r2){ # trimming 5' bases of read 2 requires paired-end mode
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1218 die "Clipping the 5' end of read 2 is only allowed for paired-end files (--paired)\n" unless ($validate);
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1219 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1220
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1221 if (defined $clip_r1){ # trimming 5' bases of read 1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1222 unless ($clip_r1 > 0 and $clip_r1 < 100){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1223 die "The 5' clipping value for read 1 should have a sensible value (> 0 and < read length)\n\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1224 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1225 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1226
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1227 if (defined $clip_r2){ # trimming 5' bases of read 2
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1228 unless ($clip_r2 > 0 and $clip_r2 < 100){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1229 die "The 5' clipping value for read 2 should have a sensible value (> 0 and < read length)\n\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1230 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1231 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1232
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1233 ### Trimming at the 3' end
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1234 if (defined $three_prime_clip_r1){ # trimming 3' bases of read 1
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1235 unless ($three_prime_clip_r1 > 0 and $three_prime_clip_r1 < 100){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1236 die "The 3' clipping value for read 1 should have a sensible value (> 0 and < read length)\n\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1237 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1238 }
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1239
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1240 if (defined $three_prime_clip_r2){ # trimming 3' bases of read 2
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1241 unless ($three_prime_clip_r2 > 0 and $three_prime_clip_r2 < 100){
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1242 die "The 3' clipping value for read 2 should have a sensible value (> 0 and < read length)\n\n";
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1243 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1244 }
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1245
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1246
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1247 return ($quality,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$adapter2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2,$three_prime_clip_r1,$three_prime_clip_r2);
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1248 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1249
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1250
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1251
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1252
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1253 sub print_helpfile{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1254 print << "HELP";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1255
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1256 USAGE:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1257
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1258 trim_galore [options] <filename(s)>
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1259
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1260
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1261 -h/--help Print this help message and exits.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1262
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1263 -v/--version Print the version information and exits.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1264
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1265 -q/--quality <INT> Trim low-quality ends from reads in addition to adapter removal. For
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1266 RRBS samples, quality trimming will be performed first, and adapter
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1267 trimming is carried in a second round. Other files are quality and adapter
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1268 trimmed in a single pass. The algorithm is the same as the one used by BWA
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1269 (Subtract INT from all qualities; compute partial sums from all indices
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1270 to the end of the sequence; cut sequence at the index at which the sum is
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1271 minimal). Default Phred score: 20.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1272
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1273 --phred33 Instructs Cutadapt to use ASCII+33 quality scores as Phred scores
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1274 (Sanger/Illumina 1.9+ encoding) for quality trimming. Default: ON.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1275
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1276 --phred64 Instructs Cutadapt to use ASCII+64 quality scores as Phred scores
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1277 (Illumina 1.5 encoding) for quality trimming.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1278
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1279 --fastqc Run FastQC in the default mode on the FastQ file once trimming is complete.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1280
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1281 --fastqc_args "<ARGS>" Passes extra arguments to FastQC. If more than one argument is to be passed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1282 to FastQC they must be in the form "arg1 arg2 etc.". An example would be:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1283 --fastqc_args "--nogroup --outdir /home/". Passing extra arguments will
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1284 automatically invoke FastQC, so --fastqc does not have to be specified
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1285 separately.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1286
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1287 -a/--adapter <STRING> Adapter sequence to be trimmed. If not specified explicitely, the first 13
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1288 bp of the Illumina adapter 'AGATCGGAAGAGC' will be used by default.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1289
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1290 -a2/--adapter2 <STRING> Optional adapter sequence to be trimmed off read 2 of paired-end files. This
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1291 option requires '--paired' to be specified as well.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1292
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1293
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1294 --stringency <INT> Overlap with adapter sequence required to trim a sequence. Defaults to a
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1295 very stringent setting of 1, i.e. even a single bp of overlapping sequence
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1296 will be trimmed off from the 3' end of any read.
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1297
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1298 -e <ERROR RATE> Maximum allowed error rate (no. of errors divided by the length of the matching
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1299 region) (default: 0.1)
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1300
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1301 --gzip Compress the output file with GZIP. If the input files are GZIP-compressed
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1302 the output files will automatically be GZIP compressed as well. As of v0.2.8 the
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1303 compression will take place on the fly.
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1304
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1305 --dont_gzip Output files won't be compressed with GZIP. This option overrides --gzip.
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1306
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1307 --length <INT> Discard reads that became shorter than length INT because of either
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1308 quality or adapter trimming. A value of '0' effectively disables
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1309 this behaviour. Default: 20 bp.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1310
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1311 For paired-end files, both reads of a read-pair need to be longer than
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1312 <INT> bp to be printed out to validated paired-end files (see option --paired).
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1313 If only one read became too short there is the possibility of keeping such
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1314 unpaired single-end reads (see --retain_unpaired). Default pair-cutoff: 20 bp.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1315
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1316 -o/--output_dir <DIR> If specified all output will be written to this directory instead of the current
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1317 directory.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1318
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1319 --no_report_file If specified no report file will be generated.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1320
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1321 --suppress_warn If specified any output to STDOUT or STDERR will be suppressed.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1322
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1323 --clip_R1 <int> Instructs Trim Galore to remove <int> bp from the 5' end of read 1 (or single-end
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1324 reads). This may be useful if the qualities were very poor, or if there is some
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1325 sort of unwanted bias at the 5' end. Default: OFF.
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1326
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1327 --clip_R2 <int> Instructs Trim Galore to remove <int> bp from the 5' end of read 2 (paired-end reads
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1328 only). This may be useful if the qualities were very poor, or if there is some sort
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1329 of unwanted bias at the 5' end. For paired-end BS-Seq, it is recommended to remove
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1330 the first few bp because the end-repair reaction may introduce a bias towards low
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1331 methylation. Please refer to the M-bias plot section in the Bismark User Guide for
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1332 some examples. Default: OFF.
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1333
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1334 --three_prime_clip_R1 <int> Instructs Trim Galore to remove <int> bp from the 3' end of read 1 (or single-end
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1335 reads) AFTER adapter/quality trimming has been performed. This may remove some unwanted
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1336 bias from the 3' end that is not directly related to adapter sequence or basecall quality.
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1337 Default: OFF.
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1338
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1339 --three_prime_clip_R2 <int> Instructs Trim Galore to remove <int> bp from the 3' end of read 2 AFTER
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1340 adapter/quality trimming has been performed. This may remove some unwanted bias from
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1341 the 3' end that is not directly related to adapter sequence or basecall quality.
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1342 Default: OFF.
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1343
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1344
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1345 RRBS-specific options (MspI digested material):
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1346
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1347 --rrbs Specifies that the input file was an MspI digested RRBS sample (recognition
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1348 site: CCGG). Sequences which were adapter-trimmed will have a further 2 bp
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1349 removed from their 3' end. This is to avoid that the filled-in C close to the
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1350 second MspI site in a sequence is used for methylation calls. Sequences which
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1351 were merely trimmed because of poor quality will not be shortened further.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1352
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1353 --non_directional Selecting this option for non-directional RRBS libraries will screen
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1354 quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1355 and, if found, removes the first two basepairs. Like with the option
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1356 '--rrbs' this avoids using cytosine positions that were filled-in
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1357 during the end-repair step. '--non_directional' requires '--rrbs' to
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1358 be specified as well.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1359
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1360 --keep Keep the quality trimmed intermediate file. Default: off, which means
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1361 the temporary file is being deleted after adapter trimming. Only has
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1362 an effect for RRBS samples since other FastQ files are not trimmed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1363 for poor qualities separately.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1364
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1365
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1366 Note for RRBS using MseI:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1367
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1368 If your DNA material was digested with MseI (recognition motif: TTAA) instead of MspI it is NOT necessary
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1369 to specify --rrbs or --non_directional since virtually all reads should start with the sequence
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1370 'TAA', and this holds true for both directional and non-directional libraries. As the end-repair of 'TAA'
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1371 restricted sites does not involve any cytosines it does not need to be treated especially. Instead, simply
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1372 run Trim Galore! in the standard (i.e. non-RRBS) mode.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1373
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1374
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1375 Paired-end specific options:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1376
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1377 --paired This option performs length trimming of quality/adapter/RRBS trimmed reads for
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1378 paired-end files. To pass the validation test, both sequences of a sequence pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1379 are required to have a certain minimum length which is governed by the option
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1380 --length (see above). If only one read passes this length threshold the
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1381 surviving read can be rescued (see option --retain_unpaired). Using this option lets
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1382 you discard too short read pairs without disturbing the sequence-by-sequence order
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1383 of FastQ files which is required by many aligners.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1384
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1385 Trim Galore! expects paired-end files to be supplied in a pairwise fashion, e.g.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1386 file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... .
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1387
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1388 -t/--trim1 Trims 1 bp off every read from its 3' end. This may be needed for FastQ files that
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1389 are to be aligned as paired-end data with Bowtie. This is because Bowtie (1) regards
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1390 alignments like this:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1391
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1392 R1 ---------------------------> or this: -----------------------> R1
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1393 R2 <--------------------------- <----------------- R2
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1394
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1395 as invalid (whenever a start/end coordinate is contained within the other read).
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1396
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1397 --retain_unpaired If only one of the two paired-end reads became too short, the longer
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1398 read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq'
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1399 output files. The length cutoff for unpaired single-end reads is
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1400 governed by the parameters -r1/--length_1 and -r2/--length_2. Default: OFF.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1401
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1402 -r1/--length_1 <INT> Unpaired single-end read length cutoff needed for read 1 to be written to
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1403 '.unpaired_1.fq' output file. These reads may be mapped in single-end mode.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1404 Default: 35 bp.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1405
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1406 -r2/--length_2 <INT> Unpaired single-end read length cutoff needed for read 2 to be written to
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1407 '.unpaired_2.fq' output file. These reads may be mapped in single-end mode.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1408 Default: 35 bp.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1409
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1410
4
2c1f0fe810f7 Uploaded
bgruening
parents: 1
diff changeset
1411 Last modified on 16 July 2014.
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1412
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1413 HELP
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1414 exit;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1415 }