annotate trim_galore @ 1:898db63d2e84 draft

upgrade to new version
author bgruening
date Wed, 17 Jul 2013 15:05:43 -0400
parents 3c1664caa8e3
children 2c1f0fe810f7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1 #!/usr/bin/perl
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
2 use strict;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
3 use warnings;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
4 use Getopt::Long;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
5 use IPC::Open3;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
6 use File::Spec;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
7 use File::Basename;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
8 use Cwd;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
9
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
10 ## This program is Copyright (C) 2012-13, Felix Krueger (felix.krueger@babraham.ac.uk)
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
11
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
12 ## This program is free software: you can redistribute it and/or modify
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
13 ## it under the terms of the GNU General Public License as published by
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
14 ## the Free Software Foundation, either version 3 of the License, or
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
15 ## (at your option) any later version.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
16
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
17 ## This program is distributed in the hope that it will be useful,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
18 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
19 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
20 ## GNU General Public License for more details.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
21
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
22 ## You should have received a copy of the GNU General Public License
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
23 ## along with this program. If not, see <http://www.gnu.org/licenses/>.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
24
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
25
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
26
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
27 ## this script is taking in FastQ sequences and trims them with Cutadapt
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
28 ## last modified on 10 April 2013
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
29
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
30 ########################################################################
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
31
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
32 # change these paths if needed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
33
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
34 my $path_to_cutadapt = 'cutadapt';
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
35 my $path_to_fastqc = 'fastqc';
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
36
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
37 ########################################################################
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
38
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
39
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
40 my $trimmer_version = '0.2.8';
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
41 my $DOWARN = 1; # print on screen warning and text by default
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
42 BEGIN { $SIG{'__WARN__'} = sub { warn $_[0] if $DOWARN } };
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
43
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
44 my ($cutoff,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$a2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2) = process_commandline();
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
45
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
46 my @filenames = @ARGV;
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
47
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
48 die "\nPlease provide the filename(s) of one or more FastQ file(s) to launch Trim Galore!\n
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
49 USAGE: 'trim_galore [options] <filename(s)>' or 'trim_galore --help' for more options\n\n" unless (@filenames);
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
50
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
51
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
52 ### SETTING DEFAULTS UNLESS THEY WERE SPECIFIED
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
53 unless (defined $cutoff){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
54 $cutoff = 20;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
55 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
56 my $phred_score_cutoff = $cutoff; # only relevant for report
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
57
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
58 unless (defined $adapter){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
59 $adapter = 'AGATCGGAAGAGC';
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
60 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
61 unless (defined $a2){ # optional adapter for the second read in a pair. Only works for --paired trimming
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
62 $a2 = '';
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
63 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
64
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
65 unless (defined $stringency){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
66 $stringency = 1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
67 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
68
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
69 unless (defined $length_cutoff){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
70 $length_cutoff = 20;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
71 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
72
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
73 if ($phred_encoding == 64){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
74 $cutoff += 31;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
75 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
76
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
77 my $file_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
78 my $file_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
79
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
80 foreach my $filename (@ARGV){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
81 trim ($filename);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
82 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
83
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
84
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
85 sub trim{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
86 my $filename = shift;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
87
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
88 my $output_filename = (split (/\//,$filename))[-1];
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
89 # warn "Here is the outputfile name: $output_filename\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
90
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
91 my $report = $output_filename;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
92 $report =~ s/$/_trimming_report.txt/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
93
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
94 if ($no_report_file) {
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
95 $report = File::Spec->devnull;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
96 open (REPORT,'>',$report) or die "Failed to write to file: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
97 # warn "Redirecting report output to /dev/null\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
98 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
99 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
100 open (REPORT,'>',$output_dir.$report) or die "Failed to write to file: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
101 warn "Writing report to '$output_dir$report'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
102 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
103
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
104 warn "\nSUMMARISING RUN PARAMETERS\n==========================\nInput filename: $filename\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
105 print REPORT "\nSUMMARISING RUN PARAMETERS\n==========================\nInput filename: $filename\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
106
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
107 warn "Quality Phred score cutoff: $phred_score_cutoff\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
108 print REPORT "Quality Phred score cutoff: $phred_score_cutoff\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
109
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
110 warn "Quality encoding type selected: ASCII+$phred_encoding\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
111 print REPORT "Quality encoding type selected: ASCII+$phred_encoding\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
112
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
113 warn "Adapter sequence: '$adapter'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
114 print REPORT "Adapter sequence: '$adapter'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
115
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
116 if ($error_rate == 0.1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
117 warn "Maximum trimming error rate: $error_rate (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
118 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
119 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
120 warn "Maximum trimming error rate: $error_rate\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
121 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
122
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
123 print REPORT "Maximum trimming error rate: $error_rate";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
124 if ($error_rate == 0.1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
125 print REPORT " (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
126 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
127 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
128 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
129 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
130
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
131 if ($a2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
132 warn "Optional adapter 2 sequence (only used for read 2 of paired-end files): '$a2'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
133 print REPORT "Optional adapter 2 sequence (only used for read 2 of paired-end files): '$a2'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
134 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
135
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
136 warn "Minimum required adapter overlap (stringency): $stringency bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
137 print REPORT "Minimum required adapter overlap (stringency): $stringency bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
138
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
139 if ($validate){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
140 warn "Minimum required sequence length for both reads before a sequence pair gets removed: $length_cutoff bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
141 print REPORT "Minimum required sequence length for both reads before a sequence pair gets removed: $length_cutoff bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
142 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
143 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
144 warn "Minimum required sequence length before a sequence gets removed: $length_cutoff bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
145 print REPORT "Minimum required sequence length before a sequence gets removed: $length_cutoff bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
146 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
147
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
148 if ($validate){ # only for paired-end files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
149
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
150 if ($retain){ # keeping single-end reads if only one end is long enough
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
151
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
152 if ($length_read_1 == 35){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
153 warn "Length cut-off for read 1: $length_read_1 bp (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
154 print REPORT "Length cut-off for read 1: $length_read_1 bp (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
155 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
156 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
157 warn "Length cut-off for read 1: $length_read_1 bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
158 print REPORT "Length cut-off for read 1: $length_read_1 bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
159 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
160
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
161 if ($length_read_2 == 35){
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
162 warn "Length cut-off for read 2: $length_read_2 bb (default)\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
163 print REPORT "Length cut-off for read 2: $length_read_2 bp (default)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
164 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
165 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
166 warn "Length cut-off for read 2: $length_read_2 bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
167 print REPORT "Length cut-off for read 2: $length_read_2 bp\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
168 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
169 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
170 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
171
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
172 if ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
173 warn "File was specified to be an MspI-digested RRBS sample. Sequences with adapter contamination will be trimmed a further 2 bp to remove potential methylation-biased bases from the end-repair reaction\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
174 print REPORT "File was specified to be an MspI-digested RRBS sample. Sequences with adapter contamination will be trimmed a further 2 bp to remove potential methylation-biased bases from the end-repair reaction\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
175 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
176
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
177 if ($non_directional){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
178 warn "File was specified to be a non-directional MspI-digested RRBS sample. Sequences starting with either 'CAA' or 'CGA' will have the first 2 bp trimmed off to remove potential methylation-biased bases from the end-repair reaction\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
179 print REPORT "File was specified to be a non-directional MspI-digested RRBS sample. Sequences starting with either 'CAA' or 'CGA' will have the first 2 bp trimmed off to remove potential methylation-biased bases from the end-repair reaction\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
180 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
181
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
182 if ($trim){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
183 warn "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
184 print REPORT "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
185 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
186
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
187 if ($clip_r1){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
188 warn "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5' end to avoid poor qualities or biases\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
189 print REPORT "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5' end to avoid poor qualities or biases\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
190 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
191 if ($clip_r2){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
192 warn "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
193 print REPORT "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
194 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
195
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
196
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
197 if ($fastqc){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
198 warn "Running FastQC on the data once trimming has completed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
199 print REPORT "Running FastQC on the data once trimming has completed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
200
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
201 if ($fastqc_args){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
202 warn "Running FastQC with the following extra arguments: '$fastqc_args'\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
203 print REPORT "Running FastQC with the following extra arguments: $fastqc_args\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
204 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
205 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
206
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
207 if ($keep and $rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
208 warn "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
209 print REPORT "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
210 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
211
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
212
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
213 if ($gzip or $filename =~ /\.gz$/){
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
214 $gzip = 1;
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
215 unless ($dont_gzip){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
216 warn "Output file(s) will be GZIP compressed\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
217 print REPORT "Output file will be GZIP compressed\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
218 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
219 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
220
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
221 warn "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
222 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
223 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
224
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
225 my $temp;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
226
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
227 ### Proceeding differently for RRBS and other type of libraries
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
228 if ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
229
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
230 ### Skipping quality filtering for RRBS libraries if a quality cutoff of 0 was specified
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
231 if ($cutoff == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
232 warn "Quality cutoff selected was 0 - Skipping quality trimming altogether\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
233 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
234 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
235 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
236
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
237 $temp = $filename;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
238 $temp =~ s/$/_qual_trimmed.fastq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
239 open (TEMP,'>',$output_dir.$temp) or die "Can't write to $temp: $!";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
240
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
241 warn " >>> Now performing adaptive quality trimming with a Phred-score cutoff of: $cutoff <<<\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
242 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
243
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
244 open (QUAL,"$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -a X $filename |") or die "Can't open pipe to Cutadapt: $!";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
245
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
246 my $qual_count = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
247
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
248 while (1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
249 my $l1 = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
250 my $seq = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
251 my $l3 = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
252 my $qual = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
253 last unless (defined $qual);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
254
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
255 $qual_count++;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
256 if ($qual_count%10000000 == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
257 warn "$qual_count sequences processed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
258 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
259 print TEMP "$l1$seq$l3$qual";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
260 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
261
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
262 warn "\n >>> Quality trimming completed <<<\n$qual_count sequences processed in total\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
263 close QUAL or die "Unable to close filehandle: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
264 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
265
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
266 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
267 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
268
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
269
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
270 if ($output_filename =~ /\.fastq$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
271 $output_filename =~ s/\.fastq$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
272 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
273 elsif ($output_filename =~ /\.fastq\.gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
274 $output_filename =~ s/\.fastq\.gz$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
275 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
276 elsif ($output_filename =~ /\.fq$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
277 $output_filename =~ s/\.fq$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
278 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
279 elsif ($output_filename =~ /\.fq\.gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
280 $output_filename =~ s/\.fq\.gz$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
281 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
282 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
283 $output_filename =~ s/$/_trimmed.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
284 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
285
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
286 if ($gzip or $filename =~ /\.gz$/){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
287 unless ($dont_gzip){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
288 if ($validate){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
289 open (OUT,'>',$output_dir.$output_filename) or die "Can't open $output_filename: $!\n"; # don't need to gzip intermediate file
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
290 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
291 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
292 $output_filename .= '.gz';
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
293 open (OUT,"| gzip -c - > ${output_dir}${output_filename}") or die "Can't write to $output_filename: $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
294 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
295 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
296 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
297 open (OUT,'>',$output_dir.$output_filename) or die "Can't open $output_filename: $!\n"; # don't need to gzip intermediate file
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
298 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
299 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
300 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
301 open (OUT,'>',$output_dir.$output_filename) or die "Can't open $output_filename: $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
302 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
303 warn "Writing final adapter and quality trimmed output to $output_filename\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
304
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
305 my $count = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
306 my $too_short = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
307 my $quality_trimmed = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
308 my $rrbs_trimmed = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
309 my $rrbs_trimmed_start = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
310 my $CAA = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
311 my $CGA = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
312
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
313 if ($rrbs and $cutoff != 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
314
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
315 ### optionally using 2 different adapters for read 1 and read 2
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
316 if ($validate and $a2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
317 ### Figure out whether current file counts as read 1 or read 2 of paired-end files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
318 if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
319 warn "\n >>> Now performing adapter trimming for the adapter sequence: '$adapter' from file $temp <<< \n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
320 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
321 open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $adapter $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
322 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
323 else{ # this is read 2 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
324 warn "\n >>> Now performing adapter trimming for the adapter sequence: '$a2' from file $temp <<< \n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
325 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
326 open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $a2 $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
327 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
328 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
329 ### Using the same adapter for both read 1 and read 2
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
330 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
331 warn "\n >>> Now performing adapter trimming for the adapter sequence: '$adapter' from file $temp <<< \n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
332 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
333 open3 (\*WRITER, \*TRIM, \*ERROR,"$path_to_cutadapt -f fastq -e $error_rate -O $stringency -a $adapter $output_dir$temp") or die "Failed to launch Cutadapt: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
334 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
335
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
336 close WRITER or die $!; # not needed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
337
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
338 open (QUAL,"$output_dir$temp") or die $!; # quality trimmed file
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
339
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
340 if ($filename =~ /\.gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
341 open (IN,"zcat $filename |") or die $!; # original, untrimmed file
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
342 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
343 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
344 open (IN,$filename) or die $!; # original, untrimmed file
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
345 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
346
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
347 while (1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
348
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
349 # we can process the output from Cutadapt and the original input 1 by 1 to decide if the adapter has been removed or not
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
350 my $l1 = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
351 my $seq = <TRIM>; # adapter trimmed sequence
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
352 my $l3 = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
353 my $qual = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
354
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
355 $_ = <IN>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
356 my $original_seq = <IN>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
357 $_ = <IN>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
358 $_ = <IN>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
359
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
360 $_ = <QUAL>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
361 my $qual_trimmed_seq = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
362 $_ = <QUAL>; # irrelevant
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
363 my $qual_trimmed_qual = <QUAL>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
364
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
365 last unless (defined $qual and defined $qual_trimmed_qual); # could be empty strings
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
366
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
367 $count++;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
368 if ($count%10000000 == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
369 warn "$count sequences processed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
370 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
371
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
372 chomp $seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
373 chomp $qual;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
374 chomp $qual_trimmed_seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
375 chomp $original_seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
376
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
377 my $quality_trimmed_seq_length = length $qual_trimmed_seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
378
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
379 if (length $original_seq > length $qual_trimmed_seq){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
380 ++$quality_trimmed;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
381 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
382
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
383 my $nd = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
384
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
385 ### NON-DIRECTIONAL RRBS
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
386 if ($non_directional){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
387 if (length$seq > 2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
388 if ($seq =~ /^CAA/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
389 ++$CAA;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
390 $seq = substr ($seq,2,length($seq)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
391 $qual = substr ($qual,2,length($qual)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
392 ++$rrbs_trimmed_start;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
393 $nd = 1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
394 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
395 elsif ($seq =~ /^CGA/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
396 $seq = substr ($seq,2,length($seq)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
397 $qual = substr ($qual,2,length($qual)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
398 ++$CGA;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
399 ++$rrbs_trimmed_start;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
400 $nd = 1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
401 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
402 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
403 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
404
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
405 ### directional read
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
406 unless ($nd == 1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
407 if (length $seq >= 2 and length$seq < $quality_trimmed_seq_length){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
408 $seq = substr ($seq,0,length($seq)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
409 $qual = substr ($qual,0,length($qual)-2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
410 ++$rrbs_trimmed;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
411 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
412 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
413
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
414 ### Shortening all sequences by 1 bp on the 3' end
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
415 if ($trim){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
416 $seq = substr($seq,0,length($seq)-1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
417 $qual = substr($qual,0,length($qual)-1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
418 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
419
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
420 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
421 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
422 print OUT "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
423 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
424 else{ # single end
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
425
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
426 if ($clip_r1){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
427 $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
428 $qual = substr($qual,$clip_r1);
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
429 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
430
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
431 if (length $seq < $length_cutoff){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
432 ++$too_short;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
433 next;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
434 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
435 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
436 print OUT "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
437 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
438 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
439 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
440
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
441 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
442 while (<ERROR>){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
443 warn $_;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
444 print REPORT $_;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
445 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
446
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
447 close IN or die "Unable to close IN filehandle: $!";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
448 close QUAL or die "Unable to close QUAL filehandle: $!";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
449 close TRIM or die "Unable to close TRIM filehandle: $!";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
450 close OUT or die "Unable to close OUT filehandle: $!";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
451 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
452 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
453
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
454 ### optionally using 2 different adapters for read 1 and read 2
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
455 if ($validate and $a2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
456 ### Figure out whether current file counts as read 1 or read 2 of paired-end files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
457 if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
458 warn "\n >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$adapter' from file $filename <<< \n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
459 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
460 open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $adapter $filename") or die "Failed to launch Cutadapt: $!";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
461 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
462 else{ # this is read 2 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
463 warn "\n >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$a2' from file $filename <<< \n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
464 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
465 open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $a2 $filename") or die "Failed to launch Cutadapt: $!";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
466 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
467 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
468 ### Using the same adapter for both read 1 and read 2
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
469 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
470 warn "\n >>> Now performing quality (cutoff $cutoff) and adapter trimming in a single pass for the adapter sequence: '$adapter' from file $filename <<< \n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
471 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
472 open3 (\*WRITER, \*TRIM, \*ERROR, "$path_to_cutadapt -f fastq -e $error_rate -q $cutoff -O $stringency -a $adapter $filename") or die "Failed to launch Cutadapt: $!";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
473 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
474
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
475 close WRITER or die $!; # not needed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
476
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
477 while (1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
478
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
479 my $l1 = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
480 my $seq = <TRIM>; # quality and/or adapter trimmed sequence
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
481 my $l3 = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
482 my $qual = <TRIM>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
483 # print "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
484 last unless (defined $qual); # could be an empty string
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
485
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
486 $count++;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
487 if ($count%10000000 == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
488 warn "$count sequences processed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
489 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
490
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
491 chomp $seq;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
492 chomp $qual;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
493
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
494 ### Shortening all sequences by 1 bp on the 3' end
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
495 if ($trim){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
496 $seq = substr($seq,0,length($seq)-1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
497 $qual = substr($qual,0,length($qual)-1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
498 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
499
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
500 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
501 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
502 print OUT "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
503 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
504 else{ # single end
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
505
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
506 if ($clip_r1){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
507 $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
508 $qual = substr($qual,$clip_r1);
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
509 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
510
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
511 if (length $seq < $length_cutoff){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
512 ++$too_short;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
513 next;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
514 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
515 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
516 print OUT "$l1$seq\n$l3$qual\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
517 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
518 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
519 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
520
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
521 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
522 while (<ERROR>){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
523 warn $_;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
524 print REPORT $_;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
525 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
526
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
527 close TRIM or die "Unable to close TRIM filehandle: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
528 close ERROR or die "Unable to close ERROR filehandle: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
529 close OUT or die "Unable to close OUT filehandle: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
530
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
531 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
532
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
533 if ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
534 unless ($keep){ # keeping the quality trimmed intermediate file for RRBS files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
535
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
536 # deleting temporary quality trimmed file
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
537 my $deleted = unlink "$output_dir$temp";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
538
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
539 if ($deleted){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
540 warn "Successfully deleted temporary file $temp\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
541 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
542 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
543 warn "Could not delete temporary file $temp";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
544 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
545 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
546 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
547
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
548 warn "\nRUN STATISTICS FOR INPUT FILE: $filename\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
549 print REPORT "\nRUN STATISTICS FOR INPUT FILE: $filename\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
550
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
551 warn "="x 45,"\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
552 print REPORT "="x 45,"\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
553
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
554 warn "$count sequences processed in total\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
555 print REPORT "$count sequences processed in total\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
556
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
557 ### only reporting this separately if quality and adapter trimming were performed separately
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
558 if ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
559 my $percentage_shortened = sprintf ("%.1f",$quality_trimmed/$count*100);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
560 warn "Sequences were truncated to a varying degree because of deteriorating qualities (Phred score quality cutoff: $cutoff):\t$quality_trimmed ($percentage_shortened%)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
561 print REPORT "Sequences were truncated to a varying degree because of deteriorating qualities (Phred score quality cutoff: $cutoff):\t$quality_trimmed ($percentage_shortened%)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
562 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
563
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
564 my $percentage_too_short = sprintf ("%.1f",$too_short/$count*100);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
565 warn "Sequences removed because they became shorter than the length cutoff of $length_cutoff bp:\t$too_short ($percentage_too_short%)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
566 print REPORT "Sequences removed because they became shorter than the length cutoff of $length_cutoff bp:\t$too_short ($percentage_too_short%)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
567
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
568 if ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
569 my $percentage_rrbs_trimmed = sprintf ("%.1f",$rrbs_trimmed/$count*100);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
570 warn "RRBS reads trimmed by additional 2 bp when adapter contamination was detected:\t$rrbs_trimmed ($percentage_rrbs_trimmed%)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
571 print REPORT "RRBS reads trimmed by additional 2 bp when adapter contamination was detected:\t$rrbs_trimmed ($percentage_rrbs_trimmed%)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
572 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
573
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
574 if ($non_directional){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
575 my $percentage_rrbs_trimmed_at_start = sprintf ("%.1f",$rrbs_trimmed_start/$count*100);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
576 warn "RRBS reads trimmed by 2 bp at the start when read started with CAA ($CAA) or CGA ($CGA) in total:\t$rrbs_trimmed_start ($percentage_rrbs_trimmed_at_start%)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
577 print REPORT "RRBS reads trimmed by 2 bp at the start when read started with CAA ($CAA) or CGA ($CGA) in total:\t$rrbs_trimmed_start ($percentage_rrbs_trimmed_at_start%)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
578 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
579
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
580 warn "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
581 print REPORT "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
582
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
583 ### RUNNING FASTQC unless we are dealing with paired-end files
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
584 unless($validate){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
585 if ($fastqc){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
586 warn "\n >>> Now running FastQC on the data <<<\n\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
587 sleep (5);
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
588 if ($fastqc_args){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
589 system ("$path_to_fastqc $fastqc_args $output_dir$output_filename");
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
590 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
591 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
592 system ("$path_to_fastqc $output_dir$output_filename");
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
593 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
594 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
595 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
596
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
597 ### VALIDATE PAIRED-END FILES
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
598 if ($validate){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
599
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
600 ### Figure out whether current file counts as read 1 or read 2 of paired-end files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
601
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
602 if ( scalar(@filenames)%2 == 0){ # this is read 1 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
603 $file_1 = $output_filename;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
604 shift @filenames;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
605 # warn "This is read 1: $file_1\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
606 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
607 else{ # this is read 2 of a pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
608 $file_2 = $output_filename;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
609 shift @filenames;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
610 # warn "This is read 2: $file_2\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
611 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
612
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
613 if ($file_1 and $file_2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
614 warn "Validate paired-end files $file_1 and $file_2\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
615 sleep (1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
616
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
617 my ($val_1,$val_2,$un_1,$un_2) = validate_paired_end_files($file_1,$file_2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
618
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
619 ### RUNNING FASTQC
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
620 if ($fastqc){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
621
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
622 warn "\n >>> Now running FastQC on the validated data $val_1<<<\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
623 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
624
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
625 if ($fastqc_args){
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
626 system ("$path_to_fastqc $fastqc_args $output_dir$val_1");
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
627 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
628 else{
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
629 system ("$path_to_fastqc $output_dir$val_1");
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
630 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
631
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
632 warn "\n >>> Now running FastQC on the validated data $val_2<<<\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
633 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
634
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
635 if ($fastqc_args){
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
636 system ("$path_to_fastqc $fastqc_args $output_dir$val_2");
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
637 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
638 else{
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
639 system ("$path_to_fastqc $output_dir$val_2");
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
640 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
641
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
642 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
643
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
644 warn "Deleting both intermediate output files $file_1 and $file_2\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
645 unlink "$output_dir$file_1";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
646 unlink "$output_dir$file_2";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
647
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
648 warn "\n",'='x100,"\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
649 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
650
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
651 $file_1 = undef; # setting file_1 and file_2 to undef once validation is completed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
652 $file_2 = undef;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
653 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
654 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
655
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
656 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
657
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
658 sub validate_paired_end_files{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
659
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
660 my $file_1 = shift;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
661 my $file_2 = shift;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
662
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
663 warn "file_1: $file_1, file_2: $file_2\n\n";
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
664
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
665 if ($file_1 =~ /\.gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
666 open (IN1,"zcat $output_dir$file_1 |") or die "Couldn't read from file $file_1: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
667 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
668 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
669 open (IN1, "$output_dir$file_1") or die "Couldn't read from file $file_1: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
670 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
671
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
672 if ($file_2 =~ /\.gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
673 open (IN2,"zcat $output_dir$file_2 |") or die "Couldn't read from file $file_2: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
674 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
675 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
676 open (IN2, "$output_dir$file_2") or die "Couldn't read from file $file_2: $!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
677 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
678
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
679 warn "\n>>>>> Now validing the length of the 2 paired-end infiles: $file_1 and $file_2 <<<<<\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
680 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
681
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
682 my $out_1 = $file_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
683 my $out_2 = $file_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
684
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
685 if ($out_1 =~ /gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
686 $out_1 =~ s/trimmed\.fq\.gz$/val_1.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
687 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
688 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
689 $out_1 =~ s/trimmed\.fq$/val_1.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
690 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
691
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
692 if ($out_2 =~ /gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
693 $out_2 =~ s/trimmed\.fq\.gz$/val_2.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
694 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
695 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
696 $out_2 =~ s/trimmed\.fq$/val_2.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
697 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
698
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
699 if ($gzip){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
700 if ($dont_gzip){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
701 open (R1,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
702 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
703 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
704 $out_1 .= '.gz';
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
705 open (R1,"| gzip -c - > ${output_dir}${out_1}") or die "Can't write to $out_1: $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
706 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
707 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
708 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
709 open (R1,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
710 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
711
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
712 if ($gzip){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
713 if ($dont_gzip){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
714 open (R2,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
715 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
716 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
717 $out_2 .= '.gz';
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
718 open (R2,"| gzip -c - > ${output_dir}${out_2}") or die "Can't write to $out_2: $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
719 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
720 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
721 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
722 open (R2,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
723 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
724
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
725 warn "Writing validated paired-end read 1 reads to $out_1\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
726 warn "Writing validated paired-end read 2 reads to $out_2\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
727
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
728 my $unpaired_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
729 my $unpaired_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
730
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
731 if ($retain){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
732
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
733 $unpaired_1 = $file_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
734 $unpaired_2 = $file_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
735
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
736 if ($unpaired_1 =~ /gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
737 $unpaired_1 =~ s/trimmed\.fq\.gz$/unpaired_1.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
738 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
739 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
740 $unpaired_1 =~ s/trimmed\.fq$/unpaired_1.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
741 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
742
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
743 if ($unpaired_2 =~ /gz$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
744 $unpaired_2 =~ s/trimmed\.fq\.gz$/unpaired_2.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
745 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
746 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
747 $unpaired_2 =~ s/trimmed\.fq$/unpaired_2.fq/;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
748 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
749
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
750 if ($gzip){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
751 if ($dont_gzip){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
752 open (UNPAIRED1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
753 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
754 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
755 $unpaired_1 .= '.gz';
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
756 open (UNPAIRED1,"| gzip -c - > ${output_dir}${unpaired_1}") or die "Can't write to $unpaired_1: $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
757 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
758 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
759 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
760 open (UNPAIRED1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
761 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
762
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
763 if ($gzip){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
764 if ($dont_gzip){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
765 open (UNPAIRED2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
766 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
767 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
768 $unpaired_2 .= '.gz';
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
769 open (UNPAIRED2,"| gzip -c - > ${output_dir}${unpaired_2}") or die "Can't write to $unpaired_2: $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
770 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
771 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
772 else{
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
773 open (UNPAIRED2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
774 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
775
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
776 warn "Writing unpaired read 1 reads to $unpaired_1\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
777 warn "Writing unpaired read 2 reads to $unpaired_2\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
778 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
779
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
780 my $sequence_pairs_removed = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
781 my $read_1_printed = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
782 my $read_2_printed = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
783
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
784 my $count = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
785
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
786 while (1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
787 my $id_1 = <IN1>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
788 my $seq_1 = <IN1>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
789 my $l3_1 = <IN1>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
790 my $qual_1 = <IN1>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
791 last unless ($id_1 and $seq_1 and $l3_1 and $qual_1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
792
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
793 my $id_2 = <IN2>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
794 my $seq_2 = <IN2>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
795 my $l3_2 = <IN2>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
796 my $qual_2 = <IN2>;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
797 last unless ($id_2 and $seq_2 and $l3_2 and $qual_2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
798
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
799 ++$count;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
800
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
801
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
802 ## small check if the sequence files appear to be FastQ files
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
803 if ($count == 1){ # performed just once
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
804 if ($id_1 !~ /^\@/ or $l3_1 !~ /^\+/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
805 die "Input file doesn't seem to be in FastQ format at sequence $count\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
806 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
807 if ($id_2 !~ /^\@/ or $l3_2 !~ /^\+/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
808 die "Input file doesn't seem to be in FastQ format at sequence $count\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
809 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
810 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
811
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
812 chomp $seq_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
813 chomp $seq_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
814
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
815 if ($clip_r1){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
816 $seq_1 = substr($seq_1,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
817 $qual_1 = substr($qual_1,$clip_r1);
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
818 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
819 if ($clip_r2){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
820 $seq_2 = substr($seq_2,$clip_r2); # starting after the sequences to be trimmed until the end of the sequence
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
821 $qual_2 = substr($qual_2,$clip_r2);
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
822 }
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
823
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
824 ### making sure that the reads do have a sensible length
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
825 if ( (length($seq_1) < $length_cutoff) or (length($seq_2) < $length_cutoff) ){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
826 ++$sequence_pairs_removed;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
827 if ($retain){ # writing out single-end reads if they are longer than the cutoff
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
828
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
829 if ( length($seq_1) >= $length_read_1){ # read 1 is long enough
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
830 print UNPAIRED1 $id_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
831 print UNPAIRED1 "$seq_1\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
832 print UNPAIRED1 $l3_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
833 print UNPAIRED1 $qual_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
834 ++$read_1_printed;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
835 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
836
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
837 if ( length($seq_2) >= $length_read_2){ # read 2 is long enough
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
838 print UNPAIRED2 $id_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
839 print UNPAIRED2 "$seq_2\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
840 print UNPAIRED2 $l3_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
841 print UNPAIRED2 $qual_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
842 ++$read_2_printed;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
843 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
844
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
845 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
846 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
847 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
848 print R1 $id_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
849 print R1 "$seq_1\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
850 print R1 $l3_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
851 print R1 $qual_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
852
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
853 print R2 $id_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
854 print R2 "$seq_2\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
855 print R2 $l3_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
856 print R2 $qual_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
857 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
858
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
859 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
860 my $percentage = sprintf("%.2f",$sequence_pairs_removed/$count*100);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
861 warn "Total number of sequences analysed: $count\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
862 warn "Number of sequence pairs removed: $sequence_pairs_removed ($percentage%)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
863
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
864 print REPORT "Total number of sequences analysed for the sequence pair length validation: $count\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
865 print REPORT "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
866
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
867 if ($keep){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
868 warn "Number of unpaired read 1 reads printed: $read_1_printed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
869 warn "Number of unpaired read 2 reads printed: $read_2_printed\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
870 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
871
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
872 close R1 or die $!;
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
873 close R2 or die $!;
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
874
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
875 if ($retain){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
876 close UNPAIRED1 or die $!;
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
877 close UNPAIRED2 or die $!;
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
878 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
879
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
880 warn "\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
881 if ($retain){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
882 return ($out_1,$out_2,$unpaired_1,$unpaired_2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
883 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
884 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
885 return ($out_1,$out_2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
886 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
887 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
888
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
889 sub process_commandline{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
890 my $help;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
891 my $quality;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
892 my $adapter;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
893 my $adapter2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
894 my $stringency;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
895 my $report;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
896 my $version;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
897 my $rrbs;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
898 my $length_cutoff;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
899 my $keep;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
900 my $fastqc;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
901 my $non_directional;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
902 my $phred33;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
903 my $phred64;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
904 my $fastqc_args;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
905 my $trim;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
906 my $gzip;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
907 my $validate;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
908 my $retain;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
909 my $length_read_1;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
910 my $length_read_2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
911 my $error_rate;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
912 my $output_dir;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
913 my $no_report_file;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
914 my $suppress_warn;
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
915 my $dont_gzip;
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
916 my $clip_r1;
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
917 my $clip_r2;
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
918
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
919 my $command_line = GetOptions ('help|man' => \$help,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
920 'q|quality=i' => \$quality,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
921 'a|adapter=s' => \$adapter,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
922 'a2|adapter2=s' => \$adapter2,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
923 'report' => \$report,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
924 'version' => \$version,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
925 'stringency=i' => \$stringency,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
926 'fastqc' => \$fastqc,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
927 'RRBS' => \$rrbs,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
928 'keep' => \$keep,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
929 'length=i' => \$length_cutoff,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
930 'non_directional' => \$non_directional,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
931 'phred33' => \$phred33,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
932 'phred64' => \$phred64,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
933 'fastqc_args=s' => \$fastqc_args,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
934 'trim1' => \$trim,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
935 'gzip' => \$gzip,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
936 'paired_end' => \$validate,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
937 'retain_unpaired' => \$retain,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
938 'length_1|r1=i' => \$length_read_1,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
939 'length_2|r2=i' => \$length_read_2,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
940 'e|error_rate=s' => \$error_rate,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
941 'o|output_dir=s' => \$output_dir,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
942 'no_report_file' => \$no_report_file,
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
943 'suppress_warn' => \$suppress_warn,
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
944 'dont_gzip' => \$dont_gzip,
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
945 'clip_R1=i' => \$clip_r1,
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
946 'clip_R2=i' => \$clip_r2,
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
947 );
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
948
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
949
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
950 ### EXIT ON ERROR if there were errors with any of the supplied options
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
951 unless ($command_line){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
952 die "Please respecify command line options\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
953 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
954
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
955 ### HELPFILE
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
956 if ($help){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
957 print_helpfile();
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
958 exit;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
959 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
960
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
961
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
962
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
963
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
964 if ($version){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
965 print << "VERSION";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
966
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
967 Quality-/Adapter-/RRBS-Trimming
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
968 (powered by Cutadapt)
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
969 version $trimmer_version
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
970
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
971 Last update: 10 04 2013
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
972
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
973 VERSION
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
974 exit;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
975 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
976
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
977 ### RRBS
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
978 unless ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
979 $rrbs = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
980 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
981
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
982 ### SUPRESS WARNINGS
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
983 if (defined $suppress_warn){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
984 $DOWARN = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
985 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
986
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
987 ### QUALITY SCORES
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
988 my $phred_encoding;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
989 if ($phred33){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
990 if ($phred64){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
991 die "Please specify only a single quality encoding type (--phred33 or --phred64)\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
992 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
993 $phred_encoding = 33;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
994 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
995 elsif ($phred64){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
996 $phred_encoding = 64;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
997 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
998 unless ($phred33 or $phred64){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
999 warn "No quality encoding type selected. Assuming that the data provided uses Sanger encoded Phred scores (default)\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1000 $phred_encoding = 33;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1001 sleep (3);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1002 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1003
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1004 ### NON-DIRECTIONAL RRBS
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1005 if ($non_directional){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1006 unless ($rrbs){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1007 die "Option '--non_directional' requires '--rrbs' to be specified as well. Please re-specify!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1008 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1009 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1010 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1011 $non_directional = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1012 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1013
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1014 if ($fastqc_args){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1015 $fastqc = 1; # specifying fastqc extra arguments automatically means that FastQC will be executed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1016 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1017 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1018 $fastqc_args = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1019 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1020
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1021 ### CUSTOM ERROR RATE
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1022 if (defined $error_rate){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1023 # make sure that the error rate is between 0 and 1
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1024 unless ($error_rate >= 0 and $error_rate <= 1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1025 die "Please specify an error rate between 0 and 1 (the default is 0.1)\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1026 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1027 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1028 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1029 $error_rate = 0.1; # (default)
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1030 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1031
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1032 if (defined $adapter){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1033 unless ($adapter =~ /^[ACTGNactgn]+$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1034 die "Adapter sequence must contain DNA characters only (A,C,T,G or N)!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1035 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1036 $adapter = uc$adapter;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1037 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1038
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1039 if (defined $adapter2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1040 unless ($validate){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1041 die "An optional adapter for read 2 of paired-end files requires '--paired' to be specified as well! Please re-specify\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1042 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1043 unless ($adapter2 =~ /^[ACTGNactgn]+$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1044 die "Optional adapter 2 sequence must contain DNA characters only (A,C,T,G or N)!\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1045 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1046 $adapter2 = uc$adapter2;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1047 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1048
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1049 ### files are supposed to be paired-end files
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1050 if ($validate){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1051
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1052 # making sure that an even number of reads has been supplied
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1053 unless ((scalar@ARGV)%2 == 0){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1054 die "Please provide an even number of input files for paired-end FastQ trimming! Aborting ...\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1055 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1056
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1057 ## CUTOFF FOR VALIDATED READ-PAIRS
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1058
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1059 if (defined $length_read_1 or defined $length_read_2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1060 unless ($retain){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1061 die "Please specify --keep_unpaired to alter the unpaired single-end read length cut off(s)\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1062 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1063
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1064 if (defined $length_read_1){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1065 unless ($length_read_1 >= 15 and $length_read_1 <= 100){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1066 die "Please select a sensible cutoff for when a read pair should be filtered out due to short length (allowed range: 15-100 bp)\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1067 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1068 unless ($length_read_1 > $length_cutoff){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1069 die "The single-end unpaired read length needs to be longer than the paired-end cut-off value\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1070 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1071 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1072
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1073 if (defined $length_read_2){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1074 unless ($length_read_2 >= 15 and $length_read_2 <= 100){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1075 die "Please select a sensible cutoff for when a read pair should be filtered out due to short length (allowed range: 15-100 bp)\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1076 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1077 unless ($length_read_2 > $length_cutoff){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1078 die "The single-end unpaired read length needs to be longer than the paired-end cut-off value\n\n";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1079 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1080 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1081 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1082
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1083 if ($retain){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1084 $length_read_1 = 35 unless (defined $length_read_1);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1085 $length_read_2 = 35 unless (defined $length_read_2);
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1086 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1087 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1088
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1089 unless ($no_report_file){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1090 $no_report_file = 0;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1091 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1092
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1093 ### OUTPUT DIR PATH
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1094 if ($output_dir){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1095 unless ($output_dir =~ /\/$/){
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1096 $output_dir =~ s/$/\//;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1097 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1098 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1099 else{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1100 $output_dir = '';
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1101 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1102
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1103 ### Trimming at the 5' end
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1104 if (defined $clip_r2){ # trimming 5' bases of read 1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1105 die "Clipping the 5' end of read 2 is only allowed for paired-end files (--paired)\n" unless ($validate);
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1106 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1107
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1108 if (defined $clip_r1){ # trimming 5' bases of read 1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1109 unless ($clip_r1 > 0 and $clip_r1 < 100){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1110 die "The 5' clipping value for read 1 should have a sensible value (> 0 and < read length)\n\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1111 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1112 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1113
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1114 if (defined $clip_r2){ # trimming 5' bases of read 2
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1115 unless ($clip_r2 > 0 and $clip_r2 < 100){
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1116 die "The 5' clipping value for read 2 should have a sensible value (> 0 and < read length)\n\n";
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1117 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1118 }
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1119
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1120
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1121 return ($quality,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$adapter2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2);
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1122 }
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1123
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1124
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1125
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1126
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1127 sub print_helpfile{
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1128 print << "HELP";
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1129
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1130 USAGE:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1131
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1132 trim_galore [options] <filename(s)>
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1133
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1134
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1135 -h/--help Print this help message and exits.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1136
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1137 -v/--version Print the version information and exits.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1138
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1139 -q/--quality <INT> Trim low-quality ends from reads in addition to adapter removal. For
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1140 RRBS samples, quality trimming will be performed first, and adapter
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1141 trimming is carried in a second round. Other files are quality and adapter
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1142 trimmed in a single pass. The algorithm is the same as the one used by BWA
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1143 (Subtract INT from all qualities; compute partial sums from all indices
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1144 to the end of the sequence; cut sequence at the index at which the sum is
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1145 minimal). Default Phred score: 20.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1146
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1147 --phred33 Instructs Cutadapt to use ASCII+33 quality scores as Phred scores
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1148 (Sanger/Illumina 1.9+ encoding) for quality trimming. Default: ON.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1149
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1150 --phred64 Instructs Cutadapt to use ASCII+64 quality scores as Phred scores
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1151 (Illumina 1.5 encoding) for quality trimming.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1152
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1153 --fastqc Run FastQC in the default mode on the FastQ file once trimming is complete.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1154
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1155 --fastqc_args "<ARGS>" Passes extra arguments to FastQC. If more than one argument is to be passed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1156 to FastQC they must be in the form "arg1 arg2 etc.". An example would be:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1157 --fastqc_args "--nogroup --outdir /home/". Passing extra arguments will
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1158 automatically invoke FastQC, so --fastqc does not have to be specified
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1159 separately.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1160
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1161 -a/--adapter <STRING> Adapter sequence to be trimmed. If not specified explicitely, the first 13
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1162 bp of the Illumina adapter 'AGATCGGAAGAGC' will be used by default.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1163
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1164 -a2/--adapter2 <STRING> Optional adapter sequence to be trimmed off read 2 of paired-end files. This
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1165 option requires '--paired' to be specified as well.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1166
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1167
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1168 -s/--stringency <INT> Overlap with adapter sequence required to trim a sequence. Defaults to a
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1169 very stringent setting of '1', i.e. even a single bp of overlapping sequence
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1170 will be trimmed of the 3' end of any read.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1171
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1172 -e <ERROR RATE> Maximum allowed error rate (no. of errors divided by the length of the matching
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1173 region) (default: 0.1)
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1174
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1175 --gzip Compress the output file with GZIP. If the input files are GZIP-compressed
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1176 the output files will automatically be GZIP compressed as well. As of v0.2.8 the
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1177 compression will take place on the fly.
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1178
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1179 --dont_gzip Output files won't be compressed with GZIP. This option overrides --gzip.
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1180
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1181 --length <INT> Discard reads that became shorter than length INT because of either
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1182 quality or adapter trimming. A value of '0' effectively disables
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1183 this behaviour. Default: 20 bp.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1184
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1185 For paired-end files, both reads of a read-pair need to be longer than
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1186 <INT> bp to be printed out to validated paired-end files (see option --paired).
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1187 If only one read became too short there is the possibility of keeping such
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1188 unpaired single-end reads (see --retain_unpaired). Default pair-cutoff: 20 bp.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1189
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1190 -o/--output_dir <DIR> If specified all output will be written to this directory instead of the current
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1191 directory.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1192
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1193 --no_report_file If specified no report file will be generated.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1194
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1195 --suppress_warn If specified any output to STDOUT or STDERR will be suppressed.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1196
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1197 --clip_R1 <int> Instructs Trim Galore to remove <int> bp from the 5' end of read 1 (or single-end
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1198 reads). This may be useful if the qualities were very poor, or if there is some
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1199 sort of unwanted bias at the 5' end. Default: OFF.
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1200
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1201 --clip_R2 <int> Instructs Trim Galore to remove <int> bp from the 5' end of read 2 (paired-end reads
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1202 only). This may be useful if the qualities were very poor, or if there is some sort
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1203 of unwanted bias at the 5' end. For paired-end BS-Seq, it is recommended to remove
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1204 the first few bp because the end-repair reaction may introduce a bias towards low
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1205 methylation. Please refer to the M-bias plot section in the Bismark User Guide for
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1206 some examples. Default: OFF.
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1207
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1208
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1209
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1210 RRBS-specific options (MspI digested material):
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1211
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1212 --rrbs Specifies that the input file was an MspI digested RRBS sample (recognition
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1213 site: CCGG). Sequences which were adapter-trimmed will have a further 2 bp
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1214 removed from their 3' end. This is to avoid that the filled-in C close to the
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1215 second MspI site in a sequence is used for methylation calls. Sequences which
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1216 were merely trimmed because of poor quality will not be shortened further.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1217
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1218 --non_directional Selecting this option for non-directional RRBS libraries will screen
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1219 quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1220 and, if found, removes the first two basepairs. Like with the option
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1221 '--rrbs' this avoids using cytosine positions that were filled-in
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1222 during the end-repair step. '--non_directional' requires '--rrbs' to
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1223 be specified as well.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1224
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1225 --keep Keep the quality trimmed intermediate file. Default: off, which means
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1226 the temporary file is being deleted after adapter trimming. Only has
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1227 an effect for RRBS samples since other FastQ files are not trimmed
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1228 for poor qualities separately.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1229
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1230
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1231 Note for RRBS using MseI:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1232
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1233 If your DNA material was digested with MseI (recognition motif: TTAA) instead of MspI it is NOT necessary
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1234 to specify --rrbs or --non_directional since virtually all reads should start with the sequence
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1235 'TAA', and this holds true for both directional and non-directional libraries. As the end-repair of 'TAA'
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1236 restricted sites does not involve any cytosines it does not need to be treated especially. Instead, simply
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1237 run Trim Galore! in the standard (i.e. non-RRBS) mode.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1238
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1239
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1240 Paired-end specific options:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1241
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1242 --paired This option performs length trimming of quality/adapter/RRBS trimmed reads for
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1243 paired-end files. To pass the validation test, both sequences of a sequence pair
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1244 are required to have a certain minimum length which is governed by the option
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1245 --length (see above). If only one read passes this length threshold the
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1246 other read can be rescued (see option --retain_unpaired). Using this option lets
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1247 you discard too short read pairs without disturbing the sequence-by-sequence order
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1248 of FastQ files which is required by many aligners.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1249
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1250 Trim Galore! expects paired-end files to be supplied in a pairwise fashion, e.g.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1251 file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... .
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1252
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1253 -t/--trim1 Trims 1 bp off every read from its 3' end. This may be needed for FastQ files that
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1254 are to be aligned as paired-end data with Bowtie. This is because Bowtie (1) regards
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1255 alignments like this:
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1256
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1257 R1 ---------------------------> or this: -----------------------> R1
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1258 R2 <--------------------------- <----------------- R2
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1259
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1260 as invalid (whenever a start/end coordinate is contained within the other read).
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1261
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1262 --retain_unpaired If only one of the two paired-end reads became too short, the longer
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1263 read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq'
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1264 output files. The length cutoff for unpaired single-end reads is
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1265 governed by the parameters -r1/--length_1 and -r2/--length_2. Default: OFF.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1266
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1267 -r1/--length_1 <INT> Unpaired single-end read length cutoff needed for read 1 to be written to
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1268 '.unpaired_1.fq' output file. These reads may be mapped in single-end mode.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1269 Default: 35 bp.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1270
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1271 -r2/--length_2 <INT> Unpaired single-end read length cutoff needed for read 2 to be written to
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1272 '.unpaired_2.fq' output file. These reads may be mapped in single-end mode.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1273 Default: 35 bp.
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1274
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1275
1
898db63d2e84 upgrade to new version
bgruening
parents: 0
diff changeset
1276 Last modified on 15 July 2013.
0
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1277
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1278 HELP
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1279 exit;
3c1664caa8e3 Uploaded
bgruening
parents:
diff changeset
1280 }