annotate Paired_fastQ_trimmer.pl @ 1:fdbcc1aa4f01 draft

Uploaded
author geert-vandeweyer
date Thu, 13 Feb 2014 08:21:15 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1 #!/usr/bin/perl
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
2
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
3 # load modules
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
4 use Getopt::Std;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
5 use threads;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
6 use Thread::Queue;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
7 use threads::shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
8 use File::Basename;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
9 use List::Util qw(max);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
10
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
11 $now = time ;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
12
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
13 $|++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
14 # variables
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
15 my $paired :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
16 $paired = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
17 my $fq :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
18 my $rq :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
19 my $ofq :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
20 my $orq :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
21 my $trimq :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
22 my $failedfile :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
23 my $done :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
24 $done = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
25 my $nrin :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
26 $nrin = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
27
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
28 my $rqz :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
29 my $fqz :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
30 my $rand :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
31 my $discarded :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
32 $discarded = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
33 my $verbose :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
34 $verbose = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
35 my $trim3 :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
36 $trim3 = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
37 my $trim5 :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
38 $trim5 = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
39 # create file lock in the tmp dir
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
40 $rand = int(rand(10000));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
41 while (-e "/tmp/$rand.lock") {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
42 $rand = int(rand(10000));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
43 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
44 system("touch '/tmp/$rand.lock'");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
45 if (!-e "/tmp/sg.write.lock") {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
46 system("touch '/tmp/sg.write.lock'");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
47 system("chmod 777 '/tmp/sg.write.lock'");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
48 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
49
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
50 # opts
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
51 # i : path to forward read FASTQ file
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
52 # I : path to reverse read FASTQ file
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
53 # q : quality threshold in phred.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
54 # n : string representing common read name parts
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
55 # s : trimming style : simple/bwa
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
56 # o : output file for forward reads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
57 # O : output file for reverse reads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
58 # F : output file with fully failed reads pairs.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
59 # v : verbose: defaults to on (0/1)
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
60 # S : Trim on side: 5/3/b(oth)
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
61 # m : minimal length.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
62 getopts('i:I:q:n:s:o:O:F:v:S:m:', \%opts) ;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
63
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
64
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
65 if (defined($opts{'v'})) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
66 $verbose = $opts{'v'};
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
67 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
68
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
69 if (defined($opts{'s'})) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
70 if ($opts{'s'} eq 'bwa') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
71 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
72 print "Using BWA style trimming\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
73 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
74 $style = 'bwa';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
75 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
76 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
77 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
78 print "Using simple 1bp Quality trimming\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
79 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
80 $style = 'simple';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
81 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
82 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
83 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
84 if ($verbose == 1){
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
85 print "Using simple 1bp Quality trimming\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
86 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
87 $style = 'simple';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
88 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
89
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
90 if (!defined($opts{'i'})) { die('Forward reads fastq is mandatory');}
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
91 $fq = $opts{'i'};
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
92 $ofq = $opts{'o'};
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
93
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
94 ## gzipped infile?
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
95 my $out = `file $fq`;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
96 if ($out =~ m/gzip compressed/) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
97 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
98 print "Forward reads in gzipped format.\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
99 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
100 $fqz = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
101 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
102 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
103 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
104 print "Forward reads in plain fastq format.\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
105 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
106 $fqz = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
107 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
108
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
109
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
110 if (defined($opts{'I'})) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
111 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
112 print "Reverse reads provided. Performing paired-end trimming\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
113 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
114 $paired = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
115 $rq = $opts{'I'};
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
116 $orq = $opts{'O'};
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
117 # gzipped ?
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
118 my $out = `file $rq`;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
119 if ($out =~ m/gzip compressed/) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
120 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
121 print "Reverse reads in gzipped format.\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
122 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
123 $rqz = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
124 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
125 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
126 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
127 print "Reverse reads in plain fastq format.\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
128 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
129 $rqz = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
130 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
131 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
132 ## FAILED FILENAME
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
133 $failedfile = $opts{'F'};
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
134
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
135 ## set the sides to trim
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
136 my $trimside = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
137 if (defined($opts{'S'})) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
138 if ($opts{'S'} eq '3') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
139 $trimside = "Trimming reads from the 3' end only\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
140 $trim3 = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
141 $trim5 = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
142 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
143 elsif ($opts{'S'} eq '5') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
144 $trimside = "Trimming reads from the 5' end only\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
145 $trim3 = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
146 $trim5 = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
147 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
148 elsif ($opts{'S'} eq 'b') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
149 $trimside = "Trimming reads both from 5' and 3'\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
150 $trim3 = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
151 $trim5 = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
152 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
153 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
154 $trimside = "Invalid trimming side specification. Setting to both sides trimming\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
155 $trim3 = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
156 $trim5 = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
157 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
158 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
159 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
160 $trimside = "Invalid trimming side specification. Setting to both sides trimming\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
161 $trim3 = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
162 $trim5 = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
163 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
164 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
165 print $trimside;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
166 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
167
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
168 # SET MINIMAL LENGTH
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
169 my $minlength :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
170 if (exists($opts{'m'})) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
171 $minlength = $opts{'m'};
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
172 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
173 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
174 $minlength = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
175 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
176
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
177 ## CHECK IF OUTPUT FILESNAMES ARE PRESENT
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
178 if ($ofq eq '') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
179 $ofq = "$fq.$style.trimmed";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
180 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
181 print "No Forward output name specified. Forward reads will be printed to:\n\t$ofq\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
182 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
183 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
184 if ($paired == 1 && $orq eq '') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
185 $orq = "$rq.$style.trimmed";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
186 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
187 print "No Reverse output name specified. Reverse reads will be printed to:\n\t$orq\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
188 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
189 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
190 if ($failedfile eq '') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
191 $failedfile = "$fq.Failed_Reads";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
192 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
193 print "No failed ouput name specified. Failed reads will be printed to:\n\t$failedfile\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
194 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
195 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
196
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
197 ## set trimming threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
198 if (!defined($opts{'q'}) or !($opts{'q'} =~ m/^[0-9]+$/)) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
199 $trimq = 20;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
200 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
201 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
202 $trimq = $opts{'q'};
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
203 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
204
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
205 ## set read delimiter
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
206 if (defined($opts{'n'})) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
207 $indelim = $opts{'n'};
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
208 # galaxy replaced @ by __at__
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
209 if (substr($indelim,0,6) eq '__at__') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
210 $indelim = '@'.substr($indelim,6);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
211 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
212 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
213 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
214 print "No indelim defined\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
215 $indelim = "@"; # has risk, fails if @ is first qual item
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
216 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
217
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
218 ## NEW : REPLACE if indelim = @ by the first 5 chars of first line of forward-fastq.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
219 if ($indelim eq '@') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
220 if ($fqz == 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
221 open IN, $fq;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
222 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
223 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
224 open(IN, "gunzip -c $fq | ");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
225 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
226 $head = <IN>;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
227 $indelim = substr($head,0,5);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
228 print "\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
229 print "WARNING:\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
230 print "########\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
231 print " Setting Delimiter as '\@' is prone to errors\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
232 print " as it will incorrectly split if '\@' is the firt value in the quality string !!\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
233 print ' It is recommended to use eg -n \'\@SRR301\''."\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
234 print " Therefore, the delimiter was changed to the first 5 characters of the first forward fastq file.\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
235 print " => Delimiter : $indelim\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
236 print "\n\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
237 sleep 3;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
238 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
239 ## create queues
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
240 my $totrim = Thread::Queue->new();
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
241 my $toprint = Thread::Queue->new();
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
242 my $dummy = Thread::Queue->new();
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
243 my $failed = Thread::Queue->new();
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
244
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
245 # monitor thread
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
246 my $finish :shared;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
247 $finish = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
248 $mon = threads->create('monitor');
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
249
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
250 # start FASTQ reading thread
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
251 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
252 print "start reading file\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
253 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
254 $reading = threads->create('readfile');
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
255
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
256 ## give time to build reading buffer
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
257 sleep 10;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
258
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
259 # start trimming threads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
260 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
261 print "start trimming\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
262 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
263 if ($paired == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
264 if ($style eq 'bwa') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
265 $trimming1 = threads->create('bwapaired');
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
266 $trimming2 = threads->create('bwapaired');
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
267
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
268 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
269 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
270 $trimming1 = threads->create('simplepaired');
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
271 $trimming2 = threads->create('simplepaired');
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
272 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
273
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
274 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
275 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
276 if ($style eq 'bwa') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
277 #print "not available yet, switching to simple trimming\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
278 #$style = 'simple';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
279 $trimming1 = threads->create('bwasingle');
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
280 $trimming2 = threads->create('bwasingle');
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
281
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
282 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
283 if ($style eq 'simple') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
284 $trimming1 = threads->create('simplesingle');
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
285 $trimming2 = threads->create('simplesingle');
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
286 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
287 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
288 # start printing threads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
289 $printout = threads->create('printout');
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
290 $printfail = threads->create('printfailed');
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
291 # end threads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
292 $reading->join();
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
293 ## all reads are queued for trimming, add undef to trimming queues to end threads.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
294 $totrim->enqueue(undef);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
295 $totrim->enqueue(undef);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
296 $trimming1->join();
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
297 $trimming2->join();
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
298 ## all reads are trimmed and queued from printing, send undef to end printing threads.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
299 $toprint->enqueue(undef);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
300 $failed->enqueue(undef);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
301 $printout->join();
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
302 $printfail->join();
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
303 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
304 print "Waiting for status monitor to shut down\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
305 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
306 $mon->join();
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
307 ## copy files from /tmp to destination folder
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
308 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
309 print "Copying files to destination: Forward..";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
310 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
311 if ($fqz == 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
312 system("cp -f /tmp/$rand.fq $ofq");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
313 unlink("/tmp/$rand.fq");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
314 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
315 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
316 system("gzip -c /tmp/$rand.fq > $ofq");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
317 unlink("/tmp/$rand.fq");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
318 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
319 if ($paired == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
320 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
321 print " Reverse..";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
322 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
323 if ($rqz == 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
324 system("cp -f /tmp/$rand.rq $orq");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
325 unlink("/tmp/$rand.rq");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
326 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
327 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
328 system("gzip -c /tmp/$rand.rq > $orq");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
329 unlink("/tmp/$rand.rq");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
330 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
331 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
332
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
333 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
334 print " Failed items..";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
335 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
336 system("cp -f /tmp/$rand.failed $failedfile");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
337 unlink("/tmp/$rand.failed");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
338 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
339 print " Done\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
340 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
341
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
342 # remove lock file
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
343 unlink("/tmp/$rand.lock");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
344
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
345 ########################
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
346 ## print run details: ##
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
347 ########################
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
348 my $string = sprintf("%.2f",$done/1000000)."M Reads in (each) FASTQ file processed.\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
349 if ($paired == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
350 $string .= sprintf ("%.2f",($done-$discarded)/1000000)."M read pairs in output FASTQ's.\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
351 $string .= sprintf("%.2f",$discarded/1000000)."M reads pairs discarded\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
352 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
353 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
354 $string .= sprintf ("%.2f",($done-$discarded)/1000000)."M reads in output FASTQ.\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
355 $string .= sprintf("%.2f",$discarded/1000000)."M reads discarded\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
356 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
357 print $string;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
358 ##################
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
359 # PRINT RUN-TIME #
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
360 ##################
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
361 $now = time - $now;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
362 printf("\n\nRunning time:%02d:%02d:%02d\n",int($now/3600),int(($now % 3600)/60),int($now % 60));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
363
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
364
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
365 exit();
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
366
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
367
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
368
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
369 ## SUBROUTINE : READ FASTQ FILES
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
370 ################################
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
371 sub readfile {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
372 if ($paired == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
373 if ($fqz == 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
374 open IN1, $fq;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
375 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
376 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
377 open(IN1, "gunzip -c $fq | ");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
378 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
379 if ($rqz == 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
380 open IN2, $rq;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
381 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
382 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
383 open(IN2, "gunzip -c $rq | ");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
384 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
385 $/ = "\n$indelim";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
386 $deliml = length($indelim);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
387 # compose pack ignore string
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
388 my $dx = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
389 for (my $idx = 0; $idx<length($indelim); $idx++) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
390 $dx .= 'X';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
391 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
392 my $f = <IN1>;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
393 my $r = <IN2>;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
394 # the reads contain the delimiter on the end (cf \n on regular file read line endings.)
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
395 # so we strip teh delimiter using pack, leaving just the \n.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
396 # this means that in subsequent reads, we need to add the delimiter to the front of the read.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
397 my $read = substr(pack("A*$dx",$f),$deliml) . $r; #pack("A*$dx",$r) ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
398 my $counter = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
399 my $fr = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
400 my $rr = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
401 my $reads = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
402 my $lastf = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
403 while ($fr = <IN1>) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
404 $rr = <IN2>;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
405 $lastf = $fr;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
406 ## add delim to front, trim from back. add our delim.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
407 ## done in next iteration, because last is different.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
408 # the delimiter from the forward read is used to complete the reverse read.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
409 $reads .= $indelim.pack("A*$dx",$read) .'|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
410 # concat reads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
411 $read = $fr.$rr;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
412 ## add string to queue if 20,000 reads in array
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
413 if ($counter == 20000) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
414 $nrin = $nrin + 20000;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
415 $totrim->enqueue(pack("A*X",$reads)); # pack("A*X") strips last |
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
416 $reads = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
417 $counter = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
418 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
419 $counter++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
420
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
421 ## dummy (see monitor).
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
422 my $d = $dummy->dequeue_nb();
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
423 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
424 # last reads in files need special care.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
425 $reads .= $indelim.$lastf.$indelim.$rr;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
426 close IN1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
427 close IN2;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
428 # submit last reads to queue
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
429 if ($reads ne '') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
430 #$totrim->enqueue(pack("A*X",$reads));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
431 $totrim->enqueue($reads);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
432 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
433 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
434 print "All reads queued for trimming\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
435 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
436 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
437 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
438 if ($fqz == 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
439 open IN1, $fq;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
440 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
441 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
442 open(IN1, "gunzip -c $fq | ");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
443 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
444 $/ = "\n$indelim";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
445 $deliml = length($indelim);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
446 # compose pack ignore string
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
447 my $dx = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
448 for (my $idx = 0; $idx<length($indelim); $idx++) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
449 $dx .= 'X';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
450 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
451 my $f = <IN1>;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
452 # the reads contain the delimiter on the end (cf \n on regular file read line endings.)
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
453 # so we strip teh delimiter using pack, leaving just the \n.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
454 # this means that in subsequent reads, we need to add the delimiter to the front of the read.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
455 my $read = substr($f,$deliml);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
456 my $counter = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
457 my $fr = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
458 my $reads = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
459 while ($fr = <IN1>) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
460 ## add delim to front, trim from back. add our delim.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
461 ## done in next iteration, because last is different.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
462 $reads .= $indelim.pack("A*$dx",$read) .'|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
463 # concat reads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
464 $read = $fr;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
465 ## add string to queue if 10,000 reads in array
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
466 if ($counter == 40000) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
467 $nrin = $nrin + 40000;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
468 $totrim->enqueue(pack("A*X",$reads));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
469 $reads = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
470 $counter = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
471 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
472 $counter++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
473
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
474 ## dummy (see monitor).
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
475 my $d = $dummy->dequeue_nb();
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
476 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
477 $reads .= $indelim.$read;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
478 close IN1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
479 # submit last reads to queue
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
480 if ($reads ne '') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
481 #$totrim->enqueue(pack("A*X",$reads));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
482 $totrim->enqueue($reads);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
483 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
484 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
485 print "All reads queued for trimming\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
486 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
487
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
488 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
489 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
490
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
491 sub bwapaired {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
492 my $tq = $trimq;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
493 while ( defined(my $rstring = $totrim->dequeue())) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
494 my @reads = split(/\|/,$rstring); # each item has 20.000 reads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
495 my $printstring = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
496 my $failedstring = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
497 foreach (@reads) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
498 my $read = $_;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
499 my ($h1a, $s1,$h1b,$q1,$h2a,$s2,$h2b,$q2) = split(/\n/,$read);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
500 ## skip too short reads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
501 if (length($q1) < $minlength || length($q2) < $minlength) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
502 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
503 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
504 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
505 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
506 # initial check for maximal quality value.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
507 ## quality arrays
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
508 my @q1array = unpack("C*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
509 if (max(@q1array) < $tq + 33) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
510 #print "Discarding on failed forward qualities: ".max(@q1array). " vs $tq + 33)\n$read\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
511 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
512 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
513 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
514 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
515 my @q2array = unpack("C*",$q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
516 if (max(@q2array) < $trimq + 33) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
517 #print "Discarding on failed reverse qualities\n$read\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
518 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
519 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
520 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
521 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
522
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
523 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
524 ## Trim 3' ##
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
525 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
526 if ($trim3 == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
527 #trim first
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
528 my $pos = length($q1) - 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
529 my $maxPos = $pos;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
530 ## skip empty read.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
531 #if ($pos <= 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
532 # $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
533 # $failedstring .= $read . '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
534 # #print "skip empty : \n$read\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
535 # next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
536 #}
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
537 my $area = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
538 my $maxArea = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
539 ## unpack qual-string to array of phred scores
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
540 #my @qarray = unpack("C*",$q1); # use from initial check.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
541 while ($pos>0 && $area>=0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
542 $area += $tq - ($q1array[($pos)] - 33);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
543 ## not <= for more aggressive trimming : if multiple equal maxima => use shortest read.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
544 if ($area < $maxArea) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
545 $pos--;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
546 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
547 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
548 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
549 $maxArea = $area;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
550 $maxPos = $pos;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
551 $pos--;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
552 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
553 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
554 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
555 if ($maxPos==0) { # scanned whole read and didn't integrate to maximum? skip (no output for this read)
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
556 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
557 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
558 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
559 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
560 # otherwise trim before position where area reached a maximum
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
561 # $maxPos as length of substr gives positions zero to just before the maxpos.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
562 #$s1 = substr($s1,0,$maxPos);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
563 #$q1 = substr($q1,0,$maxPos);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
564 $s1 = unpack("A$maxPos",$s1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
565 $q1 = unpack("A$maxPos",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
566
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
567 #trim second
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
568 $pos = length($q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
569 $maxPos = $pos;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
570 ## skip empty read.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
571 #if ($pos <= 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
572 # $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
573 # $failedstring .= $read . '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
574 # next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
575 #}
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
576 $area = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
577 $maxArea = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
578 #@qarray = unpack("C*",$q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
579 while ($pos>0 && $area>=0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
580 $area += $tq - ($q2array[($pos)] - 33);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
581 if ($area < $maxArea) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
582 $pos--;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
583 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
584 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
585 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
586 $maxArea = $area;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
587 $maxPos = $pos;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
588 $pos--;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
589 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
590 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
591 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
592 if ($maxPos==0) { # scanned whole read and didn't integrate to zero? skip (no output for this read)
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
593 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
594 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
595 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
596 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
597 # Otherwise trim before position where area reached a maximum
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
598 $s2 = unpack("A$maxPos",$s2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
599 $q2 = unpack("A$maxPos",$q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
600 #$s2 = substr($s2,0,$maxPos);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
601 #$q2 = substr($q2,0,$maxPos);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
602
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
603 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
604 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
605 ## TRIM 5' ##
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
606 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
607 if ($trim5 == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
608 #trim first
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
609 my $skip = 0; #length($q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
610 my $maxPos = $skip;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
611 my $area = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
612 my $maxArea = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
613 my @qarray = unpack("C*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
614 while ($skip<length($q1) && $area>=0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
615 $area += $tq - ($qarray[($skip)] - 33);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
616 if ($area < $maxArea) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
617 $skip++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
618 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
619 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
620 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
621 $maxArea = $area;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
622 $maxPos = $skip;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
623 $skip++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
624 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
625 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
626 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
627 if ($maxPos==length($q1)- 1) { # => maximal value at end of string => no point of no return found.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
628 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
629 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
630 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
631 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
632 # trim after position where area reached a maximum
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
633 #$s1 = substr($s1,($maxPos+1));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
634 #$q1 = substr($q1,($maxPos+1));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
635 $s1 = unpack("x$maxPos A*",$s1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
636 $q1 = unpack("x$maxPos A*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
637
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
638
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
639 #trim second
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
640 $skip = 0 ;#length($q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
641 $maxPos = $skip;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
642 $area = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
643 $maxArea = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
644 @qarray = unpack("C*",$q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
645
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
646 while ($skip < length($q2) && $area>=0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
647 $area += $tq - ($qarray[($skip)] - 33);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
648
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
649 if ($area < $maxArea) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
650 $skip++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
651 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
652 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
653 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
654 $maxArea = $area;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
655 $maxPos = $skip;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
656 $skip++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
657 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
658 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
659 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
660 if ($maxPos==(length($q2)-1)) { # scanned whole read and didn't integrate to zero? skip (no output for this read)
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
661 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
662 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
663 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
664 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
665 # trim after position where area reached a maximum
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
666 #$s2 = substr($s2,($maxPos+1));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
667 #$q2 = substr($q2,($maxPos+1));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
668 $s2 = unpack("x$maxPos A*",$s2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
669 $q2 = unpack("x$maxPos A*",$q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
670 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
671
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
672 ## DISCARD IF TOO SHORT ##
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
673 if (length($s1) < $minlength || length($s2) < $minlength) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
674 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
675 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
676 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
677
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
678 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
679 # queue for printing
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
680 my @arr = ($h1a,$s1,$h1b,$q1,$h2a,$s2,$h2b,$q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
681 $printstring .= join('~',@arr) . '|'; # these two are not in quality string, and can thus be used as seperator
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
682 # | seperates pairs of reads; ~ seperates lines of single read entry.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
683 #print "$printstring\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
684 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
685
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
686 if ($printstring ne '') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
687 $toprint->enqueue(pack("A*X",$printstring)); # removes trailing |
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
688 #$toprint->enqueue($printstring);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
689 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
690 if ($failedstring ne '') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
691 #$failed->enqueue(pack("A*X",$failedstring));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
692 $failed->enqueue($failedstring);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
693 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
694
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
695
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
696 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
697 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
698 sub simplepaired {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
699 # local variables speed up threading
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
700 my $tq = $trimq;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
701 while ( defined(my $rstring = $totrim->dequeue())){
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
702 my @reads = split(/\|/,$rstring);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
703 my $printstring = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
704 my $failedstring = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
705 foreach (@reads) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
706 my $read = $_;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
707 my ($h1a, $s1,$h1b,$q1,$h2a,$s2,$h2b,$q2) = split(/\n/,$read);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
708 ## skip too short reads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
709 if (length($q1) < $minlength || length($q2) < $minlength) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
710 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
711 $failedstring .= $read; # . '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
712 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
713 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
714
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
715 # initial check for maximal quality value.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
716 ## quality arrays
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
717 my @q1array = unpack("C*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
718 if (max(@q1array) < $trimq + 33) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
719 #print "Discarding on failed forward qualities\n$read\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
720 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
721 $failedstring .= $read;# . '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
722 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
723 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
724 my @q2array = unpack("C*",$q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
725 if (max(@q2array) < $trimq + 33) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
726 #print "Discarding on failed reverse qualities\n$read\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
727 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
728 $failedstring .= $read;# . '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
729 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
730 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
731
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
732 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
733 ## trim 3' ##
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
734 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
735 if ($trim3 == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
736 #trim first
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
737 my $pos = length($q1) - 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
738 my $maxPos = $pos;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
739 #if ($pos <= 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
740 # $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
741 # $failedstring .= $read . '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
742 # next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
743 #}
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
744 #my @qarray = unpack("C*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
745 while ( $pos>0 ) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
746 if ($tq > $q1array[$pos] - 33) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
747 # score at position is below threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
748 $pos--;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
749 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
750 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
751 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
752 # score is at or above threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
753 $maxPos = $pos;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
754 last;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
755 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
756 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
757 if ($pos==0) { # scanned whole read and didn't integrate to zero? skip (no output for this read)
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
758 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
759 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
760 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
761 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
762 # integrated to zero? trim before position where quality was below threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
763 # unpack is one based
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
764 #$maxPos++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
765 #$s1 = substr($s1,0,$maxPos);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
766 $s1 = unpack("A$maxPos",$s1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
767 #$q1 = substr($q1,0,$maxPos);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
768 $q1 = unpack("A$maxPos",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
769
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
770 #trim second
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
771 $pos = length($q2) -1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
772 $maxPos = $pos;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
773 if ($pos <= 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
774 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
775 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
776 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
777 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
778 #my @qarray = unpack("C*",$q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
779
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
780 while ($pos>0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
781 if ($tq > $q2array[$pos] - 33) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
782 # score at position is below threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
783 $pos--;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
784 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
785 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
786 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
787 # score is at or above threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
788 $maxPos = $pos;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
789 last;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
790 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
791 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
792 if ($pos==0) { # scanned whole read and didn't integrate to zero? skip (no output for this read)
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
793 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
794 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
795 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
796 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
797 # integrated to zero? trim before position where score below threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
798 #$maxPos++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
799 $s2 = unpack("A$maxPos",$s2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
800 $q2 = unpack("A$maxPos",$q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
801 #$s2 = substr($s2,0,$maxPos);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
802 #$q2 = substr($q2,0,$maxPos);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
803
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
804 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
805 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
806 ## trim 5' ##
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
807 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
808 if ($trim5 == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
809 # first
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
810 my $skip = 0; #length($q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
811 #my $maxPos = $skip;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
812 my @qarray = unpack("C*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
813 while ( $skip < length($q1) ) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
814 if ($tq > $qarray[$skip] - 33) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
815 # score at position is below threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
816 $skip++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
817 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
818 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
819 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
820 # score is at or above threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
821 #$maxPos = $skip;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
822 last;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
823 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
824 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
825 if ($skip == length($q1)) { # should not fail, as that would have happened on 3' trim
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
826 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
827 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
828 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
829 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
830 # integrated to zero? trim before position where quality was below threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
831 # unpack is one based
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
832 $s1 = unpack("x$skip A*",$s1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
833 $q1 = unpack("x$skip A*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
834 #$s1 = substr($s1,$skip);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
835 #$q1 = substr($q1,$skip);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
836
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
837 # second
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
838 $skip = 0; #length($q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
839 my @qarray = unpack("C*",$q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
840 #$maxPos = $skip;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
841 while ( $skip < length($q2) ) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
842 if ($tq > $qarray[$skip] - 33) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
843 # score at position is below threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
844 $skip++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
845 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
846 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
847 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
848 # score is at or above threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
849 #$maxPos = $skip;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
850 last;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
851 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
852 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
853 if ($skip == length($q2)) { # should not fail, as that would have happened on 3' trim
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
854 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
855 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
856 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
857 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
858 # integrated to zero? trim before position where quality was below threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
859 # unpack is one based
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
860 $s2 = unpack("x$skip A*",$s2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
861 $q2 = unpack("x$skip A*",$q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
862 #$s2 = substr($s2,$skip);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
863 #$q2 = substr($q2,$skip);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
864
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
865 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
866 ## DISCARD IF TOO SHORT ##
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
867 if (length($s1) < $minlength || length($s2) < $minlength) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
868 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
869 $failedstring .= $read .#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
870 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
871
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
872 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
873
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
874 # queue for printing
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
875 my @arr = ($h1a,$s1,$h1b,$q1,$h2a,$s2,$h2b,$q2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
876 $printstring .= join('~',@arr) . '|'; # these two are not in quality string, and can thus be used as seperator
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
877 # | seperates pairs of reads; ~ seperates lines of single read entry.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
878 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
879 if ($printstring ne '') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
880 $toprint->enqueue(pack("A*X",$printstring)); # removes trailing |
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
881 #$toprint->enqueue($printstring);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
882 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
883 if ($failedstring ne '') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
884 #$failed->enqueue(pack("A*X",$failedstring));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
885 $failed->enqueue($failedstring);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
886 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
887 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
888 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
889
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
890 sub bwasingle {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
891 # local variables speed up threading
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
892 my $tq = $trimq;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
893 while ( defined(my $rstring = $totrim->dequeue())){
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
894 my @reads = split(/\|/,$rstring);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
895 my $printstring = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
896 my $failedstring = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
897 foreach (@reads) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
898 my $read = $_;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
899 my ($h1a,$s1,$h1b,$q1) = split(/\n/,$read);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
900 ## skip too short reads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
901 if (length($q1) < $minlength ) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
902
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
903 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
904 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
905 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
906 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
907 # initial check for maximal quality value.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
908 ## quality arrays
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
909 my @q1array = unpack("C*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
910 if (max(@q1array) < $tq + 33) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
911 print "Discarding on failed qualities\n$read\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
912 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
913 $failedstring .= $read;# . '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
914 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
915 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
916
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
917 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
918 ## trim 3' ##
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
919 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
920 if ($trim3 == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
921 my $pos = length($q1) - 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
922 my $maxPos = $pos;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
923 ## skip empty read.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
924 #if ($pos <= 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
925 # $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
926 # $failedstring .= $read . '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
927 # next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
928 #}
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
929 my $area = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
930 my $maxArea = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
931 #my @qarray = unpack("C*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
932 while ($pos>0 && $area>=0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
933 $area += $tq - ($q1array[($pos)] - 33);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
934 ## not <= for more aggressive trimming : if multiple equal maxima => use shortest read.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
935 if ($area < $maxArea) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
936 $pos--;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
937 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
938 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
939 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
940 $maxArea = $area;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
941 $maxPos = $pos;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
942 $pos--;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
943 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
944 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
945 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
946 ## The online perl approach is wrong if the high-quality part is not long enough to reach 0.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
947 if ($maxPos == 0) { # maximal badness on position 0 => no point of no return reached.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
948 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
949 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
950 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
951 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
952 # otherwise trim before position where area reached a maximum
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
953 # $maxPos as length of substr gives positions zero to just before the maxpos.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
954 $s1 = unpack("A$maxPos",$s1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
955 $q1 = unpack("A$maxPos",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
956
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
957 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
958 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
959 ## trim 5' ##
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
960 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
961 if ($trim5 == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
962 my $skip = 0; #length($q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
963 my $maxPos = $skip;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
964 my $area = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
965 my $maxArea = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
966 my @qarray = unpack("C*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
967 while ($skip<length($q1) && $area>=0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
968 #$area += $tq - (ord(unpack("x$skip A1",$q1)) - 33);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
969 $area += $tq - ($qarray[($skip)] - 33);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
970 if ($area < $maxArea) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
971 $skip++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
972 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
973 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
974 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
975 $maxArea = $area;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
976 $maxPos = $skip;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
977 $skip++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
978 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
979 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
980 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
981 #if ($skip==length($q1)) { # scanned whole read and didn't integrate to zero? skip (no output for this read
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
982 if ($maxPos == (length($q1)-1)) { # => maximal value at end of string => no point of no return found.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
983 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
984 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
985 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
986 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
987 # trim after position where area reached a maximum
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
988 $s1 = unpack("x$maxPos A*",$s1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
989 $q1 = unpack("x$maxPos A*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
990 #$s1 = substr($s1,($maxPos+1));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
991 #$q1 = substr($q1,($maxPos+1));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
992 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
993 ## DISCARD IF TOO SHORT ##
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
994 if (length($s1) < $minlength) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
995 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
996 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
997 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
998
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
999 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1000
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1001 # queue for printing
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1002 my @arr = ($h1a,$s1,$h1b,$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1003 $printstring .= join("\n",@arr) . "\n"; # join components by newline character for printing
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1004 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1005 if ($printstring ne '') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1006 #$toprint->enqueue(pack("A*X",$printstring)); # removes trailing |
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1007 $toprint->enqueue($printstring);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1008 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1009 if ($failedstring ne '') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1010 #$failed->enqueue(pack("A*X",$failedstring));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1011 $failed->enqueue($failedstring);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1012 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1013
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1014 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1015 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1016
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1017
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1018 sub simplesingle {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1019 # local variables speed up threading
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1020 my $tq = $trimq;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1021 while ( defined(my $rstring = $totrim->dequeue())){
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1022 my @reads = split(/\|/,$rstring);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1023 my $printstring = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1024 my $failedstring = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1025 foreach (@reads) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1026 my $read = $_;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1027 my ($h1a,$s1,$h1b,$q1) = split(/\n/,$read);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1028 ## skip too short reads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1029 if (length($q1) < $minlength ) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1030 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1031 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1032 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1033 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1034 # initial check for maximal quality value.
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1035 ## quality arrays
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1036 my @q1array = unpack("C*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1037 if (max(@q1array) < $trimq + 33) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1038 #print "Discarding on failed qualities\n$read\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1039 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1040 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1041 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1042 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1043
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1044 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1045 ## trim 3' ##
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1046 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1047 if ($trim3 == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1048 my $pos = length($q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1049 my $maxPos = $pos;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1050 #if ($pos <= 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1051 # $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1052 # $failedstring .= $read . '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1053 # next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1054 #}
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1055 #my @qarray = unpack("C*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1056
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1057 while ( $pos>0 ) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1058 if ($tq > $q1array[$pos] - 33) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1059 # score at position is below threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1060 $pos--;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1061 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1062 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1063 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1064 # score is at or above threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1065 $maxPos = $pos;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1066 last;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1067 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1068 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1069 if ($pos==0) { # scanned whole read and didn't integrate to zero? skip (no output for this read)
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1070 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1071 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1072 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1073 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1074 # integrated to zero? trim before position where quality was below threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1075 # unpack is one based
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1076 $s1 = unpack("A$maxPos",$s1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1077 $q1 = unpack("A$maxPos",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1078 #$s1 = substr($s1,0,$maxPos);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1079 #$q1 = substr($q1,0,$maxPos);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1080
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1081 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1082 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1083 ## trim 5' ##
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1084 #############
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1085 if ($trim5 == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1086 my $skip = 0; #length($q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1087 #my $maxPos = $skip;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1088 my @qarray = unpack("C*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1089
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1090 while ( $skip < length($q1) ) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1091 if ($tq > $qarray[$skip] - 33) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1092 # score at position is below threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1093 $skip++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1094 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1095 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1096 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1097 # score is at or above threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1098 #$maxPos = $skip;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1099 last;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1100 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1101 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1102 if ($skip == length($q1)) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1103 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1104 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1105 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1106 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1107 # integrated to zero? trim before position where quality was below threshold
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1108 # unpack is one based
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1109 #$s1 = substr($s1,$skip);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1110 #$q1 = substr($q1,$skip);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1111 $s1 = unpack("x$skip A*",$s1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1112 $q1 = unpack("x$skip A*",$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1113
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1114
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1115 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1116 ## DISCARD IF TOO SHORT ##
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1117 if (length($s1) < $minlength) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1118 $discarded++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1119 $failedstring .= $read ;#. '|';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1120 next;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1121
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1122 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1123
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1124
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1125 # queue for printing
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1126 my @arr = ($h1a,$s1,$h1b,$q1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1127 $printstring .= join("\n",@arr) . "\n"; # join components by newline character for printing
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1128 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1129 if ($printstring ne '') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1130 $toprint->enqueue($printstring); # single reads => no | present, just \n
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1131 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1132 if ($failedstring ne '') {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1133 #$failed->enqueue(pack("A*X",$failedstring));
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1134 $failed->enqueue($failedstring);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1135 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1136
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1137 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1138 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1139
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1140 sub printout {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1141 if ($rand eq '' || !defined($rand)) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1142 die('Random value not passed on!');
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1143 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1144 if ($paired == 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1145 my $counter = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1146 my $string1 = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1147 if (-e "/tmp/$rand.fq") {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1148 unlink("/tmp/$rand.fq");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1149 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1150 while (defined(my $string = $toprint->dequeue())) { # each item represents 10000 input reads (might be less by trimming)
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1151 #my @array = split(/\|/,$printstring);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1152 #$done = $done + scalar(@array);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1153 $done = $done + 10000;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1154 $string1 .= $string;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1155 $counter++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1156 #print batches of 500,000 reads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1157 if ($counter == 50) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1158 open OUT1, ">>/tmp/$rand.fq";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1159 print OUT1 $string1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1160 close OUT1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1161 $string1 = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1162 #$string2 = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1163 $counter = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1164 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1165 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1166 ## print last
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1167 open OUT1, ">>/tmp/$rand.fq";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1168 print OUT1 $string1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1169 close OUT1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1170 $finish = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1171
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1172 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1173 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1174 my $counter = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1175 my $string1 = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1176 my $string2 = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1177 if (-e "/tmp/$rand.fq") {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1178 unlink("/tmp/$rand.fq");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1179 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1180 if (-e "/tmp/$rand.fq") {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1181 unlink("/tmp/$rand.rq");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1182 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1183 while (defined(my $printstring = $toprint->dequeue())) { # each item represents 10000 input reads
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1184 my @array = split(/\|/,$printstring);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1185 $done = $done + scalar(@array);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1186 foreach (@array) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1187 my @subarr = split(/\~/,$_);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1188 $string1 .= join("\n",@subarr[0..3])."\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1189 $string2 .= join("\n",@subarr[4..7])."\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1190 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1191 $counter++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1192 if ($counter == 50) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1193 #print "Printing!\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1194 flock("/tmp/sg.write.lock",2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1195 open OUT1, ">>/tmp/$rand.fq";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1196 print OUT1 $string1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1197 close OUT1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1198 open OUT2, ">>/tmp/$rand.rq";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1199 print OUT2 $string2;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1200 close OUT2;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1201 flock("/tmp/sg.write.lock",8);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1202 $string1 = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1203 $string2 = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1204 $counter = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1205 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1206 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1207 ## print last
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1208 flock("/tmp/sg.write.lock",2);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1209 open OUT1, ">>/tmp/$rand.fq";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1210 print OUT1 $string1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1211 close OUT1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1212 open OUT2, ">>/tmp/$rand.rq";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1213 print OUT2 $string2;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1214 close OUT2;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1215 flock("/tmp/sg.write.lock",8);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1216 $finish = 1;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1217 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1218
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1219 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1220
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1221 sub printfailed {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1222 if (-e "/tmp/$rand.failed") {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1223 unlink("/tmp/$rand.failed");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1224 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1225 #open FAIL, ">/tmp/$rand.failed" or die("could not open file with failed reads, '/tmp/$rand.failed'");
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1226 #FAIL->autoflush(1);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1227 my $nr = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1228 my $string = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1229 while ( defined(my $rstring = $failed->dequeue())) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1230 #my @array = split(/\|/,$rstring);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1231 #foreach (@array) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1232 $string .= $rstring;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1233 #}
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1234 $nr++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1235 if ($nr == 50) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1236 open FAIL, ">>/tmp/$rand.failed";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1237 print FAIL $string;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1238 close FAIL;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1239 $nr = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1240 $string = '';
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1241 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1242 #close FAIL;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1243 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1244 open FAIL, ">>/tmp/$rand.failed";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1245 print FAIL $string;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1246 close FAIL;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1247 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1248 sub monitor {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1249 # checks the file read monitor
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1250 my $counter = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1251 while ($finish == 0) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1252 $counter++;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1253 if ($totrim->pending() > 200) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1254 lock($dummy);
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1255 #print "Putting the file reading to sleep for 10 seconds.\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1256 sleep 5;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1257 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1258 else {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1259 sleep 5;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1260 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1261 if ($counter == 6) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1262 if ($verbose == 1) {
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1263 my $string = ($done/1000000)."M Reads Passed, ".($discarded/1000000)."M reads discarded (".$totrim->pending().".10^4 pending)\n";
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1264 print $string ;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1265 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1266 $counter = 0;
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1267 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1268
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1269 }
fdbcc1aa4f01 Uploaded
geert-vandeweyer
parents:
diff changeset
1270 }