2
|
1 #!/usr/bin/perl
|
|
2 use strict;
|
|
3 use warnings;
|
|
4
|
|
5 my $read1_row = $ARGV[0];
|
|
6 my $read2_row = $ARGV[1];
|
|
7
|
|
8 my $read1_trimmed_part1 = $ARGV[2];
|
|
9 my $read1_trimmed_part2 = $ARGV[3];
|
|
10 my $read1_trimmed_part3 = $ARGV[4];
|
|
11 my $read1_trimmed_part4 = $ARGV[5];
|
|
12 my $read2_trimmed_part1 = $ARGV[6];
|
|
13 my $read2_trimmed_part2 = $ARGV[7];
|
|
14 my $read2_trimmed_part3 = $ARGV[8];
|
|
15 my $read2_trimmed_part4 = $ARGV[9];
|
|
16
|
|
17 my $sam_row_part1 = $ARGV[10];
|
|
18 my $sam_row_part2 = $ARGV[11];
|
|
19 my $sam_row_part3 = $ARGV[12];
|
|
20 my $sam_row_part4 = $ARGV[13];
|
|
21 my $sam_filtered_part1 = $ARGV[14];
|
|
22 my $sam_filtered_part2 = $ARGV[15];
|
|
23 my $sam_filtered_part3 = $ARGV[16];
|
|
24 my $sam_filtered_part4 = $ARGV[17];
|
|
25
|
|
26 my $mpileup_variant = $ARGV[18];
|
|
27
|
|
28 my $list_filtered = $ARGV[19];
|
|
29
|
|
30 my $blast_filtered_part1 = $ARGV[20];
|
|
31 my $blast_filtered_part2 = $ARGV[21];
|
|
32 my $blast_filtered_part3 = $ARGV[22];
|
|
33 my $blast_filtered_part4 = $ARGV[23];
|
|
34
|
|
35 my $snp_selected = $ARGV[24];
|
|
36
|
|
37
|
|
38 open(INR1R, $read1_row) or die ("Can't open $read1_row\n");
|
|
39 my $nbread=0;
|
|
40 my $nbbase =0;
|
|
41 while (my $line1=<INR1R>){
|
|
42 my $line2 = <INR1R>;
|
|
43 my $line3 = <INR1R>;
|
|
44 my $line4 = <INR1R>;
|
|
45 if ($line1 =~ /^@/){
|
|
46 $nbread++;
|
|
47 if ($line2=~/([ATGCNX]+)/i){
|
|
48 $nbbase += length($1);
|
|
49 }
|
|
50 }
|
|
51 }
|
|
52 print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
|
|
53 close (INR1R);
|
|
54
|
|
55
|
|
56
|
|
57
|
|
58 open(INR2R, $read2_row) or die ("Can't open $read2_row\n");
|
|
59 $nbread=0;
|
|
60 $nbbase =0;
|
|
61 while (my $line1=<INR2R>){
|
|
62 my $line2 = <INR2R>;
|
|
63 my $line3 = <INR2R>;
|
|
64 my $line4 = <INR2R>;
|
|
65 if ($line1 =~ /^@/){
|
|
66 $nbread++;
|
|
67 if ($line2=~/([ATGCNX]+)/i){
|
|
68 $nbbase += length($1);
|
|
69 }
|
|
70 }
|
|
71 }
|
|
72 print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
|
|
73 close (INR2R);
|
|
74
|
|
75
|
|
76
|
|
77
|
|
78
|
|
79 open(INR1TP1, $read1_trimmed_part1) or die ("Can't open $read1_trimmed_part1\n");
|
|
80 $nbread=0;
|
|
81 $nbbase =0;
|
|
82 while (my $line1=<INR1TP1>){
|
|
83 my $line2 = <INR1TP1>;
|
|
84 my $line3 = <INR1TP1>;
|
|
85 my $line4 = <INR1TP1>;
|
|
86 if ($line1 =~ /^@/){
|
|
87 $nbread++;
|
|
88 if ($line2=~/([ATGCNX]+)/i){
|
|
89 $nbbase += length($1);
|
|
90 }
|
|
91 else {
|
|
92 print STDERR "$line1\n$line2\n";
|
|
93 }
|
|
94 }
|
|
95 }
|
|
96 close (INR1TP1);
|
|
97 open(INR1TP2, $read1_trimmed_part2) or die ("Can't open $read1_trimmed_part2\n");
|
|
98 while (my $line1=<INR1TP2>){
|
|
99 my $line2 = <INR1TP2>;
|
|
100 my $line3 = <INR1TP2>;
|
|
101 my $line4 = <INR1TP2>;
|
|
102 if ($line1 =~ /^@/){
|
|
103 $nbread++;
|
|
104 if ($line2=~/([ATGCNX]+)/i){
|
|
105 $nbbase += length($1);
|
|
106 }
|
|
107 else {
|
|
108 print STDERR "$line1\n$line2\n";
|
|
109 }
|
|
110 }
|
|
111 }
|
|
112 close (INR1TP2);
|
|
113 open(INR1TP3, $read1_trimmed_part3) or die ("Can't open $read1_trimmed_part3\n");
|
|
114 while (my $line1=<INR1TP3>){
|
|
115 my $line2 = <INR1TP3>;
|
|
116 my $line3 = <INR1TP3>;
|
|
117 my $line4 = <INR1TP3>;
|
|
118 if ($line1 =~ /^@/){
|
|
119 $nbread++;
|
|
120 if ($line2=~/([ATGCNX]+)/i){
|
|
121 $nbbase += length($1);
|
|
122 }
|
|
123 else {
|
|
124 print STDERR "$line1\n$line2\n";
|
|
125 }
|
|
126 }
|
|
127 }
|
|
128 close (INR1TP3);
|
|
129 open(INR1TP4, $read1_trimmed_part4) or die ("Can't open $read1_trimmed_part4\n");
|
|
130 while (my $line1=<INR1TP4>){
|
|
131 my $line2 = <INR1TP4>;
|
|
132 my $line3 = <INR1TP4>;
|
|
133 my $line4 = <INR1TP4>;
|
|
134 if ($line1 =~ /^@/){
|
|
135 $nbread++;
|
|
136 if ($line2=~/([ATGCNX]+)/i){
|
|
137 $nbbase += length($1);
|
|
138 }
|
|
139 else {
|
|
140 print STDERR "$line1\n$line2\n";
|
|
141 }
|
|
142 }
|
|
143 }
|
|
144 close (INR1TP4);
|
|
145 print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
|
|
146
|
|
147
|
|
148
|
|
149
|
|
150 open(INR2TP1, $read2_trimmed_part1) or die ("Can't open $read2_trimmed_part1\n");
|
|
151 $nbread=0;
|
|
152 $nbbase =0;
|
|
153 while (my $line1=<INR2TP1>){
|
|
154 my $line2 = <INR2TP1>;
|
|
155 my $line3 = <INR2TP1>;
|
|
156 my $line4 = <INR2TP1>;
|
|
157 if ($line1 =~ /^@/){
|
|
158 $nbread++;
|
|
159 if ($line2=~/([ATGCNX]+)/i){
|
|
160 $nbbase += length($1);
|
|
161 }
|
|
162 else {
|
|
163 print STDERR "$line1\n$line2\n";
|
|
164 }
|
|
165 }
|
|
166 }
|
|
167 close (INR2TP2);
|
|
168 open(INR2TP2, $read2_trimmed_part2) or die ("Can't open $read2_trimmed_part2\n");
|
|
169 while (my $line1=<INR2TP2>){
|
|
170 my $line2 = <INR2TP2>;
|
|
171 my $line3 = <INR2TP2>;
|
|
172 my $line4 = <INR2TP2>;
|
|
173 if ($line1 =~ /^@/){
|
|
174 $nbread++;
|
|
175 if ($line2=~/([ATGCNX]+)/i){
|
|
176 $nbbase += length($1);
|
|
177 }
|
|
178 else {
|
|
179 print STDERR "$line1\n$line2\n";
|
|
180 }
|
|
181 }
|
|
182 }
|
|
183 close (INR2TP2);
|
|
184 open(INR2TP3, $read2_trimmed_part3) or die ("Can't open $read2_trimmed_part3\n");
|
|
185 while (my $line1=<INR2TP3>){
|
|
186 my $line2 = <INR2TP3>;
|
|
187 my $line3 = <INR2TP3>;
|
|
188 my $line4 = <INR2TP3>;
|
|
189 if ($line1 =~ /^@/){
|
|
190 $nbread++;
|
|
191 if ($line2=~/([ATGCNX]+)/i){
|
|
192 $nbbase += length($1);
|
|
193 }
|
|
194 else {
|
|
195 print STDERR "$line1\n$line2\n";
|
|
196 }
|
|
197 }
|
|
198 }
|
|
199 close (INR2TP3);
|
|
200 open(INR2TP4, $read2_trimmed_part4) or die ("Can't open $read2_trimmed_part4\n");
|
|
201 while (my $line1=<INR2TP4>){
|
|
202 my $line2 = <INR2TP4>;
|
|
203 my $line3 = <INR2TP4>;
|
|
204 my $line4 = <INR2TP4>;
|
|
205 if ($line1 =~ /^@/){
|
|
206 $nbread++;
|
|
207 if ($line2=~/([ATGCNX]+)/i){
|
|
208 $nbbase += length($1);
|
|
209 }
|
|
210 else {
|
|
211 print STDERR "$line1\n$line2\n";
|
|
212 }
|
|
213 }
|
|
214 }
|
|
215 close (INR2TP4);
|
|
216 print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
|
|
217
|
|
218
|
|
219
|
|
220
|
|
221 print "\nSAM row\n";
|
|
222 open(SAMP1, $sam_row_part1) or die ("Can't open $sam_row_part1\n");
|
|
223 my %bitscore;
|
|
224 while (my $line=<SAMP1>){
|
|
225 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
|
|
226 my @fields = split(/\s+/,$line);
|
|
227 my $bit = $fields[1];
|
|
228 if ($bitscore{$bit}){
|
|
229 $bitscore{$bit}++;
|
|
230 }
|
|
231 else {
|
|
232 $bitscore{$bit}=1;
|
|
233 }
|
|
234 }
|
|
235 }
|
|
236 close (SAMP1);
|
|
237 open(SAMP2, $sam_row_part2) or die ("Can't open $sam_row_part2\n");
|
|
238 while (my $line=<SAMP2>){
|
|
239 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
|
|
240 my @fields = split(/\s+/,$line);
|
|
241 my $bit = $fields[1];
|
|
242 if ($bitscore{$bit}){
|
|
243 $bitscore{$bit}++;
|
|
244 }
|
|
245 else {
|
|
246 $bitscore{$bit}=1;
|
|
247 }
|
|
248 }
|
|
249 }
|
|
250 close (SAMP2);
|
|
251 open(SAMP3, $sam_row_part3) or die ("Can't open $sam_row_part3\n");
|
|
252 while (my $line=<SAMP3>){
|
|
253 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
|
|
254 my @fields = split(/\s+/,$line);
|
|
255 my $bit = $fields[1];
|
|
256 if ($bitscore{$bit}){
|
|
257 $bitscore{$bit}++;
|
|
258 }
|
|
259 else {
|
|
260 $bitscore{$bit}=1;
|
|
261 }
|
|
262 }
|
|
263 }
|
|
264 close (SAMP3);
|
|
265 open(SAMP4, $sam_row_part4) or die ("Can't open $sam_row_part4\n");
|
|
266 while (my $line=<SAMP4>){
|
|
267 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
|
|
268 my @fields = split(/\s+/,$line);
|
|
269 my $bit = $fields[1];
|
|
270 if ($bitscore{$bit}){
|
|
271 $bitscore{$bit}++;
|
|
272 }
|
|
273 else {
|
|
274 $bitscore{$bit}=1;
|
|
275 }
|
|
276 }
|
|
277 }
|
|
278 close (SAMP4);
|
|
279 print "bitscore\t";
|
|
280 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
|
|
281 print $key,"\t*\t";
|
|
282 }
|
|
283 print "\n";
|
|
284 print " number \t";
|
|
285 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
|
|
286 print $bitscore{$key},"\t*\t";
|
|
287 }
|
|
288 print "\n";
|
|
289
|
|
290
|
|
291
|
|
292
|
|
293 print "\nSAM filtered\n";
|
|
294 open(SAMFP1, $sam_filtered_part1) or die ("Can't open $sam_filtered_part1\n");
|
|
295 undef %bitscore;
|
|
296 while (my $line=<SAMFP1>){
|
|
297 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
|
|
298 my @fields = split(/\s+/,$line);
|
|
299 my $bit = $fields[1];
|
|
300 if ($bitscore{$bit}){
|
|
301 $bitscore{$bit}++;
|
|
302 }
|
|
303 else {
|
|
304 $bitscore{$bit}=1;
|
|
305 }
|
|
306 }
|
|
307 }
|
|
308 close (SAMFP1);
|
|
309 open(SAMFP2, $sam_filtered_part2) or die ("Can't open $sam_filtered_part2\n");
|
|
310 while (my $line=<SAMFP2>){
|
|
311 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
|
|
312 my @fields = split(/\s+/,$line);
|
|
313 my $bit = $fields[1];
|
|
314 if ($bitscore{$bit}){
|
|
315 $bitscore{$bit}++;
|
|
316 }
|
|
317 else {
|
|
318 $bitscore{$bit}=1;
|
|
319 }
|
|
320 }
|
|
321 }
|
|
322 close (SAMFP2);
|
|
323 open(SAMFP3, $sam_filtered_part3) or die ("Can't open $sam_filtered_part3\n");
|
|
324 while (my $line=<SAMFP3>){
|
|
325 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
|
|
326 my @fields = split(/\s+/,$line);
|
|
327 my $bit = $fields[1];
|
|
328 if ($bitscore{$bit}){
|
|
329 $bitscore{$bit}++;
|
|
330 }
|
|
331 else {
|
|
332 $bitscore{$bit}=1;
|
|
333 }
|
|
334 }
|
|
335 }
|
|
336 close (SAMFP3);
|
|
337 open(SAMFP4, $sam_filtered_part4) or die ("Can't open $sam_filtered_part4\n");
|
|
338 while (my $line=<SAMFP4>){
|
|
339 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
|
|
340 my @fields = split(/\s+/,$line);
|
|
341 my $bit = $fields[1];
|
|
342 if ($bitscore{$bit}){
|
|
343 $bitscore{$bit}++;
|
|
344 }
|
|
345 else {
|
|
346 $bitscore{$bit}=1;
|
|
347 }
|
|
348 }
|
|
349 }
|
|
350 close (SAMFP4);
|
|
351 print "bitscore\t";
|
|
352 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
|
|
353 print $key,"\t*\t";
|
|
354 }
|
|
355 print "\n";
|
|
356 print " number \t";
|
|
357 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
|
|
358 print $bitscore{$key},"\t*\t";
|
|
359 }
|
|
360 print "\n";
|
|
361
|
|
362
|
|
363
|
|
364
|
|
365 print "\nMPILEUP variant\n";
|
|
366 open(MPV, $mpileup_variant) or die ("Can't open $mpileup_variant\n");
|
|
367 my $nbvariant=0;
|
|
368 while (my $line=<MPV>){
|
|
369 my @fields = split(/\s+/,$line);
|
|
370 if ($#fields >= 4){
|
|
371 my $match = $fields[4];
|
|
372 $match =~ s/\$//g; #the read start at this position
|
|
373 $match =~ s/\^.//g; #the read end at this position followed by quality char
|
|
374 if ($match =~/[ACGTNacgtn]+/){
|
|
375 $nbvariant++;
|
|
376 }
|
|
377 }
|
|
378 else {
|
|
379 #print STDERR "Erreur : $line\n";
|
|
380 }
|
|
381 }
|
|
382 print "Variant detected :\t$nbvariant\n";
|
|
383 close (MPV);
|
|
384
|
|
385
|
|
386
|
|
387
|
|
388
|
|
389
|
|
390 print "\nMPILEUP filtered without dubious position\n";
|
|
391 open(LF, $list_filtered) or die ("Can't open $list_filtered\n");
|
|
392 $nbvariant=0;
|
|
393 while (my $line=<LF>){
|
|
394 $nbvariant++;
|
|
395 }
|
|
396 print "Variant selected :\t$nbvariant\n";
|
|
397 close (LF);
|
|
398
|
|
399
|
|
400
|
|
401
|
|
402
|
|
403 print "\nMPILEUP filtered without dubious position and BLAST\n";
|
|
404 open(BFP1, $blast_filtered_part1) or die ("Can't open $blast_filtered_part1\n");
|
|
405 $nbvariant=0;
|
|
406 while (my $line=<BFP1>){
|
|
407 $nbvariant++;
|
|
408 }
|
|
409 close (BFP1);
|
|
410 open(BFP2, $blast_filtered_part2) or die ("Can't open $blast_filtered_part2\n");
|
|
411 while (my $line=<BFP2>){
|
|
412 $nbvariant++;
|
|
413 }
|
|
414 close (BFP2);
|
|
415 open(BFP3, $blast_filtered_part3) or die ("Can't open $blast_filtered_part3\n");
|
|
416 while (my $line=<BFP3>){
|
|
417 $nbvariant++;
|
|
418 }
|
|
419 close (BFP3);
|
|
420 open(BFP4, $blast_filtered_part4) or die ("Can't open $blast_filtered_part4\n");
|
|
421 while (my $line=<BFP4>){
|
|
422 $nbvariant++;
|
|
423 }
|
|
424 close (BFP4);
|
|
425 print "Variant selected :\t$nbvariant\n";
|
|
426
|
|
427
|
|
428
|
|
429
|
|
430
|
|
431 print "\nSNP selected after mpileup filtering : \t";
|
|
432 open(SNP, $snp_selected) or die ("Can't open $snp_selected\n");
|
|
433 $nbvariant=0;
|
|
434 while (my $line=<SNP>){
|
|
435 $nbvariant++;
|
|
436 }
|
|
437
|
|
438 print "$nbvariant\n";
|
|
439 close (SNP);
|
|
440
|
|
441
|
|
442
|
|
443
|
|
444
|
|
445
|
|
446
|
|
447
|