Mercurial > repos > mcharles > rapsosnp
comparison rapsodyn/rapsosnp_stats4x.pl @ 2:761fecc07fa9 draft
Uploaded
author | mcharles |
---|---|
date | Thu, 11 Sep 2014 03:10:47 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:7f36bd129321 | 2:761fecc07fa9 |
---|---|
1 #!/usr/bin/perl | |
2 use strict; | |
3 use warnings; | |
4 | |
5 my $read1_row = $ARGV[0]; | |
6 my $read2_row = $ARGV[1]; | |
7 | |
8 my $read1_trimmed_part1 = $ARGV[2]; | |
9 my $read1_trimmed_part2 = $ARGV[3]; | |
10 my $read1_trimmed_part3 = $ARGV[4]; | |
11 my $read1_trimmed_part4 = $ARGV[5]; | |
12 my $read2_trimmed_part1 = $ARGV[6]; | |
13 my $read2_trimmed_part2 = $ARGV[7]; | |
14 my $read2_trimmed_part3 = $ARGV[8]; | |
15 my $read2_trimmed_part4 = $ARGV[9]; | |
16 | |
17 my $sam_row_part1 = $ARGV[10]; | |
18 my $sam_row_part2 = $ARGV[11]; | |
19 my $sam_row_part3 = $ARGV[12]; | |
20 my $sam_row_part4 = $ARGV[13]; | |
21 my $sam_filtered_part1 = $ARGV[14]; | |
22 my $sam_filtered_part2 = $ARGV[15]; | |
23 my $sam_filtered_part3 = $ARGV[16]; | |
24 my $sam_filtered_part4 = $ARGV[17]; | |
25 | |
26 my $mpileup_variant = $ARGV[18]; | |
27 | |
28 my $list_filtered = $ARGV[19]; | |
29 | |
30 my $blast_filtered_part1 = $ARGV[20]; | |
31 my $blast_filtered_part2 = $ARGV[21]; | |
32 my $blast_filtered_part3 = $ARGV[22]; | |
33 my $blast_filtered_part4 = $ARGV[23]; | |
34 | |
35 my $snp_selected = $ARGV[24]; | |
36 | |
37 | |
38 open(INR1R, $read1_row) or die ("Can't open $read1_row\n"); | |
39 my $nbread=0; | |
40 my $nbbase =0; | |
41 while (my $line1=<INR1R>){ | |
42 my $line2 = <INR1R>; | |
43 my $line3 = <INR1R>; | |
44 my $line4 = <INR1R>; | |
45 if ($line1 =~ /^@/){ | |
46 $nbread++; | |
47 if ($line2=~/([ATGCNX]+)/i){ | |
48 $nbbase += length($1); | |
49 } | |
50 } | |
51 } | |
52 print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; | |
53 close (INR1R); | |
54 | |
55 | |
56 | |
57 | |
58 open(INR2R, $read2_row) or die ("Can't open $read2_row\n"); | |
59 $nbread=0; | |
60 $nbbase =0; | |
61 while (my $line1=<INR2R>){ | |
62 my $line2 = <INR2R>; | |
63 my $line3 = <INR2R>; | |
64 my $line4 = <INR2R>; | |
65 if ($line1 =~ /^@/){ | |
66 $nbread++; | |
67 if ($line2=~/([ATGCNX]+)/i){ | |
68 $nbbase += length($1); | |
69 } | |
70 } | |
71 } | |
72 print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; | |
73 close (INR2R); | |
74 | |
75 | |
76 | |
77 | |
78 | |
79 open(INR1TP1, $read1_trimmed_part1) or die ("Can't open $read1_trimmed_part1\n"); | |
80 $nbread=0; | |
81 $nbbase =0; | |
82 while (my $line1=<INR1TP1>){ | |
83 my $line2 = <INR1TP1>; | |
84 my $line3 = <INR1TP1>; | |
85 my $line4 = <INR1TP1>; | |
86 if ($line1 =~ /^@/){ | |
87 $nbread++; | |
88 if ($line2=~/([ATGCNX]+)/i){ | |
89 $nbbase += length($1); | |
90 } | |
91 else { | |
92 print STDERR "$line1\n$line2\n"; | |
93 } | |
94 } | |
95 } | |
96 close (INR1TP1); | |
97 open(INR1TP2, $read1_trimmed_part2) or die ("Can't open $read1_trimmed_part2\n"); | |
98 while (my $line1=<INR1TP2>){ | |
99 my $line2 = <INR1TP2>; | |
100 my $line3 = <INR1TP2>; | |
101 my $line4 = <INR1TP2>; | |
102 if ($line1 =~ /^@/){ | |
103 $nbread++; | |
104 if ($line2=~/([ATGCNX]+)/i){ | |
105 $nbbase += length($1); | |
106 } | |
107 else { | |
108 print STDERR "$line1\n$line2\n"; | |
109 } | |
110 } | |
111 } | |
112 close (INR1TP2); | |
113 open(INR1TP3, $read1_trimmed_part3) or die ("Can't open $read1_trimmed_part3\n"); | |
114 while (my $line1=<INR1TP3>){ | |
115 my $line2 = <INR1TP3>; | |
116 my $line3 = <INR1TP3>; | |
117 my $line4 = <INR1TP3>; | |
118 if ($line1 =~ /^@/){ | |
119 $nbread++; | |
120 if ($line2=~/([ATGCNX]+)/i){ | |
121 $nbbase += length($1); | |
122 } | |
123 else { | |
124 print STDERR "$line1\n$line2\n"; | |
125 } | |
126 } | |
127 } | |
128 close (INR1TP3); | |
129 open(INR1TP4, $read1_trimmed_part4) or die ("Can't open $read1_trimmed_part4\n"); | |
130 while (my $line1=<INR1TP4>){ | |
131 my $line2 = <INR1TP4>; | |
132 my $line3 = <INR1TP4>; | |
133 my $line4 = <INR1TP4>; | |
134 if ($line1 =~ /^@/){ | |
135 $nbread++; | |
136 if ($line2=~/([ATGCNX]+)/i){ | |
137 $nbbase += length($1); | |
138 } | |
139 else { | |
140 print STDERR "$line1\n$line2\n"; | |
141 } | |
142 } | |
143 } | |
144 close (INR1TP4); | |
145 print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; | |
146 | |
147 | |
148 | |
149 | |
150 open(INR2TP1, $read2_trimmed_part1) or die ("Can't open $read2_trimmed_part1\n"); | |
151 $nbread=0; | |
152 $nbbase =0; | |
153 while (my $line1=<INR2TP1>){ | |
154 my $line2 = <INR2TP1>; | |
155 my $line3 = <INR2TP1>; | |
156 my $line4 = <INR2TP1>; | |
157 if ($line1 =~ /^@/){ | |
158 $nbread++; | |
159 if ($line2=~/([ATGCNX]+)/i){ | |
160 $nbbase += length($1); | |
161 } | |
162 else { | |
163 print STDERR "$line1\n$line2\n"; | |
164 } | |
165 } | |
166 } | |
167 close (INR2TP2); | |
168 open(INR2TP2, $read2_trimmed_part2) or die ("Can't open $read2_trimmed_part2\n"); | |
169 while (my $line1=<INR2TP2>){ | |
170 my $line2 = <INR2TP2>; | |
171 my $line3 = <INR2TP2>; | |
172 my $line4 = <INR2TP2>; | |
173 if ($line1 =~ /^@/){ | |
174 $nbread++; | |
175 if ($line2=~/([ATGCNX]+)/i){ | |
176 $nbbase += length($1); | |
177 } | |
178 else { | |
179 print STDERR "$line1\n$line2\n"; | |
180 } | |
181 } | |
182 } | |
183 close (INR2TP2); | |
184 open(INR2TP3, $read2_trimmed_part3) or die ("Can't open $read2_trimmed_part3\n"); | |
185 while (my $line1=<INR2TP3>){ | |
186 my $line2 = <INR2TP3>; | |
187 my $line3 = <INR2TP3>; | |
188 my $line4 = <INR2TP3>; | |
189 if ($line1 =~ /^@/){ | |
190 $nbread++; | |
191 if ($line2=~/([ATGCNX]+)/i){ | |
192 $nbbase += length($1); | |
193 } | |
194 else { | |
195 print STDERR "$line1\n$line2\n"; | |
196 } | |
197 } | |
198 } | |
199 close (INR2TP3); | |
200 open(INR2TP4, $read2_trimmed_part4) or die ("Can't open $read2_trimmed_part4\n"); | |
201 while (my $line1=<INR2TP4>){ | |
202 my $line2 = <INR2TP4>; | |
203 my $line3 = <INR2TP4>; | |
204 my $line4 = <INR2TP4>; | |
205 if ($line1 =~ /^@/){ | |
206 $nbread++; | |
207 if ($line2=~/([ATGCNX]+)/i){ | |
208 $nbbase += length($1); | |
209 } | |
210 else { | |
211 print STDERR "$line1\n$line2\n"; | |
212 } | |
213 } | |
214 } | |
215 close (INR2TP4); | |
216 print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; | |
217 | |
218 | |
219 | |
220 | |
221 print "\nSAM row\n"; | |
222 open(SAMP1, $sam_row_part1) or die ("Can't open $sam_row_part1\n"); | |
223 my %bitscore; | |
224 while (my $line=<SAMP1>){ | |
225 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ | |
226 my @fields = split(/\s+/,$line); | |
227 my $bit = $fields[1]; | |
228 if ($bitscore{$bit}){ | |
229 $bitscore{$bit}++; | |
230 } | |
231 else { | |
232 $bitscore{$bit}=1; | |
233 } | |
234 } | |
235 } | |
236 close (SAMP1); | |
237 open(SAMP2, $sam_row_part2) or die ("Can't open $sam_row_part2\n"); | |
238 while (my $line=<SAMP2>){ | |
239 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ | |
240 my @fields = split(/\s+/,$line); | |
241 my $bit = $fields[1]; | |
242 if ($bitscore{$bit}){ | |
243 $bitscore{$bit}++; | |
244 } | |
245 else { | |
246 $bitscore{$bit}=1; | |
247 } | |
248 } | |
249 } | |
250 close (SAMP2); | |
251 open(SAMP3, $sam_row_part3) or die ("Can't open $sam_row_part3\n"); | |
252 while (my $line=<SAMP3>){ | |
253 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ | |
254 my @fields = split(/\s+/,$line); | |
255 my $bit = $fields[1]; | |
256 if ($bitscore{$bit}){ | |
257 $bitscore{$bit}++; | |
258 } | |
259 else { | |
260 $bitscore{$bit}=1; | |
261 } | |
262 } | |
263 } | |
264 close (SAMP3); | |
265 open(SAMP4, $sam_row_part4) or die ("Can't open $sam_row_part4\n"); | |
266 while (my $line=<SAMP4>){ | |
267 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ | |
268 my @fields = split(/\s+/,$line); | |
269 my $bit = $fields[1]; | |
270 if ($bitscore{$bit}){ | |
271 $bitscore{$bit}++; | |
272 } | |
273 else { | |
274 $bitscore{$bit}=1; | |
275 } | |
276 } | |
277 } | |
278 close (SAMP4); | |
279 print "bitscore\t"; | |
280 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { | |
281 print $key,"\t*\t"; | |
282 } | |
283 print "\n"; | |
284 print " number \t"; | |
285 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { | |
286 print $bitscore{$key},"\t*\t"; | |
287 } | |
288 print "\n"; | |
289 | |
290 | |
291 | |
292 | |
293 print "\nSAM filtered\n"; | |
294 open(SAMFP1, $sam_filtered_part1) or die ("Can't open $sam_filtered_part1\n"); | |
295 undef %bitscore; | |
296 while (my $line=<SAMFP1>){ | |
297 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ | |
298 my @fields = split(/\s+/,$line); | |
299 my $bit = $fields[1]; | |
300 if ($bitscore{$bit}){ | |
301 $bitscore{$bit}++; | |
302 } | |
303 else { | |
304 $bitscore{$bit}=1; | |
305 } | |
306 } | |
307 } | |
308 close (SAMFP1); | |
309 open(SAMFP2, $sam_filtered_part2) or die ("Can't open $sam_filtered_part2\n"); | |
310 while (my $line=<SAMFP2>){ | |
311 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ | |
312 my @fields = split(/\s+/,$line); | |
313 my $bit = $fields[1]; | |
314 if ($bitscore{$bit}){ | |
315 $bitscore{$bit}++; | |
316 } | |
317 else { | |
318 $bitscore{$bit}=1; | |
319 } | |
320 } | |
321 } | |
322 close (SAMFP2); | |
323 open(SAMFP3, $sam_filtered_part3) or die ("Can't open $sam_filtered_part3\n"); | |
324 while (my $line=<SAMFP3>){ | |
325 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ | |
326 my @fields = split(/\s+/,$line); | |
327 my $bit = $fields[1]; | |
328 if ($bitscore{$bit}){ | |
329 $bitscore{$bit}++; | |
330 } | |
331 else { | |
332 $bitscore{$bit}=1; | |
333 } | |
334 } | |
335 } | |
336 close (SAMFP3); | |
337 open(SAMFP4, $sam_filtered_part4) or die ("Can't open $sam_filtered_part4\n"); | |
338 while (my $line=<SAMFP4>){ | |
339 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ | |
340 my @fields = split(/\s+/,$line); | |
341 my $bit = $fields[1]; | |
342 if ($bitscore{$bit}){ | |
343 $bitscore{$bit}++; | |
344 } | |
345 else { | |
346 $bitscore{$bit}=1; | |
347 } | |
348 } | |
349 } | |
350 close (SAMFP4); | |
351 print "bitscore\t"; | |
352 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { | |
353 print $key,"\t*\t"; | |
354 } | |
355 print "\n"; | |
356 print " number \t"; | |
357 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { | |
358 print $bitscore{$key},"\t*\t"; | |
359 } | |
360 print "\n"; | |
361 | |
362 | |
363 | |
364 | |
365 print "\nMPILEUP variant\n"; | |
366 open(MPV, $mpileup_variant) or die ("Can't open $mpileup_variant\n"); | |
367 my $nbvariant=0; | |
368 while (my $line=<MPV>){ | |
369 my @fields = split(/\s+/,$line); | |
370 if ($#fields >= 4){ | |
371 my $match = $fields[4]; | |
372 $match =~ s/\$//g; #the read start at this position | |
373 $match =~ s/\^.//g; #the read end at this position followed by quality char | |
374 if ($match =~/[ACGTNacgtn]+/){ | |
375 $nbvariant++; | |
376 } | |
377 } | |
378 else { | |
379 #print STDERR "Erreur : $line\n"; | |
380 } | |
381 } | |
382 print "Variant detected :\t$nbvariant\n"; | |
383 close (MPV); | |
384 | |
385 | |
386 | |
387 | |
388 | |
389 | |
390 print "\nMPILEUP filtered without dubious position\n"; | |
391 open(LF, $list_filtered) or die ("Can't open $list_filtered\n"); | |
392 $nbvariant=0; | |
393 while (my $line=<LF>){ | |
394 $nbvariant++; | |
395 } | |
396 print "Variant selected :\t$nbvariant\n"; | |
397 close (LF); | |
398 | |
399 | |
400 | |
401 | |
402 | |
403 print "\nMPILEUP filtered without dubious position and BLAST\n"; | |
404 open(BFP1, $blast_filtered_part1) or die ("Can't open $blast_filtered_part1\n"); | |
405 $nbvariant=0; | |
406 while (my $line=<BFP1>){ | |
407 $nbvariant++; | |
408 } | |
409 close (BFP1); | |
410 open(BFP2, $blast_filtered_part2) or die ("Can't open $blast_filtered_part2\n"); | |
411 while (my $line=<BFP2>){ | |
412 $nbvariant++; | |
413 } | |
414 close (BFP2); | |
415 open(BFP3, $blast_filtered_part3) or die ("Can't open $blast_filtered_part3\n"); | |
416 while (my $line=<BFP3>){ | |
417 $nbvariant++; | |
418 } | |
419 close (BFP3); | |
420 open(BFP4, $blast_filtered_part4) or die ("Can't open $blast_filtered_part4\n"); | |
421 while (my $line=<BFP4>){ | |
422 $nbvariant++; | |
423 } | |
424 close (BFP4); | |
425 print "Variant selected :\t$nbvariant\n"; | |
426 | |
427 | |
428 | |
429 | |
430 | |
431 print "\nSNP selected after mpileup filtering : \t"; | |
432 open(SNP, $snp_selected) or die ("Can't open $snp_selected\n"); | |
433 $nbvariant=0; | |
434 while (my $line=<SNP>){ | |
435 $nbvariant++; | |
436 } | |
437 | |
438 print "$nbvariant\n"; | |
439 close (SNP); | |
440 | |
441 | |
442 | |
443 | |
444 | |
445 | |
446 | |
447 |