comparison rapsodyn/rapsosnp_stats2x.pl @ 2:761fecc07fa9 draft

Uploaded
author mcharles
date Thu, 11 Sep 2014 03:10:47 -0400
parents
children
comparison
equal deleted inserted replaced
1:7f36bd129321 2:761fecc07fa9
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4
5 my $read1_row = $ARGV[0];
6 my $read2_row = $ARGV[1];
7
8 my $read1_trimmed_part1 = $ARGV[2];
9 my $read1_trimmed_part2 = $ARGV[3];
10 my $read2_trimmed_part1 = $ARGV[4];
11 my $read2_trimmed_part2 = $ARGV[5];
12
13 my $sam_row_part1 = $ARGV[6];
14 my $sam_row_part2 = $ARGV[7];
15 my $sam_filtered_part1 = $ARGV[8];
16 my $sam_filtered_part2 = $ARGV[9];
17
18 my $mpileup_variant = $ARGV[10];
19
20 my $list_filtered = $ARGV[11];
21
22 my $blast_filtered_part1 = $ARGV[12];
23 my $blast_filtered_part2 = $ARGV[13];
24
25 my $snp_selected = $ARGV[14];
26
27
28 open(INR1R, $read1_row) or die ("Can't open $read1_row\n");
29 my $nbread=0;
30 my $nbbase =0;
31 while (my $line1=<INR1R>){
32 my $line2 = <INR1R>;
33 my $line3 = <INR1R>;
34 my $line4 = <INR1R>;
35 if ($line1 =~ /^@/){
36 $nbread++;
37 if ($line2=~/([ATGCNX]+)/i){
38 $nbbase += length($1);
39 }
40 }
41 }
42 print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
43 close (INR1R);
44
45
46
47
48 open(INR2R, $read2_row) or die ("Can't open $read2_row\n");
49 $nbread=0;
50 $nbbase =0;
51 while (my $line1=<INR2R>){
52 my $line2 = <INR2R>;
53 my $line3 = <INR2R>;
54 my $line4 = <INR2R>;
55 if ($line1 =~ /^@/){
56 $nbread++;
57 if ($line2=~/([ATGCNX]+)/i){
58 $nbbase += length($1);
59 }
60 }
61 }
62 print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
63 close (INR2R);
64
65
66
67
68
69 open(INR1TP1, $read1_trimmed_part1) or die ("Can't open $read1_trimmed_part1\n");
70 $nbread=0;
71 $nbbase =0;
72 while (my $line1=<INR1TP1>){
73 my $line2 = <INR1TP1>;
74 my $line3 = <INR1TP1>;
75 my $line4 = <INR1TP1>;
76 if ($line1 =~ /^@/){
77 $nbread++;
78 if ($line2=~/([ATGCNX]+)/i){
79 $nbbase += length($1);
80 }
81 else {
82 print STDERR "$line1\n$line2\n";
83 }
84 }
85 }
86 close (INR1TP1);
87 open(INR1TP2, $read1_trimmed_part2) or die ("Can't open $read1_trimmed_part2\n");
88 while (my $line1=<INR1TP2>){
89 my $line2 = <INR1TP2>;
90 my $line3 = <INR1TP2>;
91 my $line4 = <INR1TP2>;
92 if ($line1 =~ /^@/){
93 $nbread++;
94 if ($line2=~/([ATGCNX]+)/i){
95 $nbbase += length($1);
96 }
97 else {
98 print STDERR "$line1\n$line2\n";
99 }
100 }
101 }
102 close (INR1TP2);
103 print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
104
105
106
107
108 open(INR2TP1, $read2_trimmed_part1) or die ("Can't open $read2_trimmed_part1\n");
109 $nbread=0;
110 $nbbase =0;
111 while (my $line1=<INR2TP1>){
112 my $line2 = <INR2TP1>;
113 my $line3 = <INR2TP1>;
114 my $line4 = <INR2TP1>;
115 if ($line1 =~ /^@/){
116 $nbread++;
117 if ($line2=~/([ATGCNX]+)/i){
118 $nbbase += length($1);
119 }
120 else {
121 print STDERR "$line1\n$line2\n";
122 }
123 }
124 }
125 close (INR2TP2);
126 open(INR2TP2, $read2_trimmed_part2) or die ("Can't open $read2_trimmed_part2\n");
127 while (my $line1=<INR2TP2>){
128 my $line2 = <INR2TP2>;
129 my $line3 = <INR2TP2>;
130 my $line4 = <INR2TP2>;
131 if ($line1 =~ /^@/){
132 $nbread++;
133 if ($line2=~/([ATGCNX]+)/i){
134 $nbbase += length($1);
135 }
136 else {
137 print STDERR "$line1\n$line2\n";
138 }
139 }
140 }
141 close (INR2TP2);
142 print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
143
144
145
146
147 print "\nSAM row\n";
148 open(SAMP1, $sam_row_part1) or die ("Can't open $sam_row_part1\n");
149 my %bitscore;
150 while (my $line=<SAMP1>){
151 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
152 my @fields = split(/\s+/,$line);
153 my $bit = $fields[1];
154 if ($bitscore{$bit}){
155 $bitscore{$bit}++;
156 }
157 else {
158 $bitscore{$bit}=1;
159 }
160 }
161 }
162 close (SAMP1);
163 open(SAMP2, $sam_row_part2) or die ("Can't open $sam_row_part2\n");
164 while (my $line=<SAMP2>){
165 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
166 my @fields = split(/\s+/,$line);
167 my $bit = $fields[1];
168 if ($bitscore{$bit}){
169 $bitscore{$bit}++;
170 }
171 else {
172 $bitscore{$bit}=1;
173 }
174 }
175 }
176 close (SAMP2);
177 print "bitscore\t";
178 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
179 print $key,"\t*\t";
180 }
181 print "\n";
182 print " number \t";
183 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
184 print $bitscore{$key},"\t*\t";
185 }
186 print "\n";
187
188
189
190
191 print "\nSAM filtered\n";
192 open(SAMFP1, $sam_filtered_part1) or die ("Can't open $sam_filtered_part1\n");
193 undef %bitscore;
194 while (my $line=<SAMFP1>){
195 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
196 my @fields = split(/\s+/,$line);
197 my $bit = $fields[1];
198 if ($bitscore{$bit}){
199 $bitscore{$bit}++;
200 }
201 else {
202 $bitscore{$bit}=1;
203 }
204 }
205 }
206 close (SAMFP1);
207 open(SAMFP2, $sam_filtered_part2) or die ("Can't open $sam_filtered_part2\n");
208 while (my $line=<SAMFP2>){
209 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
210 my @fields = split(/\s+/,$line);
211 my $bit = $fields[1];
212 if ($bitscore{$bit}){
213 $bitscore{$bit}++;
214 }
215 else {
216 $bitscore{$bit}=1;
217 }
218 }
219 }
220 close (SAMFP2);
221 print "bitscore\t";
222 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
223 print $key,"\t*\t";
224 }
225 print "\n";
226 print " number \t";
227 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
228 print $bitscore{$key},"\t*\t";
229 }
230 print "\n";
231
232
233
234
235 print "\nMPILEUP variant\n";
236 open(MPV, $mpileup_variant) or die ("Can't open $mpileup_variant\n");
237 my $nbvariant=0;
238 while (my $line=<MPV>){
239 my @fields = split(/\s+/,$line);
240 if ($#fields >= 4){
241 my $match = $fields[4];
242 $match =~ s/\$//g; #the read start at this position
243 $match =~ s/\^.//g; #the read end at this position followed by quality char
244 if ($match =~/[ACGTNacgtn]+/){
245 $nbvariant++;
246 }
247 }
248 else {
249 #print STDERR "Erreur : $line\n";
250 }
251 }
252 print "Variant detected :\t$nbvariant\n";
253 close (MPV);
254
255
256
257
258
259
260 print "\nMPILEUP filtered without dubious position\n";
261 open(LF, $list_filtered) or die ("Can't open $list_filtered\n");
262 $nbvariant=0;
263 while (my $line=<LF>){
264 $nbvariant++;
265 }
266 print "Variant selected :\t$nbvariant\n";
267 close (LF);
268
269
270
271
272
273 print "\nMPILEUP filtered without dubious position and BLAST\n";
274 open(BFP1, $blast_filtered_part1) or die ("Can't open $blast_filtered_part1\n");
275 $nbvariant=0;
276 while (my $line=<BFP1>){
277 $nbvariant++;
278 }
279 close (BFP1);
280 open(BFP2, $blast_filtered_part2) or die ("Can't open $blast_filtered_part2\n");
281 while (my $line=<BFP2>){
282 $nbvariant++;
283 }
284 close (BFP2);
285 print "Variant selected :\t$nbvariant\n";
286
287
288
289
290
291 print "\nSNP selected after mpileup filtering : \t";
292 open(SNP, $snp_selected) or die ("Can't open $snp_selected\n");
293 $nbvariant=0;
294 while (my $line=<SNP>){
295 $nbvariant++;
296 }
297
298 print "$nbvariant\n";
299 close (SNP);
300
301
302
303
304
305
306
307