comparison rapsodyn/rapsosnp_stats.pl @ 1:7f36bd129321 draft

Uploaded
author mcharles
date Wed, 10 Sep 2014 10:24:01 -0400
parents
children
comparison
equal deleted inserted replaced
0:442a7c88b886 1:7f36bd129321
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4
5 my $read1_row = $ARGV[0];
6 my $read2_row = $ARGV[1];
7
8 my $read1_trimmed = $ARGV[2];
9 my $read2_trimmed = $ARGV[3];
10
11 my $sam_row = $ARGV[4];
12 my $sam_filtered = $ARGV[5];
13
14 my $mpileup_variant = $ARGV[6];
15
16 my $list_filtered = $ARGV[7];
17
18 my $blast_filtered = $ARGV[8];
19
20 my $snp_selected = $ARGV[9];
21
22
23 open(INR1R, $read1_row) or die ("Can't open $read1_row\n");
24 my $nbread=0;
25 my $nbbase =0;
26 while (my $line1=<INR1R>){
27 my $line2 = <INR1R>;
28 my $line3 = <INR1R>;
29 my $line4 = <INR1R>;
30 if ($line1 =~ /^@/){
31 $nbread++;
32 if ($line2=~/([ATGCNX]+)/i){
33 $nbbase += length($1);
34 }
35 }
36 }
37 print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
38 close (INR1R);
39
40 open(INR2R, $read2_row) or die ("Can't open $read2_row\n");
41 $nbread=0;
42 $nbbase =0;
43 while (my $line1=<INR2R>){
44 my $line2 = <INR2R>;
45 my $line3 = <INR2R>;
46 my $line4 = <INR2R>;
47 if ($line1 =~ /^@/){
48 $nbread++;
49 if ($line2=~/([ATGCNX]+)/i){
50 $nbbase += length($1);
51 }
52 }
53 }
54 print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
55 close (INR2R);
56
57 open(INR1T, $read1_trimmed) or die ("Can't open $read1_trimmed\n");
58 $nbread=0;
59 $nbbase =0;
60 while (my $line1=<INR1T>){
61 my $line2 = <INR1T>;
62 my $line3 = <INR1T>;
63 my $line4 = <INR1T>;
64 if ($line1 =~ /^@/){
65 $nbread++;
66 if ($line2=~/([ATGCNX]+)/i){
67 $nbbase += length($1);
68 }
69 else {
70 print STDERR "$line1\n$line2\n";
71 }
72 }
73 }
74 print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
75 close (INR1T);
76
77 open(INR2T, $read2_trimmed) or die ("Can't open $read2_trimmed\n");
78 $nbread=0;
79 $nbbase =0;
80 while (my $line1=<INR2T>){
81 my $line2 = <INR2T>;
82 my $line3 = <INR2T>;
83 my $line4 = <INR2T>;
84 if ($line1 =~ /^@/){
85 $nbread++;
86 if ($line2=~/([ATGCNX]+)/i){
87 $nbbase += length($1);
88 }
89 else {
90 print STDERR "$line1\n$line2\n";
91 }
92 }
93 }
94 print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
95 close (INR2T);
96
97 print "\nSAM row\n";
98 open(SAM, $sam_row) or die ("Can't open $sam_row\n");
99 my %bitscore;
100 while (my $line=<SAM>){
101 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
102 my @fields = split(/\s+/,$line);
103 my $bit = $fields[1];
104 if ($bitscore{$bit}){
105 $bitscore{$bit}++;
106 }
107 else {
108 $bitscore{$bit}=1;
109 }
110 }
111 }
112
113 print "bitscore\t";
114 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
115 print $key,"\t*\t";
116 }
117 print "\n";
118
119 print " number \t";
120 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
121 print $bitscore{$key},"\t*\t";
122 }
123 print "\n";
124 close (SAM);
125
126 print "\nSAM filtered\n";
127 open(SAMF, $sam_filtered) or die ("Can't open $sam_filtered\n");
128 undef %bitscore;
129 while (my $line=<SAMF>){
130 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
131 my @fields = split(/\s+/,$line);
132 my $bit = $fields[1];
133 if ($bitscore{$bit}){
134 $bitscore{$bit}++;
135 }
136 else {
137 $bitscore{$bit}=1;
138 }
139 }
140 }
141
142 print "bitscore\t";
143 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
144 print $key,"\t*\t";
145 }
146 print "\n";
147
148 print " number \t";
149 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
150 print $bitscore{$key},"\t*\t";
151 }
152 print "\n";
153 close (SAMF);
154
155 print "\nMPILEUP variant\n";
156 open(MPV, $mpileup_variant) or die ("Can't open $mpileup_variant\n");
157
158 my $nbvariant=0;
159 while (my $line=<MPV>){
160 my @fields = split(/\s+/,$line);
161 if ($#fields >= 4){
162 my $match = $fields[4];
163 $match =~ s/\$//g; #the read start at this position
164 $match =~ s/\^.//g; #the read end at this position followed by quality char
165 if ($match =~/[ACGTNacgtn]+/){
166 $nbvariant++;
167 }
168 }
169 else {
170 #print STDERR "Erreur : $line\n";
171 }
172 }
173
174 print "Variant detected :\t$nbvariant\n";
175 close (MPV);
176
177
178 print "\nMPILEUP filtered without dubious position\n";
179 open(LF, $list_filtered) or die ("Can't open $list_filtered\n");
180 $nbvariant=0;
181 while (my $line=<LF>){
182 $nbvariant++;
183 }
184
185 print "Variant selected :\t$nbvariant\n";
186 close (LF);
187
188 print "\nMPILEUP filtered without dubious position and BLAST\n";
189 open(BF, $blast_filtered) or die ("Can't open $blast_filtered\n");
190 $nbvariant=0;
191 while (my $line=<BF>){
192 $nbvariant++;
193 }
194
195 print "Variant selected :\t$nbvariant\n";
196 close (BF);
197
198
199 print "\nSNP selected after mpileup filtering : \t";
200 open(SNP, $snp_selected) or die ("Can't open $snp_selected\n");
201 $nbvariant=0;
202 while (my $line=<SNP>){
203 $nbvariant++;
204 }
205
206 print "$nbvariant\n";
207 close (SNP);
208
209
210
211
212
213
214
215