annotate nucleScore.pl @ 0:82dce1eb9074 draft default tip

Uploaded
author dcouvin
date Fri, 03 Sep 2021 22:36:56 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
1 #!/usr/bin/perl
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
2
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
3 use strict;
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
4 use warnings;
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
5 use Bio::SeqIO;
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
6 #use Shannon::Entropy qw/entropy/;
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
7 use File::Basename;
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
8 #use Bio::Species;
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
9
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
10 #use FindBin;
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
11 #use lib "$FindBin::RealBin/../perl5";
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
12
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
13 #my $input = $ARGV[0];
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
14 # TODO: work out how to accept the input as a Perl list
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
15 #my @liste = split(/,/, $input);
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
16 #my $recap_total_seq = $ARGV[1];
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
17
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
18 #my ($input, $recap_total_seq) = @ARGV;
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
19
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
20 #my $start = time();
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
21
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
22 #my $file = ""; #= $ARGV[0];
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
23 #my $recap_total_seq = "nucleScore_result.xls";
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
24
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
25 #open (RECAP,'>', $recap_total_seq) or die "could not open $!";
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
# Column header of the tab-separated report written to STDOUT; one data row
# per input file follows it.
my $report_header = "File\tA percent\tT percent\tC percent\tG percent\tGC percent\tAT/GC ratio\tNucleScore\tATG\tTGA\tTAG\tTAA\tGenome size (bp)\n";
print $report_header;
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
27 #close(RECAP);
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
28
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
29
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
30 #FASTA files
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
31 #if(@ARGV){
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
32
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
33 #for (my $i = 0; $i <= $#ARGV; $i++) {
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
34 #if ($ARGV[$i]=~/-output/i or $ARGV[$i]=~/-o/i) {
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
35 # $recap_total_seq = $ARGV[$i+1];
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
36 #}
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
37 #}
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
38
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
39
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
40 #open (RECAP,'>>', $recap_total_seq) or die "could not open $!";
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
41
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
# Main loop: every command-line argument is treated as a FASTA file and
# produces one tab-separated row on STDOUT (same columns as the header line).
# Files for which the score cannot be computed are reported on STDERR and
# skipped, so one degenerate file no longer aborts the whole batch.
for my $arg (@ARGV) {
    my $file = $arg;

    # Concatenate all records of the file into a single sequence string.
    my $seqIO     = Bio::SeqIO->new(-format => 'Fasta', -file => $file);
    my $globalSeq = "";
    while (my $seq = $seqIO->next_seq()) {
        $globalSeq .= $seq->seq // '';    # a record without residues adds nothing
    }

    my $gcpercent = gc_percent($globalSeq);
    my ($ade, $thy, $gua, $cyt, $n, $length) = number_nuc_length_seq($file);

    # Without any G/C base (which also covers an empty file) the AT/GC ratio
    # and the per-base percentages would divide by zero.
    if ($gua + $cyt == 0) {
        warn "Skipping $file: no G or C bases found, AT/GC ratio is undefined\n";
        next;
    }

    my ($aPercent, $tPercent, $gPercent, $cPercent, $nPercent) = nucleotid_percent($ade, $thy, $gua, $cyt, $n, $length);

    my $atgcRatio = atgc_ratio($ade, $thy, $gua, $cyt);

    my @percentList = ($aPercent, $tPercent, $gPercent, $cPercent, $nPercent);

    my $variance = shift_data_variance(@percentList);

    # nucle_score() takes the logarithm of a product of these three values,
    # which is only defined when each of them is strictly positive.
    if ($variance <= 0 or $gcpercent <= 0 or $atgcRatio <= 0) {
        warn "Skipping $file: NucleScore is undefined for this base composition\n";
        next;
    }
    my $nucleScore = nucle_score($variance, $gcpercent, $atgcRatio, $length);

    # Count overlapping trinucleotides one match at a time (no genome-sized
    # list is built).  The sequence is upper-cased first so that lower-case
    # (soft-masked) bases are counted, as they are in the other statistics.
    my %tri_count = ();
    my $upperSeq  = uc $globalSeq;
    while ($upperSeq =~ /(?=(.{3}))/g) {
        $tri_count{$1}++;
    }

    # A codon that never occurs has no hash entry; report it as 0.
    my $atg = $tri_count{'ATG'} // 0;
    my $tga = $tri_count{'TGA'} // 0;
    my $tag = $tri_count{'TAG'} // 0;
    my $taa = $tri_count{'TAA'} // 0;

    # Only the file name, not its directory, is shown in the first column.
    my $label = basename($file);

    print "$label\t$aPercent\t$tPercent\t$cPercent\t$gPercent\t$gcpercent\t$atgcRatio\t$nucleScore\t$atg\t$tga\t$tag\t$taa\t$length\n";
}
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
117 #close (RECAP) or die "close file error : $!";
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
118 #}
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
119
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
120 #my $end = time();
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
121
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
122 #my $total = $end - $start;
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
123
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
124 #print "***** Total time (in seconds) is: $total *****\n";
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
125
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
126 #------------------------------------------------------------------------------
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
# Count the bases of a FASTA file.
#
# Parameter:
#   $fastaFile - path of the FASTA file to read.
#
# Returns the list ($ade, $thy, $gua, $cyt, $n, $length): the case-insensitive
# number of A, T, G, C and N characters, and the total number of
# non-whitespace characters, over all lines that do not contain '>'.
# Characters other than A/T/G/C/N count towards $length only.
#
# Dies when the file cannot be opened or closed.
sub number_nuc_length_seq {
    my ($fastaFile) = @_;
    my ($ade, $thy, $gua, $cyt, $n, $length) = (0, 0, 0, 0, 0, 0);

    open(my $fasta, "<", $fastaFile)
        or die "Could not open $fastaFile: $!";

    while (my $line = <$fasta>) {
        next if $line =~ />/;    # header line

        # Drop the line ending and any other whitespace so that CRLF files
        # do not inflate the length with "\r" characters.
        $line =~ s/\s+//g;

        $length += length $line;

        # tr/// with an empty replacement list only counts, it changes nothing.
        $ade += ($line =~ tr/Aa//);
        $thy += ($line =~ tr/Tt//);
        $gua += ($line =~ tr/Gg//);
        $cyt += ($line =~ tr/Cc//);
        $n   += ($line =~ tr/Nn//);
    }

    close($fasta) or die "Error close file $fastaFile: $!";

    return ($ade, $thy, $gua, $cyt, $n, $length);
}
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
157
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
158 #------------------------------------------------------------------------------
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
# Convert base counts into percentages of the sequence length.
#
# Parameters:
#   $ade, $thy, $gua, $cyt, $n - number of A, T, G, C and N bases.
#   $length                    - total sequence length used as denominator.
#
# Returns the list (A%, T%, G%, C%, N%).  When $length is zero (or undefined)
# every percentage is 0 instead of dying with a division by zero, in line
# with gc_percent(), which also returns 0 for an empty sequence.
sub nucleotid_percent {
    my ($ade, $thy, $gua, $cyt, $n, $length) = @_;

    return (0, 0, 0, 0, 0) if !$length;

    my $adePercent = $ade / $length * 100;
    my $thyPercent = $thy / $length * 100;
    my $guaPercent = $gua / $length * 100;
    my $cytPercent = $cyt / $length * 100;
    my $nPercent   = $n / $length * 100;

    return ($adePercent, $thyPercent, $guaPercent, $cytPercent, $nPercent);
}
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
172
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
173 #------------------------------------------------------------------------------
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
# Compute the GC percentage of a sequence string.
#
# Parameter:
#   $seq - the sequence; G and C are counted case-insensitively.
#
# Returns (number of G + number of C) / length($seq) * 100, or 0 when the
# sequence is empty or undefined.
sub gc_percent {
    my ($seq) = @_;

    return 0 if !defined $seq or length($seq) == 0;

    # tr/// with an empty replacement list counts the listed characters
    # without modifying the string and without building a per-character
    # list of the whole sequence.
    my $gcCount = ($seq =~ tr/GCgc//);

    return ($gcCount / length($seq)) * 100;
}
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
196 #------------------------------------------------------------------------------
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
# Ratio of A+T bases to G+C bases.
#
# Parameters: $ade, $thy, $gua, $cyt - number of A, T, G and C bases.
# Returns ($ade + $thy) / ($gua + $cyt).  There is no guard here: a zero
# G+C count makes Perl die with a division-by-zero error.
sub atgc_ratio {
    my ($ade, $thy, $gua, $cyt) = @_;

    my $atCount = $ade + $thy;
    my $gcCount = $gua + $cyt;

    return $atCount / $gcCount;
}
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
204 #------------------------------------------------------------------------------
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
# Variance of a list of numbers, computed with the shifted-data formula:
# every value is offset by the first element ($K) before the sums are built.
#
# Parameter:
#   @data - the values.
#
# Returns the variance with divisor n (not n - 1), or 0.0 when fewer than
# two values are given.
sub shift_data_variance {
    my (@data) = @_;

    return 0.0 if @data < 2;

    my $K = $data[0];

    # All three accumulators start at zero; a scalar on the right-hand side
    # of a list assignment would initialise only the first one.
    my ($n, $Ex, $Ex2) = (0, 0, 0);

    for my $x (@data) {
        my $shifted = $x - $K;
        $n   += 1;
        $Ex  += $shifted;
        $Ex2 += $shifted * $shifted;
    }

    my $variance = ($Ex2 - ($Ex * $Ex) / $n) / $n;

    return $variance;
}
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
225 #------------------------------------------------------------------------------
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
# NucleScore of a sequence.
#
# Parameters:
#   $variance  - variance of the base percentages.
#   $gcPercent - GC percentage.
#   $atgcRatio - AT/GC ratio.
#   $length    - sequence length.
#
# Returns log2( $variance * $gcPercent * $atgcRatio ** 3 / sqrt($length) ).
#
# An earlier formulation is kept here for reference:
#   ($variance * $gcPercent * $atgcRatio) / $length
sub nucle_score {
    my ($variance, $gcPercent, $atgcRatio, $length) = @_;

    # '**' binds tighter than '*': only the ratio is cubed.
    my $weighted = $variance * $gcPercent * $atgcRatio ** (3);
    my $scaled   = $weighted / sqrt($length);

    return log2($scaled);
}
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
236
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
237 #------------------------------------------------------------------------------
82dce1eb9074 Uploaded
dcouvin
parents:
diff changeset
# Base-2 logarithm, derived from Perl's natural-logarithm builtin.
#
# Parameter: the number to take the logarithm of (first argument).
# Returns log($value) / log(2).
sub log2 {
    my ($value) = @_;

    my $natural = log($value);

    return $natural / log(2);
}