Mercurial > repos > dereeper > sniplay
comparison egglib/CalculateDiversityIndexes.pl @ 1:420b57c3c185 draft
Uploaded
author | dereeper |
---|---|
date | Fri, 10 Jul 2015 04:39:30 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:3e19d0dfcf3e | 1:420b57c3c185 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 use Switch; | |
5 use Getopt::Long; | |
6 use Bio::SeqIO; | |
7 | |
8 | |
9 my $usage = qq~Usage:$0 <args> [<opts>] | |
10 where <args> are: | |
11 -i, --input <FASTA input> | |
12 -o, --output <output filename> | |
13 -d, --directory <directory of egglib package> | |
14 ~; | |
15 $usage .= "\n"; | |
16 | |
17 my ($infile,$outfile,$dir_exe); | |
18 | |
19 | |
20 GetOptions( | |
21 "input=s" => \$infile, | |
22 "output=s" => \$outfile, | |
23 "directory=s"=> \$dir_exe | |
24 ); | |
25 | |
26 | |
27 die $usage | |
28 if ( !$infile || !$outfile || !$dir_exe); | |
29 | |
30 | |
31 my $EGGSTATS_EXE = "$dir_exe/egglib-2.1.5/bin/eggstats"; | |
32 | |
33 my %gene_alignments; | |
34 my $in = Bio::SeqIO->new(-file => $infile , '-format' => 'Fasta'); | |
35 while ( my $seq = $in->next_seq() ) | |
36 { | |
37 my $id = $seq -> id(); | |
38 my $sequence = $seq -> seq(); | |
39 my ($gene,$ind,$num_allele) = split("_",$id); | |
40 $gene_alignments{$gene}.= ">$id\n$sequence\n"; | |
41 } | |
42 | |
43 open(OUT,">$outfile"); | |
44 foreach my $gene(keys(%gene_alignments)) | |
45 { | |
46 open(F,">$gene.egglib_input.fa"); | |
47 print F $gene_alignments{$gene}; | |
48 close(F); | |
49 | |
50 my $results_egglib = `$EGGSTATS_EXE $gene.egglib_input.fa`; | |
51 | |
52 # parse Seqlib output | |
53 if ($results_egglib) | |
54 { | |
55 my %egglig_stats; | |
56 my @eggstats = split(/^/,$results_egglib); | |
57 foreach my $eggstat(@eggstats) | |
58 { | |
59 my ($desc,$value) = split(/: /,$eggstat); | |
60 chomp($value); | |
61 $egglig_stats{$desc} = $value; | |
62 } | |
63 print OUT "$gene;"; | |
64 print OUT $egglig_stats{"Total number of sequences"} . ";"; | |
65 print OUT $egglig_stats{"Total number of sites"} . ";"; | |
66 print OUT $egglig_stats{"Number of analyzed sites"} . ";"; | |
67 print OUT $egglig_stats{"S"} . ";"; | |
68 print OUT $egglig_stats{"thetaW"} . ";"; | |
69 print OUT $egglig_stats{"Pi"} . ";"; | |
70 print OUT $egglig_stats{"D"} . ";"; | |
71 print OUT $egglig_stats{"number of haplotypes"} . ";"; | |
72 print OUT $egglig_stats{"haplotypes diversity"} . ";"; | |
73 print OUT $egglig_stats{"Fay and Wu H"} . ";"; | |
74 print OUT $egglig_stats{"Fst"} . ";"; | |
75 print OUT $egglig_stats{"Snn"} . ";"; | |
76 print OUT "\n"; | |
77 unlink("$gene.egglib_input.fa"); | |
78 } | |
79 } | |
80 close(OUT); | |
81 |