annotate egglib/CalculateDiversityIndexes.pl @ 1:420b57c3c185 draft

Uploaded
author dereeper
date Fri, 10 Jul 2015 04:39:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
2
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
3 use strict;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
4 use Switch;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
5 use Getopt::Long;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
6 use Bio::SeqIO;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
7
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
8
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
9 my $usage = qq~Usage:$0 <args> [<opts>]
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
10 where <args> are:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
11 -i, --input <FASTA input>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
12 -o, --output <output filename>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
13 -d, --directory <directory of egglib package>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
14 ~;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
15 $usage .= "\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
16
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
17 my ($infile,$outfile,$dir_exe);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
18
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
19
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
20 GetOptions(
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
21 "input=s" => \$infile,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
22 "output=s" => \$outfile,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
23 "directory=s"=> \$dir_exe
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
24 );
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
25
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
26
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
27 die $usage
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
28 if ( !$infile || !$outfile || !$dir_exe);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
29
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
30
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
31 my $EGGSTATS_EXE = "$dir_exe/egglib-2.1.5/bin/eggstats";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
32
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
33 my %gene_alignments;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
34 my $in = Bio::SeqIO->new(-file => $infile , '-format' => 'Fasta');
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
35 while ( my $seq = $in->next_seq() )
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
36 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
37 my $id = $seq -> id();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
38 my $sequence = $seq -> seq();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
39 my ($gene,$ind,$num_allele) = split("_",$id);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
40 $gene_alignments{$gene}.= ">$id\n$sequence\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
41 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
42
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
43 open(OUT,">$outfile");
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
44 foreach my $gene(keys(%gene_alignments))
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
45 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
46 open(F,">$gene.egglib_input.fa");
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
47 print F $gene_alignments{$gene};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
48 close(F);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
49
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
50 my $results_egglib = `$EGGSTATS_EXE $gene.egglib_input.fa`;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
51
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
52 # parse Seqlib output
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
53 if ($results_egglib)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
54 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
55 my %egglig_stats;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
56 my @eggstats = split(/^/,$results_egglib);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
57 foreach my $eggstat(@eggstats)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
58 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
59 my ($desc,$value) = split(/: /,$eggstat);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
60 chomp($value);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
61 $egglig_stats{$desc} = $value;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
62 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
63 print OUT "$gene;";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
64 print OUT $egglig_stats{"Total number of sequences"} . ";";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
65 print OUT $egglig_stats{"Total number of sites"} . ";";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
66 print OUT $egglig_stats{"Number of analyzed sites"} . ";";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
67 print OUT $egglig_stats{"S"} . ";";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
68 print OUT $egglig_stats{"thetaW"} . ";";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
69 print OUT $egglig_stats{"Pi"} . ";";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
70 print OUT $egglig_stats{"D"} . ";";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
71 print OUT $egglig_stats{"number of haplotypes"} . ";";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
72 print OUT $egglig_stats{"haplotypes diversity"} . ";";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
73 print OUT $egglig_stats{"Fay and Wu H"} . ";";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
74 print OUT $egglig_stats{"Fst"} . ";";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
75 print OUT $egglig_stats{"Snn"} . ";";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
76 print OUT "\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
77 unlink("$gene.egglib_input.fa");
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
78 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
79 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
80 close(OUT);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
81