comparison gfapts/gfap_r1.0_cdsvar_functional_annotater.pl @ 0:f753b30013e6 draft

Uploaded
author rdaveau
date Fri, 29 Jun 2012 10:20:55 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f753b30013e6
1 #!/usr/bin/perl
2
3 use strict;
4 use warnings FATAL => qw[ numeric uninitialized ];
5 use List::Util qw[ sum min max ];
6 use List::MoreUtils qw[ first_index ];
7 use File::Basename;
8 use Getopt::Long;
9
10 my($varfile, $buildver, $dbdir, $release, $outdir, $outfile, $max, $i, $k);
11 my(@buffer, @legend, @header, @k, @score, @tools, @Temp, %buffer, %AAS, %dbscore, %dbtools, %opts);
12
13 GetOptions(\%opts, "varfile=s", "buildver=s", "dbdir=s", "release=s", "outdir=s", "outfile=s");
14 $varfile = $opts{varfile};
15 $buildver = $opts{buildver};
16 $dbdir = $opts{dbdir};
17 $release = $opts{release};
18 $outdir = $opts{outdir};
19 $outfile = $opts{outfile};
20
21 my $fname = readlink($varfile) || $varfile;
22 my $dbfile="${dbdir}/${buildver}_dbnsfp_${release}.txt";
23 $fname = basename($fname);
24
25 open IN, "<$varfile" or die $!;
26 open OUT, ">${outdir}/${fname}.Temp" or die $!;
27 while(<IN>){
28 push @legend, $1 and next if /^#(.+=.+)/;
29 next if $_!~/\b(?:s(?:g|l))|ns\b/;
30 next if /\s+-\s+/;
31 /^(?:chr)*(\S+)\s+(\S+)/;
32 @{$buffer{($k=join('_', $1, $2))}->{dbnsfp}}=();
33 push @k, $k;
34 print OUT $_;
35 }
36 close IN;
37 close OUT;
38
39 $i=first_index{ /^annot/ } @legend;
40 @_=$legend[$i]=~/((?:s(?:g|l)|ns):[^;|\s]+)/g;
41 $legend[$i]=join(' = ', 'annot', join('; ', @_));
42 push @legend, (
43 'AAS = Amino Acid Substitution(s)',
44 "FIS = Functional Impact Score(s) from dbnsfp ${release} release",
45 "OCC = number of tools from which FIS was/were calculated",
46 "FIS.max = highest score among FIS",
47 "OCC.max = number of tools from which FIS.max was calculated",
48 "PRED = qualitative ternary classifier ie. [L]ow; [M]edium; [H]igh"
49 );
50 foreach (@legend){
51 /^(\S+)/;
52 push @header, $1;
53 }
54
55 open IN, "<$dbfile" or die $!;
56 while(<IN>){
57 next if /^#/;
58 /^(\S+)\s+(\S+)(?:\s+\S+){2}\s+(.+)/;
59 next if !exists $buffer{($k=join('_', $1, $2))};
60 push @{$buffer{$k}->{dbnsfp}}, join(':', split /\t/, $3);
61 }
62 close IN;
63 open IN, "<${outdir}/${fname}.Temp" or die $!;
64 open OUT, ">${outdir}/${fname}.dbnsfp" or die $!;
65 print OUT "#", $_, "\n" foreach @legend;
66 print OUT "#", join("\t", @header), "\n";
67 foreach $k (@k){
68 $i=0;
69 $_=readline(IN);
70 chomp;
71 @buffer=split /\s+/, $_;
72 %{$_}=() foreach (\%AAS, \%dbscore, \%dbtools);
73 foreach (split(/[;\|]/, $buffer[-1])){
74 $AAS{$1.$2}++ if /^p\.(\w{1})\d+(\w{1})$/;
75 }
76 if($#{$buffer{$k}->{dbnsfp}}<0){
77 unshift @buffer, (%AAS)?(join(':', keys %AAS)):('na'), (join(':', ('na') x max(scalar(keys %AAS), 1))) x 2;
78 }elsif(%AAS){
79 foreach (@{$buffer{$k}->{dbnsfp}}){
80 @Temp=split /:/, $_;
81 $k=shift @Temp;
82 @{$_}=split(/;/, pop @Temp) foreach (\@tools, \@score);
83 foreach (split /;/, shift @Temp){
84 $dbscore{$k.$_}=shift @score;
85 $dbtools{$k.$_}=shift @tools;
86 }
87 }
88 foreach (keys %AAS){
89 push @score, $dbscore{$_} || 'na';
90 push @tools, $dbtools{$_} || 'na';
91 }
92 unshift @buffer, join(':', keys %AAS), join(':', @score), join(':', @tools);
93 }
94 push @buffer, shift @buffer for 1..3;
95 @{$_}=grep{ !/na/ } split(/:/, $buffer[--$i]) foreach (\@tools, \@score);
96 $max=max(@score) || 'na';
97 push @buffer, (($max ne 'na')?($score[($i=first_index{ $max } @score)], $tools[$i], ($max<.3)?'L':($max<.7)?'M':'H'):(('na')x3));
98 print OUT join("\t", @buffer), "\n";
99 }
100 close IN;
101 close OUT;
102 system "rm ${outdir}/${fname}*Temp $outfile; ln -s ${outdir}/${fname}.dbnsfp $outfile" and die $!;