annotate gfapts/gfap_r1.0_cdsvar_functional_annotater.pl @ 0:f753b30013e6 draft

Uploaded
author rdaveau
date Fri, 29 Jun 2012 10:20:55 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
1 #!/usr/bin/perl
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
2
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
3 use strict;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
4 use warnings FATAL => qw[ numeric uninitialized ];
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
5 use List::Util qw[ sum min max ];
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
6 use List::MoreUtils qw[ first_index ];
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
7 use File::Basename;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
8 use Getopt::Long;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
9
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
10 my($varfile, $buildver, $dbdir, $release, $outdir, $outfile, $max, $i, $k);
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
11 my(@buffer, @legend, @header, @k, @score, @tools, @Temp, %buffer, %AAS, %dbscore, %dbtools, %opts);
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
12
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
13 GetOptions(\%opts, "varfile=s", "buildver=s", "dbdir=s", "release=s", "outdir=s", "outfile=s");
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
14 $varfile = $opts{varfile};
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
15 $buildver = $opts{buildver};
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
16 $dbdir = $opts{dbdir};
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
17 $release = $opts{release};
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
18 $outdir = $opts{outdir};
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
19 $outfile = $opts{outfile};
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
20
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
21 my $fname = readlink($varfile) || $varfile;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
22 my $dbfile="${dbdir}/${buildver}_dbnsfp_${release}.txt";
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
23 $fname = basename($fname);
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
24
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
25 open IN, "<$varfile" or die $!;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
26 open OUT, ">${outdir}/${fname}.Temp" or die $!;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
27 while(<IN>){
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
28 push @legend, $1 and next if /^#(.+=.+)/;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
29 next if $_!~/\b(?:s(?:g|l))|ns\b/;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
30 next if /\s+-\s+/;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
31 /^(?:chr)*(\S+)\s+(\S+)/;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
32 @{$buffer{($k=join('_', $1, $2))}->{dbnsfp}}=();
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
33 push @k, $k;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
34 print OUT $_;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
35 }
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
36 close IN;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
37 close OUT;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
38
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
39 $i=first_index{ /^annot/ } @legend;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
40 @_=$legend[$i]=~/((?:s(?:g|l)|ns):[^;|\s]+)/g;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
41 $legend[$i]=join(' = ', 'annot', join('; ', @_));
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
42 push @legend, (
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
43 'AAS = Amino Acid Substitution(s)',
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
44 "FIS = Functional Impact Score(s) from dbnsfp ${release} release",
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
45 "OCC = number of tools from which FIS was/were calculated",
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
46 "FIS.max = highest score among FIS",
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
47 "OCC.max = number of tools from which FIS.max was calculated",
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
48 "PRED = qualitative ternary classifier ie. [L]ow; [M]edium; [H]igh"
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
49 );
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
50 foreach (@legend){
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
51 /^(\S+)/;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
52 push @header, $1;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
53 }
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
54
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
55 open IN, "<$dbfile" or die $!;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
56 while(<IN>){
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
57 next if /^#/;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
58 /^(\S+)\s+(\S+)(?:\s+\S+){2}\s+(.+)/;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
59 next if !exists $buffer{($k=join('_', $1, $2))};
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
60 push @{$buffer{$k}->{dbnsfp}}, join(':', split /\t/, $3);
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
61 }
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
62 close IN;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
63 open IN, "<${outdir}/${fname}.Temp" or die $!;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
64 open OUT, ">${outdir}/${fname}.dbnsfp" or die $!;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
65 print OUT "#", $_, "\n" foreach @legend;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
66 print OUT "#", join("\t", @header), "\n";
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
67 foreach $k (@k){
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
68 $i=0;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
69 $_=readline(IN);
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
70 chomp;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
71 @buffer=split /\s+/, $_;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
72 %{$_}=() foreach (\%AAS, \%dbscore, \%dbtools);
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
73 foreach (split(/[;\|]/, $buffer[-1])){
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
74 $AAS{$1.$2}++ if /^p\.(\w{1})\d+(\w{1})$/;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
75 }
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
76 if($#{$buffer{$k}->{dbnsfp}}<0){
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
77 unshift @buffer, (%AAS)?(join(':', keys %AAS)):('na'), (join(':', ('na') x max(scalar(keys %AAS), 1))) x 2;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
78 }elsif(%AAS){
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
79 foreach (@{$buffer{$k}->{dbnsfp}}){
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
80 @Temp=split /:/, $_;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
81 $k=shift @Temp;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
82 @{$_}=split(/;/, pop @Temp) foreach (\@tools, \@score);
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
83 foreach (split /;/, shift @Temp){
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
84 $dbscore{$k.$_}=shift @score;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
85 $dbtools{$k.$_}=shift @tools;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
86 }
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
87 }
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
88 foreach (keys %AAS){
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
89 push @score, $dbscore{$_} || 'na';
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
90 push @tools, $dbtools{$_} || 'na';
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
91 }
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
92 unshift @buffer, join(':', keys %AAS), join(':', @score), join(':', @tools);
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
93 }
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
94 push @buffer, shift @buffer for 1..3;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
95 @{$_}=grep{ !/na/ } split(/:/, $buffer[--$i]) foreach (\@tools, \@score);
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
96 $max=max(@score) || 'na';
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
97 push @buffer, (($max ne 'na')?($score[($i=first_index{ $max } @score)], $tools[$i], ($max<.3)?'L':($max<.7)?'M':'H'):(('na')x3));
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
98 print OUT join("\t", @buffer), "\n";
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
99 }
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
100 close IN;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
101 close OUT;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
102 system "rm ${outdir}/${fname}*Temp $outfile; ln -s ${outdir}/${fname}.dbnsfp $outfile" and die $!;