annotate gfapts/gfap_r1.0_known_var_finder.pl~ @ 1:028f435b6cfb draft default tip

Uploaded
author rdaveau
date Fri, 03 Aug 2012 05:50:41 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
1 #!/usr/bin/perl
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
2
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
3 use strict;
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
4 #use lib 'inc/perlmod';
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
5 #use ngsutil qw[ :DEFAULT &varscan ];
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
6 use warnings FATAL => qw[ numeric uninitialized ];
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
7 use File::Basename;
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
8 use Getopt::Long;
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
9
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
10 #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
11 # ngsutil.pm
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
# explode_varcall(POS, REF, ALT)
#
# Splits one variant call into elementary events.
#
# Arguments:
#   POS - 1-based position of the first base of REF
#   REF - reference allele string
#   ALT - alternate allele string
#
# Returns two array references (\@POS, \@VAR):
#   @POS - flat list of (start, end) pairs, one pair per event
#   @VAR - flat list of (ref, alt) pairs, one pair per event; an empty
#          allele is written as '-'
#
# When the shorter allele is a single base the call is returned as a single
# event (a leading base is stripped from both alleles if the other allele is
# longer).  Otherwise the first base of both alleles is dropped, the remainders
# are compared position by position and every run of mismatches / every gap
# becomes its own event.
# NOTE(review): dropping the first base assumes REF and ALT share it -- confirm
# against the callers' input format.
sub explode_varcall{
	my ($POS, $REF, $ALT) = @_;
	my ($START, $END) = ($POS, $POS);
	my (@VAR, @POS);
	my @length = (length($REF), length($ALT));
	my @range = sort { $a <=> $b } @length;

	if ($range[0] == 1) {
		if ($range[1] != 1) {
			# Simple indel: strip the leading base, an emptied allele becomes '-'.
			foreach ($REF, $ALT) {
				$_ = substr($_, 1);
				$_ =~ s/^$/-/;
			}
			# Deletion (REF is the long allele): the event covers the deleted bases.
			if ($length[0] != 1) {
				$END += $length[0] - 1;
				$START++;
			}
		}
		push @POS, $START, $END;
		push @VAR, $REF, $ALT;
	} else {
		# From here on @length/@range hold the last valid index of each
		# allele once its first base has been removed.
		$_ -= 2 foreach (@length, @range);
		$_++ foreach ($START, $END);
		$_ = substr($_, 1) foreach ($REF, $ALT);
		my $indel = '-' x ($range[1] - $range[0]);

		# Left-aligned comparison mask: 1 = match, 0 = mismatch, '-' = gap.
		my $VAR = '';
		foreach my $j (0 .. $range[1]) {
			$VAR .= ($j > $range[0]) ? '-'
			      : (substr($REF, $j, 1) ne substr($ALT, $j, 1)) ? 0 : 1;
		}

		if ($length[0] < $length[1]) {
			# REF is shorter: also try the right-aligned comparison and keep
			# it only if it yields strictly fewer mismatches.
			my $fwd_mask = $VAR;
			my $fwd_mismatches = ($fwd_mask =~ tr/0//);
			$VAR = '';
			foreach my $j (reverse 0 .. $range[1]) {
				$VAR .= ($j > $range[0]) ? '-'
				      : (substr($REF, $length[0] - $j, 1) ne substr($ALT, $length[1] - $j, 1)) ? 0 : 1;
			}
			my $rev_mismatches = ($VAR =~ tr/0//);
			if ($rev_mismatches >= $fwd_mismatches) {
				$VAR = $fwd_mask;
			} else {
				# Right alignment wins: pad REF on the left so offsets line up.
				$REF = $indel . $REF;
			}
		} else {
			# ALT is the shorter (or equal) allele: pad it on the right.
			$ALT .= $indel;
		}

		# Collect [offset, length] of every mismatch run, then of every gap run.
		my @idx;
		foreach my $sym (qw[ 0 \- ]) {
			push @idx, [ $-[0], $+[0] - $-[0] ] while ($VAR =~ /$sym+/g);
		}

		foreach my $k (@idx) {
			my ($offset, $len) = @{$k};
			push @VAR, (substr($_, $offset, $len) || '-') foreach ($REF, $ALT);
			# The original called sum(), which is neither imported nor
			# defined in this file; compute the end offset directly.
			push @POS, $offset, $offset + $len - 1;
		}
		$_ += $START foreach @POS;
		$_ =~ s/-+/-/ foreach @VAR;
		# An event whose ref allele is '-' is reported with end == start.
		for (my $i = 0; $i < $#POS; $i += 2) {
			$POS[$i + 1] = $POS[$i] if $VAR[$i] eq '-';
		}
	}
	return(\@POS, \@VAR);
}
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
70
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
# varscan(KNAME, FPATH, HREF)
#
# Annotates known variants from a whitespace-delimited database file.
#
# Arguments:
#   KNAME - key under which the annotation is stored for each matched variant
#   FPATH - path of the database file; lines starting with '#' are ignored,
#           the first five fields are read as the three parts of the variant
#           key followed by the ref and alt alleles
#   HREF  - reference to a hash keyed by "field0:field1:field2" whose values
#           are hash references holding at least 'ref' and 'alt'
#
# For every database line whose key exists in HREF and whose alleles match,
# the remaining fields are joined with ':' and stored in $HREF->{key}{KNAME}.
# Dies if the file cannot be opened.
sub varscan{
	my ($kname, $fpath, $href) = @_;
	# Lexical handle and loop variable: the original read into the global $_
	# through the bareword handle IN, clobbering both for the caller.
	open(my $in, '<', $fpath) or die "cannot open '$fpath': $!";
	while (my $line = <$in>) {
		next if $line =~ /^#/;
		chomp $line;
		my @buffer = split /\s+/, $line;
		# Blank or truncated lines carry no usable allele information.
		next if @buffer < 5;
		my $k = join(':', @buffer[0..2]);
		next if !exists $$href{$k};
		# Literal containment test.  The original used the database allele
		# as a regex, so '.' matched any base and '*' aborted the run.
		# NOTE(review): containment (not equality) is kept from the original,
		# so e.g. a database alt 'T' still matches a called alt 'TT' --
		# confirm whether exact equality is intended.
		next if index($$href{$k}->{ref}, $buffer[3]) < 0;
		next if index($$href{$k}->{alt}, $buffer[4]) < 0;
		splice(@buffer, 0, 5);
		$$href{$k}->{$kname} = join(':', @buffer);
	}
	close $in;
}
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
87 #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
88
028f435b6cfb Uploaded
rdaveau
parents:
diff changeset
# ---------------------------------------------------------------------------
# Main script: reads the variant list given by --varfile, annotates it with
# the 1000g, dbsnp and cosmic_var databases through varscan(), writes the
# result to <outdir>/<basename of varfile>.dbi and makes --outfile a symbolic
# link to that file.
# ---------------------------------------------------------------------------
my($varfile, $buildver, $outdir, $dir_1000g, $dir_dbsnp, $dir_cosmic, $release_1000g, $release_dbsnp, $release_cosmic, $outfile, $k, @buffer, @varlist, %opts, %varlist);

GetOptions(\%opts, "varfile=s", "buildver=s", "outdir=s", "dir_1000g=s", "dir_dbsnp=s", "dir_cosmic=s", "release_1000g=s", "release_dbsnp=s", "release_cosmic=s", "outfile=s");

# These options are interpolated into paths or legend strings below; with
# fatal 'uninitialized' warnings a missing one would abort anyway, so report
# it by name instead.
foreach my $name (qw[ varfile buildver outdir release_1000g release_dbsnp release_cosmic outfile ]){
	defined $opts{$name} or die "missing required option --$name\n";
}

$varfile = $opts{varfile};
$buildver = $opts{buildver};
$outdir = $opts{outdir};
$dir_1000g = $opts{dir_1000g};
$dir_dbsnp = $opts{dir_dbsnp};
$dir_cosmic = $opts{dir_cosmic};
$release_1000g = $opts{release_1000g};
$release_dbsnp = $opts{release_dbsnp};
$release_cosmic = $opts{release_cosmic};
$outfile = $opts{outfile};

# Name the output after the real file when --varfile is a symbolic link.
my $fname = readlink($varfile) || $varfile;
$fname = basename($fname);

# Per-database settings: location, release label, the ':'-joined default
# values printed when a variant is not found, and the matching column names.
my %k=(
	'1000g' => {
		'dir' => $dir_1000g, 'release' => $release_1000g, 'value' => join(':', ('0.00000')x5), 'header' => join(':', 'AF_ALL', 'AF_AFR', 'AF_AMR', 'AF_ASN', 'AF_EUR')
	}, 'dbsnp' => {
		'dir' => $dir_dbsnp, 'release' => $release_dbsnp, 'value' => join(':', ('na')x2), 'header' => join(':', 'rs', 'dbsnp')
	}, 'cosmic_var' => {
		'dir' => $dir_cosmic, 'release' => $release_cosmic, 'value' => join(':', '0.00000', 'na'), 'header' => join(':', 'AF_COS', 'cid')
	}
);

# Column descriptions written as '#name = description' lines in the output.
my %legend=(
	'chr' => 'chromosome identifier',
	'start' => "${buildver} 1-based start position",
	'end' => "${buildver} 1-based end position",
	'ref' => 'reference allele',
	'alt' => 'alternate allele',
	'QC' => 'Phred-scaled call quality',
	'NRF' => '#reads consistent w/ the reference allele on the F-strand',
	'NRR' => '#reads consistent w/ the reference allele on the R-strand',
	'NAF' => '#reads consistent w/ the alternate allele on the F-strand',
	'NAR' => '#reads consistent w/ the alternate allele on the R-strand',
	'DP' => 'total #reads in call ie. NRF+NRR+NAF+NAR',
	'AD' => 'total #reads consistent w/ the alternate allele ie. NAF+NAR',
	'AF' => 'alternate allele ratio ie. AD/DP',
	'VCF.FILTER' => 'FILTER field from the input vcf file',
	'DPT.FILTER' => 'check for heterogeneous depth in substituted blocks',
	'VAR.FILTER' => 'GFAP default FILTER to discriminate between TP and FP variants',
	'P.str' => 'NRF+NAF vs. NRR+NAR binomial test P-value ie. total strand bias',
	'P.ref' => 'NRF vs. NRR binomial test P-value ie. reference allele strand bias',
	'P.alt' => 'NAF vs. NAR binomial test P-value ie. alternate allele strand bias',
	'AF_ALL' => "global AF in ${release_1000g} 1000g data",
	'AF_AFR' => "AF in AFR ${release_1000g} 1000g data",
	'AF_AMR' => "AF in AMR ${release_1000g} 1000g data",
	'AF_ASN' => "AF in ASN ${release_1000g} 1000g data",
	'AF_EUR' => "AF in EUR ${release_1000g} 1000g data",
	'AF_COS' => "AF in ${release_cosmic} cosmic data",
	'rs' => "dbsnp rs identifier(s) from ${release_dbsnp} release",
	'dbsnp' => "dbsnp build version(s) from ${release_dbsnp} release",
	'cid' => "cosmic mutation identifier from ${release_cosmic} release"
);
# Columns in input order; the database columns are appended further down.
my @header=('chr', 'start', 'end', 'ref', 'alt', 'DPT.FILTER', 'QC', 'NRF', 'NRR', 'NAF', 'NAR', 'VCF.FILTER', 'P.str', 'P.ref', 'P.alt', 'DP', 'AD', 'AF', 'VAR.FILTER');
my @k=qw[ 1000g dbsnp cosmic_var ];

# Load the variant list, keyed by "chr:start:end" (without the 'chr' prefix).
open(my $in, '<', $varfile) or die "cannot open '$varfile': $!";
while(my $line = <$in>){
	chomp $line;
	@buffer=split /\s+/, $line;
	next unless @buffer;	# skip blank lines
	$buffer[0]=~s/^chr(.+)$/$1/;
	push @varlist, ($k=join(':', @buffer[0..2]));
	shift(@buffer) for 0..2;
	$varlist{$k}->{$_}=shift(@buffer) foreach qw[ ref alt ];
	# The first remaining field becomes SKIP when it is 'unk', PASS otherwise.
	$varlist{$k}->{cov}=join(':', (($buffer[0] eq 'unk')?'SKIP':'PASS'), @buffer[1..$#buffer]);
}
close $in;

foreach $k (@k){
	push @header, split(/:/, $k{$k}->{header});
	# NOTE(review): %k defines 'dir' and 'release' for each database but no
	# 'file' entry, so this path is never set and the script cannot get past
	# this point as written.  The naming scheme of the database files is not
	# visible here; build $k{$k}->{file} from dir/release/buildver once it is
	# confirmed.
	defined $k{$k}->{file}
		or die "no database file configured for '$k': the 'file' entry of its settings is never set\n";
	varscan($k, $k{$k}->{file}, \%varlist);
}

# Output column order, as indices into @header and into each variant's fields.
my @idx=(0..4,7..10,15..17,6,12..14,11,5,18..23,26..27,24..25);
my $dbi = "${outdir}/${fname}.dbi";
open(my $out, '>', $dbi) or die "cannot write '$dbi': $!";
print {$out} '#', join(' = ', $_, $legend{$_}), "\n" foreach @header[@idx];
print {$out} '#', join("\t", @header[@idx]), "\n";
foreach $k (@varlist){
	@buffer=(split(/:/, 'chr'.$k), $varlist{$k}->{ref}, $varlist{$k}->{alt});
	# Databases without a hit fall back to their default 'value' string.
	push @buffer, split(/:/, ($varlist{$k}->{$_} || $k{$_}->{value})) foreach ('cov', @k);
	print {$out} join("\t", @buffer[@idx]), "\n";
}
close $out or die "cannot close '$dbi': $!";

# Replace $outfile with a symbolic link to the result.  Done with built-ins
# rather than a shell command so paths containing spaces or shell
# metacharacters are handled safely and $! reflects the actual failure.
if(-e $outfile or -l $outfile){
	unlink $outfile or die "cannot remove '$outfile': $!";
}
symlink($dbi, $outfile) or die "cannot link '$outfile' to '$dbi': $!";