annotate gfapts/gfap_r1.0_known_var_finder.pl @ 0:f753b30013e6 draft

Uploaded
author rdaveau
date Fri, 29 Jun 2012 10:20:55 -0400
parents
children 028f435b6cfb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
#!/usr/bin/perl

# gfap_r1.0_known_var_finder.pl
#
# Reads a table of variant calls, lets ngsutil::varscan look each variant up
# in the 1000g, dbsnp and cosmic_var data sets, and writes the merged table to
# "<outdir>/<basename of varfile>.dbi". Finally --outfile is replaced by a
# symlink pointing at that .dbi file.
#
# Options (all take a string value):
#   --varfile          input table, one variant per line, whitespace separated:
#                      chr start end ref alt depth-filter <other call columns>
#   --buildver         genome build label (only used in the legend text)
#   --outdir           directory that receives the .dbi file
#   --release_1000g, --release_dbsnp, --release_cosmic
#                      release labels (only used in the legend text)
#   --dir_1000g, --dir_dbsnp, --dir_cosmic
#                      stored per data set; not read anywhere in this script
#   --outfile          path that is replaced by a symlink to the .dbi file

use strict;
use lib 'inc/perlmod';
use ngsutil qw[ :DEFAULT &varscan ];
use warnings FATAL => qw[ numeric uninitialized ];
use File::Basename;
use Getopt::Long;

my %opts;
GetOptions(
    \%opts,
    "varfile=s", "buildver=s", "outdir=s",
    "dir_1000g=s", "dir_dbsnp=s", "dir_cosmic=s",
    "release_1000g=s", "release_dbsnp=s", "release_cosmic=s",
    "outfile=s",
) or die "Invalid command line options\n";

# These values are interpolated into strings or used as paths further down;
# leaving one out used to abort with an opaque "uninitialized value" error.
foreach my $name (qw[ varfile buildver outdir release_1000g release_dbsnp release_cosmic outfile ]) {
    defined $opts{$name} or die "Missing required option --$name\n";
}

my $varfile        = $opts{varfile};
my $buildver       = $opts{buildver};
my $outdir         = $opts{outdir};
my $dir_1000g      = $opts{dir_1000g};
my $dir_dbsnp      = $opts{dir_dbsnp};
my $dir_cosmic     = $opts{dir_cosmic};
my $release_1000g  = $opts{release_1000g};
my $release_dbsnp  = $opts{release_dbsnp};
my $release_cosmic = $opts{release_cosmic};
my $outfile        = $opts{outfile};

# Name the output after the real file when --varfile is a symlink.
my $fname = basename(readlink($varfile) || $varfile);

# Per data set: 'header' lists the output columns it contributes and 'value'
# holds the ':'-joined defaults printed when a variant has no annotation.
my %k=(
    '1000g' => {
        'dir' => $dir_1000g, 'release' => $release_1000g, 'value' => join(':', ('0.00000')x5), 'header' => join(':', 'AF_ALL', 'AF_AFR', 'AF_AMR', 'AF_ASN', 'AF_EUR')
    }, 'dbsnp' => {
        'dir' => $dir_dbsnp, 'release' => $release_dbsnp, 'value' => join(':', ('na')x2), 'header' => join(':', 'rs', 'dbsnp')
    }, 'cosmic_var' => {
        'dir' => $dir_cosmic, 'release' => $release_cosmic, 'value' => join(':', '0.00000', 'na'), 'header' => join(':', 'AF_COS', 'cid')
    }
);

# Column name => description, printed as '#name = description' legend lines.
my %legend=(
    'chr' => 'chromosome identifier',
    'start' => "${buildver} 1-based start position",
    'end' => "${buildver} 1-based end position",
    'ref' => 'reference allele',
    'alt' => 'alternate allele',
    'QC' => 'Phred-scaled call quality',
    'NRF' => '#reads consistent w/ the reference allele on the F-strand',
    'NRR' => '#reads consistent w/ the reference allele on the R-strand',
    'NAF' => '#reads consistent w/ the alternate allele on the F-strand',
    'NAR' => '#reads consistent w/ the alternate allele on the R-strand',
    'DP' => 'total #reads in call ie. NRF+NRR+NAF+NAR',
    'AD' => 'total #reads consistent w/ the alternate allele ie. NAF+NAR',
    'AF' => 'alternate allele ratio ie. AD/DP',
    'VCF.FILTER' => 'FILTER field from the input vcf file',
    'DPT.FILTER' => 'check for heterogeneous depth in substituted blocks',
    'VAR.FILTER' => 'GFAP default FILTER to discriminate between TP and FP variants',
    'P.str' => 'NRF+NAF vs. NRR+NAR binomial test P-value ie. total strand bias',
    'P.ref' => 'NRF vs. NRR binomial test P-value ie. reference allele strand bias',
    'P.alt' => 'NAF vs. NAR binomial test P-value ie. alternate allele strand bias',
    'AF_ALL' => "global AF in ${release_1000g} 1000g data",
    'AF_AFR' => "AF in AFR ${release_1000g} 1000g data",
    'AF_AMR' => "AF in AMR ${release_1000g} 1000g data",
    'AF_ASN' => "AF in ASN ${release_1000g} 1000g data",
    'AF_EUR' => "AF in EUR ${release_1000g} 1000g data",
    'AF_COS' => "AF in ${release_cosmic} cosmic data",
    'rs' => "dbsnp rs identifier(s) from ${release_dbsnp} release",
    'dbsnp' => "dbsnp build version(s) from ${release_dbsnp} release",
    'cid' => "cosmic mutation identifier from ${release_cosmic} release"
);

# Input column order; the data set columns are appended in the varscan loop.
my @header=('chr', 'start', 'end', 'ref', 'alt', 'DPT.FILTER', 'QC', 'NRF', 'NRR', 'NAF', 'NAR', 'VCF.FILTER', 'P.str', 'P.ref', 'P.alt', 'DP', 'AD', 'AF', 'VAR.FILTER');
my @k=qw[ 1000g dbsnp cosmic_var ];

my @varlist;    # variant keys ("chr:start:end", 'chr' prefix removed) in input order
my %varlist;    # key => { ref, alt, cov, plus whatever varscan stores per data set }

open my $in, '<', $varfile or die "Cannot read $varfile: $!";
while (my $line = <$in>) {
    chomp $line;
    next unless $line =~ /\S/;    # tolerate blank lines

    my @field = split /\s+/, $line;
    # chr, start, end, ref, alt and the depth-filter column are all read below.
    @field >= 6
        or die "$varfile line $.: expected at least 6 columns, found " . scalar(@field) . "\n";

    (my $chr = shift @field) =~ s/^chr(.+)$/$1/;
    my $key = join(':', $chr, splice(@field, 0, 2));

    # NOTE(review): rows sharing chr:start:end overwrite each other in
    # %varlist while @varlist keeps every occurrence, so the last such row
    # is printed once per occurrence.
    push @varlist, $key;
    $varlist{$key}->{$_} = shift(@field) foreach qw[ ref alt ];

    # The depth-filter column is reduced to SKIP ('unk') or PASS (anything
    # else); the remaining call columns are kept verbatim.
    my $depth_filter = shift @field;
    $varlist{$key}->{cov} = join(':', (($depth_filter eq 'unk') ? 'SKIP' : 'PASS'), @field);
}
close $in;

foreach my $db (@k) {
    push @header, split(/:/, $k{$db}->{header});
    # NOTE(review): %k never defines a 'file' entry, so the second argument
    # is always undef here. Presumably it should be a path derived from
    # 'dir' and 'release' -- confirm against ngsutil::varscan.
    varscan($db, $k{$db}->{file}, \%varlist);
}

# Output column order, expressed as indexes into @header / each assembled row.
my @idx=(0..4,7..10,15..17,6,12..14,11,5,18..23,26..27,24..25);

my $dbifile = "${outdir}/${fname}.dbi";
open my $out, '>', $dbifile or die "Cannot write $dbifile: $!";
print {$out} '#', join(' = ', $_, $legend{$_}), "\n" foreach @header[@idx];
print {$out} '#', join("\t", @header[@idx]), "\n";
foreach my $key (@varlist) {
    my @row = (split(/:/, 'chr'.$key), $varlist{$key}->{ref}, $varlist{$key}->{alt});
    # Fall back to the data set's default values when no annotation is stored.
    push @row, split(/:/, ($varlist{$key}->{$_} || $k{$_}->{value})) foreach ('cov', @k);
    print {$out} join("\t", @row[@idx]), "\n";
}
close $out or die "Cannot finish writing $dbifile: $!";

# Replace --outfile with a symlink to the result without going through a
# shell, so paths containing spaces or shell metacharacters are handled
# literally. A failed unlink is not fatal by itself (the file may simply not
# exist); if something is still in the way, symlink reports it.
unlink $outfile;
symlink($dbifile, $outfile) or die "Cannot link $outfile to $dbifile: $!";