annotate associate_variant_phenotypes @ 0:6411ca16916e default tip

initial commit
author Yusuf Ali <ali@yusuf.email>
date Wed, 25 Mar 2015 13:23:29 -0600
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
1 #!/usr/bin/env perl
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
2
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
3 use strict;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
4 use warnings;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
5 use Math::CDF qw(pchisq); # chi square for calculating Fisher's method of combining p-values
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
6 use File::Basename;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
7 my $dirname = dirname(__FILE__);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
8
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
9 # configuration file stuff
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
10 my %config;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
11 my $tool_data = shift @ARGV;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
12 if(not -e "$tool_data/associate_phenotypes.loc" ){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
13 system("cp $dirname/tool-data/associate_phenotypes.loc $tool_data/associate_phenotypes.loc") >> 8 and die "Could not create config file: $!\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
14 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
15 open CONFIG, '<', "$tool_data/associate_phenotypes.loc";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
16 while(<CONFIG>){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
17 next if $_ =~ /^#/;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
18 (my $key, my $value) = split(/\s+/,$_);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
19 $config{$key} = $value;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
20 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
21 close CONFIG;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
22 my $dbs_dir = $config{"dbs_dir"};
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
23
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
24 @ARGV == 6 or die "Usage: $0 <outfiles prefix> <annotated input> <preselected gene list of interest> <human literature terms> <mouse knockout terms> <gene ontology terms>\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
25
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
26 my $final_confident_outfiles_prefix = shift @ARGV;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
27 my $confident_input = shift @ARGV;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
28 my $preselected_genes_file = shift @ARGV;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
29 my $human_lit_file = shift @ARGV;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
30 my $mouse_knockout_file = shift @ARGV;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
31 my $gene_ontology_file = shift @ARGV;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
32
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
33 my @genes;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
34 if(-e $preselected_genes_file){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
35 open(GENES, $preselected_genes_file)
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
36 or die "Cannot open $preselected_genes_file for reading: $!\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
37 while(<GENES>){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
38 chomp;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
39 next if /^#/ or not /\S/;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
40 s/^\s+|\s+$//g; # get rid of trailing or leading spaces
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
41 push @genes, $_;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
42 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
43 close(GENES);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
44 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
45 else{
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
46 @genes = split / or /, $preselected_genes_file;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
47 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
48 my %genes;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
49 for my $g (@genes){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
50 $genes{lc($g)} = 1;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
51 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
52
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
53 my @human_lit_query;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
54 if(-e $human_lit_file){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
55 open(HUMAN, $human_lit_file)
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
56 or die "Cannot open $human_lit_file for reading: $!\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
57 while(<HUMAN>){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
58 chomp;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
59 next if /^#/ or not /\S/;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
60 s/^\s+|\s+$//g; # get rid of trailing or leading spaces
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
61 s/\s+/ /g; # normalize any other whitespace
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
62 # to do: stem query terms? exclude stop words?
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
63 push @human_lit_query, $_;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
64 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
65 close(HUMAN);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
66 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
67 else{
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
68 @human_lit_query = split / or /, $human_lit_file;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
69 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
70 my $human_lit_query = join(" or ", @human_lit_query, @genes);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
71
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
72 my @mouse_knockout_query;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
73 if(-e $mouse_knockout_file){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
74 open(MOUSE, $mouse_knockout_file)
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
75 or die "Cannot open $mouse_knockout_file for reading: $!\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
76 while(<MOUSE>){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
77 chomp;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
78 next if /^#/ or not /\S/;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
79 s/^\s+|\s+$//g; # get rid of trailing or leading spaces
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
80 s/\s+/ /g; # normalize any other whitespace
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
81 # to do: stem query terms? exclude stop words?
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
82 push @mouse_knockout_query, $_;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
83 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
84 close(MOUSE);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
85 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
86 else{
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
87 @mouse_knockout_query = split / or /, $mouse_knockout_file;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
88 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
89 my $mouse_knockout_query = join(" or ", @mouse_knockout_query);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
90
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
91 my @go_query;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
92 if(-e $gene_ontology_file){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
93 open(GO, $gene_ontology_file)
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
94 or die "Cannot open $gene_ontology_file for reading: $!\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
95 while(<GO>){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
96 chomp;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
97 next if /^#/ or not /\S/;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
98 s/^\s+|\s+$//g; # get rid of trailing or leading spaces
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
99 s/\s+/ /g; # normalize any other whitespace
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
100 # to do: stem query terms? exclude stop words?
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
101 push @go_query, $_;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
102 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
103 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
104 else{
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
105 @go_query = split / or /, $gene_ontology_file;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
106 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
107 my $go_query = join(" or ", @go_query);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
108 close(GO);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
109
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
110 my @cmds;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
111
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
112 if($human_lit_query){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
113 # do pubmed first because it has to potentially download references from the internet, so better to do this with just a couple concurrent rather than a lot, which would stress the remote iHOP server
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
114 push @cmds, "$dirname/filter_by_index_gamma $dbs_dir/IHOP/ PubMed $confident_input - '$human_lit_query'";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
115 push @cmds, "$dirname/filter_by_susceptibility_loci_pipe $dbs_dir/GWAS/gwascatalog.txt - - '$human_lit_query'";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
116 push @cmds, "$dirname/filter_by_index_gamma $dbs_dir/OMIM/omim.txt. OMIM - - '$human_lit_query'";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
117 push @cmds, "$dirname/filter_by_index_gamma $dbs_dir/ClinVar/ClinVarFullRelease.xml. ClinVar - - '$human_lit_query'";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
118 push @cmds, "$dirname/filter_by_human_phenotype_ontology_pipe $dbs_dir/HPO - - '$human_lit_query'";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
119 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
120
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
121 if($mouse_knockout_query or $human_lit_query){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
122 if($mouse_knockout_query){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
123 if($human_lit_query){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
124 $mouse_knockout_query .= " or $human_lit_query";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
125 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
126 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
127 else{
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
128 $mouse_knockout_query = $human_lit_query;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
129 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
130 if($human_lit_query){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
131 push @cmds, "$dirname/filter_by_mouse_knockout_pipe $dbs_dir/MGI/2013-03-15 - - '$mouse_knockout_query'";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
132 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
133 else{
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
134 push @cmds, "$dirname/filter_by_mouse_knockout_pipe $dbs_dir/MGI/2013-03-15 $confident_input - '$mouse_knockout_query'"
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
135 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
136 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
137
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
138 if($go_query or $human_lit_query){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
139 if($go_query){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
140 if(@human_lit_query){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
141 $go_query .= " or ".join(" or ", @human_lit_query);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
142 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
143 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
144 else{
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
145 $go_query = join(" or ", @human_lit_query);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
146 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
147 if($mouse_knockout_query or $human_lit_query){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
148 push @cmds, "$dirname/associate_phenotypes/filter_by_gene_ontology_pipe $dbs_dir/GOA - - '$go_query'";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
149 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
150 else{
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
151 push @cmds, "$dirname/associate_phenotypes/filter_by_gene_ontology_pipe $dbs_dir/GOA $confident_input - '$go_query'";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
152 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
153 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
154
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
155 &print_final_output($final_confident_outfiles_prefix, @cmds);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
156
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
157 # Use Fisher's Method to combine p-values from various phenotype sources into a single score for ranking
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
158 # This is an okay method to use (rather than something more complicated like Brown's method), because our
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
159 # experience with real queries is that there is surprsingly little correlation (Spearman's rank or Kendall's tau) between
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
160 # the p-values for different sources (primary or curated secondary).
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
161 sub print_final_output{
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
162 my ($final_output_prefix, @cmds) = @_;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
163
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
164 my $cmd = join("|", @cmds). "|"; # pipe output so we read the stream in the handle below
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
165 open(ORIG, $cmd)
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
166 or die "Could not run '$cmd': $!\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
167 my $header = <ORIG>;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
168 chomp $header;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
169 my @orig_header = split /\t/, $header;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
170 my ($chr_column, $pos_column, $gene_column, $hgvs_aa_column, $maf_column, $srcs_column, @pvalue_columns, @pheno_match_columns);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
171 for(my $i = 0; $i <= $#orig_header; $i++){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
172 if($orig_header[$i] eq "Chr"){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
173 $chr_column = $i;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
174 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
175 elsif($orig_header[$i] eq "DNA From"){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
176 $pos_column = $i;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
177 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
178 elsif($orig_header[$i] eq "Gene Name"){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
179 $gene_column = $i;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
180 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
181 elsif($orig_header[$i] eq "Protein HGVS"){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
182 $hgvs_aa_column = $i;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
183 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
184 elsif($orig_header[$i] eq "Pop. freq."){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
185 $maf_column = $i;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
186 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
187 elsif($orig_header[$i] eq "Sources"){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
188 $srcs_column = $i;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
189 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
190 elsif($orig_header[$i] =~ /p-value/){ # columns of pheno association with a stat
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
191 push @pvalue_columns, $i;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
192 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
193 elsif($orig_header[$i] =~ /\(matching/){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
194 push @pheno_match_columns, $i;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
195 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
196 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
197 if(not defined $chr_column){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
198 die "Could not find the 'Chr' column in the header, aborting ($header)\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
199 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
200 elsif(not defined $pos_column){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
201 die "Could not find the 'DNA From' column in the header, aborting ($header)\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
202 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
203 elsif(not defined $hgvs_aa_column){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
204 die "Could not find the 'Protein HGVS' column in the header, aborting ($header)\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
205 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
206 elsif(not defined $maf_column){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
207 die "Could not find the 'Pop. freq.' column in the header, aborting ($header)\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
208 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
209 # Sources is optional
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
210
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
211 # all other headers from other output files generated will be appended to the original ones
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
212 my @final_header = (@orig_header, "Combined phenotype relevance P-value");
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
213 if(@genes){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
214 push @final_header, "Targeted Gene?";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
215 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
216 my %lines; # chr -> position -> [dataline1, dataline2, ...]
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
217 my %source; # no. lines per variant source
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
218 while(<ORIG>){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
219 chomp;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
220 next unless /\S/; # ignore blank lines
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
221 my @F = split /\t/, $_, -1; # keep trailing blank fields
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
222 my $chr = $F[$chr_column];
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
223 $chr =~ s/^chr//; # helps for sorting purposes
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
224 my $pos = $F[$pos_column];
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
225 $pos =~ s/-.*$//; # CNVs have a range
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
226 $lines{$chr} = {} if not exists $lines{$F[$chr_column]};
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
227 $lines{$chr}->{$pos} = [] if not exists $lines{$F[$chr_column]}->{$pos};
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
228 my @final_dataline = @F; # fields that are the same in all files since they were in the original
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
229 for(my $i = 0; $i < $#final_dataline; $i++){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
230 $final_dataline[$i] = "" if not defined $final_dataline[$i];
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
231 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
232 # Create aggregate phenotype relevance score using Fisher's method
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
233 # A combined p-value for k p-values (P1...Pk) is calculated using a chi-square value (with 2k degrees of freedom) derived by -2*sum(ln(Pi), i=1..k)
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
234 my $chi_sq = 0;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
235 my $num_pvalues = 0;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
236 my $last_pvalue = 1;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
237 for my $pvalue_index (@pvalue_columns){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
238 next if $F[$pvalue_index] eq "";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
239 $last_pvalue = $F[$pvalue_index];
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
240 $F[$pvalue_index] = 0.00001 if not $F[$pvalue_index]; # avoids log(0) issue
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
241 $num_pvalues++;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
242 $chi_sq += log($F[$pvalue_index]);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
243 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
244 my $fisher_pvalue = 1;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
245 if($num_pvalues > 1){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
246 $chi_sq *= -2;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
247 my $p = pchisq($chi_sq, 2*scalar(@pvalue_columns));
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
248 if(not defined $p){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
249 print STDERR "($num_pvalues) No X2 test value for $chi_sq (";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
250 for my $pvalue_index (@pvalue_columns){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
251 if($F[$pvalue_index] eq ""){print STDERR "NA "}
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
252 else{print STDERR $F[$pvalue_index], " "}
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
253 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
254 print STDERR ")\n$_\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
255 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
256 $fisher_pvalue = 1-$p;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
257 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
258 elsif($num_pvalues == 1){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
259 $fisher_pvalue = $last_pvalue; # no multiple testing correction
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
260 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
261 else{
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
262 for my $match_column (@pheno_match_columns){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
263 next if $F[$match_column] eq ""; # give a token amount of positive score to ontology term matches
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
264 for my $match (split /\/\/|;/, $F[$match_column]){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
265 last if $fisher_pvalue <= 0.001; # only make better if not realy close to zero anyway
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
266 $fisher_pvalue -= 0.001;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
267 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
268 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
269 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
270 push @final_dataline, abs($fisher_pvalue);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
271 if(@genes){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
272 push @final_dataline, (grep({exists $genes{$_}} split(/; /, lc($F[$gene_column]))) ? "yes" : "no");
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
273 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
274 push @{$lines{$chr}->{$pos}}, \@final_dataline;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
275
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
276 next unless defined $srcs_column and $F[$srcs_column] =~ /(?:^|\+| )(\S+?)(?=;|$)/;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
277 $source{$1}++;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
278 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
279
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
280 my @outfiles = ("$final_output_prefix.novel.hgvs.txt", "$final_output_prefix.very_rare.hgvs.txt", "$final_output_prefix.rare.hgvs.txt", "$final_output_prefix.common.hgvs.txt");
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
281 open(OUT_NOVEL, ">$outfiles[0]")
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
282 or die "Cannot open $outfiles[0] for writing: $!\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
283 open(OUT_VERY_RARE, ">$outfiles[1]")
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
284 or die "Cannot open $outfiles[1] for writing: $!\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
285 open(OUT_RARE, ">$outfiles[2]")
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
286 or die "Cannot open $outfiles[2] for writing: $!\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
287 open(OUT_COMMON, ">$outfiles[3]")
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
288 or die "Cannot open $outfiles[3] for writing: $!\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
289 print OUT_NOVEL join("\t", @final_header), "\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
290 print OUT_VERY_RARE join("\t", @final_header), "\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
291 print OUT_RARE join("\t", @final_header), "\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
292 print OUT_COMMON join("\t", @final_header), "\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
293 my @sorted_chrs = sort {$a =~ /^\d+$/ and $b =~ /^\d+$/ and $a <=> $b or $a cmp $b} keys %lines;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
294 for my $chr (@sorted_chrs){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
295 for my $pos (sort {$a <=> $b} keys %{$lines{$chr}}){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
296 my $datalines_ref = $lines{$chr}->{$pos};
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
297 # The following sorting puts all protein coding effect for a variant before non-coding ones
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
298 my @sorted_dataline_refs = sort {$a ne "NA" and $b ne "NA" and $a->[$hgvs_aa_column] cmp $a->[$hgvs_aa_column] or $b cmp $a} @$datalines_ref;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
299 for my $dataline_ref (@sorted_dataline_refs){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
300 next unless defined $dataline_ref;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
301 my $maf = $dataline_ref->[$maf_column];
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
302 my $tot_line_length = 0;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
303 for(my $i = 0; $i < $#{$dataline_ref}; $i++){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
304 if(not defined $dataline_ref->[$i]){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
305 $dataline_ref->[$i] = ""; # so we don't get crappy warnings of undefined values
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
306 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
307 else{
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
308 $tot_line_length += length($dataline_ref->[$i]);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
309 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
310 $tot_line_length++; # the tab
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
311 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
312 if($tot_line_length > 32000){ # Excel limit of 32767 characters in a cell. Also, undocumented bug that entire import line cannot exceeed this length.
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
313 # If we don't truncate, the rest of the line (including entire contents of cells to the right) are unceremoniously dumped.
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
314 # Note that personal experience has shown that the limit is actually a bit below this, so rounding down to the nearest 1000 for safety
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
315 my $overage = $tot_line_length - 32000;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
316 my $sum_of_large_cells = 0;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
317 my $num_large_cells = 0;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
318 for(my $i = 0; $i <= $#{$dataline_ref}; $i++){ # remove contents from the largest cells
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
319 if(length($dataline_ref->[$i]) > 3200){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
320 $sum_of_large_cells += length($dataline_ref->[$i]); # all cells that are at least 10% of the max
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
321 $num_large_cells++;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
322 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
323 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
324 my $cell_max_alloc = int((32000-($tot_line_length-$sum_of_large_cells))/$num_large_cells);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
325 for(my $i = 0; $i <= $#{$dataline_ref}; $i++){ # truncate the bigger than average ones
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
326 if(length($dataline_ref->[$i]) > $cell_max_alloc){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
327 $dataline_ref->[$i] = substr($dataline_ref->[$i], 0, $cell_max_alloc-37)."[...remainder truncated for length]";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
328 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
329 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
330 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
331 if($maf eq "NA"){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
332 print OUT_NOVEL join("\t", @$dataline_ref), "\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
333 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
334 if($maf eq "NA" or $maf < 0.005){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
335 print OUT_VERY_RARE join("\t", @$dataline_ref), "\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
336 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
337 if($maf eq "NA" or $maf < 0.05){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
338 print OUT_RARE join("\t", @$dataline_ref), "\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
339 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
340 print OUT_COMMON join("\t", @$dataline_ref), "\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
341 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
342 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
343 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
344 close(OUT_NOVEL);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
345 close(OUT_VERY_RARE);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
346 close(OUT_RARE);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
347 close(OUT_COMMON);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
348
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
349 # Print per-source tables (e.g. for each patient in a cohort)
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
350 for my $src (keys %source){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
351 for my $outfile (@outfiles){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
352 open(IN, $outfile)
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
353 or die "cannot open $outfile for reading: $!\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
354 my $src_outfile = $outfile;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
355 $src_outfile =~ s/$final_output_prefix/$final_output_prefix-$src/;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
356 open(OUT, ">$src_outfile")
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
357 or die "Cannot open $src_outfile for writing: $!\n";
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
358 print OUT scalar(<IN>); # header line
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
359 while(<IN>){
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
360 print OUT $_ if /(?:^|\+| )($src)(?=;|$)/;
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
361 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
362 close(OUT);
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
363 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
364 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
365 }
6411ca16916e initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
366