Mercurial > repos > yusuf > associate_phenotypes
view filter_by_susceptibility_loci_pipe @ 0:6411ca16916e default tip
initial commit
author | Yusuf Ali <ali@yusuf.email> |
---|---|
date | Wed, 25 Mar 2015 13:23:29 -0600 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env perl # Reports SNPs associated with susceptibility loci determined by GWAS use strict; use warnings; @ARGV == 4 or die "Usage: $0 <GWAS db.txt> <input.annotated.hgvs.txt> <output.txt> <pheno query>\n"; my $gwas_file = shift @ARGV; my $hgvs_file = shift @ARGV; my $outfile = shift @ARGV; my $pheno_query = shift @ARGV; $pheno_query =~ s/^\s+|\s+$//g; # trim leading and trailing whitespace my $s; $pheno_query =~ s/\(.*?\)/$s=$&; $s=~s#\s+or\s+#|#g; $s/eg; my @or_terms = split /\s+or\s+/i, $pheno_query; for my $term (@or_terms){ $term = and_query($term); # recursively convert Boolean AND to equivalent regex } # Date Added to Catalog PUBMEDID First Author Date Journal Link Study Disease/Trait Initial Sample Size Replication Sample Size Region Chr_id Chr_pos Reported Gene(s) Mapped_gene Upstream_gene_id Downstream_gene_id Snp_gene_ids Upstream_gene_distance Downstream_gene_distance Strongest SNP-Risk Allele SNPs Merged Snp_id_current Context Intergenic Risk Allele Frequency p-Value Pvalue_mlog p-Value (text) OR or beta 95% CI (text) Platform [SNPs passing QC] CNV open(GWAS, $gwas_file) or die "Cannot open GWAS file for reading: $!\n"; my $header = <GWAS>; chomp $header; my @columns = split /\t/, $header; my $trait_column_index = -1; my $study_column_index = -1; my $chr_column_index = -1; my $pos_column_index = -1; my $allele_column_index = -1; my $context_column_index = -1; my $odds_column_index = -1; my $pubmed_column_index = -1; for(my $i = 0; $i < $#columns; $i++){ if($columns[$i] eq "Disease/Trait"){ $trait_column_index = $i; } elsif($columns[$i] eq "Study"){ $study_column_index = $i; } elsif($columns[$i] eq "Chr_id"){ $chr_column_index = $i; } elsif($columns[$i] eq "Chr_pos"){ $pos_column_index = $i; } elsif($columns[$i] eq "Strongest SNP-Risk Allele"){ $allele_column_index = $i; } elsif($columns[$i] eq "p-Value (text)"){ $context_column_index = $i; } elsif($columns[$i] eq "OR or beta"){ $odds_column_index = $i; } elsif($columns[$i] eq "PUBMEDID"){ $pubmed_column_index = $i; } } if($trait_column_index == -1){ die "Could not find Trait column header in provided GWAS catalog file $gwas_file, aborting\n"; } if($study_column_index == -1){ die "Could not find Study column header in provided GWAS catalog file $gwas_file, aborting\n"; } if($chr_column_index == -1){ die "Could not find chromosome name column header in provided GWAS catalog file $gwas_file, aborting\n"; } if($pos_column_index == -1){ die "Could not find chromosomal position column header in provided GWAS catalog file $gwas_file, aborting\n"; } if($allele_column_index == -1){ die "Could not find risk allele column header in provided GWAS catalog file $gwas_file, aborting\n"; } if($context_column_index == -1){ die "Could not find p-value context column header in provided GWAS catalog file $gwas_file, aborting\n"; } if($odds_column_index == -1){ die "Could not find odds ratio column header in provided GWAS catalog file $gwas_file, aborting\n"; } if($pubmed_column_index == -1){ die "Could not find PubMed ID column header in provided GWAS catalog file $gwas_file, aborting\n"; } my %snp2desc; while(<GWAS>){ chomp; next unless /\S/; my @F = split /\t/, $_; my $chr = $F[$chr_column_index]; $chr =~ s/^chr//; # drop prefix if present my $strongest_risk_allele = $F[$allele_column_index]; $strongest_risk_allele =~ s/^.*-//; # initially looks like rs4345897-G, strip to just letter $snp2desc{$chr.":".$F[$pos_column_index].":".$strongest_risk_allele} = [$F[$trait_column_index], $F[$context_column_index], $F[$study_column_index], $F[$odds_column_index], $F[$pubmed_column_index]]; } close(GWAS); open(MATCHOUT, ">$outfile") or die "Cannot open $outfile for writing: $!\n"; open(HGVS, $hgvs_file) or die "Cannot open $hgvs_file for reading: $!\n"; $header = <HGVS>; chomp $header; my @header_columns = split /\t/, $header; my ($chr_column, $obs_column, $pos_column); for(my $i = 0; $i <= $#header_columns; $i++){ if($header_columns[$i] eq "DNA From"){ $pos_column = $i; } elsif($header_columns[$i] eq "Obs base"){ $obs_column = $i; } elsif($header_columns[$i] eq "Chr"){ $chr_column = $i; } } if(not defined $chr_column){ die "Could not find 'Chr' column in the input header, aborting\n"; } if(not defined $pos_column){ die "Could not find 'DNA From' column in the input header, aborting\n"; } if(not defined $obs_column){ die "Could not find 'Obs base' column in the input header, aborting\n"; } print MATCHOUT $header, "\tPhenotype GWAS Catalog text matches (matching $pheno_query)\tGWAS odds ratio\tGWAS susceptibility description for this SNP\n"; while(<HGVS>){ chomp; my @F = split /\t/, $_, -1; my $allele = $F[$obs_column]; $allele =~ s(/.*$)(); # ignore ref allele in het calls my $chr = $F[$chr_column]; $chr =~ s/^chr//; # remove chr name prefix if present my $key = $chr.":".$F[$pos_column].":".$allele; #print STDERR "$key\n"; if(not exists $snp2desc{$key}){ print MATCHOUT join("\t", @F, "", "", ""), "\n"; next; } my @matches; for my $term (@or_terms){ next if grep {$_ eq $term} @matches; #print STDERR "Checking match for term $term...\n"; if($snp2desc{$key}->[0] =~ /\b$term/i or # trait $snp2desc{$key}->[1] =~ /\b$term/i or # p-value context $snp2desc{$key}->[2] =~ /\b$term/i){ # study name push @matches, $term; last; } } chomp $F[$#F]; # print all GWAS, whether they match query or not print MATCHOUT join("\t", @F, join("; ", sort @matches), $snp2desc{$key}->[3], "PubMedID ".$snp2desc{$key}->[4].": trait '".$snp2desc{$key}->[0]."'".($snp2desc{$key}->[1] =~ /\S/ ? " in context of '".$snp2desc{$key}->[1]."'" : "").". Study: ".$snp2desc{$key}->[2]), "\n"; } close(HGVS); close(MATCHOUT); sub and_query{ my ($query) = @_; if($query =~ /^(.+)\s+and\s+(.+)$/){ my $t1 = $1; my $t2 = $2; my $term1 = and_query($t1); my $term2 = and_query($t2); return "($term1.*$term2|$term2.*$term1)"; } else{ return $query; # base case: single term } return }