annotate tools/human_genome_variation/disease_ontology_gene_fuzzy_selector.pl @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
#!/usr/bin/env perl

use strict;
use warnings;

##################################################################
# Select genes that are associated with the diseases listed in the
# disease ontology.
# ontology: http://do-wiki.nubic.northwestern.edu/index.php/Main_Page
# gene associations by FunDO: http://projects.bioinformatics.northwestern.edu/do_rif/
# Sept 2010, switch to doLite
# input: build outfile sourceFileLoc.loc term or partial term
#
# How it works:
#   1. The .loc file (tab-separated) is scanned for rows whose first
#      column equals the build and whose second column is
#      'disease associated genes'; the third column of the last such
#      row is taken as the path of the data file.
#   2. The term is split on whitespace into words. A data row is written
#      to the output file when its 7th column contains any of the words
#      (case-insensitive, literal substring match).
#   3. The single word 'disease' selects every row of the data file.
##################################################################

if (@ARGV < 3) {
    print "usage: disease_ontology_gene_fuzzy_selector.pl build outfile.txt sourceFile.loc [list of terms]\n";
    exit;
}

my $build = shift @ARGV;
my $out   = shift @ARGV;
my $in    = shift @ARGV;
my $term  = shift @ARGV;

# Only touch the term when one was actually supplied; the "no term"
# case is reported further down, after the build has been validated.
if (defined $term) {
    $term =~ s/^'//;    # remove quotes protecting from shell
    $term =~ s/'$//;
}

# Locate the data file for this build in the .loc file.
my $data;
open(my $loc_fh, '<', $in) or die "Couldn't open $in, $!\n";
while (my $line = <$loc_fh>) {
    chomp $line;
    next if $line =~ /^\s*#/;    # comment line
    my @f = split(/\t/, $line);

    # Blank or short lines cannot describe a data source.
    next unless defined $f[0] && defined $f[1];

    if ($f[0] eq $build && $f[1] eq 'disease associated genes') {
        $data = $f[2];           # no early exit: the last matching row wins
    }
}
close $loc_fh or die "Couldn't close $in, $!\n";

if (!$data) {
    print "Error $build not found in $in\n";
    exit;
}
if (!defined $term) {
    print "No disease term entered\n";
    exit;
}

# split ' ' discards leading/trailing whitespace, so the alternation built
# below never contains an empty branch (which would match every row).
my @words = split(' ', $term);
if (!@words) {
    print "No disease term entered\n";
    exit;
}

# The lone word 'disease' means "print all with disease".
my $print_all = (@words == 1 && $words[0] eq 'disease');

# Use OR between words. Each word is escaped so that punctuation in a
# disease name is matched literally instead of being read as regex syntax.
my $pattern = join('|', map { quotemeta($_) } @words);
my $term_re = qr/$pattern/i;

# start with just fuzzy term matches
open(my $out_fh, '>', $out) or die "Couldn't open $out, $!\n";
open(my $data_fh, '<', $data) or die "Couldn't open data file $data, $!\n";
while (my $line = <$data_fh>) {
    chomp $line;
    my @f = split(/\t/, $line);    # chrom start end strand geneName geneID disease

    # Rows without a disease column can only be selected by 'disease'.
    if ($print_all or (defined $f[6] and $f[6] =~ $term_re)) {
        print {$out_fh} join("\t", @f), "\n";
    }
}
close $data_fh or die "Couldn't close data file $data, $!\n";
close $out_fh or die "Couldn't close $out, $!\n";

exit;