Repository 'hgv_fundo'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/hgv_fundo

Changeset 0:64133669255f (2013-09-25)
Next changeset 1:ca3fcd18dc3e (2015-11-11)
Commit message:
Uploaded tool tarball.
added:
disease_ontology_gene_fuzzy_selector.pl
funDo.xml
test-data/funDo_output1.interval
tool-data/funDo.loc.sample
b
diff -r 000000000000 -r 64133669255f disease_ontology_gene_fuzzy_selector.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/disease_ontology_gene_fuzzy_selector.pl Wed Sep 25 11:22:47 2013 -0400
[
@@ -0,0 +1,64 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+##################################################################
+# Select genes that are associated with the diseases listed in the
+# disease ontology.
+# ontology: http://do-wiki.nubic.northwestern.edu/index.php/Main_Page
+# gene associations by FunDO: http://projects.bioinformatics.northwestern.edu/do_rif/
+# Sept 2010, switch to doLite
+# input: build outfile sourceFileLoc.loc term or partial term
+# Problems are reported on STDERR with a non-zero exit status.
+##################################################################
+
+if (@ARGV < 3) {
+   die "usage: disease_ontology_gene_fuzzy_selector.pl build outfile.txt sourceFile.loc [list of terms]\n";
+}
+
+my ($build, $out, $in, $term) = @ARGV;
+
+# Validate the term before touching it; only three arguments are enforced above.
+die "No disease term entered\n" if !defined $term;
+$term =~ s/^'//; #remove quotes protecting from shell
+$term =~ s/'$//;
+# split ' ' ignores leading/trailing blanks, so no empty alternative (which
+# would match every line) can end up in the pattern.
+my @words = split ' ', $term;
+die "No disease term entered\n" if !@words;
+my $print_all = (@words == 1 && $words[0] eq 'disease'); #print all with disease
+# Words are matched literally: escape regex metacharacters, OR between words.
+my $pattern = join '|', map { quotemeta($_) } @words;
+my $term_re = qr/$pattern/i;
+
+# Look up the data file registered for this build in the .loc file.
+my $data;
+open(my $loc_fh, '<', $in) or die "Couldn't open $in, $!\n";
+while (my $line = <$loc_fh>) {
+   chomp $line;
+   next if $line =~ /^\s*#/;
+   my @f = split(/\t/, $line); #build description path
+   next if @f < 3; #blank or malformed line
+   if ($f[0] eq $build and $f[1] eq 'disease associated genes') {
+      $data = $f[2];
+   }
+}
+close $loc_fh or die "Couldn't close $in, $!\n";
+die "Error $build not found in $in\n" if !$data;
+
+#start with just fuzzy term matches
+open(my $out_fh, '>', $out) or die "Couldn't open $out, $!\n";
+open(my $data_fh, '<', $data) or die "Couldn't open data file $data, $!\n";
+while (my $line = <$data_fh>) {
+   chomp $line;
+   my @f = split(/\t/, $line); #chrom start end strand geneName geneID disease
+   next if !defined $f[6]; #no disease column on this line
+   if ($print_all or $f[6] =~ $term_re) {
+      print {$out_fh} join("\t", @f), "\n";
+   }
+}
+close $data_fh or die "Couldn't close data file $data, $!\n";
+close $out_fh or die "Couldn't close $out, $!\n";
+
+exit;
b
diff -r 000000000000 -r 64133669255f funDo.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/funDo.xml Wed Sep 25 11:22:47 2013 -0400
b
@@ -0,0 +1,101 @@
+<tool id="hgv_funDo" name="FunDO" version="1.0.0">
+  <description>human genes associated with disease terms</description>
+
+  <command interpreter="perl">
+    disease_ontology_gene_fuzzy_selector.pl $build $out_file1 ${GALAXY_DATA_INDEX_DIR}/funDo.loc '$term'
+  </command>
+
+  <inputs>
+    <param name="build" type="select" label="Database build">
+      <options from_file="funDo.loc">
+        <column name="name" index="0"/>
+        <column name="value" index="0"/>
+        <filter type="unique_value" column="0"/>
+      </options>
+    </param>
+    <param name="term" size="40" type="text" label="Disease term(s)" />
+  </inputs>
+
+  <outputs>
+    <data format="interval" name="out_file1">
+    </data>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="term" value="lung"/>
+      <param name="build" value="hg18"/>
+      <output name="out_file1" file="funDo_output1.interval" />
+    </test>
+  </tests>
+
+  <help>
+**Dataset formats**
+
+There is no input dataset.  The output is in interval_ format.
+
+.. _interval: ${static_path}/formatHelp.html#interval
+
+-----
+
+**What it does**
+
+This tool searches the disease-term field of the DOLite mappings
+used by the FunDO project and returns a set of genes that 
+are associated with terms matching the specified pattern.  (This is the
+reverse of what FunDO's own server does.)
+
+The search is case insensitive, and selects terms that contain any of
+the given words, either exactly or within a longer word (e.g. "nemia"
+selects not only "anemia", but also "hyperglycinemia", "tyrosinemias",
+and many other things).  Multiple words should be separated by spaces,
+not commas.  As a special case, entering the word "disease" returns all
+genes associated with any disease, even if that word does not actually
+appear in the term field.
+
+Website: http://django.nubic.northwestern.edu/fundo/
+
+-----
+
+**Example**
+
+Typing:: 
+
+    carcinoma
+
+results in::
+
+    1.     2.         3.         4. 5.       6.     7.
+    chr11  89507465   89565427   +  NAALAD2  10003  Adenocarcinoma
+    chr15  50189113   50192264   -  BCL2L10  10017  Carcinoma
+    chr7   150535855  150555250  -  ABCF2    10061  Clear cell carcinoma
+    chr7   150540508  150555250  -  ABCF2    10061  Clear cell carcinoma
+    chr10  134925911  134940397  -  ADAM8    101    Adenocarcinoma
+    chr10  134925911  134940397  -  ADAM8    101    Adenocarcinoma
+    etc.
+
+where the column contents are as follows::
+
+ 1. chromosome name
+ 2. start position of the gene
+ 3. end position of the gene
+ 4. strand
+ 5. gene name
+ 6. Entrez Gene ID
+ 7. disease term
+
+-----
+
+**References**
+
+Du P, Feng G, Flatow J, Song J, Holko M, Kibbe WA, Lin SM. (2009)
+From disease ontology to disease-ontology lite: statistical methods to adapt a general-purpose
+ontology for the test of gene-ontology associations.
+Bioinformatics. 25(12):i63-8.
+
+Osborne JD, Flatow J, Holko M, Lin SM, Kibbe WA, Zhu LJ, Danila MI, Feng G, Chisholm RL. (2009)
+Annotating the human genome with Disease Ontology.
+BMC Genomics. 10 Suppl 1:S6.
+
+  </help>
+</tool>
b
diff -r 000000000000 -r 64133669255f test-data/funDo_output1.interval
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funDo_output1.interval Wed Sep 25 11:22:47 2013 -0400
b
b'@@ -0,0 +1,483 @@\n+chr5\t324735\t368089\t+\tPDCD6\t10016\tLung cancer\n+chr10\t134925911\t134940397\t-\tADAM8\t101\tLung cancer\n+chr10\t134925911\t134940397\t-\tADAM8\t101\tLung cancer\n+chr10\t134925911\t134940397\t-\tADAM8\t101\tLung cancer\n+chr17\t46294585\t46296412\t-\tTOB1\t10140\tLung cancer\n+chr3\t50101371\t50131396\t+\tRBM5\t10181\tLung cancer\n+chr13\t94546025\t94751688\t-\tABCC4\t10257\tLung cancer\n+chr13\t94470083\t94751688\t-\tABCC4\t10257\tLung cancer\n+chr17\t38166737\t38168585\t+\tRAMP2\t10266\tLung cancer\n+chr16\t22264757\t22293439\t-\tCDR2\t1039\tLung disease\n+chr1\t144287344\t144297903\t+\tPIAS3\t10401\tLung cancer\n+chr19\t38556448\t38565432\t+\tCEBPG\t1054\tLung cancer\n+chr16\t66153964\t66230589\t+\tCTCF\t10664\tLung cancer\n+chr1\t11009166\t11029872\t-\tMASP2\t10747\tInfectious lung disease\n+chr1\t11027441\t11029872\t-\tMASP2\t10747\tInfectious lung disease\n+chrX\t133527538\t133620179\t-\tPLAC1\t10761\tLung cancer\n+chr8\t24207524\t24268671\t+\tADAM28\t10863\tLung cancer\n+chr8\t24207524\t24249557\t+\tADAM28\t10863\tLung cancer\n+chr18\t9465529\t9528106\t+\tRALBP1\t10928\tLung cancer\n+chr2\t9641556\t9688557\t-\tYWHAQ\t10971\tLung cancer\n+chr14\t23875066\t23879082\t-\tRIPK3\t11035\tLung cancer\n+chr5\t54309452\t54317171\t-\tESM1\t11082\tLung cancer\n+chr5\t54309452\t54317171\t-\tESM1\t11082\tLung cancer\n+chr6\t167332805\t167374056\t+\tFGFR1OP\t11116\tLung cancer\n+chr6\t167332805\t167374056\t+\tFGFR1OP\t11116\tLung cancer\n+chr11\t63098519\t63138517\t-\tPLA2G16\t11145\tLung cancer\n+chr11\t63098519\t63138485\t-\tPLA2G16\t11145\tLung cancer\n+chr4\t124033248\t124063209\t-\tNUDT6\t11162\tLung cancer\n+chr4\t124033248\t124063573\t-\tNUDT6\t11162\tLung cancer\n+chr12\t26096757\t26117073\t+\tRASSF8\t11228\tLung cancer\n+chr12\t26017954\t26117073\t+\tRASSF8\t11228\tLung cancer\n+chr12\t26003230\t26117073\t+\tRASSF8\t11228\tLung cancer\n+chr12\t26003230\t26124091\t+\tRASSF8\t11228\tLung 
cancer\n+chr3\t50337344\t50340672\t-\tTUSC2\t11334\tLung cancer\n+chr15\t76674701\t76700692\t-\tCHRNA3\t1136\tLung cancer\n+chr15\t76672451\t76700692\t-\tCHRNA3\t1136\tLung cancer\n+chr15\t76644960\t76673515\t+\tCHRNA5\t1138\tLung cancer\n+chr17\t7289129\t7301656\t+\tCHRNB1\t1140\tLung cancer\n+chr10\t101938113\t101979334\t-\tCHUK\t1147\tInfectious lung disease\n+chr18\t894943\t902173\t+\tADCYAP1\t116\tLung cancer\n+chr18\t895296\t902173\t+\tADCYAP1\t116\tLung cancer\n+chr3\t46386636\t46392701\t+\tCCR5\t1234\tLung cancer\n+chr3_random\t664914\t670974\t+\tCCR5\t1234\tLung cancer\n+chr3\t46386636\t46392701\t+\tCCR5\t1234\tLung cancer\n+chr3_random\t664914\t670974\t+\tCCR5\t1234\tLung cancer\n+chr4\t100211152\t100228954\t-\tADH5\t128\tLung cancer\n+chr16\t3715056\t3870122\t-\tCREBBP\t1387\tLung cancer\n+chr16\t3715056\t3870122\t-\tCREBBP\t1387\tLung cancer\n+chr4\t5873391\t5945686\t-\tCRMP1\t1400\tLung cancer\n+chr4\t5873391\t5941216\t-\tCRMP1\t1400\tLung cancer\n+chr5\t131437383\t131439758\t+\tCSF2\t1437\tLung cancer\n+chr17\t35425213\t35427592\t+\tCSF3\t1440\tLung cancer\n+chr17\t35425213\t35427592\t+\tCSF3\t1440\tLung cancer\n+chr17\t35425213\t35427592\t+\tCSF3\t1440\tLung cancer\n+chr3\t123526700\t123543505\t+\tCSTA\t1475\tLung cancer\n+chr21\t44018258\t44020687\t-\tCSTB\t1476\tLung cancer\n+chr6\t132311009\t132314211\t-\tCTGF\t1490\tLung cancer\n+chr5\t148186348\t148188389\t+\tADRB2\t154\tLung cancer\n+chr16\t49333461\t49393347\t+\tCYLD\t1540\tInfectious lung disease\n+chr16\t49333529\t49393347\t+\tCYLD\t1540\tInfectious lung disease\n+chr16\t49333461\t49393347\t+\tCYLD\t1540\tInfectious lung disease\n+chr19\t46041282\t46048192\t-\tCYP2A6\t1548\tLung cancer\n+chr19\t46286207\t46293939\t+\tCYP2A13\t1553\tLung cancer\n+chr12\t56442383\t56447243\t-\tCYP27B1\t1594\tLung cancer\n+chr17\t58908165\t58928711\t+\tACE\t1636\tInfectious lung disease\n+chr17\t58915908\t58928711\t+\tACE\t1636\tInfectious lung disease\n+chr8\t6860804\t6863226\t-\tDEFA3\t1668\tLung 
disease\n+chr6\t32256723\t32260001\t-\tAGER\t177\tLung cancer\n+chr6_cox_hap1\t3555933\t3559211\t-\tAGER\t177\tLung cancer\n+chr6_qbl_hap2\t3363712\t3366990\t-\tAGER\t177\tLung cancer\n+chr6\t32256723\t32260001\t-\tAGER\t177\tLung cancer\n+chr6_cox_hap1\t3555933\t3559211\t-\tAGER\t177\tLung cancer\n+chr6_qbl_hap2\t3363712\t3366990\t-\tAGER\t177\tLung cancer\n+chr6\t7486868\t7531945\t+\tDSP\t1832\tLung cancer\n+chr6\t7486868\t7531945\t+\tDSP\t1832\tLung cancer\n+chr13\t77367616\t77447665\t-\tEDNRB\t1910\tLung cancer\n+chr13\t77367616\t77390967\t-\tEDNRB\t1910\tLung cancer\n+chr13\t77367616\t77390967\t-\t'..b'1189\t+\tC2\t717\tInfectious lung disease\n+chr6\t32003244\t32021427\t+\tC2\t717\tInfectious lung disease\n+chr6_cox_hap1\t3341528\t3359711\t+\tC2\t717\tInfectious lung disease\n+chr6_qbl_hap2\t3143005\t3161189\t+\tC2\t717\tInfectious lung disease\n+chr18\t27425727\t27432982\t+\tTTR\t7276\tLung cancer\n+chr11\t1200871\t1239982\t+\tMUC5B\t727897\tLung disease\n+chr6\t3098900\t3102782\t-\tTUBB2A\t7280\tLung cancer\n+chr10\t80985613\t80990169\t-\tSFTPA2\t729238\tLung disease\n+chr10\t81305559\t81310114\t-\tSFTPA2\t729238\tLung disease\n+chr9\t112046130\t112058599\t-\tTXN\t7295\tInfectious lung disease\n+chr9\t112046130\t112058599\t-\tTXN\t7295\tLung cancer\n+chr4\t40953654\t40965202\t+\tUCHL1\t7345\tLung cancer\n+chr9\t99477011\t99499512\t-\tXPA\t7507\tLung cancer\n+chr9\t99477012\t99499512\t-\tXPA\t7507\tLung cancer\n+chr6\t30959839\t30975912\t+\tDDR1\t780\tLung disease\n+chr6_cox_hap1\t2300465\t2316540\t+\tDDR1\t780\tLung disease\n+chr6_qbl_hap2\t2098794\t2114869\t+\tDDR1\t780\tLung disease\n+chr6\t30960305\t30975912\t+\tDDR1\t780\tLung disease\n+chr6_cox_hap1\t2300931\t2316540\t+\tDDR1\t780\tLung disease\n+chr6_qbl_hap2\t2099260\t2114869\t+\tDDR1\t780\tLung disease\n+chr6\t30964443\t30975912\t+\tDDR1\t780\tLung disease\n+chr6_cox_hap1\t2305069\t2316540\t+\tDDR1\t780\tLung disease\n+chr6_qbl_hap2\t2103398\t2114869\t+\tDDR1\t780\tLung 
disease\n+chr12\t26164227\t26169270\t-\tBHLHE41\t79365\tLung cancer\n+chr11\t14946625\t14950408\t-\tCALCA\t796\tInfectious lung disease\n+chr11\t14946625\t14950408\t-\tCALCA\t796\tLung cancer\n+chr11\t14944791\t14950408\t-\tCALCA\t796\tInfectious lung disease\n+chr11\t14944791\t14950408\t-\tCALCA\t796\tLung cancer\n+chr11\t14946623\t14950408\t-\tCALCA\t796\tInfectious lung disease\n+chr11\t14946623\t14950408\t-\tCALCA\t796\tLung cancer\n+chr10\t14960904\t14986310\t+\tSUV39H2\t79723\tLung cancer\n+chr17\t43403427\t43414146\t+\tCDK5RAP3\t80279\tLung cancer\n+chr11\t104370176\t104399105\t-\tCASP5\t838\tLung cancer\n+chr11\t104370176\t104399105\t-\tCASP5\t838\tLung cancer\n+chr11\t104370176\t104399105\t-\tCASP5\t838\tLung cancer\n+chr11\t104370176\t104399105\t-\tCASP5\t838\tLung cancer\n+chr11\t104370176\t104399105\t-\tCASP5\t838\tLung cancer\n+chr11\t104370176\t104399105\t-\tCASP5\t838\tLung cancer\n+chr1\t15691381\t15723377\t-\tCASP9\t842\tLung cancer\n+chr1\t15691381\t15723377\t-\tCASP9\t842\tLung cancer\n+chr21\t43346371\t43369109\t-\tCBS\t875\tLung cancer\n+chr10\t103103814\t103307060\t+\tBTRC\t8945\tLung cancer\n+chr10\t103103814\t103307060\t+\tBTRC\t8945\tLung cancer\n+chr5\t86725837\t86744592\t-\tCCNH\t902\tLung cancer\n+chr17\t73864456\t73867753\t-\tSOCS3\t9021\tLung cancer\n+chr11\t1367704\t1439904\t+\tBRSK2\t9024\tLung cancer\n+chr12\t47373018\t47397048\t-\tCCNT1\t904\tLung disease\n+chrX\t129091019\t129099696\t-\tAIFM1\t9131\tLung cancer\n+chrX\t129091019\t129127489\t-\tAIFM1\t9131\tLung cancer\n+chrX\t129091018\t129127489\t-\tAIFM1\t9131\tLung cancer\n+chrX\t129091018\t129127489\t-\tAIFM1\t9131\tLung cancer\n+chrX\t129091018\t129127489\t-\tAIFM1\t9131\tLung cancer\n+chr15\t57184611\t57204536\t+\tCCNB2\t9133\tLung cancer\n+chr11\t117680504\t117692100\t+\tCD3E\t916\tLung cancer\n+chr3\t50375234\t50515896\t-\tCACNA2D2\t9254\tLung cancer\n+chr3\t50375235\t50515896\t-\tCACNA2D2\t9254\tLung cancer\n+chr3\t50375234\t50515896\t-\tCACNA2D2\t9254\tLung 
cancer\n+chr6\t14225843\t14245127\t+\tCD83\t9308\tLung cancer\n+chr6\t14225843\t14245127\t+\tCD83\t9308\tLung cancer\n+chr3\t188043156\t188058946\t+\tADIPOQ\t9370\tLung cancer\n+chr1\t87100715\t87152695\t-\tSEP15\t9403\tLung cancer\n+chr1\t87100715\t87152695\t-\tSEP15\t9403\tLung cancer\n+chr19\t55553545\t55560743\t-\tNAPSA\t9476\tLung cancer\n+chr12\t51948349\t51973694\t+\tESPL1\t9700\tLung cancer\n+chr20\t4708669\t4752291\t-\tRASSF2\t9770\tLung cancer\n+chr20\t4708668\t4743769\t-\tRASSF2\t9770\tLung cancer\n+chr19\t10457795\t10474481\t-\tKEAP1\t9817\tLung cancer\n+chr19\t10457795\t10475054\t-\tKEAP1\t9817\tLung cancer\n+chr1\t155171255\t155281786\t-\tARHGEF11\t9826\tLung cancer\n+chr1\t155171255\t155281786\t-\tARHGEF11\t9826\tLung cancer\n+chr3\t38055699\t38139232\t+\tDLEC1\t9940\tLung cancer\n+chr3\t38055699\t38139232\t+\tDLEC1\t9940\tLung cancer\n+chrX\t140819307\t140824851\t+\tMAGEC1\t9947\tLung cancer\n+chr7\t86619612\t86663582\t+\tDMTF1\t9988\tLung cancer\n+chr7\t86619612\t86663582\t+\tDMTF1\t9988\tLung cancer\n+chr7\t86619805\t86663582\t+\tDMTF1\t9988\tLung cancer\n+chr7\t86619612\t86663582\t+\tDMTF1\t9988\tLung cancer\n+chr7\t86619612\t86663582\t+\tDMTF1\t9988\tLung cancer\n'
b
diff -r 000000000000 -r 64133669255f tool-data/funDo.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/funDo.loc.sample Wed Sep 25 11:22:47 2013 -0400
b
@@ -0,0 +1,11 @@
+#This is a sample file distributed with Galaxy that is used by the FunDO tool.
+#The funDo.loc file has this format (white space characters are TAB
+#characters):
+#
+#<build> <description> <path to disease associated genes file>
+#
+#Your funDo.loc file should include an entry per line for each disease
+#associated genes file you have stored.
+#
+#hg18 disease associated genes /galaxy/data/hg18/misc/funDo/genes-disease.Sept2010.interval
+#hg19 disease associated genes /galaxy/data/hg19/misc/funDo/genes-disease.Sept2010.interval