Repository 'goenrichment'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/goenrichment

Changeset 0:52964064db8a (2019-01-11)
Next changeset 1:5ace5c7d1a86 (2019-01-18)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goenrichment commit c41d1d8f48da033f601b003d71e0e22345ccdbdb
added:
goenrichment.xml
test-data/MF_result.txt
test-data/annotations.tab
test-data/go.obo
test-data/goslim_generic.obo
test-data/population.txt
test-data/slim_annotations.tab
test-data/study.txt
b
diff -r 000000000000 -r 52964064db8a goenrichment.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/goenrichment.xml Fri Jan 11 06:18:35 2019 -0500
b
@@ -0,0 +1,151 @@
+<tool id="goenrichment" name="GOEnrichment" version="2.0.1">
+    <description>performs GO enrichment analysis of a set of gene products</description>
+    <requirements> <!-- Runtime dependency of the wrapper; the pinned package version (2.0.1) equals the tool version declared on the tool element -->
+        <requirement type="package" version="2.0.1">goenrichment</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[goenrichment
+--go '${go}'
+--annotation '${annotation}'
+--study '${study}'
+#if str($population) != 'None'
+--population '${population}'
+#end if
+--correction ${correction}
+${summarize}
+${singletons}
+${relations}
+--graph_format ${graph}
+--cut_off ${cutoff}
+--mf_result '${mf_result}'
+--bp_result '${bp_result}'
+--cc_result '${cc_result}'
+--mf_graph '${mf_graph}'
+--bp_graph '${bp_graph}'
+--cc_graph '${cc_graph}'
+    ]]></command>
+    <inputs> <!-- Ontology, annotation and study files are required; the population file is optional. Every select option carries explicit display text identical to its value. -->
+        <param name="go" type="data" format="obo,owl" label="Gene Ontology File" help="Gene Ontology file in OBO or OWL format (see http://geneontology.org/page/download-ontology)"/>
+        <param name="annotation" type="data" format="tabular,txt" label="Gene Product Annotation File" help="Tabular file containing annotations from gene products to GO terms (in GAF or BLAST2GO format, or a simple two-column table)"/>
+        <param name="study" type="data" format="txt" label="Study Set File" help="File containing the gene products corresponding to the study set (one per line)"/>
+        <param name="population" type="data" format="txt" optional="true" label="Population Set File (Optional)" help="File containing the gene products corresponding to the population set (one per line). If no file is submitted, the population set will be the set of all gene products listed in the annotation file."/>
+        <param name="correction" type="select" label="Multiple Test Correction" help="The multiple test correction method to use (Benjamini-Hochberg is recommended).">
+            <option value="Benjamini-Hochberg" selected="true">Benjamini-Hochberg</option>
+            <option value="SDA">SDA</option>
+            <option value="Bonferroni-Holm">Bonferroni-Holm</option>
+            <option value="Sidak">Sidak</option>
+            <option value="Bonferroni">Bonferroni</option>
+        </param>
+        <param name="cutoff" type="select" label="P-Value Cut-Off" help="The corrected p-value (or q-value) cut-off to apply for the graph output.">
+            <option value="1.0">1.0</option>
+            <option value="0.1">0.1</option>
+            <option value="0.05">0.05</option>
+            <option value="0.01" selected="true">0.01</option>
+        </param>
+        <param name="graph" type="select" label="Output Graph Format" help="The format of the output graphs (png, svg, or tabular for importing into cytoscape).">
+            <option value="png" selected="true">png</option>
+            <option value="svg">svg</option>
+            <option value="tabular">tabular</option>
+        </param>
+        <param name="summarize" type="boolean" checked="true" truevalue="--summarize_output" falsevalue="" label="Summarize Output" help="Whether to produce a summarized list of GO terms or the full list of those that are statistically significant"/>
+        <param name="singletons" type="boolean" checked="true" truevalue="--ignore_singletons" falsevalue="" label="Exclude Singletons" help="Whether to exclude GO terms that are annotated to a single gene product in the study set"/>
+        <param name="relations" type="boolean" checked="false" truevalue="--use_all_relations" falsevalue="" label="Use All Relations" help="Whether to infer annotations through 'part_of' and other non-hierarchical relationships, or only through 'is_a' relations"/>
+    </inputs>
+    <outputs> <!-- One tabular result and one graph per GO type (MF, BP, CC); labels include the tool name and input names so repeated runs stay distinguishable in the history -->
+        <data name="mf_result" format="tabular" label="${tool.name} on ${on_string}: MF Result File"/>
+        <data name="bp_result" format="tabular" label="${tool.name} on ${on_string}: BP Result File"/>
+        <data name="cc_result" format="tabular" label="${tool.name} on ${on_string}: CC Result File"/>
+        <data name="mf_graph" format="png" label="${tool.name} on ${on_string}: MF Graph File"> <!-- png by default; the datatype follows the "graph" select -->
+            <change_format>
+                <when input="graph" value="svg" format="svg"/>
+                <when input="graph" value="tabular" format="tabular"/>
+            </change_format>
+        </data>
+        <data name="bp_graph" format="png" label="${tool.name} on ${on_string}: BP Graph File"> <!-- png by default; the datatype follows the "graph" select -->
+            <change_format>
+                <when input="graph" value="svg" format="svg"/>
+                <when input="graph" value="tabular" format="tabular"/>
+            </change_format>
+        </data>
+        <data name="cc_graph" format="png" label="${tool.name} on ${on_string}: CC Graph File"> <!-- png by default; the datatype follows the "graph" select -->
+            <change_format>
+                <when input="graph" value="svg" format="svg"/>
+                <when input="graph" value="tabular" format="tabular"/>
+            </change_format>
+        </data>
+    </outputs>
+    <tests> <!-- Both tests compare only the MF result table against test-data/MF_result.txt; the BP/CC results and the graphs are not checked -->
+        <test> <!-- No population file: the optional population parameter is left unset; summarize is false, so its flag is omitted from the command line -->
+            <param name="go" ftype="obo" value="go.obo"/>
+            <param name="annotation" ftype="txt" value="annotations.tab"/>
+            <param name="study" ftype="txt" value="study.txt"/>
+            <param name="summarize" value="false"/>
+            <output name="mf_result" ftype="tabular" file="MF_result.txt" lines_diff="0"/>
+        </test>
+        <test> <!-- Explicit population file (population.txt lists fake1 to fake100); the same MF_result.txt is expected as in the first test -->
+            <param name="go" ftype="obo" value="go.obo"/>
+            <param name="annotation" ftype="txt" value="annotations.tab"/>
+            <param name="study" ftype="txt" value="study.txt"/>
+            <param name="population" ftype="txt" value="population.txt"/>
+            <param name="summarize" value="false"/>
+            <output name="mf_result" ftype="tabular" file="MF_result.txt" lines_diff="0"/>
+        </test>
+    </tests>
+    <help>
+.. class:: infomark
+
+GOEnrichment is a Java application that can be used to analyze gene product sets (e.g., from microarray or RNAseq experiments) for enriched GO terms.
+
+-----
+
+.. class:: infomark
+
+GOEnrichment requires:
+
+- A Gene Ontology file in either OBO or OWL format (see http://geneontology.org/page/download-ontology).
+- A tabular annotation file in GAF (http://geneontology.org/page/download-annotations) format, BLAST2GO format, or a simple two-column table (e.g. from BioMart) with gene product ids in the first column and GO terms in the second one.
+- A list of gene products comprising the study set (a flat text file with one gene product per line).
+- Optionally, a list of gene products comprising the population set (if none is submitted, the population set will be the set of gene products listed in the annotation file).
+
+-----
+
+.. class:: infomark
+
+GOEnrichment produces a tabular result file and a graph file for each GO type (MF - Molecular Function, BP - Biological Process and CC - Cellular Component):
+
+- The result file is a tabular list of all GO terms present in the study set and their respective p-values.
+- The graph file can be either a png image, an svg image, or a text file for importing into cytoscape (together with the result file).
+
+-----
+
+.. class:: infomark
+
+The graph is colored by p-value: terms with a p-value above the cut-off appear in white, and the color gets darker as the p-value decreases
+
+.. image:: https://github.com/DanFaria/GOEnrichment/raw/master/Scale.png
+   :width: 600
+   :height: 315
+
+(see the scale at https://github.com/DanFaria/GOEnrichment/blob/master/Scale.png). In addition to the name of each GO term, the graph
+shows its frequency in the study set. Dashed edges indicate that one or more intermediate terms were omitted from the graph.
+
+-----
+
+.. class:: warningmark
+
+Gene products listed in either the study or population set files that are not present in the annotation file will be ignored.
+    </help>
+
+    <citations> <!-- Single BibTeX @misc entry that cites the GOEnrichment GitHub repository -->
+        <citation type="bibtex">
+@misc{githubgoenrichment,
+  author = {Faria, Daniel},
+  year = {2017},
+  title = {GOEnrichment},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/DanFaria/GOEnrichment},
+}
+        </citation>
+    </citations>
+
+</tool>
b
diff -r 000000000000 -r 52964064db8a test-data/MF_result.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/MF_result.txt Fri Jan 11 06:18:35 2019 -0500
b
@@ -0,0 +1,5 @@
+GO Term Study # Study Freq. Pop. Freq. p-value q-value name gene products
+GO:0005488 7 35% 7.0% 4.84E-6 1.94E-5 binding fake13,fake12,fake11,fake10,fake8,fake9,fake14
+GO:0016209 7 35% 7.0% 4.84E-6 1.94E-5 antioxidant activity fake4,fake3,fake2,fake1,fake7,fake6,fake5
+GO:0038024 6 30% 10% 3.93E-3 5.24E-3 cargo receptor activity fake20,fake19,fake18,fake17,fake16,fake15
+GO:0003674 20 100% 100% 1.0 1.0 molecular_function fake13,fake12,fake11,fake10,fake20,fake8,fake7,fake6,fake19,fake5,fake18,fake17,fake16,fake15,fake9,fake14,fake4,fake3,fake2,fake1
b
diff -r 000000000000 -r 52964064db8a test-data/annotations.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotations.tab Fri Jan 11 06:18:35 2019 -0500
b
@@ -0,0 +1,300 @@
+fake1 GO:0007610
+fake2 GO:0007610
+fake3 GO:0007610
+fake4 GO:0007610
+fake5 GO:0022610
+fake6 GO:0022610
+fake7 GO:0022610
+fake8 GO:0022610
+fake9 GO:0044848
+fake10 GO:0044848
+fake11 GO:0044848
+fake12 GO:0044848
+fake13 GO:0065007
+fake14 GO:0065007
+fake15 GO:0065007
+fake16 GO:0009758
+fake17 GO:0009758
+fake18 GO:0009758
+fake19 GO:0015976
+fake20 GO:0015976
+fake21 GO:0015976
+fake22 GO:0015976
+fake23 GO:0098743
+fake24 GO:0001906
+fake25 GO:0008283
+fake26 GO:0071840
+fake27 GO:0009987
+fake28 GO:0098754
+fake29 GO:0032502
+fake30 GO:0040007
+fake31 GO:0002376
+fake32 GO:0051179
+fake33 GO:0040011
+fake34 GO:0008152
+fake35 GO:0051704
+fake36 GO:0032501
+fake37 GO:0019740
+fake38 GO:0006794
+fake39 GO:0043473
+fake40 GO:0000003
+fake41 GO:0022414
+fake42 GO:0050896
+fake43 GO:0048511
+fake44 GO:0023052
+fake45 GO:0006791
+fake46 GO:0098743
+fake47 GO:0001906
+fake48 GO:0008283
+fake49 GO:0071840
+fake50 GO:0009987
+fake51 GO:0098754
+fake52 GO:0032502
+fake53 GO:0040007
+fake54 GO:0002376
+fake55 GO:0051179
+fake56 GO:0040011
+fake57 GO:0008152
+fake58 GO:0051704
+fake59 GO:0032501
+fake60 GO:0019740
+fake61 GO:0006794
+fake62 GO:0043473
+fake63 GO:0000003
+fake64 GO:0022414
+fake65 GO:0050896
+fake66 GO:0048511
+fake67 GO:0023052
+fake68 GO:0006791
+fake69 GO:0098743
+fake70 GO:0001906
+fake71 GO:0008283
+fake72 GO:0071840
+fake73 GO:0009987
+fake74 GO:0098754
+fake75 GO:0032502
+fake76 GO:0040007
+fake77 GO:0002376
+fake78 GO:0051179
+fake79 GO:0040011
+fake80 GO:0008152
+fake81 GO:0051704
+fake82 GO:0032501
+fake83 GO:0019740
+fake84 GO:0006794
+fake85 GO:0043473
+fake86 GO:0000003
+fake87 GO:0022414
+fake88 GO:0050896
+fake89 GO:0048511
+fake90 GO:0023052
+fake91 GO:0006791
+fake92 GO:0098743
+fake93 GO:0001906
+fake94 GO:0008283
+fake95 GO:0071840
+fake96 GO:0009987
+fake97 GO:0098754
+fake98 GO:0032502
+fake99 GO:0040007
+fake100 GO:0002376
+fake1 GO:0005623
+fake2 GO:0005623
+fake3 GO:0005623
+fake4 GO:0005623
+fake5 GO:0005623
+fake6 GO:0030054
+fake7 GO:0030054
+fake8 GO:0030054
+fake9 GO:0030054
+fake10 GO:0030054
+fake11 GO:0044464
+fake12 GO:0044464
+fake13 GO:0044464
+fake14 GO:0044464
+fake15 GO:0044464
+fake16 GO:0005576
+fake17 GO:0005576
+fake18 GO:0005576
+fake19 GO:0005576
+fake20 GO:0005576
+fake21 GO:0044421
+fake22 GO:0044421
+fake23 GO:0044421
+fake24 GO:0044421
+fake25 GO:0044421
+fake26 GO:0016020
+fake27 GO:0016020
+fake28 GO:0016020
+fake29 GO:0016020
+fake30 GO:0016020
+fake31 GO:0044425
+fake32 GO:0044425
+fake33 GO:0044425
+fake34 GO:0044425
+fake35 GO:0044425
+fake36 GO:0031974
+fake37 GO:0031974
+fake38 GO:0031974
+fake39 GO:0031974
+fake40 GO:0031974
+fake41 GO:0097423
+fake42 GO:0097423
+fake43 GO:0097423
+fake44 GO:0097423
+fake45 GO:0097423
+fake46 GO:0009295
+fake47 GO:0009295
+fake48 GO:0009295
+fake49 GO:0009295
+fake50 GO:0009295
+fake51 GO:0043226
+fake52 GO:0043226
+fake53 GO:0043226
+fake54 GO:0043226
+fake55 GO:0044422
+fake56 GO:0044422
+fake57 GO:0044422
+fake58 GO:0044422
+fake59 GO:0044215
+fake60 GO:0044215
+fake61 GO:0044215
+fake62 GO:0044215
+fake63 GO:0044217
+fake64 GO:0044217
+fake65 GO:0044217
+fake66 GO:0044217
+fake67 GO:0032991
+fake68 GO:0032991
+fake69 GO:0032991
+fake70 GO:0032991
+fake71 GO:0032991
+fake72 GO:0099080
+fake73 GO:0099080
+fake74 GO:0099080
+fake75 GO:0099080
+fake76 GO:0099080
+fake77 GO:0055044
+fake78 GO:0055044
+fake79 GO:0055044
+fake80 GO:0055044
+fake81 GO:0055044
+fake82 GO:0045202
+fake83 GO:0045202
+fake84 GO:0045202
+fake85 GO:0045202
+fake86 GO:0045202
+fake87 GO:0044456
+fake88 GO:0044456
+fake89 GO:0044456
+fake90 GO:0044456
+fake91 GO:0044456
+fake92 GO:0019012
+fake93 GO:0019012
+fake94 GO:0019012
+fake95 GO:0019012
+fake96 GO:0019012
+fake97 GO:0044423
+fake98 GO:0044423
+fake99 GO:0044423
+fake100 GO:0044423
+fake1 GO:0016209
+fake2 GO:0016209
+fake3 GO:0016209
+fake4 GO:0016209
+fake5 GO:0016209
+fake6 GO:0016209
+fake7 GO:0016209
+fake8 GO:0005488
+fake9 GO:0005488
+fake10 GO:0005488
+fake11 GO:0005488
+fake12 GO:0005488
+fake13 GO:0005488
+fake14 GO:0005488
+fake15 GO:0038024
+fake16 GO:0038024
+fake17 GO:0038024
+fake18 GO:0038024
+fake19 GO:0038024
+fake20 GO:0038024
+fake21 GO:0038024
+fake22 GO:0038024
+fake23 GO:0038024
+fake24 GO:0038024
+fake25 GO:0003824
+fake26 GO:0003824
+fake27 GO:0003824
+fake28 GO:0003824
+fake29 GO:0003824
+fake30 GO:0003824
+fake31 GO:0003824
+fake32 GO:0104005
+fake33 GO:0104005
+fake34 GO:0104005
+fake35 GO:0104005
+fake36 GO:0104005
+fake37 GO:0104005
+fake38 GO:0140104
+fake39 GO:0140104
+fake40 GO:0140104
+fake41 GO:0140104
+fake42 GO:0140104
+fake43 GO:0140104
+fake44 GO:0098772
+fake45 GO:0098772
+fake46 GO:0098772
+fake47 GO:0098772
+fake48 GO:0098772
+fake49 GO:0098772
+fake50 GO:0060089
+fake51 GO:0060089
+fake52 GO:0060089
+fake53 GO:0060089
+fake54 GO:0060089
+fake55 GO:0060089
+fake56 GO:0045735
+fake57 GO:0045735
+fake58 GO:0045735
+fake59 GO:0045735
+fake60 GO:0045735
+fake61 GO:0045735
+fake62 GO:0031386
+fake63 GO:0031386
+fake64 GO:0031386
+fake65 GO:0031386
+fake66 GO:0031386
+fake67 GO:0031386
+fake68 GO:0005198
+fake69 GO:0005198
+fake70 GO:0005198
+fake71 GO:0005198
+fake72 GO:0005198
+fake73 GO:0005198
+fake74 GO:0090729
+fake75 GO:0090729
+fake76 GO:0090729
+fake77 GO:0090729
+fake78 GO:0090729
+fake79 GO:0090729
+fake80 GO:0140110
+fake81 GO:0140110
+fake82 GO:0140110
+fake83 GO:0140110
+fake84 GO:0140110
+fake85 GO:0140110
+fake86 GO:0045182
+fake87 GO:0045182
+fake88 GO:0045182
+fake89 GO:0045182
+fake90 GO:0045182
+fake91 GO:0045182
+fake92 GO:0045182
+fake93 GO:0005215
+fake94 GO:0005215
+fake95 GO:0005215
+fake96 GO:0005215
+fake97 GO:0005215
+fake98 GO:0005215
+fake99 GO:0005215
+fake100 GO:0005215
b
diff -r 000000000000 -r 52964064db8a test-data/go.obo
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/go.obo Fri Jan 11 06:18:35 2019 -0500
[
b'@@ -0,0 +1,1050 @@\n+format-version: 1.2\n+data-version: releases/2018-04-30\n+subsetdef: goantislim_grouping "Grouping classes that can be excluded"\n+subsetdef: gocheck_do_not_annotate "Term not to be used for direct annotation"\n+subsetdef: gocheck_do_not_manually_annotate "Term not to be used for direct manual annotation"\n+subsetdef: goslim_agr "AGR slim"\n+subsetdef: goslim_aspergillus "Aspergillus GO slim"\n+subsetdef: goslim_candida "Candida GO slim"\n+subsetdef: goslim_chembl "ChEMBL protein targets summary"\n+subsetdef: goslim_generic "Generic GO slim"\n+subsetdef: goslim_goa "GOA and proteome slim"\n+subsetdef: goslim_metagenomics "Metagenomics GO slim"\n+subsetdef: goslim_mouse "Mouse GO slim"\n+subsetdef: goslim_pir "PIR GO slim"\n+subsetdef: goslim_plant "Plant GO slim"\n+subsetdef: goslim_pombe "Fission yeast GO slim"\n+subsetdef: goslim_synapse "synapse GO slim"\n+subsetdef: goslim_virus "Viral GO slim"\n+subsetdef: goslim_yeast "Yeast GO slim"\n+subsetdef: gosubset_prok "Prokaryotic GO subset"\n+subsetdef: mf_needs_review "Catalytic activity terms in need of attention"\n+subsetdef: termgenie_unvetted "Terms created by TermGenie that do not follow a template and require additional vetting by editors"\n+subsetdef: virus_checked "Viral overhaul terms"\n+synonymtypedef: syngo_official_label "label approved by the SynGO project"\n+synonymtypedef: systematic_synonym "Systematic synonym" EXACT\n+default-namespace: gene_ontology\n+remark: cvs version: $Revision: 38972$\n+remark: Includes Ontology(OntologyID(Anonymous-35)) [Axioms: 230 Logical Axioms: 228]\n+remark: Includes Ontology(OntologyID(OntologyIRI(<http://purl.obolibrary.org/obo/go/never_in_taxon.owl>))) [Axioms: 18 Logical Axioms: 0]\n+ontology: go\n+property_value: http://purl.org/dc/elements/1.1/license http://creativecommons.org/licenses/by/4.0/\n+\n+[Term]\n+id: GO:0000003\n+name: reproduction\n+namespace: biological_process\n+alt_id: GO:0019952\n+alt_id: GO:0050876\n+def: "The production of 
new individuals that contain some portion of genetic material inherited from one or more parent organisms." [GOC:go_curators, GOC:isa_complete, GOC:jl, ISBN:0198506732]\n+subset: goslim_agr\n+subset: goslim_chembl\n+subset: goslim_generic\n+subset: goslim_pir\n+subset: goslim_plant\n+subset: gosubset_prok\n+synonym: "reproductive physiological process" EXACT []\n+xref: Wikipedia:Reproduction\n+is_a: GO:0008150 ! biological_process\n+disjoint_from: GO:0044848 ! biological phase\n+\n+[Term]\n+id: GO:0001906\n+name: cell killing\n+namespace: biological_process\n+def: "Any process in an organism that results in the killing of its own cells or those of another organism, including in some cases the death of the other organism. Killing here refers to the induction of death in one cell by another cell, not cell-autonomous death due to internal or other environmental conditions." [GOC:add]\n+subset: goslim_pir\n+subset: gosubset_prok\n+synonym: "necrosis" RELATED []\n+is_a: GO:0008150 ! biological_process\n+disjoint_from: GO:0044848 ! biological phase\n+\n+[Term]\n+id: GO:0002376\n+name: immune system process\n+namespace: biological_process\n+def: "Any process involved in the development or functioning of the immune system, an organismal system for calibrated responses to potential internal or invasive threats." [GO_REF:0000022, GOC:add, GOC:mtg_15nov05]\n+comment: Note that this term is a direct child of \'biological_process ; GO:0008150\' because some immune system processes are types of cellular process (GO:0009987), whereas others are types of multicellular organism process (GO:0032501). This term was added by GO_REF:0000022.\n+subset: goslim_agr\n+subset: goslim_chembl\n+subset: goslim_generic\n+subset: goslim_mouse\n+subset: goslim_pir\n+xref: Wikipedia:Immune_system\n+is_a: GO:0008150 ! biological_process\n+disjoint_from: GO:0044848 ! 
biological phase\n+\n+[Term]\n+id: GO:0003674\n+name: molecular_function\n+namespace: molecular_function\n+alt_id: GO:0005554\n+def: "A molecular process that can be carried out by the action of a single macromolecular m'..b'logical_process\n+def: "Any process that reduces or removes the toxicity of a toxic substance. These may include transport of the toxic substance away from sensitive areas and to compartments or complexes whose purpose is sequestration of the toxic substance." [GOC:dos]\n+subset: goslim_pombe\n+is_a: GO:0008150 ! biological_process\n+\n+[Term]\n+id: GO:0098772\n+name: molecular function regulator\n+namespace: molecular_function\n+def: "A molecular function that modulates the activity of a gene product or complex.  Examples include enzyme regulators and channel regulators." [GOC:dos, GOC:pt]\n+is_a: GO:0003674 ! molecular_function\n+relationship: regulates GO:0003674 ! molecular_function\n+\n+[Term]\n+id: GO:0099080\n+name: supramolecular complex\n+namespace: cellular_component\n+def: "A cellular component that consists of an indeterminate number of proteins or macromolecular complexes, organized into a regular, higher-order structure such as a polymer, sheet, network or a fiber." [GOC:dos]\n+is_a: GO:0005575 ! cellular_component\n+\n+[Term]\n+id: GO:0104005\n+name: hijacked molecular function\n+namespace: molecular_function\n+def: "A function that was not selected for in the evolution of an organism, but arises from co-option by another organism, e.g. a human protein used as a virus receptor." [GOC:pdt]\n+is_a: GO:0003674 ! molecular_function\n+created_by: dos\n+creation_date: 2017-08-04T02:12:09Z\n+\n+[Term]\n+id: GO:0140104\n+name: molecular carrier activity\n+namespace: molecular_function\n+def: "Directly binding to a specific ion or molecule and delivering it either to an acceptor molecule or to a specific location." 
[GOC:pdt]\n+comment: https://github.com/geneontology/go-ontology/issues/14221\n+subset: gocheck_do_not_annotate\n+is_a: GO:0003674 ! molecular_function\n+created_by: pg\n+creation_date: 2017-09-19T13:10:18Z\n+\n+[Term]\n+id: GO:0140110\n+name: transcription regulator activity\n+namespace: molecular_function\n+def: "A molecular function that controls the rate, timing and/or magnitude of transcription of genetic information. The function of transcriptional regulators is to modulate gene expression at the transcription step so that they are expressed in the right cell at the right time and in the right amount throughout the life of the cell and the organism." [GOC:pg, GOC:txnOH-2018, Wikipedia:Transcription_factor]\n+comment: https://github.com/geneontology/go-ontology/issues/13588\n+subset: gocheck_do_not_annotate\n+is_a: GO:0003674 ! molecular_function\n+created_by: pg\n+creation_date: 2017-10-18T07:05:44Z\n+\n+[Typedef]\n+id: ends_during\n+name: ends_during\n+namespace: external\n+xref: RO:0002093\n+\n+[Typedef]\n+id: happens_during\n+name: happens_during\n+namespace: external\n+xref: RO:0002092\n+is_transitive: true\n+is_a: ends_during ! ends_during\n+\n+[Typedef]\n+id: has_part\n+name: has_part\n+namespace: external\n+xref: BFO:0000051\n+is_transitive: true\n+\n+[Typedef]\n+id: negatively_regulates\n+name: negatively regulates\n+namespace: external\n+xref: RO:0002212\n+is_a: regulates ! regulates\n+transitive_over: part_of ! part of\n+\n+[Typedef]\n+id: never_in_taxon\n+name: never_in_taxon\n+namespace: external\n+xref: RO:0002161\n+expand_assertion_to: "Class: ?X DisjointWith: RO_0002162 some ?Y" []\n+is_metadata_tag: true\n+is_class_level: true\n+\n+[Typedef]\n+id: occurs_in\n+name: occurs in\n+namespace: external\n+xref: BFO:0000066\n+holds_over_chain: part_of occurs_in\n+transitive_over: part_of ! part of\n+\n+[Typedef]\n+id: part_of\n+name: part of\n+namespace: external\n+xref: BFO:0000050\n+is_transitive: true\n+inverse_of: has_part ! 
has_part\n+\n+[Typedef]\n+id: positively_regulates\n+name: positively regulates\n+namespace: external\n+xref: RO:0002213\n+holds_over_chain: negatively_regulates negatively_regulates\n+is_a: regulates ! regulates\n+transitive_over: part_of ! part of\n+\n+[Typedef]\n+id: regulates\n+name: regulates\n+namespace: external\n+xref: RO:0002211\n+is_transitive: true\n+transitive_over: part_of ! part of\n+\n+[Typedef]\n+id: starts_during\n+name: starts_during\n+namespace: external\n+xref: RO:0002091\n+\n'
b
diff -r 000000000000 -r 52964064db8a test-data/goslim_generic.obo
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/goslim_generic.obo Fri Jan 11 06:18:35 2019 -0500
[
@@ -0,0 +1,170 @@
+format-version: 1.2
+subsetdef: goantislim_grouping "Grouping classes that can be excluded"
+subsetdef: gocheck_do_not_annotate "Term not to be used for direct annotation"
+subsetdef: gocheck_do_not_manually_annotate "Term not to be used for direct manual annotation"
+subsetdef: goslim_agr "AGR slim"
+subsetdef: goslim_aspergillus "Aspergillus GO slim"
+subsetdef: goslim_candida "Candida GO slim"
+subsetdef: goslim_chembl "ChEMBL protein targets summary"
+subsetdef: goslim_generic "Generic GO slim"
+subsetdef: goslim_goa "GOA and proteome slim"
+subsetdef: goslim_metagenomics "Metagenomics GO slim"
+subsetdef: goslim_mouse "Mouse GO slim"
+subsetdef: goslim_pir "PIR GO slim"
+subsetdef: goslim_plant "Plant GO slim"
+subsetdef: goslim_pombe "Fission yeast GO slim"
+subsetdef: goslim_synapse "synapse GO slim"
+subsetdef: goslim_virus "Viral GO slim"
+subsetdef: goslim_yeast "Yeast GO slim"
+subsetdef: gosubset_prok "Prokaryotic GO subset"
+subsetdef: mf_needs_review "Catalytic activity terms in need of attention"
+subsetdef: termgenie_unvetted "Terms created by TermGenie that do not follow a template and require additional vetting by editors"
+subsetdef: virus_checked "Viral overhaul terms"
+synonymtypedef: syngo_official_label "label approved by the SynGO project"
+synonymtypedef: systematic_synonym "Systematic synonym" EXACT
+ontology: go/subsets/goslim_generic
+
+[Term]
+id: GO:0003674
+name: molecular_function
+namespace: molecular_function
+alt_id: GO:0005554
+def: "A molecular process that can be carried out by the action of a single macromolecular machine, usually via direct physical interactions with other molecular entities. Function in this sense denotes an action, or activity, that a gene product (or a complex) performs. These actions are described from two distinct but related perspectives: (1) biochemical activity, and (2) role as a component in a larger system/process." [GOC:pdt]
+comment: Note that, in addition to forming the root of the molecular function ontology, this term is recommended for use for the annotation of gene products whose molecular function is unknown. When this term is used for annotation, it indicates that no information was available about the molecular function of the gene product annotated as of the date the annotation was made; the evidence code "no data" (ND), is used to indicate this. Despite its name, this is not a type of 'function' in the sense typically defined by upper ontologies such as Basic Formal Ontology (BFO). It is instead a BFO:process carried out by a single gene product or complex.
+subset: goslim_aspergillus
+subset: goslim_candida
+subset: goslim_chembl
+subset: goslim_generic
+subset: goslim_metagenomics
+subset: goslim_pir
+subset: goslim_plant
+subset: goslim_yeast
+subset: gosubset_prok
+synonym: "molecular function" EXACT []
+
+[Term]
+id: GO:0005575
+name: cellular_component
+namespace: cellular_component
+alt_id: GO:0008372
+def: "A location, relative to cellular compartments and structures, occupied by a macromolecular machine when it carries out a molecular function. There are two ways in which the gene ontology describes locations of gene products: (1) relative to cellular structures (e.g., cytoplasmic side of plasma membrane) or compartments (e.g., mitochondrion), and (2) the stable macromolecular complexes of which they are parts (e.g., the ribosome)." [GOC:pdt, NIF_Subcellular:sao-1337158144]
+comment: Note that, in addition to forming the root of the cellular component ontology, this term is recommended for use for the annotation of gene products whose cellular component is unknown. When this term is used for annotation, it indicates that no information was available about the cellular component of the gene product annotated as of the date the annotation was made; the evidence code "no data" (ND), is used to indicate this.
+subset: goslim_aspergillus
+subset: goslim_candida
+subset: goslim_chembl
+subset: goslim_generic
+subset: goslim_metagenomics
+subset: goslim_pir
+subset: goslim_plant
+subset: goslim_yeast
+subset: gosubset_prok
+synonym: "cell or subcellular entity" EXACT []
+synonym: "cellular component" EXACT []
+synonym: "subcellular entity" RELATED [NIF_Subcellular:nlx_subcell_100315]
+xref: NIF_Subcellular:sao-1337158144
+xref: NIF_Subcellular:sao1337158144
+
+[Term]
+id: GO:0008150
+name: biological_process
+namespace: biological_process
+alt_id: GO:0000004
+alt_id: GO:0007582
+alt_id: GO:0044699
+def: "A biological process represents a specific objective that the organism is genetically programmed to achieve. Biological processes are often described by their outcome or ending state, e.g., the biological process of cell division results in the creation of two daughter cells (a divided cell) from a single parent cell. A biological process is accomplished by a particular set of molecular functions carried out by specific gene products (or macromolecular complexes), often in a highly regulated manner and in a particular temporal sequence." [GOC:pdt]
+comment: Note that, in addition to forming the root of the biological process ontology, this term is recommended for use for the annotation of gene products whose biological process is unknown. When this term is used for annotation, it indicates that no information was available about the biological process of the gene product annotated as of the date the annotation was made; the evidence code "no data" (ND), is used to indicate this.
+subset: goslim_aspergillus
+subset: goslim_candida
+subset: goslim_chembl
+subset: goslim_generic
+subset: goslim_metagenomics
+subset: goslim_pir
+subset: goslim_plant
+subset: goslim_pombe
+subset: goslim_yeast
+subset: gosubset_prok
+synonym: "biological process" EXACT []
+synonym: "physiological process" EXACT []
+synonym: "single organism process" RELATED []
+synonym: "single-organism process" RELATED []
+xref: Wikipedia:Biological_process
+created_by: janelomax
+creation_date: 2012-09-19T15:05:24Z
+
+[Typedef]
+id: ends_during
+name: ends_during
+namespace: external
+xref: RO:0002093
+
+[Typedef]
+id: happens_during
+name: happens_during
+namespace: external
+xref: RO:0002092
+is_transitive: true
+is_a: ends_during ! ends_during
+
+[Typedef]
+id: has_part
+name: has_part
+namespace: external
+xref: BFO:0000051
+is_transitive: true
+
+[Typedef]
+id: negatively_regulates
+name: negatively regulates
+namespace: external
+xref: RO:0002212
+is_a: regulates ! regulates
+transitive_over: part_of ! part of
+
+[Typedef]
+id: never_in_taxon
+name: never_in_taxon
+namespace: external
+xref: RO:0002161
+expand_assertion_to: "Class: ?X DisjointWith: RO_0002162 some ?Y" []
+is_metadata_tag: true
+is_class_level: true
+
+[Typedef]
+id: occurs_in
+name: occurs in
+namespace: external
+xref: BFO:0000066
+holds_over_chain: part_of occurs_in
+transitive_over: part_of ! part of
+
+[Typedef]
+id: part_of
+name: part of
+namespace: external
+xref: BFO:0000050
+is_transitive: true
+inverse_of: has_part ! has_part
+
+[Typedef]
+id: positively_regulates
+name: positively regulates
+namespace: external
+xref: RO:0002213
+holds_over_chain: negatively_regulates negatively_regulates
+is_a: regulates ! regulates
+transitive_over: part_of ! part of
+
+[Typedef]
+id: regulates
+name: regulates
+namespace: external
+xref: RO:0002211
+is_transitive: true
+transitive_over: part_of ! part of
+
+[Typedef]
+id: starts_during
+name: starts_during
+namespace: external
+xref: RO:0002091
+
b
diff -r 000000000000 -r 52964064db8a test-data/population.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/population.txt Fri Jan 11 06:18:35 2019 -0500
b
@@ -0,0 +1,100 @@
+fake1
+fake2
+fake3
+fake4
+fake5
+fake6
+fake7
+fake8
+fake9
+fake10
+fake11
+fake12
+fake13
+fake14
+fake15
+fake16
+fake17
+fake18
+fake19
+fake20
+fake21
+fake22
+fake23
+fake24
+fake25
+fake26
+fake27
+fake28
+fake29
+fake30
+fake31
+fake32
+fake33
+fake34
+fake35
+fake36
+fake37
+fake38
+fake39
+fake40
+fake41
+fake42
+fake43
+fake44
+fake45
+fake46
+fake47
+fake48
+fake49
+fake50
+fake51
+fake52
+fake53
+fake54
+fake55
+fake56
+fake57
+fake58
+fake59
+fake60
+fake61
+fake62
+fake63
+fake64
+fake65
+fake66
+fake67
+fake68
+fake69
+fake70
+fake71
+fake72
+fake73
+fake74
+fake75
+fake76
+fake77
+fake78
+fake79
+fake80
+fake81
+fake82
+fake83
+fake84
+fake85
+fake86
+fake87
+fake88
+fake89
+fake90
+fake91
+fake92
+fake93
+fake94
+fake95
+fake96
+fake97
+fake98
+fake99
+fake100
b
diff -r 000000000000 -r 52964064db8a test-data/slim_annotations.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slim_annotations.tab Fri Jan 11 06:18:35 2019 -0500
b
@@ -0,0 +1,301 @@
+Gene GOSlim Term
+fake46 GO:0003674
+fake46 GO:0005575
+fake46 GO:0008150
+fake45 GO:0003674
+fake45 GO:0005575
+fake45 GO:0008150
+fake44 GO:0003674
+fake44 GO:0005575
+fake44 GO:0008150
+fake43 GO:0003674
+fake43 GO:0005575
+fake43 GO:0008150
+fake42 GO:0003674
+fake42 GO:0005575
+fake42 GO:0008150
+fake41 GO:0003674
+fake41 GO:0005575
+fake41 GO:0008150
+fake40 GO:0003674
+fake40 GO:0005575
+fake40 GO:0008150
+fake49 GO:0003674
+fake49 GO:0005575
+fake49 GO:0008150
+fake48 GO:0003674
+fake48 GO:0005575
+fake48 GO:0008150
+fake47 GO:0003674
+fake47 GO:0005575
+fake47 GO:0008150
+fake57 GO:0003674
+fake57 GO:0005575
+fake57 GO:0008150
+fake56 GO:0003674
+fake56 GO:0005575
+fake56 GO:0008150
+fake55 GO:0003674
+fake55 GO:0005575
+fake55 GO:0008150
+fake54 GO:0003674
+fake54 GO:0005575
+fake54 GO:0008150
+fake53 GO:0003674
+fake53 GO:0005575
+fake53 GO:0008150
+fake52 GO:0003674
+fake52 GO:0005575
+fake52 GO:0008150
+fake51 GO:0003674
+fake51 GO:0005575
+fake51 GO:0008150
+fake50 GO:0003674
+fake50 GO:0005575
+fake50 GO:0008150
+fake100 GO:0003674
+fake100 GO:0005575
+fake100 GO:0008150
+fake59 GO:0003674
+fake59 GO:0005575
+fake59 GO:0008150
+fake58 GO:0003674
+fake58 GO:0005575
+fake58 GO:0008150
+fake68 GO:0003674
+fake68 GO:0005575
+fake68 GO:0008150
+fake67 GO:0003674
+fake67 GO:0005575
+fake67 GO:0008150
+fake66 GO:0003674
+fake66 GO:0005575
+fake66 GO:0008150
+fake65 GO:0003674
+fake65 GO:0005575
+fake65 GO:0008150
+fake64 GO:0003674
+fake64 GO:0005575
+fake64 GO:0008150
+fake63 GO:0003674
+fake63 GO:0005575
+fake63 GO:0008150
+fake62 GO:0003674
+fake62 GO:0005575
+fake62 GO:0008150
+fake61 GO:0003674
+fake61 GO:0005575
+fake61 GO:0008150
+fake8 GO:0003674
+fake8 GO:0005575
+fake8 GO:0008150
+fake7 GO:0003674
+fake7 GO:0005575
+fake7 GO:0008150
+fake6 GO:0003674
+fake6 GO:0005575
+fake6 GO:0008150
+fake5 GO:0003674
+fake5 GO:0005575
+fake5 GO:0008150
+fake9 GO:0003674
+fake9 GO:0005575
+fake9 GO:0008150
+fake69 GO:0003674
+fake69 GO:0005575
+fake69 GO:0008150
+fake60 GO:0003674
+fake60 GO:0005575
+fake60 GO:0008150
+fake79 GO:0003674
+fake79 GO:0005575
+fake79 GO:0008150
+fake78 GO:0003674
+fake78 GO:0005575
+fake78 GO:0008150
+fake77 GO:0003674
+fake77 GO:0005575
+fake77 GO:0008150
+fake76 GO:0003674
+fake76 GO:0005575
+fake76 GO:0008150
+fake75 GO:0003674
+fake75 GO:0005575
+fake75 GO:0008150
+fake74 GO:0003674
+fake74 GO:0005575
+fake74 GO:0008150
+fake73 GO:0003674
+fake73 GO:0005575
+fake73 GO:0008150
+fake72 GO:0003674
+fake72 GO:0005575
+fake72 GO:0008150
+fake71 GO:0003674
+fake71 GO:0005575
+fake71 GO:0008150
+fake70 GO:0003674
+fake70 GO:0005575
+fake70 GO:0008150
+fake89 GO:0003674
+fake89 GO:0005575
+fake89 GO:0008150
+fake88 GO:0003674
+fake88 GO:0005575
+fake88 GO:0008150
+fake87 GO:0003674
+fake87 GO:0005575
+fake87 GO:0008150
+fake86 GO:0003674
+fake86 GO:0005575
+fake86 GO:0008150
+fake85 GO:0003674
+fake85 GO:0005575
+fake85 GO:0008150
+fake84 GO:0003674
+fake84 GO:0005575
+fake84 GO:0008150
+fake83 GO:0003674
+fake83 GO:0005575
+fake83 GO:0008150
+fake82 GO:0003674
+fake82 GO:0005575
+fake82 GO:0008150
+fake81 GO:0003674
+fake81 GO:0005575
+fake81 GO:0008150
+fake80 GO:0003674
+fake80 GO:0005575
+fake80 GO:0008150
+fake13 GO:0003674
+fake13 GO:0005575
+fake13 GO:0008150
+fake12 GO:0003674
+fake12 GO:0005575
+fake12 GO:0008150
+fake11 GO:0003674
+fake11 GO:0005575
+fake11 GO:0008150
+fake99 GO:0003674
+fake99 GO:0005575
+fake99 GO:0008150
+fake10 GO:0003674
+fake10 GO:0005575
+fake10 GO:0008150
+fake98 GO:0003674
+fake98 GO:0005575
+fake98 GO:0008150
+fake97 GO:0003674
+fake97 GO:0005575
+fake97 GO:0008150
+fake96 GO:0003674
+fake96 GO:0005575
+fake96 GO:0008150
+fake95 GO:0003674
+fake95 GO:0005575
+fake95 GO:0008150
+fake94 GO:0003674
+fake94 GO:0005575
+fake94 GO:0008150
+fake19 GO:0003674
+fake19 GO:0005575
+fake19 GO:0008150
+fake18 GO:0003674
+fake18 GO:0005575
+fake18 GO:0008150
+fake17 GO:0003674
+fake17 GO:0005575
+fake17 GO:0008150
+fake16 GO:0003674
+fake16 GO:0005575
+fake16 GO:0008150
+fake15 GO:0003674
+fake15 GO:0005575
+fake15 GO:0008150
+fake14 GO:0003674
+fake14 GO:0005575
+fake14 GO:0008150
+fake4 GO:0003674
+fake4 GO:0005575
+fake4 GO:0008150
+fake3 GO:0003674
+fake3 GO:0005575
+fake3 GO:0008150
+fake2 GO:0003674
+fake2 GO:0005575
+fake2 GO:0008150
+fake1 GO:0003674
+fake1 GO:0005575
+fake1 GO:0008150
+fake93 GO:0003674
+fake93 GO:0005575
+fake93 GO:0008150
+fake92 GO:0003674
+fake92 GO:0005575
+fake92 GO:0008150
+fake91 GO:0003674
+fake91 GO:0005575
+fake91 GO:0008150
+fake90 GO:0003674
+fake90 GO:0005575
+fake90 GO:0008150
+fake24 GO:0003674
+fake24 GO:0005575
+fake24 GO:0008150
+fake23 GO:0003674
+fake23 GO:0005575
+fake23 GO:0008150
+fake22 GO:0003674
+fake22 GO:0005575
+fake22 GO:0008150
+fake21 GO:0003674
+fake21 GO:0005575
+fake21 GO:0008150
+fake20 GO:0003674
+fake20 GO:0005575
+fake20 GO:0008150
+fake29 GO:0003674
+fake29 GO:0005575
+fake29 GO:0008150
+fake28 GO:0003674
+fake28 GO:0005575
+fake28 GO:0008150
+fake27 GO:0003674
+fake27 GO:0005575
+fake27 GO:0008150
+fake26 GO:0003674
+fake26 GO:0005575
+fake26 GO:0008150
+fake25 GO:0003674
+fake25 GO:0005575
+fake25 GO:0008150
+fake35 GO:0003674
+fake35 GO:0005575
+fake35 GO:0008150
+fake34 GO:0003674
+fake34 GO:0005575
+fake34 GO:0008150
+fake33 GO:0003674
+fake33 GO:0005575
+fake33 GO:0008150
+fake32 GO:0003674
+fake32 GO:0005575
+fake32 GO:0008150
+fake31 GO:0003674
+fake31 GO:0005575
+fake31 GO:0008150
+fake30 GO:0003674
+fake30 GO:0005575
+fake30 GO:0008150
+fake39 GO:0003674
+fake39 GO:0005575
+fake39 GO:0008150
+fake38 GO:0003674
+fake38 GO:0005575
+fake38 GO:0008150
+fake37 GO:0003674
+fake37 GO:0005575
+fake37 GO:0008150
+fake36 GO:0003674
+fake36 GO:0005575
+fake36 GO:0008150
b
diff -r 000000000000 -r 52964064db8a test-data/study.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/study.txt Fri Jan 11 06:18:35 2019 -0500
b
@@ -0,0 +1,20 @@
+fake1
+fake2
+fake3
+fake4
+fake5
+fake6
+fake7
+fake8
+fake9
+fake10
+fake11
+fake12
+fake13
+fake14
+fake15
+fake16
+fake17
+fake18
+fake19
+fake20