changeset 2:ab492df30cdf draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit 4a3c9f195ba5d899b1a1ce5e80281cdf230f456a
author iuc
date Mon, 23 Oct 2017 11:19:12 -0400
parents 9d1256d9ef0b
children 783e8b70b047
files goseq.r goseq.xml test-data/dge_list_zf.tab test-data/gc.tab test-data/gene_length_zf.tab test-data/getgo.danRer10.tab test-data/getgo.hg38.tab test-data/go_terms.tab test-data/goseq_analysis.RData test-data/length.tab test-data/length_bias_plot.pdf test-data/nobias.tab test-data/samp.tab test-data/sample_vs_wallenius_plot.pdf test-data/wal.tab
diffstat 15 files changed, 1625 insertions(+), 180 deletions(-) [+]
line wrap: on
line diff
--- a/goseq.r	Sun Jun 11 08:57:39 2017 -0400
+++ b/goseq.r	Mon Oct 23 11:19:12 2017 -0400
@@ -11,8 +11,8 @@
 option_list <- list(
     make_option(c("-d", "--dge_file"), type="character", help="Path to file with differential gene expression result"),
     make_option(c("-w","--wallenius_tab"), type="character", help="Path to output file with P-values estimated using wallenius distribution."),
-    make_option(c("-s","--sampling_tab"), type="character", default=FALSE, help="Path to output file with P-values estimated using wallenius distribution."),
-    make_option(c("-n","--nobias_tab"), type="character", default=FALSE, help="Path to output file with P-values estimated using wallenius distribution and no correction for gene length bias."),
+    make_option(c("-s","--sampling_tab"), type="character", default=FALSE, help="Path to output file with P-values estimated using sampling distribution."),
+    make_option(c("-n","--nobias_tab"), type="character", default=FALSE, help="Path to output file with P-values estimated using hypergeometric distribution and no correction for gene length bias."),
     make_option(c("-l","--length_bias_plot"), type="character", default=FALSE, help="Path to length-bias plot."),
     make_option(c("-sw","--sample_vs_wallenius_plot"), type="character", default=FALSE, help="Path to plot comparing sampling with wallenius p-values."),
     make_option(c("-r", "--repcnt"), type="integer", default=100, help="Number of repeats for sampling"),
@@ -23,7 +23,9 @@
     make_option(c("-p", "--p_adj_method"), default="BH", type="character", help="Multiple hypothesis testing correction method to use"),
     make_option(c("-cat", "--use_genes_without_cat"), default=FALSE, type="logical",
                 help="A large number of gene may have no GO term annotated. If this option is set to FALSE, genes without category will be ignored in the calculation of p-values(default behaviour). If TRUE these genes will count towards the total number of genes outside the tested category (default behaviour prior to version 1.15.2)."),
-    make_option(c("-plots", "--make_plots"), default=FALSE, type="logical", help="produce diagnostic plots?")
+    make_option(c("-plots", "--make_plots"), default=FALSE, type="logical", help="produce diagnostic plots?"),
+    make_option(c("-fc", "--fetch_cats"), default=NULL, type="character", help="Categories to get can include one or more of GO:CC, GO:BP, GO:MF, KEGG"),
+    make_option(c("-rd", "--rdata"), default=NULL, type="character", help="Path to RData output file.")
     )
 
 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
@@ -44,15 +46,27 @@
 p_adj_method = args$p_adj_method
 use_genes_without_cat = args$use_genes_without_cat
 make_plots = args$make_plots
+rdata = args$rdata
+
+# Fall back to the category set used before --fetch_cats existed, so fetch_cats is always defined when getgo() is called
+fetch_cats = c("GO:CC", "GO:BP", "GO:MF", "KEGG")
+if (!is.null(args$fetch_cats)) fetch_cats = unlist(strsplit(args$fetch_cats, ","))
 
 # format DE genes into named vector suitable for goseq
-dge_table = read.delim(dge_file, header = FALSE, sep="\t")
+# check if header is present
+first_line = read.delim(dge_file, header = FALSE, nrow=1)
+second_col = toupper(first_line[, ncol(first_line)])
+if (second_col == TRUE || second_col == FALSE) {
+    dge_table = read.delim(dge_file, header = FALSE, sep="\t")
+} else {
+    dge_table = read.delim(dge_file, header = TRUE, sep="\t")
+}
 genes = as.numeric(as.logical(dge_table[,ncol(dge_table)])) # Last column contains TRUE/FALSE
 names(genes) = dge_table[,1] # Assuming first column contains gene names
 
 # gene lengths, assuming last column
 if (length_file != "FALSE" ) {
-  first_line = read.delim(dge_file, header = FALSE, nrow=1)
+  first_line = read.delim(length_file, header = FALSE, nrow=1)
   if (is.numeric(first_line[, ncol(first_line)])) {
     length_table = read.delim(length_file, header=FALSE, sep="\t", check.names=FALSE)
     } else {
@@ -66,7 +80,7 @@
 
 # Estimate PWF
 
-if (make_plots == TRUE) {
+if (isTRUE(as.logical(make_plots))) { # logical FALSE never equals the string 'false', so coerce instead of comparing text
   pdf(length_bias_plot)
 }
 pwf=nullp(genes, genome = genome, id = gene_id, bias.data = gene_lengths, plot.fit=make_plots)
@@ -74,7 +88,7 @@
 
 # Fetch GO annotations if category_file hasn't been supplied:
 if (category_file == "FALSE") {
-  go_map=getgo(genes = names(genes), genome = genome, id = gene_id, fetch.cats=c("GO:CC", "GO:BP", "GO:MF", "KEGG"))
+  go_map=getgo(genes = names(genes), genome=genome, id=gene_id, fetch.cats=fetch_cats)
   } else {
   # check for header: first entry in first column must be present in genes, else it's a header
   first_line = read.delim(category_file, header = FALSE, nrow=1)
@@ -103,7 +117,13 @@
 
 # Sampling distribution
 if (repcnt > 0) {
+
+  # Send goseq's sampling progress output to /dev/null so it doesn't fill stdout
+  zz <- file("/dev/null", open = "wt")
+  sink(zz)
   GO.samp=goseq(pwf, genome = genome, id = gene_id, method="Sampling", repcnt=repcnt, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
+  sink()
+  close(zz) # release the connection once normal output has been restored
   GO.samp$p.adjust.over_represented = p.adjust(GO.samp$over_represented_pvalue, method=p_adj_method)
   GO.samp$p.adjust.under_represented = p.adjust(GO.samp$under_represented_pvalue, method=p_adj_method)
   write.table(GO.samp, sampling_tab, sep="\t", row.names = FALSE, quote = FALSE)
@@ -118,4 +138,10 @@
   }
 }
 
+# Output RData file. The Galaxy wrapper always passes --rdata 'true' or 'false', so test the value, not just its presence
+if (!is.null(args$rdata) && tolower(args$rdata) != "false") {
+  save.image(file = "goseq_analysis.RData")
+}
+
+
 sessionInfo()
--- a/goseq.xml	Sun Jun 11 08:57:39 2017 -0400
+++ b/goseq.xml	Mon Oct 23 11:19:12 2017 -0400
@@ -1,8 +1,12 @@
-<tool id="goseq" name="goseq" version="0.2.2">
+<tool id="goseq" name="goseq" version="1.26.0">
     <description>tests for overrepresented gene categories</description>
     <requirements>
         <requirement type="package" version="1.3.2">r-optparse</requirement>
-        <requirement type="package" version="1.22.0">bioconductor-goseq</requirement>
+        <requirement type="package" version="1.26.0">bioconductor-goseq</requirement>
+        <requirement type="package" version="3.3.0">bioconductor-org.hs.eg.db</requirement>
+        <requirement type="package" version="3.4.0">bioconductor-org.dm.eg.db</requirement>
+        <requirement type="package" version="3.4.1">bioconductor-org.dr.eg.db</requirement>
+        <requirement type="package" version="3.4.0">bioconductor-org.mm.eg.db</requirement>
     </requirements>
     <stdio>
         <regex match="Execution halted"
@@ -18,122 +22,334 @@
                level="fatal"
                description="An undefined error occured, please check your input carefully and contact your administrator." />
     </stdio>
+    <version_command><![CDATA[
+echo $(R --version | grep version | grep -v GNU)", goseq version" $(R --vanilla --slave -e "library(goseq); cat(sessionInfo()\$otherPkgs\$goseq\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Hs.eg.db version" $(R --vanilla --slave -e "library(org.Hs.eg.db); cat(sessionInfo()\$otherPkgs\$org.Hs.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Dr.eg.db version" $(R --vanilla --slave -e "library(org.Dr.eg.db); cat(sessionInfo()\$otherPkgs\$org.Dr.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Dm.eg.db version" $(R --vanilla --slave -e "library(org.Dm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Dm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Mm.eg.db version" $(R --vanilla --slave -e "library(org.Mm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Mm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+    ]]></version_command>
     <command><![CDATA[
-        Rscript '$__tool_directory__'/goseq.r --dge_file '$dge_file'
-        --length_file '$length_file'
-        --category_file '$category_file'
-        #if $methods['wallenius']:
-        --wallenius_tab '$wallenius_tab'
-        #end if
-        #if $methods['hypergeometric']:
-        --nobias_tab '$nobias_tab'
-        #end if
-        --repcnt '$methods.repcnt'
-        --sampling_tab '$sampling_tab'
-        --p_adj_method '$p_adj_method'
-        --use_genes_without_cat '$use_genes_without_cat'
-        --make_plots '$make_plots'
-        --length_bias_plot '$length_bias_plot'
-        --sample_vs_wallenius_plot '$sample_vs_wallenius_plot'
+Rscript '$__tool_directory__/goseq.r'
+
+--dge_file '$dge_file'
+--length_file '$length_file'
+
+#if $categorySource.catSource == 'getgo':
+    --genome $categorySource.genome
+    --gene_id $categorySource.gene_id
+    --fetch_cats '$categorySource.fetchcats'
+#elif $categorySource.catSource == 'history':
+    --category_file '$categorySource.category_file'
+#end if
+
+#if $methods['wallenius']:
+    --wallenius_tab '$wallenius_tab'
+#end if
+#if $methods['hypergeometric']:
+    --nobias_tab '$nobias_tab'
+#end if
+--repcnt '$methods.repcnt'
+--sampling_tab '$sampling_tab'
+
+--make_plots '$out.make_plots'
+--length_bias_plot '$length_bias_plot'
+--sample_vs_wallenius_plot '$sample_vs_wallenius_plot'
+--rdata '$out.rdata_out'
+
+--p_adj_method '$adv.p_adj_method'
+--use_genes_without_cat '$adv.use_genes_without_cat'
+
     ]]></command>
+
+    <!-- Input Files-->
     <inputs>
-        <param name="dge_file" help="A tabular file with gene names in the first column, and TRUE or FALSE in the last column. TRUE means a gene is differentially expressed. See help section for details." label="Differentially expressed gene file" type="data" format="tabular" />
-        <param name="length_file" label="Gene length file for length bias correction" help="You can calculate the gene length using the get length and gc content tool" type="data" format="tabular" />
-        <param name="category_file" label="Gene category file" help="You can obtain a mapping of gene id to gene ontology using the getgo tool" type="data" format="tabular" />
-        <param name="use_genes_without_cat" help="For example, a large number of gene may have no GO term annotated. If this option is set to FALSE, those genes will be ignored in the calculation of p-values. If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested"
-               label="Count genes without any category?" type="boolean"/>
-        <section name="methods" title="Method options" expanded="True">
-            <param name="wallenius" type="boolean" checked="true" label="Use wallenius method" help="See help for details" />
-            <param name="hypergeometric" type="boolean" checked="false" label="Use hypergeometric method" help="Does not use gene length information. See help for details" />
-            <param name="repcnt" help="Draw this many random control gene sets. Set to 0 to not do sampling. Larger values take a long time" label="sampling depth" size="3" type="integer" min="0" max="10000" value="0" />
+        <param name="dge_file" type="data" format="tabular" label="Differentially expressed genes file" help="A tabular file with Gene IDs in the first column, and True or False in the second column. True means a gene is differentially expressed. See Help section for details."/>
+        <param name="length_file" type="data" format="tabular" label="Gene lengths file" help="You can calculate the gene lengths using featureCounts or the Gene length and GC content tool."/>
+        <conditional name="categorySource">
+            <param name="catSource" type="select" format="tabular" label="Gene categories" help="You can obtain a mapping of genes to categories (for some genomes only) or you can provide your own category file.">
+                <option value="getgo" selected="true">Get categories</option>
+                <option value="history">Use a category file from history</option>
+            </param>
+            <when value="getgo">
+                <param name="genome" type="select" label="Select a genome to use">
+                    <option value="hg38">Human (hg38)</option>
+                    <option value="mm10">Mouse (mm10)</option>
+                    <option value="dm6">Fruit fly (dm6)</option>
+                    <option value="danRer10">Zebrafish (danRer10)</option>
+                </param>
+                <param name="gene_id" type="select" label="Select Gene ID format" help="Supported Gene IDs to automatically fetch categories should either be Entrez, Ensembl, or gene symbols.">
+                    <option value="ensGene">Ensembl Gene ID</option>
+                    <option value="knownGene">Entrez Gene ID</option>
+                    <option value="geneSymbol">Gene Symbol</option>
+                </param>
+                <param name="fetchcats" type="select" multiple="True" display="checkboxes" label="Select one or more categories" help="By default, goseq tests all three major Gene Ontology branches; Cellular Component, Biological Process and Molecular Function. However, it is possible to limit testing to any combination and/or to also use KEGG pathways.">
+                    <option value="GO:CC" selected="True">GO: Cellular Component</option>
+                    <option value="GO:BP" selected="True">GO: Biological Process</option>
+                    <option value="GO:MF" selected="True">GO: Molecular Function</option>
+                    <option value="KEGG">KEGG</option>
+                </param>
+            </when>
+            <when value="history">
+                <param name="category_file" type="data" format="tabular" label="Gene category file"/>
+            </when>
+        </conditional>
+
+        <!-- Method Options -->
+        <section name="methods" title="Method Options">
+            <param name="wallenius" type="boolean" checked="true" label="Use Wallenius method" help="See help for details. Default: Yes" />
+            <param name="hypergeometric" type="boolean" checked="false" label="Use Hypergeometric method" help="Does not use gene length information. See help for details. Default: No" />
+            <param name="repcnt" type="integer" size="3" min="0" max="10000" value="0" label="Sampling number" help="Number of random samples to be calculated when sampling is used. Set to 0 to not do sampling. Larger values take a long time. Default: 0" />
         </section>
-        <param name="p_adj_method" type="select" label="Select a method for multiple hypothesis testing correction">
-            <option value="BH" selected="true">Benjamini-Hochberg [FDR] (1995)</option>
-            <option value="holm">Holm (1979)</option>
-            <option value="hommel">Hommel (1988)</option>
-            <option value="hochberg">Hochberg (1988)</option>
-            <option value="bonferroni">Bonferroni</option>
-            <option value="BY">Benjamini - Yekutieli (2001)</option>
-        </param>
-        <param help="These plots may help you compare the different p-value estimation methods that goseq can use." label="Produce diagnostic plots?" name="make_plots" type="boolean"></param>
+
+        <!-- Output Options -->
+        <section name="out" title="Output Options">
+            <param name="make_plots" type="boolean" checked="false" label="Produce diagnostic plots?" help="This will produce the length bias (PWF) plot. If both sampling and wallenius methods are selected, it will also produce a plot comparing their p-values. These plots may help you compare the different p-value estimation methods that goseq can use. Default: No" />
+            <param name="rdata_out" type="boolean" checked="false" label="Output RData file?" help="Output all the data used by R to construct the tables and plots, can be loaded into R. Default: No" />
+        </section>
+
+        <!-- Advanced Options -->
+        <section name="adv" title="Advanced Options">
+            <param name="p_adj_method" type="select" label="Select a method for multiple hypothesis testing correction">
+                <option value="BH" selected="True">Benjamini-Hochberg [FDR] (1995)</option>
+                <option value="holm">Holm (1979)</option>
+                <option value="hommel">Hommel (1988)</option>
+                <option value="hochberg">Hochberg (1988)</option>
+                <option value="bonferroni">Bonferroni</option>
+                <option value="BY">Benjamini - Yekutieli (2001)</option>
+            </param>
+            <param name="use_genes_without_cat" type="boolean" checked="false" label="Count genes without any category?" help="For example, a large number of genes may have no GO term annotated. If this option is set to No, those genes will be ignored in the calculation of p-values. If this option is set to Yes, then these genes will count towards the total number of genes outside the category being tested. This was the default behaviour for version 1.15.1 and earlier. Default: No"/>
+        </section>
     </inputs>
+
     <outputs>
-        <data name="length_bias_plot" format="pdf" label="length bias plot">
-            <filter>make_plots</filter>
-            <filter>methods['hypergeometric']</filter>
+        <data name="wallenius_tab" format="tabular" label="${tool.name} on ${on_string}: Ranked category list - Wallenius method">
+            <filter>methods['wallenius']</filter> <!-- only produced when the Wallenius method is enabled -->
         </data>
-        <data name="sample_vs_wallenius_plot" format="pdf" label="Plot P-value from sampling against wallenius distribution">
-            <filter>methods['repcnt'] != 0</filter>
-            <filter>methods['wallenius']</filter>
-            <filter>make_plots</filter>
-        </data>
-        <data name="nobias_tab" format="tabular" label="Ranked category list - no length bias correction">
-            <filter>methods['hypergeometric']</filter>
-        </data>
-        <data name="sampling_tab" format="tabular" label="Ranked category list - sampling">
+        <data name="sampling_tab" format="tabular" label="${tool.name} on ${on_string}: Ranked category list - Sampling method">
             <filter>methods['repcnt'] != 0</filter>
         </data>
-        <data name="wallenius_tab" format="tabular" label="Ranked category list - wallenius method">
+        <data name="nobias_tab" format="tabular" label="${tool.name} on ${on_string}: Ranked category list - Hypergeometric method">
+            <filter>methods['hypergeometric']</filter>
+        </data>
+        <data name="length_bias_plot" format="pdf" label="${tool.name} on ${on_string}: Length bias plot">
+            <filter>out['make_plots']</filter>
+        </data>
+        <data name="sample_vs_wallenius_plot" format="pdf" label="${tool.name} on ${on_string}: Sampling vs Wallenius P-values plot">
+            <filter>methods['repcnt'] != 0</filter>
             <filter>methods['wallenius']</filter>
+            <filter>out['make_plots']</filter>
+        </data>
+        <data name="rdata" format="rdata" from_work_dir="goseq_analysis.RData" label="${tool.name} on ${on_string}: RData file">
+            <filter>out['rdata_out']</filter>
         </data>
     </outputs>
+
     <tests>
-        <test>
+        <!-- Ensure Wallenius table is output -->
+        <test expect_num_outputs="1">
+            <param name="dge_file" value="dge_list.tab" ftype="tabular" />
+            <param name="length_file" value="gene_length.tab" ftype="tabular" />
+            <param name="catSource" value="history" />
+            <param name="category_file" value="category.tab" ftype="tabular" />
+            <param name="use_genes_without_cat" value="true" />
+            <output name="wallenius_tab" file="wal.tab" compare="contains" />
+        </test>
+        <!-- Ensure getting GO categories works -->
+        <test expect_num_outputs="1">
             <param name="dge_file" value="dge_list.tab" ftype="tabular"/>
             <param name="length_file" value="gene_length.tab" ftype="tabular"/>
-            <param name="category_file" value="category.tab" ftype="tabular"/>
+            <param name="catSource" value="getgo" />
+            <param name="genome" value="hg38" />
+            <param name="gene_id" value="ensGene" />
+            <param name="use_genes_without_cat" value="true" />
+            <output name="wallenius_tab" ftype="tabular" file="getgo.hg38.tab" compare="contains"/>
+        </test>
+        <!-- Ensure getting GO categories for another genome (zebrafish) works -->
+        <test expect_num_outputs="1">
+            <param name="dge_file" value="dge_list_zf.tab" ftype="tabular"/>
+            <param name="length_file" value="gene_length_zf.tab" ftype="tabular"/>
+            <param name="catSource" value="getgo" />
+            <param name="genome" value="danRer10"/>
+            <param name="gene_id" value="ensGene" />
+            <param name="use_genes_without_cat" value="true" />
+            <output name="wallenius_tab" ftype="tabular" file="getgo.danRer10.tab" compare="contains"/>
+        </test>
+        <!-- Ensure length bias plot works -->
+        <test expect_num_outputs="2">
+            <param name="dge_file" value="dge_list.tab" ftype="tabular" />
+            <param name="length_file" value="gene_length.tab" ftype="tabular" />
+            <param name="catSource" value="history" />
+            <param name="category_file" value="category.tab" ftype="tabular" />
+            <param name="make_plots" value="true" />
             <param name="use_genes_without_cat" value="true" />
-            <output name="wallenius_tab" file="wal.tab" compare="re_match"/>
+            <output name="length_bias_plot" ftype="pdf" file="length_bias_plot.pdf" compare="sim_size" />
+        </test>
+        <!-- Ensure hypergeometric works -->
+        <test expect_num_outputs="2">
+            <param name="dge_file" value="dge_list.tab" ftype="tabular" />
+            <param name="length_file" value="gene_length.tab" ftype="tabular" />
+            <param name="catSource" value="history" />
+            <param name="category_file" value="category.tab" ftype="tabular" />
+            <param name="use_genes_without_cat" value="true" />
+            <param name="hypergeometric" value="true" />
+            <output name="nobias_tab" file="nobias.tab" compare="contains" />
+        </test>
+        <!-- Ensure sampling vs wallenius works -->
+        <test expect_num_outputs="4">
+            <param name="dge_file" value="dge_list.tab" ftype="tabular" />
+            <param name="length_file" value="gene_length.tab" ftype="tabular" />
+            <param name="catSource" value="history" />
+            <param name="category_file" value="category.tab" ftype="tabular" />
+            <param name="use_genes_without_cat" value="true" />
+            <param name="make_plots" value="true" />
+            <param name="repcnt" value="1000" />
+            <output name="sampling_tab" file="samp.tab" compare="sim_size" />
+            <output name="length_bias_plot" ftype="pdf" file="length_bias_plot.pdf" compare="sim_size" />
+            <output name="sample_vs_wallenius_plot" ftype="pdf" file="sample_vs_wallenius_plot.pdf" compare="sim_size" />
+        </test>
+        <!-- Ensure RData output works -->
+        <test expect_num_outputs="2">
+            <param name="dge_file" value="dge_list.tab" ftype="tabular" />
+            <param name="length_file" value="gene_length.tab" ftype="tabular" />
+            <param name="catSource" value="history" />
+            <param name="category_file" value="category.tab" ftype="tabular" />
+            <param name="use_genes_without_cat" value="true" />
+            <param name="rdata_out" value="true" />
+            <output name="rdata" file="goseq_analysis.RData" compare="sim_size" />
         </test>
     </tests>
-    <help>
+
+    <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
 
-        **What it does**
+`Gene Ontology`_ (GO) analysis is widely used to reduce complexity and highlight biological processes in genome-wide expression studies, but standard methods give biased results on RNA-seq data due to over-detection of differential expression for long and highly expressed transcripts. This tool provides methods for performing GO analysis of RNA-seq data, taking length bias into account. The methods and software used by goseq are equally applicable to other category based tests of RNA-seq data, such as KEGG_ pathway analysis.
+
+Options map closely to the excellent goseq manual_.
+
+-----
+
+**Inputs**
 
-        Detects Gene Ontology and/or other user defined categories which are over/under-represented in RNA-seq data.
+*Differentially expressed genes file*
+
+goseq needs a tabular file containing information on differentially expressed genes. This should contain all genes assayed in the RNA-seq experiment. The file should have two columns with an optional header row. The first column should contain the Gene IDs, which must be unique within the file and not repeated. The second column should contain True or False. True means the gene should count as differentially expressed, False means it is not differentially expressed. You can use the "Compute an expression on every row" tool to create a True / False column for your dataset.
+
+Example:
 
-        Options map closely to the excellent manual_
+    =============== =====
+    ENSG00000236824 False
+    ENSG00000162526 False
+    ENSG00000090402 True
+    ENSG00000169188 False
+    ENSG00000124103 False
+    =============== =====
 
+*Gene lengths file*
 
-        **Input files**
+goseq needs information about the length of a gene to correct for potential length bias in differentially expressed genes using a Probability Weighting Function (PWF). The PWF can be thought of as a function that gives the probability that a gene will be differentially expressed, based on its length alone. The gene length file should have two columns with an optional header row. The first column should contain the Gene IDs, and the second column should contain the gene length in bp. If length data is unavailable for a gene, its entry should be set to NA. The goseq authors recommend using the gene lengths obtained from upstream summarization programs, such as **featureCounts**, if provided. Alternatively, the **Gene length and GC content** tool can produce such a file.
 
-        *DGE list:*
-        goseq needs a tabular file with genes in the first column, and TRUE or FALSE in the last column.
-        TRUE means the gene should count as differentially expressed, FALSE means it is not differentially expressed.
-        You can use the "Compute an expression on every row" tool to create a TRUE / FALSE column for your dataset.
+Example:
+
+    =============== =====
+    ENSG00000236824 13458
+    ENSG00000162526 2191
+    ENSG00000090402 6138
+    ENSG00000169188 3245
+    ENSG00000124103 1137
+    =============== =====
+
+*Gene categories file*
+
+This tool can get GO and KEGG categories for some genomes. The three GO categories are GO:MF (Molecular Function - molecular activities of gene products), GO:CC (Cellular Component - where gene products are active), GO:BP (Biological Process - pathways and larger processes made up of the activities of multiple gene products). If your genome is not available, you will also need a file describing the membership of genes in categories. The category file should have two columns with an optional header row, with the Gene ID in the first column and the category identifier in the second column. As the mapping between categories and genes is usually many-to-many, this table will usually have multiple rows with the same Gene ID and multiple rows with the same category identifier.
+
+Example:
 
-        *Gene length file:*
-        goseq needs information about the length of a gene to correct for potential length bias in differentially expressed genes
-        using a prodbability weight function (PWF).
-        The format of this file is tabular, with gene_id in the first column and length in the second column.
-        The "get length and gc content" tool can produce such a file.
+    =============== ===========
+    ENSG00000162526 GO\:0000003
+    ENSG00000198648 GO\:0000278
+    ENSG00000112312 GO\:0000278
+    ENSG00000174442 GO\:0000278
+    ENSG00000108953 GO\:0000278
+    =============== ===========
+
+-----
+
+**Outputs**
+
+* This tool outputs a tabular file containing a ranked list of gene categories, similar to below. The default output is the Wallenius method table. If the Sampling and/or Hypergeometric methods are also selected, additional tables are produced.
+* Optionally, this tool can also output some diagnostic plots and an RData file, see **Output Options** above.
+
+Example:
 
-        *Gene category file:*
-        You will also need a file describing the membership of genes in categories. The format of this file is gene_id in the first column,
-        category identifier in the second column.
+=========== =============== ================ ============ ========== ======================================== ========== =================== ====================
+*category*  *over_rep_pval* *under_rep_pval* *numDEInCat* *numInCat* *term*                                   *ontology* *p.adjust.over_rep* *p.adjust.under_rep*
+----------- --------------- ---------------- ------------ ---------- ---------------------------------------- ---------- ------------------- --------------------
+GO\:0005576  0.000054        0.999975         56           142       extracellular region                     CC         0.394825             1
+GO\:0005840  0.000143        0.999988         9            12        ribosome                                 CC         0.394825             1
+GO\:0044763  0.000252        0.999858         148          473       single-organism cellular process         BP         0.394825             1
+GO\:0044699  0.000279        0.999844         158          513       single-organism process                  BP         0.394825             1
+GO\:0065010  0.000428        0.999808         43           108       extracellular membrane-bounded organelle CC         0.394825             1
+GO\:0070062  0.000428        0.999808         43           108       extracellular exosome                    CC         0.394825             1
+=========== =============== ================ ============ ========== ======================================== ========== =================== ====================
 
-        **Method options**
+-----
+
+**Method options**
 
-        3 methods, "Wallenius", "Sampling" and "Hypergeometric", can be used to calculate the p-values as follows.
+3 methods, *Wallenius*, *Sampling* and *Hypergeometric*, can be used to calculate the p-values as follows.
+
+*Wallenius*
+
+approximates the true distribution of numbers of members of a category amongst DE genes by the Wallenius non-central hypergeometric distribution.
+This distribution assumes that within a category all genes have the same probability of being chosen. Therefore, this approximation works best when the range in probabilities obtained by the probability weighting function is small. This is the method used by default.
+
+*Sampling*
 
-        *"Wallenius"* approximates the true distribution of numbers of members of a category amongst DE genes by the Wallenius non-central hypergeometric distribution.
-        This distribution assumes that within a category all genes have the same probability of being chosen.
-        Therefore, this approximation works best when the range in probabilities obtained by the probability weighting function is small.
+uses random sampling to approximate the true distribution and uses it to calculate the p-values for over (and under) representation of categories.
+Although this is the most accurate method given a high enough value of sampling number, its use quickly becomes computationally prohibitive. It may sometimes be desirable to use random sampling to generate the null distribution for category
+membership, for example to check consistency against results from the Wallenius approximation. This is easily accomplished by using the method option to additionally specify sampling and the number of samples to generate.
+
+*Hypergeometric*
+
+assumes there is no bias in power to detect differential expression at all and calculates the p-values using a standard hypergeometric distribution (no length bias correction is performed). Useful if you wish to test the effect of length bias on your results.
+Caution: Hypergeometric should NEVER be used for producing results for biological interpretation of RNA-seq data. If length bias is truly not present in your data, goseq will produce a nearly flat PWF plot, no length bias correction will be applied to your data, and all methods will produce the same results.
 
-        *"Sampling"* uses random sampling to approximate the true distribution and uses it to calculate the p-values for over (and under) representation of categories.
-        Although this is the most accurate method given a high enough value of sampling depth, its use quickly becomes computationally prohibitive.
+-----
+
+**More Information**
 
-        *"Hypergeometric"* assumes there is no bias in power to detect differential expression at all and calculates the p-values using a standard hypergeometric distribution.
-        Useful if you wish to test the effect of selection bias on your results.
+In order to account for the length bias inherent to RNA-seq data when performing a GO analysis
+(or other category based tests), one cannot simply use the hypergeometric distribution as the null
+distribution for category membership, which is appropriate for data without DE length bias, such
+as microarray data. GO analysis of RNA-seq data requires the use of random sampling in order
+to generate a suitable null distribution for GO category membership and calculate each category's
+significance for over representation amongst DE genes.
+
+However, this random sampling is computationally expensive. In most cases, the Wallenius
+distribution can be used to approximate the true null distribution, without any significant loss in
+accuracy. The goseq package implements this approximation as its default option. Generating
+the null distribution using random sampling is also supported, but users
+should be aware that the default number of samples generated will not be enough to accurately
+call enrichment when there are a large number of GO terms.
 
-        CAUTION:  "Hypergeometric" should NEVER be used for producing results for biological interpretation.
-        If there is genuinely no bias in power to detect DE in your experiment, the PWF will reflect this and the other methods will produce accuracte results.
+Having established a null distribution, each category is then tested for over and under
+representation amongst the set of differentially expressed genes and the null is used to calculate a
+p-value for under and over representation.
+
+Having performed a GO analysis, you may now wish to interpret the results. If you wish to
+identify categories significantly enriched/unenriched below some p-value cutoff, it is necessary to
+first apply some kind of multiple hypothesis testing correction. For example, you can identify
+over-enriched GO categories using a 0.05 FDR (p.adjust) cutoff [Benjamini and Hochberg, 1995].
 
-        .. _manual: https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf
+Unless you are a machine, GO and KEGG category identifiers are probably not very meaningful to you.
+Information about each identifier can be obtained from the `Gene Ontology`_ and KEGG_ websites.
 
+.. _manual: https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf
+.. _Gene Ontology: http://www.geneontology.org
+.. _KEGG: http://www.genome.jp/kegg
 
-    </help>
+    ]]></help>
     <citations>
         <citation type="doi">10.1186/gb-2010-11-2-r14</citation>
     </citations>
-</tool>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dge_list_zf.tab	Mon Oct 23 11:19:12 2017 -0400
@@ -0,0 +1,632 @@
+ENSDARG00000092696	FALSE
+ENSDARG00000104569	TRUE
+ENSDARG00000008472	FALSE
+ENSDARG00000058451	FALSE
+ENSDARG00000035957	TRUE
+ENSDARG00000043514	FALSE
+ENSDARG00000058114	TRUE
+ENSDARG00000102885	FALSE
+ENSDARG00000005451	FALSE
+ENSDARG00000058839	FALSE
+ENSDARG00000073999	FALSE
+ENSDARG00000079611	FALSE
+ENSDARG00000042623	FALSE
+ENSDARG00000044136	FALSE
+ENSDARG00000060983	FALSE
+ENSDARG00000108060	FALSE
+ENSDARG00000036852	FALSE
+ENSDARG00000089303	FALSE
+ENSDARG00000090013	TRUE
+ENSDARG00000042902	FALSE
+ENSDARG00000075203	FALSE
+ENSDARG00000069601	TRUE
+ENSDARG00000003822	FALSE
+ENSDARG00000057100	FALSE
+ENSDARG00000088508	TRUE
+ENSDARG00000026454	TRUE
+ENSDARG00000006399	TRUE
+ENSDARG00000079457	TRUE
+ENSDARG00000090654	FALSE
+ENSDARG00000092483	FALSE
+ENSDARG00000060627	FALSE
+ENSDARG00000039626	FALSE
+ENSDARG00000088475	FALSE
+ENSDARG00000055548	FALSE
+ENSDARG00000006196	FALSE
+ENSDARG00000035559	FALSE
+ENSDARG00000015254	FALSE
+ENSDARG00000031203	FALSE
+ENSDARG00000012790	TRUE
+ENSDARG00000004017	TRUE
+ENSDARG00000074558	FALSE
+ENSDARG00000043077	TRUE
+ENSDARG00000098550	FALSE
+ENSDARG00000095224	FALSE
+ENSDARG00000045352	FALSE
+ENSDARG00000079659	FALSE
+ENSDARG00000104567	FALSE
+ENSDARG00000060169	FALSE
+ENSDARG00000104837	FALSE
+ENSDARG00000097827	FALSE
+ENSDARG00000012684	TRUE
+ENSDARG00000002912	FALSE
+ENSDARG00000104861	FALSE
+ENSDARG00000089292	FALSE
+ENSDARG00000054903	FALSE
+ENSDARG00000031681	FALSE
+ENSDARG00000027586	TRUE
+ENSDARG00000061216	FALSE
+ENSDARG00000051853	FALSE
+ENSDARG00000062192	FALSE
+ENSDARG00000076568	FALSE
+ENSDARG00000004774	FALSE
+ENSDARG00000033889	FALSE
+ENSDARG00000044092	FALSE
+ENSDARG00000010098	FALSE
+ENSDARG00000043635	TRUE
+ENSDARG00000076804	FALSE
+ENSDARG00000061363	FALSE
+ENSDARG00000039522	FALSE
+ENSDARG00000022218	FALSE
+ENSDARG00000040024	FALSE
+ENSDARG00000019897	FALSE
+ENSDARG00000058701	FALSE
+ENSDARG00000036875	TRUE
+ENSDARG00000103380	FALSE
+ENSDARG00000025391	FALSE
+ENSDARG00000101637	TRUE
+ENSDARG00000067656	TRUE
+ENSDARG00000089467	FALSE
+ENSDARG00000002494	FALSE
+ENSDARG00000102896	FALSE
+ENSDARG00000100279	FALSE
+ENSDARG00000095969	TRUE
+ENSDARG00000039130	TRUE
+ENSDARG00000077060	TRUE
+ENSDARG00000037815	FALSE
+ENSDARG00000074170	FALSE
+ENSDARG00000013063	TRUE
+ENSDARG00000035570	TRUE
+ENSDARG00000104710	FALSE
+ENSDARG00000070168	FALSE
+ENSDARG00000052728	TRUE
+ENSDARG00000025949	FALSE
+ENSDARG00000076379	FALSE
+ENSDARG00000032631	FALSE
+ENSDARG00000024324	FALSE
+ENSDARG00000010445	FALSE
+ENSDARG00000008235	TRUE
+ENSDARG00000044752	FALSE
+ENSDARG00000099996	FALSE
+ENSDARG00000067719	FALSE
+ENSDARG00000063437	FALSE
+ENSDARG00000088631	TRUE
+ENSDARG00000074849	FALSE
+ENSDARG00000041853	TRUE
+ENSDARG00000060002	TRUE
+ENSDARG00000042977	TRUE
+ENSDARG00000043105	FALSE
+ENSDARG00000013842	FALSE
+ENSDARG00000004870	TRUE
+ENSDARG00000019000	FALSE
+ENSDARG00000100710	TRUE
+ENSDARG00000077988	FALSE
+ENSDARG00000073985	FALSE
+ENSDARG00000028192	FALSE
+ENSDARG00000075881	FALSE
+ENSDARG00000100203	FALSE
+ENSDARG00000073933	FALSE
+ENSDARG00000070478	FALSE
+ENSDARG00000068415	FALSE
+ENSDARG00000032765	TRUE
+ENSDARG00000092550	TRUE
+ENSDARG00000015678	FALSE
+ENSDARG00000075463	FALSE
+ENSDARG00000075172	TRUE
+ENSDARG00000098883	FALSE
+ENSDARG00000075721	FALSE
+ENSDARG00000017058	FALSE
+ENSDARG00000096249	FALSE
+ENSDARG00000059234	TRUE
+ENSDARG00000070644	FALSE
+ENSDARG00000035630	FALSE
+ENSDARG00000029003	FALSE
+ENSDARG00000073737	FALSE
+ENSDARG00000094336	FALSE
+ENSDARG00000058679	FALSE
+ENSDARG00000069266	FALSE
+ENSDARG00000002571	FALSE
+ENSDARG00000103594	TRUE
+ENSDARG00000041314	FALSE
+ENSDARG00000044490	FALSE
+ENSDARG00000075870	FALSE
+ENSDARG00000062646	FALSE
+ENSDARG00000053517	TRUE
+ENSDARG00000043334	FALSE
+ENSDARG00000076667	FALSE
+ENSDARG00000063375	TRUE
+ENSDARG00000104696	FALSE
+ENSDARG00000039125	FALSE
+ENSDARG00000078546	FALSE
+ENSDARG00000071060	TRUE
+ENSDARG00000077011	TRUE
+ENSDARG00000009953	FALSE
+ENSDARG00000038868	FALSE
+ENSDARG00000103610	FALSE
+ENSDARG00000041619	TRUE
+ENSDARG00000026109	FALSE
+ENSDARG00000003564	FALSE
+ENSDARG00000087333	TRUE
+ENSDARG00000099183	TRUE
+ENSDARG00000044524	FALSE
+ENSDARG00000041449	FALSE
+ENSDARG00000058285	FALSE
+ENSDARG00000059529	TRUE
+ENSDARG00000003251	FALSE
+ENSDARG00000008785	FALSE
+ENSDARG00000003022	FALSE
+ENSDARG00000101317	FALSE
+ENSDARG00000013528	FALSE
+ENSDARG00000101333	FALSE
+ENSDARG00000053990	TRUE
+ENSDARG00000055792	FALSE
+ENSDARG00000013628	TRUE
+ENSDARG00000090941	TRUE
+ENSDARG00000096081	FALSE
+ENSDARG00000014274	FALSE
+ENSDARG00000059925	TRUE
+ENSDARG00000057698	FALSE
+ENSDARG00000073792	FALSE
+ENSDARG00000069808	FALSE
+ENSDARG00000071197	FALSE
+ENSDARG00000021735	TRUE
+ENSDARG00000052376	FALSE
+ENSDARG00000103235	FALSE
+ENSDARG00000060176	FALSE
+ENSDARG00000014106	FALSE
+ENSDARG00000039882	FALSE
+ENSDARG00000099771	TRUE
+ENSDARG00000073718	FALSE
+ENSDARG00000008377	TRUE
+ENSDARG00000068199	FALSE
+ENSDARG00000090770	FALSE
+ENSDARG00000038312	TRUE
+ENSDARG00000058287	FALSE
+ENSDARG00000004937	TRUE
+ENSDARG00000102417	TRUE
+ENSDARG00000012485	FALSE
+ENSDARG00000079878	FALSE
+ENSDARG00000096867	TRUE
+ENSDARG00000102082	FALSE
+ENSDARG00000045515	TRUE
+ENSDARG00000079723	TRUE
+ENSDARG00000093007	FALSE
+ENSDARG00000056783	FALSE
+ENSDARG00000057159	FALSE
+ENSDARG00000053571	FALSE
+ENSDARG00000102381	TRUE
+ENSDARG00000009436	FALSE
+ENSDARG00000075567	FALSE
+ENSDARG00000097650	FALSE
+ENSDARG00000015722	FALSE
+ENSDARG00000060372	TRUE
+ENSDARG00000037066	FALSE
+ENSDARG00000005163	TRUE
+ENSDARG00000018627	TRUE
+ENSDARG00000004771	FALSE
+ENSDARG00000002220	FALSE
+ENSDARG00000104388	TRUE
+ENSDARG00000068912	TRUE
+ENSDARG00000015780	FALSE
+ENSDARG00000096989	FALSE
+ENSDARG00000019195	FALSE
+ENSDARG00000100742	TRUE
+ENSDARG00000012234	FALSE
+ENSDARG00000103472	FALSE
+ENSDARG00000043938	FALSE
+ENSDARG00000043209	FALSE
+ENSDARG00000036772	FALSE
+ENSDARG00000089236	FALSE
+ENSDARG00000042277	FALSE
+ENSDARG00000056740	FALSE
+ENSDARG00000024669	FALSE
+ENSDARG00000103892	FALSE
+ENSDARG00000042892	FALSE
+ENSDARG00000036235	FALSE
+ENSDARG00000010700	FALSE
+ENSDARG00000054804	TRUE
+ENSDARG00000031506	FALSE
+ENSDARG00000063726	FALSE
+ENSDARG00000019646	FALSE
+ENSDARG00000034753	TRUE
+ENSDARG00000055338	FALSE
+ENSDARG00000105098	FALSE
+ENSDARG00000062190	FALSE
+ENSDARG00000060380	TRUE
+ENSDARG00000015222	TRUE
+ENSDARG00000074779	FALSE
+ENSDARG00000102252	TRUE
+ENSDARG00000017154	FALSE
+ENSDARG00000100899	TRUE
+ENSDARG00000012314	TRUE
+ENSDARG00000102380	FALSE
+ENSDARG00000043404	TRUE
+ENSDARG00000000857	FALSE
+ENSDARG00000044642	TRUE
+ENSDARG00000098622	FALSE
+ENSDARG00000061472	FALSE
+ENSDARG00000007955	TRUE
+ENSDARG00000025094	FALSE
+ENSDARG00000011125	FALSE
+ENSDARG00000013006	TRUE
+ENSDARG00000033285	FALSE
+ENSDARG00000098695	TRUE
+ENSDARG00000035132	FALSE
+ENSDARG00000005897	FALSE
+ENSDARG00000008867	TRUE
+ENSDARG00000070452	TRUE
+ENSDARG00000017004	FALSE
+ENSDARG00000042799	FALSE
+ENSDARG00000063157	FALSE
+ENSDARG00000101849	FALSE
+ENSDARG00000086345	FALSE
+ENSDARG00000044298	FALSE
+ENSDARG00000103135	FALSE
+ENSDARG00000056862	FALSE
+ENSDARG00000044575	FALSE
+ENSDARG00000097964	FALSE
+ENSDARG00000088950	FALSE
+ENSDARG00000045853	FALSE
+ENSDARG00000032206	FALSE
+ENSDARG00000074756	FALSE
+ENSDARG00000019417	TRUE
+ENSDARG00000102118	TRUE
+ENSDARG00000077983	FALSE
+ENSDARG00000099740	FALSE
+ENSDARG00000020777	FALSE
+ENSDARG00000045415	FALSE
+ENSDARG00000045514	FALSE
+ENSDARG00000000001	FALSE
+ENSDARG00000103917	FALSE
+ENSDARG00000104516	FALSE
+ENSDARG00000018903	FALSE
+ENSDARG00000041431	FALSE
+ENSDARG00000056896	FALSE
+ENSDARG00000011703	FALSE
+ENSDARG00000061185	FALSE
+ENSDARG00000026448	TRUE
+ENSDARG00000077357	FALSE
+ENSDARG00000043417	FALSE
+ENSDARG00000052371	FALSE
+ENSDARG00000104288	TRUE
+ENSDARG00000102898	FALSE
+ENSDARG00000101258	TRUE
+ENSDARG00000069373	TRUE
+ENSDARG00000060109	TRUE
+ENSDARG00000025350	TRUE
+ENSDARG00000079499	TRUE
+ENSDARG00000057983	FALSE
+ENSDARG00000055708	FALSE
+ENSDARG00000099651	FALSE
+ENSDARG00000014366	TRUE
+ENSDARG00000061257	FALSE
+ENSDARG00000019791	TRUE
+ENSDARG00000100560	FALSE
+ENSDARG00000027381	FALSE
+ENSDARG00000026294	TRUE
+ENSDARG00000029955	FALSE
+ENSDARG00000039263	FALSE
+ENSDARG00000045257	FALSE
+ENSDARG00000101347	FALSE
+ENSDARG00000018623	TRUE
+ENSDARG00000005236	FALSE
+ENSDARG00000089856	TRUE
+ENSDARG00000040131	FALSE
+ENSDARG00000105046	FALSE
+ENSDARG00000023712	FALSE
+ENSDARG00000062485	TRUE
+ENSDARG00000045305	FALSE
+ENSDARG00000015495	TRUE
+ENSDARG00000011405	FALSE
+ENSDARG00000063197	FALSE
+ENSDARG00000100428	TRUE
+ENSDARG00000061600	TRUE
+ENSDARG00000078761	FALSE
+ENSDARG00000004840	FALSE
+ENSDARG00000099657	FALSE
+ENSDARG00000036911	FALSE
+ENSDARG00000071424	FALSE
+ENSDARG00000089930	FALSE
+ENSDARG00000013776	FALSE
+ENSDARG00000061294	TRUE
+ENSDARG00000012044	FALSE
+ENSDARG00000070239	TRUE
+ENSDARG00000003845	FALSE
+ENSDARG00000033443	TRUE
+ENSDARG00000075441	TRUE
+ENSDARG00000025667	TRUE
+ENSDARG00000069478	FALSE
+ENSDARG00000103826	FALSE
+ENSDARG00000073848	FALSE
+ENSDARG00000003869	TRUE
+ENSDARG00000040478	FALSE
+ENSDARG00000040505	FALSE
+ENSDARG00000071449	FALSE
+ENSDARG00000101947	FALSE
+ENSDARG00000032340	FALSE
+ENSDARG00000037229	FALSE
+ENSDARG00000027777	TRUE
+ENSDARG00000055903	FALSE
+ENSDARG00000075180	FALSE
+ENSDARG00000039901	FALSE
+ENSDARG00000061629	FALSE
+ENSDARG00000038585	TRUE
+ENSDARG00000029859	FALSE
+ENSDARG00000013871	FALSE
+ENSDARG00000053474	FALSE
+ENSDARG00000070675	FALSE
+ENSDARG00000076657	FALSE
+ENSDARG00000071570	FALSE
+ENSDARG00000068833	FALSE
+ENSDARG00000052331	FALSE
+ENSDARG00000021383	TRUE
+ENSDARG00000043705	TRUE
+ENSDARG00000069295	TRUE
+ENSDARG00000071551	FALSE
+ENSDARG00000003027	FALSE
+ENSDARG00000058608	TRUE
+ENSDARG00000095826	FALSE
+ENSDARG00000097889	FALSE
+ENSDARG00000075914	FALSE
+ENSDARG00000070348	FALSE
+ENSDARG00000030824	FALSE
+ENSDARG00000030665	FALSE
+ENSDARG00000022652	FALSE
+ENSDARG00000096651	FALSE
+ENSDARG00000105288	TRUE
+ENSDARG00000062168	FALSE
+ENSDARG00000075444	TRUE
+ENSDARG00000003829	FALSE
+ENSDARG00000086107	TRUE
+ENSDARG00000062063	FALSE
+ENSDARG00000006621	TRUE
+ENSDARG00000091271	TRUE
+ENSDARG00000060411	FALSE
+ENSDARG00000043137	FALSE
+ENSDARG00000029415	FALSE
+ENSDARG00000042877	FALSE
+ENSDARG00000045398	FALSE
+ENSDARG00000040237	FALSE
+ENSDARG00000098477	FALSE
+ENSDARG00000062418	TRUE
+ENSDARG00000060705	FALSE
+ENSDARG00000040874	TRUE
+ENSDARG00000033965	TRUE
+ENSDARG00000103720	FALSE
+ENSDARG00000058041	FALSE
+ENSDARG00000054343	TRUE
+ENSDARG00000059760	FALSE
+ENSDARG00000062707	FALSE
+ENSDARG00000055106	TRUE
+ENSDARG00000041565	FALSE
+ENSDARG00000007943	FALSE
+ENSDARG00000059794	TRUE
+ENSDARG00000089888	FALSE
+ENSDARG00000096110	FALSE
+ENSDARG00000063321	FALSE
+ENSDARG00000017673	TRUE
+ENSDARG00000041734	FALSE
+ENSDARG00000038557	FALSE
+ENSDARG00000053744	FALSE
+ENSDARG00000040314	FALSE
+ENSDARG00000021059	FALSE
+ENSDARG00000075670	FALSE
+ENSDARG00000043493	TRUE
+ENSDARG00000098813	FALSE
+ENSDARG00000032114	TRUE
+ENSDARG00000035890	FALSE
+ENSDARG00000100296	TRUE
+ENSDARG00000008413	TRUE
+ENSDARG00000100813	FALSE
+ENSDARG00000052739	FALSE
+ENSDARG00000006514	FALSE
+ENSDARG00000078434	TRUE
+ENSDARG00000003920	FALSE
+ENSDARG00000043247	TRUE
+ENSDARG00000090821	FALSE
+ENSDARG00000059870	FALSE
+ENSDARG00000023062	FALSE
+ENSDARG00000059406	FALSE
+ENSDARG00000059804	FALSE
+ENSDARG00000103296	TRUE
+ENSDARG00000006642	FALSE
+ENSDARG00000043046	TRUE
+ENSDARG00000106090	FALSE
+ENSDARG00000090996	FALSE
+ENSDARG00000077533	TRUE
+ENSDARG00000095879	FALSE
+ENSDARG00000060847	FALSE
+ENSDARG00000087402	TRUE
+ENSDARG00000052082	FALSE
+ENSDARG00000093622	TRUE
+ENSDARG00000028327	FALSE
+ENSDARG00000012144	TRUE
+ENSDARG00000042793	TRUE
+ENSDARG00000098105	TRUE
+ENSDARG00000101627	TRUE
+ENSDARG00000075608	TRUE
+ENSDARG00000101800	FALSE
+ENSDARG00000079684	FALSE
+ENSDARG00000041665	FALSE
+ENSDARG00000027099	FALSE
+ENSDARG00000025549	FALSE
+ENSDARG00000025421	FALSE
+ENSDARG00000099385	FALSE
+ENSDARG00000102705	TRUE
+ENSDARG00000051800	FALSE
+ENSDARG00000056847	FALSE
+ENSDARG00000017929	FALSE
+ENSDARG00000039302	FALSE
+ENSDARG00000099943	FALSE
+ENSDARG00000040851	TRUE
+ENSDARG00000100788	TRUE
+ENSDARG00000045681	FALSE
+ENSDARG00000090543	FALSE
+ENSDARG00000103902	FALSE
+ENSDARG00000060215	FALSE
+ENSDARG00000045909	FALSE
+ENSDARG00000055917	FALSE
+ENSDARG00000070426	TRUE
+ENSDARG00000076110	TRUE
+ENSDARG00000090002	FALSE
+ENSDARG00000023111	FALSE
+ENSDARG00000012125	FALSE
+ENSDARG00000007172	FALSE
+ENSDARG00000026178	FALSE
+ENSDARG00000060445	FALSE
+ENSDARG00000018162	TRUE
+ENSDARG00000060661	FALSE
+ENSDARG00000013659	FALSE
+ENSDARG00000102744	FALSE
+ENSDARG00000071501	FALSE
+ENSDARG00000036090	FALSE
+ENSDARG00000045704	FALSE
+ENSDARG00000007221	TRUE
+ENSDARG00000041407	FALSE
+ENSDARG00000005356	FALSE
+ENSDARG00000102356	TRUE
+ENSDARG00000078479	TRUE
+ENSDARG00000029368	TRUE
+ENSDARG00000058992	FALSE
+ENSDARG00000027529	FALSE
+ENSDARG00000075369	TRUE
+ENSDARG00000063731	TRUE
+ENSDARG00000036542	TRUE
+ENSDARG00000014569	TRUE
+ENSDARG00000099298	FALSE
+ENSDARG00000091851	FALSE
+ENSDARG00000070698	FALSE
+ENSDARG00000088807	FALSE
+ENSDARG00000028725	FALSE
+ENSDARG00000039719	FALSE
+ENSDARG00000013732	FALSE
+ENSDARG00000037425	TRUE
+ENSDARG00000057151	FALSE
+ENSDARG00000092488	FALSE
+ENSDARG00000096454	FALSE
+ENSDARG00000070028	FALSE
+ENSDARG00000016710	FALSE
+ENSDARG00000075072	FALSE
+ENSDARG00000070867	TRUE
+ENSDARG00000037846	FALSE
+ENSDARG00000018600	FALSE
+ENSDARG00000013711	FALSE
+ENSDARG00000077081	FALSE
+ENSDARG00000067784	FALSE
+ENSDARG00000038731	TRUE
+ENSDARG00000035181	FALSE
+ENSDARG00000011240	FALSE
+ENSDARG00000075616	TRUE
+ENSDARG00000038442	TRUE
+ENSDARG00000002026	TRUE
+ENSDARG00000098618	FALSE
+ENSDARG00000069505	FALSE
+ENSDARG00000074892	FALSE
+ENSDARG00000055360	FALSE
+ENSDARG00000056797	FALSE
+ENSDARG00000017115	FALSE
+ENSDARG00000075533	FALSE
+ENSDARG00000091906	FALSE
+ENSDARG00000076501	TRUE
+ENSDARG00000017659	TRUE
+ENSDARG00000029660	FALSE
+ENSDARG00000004297	FALSE
+ENSDARG00000010280	FALSE
+ENSDARG00000022968	FALSE
+ENSDARG00000019033	TRUE
+ENSDARG00000015638	FALSE
+ENSDARG00000086550	FALSE
+ENSDARG00000007918	FALSE
+ENSDARG00000039677	TRUE
+ENSDARG00000073784	TRUE
+ENSDARG00000100862	FALSE
+ENSDARG00000003058	FALSE
+ENSDARG00000074644	TRUE
+ENSDARG00000013892	FALSE
+ENSDARG00000104348	FALSE
+ENSDARG00000053457	TRUE
+ENSDARG00000058351	FALSE
+ENSDARG00000034670	TRUE
+ENSDARG00000075169	FALSE
+ENSDARG00000078894	FALSE
+ENSDARG00000044090	FALSE
+ENSDARG00000092077	TRUE
+ENSDARG00000030694	TRUE
+ENSDARG00000016886	TRUE
+ENSDARG00000043339	FALSE
+ENSDARG00000103225	FALSE
+ENSDARG00000086411	TRUE
+ENSDARG00000104874	FALSE
+ENSDARG00000052747	FALSE
+ENSDARG00000075823	TRUE
+ENSDARG00000043843	FALSE
+ENSDARG00000077614	TRUE
+ENSDARG00000054220	FALSE
+ENSDARG00000009626	FALSE
+ENSDARG00000009023	TRUE
+ENSDARG00000045561	FALSE
+ENSDARG00000056559	FALSE
+ENSDARG00000017272	TRUE
+ENSDARG00000022788	FALSE
+ENSDARG00000018272	FALSE
+ENSDARG00000056473	FALSE
+ENSDARG00000099766	TRUE
+ENSDARG00000002991	FALSE
+ENSDARG00000104431	FALSE
+ENSDARG00000062465	FALSE
+ENSDARG00000054583	TRUE
+ENSDARG00000013802	FALSE
+ENSDARG00000057910	TRUE
+ENSDARG00000027984	FALSE
+ENSDARG00000045420	FALSE
+ENSDARG00000031359	FALSE
+ENSDARG00000056764	FALSE
+ENSDARG00000095594	FALSE
+ENSDARG00000076191	FALSE
+ENSDARG00000103251	FALSE
+ENSDARG00000039501	TRUE
+ENSDARG00000103589	FALSE
+ENSDARG00000051926	FALSE
+ENSDARG00000018681	FALSE
+ENSDARG00000014138	TRUE
+ENSDARG00000061462	TRUE
+ENSDARG00000102111	FALSE
+ENSDARG00000035819	FALSE
+ENSDARG00000076486	FALSE
+ENSDARG00000101799	FALSE
+ENSDARG00000008105	FALSE
+ENSDARG00000040387	FALSE
+ENSDARG00000060518	FALSE
+ENSDARG00000001244	FALSE
+ENSDARG00000005540	FALSE
+ENSDARG00000070055	FALSE
+ENSDARG00000073841	FALSE
+ENSDARG00000032117	FALSE
+ENSDARG00000087110	FALSE
+ENSDARG00000075641	TRUE
+ENSDARG00000001829	FALSE
+ENSDARG00000103786	TRUE
+ENSDARG00000058486	FALSE
+ENSDARG00000042272	FALSE
+ENSDARG00000000474	FALSE
+ENSDARG00000061635	FALSE
+ENSDARG00000095745	FALSE
+ENSDARG00000073801	FALSE
+ENSDARG00000028086	FALSE
+ENSDARG00000056583	FALSE
+ENSDARG00000033182	TRUE
+ENSDARG00000052703	FALSE
+ENSDARG00000051873	FALSE
+ENSDARG00000002267	FALSE
+ENSDARG00000091538	FALSE
+ENSDARG00000067713	FALSE
\ No newline at end of file
--- a/test-data/gc.tab	Sun Jun 11 08:57:39 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-ENSG00000162526	0.388349514563107
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_length_zf.tab	Mon Oct 23 11:19:12 2017 -0400
@@ -0,0 +1,632 @@
+ENSDARG00000092696	6983
+ENSDARG00000104569	7302
+ENSDARG00000008472	1848
+ENSDARG00000058451	5878
+ENSDARG00000035957	3057
+ENSDARG00000043514	6176
+ENSDARG00000058114	5901
+ENSDARG00000102885	7262
+ENSDARG00000005451	9161
+ENSDARG00000058839	3602
+ENSDARG00000073999	2558
+ENSDARG00000079611	7620
+ENSDARG00000042623	2387
+ENSDARG00000044136	7553
+ENSDARG00000060983	8413
+ENSDARG00000108060	1774
+ENSDARG00000036852	3185
+ENSDARG00000089303	6313
+ENSDARG00000090013	3748
+ENSDARG00000042902	2536
+ENSDARG00000075203	2844
+ENSDARG00000069601	2545
+ENSDARG00000003822	2571
+ENSDARG00000057100	9297
+ENSDARG00000088508	5775
+ENSDARG00000026454	4448
+ENSDARG00000006399	9992
+ENSDARG00000079457	4005
+ENSDARG00000090654	2211
+ENSDARG00000092483	4033
+ENSDARG00000060627	10580
+ENSDARG00000039626	8419
+ENSDARG00000088475	7743
+ENSDARG00000055548	5115
+ENSDARG00000006196	3835
+ENSDARG00000035559	3281
+ENSDARG00000015254	8601
+ENSDARG00000031203	9908
+ENSDARG00000012790	14778
+ENSDARG00000004017	8583
+ENSDARG00000074558	3683
+ENSDARG00000043077	13733
+ENSDARG00000098550	4556
+ENSDARG00000095224	5969
+ENSDARG00000045352	4803
+ENSDARG00000079659	9493
+ENSDARG00000104567	5098
+ENSDARG00000060169	8487
+ENSDARG00000104837	15706
+ENSDARG00000097827	8517
+ENSDARG00000012684	5525
+ENSDARG00000002912	14371
+ENSDARG00000104861	12106
+ENSDARG00000089292	11052
+ENSDARG00000054903	1097
+ENSDARG00000031681	9198
+ENSDARG00000027586	3839
+ENSDARG00000061216	9084
+ENSDARG00000051853	3428
+ENSDARG00000062192	13781
+ENSDARG00000076568	6265
+ENSDARG00000004774	5463
+ENSDARG00000033889	3833
+ENSDARG00000044092	5870
+ENSDARG00000010098	13674
+ENSDARG00000043635	9947
+ENSDARG00000076804	5325
+ENSDARG00000061363	7039
+ENSDARG00000039522	4253
+ENSDARG00000022218	6814
+ENSDARG00000040024	4194
+ENSDARG00000019897	3572
+ENSDARG00000058701	5106
+ENSDARG00000036875	3762
+ENSDARG00000103380	9352
+ENSDARG00000025391	3212
+ENSDARG00000101637	3824
+ENSDARG00000067656	2864
+ENSDARG00000089467	2928
+ENSDARG00000002494	9067
+ENSDARG00000102896	4563
+ENSDARG00000100279	4447
+ENSDARG00000095969	5974
+ENSDARG00000039130	2856
+ENSDARG00000077060	6138
+ENSDARG00000037815	5299
+ENSDARG00000074170	5991
+ENSDARG00000013063	4982
+ENSDARG00000035570	2532
+ENSDARG00000104710	10945
+ENSDARG00000070168	12705
+ENSDARG00000052728	4646
+ENSDARG00000025949	7324
+ENSDARG00000076379	4713
+ENSDARG00000032631	6641
+ENSDARG00000024324	12238
+ENSDARG00000010445	2030
+ENSDARG00000008235	2405
+ENSDARG00000044752	4377
+ENSDARG00000099996	19998
+ENSDARG00000067719	8015
+ENSDARG00000063437	7223
+ENSDARG00000088631	1836
+ENSDARG00000074849	8477
+ENSDARG00000041853	10888
+ENSDARG00000060002	7217
+ENSDARG00000042977	10000
+ENSDARG00000043105	4213
+ENSDARG00000013842	4742
+ENSDARG00000004870	9022
+ENSDARG00000019000	11768
+ENSDARG00000100710	8586
+ENSDARG00000077988	5539
+ENSDARG00000073985	2740
+ENSDARG00000028192	12781
+ENSDARG00000075881	7046
+ENSDARG00000100203	3507
+ENSDARG00000073933	7594
+ENSDARG00000070478	6618
+ENSDARG00000068415	5055
+ENSDARG00000032765	2550
+ENSDARG00000092550	9112
+ENSDARG00000015678	5184
+ENSDARG00000075463	5802
+ENSDARG00000075172	2813
+ENSDARG00000098883	2251
+ENSDARG00000075721	4428
+ENSDARG00000017058	7437
+ENSDARG00000096249	5481
+ENSDARG00000059234	9240
+ENSDARG00000070644	1838
+ENSDARG00000035630	6902
+ENSDARG00000029003	9484
+ENSDARG00000073737	5717
+ENSDARG00000094336	1190
+ENSDARG00000058679	4183
+ENSDARG00000069266	10601
+ENSDARG00000002571	2682
+ENSDARG00000103594	928
+ENSDARG00000041314	4275
+ENSDARG00000044490	7345
+ENSDARG00000075870	11038
+ENSDARG00000062646	7411
+ENSDARG00000053517	5740
+ENSDARG00000043334	8151
+ENSDARG00000076667	3222
+ENSDARG00000063375	3847
+ENSDARG00000104696	3177
+ENSDARG00000039125	3889
+ENSDARG00000078546	6524
+ENSDARG00000071060	2128
+ENSDARG00000077011	3434
+ENSDARG00000009953	7360
+ENSDARG00000038868	6297
+ENSDARG00000103610	7596
+ENSDARG00000041619	5566
+ENSDARG00000026109	6403
+ENSDARG00000003564	4830
+ENSDARG00000087333	9865
+ENSDARG00000099183	2558
+ENSDARG00000044524	7648
+ENSDARG00000041449	8251
+ENSDARG00000058285	3113
+ENSDARG00000059529	5731
+ENSDARG00000003251	9462
+ENSDARG00000008785	12496
+ENSDARG00000003022	6860
+ENSDARG00000101317	767
+ENSDARG00000013528	2476
+ENSDARG00000101333	5519
+ENSDARG00000053990	5614
+ENSDARG00000055792	17368
+ENSDARG00000013628	4304
+ENSDARG00000090941	6511
+ENSDARG00000096081	3096
+ENSDARG00000014274	5265
+ENSDARG00000059925	8714
+ENSDARG00000057698	4066
+ENSDARG00000073792	5938
+ENSDARG00000069808	8733
+ENSDARG00000071197	9277
+ENSDARG00000021735	2062
+ENSDARG00000052376	12054
+ENSDARG00000103235	19425
+ENSDARG00000060176	15732
+ENSDARG00000014106	4562
+ENSDARG00000039882	6357
+ENSDARG00000099771	18125
+ENSDARG00000073718	5999
+ENSDARG00000008377	14054
+ENSDARG00000068199	8305
+ENSDARG00000090770	9149
+ENSDARG00000038312	2805
+ENSDARG00000058287	11967
+ENSDARG00000004937	6400
+ENSDARG00000102417	3281
+ENSDARG00000012485	2414
+ENSDARG00000079878	9710
+ENSDARG00000096867	5557
+ENSDARG00000102082	3535
+ENSDARG00000045515	10949
+ENSDARG00000079723	12937
+ENSDARG00000093007	3846
+ENSDARG00000056783	7098
+ENSDARG00000057159	2999
+ENSDARG00000053571	3814
+ENSDARG00000102381	6394
+ENSDARG00000009436	5051
+ENSDARG00000075567	4394
+ENSDARG00000097650	4573
+ENSDARG00000015722	6845
+ENSDARG00000060372	3940
+ENSDARG00000037066	1761
+ENSDARG00000005163	3784
+ENSDARG00000018627	10344
+ENSDARG00000004771	7105
+ENSDARG00000002220	3131
+ENSDARG00000104388	844
+ENSDARG00000068912	4157
+ENSDARG00000015780	4913
+ENSDARG00000096989	6686
+ENSDARG00000019195	4215
+ENSDARG00000100742	4108
+ENSDARG00000012234	5848
+ENSDARG00000103472	3223
+ENSDARG00000043938	7756
+ENSDARG00000043209	1799
+ENSDARG00000036772	9115
+ENSDARG00000089236	6380
+ENSDARG00000042277	5639
+ENSDARG00000056740	3561
+ENSDARG00000024669	2033
+ENSDARG00000103892	13230
+ENSDARG00000042892	20839
+ENSDARG00000036235	5677
+ENSDARG00000010700	2648
+ENSDARG00000054804	1137
+ENSDARG00000031506	7044
+ENSDARG00000063726	9648
+ENSDARG00000019646	6498
+ENSDARG00000034753	7732
+ENSDARG00000055338	4177
+ENSDARG00000105098	8083
+ENSDARG00000062190	5174
+ENSDARG00000060380	11349
+ENSDARG00000015222	13442
+ENSDARG00000074779	5979
+ENSDARG00000102252	3982
+ENSDARG00000017154	3213
+ENSDARG00000100899	5296
+ENSDARG00000012314	1127
+ENSDARG00000102380	8676
+ENSDARG00000043404	16295
+ENSDARG00000000857	5705
+ENSDARG00000044642	6243
+ENSDARG00000098622	5089
+ENSDARG00000061472	6372
+ENSDARG00000007955	16942
+ENSDARG00000025094	2735
+ENSDARG00000011125	4890
+ENSDARG00000013006	2010
+ENSDARG00000033285	2837
+ENSDARG00000098695	4375
+ENSDARG00000035132	1717
+ENSDARG00000005897	20027
+ENSDARG00000008867	2919
+ENSDARG00000070452	2058
+ENSDARG00000017004	4225
+ENSDARG00000042799	2214
+ENSDARG00000063157	2030
+ENSDARG00000101849	3879
+ENSDARG00000086345	4230
+ENSDARG00000044298	2387
+ENSDARG00000103135	1322
+ENSDARG00000056862	6909
+ENSDARG00000044575	3414
+ENSDARG00000097964	2036
+ENSDARG00000088950	5399
+ENSDARG00000045853	5422
+ENSDARG00000032206	2688
+ENSDARG00000074756	2225
+ENSDARG00000019417	7102
+ENSDARG00000102118	11710
+ENSDARG00000077983	9307
+ENSDARG00000099740	3212
+ENSDARG00000020777	5152
+ENSDARG00000045415	4731
+ENSDARG00000045514	7847
+ENSDARG00000000001	6020
+ENSDARG00000103917	6106
+ENSDARG00000104516	5621
+ENSDARG00000018903	1712
+ENSDARG00000041431	3527
+ENSDARG00000056896	10842
+ENSDARG00000011703	9503
+ENSDARG00000061185	8168
+ENSDARG00000026448	7185
+ENSDARG00000077357	2996
+ENSDARG00000043417	15809
+ENSDARG00000052371	2990
+ENSDARG00000104288	6758
+ENSDARG00000102898	1538
+ENSDARG00000101258	3475
+ENSDARG00000069373	11861
+ENSDARG00000060109	4302
+ENSDARG00000025350	3059
+ENSDARG00000079499	2185
+ENSDARG00000057983	2469
+ENSDARG00000055708	4271
+ENSDARG00000099651	11762
+ENSDARG00000014366	10460
+ENSDARG00000061257	9337
+ENSDARG00000019791	7656
+ENSDARG00000100560	2327
+ENSDARG00000027381	9625
+ENSDARG00000026294	8159
+ENSDARG00000029955	6341
+ENSDARG00000039263	6944
+ENSDARG00000045257	8450
+ENSDARG00000101347	4030
+ENSDARG00000018623	13273
+ENSDARG00000005236	2190
+ENSDARG00000089856	7177
+ENSDARG00000040131	4180
+ENSDARG00000105046	3114
+ENSDARG00000023712	14580
+ENSDARG00000062485	4683
+ENSDARG00000045305	5099
+ENSDARG00000015495	4815
+ENSDARG00000011405	2194
+ENSDARG00000063197	19449
+ENSDARG00000100428	8157
+ENSDARG00000061600	1790
+ENSDARG00000078761	10250
+ENSDARG00000004840	5114
+ENSDARG00000099657	2038
+ENSDARG00000036911	4039
+ENSDARG00000071424	4853
+ENSDARG00000089930	2504
+ENSDARG00000013776	10632
+ENSDARG00000061294	6775
+ENSDARG00000012044	4062
+ENSDARG00000070239	6127
+ENSDARG00000003845	13793
+ENSDARG00000033443	3188
+ENSDARG00000075441	3266
+ENSDARG00000025667	7993
+ENSDARG00000069478	10422
+ENSDARG00000103826	8314
+ENSDARG00000073848	3576
+ENSDARG00000003869	10046
+ENSDARG00000040478	7565
+ENSDARG00000040505	6634
+ENSDARG00000071449	8506
+ENSDARG00000101947	10026
+ENSDARG00000032340	12492
+ENSDARG00000037229	3491
+ENSDARG00000027777	8402
+ENSDARG00000055903	5880
+ENSDARG00000075180	11836
+ENSDARG00000039901	8445
+ENSDARG00000061629	8442
+ENSDARG00000038585	3945
+ENSDARG00000029859	3936
+ENSDARG00000013871	5145
+ENSDARG00000053474	10321
+ENSDARG00000070675	4273
+ENSDARG00000076657	5202
+ENSDARG00000071570	11162
+ENSDARG00000068833	11503
+ENSDARG00000052331	1987
+ENSDARG00000021383	6024
+ENSDARG00000043705	2455
+ENSDARG00000069295	5916
+ENSDARG00000071551	8401
+ENSDARG00000003027	677
+ENSDARG00000058608	9615
+ENSDARG00000095826	3605
+ENSDARG00000097889	4644
+ENSDARG00000075914	9189
+ENSDARG00000070348	7743
+ENSDARG00000030824	4160
+ENSDARG00000030665	3971
+ENSDARG00000022652	3991
+ENSDARG00000096651	13023
+ENSDARG00000105288	6066
+ENSDARG00000062168	2607
+ENSDARG00000075444	1451
+ENSDARG00000003829	13963
+ENSDARG00000086107	3837
+ENSDARG00000062063	8676
+ENSDARG00000006621	10686
+ENSDARG00000091271	668
+ENSDARG00000060411	6912
+ENSDARG00000043137	2949
+ENSDARG00000029415	7316
+ENSDARG00000042877	2762
+ENSDARG00000045398	7995
+ENSDARG00000040237	7139
+ENSDARG00000098477	6721
+ENSDARG00000062418	11438
+ENSDARG00000060705	4812
+ENSDARG00000040874	5072
+ENSDARG00000033965	1711
+ENSDARG00000103720	2629
+ENSDARG00000058041	11578
+ENSDARG00000054343	3039
+ENSDARG00000059760	9897
+ENSDARG00000062707	4191
+ENSDARG00000055106	6541
+ENSDARG00000041565	1691
+ENSDARG00000007943	5302
+ENSDARG00000059794	7534
+ENSDARG00000089888	6348
+ENSDARG00000096110	3785
+ENSDARG00000063321	1166
+ENSDARG00000017673	3898
+ENSDARG00000041734	6607
+ENSDARG00000038557	6844
+ENSDARG00000053744	6031
+ENSDARG00000040314	9966
+ENSDARG00000021059	3195
+ENSDARG00000075670	5630
+ENSDARG00000043493	3014
+ENSDARG00000098813	11707
+ENSDARG00000032114	6174
+ENSDARG00000035890	6326
+ENSDARG00000100296	9835
+ENSDARG00000008413	4802
+ENSDARG00000100813	7638
+ENSDARG00000052739	6294
+ENSDARG00000006514	8385
+ENSDARG00000078434	6391
+ENSDARG00000003920	1309
+ENSDARG00000043247	3885
+ENSDARG00000090821	4737
+ENSDARG00000059870	5667
+ENSDARG00000023062	3977
+ENSDARG00000059406	7823
+ENSDARG00000059804	4973
+ENSDARG00000103296	4784
+ENSDARG00000006642	4915
+ENSDARG00000043046	5056
+ENSDARG00000106090	2991
+ENSDARG00000090996	2850
+ENSDARG00000077533	6127
+ENSDARG00000095879	1418
+ENSDARG00000060847	23408
+ENSDARG00000087402	12711
+ENSDARG00000052082	7716
+ENSDARG00000093622	6302
+ENSDARG00000028327	9237
+ENSDARG00000012144	5939
+ENSDARG00000042793	3727
+ENSDARG00000098105	5105
+ENSDARG00000101627	6055
+ENSDARG00000075608	397
+ENSDARG00000101800	6858
+ENSDARG00000079684	4722
+ENSDARG00000041665	7939
+ENSDARG00000027099	7524
+ENSDARG00000025549	5118
+ENSDARG00000025421	3193
+ENSDARG00000099385	5477
+ENSDARG00000102705	2478
+ENSDARG00000051800	3402
+ENSDARG00000056847	7621
+ENSDARG00000017929	2490
+ENSDARG00000039302	4009
+ENSDARG00000099943	1825
+ENSDARG00000040851	2769
+ENSDARG00000100788	7146
+ENSDARG00000045681	2839
+ENSDARG00000090543	6309
+ENSDARG00000103902	3149
+ENSDARG00000060215	11708
+ENSDARG00000045909	861
+ENSDARG00000055917	2191
+ENSDARG00000070426	5827
+ENSDARG00000076110	4980
+ENSDARG00000090002	1848
+ENSDARG00000023111	6716
+ENSDARG00000012125	3204
+ENSDARG00000007172	3265
+ENSDARG00000026178	680
+ENSDARG00000060445	6490
+ENSDARG00000018162	7777
+ENSDARG00000060661	3630
+ENSDARG00000013659	4736
+ENSDARG00000102744	3411
+ENSDARG00000071501	5593
+ENSDARG00000036090	6565
+ENSDARG00000045704	3313
+ENSDARG00000007221	10574
+ENSDARG00000041407	8668
+ENSDARG00000005356	11291
+ENSDARG00000102356	5023
+ENSDARG00000078479	2775
+ENSDARG00000029368	4290
+ENSDARG00000058992	8283
+ENSDARG00000027529	7190
+ENSDARG00000075369	5051
+ENSDARG00000063731	7422
+ENSDARG00000036542	2676
+ENSDARG00000014569	7375
+ENSDARG00000099298	5756
+ENSDARG00000091851	8467
+ENSDARG00000070698	3395
+ENSDARG00000088807	4319
+ENSDARG00000028725	4065
+ENSDARG00000039719	9199
+ENSDARG00000013732	1092
+ENSDARG00000037425	6768
+ENSDARG00000057151	15144
+ENSDARG00000092488	2085
+ENSDARG00000096454	6749
+ENSDARG00000070028	2866
+ENSDARG00000016710	1806
+ENSDARG00000075072	7179
+ENSDARG00000070867	11000
+ENSDARG00000037846	2326
+ENSDARG00000018600	15242
+ENSDARG00000013711	3301
+ENSDARG00000077081	3115
+ENSDARG00000067784	15701
+ENSDARG00000038731	4003
+ENSDARG00000035181	8260
+ENSDARG00000011240	4901
+ENSDARG00000075616	2423
+ENSDARG00000038442	7459
+ENSDARG00000002026	3070
+ENSDARG00000098618	6180
+ENSDARG00000069505	2082
+ENSDARG00000074892	12773
+ENSDARG00000055360	7068
+ENSDARG00000056797	5199
+ENSDARG00000017115	3193
+ENSDARG00000075533	7239
+ENSDARG00000091906	8184
+ENSDARG00000076501	2093
+ENSDARG00000017659	3157
+ENSDARG00000029660	4699
+ENSDARG00000004297	3420
+ENSDARG00000010280	2807
+ENSDARG00000022968	2165
+ENSDARG00000019033	7868
+ENSDARG00000015638	3285
+ENSDARG00000086550	3859
+ENSDARG00000007918	5611
+ENSDARG00000039677	2769
+ENSDARG00000073784	2745
+ENSDARG00000100862	6157
+ENSDARG00000003058	2889
+ENSDARG00000074644	6922
+ENSDARG00000013892	5034
+ENSDARG00000104348	3245
+ENSDARG00000053457	7776
+ENSDARG00000058351	4276
+ENSDARG00000034670	5499
+ENSDARG00000075169	1365
+ENSDARG00000078894	4153
+ENSDARG00000044090	7828
+ENSDARG00000092077	7775
+ENSDARG00000030694	2395
+ENSDARG00000016886	5872
+ENSDARG00000043339	8700
+ENSDARG00000103225	3074
+ENSDARG00000086411	2266
+ENSDARG00000104874	2861
+ENSDARG00000052747	8005
+ENSDARG00000075823	2917
+ENSDARG00000043843	3153
+ENSDARG00000077614	4371
+ENSDARG00000054220	8511
+ENSDARG00000009626	8938
+ENSDARG00000009023	3406
+ENSDARG00000045561	3929
+ENSDARG00000056559	8333
+ENSDARG00000017272	7464
+ENSDARG00000022788	3285
+ENSDARG00000018272	1753
+ENSDARG00000056473	594
+ENSDARG00000099766	3391
+ENSDARG00000002991	3520
+ENSDARG00000104431	3327
+ENSDARG00000062465	497
+ENSDARG00000054583	3030
+ENSDARG00000013802	2392
+ENSDARG00000057910	3804
+ENSDARG00000027984	4204
+ENSDARG00000045420	4427
+ENSDARG00000031359	8630
+ENSDARG00000056764	3765
+ENSDARG00000095594	4175
+ENSDARG00000076191	4849
+ENSDARG00000103251	10864
+ENSDARG00000039501	6145
+ENSDARG00000103589	7977
+ENSDARG00000051926	8640
+ENSDARG00000018681	5479
+ENSDARG00000014138	6138
+ENSDARG00000061462	15787
+ENSDARG00000102111	9423
+ENSDARG00000035819	1981
+ENSDARG00000076486	7968
+ENSDARG00000101799	3670
+ENSDARG00000008105	8787
+ENSDARG00000040387	4616
+ENSDARG00000060518	8331
+ENSDARG00000001244	1781
+ENSDARG00000005540	1155
+ENSDARG00000070055	5411
+ENSDARG00000073841	4224
+ENSDARG00000032117	3659
+ENSDARG00000087110	5459
+ENSDARG00000075641	2527
+ENSDARG00000001829	2989
+ENSDARG00000103786	15155
+ENSDARG00000058486	5559
+ENSDARG00000042272	6005
+ENSDARG00000000474	2047
+ENSDARG00000061635	5731
+ENSDARG00000095745	2551
+ENSDARG00000073801	6118
+ENSDARG00000028086	8075
+ENSDARG00000056583	9656
+ENSDARG00000033182	2331
+ENSDARG00000052703	6689
+ENSDARG00000051873	4234
+ENSDARG00000002267	8114
+ENSDARG00000091538	8260
+ENSDARG00000067713	853
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/getgo.danRer10.tab	Mon Oct 23 11:19:12 2017 -0400
@@ -0,0 +1,10 @@
+category	over_represented_pvalue	under_represented_pvalue	numDEInCat	numInCat	term	ontology	p.adjust.over_represented	p.adjust.under_represented
+GO:0031324	0.019289727841568	0.997004018824821	6	9	negative regulation of cellular metabolic process	BP	1	1
+GO:0040011	0.0219399815699082	0.993554925323586	10	19	locomotion	BP	1	1
+GO:0048738	0.0232122438335162	1	3	3	cardiac muscle tissue development	BP	1	1
+GO:0031101	0.0232122438335199	1	3	3	fin regeneration	BP	1	1
+GO:0042246	0.0232122438335199	1	3	3	tissue regeneration	BP	1	1
+GO:0007050	0.023212243833521	1	3	3	cell cycle arrest	BP	1	1
+GO:0019783	0.0254384360641003	0.998148600664743	4	5	ubiquitin-like protein-specific protease activity	MF	1	1
+GO:0036459	0.0254384360641003	0.998148600664743	4	5	thiol-dependent ubiquitinyl hydrolase activity	MF	1	1
+GO:0101005	0.0254384360641003	0.998148600664743	4	5	ubiquitinyl hydrolase activity	MF	1	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/getgo.hg38.tab	Mon Oct 23 11:19:12 2017 -0400
@@ -0,0 +1,10 @@
+category	over_represented_pvalue	under_represented_pvalue	numDEInCat	numInCat	term	ontology	p.adjust.over_represented	p.adjust.under_represented
+GO:0005576	4.72734295222294e-05	0.999979271555286	56	142	extracellular region	CC	0.329456825863645	1
+GO:0005840	0.000150633625443482	0.999987765310632	9	12	ribosome	CC	0.329456825863645	1
+GO:0044763	0.000210237360853053	0.999883100939053	148	473	single-organism cellular process	BP	0.329456825863645	1
+GO:0044699	0.000229197548055812	0.999873090122854	158	513	single-organism process	BP	0.329456825863645	1
+GO:0065010	0.000394294879818402	0.999824474827037	43	108	extracellular membrane-bounded organelle	CC	0.329456825863645	1
+GO:0070062	0.000394294879818402	0.999824474827037	43	108	extracellular exosome	CC	0.329456825863645	1
+GO:0008150	0.000409074003076654	0.999785179807024	191	656	biological_process	BP	0.329456825863645	1
+GO:0005488	0.000447980265756431	0.99975072864471	175	589	binding	MF	0.329456825863645	1
+GO:0005198	0.000511195682086445	0.999905085898726	13	21	structural molecule activity	MF	0.329456825863645	1
--- a/test-data/go_terms.tab	Sun Jun 11 08:57:39 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,85 +0,0 @@
-ENSG00000162526	GO:0000003
-ENSG00000162526	GO:0000166
-ENSG00000162526	GO:0000287
-ENSG00000162526	GO:0001882
-ENSG00000162526	GO:0001883
-ENSG00000162526	GO:0003674
-ENSG00000162526	GO:0003824
-ENSG00000162526	GO:0004672
-ENSG00000162526	GO:0004674
-ENSG00000162526	GO:0005488
-ENSG00000162526	GO:0005515
-ENSG00000162526	GO:0005524
-ENSG00000162526	GO:0005575
-ENSG00000162526	GO:0005622
-ENSG00000162526	GO:0005623
-ENSG00000162526	GO:0005737
-ENSG00000162526	GO:0006464
-ENSG00000162526	GO:0006468
-ENSG00000162526	GO:0006793
-ENSG00000162526	GO:0006796
-ENSG00000162526	GO:0007154
-ENSG00000162526	GO:0007165
-ENSG00000162526	GO:0007275
-ENSG00000162526	GO:0007276
-ENSG00000162526	GO:0007283
-ENSG00000162526	GO:0008150
-ENSG00000162526	GO:0008152
-ENSG00000162526	GO:0009987
-ENSG00000162526	GO:0016301
-ENSG00000162526	GO:0016310
-ENSG00000162526	GO:0016740
-ENSG00000162526	GO:0016772
-ENSG00000162526	GO:0016773
-ENSG00000162526	GO:0017076
-ENSG00000162526	GO:0019538
-ENSG00000162526	GO:0019953
-ENSG00000162526	GO:0022414
-ENSG00000162526	GO:0023052
-ENSG00000162526	GO:0030154
-ENSG00000162526	GO:0030554
-ENSG00000162526	GO:0032501
-ENSG00000162526	GO:0032502
-ENSG00000162526	GO:0032504
-ENSG00000162526	GO:0032549
-ENSG00000162526	GO:0032550
-ENSG00000162526	GO:0032553
-ENSG00000162526	GO:0032555
-ENSG00000162526	GO:0032559
-ENSG00000162526	GO:0035556
-ENSG00000162526	GO:0035639
-ENSG00000162526	GO:0036094
-ENSG00000162526	GO:0036211
-ENSG00000162526	GO:0043167
-ENSG00000162526	GO:0043168
-ENSG00000162526	GO:0043169
-ENSG00000162526	GO:0043170
-ENSG00000162526	GO:0043412
-ENSG00000162526	GO:0044237
-ENSG00000162526	GO:0044238
-ENSG00000162526	GO:0044260
-ENSG00000162526	GO:0044267
-ENSG00000162526	GO:0044424
-ENSG00000162526	GO:0044464
-ENSG00000162526	GO:0044699
-ENSG00000162526	GO:0044700
-ENSG00000162526	GO:0044702
-ENSG00000162526	GO:0044703
-ENSG00000162526	GO:0044707
-ENSG00000162526	GO:0044763
-ENSG00000162526	GO:0044767
-ENSG00000162526	GO:0046872
-ENSG00000162526	GO:0048232
-ENSG00000162526	GO:0048609
-ENSG00000162526	GO:0048869
-ENSG00000162526	GO:0050789
-ENSG00000162526	GO:0050794
-ENSG00000162526	GO:0050896
-ENSG00000162526	GO:0051704
-ENSG00000162526	GO:0051716
-ENSG00000162526	GO:0065007
-ENSG00000162526	GO:0071704
-ENSG00000162526	GO:0097159
-ENSG00000162526	GO:0097367
-ENSG00000162526	GO:1901265
-ENSG00000162526	GO:1901363
Binary file test-data/goseq_analysis.RData has changed
--- a/test-data/length.tab	Sun Jun 11 08:57:39 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-ENSG00000162526	103
Binary file test-data/length_bias_plot.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/nobias.tab	Mon Oct 23 11:19:12 2017 -0400
@@ -0,0 +1,3 @@
+category	over_represented_pvalue	under_represented_pvalue	numDEInCat	numInCat	term	ontology	p.adjust.over_represented	p.adjust.under_represented
+GO:0000278	0.0129827306163772	0.999244816412166	4	5	mitotic cell cycle	BP	0.0259654612327543	0.999244816412166
+GO:0000003	1	0.761	0	1	reproduction	BP	1	0.999244816412166
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/samp.tab	Mon Oct 23 11:19:12 2017 -0400
@@ -0,0 +1,3 @@
+category	over_represented_pvalue	under_represented_pvalue	numDEInCat	numInCat	term	ontology	p.adjust.over_represented	p.adjust.under_represented
+GO:0000278	0.016983016983017	1	4	5	mitotic cell cycle	BP	0.033966033966034	1
+GO:0000003	1	0.802197802197802	0	1	reproduction	BP	1	1
Binary file test-data/sample_vs_wallenius_plot.pdf has changed
--- a/test-data/wal.tab	Sun Jun 11 08:57:39 2017 -0400
+++ b/test-data/wal.tab	Mon Oct 23 11:19:12 2017 -0400
@@ -1,3 +1,3 @@
-category	over_represented_pvalue	under_represented_pvalue	numDEInCat	numInCat	term	ontology	p\.adjust.over_represented	p\.adjust.under_represented
-GO:0000278	0\.0122.+	0\.999.+	4	5	mitotic cell cycle	BP	0\.0245.+	0\.999.+
-GO:0000003	1	0\.796.+	0	1	reproduction	BP	1	0\.999.+
+category	over_represented_pvalue	under_represented_pvalue	numDEInCat	numInCat	term	ontology	p.adjust.over_represented	p.adjust.under_represented
+GO:0000278	0.0112350612534339	0.999376653834006	4	5	mitotic cell cycle	BP	0.0224701225068678	0.999376653834006
+GO:0000003	1	0.805913166914892	0	1	reproduction	BP	1	0.999376653834006