Repository 'goseq'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/goseq

Changeset 0:ade933eff007 (2016-11-17)
Next changeset 1:9d1256d9ef0b (2017-06-11)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit b7dcd020c6a15fa55f392cc09cbc37580d6e75c4
added:
goseq.r
goseq.xml
test-data/category.tab
test-data/dge_list.tab
test-data/gc.tab
test-data/gene_length.tab
test-data/go_terms.tab
test-data/length.tab
test-data/wal.tab
b
diff -r 000000000000 -r ade933eff007 goseq.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/goseq.r Thu Nov 17 16:40:19 2016 -0500
[
@@ -0,0 +1,142 @@
+# goseq.r: command-line wrapper around the Bioconductor goseq package.
+#
+# Reads a table of genes flagged TRUE/FALSE for differential expression,
+# estimates a probability weighting function (PWF) from the gene lengths and
+# writes one ranked category table per requested method (Wallenius,
+# hypergeometric, sampling), optionally with diagnostic plots.
+
+# On any error: print the message to stderr and quit with exit status 1.
+options( show.error.messages=FALSE, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, FALSE ) } )
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+suppressPackageStartupMessages({
+    library("goseq")
+    library("optparse")
+})
+
+# NOTE(review): some short flags below have more than one letter (-sw, -lf,
+# -cat_file, -cat, -plots). They are kept for compatibility; confirm that the
+# installed optparse accepts them, and prefer the long flags when calling.
+option_list <- list(
+    make_option(c("-d", "--dge_file"), type="character", help="Path to file with differential gene expression result"),
+    make_option(c("-w","--wallenius_tab"), type="character", help="Path to output file with P-values estimated using wallenius distribution."),
+    make_option(c("-s","--sampling_tab"), type="character", default=FALSE, help="Path to output file with P-values estimated using random sampling."),
+    make_option(c("-n","--nobias_tab"), type="character", default=FALSE, help="Path to output file with P-values estimated using hypergeometric distribution and no correction for gene length bias."),
+    make_option(c("-l","--length_bias_plot"), type="character", default=FALSE, help="Path to length-bias plot."),
+    make_option(c("-sw","--sample_vs_wallenius_plot"), type="character", default=FALSE, help="Path to plot comparing sampling with wallenius p-values."),
+    make_option(c("-r", "--repcnt"), type="integer", default=100, help="Number of repeats for sampling"),
+    make_option(c("-lf", "--length_file"), type="character", default="FALSE", help = "Path to tabular file mapping gene id to length"),
+    make_option(c("-cat_file", "--category_file"), default="FALSE", type="character", help = "Path to tabular file with gene_id <-> category mapping."),
+    make_option(c("-g", "--genome"), default=NULL, type="character", help = "Genome [used for looking up correct gene length]"),
+    make_option(c("-i", "--gene_id"), default=NULL, type="character", help = "Gene ID format of genes in DGE file"),
+    make_option(c("-p", "--p_adj_method"), default="BH", type="character", help="Multiple hypothesis testing correction method to use"),
+    make_option(c("-cat", "--use_genes_without_cat"), default=FALSE, type="logical",
+                help="A large number of gene may have no GO term annotated. If this option is set to FALSE, genes without category will be ignored in the calculation of p-values(default behaviour). If TRUE these genes will count towards the total number of genes outside the tested category (default behaviour prior to version 1.15.2)."),
+    make_option(c("-plots", "--make_plots"), default=FALSE, type="logical", help="produce diagnostic plots?")
+    )
+
+parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
+args <- parse_args(parser)
+
+# Vars:
+dge_file <- args$dge_file
+category_file <- args$category_file
+length_file <- args$length_file
+genome <- args$genome
+gene_id <- args$gene_id
+wallenius_tab <- args$wallenius_tab
+sampling_tab <- args$sampling_tab
+nobias_tab <- args$nobias_tab
+length_bias_plot <- args$length_bias_plot
+sample_vs_wallenius_plot <- args$sample_vs_wallenius_plot
+repcnt <- args$repcnt
+p_adj_method <- args$p_adj_method
+use_genes_without_cat <- args$use_genes_without_cat
+make_plots <- args$make_plots
+
+# TRUE when an output path was really supplied: an absent option (NULL), an
+# empty string, "None" and the "FALSE" default all mean "not requested".
+is_requested <- function(path) {
+  !is.null(path) && !is.na(path) && !(path %in% c("", "None", "FALSE"))
+}
+
+# format DE genes into named vector suitable for goseq
+dge_table <- read.delim(dge_file, header = FALSE, sep="\t")
+genes <- as.numeric(as.logical(dge_table[,ncol(dge_table)])) # Last column contains TRUE/FALSE
+names(genes) <- as.character(dge_table[,1]) # Assuming first column contains gene names
+
+# gene lengths, assuming last column
+if (length_file != "FALSE" ) {
+  # Header detection must look at the length file itself: a numeric last field
+  # on its first line means that line is data, i.e. there is no header row.
+  first_line <- read.delim(length_file, header = FALSE, nrows = 1)
+  has_header <- !is.numeric(first_line[, ncol(first_line)])
+  length_table <- read.delim(length_file, header = has_header, sep="\t", check.names=FALSE)
+  row.names(length_table) <- length_table[,1]
+  # Look lengths up by gene id so they line up with the order of 'genes'.
+  gene_lengths <- length_table[names(genes), ncol(length_table)]
+} else {
+  gene_lengths <- getlength(names(genes), genome, gene_id)
+}
+
+# Estimate PWF
+if (isTRUE(make_plots)) {
+  pdf(length_bias_plot)
+}
+pwf <- nullp(genes, genome = genome, id = gene_id, bias.data = gene_lengths, plot.fit=make_plots)
+graphics.off()
+
+# Fetch GO annotations if category_file hasn't been supplied:
+if (category_file == "FALSE") {
+  go_map <- getgo(genes = names(genes), genome = genome, id = gene_id, fetch.cats=c("GO:CC", "GO:BP", "GO:MF", "KEGG"))
+} else {
+  # check for header: first entry in first column must be present in genes, else it's a header
+  first_line <- read.delim(category_file, header = FALSE, nrows = 1)
+  has_header <- !(as.character(first_line[1, 1]) %in% names(genes))
+  go_map <- read.delim(category_file, header = has_header)
+}
+
+# Run goseq with the given method and append adjusted p-value columns.
+# Extra arguments (e.g. repcnt) are forwarded to goseq().
+run_goseq <- function(method, ...) {
+  res <- goseq(pwf, genome = genome, id = gene_id, method = method,
+               use_genes_without_cat = use_genes_without_cat, gene2cat = go_map, ...)
+  res$p.adjust.over_represented <- p.adjust(res$over_represented_pvalue, method = p_adj_method)
+  res$p.adjust.under_represented <- p.adjust(res$under_represented_pvalue, method = p_adj_method)
+  res
+}
+
+# wallenius approximation of p-values
+GO.wall <- NULL
+if (is_requested(wallenius_tab)) {
+  GO.wall <- run_goseq("Wallenius")
+  write.table(GO.wall, wallenius_tab, sep="\t", row.names = FALSE, quote = FALSE)
+}
+
+# hypergeometric (no length bias correction)
+if (is_requested(nobias_tab)) {
+  GO.nobias <- run_goseq("Hypergeometric")
+  write.table(GO.nobias, nobias_tab, sep="\t", row.names = FALSE, quote = FALSE)
+}
+
+# Sampling distribution
+if (repcnt > 0) {
+  GO.samp <- run_goseq("Sampling", repcnt = repcnt)
+  if (is_requested(sampling_tab)) {
+    write.table(GO.samp, sampling_tab, sep="\t", row.names = FALSE, quote = FALSE)
+  }
+  # Compare sampling with wallenius (needs the Wallenius result as well)
+  if (isTRUE(make_plots) && !is.null(GO.wall)) {
+    pdf(sample_vs_wallenius_plot)
+    # Reorder the sampling rows to follow the category order of GO.wall.
+    plot(log10(GO.wall[,2]), log10(GO.samp[match(GO.wall[,1], GO.samp[,1]),2]),
+         xlab="log10(Wallenius p-values)",ylab="log10(Sampling p-values)",
+         xlim=c(-3,0))
+    abline(0,1,col=3,lty=2)
+    graphics.off()
+  }
+}
+
+sessionInfo()
b
diff -r 000000000000 -r ade933eff007 goseq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/goseq.xml Thu Nov 17 16:40:19 2016 -0500
[
b'@@ -0,0 +1,140 @@\n+<tool id="goseq" name="goseq" version="0.2.2">\n+    <description>tests for overrepresented gene categories</description>\n+    <requirements>\n+        <requirement type="package" version="1.3.2">r-optparse</requirement>\n+        <requirement type="package" version="1.22.0">bioconductor-goseq</requirement>\n+    </requirements>\n+    <stdio>\n+        <regex match="Execution halted"\n+               source="both"\n+               level="fatal"\n+               description="Execution halted." />\n+        <regex match="Error in"\n+               source="both"\n+               level="fatal"\n+               description="An undefined error occured, please check your input carefully and contact your administrator." />\n+        <regex match="Fatal error"\n+               source="both"\n+               level="fatal"\n+               description="An undefined error occured, please check your input carefully and contact your administrator." />\n+    </stdio>\n+    <command><![CDATA[\n+        Rscript \'$__tool_directory__\'/goseq.r --dge_file \'$dge_file\'\n+        --length_file \'$length_file\'\n+        --category_file \'$category_file\'\n+        #if $methods[\'wallenius\']:\n+        --wallenius_tab \'$wallenius_tab\'\n+        #end if\n+        #if $methods[\'hypergeometric\']:\n+        --nobias_tab \'nobias_tab\'\n+        #end if\n+        --repcnt \'$methods.repcnt\'\n+        --sampling_tab \'$sampling_tab\'\n+        --p_adj_method \'$p_adj_method\'\n+        --use_genes_without_cat \'$use_genes_without_cat\'\n+        --make_plots \'$make_plots\'\n+        --length_bias_plot \'$length_bias_plot\'\n+        --sample_vs_wallenius_plot \'$sample_vs_wallenius_plot\'\n+    ]]></command>\n+    <inputs>\n+        <param name="dge_file" help="A tabular file with gene names in the first column, and TRUE or FALSE in the last column. TRUE means a gene is differentially expressed. See help section for details." 
label="Differentially expressed gene file" type="data" format="tabular" />\n+        <param name="length_file" label="Gene length file for length bias correction" help="You can calculate the gene length using the get length and gc content tool" type="data" format="tabular" />\n+        <param name="category_file" label="Gene category file" help="You can obtain a mapping of gene id to gene ontology using the getgo tool" type="data" format="tabular" />\n+        <param name="use_genes_without_cat" help="For example, a large number of gene may have no GO term annotated. If this option is set to FALSE, those genes will be ignored in the calculation of p-values. If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested"\n+               label="Count genes without any category?" type="boolean"/>\n+        <section name="methods" title="Method options" expanded="True">\n+            <param name="wallenius" type="boolean" checked="true" label="Use wallenius method" help="See help for details" />\n+            <param name="hypergeometric" type="boolean" checked="false" label="Use hypergeometric method" help="Does not use gene length information. See help for details" />\n+            <param name="repcnt" help="Draw this many random control gene sets. Set to 0 to not do sampling. 
Larger values take a long time" label="sampling depth" size="3" type="integer" min="0" max="10000" value="0" />\n+        </section>\n+        <param name="p_adj_method" type="select" label="Select a method for multiple hypothesis testing correction">\n+            <option value="BH" selected="true">Benjamini-Hochberg [FDR] (1995)</option>\n+            <option value="holm">Holm (1979)</option>\n+            <option value="hommel">Hommel (1988)</option>\n+            <option value="hochberg">Hochberg (1988)</option>\n+            <option value="bonferroni">Bonferroni</option>\n+            <option value="BY">Benjamini - Yekutieli (2001)</option>\n+        </param>\n+        <param help="These plots may help you compare the different p-value estimation meth'..b'nobias_tab" format="tabular" label="Ranked category list - no length bias correction">\n+            <filter>methods[\'hypergeometric\']</filter>\n+        </data>\n+        <data name="sampling_tab" format="tabular" label="Ranked category list - sampling">\n+            <filter>methods[\'repcnt\'] != 0</filter>\n+        </data>\n+        <data name="wallenius_tab" format="tabular" label="Ranked category list - wallenius method">\n+            <filter>methods[\'wallenius\']</filter>\n+        </data>\n+    </outputs>\n+    <tests>\n+        <test>\n+            <param name="dge_file" value="dge_list.tab" ftype="tabular"/>\n+            <param name="length_file" value="gene_length.tab" ftype="tabular"/>\n+            <param name="category_file" value="category.tab" ftype="tabular"/>\n+            <param name="use_genes_without_cat" value="true" />\n+            <output name="wallenius_tab" file="wal.tab" compare="re_match"/>\n+        </test>\n+    </tests>\n+    <help>\n+\n+        **What it does**\n+\n+        Detects Gene Ontology and/or other user defined categories which are over/under-represented in RNA-seq data.\n+\n+        Options map closely to the excellent manual_\n+\n+\n+        **Input 
files**\n+\n+        *DGE list:*\n+        goseq needs a tabular file with genes in the first column, and TRUE or FALSE in the last column.\n+        TRUE means the gene should count as differentially expressed, FALSE means it is not differentially expressed.\n+        You can use the "Compute an expression on every row" tool to create a TRUE / FALSE column for your dataset.\n+\n+        *Gene length file:*\n+        goseq needs information about the length of a gene to correct for potential length bias in differentially expressed genes\n+        using a probability weighting function (PWF).\n+        The format of this file is tabular, with gene_id in the first column and length in the second column.\n+        The "get length and gc content" tool can produce such a file.\n+\n+        *Gene category file:*\n+        You will also need a file describing the membership of genes in categories. The format of this file is gene_id in the first column,\n+        category name in the second column. 
If you are interested in gene ontology categories you can use the getgo tool to retrieve\n+        gene ontologies for model organisms, or you can construct your own file.\n+\n+        **Method options**\n+\n+        3 methods, "Wallenius", "Sampling" and "Hypergeometric", can be used to calculate the p-values as follows.\n+\n+        *"Wallenius"* approximates the true distribution of numbers of members of a category amongst DE genes by the Wallenius non-central hypergeometric distribution.\n+        This distribution assumes that within a category all genes have the same probability of being chosen.\n+        Therefore, this approximation works best when the range in probabilities obtained by the probability weighting function is small.\n+\n+        *"Sampling"* uses random sampling to approximate the true distribution and uses it to calculate the p-values for over (and under) representation of categories.\n+        Although this is the most accurate method given a high enough value of sampling depth, its use quickly becomes computationally prohibitive.\n+\n+        *"Hypergeometric"* assumes there is no bias in power to detect differential expression at all and calculates the p-values using a standard hypergeometric distribution.\n+        Useful if you wish to test the effect of selection bias on your results.\n+\n+        CAUTION:  "Hypergeometric" should NEVER be used for producing results for biological interpretation.\n+        If there is genuinely no bias in power to detect DE in your experiment, the PWF will reflect this and the other methods will produce accurate results.\n+\n+        .. _manual: https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf\n+\n+\n+    </help>\n+    <citations>\n+        <citation type="doi">10.1186/gb-2010-11-2-r14</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r ade933eff007 test-data/category.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/category.tab Thu Nov 17 16:40:19 2016 -0500
b
@@ -0,0 +1,6 @@
+ENSG00000162526 GO:0000003
+ENSG00000198648 GO:0000278
+ENSG00000112312 GO:0000278
+ENSG00000174442 GO:0000278
+ENSG00000108953 GO:0000278
+ENSG00000167842 GO:0000278
b
diff -r 000000000000 -r ade933eff007 test-data/dge_list.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dge_list.tab Thu Nov 17 16:40:19 2016 -0500
b
b'@@ -0,0 +1,1000 @@\n+ENSG00000140459\tFalse\n+ENSG00000236824\tFalse\n+ENSG00000162526\tFalse\n+ENSG00000090402\tTrue\n+ENSG00000169188\tFalse\n+ENSG00000124103\tFalse\n+ENSG00000241680\tFalse\n+ENSG00000089009\tFalse\n+ENSG00000196696\tFalse\n+ENSG00000006837\tFalse\n+ENSG00000112312\tTrue\n+ENSG00000240392\tFalse\n+ENSG00000010932\tFalse\n+ENSG00000167685\tFalse\n+ENSG00000237101\tFalse\n+ENSG00000244270\tFalse\n+ENSG00000104331\tTrue\n+ENSG00000242858\tTrue\n+ENSG00000148484\tFalse\n+ENSG00000057608\tFalse\n+ENSG00000182841\tFalse\n+ENSG00000230903\tFalse\n+ENSG00000106868\tFalse\n+ENSG00000214564\tFalse\n+ENSG00000169762\tFalse\n+ENSG00000168758\tFalse\n+ENSG00000182827\tFalse\n+ENSG00000125637\tTrue\n+ENSG00000118873\tFalse\n+ENSG00000186532\tFalse\n+ENSG00000106771\tFalse\n+ENSG00000204348\tFalse\n+ENSG00000197062\tFalse\n+ENSG00000171067\tFalse\n+ENSG00000228118\tFalse\n+ENSG00000213077\tTrue\n+ENSG00000188549\tTrue\n+ENSG00000135537\tFalse\n+ENSG00000173715\tFalse\n+ENSG00000227742\tTrue\n+ENSG00000148158\tTrue\n+ENSG00000134461\tFalse\n+ENSG00000174442\tFalse\n+ENSG00000198648\tTrue\n+ENSG00000228599\tFalse\n+ENSG00000167842\tTrue\n+ENSG00000108953\tTrue\n+ENSG00000185504\tTrue\n+ENSG00000178950\tFalse\n+ENSG00000160051\tFalse\n+ENSG00000130787\tFalse\n+ENSG00000154146\tFalse\n+ENSG00000204956\tFalse\n+ENSG00000231181\tFalse\n+ENSG00000149308\tFalse\n+ENSG00000151748\tFalse\n+ENSG00000141510\tFalse\n+ENSG00000105325\tFalse\n+ENSG00000173163\tFalse\n+ENSG00000064545\tTrue\n+ENSG00000236946\tFalse\n+ENSG00000104450\tTrue\n+ENSG00000174469\tFalse\n+ENSG00000010322\tTrue\n+ENSG00000126562\tFalse\n+ENSG00000143919\tFalse\n+ENSG00000236285\tFalse\n+ENSG00000160213\tFalse\n+ENSG00000185798\tFalse\n+ENSG00000073614\tFalse\n+ENSG00000138587\tFalse\n+ENSG00000090273\tFalse\n+ENSG00000123384\tFalse\n+ENSG00000070961\tTrue\n+ENSG00000159346\tFalse\n+ENSG00000141425\tFalse\n+ENSG00000072071\tFalse\n+ENSG00000057935\tFalse\n+ENSG00000226790\tFalse\n+ENSG00000117410\tFa
lse\n+ENSG00000100038\tTrue\n+ENSG00000169660\tFalse\n+ENSG00000227968\tFalse\n+ENSG00000220483\tFalse\n+ENSG00000186130\tFalse\n+ENSG00000018607\tFalse\n+ENSG00000142676\tTrue\n+ENSG00000226144\tFalse\n+ENSG00000233454\tFalse\n+ENSG00000230006\tFalse\n+ENSG00000225405\tFalse\n+ENSG00000206192\tFalse\n+ENSG00000141012\tFalse\n+ENSG00000133138\tFalse\n+ENSG00000240418\tTrue\n+ENSG00000230074\tFalse\n+ENSG00000106803\tFalse\n+ENSG00000134987\tFalse\n+ENSG00000179119\tFalse\n+ENSG00000130770\tFalse\n+ENSG00000213760\tFalse\n+ENSG00000167785\tFalse\n+ENSG00000152782\tFalse\n+ENSG00000103121\tTrue\n+ENSG00000167614\tFalse\n+ENSG00000145990\tFalse\n+ENSG00000233846\tFalse\n+ENSG00000137285\tFalse\n+ENSG00000186743\tFalse\n+ENSG00000130717\tFalse\n+ENSG00000235363\tFalse\n+ENSG00000152642\tFalse\n+ENSG00000239377\tFalse\n+ENSG00000231245\tFalse\n+ENSG00000188295\tFalse\n+ENSG00000228195\tFalse\n+ENSG00000158669\tFalse\n+ENSG00000198929\tFalse\n+ENSG00000112306\tTrue\n+ENSG00000134324\tFalse\n+ENSG00000143256\tFalse\n+ENSG00000175322\tFalse\n+ENSG00000110092\tTrue\n+ENSG00000175176\tTrue\n+ENSG00000176407\tTrue\n+ENSG00000156050\tFalse\n+ENSG00000223068\tTrue\n+ENSG00000115221\tFalse\n+ENSG00000070495\tFalse\n+ENSG00000151718\tFalse\n+ENSG00000198752\tFalse\n+ENSG00000196747\tFalse\n+ENSG00000204253\tTrue\n+ENSG00000176593\tFalse\n+ENSG00000106789\tTrue\n+ENSG00000100526\tTrue\n+ENSG00000004534\tTrue\n+ENSG00000158470\tFalse\n+ENSG00000213062\tFalse\n+ENSG00000151876\tFalse\n+ENSG00000213493\tFalse\n+ENSG00000083720\tTrue\n+ENSG00000155621\tTrue\n+ENSG00000164985\tFalse\n+ENSG00000109610\tFalse\n+ENSG00000183530\tFalse\n+ENSG00000137776\tTrue\n+ENSG00000184216\tFalse\n+ENSG00000156831\tFalse\n+ENSG00000213906\tFalse\n+ENSG00000105438\tFalse\n+ENSG00000175220\tFalse\n+ENSG00000170638\tFalse\n+ENSG00000227401\tFalse\n+ENSG00000164597\tTrue\n+ENSG00000011638\tTrue\n+ENSG00000135124\tFalse\n+ENSG00000105568\tTrue\n+ENSG00000178229\tFalse\n+ENSG00000126858\tFalse\n+ENSG000000184
08\tFalse\n+ENSG00000143816\tFalse\n+ENSG00000124496\tTrue\n+ENSG00000136238\tFalse\n+ENSG00000131051\tTrue\n+ENSG00000235444\tFalse\n+ENSG00000114026\tTrue\n+ENSG00000187144\tFalse\n+ENSG00000165030\tTrue\n+ENSG00000183474\tFalse\n+ENSG00000213339\tFalse\n+ENSG00000109680\tFalse\n+ENSG00000080815\tTrue\n+ENSG00000108055\tFalse\n+ENSG00000133678\tTr'..b'0234639\tFalse\n+ENSG00000101294\tFalse\n+ENSG00000042445\tFalse\n+ENSG00000229044\tFalse\n+ENSG00000105355\tFalse\n+ENSG00000157637\tFalse\n+ENSG00000186056\tFalse\n+ENSG00000090020\tFalse\n+ENSG00000114120\tTrue\n+ENSG00000214975\tFalse\n+ENSG00000164209\tFalse\n+ENSG00000118096\tFalse\n+ENSG00000236163\tFalse\n+ENSG00000230243\tFalse\n+ENSG00000147669\tTrue\n+ENSG00000213904\tFalse\n+ENSG00000213880\tFalse\n+ENSG00000124181\tTrue\n+ENSG00000197697\tFalse\n+ENSG00000235698\tFalse\n+ENSG00000112531\tTrue\n+ENSG00000241772\tFalse\n+ENSG00000164308\tFalse\n+ENSG00000189143\tFalse\n+ENSG00000198865\tTrue\n+ENSG00000167642\tTrue\n+ENSG00000156973\tFalse\n+ENSG00000139233\tFalse\n+ENSG00000074211\tFalse\n+ENSG00000110906\tFalse\n+ENSG00000236570\tFalse\n+ENSG00000112365\tFalse\n+ENSG00000162444\tFalse\n+ENSG00000143970\tTrue\n+ENSG00000179918\tTrue\n+ENSG00000170619\tFalse\n+ENSG00000187172\tFalse\n+ENSG00000119669\tFalse\n+ENSG00000074054\tFalse\n+ENSG00000114416\tFalse\n+ENSG00000204805\tFalse\n+ENSG00000226637\tFalse\n+ENSG00000213092\tFalse\n+ENSG00000116209\tTrue\n+ENSG00000241923\tFalse\n+ENSG00000092208\tFalse\n+ENSG00000161692\tFalse\n+ENSG00000018699\tFalse\n+ENSG00000134765\tTrue\n+ENSG00000232943\tFalse\n+ENSG00000171055\tTrue\n+ENSG00000170412\tFalse\n+ENSG00000180884\tFalse\n+ENSG00000213066\tFalse\n+ENSG00000189343\tFalse\n+ENSG00000152377\tTrue\n+ENSG00000137642\tFalse\n+ENSG00000230406\tFalse\n+ENSG00000027697\tTrue\n+ENSG00000138685\tFalse\n+ENSG00000125691\tTrue\n+ENSG00000139192\tFalse\n+ENSG00000170142\tTrue\n+ENSG00000236735\tFalse\n+ENSG00000127226\tFalse\n+ENSG00000213033\tFalse\n+ENSG00000174483\t
False\n+ENSG00000232676\tFalse\n+ENSG00000181359\tFalse\n+ENSG00000114735\tFalse\n+ENSG00000084073\tFalse\n+ENSG00000132879\tTrue\n+ENSG00000187504\tTrue\n+ENSG00000131100\tTrue\n+ENSG00000022556\tFalse\n+ENSG00000128590\tTrue\n+ENSG00000214029\tFalse\n+ENSG00000163322\tFalse\n+ENSG00000164332\tFalse\n+ENSG00000188529\tTrue\n+ENSG00000164930\tFalse\n+ENSG00000188863\tFalse\n+ENSG00000198746\tFalse\n+ENSG00000231434\tFalse\n+ENSG00000168916\tTrue\n+ENSG00000203837\tTrue\n+ENSG00000118507\tFalse\n+ENSG00000184319\tFalse\n+ENSG00000130349\tTrue\n+ENSG00000100478\tFalse\n+ENSG00000185189\tFalse\n+ENSG00000113621\tTrue\n+ENSG00000238251\tFalse\n+ENSG00000198250\tTrue\n+ENSG00000232905\tFalse\n+ENSG00000206560\tTrue\n+ENSG00000225374\tFalse\n+ENSG00000236330\tFalse\n+ENSG00000136048\tFalse\n+ENSG00000146574\tFalse\n+ENSG00000129518\tTrue\n+ENSG00000111652\tFalse\n+ENSG00000136709\tFalse\n+ENSG00000159259\tFalse\n+ENSG00000205628\tFalse\n+ENSG00000236739\tFalse\n+ENSG00000118680\tTrue\n+ENSG00000174799\tFalse\n+ENSG00000227376\tFalse\n+ENSG00000079215\tFalse\n+ENSG00000224628\tFalse\n+ENSG00000197121\tFalse\n+ENSG00000151690\tTrue\n+ENSG00000230531\tFalse\n+ENSG00000126226\tFalse\n+ENSG00000074071\tTrue\n+ENSG00000100577\tFalse\n+ENSG00000086289\tFalse\n+ENSG00000205208\tFalse\n+ENSG00000134326\tFalse\n+ENSG00000157423\tFalse\n+ENSG00000239569\tFalse\n+ENSG00000164241\tFalse\n+ENSG00000161040\tFalse\n+ENSG00000174705\tFalse\n+ENSG00000197888\tTrue\n+ENSG00000141448\tFalse\n+ENSG00000181690\tFalse\n+ENSG00000166788\tFalse\n+ENSG00000101473\tTrue\n+ENSG00000204396\tTrue\n+ENSG00000168826\tFalse\n+ENSG00000155729\tFalse\n+ENSG00000142082\tFalse\n+ENSG00000143727\tFalse\n+ENSG00000203995\tFalse\n+ENSG00000165055\tFalse\n+ENSG00000213971\tFalse\n+ENSG00000224453\tFalse\n+ENSG00000108960\tFalse\n+ENSG00000183506\tFalse\n+ENSG00000099139\tFalse\n+ENSG00000111331\tTrue\n+ENSG00000133226\tFalse\n+ENSG00000217801\tFalse\n+ENSG00000130227\tFalse\n+ENSG00000164144\tFalse\n+ENSG0000012
8731\tFalse\n+ENSG00000079785\tFalse\n+ENSG00000106948\tFalse\n+ENSG00000115107\tTrue\n+ENSG00000236696\tFalse\n+ENSG00000198482\tFalse\n+ENSG00000173681\tFalse\n+ENSG00000204164\tFalse\n+ENSG00000228612\tFalse\n+ENSG00000003096\tTrue\n+ENSG00000180881\tFalse\n+ENSG00000205084\tFalse\n+ENSG00000159173\tFalse\n+ENSG00000197535\tFalse\n+ENSG00000223382\tTrue\n+ENSG00000183309\tFalse\n+ENSG00000215099\tFalse\n+ENSG00000116685\tFalse\n+ENSG00000181472\tFalse\n+ENSG00000132274\tFalse\n+ENSG00000145494\tFalse\n+ENSG00000122644\tTrue\n+ENSG00000157107\tTrue\n+ENSG00000145354\tFalse\n+ENSG00000158467\tFalse\n+ENSG00000164934\tFalse\n+ENSG00000134986\tTrue\n+ENSG00000121988\tFalse\n+ENSG00000178996\tFalse\n'
b
diff -r 000000000000 -r ade933eff007 test-data/gc.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gc.tab Thu Nov 17 16:40:19 2016 -0500
b
@@ -0,0 +1,1 @@
+ENSG00000162526 0.388349514563107
b
diff -r 000000000000 -r ade933eff007 test-data/gene_length.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_length.tab Thu Nov 17 16:40:19 2016 -0500
b
b'@@ -0,0 +1,948 @@\n+ENSG00000003096\t6983\n+ENSG00000004534\t7302\n+ENSG00000006327\t1848\n+ENSG00000006831\t5878\n+ENSG00000006837\t3057\n+ENSG00000007392\t6176\n+ENSG00000008735\t5901\n+ENSG00000009844\t7262\n+ENSG00000010322\t9161\n+ENSG00000010932\t3602\n+ENSG00000011638\t2558\n+ENSG00000012983\t7620\n+ENSG00000013275\t2387\n+ENSG00000014216\t7553\n+ENSG00000018408\t8413\n+ENSG00000018607\t1774\n+ENSG00000018699\t3185\n+ENSG00000022556\t6313\n+ENSG00000023041\t3748\n+ENSG00000023330\t2536\n+ENSG00000023697\t2844\n+ENSG00000023892\t2545\n+ENSG00000027697\t2571\n+ENSG00000029363\t9297\n+ENSG00000032389\t5775\n+ENSG00000033050\t4448\n+ENSG00000035403\t9992\n+ENSG00000042445\t4005\n+ENSG00000049541\t2211\n+ENSG00000057608\t4033\n+ENSG00000057935\t10580\n+ENSG00000059122\t8419\n+ENSG00000059588\t7743\n+ENSG00000063015\t5115\n+ENSG00000063322\t3835\n+ENSG00000064545\t3281\n+ENSG00000065000\t8601\n+ENSG00000065060\t9908\n+ENSG00000066739\t14778\n+ENSG00000066923\t8583\n+ENSG00000068028\t3683\n+ENSG00000068650\t13733\n+ENSG00000069712\t4556\n+ENSG00000070495\t5969\n+ENSG00000070610\t4803\n+ENSG00000070961\t9493\n+ENSG00000071889\t5098\n+ENSG00000072071\t8487\n+ENSG00000072121\t15706\n+ENSG00000072134\t8517\n+ENSG00000072864\t5525\n+ENSG00000072958\t14371\n+ENSG00000073614\t12106\n+ENSG00000074054\t11052\n+ENSG00000074071\t1097\n+ENSG00000074211\t9198\n+ENSG00000074319\t3839\n+ENSG00000074621\t9084\n+ENSG00000075399\t3428\n+ENSG00000076356\t13781\n+ENSG00000079215\t6265\n+ENSG00000079246\t5463\n+ENSG00000079785\t3833\n+ENSG00000079974\t5870\n+ENSG00000080603\t13674\n+ENSG00000080815\t9947\n+ENSG00000081087\t5325\n+ENSG00000082068\t7039\n+ENSG00000083535\t4253\n+ENSG00000083544\t6814\n+ENSG00000083720\t4194\n+ENSG00000084073\t3572\n+ENSG00000085365\t5106\n+ENSG00000085377\t3762\n+ENSG00000085982\t9352\n+ENSG00000085999\t3212\n+ENSG00000086205\t3824\n+ENSG00000086289\t2864\n+ENSG00000087586\t2928\n+ENSG00000088340\t9067\n+ENSG00000088448\t4563\n+ENSG00000089009\t4447\n+E
NSG00000090020\t5974\n+ENSG00000090273\t2856\n+ENSG00000090402\t6138\n+ENSG00000091140\t5299\n+ENSG00000092068\t5991\n+ENSG00000092098\t4982\n+ENSG00000092208\t2532\n+ENSG00000092445\t10945\n+ENSG00000099139\t12705\n+ENSG00000099910\t4646\n+ENSG00000100014\t7324\n+ENSG00000100027\t4713\n+ENSG00000100038\t6641\n+ENSG00000100106\t12238\n+ENSG00000100191\t2030\n+ENSG00000100292\t2405\n+ENSG00000100336\t4377\n+ENSG00000100354\t19998\n+ENSG00000100441\t8015\n+ENSG00000100478\t7223\n+ENSG00000100526\t1836\n+ENSG00000100577\t8477\n+ENSG00000100852\t10888\n+ENSG00000101247\t7217\n+ENSG00000101294\t10000\n+ENSG00000101473\t4213\n+ENSG00000102030\t4742\n+ENSG00000102349\t9022\n+ENSG00000102606\t11768\n+ENSG00000102804\t8586\n+ENSG00000102901\t5539\n+ENSG00000103035\t2740\n+ENSG00000103121\t12781\n+ENSG00000103932\t7046\n+ENSG00000104325\t3507\n+ENSG00000104331\t7594\n+ENSG00000104368\t6618\n+ENSG00000104450\t5055\n+ENSG00000105173\t2550\n+ENSG00000105220\t9112\n+ENSG00000105223\t5184\n+ENSG00000105325\t5802\n+ENSG00000105355\t2813\n+ENSG00000105438\t2251\n+ENSG00000105519\t4428\n+ENSG00000105568\t7437\n+ENSG00000105879\t5481\n+ENSG00000106012\t9240\n+ENSG00000106305\t1838\n+ENSG00000106683\t6902\n+ENSG00000106771\t9484\n+ENSG00000106789\t5717\n+ENSG00000106803\t1190\n+ENSG00000106868\t4183\n+ENSG00000106948\t10601\n+ENSG00000107295\t2682\n+ENSG00000107833\t928\n+ENSG00000108055\t4275\n+ENSG00000108091\t7345\n+ENSG00000108306\t11038\n+ENSG00000108591\t7411\n+ENSG00000108666\t5740\n+ENSG00000108848\t8151\n+ENSG00000108947\t3222\n+ENSG00000108953\t3847\n+ENSG00000108960\t3177\n+ENSG00000109079\t3889\n+ENSG00000109171\t6524\n+ENSG00000109610\t2128\n+ENSG00000109680\t3434\n+ENSG00000109771\t7360\n+ENSG00000109787\t6297\n+ENSG00000109920\t7596\n+ENSG00000109929\t5566\n+ENSG00000110002\t6403\n+ENSG00000110092\t4830\n+ENSG00000110906\t9865\n+ENSG00000111247\t2558\n+ENSG00000111249\t7648\n+ENSG00000111331\t8251\n+ENSG00000111652\t3113\n+ENSG00000111707\t5731\n+ENSG00000111860\t9462\n+
ENSG00000111877\t12496\n+ENSG00000112062\t6860\n+ENSG00000112306\t767\n+ENSG00000112312\t2476\n+ENSG00000112365\t5519\n+ENSG00000112406\t5614\n+ENSG00000112531\t17368\n+ENSG00000112874\t4304\n+ENSG00000113048\t6511\n+ENSG00000113328\t3096\n+ENSG00000113621\t5265\n+ENSG00000113649\t8714\n+ENSG00000113812\t4066\n+ENSG00000113916\t5938\n+ENSG00000114026\t8733\n+E'..b'213148\t464\n+ENSG00000213174\t414\n+ENSG00000213197\t694\n+ENSG00000213318\t783\n+ENSG00000213339\t3430\n+ENSG00000213493\t1451\n+ENSG00000213588\t3014\n+ENSG00000213711\t814\n+ENSG00000213742\t5308\n+ENSG00000213760\t2147\n+ENSG00000213793\t551\n+ENSG00000213864\t676\n+ENSG00000213880\t797\n+ENSG00000213904\t4208\n+ENSG00000213906\t3233\n+ENSG00000213917\t815\n+ENSG00000213971\t5091\n+ENSG00000214029\t15455\n+ENSG00000214174\t3858\n+ENSG00000214389\t784\n+ENSG00000214617\t4479\n+ENSG00000214694\t5490\n+ENSG00000214810\t311\n+ENSG00000214961\t1372\n+ENSG00000214975\t499\n+ENSG00000215286\t754\n+ENSG00000215333\t1283\n+ENSG00000216854\t553\n+ENSG00000216915\t1495\n+ENSG00000217716\t494\n+ENSG00000217801\t2171\n+ENSG00000218965\t609\n+ENSG00000219553\t723\n+ENSG00000220131\t354\n+ENSG00000220157\t961\n+ENSG00000220483\t871\n+ENSG00000221843\t6199\n+ENSG00000221909\t2717\n+ENSG00000222046\t1869\n+ENSG00000223382\t1326\n+ENSG00000223620\t1102\n+ENSG00000223877\t622\n+ENSG00000224016\t291\n+ENSG00000224520\t1447\n+ENSG00000224578\t1377\n+ENSG00000224628\t1519\n+ENSG00000224664\t316\n+ENSG00000224892\t997\n+ENSG00000225405\t390\n+ENSG00000225544\t392\n+ENSG00000225787\t306\n+ENSG00000225806\t1521\n+ENSG00000226067\t2075\n+ENSG00000226086\t822\n+ENSG00000226114\t361\n+ENSG00000226144\t454\n+ENSG00000226232\t1728\n+ENSG00000226268\t959\n+ENSG00000226478\t1126\n+ENSG00000226703\t812\n+ENSG00000226752\t7181\n+ENSG00000226790\t1139\n+ENSG00000226833\t1438\n+ENSG00000227006\t861\n+ENSG00000227057\t3115\n+ENSG00000227343\t600\n+ENSG00000227376\t552\n+ENSG00000227401\t284\n+ENSG00000227543\t3835\n+ENSG00000227666\t316\
n+ENSG00000227742\t946\n+ENSG00000227968\t999\n+ENSG00000228118\t459\n+ENSG00000228195\t881\n+ENSG00000228236\t315\n+ENSG00000228599\t742\n+ENSG00000228612\t2737\n+ENSG00000228981\t843\n+ENSG00000229044\t439\n+ENSG00000229344\t682\n+ENSG00000229503\t477\n+ENSG00000229956\t6794\n+ENSG00000230006\t8042\n+ENSG00000230022\t634\n+ENSG00000230074\t665\n+ENSG00000230118\t258\n+ENSG00000230146\t1176\n+ENSG00000230243\t319\n+ENSG00000230295\t351\n+ENSG00000230406\t421\n+ENSG00000230531\t1798\n+ENSG00000230551\t8636\n+ENSG00000230650\t3130\n+ENSG00000230667\t909\n+ENSG00000230863\t742\n+ENSG00000230869\t2418\n+ENSG00000230913\t744\n+ENSG00000231096\t390\n+ENSG00000231181\t559\n+ENSG00000231245\t402\n+ENSG00000231434\t2167\n+ENSG00000231615\t1337\n+ENSG00000231711\t4947\n+ENSG00000231955\t1411\n+ENSG00000232186\t1228\n+ENSG00000232581\t357\n+ENSG00000232676\t1124\n+ENSG00000232699\t736\n+ENSG00000232905\t946\n+ENSG00000232943\t400\n+ENSG00000233122\t2436\n+ENSG00000233454\t275\n+ENSG00000233503\t1501\n+ENSG00000233602\t619\n+ENSG00000233836\t3242\n+ENSG00000233846\t487\n+ENSG00000234231\t2095\n+ENSG00000234639\t1239\n+ENSG00000234722\t3487\n+ENSG00000234742\t555\n+ENSG00000234981\t792\n+ENSG00000235065\t475\n+ENSG00000235363\t225\n+ENSG00000235424\t288\n+ENSG00000235444\t618\n+ENSG00000235512\t292\n+ENSG00000235623\t574\n+ENSG00000235655\t411\n+ENSG00000235698\t1200\n+ENSG00000235750\t4783\n+ENSG00000235847\t965\n+ENSG00000235859\t1234\n+ENSG00000235892\t1677\n+ENSG00000236086\t262\n+ENSG00000236285\t837\n+ENSG00000236290\t703\n+ENSG00000236330\t886\n+ENSG00000236468\t1335\n+ENSG00000236570\t1227\n+ENSG00000236680\t1238\n+ENSG00000236681\t523\n+ENSG00000236735\t375\n+ENSG00000236739\t535\n+ENSG00000236753\t2715\n+ENSG00000236801\t474\n+ENSG00000236824\t13458\n+ENSG00000236946\t1087\n+ENSG00000237017\t4158\n+ENSG00000237033\t609\n+ENSG00000237054\t3194\n+ENSG00000237101\t1323\n+ENSG00000237357\t2579\n+ENSG00000237517\t7448\n+ENSG00000237939\t652\n+ENSG00000237977\t563\n+ENSG000
00238221\t500\n+ENSG00000238251\t514\n+ENSG00000239377\t420\n+ENSG00000239524\t400\n+ENSG00000239569\t736\n+ENSG00000239791\t1918\n+ENSG00000239887\t4495\n+ENSG00000239926\t747\n+ENSG00000240005\t589\n+ENSG00000240392\t575\n+ENSG00000240418\t893\n+ENSG00000240540\t1183\n+ENSG00000240821\t579\n+ENSG00000241258\t3540\n+ENSG00000241370\t1606\n+ENSG00000241494\t438\n+ENSG00000241680\t375\n+ENSG00000241697\t2611\n+ENSG00000241772\t1051\n+ENSG00000241923\t622\n+ENSG00000242061\t438\n+ENSG00000242140\t231\n+ENSG00000242349\t1427\n+ENSG00000242600\t2616\n+ENSG00000242612\t4046\n+ENSG00000242858\t602\n+ENSG00000243122\t413\n+ENSG00000243396\t402\n+ENSG00000243701\t4206\n+ENSG00000243779\t321\n+ENSG00000244171\t1291\n+ENSG00000244270\t403\n'
b
diff -r 000000000000 -r ade933eff007 test-data/go_terms.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/go_terms.tab Thu Nov 17 16:40:19 2016 -0500
b
@@ -0,0 +1,85 @@
+ENSG00000162526 GO:0000003
+ENSG00000162526 GO:0000166
+ENSG00000162526 GO:0000287
+ENSG00000162526 GO:0001882
+ENSG00000162526 GO:0001883
+ENSG00000162526 GO:0003674
+ENSG00000162526 GO:0003824
+ENSG00000162526 GO:0004672
+ENSG00000162526 GO:0004674
+ENSG00000162526 GO:0005488
+ENSG00000162526 GO:0005515
+ENSG00000162526 GO:0005524
+ENSG00000162526 GO:0005575
+ENSG00000162526 GO:0005622
+ENSG00000162526 GO:0005623
+ENSG00000162526 GO:0005737
+ENSG00000162526 GO:0006464
+ENSG00000162526 GO:0006468
+ENSG00000162526 GO:0006793
+ENSG00000162526 GO:0006796
+ENSG00000162526 GO:0007154
+ENSG00000162526 GO:0007165
+ENSG00000162526 GO:0007275
+ENSG00000162526 GO:0007276
+ENSG00000162526 GO:0007283
+ENSG00000162526 GO:0008150
+ENSG00000162526 GO:0008152
+ENSG00000162526 GO:0009987
+ENSG00000162526 GO:0016301
+ENSG00000162526 GO:0016310
+ENSG00000162526 GO:0016740
+ENSG00000162526 GO:0016772
+ENSG00000162526 GO:0016773
+ENSG00000162526 GO:0017076
+ENSG00000162526 GO:0019538
+ENSG00000162526 GO:0019953
+ENSG00000162526 GO:0022414
+ENSG00000162526 GO:0023052
+ENSG00000162526 GO:0030154
+ENSG00000162526 GO:0030554
+ENSG00000162526 GO:0032501
+ENSG00000162526 GO:0032502
+ENSG00000162526 GO:0032504
+ENSG00000162526 GO:0032549
+ENSG00000162526 GO:0032550
+ENSG00000162526 GO:0032553
+ENSG00000162526 GO:0032555
+ENSG00000162526 GO:0032559
+ENSG00000162526 GO:0035556
+ENSG00000162526 GO:0035639
+ENSG00000162526 GO:0036094
+ENSG00000162526 GO:0036211
+ENSG00000162526 GO:0043167
+ENSG00000162526 GO:0043168
+ENSG00000162526 GO:0043169
+ENSG00000162526 GO:0043170
+ENSG00000162526 GO:0043412
+ENSG00000162526 GO:0044237
+ENSG00000162526 GO:0044238
+ENSG00000162526 GO:0044260
+ENSG00000162526 GO:0044267
+ENSG00000162526 GO:0044424
+ENSG00000162526 GO:0044464
+ENSG00000162526 GO:0044699
+ENSG00000162526 GO:0044700
+ENSG00000162526 GO:0044702
+ENSG00000162526 GO:0044703
+ENSG00000162526 GO:0044707
+ENSG00000162526 GO:0044763
+ENSG00000162526 GO:0044767
+ENSG00000162526 GO:0046872
+ENSG00000162526 GO:0048232
+ENSG00000162526 GO:0048609
+ENSG00000162526 GO:0048869
+ENSG00000162526 GO:0050789
+ENSG00000162526 GO:0050794
+ENSG00000162526 GO:0050896
+ENSG00000162526 GO:0051704
+ENSG00000162526 GO:0051716
+ENSG00000162526 GO:0065007
+ENSG00000162526 GO:0071704
+ENSG00000162526 GO:0097159
+ENSG00000162526 GO:0097367
+ENSG00000162526 GO:1901265
+ENSG00000162526 GO:1901363
b
diff -r 000000000000 -r ade933eff007 test-data/length.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/length.tab Thu Nov 17 16:40:19 2016 -0500
b
@@ -0,0 +1,1 @@
+ENSG00000162526 103
b
diff -r 000000000000 -r ade933eff007 test-data/wal.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/wal.tab Thu Nov 17 16:40:19 2016 -0500
b
@@ -0,0 +1,3 @@
+category over_represented_pvalue under_represented_pvalue numDEInCat numInCat term ontology p\.adjust.over_represented p\.adjust.under_represented
+GO:0000278 0\.0122.+ 0\.999.+ 4 5 mitotic cell cycle BP 0\.0245.+ 0\.999.+
+GO:0000003 1 0\.796.+ 0 1 reproduction BP 1 0\.999.+