Mercurial > repos > iuc > goseq
changeset 2:ab492df30cdf draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit 4a3c9f195ba5d899b1a1ce5e80281cdf230f456a
author | iuc |
---|---|
date | Mon, 23 Oct 2017 11:19:12 -0400 |
parents | 9d1256d9ef0b |
children | 783e8b70b047 |
files | goseq.r goseq.xml test-data/dge_list_zf.tab test-data/gc.tab test-data/gene_length_zf.tab test-data/getgo.danRer10.tab test-data/getgo.hg38.tab test-data/go_terms.tab test-data/goseq_analysis.RData test-data/length.tab test-data/length_bias_plot.pdf test-data/nobias.tab test-data/samp.tab test-data/sample_vs_wallenius_plot.pdf test-data/wal.tab |
diffstat | 15 files changed, 1625 insertions(+), 180 deletions(-) [+] |
line wrap: on
line diff
--- a/goseq.r Sun Jun 11 08:57:39 2017 -0400 +++ b/goseq.r Mon Oct 23 11:19:12 2017 -0400 @@ -11,8 +11,8 @@ option_list <- list( make_option(c("-d", "--dge_file"), type="character", help="Path to file with differential gene expression result"), make_option(c("-w","--wallenius_tab"), type="character", help="Path to output file with P-values estimated using wallenius distribution."), - make_option(c("-s","--sampling_tab"), type="character", default=FALSE, help="Path to output file with P-values estimated using wallenius distribution."), - make_option(c("-n","--nobias_tab"), type="character", default=FALSE, help="Path to output file with P-values estimated using wallenius distribution and no correction for gene length bias."), + make_option(c("-s","--sampling_tab"), type="character", default=FALSE, help="Path to output file with P-values estimated using sampling distribution."), + make_option(c("-n","--nobias_tab"), type="character", default=FALSE, help="Path to output file with P-values estimated using hypergeometric distribution and no correction for gene length bias."), make_option(c("-l","--length_bias_plot"), type="character", default=FALSE, help="Path to length-bias plot."), make_option(c("-sw","--sample_vs_wallenius_plot"), type="character", default=FALSE, help="Path to plot comparing sampling with wallenius p-values."), make_option(c("-r", "--repcnt"), type="integer", default=100, help="Number of repeats for sampling"), @@ -23,7 +23,9 @@ make_option(c("-p", "--p_adj_method"), default="BH", type="character", help="Multiple hypothesis testing correction method to use"), make_option(c("-cat", "--use_genes_without_cat"), default=FALSE, type="logical", help="A large number of gene may have no GO term annotated. If this option is set to FALSE, genes without category will be ignored in the calculation of p-values(default behaviour). 
If TRUE these genes will count towards the total number of genes outside the tested category (default behaviour prior to version 1.15.2)."), - make_option(c("-plots", "--make_plots"), default=FALSE, type="logical", help="produce diagnostic plots?") + make_option(c("-plots", "--make_plots"), default=FALSE, type="logical", help="produce diagnostic plots?"), + make_option(c("-fc", "--fetch_cats"), default=NULL, type="character", help="Categories to get can include one or more of GO:CC, GO:BP, GO:MF, KEGG"), + make_option(c("-rd", "--rdata"), default=NULL, type="character", help="Path to RData output file.") ) parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) @@ -44,15 +46,27 @@ p_adj_method = args$p_adj_method use_genes_without_cat = args$use_genes_without_cat make_plots = args$make_plots +rdata = args$rdata + +if (!is.null(args$fetch_cats)) { + fetch_cats = unlist(strsplit(args$fetch_cats, ",")) +} # format DE genes into named vector suitable for goseq -dge_table = read.delim(dge_file, header = FALSE, sep="\t") +# check if header is present +first_line = read.delim(dge_file, header = FALSE, nrow=1) +second_col = toupper(first_line[, ncol(first_line)]) +if (second_col == TRUE || second_col == FALSE) { + dge_table = read.delim(dge_file, header = FALSE, sep="\t") +} else { + dge_table = read.delim(dge_file, header = TRUE, sep="\t") +} genes = as.numeric(as.logical(dge_table[,ncol(dge_table)])) # Last column contains TRUE/FALSE names(genes) = dge_table[,1] # Assuming first column contains gene names # gene lengths, assuming last column if (length_file != "FALSE" ) { - first_line = read.delim(dge_file, header = FALSE, nrow=1) + first_line = read.delim(length_file, header = FALSE, nrow=1) if (is.numeric(first_line[, ncol(first_line)])) { length_table = read.delim(length_file, header=FALSE, sep="\t", check.names=FALSE) } else { @@ -66,7 +80,7 @@ # Estimate PWF -if (make_plots == TRUE) { +if (make_plots != 'false') { pdf(length_bias_plot) } 
pwf=nullp(genes, genome = genome, id = gene_id, bias.data = gene_lengths, plot.fit=make_plots) @@ -74,7 +88,7 @@ # Fetch GO annotations if category_file hasn't been supplied: if (category_file == "FALSE") { - go_map=getgo(genes = names(genes), genome = genome, id = gene_id, fetch.cats=c("GO:CC", "GO:BP", "GO:MF", "KEGG")) + go_map=getgo(genes = names(genes), genome=genome, id=gene_id, fetch.cats=fetch_cats) } else { # check for header: first entry in first column must be present in genes, else it's a header first_line = read.delim(category_file, header = FALSE, nrow=1) @@ -103,7 +117,13 @@ # Sampling distribution if (repcnt > 0) { + + # capture the sampling progress so it doesn't fill stdout + zz <- file("/dev/null", open = "wt") + sink(zz) GO.samp=goseq(pwf, genome = genome, id = gene_id, method="Sampling", repcnt=repcnt, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) + sink() + GO.samp$p.adjust.over_represented = p.adjust(GO.samp$over_represented_pvalue, method=p_adj_method) GO.samp$p.adjust.under_represented = p.adjust(GO.samp$under_represented_pvalue, method=p_adj_method) write.table(GO.samp, sampling_tab, sep="\t", row.names = FALSE, quote = FALSE) @@ -118,4 +138,10 @@ } } +# Output RData file +if (!is.null(args$rdata)) { + save.image(file = "goseq_analysis.RData") +} + + sessionInfo()
--- a/goseq.xml Sun Jun 11 08:57:39 2017 -0400 +++ b/goseq.xml Mon Oct 23 11:19:12 2017 -0400 @@ -1,8 +1,12 @@ -<tool id="goseq" name="goseq" version="0.2.2"> +<tool id="goseq" name="goseq" version="1.26.0"> <description>tests for overrepresented gene categories</description> <requirements> <requirement type="package" version="1.3.2">r-optparse</requirement> - <requirement type="package" version="1.22.0">bioconductor-goseq</requirement> + <requirement type="package" version="1.26.0">bioconductor-goseq</requirement> + <requirement type="package" version="3.3.0">bioconductor-org.hs.eg.db</requirement> + <requirement type="package" version="3.4.0">bioconductor-org.dm.eg.db</requirement> + <requirement type="package" version="3.4.1">bioconductor-org.dr.eg.db</requirement> + <requirement type="package" version="3.4.0">bioconductor-org.mm.eg.db</requirement> </requirements> <stdio> <regex match="Execution halted" @@ -18,122 +22,334 @@ level="fatal" description="An undefined error occured, please check your input carefully and contact your administrator." 
/> </stdio> + <version_command><![CDATA[ +echo $(R --version | grep version | grep -v GNU)", goseq version" $(R --vanilla --slave -e "library(goseq); cat(sessionInfo()\$otherPkgs\$goseq\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Hs.eg.db version" $(R --vanilla --slave -e "library(org.Hs.eg.db); cat(sessionInfo()\$otherPkgs\$org.Hs.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Dr.eg.db version" $(R --vanilla --slave -e "library(org.Dr.eg.db); cat(sessionInfo()\$otherPkgs\$org.Dr.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Dm.eg.db version" $(R --vanilla --slave -e "library(org.Dm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Dm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Mm.eg.db version" $(R --vanilla --slave -e "library(org.Mm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Mm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ") + ]]></version_command> <command><![CDATA[ - Rscript '$__tool_directory__'/goseq.r --dge_file '$dge_file' - --length_file '$length_file' - --category_file '$category_file' - #if $methods['wallenius']: - --wallenius_tab '$wallenius_tab' - #end if - #if $methods['hypergeometric']: - --nobias_tab '$nobias_tab' - #end if - --repcnt '$methods.repcnt' - --sampling_tab '$sampling_tab' - --p_adj_method '$p_adj_method' - --use_genes_without_cat '$use_genes_without_cat' - --make_plots '$make_plots' - --length_bias_plot '$length_bias_plot' - --sample_vs_wallenius_plot '$sample_vs_wallenius_plot' +Rscript '$__tool_directory__/goseq.r' + +--dge_file '$dge_file' +--length_file '$length_file' + +#if $categorySource.catSource == 'getgo': + --genome $categorySource.genome + --gene_id $categorySource.gene_id + --fetch_cats '$categorySource.fetchcats' +#elif $categorySource.catSource == 'history': + --category_file '$categorySource.category_file' 
+#end if + +#if $methods['wallenius']: + --wallenius_tab '$wallenius_tab' +#end if +#if $methods['hypergeometric']: + --nobias_tab '$nobias_tab' +#end if +--repcnt '$methods.repcnt' +--sampling_tab '$sampling_tab' + +--make_plots '$out.make_plots' +--length_bias_plot '$length_bias_plot' +--sample_vs_wallenius_plot '$sample_vs_wallenius_plot' +--rdata '$out.rdata_out' + +--p_adj_method '$adv.p_adj_method' +--use_genes_without_cat '$adv.use_genes_without_cat' + ]]></command> + + <!-- Input Files--> <inputs> - <param name="dge_file" help="A tabular file with gene names in the first column, and TRUE or FALSE in the last column. TRUE means a gene is differentially expressed. See help section for details." label="Differentially expressed gene file" type="data" format="tabular" /> - <param name="length_file" label="Gene length file for length bias correction" help="You can calculate the gene length using the get length and gc content tool" type="data" format="tabular" /> - <param name="category_file" label="Gene category file" help="You can obtain a mapping of gene id to gene ontology using the getgo tool" type="data" format="tabular" /> - <param name="use_genes_without_cat" help="For example, a large number of gene may have no GO term annotated. If this option is set to FALSE, those genes will be ignored in the calculation of p-values. If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested" - label="Count genes without any category?" type="boolean"/> - <section name="methods" title="Method options" expanded="True"> - <param name="wallenius" type="boolean" checked="true" label="Use wallenius method" help="See help for details" /> - <param name="hypergeometric" type="boolean" checked="false" label="Use hypergeometric method" help="Does not use gene length information. See help for details" /> - <param name="repcnt" help="Draw this many random control gene sets. Set to 0 to not do sampling. 
Larger values take a long time" label="sampling depth" size="3" type="integer" min="0" max="10000" value="0" /> + <param name="dge_file" type="data" format="tabular" label="Differentially expressed genes file" help="A tabular file with Gene IDs in the first column, and True or False in the second column. True means a gene is differentially expressed. See Help section for details."/> + <param name="length_file" type="data" format="tabular" label="Gene lengths file" help="You can calculate the gene lengths using featureCounts or the Gene length and GC content tool."/> + <conditional name="categorySource"> + <param name="catSource" type="select" format="tabular" label="Gene categories" help="You can obtain a mapping of genes to categories (for some genomes only) or you can provide your own category file."> + <option value="getgo" selected="true">Get categories</option> + <option value="history">Use a category file from history</option> + </param> + <when value="getgo"> + <param name="genome" type="select" label="Select a genome to use"> + <option value="hg38">Human (hg38)</option> + <option value="mm10">Mouse (mm10)</option> + <option value="dm6">Fruit fly (dm6)</option> + <option value="danRer10">Zebrafish (danRer10)</option> + </param> + <param name="gene_id" type="select" label="Select Gene ID format" help="Supported Gene IDs to automatically fetch categories should either be Entrez, Ensembl, or gene symbols."> + <option value="ensGene">Ensembl Gene ID</option> + <option value="knownGene">Entrez Gene ID</option> + <option value="geneSymbol">Gene Symbol</option> + </param> + <param name="fetchcats" type="select" multiple="True" display="checkboxes" label="Select one or more categories" help="By default, goseq tests all three major Gene Ontology branches; Cellular Component, Biological Process and Molecular Function. 
However, it is possible to limit testing to any combination and/or to also use KEGG pathways."> + <option value="GO:CC" selected="True">GO: Cellular Component</option> + <option value="GO:BP" selected="True">GO: Biological Process</option> + <option value="GO:MF" selected="True">GO: Molecular Function</option> + <option value="KEGG">KEGG</option> + </param> + </when> + <when value="history"> + <param name="category_file" type="data" format="tabular" label="Gene category file"/> + </when> + </conditional> + + <!-- Method Options --> + <section name="methods" title="Method Options"> + <param name="wallenius" type="boolean" checked="true" label="Use Wallenius method" help="See help for details. Default: Yes" /> + <param name="hypergeometric" type="boolean" checked="false" label="Use Hypergeometric method" help="Does not use gene length information. See help for details. Default: No" /> + <param name="repcnt" type="integer" size="3" min="0" max="10000" value="0" label="Sampling number" help="Number of random samples to be calculated when sampling is used. Set to 0 to not do sampling. Larger values take a long time. Default: 0" /> </section> - <param name="p_adj_method" type="select" label="Select a method for multiple hypothesis testing correction"> - <option value="BH" selected="true">Benjamini-Hochberg [FDR] (1995)</option> - <option value="holm">Holm (1979)</option> - <option value="hommel">Hommel (1988)</option> - <option value="hochberg">Hochberg (1988)</option> - <option value="bonferroni">Bonferroni</option> - <option value="BY">Benjamini - Yekutieli (2001)</option> - </param> - <param help="These plots may help you compare the different p-value estimation methods that goseq can use." label="Produce diagnostic plots?" name="make_plots" type="boolean"></param> + + <!-- Output Options --> + <section name="out" title="Output Options"> + <param name="make_plots" type="boolean" checked="false" label="Produce diagnostic plots?" 
help="This will produce the length bias (PWF) plot. If both sampling and wallenius methods are selected, it will also produce a plot comparing their p-values. These plots may help you compare the different p-value estimation methods that goseq can use. Default: No" /> + <param name="rdata_out" type="boolean" checked="false" label="Output RData file?" help="Output all the data used by R to construct the tables and plots, can be loaded into R. Default: No" /> + </section> + + <!-- Advanced Options --> + <section name="adv" title="Advanced Options"> + <param name="p_adj_method" type="select" label="Select a method for multiple hypothesis testing correction"> + <option value="BH" selected="True">Benjamini-Hochberg [FDR] (1995)</option> + <option value="holm">Holm (1979)</option> + <option value="hommel">Hommel (1988)</option> + <option value="hochberg">Hochberg (1988)</option> + <option value="bonferroni">Bonferroni</option> + <option value="BY">Benjamini - Yekutieli (2001)</option> + </param> + <param name="use_genes_without_cat" type="boolean" checked="false" label="Count genes without any category?" help="For example, a large number of genes may have no GO term annotated. If this option is set to No, those genes will be ignored in the calculation of p-values. If this option is set to Yes, then these genes will count towards the total number of genes outside the category being tested. This was the default behaviour for version 1.15.1 and earlier. 
Default: No"/> + </section> </inputs> + <outputs> - <data name="length_bias_plot" format="pdf" label="length bias plot"> - <filter>make_plots</filter> - <filter>methods['hypergeometric']</filter> + <data name="wallenius_tab" format="tabular" label="${tool.name} on ${on_string}: Ranked category list - Wallenius method"> + <filter>methods['wallenius]'</filter> </data> - <data name="sample_vs_wallenius_plot" format="pdf" label="Plot P-value from sampling against wallenius distribution"> - <filter>methods['repcnt'] != 0</filter> - <filter>methods['wallenius']</filter> - <filter>make_plots</filter> - </data> - <data name="nobias_tab" format="tabular" label="Ranked category list - no length bias correction"> - <filter>methods['hypergeometric']</filter> - </data> - <data name="sampling_tab" format="tabular" label="Ranked category list - sampling"> + <data name="sampling_tab" format="tabular" label="${tool.name} on ${on_string}: Ranked category list - Sampling method"> <filter>methods['repcnt'] != 0</filter> </data> - <data name="wallenius_tab" format="tabular" label="Ranked category list - wallenius method"> + <data name="nobias_tab" format="tabular" label="${tool.name} on ${on_string}: Ranked category list - Hypergeometric method"> + <filter>methods['hypergeometric']</filter> + </data> + <data name="length_bias_plot" format="pdf" label="${tool.name} on ${on_string}: Length bias plot"> + <filter>out['make_plots']</filter> + </data> + <data name="sample_vs_wallenius_plot" format="pdf" label="${tool.name} on ${on_string}: Sampling vs Wallenius P-values plot"> + <filter>methods['repcnt'] != 0</filter> <filter>methods['wallenius']</filter> + <filter>out['make_plots']</filter> + </data> + <data name="rdata" format="rdata" from_work_dir="goseq_analysis.RData" label="${tool.name} on ${on_string}: RData file"> + <filter>out['rdata_out']</filter> </data> </outputs> + <tests> - <test> + <!-- Ensure Wallenius table is output --> + <test expect_num_outputs="1"> + <param 
name="dge_file" value="dge_list.tab" ftype="tabular" /> + <param name="length_file" value="gene_length.tab" ftype="tabular" /> + <param name="catSource" value="history" /> + <param name="category_file" value="category.tab" ftype="tabular" /> + <param name="use_genes_without_cat" value="true" /> + <output name="wallenius_tab" file="wal.tab" compare="contains" /> + </test> + <!-- Ensure getting GO categories works --> + <test expect_num_outputs="1"> <param name="dge_file" value="dge_list.tab" ftype="tabular"/> <param name="length_file" value="gene_length.tab" ftype="tabular"/> - <param name="category_file" value="category.tab" ftype="tabular"/> + <param name="catSource" value="getgo" /> + <param name="genome" value="hg38" /> + <param name="gene_id" value="ensGene" /> + <param name="use_genes_without_cat" value="true" /> + <output name="wallenius_tab" ftype="tabular" file="getgo.hg38.tab" compare="contains"/> + </test> + <!-- Ensure getting GO categories for another genome (zebrafish) works --> + <test expect_num_outputs="1"> + <param name="dge_file" value="dge_list_zf.tab" ftype="tabular"/> + <param name="length_file" value="gene_length_zf.tab" ftype="tabular"/> + <param name="catSource" value="getgo" /> + <param name="genome" value="danRer10"/> + <param name="gene_id" value="ensGene" /> + <param name="use_genes_without_cat" value="true" /> + <output name="wallenius_tab" ftype="tabular" file="getgo.danRer10.tab" compare="contains"/> + </test> + <!-- Ensure length bias plot works --> + <test expect_num_outputs="2"> + <param name="dge_file" value="dge_list.tab" ftype="tabular" /> + <param name="length_file" value="gene_length.tab" ftype="tabular" /> + <param name="catSource" value="history" /> + <param name="category_file" value="category.tab" ftype="tabular" /> + <param name="make_plots" value="true" /> <param name="use_genes_without_cat" value="true" /> - <output name="wallenius_tab" file="wal.tab" compare="re_match"/> + <output name="length_bias_plot" ftype="pdf" 
file="length_bias_plot.pdf" compare="sim_size" /> + </test> + <!-- Ensure hypergeometric works --> + <test expect_num_outputs="2"> + <param name="dge_file" value="dge_list.tab" ftype="tabular" /> + <param name="length_file" value="gene_length.tab" ftype="tabular" /> + <param name="catSource" value="history" /> + <param name="category_file" value="category.tab" ftype="tabular" /> + <param name="use_genes_without_cat" value="true" /> + <param name="hypergeometric" value="true" /> + <output name="nobias_tab" file="nobias.tab" compare="contains" /> + </test> + <!-- Ensure sampling vs wallenius works --> + <test expect_num_outputs="4"> + <param name="dge_file" value="dge_list.tab" ftype="tabular" /> + <param name="length_file" value="gene_length.tab" ftype="tabular" /> + <param name="catSource" value="history" /> + <param name="category_file" value="category.tab" ftype="tabular" /> + <param name="use_genes_without_cat" value="true" /> + <param name="make_plots" value="true" /> + <param name="repcnt" value="1000" /> + <output name="sampling_tab" file="samp.tab" compare="sim_size" /> + <output name="length_bias_plot" ftype="pdf" file="length_bias_plot.pdf" compare="sim_size" /> + <output name="sample_vs_wallenius_plot" ftype="pdf" file="sample_vs_wallenius_plot.pdf" compare="sim_size" /> + </test> + <!-- Ensure RData output works --> + <test expect_num_outputs="2"> + <param name="dge_file" value="dge_list.tab" ftype="tabular" /> + <param name="length_file" value="gene_length.tab" ftype="tabular" /> + <param name="catSource" value="history" /> + <param name="category_file" value="category.tab" ftype="tabular" /> + <param name="use_genes_without_cat" value="true" /> + <param name="rdata_out" value="true" /> + <output name="rdata" file="goseq_analysis.RData" compare="sim_size" /> </test> </tests> - <help> + + <help><![CDATA[ + +.. 
class:: infomark + +**What it does** - **What it does** +`Gene Ontology`_ (GO) analysis is widely used to reduce complexity and highlight biological processes in genome-wide expression studies, but standard methods give biased results on RNA-seq data due to over-detection of differential expression for long and highly expressed transcripts. This tool provides methods for performing GO analysis of RNA-seq data, taking length bias into account. The methods and software used by goseq are equally applicable to other category based tests of RNA-seq data, such as KEGG_ pathway analysis. + +Options map closely to the excellent goseq manual_. + +----- + +**Inputs** - Detects Gene Ontology and/or other user defined categories which are over/under-represented in RNA-seq data. +*Differentially expressed genes file* + +goseq needs a tabular file containing information on differentially expressed genes. This should contain all genes assayed in the RNA-seq experiment. The file should have two columns with an optional header row. The first column should contain the Gene IDs, which must be unique within the file and not repeated. The second column should contain True or False. True means the gene should count as differentially expressed, False means it is not differentially expressed. You can use the "Compute an expression on every row" tool to create a True / False column for your dataset. + +Example: - Options map closely to the excellent manual_ + =============== ===== + ENSG00000236824 False + ENSG00000162526 False + ENSG00000090402 True + ENSG00000169188 False + ENSG00000124103 False + =============== ===== +*Gene lengths file* - **Input files** +goseq needs information about the length of a gene to correct for potential length bias in differentially expressed genes using a Probability Weight Function (PWF). The PWF can be thought of as a function that gives the probability that a gene will be differentially expressed, based on its length alone. 
The gene length file should have two columns with an optional header row. The first column should contain the Gene IDs, and the second column should contain the gene length in bp. If length data is unavailable for some genes, the length entries for those genes should be set to NA. The goseq authors recommend using the gene lengths obtained from upstream summarization programs, such as **featureCounts**, if provided. Alternatively, the **Gene length and GC content** tool can produce such a file. - *DGE list:* - goseq needs a tabular file with genes in the first column, and TRUE or FALSE in the last column. - TRUE means the gene should count as differentially expressed, FALSE means it is not differentially expressed. - You can use the "Compute an expression on every row" tool to create a TRUE / FALSE column for your dataset. +Example: + + =============== ===== + ENSG00000236824 13458 + ENSG00000162526 2191 + ENSG00000090402 6138 + ENSG00000169188 3245 + ENSG00000124103 1137 + =============== ===== + +*Gene categories file* + +This tool can get GO and KEGG categories for some genomes. The three GO categories are GO:MF (Molecular Function - molecular activities of gene products), GO:CC (Cellular Component - where gene products are active), GO:BP (Biological Process - pathways and larger processes made up of the activities of multiple gene products). If your genome is not available, you will need to provide a file describing the membership of genes in categories. The category file should have two columns with an optional header row, with the Gene ID in the first column and the category identifier in the second column. As the mapping between categories and genes is usually many-to-many, the same Gene ID or the same category identifier will usually appear in multiple rows of this table. + +Example: - *Gene length file:* - goseq needs information about the length of a gene to correct for potential length bias in differentially expressed genes - using a prodbability weight function (PWF). 
- The format of this file is tabular, with gene_id in the first column and length in the second column. - The "get length and gc content" tool can produce such a file. + =============== =========== + ENSG00000162526 GO\:0000003 + ENSG00000198648 GO\:0000278 + ENSG00000112312 GO\:0000278 + ENSG00000174442 GO\:0000278 + ENSG00000108953 GO\:0000278 + =============== =========== + +----- + +**Outputs** + +* This tool outputs a tabular file containing a ranked list of gene categories, similar to below. The default output is the Wallenius method table. If the Sampling and/or Hypergeometric methods are also selected, additional tables are produced. +* Optionally, this tool can also output some diagnostic plots and an RData file, see **Output Options** above. + +Example: - *Gene category file:* - You will also need a file describing the membership of genes in categories. The format of this file is gene_id in the first column, - category identifier in the second column. +=========== =============== ================ ============ ========== ======================================== ========== =================== ==================== +*category* *over_rep_pval* *under_rep_pval* *numDEInCat* *numInCat* *term* *ontology* *p.adjust.over_rep* *p.adjust.under_rep* +----------- --------------- ---------------- ------------ ---------- ---------------------------------------- ---------- ------------------- -------------------- +GO\:0005576 0.000054 0.999975 56 142 extracellular region CC 0.394825 1 +GO\:0005840 0.000143 0.999988 9 12 ribosome CC 0.394825 1 +GO\:0044763 0.000252 0.999858 148 473 single-organism cellular process BP 0.394825 1 +GO\:0044699 0.000279 0.999844 158 513 single-organism process BP 0.394825 1 +GO\:0065010 0.000428 0.999808 43 108 extracellular membrane-bounded organelle CC 0.394825 1 +GO\:0070062 0.000428 0.999808 43 108 extracellular exosome CC 0.394825 1 +=========== =============== ================ ============ ========== 
======================================== ========== =================== ==================== - **Method options** +----- + +**Method options** - 3 methods, "Wallenius", "Sampling" and "Hypergeometric", can be used to calculate the p-values as follows. +3 methods, *Wallenius*, *Sampling* and *Hypergeometric*, can be used to calculate the p-values as follows. + +*Wallenius* + +approximates the true distribution of numbers of members of a category amongst DE genes by the Wallenius non-central hypergeometric distribution. +This distribution assumes that within a category all genes have the same probability of being chosen. Therefore, this approximation works best when the range in probabilities obtained by the probability weighting function is small. This is the method used by default. + +*Sampling* - *"Wallenius"* approximates the true distribution of numbers of members of a category amongst DE genes by the Wallenius non-central hypergeometric distribution. - This distribution assumes that within a category all genes have the same probability of being chosen. - Therefore, this approximation works best when the range in probabilities obtained by the probability weighting function is small. +uses random sampling to approximate the true distribution and uses it to calculate the p-values for over (and under) representation of categories. +Although this is the most accurate method given a high enough value of sampling number, its use quickly becomes computationally prohibitive. It may sometimes be desirable to use random sampling to generate the null distribution for category +membership. For example, to check consistency against results from the Wallenius approximation. This is easily accomplished by using the method option to additionally specify sampling and the number of samples to generate. 
+ +*Hypergeometric* + +assumes there is no bias in power to detect differential expression at all and calculates the p-values using a standard hypergeometric distribution (no length bias correction is performed). Useful if you wish to test the effect of length bias on your results. +Caution: Hypergeometric should NEVER be used for producing results for biological interpretation of RNA-seq data. If length bias is truly not present in your data, goseq will produce a nearly flat PWF plot, no length bias correction will be applied to your data, and all methods will produce the same results. - *"Sampling"* uses random sampling to approximate the true distribution and uses it to calculate the p-values for over (and under) representation of categories. - Although this is the most accurate method given a high enough value of sampling depth, its use quickly becomes computationally prohibitive. +----- + +**More Information** - *"Hypergeometric"* assumes there is no bias in power to detect differential expression at all and calculates the p-values using a standard hypergeometric distribution. - Useful if you wish to test the effect of selection bias on your results. +In order to account for the length bias inherent to RNA-seq data when performing a GO analysis +(or other category based tests), one cannot simply use the hypergeometric distribution as the null +distribution for category membership, which is appropriate for data without DE length bias, such +as microarray data. GO analysis of RNA-seq data requires the use of random sampling in order +to generate a suitable null distribution for GO category membership and calculate each category's +significance for over representation amongst DE genes. + +However, this random sampling is computationally expensive. In most cases, the Wallenius +distribution can be used to approximate the true null distribution, without any significant loss in +accuracy. The goseq package implements this approximation as its default option. 
The option +to generate the null distribution using random sampling is also provided, but users +should be aware that the default number of samples generated will not be enough to accurately +call enrichment when there are a large number of GO terms. - CAUTION: "Hypergeometric" should NEVER be used for producing results for biological interpretation. - If there is genuinely no bias in power to detect DE in your experiment, the PWF will reflect this and the other methods will produce accuracte results. +Having established a null distribution, each category is then tested for over and under +representation amongst the set of differentially expressed genes and the null is used to calculate a +p-value for under and over representation. + +Having performed a GO analysis, you may now wish to interpret the results. If you wish to +identify categories significantly enriched/unenriched below some p-value cutoff, it is necessary to +first apply some kind of multiple hypothesis testing correction. For example, you can identify GO categories over +enriched using a 0.05 FDR (p.adjust) cutoff [Benjamini and Hochberg, 1995]. - .. _manual: https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf +Unless you are a machine, GO and KEGG category identifiers are probably not very meaningful to you. +Information about each identifier can be obtained from the `Gene Ontology`_ and KEGG_ websites. +.. _manual: https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf +.. _Gene Ontology: http://www.geneontology.org +.. _KEGG: http://www.genome.jp/kegg - </help> + ]]></help> <citations> <citation type="doi">10.1186/gb-2010-11-2-r14</citation> </citations> -</tool> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dge_list_zf.tab Mon Oct 23 11:19:12 2017 -0400 @@ -0,0 +1,632 @@ +ENSDARG00000092696 FALSE +ENSDARG00000104569 TRUE +ENSDARG00000008472 FALSE +ENSDARG00000058451 FALSE +ENSDARG00000035957 TRUE +ENSDARG00000043514 FALSE +ENSDARG00000058114 TRUE +ENSDARG00000102885 FALSE +ENSDARG00000005451 FALSE +ENSDARG00000058839 FALSE +ENSDARG00000073999 FALSE +ENSDARG00000079611 FALSE +ENSDARG00000042623 FALSE +ENSDARG00000044136 FALSE +ENSDARG00000060983 FALSE +ENSDARG00000108060 FALSE +ENSDARG00000036852 FALSE +ENSDARG00000089303 FALSE +ENSDARG00000090013 TRUE +ENSDARG00000042902 FALSE +ENSDARG00000075203 FALSE +ENSDARG00000069601 TRUE +ENSDARG00000003822 FALSE +ENSDARG00000057100 FALSE +ENSDARG00000088508 TRUE +ENSDARG00000026454 TRUE +ENSDARG00000006399 TRUE +ENSDARG00000079457 TRUE +ENSDARG00000090654 FALSE +ENSDARG00000092483 FALSE +ENSDARG00000060627 FALSE +ENSDARG00000039626 FALSE +ENSDARG00000088475 FALSE +ENSDARG00000055548 FALSE +ENSDARG00000006196 FALSE +ENSDARG00000035559 FALSE +ENSDARG00000015254 FALSE +ENSDARG00000031203 FALSE +ENSDARG00000012790 TRUE +ENSDARG00000004017 TRUE +ENSDARG00000074558 FALSE +ENSDARG00000043077 TRUE +ENSDARG00000098550 FALSE +ENSDARG00000095224 FALSE +ENSDARG00000045352 FALSE +ENSDARG00000079659 FALSE +ENSDARG00000104567 FALSE +ENSDARG00000060169 FALSE +ENSDARG00000104837 FALSE +ENSDARG00000097827 FALSE +ENSDARG00000012684 TRUE +ENSDARG00000002912 FALSE +ENSDARG00000104861 FALSE +ENSDARG00000089292 FALSE +ENSDARG00000054903 FALSE +ENSDARG00000031681 FALSE +ENSDARG00000027586 TRUE +ENSDARG00000061216 FALSE +ENSDARG00000051853 FALSE +ENSDARG00000062192 FALSE +ENSDARG00000076568 FALSE +ENSDARG00000004774 FALSE +ENSDARG00000033889 FALSE +ENSDARG00000044092 FALSE +ENSDARG00000010098 FALSE +ENSDARG00000043635 TRUE +ENSDARG00000076804 FALSE +ENSDARG00000061363 FALSE +ENSDARG00000039522 FALSE +ENSDARG00000022218 FALSE +ENSDARG00000040024 FALSE +ENSDARG00000019897 FALSE 
+ENSDARG00000058701 FALSE +ENSDARG00000036875 TRUE +ENSDARG00000103380 FALSE +ENSDARG00000025391 FALSE +ENSDARG00000101637 TRUE +ENSDARG00000067656 TRUE +ENSDARG00000089467 FALSE +ENSDARG00000002494 FALSE +ENSDARG00000102896 FALSE +ENSDARG00000100279 FALSE +ENSDARG00000095969 TRUE +ENSDARG00000039130 TRUE +ENSDARG00000077060 TRUE +ENSDARG00000037815 FALSE +ENSDARG00000074170 FALSE +ENSDARG00000013063 TRUE +ENSDARG00000035570 TRUE +ENSDARG00000104710 FALSE +ENSDARG00000070168 FALSE +ENSDARG00000052728 TRUE +ENSDARG00000025949 FALSE +ENSDARG00000076379 FALSE +ENSDARG00000032631 FALSE +ENSDARG00000024324 FALSE +ENSDARG00000010445 FALSE +ENSDARG00000008235 TRUE +ENSDARG00000044752 FALSE +ENSDARG00000099996 FALSE +ENSDARG00000067719 FALSE +ENSDARG00000063437 FALSE +ENSDARG00000088631 TRUE +ENSDARG00000074849 FALSE +ENSDARG00000041853 TRUE +ENSDARG00000060002 TRUE +ENSDARG00000042977 TRUE +ENSDARG00000043105 FALSE +ENSDARG00000013842 FALSE +ENSDARG00000004870 TRUE +ENSDARG00000019000 FALSE +ENSDARG00000100710 TRUE +ENSDARG00000077988 FALSE +ENSDARG00000073985 FALSE +ENSDARG00000028192 FALSE +ENSDARG00000075881 FALSE +ENSDARG00000100203 FALSE +ENSDARG00000073933 FALSE +ENSDARG00000070478 FALSE +ENSDARG00000068415 FALSE +ENSDARG00000032765 TRUE +ENSDARG00000092550 TRUE +ENSDARG00000015678 FALSE +ENSDARG00000075463 FALSE +ENSDARG00000075172 TRUE +ENSDARG00000098883 FALSE +ENSDARG00000075721 FALSE +ENSDARG00000017058 FALSE +ENSDARG00000096249 FALSE +ENSDARG00000059234 TRUE +ENSDARG00000070644 FALSE +ENSDARG00000035630 FALSE +ENSDARG00000029003 FALSE +ENSDARG00000073737 FALSE +ENSDARG00000094336 FALSE +ENSDARG00000058679 FALSE +ENSDARG00000069266 FALSE +ENSDARG00000002571 FALSE +ENSDARG00000103594 TRUE +ENSDARG00000041314 FALSE +ENSDARG00000044490 FALSE +ENSDARG00000075870 FALSE +ENSDARG00000062646 FALSE +ENSDARG00000053517 TRUE +ENSDARG00000043334 FALSE +ENSDARG00000076667 FALSE +ENSDARG00000063375 TRUE +ENSDARG00000104696 FALSE +ENSDARG00000039125 FALSE +ENSDARG00000078546 
FALSE +ENSDARG00000071060 TRUE +ENSDARG00000077011 TRUE +ENSDARG00000009953 FALSE +ENSDARG00000038868 FALSE +ENSDARG00000103610 FALSE +ENSDARG00000041619 TRUE +ENSDARG00000026109 FALSE +ENSDARG00000003564 FALSE +ENSDARG00000087333 TRUE +ENSDARG00000099183 TRUE +ENSDARG00000044524 FALSE +ENSDARG00000041449 FALSE +ENSDARG00000058285 FALSE +ENSDARG00000059529 TRUE +ENSDARG00000003251 FALSE +ENSDARG00000008785 FALSE +ENSDARG00000003022 FALSE +ENSDARG00000101317 FALSE +ENSDARG00000013528 FALSE +ENSDARG00000101333 FALSE +ENSDARG00000053990 TRUE +ENSDARG00000055792 FALSE +ENSDARG00000013628 TRUE +ENSDARG00000090941 TRUE +ENSDARG00000096081 FALSE +ENSDARG00000014274 FALSE +ENSDARG00000059925 TRUE +ENSDARG00000057698 FALSE +ENSDARG00000073792 FALSE +ENSDARG00000069808 FALSE +ENSDARG00000071197 FALSE +ENSDARG00000021735 TRUE +ENSDARG00000052376 FALSE +ENSDARG00000103235 FALSE +ENSDARG00000060176 FALSE +ENSDARG00000014106 FALSE +ENSDARG00000039882 FALSE +ENSDARG00000099771 TRUE +ENSDARG00000073718 FALSE +ENSDARG00000008377 TRUE +ENSDARG00000068199 FALSE +ENSDARG00000090770 FALSE +ENSDARG00000038312 TRUE +ENSDARG00000058287 FALSE +ENSDARG00000004937 TRUE +ENSDARG00000102417 TRUE +ENSDARG00000012485 FALSE +ENSDARG00000079878 FALSE +ENSDARG00000096867 TRUE +ENSDARG00000102082 FALSE +ENSDARG00000045515 TRUE +ENSDARG00000079723 TRUE +ENSDARG00000093007 FALSE +ENSDARG00000056783 FALSE +ENSDARG00000057159 FALSE +ENSDARG00000053571 FALSE +ENSDARG00000102381 TRUE +ENSDARG00000009436 FALSE +ENSDARG00000075567 FALSE +ENSDARG00000097650 FALSE +ENSDARG00000015722 FALSE +ENSDARG00000060372 TRUE +ENSDARG00000037066 FALSE +ENSDARG00000005163 TRUE +ENSDARG00000018627 TRUE +ENSDARG00000004771 FALSE +ENSDARG00000002220 FALSE +ENSDARG00000104388 TRUE +ENSDARG00000068912 TRUE +ENSDARG00000015780 FALSE +ENSDARG00000096989 FALSE +ENSDARG00000019195 FALSE +ENSDARG00000100742 TRUE +ENSDARG00000012234 FALSE +ENSDARG00000103472 FALSE +ENSDARG00000043938 FALSE +ENSDARG00000043209 FALSE 
+ENSDARG00000036772 FALSE +ENSDARG00000089236 FALSE +ENSDARG00000042277 FALSE +ENSDARG00000056740 FALSE +ENSDARG00000024669 FALSE +ENSDARG00000103892 FALSE +ENSDARG00000042892 FALSE +ENSDARG00000036235 FALSE +ENSDARG00000010700 FALSE +ENSDARG00000054804 TRUE +ENSDARG00000031506 FALSE +ENSDARG00000063726 FALSE +ENSDARG00000019646 FALSE +ENSDARG00000034753 TRUE +ENSDARG00000055338 FALSE +ENSDARG00000105098 FALSE +ENSDARG00000062190 FALSE +ENSDARG00000060380 TRUE +ENSDARG00000015222 TRUE +ENSDARG00000074779 FALSE +ENSDARG00000102252 TRUE +ENSDARG00000017154 FALSE +ENSDARG00000100899 TRUE +ENSDARG00000012314 TRUE +ENSDARG00000102380 FALSE +ENSDARG00000043404 TRUE +ENSDARG00000000857 FALSE +ENSDARG00000044642 TRUE +ENSDARG00000098622 FALSE +ENSDARG00000061472 FALSE +ENSDARG00000007955 TRUE +ENSDARG00000025094 FALSE +ENSDARG00000011125 FALSE +ENSDARG00000013006 TRUE +ENSDARG00000033285 FALSE +ENSDARG00000098695 TRUE +ENSDARG00000035132 FALSE +ENSDARG00000005897 FALSE +ENSDARG00000008867 TRUE +ENSDARG00000070452 TRUE +ENSDARG00000017004 FALSE +ENSDARG00000042799 FALSE +ENSDARG00000063157 FALSE +ENSDARG00000101849 FALSE +ENSDARG00000086345 FALSE +ENSDARG00000044298 FALSE +ENSDARG00000103135 FALSE +ENSDARG00000056862 FALSE +ENSDARG00000044575 FALSE +ENSDARG00000097964 FALSE +ENSDARG00000088950 FALSE +ENSDARG00000045853 FALSE +ENSDARG00000032206 FALSE +ENSDARG00000074756 FALSE +ENSDARG00000019417 TRUE +ENSDARG00000102118 TRUE +ENSDARG00000077983 FALSE +ENSDARG00000099740 FALSE +ENSDARG00000020777 FALSE +ENSDARG00000045415 FALSE +ENSDARG00000045514 FALSE +ENSDARG00000000001 FALSE +ENSDARG00000103917 FALSE +ENSDARG00000104516 FALSE +ENSDARG00000018903 FALSE +ENSDARG00000041431 FALSE +ENSDARG00000056896 FALSE +ENSDARG00000011703 FALSE +ENSDARG00000061185 FALSE +ENSDARG00000026448 TRUE +ENSDARG00000077357 FALSE +ENSDARG00000043417 FALSE +ENSDARG00000052371 FALSE +ENSDARG00000104288 TRUE +ENSDARG00000102898 FALSE +ENSDARG00000101258 TRUE +ENSDARG00000069373 TRUE 
+ENSDARG00000060109 TRUE +ENSDARG00000025350 TRUE +ENSDARG00000079499 TRUE +ENSDARG00000057983 FALSE +ENSDARG00000055708 FALSE +ENSDARG00000099651 FALSE +ENSDARG00000014366 TRUE +ENSDARG00000061257 FALSE +ENSDARG00000019791 TRUE +ENSDARG00000100560 FALSE +ENSDARG00000027381 FALSE +ENSDARG00000026294 TRUE +ENSDARG00000029955 FALSE +ENSDARG00000039263 FALSE +ENSDARG00000045257 FALSE +ENSDARG00000101347 FALSE +ENSDARG00000018623 TRUE +ENSDARG00000005236 FALSE +ENSDARG00000089856 TRUE +ENSDARG00000040131 FALSE +ENSDARG00000105046 FALSE +ENSDARG00000023712 FALSE +ENSDARG00000062485 TRUE +ENSDARG00000045305 FALSE +ENSDARG00000015495 TRUE +ENSDARG00000011405 FALSE +ENSDARG00000063197 FALSE +ENSDARG00000100428 TRUE +ENSDARG00000061600 TRUE +ENSDARG00000078761 FALSE +ENSDARG00000004840 FALSE +ENSDARG00000099657 FALSE +ENSDARG00000036911 FALSE +ENSDARG00000071424 FALSE +ENSDARG00000089930 FALSE +ENSDARG00000013776 FALSE +ENSDARG00000061294 TRUE +ENSDARG00000012044 FALSE +ENSDARG00000070239 TRUE +ENSDARG00000003845 FALSE +ENSDARG00000033443 TRUE +ENSDARG00000075441 TRUE +ENSDARG00000025667 TRUE +ENSDARG00000069478 FALSE +ENSDARG00000103826 FALSE +ENSDARG00000073848 FALSE +ENSDARG00000003869 TRUE +ENSDARG00000040478 FALSE +ENSDARG00000040505 FALSE +ENSDARG00000071449 FALSE +ENSDARG00000101947 FALSE +ENSDARG00000032340 FALSE +ENSDARG00000037229 FALSE +ENSDARG00000027777 TRUE +ENSDARG00000055903 FALSE +ENSDARG00000075180 FALSE +ENSDARG00000039901 FALSE +ENSDARG00000061629 FALSE +ENSDARG00000038585 TRUE +ENSDARG00000029859 FALSE +ENSDARG00000013871 FALSE +ENSDARG00000053474 FALSE +ENSDARG00000070675 FALSE +ENSDARG00000076657 FALSE +ENSDARG00000071570 FALSE +ENSDARG00000068833 FALSE +ENSDARG00000052331 FALSE +ENSDARG00000021383 TRUE +ENSDARG00000043705 TRUE +ENSDARG00000069295 TRUE +ENSDARG00000071551 FALSE +ENSDARG00000003027 FALSE +ENSDARG00000058608 TRUE +ENSDARG00000095826 FALSE +ENSDARG00000097889 FALSE +ENSDARG00000075914 FALSE +ENSDARG00000070348 FALSE +ENSDARG00000030824 
FALSE +ENSDARG00000030665 FALSE +ENSDARG00000022652 FALSE +ENSDARG00000096651 FALSE +ENSDARG00000105288 TRUE +ENSDARG00000062168 FALSE +ENSDARG00000075444 TRUE +ENSDARG00000003829 FALSE +ENSDARG00000086107 TRUE +ENSDARG00000062063 FALSE +ENSDARG00000006621 TRUE +ENSDARG00000091271 TRUE +ENSDARG00000060411 FALSE +ENSDARG00000043137 FALSE +ENSDARG00000029415 FALSE +ENSDARG00000042877 FALSE +ENSDARG00000045398 FALSE +ENSDARG00000040237 FALSE +ENSDARG00000098477 FALSE +ENSDARG00000062418 TRUE +ENSDARG00000060705 FALSE +ENSDARG00000040874 TRUE +ENSDARG00000033965 TRUE +ENSDARG00000103720 FALSE +ENSDARG00000058041 FALSE +ENSDARG00000054343 TRUE +ENSDARG00000059760 FALSE +ENSDARG00000062707 FALSE +ENSDARG00000055106 TRUE +ENSDARG00000041565 FALSE +ENSDARG00000007943 FALSE +ENSDARG00000059794 TRUE +ENSDARG00000089888 FALSE +ENSDARG00000096110 FALSE +ENSDARG00000063321 FALSE +ENSDARG00000017673 TRUE +ENSDARG00000041734 FALSE +ENSDARG00000038557 FALSE +ENSDARG00000053744 FALSE +ENSDARG00000040314 FALSE +ENSDARG00000021059 FALSE +ENSDARG00000075670 FALSE +ENSDARG00000043493 TRUE +ENSDARG00000098813 FALSE +ENSDARG00000032114 TRUE +ENSDARG00000035890 FALSE +ENSDARG00000100296 TRUE +ENSDARG00000008413 TRUE +ENSDARG00000100813 FALSE +ENSDARG00000052739 FALSE +ENSDARG00000006514 FALSE +ENSDARG00000078434 TRUE +ENSDARG00000003920 FALSE +ENSDARG00000043247 TRUE +ENSDARG00000090821 FALSE +ENSDARG00000059870 FALSE +ENSDARG00000023062 FALSE +ENSDARG00000059406 FALSE +ENSDARG00000059804 FALSE +ENSDARG00000103296 TRUE +ENSDARG00000006642 FALSE +ENSDARG00000043046 TRUE +ENSDARG00000106090 FALSE +ENSDARG00000090996 FALSE +ENSDARG00000077533 TRUE +ENSDARG00000095879 FALSE +ENSDARG00000060847 FALSE +ENSDARG00000087402 TRUE +ENSDARG00000052082 FALSE +ENSDARG00000093622 TRUE +ENSDARG00000028327 FALSE +ENSDARG00000012144 TRUE +ENSDARG00000042793 TRUE +ENSDARG00000098105 TRUE +ENSDARG00000101627 TRUE +ENSDARG00000075608 TRUE +ENSDARG00000101800 FALSE +ENSDARG00000079684 FALSE +ENSDARG00000041665 
FALSE +ENSDARG00000027099 FALSE +ENSDARG00000025549 FALSE +ENSDARG00000025421 FALSE +ENSDARG00000099385 FALSE +ENSDARG00000102705 TRUE +ENSDARG00000051800 FALSE +ENSDARG00000056847 FALSE +ENSDARG00000017929 FALSE +ENSDARG00000039302 FALSE +ENSDARG00000099943 FALSE +ENSDARG00000040851 TRUE +ENSDARG00000100788 TRUE +ENSDARG00000045681 FALSE +ENSDARG00000090543 FALSE +ENSDARG00000103902 FALSE +ENSDARG00000060215 FALSE +ENSDARG00000045909 FALSE +ENSDARG00000055917 FALSE +ENSDARG00000070426 TRUE +ENSDARG00000076110 TRUE +ENSDARG00000090002 FALSE +ENSDARG00000023111 FALSE +ENSDARG00000012125 FALSE +ENSDARG00000007172 FALSE +ENSDARG00000026178 FALSE +ENSDARG00000060445 FALSE +ENSDARG00000018162 TRUE +ENSDARG00000060661 FALSE +ENSDARG00000013659 FALSE +ENSDARG00000102744 FALSE +ENSDARG00000071501 FALSE +ENSDARG00000036090 FALSE +ENSDARG00000045704 FALSE +ENSDARG00000007221 TRUE +ENSDARG00000041407 FALSE +ENSDARG00000005356 FALSE +ENSDARG00000102356 TRUE +ENSDARG00000078479 TRUE +ENSDARG00000029368 TRUE +ENSDARG00000058992 FALSE +ENSDARG00000027529 FALSE +ENSDARG00000075369 TRUE +ENSDARG00000063731 TRUE +ENSDARG00000036542 TRUE +ENSDARG00000014569 TRUE +ENSDARG00000099298 FALSE +ENSDARG00000091851 FALSE +ENSDARG00000070698 FALSE +ENSDARG00000088807 FALSE +ENSDARG00000028725 FALSE +ENSDARG00000039719 FALSE +ENSDARG00000013732 FALSE +ENSDARG00000037425 TRUE +ENSDARG00000057151 FALSE +ENSDARG00000092488 FALSE +ENSDARG00000096454 FALSE +ENSDARG00000070028 FALSE +ENSDARG00000016710 FALSE +ENSDARG00000075072 FALSE +ENSDARG00000070867 TRUE +ENSDARG00000037846 FALSE +ENSDARG00000018600 FALSE +ENSDARG00000013711 FALSE +ENSDARG00000077081 FALSE +ENSDARG00000067784 FALSE +ENSDARG00000038731 TRUE +ENSDARG00000035181 FALSE +ENSDARG00000011240 FALSE +ENSDARG00000075616 TRUE +ENSDARG00000038442 TRUE +ENSDARG00000002026 TRUE +ENSDARG00000098618 FALSE +ENSDARG00000069505 FALSE +ENSDARG00000074892 FALSE +ENSDARG00000055360 FALSE +ENSDARG00000056797 FALSE +ENSDARG00000017115 FALSE 
+ENSDARG00000075533 FALSE +ENSDARG00000091906 FALSE +ENSDARG00000076501 TRUE +ENSDARG00000017659 TRUE +ENSDARG00000029660 FALSE +ENSDARG00000004297 FALSE +ENSDARG00000010280 FALSE +ENSDARG00000022968 FALSE +ENSDARG00000019033 TRUE +ENSDARG00000015638 FALSE +ENSDARG00000086550 FALSE +ENSDARG00000007918 FALSE +ENSDARG00000039677 TRUE +ENSDARG00000073784 TRUE +ENSDARG00000100862 FALSE +ENSDARG00000003058 FALSE +ENSDARG00000074644 TRUE +ENSDARG00000013892 FALSE +ENSDARG00000104348 FALSE +ENSDARG00000053457 TRUE +ENSDARG00000058351 FALSE +ENSDARG00000034670 TRUE +ENSDARG00000075169 FALSE +ENSDARG00000078894 FALSE +ENSDARG00000044090 FALSE +ENSDARG00000092077 TRUE +ENSDARG00000030694 TRUE +ENSDARG00000016886 TRUE +ENSDARG00000043339 FALSE +ENSDARG00000103225 FALSE +ENSDARG00000086411 TRUE +ENSDARG00000104874 FALSE +ENSDARG00000052747 FALSE +ENSDARG00000075823 TRUE +ENSDARG00000043843 FALSE +ENSDARG00000077614 TRUE +ENSDARG00000054220 FALSE +ENSDARG00000009626 FALSE +ENSDARG00000009023 TRUE +ENSDARG00000045561 FALSE +ENSDARG00000056559 FALSE +ENSDARG00000017272 TRUE +ENSDARG00000022788 FALSE +ENSDARG00000018272 FALSE +ENSDARG00000056473 FALSE +ENSDARG00000099766 TRUE +ENSDARG00000002991 FALSE +ENSDARG00000104431 FALSE +ENSDARG00000062465 FALSE +ENSDARG00000054583 TRUE +ENSDARG00000013802 FALSE +ENSDARG00000057910 TRUE +ENSDARG00000027984 FALSE +ENSDARG00000045420 FALSE +ENSDARG00000031359 FALSE +ENSDARG00000056764 FALSE +ENSDARG00000095594 FALSE +ENSDARG00000076191 FALSE +ENSDARG00000103251 FALSE +ENSDARG00000039501 TRUE +ENSDARG00000103589 FALSE +ENSDARG00000051926 FALSE +ENSDARG00000018681 FALSE +ENSDARG00000014138 TRUE +ENSDARG00000061462 TRUE +ENSDARG00000102111 FALSE +ENSDARG00000035819 FALSE +ENSDARG00000076486 FALSE +ENSDARG00000101799 FALSE +ENSDARG00000008105 FALSE +ENSDARG00000040387 FALSE +ENSDARG00000060518 FALSE +ENSDARG00000001244 FALSE +ENSDARG00000005540 FALSE +ENSDARG00000070055 FALSE +ENSDARG00000073841 FALSE +ENSDARG00000032117 FALSE +ENSDARG00000087110 
FALSE +ENSDARG00000075641 TRUE +ENSDARG00000001829 FALSE +ENSDARG00000103786 TRUE +ENSDARG00000058486 FALSE +ENSDARG00000042272 FALSE +ENSDARG00000000474 FALSE +ENSDARG00000061635 FALSE +ENSDARG00000095745 FALSE +ENSDARG00000073801 FALSE +ENSDARG00000028086 FALSE +ENSDARG00000056583 FALSE +ENSDARG00000033182 TRUE +ENSDARG00000052703 FALSE +ENSDARG00000051873 FALSE +ENSDARG00000002267 FALSE +ENSDARG00000091538 FALSE +ENSDARG00000067713 FALSE \ No newline at end of file
--- a/test-data/gc.tab Sun Jun 11 08:57:39 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -ENSG00000162526 0.388349514563107
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gene_length_zf.tab Mon Oct 23 11:19:12 2017 -0400 @@ -0,0 +1,632 @@ +ENSDARG00000092696 6983 +ENSDARG00000104569 7302 +ENSDARG00000008472 1848 +ENSDARG00000058451 5878 +ENSDARG00000035957 3057 +ENSDARG00000043514 6176 +ENSDARG00000058114 5901 +ENSDARG00000102885 7262 +ENSDARG00000005451 9161 +ENSDARG00000058839 3602 +ENSDARG00000073999 2558 +ENSDARG00000079611 7620 +ENSDARG00000042623 2387 +ENSDARG00000044136 7553 +ENSDARG00000060983 8413 +ENSDARG00000108060 1774 +ENSDARG00000036852 3185 +ENSDARG00000089303 6313 +ENSDARG00000090013 3748 +ENSDARG00000042902 2536 +ENSDARG00000075203 2844 +ENSDARG00000069601 2545 +ENSDARG00000003822 2571 +ENSDARG00000057100 9297 +ENSDARG00000088508 5775 +ENSDARG00000026454 4448 +ENSDARG00000006399 9992 +ENSDARG00000079457 4005 +ENSDARG00000090654 2211 +ENSDARG00000092483 4033 +ENSDARG00000060627 10580 +ENSDARG00000039626 8419 +ENSDARG00000088475 7743 +ENSDARG00000055548 5115 +ENSDARG00000006196 3835 +ENSDARG00000035559 3281 +ENSDARG00000015254 8601 +ENSDARG00000031203 9908 +ENSDARG00000012790 14778 +ENSDARG00000004017 8583 +ENSDARG00000074558 3683 +ENSDARG00000043077 13733 +ENSDARG00000098550 4556 +ENSDARG00000095224 5969 +ENSDARG00000045352 4803 +ENSDARG00000079659 9493 +ENSDARG00000104567 5098 +ENSDARG00000060169 8487 +ENSDARG00000104837 15706 +ENSDARG00000097827 8517 +ENSDARG00000012684 5525 +ENSDARG00000002912 14371 +ENSDARG00000104861 12106 +ENSDARG00000089292 11052 +ENSDARG00000054903 1097 +ENSDARG00000031681 9198 +ENSDARG00000027586 3839 +ENSDARG00000061216 9084 +ENSDARG00000051853 3428 +ENSDARG00000062192 13781 +ENSDARG00000076568 6265 +ENSDARG00000004774 5463 +ENSDARG00000033889 3833 +ENSDARG00000044092 5870 +ENSDARG00000010098 13674 +ENSDARG00000043635 9947 +ENSDARG00000076804 5325 +ENSDARG00000061363 7039 +ENSDARG00000039522 4253 +ENSDARG00000022218 6814 +ENSDARG00000040024 4194 +ENSDARG00000019897 3572 +ENSDARG00000058701 5106 +ENSDARG00000036875 3762 
+ENSDARG00000103380 9352 +ENSDARG00000025391 3212 +ENSDARG00000101637 3824 +ENSDARG00000067656 2864 +ENSDARG00000089467 2928 +ENSDARG00000002494 9067 +ENSDARG00000102896 4563 +ENSDARG00000100279 4447 +ENSDARG00000095969 5974 +ENSDARG00000039130 2856 +ENSDARG00000077060 6138 +ENSDARG00000037815 5299 +ENSDARG00000074170 5991 +ENSDARG00000013063 4982 +ENSDARG00000035570 2532 +ENSDARG00000104710 10945 +ENSDARG00000070168 12705 +ENSDARG00000052728 4646 +ENSDARG00000025949 7324 +ENSDARG00000076379 4713 +ENSDARG00000032631 6641 +ENSDARG00000024324 12238 +ENSDARG00000010445 2030 +ENSDARG00000008235 2405 +ENSDARG00000044752 4377 +ENSDARG00000099996 19998 +ENSDARG00000067719 8015 +ENSDARG00000063437 7223 +ENSDARG00000088631 1836 +ENSDARG00000074849 8477 +ENSDARG00000041853 10888 +ENSDARG00000060002 7217 +ENSDARG00000042977 10000 +ENSDARG00000043105 4213 +ENSDARG00000013842 4742 +ENSDARG00000004870 9022 +ENSDARG00000019000 11768 +ENSDARG00000100710 8586 +ENSDARG00000077988 5539 +ENSDARG00000073985 2740 +ENSDARG00000028192 12781 +ENSDARG00000075881 7046 +ENSDARG00000100203 3507 +ENSDARG00000073933 7594 +ENSDARG00000070478 6618 +ENSDARG00000068415 5055 +ENSDARG00000032765 2550 +ENSDARG00000092550 9112 +ENSDARG00000015678 5184 +ENSDARG00000075463 5802 +ENSDARG00000075172 2813 +ENSDARG00000098883 2251 +ENSDARG00000075721 4428 +ENSDARG00000017058 7437 +ENSDARG00000096249 5481 +ENSDARG00000059234 9240 +ENSDARG00000070644 1838 +ENSDARG00000035630 6902 +ENSDARG00000029003 9484 +ENSDARG00000073737 5717 +ENSDARG00000094336 1190 +ENSDARG00000058679 4183 +ENSDARG00000069266 10601 +ENSDARG00000002571 2682 +ENSDARG00000103594 928 +ENSDARG00000041314 4275 +ENSDARG00000044490 7345 +ENSDARG00000075870 11038 +ENSDARG00000062646 7411 +ENSDARG00000053517 5740 +ENSDARG00000043334 8151 +ENSDARG00000076667 3222 +ENSDARG00000063375 3847 +ENSDARG00000104696 3177 +ENSDARG00000039125 3889 +ENSDARG00000078546 6524 +ENSDARG00000071060 2128 +ENSDARG00000077011 3434 +ENSDARG00000009953 7360 
+ENSDARG00000038868 6297 +ENSDARG00000103610 7596 +ENSDARG00000041619 5566 +ENSDARG00000026109 6403 +ENSDARG00000003564 4830 +ENSDARG00000087333 9865 +ENSDARG00000099183 2558 +ENSDARG00000044524 7648 +ENSDARG00000041449 8251 +ENSDARG00000058285 3113 +ENSDARG00000059529 5731 +ENSDARG00000003251 9462 +ENSDARG00000008785 12496 +ENSDARG00000003022 6860 +ENSDARG00000101317 767 +ENSDARG00000013528 2476 +ENSDARG00000101333 5519 +ENSDARG00000053990 5614 +ENSDARG00000055792 17368 +ENSDARG00000013628 4304 +ENSDARG00000090941 6511 +ENSDARG00000096081 3096 +ENSDARG00000014274 5265 +ENSDARG00000059925 8714 +ENSDARG00000057698 4066 +ENSDARG00000073792 5938 +ENSDARG00000069808 8733 +ENSDARG00000071197 9277 +ENSDARG00000021735 2062 +ENSDARG00000052376 12054 +ENSDARG00000103235 19425 +ENSDARG00000060176 15732 +ENSDARG00000014106 4562 +ENSDARG00000039882 6357 +ENSDARG00000099771 18125 +ENSDARG00000073718 5999 +ENSDARG00000008377 14054 +ENSDARG00000068199 8305 +ENSDARG00000090770 9149 +ENSDARG00000038312 2805 +ENSDARG00000058287 11967 +ENSDARG00000004937 6400 +ENSDARG00000102417 3281 +ENSDARG00000012485 2414 +ENSDARG00000079878 9710 +ENSDARG00000096867 5557 +ENSDARG00000102082 3535 +ENSDARG00000045515 10949 +ENSDARG00000079723 12937 +ENSDARG00000093007 3846 +ENSDARG00000056783 7098 +ENSDARG00000057159 2999 +ENSDARG00000053571 3814 +ENSDARG00000102381 6394 +ENSDARG00000009436 5051 +ENSDARG00000075567 4394 +ENSDARG00000097650 4573 +ENSDARG00000015722 6845 +ENSDARG00000060372 3940 +ENSDARG00000037066 1761 +ENSDARG00000005163 3784 +ENSDARG00000018627 10344 +ENSDARG00000004771 7105 +ENSDARG00000002220 3131 +ENSDARG00000104388 844 +ENSDARG00000068912 4157 +ENSDARG00000015780 4913 +ENSDARG00000096989 6686 +ENSDARG00000019195 4215 +ENSDARG00000100742 4108 +ENSDARG00000012234 5848 +ENSDARG00000103472 3223 +ENSDARG00000043938 7756 +ENSDARG00000043209 1799 +ENSDARG00000036772 9115 +ENSDARG00000089236 6380 +ENSDARG00000042277 5639 +ENSDARG00000056740 3561 +ENSDARG00000024669 2033 
+ENSDARG00000103892 13230 +ENSDARG00000042892 20839 +ENSDARG00000036235 5677 +ENSDARG00000010700 2648 +ENSDARG00000054804 1137 +ENSDARG00000031506 7044 +ENSDARG00000063726 9648 +ENSDARG00000019646 6498 +ENSDARG00000034753 7732 +ENSDARG00000055338 4177 +ENSDARG00000105098 8083 +ENSDARG00000062190 5174 +ENSDARG00000060380 11349 +ENSDARG00000015222 13442 +ENSDARG00000074779 5979 +ENSDARG00000102252 3982 +ENSDARG00000017154 3213 +ENSDARG00000100899 5296 +ENSDARG00000012314 1127 +ENSDARG00000102380 8676 +ENSDARG00000043404 16295 +ENSDARG00000000857 5705 +ENSDARG00000044642 6243 +ENSDARG00000098622 5089 +ENSDARG00000061472 6372 +ENSDARG00000007955 16942 +ENSDARG00000025094 2735 +ENSDARG00000011125 4890 +ENSDARG00000013006 2010 +ENSDARG00000033285 2837 +ENSDARG00000098695 4375 +ENSDARG00000035132 1717 +ENSDARG00000005897 20027 +ENSDARG00000008867 2919 +ENSDARG00000070452 2058 +ENSDARG00000017004 4225 +ENSDARG00000042799 2214 +ENSDARG00000063157 2030 +ENSDARG00000101849 3879 +ENSDARG00000086345 4230 +ENSDARG00000044298 2387 +ENSDARG00000103135 1322 +ENSDARG00000056862 6909 +ENSDARG00000044575 3414 +ENSDARG00000097964 2036 +ENSDARG00000088950 5399 +ENSDARG00000045853 5422 +ENSDARG00000032206 2688 +ENSDARG00000074756 2225 +ENSDARG00000019417 7102 +ENSDARG00000102118 11710 +ENSDARG00000077983 9307 +ENSDARG00000099740 3212 +ENSDARG00000020777 5152 +ENSDARG00000045415 4731 +ENSDARG00000045514 7847 +ENSDARG00000000001 6020 +ENSDARG00000103917 6106 +ENSDARG00000104516 5621 +ENSDARG00000018903 1712 +ENSDARG00000041431 3527 +ENSDARG00000056896 10842 +ENSDARG00000011703 9503 +ENSDARG00000061185 8168 +ENSDARG00000026448 7185 +ENSDARG00000077357 2996 +ENSDARG00000043417 15809 +ENSDARG00000052371 2990 +ENSDARG00000104288 6758 +ENSDARG00000102898 1538 +ENSDARG00000101258 3475 +ENSDARG00000069373 11861 +ENSDARG00000060109 4302 +ENSDARG00000025350 3059 +ENSDARG00000079499 2185 +ENSDARG00000057983 2469 +ENSDARG00000055708 4271 +ENSDARG00000099651 11762 +ENSDARG00000014366 10460 
+ENSDARG00000061257 9337 +ENSDARG00000019791 7656 +ENSDARG00000100560 2327 +ENSDARG00000027381 9625 +ENSDARG00000026294 8159 +ENSDARG00000029955 6341 +ENSDARG00000039263 6944 +ENSDARG00000045257 8450 +ENSDARG00000101347 4030 +ENSDARG00000018623 13273 +ENSDARG00000005236 2190 +ENSDARG00000089856 7177 +ENSDARG00000040131 4180 +ENSDARG00000105046 3114 +ENSDARG00000023712 14580 +ENSDARG00000062485 4683 +ENSDARG00000045305 5099 +ENSDARG00000015495 4815 +ENSDARG00000011405 2194 +ENSDARG00000063197 19449 +ENSDARG00000100428 8157 +ENSDARG00000061600 1790 +ENSDARG00000078761 10250 +ENSDARG00000004840 5114 +ENSDARG00000099657 2038 +ENSDARG00000036911 4039 +ENSDARG00000071424 4853 +ENSDARG00000089930 2504 +ENSDARG00000013776 10632 +ENSDARG00000061294 6775 +ENSDARG00000012044 4062 +ENSDARG00000070239 6127 +ENSDARG00000003845 13793 +ENSDARG00000033443 3188 +ENSDARG00000075441 3266 +ENSDARG00000025667 7993 +ENSDARG00000069478 10422 +ENSDARG00000103826 8314 +ENSDARG00000073848 3576 +ENSDARG00000003869 10046 +ENSDARG00000040478 7565 +ENSDARG00000040505 6634 +ENSDARG00000071449 8506 +ENSDARG00000101947 10026 +ENSDARG00000032340 12492 +ENSDARG00000037229 3491 +ENSDARG00000027777 8402 +ENSDARG00000055903 5880 +ENSDARG00000075180 11836 +ENSDARG00000039901 8445 +ENSDARG00000061629 8442 +ENSDARG00000038585 3945 +ENSDARG00000029859 3936 +ENSDARG00000013871 5145 +ENSDARG00000053474 10321 +ENSDARG00000070675 4273 +ENSDARG00000076657 5202 +ENSDARG00000071570 11162 +ENSDARG00000068833 11503 +ENSDARG00000052331 1987 +ENSDARG00000021383 6024 +ENSDARG00000043705 2455 +ENSDARG00000069295 5916 +ENSDARG00000071551 8401 +ENSDARG00000003027 677 +ENSDARG00000058608 9615 +ENSDARG00000095826 3605 +ENSDARG00000097889 4644 +ENSDARG00000075914 9189 +ENSDARG00000070348 7743 +ENSDARG00000030824 4160 +ENSDARG00000030665 3971 +ENSDARG00000022652 3991 +ENSDARG00000096651 13023 +ENSDARG00000105288 6066 +ENSDARG00000062168 2607 +ENSDARG00000075444 1451 +ENSDARG00000003829 13963 +ENSDARG00000086107 3837 
+ENSDARG00000062063 8676 +ENSDARG00000006621 10686 +ENSDARG00000091271 668 +ENSDARG00000060411 6912 +ENSDARG00000043137 2949 +ENSDARG00000029415 7316 +ENSDARG00000042877 2762 +ENSDARG00000045398 7995 +ENSDARG00000040237 7139 +ENSDARG00000098477 6721 +ENSDARG00000062418 11438 +ENSDARG00000060705 4812 +ENSDARG00000040874 5072 +ENSDARG00000033965 1711 +ENSDARG00000103720 2629 +ENSDARG00000058041 11578 +ENSDARG00000054343 3039 +ENSDARG00000059760 9897 +ENSDARG00000062707 4191 +ENSDARG00000055106 6541 +ENSDARG00000041565 1691 +ENSDARG00000007943 5302 +ENSDARG00000059794 7534 +ENSDARG00000089888 6348 +ENSDARG00000096110 3785 +ENSDARG00000063321 1166 +ENSDARG00000017673 3898 +ENSDARG00000041734 6607 +ENSDARG00000038557 6844 +ENSDARG00000053744 6031 +ENSDARG00000040314 9966 +ENSDARG00000021059 3195 +ENSDARG00000075670 5630 +ENSDARG00000043493 3014 +ENSDARG00000098813 11707 +ENSDARG00000032114 6174 +ENSDARG00000035890 6326 +ENSDARG00000100296 9835 +ENSDARG00000008413 4802 +ENSDARG00000100813 7638 +ENSDARG00000052739 6294 +ENSDARG00000006514 8385 +ENSDARG00000078434 6391 +ENSDARG00000003920 1309 +ENSDARG00000043247 3885 +ENSDARG00000090821 4737 +ENSDARG00000059870 5667 +ENSDARG00000023062 3977 +ENSDARG00000059406 7823 +ENSDARG00000059804 4973 +ENSDARG00000103296 4784 +ENSDARG00000006642 4915 +ENSDARG00000043046 5056 +ENSDARG00000106090 2991 +ENSDARG00000090996 2850 +ENSDARG00000077533 6127 +ENSDARG00000095879 1418 +ENSDARG00000060847 23408 +ENSDARG00000087402 12711 +ENSDARG00000052082 7716 +ENSDARG00000093622 6302 +ENSDARG00000028327 9237 +ENSDARG00000012144 5939 +ENSDARG00000042793 3727 +ENSDARG00000098105 5105 +ENSDARG00000101627 6055 +ENSDARG00000075608 397 +ENSDARG00000101800 6858 +ENSDARG00000079684 4722 +ENSDARG00000041665 7939 +ENSDARG00000027099 7524 +ENSDARG00000025549 5118 +ENSDARG00000025421 3193 +ENSDARG00000099385 5477 +ENSDARG00000102705 2478 +ENSDARG00000051800 3402 +ENSDARG00000056847 7621 +ENSDARG00000017929 2490 +ENSDARG00000039302 4009 +ENSDARG00000099943 
1825 +ENSDARG00000040851 2769 +ENSDARG00000100788 7146 +ENSDARG00000045681 2839 +ENSDARG00000090543 6309 +ENSDARG00000103902 3149 +ENSDARG00000060215 11708 +ENSDARG00000045909 861 +ENSDARG00000055917 2191 +ENSDARG00000070426 5827 +ENSDARG00000076110 4980 +ENSDARG00000090002 1848 +ENSDARG00000023111 6716 +ENSDARG00000012125 3204 +ENSDARG00000007172 3265 +ENSDARG00000026178 680 +ENSDARG00000060445 6490 +ENSDARG00000018162 7777 +ENSDARG00000060661 3630 +ENSDARG00000013659 4736 +ENSDARG00000102744 3411 +ENSDARG00000071501 5593 +ENSDARG00000036090 6565 +ENSDARG00000045704 3313 +ENSDARG00000007221 10574 +ENSDARG00000041407 8668 +ENSDARG00000005356 11291 +ENSDARG00000102356 5023 +ENSDARG00000078479 2775 +ENSDARG00000029368 4290 +ENSDARG00000058992 8283 +ENSDARG00000027529 7190 +ENSDARG00000075369 5051 +ENSDARG00000063731 7422 +ENSDARG00000036542 2676 +ENSDARG00000014569 7375 +ENSDARG00000099298 5756 +ENSDARG00000091851 8467 +ENSDARG00000070698 3395 +ENSDARG00000088807 4319 +ENSDARG00000028725 4065 +ENSDARG00000039719 9199 +ENSDARG00000013732 1092 +ENSDARG00000037425 6768 +ENSDARG00000057151 15144 +ENSDARG00000092488 2085 +ENSDARG00000096454 6749 +ENSDARG00000070028 2866 +ENSDARG00000016710 1806 +ENSDARG00000075072 7179 +ENSDARG00000070867 11000 +ENSDARG00000037846 2326 +ENSDARG00000018600 15242 +ENSDARG00000013711 3301 +ENSDARG00000077081 3115 +ENSDARG00000067784 15701 +ENSDARG00000038731 4003 +ENSDARG00000035181 8260 +ENSDARG00000011240 4901 +ENSDARG00000075616 2423 +ENSDARG00000038442 7459 +ENSDARG00000002026 3070 +ENSDARG00000098618 6180 +ENSDARG00000069505 2082 +ENSDARG00000074892 12773 +ENSDARG00000055360 7068 +ENSDARG00000056797 5199 +ENSDARG00000017115 3193 +ENSDARG00000075533 7239 +ENSDARG00000091906 8184 +ENSDARG00000076501 2093 +ENSDARG00000017659 3157 +ENSDARG00000029660 4699 +ENSDARG00000004297 3420 +ENSDARG00000010280 2807 +ENSDARG00000022968 2165 +ENSDARG00000019033 7868 +ENSDARG00000015638 3285 +ENSDARG00000086550 3859 +ENSDARG00000007918 5611 
+ENSDARG00000039677 2769 +ENSDARG00000073784 2745 +ENSDARG00000100862 6157 +ENSDARG00000003058 2889 +ENSDARG00000074644 6922 +ENSDARG00000013892 5034 +ENSDARG00000104348 3245 +ENSDARG00000053457 7776 +ENSDARG00000058351 4276 +ENSDARG00000034670 5499 +ENSDARG00000075169 1365 +ENSDARG00000078894 4153 +ENSDARG00000044090 7828 +ENSDARG00000092077 7775 +ENSDARG00000030694 2395 +ENSDARG00000016886 5872 +ENSDARG00000043339 8700 +ENSDARG00000103225 3074 +ENSDARG00000086411 2266 +ENSDARG00000104874 2861 +ENSDARG00000052747 8005 +ENSDARG00000075823 2917 +ENSDARG00000043843 3153 +ENSDARG00000077614 4371 +ENSDARG00000054220 8511 +ENSDARG00000009626 8938 +ENSDARG00000009023 3406 +ENSDARG00000045561 3929 +ENSDARG00000056559 8333 +ENSDARG00000017272 7464 +ENSDARG00000022788 3285 +ENSDARG00000018272 1753 +ENSDARG00000056473 594 +ENSDARG00000099766 3391 +ENSDARG00000002991 3520 +ENSDARG00000104431 3327 +ENSDARG00000062465 497 +ENSDARG00000054583 3030 +ENSDARG00000013802 2392 +ENSDARG00000057910 3804 +ENSDARG00000027984 4204 +ENSDARG00000045420 4427 +ENSDARG00000031359 8630 +ENSDARG00000056764 3765 +ENSDARG00000095594 4175 +ENSDARG00000076191 4849 +ENSDARG00000103251 10864 +ENSDARG00000039501 6145 +ENSDARG00000103589 7977 +ENSDARG00000051926 8640 +ENSDARG00000018681 5479 +ENSDARG00000014138 6138 +ENSDARG00000061462 15787 +ENSDARG00000102111 9423 +ENSDARG00000035819 1981 +ENSDARG00000076486 7968 +ENSDARG00000101799 3670 +ENSDARG00000008105 8787 +ENSDARG00000040387 4616 +ENSDARG00000060518 8331 +ENSDARG00000001244 1781 +ENSDARG00000005540 1155 +ENSDARG00000070055 5411 +ENSDARG00000073841 4224 +ENSDARG00000032117 3659 +ENSDARG00000087110 5459 +ENSDARG00000075641 2527 +ENSDARG00000001829 2989 +ENSDARG00000103786 15155 +ENSDARG00000058486 5559 +ENSDARG00000042272 6005 +ENSDARG00000000474 2047 +ENSDARG00000061635 5731 +ENSDARG00000095745 2551 +ENSDARG00000073801 6118 +ENSDARG00000028086 8075 +ENSDARG00000056583 9656 +ENSDARG00000033182 2331 +ENSDARG00000052703 6689 +ENSDARG00000051873 
4234 +ENSDARG00000002267 8114 +ENSDARG00000091538 8260 +ENSDARG00000067713 853 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/getgo.danRer10.tab Mon Oct 23 11:19:12 2017 -0400 @@ -0,0 +1,10 @@ +category over_represented_pvalue under_represented_pvalue numDEInCat numInCat term ontology p.adjust.over_represented p.adjust.under_represented +GO:0031324 0.019289727841568 0.997004018824821 6 9 negative regulation of cellular metabolic process BP 1 1 +GO:0040011 0.0219399815699082 0.993554925323586 10 19 locomotion BP 1 1 +GO:0048738 0.0232122438335162 1 3 3 cardiac muscle tissue development BP 1 1 +GO:0031101 0.0232122438335199 1 3 3 fin regeneration BP 1 1 +GO:0042246 0.0232122438335199 1 3 3 tissue regeneration BP 1 1 +GO:0007050 0.023212243833521 1 3 3 cell cycle arrest BP 1 1 +GO:0019783 0.0254384360641003 0.998148600664743 4 5 ubiquitin-like protein-specific protease activity MF 1 1 +GO:0036459 0.0254384360641003 0.998148600664743 4 5 thiol-dependent ubiquitinyl hydrolase activity MF 1 1 +GO:0101005 0.0254384360641003 0.998148600664743 4 5 ubiquitinyl hydrolase activity MF 1 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/getgo.hg38.tab Mon Oct 23 11:19:12 2017 -0400 @@ -0,0 +1,10 @@ +category over_represented_pvalue under_represented_pvalue numDEInCat numInCat term ontology p.adjust.over_represented p.adjust.under_represented +GO:0005576 4.72734295222294e-05 0.999979271555286 56 142 extracellular region CC 0.329456825863645 1 +GO:0005840 0.000150633625443482 0.999987765310632 9 12 ribosome CC 0.329456825863645 1 +GO:0044763 0.000210237360853053 0.999883100939053 148 473 single-organism cellular process BP 0.329456825863645 1 +GO:0044699 0.000229197548055812 0.999873090122854 158 513 single-organism process BP 0.329456825863645 1 +GO:0065010 0.000394294879818402 0.999824474827037 43 108 extracellular membrane-bounded organelle CC 0.329456825863645 1 +GO:0070062 0.000394294879818402 0.999824474827037 43 108 extracellular exosome CC 0.329456825863645 1 +GO:0008150 0.000409074003076654 0.999785179807024 191 656 biological_process BP 0.329456825863645 1 +GO:0005488 0.000447980265756431 0.99975072864471 175 589 binding MF 0.329456825863645 1 +GO:0005198 0.000511195682086445 0.999905085898726 13 21 structural molecule activity MF 0.329456825863645 1
--- a/test-data/go_terms.tab Sun Jun 11 08:57:39 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ -ENSG00000162526 GO:0000003 -ENSG00000162526 GO:0000166 -ENSG00000162526 GO:0000287 -ENSG00000162526 GO:0001882 -ENSG00000162526 GO:0001883 -ENSG00000162526 GO:0003674 -ENSG00000162526 GO:0003824 -ENSG00000162526 GO:0004672 -ENSG00000162526 GO:0004674 -ENSG00000162526 GO:0005488 -ENSG00000162526 GO:0005515 -ENSG00000162526 GO:0005524 -ENSG00000162526 GO:0005575 -ENSG00000162526 GO:0005622 -ENSG00000162526 GO:0005623 -ENSG00000162526 GO:0005737 -ENSG00000162526 GO:0006464 -ENSG00000162526 GO:0006468 -ENSG00000162526 GO:0006793 -ENSG00000162526 GO:0006796 -ENSG00000162526 GO:0007154 -ENSG00000162526 GO:0007165 -ENSG00000162526 GO:0007275 -ENSG00000162526 GO:0007276 -ENSG00000162526 GO:0007283 -ENSG00000162526 GO:0008150 -ENSG00000162526 GO:0008152 -ENSG00000162526 GO:0009987 -ENSG00000162526 GO:0016301 -ENSG00000162526 GO:0016310 -ENSG00000162526 GO:0016740 -ENSG00000162526 GO:0016772 -ENSG00000162526 GO:0016773 -ENSG00000162526 GO:0017076 -ENSG00000162526 GO:0019538 -ENSG00000162526 GO:0019953 -ENSG00000162526 GO:0022414 -ENSG00000162526 GO:0023052 -ENSG00000162526 GO:0030154 -ENSG00000162526 GO:0030554 -ENSG00000162526 GO:0032501 -ENSG00000162526 GO:0032502 -ENSG00000162526 GO:0032504 -ENSG00000162526 GO:0032549 -ENSG00000162526 GO:0032550 -ENSG00000162526 GO:0032553 -ENSG00000162526 GO:0032555 -ENSG00000162526 GO:0032559 -ENSG00000162526 GO:0035556 -ENSG00000162526 GO:0035639 -ENSG00000162526 GO:0036094 -ENSG00000162526 GO:0036211 -ENSG00000162526 GO:0043167 -ENSG00000162526 GO:0043168 -ENSG00000162526 GO:0043169 -ENSG00000162526 GO:0043170 -ENSG00000162526 GO:0043412 -ENSG00000162526 GO:0044237 -ENSG00000162526 GO:0044238 -ENSG00000162526 GO:0044260 -ENSG00000162526 GO:0044267 -ENSG00000162526 GO:0044424 -ENSG00000162526 GO:0044464 -ENSG00000162526 GO:0044699 -ENSG00000162526 GO:0044700 -ENSG00000162526 GO:0044702 -ENSG00000162526 GO:0044703 
-ENSG00000162526 GO:0044707 -ENSG00000162526 GO:0044763 -ENSG00000162526 GO:0044767 -ENSG00000162526 GO:0046872 -ENSG00000162526 GO:0048232 -ENSG00000162526 GO:0048609 -ENSG00000162526 GO:0048869 -ENSG00000162526 GO:0050789 -ENSG00000162526 GO:0050794 -ENSG00000162526 GO:0050896 -ENSG00000162526 GO:0051704 -ENSG00000162526 GO:0051716 -ENSG00000162526 GO:0065007 -ENSG00000162526 GO:0071704 -ENSG00000162526 GO:0097159 -ENSG00000162526 GO:0097367 -ENSG00000162526 GO:1901265 -ENSG00000162526 GO:1901363
--- a/test-data/length.tab Sun Jun 11 08:57:39 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -ENSG00000162526 103
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/nobias.tab Mon Oct 23 11:19:12 2017 -0400 @@ -0,0 +1,3 @@ +category over_represented_pvalue under_represented_pvalue numDEInCat numInCat term ontology p.adjust.over_represented p.adjust.under_represented +GO:0000278 0.0129827306163772 0.999244816412166 4 5 mitotic cell cycle BP 0.0259654612327543 0.999244816412166 +GO:0000003 1 0.761 0 1 reproduction BP 1 0.999244816412166
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/samp.tab Mon Oct 23 11:19:12 2017 -0400 @@ -0,0 +1,3 @@ +category over_represented_pvalue under_represented_pvalue numDEInCat numInCat term ontology p.adjust.over_represented p.adjust.under_represented +GO:0000278 0.016983016983017 1 4 5 mitotic cell cycle BP 0.033966033966034 1 +GO:0000003 1 0.802197802197802 0 1 reproduction BP 1 1
--- a/test-data/wal.tab Sun Jun 11 08:57:39 2017 -0400 +++ b/test-data/wal.tab Mon Oct 23 11:19:12 2017 -0400 @@ -1,3 +1,3 @@ -category over_represented_pvalue under_represented_pvalue numDEInCat numInCat term ontology p\.adjust.over_represented p\.adjust.under_represented -GO:0000278 0\.0122.+ 0\.999.+ 4 5 mitotic cell cycle BP 0\.0245.+ 0\.999.+ -GO:0000003 1 0\.796.+ 0 1 reproduction BP 1 0\.999.+ +category over_represented_pvalue under_represented_pvalue numDEInCat numInCat term ontology p.adjust.over_represented p.adjust.under_represented +GO:0000278 0.0112350612534339 0.999376653834006 4 5 mitotic cell cycle BP 0.0224701225068678 0.999376653834006 +GO:0000003 1 0.805913166914892 0 1 reproduction BP 1 0.999376653834006