changeset 19:c56e0689e46e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
author iuc
date Tue, 04 Dec 2018 08:19:06 -0500
parents 3bf1b3ec1ddf
children 89d26b11d452
files deseq2.R deseq2.xml get_deseq_dataset.R test-data/GRCh38_latest_genomic.gff test-data/tx2gene.tab
diffstat 5 files changed, 171 insertions(+), 52 deletions(-) [+]
line wrap: on
line diff
--- a/deseq2.R	Fri Nov 16 14:47:19 2018 -0500
+++ b/deseq2.R	Tue Dec 04 08:19:06 2018 -0500
@@ -57,7 +57,7 @@
   "plots" , "p", 1, "character",
   "tximport", "i", 0, "logical",
   "txtype", "y", 1, "character",
-  "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
+  "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF/GFF3 file
   "esf", "e", 1, "character",
   "fit_type", "t", 1, "integer",
   "many_contrasts", "m", 0, "logical",
--- a/deseq2.xml	Fri Nov 16 14:47:19 2018 -0500
+++ b/deseq2.xml	Tue Dec 04 08:19:06 2018 -0500
@@ -1,11 +1,16 @@
-<tool id="deseq2" name="DESeq2" version="2.11.40.3">
+<tool id="deseq2" name="DESeq2" version="2.11.40.4">
     <description>Determines differentially expressed features from count tables</description>
     <requirements>
-        <requirement type="package" version="1.18.1">bioconductor-deseq2</requirement>
-        <requirement type="package" version="1.6.0">bioconductor-tximport</requirement>
-        <requirement type="package" version="1.30.0">bioconductor-genomicfeatures</requirement>
-        <requirement type="package" version="0.6.5">r-ggrepel</requirement>
-        <requirement type="package" version="1.0.8">r-pheatmap</requirement>
+        <requirement type="package" version="1.20.0">bioconductor-deseq2</requirement>
+        <!-- Optional dependency of tximport, needed to import kallisto results https://github.com/galaxyproject/usegalaxy-playbook/issues/161 -->
+        <requirement type="package" version="2.24.0">bioconductor-rhdf5</requirement>
+        <requirement type="package" version="1.8.0">bioconductor-tximport</requirement>
+        <requirement type="package" version="1.32.3">bioconductor-genomicfeatures</requirement>
+        <requirement type="package" version="1.20.2">r-getopt</requirement>
+        <requirement type="package" version="0.8.0">r-ggrepel</requirement>
+        <requirement type="package" version="3.0.1">r-gplots</requirement>
+        <requirement type="package" version="1.0.10">r-pheatmap</requirement>
+        <requirement type="package" version="0.2.20">r-rjson</requirement>
     </requirements>
     <stdio>
         <regex match="Execution halted"
@@ -27,7 +32,7 @@
     <command><![CDATA[
 #if $tximport.tximport_selector == 'tximport':
     #if $tximport.mapping_format.mapping_format_selector == 'gtf':
-        ln -s '$tximport.mapping_format.gtf_file' mapping.gtf &&
+        ln -s '$tximport.mapping_format.gtf_file' mapping.gff &&
     #else:
         ln -s '$tximport.mapping_format.tabular_file' mapping.txt &&
     #end if
@@ -92,7 +97,7 @@
         -i
         -y $tximport.txtype
         #if $tximport.mapping_format.mapping_format_selector == 'gtf':
-            -x mapping.gtf
+            -x mapping.gff
         #else:
             -x mapping.txt
         #end if
@@ -133,14 +138,14 @@
                 </param>
                 <conditional name="mapping_format">
                     <param name="mapping_format_selector" type="select" label="Gene mapping format">
-                        <option value="gtf" selected="True">GTF</option>
-                        <option value="tabular">Transcript-ID and Gene-ID mapping file</option>
+                        <option value="gtf" selected="True">GTF/GFF3</option>
+                        <option value="tabular">Transcript-ID to Gene-ID mapping file</option>
                     </param>
                     <when value="gtf">
-                        <param name="gtf_file" type="data" format="gtf,gff3" label="GTF/GFF3 file with Transcript - Gene mapping"/>
+                        <param name="gtf_file" type="data" format="gtf,gff3" label="GTF/GFF3 annotation file"/>
                     </when>
                     <when value="tabular">
-                        <param name="tabular_file" type="data" format="tabular" label="Tabular file with Transcript - Gene mapping"/>
+                        <param name="tabular_file" type="data" format="tabular" label="Tabular file with Transcript-ID to Gene-ID mapping"/>
                     </when>
                 </conditional>
             </when>
@@ -190,7 +195,7 @@
             help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" />
     </inputs>
     <outputs>
-        <data format="tabular" name="deseq_out" label="DESeq2 result file on ${on_string}">
+        <data name="deseq_out" format="tabular" label="DESeq2 result file on ${on_string}">
             <filter>many_contrasts is False</filter>
             <actions>
                 <action name="column_names" type="metadata" default="GeneID,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj" />
@@ -200,16 +205,16 @@
             <filter>many_contrasts is True</filter>
             <discover_datasets pattern="None.(?P&lt;designation&gt;.+_vs_.+)" format="tabular" directory="." visible="false"/>
         </collection>
-        <data format="pdf" name="plots" label="DESeq2 plots on ${on_string}">
+        <data name="plots" format="pdf" label="DESeq2 plots on ${on_string}">
             <filter>pdf == True</filter>
         </data>
-        <data format="tabular" name="counts_out" label="Normalized counts file on ${on_string}">
+        <data name="counts_out" format="tabular" label="Normalized counts file on ${on_string}">
             <filter>normCounts == True</filter>
         </data>
-        <data format="tabular" name="rlog_out" label="rLog-Normalized counts file on ${on_string}">
+        <data name="rlog_out" format="tabular" label="rLog-Normalized counts file on ${on_string}">
             <filter>normRLog == True</filter>
         </data>
-        <data format="tabular" name="vst_out" label="VST-Normalized counts file on ${on_string}">
+        <data name="vst_out" format="tabular" label="VST-Normalized counts file on ${on_string}">
             <filter>normVST == True</filter>
         </data>
     </outputs>
@@ -251,7 +256,7 @@
             </output>
             <output name="deseq_out" >
                 <assert_contents>
-                    <has_text_matching expression="FBgn0003360\t1933.9504.*\t-2.8399.*\t0.1309.*-21.6851.*2.831.*8.024" />
+                    <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101" />
                 </assert_contents>
             </output>
         </test>
@@ -315,7 +320,7 @@
             </output>
             <output name="deseq_out" >
                 <assert_contents>
-                    <has_text_matching expression="FBgn0003360\t1933.9504.*\t-2.8399.*\t0.1309.*-21.6851.*2.831.*8.024" />
+                    <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101" />
                 </assert_contents>
             </output>
         </test>
@@ -339,7 +344,31 @@
             <param name="tabular_file" value="tx2gene.tab"/>
             <output name="deseq_out" >
                 <assert_contents>
-                    <has_text_matching expression="MIR6859-2\t1.1858.*\t-1.5832.*\t1.2956.*\t-1.2219.*\t0.2217.*\t0.8868.*" />
+                    <has_text_matching expression="UGT3A2\t1\.8841.*\t-0\.1329.*\t0\.6936.*\t-0\.1917.*\t0\.8479.*\t0\.9999.*" />
+                </assert_contents>
+            </output>
+        </test>
+        <!--Ensure Sailfish/Salmon input with GFF3 annotation works-->
+        <test expect_num_outputs="1">
+            <repeat name="rep_factorName">
+                <param name="factorName" value="Treatment"/>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Treated"/>
+                    <param name="countsFile" value="sailfish/sailfish_quant.sf1.tab,sailfish/sailfish_quant.sf2.tab,sailfish/sailfish_quant.sf3.tab"/>
+                </repeat>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Untreated"/>
+                    <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/>
+                </repeat>
+            </repeat>
+            <param name="pdf" value="False"/>
+            <param name="tximport_selector" value="tximport"/>
+            <param name="txtype" value="sailfish"/>
+            <param name="mapping_format_selector" value="gtf"/>
+            <param name="gtf_file" value="GRCh38_latest_genomic.gff"/>
+            <output name="deseq_out" >
+                <assert_contents>
+                    <has_text_matching expression="UGT3A2\t1\.8841.*\t-0\.1329.*\t0\.6936.*\t-0\.1917.*\t0\.8479.*\t0\.9999.*" />
                 </assert_contents>
             </output>
         </test>
--- a/get_deseq_dataset.R	Fri Nov 16 14:47:19 2018 -0500
+++ b/get_deseq_dataset.R	Tue Dec 04 08:19:06 2018 -0500
@@ -9,11 +9,11 @@
   }
 
   if (!is.null(tximport)) {
-    if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport")
-    if (tolower(file_ext(opt$tx2gene)) == "gtf") {
-      gtfFile <-tx2gene
+    if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport")
+    if (tolower(file_ext(tx2gene)) == "gff") {  # deseq2.xml symlinks GTF/GFF3 input as mapping.gff
+      gffFile <-tx2gene
     } else {
-      gtfFile <- NULL
+      gffFile <- NULL
       tx2gene <- read.table(tx2gene, header=FALSE)
     }
     useTXI <- TRUE
@@ -45,22 +45,26 @@
 
   } else {
       # construct the object using tximport
-      # first need to make the tx2gene table
-      # this takes ~2-3 minutes using Bioconductor functions
-      if (!is.null(gtfFile)) {
-        suppressPackageStartupMessages({
-          library("GenomicFeatures")
-        })
-        txdb <- makeTxDbFromGFF(gtfFile, format="gtf")
-        k <- keys(txdb, keytype = "GENEID")
-        df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME")
-        tx2gene <- df[, 2:1]  # tx ID, then gene ID
-      }
       library("tximport")
       txiFiles <- as.character(sampleTable$filename)
       labs <- row.names(sampleTable)
       names(txiFiles) <- labs
-      txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)
+      if (!is.null(gffFile)) {
+        # first need to make the tx2gene table
+        # this takes ~2-3 minutes using Bioconductor functions
+        suppressPackageStartupMessages({
+          library("GenomicFeatures")
+        })
+        txdb <- makeTxDbFromGFF(gffFile)
+        k <- keys(txdb, keytype = "TXNAME")
+        tx2gene <- select(txdb, k, "GENEID", "TXNAME")
+      }
+      txi <- try(tximport(txiFiles, type=txtype, tx2gene=tx2gene))  # capture failure instead of aborting
+      if (inherits(txi, "try-error")) {
+        # Retry after dropping a trailing version suffix (e.g. NM_017410.2 -> NM_017410) from the transcript IDs in column 1
+        tx2gene[[1]] <- sub('\\.[0-9]+$', '', tx2gene[[1]])
+        txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)
+      }
       dds <- DESeqDataSetFromTximport(txi,
                                       subset(sampleTable, select=-c(filename)),
                                       designFormula)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/GRCh38_latest_genomic.gff	Tue Dec 04 08:19:06 2018 -0500
@@ -0,0 +1,86 @@
+##gff-version 3
+#!gff-spec-version 1.21
+#!processor NCBI annotwriter
+#!genome-build GRCh38.p12
+#!genome-build-accession NCBI_Assembly:GCF_000001405.38
+#!annotation-source NCBI Homo sapiens Annotation Release 109
+# Trimmed version of ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gff.gz
+##sequence-region NC_000005.10 1 181538259
+##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606
+NC_000005.10	RefSeq	region	1	181538259	.	+	.	ID=id565344;Dbxref=taxon:9606;Name=5;chromosome=5;gbkey=Src;genome=chromosome;mol_type=genomic DNA
+NC_000005.10	BestRefSeq%2CGnomon	gene	36035017	36071358	.	-	.	ID=gene14857;Dbxref=GeneID:167127,HGNC:HGNC:27266,MIM:616384;Name=UGT3A2;description=UDP glycosyltransferase family 3 member A2;gbkey=Gene;gene=UGT3A2;gene_biotype=protein_coding
+NC_000005.10	BestRefSeq	mRNA	36035017	36066921	.	-	.	ID=rna45581;Parent=gene14857;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;Name=NM_001168316.1;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1
+NC_000005.10	BestRefSeq	exon	36066696	36066921	.	-	.	ID=id576076;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1
+NC_000005.10	BestRefSeq	exon	36051870	36051984	.	-	.	ID=id576077;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1
+NC_000005.10	BestRefSeq	exon	36048889	36049420	.	-	.	ID=id576078;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1
+NC_000005.10	BestRefSeq	exon	36039477	36039708	.	-	.	ID=id576079;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1
+NC_000005.10	BestRefSeq	exon	36037797	36038016	.	-	.	ID=id576080;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1
+NC_000005.10	BestRefSeq	exon	36035017	36035974	.	-	.	ID=id576081;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1
+NC_000005.10	BestRefSeq	transcript	36035017	36066921	.	-	.	ID=rna45582;Parent=gene14857;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;Name=NR_031764.1;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1
+NC_000005.10	BestRefSeq	exon	36066696	36066921	.	-	.	ID=id576082;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1
+NC_000005.10	BestRefSeq	exon	36064249	36064350	.	-	.	ID=id576083;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1
+NC_000005.10	BestRefSeq	exon	36051870	36051984	.	-	.	ID=id576084;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1
+NC_000005.10	BestRefSeq	exon	36039477	36039708	.	-	.	ID=id576085;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1
+NC_000005.10	BestRefSeq	exon	36037797	36038016	.	-	.	ID=id576086;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1
+NC_000005.10	BestRefSeq	exon	36035017	36035974	.	-	.	ID=id576087;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1
+NC_000005.10	BestRefSeq	mRNA	36035017	36066921	.	-	.	ID=rna45583;Parent=gene14857;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;Name=NM_174914.3;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3
+NC_000005.10	BestRefSeq	exon	36066696	36066921	.	-	.	ID=id576088;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3
+NC_000005.10	BestRefSeq	exon	36064249	36064350	.	-	.	ID=id576089;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3
+NC_000005.10	BestRefSeq	exon	36051870	36051984	.	-	.	ID=id576090;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3
+NC_000005.10	BestRefSeq	exon	36048889	36049420	.	-	.	ID=id576091;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3
+NC_000005.10	BestRefSeq	exon	36039477	36039708	.	-	.	ID=id576092;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3
+NC_000005.10	BestRefSeq	exon	36037797	36038016	.	-	.	ID=id576093;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3
+NC_000005.10	BestRefSeq	exon	36035017	36035974	.	-	.	ID=id576094;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3
+##sequence-region NC_000012.12 1 133275309
+##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606
+NC_000012.12	RefSeq	region	1	133275309	.	+	.	ID=id1163836;Dbxref=taxon:9606;Name=12;chromosome=12;gbkey=Src;genome=chromosome;mol_type=genomic DNA
+NC_000012.12	BestRefSeq	gene	53938792	53946544	.	+	.	ID=gene33473;Dbxref=GeneID:3229,HGNC:HGNC:5125,MIM:142976;Name=HOXC13;description=homeobox C13;gbkey=Gene;gene=HOXC13;gene_biotype=protein_coding;gene_synonym=ECTD9,HOX3,HOX3G
+NC_000012.12	BestRefSeq	mRNA	53938792	53946544	.	+	.	ID=rna100330;Parent=gene33473;Dbxref=GeneID:3229,Genbank:NM_017410.2,HGNC:HGNC:5125,MIM:142976;Name=NM_017410.2;gbkey=mRNA;gene=HOXC13;product=homeobox C13;transcript_id=NM_017410.2
+NC_000012.12	BestRefSeq	exon	53938792	53939642	.	+	.	ID=id1209110;Parent=rna100330;Dbxref=GeneID:3229,Genbank:NM_017410.2,HGNC:HGNC:5125,MIM:142976;gbkey=mRNA;gene=HOXC13;product=homeobox C13;transcript_id=NM_017410.2
+NC_000012.12	BestRefSeq	exon	53945000	53946544	.	+	.	ID=id1209111;Parent=rna100330;Dbxref=GeneID:3229,Genbank:NM_017410.2,HGNC:HGNC:5125,MIM:142976;gbkey=mRNA;gene=HOXC13;product=homeobox C13;transcript_id=NM_017410.2
+NC_000012.12	BestRefSeq	gene	53954868	53956606	.	+	.	ID=gene33475;Dbxref=GeneID:3228,HGNC:HGNC:5124,MIM:142975;Name=HOXC12;description=homeobox C12;gbkey=Gene;gene=HOXC12;gene_biotype=protein_coding;gene_synonym=HOC3F,HOX3,HOX3F
+NC_000012.12	BestRefSeq	mRNA	53954868	53956606	.	+	.	ID=rna100332;Parent=gene33475;Dbxref=GeneID:3228,Genbank:NM_173860.2,HGNC:HGNC:5124,MIM:142975;Name=NM_173860.2;gbkey=mRNA;gene=HOXC12;product=homeobox C12;transcript_id=NM_173860.2
+NC_000012.12	BestRefSeq	exon	53954868	53955539	.	+	.	ID=id1209115;Parent=rna100332;Dbxref=GeneID:3228,Genbank:NM_173860.2,HGNC:HGNC:5124,MIM:142975;gbkey=mRNA;gene=HOXC12;product=homeobox C12;transcript_id=NM_173860.2
+NC_000012.12	BestRefSeq	exon	53956328	53956606	.	+	.	ID=id1209116;Parent=rna100332;Dbxref=GeneID:3228,Genbank:NM_173860.2,HGNC:HGNC:5124,MIM:142975;gbkey=mRNA;gene=HOXC12;product=homeobox C12;transcript_id=NM_173860.2
+NC_000012.12	BestRefSeq	gene	53973126	53976419	.	+	.	ID=gene33477;Dbxref=GeneID:3227,HGNC:HGNC:5123,MIM:605559;Name=HOXC11;description=homeobox C11;gbkey=Gene;gene=HOXC11;gene_biotype=protein_coding;gene_synonym=HOX3H
+NC_000012.12	BestRefSeq	mRNA	53973126	53976419	.	+	.	ID=rna100336;Parent=gene33477;Dbxref=GeneID:3227,Genbank:NM_014212.3,HGNC:HGNC:5123,MIM:605559;Name=NM_014212.3;Note=The RefSeq transcript has 1 non-frameshifting indel compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=HOXC11;inference=similar to RNA sequence%2C mRNA (same species):RefSeq:NM_014212.3;product=homeobox C11;transcript_id=NM_014212.3
+NC_000012.12	BestRefSeq	exon	53973126	53973923	.	+	.	ID=id1209133;Parent=rna100336;Dbxref=GeneID:3227,Genbank:NM_014212.3,HGNC:HGNC:5123,MIM:605559;Note=The RefSeq transcript has 1 non-frameshifting indel compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=HOXC11;inference=similar to RNA sequence%2C mRNA (same species):RefSeq:NM_014212.3;product=homeobox C11;transcript_id=NM_014212.3
+NC_000012.12	BestRefSeq	exon	53975181	53976419	.	+	.	ID=id1209134;Parent=rna100336;Dbxref=GeneID:3227,Genbank:NM_014212.3,HGNC:HGNC:5123,MIM:605559;Note=The RefSeq transcript has 1 non-frameshifting indel compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=HOXC11;inference=similar to RNA sequence%2C mRNA (same species):RefSeq:NM_014212.3;product=homeobox C11;transcript_id=NM_014212.3
+NC_000012.12	BestRefSeq	gene	53985162	53990279	.	+	.	ID=gene33479;Dbxref=GeneID:3226,HGNC:HGNC:5122,MIM:605560;Name=HOXC10;description=homeobox C10;gbkey=Gene;gene=HOXC10;gene_biotype=protein_coding;gene_synonym=HOX3I
+NC_000012.12	BestRefSeq	mRNA	53985162	53990279	.	+	.	ID=rna100338;Parent=gene33479;Dbxref=GeneID:3226,Genbank:NM_017409.3,HGNC:HGNC:5122,MIM:605560;Name=NM_017409.3;gbkey=mRNA;gene=HOXC10;product=homeobox C10;transcript_id=NM_017409.3
+NC_000012.12	BestRefSeq	exon	53985162	53986010	.	+	.	ID=id1209144;Parent=rna100338;Dbxref=GeneID:3226,Genbank:NM_017409.3,HGNC:HGNC:5122,MIM:605560;gbkey=mRNA;gene=HOXC10;product=homeobox C10;transcript_id=NM_017409.3
+NC_000012.12	BestRefSeq	exon	53989169	53990279	.	+	.	ID=id1209145;Parent=rna100338;Dbxref=GeneID:3226,Genbank:NM_017409.3,HGNC:HGNC:5122,MIM:605560;gbkey=mRNA;gene=HOXC10;product=homeobox C10;transcript_id=NM_017409.3
+NC_000012.12	BestRefSeq	gene	54000119	54003337	.	+	.	ID=gene33483;Dbxref=GeneID:3225,HGNC:HGNC:5130,MIM:142971;Name=HOXC9;description=homeobox C9;gbkey=Gene;gene=HOXC9;gene_biotype=protein_coding;gene_synonym=HOX3,HOX3B
+NC_000012.12	BestRefSeq	mRNA	54000119	54003337	.	+	.	ID=rna100344;Parent=gene33483;Dbxref=GeneID:3225,Genbank:NM_006897.2,HGNC:HGNC:5130,MIM:142971;Name=NM_006897.2;gbkey=mRNA;gene=HOXC9;product=homeobox C9;transcript_id=NM_006897.2
+NC_000012.12	BestRefSeq	exon	54000119	54000726	.	+	.	ID=id1209154;Parent=rna100344;Dbxref=GeneID:3225,Genbank:NM_006897.2,HGNC:HGNC:5130,MIM:142971;gbkey=mRNA;gene=HOXC9;product=homeobox C9;transcript_id=NM_006897.2
+NC_000012.12	BestRefSeq	exon	54002430	54003337	.	+	.	ID=id1209155;Parent=rna100344;Dbxref=GeneID:3225,Genbank:NM_006897.2,HGNC:HGNC:5130,MIM:142971;gbkey=mRNA;gene=HOXC9;product=homeobox C9;transcript_id=NM_006897.2
+NC_000012.12	BestRefSeq	gene	54009106	54012763	.	+	.	ID=gene33485;Dbxref=GeneID:3224,HGNC:HGNC:5129,MIM:142970;Name=HOXC8;description=homeobox C8;gbkey=Gene;gene=HOXC8;gene_biotype=protein_coding;gene_synonym=HOX3,HOX3A
+NC_000012.12	BestRefSeq	mRNA	54009106	54012763	.	+	.	ID=rna100346;Parent=gene33485;Dbxref=GeneID:3224,Genbank:NM_022658.3,HGNC:HGNC:5129,MIM:142970;Name=NM_022658.3;gbkey=mRNA;gene=HOXC8;product=homeobox C8;transcript_id=NM_022658.3
+NC_000012.12	BestRefSeq	exon	54009106	54009720	.	+	.	ID=id1209158;Parent=rna100346;Dbxref=GeneID:3224,Genbank:NM_022658.3,HGNC:HGNC:5129,MIM:142970;gbkey=mRNA;gene=HOXC8;product=homeobox C8;transcript_id=NM_022658.3
+NC_000012.12	BestRefSeq	exon	54011089	54012763	.	+	.	ID=id1209159;Parent=rna100346;Dbxref=GeneID:3224,Genbank:NM_022658.3,HGNC:HGNC:5129,MIM:142970;gbkey=mRNA;gene=HOXC8;product=homeobox C8;transcript_id=NM_022658.3
+NC_000012.12	BestRefSeq	gene	54016852	54056030	.	+	.	ID=gene33486;Dbxref=GeneID:3221,HGNC:HGNC:5126,MIM:142974;Name=HOXC4;description=homeobox C4;gbkey=Gene;gene=HOXC4;gene_biotype=protein_coding;gene_synonym=cp19,HOX3,HOX3E
+NC_000012.12	BestRefSeq	mRNA	54016852	54056030	.	+	.	ID=rna100347;Parent=gene33486;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;Name=NM_014620.5;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5
+NC_000012.12	BestRefSeq	exon	54016852	54017414	.	+	.	ID=id1209160;Parent=rna100347;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5
+NC_000012.12	BestRefSeq	exon	54053160	54053276	.	+	.	ID=id1209161;Parent=rna100347;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5
+NC_000012.12	BestRefSeq	exon	54053917	54054361	.	+	.	ID=id1209162;Parent=rna100347;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5
+NC_000012.12	BestRefSeq	exon	54054850	54056030	.	+	.	ID=id1209163;Parent=rna100347;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5
+NC_000012.12	BestRefSeq	mRNA	54053877	54056030	.	+	.	ID=rna100348;Parent=gene33486;Dbxref=GeneID:3221,Genbank:NM_153633.2,HGNC:HGNC:5126,MIM:142974;Name=NM_153633.2;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 2;transcript_id=NM_153633.2
+NC_000012.12	BestRefSeq	exon	54053877	54054361	.	+	.	ID=id1209164;Parent=rna100348;Dbxref=GeneID:3221,Genbank:NM_153633.2,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 2;transcript_id=NM_153633.2
+NC_000012.12	BestRefSeq	exon	54054850	54056030	.	+	.	ID=id1209165;Parent=rna100348;Dbxref=GeneID:3221,Genbank:NM_153633.2,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 2;transcript_id=NM_153633.2
+NC_000012.12	BestRefSeq	gene	54016852	54035361	.	+	.	ID=gene33487;Dbxref=GeneID:3222,HGNC:HGNC:5127,MIM:142973;Name=HOXC5;description=homeobox C5;gbkey=Gene;gene=HOXC5;gene_biotype=protein_coding;gene_synonym=CP11,HOX3,HOX3D
+NC_000012.12	BestRefSeq	transcript	54016852	54035361	.	+	.	ID=rna100349;Parent=gene33487;Dbxref=GeneID:3222,Genbank:NR_003084.2,HGNC:HGNC:5127,MIM:142973;Name=NR_003084.2;gbkey=misc_RNA;gene=HOXC5;product=homeobox C5%2C transcript variant 2;transcript_id=NR_003084.2
+NC_000012.12	BestRefSeq	exon	54016852	54017414	.	+	.	ID=id1209166;Parent=rna100349;Dbxref=GeneID:3222,Genbank:NR_003084.2,HGNC:HGNC:5127,MIM:142973;gbkey=misc_RNA;gene=HOXC5;product=homeobox C5%2C transcript variant 2;transcript_id=NR_003084.2
+NC_000012.12	BestRefSeq	exon	54034278	54035361	.	+	.	ID=id1209167;Parent=rna100349;Dbxref=GeneID:3222,Genbank:NR_003084.2,HGNC:HGNC:5127,MIM:142973;gbkey=misc_RNA;gene=HOXC5;product=homeobox C5%2C transcript variant 2;transcript_id=NR_003084.2
+NC_000012.12	BestRefSeq	mRNA	54033048	54035361	.	+	.	ID=rna100350;Parent=gene33487;Dbxref=GeneID:3222,Genbank:NM_018953.3,HGNC:HGNC:5127,MIM:142973;Name=NM_018953.3;gbkey=mRNA;gene=HOXC5;product=homeobox C5%2C transcript variant 1;transcript_id=NM_018953.3
+NC_000012.12	BestRefSeq	exon	54033048	54033576	.	+	.	ID=id1209168;Parent=rna100350;Dbxref=GeneID:3222,Genbank:NM_018953.3,HGNC:HGNC:5127,MIM:142973;gbkey=mRNA;gene=HOXC5;product=homeobox C5%2C transcript variant 1;transcript_id=NM_018953.3
+NC_000012.12	BestRefSeq	exon	54034278	54035361	.	+	.	ID=id1209169;Parent=rna100350;Dbxref=GeneID:3222,Genbank:NM_018953.3,HGNC:HGNC:5127,MIM:142973;gbkey=mRNA;gene=HOXC5;product=homeobox C5%2C transcript variant 1;transcript_id=NM_018953.3
+NC_000012.12	BestRefSeq	gene	54016852	54030823	.	+	.	ID=gene33488;Dbxref=GeneID:3223,HGNC:HGNC:5128,MIM:142972;Name=HOXC6;description=homeobox C6;gbkey=Gene;gene=HOXC6;gene_biotype=protein_coding;gene_synonym=CP25,HHO.C8,HOX3,HOX3C
+NC_000012.12	BestRefSeq	mRNA	54016852	54030823	.	+	.	ID=rna100351;Parent=gene33488;Dbxref=GeneID:3223,Genbank:NM_153693.4,HGNC:HGNC:5128,MIM:142972;Name=NM_153693.4;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 2;transcript_id=NM_153693.4
+NC_000012.12	BestRefSeq	exon	54016852	54017414	.	+	.	ID=id1209172;Parent=rna100351;Dbxref=GeneID:3223,Genbank:NM_153693.4,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 2;transcript_id=NM_153693.4
+NC_000012.12	BestRefSeq	exon	54028576	54028921	.	+	.	ID=id1209173;Parent=rna100351;Dbxref=GeneID:3223,Genbank:NM_153693.4,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 2;transcript_id=NM_153693.4
+NC_000012.12	BestRefSeq	exon	54029655	54030823	.	+	.	ID=id1209174;Parent=rna100351;Dbxref=GeneID:3223,Genbank:NM_153693.4,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 2;transcript_id=NM_153693.4
+NC_000012.12	BestRefSeq	mRNA	54028410	54030823	.	+	.	ID=rna100352;Parent=gene33488;Dbxref=GeneID:3223,Genbank:NM_004503.3,HGNC:HGNC:5128,MIM:142972;Name=NM_004503.3;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 1;transcript_id=NM_004503.3
+NC_000012.12	BestRefSeq	exon	54028410	54028921	.	+	.	ID=id1209175;Parent=rna100352;Dbxref=GeneID:3223,Genbank:NM_004503.3,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 1;transcript_id=NM_004503.3
+NC_000012.12	BestRefSeq	exon	54029655	54030823	.	+	.	ID=id1209176;Parent=rna100352;Dbxref=GeneID:3223,Genbank:NM_004503.3,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 1;transcript_id=NM_004503.3
+NC_000012.12	RefSeq	cDNA_match	53973126	53973923	798	+	.	ID=46c4f9c5-f2cf-415e-a892-fbd053a8f7eb;Target=NM_014212.3 1 798 +;assembly_bases_aln=152;assembly_bases_seq=152;consensus_splices=2;exon_identity=0.991241;for_remapping=2;gap_count=1;identity=0.991241;idty=1;matches=2037;num_ident=2037;num_mismatch=0;pct_coverage=99.1241;pct_coverage_hiqual=99.1241;pct_identity_gap=99.1241;pct_identity_ungap=100;product_coverage=1;rank=1;splices=2;weighted_identity=0.991461
+NC_000012.12	RefSeq	cDNA_match	53975181	53976419	1232.82	+	.	ID=46c4f9c5-f2cf-415e-a892-fbd053a8f7eb;Target=NM_014212.3 799 2055 +;assembly_bases_aln=152;assembly_bases_seq=152;consensus_splices=2;exon_identity=0.991241;for_remapping=2;gap_count=1;identity=0.991241;idty=0.98568;matches=2037;num_ident=2037;num_mismatch=0;pct_coverage=99.1241;pct_coverage_hiqual=99.1241;pct_identity_gap=99.1241;pct_identity_ungap=100;product_coverage=1;rank=1;splices=2;weighted_identity=0.991461;Gap=M705 I18 M534
--- a/test-data/tx2gene.tab	Fri Nov 16 14:47:19 2018 -0500
+++ b/test-data/tx2gene.tab	Tue Dec 04 08:19:06 2018 -0500
@@ -1,16 +1,16 @@
 TXNAME	GENEID
-NM_001168316	DDX11L1
-NM_174914	DDX11L1
-NR_031764	DDX11L1
-NM_004503	WASH7P
-NM_006897	WASH7P
-NM_014212	WASH7P
-NM_014620	WASH7P
-NM_017409	WASH7P
-NM_017410	WASH7P
-NM_018953	MIR6859-2
-NM_022658	MIR6859-1
-NM_153633	WASH7P
-NM_153693	WASH7P
-NM_173860	WASH7P
-NR_003084	WASH7P
+NM_001168316	UGT3A2
+NM_174914	UGT3A2
+NR_031764	UGT3A2
+NM_004503	HOXC6
+NM_006897	HOXC9
+NM_014212	HOXC11
+NM_014620	HOXC4
+NM_017409	HOXC10
+NM_017410	HOXC13
+NM_018953	HOXC5
+NM_022658	HOXC8
+NM_153633	HOXC4
+NM_153693	HOXC6
+NM_173860	HOXC12
+NR_003084	HOXC5