Repository 'length_and_gc_content'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/length_and_gc_content

Changeset 2:e3ba567abdf5 (2022-03-11)
Previous changeset 1:f088370d2a3c (2018-01-28)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/length_and_gc_content commit 7b6b07d22f3e6fed77b2c237de2b0d96fa939711"
modified:
get_length_and_gc_content.r
get_length_and_gc_content.xml
test-data/cached_locally/ref.fasta
test-data/cached_locally/ref.gtf
test-data/gc.tab
test-data/in.fasta
test-data/in.gtf
test-data/length.tab
added:
macros.xml
b
diff -r f088370d2a3c -r e3ba567abdf5 get_length_and_gc_content.r
--- a/get_length_and_gc_content.r Sun Jan 28 04:04:58 2018 -0500
+++ b/get_length_and_gc_content.r Fri Mar 11 14:08:11 2022 +0000
[
@@ -1,59 +1,94 @@
 # originally by Devon Ryan, https://www.biostars.org/p/84467/
 
-options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+options(show.error.messages = F,
+        error = function() {
+          cat(geterrmessage(), file = stderr())
+          q("no", 1, F)
+        })
 
 # we need that to not crash galaxy with an UTF8 error on German LC settings.
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 
 suppressPackageStartupMessages({
-    library("GenomicRanges")
-    library("rtracklayer")
-    library("Rsamtools")
-    library("optparse")
-    library("data.table")
+  library("GenomicRanges")
+  library("rtracklayer")
+  library("Rsamtools")
+  library("optparse")
+  library("data.table")
 })
 
 option_list <- list(
-    make_option(c("-g","--gtf"), type="character", help="Input GTF file with gene / exon information."),
-    make_option(c("-f","--fasta"), type="character", default=FALSE, help="FASTA file that corresponds to the supplied GTF."),
-    make_option(c("-l","--length"), type="character", default=FALSE, help="Output file with Gene ID and length."),
-    make_option(c("-gc","--gc_content"), type="character", default=FALSE, help="Output file with Gene ID and GC content.")
-  )
+  make_option(c("-g", "--gtf"), type = "character",
+              help = "Input gtf file with gene / exon information."),
+  make_option(c("-f", "--fasta"), type = "character", default = NULL,
+              help = "fasta file that corresponds to the supplied gtf."),
+  make_option(c("-l", "--length"), type = "character", default = NULL,
+              help = "Output file with Gene ID and length."),
+  make_option(c("-c", "--gc_content"), type = "character", default = NULL,
+              help = "Output file with Gene ID and GC content.")
+)
 
-parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
-args = parse_args(parser)
+parser <- OptionParser(usage = "%prog [options] file",
+                       option_list = option_list)
+args <- parse_args(parser)
 
-GTFfile = args$gtf
-FASTAfile = args$fasta
-length = args$length
-gc_content = args$gc_content
+gtf_file <- args$gtf
+fasta_file <- args$fasta
+length <- args$length
+gc_content <- args$gc_content
+
+# Check args:
+if (is.null(fasta_file) & !is.null(gc_content)) {
+  stop("gc_content output requires fasta input")
+}
+if (is.null(length) & is.null(gc_content)) {
+  stop("neither gc_content nor length was set nothing to do.")
+}
 
 #Load the annotation and reduce it
-GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon")
-grl <- reduce(split(GTF, elementMetadata(GTF)$gene_id))
-reducedGTF <- unlist(grl, use.names=T)
-elementMetadata(reducedGTF)$gene_id <- rep(names(grl), elementNROWS(grl))
+gtf <- import.gff(gtf_file, format = "gtf", genome = NA, feature.type = "exon")
+grl <- reduce(split(gtf, elementMetadata(gtf)$gene_id))
+reduced_gtf <- unlist(grl, use.names = T)
+elementMetadata(reduced_gtf)$gene_id <- rep(names(grl), elementNROWS(grl))
 
-#Open the fasta file
-FASTA <- FaFile(FASTAfile)
-open(FASTA)
+if (! is.null(gc_content)) {
+  #Open the fasta file
+  fasta <- FaFile(fasta_file)
+  open(fasta)
 
-#Add the GC numbers
-elementMetadata(reducedGTF)$nGCs <- letterFrequency(getSeq(FASTA, reducedGTF), "GC")[,1]
-elementMetadata(reducedGTF)$widths <- width(reducedGTF)
+  #Add the GC numbers
+  elementMetadata(reduced_gtf)$n_gcs <-
+    letterFrequency(getSeq(fasta, reduced_gtf), "GC")[, 1]
+}
+elementMetadata(reduced_gtf)$widths <- width(reduced_gtf)
 
 #Create a list of the ensembl_id/GC/length
-calc_GC_length <- function(x) {
-    nGCs = sum(elementMetadata(x)$nGCs)
-    width = sum(elementMetadata(x)$widths)
-    c(width, nGCs/width)
+if (! is.null(gc_content)) {
+  calc_gc_length <- function(x) {
+    n_gcs <- sum(elementMetadata(x)$n_gcs)
+    width <- sum(elementMetadata(x)$widths)
+    c(width, n_gcs / width)
+  }
+  output <- t(sapply(split(reduced_gtf, elementMetadata(reduced_gtf)$gene_id),
+                     calc_gc_length))
+  output <- data.frame(setDT(data.frame(output), keep.rownames = TRUE)[])
+  write.table(output[, c(1, 3)], file = gc_content,
+              col.names = FALSE, row.names = FALSE,
+              quote = FALSE, sep = "\t")
+} else {
+  all_widths <- sapply(split(reduced_gtf, elementMetadata(reduced_gtf)$gene_id),
+                       function(x) {
+                         sum(elementMetadata(x)$widths)
+                        })
+  output <- data.frame(gene_id = names(all_widths),
+                       length = all_widths)
 }
-output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length))
-output <- data.frame(setDT(data.frame(output), keep.rownames = TRUE)[])
 
-
-write.table(output[,c(1,2)], file=length, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t")
-write.table(output[,c(1,3)], file=gc_content, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t")
+if (! is.null(length)) {
+  write.table(output[, c(1, 2)], file = length,
+              col.names = FALSE, row.names = FALSE,
+              quote = FALSE, sep = "\t")
+}
 
 
 sessionInfo()
b
diff -r f088370d2a3c -r e3ba567abdf5 get_length_and_gc_content.xml
--- a/get_length_and_gc_content.xml Sun Jan 28 04:04:58 2018 -0500
+++ b/get_length_and_gc_content.xml Fri Mar 11 14:08:11 2022 +0000
[
@@ -1,11 +1,9 @@
-<tool id="length_and_gc_content" name="Gene length and GC content" version="0.1.1">
+<tool id="length_and_gc_content" name="Gene length and GC content" version="0.1.2">
     <description>from GTF and FASTA file</description>
-    <requirements>
-        <requirement type="package" version="1.3.2">r-optparse</requirement>
-        <requirement type="package" version="1.4.2">r-reshape2</requirement>
-        <requirement type="package" version="1.10.4">r-data.table</requirement>
-        <requirement type="package" version="1.34.2">bioconductor-rtracklayer</requirement>
-    </requirements>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
     <stdio>
         <regex match="Execution halted"
                source="both"
@@ -21,7 +19,7 @@
                description="An undefined error occured, please check your input carefully and contact your administrator." />
     </stdio>
     <version_command><![CDATA[
-        echo $(R --version | grep version | grep -v GNU)", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", reshape2 version" $(R --vanilla --slave -e "library(reshape2); cat(sessionInfo()\$otherPkgs\$reshape2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rtracklayer version" $(R --vanilla --slave -e "library(rtracklayer); cat(sessionInfo()\$otherPkgs\$rtracklayer\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", data.table version" $(R --vanilla --slave -e "library(data.table); cat(sessionInfo()\$otherPkgs\$data.table\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+        echo $(R --version | grep version | grep -v GNU)", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rtracklayer version" $(R --vanilla --slave -e "library(rtracklayer); cat(sessionInfo()\$otherPkgs\$rtracklayer\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", data.table version" $(R --vanilla --slave -e "library(data.table); cat(sessionInfo()\$otherPkgs\$data.table\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
     ]]></version_command>
     <command><![CDATA[
 
@@ -37,24 +35,24 @@
 
 ## Get FASTA
 
-#if $fasta_file.fastaSource == 'indexed':
-    ln -s '$fasta_file.fasta_pre_installed.fields.path' fasta
-#else:
-    ln -s '$fasta_file.fasta_history' fasta
+#if $analysis.analysis_select != "length":
+    #if $analysis.fasta_file.fastaSource == 'indexed':
+        ln -s '$analysis.fasta_file.fasta_pre_installed.fields.path' fasta &&
+    #else:
+        ln -s '$analysis.fasta_file.fasta_history' fasta &&
+    #end if
 #end if
 
-&&
-
 Rscript '$__tool_directory__/get_length_and_gc_content.r'
 
 --gtf gtf
---fasta fasta
 
-#if $length_out:
+#if $analysis.analysis_select != "gc":
     --length '$length'
 #end if
 
-#if $gc_out:
+#if $analysis.analysis_select != "length":
+    --fasta fasta
     --gc_content '$gc_content'
 #end if
 
@@ -79,39 +77,31 @@
             </when>
         </conditional>
 
-        <conditional name="fasta_file">
-            <param name="fastaSource" type="select" label="Select a built-in FASTA or one from your history" help="Choose history if you don't see the correct FASTA. The FASTA must be the same genome version as the GTF.">
-                <option value="indexed" selected="true">Use a built-in FASTA </option>
-                <option value="history">Use a FASTA from history</option>
+        <conditional name="analysis">
+            <param name="analysis_select" type="select" label="Analysis to perform">
+                <option value="all" selected="true">GC-content and gene lengths</option>
+                <option value="gc">GC-content only</option>
+                <option value="length">gene lengths only</option>
             </param>
-            <when value="indexed">
-                <param name="fasta_pre_installed" type="select" help="Select the FASTA file from a list of pre-installed genomes" label="Select a FASTA file">
-                    <options from_data_table="all_fasta">
-                        <filter type="sort_by" column="2" />
-                    </options>
-                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
-                </param>
+            <when value="all">
+                <expand macro="fasta" />
             </when>
-            <when value="history">
-                <param name="fasta_history" type="data" format="fasta" label="Select a FASTA file that matches the supplied GTF file" />
+            <when value="gc">
+                <expand macro="fasta" />
             </when>
+            <when value="length"/>
         </conditional>
-
-
-        <param name="length_out" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Output length file?" help="Default: Yes" />
-        <param name="gc_out" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Output GC content file?" help="Default: Yes" />
-
     </inputs>
 
     <outputs>
         <data name="length" format="tabular" label="Gene length">
-            <filter>length_out is True</filter>
+            <filter>analysis['analysis_select'] != "gc"</filter>
             <actions>
                 <action name="column_names" type="metadata" default="GeneID,Length" />
             </actions>
         </data>
         <data name="gc_content" format="tabular" label="Gene GC content">
-            <filter>gc_out is True</filter>
+            <filter>analysis['analysis_select'] != "length"</filter>
              <actions>
                 <action name="column_names" type="metadata" default="GeneID,GC_content" />
             </actions>
@@ -119,6 +109,8 @@
     </outputs>
 
     <tests>
+        <!-- The gtf file was generated by
+        zcat gencode.v39.basic.annotation.gtf.gz | grep "HOXD" | awk -F "\t" -v OFS="\t" '$0~/HOXD10/ || $0~/HOXD9/ {$1="fake_chr2";$4-=176116521;$5-=176116521; print}' -->
         <!-- Ensure length and GC files are output -->
         <test expect_num_outputs="2">
             <param name="gtfSource" value="history" />
@@ -138,15 +130,14 @@
         <!-- Ensure optional gc content works  -->
         <test expect_num_outputs="1">
             <param name="gtfSource" value="cached" />
-            <param name="fastaSource" value="indexed" />
-            <param name="gc_out" value="False" />
+            <param name="analysis_select" value="length" />
             <output name="length" file="length.tab" />
         </test>
         <!-- Ensure optional length works -->
         <test expect_num_outputs="1">
             <param name="gtfSource" value="cached" />
             <param name="fastaSource" value="indexed" />
-            <param name="length_out" value="False" />
+            <param name="analysis_select" value="gc" />
             <output name="gc_content" file="gc.tab" />
         </test>
     </tests>
@@ -156,14 +147,15 @@
 
 .. class:: infomark
 
-This tool calculates the length and GC content for the genes in a GTF file. It requires a FASTA file that is the same genome version as the GTF.
+This tool calculates the length and/or GC content for the genes in a GTF file.
+For the GC content, it requires a FASTA file that is the same genome version as the GTF.
 
 -----
 
 **Inputs**
 
 - a GTF file
-- a FASTA file
+- a FASTA file (if GC content is requested)
 
 -----
 
b
diff -r f088370d2a3c -r e3ba567abdf5 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Mar 11 14:08:11 2022 +0000
b
@@ -0,0 +1,28 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.7.1">r-optparse</requirement>
+            <requirement type="package" version="1.14.2">r-data.table</requirement>
+            <requirement type="package" version="1.54.0">bioconductor-rtracklayer</requirement>
+        </requirements>
+    </xml>
+    <xml name="fasta">
+        <conditional name="fasta_file">
+            <param name="fastaSource" type="select" label="Select a built-in FASTA or one from your history" help="Choose history if you don't see the correct FASTA. The FASTA must be the same genome version as the GTF.">
+                <option value="indexed" selected="true">Use a built-in FASTA </option>
+                <option value="history">Use a FASTA from history</option>
+            </param>
+            <when value="indexed">
+                <param name="fasta_pre_installed" type="select" help="Select the FASTA file from a list of pre-installed genomes" label="Select a FASTA file">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2" />
+                    </options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="fasta_history" type="data" format="fasta" label="Select a FASTA file that matches the supplied GTF file" />
+            </when>
+        </conditional>
+    </xml>
+</macros>
b
diff -r f088370d2a3c -r e3ba567abdf5 test-data/cached_locally/ref.fasta
--- a/test-data/cached_locally/ref.fasta Sun Jan 28 04:04:58 2018 -0500
+++ b/test-data/cached_locally/ref.fasta Fri Mar 11 14:08:11 2022 +0000
b
b"@@ -1,2 +1,173 @@\n->1\n-AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAAAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAATTTTT\n\\ No newline at end of file\n+>fake_chr2 hg38_dna range=chr2:176116522-176125113 5'pad=0 3'pad=0 strand=+ repeatMasking=none\n+TGGGGCGGGCTGGCCGAGCGAGCCCTGGAGAGGCGGACAGGAGGGCGGCG\n+GAGAGCGCTGGGCCGGTTGTCTCCAGCGCGCACTATCGCGGGCGCGTAGT\n+AGATGTCGCTGTTGTCCGTGCTTACCCGGCCGGCCGGCCAGGCTCTGGAG\n+CACGTGACCCGAGAGGAGGCTGCGGCTCAAGGCCATTTTCAAATCTCATT\n+GGCTTGGTTGTCATGTGGTCGGCAGAGGCATCCACAATTACACGGGGAAT\n+GTTTTCCTAGAGATGTCAGCCTACAAAGGACACAATCTCTCTTCTTCAAA\n+TTCTTCCCCAAAATGTCCTTTCCCAACAGCTCTCCTGCTGCTAATACTTT\n+TTTAGTAGATTCCTTGATCAGTGCCTGCAGGAGTGACAGTTTTTATTCCA\n+GCAGCGCCAGCATGTACATGCCACCACCTAGCGCAGACATGGGGACCTAT\n+GGAATGCAAACCTGTGGACTGCTCCCGTCTCTGGCCAAAAGAGAAGTGAA\n+CCACCAAAATATGGGTATGAATGTGCATCCTTATATACCTCAAGTAGACA\n+GTTGGACAGATCCGAACAGATCTTGTCGAATAGAGCAACCTGTTACACAG\n+CAAGTCCCCACTTGCTCCTTCACCACCAACATTAAGGAAGAATCCAATTG\n+CTGCATGTATTCTGATAAGCGCAACAAACTCATTTCGGCCGAGGTCCCTT\n+CGTACCAGAGGCTGGTCCCTGAGTCTTGTCCCGTTGAGAACCCTGAGGTT\n+CCCGTCCCTGGATATTTTAGACTGAGTCAGACCTACGCCACCGGGAAAAC\n+CCAAGAGTACAATAATAGCCCCGAAGGCAGCTCCACTGTCATGCTCCAGC\n+TCAACCCTCGTGGCGCGGCCAAGCCGCAGCTCTCCGCTGCCCAGCTGCAG\n+ATGGAAAAGAAGATGAACGAGCCCGTGAGCGGCCAGGAGCCCACCAAAGT\n+CTCCCAGGTGGAGAGCCCCGAGGCCAAAGGCGGCCTTCCCGAAGAGAGGA\n+GCTGCCTGGCTGAGGTCTCCGTGTCCAGTCCCGAAGTGCAGGAGAAGGAA\n+AGCAAAGGTCGGTATGAGCAGAGTTGCCACCCCAGCGGGGCGCGCAGCCC\n+GGGAACCCGGCAGAGAGGGAGTGCCGGGGTGCCCAGCGCCGAGCCGGAGC\n+CCGACTTGGCAGGTGCTGCTCCGCCTGGTTTTAGAGGGGTGATCTCAGCC\n+CTGAGATAGTCCCCGCTTCTCCCCTGCTGCCCTGGCCCTCTCCGCCAGTC\n+CTGGCCCCACGCTGATGGCGCCCGGGCAGAGGAAAAGCTTGCCGGTTTTA\n+TTTTTCCTGAGCTAGACCTGAACACAACAAAAGAGCGCAAAGGAGACCTG\n+CGGCTCATAAACACGACCACAGAGCCTCTTTTCTCCTGCTCAGATTTGCA\n+GTTCCAGTTTTGCCTTGAGCCCAATGATCATGTTAAGGTGATCCAGGGCA\n+CCGTGTTCGTGTTCAAGTGTATGCACCCCGCATCCTGCGAGCTTGGGGGT\n+GGTGAGGGGAAAGAGATGGCTGGGCTGGTTGGTGCTTGAGTTGGGAAACA\n+GGGCTTACTGCCTTTGCTGGGCTAGGTAACCTTGGCTTTGTTTAGGAAAA\n+GTGCTGCAGTCTTTGCAATCCGTCGGCAAAGAGGGCAAAGGC
GGAGGGGG\n+AGAGTGGAACCCGCATTGCCCTCCCTGCAAGGCCAGCCTTAGGGCTGGGC\n+TAAGGCAAAGAGCCAGGGATCTGGCTTTTTGAGAAGGAACCCTCCTCCTC\n+TCCCCCAGTGCTTAGAGGTGGGCCACAGTAGGGGGCTCCCTTTCTGGGGG\n+AATGCTTTAGTGTGGGGGCAAGAAGACATGAAAATTAAGGAAATTCTGGG\n+GAATGCAACAATACCCAGGCAAGGTGGGGGAAGGTGTCTCGCTTCCCCAT\n+TTATCTTTTGAAAGAGAATGGGCACCTATAAACCTGACTGTCAGGATTCC\n+TGACTGCCTAGGAGAGGTGGGGAAGAAGTGGCAGATTTGGGGACCTGAGG\n+CAGCAGTGGGGTTGGTAGGCTTGTCCAGGTCGTGGCGTATTCCCCTCCGT\n+CCCTGTTAGGAGCTGAACCCTTAGAATGTTGCTGGGGAGATCTGGAAAGT\n+TTACTATTCTACTAATGTTTTGTACAAGTGAGAAAGTTGAAAGAGAGAGC\n+GAGAACCCAAATGCAGACTGTCCTGCCATCATGTCATTTAAGTAATGTGG\n+CATCAATGTAAGATTCCCTTCCAAGGCCCACTTCATGTGAGTAATGTTTA\n+ATACTAGCATTTTCCAAAGCGGCCTGGCTGCCAGCAGGGTCACGGCCAAG\n+GGTACATTTGAACAGTCTGAAGAAAAAAACAAAAACGAAAACCAAAACCA\n+AAACCAAAACAAAAACAAAAACAAAAACAAACAAACAAAAAACCTCTTGA\n+TTTTTTTCTTCTTCTCCCTTTAATTTTGTTAGAGGAAATCAAGTCTGATA\n+CACCAACCAGCAATTGGCTCACTGCAAAGAGTGGCAGAAAGAAGAGGTGC\n+CCTTACACTAAGCACCAAACGCTGGAATTAGAAAAAGAGTTCTTGTTCAA\n+TATGTACCTCACCCGCGAGCGCCGCCTAGAGATCAGTAAGAGCGTTAACC\n+TCACCGACAGGCAGGTCAAGATTTGGTTTCAAAACCGCCGAATGAAACTC\n+AAGAAGATGAGCCGAGAGAACCGGATCCGAGAACTGACCGCCAACCTCAC\n+GTTTTCTTAGGTCTGAGGCCGGTCTGAGGCCGGTCAGAGGCCAGGATTGG\n+AGAGGGGGCACCGCGTTCCAGGGCCCAGTGCTGGAGGACTGGGAAAGCGG\n+AAACAAAACCTTCACCGCTCTTTGTTTGTTGTTTTGTTGTATTTTGTTTT\n+CCTGCTAGAATGTGACTTTGGGGTCATTATGTTCGTGCTGCAAGTGATCT\n+GTAATCCCTATGAGTATATATATATATATATATATATATATATAAAAACT\n+TAGCACGTGTAATTTATTATTTTTTCATCGTAATGCAGGGTAACTATTAT\n+TGCGCATTTTCATTTGGGTCTTAACTTATTGGAACTGTAGAGCATCCATC\n+CATCCATCCATCCAGCAATGTGACTTTTTCATGTCTTTCCTAACACAAAA\n+GGTCTATGTGTGTGGTTAGTCCATGAACTCATGGCATTTTGAATACATCC\n+AGTACTTTAAAAATGACATATATATTTAAAAAAAAAAGATTAAGAAAACC\n+CACAAGTTGGAGGGAGGGGGACTTAAAAAGCACATTACAATGTATCTTTT\n+CACAAATGAATTTAGCAGTTGTCCTTGGTGAGATGGGATATTGGCGATTT\n+ATGCCTTGTAGCCTTTCCCTTGTGGTGCATCTGTGGTTTGGTAGAAGTAC\n+AACAGCAACCTGTCCTTTCTGTGCATGTTCTGGTCGCATGTATAATGCAA\n+TAAACTCTGGAAATGAGTTCACTCCCTCTGCTTTCTGAAATGGAAATATG\n+TTATGGTGGAAATGAAAGCCTATGGTGAGATTATCTTCTGGTTACACTCC\n+CTGTTTGGGGCATTTGGGCAGGGGAGTG
ATAGACTAGTAGGGGAAGGGAG\n+ATGGGGGAGAAAAGCTGGAGGAGGCCTAGGGTGTTGGATTTTGGCAGTGG\n+T"..b'GA\n+GGGTAATTTTTATTTAGCCGTTTCTCCGATCATGTGGGGAATACCATTAG\n+CTGTTGATAGCGGGCCATGTATCCGAGGAAAGCCTGAGCTACAAGGCAAA\n+GGCATCCCATCTGGAACAAAATCAGAAAGCTATTGGCAAAGGTAATCAAT\n+CAGGCCATAAATAGCCATTTACCCGCTTCCTTTTCGGGGCTGGAGGTGGG\n+CCGGGAGCCCTCCAAGGGTGAGCTGGGCAACTTGTAGAGCAAGGAATATG\n+CCCTCCGCTGCCGGCGCCCCGGCCGCTTTTGTCTGGGCTCCCAGCCGGGC\n+TTCCGAGGCTTTGTACCATGGATTTGGGAGTGACAATGGGCATTTCCCTC\n+AGATTCAAGGCTGCTCAACCTCACCTCTGTAGGGGGAAAAAAATCAGAAG\n+GGAGTGTCCCAAGGACCTAGCCATTCGGCCGAATTTTTTAGACATTTTGG\n+GAGTCTCCTCCGAGGCCTTTAAGTGCGAACCGCGCGAAGCGGCCCTGCCC\n+GGGGAGACTCGCTGAGGCAGGGCTGAGGCGGCGGGCGGGAGCAAGCTGCT\n+CTAGCATTTGGGTTCTGCCCTGTGGCGTGTTCTCTTCCAGGGCCTTTCCA\n+GCATCATCGGAGAAGACGAAGCACCCTGGCCGCCACTGTCCGTGCTGCGC\n+CAACTCGCCCGGCCGCCCGCCCTTCCGAGGGCAGGCAGAAGCCCCTCTGT\n+GTCCTCCACCGCCGCGCCCCGGCTCGCCCCTCGGGCCGCGGCGTGTGCCC\n+AGCCTCACGTCGGGGTGTGTGTGGCCGCGCGGGCGTGTGTGAGTGTGGCA\n+GGGGGAGGGGGCCCTCCGATCTGCTCCATCCGTCCGTTTTATTAGGGACA\n+CATTAATCTATAATCAAATACACCTCATAAAATTTTTATTGAAAGGCATA\n+ATATCATTACAGAGGTCTTCCACCTGTTTTAAACAACACGACAAGCTGTG\n+AGCAAGCGTGTGTGTGGGGATGTGTGGGGAGGGGTGGGTGTGAGTAGGGA\n+GAGAGGCGAGGGGAGAACAGCTCCCCTCGGGCGCTAGGGGCCGCCCCGAG\n+GGCCCGCCTGCCTCGGGCGACACCGGCCTGGCGCCCCCGCGGCCGCTCCG\n+TGTGCCCTGGACTCGCCGCCCGCGGCTCGGAAGCTGGAGAGTCAGCGACG\n+GGGCCCGACTGCGGGACCGAGGGCTGCAAGAAGAAGCGAACAAATAGTCC\n+CCAGCGCCTCCTCTGGATGCGGTCGCGTCTGTGGTCCTGGCAGCCGCTGG\n+GCGGGCCAGGCCAGGTCGGGCCGGGCCGAGCCGGGCACATGGACCTGGGC\n+CTGCGGGCTCTAATTGCGGCGCTTATGTTGATGATTTTTTTTTTAATCAC\n+AGCAGCCCCCAGTTTAGCGGACTGATTTACTCCCGGTATTGGTAAATATG\n+ATCACGTGGGCCGCGCGACCAATGGTGGAGGCTGCAGCCTGCGAACTAGT\n+CGGTGGCTCGGGCGCCGGCGGGGAGCTGCTCGGCGGCGGACAGTGTAATG\n+TTGGGTGGGAGTGCGGGACGCCTCAAAATGTCTTCCAGTGGCACCCTCAG\n+CAACTACTACGTGGACTCGCTTATAGGCCATGAGGGCGACGAGGTGTTCG\n+CGGCGCGCTTCGGGCCGCCGGGGCCAGGCGCGCAGGGCCGGCCTGCAGGT\n+GTGGCTGATGGCCCGGCCGCCACCGCCGCCGAGTTCGCCTCGTGTAGTTT\n+TGCCCCCAGATCGGCCGTGTTCTCTGCCTCGTGGTCCGCGGTGCCCTCCC\n+AGCCCCCGGCAGCGGCGGCGATGAGCGGCCTCTACCACCCGTACGTTCCC\n+CCG
CCGCCCCTGGCCGCCTCTGCCTCCGAGCCCGGCCGCTACGTGCGCTC\n+CTGGATGGAGCCGCTGCCCGGCTTCCCGGGCGGTGCGGGCGGTGGCGGTG\n+GTGGTGGAGGCGGCGGTCCGGGCCGCGGTCCCAGCCCTGGCCCCAGCGGC\n+CCAGCCAACGGGCGCCACTACGGGATTAAGCCTGAAACCCGAGCGGCCCC\n+GGCCCCCGCCACGGCCGCCTCCACCACCTCCTCCTCCTCCACTTCCTTAT\n+CCTCCTCCTCCAAACGGACTGAGTGCTCCGTGGCCCGGGAGTCCCAGGGG\n+AGCAGCGGCCCCGAGTTCTCGTGCAACTCGTTCCTGCAGGAGAAGGCGGC\n+AGCGGCGACGGGGGGAACCGGGCCTGGGGCAGGGATCGGGGCCGCGACTG\n+GGACGGGCGGCTCGTCGGAGCCCTCAGCTTGCAGCGACCACCCGATCCCA\n+GGCTGTTCGCTGAAGGAGGAGGAGAAGCAGCATTCGCAGCCGCAGCAGCA\n+GCAACTTGACCCAAGTAAGTGCAAAAGAAATTGCCCCCTGATTTATTGCT\n+GAAACCTGTAAGGCTCGAATGTGCAAAACTGATAGTTTTACTAACCTATA\n+AAAACGTCTAGACGCCTACCCAAGCCTAGGCGAACAACATGCATCCATAA\n+AAAGAGCTTCCCATAACCACCTACCCTGGGCGCTCAGTTAGTACGGTAAA\n+CAGAGCGCGAGCATTAAGGCTTTTTATGATAATTCCCCACAAGTTGTGAA\n+AAGCGACCATCCTTGGTGAAATTAATTTAACGACCTCTCTTCCCCACCCT\n+GTGGTCTCTCCCTGCCTCCCCTCCTCTCCTCTCTCCCCGTCTCCAAACCT\n+CCCTCTTTGTAGACAACCCCGCCGCGAACTGGATCCACGCTCGCTCCACC\n+CGGAAAAAGCGCTGTCCCTACACCAAATACCAGACGCTTGAGCTGGAGAA\n+AGAATTCCTCTTCAACATGTACCTCACCCGGGACCGGCGCTACGAGGTGG\n+CCAGGATTCTCAACCTAACAGAGAGACAGGTCAAAATCTGGTTTCAGAAC\n+CGTAGGATGAAAATGAAAAAGATGAGCAAGGAGAAATGCCCCAAAGGAGA\n+CTGACCCGGCGCGGTGCTGGCGGGAGCGCTCAAGGGCAGCGGATTTGTTG\n+TTGTTGCTGTTTTCCTTTGTGGGTGTTTGGTGCTTGATTTCCAGAAACTC\n+TCCAGCGACTTGGACTTCTTCTTCTTTTTTTTTTTCTTTTTAGATAGAAG\n+TGACTGTGTGGTTGGTCTCTGAGGTATTTGGGGGACTCTGTATTTGCTCG\n+TTTACGTGTTGGAAAAACCAAGTGGCTTTGGGGTTTCGCCCTATCCCACT\n+CCCTCTCTTTCCTGCTCCATTGGTTCCTTAAGAAATGCTATATTTTGTGA\n+GTGCAAGCTGGCTTGGGGAGCCCTCTCTTGTGTAAATGTCCCCCATGTTT\n+CTGAAAAGTGCTGTAGTTTAGTCCCCTCACCCCCAGCACTGCCCAAACAG\n+GGGCCAAGTGCGCCCCAATTCCAAGAATGAAGGCAGAGCGACAACAGTGC\n+GGACACCCCGGCTGCTAGCCCACGGTGAAGCCCGGCGGGGTTGCCCACCA\n+GTTGCGAAAGCCCCCTTTCCTCAGGGAGCACGCGGGACCTCGGTGGAGAT\n+CTCCAGTGAGGCTTAGAGGAGCCCAGGGCCTCGGGCGGGTTGGGGTTTGT\n+CCTCAGTGCATTGGACGCGCTGCTCTCTCCCCTGAAGGCTGGGCTCGCGT\n+GGGCGGCCGCGGGTGGTGGCCCTCCCGGTTCCTGCCCGAGGACCAGTTGT\n+AAATGTTACTGCTTCCTACTAATAAATGCTGACCTGATCAAATGGAGCCC\n+AGACGCTGGCCCTAAACATTGTGTGCCTGCTTTCTCTGCCTC
TCTGCAAA\n+ATATCACACTCAGGATATTTCTCCTCTACCCCTGGGAGTGAGACATTGTT\n+AAAAATTCAGGGCCCTTCCACCTGACAGATCTCTCTGATGTGTCTCTGCC\n+TTCTCTGCCTCACATCCCTTTGTGTAGGCAGATGCAGCAGCA\n'
b
diff -r f088370d2a3c -r e3ba567abdf5 test-data/cached_locally/ref.gtf
--- a/test-data/cached_locally/ref.gtf Sun Jan 28 04:04:58 2018 -0500
+++ b/test-data/cached_locally/ref.gtf Fri Mar 11 14:08:11 2022 +0000
b
b'@@ -1,6 +1,20 @@\n-1\tensembl_havana\tgene\t1\t103\t.\t+\t.\tgene_id "ENSG00000162526"; gene_version "4"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1";\n-1\tensembl_havana\ttranscript\t1\t103\t.\t+\t.\tgene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";\n-1\tensembl_havana\texon\t1\t103\t.\t+\t.\tgene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; exon_id "ENSE00002319515"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n-1\tensembl_havana\tCDS\t1\t100\t.\t+\t0\tgene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; protein_id "ENSP00000334393"; protein_version "3"; tag "basic"; transcript_support_level 
"NA";\n-1\tensembl_havana\tstart_codon\t1\t3\t.\t+\t0\tgene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";\n-1\tensembl_havana\tstop_codon\t101\t103\t.\t+\t0\tgene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";\n+fake_chr2\tHAVANA\tgene\t257\t3416\t.\t+\t.\tgene_id "ENSG00000128710.6"; gene_type "protein_coding"; gene_name "HOXD10"; level 1; hgnc_id "HGNC:5133"; tag "overlapping_locus"; havana_gene "OTTHUMG00000132511.5";\n+fake_chr2\tHAVANA\ttranscript\t257\t3416\t.\t+\t.\tgene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";\n+fake_chr2\tHAVANA\texon\t257\t1057\t.\t+\t.\tgene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; 
gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "E'..b' tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\tCDS\t6248\t7064\t.\t+\t0\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\tstart_codon\t6248\t6250\t.\t+\t0\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\texon\t7413\t8416\t.\t+\t.\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript 
"OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\tCDS\t7413\t7651\t.\t+\t2\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\tstop_codon\t7652\t7654\t.\t+\t0\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\tUTR\t6198\t6247\t.\t+\t.\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\tUTR\t7652\t8416\t.\t+\t.\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; 
transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n'
b
diff -r f088370d2a3c -r e3ba567abdf5 test-data/gc.tab
--- a/test-data/gc.tab Sun Jan 28 04:04:58 2018 -0500
+++ b/test-data/gc.tab Fri Mar 11 14:08:11 2022 +0000
b
@@ -1,1 +1,2 @@
-ENSG00000162526 0.388349514563107
+ENSG00000128709.13 0.626402993051844
+ENSG00000128710.6 0.467226890756303
b
diff -r f088370d2a3c -r e3ba567abdf5 test-data/in.fasta
--- a/test-data/in.fasta Sun Jan 28 04:04:58 2018 -0500
+++ b/test-data/in.fasta Fri Mar 11 14:08:11 2022 +0000
b
b"@@ -1,2 +1,173 @@\n->1\n-AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAAAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAATTTTT\n\\ No newline at end of file\n+>fake_chr2 hg38_dna range=chr2:176116522-176125113 5'pad=0 3'pad=0 strand=+ repeatMasking=none\n+TGGGGCGGGCTGGCCGAGCGAGCCCTGGAGAGGCGGACAGGAGGGCGGCG\n+GAGAGCGCTGGGCCGGTTGTCTCCAGCGCGCACTATCGCGGGCGCGTAGT\n+AGATGTCGCTGTTGTCCGTGCTTACCCGGCCGGCCGGCCAGGCTCTGGAG\n+CACGTGACCCGAGAGGAGGCTGCGGCTCAAGGCCATTTTCAAATCTCATT\n+GGCTTGGTTGTCATGTGGTCGGCAGAGGCATCCACAATTACACGGGGAAT\n+GTTTTCCTAGAGATGTCAGCCTACAAAGGACACAATCTCTCTTCTTCAAA\n+TTCTTCCCCAAAATGTCCTTTCCCAACAGCTCTCCTGCTGCTAATACTTT\n+TTTAGTAGATTCCTTGATCAGTGCCTGCAGGAGTGACAGTTTTTATTCCA\n+GCAGCGCCAGCATGTACATGCCACCACCTAGCGCAGACATGGGGACCTAT\n+GGAATGCAAACCTGTGGACTGCTCCCGTCTCTGGCCAAAAGAGAAGTGAA\n+CCACCAAAATATGGGTATGAATGTGCATCCTTATATACCTCAAGTAGACA\n+GTTGGACAGATCCGAACAGATCTTGTCGAATAGAGCAACCTGTTACACAG\n+CAAGTCCCCACTTGCTCCTTCACCACCAACATTAAGGAAGAATCCAATTG\n+CTGCATGTATTCTGATAAGCGCAACAAACTCATTTCGGCCGAGGTCCCTT\n+CGTACCAGAGGCTGGTCCCTGAGTCTTGTCCCGTTGAGAACCCTGAGGTT\n+CCCGTCCCTGGATATTTTAGACTGAGTCAGACCTACGCCACCGGGAAAAC\n+CCAAGAGTACAATAATAGCCCCGAAGGCAGCTCCACTGTCATGCTCCAGC\n+TCAACCCTCGTGGCGCGGCCAAGCCGCAGCTCTCCGCTGCCCAGCTGCAG\n+ATGGAAAAGAAGATGAACGAGCCCGTGAGCGGCCAGGAGCCCACCAAAGT\n+CTCCCAGGTGGAGAGCCCCGAGGCCAAAGGCGGCCTTCCCGAAGAGAGGA\n+GCTGCCTGGCTGAGGTCTCCGTGTCCAGTCCCGAAGTGCAGGAGAAGGAA\n+AGCAAAGGTCGGTATGAGCAGAGTTGCCACCCCAGCGGGGCGCGCAGCCC\n+GGGAACCCGGCAGAGAGGGAGTGCCGGGGTGCCCAGCGCCGAGCCGGAGC\n+CCGACTTGGCAGGTGCTGCTCCGCCTGGTTTTAGAGGGGTGATCTCAGCC\n+CTGAGATAGTCCCCGCTTCTCCCCTGCTGCCCTGGCCCTCTCCGCCAGTC\n+CTGGCCCCACGCTGATGGCGCCCGGGCAGAGGAAAAGCTTGCCGGTTTTA\n+TTTTTCCTGAGCTAGACCTGAACACAACAAAAGAGCGCAAAGGAGACCTG\n+CGGCTCATAAACACGACCACAGAGCCTCTTTTCTCCTGCTCAGATTTGCA\n+GTTCCAGTTTTGCCTTGAGCCCAATGATCATGTTAAGGTGATCCAGGGCA\n+CCGTGTTCGTGTTCAAGTGTATGCACCCCGCATCCTGCGAGCTTGGGGGT\n+GGTGAGGGGAAAGAGATGGCTGGGCTGGTTGGTGCTTGAGTTGGGAAACA\n+GGGCTTACTGCCTTTGCTGGGCTAGGTAACCTTGGCTTTGTTTAGGAAAA\n+GTGCTGCAGTCTTTGCAATCCGTCGGCAAAGAGGGCAAAGGC
GGAGGGGG\n+AGAGTGGAACCCGCATTGCCCTCCCTGCAAGGCCAGCCTTAGGGCTGGGC\n+TAAGGCAAAGAGCCAGGGATCTGGCTTTTTGAGAAGGAACCCTCCTCCTC\n+TCCCCCAGTGCTTAGAGGTGGGCCACAGTAGGGGGCTCCCTTTCTGGGGG\n+AATGCTTTAGTGTGGGGGCAAGAAGACATGAAAATTAAGGAAATTCTGGG\n+GAATGCAACAATACCCAGGCAAGGTGGGGGAAGGTGTCTCGCTTCCCCAT\n+TTATCTTTTGAAAGAGAATGGGCACCTATAAACCTGACTGTCAGGATTCC\n+TGACTGCCTAGGAGAGGTGGGGAAGAAGTGGCAGATTTGGGGACCTGAGG\n+CAGCAGTGGGGTTGGTAGGCTTGTCCAGGTCGTGGCGTATTCCCCTCCGT\n+CCCTGTTAGGAGCTGAACCCTTAGAATGTTGCTGGGGAGATCTGGAAAGT\n+TTACTATTCTACTAATGTTTTGTACAAGTGAGAAAGTTGAAAGAGAGAGC\n+GAGAACCCAAATGCAGACTGTCCTGCCATCATGTCATTTAAGTAATGTGG\n+CATCAATGTAAGATTCCCTTCCAAGGCCCACTTCATGTGAGTAATGTTTA\n+ATACTAGCATTTTCCAAAGCGGCCTGGCTGCCAGCAGGGTCACGGCCAAG\n+GGTACATTTGAACAGTCTGAAGAAAAAAACAAAAACGAAAACCAAAACCA\n+AAACCAAAACAAAAACAAAAACAAAAACAAACAAACAAAAAACCTCTTGA\n+TTTTTTTCTTCTTCTCCCTTTAATTTTGTTAGAGGAAATCAAGTCTGATA\n+CACCAACCAGCAATTGGCTCACTGCAAAGAGTGGCAGAAAGAAGAGGTGC\n+CCTTACACTAAGCACCAAACGCTGGAATTAGAAAAAGAGTTCTTGTTCAA\n+TATGTACCTCACCCGCGAGCGCCGCCTAGAGATCAGTAAGAGCGTTAACC\n+TCACCGACAGGCAGGTCAAGATTTGGTTTCAAAACCGCCGAATGAAACTC\n+AAGAAGATGAGCCGAGAGAACCGGATCCGAGAACTGACCGCCAACCTCAC\n+GTTTTCTTAGGTCTGAGGCCGGTCTGAGGCCGGTCAGAGGCCAGGATTGG\n+AGAGGGGGCACCGCGTTCCAGGGCCCAGTGCTGGAGGACTGGGAAAGCGG\n+AAACAAAACCTTCACCGCTCTTTGTTTGTTGTTTTGTTGTATTTTGTTTT\n+CCTGCTAGAATGTGACTTTGGGGTCATTATGTTCGTGCTGCAAGTGATCT\n+GTAATCCCTATGAGTATATATATATATATATATATATATATATAAAAACT\n+TAGCACGTGTAATTTATTATTTTTTCATCGTAATGCAGGGTAACTATTAT\n+TGCGCATTTTCATTTGGGTCTTAACTTATTGGAACTGTAGAGCATCCATC\n+CATCCATCCATCCAGCAATGTGACTTTTTCATGTCTTTCCTAACACAAAA\n+GGTCTATGTGTGTGGTTAGTCCATGAACTCATGGCATTTTGAATACATCC\n+AGTACTTTAAAAATGACATATATATTTAAAAAAAAAAGATTAAGAAAACC\n+CACAAGTTGGAGGGAGGGGGACTTAAAAAGCACATTACAATGTATCTTTT\n+CACAAATGAATTTAGCAGTTGTCCTTGGTGAGATGGGATATTGGCGATTT\n+ATGCCTTGTAGCCTTTCCCTTGTGGTGCATCTGTGGTTTGGTAGAAGTAC\n+AACAGCAACCTGTCCTTTCTGTGCATGTTCTGGTCGCATGTATAATGCAA\n+TAAACTCTGGAAATGAGTTCACTCCCTCTGCTTTCTGAAATGGAAATATG\n+TTATGGTGGAAATGAAAGCCTATGGTGAGATTATCTTCTGGTTACACTCC\n+CTGTTTGGGGCATTTGGGCAGGGGAGTG
ATAGACTAGTAGGGGAAGGGAG\n+ATGGGGGAGAAAAGCTGGAGGAGGCCTAGGGTGTTGGATTTTGGCAGTGG\n+T"..b'GA\n+GGGTAATTTTTATTTAGCCGTTTCTCCGATCATGTGGGGAATACCATTAG\n+CTGTTGATAGCGGGCCATGTATCCGAGGAAAGCCTGAGCTACAAGGCAAA\n+GGCATCCCATCTGGAACAAAATCAGAAAGCTATTGGCAAAGGTAATCAAT\n+CAGGCCATAAATAGCCATTTACCCGCTTCCTTTTCGGGGCTGGAGGTGGG\n+CCGGGAGCCCTCCAAGGGTGAGCTGGGCAACTTGTAGAGCAAGGAATATG\n+CCCTCCGCTGCCGGCGCCCCGGCCGCTTTTGTCTGGGCTCCCAGCCGGGC\n+TTCCGAGGCTTTGTACCATGGATTTGGGAGTGACAATGGGCATTTCCCTC\n+AGATTCAAGGCTGCTCAACCTCACCTCTGTAGGGGGAAAAAAATCAGAAG\n+GGAGTGTCCCAAGGACCTAGCCATTCGGCCGAATTTTTTAGACATTTTGG\n+GAGTCTCCTCCGAGGCCTTTAAGTGCGAACCGCGCGAAGCGGCCCTGCCC\n+GGGGAGACTCGCTGAGGCAGGGCTGAGGCGGCGGGCGGGAGCAAGCTGCT\n+CTAGCATTTGGGTTCTGCCCTGTGGCGTGTTCTCTTCCAGGGCCTTTCCA\n+GCATCATCGGAGAAGACGAAGCACCCTGGCCGCCACTGTCCGTGCTGCGC\n+CAACTCGCCCGGCCGCCCGCCCTTCCGAGGGCAGGCAGAAGCCCCTCTGT\n+GTCCTCCACCGCCGCGCCCCGGCTCGCCCCTCGGGCCGCGGCGTGTGCCC\n+AGCCTCACGTCGGGGTGTGTGTGGCCGCGCGGGCGTGTGTGAGTGTGGCA\n+GGGGGAGGGGGCCCTCCGATCTGCTCCATCCGTCCGTTTTATTAGGGACA\n+CATTAATCTATAATCAAATACACCTCATAAAATTTTTATTGAAAGGCATA\n+ATATCATTACAGAGGTCTTCCACCTGTTTTAAACAACACGACAAGCTGTG\n+AGCAAGCGTGTGTGTGGGGATGTGTGGGGAGGGGTGGGTGTGAGTAGGGA\n+GAGAGGCGAGGGGAGAACAGCTCCCCTCGGGCGCTAGGGGCCGCCCCGAG\n+GGCCCGCCTGCCTCGGGCGACACCGGCCTGGCGCCCCCGCGGCCGCTCCG\n+TGTGCCCTGGACTCGCCGCCCGCGGCTCGGAAGCTGGAGAGTCAGCGACG\n+GGGCCCGACTGCGGGACCGAGGGCTGCAAGAAGAAGCGAACAAATAGTCC\n+CCAGCGCCTCCTCTGGATGCGGTCGCGTCTGTGGTCCTGGCAGCCGCTGG\n+GCGGGCCAGGCCAGGTCGGGCCGGGCCGAGCCGGGCACATGGACCTGGGC\n+CTGCGGGCTCTAATTGCGGCGCTTATGTTGATGATTTTTTTTTTAATCAC\n+AGCAGCCCCCAGTTTAGCGGACTGATTTACTCCCGGTATTGGTAAATATG\n+ATCACGTGGGCCGCGCGACCAATGGTGGAGGCTGCAGCCTGCGAACTAGT\n+CGGTGGCTCGGGCGCCGGCGGGGAGCTGCTCGGCGGCGGACAGTGTAATG\n+TTGGGTGGGAGTGCGGGACGCCTCAAAATGTCTTCCAGTGGCACCCTCAG\n+CAACTACTACGTGGACTCGCTTATAGGCCATGAGGGCGACGAGGTGTTCG\n+CGGCGCGCTTCGGGCCGCCGGGGCCAGGCGCGCAGGGCCGGCCTGCAGGT\n+GTGGCTGATGGCCCGGCCGCCACCGCCGCCGAGTTCGCCTCGTGTAGTTT\n+TGCCCCCAGATCGGCCGTGTTCTCTGCCTCGTGGTCCGCGGTGCCCTCCC\n+AGCCCCCGGCAGCGGCGGCGATGAGCGGCCTCTACCACCCGTACGTTCCC\n+CCG
CCGCCCCTGGCCGCCTCTGCCTCCGAGCCCGGCCGCTACGTGCGCTC\n+CTGGATGGAGCCGCTGCCCGGCTTCCCGGGCGGTGCGGGCGGTGGCGGTG\n+GTGGTGGAGGCGGCGGTCCGGGCCGCGGTCCCAGCCCTGGCCCCAGCGGC\n+CCAGCCAACGGGCGCCACTACGGGATTAAGCCTGAAACCCGAGCGGCCCC\n+GGCCCCCGCCACGGCCGCCTCCACCACCTCCTCCTCCTCCACTTCCTTAT\n+CCTCCTCCTCCAAACGGACTGAGTGCTCCGTGGCCCGGGAGTCCCAGGGG\n+AGCAGCGGCCCCGAGTTCTCGTGCAACTCGTTCCTGCAGGAGAAGGCGGC\n+AGCGGCGACGGGGGGAACCGGGCCTGGGGCAGGGATCGGGGCCGCGACTG\n+GGACGGGCGGCTCGTCGGAGCCCTCAGCTTGCAGCGACCACCCGATCCCA\n+GGCTGTTCGCTGAAGGAGGAGGAGAAGCAGCATTCGCAGCCGCAGCAGCA\n+GCAACTTGACCCAAGTAAGTGCAAAAGAAATTGCCCCCTGATTTATTGCT\n+GAAACCTGTAAGGCTCGAATGTGCAAAACTGATAGTTTTACTAACCTATA\n+AAAACGTCTAGACGCCTACCCAAGCCTAGGCGAACAACATGCATCCATAA\n+AAAGAGCTTCCCATAACCACCTACCCTGGGCGCTCAGTTAGTACGGTAAA\n+CAGAGCGCGAGCATTAAGGCTTTTTATGATAATTCCCCACAAGTTGTGAA\n+AAGCGACCATCCTTGGTGAAATTAATTTAACGACCTCTCTTCCCCACCCT\n+GTGGTCTCTCCCTGCCTCCCCTCCTCTCCTCTCTCCCCGTCTCCAAACCT\n+CCCTCTTTGTAGACAACCCCGCCGCGAACTGGATCCACGCTCGCTCCACC\n+CGGAAAAAGCGCTGTCCCTACACCAAATACCAGACGCTTGAGCTGGAGAA\n+AGAATTCCTCTTCAACATGTACCTCACCCGGGACCGGCGCTACGAGGTGG\n+CCAGGATTCTCAACCTAACAGAGAGACAGGTCAAAATCTGGTTTCAGAAC\n+CGTAGGATGAAAATGAAAAAGATGAGCAAGGAGAAATGCCCCAAAGGAGA\n+CTGACCCGGCGCGGTGCTGGCGGGAGCGCTCAAGGGCAGCGGATTTGTTG\n+TTGTTGCTGTTTTCCTTTGTGGGTGTTTGGTGCTTGATTTCCAGAAACTC\n+TCCAGCGACTTGGACTTCTTCTTCTTTTTTTTTTTCTTTTTAGATAGAAG\n+TGACTGTGTGGTTGGTCTCTGAGGTATTTGGGGGACTCTGTATTTGCTCG\n+TTTACGTGTTGGAAAAACCAAGTGGCTTTGGGGTTTCGCCCTATCCCACT\n+CCCTCTCTTTCCTGCTCCATTGGTTCCTTAAGAAATGCTATATTTTGTGA\n+GTGCAAGCTGGCTTGGGGAGCCCTCTCTTGTGTAAATGTCCCCCATGTTT\n+CTGAAAAGTGCTGTAGTTTAGTCCCCTCACCCCCAGCACTGCCCAAACAG\n+GGGCCAAGTGCGCCCCAATTCCAAGAATGAAGGCAGAGCGACAACAGTGC\n+GGACACCCCGGCTGCTAGCCCACGGTGAAGCCCGGCGGGGTTGCCCACCA\n+GTTGCGAAAGCCCCCTTTCCTCAGGGAGCACGCGGGACCTCGGTGGAGAT\n+CTCCAGTGAGGCTTAGAGGAGCCCAGGGCCTCGGGCGGGTTGGGGTTTGT\n+CCTCAGTGCATTGGACGCGCTGCTCTCTCCCCTGAAGGCTGGGCTCGCGT\n+GGGCGGCCGCGGGTGGTGGCCCTCCCGGTTCCTGCCCGAGGACCAGTTGT\n+AAATGTTACTGCTTCCTACTAATAAATGCTGACCTGATCAAATGGAGCCC\n+AGACGCTGGCCCTAAACATTGTGTGCCTGCTTTCTCTGCCTC
TCTGCAAA\n+ATATCACACTCAGGATATTTCTCCTCTACCCCTGGGAGTGAGACATTGTT\n+AAAAATTCAGGGCCCTTCCACCTGACAGATCTCTCTGATGTGTCTCTGCC\n+TTCTCTGCCTCACATCCCTTTGTGTAGGCAGATGCAGCAGCA\n'
b
diff -r f088370d2a3c -r e3ba567abdf5 test-data/in.gtf
--- a/test-data/in.gtf Sun Jan 28 04:04:58 2018 -0500
+++ b/test-data/in.gtf Fri Mar 11 14:08:11 2022 +0000
b
b'@@ -1,6 +1,20 @@\n-1\tensembl_havana\tgene\t1\t103\t.\t+\t.\tgene_id "ENSG00000162526"; gene_version "4"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1";\n-1\tensembl_havana\ttranscript\t1\t103\t.\t+\t.\tgene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";\n-1\tensembl_havana\texon\t1\t103\t.\t+\t.\tgene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; exon_id "ENSE00002319515"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n-1\tensembl_havana\tCDS\t1\t100\t.\t+\t0\tgene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; protein_id "ENSP00000334393"; protein_version "3"; tag "basic"; transcript_support_level 
"NA";\n-1\tensembl_havana\tstart_codon\t1\t3\t.\t+\t0\tgene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";\n-1\tensembl_havana\tstop_codon\t101\t103\t.\t+\t0\tgene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";\n+fake_chr2\tHAVANA\tgene\t257\t3416\t.\t+\t.\tgene_id "ENSG00000128710.6"; gene_type "protein_coding"; gene_name "HOXD10"; level 1; hgnc_id "HGNC:5133"; tag "overlapping_locus"; havana_gene "OTTHUMG00000132511.5";\n+fake_chr2\tHAVANA\ttranscript\t257\t3416\t.\t+\t.\tgene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";\n+fake_chr2\tHAVANA\texon\t257\t1057\t.\t+\t.\tgene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; 
gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "E'..b' tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\tCDS\t6248\t7064\t.\t+\t0\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\tstart_codon\t6248\t6250\t.\t+\t0\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\texon\t7413\t8416\t.\t+\t.\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript 
"OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\tCDS\t7413\t7651\t.\t+\t2\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\tstop_codon\t7652\t7654\t.\t+\t0\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\tUTR\t6198\t6247\t.\t+\t.\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n+fake_chr2\tHAVANA\tUTR\t7652\t8416\t.\t+\t.\tgene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; 
transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";\n'
b
diff -r f088370d2a3c -r e3ba567abdf5 test-data/length.tab
--- a/test-data/length.tab Sun Jan 28 04:04:58 2018 -0500
+++ b/test-data/length.tab Fri Mar 11 14:08:11 2022 +0000
b
@@ -1,1 +1,2 @@
-ENSG00000162526 103
+ENSG00000128709.13 1871
+ENSG00000128710.6 1785