Repository 'gffread'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/gffread

Changeset 6:6ea09f60dee9 (2019-11-01)
Previous changeset 5:69e0806b63a4 (2019-10-01) Next changeset 7:4dea02886337 (2019-11-11)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gffread commit 956566e1f7b4390719db56b7488a720ccad181a4"
modified:
gffread.xml
test-data/Homo_sapiens.GRCh37_19.71.gff3
test-data/ecoli-k12.processed.gff3
removed:
cuff_macros.xml
b
diff -r 69e0806b63a4 -r 6ea09f60dee9 cuff_macros.xml
--- a/cuff_macros.xml Tue Oct 01 12:20:13 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,85 +0,0 @@
-<macros>
-  <token name="@VERSION@">2.2.1</token>
-
-  <xml name="requirements">
-    <requirements>
-      <requirement type="package" version="2.2.1">cufflinks</requirement>
-      <yield />
-    </requirements>
-  </xml>
-
-  <xml name="condition_inputs">
-    <!-- DEFAULT : use BAM/SAM files -->
-    <conditional name="in_type">
-        <param name="set_in_type" type="select" label="Input data type"
-            help="CuffNorm supports either CXB (from cuffquant) or SAM/BAM input files. Mixing is not supported. Default: SAM/BAM">
-            <option value="BAM">SAM/BAM</option>
-            <option value="CXB">Cuffquant (CXB)</option>
-            <option value="CONDITION_LIST">List of single replicate conditions</option>
-            <option value="CONDITION_REPLICATE_LIST">List of multiple replicate conditions</option>
-        </param>
-        <when value="BAM">
-            <repeat name="conditions" title="Condition" min="2">
-                <param name="name" label="Condition name" type="text"/>
-                <param name="samples" label="Replicates" type="data" format="sam,bam" multiple="true"/>
-            </repeat>
-        </when>
-        <when value="CXB">
-            <repeat name="conditions" title="Condition" min="2">
-                <param name="name" label="Condition name" type="text"/>
-                <param name="samples" label="Replicates" type="data" format="cxb" multiple="true"/>
-            </repeat>
-        </when>
-        <when value="CONDITION_LIST">
-            <param name="conditions" label="List of Conditions" type="data_collection" collection_type="list" />
-        </when>
-        <when value="CONDITION_REPLICATE_LIST">
-            <param name="conditions" label="List of Conditions" type="data_collection" collection_type="list:list" />
-        </when>
-    </conditional>
-  </xml>
-  <token name="@CONDITION_SAMPLES@">
-            #if $in_type.set_in_type in ['BAM', 'CXB']
-                #for $condition in $in_type.conditions:
-                    #set samples = ','.join( [ str( $sample ) for $sample in $condition.samples ] )
-                    '$samples'
-                #end for
-            #elif $in_type.set_in_type == 'CONDITION_LIST'
-                #for $sample in $in_type.conditions:
-                    '$sample'
-                #end for
-            #elif $in_type.set_in_type == 'CONDITION_REPLICATE_LIST'
-                #for $condition_list in $in_type.conditions:
-                    #set samples = ','.join( [ str( $sample ) for $sample in $condition_list ] )
-                    '$samples'
-                #end for
-            #end if
-  </token>
-  <token name="@CONDITION_LABELS@">
-            #import re
-            #if $in_type.set_in_type in ['BAM', 'CXB']
-                #set labels = '\'' + '\',\''.join( [ str( $condition.name ) for $condition in $in_type.conditions ] ) + '\''
-            #elif $in_type.set_in_type in ['CONDITION_LIST', 'CONDITION_REPLICATE_LIST']
-                #set labels = '\'' + '\',\''.join( map(lambda x: re.sub('[^\w\-_]', '_', x), $in_type.conditions.keys() ) ) + '\''
-            #end if
-            --labels $labels
-  </token>
-  <xml name="cufflinks_gtf_inputs">
-    <param format="gtf" name="inputs" type="data" label="GTF file(s) produced by Cufflinks" help="" multiple="true" />
-    <repeat name="additional_inputs" title="Additional GTF Inputs (type Dataset Collection Lists)">
-      <param format="gtf" name="additional_inputs" type="data_collection" label="GTF file(s) produced by Cufflinks" help="" />
-    </repeat>
-  </xml>
-  <token name="@CUFFLINKS_GTF_INPUTS@">
-            ## Inputs.
-            #for $input_file in $inputs:
-                '${input_file}'
-            #end for
-            #for $additional_input in $additional_inputs:
-                #for $input_file in $additional_input.additional_inputs:
-                    '${input_file}'
-                #end for
-            #end for
-  </token>
-  <token name="@HAS_MULTIPLE_INPUTS@">getattr(inputs, "__len__", [].__len__)() >= 2</token>
-</macros>
b
diff -r 69e0806b63a4 -r 6ea09f60dee9 gffread.xml
--- a/gffread.xml Tue Oct 01 12:20:13 2019 -0400
+++ b/gffread.xml Fri Nov 01 12:54:52 2019 -0400
[
b'@@ -1,7 +1,7 @@\n-<tool id="gffread" name="gffread" version="@VERSION@.2">\n+<tool id="gffread" name="gffread" version="@VERSION@.1">\n     <description>Filters and/or converts GFF3/GTF2 records</description>\n     <macros>\n-        <import>cuff_macros.xml</import>\n+        <token name="@VERSION@">0.11.4</token>\n         <xml name="fasta_output_select">\n             <param name="fa_outputs" type="select" display="checkboxes" multiple="true" label="Select fasta outputs">\n                 <option value="-w exons.fa">fasta file with spliced exons for each GFF transcript (-w exons.fa)</option>\n@@ -47,7 +47,9 @@\n             </param>\n         </xml>\n     </macros>\n-    <expand macro="requirements" />\n+    <requirements>\n+        <requirement type="package" version="@VERSION@">gffread</requirement>\n+    </requirements>\n     <command detect_errors="aggressive">\n <![CDATA[\n     #if $reference_genome.source == \'history\':\n@@ -124,7 +126,9 @@\n             <option value="-C">coding only: discard mRNAs that have no CDS feature (-C)</option>\n             <option value="-G">only parse additional exon attributes from the first exon and move them to the mRNA level (useful for GTF input) (-G)</option>\n             <option value="-O">process also non-transcript GFF records (by default non-transcript records are ignored) (-O)</option>\n-            <option value="--no-pseudo">filter out records matching the \'pseudo\' keyword (--no-pseudo)</option>\n+            <!-- The no-pseudo option is broken in 0.11.4 of gffread.\n+                 See https://github.com/gpertea/gffread/issues/43 -->\n+            <!-- <option value="\\-\\-no-pseudo">filter out records matching the \'pseudo\' keyword (\\-\\-no-pseudo)</option> -->\n         </param>\n         <conditional name="region">\n             <param name="region_filter" type="select" label="Filter by genome region">\n@@ -272,10 +276,13 @@\n             <param name="full_gff_attribute_preservation" value="-F"/>\n             <output name="output_gff" file="ecoli-k12.processed.gff3" ftype="gff3" lines_diff="2" />\n         </test>\n-\n+        \n+<!-- The no-pseudo option is broken in 0.11.4 of gffread.\n+     See https://github.com/gpertea/gffread/issues/43 -->\n+<!-- \n         <test>\n             <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>\n-            <param name="filtering" value="--no-pseudo"/>\n+            <param name="filtering" value="/-/-no-pseudo"/> # Fix dashes when uncommenting\n             <param name="gff_fmt" value="gtf"/>\n             <output name="output_gtf">\n                 <assert_contents>\n@@ -283,6 +290,7 @@\n                 </assert_contents>\n             </output>\n         </test>\n+-->\n \n         <test>\n             <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>\n@@ -335,25 +343,25 @@\n             <param name="gff_fmt" value="gtf"/>\n             <output name="output_gtf">\n                 <assert_contents>\n-                    <not_has_text text="ENST00000587541" />\n+                    <has_text text="ENST00000587541" />\n                     <has_text text="ENST00000382683" />\n                 </assert_contents>\n             </output>\n             <output name="output_exons">\n                 <assert_contents>\n-                    <has_text text="ENST00000346144 gene=MADCAM1 CDS=47-932" />\n+                    <has_text text="ENST00000346144 CDS=47-934" />\n                     <has_text text="CTATTTAAGCGGCTTCCCCGCGGCCTCGGGACAGAGGGGACTGAGCATGGATTTCGGACTGGCCCTCCTG" />\n                 </assert_contents>\n             </output>\n             <output name="output_cds">\n                 <assert_contents>\n-                    <has_text text="ENST00000346144 gene=MADCAM1" />\n+                    <has_text text="ENST00000346144" />\n                     <has_text text="ATGGATTTCGGACTGGCCCTCCTGCTGGCGGGGCTTCTGGGGCTCCTCCTCGGCCAGTCCCTCCAGGTGA" />\n                 </assert_contents>\n             </output>\n             <output name="output_pep">\n                 <asse'..b' without exon/CDS\n+               features (see --tlf option below); automatic if the input\n+               filename ends with .tlf)\n+               \n+    Clustering:\n+     -M/--merge : cluster the input transcripts into loci, discarding\n+          "duplicated" transcripts (those with the same exact introns\n+          and fully contained or equal boundaries)\n+     -d <dupinfo> : for -M option, write duplication info to file <dupinfo>\n+     --cluster-only: same as -M/--merge but without discarding any of the\n+          "duplicate" transcripts, only create "locus" features\n+     -K   for -M option: also discard as redundant the shorter, fully contained\n+           transcripts (intron chains matching a part of the container)\n+     -Q   for -M option, no longer require boundary containment when assessing\n+          redundancy (can be combined with -K); only introns have to match for\n+          multi-exon transcripts, and >=80% overlap for single-exon transcripts\n+     -Y   for -M option, enforce -Q but also discard overlapping single-exon \n+          transcripts, even on the opposite strand (can be combined with -K)\n+          \n+    Output options:\n+     --force-exons: make sure that the lowest level GFF features are considered\n+           "exon" features\n+     --gene2exon: for single-line genes not parenting any transcripts, add an\n+           exon feature spanning the entire gene (treat it as a transcript)\n+     --t-adopt:  try to find a parent gene overlapping/containing a transcript\n+           that does not have any explicit gene Parent\n+     -D    decode url encoded characters within attributes\n+     -Z    merge very close exons into a single exon (when intron size<4)\n+     -g   full path to a multi-fasta file with the genomic sequences\n+          for all input mappings, OR a directory with single-fasta files\n+          (one per genomic sequence, with file names matching sequence names)\n+     -w    write a fasta file with spliced exons for each GFF transcript\n+     -x    write a fasta file with spliced CDS for each GFF transcript\n+     -y    write a protein fasta file with the translation of CDS for each record\n+     -W    for -w and -x options, write in the FASTA defline the exon\n+           coordinates projected onto the spliced sequence;\n+           for -y option, write transcript attributes in the FASTA defline\n+     -S    for -y option, use \'*\' instead of \'.\' as stop codon translation\n+     -L    Ensembl GTF to GFF3 conversion (implies -F; should be used with -m)\n+     -m    <chr_replace> is a name mapping table for converting reference \n+           sequence names, having this 2-column format:\n+           <original_ref_ID> <new_ref_ID>\n+           WARNING: all GFF records on reference sequences whose original IDs\n+           are not found in the 1st column of this table will be discarded!\n+     -t    use <trackname> in the 2nd column of each GFF/GTF output line\n+     -o    write the records into <outfile> instead of stdout\n+     -T    main output will be GTF instead of GFF3\n+     --bed output records in BED format instead of default GFF3\n+     --tlf output "transcript line format" which is like GFF\n+           but exons, CDS features and related data are stored as GFF \n+           attributes in the transcript feature line, like this:\n+             exoncount=N;exons=<exons>;CDSphase=<N>;CDS=<CDScoords> \n+           <exons> is a comma-delimited list of exon_start-exon_end coordinates;\n+           <CDScoords> is CDS_start:CDS_end coordinates or a list like <exons>\n+     --table output a simple tab delimited format instead of GFF, with columns\n+           having the values of GFF attributes given in <attrlist>; special\n+           pseudo-attributes (prefixed by @) are recognized:\n+           @chr, @start, @end, @strand, @numexons, @exons, @cds, @covlen, @cdslen\n+     -v,-E expose (warn about) duplicate transcript IDs and other potential\n+           problems with the given GFF/GTF records\n ]]>\n     </help>\n     <citations>\n'
b
diff -r 69e0806b63a4 -r 6ea09f60dee9 test-data/Homo_sapiens.GRCh37_19.71.gff3
--- a/test-data/Homo_sapiens.GRCh37_19.71.gff3 Tue Oct 01 12:20:13 2019 -0400
+++ b/test-data/Homo_sapiens.GRCh37_19.71.gff3 Fri Nov 01 12:54:52 2019 -0400
b
b'@@ -1,4 +1,5 @@\n-# gffread test-data/Homo_sapiens.GRCh37_19.71.gtf -o test-data/Homo_sapiens.GRCh37_19.71.gff3\n+# gffread /tmp/tmpq6d_yfqc/files/9/2/2/dataset_922cd54b-d77c-48fb-abf7-6fc8d8fdb97c.dat -o output.gff3\n+# gffread v0.11.4\n ##gff-version 3\n 19\tsnRNA\ttranscript\t223158\t223261\t.\t-\t.\tID=ENST00000410397;geneID=ENSG00000222329;gene_name=U6\n 19\tsnRNA\texon\t223158\t223261\t.\t-\t.\tParent=ENST00000410397\n@@ -9,55 +10,55 @@\n 19\tprocessed_pseudogene\texon\t239145\t239247\t.\t-\t.\tParent=ENST00000588755\n 19\tprocessed_pseudogene\ttranscript\t279495\t280170\t.\t+\t.\tID=ENST00000589981;geneID=ENSG00000267447;gene_name=VN2R11P\n 19\tprocessed_pseudogene\texon\t279495\t280170\t.\t+\t.\tParent=ENST00000589981\n-19\tprotein_coding\tmRNA\t281043\t291386\t.\t-\t.\tID=ENST00000269812;geneID=ENSG00000141934;gene_name=PPAP2C\n+19\tprotein_coding\ttranscript\t281043\t291386\t.\t-\t.\tID=ENST00000269812;geneID=ENSG00000141934;gene_name=PPAP2C\n 19\tprotein_coding\texon\t281043\t281537\t.\t-\t.\tParent=ENST00000269812\n 19\tprotein_coding\texon\t282134\t282310\t.\t-\t.\tParent=ENST00000269812\n 19\tprotein_coding\texon\t282752\t282809\t.\t-\t.\tParent=ENST00000269812\n 19\tprotein_coding\texon\t287474\t287751\t.\t-\t.\tParent=ENST00000269812\n 19\tprotein_coding\texon\t288020\t288171\t.\t-\t.\tParent=ENST00000269812\n 19\tprotein_coding\texon\t291285\t291386\t.\t-\t.\tParent=ENST00000269812\n-19\tprotein_coding\tCDS\t281391\t281537\t.\t-\t0\tParent=ENST00000269812\n+19\tprotein_coding\tCDS\t281388\t281537\t.\t-\t0\tParent=ENST00000269812\n 19\tprotein_coding\tCDS\t282134\t282310\t.\t-\t0\tParent=ENST00000269812\n 19\tprotein_coding\tCDS\t282752\t282809\t.\t-\t1\tParent=ENST00000269812\n 19\tprotein_coding\tCDS\t287474\t287751\t.\t-\t0\tParent=ENST00000269812\n 19\tprotein_coding\tCDS\t288020\t288171\t.\t-\t2\tParent=ENST00000269812\n 19\tprotein_coding\tCDS\t291285\t291336\t.\t-\t0\tParent=ENST00000269812\n-19\tprotein_coding\tmRNA\t281345\t291393\t.\t-\t.\tID=ENST00000434325;geneID=ENSG00000141934;gene_name=PPAP2C\n+19\tprotein_coding\ttranscript\t281345\t291393\t.\t-\t.\tID=ENST00000434325;geneID=ENSG00000141934;gene_name=PPAP2C\n 19\tprotein_coding\texon\t281345\t281537\t.\t-\t.\tParent=ENST00000434325\n 19\tprotein_coding\texon\t282134\t282310\t.\t-\t.\tParent=ENST00000434325\n 19\tprotein_coding\texon\t282752\t282809\t.\t-\t.\tParent=ENST00000434325\n 19\tprotein_coding\texon\t287474\t287751\t.\t-\t.\tParent=ENST00000434325\n 19\tprotein_coding\texon\t288020\t288171\t.\t-\t.\tParent=ENST00000434325\n 19\tprotein_coding\texon\t291326\t291393\t.\t-\t.\tParent=ENST00000434325\n-19\tprotein_coding\tCDS\t281391\t281537\t.\t-\t0\tParent=ENST00000434325\n+19\tprotein_coding\tCDS\t281388\t281537\t.\t-\t0\tParent=ENST00000434325\n 19\tprotein_coding\tCDS\t282134\t282310\t.\t-\t0\tParent=ENST00000434325\n 19\tprotein_coding\tCDS\t282752\t282809\t.\t-\t1\tParent=ENST00000434325\n 19\tprotein_coding\tCDS\t287474\t287751\t.\t-\t0\tParent=ENST00000434325\n 19\tprotein_coding\tCDS\t288020\t288055\t.\t-\t0\tParent=ENST00000434325\n-19\tprotein_coding\tmRNA\t281388\t291200\t.\t-\t.\tID=ENST00000327790;geneID=ENSG00000141934;gene_name=PPAP2C\n+19\tprotein_coding\ttranscript\t281388\t291200\t.\t-\t.\tID=ENST00000327790;geneID=ENSG00000141934;gene_name=PPAP2C\n 19\tprotein_coding\texon\t281388\t281537\t.\t-\t.\tParent=ENST00000327790\n 19\tprotein_coding\texon\t282134\t282310\t.\t-\t.\tParent=ENST00000327790\n 19\tprotein_coding\texon\t282752\t282809\t.\t-\t.\tParent=ENST00000327790\n 19\tprotein_coding\texon\t287474\t287751\t.\t-\t.\tParent=ENST00000327790\n 19\tprotein_coding\texon\t288020\t288171\t.\t-\t.\tParent=ENST00000327790\n 19\tprotein_coding\texon\t290952\t291200\t.\t-\t.\tParent=ENST00000327790\n-19\tprotein_coding\tCDS\t281391\t281537\t.\t-\t0\tParent=ENST00000327790\n+19\tprotein_coding\tCDS\t281388\t281537\t.\t-\t0\tParent=ENST00000327790\n 19\tprotein_coding\tCDS\t282134\t282310\t.\t-\t0\tParent=ENST00000327790\n 19\tprotein_coding\tCDS\t282752\t282809\t.\t-\t1\tParent=ENST00000327790\n 19\tprotein_coding\tCDS\t287474\t287751\t.\t-\t0\tParent=ENST00000327790\n 19\tprotein_coding\tCDS\t288020\t288171\t.\t-\t2\tParent=ENST00000327790\n 19\tprotein_coding\tCDS\t290952\t291066\t.\t-\t0\tParent=ENST00000327790\n-19\tprotein_coding\tmRNA\t281991\t287636\t.\t-\t.\tID=ENST00000586998;geneID=ENSG00000141934;gene_name=PPAP2C\n+19\tprotein_coding\ttranscript\t281991\t287636\t.\t-\t'..b'000315489\n 19\tprotein_coding\tCDS\t474621\t474747\t.\t-\t0\tParent=ENST00000315489\n-19\tprotein_coding\tmRNA\t463467\t474880\t.\t-\t.\tID=ENST00000382696;geneID=ENSG00000181781;gene_name=ODF3L2\n+19\tprotein_coding\ttranscript\t463467\t474880\t.\t-\t.\tID=ENST00000382696;geneID=ENSG00000181781;gene_name=ODF3L2\n 19\tprotein_coding\texon\t463467\t464364\t.\t-\t.\tParent=ENST00000382696\n 19\tprotein_coding\texon\t467649\t467762\t.\t-\t.\tParent=ENST00000382696\n 19\tprotein_coding\texon\t474621\t474880\t.\t-\t.\tParent=ENST00000382696\n-19\tprotein_coding\tCDS\t463847\t464364\t.\t-\t2\tParent=ENST00000382696\n+19\tprotein_coding\tCDS\t463844\t464364\t.\t-\t2\tParent=ENST00000382696\n 19\tprotein_coding\tCDS\t467649\t467762\t.\t-\t2\tParent=ENST00000382696\n 19\tprotein_coding\tCDS\t474621\t474747\t.\t-\t0\tParent=ENST00000382696\n 19\tretained_intron\ttranscript\t464146\t472631\t.\t-\t.\tID=ENST00000591681;geneID=ENSG00000181781;gene_name=ODF3L2\n@@ -277,7 +278,7 @@\n 19\tantisense\texon\t490046\t490353\t.\t-\t.\tParent=ENST00000592413\n 19\tantisense\texon\t501541\t501624\t.\t-\t.\tParent=ENST00000592413\n 19\tantisense\texon\t507376\t507813\t.\t-\t.\tParent=ENST00000592413\n-19\tprotein_coding\tmRNA\t496454\t505207\t.\t+\t.\tID=ENST00000346144;geneID=ENSG00000099866;gene_name=MADCAM1\n+19\tprotein_coding\ttranscript\t496454\t505207\t.\t+\t.\tID=ENST00000346144;geneID=ENSG00000099866;gene_name=MADCAM1\n 19\tprotein_coding\texon\t496454\t496551\t.\t+\t.\tParent=ENST00000346144\n 19\tprotein_coding\texon\t497833\t498117\t.\t+\t.\tParent=ENST00000346144\n 19\tprotein_coding\texon\t498496\t498825\t.\t+\t.\tParent=ENST00000346144\n@@ -285,8 +286,8 @@\n 19\tprotein_coding\tCDS\t496500\t496551\t.\t+\t0\tParent=ENST00000346144\n 19\tprotein_coding\tCDS\t497833\t498117\t.\t+\t2\tParent=ENST00000346144\n 19\tprotein_coding\tCDS\t498496\t498825\t.\t+\t2\tParent=ENST00000346144\n-19\tprotein_coding\tCDS\t504745\t504963\t.\t+\t2\tParent=ENST00000346144\n-19\tprotein_coding\tmRNA\t496454\t505347\t.\t+\t.\tID=ENST00000215637;geneID=ENSG00000099866;gene_name=MADCAM1\n+19\tprotein_coding\tCDS\t504745\t504965\t.\t+\t2\tParent=ENST00000346144\n+19\tprotein_coding\ttranscript\t496454\t505347\t.\t+\t.\tID=ENST00000215637;geneID=ENSG00000099866;gene_name=MADCAM1\n 19\tprotein_coding\texon\t496454\t496551\t.\t+\t.\tParent=ENST00000215637\n 19\tprotein_coding\texon\t497833\t498117\t.\t+\t.\tParent=ENST00000215637\n 19\tprotein_coding\texon\t498496\t498825\t.\t+\t.\tParent=ENST00000215637\n@@ -296,18 +297,18 @@\n 19\tprotein_coding\tCDS\t497833\t498117\t.\t+\t2\tParent=ENST00000215637\n 19\tprotein_coding\tCDS\t498496\t498825\t.\t+\t2\tParent=ENST00000215637\n 19\tprotein_coding\tCDS\t501669\t501929\t.\t+\t2\tParent=ENST00000215637\n-19\tprotein_coding\tCDS\t504745\t504963\t.\t+\t2\tParent=ENST00000215637\n-19\tprotein_coding\tmRNA\t496500\t504965\t.\t+\t.\tID=ENST00000382683;geneID=ENSG00000099866;gene_name=MADCAM1\n+19\tprotein_coding\tCDS\t504745\t504965\t.\t+\t2\tParent=ENST00000215637\n+19\tprotein_coding\ttranscript\t496500\t504965\t.\t+\t.\tID=ENST00000382683;geneID=ENSG00000099866;gene_name=MADCAM1\n 19\tprotein_coding\texon\t496500\t496551\t.\t+\t.\tParent=ENST00000382683\n 19\tprotein_coding\texon\t498496\t498825\t.\t+\t.\tParent=ENST00000382683\n 19\tprotein_coding\texon\t504745\t504965\t.\t+\t.\tParent=ENST00000382683\n 19\tprotein_coding\tCDS\t496500\t496551\t.\t+\t0\tParent=ENST00000382683\n 19\tprotein_coding\tCDS\t498496\t498825\t.\t+\t2\tParent=ENST00000382683\n-19\tprotein_coding\tCDS\t504745\t504963\t.\t+\t2\tParent=ENST00000382683\n-19\tprotein_coding\tmRNA\t507299\t519654\t.\t+\t.\tID=ENST00000359315;geneID=ENSG00000141933;gene_name=TPGS1\n+19\tprotein_coding\tCDS\t504745\t504965\t.\t+\t2\tParent=ENST00000382683\n+19\tprotein_coding\ttranscript\t507299\t519654\t.\t+\t.\tID=ENST00000359315;geneID=ENSG00000141933;gene_name=TPGS1\n 19\tprotein_coding\texon\t507299\t507844\t.\t+\t.\tParent=ENST00000359315\n 19\tprotein_coding\texon\t518889\t519654\t.\t+\t.\tParent=ENST00000359315\n 19\tprotein_coding\tCDS\t507507\t507844\t.\t+\t0\tParent=ENST00000359315\n-19\tprotein_coding\tCDS\t518889\t519421\t.\t+\t1\tParent=ENST00000359315\n+19\tprotein_coding\tCDS\t518889\t519423\t.\t+\t1\tParent=ENST00000359315\n 19\tretained_intron\ttranscript\t507500\t510372\t.\t+\t.\tID=ENST00000588278;geneID=ENSG00000141933;gene_name=TPGS1\n 19\tretained_intron\texon\t507500\t510372\t.\t+\t.\tParent=ENST00000588278\n'
b
diff -r 69e0806b63a4 -r 6ea09f60dee9 test-data/ecoli-k12.processed.gff3
--- a/test-data/ecoli-k12.processed.gff3 Tue Oct 01 12:20:13 2019 -0400
+++ b/test-data/ecoli-k12.processed.gff3 Fri Nov 01 12:54:52 2019 -0400
b
b'@@ -1,32 +1,33 @@\n-# gffread /home/hxr/arbeit/galaxy/database/files/000/dataset_791.dat -F -D -E -o output.gff3\n+# gffread /tmp/tmpq6d_yfqc/files/2/7/7/dataset_277f6e18-b25a-4b59-b712-49b5c202a183.dat -F -o output.gff3\n+# gffread v0.11.4\n ##gff-version 3\n-NC_000913.3\tRefSeq\tgene\t190\t255\t.\t+\t.\tID=gene-b0001;geneID=gene-b0001;gene_name=thrL;Dbxref=ASAP:ABE-0000006,ECOCYC:EG11277,EcoGene:EG11277,GeneID:944742;Name=thrL;gbkey=Gene;gene_biotype=protein_coding;gene_synonym=ECK0001;locus_tag=b0001;protein_id=NP_414542.1\n-NC_000913.3\tRefSeq\tCDS\t190\t255\t.\t+\t0\tParent=gene-b0001;Dbxref=UniProtKB/Swiss-Prot:P0AD86,Genbank:NP_414542.1,ASAP:ABE-0000006,ECOCYC:EG11277,EcoGene:EG11277,GeneID:944742;Name=NP_414542.1;gbkey=CDS;locus_tag=b0001;orig_transcript_id=gnl|b0001|mrna.b0001;product=thr operon leader peptide;transl_table=11\n-NC_000913.3\tRefSeq\tgene\t337\t2799\t.\t+\t.\tID=gene-b0002;geneID=gene-b0002;gene_name=thrA;Dbxref=ASAP:ABE-0000008,ECOCYC:EG10998,EcoGene:EG10998,GeneID:945803;Name=thrA;gbkey=Gene;gene_biotype=protein_coding;gene_synonym=ECK0002,Hs,thrA1,thrA2,thrD;locus_tag=b0002;protein_id=NP_414543.1\n-NC_000913.3\tRefSeq\tCDS\t337\t2799\t.\t+\t0\tParent=gene-b0002;Dbxref=UniProtKB/Swiss-Prot:P00561,Genbank:NP_414543.1,ASAP:ABE-0000008,ECOCYC:EG10998,EcoGene:EG10998,GeneID:945803;Name=NP_414543.1;gbkey=CDS;locus_tag=b0002;orig_transcript_id=gnl|b0002|mrna.b0002;product=fused aspartate kinase/homoserine dehydrogenase 1;transl_table=11\n-NC_000913.3\tRefSeq\tgene\t2801\t3733\t.\t+\t.\tID=gene-b0003;geneID=gene-b0003;gene_name=thrB;Dbxref=ASAP:ABE-0000010,ECOCYC:EG10999,EcoGene:EG10999,GeneID:947498;Name=thrB;gbkey=Gene;gene_biotype=protein_coding;gene_synonym=ECK0003;locus_tag=b0003;protein_id=NP_414544.1\n-NC_000913.3\tRefSeq\tCDS\t2801\t3733\t.\t+\t0\tParent=gene-b0003;Dbxref=UniProtKB/Swiss-Prot:P00547,Genbank:NP_414544.1,ASAP:ABE-0000010,ECOCYC:EG10999,EcoGene:EG10999,GeneID:947498;Name=NP_414544.1;gbkey=CDS;locus_tag=b0003;orig_transcript_id=gnl|b0003|mrna.b0003;product=homoserine kinase;transl_table=11\n-NC_000913.3\tRefSeq\tgene\t3734\t5020\t.\t+\t.\tID=gene-b0004;geneID=gene-b0004;gene_name=thrC;Dbxref=ASAP:ABE-0000012,ECOCYC:EG11000,EcoGene:EG11000,GeneID:945198;Name=thrC;gbkey=Gene;gene_biotype=protein_coding;gene_synonym=ECK0004;locus_tag=b0004;protein_id=NP_414545.1\n-NC_000913.3\tRefSeq\tCDS\t3734\t5020\t.\t+\t0\tParent=gene-b0004;Dbxref=UniProtKB/Swiss-Prot:P00934,Genbank:NP_414545.1,ASAP:ABE-0000012,ECOCYC:EG11000,EcoGene:EG11000,GeneID:945198;Name=NP_414545.1;gbkey=CDS;locus_tag=b0004;orig_transcript_id=gnl|b0004|mrna.b0004;product=threonine synthase;transl_table=11\n-NC_000913.3\tRefSeq\tgene\t5234\t5530\t.\t+\t.\tID=gene-b0005;geneID=gene-b0005;gene_name=yaaX;Dbxref=ASAP:ABE-0000015,ECOCYC:G6081,EcoGene:EG14384,GeneID:944747;Name=yaaX;gbkey=Gene;gene_biotype=protein_coding;gene_synonym=ECK0005;locus_tag=b0005;protein_id=NP_414546.1\n-NC_000913.3\tRefSeq\tCDS\t5234\t5530\t.\t+\t0\tParent=gene-b0005;Dbxref=UniProtKB/Swiss-Prot:P75616,Genbank:NP_414546.1,ASAP:ABE-0000015,ECOCYC:G6081,EcoGene:EG14384,GeneID:944747;Name=NP_414546.1;gbkey=CDS;locus_tag=b0005;orig_transcript_id=gnl|b0005|mrna.b0005;product=DUF2502 domain-containing protein YaaX;transl_table=11\n-NC_000913.3\tRefSeq\tgene\t5683\t6459\t.\t-\t.\tID=gene-b0006;geneID=gene-b0006;gene_name=yaaA;Dbxref=ASAP:ABE-0000018,ECOCYC:EG10011,EcoGene:EG10011,GeneID:944749;Name=yaaA;gbkey=Gene;gene_biotype=protein_coding;gene_synonym=ECK0006;locus_tag=b0006;protein_id=NP_414547.1\n-NC_000913.3\tRefSeq\tCDS\t5683\t6459\t.\t-\t0\tParent=gene-b0006;Dbxref=UniProtKB/Swiss-Prot:P0A8I3,Genbank:NP_414547.1,ASAP:ABE-0000018,ECOCYC:EG10011,EcoGene:EG10011,GeneID:944749;Name=NP_414547.1;gbkey=CDS;locus_tag=b0006;orig_transcript_id=gnl|b0006|mrna.b0006;product=peroxide stress resistance protein YaaA;transl_table=11\n-NC_000913.3\tRefSeq\tgene\t6529\t7959\t.\t-\t.\tID=gene-b0007;geneID=gene-b0007;gene_name=yaaJ;Dbxref=ASAP:ABE-0000020,ECOCYC:EG11555,EcoGene:EG11555,GeneID:944745;Name=yaaJ;gbkey=Gene;gene_biotype=protein_coding;gene_synonym=ECK0007;locus_tag=b0007;protein_id='..b'306\t9893\t.\t+\t0\tParent=gene-b0009;Dbxref=UniProtKB/Swiss-Prot:P0AF03,Genbank:NP_414550.1,ASAP:ABE-0000030,ECOCYC:EG11511,EcoGene:EG11511,GeneID:944760;Name=NP_414550.1;gbkey=CDS;gene=mog;locus_tag=b0009;orig_transcript_id=gnl|b0009|mrna.b0009;product=molybdopterin adenylyltransferase;protein_id=NP_414550.1;transl_table=11\n+NC_000913.3\tRefSeq\tgene\t9928\t10494\t.\t-\t.\tID=gene-b0010;geneID=gene-b0010;gene_name=satP;Dbxref=ASAP:ABE-0000032,ECOCYC:EG11512,EcoGene:EG11512,GeneID:944792;Name=satP;gbkey=Gene;gene=satP;gene_biotype=protein_coding;gene_synonym=ECK0010,yaaH;locus_tag=b0010\n+NC_000913.3\tRefSeq\tCDS\t9928\t10494\t.\t-\t0\tParent=gene-b0010;Dbxref=UniProtKB/Swiss-Prot:P0AC98,Genbank:NP_414551.1,ASAP:ABE-0000032,ECOCYC:EG11512,EcoGene:EG11512,GeneID:944792;Name=NP_414551.1;gbkey=CDS;gene=satP;locus_tag=b0010;orig_transcript_id=gnl|b0010|mrna.b0010;product=acetate/succinate:H(+) symporter;protein_id=NP_414551.1;transl_table=11\n+NC_000913.3\tRefSeq\tgene\t10643\t11356\t.\t-\t.\tID=gene-b0011;geneID=gene-b0011;gene_name=yaaW;Dbxref=ASAP:ABE-0000037,ECOCYC:G6082,EcoGene:EG14340,GeneID:944771;Name=yaaW;gbkey=Gene;gene=yaaW;gene_biotype=protein_coding;gene_synonym=ECK0011;locus_tag=b0011\n+NC_000913.3\tRefSeq\tCDS\t10643\t11356\t.\t-\t0\tParent=gene-b0011;Dbxref=UniProtKB/Swiss-Prot:P75617,Genbank:NP_414552.1,ASAP:ABE-0000037,ECOCYC:G6082,EcoGene:EG14340,GeneID:944771;Name=NP_414552.1;gbkey=CDS;gene=yaaW;locus_tag=b0011;orig_transcript_id=gnl|b0011|mrna.b0011;product=putative enzyme-specific chaperone YaaW;protein_id=NP_414552.1;transl_table=11\n+NC_000913.3\tRefSeq\tgene\t10830\t11315\t.\t+\t.\tID=gene-b0012;geneID=gene-b0012;gene_name=mbiA;Dbxref=ASAP:ABE-0000040,ECOCYC:EG11509,EcoGene:EG11509,GeneID:948295;Name=mbiA;gbkey=Gene;gene=mbiA;gene_biotype=protein_coding;gene_synonym=ECK0012,htgA,htpY;locus_tag=b0012\n+NC_000913.3\tRefSeq\tCDS\t10830\t11315\t.\t+\t0\tParent=gene-b0012;Dbxref=UniProtKB/Swiss-Prot:P28697,Genbank:YP_009518733.1,ASAP:ABE-0000040,ECOCYC:EG11509,EcoGene:EG11509,GeneID:948295;Name=YP_009518733.1;gbkey=CDS;gene=mbiA;locus_tag=b0012;orig_transcript_id=gnl|b0012|mrna.CDS13;product=uncharacterized protein MbiA;protein_id=YP_009518733.1;transl_table=11\n+NC_000913.3\tRefSeq\tgene\t11382\t11786\t.\t-\t.\tID=gene-b0013;geneID=gene-b0013;gene_name=yaaI;Dbxref=ASAP:ABE-0000043,ECOCYC:G8202,EcoGene:EG11513,GeneID:944751;Name=yaaI;gbkey=Gene;gene=yaaI;gene_biotype=protein_coding;gene_synonym=ECK0013;locus_tag=b0013\n+NC_000913.3\tRefSeq\tCDS\t11382\t11786\t.\t-\t0\tParent=gene-b0013;Dbxref=UniProtKB/Swiss-Prot:P28696,Genbank:NP_414554.1,ASAP:ABE-0000043,ECOCYC:G8202,EcoGene:EG11513,GeneID:944751;Name=NP_414554.1;gbkey=CDS;gene=yaaI;locus_tag=b0013;orig_transcript_id=gnl|b0013|mrna.b0013;product=DUF2541 domain-containing protein YaaI;protein_id=NP_414554.1;transl_table=11\n+NC_000913.3\tRefSeq\tgene\t12163\t14079\t.\t+\t.\tID=gene-b0014;geneID=gene-b0014;gene_name=dnaK;Dbxref=ASAP:ABE-0000052,ECOCYC:EG10241,EcoGene:EG10241,GeneID:944750;Name=dnaK;gbkey=Gene;gene=dnaK;gene_biotype=protein_coding;gene_synonym=ECK0014,groPAB,groPC,groPF,grpC,grpF,seg;locus_tag=b0014\n+NC_000913.3\tRefSeq\tCDS\t12163\t14079\t.\t+\t0\tParent=gene-b0014;Dbxref=UniProtKB/Swiss-Prot:P0A6Y8,Genbank:NP_414555.1,ASAP:ABE-0000052,ECOCYC:EG10241,EcoGene:EG10241,GeneID:944750;Name=NP_414555.1;gbkey=CDS;gene=dnaK;locus_tag=b0014;orig_transcript_id=gnl|b0014|mrna.b0014;product=chaperone protein DnaK;protein_id=NP_414555.1;transl_table=11\n+NC_000913.3\tRefSeq\tgene\t14168\t15298\t.\t+\t.\tID=gene-b0015;geneID=gene-b0015;gene_name=dnaJ;Dbxref=ASAP:ABE-0000054,ECOCYC:EG10240,EcoGene:EG10240,GeneID:944753;Name=dnaJ;gbkey=Gene;gene=dnaJ;gene_biotype=protein_coding;gene_synonym=ECK0015,groP,grpC;locus_tag=b0015\n+NC_000913.3\tRefSeq\tCDS\t14168\t15298\t.\t+\t0\tParent=gene-b0015;Dbxref=UniProtKB/Swiss-Prot:P08622,Genbank:NP_414556.1,ASAP:ABE-0000054,ECOCYC:EG10240,EcoGene:EG10240,GeneID:944753;Name=NP_414556.1;gbkey=CDS;gene=dnaJ;locus_tag=b0015;orig_transcript_id=gnl|b0015|mrna.b0015;product=chaperone protein DnaJ;protein_id=NP_414556.1;transl_table=11\n'