Repository 'fml_gff3togtf'
hg clone https://toolshed.g2.bx.psu.edu/repos/vipints/fml_gff3togtf

Changeset 3:ff2c2e6f4ab3 (2014-06-11)
Previous changeset 2:db3c67b03d55 (2014-06-10) Next changeset 4:619e0fcd9126 (2014-06-11)
Commit message:
Uploaded version 2.0.0 of gfftools ready to import to local instance
added:
GFFtools-GX/GFFParser.py
GFFtools-GX/README
GFFtools-GX/bed_to_gff.py
GFFtools-GX/bed_to_gff.xml
GFFtools-GX/gbk_to_gff.py
GFFtools-GX/gbk_to_gff.xml
GFFtools-GX/gff_to_bed.py
GFFtools-GX/gff_to_bed.xml
GFFtools-GX/gff_to_gtf.py
GFFtools-GX/gff_to_gtf.xml
GFFtools-GX/gtf_to_gff.py
GFFtools-GX/gtf_to_gff.xml
GFFtools-GX/helper.py
GFFtools-GX/test-data/AceView_gff3_to_gtf.gtf
GFFtools-GX/test-data/AceView_ncbi_37.gff3
GFFtools-GX/test-data/AceView_ncbi_37.gtf
GFFtools-GX/test-data/Aly_JGI.bed
GFFtools-GX/test-data/Aly_JGI.gff3
GFFtools-GX/test-data/ENSEMBL_mm9.gff3
GFFtools-GX/test-data/ENSEMBL_mm9.gtf
GFFtools-GX/test-data/ENSEMBL_mm9_gff3_to_gtf.gtf
GFFtools-GX/test-data/JGI_genes.gff3
GFFtools-GX/test-data/JGI_genes.gtf
GFFtools-GX/test-data/MB7_3R.bed
GFFtools-GX/test-data/MB7_3R.gff3
GFFtools-GX/test-data/UCSC_transcripts.gff3
GFFtools-GX/test-data/UCSC_transcripts.gtf
GFFtools-GX/test-data/ccds_genes.bed
GFFtools-GX/test-data/ccds_genes.gff3
GFFtools-GX/test-data/hs_2009.bed
GFFtools-GX/test-data/hs_2009.gff3
GFFtools-GX/test-data/s_cerevisiae_SCU49845.gbk
GFFtools-GX/test-data/s_cerevisiae_SCU49845.gff3
GFFtools-GX/tool_conf.xml.sample
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/GFFParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/GFFParser.py Wed Jun 11 16:29:25 2014 -0400
[
b'@@ -0,0 +1,491 @@\n+#!/usr/bin/env python\n+"""\n+Extract genome annotation from a GFF (a tab delimited format for storing sequence features and annotations) file.\n+\n+Requirements: \n+    Numpy :- http://numpy.org/ \n+    Scipy :- http://scipy.org/ \n+\n+Copyright (C)\t\n+\n+2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. \n+2012-2014 Memorial Sloan Kettering Cancer Center, New York City, USA.\n+"""\n+\n+import re\n+import os\n+import sys\n+import urllib\n+import numpy as np\n+import scipy.io as sio\n+from collections import defaultdict\n+import helper as utils \n+\n+def attribute_tags(col9):\n+    """ \n+    Split the key-value tags from the attribute column, it takes column number 9 from GTF/GFF file \n+\n+    @args col9: attribute column from GFF file \n+    @type col9: str\n+    """\n+    info = defaultdict(list)\n+    is_gff = False\n+    \n+    if not col9:\n+        return is_gff, info\n+        \n+    # trim the line ending semi-colon  ucsc may have some white-space  \n+    col9 = col9.rstrip(\';| \')\n+    # attributes from 9th column \n+    atbs = col9.split(" ; ")\n+    if len(atbs) == 1:\n+        atbs = col9.split("; ")\n+        if len(atbs) == 1:\n+            atbs = col9.split(";")\n+    # check the GFF3 pattern which has key value pairs like:\n+    gff3_pat = re.compile("\\w+=")\n+    # sometime GTF have: gene_id uc002zkg.1;\n+    gtf_pat = re.compile("\\s?\\w+\\s")\n+\n+    key_vals = []\n+\n+    if gff3_pat.match(atbs[0]): # gff3 pattern \n+        is_gff = True\n+        key_vals = [at.split(\'=\') for at in atbs]\n+    elif gtf_pat.match(atbs[0]): # gtf pattern\n+        for at in atbs:\n+            key_vals.append(at.strip().split(" ",1))\n+    else:\n+        # to handle attribute column has only single value \n+        key_vals.append([\'ID\', atbs[0]])\n+    # get key, val items \n+    for item in key_vals:\n+        key, val = item\n+        # replace the double qoutes from feature identifier \n+  
      val = re.sub(\'"\', \'\', val)\n+        # replace the web formating place holders to plain text format \n+        info[key].extend([urllib.unquote(v) for v in val.split(\',\') if v])\n+\n+    return is_gff, info\n+                \n+def spec_features_keywd(gff_parts):\n+    """\n+    Specify the feature key word according to the GFF specifications\n+\n+    @args gff_parts: attribute field key \n+    @type gff_parts: str \n+    """\n+    for t_id in ["transcript_id", "transcriptId", "proteinId"]:\n+        try:\n+            gff_parts["info"]["Parent"] = gff_parts["info"][t_id]\n+            break\n+        except KeyError:\n+            pass\n+    for g_id in ["gene_id", "geneid", "geneId", "name", "gene_name", "genename"]:\n+        try:\n+            gff_parts["info"]["GParent"] = gff_parts["info"][g_id]\n+            break\n+        except KeyError:\n+            pass\n+    ## TODO key words\n+    for flat_name in ["Transcript", "CDS"]:\n+        if gff_parts["info"].has_key(flat_name):\n+            # parents\n+            if gff_parts[\'type\'] in [flat_name] or re.search(r\'transcript\', gff_parts[\'type\'], re.IGNORECASE):\n+                if not gff_parts[\'id\']:\n+                    gff_parts[\'id\'] = gff_parts[\'info\'][flat_name][0]\n+                    #gff_parts["info"]["ID"] = [gff_parts["id"]]\n+            # children \n+            elif gff_parts["type"] in ["intron", "exon", "three_prime_UTR",\n+                        "coding_exon", "five_prime_UTR", "CDS", "stop_codon",\n+                        "start_codon"]:\n+                gff_parts["info"]["Parent"] = gff_parts["info"][flat_name]\n+            break\n+    return gff_parts\n+\n+def Parse(ga_file):\n+    """\n+    Parsing GFF/GTF file based on feature relationship, it takes the input file.\n+\n+    @args ga_file: input file name \n+    @type ga_file: str \n+    """\n+    child_map = defaultdict(list)\n+    parent_map = dict()\n+\n+    ga_handle = utils.open_file(ga_file)\n+\n+    
for rec in ga_handle:\n+        rec = rec.strip(\'\\n\\r\')\n+\n+        # skip empty line fasta identifier and commented line\n+        if not rec or rec[0'..b'        gene[g_cnt][\'polya_conf\'] = []\n+        gene[g_cnt][\'is_valid\'] = []\n+        gene[g_cnt][\'transcript_complete\'] = []\n+        gene[g_cnt][\'is_complete\'] = []\n+        gene[g_cnt][\'is_correctly_gff3_referenced\'] = \'\'\n+        gene[g_cnt][\'splicegraph\'] = []\n+        g_cnt += 1 \n+\n+    ## deleting empty gene records from the main array\n+    XPFLG=0\n+    for XP, ens in enumerate(gene):\n+        if ens[0]==0:\n+            XPFLG=1\n+            break\n+    \n+    if XPFLG==1:\n+        XQC = range(XP, len(gene)+1)\n+        gene = np.delete(gene, XQC)\n+\n+    return gene \n+\n+def NonetoemptyList(XS):\n+    """\n+    Convert a None type to empty list \n+\n+    @args XS: None type \n+    @type XS: str \n+    """\n+    return [] if XS is None else XS \n+\n+def create_missing_feature_type(p_feat, c_feat):\n+    """\n+    GFF/GTF file defines only child features. This function tries to create \n+    the parent feature from the information provided in the attribute column. \n+\n+    example: \n+    chr21   hg19_knownGene  exon    9690071 9690100 0.000000        +       .       gene_id "uc002zkg.1"; transcript_id "uc002zkg.1"; \n+    chr21   hg19_knownGene  exon    9692178 9692207 0.000000        +       .       gene_id "uc021wgt.1"; transcript_id "uc021wgt.1"; \n+    chr21   hg19_knownGene  exon    9711935 9712038 0.000000        +       .       gene_id "uc011abu.2"; transcript_id "uc011abu.2"; \n+\n+    This function gets the parsed feature annotations. 
\n+    \n+    @args p_feat: Parent feature map  \n+    @type p_feat: collections defaultdict\n+    @args c_feat: Child feature map  \n+    @type c_feat: collections defaultdict\n+    """\n+\n+    child_n_map = defaultdict(list)\n+    for fid, det in c_feat.items():\n+        # get the details from grand child  \n+        GID = STRD = SCR = None\n+        SPOS, EPOS = [], [] \n+        TYP = dict()\n+        for gchild in det:\n+            GID = gchild.get(\'gene_id\', [\'\'])[0] \n+            SPOS.append(gchild.get(\'location\', [])[0]) \n+            EPOS.append(gchild.get(\'location\', [])[1]) \n+            STRD = gchild.get(\'strand\', \'\')\n+            SCR = gchild.get(\'score\', \'\')\n+            TYP[gchild.get(\'type\', \'\')] = 1\n+        SPOS.sort() \n+        EPOS.sort()\n+        \n+        # infer transcript type\n+        transcript_type = \'transcript\'\n+        transcript_type = \'mRNA\' if TYP.get(\'CDS\', \'\') or TYP.get(\'cds\', \'\') else transcript_type\n+        \n+        # gene id and transcript id are same\n+        transcript_id = fid[-1]\n+        if GID == transcript_id:\n+            transcript_id = \'Transcript:\' + str(GID)\n+        \n+        # level -1 feature type \n+        p_feat[(fid[0], fid[1], GID)] = dict( type = \'gene\',\n+                                            location = [], ## infer location based on multiple transcripts  \n+                                            strand = STRD,\n+                                            name = GID )\n+        # level -2 feature type \n+        child_n_map[(fid[0], fid[1], GID)].append(\n+                                            dict( type = transcript_type,\n+                                            location =  [SPOS[0], EPOS[-1]], \n+                                            strand = STRD, \n+                                            score = SCR, \n+                                            ID = transcript_id,\n+                                           
 gene_id = \'\' ))\n+        # reorganizing the grand child\n+        for gchild in det:\n+            child_n_map[(fid[0], fid[1], transcript_id)].append(\n+                                            dict( type = gchild.get(\'type\', \'\'),\n+                                            location =  gchild.get(\'location\'),\n+                                            strand = gchild.get(\'strand\'), \n+                                            ID = gchild.get(\'ID\'),\n+                                            score = gchild.get(\'score\'),\n+                                            gene_id = \'\' ))\n+    return p_feat, child_n_map \n+\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/README Wed Jun 11 16:29:25 2014 -0400
[
@@ -0,0 +1,59 @@
+A collection of tools for converting genome annotation between GTF (Gene Transfer Format), 
+BED (Browser Extensible Data) and GFF (Generic Feature Format).
+
+INTRODUCTION
+
+Several genome annotation centers provide their data in GTF, BED, GFF3 etc. This repository contains 
+a few programs that mainly deal with converting between the GTF, BED and GFF3 formats. They are extensively tested 
+with files from different centers like ENSEMBL, UCSC, JGI and NCBI AceView. Please follow the 
+instructions below to clone these tools into your Galaxy instance.
+
+CONTENTS
+
+Tool configuration files in *.xml format. 
+
+    gtf_to_gff.xml
+    gff_to_gtf.xml
+    bed_to_gff.xml
+    gff_to_bed.xml
+    gbk_to_gff.xml
+
+Python based scripts. 
+
+    gtf_to_gff.py: convert data from GTF to valid GFF3.
+    gff_to_gtf.py: convert data from GFF3 to GTF.
+    bed_to_gff.py: convert data from a 12 column UCSC wiggle BED format to GFF3.
+    gff_to_bed.py: convert gene transcript annotation from GFF3 to UCSC wiggle 12 column BED format.
+    gbk_to_gff.py: convert data from genbank format to GFF. 
+    GFFParser.py: Parse GFF/GTF files.  
+    helper.py: Utility functions.
+
+test-data: Test data set. (move to your galaxy_root_folder/test-data/)
+    
+    You may need to move the test files into your test-data directory so Galaxy can find them. 
+    The functional tests can then be run, for example, as: 
+
+    example: 
+    sh run_functional_tests.sh -id fml_gtf2gff
+
+REQUIREMENTS
+
+    python (GFFParser.py additionally needs NumPy and SciPy; gbk_to_gff.py needs BioPython) 
+
+COMMENTS/QUESTIONS 
+
+I can be reached at vipin [at] cbio.mskcc.org 
+
+LICENSE
+
+Copyright (C) 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society
+              2013-2014 Memorial Sloan Kettering Cancer Center
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3 of the License, or
+(at your option) any later version.
+
+COURTESY
+
+To the Galaxy Team.
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/bed_to_gff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/bed_to_gff.py Wed Jun 11 16:29:25 2014 -0400
[
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+"""
+Convert genome annotation data in a 12 column BED format to GFF3. 
+
+Usage: python bed_to_gff.py in.bed > out.gff
+
+Requirement:
+    helper.py : https://github.com/vipints/GFFtools-GX/blob/master/helper.py
+
+Copyright (C) 
+    2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
+    2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA.
+"""
+
+import re
+import sys
+import helper 
+
+def __main__():
+    """
+    main function 
+    """
+
+    try:
+        bed_fname = sys.argv[1]
+    except:
+        print __doc__
+        sys.exit(-1)
+
+    bed_fh = helper.open_file(bed_fname)
+
+    for line in bed_fh: 
+        line = line.strip( '\n\r' )
+
+        if not line or line[0] in  ['#']:
+            continue 
+
+        parts = line.split('\t') 
+        assert len(parts) >= 12, line
+
+        rstarts = parts[-1].split(',')
+        rstarts.pop() if rstarts[-1] == '' else rstarts
+
+        exon_lens = parts[-2].split(',')
+        exon_lens.pop() if exon_lens[-1] == '' else exon_lens
+        
+        if len(rstarts) != len(exon_lens):
+            continue # checking the consistency col 11 and col 12 
+
+        if len(rstarts) != int(parts[-3]): 
+            continue # checking the number of exons and block count are same
+        
+        if not parts[5] in ['+', '-']:
+            parts[5] = '.' # replace the unknown strand with '.' 
+
+        # bed2gff result line 
+        print '%s\tbed2gff\tgene\t%d\t%s\t%s\t%s\t.\tID=Gene:%s;Name=Gene:%s' % (parts[0], int(parts[1])+1, parts[2], parts[4], parts[5], parts[3], parts[3])
+        print '%s\tbed2gff\ttranscript\t%d\t%s\t%s\t%s\t.\tID=%s;Name=%s;Parent=Gene:%s' % (parts[0], int(parts[1])+1, parts[2], parts[4], parts[5], parts[3], parts[3], parts[3])
+
+        st = int(parts[1])
+        for ex_cnt in range(int(parts[-3])):
+            start = st + int(rstarts[ex_cnt]) + 1
+            stop = start + int(exon_lens[ex_cnt]) - 1
+            print '%s\tbed2gff\texon\t%d\t%d\t%s\t%s\t.\tParent=%s' % (parts[0], start, stop, parts[4], parts[5], parts[3])
+
+    bed_fh.close()
+
+
+if __name__ == "__main__": 
+    __main__()
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/bed_to_gff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/bed_to_gff.xml Wed Jun 11 16:29:25 2014 -0400
b
@@ -0,0 +1,89 @@
+<tool id="fml_bed2gff" name="BED-to-GFF" version="2.0.0">
+ <description>converter</description>
+ <command interpreter="python">bed_to_gff.py $inf_bed > $gff_format 
+ </command> 
+ <inputs>
+   <param format="bed" name="inf_bed" type="data" label="Convert this query" help="Provide genome annotation in 12 column BED format."/>
+    </inputs>
+   <outputs>
+   <data format="gff3" name="gff_format" label="${tool.name} on ${on_string}: Converted" /> 
+   </outputs>
+ <tests>
+        <test>
+                <param name="inf_bed" value="ccds_genes.bed" />
+                <output name="gff_format" file="ccds_genes.gff3" />
+        </test>
+        <test>
+                <param name="inf_bed" value="hs_2009.bed" />
+                <output name="gff_format" file="hs_2009.gff3" />
+        </test>
+        </tests>
+   <help>
+
+**What it does**
+
+This tool converts data from a 12 column UCSC wiggle BED format to GFF3 (scroll down for format description).
+
+--------
+
+**Example**
+
+- The following data in UCSC Wiggle BED format::
+
+ chr1    11873   14409   uc001aaa.3      0       +       11873   11873   0       3       354,109,1189,   0,739,1347,
+
+- Will be converted to GFF3::
+
+ ##gff-version 3
+ chr1    bed2gff gene    11874   14409   0       +       .       ID=Gene:uc001aaa.3;Name=Gene:uc001aaa.3
+ chr1    bed2gff transcript      11874   14409   0       +       .       ID=uc001aaa.3;Name=uc001aaa.3;Parent=Gene:uc001aaa.3
+ chr1    bed2gff exon    11874   12227   0       +       .       Parent=uc001aaa.3
+ chr1    bed2gff exon    12613   12721   0       +       .       Parent=uc001aaa.3
+ chr1    bed2gff exon    13221   14409   0       +       .       Parent=uc001aaa.3
+
+--------
+
+**About formats**
+
+**BED format** Browser Extensible Data format was designed at UCSC for displaying data tracks in the Genome Browser. It has three required fields and several additional optional ones:
+
+The first three BED fields (required) are::
+
+    1. chrom - The name of the chromosome (e.g. chr1, chrY_random).
+    2. chromStart - The starting position in the chromosome. (The first base in a chromosome is numbered 0.)
+    3. chromEnd - The ending position in the chromosome, plus 1 (i.e., a half-open interval).
+
+The additional BED fields (optional) are::
+
+    4. name - The name of the BED line.
+    5. score - A score between 0 and 1000.
+    6. strand - Defines the strand - either '+' or '-'.
+    7. thickStart - The starting position where the feature is drawn thickly at the Genome Browser.
+    8. thickEnd - The ending position where the feature is drawn thickly at the Genome Browser.
+    9. reserved - This should always be set to zero.
+   10. blockCount - The number of blocks (exons) in the BED line.
+   11. blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount.
+   12. blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount.
+
+**GFF3 format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF3 lines have nine tab-separated fields::
+
+    1. seqid - Must be a chromosome or scaffold or contig.
+    2. source - The program that generated this feature.
+    3. type - The name of this type of feature. Some examples of standard feature types are "gene", "CDS", "protein", "mRNA", and "exon". 
+    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
+    5. stop - The ending position of the feature (inclusive).
+    6. score - A score between 0 and 1000. If there is no score value, enter ".".
+    7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
+    8. phase - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
+    9. attributes - All lines with the same group are linked together into a single item.
+
+--------
+
+**Copyright**
+
+2009-2014 Max Planck Society, University of Tübingen &amp; Memorial Sloan Kettering Cancer Center
+
+Sreedharan VT, Schultheiss SJ, Jean G, Kahles A, Bohnert R, Drewe P, Mudrakarta P, Görnitz N, Zeller G, Rätsch G. Oqtans: the RNA-seq workbench in the cloud for complete and reproducible quantitative transcriptome analysis. Bioinformatics 10.1093/bioinformatics/btt731 (2014)
+
+ </help>
+</tool>
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gbk_to_gff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/gbk_to_gff.py Wed Jun 11 16:29:25 2014 -0400
[
@@ -0,0 +1,213 @@
+#!/usr/bin/env python
+"""
+Convert data from Genbank format to GFF. 
+
+Usage: 
+python gbk_to_gff.py in.gbk > out.gff 
+
+Requirements:
+    BioPython:- http://biopython.org/
+    helper.py : https://github.com/vipints/GFFtools-GX/blob/master/helper.py
+
+Copyright (C) 
+    2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
+    2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA.
+"""
+
+import os
+import re
+import sys
+import collections
+from Bio import SeqIO
+import helper 
+
+def feature_table(chr_id, source, orient, genes, transcripts, cds, exons, unk):
+    """
+    Write the feature information
+    """
+
+    for gname, ginfo in genes.items():
+        line = [str(chr_id), 
+                'gbk_to_gff',
+                ginfo[3],
+                str(ginfo[0]),
+                str(ginfo[1]),
+                '.',
+                ginfo[2],
+                '.',
+                'ID=%s;Name=%s' % (str(gname), str(gname))]
+        print '\t'.join(line) 
+        ## construct the transcript line is not defined in the original file 
+        t_line = [str(chr_id), 'gbk_to_gff', source, 0, 1, '.', ginfo[2], '.'] 
+
+        if not transcripts:
+            t_line.append('ID=Transcript:%s;Parent=%s' % (str(gname), str(gname)))
+
+            if exons: ## get the entire transcript region  from the defined feature
+                t_line[3] = str(exons[gname][0][0])
+                t_line[4] = str(exons[gname][0][-1])
+            elif cds:
+                t_line[3] = str(cds[gname][0][0])
+                t_line[4] = str(cds[gname][0][-1])
+            print '\t'.join(t_line) 
+
+            if exons:
+                exon_line_print(t_line, exons[gname], 'Transcript:'+str(gname), 'exon')
+
+            if cds:
+                exon_line_print(t_line, cds[gname], 'Transcript:'+str(gname), 'CDS')
+                if not exons:
+                    exon_line_print(t_line, cds[gname], 'Transcript:'+str(gname), 'exon')
+
+        else: ## transcript is defined 
+            for idx in transcripts[gname]: 
+                t_line[2] = idx[3]
+                t_line[3] = str(idx[0])
+                t_line[4] = str(idx[1])
+                t_line.append('ID='+str(idx[2])+';Parent='+str(gname))
+                print '\t'.join(t_line) 
+                
+                ## feature line print call 
+                if exons:
+                    exon_line_print(t_line, exons[gname], str(idx[2]), 'exon')
+                if cds:
+                    exon_line_print(t_line, cds[gname], str(idx[2]), 'CDS')
+                    if not exons:
+                        exon_line_print(t_line, cds[gname], str(idx[2]), 'exon')
+
+    if len(genes) == 0: ## feature entry with fragment information 
+        
+        line = [str(chr_id), 'gbk_to_gff', source, 0, 1, '.', orient, '.'] 
+        fStart = fStop = None 
+
+        for eid, ex in cds.items(): 
+            fStart = ex[0][0] 
+            fStop = ex[0][-1]
+
+        for eid, ex in exons.items(): 
+            fStart = ex[0][0] 
+            fStop = ex[0][-1]
+
+        if fStart or fStart:
+
+            line[2] = 'gene'
+            line[3] = str(fStart)
+            line[4] = str(fStop)
+            line.append('ID=Unknown_Gene_' + str(unk) + ';Name=Unknown_Gene_' + str(unk))
+            print "\t".join(line)
+
+            if not cds:
+                line[2] = 'transcript'
+            else:
+                line[2] = 'mRNA'
+
+            line[8] = 'ID=Unknown_Transcript_' + str(unk) + ';Parent=Unknown_Gene_' + str(unk)
+            print "\t".join(line)
+           
+            if exons:
+                exon_line_print(line, cds[None], 'Unknown_Transcript_' + str(unk), 'exon')
+                
+            if cds:
+                exon_line_print(line, cds[None], 'Unknown_Transcript_' + str(unk), 'CDS')
+                if not exons:
+                    exon_line_print(line, cds[None], 'Unknown_Transcript_' + str(unk), 'exon')
+                
+            unk +=1 
+
+    return unk
+
+def exon_line_print(temp_line, trx_exons, parent, ftype):
+    """
+    Print the EXON feature line 
+    """
+
+    for ex in trx_exons:
+        temp_line[2] = ftype
+        temp_line[3] = str(ex[0])
+        temp_line[4] = str(ex[1])
+        temp_line[8] = 'Parent=%s' % parent
+        print '\t'.join(temp_line)
+
+def gbk_parse(fname):
+    """
+    Extract genome annotation recods from genbank format 
+
+    @args fname: gbk file name 
+    @type fname: str
+    """
+
+    fhand = helper.open_file(gbkfname)
+    unk = 1 
+
+    for record in SeqIO.parse(fhand, "genbank"):
+
+        gene_tags = dict()
+        tx_tags = collections.defaultdict(list) 
+        exon = collections.defaultdict(list) 
+        cds = collections.defaultdict(list) 
+        mol_type, chr_id = None, None 
+
+        for rec in record.features:
+
+            if rec.type == 'source':
+                try:
+                    mol_type = rec.qualifiers['mol_type'][0]
+                except:
+                    mol_type = '.'
+                    pass 
+                try:
+                    chr_id = rec.qualifiers['chromosome'][0]
+                except:
+                    chr_id = record.name 
+                continue 
+
+            strand='-'
+            strand='+' if rec.strand>0 else strand
+            
+            fid = None 
+            try:
+                fid = rec.qualifiers['gene'][0]
+            except:
+                pass
+
+            transcript_id = None
+            try:
+                transcript_id = rec.qualifiers['transcript_id'][0]
+            except:
+                pass 
+
+            if re.search(r'gene', rec.type):
+                gene_tags[fid] = (rec.location._start.position+1, 
+                                    rec.location._end.position, 
+                                    strand,
+                                    rec.type
+                                    )
+            elif rec.type == 'exon':
+                exon[fid].append((rec.location._start.position+1, 
+                                    rec.location._end.position))
+            elif rec.type=='CDS':
+                cds[fid].append((rec.location._start.position+1, 
+                                    rec.location._end.position))
+            else: 
+                # get all transcripts 
+                if transcript_id: 
+                    tx_tags[fid].append((rec.location._start.position+1,
+                                    rec.location._end.position, 
+                                    transcript_id,
+                                    rec.type))
+        # record extracted, generate feature table
+        unk = feature_table(chr_id, mol_type, strand, gene_tags, tx_tags, cds, exon, unk)
+        
+    fhand.close()
+
+
+if __name__=='__main__': 
+
+    try:
+        gbkfname = sys.argv[1]
+    except:
+        print __doc__
+        sys.exit(-1)
+
+    ## extract gbk records  
+    gbk_parse(gbkfname) 
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gbk_to_gff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/gbk_to_gff.xml Wed Jun 11 16:29:25 2014 -0400
b
@@ -0,0 +1,91 @@
+<tool id="fml_gbk2gff" name="GBK-to-GFF" version="2.0.0">
+  <description>converter</description>
+   <command interpreter="python">gbk_to_gff.py $inf_gbk > $gff_format
+   </command>
+   <inputs>
+ <param format="gb,gbk,genbank,txt" name="inf_gbk" type="data" label="Convert this query" help="GenBank flat file format consists of an annotation section and a sequence section."/>
+   </inputs>
+ <outputs>
+ <data format="gff3" name="gff_format" label="${tool.name} on ${on_string}: Converted"/>
+   </outputs>
+ <tests>
+        <test>
+                <param name="inf_gbk" value="s_cerevisiae_SCU49845.gbk" />
+                <output name="gff_format" file="s_cerevisiae_SCU49845.gff3" />
+        </test>
+ </tests>
+   <help>
+    
+**What it does**
+
+This tool converts data from a GenBank_ flat file format to GFF (scroll down for format description).
+
+.. _GenBank: http://www.ncbi.nlm.nih.gov/genbank/ 
+
+------
+
+**Example**
+
+- The following data in GenBank format::
+
+    LOCUS       NM_001202705            2406 bp    mRNA    linear   PLN 28-MAY-2011
+    DEFINITION  Arabidopsis thaliana thiamine biosynthesis protein ThiC (THIC)
+                mRNA, complete cds.
+    ACCESSION   NM_001202705
+    VERSION     NM_001202705.1  GI:334184566.........
+    FEATURES             Location/Qualifiers
+         source          1..2406
+                         /organism="Arabidopsis thaliana"
+                         /mol_type="mRNA"
+                         /db_xref="taxon:3702"........
+         gene            1..2406
+                         /gene="THIC"
+                         /locus_tag="AT2G29630"
+                         /gene_synonym="PY; PYRIMIDINE REQUIRING; T27A16.27;........
+    ORIGIN
+        1 aagcctttcg ctttaggctg cattgggccg tgacaatatt cagacgattc aggaggttcg
+        61 ttcctttttt aaaggaccct aatcactctg agtaccactg actcactcag tgtgcgcgat
+        121 tcatttcaaa aacgagccag cctcttcttc cttcgtctac tagatcagat ccaaagcttc
+        181 ctcttccagc tatggctgct tcagtacact gtaccttgat gtccgtcgta tgcaacaaca
+    //
+
+
+- Will be converted to GFF3::
+
+    ##gff-version 3
+    NM_001202705    gbk_to_gff chromosome      1       2406    .       +       1       ID=NM_001202705;Alias=2;Dbxref=taxon:3702;Name=NM_001202705
+    NM_001202705    gbk_to_gff gene    1       2406    .       +       1       ID=AT2G29630;Dbxref=GeneID:817513,TAIR:AT2G29630;Name=THIC
+    NM_001202705    gbk_to_gff mRNA    192     2126    .       +       1       ID=AT2G29630.t01;Parent=AT2G29630
+    NM_001202705    gbk_to_gff CDS     192     2126    .       +       1       ID=AT2G29630.p01;Parent=AT2G29630.t01
+    NM_001202705    gbk_to_gff exon    192     2126    .       +       1       Parent=AT2G29630.t01
+
+------
+
+**About formats** 
+
+**GenBank format** An example of a GenBank record may be viewed here_
+
+.. _here: http://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html 
+
+**GFF3** Generic Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF3 lines have nine tab-separated fields::
+
+    1. seqid - Must be a chromosome or scaffold or contig.
+    2. source - The program that generated this feature.
+    3. type - The name of this type of feature. Some examples of standard feature types are "gene", "CDS", "protein", "mRNA", and "exon".
+    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
+    5. stop - The ending position of the feature (inclusive).
+    6. score - A score between 0 and 1000. If there is no score value, enter ".".
+    7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
+    8. phase - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
+    9. attributes - All lines with the same group are linked together into a single item.
+
+--------
+
+**Copyright**
+
+2009-2014 Max Planck Society, University of Tübingen &amp; Memorial Sloan Kettering Cancer Center
+
+Sreedharan VT, Schultheiss SJ, Jean G, Kahles A, Bohnert R, Drewe P, Mudrakarta P, Görnitz N, Zeller G, Rätsch G. Oqtans: the RNA-seq workbench in the cloud for complete and reproducible quantitative transcriptome analysis. Bioinformatics 10.1093/bioinformatics/btt731 (2014)
+
+ </help>
+</tool>
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gff_to_bed.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/gff_to_bed.py Wed Jun 11 16:29:25 2014 -0400
[
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+"""
+Convert genome annotation data in GFF/GTF to a 12 column BED format. 
+BED format typically represents the transcript models. 
+
+Usage: python gff_to_bed.py in.gff > out.bed  
+
+Requirement:
+    GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py    
+
+Copyright (C) 
+    2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
+    2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA.
+"""
+
+import re
+import sys
+import GFFParser
+
+def writeBED(tinfo):
+    """
+    writing result files in bed format 
+
+    @args tinfo: list of genes 
+    @args tinfo: numpy object  
+    """
+
+    for ent1 in tinfo:
+        for idx, tid in enumerate(ent1['transcripts']):
+            exon_cnt = len(ent1['exons'][idx])
+            exon_len = ''
+            exon_cod = '' 
+            rel_start = None 
+            rel_stop = None 
+            for idz, ex_cod in enumerate(ent1['exons'][idx]):#check for exons of corresponding transcript  
+                exon_len += '%d,' % (ex_cod[1]-ex_cod[0]+1)
+                if idz == 0: #calculate the relative start position 
+                    exon_cod += '0,'
+                    rel_start = int(ex_cod[0])
+                    rel_stop = ex_cod[1]
+                else:
+                    exon_cod += '%d,' % (ex_cod[0]-rel_start)
+                    rel_stop = int(ex_cod[1])
+            
+            if exon_len:
+                score = '0' 
+                score = ent1['score'][0] if ent1['score'] else score
+                out_print = [ent1['chr'],
+                            str(rel_start),
+                            str(rel_stop),
+                            tid[0],
+                            score, 
+                            ent1['strand'], 
+                            str(rel_start),
+                            str(rel_stop),
+                            '0',
+                            str(exon_cnt),
+                            exon_len,
+                            exon_cod]
+                print '\t'.join(out_print)  
+    
+def __main__():
+    try:
+        query_file = sys.argv[1]
+    except:
+        print __doc__
+        sys.exit(-1)
+
+    Transcriptdb = GFFParser.Parse(query_file)  
+    writeBED(Transcriptdb)
+
+if __name__ == "__main__": 
+    __main__() 
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gff_to_bed.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/gff_to_bed.xml Wed Jun 11 16:29:25 2014 -0400
b
@@ -0,0 +1,90 @@
+<tool id="fml_gff2bed" name="GFF-to-BED" version="2.0.0">
+ <description>converter</description> 
+ <command interpreter="python">gff_to_bed.py $inf_gff > $bed_format 
+ </command> 
+ <inputs>
+   <param format="gtf,gff,gff3" name="inf_gff" type="data" label="Convert this query" help="Provide genome annotation file in GFF, GTF, GFF3."/>
+    </inputs>
+   <outputs>
+   <data format="bed" name="bed_format" label="${tool.name} on ${on_string}: Converted" /> 
+   </outputs>
+ <tests>
+        <test>
+                <param name="inf_gff" value="Aly_JGI.gff3" />
+                <output name="bed_format" file="Aly_JGI.bed" />
+        </test>
+        <test>
+                <param name="inf_gff" value="MB7_3R.gff3" />
+                <output name="bed_format" file="MB7_3R.bed" />
+        </test>
+        </tests>
+   <help>
+
+**What it does**
+
+This tool converts gene transcript annotation from GTF or GFF or GFF3 to UCSC 12-column BED format.
+
+--------
+
+**Example**
+
+- The following data in GFF3::
+
+ ##gff-version 3
+ chr1    protein_coding  gene    11874   14409   0       +       .       ID=Gene:uc001aaa.3;Name=Gene:uc001aaa.3
+ chr1    protein_coding  transcript      11874   14409   0       +       .       ID=uc001aaa.3;Name=uc001aaa.3;Parent=Gene:uc001aaa.3
+ chr1    protein_coding  exon    11874   12227   0       +       .       Parent=uc001aaa.3
+ chr1    protein_coding  exon    12613   12721   0       +       .       Parent=uc001aaa.3
+ chr1    protein_coding  exon    13221   14409   0       +       .       Parent=uc001aaa.3
+
+- Will be converted to UCSC BED format::
+
+ chr1    11874   14409   uc001aaa.3      0       +       11874   14409   0       3       354,109,1189,   0,739,1347,
+
+--------
+
+**About formats**
+
+**GFF3 format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF3 lines have nine tab-separated fields::
+
+
+    1. seqid - Must be a chromosome or scaffold or contig.
+    2. source - The program that generated this feature.
+    3. type - The name of this type of feature. Some examples of standard feature types are "gene", "CDS", "protein", "mRNA", and "exon". 
+    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
+    5. stop - The ending position of the feature (inclusive).
+    6. score - A score between 0 and 1000. If there is no score value, enter ".".
+    7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
+    8. phase - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
+    9. attributes - All lines with the same group are linked together into a single item.
+
+**BED format** Browser Extensible Data format was designed at UCSC for displaying data tracks in the Genome Browser. It has three required fields and several additional optional ones:
+
+The first three BED fields (required) are::
+
+    1. chrom - The name of the chromosome (e.g. chr1, chrY_random).
+    2. chromStart - The starting position in the chromosome. (The first base in a chromosome is numbered 0.)
+    3. chromEnd - The ending position in the chromosome, plus 1 (i.e., a half-open interval).
+
+The additional BED fields (optional) are::
+
+    4. name - The name of the BED line.
+    5. score - A score between 0 and 1000.
+    6. strand - Defines the strand - either '+' or '-'.
+    7. thickStart - The starting position where the feature is drawn thickly at the Genome Browser.
+    8. thickEnd - The ending position where the feature is drawn thickly at the Genome Browser.
+    9. reserved - This should always be set to zero.
+   10. blockCount - The number of blocks (exons) in the BED line.
+   11. blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount.
+   12. blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount.
+
+--------
+
+**Copyright**
+
+2009-2014 Max Planck Society, University of Tübingen &amp; Memorial Sloan Kettering Cancer Center
+
+Sreedharan VT, Schultheiss SJ, Jean G, Kahles A, Bohnert R, Drewe P, Mudrakarta P, Görnitz N, Zeller G, Rätsch G. Oqtans: the RNA-seq workbench in the cloud for complete and reproducible quantitative transcriptome analysis. Bioinformatics 10.1093/bioinformatics/btt731 (2014)
+
+ </help>
+</tool>
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gff_to_gtf.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/gff_to_gtf.py Wed Jun 11 16:29:25 2014 -0400
[
@@ -0,0 +1,76 @@
+#!/usr/bin/env python 
+"""
+Program to convert data from GFF to GTF 
+
+Usage: python gff_to_gtf.py in.gff > out.gtf 
+
+Requirement:
+    GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py    
+
+Copyright (C) 
+    2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
+    2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA.
+"""
+
+import re
+import sys
+import GFFParser
+
+def printGTF(tinfo):
+    """
+    writing result file in GTF format
+
+    @args tinfo: parsed object from gff file
+    @type tinfo: numpy array 
+    """
+
+    for ent1 in tinfo:
+        for idx, tid in enumerate(ent1['transcripts']):
+            
+            exons = ent1['exons'][idx]
+            cds_exons = ent1['cds_exons'][idx]
+
+            stop_codon = start_codon = ()
+
+            if ent1['strand'] == '+':
+                if cds_exons.any():
+                    start_codon = (cds_exons[0][0], cds_exons[0][0]+2) 
+                    stop_codon = (cds_exons[-1][1]-2, cds_exons[-1][1]) 
+            elif ent1['strand'] == '-':
+                if cds_exons.any():
+                    start_codon = (cds_exons[-1][1]-2, cds_exons[-1][1])
+                    stop_codon = (cds_exons[0][0], cds_exons[0][0]+2)
+            else:
+                print 'STRAND information missing - %s, skip the transcript - %s' % (ent1['strand'], tid[0]) 
+                pass 
+                
+            last_cds_cod = 0 
+            for idz, ex_cod in enumerate(exons):
+
+                print '%s\t%s\texon\t%d\t%d\t.\t%s\t.\tgene_id "%s"; transcript_id "%s"; exon_number "%d"; gene_name "%s"; ' % (ent1['chr'], ent1['source'], ex_cod[0], ex_cod[1], ent1['strand'], ent1['name'], tid[0], idz+1, ent1['gene_info']['Name'])
+
+                if cds_exons.any():
+                    try:
+                        print '%s\t%s\tCDS\t%d\t%d\t.\t%s\t%d\tgene_id "%s"; transcript_id "%s"; exon_number "%d"; gene_name "%s"; ' % (ent1['chr'], ent1['source'], cds_exons[idz][0], cds_exons[idz][1], ent1['strand'], cds_exons[idz][2], ent1['name'], tid[0], idz+1, ent1['gene_info']['Name'])
+                        last_cds_cod = idz 
+                    except:
+                        pass 
+
+                    if idz == 0:
+                        print '%s\t%s\tstart_codon\t%d\t%d\t.\t%s\t%d\tgene_id "%s"; transcript_id "%s"; exon_number "%d"; gene_name "%s"; ' % (ent1['chr'], ent1['source'], start_codon[0], start_codon[1], ent1['strand'], cds_exons[idz][2], ent1['name'], tid[0], idz+1, ent1['gene_info']['Name'])
+
+            if stop_codon:
+                print '%s\t%s\tstop_codon\t%d\t%d\t.\t%s\t%d\tgene_id "%s"; transcript_id "%s"; exon_number "%d"; gene_name "%s"; ' % (ent1['chr'], ent1['source'], stop_codon[0], stop_codon[1], ent1['strand'], cds_exons[last_cds_cod][2], ent1['name'], tid[0], idz+1, ent1['gene_info']['Name'])
+
+    
+if __name__ == "__main__": 
+
+    try:
+        gff_fname = sys.argv[1]
+    except:
+        print __doc__
+        sys.exit(-1)
+
+    Transcriptdb = GFFParser.Parse(gff_fname)  
+
+    printGTF(Transcriptdb) 
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gff_to_gtf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/gff_to_gtf.xml Wed Jun 11 16:29:25 2014 -0400
b
@@ -0,0 +1,88 @@
+<tool id="fml_gff2gtf" name="GFF-to-GTF" version="2.0.0">
+ <description>converter</description> 
+ <command interpreter="python">gff_to_gtf.py $inf_gff3 > $gtf_format
+ </command> 
+ <inputs>
+   <param format="gff3,gff" name="inf_gff3" type="data" label="Convert this query" help="Provide genome annotation file in GFF or GFF3."/>
+    </inputs>
+   <outputs>
+   <data format="gtf" name="gtf_format" label="${tool.name} on ${on_string}: Converted" /> 
+   </outputs>
+ <tests>
+        <test>
+                <param name="inf_gff3" value="AceView_ncbi_37.gff3" />
+                <output name="gtf_format" file="AceView_gff3_to_gtf.gtf" />
+        </test>
+        <test>
+                <param name="inf_gff3" value="ENSEMBL_mm9.gff3" />
+                <output name="gtf_format" file="ENSEMBL_mm9_gff3_to_gtf.gtf" />
+        </test>
+    </tests>
+   <help>
+
+**What it does**
+
+This tool converts data from GFF3 to GTF file format (scroll down for format description).
+
+--------
+
+**Example**
+
+- The following data in GFF3 format::
+
+ ##gff-version 3
+ 17      protein_coding  gene    7255208 7258258 .       +       .       ID=ENSG00000213859;Name=KCTD11
+ 17      protein_coding  mRNA    7255208 7258258 .       +       .       ID=ENST00000333751;Name=KCTD11-001;Parent=ENSG00000213859
+ 17      protein_coding  protein 7256262 7256960 .       +       .       ID=ENSP00000328352;Name=KCTD11-001;Parent=ENST00000333751
+ 17      protein_coding  five_prime_UTR  7255208 7256261 .       +       .       Parent=ENST00000333751
+ 17      protein_coding  CDS     7256262 7256960 .       +       0       Name=CDS:KCTD11;Parent=ENST00000333751,ENSP00000328352
+ 17      protein_coding  three_prime_UTR 7256961 7258258 .       +       .       Parent=ENST00000333751
+ 17      protein_coding  exon    7255208 7258258 .       +       .       Parent=ENST00000333751
+
+- Will be converted to GTF format::
+
+ 17      protein_coding  exon    7255208 7258258 .       +       .        gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001";
+ 17      protein_coding  CDS     7256262 7256957 .       +       0        gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001"; protein_id "ENSP00000328352";
+ 17      protein_coding  start_codon     7256262 7256264 .       +       0        gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001";
+ 17      protein_coding  stop_codon      7256958 7256960 .       +       0        gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001";
+
+--------
+
+**About formats**
+
+
+**GFF3 format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF3 lines have nine tab-separated fields::
+
+    1. seqid - Must be a chromosome or scaffold.
+    2. source - The program that generated this feature.
+    3. type - The name of this type of feature. Some examples of standard feature types are "gene", "CDS", "protein", "mRNA", and "exon". 
+    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
+    5. stop - The ending position of the feature (inclusive).
+    6. score - A score between 0 and 1000. If there is no score value, enter ".".
+    7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
+    8. phase - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
+    9. attributes - All lines with the same group are linked together into a single item.
+
+
+**GTF format** Gene Transfer Format borrows from GFF, but has additional structure that warrants a separate definition and format name. GTF lines have nine tab-separated fields::
+
+    1. seqname - The name of the sequence.
+    2. source - Indicates where the annotation came from.
+    3. feature - The feature type name. The following feature types are required: 'CDS', 'start_codon' and 'stop_codon'.
+    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
+    5. end - The ending position of the feature (inclusive).
+    6. score - The score field indicates a degree of confidence in the feature's existence and coordinates.
+    7. strand - Valid entries include '+', '-', or '.'
+    8. frame - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base.
+    9. attributes - These attributes are designed for handling multiple transcripts from the same genomic region.
+
+--------
+
+**Copyright**
+
+2009-2014 Max Planck Society, University of Tübingen &amp; Memorial Sloan Kettering Cancer Center
+
+Sreedharan VT, Schultheiss SJ, Jean G, Kahles A, Bohnert R, Drewe P, Mudrakarta P, Görnitz N, Zeller G, Rätsch G. Oqtans: the RNA-seq workbench in the cloud for complete and reproducible quantitative transcriptome analysis. Bioinformatics 10.1093/bioinformatics/btt731 (2014)
+
+ </help>
+</tool>
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gtf_to_gff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/gtf_to_gff.py Wed Jun 11 16:29:25 2014 -0400
[
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+"""
+Convert Gene Transfer Format [GTF] to Generic Feature Format Version 3 [GFF3].
+
+Usage: python gtf_to_gff.py in.gtf > out.gff3  
+    
+Requirement:
+    GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py    
+    helper.py : https://github.com/vipints/GFFtools-GX/blob/master/helper.py
+    
+Copyright (C) 
+    2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
+    2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA.
+"""
+
+import re
+import sys
+import GFFParser
+import helper
+
+def GFFWriter(gtf_content):
+    """
+    write the feature information to GFF format
+
+    @args gtf_content: Parsed object from gtf file 
+    @type gtf_content: numpy array
+    """
+
+    print '##gff-version 3'
+
+    for ent1 in gtf_content:
+
+        chr_name = ent1['chr']
+        strand = ent1['strand']
+        start = ent1['start']
+        stop = ent1['stop']
+        source = ent1['source']
+        ID = ent1['name']
+        Name = ent1['gene_info']['Name']
+
+        print '%s\t%s\tgene\t%d\t%d\t.\t%s\t.\tID=%s;Name=%s' % (chr_name, source, start, stop, strand, ID, Name) 
+
+        for idx, tid in enumerate(ent1['transcripts']):
+
+            t_start = ent1['exons'][idx][0][0]
+            t_stop = ent1['exons'][idx][-1][-1]
+            t_type = ent1['transcript_type'][idx]
+
+            if ent1['exons'][idx].any() and ent1['cds_exons'][idx].any():
+                utr5_exons, utr3_exons = helper.buildUTR(ent1['cds_exons'][idx], ent1['exons'][idx], strand)
+
+            print '%s\t%s\t%s\t%d\t%d\t.\t%s\t.\tID=%s;Parent=%s' % (chr_name, source, t_type, t_start, t_stop, strand, tid[0], ID) 
+
+            for ex_cod in utr5_exons:
+                print '%s\t%s\tfive_prime_UTR\t%d\t%d\t.\t%s\t.\tParent=%s' % (chr_name, source, ex_cod[0], ex_cod[1], strand, tid[0]) 
+
+            for ex_cod in ent1['cds_exons'][idx]:
+                print '%s\t%s\tCDS\t%d\t%d\t.\t%s\t%d\tParent=%s' % (chr_name, source, ex_cod[0], ex_cod[1], strand, ex_cod[2], tid[0]) 
+
+            for ex_cod in utr3_exons:
+                print '%s\t%s\tthree_prime_UTR\t%d\t%d\t.\t%s\t.\tParent=%s' % (chr_name, source, ex_cod[0], ex_cod[1], strand, tid[0]) 
+
+            for ex_cod in ent1['exons'][idx]:
+                print '%s\t%s\texon\t%d\t%d\t.\t%s\t.\tParent=%s' % (chr_name, source, ex_cod[0], ex_cod[1], strand, tid[0]) 
+            
+
+def __main__():
+
+    try:
+        gtf_fname = sys.argv[1]
+    except:
+        print __doc__
+        sys.exit(-1)
+
+    gtf_file_content = GFFParser.Parse(gtf_fname)  
+
+    GFFWriter(gtf_file_content)
+
+if __name__ == "__main__": 
+    __main__()
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gtf_to_gff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/gtf_to_gff.xml Wed Jun 11 16:29:25 2014 -0400
b
@@ -0,0 +1,94 @@
+<tool id="fml_gtf2gff" name="GTF-to-GFF" version="2.0.0">
+ <description>converter</description> 
+ <command interpreter="python">gtf_to_gff.py $inf_gtf > $gff3_format 
+ </command> 
+ <inputs>
+   <param format="gtf" name="inf_gtf" type="data" label="Convert this query" help="Provide genome annotation file in GTF."/>
+        </inputs>
+   <outputs>
+   <data format="gff3" name="gff3_format" label="${tool.name} on ${on_string}: Converted" /> 
+   </outputs>
+     <tests>
+         <test>
+                <param name="inf_gtf" value="UCSC_transcripts.gtf" />
+                <output name="gff3_format" file="UCSC_transcripts.gff3" />
+         </test>
+         <test>
+                <param name="inf_gtf" value="JGI_genes.gtf" />
+                <output name="gff3_format" file="JGI_genes.gff3" />
+         </test>
+         <test>
+                <param name="inf_gtf" value="ENSEMBL_mm9.gtf" />
+                <output name="gff3_format" file="ENSEMBL_mm9.gff3" />
+         </test>
+         <test>
+                <param name="inf_gtf" value="AceView_ncbi_37.gtf" />
+                <output name="gff3_format" file="AceView_ncbi_37.gff3" />
+         </test>
+        </tests>
+   <help>
+
+**What it does**
+
+This tool converts data from GTF to a valid GFF3 file (scroll down for format description).
+
+--------
+
+**Example**
+
+- The following data in GTF format::
+
+ 17      protein_coding  exon    7255208 7258258 .       +       .        gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001";
+ 17      protein_coding  CDS     7256262 7256957 .       +       0        gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001"; protein_id "ENSP00000328352";
+ 17      protein_coding  start_codon     7256262 7256264 .       +       0        gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001";
+ 17      protein_coding  stop_codon      7256958 7256960 .       +       0        gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001";
+
+- Will be converted to GFF3 format::
+
+ ##gff-version 3
+ 17      protein_coding  gene    7255208 7258258 .       +       .       ID=ENSG00000213859;Name=KCTD11
+ 17      protein_coding  mRNA    7255208 7258258 .       +       .       ID=ENST00000333751;Name=KCTD11-001;Parent=ENSG00000213859
+ 17      protein_coding  protein 7256262 7256960 .       +       .       ID=ENSP00000328352;Name=KCTD11-001;Parent=ENST00000333751
+ 17      protein_coding  five_prime_UTR  7255208 7256261 .       +       .       Parent=ENST00000333751
+ 17      protein_coding  CDS     7256262 7256960 .       +       0       Name=CDS:KCTD11;Parent=ENST00000333751,ENSP00000328352
+ 17      protein_coding  three_prime_UTR 7256961 7258258 .       +       .       Parent=ENST00000333751
+ 17      protein_coding  exon    7255208 7258258 .       +       .       Parent=ENST00000333751
+
+--------
+
+**About formats**
+
+**GTF format** Gene Transfer Format borrows from GFF, but has additional structure that warrants a separate definition and format name. GTF lines have nine tab-separated fields::
+
+    1. seqname - The name of the sequence.
+    2. source - Indicates where the annotation came from.
+    3. feature - The feature type name. The following feature types are required: 'CDS', 'start_codon' and 'stop_codon'.
+    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
+    5. end - The ending position of the feature (inclusive).
+    6. score - The score field indicates a degree of confidence in the feature's existence and coordinates.
+    7. strand - Valid entries include '+', '-', or '.'
+    8. frame - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base.
+    9. attributes - These attributes are designed for handling multiple transcripts from the same genomic region.
+
+**GFF3 format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF3 lines have nine tab-separated fields::
+
+    1. seqid - Must be a chromosome or scaffold.
+    2. source - The program that generated this feature.
+    3. type - The name of this type of feature. Some examples of standard feature types are "gene", "CDS", "protein", "mRNA", and "exon". 
+    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
+    5. stop - The ending position of the feature (inclusive).
+    6. score - A score between 0 and 1000. If there is no score value, enter ".".
+    7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
+    8. phase - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
+    9. attributes - All lines with the same group are linked together into a single item.
+
+--------
+
+**Copyright**
+
+2009-2014 Max Planck Society, University of Tübingen &amp; Memorial Sloan Kettering Cancer Center
+
+Sreedharan VT, Schultheiss SJ, Jean G, Kahles A, Bohnert R, Drewe P, Mudrakarta P, Görnitz N, Zeller G, Rätsch G. Oqtans: the RNA-seq workbench in the cloud for complete and reproducible quantitative transcriptome analysis. Bioinformatics 10.1093/bioinformatics/btt731 (2014)
+
+ </help>
+</tool>
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/helper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/helper.py Wed Jun 11 16:29:25 2014 -0400
[
b'@@ -0,0 +1,332 @@\n+#!/usr/bin/env python\n+"""\n+Common utility functions\n+"""\n+\n+import os \n+import re\n+import sys \n+import gzip \n+import bz2\n+import numpy \n+\n+def init_gene():\n+    """\n+    Initializing the gene structure \n+    """\n+\n+    gene_det = [(\'id\', \'f8\'), \n+            (\'anno_id\', numpy.dtype), \n+            (\'confgenes_id\', numpy.dtype),\n+            (\'name\', \'S25\'),\n+            (\'source\', \'S25\'),\n+            (\'gene_info\', numpy.dtype),\n+            (\'alias\', \'S15\'),\n+            (\'name2\', numpy.dtype),\n+            (\'strand\', \'S2\'), \n+            (\'score\', \'S15\'), \n+            (\'chr\', \'S15\'), \n+            (\'chr_num\', numpy.dtype),\n+            (\'paralogs\', numpy.dtype),\n+            (\'start\', \'f8\'),\n+            (\'stop\', \'f8\'), \n+            (\'transcripts\', numpy.dtype),\n+            (\'transcript_type\', numpy.dtype),\n+            (\'transcript_info\', numpy.dtype),\n+            (\'transcript_status\', numpy.dtype),\n+            (\'transcript_valid\', numpy.dtype),\n+            (\'exons\', numpy.dtype),\n+            (\'exons_confirmed\', numpy.dtype),\n+            (\'cds_exons\', numpy.dtype),\n+            (\'utr5_exons\', numpy.dtype),\n+            (\'utr3_exons\', numpy.dtype),\n+            (\'tis\', numpy.dtype),\n+            (\'tis_conf\', numpy.dtype),\n+            (\'tis_info\', numpy.dtype),\n+            (\'cdsStop\', numpy.dtype),\n+            (\'cdsStop_conf\', numpy.dtype),\n+            (\'cdsStop_info\', numpy.dtype),\n+            (\'tss\', numpy.dtype),\n+            (\'tss_info\', numpy.dtype),\n+            (\'tss_conf\', numpy.dtype),\n+            (\'cleave\', numpy.dtype),\n+            (\'cleave_info\', numpy.dtype),\n+            (\'cleave_conf\', numpy.dtype),\n+            (\'polya\', numpy.dtype),\n+            (\'polya_info\', numpy.dtype),\n+            (\'polya_conf\', numpy.dtype),\n+            (\'is_alt\', \'f8\'), \n+     
       (\'is_alt_spliced\', \'f8\'), \n+            (\'is_valid\',  numpy.dtype),\n+            (\'transcript_complete\', numpy.dtype),\n+            (\'is_complete\', numpy.dtype),\n+            (\'is_correctly_gff3_referenced\', \'S5\'),\n+            (\'splicegraph\', numpy.dtype) ]\n+\n+    return gene_det\n+\n+def open_file(fname):\n+    """\n+    Open the file (supports .gz .bz2) and returns the handler\n+\n+    @args fname: input file name for reading \n+    @type fname: str\n+    """\n+\n+    try:\n+        if os.path.splitext(fname)[1] == ".gz":\n+            FH = gzip.open(fname, \'rb\')\n+        elif os.path.splitext(fname)[1] == ".bz2":\n+            FH = bz2.BZ2File(fname, \'rb\')\n+        else:\n+            FH = open(fname, \'rU\')\n+    except Exception as error:\n+        sys.exit(error)\n+\n+    return FH\n+\n+def add_CDS_phase(strand, cds):\n+    """\n+    Calculate CDS phase and add to the CDS exons\n+\n+    @args strand: feature strand information \n+    @type strand: +/- \n+    @args cds: coding exon coordinates \n+    @type cds: numpy array [[int, int, int]]\n+    """\n+\n+    cds_region, cds_flag = [], 0 \n+    if strand == \'+\':\n+        for cdspos in cds:\n+            if cds_flag == 0:\n+                cdspos = (cdspos[0], cdspos[1], 0)\n+                diff = (cdspos[1]-(cdspos[0]-1))%3\n+            else:\n+                xy = 0\n+                if diff == 0: \n+                    cdspos = (cdspos[0], cdspos[1], 0)\n+                elif diff == 1: \n+                    cdspos = (cdspos[0], cdspos[1], 2)\n+                    xy = 2\n+                elif diff == 2: \n+                    cdspos = (cdspos[0], cdspos[1], 1)\n+                    xy = 1\n+                diff = ((cdspos[1]-(cdspos[0]-1))-xy)%3\n+            cds_region.append(cdspos)\n+            cds_flag = 1 \n+    elif strand == \'-\':\n+        cds.reverse()\n+        for cdspos in cds: \n+            if cds_flag == 0:\n+                cdspos = (cdspos[0], 
cdspos[1], 0)\n+                diff = (cdspos[1]-(cdspos[0]-1))%3\n+            else:  \n+                xy = 0 \n+                if diff == 0: \n+                    cdspos = (cdspos[0], cdspos[1], 0)\n+                elif diff == 1:\n+                  '..b"              exon_pos.append([cds_5start, utr3_end])\n+            for cds in cds_cod:\n+                exon_pos.append(cds)\n+            for utr3 in three_p_utr:\n+                exon_pos.append(utr3)\n+        else:    \n+            if jun_exon != []:\n+                five_p_utr = five_p_utr[:-1]\n+                cds_cod = cds_cod[1:]\n+            for utr5 in five_p_utr:\n+                exon_pos.append(utr5)\n+            exon_pos.append(jun_exon) if jun_exon != [] else ''\n+            jun_exon = []\n+            utr3_start, utr3_end = 0, 0\n+            if three_p_utr != []:\n+                utr3_start = three_p_utr[0][0]\n+                utr3_end = three_p_utr[0][1]\n+            cds_3start = cds_cod[-1][0]\n+            cds_3end = cds_cod[-1][1]\n+            if utr3_start-cds_3end == 0 or utr3_start-cds_3end == 1:       \n+                jun_exon = [cds_3start, utr3_end]\n+            if jun_exon != []:\n+                cds_cod = cds_cod[:-1]\n+                three_p_utr = three_p_utr[1:]\n+            for cds in cds_cod:\n+                exon_pos.append(cds)\n+            exon_pos.append(jun_exon) if jun_exon != [] else ''\n+            for utr3 in three_p_utr:\n+                exon_pos.append(utr3)\n+    elif strand_p == '-':\n+        utr3_start, utr3_end = 0, 0        \n+        if three_p_utr != []:\n+            utr3_start = three_p_utr[-1][0]\n+            utr3_end = three_p_utr[-1][1]\n+        cds_3start = cds_cod[0][0]\n+        cds_3end = cds_cod[0][1]\n+        jun_exon = []\n+        if cds_3start-utr3_end == 0 or cds_3start-utr3_end == 1:\n+            jun_exon = [utr3_start, cds_3end]  \n+        if len(cds_cod) == 1:    \n+            three_prime_flag = 0\n+          
  if jun_exon != []:\n+                three_p_utr = three_p_utr[:-1]\n+                three_prime_flag = 1\n+            for utr3 in three_p_utr:\n+                exon_pos.append(utr3)\n+            jun_exon = []\n+            (utr5_start, utr5_end) = (0, 0)\n+            if five_p_utr != []:\n+                utr5_start = five_p_utr[0][0]\n+                utr5_end = five_p_utr[0][1]\n+            if utr5_start-cds_3end == 0 or utr5_start-cds_3end == 1:\n+                jun_exon = [cds_3start, utr5_end]\n+            five_prime_flag = 0\n+            if jun_exon != []:\n+                cds_cod = cds_cod[:-1]\n+                five_p_utr = five_p_utr[1:]\n+                five_prime_flag = 1\n+            if three_prime_flag == 1 and five_prime_flag == 1:\n+                exon_pos.append([utr3_start, utr5_end])\n+            if three_prime_flag == 1 and five_prime_flag == 0:\n+                exon_pos.append([utr3_start, cds_3end])\n+                cds_cod = cds_cod[:-1]\n+            if three_prime_flag == 0 and five_prime_flag == 1:\n+                exon_pos.append([cds_3start, utr5_end])        \n+            for cds in cds_cod:\n+                exon_pos.append(cds)\n+            for utr5 in five_p_utr:\n+                exon_pos.append(utr5)\n+        else:\n+            if jun_exon != []:\n+                three_p_utr = three_p_utr[:-1]\n+                cds_cod = cds_cod[1:]\n+            for utr3 in three_p_utr:\n+                exon_pos.append(utr3)   \n+            if jun_exon != []:\n+                exon_pos.append(jun_exon)\n+            jun_exon = []\n+            (utr5_start, utr5_end) = (0, 0)\n+            if five_p_utr != []:\n+                utr5_start = five_p_utr[0][0]\n+                utr5_end = five_p_utr[0][1]    \n+            cds_5start = cds_cod[-1][0]\n+            cds_5end = cds_cod[-1][1]\n+            if utr5_start-cds_5end == 0 or utr5_start-cds_5end == 1:\n+                jun_exon = [cds_5start, utr5_end]\n+            
if jun_exon != []:\n+                cds_cod = cds_cod[:-1]\n+                five_p_utr = five_p_utr[1:]\n+            for cds in cds_cod:\n+                exon_pos.append(cds)\n+            if jun_exon != []:\n+                exon_pos.append(jun_exon)    \n+            for utr5 in five_p_utr:\n+                exon_pos.append(utr5)\n+    return exon_pos\n"
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/AceView_gff3_to_gtf.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/AceView_gff3_to_gtf.gtf Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,2544 @@\n+##gff-version 2.5\n+1\tprotein_coding\tCDS\t12704566\t12704733\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "1"; protein_id "AADACL4.aAug10";\n+1\tprotein_coding\tCDS\t12711142\t12711358\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "2"; protein_id "AADACL4.aAug10";\n+1\tprotein_coding\tCDS\t12721802\t12721865\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "3"; protein_id "AADACL4.aAug10";\n+1\tprotein_coding\tCDS\t12725972\t12726746\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "4"; protein_id "AADACL4.aAug10";\n+1\tprotein_coding\texon\t12704566\t12704733\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "1";\n+1\tprotein_coding\texon\t12711142\t12711358\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "2";\n+1\tprotein_coding\texon\t12721802\t12721865\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "3";\n+1\tprotein_coding\texon\t12725972\t12727097\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "4";\n+1\tprotein_coding\tCDS\t12776344\t12776347\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.bAug10"; exon_number "1"; protein_id "AADACL3.bAug10";\n+1\tprotein_coding\tCDS\t12780885\t12780948\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.bAug10"; exon_number "2"; protein_id "AADACL3.bAug10";\n+1\tprotein_coding\tCDS\t12785189\t12785963\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.bAug10"; exon_number "3"; protein_id "AADACL3.bAug10";\n+1\tprotein_coding\texon\t12776119\t12776347\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.bAug10"; exon_number "1";\n+1\tprotein_coding\texon\t12780885\t12780948\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.bAug10"; exon_number "2";\n+1\tprotein_coding\texon\t12785189\t12788726\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.bAug10"; exon_number 
"3";\n+1\tprotein_coding\tCDS\t12779480\t12779693\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "1"; protein_id "AADACL3.aAug10";\n+1\tprotein_coding\tCDS\t12780885\t12780948\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "2"; protein_id "AADACL3.aAug10";\n+1\tprotein_coding\tCDS\t12785189\t12785963\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "3"; protein_id "AADACL3.aAug10";\n+1\tprotein_coding\texon\t12776119\t12776347\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "1";\n+1\tprotein_coding\texon\t12779477\t12779693\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "2";\n+1\tprotein_coding\texon\t12780885\t12780948\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "3";\n+1\tprotein_coding\texon\t12785189\t12788726\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "4";\n+10\tprotein_coding\tCDS\t52566489\t52566640\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "1"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52569654\t52569802\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "2"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52570800\t52570936\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "3"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52573617\t52573798\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "4"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52575766\t52576039\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "5"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52580312\t52580409\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "6"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52587891\t52588055\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "7"; protein_id 
"A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52595834\t52596072\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "8"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52601622\t52601752\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "9"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52603748\t52603882\t.\t-\t0\tgene'..b'10"; exon_number "1"; protein_id "AAA1.aAug10";\n+7\tprotein_coding\tCDS\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.aAug10"; exon_number "2"; protein_id "AAA1.aAug10";\n+7\tprotein_coding\tCDS\t34797686\t34797710\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.aAug10"; exon_number "3"; protein_id "AAA1.aAug10";\n+7\tprotein_coding\texon\t34607864\t34607984\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.aAug10"; exon_number "1";\n+7\tprotein_coding\texon\t34609324\t34609473\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.aAug10"; exon_number "2";\n+7\tprotein_coding\texon\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.aAug10"; exon_number "3";\n+7\tprotein_coding\texon\t34797686\t34797884\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.aAug10"; exon_number "4";\n+7\tprotein_coding\tCDS\t34682958\t34682963\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.dAug10"; exon_number "1"; protein_id "AAA1.dAug10";\n+7\tprotein_coding\tCDS\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.dAug10"; exon_number "2"; protein_id "AAA1.dAug10";\n+7\tprotein_coding\tCDS\t34800724\t34800802\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.dAug10"; exon_number "3"; protein_id "AAA1.dAug10";\n+7\tprotein_coding\texon\t34682839\t34682963\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.dAug10"; exon_number "1";\n+7\tprotein_coding\texon\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.dAug10"; exon_number "2";\n+7\tprotein_coding\texon\t34800724\t34800803\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.dAug10"; exon_number 
"3";\n+7\ttranscript\texon\t34758479\t34759420\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.gAug10"; exon_number "1";\n+7\ttranscript\texon\t34760254\t34760397\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.gAug10"; exon_number "2";\n+7\ttranscript\texon\t34762896\t34763007\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.gAug10"; exon_number "3";\n+7\ttranscript\texon\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.gAug10"; exon_number "4";\n+7\ttranscript\texon\t34800724\t34800803\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.gAug10"; exon_number "5";\n+7\ttranscript\texon\t34873773\t34873948\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.gAug10"; exon_number "6";\n+7\ttranscript\texon\t34758474\t34759420\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.hAug10"; exon_number "1";\n+7\ttranscript\texon\t34762896\t34763007\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.hAug10"; exon_number "2";\n+7\ttranscript\texon\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.hAug10"; exon_number "3";\n+7\ttranscript\texon\t34807954\t34808052\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.hAug10"; exon_number "4";\n+7\ttranscript\texon\t34873773\t34873943\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.hAug10"; exon_number "5";\n+7\ttranscript\texon\t34390034\t34390459\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.iAug10"; exon_number "1";\n+7\ttranscript\texon\t34457191\t34457284\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.iAug10"; exon_number "2";\n+7\ttranscript\texon\t34609324\t34609473\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.iAug10"; exon_number "3";\n+7\ttranscript\texon\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.iAug10"; exon_number "4";\n+7\ttranscript\texon\t34800724\t34800803\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.iAug10"; exon_number "5";\n+7\tprotein_coding\tCDS\t34457198\t34457284\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "1"; protein_id 
"AAA1.bAug10";\n+7\tprotein_coding\tCDS\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "2"; protein_id "AAA1.bAug10";\n+7\tprotein_coding\tCDS\t34797686\t34797710\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "3"; protein_id "AAA1.bAug10";\n+7\tprotein_coding\texon\t34386126\t34390459\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "1";\n+7\tprotein_coding\texon\t34457191\t34457284\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "2";\n+7\tprotein_coding\texon\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "3";\n+7\tprotein_coding\texon\t34797686\t34797884\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "4";\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/AceView_ncbi_37.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/AceView_ncbi_37.gff3 Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,3164 @@\n+##gff-version 3\n+1\tAceView\tgene\t12704566\t12727097\t.\t+\t.\tID=AADACL4;Name=AADACL4\n+1\tAceView\tmRNA\t12704566\t12727097\t.\t+\t.\tID=AADACL4.aAug10;Parent=AADACL4\n+1\tAceView\tCDS\t12704566\t12704733\t.\t+\t0\tParent=AADACL4.aAug10\n+1\tAceView\tCDS\t12711142\t12711358\t.\t+\t0\tParent=AADACL4.aAug10\n+1\tAceView\tCDS\t12721802\t12721865\t.\t+\t2\tParent=AADACL4.aAug10\n+1\tAceView\tCDS\t12725972\t12726746\t.\t+\t1\tParent=AADACL4.aAug10\n+1\tAceView\tthree_prime_UTR\t12726747\t12727097\t.\t+\t.\tParent=AADACL4.aAug10\n+1\tAceView\texon\t12704566\t12704733\t.\t+\t.\tParent=AADACL4.aAug10\n+1\tAceView\texon\t12711142\t12711358\t.\t+\t.\tParent=AADACL4.aAug10\n+1\tAceView\texon\t12721802\t12721865\t.\t+\t.\tParent=AADACL4.aAug10\n+1\tAceView\texon\t12725972\t12727097\t.\t+\t.\tParent=AADACL4.aAug10\n+1\tAceView\tgene\t12776119\t12788726\t.\t+\t.\tID=AADACL3;Name=AADACL3\n+1\tAceView\tmRNA\t12776119\t12788726\t.\t+\t.\tID=AADACL3.bAug10;Parent=AADACL3\n+1\tAceView\tfive_prime_UTR\t12776119\t12776343\t.\t+\t.\tParent=AADACL3.bAug10\n+1\tAceView\tCDS\t12776344\t12776347\t.\t+\t0\tParent=AADACL3.bAug10\n+1\tAceView\tCDS\t12780885\t12780948\t.\t+\t2\tParent=AADACL3.bAug10\n+1\tAceView\tCDS\t12785189\t12785963\t.\t+\t1\tParent=AADACL3.bAug10\n+1\tAceView\tthree_prime_UTR\t12785964\t12788726\t.\t+\t.\tParent=AADACL3.bAug10\n+1\tAceView\texon\t12776119\t12776347\t.\t+\t.\tParent=AADACL3.bAug10\n+1\tAceView\texon\t12780885\t12780948\t.\t+\t.\tParent=AADACL3.bAug10\n+1\tAceView\texon\t12785189\t12788726\t.\t+\t.\tParent=AADACL3.bAug10\n+1\tAceView\tmRNA\t12776119\t12788726\t.\t+\t.\tID=AADACL3.aAug10;Parent=AADACL3\n+1\tAceView\tfive_prime_UTR\t12776119\t12776347\t.\t+\t.\tParent=AADACL3.aAug10\n+1\tAceView\tfive_prime_UTR\t12779477\t12779479\t.\t+\t.\tParent=AADACL3.aAug10\n+1\tAceView\tCDS\t12779480\t12779693\t.\t+\t0\tParent=AADACL3.aAug10\n+1\tAceView\tCDS\t12780885\t12780948\t.\t+\t2\tParent=AADACL3.aAug10\n+1\tAceView\tCDS\t12785189\t127859
63\t.\t+\t1\tParent=AADACL3.aAug10\n+1\tAceView\tthree_prime_UTR\t12785964\t12788726\t.\t+\t.\tParent=AADACL3.aAug10\n+1\tAceView\texon\t12776119\t12776347\t.\t+\t.\tParent=AADACL3.aAug10\n+1\tAceView\texon\t12779477\t12779693\t.\t+\t.\tParent=AADACL3.aAug10\n+1\tAceView\texon\t12780885\t12780948\t.\t+\t.\tParent=AADACL3.aAug10\n+1\tAceView\texon\t12785189\t12788726\t.\t+\t.\tParent=AADACL3.aAug10\n+10\tAceView\tgene\t52566307\t52588060\t.\t-\t.\tID=A1CF;Name=A1CF\n+10\tAceView\tmRNA\t52566307\t52645387\t.\t-\t.\tID=A1CF.fAug10;Parent=A1CF\n+10\tAceView\tfive_prime_UTR\t52619701\t52619745\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\tfive_prime_UTR\t52622649\t52622741\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\tfive_prime_UTR\t52623793\t52623840\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\tfive_prime_UTR\t52645341\t52645387\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52566489\t52566640\t.\t-\t2\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52569654\t52569802\t.\t-\t1\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52570800\t52570936\t.\t-\t0\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52573617\t52573798\t.\t-\t2\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52575766\t52576039\t.\t-\t0\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52580312\t52580409\t.\t-\t2\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52587891\t52588055\t.\t-\t2\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52595834\t52596072\t.\t-\t1\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52601622\t52601752\t.\t-\t0\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52603748\t52603882\t.\t-\t0\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52619602\t52619700\t.\t-\t0\tParent=A1CF.fAug10\n+10\tAceView\tthree_prime_UTR\t52566307\t52566488\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52566307\t52566640\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52569654\t52569802\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52570800\t52570936\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52573617\t52573798\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52575766\t525
76039\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52580312\t52580409\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52587891\t52588055\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52595834\t52596072\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52601622\t52601752\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52603748\t52603882\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52619602\t52619745\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52622649\t52622741\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52623793\t52623840\t.\t-\t.\tP'..b'743462\t34800803\t.\t-\t.\tID=AAA1.cAug10;Parent=AAA1\n+7\tAceView\tfive_prime_UTR\t34800803\t34800803\t.\t-\t.\tParent=AAA1.cAug10\n+7\tAceView\tCDS\t34743797\t34743811\t.\t-\t0\tParent=AAA1.cAug10\n+7\tAceView\tCDS\t34768349\t34768428\t.\t-\t2\tParent=AAA1.cAug10\n+7\tAceView\tCDS\t34800724\t34800802\t.\t-\t0\tParent=AAA1.cAug10\n+7\tAceView\tthree_prime_UTR\t34743462\t34743796\t.\t-\t.\tParent=AAA1.cAug10\n+7\tAceView\texon\t34743462\t34743811\t.\t-\t.\tParent=AAA1.cAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.cAug10\n+7\tAceView\texon\t34800724\t34800803\t.\t-\t.\tParent=AAA1.cAug10\n+7\tAceView\ttranscript\t34386126\t34797884\t.\t-\t.\tID=AAA1.eAug10;Parent=AAA1\n+7\tAceView\texon\t34386126\t34390459\t.\t-\t.\tParent=AAA1.eAug10\n+7\tAceView\texon\t34457191\t34457284\t.\t-\t.\tParent=AAA1.eAug10\n+7\tAceView\texon\t34609324\t34609473\t.\t-\t.\tParent=AAA1.eAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.eAug10\n+7\tAceView\texon\t34797686\t34797884\t.\t-\t.\tParent=AAA1.eAug10\n+7\tAceView\tmRNA\t34607864\t34797884\t.\t-\t.\tID=AAA1.aAug10;Parent=AAA1\n+7\tAceView\tfive_prime_UTR\t34797711\t34797884\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\tCDS\t34609384\t34609473\t.\t-\t0\tParent=AAA1.aAug10\n+7\tAceView\tCDS\t34768349\t34768428\t.\t-\t2\tParent=AAA1.aAug10\n+7\tAceView\tCDS\t34797686\t34797710\t.\t-\t0\tParent=AAA1.aAug10\n+7\tAceView\tthree_prime_UTR\t34
607864\t34607984\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\tthree_prime_UTR\t34609324\t34609383\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\texon\t34607864\t34607984\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\texon\t34609324\t34609473\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\texon\t34797686\t34797884\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\tmRNA\t34682839\t34800803\t.\t-\t.\tID=AAA1.dAug10;Parent=AAA1\n+7\tAceView\tfive_prime_UTR\t34800803\t34800803\t.\t-\t.\tParent=AAA1.dAug10\n+7\tAceView\tCDS\t34682958\t34682963\t.\t-\t0\tParent=AAA1.dAug10\n+7\tAceView\tCDS\t34768349\t34768428\t.\t-\t2\tParent=AAA1.dAug10\n+7\tAceView\tCDS\t34800724\t34800802\t.\t-\t0\tParent=AAA1.dAug10\n+7\tAceView\tthree_prime_UTR\t34682839\t34682957\t.\t-\t.\tParent=AAA1.dAug10\n+7\tAceView\texon\t34682839\t34682963\t.\t-\t.\tParent=AAA1.dAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.dAug10\n+7\tAceView\texon\t34800724\t34800803\t.\t-\t.\tParent=AAA1.dAug10\n+7\tAceView\ttranscript\t34758479\t34873948\t.\t-\t.\tID=AAA1.gAug10;Parent=AAA1\n+7\tAceView\texon\t34758479\t34759420\t.\t-\t.\tParent=AAA1.gAug10\n+7\tAceView\texon\t34760254\t34760397\t.\t-\t.\tParent=AAA1.gAug10\n+7\tAceView\texon\t34762896\t34763007\t.\t-\t.\tParent=AAA1.gAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.gAug10\n+7\tAceView\texon\t34800724\t34800803\t.\t-\t.\tParent=AAA1.gAug10\n+7\tAceView\texon\t34873773\t34873948\t.\t-\t.\tParent=AAA1.gAug10\n+7\tAceView\ttranscript\t34758474\t34873943\t.\t-\t.\tID=AAA1.hAug10;Parent=AAA1\n+7\tAceView\texon\t34758474\t34759420\t.\t-\t.\tParent=AAA1.hAug10\n+7\tAceView\texon\t34762896\t34763007\t.\t-\t.\tParent=AAA1.hAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.hAug10\n+7\tAceView\texon\t34807954\t34808052\t.\t-\t.\tParent=AAA1.hAug10\n+7\tAceView\texon\t34873773\t34873943\t.\t-\t.\tParent=AAA1.hAug10\n+7\tAceView\ttranscript\t34390034\t34800803\t.\t-\t.
\tID=AAA1.iAug10;Parent=AAA1\n+7\tAceView\texon\t34390034\t34390459\t.\t-\t.\tParent=AAA1.iAug10\n+7\tAceView\texon\t34457191\t34457284\t.\t-\t.\tParent=AAA1.iAug10\n+7\tAceView\texon\t34609324\t34609473\t.\t-\t.\tParent=AAA1.iAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.iAug10\n+7\tAceView\texon\t34800724\t34800803\t.\t-\t.\tParent=AAA1.iAug10\n+7\tAceView\tmRNA\t34386126\t34797884\t.\t-\t.\tID=AAA1.bAug10;Parent=AAA1\n+7\tAceView\tfive_prime_UTR\t34797711\t34797884\t.\t-\t.\tParent=AAA1.bAug10\n+7\tAceView\tCDS\t34457198\t34457284\t.\t-\t0\tParent=AAA1.bAug10\n+7\tAceView\tCDS\t34768349\t34768428\t.\t-\t2\tParent=AAA1.bAug10\n+7\tAceView\tCDS\t34797686\t34797710\t.\t-\t0\tParent=AAA1.bAug10\n+7\tAceView\tthree_prime_UTR\t34386126\t34390459\t.\t-\t.\tParent=AAA1.bAug10\n+7\tAceView\tthree_prime_UTR\t34457191\t34457197\t.\t-\t.\tParent=AAA1.bAug10\n+7\tAceView\texon\t34386126\t34390459\t.\t-\t.\tParent=AAA1.bAug10\n+7\tAceView\texon\t34457191\t34457284\t.\t-\t.\tParent=AAA1.bAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.bAug10\n+7\tAceView\texon\t34797686\t34797884\t.\t-\t.\tParent=AAA1.bAug10\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/AceView_ncbi_37.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/AceView_ncbi_37.gtf Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,3989 @@\n+11\tAceView\texon\t111933358\t111934981\t.\t-\t0\tgene_id 2-oxoacid_dh; Gene_type cDNA_supported; transcript_id 2-oxoacid_dh.aAug10-unspliced; exon_number 1\n+19\tAceView\tCDS\t58859154\t58859210\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; product_id A1BGAS.aAug10; exon_number 1\n+19\tAceView\texon\t58859153\t58859210\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; exon_number 1\n+19\tAceView\tintron\t58859211\t58864686\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; type gt_ag\n+19\tAceView\tCDS\t58864687\t58864840\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; product_id A1BGAS.aAug10; exon_number 2\n+19\tAceView\texon\t58864687\t58864840\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; exon_number 2\n+19\tAceView\tintron\t58864841\t58865079\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; type gt_ag\n+19\tAceView\tCDS\t58865080\t58865114\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; product_id A1BGAS.aAug10; exon_number 3\n+19\tAceView\texon\t58865080\t58865223\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; exon_number 3\n+19\tAceView\tstop_codon\t58865115\t58865117\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; product_id A1BGAS.aAug10;\n+19\tAceView\tintron\t58865224\t58865734\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; type gt_ag\n+19\tAceView\texon\t58865735\t58866090\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; exon_number 4\n+19\tAceView\tstart_codon\t58864404\t58864406\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; product_id A1BGAS.bAug10;\n+19\tAceView\tCDS\t58864404\t58864410\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; 
transcript_id A1BGAS.bAug10; product_id A1BGAS.bAug10; exon_number 1\n+19\tAceView\texon\t58862110\t58864410\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; exon_number 1\n+19\tAceView\tintron\t58864411\t58864744\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; type gt_ag\n+19\tAceView\tCDS\t58864745\t58864840\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; product_id A1BGAS.bAug10; exon_number 2\n+19\tAceView\texon\t58864745\t58864840\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; exon_number 2\n+19\tAceView\tintron\t58864841\t58865079\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; type gt_ag\n+19\tAceView\tCDS\t58865080\t58865114\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; product_id A1BGAS.bAug10; exon_number 3\n+19\tAceView\texon\t58865080\t58865223\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; exon_number 3\n+19\tAceView\tstop_codon\t58865115\t58865117\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; product_id A1BGAS.bAug10;\n+19\tAceView\tintron\t58865224\t58865734\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; type gt_ag\n+19\tAceView\texon\t58865735\t58866548\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; exon_number 4\n+19\tAceView\texon\t58859122\t58859210\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.cAug10; exon_number 1\n+19\tAceView\tintron\t58859211\t58864686\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.cAug10; type gt_ag\n+19\tAceView\texon\t58864687\t58864840\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.cAug10; exon_number 2\n+19\tAceView\tintron\t58864841\t58865079\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id 
A1BGAS.cAug10; type gt_ag\n+19\tAceView\tstart_codon\t58865831\t58865833\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.cAug10; product_id A1BGAS.cAug10;\n+19\tAceView\tCDS\t58865831\t58866547\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; tran'..b'codon\t219129739\t219129741\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.gAug10; product_id AAMP.gAug10;\n+2\tAceView\tintron\t219129332\t219129738\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.gAug10; type gt_ag\n+2\tAceView\texon\t219128853\t219129331\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.gAug10; exon_number 11\n+2\tAceView\tstart_codon\t219134807\t219134809\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10;\n+2\tAceView\tCDS\t219134689\t219134809\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 1\n+2\tAceView\texon\t219134689\t219134843\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 1\n+2\tAceView\tintron\t219134258\t219134688\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\tCDS\t219134105\t219134257\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 2\n+2\tAceView\texon\t219134105\t219134257\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 2\n+2\tAceView\tintron\t219132337\t219134104\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\tCDS\t219132217\t219132336\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 3\n+2\tAceView\texon\t219132217\t219132336\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 
3\n+2\tAceView\tintron\t219131710\t219132216\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\tCDS\t219131570\t219131709\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 4\n+2\tAceView\texon\t219131570\t219131709\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 4\n+2\tAceView\tintron\t219131311\t219131569\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\tCDS\t219131166\t219131310\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 5\n+2\tAceView\texon\t219131166\t219131310\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 5\n+2\tAceView\tintron\t219130871\t219131165\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\tCDS\t219130787\t219130870\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 6\n+2\tAceView\texon\t219130787\t219130870\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 6\n+2\tAceView\tintron\t219130670\t219130786\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\tCDS\t219130392\t219130669\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 7\n+2\tAceView\texon\t219130302\t219130669\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 7\n+2\tAceView\tstop_codon\t219130389\t219130391\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10;\n+2\tAceView\tintron\t219130185\t219130301\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type 
gt_ag\n+2\tAceView\texon\t219130094\t219130184\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 8\n+2\tAceView\tintron\t219129898\t219130093\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\texon\t219129743\t219129897\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 9\n+2\tAceView\tintron\t219129332\t219129742\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\texon\t219128853\t219129331\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 10\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/Aly_JGI.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/Aly_JGI.bed Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,252 @@\n+scaffold_1\t10652\t11944\tTranscript:909750\t.\t-\t10652\t11944\t0\t5\t60,225,159,360,132,\t0,130,392,608,1161,\n+scaffold_1\t464358\t467635\tTranscript:470161\t.\t+\t464358\t467635\t0\t18\t202,63,72,108,81,54,126,90,126,108,117,57,81,5,174,10,20,31,\t0,599,744,929,1130,1329,1606,1827,2037,2253,2453,2649,2797,2915,2978,3174,3209,3247,\n+scaffold_1\t548765\t550572\tTranscript:918872\t.\t-\t548765\t550572\t0\t4\t170,274,174,28,\t0,593,965,1780,\n+scaffold_1\t80941\t82330\tTranscript:470071\t.\t+\t80941\t82330\t0\t2\t277,1014,\t0,376,\n+scaffold_1\t483425\t484126\tTranscript:311324\t.\t+\t483425\t484126\t0\t3\t72,216,54,\t0,343,648,\n+scaffold_1\t124594\t126229\tTranscript:470077\t.\t-\t124594\t126229\t0\t2\t792,700,\t0,936,\n+scaffold_1\t122468\t124310\tTranscript:470076\t.\t+\t122468\t124310\t0\t4\t1626,84,4,79,\t0,1653,1749,1764,\n+scaffold_1\t118008\t119066\tTranscript:470075\t.\t+\t118008\t119066\t0\t6\t49,732,57,44,56,29,\t0,66,819,899,960,1030,\n+scaffold_1\t90714\t113497\tTranscript:470074\t.\t-\t90714\t113497\t0\t22\t358,90,91,197,78,117,96,126,97,71,99,84,171,129,33,97,51,79,82,56,90,75,\t0,18818,19001,19212,19496,19665,19860,20042,20257,20536,20684,20856,21022,21382,21651,21776,21947,22094,22283,22441,22603,22709,\n+scaffold_1\t621551\t622441\tTranscript:909905\t.\t-\t621551\t622441\t0\t5\t85,140,120,77,151,\t0,175,386,578,740,\n+scaffold_1\t134679\t139817\tTranscript:470079\t.\t-\t134679\t139817\t0\t20\t19,354,129,144,126,126,93,133,132,135,108,204,89,150,154,222,186,123,101,172,\t0,42,474,701,922,1186,1410,1591,2068,2300,2516,2754,3045,3252,3555,3824,4128,4399,4621,4967,\n+scaffold_1\t127652\t134697\tTranscript:470078\t.\t+\t127652\t134697\t0\t11\t1166,318,355,423,293,274,500,322,358,650,825,\t0,1371,1874,2318,2863,3443,4043,4627,5028,5477,6221,\n+scaffold_1\t798586\t799140\tTranscript:470261\t.\t-\t798586\t799140\t0\t1\t555,\t0,\n+scaffold_1\t684011\t686672\tTranscript:909919\t.\t-\t684011\t686672\t0\t7\t81,96,78,139,68,72,129,\t0,25
0,454,1428,1680,2360,2533,\n+scaffold_1\t156948\t159348\tTranscript:311256\t.\t+\t156948\t159348\t0\t8\t195,177,171,182,212,120,479,237,\t0,384,637,888,1149,1393,1594,2164,\n+scaffold_1\t560362\t562206\tTranscript:470187\t.\t-\t560362\t562206\t0\t5\t42,10,290,190,1058,\t0,68,97,508,787,\n+scaffold_1\t860953\t874764\tTranscript:311407\t.\t-\t860953\t874764\t0\t19\t1236,147,307,309,3006,1651,1115,103,700,1166,115,66,192,174,57,136,86,73,182,\t0,1347,1588,2052,2707,5983,7729,8952,9162,10011,11333,11619,11827,12166,12484,12621,12933,13180,13630,\n+scaffold_1\t771817\t774951\tTranscript:909946\t.\t+\t771817\t774951\t0\t11\t108,198,76,72,88,256,116,221,19,72,256,\t0,231,535,682,855,1025,1350,1548,2129,2732,2879,\n+scaffold_1\t479138\t481385\tTranscript:333544\t.\t-\t479138\t481385\t0\t5\t792,141,246,297,108,\t0,874,1318,1748,2140,\n+scaffold_1\t765430\t766468\tTranscript:918940\t.\t-\t765430\t766468\t0\t2\t529,96,\t0,943,\n+scaffold_1\t766738\t768326\tTranscript:918941\t.\t-\t766738\t768326\t0\t3\t211,573,73,\t0,578,1516,\n+scaffold_1\t849099\t851591\tTranscript:470280\t.\t+\t849099\t851591\t0\t15\t76,3,107,44,78,193,31,59,74,87,62,29,136,100,30,\t0,111,179,357,767,954,1237,1349,1498,1683,1859,2016,2146,2311,2463,\n+scaffold_1\t57579\t57871\tTranscript:918741\t.\t+\t57579\t57871\t0\t1\t293,\t0,\n+scaffold_1\t58865\t72177\tTranscript:918742\t.\t+\t58865\t72177\t0\t49\t113,298,229,340,114,129,192,195,138,123,254,104,95,225,162,73,68,146,109,147,48,129,243,138,86,106,96,153,183,230,103,126,45,195,127,101,135,84,147,132,48,147,62,154,192,105,135,201,280,\t0,209,584,904,1435,1635,1838,2119,2442,2671,2877,3232,3571,3753,4317,4563,4746,4949,5205,5406,5644,5766,6016,6374,6597,6778,7127,7380,7687,7945,8270,8456,8667,8886,9339,9568,9964,10175,10675,10910,11151,11293,11507,11767,12025,12306,12503,12726,13033,\n+scaffold_1\t356318\t357400\tTranscript:470139\t.\t-\t356318\t357400\t0\t2\t727,329,\t0,754,\n+scaffold_1\t786367\t786721\tTranscript:918948\t.\t+\t786367\t786721\t0\t1\t355,\
t0,\n+scaffold_1\t787193\t787397\tTranscript:918949\t.\t+\t787193\t787397\t0\t1\t205,\t0,\n+scaffold_1\t511272\t518844\tTranscript:333551\t.\t-\t511272\t518844\t0\t21\t178,407,165,906,191,98,180,629,96,162,183,151,234,161,123,173,220,395,211,114,912,\t0,277,781,1027,2017,2290,2433,2661,3383,3564,3812,4083,4319,4644,4887,5097,5359,5667,'..b'66,3239,3467,3671,3861,4147,4363,4706,4918,5333,\n+scaffold_1\t3311\t6198\tTranscript:470048\t.\t-\t3311\t6198\t0\t9\t180,196,273,201,108,7,378,220,568,\t0,202,499,853,1172,1419,1540,2008,2320,\n+scaffold_1\t9512\t10567\tTranscript:470049\t.\t+\t9512\t10567\t0\t3\t53,82,695,\t0,64,361,\n+scaffold_1\t605536\t607891\tTranscript:470198\t.\t+\t605536\t607891\t0\t2\t765,1020,\t0,1336,\n+scaffold_1\t652374\t653539\tTranscript:470212\t.\t-\t652374\t653539\t0\t2\t302,779,\t0,387,\n+scaffold_1\t650407\t652252\tTranscript:470210\t.\t+\t650407\t652252\t0\t4\t44,59,162,1193,\t0,71,447,653,\n+scaffold_1\t436171\t436800\tTranscript:311313\t.\t-\t436171\t436800\t0\t3\t125,91,105,\t0,320,525,\n+scaffold_1\t428322\t429820\tTranscript:311310\t.\t-\t428322\t429820\t0\t7\t264,33,90,44,64,152,100,\t0,304,434,682,800,1084,1399,\n+scaffold_1\t152834\t155670\tTranscript:909785\t.\t-\t152834\t155670\t0\t2\t1675,650,\t0,2187,\n+scaffold_1\t266834\t270418\tTranscript:470119\t.\t-\t266834\t270418\t0\t8\t237,716,244,328,235,195,103,217,\t0,337,1181,1516,2329,2822,3099,3368,\n+scaffold_1\t759661\t760663\tTranscript:470240\t.\t+\t759661\t760663\t0\t6\t141,48,531,103,15,69,\t0,157,223,768,896,934,\n+scaffold_1\t441904\t443720\tTranscript:311315\t.\t+\t441904\t443720\t0\t2\t993,627,\t0,1190,\n+scaffold_1\t246920\t248384\tTranscript:470115\t.\t+\t246920\t248384\t0\t9\t62,353,296,141,98,45,17,18,35,\t0,93,533,905,1131,1248,1320,1372,1430,\n+scaffold_1\t501318\t503489\tTranscript:918864\t.\t+\t501318\t503489\t0\t6\t224,63,117,400,152,171,\t0,353,605,1211,1738,2001,\n+scaffold_1\t236778\t241721\tTranscript:470110\t.\t+\t236778\t241721\t0\t16\t103,180,159,112,120,152,
183,35,163,54,93,143,71,84,294,60,\t0,114,768,1022,1245,1446,1715,1992,2454,2779,3173,3418,4243,4395,4571,4884,\n+scaffold_1\t243695\t245459\tTranscript:470113\t.\t-\t243695\t245459\t0\t6\t44,166,111,106,296,330,\t0,68,317,838,1049,1435,\n+scaffold_1\t330470\t334264\tTranscript:909830\t.\t+\t330470\t334264\t0\t3\t785,1961,167,\t0,1600,3628,\n+scaffold_1\t313347\t315496\tTranscript:918815\t.\t-\t313347\t315496\t0\t4\t1048,161,132,191,\t0,1315,1707,1959,\n+scaffold_1\t46396\t48761\tTranscript:470065\t.\t+\t46396\t48761\t0\t9\t164,76,89,90,45,70,140,89,172,\t0,297,454,641,850,991,1267,2009,2194,\n+scaffold_1\t365041\t365523\tTranscript:909838\t.\t-\t365041\t365523\t0\t3\t97,88,67,\t0,207,416,\n+scaffold_1\t367696\t369417\tTranscript:909839\t.\t-\t367696\t369417\t0\t8\t56,76,78,105,35,46,98,52,\t0,195,411,905,1212,1368,1514,1670,\n+scaffold_1\t779234\t780531\tTranscript:470252\t.\t-\t779234\t780531\t0\t5\t39,196,122,348,72,\t0,68,345,860,1226,\n+scaffold_1\t768374\t770507\tTranscript:311385\t.\t+\t768374\t770507\t0\t10\t87,66,75,145,47,58,50,57,248,61,\t0,187,401,562,791,924,1055,1197,1336,2073,\n+scaffold_1\t328181\t328380\tTranscript:918819\t.\t+\t328181\t328380\t0\t1\t200,\t0,\n+scaffold_1\t816662\t816932\tTranscript:918958\t.\t+\t816662\t816932\t0\t1\t271,\t0,\n+scaffold_1\t928527\t930332\tTranscript:470297\t.\t+\t928527\t930332\t0\t8\t70,161,58,309,49,95,80,115,\t0,91,692,829,1236,1365,1538,1691,\n+scaffold_1\t113969\t115315\tTranscript:918756\t.\t-\t113969\t115315\t0\t1\t1347,\t0,\n+scaffold_1\t915290\t920350\tTranscript:470295\t.\t+\t915290\t920350\t0\t11\t515,539,69,198,132,87,441,168,231,116,401,\t0,612,1269,1908,2192,2873,3050,3591,3934,4388,4660,\n+scaffold_1\t731564\t731809\tTranscript:918928\t.\t-\t731564\t731809\t0\t1\t246,\t0,\n+scaffold_1\t835648\t840282\tTranscript:470277\t.\t-\t835648\t840282\t0\t15\t32,47,96,81,145,272,123,159,282,189,184,142,244,636,320,\t0,68,133,312,500,728,1086,1294,1572,2023,2474,2846,3211,3532,4315,\n+scaffold_1\t628623\t630888\
tTranscript:918899\t.\t-\t628623\t630888\t0\t6\t531,85,208,48,168,476,\t0,616,930,1217,1528,1790,\n+scaffold_1\t800563\t803042\tTranscript:918952\t.\t-\t800563\t803042\t0\t9\t95,97,101,189,158,232,136,104,291,\t0,235,433,623,894,1134,1449,1701,2189,\n+scaffold_1\t617112\t618613\tTranscript:918895\t.\t+\t617112\t618613\t0\t7\t100,316,53,73,105,81,176,\t0,218,634,797,960,1155,1326,\n+scaffold_1\t601736\t602094\tTranscript:918890\t.\t-\t601736\t602094\t0\t1\t359,\t0,\n+scaffold_1\t602514\t603658\tTranscript:918891\t.\t+\t602514\t603658\t0\t3\t510,145,81,\t0,792,1064,\n+scaffold_1\t930976\t935010\tTranscript:470299\t.\t+\t930976\t935010\t0\t13\t548,281,83,404,194,181,125,90,79,77,107,157,111,\t0,688,1271,1460,1997,2278,2610,2903,3099,3272,3549,3754,3924,\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/Aly_JGI.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/Aly_JGI.gff3 Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,3996 @@\n+##gff-version 3\n+##sequence-region scaffold_1 1 33132539\n+scaffold_1\tGenomic_canonical\tregion\t1\t33132539\t.\t+\t.\tID=scaffold_1;Name=scaffold_1\n+scaffold_1\tJGI_Filtered\tgene\t47\t2523\t.\t-\t.\tID=scaffold_100001.1;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\tmRNA\t47\t2523\t.\t-\t.\tID=Transcript:918720;Name=Transcript:918720;Parent=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\tthree_prime_UTR\t47\t66\t.\t-\t.\tID=three_prime_UTR:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\texon\t47\t252\t.\t-\t.\tID=exon:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tCDS\t67\t252\t.\t-\t0\tID=CDS:918720;Parent=Transcript:918720;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\texon\t407\t782\t.\t-\t.\tID=exon:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tCDS\t407\t782\t.\t-\t0\tID=CDS:918720;Parent=Transcript:918720;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\texon\t1423\t1642\t.\t-\t.\tID=exon:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tCDS\t1423\t1642\t.\t-\t1\tID=CDS:918720;Parent=Transcript:918720;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\texon\t1803\t2035\t.\t-\t.\tID=exon:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tCDS\t1803\t2035\t.\t-\t2\tID=CDS:918720;Parent=Transcript:918720;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\texon\t2124\t2347\t.\t-\t.\tID=exon:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tCDS\t2124\t2347\t.\t-\t1\tID=CDS:918720;Parent=Transcript:918720;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\texon\t2444\t2523\t.\t-\t.\tID=exon:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tCDS\t2444\t2503\t.\t-\t0\tID=CDS:918720;Parent=Transcript:918720;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\tfive_prime_UTR\t2504\t2523\t.\t-\t.\tID=five_prime_UTR:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tgene\t3311\t6198\t.\t-\t.\tID=fgenesh2_kg.1__2__AT1G02190.2;Name=fgenesh2_kg.1__2__AT1G02190.2\
n+scaffold_1\tJGI_Filtered\tmRNA\t3311\t6198\t.\t-\t.\tID=Transcript:470048;Name=Transcript:470048;Parent=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\tthree_prime_UTR\t3311\t3490\t.\t-\t.\tID=three_prime_UTR:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tthree_prime_UTR\t3513\t3528\t.\t-\t.\tID=three_prime_UTR:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\texon\t3311\t3490\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\texon\t3513\t3708\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t3529\t3708\t.\t-\t0\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t3810\t4082\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t3810\t4082\t.\t-\t0\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t4164\t4364\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t4164\t4364\t.\t-\t0\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t4483\t4590\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t4483\t4590\t.\t-\t0\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t4730\t4736\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t4730\t4736\t.\t-\t0\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t4851\t5228\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t4851\t5228\t.\t-\t1\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t5319\t5538\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t5319\t5538\t.\t-\t1\tID=CDS:470048;Parent=Transcript:470048;Name=fgen
esh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t5631\t6198\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t5631\t6123\t.\t-\t2\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\tfive_prime_UTR\t6124\t6198\t.\t-\t.\tID=five_prime_UTR:470048;Parent=Transcri'..b'me=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t933586\t933710\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tCDS\t933586\t933710\t.\t+\t0\tID=CDS:470299;Parent=Transcript:470299;Name=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t933879\t933968\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tCDS\t933879\t933968\t.\t+\t2\tID=CDS:470299;Parent=Transcript:470299;Name=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t934075\t934153\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tCDS\t934075\t934153\t.\t+\t2\tID=CDS:470299;Parent=Transcript:470299;Name=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t934248\t934324\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tCDS\t934248\t934324\t.\t+\t0\tID=CDS:470299;Parent=Transcript:470299;Name=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t934525\t934631\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tCDS\t934525\t934631\t.\t+\t2\tID=CDS:470299;Parent=Transcript:470299;Name=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t934730\t934886\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tCDS\t934730\t934851\t.\t+\t1\tID=CDS:470299;Parent=Transcript:470299;Name=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t934900\t935010\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tthree_prime_UTR\t934852\t934886\t.\t+\t.\tID=three_prime_UTR:470299;Parent=Transc
ript:470299\n+scaffold_1\tJGI_Filtered\tthree_prime_UTR\t934900\t935010\t.\t+\t.\tID=three_prime_UTR:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tgene\t938460\t939704\t.\t+\t.\tID=fgenesh1_pm.C_scaffold_1000202;Name=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\tmRNA\t938460\t939704\t.\t+\t.\tID=Transcript:311422;Name=Transcript:311422;Parent=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\texon\t938460\t938693\t.\t+\t.\tID=exon:311422;Parent=Transcript:311422\n+scaffold_1\tJGI_Filtered\tCDS\t938460\t938693\t.\t+\t0\tID=CDS:311422;Parent=Transcript:311422;Name=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\texon\t938841\t939104\t.\t+\t.\tID=exon:311422;Parent=Transcript:311422\n+scaffold_1\tJGI_Filtered\tCDS\t938841\t939104\t.\t+\t0\tID=CDS:311422;Parent=Transcript:311422;Name=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\texon\t939200\t939361\t.\t+\t.\tID=exon:311422;Parent=Transcript:311422\n+scaffold_1\tJGI_Filtered\tCDS\t939200\t939361\t.\t+\t0\tID=CDS:311422;Parent=Transcript:311422;Name=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\texon\t939394\t939477\t.\t+\t.\tID=exon:311422;Parent=Transcript:311422\n+scaffold_1\tJGI_Filtered\tCDS\t939394\t939477\t.\t+\t0\tID=CDS:311422;Parent=Transcript:311422;Name=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\texon\t939597\t939704\t.\t+\t.\tID=exon:311422;Parent=Transcript:311422\n+scaffold_1\tJGI_Filtered\tCDS\t939597\t939704\t.\t+\t0\tID=CDS:311422;Parent=Transcript:311422;Name=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\tgene\t940106\t941321\t.\t-\t.\tID=scaffold_100268.1;Name=scaffold_100268.1\n+scaffold_1\tJGI_Filtered\tmRNA\t940106\t941321\t.\t-\t.\tID=Transcript:918987;Name=Transcript:918987;Parent=scaffold_100268.1\n+scaffold_1\tJGI_Filtered\tthree_prime_UTR\t940106\t940125\t.\t-\t.\tID=three_prime_UTR:918987;Parent=Transcript:918987\n+scaffold_1\tJGI_Filtered\texon\t940106\t940528\t.\t-\t.\tID=exon:918987;Parent=Tran
script:918987\n+scaffold_1\tJGI_Filtered\tCDS\t940126\t940528\t.\t-\t0\tID=CDS:918987;Parent=Transcript:918987;Name=scaffold_100268.1\n+scaffold_1\tJGI_Filtered\texon\t940628\t940686\t.\t-\t.\tID=exon:918987;Parent=Transcript:918987\n+scaffold_1\tJGI_Filtered\tCDS\t940628\t940686\t.\t-\t1\tID=CDS:918987;Parent=Transcript:918987;Name=scaffold_100268.1\n+scaffold_1\tJGI_Filtered\texon\t941262\t941321\t.\t-\t.\tID=exon:918987;Parent=Transcript:918987\n+scaffold_1\tJGI_Filtered\tCDS\t941262\t941300\t.\t-\t0\tID=CDS:918987;Parent=Transcript:918987;Name=scaffold_100268.1\n+scaffold_1\tJGI_Filtered\tfive_prime_UTR\t941301\t941321\t.\t-\t.\tID=five_prime_UTR:918987;Parent=Transcript:918987\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/ENSEMBL_mm9.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/ENSEMBL_mm9.gff3 Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,4424 @@\n+##gff-version 3\n+18\tlincRNA\tgene\t3336414\t3366861\t.\t+\t.\tID=ENSMUSG00000091488;Name=AC124336.2\n+18\tlincRNA\ttranscript\t3336414\t3366861\t.\t+\t.\tID=ENSMUST00000171726;Parent=ENSMUSG00000091488;Name=AC124336.2-201\n+18\tlincRNA\texon\t3336414\t3337176\t.\t+\t.\tParent=ENSMUST00000171726\n+18\tlincRNA\texon\t3365925\t3366861\t.\t+\t.\tParent=ENSMUST00000171726\n+18\tprotein_coding\tgene\t9314042\t9450148\t.\t-\t.\tID=ENSMUSG00000024286;Name=Ccny\n+18\tprotein_coding\tmRNA\t9314042\t9450148\t.\t-\t.\tID=ENSMUST00000053917;Parent=ENSMUSG00000024286;Name=Ccny-201\n+18\tprotein_coding\tfive_prime_UTR\t9449670\t9450148\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9316554\t9316670\t.\t-\t0\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9319407\t9319569\t.\t-\t1\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9332782\t9332948\t.\t-\t0\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9345192\t9345311\t.\t-\t0\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9345412\t9345469\t.\t-\t1\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9349386\t9349421\t.\t-\t1\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9353405\t9353505\t.\t-\t0\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9377792\t9377826\t.\t-\t2\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9386733\t9386807\t.\t-\t2\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9449516\t9449669\t.\t-\t0\tParent=ENSMUST00000053917\n+18\tprotein_coding\tthree_prime_UTR\t9314042\t9316553\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9314042\t9316670\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9319407\t9319569\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9332782\t9332948\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9345192\t9345311\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9345412\t9345469\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein
_coding\texon\t9349386\t9349421\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9353405\t9353505\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9377792\t9377826\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9386733\t9386807\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9449516\t9450148\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\tmRNA\t9314042\t9450148\t.\t-\t.\tID=ENSMUST00000115867;Parent=ENSMUSG00000024286;Name=Ccny-202\n+18\tprotein_coding\tfive_prime_UTR\t9449670\t9450148\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9316554\t9316670\t.\t-\t0\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9319407\t9319569\t.\t-\t1\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9332782\t9332948\t.\t-\t0\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9345192\t9345311\t.\t-\t0\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9345412\t9345469\t.\t-\t1\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9349386\t9349421\t.\t-\t1\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9353405\t9353505\t.\t-\t0\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9377792\t9377826\t.\t-\t2\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9449516\t9449669\t.\t-\t0\tParent=ENSMUST00000115867\n+18\tprotein_coding\tthree_prime_UTR\t9314042\t9316553\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9314042\t9316670\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9319407\t9319569\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9332782\t9332948\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9345192\t9345311\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9345412\t9345469\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9349386\t9349421\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9353405\t9353505\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\
texon\t9377792\t9377826\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9449516\t9450148\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tmiRNA\tgene\t10782897\t10782983\t.\t-\t.\tID=ENSMUSG00000065399;Name=Mir133a-1\n+18\tmiRNA\ttranscript\t10782897\t10782983\t.\t-\t.\tID=ENSMUST00000083465;Parent=ENSMUSG00000065399;Name=Mir133a-1-201\n+18\tmiRNA\texon\t10782897\t10782983\t.\t-\t.\tParent=ENSMUST00000083465\n+18\tprotein_coding\tgene\t9726195\t9726668\t.\t-\t.\tID='..b'694\n+NT_166402\tprotein_coding\texon\t36964\t37064\t.\t+\t.\tParent=ENSMUST00000096694\n+NT_166402\tprotein_coding\texon\t37217\t38054\t.\t+\t.\tParent=ENSMUST00000096694\n+NT_166433\tprotein_coding\tgene\t28587\t52512\t.\t+\t.\tID=ENSMUSG00000078423;Name=AC007307.2\n+NT_166433\tprotein_coding\tmRNA\t28587\t52512\t.\t+\t.\tID=ENSMUST00000105217;Parent=ENSMUSG00000078423;Name=AC007307.2-201\n+NT_166433\tprotein_coding\tfive_prime_UTR\t28587\t28657\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\tCDS\t28658\t28798\t.\t+\t0\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\tCDS\t31129\t31299\t.\t+\t0\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\tCDS\t32196\t32249\t.\t+\t0\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\tthree_prime_UTR\t32250\t32270\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\tthree_prime_UTR\t51920\t52512\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\texon\t28587\t28798\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\texon\t31129\t31299\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\texon\t32196\t32270\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\texon\t51920\t52512\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\tgene\t47745\t52514\t.\t+\t.\tID=ENSMUSG00000078424;Name=AC007307.3\n+NT_166433\tprotein_coding\tmRNA\t47745\t52514\t.\t+\t.\tID=ENSMUST00000105218;Parent=ENSMUSG00000078424;Name=AC007307.3-201\n+NT_166433\tprotein_coding\tfi
ve_prime_UTR\t47745\t47746\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\tCDS\t47747\t47845\t.\t+\t0\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\tCDS\t50322\t50492\t.\t+\t0\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\tCDS\t51351\t51404\t.\t+\t0\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\tthree_prime_UTR\t51405\t51425\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\tthree_prime_UTR\t51920\t52514\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\texon\t47745\t47845\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\texon\t50322\t50492\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\texon\t51351\t51425\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\texon\t51920\t52514\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\tmRNA\t47928\t52514\t.\t+\t.\tID=ENSMUST00000105219;Parent=ENSMUSG00000078424;Name=AC007307.3-202\n+NT_166433\tprotein_coding\tfive_prime_UTR\t47928\t47985\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\tCDS\t47986\t48129\t.\t+\t0\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\tCDS\t50322\t50492\t.\t+\t0\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\tCDS\t51351\t51404\t.\t+\t0\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\tthree_prime_UTR\t51405\t51425\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\tthree_prime_UTR\t51920\t52514\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\texon\t47928\t48129\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\texon\t50322\t50492\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\texon\t51351\t51425\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\texon\t51920\t52514\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\tgene\t11955\t18898\t.\t+\t.\tID=ENSMUSG00000000702;Name=AC007307.1\n+NT_166433\tprotein_coding\tmRNA\t11955\t18898\t.\t+\t.\tID=ENSMUST000
00105216;Parent=ENSMUSG00000000702;Name=AC007307.1-201\n+NT_166433\tprotein_coding\tfive_prime_UTR\t11955\t12025\t.\t+\t.\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\tCDS\t12026\t12166\t.\t+\t0\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\tCDS\t16677\t16841\t.\t+\t0\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\tCDS\t17745\t17789\t.\t+\t0\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\tthree_prime_UTR\t17790\t17814\t.\t+\t.\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\tthree_prime_UTR\t18309\t18898\t.\t+\t.\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\texon\t11955\t12166\t.\t+\t.\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\texon\t16677\t16841\t.\t+\t.\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\texon\t17745\t17814\t.\t+\t.\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\texon\t18309\t18898\t.\t+\t.\tParent=ENSMUST00000105216\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/ENSEMBL_mm9.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/ENSEMBL_mm9.gtf Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,3974 @@\n+NT_166433\tprotein_coding\texon\t11955\t12166\t.\t+\t.\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "1"; gene_name "AC007307.1"; transcript_name "AC007307.1-201";\n+NT_166433\tprotein_coding\tCDS\t12026\t12166\t.\t+\t0\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "1"; gene_name "AC007307.1"; transcript_name "AC007307.1-201"; protein_id "ENSMUSP00000100851";\n+NT_166433\tprotein_coding\tstart_codon\t12026\t12028\t.\t+\t0\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "1"; gene_name "AC007307.1"; transcript_name "AC007307.1-201";\n+NT_166433\tprotein_coding\texon\t16677\t16841\t.\t+\t.\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "2"; gene_name "AC007307.1"; transcript_name "AC007307.1-201";\n+NT_166433\tprotein_coding\tCDS\t16677\t16841\t.\t+\t0\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "2"; gene_name "AC007307.1"; transcript_name "AC007307.1-201"; protein_id "ENSMUSP00000100851";\n+NT_166433\tprotein_coding\texon\t17745\t17814\t.\t+\t.\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "3"; gene_name "AC007307.1"; transcript_name "AC007307.1-201";\n+NT_166433\tprotein_coding\tCDS\t17745\t17786\t.\t+\t0\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "3"; gene_name "AC007307.1"; transcript_name "AC007307.1-201"; protein_id "ENSMUSP00000100851";\n+NT_166433\tprotein_coding\tstop_codon\t17787\t17789\t.\t+\t0\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "3"; gene_name "AC007307.1"; transcript_name "AC007307.1-201";\n+NT_166433\tprotein_coding\texon\t18309\t18898\t.\t+\t.\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "4"; gene_name "AC007307.1"; transcript_name "AC007307.1-201";\n+NT_166433\tprotein_coding\texon\t28587\t28798\t.\t+\t.\t gene_id 
"ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "1"; gene_name "AC007307.2"; transcript_name "AC007307.2-201";\n+NT_166433\tprotein_coding\tCDS\t28658\t28798\t.\t+\t0\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "1"; gene_name "AC007307.2"; transcript_name "AC007307.2-201"; protein_id "ENSMUSP00000100852";\n+NT_166433\tprotein_coding\tstart_codon\t28658\t28660\t.\t+\t0\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "1"; gene_name "AC007307.2"; transcript_name "AC007307.2-201";\n+NT_166433\tprotein_coding\texon\t31129\t31299\t.\t+\t.\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "2"; gene_name "AC007307.2"; transcript_name "AC007307.2-201";\n+NT_166433\tprotein_coding\tCDS\t31129\t31299\t.\t+\t0\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "2"; gene_name "AC007307.2"; transcript_name "AC007307.2-201"; protein_id "ENSMUSP00000100852";\n+NT_166433\tprotein_coding\texon\t32196\t32270\t.\t+\t.\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "3"; gene_name "AC007307.2"; transcript_name "AC007307.2-201";\n+NT_166433\tprotein_coding\tCDS\t32196\t32246\t.\t+\t0\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "3"; gene_name "AC007307.2"; transcript_name "AC007307.2-201"; protein_id "ENSMUSP00000100852";\n+NT_166433\tprotein_coding\tstop_codon\t32247\t32249\t.\t+\t0\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "3"; gene_name "AC007307.2"; transcript_name "AC007307.2-201";\n+NT_166433\tprotein_coding\texon\t51920\t52512\t.\t+\t.\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "4"; gene_name "AC007307.2"; transcript_name "AC007307.2-201";\n+NT_166433\tprotein_coding\texon\t47745\t47845\t.\t+\t.\t gene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "1"; gene_name 
"AC007307.3"; transcript_name "AC007307.3-201";\n+NT_166433\tprotein_coding\tCDS\t47747\t47845\t.\t+\t0\t gene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "1"; g'..b'32"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12726175\t12726341\t.\t+\t0\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "32"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12730409\t12730592\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "33"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12730409\t12730592\t.\t+\t1\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "33"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12732723\t12732863\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "34"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12732723\t12732863\t.\t+\t0\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "34"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12736261\t12736420\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "35"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12736261\t12736420\t.\t+\t0\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "35"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12736523\t12736653\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "36"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12736523\t12736653\t.\t+\t2\t gene_id "ENSMUSG00000024421"; transcript_id 
"ENSMUST00000115858"; exon_number "36"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12738795\t12738888\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "37"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12738795\t12738888\t.\t+\t0\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "37"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12740202\t12740321\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "38"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12740202\t12740321\t.\t+\t2\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "38"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12741027\t12741522\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "39"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12741027\t12741169\t.\t+\t2\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "39"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\tstop_codon\t12741170\t12741172\t.\t+\t0\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "39"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\texon\t12657194\t12657637\t.\t-\t.\t gene_id "ENSMUSG00000090309"; transcript_id "ENSMUST00000172267"; exon_number "1"; gene_name "AC102131.1"; transcript_name "AC102131.1-201";\n+18\tprotein_coding\tCDS\t12657197\t12657637\t.\t-\t0\t gene_id "ENSMUSG00000090309"; transcript_id "ENSMUST00000172267"; exon_number "1"; gene_name "AC102131.1"; transcript_name "AC102131.1-201"; protein_id 
"ENSMUSP00000129942";\n+18\tprotein_coding\tstop_codon\t12657194\t12657196\t.\t-\t0\t gene_id "ENSMUSG00000090309"; transcript_id "ENSMUST00000172267"; exon_number "1"; gene_name "AC102131.1"; transcript_name "AC102131.1-201";\n+18\trRNA\texon\t12736933\t12737046\t.\t-\t.\t gene_id "ENSMUSG00000088342"; transcript_id "ENSMUST00000157717"; exon_number "1"; gene_name "5S_rRNA.42"; transcript_name "5S_rRNA.42-201";\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/ENSEMBL_mm9_gff3_to_gtf.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/ENSEMBL_mm9_gff3_to_gtf.gtf Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,3709 @@\n+##gff-version 2.5\n+18\ttranscript\texon\t3336414\t3337176\t.\t+\t0\tgene_id "ENSMUSG00000091488"; transcript_id "ENSMUST00000171726"; exon_number "1";\n+18\ttranscript\texon\t3365925\t3366861\t.\t+\t0\tgene_id "ENSMUSG00000091488"; transcript_id "ENSMUST00000171726"; exon_number "2";\n+18\tprotein_coding\tCDS\t9316554\t9316670\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "1"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9319407\t9319569\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "2"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9332782\t9332948\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "3"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9345192\t9345311\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "4"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9345412\t9345469\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "5"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9349386\t9349421\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "6"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9353405\t9353505\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "7"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9377792\t9377826\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "8"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9386733\t9386807\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "9"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9449516\t9449669\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "10"; protein_id 
"ENSMUST00000053917";\n+18\tprotein_coding\texon\t9314042\t9316670\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "1";\n+18\tprotein_coding\texon\t9319407\t9319569\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "2";\n+18\tprotein_coding\texon\t9332782\t9332948\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "3";\n+18\tprotein_coding\texon\t9345192\t9345311\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "4";\n+18\tprotein_coding\texon\t9345412\t9345469\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "5";\n+18\tprotein_coding\texon\t9349386\t9349421\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "6";\n+18\tprotein_coding\texon\t9353405\t9353505\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "7";\n+18\tprotein_coding\texon\t9377792\t9377826\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "8";\n+18\tprotein_coding\texon\t9386733\t9386807\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "9";\n+18\tprotein_coding\texon\t9449516\t9450148\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "10";\n+18\tprotein_coding\tCDS\t9316554\t9316670\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000115867"; exon_number "1"; protein_id "ENSMUST00000115867";\n+18\tprotein_coding\tCDS\t9319407\t9319569\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000115867"; exon_number "2"; protein_id "ENSMUST00000115867";\n+18\tprotein_coding\tCDS\t9332782\t9332948\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000115867"; exon_number "3"; protein_id "ENSMUST00000115867";\n+18\tprotein_coding\tCDS\t9345192\t9345311\t.\t-\t0\tgene_id "ENSMUSG00000024286"; 
transcript_id "ENSMUST00000115867"; exon_number "4"; protein_id "ENSMUST00000115867";\n+18\tprotein_coding\tCDS\t9345412\t9345469\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000115867"; exon_number "5"; protein_id "ENSMUST00000115867";\n+18\t'..b'nscript_id "ENSMUST00000105217"; exon_number "1"; protein_id "ENSMUST00000105217";\n+NT_166433\tprotein_coding\tCDS\t31129\t31299\t.\t+\t0\tgene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "2"; protein_id "ENSMUST00000105217";\n+NT_166433\tprotein_coding\tCDS\t32196\t32249\t.\t+\t0\tgene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "3"; protein_id "ENSMUST00000105217";\n+NT_166433\tprotein_coding\texon\t28587\t28798\t.\t+\t0\tgene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "1";\n+NT_166433\tprotein_coding\texon\t31129\t31299\t.\t+\t0\tgene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "2";\n+NT_166433\tprotein_coding\texon\t32196\t32270\t.\t+\t0\tgene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "3";\n+NT_166433\tprotein_coding\texon\t51920\t52512\t.\t+\t0\tgene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "4";\n+NT_166433\tprotein_coding\tCDS\t47747\t47845\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "1"; protein_id "ENSMUST00000105218";\n+NT_166433\tprotein_coding\tCDS\t50322\t50492\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "2"; protein_id "ENSMUST00000105218";\n+NT_166433\tprotein_coding\tCDS\t51351\t51404\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "3"; protein_id "ENSMUST00000105218";\n+NT_166433\tprotein_coding\texon\t47745\t47845\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "1";\n+NT_166433\tprotein_coding\texon\t50322\t50492\t.\t+\t0\tgene_id 
"ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "2";\n+NT_166433\tprotein_coding\texon\t51351\t51425\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "3";\n+NT_166433\tprotein_coding\texon\t51920\t52514\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "4";\n+NT_166433\tprotein_coding\tCDS\t47986\t48129\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "1"; protein_id "ENSMUST00000105219";\n+NT_166433\tprotein_coding\tCDS\t50322\t50492\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "2"; protein_id "ENSMUST00000105219";\n+NT_166433\tprotein_coding\tCDS\t51351\t51404\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "3"; protein_id "ENSMUST00000105219";\n+NT_166433\tprotein_coding\texon\t47928\t48129\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "1";\n+NT_166433\tprotein_coding\texon\t50322\t50492\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "2";\n+NT_166433\tprotein_coding\texon\t51351\t51425\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "3";\n+NT_166433\tprotein_coding\texon\t51920\t52514\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "4";\n+NT_166433\tprotein_coding\tCDS\t12026\t12166\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "1"; protein_id "ENSMUST00000105216";\n+NT_166433\tprotein_coding\tCDS\t16677\t16841\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "2"; protein_id "ENSMUST00000105216";\n+NT_166433\tprotein_coding\tCDS\t17745\t17789\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "3"; protein_id 
"ENSMUST00000105216";\n+NT_166433\tprotein_coding\texon\t11955\t12166\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "1";\n+NT_166433\tprotein_coding\texon\t16677\t16841\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "2";\n+NT_166433\tprotein_coding\texon\t17745\t17814\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "3";\n+NT_166433\tprotein_coding\texon\t18309\t18898\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "4";\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/JGI_genes.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/JGI_genes.gff3 Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,4626 @@\n+##gff-version 3\n+scaffold_1\tJGI\tgene\t1237411\t1237623\t.\t+\t.\tID=e_gw2.1.2098.1;Name=e_gw2.1.2098.1\n+scaffold_1\tJGI\tmRNA\t1237411\t1237623\t.\t+\t.\tID=1027156;Parent=e_gw2.1.2098.1\n+scaffold_1\tJGI\tCDS\t1237411\t1237456\t.\t+\t0\tParent=1027156\n+scaffold_1\tJGI\tCDS\t1237502\t1237623\t.\t+\t2\tParent=1027156\n+scaffold_1\tJGI\texon\t1237411\t1237456\t.\t+\t.\tParent=1027156\n+scaffold_1\tJGI\texon\t1237502\t1237623\t.\t+\t.\tParent=1027156\n+scaffold_1\tJGI\tgene\t5902548\t5912971\t.\t+\t.\tID=estExt_Genewise2Plus.C_10864;Name=estExt_Genewise2Plus.C_10864\n+scaffold_1\tJGI\tmRNA\t5902548\t5912971\t.\t+\t.\tID=1045793;Parent=estExt_Genewise2Plus.C_10864\n+scaffold_1\tJGI\tCDS\t5902548\t5902888\t.\t+\t0\tParent=1045793\n+scaffold_1\tJGI\tCDS\t5902966\t5909048\t.\t+\t1\tParent=1045793\n+scaffold_1\tJGI\tCDS\t5909123\t5910952\t.\t+\t2\tParent=1045793\n+scaffold_1\tJGI\tCDS\t5911034\t5911827\t.\t+\t2\tParent=1045793\n+scaffold_1\tJGI\tCDS\t5911903\t5912205\t.\t+\t0\tParent=1045793\n+scaffold_1\tJGI\tCDS\t5912269\t5912760\t.\t+\t0\tParent=1045793\n+scaffold_1\tJGI\tthree_prime_UTR\t5912761\t5912971\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\texon\t5902548\t5902888\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\texon\t5902966\t5909048\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\texon\t5909123\t5910952\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\texon\t5911034\t5911827\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\texon\t5911903\t5912205\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\texon\t5912269\t5912971\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\tgene\t5701930\t5702697\t.\t-\t.\tID=fgenesh2_pg.1_#_81;Name=fgenesh2_pg.1_#_81\n+scaffold_1\tJGI\tmRNA\t5701930\t5702697\t.\t-\t.\tID=1066497;Parent=fgenesh2_pg.1_#_81\n+scaffold_1\tJGI\tCDS\t5701930\t5702605\t.\t-\t1\tParent=1066497\n+scaffold_1\tJGI\tCDS\t5702660\t5702697\t.\t-\t0\tParent=1066497\n+scaffold_1\tJGI\texon\t5701930\t5702605\t.\t-\t.\tParent=1066497\n+scaffold_1\tJGI\texon\t5702660\t5702697
\t.\t-\t.\tParent=1066497\n+scaffold_1\tJGI\tgene\t6192379\t6193551\t.\t-\t.\tID=estExt_Genewise2Plus.C_10944;Name=estExt_Genewise2Plus.C_10944\n+scaffold_1\tJGI\tmRNA\t6192379\t6193551\t.\t-\t.\tID=1045858;Parent=estExt_Genewise2Plus.C_10944\n+scaffold_1\tJGI\tfive_prime_UTR\t6193484\t6193551\t.\t-\t.\tParent=1045858\n+scaffold_1\tJGI\tCDS\t6192379\t6192999\t.\t-\t0\tParent=1045858\n+scaffold_1\tJGI\tCDS\t6193076\t6193367\t.\t-\t1\tParent=1045858\n+scaffold_1\tJGI\tCDS\t6193440\t6193483\t.\t-\t0\tParent=1045858\n+scaffold_1\tJGI\texon\t6192379\t6192999\t.\t-\t.\tParent=1045858\n+scaffold_1\tJGI\texon\t6193076\t6193367\t.\t-\t.\tParent=1045858\n+scaffold_1\tJGI\texon\t6193440\t6193551\t.\t-\t.\tParent=1045858\n+scaffold_1\tJGI\tgene\t5709177\t5710596\t.\t-\t.\tID=fgenesh2_pg.1_#_84;Name=fgenesh2_pg.1_#_84\n+scaffold_1\tJGI\tmRNA\t5709177\t5710596\t.\t-\t.\tID=1066500;Parent=fgenesh2_pg.1_#_84\n+scaffold_1\tJGI\tCDS\t5709177\t5709216\t.\t-\t1\tParent=1066500\n+scaffold_1\tJGI\tCDS\t5709320\t5710164\t.\t-\t0\tParent=1066500\n+scaffold_1\tJGI\tCDS\t5710228\t5710596\t.\t-\t0\tParent=1066500\n+scaffold_1\tJGI\texon\t5709177\t5709216\t.\t-\t.\tParent=1066500\n+scaffold_1\tJGI\texon\t5709320\t5710164\t.\t-\t.\tParent=1066500\n+scaffold_1\tJGI\texon\t5710228\t5710596\t.\t-\t.\tParent=1066500\n+scaffold_1\tJGI\tgene\t3582929\t3583102\t.\t+\t.\tID=e_gw2.1.2720.1;Name=e_gw2.1.2720.1\n+scaffold_1\tJGI\tmRNA\t3582929\t3583102\t.\t+\t.\tID=1026247;Parent=e_gw2.1.2720.1\n+scaffold_1\tJGI\tCDS\t3582929\t3583102\t.\t+\t0\tParent=1026247\n+scaffold_1\tJGI\texon\t3582929\t3583102\t.\t+\t.\tParent=1026247\n+scaffold_1\tJGI\tgene\t5061339\t5072066\t.\t-\t.\tID=gm1.335_g;Name=gm1.335_g\n+scaffold_1\tJGI\tmRNA\t5061339\t5072066\t.\t-\t.\tID=204986;Parent=gm1.335_g\n+scaffold_1\tJGI\tCDS\t5061339\t5061410\t.\t-\t0\tParent=204986\n+scaffold_1\tJGI\tCDS\t5070743\t5070801\t.\t-\t2\tParent=204986\n+scaffold_1\tJGI\tCDS\t5071914\t5071951\t.\t-\t1\tParent=204986\n+scaffold_1\tJGI\tCDS\t5072047\t
5072066\t.\t-\t0\tParent=204986\n+scaffold_1\tJGI\texon\t5061339\t5061410\t.\t-\t.\tParent=204986\n+scaffold_1\tJGI\texon\t5070743\t5070801\t.\t-\t.\tParent=204986\n+scaffold_1\tJGI\texon\t5071914\t5071951\t.\t-\t.\tParent=204986\n+scaffold_1\tJGI\texon\t5072047\t5072066\t.\t-\t.\tParent=204986\n+scaffold_1\tJGI\tgene\t6220485\t6222489\t.\t+\t.\tID=gm1.515_g;Name=gm1.515_g\n+scaffold_1\tJGI\tmRNA\t6220485\t6222489\t.\t+\t.\tID=205166;Parent=gm1.515_g\n+scaffold_1\tJGI\tCDS\t6220485\t6220508\t.\t+\t0\tParent=205166\n+scaffold_1\tJGI\tCDS\t6220551\t622'..b'Genemark2.C_10474\n+scaffold_1\tJGI\tmRNA\t6068472\t6069874\t.\t-\t.\tID=1090684;Parent=estExt_Genemark2.C_10474\n+scaffold_1\tJGI\tCDS\t6068473\t6068941\t.\t-\t1\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6068976\t6069014\t.\t-\t1\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6069080\t6069105\t.\t-\t0\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6069143\t6069420\t.\t-\t2\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6069507\t6069611\t.\t-\t2\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6069662\t6069669\t.\t-\t1\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6069778\t6069797\t.\t-\t0\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6069836\t6069874\t.\t-\t0\tParent=1090684\n+scaffold_1\tJGI\tthree_prime_UTR\t6068472\t6068472\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6068472\t6068941\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6068976\t6069014\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6069080\t6069105\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6069143\t6069420\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6069507\t6069611\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6069662\t6069669\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6069778\t6069797\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6069836\t6069874\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\tgene\t6222830\t6223956\t.\t-\t.\tID=fgenesh2_kg.1_#_199_#_Contig10010;Name=fgenesh2_kg.1_#_199_#_Contig10010\n+scaffold_1\tJGI\tmRNA\t6222830\t6223956\t.\t-\t.\tID=1
059305;Parent=fgenesh2_kg.1_#_199_#_Contig10010\n+scaffold_1\tJGI\tfive_prime_UTR\t6223850\t6223956\t.\t-\t.\tParent=1059305\n+scaffold_1\tJGI\tCDS\t6223123\t6223377\t.\t-\t0\tParent=1059305\n+scaffold_1\tJGI\tCDS\t6223466\t6223513\t.\t-\t0\tParent=1059305\n+scaffold_1\tJGI\tCDS\t6223599\t6223636\t.\t-\t2\tParent=1059305\n+scaffold_1\tJGI\tCDS\t6223717\t6223849\t.\t-\t0\tParent=1059305\n+scaffold_1\tJGI\tthree_prime_UTR\t6222830\t6223122\t.\t-\t.\tParent=1059305\n+scaffold_1\tJGI\texon\t6222830\t6223377\t.\t-\t.\tParent=1059305\n+scaffold_1\tJGI\texon\t6223466\t6223513\t.\t-\t.\tParent=1059305\n+scaffold_1\tJGI\texon\t6223599\t6223636\t.\t-\t.\tParent=1059305\n+scaffold_1\tJGI\texon\t6223717\t6223956\t.\t-\t.\tParent=1059305\n+scaffold_1\tJGI\tgene\t6194330\t6196070\t.\t+\t.\tID=fgenesh2_kg.1_#_188_#_Contig951;Name=fgenesh2_kg.1_#_188_#_Contig951\n+scaffold_1\tJGI\tmRNA\t6194330\t6196070\t.\t+\t.\tID=1059294;Parent=fgenesh2_kg.1_#_188_#_Contig951\n+scaffold_1\tJGI\tfive_prime_UTR\t6194330\t6194523\t.\t+\t.\tParent=1059294\n+scaffold_1\tJGI\tCDS\t6194524\t6194757\t.\t+\t0\tParent=1059294\n+scaffold_1\tJGI\tCDS\t6194844\t6195164\t.\t+\t0\tParent=1059294\n+scaffold_1\tJGI\tthree_prime_UTR\t6195165\t6195696\t.\t+\t.\tParent=1059294\n+scaffold_1\tJGI\tthree_prime_UTR\t6195774\t6196070\t.\t+\t.\tParent=1059294\n+scaffold_1\tJGI\texon\t6194330\t6194757\t.\t+\t.\tParent=1059294\n+scaffold_1\tJGI\texon\t6194844\t6195696\t.\t+\t.\tParent=1059294\n+scaffold_1\tJGI\texon\t6195774\t6196070\t.\t+\t.\tParent=1059294\n+scaffold_1\tJGI\tgene\t4208772\t4210540\t.\t-\t.\tID=gm1.273_g;Name=gm1.273_g\n+scaffold_1\tJGI\tmRNA\t4208772\t4210540\t.\t-\t.\tID=204924;Parent=gm1.273_g\n+scaffold_1\tJGI\tCDS\t4208772\t4208899\t.\t-\t2\tParent=204924\n+scaffold_1\tJGI\tCDS\t4208964\t4209686\t.\t-\t2\tParent=204924\n+scaffold_1\tJGI\tCDS\t4209714\t4209734\t.\t-\t2\tParent=204924\n+scaffold_1\tJGI\tCDS\t4209782\t4210451\t.\t-\t0\tParent=204924\n+scaffold_1\tJGI\tCDS\t4210511\t4210540\t.\t-\t0\tPar
ent=204924\n+scaffold_1\tJGI\texon\t4208772\t4208899\t.\t-\t.\tParent=204924\n+scaffold_1\tJGI\texon\t4208964\t4209686\t.\t-\t.\tParent=204924\n+scaffold_1\tJGI\texon\t4209714\t4209734\t.\t-\t.\tParent=204924\n+scaffold_1\tJGI\texon\t4209782\t4210451\t.\t-\t.\tParent=204924\n+scaffold_1\tJGI\texon\t4210511\t4210540\t.\t-\t.\tParent=204924\n+scaffold_1\tJGI\tgene\t5585408\t5590273\t.\t+\t.\tID=estExt_fgenesh2_pm.C_10049;Name=estExt_fgenesh2_pm.C_10049\n+scaffold_1\tJGI\tmRNA\t5585408\t5590273\t.\t+\t.\tID=1073299;Parent=estExt_fgenesh2_pm.C_10049\n+scaffold_1\tJGI\tfive_prime_UTR\t5585408\t5585504\t.\t+\t.\tParent=1073299\n+scaffold_1\tJGI\tCDS\t5585505\t5585741\t.\t+\t0\tParent=1073299\n+scaffold_1\tJGI\tCDS\t5585815\t5586858\t.\t+\t0\tParent=1073299\n+scaffold_1\tJGI\tthree_prime_UTR\t5586859\t5587647\t.\t+\t.\tParent=1073299\n+scaffold_1\tJGI\tthree_prime_UTR\t5587746\t5590273\t.\t+\t.\tParent=1073299\n+scaffold_1\tJGI\texon\t5585408\t5585741\t.\t+\t.\tParent=1073299\n+scaffold_1\tJGI\texon\t5585815\t5587647\t.\t+\t.\tParent=1073299\n+scaffold_1\tJGI\texon\t5587746\t5590273\t.\t+\t.\tParent=1073299\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/JGI_genes.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/JGI_genes.gtf Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,3994 @@\n+scaffold_1\tJGI\texon\t7\t12\t.\t+\t.\tname "fgenesh2_pg.1_#_1"; transcriptId 1066417\n+scaffold_1\tJGI\tCDS\t7\t12\t.\t+\t0\tname "fgenesh2_pg.1_#_1"; proteinId 1066417; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t7\t9\t.\t+\t0\tname "fgenesh2_pg.1_#_1"\n+scaffold_1\tJGI\texon\t363\t902\t.\t+\t.\tname "fgenesh2_pg.1_#_1"; transcriptId 1066417\n+scaffold_1\tJGI\tCDS\t363\t902\t.\t+\t0\tname "fgenesh2_pg.1_#_1"; proteinId 1066417; exonNumber 2\n+scaffold_1\tJGI\texon\t954\t1160\t.\t+\t.\tname "fgenesh2_pg.1_#_1"; transcriptId 1066417\n+scaffold_1\tJGI\tCDS\t954\t1160\t.\t+\t0\tname "fgenesh2_pg.1_#_1"; proteinId 1066417; exonNumber 3\n+scaffold_1\tJGI\tstop_codon\t1158\t1160\t.\t+\t0\tname "fgenesh2_pg.1_#_1"\n+scaffold_1\tJGI\texon\t17310\t18075\t.\t-\t.\tname "estExt_Genewise2Plus.C_10002"; transcriptId 1045566\n+scaffold_1\tJGI\tCDS\t17597\t18075\t.\t-\t2\tname "estExt_Genewise2Plus.C_10002"; proteinId 1045566; exonNumber 3\n+scaffold_1\tJGI\tstop_codon\t17597\t17599\t.\t-\t0\tname "estExt_Genewise2Plus.C_10002"\n+scaffold_1\tJGI\texon\t18135\t18268\t.\t-\t.\tname "estExt_Genewise2Plus.C_10002"; transcriptId 1045566\n+scaffold_1\tJGI\tCDS\t18135\t18268\t.\t-\t1\tname "estExt_Genewise2Plus.C_10002"; proteinId 1045566; exonNumber 2\n+scaffold_1\tJGI\texon\t18353\t19188\t.\t-\t.\tname "estExt_Genewise2Plus.C_10002"; transcriptId 1045566\n+scaffold_1\tJGI\tCDS\t18353\t19188\t.\t-\t0\tname "estExt_Genewise2Plus.C_10002"; proteinId 1045566; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t19186\t19188\t.\t-\t0\tname "estExt_Genewise2Plus.C_10002"\n+scaffold_1\tJGI\texon\t29168\t29186\t.\t-\t.\tname "estExt_Genemark2.C_10004"; transcriptId 1090574\n+scaffold_1\tJGI\texon\t31979\t32005\t.\t-\t.\tname "estExt_Genemark2.C_10004"; transcriptId 1090574\n+scaffold_1\tJGI\texon\t32085\t32211\t.\t-\t.\tname "estExt_Genemark2.C_10004"; transcriptId 1090574\n+scaffold_1\tJGI\tCDS\t32170\t32211\t.\t-\t0\tname "estExt_Genemark2.C_10004"; proteinId 1090574; 
exonNumber 3\n+scaffold_1\tJGI\tstop_codon\t32170\t32172\t.\t-\t0\tname "estExt_Genemark2.C_10004"\n+scaffold_1\tJGI\texon\t32249\t32298\t.\t-\t.\tname "estExt_Genemark2.C_10004"; transcriptId 1090574\n+scaffold_1\tJGI\tCDS\t32249\t32298\t.\t-\t2\tname "estExt_Genemark2.C_10004"; proteinId 1090574; exonNumber 2\n+scaffold_1\tJGI\texon\t39594\t39912\t.\t-\t.\tname "estExt_Genemark2.C_10004"; transcriptId 1090574\n+scaffold_1\tJGI\tCDS\t39594\t39912\t.\t-\t0\tname "estExt_Genemark2.C_10004"; proteinId 1090574; exonNumber 1\n+scaffold_1\tJGI\texon\t48024\t49977\t.\t-\t.\tname "fgenesh2_kg.1_#_3_#_Contig2893"; transcriptId 1059109\n+scaffold_1\tJGI\tCDS\t48459\t49977\t.\t-\t1\tname "fgenesh2_kg.1_#_3_#_Contig2893"; proteinId 1059109; exonNumber 4\n+scaffold_1\tJGI\tstop_codon\t48459\t48461\t.\t-\t0\tname "fgenesh2_kg.1_#_3_#_Contig2893"\n+scaffold_1\tJGI\texon\t50732\t50830\t.\t-\t.\tname "fgenesh2_kg.1_#_3_#_Contig2893"; transcriptId 1059109\n+scaffold_1\tJGI\tCDS\t50732\t50830\t.\t-\t1\tname "fgenesh2_kg.1_#_3_#_Contig2893"; proteinId 1059109; exonNumber 3\n+scaffold_1\tJGI\texon\t50924\t51099\t.\t-\t.\tname "fgenesh2_kg.1_#_3_#_Contig2893"; transcriptId 1059109\n+scaffold_1\tJGI\tCDS\t50924\t51099\t.\t-\t0\tname "fgenesh2_kg.1_#_3_#_Contig2893"; proteinId 1059109; exonNumber 2\n+scaffold_1\tJGI\texon\t51227\t51548\t.\t-\t.\tname "fgenesh2_kg.1_#_3_#_Contig2893"; transcriptId 1059109\n+scaffold_1\tJGI\tCDS\t51227\t51403\t.\t-\t0\tname "fgenesh2_kg.1_#_3_#_Contig2893"; proteinId 1059109; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t51401\t51403\t.\t-\t0\tname "fgenesh2_kg.1_#_3_#_Contig2893"\n+scaffold_1\tJGI\texon\t62584\t62802\t.\t+\t.\tname "e_gw2.1.1272.1"; transcriptId 1026707\n+scaffold_1\tJGI\tCDS\t62584\t62802\t.\t+\t0\tname "e_gw2.1.1272.1"; proteinId 1026707; exonNumber 1\n+scaffold_1\tJGI\tstop_codon\t62800\t62802\t.\t+\t0\tname "e_gw2.1.1272.1"\n+scaffold_1\tJGI\texon\t82115\t82306\t.\t-\t.\tname "e_gw2.1.1271.1"; transcriptId 
1026712\n+scaffold_1\tJGI\tCDS\t82115\t82306\t.\t-\t0\tname "e_gw2.1.1271.1"; proteinId 1026712; exonNumber 1\n+scaffold_1\tJGI\tstop_codon\t82115\t82117\t.\t-\t0\tname "e_gw2.1.1271.1"\n+scaffold_1\tJGI\texon\t120058\t120311\t.\t-\t.\tname "fgenesh2_kg.1_#_6_#_Contig4211"; transcriptId 1059112\n+scaffold_1\tJGI\tCDS\t120203\t120311\t.\t-\t1\tname "fgenesh2_kg.1_#_6_#_Contig4211"; proteinId 1059112; exonNumber 5\n+scaffol'..b'n\t6530335\t6530597\t.\t-\t.\tname "estExt_fgenesh2_pg.C_10280"; transcriptId 1078583\n+scaffold_1\tJGI\tCDS\t6530335\t6530597\t.\t-\t1\tname "estExt_fgenesh2_pg.C_10280"; proteinId 1078583; exonNumber 2\n+scaffold_1\tJGI\texon\t6530713\t6530971\t.\t-\t.\tname "estExt_fgenesh2_pg.C_10280"; transcriptId 1078583\n+scaffold_1\tJGI\tCDS\t6530713\t6530909\t.\t-\t0\tname "estExt_fgenesh2_pg.C_10280"; proteinId 1078583; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t6530907\t6530909\t.\t-\t0\tname "estExt_fgenesh2_pg.C_10280"\n+scaffold_1\tJGI\texon\t6530961\t6531797\t.\t-\t.\tname "estExt_fgenesh2_pg.C_10281"; transcriptId 1078584\n+scaffold_1\tJGI\tCDS\t6531507\t6531797\t.\t-\t0\tname "estExt_fgenesh2_pg.C_10281"; proteinId 1078584; exonNumber 2\n+scaffold_1\tJGI\tstop_codon\t6531507\t6531509\t.\t-\t0\tname "estExt_fgenesh2_pg.C_10281"\n+scaffold_1\tJGI\texon\t6531869\t6532149\t.\t-\t.\tname "estExt_fgenesh2_pg.C_10281"; transcriptId 1078584\n+scaffold_1\tJGI\tCDS\t6531869\t6532027\t.\t-\t0\tname "estExt_fgenesh2_pg.C_10281"; proteinId 1078584; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t6532025\t6532027\t.\t-\t0\tname "estExt_fgenesh2_pg.C_10281"\n+scaffold_1\tJGI\texon\t6532672\t6535468\t.\t+\t.\tname "estExt_Genemark2.C_10601"; transcriptId 1090763\n+scaffold_1\tJGI\tCDS\t6532672\t6535468\t.\t+\t0\tname "estExt_Genemark2.C_10601"; proteinId 1090763; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t6532672\t6532674\t.\t+\t0\tname "estExt_Genemark2.C_10601"\n+scaffold_1\tJGI\texon\t6535501\t6536357\t.\t+\t.\tname "estExt_Genemark2.C_10601"; 
transcriptId 1090763\n+scaffold_1\tJGI\tCDS\t6535501\t6536357\t.\t+\t2\tname "estExt_Genemark2.C_10601"; proteinId 1090763; exonNumber 2\n+scaffold_1\tJGI\texon\t6536382\t6536530\t.\t+\t.\tname "estExt_Genemark2.C_10601"; transcriptId 1090763\n+scaffold_1\tJGI\tCDS\t6536382\t6536530\t.\t+\t0\tname "estExt_Genemark2.C_10601"; proteinId 1090763; exonNumber 3\n+scaffold_1\tJGI\texon\t6536577\t6536589\t.\t+\t.\tname "estExt_Genemark2.C_10601"; transcriptId 1090763\n+scaffold_1\tJGI\tCDS\t6536577\t6536589\t.\t+\t1\tname "estExt_Genemark2.C_10601"; proteinId 1090763; exonNumber 4\n+scaffold_1\tJGI\tstop_codon\t6536587\t6536589\t.\t+\t0\tname "estExt_Genemark2.C_10601"\n+scaffold_1\tJGI\texon\t6536649\t6537413\t.\t-\t.\tname "fgenesh2_kg.1_#_289_#_Contig6235"; transcriptId 1059395\n+scaffold_1\tJGI\tCDS\t6536793\t6537413\t.\t-\t0\tname "fgenesh2_kg.1_#_289_#_Contig6235"; proteinId 1059395; exonNumber 2\n+scaffold_1\tJGI\tstop_codon\t6536793\t6536795\t.\t-\t0\tname "fgenesh2_kg.1_#_289_#_Contig6235"\n+scaffold_1\tJGI\texon\t6537494\t6537976\t.\t-\t.\tname "fgenesh2_kg.1_#_289_#_Contig6235"; transcriptId 1059395\n+scaffold_1\tJGI\tCDS\t6537494\t6537910\t.\t-\t0\tname "fgenesh2_kg.1_#_289_#_Contig6235"; proteinId 1059395; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t6537908\t6537910\t.\t-\t0\tname "fgenesh2_kg.1_#_289_#_Contig6235"\n+scaffold_1\tJGI\texon\t6537938\t6537972\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6537938\t6537972\t.\t+\t0\tname "gm1.603_g"; proteinId 205254; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t6537938\t6537940\t.\t+\t0\tname "gm1.603_g"\n+scaffold_1\tJGI\texon\t6538014\t6538046\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6538014\t6538046\t.\t+\t1\tname "gm1.603_g"; proteinId 205254; exonNumber 2\n+scaffold_1\tJGI\texon\t6538107\t6538156\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6538107\t6538156\t.\t+\t1\tname "gm1.603_g"; proteinId 205254; exonNumber 
3\n+scaffold_1\tJGI\texon\t6538201\t6538314\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6538201\t6538314\t.\t+\t2\tname "gm1.603_g"; proteinId 205254; exonNumber 4\n+scaffold_1\tJGI\texon\t6538367\t6538787\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6538367\t6538787\t.\t+\t2\tname "gm1.603_g"; proteinId 205254; exonNumber 5\n+scaffold_1\tJGI\texon\t6538843\t6539173\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6538843\t6539173\t.\t+\t1\tname "gm1.603_g"; proteinId 205254; exonNumber 6\n+scaffold_1\tJGI\texon\t6539217\t6539489\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6539217\t6539489\t.\t+\t0\tname "gm1.603_g"; proteinId 205254; exonNumber 7\n+scaffold_1\tJGI\tstop_codon\t6539487\t6539489\t.\t+\t0\tname "gm1.603_g"\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/MB7_3R.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/MB7_3R.bed Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,241 @@\n+3R\t60868\t66780\tCG1106-RD\t2\t+\t60868\t66780\t0\t10\t179,61,43,252,193,75,196,1706,126,181,\t0,942,1093,1407,2479,3157,3424,3673,5443,5732,\n+3R\t60868\t66780\tCG1106-RF\t3\t+\t60868\t66780\t0\t8\t179,223,193,75,196,1706,126,181,\t0,1436,2479,3157,3424,3673,5443,5732,\n+3R\t485305\t530979\tCG31531-RC\t2\t+\t485305\t530979\t0\t9\t311,161,54,59,352,207,143,157,4100,\t0,656,11214,21196,37608,38466,40134,40588,41575,\n+3R\t62515\t66780\tCG1106-RA\t1\t+\t62515\t66780\t0\t7\t147,193,75,196,1706,126,181,\t0,832,1510,1777,2026,3796,4085,\n+3R\t60868\t66780\tCG1106-RB\t1\t+\t60868\t66780\t0\t8\t179,252,193,75,196,1706,126,181,\t0,1407,2479,3157,3424,3673,5443,5732,\n+3R\t44184\t45852\tCG31516.a\t31\t-\t44184\t45852\t0\t1\t1669,\t0,\n+3R\t60868\t66780\tCG1106-RH\t1\t+\t60868\t66780\t0\t9\t179,61,252,193,75,196,1706,126,181,\t0,942,1407,2479,3157,3424,3673,5443,5732,\n+3R\t612767\t627445\tCG42574.b\t3\t-\t612767\t627445\t0\t9\t1243,202,189,1836,989,1105,1847,987,511,\t0,1329,1636,1888,3831,4913,6173,12647,14168,\n+3R\t612767\t627445\tCG42574.a\t3\t-\t612767\t627445\t0\t11\t1243,202,189,1836,989,1105,1847,87,579,987,511,\t0,1329,1636,1888,3831,4913,6173,8692,9444,12647,14168,\n+3R\t538609\t539918\tCG9769-RA.3d\t31\t-\t538609\t539918\t0\t1\t1310,\t0,\n+3R\t94943\t103515\tCG9766-RB\t1\t-\t94943\t103515\t0\t4\t568,205,246,165,\t0,627,888,8408,\n+3R\t17136\t21871\tDMG5-MB6.chr3R.1.002.a.a\t2\t+\t17136\t21871\t0\t6\t116,95,486,540,168,281,\t0,2817,2978,3535,4231,4455,\n+3R\t44179\t45852\tCG31516-RA\t34\t-\t44179\t45852\t0\t1\t1674,\t0,\n+3R\t228771\t232914\tCG14648-RA\t1\t+\t228771\t232914\t0\t6\t232,171,719,69,211,1326,\t0,1420,1652,2431,2552,2818,\n+3R\t1053010\t1057940\tCG10229-RA\t1\t-\t1053010\t1057940\t0\t6\t903,560,122,604,222,162,\t0,989,2419,2734,3705,4769,\n+3R\t310763\t313185\tCG1078-RA\t1\t+\t310763\t313185\t0\t4\t1491,93,71,516,\t0,1555,1780,1907,\n+3R\t15388\t16170\tCG18090-RA\t34\t-\t15388\t16170\t0\t1\t783,\t0,\n+3R\t74439\t76518\tCG14643-RA\
t1\t-\t74439\t76518\t0\t4\t301,655,608,62,\t0,474,1190,2018,\n+3R\t117996\t120558\tCG9780-RA\t1\t-\t117996\t120558\t0\t2\t1094,1084,\t0,1479,\n+3R\t1090664\t1094197\tCG31543-RA\t1\t+\t1090664\t1094197\t0\t5\t696,280,81,140,1014,\t0,1811,2179,2321,2520,\n+3R\t1082763\t1094197\tCG31543-RC\t1\t+\t1082763\t1094197\t0\t5\t1081,280,81,140,1014,\t0,9712,10080,10222,10421,\n+3R\t1063223\t1077468\tCG12163-RA\t1\t-\t1063223\t1077468\t0\t6\t656,182,744,128,417,323,\t0,712,953,1991,6337,13923,\n+3R\t470349\t471316\tCG9771-RA\t1\t-\t470349\t471316\t0\t2\t450,414,\t0,554,\n+3R\t92676\t94166\tCG1092.a\t1\t+\t92676\t94166\t0\t2\t252,1184,\t0,307,\n+3R\t306534\t309943\tCG14651-RB\t34\t+\t306534\t309943\t0\t1\t3410,\t0,\n+3R\t160820\t161237\tCG14645-RA\t34\t+\t160820\t161237\t0\t1\t418,\t0,\n+3R\t161164\t163374\tCG9772-RD\t1\t-\t161164\t163374\t0\t4\t288,160,400,1184,\t0,344,567,1027,\n+3R\t255639\t259652\tCG9805-RA\t1\t-\t255639\t259652\t0\t4\t252,2239,1085,232,\t0,311,2600,3782,\n+3R\t161143\t163408\tCG9772-RA\t2\t-\t161143\t163408\t0\t5\t309,160,400,874,139,\t0,365,588,1048,2127,\n+3R\t23030\t30295\tCG12582-RB\t1\t+\t23030\t30295\t0\t8\t564,514,97,289,480,422,381,361,\t0,951,1522,1697,4535,5068,6474,6905,\n+3R\t22997\t30295\tCG12582-RA\t1\t+\t22997\t30295\t0\t9\t288,135,514,97,289,480,422,381,361,\t0,462,984,1555,1730,4568,5101,6507,6938,\n+3R\t161143\t165287\tCG9772-RB\t2\t-\t161143\t165287\t0\t5\t309,160,400,874,199,\t0,365,588,1048,3946,\n+3R\t15414\t15982\tCG18090.a\t31\t-\t15414\t15982\t0\t1\t569,\t0,\n+3R\t135364\t136669\tCG9779-RA\t1\t-\t135364\t136669\t0\t3\t717,241,161,\t0,772,1145,\n+3R\t438597\t459031\tCG1056-RA\t2\t+\t438597\t459031\t0\t7\t476,1116,225,199,444,393,1502,\t0,9634,14098,14384,15031,17254,18933,\n+3R\t467696\t470358\tCG18271-RB\t1\t+\t467696\t470358\t0\t2\t35,1093,\t0,1570,\n+3R\t1045390\t1047270\tCG1116.a\t3\t+\t1045390\t1047270\t0\t6\t188,218,272,150,577,157,\t0,248,531,868,1081,1724,\n+3R\t145412\t151817\tCG9795-RB\t3\t+\t145412\t151817\t0\t10\t167,203,
392,203,212,174,85,82,589,399,\t0,2155,3541,3997,4263,4647,4898,5040,5183,6007,\n+3R\t145412\t151817\tCG9795-RC\t3\t+\t145412\t151817\t0\t10\t201,203,392,203,212,174,85,82,589,399,\t0,2155,3541,3997,4263,4647,4898,5040,5183,6007,\n+3R\t248610\t255054\tCG9809-RD\t1\t-\t248610\t255054\t0\t7\t777,146,1832,795,106,119,358,\t0,840,1045,2943,3835,3994,6087,\n+3R\t72744\t74040\tCG14639-RA\t1\t+\t72744\t74040\t0\t2\t63,1175,\t0,122,\n+3R\t248220\t255054\tCG9809-RB\t1\t-\t248220\t255054\t0\t7\t1167,146,1832,'..b',1752,\t0,2490,4042,7888,14945,15384,17304,19606,\n+3R\t107427\t127263\tCG32490.d\t3\t+\t107427\t127263\t0\t8\t167,161,181,12,158,133,1098,1752,\t0,969,2521,6367,13424,13863,15783,18085,\n+3R\t107427\t127263\tCG32490.c\t3\t+\t107427\t127263\t0\t8\t62,161,181,12,158,133,1098,1752,\t0,969,2521,6367,13424,13863,15783,18085,\n+3R\t107627\t127263\tCG32490.b\t3\t+\t107627\t127263\t0\t8\t99,161,181,12,158,133,1098,1752,\t0,769,2321,6167,13224,13663,15583,17885,\n+3R\t107886\t127263\tCG32490.a\t3\t+\t107886\t127263\t0\t8\t151,161,181,12,158,133,1098,1752,\t0,510,2062,5908,12965,13404,15324,17626,\n+3R\t340352\t383789\tCG34357-RB\t1\t-\t340352\t383789\t0\t6\t490,100,377,885,311,126,\t0,8358,19519,23175,42066,43312,\n+3R\t160820\t161223\tCG14645.a\t31\t+\t160820\t161223\t0\t1\t404,\t0,\n+3R\t1079070\t1081125\tCG1113-RA\t3\t+\t1079070\t1081125\t0\t3\t322,1045,430,\t0,527,1626,\n+3R\t23013\t30295\tCG12582.a\t3\t+\t23013\t30295\t0\t9\t272,135,552,97,289,480,422,381,361,\t0,446,930,1539,1714,4552,5085,6491,6922,\n+3R\t403661\t404368\tCG32945.a\t34\t-\t403661\t404368\t0\t1\t708,\t0,\n+3R\t107427\t128309\tCG32490.i\t2\t+\t107427\t128309\t0\t6\t167,161,181,167,133,62,\t0,969,2521,13415,13863,20821,\n+3R\t107427\t127263\tCG32490.h\t3\t+\t107427\t127263\t0\t7\t167,161,181,167,133,1098,1752,\t0,969,2521,13415,13863,15783,18085,\n+3R\t1045390\t1047270\tCG1116-RA\t1\t+\t1045390\t1047270\t0\t6\t188,215,272,150,577,157,\t0,251,531,868,1081,1724,\n+3R\t1045390\t1047270\tCG1116-RB\t3\t+\t1045
390\t1047270\t0\t5\t466,272,150,577,157,\t0,531,868,1081,1724,\n+3R\t60868\t66781\tCG1106.d\t3\t+\t60868\t66781\t0\t7\t179,223,193,75,196,1896,182,\t0,1436,2479,3157,3424,3673,5732,\n+3R\t60868\t66781\tCG1106.e\t3\t+\t60868\t66781\t0\t6\t179,193,75,196,1896,182,\t0,2479,3157,3424,3673,5732,\n+3R\t64259\t66780\tCG1106.f\t3\t+\t64259\t66780\t0\t4\t229,1706,126,181,\t0,282,2052,2341,\n+3R\t64259\t66781\tCG1106.g\t3\t+\t64259\t66781\t0\t3\t229,1896,182,\t0,282,2341,\n+3R\t60942\t66780\tCG1106.a\t2\t+\t60942\t66780\t0\t8\t105,387,193,75,196,1706,126,181,\t0,1333,2405,3083,3350,3599,5369,5658,\n+3R\t60868\t66781\tCG1106.b\t3\t+\t60868\t66781\t0\t8\t179,61,252,193,75,196,1896,182,\t0,942,1407,2479,3157,3424,3673,5732,\n+3R\t60868\t66780\tCG1106.c\t3\t+\t60868\t66780\t0\t7\t179,193,75,196,1706,126,181,\t0,2479,3157,3424,3673,5443,5732,\n+3R\t560136\t574777\tCG9765-RA\t1\t-\t560136\t574777\t0\t10\t933,150,162,105,72,62,308,2331,202,265,\t0,994,1228,2609,2767,2900,3059,7438,10050,14377,\n+3R\t560132\t572136\tCG9765-RC\t1\t-\t560132\t572136\t0\t10\t937,150,162,105,72,65,266,2331,202,618,\t0,998,1232,2613,2771,2904,3063,7442,10054,11387,\n+3R\t560132\t572136\tCG9765-RD\t3\t-\t560132\t572136\t0\t10\t937,150,162,105,72,65,266,2331,202,573,\t0,998,1232,2613,2771,2904,3063,7442,10054,11432,\n+3R\t233159\t241835\tCG32944-RE\t2\t-\t233159\t241835\t0\t8\t567,182,126,127,71,166,56,431,\t0,622,5444,5804,5999,7096,7323,8246,\n+3R\t560132\t565269\tCG9765-RF\t2\t-\t560132\t565269\t0\t8\t937,150,162,105,72,65,266,37,\t0,998,1232,2613,2771,2904,3063,5101,\n+3R\t216097\t217839\tCG14646-RA\t1\t+\t216097\t217839\t0\t2\t684,1007,\t0,736,\n+3R\t263102\t267050\tCG14650-RA\t1\t+\t263102\t267050\t0\t6\t2380,154,125,103,293,582,\t0,2437,2647,2841,3013,3367,\n+3R\t538609\t540025\tCG9769.a\t34\t-\t538609\t540025\t0\t1\t1417,\t0,\n+3R\t655633\t656232\tCG14658-RA\t34\t-\t655633\t656232\t0\t1\t600,\t0,\n+3R\t145412\t151817\tCG9795-RA\t1\t+\t145412\t151817\t0\t11\t201,203,148,392,203,212,174,85,82,589,399,
\t0,2155,2658,3541,3997,4263,4647,4898,5040,5183,6007,\n+3R\t485305\t530979\tCG31531.a\t2\t+\t485305\t530979\t0\t9\t94,164,54,59,352,207,143,157,4100,\t0,653,11214,21196,37608,38466,40134,40588,41575,\n+3R\t226212\t227739\tCG14647.a\t1\t+\t226212\t227739\t0\t3\t649,132,417,\t0,864,1111,\n+3R\t204401\t206932\tCG11739-RA\t2\t+\t204401\t206932\t0\t7\t331,169,150,118,162,128,508,\t0,771,996,1249,1430,1825,2024,\n+3R\t205029\t206932\tCG11739-RB\t1\t+\t205029\t206932\t0\t6\t312,150,118,162,128,508,\t0,368,621,802,1197,1396,\n+3R\t204386\t206932\tCG11739-RC\t1\t+\t204386\t206932\t0\t7\t66,169,150,118,162,128,508,\t0,786,1011,1264,1445,1840,2039,\n+3R\t204644\t206932\tCG11739-RD\t2\t+\t204644\t206932\t0\t7\t88,169,150,118,162,128,508,\t0,528,753,1006,1187,1582,1781,\n+3R\t639058\t641751\tCG1129-RB\t1\t+\t639058\t641751\t0\t4\t273,306,535,743,\t0,335,903,1951,\n+3R\t639076\t641751\tCG1129-RA\t1\t+\t639076\t641751\t0\t5\t26,90,306,535,743,\t0,165,317,885,1933,\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/MB7_3R.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/MB7_3R.gff3 Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,3971 @@\n+##gff-version 3\n+3R\tMB7\tgene\t361\t10200\t0\t+\t.\tID=CG12581;Name=CG12581\n+3R\tMB7\tmRNA\t361\t10200\t3\t+\t.\tID=CG12581-RB;Parent=CG12581;Name=CG12581-RB\n+3R\tMB7\texon\t361\t509\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\texon\t578\t1913\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\texon\t7784\t8649\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\texon\t9439\t10200\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\tfive_prime_UTR\t361\t509\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\tfive_prime_UTR\t578\t1114\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\tstart_codon\t1115\t1117\t0\t+\t0\tParent=CG12581-RB\n+3R\tMB7\tCDS\t1115\t1913\t0\t+\t0\tParent=CG12581-RB\n+3R\tMB7\tCDS\t7784\t8649\t0\t+\t2\tParent=CG12581-RB\n+3R\tMB7\tCDS\t9439\t9771\t0\t+\t0\tParent=CG12581-RB\n+3R\tMB7\tstop_codon\t9769\t9771\t0\t+\t0\tParent=CG12581-RB\n+3R\tMB7\tthree_prime_UTR\t9772\t10200\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\tmRNA\t380\t10200\t1\t+\t.\tID=CG12581-RA;Parent=CG12581;Name=CG12581-RA\n+3R\tMB7\texon\t380\t1913\t0\t+\t.\tParent=CG12581-RA\n+3R\tMB7\texon\t7784\t8649\t0\t+\t.\tParent=CG12581-RA\n+3R\tMB7\texon\t9439\t10200\t0\t+\t.\tParent=CG12581-RA\n+3R\tMB7\tfive_prime_UTR\t380\t1114\t0\t+\t.\tParent=CG12581-RA\n+3R\tMB7\tstart_codon\t1115\t1117\t0\t+\t0\tParent=CG12581-RA\n+3R\tMB7\tCDS\t1115\t1913\t0\t+\t0\tParent=CG12581-RA\n+3R\tMB7\tCDS\t7784\t8649\t0\t+\t2\tParent=CG12581-RA\n+3R\tMB7\tCDS\t9439\t9771\t0\t+\t0\tParent=CG12581-RA\n+3R\tMB7\tstop_codon\t9769\t9771\t0\t+\t0\tParent=CG12581-RA\n+3R\tMB7\tthree_prime_UTR\t9772\t10200\t0\t+\t.\tParent=CG12581-RA\n+3R\tMB7\tgene\t15388\t16170\t0\t-\t.\tID=CG18090;Name=CG18090\n+3R\tMB7\tmRNA\t15414\t15982\t31\t-\t.\tID=CG18090.a;Parent=CG18090;Name=CG18090.a\n+3R\tMB7\texon\t15414\t15982\t0\t-\t.\tParent=CG18090.a\n+3R\tMB7\tthree_prime_UTR\t15414\t15529\t0\t-\t.\tParent=CG18090.a\n+3R\tMB7\tstop_codon\t15530\t15532\t0\t-\t0\tParent=CG18090.a\n+3R\tMB7\tCDS\t15530\t15955\t0\t-\t0\tParent=CG18090.a\n+3R\tMB7\tstart_codon\t15953\t15955\t0\t
-\t0\tParent=CG18090.a\n+3R\tMB7\tfive_prime_UTR\t15956\t15982\t0\t-\t.\tParent=CG18090.a\n+3R\tMB7\tmRNA\t15388\t16170\t34\t-\t.\tID=CG18090-RA;Parent=CG18090;Name=CG18090-RA\n+3R\tMB7\texon\t15388\t16170\t0\t-\t.\tParent=CG18090-RA\n+3R\tMB7\tthree_prime_UTR\t15388\t15529\t0\t-\t.\tParent=CG18090-RA\n+3R\tMB7\tstop_codon\t15530\t15532\t0\t-\t0\tParent=CG18090-RA\n+3R\tMB7\tCDS\t15530\t15955\t0\t-\t0\tParent=CG18090-RA\n+3R\tMB7\tstart_codon\t15953\t15955\t0\t-\t0\tParent=CG18090-RA\n+3R\tMB7\tfive_prime_UTR\t15956\t16170\t0\t-\t.\tParent=CG18090-RA\n+3R\tMB7\tgene\t17136\t21871\t0\t+\t.\tID=DMG5-MB6.chr3R.1.002.a;Name=DMG5-MB6.chr3R.1.002.a\n+3R\tMB7\tmRNA\t17136\t21871\t2\t+\t.\tID=DMG5-MB6.chr3R.1.002.a.a;Parent=DMG5-MB6.chr3R.1.002.a;Name=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\texon\t17136\t17251\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\texon\t19953\t20047\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\texon\t20114\t20599\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\texon\t20671\t21210\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\texon\t21367\t21534\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\texon\t21591\t21871\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tstart_codon\t17136\t17138\t0\t+\t0\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tCDS\t17136\t17251\t0\t+\t0\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tCDS\t19953\t20047\t0\t+\t1\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tCDS\t20114\t20599\t0\t+\t2\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tCDS\t20671\t20759\t0\t+\t2\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tstop_codon\t20757\t20759\t0\t+\t0\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tthree_prime_UTR\t20760\t21210\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tthree_prime_UTR\t21367\t21534\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tthree_prime_UTR\t21591\t21871\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tgene\t22931\t30295\t0\t+\t.\tID=CG12582;Name=CG12582\n+3R\tMB7\tmRNA\t23013\t30295\t3\t+
\t.\tID=CG12582.a;Parent=CG12582;Name=CG12582.a\n+3R\tMB7\texon\t23013\t23284\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t23459\t23593\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t23943\t24494\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t24552\t24648\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t24727\t25015\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t27565\t28044\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t28098\t28519\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t29504\t29884\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t29935\t30295\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\tfive_prime_UTR\t23013\t23284\t0\t+\t.\tParent=CG125'..b'517\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1121579\t1121685\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1121869\t1122357\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1123924\t1124211\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1125192\t1125295\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1129833\t1129904\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1138711\t1139219\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1139660\t1140027\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1148710\t1148847\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1149387\t1149566\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\tthree_prime_UTR\t1098665\t1099668\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\tstop_codon\t1099669\t1099671\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1099669\t1099804\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1099871\t1100040\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1100457\t1100616\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1100688\t1100809\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1118362\t1118563\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1118720\t1118882\t0\t-\t2\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1118941\t1119092\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1119784\t1119956\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1120028\t1120577\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1121363\t1121517\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1121579\t112168
5\t0\t-\t2\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1121869\t1122357\t0\t-\t2\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1123924\t1124211\t0\t-\t2\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1125192\t1125295\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1129833\t1129904\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1138711\t1139219\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1139660\t1139920\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tstart_codon\t1139918\t1139920\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tfive_prime_UTR\t1139921\t1140027\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\tfive_prime_UTR\t1148710\t1148847\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\tfive_prime_UTR\t1149387\t1149566\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\tmRNA\t1098665\t1149566\t3\t-\t.\tID=CG32464-RU;Parent=CG32464;Name=CG32464-RU\n+3R\tMB7\texon\t1098665\t1099804\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1099871\t1100040\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1100457\t1100616\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1100688\t1100809\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1118362\t1118563\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1118720\t1118882\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1118941\t1119092\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1119784\t1119956\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1120028\t1120577\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1121363\t1121517\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1121579\t1121685\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1121869\t1122357\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1123924\t1124211\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1125192\t1125295\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1138711\t1139219\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1139660\t1140027\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1148710\t1148847\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1149387\t1149566\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\tthree_prime_UTR\t1098665\t1099668\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\tstop_codon\t1099669\t1099671\t
0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1099669\t1099804\t0\t-\t1\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1099871\t1100040\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1100457\t1100616\t0\t-\t1\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1100688\t1100809\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1118362\t1118563\t0\t-\t1\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1118720\t1118882\t0\t-\t2\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1118941\t1119092\t0\t-\t1\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1119784\t1119956\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1120028\t1120577\t0\t-\t1\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1121363\t1121517\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1121579\t1121685\t0\t-\t2\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1121869\t1122357\t0\t-\t2\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1123924\t1124211\t0\t-\t2\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1125192\t1125295\t0\t-\t1\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1138711\t1139219\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1139660\t1139920\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tstart_codon\t1139918\t1139920\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tfive_prime_UTR\t1139921\t1140027\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\tfive_prime_UTR\t1148710\t1148847\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\tfive_prime_UTR\t1149387\t1149566\t0\t-\t.\tParent=CG32464-RU\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/UCSC_transcripts.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/UCSC_transcripts.gff3 Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,129 @@\n+##gff-version 3\n+chr1\thg19_ccdsGene\tgene\t896074\t900571\t.\t+\t.\tID=Gene:CCDS30550.1;Name=Gene:CCDS30550.1\n+chr1\thg19_ccdsGene\tmRNA\t896074\t900571\t0.000000\t+\t.\tID=Transcript:CCDS30550.1;Parent=Gene:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t896074\t896180\t.\t+\t0\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t896673\t896932\t.\t+\t1\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t897009\t897130\t.\t+\t2\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t897206\t897427\t.\t+\t0\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t897735\t897851\t.\t+\t0\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t898084\t898297\t.\t+\t0\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t898489\t898633\t.\t+\t2\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t898717\t898884\t.\t+\t1\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t899300\t899388\t.\t+\t1\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t899487\t899560\t.\t+\t2\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t899729\t899910\t.\t+\t0\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t900343\t900571\t.\t+\t1\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t896074\t896180\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t896673\t896932\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t897009\t897130\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t897206\t897427\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t897735\t897851\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t898084\t898297\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t898489\t898633\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t898717\t898884\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t899300\t899388\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+c
hr1\thg19_ccdsGene\texon\t899487\t899560\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t899729\t899910\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t900343\t900571\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tgene\t880074\t894620\t.\t-\t.\tID=Gene:CCDS3.1;Name=Gene:CCDS3.1\n+chr1\thg19_ccdsGene\tmRNA\t880074\t894620\t0.000000\t-\t.\tID=Transcript:CCDS3.1;Parent=Gene:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t880074\t880180\t.\t-\t2\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t880437\t880526\t.\t-\t2\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t880898\t881033\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t881553\t881666\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t881782\t881925\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t883511\t883612\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t883870\t883983\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t886507\t886618\t.\t-\t1\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t887380\t887519\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t887792\t887980\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t888555\t888668\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t889162\t889272\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t889384\t889462\t.\t-\t1\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t891303\t891393\t.\t-\t2\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t891475\t891595\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t892274\t892405\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t892479\t892653\t.\t-\t1\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t894309\t894461\t.\t-\t1\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t894595\t894620\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t880074\t880180\t.\t-\t.\tParen
t=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t880437\t880526\t.\t-\t.\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t880898\t881033\t.\t-\t.\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t881553\t881666\t.\t-\t.\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t881782\t881925\t.\t-\t.\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t883511\t883612\t.\t-\t.\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t883870\t883983\t.\t-\t.\tParent=Transc'..b'anscript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t874420\t874509\t.\t+\t2\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t874655\t874840\t.\t+\t2\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t876524\t876686\t.\t+\t2\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t877516\t877631\t.\t+\t1\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t877790\t877868\t.\t+\t2\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t877939\t878438\t.\t+\t1\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t878633\t878757\t.\t+\t2\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t879078\t879188\t.\t+\t0\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t879288\t879533\t.\t+\t0\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t861322\t861393\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t865535\t865716\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t866419\t866469\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t871152\t871276\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t874420\t874509\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t874655\t874840\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t876524\t876686\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t877516\t877631\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t877790\t877868\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t877939\t878438\t.\t+\t.\tParen
t=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t878633\t878757\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t879078\t879188\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t879288\t879533\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tgene\t901912\t909955\t.\t+\t.\tID=Gene:CCDS4.1;Name=Gene:CCDS4.1\n+chr1\thg19_ccdsGene\tmRNA\t901912\t909955\t0.000000\t+\t.\tID=Transcript:CCDS4.1;Parent=Gene:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t901912\t901994\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t902084\t902183\t.\t+\t1\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t905657\t905803\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t905901\t905981\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t906066\t906138\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t906259\t906386\t.\t+\t2\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t906493\t906588\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t906704\t906784\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t907455\t907530\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t907668\t907804\t.\t+\t2\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t908241\t908390\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t908566\t908706\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t908880\t909020\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t909213\t909431\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t909696\t909744\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t909822\t909955\t.\t+\t2\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t901912\t901994\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t902084\t902183\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t905657\t905803\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t905901\t905981\
t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t906066\t906138\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t906259\t906386\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t906493\t906588\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t906704\t906784\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t907455\t907530\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t907668\t907804\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t908241\t908390\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t908566\t908706\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t908880\t909020\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t909213\t909431\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t909696\t909744\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t909822\t909955\t.\t+\t.\tParent=Transcript:CCDS4.1\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/UCSC_transcripts.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/UCSC_transcripts.gtf Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,129 @@\n+chr1\thg19_ccdsGene\tstart_codon\t861322\t861324\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t861322\t861393\t0.000000\t+\t0\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t861322\t861393\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t865535\t865716\t0.000000\t+\t0\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t865535\t865716\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t866419\t866469\t0.000000\t+\t1\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t866419\t866469\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t871152\t871276\t0.000000\t+\t1\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t871152\t871276\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t874420\t874509\t0.000000\t+\t2\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t874420\t874509\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t874655\t874840\t0.000000\t+\t2\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t874655\t874840\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t876524\t876686\t0.000000\t+\t2\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t876524\t876686\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t877516\t877631\t0.000000\t+\t1\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t877516\t877631\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t877790\t877868\t0.000000\t+\t2\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t877790\t877868\t0.000000\t+\t.\tgene_id 
"CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t877939\t878438\t0.000000\t+\t1\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t877939\t878438\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t878633\t878757\t0.000000\t+\t2\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t878633\t878757\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t879078\t879188\t0.000000\t+\t0\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t879078\t879188\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t879288\t879530\t0.000000\t+\t0\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tstop_codon\t879531\t879533\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t879288\t879533\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tstop_codon\t880074\t880076\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\tCDS\t880077\t880180\t0.000000\t-\t2\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\texon\t880074\t880180\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\tCDS\t880437\t880526\t0.000000\t-\t2\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\texon\t880437\t880526\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\tCDS\t880898\t881033\t0.000000\t-\t0\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\texon\t880898\t881033\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\tCDS\t881553\t881666\t0.000000\t-\t0\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\texon\t881553\t881666\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; 
\n+chr1\thg19_ccdsGene\tCDS\t881782\t881925\t0.000000\t-\t0\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\texon\t881782\t881925\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\tCDS\t883511\t883612\t0.000000\t-\t0\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\texon\t883511\t883612\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr'..b'0\t0.000000\t+\t2\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\texon\t899487\t899560\t0.000000\t+\t.\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\tCDS\t899729\t899910\t0.000000\t+\t0\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\texon\t899729\t899910\t0.000000\t+\t.\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\tCDS\t900343\t900568\t0.000000\t+\t1\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\tstop_codon\t900569\t900571\t0.000000\t+\t.\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\texon\t900343\t900571\t0.000000\t+\t.\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\tstart_codon\t901912\t901914\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t901912\t901994\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t901912\t901994\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t902084\t902183\t0.000000\t+\t1\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t902084\t902183\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t905657\t905803\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t905657\t905803\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; 
\n+chr1\thg19_ccdsGene\tCDS\t905901\t905981\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t905901\t905981\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t906066\t906138\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t906066\t906138\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t906259\t906386\t0.000000\t+\t2\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t906259\t906386\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t906493\t906588\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t906493\t906588\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t906704\t906784\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t906704\t906784\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t907455\t907530\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t907455\t907530\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t907668\t907804\t0.000000\t+\t2\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t907668\t907804\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t908241\t908390\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t908241\t908390\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t908566\t908706\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t908566\t908706\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t908880\t909020\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id 
"CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t908880\t909020\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t909213\t909431\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t909213\t909431\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t909696\t909744\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t909696\t909744\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t909822\t909952\t0.000000\t+\t2\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tstop_codon\t909953\t909955\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t909822\t909955\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/ccds_genes.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/ccds_genes.bed Wed Jun 11 16:29:25 2014 -0400
b
@@ -0,0 +1,20 @@
+chr1 92149295 92327088 CCDS30770.1 0 - 92149295 92327088 0 16 119,108,42,121,300,159,141,153,338,190,148,169,184,138,185,61, 0,11933,14350,24924,28504,32497,32829,35573,36154,38216,43920,46066,51037,74874,113548,177732,
+chr1 67000041 67208778 CCDS30744.1 0 + 67000041 67208778 0 25 10,64,25,72,57,55,176,12,12,25,52,86,93,75,501,128,127,60,112,156,133,203,65,165,23, 0,91488,98711,101585,105418,108451,109185,126154,133171,136636,137585,138922,142645,145319,147510,154789,155831,161075,184935,194905,199389,204976,206299,206913,208714,
+chr1 8384389 8404073 CCDS30577.1 0 + 8384389 8404073 0 8 397,93,225,728,154,177,206,267, 0,968,1488,5879,11107,13486,15163,19417,
+chr1 16767256 16785385 CCDS44067.1 0 + 16767256 16785385 0 8 14,101,105,82,109,178,76,49, 0,2870,7108,7298,8331,11076,15056,18080,
+chr1 16767256 16785491 CCDS44066.1 0 + 16767256 16785491 0 7 92,101,105,82,109,178,155, 0,2870,7108,7298,8331,11076,18080,
+chr1 16767256 16785385 CCDS173.1 0 + 16767256 16785385 0 8 92,101,105,82,109,178,76,49, 0,2870,7108,7298,8331,11076,15056,18080,
+chr1 25072044 25167428 CCDS256.1 0 + 25072044 25167428 0 6 72,110,126,107,182,165, 0,52188,68540,81456,94306,95219,
+chr1 33547850 33585783 CCDS375.1 0 + 33547850 33585783 0 9 105,174,173,135,166,163,113,215,139, 0,1704,9800,11032,12298,14457,15817,35652,37794,
+chr1 48999844 50489468 CCDS44137.1 0 - 48999844 50489468 0 14 121,27,97,163,153,112,115,90,40,217,95,125,123,34, 0,717,5469,52831,56660,100320,119164,128979,333018,511411,711597,1163140,1317223,1489590,
+chr1 100661810 100715376 CCDS767.1 0 - 100661810 100715376 0 11 168,72,192,78,167,217,122,182,76,124,51, 0,9975,10190,14439,18562,19728,22371,34478,39181,44506,53515,
+chr1 150981108 151006710 CCDS977.1 0 + 150981108 151006710 0 8 39,93,203,185,159,95,159,429, 0,9179,9834,15978,16882,18600,20153,25173,
+chr1 175914288 176176114 CCDS44279.1 0 - 175914288 176176114 0 19 18,45,161,125,118,117,82,109,144,136,115,58,77,69,120,65,98,60,407, 0,2042,41790,43135,44209,82419,98033,98557,101028,135999,140623,171471,189857,203853,217716,218674,230757,239480,261419,
+chr1 175914288 176176114 CCDS30944.1 0 - 175914288 176176114 0 20 18,45,161,125,118,117,82,109,144,136,115,58,77,60,69,120,77,98,60,407, 0,2042,41790,43135,44209,82419,98033,98557,101028,135999,140623,171471,189857,191335,203853,217716,218662,230757,239480,261419,
+chr1 184446643 184588690 CCDS1362.1 0 + 184446643 184588690 0 5 94,95,77,61,39, 0,30078,113229,120891,142008,
+chr1 226420201 226496888 CCDS1553.1 0 - 226420201 226496888 0 15 106,98,180,126,81,102,120,134,158,126,134,105,95,33,79, 0,595,843,6470,18338,33032,33712,35456,45274,53832,55163,63341,65218,68672,76608,
+chr1 1982069 2116448 CCDS37.1 0 + 1982069 2116448 0 18 71,122,90,51,86,132,82,53,189,98,87,136,88,120,80,90,116,88, 0,4810,5853,8910,84631,93579,95396,98241,100159,105364,118887,121424,121670,123266,124123,124593,133952,134291,
+chr1 2075777 2116448 CCDS41229.1 0 + 2075777 2116448 0 13 3,82,53,189,98,87,136,88,120,80,90,116,88, 0,1688,4533,6451,11656,25179,27716,27962,29558,30415,30885,40244,40583,
+chr1 2985823 3350375 CCDS44048.1 0 + 2985823 3350375 0 17 37,350,51,135,103,208,148,154,1417,85,170,78,170,175,237,175,78, 0,116865,174827,315892,327231,333531,335479,336235,342124,345303,348568,349407,356321,356791,361612,362706,364474,
+chr1 2985823 3350375 CCDS41236.1 0 + 2985823 3350375 0 17 37,350,51,135,103,208,148,154,1417,85,170,78,170,175,237,175,135, 0,116865,174827,315892,327231,333531,335479,336235,342124,345303,348568,349407,356321,356791,361612,362706,364417,
+chr1 6285139 6295971 CCDS61.1 0 - 6285139 6295971 0 5 183,218,170,89,195, 0,6822,8394,9806,10637,
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/ccds_genes.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/ccds_genes.gff3 Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,289 @@\n+chr1\tbed2gff\tgene\t92149296\t92327088\t0\t-\t.\tID=Gene:CCDS30770.1;Name=Gene:CCDS30770.1\n+chr1\tbed2gff\ttranscript\t92149296\t92327088\t0\t-\t.\tID=CCDS30770.1;Name=CCDS30770.1;Parent=Gene:CCDS30770.1\n+chr1\tbed2gff\texon\t92149296\t92149414\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92161229\t92161336\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92163646\t92163687\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92174220\t92174340\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92177800\t92178099\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92181793\t92181951\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92182125\t92182265\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92184869\t92185021\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92185450\t92185787\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92187512\t92187701\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92193216\t92193363\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92195362\t92195530\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92200333\t92200516\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92224170\t92224307\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92262844\t92263028\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92327028\t92327088\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\tgene\t67000042\t67208778\t0\t+\t.\tID=Gene:CCDS30744.1;Name=Gene:CCDS30744.1\n+chr1\tbed2gff\ttranscript\t67000042\t67208778\t0\t+\t.\tID=CCDS30744.1;Name=CCDS30744.1;Parent=Gene:CCDS30744.1\n+chr1\tbed2gff\texon\t67000042\t67000051\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67091530\t67091593\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67098753\t67098777\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67101627\t67101698\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67105460\t67105516\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67108493\t67108547\t0\t+\t.\tParent=CCDS30744.1\n+
chr1\tbed2gff\texon\t67109227\t67109402\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67126196\t67126207\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67133213\t67133224\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67136678\t67136702\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67137627\t67137678\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67138964\t67139049\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67142687\t67142779\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67145361\t67145435\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67147552\t67148052\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67154831\t67154958\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67155873\t67155999\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67161117\t67161176\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67184977\t67185088\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67194947\t67195102\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67199431\t67199563\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67205018\t67205220\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67206341\t67206405\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67206955\t67207119\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67208756\t67208778\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\tgene\t8384390\t8404073\t0\t+\t.\tID=Gene:CCDS30577.1;Name=Gene:CCDS30577.1\n+chr1\tbed2gff\ttranscript\t8384390\t8404073\t0\t+\t.\tID=CCDS30577.1;Name=CCDS30577.1;Parent=Gene:CCDS30577.1\n+chr1\tbed2gff\texon\t8384390\t8384786\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8385358\t8385450\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8385878\t8386102\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8390269\t8390996\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8395497\t8395650\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8397876\t8398052\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8399553\t839
9758\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8403807\t8404073\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\tgene\t16767257\t16785385\t0\t+\t.\tID=Gene:CCDS44067.1;Name=Gene:CCDS44067.1\n+chr1\tbed2gff\ttranscript\t16767257\t16785385\t0\t+\t.\tID=CCDS44067.1;Name=CCDS44067.1;Parent=Gene:CCDS44067.1\n+chr1\tbed2gff\texon\t16767257\t16767270\t0\t+\t.\tParent=CCDS44067.1\n+chr1\tbed2gff\texon\t16770127\t16770227\t0\t+\t.\tParent=CCDS44067.1\n+chr1\tbed2gff\texon\t16774365\t16774469\t0\t+\t.\tParent=CCDS44067.1\n+chr1\tbed2gff\texo'..b'bed2gff\texon\t2106663\t2106752\t0\t+\t.\tParent=CCDS37.1\n+chr1\tbed2gff\texon\t2116022\t2116137\t0\t+\t.\tParent=CCDS37.1\n+chr1\tbed2gff\texon\t2116361\t2116448\t0\t+\t.\tParent=CCDS37.1\n+chr1\tbed2gff\tgene\t2075778\t2116448\t0\t+\t.\tID=Gene:CCDS41229.1;Name=Gene:CCDS41229.1\n+chr1\tbed2gff\ttranscript\t2075778\t2116448\t0\t+\t.\tID=CCDS41229.1;Name=CCDS41229.1;Parent=Gene:CCDS41229.1\n+chr1\tbed2gff\texon\t2075778\t2075780\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2077466\t2077547\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2080311\t2080363\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2082229\t2082417\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2087434\t2087531\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2100957\t2101043\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2103494\t2103629\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2103740\t2103827\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2105336\t2105455\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2106193\t2106272\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2106663\t2106752\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2116022\t2116137\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2116361\t2116448\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\tgene\t2985824\t3350375\t0\t+\t.\tID=Gene:CCDS44048.1;Name=Gene:CCDS44048.1\n+chr1\tbed2gff\ttranscript\t2985824\t3350375\t0\t+\t.\tID=CCDS44048
.1;Name=CCDS44048.1;Parent=Gene:CCDS44048.1\n+chr1\tbed2gff\texon\t2985824\t2985860\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3102689\t3103038\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3160651\t3160701\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3301716\t3301850\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3313055\t3313157\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3319355\t3319562\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3321303\t3321450\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3322059\t3322212\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3327948\t3329364\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3331127\t3331211\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3334392\t3334561\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3335231\t3335308\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3342145\t3342314\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3342615\t3342789\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3347436\t3347672\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3348530\t3348704\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3350298\t3350375\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\tgene\t2985824\t3350375\t0\t+\t.\tID=Gene:CCDS41236.1;Name=Gene:CCDS41236.1\n+chr1\tbed2gff\ttranscript\t2985824\t3350375\t0\t+\t.\tID=CCDS41236.1;Name=CCDS41236.1;Parent=Gene:CCDS41236.1\n+chr1\tbed2gff\texon\t2985824\t2985860\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3102689\t3103038\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3160651\t3160701\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3301716\t3301850\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3313055\t3313157\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3319355\t3319562\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3321303\t3321450\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3322059\t3322212\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3327
948\t3329364\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3331127\t3331211\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3334392\t3334561\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3335231\t3335308\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3342145\t3342314\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3342615\t3342789\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3347436\t3347672\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3348530\t3348704\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3350241\t3350375\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\tgene\t6285140\t6295971\t0\t-\t.\tID=Gene:CCDS61.1;Name=Gene:CCDS61.1\n+chr1\tbed2gff\ttranscript\t6285140\t6295971\t0\t-\t.\tID=CCDS61.1;Name=CCDS61.1;Parent=Gene:CCDS61.1\n+chr1\tbed2gff\texon\t6285140\t6285322\t0\t-\t.\tParent=CCDS61.1\n+chr1\tbed2gff\texon\t6291962\t6292179\t0\t-\t.\tParent=CCDS61.1\n+chr1\tbed2gff\texon\t6293534\t6293703\t0\t-\t.\tParent=CCDS61.1\n+chr1\tbed2gff\texon\t6294946\t6295034\t0\t-\t.\tParent=CCDS61.1\n+chr1\tbed2gff\texon\t6295777\t6295971\t0\t-\t.\tParent=CCDS61.1\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/hs_2009.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/hs_2009.bed Wed Jun 11 16:29:25 2014 -0400
b
@@ -0,0 +1,58 @@
+chr1 11873 14409 uc001aaa.3 0 + 11873 11873 0 3 354,109,1189, 0,739,1347,
+chr1 11873 14409 uc010nxq.1 0 + 12189 13639 0 3 354,127,1007, 0,721,1529,
+chr1 11873 14409 uc010nxr.1 0 + 11873 11873 0 3 354,52,1189, 0,772,1347,
+chr1 14362 16765 uc009vis.2 0 - 14362 14362 0 4 467,69,147,159, 0,607,1433,2244,
+chr1 16857 17751 uc009vjc.1 0 - 16857 16857 0 2 198,519, 0,375,
+chr1 15795 18061 uc009vjd.2 0 - 15795 15795 0 5 152,159,198,136,456, 0,811,1062,1437,1810,
+chr1 14362 19759 uc009vit.2 0 - 14362 14362 0 9 467,69,152,159,198,510,147,99,847, 0,607,1433,2244,2495,2870,3552,3905,4550,
+chr1 14362 19759 uc001aae.3 0 - 14362 14362 0 10 467,69,152,159,198,136,137,147,99,847, 0,607,1433,2244,2495,2870,3243,3552,3905,4550,
+chr1 14362 19759 uc009viu.2 0 - 14362 14362 0 10 467,69,152,159,198,510,147,102,54,847, 0,607,1433,2244,2495,2870,3552,3905,4138,4550,
+chr1 16857 19759 uc001aai.1 0 - 16857 16857 0 6 198,136,137,147,112,847, 0,375,748,1057,1410,2055,
+chr1 14362 24901 uc001aab.3 0 - 14362 14362 0 10 467,69,152,159,202,136,137,147,112,164, 0,607,1433,2244,2491,2870,3243,3552,3905,10375,
+chr1 14362 29370 uc001aah.3 0 - 14362 14362 0 11 467,69,152,159,198,136,137,147,99,154,50, 0,607,1433,2244,2495,2870,3243,3552,3905,10375,14958,
+chr1 14362 29370 uc009vir.2 0 - 14362 14362 0 10 467,69,152,159,198,510,147,99,154,50, 0,607,1433,2244,2495,2870,3552,3905,10375,14958,
+chr1 14362 29370 uc009viq.2 0 - 14362 14362 0 7 467,152,159,198,456,154,50, 0,1433,2244,2495,3243,10375,14958,
+chr1 14362 29370 uc001aac.3 0 - 14362 14362 0 11 467,69,152,159,198,110,137,147,102,154,50, 0,607,1433,2244,2495,2896,3243,3552,3905,10375,14958,
+chr1 14406 29370 uc009viv.2 0 - 14406 14406 0 7 2359,198,136,137,147,154,50, 0,2451,2826,3199,3508,10331,14914,
+chr1 14406 29370 uc009viw.2 0 - 14406 14406 0 7 2359,198,510,147,99,154,50, 0,2451,2826,3508,3861,10331,14914,
+chr1 15602 29370 uc009vix.2 0 - 15602 15602 0 7 345,159,198,136,147,154,50, 0,1004,1255,1630,2312,9135,13718,
+chr1 16606 29370 uc009viz.2 0 - 16606 16606 0 8 159,202,136,137,147,112,154,50, 0,247,626,999,1308,1661,8131,12714,
+chr1 16606 29370 uc009viy.2 0 - 16606 16606 0 9 159,198,136,137,147,95,58,154,50, 0,251,626,999,1308,1661,1890,8131,12714,
+chr1 16857 29370 uc010nxs.1 0 - 16857 16857 0 8 198,136,137,147,99,227,154,50, 0,375,748,1057,1410,2055,7880,12463,
+chr1 17232 29370 uc009vje.2 0 - 17232 17232 0 4 510,147,99,50, 0,682,1035,12088,
+chr1 17605 29370 uc009vjf.2 0 - 17605 17605 0 7 137,147,95,58,227,154,50, 0,309,662,891,1307,7132,11715,
+chr1 16857 29961 uc009vjb.1 0 - 16857 16857 0 7 198,136,137,147,112,154,138, 0,375,748,1057,1410,7880,12966,
+chr1 34611 36081 uc001aak.2 0 - 34611 34611 0 3 563,205,361, 0,665,1109,
+chr1 69090 70008 uc001aal.1 0 + 69090 70008 0 1 918, 0,
+chr1 137838 139228 uc001aam.3 0 - 137838 137838 0 1 1390, 0,
+chr1 89294 237877 uc010nxt.1 0 - 89294 89294 0 2 1110,1263, 0,147320,
+chr1 321083 321114 uc001aaq.1 0 + 321083 321083 0 1 31, 0,
+chr1 321145 321223 uc001aar.1 0 + 321145 321145 0 1 78, 0,
+chr1 322036 326938 uc009vjk.2 0 + 324342 325605 0 3 192,58,2500, 0,2251,2402,
+chr1 323891 328580 uc001aav.3 0 + 323891 323891 0 4 169,58,2500,1545, 0,396,547,3144,
+chr1 323891 328580 uc001aau.2 0 + 324342 325605 0 3 169,58,4142, 0,396,547,
+chr1 367658 368595 uc010nxu.1 0 + 367658 368594 0 1 937, 0,
+chr1 420205 421839 uc001aax.1 0 + 420205 420205 0 3 91,267,444, 0,786,1190,
+chr1 566461 568045 uc001aaz.2 0 + 566461 566461 0 1 1584, 0,
+chr1 568148 568842 uc001aba.1 0 + 568148 568148 0 1 694, 0,
+chr1 568843 568912 uc001abb.2 0 + 568843 568843 0 1 69, 0,
+chr1 569326 570349 uc001abc.2 0 + 569326 569326 0 1 1023, 0,
+chr1 621097 622034 uc010nxv.1 0 - 621098 622034 0 1 937, 0,
+chr1 661139 665731 uc001abe.3 0 - 664484 665108 0 3 4045,58,169, 0,4138,4423,
+chr1 668401 668479 uc001abi.1 0 - 668401 668401 0 1 78, 0,
+chr1 668510 668541 uc001abj.2 0 - 668510 668510 0 1 31, 0,
+chr1 661139 670994 uc009vjm.2 0 - 664484 665108 0 3 4045,58,192, 0,7547,9663,
+chr1 671807 671885 uc010nxw.1 0 - 671807 671807 0 1 78, 0,
+chr1 671916 671947 uc001abl.2 0 - 671916 671916 0 1 31, 0,
+chr1 661139 679736 uc002khh.2 0 - 661139 661139 0 7 4045,58,191,233,58,65,162, 0,4138,6257,14043,14369,17526,18435,
+chr1 674239 679736 uc001abm.2 0 - 674239 674239 0 5 165,233,58,65,162, 0,943,1269,4426,5335,
+chr1 700236 714006 uc001abo.2 0 - 700236 700236 0 7 391,59,66,216,132,110,343, 0,1472,3691,4640,8119,9314,13427,
+chr1 761586 762902 uc010nxx.1 0 - 762079 762571 0 1 1316, 0,
+chr1 763063 788902 uc009vjn.1 0 + 763063 763063 0 4 92,102,184,132, 0,1319,24243,25707,
+chr1 763063 788997 uc001abp.1 0 + 763063 763063 0 6 92,102,153,184,96,227, 0,1319,19970,24243,24987,25707,
+chr1 763063 788997 uc001abq.1 0 + 763063 763063 0 5 92,102,184,96,227, 0,1319,24243,24987,25707,
+chr1 763063 788997 uc009vjo.1 0 + 763063 763063 0 4 92,102,96,227, 0,1319,24987,25707,
+chr1 763063 789740 uc001abr.1 0 + 763063 763063 0 7 92,102,153,184,96,132,784, 0,1319,19970,24243,24987,25707,25893,
+chr1 791897 794579 uc001abs.2 0 + 791897 791897 0 1 2682, 0,
+chr1 803452 812182 uc001abt.3 0 - 803452 803452 0 3 603,1044,57, 0,6039,8673,
+chr1 846814 850328 uc001abu.1 0 + 846814 846814 0 2 39,3004, 0,510,
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/hs_2009.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/hs_2009.gff3 Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,375 @@\n+##gff-version 3\n+chr1\tbed2gff\tgene\t11874\t14409\t0\t+\t.\tID=Gene:uc001aaa.3;Name=Gene:uc001aaa.3\n+chr1\tbed2gff\ttranscript\t11874\t14409\t0\t+\t.\tID=uc001aaa.3;Name=uc001aaa.3;Parent=Gene:uc001aaa.3\n+chr1\tbed2gff\texon\t11874\t12227\t0\t+\t.\tParent=uc001aaa.3\n+chr1\tbed2gff\texon\t12613\t12721\t0\t+\t.\tParent=uc001aaa.3\n+chr1\tbed2gff\texon\t13221\t14409\t0\t+\t.\tParent=uc001aaa.3\n+chr1\tbed2gff\tgene\t11874\t14409\t0\t+\t.\tID=Gene:uc010nxq.1;Name=Gene:uc010nxq.1\n+chr1\tbed2gff\ttranscript\t11874\t14409\t0\t+\t.\tID=uc010nxq.1;Name=uc010nxq.1;Parent=Gene:uc010nxq.1\n+chr1\tbed2gff\texon\t11874\t12227\t0\t+\t.\tParent=uc010nxq.1\n+chr1\tbed2gff\texon\t12595\t12721\t0\t+\t.\tParent=uc010nxq.1\n+chr1\tbed2gff\texon\t13403\t14409\t0\t+\t.\tParent=uc010nxq.1\n+chr1\tbed2gff\tgene\t11874\t14409\t0\t+\t.\tID=Gene:uc010nxr.1;Name=Gene:uc010nxr.1\n+chr1\tbed2gff\ttranscript\t11874\t14409\t0\t+\t.\tID=uc010nxr.1;Name=uc010nxr.1;Parent=Gene:uc010nxr.1\n+chr1\tbed2gff\texon\t11874\t12227\t0\t+\t.\tParent=uc010nxr.1\n+chr1\tbed2gff\texon\t12646\t12697\t0\t+\t.\tParent=uc010nxr.1\n+chr1\tbed2gff\texon\t13221\t14409\t0\t+\t.\tParent=uc010nxr.1\n+chr1\tbed2gff\tgene\t14363\t16765\t0\t-\t.\tID=Gene:uc009vis.2;Name=Gene:uc009vis.2\n+chr1\tbed2gff\ttranscript\t14363\t16765\t0\t-\t.\tID=uc009vis.2;Name=uc009vis.2;Parent=Gene:uc009vis.2\n+chr1\tbed2gff\texon\t14363\t14829\t0\t-\t.\tParent=uc009vis.2\n+chr1\tbed2gff\texon\t14970\t15038\t0\t-\t.\tParent=uc009vis.2\n+chr1\tbed2gff\texon\t15796\t15942\t0\t-\t.\tParent=uc009vis.2\n+chr1\tbed2gff\texon\t16607\t16765\t0\t-\t.\tParent=uc009vis.2\n+chr1\tbed2gff\tgene\t16858\t17751\t0\t-\t.\tID=Gene:uc009vjc.1;Name=Gene:uc009vjc.1\n+chr1\tbed2gff\ttranscript\t16858\t17751\t0\t-\t.\tID=uc009vjc.1;Name=uc009vjc.1;Parent=Gene:uc009vjc.1\n+chr1\tbed2gff\texon\t16858\t17055\t0\t-\t.\tParent=uc009vjc.1\n+chr1\tbed2gff\texon\t17233\t17751\t0\t-\t.\tParent=uc009vjc.1\n+chr1\tbed2gff\tgene\t15796\t18061\t0\t-\t.\tI
D=Gene:uc009vjd.2;Name=Gene:uc009vjd.2\n+chr1\tbed2gff\ttranscript\t15796\t18061\t0\t-\t.\tID=uc009vjd.2;Name=uc009vjd.2;Parent=Gene:uc009vjd.2\n+chr1\tbed2gff\texon\t15796\t15947\t0\t-\t.\tParent=uc009vjd.2\n+chr1\tbed2gff\texon\t16607\t16765\t0\t-\t.\tParent=uc009vjd.2\n+chr1\tbed2gff\texon\t16858\t17055\t0\t-\t.\tParent=uc009vjd.2\n+chr1\tbed2gff\texon\t17233\t17368\t0\t-\t.\tParent=uc009vjd.2\n+chr1\tbed2gff\texon\t17606\t18061\t0\t-\t.\tParent=uc009vjd.2\n+chr1\tbed2gff\tgene\t14363\t19759\t0\t-\t.\tID=Gene:uc009vit.2;Name=Gene:uc009vit.2\n+chr1\tbed2gff\ttranscript\t14363\t19759\t0\t-\t.\tID=uc009vit.2;Name=uc009vit.2;Parent=Gene:uc009vit.2\n+chr1\tbed2gff\texon\t14363\t14829\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t14970\t15038\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t15796\t15947\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t16607\t16765\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t16858\t17055\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t17233\t17742\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t17915\t18061\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t18268\t18366\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t18913\t19759\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\tgene\t14363\t19759\t0\t-\t.\tID=Gene:uc001aae.3;Name=Gene:uc001aae.3\n+chr1\tbed2gff\ttranscript\t14363\t19759\t0\t-\t.\tID=uc001aae.3;Name=uc001aae.3;Parent=Gene:uc001aae.3\n+chr1\tbed2gff\texon\t14363\t14829\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t14970\t15038\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t15796\t15947\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t16607\t16765\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t16858\t17055\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t17233\t17368\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t17606\t17742\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t17915\t18061\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t18268\t18366\t0\t-\t.\tParent=uc001aae.3\n+
chr1\tbed2gff\texon\t18913\t19759\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\tgene\t14363\t19759\t0\t-\t.\tID=Gene:uc009viu.2;Name=Gene:uc009viu.2\n+chr1\tbed2gff\ttranscript\t14363\t19759\t0\t-\t.\tID=uc009viu.2;Name=uc009viu.2;Parent=Gene:uc009viu.2\n+chr1\tbed2gff\texon\t14363\t14829\t0\t-\t.\tParent=uc009viu.2\n+chr1\tbed2gff\texon\t14970\t15038\t0\t-\t.\tParent=uc009viu.2\n+chr1\tbed2gff\texon\t15796\t15947\t0\t-\t.\tParent=uc009viu.2\n+chr1\tbed2gff\texon\t16607\t16765\t0\t-\t.\tParent=uc009viu.2\n+chr1\tbed2gff\texon\t16858\t17055\t0\t-\t.\tParen'..b'1abo.2;Name=Gene:uc001abo.2\n+chr1\tbed2gff\ttranscript\t700237\t714006\t0\t-\t.\tID=uc001abo.2;Name=uc001abo.2;Parent=Gene:uc001abo.2\n+chr1\tbed2gff\texon\t700237\t700627\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\texon\t701709\t701767\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\texon\t703928\t703993\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\texon\t704877\t705092\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\texon\t708356\t708487\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\texon\t709551\t709660\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\texon\t713664\t714006\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\tgene\t761587\t762902\t0\t-\t.\tID=Gene:uc010nxx.1;Name=Gene:uc010nxx.1\n+chr1\tbed2gff\ttranscript\t761587\t762902\t0\t-\t.\tID=uc010nxx.1;Name=uc010nxx.1;Parent=Gene:uc010nxx.1\n+chr1\tbed2gff\texon\t761587\t762902\t0\t-\t.\tParent=uc010nxx.1\n+chr1\tbed2gff\tgene\t763064\t788902\t0\t+\t.\tID=Gene:uc009vjn.1;Name=Gene:uc009vjn.1\n+chr1\tbed2gff\ttranscript\t763064\t788902\t0\t+\t.\tID=uc009vjn.1;Name=uc009vjn.1;Parent=Gene:uc009vjn.1\n+chr1\tbed2gff\texon\t763064\t763155\t0\t+\t.\tParent=uc009vjn.1\n+chr1\tbed2gff\texon\t764383\t764484\t0\t+\t.\tParent=uc009vjn.1\n+chr1\tbed2gff\texon\t787307\t787490\t0\t+\t.\tParent=uc009vjn.1\n+chr1\tbed2gff\texon\t788771\t788902\t0\t+\t.\tParent=uc009vjn.1\n+chr1\tbed2gff\tgene\t763064\t788997\t0\t+\t.\tID=Gene:uc001abp.1;Name=Gene:uc001abp.1\n+chr1\tbed2gff\ttranscript\t7
63064\t788997\t0\t+\t.\tID=uc001abp.1;Name=uc001abp.1;Parent=Gene:uc001abp.1\n+chr1\tbed2gff\texon\t763064\t763155\t0\t+\t.\tParent=uc001abp.1\n+chr1\tbed2gff\texon\t764383\t764484\t0\t+\t.\tParent=uc001abp.1\n+chr1\tbed2gff\texon\t783034\t783186\t0\t+\t.\tParent=uc001abp.1\n+chr1\tbed2gff\texon\t787307\t787490\t0\t+\t.\tParent=uc001abp.1\n+chr1\tbed2gff\texon\t788051\t788146\t0\t+\t.\tParent=uc001abp.1\n+chr1\tbed2gff\texon\t788771\t788997\t0\t+\t.\tParent=uc001abp.1\n+chr1\tbed2gff\tgene\t763064\t788997\t0\t+\t.\tID=Gene:uc001abq.1;Name=Gene:uc001abq.1\n+chr1\tbed2gff\ttranscript\t763064\t788997\t0\t+\t.\tID=uc001abq.1;Name=uc001abq.1;Parent=Gene:uc001abq.1\n+chr1\tbed2gff\texon\t763064\t763155\t0\t+\t.\tParent=uc001abq.1\n+chr1\tbed2gff\texon\t764383\t764484\t0\t+\t.\tParent=uc001abq.1\n+chr1\tbed2gff\texon\t787307\t787490\t0\t+\t.\tParent=uc001abq.1\n+chr1\tbed2gff\texon\t788051\t788146\t0\t+\t.\tParent=uc001abq.1\n+chr1\tbed2gff\texon\t788771\t788997\t0\t+\t.\tParent=uc001abq.1\n+chr1\tbed2gff\tgene\t763064\t788997\t0\t+\t.\tID=Gene:uc009vjo.1;Name=Gene:uc009vjo.1\n+chr1\tbed2gff\ttranscript\t763064\t788997\t0\t+\t.\tID=uc009vjo.1;Name=uc009vjo.1;Parent=Gene:uc009vjo.1\n+chr1\tbed2gff\texon\t763064\t763155\t0\t+\t.\tParent=uc009vjo.1\n+chr1\tbed2gff\texon\t764383\t764484\t0\t+\t.\tParent=uc009vjo.1\n+chr1\tbed2gff\texon\t788051\t788146\t0\t+\t.\tParent=uc009vjo.1\n+chr1\tbed2gff\texon\t788771\t788997\t0\t+\t.\tParent=uc009vjo.1\n+chr1\tbed2gff\tgene\t763064\t789740\t0\t+\t.\tID=Gene:uc001abr.1;Name=Gene:uc001abr.1\n+chr1\tbed2gff\ttranscript\t763064\t789740\t0\t+\t.\tID=uc001abr.1;Name=uc001abr.1;Parent=Gene:uc001abr.1\n+chr1\tbed2gff\texon\t763064\t763155\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\texon\t764383\t764484\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\texon\t783034\t783186\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\texon\t787307\t787490\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\texon\t788051\t788146\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\te
xon\t788771\t788902\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\texon\t788957\t789740\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\tgene\t791898\t794579\t0\t+\t.\tID=Gene:uc001abs.2;Name=Gene:uc001abs.2\n+chr1\tbed2gff\ttranscript\t791898\t794579\t0\t+\t.\tID=uc001abs.2;Name=uc001abs.2;Parent=Gene:uc001abs.2\n+chr1\tbed2gff\texon\t791898\t794579\t0\t+\t.\tParent=uc001abs.2\n+chr1\tbed2gff\tgene\t803453\t812182\t0\t-\t.\tID=Gene:uc001abt.3;Name=Gene:uc001abt.3\n+chr1\tbed2gff\ttranscript\t803453\t812182\t0\t-\t.\tID=uc001abt.3;Name=uc001abt.3;Parent=Gene:uc001abt.3\n+chr1\tbed2gff\texon\t803453\t804055\t0\t-\t.\tParent=uc001abt.3\n+chr1\tbed2gff\texon\t809492\t810535\t0\t-\t.\tParent=uc001abt.3\n+chr1\tbed2gff\texon\t812126\t812182\t0\t-\t.\tParent=uc001abt.3\n+chr1\tbed2gff\tgene\t846815\t850328\t0\t+\t.\tID=Gene:uc001abu.1;Name=Gene:uc001abu.1\n+chr1\tbed2gff\ttranscript\t846815\t850328\t0\t+\t.\tID=uc001abu.1;Name=uc001abu.1;Parent=Gene:uc001abu.1\n+chr1\tbed2gff\texon\t846815\t846853\t0\t+\t.\tParent=uc001abu.1\n+chr1\tbed2gff\texon\t847325\t850328\t0\t+\t.\tParent=uc001abu.1\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/s_cerevisiae_SCU49845.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/s_cerevisiae_SCU49845.gbk Wed Jun 11 16:29:25 2014 -0400
b
b'@@ -0,0 +1,165 @@\n+LOCUS       SCU49845     5028 bp    DNA             PLN       21-JUN-1999\n+DEFINITION  Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p\n+            (AXL2) and Rev7p (REV7) genes, complete cds.\n+ACCESSION   U49845\n+VERSION     U49845.1  GI:1293613\n+KEYWORDS    .\n+SOURCE      Saccharomyces cerevisiae (baker\'s yeast)\n+  ORGANISM  Saccharomyces cerevisiae\n+            Eukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;\n+            Saccharomycetales; Saccharomycetaceae; Saccharomyces.\n+REFERENCE   1  (bases 1 to 5028)\n+  AUTHORS   Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.\n+  TITLE     Cloning and sequence of REV7, a gene whose function is required for\n+            DNA damage-induced mutagenesis in Saccharomyces cerevisiae\n+  JOURNAL   Yeast 10 (11), 1503-1509 (1994)\n+  PUBMED    7871890\n+REFERENCE   2  (bases 1 to 5028)\n+  AUTHORS   Roemer,T., Madden,K., Chang,J. and Snyder,M.\n+  TITLE     Selection of axial growth sites in yeast requires Axl2p, a novel\n+            plasma membrane glycoprotein\n+  JOURNAL   Genes Dev. 
10 (7), 777-793 (1996)\n+  PUBMED    8846915\n+REFERENCE   3  (bases 1 to 5028)\n+  AUTHORS   Roemer,T.\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, New\n+            Haven, CT, USA\n+FEATURES             Location/Qualifiers\n+     source          1..5028\n+                     /organism="Saccharomyces cerevisiae"\n+                     /db_xref="taxon:4932"\n+                     /chromosome="IX"\n+                     /map="9"\n+     CDS             <1..206\n+                     /codon_start=3\n+                     /product="TCP1-beta"\n+                     /protein_id="AAA98665.1"\n+                     /db_xref="GI:1293614"\n+                     /translation="SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEA\n+                     AEVLLRVDNIIRARPRTANRQHM"\n+     gene            687..3158\n+                     /gene="AXL2"\n+     CDS             687..3158\n+                     /gene="AXL2"\n+                     /note="plasma membrane glycoprotein"\n+                     /codon_start=1\n+                     /function="required for axial budding pattern of S.\n+                     cerevisiae"\n+                     /product="Axl2p"\n+                     /protein_id="AAA98666.1"\n+                     /db_xref="GI:1293615"\n+                     /translation="MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESF\n+                     TFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFN\n+                     VILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNE\n+                     VFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPE\n+                     TSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYV\n+                     YLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYG\n+                     DVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQ\n+                     DHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSA\n+                     
NATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIA\n+                     CGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLN\n+                     NPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQ\n+                     SQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDS\n+                     YGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTK\n+                     HRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRL\n+                     VDFSNKSNVNVGQVKDIHGRIPEML"\n+     gene            complement(3300..4037)\n+                     /gene="REV7"\n+     CDS             complement(3300..4037)\n+                     /gene="REV7"\n+                     /codon_start=1\n+                     /product="Rev7p"\n+                     /protein_id="AAA98667.1"\n+                     /db_xref="GI:1293616"\n+                     /translation="MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQ\n+                     FVPINRHPALIDYI'..b'cca\n+     1921 agaatttcga caagctttca ttaggtttga aagcgaacca aggttcacaa tctcaagagc\n+     1981 tatattttaa catcattggc atggattcaa agataactca ctcaaaccac agtgcgaatg\n+     2041 caacgtccac aagaagttct caccactcca cctcaacaag ttcttacaca tcttctactt\n+     2101 acactgcaaa aatttcttct acctccgctg ctgctacttc ttctgctcca gcagcgctgc\n+     2161 cagcagccaa taaaacttca tctcacaata aaaaagcagt agcaattgcg tgcggtgttg\n+     2221 ctatcccatt aggcgttatc ctagtagctc tcatttgctt cctaatattc tggagacgca\n+     2281 gaagggaaaa tccagacgat gaaaacttac cgcatgctat tagtggacct gatttgaata\n+     2341 atcctgcaaa taaaccaaat caagaaaacg ctacaccttt gaacaacccc tttgatgatg\n+     2401 atgcttcctc gtacgatgat acttcaatag caagaagatt ggctgctttg aacactttga\n+     2461 aattggataa ccactctgcc actgaatctg atatttccag cgtggatgaa aagagagatt\n+     2521 ctctatcagg tatgaataca tacaatgatc agttccaatc ccaaagtaaa gaagaattat\n+     2581 tagcaaaacc cccagtacag cctccagaga gcccgttctt tgacccacag aataggtctt\n+     2641 cttctgtgta tatggatagt gaaccagcag taaataaatc ctggcgatat actggcaacc\n+  
   2701 tgtcaccagt ctctgatatt gtcagagaca gttacggatc acaaaaaact gttgatacag\n+     2761 aaaaactttt cgatttagaa gcaccagaga aggaaaaacg tacgtcaagg gatgtcacta\n+     2821 tgtcttcact ggacccttgg aacagcaata ttagcccttc tcccgtaaga aaatcagtaa\n+     2881 caccatcacc atataacgta acgaagcatc gtaaccgcca cttacaaaat attcaagact\n+     2941 ctcaaagcgg taaaaacgga atcactccca caacaatgtc aacttcatct tctgacgatt\n+     3001 ttgttccggt taaagatggt gaaaattttt gctgggtcca tagcatggaa ccagacagaa\n+     3061 gaccaagtaa gaaaaggtta gtagattttt caaataagag taatgtcaat gttggtcaag\n+     3121 ttaaggacat tcacggacgc atcccagaaa tgctgtgatt atacgcaacg atattttgct\n+     3181 taattttatt ttcctgtttt attttttatt agtggtttac agatacccta tattttattt\n+     3241 agtttttata cttagagaca tttaatttta attccattct tcaaatttca tttttgcact\n+     3301 taaaacaaag atccaaaaat gctctcgccc tcttcatatt gagaatacac tccattcaaa\n+     3361 attttgtcgt caccgctgat taatttttca ctaaactgat gaataatcaa aggccccacg\n+     3421 tcagaaccga ctaaagaagt gagttttatt ttaggaggtt gaaaaccatt attgtctggt\n+     3481 aaattttcat cttcttgaca tttaacccag tttgaatccc tttcaatttc tgctttttcc\n+     3541 tccaaactat cgaccctcct gtttctgtcc aacttatgtc ctagttccaa ttcgatcgca\n+     3601 ttaataactg cttcaaatgt tattgtgtca tcgttgactt taggtaattt ctccaaatgc\n+     3661 ataatcaaac tatttaagga agatcggaat tcgtcgaaca cttcagtttc cgtaatgatc\n+     3721 tgatcgtctt tatccacatg ttgtaattca ctaaaatcta aaacgtattt ttcaatgcat\n+     3781 aaatcgttct ttttattaat aatgcagatg gaaaatctgt aaacgtgcgt taatttagaa\n+     3841 agaacatcca gtataagttc ttctatatag tcaattaaag caggatgcct attaatggga\n+     3901 acgaactgcg gcaagttgaa tgactggtaa gtagtgtagt cgaatgactg aggtgggtat\n+     3961 acatttctat aaaataaaat caaattaatg tagcatttta agtataccct cagccacttc\n+     4021 tctacccatc tattcataaa gctgacgcaa cgattactat tttttttttc ttcttggatc\n+     4081 tcagtcgtcg caaaaacgta taccttcttt ttccgacctt ttttttagct ttctggaaaa\n+     4141 gtttatatta gttaaacagg gtctagtctt agtgtgaaag ctagtggttt cgattgactg\n+     4201 atattaagaa agtggaaatt aaattagtag 
tgtagacgta tatgcatatg tatttctcgc\n+     4261 ctgtttatgt ttctacgtac ttttgattta tagcaagggg aaaagaaata catactattt\n+     4321 tttggtaaag gtgaaagcat aatgtaaaag ctagaataaa atggacgaaa taaagagagg\n+     4381 cttagttcat cttttttcca aaaagcaccc aatgataata actaaaatga aaaggatttg\n+     4441 ccatctgtca gcaacatcag ttgtgtgagc aataataaaa tcatcacctc cgttgccttt\n+     4501 agcgcgtttg tcgtttgtat cttccgtaat tttagtctta tcaatgggaa tcataaattt\n+     4561 tccaatgaat tagcaatttc gtccaattct ttttgagctt cttcatattt gctttggaat\n+     4621 tcttcgcact tcttttccca ttcatctctt tcttcttcca aagcaacgat ccttctaccc\n+     4681 atttgctcag agttcaaatc ggcctctttc agtttatcca ttgcttcctt cagtttggct\n+     4741 tcactgtctt ctagctgttg ttctagatcc tggtttttct tggtgtagtt ctcattatta\n+     4801 gatctcaagt tattggagtc ttcagccaat tgctttgtat cagacaattg actctctaac\n+     4861 ttctccactt cactgtcgag ttgctcgttt ttagcggaca aagatttaat ctcgttttct\n+     4921 ttttcagtgt tagattgctc taattctttg agctgttctc tcagctcctc atatttttct\n+     4981 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc\n+//\n'
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/s_cerevisiae_SCU49845.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/test-data/s_cerevisiae_SCU49845.gff3 Wed Jun 11 16:29:25 2014 -0400
b
@@ -0,0 +1,8 @@
+IX gbk_to_gff gene 687 3158 . + . ID=AXL2;Name=AXL2
+IX gbk_to_gff . 687 3158 . + . ID=Transcript:AXL2;Parent=AXL2
+IX gbk_to_gff CDS 687 3158 . + . Parent=Transcript:AXL2
+IX gbk_to_gff exon 687 3158 . + . Parent=Transcript:AXL2
+IX gbk_to_gff gene 3300 4037 . - . ID=REV7;Name=REV7
+IX gbk_to_gff . 3300 4037 . - . ID=Transcript:REV7;Parent=REV7
+IX gbk_to_gff CDS 3300 4037 . - . Parent=Transcript:REV7
+IX gbk_to_gff exon 3300 4037 . - . Parent=Transcript:REV7
b
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/tool_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFtools-GX/tool_conf.xml.sample Wed Jun 11 16:29:25 2014 -0400
b
@@ -0,0 +1,7 @@
+<section name="GFFtools" id="gfftools.web">
+    <tool file="GFFtools-GX/gff_to_bed.xml"/>
+    <tool file="GFFtools-GX/bed_to_gff.xml"/>
+    <tool file="GFFtools-GX/gbk_to_gff.xml"/>
+    <tool file="GFFtools-GX/gff_to_gtf.xml"/>
+    <tool file="GFFtools-GX/gtf_to_gff.xml"/>
+</section>