Previous changeset 2:db3c67b03d55 (2014-06-10) Next changeset 4:619e0fcd9126 (2014-06-11) |
Commit message:
Uploaded version 2.0.0 of gfftools ready to import to local instance |
added:
GFFtools-GX/GFFParser.py GFFtools-GX/README GFFtools-GX/bed_to_gff.py GFFtools-GX/bed_to_gff.xml GFFtools-GX/gbk_to_gff.py GFFtools-GX/gbk_to_gff.xml GFFtools-GX/gff_to_bed.py GFFtools-GX/gff_to_bed.xml GFFtools-GX/gff_to_gtf.py GFFtools-GX/gff_to_gtf.xml GFFtools-GX/gtf_to_gff.py GFFtools-GX/gtf_to_gff.xml GFFtools-GX/helper.py GFFtools-GX/test-data/AceView_gff3_to_gtf.gtf GFFtools-GX/test-data/AceView_ncbi_37.gff3 GFFtools-GX/test-data/AceView_ncbi_37.gtf GFFtools-GX/test-data/Aly_JGI.bed GFFtools-GX/test-data/Aly_JGI.gff3 GFFtools-GX/test-data/ENSEMBL_mm9.gff3 GFFtools-GX/test-data/ENSEMBL_mm9.gtf GFFtools-GX/test-data/ENSEMBL_mm9_gff3_to_gtf.gtf GFFtools-GX/test-data/JGI_genes.gff3 GFFtools-GX/test-data/JGI_genes.gtf GFFtools-GX/test-data/MB7_3R.bed GFFtools-GX/test-data/MB7_3R.gff3 GFFtools-GX/test-data/UCSC_transcripts.gff3 GFFtools-GX/test-data/UCSC_transcripts.gtf GFFtools-GX/test-data/ccds_genes.bed GFFtools-GX/test-data/ccds_genes.gff3 GFFtools-GX/test-data/hs_2009.bed GFFtools-GX/test-data/hs_2009.gff3 GFFtools-GX/test-data/s_cerevisiae_SCU49845.gbk GFFtools-GX/test-data/s_cerevisiae_SCU49845.gff3 GFFtools-GX/tool_conf.xml.sample |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/GFFParser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/GFFParser.py Wed Jun 11 16:29:25 2014 -0400 |
[ |
b'@@ -0,0 +1,491 @@\n+#!/usr/bin/env python\n+"""\n+Extract genome annotation from a GFF (a tab delimited format for storing sequence features and annotations) file.\n+\n+Requirements: \n+ Numpy :- http://numpy.org/ \n+ Scipy :- http://scipy.org/ \n+\n+Copyright (C)\t\n+\n+2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. \n+2012-2014 Memorial Sloan Kettering Cancer Center, New York City, USA.\n+"""\n+\n+import re\n+import os\n+import sys\n+import urllib\n+import numpy as np\n+import scipy.io as sio\n+from collections import defaultdict\n+import helper as utils \n+\n+def attribute_tags(col9):\n+ """ \n+ Split the key-value tags from the attribute column, it takes column number 9 from GTF/GFF file \n+\n+ @args col9: attribute column from GFF file \n+ @type col9: str\n+ """\n+ info = defaultdict(list)\n+ is_gff = False\n+ \n+ if not col9:\n+ return is_gff, info\n+ \n+ # trim the line ending semi-colon ucsc may have some white-space \n+ col9 = col9.rstrip(\';| \')\n+ # attributes from 9th column \n+ atbs = col9.split(" ; ")\n+ if len(atbs) == 1:\n+ atbs = col9.split("; ")\n+ if len(atbs) == 1:\n+ atbs = col9.split(";")\n+ # check the GFF3 pattern which has key value pairs like:\n+ gff3_pat = re.compile("\\w+=")\n+ # sometime GTF have: gene_id uc002zkg.1;\n+ gtf_pat = re.compile("\\s?\\w+\\s")\n+\n+ key_vals = []\n+\n+ if gff3_pat.match(atbs[0]): # gff3 pattern \n+ is_gff = True\n+ key_vals = [at.split(\'=\') for at in atbs]\n+ elif gtf_pat.match(atbs[0]): # gtf pattern\n+ for at in atbs:\n+ key_vals.append(at.strip().split(" ",1))\n+ else:\n+ # to handle attribute column has only single value \n+ key_vals.append([\'ID\', atbs[0]])\n+ # get key, val items \n+ for item in key_vals:\n+ key, val = item\n+ # replace the double qoutes from feature identifier \n+ val = re.sub(\'"\', \'\', val)\n+ # replace the web formating place holders to plain text format \n+ info[key].extend([urllib.unquote(v) for v in val.split(\',\') if v])\n+\n+ 
return is_gff, info\n+ \n+def spec_features_keywd(gff_parts):\n+ """\n+ Specify the feature key word according to the GFF specifications\n+\n+ @args gff_parts: attribute field key \n+ @type gff_parts: str \n+ """\n+ for t_id in ["transcript_id", "transcriptId", "proteinId"]:\n+ try:\n+ gff_parts["info"]["Parent"] = gff_parts["info"][t_id]\n+ break\n+ except KeyError:\n+ pass\n+ for g_id in ["gene_id", "geneid", "geneId", "name", "gene_name", "genename"]:\n+ try:\n+ gff_parts["info"]["GParent"] = gff_parts["info"][g_id]\n+ break\n+ except KeyError:\n+ pass\n+ ## TODO key words\n+ for flat_name in ["Transcript", "CDS"]:\n+ if gff_parts["info"].has_key(flat_name):\n+ # parents\n+ if gff_parts[\'type\'] in [flat_name] or re.search(r\'transcript\', gff_parts[\'type\'], re.IGNORECASE):\n+ if not gff_parts[\'id\']:\n+ gff_parts[\'id\'] = gff_parts[\'info\'][flat_name][0]\n+ #gff_parts["info"]["ID"] = [gff_parts["id"]]\n+ # children \n+ elif gff_parts["type"] in ["intron", "exon", "three_prime_UTR",\n+ "coding_exon", "five_prime_UTR", "CDS", "stop_codon",\n+ "start_codon"]:\n+ gff_parts["info"]["Parent"] = gff_parts["info"][flat_name]\n+ break\n+ return gff_parts\n+\n+def Parse(ga_file):\n+ """\n+ Parsing GFF/GTF file based on feature relationship, it takes the input file.\n+\n+ @args ga_file: input file name \n+ @type ga_file: str \n+ """\n+ child_map = defaultdict(list)\n+ parent_map = dict()\n+\n+ ga_handle = utils.open_file(ga_file)\n+\n+ for rec in ga_handle:\n+ rec = rec.strip(\'\\n\\r\')\n+\n+ # skip empty line fasta identifier and commented line\n+ if not rec or rec[0'..b' gene[g_cnt][\'polya_conf\'] = []\n+ gene[g_cnt][\'is_valid\'] = []\n+ gene[g_cnt][\'transcript_complete\'] = []\n+ gene[g_cnt][\'is_complete\'] = []\n+ gene[g_cnt][\'is_correctly_gff3_referenced\'] = \'\'\n+ gene[g_cnt][\'splicegraph\'] = []\n+ g_cnt += 1 \n+\n+ ## deleting empty gene records from the main array\n+ XPFLG=0\n+ for XP, ens in enumerate(gene):\n+ if ens[0]==0:\n+ XPFLG=1\n+ break\n+ 
\n+ if XPFLG==1:\n+ XQC = range(XP, len(gene)+1)\n+ gene = np.delete(gene, XQC)\n+\n+ return gene \n+\n+def NonetoemptyList(XS):\n+ """\n+ Convert a None type to empty list \n+\n+ @args XS: None type \n+ @type XS: str \n+ """\n+ return [] if XS is None else XS \n+\n+def create_missing_feature_type(p_feat, c_feat):\n+ """\n+ GFF/GTF file defines only child features. This function tries to create \n+ the parent feature from the information provided in the attribute column. \n+\n+ example: \n+ chr21 hg19_knownGene exon 9690071 9690100 0.000000 + . gene_id "uc002zkg.1"; transcript_id "uc002zkg.1"; \n+ chr21 hg19_knownGene exon 9692178 9692207 0.000000 + . gene_id "uc021wgt.1"; transcript_id "uc021wgt.1"; \n+ chr21 hg19_knownGene exon 9711935 9712038 0.000000 + . gene_id "uc011abu.2"; transcript_id "uc011abu.2"; \n+\n+ This function gets the parsed feature annotations. \n+ \n+ @args p_feat: Parent feature map \n+ @type p_feat: collections defaultdict\n+ @args c_feat: Child feature map \n+ @type c_feat: collections defaultdict\n+ """\n+\n+ child_n_map = defaultdict(list)\n+ for fid, det in c_feat.items():\n+ # get the details from grand child \n+ GID = STRD = SCR = None\n+ SPOS, EPOS = [], [] \n+ TYP = dict()\n+ for gchild in det:\n+ GID = gchild.get(\'gene_id\', [\'\'])[0] \n+ SPOS.append(gchild.get(\'location\', [])[0]) \n+ EPOS.append(gchild.get(\'location\', [])[1]) \n+ STRD = gchild.get(\'strand\', \'\')\n+ SCR = gchild.get(\'score\', \'\')\n+ TYP[gchild.get(\'type\', \'\')] = 1\n+ SPOS.sort() \n+ EPOS.sort()\n+ \n+ # infer transcript type\n+ transcript_type = \'transcript\'\n+ transcript_type = \'mRNA\' if TYP.get(\'CDS\', \'\') or TYP.get(\'cds\', \'\') else transcript_type\n+ \n+ # gene id and transcript id are same\n+ transcript_id = fid[-1]\n+ if GID == transcript_id:\n+ transcript_id = \'Transcript:\' + str(GID)\n+ \n+ # level -1 feature type \n+ p_feat[(fid[0], fid[1], GID)] = dict( type = \'gene\',\n+ location = [], ## infer location based on multiple 
transcripts \n+ strand = STRD,\n+ name = GID )\n+ # level -2 feature type \n+ child_n_map[(fid[0], fid[1], GID)].append(\n+ dict( type = transcript_type,\n+ location = [SPOS[0], EPOS[-1]], \n+ strand = STRD, \n+ score = SCR, \n+ ID = transcript_id,\n+ gene_id = \'\' ))\n+ # reorganizing the grand child\n+ for gchild in det:\n+ child_n_map[(fid[0], fid[1], transcript_id)].append(\n+ dict( type = gchild.get(\'type\', \'\'),\n+ location = gchild.get(\'location\'),\n+ strand = gchild.get(\'strand\'), \n+ ID = gchild.get(\'ID\'),\n+ score = gchild.get(\'score\'),\n+ gene_id = \'\' ))\n+ return p_feat, child_n_map \n+\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/README Wed Jun 11 16:29:25 2014 -0400 |
[ |
@@ -0,0 +1,59 @@ +A collection of tools for converting genome annotation between GTF (Gene Transfer Format), +BED (Browser Extensible Data) and GFF (Generic Feature Format). + +INTRODUCTION + +Several genome annotation centers provide their data in GTF, BED, GFF3 etc. I have a few programs +that mainly deal with converting between GTF, BED and GFF3 formats. They are extensively tested +with files from different centers like ENSEMBL, UCSC, JGI and NCBI AceView. Please follow the +instructions below to clone these tools into your galaxy instance. + +CONTENTS + +Tool configuration files in *.xml format. + + gtf_to_gff.xml + gff_to_gtf.xml + bed_to_gff.xml + gff_to_bed.xml + gbk_to_gff.xml + +Python based scripts. + + gtf_to_gff.py: convert data from GTF to valid GFF3. + gff_to_gtf.py: convert data from GFF3 to GTF. + bed_to_gff.py: convert data from a 12 column UCSC wiggle BED format to GFF3. + gff_to_bed.py: convert gene transcript annotation from GFF3 to UCSC wiggle 12 column BED format. + gbk_to_gff.py: convert data from genbank format to GFF. + GFFParser.py: Parse GFF/GTF files. + helper.py: Utility functions. + +test-data: Test data set. (move to your galaxy_root_folder/test-data/) + + You may need to move the test files into your test-data directory so galaxy can find them. + If you want to run the functional tests, e.g. as: + + example: + sh run_functional_tests.sh -id fml_gtf2gff + +REQUIREMENTS + + python + +COMMENTS/QUESTIONS + +I can be reached at vipin [at] cbio.mskcc.org + +LICENSE + +Copyright (C) 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society + 2013-2014 Memorial Sloan Kettering Cancer Center + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. + +COURTESY + +To the Galaxy Team. |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/bed_to_gff.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/bed_to_gff.py Wed Jun 11 16:29:25 2014 -0400 |
[ |
#!/usr/bin/env python
"""
Convert genome annotation data in a 12 column BED format to GFF3.

Usage: python bed_to_gff.py in.bed > out.gff

Requirement:
    helper.py : https://github.com/vipints/GFFtools-GX/blob/master/helper.py

Copyright (C)
    2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
    2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA.
"""

import re
import sys
import helper


def _split_blocks(field):
    """
    Split a comma separated BED block column into its items.

    The empty item produced by a trailing comma (e.g. "354,109,1189,") is dropped.

    @args field: content of BED column 11 (blockSizes) or 12 (blockStarts)
    @type field: str
    """
    items = field.split(',')
    if items[-1] == '':
        items.pop()
    return items


def _bed_record_to_gff(parts):
    """
    Build the GFF3 lines (gene, transcript and one exon per block) for one BED record.

    Returns an empty list when the block columns are inconsistent with each
    other or with the block count, so the caller silently skips the record.

    @args parts: tab separated fields of one BED line, at least 12 items
    @type parts: list
    """
    chrom, chrom_start, chrom_end, name, score, strand = parts[:6]

    # columns are addressed by position from the left so that lines carrying
    # extra trailing fields are still read from the right columns
    block_starts = _split_blocks(parts[11])
    block_sizes = _split_blocks(parts[10])

    if len(block_starts) != len(block_sizes):
        return []  # checking the consistency col 11 and col 12

    if len(block_starts) != int(parts[9]):
        return []  # checking the number of exons and block count are same

    if strand not in ('+', '-'):
        strand = '.'  # replace the unknown strand with '.'

    # the BED start is shifted by one to obtain the GFF start coordinate
    first_base = int(chrom_start) + 1

    gff_lines = [
        '%s\tbed2gff\tgene\t%d\t%s\t%s\t%s\t.\tID=Gene:%s;Name=Gene:%s' % (
            chrom, first_base, chrom_end, score, strand, name, name),
        '%s\tbed2gff\ttranscript\t%d\t%s\t%s\t%s\t.\tID=%s;Name=%s;Parent=Gene:%s' % (
            chrom, first_base, chrom_end, score, strand, name, name, name),
    ]

    for rel_start, size in zip(block_starts, block_sizes):
        start = int(chrom_start) + int(rel_start) + 1
        stop = start + int(size) - 1
        gff_lines.append('%s\tbed2gff\texon\t%d\t%d\t%s\t%s\t.\tParent=%s' % (
            chrom, start, stop, score, strand, name))

    return gff_lines


def __main__():
    """
    main function

    Reads the BED file named by the first command line argument and prints the
    converted GFF3 lines to standard output. Empty lines and lines starting
    with '#' are skipped. A line with fewer than 12 columns aborts the run
    with an error message on standard error and exit status -1.
    """

    try:
        bed_fname = sys.argv[1]
    except IndexError:
        # no input file given: show the usage text
        print(__doc__)
        sys.exit(-1)

    bed_fh = helper.open_file(bed_fname)

    try:
        for line in bed_fh:
            line = line.strip('\n\r')

            if not line or line[0] in ['#']:
                continue

            parts = line.split('\t')
            if len(parts) < 12:
                # explicit check instead of assert, which is removed under -O
                sys.stderr.write('error: expected at least 12 tab separated columns in BED line: %s\n' % line)
                sys.exit(-1)

            # bed2gff result lines
            for gff_line in _bed_record_to_gff(parts):
                print(gff_line)
    finally:
        # release the handle even when a malformed line aborts the conversion
        bed_fh.close()


if __name__ == "__main__":
    __main__()
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/bed_to_gff.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/bed_to_gff.xml Wed Jun 11 16:29:25 2014 -0400 |
b |
@@ -0,0 +1,89 @@ +<tool id="fml_bed2gff" name="BED-to-GFF" version="2.0.0"> + <description>converter</description> + <command interpreter="python">bed_to_gff.py $inf_bed > $gff_format + </command> + <inputs> + <param format="bed" name="inf_bed" type="data" label="Convert this query" help="Provide genome annotation in 12 column BED format."/> + </inputs> + <outputs> + <data format="gff3" name="gff_format" label="${tool.name} on ${on_string}: Converted" /> + </outputs> + <tests> + <test> + <param name="inf_bed" value="ccds_genes.bed" /> + <output name="gff_format" file="ccds_genes.gff3" /> + </test> + <test> + <param name="inf_bed" value="hs_2009.bed" /> + <output name="gff_format" file="hs_2009.gff3" /> + </test> + </tests> + <help> + +**What it does** + +This tool converts data from a 12 column UCSC wiggle BED format to GFF3 (scroll down for format description). + +-------- + +**Example** + +- The following data in UCSC Wiggle BED format:: + + chr1 11873 14409 uc001aaa.3 0 + 11873 11873 0 3 354,109,1189, 0,739,1347, + +- Will be converted to GFF3:: + + ##gff-version 3 + chr1 bed2gff gene 11874 14409 0 + . ID=Gene:uc001aaa.3;Name=Gene:uc001aaa.3 + chr1 bed2gff transcript 11874 14409 0 + . ID=uc001aaa.3;Name=uc001aaa.3;Parent=Gene:uc001aaa.3 + chr1 bed2gff exon 11874 12227 0 + . Parent=uc001aaa.3 + chr1 bed2gff exon 12613 12721 0 + . Parent=uc001aaa.3 + chr1 bed2gff exon 13221 14409 0 + . Parent=uc001aaa.3 + +-------- + +**About formats** + +**BED format** Browser Extensible Data format was designed at UCSC for displaying data tracks in the Genome Browser. It has three required fields and several additional optional ones: + +The first three BED fields (required) are:: + + 1. chrom - The name of the chromosome (e.g. chr1, chrY_random). + 2. chromStart - The starting position in the chromosome. (The first base in a chromosome is numbered 0.) + 3. chromEnd - The ending position in the chromosome, plus 1 (i.e., a half-open interval). 
+ +The additional BED fields (optional) are:: + + 4. name - The name of the BED line. + 5. score - A score between 0 and 1000. + 6. strand - Defines the strand - either '+' or '-'. + 7. thickStart - The starting position where the feature is drawn thickly at the Genome Browser. + 8. thickEnd - The ending position where the feature is drawn thickly at the Genome Browser. + 9. reserved - This should always be set to zero. + 10. blockCount - The number of blocks (exons) in the BED line. + 11. blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount. + 12. blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount. + +**GFF3 format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF3 lines have nine tab-separated fields:: + + 1. seqid - Must be a chromosome or scaffold or contig. + 2. source - The program that generated this feature. + 3. type - The name of this type of feature. Some examples of standard feature types are "gene", "CDS", "protein", "mRNA", and "exon". + 4. start - The starting position of the feature in the sequence. The first base is numbered 1. + 5. stop - The ending position of the feature (inclusive). + 6. score - A score between 0 and 1000. If there is no score value, enter ".". + 7. strand - Valid entries include '+', '-', or '.' (for don't know/care). + 8. phase - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'. + 9. attributes - All lines with the same group are linked together into a single item. 
+ +-------- + +**Copyright** + +2009-2014 Max Planck Society, University of Tübingen & Memorial Sloan Kettering Cancer Center + +Sreedharan VT, Schultheiss SJ, Jean G, Kahles A, Bohnert R, Drewe P, Mudrakarta P, Görnitz N, Zeller G, Rätsch G. Oqtans: the RNA-seq workbench in the cloud for complete and reproducible quantitative transcriptome analysis. Bioinformatics 10.1093/bioinformatics/btt731 (2014) + + </help> +</tool> |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gbk_to_gff.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/gbk_to_gff.py Wed Jun 11 16:29:25 2014 -0400 |
[ |
#!/usr/bin/env python
"""
Convert data from Genbank format to GFF.

Usage:
python gbk_to_gff.py in.gbk > out.gff

Requirements:
    BioPython:- http://biopython.org/
    helper.py : https://github.com/vipints/GFFtools-GX/blob/master/helper.py

Copyright (C)
    2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
    2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA.
"""

import os
import re
import sys
import collections
from Bio import SeqIO
import helper


def _span(intervals):
    """
    Return (lowest start, highest end) over a non-empty list of (start, end) tuples.
    """
    return min(iv[0] for iv in intervals), max(iv[1] for iv in intervals)


def _print_children(base_line, parent, gene_exons, gene_cds):
    """
    Print the exon and CDS lines below *parent*.

    When a CDS is known but no exon is, the CDS intervals are printed a second
    time as exon features.
    """
    if gene_exons:
        exon_line_print(base_line, gene_exons, parent, 'exon')

    if gene_cds:
        exon_line_print(base_line, gene_cds, parent, 'CDS')
        if not gene_exons:
            exon_line_print(base_line, gene_cds, parent, 'exon')


def _first_qualifier(rec, key, default=None):
    """
    Return the first value of qualifier *key* of a feature, or *default* when
    the qualifier is absent or empty.
    """
    values = rec.qualifiers.get(key)
    return values[0] if values else default


def feature_table(chr_id, source, orient, genes, transcripts, cds, exons, unk):
    """
    Write the feature information

    @args chr_id: sequence identifier written to column 1
    @args source: value written to the type column of constructed transcript lines
    @args orient: strand used when the record defines no gene feature
    @args genes: gene name -> (start, stop, strand, feature type)
    @args transcripts: gene name -> list of (start, stop, transcript id, feature type)
    @args cds: gene name -> list of (start, stop) of CDS features
    @args exons: gene name -> list of (start, stop) of exon features
    @args unk: running number for features without a gene; the possibly
               incremented value is returned
    """

    for gname, ginfo in genes.items():
        line = [str(chr_id),
                'gbk_to_gff',
                ginfo[3],
                str(ginfo[0]),
                str(ginfo[1]),
                '.',
                ginfo[2],
                '.',
                'ID=%s;Name=%s' % (str(gname), str(gname))]
        print('\t'.join(line))

        # look the children up per gene; .get() avoids both the IndexError on a
        # gene without exon/CDS and inserting empty entries into a defaultdict
        gene_exons = exons.get(gname)
        gene_cds = cds.get(gname)

        if not transcripts:
            ## construct the transcript line, it is not defined in the original file
            if gene_exons:  ## get the entire transcript region from the defined feature
                t_start, t_stop = _span(gene_exons)
            elif gene_cds:
                t_start, t_stop = _span(gene_cds)
            else:
                # no child feature at all: fall back to the gene coordinates
                t_start, t_stop = ginfo[0], ginfo[1]

            # NOTE(review): the type column carries `source` (the mol_type
            # qualifier handed in by gbk_parse) - kept as in the original.
            t_line = [str(chr_id), 'gbk_to_gff', source, str(t_start), str(t_stop),
                      '.', ginfo[2], '.',
                      'ID=Transcript:%s;Parent=%s' % (str(gname), str(gname))]
            print('\t'.join(t_line))

            _print_children(t_line, 'Transcript:' + str(gname), gene_exons, gene_cds)

        else:  ## transcript is defined
            for idx in transcripts.get(gname, []):
                # a fresh 9 column line per transcript, so the row never grows
                t_line = [str(chr_id), 'gbk_to_gff', idx[3], str(idx[0]), str(idx[1]),
                          '.', ginfo[2], '.',
                          'ID=' + str(idx[2]) + ';Parent=' + str(gname)]
                print('\t'.join(t_line))

                ## feature line print call
                _print_children(t_line, str(idx[2]), gene_exons, gene_cds)

    if len(genes) == 0:  ## feature entry with fragment information

        line = [str(chr_id), 'gbk_to_gff', source, 0, 1, '.', orient, '.']
        fStart = fStop = None

        # the first interval of the last visited entry wins; exons override CDS
        for eid, ex in cds.items():
            fStart = ex[0][0]
            fStop = ex[0][-1]

        for eid, ex in exons.items():
            fStart = ex[0][0]
            fStop = ex[0][-1]

        if fStart or fStop:

            line[2] = 'gene'
            line[3] = str(fStart)
            line[4] = str(fStop)
            line.append('ID=Unknown_Gene_' + str(unk) + ';Name=Unknown_Gene_' + str(unk))
            print("\t".join(line))

            if not cds:
                line[2] = 'transcript'
            else:
                line[2] = 'mRNA'

            line[8] = 'ID=Unknown_Transcript_' + str(unk) + ';Parent=Unknown_Gene_' + str(unk)
            print("\t".join(line))

            # features without a gene qualifier are stored under the key None;
            # exon lines come from the exon table, not from the CDS table
            _print_children(line, 'Unknown_Transcript_' + str(unk),
                            exons.get(None), cds.get(None))

            unk += 1

    return unk


def exon_line_print(temp_line, trx_exons, parent, ftype):
    """
    Print the EXON feature line

    @args temp_line: 9 column GFF line used as template; columns 3, 4, 5 and 9
                     are overwritten in place for every printed interval
    @args trx_exons: list of (start, stop) tuples
    @args parent: identifier written as Parent attribute
    @args ftype: feature type written to column 3
    """

    for ex in trx_exons:
        temp_line[2] = ftype
        temp_line[3] = str(ex[0])
        temp_line[4] = str(ex[1])
        temp_line[8] = 'Parent=%s' % parent
        print('\t'.join(temp_line))


def gbk_parse(fname):
    """
    Extract genome annotation records from genbank format

    @args fname: gbk file name
    @type fname: str
    """

    # open the file named by the argument, not the module level variable
    fhand = helper.open_file(fname)
    unk = 1

    try:
        for record in SeqIO.parse(fhand, "genbank"):

            gene_tags = dict()
            tx_tags = collections.defaultdict(list)
            exon = collections.defaultdict(list)
            cds = collections.defaultdict(list)
            # defaults used when the record carries no source feature
            mol_type, chr_id = '.', record.name
            # defined up front so the feature_table call below never sees an
            # unbound or stale value from a previous record
            strand = '.'

            for rec in record.features:

                if rec.type == 'source':
                    mol_type = _first_qualifier(rec, 'mol_type', '.')
                    chr_id = _first_qualifier(rec, 'chromosome', record.name)
                    continue

                # a missing strand (None) is reported as '-' like before
                strand = '+' if (rec.strand or 0) > 0 else '-'

                fid = _first_qualifier(rec, 'gene')
                transcript_id = _first_qualifier(rec, 'transcript_id')

                # 1-based start, end taken as is
                start = rec.location._start.position + 1
                stop = rec.location._end.position

                if re.search(r'gene', rec.type):
                    gene_tags[fid] = (start, stop, strand, rec.type)
                elif rec.type == 'exon':
                    exon[fid].append((start, stop))
                elif rec.type == 'CDS':
                    cds[fid].append((start, stop))
                else:
                    # get all transcripts
                    if transcript_id:
                        tx_tags[fid].append((start, stop, transcript_id, rec.type))

            # record extracted, generate feature table
            unk = feature_table(chr_id, mol_type, strand, gene_tags, tx_tags, cds, exon, unk)
    finally:
        # close the input even when a record cannot be converted
        fhand.close()


if __name__=='__main__':

    try:
        gbkfname = sys.argv[1]
    except IndexError:
        # no input file given: show the usage text
        print(__doc__)
        sys.exit(-1)

    ## extract gbk records
    gbk_parse(gbkfname)
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gbk_to_gff.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/gbk_to_gff.xml Wed Jun 11 16:29:25 2014 -0400 |
b |
@@ -0,0 +1,91 @@ +<tool id="fml_gbk2gff" name="GBK-to-GFF" version="2.0.0"> + <description>converter</description> + <command interpreter="python">gbk_to_gff.py $inf_gbk > $gff_format + </command> + <inputs> + <param format="gb,gbk,genbank,txt" name="inf_gbk" type="data" label="Convert this query" help="GenBank flat file format consists of an annotation section and a sequence section."/> + </inputs> + <outputs> + <data format="gff3" name="gff_format" label="${tool.name} on ${on_string}: Converted"/> + </outputs> + <tests> + <test> + <param name="inf_gbk" value="s_cerevisiae_SCU49845.gbk" /> + <output name="gff_format" file="s_cerevisiae_SCU49845.gff3" /> + </test> + </tests> + <help> + +**What it does** + +This tool converts data from a GenBank_ flat file format to GFF (scroll down for format description). + +.. _GenBank: http://www.ncbi.nlm.nih.gov/genbank/ + +------ + +**Example** + +- The following data in GenBank format:: + + LOCUS NM_001202705 2406 bp mRNA linear PLN 28-MAY-2011 + DEFINITION Arabidopsis thaliana thiamine biosynthesis protein ThiC (THIC) + mRNA, complete cds. + ACCESSION NM_001202705 + VERSION NM_001202705.1 GI:334184566......... + FEATURES Location/Qualifiers + source 1..2406 + /organism="Arabidopsis thaliana" + /mol_type="mRNA" + /db_xref="taxon:3702"........ + gene 1..2406 + /gene="THIC" + /locus_tag="AT2G29630" + /gene_synonym="PY; PYRIMIDINE REQUIRING; T27A16.27;........ + ORIGIN + 1 aagcctttcg ctttaggctg cattgggccg tgacaatatt cagacgattc aggaggttcg + 61 ttcctttttt aaaggaccct aatcactctg agtaccactg actcactcag tgtgcgcgat + 121 tcatttcaaa aacgagccag cctcttcttc cttcgtctac tagatcagat ccaaagcttc + 181 ctcttccagc tatggctgct tcagtacact gtaccttgat gtccgtcgta tgcaacaaca + // + + +- Will be converted to GFF3:: + + ##gff-version 3 + NM_001202705 gbk_to_gff chromosome 1 2406 . + 1 ID=NM_001202705;Alias=2;Dbxref=taxon:3702;Name=NM_001202705 + NM_001202705 gbk_to_gff gene 1 2406 . 
+ 1 ID=AT2G29630;Dbxref=GeneID:817513,TAIR:AT2G29630;Name=THIC + NM_001202705 gbk_to_gff mRNA 192 2126 . + 1 ID=AT2G29630.t01;Parent=AT2G29630 + NM_001202705 gbk_to_gff CDS 192 2126 . + 1 ID=AT2G29630.p01;Parent=AT2G29630.t01 + NM_001202705 gbk_to_gff exon 192 2126 . + 1 Parent=AT2G29630.t01 + +------ + +**About formats** + +**GenBank format** An example of a GenBank record may be viewed here_ + +.. _here: http://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html + +**GFF3** Generic Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF3 lines have nine tab-separated fields:: + + 1. seqid - Must be a chromosome or scaffold or contig. + 2. source - The program that generated this feature. + 3. type - The name of this type of feature. Some examples of standard feature types are "gene", "CDS", "protein", "mRNA", and "exon". + 4. start - The starting position of the feature in the sequence. The first base is numbered 1. + 5. stop - The ending position of the feature (inclusive). + 6. score - A score between 0 and 1000. If there is no score value, enter ".". + 7. strand - Valid entries include '+', '-', or '.' (for don't know/care). + 8. phase - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'. + 9. attributes - All lines with the same group are linked together into a single item. + +-------- + +**Copyright** + +2009-2014 Max Planck Society, University of Tübingen & Memorial Sloan Kettering Cancer Center + +Sreedharan VT, Schultheiss SJ, Jean G, Kahles A, Bohnert R, Drewe P, Mudrakarta P, Görnitz N, Zeller G, Rätsch G. Oqtans: the RNA-seq workbench in the cloud for complete and reproducible quantitative transcriptome analysis. Bioinformatics 10.1093/bioinformatics/btt731 (2014) + + </help> +</tool> |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gff_to_bed.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/gff_to_bed.py Wed Jun 11 16:29:25 2014 -0400 |
[ |
#!/usr/bin/env python
"""
Convert genome annotation data in GFF/GTF to a 12 column BED format.
BED format typically represents the transcript models.

Usage: python gff_to_bed.py in.gff > out.bed

Requirement:
    GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py

Copyright (C)
    2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
    2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA.
"""

import re
import sys
import GFFParser


def writeBED(tinfo):
    """
    writing result files in bed format

    One line is printed per transcript that has at least one exon; transcripts
    without exons are skipped.

    @args tinfo: list of genes
    @type tinfo: numpy object
    """

    for ent1 in tinfo:
        for idx, tid in enumerate(ent1['transcripts']):
            exons = ent1['exons'][idx]  # exons of the corresponding transcript
            exon_cnt = len(exons)

            if not exon_cnt:
                continue

            # the first exon gives the start and the last exon the stop; both
            # are cast to int so single and multi exon transcripts print alike
            # NOTE(review): the start is written exactly as delivered by the
            # parser (no -1 shift) although bed_to_gff.py adds 1 when reading
            # BED - confirm the intended coordinate convention.
            rel_start = int(exons[0][0])
            rel_stop = int(exons[-1][1])

            # comma terminated block sizes and block starts relative to rel_start
            exon_len = ''.join('%d,' % (ex_cod[1] - ex_cod[0] + 1) for ex_cod in exons)
            exon_cod = ''.join('%d,' % (ex_cod[0] - rel_start) for ex_cod in exons)

            score = '0'
            score = ent1['score'][0] if ent1['score'] else score
            out_print = [ent1['chr'],
                         str(rel_start),
                         str(rel_stop),
                         tid[0],
                         score,
                         ent1['strand'],
                         str(rel_start),
                         str(rel_stop),
                         '0',
                         str(exon_cnt),
                         exon_len,
                         exon_cod]
            print('\t'.join(out_print))


def __main__():
    """
    Parse the annotation file named by the first command line argument and
    print it in BED format to standard output.
    """
    try:
        query_file = sys.argv[1]
    except IndexError:
        # no input file given: show the usage text
        print(__doc__)
        sys.exit(-1)

    Transcriptdb = GFFParser.Parse(query_file)
    writeBED(Transcriptdb)


if __name__ == "__main__":
    __main__()
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gff_to_bed.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/gff_to_bed.xml Wed Jun 11 16:29:25 2014 -0400 |
b |
@@ -0,0 +1,90 @@ +<tool id="fml_gff2bed" name="GFF-to-BED" version="2.0.0"> + <description>converter</description> + <command interpreter="python">gff_to_bed.py $inf_gff > $bed_format + </command> + <inputs> + <param format="gtf,gff,gff3" name="inf_gff" type="data" label="Convert this query" help="Provide genome annotation file in GFF, GTF, GFF3."/> + </inputs> + <outputs> + <data format="bed" name="bed_format" label="${tool.name} on ${on_string}: Converted" /> + </outputs> + <tests> + <test> + <param name="inf_gff" value="Aly_JGI.gff3" /> + <output name="bed_format" file="Aly_JGI.bed" /> + </test> + <test> + <param name="inf_gff" value="MB7_3R.gff3" /> + <output name="bed_format" file="MB7_3R.bed" /> + </test> + </tests> + <help> + +**What it does** + +This tool converts gene transcript annotation from GTF or GFF or GFF3 to UCSC wiggle 12 column BED format. + +-------- + +**Example** + +- The following data in GFF3:: + + ##gff-version 3 + chr1 protein_coding gene 11874 14409 0 + . ID=Gene:uc001aaa.3;Name=Gene:uc001aaa.3 + chr1 protein_coding transcript 11874 14409 0 + . ID=uc001aaa.3;Name=uc001aaa.3;Parent=Gene:uc001aaa.3 + chr1 protein_coding exon 11874 12227 0 + . Parent=uc001aaa.3 + chr1 protein_coding exon 12613 12721 0 + . Parent=uc001aaa.3 + chr1 protein_coding exon 13221 14409 0 + . Parent=uc001aaa.3 + +- Will be converted to UCSC Wiggle BED format:: + + chr1 11874 14409 uc001aaa.3 0 + 11874 14409 0 3 354,109,1189, 0,739,1347, + +-------- + +**About formats** + +**GFF3 format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF3 lines have nine tab-separated fields:: + + + 1. seqid - Must be a chromosome or scaffold or contig. + 2. source - The program that generated this feature. + 3. type - The name of this type of feature. Some examples of standard feature types are "gene", "CDS", "protein", "mRNA", and "exon". + 4. start - The starting position of the feature in the sequence. 
The first base is numbered 1. + 5. stop - The ending position of the feature (inclusive). + 6. score - A score between 0 and 1000. If there is no score value, enter ".". + 7. strand - Valid entries include '+', '-', or '.' (for don't know/care). + 8. phase - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'. + 9. attributes - All lines with the same group are linked together into a single item. + +**BED format** Browser Extensible Data format was designed at UCSC for displaying data tracks in the Genome Browser. It has three required fields and several additional optional ones: + +The first three BED fields (required) are:: + + 1. chrom - The name of the chromosome (e.g. chr1, chrY_random). + 2. chromStart - The starting position in the chromosome. (The first base in a chromosome is numbered 0.) + 3. chromEnd - The ending position in the chromosome, plus 1 (i.e., a half-open interval). + +The additional BED fields (optional) are:: + + 4. name - The name of the BED line. + 5. score - A score between 0 and 1000. + 6. strand - Defines the strand - either '+' or '-'. + 7. thickStart - The starting position where the feature is drawn thickly at the Genome Browser. + 8. thickEnd - The ending position where the feature is drawn thickly at the Genome Browser. + 9. reserved - This should always be set to zero. + 10. blockCount - The number of blocks (exons) in the BED line. + 11. blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount. + 12. blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount. 
+ +-------- + +**Copyright** + +2009-2014 Max Planck Society, University of Tübingen & Memorial Sloan Kettering Cancer Center + +Sreedharan VT, Schultheiss SJ, Jean G, Kahles A, Bohnert R, Drewe P, Mudrakarta P, Görnitz N, Zeller G, Rätsch G. Oqtans: the RNA-seq workbench in the cloud for complete and reproducible quantitative transcriptome analysis. Bioinformatics 10.1093/bioinformatics/btt731 (2014) + + </help> +</tool> |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gff_to_gtf.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/gff_to_gtf.py Wed Jun 11 16:29:25 2014 -0400 |
[ |
#!/usr/bin/env python
"""
Program to convert data from GFF to GTF

Usage: python gff_to_gtf.py in.gff > out.gtf

Requirement:
    GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py

Copyright (C)
    2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
    2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA.
"""

import re
import sys


def _gtf_line(ent1, tid, feature, start, stop, frame, exon_number):
    """
    Format a single GTF record (without the line ending).

    @args frame: value of the frame column, None is written as '.'
    @type frame: int or None
    """
    frame_txt = '.' if frame is None else '%d' % frame
    return '%s\t%s\t%s\t%d\t%d\t.\t%s\t%s\tgene_id "%s"; transcript_id "%s"; exon_number "%d"; gene_name "%s"; ' % (
        ent1['chr'], ent1['source'], feature, start, stop, ent1['strand'], frame_txt,
        ent1['name'], tid[0], exon_number, ent1['gene_info']['Name'])


def printGTF(tinfo):
    """
    Write exon, CDS, start_codon and stop_codon records in GTF format to
    standard output.

    A transcript whose gene strand is neither '+' nor '-' is skipped and a
    warning is written to standard error, so the GTF stream stays clean.

    @args tinfo: parsed object from gff file
    @type tinfo: numpy array
    """

    for ent1 in tinfo:
        strand = ent1['strand']
        for idx, tid in enumerate(ent1['transcripts']):

            if strand not in ('+', '-'):
                # The warning must not land in the redirected GTF output, and
                # without a strand the codon positions cannot be derived.
                sys.stderr.write('STRAND information missing - %s, skip the transcript - %s\n' % (strand, tid[0]))
                continue

            exons = ent1['exons'][idx]
            cds_exons = ent1['cds_exons'][idx]
            has_cds = cds_exons.any()

            stop_codon = start_codon = ()
            if has_cds:
                # three bases at the low and high end of the coding region
                low_end = (cds_exons[0][0], cds_exons[0][0] + 2)
                high_end = (cds_exons[-1][1] - 2, cds_exons[-1][1])
                if strand == '+':
                    start_codon, stop_codon = low_end, high_end
                else:
                    start_codon, stop_codon = high_end, low_end

            last_cds_cod = 0
            for idz, ex_cod in enumerate(exons):

                print(_gtf_line(ent1, tid, 'exon', ex_cod[0], ex_cod[1], None, idz + 1))

                if not has_cds:
                    continue

                # CDS rows are paired with exons by position; there may be
                # fewer CDS rows than exons, so check the bound explicitly
                # instead of swallowing every exception.
                if idz < len(cds_exons):
                    cds = cds_exons[idz]
                    print(_gtf_line(ent1, tid, 'CDS', cds[0], cds[1], cds[2], idz + 1))
                    last_cds_cod = idz

                if idz == 0:
                    print(_gtf_line(ent1, tid, 'start_codon', start_codon[0], start_codon[1], cds_exons[0][2], 1))

            if stop_codon:
                # numbered after the last exon; len(exons) avoids depending on
                # a loop variable that is unset when there are no exons
                print(_gtf_line(ent1, tid, 'stop_codon', stop_codon[0], stop_codon[1], cds_exons[last_cds_cod][2], len(exons)))


if __name__ == "__main__":

    try:
        gff_fname = sys.argv[1]
    except IndexError:
        print(__doc__)
        sys.exit(-1)

    # Deferred import: keeps printGTF importable without the parser module.
    import GFFParser

    Transcriptdb = GFFParser.Parse(gff_fname)

    printGTF(Transcriptdb)
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gff_to_gtf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/gff_to_gtf.xml Wed Jun 11 16:29:25 2014 -0400 |
b |
@@ -0,0 +1,88 @@ +<tool id="fml_gff2gtf" name="GFF-to-GTF" version="2.0.0"> + <description>converter</description> + <command interpreter="python">gff_to_gtf.py $inf_gff3 > $gtf_format + </command> + <inputs> + <param format="gff3,gff" name="inf_gff3" type="data" label="Convert this query" help="Provide genome annotation file in GFF or GFF3."/> + </inputs> + <outputs> + <data format="gtf" name="gtf_format" label="${tool.name} on ${on_string}: Converted" /> + </outputs> + <tests> + <test> + <param name="inf_gff3" value="AceView_ncbi_37.gff3" /> + <output name="gtf_format" file="AceView_gff3_to_gtf.gtf" /> + </test> + <test> + <param name="inf_gff3" value="ENSEMBL_mm9.gff3" /> + <output name="gtf_format" file="ENSEMBL_mm9_gff3_to_gtf.gtf" /> + </test> + </tests> + <help> + +**What it does** + +This tool converts data from GFF3 to GTF file format (scroll down for format description). + +-------- + +**Example** + +- The following data in GFF3 format:: + + ##gff-version 3 + 17 protein_coding gene 7255208 7258258 . + . ID=ENSG00000213859;Name=KCTD11 + 17 protein_coding mRNA 7255208 7258258 . + . ID=ENST00000333751;Name=KCTD11-001;Parent=ENSG00000213859 + 17 protein_coding protein 7256262 7256960 . + . ID=ENSP00000328352;Name=KCTD11-001;Parent=ENST00000333751 + 17 protein_coding five_prime_UTR 7255208 7256261 . + . Parent=ENST00000333751 + 17 protein_coding CDS 7256262 7256960 . + 0 Name=CDS:KCTD11;Parent=ENST00000333751,ENSP00000328352 + 17 protein_coding three_prime_UTR 7256961 7258258 . + . Parent=ENST00000333751 + 17 protein_coding exon 7255208 7258258 . + . Parent=ENST00000333751 + +- Will be converted to GTF format:: + + 17 protein_coding exon 7255208 7258258 . + . gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001"; + 17 protein_coding CDS 7256262 7256957 . 
+ 0 gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001"; protein_id "ENSP00000328352"; + 17 protein_coding start_codon 7256262 7256264 . + 0 gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001"; + 17 protein_coding stop_codon 7256958 7256960 . + 0 gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001"; + +-------- + +**About formats** + + +**GFF3 format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF3 lines have nine tab-separated fields:: + + 1. seqid - Must be a chromosome or scaffold. + 2. source - The program that generated this feature. + 3. type - The name of this type of feature. Some examples of standard feature types are "gene", "CDS", "protein", "mRNA", and "exon". + 4. start - The starting position of the feature in the sequence. The first base is numbered 1. + 5. stop - The ending position of the feature (inclusive). + 6. score - A score between 0 and 1000. If there is no score value, enter ".". + 7. strand - Valid entries include '+', '-', or '.' (for don't know/care). + 8. phase - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'. + 9. attributes - All lines with the same group are linked together into a single item. + + +**GTF format** Gene Transfer Format borrows from GFF, but has additional structure that warrants a separate definition and format name. GTF lines have nine tab-separated fields:: + + 1. seqname - The name of the sequence. + 2. source - Indicates where the annotation came from. + 3. feature - The name of the feature types. 
The following feature types are required: 'CDS', 'start_codon' and 'stop_codon' + 4. start - The starting position of the feature in the sequence. The first base is numbered 1. + 5. end - The ending position of the feature (inclusive). + 6. score - The score field indicates a degree of confidence in the feature's existence and coordinates. + 7. strand - Valid entries include '+', '-', or '.' + 8. frame - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. + 9. attributes - These attributes are designed for handling multiple transcripts from the same genomic region. + +-------- + +**Copyright** + +2009-2014 Max Planck Society, University of Tübingen & Memorial Sloan Kettering Cancer Center + +Sreedharan VT, Schultheiss SJ, Jean G, Kahles A, Bohnert R, Drewe P, Mudrakarta P, Görnitz N, Zeller G, Rätsch G. Oqtans: the RNA-seq workbench in the cloud for complete and reproducible quantitative transcriptome analysis. Bioinformatics 10.1093/bioinformatics/btt731 (2014) + + </help> +</tool> |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gtf_to_gff.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/gtf_to_gff.py Wed Jun 11 16:29:25 2014 -0400 |
[ |
#!/usr/bin/env python
"""
Convert Gene Transfer Format [GTF] to Generic Feature Format Version 3 [GFF3].

Usage: python gtf_to_gff.py in.gtf > out.gff3

Requirement:
    GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py
    helper.py   : https://github.com/vipints/GFFtools-GX/blob/master/helper.py

Copyright (C)
    2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
    2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA.
"""

import re
import sys


def GFFWriter(gtf_content):
    """
    Write the feature information in GFF3 format to standard output: one
    gene line per entry, then for every transcript its own line followed by
    the five_prime_UTR, CDS, three_prime_UTR and exon lines.

    @args gtf_content: Parsed object from gtf file
    @type gtf_content: numpy array
    """

    print('##gff-version 3')

    for ent1 in gtf_content:

        chr_name = ent1['chr']
        strand = ent1['strand']
        start = ent1['start']
        stop = ent1['stop']
        source = ent1['source']
        ID = ent1['name']
        Name = ent1['gene_info']['Name']

        print('%s\t%s\tgene\t%d\t%d\t.\t%s\t.\tID=%s;Name=%s' % (chr_name, source, start, stop, strand, ID, Name))

        for idx, tid in enumerate(ent1['transcripts']):

            exons = ent1['exons'][idx]
            cds_exons = ent1['cds_exons'][idx]

            # transcript span: start of the first exon to end of the last one
            t_start = exons[0][0]
            t_stop = exons[-1][-1]
            t_type = ent1['transcript_type'][idx]

            # Reset for every transcript: a non-coding transcript must not
            # inherit the UTRs of the previous one (or hit an unset name).
            utr5_exons, utr3_exons = [], []
            if exons.any() and cds_exons.any():
                # imported on demand: only coding transcripts need the UTR builder
                import helper
                utr5_exons, utr3_exons = helper.buildUTR(cds_exons, exons, strand)

            print('%s\t%s\t%s\t%d\t%d\t.\t%s\t.\tID=%s;Parent=%s' % (chr_name, source, t_type, t_start, t_stop, strand, tid[0], ID))

            for ex_cod in utr5_exons:
                print('%s\t%s\tfive_prime_UTR\t%d\t%d\t.\t%s\t.\tParent=%s' % (chr_name, source, ex_cod[0], ex_cod[1], strand, tid[0]))

            for ex_cod in cds_exons:
                # third column of a CDS row is written as the phase
                print('%s\t%s\tCDS\t%d\t%d\t.\t%s\t%d\tParent=%s' % (chr_name, source, ex_cod[0], ex_cod[1], strand, ex_cod[2], tid[0]))

            for ex_cod in utr3_exons:
                print('%s\t%s\tthree_prime_UTR\t%d\t%d\t.\t%s\t.\tParent=%s' % (chr_name, source, ex_cod[0], ex_cod[1], strand, tid[0]))

            for ex_cod in exons:
                print('%s\t%s\texon\t%d\t%d\t.\t%s\t.\tParent=%s' % (chr_name, source, ex_cod[0], ex_cod[1], strand, tid[0]))


def __main__():
    """
    Command line entry point: parse the GTF file named by the first argument
    and write it as GFF3; print the usage text when it is missing.
    """

    try:
        gtf_fname = sys.argv[1]
    except IndexError:
        print(__doc__)
        sys.exit(-1)

    # Deferred import: keeps GFFWriter importable without the parser module.
    import GFFParser

    gtf_file_content = GFFParser.Parse(gtf_fname)

    GFFWriter(gtf_file_content)


if __name__ == "__main__":
    __main__()
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/gtf_to_gff.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/gtf_to_gff.xml Wed Jun 11 16:29:25 2014 -0400 |
b |
@@ -0,0 +1,94 @@ +<tool id="fml_gtf2gff" name="GTF-to-GFF" version="2.0.0"> + <description>converter</description> + <command interpreter="python">gtf_to_gff.py $inf_gtf > $gff3_format + </command> + <inputs> + <param format="gtf" name="inf_gtf" type="data" label="Convert this query" help="Provide genome annotation file in GTF."/> + </inputs> + <outputs> + <data format="gff3" name="gff3_format" label="${tool.name} on ${on_string}: Converted" /> + </outputs> + <tests> + <test> + <param name="inf_gtf" value="UCSC_transcripts.gtf" /> + <output name="gff3_format" file="UCSC_transcripts.gff3" /> + </test> + <test> + <param name="inf_gtf" value="JGI_genes.gtf" /> + <output name="gff3_format" file="JGI_genes.gff3" /> + </test> + <test> + <param name="inf_gtf" value="ENSEMBL_mm9.gtf" /> + <output name="gff3_format" file="ENSEMBL_mm9.gff3" /> + </test> + <test> + <param name="inf_gtf" value="AceView_ncbi_37.gtf" /> + <output name="gff3_format" file="AceView_ncbi_37.gff3" /> + </test> + </tests> + <help> + +**What it does** + +This tool converts data from GTF to a valid GFF3 file (scroll down for format description). + +-------- + +**Example** + +- The following data in GTF format:: + + 17 protein_coding exon 7255208 7258258 . + . gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001"; + 17 protein_coding CDS 7256262 7256957 . + 0 gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001"; protein_id "ENSP00000328352"; + 17 protein_coding start_codon 7256262 7256264 . + 0 gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001"; + 17 protein_coding stop_codon 7256958 7256960 . 
+ 0 gene_id "ENSG00000213859"; transcript_id "ENST00000333751"; exon_number "1"; gene_name "KCTD11"; transcript_name "KCTD11-001"; + +- Will be converted to GFF3 format:: + + ##gff-version 3 + 17 protein_coding gene 7255208 7258258 . + . ID=ENSG00000213859;Name=KCTD11 + 17 protein_coding mRNA 7255208 7258258 . + . ID=ENST00000333751;Name=KCTD11-001;Parent=ENSG00000213859 + 17 protein_coding protein 7256262 7256960 . + . ID=ENSP00000328352;Name=KCTD11-001;Parent=ENST00000333751 + 17 protein_coding five_prime_UTR 7255208 7256261 . + . Parent=ENST00000333751 + 17 protein_coding CDS 7256262 7256960 . + 0 Name=CDS:KCTD11;Parent=ENST00000333751,ENSP00000328352 + 17 protein_coding three_prime_UTR 7256961 7258258 . + . Parent=ENST00000333751 + 17 protein_coding exon 7255208 7258258 . + . Parent=ENST00000333751 + +-------- + +**About formats** + +**GTF format** Gene Transfer Format borrows from GFF, but has additional structure that warrants a separate definition and format name. GTF lines have nine tab-separated fields:: + + 1. seqname - The name of the sequence. + 2. source - Indicates where the annotation came from. + 3. feature - The name of the feature types. The following feature types are required: 'CDS', 'start_codon' and 'stop_codon' + 4. start - The starting position of the feature in the sequence. The first base is numbered 1. + 5. end - The ending position of the feature (inclusive). + 6. score - The score field indicates a degree of confidence in the feature's existence and coordinates. + 7. strand - Valid entries include '+', '-', or '.' + 8. frame - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. + 9. attributes - These attributes are designed for handling multiple transcripts from the same genomic region. + +**GFF3 format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. 
GFF3 lines have nine tab-separated fields:: + + 1. seqid - Must be a chromosome or scaffold. + 2. source - The program that generated this feature. + 3. type - The name of this type of feature. Some examples of standard feature types are "gene", "CDS", "protein", "mRNA", and "exon". + 4. start - The starting position of the feature in the sequence. The first base is numbered 1. + 5. stop - The ending position of the feature (inclusive). + 6. score - A score between 0 and 1000. If there is no score value, enter ".". + 7. strand - Valid entries include '+', '-', or '.' (for don't know/care). + 8. phase - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'. + 9. attributes - All lines with the same group are linked together into a single item. + +-------- + +**Copyright** + +2009-2014 Max Planck Society, University of Tübingen & Memorial Sloan Kettering Cancer Center + +Sreedharan VT, Schultheiss SJ, Jean G, Kahles A, Bohnert R, Drewe P, Mudrakarta P, Görnitz N, Zeller G, Rätsch G. Oqtans: the RNA-seq workbench in the cloud for complete and reproducible quantitative transcriptome analysis. Bioinformatics 10.1093/bioinformatics/btt731 (2014) + + </help> +</tool> |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/helper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/helper.py Wed Jun 11 16:29:25 2014 -0400 |
[ |
b'@@ -0,0 +1,332 @@\n+#!/usr/bin/env python\n+"""\n+Common utility functions\n+"""\n+\n+import os \n+import re\n+import sys \n+import gzip \n+import bz2\n+import numpy \n+\n+def init_gene():\n+ """\n+ Initializing the gene structure \n+ """\n+\n+ gene_det = [(\'id\', \'f8\'), \n+ (\'anno_id\', numpy.dtype), \n+ (\'confgenes_id\', numpy.dtype),\n+ (\'name\', \'S25\'),\n+ (\'source\', \'S25\'),\n+ (\'gene_info\', numpy.dtype),\n+ (\'alias\', \'S15\'),\n+ (\'name2\', numpy.dtype),\n+ (\'strand\', \'S2\'), \n+ (\'score\', \'S15\'), \n+ (\'chr\', \'S15\'), \n+ (\'chr_num\', numpy.dtype),\n+ (\'paralogs\', numpy.dtype),\n+ (\'start\', \'f8\'),\n+ (\'stop\', \'f8\'), \n+ (\'transcripts\', numpy.dtype),\n+ (\'transcript_type\', numpy.dtype),\n+ (\'transcript_info\', numpy.dtype),\n+ (\'transcript_status\', numpy.dtype),\n+ (\'transcript_valid\', numpy.dtype),\n+ (\'exons\', numpy.dtype),\n+ (\'exons_confirmed\', numpy.dtype),\n+ (\'cds_exons\', numpy.dtype),\n+ (\'utr5_exons\', numpy.dtype),\n+ (\'utr3_exons\', numpy.dtype),\n+ (\'tis\', numpy.dtype),\n+ (\'tis_conf\', numpy.dtype),\n+ (\'tis_info\', numpy.dtype),\n+ (\'cdsStop\', numpy.dtype),\n+ (\'cdsStop_conf\', numpy.dtype),\n+ (\'cdsStop_info\', numpy.dtype),\n+ (\'tss\', numpy.dtype),\n+ (\'tss_info\', numpy.dtype),\n+ (\'tss_conf\', numpy.dtype),\n+ (\'cleave\', numpy.dtype),\n+ (\'cleave_info\', numpy.dtype),\n+ (\'cleave_conf\', numpy.dtype),\n+ (\'polya\', numpy.dtype),\n+ (\'polya_info\', numpy.dtype),\n+ (\'polya_conf\', numpy.dtype),\n+ (\'is_alt\', \'f8\'), \n+ (\'is_alt_spliced\', \'f8\'), \n+ (\'is_valid\', numpy.dtype),\n+ (\'transcript_complete\', numpy.dtype),\n+ (\'is_complete\', numpy.dtype),\n+ (\'is_correctly_gff3_referenced\', \'S5\'),\n+ (\'splicegraph\', numpy.dtype) ]\n+\n+ return gene_det\n+\n+def open_file(fname):\n+ """\n+ Open the file (supports .gz .bz2) and returns the handler\n+\n+ @args fname: input file name for reading \n+ @type fname: str\n+ """\n+\n+ try:\n+ if 
os.path.splitext(fname)[1] == ".gz":\n+ FH = gzip.open(fname, \'rb\')\n+ elif os.path.splitext(fname)[1] == ".bz2":\n+ FH = bz2.BZ2File(fname, \'rb\')\n+ else:\n+ FH = open(fname, \'rU\')\n+ except Exception as error:\n+ sys.exit(error)\n+\n+ return FH\n+\n+def add_CDS_phase(strand, cds):\n+ """\n+ Calculate CDS phase and add to the CDS exons\n+\n+ @args strand: feature strand information \n+ @type strand: +/- \n+ @args cds: coding exon coordinates \n+ @type cds: numpy array [[int, int, int]]\n+ """\n+\n+ cds_region, cds_flag = [], 0 \n+ if strand == \'+\':\n+ for cdspos in cds:\n+ if cds_flag == 0:\n+ cdspos = (cdspos[0], cdspos[1], 0)\n+ diff = (cdspos[1]-(cdspos[0]-1))%3\n+ else:\n+ xy = 0\n+ if diff == 0: \n+ cdspos = (cdspos[0], cdspos[1], 0)\n+ elif diff == 1: \n+ cdspos = (cdspos[0], cdspos[1], 2)\n+ xy = 2\n+ elif diff == 2: \n+ cdspos = (cdspos[0], cdspos[1], 1)\n+ xy = 1\n+ diff = ((cdspos[1]-(cdspos[0]-1))-xy)%3\n+ cds_region.append(cdspos)\n+ cds_flag = 1 \n+ elif strand == \'-\':\n+ cds.reverse()\n+ for cdspos in cds: \n+ if cds_flag == 0:\n+ cdspos = (cdspos[0], cdspos[1], 0)\n+ diff = (cdspos[1]-(cdspos[0]-1))%3\n+ else: \n+ xy = 0 \n+ if diff == 0: \n+ cdspos = (cdspos[0], cdspos[1], 0)\n+ elif diff == 1:\n+ '..b" exon_pos.append([cds_5start, utr3_end])\n+ for cds in cds_cod:\n+ exon_pos.append(cds)\n+ for utr3 in three_p_utr:\n+ exon_pos.append(utr3)\n+ else: \n+ if jun_exon != []:\n+ five_p_utr = five_p_utr[:-1]\n+ cds_cod = cds_cod[1:]\n+ for utr5 in five_p_utr:\n+ exon_pos.append(utr5)\n+ exon_pos.append(jun_exon) if jun_exon != [] else ''\n+ jun_exon = []\n+ utr3_start, utr3_end = 0, 0\n+ if three_p_utr != []:\n+ utr3_start = three_p_utr[0][0]\n+ utr3_end = three_p_utr[0][1]\n+ cds_3start = cds_cod[-1][0]\n+ cds_3end = cds_cod[-1][1]\n+ if utr3_start-cds_3end == 0 or utr3_start-cds_3end == 1: \n+ jun_exon = [cds_3start, utr3_end]\n+ if jun_exon != []:\n+ cds_cod = cds_cod[:-1]\n+ three_p_utr = three_p_utr[1:]\n+ for cds in cds_cod:\n+ 
exon_pos.append(cds)\n+ exon_pos.append(jun_exon) if jun_exon != [] else ''\n+ for utr3 in three_p_utr:\n+ exon_pos.append(utr3)\n+ elif strand_p == '-':\n+ utr3_start, utr3_end = 0, 0 \n+ if three_p_utr != []:\n+ utr3_start = three_p_utr[-1][0]\n+ utr3_end = three_p_utr[-1][1]\n+ cds_3start = cds_cod[0][0]\n+ cds_3end = cds_cod[0][1]\n+ jun_exon = []\n+ if cds_3start-utr3_end == 0 or cds_3start-utr3_end == 1:\n+ jun_exon = [utr3_start, cds_3end] \n+ if len(cds_cod) == 1: \n+ three_prime_flag = 0\n+ if jun_exon != []:\n+ three_p_utr = three_p_utr[:-1]\n+ three_prime_flag = 1\n+ for utr3 in three_p_utr:\n+ exon_pos.append(utr3)\n+ jun_exon = []\n+ (utr5_start, utr5_end) = (0, 0)\n+ if five_p_utr != []:\n+ utr5_start = five_p_utr[0][0]\n+ utr5_end = five_p_utr[0][1]\n+ if utr5_start-cds_3end == 0 or utr5_start-cds_3end == 1:\n+ jun_exon = [cds_3start, utr5_end]\n+ five_prime_flag = 0\n+ if jun_exon != []:\n+ cds_cod = cds_cod[:-1]\n+ five_p_utr = five_p_utr[1:]\n+ five_prime_flag = 1\n+ if three_prime_flag == 1 and five_prime_flag == 1:\n+ exon_pos.append([utr3_start, utr5_end])\n+ if three_prime_flag == 1 and five_prime_flag == 0:\n+ exon_pos.append([utr3_start, cds_3end])\n+ cds_cod = cds_cod[:-1]\n+ if three_prime_flag == 0 and five_prime_flag == 1:\n+ exon_pos.append([cds_3start, utr5_end]) \n+ for cds in cds_cod:\n+ exon_pos.append(cds)\n+ for utr5 in five_p_utr:\n+ exon_pos.append(utr5)\n+ else:\n+ if jun_exon != []:\n+ three_p_utr = three_p_utr[:-1]\n+ cds_cod = cds_cod[1:]\n+ for utr3 in three_p_utr:\n+ exon_pos.append(utr3) \n+ if jun_exon != []:\n+ exon_pos.append(jun_exon)\n+ jun_exon = []\n+ (utr5_start, utr5_end) = (0, 0)\n+ if five_p_utr != []:\n+ utr5_start = five_p_utr[0][0]\n+ utr5_end = five_p_utr[0][1] \n+ cds_5start = cds_cod[-1][0]\n+ cds_5end = cds_cod[-1][1]\n+ if utr5_start-cds_5end == 0 or utr5_start-cds_5end == 1:\n+ jun_exon = [cds_5start, utr5_end]\n+ if jun_exon != []:\n+ cds_cod = cds_cod[:-1]\n+ five_p_utr = five_p_utr[1:]\n+ for cds in 
cds_cod:\n+ exon_pos.append(cds)\n+ if jun_exon != []:\n+ exon_pos.append(jun_exon) \n+ for utr5 in five_p_utr:\n+ exon_pos.append(utr5)\n+ return exon_pos\n" |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/AceView_gff3_to_gtf.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/AceView_gff3_to_gtf.gtf Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,2544 @@\n+##gff-version 2.5\n+1\tprotein_coding\tCDS\t12704566\t12704733\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "1"; protein_id "AADACL4.aAug10";\n+1\tprotein_coding\tCDS\t12711142\t12711358\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "2"; protein_id "AADACL4.aAug10";\n+1\tprotein_coding\tCDS\t12721802\t12721865\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "3"; protein_id "AADACL4.aAug10";\n+1\tprotein_coding\tCDS\t12725972\t12726746\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "4"; protein_id "AADACL4.aAug10";\n+1\tprotein_coding\texon\t12704566\t12704733\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "1";\n+1\tprotein_coding\texon\t12711142\t12711358\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "2";\n+1\tprotein_coding\texon\t12721802\t12721865\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "3";\n+1\tprotein_coding\texon\t12725972\t12727097\t.\t+\t0\tgene_id "AADACL4"; transcript_id "AADACL4.aAug10"; exon_number "4";\n+1\tprotein_coding\tCDS\t12776344\t12776347\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.bAug10"; exon_number "1"; protein_id "AADACL3.bAug10";\n+1\tprotein_coding\tCDS\t12780885\t12780948\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.bAug10"; exon_number "2"; protein_id "AADACL3.bAug10";\n+1\tprotein_coding\tCDS\t12785189\t12785963\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.bAug10"; exon_number "3"; protein_id "AADACL3.bAug10";\n+1\tprotein_coding\texon\t12776119\t12776347\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.bAug10"; exon_number "1";\n+1\tprotein_coding\texon\t12780885\t12780948\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.bAug10"; exon_number "2";\n+1\tprotein_coding\texon\t12785189\t12788726\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.bAug10"; exon_number 
"3";\n+1\tprotein_coding\tCDS\t12779480\t12779693\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "1"; protein_id "AADACL3.aAug10";\n+1\tprotein_coding\tCDS\t12780885\t12780948\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "2"; protein_id "AADACL3.aAug10";\n+1\tprotein_coding\tCDS\t12785189\t12785963\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "3"; protein_id "AADACL3.aAug10";\n+1\tprotein_coding\texon\t12776119\t12776347\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "1";\n+1\tprotein_coding\texon\t12779477\t12779693\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "2";\n+1\tprotein_coding\texon\t12780885\t12780948\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "3";\n+1\tprotein_coding\texon\t12785189\t12788726\t.\t+\t0\tgene_id "AADACL3"; transcript_id "AADACL3.aAug10"; exon_number "4";\n+10\tprotein_coding\tCDS\t52566489\t52566640\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "1"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52569654\t52569802\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "2"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52570800\t52570936\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "3"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52573617\t52573798\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "4"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52575766\t52576039\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "5"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52580312\t52580409\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "6"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52587891\t52588055\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "7"; protein_id 
"A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52595834\t52596072\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "8"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52601622\t52601752\t.\t-\t0\tgene_id "A1CF"; transcript_id "A1CF.fAug10"; exon_number "9"; protein_id "A1CF.fAug10";\n+10\tprotein_coding\tCDS\t52603748\t52603882\t.\t-\t0\tgene'..b'10"; exon_number "1"; protein_id "AAA1.aAug10";\n+7\tprotein_coding\tCDS\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.aAug10"; exon_number "2"; protein_id "AAA1.aAug10";\n+7\tprotein_coding\tCDS\t34797686\t34797710\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.aAug10"; exon_number "3"; protein_id "AAA1.aAug10";\n+7\tprotein_coding\texon\t34607864\t34607984\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.aAug10"; exon_number "1";\n+7\tprotein_coding\texon\t34609324\t34609473\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.aAug10"; exon_number "2";\n+7\tprotein_coding\texon\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.aAug10"; exon_number "3";\n+7\tprotein_coding\texon\t34797686\t34797884\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.aAug10"; exon_number "4";\n+7\tprotein_coding\tCDS\t34682958\t34682963\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.dAug10"; exon_number "1"; protein_id "AAA1.dAug10";\n+7\tprotein_coding\tCDS\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.dAug10"; exon_number "2"; protein_id "AAA1.dAug10";\n+7\tprotein_coding\tCDS\t34800724\t34800802\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.dAug10"; exon_number "3"; protein_id "AAA1.dAug10";\n+7\tprotein_coding\texon\t34682839\t34682963\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.dAug10"; exon_number "1";\n+7\tprotein_coding\texon\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.dAug10"; exon_number "2";\n+7\tprotein_coding\texon\t34800724\t34800803\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.dAug10"; exon_number 
"3";\n+7\ttranscript\texon\t34758479\t34759420\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.gAug10"; exon_number "1";\n+7\ttranscript\texon\t34760254\t34760397\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.gAug10"; exon_number "2";\n+7\ttranscript\texon\t34762896\t34763007\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.gAug10"; exon_number "3";\n+7\ttranscript\texon\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.gAug10"; exon_number "4";\n+7\ttranscript\texon\t34800724\t34800803\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.gAug10"; exon_number "5";\n+7\ttranscript\texon\t34873773\t34873948\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.gAug10"; exon_number "6";\n+7\ttranscript\texon\t34758474\t34759420\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.hAug10"; exon_number "1";\n+7\ttranscript\texon\t34762896\t34763007\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.hAug10"; exon_number "2";\n+7\ttranscript\texon\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.hAug10"; exon_number "3";\n+7\ttranscript\texon\t34807954\t34808052\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.hAug10"; exon_number "4";\n+7\ttranscript\texon\t34873773\t34873943\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.hAug10"; exon_number "5";\n+7\ttranscript\texon\t34390034\t34390459\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.iAug10"; exon_number "1";\n+7\ttranscript\texon\t34457191\t34457284\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.iAug10"; exon_number "2";\n+7\ttranscript\texon\t34609324\t34609473\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.iAug10"; exon_number "3";\n+7\ttranscript\texon\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.iAug10"; exon_number "4";\n+7\ttranscript\texon\t34800724\t34800803\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.iAug10"; exon_number "5";\n+7\tprotein_coding\tCDS\t34457198\t34457284\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "1"; protein_id 
"AAA1.bAug10";\n+7\tprotein_coding\tCDS\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "2"; protein_id "AAA1.bAug10";\n+7\tprotein_coding\tCDS\t34797686\t34797710\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "3"; protein_id "AAA1.bAug10";\n+7\tprotein_coding\texon\t34386126\t34390459\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "1";\n+7\tprotein_coding\texon\t34457191\t34457284\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "2";\n+7\tprotein_coding\texon\t34768349\t34768428\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "3";\n+7\tprotein_coding\texon\t34797686\t34797884\t.\t-\t0\tgene_id "AAA1"; transcript_id "AAA1.bAug10"; exon_number "4";\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/AceView_ncbi_37.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/AceView_ncbi_37.gff3 Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,3164 @@\n+##gff-version 3\n+1\tAceView\tgene\t12704566\t12727097\t.\t+\t.\tID=AADACL4;Name=AADACL4\n+1\tAceView\tmRNA\t12704566\t12727097\t.\t+\t.\tID=AADACL4.aAug10;Parent=AADACL4\n+1\tAceView\tCDS\t12704566\t12704733\t.\t+\t0\tParent=AADACL4.aAug10\n+1\tAceView\tCDS\t12711142\t12711358\t.\t+\t0\tParent=AADACL4.aAug10\n+1\tAceView\tCDS\t12721802\t12721865\t.\t+\t2\tParent=AADACL4.aAug10\n+1\tAceView\tCDS\t12725972\t12726746\t.\t+\t1\tParent=AADACL4.aAug10\n+1\tAceView\tthree_prime_UTR\t12726747\t12727097\t.\t+\t.\tParent=AADACL4.aAug10\n+1\tAceView\texon\t12704566\t12704733\t.\t+\t.\tParent=AADACL4.aAug10\n+1\tAceView\texon\t12711142\t12711358\t.\t+\t.\tParent=AADACL4.aAug10\n+1\tAceView\texon\t12721802\t12721865\t.\t+\t.\tParent=AADACL4.aAug10\n+1\tAceView\texon\t12725972\t12727097\t.\t+\t.\tParent=AADACL4.aAug10\n+1\tAceView\tgene\t12776119\t12788726\t.\t+\t.\tID=AADACL3;Name=AADACL3\n+1\tAceView\tmRNA\t12776119\t12788726\t.\t+\t.\tID=AADACL3.bAug10;Parent=AADACL3\n+1\tAceView\tfive_prime_UTR\t12776119\t12776343\t.\t+\t.\tParent=AADACL3.bAug10\n+1\tAceView\tCDS\t12776344\t12776347\t.\t+\t0\tParent=AADACL3.bAug10\n+1\tAceView\tCDS\t12780885\t12780948\t.\t+\t2\tParent=AADACL3.bAug10\n+1\tAceView\tCDS\t12785189\t12785963\t.\t+\t1\tParent=AADACL3.bAug10\n+1\tAceView\tthree_prime_UTR\t12785964\t12788726\t.\t+\t.\tParent=AADACL3.bAug10\n+1\tAceView\texon\t12776119\t12776347\t.\t+\t.\tParent=AADACL3.bAug10\n+1\tAceView\texon\t12780885\t12780948\t.\t+\t.\tParent=AADACL3.bAug10\n+1\tAceView\texon\t12785189\t12788726\t.\t+\t.\tParent=AADACL3.bAug10\n+1\tAceView\tmRNA\t12776119\t12788726\t.\t+\t.\tID=AADACL3.aAug10;Parent=AADACL3\n+1\tAceView\tfive_prime_UTR\t12776119\t12776347\t.\t+\t.\tParent=AADACL3.aAug10\n+1\tAceView\tfive_prime_UTR\t12779477\t12779479\t.\t+\t.\tParent=AADACL3.aAug10\n+1\tAceView\tCDS\t12779480\t12779693\t.\t+\t0\tParent=AADACL3.aAug10\n+1\tAceView\tCDS\t12780885\t12780948\t.\t+\t2\tParent=AADACL3.aAug10\n+1\tAceView\tCDS\t12785189\t127859
63\t.\t+\t1\tParent=AADACL3.aAug10\n+1\tAceView\tthree_prime_UTR\t12785964\t12788726\t.\t+\t.\tParent=AADACL3.aAug10\n+1\tAceView\texon\t12776119\t12776347\t.\t+\t.\tParent=AADACL3.aAug10\n+1\tAceView\texon\t12779477\t12779693\t.\t+\t.\tParent=AADACL3.aAug10\n+1\tAceView\texon\t12780885\t12780948\t.\t+\t.\tParent=AADACL3.aAug10\n+1\tAceView\texon\t12785189\t12788726\t.\t+\t.\tParent=AADACL3.aAug10\n+10\tAceView\tgene\t52566307\t52588060\t.\t-\t.\tID=A1CF;Name=A1CF\n+10\tAceView\tmRNA\t52566307\t52645387\t.\t-\t.\tID=A1CF.fAug10;Parent=A1CF\n+10\tAceView\tfive_prime_UTR\t52619701\t52619745\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\tfive_prime_UTR\t52622649\t52622741\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\tfive_prime_UTR\t52623793\t52623840\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\tfive_prime_UTR\t52645341\t52645387\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52566489\t52566640\t.\t-\t2\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52569654\t52569802\t.\t-\t1\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52570800\t52570936\t.\t-\t0\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52573617\t52573798\t.\t-\t2\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52575766\t52576039\t.\t-\t0\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52580312\t52580409\t.\t-\t2\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52587891\t52588055\t.\t-\t2\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52595834\t52596072\t.\t-\t1\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52601622\t52601752\t.\t-\t0\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52603748\t52603882\t.\t-\t0\tParent=A1CF.fAug10\n+10\tAceView\tCDS\t52619602\t52619700\t.\t-\t0\tParent=A1CF.fAug10\n+10\tAceView\tthree_prime_UTR\t52566307\t52566488\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52566307\t52566640\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52569654\t52569802\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52570800\t52570936\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52573617\t52573798\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52575766\t525
76039\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52580312\t52580409\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52587891\t52588055\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52595834\t52596072\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52601622\t52601752\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52603748\t52603882\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52619602\t52619745\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52622649\t52622741\t.\t-\t.\tParent=A1CF.fAug10\n+10\tAceView\texon\t52623793\t52623840\t.\t-\t.\tP'..b'743462\t34800803\t.\t-\t.\tID=AAA1.cAug10;Parent=AAA1\n+7\tAceView\tfive_prime_UTR\t34800803\t34800803\t.\t-\t.\tParent=AAA1.cAug10\n+7\tAceView\tCDS\t34743797\t34743811\t.\t-\t0\tParent=AAA1.cAug10\n+7\tAceView\tCDS\t34768349\t34768428\t.\t-\t2\tParent=AAA1.cAug10\n+7\tAceView\tCDS\t34800724\t34800802\t.\t-\t0\tParent=AAA1.cAug10\n+7\tAceView\tthree_prime_UTR\t34743462\t34743796\t.\t-\t.\tParent=AAA1.cAug10\n+7\tAceView\texon\t34743462\t34743811\t.\t-\t.\tParent=AAA1.cAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.cAug10\n+7\tAceView\texon\t34800724\t34800803\t.\t-\t.\tParent=AAA1.cAug10\n+7\tAceView\ttranscript\t34386126\t34797884\t.\t-\t.\tID=AAA1.eAug10;Parent=AAA1\n+7\tAceView\texon\t34386126\t34390459\t.\t-\t.\tParent=AAA1.eAug10\n+7\tAceView\texon\t34457191\t34457284\t.\t-\t.\tParent=AAA1.eAug10\n+7\tAceView\texon\t34609324\t34609473\t.\t-\t.\tParent=AAA1.eAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.eAug10\n+7\tAceView\texon\t34797686\t34797884\t.\t-\t.\tParent=AAA1.eAug10\n+7\tAceView\tmRNA\t34607864\t34797884\t.\t-\t.\tID=AAA1.aAug10;Parent=AAA1\n+7\tAceView\tfive_prime_UTR\t34797711\t34797884\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\tCDS\t34609384\t34609473\t.\t-\t0\tParent=AAA1.aAug10\n+7\tAceView\tCDS\t34768349\t34768428\t.\t-\t2\tParent=AAA1.aAug10\n+7\tAceView\tCDS\t34797686\t34797710\t.\t-\t0\tParent=AAA1.aAug10\n+7\tAceView\tthree_prime_UTR\t34
607864\t34607984\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\tthree_prime_UTR\t34609324\t34609383\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\texon\t34607864\t34607984\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\texon\t34609324\t34609473\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\texon\t34797686\t34797884\t.\t-\t.\tParent=AAA1.aAug10\n+7\tAceView\tmRNA\t34682839\t34800803\t.\t-\t.\tID=AAA1.dAug10;Parent=AAA1\n+7\tAceView\tfive_prime_UTR\t34800803\t34800803\t.\t-\t.\tParent=AAA1.dAug10\n+7\tAceView\tCDS\t34682958\t34682963\t.\t-\t0\tParent=AAA1.dAug10\n+7\tAceView\tCDS\t34768349\t34768428\t.\t-\t2\tParent=AAA1.dAug10\n+7\tAceView\tCDS\t34800724\t34800802\t.\t-\t0\tParent=AAA1.dAug10\n+7\tAceView\tthree_prime_UTR\t34682839\t34682957\t.\t-\t.\tParent=AAA1.dAug10\n+7\tAceView\texon\t34682839\t34682963\t.\t-\t.\tParent=AAA1.dAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.dAug10\n+7\tAceView\texon\t34800724\t34800803\t.\t-\t.\tParent=AAA1.dAug10\n+7\tAceView\ttranscript\t34758479\t34873948\t.\t-\t.\tID=AAA1.gAug10;Parent=AAA1\n+7\tAceView\texon\t34758479\t34759420\t.\t-\t.\tParent=AAA1.gAug10\n+7\tAceView\texon\t34760254\t34760397\t.\t-\t.\tParent=AAA1.gAug10\n+7\tAceView\texon\t34762896\t34763007\t.\t-\t.\tParent=AAA1.gAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.gAug10\n+7\tAceView\texon\t34800724\t34800803\t.\t-\t.\tParent=AAA1.gAug10\n+7\tAceView\texon\t34873773\t34873948\t.\t-\t.\tParent=AAA1.gAug10\n+7\tAceView\ttranscript\t34758474\t34873943\t.\t-\t.\tID=AAA1.hAug10;Parent=AAA1\n+7\tAceView\texon\t34758474\t34759420\t.\t-\t.\tParent=AAA1.hAug10\n+7\tAceView\texon\t34762896\t34763007\t.\t-\t.\tParent=AAA1.hAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.hAug10\n+7\tAceView\texon\t34807954\t34808052\t.\t-\t.\tParent=AAA1.hAug10\n+7\tAceView\texon\t34873773\t34873943\t.\t-\t.\tParent=AAA1.hAug10\n+7\tAceView\ttranscript\t34390034\t34800803\t.\t-\t.
\tID=AAA1.iAug10;Parent=AAA1\n+7\tAceView\texon\t34390034\t34390459\t.\t-\t.\tParent=AAA1.iAug10\n+7\tAceView\texon\t34457191\t34457284\t.\t-\t.\tParent=AAA1.iAug10\n+7\tAceView\texon\t34609324\t34609473\t.\t-\t.\tParent=AAA1.iAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.iAug10\n+7\tAceView\texon\t34800724\t34800803\t.\t-\t.\tParent=AAA1.iAug10\n+7\tAceView\tmRNA\t34386126\t34797884\t.\t-\t.\tID=AAA1.bAug10;Parent=AAA1\n+7\tAceView\tfive_prime_UTR\t34797711\t34797884\t.\t-\t.\tParent=AAA1.bAug10\n+7\tAceView\tCDS\t34457198\t34457284\t.\t-\t0\tParent=AAA1.bAug10\n+7\tAceView\tCDS\t34768349\t34768428\t.\t-\t2\tParent=AAA1.bAug10\n+7\tAceView\tCDS\t34797686\t34797710\t.\t-\t0\tParent=AAA1.bAug10\n+7\tAceView\tthree_prime_UTR\t34386126\t34390459\t.\t-\t.\tParent=AAA1.bAug10\n+7\tAceView\tthree_prime_UTR\t34457191\t34457197\t.\t-\t.\tParent=AAA1.bAug10\n+7\tAceView\texon\t34386126\t34390459\t.\t-\t.\tParent=AAA1.bAug10\n+7\tAceView\texon\t34457191\t34457284\t.\t-\t.\tParent=AAA1.bAug10\n+7\tAceView\texon\t34768349\t34768428\t.\t-\t.\tParent=AAA1.bAug10\n+7\tAceView\texon\t34797686\t34797884\t.\t-\t.\tParent=AAA1.bAug10\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/AceView_ncbi_37.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/AceView_ncbi_37.gtf Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,3989 @@\n+11\tAceView\texon\t111933358\t111934981\t.\t-\t0\tgene_id 2-oxoacid_dh; Gene_type cDNA_supported; transcript_id 2-oxoacid_dh.aAug10-unspliced; exon_number 1\n+19\tAceView\tCDS\t58859154\t58859210\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; product_id A1BGAS.aAug10; exon_number 1\n+19\tAceView\texon\t58859153\t58859210\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; exon_number 1\n+19\tAceView\tintron\t58859211\t58864686\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; type gt_ag\n+19\tAceView\tCDS\t58864687\t58864840\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; product_id A1BGAS.aAug10; exon_number 2\n+19\tAceView\texon\t58864687\t58864840\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; exon_number 2\n+19\tAceView\tintron\t58864841\t58865079\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; type gt_ag\n+19\tAceView\tCDS\t58865080\t58865114\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; product_id A1BGAS.aAug10; exon_number 3\n+19\tAceView\texon\t58865080\t58865223\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; exon_number 3\n+19\tAceView\tstop_codon\t58865115\t58865117\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; product_id A1BGAS.aAug10;\n+19\tAceView\tintron\t58865224\t58865734\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; type gt_ag\n+19\tAceView\texon\t58865735\t58866090\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.aAug10; exon_number 4\n+19\tAceView\tstart_codon\t58864404\t58864406\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; product_id A1BGAS.bAug10;\n+19\tAceView\tCDS\t58864404\t58864410\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; 
transcript_id A1BGAS.bAug10; product_id A1BGAS.bAug10; exon_number 1\n+19\tAceView\texon\t58862110\t58864410\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; exon_number 1\n+19\tAceView\tintron\t58864411\t58864744\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; type gt_ag\n+19\tAceView\tCDS\t58864745\t58864840\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; product_id A1BGAS.bAug10; exon_number 2\n+19\tAceView\texon\t58864745\t58864840\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; exon_number 2\n+19\tAceView\tintron\t58864841\t58865079\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; type gt_ag\n+19\tAceView\tCDS\t58865080\t58865114\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; product_id A1BGAS.bAug10; exon_number 3\n+19\tAceView\texon\t58865080\t58865223\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; exon_number 3\n+19\tAceView\tstop_codon\t58865115\t58865117\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; product_id A1BGAS.bAug10;\n+19\tAceView\tintron\t58865224\t58865734\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; type gt_ag\n+19\tAceView\texon\t58865735\t58866548\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.bAug10; exon_number 4\n+19\tAceView\texon\t58859122\t58859210\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.cAug10; exon_number 1\n+19\tAceView\tintron\t58859211\t58864686\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.cAug10; type gt_ag\n+19\tAceView\texon\t58864687\t58864840\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.cAug10; exon_number 2\n+19\tAceView\tintron\t58864841\t58865079\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id 
A1BGAS.cAug10; type gt_ag\n+19\tAceView\tstart_codon\t58865831\t58865833\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; transcript_id A1BGAS.cAug10; product_id A1BGAS.cAug10;\n+19\tAceView\tCDS\t58865831\t58866547\t.\t+\t0\tgene_id A1BGAS; Gene_type cDNA_supported; tran'..b'codon\t219129739\t219129741\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.gAug10; product_id AAMP.gAug10;\n+2\tAceView\tintron\t219129332\t219129738\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.gAug10; type gt_ag\n+2\tAceView\texon\t219128853\t219129331\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.gAug10; exon_number 11\n+2\tAceView\tstart_codon\t219134807\t219134809\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10;\n+2\tAceView\tCDS\t219134689\t219134809\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 1\n+2\tAceView\texon\t219134689\t219134843\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 1\n+2\tAceView\tintron\t219134258\t219134688\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\tCDS\t219134105\t219134257\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 2\n+2\tAceView\texon\t219134105\t219134257\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 2\n+2\tAceView\tintron\t219132337\t219134104\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\tCDS\t219132217\t219132336\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 3\n+2\tAceView\texon\t219132217\t219132336\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 
3\n+2\tAceView\tintron\t219131710\t219132216\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\tCDS\t219131570\t219131709\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 4\n+2\tAceView\texon\t219131570\t219131709\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 4\n+2\tAceView\tintron\t219131311\t219131569\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\tCDS\t219131166\t219131310\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 5\n+2\tAceView\texon\t219131166\t219131310\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 5\n+2\tAceView\tintron\t219130871\t219131165\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\tCDS\t219130787\t219130870\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 6\n+2\tAceView\texon\t219130787\t219130870\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 6\n+2\tAceView\tintron\t219130670\t219130786\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\tCDS\t219130392\t219130669\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10; exon_number 7\n+2\tAceView\texon\t219130302\t219130669\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 7\n+2\tAceView\tstop_codon\t219130389\t219130391\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; product_id AAMP.hAug10;\n+2\tAceView\tintron\t219130185\t219130301\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type 
gt_ag\n+2\tAceView\texon\t219130094\t219130184\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 8\n+2\tAceView\tintron\t219129898\t219130093\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\texon\t219129743\t219129897\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 9\n+2\tAceView\tintron\t219129332\t219129742\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; type gt_ag\n+2\tAceView\texon\t219128853\t219129331\t.\t-\t0\tgene_id AAMP; Gene_type cDNA_supported; transcript_id AAMP.hAug10; exon_number 10\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/Aly_JGI.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/Aly_JGI.bed Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,252 @@\n+scaffold_1\t10652\t11944\tTranscript:909750\t.\t-\t10652\t11944\t0\t5\t60,225,159,360,132,\t0,130,392,608,1161,\n+scaffold_1\t464358\t467635\tTranscript:470161\t.\t+\t464358\t467635\t0\t18\t202,63,72,108,81,54,126,90,126,108,117,57,81,5,174,10,20,31,\t0,599,744,929,1130,1329,1606,1827,2037,2253,2453,2649,2797,2915,2978,3174,3209,3247,\n+scaffold_1\t548765\t550572\tTranscript:918872\t.\t-\t548765\t550572\t0\t4\t170,274,174,28,\t0,593,965,1780,\n+scaffold_1\t80941\t82330\tTranscript:470071\t.\t+\t80941\t82330\t0\t2\t277,1014,\t0,376,\n+scaffold_1\t483425\t484126\tTranscript:311324\t.\t+\t483425\t484126\t0\t3\t72,216,54,\t0,343,648,\n+scaffold_1\t124594\t126229\tTranscript:470077\t.\t-\t124594\t126229\t0\t2\t792,700,\t0,936,\n+scaffold_1\t122468\t124310\tTranscript:470076\t.\t+\t122468\t124310\t0\t4\t1626,84,4,79,\t0,1653,1749,1764,\n+scaffold_1\t118008\t119066\tTranscript:470075\t.\t+\t118008\t119066\t0\t6\t49,732,57,44,56,29,\t0,66,819,899,960,1030,\n+scaffold_1\t90714\t113497\tTranscript:470074\t.\t-\t90714\t113497\t0\t22\t358,90,91,197,78,117,96,126,97,71,99,84,171,129,33,97,51,79,82,56,90,75,\t0,18818,19001,19212,19496,19665,19860,20042,20257,20536,20684,20856,21022,21382,21651,21776,21947,22094,22283,22441,22603,22709,\n+scaffold_1\t621551\t622441\tTranscript:909905\t.\t-\t621551\t622441\t0\t5\t85,140,120,77,151,\t0,175,386,578,740,\n+scaffold_1\t134679\t139817\tTranscript:470079\t.\t-\t134679\t139817\t0\t20\t19,354,129,144,126,126,93,133,132,135,108,204,89,150,154,222,186,123,101,172,\t0,42,474,701,922,1186,1410,1591,2068,2300,2516,2754,3045,3252,3555,3824,4128,4399,4621,4967,\n+scaffold_1\t127652\t134697\tTranscript:470078\t.\t+\t127652\t134697\t0\t11\t1166,318,355,423,293,274,500,322,358,650,825,\t0,1371,1874,2318,2863,3443,4043,4627,5028,5477,6221,\n+scaffold_1\t798586\t799140\tTranscript:470261\t.\t-\t798586\t799140\t0\t1\t555,\t0,\n+scaffold_1\t684011\t686672\tTranscript:909919\t.\t-\t684011\t686672\t0\t7\t81,96,78,139,68,72,129,\t0,25
0,454,1428,1680,2360,2533,\n+scaffold_1\t156948\t159348\tTranscript:311256\t.\t+\t156948\t159348\t0\t8\t195,177,171,182,212,120,479,237,\t0,384,637,888,1149,1393,1594,2164,\n+scaffold_1\t560362\t562206\tTranscript:470187\t.\t-\t560362\t562206\t0\t5\t42,10,290,190,1058,\t0,68,97,508,787,\n+scaffold_1\t860953\t874764\tTranscript:311407\t.\t-\t860953\t874764\t0\t19\t1236,147,307,309,3006,1651,1115,103,700,1166,115,66,192,174,57,136,86,73,182,\t0,1347,1588,2052,2707,5983,7729,8952,9162,10011,11333,11619,11827,12166,12484,12621,12933,13180,13630,\n+scaffold_1\t771817\t774951\tTranscript:909946\t.\t+\t771817\t774951\t0\t11\t108,198,76,72,88,256,116,221,19,72,256,\t0,231,535,682,855,1025,1350,1548,2129,2732,2879,\n+scaffold_1\t479138\t481385\tTranscript:333544\t.\t-\t479138\t481385\t0\t5\t792,141,246,297,108,\t0,874,1318,1748,2140,\n+scaffold_1\t765430\t766468\tTranscript:918940\t.\t-\t765430\t766468\t0\t2\t529,96,\t0,943,\n+scaffold_1\t766738\t768326\tTranscript:918941\t.\t-\t766738\t768326\t0\t3\t211,573,73,\t0,578,1516,\n+scaffold_1\t849099\t851591\tTranscript:470280\t.\t+\t849099\t851591\t0\t15\t76,3,107,44,78,193,31,59,74,87,62,29,136,100,30,\t0,111,179,357,767,954,1237,1349,1498,1683,1859,2016,2146,2311,2463,\n+scaffold_1\t57579\t57871\tTranscript:918741\t.\t+\t57579\t57871\t0\t1\t293,\t0,\n+scaffold_1\t58865\t72177\tTranscript:918742\t.\t+\t58865\t72177\t0\t49\t113,298,229,340,114,129,192,195,138,123,254,104,95,225,162,73,68,146,109,147,48,129,243,138,86,106,96,153,183,230,103,126,45,195,127,101,135,84,147,132,48,147,62,154,192,105,135,201,280,\t0,209,584,904,1435,1635,1838,2119,2442,2671,2877,3232,3571,3753,4317,4563,4746,4949,5205,5406,5644,5766,6016,6374,6597,6778,7127,7380,7687,7945,8270,8456,8667,8886,9339,9568,9964,10175,10675,10910,11151,11293,11507,11767,12025,12306,12503,12726,13033,\n+scaffold_1\t356318\t357400\tTranscript:470139\t.\t-\t356318\t357400\t0\t2\t727,329,\t0,754,\n+scaffold_1\t786367\t786721\tTranscript:918948\t.\t+\t786367\t786721\t0\t1\t355,\
t0,\n+scaffold_1\t787193\t787397\tTranscript:918949\t.\t+\t787193\t787397\t0\t1\t205,\t0,\n+scaffold_1\t511272\t518844\tTranscript:333551\t.\t-\t511272\t518844\t0\t21\t178,407,165,906,191,98,180,629,96,162,183,151,234,161,123,173,220,395,211,114,912,\t0,277,781,1027,2017,2290,2433,2661,3383,3564,3812,4083,4319,4644,4887,5097,5359,5667,'..b'66,3239,3467,3671,3861,4147,4363,4706,4918,5333,\n+scaffold_1\t3311\t6198\tTranscript:470048\t.\t-\t3311\t6198\t0\t9\t180,196,273,201,108,7,378,220,568,\t0,202,499,853,1172,1419,1540,2008,2320,\n+scaffold_1\t9512\t10567\tTranscript:470049\t.\t+\t9512\t10567\t0\t3\t53,82,695,\t0,64,361,\n+scaffold_1\t605536\t607891\tTranscript:470198\t.\t+\t605536\t607891\t0\t2\t765,1020,\t0,1336,\n+scaffold_1\t652374\t653539\tTranscript:470212\t.\t-\t652374\t653539\t0\t2\t302,779,\t0,387,\n+scaffold_1\t650407\t652252\tTranscript:470210\t.\t+\t650407\t652252\t0\t4\t44,59,162,1193,\t0,71,447,653,\n+scaffold_1\t436171\t436800\tTranscript:311313\t.\t-\t436171\t436800\t0\t3\t125,91,105,\t0,320,525,\n+scaffold_1\t428322\t429820\tTranscript:311310\t.\t-\t428322\t429820\t0\t7\t264,33,90,44,64,152,100,\t0,304,434,682,800,1084,1399,\n+scaffold_1\t152834\t155670\tTranscript:909785\t.\t-\t152834\t155670\t0\t2\t1675,650,\t0,2187,\n+scaffold_1\t266834\t270418\tTranscript:470119\t.\t-\t266834\t270418\t0\t8\t237,716,244,328,235,195,103,217,\t0,337,1181,1516,2329,2822,3099,3368,\n+scaffold_1\t759661\t760663\tTranscript:470240\t.\t+\t759661\t760663\t0\t6\t141,48,531,103,15,69,\t0,157,223,768,896,934,\n+scaffold_1\t441904\t443720\tTranscript:311315\t.\t+\t441904\t443720\t0\t2\t993,627,\t0,1190,\n+scaffold_1\t246920\t248384\tTranscript:470115\t.\t+\t246920\t248384\t0\t9\t62,353,296,141,98,45,17,18,35,\t0,93,533,905,1131,1248,1320,1372,1430,\n+scaffold_1\t501318\t503489\tTranscript:918864\t.\t+\t501318\t503489\t0\t6\t224,63,117,400,152,171,\t0,353,605,1211,1738,2001,\n+scaffold_1\t236778\t241721\tTranscript:470110\t.\t+\t236778\t241721\t0\t16\t103,180,159,112,120,152,
183,35,163,54,93,143,71,84,294,60,\t0,114,768,1022,1245,1446,1715,1992,2454,2779,3173,3418,4243,4395,4571,4884,\n+scaffold_1\t243695\t245459\tTranscript:470113\t.\t-\t243695\t245459\t0\t6\t44,166,111,106,296,330,\t0,68,317,838,1049,1435,\n+scaffold_1\t330470\t334264\tTranscript:909830\t.\t+\t330470\t334264\t0\t3\t785,1961,167,\t0,1600,3628,\n+scaffold_1\t313347\t315496\tTranscript:918815\t.\t-\t313347\t315496\t0\t4\t1048,161,132,191,\t0,1315,1707,1959,\n+scaffold_1\t46396\t48761\tTranscript:470065\t.\t+\t46396\t48761\t0\t9\t164,76,89,90,45,70,140,89,172,\t0,297,454,641,850,991,1267,2009,2194,\n+scaffold_1\t365041\t365523\tTranscript:909838\t.\t-\t365041\t365523\t0\t3\t97,88,67,\t0,207,416,\n+scaffold_1\t367696\t369417\tTranscript:909839\t.\t-\t367696\t369417\t0\t8\t56,76,78,105,35,46,98,52,\t0,195,411,905,1212,1368,1514,1670,\n+scaffold_1\t779234\t780531\tTranscript:470252\t.\t-\t779234\t780531\t0\t5\t39,196,122,348,72,\t0,68,345,860,1226,\n+scaffold_1\t768374\t770507\tTranscript:311385\t.\t+\t768374\t770507\t0\t10\t87,66,75,145,47,58,50,57,248,61,\t0,187,401,562,791,924,1055,1197,1336,2073,\n+scaffold_1\t328181\t328380\tTranscript:918819\t.\t+\t328181\t328380\t0\t1\t200,\t0,\n+scaffold_1\t816662\t816932\tTranscript:918958\t.\t+\t816662\t816932\t0\t1\t271,\t0,\n+scaffold_1\t928527\t930332\tTranscript:470297\t.\t+\t928527\t930332\t0\t8\t70,161,58,309,49,95,80,115,\t0,91,692,829,1236,1365,1538,1691,\n+scaffold_1\t113969\t115315\tTranscript:918756\t.\t-\t113969\t115315\t0\t1\t1347,\t0,\n+scaffold_1\t915290\t920350\tTranscript:470295\t.\t+\t915290\t920350\t0\t11\t515,539,69,198,132,87,441,168,231,116,401,\t0,612,1269,1908,2192,2873,3050,3591,3934,4388,4660,\n+scaffold_1\t731564\t731809\tTranscript:918928\t.\t-\t731564\t731809\t0\t1\t246,\t0,\n+scaffold_1\t835648\t840282\tTranscript:470277\t.\t-\t835648\t840282\t0\t15\t32,47,96,81,145,272,123,159,282,189,184,142,244,636,320,\t0,68,133,312,500,728,1086,1294,1572,2023,2474,2846,3211,3532,4315,\n+scaffold_1\t628623\t630888\
tTranscript:918899\t.\t-\t628623\t630888\t0\t6\t531,85,208,48,168,476,\t0,616,930,1217,1528,1790,\n+scaffold_1\t800563\t803042\tTranscript:918952\t.\t-\t800563\t803042\t0\t9\t95,97,101,189,158,232,136,104,291,\t0,235,433,623,894,1134,1449,1701,2189,\n+scaffold_1\t617112\t618613\tTranscript:918895\t.\t+\t617112\t618613\t0\t7\t100,316,53,73,105,81,176,\t0,218,634,797,960,1155,1326,\n+scaffold_1\t601736\t602094\tTranscript:918890\t.\t-\t601736\t602094\t0\t1\t359,\t0,\n+scaffold_1\t602514\t603658\tTranscript:918891\t.\t+\t602514\t603658\t0\t3\t510,145,81,\t0,792,1064,\n+scaffold_1\t930976\t935010\tTranscript:470299\t.\t+\t930976\t935010\t0\t13\t548,281,83,404,194,181,125,90,79,77,107,157,111,\t0,688,1271,1460,1997,2278,2610,2903,3099,3272,3549,3754,3924,\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/Aly_JGI.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/Aly_JGI.gff3 Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,3996 @@\n+##gff-version 3\n+##sequence-region scaffold_1 1 33132539\n+scaffold_1\tGenomic_canonical\tregion\t1\t33132539\t.\t+\t.\tID=scaffold_1;Name=scaffold_1\n+scaffold_1\tJGI_Filtered\tgene\t47\t2523\t.\t-\t.\tID=scaffold_100001.1;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\tmRNA\t47\t2523\t.\t-\t.\tID=Transcript:918720;Name=Transcript:918720;Parent=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\tthree_prime_UTR\t47\t66\t.\t-\t.\tID=three_prime_UTR:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\texon\t47\t252\t.\t-\t.\tID=exon:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tCDS\t67\t252\t.\t-\t0\tID=CDS:918720;Parent=Transcript:918720;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\texon\t407\t782\t.\t-\t.\tID=exon:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tCDS\t407\t782\t.\t-\t0\tID=CDS:918720;Parent=Transcript:918720;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\texon\t1423\t1642\t.\t-\t.\tID=exon:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tCDS\t1423\t1642\t.\t-\t1\tID=CDS:918720;Parent=Transcript:918720;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\texon\t1803\t2035\t.\t-\t.\tID=exon:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tCDS\t1803\t2035\t.\t-\t2\tID=CDS:918720;Parent=Transcript:918720;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\texon\t2124\t2347\t.\t-\t.\tID=exon:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tCDS\t2124\t2347\t.\t-\t1\tID=CDS:918720;Parent=Transcript:918720;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\texon\t2444\t2523\t.\t-\t.\tID=exon:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tCDS\t2444\t2503\t.\t-\t0\tID=CDS:918720;Parent=Transcript:918720;Name=scaffold_100001.1\n+scaffold_1\tJGI_Filtered\tfive_prime_UTR\t2504\t2523\t.\t-\t.\tID=five_prime_UTR:918720;Parent=Transcript:918720\n+scaffold_1\tJGI_Filtered\tgene\t3311\t6198\t.\t-\t.\tID=fgenesh2_kg.1__2__AT1G02190.2;Name=fgenesh2_kg.1__2__AT1G02190.2\
n+scaffold_1\tJGI_Filtered\tmRNA\t3311\t6198\t.\t-\t.\tID=Transcript:470048;Name=Transcript:470048;Parent=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\tthree_prime_UTR\t3311\t3490\t.\t-\t.\tID=three_prime_UTR:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tthree_prime_UTR\t3513\t3528\t.\t-\t.\tID=three_prime_UTR:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\texon\t3311\t3490\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\texon\t3513\t3708\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t3529\t3708\t.\t-\t0\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t3810\t4082\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t3810\t4082\t.\t-\t0\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t4164\t4364\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t4164\t4364\t.\t-\t0\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t4483\t4590\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t4483\t4590\t.\t-\t0\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t4730\t4736\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t4730\t4736\t.\t-\t0\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t4851\t5228\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t4851\t5228\t.\t-\t1\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t5319\t5538\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t5319\t5538\t.\t-\t1\tID=CDS:470048;Parent=Transcript:470048;Name=fgen
esh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\texon\t5631\t6198\t.\t-\t.\tID=exon:470048;Parent=Transcript:470048\n+scaffold_1\tJGI_Filtered\tCDS\t5631\t6123\t.\t-\t2\tID=CDS:470048;Parent=Transcript:470048;Name=fgenesh2_kg.1__2__AT1G02190.2\n+scaffold_1\tJGI_Filtered\tfive_prime_UTR\t6124\t6198\t.\t-\t.\tID=five_prime_UTR:470048;Parent=Transcri'..b'me=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t933586\t933710\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tCDS\t933586\t933710\t.\t+\t0\tID=CDS:470299;Parent=Transcript:470299;Name=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t933879\t933968\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tCDS\t933879\t933968\t.\t+\t2\tID=CDS:470299;Parent=Transcript:470299;Name=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t934075\t934153\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tCDS\t934075\t934153\t.\t+\t2\tID=CDS:470299;Parent=Transcript:470299;Name=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t934248\t934324\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tCDS\t934248\t934324\t.\t+\t0\tID=CDS:470299;Parent=Transcript:470299;Name=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t934525\t934631\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tCDS\t934525\t934631\t.\t+\t2\tID=CDS:470299;Parent=Transcript:470299;Name=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t934730\t934886\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tCDS\t934730\t934851\t.\t+\t1\tID=CDS:470299;Parent=Transcript:470299;Name=fgenesh2_kg.1__253__AT1G03190.1\n+scaffold_1\tJGI_Filtered\texon\t934900\t935010\t.\t+\t.\tID=exon:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tthree_prime_UTR\t934852\t934886\t.\t+\t.\tID=three_prime_UTR:470299;Parent=Transc
ript:470299\n+scaffold_1\tJGI_Filtered\tthree_prime_UTR\t934900\t935010\t.\t+\t.\tID=three_prime_UTR:470299;Parent=Transcript:470299\n+scaffold_1\tJGI_Filtered\tgene\t938460\t939704\t.\t+\t.\tID=fgenesh1_pm.C_scaffold_1000202;Name=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\tmRNA\t938460\t939704\t.\t+\t.\tID=Transcript:311422;Name=Transcript:311422;Parent=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\texon\t938460\t938693\t.\t+\t.\tID=exon:311422;Parent=Transcript:311422\n+scaffold_1\tJGI_Filtered\tCDS\t938460\t938693\t.\t+\t0\tID=CDS:311422;Parent=Transcript:311422;Name=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\texon\t938841\t939104\t.\t+\t.\tID=exon:311422;Parent=Transcript:311422\n+scaffold_1\tJGI_Filtered\tCDS\t938841\t939104\t.\t+\t0\tID=CDS:311422;Parent=Transcript:311422;Name=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\texon\t939200\t939361\t.\t+\t.\tID=exon:311422;Parent=Transcript:311422\n+scaffold_1\tJGI_Filtered\tCDS\t939200\t939361\t.\t+\t0\tID=CDS:311422;Parent=Transcript:311422;Name=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\texon\t939394\t939477\t.\t+\t.\tID=exon:311422;Parent=Transcript:311422\n+scaffold_1\tJGI_Filtered\tCDS\t939394\t939477\t.\t+\t0\tID=CDS:311422;Parent=Transcript:311422;Name=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\texon\t939597\t939704\t.\t+\t.\tID=exon:311422;Parent=Transcript:311422\n+scaffold_1\tJGI_Filtered\tCDS\t939597\t939704\t.\t+\t0\tID=CDS:311422;Parent=Transcript:311422;Name=fgenesh1_pm.C_scaffold_1000202\n+scaffold_1\tJGI_Filtered\tgene\t940106\t941321\t.\t-\t.\tID=scaffold_100268.1;Name=scaffold_100268.1\n+scaffold_1\tJGI_Filtered\tmRNA\t940106\t941321\t.\t-\t.\tID=Transcript:918987;Name=Transcript:918987;Parent=scaffold_100268.1\n+scaffold_1\tJGI_Filtered\tthree_prime_UTR\t940106\t940125\t.\t-\t.\tID=three_prime_UTR:918987;Parent=Transcript:918987\n+scaffold_1\tJGI_Filtered\texon\t940106\t940528\t.\t-\t.\tID=exon:918987;Parent=Tran
script:918987\n+scaffold_1\tJGI_Filtered\tCDS\t940126\t940528\t.\t-\t0\tID=CDS:918987;Parent=Transcript:918987;Name=scaffold_100268.1\n+scaffold_1\tJGI_Filtered\texon\t940628\t940686\t.\t-\t.\tID=exon:918987;Parent=Transcript:918987\n+scaffold_1\tJGI_Filtered\tCDS\t940628\t940686\t.\t-\t1\tID=CDS:918987;Parent=Transcript:918987;Name=scaffold_100268.1\n+scaffold_1\tJGI_Filtered\texon\t941262\t941321\t.\t-\t.\tID=exon:918987;Parent=Transcript:918987\n+scaffold_1\tJGI_Filtered\tCDS\t941262\t941300\t.\t-\t0\tID=CDS:918987;Parent=Transcript:918987;Name=scaffold_100268.1\n+scaffold_1\tJGI_Filtered\tfive_prime_UTR\t941301\t941321\t.\t-\t.\tID=five_prime_UTR:918987;Parent=Transcript:918987\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/ENSEMBL_mm9.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/ENSEMBL_mm9.gff3 Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,4424 @@\n+##gff-version 3\n+18\tlincRNA\tgene\t3336414\t3366861\t.\t+\t.\tID=ENSMUSG00000091488;Name=AC124336.2\n+18\tlincRNA\ttranscript\t3336414\t3366861\t.\t+\t.\tID=ENSMUST00000171726;Parent=ENSMUSG00000091488;Name=AC124336.2-201\n+18\tlincRNA\texon\t3336414\t3337176\t.\t+\t.\tParent=ENSMUST00000171726\n+18\tlincRNA\texon\t3365925\t3366861\t.\t+\t.\tParent=ENSMUST00000171726\n+18\tprotein_coding\tgene\t9314042\t9450148\t.\t-\t.\tID=ENSMUSG00000024286;Name=Ccny\n+18\tprotein_coding\tmRNA\t9314042\t9450148\t.\t-\t.\tID=ENSMUST00000053917;Parent=ENSMUSG00000024286;Name=Ccny-201\n+18\tprotein_coding\tfive_prime_UTR\t9449670\t9450148\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9316554\t9316670\t.\t-\t0\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9319407\t9319569\t.\t-\t1\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9332782\t9332948\t.\t-\t0\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9345192\t9345311\t.\t-\t0\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9345412\t9345469\t.\t-\t1\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9349386\t9349421\t.\t-\t1\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9353405\t9353505\t.\t-\t0\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9377792\t9377826\t.\t-\t2\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9386733\t9386807\t.\t-\t2\tParent=ENSMUST00000053917\n+18\tprotein_coding\tCDS\t9449516\t9449669\t.\t-\t0\tParent=ENSMUST00000053917\n+18\tprotein_coding\tthree_prime_UTR\t9314042\t9316553\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9314042\t9316670\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9319407\t9319569\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9332782\t9332948\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9345192\t9345311\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9345412\t9345469\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein
_coding\texon\t9349386\t9349421\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9353405\t9353505\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9377792\t9377826\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9386733\t9386807\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\texon\t9449516\t9450148\t.\t-\t.\tParent=ENSMUST00000053917\n+18\tprotein_coding\tmRNA\t9314042\t9450148\t.\t-\t.\tID=ENSMUST00000115867;Parent=ENSMUSG00000024286;Name=Ccny-202\n+18\tprotein_coding\tfive_prime_UTR\t9449670\t9450148\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9316554\t9316670\t.\t-\t0\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9319407\t9319569\t.\t-\t1\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9332782\t9332948\t.\t-\t0\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9345192\t9345311\t.\t-\t0\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9345412\t9345469\t.\t-\t1\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9349386\t9349421\t.\t-\t1\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9353405\t9353505\t.\t-\t0\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9377792\t9377826\t.\t-\t2\tParent=ENSMUST00000115867\n+18\tprotein_coding\tCDS\t9449516\t9449669\t.\t-\t0\tParent=ENSMUST00000115867\n+18\tprotein_coding\tthree_prime_UTR\t9314042\t9316553\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9314042\t9316670\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9319407\t9319569\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9332782\t9332948\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9345192\t9345311\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9345412\t9345469\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9349386\t9349421\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9353405\t9353505\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\
texon\t9377792\t9377826\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tprotein_coding\texon\t9449516\t9450148\t.\t-\t.\tParent=ENSMUST00000115867\n+18\tmiRNA\tgene\t10782897\t10782983\t.\t-\t.\tID=ENSMUSG00000065399;Name=Mir133a-1\n+18\tmiRNA\ttranscript\t10782897\t10782983\t.\t-\t.\tID=ENSMUST00000083465;Parent=ENSMUSG00000065399;Name=Mir133a-1-201\n+18\tmiRNA\texon\t10782897\t10782983\t.\t-\t.\tParent=ENSMUST00000083465\n+18\tprotein_coding\tgene\t9726195\t9726668\t.\t-\t.\tID='..b'694\n+NT_166402\tprotein_coding\texon\t36964\t37064\t.\t+\t.\tParent=ENSMUST00000096694\n+NT_166402\tprotein_coding\texon\t37217\t38054\t.\t+\t.\tParent=ENSMUST00000096694\n+NT_166433\tprotein_coding\tgene\t28587\t52512\t.\t+\t.\tID=ENSMUSG00000078423;Name=AC007307.2\n+NT_166433\tprotein_coding\tmRNA\t28587\t52512\t.\t+\t.\tID=ENSMUST00000105217;Parent=ENSMUSG00000078423;Name=AC007307.2-201\n+NT_166433\tprotein_coding\tfive_prime_UTR\t28587\t28657\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\tCDS\t28658\t28798\t.\t+\t0\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\tCDS\t31129\t31299\t.\t+\t0\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\tCDS\t32196\t32249\t.\t+\t0\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\tthree_prime_UTR\t32250\t32270\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\tthree_prime_UTR\t51920\t52512\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\texon\t28587\t28798\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\texon\t31129\t31299\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\texon\t32196\t32270\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\texon\t51920\t52512\t.\t+\t.\tParent=ENSMUST00000105217\n+NT_166433\tprotein_coding\tgene\t47745\t52514\t.\t+\t.\tID=ENSMUSG00000078424;Name=AC007307.3\n+NT_166433\tprotein_coding\tmRNA\t47745\t52514\t.\t+\t.\tID=ENSMUST00000105218;Parent=ENSMUSG00000078424;Name=AC007307.3-201\n+NT_166433\tprotein_coding\tfi
ve_prime_UTR\t47745\t47746\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\tCDS\t47747\t47845\t.\t+\t0\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\tCDS\t50322\t50492\t.\t+\t0\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\tCDS\t51351\t51404\t.\t+\t0\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\tthree_prime_UTR\t51405\t51425\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\tthree_prime_UTR\t51920\t52514\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\texon\t47745\t47845\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\texon\t50322\t50492\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\texon\t51351\t51425\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\texon\t51920\t52514\t.\t+\t.\tParent=ENSMUST00000105218\n+NT_166433\tprotein_coding\tmRNA\t47928\t52514\t.\t+\t.\tID=ENSMUST00000105219;Parent=ENSMUSG00000078424;Name=AC007307.3-202\n+NT_166433\tprotein_coding\tfive_prime_UTR\t47928\t47985\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\tCDS\t47986\t48129\t.\t+\t0\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\tCDS\t50322\t50492\t.\t+\t0\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\tCDS\t51351\t51404\t.\t+\t0\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\tthree_prime_UTR\t51405\t51425\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\tthree_prime_UTR\t51920\t52514\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\texon\t47928\t48129\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\texon\t50322\t50492\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\texon\t51351\t51425\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\texon\t51920\t52514\t.\t+\t.\tParent=ENSMUST00000105219\n+NT_166433\tprotein_coding\tgene\t11955\t18898\t.\t+\t.\tID=ENSMUSG00000000702;Name=AC007307.1\n+NT_166433\tprotein_coding\tmRNA\t11955\t18898\t.\t+\t.\tID=ENSMUST000
00105216;Parent=ENSMUSG00000000702;Name=AC007307.1-201\n+NT_166433\tprotein_coding\tfive_prime_UTR\t11955\t12025\t.\t+\t.\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\tCDS\t12026\t12166\t.\t+\t0\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\tCDS\t16677\t16841\t.\t+\t0\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\tCDS\t17745\t17789\t.\t+\t0\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\tthree_prime_UTR\t17790\t17814\t.\t+\t.\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\tthree_prime_UTR\t18309\t18898\t.\t+\t.\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\texon\t11955\t12166\t.\t+\t.\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\texon\t16677\t16841\t.\t+\t.\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\texon\t17745\t17814\t.\t+\t.\tParent=ENSMUST00000105216\n+NT_166433\tprotein_coding\texon\t18309\t18898\t.\t+\t.\tParent=ENSMUST00000105216\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/ENSEMBL_mm9.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/ENSEMBL_mm9.gtf Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,3974 @@\n+NT_166433\tprotein_coding\texon\t11955\t12166\t.\t+\t.\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "1"; gene_name "AC007307.1"; transcript_name "AC007307.1-201";\n+NT_166433\tprotein_coding\tCDS\t12026\t12166\t.\t+\t0\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "1"; gene_name "AC007307.1"; transcript_name "AC007307.1-201"; protein_id "ENSMUSP00000100851";\n+NT_166433\tprotein_coding\tstart_codon\t12026\t12028\t.\t+\t0\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "1"; gene_name "AC007307.1"; transcript_name "AC007307.1-201";\n+NT_166433\tprotein_coding\texon\t16677\t16841\t.\t+\t.\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "2"; gene_name "AC007307.1"; transcript_name "AC007307.1-201";\n+NT_166433\tprotein_coding\tCDS\t16677\t16841\t.\t+\t0\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "2"; gene_name "AC007307.1"; transcript_name "AC007307.1-201"; protein_id "ENSMUSP00000100851";\n+NT_166433\tprotein_coding\texon\t17745\t17814\t.\t+\t.\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "3"; gene_name "AC007307.1"; transcript_name "AC007307.1-201";\n+NT_166433\tprotein_coding\tCDS\t17745\t17786\t.\t+\t0\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "3"; gene_name "AC007307.1"; transcript_name "AC007307.1-201"; protein_id "ENSMUSP00000100851";\n+NT_166433\tprotein_coding\tstop_codon\t17787\t17789\t.\t+\t0\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "3"; gene_name "AC007307.1"; transcript_name "AC007307.1-201";\n+NT_166433\tprotein_coding\texon\t18309\t18898\t.\t+\t.\t gene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "4"; gene_name "AC007307.1"; transcript_name "AC007307.1-201";\n+NT_166433\tprotein_coding\texon\t28587\t28798\t.\t+\t.\t gene_id 
"ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "1"; gene_name "AC007307.2"; transcript_name "AC007307.2-201";\n+NT_166433\tprotein_coding\tCDS\t28658\t28798\t.\t+\t0\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "1"; gene_name "AC007307.2"; transcript_name "AC007307.2-201"; protein_id "ENSMUSP00000100852";\n+NT_166433\tprotein_coding\tstart_codon\t28658\t28660\t.\t+\t0\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "1"; gene_name "AC007307.2"; transcript_name "AC007307.2-201";\n+NT_166433\tprotein_coding\texon\t31129\t31299\t.\t+\t.\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "2"; gene_name "AC007307.2"; transcript_name "AC007307.2-201";\n+NT_166433\tprotein_coding\tCDS\t31129\t31299\t.\t+\t0\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "2"; gene_name "AC007307.2"; transcript_name "AC007307.2-201"; protein_id "ENSMUSP00000100852";\n+NT_166433\tprotein_coding\texon\t32196\t32270\t.\t+\t.\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "3"; gene_name "AC007307.2"; transcript_name "AC007307.2-201";\n+NT_166433\tprotein_coding\tCDS\t32196\t32246\t.\t+\t0\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "3"; gene_name "AC007307.2"; transcript_name "AC007307.2-201"; protein_id "ENSMUSP00000100852";\n+NT_166433\tprotein_coding\tstop_codon\t32247\t32249\t.\t+\t0\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "3"; gene_name "AC007307.2"; transcript_name "AC007307.2-201";\n+NT_166433\tprotein_coding\texon\t51920\t52512\t.\t+\t.\t gene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "4"; gene_name "AC007307.2"; transcript_name "AC007307.2-201";\n+NT_166433\tprotein_coding\texon\t47745\t47845\t.\t+\t.\t gene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "1"; gene_name 
"AC007307.3"; transcript_name "AC007307.3-201";\n+NT_166433\tprotein_coding\tCDS\t47747\t47845\t.\t+\t0\t gene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "1"; g'..b'32"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12726175\t12726341\t.\t+\t0\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "32"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12730409\t12730592\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "33"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12730409\t12730592\t.\t+\t1\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "33"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12732723\t12732863\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "34"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12732723\t12732863\t.\t+\t0\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "34"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12736261\t12736420\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "35"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12736261\t12736420\t.\t+\t0\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "35"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12736523\t12736653\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "36"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12736523\t12736653\t.\t+\t2\t gene_id "ENSMUSG00000024421"; transcript_id 
"ENSMUST00000115858"; exon_number "36"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12738795\t12738888\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "37"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12738795\t12738888\t.\t+\t0\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "37"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12740202\t12740321\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "38"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12740202\t12740321\t.\t+\t2\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "38"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\texon\t12741027\t12741522\t.\t+\t.\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "39"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\tCDS\t12741027\t12741169\t.\t+\t2\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "39"; gene_name "Lama3"; transcript_name "Lama3-202"; protein_id "ENSMUSP00000111524";\n+18\tprotein_coding\tstop_codon\t12741170\t12741172\t.\t+\t0\t gene_id "ENSMUSG00000024421"; transcript_id "ENSMUST00000115858"; exon_number "39"; gene_name "Lama3"; transcript_name "Lama3-202";\n+18\tprotein_coding\texon\t12657194\t12657637\t.\t-\t.\t gene_id "ENSMUSG00000090309"; transcript_id "ENSMUST00000172267"; exon_number "1"; gene_name "AC102131.1"; transcript_name "AC102131.1-201";\n+18\tprotein_coding\tCDS\t12657197\t12657637\t.\t-\t0\t gene_id "ENSMUSG00000090309"; transcript_id "ENSMUST00000172267"; exon_number "1"; gene_name "AC102131.1"; transcript_name "AC102131.1-201"; protein_id 
"ENSMUSP00000129942";\n+18\tprotein_coding\tstop_codon\t12657194\t12657196\t.\t-\t0\t gene_id "ENSMUSG00000090309"; transcript_id "ENSMUST00000172267"; exon_number "1"; gene_name "AC102131.1"; transcript_name "AC102131.1-201";\n+18\trRNA\texon\t12736933\t12737046\t.\t-\t.\t gene_id "ENSMUSG00000088342"; transcript_id "ENSMUST00000157717"; exon_number "1"; gene_name "5S_rRNA.42"; transcript_name "5S_rRNA.42-201";\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/ENSEMBL_mm9_gff3_to_gtf.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/ENSEMBL_mm9_gff3_to_gtf.gtf Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,3709 @@\n+##gff-version 2.5\n+18\ttranscript\texon\t3336414\t3337176\t.\t+\t0\tgene_id "ENSMUSG00000091488"; transcript_id "ENSMUST00000171726"; exon_number "1";\n+18\ttranscript\texon\t3365925\t3366861\t.\t+\t0\tgene_id "ENSMUSG00000091488"; transcript_id "ENSMUST00000171726"; exon_number "2";\n+18\tprotein_coding\tCDS\t9316554\t9316670\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "1"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9319407\t9319569\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "2"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9332782\t9332948\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "3"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9345192\t9345311\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "4"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9345412\t9345469\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "5"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9349386\t9349421\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "6"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9353405\t9353505\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "7"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9377792\t9377826\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "8"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9386733\t9386807\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "9"; protein_id "ENSMUST00000053917";\n+18\tprotein_coding\tCDS\t9449516\t9449669\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "10"; protein_id 
"ENSMUST00000053917";\n+18\tprotein_coding\texon\t9314042\t9316670\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "1";\n+18\tprotein_coding\texon\t9319407\t9319569\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "2";\n+18\tprotein_coding\texon\t9332782\t9332948\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "3";\n+18\tprotein_coding\texon\t9345192\t9345311\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "4";\n+18\tprotein_coding\texon\t9345412\t9345469\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "5";\n+18\tprotein_coding\texon\t9349386\t9349421\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "6";\n+18\tprotein_coding\texon\t9353405\t9353505\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "7";\n+18\tprotein_coding\texon\t9377792\t9377826\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "8";\n+18\tprotein_coding\texon\t9386733\t9386807\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "9";\n+18\tprotein_coding\texon\t9449516\t9450148\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000053917"; exon_number "10";\n+18\tprotein_coding\tCDS\t9316554\t9316670\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000115867"; exon_number "1"; protein_id "ENSMUST00000115867";\n+18\tprotein_coding\tCDS\t9319407\t9319569\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000115867"; exon_number "2"; protein_id "ENSMUST00000115867";\n+18\tprotein_coding\tCDS\t9332782\t9332948\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000115867"; exon_number "3"; protein_id "ENSMUST00000115867";\n+18\tprotein_coding\tCDS\t9345192\t9345311\t.\t-\t0\tgene_id "ENSMUSG00000024286"; 
transcript_id "ENSMUST00000115867"; exon_number "4"; protein_id "ENSMUST00000115867";\n+18\tprotein_coding\tCDS\t9345412\t9345469\t.\t-\t0\tgene_id "ENSMUSG00000024286"; transcript_id "ENSMUST00000115867"; exon_number "5"; protein_id "ENSMUST00000115867";\n+18\t'..b'nscript_id "ENSMUST00000105217"; exon_number "1"; protein_id "ENSMUST00000105217";\n+NT_166433\tprotein_coding\tCDS\t31129\t31299\t.\t+\t0\tgene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "2"; protein_id "ENSMUST00000105217";\n+NT_166433\tprotein_coding\tCDS\t32196\t32249\t.\t+\t0\tgene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "3"; protein_id "ENSMUST00000105217";\n+NT_166433\tprotein_coding\texon\t28587\t28798\t.\t+\t0\tgene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "1";\n+NT_166433\tprotein_coding\texon\t31129\t31299\t.\t+\t0\tgene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "2";\n+NT_166433\tprotein_coding\texon\t32196\t32270\t.\t+\t0\tgene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "3";\n+NT_166433\tprotein_coding\texon\t51920\t52512\t.\t+\t0\tgene_id "ENSMUSG00000078423"; transcript_id "ENSMUST00000105217"; exon_number "4";\n+NT_166433\tprotein_coding\tCDS\t47747\t47845\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "1"; protein_id "ENSMUST00000105218";\n+NT_166433\tprotein_coding\tCDS\t50322\t50492\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "2"; protein_id "ENSMUST00000105218";\n+NT_166433\tprotein_coding\tCDS\t51351\t51404\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "3"; protein_id "ENSMUST00000105218";\n+NT_166433\tprotein_coding\texon\t47745\t47845\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "1";\n+NT_166433\tprotein_coding\texon\t50322\t50492\t.\t+\t0\tgene_id 
"ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "2";\n+NT_166433\tprotein_coding\texon\t51351\t51425\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "3";\n+NT_166433\tprotein_coding\texon\t51920\t52514\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105218"; exon_number "4";\n+NT_166433\tprotein_coding\tCDS\t47986\t48129\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "1"; protein_id "ENSMUST00000105219";\n+NT_166433\tprotein_coding\tCDS\t50322\t50492\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "2"; protein_id "ENSMUST00000105219";\n+NT_166433\tprotein_coding\tCDS\t51351\t51404\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "3"; protein_id "ENSMUST00000105219";\n+NT_166433\tprotein_coding\texon\t47928\t48129\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "1";\n+NT_166433\tprotein_coding\texon\t50322\t50492\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "2";\n+NT_166433\tprotein_coding\texon\t51351\t51425\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "3";\n+NT_166433\tprotein_coding\texon\t51920\t52514\t.\t+\t0\tgene_id "ENSMUSG00000078424"; transcript_id "ENSMUST00000105219"; exon_number "4";\n+NT_166433\tprotein_coding\tCDS\t12026\t12166\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "1"; protein_id "ENSMUST00000105216";\n+NT_166433\tprotein_coding\tCDS\t16677\t16841\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "2"; protein_id "ENSMUST00000105216";\n+NT_166433\tprotein_coding\tCDS\t17745\t17789\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "3"; protein_id 
"ENSMUST00000105216";\n+NT_166433\tprotein_coding\texon\t11955\t12166\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "1";\n+NT_166433\tprotein_coding\texon\t16677\t16841\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "2";\n+NT_166433\tprotein_coding\texon\t17745\t17814\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "3";\n+NT_166433\tprotein_coding\texon\t18309\t18898\t.\t+\t0\tgene_id "ENSMUSG00000000702"; transcript_id "ENSMUST00000105216"; exon_number "4";\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/JGI_genes.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/JGI_genes.gff3 Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,4626 @@\n+##gff-version 3\n+scaffold_1\tJGI\tgene\t1237411\t1237623\t.\t+\t.\tID=e_gw2.1.2098.1;Name=e_gw2.1.2098.1\n+scaffold_1\tJGI\tmRNA\t1237411\t1237623\t.\t+\t.\tID=1027156;Parent=e_gw2.1.2098.1\n+scaffold_1\tJGI\tCDS\t1237411\t1237456\t.\t+\t0\tParent=1027156\n+scaffold_1\tJGI\tCDS\t1237502\t1237623\t.\t+\t2\tParent=1027156\n+scaffold_1\tJGI\texon\t1237411\t1237456\t.\t+\t.\tParent=1027156\n+scaffold_1\tJGI\texon\t1237502\t1237623\t.\t+\t.\tParent=1027156\n+scaffold_1\tJGI\tgene\t5902548\t5912971\t.\t+\t.\tID=estExt_Genewise2Plus.C_10864;Name=estExt_Genewise2Plus.C_10864\n+scaffold_1\tJGI\tmRNA\t5902548\t5912971\t.\t+\t.\tID=1045793;Parent=estExt_Genewise2Plus.C_10864\n+scaffold_1\tJGI\tCDS\t5902548\t5902888\t.\t+\t0\tParent=1045793\n+scaffold_1\tJGI\tCDS\t5902966\t5909048\t.\t+\t1\tParent=1045793\n+scaffold_1\tJGI\tCDS\t5909123\t5910952\t.\t+\t2\tParent=1045793\n+scaffold_1\tJGI\tCDS\t5911034\t5911827\t.\t+\t2\tParent=1045793\n+scaffold_1\tJGI\tCDS\t5911903\t5912205\t.\t+\t0\tParent=1045793\n+scaffold_1\tJGI\tCDS\t5912269\t5912760\t.\t+\t0\tParent=1045793\n+scaffold_1\tJGI\tthree_prime_UTR\t5912761\t5912971\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\texon\t5902548\t5902888\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\texon\t5902966\t5909048\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\texon\t5909123\t5910952\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\texon\t5911034\t5911827\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\texon\t5911903\t5912205\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\texon\t5912269\t5912971\t.\t+\t.\tParent=1045793\n+scaffold_1\tJGI\tgene\t5701930\t5702697\t.\t-\t.\tID=fgenesh2_pg.1_#_81;Name=fgenesh2_pg.1_#_81\n+scaffold_1\tJGI\tmRNA\t5701930\t5702697\t.\t-\t.\tID=1066497;Parent=fgenesh2_pg.1_#_81\n+scaffold_1\tJGI\tCDS\t5701930\t5702605\t.\t-\t1\tParent=1066497\n+scaffold_1\tJGI\tCDS\t5702660\t5702697\t.\t-\t0\tParent=1066497\n+scaffold_1\tJGI\texon\t5701930\t5702605\t.\t-\t.\tParent=1066497\n+scaffold_1\tJGI\texon\t5702660\t5702697
\t.\t-\t.\tParent=1066497\n+scaffold_1\tJGI\tgene\t6192379\t6193551\t.\t-\t.\tID=estExt_Genewise2Plus.C_10944;Name=estExt_Genewise2Plus.C_10944\n+scaffold_1\tJGI\tmRNA\t6192379\t6193551\t.\t-\t.\tID=1045858;Parent=estExt_Genewise2Plus.C_10944\n+scaffold_1\tJGI\tfive_prime_UTR\t6193484\t6193551\t.\t-\t.\tParent=1045858\n+scaffold_1\tJGI\tCDS\t6192379\t6192999\t.\t-\t0\tParent=1045858\n+scaffold_1\tJGI\tCDS\t6193076\t6193367\t.\t-\t1\tParent=1045858\n+scaffold_1\tJGI\tCDS\t6193440\t6193483\t.\t-\t0\tParent=1045858\n+scaffold_1\tJGI\texon\t6192379\t6192999\t.\t-\t.\tParent=1045858\n+scaffold_1\tJGI\texon\t6193076\t6193367\t.\t-\t.\tParent=1045858\n+scaffold_1\tJGI\texon\t6193440\t6193551\t.\t-\t.\tParent=1045858\n+scaffold_1\tJGI\tgene\t5709177\t5710596\t.\t-\t.\tID=fgenesh2_pg.1_#_84;Name=fgenesh2_pg.1_#_84\n+scaffold_1\tJGI\tmRNA\t5709177\t5710596\t.\t-\t.\tID=1066500;Parent=fgenesh2_pg.1_#_84\n+scaffold_1\tJGI\tCDS\t5709177\t5709216\t.\t-\t1\tParent=1066500\n+scaffold_1\tJGI\tCDS\t5709320\t5710164\t.\t-\t0\tParent=1066500\n+scaffold_1\tJGI\tCDS\t5710228\t5710596\t.\t-\t0\tParent=1066500\n+scaffold_1\tJGI\texon\t5709177\t5709216\t.\t-\t.\tParent=1066500\n+scaffold_1\tJGI\texon\t5709320\t5710164\t.\t-\t.\tParent=1066500\n+scaffold_1\tJGI\texon\t5710228\t5710596\t.\t-\t.\tParent=1066500\n+scaffold_1\tJGI\tgene\t3582929\t3583102\t.\t+\t.\tID=e_gw2.1.2720.1;Name=e_gw2.1.2720.1\n+scaffold_1\tJGI\tmRNA\t3582929\t3583102\t.\t+\t.\tID=1026247;Parent=e_gw2.1.2720.1\n+scaffold_1\tJGI\tCDS\t3582929\t3583102\t.\t+\t0\tParent=1026247\n+scaffold_1\tJGI\texon\t3582929\t3583102\t.\t+\t.\tParent=1026247\n+scaffold_1\tJGI\tgene\t5061339\t5072066\t.\t-\t.\tID=gm1.335_g;Name=gm1.335_g\n+scaffold_1\tJGI\tmRNA\t5061339\t5072066\t.\t-\t.\tID=204986;Parent=gm1.335_g\n+scaffold_1\tJGI\tCDS\t5061339\t5061410\t.\t-\t0\tParent=204986\n+scaffold_1\tJGI\tCDS\t5070743\t5070801\t.\t-\t2\tParent=204986\n+scaffold_1\tJGI\tCDS\t5071914\t5071951\t.\t-\t1\tParent=204986\n+scaffold_1\tJGI\tCDS\t5072047\t
5072066\t.\t-\t0\tParent=204986\n+scaffold_1\tJGI\texon\t5061339\t5061410\t.\t-\t.\tParent=204986\n+scaffold_1\tJGI\texon\t5070743\t5070801\t.\t-\t.\tParent=204986\n+scaffold_1\tJGI\texon\t5071914\t5071951\t.\t-\t.\tParent=204986\n+scaffold_1\tJGI\texon\t5072047\t5072066\t.\t-\t.\tParent=204986\n+scaffold_1\tJGI\tgene\t6220485\t6222489\t.\t+\t.\tID=gm1.515_g;Name=gm1.515_g\n+scaffold_1\tJGI\tmRNA\t6220485\t6222489\t.\t+\t.\tID=205166;Parent=gm1.515_g\n+scaffold_1\tJGI\tCDS\t6220485\t6220508\t.\t+\t0\tParent=205166\n+scaffold_1\tJGI\tCDS\t6220551\t622'..b'Genemark2.C_10474\n+scaffold_1\tJGI\tmRNA\t6068472\t6069874\t.\t-\t.\tID=1090684;Parent=estExt_Genemark2.C_10474\n+scaffold_1\tJGI\tCDS\t6068473\t6068941\t.\t-\t1\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6068976\t6069014\t.\t-\t1\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6069080\t6069105\t.\t-\t0\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6069143\t6069420\t.\t-\t2\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6069507\t6069611\t.\t-\t2\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6069662\t6069669\t.\t-\t1\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6069778\t6069797\t.\t-\t0\tParent=1090684\n+scaffold_1\tJGI\tCDS\t6069836\t6069874\t.\t-\t0\tParent=1090684\n+scaffold_1\tJGI\tthree_prime_UTR\t6068472\t6068472\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6068472\t6068941\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6068976\t6069014\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6069080\t6069105\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6069143\t6069420\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6069507\t6069611\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6069662\t6069669\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6069778\t6069797\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\texon\t6069836\t6069874\t.\t-\t.\tParent=1090684\n+scaffold_1\tJGI\tgene\t6222830\t6223956\t.\t-\t.\tID=fgenesh2_kg.1_#_199_#_Contig10010;Name=fgenesh2_kg.1_#_199_#_Contig10010\n+scaffold_1\tJGI\tmRNA\t6222830\t6223956\t.\t-\t.\tID=1
059305;Parent=fgenesh2_kg.1_#_199_#_Contig10010\n+scaffold_1\tJGI\tfive_prime_UTR\t6223850\t6223956\t.\t-\t.\tParent=1059305\n+scaffold_1\tJGI\tCDS\t6223123\t6223377\t.\t-\t0\tParent=1059305\n+scaffold_1\tJGI\tCDS\t6223466\t6223513\t.\t-\t0\tParent=1059305\n+scaffold_1\tJGI\tCDS\t6223599\t6223636\t.\t-\t2\tParent=1059305\n+scaffold_1\tJGI\tCDS\t6223717\t6223849\t.\t-\t0\tParent=1059305\n+scaffold_1\tJGI\tthree_prime_UTR\t6222830\t6223122\t.\t-\t.\tParent=1059305\n+scaffold_1\tJGI\texon\t6222830\t6223377\t.\t-\t.\tParent=1059305\n+scaffold_1\tJGI\texon\t6223466\t6223513\t.\t-\t.\tParent=1059305\n+scaffold_1\tJGI\texon\t6223599\t6223636\t.\t-\t.\tParent=1059305\n+scaffold_1\tJGI\texon\t6223717\t6223956\t.\t-\t.\tParent=1059305\n+scaffold_1\tJGI\tgene\t6194330\t6196070\t.\t+\t.\tID=fgenesh2_kg.1_#_188_#_Contig951;Name=fgenesh2_kg.1_#_188_#_Contig951\n+scaffold_1\tJGI\tmRNA\t6194330\t6196070\t.\t+\t.\tID=1059294;Parent=fgenesh2_kg.1_#_188_#_Contig951\n+scaffold_1\tJGI\tfive_prime_UTR\t6194330\t6194523\t.\t+\t.\tParent=1059294\n+scaffold_1\tJGI\tCDS\t6194524\t6194757\t.\t+\t0\tParent=1059294\n+scaffold_1\tJGI\tCDS\t6194844\t6195164\t.\t+\t0\tParent=1059294\n+scaffold_1\tJGI\tthree_prime_UTR\t6195165\t6195696\t.\t+\t.\tParent=1059294\n+scaffold_1\tJGI\tthree_prime_UTR\t6195774\t6196070\t.\t+\t.\tParent=1059294\n+scaffold_1\tJGI\texon\t6194330\t6194757\t.\t+\t.\tParent=1059294\n+scaffold_1\tJGI\texon\t6194844\t6195696\t.\t+\t.\tParent=1059294\n+scaffold_1\tJGI\texon\t6195774\t6196070\t.\t+\t.\tParent=1059294\n+scaffold_1\tJGI\tgene\t4208772\t4210540\t.\t-\t.\tID=gm1.273_g;Name=gm1.273_g\n+scaffold_1\tJGI\tmRNA\t4208772\t4210540\t.\t-\t.\tID=204924;Parent=gm1.273_g\n+scaffold_1\tJGI\tCDS\t4208772\t4208899\t.\t-\t2\tParent=204924\n+scaffold_1\tJGI\tCDS\t4208964\t4209686\t.\t-\t2\tParent=204924\n+scaffold_1\tJGI\tCDS\t4209714\t4209734\t.\t-\t2\tParent=204924\n+scaffold_1\tJGI\tCDS\t4209782\t4210451\t.\t-\t0\tParent=204924\n+scaffold_1\tJGI\tCDS\t4210511\t4210540\t.\t-\t0\tPar
ent=204924\n+scaffold_1\tJGI\texon\t4208772\t4208899\t.\t-\t.\tParent=204924\n+scaffold_1\tJGI\texon\t4208964\t4209686\t.\t-\t.\tParent=204924\n+scaffold_1\tJGI\texon\t4209714\t4209734\t.\t-\t.\tParent=204924\n+scaffold_1\tJGI\texon\t4209782\t4210451\t.\t-\t.\tParent=204924\n+scaffold_1\tJGI\texon\t4210511\t4210540\t.\t-\t.\tParent=204924\n+scaffold_1\tJGI\tgene\t5585408\t5590273\t.\t+\t.\tID=estExt_fgenesh2_pm.C_10049;Name=estExt_fgenesh2_pm.C_10049\n+scaffold_1\tJGI\tmRNA\t5585408\t5590273\t.\t+\t.\tID=1073299;Parent=estExt_fgenesh2_pm.C_10049\n+scaffold_1\tJGI\tfive_prime_UTR\t5585408\t5585504\t.\t+\t.\tParent=1073299\n+scaffold_1\tJGI\tCDS\t5585505\t5585741\t.\t+\t0\tParent=1073299\n+scaffold_1\tJGI\tCDS\t5585815\t5586858\t.\t+\t0\tParent=1073299\n+scaffold_1\tJGI\tthree_prime_UTR\t5586859\t5587647\t.\t+\t.\tParent=1073299\n+scaffold_1\tJGI\tthree_prime_UTR\t5587746\t5590273\t.\t+\t.\tParent=1073299\n+scaffold_1\tJGI\texon\t5585408\t5585741\t.\t+\t.\tParent=1073299\n+scaffold_1\tJGI\texon\t5585815\t5587647\t.\t+\t.\tParent=1073299\n+scaffold_1\tJGI\texon\t5587746\t5590273\t.\t+\t.\tParent=1073299\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/JGI_genes.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/JGI_genes.gtf Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,3994 @@\n+scaffold_1\tJGI\texon\t7\t12\t.\t+\t.\tname "fgenesh2_pg.1_#_1"; transcriptId 1066417\n+scaffold_1\tJGI\tCDS\t7\t12\t.\t+\t0\tname "fgenesh2_pg.1_#_1"; proteinId 1066417; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t7\t9\t.\t+\t0\tname "fgenesh2_pg.1_#_1"\n+scaffold_1\tJGI\texon\t363\t902\t.\t+\t.\tname "fgenesh2_pg.1_#_1"; transcriptId 1066417\n+scaffold_1\tJGI\tCDS\t363\t902\t.\t+\t0\tname "fgenesh2_pg.1_#_1"; proteinId 1066417; exonNumber 2\n+scaffold_1\tJGI\texon\t954\t1160\t.\t+\t.\tname "fgenesh2_pg.1_#_1"; transcriptId 1066417\n+scaffold_1\tJGI\tCDS\t954\t1160\t.\t+\t0\tname "fgenesh2_pg.1_#_1"; proteinId 1066417; exonNumber 3\n+scaffold_1\tJGI\tstop_codon\t1158\t1160\t.\t+\t0\tname "fgenesh2_pg.1_#_1"\n+scaffold_1\tJGI\texon\t17310\t18075\t.\t-\t.\tname "estExt_Genewise2Plus.C_10002"; transcriptId 1045566\n+scaffold_1\tJGI\tCDS\t17597\t18075\t.\t-\t2\tname "estExt_Genewise2Plus.C_10002"; proteinId 1045566; exonNumber 3\n+scaffold_1\tJGI\tstop_codon\t17597\t17599\t.\t-\t0\tname "estExt_Genewise2Plus.C_10002"\n+scaffold_1\tJGI\texon\t18135\t18268\t.\t-\t.\tname "estExt_Genewise2Plus.C_10002"; transcriptId 1045566\n+scaffold_1\tJGI\tCDS\t18135\t18268\t.\t-\t1\tname "estExt_Genewise2Plus.C_10002"; proteinId 1045566; exonNumber 2\n+scaffold_1\tJGI\texon\t18353\t19188\t.\t-\t.\tname "estExt_Genewise2Plus.C_10002"; transcriptId 1045566\n+scaffold_1\tJGI\tCDS\t18353\t19188\t.\t-\t0\tname "estExt_Genewise2Plus.C_10002"; proteinId 1045566; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t19186\t19188\t.\t-\t0\tname "estExt_Genewise2Plus.C_10002"\n+scaffold_1\tJGI\texon\t29168\t29186\t.\t-\t.\tname "estExt_Genemark2.C_10004"; transcriptId 1090574\n+scaffold_1\tJGI\texon\t31979\t32005\t.\t-\t.\tname "estExt_Genemark2.C_10004"; transcriptId 1090574\n+scaffold_1\tJGI\texon\t32085\t32211\t.\t-\t.\tname "estExt_Genemark2.C_10004"; transcriptId 1090574\n+scaffold_1\tJGI\tCDS\t32170\t32211\t.\t-\t0\tname "estExt_Genemark2.C_10004"; proteinId 1090574; 
exonNumber 3\n+scaffold_1\tJGI\tstop_codon\t32170\t32172\t.\t-\t0\tname "estExt_Genemark2.C_10004"\n+scaffold_1\tJGI\texon\t32249\t32298\t.\t-\t.\tname "estExt_Genemark2.C_10004"; transcriptId 1090574\n+scaffold_1\tJGI\tCDS\t32249\t32298\t.\t-\t2\tname "estExt_Genemark2.C_10004"; proteinId 1090574; exonNumber 2\n+scaffold_1\tJGI\texon\t39594\t39912\t.\t-\t.\tname "estExt_Genemark2.C_10004"; transcriptId 1090574\n+scaffold_1\tJGI\tCDS\t39594\t39912\t.\t-\t0\tname "estExt_Genemark2.C_10004"; proteinId 1090574; exonNumber 1\n+scaffold_1\tJGI\texon\t48024\t49977\t.\t-\t.\tname "fgenesh2_kg.1_#_3_#_Contig2893"; transcriptId 1059109\n+scaffold_1\tJGI\tCDS\t48459\t49977\t.\t-\t1\tname "fgenesh2_kg.1_#_3_#_Contig2893"; proteinId 1059109; exonNumber 4\n+scaffold_1\tJGI\tstop_codon\t48459\t48461\t.\t-\t0\tname "fgenesh2_kg.1_#_3_#_Contig2893"\n+scaffold_1\tJGI\texon\t50732\t50830\t.\t-\t.\tname "fgenesh2_kg.1_#_3_#_Contig2893"; transcriptId 1059109\n+scaffold_1\tJGI\tCDS\t50732\t50830\t.\t-\t1\tname "fgenesh2_kg.1_#_3_#_Contig2893"; proteinId 1059109; exonNumber 3\n+scaffold_1\tJGI\texon\t50924\t51099\t.\t-\t.\tname "fgenesh2_kg.1_#_3_#_Contig2893"; transcriptId 1059109\n+scaffold_1\tJGI\tCDS\t50924\t51099\t.\t-\t0\tname "fgenesh2_kg.1_#_3_#_Contig2893"; proteinId 1059109; exonNumber 2\n+scaffold_1\tJGI\texon\t51227\t51548\t.\t-\t.\tname "fgenesh2_kg.1_#_3_#_Contig2893"; transcriptId 1059109\n+scaffold_1\tJGI\tCDS\t51227\t51403\t.\t-\t0\tname "fgenesh2_kg.1_#_3_#_Contig2893"; proteinId 1059109; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t51401\t51403\t.\t-\t0\tname "fgenesh2_kg.1_#_3_#_Contig2893"\n+scaffold_1\tJGI\texon\t62584\t62802\t.\t+\t.\tname "e_gw2.1.1272.1"; transcriptId 1026707\n+scaffold_1\tJGI\tCDS\t62584\t62802\t.\t+\t0\tname "e_gw2.1.1272.1"; proteinId 1026707; exonNumber 1\n+scaffold_1\tJGI\tstop_codon\t62800\t62802\t.\t+\t0\tname "e_gw2.1.1272.1"\n+scaffold_1\tJGI\texon\t82115\t82306\t.\t-\t.\tname "e_gw2.1.1271.1"; transcriptId 
1026712\n+scaffold_1\tJGI\tCDS\t82115\t82306\t.\t-\t0\tname "e_gw2.1.1271.1"; proteinId 1026712; exonNumber 1\n+scaffold_1\tJGI\tstop_codon\t82115\t82117\t.\t-\t0\tname "e_gw2.1.1271.1"\n+scaffold_1\tJGI\texon\t120058\t120311\t.\t-\t.\tname "fgenesh2_kg.1_#_6_#_Contig4211"; transcriptId 1059112\n+scaffold_1\tJGI\tCDS\t120203\t120311\t.\t-\t1\tname "fgenesh2_kg.1_#_6_#_Contig4211"; proteinId 1059112; exonNumber 5\n+scaffol'..b'n\t6530335\t6530597\t.\t-\t.\tname "estExt_fgenesh2_pg.C_10280"; transcriptId 1078583\n+scaffold_1\tJGI\tCDS\t6530335\t6530597\t.\t-\t1\tname "estExt_fgenesh2_pg.C_10280"; proteinId 1078583; exonNumber 2\n+scaffold_1\tJGI\texon\t6530713\t6530971\t.\t-\t.\tname "estExt_fgenesh2_pg.C_10280"; transcriptId 1078583\n+scaffold_1\tJGI\tCDS\t6530713\t6530909\t.\t-\t0\tname "estExt_fgenesh2_pg.C_10280"; proteinId 1078583; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t6530907\t6530909\t.\t-\t0\tname "estExt_fgenesh2_pg.C_10280"\n+scaffold_1\tJGI\texon\t6530961\t6531797\t.\t-\t.\tname "estExt_fgenesh2_pg.C_10281"; transcriptId 1078584\n+scaffold_1\tJGI\tCDS\t6531507\t6531797\t.\t-\t0\tname "estExt_fgenesh2_pg.C_10281"; proteinId 1078584; exonNumber 2\n+scaffold_1\tJGI\tstop_codon\t6531507\t6531509\t.\t-\t0\tname "estExt_fgenesh2_pg.C_10281"\n+scaffold_1\tJGI\texon\t6531869\t6532149\t.\t-\t.\tname "estExt_fgenesh2_pg.C_10281"; transcriptId 1078584\n+scaffold_1\tJGI\tCDS\t6531869\t6532027\t.\t-\t0\tname "estExt_fgenesh2_pg.C_10281"; proteinId 1078584; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t6532025\t6532027\t.\t-\t0\tname "estExt_fgenesh2_pg.C_10281"\n+scaffold_1\tJGI\texon\t6532672\t6535468\t.\t+\t.\tname "estExt_Genemark2.C_10601"; transcriptId 1090763\n+scaffold_1\tJGI\tCDS\t6532672\t6535468\t.\t+\t0\tname "estExt_Genemark2.C_10601"; proteinId 1090763; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t6532672\t6532674\t.\t+\t0\tname "estExt_Genemark2.C_10601"\n+scaffold_1\tJGI\texon\t6535501\t6536357\t.\t+\t.\tname "estExt_Genemark2.C_10601"; 
transcriptId 1090763\n+scaffold_1\tJGI\tCDS\t6535501\t6536357\t.\t+\t2\tname "estExt_Genemark2.C_10601"; proteinId 1090763; exonNumber 2\n+scaffold_1\tJGI\texon\t6536382\t6536530\t.\t+\t.\tname "estExt_Genemark2.C_10601"; transcriptId 1090763\n+scaffold_1\tJGI\tCDS\t6536382\t6536530\t.\t+\t0\tname "estExt_Genemark2.C_10601"; proteinId 1090763; exonNumber 3\n+scaffold_1\tJGI\texon\t6536577\t6536589\t.\t+\t.\tname "estExt_Genemark2.C_10601"; transcriptId 1090763\n+scaffold_1\tJGI\tCDS\t6536577\t6536589\t.\t+\t1\tname "estExt_Genemark2.C_10601"; proteinId 1090763; exonNumber 4\n+scaffold_1\tJGI\tstop_codon\t6536587\t6536589\t.\t+\t0\tname "estExt_Genemark2.C_10601"\n+scaffold_1\tJGI\texon\t6536649\t6537413\t.\t-\t.\tname "fgenesh2_kg.1_#_289_#_Contig6235"; transcriptId 1059395\n+scaffold_1\tJGI\tCDS\t6536793\t6537413\t.\t-\t0\tname "fgenesh2_kg.1_#_289_#_Contig6235"; proteinId 1059395; exonNumber 2\n+scaffold_1\tJGI\tstop_codon\t6536793\t6536795\t.\t-\t0\tname "fgenesh2_kg.1_#_289_#_Contig6235"\n+scaffold_1\tJGI\texon\t6537494\t6537976\t.\t-\t.\tname "fgenesh2_kg.1_#_289_#_Contig6235"; transcriptId 1059395\n+scaffold_1\tJGI\tCDS\t6537494\t6537910\t.\t-\t0\tname "fgenesh2_kg.1_#_289_#_Contig6235"; proteinId 1059395; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t6537908\t6537910\t.\t-\t0\tname "fgenesh2_kg.1_#_289_#_Contig6235"\n+scaffold_1\tJGI\texon\t6537938\t6537972\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6537938\t6537972\t.\t+\t0\tname "gm1.603_g"; proteinId 205254; exonNumber 1\n+scaffold_1\tJGI\tstart_codon\t6537938\t6537940\t.\t+\t0\tname "gm1.603_g"\n+scaffold_1\tJGI\texon\t6538014\t6538046\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6538014\t6538046\t.\t+\t1\tname "gm1.603_g"; proteinId 205254; exonNumber 2\n+scaffold_1\tJGI\texon\t6538107\t6538156\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6538107\t6538156\t.\t+\t1\tname "gm1.603_g"; proteinId 205254; exonNumber 
3\n+scaffold_1\tJGI\texon\t6538201\t6538314\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6538201\t6538314\t.\t+\t2\tname "gm1.603_g"; proteinId 205254; exonNumber 4\n+scaffold_1\tJGI\texon\t6538367\t6538787\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6538367\t6538787\t.\t+\t2\tname "gm1.603_g"; proteinId 205254; exonNumber 5\n+scaffold_1\tJGI\texon\t6538843\t6539173\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6538843\t6539173\t.\t+\t1\tname "gm1.603_g"; proteinId 205254; exonNumber 6\n+scaffold_1\tJGI\texon\t6539217\t6539489\t.\t+\t.\tname "gm1.603_g"; transcriptId 205254\n+scaffold_1\tJGI\tCDS\t6539217\t6539489\t.\t+\t0\tname "gm1.603_g"; proteinId 205254; exonNumber 7\n+scaffold_1\tJGI\tstop_codon\t6539487\t6539489\t.\t+\t0\tname "gm1.603_g"\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/MB7_3R.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/MB7_3R.bed Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,241 @@\n+3R\t60868\t66780\tCG1106-RD\t2\t+\t60868\t66780\t0\t10\t179,61,43,252,193,75,196,1706,126,181,\t0,942,1093,1407,2479,3157,3424,3673,5443,5732,\n+3R\t60868\t66780\tCG1106-RF\t3\t+\t60868\t66780\t0\t8\t179,223,193,75,196,1706,126,181,\t0,1436,2479,3157,3424,3673,5443,5732,\n+3R\t485305\t530979\tCG31531-RC\t2\t+\t485305\t530979\t0\t9\t311,161,54,59,352,207,143,157,4100,\t0,656,11214,21196,37608,38466,40134,40588,41575,\n+3R\t62515\t66780\tCG1106-RA\t1\t+\t62515\t66780\t0\t7\t147,193,75,196,1706,126,181,\t0,832,1510,1777,2026,3796,4085,\n+3R\t60868\t66780\tCG1106-RB\t1\t+\t60868\t66780\t0\t8\t179,252,193,75,196,1706,126,181,\t0,1407,2479,3157,3424,3673,5443,5732,\n+3R\t44184\t45852\tCG31516.a\t31\t-\t44184\t45852\t0\t1\t1669,\t0,\n+3R\t60868\t66780\tCG1106-RH\t1\t+\t60868\t66780\t0\t9\t179,61,252,193,75,196,1706,126,181,\t0,942,1407,2479,3157,3424,3673,5443,5732,\n+3R\t612767\t627445\tCG42574.b\t3\t-\t612767\t627445\t0\t9\t1243,202,189,1836,989,1105,1847,987,511,\t0,1329,1636,1888,3831,4913,6173,12647,14168,\n+3R\t612767\t627445\tCG42574.a\t3\t-\t612767\t627445\t0\t11\t1243,202,189,1836,989,1105,1847,87,579,987,511,\t0,1329,1636,1888,3831,4913,6173,8692,9444,12647,14168,\n+3R\t538609\t539918\tCG9769-RA.3d\t31\t-\t538609\t539918\t0\t1\t1310,\t0,\n+3R\t94943\t103515\tCG9766-RB\t1\t-\t94943\t103515\t0\t4\t568,205,246,165,\t0,627,888,8408,\n+3R\t17136\t21871\tDMG5-MB6.chr3R.1.002.a.a\t2\t+\t17136\t21871\t0\t6\t116,95,486,540,168,281,\t0,2817,2978,3535,4231,4455,\n+3R\t44179\t45852\tCG31516-RA\t34\t-\t44179\t45852\t0\t1\t1674,\t0,\n+3R\t228771\t232914\tCG14648-RA\t1\t+\t228771\t232914\t0\t6\t232,171,719,69,211,1326,\t0,1420,1652,2431,2552,2818,\n+3R\t1053010\t1057940\tCG10229-RA\t1\t-\t1053010\t1057940\t0\t6\t903,560,122,604,222,162,\t0,989,2419,2734,3705,4769,\n+3R\t310763\t313185\tCG1078-RA\t1\t+\t310763\t313185\t0\t4\t1491,93,71,516,\t0,1555,1780,1907,\n+3R\t15388\t16170\tCG18090-RA\t34\t-\t15388\t16170\t0\t1\t783,\t0,\n+3R\t74439\t76518\tCG14643-RA\
t1\t-\t74439\t76518\t0\t4\t301,655,608,62,\t0,474,1190,2018,\n+3R\t117996\t120558\tCG9780-RA\t1\t-\t117996\t120558\t0\t2\t1094,1084,\t0,1479,\n+3R\t1090664\t1094197\tCG31543-RA\t1\t+\t1090664\t1094197\t0\t5\t696,280,81,140,1014,\t0,1811,2179,2321,2520,\n+3R\t1082763\t1094197\tCG31543-RC\t1\t+\t1082763\t1094197\t0\t5\t1081,280,81,140,1014,\t0,9712,10080,10222,10421,\n+3R\t1063223\t1077468\tCG12163-RA\t1\t-\t1063223\t1077468\t0\t6\t656,182,744,128,417,323,\t0,712,953,1991,6337,13923,\n+3R\t470349\t471316\tCG9771-RA\t1\t-\t470349\t471316\t0\t2\t450,414,\t0,554,\n+3R\t92676\t94166\tCG1092.a\t1\t+\t92676\t94166\t0\t2\t252,1184,\t0,307,\n+3R\t306534\t309943\tCG14651-RB\t34\t+\t306534\t309943\t0\t1\t3410,\t0,\n+3R\t160820\t161237\tCG14645-RA\t34\t+\t160820\t161237\t0\t1\t418,\t0,\n+3R\t161164\t163374\tCG9772-RD\t1\t-\t161164\t163374\t0\t4\t288,160,400,1184,\t0,344,567,1027,\n+3R\t255639\t259652\tCG9805-RA\t1\t-\t255639\t259652\t0\t4\t252,2239,1085,232,\t0,311,2600,3782,\n+3R\t161143\t163408\tCG9772-RA\t2\t-\t161143\t163408\t0\t5\t309,160,400,874,139,\t0,365,588,1048,2127,\n+3R\t23030\t30295\tCG12582-RB\t1\t+\t23030\t30295\t0\t8\t564,514,97,289,480,422,381,361,\t0,951,1522,1697,4535,5068,6474,6905,\n+3R\t22997\t30295\tCG12582-RA\t1\t+\t22997\t30295\t0\t9\t288,135,514,97,289,480,422,381,361,\t0,462,984,1555,1730,4568,5101,6507,6938,\n+3R\t161143\t165287\tCG9772-RB\t2\t-\t161143\t165287\t0\t5\t309,160,400,874,199,\t0,365,588,1048,3946,\n+3R\t15414\t15982\tCG18090.a\t31\t-\t15414\t15982\t0\t1\t569,\t0,\n+3R\t135364\t136669\tCG9779-RA\t1\t-\t135364\t136669\t0\t3\t717,241,161,\t0,772,1145,\n+3R\t438597\t459031\tCG1056-RA\t2\t+\t438597\t459031\t0\t7\t476,1116,225,199,444,393,1502,\t0,9634,14098,14384,15031,17254,18933,\n+3R\t467696\t470358\tCG18271-RB\t1\t+\t467696\t470358\t0\t2\t35,1093,\t0,1570,\n+3R\t1045390\t1047270\tCG1116.a\t3\t+\t1045390\t1047270\t0\t6\t188,218,272,150,577,157,\t0,248,531,868,1081,1724,\n+3R\t145412\t151817\tCG9795-RB\t3\t+\t145412\t151817\t0\t10\t167,203,
392,203,212,174,85,82,589,399,\t0,2155,3541,3997,4263,4647,4898,5040,5183,6007,\n+3R\t145412\t151817\tCG9795-RC\t3\t+\t145412\t151817\t0\t10\t201,203,392,203,212,174,85,82,589,399,\t0,2155,3541,3997,4263,4647,4898,5040,5183,6007,\n+3R\t248610\t255054\tCG9809-RD\t1\t-\t248610\t255054\t0\t7\t777,146,1832,795,106,119,358,\t0,840,1045,2943,3835,3994,6087,\n+3R\t72744\t74040\tCG14639-RA\t1\t+\t72744\t74040\t0\t2\t63,1175,\t0,122,\n+3R\t248220\t255054\tCG9809-RB\t1\t-\t248220\t255054\t0\t7\t1167,146,1832,'..b',1752,\t0,2490,4042,7888,14945,15384,17304,19606,\n+3R\t107427\t127263\tCG32490.d\t3\t+\t107427\t127263\t0\t8\t167,161,181,12,158,133,1098,1752,\t0,969,2521,6367,13424,13863,15783,18085,\n+3R\t107427\t127263\tCG32490.c\t3\t+\t107427\t127263\t0\t8\t62,161,181,12,158,133,1098,1752,\t0,969,2521,6367,13424,13863,15783,18085,\n+3R\t107627\t127263\tCG32490.b\t3\t+\t107627\t127263\t0\t8\t99,161,181,12,158,133,1098,1752,\t0,769,2321,6167,13224,13663,15583,17885,\n+3R\t107886\t127263\tCG32490.a\t3\t+\t107886\t127263\t0\t8\t151,161,181,12,158,133,1098,1752,\t0,510,2062,5908,12965,13404,15324,17626,\n+3R\t340352\t383789\tCG34357-RB\t1\t-\t340352\t383789\t0\t6\t490,100,377,885,311,126,\t0,8358,19519,23175,42066,43312,\n+3R\t160820\t161223\tCG14645.a\t31\t+\t160820\t161223\t0\t1\t404,\t0,\n+3R\t1079070\t1081125\tCG1113-RA\t3\t+\t1079070\t1081125\t0\t3\t322,1045,430,\t0,527,1626,\n+3R\t23013\t30295\tCG12582.a\t3\t+\t23013\t30295\t0\t9\t272,135,552,97,289,480,422,381,361,\t0,446,930,1539,1714,4552,5085,6491,6922,\n+3R\t403661\t404368\tCG32945.a\t34\t-\t403661\t404368\t0\t1\t708,\t0,\n+3R\t107427\t128309\tCG32490.i\t2\t+\t107427\t128309\t0\t6\t167,161,181,167,133,62,\t0,969,2521,13415,13863,20821,\n+3R\t107427\t127263\tCG32490.h\t3\t+\t107427\t127263\t0\t7\t167,161,181,167,133,1098,1752,\t0,969,2521,13415,13863,15783,18085,\n+3R\t1045390\t1047270\tCG1116-RA\t1\t+\t1045390\t1047270\t0\t6\t188,215,272,150,577,157,\t0,251,531,868,1081,1724,\n+3R\t1045390\t1047270\tCG1116-RB\t3\t+\t1045
390\t1047270\t0\t5\t466,272,150,577,157,\t0,531,868,1081,1724,\n+3R\t60868\t66781\tCG1106.d\t3\t+\t60868\t66781\t0\t7\t179,223,193,75,196,1896,182,\t0,1436,2479,3157,3424,3673,5732,\n+3R\t60868\t66781\tCG1106.e\t3\t+\t60868\t66781\t0\t6\t179,193,75,196,1896,182,\t0,2479,3157,3424,3673,5732,\n+3R\t64259\t66780\tCG1106.f\t3\t+\t64259\t66780\t0\t4\t229,1706,126,181,\t0,282,2052,2341,\n+3R\t64259\t66781\tCG1106.g\t3\t+\t64259\t66781\t0\t3\t229,1896,182,\t0,282,2341,\n+3R\t60942\t66780\tCG1106.a\t2\t+\t60942\t66780\t0\t8\t105,387,193,75,196,1706,126,181,\t0,1333,2405,3083,3350,3599,5369,5658,\n+3R\t60868\t66781\tCG1106.b\t3\t+\t60868\t66781\t0\t8\t179,61,252,193,75,196,1896,182,\t0,942,1407,2479,3157,3424,3673,5732,\n+3R\t60868\t66780\tCG1106.c\t3\t+\t60868\t66780\t0\t7\t179,193,75,196,1706,126,181,\t0,2479,3157,3424,3673,5443,5732,\n+3R\t560136\t574777\tCG9765-RA\t1\t-\t560136\t574777\t0\t10\t933,150,162,105,72,62,308,2331,202,265,\t0,994,1228,2609,2767,2900,3059,7438,10050,14377,\n+3R\t560132\t572136\tCG9765-RC\t1\t-\t560132\t572136\t0\t10\t937,150,162,105,72,65,266,2331,202,618,\t0,998,1232,2613,2771,2904,3063,7442,10054,11387,\n+3R\t560132\t572136\tCG9765-RD\t3\t-\t560132\t572136\t0\t10\t937,150,162,105,72,65,266,2331,202,573,\t0,998,1232,2613,2771,2904,3063,7442,10054,11432,\n+3R\t233159\t241835\tCG32944-RE\t2\t-\t233159\t241835\t0\t8\t567,182,126,127,71,166,56,431,\t0,622,5444,5804,5999,7096,7323,8246,\n+3R\t560132\t565269\tCG9765-RF\t2\t-\t560132\t565269\t0\t8\t937,150,162,105,72,65,266,37,\t0,998,1232,2613,2771,2904,3063,5101,\n+3R\t216097\t217839\tCG14646-RA\t1\t+\t216097\t217839\t0\t2\t684,1007,\t0,736,\n+3R\t263102\t267050\tCG14650-RA\t1\t+\t263102\t267050\t0\t6\t2380,154,125,103,293,582,\t0,2437,2647,2841,3013,3367,\n+3R\t538609\t540025\tCG9769.a\t34\t-\t538609\t540025\t0\t1\t1417,\t0,\n+3R\t655633\t656232\tCG14658-RA\t34\t-\t655633\t656232\t0\t1\t600,\t0,\n+3R\t145412\t151817\tCG9795-RA\t1\t+\t145412\t151817\t0\t11\t201,203,148,392,203,212,174,85,82,589,399,
\t0,2155,2658,3541,3997,4263,4647,4898,5040,5183,6007,\n+3R\t485305\t530979\tCG31531.a\t2\t+\t485305\t530979\t0\t9\t94,164,54,59,352,207,143,157,4100,\t0,653,11214,21196,37608,38466,40134,40588,41575,\n+3R\t226212\t227739\tCG14647.a\t1\t+\t226212\t227739\t0\t3\t649,132,417,\t0,864,1111,\n+3R\t204401\t206932\tCG11739-RA\t2\t+\t204401\t206932\t0\t7\t331,169,150,118,162,128,508,\t0,771,996,1249,1430,1825,2024,\n+3R\t205029\t206932\tCG11739-RB\t1\t+\t205029\t206932\t0\t6\t312,150,118,162,128,508,\t0,368,621,802,1197,1396,\n+3R\t204386\t206932\tCG11739-RC\t1\t+\t204386\t206932\t0\t7\t66,169,150,118,162,128,508,\t0,786,1011,1264,1445,1840,2039,\n+3R\t204644\t206932\tCG11739-RD\t2\t+\t204644\t206932\t0\t7\t88,169,150,118,162,128,508,\t0,528,753,1006,1187,1582,1781,\n+3R\t639058\t641751\tCG1129-RB\t1\t+\t639058\t641751\t0\t4\t273,306,535,743,\t0,335,903,1951,\n+3R\t639076\t641751\tCG1129-RA\t1\t+\t639076\t641751\t0\t5\t26,90,306,535,743,\t0,165,317,885,1933,\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/MB7_3R.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/MB7_3R.gff3 Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,3971 @@\n+##gff-version 3\n+3R\tMB7\tgene\t361\t10200\t0\t+\t.\tID=CG12581;Name=CG12581\n+3R\tMB7\tmRNA\t361\t10200\t3\t+\t.\tID=CG12581-RB;Parent=CG12581;Name=CG12581-RB\n+3R\tMB7\texon\t361\t509\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\texon\t578\t1913\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\texon\t7784\t8649\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\texon\t9439\t10200\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\tfive_prime_UTR\t361\t509\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\tfive_prime_UTR\t578\t1114\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\tstart_codon\t1115\t1117\t0\t+\t0\tParent=CG12581-RB\n+3R\tMB7\tCDS\t1115\t1913\t0\t+\t0\tParent=CG12581-RB\n+3R\tMB7\tCDS\t7784\t8649\t0\t+\t2\tParent=CG12581-RB\n+3R\tMB7\tCDS\t9439\t9771\t0\t+\t0\tParent=CG12581-RB\n+3R\tMB7\tstop_codon\t9769\t9771\t0\t+\t0\tParent=CG12581-RB\n+3R\tMB7\tthree_prime_UTR\t9772\t10200\t0\t+\t.\tParent=CG12581-RB\n+3R\tMB7\tmRNA\t380\t10200\t1\t+\t.\tID=CG12581-RA;Parent=CG12581;Name=CG12581-RA\n+3R\tMB7\texon\t380\t1913\t0\t+\t.\tParent=CG12581-RA\n+3R\tMB7\texon\t7784\t8649\t0\t+\t.\tParent=CG12581-RA\n+3R\tMB7\texon\t9439\t10200\t0\t+\t.\tParent=CG12581-RA\n+3R\tMB7\tfive_prime_UTR\t380\t1114\t0\t+\t.\tParent=CG12581-RA\n+3R\tMB7\tstart_codon\t1115\t1117\t0\t+\t0\tParent=CG12581-RA\n+3R\tMB7\tCDS\t1115\t1913\t0\t+\t0\tParent=CG12581-RA\n+3R\tMB7\tCDS\t7784\t8649\t0\t+\t2\tParent=CG12581-RA\n+3R\tMB7\tCDS\t9439\t9771\t0\t+\t0\tParent=CG12581-RA\n+3R\tMB7\tstop_codon\t9769\t9771\t0\t+\t0\tParent=CG12581-RA\n+3R\tMB7\tthree_prime_UTR\t9772\t10200\t0\t+\t.\tParent=CG12581-RA\n+3R\tMB7\tgene\t15388\t16170\t0\t-\t.\tID=CG18090;Name=CG18090\n+3R\tMB7\tmRNA\t15414\t15982\t31\t-\t.\tID=CG18090.a;Parent=CG18090;Name=CG18090.a\n+3R\tMB7\texon\t15414\t15982\t0\t-\t.\tParent=CG18090.a\n+3R\tMB7\tthree_prime_UTR\t15414\t15529\t0\t-\t.\tParent=CG18090.a\n+3R\tMB7\tstop_codon\t15530\t15532\t0\t-\t0\tParent=CG18090.a\n+3R\tMB7\tCDS\t15530\t15955\t0\t-\t0\tParent=CG18090.a\n+3R\tMB7\tstart_codon\t15953\t15955\t0\t
-\t0\tParent=CG18090.a\n+3R\tMB7\tfive_prime_UTR\t15956\t15982\t0\t-\t.\tParent=CG18090.a\n+3R\tMB7\tmRNA\t15388\t16170\t34\t-\t.\tID=CG18090-RA;Parent=CG18090;Name=CG18090-RA\n+3R\tMB7\texon\t15388\t16170\t0\t-\t.\tParent=CG18090-RA\n+3R\tMB7\tthree_prime_UTR\t15388\t15529\t0\t-\t.\tParent=CG18090-RA\n+3R\tMB7\tstop_codon\t15530\t15532\t0\t-\t0\tParent=CG18090-RA\n+3R\tMB7\tCDS\t15530\t15955\t0\t-\t0\tParent=CG18090-RA\n+3R\tMB7\tstart_codon\t15953\t15955\t0\t-\t0\tParent=CG18090-RA\n+3R\tMB7\tfive_prime_UTR\t15956\t16170\t0\t-\t.\tParent=CG18090-RA\n+3R\tMB7\tgene\t17136\t21871\t0\t+\t.\tID=DMG5-MB6.chr3R.1.002.a;Name=DMG5-MB6.chr3R.1.002.a\n+3R\tMB7\tmRNA\t17136\t21871\t2\t+\t.\tID=DMG5-MB6.chr3R.1.002.a.a;Parent=DMG5-MB6.chr3R.1.002.a;Name=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\texon\t17136\t17251\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\texon\t19953\t20047\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\texon\t20114\t20599\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\texon\t20671\t21210\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\texon\t21367\t21534\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\texon\t21591\t21871\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tstart_codon\t17136\t17138\t0\t+\t0\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tCDS\t17136\t17251\t0\t+\t0\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tCDS\t19953\t20047\t0\t+\t1\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tCDS\t20114\t20599\t0\t+\t2\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tCDS\t20671\t20759\t0\t+\t2\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tstop_codon\t20757\t20759\t0\t+\t0\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tthree_prime_UTR\t20760\t21210\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tthree_prime_UTR\t21367\t21534\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tthree_prime_UTR\t21591\t21871\t0\t+\t.\tParent=DMG5-MB6.chr3R.1.002.a.a\n+3R\tMB7\tgene\t22931\t30295\t0\t+\t.\tID=CG12582;Name=CG12582\n+3R\tMB7\tmRNA\t23013\t30295\t3\t+
\t.\tID=CG12582.a;Parent=CG12582;Name=CG12582.a\n+3R\tMB7\texon\t23013\t23284\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t23459\t23593\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t23943\t24494\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t24552\t24648\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t24727\t25015\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t27565\t28044\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t28098\t28519\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t29504\t29884\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\texon\t29935\t30295\t0\t+\t.\tParent=CG12582.a\n+3R\tMB7\tfive_prime_UTR\t23013\t23284\t0\t+\t.\tParent=CG125'..b'517\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1121579\t1121685\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1121869\t1122357\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1123924\t1124211\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1125192\t1125295\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1129833\t1129904\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1138711\t1139219\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1139660\t1140027\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1148710\t1148847\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\texon\t1149387\t1149566\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\tthree_prime_UTR\t1098665\t1099668\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\tstop_codon\t1099669\t1099671\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1099669\t1099804\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1099871\t1100040\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1100457\t1100616\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1100688\t1100809\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1118362\t1118563\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1118720\t1118882\t0\t-\t2\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1118941\t1119092\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1119784\t1119956\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1120028\t1120577\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1121363\t1121517\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1121579\t112168
5\t0\t-\t2\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1121869\t1122357\t0\t-\t2\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1123924\t1124211\t0\t-\t2\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1125192\t1125295\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1129833\t1129904\t0\t-\t1\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1138711\t1139219\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tCDS\t1139660\t1139920\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tstart_codon\t1139918\t1139920\t0\t-\t0\tParent=CG32464-RB\n+3R\tMB7\tfive_prime_UTR\t1139921\t1140027\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\tfive_prime_UTR\t1148710\t1148847\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\tfive_prime_UTR\t1149387\t1149566\t0\t-\t.\tParent=CG32464-RB\n+3R\tMB7\tmRNA\t1098665\t1149566\t3\t-\t.\tID=CG32464-RU;Parent=CG32464;Name=CG32464-RU\n+3R\tMB7\texon\t1098665\t1099804\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1099871\t1100040\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1100457\t1100616\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1100688\t1100809\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1118362\t1118563\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1118720\t1118882\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1118941\t1119092\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1119784\t1119956\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1120028\t1120577\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1121363\t1121517\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1121579\t1121685\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1121869\t1122357\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1123924\t1124211\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1125192\t1125295\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1138711\t1139219\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1139660\t1140027\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1148710\t1148847\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\texon\t1149387\t1149566\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\tthree_prime_UTR\t1098665\t1099668\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\tstop_codon\t1099669\t1099671\t
0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1099669\t1099804\t0\t-\t1\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1099871\t1100040\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1100457\t1100616\t0\t-\t1\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1100688\t1100809\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1118362\t1118563\t0\t-\t1\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1118720\t1118882\t0\t-\t2\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1118941\t1119092\t0\t-\t1\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1119784\t1119956\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1120028\t1120577\t0\t-\t1\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1121363\t1121517\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1121579\t1121685\t0\t-\t2\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1121869\t1122357\t0\t-\t2\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1123924\t1124211\t0\t-\t2\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1125192\t1125295\t0\t-\t1\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1138711\t1139219\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tCDS\t1139660\t1139920\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tstart_codon\t1139918\t1139920\t0\t-\t0\tParent=CG32464-RU\n+3R\tMB7\tfive_prime_UTR\t1139921\t1140027\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\tfive_prime_UTR\t1148710\t1148847\t0\t-\t.\tParent=CG32464-RU\n+3R\tMB7\tfive_prime_UTR\t1149387\t1149566\t0\t-\t.\tParent=CG32464-RU\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/UCSC_transcripts.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/UCSC_transcripts.gff3 Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,129 @@\n+##gff-version 3\n+chr1\thg19_ccdsGene\tgene\t896074\t900571\t.\t+\t.\tID=Gene:CCDS30550.1;Name=Gene:CCDS30550.1\n+chr1\thg19_ccdsGene\tmRNA\t896074\t900571\t0.000000\t+\t.\tID=Transcript:CCDS30550.1;Parent=Gene:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t896074\t896180\t.\t+\t0\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t896673\t896932\t.\t+\t1\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t897009\t897130\t.\t+\t2\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t897206\t897427\t.\t+\t0\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t897735\t897851\t.\t+\t0\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t898084\t898297\t.\t+\t0\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t898489\t898633\t.\t+\t2\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t898717\t898884\t.\t+\t1\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t899300\t899388\t.\t+\t1\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t899487\t899560\t.\t+\t2\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t899729\t899910\t.\t+\t0\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tCDS\t900343\t900571\t.\t+\t1\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t896074\t896180\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t896673\t896932\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t897009\t897130\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t897206\t897427\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t897735\t897851\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t898084\t898297\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t898489\t898633\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t898717\t898884\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t899300\t899388\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+c
hr1\thg19_ccdsGene\texon\t899487\t899560\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t899729\t899910\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\texon\t900343\t900571\t.\t+\t.\tParent=Transcript:CCDS30550.1\n+chr1\thg19_ccdsGene\tgene\t880074\t894620\t.\t-\t.\tID=Gene:CCDS3.1;Name=Gene:CCDS3.1\n+chr1\thg19_ccdsGene\tmRNA\t880074\t894620\t0.000000\t-\t.\tID=Transcript:CCDS3.1;Parent=Gene:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t880074\t880180\t.\t-\t2\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t880437\t880526\t.\t-\t2\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t880898\t881033\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t881553\t881666\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t881782\t881925\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t883511\t883612\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t883870\t883983\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t886507\t886618\t.\t-\t1\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t887380\t887519\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t887792\t887980\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t888555\t888668\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t889162\t889272\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t889384\t889462\t.\t-\t1\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t891303\t891393\t.\t-\t2\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t891475\t891595\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t892274\t892405\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t892479\t892653\t.\t-\t1\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t894309\t894461\t.\t-\t1\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\tCDS\t894595\t894620\t.\t-\t0\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t880074\t880180\t.\t-\t.\tParen
t=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t880437\t880526\t.\t-\t.\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t880898\t881033\t.\t-\t.\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t881553\t881666\t.\t-\t.\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t881782\t881925\t.\t-\t.\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t883511\t883612\t.\t-\t.\tParent=Transcript:CCDS3.1\n+chr1\thg19_ccdsGene\texon\t883870\t883983\t.\t-\t.\tParent=Transc'..b'anscript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t874420\t874509\t.\t+\t2\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t874655\t874840\t.\t+\t2\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t876524\t876686\t.\t+\t2\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t877516\t877631\t.\t+\t1\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t877790\t877868\t.\t+\t2\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t877939\t878438\t.\t+\t1\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t878633\t878757\t.\t+\t2\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t879078\t879188\t.\t+\t0\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tCDS\t879288\t879533\t.\t+\t0\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t861322\t861393\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t865535\t865716\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t866419\t866469\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t871152\t871276\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t874420\t874509\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t874655\t874840\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t876524\t876686\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t877516\t877631\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t877790\t877868\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t877939\t878438\t.\t+\t.\tParen
t=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t878633\t878757\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t879078\t879188\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\texon\t879288\t879533\t.\t+\t.\tParent=Transcript:CCDS2.2\n+chr1\thg19_ccdsGene\tgene\t901912\t909955\t.\t+\t.\tID=Gene:CCDS4.1;Name=Gene:CCDS4.1\n+chr1\thg19_ccdsGene\tmRNA\t901912\t909955\t0.000000\t+\t.\tID=Transcript:CCDS4.1;Parent=Gene:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t901912\t901994\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t902084\t902183\t.\t+\t1\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t905657\t905803\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t905901\t905981\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t906066\t906138\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t906259\t906386\t.\t+\t2\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t906493\t906588\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t906704\t906784\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t907455\t907530\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t907668\t907804\t.\t+\t2\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t908241\t908390\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t908566\t908706\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t908880\t909020\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t909213\t909431\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t909696\t909744\t.\t+\t0\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\tCDS\t909822\t909955\t.\t+\t2\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t901912\t901994\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t902084\t902183\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t905657\t905803\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t905901\t905981\
t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t906066\t906138\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t906259\t906386\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t906493\t906588\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t906704\t906784\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t907455\t907530\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t907668\t907804\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t908241\t908390\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t908566\t908706\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t908880\t909020\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t909213\t909431\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t909696\t909744\t.\t+\t.\tParent=Transcript:CCDS4.1\n+chr1\thg19_ccdsGene\texon\t909822\t909955\t.\t+\t.\tParent=Transcript:CCDS4.1\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/UCSC_transcripts.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/UCSC_transcripts.gtf Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,129 @@\n+chr1\thg19_ccdsGene\tstart_codon\t861322\t861324\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t861322\t861393\t0.000000\t+\t0\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t861322\t861393\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t865535\t865716\t0.000000\t+\t0\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t865535\t865716\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t866419\t866469\t0.000000\t+\t1\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t866419\t866469\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t871152\t871276\t0.000000\t+\t1\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t871152\t871276\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t874420\t874509\t0.000000\t+\t2\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t874420\t874509\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t874655\t874840\t0.000000\t+\t2\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t874655\t874840\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t876524\t876686\t0.000000\t+\t2\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t876524\t876686\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t877516\t877631\t0.000000\t+\t1\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t877516\t877631\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t877790\t877868\t0.000000\t+\t2\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t877790\t877868\t0.000000\t+\t.\tgene_id 
"CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t877939\t878438\t0.000000\t+\t1\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t877939\t878438\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t878633\t878757\t0.000000\t+\t2\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t878633\t878757\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t879078\t879188\t0.000000\t+\t0\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t879078\t879188\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tCDS\t879288\t879530\t0.000000\t+\t0\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tstop_codon\t879531\t879533\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\texon\t879288\t879533\t0.000000\t+\t.\tgene_id "CCDS2.2"; transcript_id "CCDS2.2"; \n+chr1\thg19_ccdsGene\tstop_codon\t880074\t880076\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\tCDS\t880077\t880180\t0.000000\t-\t2\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\texon\t880074\t880180\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\tCDS\t880437\t880526\t0.000000\t-\t2\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\texon\t880437\t880526\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\tCDS\t880898\t881033\t0.000000\t-\t0\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\texon\t880898\t881033\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\tCDS\t881553\t881666\t0.000000\t-\t0\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\texon\t881553\t881666\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; 
\n+chr1\thg19_ccdsGene\tCDS\t881782\t881925\t0.000000\t-\t0\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\texon\t881782\t881925\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\tCDS\t883511\t883612\t0.000000\t-\t0\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr1\thg19_ccdsGene\texon\t883511\t883612\t0.000000\t-\t.\tgene_id "CCDS3.1"; transcript_id "CCDS3.1"; \n+chr'..b'0\t0.000000\t+\t2\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\texon\t899487\t899560\t0.000000\t+\t.\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\tCDS\t899729\t899910\t0.000000\t+\t0\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\texon\t899729\t899910\t0.000000\t+\t.\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\tCDS\t900343\t900568\t0.000000\t+\t1\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\tstop_codon\t900569\t900571\t0.000000\t+\t.\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\texon\t900343\t900571\t0.000000\t+\t.\tgene_id "CCDS30550.1"; transcript_id "CCDS30550.1"; \n+chr1\thg19_ccdsGene\tstart_codon\t901912\t901914\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t901912\t901994\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t901912\t901994\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t902084\t902183\t0.000000\t+\t1\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t902084\t902183\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t905657\t905803\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t905657\t905803\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; 
\n+chr1\thg19_ccdsGene\tCDS\t905901\t905981\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t905901\t905981\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t906066\t906138\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t906066\t906138\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t906259\t906386\t0.000000\t+\t2\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t906259\t906386\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t906493\t906588\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t906493\t906588\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t906704\t906784\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t906704\t906784\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t907455\t907530\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t907455\t907530\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t907668\t907804\t0.000000\t+\t2\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t907668\t907804\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t908241\t908390\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t908241\t908390\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t908566\t908706\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t908566\t908706\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t908880\t909020\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id 
"CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t908880\t909020\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t909213\t909431\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t909213\t909431\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t909696\t909744\t0.000000\t+\t0\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t909696\t909744\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tCDS\t909822\t909952\t0.000000\t+\t2\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\tstop_codon\t909953\t909955\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+chr1\thg19_ccdsGene\texon\t909822\t909955\t0.000000\t+\t.\tgene_id "CCDS4.1"; transcript_id "CCDS4.1"; \n+\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/ccds_genes.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/ccds_genes.bed Wed Jun 11 16:29:25 2014 -0400 |
b |
@@ -0,0 +1,20 @@ +chr1 92149295 92327088 CCDS30770.1 0 - 92149295 92327088 0 16 119,108,42,121,300,159,141,153,338,190,148,169,184,138,185,61, 0,11933,14350,24924,28504,32497,32829,35573,36154,38216,43920,46066,51037,74874,113548,177732, +chr1 67000041 67208778 CCDS30744.1 0 + 67000041 67208778 0 25 10,64,25,72,57,55,176,12,12,25,52,86,93,75,501,128,127,60,112,156,133,203,65,165,23, 0,91488,98711,101585,105418,108451,109185,126154,133171,136636,137585,138922,142645,145319,147510,154789,155831,161075,184935,194905,199389,204976,206299,206913,208714, +chr1 8384389 8404073 CCDS30577.1 0 + 8384389 8404073 0 8 397,93,225,728,154,177,206,267, 0,968,1488,5879,11107,13486,15163,19417, +chr1 16767256 16785385 CCDS44067.1 0 + 16767256 16785385 0 8 14,101,105,82,109,178,76,49, 0,2870,7108,7298,8331,11076,15056,18080, +chr1 16767256 16785491 CCDS44066.1 0 + 16767256 16785491 0 7 92,101,105,82,109,178,155, 0,2870,7108,7298,8331,11076,18080, +chr1 16767256 16785385 CCDS173.1 0 + 16767256 16785385 0 8 92,101,105,82,109,178,76,49, 0,2870,7108,7298,8331,11076,15056,18080, +chr1 25072044 25167428 CCDS256.1 0 + 25072044 25167428 0 6 72,110,126,107,182,165, 0,52188,68540,81456,94306,95219, +chr1 33547850 33585783 CCDS375.1 0 + 33547850 33585783 0 9 105,174,173,135,166,163,113,215,139, 0,1704,9800,11032,12298,14457,15817,35652,37794, +chr1 48999844 50489468 CCDS44137.1 0 - 48999844 50489468 0 14 121,27,97,163,153,112,115,90,40,217,95,125,123,34, 0,717,5469,52831,56660,100320,119164,128979,333018,511411,711597,1163140,1317223,1489590, +chr1 100661810 100715376 CCDS767.1 0 - 100661810 100715376 0 11 168,72,192,78,167,217,122,182,76,124,51, 0,9975,10190,14439,18562,19728,22371,34478,39181,44506,53515, +chr1 150981108 151006710 CCDS977.1 0 + 150981108 151006710 0 8 39,93,203,185,159,95,159,429, 0,9179,9834,15978,16882,18600,20153,25173, +chr1 175914288 176176114 CCDS44279.1 0 - 175914288 176176114 0 19 18,45,161,125,118,117,82,109,144,136,115,58,77,69,120,65,98,60,407, 
0,2042,41790,43135,44209,82419,98033,98557,101028,135999,140623,171471,189857,203853,217716,218674,230757,239480,261419, +chr1 175914288 176176114 CCDS30944.1 0 - 175914288 176176114 0 20 18,45,161,125,118,117,82,109,144,136,115,58,77,60,69,120,77,98,60,407, 0,2042,41790,43135,44209,82419,98033,98557,101028,135999,140623,171471,189857,191335,203853,217716,218662,230757,239480,261419, +chr1 184446643 184588690 CCDS1362.1 0 + 184446643 184588690 0 5 94,95,77,61,39, 0,30078,113229,120891,142008, +chr1 226420201 226496888 CCDS1553.1 0 - 226420201 226496888 0 15 106,98,180,126,81,102,120,134,158,126,134,105,95,33,79, 0,595,843,6470,18338,33032,33712,35456,45274,53832,55163,63341,65218,68672,76608, +chr1 1982069 2116448 CCDS37.1 0 + 1982069 2116448 0 18 71,122,90,51,86,132,82,53,189,98,87,136,88,120,80,90,116,88, 0,4810,5853,8910,84631,93579,95396,98241,100159,105364,118887,121424,121670,123266,124123,124593,133952,134291, +chr1 2075777 2116448 CCDS41229.1 0 + 2075777 2116448 0 13 3,82,53,189,98,87,136,88,120,80,90,116,88, 0,1688,4533,6451,11656,25179,27716,27962,29558,30415,30885,40244,40583, +chr1 2985823 3350375 CCDS44048.1 0 + 2985823 3350375 0 17 37,350,51,135,103,208,148,154,1417,85,170,78,170,175,237,175,78, 0,116865,174827,315892,327231,333531,335479,336235,342124,345303,348568,349407,356321,356791,361612,362706,364474, +chr1 2985823 3350375 CCDS41236.1 0 + 2985823 3350375 0 17 37,350,51,135,103,208,148,154,1417,85,170,78,170,175,237,175,135, 0,116865,174827,315892,327231,333531,335479,336235,342124,345303,348568,349407,356321,356791,361612,362706,364417, +chr1 6285139 6295971 CCDS61.1 0 - 6285139 6295971 0 5 183,218,170,89,195, 0,6822,8394,9806,10637, |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/ccds_genes.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/ccds_genes.gff3 Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,289 @@\n+chr1\tbed2gff\tgene\t92149296\t92327088\t0\t-\t.\tID=Gene:CCDS30770.1;Name=Gene:CCDS30770.1\n+chr1\tbed2gff\ttranscript\t92149296\t92327088\t0\t-\t.\tID=CCDS30770.1;Name=CCDS30770.1;Parent=Gene:CCDS30770.1\n+chr1\tbed2gff\texon\t92149296\t92149414\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92161229\t92161336\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92163646\t92163687\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92174220\t92174340\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92177800\t92178099\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92181793\t92181951\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92182125\t92182265\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92184869\t92185021\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92185450\t92185787\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92187512\t92187701\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92193216\t92193363\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92195362\t92195530\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92200333\t92200516\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92224170\t92224307\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92262844\t92263028\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\texon\t92327028\t92327088\t0\t-\t.\tParent=CCDS30770.1\n+chr1\tbed2gff\tgene\t67000042\t67208778\t0\t+\t.\tID=Gene:CCDS30744.1;Name=Gene:CCDS30744.1\n+chr1\tbed2gff\ttranscript\t67000042\t67208778\t0\t+\t.\tID=CCDS30744.1;Name=CCDS30744.1;Parent=Gene:CCDS30744.1\n+chr1\tbed2gff\texon\t67000042\t67000051\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67091530\t67091593\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67098753\t67098777\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67101627\t67101698\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67105460\t67105516\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67108493\t67108547\t0\t+\t.\tParent=CCDS30744.1\n+
chr1\tbed2gff\texon\t67109227\t67109402\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67126196\t67126207\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67133213\t67133224\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67136678\t67136702\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67137627\t67137678\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67138964\t67139049\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67142687\t67142779\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67145361\t67145435\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67147552\t67148052\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67154831\t67154958\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67155873\t67155999\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67161117\t67161176\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67184977\t67185088\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67194947\t67195102\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67199431\t67199563\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67205018\t67205220\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67206341\t67206405\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67206955\t67207119\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\texon\t67208756\t67208778\t0\t+\t.\tParent=CCDS30744.1\n+chr1\tbed2gff\tgene\t8384390\t8404073\t0\t+\t.\tID=Gene:CCDS30577.1;Name=Gene:CCDS30577.1\n+chr1\tbed2gff\ttranscript\t8384390\t8404073\t0\t+\t.\tID=CCDS30577.1;Name=CCDS30577.1;Parent=Gene:CCDS30577.1\n+chr1\tbed2gff\texon\t8384390\t8384786\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8385358\t8385450\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8385878\t8386102\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8390269\t8390996\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8395497\t8395650\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8397876\t8398052\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8399553\t839
9758\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\texon\t8403807\t8404073\t0\t+\t.\tParent=CCDS30577.1\n+chr1\tbed2gff\tgene\t16767257\t16785385\t0\t+\t.\tID=Gene:CCDS44067.1;Name=Gene:CCDS44067.1\n+chr1\tbed2gff\ttranscript\t16767257\t16785385\t0\t+\t.\tID=CCDS44067.1;Name=CCDS44067.1;Parent=Gene:CCDS44067.1\n+chr1\tbed2gff\texon\t16767257\t16767270\t0\t+\t.\tParent=CCDS44067.1\n+chr1\tbed2gff\texon\t16770127\t16770227\t0\t+\t.\tParent=CCDS44067.1\n+chr1\tbed2gff\texon\t16774365\t16774469\t0\t+\t.\tParent=CCDS44067.1\n+chr1\tbed2gff\texo'..b'bed2gff\texon\t2106663\t2106752\t0\t+\t.\tParent=CCDS37.1\n+chr1\tbed2gff\texon\t2116022\t2116137\t0\t+\t.\tParent=CCDS37.1\n+chr1\tbed2gff\texon\t2116361\t2116448\t0\t+\t.\tParent=CCDS37.1\n+chr1\tbed2gff\tgene\t2075778\t2116448\t0\t+\t.\tID=Gene:CCDS41229.1;Name=Gene:CCDS41229.1\n+chr1\tbed2gff\ttranscript\t2075778\t2116448\t0\t+\t.\tID=CCDS41229.1;Name=CCDS41229.1;Parent=Gene:CCDS41229.1\n+chr1\tbed2gff\texon\t2075778\t2075780\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2077466\t2077547\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2080311\t2080363\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2082229\t2082417\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2087434\t2087531\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2100957\t2101043\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2103494\t2103629\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2103740\t2103827\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2105336\t2105455\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2106193\t2106272\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2106663\t2106752\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2116022\t2116137\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\texon\t2116361\t2116448\t0\t+\t.\tParent=CCDS41229.1\n+chr1\tbed2gff\tgene\t2985824\t3350375\t0\t+\t.\tID=Gene:CCDS44048.1;Name=Gene:CCDS44048.1\n+chr1\tbed2gff\ttranscript\t2985824\t3350375\t0\t+\t.\tID=CCDS44048
.1;Name=CCDS44048.1;Parent=Gene:CCDS44048.1\n+chr1\tbed2gff\texon\t2985824\t2985860\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3102689\t3103038\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3160651\t3160701\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3301716\t3301850\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3313055\t3313157\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3319355\t3319562\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3321303\t3321450\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3322059\t3322212\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3327948\t3329364\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3331127\t3331211\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3334392\t3334561\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3335231\t3335308\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3342145\t3342314\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3342615\t3342789\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3347436\t3347672\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3348530\t3348704\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\texon\t3350298\t3350375\t0\t+\t.\tParent=CCDS44048.1\n+chr1\tbed2gff\tgene\t2985824\t3350375\t0\t+\t.\tID=Gene:CCDS41236.1;Name=Gene:CCDS41236.1\n+chr1\tbed2gff\ttranscript\t2985824\t3350375\t0\t+\t.\tID=CCDS41236.1;Name=CCDS41236.1;Parent=Gene:CCDS41236.1\n+chr1\tbed2gff\texon\t2985824\t2985860\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3102689\t3103038\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3160651\t3160701\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3301716\t3301850\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3313055\t3313157\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3319355\t3319562\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3321303\t3321450\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3322059\t3322212\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3327
948\t3329364\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3331127\t3331211\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3334392\t3334561\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3335231\t3335308\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3342145\t3342314\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3342615\t3342789\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3347436\t3347672\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3348530\t3348704\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\texon\t3350241\t3350375\t0\t+\t.\tParent=CCDS41236.1\n+chr1\tbed2gff\tgene\t6285140\t6295971\t0\t-\t.\tID=Gene:CCDS61.1;Name=Gene:CCDS61.1\n+chr1\tbed2gff\ttranscript\t6285140\t6295971\t0\t-\t.\tID=CCDS61.1;Name=CCDS61.1;Parent=Gene:CCDS61.1\n+chr1\tbed2gff\texon\t6285140\t6285322\t0\t-\t.\tParent=CCDS61.1\n+chr1\tbed2gff\texon\t6291962\t6292179\t0\t-\t.\tParent=CCDS61.1\n+chr1\tbed2gff\texon\t6293534\t6293703\t0\t-\t.\tParent=CCDS61.1\n+chr1\tbed2gff\texon\t6294946\t6295034\t0\t-\t.\tParent=CCDS61.1\n+chr1\tbed2gff\texon\t6295777\t6295971\t0\t-\t.\tParent=CCDS61.1\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/hs_2009.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/hs_2009.bed Wed Jun 11 16:29:25 2014 -0400 |
b |
@@ -0,0 +1,58 @@ +chr1 11873 14409 uc001aaa.3 0 + 11873 11873 0 3 354,109,1189, 0,739,1347, +chr1 11873 14409 uc010nxq.1 0 + 12189 13639 0 3 354,127,1007, 0,721,1529, +chr1 11873 14409 uc010nxr.1 0 + 11873 11873 0 3 354,52,1189, 0,772,1347, +chr1 14362 16765 uc009vis.2 0 - 14362 14362 0 4 467,69,147,159, 0,607,1433,2244, +chr1 16857 17751 uc009vjc.1 0 - 16857 16857 0 2 198,519, 0,375, +chr1 15795 18061 uc009vjd.2 0 - 15795 15795 0 5 152,159,198,136,456, 0,811,1062,1437,1810, +chr1 14362 19759 uc009vit.2 0 - 14362 14362 0 9 467,69,152,159,198,510,147,99,847, 0,607,1433,2244,2495,2870,3552,3905,4550, +chr1 14362 19759 uc001aae.3 0 - 14362 14362 0 10 467,69,152,159,198,136,137,147,99,847, 0,607,1433,2244,2495,2870,3243,3552,3905,4550, +chr1 14362 19759 uc009viu.2 0 - 14362 14362 0 10 467,69,152,159,198,510,147,102,54,847, 0,607,1433,2244,2495,2870,3552,3905,4138,4550, +chr1 16857 19759 uc001aai.1 0 - 16857 16857 0 6 198,136,137,147,112,847, 0,375,748,1057,1410,2055, +chr1 14362 24901 uc001aab.3 0 - 14362 14362 0 10 467,69,152,159,202,136,137,147,112,164, 0,607,1433,2244,2491,2870,3243,3552,3905,10375, +chr1 14362 29370 uc001aah.3 0 - 14362 14362 0 11 467,69,152,159,198,136,137,147,99,154,50, 0,607,1433,2244,2495,2870,3243,3552,3905,10375,14958, +chr1 14362 29370 uc009vir.2 0 - 14362 14362 0 10 467,69,152,159,198,510,147,99,154,50, 0,607,1433,2244,2495,2870,3552,3905,10375,14958, +chr1 14362 29370 uc009viq.2 0 - 14362 14362 0 7 467,152,159,198,456,154,50, 0,1433,2244,2495,3243,10375,14958, +chr1 14362 29370 uc001aac.3 0 - 14362 14362 0 11 467,69,152,159,198,110,137,147,102,154,50, 0,607,1433,2244,2495,2896,3243,3552,3905,10375,14958, +chr1 14406 29370 uc009viv.2 0 - 14406 14406 0 7 2359,198,136,137,147,154,50, 0,2451,2826,3199,3508,10331,14914, +chr1 14406 29370 uc009viw.2 0 - 14406 14406 0 7 2359,198,510,147,99,154,50, 0,2451,2826,3508,3861,10331,14914, +chr1 15602 29370 uc009vix.2 0 - 15602 15602 0 7 345,159,198,136,147,154,50, 0,1004,1255,1630,2312,9135,13718, +chr1 
16606 29370 uc009viz.2 0 - 16606 16606 0 8 159,202,136,137,147,112,154,50, 0,247,626,999,1308,1661,8131,12714, +chr1 16606 29370 uc009viy.2 0 - 16606 16606 0 9 159,198,136,137,147,95,58,154,50, 0,251,626,999,1308,1661,1890,8131,12714, +chr1 16857 29370 uc010nxs.1 0 - 16857 16857 0 8 198,136,137,147,99,227,154,50, 0,375,748,1057,1410,2055,7880,12463, +chr1 17232 29370 uc009vje.2 0 - 17232 17232 0 4 510,147,99,50, 0,682,1035,12088, +chr1 17605 29370 uc009vjf.2 0 - 17605 17605 0 7 137,147,95,58,227,154,50, 0,309,662,891,1307,7132,11715, +chr1 16857 29961 uc009vjb.1 0 - 16857 16857 0 7 198,136,137,147,112,154,138, 0,375,748,1057,1410,7880,12966, +chr1 34611 36081 uc001aak.2 0 - 34611 34611 0 3 563,205,361, 0,665,1109, +chr1 69090 70008 uc001aal.1 0 + 69090 70008 0 1 918, 0, +chr1 137838 139228 uc001aam.3 0 - 137838 137838 0 1 1390, 0, +chr1 89294 237877 uc010nxt.1 0 - 89294 89294 0 2 1110,1263, 0,147320, +chr1 321083 321114 uc001aaq.1 0 + 321083 321083 0 1 31, 0, +chr1 321145 321223 uc001aar.1 0 + 321145 321145 0 1 78, 0, +chr1 322036 326938 uc009vjk.2 0 + 324342 325605 0 3 192,58,2500, 0,2251,2402, +chr1 323891 328580 uc001aav.3 0 + 323891 323891 0 4 169,58,2500,1545, 0,396,547,3144, +chr1 323891 328580 uc001aau.2 0 + 324342 325605 0 3 169,58,4142, 0,396,547, +chr1 367658 368595 uc010nxu.1 0 + 367658 368594 0 1 937, 0, +chr1 420205 421839 uc001aax.1 0 + 420205 420205 0 3 91,267,444, 0,786,1190, +chr1 566461 568045 uc001aaz.2 0 + 566461 566461 0 1 1584, 0, +chr1 568148 568842 uc001aba.1 0 + 568148 568148 0 1 694, 0, +chr1 568843 568912 uc001abb.2 0 + 568843 568843 0 1 69, 0, +chr1 569326 570349 uc001abc.2 0 + 569326 569326 0 1 1023, 0, +chr1 621097 622034 uc010nxv.1 0 - 621098 622034 0 1 937, 0, +chr1 661139 665731 uc001abe.3 0 - 664484 665108 0 3 4045,58,169, 0,4138,4423, +chr1 668401 668479 uc001abi.1 0 - 668401 668401 0 1 78, 0, +chr1 668510 668541 uc001abj.2 0 - 668510 668510 0 1 31, 0, +chr1 661139 670994 uc009vjm.2 0 - 664484 665108 0 3 4045,58,192, 0,7547,9663, 
+chr1 671807 671885 uc010nxw.1 0 - 671807 671807 0 1 78, 0, +chr1 671916 671947 uc001abl.2 0 - 671916 671916 0 1 31, 0, +chr1 661139 679736 uc002khh.2 0 - 661139 661139 0 7 4045,58,191,233,58,65,162, 0,4138,6257,14043,14369,17526,18435, +chr1 674239 679736 uc001abm.2 0 - 674239 674239 0 5 165,233,58,65,162, 0,943,1269,4426,5335, +chr1 700236 714006 uc001abo.2 0 - 700236 700236 0 7 391,59,66,216,132,110,343, 0,1472,3691,4640,8119,9314,13427, +chr1 761586 762902 uc010nxx.1 0 - 762079 762571 0 1 1316, 0, +chr1 763063 788902 uc009vjn.1 0 + 763063 763063 0 4 92,102,184,132, 0,1319,24243,25707, +chr1 763063 788997 uc001abp.1 0 + 763063 763063 0 6 92,102,153,184,96,227, 0,1319,19970,24243,24987,25707, +chr1 763063 788997 uc001abq.1 0 + 763063 763063 0 5 92,102,184,96,227, 0,1319,24243,24987,25707, +chr1 763063 788997 uc009vjo.1 0 + 763063 763063 0 4 92,102,96,227, 0,1319,24987,25707, +chr1 763063 789740 uc001abr.1 0 + 763063 763063 0 7 92,102,153,184,96,132,784, 0,1319,19970,24243,24987,25707,25893, +chr1 791897 794579 uc001abs.2 0 + 791897 791897 0 1 2682, 0, +chr1 803452 812182 uc001abt.3 0 - 803452 803452 0 3 603,1044,57, 0,6039,8673, +chr1 846814 850328 uc001abu.1 0 + 846814 846814 0 2 39,3004, 0,510, |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/hs_2009.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/hs_2009.gff3 Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,375 @@\n+##gff-version 3\n+chr1\tbed2gff\tgene\t11874\t14409\t0\t+\t.\tID=Gene:uc001aaa.3;Name=Gene:uc001aaa.3\n+chr1\tbed2gff\ttranscript\t11874\t14409\t0\t+\t.\tID=uc001aaa.3;Name=uc001aaa.3;Parent=Gene:uc001aaa.3\n+chr1\tbed2gff\texon\t11874\t12227\t0\t+\t.\tParent=uc001aaa.3\n+chr1\tbed2gff\texon\t12613\t12721\t0\t+\t.\tParent=uc001aaa.3\n+chr1\tbed2gff\texon\t13221\t14409\t0\t+\t.\tParent=uc001aaa.3\n+chr1\tbed2gff\tgene\t11874\t14409\t0\t+\t.\tID=Gene:uc010nxq.1;Name=Gene:uc010nxq.1\n+chr1\tbed2gff\ttranscript\t11874\t14409\t0\t+\t.\tID=uc010nxq.1;Name=uc010nxq.1;Parent=Gene:uc010nxq.1\n+chr1\tbed2gff\texon\t11874\t12227\t0\t+\t.\tParent=uc010nxq.1\n+chr1\tbed2gff\texon\t12595\t12721\t0\t+\t.\tParent=uc010nxq.1\n+chr1\tbed2gff\texon\t13403\t14409\t0\t+\t.\tParent=uc010nxq.1\n+chr1\tbed2gff\tgene\t11874\t14409\t0\t+\t.\tID=Gene:uc010nxr.1;Name=Gene:uc010nxr.1\n+chr1\tbed2gff\ttranscript\t11874\t14409\t0\t+\t.\tID=uc010nxr.1;Name=uc010nxr.1;Parent=Gene:uc010nxr.1\n+chr1\tbed2gff\texon\t11874\t12227\t0\t+\t.\tParent=uc010nxr.1\n+chr1\tbed2gff\texon\t12646\t12697\t0\t+\t.\tParent=uc010nxr.1\n+chr1\tbed2gff\texon\t13221\t14409\t0\t+\t.\tParent=uc010nxr.1\n+chr1\tbed2gff\tgene\t14363\t16765\t0\t-\t.\tID=Gene:uc009vis.2;Name=Gene:uc009vis.2\n+chr1\tbed2gff\ttranscript\t14363\t16765\t0\t-\t.\tID=uc009vis.2;Name=uc009vis.2;Parent=Gene:uc009vis.2\n+chr1\tbed2gff\texon\t14363\t14829\t0\t-\t.\tParent=uc009vis.2\n+chr1\tbed2gff\texon\t14970\t15038\t0\t-\t.\tParent=uc009vis.2\n+chr1\tbed2gff\texon\t15796\t15942\t0\t-\t.\tParent=uc009vis.2\n+chr1\tbed2gff\texon\t16607\t16765\t0\t-\t.\tParent=uc009vis.2\n+chr1\tbed2gff\tgene\t16858\t17751\t0\t-\t.\tID=Gene:uc009vjc.1;Name=Gene:uc009vjc.1\n+chr1\tbed2gff\ttranscript\t16858\t17751\t0\t-\t.\tID=uc009vjc.1;Name=uc009vjc.1;Parent=Gene:uc009vjc.1\n+chr1\tbed2gff\texon\t16858\t17055\t0\t-\t.\tParent=uc009vjc.1\n+chr1\tbed2gff\texon\t17233\t17751\t0\t-\t.\tParent=uc009vjc.1\n+chr1\tbed2gff\tgene\t15796\t18061\t0\t-\t.\tI
D=Gene:uc009vjd.2;Name=Gene:uc009vjd.2\n+chr1\tbed2gff\ttranscript\t15796\t18061\t0\t-\t.\tID=uc009vjd.2;Name=uc009vjd.2;Parent=Gene:uc009vjd.2\n+chr1\tbed2gff\texon\t15796\t15947\t0\t-\t.\tParent=uc009vjd.2\n+chr1\tbed2gff\texon\t16607\t16765\t0\t-\t.\tParent=uc009vjd.2\n+chr1\tbed2gff\texon\t16858\t17055\t0\t-\t.\tParent=uc009vjd.2\n+chr1\tbed2gff\texon\t17233\t17368\t0\t-\t.\tParent=uc009vjd.2\n+chr1\tbed2gff\texon\t17606\t18061\t0\t-\t.\tParent=uc009vjd.2\n+chr1\tbed2gff\tgene\t14363\t19759\t0\t-\t.\tID=Gene:uc009vit.2;Name=Gene:uc009vit.2\n+chr1\tbed2gff\ttranscript\t14363\t19759\t0\t-\t.\tID=uc009vit.2;Name=uc009vit.2;Parent=Gene:uc009vit.2\n+chr1\tbed2gff\texon\t14363\t14829\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t14970\t15038\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t15796\t15947\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t16607\t16765\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t16858\t17055\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t17233\t17742\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t17915\t18061\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t18268\t18366\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\texon\t18913\t19759\t0\t-\t.\tParent=uc009vit.2\n+chr1\tbed2gff\tgene\t14363\t19759\t0\t-\t.\tID=Gene:uc001aae.3;Name=Gene:uc001aae.3\n+chr1\tbed2gff\ttranscript\t14363\t19759\t0\t-\t.\tID=uc001aae.3;Name=uc001aae.3;Parent=Gene:uc001aae.3\n+chr1\tbed2gff\texon\t14363\t14829\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t14970\t15038\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t15796\t15947\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t16607\t16765\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t16858\t17055\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t17233\t17368\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t17606\t17742\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t17915\t18061\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\texon\t18268\t18366\t0\t-\t.\tParent=uc001aae.3\n+
chr1\tbed2gff\texon\t18913\t19759\t0\t-\t.\tParent=uc001aae.3\n+chr1\tbed2gff\tgene\t14363\t19759\t0\t-\t.\tID=Gene:uc009viu.2;Name=Gene:uc009viu.2\n+chr1\tbed2gff\ttranscript\t14363\t19759\t0\t-\t.\tID=uc009viu.2;Name=uc009viu.2;Parent=Gene:uc009viu.2\n+chr1\tbed2gff\texon\t14363\t14829\t0\t-\t.\tParent=uc009viu.2\n+chr1\tbed2gff\texon\t14970\t15038\t0\t-\t.\tParent=uc009viu.2\n+chr1\tbed2gff\texon\t15796\t15947\t0\t-\t.\tParent=uc009viu.2\n+chr1\tbed2gff\texon\t16607\t16765\t0\t-\t.\tParent=uc009viu.2\n+chr1\tbed2gff\texon\t16858\t17055\t0\t-\t.\tParen'..b'1abo.2;Name=Gene:uc001abo.2\n+chr1\tbed2gff\ttranscript\t700237\t714006\t0\t-\t.\tID=uc001abo.2;Name=uc001abo.2;Parent=Gene:uc001abo.2\n+chr1\tbed2gff\texon\t700237\t700627\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\texon\t701709\t701767\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\texon\t703928\t703993\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\texon\t704877\t705092\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\texon\t708356\t708487\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\texon\t709551\t709660\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\texon\t713664\t714006\t0\t-\t.\tParent=uc001abo.2\n+chr1\tbed2gff\tgene\t761587\t762902\t0\t-\t.\tID=Gene:uc010nxx.1;Name=Gene:uc010nxx.1\n+chr1\tbed2gff\ttranscript\t761587\t762902\t0\t-\t.\tID=uc010nxx.1;Name=uc010nxx.1;Parent=Gene:uc010nxx.1\n+chr1\tbed2gff\texon\t761587\t762902\t0\t-\t.\tParent=uc010nxx.1\n+chr1\tbed2gff\tgene\t763064\t788902\t0\t+\t.\tID=Gene:uc009vjn.1;Name=Gene:uc009vjn.1\n+chr1\tbed2gff\ttranscript\t763064\t788902\t0\t+\t.\tID=uc009vjn.1;Name=uc009vjn.1;Parent=Gene:uc009vjn.1\n+chr1\tbed2gff\texon\t763064\t763155\t0\t+\t.\tParent=uc009vjn.1\n+chr1\tbed2gff\texon\t764383\t764484\t0\t+\t.\tParent=uc009vjn.1\n+chr1\tbed2gff\texon\t787307\t787490\t0\t+\t.\tParent=uc009vjn.1\n+chr1\tbed2gff\texon\t788771\t788902\t0\t+\t.\tParent=uc009vjn.1\n+chr1\tbed2gff\tgene\t763064\t788997\t0\t+\t.\tID=Gene:uc001abp.1;Name=Gene:uc001abp.1\n+chr1\tbed2gff\ttranscript\t7
63064\t788997\t0\t+\t.\tID=uc001abp.1;Name=uc001abp.1;Parent=Gene:uc001abp.1\n+chr1\tbed2gff\texon\t763064\t763155\t0\t+\t.\tParent=uc001abp.1\n+chr1\tbed2gff\texon\t764383\t764484\t0\t+\t.\tParent=uc001abp.1\n+chr1\tbed2gff\texon\t783034\t783186\t0\t+\t.\tParent=uc001abp.1\n+chr1\tbed2gff\texon\t787307\t787490\t0\t+\t.\tParent=uc001abp.1\n+chr1\tbed2gff\texon\t788051\t788146\t0\t+\t.\tParent=uc001abp.1\n+chr1\tbed2gff\texon\t788771\t788997\t0\t+\t.\tParent=uc001abp.1\n+chr1\tbed2gff\tgene\t763064\t788997\t0\t+\t.\tID=Gene:uc001abq.1;Name=Gene:uc001abq.1\n+chr1\tbed2gff\ttranscript\t763064\t788997\t0\t+\t.\tID=uc001abq.1;Name=uc001abq.1;Parent=Gene:uc001abq.1\n+chr1\tbed2gff\texon\t763064\t763155\t0\t+\t.\tParent=uc001abq.1\n+chr1\tbed2gff\texon\t764383\t764484\t0\t+\t.\tParent=uc001abq.1\n+chr1\tbed2gff\texon\t787307\t787490\t0\t+\t.\tParent=uc001abq.1\n+chr1\tbed2gff\texon\t788051\t788146\t0\t+\t.\tParent=uc001abq.1\n+chr1\tbed2gff\texon\t788771\t788997\t0\t+\t.\tParent=uc001abq.1\n+chr1\tbed2gff\tgene\t763064\t788997\t0\t+\t.\tID=Gene:uc009vjo.1;Name=Gene:uc009vjo.1\n+chr1\tbed2gff\ttranscript\t763064\t788997\t0\t+\t.\tID=uc009vjo.1;Name=uc009vjo.1;Parent=Gene:uc009vjo.1\n+chr1\tbed2gff\texon\t763064\t763155\t0\t+\t.\tParent=uc009vjo.1\n+chr1\tbed2gff\texon\t764383\t764484\t0\t+\t.\tParent=uc009vjo.1\n+chr1\tbed2gff\texon\t788051\t788146\t0\t+\t.\tParent=uc009vjo.1\n+chr1\tbed2gff\texon\t788771\t788997\t0\t+\t.\tParent=uc009vjo.1\n+chr1\tbed2gff\tgene\t763064\t789740\t0\t+\t.\tID=Gene:uc001abr.1;Name=Gene:uc001abr.1\n+chr1\tbed2gff\ttranscript\t763064\t789740\t0\t+\t.\tID=uc001abr.1;Name=uc001abr.1;Parent=Gene:uc001abr.1\n+chr1\tbed2gff\texon\t763064\t763155\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\texon\t764383\t764484\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\texon\t783034\t783186\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\texon\t787307\t787490\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\texon\t788051\t788146\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\te
xon\t788771\t788902\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\texon\t788957\t789740\t0\t+\t.\tParent=uc001abr.1\n+chr1\tbed2gff\tgene\t791898\t794579\t0\t+\t.\tID=Gene:uc001abs.2;Name=Gene:uc001abs.2\n+chr1\tbed2gff\ttranscript\t791898\t794579\t0\t+\t.\tID=uc001abs.2;Name=uc001abs.2;Parent=Gene:uc001abs.2\n+chr1\tbed2gff\texon\t791898\t794579\t0\t+\t.\tParent=uc001abs.2\n+chr1\tbed2gff\tgene\t803453\t812182\t0\t-\t.\tID=Gene:uc001abt.3;Name=Gene:uc001abt.3\n+chr1\tbed2gff\ttranscript\t803453\t812182\t0\t-\t.\tID=uc001abt.3;Name=uc001abt.3;Parent=Gene:uc001abt.3\n+chr1\tbed2gff\texon\t803453\t804055\t0\t-\t.\tParent=uc001abt.3\n+chr1\tbed2gff\texon\t809492\t810535\t0\t-\t.\tParent=uc001abt.3\n+chr1\tbed2gff\texon\t812126\t812182\t0\t-\t.\tParent=uc001abt.3\n+chr1\tbed2gff\tgene\t846815\t850328\t0\t+\t.\tID=Gene:uc001abu.1;Name=Gene:uc001abu.1\n+chr1\tbed2gff\ttranscript\t846815\t850328\t0\t+\t.\tID=uc001abu.1;Name=uc001abu.1;Parent=Gene:uc001abu.1\n+chr1\tbed2gff\texon\t846815\t846853\t0\t+\t.\tParent=uc001abu.1\n+chr1\tbed2gff\texon\t847325\t850328\t0\t+\t.\tParent=uc001abu.1\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/s_cerevisiae_SCU49845.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/s_cerevisiae_SCU49845.gbk Wed Jun 11 16:29:25 2014 -0400 |
b |
b'@@ -0,0 +1,165 @@\n+LOCUS SCU49845 5028 bp DNA PLN 21-JUN-1999\n+DEFINITION Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p\n+ (AXL2) and Rev7p (REV7) genes, complete cds.\n+ACCESSION U49845\n+VERSION U49845.1 GI:1293613\n+KEYWORDS .\n+SOURCE Saccharomyces cerevisiae (baker\'s yeast)\n+ ORGANISM Saccharomyces cerevisiae\n+ Eukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;\n+ Saccharomycetales; Saccharomycetaceae; Saccharomyces.\n+REFERENCE 1 (bases 1 to 5028)\n+ AUTHORS Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.\n+ TITLE Cloning and sequence of REV7, a gene whose function is required for\n+ DNA damage-induced mutagenesis in Saccharomyces cerevisiae\n+ JOURNAL Yeast 10 (11), 1503-1509 (1994)\n+ PUBMED 7871890\n+REFERENCE 2 (bases 1 to 5028)\n+ AUTHORS Roemer,T., Madden,K., Chang,J. and Snyder,M.\n+ TITLE Selection of axial growth sites in yeast requires Axl2p, a novel\n+ plasma membrane glycoprotein\n+ JOURNAL Genes Dev. 10 (7), 777-793 (1996)\n+ PUBMED 8846915\n+REFERENCE 3 (bases 1 to 5028)\n+ AUTHORS Roemer,T.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, New\n+ Haven, CT, USA\n+FEATURES Location/Qualifiers\n+ source 1..5028\n+ /organism="Saccharomyces cerevisiae"\n+ /db_xref="taxon:4932"\n+ /chromosome="IX"\n+ /map="9"\n+ CDS <1..206\n+ /codon_start=3\n+ /product="TCP1-beta"\n+ /protein_id="AAA98665.1"\n+ /db_xref="GI:1293614"\n+ /translation="SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEA\n+ AEVLLRVDNIIRARPRTANRQHM"\n+ gene 687..3158\n+ /gene="AXL2"\n+ CDS 687..3158\n+ /gene="AXL2"\n+ /note="plasma membrane glycoprotein"\n+ /codon_start=1\n+ /function="required for axial budding pattern of S.\n+ cerevisiae"\n+ /product="Axl2p"\n+ /protein_id="AAA98666.1"\n+ /db_xref="GI:1293615"\n+ /translation="MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESF\n+ TFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFN\n+ VILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNE\n+ 
VFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPE\n+ TSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYV\n+ YLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYG\n+ DVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQ\n+ DHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSA\n+ NATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIA\n+ CGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLN\n+ NPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQ\n+ SQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDS\n+ YGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTK\n+ HRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRL\n+ VDFSNKSNVNVGQVKDIHGRIPEML"\n+ gene complement(3300..4037)\n+ /gene="REV7"\n+ CDS complement(3300..4037)\n+ /gene="REV7"\n+ /codon_start=1\n+ /product="Rev7p"\n+ /protein_id="AAA98667.1"\n+ /db_xref="GI:1293616"\n+ /translation="MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQ\n+ FVPINRHPALIDYI'..b'cca\n+ 1921 agaatttcga caagctttca ttaggtttga aagcgaacca aggttcacaa tctcaagagc\n+ 1981 tatattttaa catcattggc atggattcaa agataactca ctcaaaccac agtgcgaatg\n+ 2041 caacgtccac aagaagttct caccactcca cctcaacaag ttcttacaca tcttctactt\n+ 2101 acactgcaaa aatttcttct acctccgctg ctgctacttc ttctgctcca gcagcgctgc\n+ 2161 cagcagccaa taaaacttca tctcacaata aaaaagcagt agcaattgcg tgcggtgttg\n+ 2221 ctatcccatt aggcgttatc ctagtagctc tcatttgctt cctaatattc tggagacgca\n+ 2281 gaagggaaaa tccagacgat gaaaacttac cgcatgctat tagtggacct gatttgaata\n+ 2341 atcctgcaaa taaaccaaat caagaaaacg ctacaccttt gaacaacccc tttgatgatg\n+ 2401 atgcttcctc gtacgatgat acttcaatag caagaagatt ggctgctttg aacactttga\n+ 2461 aattggataa ccactctgcc actgaatctg atatttccag cgtggatgaa aagagagatt\n+ 2521 ctctatcagg tatgaataca tacaatgatc agttccaatc ccaaagtaaa gaagaattat\n+ 2581 tagcaaaacc cccagtacag cctccagaga gcccgttctt tgacccacag aataggtctt\n+ 2641 cttctgtgta tatggatagt gaaccagcag taaataaatc ctggcgatat actggcaacc\n+ 2701 tgtcaccagt ctctgatatt gtcagagaca gttacggatc 
acaaaaaact gttgatacag\n+ 2761 aaaaactttt cgatttagaa gcaccagaga aggaaaaacg tacgtcaagg gatgtcacta\n+ 2821 tgtcttcact ggacccttgg aacagcaata ttagcccttc tcccgtaaga aaatcagtaa\n+ 2881 caccatcacc atataacgta acgaagcatc gtaaccgcca cttacaaaat attcaagact\n+ 2941 ctcaaagcgg taaaaacgga atcactccca caacaatgtc aacttcatct tctgacgatt\n+ 3001 ttgttccggt taaagatggt gaaaattttt gctgggtcca tagcatggaa ccagacagaa\n+ 3061 gaccaagtaa gaaaaggtta gtagattttt caaataagag taatgtcaat gttggtcaag\n+ 3121 ttaaggacat tcacggacgc atcccagaaa tgctgtgatt atacgcaacg atattttgct\n+ 3181 taattttatt ttcctgtttt attttttatt agtggtttac agatacccta tattttattt\n+ 3241 agtttttata cttagagaca tttaatttta attccattct tcaaatttca tttttgcact\n+ 3301 taaaacaaag atccaaaaat gctctcgccc tcttcatatt gagaatacac tccattcaaa\n+ 3361 attttgtcgt caccgctgat taatttttca ctaaactgat gaataatcaa aggccccacg\n+ 3421 tcagaaccga ctaaagaagt gagttttatt ttaggaggtt gaaaaccatt attgtctggt\n+ 3481 aaattttcat cttcttgaca tttaacccag tttgaatccc tttcaatttc tgctttttcc\n+ 3541 tccaaactat cgaccctcct gtttctgtcc aacttatgtc ctagttccaa ttcgatcgca\n+ 3601 ttaataactg cttcaaatgt tattgtgtca tcgttgactt taggtaattt ctccaaatgc\n+ 3661 ataatcaaac tatttaagga agatcggaat tcgtcgaaca cttcagtttc cgtaatgatc\n+ 3721 tgatcgtctt tatccacatg ttgtaattca ctaaaatcta aaacgtattt ttcaatgcat\n+ 3781 aaatcgttct ttttattaat aatgcagatg gaaaatctgt aaacgtgcgt taatttagaa\n+ 3841 agaacatcca gtataagttc ttctatatag tcaattaaag caggatgcct attaatggga\n+ 3901 acgaactgcg gcaagttgaa tgactggtaa gtagtgtagt cgaatgactg aggtgggtat\n+ 3961 acatttctat aaaataaaat caaattaatg tagcatttta agtataccct cagccacttc\n+ 4021 tctacccatc tattcataaa gctgacgcaa cgattactat tttttttttc ttcttggatc\n+ 4081 tcagtcgtcg caaaaacgta taccttcttt ttccgacctt ttttttagct ttctggaaaa\n+ 4141 gtttatatta gttaaacagg gtctagtctt agtgtgaaag ctagtggttt cgattgactg\n+ 4201 atattaagaa agtggaaatt aaattagtag tgtagacgta tatgcatatg tatttctcgc\n+ 4261 ctgtttatgt ttctacgtac ttttgattta tagcaagggg aaaagaaata catactattt\n+ 4321 tttggtaaag gtgaaagcat aatgtaaaag ctagaataaa 
atggacgaaa taaagagagg\n+ 4381 cttagttcat cttttttcca aaaagcaccc aatgataata actaaaatga aaaggatttg\n+ 4441 ccatctgtca gcaacatcag ttgtgtgagc aataataaaa tcatcacctc cgttgccttt\n+ 4501 agcgcgtttg tcgtttgtat cttccgtaat tttagtctta tcaatgggaa tcataaattt\n+ 4561 tccaatgaat tagcaatttc gtccaattct ttttgagctt cttcatattt gctttggaat\n+ 4621 tcttcgcact tcttttccca ttcatctctt tcttcttcca aagcaacgat ccttctaccc\n+ 4681 atttgctcag agttcaaatc ggcctctttc agtttatcca ttgcttcctt cagtttggct\n+ 4741 tcactgtctt ctagctgttg ttctagatcc tggtttttct tggtgtagtt ctcattatta\n+ 4801 gatctcaagt tattggagtc ttcagccaat tgctttgtat cagacaattg actctctaac\n+ 4861 ttctccactt cactgtcgag ttgctcgttt ttagcggaca aagatttaat ctcgttttct\n+ 4921 ttttcagtgt tagattgctc taattctttg agctgttctc tcagctcctc atatttttct\n+ 4981 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc\n+//\n' |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/test-data/s_cerevisiae_SCU49845.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/test-data/s_cerevisiae_SCU49845.gff3 Wed Jun 11 16:29:25 2014 -0400 |
b |
@@ -0,0 +1,8 @@ +IX gbk_to_gff gene 687 3158 . + . ID=AXL2;Name=AXL2 +IX gbk_to_gff . 687 3158 . + . ID=Transcript:AXL2;Parent=AXL2 +IX gbk_to_gff CDS 687 3158 . + . Parent=Transcript:AXL2 +IX gbk_to_gff exon 687 3158 . + . Parent=Transcript:AXL2 +IX gbk_to_gff gene 3300 4037 . - . ID=REV7;Name=REV7 +IX gbk_to_gff . 3300 4037 . - . ID=Transcript:REV7;Parent=REV7 +IX gbk_to_gff CDS 3300 4037 . - . Parent=Transcript:REV7 +IX gbk_to_gff exon 3300 4037 . - . Parent=Transcript:REV7 |
b |
diff -r db3c67b03d55 -r ff2c2e6f4ab3 GFFtools-GX/tool_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GFFtools-GX/tool_conf.xml.sample Wed Jun 11 16:29:25 2014 -0400 |
b |
@@ -0,0 +1,7 @@ +<section name="GFFtools" id="gfftools.web"> + <tool file="GFFtools-GX/gff_to_bed.xml"/> + <tool file="GFFtools-GX/bed_to_gff.xml"/> + <tool file="GFFtools-GX/gbk_to_gff.xml"/> + <tool file="GFFtools-GX/gff_to_gtf.xml"/> + <tool file="GFFtools-GX/gtf_to_gff.xml"/> +</section> |