Repository 'bp_genbank2gff3'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/bp_genbank2gff3

Changeset 0:f79bcd53b9a3 (2015-10-09)
Next changeset 1:792a280ebeea (2015-10-09)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bioperl commit 799339e22181d28cb2b145454d353d6025779636
added:
bp_genbank2gff3.xml
macros.xml
test-data/seq.gb
test-data/seq.gb.0.gff
test-data/seq.gb.1.gff
tool_dependencies.xml
b
diff -r 000000000000 -r f79bcd53b9a3 bp_genbank2gff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bp_genbank2gff3.xml Fri Oct 09 09:19:49 2015 -0400
[
@@ -0,0 +1,122 @@
+<tool id="bp_genbank2gff3" name="Genbank to GFF3" version="1.0">
+  <description>converter</description>
+  <macros>
+      <import>macros.xml</import>
+  </macros>
+  <!-- Request the bioperl package defined by the "requirements" macro in macros.xml;
+       without this element the tool declares no dependency at all. -->
+  <expand macro="requirements" />
+  <!-- Shared exit-code and output checks, also defined in macros.xml. -->
+  <expand macro="stdio" />
+  <!-- Builds the bp_genbank2gff3.pl call; the GFF3 goes to stdout and is redirected into the output dataset. -->
+  <command><![CDATA[
+bp_genbank2gff3.pl
+## expands to nothing when inference is enabled, otherwise to --noinfer
+$noinfer
+## Sequence Ontology source: omitted, the keyword "live", or a user-supplied URL
+#if str($sofile.sofile) == "url":
+    --sofile '${sofile.so_url}'
+#elif str($sofile.sofile) == "live":
+    --sofile live
+#end if
+## "-" sends the converted records to stdout, which is redirected below
+--outdir -
+--ethresh $ethresh
+$model
+--typesource '${typesource}'
+
+## paths and free-text values are single-quoted so the shell never splits or expands them
+'$genbank'
+> '$gff3']]></command>
+  <inputs>
+    <!-- GenBank record(s) to convert. -->
+    <param name="genbank" type="data" format="gb" label="Genbank file"/>
+
+    <!-- Checked (the default) adds nothing to the command line; unchecked passes the noinfer flag. -->
+    <param name="noinfer"
+           type="boolean"
+           checked="true"
+           truevalue=""
+           falsevalue="--noinfer"
+           label="Infer exon/mRNA subfeatures"/>
+
+    <!-- Source of the Sequence Ontology; only the "url" branch asks for an extra value. -->
+    <conditional name="sofile" label="Sequence Ontology">
+        <param name="sofile" type="select" label="Sequence Ontology File">
+            <option value="__none__" selected="True">None specified</option>
+            <option value="live">Latest Sequence Ontology</option>
+            <option value="url">User Specified</option>
+        </param>
+        <when value="__none__"/>
+        <when value="live"/>
+        <when value="url">
+            <param name="so_url" type="text" label="Sequence Ontology URL"/>
+        </when>
+    </conditional>
+
+    <!-- Numeric error threshold for the unflattener, from 0 (strict) to 3 (ignore errors). -->
+    <param name="ethresh" type="select" label="Error threshold for unflattener">
+        <option value="0">Strict</option>
+        <option value="1" selected="True">Medium</option>
+        <option value="2">Loose</option>
+        <option value="3">Ignore Errors</option>
+    </param>
+
+    <!-- Each option value is itself the command-line flag that selects the gene model. -->
+    <param name="model" type="select" label="Gene Model">
+        <option value="--CDS" selected="True">Default GFF gene model</option>
+        <option value="--noCDS">Alternate gene-RNA-protein-exon model</option>
+    </param>
+
+    <!-- Free-text feature type used for the landmark feature; defaults to "contig". -->
+    <param name="typesource"
+           type="text"
+           value="contig"
+           label="Sequence Ontology type for landmark feature"
+           help="E.g. chromosome, region, contig"/>
+  </inputs>
+  <outputs>
+    <!-- GFF3 result of the conversion; the label is derived from the input dataset's name. -->
+    <data name="gff3" format="gff3" label="${genbank.name} as GFF3"/>
+  </outputs>
+  <!-- Functional tests: both convert test-data/seq.gb and compare the result with a stored GFF3 file. -->
+  <!-- NOTE(review): "noinfer" is checked by default in the inputs, so setting it to True in the
+       first test looks identical to the defaults used by the second test, yet the two tests expect
+       different files. Confirm whether the second test was meant to set noinfer to False. -->
+  <tests>
+      <!-- Inference explicitly enabled; up to 6 differing lines are tolerated. -->
+      <test>
+          <param name="genbank" value="seq.gb" />
+          <param name="noinfer" value="True" />
+          <output name="gff3" file="seq.gb.0.gff" ftype="gff3" lines_diff="6"/>
+      </test>
+      <!-- All parameters left at their defaults; up to 4 differing lines are tolerated. -->
+      <test>
+          <param name="genbank" value="seq.gb" />
+          <output name="gff3" file="seq.gb.1.gff" ftype="gff3" lines_diff="4"/>
+      </test>
+  </tests>
+  <help><![CDATA[
+**What it does**:
+
+This tool uses Bio::SeqFeature::Tools::Unflattener and
+Bio::Tools::GFF to convert GenBank flatfiles to GFF3 with gene
+containment hierarchies mapped for optimal display in gbrowse.
+
+The input file is assumed to be a GenBank flatfile for RefSeq
+contigs. The file may contain multiple GenBank records.
+
+**Designed for RefSeq**
+
+This script is designed for RefSeq genomic sequence entries.  It may
+work for third party annotations but this has not been tested.
+However, Uniprot/Swissprot input works, as do EMBL and possibly EMBL/Ensembl,
+if you don't mind some gene model unflattener errors (dgg).
+
+**G-R-P-E Gene Model**
+
+Don Gilbert reworked this script because he needed GFF3 output suited to
+loading into GMOD Chado databases.
+
+When the alternate gene model is selected, this writes GFF with an
+alternate, but useful, gene model instead of the consensus model for GFF3
+
+  [ gene > mRNA > (exon,CDS,UTR) ]
+
+This alternate is
+
+  gene > mRNA > polypeptide > exon
+
+This means the only feature with DNA bases is the exon.  The others
+specify only location ranges on a genome. Exon, of course, is a child
+of mRNA and protein/peptide.
+
+The protein/polypeptide feature is an important one, having all the
+annotations of the GenBank CDS feature, protein ID, translation, GO
+terms, Dbxrefs to other proteins.
+
+UTRs, introns, CDS-exons are all inferred from the primary exon bases
+inside/outside appropriate higher feature ranges.  Other special gene
+model features remain the same.
+
+**Authors**
+
+Sheldon McKay (mckays@cshl.edu)
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory.
+
+**Author of hacks for GFF2Chado loading**
+
+Don Gilbert (gilbertd@indiana.edu)
+      ]]></help>
+  <!-- Publication to cite, referenced by DOI (presumably the BioPerl toolkit paper; verify). -->
+  <citations>
+    <citation type="doi">10.1101/gr.361602</citation>
+  </citations>
+</tool>
b
diff -r 000000000000 -r f79bcd53b9a3 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Oct 09 09:19:49 2015 -0400
b
@@ -0,0 +1,20 @@
+<macros>
+    <!-- Version token; its value matches the bioperl package version required below. -->
+    <token name="@WRAPPER_VERSION@">1.6</token>
+
+    <!-- Package requirements; content nested in the expanding element is inserted at the yield. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement version="1.6" type="package">bioperl</requirement>
+            <yield />
+        </requirements>
+    </xml>
+
+    <!-- Failure detection shared by every tool that imports this file. -->
+    <xml name="stdio">
+        <stdio>
+            <!-- Any non-zero exit status, positive or negative, is an error. -->
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+            <!-- Fallback for when the exit code was not set properly: look for error markers in the output. -->
+            <regex match="Error:" />
+            <regex match="Exception:" />
+        </stdio>
+    </xml>
+
+</macros>
b
diff -r 000000000000 -r f79bcd53b9a3 test-data/seq.gb
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/seq.gb Fri Oct 09 09:19:49 2015 -0400
b
b'@@ -0,0 +1,7506 @@\n+LOCUS       NC_014662             165540 bp    DNA     linear   PHG 12-NOV-2010\n+DEFINITION  Enterobacteria phage CC31, complete genome.\n+ACCESSION   NC_014662\n+VERSION     NC_014662.1  GI:311992992\n+DBLINK      BioProject: PRJNA60119\n+KEYWORDS    RefSeq.\n+SOURCE      Enterobacteria phage CC31\n+  ORGANISM  Enterobacteria phage CC31\n+            Viruses; dsDNA viruses, no RNA stage; Caudovirales; Myoviridae;\n+            Tevenvirinae; T4likevirus.\n+REFERENCE   1  (bases 1 to 165540)\n+  AUTHORS   Petrov,V.M., Ratnayaka,S., Nolan,J.M., Miller,E.S. and Karam,J.D.\n+  TITLE     Genomes of the T4-related bacteriophages as windows on microbial\n+            genome evolution\n+  JOURNAL   Virol. J. 7 (1), 292 (2010)\n+   PUBMED   21029436\n+  REMARK    Publication Status: Online-Only\n+REFERENCE   2  (bases 1 to 165540)\n+  CONSRTM   NCBI Genome Project\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (12-NOV-2010) National Center for Biotechnology\n+            Information, NIH, Bethesda, MD 20894, USA\n+REFERENCE   3  (bases 1 to 165540)\n+  AUTHORS   Petrov,V.M., Ratnayaka,S. and Karam,J.D.\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (14-DEC-2009) Biochemistry, Tulane University Medical\n+            Center, 1430 Tulane Ave., New Orleans, LA 70112, USA\n+COMMENT     PROVISIONAL REFSEQ: This record has not yet been subject to final\n+            NCBI review. 
The reference sequence is identical to GU323318.\n+            COMPLETENESS: full length.\n+FEATURES             Location/Qualifiers\n+     source          1..165540\n+                     /organism="Enterobacteria phage CC31"\n+                     /mol_type="genomic DNA"\n+                     /host="Escherichia coli"\n+                     /db_xref="taxon:709484"\n+     gene            complement(1..2214)\n+                     /gene="rIIA"\n+                     /locus_tag="CC31p001"\n+                     /db_xref="GeneID:9926434"\n+     CDS             complement(1..2214)\n+                     /gene="rIIA"\n+                     /locus_tag="CC31p001"\n+                     /codon_start=1\n+                     /transl_table=11\n+                     /product="membrane-associated affects host membrane\n+                     ATPase"\n+                     /protein_id="YP_004009859.1"\n+                     /db_xref="GI:311992993"\n+                     /db_xref="GeneID:9926434"\n+                     /translation="MKLIADNEEVLGSAGKKTKFTIQASPKAFMILSDKLYKNKIRAV\n+                     VRELTTNWLDAHILNGKQDVPCEIKCPNKLDPRFIIRDFGPGMSDFQIRGNDEEPGLY\n+                     NSYFASTKAESNDFIGALGLGSKSPFSYTKSFTIVSYHDGEARGYMAVMNNGEPDIRP\n+                     LFVEPMKEGEQTGIEITVPVRLEDVEKFAHEIAYVMRPMPVKPIITGASINIDSFPQD\n+                     VEWFHSPNGFGKDSRGLYAVYGKIVYPIDQFQGLECSWLLNRYGCVYVNFPLGELDIT\n+                     PSREELSLDDVTIENIKKRVNSLEKATLEADIAHLQSIENKRELVRQLSQFDSNQRAI\n+                     LNRQNIMFGDKTYGEWVETYNINELQKKIESSMVYTYLVNLDAERMRLTSSWSTRKRT\n+                     SVSNLLNVQQNKVHIMIDDKPSRRAAMFRGMYLKDFHRYQRFIMIDPEDPKHLEIKDE\n+                     IIKLFDQDEVVVLKSSEMEEYRKFEKEHYSNSSKGDGGPRPKSPNGQLHKLDAKGGWW\n+                     TSEDLFMNKDDIAELEGYAIFRSRDEIRTFPEELYWSGIDIETIRTLAKELGVTEFYV\n+                     IRPNSAKVAKLNDNLESLDRFIVDEFIKIIDDLDADEYLPSTFFNRRVVSNIINTPEL\n+                     KWLLKFITGKDNGERVSRINEIGRNLKNTYITASPDGSSQIREDLALCVRIYNKLTDA\n+                
     ASAEVDAAFKKFEKEYPVIEHMLNEWRVANYADDISRIMRALESAPSLKGKDEDE"\n+     gene            complement(2220..2426)\n+                     /gene="rIIA.1"\n+                     /locus_tag="CC31p002"\n+                     /db_xref="GeneID:9926148"\n+     CDS             complement(2220..2426)\n+                     /gene="rIIA.1"\n+                     /locus_tag="CC31p002"\n+                     /codon_start=1\n+                     /transl_table=11\n+                     /product="hypothetical protein"\n+                     /protein_id="YP_004009860.1"\n+                     /db_xref="GI:311992994"\n+                     /db_xref="GeneID:9926148"'..b'agttaggaat cgcacggttt tctactgtgt agattgcgaa ttctttagct tcattatcaa\n+   162481 tgatactctg aaggtctctc aaattgagct gacctattga gtcatcaact tgtttagcca\n+   162541 tcatgtcaaa aatagttgtc atattaccct accagattaa attaccgagg tccatcataa\n+   162601 cacatgcaag aaaaagcatt atttcataaa taacataaca gactccaaca aatccagctg\n+   162661 aagctataat taaagccaaa ataatactta cagtaagttt cattttgtta gacctgtgta\n+   162721 gtagaacaaa cgttctacag ccaatgcggc tacaccaaag cacatgcttc caatgactct\n+   162781 tggtggaggg gttaacccct cccaaataaa aataccagag gcaatgaaca gcggagccat\n+   162841 aagcaagaac accaaagccc aaatctgttt gaatggactc atattaatat gcctgcagaa\n+   162901 taaatttgaa gttatcattc agcatacgat tcatttcttc aaggttctgg taagaatcgt\n+   162961 tgtgtttacg ggtgaatgcc agagccaatt ggcctttacc aaagccagta gtcagaggtt\n+   163021 tcattttgtc agctggaatg aaatacacat catagatgac gttgtttgaa cgcatggtgc\n+   163081 gaccgagctg agaacggcct tgacgaatct gagacagaac attcatgaaa ccggacttag\n+   163141 aacgttgacg accaacgtag aatcgtgctg ctactgcacg ccaagctgat gcacctttta\n+   163201 ccatgaagta gaagcctggt tcagccagaa cagatgggtc aacatatcct acagtttcgc\n+   163261 catttttcac agaaacaaca tgagtaccgc cagctgccag aatatcacca cgagtcatat\n+   163321 aatcacgcat atcattttcc tcaatcaatt aaaagtttat cctcaacggg cccgaaggcc\n+   163381 ctgaattaaa gcccggagct ttgctgagca ccgttcatca gaacttctac agcacctttc\n+   163441 aaaccttcag 
tttgaatgac gttaacaatt tcaacagtcg cccaaatgct aaaaccgatt\n+   163501 ccaacaaaaa ccataatagc tattatggaa aagaaaatcc aaaaaatctt ttgcatttta\n+   163561 ttcatgctag aactataaac tttgcgaccc ataatatttt cctcagaagt taatccatgc\n+   163621 cattacaaca tcatgtgcta aggcgaatgc gaaagctcca aagagcacca tcagaagagt\n+   163681 taaagtccag atggctttga gaatcttttt gatgatgttc atttgtttct ccgttagttg\n+   163741 atttctagtc tatagtatac catctaacgg aggatgtaaa cggttgagtt aaagatttag\n+   163801 ataccaacct tggtagttgc tcttgcgaac cactgacttg aggtcttcat ggtcaccagc\n+   163861 gaactttagg gtaaactggt aaacactagg cccactttct accacatcta atatgataat\n+   163921 acagtctgct atcctgtgtg tcaagaaggg tcctaggttt atccctgagc caccgggata\n+   163981 gtcacccgag taaccagttg taactgaaat ccactttgaa tctgactggt gagttttgac\n+   164041 ctcgacccgg agtccacaga atcgaggatg agcaagaaca tcccacgcgt atgtgtaagg\n+   164101 gtcgtcatga ttttcttgac cgcctgcaac atagccgtcc atccaatcag ctactgcttt\n+   164161 ttcagctaac tgagcaattg cacaacgatt gattacttca gttttatctt ggtccgggtc\n+   164221 ttggcgcaga gaataagcgg cagtgctttt aattttgacc ctatcttctg gagttaaatc\n+   164281 actgaacgcc cgggtaaaag tcttcagggc tttcaatctc aacaggcctg gattcgtctt\n+   164341 ttccataaat tcctctgata tgtagttcac caaaataaat gcggtcatct tcttcaagat\n+   164401 attctgcatc agcaggaggg atttcattca ccacttcatc acagtgtaac caagcataat\n+   164461 gaacatctcc tggatgtttg accagttttc tgcaatatac tgattttgca taatggtctc\n+   164521 cacgaagatt aacaacagct gaatcactta cgttgaatgg attaatcatg acagcttcct\n+   164581 caaaaagaaa gggcccgaag gcccttagat tagatataac aatcagtttc ttggttgtat\n+   164641 tccgcttcac ggattacttt gtactggcaa gtacgcattt tagcgttgtt gtaatctacc\n+   164701 gggatagata ctacatctcg tggatgtact ttaacaacta ccagacggtc attaccaccg\n+   164761 cggaagtgtt tgatgtaact gcgagcacaa acgtgcagac cagcttcgca ggtgcgattt\n+   164821 tcatcttcaa ctacatgagt acgaggcatt ttaactactc gaccgattga gttatcgaag\n+   164881 cgaccggtat agcagtcagt gtaatcgtta cgaataactt tccatgccag gaagtgtcca\n+   164941 tcttcagtga tttcgatgtc gttagcctgc aggaagtcaa acagacgagt 
cacagcagtt\n+   165001 ttacttgggt tttccagcag attttccagg aatggcagat agaattcaaa atcttcacca\n+   165061 ttctgcatat cgttaatgat acggtctaca aggccagatt tgatttcaat gtcctgatag\n+   165121 aacaactgac cattttcaat gcgaatgtta ccatcaacat aagaagtgat tgccttctca\n+   165181 atgttaatca gattaatcgc agattcaaaa tcaccatcaa cacagaactg aagagcttct\n+   165241 ttaaagtttg gatggtcttt atcagcagcg taagtatcac ggcctactgt gatagacagg\n+   165301 aacttagaag aaccagccca tacaacatct tctggattga agactttaac tggcgaagtc\n+   165361 actttaactt ccggttcaac ttccgcttta cggcttttaa tttcattaac aacacgacga\n+   165421 atagtatcaa cagaacaaga ataaatttct gctaattcgg tttgagtata gccctgcatg\n+   165481 aattcatcat gaatggctac tttttctaca tcattaaaca ttttaacaac agagactttc\n+//\n+\n'
b
diff -r 000000000000 -r f79bcd53b9a3 test-data/seq.gb.0.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/seq.gb.0.gff Fri Oct 09 09:19:49 2015 -0400
b
b'@@ -0,0 +1,5208 @@\n+##gff-version 3\n+##sequence-region NC_014662 1 165540\n+# conversion-by bp_genbank2gff3.pl\n+# organism Enterobacteria phage CC31\n+# Note Enterobacteria phage CC31, complete genome.\n+# date 12-NOV-2010\n+NC_014662\tGenBank\tregion\t1\t165540\t.\t+\t1\tID=NC_014662;Dbxref=BioProject:PRJNA60119,taxon:709484;Name=NC_014662;Note=Enterobacteria phage CC31%2C complete genome.,PROVISIONAL REFSEQ: This record has not yet been subject to final NCBI review. The reference sequence is identical to GU323318. COMPLETENESS: full length. ;comment1=PROVISIONAL REFSEQ: This record has not yet been subject to final NCBI review. The reference sequence is identical to GU323318. COMPLETENESS: full length. ;date=12-NOV-2010;host=Escherichia coli;mol_type=genomic DNA;organism=Enterobacteria phage CC31\n+NC_014662\tGenBank\tgene\t1\t2214\t.\t-\t1\tID=CC31p001;Dbxref=GeneID:9926434;Name=rIIA;locus_tag=CC31p001\n+NC_014662\tGenBank\tmRNA\t1\t2214\t.\t-\t1\tID=CC31p001.t01;Parent=CC31p001\n+NC_014662\tGenBank\tCDS\t1\t2214\t.\t-\t1\tID=CC31p001.p01;Parent=CC31p001.t01;Dbxref=GI:311992993,GeneID:9926434;Name=rIIA;codon_start=1;locus_tag=CC31p001;product=membrane-associated affects host membrane ATPase;protein_id=YP_004009859.1;transl_table=11;translation=length.737\n+NC_014662\tGenBank\texon\t1\t2214\t.\t-\t1\tParent=CC31p001.t01\n+NC_014662\tGenBank\tgene\t2220\t2426\t.\t-\t1\tID=CC31p002;Dbxref=GeneID:9926148;Name=rIIA.1;locus_tag=CC31p002\n+NC_014662\tGenBank\tmRNA\t2220\t2426\t.\t-\t1\tID=CC31p002.t01;Parent=CC31p002\n+NC_014662\tGenBank\tCDS\t2220\t2426\t.\t-\t1\tID=CC31p002.p01;Parent=CC31p002.t01;Dbxref=GI:311992994,GeneID:9926148;Name=rIIA.1;codon_start=1;locus_tag=CC31p002;product=hypothetical 
protein;protein_id=YP_004009860.1;transl_table=11;translation=length.68\n+NC_014662\tGenBank\texon\t2220\t2426\t.\t-\t1\tParent=CC31p002.t01\n+NC_014662\tGenBank\tgene\t2420\t2704\t.\t-\t1\tID=CC31p003;Dbxref=GeneID:9926149;Name=CC31p003\n+NC_014662\tGenBank\tmRNA\t2420\t2704\t.\t-\t1\tID=CC31p003.t01;Parent=CC31p003\n+NC_014662\tGenBank\tCDS\t2420\t2704\t.\t-\t1\tID=CC31p003.p01;Parent=CC31p003.t01;Dbxref=GI:311992995,GeneID:9926149;Name=CC31p003;Note=predicted by GenMarkS;codon_start=1;product=hypothetical protein;protein_id=YP_004009861.1;transl_table=11;translation=length.94\n+NC_014662\tGenBank\texon\t2420\t2704\t.\t-\t1\tParent=CC31p003.t01\n+NC_014662\tGenBank\tgene\t2750\t2905\t.\t-\t1\tID=CC31p004;Dbxref=GeneID:9926150;Name=CC31p004\n+NC_014662\tGenBank\tmRNA\t2750\t2905\t.\t-\t1\tID=CC31p004.t01;Parent=CC31p004\n+NC_014662\tGenBank\tCDS\t2750\t2905\t.\t-\t1\tID=CC31p004.p01;Parent=CC31p004.t01;Dbxref=GI:311992996,GeneID:9926150;Name=CC31p004;Note=predicted by GenMarkS;codon_start=1;product=hypothetical protein;protein_id=YP_004009862.1;transl_table=11;translation=length.51\n+NC_014662\tGenBank\texon\t2750\t2905\t.\t-\t1\tParent=CC31p004.t01\n+NC_014662\tGenBank\tgene\t2945\t4789\t.\t-\t1\tID=CC31p005;Dbxref=GeneID:9926151;Name=60plus39;locus_tag=CC31p005\n+NC_014662\tGenBank\tmRNA\t2945\t4789\t.\t-\t1\tID=CC31p005.t01;Parent=CC31p005\n+NC_014662\tGenBank\tCDS\t2945\t4789\t.\t-\t1\tID=CC31p005.p01;Parent=CC31p005.t01;Dbxref=GI:311992997,GeneID:9926151;Name=60plus39;codon_start=1;locus_tag=CC31p005;product=DNA topoisomerase 
subunit;protein_id=YP_004009863.1;transl_table=11;translation=length.614\n+NC_014662\tGenBank\texon\t2945\t4789\t.\t-\t1\tParent=CC31p005.t01\n+NC_014662\tGenBank\tgene\t4835\t5302\t.\t-\t1\tID=CC31p006;Dbxref=GeneID:9926152;Name=CC31p006\n+NC_014662\tGenBank\tmRNA\t4835\t5302\t.\t-\t1\tID=CC31p006.t01;Parent=CC31p006\n+NC_014662\tGenBank\tCDS\t4835\t5302\t.\t-\t1\tID=CC31p006.p01;Parent=CC31p006.t01;Dbxref=GI:311992998,GeneID:9926152;Name=CC31p006;Note=predicted by GenMarkS;codon_start=1;product=hypothetical protein;protein_id=YP_004009864.1;transl_table=11;translation=length.155\n+NC_014662\tGenBank\texon\t4835\t5302\t.\t-\t1\tParent=CC31p006.t01\n+NC_014662\tGenBank\tgene\t5302\t6837\t.\t-\t1\tID=CC31p007;Dbxref=GeneID:9926153;Name=CC31p007\n+NC_014662\tGenBank\tmRNA\t5302\t6837\t.\t-\t1\tID=CC31p007.t01;Parent=CC31p007\n+NC_014662\tGenBank\tCDS\t5302\t6837\t.\t-\t1\tID=CC31p007.p01;Parent=CC31p007.t01;Dbxref=GI:311992999,GeneID:9926153;Name=CC31p007;Note'..b'ISSGKYYHYFRGSGYVAYDMDEGVKI\n+NKGGLNVGGNTSITGNTYVTGAVTSNGQFKTSANDGLKIWNGDYGMILRRSENNFYLIPT\n+AQGQAENGGISNLRPFYIDCATGNATLGHNVTINGQSTLNGNVTLGSGQINLLGGSGNIG\n+FAKAGTSPYSMRIFYAGNTERGNRLEIADDSSYLMYIERHPSIGIQLVTNGGHIKTNAGS\n+VYTEAIALNSGARFVADGNIYLPNATNGFSAGWVLGQINSRLNAAVQKSGDTMTGTLTIN\n+NGANTGVMVSGITSGSDKGLIRGNVDGGAHDQWENRSSGLQLDCPSSDDSAYNVWKATKW\n+GAYHIAAMDVYAPSGNGYVRLVIRNGGAHIWNNSSYTSPVQINAPEFYLTSDISLKKDIR\n+SIEDSRSNLHKVEIKRYAMKDGSNDNAIGVIAQEVQEVYPELVNENKDTGKLSVNYRGLS\n+SVLWKIVQEQDKELEDVKSRLARIEELLSK\n+>CC31p258.p01\n+MAIAGPNIGTSWFRETGQRPMSAARVAVRLPARPGGARQMVGLSKEVNYNIGANNSYNKD\n+TLINYLRSQGSTPVVVTITGNLVSYSSGVACLEFPANLPNAYVHLIINGGVTLYGRGGNG\n+GVKGNGAAGGHAINNQFGTRLRITNNGAIAGGGGGGGGNSANGGMGGGGRPFGYADKTHP\n+PAAATSRAATDGTLTSPGIGAEYKIGTAVQYTCGSGGNVGANGGASTGRLGTNYGGGSAG\n+RAVIGNAPTWNKVGTIYGSRV\n+>CC31p259.p01\n+MTQRTPLPGISDILFGVLDRLFKDNATGRVLASRIVALIVVFILSLTWYRLDAIMQVWKE\n+SRYETYTKVLQQDKEAKFEASALEQLQIAHVSSNADFSAIYSFRPRNLNYFVDLIAYEGR\n+LPSTVNEKNLGGFPVDKTSNEYSAHLRGAYFSSEDEFVFLPTKKKDGELKYMYSCPYFNL\n+DNVYAGTVSMYWYSKPLLNENRL
AAICSQAARTLGRAK\n+>CC31p260.p01\n+MSKLEIVREIVTVASVLIKFGSEHILEKREHFIAFLNEIGIKNDLGRPLNQSNFRKMIEE\n+MTAEEKQQLVEEFNEGFESVYRYMMMYSKP\n+>CC31p261.p01\n+MNQTVEIQRYLEGMMNKLALGDMVDYSYQEAMEICHWMKRRVRVVGAEWYISAELIDGRY\n+AIRYDSGDEYVTLPGHVLQRWEVVN\n+>CC31p262.p01\n+MNKISHIEAERKAWDEHTSVVDAITPVYHLVVWFSLSQEEQDCSWKYFEDTTFQKFVNAI\n+NHPESLLTHCEIKASEETFCYFTVSSKRSVSDVMQGYQFLKGVADEFELKINYEKI\n+>CC31p263.p01\n+MSTSEIKMVPYVTYTSERLREFQDQFNGTGIFYDTLSEIENDVKSDINDNDFIIRMFLNG\n+TFEIVAISDKRIEDAIAHIDNIIDEMTEGYYE\n+>CC31p264.p01\n+MNNPVAKHDFNKGGAHKDMKRQEKESRRKQKHKGKGYEHI\n+>CC31p265.p01\n+MSDLSCLRHNIILIKTQIASLQRANEMMDENWGTYANDPGFRMAEHPFMKKLLGKDYICP\n+FETPYNGGVKPFLLDIYKAMNNEMIKELERRLEQLNENNTQKE\n+>CC31p266.p01\n+MNGDLIETQNIGERIPEICFIKADWWDGRLLQRVIVCAANRFKLKDGGELVIPGTRHYSK\n+DMALVLDQMRDKVVSEQVYGDDQGFLDQWGNYLTRKEALIIATHAGQINTRRQKGGPADT\n+LFSEDLY\n+>CC31p267.p01\n+MNMKNLNAQIDRVKKSMNRPAILNELQRCAERVTDEHYLPTEAWEVWFRGTHLGSIERKY\n+KGCYAVHSSLGRHCGDCATYMQALARFIDSCSVVIAKKELEEVEEWINEVVKEPELRVWG\n+IREPKTLWQKIKGFFK\n+>CC31p268.p01\n+MSKVIYIVKASENSISENAANVLIVVAKKDFITSSEVRDVLADKLSAASVNSNIGVLIKK\n+GLIEKSGDGLIVSAEGQEIINQAAVIYAEENAPELLEKRNTRKARPITDQMEADKNLMME\n+ILATKDNLFTIKKLDVYRSNFIAVLEKRTFGIRSFEVSNKGNFRISGYKMTEEQVKHFED\n+LGMVAKHSKNGNVYLDIPRTQENIENIIHAVDTL\n+>CC31p269.p01\n+MKTLINNLNALLANSGVDLDDTMHAARLHSSNTDSNSYLTIWYNTESENYVLVWVYVNNY\n+DMVAVLDAEVEDVAETLNEAKKLFADFFRG\n+>CC31p270.p01\n+MISIIVAALKNGGVITETSDFAYVKFNRMSIDKDTQARYWVMVYDHNESQYILTEVLVDL\n+ETMEADFVGCPELEGTFEEVLEAYVAK\n+>CC31p271.p01\n+MTTIFDMMAKQVDDSIGQLNLRDLQSIIDNEAKEFAIYTVENRAIPNLIDGFKPVQRFVI\n+ARALDLSRGNKEKFHKLASVAGGVADLGYHHGEGSAQDAGALMANTWNNNYPLLDGQGNF\n+GSRLVQKAAASRYIFCRISDNFRKVYKDTEIAPEHKDKEHVPPAFYLPIIPTVLLNGVQG\n+IATGYATKILPHSFESVVECTKLALQGKLDKEPEVQIPQFRGEVVRLEDGSIECRGLYKF\n+TSASQMYISEIPAKFDRETYVEKVLEPMVDKNFISYVDDCSKTGFGFKVKFKKDYMLGEC\n+DEKYRHEKIMRDFKLVEKMSQFIVVIDENGKLNDKFQSSSELIKHFVEVRKTYIVKRIEH\n+KIKECDEAFKLALAKAMFIKEVIEGSIVIQGKTRKQLTSELESRPTYAPFADKLVSMNIY\n+HITSDEAKKLAQQAKDLKAELKYWQETTPETEYMKDLEAL\n+>CC31p272.p01\n+MKLTVSIILALIIASAGFVGVCYVIYEIMLFLACV
MMDLGNLIW\n+>CC31p273.p01\n+MSPFKQIWALVFLLMAPLFIASGIFIWEGLTPPPRVIGSMCFGVAALAVERLFYYTGLTK\n+\n+>CC31p274.p01\n+MRDYMTRGDILAAGGTHVVSVKNGETVGYVDPSVLAEPGFYFMVKGASAWRAVAARFYVG\n+RQRSKSGFMNVLSQIRQGRSQLGRTMRSNNVIYDVYFIPADKMKPLTTGFGKGQLALAFT\n+RKHNDSYQNLEEMNRMLNDNFKFILQAY\n+>CC31p275.p01\n+MGRKVYSSSMNKMQKIFWIFFSIIAIMVFVGIGFSIWATVEIVNVIQTEGLKGAVEVLMN\n+GAQQSSGL\n+>CC31p276.p01\n+MNIIKKILKAIWTLTLLMVLFGAFAFALAHDVVMAWINF\n+>CC31p277.p01\n+MKALKTFTRAFSDLTPEDRVKIKSTAAYSLRQDPDQDKTEVINRCAIAQLAEKAVADWMD\n+GYVAGGQENHDDPYTYAWDVLAHPRFCGLRVEVKTHQSDSKWISVTTGYSGDYPGGSGIN\n+LGPFLTHRIADCIIILDVVESGPSVYQFTLKFAGDHEDLKSVVRKSNYQGWYLNL\n+>CC31p278.p01\n+MINPFNVSDSAVVNLRGDHYAKSVYCRKLVKHPGDVHYAWLHCDEVVNEIPPADAEYLEE\n+DDRIYFGELHIRGIYGKDESRPVEIESPEDFYPGVQ\n+>CC31p279.p01\n+MFNDVEKVAIHDEFMQGYTQTELAEIYSCSVDTIRRVVNEIKSRKAEVEPEVKVTSPVKV\n+FNPEDVVWAGSSKFLSITVGRDTYAADKDHPNFKEALQFCVDGDFESAINLINIEKAITS\n+YVDGNIRIENGQLFYQDIEIKSGLVDRIINDMQNGEDFEFYLPFLENLLENPSKTAVTRL\n+FDFLQANDIEITEDGHFLAWKVIRNDYTDCYTGRFDNSIGRVVKMPRTHVVEDENRTCEA\n+GLHVCARSYIKHFRGGNDRLVVVKVHPRDVVSIPVDYNNAKMRTCQYKVIREAEYNQETD\n+CYI\n'
b
diff -r 000000000000 -r f79bcd53b9a3 test-data/seq.gb.1.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/seq.gb.1.gff Fri Oct 09 09:19:49 2015 -0400
b
b'@@ -0,0 +1,4645 @@\n+# Input: test-data/seq.gb\n+##gff-version 3\n+##sequence-region NC_014662 1 165540\n+# conversion-by bp_genbank2gff3.pl\n+# organism Enterobacteria phage CC31\n+# Note Enterobacteria phage CC31, complete genome.\n+# date 12-NOV-2010\n+# working on contig:NC_014662, Enterobacteria phage CC31, Enterobacteria phage CC31, complete genome., 12-NOV-2010\n+NC_014662\tGenBank\tcontig\t1\t165540\t.\t+\t1\tID=NC_014662;Dbxref=BioProject:PRJNA60119,taxon:709484;Name=NC_014662;Note=Enterobacteria phage CC31%2C complete genome.,PROVISIONAL REFSEQ: This record has not yet been subject to final NCBI review. The reference sequence is identical to GU323318. COMPLETENESS: full length. ;comment1=PROVISIONAL REFSEQ: This record has not yet been subject to final NCBI review. The reference sequence is identical to GU323318. COMPLETENESS: full length. ;date=12-NOV-2010;host=Escherichia coli;mol_type=genomic DNA;organism=Enterobacteria phage CC31\n+NC_014662\tGenBank\tCDS\t1\t2214\t.\t-\t1\tID=CC31p001;Dbxref=GI:311992993,GeneID:9926434;Name=rIIA;codon_start=1;locus_tag=CC31p001;product=membrane-associated affects host membrane ATPase;protein_id=YP_004009859.1;transl_table=11;translation=length.737\n+NC_014662\tGenBank\tgene\t1\t2214\t.\t-\t1\tID=CC31p001.gene;Alias=CC31p001;Dbxref=GeneID:9926434;Name=rIIA;locus_tag=CC31p001\n+NC_014662\tGenBank\tCDS\t2220\t2426\t.\t-\t1\tID=CC31p002;Dbxref=GI:311992994,GeneID:9926148;Name=rIIA.1;codon_start=1;locus_tag=CC31p002;product=hypothetical protein;protein_id=YP_004009860.1;transl_table=11;translation=length.68\n+NC_014662\tGenBank\tgene\t2220\t2426\t.\t-\t1\tID=CC31p002.gene;Alias=CC31p002;Dbxref=GeneID:9926148;Name=rIIA.1;locus_tag=CC31p002\n+NC_014662\tGenBank\tCDS\t2420\t2704\t.\t-\t1\tID=CC31p003;Dbxref=GI:311992995,GeneID:9926149;Name=CC31p003;Note=predicted by GenMarkS;codon_start=1;product=hypothetical 
protein;protein_id=YP_004009861.1;transl_table=11;translation=length.94\n+NC_014662\tGenBank\tgene\t2420\t2704\t.\t-\t1\tID=CC31p003.gene;Alias=CC31p003;Dbxref=GeneID:9926149;Name=CC31p003\n+NC_014662\tGenBank\tCDS\t2750\t2905\t.\t-\t1\tID=CC31p004;Dbxref=GI:311992996,GeneID:9926150;Name=CC31p004;Note=predicted by GenMarkS;codon_start=1;product=hypothetical protein;protein_id=YP_004009862.1;transl_table=11;translation=length.51\n+NC_014662\tGenBank\tgene\t2750\t2905\t.\t-\t1\tID=CC31p004.gene;Alias=CC31p004;Dbxref=GeneID:9926150;Name=CC31p004\n+NC_014662\tGenBank\tCDS\t2945\t4789\t.\t-\t1\tID=CC31p005;Dbxref=GI:311992997,GeneID:9926151;Name=60plus39;codon_start=1;locus_tag=CC31p005;product=DNA topoisomerase subunit;protein_id=YP_004009863.1;transl_table=11;translation=length.614\n+NC_014662\tGenBank\tgene\t2945\t4789\t.\t-\t1\tID=CC31p005.gene;Alias=CC31p005;Dbxref=GeneID:9926151;Name=60plus39;locus_tag=CC31p005\n+NC_014662\tGenBank\tCDS\t4835\t5302\t.\t-\t1\tID=CC31p006;Dbxref=GI:311992998,GeneID:9926152;Name=CC31p006;Note=predicted by GenMarkS;codon_start=1;product=hypothetical protein;protein_id=YP_004009864.1;transl_table=11;translation=length.155\n+NC_014662\tGenBank\tgene\t4835\t5302\t.\t-\t1\tID=CC31p006.gene;Alias=CC31p006;Dbxref=GeneID:9926152;Name=CC31p006\n+NC_014662\tGenBank\tCDS\t5302\t6837\t.\t-\t1\tID=CC31p007;Dbxref=GI:311992999,GeneID:9926153;Name=CC31p007;Note=N-terminal part is similar to Hoc protein and C-terminal part is similar to lipolytic enzyme%2C G-D-S-L;codon_start=1;product=hypothetical protein;protein_id=YP_004009865.1;transl_table=11;translation=length.511\n+NC_014662\tGenBank\tgene\t5302\t6837\t.\t-\t1\tID=CC31p007.gene;Alias=CC31p007;Dbxref=GeneID:9926153;Name=CC31p007\n+NC_014662\tGenBank\tCDS\t6870\t7130\t.\t-\t1\tID=CC31p008;Dbxref=GI:311993000,GeneID:9926154;Name=39.1;codon_start=1;locus_tag=CC31p008;product=gp39.1 hypothetical 
protein;protein_id=YP_004009866.1;transl_table=11;translation=length.86\n+NC_014662\tGenBank\tgene\t6870\t7130\t.\t-\t1\tID=CC31p008.gene;Alias=CC31p008;Dbxref=GeneID:9926154;Name=39.1;locus_tag=CC31p008\n+NC_014662\tGenBank\tCDS\t7127\t7222\t.\t-\t1\tID=CC31p009;Dbxref=GI:311993001,GeneID:9926155;Name=CC31p009;Note=predicted by GenMarkS;codon_start=1;product=hypothetical protein;protein_'..b'ALGDNDTGFRNDGDGMFSVMANSRALVNYNASAPKFQIEHRKATRITHTDNT\n+NTTILPSNNNSLLEIDTSLDGNNAGGNGLTLLGYISSGKYYHYFRGSGYVAYDMDEGVKI\n+NKGGLNVGGNTSITGNTYVTGAVTSNGQFKTSANDGLKIWNGDYGMILRRSENNFYLIPT\n+AQGQAENGGISNLRPFYIDCATGNATLGHNVTINGQSTLNGNVTLGSGQINLLGGSGNIG\n+FAKAGTSPYSMRIFYAGNTERGNRLEIADDSSYLMYIERHPSIGIQLVTNGGHIKTNAGS\n+VYTEAIALNSGARFVADGNIYLPNATNGFSAGWVLGQINSRLNAAVQKSGDTMTGTLTIN\n+NGANTGVMVSGITSGSDKGLIRGNVDGGAHDQWENRSSGLQLDCPSSDDSAYNVWKATKW\n+GAYHIAAMDVYAPSGNGYVRLVIRNGGAHIWNNSSYTSPVQINAPEFYLTSDISLKKDIR\n+SIEDSRSNLHKVEIKRYAMKDGSNDNAIGVIAQEVQEVYPELVNENKDTGKLSVNYRGLS\n+SVLWKIVQEQDKELEDVKSRLARIEELLSK\n+>CC31p258\n+MAIAGPNIGTSWFRETGQRPMSAARVAVRLPARPGGARQMVGLSKEVNYNIGANNSYNKD\n+TLINYLRSQGSTPVVVTITGNLVSYSSGVACLEFPANLPNAYVHLIINGGVTLYGRGGNG\n+GVKGNGAAGGHAINNQFGTRLRITNNGAIAGGGGGGGGNSANGGMGGGGRPFGYADKTHP\n+PAAATSRAATDGTLTSPGIGAEYKIGTAVQYTCGSGGNVGANGGASTGRLGTNYGGGSAG\n+RAVIGNAPTWNKVGTIYGSRV\n+>CC31p259\n+MTQRTPLPGISDILFGVLDRLFKDNATGRVLASRIVALIVVFILSLTWYRLDAIMQVWKE\n+SRYETYTKVLQQDKEAKFEASALEQLQIAHVSSNADFSAIYSFRPRNLNYFVDLIAYEGR\n+LPSTVNEKNLGGFPVDKTSNEYSAHLRGAYFSSEDEFVFLPTKKKDGELKYMYSCPYFNL\n+DNVYAGTVSMYWYSKPLLNENRLAAICSQAARTLGRAK\n+>CC31p260\n+MSKLEIVREIVTVASVLIKFGSEHILEKREHFIAFLNEIGIKNDLGRPLNQSNFRKMIEE\n+MTAEEKQQLVEEFNEGFESVYRYMMMYSKP\n+>CC31p261\n+MNQTVEIQRYLEGMMNKLALGDMVDYSYQEAMEICHWMKRRVRVVGAEWYISAELIDGRY\n+AIRYDSGDEYVTLPGHVLQRWEVVN\n+>CC31p262\n+MNKISHIEAERKAWDEHTSVVDAITPVYHLVVWFSLSQEEQDCSWKYFEDTTFQKFVNAI\n+NHPESLLTHCEIKASEETFCYFTVSSKRSVSDVMQGYQFLKGVADEFELKINYEKI\n+>CC31p263\n+MSTSEIKMVPYVTYTSERLREFQDQFNGTGIFYDTLSEIENDVKSDINDNDFIIRMFLNG\n+TFEIVAISDKRIEDAIAHIDNIIDEMTEGYYE\n+>CC31p264\n+MNNPVAKHDFNKGGA
HKDMKRQEKESRRKQKHKGKGYEHI\n+>CC31p265\n+MSDLSCLRHNIILIKTQIASLQRANEMMDENWGTYANDPGFRMAEHPFMKKLLGKDYICP\n+FETPYNGGVKPFLLDIYKAMNNEMIKELERRLEQLNENNTQKE\n+>CC31p266\n+MNGDLIETQNIGERIPEICFIKADWWDGRLLQRVIVCAANRFKLKDGGELVIPGTRHYSK\n+DMALVLDQMRDKVVSEQVYGDDQGFLDQWGNYLTRKEALIIATHAGQINTRRQKGGPADT\n+LFSEDLY\n+>CC31p267\n+MNMKNLNAQIDRVKKSMNRPAILNELQRCAERVTDEHYLPTEAWEVWFRGTHLGSIERKY\n+KGCYAVHSSLGRHCGDCATYMQALARFIDSCSVVIAKKELEEVEEWINEVVKEPELRVWG\n+IREPKTLWQKIKGFFK\n+>CC31p268\n+MSKVIYIVKASENSISENAANVLIVVAKKDFITSSEVRDVLADKLSAASVNSNIGVLIKK\n+GLIEKSGDGLIVSAEGQEIINQAAVIYAEENAPELLEKRNTRKARPITDQMEADKNLMME\n+ILATKDNLFTIKKLDVYRSNFIAVLEKRTFGIRSFEVSNKGNFRISGYKMTEEQVKHFED\n+LGMVAKHSKNGNVYLDIPRTQENIENIIHAVDTL\n+>CC31p269\n+MKTLINNLNALLANSGVDLDDTMHAARLHSSNTDSNSYLTIWYNTESENYVLVWVYVNNY\n+DMVAVLDAEVEDVAETLNEAKKLFADFFRG\n+>CC31p270\n+MISIIVAALKNGGVITETSDFAYVKFNRMSIDKDTQARYWVMVYDHNESQYILTEVLVDL\n+ETMEADFVGCPELEGTFEEVLEAYVAK\n+>CC31p271\n+MTTIFDMMAKQVDDSIGQLNLRDLQSIIDNEAKEFAIYTVENRAIPNLIDGFKPVQRFVI\n+ARALDLSRGNKEKFHKLASVAGGVADLGYHHGEGSAQDAGALMANTWNNNYPLLDGQGNF\n+GSRLVQKAAASRYIFCRISDNFRKVYKDTEIAPEHKDKEHVPPAFYLPIIPTVLLNGVQG\n+IATGYATKILPHSFESVVECTKLALQGKLDKEPEVQIPQFRGEVVRLEDGSIECRGLYKF\n+TSASQMYISEIPAKFDRETYVEKVLEPMVDKNFISYVDDCSKTGFGFKVKFKKDYMLGEC\n+DEKYRHEKIMRDFKLVEKMSQFIVVIDENGKLNDKFQSSSELIKHFVEVRKTYIVKRIEH\n+KIKECDEAFKLALAKAMFIKEVIEGSIVIQGKTRKQLTSELESRPTYAPFADKLVSMNIY\n+HITSDEAKKLAQQAKDLKAELKYWQETTPETEYMKDLEAL\n+>CC31p272\n+MKLTVSIILALIIASAGFVGVCYVIYEIMLFLACVMMDLGNLIW\n+>CC31p273\n+MSPFKQIWALVFLLMAPLFIASGIFIWEGLTPPPRVIGSMCFGVAALAVERLFYYTGLTK\n+\n+>CC31p274\n+MRDYMTRGDILAAGGTHVVSVKNGETVGYVDPSVLAEPGFYFMVKGASAWRAVAARFYVG\n+RQRSKSGFMNVLSQIRQGRSQLGRTMRSNNVIYDVYFIPADKMKPLTTGFGKGQLALAFT\n+RKHNDSYQNLEEMNRMLNDNFKFILQAY\n+>CC31p275\n+MGRKVYSSSMNKMQKIFWIFFSIIAIMVFVGIGFSIWATVEIVNVIQTEGLKGAVEVLMN\n+GAQQSSGL\n+>CC31p276\n+MNIIKKILKAIWTLTLLMVLFGAFAFALAHDVVMAWINF\n+>CC31p277\n+MKALKTFTRAFSDLTPEDRVKIKSTAAYSLRQDPDQDKTEVINRCAIAQLAEKAVADWMD\n+GYVAGGQENHDDPYTYAWDVLAHPRFCGLRVEVKTHQSDSKWISVTTGYSGDYPGGSGIN\n+LGPFLTHRIADCIII
LDVVESGPSVYQFTLKFAGDHEDLKSVVRKSNYQGWYLNL\n+>CC31p278\n+MINPFNVSDSAVVNLRGDHYAKSVYCRKLVKHPGDVHYAWLHCDEVVNEIPPADAEYLEE\n+DDRIYFGELHIRGIYGKDESRPVEIESPEDFYPGVQ\n+>CC31p279\n+MFNDVEKVAIHDEFMQGYTQTELAEIYSCSVDTIRRVVNEIKSRKAEVEPEVKVTSPVKV\n+FNPEDVVWAGSSKFLSITVGRDTYAADKDHPNFKEALQFCVDGDFESAINLINIEKAITS\n+YVDGNIRIENGQLFYQDIEIKSGLVDRIINDMQNGEDFEFYLPFLENLLENPSKTAVTRL\n+FDFLQANDIEITEDGHFLAWKVIRNDYTDCYTGRFDNSIGRVVKMPRTHVVEDENRTCEA\n+GLHVCARSYIKHFRGGNDRLVVVKVHPRDVVSIPVDYNNAKMRTCQYKVIREAEYNQETD\n+CYI\n'
b
diff -r 000000000000 -r f79bcd53b9a3 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Fri Oct 09 09:19:49 2015 -0400
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<!-- Tool Shed dependency definition: bioperl 1.6 comes from the package repository named below. -->
+<tool_dependency>
+    <package name="bioperl" version="1.6">
+        <repository name="package_bioperl_1_6"
+                    owner="iuc"
+                    toolshed="https://toolshed.g2.bx.psu.edu"
+                    changeset_revision="5ef71da82044"/>
+    </package>
+</tool_dependency>