Next changeset 1:792a280ebeea (2015-10-09) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bioperl commit 799339e22181d28cb2b145454d353d6025779636 |
added:
bp_genbank2gff3.xml macros.xml test-data/seq.gb test-data/seq.gb.0.gff test-data/seq.gb.1.gff tool_dependencies.xml |
b |
diff -r 000000000000 -r f79bcd53b9a3 bp_genbank2gff3.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bp_genbank2gff3.xml Fri Oct 09 09:19:49 2015 -0400 |
[ |
<tool id="bp_genbank2gff3" name="Genbank to GFF3" version="1.0">
    <!--
      Galaxy wrapper around the BioPerl script bp_genbank2gff3.pl.
      Input:  one GenBank dataset (format "gb").
      Output: one GFF3 dataset, captured from the standard output of the script.
    -->
    <description>converter</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Declare the bioperl package (macro defined in macros.xml) so that Galaxy
         resolves the dependency shipped in tool_dependencies.xml. -->
    <expand macro="requirements" />
    <!-- Shared exit-code / stderr handling, also defined in macros.xml. -->
    <expand macro="stdio" />
    <command><![CDATA[
## "--outdir -" asks the script to write the GFF3 to stdout, which the final
## line redirects into the output dataset. Every interpolated value is
## single-quoted so it always reaches the script as exactly one argument.
bp_genbank2gff3.pl
$noinfer
#if str($sofile.sofile) != "__none__":
    --sofile
    #if str($sofile.sofile) == "url":
        '${sofile.so_url}'
    #else:
        live
    #end if
#end if
--outdir -
--ethresh $ethresh
$model
--typesource '${typesource}'

'$genbank'
> '$gff3']]></command>
    <inputs>
        <param label="Genbank file" name="genbank" type="data" format="gb"/>
        <!-- Checked (the default) emits nothing; unchecked emits the noinfer flag. -->
        <param name="noinfer" truevalue="" falsevalue="--noinfer" checked="true" type="boolean" label="Infer exon/mRNA subfeatures"/>
        <!-- "__none__" omits the sofile option entirely, "live" passes the literal
             keyword, and "url" passes the user-supplied location. -->
        <conditional name="sofile" label="Sequence Ontology">
            <param name="sofile" label="Sequence Ontology File" type="select">
                <option value="__none__" selected="True">None specified</option>
                <option value="live">Latest Sequence Ontology</option>
                <option value="url">User Specified</option>
            </param>
            <when value="__none__" />
            <when value="live" />
            <when value="url">
                <param name="so_url" label="Sequence Ontology URL" type="text"/>
            </when>
        </conditional>
        <!-- The selected value is passed verbatim as the ethresh argument. -->
        <param name="ethresh" label="Error threshold for unflattener" type="select">
            <option value="0">Strict</option>
            <option value="1" selected="True">Medium</option>
            <option value="2">Loose</option>
            <option value="3">Ignore Errors</option>
        </param>
        <!-- Each option value is itself the command-line flag that gets emitted. -->
        <param name="model" label="Gene Model" type="select">
            <option value="--CDS" selected="True">Default GFF gene model</option>
            <option value="--noCDS">Alternate gene-RNA-protein-exon model</option>
        </param>
        <param name="typesource" label="Sequence Ontology type for landmark feature" help="E.g. chromosome, region, contig" value="contig" type="text" />
    </inputs>
    <outputs>
        <data format="gff3" name="gff3" label="${genbank.name} as GFF3"/>
    </outputs>
    <tests>
        <!-- NOTE(review): both tests run with the same effective parameters
             (noinfer="True" is already the default), yet they expect different
             files. Confirm which options produced seq.gb.0.gff and seq.gb.1.gff. -->
        <test>
            <param name="genbank" value="seq.gb" />
            <param name="noinfer" value="True" />
            <output name="gff3" file="seq.gb.0.gff" ftype="gff3" lines_diff="6"/>
        </test>
        <test>
            <param name="genbank" value="seq.gb" />
            <output name="gff3" file="seq.gb.1.gff" ftype="gff3" lines_diff="4"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**:

This tool uses Bio::SeqFeature::Tools::Unflattener and
Bio::Tools::GFF to convert GenBank flatfiles to GFF3 with gene
containment hierarchies mapped for optimal display in gbrowse.

The input is a GenBank flatfile (the underlying script was written with
RefSeq contigs in mind). The file may contain multiple GenBank records.

**Designed for RefSeq**

This script is designed for RefSeq genomic sequence entries. It may
work for third party annotations but this has not been tested.
But see below, Uniprot/Swissprot works, EMBL and possibly EMBL/Ensembl
if you don't mind some gene model unflattener errors (dgg).

**G-R-P-E Gene Model**

Don Gilbert worked this over with needs to produce GFF3 suited to
loading to GMOD Chado databases.

This writes GFF with an alternate, but useful Gene model,
instead of the consensus model for GFF3

  [ gene > mRNA> (exon,CDS,UTR) ]

This alternate is

  gene > mRNA > polypeptide > exon

means the only feature with dna bases is the exon. The others
specify only location ranges on a genome. Exon of course is a child
of mRNA and protein/peptide.

The protein/polypeptide feature is an important one, having all the
annotations of the GenBank CDS feature, protein ID, translation, GO
terms, Dbxrefs to other proteins.

UTRs, introns, CDS-exons are all inferred from the primary exon bases
inside/outside appropriate higher feature ranges. Other special gene
model features remain the same.

**Authors**

Sheldon McKay (mckays@cshl.edu)

Copyright (c) 2004 Cold Spring Harbor Laboratory.

**Author of hacks for GFF2Chado loading**

Don Gilbert (gilbertd@indiana.edu)
    ]]></help>
    <citations>
        <citation type="doi">10.1101/gr.361602</citation>
    </citations>
</tool>
b |
diff -r 000000000000 -r f79bcd53b9a3 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Oct 09 09:19:49 2015 -0400 |
b |
<macros>
    <!-- Shared version token; its value matches the bioperl package version below. -->
    <token name="@WRAPPER_VERSION@">1.6</token>

    <!-- Package requirements. A tool expanding this macro may nest further
         requirement elements, which are inserted at the yield point. -->
    <xml name="requirements">
        <requirements>
            <requirement version="1.6" type="package">bioperl</requirement>
            <yield/>
        </requirements>
    </xml>

    <!-- Failure detection shared by the tools that import this file. -->
    <xml name="stdio">
        <stdio>
            <!-- Any non-zero exit status, negative or positive, counts as an error. -->
            <exit_code range=":-1" />
            <exit_code range="1:" />
            <!-- In case the return code has not been set properly, also scan the
                 tool output for these error markers. -->
            <regex match="Exception:" />
            <regex match="Error:" />
        </stdio>
    </xml>
</macros>
b |
diff -r 000000000000 -r f79bcd53b9a3 test-data/seq.gb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/seq.gb Fri Oct 09 09:19:49 2015 -0400 |
b |
b'@@ -0,0 +1,7506 @@\n+LOCUS NC_014662 165540 bp DNA linear PHG 12-NOV-2010\n+DEFINITION Enterobacteria phage CC31, complete genome.\n+ACCESSION NC_014662\n+VERSION NC_014662.1 GI:311992992\n+DBLINK BioProject: PRJNA60119\n+KEYWORDS RefSeq.\n+SOURCE Enterobacteria phage CC31\n+ ORGANISM Enterobacteria phage CC31\n+ Viruses; dsDNA viruses, no RNA stage; Caudovirales; Myoviridae;\n+ Tevenvirinae; T4likevirus.\n+REFERENCE 1 (bases 1 to 165540)\n+ AUTHORS Petrov,V.M., Ratnayaka,S., Nolan,J.M., Miller,E.S. and Karam,J.D.\n+ TITLE Genomes of the T4-related bacteriophages as windows on microbial\n+ genome evolution\n+ JOURNAL Virol. J. 7 (1), 292 (2010)\n+ PUBMED 21029436\n+ REMARK Publication Status: Online-Only\n+REFERENCE 2 (bases 1 to 165540)\n+ CONSRTM NCBI Genome Project\n+ TITLE Direct Submission\n+ JOURNAL Submitted (12-NOV-2010) National Center for Biotechnology\n+ Information, NIH, Bethesda, MD 20894, USA\n+REFERENCE 3 (bases 1 to 165540)\n+ AUTHORS Petrov,V.M., Ratnayaka,S. and Karam,J.D.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (14-DEC-2009) Biochemistry, Tulane University Medical\n+ Center, 1430 Tulane Ave., New Orleans, LA 70112, USA\n+COMMENT PROVISIONAL REFSEQ: This record has not yet been subject to final\n+ NCBI review. 
The reference sequence is identical to GU323318.\n+ COMPLETENESS: full length.\n+FEATURES Location/Qualifiers\n+ source 1..165540\n+ /organism="Enterobacteria phage CC31"\n+ /mol_type="genomic DNA"\n+ /host="Escherichia coli"\n+ /db_xref="taxon:709484"\n+ gene complement(1..2214)\n+ /gene="rIIA"\n+ /locus_tag="CC31p001"\n+ /db_xref="GeneID:9926434"\n+ CDS complement(1..2214)\n+ /gene="rIIA"\n+ /locus_tag="CC31p001"\n+ /codon_start=1\n+ /transl_table=11\n+ /product="membrane-associated affects host membrane\n+ ATPase"\n+ /protein_id="YP_004009859.1"\n+ /db_xref="GI:311992993"\n+ /db_xref="GeneID:9926434"\n+ /translation="MKLIADNEEVLGSAGKKTKFTIQASPKAFMILSDKLYKNKIRAV\n+ VRELTTNWLDAHILNGKQDVPCEIKCPNKLDPRFIIRDFGPGMSDFQIRGNDEEPGLY\n+ NSYFASTKAESNDFIGALGLGSKSPFSYTKSFTIVSYHDGEARGYMAVMNNGEPDIRP\n+ LFVEPMKEGEQTGIEITVPVRLEDVEKFAHEIAYVMRPMPVKPIITGASINIDSFPQD\n+ VEWFHSPNGFGKDSRGLYAVYGKIVYPIDQFQGLECSWLLNRYGCVYVNFPLGELDIT\n+ PSREELSLDDVTIENIKKRVNSLEKATLEADIAHLQSIENKRELVRQLSQFDSNQRAI\n+ LNRQNIMFGDKTYGEWVETYNINELQKKIESSMVYTYLVNLDAERMRLTSSWSTRKRT\n+ SVSNLLNVQQNKVHIMIDDKPSRRAAMFRGMYLKDFHRYQRFIMIDPEDPKHLEIKDE\n+ IIKLFDQDEVVVLKSSEMEEYRKFEKEHYSNSSKGDGGPRPKSPNGQLHKLDAKGGWW\n+ TSEDLFMNKDDIAELEGYAIFRSRDEIRTFPEELYWSGIDIETIRTLAKELGVTEFYV\n+ IRPNSAKVAKLNDNLESLDRFIVDEFIKIIDDLDADEYLPSTFFNRRVVSNIINTPEL\n+ KWLLKFITGKDNGERVSRINEIGRNLKNTYITASPDGSSQIREDLALCVRIYNKLTDA\n+ ASAEVDAAFKKFEKEYPVIEHMLNEWRVANYADDISRIMRALESAPSLKGKDEDE"\n+ gene complement(2220..2426)\n+ /gene="rIIA.1"\n+ /locus_tag="CC31p002"\n+ /db_xref="GeneID:9926148"\n+ CDS complement(2220..2426)\n+ /gene="rIIA.1"\n+ /locus_tag="CC31p002"\n+ /codon_start=1\n+ /transl_table=11\n+ /product="hypothetical protein"\n+ /protein_id="YP_004009860.1"\n+ /db_xref="GI:311992994"\n+ /db_xref="GeneID:9926148"'..b'agttaggaat cgcacggttt tctactgtgt agattgcgaa ttctttagct tcattatcaa\n+ 162481 tgatactctg aaggtctctc aaattgagct gacctattga gtcatcaact tgtttagcca\n+ 162541 tcatgtcaaa aatagttgtc atattaccct accagattaa attaccgagg tccatcataa\n+ 162601 cacatgcaag 
aaaaagcatt atttcataaa taacataaca gactccaaca aatccagctg\n+ 162661 aagctataat taaagccaaa ataatactta cagtaagttt cattttgtta gacctgtgta\n+ 162721 gtagaacaaa cgttctacag ccaatgcggc tacaccaaag cacatgcttc caatgactct\n+ 162781 tggtggaggg gttaacccct cccaaataaa aataccagag gcaatgaaca gcggagccat\n+ 162841 aagcaagaac accaaagccc aaatctgttt gaatggactc atattaatat gcctgcagaa\n+ 162901 taaatttgaa gttatcattc agcatacgat tcatttcttc aaggttctgg taagaatcgt\n+ 162961 tgtgtttacg ggtgaatgcc agagccaatt ggcctttacc aaagccagta gtcagaggtt\n+ 163021 tcattttgtc agctggaatg aaatacacat catagatgac gttgtttgaa cgcatggtgc\n+ 163081 gaccgagctg agaacggcct tgacgaatct gagacagaac attcatgaaa ccggacttag\n+ 163141 aacgttgacg accaacgtag aatcgtgctg ctactgcacg ccaagctgat gcacctttta\n+ 163201 ccatgaagta gaagcctggt tcagccagaa cagatgggtc aacatatcct acagtttcgc\n+ 163261 catttttcac agaaacaaca tgagtaccgc cagctgccag aatatcacca cgagtcatat\n+ 163321 aatcacgcat atcattttcc tcaatcaatt aaaagtttat cctcaacggg cccgaaggcc\n+ 163381 ctgaattaaa gcccggagct ttgctgagca ccgttcatca gaacttctac agcacctttc\n+ 163441 aaaccttcag tttgaatgac gttaacaatt tcaacagtcg cccaaatgct aaaaccgatt\n+ 163501 ccaacaaaaa ccataatagc tattatggaa aagaaaatcc aaaaaatctt ttgcatttta\n+ 163561 ttcatgctag aactataaac tttgcgaccc ataatatttt cctcagaagt taatccatgc\n+ 163621 cattacaaca tcatgtgcta aggcgaatgc gaaagctcca aagagcacca tcagaagagt\n+ 163681 taaagtccag atggctttga gaatcttttt gatgatgttc atttgtttct ccgttagttg\n+ 163741 atttctagtc tatagtatac catctaacgg aggatgtaaa cggttgagtt aaagatttag\n+ 163801 ataccaacct tggtagttgc tcttgcgaac cactgacttg aggtcttcat ggtcaccagc\n+ 163861 gaactttagg gtaaactggt aaacactagg cccactttct accacatcta atatgataat\n+ 163921 acagtctgct atcctgtgtg tcaagaaggg tcctaggttt atccctgagc caccgggata\n+ 163981 gtcacccgag taaccagttg taactgaaat ccactttgaa tctgactggt gagttttgac\n+ 164041 ctcgacccgg agtccacaga atcgaggatg agcaagaaca tcccacgcgt atgtgtaagg\n+ 164101 gtcgtcatga ttttcttgac cgcctgcaac atagccgtcc atccaatcag ctactgcttt\n+ 164161 ttcagctaac tgagcaattg cacaacgatt 
gattacttca gttttatctt ggtccgggtc\n+ 164221 ttggcgcaga gaataagcgg cagtgctttt aattttgacc ctatcttctg gagttaaatc\n+ 164281 actgaacgcc cgggtaaaag tcttcagggc tttcaatctc aacaggcctg gattcgtctt\n+ 164341 ttccataaat tcctctgata tgtagttcac caaaataaat gcggtcatct tcttcaagat\n+ 164401 attctgcatc agcaggaggg atttcattca ccacttcatc acagtgtaac caagcataat\n+ 164461 gaacatctcc tggatgtttg accagttttc tgcaatatac tgattttgca taatggtctc\n+ 164521 cacgaagatt aacaacagct gaatcactta cgttgaatgg attaatcatg acagcttcct\n+ 164581 caaaaagaaa gggcccgaag gcccttagat tagatataac aatcagtttc ttggttgtat\n+ 164641 tccgcttcac ggattacttt gtactggcaa gtacgcattt tagcgttgtt gtaatctacc\n+ 164701 gggatagata ctacatctcg tggatgtact ttaacaacta ccagacggtc attaccaccg\n+ 164761 cggaagtgtt tgatgtaact gcgagcacaa acgtgcagac cagcttcgca ggtgcgattt\n+ 164821 tcatcttcaa ctacatgagt acgaggcatt ttaactactc gaccgattga gttatcgaag\n+ 164881 cgaccggtat agcagtcagt gtaatcgtta cgaataactt tccatgccag gaagtgtcca\n+ 164941 tcttcagtga tttcgatgtc gttagcctgc aggaagtcaa acagacgagt cacagcagtt\n+ 165001 ttacttgggt tttccagcag attttccagg aatggcagat agaattcaaa atcttcacca\n+ 165061 ttctgcatat cgttaatgat acggtctaca aggccagatt tgatttcaat gtcctgatag\n+ 165121 aacaactgac cattttcaat gcgaatgtta ccatcaacat aagaagtgat tgccttctca\n+ 165181 atgttaatca gattaatcgc agattcaaaa tcaccatcaa cacagaactg aagagcttct\n+ 165241 ttaaagtttg gatggtcttt atcagcagcg taagtatcac ggcctactgt gatagacagg\n+ 165301 aacttagaag aaccagccca tacaacatct tctggattga agactttaac tggcgaagtc\n+ 165361 actttaactt ccggttcaac ttccgcttta cggcttttaa tttcattaac aacacgacga\n+ 165421 atagtatcaa cagaacaaga ataaatttct gctaattcgg tttgagtata gccctgcatg\n+ 165481 aattcatcat gaatggctac tttttctaca tcattaaaca ttttaacaac agagactttc\n+//\n+\n' |
b |
diff -r 000000000000 -r f79bcd53b9a3 test-data/seq.gb.0.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/seq.gb.0.gff Fri Oct 09 09:19:49 2015 -0400 |
b |
b'@@ -0,0 +1,5208 @@\n+##gff-version 3\n+##sequence-region NC_014662 1 165540\n+# conversion-by bp_genbank2gff3.pl\n+# organism Enterobacteria phage CC31\n+# Note Enterobacteria phage CC31, complete genome.\n+# date 12-NOV-2010\n+NC_014662\tGenBank\tregion\t1\t165540\t.\t+\t1\tID=NC_014662;Dbxref=BioProject:PRJNA60119,taxon:709484;Name=NC_014662;Note=Enterobacteria phage CC31%2C complete genome.,PROVISIONAL REFSEQ: This record has not yet been subject to final NCBI review. The reference sequence is identical to GU323318. COMPLETENESS: full length. ;comment1=PROVISIONAL REFSEQ: This record has not yet been subject to final NCBI review. The reference sequence is identical to GU323318. COMPLETENESS: full length. ;date=12-NOV-2010;host=Escherichia coli;mol_type=genomic DNA;organism=Enterobacteria phage CC31\n+NC_014662\tGenBank\tgene\t1\t2214\t.\t-\t1\tID=CC31p001;Dbxref=GeneID:9926434;Name=rIIA;locus_tag=CC31p001\n+NC_014662\tGenBank\tmRNA\t1\t2214\t.\t-\t1\tID=CC31p001.t01;Parent=CC31p001\n+NC_014662\tGenBank\tCDS\t1\t2214\t.\t-\t1\tID=CC31p001.p01;Parent=CC31p001.t01;Dbxref=GI:311992993,GeneID:9926434;Name=rIIA;codon_start=1;locus_tag=CC31p001;product=membrane-associated affects host membrane ATPase;protein_id=YP_004009859.1;transl_table=11;translation=length.737\n+NC_014662\tGenBank\texon\t1\t2214\t.\t-\t1\tParent=CC31p001.t01\n+NC_014662\tGenBank\tgene\t2220\t2426\t.\t-\t1\tID=CC31p002;Dbxref=GeneID:9926148;Name=rIIA.1;locus_tag=CC31p002\n+NC_014662\tGenBank\tmRNA\t2220\t2426\t.\t-\t1\tID=CC31p002.t01;Parent=CC31p002\n+NC_014662\tGenBank\tCDS\t2220\t2426\t.\t-\t1\tID=CC31p002.p01;Parent=CC31p002.t01;Dbxref=GI:311992994,GeneID:9926148;Name=rIIA.1;codon_start=1;locus_tag=CC31p002;product=hypothetical 
protein;protein_id=YP_004009860.1;transl_table=11;translation=length.68\n+NC_014662\tGenBank\texon\t2220\t2426\t.\t-\t1\tParent=CC31p002.t01\n+NC_014662\tGenBank\tgene\t2420\t2704\t.\t-\t1\tID=CC31p003;Dbxref=GeneID:9926149;Name=CC31p003\n+NC_014662\tGenBank\tmRNA\t2420\t2704\t.\t-\t1\tID=CC31p003.t01;Parent=CC31p003\n+NC_014662\tGenBank\tCDS\t2420\t2704\t.\t-\t1\tID=CC31p003.p01;Parent=CC31p003.t01;Dbxref=GI:311992995,GeneID:9926149;Name=CC31p003;Note=predicted by GenMarkS;codon_start=1;product=hypothetical protein;protein_id=YP_004009861.1;transl_table=11;translation=length.94\n+NC_014662\tGenBank\texon\t2420\t2704\t.\t-\t1\tParent=CC31p003.t01\n+NC_014662\tGenBank\tgene\t2750\t2905\t.\t-\t1\tID=CC31p004;Dbxref=GeneID:9926150;Name=CC31p004\n+NC_014662\tGenBank\tmRNA\t2750\t2905\t.\t-\t1\tID=CC31p004.t01;Parent=CC31p004\n+NC_014662\tGenBank\tCDS\t2750\t2905\t.\t-\t1\tID=CC31p004.p01;Parent=CC31p004.t01;Dbxref=GI:311992996,GeneID:9926150;Name=CC31p004;Note=predicted by GenMarkS;codon_start=1;product=hypothetical protein;protein_id=YP_004009862.1;transl_table=11;translation=length.51\n+NC_014662\tGenBank\texon\t2750\t2905\t.\t-\t1\tParent=CC31p004.t01\n+NC_014662\tGenBank\tgene\t2945\t4789\t.\t-\t1\tID=CC31p005;Dbxref=GeneID:9926151;Name=60plus39;locus_tag=CC31p005\n+NC_014662\tGenBank\tmRNA\t2945\t4789\t.\t-\t1\tID=CC31p005.t01;Parent=CC31p005\n+NC_014662\tGenBank\tCDS\t2945\t4789\t.\t-\t1\tID=CC31p005.p01;Parent=CC31p005.t01;Dbxref=GI:311992997,GeneID:9926151;Name=60plus39;codon_start=1;locus_tag=CC31p005;product=DNA topoisomerase 
subunit;protein_id=YP_004009863.1;transl_table=11;translation=length.614\n+NC_014662\tGenBank\texon\t2945\t4789\t.\t-\t1\tParent=CC31p005.t01\n+NC_014662\tGenBank\tgene\t4835\t5302\t.\t-\t1\tID=CC31p006;Dbxref=GeneID:9926152;Name=CC31p006\n+NC_014662\tGenBank\tmRNA\t4835\t5302\t.\t-\t1\tID=CC31p006.t01;Parent=CC31p006\n+NC_014662\tGenBank\tCDS\t4835\t5302\t.\t-\t1\tID=CC31p006.p01;Parent=CC31p006.t01;Dbxref=GI:311992998,GeneID:9926152;Name=CC31p006;Note=predicted by GenMarkS;codon_start=1;product=hypothetical protein;protein_id=YP_004009864.1;transl_table=11;translation=length.155\n+NC_014662\tGenBank\texon\t4835\t5302\t.\t-\t1\tParent=CC31p006.t01\n+NC_014662\tGenBank\tgene\t5302\t6837\t.\t-\t1\tID=CC31p007;Dbxref=GeneID:9926153;Name=CC31p007\n+NC_014662\tGenBank\tmRNA\t5302\t6837\t.\t-\t1\tID=CC31p007.t01;Parent=CC31p007\n+NC_014662\tGenBank\tCDS\t5302\t6837\t.\t-\t1\tID=CC31p007.p01;Parent=CC31p007.t01;Dbxref=GI:311992999,GeneID:9926153;Name=CC31p007;Note'..b'ISSGKYYHYFRGSGYVAYDMDEGVKI\n+NKGGLNVGGNTSITGNTYVTGAVTSNGQFKTSANDGLKIWNGDYGMILRRSENNFYLIPT\n+AQGQAENGGISNLRPFYIDCATGNATLGHNVTINGQSTLNGNVTLGSGQINLLGGSGNIG\n+FAKAGTSPYSMRIFYAGNTERGNRLEIADDSSYLMYIERHPSIGIQLVTNGGHIKTNAGS\n+VYTEAIALNSGARFVADGNIYLPNATNGFSAGWVLGQINSRLNAAVQKSGDTMTGTLTIN\n+NGANTGVMVSGITSGSDKGLIRGNVDGGAHDQWENRSSGLQLDCPSSDDSAYNVWKATKW\n+GAYHIAAMDVYAPSGNGYVRLVIRNGGAHIWNNSSYTSPVQINAPEFYLTSDISLKKDIR\n+SIEDSRSNLHKVEIKRYAMKDGSNDNAIGVIAQEVQEVYPELVNENKDTGKLSVNYRGLS\n+SVLWKIVQEQDKELEDVKSRLARIEELLSK\n+>CC31p258.p01\n+MAIAGPNIGTSWFRETGQRPMSAARVAVRLPARPGGARQMVGLSKEVNYNIGANNSYNKD\n+TLINYLRSQGSTPVVVTITGNLVSYSSGVACLEFPANLPNAYVHLIINGGVTLYGRGGNG\n+GVKGNGAAGGHAINNQFGTRLRITNNGAIAGGGGGGGGNSANGGMGGGGRPFGYADKTHP\n+PAAATSRAATDGTLTSPGIGAEYKIGTAVQYTCGSGGNVGANGGASTGRLGTNYGGGSAG\n+RAVIGNAPTWNKVGTIYGSRV\n+>CC31p259.p01\n+MTQRTPLPGISDILFGVLDRLFKDNATGRVLASRIVALIVVFILSLTWYRLDAIMQVWKE\n+SRYETYTKVLQQDKEAKFEASALEQLQIAHVSSNADFSAIYSFRPRNLNYFVDLIAYEGR\n+LPSTVNEKNLGGFPVDKTSNEYSAHLRGAYFSSEDEFVFLPTKKKDGELKYMYSCPYFNL\n+DNVYAGTVSMYWYSKPLLNENRL
AAICSQAARTLGRAK\n+>CC31p260.p01\n+MSKLEIVREIVTVASVLIKFGSEHILEKREHFIAFLNEIGIKNDLGRPLNQSNFRKMIEE\n+MTAEEKQQLVEEFNEGFESVYRYMMMYSKP\n+>CC31p261.p01\n+MNQTVEIQRYLEGMMNKLALGDMVDYSYQEAMEICHWMKRRVRVVGAEWYISAELIDGRY\n+AIRYDSGDEYVTLPGHVLQRWEVVN\n+>CC31p262.p01\n+MNKISHIEAERKAWDEHTSVVDAITPVYHLVVWFSLSQEEQDCSWKYFEDTTFQKFVNAI\n+NHPESLLTHCEIKASEETFCYFTVSSKRSVSDVMQGYQFLKGVADEFELKINYEKI\n+>CC31p263.p01\n+MSTSEIKMVPYVTYTSERLREFQDQFNGTGIFYDTLSEIENDVKSDINDNDFIIRMFLNG\n+TFEIVAISDKRIEDAIAHIDNIIDEMTEGYYE\n+>CC31p264.p01\n+MNNPVAKHDFNKGGAHKDMKRQEKESRRKQKHKGKGYEHI\n+>CC31p265.p01\n+MSDLSCLRHNIILIKTQIASLQRANEMMDENWGTYANDPGFRMAEHPFMKKLLGKDYICP\n+FETPYNGGVKPFLLDIYKAMNNEMIKELERRLEQLNENNTQKE\n+>CC31p266.p01\n+MNGDLIETQNIGERIPEICFIKADWWDGRLLQRVIVCAANRFKLKDGGELVIPGTRHYSK\n+DMALVLDQMRDKVVSEQVYGDDQGFLDQWGNYLTRKEALIIATHAGQINTRRQKGGPADT\n+LFSEDLY\n+>CC31p267.p01\n+MNMKNLNAQIDRVKKSMNRPAILNELQRCAERVTDEHYLPTEAWEVWFRGTHLGSIERKY\n+KGCYAVHSSLGRHCGDCATYMQALARFIDSCSVVIAKKELEEVEEWINEVVKEPELRVWG\n+IREPKTLWQKIKGFFK\n+>CC31p268.p01\n+MSKVIYIVKASENSISENAANVLIVVAKKDFITSSEVRDVLADKLSAASVNSNIGVLIKK\n+GLIEKSGDGLIVSAEGQEIINQAAVIYAEENAPELLEKRNTRKARPITDQMEADKNLMME\n+ILATKDNLFTIKKLDVYRSNFIAVLEKRTFGIRSFEVSNKGNFRISGYKMTEEQVKHFED\n+LGMVAKHSKNGNVYLDIPRTQENIENIIHAVDTL\n+>CC31p269.p01\n+MKTLINNLNALLANSGVDLDDTMHAARLHSSNTDSNSYLTIWYNTESENYVLVWVYVNNY\n+DMVAVLDAEVEDVAETLNEAKKLFADFFRG\n+>CC31p270.p01\n+MISIIVAALKNGGVITETSDFAYVKFNRMSIDKDTQARYWVMVYDHNESQYILTEVLVDL\n+ETMEADFVGCPELEGTFEEVLEAYVAK\n+>CC31p271.p01\n+MTTIFDMMAKQVDDSIGQLNLRDLQSIIDNEAKEFAIYTVENRAIPNLIDGFKPVQRFVI\n+ARALDLSRGNKEKFHKLASVAGGVADLGYHHGEGSAQDAGALMANTWNNNYPLLDGQGNF\n+GSRLVQKAAASRYIFCRISDNFRKVYKDTEIAPEHKDKEHVPPAFYLPIIPTVLLNGVQG\n+IATGYATKILPHSFESVVECTKLALQGKLDKEPEVQIPQFRGEVVRLEDGSIECRGLYKF\n+TSASQMYISEIPAKFDRETYVEKVLEPMVDKNFISYVDDCSKTGFGFKVKFKKDYMLGEC\n+DEKYRHEKIMRDFKLVEKMSQFIVVIDENGKLNDKFQSSSELIKHFVEVRKTYIVKRIEH\n+KIKECDEAFKLALAKAMFIKEVIEGSIVIQGKTRKQLTSELESRPTYAPFADKLVSMNIY\n+HITSDEAKKLAQQAKDLKAELKYWQETTPETEYMKDLEAL\n+>CC31p272.p01\n+MKLTVSIILALIIASAGFVGVCYVIYEIMLFLACV
MMDLGNLIW\n+>CC31p273.p01\n+MSPFKQIWALVFLLMAPLFIASGIFIWEGLTPPPRVIGSMCFGVAALAVERLFYYTGLTK\n+\n+>CC31p274.p01\n+MRDYMTRGDILAAGGTHVVSVKNGETVGYVDPSVLAEPGFYFMVKGASAWRAVAARFYVG\n+RQRSKSGFMNVLSQIRQGRSQLGRTMRSNNVIYDVYFIPADKMKPLTTGFGKGQLALAFT\n+RKHNDSYQNLEEMNRMLNDNFKFILQAY\n+>CC31p275.p01\n+MGRKVYSSSMNKMQKIFWIFFSIIAIMVFVGIGFSIWATVEIVNVIQTEGLKGAVEVLMN\n+GAQQSSGL\n+>CC31p276.p01\n+MNIIKKILKAIWTLTLLMVLFGAFAFALAHDVVMAWINF\n+>CC31p277.p01\n+MKALKTFTRAFSDLTPEDRVKIKSTAAYSLRQDPDQDKTEVINRCAIAQLAEKAVADWMD\n+GYVAGGQENHDDPYTYAWDVLAHPRFCGLRVEVKTHQSDSKWISVTTGYSGDYPGGSGIN\n+LGPFLTHRIADCIIILDVVESGPSVYQFTLKFAGDHEDLKSVVRKSNYQGWYLNL\n+>CC31p278.p01\n+MINPFNVSDSAVVNLRGDHYAKSVYCRKLVKHPGDVHYAWLHCDEVVNEIPPADAEYLEE\n+DDRIYFGELHIRGIYGKDESRPVEIESPEDFYPGVQ\n+>CC31p279.p01\n+MFNDVEKVAIHDEFMQGYTQTELAEIYSCSVDTIRRVVNEIKSRKAEVEPEVKVTSPVKV\n+FNPEDVVWAGSSKFLSITVGRDTYAADKDHPNFKEALQFCVDGDFESAINLINIEKAITS\n+YVDGNIRIENGQLFYQDIEIKSGLVDRIINDMQNGEDFEFYLPFLENLLENPSKTAVTRL\n+FDFLQANDIEITEDGHFLAWKVIRNDYTDCYTGRFDNSIGRVVKMPRTHVVEDENRTCEA\n+GLHVCARSYIKHFRGGNDRLVVVKVHPRDVVSIPVDYNNAKMRTCQYKVIREAEYNQETD\n+CYI\n' |
b |
diff -r 000000000000 -r f79bcd53b9a3 test-data/seq.gb.1.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/seq.gb.1.gff Fri Oct 09 09:19:49 2015 -0400 |
b |
b'@@ -0,0 +1,4645 @@\n+# Input: test-data/seq.gb\n+##gff-version 3\n+##sequence-region NC_014662 1 165540\n+# conversion-by bp_genbank2gff3.pl\n+# organism Enterobacteria phage CC31\n+# Note Enterobacteria phage CC31, complete genome.\n+# date 12-NOV-2010\n+# working on contig:NC_014662, Enterobacteria phage CC31, Enterobacteria phage CC31, complete genome., 12-NOV-2010\n+NC_014662\tGenBank\tcontig\t1\t165540\t.\t+\t1\tID=NC_014662;Dbxref=BioProject:PRJNA60119,taxon:709484;Name=NC_014662;Note=Enterobacteria phage CC31%2C complete genome.,PROVISIONAL REFSEQ: This record has not yet been subject to final NCBI review. The reference sequence is identical to GU323318. COMPLETENESS: full length. ;comment1=PROVISIONAL REFSEQ: This record has not yet been subject to final NCBI review. The reference sequence is identical to GU323318. COMPLETENESS: full length. ;date=12-NOV-2010;host=Escherichia coli;mol_type=genomic DNA;organism=Enterobacteria phage CC31\n+NC_014662\tGenBank\tCDS\t1\t2214\t.\t-\t1\tID=CC31p001;Dbxref=GI:311992993,GeneID:9926434;Name=rIIA;codon_start=1;locus_tag=CC31p001;product=membrane-associated affects host membrane ATPase;protein_id=YP_004009859.1;transl_table=11;translation=length.737\n+NC_014662\tGenBank\tgene\t1\t2214\t.\t-\t1\tID=CC31p001.gene;Alias=CC31p001;Dbxref=GeneID:9926434;Name=rIIA;locus_tag=CC31p001\n+NC_014662\tGenBank\tCDS\t2220\t2426\t.\t-\t1\tID=CC31p002;Dbxref=GI:311992994,GeneID:9926148;Name=rIIA.1;codon_start=1;locus_tag=CC31p002;product=hypothetical protein;protein_id=YP_004009860.1;transl_table=11;translation=length.68\n+NC_014662\tGenBank\tgene\t2220\t2426\t.\t-\t1\tID=CC31p002.gene;Alias=CC31p002;Dbxref=GeneID:9926148;Name=rIIA.1;locus_tag=CC31p002\n+NC_014662\tGenBank\tCDS\t2420\t2704\t.\t-\t1\tID=CC31p003;Dbxref=GI:311992995,GeneID:9926149;Name=CC31p003;Note=predicted by GenMarkS;codon_start=1;product=hypothetical 
protein;protein_id=YP_004009861.1;transl_table=11;translation=length.94\n+NC_014662\tGenBank\tgene\t2420\t2704\t.\t-\t1\tID=CC31p003.gene;Alias=CC31p003;Dbxref=GeneID:9926149;Name=CC31p003\n+NC_014662\tGenBank\tCDS\t2750\t2905\t.\t-\t1\tID=CC31p004;Dbxref=GI:311992996,GeneID:9926150;Name=CC31p004;Note=predicted by GenMarkS;codon_start=1;product=hypothetical protein;protein_id=YP_004009862.1;transl_table=11;translation=length.51\n+NC_014662\tGenBank\tgene\t2750\t2905\t.\t-\t1\tID=CC31p004.gene;Alias=CC31p004;Dbxref=GeneID:9926150;Name=CC31p004\n+NC_014662\tGenBank\tCDS\t2945\t4789\t.\t-\t1\tID=CC31p005;Dbxref=GI:311992997,GeneID:9926151;Name=60plus39;codon_start=1;locus_tag=CC31p005;product=DNA topoisomerase subunit;protein_id=YP_004009863.1;transl_table=11;translation=length.614\n+NC_014662\tGenBank\tgene\t2945\t4789\t.\t-\t1\tID=CC31p005.gene;Alias=CC31p005;Dbxref=GeneID:9926151;Name=60plus39;locus_tag=CC31p005\n+NC_014662\tGenBank\tCDS\t4835\t5302\t.\t-\t1\tID=CC31p006;Dbxref=GI:311992998,GeneID:9926152;Name=CC31p006;Note=predicted by GenMarkS;codon_start=1;product=hypothetical protein;protein_id=YP_004009864.1;transl_table=11;translation=length.155\n+NC_014662\tGenBank\tgene\t4835\t5302\t.\t-\t1\tID=CC31p006.gene;Alias=CC31p006;Dbxref=GeneID:9926152;Name=CC31p006\n+NC_014662\tGenBank\tCDS\t5302\t6837\t.\t-\t1\tID=CC31p007;Dbxref=GI:311992999,GeneID:9926153;Name=CC31p007;Note=N-terminal part is similar to Hoc protein and C-terminal part is similar to lipolytic enzyme%2C G-D-S-L;codon_start=1;product=hypothetical protein;protein_id=YP_004009865.1;transl_table=11;translation=length.511\n+NC_014662\tGenBank\tgene\t5302\t6837\t.\t-\t1\tID=CC31p007.gene;Alias=CC31p007;Dbxref=GeneID:9926153;Name=CC31p007\n+NC_014662\tGenBank\tCDS\t6870\t7130\t.\t-\t1\tID=CC31p008;Dbxref=GI:311993000,GeneID:9926154;Name=39.1;codon_start=1;locus_tag=CC31p008;product=gp39.1 hypothetical 
protein;protein_id=YP_004009866.1;transl_table=11;translation=length.86\n+NC_014662\tGenBank\tgene\t6870\t7130\t.\t-\t1\tID=CC31p008.gene;Alias=CC31p008;Dbxref=GeneID:9926154;Name=39.1;locus_tag=CC31p008\n+NC_014662\tGenBank\tCDS\t7127\t7222\t.\t-\t1\tID=CC31p009;Dbxref=GI:311993001,GeneID:9926155;Name=CC31p009;Note=predicted by GenMarkS;codon_start=1;product=hypothetical protein;protein_'..b'ALGDNDTGFRNDGDGMFSVMANSRALVNYNASAPKFQIEHRKATRITHTDNT\n+NTTILPSNNNSLLEIDTSLDGNNAGGNGLTLLGYISSGKYYHYFRGSGYVAYDMDEGVKI\n+NKGGLNVGGNTSITGNTYVTGAVTSNGQFKTSANDGLKIWNGDYGMILRRSENNFYLIPT\n+AQGQAENGGISNLRPFYIDCATGNATLGHNVTINGQSTLNGNVTLGSGQINLLGGSGNIG\n+FAKAGTSPYSMRIFYAGNTERGNRLEIADDSSYLMYIERHPSIGIQLVTNGGHIKTNAGS\n+VYTEAIALNSGARFVADGNIYLPNATNGFSAGWVLGQINSRLNAAVQKSGDTMTGTLTIN\n+NGANTGVMVSGITSGSDKGLIRGNVDGGAHDQWENRSSGLQLDCPSSDDSAYNVWKATKW\n+GAYHIAAMDVYAPSGNGYVRLVIRNGGAHIWNNSSYTSPVQINAPEFYLTSDISLKKDIR\n+SIEDSRSNLHKVEIKRYAMKDGSNDNAIGVIAQEVQEVYPELVNENKDTGKLSVNYRGLS\n+SVLWKIVQEQDKELEDVKSRLARIEELLSK\n+>CC31p258\n+MAIAGPNIGTSWFRETGQRPMSAARVAVRLPARPGGARQMVGLSKEVNYNIGANNSYNKD\n+TLINYLRSQGSTPVVVTITGNLVSYSSGVACLEFPANLPNAYVHLIINGGVTLYGRGGNG\n+GVKGNGAAGGHAINNQFGTRLRITNNGAIAGGGGGGGGNSANGGMGGGGRPFGYADKTHP\n+PAAATSRAATDGTLTSPGIGAEYKIGTAVQYTCGSGGNVGANGGASTGRLGTNYGGGSAG\n+RAVIGNAPTWNKVGTIYGSRV\n+>CC31p259\n+MTQRTPLPGISDILFGVLDRLFKDNATGRVLASRIVALIVVFILSLTWYRLDAIMQVWKE\n+SRYETYTKVLQQDKEAKFEASALEQLQIAHVSSNADFSAIYSFRPRNLNYFVDLIAYEGR\n+LPSTVNEKNLGGFPVDKTSNEYSAHLRGAYFSSEDEFVFLPTKKKDGELKYMYSCPYFNL\n+DNVYAGTVSMYWYSKPLLNENRLAAICSQAARTLGRAK\n+>CC31p260\n+MSKLEIVREIVTVASVLIKFGSEHILEKREHFIAFLNEIGIKNDLGRPLNQSNFRKMIEE\n+MTAEEKQQLVEEFNEGFESVYRYMMMYSKP\n+>CC31p261\n+MNQTVEIQRYLEGMMNKLALGDMVDYSYQEAMEICHWMKRRVRVVGAEWYISAELIDGRY\n+AIRYDSGDEYVTLPGHVLQRWEVVN\n+>CC31p262\n+MNKISHIEAERKAWDEHTSVVDAITPVYHLVVWFSLSQEEQDCSWKYFEDTTFQKFVNAI\n+NHPESLLTHCEIKASEETFCYFTVSSKRSVSDVMQGYQFLKGVADEFELKINYEKI\n+>CC31p263\n+MSTSEIKMVPYVTYTSERLREFQDQFNGTGIFYDTLSEIENDVKSDINDNDFIIRMFLNG\n+TFEIVAISDKRIEDAIAHIDNIIDEMTEGYYE\n+>CC31p264\n+MNNPVAKHDFNKGGA
HKDMKRQEKESRRKQKHKGKGYEHI\n+>CC31p265\n+MSDLSCLRHNIILIKTQIASLQRANEMMDENWGTYANDPGFRMAEHPFMKKLLGKDYICP\n+FETPYNGGVKPFLLDIYKAMNNEMIKELERRLEQLNENNTQKE\n+>CC31p266\n+MNGDLIETQNIGERIPEICFIKADWWDGRLLQRVIVCAANRFKLKDGGELVIPGTRHYSK\n+DMALVLDQMRDKVVSEQVYGDDQGFLDQWGNYLTRKEALIIATHAGQINTRRQKGGPADT\n+LFSEDLY\n+>CC31p267\n+MNMKNLNAQIDRVKKSMNRPAILNELQRCAERVTDEHYLPTEAWEVWFRGTHLGSIERKY\n+KGCYAVHSSLGRHCGDCATYMQALARFIDSCSVVIAKKELEEVEEWINEVVKEPELRVWG\n+IREPKTLWQKIKGFFK\n+>CC31p268\n+MSKVIYIVKASENSISENAANVLIVVAKKDFITSSEVRDVLADKLSAASVNSNIGVLIKK\n+GLIEKSGDGLIVSAEGQEIINQAAVIYAEENAPELLEKRNTRKARPITDQMEADKNLMME\n+ILATKDNLFTIKKLDVYRSNFIAVLEKRTFGIRSFEVSNKGNFRISGYKMTEEQVKHFED\n+LGMVAKHSKNGNVYLDIPRTQENIENIIHAVDTL\n+>CC31p269\n+MKTLINNLNALLANSGVDLDDTMHAARLHSSNTDSNSYLTIWYNTESENYVLVWVYVNNY\n+DMVAVLDAEVEDVAETLNEAKKLFADFFRG\n+>CC31p270\n+MISIIVAALKNGGVITETSDFAYVKFNRMSIDKDTQARYWVMVYDHNESQYILTEVLVDL\n+ETMEADFVGCPELEGTFEEVLEAYVAK\n+>CC31p271\n+MTTIFDMMAKQVDDSIGQLNLRDLQSIIDNEAKEFAIYTVENRAIPNLIDGFKPVQRFVI\n+ARALDLSRGNKEKFHKLASVAGGVADLGYHHGEGSAQDAGALMANTWNNNYPLLDGQGNF\n+GSRLVQKAAASRYIFCRISDNFRKVYKDTEIAPEHKDKEHVPPAFYLPIIPTVLLNGVQG\n+IATGYATKILPHSFESVVECTKLALQGKLDKEPEVQIPQFRGEVVRLEDGSIECRGLYKF\n+TSASQMYISEIPAKFDRETYVEKVLEPMVDKNFISYVDDCSKTGFGFKVKFKKDYMLGEC\n+DEKYRHEKIMRDFKLVEKMSQFIVVIDENGKLNDKFQSSSELIKHFVEVRKTYIVKRIEH\n+KIKECDEAFKLALAKAMFIKEVIEGSIVIQGKTRKQLTSELESRPTYAPFADKLVSMNIY\n+HITSDEAKKLAQQAKDLKAELKYWQETTPETEYMKDLEAL\n+>CC31p272\n+MKLTVSIILALIIASAGFVGVCYVIYEIMLFLACVMMDLGNLIW\n+>CC31p273\n+MSPFKQIWALVFLLMAPLFIASGIFIWEGLTPPPRVIGSMCFGVAALAVERLFYYTGLTK\n+\n+>CC31p274\n+MRDYMTRGDILAAGGTHVVSVKNGETVGYVDPSVLAEPGFYFMVKGASAWRAVAARFYVG\n+RQRSKSGFMNVLSQIRQGRSQLGRTMRSNNVIYDVYFIPADKMKPLTTGFGKGQLALAFT\n+RKHNDSYQNLEEMNRMLNDNFKFILQAY\n+>CC31p275\n+MGRKVYSSSMNKMQKIFWIFFSIIAIMVFVGIGFSIWATVEIVNVIQTEGLKGAVEVLMN\n+GAQQSSGL\n+>CC31p276\n+MNIIKKILKAIWTLTLLMVLFGAFAFALAHDVVMAWINF\n+>CC31p277\n+MKALKTFTRAFSDLTPEDRVKIKSTAAYSLRQDPDQDKTEVINRCAIAQLAEKAVADWMD\n+GYVAGGQENHDDPYTYAWDVLAHPRFCGLRVEVKTHQSDSKWISVTTGYSGDYPGGSGIN\n+LGPFLTHRIADCIII
LDVVESGPSVYQFTLKFAGDHEDLKSVVRKSNYQGWYLNL\n+>CC31p278\n+MINPFNVSDSAVVNLRGDHYAKSVYCRKLVKHPGDVHYAWLHCDEVVNEIPPADAEYLEE\n+DDRIYFGELHIRGIYGKDESRPVEIESPEDFYPGVQ\n+>CC31p279\n+MFNDVEKVAIHDEFMQGYTQTELAEIYSCSVDTIRRVVNEIKSRKAEVEPEVKVTSPVKV\n+FNPEDVVWAGSSKFLSITVGRDTYAADKDHPNFKEALQFCVDGDFESAINLINIEKAITS\n+YVDGNIRIENGQLFYQDIEIKSGLVDRIINDMQNGEDFEFYLPFLENLLENPSKTAVTRL\n+FDFLQANDIEITEDGHFLAWKVIRNDYTDCYTGRFDNSIGRVVKMPRTHVVEDENRTCEA\n+GLHVCARSYIKHFRGGNDRLVVVKVHPRDVVSIPVDYNNAKMRTCQYKVIREAEYNQETD\n+CYI\n' |
b |
diff -r 000000000000 -r f79bcd53b9a3 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Oct 09 09:19:49 2015 -0400 |
b |
<?xml version="1.0"?>
<!-- Tool Shed dependency definition: the bioperl 1.6 package is provided by the
     package_bioperl_1_6 repository (owner iuc), pinned to a fixed changeset. -->
<tool_dependency>
    <package version="1.6" name="bioperl">
        <repository name="package_bioperl_1_6"
                    owner="iuc"
                    toolshed="https://toolshed.g2.bx.psu.edu"
                    changeset_revision="5ef71da82044" />
    </package>
</tool_dependency>