Mercurial > repos > iuc > bp_genbank2gff3
diff bp_genbank2gff3.xml @ 0:f79bcd53b9a3 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bioperl commit 799339e22181d28cb2b145454d353d6025779636
author | iuc |
---|---|
date | Fri, 09 Oct 2015 09:19:49 -0400 |
parents | |
children | 792a280ebeea |
line wrap: on
line diff
<!--
  Galaxy wrapper for BioPerl's bp_genbank2gff3.pl (GenBank flat file to GFF3).

  Provenance: recovered from the changeset diff that created this file
  (old side /dev/null, new side b/bp_genbank2gff3.xml,
  Fri Oct 09 09:19:49 2015 -0400, hunk @@ -0,0 +1,122 @@).
-->
<tool id="bp_genbank2gff3" name="Genbank to GFF3" version="1.0">
    <description>converter</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- The "stdio" macro is defined in macros.xml, which is not part of this file. -->
    <expand macro="stdio" />
    <!--
      Cheetah template for the command line. Dataset paths and free-text
      values are single-quoted so the shell treats each as one literal word.
    -->
    <command><![CDATA[
bp_genbank2gff3.pl
## Empty when "Infer exon/mRNA subfeatures" is checked, "--noinfer" otherwise.
$noinfer
## --sofile is passed only when an ontology source was selected.
#if str($sofile.sofile) != "__none__":
    --sofile
    #if str($sofile.sofile) == "url":
        '${sofile.so_url}'
    #else:
        live
    #end if
#end if
## The converted records are captured through the redirect at the end;
## presumably "-" selects stdout (confirm against the script's documentation).
--outdir -
--ethresh $ethresh
$model
--typesource '${typesource}'

'$genbank'
> '$gff3']]></command>
    <inputs>
        <!-- NOTE(review): Galaxy's stock extension for GenBank files is "genbank"; confirm that "gb" is registered on the target instance. -->
        <param label="Genbank file" name="genbank" type="data" format="gb"/>
        <!-- Inverted flag: checked emits nothing, unchecked emits the noinfer switch. -->
        <param name="noinfer" truevalue="" falsevalue="--noinfer" checked="true" type="boolean" label="Infer exon/mRNA subfeatures"/>
        <!-- Ontology source: none, the "live" keyword, or a user-supplied URL. -->
        <conditional name="sofile" label="Sequence Ontology">
            <param name="sofile" label="Sequence Ontology File" type="select">
                <option value="__none__" selected="True">None specified</option>
                <option value="live">Latest Sequence Ontology</option>
                <option value="url">User Specified</option>
            </param>
            <when value="__none__" />
            <when value="live" />
            <when value="url">
                <param name="so_url" label="Sequence Ontology URL" type="text"/>
            </when>
        </conditional>
        <!-- The numeric option value is handed to the script's ethresh option unchanged. -->
        <param name="ethresh" label="Error threshold for unflattener" type="select">
            <option value="0">Strict</option>
            <option value="1" selected="True">Medium</option>
            <option value="2">Loose</option>
            <option value="3">Ignore Errors</option>
        </param>
        <!-- The option value is the literal command-line switch inserted into the command. -->
        <param name="model" label="Gene Model" type="select">
            <option value="--CDS" selected="True">Default GFF gene model</option>
            <option value="--noCDS">Alternate gene-RNA-protein-exon model</option>
        </param>
        <param name="typesource" label="Sequence Ontology type for landmark feature" help="E.g. chromosome, region, contig" value="contig" type="text" />
    </inputs>
    <outputs>
        <data format="gff3" name="gff3" label="${genbank.name} as GFF3"/>
    </outputs>
    <tests>
        <test>
            <param name="genbank" value="seq.gb" />
            <param name="noinfer" value="True" />
            <output name="gff3" file="seq.gb.0.gff" ftype="gff3" lines_diff="6"/>
        </test>
        <!--
          NOTE(review): noinfer is checked by default, so this test appears to
          submit the same parameters as the one above while comparing against a
          different expected file (tolerated through lines_diff). Confirm that
          this is intentional.
        -->
        <test>
            <param name="genbank" value="seq.gb" />
            <output name="gff3" file="seq.gb.1.gff" ftype="gff3" lines_diff="4"/>
        </test>
    </tests>
    <!--
      NOTE(review): the "gzipped" wording below appears to come from the
      upstream script's own documentation; this wrapper passes the dataset path
      to the script as-is. Confirm whether the sentence should be reworded.
    -->
    <help><![CDATA[
**What it does**:

This tool uses Bio::SeqFeature::Tools::Unflattener and
Bio::Tools::GFF to convert GenBank flatfiles to GFF3 with gene
containment hierarchies mapped for optimal display in gbrowse.

The input files are assumed to be gzipped GenBank flatfiles for refseq
contigs. The files may contain multiple GenBank records.

**Designed for RefSeq**

This script is designed for RefSeq genomic sequence entries. It may
work for third party annotations but this has not been tested.
But see below, Uniprot/Swissprot works, EMBL and possibly EMBL/Ensembl
if you don't mind some gene model unflattener errors (dgg).

**G-R-P-E Gene Model**

Don Gilbert worked this over with needs to produce GFF3 suited to
loading to GMOD Chado databases.

This writes GFF with an alternate, but useful Gene model,
instead of the consensus model for GFF3

  [ gene > mRNA> (exon,CDS,UTR) ]

This alternate is

  gene > mRNA > polypeptide > exon

means the only feature with dna bases is the exon. The others
specify only location ranges on a genome. Exon of course is a child
of mRNA and protein/peptide.

The protein/polypeptide feature is an important one, having all the
annotations of the GenBank CDS feature, protein ID, translation, GO
terms, Dbxrefs to other proteins.

UTRs, introns, CDS-exons are all inferred from the primary exon bases
inside/outside appropriate higher feature ranges. Other special gene
model features remain the same.

**Authors**

Sheldon McKay (mckays@cshl.edu)

Copyright (c) 2004 Cold Spring Harbor Laboratory.

**Author of hacks for GFF2Chado loading**

Don Gilbert (gilbertd@indiana.edu)
    ]]></help>
    <citations>
        <citation type="doi">10.1101/gr.361602</citation>
    </citations>
</tool>