view long-orfs_wrapper.xml @ 0:9c8ffce71f7c draft default tip

Uploaded
author crs4
date Mon, 09 Sep 2013 12:16:17 -0400
parents
children
line wrap: on
line source

<tool id="long_orfs_wrapper" name="Long-ORFs" version="0.2">
  <!-- One-line summary shown next to the tool name. -->
  <description>identifies long, non-overlapping ORFs in a DNA sequence</description>
  <!-- Runtime dependency: the Glimmer package, version 3.02. -->
  <requirements>
    <requirement type="package" version="3.02">glimmer</requirement>
  </requirements>
  <!--
    Cheetah template for the command line of long-orfs_wrapper.py.

    * The input sequence, the output dataset and the log file are always passed.
    * Optional text and dataset parameters are passed only when they are set.
    * Boolean parameters become bare flags when checked.
    * Optional numeric parameters are tested through str(), presumably so that
      an explicit 0 is still forwarded instead of being treated as unset.
    * Every substituted value is single-quoted so that a path or text value
      containing whitespace or shell metacharacters reaches the wrapper as a
      single, unexpanded argument.

    This comment is deliberately kept outside the command element so that it
    can never become part of the template text.
  -->
  <command interpreter="python">
    long-orfs_wrapper.py --loSequence '$loSequence'
    #if $loStartCodons
      --loStartCodons='$loStartCodons'
    #end if
    #if $loEntropy
      --loEntropy='$loEntropy'
    #end if
    #if $loFixed
      --loFixed
    #end if
    #if str($loMinLen)
      --loMinLen='$loMinLen'
    #end if
    #if $loLinear
      --loLinear
    #end if
    #if $loLengthOpt
      --loLengthOpt
    #end if
    #if $loNoHeader
      --loNoHeader
    #end if
    #if str($loMaxOverlap)
      --loMaxOverlap='$loMaxOverlap'
    #end if
    #if str($loCutoff)
      --loCutoff='$loCutoff'
    #end if
    #if $loWithoutStops
      --loWithoutStops
    #end if
    #if str($loTransTable)
      --loTransTable='$loTransTable'
    #end if
    #if $loStopCodons
      --loStopCodons='$loStopCodons'
    #end if
    --loOutput '$loOutput' --logfile '$logfile'
  </command>

  <!--
    Tool form. Parameters appear in the order listed here; each name matches a
    variable referenced by the command template.
  -->
  <inputs>
    <!-- Required input sequence -->
    <param name="loSequence"
           type="data"
           format="fasta"
           label="DNA sequence to be analyzed"
           help="FASTA format"/>

    <!-- Start codons -->
    <param name="loStartCodons"
           type="text"
           value=""
           optional="true"
           label="Specify allowable start codons as a comma-separated list (-A, --start_codons)"
           help="Sample format: 'atg,gtg'. The default start codons are atg, gtg and ttg."/>

    <!-- Optional entropy profiles dataset -->
    <param name="loEntropy"
           type="data"
           format="glimmer_entropy_profiles"
           optional="true"
           label="Entropy profiles (-E, --entropy)"
           help="The entropy profiles are used only if the cutoff option is specified."/>

    <!-- Minimum gene length handling -->
    <param name="loFixed"
           type="boolean"
           checked="false"
           label="Fixed minimum gene length (-f, --fixed)"
           help="Do NOT automatically calculate the minimum gene length that maximizes the number or length of output regions, but instead use either the value specified by the 'minimum gene length' option or else the default, which is 90."/>

    <param name="loMinLen"
           type="integer"
           value=""
           optional="true"
           label="Minimum gene length in number of nucleotides (-g, --min_len)"
           help="It does not include the bases in the stop codon."/>

    <param name="loLinear"
           type="boolean"
           checked="false"
           label="Assuming a linear genome (-l, --linear)"
           help="No 'wrap-around' genes with part at the beginning of the sequence and the rest at the end of the sequence."/>

    <param name="loLengthOpt"
           type="boolean"
           checked="false"
           label="Optimize minimum gene length (-L, --length_opt)"
           help="Find and use as the minimum gene length the value that maximizes the total length of non-overlapping genes, instead of the default behaviour, which is to maximize the total number of non-overlapping genes."/>

    <!-- Output header; checked by default -->
    <param name="loNoHeader"
           type="boolean"
           checked="true"
           label="Do not include the program-settings header information in the output file (-n, --no_header)"
           help="The output file will contain only the coordinates of the selected ORFs."/>

    <!-- Overlap and entropy-distance thresholds -->
    <param name="loMaxOverlap"
           type="integer"
           value=""
           optional="true"
           label="Maximum overlap length (-o, --max_olap)"
           help="Overlaps of this many or fewer bases between genes are not regarded as overlaps."/>

    <param name="loCutoff"
           type="float"
           value=""
           optional="true"
           label="Only genes with an entropy distance score less than this value will be considered (-t, --cutoff)"
           help=" This cutoff is made before any subsequent steps in the algorithm."/>

    <!-- Stop codons -->
    <param name="loWithoutStops"
           type="boolean"
           checked="false"
           label="Do NOT include the stop codon in the region described by the output coordinates (-w, --without_stops)"
           help="By default it is included."/>

    <param name="loTransTable"
           type="integer"
           value=""
           optional="true"
           label="Use GenBank translation table number n to specify stop codons (-z, --trans_table)"
           help=""/>

    <param name="loStopCodons"
           type="text"
           value=""
           optional="true"
           label="Specify allowable stop codons as a comma-separated list (-Z, --stop_codons)"
           help="Sample format: 'tag,tga'. The default stop codons are tag, tga and taa."/>
  </inputs>

  <!--
    Datasets created by a run: the wrapper log (plain text) and the ORF
    coordinates. Both names are referenced at the end of the command template.
  -->
  <outputs>
    <data name="logfile"
          format="txt"
          label="${tool.name} on ${on_string}: log"/>
    <data name="loOutput"
          format="glimmer_coords"
          label="${tool.name} on ${on_string}: output"/>
  </outputs>

  <!-- TODO: no functional tests are defined for this tool yet. -->
  <tests>

  </tests>
  <help>
**What it does**

This program identifies long, non-overlapping open reading frames (ORFs) in a DNA sequence file. These ORFs are very likely to contain genes, and can be used as a set of training sequences for Build-ICM. More specifically, among all ORFs longer than a minimum length, those that do not overlap any others are output. The start codon used for each ORF is the first possible one. By default, the program automatically determines the minimum gene length that maximizes the number of ORFs that are output. With the cutoff (-t) option, the initial set of candidate ORFs can also be filtered using entropy distance, which generally produces a larger, more accurate training set, particularly for high-GC-content genomes.


**License and citation**

This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_.

.. _CRS4 Srl.: http://www.crs4.it/
.. _MIT license: http://opensource.org/licenses/MIT

If you use this tool in Galaxy, please cite |Cuccuru2013|_.

.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted*
.. _Cuccuru2013: http://orione.crs4.it/

This tool uses `Glimmer`_, which is licensed separately. Please cite |Delcher2007|_.

.. _Glimmer: http://ccb.jhu.edu/software/glimmer/index.shtml
.. |Delcher2007| replace:: Delcher, A. L., Bratke, K. A., Powers, E. C., Salzberg, S. L. (2007) Identifying bacterial genes and endosymbiont DNA with Glimmer. *Bioinformatics* 23(6), 673-679
.. _Delcher2007: http://bioinformatics.oxfordjournals.org/content/23/6/673
  </help>
</tool>