Mercurial > repos > iuc > transdecoder

<tool id="transdecoder" name="TransDecoder" version="1.1">
    <description>Find coding regions within transcripts</description>
    <requirements>
        <requirement type="package" version="2.1.0">transdecoder</requirement>
    </requirements>

    <stdio>
        <exit_code range="1:" level="fatal" description="Error occurred" />
    </stdio>

    <command><![CDATA[
        TransDecoder.LongOrfs -t "${input}"

        #if ($min_len):
            -m ${min_len}
        #end if

        ${adv.stranded}

        #if ($adv.gen_code):
            -G ${adv.gen_code}
        #end if

        &&

        TransDecoder.Predict --cpu \${GALAXY_SLOTS:-1} -t "${input}"

        #if ($adv.retain_long_orfs):
            --retain_long_orfs ${adv.retain_long_orfs}
        #end if

        #if str( $training_sect.training.training_selector ) == "training_top":
            #if ($training_sect.training.top_longest):
                -T ${training_sect.training.top_longest}
            #end if
        #else
            #if ($training_sect.training.train):
                --train ${training_sect.training.train}
            #end if
        #end if

        &&

        out_prefix=`basename "${input}"`

        mv `basename "${input}"`.transdecoder.pep transcript.transdecoder.pep &&
        mv `basename "${input}"`.transdecoder.cds transcript.transdecoder.cds &&
        mv `basename "${input}"`.transdecoder.bed transcript.transdecoder.bed &&
        mv `basename "${input}"`.transdecoder.gff3 transcript.transdecoder.gff3 &&
        mv `basename "${input}"`.transdecoder.mRNA transcript.transdecoder.mRNA
    ]]></command>

    <inputs>
        <param format="fasta" name="input" type="data" label="Transcripts"/>

        <param name="min_len" size="5" type="integer" optional='true' value="100" label="Minimum protein length (default: 100aa)"/>

        <section name="adv" title="Advanced Options" expanded="False">
            <param name="stranded" type="boolean" checked="false" truevalue="-S" falsevalue="" label="Strand-specific" help="Only analyzes top strand"/>
            <param name="gen_code" type="select" label="Genetic code">
                <option value="universal" selected="True">universal</option>
                <option value="Euplotes">Euplotes</option>
                <option value="Tetrahymena">Tetrahymena</option>
                <option value="Candida">Candida</option>
                <option value="Acetabularia">Acetabularia</option>
                <option value="Mitochondrial-Canonical">Mitochondrial-Canonical</option>
                <option value="Mitochondrial-Vertebrates">Mitochondrial-Vertebrates</option>
                <option value="Mitochondrial-Arthropods">Mitochondrial-Arthropods</option>
                <option value="Mitochondrial-Echinoderms">Mitochondrial-Echinoderms</option>
                <option value="Mitochondrial-Molluscs">Mitochondrial-Molluscs</option>
                <option value="Mitochondrial-Ascidians">Mitochondrial-Ascidians</option>
                <option value="Mitochondrial-Nematodes">Mitochondrial-Nematodes</option>
                <option value="Mitochondrial-Platyhelminths">Mitochondrial-Platyhelminths</option>
                <option value="Mitochondrial-Yeasts">Mitochondrial-Yeasts</option>
                <option value="Mitochondrial-Euascomycetes">Mitochondrial-Euascomycetes</option>
                <option value="Mitochondrial-Protozoans">Mitochondrial-Protozoans</option>
            </param>

            <param name="retain_long_orfs" type="integer" optional="true" label="Retain long orfs" help="Retain all ORFs found that are equal or longer than these many nucleotides even if no other evidence marks it as coding (default: 900 bp => 300aa)" />
        </section>

        <section name="training_sect" title="Training Options" expanded="False">
            <conditional name="training">
                <param name="training_selector" type="select" label="Select the training method">
                    <option value="training_top" selected="True">Train with the top longest ORFs</option>
                    <option value="training_set">Train with a set of known ORFs</option>
                </param>
                <when value="training_top">
                    <param name="top_longest" type="integer" optional="true" label="Number of top longest ORFs" help="Number of top longest ORFs to train Markov Model (hexamer stats) (default: 500 sequences)" />
                </when>
                <when value="training_set">
                    <param format="fasta" name="train" type="data" label="Training set of transcripts" optional="true" help="FASTA file with ORFs to train Markov Mod for protein identification" />
                </when>
            </conditional>
        </section>

    </inputs>


    <outputs>
        <data name='transdecoder_pep' format='fasta' label="${tool.name} on ${on_string}: pep" from_work_dir="transcript.transdecoder.pep"/>
        <data name='transdecoder_cds' format='fasta' label="${tool.name} on ${on_string}: cds" from_work_dir="transcript.transdecoder.cds"/>
        <data name='transdecoder_bed' format='bed' label="${tool.name} on ${on_string}: bed" from_work_dir="transcript.transdecoder.bed"/>
        <data name='transdecoder_gff3' format='gff3' label="${tool.name} on ${on_string}: gff3" from_work_dir="transcript.transdecoder.gff3"/>
        <data name='transdecoder_mRNA' format='fasta' label="${tool.name} on ${on_string}: mRNA" from_work_dir="transcript.transdecoder.mRNA"/>
    </outputs>


    <tests>
        <test>
            <param name="input" value="test.fa"/>
            <output name="transdecoder_gff3" file="raw/test.fa.transdecoder.gff3" compare="sim_size" />
            <output name="transdecoder_bed" file="raw/test.fa.transdecoder.bed" compare="sim_size" />
            <output name="transdecoder_cds" file="raw/test.fa.transdecoder.cds" compare="sim_size" />
            <output name="transdecoder_mRNA" file="raw/test.fa.transdecoder.mRNA" compare="sim_size" />
            <output name="transdecoder_pep" file="raw/test.fa.transdecoder.pep" compare="sim_size" />
        </test>
        <test>
            <param name="input" value="test.fa"/>
            <param name="training_selector" value="training_top"/>
            <param name="top_longest" value="10"/>
            <output name="transdecoder_gff3" file="top/test.fa.transdecoder.gff3" compare="sim_size" />
            <output name="transdecoder_bed" file="top/test.fa.transdecoder.bed" compare="sim_size" />
            <output name="transdecoder_cds" file="top/test.fa.transdecoder.cds" compare="sim_size" />
            <output name="transdecoder_mRNA" file="top/test.fa.transdecoder.mRNA" compare="sim_size" />
            <output name="transdecoder_pep" file="top/test.fa.transdecoder.pep" compare="sim_size" />
        </test>
        <test>
            <param name="input" value="test.fa"/>
            <param name="gen_code" value="Mitochondrial-Arthropods"/>
            <output name="transdecoder_gff3" file="gencode/test.fa.transdecoder.gff3" compare="sim_size" />
            <output name="transdecoder_bed" file="gencode/test.fa.transdecoder.bed" compare="sim_size" />
            <output name="transdecoder_cds" file="gencode/test.fa.transdecoder.cds" compare="sim_size" />
            <output name="transdecoder_mRNA" file="gencode/test.fa.transdecoder.mRNA" compare="sim_size" />
            <output name="transdecoder_pep" file="gencode/test.fa.transdecoder.pep" compare="sim_size" />
        </test>
        <test>
            <param name="input" value="test.fa"/>
            <param name="stranded" value="true"/>
            <output name="transdecoder_gff3" file="strand/test.fa.transdecoder.gff3" compare="sim_size" />
            <output name="transdecoder_bed" file="strand/test.fa.transdecoder.bed" compare="sim_size" />
            <output name="transdecoder_cds" file="strand/test.fa.transdecoder.cds" compare="sim_size" />
            <output name="transdecoder_mRNA" file="strand/test.fa.transdecoder.mRNA" compare="sim_size" />
            <output name="transdecoder_pep" file="strand/test.fa.transdecoder.pep" compare="sim_size" />
        </test>
    </tests>
    <help>

**What it does**

TransDecoder identifies candidate coding regions within transcript sequences, such as those generated by de novo RNA-Seq transcript assembly using Trinity, or constructed based on RNA-Seq alignments to the genome using Tophat and Cufflinks.

TransDecoder identifies likely coding sequences based on the following criteria:

 - a minimum length open reading frame (ORF) is found in a transcript sequence

 - a log-likelihood score similar to what is computed by the GeneID software is > 0.

 - the above coding score is greatest when the ORF is scored in the 1st reading frame as compared to scores in the other 5 reading frames.

 - if a candidate ORF is found fully encapsulated by the coordinates of another candidate ORF, the longer one is reported. However, a single transcript can report multiple ORFs (allowing for operons, chimeras, etc).

 - optional the putative peptide has a match to a Pfam domain above the noise cutoff score.

The software is primarily maintained by Brian Haas at the Broad Institute and Alexie Papanicolaou at the Commonwealth Scientific and Industrial Research Organisation (CSIRO). It is integrated into other related software such as Trinity, PASA, EVidenceModeler, and Trinotate.

    </help>
     <citations>
        <citation type="doi">10.1038/nprot.2013.084</citation>
    </citations>
</tool>
author	iuc
date	Wed, 06 Jul 2016 11:30:14 -0400
parents	1de64ee71145
children	0db979fead3a