diff transdecoder.xml @ 5:c6334cb383ff draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transdecoder commit 860cdb41f79283c76d898404e5f28e2d506bed4d"
author iuc
date Thu, 08 Oct 2020 12:33:46 +0000
parents 0db979fead3a
children d0d4cef4f967
line wrap: on
line diff
--- a/transdecoder.xml	Thu Jun 01 06:04:12 2017 -0400
+++ b/transdecoder.xml	Thu Oct 08 12:33:46 2020 +0000
@@ -1,134 +1,420 @@
-<tool id="transdecoder" name="TransDecoder" version="3.0.1">
-    <description>Find coding regions within transcripts</description>
+<tool id="transdecoder" name="TransDecoder" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description>finds coding regions within transcripts</description>
+    <macros>
+        <token name="@TOOL_VERSION@">5.5.0</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
     <requirements>
-        <requirement type="package" version="3.0.1">transdecoder</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">transdecoder</requirement>
     </requirements>
-
+    <version_command><![CDATA[TransDecoder.LongOrfs --version 2>&1 | grep 'TransDecoder.LongOrfs' | cut -f 2 -d ' ']]></version_command>
     <command detect_errors="exit_code"><![CDATA[
-        TransDecoder.LongOrfs -t '${input}'
-        -m ${min_len}
-        ${adv.stranded}
-        -G ${adv.gen_code}
-        #if str($adv.partials)
-            -p ${adv.partials}
-        #end if
-        &&
-        TransDecoder.Predict --cpu \${GALAXY_SLOTS:-1} -t '${input}'
-        --retain_long_orfs ${adv.retain_long_orfs}
-        ${adv.single_best_orf}
-        #if str( $training_sect.training.training_selector ) == "training_top":
-            -T ${training_sect.training.top_longest}
-        #else
-            --train '${training_sect.training.train}'
-        #end if
-        &&
-        mv `basename '${input}'`.transdecoder.pep '$transdecoder_pep' &&
-        mv `basename '${input}'`.transdecoder.cds '$transdecoder_cds' &&
-        mv `basename '${input}'`.transdecoder.bed '$transdecoder_bed' &&
-        mv `basename '${input}'`.transdecoder.gff3 '$transdecoder_gff3'
+## initialize
+ln -s '${t}' 'transcripts.fasta' &&
+
+## When a log is requested, both steps are grouped and piped through tee.
+## pipefail is needed so that a failing TransDecoder step is not masked by the
+## exit status of tee (the job state is derived from the exit code only).
+#if 'log' in $oo.out
+    set -o pipefail &&
+    {
+#end if
+
+## run TransDecoder.LongOrfs
+TransDecoder.LongOrfs
+## Shared options
+-t 'transcripts.fasta'
+-G '${G}'
+## LongOrfs options
+#if $lo.gene_trans_map
+    --gene_trans_map '${lo.gene_trans_map}'
+#end if
+-m $lo.m
+${lo.S}
+-O 'output' ## required, otherwise value of -t is used as output folder
+
+## run TransDecoder.Predict (optional, see predict_sel)
+#if $po.predict_cond.predict_sel == 'yes'
+    && TransDecoder.Predict
+    ## Shared options
+    -t 'transcripts.fasta'
+    -G '${G}'
+    ## Predict options
+    --retain_long_orfs_mode $po.predict_cond.mode_cond.mode_sel
+    #if $po.predict_cond.mode_cond.mode_sel == 'strict'
+        --retain_long_orfs_length $po.predict_cond.mode_cond.retain_long_orfs_length
+    #end if
+    #if $po.predict_cond.retain_pfam_hits
+        --retain_pfam_hits '$po.predict_cond.retain_pfam_hits'
+    #end if
+    #if $po.predict_cond.retain_blastp_hits
+        --retain_blastp_hits '$po.predict_cond.retain_blastp_hits'
+    #end if
+    $po.predict_cond.single_best_only
+    $po.predict_cond.no_refine_starts
+    -T $po.predict_cond.T
+    -O 'output'
+#end if
+
+## postprocessing: close the group and copy stdout/stderr of all steps to the log
+#if 'log' in $oo.out
+    ; } 2>&1 | tee '$out_log'
+#end if
     ]]></command>
     <inputs>
-        <param name="input" argument="-t" type="data" format="fasta" label="Transcripts" />
-        <param name="min_len" argument="-m" type="integer" value="100" label="Minimum protein length" />
-        <section name="adv" title="Advanced Options" expanded="False">
-            <param name="stranded" argument="-S" type="boolean" truevalue="-S" falsevalue="" label="Strand-specific" help="Only analyzes top strand" />
-            <param name="gen_code" argument="-G" type="select" label="Genetic code">
-                <option value="universal" selected="True">universal</option>
-                <option value="Euplotes">Euplotes</option>
-                <option value="Tetrahymena">Tetrahymena</option>
-                <option value="Candida">Candida</option>
-                <option value="Acetabularia">Acetabularia</option>
-                <option value="Mitochondrial-Canonical">Mitochondrial-Canonical</option>
-                <option value="Mitochondrial-Vertebrates">Mitochondrial-Vertebrates</option>
-                <option value="Mitochondrial-Arthropods">Mitochondrial-Arthropods</option>
-                <option value="Mitochondrial-Echinoderms">Mitochondrial-Echinoderms</option>
-                <option value="Mitochondrial-Molluscs">Mitochondrial-Molluscs</option>
-                <option value="Mitochondrial-Ascidians">Mitochondrial-Ascidians</option>
-                <option value="Mitochondrial-Nematodes">Mitochondrial-Nematodes</option>
-                <option value="Mitochondrial-Platyhelminths">Mitochondrial-Platyhelminths</option>
-                <option value="Mitochondrial-Yeasts">Mitochondrial-Yeasts</option>
-                <option value="Mitochondrial-Euascomycetes">Mitochondrial-Euascomycetes</option>
-                <option value="Mitochondrial-Protozoans">Mitochondrial-Protozoans</option>
+        <param argument="-t" type="data" format="fasta" label="Select file with transcripts"/>
+        <param argument="-G" type="select" label="Select genetic code">
+            <option value="Acetabularia">Acetabularia</option>
+            <option value="Candida">Candida</option>
+            <option value="Ciliate">Ciliate</option>
+            <option value="Dasycladacean">Dasycladacean</option>
+            <option value="Euplotid">Euplotid</option>
+            <option value="Hexamita">Hexamita</option>
+            <option value="Mesodinium">Mesodinium</option>
+            <option value="Mitochondrial-Ascidian">Mitochondrial-Ascidian</option>
+            <option value="Mitochondrial-Chlorophycean">Mitochondrial-Chlorophycean</option>
+            <option value="Mitochondrial-Echinoderm">Mitochondrial-Echinoderm</option>
+            <option value="Mitochondrial-Flatworm">Mitochondrial-Flatworm</option>
+            <option value="Mitochondrial-Invertebrates">Mitochondrial-Invertebrates</option>
+            <option value="Mitochondrial-Protozoan">Mitochondrial-Protozoan</option>
+            <option value="Mitochondrial-Pterobranchia">Mitochondrial-Pterobranchia</option>
+            <option value="Mitochondrial-Scenedesmus_obliquus">Mitochondrial-Scenedesmus_obliquus</option>
+            <option value="Mitochondrial-Thraustochytrium">Mitochondrial-Thraustochytrium</option>
+            <option value="Mitochondrial-Trematode">Mitochondrial-Trematode</option>
+            <option value="Mitochondrial-Vertebrates">Mitochondrial-Vertebrates</option>
+            <option value="Mitochondrial-Yeast">Mitochondrial-Yeast</option>
+            <option value="Pachysolen_tannophilus">Pachysolen_tannophilus</option>
+            <option value="Peritrich">Peritrich</option>
+            <option value="SR1_Gracilibacteria">SR1_Gracilibacteria</option>
+            <option value="Tetrahymena">Tetrahymena</option>
+            <option value="Universal" selected="true">Universal</option>
+        </param>
+        <section name="lo" title="LongOrfs options" expanded="true">
+            <param argument="--gene_trans_map" type="data" format="tabular" optional="true" label="Select gene-to-transcript identifier mapping file" help="gene_id&lt;tab&gt;trans_id&lt;return&gt;"/>
+            <param argument="-m" type="integer" value="100" min="1" label="Set minimum protein length"/>
+            <param argument="-S" type="boolean" truevalue="-S" falsevalue="" label="Activate strand-specificity?" help="Only analyse top strand."/>
+        </section>
+        <section name="po" title="Predict options" expanded="true">
+            <!-- 
+                TransDecoder.Predict can be skipped if only longest_orfs.pep (as a result of TransDecoder.LongOrfs) is required, e.g. for homology search via BlastP and Pfam.
+            -->
+            <conditional name="predict_cond">
+                <param name="predict_sel" type="select" label="Should likely coding regions be predicted?" help="(TransDecoder.Predict)">
+                    <option value="yes" selected="true">Yes</option>
+                    <option value="no">No</option>
+                </param>
+                <when value="yes">
+                    <!-- Parameters passed to TransDecoder.Predict; only evaluated when prediction is enabled. -->
+                    <conditional name="mode_cond">
+                        <!-- name keeps the identifier used by the command template and the tests; argument documents the real command line flag. -->
+                        <param name="mode_sel" argument="--retain_long_orfs_mode" type="select" label="Select mode to retain long ORFs" help="In dynamic mode: set range according to 1% FDR in a random sequence of same GC content.">
+                            <option value="dynamic" selected="true">Dynamic</option>
+                            <option value="strict">Strict</option>
+                        </param>
+                        <when value="dynamic"/>
+                        <when value="strict">
+                            <param argument="--retain_long_orfs_length" type="integer" value="1000000" min="0" label="Set long ORFs length" help="Retain all ORFs found that are equal or longer than these many nucleotides even if no other evidence marks it as coding."/>
+                        </when>
+                    </conditional>
+                    <param argument="--retain_blastp_hits" type="data" format="tabular" optional="true" label="Select BlastP result file" help="Any ORF with a blast match will be retained in the final output. (outfmt 6 format)"/>
+                    <param argument="--retain_pfam_hits" type="data" format="tabular" optional="true" label="Select Pfam result file" help="Domain table output file from running hmmscan to search Pfam. Any ORF with a pfam domain hit will be retained in the final output. (domtblout file)"/>
+                    <param argument="--single_best_only" type="boolean" truevalue="--single_best_only" falsevalue="" label="Retain only the single best ORF per transcript?" help="Prioritized by homology then ORF length."/>
+                    <!-- Enabling this flag switches start refinement OFF; the label is phrased accordingly. -->
+                    <param argument="--no_refine_starts" type="boolean" truevalue="--no_refine_starts" falsevalue="" label="Disable start refinement that identifies potential start codons for 5' partial ORFs using a PWM?" help="Start refinement is performed unless this option is enabled."/>
+                    <param argument="-T" type="integer" value="500" min="1" label="Set top longest ORFs to train Markov Model" help="The first (10*value) elements are selected for removing redundancies. Then this number of longest ORFs is selected from the non-redundant set."/>
+                </when>
+                <when value="no"/>
+            </conditional>
+        </section>
+        <section name="oo" title="Output options">
+            <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)" help="Only shown in history if selected here and generated by the specific TransDecoder run. Results of TransDecoder.Predict are only available if prediction is enabled.">
+                <!-- LongOrfs: files written to the output folder by TransDecoder.LongOrfs -->
+                <option value="lo_cds">Longest ORFs (CDS)</option>
+                <option value="lo_gff3">Longest ORFs (GFF3)</option>
+                <option value="lo_pep" selected="true">Longest ORFs (PEP)</option>
+                <!-- Predict: final candidates; the matching outputs are additionally filtered on predict_sel -->
+                <option value="bed" selected="true">Results (BED)</option>
+                <option value="cds" selected="true">Results (CDS)</option>
+                <option value="gff3" selected="true">Results (GFF3)</option>
+                <option value="pep" selected="true">Results (PEP)</option>
+                <!-- Others: combined stdout/stderr of the executed steps -->
+                <option value="log">Log</option>
             </param>
-            <param name="partials" argument="-p" type="integer" value="" optional="true" label="Shorten potential 5' partials if they are this percentage of the original protein or longer" />
-            <param name="retain_long_orfs" argument="--retain_long_orfs" type="integer" value="900" label="Retain long ORFs" help="Retain all ORFs found that are equal or longer than these many nucleotides even if no other evidence marks it as coding (default: 900 bp => 300aa)" />
-            <param argument="--single_best_orf" type="boolean" truevalue="--single_best_orf" falsevalue="" label="Retain only the single best ORF per transcript" help="Best is defined as having (optionally Pfam and/or BLAST support) and longest ORF" />
-        </section>
-        <section name="training_sect" title="Training Options" expanded="False">
-            <conditional name="training">
-                <param name="training_selector" type="select" label="Select the training method">
-                    <option value="training_top" selected="True">Train with the top longest ORFs</option>
-                    <option value="training_set">Train with a set of known ORFs</option>
-                </param>
-                <when value="training_top">
-                    <param name="top_longest" argument="-T" type="integer" value="500" label="Number of top longest ORFs" help="Number of top longest ORFs to train Markov Model (hexamer stats). Note, 10x this value are first selected for use with cd-hit to remove redundancies, and then this value of longest ORFs are selected from the non-redundant set" />
-                </when>
-                <when value="training_set">
-                    <param name="train" argument="--train" type="data" format="fasta" label="Training set of transcripts" help="FASTA file with ORFs to train Markov Mod for protein identification" />
-                </when>
-            </conditional>
         </section>
     </inputs>
     <outputs>
-        <data name="transdecoder_pep" format="fasta" label="${tool.name} on ${on_string}: pep" />
-        <data name="transdecoder_cds" format="fasta" label="${tool.name} on ${on_string}: cds" />
-        <data name="transdecoder_bed" format="bed" label="${tool.name} on ${on_string}: bed" />
-        <data name="transdecoder_gff3" format="gff3" label="${tool.name} on ${on_string}: gff3" />
+        <!-- LongOrfs -->
+        <data name="out_lo_cds" format="fasta" from_work_dir="output/longest_orfs.cds" label="${tool.name} on ${on_string}: Longest ORFs (CDS/FASTA)">
+            <filter>'lo_cds' in oo['out']</filter>
+        </data>
+        <data name="out_lo_gff3" format="gff3" from_work_dir="output/longest_orfs.gff3" label="${tool.name} on ${on_string}: Longest ORFs (GFF3)">
+            <filter>'lo_gff3' in oo['out']</filter>
+        </data>
+        <data name="out_lo_pep" format="fasta" from_work_dir="output/longest_orfs.pep" label="${tool.name} on ${on_string}: Longest ORFs (PEP/FASTA)">
+            <filter>'lo_pep' in oo['out']</filter>
+        </data>
+        <!-- Predict -->
+        <data name="out_bed" format="bed" from_work_dir="transcripts.fasta.transdecoder.bed" label="${tool.name} on ${on_string}: Results (BED)">
+            <filter>'bed' in oo['out'] and po['predict_cond']['predict_sel'] == 'yes'</filter>
+        </data>
+        <data name="out_cds" format="fasta" from_work_dir="transcripts.fasta.transdecoder.cds" label="${tool.name} on ${on_string}: Results (CDS/FASTA)">
+            <filter>'cds' in oo['out'] and po['predict_cond']['predict_sel'] == 'yes'</filter>
+        </data>
+        <data name="out_gff3" format="gff3" from_work_dir="transcripts.fasta.transdecoder.gff3" label="${tool.name} on ${on_string}: Results (GFF3)">
+            <!-- Only created when selected and TransDecoder.Predict is run (same rule as the other Predict outputs). -->
+            <filter>'gff3' in oo['out'] and po['predict_cond']['predict_sel'] == 'yes'</filter>
+        </data>
+        <data name="out_pep" format="fasta" from_work_dir="transcripts.fasta.transdecoder.pep" label="${tool.name} on ${on_string}: Results (PEP/FASTA)">
+            <filter>'pep' in oo['out'] and po['predict_cond']['predict_sel'] == 'yes'</filter>
+        </data>
+        <!-- Others -->
+        <data name="out_log" format="txt" label="${tool.name} on ${on_string}: Log">
+            <filter>'log' in oo['out']</filter>
+        </data>
     </outputs>
     <tests>
-        <test>
-            <param name="input" value="test.fa"/>
-            <output name="transdecoder_gff3" file="raw/test.fa.transdecoder.gff3" compare="sim_size" />
-            <output name="transdecoder_bed" file="raw/test.fa.transdecoder.bed" compare="sim_size" />
-            <output name="transdecoder_cds" file="raw/test.fa.transdecoder.cds" compare="sim_size" />
-            <output name="transdecoder_pep" file="raw/test.fa.transdecoder.pep" compare="sim_size" />
-        </test>
-        <test>
-            <param name="input" value="test.fa"/>
-            <param name="training_selector" value="training_top"/>
-            <param name="top_longest" value="10"/>
-            <output name="transdecoder_gff3" file="top/test.fa.transdecoder.gff3" compare="sim_size" />
-            <output name="transdecoder_bed" file="top/test.fa.transdecoder.bed" compare="sim_size" />
-            <output name="transdecoder_cds" file="top/test.fa.transdecoder.cds" compare="sim_size" />
-            <output name="transdecoder_pep" file="top/test.fa.transdecoder.pep" compare="sim_size" />
+        <!-- no test implemented for: gene_trans_map, retain_blastp_hits, retain_pfam_hits -->
+
+        <!-- #1 default -->
+        <test expect_num_outputs="5">
+            <param name="t" value="transcripts.fasta"/>
+            <!-- LongOrfs -->
+            <output name="out_lo_pep">
+                <assert_contents>
+                    <has_n_lines n="772"/>
+                    <has_text_matching expression=".+comp874.+"/>
+                </assert_contents>
+            </output>
+            <!-- Predict -->
+            <output name="out_bed">
+                <assert_contents>
+                    <has_n_lines n="337"/>
+                    <has_text_matching expression="comp98\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_cds">
+                <assert_contents>
+                    <has_n_lines n="6959"/>
+                    <has_text_matching expression=">comp98\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_gff3">
+                <assert_contents>
+                    <has_n_lines n="2165"/>
+                    <has_text_matching expression="comp98\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_pep">
+                <assert_contents>
+                    <has_n_lines n="2644"/>
+                    <has_text_matching expression="comp98\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
         </test>
-        <test>
-            <param name="input" value="test.fa"/>
-            <param name="gen_code" value="Mitochondrial-Arthropods"/>
-            <output name="transdecoder_gff3" file="gencode/test.fa.transdecoder.gff3" compare="sim_size" />
-            <output name="transdecoder_bed" file="gencode/test.fa.transdecoder.bed" compare="sim_size" />
-            <output name="transdecoder_cds" file="gencode/test.fa.transdecoder.cds" compare="sim_size" />
-            <output name="transdecoder_pep" file="gencode/test.fa.transdecoder.pep" compare="sim_size" />
+        <!-- #2 -->
+        <test expect_num_outputs="8">
+            <param name="t" value="transcripts.fasta"/>
+            <param name="G" value="Acetabularia"/>
+            <section name="lo">
+                <param name="m" value="101"/>
+                <param name="S" value="true"/>
+            </section>
+            <section name="po">
+                <conditional name="predict_cond">
+                    <param name="predict_sel" value="yes"/>
+                    <conditional name="mode_cond">
+                        <param name="mode_sel" value="dynamic"/>
+                    </conditional>
+                    <param name="single_best_only" value="true"/>
+                    <param name="no_refine_starts" value="true"/>
+                    <param name="T" value="501"/>
+                </conditional>
+            </section>
+            <section name="oo">
+                <param name="out" value="lo_pep,lo_gff3,lo_cds,bed,cds,gff3,pep,log"/>
+            </section>
+            <!-- LongOrfs -->
+            <output name="out_lo_cds">
+                <assert_contents>
+                    <has_n_lines n="1454"/>
+                    <has_text_matching expression=">comp874\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_lo_gff3">
+                <assert_contents>
+                    <has_n_lines n="4565"/>
+                    <has_text_matching expression="comp874\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_lo_pep">
+                <assert_contents>
+                    <has_n_lines n="1454"/>
+                    <has_text_matching expression=">comp874\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <!-- Predict -->
+            <output name="out_bed">
+                <assert_contents>
+                    <has_n_lines n="340"/>
+                    <has_text_matching expression="comp98\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_cds">
+                <assert_contents>
+                    <has_n_lines n="7512"/>
+                    <has_text_matching expression=">comp98\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_gff3">
+                <assert_contents>
+                    <has_n_lines n="2000"/>
+                    <has_text_matching expression="comp98\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_pep">
+                <assert_contents>
+                    <has_n_lines n="2833"/>
+                    <has_text_matching expression=">comp98\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <!-- Others -->
+            <output name="out_log">
+                <assert_contents>
+                    <has_text_matching expression="transdecoder is finished.+"/>
+                </assert_contents>
+            </output>
         </test>
-        <test>
-            <param name="input" value="test.fa"/>
-            <param name="stranded" value="true"/>
-            <output name="transdecoder_gff3" file="strand/test.fa.transdecoder.gff3" compare="sim_size" />
-            <output name="transdecoder_bed" file="strand/test.fa.transdecoder.bed" compare="sim_size" />
-            <output name="transdecoder_cds" file="strand/test.fa.transdecoder.cds" compare="sim_size" />
-            <output name="transdecoder_pep" file="strand/test.fa.transdecoder.pep" compare="sim_size" />
+        <!-- #3 -->
+        <test expect_num_outputs="8">
+            <param name="t" value="transcripts.fasta"/>
+            <section name="po">
+                <conditional name="predict_cond">
+                    <param name="predict_sel" value="yes"/>
+                    <conditional name="mode_cond">
+                        <param name="mode_sel" value="strict"/>
+                        <param name="retain_long_orfs_length" value="1000001"/>
+                    </conditional>
+                </conditional>
+            </section>
+            <section name="oo">
+                <param name="out" value="lo_pep,lo_gff3,lo_cds,bed,cds,gff3,pep,log"/>
+            </section>
+            <!-- LongOrfs -->
+            <output name="out_lo_cds">
+                <assert_contents>
+                    <has_n_lines n="772"/>
+                    <has_text_matching expression=">comp874\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_lo_gff3">
+                <assert_contents>
+                    <has_n_lines n="2486"/>
+                    <has_text_matching expression="comp874\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_lo_pep">
+                <assert_contents>
+                    <has_n_lines n="772"/>
+                    <has_text_matching expression=">comp874\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <!-- Predict -->
+            <output name="out_bed">
+                <assert_contents>
+                    <has_n_lines n="337"/>
+                    <has_text_matching expression="comp98\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_cds">
+                <assert_contents>
+                    <has_n_lines n="6959"/>
+                    <has_text_matching expression=">comp98\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_gff3">
+                <assert_contents>
+                    <has_n_lines n="2165"/>
+                    <has_text_matching expression="comp98\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_pep">
+                <assert_contents>
+                    <has_n_lines n="2644"/>
+                    <has_text_matching expression=">comp98\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
+            <!-- Others -->
+            <output name="out_log">
+                <assert_contents>
+                    <has_text_matching expression="transdecoder is finished.+"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #4 -->
+        <test expect_num_outputs="1">
+            <!-- Predict is skipped: of the default selection only the longest ORFs (PEP) output remains. -->
+            <param name="t" value="transcripts.fasta"/>
+            <section name="po">
+                <conditional name="predict_cond">
+                    <param name="predict_sel" value="no"/>
+                </conditional>
+            </section>
+            <!-- LongOrfs: same default LongOrfs settings as test #3, hence the same expectations -->
+            <output name="out_lo_pep">
+                <assert_contents>
+                    <has_n_lines n="772"/>
+                    <has_text_matching expression=">comp874\_c0\_seq1.+"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
-    <help>
+    <help><![CDATA[
+.. class:: infomark
+
 **What it does**
 
-TransDecoder identifies candidate coding regions within transcript sequences, such as those generated by de novo RNA-Seq transcript assembly using Trinity, or constructed based on RNA-Seq alignments to the genome using Tophat and Cufflinks.
+TransDecoder identifies candidate coding regions within transcript sequences such as those generated by de novo RNA-Seq transcript assembly using Trinity or constructed based on RNA-Seq alignments to the genome using Tophat and Cufflinks.
 
 TransDecoder identifies likely coding sequences based on the following criteria:
 
- - a minimum length open reading frame (ORF) is found in a transcript sequence
-
+ - a minimum length open reading frame (ORF) is found in a transcript sequence.
  - a log-likelihood score similar to what is computed by the GeneID software is > 0.
-
  - the above coding score is greatest when the ORF is scored in the 1st reading frame as compared to scores in the other 5 reading frames.
-
  - if a candidate ORF is found fully encapsulated by the coordinates of another candidate ORF, the longer one is reported. However, a single transcript can report multiple ORFs (allowing for operons, chimeras, etc).
-
+ - a PSSM is built/trained/used to refine the start codon prediction.
  - optional the putative peptide has a match to a Pfam domain above the noise cutoff score.
 
-The software is primarily maintained by Brian Haas at the Broad Institute and Alexie Papanicolaou at the Commonwealth Scientific and Industrial Research Organisation (CSIRO). It is integrated into other related software such as Trinity, PASA, EVidenceModeler, and Trinotate.
-    </help>
+*Step 1*: Extract long open reading frames
+
+By default, TransDecoder.LongOrfs will identify ORFs that are at least 100 amino acids long. You can lower this via the '-m' parameter, but know that the rate of false positive ORF predictions increases drastically with shorter minimum length criteria.
+
+*Step 2*: (optional and not part of this wrapper)
+
+The result "longest ORFs (PEP)" can be used to identify ORFs with homology to known proteins via BlastP or Pfam searches (`details <https://github.com/TransDecoder/TransDecoder/wiki#including-homology-searches-as-orf-retention-criteria>`_).
+
+*Step 3*: Predict the likely coding regions
+
+Optionally apply results of homology searches in this step and re-run the whole analysis.
+
+**Input**
+
+- FASTA file with transcripts
+- (optional) gene-to-transcript identifier mapping file
+- (optional) BlastP (outfmt 6) and/or Pfam hmmscan (domtblout) search result files (`details <https://github.com/TransDecoder/TransDecoder/wiki#including-homology-searches-as-orf-retention-criteria>`_)
+
+**Output**
+
+*LongOrfs*
+
+- longest ORFs (PEP/FASTA): all ORFs meeting the minimum length criteria, regardless of coding potential
+- longest ORFs (GFF3): positions of all ORFs as found in the target transcripts
+- longest ORFs (CDS/FASTA): the nucleotide coding sequence for all detected ORFs
+
+*Predict*
+
+- Results (PEP/FASTA): peptide sequences for the final candidate ORFs; all shorter candidates within longer ORFs were removed
+- Results (CDS/FASTA): nucleotide sequences for coding regions of the final candidate ORFs
+- Results (GFF3): positions within the target transcripts of the final selected ORFs
+- Results (BED): BED-formatted file describing ORF positions, best for viewing using GenomeView or IGV
+
+*Other*
+
+- Log file
+
+.. class:: infomark
+
+**References**
+
+More information is available on `GitHub <https://github.com/TransDecoder/TransDecoder>`_.
+    ]]></help>
     <citations>
         <citation type="doi">10.1038/nprot.2013.084</citation>
     </citations>
-</tool>
+</tool>
\ No newline at end of file