comparison transdecoder.xml @ 5:c6334cb383ff draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transdecoder commit 860cdb41f79283c76d898404e5f28e2d506bed4d"
author iuc
date Thu, 08 Oct 2020 12:33:46 +0000
parents 0db979fead3a
children d0d4cef4f967
comparison
equal deleted inserted replaced
4:0db979fead3a 5:c6334cb383ff
1 <tool id="transdecoder" name="TransDecoder" version="3.0.1"> 1 <tool id="transdecoder" name="TransDecoder" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
2 <description>Find coding regions within transcripts</description> 2 <description>finds coding regions within transcripts</description>
3 <macros>
4 <token name="@TOOL_VERSION@">5.5.0</token>
5 <token name="@VERSION_SUFFIX@">0</token>
6 </macros>
3 <requirements> 7 <requirements>
4 <requirement type="package" version="3.0.1">transdecoder</requirement> 8 <requirement type="package" version="@TOOL_VERSION@">transdecoder</requirement>
5 </requirements> 9 </requirements>
6 10 <version_command><![CDATA[TransDecoder.LongOrfs --version 2>&1 | grep 'TransDecoder.LongOrfs' | cut -f 2 -d ' ']]></version_command>
7 <command detect_errors="exit_code"><![CDATA[ 11 <command detect_errors="exit_code"><![CDATA[
8 TransDecoder.LongOrfs -t '${input}' 12 ## initialize
9 -m ${min_len} 13 ln -s '${t}' 'transcripts.fasta' &&
10 ${adv.stranded} 14
11 -G ${adv.gen_code} 15 ## run TransDecoder.LongOrfs
12 #if str($adv.partials) 16 TransDecoder.LongOrfs
13 -p ${adv.partials} 17 ## Shared options
14 #end if 18 -t 'transcripts.fasta'
15 && 19 -G '${G}'
16 TransDecoder.Predict --cpu \${GALAXY_SLOTS:-1} -t '${input}' 20 ## LongOrfs options
17 --retain_long_orfs ${adv.retain_long_orfs} 21 #if $lo.gene_trans_map
18 ${adv.single_best_orf} 22 --gene_trans_map '${lo.gene_trans_map}'
19 #if str( $training_sect.training.training_selector ) == "training_top": 23 #end if
20 -T ${training_sect.training.top_longest} 24 -m $lo.m
21 #else 25 ${lo.S}
22 --train '${training_sect.training.train}' 26 -O 'output' ## required, otherwise value of -t is used as output folder
23 #end if 27
24 && 28 ## run TransDecoder.Predict
25 mv `basename '${input}'`.transdecoder.pep '$transdecoder_pep' && 29 #if $po.predict_cond.predict_sel == 'yes'
26 mv `basename '${input}'`.transdecoder.cds '$transdecoder_cds' && 30 && TransDecoder.Predict
27 mv `basename '${input}'`.transdecoder.bed '$transdecoder_bed' && 31 ## Shared options
28 mv `basename '${input}'`.transdecoder.gff3 '$transdecoder_gff3' 32 -t 'transcripts.fasta'
33 -G '${G}'
34 ## Predict options
35 --retain_long_orfs_mode $po.predict_cond.mode_cond.mode_sel
36 #if $po.predict_cond.mode_cond.mode_sel == 'strict'
37 --retain_long_orfs_length $po.predict_cond.mode_cond.retain_long_orfs_length
38 #end if
39 #if $po.predict_cond.retain_pfam_hits
40 --retain_pfam_hits '$po.predict_cond.retain_pfam_hits'
41 #end if
42 #if $po.predict_cond.retain_blastp_hits
43 --retain_blastp_hits '$po.predict_cond.retain_blastp_hits'
44 #end if
45 $po.predict_cond.single_best_only
46 $po.predict_cond.no_refine_starts
47 -T $po.predict_cond.T
48 -O 'output'
49 #end if
50
51 ## postprocessing
52 #if 'log' in $oo.out
53 |& tee '$out_log'
54 #end if
29 ]]></command> 55 ]]></command>
30 <inputs> 56 <inputs>
31 <param name="input" argument="-t" type="data" format="fasta" label="Transcripts" /> 57 <param argument="-t" type="data" format="fasta" label="Select file with transcripts"/>
32 <param name="min_len" argument="-m" type="integer" value="100" label="Minimum protein length" /> 58 <param argument="-G" type="select" label="Select genetic code">
33 <section name="adv" title="Advanced Options" expanded="False"> 59 <option value="Acetabularia">Acetabularia</option>
34 <param name="stranded" argument="-S" type="boolean" truevalue="-S" falsevalue="" label="Strand-specific" help="Only analyzes top strand" /> 60 <option value="Candida">Candida</option>
35 <param name="gen_code" argument="-G" type="select" label="Genetic code"> 61 <option value="Ciliate">Ciliate</option>
36 <option value="universal" selected="True">universal</option> 62 <option value="Dasycladacean">Dasycladacean</option>
37 <option value="Euplotes">Euplotes</option> 63 <option value="Euplotid">Euplotid</option>
38 <option value="Tetrahymena">Tetrahymena</option> 64 <option value="Hexamita">Hexamita</option>
39 <option value="Candida">Candida</option> 65 <option value="Mesodinium">Mesodinium</option>
40 <option value="Acetabularia">Acetabularia</option> 66 <option value="Mitochondrial-Ascidian">Mitochondrial-Ascidian</option>
41 <option value="Mitochondrial-Canonical">Mitochondrial-Canonical</option> 67 <option value="Mitochondrial-Chlorophycean">Mitochondrial-Chlorophycean</option>
42 <option value="Mitochondrial-Vertebrates">Mitochondrial-Vertebrates</option> 68 <option value="Mitochondrial-Echinoderm">Mitochondrial-Echinoderm</option>
43 <option value="Mitochondrial-Arthropods">Mitochondrial-Arthropods</option> 69 <option value="Mitochondrial-Flatworm">Mitochondrial-Flatworm</option>
44 <option value="Mitochondrial-Echinoderms">Mitochondrial-Echinoderms</option> 70 <option value="Mitochondrial-Invertebrates">Mitochondrial-Invertebrates</option>
45 <option value="Mitochondrial-Molluscs">Mitochondrial-Molluscs</option> 71 <option value="Mitochondrial-Protozoan">Mitochondrial-Protozoan</option>
46 <option value="Mitochondrial-Ascidians">Mitochondrial-Ascidians</option> 72 <option value="Mitochondrial-Pterobranchia">Mitochondrial-Pterobranchia</option>
47 <option value="Mitochondrial-Nematodes">Mitochondrial-Nematodes</option> 73 <option value="Mitochondrial-Scenedesmus_obliquus">Mitochondrial-Scenedesmus_obliquus</option>
48 <option value="Mitochondrial-Platyhelminths">Mitochondrial-Platyhelminths</option> 74 <option value="Mitochondrial-Thraustochytrium">Mitochondrial-Thraustochytrium</option>
49 <option value="Mitochondrial-Yeasts">Mitochondrial-Yeasts</option> 75 <option value="Mitochondrial-Trematode">Mitochondrial-Trematode</option>
50 <option value="Mitochondrial-Euascomycetes">Mitochondrial-Euascomycetes</option> 76 <option value="Mitochondrial-Vertebrates">Mitochondrial-Vertebrates</option>
51 <option value="Mitochondrial-Protozoans">Mitochondrial-Protozoans</option> 77 <option value="Mitochondrial-Yeast">Mitochondrial-Yeast</option>
78 <option value="Pachysolen_tannophilus">Pachysolen_tannophilus</option>
79 <option value="Peritrich">Peritrich</option>
80 <option value="SR1_Gracilibacteria">SR1_Gracilibacteria</option>
81 <option value="Tetrahymena">Tetrahymena</option>
82 <option value="Universal" selected="true">Universal</option>
83 </param>
84 <section name="lo" title="LongOrfs options" expanded="true">
85 <param argument="--gene_trans_map" type="data" format="tabular" optional="true" label="Select gene-to-transcript identifier mapping file" help="gene_id&lt;tab&gt;trans_id&lt;return&gt;"/>
86 <param argument="-m" type="integer" value="100" min="1" label="Set minimum protein length"/>
87 <param argument="-S" type="boolean" truevalue="-S" falsevalue="" label="Activate strand-specificity?" help="Only analyse top strand."/>
88 </section>
89 <section name="po" title="Predict options" expanded="true">
90 <!--
91 TransDecoder.Predict can be skipped if only longest_orfs.pep (as a result of TransDecoder.LongOrfs) is required, e.g. for homology search via BlastP and Pfam.
92 -->
93 <conditional name="predict_cond">
94 <param name="predict_sel" type="select" label="Should likely coding regions be predicted?" help="(TransDecoder.Predict)">
95 <option value="yes" selected="true">Yes</option>
96 <option value="no">No</option>
97 </param>
98 <when value="yes">
99 <conditional name="mode_cond">
100 <param argument="mode_sel" type="select" label="Select mode to retain long ORFs" help="In dynamic mode: set range according to 1% FDR in a random sequence of same GC content.">
101 <option value="dynamic" selected="true">Dynamic</option>
102 <option value="strict">Strict</option>
103 </param>
104 <when value="dynamic"/>
105 <when value="strict">
106 <param argument="--retain_long_orfs_length" type="integer" value="1000000" min="0" label="Set long ORFs length" help="Retain all ORFs found that are equal or longer than these many nucleotides even if no other evidence marks it as coding."/>
107 </when>
108 </conditional>
109 <param argument="--retain_blastp_hits" type="data" format="tabular" optional="true" label="Select BlastP result file" help="Any ORF with a blast match will be retained in the final output. (outfmt 6 format)"/>
110 <param argument="--retain_pfam_hits" type="data" format="tabular" optional="true" label="Select Pfam result file" help="Domain table output file from running hmmscan to search Pfam. Any ORF with a pfam domain hit will be retained in the final output. (domtblout file)"/>
111 <param argument="--single_best_only" type="boolean" truevalue="--single_best_only" falsevalue="" label="Retain only the single best ORF per transcript?" help="Prioritized by homology, then by ORF length."/>
112 <param argument="--no_refine_starts" type="boolean" truevalue="--no_refine_starts" falsevalue="" label="Disable start refinement (identification of potential start codons for 5' partial ORFs using a PWM)?"/>
113 <param argument="-T" type="integer" value="500" min="1" label="Set top longest ORFs to train Markov Model" help="The first (10*value) elements are selected for removing redundancies. Then this number of longest ORFs is selected from the non-redundant set."/>
114 </when>
115 <when value="no"/>
116 </conditional>
117 </section>
118 <section name="oo" title="Output options">
119 <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)" help="Only shown in history if selected here and generated by the specific TransDecoder run.">
120 <!-- LongOrfs -->
121 <option value="lo_cds">Longest ORFs (CDS)</option>
122 <option value="lo_gff3">Longest ORFs (GFF3)</option>
123 <option value="lo_pep" selected="true">Longest ORFs (PEP)</option>
124 <!-- Predict -->
125 <option value="bed" selected="true">Results (BED)</option>
126 <option value="cds" selected="true">Results (CDS)</option>
127 <option value="gff3" selected="true">Results (GFF3)</option>
128 <option value="pep" selected="true">Results (PEP)</option>
129 <!-- Others -->
130 <option value="log">Log</option>
52 </param> 131 </param>
53 <param name="partials" argument="-p" type="integer" value="" optional="true" label="Shorten potential 5' partials if they are this percentage of the original protein or longer" />
54 <param name="retain_long_orfs" argument="--retain_long_orfs" type="integer" value="900" label="Retain long ORFs" help="Retain all ORFs found that are equal or longer than these many nucleotides even if no other evidence marks it as coding (default: 900 bp => 300aa)" />
55 <param argument="--single_best_orf" type="boolean" truevalue="--single_best_orf" falsevalue="" label="Retain only the single best ORF per transcript" help="Best is defined as having (optionally Pfam and/or BLAST support) and longest ORF" />
56 </section>
57 <section name="training_sect" title="Training Options" expanded="False">
58 <conditional name="training">
59 <param name="training_selector" type="select" label="Select the training method">
60 <option value="training_top" selected="True">Train with the top longest ORFs</option>
61 <option value="training_set">Train with a set of known ORFs</option>
62 </param>
63 <when value="training_top">
64 <param name="top_longest" argument="-T" type="integer" value="500" label="Number of top longest ORFs" help="Number of top longest ORFs to train Markov Model (hexamer stats). Note, 10x this value are first selected for use with cd-hit to remove redundancies, and then this value of longest ORFs are selected from the non-redundant set" />
65 </when>
66 <when value="training_set">
67 <param name="train" argument="--train" type="data" format="fasta" label="Training set of transcripts" help="FASTA file with ORFs to train Markov Mod for protein identification" />
68 </when>
69 </conditional>
70 </section> 132 </section>
71 </inputs> 133 </inputs>
72 <outputs> 134 <outputs>
73 <data name="transdecoder_pep" format="fasta" label="${tool.name} on ${on_string}: pep" /> 135 <!-- LongOrfs -->
74 <data name="transdecoder_cds" format="fasta" label="${tool.name} on ${on_string}: cds" /> 136 <data name="out_lo_cds" format="fasta" from_work_dir="output/longest_orfs.cds" label="${tool.name} on ${on_string}: Longest ORFs (CDS/FASTA)">
75 <data name="transdecoder_bed" format="bed" label="${tool.name} on ${on_string}: bed" /> 137 <filter>'lo_cds' in oo['out']</filter>
76 <data name="transdecoder_gff3" format="gff3" label="${tool.name} on ${on_string}: gff3" /> 138 </data>
139 <data name="out_lo_gff3" format="gff3" from_work_dir="output/longest_orfs.gff3" label="${tool.name} on ${on_string}: Longest ORFs (GFF3)">
140 <filter>'lo_gff3' in oo['out']</filter>
141 </data>
142 <data name="out_lo_pep" format="fasta" from_work_dir="output/longest_orfs.pep" label="${tool.name} on ${on_string}: Longest ORFs (PEP/FASTA)">
143 <filter>'lo_pep' in oo['out']</filter>
144 </data>
145 <!-- Predict -->
146 <data name="out_bed" format="bed" from_work_dir="transcripts.fasta.transdecoder.bed" label="${tool.name} on ${on_string}: Results (BED)">
147 <filter>'bed' in oo['out'] and po['predict_cond']['predict_sel'] == 'yes'</filter>
148 </data>
149 <data name="out_cds" format="fasta" from_work_dir="transcripts.fasta.transdecoder.cds" label="${tool.name} on ${on_string}: Results (CDS/FASTA)">
150 <filter>'cds' in oo['out'] and po['predict_cond']['predict_sel'] == 'yes'</filter>
151 </data>
152 <data name="out_gff3" format="gff3" from_work_dir="transcripts.fasta.transdecoder.gff3" label="${tool.name} on ${on_string}: Results (GFF3)">
153 <filter>'gff3' in oo['out'] and po['predict_cond']['predict_sel'] =='yes'</filter>
154 </data>
155 <data name="out_pep" format="fasta" from_work_dir="transcripts.fasta.transdecoder.pep" label="${tool.name} on ${on_string}: Results (PEP/FASTA)">
156 <filter>'pep' in oo['out'] and po['predict_cond']['predict_sel'] == 'yes'</filter>
157 </data>
158 <!-- Others -->
159 <data name="out_log" format="txt" label="${tool.name} on ${on_string}: Log">
160 <filter>'log' in oo['out']</filter>
161 </data>
77 </outputs> 162 </outputs>
78 <tests> 163 <tests>
79 <test> 164 <!-- no test implemented for: gene_trans_map, retain_blastp_hits, retain_pfam_hits -->
80 <param name="input" value="test.fa"/> 165
81 <output name="transdecoder_gff3" file="raw/test.fa.transdecoder.gff3" compare="sim_size" /> 166 <!-- #1 default -->
82 <output name="transdecoder_bed" file="raw/test.fa.transdecoder.bed" compare="sim_size" /> 167 <test expect_num_outputs="5">
83 <output name="transdecoder_cds" file="raw/test.fa.transdecoder.cds" compare="sim_size" /> 168 <param name="t" value="transcripts.fasta"/>
84 <output name="transdecoder_pep" file="raw/test.fa.transdecoder.pep" compare="sim_size" /> 169 <!-- LongOrfs -->
170 <output name="out_lo_pep">
171 <assert_contents>
172 <has_n_lines n="772"/>
173 <has_text_matching expression=".+comp874.+"/>
174 </assert_contents>
175 </output>
176 <!-- Predict -->
177 <output name="out_bed">
178 <assert_contents>
179 <has_n_lines n="337"/>
180 <has_text_matching expression="comp98\_c0\_seq1.+"/>
181 </assert_contents>
182 </output>
183 <output name="out_cds">
184 <assert_contents>
185 <has_n_lines n="6959"/>
186 <has_text_matching expression=">comp98\_c0\_seq1.+"/>
187 </assert_contents>
188 </output>
189 <output name="out_gff3">
190 <assert_contents>
191 <has_n_lines n="2165"/>
192 <has_text_matching expression="comp98\_c0\_seq1.+"/>
193 </assert_contents>
194 </output>
195 <output name="out_pep">
196 <assert_contents>
197 <has_n_lines n="2644"/>
198 <has_text_matching expression="comp98\_c0\_seq1.+"/>
199 </assert_contents>
200 </output>
85 </test> 201 </test>
86 <test> 202 <!-- #2 -->
87 <param name="input" value="test.fa"/> 203 <test expect_num_outputs="8">
88 <param name="training_selector" value="training_top"/> 204 <param name="t" value="transcripts.fasta"/>
89 <param name="top_longest" value="10"/> 205 <param name="G" value="Acetabularia"/>
90 <output name="transdecoder_gff3" file="top/test.fa.transdecoder.gff3" compare="sim_size" /> 206 <section name="lo">
91 <output name="transdecoder_bed" file="top/test.fa.transdecoder.bed" compare="sim_size" /> 207 <param name="m" value="101"/>
92 <output name="transdecoder_cds" file="top/test.fa.transdecoder.cds" compare="sim_size" /> 208 <param name="S" value="true"/>
93 <output name="transdecoder_pep" file="top/test.fa.transdecoder.pep" compare="sim_size" /> 209 </section>
210 <section name="po">
211 <conditional name="predict_cond">
212 <param name="predict_sel" value="yes"/>
213 <conditional name="mode_cond">
214 <param name="mode_sel" value="dynamic"/>
215 </conditional>
216 <param name="single_best_only" value="true"/>
217 <param name="no_refine_starts" value="true"/>
218 <param name="T" value="501"/>
219 </conditional>
220 </section>
221 <section name="oo">
222 <param name="out" value="lo_pep,lo_gff3,lo_cds,bed,cds,gff3,pep,log"/>
223 </section>
224 <!-- LongOrfs -->
225 <output name="out_lo_cds">
226 <assert_contents>
227 <has_n_lines n="1454"/>
228 <has_text_matching expression=">comp874\_c0\_seq1.+"/>
229 </assert_contents>
230 </output>
231 <output name="out_lo_gff3">
232 <assert_contents>
233 <has_n_lines n="4565"/>
234 <has_text_matching expression="comp874\_c0\_seq1.+"/>
235 </assert_contents>
236 </output>
237 <output name="out_lo_pep">
238 <assert_contents>
239 <has_n_lines n="1454"/>
240 <has_text_matching expression=">comp874\_c0\_seq1.+"/>
241 </assert_contents>
242 </output>
243 <!-- Predict -->
244 <output name="out_bed">
245 <assert_contents>
246 <has_n_lines n="340"/>
247 <has_text_matching expression="comp98\_c0\_seq1.+"/>
248 </assert_contents>
249 </output>
250 <output name="out_cds">
251 <assert_contents>
252 <has_n_lines n="7512"/>
253 <has_text_matching expression=">comp98\_c0\_seq1.+"/>
254 </assert_contents>
255 </output>
256 <output name="out_gff3">
257 <assert_contents>
258 <has_n_lines n="2000"/>
259 <has_text_matching expression="comp98\_c0\_seq1.+"/>
260 </assert_contents>
261 </output>
262 <output name="out_pep">
263 <assert_contents>
264 <has_n_lines n="2833"/>
265 <has_text_matching expression=">comp98\_c0\_seq1.+"/>
266 </assert_contents>
267 </output>
268 <!-- Others -->
269 <output name="out_log">
270 <assert_contents>
271 <has_text_matching expression="transdecoder is finished.+"/>
272 </assert_contents>
273 </output>
94 </test> 274 </test>
95 <test> 275 <!-- #3 -->
96 <param name="input" value="test.fa"/> 276 <test expect_num_outputs="8">
97 <param name="gen_code" value="Mitochondrial-Arthropods"/> 277 <param name="t" value="transcripts.fasta"/>
98 <output name="transdecoder_gff3" file="gencode/test.fa.transdecoder.gff3" compare="sim_size" /> 278 <section name="po">
99 <output name="transdecoder_bed" file="gencode/test.fa.transdecoder.bed" compare="sim_size" /> 279 <conditional name="predict_cond">
100 <output name="transdecoder_cds" file="gencode/test.fa.transdecoder.cds" compare="sim_size" /> 280 <param name="predict_sel" value="yes"/>
101 <output name="transdecoder_pep" file="gencode/test.fa.transdecoder.pep" compare="sim_size" /> 281 <conditional name="mode_cond">
282 <param name="mode_sel" value="strict"/>
283 <param name="retain_long_orfs_length" value="1000001"/>
284 </conditional>
285 </conditional>
286 </section>
287 <section name="oo">
288 <param name="out" value="lo_pep,lo_gff3,lo_cds,bed,cds,gff3,pep,log"/>
289 </section>
290 <!-- LongOrfs -->
291 <output name="out_lo_cds">
292 <assert_contents>
293 <has_n_lines n="772"/>
294 <has_text_matching expression=">comp874\_c0\_seq1.+"/>
295 </assert_contents>
296 </output>
297 <output name="out_lo_gff3">
298 <assert_contents>
299 <has_n_lines n="2486"/>
300 <has_text_matching expression="comp874\_c0\_seq1.+"/>
301 </assert_contents>
302 </output>
303 <output name="out_lo_pep">
304 <assert_contents>
305 <has_n_lines n="772"/>
306 <has_text_matching expression=">comp874\_c0\_seq1.+"/>
307 </assert_contents>
308 </output>
309 <!-- Predict -->
310 <output name="out_bed">
311 <assert_contents>
312 <has_n_lines n="337"/>
313 <has_text_matching expression="comp98\_c0\_seq1.+"/>
314 </assert_contents>
315 </output>
316 <output name="out_cds">
317 <assert_contents>
318 <has_n_lines n="6959"/>
319 <has_text_matching expression=">comp98\_c0\_seq1.+"/>
320 </assert_contents>
321 </output>
322 <output name="out_gff3">
323 <assert_contents>
324 <has_n_lines n="2165"/>
325 <has_text_matching expression="comp98\_c0\_seq1.+"/>
326 </assert_contents>
327 </output>
328 <output name="out_pep">
329 <assert_contents>
330 <has_n_lines n="2644"/>
331 <has_text_matching expression=">comp98\_c0\_seq1.+"/>
332 </assert_contents>
333 </output>
334 <!-- Others -->
335 <output name="out_log">
336 <assert_contents>
337 <has_text_matching expression="transdecoder is finished.+"/>
338 </assert_contents>
339 </output>
102 </test> 340 </test>
103 <test> 341 <!-- #4 -->
104 <param name="input" value="test.fa"/> 342 <test expect_num_outputs="1">
105 <param name="stranded" value="true"/> 343 <param name="t" value="transcripts.fasta"/>
106 <output name="transdecoder_gff3" file="strand/test.fa.transdecoder.gff3" compare="sim_size" /> 344 <section name="po">
107 <output name="transdecoder_bed" file="strand/test.fa.transdecoder.bed" compare="sim_size" /> 345 <conditional name="predict_cond">
108 <output name="transdecoder_cds" file="strand/test.fa.transdecoder.cds" compare="sim_size" /> 346 <param name="predict_sel" value="no"/>
109 <output name="transdecoder_pep" file="strand/test.fa.transdecoder.pep" compare="sim_size" /> 347 </conditional>
348 </section>
349 <!-- LongOrfs -->
350 <output name="out_lo_pep">
351 <assert_contents>
352 <has_n_lines n="772"/>
353 <has_text_matching expression="c"/>
354 </assert_contents>
355 </output>
110 </test> 356 </test>
111 </tests> 357 </tests>
112 <help> 358 <help><![CDATA[
359 .. class:: infomark
360
113 **What it does** 361 **What it does**
114 362
115 TransDecoder identifies candidate coding regions within transcript sequences, such as those generated by de novo RNA-Seq transcript assembly using Trinity, or constructed based on RNA-Seq alignments to the genome using Tophat and Cufflinks. 363 TransDecoder identifies candidate coding regions within transcript sequences such as those generated by de novo RNA-Seq transcript assembly using Trinity or constructed based on RNA-Seq alignments to the genome using Tophat and Cufflinks.
116 364
117 TransDecoder identifies likely coding sequences based on the following criteria: 365 TransDecoder identifies likely coding sequences based on the following criteria:
118 366
119 - a minimum length open reading frame (ORF) is found in a transcript sequence 367 - a minimum length open reading frame (ORF) is found in a transcript sequence.
120
121 - a log-likelihood score similar to what is computed by the GeneID software is > 0. 368 - a log-likelihood score similar to what is computed by the GeneID software is > 0.
122
123 - the above coding score is greatest when the ORF is scored in the 1st reading frame as compared to scores in the other 5 reading frames. 369 - the above coding score is greatest when the ORF is scored in the 1st reading frame as compared to scores in the other 5 reading frames.
124
125 - if a candidate ORF is found fully encapsulated by the coordinates of another candidate ORF, the longer one is reported. However, a single transcript can report multiple ORFs (allowing for operons, chimeras, etc). 370 - if a candidate ORF is found fully encapsulated by the coordinates of another candidate ORF, the longer one is reported. However, a single transcript can report multiple ORFs (allowing for operons, chimeras, etc).
126 371 - a PSSM is built/trained/used to refine the start codon prediction.
127 - optional the putative peptide has a match to a Pfam domain above the noise cutoff score. 372 - optional the putative peptide has a match to a Pfam domain above the noise cutoff score.
128 373
129 The software is primarily maintained by Brian Haas at the Broad Institute and Alexie Papanicolaou at the Commonwealth Scientific and Industrial Research Organisation (CSIRO). It is integrated into other related software such as Trinity, PASA, EVidenceModeler, and Trinotate. 374 *Step 1*: Extract long open reading frames
130 </help> 375
376 By default, TransDecoder.LongOrfs will identify ORFs that are at least 100 amino acids long. You can lower this via the '-m' parameter, but know that the rate of false positive ORF predictions increases drastically with shorter minimum length criteria.
377
378 *Step 2*: (optional and not part of this wrapper)
379
380 The result "longest ORFs (PEP)" can be used to identify ORFs with homology to known proteins via BlastP or Pfam searches (`details <https://github.com/TransDecoder/TransDecoder/wiki#including-homology-searches-as-orf-retention-criteria>`_).
381
382 *Step 3*: Predict the likely coding regions
383
384 Optionally apply results of homology searches in this step and re-run the whole analysis.
385
386 **Input**
387
388 - FASTA file with transcripts
389 - (optional) gene-to-transcript identifier mapping file
390 - (optional) BlastP and/or Pfam search result files (`details <https://github.com/TransDecoder/TransDecoder/wiki#including-homology-searches-as-orf-retention-criteria>`_)
391
392 **Output**
393
394 *LongOrfs*
395
396 - longest ORFs (PEP/FASTA): all ORFs meeting the minimum length criteria, regardless of coding potential
397 - longest ORFs (GFF3): positions of all ORFs as found in the target transcripts
398 - longest ORFs (CDS/FASTA): the nucleotide coding sequence for all detected ORFs
399
400 *Predict*
401
402 - Results (PEP/FASTA): peptide sequences for the final candidate ORFs; all shorter candidates within longer ORFs were removed
403 - Results (CDS/FASTA): nucleotide sequences for coding regions of the final candidate ORFs
404 - Results (GFF3): positions within the target transcripts of the final selected ORFs
405 - Results (BED): BED-formatted file describing ORF positions, best for viewing using GenomeView or IGV
406
407 *Other*
408
409 - Log file
410
411 .. class:: infomark
412
413 **References**
414
415 More information is available on `GitHub <https://github.com/TransDecoder/TransDecoder>`_.
416 ]]></help>
131 <citations> 417 <citations>
132 <citation type="doi">10.1038/nprot.2013.084</citation> 418 <citation type="doi">10.1038/nprot.2013.084</citation>
133 </citations> 419 </citations>
134 </tool> 420 </tool>