Mercurial > repos > abims-sbr > concatphyl
comparison ConcatPhyl.xml @ 0:b186cae246bd draft default tip
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author | abims-sbr |
---|---|
date | Fri, 01 Feb 2019 10:27:42 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b186cae246bd |
---|---|
1 <tool name="ConcatPhyl" id="concatphyl" version="2.0.2"> | |
2 | |
3 <description> | |
4 Concatenation and phylogeny | |
5 </description> | |
6 <!-- Shared macro definitions are imported from macros.xml (not shown here). --> | |
7 <macros> | |
8 <import>macros.xml</import> | |
9 </macros> | |
10 <!-- Dependencies: whatever the python_required macro expands to (presumably a Python package; confirm in macros.xml) plus RAxML 8.2.9. --> | |
11 <requirements> | |
12 <expand macro="python_required" /> | |
13 <requirement type="package" version="8.2.9">raxml</requirement> | |
14 </requirements> | |
15 | |
16 <command><![CDATA[ | |
17 #set $infiles_filter_assemblies = "" | |
18 #for $input_filter_assemblie in $input_filter_assemblies | |
19 ln -s '$input_filter_assemblie' '$input_filter_assemblie.element_identifier'; | |
20 #set $infiles_filter_assemblies = $infiles_filter_assemblies + $input_filter_assemblie.element_identifier + "," | |
21 #end for | |
22 #set $infiles_filter_assemblies = $infiles_filter_assemblies[:-1] | |
23 ## Above: link each Filter Assemblies dataset under its dataset name and build a comma-separated list of those names (trailing comma stripped). Below: link each alignment the same way and record its name in list_files. | |
24 #for $input_alignment in $input_alignments | |
25 ln -s '$input_alignment' '$input_alignment.element_identifier'; | |
26 echo '$input_alignment.element_identifier' >> list_files; | |
27 #end for | |
28 ## Step 1: concatenation script. Arguments are the species file list, the sequence type and list_files; its stdout starts the main output. Paths and names are single-quoted so whitespace in a dataset name or install path is not split into several arguments. | |
29 python '$__tool_directory__/scripts/S01_concatenate.py' | |
30 | |
31 '$infiles_filter_assemblies' | |
32 | |
33 #if $format.format_run == "nucleic" : | |
34 nucleic | |
35 #elif $format.format_run == "proteic" : | |
36 proteic | |
37 #end if | |
38 | |
39 list_files | |
40 | |
41 > '${output}'; | |
42 ## Step 2: RAxML on the concatenated PHYLIP alignment (presumably written by the script above; confirm). The partition option -q stays disabled below. | |
43 raxmlHPC -n galaxy_run | |
44 #if $format.format_run == "nucleic" : | |
45 ##-q 05_partitions_gene_NUC | |
46 -s "03_Concatenation_nuc.phy" | |
47 -m $format.base_model | |
48 #elif $format.format_run == "proteic" : | |
49 ##-q 06_partitions_gene_AA | |
50 -s 02_Concatenation_aa.phy | |
51 -m $format.base_model$format.aa_search_matrix | |
52 #end if | |
53 | |
54 -p $random_seed | |
55 ## -N and -x are only emitted when a run count or a bootstopping criterion is set; the bootstopping criterion wins when both are given. | |
56 #if $number_of_runs !="" and $number_of_runs_bootstop =="": | |
57 -N $number_of_runs | |
58 -x $rapid_bootstrap_random_seed | |
59 #elif ($number_of_runs !="" and $number_of_runs_bootstop !="") or ($number_of_runs =="" and $number_of_runs_bootstop !=""): | |
60 -N $number_of_runs_bootstop | |
61 -x $rapid_bootstrap_random_seed | |
62 #end if | |
63 | |
64 -f $search_algorithm | |
65 ## RAxML stdout is appended to the same main output as the concatenation report. | |
66 >> '${output}'; | |
67 ]]> | |
68 </command> | |
69 | |
70 <inputs> | |
71 <!-- Input datasets: both multi-file FASTA inputs are symlinked under their dataset names by the command template. --> | |
72 <param name="input_filter_assemblies" type="data" format="fasta" multiple="true" label="Files from Filter assemblies" /> | |
73 <param name="input_alignments" type="data" format="fasta" multiple="true" label="Aligned files without indels" help="nucleic or proteic format according to the analysis you want to do below"/> | |
74 <!-- Sequence type: passed to the concatenation script, and selects the RAxML alignment file and the -m model options. --> | |
75 <conditional name="format"> | |
76 <param name="format_run" type="select" label="Which format do you want to use for this tool (concatenation and RAxML run) ? "> | |
77 <option value="nucleic">Nucleic format</option> | |
78 <option value="proteic">Proteic format</option> | |
79 </param> | |
80 <!-- Nucleotide case: the selected base_model is used as-is for -m. --> | |
81 <when value="nucleic"> | |
82 <param name="base_model" type="select" label="Substitution Model"> | |
83 <option value="GTRCAT">GTRCAT</option> | |
84 <option value="GTRCATI">GTRCATI</option> | |
85 <option value="GTRGAMMA" selected="true">GTRGAMMA</option> | |
86 <option value="GTRGAMMAI">GTRGAMMAI</option> | |
87 </param> | |
88 </when> | |
89 <!-- Protein case: -m receives base_model immediately followed by aa_search_matrix (e.g. PROTCATDAYHOFF). --> | |
90 <when value="proteic"> | |
91 <param name="base_model" type="select" label="Substitution Model (-m)"> | |
92 <option value="PROTCAT" selected="true">PROTCAT</option> | |
93 <option value="PROTCATI">PROTCATI</option> | |
94 <option value="PROTGAMMA">PROTGAMMA</option> | |
95 <option value="PROTGAMMAI">PROTGAMMAI</option> | |
96 </param> | |
97 <param name="aa_search_matrix" type="select" label="Matrix"> | |
98 <option value="DAYHOFF" selected="true">DAYHOFF</option> | |
99 <option value="JTT">JTT</option> | |
100 <option value="WAG">WAG</option> | |
101 <option value="BLOSUM62">BLOSUM62</option> | |
102 </param> | |
103 </when> | |
104 </conditional> | |
105 | |
106 <param name="random_seed" type="integer" value="1234567890" size="12" label="Random seed used for the parsimony inferences" /> | |
107 <!-- Run count and bootstopping both feed RAxML -N; a selected bootstopping criterion takes precedence over the number. --> | |
108 <!-- RAxML -N (also written -#): explicit number of runs, optional. --> | |
109 <param name="number_of_runs" type="integer" size="8" value="100" | |
110 label="Number of runs" help="Specify the number of | |
111 alternative runs (-N|#) on distinct starting trees. In combination | |
112 with the '-b' option this will invoke a multiple bootstrap analysis. | |
113 You can add the bootstopping criteria by choosing the autoMR, | |
114 autoMRE, autoMRE_IGN, or autoFC value in a menu below instead of | |
115 providing a number here. Bootstopping will only work in | |
116 combination with '-x' or '-b'." | |
117 optional="True" /> | |
118 <param name="number_of_runs_bootstop" type="select" label="Use bootstopping criteria for number of runs" optional="True"> | |
119 <option value="" selected="yes"></option> | |
120 <option value="autoMR">autoMR</option> | |
121 <option value="autoMRE">autoMRE</option> | |
122 <option value="autoMRE_IGN">autoMRE_IGN</option> | |
123 <option value="autoFC">autoFC</option> | |
124 </param> | |
125 <!-- NOTE(review): this select is optional, yet the command always emits "-f" followed by its value; confirm what happens when nothing is selected. --> | |
126 <!-- RAxML -f: algorithm to run. The form preselects "a"; the "(default)" tag on "d" presumably refers to RAxML's own default. --> | |
127 <param name="search_algorithm" type="select" label="Algorithm to execute" optional="True"> | |
128 <option value="a" selected="true">Rapid bootstrap and best ML tree search (a)</option> | |
129 <option value="A">Compute marginal ancestral states (A)</option> | |
130 <option value="b">Draw bipartition information (b)</option> | |
131 <option value="c">Check if the alignment can be read (c)</option> | |
132 <option value="d">Hill-climbing ML Search (d) (default)</option> | |
133 <option value="e">Optimize GAMMA/GAMMAI model/branches (e)</option> | |
134 <option value="g">Compute per-site log likelihoods for -z trees (g)</option> | |
135 <option value="h">Compute log likelihood test for -t / -z trees (h)</option> | |
136 <option value="j">Generate bootstrapped alignment files (j)</option> | |
137 <option value="J">Compute SH-like support values for the -t tree (J)</option> | |
138 <option value="m">Compare bipartitions between -t and -z trees (m)</option> | |
139 <option value="n">Compute log likelihood score for -z trees (n)</option> | |
140 <option value="o">Use old slower search algorithm (o)</option> | |
141 <option value="p">Stepwise MP addition of new sequences (p)</option> | |
142 <option value="q">Fast quartet calculator (q)</option> | |
143 <option value="r">Compute pairwise RF distances in -z trees (r)</option> | |
144 <option value="s">Split a multi-gene alignment (s)</option> | |
145 <option value="S">Compute site-specific placement bias (S)</option> | |
146 <option value="t">Randomized tree searches on a fixed starting tree (t)</option> | |
147 <option value="T">Final optimization of a ML tree from a bootstrap (T)</option> | |
148 <option value="u">Morphological weight calibration using ML on a -t tree (u)</option> | |
149 <option value="v">Classify environmental sequences (v)</option> | |
150 <option value="w">Compute ELW-test on -z trees (w)</option> | |
151 <option value="x">Compute GAMMA model pair-wise ML distances on a tree (x)</option> | |
152 <option value="y">Classify environmental sequences into a reference tree (y)</option> | |
153 </param> | |
154 <!-- NOTE(review): multiple_model is declared here but never referenced by the command template, where the -q lines are commented out. --> | |
155 <!-- RAxML -q: optional partition/model assignment file. --> | |
156 <param name="multiple_model" format="txt" type="data" label="Multiple model assignment to alignment partitions" optional="True" help="Specify the file name which contains the assignment of models to alignment partitions for multiple models of substitution. For the syntax of this file please consult the manual." /> | |
157 <!-- The command template emits this seed only together with -N, i.e. when a run count or a bootstopping criterion is set. --> | |
158 <!-- RAxML -x: rapid bootstrapping random seed. --> | |
159 <param name="rapid_bootstrap_random_seed" type="integer" value='12345' size="7" label="Rapid bootstrapping random seed" optional="True" help="Specify a random seed and turn on rapid bootstrapping. CAUTION: unlike in version 7.0.4 RAxML will conduct rapid BS replicates under the model of rate heterogeneity you specified via '-m' and not by default under CAT." /> | |
160 <!-- Chooses which concatenated alignment, if any, is exposed as a dataset; the value is only read by the output filters. --> | |
161 <param name="out" type="select" label="What format of file do you want for your output (concatenation of the sequences) ? "> | |
162 <option value="nothing">No output</option> | |
163 <option value="fasta" selected="true">Fasta format</option> | |
164 <option value="phylip">Phylip format</option> | |
165 <option value="nexus">Nexus format</option> | |
166 </param> | |
167 <!-- Toggles read by the output filters: each one exposes one RAxML result file (best tree, bipartitions, bootstrap replicates). Only the best tree is checked by default. --> | |
168 <param name="raxml1" type="boolean" checked="True" label="Do you want the output of RAxML : best tree ? " /> | |
169 <param name="raxml3" type="boolean" label="Do you want the output of RAxML : bi-partition ? " /> | |
170 <param name="raxml4" type="boolean" label="Do you want the output of RAxML : bootstrap ? " help="Only if the option 'rapid bootstrap' is chosen. When you don't want to choose your options, this output is accessible"/> | |
171 | |
172 </inputs> | |
173 | |
174 <outputs> | |
175 <data name="output" format="txt" label="Phylogeny"/> | |
176 <!-- The "output" dataset above receives the stdout of the concatenation script followed by the RAxML stdout. Concatenated alignment: at most one of the six datasets below is created, chosen by sequence type and the "out" select. --> | |
177 <data name="out_fasta_aa" format="fasta" label="Phylogeny_concatenation_fasta_aa" from_work_dir="02_Concatenation_aa.fas"> | |
178 <filter>format['format_run'] == "proteic" and out == "fasta"</filter> | |
179 </data> | |
180 | |
181 <data name="out_phylip_aa" format="phylip" label="Phylogeny_concatenation_phylip_aa" from_work_dir="02_Concatenation_aa.phy"> | |
182 <filter>format['format_run'] == "proteic" and out == "phylip"</filter> | |
183 </data> | |
184 | |
185 <data name="out_nexus_aa" format="nexus" label="Phylogeny_concatenation_nexus_aa" from_work_dir="02_Concatenation_aa.nex"> | |
186 <filter>format['format_run'] == "proteic" and out == "nexus"</filter> | |
187 </data> | |
188 | |
189 <data name="out_fasta_nuc" format="fasta" label="Phylogeny_concatenation_fasta_nuc" from_work_dir="03_Concatenation_nuc.fas"> | |
190 <filter>format['format_run'] == "nucleic" and out == "fasta"</filter> | |
191 </data> | |
192 | |
193 <data name="out_phylip_nuc" format="phylip" label="Phylogeny_concatenation_phylip_nuc" from_work_dir="03_Concatenation_nuc.phy"> | |
194 <filter>format['format_run'] == "nucleic" and out == "phylip"</filter> | |
195 </data> | |
196 | |
197 <data name="out_nexus_nuc" format="nexus" label="Phylogeny_concatenation_nexus_nuc" from_work_dir="03_Concatenation_nuc.nex"> | |
198 <filter>format['format_run'] == "nucleic" and out == "nexus"</filter> | |
199 </data> | |
200 <!-- RAxML result files, collected from the working directory; the galaxy_run suffix matches the run name given with -n in the command. --> | |
201 <data name="out_raxml1" format="nhx" label="Phylogeny_RAxML_BestTree" from_work_dir="RAxML_bestTree.galaxy_run"> | |
202 <filter>raxml1 == True</filter> | |
203 </data> | |
204 <!-- NOTE(review): RAxML presumably writes the bipartitions and bootstrap files only for some algorithm/seed combinations; confirm these two outputs exist for every selectable algorithm. --> | |
205 <data name="out_raxml3" format="nhx" label="Phylogeny_RAxML_BiPartition" from_work_dir="RAxML_bipartitions.galaxy_run"> | |
206 <filter>raxml3 == True</filter> | |
207 </data> | |
208 | |
209 <data name="out_raxml4" format="txt" label="Phylogeny_RAxML_BootStrap" from_work_dir="RAxML_bootstrap.galaxy_run"> | |
210 <filter>raxml4 == True</filter> | |
211 </data> | |
212 </outputs> | |
213 | |
214 <tests> | |
215 <test> | |
216 <param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" /> | |
217 <param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/orthogroup_17_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_147_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_183_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_334_sp3_sp3.fasta" /> | |
218 <conditional name="format"> | |
219 <param name="format_run" value="nucleic" /> | |
220 <param name="base_model" value="GTRGAMMA" /> | |
221 </conditional> | |
222 <param name="random_seed" value="1234567890" /> | |
223 <param name="number_of_runs" value="100" /> | |
224 <param name="number_of_runs_bootstop" value="" /> | |
225 <param name="search_algorithm" value="d" /> | |
226 <!-- Test 1: algorithm d with 100 runs and a rapid bootstrap seed; only the bootstrap output is checked, for three expected topologies. multiple_model is not set (kept for reference): <param name="multiple_model" value="" /> --> | |
227 <param name="rapid_bootstrap_random_seed" value="123456789" /> | |
228 <param name="out" value="nothing" /> | |
229 <param name="raxml1" value="True" /> | |
230 <param name="raxml3" value="True" /> | |
231 <param name="raxml4" value="True" /> | |
232 <output name="out_raxml4"> | |
233 <assert_contents> | |
234 <has_text text="((Pg,(Am,Th)),(Ph,Ap),Ac);"/> | |
235 <has_text text="((Th,(Pg,Am)),(Ph,Ap),Ac);"/> | |
236 <has_text text="((Ph,Ap),(Am,(Pg,Th)),Ac);"/> | |
237 </assert_contents> | |
238 </output> | |
239 </test> | |
240 <!-- Test 2: algorithm a with a fixed run count of 100; the best tree and the bipartitions tree are compared against reference files. --> | |
241 <test> | |
242 <param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" /> | |
243 <param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/orthogroup_17_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_147_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_183_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_334_sp3_sp3.fasta" /> | |
244 <conditional name="format"> | |
245 <param name="format_run" value="nucleic" /> | |
246 <param name="base_model" value="GTRGAMMA" /> | |
247 </conditional> | |
248 <param name="random_seed" value="1234567890" /> | |
249 <param name="number_of_runs" value="100" /> | |
250 <param name="number_of_runs_bootstop" value="" /> | |
251 <param name="search_algorithm" value="a" /> | |
252 <param name="rapid_bootstrap_random_seed" value="1234567890" /> | |
253 <param name="out" value="nothing" /> | |
254 <param name="raxml1" value="True" /> | |
255 <param name="raxml3" value="True" /> | |
256 <param name="raxml4" value="True" /> | |
257 <output name="out_raxml1" value="RAxML_bestTree.nwk"/> | |
258 <output name="out_raxml3" value="RAxML_bipartitions.nwk"/> | |
259 </test> | |
260 <!-- Test 3: algorithm a with the autoMR bootstopping criterion, which the command passes to -N in place of the run count; results are compared against the test3 reference files. --> | |
261 <test> | |
262 <param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" /> | |
263 <param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/orthogroup_17_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_147_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_183_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_334_sp3_sp3.fasta" /> | |
264 <conditional name="format"> | |
265 <param name="format_run" value="nucleic" /> | |
266 <param name="base_model" value="GTRGAMMA" /> | |
267 </conditional> | |
268 <param name="random_seed" value="1234567890" /> | |
269 <param name="number_of_runs" value="100" /> | |
270 <param name="number_of_runs_bootstop" value="autoMR" /> | |
271 <param name="search_algorithm" value="a" /> | |
272 <param name="rapid_bootstrap_random_seed" value="1234567890" /> | |
273 <param name="out" value="nothing" /> | |
274 <param name="raxml1" value="True" /> | |
275 <param name="raxml3" value="True" /> | |
276 <param name="raxml4" value="True" /> | |
277 <output name="out_raxml1" value="RAxML_bestTree_test3.nwk"/> | |
278 <output name="out_raxml3" value="RAxML_bipartitions_test3.nwk"/> | |
279 </test> | |
280 </tests> | |
281 | |
282 <help> | |
283 | |
284 @HELP_AUTHORS@ | |
285 | |
286 <![CDATA[ | |
287 | |
288 **Description** | |
289 | |
290 This tool takes files containing fasta sequences (from the CDS_Search in the AdaptSearch suite) and runs RAxML to build a phylogeny. | |
291 | |
292 .. class:: infomark | |
293 | |
294 full RAxML manual here_ | |
295 | |
296 .. _here: https://sco.h-its.org/exelixis/resource/download/NewManual.pdf | |
297 | |
298 -------- | |
299 | |
300 **Parameters** | |
301 | |
302 - The choice of the format sequences is possible : **proteic** or **nucleic** | |
303 | |
304 - Several RAxML parameters can be set : | |
305 | |
306 - Substitution model (-m) : Model of Binary (Morphological), Nucleotide, Multi-state, or Amino-Acid substitution | |
307 Default : GTRGAMMA (nucleic), PROTCAT (proteic). | |
308 | |
309 - Matrix : AA substitution model (when proteic inputs) | |
310 Default : DAYHOFF | |
311 | |
312 - random seed : Specifies a random number seed for the parsimony inferences. For all options/algorithms in RAxML that require some sort of randomization, this option must be specified. Make sure to pass different random number seeds to RAxML and not only 12345. | |
313 | |
314 - Number of runs (-N) : Specifies the number of alternative runs. | |
315 By default it's an integer of value 100. | |
316 | |
317 - Use bootstopping criteria for number of runs : | |
318 If selected, overrides the number of runs and uses the chosen bootstopping criterion instead. | |
319 | |
320 - Algorithm to execute (-f) : selects which algorithm RAxML executes. | |
321 Default : Rapid bootstrap and best ML tree search (-f a). | |
322 | |
323 - Multiple model assignment to alignment partitions (-q) : an optional parameter. Specifies the file which contains the assignment of models to alignment partitions for multiple models of substitution. For the syntax of this file please consult the manual. | |
324 This option allows you to specify the regions of your alignment for which an individual model of nucleotide substitution should be estimated. This will typically be useful to infer trees for long multi-gene alignments. | |
325 | |
326 - Rapid bootstrapping random seed (-x) : Specify an integer number (random seed) and turn on rapid bootstrapping. | |
327 In addition to the best tree search. | |
328 By default, this option is chosen. | |
329 | |
330 -------- | |
331 | |
332 **Inputs** | |
333 | |
334 - Files from Filter Assemblies : a set of fasta files (one file per species), e.g. the outputs of the first tool of the AdaptSearch suite. | |
335 Used to retrieve all the species names. | |
336 | |
337 - Alignment files without indels : a set of fasta files with aligned sequences (from the same species as in the previous parameter), e.g. the outputs of the CDS_Search tool of the AdaptSearch suite. | |
338 | |
339 -------- | |
340 | |
341 **Outputs** | |
342 | |
343 This tool produces the following files : | |
344 | |
345 - Phylogeny : | |
346 the general output. It gives the information about the concatenation (statistics) and the RAxML run. | |
347 | |
348 - Phylogeny_concatenation_fasta_aa : | |
349 contains the sequences concatenated in fasta format when you choose the option proteic. | |
350 | |
351 - Phylogeny_concatenation_phylip_aa : | |
352 contains the sequences concatenated in phylip format when you choose the option proteic. | |
353 | |
354 - Phylogeny_concatenation_nexus_aa : | |
355 contains the sequences concatenated in nexus format when you choose the option proteic. | |
356 | |
357 - Phylogeny_concatenation_fasta_nuc : | |
358 contains the sequences concatenated in fasta format when you choose the option nucleic. | |
359 | |
360 - Phylogeny_concatenation_phylip_nuc : | |
361 contains the sequences concatenated in phylip format when you choose the option nucleic. | |
362 it's this output which is used for the RAxML run. | |
363 | |
364 - Phylogeny_concatenation_nexus_nuc : | |
365 contains the sequences concatenated in nexus format when you choose the option nucleic. | |
366 | |
367 - Phylogeny_RAxML_BestTree : | |
368 the output of RAxML run which contains the Best Tree found. | |
369 | |
370 - Phylogeny_RAxML_BiPartitionBranchLabel : | |
371 the output of RAxML run which contains the Best Tree found with supported values as branch labels. | |
372 | |
373 - Phylogeny_RAxML_BiPartition : | |
374 the output of RAxML run which contains the Best Tree found with supported values. | |
375 | |
376 - Phylogeny_RAxML_BootStrap : | |
377 the output of RAxML run which contains all the bootstrapped trees. The number of bootstrapped trees depends on the option -N (number of runs). | |
378 | |
379 --------- | |
380 | |
381 **The AdaptSearch Pipeline** | |
382 | |
383 .. image:: adaptsearch_picture_helps.png | |
384 | |
385 --------- | |
386 | |
387 Changelog | |
388 --------- | |
389 | |
390 **Version 2.0 - 06/07/2017** | |
391 | |
392 - NEW: Replace the zip between tools by Dataset Collection | |
393 | |
394 **Version 1.0 - 13/04/2017** | |
395 | |
396 - Add functional test with planemo | |
397 - Planemo test with conda dependencies for raxml and python | |
398 - Scripts renamed + symlinks to the directory 'scripts' | |
399 | |
400 ]]> | |
401 | |
402 </help> | |
403 <!-- Reference to cite when using this tool, given as a DOI (presumably the RAxML publication; confirm). --> | |
404 <citations> | |
405 <citation type="doi">10.1093/bioinformatics/btu033</citation> | |
406 </citations> | |
407 | |
408 </tool> |