Mercurial > repos > bgruening > infernal
comparison cmbuild.xml @ 5:6e18e0b098cd draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit 80c721dcfe02a2b8baf8e2c64b76cbcd71b23d86
author | bgruening |
---|---|
date | Sat, 21 Jan 2017 17:36:57 -0500 |
parents | c47a7c52ac4f |
children | ee4be6eadd34 |
comparison
equal
deleted
inserted
replaced
4:c47a7c52ac4f | 5:6e18e0b098cd |
---|---|
1 <tool id="infernal_cmbuild" name="Build covariance models" version="1.1.0.2"> | 1 <tool id="infernal_cmbuild" name="cmbuild" version="@VERSION@.0"> |
2 <description>from sequence alignments (cmbuild)</description> | 2 <description>Build covariance models from sequence alignments</description> |
3 <parallelism method="multi" split_inputs="alignment_infile" split_mode="to_size" split_size="10" shared_inputs="" merge_outputs="cmfile_outfile"></parallelism> | 3 <macros> |
4 <requirements> | 4 <import>macros.xml</import> |
5 <requirement type="package">infernal</requirement> | 5 </macros> |
6 <requirement type="package" version="1.1">infernal</requirement> | 6 <parallelism method="multi" split_inputs="alignment_infile" split_mode="to_size" split_size="10" merge_outputs="cmfile_outfile"></parallelism> |
7 <requirement type="package" version="8.22">gnu_coreutils</requirement> | 7 <expand macro="requirements" /> |
8 </requirements> | 8 <expand macro="stdio" /> |
9 <command> | 9 <command> |
10 <![CDATA[ | 10 <![CDATA[ |
11 cmbuild -F | 11 cmbuild |
12 #if $is_summery_output: | 12 -F |
13 -o '$summary_outfile' | 13 #if $is_summery_output: |
14 #end if | 14 -o '$summary_outfile' |
15 | 15 #end if |
16 $model_construction_opts.model_construction_opts_selector | 16 ## too many outputs, is that one really needed? |
17 #if $model_construction_opts.model_construction_opts_selector == '--fast': | 17 ##-O $annotated_source_alignment_outfile |
18 --symfrac $model_construction_opts.symfrac | 18 $model_construction_opts.model_construction_opts_selector |
19 #end if | 19 #if $model_construction_opts.model_construction_opts_selector == '--fast': |
20 | 20 --symfrac $model_construction_opts.symfrac |
21 $noss | 21 #end if |
22 | 22 $noss |
23 $relative_weights_opts.relative_weights_opts_selector | 23 $relative_weights_opts.relative_weights_opts_selector |
24 #if $relative_weights_opts.relative_weights_opts_selector == '--wblosum': | 24 #if $relative_weights_opts.relative_weights_opts_selector == '--wblosum': |
25 --wid $relative_weights_opts.wid | 25 --wid $relative_weights_opts.wid |
26 #end if | 26 #end if |
27 | 27 --p7ere $controlling_filter_p7_hmm.p7ere |
28 $effective_opts.effective_opts_selector | 28 $controlling_filter_p7_hmm.p7ml |
29 #if str($effective_opts.effective_opts_selector) == '--eent': | 29 --EmN $controlling_filter_p7_hmm.EmN |
30 --EvN $controlling_filter_p7_hmm.EvN | |
31 --ElfN $controlling_filter_p7_hmm.ElfN | |
32 --EgfN $controlling_filter_p7_hmm.EgfN | |
33 $effective_opts.effective_opts_selector | |
34 #if str($effective_opts.effective_opts_selector) == '--eent': | |
35 #if $effective_opts.ere | |
30 --ere $effective_opts.ere | 36 --ere $effective_opts.ere |
37 #end if | |
38 #if $effective_opts.eminseq | |
31 --eminseq $effective_opts.eminseq | 39 --eminseq $effective_opts.eminseq |
40 #end if | |
41 #if $effective_opts.ehmmre | |
32 --ehmmre $effective_opts.ehmmre | 42 --ehmmre $effective_opts.ehmmre |
43 #end if | |
44 #if $effective_opts.eset | |
33 --eset $effective_opts.eset | 45 --eset $effective_opts.eset |
34 #end if | 46 #end if |
35 | 47 #end if |
36 #if str($refining_opts.refining_opts_selector) == '--refine': | 48 #if str($refining_opts.refining_opts_selector) == '--refine': |
37 #if $refining_opts.refine_output: | 49 #if $refining_opts.refine_output: |
38 --refine $refined_multiple_alignment_output | 50 --refine '$refined_multiple_alignment_output' |
39 #else: | 51 #else: |
40 --refine /dev/null | 52 --refine /dev/null |
53 #end if | |
54 $refining_opts.l | |
55 $refining_opts.gibbs_opts.gibbs_opts_selector | |
56 #if str($refining_opts.gibbs_opts.gibbs_opts_selector) == '--gibbs': | |
57 --seed $refining_opts.gibbs_opts.random_seed | |
58 #end if | |
59 $refining_opts.notrunc | |
60 $refining_opts.cyk | |
61 #end if | |
62 '$cmfile_outfile' | |
63 '$alignment_infile' | |
64 | |
65 | |
66 #if $Calibrate.selector=="true" | |
67 && cmcalibrate | |
68 -L$Calibrate.L | |
69 #if $Calibrate.output_options_cond.selector == "extra" | |
70 #if str($Calibrate.output_options_cond.output_options) != 'None' | |
71 #for j in $Calibrate.output_options_cond.output_options.value: | |
72 --$j $getVar($j) | |
73 #end for | |
41 #end if | 74 #end if |
42 | 75 #end if |
43 $l | 76 #if $Calibrate.cont_exp_tails_fits.selector == "top_n" |
44 $refining_opts.gibbs_opts.gibbs_opts_selector | 77 --gtailn $Calibrate.cont_exp_tails_fits.gtailn |
45 | 78 --ltailn $Calibrate.cont_exp_tails_fits.ltailn |
46 #if str($refining_opts.gibbs_opts.gibbs_opts_selector) == '--gibbs': | 79 #elif $Calibrate.cont_exp_tails_fits.selector == "frac" |
47 $refining_opts.gibbs_opts.random_seed | 80 --tailp $Calibrate.cont_exp_tails_fits.tailp |
48 #end if | 81 #end if |
49 | 82 --seed $Calibrate.add_opts.seed |
50 $notrunc | 83 --beta $Calibrate.add_opts.beta |
51 $cyk | 84 $Calibrate.add_opts.nonbanded |
52 #end if | 85 $Calibrate.add_opts.nonull3 |
53 | 86 $Calibrate.add_opts.random |
87 #if str($Calibrate.add_opts.gc) != 'None' | |
88 --gc '$Calibrate.add_opts.gc' | |
89 #end if | |
90 --cpu "\${GALAXY_SLOTS:-2}" | |
54 '$cmfile_outfile' | 91 '$cmfile_outfile' |
55 '$alignment_infile' | 92 #end if |
56 && | 93 |
57 cmcalibrate | |
58 -L 0.01 --cpu \${GALAXY_SLOTS:-2} | |
59 '$cmfile_outfile' | |
60 ]]> | 94 ]]> |
61 </command> | 95 </command> |
62 <inputs> | 96 <inputs> |
63 <!-- Stockholm or SELEX | 97 <!-- Stockholm or SELEX |
64 SELEX is defined in EMBOSS datatypes | 98 SELEX is defined in EMBOSS datatypes |
96 <param name="wid" type="float" value="0.5" | 130 <param name="wid" type="float" value="0.5" |
97 label="Percent identity for clustering the alignment (--wid)" help=""/> | 131 label="Percent identity for clustering the alignment (--wid)" help=""/> |
98 </when> | 132 </when> |
99 </conditional> | 133 </conditional> |
100 | 134 |
135 <section name="controlling_filter_p7_hmm" title="Controlling Filter P7 HMM construction" > | |
136 <param argument="--p7ere" type="float" value="0.38" label="For the filter p7 HMM, set minimum rel entropy/posn to" help="Set the target mean match state relative entropy for the filter p7 HMM"/> | |
137 <param argument="--p7ml" type="boolean" truevalue="--p7ml" falsevalue="" checked="false" label="Define the filter p7 HMM as the ML p7 HMM" help="Use a maximum likelihood p7 HMM built from the CM as the filter HMM"/> | |
138 <param argument="--EmN" type="integer" value="200" label="Number of sampled seqs to use for p7 local MSV calibration" help=""/> | |
139 <param argument="--EvN" type="integer" value="200" label="Number of sampled seqs to use for p7 local Vit calibration" help=""/> | |
140 <param argument="--ElfN" type="integer" value="200" label="Number of sampled seqs to use for p7 local Fwd calibration" help=""/> | |
141 <param argument="--EgfN" type="integer" value="200" label="Number of sampled seqs to use for p7 glocal Fwd calibration" help=""/> | |
142 </section> | |
143 | |
101 <conditional name="effective_opts"> | 144 <conditional name="effective_opts"> |
102 <param name="effective_opts_selector" type="select" label="Options controlling effective sequence number" help=""> | 145 <param name="effective_opts_selector" type="select" label="Options controlling effective sequence number" help=""> |
103 <option value="--eent" >entropy weighting strategy (--eent)</option> | 146 <option value="--eent" >entropy weighting strategy (--eent)</option> |
104 <option value="--enone" selected="true">Turn off the entropy weighting strategy (--enone)</option> | 147 <option value="--enone" selected="true">Turn off the entropy weighting strategy (--enone)</option> |
105 </param> | 148 </param> |
106 <when value="--enone"/> | 149 <when value="--enone"/> |
107 <when value="--eent"> | 150 <when value="--eent"> |
108 <param name="ere" type="float" value="0.59" | 151 <param name="ere" type="float" value="" |
109 label="Set the target mean match state relative entropy (--ere)" help=""/> | 152 label="Set the target mean match state relative entropy" help="(--ere)" optional="true"/> |
110 | 153 |
111 <param name="eminseq" type="integer" value="" | 154 <param name="eminseq" type="integer" value="" |
112 label="Define the minimum allowed effective sequence number (--eminseq)" help=""/> | 155 label="Define the minimum allowed effective sequence number" help="(--eminseq)" optional="true"/> |
113 | 156 |
114 <param name="ehmmre" type="float" value="" | 157 <param name="ehmmre" type="float" value="" |
115 label="Set the target HMM mean match state relative entropy (--ehmmre)" help=""/> | 158 label="Set the target HMM mean match state relative entropy" help="(--ehmmre)" optional="true"/> |
116 | 159 |
117 <param name="eset" type="integer" value="" | 160 <param name="eset" type="integer" value="" |
118 label="Set the effective sequence number for entropy weighting (--eset)" help=""/> | 161 label="Set the effective sequence number for entropy weighting" help="(--eset)" optional="true"/> |
119 </when> | 162 </when> |
120 </conditional> | 163 </conditional> |
121 | 164 |
122 <conditional name="refining_opts"> | 165 <conditional name="refining_opts"> |
123 <param name="refining_opts_selector" type="select" label="Options for refining the input alignment" help=""> | 166 <param name="refining_opts_selector" type="select" label="Options for refining the input alignment" help=""> |
167 <option value="--refine">refine the input alignment</option> | |
124 <option value="" selected="true">No refinement</option> | 168 <option value="" selected="true">No refinement</option> |
125 <option value="--refine">refine the input alignment</option> | |
126 </param> | 169 </param> |
127 <when value=""/> | 170 <when value=""/> |
128 <when value="--refine"> | 171 <when value="--refine"> |
129 | 172 |
130 <conditional name="gibbs_opts"> | 173 <conditional name="gibbs_opts"> |
156 </conditional> | 199 </conditional> |
157 | 200 |
158 <param name="is_summery_output" truevalue="" falsevalue="" checked="False" type="boolean" | 201 <param name="is_summery_output" truevalue="" falsevalue="" checked="False" type="boolean" |
159 label="Output a summary file?" help=""/> | 202 label="Output a summary file?" help=""/> |
160 | 203 |
204 <!-- calibrate options --> | |
205 <conditional name="Calibrate"> | |
206 <param name="selector" type="boolean" checked="true" label="Calibrate the covariance model" | |
207 help="A CM file must be calibrated with cmcalibrate before it can be used in cmsearch or cmscan. cmcalibrate is very slow. It takes a couple of hours to calibrate a single average sized CM on a single CPU"/> | |
208 <when value="false"/> | |
209 <when value="true"> | |
210 <param argument="-L" type="float" value="1.6" min="0.01" max="160" label="Total length of random sequences to search" help="Set random seq length to search in Mb (megabases)"/> | |
211 <conditional name="cont_exp_tails_fits" > | |
212 <param name="selector" type="select" label="Options controlling exponential tail fits"> | |
213 <option value="top_n">Fit the top 'n' hits/Mb in the histogram</option> | |
214 <option value="frac">Fit a fraction of the histogram</option> | |
215 </param> | |
216 <when value="top_n"> | |
217 <param argument="--gtailn" type="integer" value="250" min="0" label=" Fit the top 'n' hits/Mb in histogram for glocal modes" help=""/> | |
218 <param argument="--ltailn" type="integer" value="750" min="0" label=" Fit the top 'n' hits/Mb in histogram for local modes" help=""/> | |
219 </when> | |
220 <when value="frac"> | |
221 <param argument="--tailp" type="float" min="0" max="0.6" optional="true" label="Set fraction of histogram tail to fit to exp tail" help=""/> | |
222 </when> | |
223 </conditional> | |
224 <conditional name="output_options_cond"> | |
225 <param name="selector" type="select" label="Output extra files"> | |
226 <option value="extra">Output extra files</option> | |
227 <option value="none" selected="true">Don't output extra files</option> | |
228 </param> | |
229 <when value="extra"> | |
230 <param name="output_options" type="select" label="Optional output files" multiple="true" display="checkboxes"> | |
231 <option value="hfile">save fitted score histogram(s)</option> | |
232 <option value="sfile">save survival plot </option> | |
233 <option value="qqfile">save Q-Q plot for score histograms </option> | |
234 <option value="ffile">save lambdas for different tail fit probs</option> | |
235 <option value="xfile">save scores in fit tail</option> | |
236 </param> | |
237 </when> | |
238 <when value="none"/> | |
239 </conditional> | |
240 <section name="add_opts" title="Additional Options"> | |
241 <param argument="--seed" type="integer" value="181" | |
242 label="RNG seed" help="If the seed is nonzero, stochastic simulations will be reproducible. If 0, the random number generator is seeded arbitrarily"/> | |
243 <param argument="--beta" type="float" value="1e-15" | |
244 label="Tail loss prob. for query dependent banding (QDB)" help="The beta parameter is the amount of probability mass excluded during band calculation, higher values of beta give greater speedups but sacrifice more accuracy than lower values."/> | |
245 <param argument="--nonbanded" truevalue="--nonbanded" falsevalue="" checked="false" type="boolean" | |
246 label="Turn off QDB during E-value calibration" help="This will slow down calibration"/> | |
247 <param argument="--nonull3" truevalue="--nonull3" falsevalue="" checked="false" type="boolean" | |
248 label="Turn off the null3 post hoc additional null model" help="This is not recommended unless you plan on using the same option to cmsearch and/or cmscan"/> | |
249 <param argument="--random" truevalue="--random" falsevalue="" checked="false" type="boolean" | |
250 label="use GC content of random null background model of CM" help="Use the background null model of the CM to generate the random sequences, instead of the more realistic HMM. Unless the CM was built using the --null option to cmbuild, the background null model will be 25% each A, C, G and U"/> | |
251 <param argument="--gc" type="data" format="*" optional="true" label="Use GC content distribution from file" help="Generate the random sequences using the nucleotide distribution from the sequence file"/> | |
252 </section> | |
253 </when> | |
254 </conditional> | |
255 | |
256 | |
161 </inputs> | 257 </inputs> |
162 <outputs> | 258 <outputs> |
163 <data format="text" name="summary_outfile" label="cmbuild summary on ${on_string}"> | 259 <data format="text" name="summary_outfile" label="cmbuild summary on ${on_string}"> |
164 <filter>is_summery_output is True</filter> | 260 <filter>is_summery_output is True</filter> |
165 </data> | 261 </data> |
173 refining_opts['refine_output'] is True | 269 refining_opts['refine_output'] is True |
174 )) | 270 )) |
175 </filter> | 271 </filter> |
176 </data> | 272 </data> |
177 | 273 |
274 <!-- cmcalibrate additional output files --> | |
275 <data name="hfile" format="txt" label="hfile, an cmcalibrate additional output file, on ${on_string}"> | |
276 <filter> | |
277 (( | |
278 Calibrate['selector'] is True and | |
279 Calibrate['output_options_cond']['selector'] == "extra" and | |
280 'hfile' in Calibrate['output_options_cond']['output_options'] | |
281 )) | |
282 </filter> | |
283 </data> | |
284 <data name="sfile" format="txt" label="sfile, an cmcalibrate additional output file, on ${on_string}"> | |
285 <filter> | |
286 (( | |
287 Calibrate['selector'] is True and | |
288 Calibrate['output_options_cond']['selector'] == "extra" and | |
289 'sfile' in Calibrate['output_options_cond']['output_options'] | |
290 )) | |
291 </filter> | |
292 </data> | |
293 <data name="qqfile" format="txt" label="qqfile, an cmcalibrate additional output file, on ${on_string}"> | |
294 <filter> | |
295 (( | |
296 Calibrate['selector'] is True and | |
297 Calibrate['output_options_cond']['selector'] == "extra" and | |
298 'qqfile' in Calibrate['output_options_cond']['output_options'] | |
299 )) | |
300 </filter> | |
301 </data> | |
302 <data name="ffile" format="txt" label="ffile, an cmcalibrate additional output file, on ${on_string}"> | |
303 <filter> | |
304 (( | |
305 Calibrate['selector'] is True and | |
306 Calibrate['output_options_cond']['selector'] == "extra" and | |
307 'ffile' in Calibrate['output_options_cond']['output_options'] | |
308 )) | |
309 </filter> | |
310 </data> | |
311 <data name="xfile" format="txt" label="xfile, an cmcalibrate additional output file, on ${on_string}"> | |
312 <filter> | |
313 (( | |
314 Calibrate['selector'] is True and | |
315 Calibrate['output_options_cond']['selector'] == "extra" and | |
316 'xfile' in Calibrate['output_options_cond']['output_options'] | |
317 )) | |
318 </filter> | |
319 </data> | |
320 | |
178 </outputs> | 321 </outputs> |
322 | |
323 <tests> | |
324 <test> | |
325 <param name="alignment_infile" value="cmbuild_input_tRNA5.sto"/> | |
326 <conditional name="Calibrate"> | |
327 <param name="selector" value="true"/> | |
328 </conditional> | |
329 <output name="outfile"> | |
330 <assert_contents> | |
331 <has_text text="S 0 -1 0 1 4 0 1 88 108 -7.713 -8.959 -0.044 -5.412"/> | |
332 </assert_contents> | |
333 </output> | |
334 </test> | |
335 </tests> | |
179 <help> | 336 <help> |
180 <![CDATA[ | 337 <![CDATA[ |
181 | 338 |
182 **What it does** | 339 **What it does** |
183 | 340 |
266 | 423 |
267 - *--refine*: Attempt to refine the alignment before building the CM using expectation-maximization (EM). A CM is first built from the initial alignment as usual. Then, the sequences in the alignment are realigned optimally (with the HMM banded CYK algorithm, optimal means optimal given the bands) to the CM, and a new CM is built from the resulting alignment. The sequences are then realigned to the new CM, and a new CM is built from that alignment. This is continued until convergence, specifically when the alignments for two successive iterations are not significantly different (the summed bit scores of all the sequences in the alignment changes less than 1% between two successive iterations). | 424 - *--refine*: Attempt to refine the alignment before building the CM using expectation-maximization (EM). A CM is first built from the initial alignment as usual. Then, the sequences in the alignment are realigned optimally (with the HMM banded CYK algorithm, optimal means optimal given the bands) to the CM, and a new CM is built from the resulting alignment. The sequences are then realigned to the new CM, and a new CM is built from that alignment. This is continued until convergence, specifically when the alignments for two successive iterations are not significantly different (the summed bit scores of all the sequences in the alignment changes less than 1% between two successive iterations). |
268 - *Turn on the local alignment algorithm*: allows the alignment to span two or more subsequences if necessary (e.g. if the structures of the query model and target sequence are only partially shared), allowing certain large insertions and deletions in the structure to be penalized differently than normal indels. The default is to globally align the query model to the target sequences. | 425 - *Turn on the local alignment algorithm*: allows the alignment to span two or more subsequences if necessary (e.g. if the structures of the query model and target sequence are only partially shared), allowing certain large insertions and deletions in the structure to be penalized differently than normal indels. The default is to globally align the query model to the target sequences. |
269 - *--gibbs sampling*: Modifies the behavior of --refine so Gibbs sampling is used instead of EM. The difference is that during the alignment stage the alignment is not necessarily optimal, instead an alignment (parsetree) for each sequence is sampled from the posterior distribution of alignments as determined by the Inside algorithm. Due to this sampling step --gibbs is non-deterministic, so different runs with the same alignment may yield different results. This is not true when --refine is used without the --gibbs option, in which case the final alignment and CM will always be the same. When --gibbs is enabled, the --seed "number" option can be used to seed the random number generator predictably, making the results reproducible. The goal of the --gibbs option is to help expert RNA alignment curators refine structural alignments by allowing them to observe alternative high scoring alignments. | 426 - *--gibbs sampling*: Modifies the behavior of --refine so Gibbs sampling is used instead of EM. The difference is that during the alignment stage the alignment is not necessarily optimal, instead an alignment (parsetree) for each sequence is sampled from the posterior distribution of alignments as determined by the Inside algorithm. Due to this sampling step --gibbs is non-deterministic, so different runs with the same alignment may yield different results. This is not true when --refine is used without the --gibbs option, in which case the final alignment and CM will always be the same. When --gibbs is enabled, the --seed "number" option can be used to seed the random number generator predictably, making the results reproducible. The goal of the --gibbs option is to help expert RNA alignment curators refine structural alignments by allowing them to observe alternative high scoring alignments. |
270 - *--Random seed*: Seed the random number generator with an integer >= 0. This option can only be used in combination with --gibbs. If the given number is nonzero, stochastic sampling of alignments will be reproducible; the same command will give the same results. If the given number is 0, the random number generator is seeded arbitrarily, and stochastic samplings may vary from run to run of the same command. The default seed is 0. | 427 - *--Random seed*: Seed the random number generator with an integer >= 0. This option can only be used in combination with --gibbs. If the given number is nonzero, stochastic sampling of alignments will be reproducible; the same command will give the same results. If the given number is 0, the random number generator is seeded arbitrarily, and stochastic samplings may vary from run to run of the same command. The default seed is 0. |
271 - *--Turn off the truncated alignment algorithm*: With --refine, turn off the truncated alignment algorithm. There is more information on this in the cmalign manual page. | 428 - *--Turn off the truncated alignment algorithm*: With --refine, turn off the truncated alignment algorithm. There is more information on this in the cmalign manual page. |
272 - *--cyk algorithm*: With --refine, align with the CYK algorithm. By default the optimal accuracy algorithm is used. There is more information on this in the cmalign manual page. | 429 - *--cyk algorithm*: With --refine, align with the CYK algorithm. By default the optimal accuracy algorithm is used. There is more information on this in the cmalign manual page. |
273 | 430 |
431 | |
274 For further questions please refer to the Infernal Userguide_. | 432 For further questions please refer to the Infernal Userguide_. |
275 | 433 |
276 .. _Userguide: http://selab.janelia.org/software/infernal/Userguide.pdf | 434 .. _Userguide: http://eddylab.org/infernal/Userguide.pdf |
277 | 435 |
278 ]]> | 436 ]]> |
279 </help> | 437 </help> |
280 <citations> | 438 |
281 <citation type="doi">10.1093/bioinformatics/btt509</citation> | 439 <expand macro="citations" /> |
282 <citation type="bibtex"> | 440 |
283 @ARTICLE{bgruening_galaxytools, | |
284 Author = {Björn Grüning, Cameron Smith, Torsten Houwaart, Nicola Soranzo, Eric Rasche}, | |
285 keywords = {bioinformatics, ngs, galaxy, cheminformatics, rna}, | |
286 title = {{Galaxy Tools - A collection of bioinformatics and cheminformatics tools for the Galaxy environment}}, | |
287 url = {https://github.com/bgruening/galaxytools} | |
288 } | |
289 </citation> | |
290 </citations> | |
291 </tool> | 441 </tool> |