comparison macros.xml @ 0:13192095fd5a draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
author iuc
date Tue, 10 Dec 2019 16:04:22 -0500
parents
children 8dfc90f35aba
comparison
equal deleted inserted replaced
-1:000000000000 0:13192095fd5a
1 <macros>
<!-- CAT package version; substituted wherever @VERSION@ appears. -->
<token name="@VERSION@">5.0.3</token>
<!-- Shared requirements section: the "cat" package pinned to @VERSION@.
     Elements nested inside the expanding tool's <expand> replace <yield />. -->
<xml name="requirements">
    <requirements>
        <requirement version="@VERSION@" type="package">cat</requirement>
        <yield />
    </requirements>
</xml>
<!-- Command whose output Galaxy stores as the tool's version string. -->
<xml name="version_command">
    <version_command>CAT --version</version_command>
</xml>
<!-- Sub-directory names that the command tokens join onto a history
     database dataset's extra_files_path. -->
<token name="@DATABASE_FOLDER@">CAT_database</token>
<token name="@TAXONOMY_FOLDER@">taxonomy</token>
<!-- Database selector: an entry of the "cat_database" data table ("cached")
     or a dataset taken from the history ("history"). -->
<xml name="cat_db">
    <conditional name="db">
        <param name="db_src" type="select"
               label="CAT database (--database_folder,--taxonomy_folder) from">
            <option value="cached">local cached database</option>
            <option value="history">history</option>
        </param>
        <when value="cached">
            <param name="cat_builtin" type="select"
                   label="Use a built-in CAT database"
                   help="If the CAT database of interest is not listed, contact your Galaxy administrator">
                <options from_data_table="cat_database">
                    <filter type="sort_by" column="2"/>
                    <validator type="no_options" message="No CAT database is available."/>
                </options>
            </param>
        </when>
        <when value="history">
            <param name="cat_db" type="data" format="txt"
                   label="A history dataset from CAT prepare tool"/>
        </when>
    </conditional>
</xml>
<!-- Command-line fragment with the database and taxonomy folder options.
     Cached: both paths come from the selected data table row.
     History: both paths are sub-directories of the dataset's extra_files_path.
     The helper name $catdb is kept because #set names stay visible to the
     command template that embeds this token. -->
<token name="@CAT_DB@"><![CDATA[
#import os.path
#if $db.db_src == 'cached':
    #set $cat_database_dir = $db.cat_builtin.fields.database_folder
    #set $cat_taxonomy_dir = $db.cat_builtin.fields.taxonomy_folder
#else
    #set $catdb = $db.cat_db.extra_files_path
    #set $cat_database_dir = $os.path.join($catdb,"@DATABASE_FOLDER@")
    #set $cat_taxonomy_dir = $os.path.join($catdb,"@TAXONOMY_FOLDER@")
#end if
--database_folder '$cat_database_dir'
--taxonomy_folder '$cat_taxonomy_dir'
]]></token>
<!-- Command-line fragment with only the taxonomy folder option, resolved the
     same way as in @CAT_DB@ (data table row, or sub-directory of the history
     dataset's extra_files_path). -->
<token name="@CAT_TAXONOMY@"><![CDATA[
#import os.path
#if $db.db_src == 'cached':
    #set $cat_taxonomy_dir = $db.cat_builtin.fields.taxonomy_folder
#else
    #set $catdb = $db.cat_db.extra_files_path
    #set $cat_taxonomy_dir = $os.path.join($catdb,"@TAXONOMY_FOLDER@")
#end if
--taxonomy_folder '$cat_taxonomy_dir'
]]></token>
<!-- Test fixture: selects the cached database entry "CAT_prepare_test". -->
<xml name="test_catdb">
    <conditional name="db">
        <param name="db_src" value="cached" />
        <param name="cat_builtin" value="CAT_prepare_test" />
    </conditional>
</xml>
<!-- Optional reuse of the predicted proteins and the alignment table produced
     by an earlier run; the two data inputs are only shown for "yes". -->
<xml name="use_intermediates">
    <conditional name="previous">
        <param name="use_previous" type="select"
               label="Use previous prodigal gene prediction and diamond alignment"
               help="predicted_proteins.faa and alignment.diamond from previous CAT run.">
            <option value="yes">Yes</option>
            <option value="no" selected="true">No</option>
        </param>
        <when value="yes">
            <param argument="--proteins_fasta" type="data" format="fasta"
                   label="prodigal predicted proteins fasta"/>
            <param argument="--diamond_alignment" type="data" format="tabular"
                   label="alignment.diamond file"/>
        </when>
        <when value="no"/>
    </conditional>
</xml>
<!-- Passes the files of a previous run when requested and always fixes the
     output prefix to "cat_output", the prefix the output definitions and the
     add_names / summarise fragments expect. -->
<token name="@USE_INTERMEDIATES@"><![CDATA[
#if $previous.use_previous == 'yes':
    --proteins_fasta '$previous.proteins_fasta'
    --diamond_alignment '$previous.diamond_alignment'
#end if
--out_prefix 'cat_output'
]]></token>
<!-- Classification parameters (range and fraction) with their allowed bounds. -->
<xml name="custom_settings">
    <param argument="--range" type="integer" min="0" max="49" value="10" label="range"/>
    <param argument="--fraction" type="float" min="0" max="0.99" value="0.5" label="fraction"/>
</xml>
<!-- Command-line fragment for the two parameters defined by "custom_settings". -->
<token name="@CUSTOM_SETTINGS@"><![CDATA[
    --range '$range'
    --fraction '$fraction'
]]></token>
<!-- Advanced DIAMOND settings, shown only when the user opts in. -->
<xml name="diamond_options">
    <conditional name="diamond">
        <param name="set_diamond_opts" type="select" label="Set advanced diamond options">
            <option value="yes">Yes</option>
            <option value="no" selected="true">No</option>
        </param>
        <when value="yes">
            <param argument="--sensitive" type="boolean"
                   truevalue="--sensitive" falsevalue="" checked="false"
                   label="Run DIAMOND in sensitive mode (considerably slower)"/>
            <param argument="--block_size" type="float"
                   value="2.0" min="1" max="10"
                   label="DIAMOND block-size parameter."
                   help="lower will decrease memory and temporary disk space usage, higher will increase performance."/>
            <param argument="--index_chunks" type="integer"
                   value="4" min="1" max="10"
                   label="DIAMOND index-chunks parameter"
                   help="Set to 1 on high memory machines. The parameter has no effect on temporary disk space usage."/>
            <param argument="--top" type="integer"
                   value="50" min="1" max="50"
                   label="DIAMOND top parameter"
                   help="Governs hits within range of best hit that are written to the alignment file. This implies you know what you are doing."/>
        </when>
        <when value="no"/>
    </conditional>
</xml>
<!-- Command-line fragment for the advanced DIAMOND settings.
     The "top" option (and the confirmation flag that accompanies it) is only
     emitted when the value is below 50. The value is cast with int(str(...))
     so the test compares numbers rather than Galaxy's parameter wrapper
     object, whose ordering against an int is not reliable on every
     Galaxy/Python version. -->
<token name="@DIAMOND_OPTIONS@"><![CDATA[
#if $diamond.set_diamond_opts == 'yes':
    $diamond.sensitive
    --block_size '$diamond.block_size'
    --index_chunks '$diamond.index_chunks'
    #if int(str($diamond.top)) < 50:
        --I_know_what_Im_doing
        --top '$diamond.top'
    #end if
#end if
]]></token>
118
<!-- Boolean flags of "CAT add_names"; each renders as its flag when checked
     and as an empty string otherwise. -->
<xml name="add_names_options">
    <param argument="--only_official" type="boolean"
           truevalue="--only_official" falsevalue="" checked="true"
           label="Only output official level names."/>
    <param argument="--exclude_scores" type="boolean"
           truevalue="--exclude_scores" falsevalue="" checked="false"
           label="Exclude bit-score support scores in the lineage."/>
</xml>
<!-- Renders both flags; refers to the parameters without a conditional prefix. -->
<token name="@ADD_NAMES_OPTIONS@"><![CDATA[
    $only_official $exclude_scores
]]></token>
<!-- Chooses which result tables are annotated with taxonomic names. Every
     choice other than "no" exposes the same add_names flags. -->
<xml name="add_names">
    <conditional name="names">
        <param name="add_names" type="select"
               label="CAT add_names for"
               help="annotate with taxonomic names.">
            <option value="no">No</option>
            <option value="orf2lca">ORF2LCA.names.txt</option>
            <option value="classification">classification.names.txt</option>
            <option value="both">ORF2LCA.names.txt and classification.names.txt</option>
        </param>
        <when value="no"/>
        <when value="orf2lca">
            <expand macro="add_names_options"/>
        </when>
        <when value="classification">
            <expand macro="add_names_options"/>
        </when>
        <when value="both">
            <expand macro="add_names_options"/>
        </when>
    </conditional>
</xml>
<!-- Path of the tabpad.py helper shipped in the tool directory. -->
<token name="@TXT2TSV@">${__tool_directory__}/tabpad.py</token>
<!-- Appends up to two "CAT add_names" steps to the command, one for the
     classification table and one for the ORF2LCA table, depending on
     names.add_names. Each result is passed through the @TXT2TSV@ helper, which
     writes the final Galaxy output dataset. The classification input file is
     chosen by the hidden bcat parameter (contig table for CAT, bin table
     otherwise). The helper path comes from @TXT2TSV@ instead of being repeated
     literally; the expanded command is unchanged. -->
<token name="@ADD_NAMES@"><![CDATA[
#if $names.add_names in ['classification','both']:
    && CAT add_names $names.only_official $names.exclude_scores
    @CAT_TAXONOMY@
    #if $bcat == 'CAT'
        -i 'cat_output.contig2classification.tsv'
    #else
        -i 'cat_output.bin2classification.tsv'
    #end if
    -o 'classification_names.txt'
    && @TXT2TSV@ -i 'classification_names.txt' -o '$classification_names'
#end if
#if $names.add_names in ['orf2lca','both']:
    && CAT add_names $names.only_official $names.exclude_scores
    @CAT_TAXONOMY@
    -i 'cat_output.ORF2LCA.tsv'
    -o 'orf2lca_names.txt'
    && @TXT2TSV@ -i 'orf2lca_names.txt' -o '$orf2lca_names'
#end if
]]></token>
<!-- Switch for the optional "CAT summarise" report. -->
<xml name="summarise">
    <param name="summarise" type="select"
           label="CAT summarise report"
           help="Report the number of assignments to each taxonomic name">
        <option value="no">No</option>
        <option value="classification">classification.summary.txt</option>
    </param>
</xml>
<!-- Appends the "CAT summarise" step when the report is requested.
     Input selection: if add_names already produced a classification table
     with only official names, that dataset is reused; otherwise an extra
     "CAT add_names" run with the only_official flag creates a work file first.
     The contigs fasta is passed only when bcat is CAT. The summary is passed
     through the @TXT2TSV@ helper, which writes the output dataset; the helper
     path comes from that token instead of being repeated literally, and the
     expanded command is unchanged. -->
<token name="@SUMMARISE@"><![CDATA[
#if $summarise in ['classification']:
    #if $names.add_names in ['classification','both'] and $names.only_official:
        #set $summary_input = $classification_names
    #else
        #set $summary_input = 'classification_offical_names'
        && CAT add_names --only_official
        @CAT_TAXONOMY@
        #if $bcat == 'CAT'
            -i 'cat_output.contig2classification.tsv'
        #else
            -i 'cat_output.bin2classification.tsv'
        #end if
        -o '$summary_input'
    #end if
    && CAT summarise
    #if $bcat == 'CAT'
        -c '$contigs_fasta'
    #end if
    -i '$summary_input'
    -o 'classification_summary.txt'
    && @TXT2TSV@ -i 'classification_summary.txt' -o '$classification_summary'
#end if
]]></token>
201
<!-- Multi-select of the result files to keep as history datasets. The options
     given inside the expanding tool's <expand> are appended at <yield />. -->
<xml name="select_outputs">
    <param name="select_outputs" type="select" multiple="true" optional="false"
           label="Select outputs">
        <option value="log" selected="true">log</option>
        <option value="predicted_proteins_faa" selected="true">Prodigal predicted_proteins.faa</option>
        <option value="predicted_proteins_gff">Prodigal predicted_proteins.gff</option>
        <option value="alignment_diamond">Diamond blastp alignment.diamond</option>
        <option value="orf2lca" selected="true">ORF2LCA.txt (taxonomic assignment per predicted ORF)</option>
        <yield />
    </param>
</xml>
<!-- Contig variant: hidden parameters read by the command tokens and output
     definitions, plus the contig2classification choice added to the selector. -->
<xml name="select_cat_outputs">
    <param name="bcat" type="hidden" value="CAT" />
    <param name="seqtype" type="hidden" value="contig" />
    <param name="sum_titles" type="hidden" value="contigs,number of ORFs,number of positions" />
    <param name="bin_col" type="hidden" value="" />
    <expand macro="select_outputs">
        <option value="contig2classification" selected="true">contig2classification.txt (taxonomic assignment per contig)</option>
    </expand>
</xml>
<!-- Bin variant: hidden parameters read by the command tokens and output
     definitions, plus the bin2classification choice added to the selector. -->
<xml name="select_bat_outputs">
    <param name="bcat" type="hidden" value="BAT" />
    <param name="seqtype" type="hidden" value="bin" />
    <param name="sum_titles" type="hidden" value="bins" />
    <param name="bin_col" type="hidden" value="bin," />
    <expand macro="select_outputs">
        <option value="bin2classification" selected="true">bin2classification.txt (taxonomic assignment per metagenome assembly)</option>
    </expand>
</xml>
<!-- Output datasets shared by the contig and bin tools. Labels and column
     names are built from the hidden bcat, seqtype, bin_col and sum_titles
     parameters; each dataset is filtered on the output selector, the
     add_names choice or the summarise choice. -->
<xml name="outputs">
    <!-- Kept when selected, and also when nothing at all is selected. -->
    <data name="log" format="txt"
          label="${bcat}.log"
          from_work_dir="cat_output.log">
        <filter>'log' in select_outputs or not select_outputs</filter>
    </data>

    <!-- The next three are only offered when no previous run is reused. -->
    <data name="predicted_proteins_faa" format="fasta"
          label="${bcat}.predicted_proteins.faa"
          from_work_dir="cat_output.predicted_proteins.faa">
        <filter>'predicted_proteins_faa' in select_outputs and previous['use_previous'] == 'no'</filter>
    </data>
    <data name="predicted_proteins_gff" format="gff"
          label="${bcat}.predicted_proteins.gff"
          from_work_dir="cat_output.predicted_proteins.gff">
        <filter>'predicted_proteins_gff' in select_outputs and previous['use_previous'] == 'no'</filter>
    </data>
    <data name="alignment_diamond" format="tabular"
          label="${bcat}.alignment.diamond"
          from_work_dir="cat_output.alignment.diamond">
        <filter>'alignment_diamond' in select_outputs and previous['use_previous'] == 'no'</filter>
        <actions>
            <action name="comment_lines" type="metadata" default="1" />
            <action name="column_names" type="metadata" default="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore" />
        </actions>
    </data>

    <!-- Classification tables collected from the working directory. -->
    <data name="orf2lca" format="tabular"
          label="${bcat}.ORF2LCA.txt"
          from_work_dir="cat_output.ORF2LCA.tsv">
        <filter>'orf2lca' in select_outputs</filter>
        <actions>
            <action name="comment_lines" type="metadata" default="1" />
            <action name="column_names" type="metadata" default="ORF,${bin_col}lineage,bit-score" />
        </actions>
    </data>
    <data name="contig2classification" format="tabular"
          label="${bcat}.contig2classification.txt"
          from_work_dir="cat_output.contig2classification.tsv">
        <filter>'contig2classification' in select_outputs</filter>
        <actions>
            <action name="comment_lines" type="metadata" default="1" />
            <action name="column_names" type="metadata" default="contig,classification,reason,lineage,lineage scores" />
        </actions>
    </data>
    <data name="bin2classification" format="tabular"
          label="${bcat}.bin2classification.txt"
          from_work_dir="cat_output.bin2classification.tsv">
        <filter>'bin2classification' in select_outputs</filter>
        <actions>
            <action name="comment_lines" type="metadata" default="1" />
            <action name="column_names" type="metadata" default="bin,classification,reason,lineage,lineage scores" />
        </actions>
    </data>

    <!-- Written directly by the add_names / summarise command fragments,
         hence no from_work_dir. -->
    <data name="orf2lca_names" format="tabular"
          label="${bcat}.ORF2LCA.names.txt">
        <filter>names['add_names'] in ['both','orf2lca']</filter>
        <actions>
            <action name="comment_lines" type="metadata" default="1" />
            <action name="column_names" type="metadata" default="ORF,${bin_col}lineage,bit-score,superkingdom,phylum,class,order,family,genus,species" />
        </actions>
    </data>
    <data name="classification_names" format="tabular"
          label="${bcat}.${seqtype}2classification.names.txt">
        <filter>names['add_names'] in ['both','classification']</filter>
        <actions>
            <action name="comment_lines" type="metadata" default="1" />
            <action name="column_names" type="metadata" default="${seqtype},classification,reason,lineage,lineage scores,superkingdom,phylum,class,order,family,genus,species" />
        </actions>
    </data>
    <data name="classification_summary" format="tabular"
          label="${bcat}.${seqtype}2classification.summary.txt">
        <filter>'classification' in summarise</filter>
        <actions>
            <action name="comment_lines" type="metadata" default="4" />
            <action name="column_names" type="metadata" default="rank,clade,number of ${sum_titles}" />
        </actions>
    </data>
</xml>
290 <token name="@COMMON_HELP@"><![CDATA[
291 The Contig Annotation Tool (CAT) and Bin Annotation Tool (BAT) workflows are described at: https://github.com/dutilh/CAT
292
293 - CAT contigs/CAT bins - runs Prodigal_ prokaryotic protein prediction on the fasta input.
294 - CAT contigs/CAT bins - runs Diamond_ to align predicted proteins to the reference proteins in the CAT database.
295 - CAT contigs/CAT bins - assigns taxonomic classification to fasta entries and ORFs based on alignments.
296 - CAT add_names - annotates outputs with taxonomic names.
297 - CAT summarise - reports number of assignments to each taxonomic name.
298
299 A CAT database can either be installed by data_manager_cat or in the local history by CAT prepare tool.
300
301 .. _Prodigal: https://github.com/hyattpd/Prodigal
302 .. _Diamond: https://github.com/bbuchfink/diamond
303
304 ]]></token>
305 <token name="@OUTPUTS_HELP@"><![CDATA[
306
307 **OUTPUTS**
308
309 Any of the files produced by the CAT workflow are available as outputs
310 - Prodigal
311
312 - predicted_proteins.faa
313 - predicted_proteins.gff
314
315 - Diamond
316
317 - alignment.diamond
318
319 - CAT contigs/bins
320
321 - contig2classification.txt or bin2classification.txt
322 - ORF2LCA.txt
323
324 - CAT add_names (optional)
325
326 - contig2classification.names.txt or bin2classification.names.txt
327 - ORF2LCA.names.txt
328
329 - CAT summarise (optional)
330
331 - contig2classification.summary.txt or bin2classification.summary.txt
332
333
334 ]]></token>
335
336 <token name="@OPTIONS_HELP@"><![CDATA[
337
338 Optional arguments:
339 -r, --range cut-off range after alignment [0-49] (default: 10).
340 -f, --fraction fraction of bit-score support for each classification
341 [0-0.99] (default: 0.5).
342 -p, --proteins_fasta
343 Path to predicted proteins fasta file. If supplied,
344 CAT will skip the protein prediction step.
345 -a, --diamond_alignment
346 Path to DIAMOND alignment table. If supplied, CAT will
347 skip the DIAMOND alignment step and directly classify
348 the sequences. A predicted proteins fasta file should
349 also be supplied with argument [-p / --proteins_fasta].
350
351
352 DIAMOND specific optional arguments:
353 --sensitive Run DIAMOND in sensitive mode (default: not enabled).
354
355 --block_size DIAMOND block-size parameter (default: 2.0). Lower
356 numbers will decrease memory and temporary disk space
357 usage.
358
359 --index_chunks
360 DIAMOND index-chunks parameter (default: 4). Set to 1
361 on high memory machines. The parameter has no effect
362 on temporary disk space usage.
363
364 --top
365 DIAMOND top parameter [0-50] (default: 50). Governs
366 hits within range of best hit that are written to the
367 alignment file. This is not the [-r / --range]
368 parameter!
369
370
371 Setting the DIAMOND --top parameter
372
373 You can speed up DIAMOND considerably, and at the same time greatly reduce disk usage, by setting the DIAMOND --top parameter to lower values. This will govern hits within range of the best hit that are written to the alignment file.
374
375 You have to be very careful to 1) not confuse this parameter with the -r / --range parameter, which does a similar cut-off but after alignment and 2) be aware that if you want to run CAT or BAT again afterwards with different values of the -r / --range parameter, your options will be limited to the range you have chosen with --top earlier, because all hits that fall outside this range will not be included in the alignment file. Importantly, CAT and BAT currently do not warn you if you choose -r / --range in a second run higher than --top in a previous one, so it's up to you to remember this!
376
377 If you have understood all this, or you do not plan to tune -r / --range at all afterwards, you can enjoy a huge speedup with much smaller alignment files! For CAT you can for example set --top 11 and for BAT --top 6.
378
379 ]]></token>
<!-- Citations shared by the CAT/BAT tools; tool-specific entries given inside
     the expanding tool's <expand> replace <yield />.
     A citation of type "doi" must hold the bare DOI. Galaxy prepends the
     resolver address itself when it fetches the reference, so a full
     https://doi.org/... URL here yields an unresolvable lookup. -->
<xml name="citations">
    <citations>
        <citation type="doi">10.1101/072868</citation>
        <citation type="doi">10.1186/s13059-019-1817-x</citation>
        <citation type="doi">10.1038/nmeth.3176</citation>
        <citation type="doi">10.1186/1471-2105-11-119</citation>
        <yield />
    </citations>
</xml>
389 </macros>