Mercurial > repos > iuc > semibin
comparison semibin.xml @ 0:7b382efabb98 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
author | iuc |
---|---|
date | Fri, 14 Oct 2022 21:38:26 +0000 |
parents | |
children | 6b517dc161e4 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7b382efabb98 |
---|---|
1 <tool id="semibin" name="SemiBin" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description> | |
3 for Semi-supervised Metagenomic Binning | |
4 </description> | |
5 <macros> | |
6 <import>macros.xml</import> | |
7 </macros> | |
8 <expand macro="biotools"/> | |
9 <expand macro="requirements"/> | |
10 <expand macro="version"/> | |
11 <command detect_errors="exit_code"><![CDATA[ | |
12 #import re | |
13 @BAM_FILES@ | |
14 @FASTA_FILES@ | |
15 | |
16 SemiBin | |
17 #if $mode.select == 'single' or $mode.select == 'co' | |
18 single_easy_bin | |
19 #if $mode.select == 'single' and str($mode.environment) != '' | |
20 --environment '$mode.environment' | |
21 #end if | |
22 #if $mode.ref.select == "cached" | |
23 --reference-db-data-dir '$mode.ref.cached_db.fields.path' | |
24 #else | |
25 --taxonomy-annotation-table '$mode.ref.taxonomy_annotation_table' | |
26 #end if | |
27 #else | |
28 multi_easy_bin | |
29 --separator '$separator' | |
30 #if $mode.ref.select == "cached" | |
31 --reference-db-data-dir '$mode.ref.cached_db.fields.path' | |
32 #else | |
33 --taxonomy-annotation-table | |
34 #for $e in $mode.ref.taxonomy_annotation_table | |
35 '$e' | |
36 #end for | |
37 #end if | |
38 #end if | |
39 --input-fasta 'contigs.fasta' | |
40 --input-bam *.bam | |
41 --output 'output' | |
42 --cannot-name 'cannot' | |
43 @MIN_LEN@ | |
44 --orf-finder '$orf_finder' | |
45 --random-seed $random_seed | |
46 | |
47 #if str($annot.ml_threshold) != '' | |
48 --ml-threshold $annot.ml_threshold | |
49 #end if | |
50 --epoches $training.epoches | |
51 --batch-size $training.batch_size | |
52 --max-node $bin.max_node | |
53 --max-edges $bin.max_edges | |
54 --minfasta-kbs $bin.minfasta_kbs | |
55 $bin.no_recluster | |
56 --threads \${GALAXY_SLOTS:-1} | |
57 --processes \${GALAXY_SLOTS:-1} | |
58 && | |
59 echo "output" && | |
60 ls output | |
61 ]]></command> | |
62 <inputs> | |
63 <conditional name="mode"> | |
64 <expand macro="mode_select"/> | |
65 <when value="single"> | |
66 <expand macro="input-fasta-single"/> | |
67 <expand macro="input-bam-single"/> | |
68 <expand macro="ref-single"/> | |
69 <expand macro="environment"/> | |
70 </when> | |
71 <when value="co"> | |
72 <expand macro="input-fasta-single"/> | |
73 <expand macro="input-bam-multi"/> | |
74 <expand macro="ref-single"/> | |
75 </when> | |
76 <when value="multi"> | |
77 <expand macro="input-fasta-multi"/> | |
78 <expand macro="input-bam-multi"/> | |
79 <expand macro="ref-multi"/> | |
80 </when> | |
81 </conditional> | |
82 <expand macro="min_len"/> | |
83 <expand macro="orf-finder"/> | |
84 <expand macro="random-seed"/> | |
85 <section name="annot" title="Contig annotations" expanded="true"> | |
86 <expand macro="ml-threshold"/> | |
87 </section> | |
88 <section name="training" title="Training"> | |
89 <expand macro="epoches"/> | |
90 <expand macro="batch-size"/> | |
91 </section> | |
92 <section name="bin" title="Binning"> | |
93 <expand macro="max-node"/> | |
94 <expand macro="max-edges"/> | |
95 <expand macro="minfasta-kbs"/> | |
96 <expand macro="no-recluster"/> | |
97 </section> | |
98 <param name="extra_output" type="select" multiple="true" optional="true" label="Extra outputs" help="In addition to the reconstructed bins"> | |
99 <option value="data">Training data</option> | |
100 <option value="coverage">Coverage files</option> | |
101 <option value="contigs">Contigs (if multiple samples)</option> | |
102 </param> | |
103 </inputs> | |
104 <outputs> | |
105 <collection name="output_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins after reclustering"> | |
106 <filter>not bin["no_recluster"]</filter> | |
107 <!-- Element identifier: the part of the file name after the first dot, up to the literal ".fa" suffix. --> <discover_datasets pattern=".*?\.(?P&lt;designation&gt;.*)\.fa" format="fasta" directory="output/output_recluster_bins" /> | |
108 </collection> | |
109 <collection name="output_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering"> | |
110 <filter>mode["select"]!="multi"</filter> | |
111 <!-- Element identifier: the part of the file name after the first dot, up to the literal ".fa" suffix. --> <discover_datasets pattern=".*?\.(?P&lt;designation&gt;.*)\.fa" format="fasta" directory="output/output_bins" /> | |
112 </collection> | |
113 <collection name="multi_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering"> | |
114 <filter>mode["select"]=="multi"</filter> | |
115 <!-- Element identifier: the part of the file name after the first dot, up to the literal ".fa" suffix. --> <discover_datasets pattern=".*?\.(?P&lt;designation&gt;.*)\.fa" format="fasta" directory="output/bins" /> | |
116 </collection> | |
117 <data name="single_data" format="csv" from_work_dir="output/data.csv" label="${tool.name} on ${on_string}: Training data"> | |
118 <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter> | |
119 </data> | |
120 <data name="single_data_split" format="csv" from_work_dir="output/data_split.csv" label="${tool.name} on ${on_string}: Split training data"> | |
121 <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter> | |
122 </data> | |
123 <collection name="multi_data" type="list" label="${tool.name} on ${on_string}: Training data per sample"> | |
124 <filter>mode["select"]=="multi" and extra_output and "data" in extra_output</filter> | |
125 <!-- Element identifier: the relative path leading to each data.csv found under output/samples/. --> <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> | |
126 </collection> | |
127 <collection name="multi_data_split" type="list" label="${tool.name} on ${on_string}: Split training data per sample"> | |
128 <filter>mode["select"]=="multi" and extra_output and "data" in extra_output</filter> | |
129 <!-- Element identifier: the relative path leading to each data_split.csv found under output/samples/. --> <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data_split\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> | |
130 </collection> | |
131 <expand macro="generate_sequence_features_extra_outputs"/> | |
132 </outputs> | |
133 <tests> | |
134 <test expect_num_outputs="6"> | |
135 <conditional name="mode"> | |
136 <param name="select" value="single"/> | |
137 <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> | |
138 <param name="input_bam" ftype="bam" value="input_single.bam"/> | |
139 <conditional name="ref"> | |
140 <param name="select" value="taxonomy"/> | |
141 <param name="taxonomy_annotation_table" value="taxonomy.tsv"/> | |
142 </conditional> | |
143 <param name="environment" value="human_gut"/> | |
144 </conditional> | |
145 <conditional name="min_len"> | |
146 <param name="method" value="min-len"/> | |
147 <param name="min_len" value="0" /> | |
148 </conditional> | |
149 <param name="orf_finder" value="prodigal"/> | |
150 <param name="random_seed" value="0"/> | |
151 <section name="annot"> | |
152 <param name="ml_threshold" value=""/> | |
153 </section> | |
154 <section name="training"> | |
155 <param name="epoches" value="20"/> | |
156 <param name="batch_size" value="2048"/> | |
157 </section> | |
158 <section name="bin"> | |
159 <param name="max_node" value="1"/> | |
160 <param name="max_edges" value="200"/> | |
161 <param name="minfasta_kbs" value="200"/> | |
162 <param name="no_recluster" value="false"/> | |
163 </section> | |
164 <param name="extra_output" value="data,coverage,contigs"/> | |
165 <output_collection name="output_recluster_bins" count="0"/> | |
166 <output_collection name="output_bins" count="3"> | |
167 <element name="0" ftype="fasta"> | |
168 <assert_contents> | |
169 <has_text text=">g1k_0"/> | |
170 </assert_contents> | |
171 </element> | |
172 <element name="1" ftype="fasta"> | |
173 <assert_contents> | |
174 <has_text text=">g2k_0"/> | |
175 </assert_contents> | |
176 </element> | |
177 <element name="2" ftype="fasta"> | |
178 <assert_contents> | |
179 <has_text text=">g3k_0"/> | |
180 </assert_contents> | |
181 </element> | |
182 </output_collection> | |
183 <output name="single_data" ftype="csv"> | |
184 <assert_contents> | |
185 <has_text text="g1k_0"/> | |
186 <has_text text="g4k_7"/> | |
187 </assert_contents> | |
188 </output> | |
189 <output name="single_data_split" ftype="csv"> | |
190 <assert_contents> | |
191 <has_text text="g1k_0_1"/> | |
192 <has_text text="g1k_6_2"/> | |
193 </assert_contents> | |
194 </output> | |
195 <output name="single_cov" ftype="csv"> | |
196 <assert_contents> | |
197 <has_text text="g1k_0"/> | |
198 <has_text text="0.027"/> | |
199 </assert_contents> | |
200 </output> | |
201 <output name="single_split_cov" ftype="csv"> | |
202 <assert_contents> | |
203 <has_size value="1" delta="1"/> | |
204 </assert_contents> | |
205 </output> | |
206 </test> | |
207 <test expect_num_outputs="3"> | |
208 <conditional name="mode"> | |
209 <param name="select" value="co"/> | |
210 <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> | |
211 <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> | |
212 <conditional name="ref"> | |
213 <param name="select" value="taxonomy"/> | |
214 <param name="taxonomy_annotation_table" value="taxonomy.tsv"/> | |
215 </conditional> | |
216 </conditional> | |
217 <conditional name="min_len"> | |
218 <param name="method" value="ratio"/> | |
219 <param name="ratio" value="0.05"/> | |
220 </conditional> | |
221 <param name="orf_finder" value="fraggenescan"/> | |
222 <param name="random_seed" value="0"/> | |
223 <section name="annot"> | |
224 <param name="ml_threshold" value=""/> | |
225 </section> | |
226 <section name="training"> | |
227 <param name="epoches" value="20"/> | |
228 <param name="batch_size" value="2048"/> | |
229 </section> | |
230 <section name="bin"> | |
231 <param name="max_node" value="1"/> | |
232 <param name="max_edges" value="200"/> | |
233 <param name="minfasta_kbs" value="200"/> | |
234 <param name="no_recluster" value="true"/> | |
235 </section> | |
236 <param name="extra_output" value="coverage"/> | |
237 <output_collection name="output_bins" count="3"> | |
238 <element name="0" ftype="fasta"> | |
239 <assert_contents> | |
240 <has_text text=">g1k_0"/> | |
241 </assert_contents> | |
242 </element> | |
243 <element name="1" ftype="fasta"> | |
244 <assert_contents> | |
245 <has_text text=">g2k_0"/> | |
246 </assert_contents> | |
247 </element> | |
248 <element name="2" ftype="fasta"> | |
249 <assert_contents> | |
250 <has_text text=">g3k_0"/> | |
251 </assert_contents> | |
252 </element> | |
253 </output_collection> | |
254 <output_collection name="co_cov" count="5"> | |
255 <element name="0" ftype="csv"> | |
256 <assert_contents> | |
257 <has_text text="g1k_0"/> | |
258 <has_text text="g2k_7"/> | |
259 </assert_contents> | |
260 </element> | |
261 <element name="1" ftype="csv"> | |
262 <assert_contents> | |
263 <has_text text="g1k_0"/> | |
264 <has_text text="g2k_7"/> | |
265 </assert_contents> | |
266 </element> | |
267 <element name="4" ftype="csv"> | |
268 <assert_contents> | |
269 <has_text text="g1k_0"/> | |
270 <has_text text="g2k_7"/> | |
271 </assert_contents> | |
272 </element> | |
273 </output_collection> | |
274 <output_collection name="co_split_cov" count="5"> | |
275 <element name="0" ftype="csv"> | |
276 <assert_contents> | |
277 <has_text text="g1k_0_1"/> | |
278 <has_text text="g2k_7_2"/> | |
279 </assert_contents> | |
280 </element> | |
281 <element name="1" ftype="csv"> | |
282 <assert_contents> | |
283 <has_text text="g1k_0_1"/> | |
284 <has_text text="g2k_7_2"/> | |
285 </assert_contents> | |
286 </element> | |
287 <element name="2" ftype="csv"> | |
288 <assert_contents> | |
289 <has_text text="g1k_0_1"/> | |
290 <has_text text="g2k_7_2"/> | |
291 </assert_contents> | |
292 </element> | |
293 </output_collection> | |
294 </test> | |
295 <test expect_num_outputs="1"> | |
296 <conditional name="mode"> | |
297 <param name="select" value="single"/> | |
298 <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> | |
299 <param name="input_bam" ftype="bam" value="input_single.bam"/> | |
300 <conditional name="ref"> | |
301 <!-- The ref conditional is switched by its "select" parameter, which the command section tests. --> <param name="select" value="cached"/> | |
302 <param name="cached_db" value="test-db"/> | |
303 </conditional> | |
304 </conditional> | |
305 <conditional name="min_len"> | |
306 <param name="method" value="ratio"/> | |
307 <param name="ratio" value="0.05"/> | |
308 </conditional> | |
309 <param name="orf_finder" value="fraggenescan"/> | |
310 <param name="random_seed" value="0"/> | |
311 <section name="annot"> | |
312 <param name="ml_threshold" value=""/> | |
313 </section> | |
314 <section name="training"> | |
315 <param name="epoches" value="20"/> | |
316 <param name="batch_size" value="2048"/> | |
317 </section> | |
318 <section name="bin"> | |
319 <param name="max_node" value="1"/> | |
320 <param name="max_edges" value="200"/> | |
321 <param name="minfasta_kbs" value="200"/> | |
322 <param name="no_recluster" value="true"/> | |
323 </section> | |
324 <param name="extra_output" value=""/> | |
325 <output_collection name="output_bins" count="3"> | |
326 <element name="0" ftype="fasta"> | |
327 <assert_contents> | |
328 <has_text text=">g1k_0"/> | |
329 </assert_contents> | |
330 </element> | |
331 </output_collection> | |
332 </test> | |
333 <test expect_num_outputs="8"> | |
334 <conditional name="mode"> | |
335 <param name="select" value="multi"/> | |
336 <conditional name="multi_fasta"> | |
337 <param name="select" value="concatenated"/> | |
338 <param name="input_fasta" ftype="fasta" value="input_multi.fasta.gz"/> | |
339 </conditional> | |
340 <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/> | |
341 <conditional name="ref"> | |
342 <param name="select" value="taxonomy"/> | |
343 <param name="taxonomy_annotation_table" value="taxonomy.tsv,taxonomy_2.tsv,taxonomy_3.tsv,taxonomy_4.tsv,taxonomy_5.tsv,taxonomy_6.tsv,taxonomy_7.tsv,taxonomy_8.tsv,taxonomy_9.tsv,taxonomy_10.tsv"/> | |
344 </conditional> | |
345 </conditional> | |
346 <conditional name="min_len"> | |
347 <param name="method" value="ratio"/> | |
348 <param name="ratio" value="0.05"/> | |
349 </conditional> | |
350 <param name="orf_finder" value="fraggenescan"/> | |
351 <param name="random_seed" value="0"/> | |
352 <section name="annot"> | |
353 <param name="ml_threshold" value=""/> | |
354 </section> | |
355 <section name="training"> | |
356 <param name="epoches" value="20"/> | |
357 <param name="batch_size" value="2048"/> | |
358 </section> | |
359 <section name="bin"> | |
360 <param name="max_node" value="1"/> | |
361 <param name="max_edges" value="200"/> | |
362 <param name="minfasta_kbs" value="200"/> | |
363 <param name="no_recluster" value="true"/> | |
364 </section> | |
365 <param name="extra_output" value="data,coverage,contigs"/> | |
366 <output_collection name="multi_bins" count="2"> | |
367 <element name="0" ftype="fasta"> | |
368 <assert_contents> | |
369 <has_text text=">g1k_0"/> | |
370 </assert_contents> | |
371 </element> | |
372 </output_collection> | |
373 <output_collection name="multi_contigs" count="10"> | |
374 <element name="S8" ftype="fasta"> | |
375 <assert_contents> | |
376 <has_text text=">g1k_0"/> | |
377 </assert_contents> | |
378 </element> | |
379 </output_collection> | |
380 <output_collection name="multi_data" count="10"> | |
381 <element name="S8" ftype="csv"> | |
382 <assert_contents> | |
383 <has_text text="g1k_0,"/> | |
384 </assert_contents> | |
385 </element> | |
386 </output_collection> | |
387 <output_collection name="multi_cov" count="10"> | |
388 <element name="8" ftype="csv"> | |
389 <assert_contents> | |
390 <has_text text="S1:g1k_5,"/> | |
391 </assert_contents> | |
392 </element> | |
393 </output_collection> | |
394 <output_collection name="multi_cov_sample" count="10"> | |
395 <element name="S8" ftype="csv"> | |
396 <assert_contents> | |
397 <has_text text="g1k_3"/> | |
398 </assert_contents> | |
399 </element> | |
400 </output_collection> | |
401 <output_collection name="multi_split_cov" count="10"> | |
402 <element name="8" ftype="csv"> | |
403 <assert_contents> | |
404 <has_text text="S1:g1k_5_1,0."/> | |
405 </assert_contents> | |
406 </element> | |
407 </output_collection> | |
408 <output_collection name="multi_split_cov_sample" count="10"> | |
409 <element name="S8" ftype="csv"> | |
410 <assert_contents> | |
411 <has_text text="g1k_3_1"/> | |
412 </assert_contents> | |
413 </element> | |
414 </output_collection> | |
422 </test> | |
423 </tests> | |
424 <help><![CDATA[ | |
425 @HELP_HEADER@ | |
426 | |
427 Inputs | |
428 ====== | |
429 | |
430 @HELP_INPUT_FASTA@ | |
431 @HELP_INPUT_BAM@ | |
432 | |
433 ]]></help> | |
434 <expand macro="citations"/> | |
435 </tool> |