comparison semibin.xml @ 0:7b382efabb98 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
author iuc
date Fri, 14 Oct 2022 21:38:26 +0000
parents
children 6b517dc161e4
comparison
equal deleted inserted replaced
-1:000000000000 0:7b382efabb98
1 <tool id="semibin" name="SemiBin" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>
3 for Semi-supervised Metagenomic Binning
4 </description>
5 <macros>
6 <import>macros.xml</import>
7 </macros>
8 <expand macro="biotools"/>
9 <expand macro="requirements"/>
10 <expand macro="version"/>
11 <command detect_errors="exit_code"><![CDATA[
12 #import re
13 @BAM_FILES@
14 @FASTA_FILES@
15
16 SemiBin
17 #if $mode.select == 'single' or $mode.select == 'co' ## single-sample and co-assembly modes share the single_easy_bin subcommand
18 single_easy_bin
19 #if $mode.select == 'single' and str($mode.environment) != '' ## --environment is emitted only in single mode and only when a value is set
20 --environment '$mode.environment'
21 #end if
22 #if $mode.ref.select == "cached" ## reference: either a cached database directory or a user-supplied taxonomy table
23 --reference-db-data-dir '$mode.ref.cached_db.fields.path'
24 #else
25 --taxonomy-annotation-table '$mode.ref.taxonomy_annotation_table'
26 #end if
27 #else ## any other mode value (the inputs declare "multi") runs multi_easy_bin
28 multi_easy_bin
29 --separator '$separator'
30 #if $mode.ref.select == "cached" ## same reference choice as above, but the taxonomy input is iterated below
31 --reference-db-data-dir '$mode.ref.cached_db.fields.path'
32 #else
33 --taxonomy-annotation-table
34 #for $e in $mode.ref.taxonomy_annotation_table ## each table is appended as one quoted argument after the single flag
35 '$e'
36 #end for
37 #end if
38 #end if
39 --input-fasta 'contigs.fasta'
40 --input-bam *.bam
41 --output 'output'
42 --cannot-name 'cannot'
43 @MIN_LEN@
44 --orf-finder '$orf_finder'
45 --random-seed $random_seed
46
47 #if str($annot.ml_threshold) != '' ## --ml-threshold is left off entirely when the field is empty
48 --ml-threshold $annot.ml_threshold
49 #end if
50 --epoches $training.epoches
51 --batch-size $training.batch_size
52 --max-node $bin.max_node
53 --max-edges $bin.max_edges
54 --minfasta-kbs $bin.minfasta_kbs
55 $bin.no_recluster
56 --threads \${GALAXY_SLOTS:-1}
57 --processes \${GALAXY_SLOTS:-1}
58 &&
59 echo "output" &&
60 ls output
61 ]]></command>
62 <inputs>
63 <conditional name="mode"> <!-- binning mode; the command reads mode.select, mode.ref and, for single mode only, mode.environment -->
64 <expand macro="mode_select"/>
65 <when value="single">
66 <expand macro="input-fasta-single"/>
67 <expand macro="input-bam-single"/>
68 <expand macro="ref-single"/>
69 <expand macro="environment"/>
70 </when>
71 <when value="co"> <!-- same FASTA and reference macros as single mode, but the multi BAM macro and no environment -->
72 <expand macro="input-fasta-single"/>
73 <expand macro="input-bam-multi"/>
74 <expand macro="ref-single"/>
75 </when>
76 <when value="multi">
77 <expand macro="input-fasta-multi"/>
78 <expand macro="input-bam-multi"/>
79 <expand macro="ref-multi"/>
80 </when>
81 </conditional>
82 <expand macro="min_len"/>
83 <expand macro="orf-finder"/>
84 <expand macro="random-seed"/>
85 <section name="annot" title="Contig annotations" expanded="true"> <!-- read by the command as $annot.ml_threshold -->
86 <expand macro="ml-threshold"/>
87 </section>
88 <section name="training" title="Training"> <!-- read by the command as $training.epoches and $training.batch_size -->
89 <expand macro="epoches"/>
90 <expand macro="batch-size"/>
91 </section>
92 <section name="bin" title="Binning"> <!-- read by the command as $bin.max_node, $bin.max_edges, $bin.minfasta_kbs and $bin.no_recluster -->
93 <expand macro="max-node"/>
94 <expand macro="max-edges"/>
95 <expand macro="minfasta-kbs"/>
96 <expand macro="no-recluster"/>
97 </section>
98 <param name="extra_output" type="select" multiple="true" optional="true" label="Extra outputs" help="In addition to the reconstructed bins"> <!-- help corrected: training data is one of these optional extras, the bins are the always-present outputs -->
99 <option value="data">Training data</option>
100 <option value="coverage">Coverage files</option>
101 <option value="contigs">Contigs (if multiple sample)</option>
102 </param>
103 </inputs>
104 <outputs>
105 <collection name="output_recluster_bins" label="${tool.name} on ${on_string}: Reconstructed bins after reclustering" type="list"> <!-- created only when bin.no_recluster is not set -->
106 <filter>not bin["no_recluster"]</filter>
107 <discover_datasets directory="output/output_recluster_bins" format="fasta" pattern=".*?\.(?P&lt;designation&gt;.*).fa"/>
108 </collection>
109 <collection name="output_bins" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering" type="list"> <!-- single and co modes: bins are collected from output/output_bins -->
110 <filter>mode["select"] != "multi"</filter>
111 <discover_datasets directory="output/output_bins" format="fasta" pattern=".*?\.(?P&lt;designation&gt;.*).fa"/>
112 </collection>
113 <collection name="multi_bins" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering" type="list"> <!-- multi mode: bins are collected from output/bins -->
114 <filter>mode["select"] == "multi"</filter>
115 <discover_datasets directory="output/bins" format="fasta" pattern=".*?\.(?P&lt;designation&gt;.*).fa"/>
116 </collection>
117 <data name="single_data" label="${tool.name} on ${on_string}: Training data" format="csv" from_work_dir="output/data.csv"> <!-- optional: needs "data" among the selected extra outputs -->
118 <filter>mode["select"] in ("single", "co") and extra_output and "data" in extra_output</filter>
119 </data>
120 <data name="single_data_split" label="${tool.name} on ${on_string}: Split training data" format="csv" from_work_dir="output/data_split.csv">
121 <filter>mode["select"] in ("single", "co") and extra_output and "data" in extra_output</filter>
122 </data>
123 <collection name="multi_data" label="${tool.name} on ${on_string}: Training data per sample" type="list"> <!-- designation is the relative path in front of /data.csv under output/samples/ -->
124 <filter>mode["select"] == "multi" and extra_output and "data" in extra_output</filter>
125 <discover_datasets directory="output/samples/" recurse="true" match_relative_path="true" format="csv" pattern="(?P&lt;designation&gt;.*)\/data.csv"/>
126 </collection>
127 <collection name="multi_data_split" label="${tool.name} on ${on_string}: Split training data per sample" type="list">
128 <filter>mode["select"] == "multi" and extra_output and "data" in extra_output</filter>
129 <discover_datasets directory="output/samples/" recurse="true" match_relative_path="true" format="csv" pattern="(?P&lt;designation&gt;.*)\/data_split.csv"/>
130 </collection>
131 <expand macro="generate_sequence_features_extra_outputs"/>
132 </outputs>
133 <tests>
134 <test expect_num_outputs="6"> <!-- single-sample mode, taxonomy table supplied, reclustering left enabled, every extra output requested -->
135 <conditional name="mode">
136 <param name="select" value="single"/>
137 <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
138 <param name="input_bam" ftype="bam" value="input_single.bam"/>
139 <conditional name="ref">
140 <param name="select" value="taxonomy"/>
141 <param name="taxonomy_annotation_table" value="taxonomy.tsv"/>
142 </conditional>
143 <param name="environment" value="human_gut"/>
144 </conditional>
145 <conditional name="min_len">
146 <param name="method" value="min-len"/>
147 <param name="min_len" value="0" />
148 </conditional>
149 <param name="orf_finder" value="prodigal"/>
150 <param name="random_seed" value="0"/> <!-- was "random-seed"; the command reads $random_seed, so the hyphenated name matched no input -->
151 <section name="annot">
152 <param name="ml_threshold" value=""/>
153 </section>
154 <section name="training">
155 <param name="epoches" value="20"/>
156 <param name="batch_size" value="2048"/>
157 </section>
158 <section name="bin">
159 <param name="max_node" value="1"/>
160 <param name="max_edges" value="200"/>
161 <param name="minfasta_kbs" value="200"/>
162 <param name="no_recluster" value="false"/>
163 </section>
164 <param name="extra_output" value="data,coverage,contigs"/>
165 <output_collection name="output_recluster_bins" count="0"/> <!-- the collection exists because no_recluster is false, but is expected to be empty -->
166 <output_collection name="output_bins" count="3">
167 <element name="0" ftype="fasta">
168 <assert_contents>
169 <has_text text=">g1k_0"/>
170 </assert_contents>
171 </element>
172 <element name="1" ftype="fasta">
173 <assert_contents>
174 <has_text text=">g2k_0"/>
175 </assert_contents>
176 </element>
177 <element name="2" ftype="fasta">
178 <assert_contents>
179 <has_text text=">g3k_0"/>
180 </assert_contents>
181 </element>
182 </output_collection>
183 <output name="single_data" ftype="csv">
184 <assert_contents>
185 <has_text text="g1k_0"/>
186 <has_text text="g4k_7"/>
187 </assert_contents>
188 </output>
189 <output name="single_data_split" ftype="csv">
190 <assert_contents>
191 <has_text text="g1k_0_1"/>
192 <has_text text="g1k_6_2"/>
193 </assert_contents>
194 </output>
195 <output name="single_cov" ftype="csv">
196 <assert_contents>
197 <has_text text="g1k_0"/>
198 <has_text text="0.027"/>
199 </assert_contents>
200 </output>
201 <output name="single_split_cov" ftype="csv">
202 <assert_contents>
203 <has_size value="1" delta="1"/>
204 </assert_contents>
205 </output>
206 </test>
207 <test expect_num_outputs="3"> <!-- co-assembly mode: one contig file, five BAM files, reclustering disabled, coverage extras only -->
208 <conditional name="mode">
209 <param name="select" value="co"/>
210 <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
211 <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/>
212 <conditional name="ref">
213 <param name="select" value="taxonomy"/>
214 <param name="taxonomy_annotation_table" value="taxonomy.tsv"/>
215 </conditional>
216 </conditional>
217 <conditional name="min_len">
218 <param name="method" value="ratio"/>
219 <param name="ratio" value="0.05"/>
220 </conditional>
221 <param name="orf_finder" value="fraggenescan"/>
222 <param name="random_seed" value="0"/> <!-- was "random-seed"; the command reads $random_seed, so the hyphenated name matched no input -->
223 <section name="annot">
224 <param name="ml_threshold" value=""/>
225 </section>
226 <section name="training">
227 <param name="epoches" value="20"/>
228 <param name="batch_size" value="2048"/>
229 </section>
230 <section name="bin">
231 <param name="max_node" value="1"/>
232 <param name="max_edges" value="200"/>
233 <param name="minfasta_kbs" value="200"/>
234 <param name="no_recluster" value="true"/>
235 </section>
236 <param name="extra_output" value="coverage"/>
237 <output_collection name="output_bins" count="3">
238 <element name="0" ftype="fasta">
239 <assert_contents>
240 <has_text text=">g1k_0"/>
241 </assert_contents>
242 </element>
243 <element name="1" ftype="fasta">
244 <assert_contents>
245 <has_text text=">g2k_0"/>
246 </assert_contents>
247 </element>
248 <element name="2" ftype="fasta">
249 <assert_contents>
250 <has_text text=">g3k_0"/>
251 </assert_contents>
252 </element>
253 </output_collection>
254 <output_collection name="co_cov" count="5"> <!-- five elements expected, matching the five BAM inputs; three of them are spot-checked -->
255 <element name="0" ftype="csv">
256 <assert_contents>
257 <has_text text="g1k_0"/>
258 <has_text text="g2k_7"/>
259 </assert_contents>
260 </element>
261 <element name="1" ftype="csv">
262 <assert_contents>
263 <has_text text="g1k_0"/>
264 <has_text text="g2k_7"/>
265 </assert_contents>
266 </element>
267 <element name="4" ftype="csv">
268 <assert_contents>
269 <has_text text="g1k_0"/>
270 <has_text text="g2k_7"/>
271 </assert_contents>
272 </element>
273 </output_collection>
274 <output_collection name="co_split_cov" count="5">
275 <element name="0" ftype="csv">
276 <assert_contents>
277 <has_text text="g1k_0_1"/>
278 <has_text text="g2k_7_2"/>
279 </assert_contents>
280 </element>
281 <element name="1" ftype="csv">
282 <assert_contents>
283 <has_text text="g1k_0_1"/>
284 <has_text text="g2k_7_2"/>
285 </assert_contents>
286 </element>
287 <element name="2" ftype="csv">
288 <assert_contents>
289 <has_text text="g1k_0_1"/>
290 <has_text text="g2k_7_2"/>
291 </assert_contents>
292 </element>
293 </output_collection>
294 </test>
295 <test expect_num_outputs="1"> <!-- single-sample mode using the cached reference database, reclustering disabled, no extra outputs -->
296 <conditional name="mode">
297 <param name="select" value="single"/>
298 <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
299 <param name="input_bam" ftype="bam" value="input_single.bam"/>
300 <conditional name="ref">
301 <param name="select" value="cached"/> <!-- was "db_selector"; the command branches on $mode.ref.select and the other tests set "select" -->
302 <param name="cached_db" value="test-db"/>
303 </conditional>
304 </conditional>
305 <conditional name="min_len">
306 <param name="method" value="ratio"/>
307 <param name="ratio" value="0.05"/>
308 </conditional>
309 <param name="orf_finder" value="fraggenescan"/>
310 <param name="random_seed" value="0"/> <!-- was "random-seed"; the command reads $random_seed, so the hyphenated name matched no input -->
311 <section name="annot">
312 <param name="ml_threshold" value=""/>
313 </section>
314 <section name="training">
315 <param name="epoches" value="20"/>
316 <param name="batch_size" value="2048"/>
317 </section>
318 <section name="bin">
319 <param name="max_node" value="1"/>
320 <param name="max_edges" value="200"/>
321 <param name="minfasta_kbs" value="200"/>
322 <param name="no_recluster" value="true"/>
323 </section>
324 <param name="extra_output" value=""/>
325 <output_collection name="output_bins" count="3"> <!-- three bins expected; only the first element's content is checked -->
326 <element name="0" ftype="fasta">
327 <assert_contents>
328 <has_text text=">g1k_0"/>
329 </assert_contents>
330 </element>
331 </output_collection>
332 </test>
333 <test expect_num_outputs="8"> <!-- multi-sample mode: concatenated contig file, ten BAM files, ten taxonomy tables, every extra output requested -->
334 <conditional name="mode">
335 <param name="select" value="multi"/>
336 <conditional name="multi_fasta">
337 <param name="select" value="concatenated"/>
338 <param name="input_fasta" ftype="fasta" value="input_multi.fasta.gz"/>
339 </conditional>
340 <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/>
341 <conditional name="ref">
342 <param name="select" value="taxonomy"/>
343 <param name="taxonomy_annotation_table" value="taxonomy.tsv,taxonomy_2.tsv,taxonomy_3.tsv,taxonomy_4.tsv,taxonomy_5.tsv,taxonomy_6.tsv,taxonomy_7.tsv,taxonomy_8.tsv,taxonomy_9.tsv,taxonomy_10.tsv"/>
344 </conditional>
345 </conditional>
346 <conditional name="min_len">
347 <param name="method" value="ratio"/>
348 <param name="ratio" value="0.05"/>
349 </conditional>
350 <param name="orf_finder" value="fraggenescan"/>
351 <param name="random_seed" value="0"/>
352 <section name="annot">
353 <param name="ml_threshold" value=""/>
354 </section>
355 <section name="training">
356 <param name="epoches" value="20"/>
357 <param name="batch_size" value="2048"/>
358 </section>
359 <section name="bin">
360 <param name="max_node" value="1"/>
361 <param name="max_edges" value="200"/>
362 <param name="minfasta_kbs" value="200"/>
363 <param name="no_recluster" value="true"/>
364 </section>
365 <param name="extra_output" value="data,coverage,contigs"/>
366 <output_collection name="multi_bins" count="2">
367 <element name="0" ftype="fasta">
368 <assert_contents>
369 <has_text text=">g1k_0"/>
370 </assert_contents>
371 </element>
372 </output_collection>
373 <output_collection name="multi_contigs" count="10"> <!-- asserted once; an identical second copy of this block at the end of the test was removed -->
374 <element name="S8" ftype="fasta">
375 <assert_contents>
376 <has_text text=">g1k_0"/>
377 </assert_contents>
378 </element>
379 </output_collection>
380 <output_collection name="multi_data" count="10">
381 <element name="S8" ftype="csv">
382 <assert_contents>
383 <has_text text="g1k_0,"/>
384 </assert_contents>
385 </element>
386 </output_collection>
387 <output_collection name="multi_cov" count="10">
388 <element name="8" ftype="csv">
389 <assert_contents>
390 <has_text text="S1:g1k_5,"/>
391 </assert_contents>
392 </element>
393 </output_collection>
394 <output_collection name="multi_cov_sample" count="10">
395 <element name="S8" ftype="csv">
396 <assert_contents>
397 <has_text text="g1k_3"/>
398 </assert_contents>
399 </element>
400 </output_collection>
401 <output_collection name="multi_split_cov" count="10">
402 <element name="8" ftype="csv">
403 <assert_contents>
404 <has_text text="S1:g1k_5_1,0."/>
405 </assert_contents>
406 </element>
407 </output_collection>
408 <output_collection name="multi_split_cov_sample" count="10">
409 <element name="S8" ftype="csv">
410 <assert_contents>
411 <has_text text="g1k_3_1"/>
412 </assert_contents>
413 </element>
414 </output_collection>
422 </test>
423 </tests>
424 <help><![CDATA[
425 @HELP_HEADER@
426
427 Inputs
428 ======
429
430 @HELP_INPUT_FASTA@
431 @HELP_INPUT_BAM@
432
433 ]]></help>
434 <expand macro="citations"/>
435 </tool>