Mercurial > repos > iuc > semibin
comparison semibin.xml @ 2:99ff9221182c draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit 13abac83068b126399ec415141007a48c2efaa84
author | iuc |
---|---|
date | Fri, 10 Nov 2023 20:50:01 +0000 |
parents | 6b517dc161e4 |
children |
comparison
equal
deleted
inserted
replaced
1:6b517dc161e4 | 2:99ff9221182c |
---|---|
10 <expand macro="version"/> | 10 <expand macro="version"/> |
11 <command detect_errors="exit_code"><![CDATA[ | 11 <command detect_errors="exit_code"><![CDATA[ |
12 #import re | 12 #import re |
13 @BAM_FILES@ | 13 @BAM_FILES@ |
14 @FASTA_FILES@ | 14 @FASTA_FILES@ |
15 | 15 SemiBin2 |
16 SemiBin | |
17 #if $mode.select == 'single' or $mode.select == 'co' | 16 #if $mode.select == 'single' or $mode.select == 'co' |
18 single_easy_bin | 17 single_easy_bin |
19 #if $mode.select == 'single' and str($mode.environment) != '' | 18 #if $mode.select == 'single' and str($mode.environment) != '' |
20 --environment '$mode.environment' | 19 --environment '$mode.environment' |
21 #end if | 20 #end if |
50 --epoches $training.epoches | 49 --epoches $training.epoches |
51 --batch-size $training.batch_size | 50 --batch-size $training.batch_size |
52 --max-node $bin.max_node | 51 --max-node $bin.max_node |
53 --max-edges $bin.max_edges | 52 --max-edges $bin.max_edges |
54 --minfasta-kbs $bin.minfasta_kbs | 53 --minfasta-kbs $bin.minfasta_kbs |
55 $bin.no_recluster | 54 #if ($mode.select == 'single' or $mode.select == 'co') and "pre_reclustering_bins" in $extra_output |
 | 55 --write-pre-reclustering-bins |
 | 56 #end if |
 | 57 --compression none |
56 --threads \${GALAXY_SLOTS:-1} | 58 --threads \${GALAXY_SLOTS:-1} |
57 --processes \${GALAXY_SLOTS:-1} | 59 --processes \${GALAXY_SLOTS:-1} |
58 && | 60 && |
59 echo "output" && | 61 echo "output" && |
60 ls output | 62 ls output |
91 </section> | 93 </section> |
92 <section name="bin" title="Binning"> | 94 <section name="bin" title="Binning"> |
93 <expand macro="max-node"/> | 95 <expand macro="max-node"/> |
94 <expand macro="max-edges"/> | 96 <expand macro="max-edges"/> |
95 <expand macro="minfasta-kbs"/> | 97 <expand macro="minfasta-kbs"/> |
96 <expand macro="no-recluster"/> | |
97 </section> | 98 </section> |
98 <param name="extra_output" type="select" multiple="true" optional="true" label="Extra outputs" help="In addition to the training data"> | 99 <param name="extra_output" type="select" multiple="true" optional="true" label="Extra outputs" help="In addition to the training data"> |
99 <option value="data">Training data</option> | 100 <option value="data">Training data</option> |
100 <option value="coverage">Coverage files</option> | 101 <option value="coverage">Coverage files</option> |
101 <option value="contigs">Contigs (if multiple sample)</option> | 102 <option value="contigs">Contigs (if multiple sample)</option> |
 | 103 <option value="pre_reclustering_bins">Pre-reclustering bins (only single sample and co-assembly)</option> |
102 </param> | 104 </param> |
103 </inputs> | 105 </inputs> |
104 <outputs> | 106 <outputs> |
105 <collection name="output_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins after reclustering"> | 107 <collection name="output_pre_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering"> |
106 <filter>not bin["no_recluster"]</filter> | 108 <filter>mode["select"]!="multi" and extra_output and "pre_reclustering_bins" in extra_output</filter> |
107 <discover_datasets pattern=".*?\.(?P<designation>.*).fa" format="fasta" directory="output/output_recluster_bins" /> | 109 <discover_datasets pattern="(?P<designation>.*).fa" format="fasta" directory="output/output_prerecluster_bins"/> |
108 </collection> | 110 </collection> |
109 <collection name="output_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering"> | 111 <collection name="output_after_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins after reclustering"> |
110 <filter>mode["select"]!="multi"</filter> | 112 <filter>mode["select"]!="multi" and extra_output and "pre_reclustering_bins" in extra_output</filter> |
111 <discover_datasets pattern=".*?\.(?P<designation>.*).fa" format="fasta" directory="output/output_bins" /> | 113 <discover_datasets pattern="(?P<designation>.*).fa" format="fasta" directory="output/output_recluster_bins"/> |
 | 114 </collection> |
 | 115 <collection name="output_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins"> |
 | 116 <filter>mode["select"]!="multi" and not "pre_reclustering_bins" in extra_output</filter> |
 | 117 <discover_datasets pattern="(?P<designation>.*).fa" format="fasta" directory="output/output_bins"/> |
112 </collection> | 118 </collection> |
113 <collection name="multi_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering (multi_bins)"> | 119 <collection name="multi_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering (multi_bins)"> |
114 <filter>mode["select"]=="multi"</filter> | 120 <filter>mode["select"]=="multi"</filter> |
115 <discover_datasets pattern=".*?\.(?P<designation>.*).fa" format="fasta" directory="output/bins" /> | 121 <discover_datasets pattern="(?P<designation>.*).fa" format="fasta" directory="output/bins"/> |
116 </collection> | 122 </collection> |
117 <data name="single_data" format="csv" from_work_dir="output/data.csv" label="${tool.name} on ${on_string}: Training data"> | 123 <data name="single_data" format="csv" from_work_dir="output/data.csv" label="${tool.name} on ${on_string}: Training data"> |
118 <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter> | 124 <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter> |
119 </data> | 125 </data> |
120 <data name="single_data_split" format="csv" from_work_dir="output/data_split.csv" label="${tool.name} on ${on_string}: Split training data"> | 126 <data name="single_data_split" format="csv" from_work_dir="output/data_split.csv" label="${tool.name} on ${on_string}: Split training data"> |
129 <discover_datasets pattern="(?P<designation>.*)\/data_split.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> | 135 <discover_datasets pattern="(?P<designation>.*)\/data_split.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> |
130 </collection> | 136 </collection> |
131 <expand macro="generate_sequence_features_extra_outputs"/> | 137 <expand macro="generate_sequence_features_extra_outputs"/> |
132 </outputs> | 138 </outputs> |
133 <tests> | 139 <tests> |
134 <test expect_num_outputs="6"> | 140 <test expect_num_outputs="5"> |
135 <conditional name="mode"> | 141 <conditional name="mode"> |
136 <param name="select" value="single"/> | 142 <param name="select" value="single"/> |
137 <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> | 143 <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> |
138 <param name="input_bam" ftype="bam" value="input_single.bam"/> | 144 <param name="input_bam" ftype="bam" value="input_single.bam"/> |
139 <conditional name="ref"> | 145 <conditional name="ref"> |
156 <param name="batch_size" value="2048"/> | 162 <param name="batch_size" value="2048"/> |
157 </section> | 163 </section> |
158 <section name="bin"> | 164 <section name="bin"> |
159 <param name="max_node" value="1"/> | 165 <param name="max_node" value="1"/> |
160 <param name="max_edges" value="200"/> | 166 <param name="max_edges" value="200"/> |
161 <param name="minfasta_kbs" value="2"/> | 167 <param name="minfasta_kbs" value="200"/> |
162 <param name="no_recluster" value="false"/> | |
163 </section> | 168 </section> |
164 <param name="extra_output" value="data,coverage,contigs"/> | 169 <param name="extra_output" value="data,coverage,contigs"/> |
165 <output_collection name="output_recluster_bins" count="39"> | 170 <output_collection name="output_bins" count="0"/> |
166 <element name="0" ftype="fasta"> | |
167 <assert_contents> | |
168 <has_text text=">g1k_0"/> | |
169 </assert_contents> | |
170 </element> | |
171 <element name="1" ftype="fasta"> | |
172 <assert_contents> | |
173 <has_text text=">g1k_1"/> | |
174 </assert_contents> | |
175 </element> | |
176 <element name="2" ftype="fasta"> | |
177 <assert_contents> | |
178 <has_text text=">g1k_2"/> | |
179 </assert_contents> | |
180 </element> | |
181 <element name="39" ftype="fasta"> | |
182 <assert_contents> | |
183 <has_text text=">g3k_9"/> | |
184 </assert_contents> | |
185 </element> | |
186 </output_collection> | |
187 <output_collection name="output_bins" count="0"> | |
188 <!--<element name="0" ftype="fasta"> | |
189 <assert_contents> | |
190 <has_text text=">g1k_0"/> | |
191 </assert_contents> | |
192 </element> | |
193 <element name="1" ftype="fasta"> | |
194 <assert_contents> | |
195 <has_text text=">g2k_0"/> | |
196 </assert_contents> | |
197 </element> | |
198 <element name="2" ftype="fasta"> | |
199 <assert_contents> | |
200 <has_text text=">g3k_0"/> | |
201 </assert_contents> | |
202 </element>--> | |
203 </output_collection> | |
204 <output name="single_data" ftype="csv"> | 171 <output name="single_data" ftype="csv"> |
205 <assert_contents> | 172 <assert_contents> |
206 <has_text text="g1k_0"/> | 173 <has_text text="g1k_0"/> |
207 <has_text text="g4k_7"/> | 174 <has_text text="g4k_7"/> |
208 </assert_contents> | 175 </assert_contents> |
237 </conditional> | 204 </conditional> |
238 <conditional name="min_len"> | 205 <conditional name="min_len"> |
239 <param name="method" value="ratio"/> | 206 <param name="method" value="ratio"/> |
240 <param name="ratio" value="0.05"/> | 207 <param name="ratio" value="0.05"/> |
241 </conditional> | 208 </conditional> |
242 <param name="orf_finder" value="fraggenescan"/> | 209 <param name="orf_finder" value="fast-naive"/> |
243 <param name="random-seed" value="0"/> | 210 <param name="random-seed" value="0"/> |
244 <section name="annot"> | 211 <section name="annot"> |
245 <param name="ml_threshold" value=""/> | 212 <param name="ml_threshold" value=""/> |
246 </section> | 213 </section> |
247 <section name="training"> | 214 <section name="training"> |
250 </section> | 217 </section> |
251 <section name="bin"> | 218 <section name="bin"> |
252 <param name="max_node" value="1"/> | 219 <param name="max_node" value="1"/> |
253 <param name="max_edges" value="200"/> | 220 <param name="max_edges" value="200"/> |
254 <param name="minfasta_kbs" value="200"/> | 221 <param name="minfasta_kbs" value="200"/> |
255 <param name="no_recluster" value="true"/> | |
256 </section> | 222 </section> |
257 <param name="extra_output" value="coverage"/> | 223 <param name="extra_output" value="coverage"/> |
258 <output_collection name="output_bins" count="3"> | 224 <output_collection name="output_bins" count="0"/> |
259 <element name="0" ftype="fasta"> | |
260 <assert_contents> | |
261 <has_text text=">g1k_0"/> | |
262 </assert_contents> | |
263 </element> | |
264 <element name="1" ftype="fasta"> | |
265 <assert_contents> | |
266 <has_text text=">g2k_0"/> | |
267 </assert_contents> | |
268 </element> | |
269 <element name="2" ftype="fasta"> | |
270 <assert_contents> | |
271 <has_text text=">g3k_0"/> | |
272 </assert_contents> | |
273 </element> | |
274 </output_collection> | |
275 <output_collection name="co_cov" count="5"> | 225 <output_collection name="co_cov" count="5"> |
276 <element name="0" ftype="csv"> | 226 <element name="0" ftype="csv"> |
277 <assert_contents> | 227 <assert_contents> |
278 <has_text text="g1k_0"/> | 228 <has_text text="g1k_0"/> |
279 <has_text text="g2k_7"/> | 229 <has_text text="g2k_7"/> |
311 <has_text text="g2k_7_2"/> | 261 <has_text text="g2k_7_2"/> |
312 </assert_contents> | 262 </assert_contents> |
313 </element> | 263 </element> |
314 </output_collection> | 264 </output_collection> |
315 </test> | 265 </test> |
316 <test expect_num_outputs="4"> | 266 <test expect_num_outputs="3"> |
317 <conditional name="mode"> | 267 <conditional name="mode"> |
318 <param name="select" value="co"/> | 268 <param name="select" value="co"/> |
319 <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> | 269 <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> |
320 <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> | 270 <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> |
321 <conditional name="ref"> | 271 <conditional name="ref"> |
338 </section> | 288 </section> |
339 <section name="bin"> | 289 <section name="bin"> |
340 <param name="max_node" value="1"/> | 290 <param name="max_node" value="1"/> |
341 <param name="max_edges" value="200"/> | 291 <param name="max_edges" value="200"/> |
342 <param name="minfasta_kbs" value="200"/> | 292 <param name="minfasta_kbs" value="200"/> |
343 <param name="no_recluster" value="false"/> | |
344 </section> | 293 </section> |
345 <param name="extra_output" value="coverage"/> | 294 <param name="extra_output" value="coverage"/> |
346 <output_collection name="output_recluster_bins" count="1"> | 295 <output_collection name="output_bins" count="0"/> |
347 <element name="30" ftype="fasta"> | |
348 <assert_contents> | |
349 <has_text text=">g3k_0"/> | |
350 </assert_contents> | |
351 </element> | |
352 </output_collection> | |
353 <output_collection name="co_cov" count="5"> | 296 <output_collection name="co_cov" count="5"> |
354 <element name="0" ftype="csv"> | 297 <element name="0" ftype="csv"> |
355 <assert_contents> | 298 <assert_contents> |
356 <has_text text="g1k_0"/> | 299 <has_text text="g1k_0"/> |
357 <has_text text="g2k_7"/> | 300 <has_text text="g2k_7"/> |
416 </section> | 359 </section> |
417 <section name="bin"> | 360 <section name="bin"> |
418 <param name="max_node" value="1"/> | 361 <param name="max_node" value="1"/> |
419 <param name="max_edges" value="200"/> | 362 <param name="max_edges" value="200"/> |
420 <param name="minfasta_kbs" value="200"/> | 363 <param name="minfasta_kbs" value="200"/> |
421 <param name="no_recluster" value="true"/> | |
422 </section> | 364 </section> |
423 <param name="extra_output" value=""/> | 365 <param name="extra_output" value=""/> |
424 <output_collection name="output_bins" count="3"> | 366 <output_collection name="output_bins" count="1"> |
425 <element name="0" ftype="fasta"> | 367 <element name="SemiBin_30" ftype="fasta"> |
426 <assert_contents> | 368 <assert_contents> |
427 <has_text text=">g1k_0"/> | 369 <has_text text=">g3k_0"/> |
428 </assert_contents> | 370 </assert_contents> |
429 </element> | 371 </element> |
430 </output_collection> | 372 </output_collection> |
431 </test> | 373 </test> |
432 <test expect_num_outputs="2"> | 374 <test expect_num_outputs="2"> |
454 </section> | 396 </section> |
455 <section name="bin"> | 397 <section name="bin"> |
456 <param name="max_node" value="1"/> | 398 <param name="max_node" value="1"/> |
457 <param name="max_edges" value="200"/> | 399 <param name="max_edges" value="200"/> |
458 <param name="minfasta_kbs" value="200"/> | 400 <param name="minfasta_kbs" value="200"/> |
459 <param name="no_recluster" value="false"/> | 401 </section> |
460 </section> | 402 <param name="extra_output" value="pre_reclustering_bins"/> |
461 <param name="extra_output" value=""/> | 403 <output_collection name="output_pre_recluster_bins" count="3"> |
462 <output_collection name="output_recluster_bins" count="1"> | 404 <element name="SemiBin_0" ftype="fasta"> |
463 <element name="30" ftype="fasta"> | 405 <assert_contents> |
464 <assert_contents> | 406 <has_text text="g1k_0"/> |
465 <has_text text=">g3k_0"/> | 407 </assert_contents> |
 | 408 </element> |
 | 409 <element name="SemiBin_1" ftype="fasta"> |
 | 410 <assert_contents> |
 | 411 <has_text text="g2k_0"/> |
 | 412 </assert_contents> |
 | 413 </element> |
 | 414 <element name="SemiBin_2" ftype="fasta"> |
 | 415 <assert_contents> |
 | 416 <has_text text="g3k_0"/> |
 | 417 </assert_contents> |
 | 418 </element> |
 | 419 </output_collection> |
 | 420 <output_collection name="output_after_recluster_bins" count="1"> |
 | 421 <element name="SemiBin_30" ftype="fasta"> |
 | 422 <assert_contents> |
 | 423 <has_text text="g3k_0"/> |
466 </assert_contents> | 424 </assert_contents> |
467 </element> | 425 </element> |
468 </output_collection> | 426 </output_collection> |
469 </test> | 427 </test> |
470 <test expect_num_outputs="8"> | 428 <test expect_num_outputs="8"> |
495 </section> | 453 </section> |
496 <section name="bin"> | 454 <section name="bin"> |
497 <param name="max_node" value="1"/> | 455 <param name="max_node" value="1"/> |
498 <param name="max_edges" value="200"/> | 456 <param name="max_edges" value="200"/> |
499 <param name="minfasta_kbs" value="200"/> | 457 <param name="minfasta_kbs" value="200"/> |
500 <param name="no_recluster" value="true"/> | |
501 </section> | 458 </section> |
502 <param name="extra_output" value="data,coverage,contigs"/> | 459 <param name="extra_output" value="data,coverage,contigs"/> |
503 <output_collection name="multi_bins" count="2"> | 460 <output_collection name="multi_bins" count="0"/> |
504 <element name="0" ftype="fasta"> | 461 <output_collection name="multi_data" count="10"> |
505 <assert_contents> | 462 <element name="S8" ftype="csv"> |
506 <has_text text=">g1k_0"/> | 463 <assert_contents> |
 | 464 <has_text text="g1k_0,"/> |
 | 465 </assert_contents> |
 | 466 </element> |
 | 467 </output_collection> |
 | 468 <output_collection name="multi_data_split" count="10"> |
 | 469 <element name="S8" ftype="csv"> |
 | 470 <assert_contents> |
 | 471 <has_text text="g1k_0_1,"/> |
 | 472 </assert_contents> |
 | 473 </element> |
 | 474 </output_collection> |
 | 475 <output_collection name="multi_cov" count="10"> |
 | 476 <element name="8" ftype="csv"> |
 | 477 <assert_contents> |
 | 478 <has_text text="S1:g1k_5,"/> |
 | 479 </assert_contents> |
 | 480 </element> |
 | 481 </output_collection> |
 | 482 <output_collection name="multi_cov_sample" count="10"> |
 | 483 <element name="S8" ftype="csv"> |
 | 484 <assert_contents> |
 | 485 <has_text text="g1k_3"/> |
 | 486 </assert_contents> |
 | 487 </element> |
 | 488 </output_collection> |
 | 489 <output_collection name="multi_split_cov" count="10"> |
 | 490 <element name="8" ftype="csv"> |
 | 491 <assert_contents> |
 | 492 <has_text text="S1:g1k_5_1,0."/> |
 | 493 </assert_contents> |
 | 494 </element> |
 | 495 </output_collection> |
 | 496 <output_collection name="multi_split_cov_sample" count="10"> |
 | 497 <element name="S8" ftype="csv"> |
 | 498 <assert_contents> |
 | 499 <has_text text="g1k_3_1"/> |
507 </assert_contents> | 500 </assert_contents> |
508 </element> | 501 </element> |
509 </output_collection> | 502 </output_collection> |
510 <output_collection name="multi_contigs" count="10"> | 503 <output_collection name="multi_contigs" count="10"> |
511 <element name="S8" ftype="fasta"> | 504 <element name="S8" ftype="fasta"> |
512 <assert_contents> | 505 <assert_contents> |
513 <has_text text=">g1k_0"/> | 506 <has_text text=">g1k_0"/> |
514 </assert_contents> | 507 </assert_contents> |
515 </element> | 508 </element> |
516 </output_collection> | 509 </output_collection> |
517 <output_collection name="multi_data" count="10"> | |
518 <element name="S8" ftype="csv"> | |
519 <assert_contents> | |
520 <has_text text="g1k_0,"/> | |
521 </assert_contents> | |
522 </element> | |
523 </output_collection> | |
524 <output_collection name="multi_cov" count="10"> | |
525 <element name="8" ftype="csv"> | |
526 <assert_contents> | |
527 <has_text text="S1:g1k_5,"/> | |
528 </assert_contents> | |
529 </element> | |
530 </output_collection> | |
531 <output_collection name="multi_cov_sample" count="10"> | |
532 <element name="S8" ftype="csv"> | |
533 <assert_contents> | |
534 <has_text text="g1k_3"/> | |
535 </assert_contents> | |
536 </element> | |
537 </output_collection> | |
538 <output_collection name="multi_split_cov" count="10"> | |
539 <element name="8" ftype="csv"> | |
540 <assert_contents> | |
541 <has_text text="S1:g1k_5_1,0."/> | |
542 </assert_contents> | |
543 </element> | |
544 </output_collection> | |
545 <output_collection name="multi_split_cov_sample" count="10"> | |
546 <element name="S8" ftype="csv"> | |
547 <assert_contents> | |
548 <has_text text="g1k_3_1"/> | |
549 </assert_contents> | |
550 </element> | |
551 </output_collection> | |
552 <output_collection name="multi_contigs" count="10"> | |
553 <element name="S8" ftype="fasta"> | |
554 <assert_contents> | |
555 <has_text text=">g1k_0"/> | |
556 </assert_contents> | |
557 </element> | |
558 </output_collection> | |
559 </test> | 510 </test> |
560 | |
561 </tests> | 511 </tests> |
562 <help><![CDATA[ | 512 <help><![CDATA[ |
563 @HELP_HEADER@ | 513 @HELP_HEADER@ |
564 | 514 |
565 Inputs | 515 Inputs |