Mercurial > repos > iuc > semibin
diff semibin.xml @ 0:7b382efabb98 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
author | iuc |
---|---|
date | Fri, 14 Oct 2022 21:38:26 +0000 |
parents | |
children | 6b517dc161e4 |
line wrap: on
line diff
<!--
    Galaxy tool wrapper for SemiBin, recovered from the changeset diff that
    added semibin.xml (diff header and "+" markers removed, indentation restored).

    The wrapper runs "SemiBin single_easy_bin" for the "single" and "co" modes
    and "SemiBin multi_easy_bin" for the "multi" mode. Tokens written as @NAME@
    and every <expand macro="..."/> are defined in macros.xml, which is not
    part of this file.
-->
<tool id="semibin" name="SemiBin" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>
        for Semi-supervised Metagenomic Binning
    </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <expand macro="version"/>
    <command detect_errors="exit_code"><![CDATA[
#import re
@BAM_FILES@
@FASTA_FILES@

SemiBin
## "single" and "co" share the single_easy_bin sub-command; only "single" may pass an environment
#if $mode.select == 'single' or $mode.select == 'co'
    single_easy_bin
    #if $mode.select == 'single' and str($mode.environment) != ''
        --environment '$mode.environment'
    #end if
    #if $mode.ref.select == "cached"
        --reference-db-data-dir '$mode.ref.cached_db.fields.path'
    #else
        --taxonomy-annotation-table '$mode.ref.taxonomy_annotation_table'
    #end if
#else
    multi_easy_bin
        --separator '$separator'
    #if $mode.ref.select == "cached"
        --reference-db-data-dir '$mode.ref.cached_db.fields.path'
    #else
        ## multi mode accepts several annotation tables, passed as one space-separated list
        --taxonomy-annotation-table
        #for $e in $mode.ref.taxonomy_annotation_table
            '$e'
        #end for
    #end if
#end if
    --input-fasta 'contigs.fasta'
    --input-bam *.bam
    --output 'output'
    --cannot-name 'cannot'
    @MIN_LEN@
    --orf-finder '$orf_finder'
    --random-seed $random_seed

## the threshold is optional: the flag is only emitted when a value was given
#if str($annot.ml_threshold) != ''
    --ml-threshold $annot.ml_threshold
#end if
    --epoches $training.epoches
    --batch-size $training.batch_size
    --max-node $bin.max_node
    --max-edges $bin.max_edges
    --minfasta-kbs $bin.minfasta_kbs
    $bin.no_recluster
    --threads \${GALAXY_SLOTS:-1}
    --processes \${GALAXY_SLOTS:-1}
&&
echo "output" &&
ls output
    ]]></command>
    <inputs>
        <!-- Binning mode; each branch pulls its FASTA, BAM and reference inputs from macros.xml -->
        <conditional name="mode">
            <expand macro="mode_select"/>
            <when value="single">
                <expand macro="input-fasta-single"/>
                <expand macro="input-bam-single"/>
                <expand macro="ref-single"/>
                <expand macro="environment"/>
            </when>
            <when value="co">
                <expand macro="input-fasta-single"/>
                <expand macro="input-bam-multi"/>
                <expand macro="ref-single"/>
            </when>
            <when value="multi">
                <expand macro="input-fasta-multi"/>
                <expand macro="input-bam-multi"/>
                <expand macro="ref-multi"/>
            </when>
        </conditional>
        <expand macro="min_len"/>
        <expand macro="orf-finder"/>
        <expand macro="random-seed"/>
        <section name="annot" title="Contig annotations" expanded="true">
            <expand macro="ml-threshold"/>
        </section>
        <section name="training" title="Training">
            <expand macro="epoches"/>
            <expand macro="batch-size"/>
        </section>
        <section name="bin" title="Binning">
            <expand macro="max-node"/>
            <expand macro="max-edges"/>
            <expand macro="minfasta-kbs"/>
            <expand macro="no-recluster"/>
        </section>
        <!-- Optional outputs; the values are tested by the <filter> expressions in <outputs> -->
        <param name="extra_output" type="select" multiple="true" optional="true" label="Extra outputs" help="In addition to the reconstructed bins">
            <option value="data">Training data</option>
            <option value="coverage">Coverage files</option>
            <option value="contigs">Contigs (if multiple sample)</option>
        </param>
    </inputs>
    <outputs>
        <!-- Bins: reclustered bins unless reclustering is disabled, plus the pre-reclustering bins of the selected mode -->
        <collection name="output_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins after reclustering">
            <filter>not bin["no_recluster"]</filter>
            <discover_datasets pattern=".*?\.(?P&lt;designation&gt;.*).fa" format="fasta" directory="output/output_recluster_bins" />
        </collection>
        <collection name="output_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering">
            <filter>mode["select"]!="multi"</filter>
            <discover_datasets pattern=".*?\.(?P&lt;designation&gt;.*).fa" format="fasta" directory="output/output_bins" />
        </collection>
        <collection name="multi_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering">
            <filter>mode["select"]=="multi"</filter>
            <discover_datasets pattern=".*?\.(?P&lt;designation&gt;.*).fa" format="fasta" directory="output/bins" />
        </collection>
        <!-- Training data: single files for the single/co modes, one collection element per sample directory for multi -->
        <data name="single_data" format="csv" from_work_dir="output/data.csv" label="${tool.name} on ${on_string}: Training data">
            <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter>
        </data>
        <data name="single_data_split" format="csv" from_work_dir="output/data_split.csv" label="${tool.name} on ${on_string}: Split training data">
            <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter>
        </data>
        <collection name="multi_data" type="list" label="${tool.name} on ${on_string}: Training data per sample">
            <filter>mode["select"]=="multi" and extra_output and "data" in extra_output</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/>
        </collection>
        <collection name="multi_data_split" type="list" label="${tool.name} on ${on_string}: Split training data per sample">
            <filter>mode["select"]=="multi" and extra_output and "data" in extra_output</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data_split.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/>
        </collection>
        <expand macro="generate_sequence_features_extra_outputs"/>
    </outputs>
    <tests>
        <!-- Test 1: single mode, taxonomy table, reclustering enabled, all extra outputs requested -->
        <test expect_num_outputs="6">
            <conditional name="mode">
                <param name="select" value="single"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
                <param name="input_bam" ftype="bam" value="input_single.bam"/>
                <conditional name="ref">
                    <param name="select" value="taxonomy"/>
                    <param name="taxonomy_annotation_table" value="taxonomy.tsv"/>
                </conditional>
                <param name="environment" value="human_gut"/>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="min-len"/>
                <param name="min_len" value="0" />
            </conditional>
            <param name="orf_finder" value="prodigal"/>
            <!-- parameter name matches $random_seed used in the command template -->
            <param name="random_seed" value="0"/>
            <section name="annot">
                <param name="ml_threshold" value=""/>
            </section>
            <section name="training">
                <param name="epoches" value="20"/>
                <param name="batch_size" value="2048"/>
            </section>
            <section name="bin">
                <param name="max_node" value="1"/>
                <param name="max_edges" value="200"/>
                <param name="minfasta_kbs" value="200"/>
                <param name="no_recluster" value="false"/>
            </section>
            <param name="extra_output" value="data,coverage,contigs"/>
            <output_collection name="output_recluster_bins" count="0"/>
            <output_collection name="output_bins" count="3">
                <element name="0" ftype="fasta">
                    <assert_contents>
                        <has_text text=">g1k_0"/>
                    </assert_contents>
                </element>
                <element name="1" ftype="fasta">
                    <assert_contents>
                        <has_text text=">g2k_0"/>
                    </assert_contents>
                </element>
                <element name="2" ftype="fasta">
                    <assert_contents>
                        <has_text text=">g3k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="single_data" ftype="csv">
                <assert_contents>
                    <has_text text="g1k_0"/>
                    <has_text text="g4k_7"/>
                </assert_contents>
            </output>
            <output name="single_data_split" ftype="csv">
                <assert_contents>
                    <has_text text="g1k_0_1"/>
                    <has_text text="g1k_6_2"/>
                </assert_contents>
            </output>
            <output name="single_cov" ftype="csv">
                <assert_contents>
                    <has_text text="g1k_0"/>
                    <has_text text="0.027"/>
                </assert_contents>
            </output>
            <output name="single_split_cov" ftype="csv">
                <assert_contents>
                    <has_size value="1" delta="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: co-assembly mode with five BAM files, reclustering disabled, coverage outputs only -->
        <test expect_num_outputs="3">
            <conditional name="mode">
                <param name="select" value="co"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
                <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/>
                <conditional name="ref">
                    <param name="select" value="taxonomy"/>
                    <param name="taxonomy_annotation_table" value="taxonomy.tsv"/>
                </conditional>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="ratio"/>
                <param name="ratio" value="0.05"/>
            </conditional>
            <param name="orf_finder" value="fraggenescan"/>
            <param name="random_seed" value="0"/>
            <section name="annot">
                <param name="ml_threshold" value=""/>
            </section>
            <section name="training">
                <param name="epoches" value="20"/>
                <param name="batch_size" value="2048"/>
            </section>
            <section name="bin">
                <param name="max_node" value="1"/>
                <param name="max_edges" value="200"/>
                <param name="minfasta_kbs" value="200"/>
                <param name="no_recluster" value="true"/>
            </section>
            <param name="extra_output" value="coverage"/>
            <output_collection name="output_bins" count="3">
                <element name="0" ftype="fasta">
                    <assert_contents>
                        <has_text text=">g1k_0"/>
                    </assert_contents>
                </element>
                <element name="1" ftype="fasta">
                    <assert_contents>
                        <has_text text=">g2k_0"/>
                    </assert_contents>
                </element>
                <element name="2" ftype="fasta">
                    <assert_contents>
                        <has_text text=">g3k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="co_cov" count="5">
                <element name="0" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0"/>
                        <has_text text="g2k_7"/>
                    </assert_contents>
                </element>
                <element name="1" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0"/>
                        <has_text text="g2k_7"/>
                    </assert_contents>
                </element>
                <element name="4" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0"/>
                        <has_text text="g2k_7"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="co_split_cov" count="5">
                <element name="0" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0_1"/>
                        <has_text text="g2k_7_2"/>
                    </assert_contents>
                </element>
                <element name="1" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0_1"/>
                        <has_text text="g2k_7_2"/>
                    </assert_contents>
                </element>
                <element name="2" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0_1"/>
                        <has_text text="g2k_7_2"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Test 3: single mode using a cached reference database, no extra outputs -->
        <test expect_num_outputs="1">
            <conditional name="mode">
                <param name="select" value="single"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
                <param name="input_bam" ftype="bam" value="input_single.bam"/>
                <conditional name="ref">
                    <!-- the selector is named "select", as read by $mode.ref.select in the command -->
                    <param name="select" value="cached"/>
                    <param name="cached_db" value="test-db"/>
                </conditional>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="ratio"/>
                <param name="ratio" value="0.05"/>
            </conditional>
            <param name="orf_finder" value="fraggenescan"/>
            <param name="random_seed" value="0"/>
            <section name="annot">
                <param name="ml_threshold" value=""/>
            </section>
            <section name="training">
                <param name="epoches" value="20"/>
                <param name="batch_size" value="2048"/>
            </section>
            <section name="bin">
                <param name="max_node" value="1"/>
                <param name="max_edges" value="200"/>
                <param name="minfasta_kbs" value="200"/>
                <param name="no_recluster" value="true"/>
            </section>
            <param name="extra_output" value=""/>
            <output_collection name="output_bins" count="3">
                <element name="0" ftype="fasta">
                    <assert_contents>
                        <has_text text=">g1k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Test 4: multi-sample mode with a concatenated FASTA, ten BAM files and ten taxonomy tables, all extra outputs -->
        <test expect_num_outputs="8">
            <conditional name="mode">
                <param name="select" value="multi"/>
                <conditional name="multi_fasta">
                    <param name="select" value="concatenated"/>
                    <param name="input_fasta" ftype="fasta" value="input_multi.fasta.gz"/>
                </conditional>
                <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/>
                <conditional name="ref">
                    <param name="select" value="taxonomy"/>
                    <param name="taxonomy_annotation_table" value="taxonomy.tsv,taxonomy_2.tsv,taxonomy_3.tsv,taxonomy_4.tsv,taxonomy_5.tsv,taxonomy_6.tsv,taxonomy_7.tsv,taxonomy_8.tsv,taxonomy_9.tsv,taxonomy_10.tsv"/>
                </conditional>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="ratio"/>
                <param name="ratio" value="0.05"/>
            </conditional>
            <param name="orf_finder" value="fraggenescan"/>
            <param name="random_seed" value="0"/>
            <section name="annot">
                <param name="ml_threshold" value=""/>
            </section>
            <section name="training">
                <param name="epoches" value="20"/>
                <param name="batch_size" value="2048"/>
            </section>
            <section name="bin">
                <param name="max_node" value="1"/>
                <param name="max_edges" value="200"/>
                <param name="minfasta_kbs" value="200"/>
                <param name="no_recluster" value="true"/>
            </section>
            <param name="extra_output" value="data,coverage,contigs"/>
            <output_collection name="multi_bins" count="2">
                <element name="0" ftype="fasta">
                    <assert_contents>
                        <has_text text=">g1k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_contigs" count="10">
                <element name="S8" ftype="fasta">
                    <assert_contents>
                        <has_text text=">g1k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_data" count="10">
                <element name="S8" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0,"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_cov" count="10">
                <element name="8" ftype="csv">
                    <assert_contents>
                        <has_text text="S1:g1k_5,"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_cov_sample" count="10">
                <element name="S8" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_3"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_split_cov" count="10">
                <element name="8" ftype="csv">
                    <assert_contents>
                        <has_text text="S1:g1k_5_1,0."/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_split_cov_sample" count="10">
                <element name="S8" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_3_1"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

Inputs
======

@HELP_INPUT_FASTA@
@HELP_INPUT_BAM@

    ]]></help>
    <expand macro="citations"/>
</tool>