Mercurial > repos > iuc > semibin
changeset 3:b5a7583b8db0 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit a9fc83e0029266f910b549d5d1eef6a9bc3e3f7b
author | iuc |
---|---|
date | Tue, 25 Mar 2025 15:55:28 +0000 |
parents | 99ff9221182c |
children | |
files | macros.xml semibin.xml |
diffstat | 2 files changed, 167 insertions(+), 34 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Fri Nov 10 20:50:01 2023 +0000 +++ b/macros.xml Tue Mar 25 15:55:28 2025 +0000 @@ -1,7 +1,7 @@ <?xml version="1.0"?> <macros> <token name="@TOOL_VERSION@">2.0.2</token> - <token name="@VERSION_SUFFIX@">0</token> + <token name="@VERSION_SUFFIX@">1</token> <token name="@PROFILE@">21.01</token> <xml name="biotools"> <xrefs> @@ -140,9 +140,16 @@ #end for #end if ]]></token> + <xml name="ref_select_cannot"> + <param name="select" type="select" label="Reference database"> + <option value="cached">Cached database</option> + <option value="taxonomy">Pre-computed taxonomy</option> + </param> + </xml> <xml name="ref_select"> <param name="select" type="select" label="Reference database"> - <option value="cached" selected="true">Cached database</option> + <option value="ml" selected="true">Use SemiBin ML function</option> + <option value="cached">Cached database</option> <option value="taxonomy">Pre-computed taxonomy</option> </param> </xml> @@ -153,6 +160,28 @@ </options> </param> </xml> + <xml name="ref-single-cannot"> + <conditional name="ref"> + <expand macro="ref_select_cannot"/> + <when value="cached"> + <expand macro="cached_db"/> + </when> + <when value="taxonomy"> + <param argument="--taxonomy-annotation-table" type="data" format="tabular" label="Pre-computed mmseqs2 format taxonomy TSV file"/> + </when> + </conditional> + </xml> + <xml name="ref-multi-cannot"> + <conditional name="ref"> + <expand macro="ref_select_cannot"/> + <when value="cached"> + <expand macro="cached_db"/> + </when> + <when value="taxonomy"> + <param argument="--taxonomy-annotation-table" type="data" format="tabular" multiple="true" label="Pre-computed mmseqs2 format taxonomy TSV file" help="One per bin file"/> + </when> + </conditional> + </xml> <xml name="ref-single"> <conditional name="ref"> <expand macro="ref_select"/> @@ -162,6 +191,7 @@ <when value="taxonomy"> <param argument="--taxonomy-annotation-table" type="data" format="tabular" label="Pre-computed mmseqs2 format taxonomy TSV file"/> </when> + <when value="ml"/> </conditional> </xml> <xml name="ref-multi"> @@ -173,6 +203,7 @@ <when value="taxonomy"> <param argument="--taxonomy-annotation-table" type="data" format="tabular" multiple="true" label="Pre-computed mmseqs2 format taxonomy TSV file" help="One per bin file"/> </when> + <when value="ml"/> </conditional> </xml> <xml name="ref_single"> @@ -184,6 +215,7 @@ <when value="taxonomy"> <param argument="--taxonomy-annotation-table" type="data" format="tabular" label="Pre-computed mmseqs2 format taxonomy TSV file"/> </when> + <when value="ml"/> </conditional> </xml> <xml name="min_len">
--- a/semibin.xml Fri Nov 10 20:50:01 2023 +0000 +++ b/semibin.xml Tue Mar 25 15:55:28 2025 +0000 @@ -18,17 +18,19 @@ #if $mode.select == 'single' and str($mode.environment) != '' --environment '$mode.environment' #end if - #if $mode.ref.select == "cached" + #if $mode.ref.select == "cached": --reference-db-data-dir '$mode.ref.cached_db.fields.path' - #else + #end if + #if $mode.ref.select == "taxonomy" --taxonomy-annotation-table '$mode.ref.taxonomy_annotation_table' #end if #else multi_easy_bin --separator '$separator' - #if $mode.ref.select == "cached" + #if $mode.ref.select == "cached": --reference-db-data-dir '$mode.ref.cached_db.fields.path' - #else + #end if + #if $mode.ref.select == "taxonomy" --taxonomy-annotation-table #for $e in $mode.ref.taxonomy_annotation_table '$e' @@ -43,7 +45,7 @@ --orf-finder '$orf_finder' --random-seed $random_seed -#if str($annot.ml_threshold) != '' +#if $annot.ml_threshold: --ml-threshold $annot.ml_threshold #end if --epoches $training.epoches @@ -51,7 +53,7 @@ --max-node $bin.max_node --max-edges $bin.max_edges --minfasta-kbs $bin.minfasta_kbs -#if ($mode.select == 'single' or $mode.select == 'co') and "pre_reclustering_bins" in $extra_output +#if ($mode.select == 'single' or $mode.select == 'co') and $extra_output and "pre_reclustering_bins" in $extra_output --write-pre-reclustering-bins #end if --compression none @@ -153,10 +155,58 @@ <param name="min_len" value="0" /> </conditional> <param name="orf_finder" value="prodigal"/> - <param name="random-seed" value="0"/> - <section name="annot"> - <param name="ml_threshold" value=""/> + <param name="random_seed" value="0"/> + <section name="training"> + <param name="epoches" value="20"/> + <param name="batch_size" value="2048"/> + </section> + <section name="bin"> + <param name="max_node" value="1"/> + <param name="max_edges" value="200"/> + <param name="minfasta_kbs" value="200"/> </section> + <param name="extra_output" value="data,coverage,contigs"/> + <output_collection name="output_bins" count="0"/> + <output name="single_data" ftype="csv"> + <assert_contents> + <has_text text="g1k_0"/> + <has_text text="g4k_7"/> + </assert_contents> + </output> + <output name="single_data_split" ftype="csv"> + <assert_contents> + <has_text text="g1k_0_1"/> + <has_text text="g1k_6_2"/> + </assert_contents> + </output> + <output name="single_cov" ftype="csv"> + <assert_contents> + <has_text text="g1k_0"/> + <has_text text="0.027"/> + </assert_contents> + </output> + <output name="single_split_cov" ftype="csv"> + <assert_contents> + <has_size value="1" delta="1"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="5"> + <conditional name="mode"> + <param name="select" value="single"/> + <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> + <param name="input_bam" ftype="bam" value="input_single.bam"/> + <conditional name="ref"> + <param name="select" value="ml"/> + </conditional> + <param name="environment" value="human_gut"/> + </conditional> + <conditional name="min_len"> + <param name="method" value="min-len"/> + <param name="min_len" value="0" /> + </conditional> + <param name="orf_finder" value="prodigal"/> + <param name="random_seed" value="0"/> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -198,6 +248,73 @@ <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> <conditional name="ref"> + <param name="select" value="ml"/> + </conditional> + </conditional> + <conditional name="min_len"> + <param name="method" value="ratio"/> + <param name="ratio" value="0.05"/> + </conditional> + <param name="orf_finder" value="fast-naive"/> + <param name="random_seed" value="0"/> + <section name="training"> + <param name="epoches" value="20"/> + <param name="batch_size" value="2048"/> + </section> + <section name="bin"> + <param name="max_node" value="1"/> + <param name="max_edges" value="200"/> + <param name="minfasta_kbs" value="200"/> + </section> + <param name="extra_output" value="coverage"/> + <output_collection name="output_bins" count="0"/> + <output_collection name="co_cov" count="5"> + <element name="0" ftype="csv"> + <assert_contents> + <has_text text="g1k_0"/> + <has_text text="g2k_7"/> + </assert_contents> + </element> + <element name="1" ftype="csv"> + <assert_contents> + <has_text text="g1k_0"/> + <has_text text="g2k_7"/> + </assert_contents> + </element> + <element name="4" ftype="csv"> + <assert_contents> + <has_text text="g1k_0"/> + <has_text text="g2k_7"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="co_split_cov" count="5"> + <element name="0" ftype="csv"> + <assert_contents> + <has_text text="g1k_0_1"/> + <has_text text="g2k_7_2"/> + </assert_contents> + </element> + <element name="1" ftype="csv"> + <assert_contents> + <has_text text="g1k_0_1"/> + <has_text text="g2k_7_2"/> + </assert_contents> + </element> + <element name="2" ftype="csv"> + <assert_contents> + <has_text text="g1k_0_1"/> + <has_text text="g2k_7_2"/> + </assert_contents> + </element> + </output_collection> + </test> + <test expect_num_outputs="3"> + <conditional name="mode"> + <param name="select" value="co"/> + <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> + <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> + <conditional name="ref"> <param name="select" value="taxonomy"/> <param name="taxonomy_annotation_table" value="taxonomy.tsv"/> </conditional> @@ -207,10 +324,7 @@ <param name="ratio" value="0.05"/> </conditional> <param name="orf_finder" value="fast-naive"/> - <param name="random-seed" value="0"/> - <section name="annot"> - <param name="ml_threshold" value=""/> - </section> + <param name="random_seed" value="0"/> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -278,10 +392,7 @@ <param name="ratio" value="0.05"/> </conditional> <param name="orf_finder" value="fraggenescan"/> - <param name="random-seed" value="0"/> - <section name="annot"> - <param name="ml_threshold" value=""/> - </section> + <param name="random_seed" value="0"/> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -340,7 +451,7 @@ <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> <param name="input_bam" ftype="bam" value="input_single.bam"/> <conditional name="ref"> - <param name="db_selector" value="cached"/> + <param name="select" value="cached"/> <param name="cached_db" value="test-db"/> </conditional> </conditional> @@ -349,10 +460,7 @@ <param name="ratio" value="0.05"/> </conditional> <param name="orf_finder" value="fraggenescan"/> - <param name="random-seed" value="0"/> - <section name="annot"> - <param name="ml_threshold" value=""/> - </section> + <param name="random_seed" value="0"/> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -362,7 +470,6 @@ <param name="max_edges" value="200"/> <param name="minfasta_kbs" value="200"/> </section> - <param name="extra_output" value=""/> <output_collection name="output_bins" count="1"> <element name="SemiBin_30" ftype="fasta"> <assert_contents> @@ -377,7 +484,7 @@ <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> <param name="input_bam" ftype="bam" value="input_single.bam"/> <conditional name="ref"> - <param name="db_selector" value="cached"/> + <param name="select" value="cached"/> <param name="cached_db" value="test-db"/> </conditional> </conditional> @@ -386,10 +493,7 @@ <param name="ratio" value="0.05"/> </conditional> <param name="orf_finder" value="fraggenescan"/> - <param name="random-seed" value="0"/> - <section name="annot"> - <param name="ml_threshold" value=""/> - </section> + <param name="random_seed" value="0"/> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -444,9 +548,6 @@ </conditional> <param name="orf_finder" value="fraggenescan"/> <param name="random_seed" value="0"/> - <section name="annot"> - <param name="ml_threshold" value=""/> - </section> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -520,4 +621,4 @@ ]]></help> <expand macro="citations"/> -</tool> +</tool> \ No newline at end of file