diff semibin.xml @ 0:7b382efabb98 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
author iuc
date Fri, 14 Oct 2022 21:38:26 +0000
parents
children 6b517dc161e4
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/semibin.xml	Fri Oct 14 21:38:26 2022 +0000
@@ -0,0 +1,435 @@
+<tool id="semibin" name="SemiBin" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>
+        for Semi-supervised Metagenomic Binning
+    </description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="biotools"/>
+    <expand macro="requirements"/>
+    <expand macro="version"/>
+    <command detect_errors="exit_code"><![CDATA[
+#import re
+@BAM_FILES@
+@FASTA_FILES@
+
+SemiBin
+#if $mode.select == 'single' or $mode.select == 'co'
+    single_easy_bin
+    #if $mode.select == 'single' and str($mode.environment) != ''
+    --environment '$mode.environment'
+    #end if
+    #if $mode.ref.select == "cached"
+    --reference-db-data-dir '$mode.ref.cached_db.fields.path'
+    #else
+    --taxonomy-annotation-table '$mode.ref.taxonomy_annotation_table'
+    #end if
+#else
+    multi_easy_bin
+    --separator '$separator'
+    #if $mode.ref.select == "cached"
+    --reference-db-data-dir '$mode.ref.cached_db.fields.path'
+    #else
+    --taxonomy-annotation-table 
+        #for $e in $mode.ref.taxonomy_annotation_table
+            '$e' 
+        #end for
+    #end if
+#end if
+    --input-fasta 'contigs.fasta'
+    --input-bam *.bam
+    --output 'output'
+    --cannot-name 'cannot'
+    @MIN_LEN@
+    --orf-finder '$orf_finder'
+    --random-seed $random_seed
+
+#if str($annot.ml_threshold) != ''
+    --ml-threshold $annot.ml_threshold
+#end if
+    --epoches $training.epoches
+    --batch-size $training.batch_size
+    --max-node $bin.max_node
+    --max-edges $bin.max_edges
+    --minfasta-kbs $bin.minfasta_kbs
+    $bin.no_recluster
+    --threads \${GALAXY_SLOTS:-1}
+    --processes \${GALAXY_SLOTS:-1}
+&& 
+echo "output" &&
+ls output
+    ]]></command>
+    <inputs>
+        <conditional name="mode">
+            <expand macro="mode_select"/>
+            <when value="single">
+                <expand macro="input-fasta-single"/>
+                <expand macro="input-bam-single"/>
+                <expand macro="ref-single"/>
+                <expand macro="environment"/>
+            </when>
+            <when value="co">
+                <expand macro="input-fasta-single"/>
+                <expand macro="input-bam-multi"/>
+                <expand macro="ref-single"/>
+            </when>
+            <when value="multi">
+                <expand macro="input-fasta-multi"/>
+                <expand macro="input-bam-multi"/>
+                <expand macro="ref-multi"/>
+            </when>
+        </conditional>
+        <expand macro="min_len"/>
+        <expand macro="orf-finder"/>
+        <expand macro="random-seed"/>
+        <section name="annot" title="Contig annotations" expanded="true">
+            <expand macro="ml-threshold"/>
+        </section>
+        <section name="training" title="Training">
+            <expand macro="epoches"/>
+            <expand macro="batch-size"/>
+        </section>
+        <section name="bin" title="Binning">
+            <expand macro="max-node"/>
+            <expand macro="max-edges"/>
+            <expand macro="minfasta-kbs"/>
+            <expand macro="no-recluster"/>
+        </section>
+        <param name="extra_output" type="select" multiple="true" optional="true" label="Extra outputs" help="In addition to the training data">
+            <option value="data">Training data</option>
+            <option value="coverage">Coverage files</option>
+            <option value="contigs">Contigs (if multiple sample)</option>
+        </param>
+    </inputs>
+    <outputs>
+        <collection name="output_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins after reclustering">
+            <filter>not bin["no_recluster"]</filter>
+            <discover_datasets pattern=".*?\.(?P&lt;designation&gt;.*).fa" format="fasta" directory="output/output_recluster_bins" />
+        </collection>
+        <collection name="output_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering">
+            <filter>mode["select"]!="multi"</filter>
+            <discover_datasets pattern=".*?\.(?P&lt;designation&gt;.*).fa" format="fasta" directory="output/output_bins" />
+        </collection>
+        <collection name="multi_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering">
+            <filter>mode["select"]=="multi"</filter>
+            <discover_datasets pattern=".*?\.(?P&lt;designation&gt;.*).fa" format="fasta" directory="output/bins" />
+        </collection>
+        <data name="single_data" format="csv" from_work_dir="output/data.csv" label="${tool.name} on ${on_string}: Training data">
+            <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter>
+        </data>
+        <data name="single_data_split" format="csv" from_work_dir="output/data_split.csv" label="${tool.name} on ${on_string}: Split training data">
+            <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter>
+        </data>
+        <collection name="multi_data" type="list" label="${tool.name} on ${on_string}: Training data per sample">
+            <filter>mode["select"]=="multi" and extra_output and "data" in extra_output</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/>
+        </collection>
+        <collection name="multi_data_split" type="list" label="${tool.name} on ${on_string}: Split training data per sample">
+            <filter>mode["select"]=="multi" and extra_output and "data" in extra_output</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data_split.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/>
+        </collection>
+        <expand macro="generate_sequence_features_extra_outputs"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="6">
+            <conditional name="mode">
+                <param name="select" value="single"/>
+                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
+                <param name="input_bam" ftype="bam" value="input_single.bam"/>
+                <conditional name="ref">
+                    <param name="select" value="taxonomy"/>
+                    <param name="taxonomy_annotation_table" value="taxonomy.tsv"/>
+                </conditional>
+                <param name="environment" value="human_gut"/>
+            </conditional>
+            <conditional name="min_len">
+                <param name="method" value="min-len"/>
+                <param name="min_len" value="0" />
+            </conditional>
+            <param name="orf_finder" value="prodigal"/>
+            <param name="random-seed" value="0"/>
+            <section name="annot">
+                <param name="ml_threshold" value=""/>
+            </section>
+            <section name="training">
+                <param name="epoches" value="20"/>
+                <param name="batch_size" value="2048"/>
+            </section>
+            <section name="bin">
+                <param name="max_node" value="1"/>
+                <param name="max_edges" value="200"/>
+                <param name="minfasta_kbs" value="200"/>
+                <param name="no_recluster" value="false"/>
+            </section>
+            <param name="extra_output" value="data,coverage,contigs"/>
+            <output_collection name="output_recluster_bins" count="0"/>
+            <output_collection name="output_bins" count="3">
+                <element name="0" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">g1k_0"/>
+                    </assert_contents>
+                </element>
+                <element name="1" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">g2k_0"/>
+                    </assert_contents>
+                </element>
+                <element name="2" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">g3k_0"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="single_data" ftype="csv">
+                <assert_contents>
+                    <has_text text="g1k_0"/>
+                    <has_text text="g4k_7"/>
+                </assert_contents>
+            </output>
+            <output name="single_data_split" ftype="csv">
+                <assert_contents>
+                    <has_text text="g1k_0_1"/>
+                    <has_text text="g1k_6_2"/>
+                </assert_contents>
+            </output>
+            <output name="single_cov" ftype="csv">
+                <assert_contents>
+                    <has_text text="g1k_0"/>
+                    <has_text text="0.027"/>
+                </assert_contents>
+            </output>
+            <output name="single_split_cov" ftype="csv">
+                <assert_contents>
+                    <has_size value="1" delta="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="3">
+            <conditional name="mode">
+                <param name="select" value="co"/>
+                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
+                <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/>
+                <conditional name="ref">
+                    <param name="select" value="taxonomy"/>
+                    <param name="taxonomy_annotation_table" value="taxonomy.tsv"/>
+                </conditional>
+            </conditional>
+            <conditional name="min_len">
+                <param name="method" value="ratio"/>
+                <param name="ratio" value="0.05"/>
+            </conditional>
+            <param name="orf_finder" value="fraggenescan"/>
+            <param name="random-seed" value="0"/>
+            <section name="annot">
+                <param name="ml_threshold" value=""/>
+            </section>
+            <section name="training">
+                <param name="epoches" value="20"/>
+                <param name="batch_size" value="2048"/>
+            </section>
+            <section name="bin">
+                <param name="max_node" value="1"/>
+                <param name="max_edges" value="200"/>
+                <param name="minfasta_kbs" value="200"/>
+                <param name="no_recluster" value="true"/>
+            </section>
+            <param name="extra_output" value="coverage"/>
+            <output_collection name="output_bins" count="3">
+                <element name="0" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">g1k_0"/>
+                    </assert_contents>
+                </element>
+                <element name="1" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">g2k_0"/>
+                    </assert_contents>
+                </element>
+                <element name="2" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">g3k_0"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="co_cov" count="5">
+                <element name="0" ftype="csv">
+                    <assert_contents>
+                        <has_text text="g1k_0"/>
+                        <has_text text="g2k_7"/>
+                    </assert_contents>
+                </element>
+                <element name="1" ftype="csv">
+                    <assert_contents>
+                        <has_text text="g1k_0"/>
+                        <has_text text="g2k_7"/>
+                    </assert_contents>
+                </element>
+                <element name="4" ftype="csv">
+                    <assert_contents>
+                        <has_text text="g1k_0"/>
+                        <has_text text="g2k_7"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="co_split_cov" count="5">
+                <element name="0" ftype="csv">
+                    <assert_contents>
+                        <has_text text="g1k_0_1"/>
+                        <has_text text="g2k_7_2"/>
+                    </assert_contents>
+                </element>
+                <element name="1" ftype="csv">
+                    <assert_contents>
+                        <has_text text="g1k_0_1"/>
+                        <has_text text="g2k_7_2"/>
+                    </assert_contents>
+                </element>
+                <element name="2" ftype="csv">
+                    <assert_contents>
+                        <has_text text="g1k_0_1"/>
+                        <has_text text="g2k_7_2"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="mode">
+                <param name="select" value="single"/>
+                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
+                <param name="input_bam" ftype="bam" value="input_single.bam"/>
+                <conditional name="ref">
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db"/>
+                </conditional>
+            </conditional>
+            <conditional name="min_len">
+                <param name="method" value="ratio"/>
+                <param name="ratio" value="0.05"/>
+            </conditional>
+            <param name="orf_finder" value="fraggenescan"/>
+            <param name="random-seed" value="0"/>
+            <section name="annot">
+                <param name="ml_threshold" value=""/>
+            </section>
+            <section name="training">
+                <param name="epoches" value="20"/>
+                <param name="batch_size" value="2048"/>
+            </section>
+            <section name="bin">
+                <param name="max_node" value="1"/>
+                <param name="max_edges" value="200"/>
+                <param name="minfasta_kbs" value="200"/>
+                <param name="no_recluster" value="true"/>
+            </section>
+            <param name="extra_output" value=""/>
+            <output_collection name="output_bins" count="3">
+                <element name="0" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">g1k_0"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="8">
+            <conditional name="mode">
+                <param name="select" value="multi"/>
+                <conditional name="multi_fasta">
+                    <param name="select" value="concatenated"/>
+                    <param name="input_fasta" ftype="fasta" value="input_multi.fasta.gz"/>
+                </conditional>
+                <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/>
+                <conditional name="ref">
+                    <param name="select" value="taxonomy"/>
+                    <param name="taxonomy_annotation_table" value="taxonomy.tsv,taxonomy_2.tsv,taxonomy_3.tsv,taxonomy_4.tsv,taxonomy_5.tsv,taxonomy_6.tsv,taxonomy_7.tsv,taxonomy_8.tsv,taxonomy_9.tsv,taxonomy_10.tsv"/>
+                </conditional>
+            </conditional>
+            <conditional name="min_len">
+                <param name="method" value="ratio"/>
+                <param name="ratio" value="0.05"/>
+            </conditional>
+            <param name="orf_finder" value="fraggenescan"/>
+            <param name="random_seed" value="0"/>
+            <section name="annot">
+                <param name="ml_threshold" value=""/>
+            </section>
+            <section name="training">
+                <param name="epoches" value="20"/>
+                <param name="batch_size" value="2048"/>
+            </section>
+            <section name="bin">
+                <param name="max_node" value="1"/>
+                <param name="max_edges" value="200"/>
+                <param name="minfasta_kbs" value="200"/>
+                <param name="no_recluster" value="true"/>
+            </section>
+            <param name="extra_output" value="data,coverage,contigs"/>
+            <output_collection name="multi_bins" count="2">
+                <element name="0" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">g1k_0"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_contigs" count="10">
+                <element name="S8" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">g1k_0"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_data" count="10">
+                <element name="S8" ftype="csv">
+                    <assert_contents>
+                        <has_text text="g1k_0,"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_cov" count="10">
+                <element name="8" ftype="csv">
+                    <assert_contents>
+                        <has_text text="S1:g1k_5,"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_cov_sample" count="10">
+                <element name="S8" ftype="csv">
+                    <assert_contents>
+                        <has_text text="g1k_3"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_split_cov" count="10">
+                <element name="8" ftype="csv">
+                    <assert_contents>
+                        <has_text text="S1:g1k_5_1,0."/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_split_cov_sample" count="10">
+                <element name="S8" ftype="csv">
+                    <assert_contents>
+                        <has_text text="g1k_3_1"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_contigs" count="10">
+                <element name="S8" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">g1k_0"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+Inputs
+======
+
+@HELP_INPUT_FASTA@
+@HELP_INPUT_BAM@
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>