changeset 0:70b492856e17 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/binette/ commit 59b031eff1c156122720281e42b0eaa8d3724c57
author iuc
date Mon, 20 Jan 2025 16:19:35 +0000
parents
children 6641160f2053
files binette.xml test-data/A.binning test-data/B.binning test-data/C.binning test-data/all_contig.fasta.gz test-data/checkm2.loc test-data/checkm2_tiny_db.dmnd test-data/proteins.fasta.gz tool-data/checkm2.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 11 files changed, 292 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/binette.xml	Mon Jan 20 16:19:35 2025 +0000
@@ -0,0 +1,188 @@
+<tool id="binette" name="Binette" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Binning refinement tool</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.0.5</token> <!-- binette release; also used as the version of the package requirement below -->
+        <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision, appended after "+galaxy" in the tool version attribute -->
+        <token name="@PROFILE@">24.1</token> <!-- substituted into the profile attribute of the tool element -->
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">binette</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+        <![CDATA[
+            mkdir -p 'input' 'output' &&
+            ## Link every contig2bin table into input/ with a .tsv suffix so the -b glob below finds them.
+            #for $i, $file in enumerate($contig2bin_tables):
+                ln -s '$file' 'input/bin_table_${i}.tsv' &&
+            #end for
+            ## Parameters declared inside the database_type conditional are addressed through that prefix.
+            ln -s '$contigs' 'input_contigs.fasta' &&
+            #if $database_type.is_select == 'his':
+                ln -s '$database_type.checkm2_db' 'input_database.dmnd' &&
+            #end if
+            ## The protein file is optional, so it is only linked and passed on when it was supplied.
+            #if $proteins:
+                ln -s '$proteins' 'input_proteins.fasta' &&
+            #end if
+            ## Run the refinement; results are written below output/, which the outputs section reads from.
+            binette
+            -b input/*.tsv
+            -c 'input_contigs.fasta'
+            #if $proteins:
+                -p 'input_proteins.fasta'
+            #end if
+            -m ${min_completeness}
+            -t "\${GALAXY_SLOTS:-1}"
+            -o 'output/'
+            -w ${contamination_weight}
+            #if $database_type.is_select == 'his':
+                --checkm2_db 'input_database.dmnd'
+            #else
+                --checkm2_db '$database_type.datamanager.fields.path'
+            #end if
+        
+        ]]>
+    </command>
+    <inputs>
+        <param argument="--contig2bin_tables" type="data" multiple="true" min="2" format="tabular" label="Input contig table"
+            help="Input at least 2 different contig tables. Look into the help section at the bottom for more information!"/>
+        <param argument="--contigs" type="data" format="fasta,fasta.gz" label="Input contig file"/>
+        <param argument="--proteins" type="data" format="fasta,fasta.gz" optional="true" label="Input FASTA file in Prodigal format (>contigID_geneID)"
+            help="If this file is provided, the gene prediction step is skipped and the proteins in this file are used instead. An example of this format is in the help section"/>
+        <param argument="--min_completeness" type="integer" min="0" max="100" value="40" label="Set minimum completeness"
+            help="Threshold for bins for the final bin selection"/>
+        <param argument="--contamination_weight" type="integer" value="2" label="Set contamination weight"
+            help="This weight is used for scoring the bins. A low weight favors complete bins over low-contamination bins"/>
+        <conditional name="database_type"> <!-- chooses where the CheckM2 diamond database passed to the checkm2_db option comes from -->
+            <param name="is_select" type="select" label="Select whether the CheckM2 database is taken from the history or from a cached database">
+                <option value="cached">cached database</option>
+                <option value="his">History</option>
+            </param>
+            <when value="his">
+                <param argument="--checkm2_db" type="data" format="dmnd" label="Input CheckM2 diamond database"
+                    help="Download a CheckM2 diamond database, upload it to your history and select it here."/>
+            </when>
+            <when value="cached">
+                <param name="datamanager" type="select" label="Select CheckM2 database" help="CheckM2 Diamond database">
+                    <options from_data_table="checkm2">
+                        <filter type="sort_by" column="2"/> <!-- column index 2 is "name" in the checkm2 data table definition -->
+                    </options>
+                    <validator type="no_options" message="No databases are available for this version of Checkm2. Please contact the Galaxy administrators to request one be installed."/>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <collection name="bins" type="list" label="${tool.name} on ${on_string}: Bins"> <!-- refined bins discovered in output/final_bins -->
+            <discover_datasets pattern="((?P&lt;designation&gt;.*)\.fa)" format="fasta" directory="output/final_bins"/>
+        </collection>
+        <collection name="quality" type="list" label="${tool.name} on ${on_string}: Quality Report"> <!-- per-input reports discovered in output/input_bins_quality_reports -->
+            <discover_datasets pattern="((?P&lt;designation&gt;.*)\.tsv)" format="tabular" directory="output/input_bins_quality_reports"/>
+        </collection>
+        <data name="final" format="tabular" from_work_dir="output/final_bins_quality_reports.tsv" label="${tool.name} on ${on_string}: Final Quality Report"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="3"> <!-- CheckM2 database supplied from the history, no protein file -->
+            <param name="contig2bin_tables" ftype="tabular" value="A.binning,B.binning,C.binning"/>
+            <param name="contigs" value="all_contig.fasta.gz" ftype="fasta.gz"/>
+            <param name="min_completeness" value="5"/>
+            <param name="contamination_weight" value="0"/>
+            <conditional name="database_type">
+                <param name="is_select" value="his"/> 
+                <param name="checkm2_db" value="checkm2_tiny_db.dmnd"/>
+            </conditional>
+            <output name="final" ftype="tabular">
+                <assert_contents>
+                    <has_text text="binC"/>
+                    <has_text text="50"/>
+                    <has_text text="9"/>
+                </assert_contents>
+            </output>
+            <output_collection name="bins" count="4"/>
+        </test>
+        <test expect_num_outputs="3"> <!-- cached CheckM2 database from the checkm2 data table, with a protein file -->
+            <param name="contig2bin_tables" ftype="tabular" value="A.binning,B.binning,C.binning"/> 
+            <param name="contigs" value="all_contig.fasta.gz" ftype="fasta.gz"/>
+            <param name="min_completeness" value="40"/>
+            <param name="contamination_weight" value="2"/>
+            <conditional name="database_type">
+                <param name="is_select" value="cached"/> 
+                <param name="datamanager" value="test_db"/>
+            </conditional> 
+            <param name="proteins" ftype="fasta.gz" value="proteins.fasta.gz"/>
+            <output name="final" ftype="tabular">
+                <assert_contents>
+                    <has_text text="binC"/>
+                    <has_text text="50"/>
+                    <has_text text="40"/>
+                </assert_contents>
+            </output>
+            <output_collection name="bins" count="4"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+
+        .. class:: infomark
+
+        **What Binette does**
+
+        Binette is a fast and accurate binning refinement tool that constructs high-quality MAGs from the output of multiple binning tools.
+
+        **Inputs**
+
+        - At least 2 different contig tables.
+
+        .. class:: infomark
+
+        The contig tables can be generated with the tool *Converts genome bins in fasta format*. This tool only needs the bins that were created by any binner as input.
+
+        - The contig file
+
+        .. class:: infomark
+        
+        This file should contain all contigs used to create the bins. The format of this file should be either fasta or fasta.gz.
+
+        - A CheckM2 diamond database
+
+        .. class:: infomark
+
+        This database can be downloaded using the CheckM2 package with the following command: *checkm2 database --download --path <checkm2/database/>*. Alternatively, a database cached on Galaxy can be used.
+        
+
+        - An optional (fasta/fasta.gz) file with predicted genes 
+
+        .. class:: infomark
+
+        This file, in FASTA format, is generated with the tool *Prodigal*.
+
+        Example:
+        
+        ::
+
+         >Chlamydia_trachomatis_part1_1 # 1 # 1776 # 1 # ID=1_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.466
+         MSIRGVGGNGNSRIPSHNGDGSNRRSQNTKGNNKVEDRVCSLYSSRSNENRESPYAVVDV
+         SSMIESTPTSGETTRASRGVFSRFQRGLVRIADKVRRAVQCAWSSVSTSRSSATRAAESG
+         SSSRTARGASSGYREYSPSAARGLRLMFTDFWRTRVLRQTSPMAGVFGNLDVNEARLMAA
+         YTSECADHLEAKELAGPDGVAAAREIAKRWEKRVRDLQDKGAARKLLNDPLGRRTPNYQS
+         KNPGEYTVGNSMFYDGPQVANLQNVDTGFWLDMSNLSDVVLSREIQTGLRARATLEESMP
+         MLENLEERFRRLQETCDAARTEIEESGWTRESASRMEGDEAQGPSRAQQAFQSFVNECNS
+         IEFSFGSFGEHVRVLCARVSRGLAAAGEAIRRCFSCCKGSTHRYAPRDDLSPEGASLAET
+         LARFADDMGIERGADGTYDIPLVDDWRRGVPSIEGEGSDSIYEIMMPIYEVMNMDLETRR
+         SFAVQQGHYQDPRASDYDLPRASDYDLPRSPYPTPPLPPRYQLQNMDVEAGFREAVYASF
+         VAGMYNYVVTQPQERIPNSQQVEGILRDMLTNGSQTFRDLMKRWNREVDRE* 
+        
+        **Outputs**
+
+        - A collection (list) with all the selected bins in fasta format.
+
+        - A final quality report file containing quality information about the final selected bins.
+
+        - A collection (list) storing quality reports for the input bin sets, with files following the same structure as the final quality report file.
+        
+        ]]>
+    </help>
+    <citations>
+        <citation type="doi">10.21105/joss.06782</citation>
+    </citations> 
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/A.binning	Mon Jan 20 16:19:35 2025 +0000
@@ -0,0 +1,34 @@
+Chlamydia_trachomatis_part1	bin1
+Chlamydia_trachomatis_part2	bin2
+Chlamydia_trachomatis_part3	bin3
+Mycoplasmoides_genitalium_part25	bin4
+Mycoplasmoides_genitalium_part24	bin4
+Mycoplasmoides_genitalium_part23	bin4
+Mycoplasmoides_genitalium_part22	bin4
+Mycoplasmoides_genitalium_part21	bin4
+Mycoplasmoides_genitalium_part20	bin4
+Mycoplasmoides_genitalium_part19	bin4
+Mycoplasmoides_genitalium_part18	bin4
+Mycoplasmoides_genitalium_part17	bin4
+Mycoplasmoides_genitalium_part16	bin4
+Mycoplasmoides_genitalium_part15	bin4
+Mycoplasmoides_genitalium_part14	bin4
+Mycoplasmoides_genitalium_part13	bin4
+Mycoplasmoides_genitalium_part12	bin4
+Mycoplasmoides_genitalium_part11	bin4
+Mycoplasmoides_genitalium_part10	bin5
+Mycoplasmoides_genitalium_part09	bin5
+Mycoplasmoides_genitalium_part08	bin5
+Mycoplasmoides_genitalium_part07	bin5
+Mycoplasmoides_genitalium_part06	bin5
+Mycoplasmoides_genitalium_part05	bin5
+Mycoplasmoides_genitalium_part04	bin5
+Mycoplasmoides_genitalium_part03	bin5
+Mycoplasmoides_genitalium_part02	bin5
+Mycoplasmoides_genitalium_part01	bin5
+Nanoarchaeum_equitans_part1	bin6
+Nanoarchaeum_equitans_part2	bin6
+Nanoarchaeum_equitans_part3	bin6
+Nanoarchaeum_equitans_part4	bin6
+Wigglesworthia_glossinidia_part1	bin6
+Wigglesworthia_glossinidia_part2	bin6
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/B.binning	Mon Jan 20 16:19:35 2025 +0000
@@ -0,0 +1,20 @@
+Chlamydia_trachomatis_part2	binA
+Chlamydia_trachomatis_part3	binA
+Mycoplasmoides_genitalium_part25	binB
+Mycoplasmoides_genitalium_part24	binB
+Mycoplasmoides_genitalium_part23	binB
+Mycoplasmoides_genitalium_part22	binB
+Mycoplasmoides_genitalium_part21	binB
+Mycoplasmoides_genitalium_part20	binB
+Mycoplasmoides_genitalium_part19	binB
+Mycoplasmoides_genitalium_part18	binB
+Mycoplasmoides_genitalium_part17	binB
+Mycoplasmoides_genitalium_part16	binB
+Mycoplasmoides_genitalium_part15	binB
+Mycoplasmoides_genitalium_part14	binB
+Mycoplasmoides_genitalium_part13	binB
+Mycoplasmoides_genitalium_part12	binB
+Mycoplasmoides_genitalium_part11	binB
+Mycoplasmoides_genitalium_part10	binB
+Wigglesworthia_glossinidia_part1	binC
+Wigglesworthia_glossinidia_part2	binC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/C.binning	Mon Jan 20 16:19:35 2025 +0000
@@ -0,0 +1,17 @@
+Chlamydia_trachomatis_part1	binT
+Chlamydia_trachomatis_part3	binT
+Mycoplasmoides_genitalium_part25	binF
+Mycoplasmoides_genitalium_part24	binF
+Mycoplasmoides_genitalium_part23	binF
+Mycoplasmoides_genitalium_part22	binF
+Mycoplasmoides_genitalium_part21	binF
+Mycoplasmoides_genitalium_part13	binD
+Mycoplasmoides_genitalium_part12	binD
+Mycoplasmoides_genitalium_part11	binD
+Mycoplasmoides_genitalium_part10	binD
+Mycoplasmoides_genitalium_part09	binD
+Mycoplasmoides_genitalium_part08	binD
+Mycoplasmoides_genitalium_part07	binD
+Mycoplasmoides_genitalium_part06	binD
+Mycoplasmoides_genitalium_part05	binD
+Wigglesworthia_glossinidia_part2	binE
Binary file test-data/all_contig.fasta.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/checkm2.loc	Mon Jan 20 16:19:35 2025 +0000
@@ -0,0 +1,3 @@
+##Checkm2 versioned indexes
+#build_id	dbkey	display_name	version	path
+001	1.0.0	test_db	0.0.0	${__HERE__}/checkm2_tiny_db.dmnd
\ No newline at end of file
Binary file test-data/checkm2_tiny_db.dmnd has changed
Binary file test-data/proteins.fasta.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/checkm2.loc.sample	Mon Jan 20 16:19:35 2025 +0000
@@ -0,0 +1,18 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use the checkm2 database.
+#You will need to create these data files using the following command
+
+#checkm2 database --download --path /custom/path/
+
+#and then create a checkm2.loc file similar to this one (store it in this
+#directory) that points to the directories in which those files are stored.
+
+#The checkm2.loc file has this format (longer white space
+#characters are TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<version>	<file_base_path>
+
+#The <version> column indicates the checkm2 version that generated the database
+
+#
+#diamond_db_1.0.2	1.0.2	Diamond database	1.0.2	/mnt/galaxyIndices/Checkm2_database/uniref100.KO.1.dmnd
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Mon Jan 20 16:19:35 2025 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <table name="checkm2" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, version, path</columns> <!-- order of the tab-separated fields in each checkm2.loc entry -->
+        <file path="tool-data/checkm2.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Mon Jan 20 16:19:35 2025 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <table name="checkm2" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, version, path</columns> <!-- order of the tab-separated fields in each checkm2.loc entry -->
+        <file path="${__HERE__}/test-data/checkm2.loc" /> <!-- test configuration: reads the loc file kept under test-data -->
+    </table>
+</tables>
\ No newline at end of file