Mercurial > repos > iuc > binette
changeset 0:70b492856e17 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/binette/ commit 59b031eff1c156122720281e42b0eaa8d3724c57
| author | iuc |
|---|---|
| date | Mon, 20 Jan 2025 16:19:35 +0000 |
| parents | |
| children | 6641160f2053 |
| files | binette.xml test-data/A.binning test-data/B.binning test-data/C.binning test-data/all_contig.fasta.gz test-data/checkm2.loc test-data/checkm2_tiny_db.dmnd test-data/proteins.fasta.gz tool-data/checkm2.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
| diffstat | 11 files changed, 292 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/binette.xml Mon Jan 20 16:19:35 2025 +0000 @@ -0,0 +1,188 @@ +<tool id="binette" name="Binette" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>Binning refinement tool</description> + <macros> + <token name="@TOOL_VERSION@">1.0.5</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">24.1</token> + </macros> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">binette</requirement> + </requirements> + <command detect_errors="exit_code"> + <![CDATA[ + mkdir -p 'input' 'output' && + + #for $i, $file in enumerate($contig2bin_tables): + ln -s '$file' 'input/bin_table_${i}.tsv' && + #end for + + ln -s '$contigs' 'input_contigs.fasta' && + #if $database_type.is_select == 'his': + ln -s '$database_type.checkm2_db' 'input_database.dmnd' && + #end if + + #if $proteins: + ln -s '$proteins' 'input_proteins.fasta' && + #end if + + binette + -b input/*.tsv + -c 'input_contigs.fasta' + #if $proteins: + -p 'input_proteins.fasta' + #end if + -m ${min_completeness} + -t "\${GALAXY_SLOTS:-1}" + -o 'output/' + -w ${contamination_weight} + #if $database_type.is_select == 'his': + --checkm2_db 'input_database.dmnd' + #else + --checkm2_db '$database_type.datamanager.fields.path' + #end if + + ]]> + </command> + <inputs> + <param argument="--contig2bin_tables" type="data" multiple="true" min="2" format="tabular" label="Input contig table" + help="Input at least 2 different contig tables. Look into the help section at the bottom for more information!"/> + <param argument="--contigs" type="data" format="fasta,fasta.gz" label="Input contig file"/> + <param argument="--proteins" type="data" format="fasta,fasta.gz" optional="true" label="Input FASTA file in Prodigal format (>contigID_geneID)" + help="If this file is provided, the gene prediction step is skipped and the proteins in this file are used instead.
 An example of this format is in the help section"/> + <param argument="--min_completeness" type="integer" min="0" max="100" value="40" label="Set minimum completeness" + help="Threshold for bins for the final bin selection"/> + <param argument="--contamination_weight" type="integer" value="2" label="Set contamination weight" + help="This weight is used for scoring the bins. A low weight favors complete bins over low-contamination bins"/> + <conditional name="database_type"> + <param name="is_select" type="select" label="Select if database should be used either via file or cached database"> + <option value="cached">cached database</option> + <option value="his">History</option> + </param> + <when value="his"> + <param argument="--checkm2_db" type="data" format="dmnd" label="Input CheckM2 diamond database" + help="When a CheckM2 diamond database should be used download and input it here."/> + </when> + <when value="cached"> + <param name="datamanager" type="select" label="Select CheckM2 database" help="Checkm2 Diamond database"> + <options from_data_table="checkm2"> + <filter type="sort_by" column="2"/> + </options> + <validator type="no_options" message="No databases are available for this version of Checkm2. 
 Please contact the Galaxy administrators to request one be installed."/> + </param> + </when> + </conditional> + </inputs> + <outputs> + <collection name="bins" type="list" label="${tool.name} on ${on_string}: Bins"> + <discover_datasets pattern="((?P<designation>.*)\.fa)" format="fasta" directory="output/final_bins"/> + </collection> + <collection name="quality" type="list" label="${tool.name} on ${on_string}: Quality Report"> + <discover_datasets pattern="((?P<designation>.*)\.tsv)" format="tabular" directory="output/input_bins_quality_reports"/> + </collection> + <data name="final" format="tabular" from_work_dir="output/final_bins_quality_reports.tsv" label="${tool.name} on ${on_string}: Final Quality Report"/> + </outputs> + <tests> + <test expect_num_outputs="3"> + <param name="contig2bin_tables" ftype="tabular" value="A.binning,B.binning,C.binning"/> + <param name="contigs" value="all_contig.fasta.gz" ftype="fasta.gz"/> + <param name="min_completeness" value="5"/> + <param name="contamination_weight" value="0"/> + <conditional name="database_type"> + <param name="is_select" value="his"/> + <param name="checkm2_db" value="checkm2_tiny_db.dmnd"/> + </conditional> + <output name="final" ftype="tabular"> + <assert_contents> + <has_text text="binC"/> + <has_text text="50"/> + <has_text text="9"/> + </assert_contents> + </output> + <output_collection name="bins" count="4"/> + </test> + <test expect_num_outputs="3"> + <param name="contig2bin_tables" ftype="tabular" value="A.binning,B.binning,C.binning"/> + <param name="contigs" value="all_contig.fasta.gz" ftype="fasta.gz"/> + <param name="min_completeness" value="40"/> + <param name="contamination_weight" value="2"/> + <conditional name="database_type"> + <param name="is_select" value="cached"/> + <param name="datamanager" value="test_db"/> + </conditional> + <param name="proteins" ftype="fasta.gz" value="proteins.fasta.gz"/> + <output name="final" ftype="tabular"> + <assert_contents> + <has_text text="binC"/> + 
 <has_text text="50"/> + <has_text text="40"/> + </assert_contents> + </output> + <output_collection name="bins" count="4"/> + </test> + </tests> + <help> + <![CDATA[ + + .. class:: infomark + + **What Binette does** + + Binette is a fast and accurate binning refinement tool that constructs high quality MAGs from the output of multiple binning tools. + + **Inputs** + + - At least 2 different contig tables. + + .. class:: infomark + + The contig tables can be generated by the tool *Converts genome bins in fasta format*. That tool only needs the bins which were created by any binner as input. + + - The contig file + + .. class:: infomark + + This file should contain all contigs used to create the bins. The format of this file should be either fasta or fasta.gz. + + - A CheckM2 diamond database + + .. class:: infomark + + This database can be downloaded using the CheckM2 package with the following command: *checkm2 database --download --path <checkm2/database/>* or it is possible to use a database cached on Galaxy. + + + - An optional (fasta/fasta.gz) file with predicted genes + + .. 
class:: infomark + + This file, in a fasta format, is generated with the tool *Prodigal* + + Example: + + :: + + >Chlamydia_trachomatis_part1_1 # 1 # 1776 # 1 # ID=1_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.466 + MSIRGVGGNGNSRIPSHNGDGSNRRSQNTKGNNKVEDRVCSLYSSRSNENRESPYAVVDV + SSMIESTPTSGETTRASRGVFSRFQRGLVRIADKVRRAVQCAWSSVSTSRSSATRAAESG + SSSRTARGASSGYREYSPSAARGLRLMFTDFWRTRVLRQTSPMAGVFGNLDVNEARLMAA + YTSECADHLEAKELAGPDGVAAAREIAKRWEKRVRDLQDKGAARKLLNDPLGRRTPNYQS + KNPGEYTVGNSMFYDGPQVANLQNVDTGFWLDMSNLSDVVLSREIQTGLRARATLEESMP + MLENLEERFRRLQETCDAARTEIEESGWTRESASRMEGDEAQGPSRAQQAFQSFVNECNS + IEFSFGSFGEHVRVLCARVSRGLAAAGEAIRRCFSCCKGSTHRYAPRDDLSPEGASLAET + LARFADDMGIERGADGTYDIPLVDDWRRGVPSIEGEGSDSIYEIMMPIYEVMNMDLETRR + SFAVQQGHYQDPRASDYDLPRASDYDLPRSPYPTPPLPPRYQLQNMDVEAGFREAVYASF + VAGMYNYVVTQPQERIPNSQQVEGILRDMLTNGSQTFRDLMKRWNREVDRE* + + **Outputs** + + - A collection (list) with all the selected bins in fasta format. + + - A final quality report file containing quality information about the final selected bins. + + - A collection (list) storing quality reports for the input bin sets, with files following the same structure as the final quality report file. + + ]]> + </help> + <citations> + <citation type="doi">10.21105/joss.06782</citation> + </citations> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/A.binning Mon Jan 20 16:19:35 2025 +0000 @@ -0,0 +1,34 @@ +Chlamydia_trachomatis_part1 bin1 +Chlamydia_trachomatis_part2 bin2 +Chlamydia_trachomatis_part3 bin3 +Mycoplasmoides_genitalium_part25 bin4 +Mycoplasmoides_genitalium_part24 bin4 +Mycoplasmoides_genitalium_part23 bin4 +Mycoplasmoides_genitalium_part22 bin4 +Mycoplasmoides_genitalium_part21 bin4 +Mycoplasmoides_genitalium_part20 bin4 +Mycoplasmoides_genitalium_part19 bin4 +Mycoplasmoides_genitalium_part18 bin4 +Mycoplasmoides_genitalium_part17 bin4 +Mycoplasmoides_genitalium_part16 bin4 +Mycoplasmoides_genitalium_part15 bin4 +Mycoplasmoides_genitalium_part14 bin4 +Mycoplasmoides_genitalium_part13 bin4 +Mycoplasmoides_genitalium_part12 bin4 +Mycoplasmoides_genitalium_part11 bin4 +Mycoplasmoides_genitalium_part10 bin5 +Mycoplasmoides_genitalium_part09 bin5 +Mycoplasmoides_genitalium_part08 bin5 +Mycoplasmoides_genitalium_part07 bin5 +Mycoplasmoides_genitalium_part06 bin5 +Mycoplasmoides_genitalium_part05 bin5 +Mycoplasmoides_genitalium_part04 bin5 +Mycoplasmoides_genitalium_part03 bin5 +Mycoplasmoides_genitalium_part02 bin5 +Mycoplasmoides_genitalium_part01 bin5 +Nanoarchaeum_equitans_part1 bin6 +Nanoarchaeum_equitans_part2 bin6 +Nanoarchaeum_equitans_part3 bin6 +Nanoarchaeum_equitans_part4 bin6 +Wigglesworthia_glossinidia_part1 bin6 +Wigglesworthia_glossinidia_part2 bin6
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/B.binning Mon Jan 20 16:19:35 2025 +0000 @@ -0,0 +1,20 @@ +Chlamydia_trachomatis_part2 binA +Chlamydia_trachomatis_part3 binA +Mycoplasmoides_genitalium_part25 binB +Mycoplasmoides_genitalium_part24 binB +Mycoplasmoides_genitalium_part23 binB +Mycoplasmoides_genitalium_part22 binB +Mycoplasmoides_genitalium_part21 binB +Mycoplasmoides_genitalium_part20 binB +Mycoplasmoides_genitalium_part19 binB +Mycoplasmoides_genitalium_part18 binB +Mycoplasmoides_genitalium_part17 binB +Mycoplasmoides_genitalium_part16 binB +Mycoplasmoides_genitalium_part15 binB +Mycoplasmoides_genitalium_part14 binB +Mycoplasmoides_genitalium_part13 binB +Mycoplasmoides_genitalium_part12 binB +Mycoplasmoides_genitalium_part11 binB +Mycoplasmoides_genitalium_part10 binB +Wigglesworthia_glossinidia_part1 binC +Wigglesworthia_glossinidia_part2 binC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/C.binning Mon Jan 20 16:19:35 2025 +0000 @@ -0,0 +1,17 @@ +Chlamydia_trachomatis_part1 binT +Chlamydia_trachomatis_part3 binT +Mycoplasmoides_genitalium_part25 binF +Mycoplasmoides_genitalium_part24 binF +Mycoplasmoides_genitalium_part23 binF +Mycoplasmoides_genitalium_part22 binF +Mycoplasmoides_genitalium_part21 binF +Mycoplasmoides_genitalium_part13 binD +Mycoplasmoides_genitalium_part12 binD +Mycoplasmoides_genitalium_part11 binD +Mycoplasmoides_genitalium_part10 binD +Mycoplasmoides_genitalium_part09 binD +Mycoplasmoides_genitalium_part08 binD +Mycoplasmoides_genitalium_part07 binD +Mycoplasmoides_genitalium_part06 binD +Mycoplasmoides_genitalium_part05 binD +Wigglesworthia_glossinidia_part2 binE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/checkm2.loc Mon Jan 20 16:19:35 2025 +0000 @@ -0,0 +1,3 @@ +##Checkm2 versioned indexes +#build_id dbkey display_name version path +001 1.0.0 test_db 0.0.0 ${__HERE__}/checkm2_tiny_db.dmnd \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/checkm2.loc.sample Mon Jan 20 16:19:35 2025 +0000 @@ -0,0 +1,18 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use the checkm2 database. +#You will need to create these data files using the following command + +#checkm2 database --download --path /custom/path/ + +#and then create a checkm2.loc file similar to this one (store it in this +#directory) that points to the directories in which those files are stored. + +#The checkm2.loc file has this format (longer white space +#characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <version> <file_base_path> + +#The <version> column indicates the checkm2 version that generated the database + +# +#diamond_db_1.0.2 1.0.2 Diamond database 1.0.2 /mnt/galaxyIndices/Checkm2_database/uniref100.KO.1.dmnd \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Mon Jan 20 16:19:35 2025 +0000 @@ -0,0 +1,6 @@ +<tables> + <table name="checkm2" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, version, path</columns> + <file path="tool-data/checkm2.loc" /> + </table> +</tables> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Mon Jan 20 16:19:35 2025 +0000 @@ -0,0 +1,6 @@ +<tables> + <table name="checkm2" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, version, path</columns> + <file path="${__HERE__}/test-data/checkm2.loc" /> + </table> +</tables> \ No newline at end of file
