Mercurial > repos > iuc > binning_refiner
changeset 0:f350d182f786 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/binning_refiner/ commit 591fef692e8efeb65c5214e6512aeaaf66201b26"
author | iuc |
---|---|
date | Fri, 18 Feb 2022 13:12:43 +0000 |
parents | |
children | 1c8d0916f97f |
files | binning_refiner.xml macros.xml test-data/Concoct_1.fa.gz test-data/Concoct_3.fa.gz test-data/Concoct_8.fa.gz test-data/MetaBAT_17.fa.gz test-data/MetaBAT_18.fa.gz test-data/MetaBAT_19.fa.gz test-data/MetaBAT_20.fa.gz test-data/MetaBAT_21.fa.gz test-data/MetaBAT_22.fa.gz test-data/MetaBAT_23.fa.gz test-data/output_refined_contigs.tabular test-data/output_sources_and_length.tabular |
diffstat | 14 files changed, 156 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/binning_refiner.xml Fri Feb 18 13:12:43 2022 +0000
@@ -0,0 +1,125 @@
+<tool id="bin_refiner" name="Binning refiner" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>refines metagenome bins</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+#import re
+
+## Binning_refiner prepends the -p value ('refined' below) to the hard-coded
+## suffix '_Binning_refiner_outputs' to name its base output directory.
+
+## Binning_refiner requires 2 levels of directories
+## for the input bins.
+mkdir -p input_bin_dir/bins &&
+#for $f in $input_bins:
+    #set identifier = re.sub('[^\s\w\-]', '_', str($f.element_identifier))
+    #set file_name = $identifier + '.' + $f.ext
+    ## Binning_refiner doesn't handle gzipped files.
+    #if $f.ext.endswith(".gz"):
+        gunzip -c '${f}' > 'input_bin_dir/bins/${file_name}' &&
+    #else:
+        ln -s '${f}' 'input_bin_dir/bins/${file_name}' &&
+    #end if
+#end for
+
+## Forward the user-selected minimum bin size; without -m the
+## "Minimum size (Kbp) of refined bin" input has no effect.
+Binning_refiner
+-i input_bin_dir
+-p 'refined'
+-m $m
+&& mv 'refined_Binning_refiner_outputs/refined_contigs.txt' '$output_refined_contigs'
+&& mv 'refined_Binning_refiner_outputs/refined_sources_and_length.txt' '$output_sources_and_length'
+    ]]></command>
+    <inputs>
+        <param name="input_bins" format="fasta,fasta.gz" type="data" multiple="true" label="Binned fasta files"/>
+        <param argument="-m" type="integer" value="512" label="Minimum size (Kbp) of refined bin" help="Bins smaller than this will be eliminated"/>
+    </inputs>
+    <outputs>
+        <collection name="output_refined_bins" type="list" label="${tool.name} on ${on_string}: (refined bins)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.fasta" format="fasta" directory="refined_Binning_refiner_outputs/refined_refined_bins"/>
+        </collection>
+        <data name="output_refined_contigs" format="tabular" label="${tool.name} on ${on_string} (refined contigs)"/>
+        <data name="output_sources_and_length" format="tabular" label="${tool.name} on ${on_string} (sources and length)"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="3">
+            <param name="input_bins" value="MetaBAT_17.fa.gz,MetaBAT_18.fa.gz,MetaBAT_19.fa.gz,MetaBAT_20.fa.gz,MetaBAT_21.fa.gz,MetaBAT_22.fa.gz,MetaBAT_23.fa.gz,Concoct_1.fa.gz,Concoct_3.fa.gz,Concoct_8.fa.gz" ftype="fasta.gz"/>
+            <output_collection name="output_refined_bins" type="list" count="7">
+                <element name="refined_1" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="1320640"/>
+                        <has_text text=">scaffold_2064"/>
+                        <has_n_lines n="21765"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_2" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="941488"/>
+                        <has_text text=">scaffold_1301"/>
+                        <has_n_lines n="15461"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_3" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="883642"/>
+                        <has_text text=">scaffold_262"/>
+                        <has_n_lines n="14495"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_4" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="758509"/>
+                        <has_text text=">scaffold_923"/>
+                        <has_n_lines n="12484"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_5" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="722197"/>
+                        <has_text text=">scaffold_232"/>
+                        <has_n_lines n="11849"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_6" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="637342"/>
+                        <has_text text=">scaffold_259"/>
+                        <has_n_lines n="10460"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_7" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="560996"/>
+                        <has_text text=">scaffold_1510"/>
+                        <has_n_lines n="9219"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="output_refined_contigs" file="output_refined_contigs.tabular" ftype="tabular"/>
+            <output name="output_sources_and_length" file="output_sources_and_length.tabular" ftype="tabular"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+Reconciles the outputs of different binning programs with the aim to improve the quality of genome bins,
+especially with respect to contamination levels.
+
+The tool accepts one or more fasta datasets (i.e., bins) that were produced by metagenome binning tools
+(CONCOCT, MaxBin2, MetaBAT2 and others).
+
+All refined bins larger than the specified "Minimum size (Kbp) of refined bin" will be output as a dataset
+collection of fasta files. Additional outputs include a tabular dataset containing the id of the contigs
+in each refined bin (refined contigs) and another tabular dataset containing the size of each refined bin
+and the origin of its contigs (sources and length).
+
+**More information**
+
+https://github.com/songweizhi/Binning_refiner
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Feb 18 13:12:43 2022 +0000
@@ -0,0 +1,15 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.4.3</token> <!-- also pins the binning_refiner package version below -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- appended to the tool version after "+galaxy" -->
+    <token name="@PROFILE@">21.09</token> <!-- value of the tool's profile attribute -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">binning_refiner</requirement>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btx086</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_refined_contigs.tabular Fri Feb 18 13:12:43 2022 +0000 @@ -0,0 +1,8 @@ +Refined_bin Contigs +refined_1 scaffold_1016,scaffold_1020,scaffold_1044,scaffold_1066,scaffold_1069,scaffold_1073,scaffold_1086,scaffold_1096,scaffold_1116,scaffold_1122,scaffold_1129,scaffold_1137,scaffold_1167,scaffold_1170,scaffold_1201,scaffold_1253,scaffold_1275,scaffold_1287,scaffold_1296,scaffold_1306,scaffold_1331,scaffold_1333,scaffold_1350,scaffold_1351,scaffold_1353,scaffold_1385,scaffold_1395,scaffold_1425,scaffold_1442,scaffold_1452,scaffold_1455,scaffold_1470,scaffold_1474,scaffold_1499,scaffold_1514,scaffold_1538,scaffold_1541,scaffold_1555,scaffold_1556,scaffold_1559,scaffold_1570,scaffold_1572,scaffold_1599,scaffold_1612,scaffold_1625,scaffold_1628,scaffold_1634,scaffold_1646,scaffold_1771,scaffold_1779,scaffold_1781,scaffold_1808,scaffold_1827,scaffold_1847,scaffold_1879,scaffold_1891,scaffold_1906,scaffold_1909,scaffold_1921,scaffold_1934,scaffold_1936,scaffold_1952,scaffold_1960,scaffold_1985,scaffold_1995,scaffold_2033,scaffold_2039,scaffold_2064,scaffold_2080,scaffold_2102,scaffold_2124,scaffold_2128,scaffold_2172,scaffold_2196,scaffold_563,scaffold_583,scaffold_602,scaffold_655,scaffold_734,scaffold_752,scaffold_838,scaffold_895,scaffold_928,scaffold_929,scaffold_933,scaffold_945,scaffold_965,scaffold_967,scaffold_973 +refined_2 scaffold_1064,scaffold_1162,scaffold_1200,scaffold_1236,scaffold_1248,scaffold_1301,scaffold_138,scaffold_150,scaffold_1825,scaffold_2013,scaffold_2040,scaffold_205,scaffold_2134,scaffold_214,scaffold_2164,scaffold_247,scaffold_377,scaffold_380,scaffold_419,scaffold_486,scaffold_488,scaffold_558 +refined_3 scaffold_15,scaffold_20,scaffold_222,scaffold_262,scaffold_660,scaffold_684,scaffold_77,scaffold_861 +refined_4 
scaffold_1014,scaffold_1061,scaffold_1081,scaffold_1177,scaffold_1193,scaffold_1257,scaffold_1349,scaffold_1377,scaffold_1428,scaffold_1503,scaffold_1520,scaffold_1558,scaffold_1576,scaffold_1598,scaffold_1635,scaffold_1698,scaffold_1704,scaffold_1759,scaffold_1763,scaffold_1784,scaffold_1867,scaffold_1945,scaffold_2046,scaffold_2139,scaffold_2210,scaffold_437,scaffold_537,scaffold_568,scaffold_667,scaffold_680,scaffold_699,scaffold_700,scaffold_768,scaffold_804,scaffold_832,scaffold_834,scaffold_923,scaffold_939,scaffold_986 +refined_5 scaffold_1048,scaffold_1049,scaffold_1150,scaffold_126,scaffold_14,scaffold_232,scaffold_571,scaffold_66 +refined_6 scaffold_1667,scaffold_186,scaffold_259,scaffold_301,scaffold_32,scaffold_461,scaffold_466,scaffold_659,scaffold_708 +refined_7 scaffold_1140,scaffold_1172,scaffold_1303,scaffold_1510,scaffold_1614,scaffold_1649,scaffold_1814,scaffold_1852,scaffold_2104,scaffold_2153,scaffold_260,scaffold_273,scaffold_361,scaffold_369,scaffold_429,scaffold_589,scaffold_906,scaffold_987,scaffold_988
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_sources_and_length.tabular Fri Feb 18 13:12:43 2022 +0000 @@ -0,0 +1,8 @@ +Refined_bin Size(Kbp) Source +refined_1 1267.23 Concoct_3_fa_gz.fasta.gz +refined_2 904.03 MetaBAT_17_fa_gz.fasta.gz +refined_3 848.68 MetaBAT_18_fa_gz.fasta.gz +refined_4 728.02 MetaBAT_20_fa_gz.fasta.gz +refined_5 693.6 MetaBAT_21_fa_gz.fasta.gz +refined_6 612.08 MetaBAT_22_fa_gz.fasta.gz +refined_7 538.59 MetaBAT_23_fa_gz.fasta.gz