Mercurial > repos > galaxy-australia > metawrapmg_binning
diff metawrapmg_binning.xml @ 0:024ea3c4c29f draft
planemo upload for repository https://github.com/usegalaxy-au/tools-au/tree/master/tools/metawrapmg commit e8f404630d1b01ef5f110369f0cc6eac03d2d2d7
author | galaxy-australia |
---|---|
date | Mon, 30 Jan 2023 22:28:33 +0000 |
parents | |
children | 2a8bc1d26d06 |
line wrap: on
line diff
<!--
    Galaxy wrapper for MetaWRAP. The command runs `metawrap binning` with
    metaBAT2, MaxBin2 and CONCOCT on an assembly plus one pair of read files,
    then passes every bin directory that contains files to
    `metawrap bin_refinement`. Bins, figures, stats and contig assignments are
    collected from the BIN_REFINEMENT directory.
-->
<tool id="metawrapmg_binning" name="MetaWRAP" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
    <description>metagenome binning pipeline</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## set memory usage
        ## GALAXY_MEMORY_MB is converted to whole GB; when it is unset the
        ## metawrap calls below fall back to 16 GB.
        if [ -n "\$GALAXY_MEMORY_MB" ] ; then
            GALAXY_MEMORY_GB=\$((GALAXY_MEMORY_MB / 1024)) ;
        fi ;

        ##################
        ## SET UP FILES ##
        ##################

        ## should always be FASTA
        #set mg_fn = 'metagenome.' + str($metagenome.ext)
        ln -s '$metagenome' '$mg_fn'
        &&

        ## Only FASTQ. Separate files for each sample. Metawrap checks for
        ## files named _1.fastq and _2.fastq.
        #set input1_fn = 'reads_1.fastq'
        ln -s '$input_1' '$input1_fn'
        &&

        #set input2_fn = 'reads_2.fastq'
        ln -s '$input_2' '$input2_fn'
        &&

        #####################
        ## INITIAL BINNING ##
        #####################

        metawrap binning
        --metabat2 --maxbin2 --concoct
        -a '$mg_fn'
        -m \${GALAXY_MEMORY_GB:-16}
        -o INITIAL_BINNING
        -t \${GALAXY_SLOTS:-4}
        '$input1_fn'
        '$input2_fn'
        &&

        ## Check which binning programs produced bins
        bin_dirs=(INITIAL_BINNING/concoct_bins INITIAL_BINNING/maxbin2_bins INITIAL_BINNING/metabat2_bins) &&
        switches=('-A' '-B' '-C') &&

        ## i counts the non-empty bin directories seen so far, so the switches
        ## are always handed out in the order -A, -B, -C with no gaps,
        ## whichever binners produced output.
        i=0 &&
        bin_string="" &&

        for dir in "\${bin_dirs[@]}" ; do
            ## find prints one line per entry; read succeeds only if there is
            ## at least one, i.e. the directory exists and is not empty.
            if find "\${dir}" -mindepth 1 -maxdepth 1 | read; then
                bin_string="\${bin_string} \${switches[\$i]} \${dir}" ;
                ## Arithmetic expansion is required here. A bare i+=1 appends
                ## the character 1 to the string (0, 01, 011) and 011 is read
                ## as octal 9 when used as an array subscript, which left the
                ## third bin set without a switch.
                i=\$((i + 1)) ;
            fi
        done &&

        ####################
        ## BIN REFINEMENT ##
        ####################

        ## The checkm database is included in the conda package.
        ## Requires metawrap-mg_1.3.0--hdfd78af_1 or later. See
        ## https://github.com/bioconda/bioconda-recipes/pull/38299.

        metawrap bin_refinement
        -t \${GALAXY_SLOTS:-4}
        -m \${GALAXY_MEMORY_GB:-16}
        -c $binning.c
        -x $binning.x
        -o BIN_REFINEMENT
        ## Only run bin_refinement on bins with contigs
        \${bin_string}
    ]]></command>
    <inputs>
        <param name="metagenome" format="fasta" type="data" label="Metagenome" help="Metagenome co-assembly for binning" />
        <param name="input_1" format="fastqsanger" type="data" label="Read 1" help="Original reads that were used for the assembly: read 1." />
        <param name="input_2" format="fastqsanger" type="data" label="Read 2" help="Original reads that were used for the assembly: read 2." />
        <section name="binning" title="Binning parameters" expanded="false">
            <param argument='-c' type="integer" value="70" min="50" max="100" label="Percent completion" help="Minimum % completion of bins" />
            <param argument='-x' type="integer" value="10" min="0" max="100" label="Percent contamination" help="Maximum % contamination of bins that is acceptable" />
        </section>
    </inputs>
    <outputs>
        <!-- contigs binned into fasta files -->
        <collection name="metawrap_bins" type="list" label="MetaWRAP on ${on_string}: bins">
            <discover_datasets pattern="metawrap_\d+_\d+_bins/(?P&lt;designation&gt;.+)\.fa" format="fasta" directory="BIN_REFINEMENT" recurse="true" match_relative_path="true" visible="false" />
        </collection>
        <!-- summary figures -->
        <collection name="metawrap_figures" type="list" label="MetaWRAP on ${on_string}: summary figures">
            <discover_datasets pattern="__designation_and_ext__" directory="BIN_REFINEMENT/figures" visible="false" />
        </collection>
        <!-- statistics on binning -->
        <collection name="metawrap_stats" type="list" label="MetaWRAP on ${on_string}: stat files">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.stats" format="tabular" directory="BIN_REFINEMENT" visible="false" />
        </collection>
        <!-- which contig went into which bin -->
        <collection name="metawrap_contigs" type="list" label="MetaWRAP on ${on_string}: contig assignments">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.contigs" format="tabular" directory="BIN_REFINEMENT" visible="false" />
        </collection>
    </outputs>
    <tests>
        <!-- 01: basic function -->
        <test>
            <param name="metagenome" value="subset.fasta.gz"/>
            <param name="input_1" value="mapped_reads.r1.fastq.gz"/>
            <param name="input_2" value="mapped_reads.r2.fastq.gz"/>
            <param name="c" value="60"/>
            <param name="x" value="15"/>
            <!-- this is the main output, but it's too large to test -->
            <!-- <output_collection name="metawrap_bins" type="list">
                <element name="bin.1" file="test02.fa" ftype="fasta"/>
            </output_collection> -->
            <output_collection name="metawrap_stats" type="list">
                <element name="metawrap_60_15_bins" file="test02.stats" ftype="tabular"/>
            </output_collection>
            <output_collection name="metawrap_contigs" type="list">
                <element name="metawrap_60_15_bins" file="test02.contigs" ftype="tabular"/>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
MetaWRAP
--------

MetaWRAP aims to be an easy-to-use metagenomic wrapper suite that
accomplishes the core tasks of metagenomic analysis. Additionally,
metaWRAP takes bin extraction and analysis to the next level. metaWRAP
is meant to be a fast and simple approach before you delve deeper into
parameterization of your analysis. MetaWRAP can be applied to a variety
of environments, including gut, water, and soil microbiomes (see
metaWRAP paper for benchmarks).

MetaWRAP binning module
~~~~~~~~~~~~~~~~~~~~~~~

The metaWRAP::Binning module is meant to be a convenient wrapper around
three metagenomic binning tools: MaxBin2, metaBAT2, and CONCOCT.
First the metagenomic assembly is indexed with bwa-index, and then
paired end reads from any number of samples are aligned to it. The
alignments are sorted and compressed with samtools, and library insert
size statistics are also gathered at the same time (insert size average
and standard deviation). metaBAT2’s jgi_summarize_bam_contig_depths
function is used to generate a contig abundance table, and it is then
converted into the correct format for each of the three binners to take
as input. After MaxBin2, metaBAT2, and CONCOCT finish binning the
contigs with default settings, the final bin folders are created with
formatted bin fasta files. CheckM’s lineage_wf function is used to
predict essential genes and estimate the completion and contamination of
each bin.

MetaWRAP bin refinement
~~~~~~~~~~~~~~~~~~~~~~~

The metaWRAP::Bin_refinement module utilizes a hybrid approach to take
in two or three bin sets that were obtained with different software and
produces a consolidated, improved bin set. First, binning_refiner is
used to create hybridized bins from every possible combination of sets.
If there were three bin sets: A, B, and C, then the following hybrid
sets will be produced with binning_refiner: AB, BC, AC, and ABC. CheckM
is then run to evaluate the completion and contamination of the bins in
each of the 7 bin sets (3 originals, 4 hybridized). The bin sets are
then iteratively compared to each other, and each pair is consolidated
into an improved bin set. To do this, the same bin is identified within
the two bin sets based on a minimum of 80% overlap in genome length, and
the better bin is determined based on which bin has the higher score.
The scoring function is S=Completion-5*Contamination. After all bin sets
are incorporated into the consolidated bin collection, a de-replication
function removes any duplicate contigs. If a contig is present in more
than one bin, it is removed from all but the best bin (based on scoring
function). CheckM is then run on the final bin set and a final report
file is generated showing the completion, contamination, and other
statistics generated by CheckM for each bin. Completion and
contamination rank plots are also generated to evaluate the success of
the Bin_refinement module, and compare its output to the quality of the
original bins.

--------------

MetaWRAP’s home page is
`bxlab/metaWRAP <https://github.com/bxlab/metaWRAP>`__.

This tool was wrapped by the Galaxy Australia team.
    ]]></help>
    <expand macro="citations"/>
</tool>