diff metawrapmg_binning.xml @ 0:024ea3c4c29f draft

planemo upload for repository https://github.com/usegalaxy-au/tools-au/tree/master/tools/metawrapmg commit e8f404630d1b01ef5f110369f0cc6eac03d2d2d7
author galaxy-australia
date Mon, 30 Jan 2023 22:28:33 +0000
parents
children 2a8bc1d26d06
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metawrapmg_binning.xml	Mon Jan 30 22:28:33 2023 +0000
@@ -0,0 +1,189 @@
+<tool id="metawrapmg_binning" name="MetaWRAP" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
+    <description>metagenome binning pipeline</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+            ## set memory usage
+            if [ -n "\$GALAXY_MEMORY_MB" ] ; then
+                GALAXY_MEMORY_GB=\$((GALAXY_MEMORY_MB / 1024)) ;
+            fi ;
+
+            ##################
+            ## SET UP FILES ##
+            ##################
+
+            ## should always be FASTA
+            #set mg_fn = 'metagenome.' + str($metagenome.ext)
+            ln -s '$metagenome' $mg_fn
+            &&
+
+            ## Only FASTQ. Separate files for each sample. Metawrap checks for
+            ## files named _1.fastq and _2.fastq.
+            #set input1_fn = 'reads_1.fastq'
+            ln -s '$input_1' $input1_fn
+            &&
+
+            #set input2_fn = 'reads_2.fastq'
+            ln -s '$input_2' $input2_fn
+            &&
+
+            #####################
+            ## INITIAL BINNING ##
+            #####################
+
+            metawrap binning 
+            --metabat2 --maxbin2 --concoct 
+            -a '$mg_fn'
+            -m \${GALAXY_MEMORY_GB:-16}
+            -o INITIAL_BINNING
+            -t \${GALAXY_SLOTS:-4}
+            '$input1_fn'
+            '$input2_fn'
+            &&
+
+            ## Check which binning programs produced bins            
+            bin_dirs=(INITIAL_BINNING/concoct_bins INITIAL_BINNING/maxbin2_bins INITIAL_BINNING/metabat2_bins) &&
+            switches=('-A' '-B' '-C') &&
+
+            i=0 &&
+            bin_string="" &&
+
+            for dir in "\${bin_dirs[@]}" ; do
+                if find "\${dir}" -mindepth 1 -maxdepth 1 | read; then
+                    bin_string="\${bin_string} \${switches[\$i]} \${dir}" ;
+                    i+=1 ;
+                fi
+            done &&
+
+            ####################
+            ## BIN REFINEMENT ##
+            ####################
+
+            ## The checkm database is included in the conda package.
+            ## Requires metawrap-mg_1.3.0--hdfd78af_1 or later. See
+            ## https://github.com/bioconda/bioconda-recipes/pull/38299.
+
+            metawrap bin_refinement
+            -t \${GALAXY_SLOTS:-4}
+            -m \${GALAXY_MEMORY_GB:-16}
+            -c $binning.c
+            -x $binning.x
+            -o BIN_REFINEMENT
+            ## Only run bin_refinement on bins with contigs
+            \${bin_string}
+    ]]></command>
+    <inputs>
+        <param name="metagenome" format="fasta" type="data" label="Metagenome" help="Metagenome co-assembly for binning" />
+        <param name="input_1" format="fastqsanger" type="data" label="Read 1" help="Original reads that were used for the assembly: read 1." />
+        <param name="input_2" format="fastqsanger" type="data" label="Read 2" help="Original reads that were used for the assembly: read 2." />
+        <section name="binning" title="Binning parameters" expanded="false">
+            <param argument='-c' type="integer" value="70" min="50" max="100" label="Percent completion" help="Minimum % completion of bins" />
+            <param argument='-x' type="integer" value="10" min="0" max="100" label="Percent contamination" help="Maximum % contamination of bins that is acceptable" />
+        </section>
+    </inputs>
+    <outputs>
+        <!-- contigs binned into fasta files -->
+        <collection name="metawrap_bins" type="list" label="MetaWRAP on ${on_string}: bins">
+            <discover_datasets pattern="metawrap_\d+_\d+_bins/(?P&lt;designation&gt;.+)\.fa" format="fasta" directory="BIN_REFINEMENT" recurse="true" match_relative_path="true" visible="false" />
+        </collection>
+        <!-- summary figures -->
+        <collection name="metawrap_figures" type="list" label="MetaWRAP on ${on_string}: summary figures">
+            <discover_datasets pattern="__designation_and_ext__" directory="BIN_REFINEMENT/figures" visible="false" />
+        </collection>
+        <!-- statistics on binning -->
+        <collection name="metawrap_stats" type="list" label="MetaWRAP on ${on_string}: stat files">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.stats" format="tabular" directory="BIN_REFINEMENT" visible="false" />
+        </collection>
+        <!-- which contig went into which bin -->
+        <collection name="metawrap_contigs" type="list" label="MetaWRAP on ${on_string}: contig assignments">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.contigs" format="tabular" directory="BIN_REFINEMENT" visible="false" />
+        </collection>
+    </outputs>
+    <tests>
+        <!-- 01: basic function -->
+        <test>
+            <param name="metagenome" value="subset.fasta.gz"/>
+            <param name="input_1" value="mapped_reads.r1.fastq.gz"/>
+            <param name="input_2" value="mapped_reads.r2.fastq.gz"/>
+            <param name="c" value="60"/>
+            <param name="x" value="15"/>
+            <!-- this is the main output, but it's too large to test -->
+            <!-- <output_collection name="metawrap_bins" type="list">
+                <element name="bin.1" file="test02.fa" ftype="fasta"/>
+            </output_collection> -->
+            <output_collection name="metawrap_stats" type="list">
+                <element name="metawrap_60_15_bins" file="test02.stats" ftype="tabular"/>
+            </output_collection>
+            <output_collection name="metawrap_contigs" type="list">
+                <element name="metawrap_60_15_bins" file="test02.contigs" ftype="tabular"/>
+            </output_collection>
+        </test>
+    </tests>
+        <help><![CDATA[
+MetaWRAP
+--------
+
+MetaWRAP aims to be an easy-to-use metagenomic wrapper suite that
+accomplishes the core tasks of metagenomic analysis. Additionally,
+metaWRAP takes bin extraction and analysis to the next level. metaWRAP
+is meant to be a fast and simple approach before you delve deeper into
+parameterization of your analysis. MetaWRAP can be applied to a variety
+of environments, including gut, water, and soil microbiomes (see
+metaWRAP paper for benchmarks).
+
+MetaWRAP binning module
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The metaWRAP::Binning module is meant to be a convenient wrapper around
+three metagenomic binning software: MaxBin2, metaBAT2, and CONCOCT.
+First the metagenomic assembly is indexed with bwa-index, and then
+paired end reads from any number of samples are aligned to it. The
+alignments are sorted and compressed with samtools, and library insert
+size statistics are also gathered at the same time (insert size average
+and standard deviation). metaBAT2’s jgi_summarize_bam_contig_depths
+function is used to generate contig adundance table, and it is then
+converted into the correct format for each of the three binners to take
+as input. After MaxBin2, metaBAT2, and CONCOCT finish binning the
+contigs with default settings, the final bins folders are created with
+formatted bin fasta files. CheckM’s lineage_wf function is used to
+predict essential genes and estimate the completion and contamination of
+each bin.
+
+MetaWRAP bin refinement
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The metaWRAP::Bin_refinement module utilizes a hybrid approach to take
+in two or three bin sets that were obtained with different software and
+produces a consolidated, improved bin set. First, binning_refiner is
+used to create hybridized bins from every possible combination of sets.
+If there were three bin sets: A, B, and C, then the following hybrid
+sets will be produced with binning_refiner: AB, BC, AC, and ABC. CheckM
+is then run to evaluate the completion and contamination of the bins in
+each of the 7 bin sets (3 originals, 4 hybridized). The bins sets are
+then iteratively compared to each other, and each pair is consolidated
+into an improved bin set. To do this, the same bin is identified within
+the two bin sets based on a minimum of 80% overlap in genome length, and
+the better bin is determined based on which bin has the higher score.
+The scoring function is S=Completion-5*Contamination. After all bin sets
+are incorporated into the consolidated bin collection, a de-replication
+function removes any duplicate contigs. If a contig is present in more
+than one bin, it is removed from all but the best bin (based on scoring
+function). CheckM is then run on the final bin set and a final report
+file is generated showing the completion, contamination, and other
+statistics generated by CheckM for each bin. Completion and
+contamination rank plots are also generated to evaluate the success of
+the Bin_refinement module, and compare its output to the quality of the
+original bins.
+
+--------------
+
+MetaWRAP’s home page is
+`bxlab/metaWRAP <https://github.com/bxlab/metaWRAP>`__.
+
+This tool was wrapped by the Galaxy Australia team.
+        ]]></help>
+    <expand macro="citations"/>
+</tool>