comparison metawrapmg_binning.xml @ 0:024ea3c4c29f draft

planemo upload for repository https://github.com/usegalaxy-au/tools-au/tree/master/tools/metawrapmg commit e8f404630d1b01ef5f110369f0cc6eac03d2d2d7
author galaxy-australia
date Mon, 30 Jan 2023 22:28:33 +0000
parents
children 2a8bc1d26d06
comparison
equal deleted inserted replaced
-1:000000000000 0:024ea3c4c29f
1 <tool id="metawrapmg_binning" name="MetaWRAP" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
2 <description>metagenome binning pipeline</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="xrefs"/>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9 ## Convert Galaxy's memory allocation (MB) to whole GB for metawrap's -m option; 16 is used below when it is unset
10 if [ -n "\$GALAXY_MEMORY_MB" ] ; then
11 GALAXY_MEMORY_GB=\$((GALAXY_MEMORY_MB / 1024)) ;
12 fi ;
13
14 ##################
15 ## SET UP FILES ##
16 ##################
17
18 ## Link the assembly under a name that carries its datatype extension (should always be FASTA)
19 #set mg_fn = 'metagenome.' + str($metagenome.ext)
20 ln -s '$metagenome' '$mg_fn'
21 &&
22
23 ## Only FASTQ. Separate files for each sample. Metawrap checks for
24 ## files named _1.fastq and _2.fastq.
25 #set input1_fn = 'reads_1.fastq'
26 ln -s '$input_1' '$input1_fn'
27 &&
28
29 #set input2_fn = 'reads_2.fastq'
30 ln -s '$input_2' '$input2_fn'
31 &&
32
33 #####################
34 ## INITIAL BINNING ##
35 #####################
36
37 metawrap binning
38 --metabat2 --maxbin2 --concoct
39 -a '$mg_fn'
40 -m \${GALAXY_MEMORY_GB:-16}
41 -o INITIAL_BINNING
42 -t \${GALAXY_SLOTS:-4}
43 '$input1_fn'
44 '$input2_fn'
45 &&
46
47 ## Check which binning programs produced bins
48 bin_dirs=(INITIAL_BINNING/concoct_bins INITIAL_BINNING/maxbin2_bins INITIAL_BINNING/metabat2_bins) &&
49 switches=('-A' '-B' '-C') &&
50 ## i indexes the next unused switch, so non-empty directories receive -A, -B, -C in order
51 i=0 &&
52 bin_string="" &&
53 ## 'find | read' succeeds only when the directory holds at least one entry
54 for dir in "\${bin_dirs[@]}" ; do
55 if find "\${dir}" -mindepth 1 -maxdepth 1 | read; then
56 bin_string="\${bin_string} \${switches[\$i]} \${dir}" ;
57 i=\$((i + 1)) ;
58 fi
59 done &&
60
61 ####################
62 ## BIN REFINEMENT ##
63 ####################
64
65 ## The checkm database is included in the conda package.
66 ## Requires metawrap-mg_1.3.0--hdfd78af_1 or later. See
67 ## https://github.com/bioconda/bioconda-recipes/pull/38299.
68
69 metawrap bin_refinement
70 -t \${GALAXY_SLOTS:-4}
71 -m \${GALAXY_MEMORY_GB:-16}
72 -c $binning.c
73 -x $binning.x
74 -o BIN_REFINEMENT
75 ## Only run bin_refinement on bins with contigs
76 \${bin_string}
77 ]]></command>
78 <inputs>
79 <param name="metagenome" type="data" format="fasta" label="Metagenome" help="Metagenome co-assembly for binning"/>
80 <param name="input_1" type="data" format="fastqsanger" label="Read 1" help="Original reads that were used for the assembly: read 1."/>
81 <param name="input_2" type="data" format="fastqsanger" label="Read 2" help="Original reads that were used for the assembly: read 2."/>
82 <section name="binning" title="Binning parameters" expanded="false"> <!-- thresholds passed to metawrap bin_refinement as its c and x options -->
83 <param argument="-c" type="integer" min="50" max="100" value="70" label="Percent completion" help="Minimum % completion of bins"/>
84 <param argument="-x" type="integer" min="0" max="100" value="10" label="Percent contamination" help="Maximum % contamination of bins that is acceptable"/>
85 </section>
86 </inputs>
87 <outputs>
88 <!-- refined bins: FASTA files of binned contigs collected from the metawrap_*_bins folder(s) -->
89 <collection name="metawrap_bins" type="list" label="MetaWRAP on ${on_string}: bins">
90 <discover_datasets directory="BIN_REFINEMENT" pattern="metawrap_\d+_\d+_bins/(?P&lt;designation&gt;.+)\.fa" recurse="true" match_relative_path="true" format="fasta" visible="false"/>
91 </collection>
92 <!-- summary figures collected from BIN_REFINEMENT/figures -->
93 <collection name="metawrap_figures" type="list" label="MetaWRAP on ${on_string}: summary figures">
94 <discover_datasets directory="BIN_REFINEMENT/figures" pattern="__designation_and_ext__" visible="false"/>
95 </collection>
96 <!-- binning statistics: the .stats files in BIN_REFINEMENT -->
97 <collection name="metawrap_stats" type="list" label="MetaWRAP on ${on_string}: stat files">
98 <discover_datasets directory="BIN_REFINEMENT" pattern="(?P&lt;designation&gt;.+)\.stats" format="tabular" visible="false"/>
99 </collection>
100 <!-- contig-to-bin assignments: the .contigs files in BIN_REFINEMENT -->
101 <collection name="metawrap_contigs" type="list" label="MetaWRAP on ${on_string}: contig assignments">
102 <discover_datasets directory="BIN_REFINEMENT" pattern="(?P&lt;designation&gt;.+)\.contigs" format="tabular" visible="false"/>
103 </collection>
104 </outputs>
105 <tests>
106 <!-- 01: basic function, run with non-default thresholds (c=60, x=15); the expected element names below presumably derive from these values -->
107 <test>
108 <param name="metagenome" value="subset.fasta.gz"/>
109 <param name="input_1" value="mapped_reads.r1.fastq.gz"/>
110 <param name="input_2" value="mapped_reads.r2.fastq.gz"/>
111 <param name="c" value="60"/>
112 <param name="x" value="15"/>
113 <!-- the bins collection is the main output, but it's too large to test, so its expectation stays commented out -->
114 <!-- <output_collection name="metawrap_bins" type="list">
115 <element name="bin.1" file="test02.fa" ftype="fasta"/>
116 </output_collection> -->
117 <output_collection name="metawrap_stats" type="list">
118 <element name="metawrap_60_15_bins" file="test02.stats" ftype="tabular"/>
119 </output_collection>
120 <output_collection name="metawrap_contigs" type="list">
121 <element name="metawrap_60_15_bins" file="test02.contigs" ftype="tabular"/>
122 </output_collection>
123 </test>
124 </tests>
125 <help><![CDATA[
126 MetaWRAP
127 --------
128
129 MetaWRAP aims to be an easy-to-use metagenomic wrapper suite that
130 accomplishes the core tasks of metagenomic analysis. Additionally,
131 metaWRAP takes bin extraction and analysis to the next level. metaWRAP
132 is meant to be a fast and simple approach before you delve deeper into
133 parameterization of your analysis. MetaWRAP can be applied to a variety
134 of environments, including gut, water, and soil microbiomes (see
135 metaWRAP paper for benchmarks).
136
137 MetaWRAP binning module
138 ~~~~~~~~~~~~~~~~~~~~~~~
139
140 The metaWRAP::Binning module is meant to be a convenient wrapper around
141 three metagenomic binning tools: MaxBin2, metaBAT2, and CONCOCT.
142 First the metagenomic assembly is indexed with bwa-index, and then
143 paired end reads from any number of samples are aligned to it. The
144 alignments are sorted and compressed with samtools, and library insert
145 size statistics are also gathered at the same time (insert size average
146 and standard deviation). metaBAT2’s jgi_summarize_bam_contig_depths
147 function is used to generate a contig abundance table, and it is then
148 converted into the correct format for each of the three binners to take
149 as input. After MaxBin2, metaBAT2, and CONCOCT finish binning the
150 contigs with default settings, the final bins folders are created with
151 formatted bin fasta files. CheckM’s lineage_wf function is used to
152 predict essential genes and estimate the completion and contamination of
153 each bin.
154
155 MetaWRAP bin refinement
156 ~~~~~~~~~~~~~~~~~~~~~~~
157
158 The metaWRAP::Bin_refinement module utilizes a hybrid approach to take
159 in two or three bin sets that were obtained with different software and
160 produces a consolidated, improved bin set. First, binning_refiner is
161 used to create hybridized bins from every possible combination of sets.
162 If there were three bin sets: A, B, and C, then the following hybrid
163 sets will be produced with binning_refiner: AB, BC, AC, and ABC. CheckM
164 is then run to evaluate the completion and contamination of the bins in
165 each of the 7 bin sets (3 originals, 4 hybridized). The bin sets are
166 then iteratively compared to each other, and each pair is consolidated
167 into an improved bin set. To do this, the same bin is identified within
168 the two bin sets based on a minimum of 80% overlap in genome length, and
169 the better bin is determined based on which bin has the higher score.
170 The scoring function is S=Completion-5*Contamination. After all bin sets
171 are incorporated into the consolidated bin collection, a de-replication
172 function removes any duplicate contigs. If a contig is present in more
173 than one bin, it is removed from all but the best bin (based on scoring
174 function). CheckM is then run on the final bin set and a final report
175 file is generated showing the completion, contamination, and other
176 statistics generated by CheckM for each bin. Completion and
177 contamination rank plots are also generated to evaluate the success of
178 the Bin_refinement module, and compare its output to the quality of the
179 original bins.
180
181 --------------
182
183 MetaWRAP’s home page is
184 `bxlab/metaWRAP <https://github.com/bxlab/metaWRAP>`__.
185
186 This tool was wrapped by the Galaxy Australia team.
187 ]]></help>
188 <expand macro="citations"/>
189 </tool>