Mercurial > repos > galaxy-australia > metawrapmg_binning
comparison metawrapmg_binning.xml @ 0:024ea3c4c29f draft
planemo upload for repository https://github.com/usegalaxy-au/tools-au/tree/master/tools/metawrapmg commit e8f404630d1b01ef5f110369f0cc6eac03d2d2d7
author | galaxy-australia |
---|---|
date | Mon, 30 Jan 2023 22:28:33 +0000 |
parents | |
children | 2a8bc1d26d06 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:024ea3c4c29f |
---|---|
1 <tool id="metawrapmg_binning" name="MetaWRAP" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT"> | |
2 <description>metagenome binning pipeline</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="xrefs"/> | |
7 <expand macro="requirements"/> | |
8 <command detect_errors="exit_code"><![CDATA[ | |
9 ## set memory usage: convert Galaxy's allocation from MB to whole GB for metawrap's -m option | |
10 if [ -n "\$GALAXY_MEMORY_MB" ] ; then | |
11 GALAXY_MEMORY_GB=\$((GALAXY_MEMORY_MB / 1024)) ; | |
12 fi ; | |
13 | |
14 ################## | |
15 ## SET UP FILES ## | |
16 ################## | |
17 | |
18 ## should always be FASTA; the link name takes its extension from the dataset's datatype | |
19 #set mg_fn = 'metagenome.' + str($metagenome.ext) | |
20 ln -s '$metagenome' '$mg_fn' | |
21 && | |
22 | |
23 ## Only FASTQ. Separate files for each sample. Metawrap checks for | |
24 ## files named _1.fastq and _2.fastq. | |
25 #set input1_fn = 'reads_1.fastq' | |
26 ln -s '$input_1' '$input1_fn' | |
27 && | |
28 | |
29 #set input2_fn = 'reads_2.fastq' | |
30 ln -s '$input_2' '$input2_fn' | |
31 && | |
32 | |
33 ##################### | |
34 ## INITIAL BINNING ## | |
35 ##################### | |
36 | |
37 metawrap binning | |
38 --metabat2 --maxbin2 --concoct | |
39 -a '$mg_fn' | |
40 -m \${GALAXY_MEMORY_GB:-16} | |
41 -o INITIAL_BINNING | |
42 -t \${GALAXY_SLOTS:-4} | |
43 '$input1_fn' | |
44 '$input2_fn' | |
45 && | |
46 | |
47 ## Check which binning programs produced bins; each non-empty bin directory is given the next switch (-A, -B, -C) in order | |
48 bin_dirs=(INITIAL_BINNING/concoct_bins INITIAL_BINNING/maxbin2_bins INITIAL_BINNING/metabat2_bins) && | |
49 switches=('-A' '-B' '-C') && | |
50 ## i indexes switches and must be incremented arithmetically: a plain i+=1 appends the character 1 (0, 01, 011), and 011 is then read as octal 9 | |
51 i=0 && | |
52 bin_string="" && | |
53 | |
54 for dir in "\${bin_dirs[@]}" ; do | |
55 if find "\${dir}" -mindepth 1 -maxdepth 1 | read; then | |
56 bin_string="\${bin_string} \${switches[\$i]} \${dir}" ; | |
57 i=\$((i+1)) ; | |
58 fi | |
59 done && | |
60 | |
61 #################### | |
62 ## BIN REFINEMENT ## | |
63 #################### | |
64 | |
65 ## The checkm database is included in the conda package. | |
66 ## Requires metawrap-mg_1.3.0--hdfd78af_1 or later. See | |
67 ## https://github.com/bioconda/bioconda-recipes/pull/38299. | |
68 | |
69 metawrap bin_refinement | |
70 -t \${GALAXY_SLOTS:-4} | |
71 -m \${GALAXY_MEMORY_GB:-16} | |
72 -c $binning.c | |
73 -x $binning.x | |
74 -o BIN_REFINEMENT | |
75 ## Only run bin_refinement on bins with contigs (bin_string is left unquoted so it splits into separate arguments) | |
76 \${bin_string} | |
77 ]]></command> | |
78 <inputs> | |
79 <param name="metagenome" type="data" format="fasta" label="Metagenome" help="Metagenome co-assembly for binning"/> | |
80 <param name="input_1" type="data" format="fastqsanger" label="Read 1" help="Original reads that were used for the assembly: read 1."/> | |
81 <param name="input_2" type="data" format="fastqsanger" label="Read 2" help="Original reads that were used for the assembly: read 2."/> | |
82 <section name="binning" expanded="false" title="Binning parameters"> | |
83 <param argument="-c" type="integer" min="50" max="100" value="70" label="Percent completion" help="Minimum % completion of bins"/> | |
84 <param argument="-x" type="integer" min="0" max="100" value="10" label="Percent contamination" help="Maximum % contamination of bins that is acceptable"/> | |
85 </section> | |
86 </inputs> | |
87 <outputs> | |
88 <!-- refined bins: one FASTA per bin, discovered recursively under BIN_REFINEMENT/metawrap_*_*_bins/ --> | |
89 <collection name="metawrap_bins" type="list" label="MetaWRAP on ${on_string}: bins"> | |
90 <discover_datasets pattern="metawrap_\d+_\d+_bins/(?P&lt;designation&gt;.+)\.fa" format="fasta" directory="BIN_REFINEMENT" recurse="true" match_relative_path="true" visible="false" /> | |
91 </collection> | |
92 <!-- summary figures: every file in BIN_REFINEMENT/figures, datatype taken from the file extension --> | |
93 <collection name="metawrap_figures" type="list" label="MetaWRAP on ${on_string}: summary figures"> | |
94 <discover_datasets pattern="__designation_and_ext__" directory="BIN_REFINEMENT/figures" visible="false" /> | |
95 </collection> | |
96 <!-- statistics on binning: the *.stats files written directly in BIN_REFINEMENT --> | |
97 <collection name="metawrap_stats" type="list" label="MetaWRAP on ${on_string}: stat files"> | |
98 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.stats" format="tabular" directory="BIN_REFINEMENT" visible="false" /> | |
99 </collection> | |
100 <!-- which contig went into which bin: the *.contigs files written directly in BIN_REFINEMENT --> | |
101 <collection name="metawrap_contigs" type="list" label="MetaWRAP on ${on_string}: contig assignments"> | |
102 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.contigs" format="tabular" directory="BIN_REFINEMENT" visible="false" /> | |
103 </collection> | |
104 </outputs> | |
105 <tests> | |
106 <!-- 01: basic function: bin a subset assembly with its paired reads, overriding -c and -x (60 / 15) --> | |
107 <test> | |
108 <param name="metagenome" value="subset.fasta.gz"/> | |
109 <param name="input_1" value="mapped_reads.r1.fastq.gz"/> | |
110 <param name="input_2" value="mapped_reads.r2.fastq.gz"/> | |
111 <param name="c" value="60"/> | |
112 <param name="x" value="15"/> | |
113 <!-- metawrap_bins is the main output, but it's too large to test; only the stats and contig-assignment collections are checked below --> | |
114 <!-- <output_collection name="metawrap_bins" type="list"> | |
115 <element name="bin.1" file="test02.fa" ftype="fasta"/> | |
116 </output_collection> --> | |
117 <output_collection name="metawrap_stats" type="list"> | |
118 <element name="metawrap_60_15_bins" file="test02.stats" ftype="tabular"/> | |
119 </output_collection> | |
120 <output_collection name="metawrap_contigs" type="list"> | |
121 <element name="metawrap_60_15_bins" file="test02.contigs" ftype="tabular"/> | |
122 </output_collection> | |
123 </test> | |
124 </tests> | |
125 <help><![CDATA[ | |
126 MetaWRAP | |
127 -------- | |
128 | |
129 MetaWRAP aims to be an easy-to-use metagenomic wrapper suite that | |
130 accomplishes the core tasks of metagenomic analysis. Additionally, | |
131 metaWRAP takes bin extraction and analysis to the next level. metaWRAP | |
132 is meant to be a fast and simple approach before you delve deeper into | |
133 parameterization of your analysis. MetaWRAP can be applied to a variety | |
134 of environments, including gut, water, and soil microbiomes (see | |
135 metaWRAP paper for benchmarks). | |
136 | |
137 MetaWRAP binning module | |
138 ~~~~~~~~~~~~~~~~~~~~~~~ | |
139 | |
140 The metaWRAP::Binning module is meant to be a convenient wrapper around | |
141 three metagenomic binning tools: MaxBin2, metaBAT2, and CONCOCT. | |
142 First the metagenomic assembly is indexed with bwa-index, and then | |
143 paired end reads from any number of samples are aligned to it. The | |
144 alignments are sorted and compressed with samtools, and library insert | |
145 size statistics are also gathered at the same time (insert size average | |
146 and standard deviation). metaBAT2’s jgi_summarize_bam_contig_depths | |
147 function is used to generate a contig abundance table, and it is then | |
148 converted into the correct format for each of the three binners to take | |
149 as input. After MaxBin2, metaBAT2, and CONCOCT finish binning the | |
150 contigs with default settings, the final bins folders are created with | |
151 formatted bin fasta files. CheckM’s lineage_wf function is used to | |
152 predict essential genes and estimate the completion and contamination of | |
153 each bin. | |
154 | |
155 MetaWRAP bin refinement | |
156 ~~~~~~~~~~~~~~~~~~~~~~~ | |
157 | |
158 The metaWRAP::Bin_refinement module utilizes a hybrid approach to take | |
159 in two or three bin sets that were obtained with different software and | |
160 produces a consolidated, improved bin set. First, binning_refiner is | |
161 used to create hybridized bins from every possible combination of sets. | |
162 If there were three bin sets: A, B, and C, then the following hybrid | |
163 sets will be produced with binning_refiner: AB, BC, AC, and ABC. CheckM | |
164 is then run to evaluate the completion and contamination of the bins in | |
165 each of the 7 bin sets (3 originals, 4 hybridized). The bin sets are | |
166 then iteratively compared to each other, and each pair is consolidated | |
167 into an improved bin set. To do this, the same bin is identified within | |
168 the two bin sets based on a minimum of 80% overlap in genome length, and | |
169 the better bin is determined based on which bin has the higher score. | |
170 The scoring function is S=Completion-5*Contamination. After all bin sets | |
171 are incorporated into the consolidated bin collection, a de-replication | |
172 function removes any duplicate contigs. If a contig is present in more | |
173 than one bin, it is removed from all but the best bin (based on scoring | |
174 function). CheckM is then run on the final bin set and a final report | |
175 file is generated showing the completion, contamination, and other | |
176 statistics generated by CheckM for each bin. Completion and | |
177 contamination rank plots are also generated to evaluate the success of | |
178 the Bin_refinement module, and compare its output to the quality of the | |
179 original bins. | |
180 | |
181 -------------- | |
182 | |
183 MetaWRAP’s home page is | |
184 `bxlab/metaWRAP <https://github.com/bxlab/metaWRAP>`__. | |
185 | |
186 This tool was wrapped by the Galaxy Australia team. | |
187 ]]></help> | |
188 <expand macro="citations"/> | |
189 </tool> |