Mercurial > repos > mbernt > maxbin2
comparison maxbin2.xml @ 0:35aa0df55a62 draft
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/tools/maxbin2 commit 8e118a4d24047e2c62912b962e854f789d6ff559-dirty
author | mbernt |
---|---|
date | Thu, 28 Jun 2018 08:49:29 -0400 |
parents | |
children | 864279a0d64b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:35aa0df55a62 |
---|---|
1 <tool id="maxbin2" name="MaxBin2" version="2.2.4"> | |
2 <requirements> <!-- software packages the tool depends on --> | |
3 <requirement type="package" version="2.2.4">maxbin2</requirement> <!-- same version number as the version attribute of the tool element --> | |
4 </requirements> | |
5 <version_command><![CDATA[run_MaxBin.pl -version | head -n 1]]></version_command> <!-- only the first line of the version output is kept --> | |
6 <command detect_errors="exit_code"><![CDATA[ | |
7 ## write the selected read or abundance datasets (one path per line) to the list file handed to run_MaxBin.pl | |
8 #if $intype_cond.intype_select == 'rds': | |
9 #for $r in $intype_cond.reads | |
10 #if $r | |
11 echo '$r' >> reads_list && | |
12 #end if | |
13 #end for | |
14 #else if $intype_cond.intype_select == 'abdc': | |
15 #for $a in $intype_cond.abund | |
16 #if $a | |
17 echo '$a' >> abund_list && | |
18 #end if | |
19 #end for | |
20 #end if | |
21 | |
22 ## in case of reassembly the IDBA stdout and stderr are appended at the end; | |
23 ## to tell the two apart a header is also printed before the MaxBin2 outputs | |
24 ## (the boolean parameter is tested by its truth value instead of a string comparison) | |
25 #if $intype_cond.intype_select == 'rds' and $intype_cond.reassembly | |
26 echo "==== MaxBin2 stdout ====" && | |
27 echo "==== MaxBin2 stderr ====" 1>&2 && | |
28 #end if | |
29 | |
30 run_MaxBin.pl | |
31 -contig '$contig' | |
32 -out out | |
33 #if $intype_cond.intype_select == 'rds': | |
34 -reads_list reads_list | |
35 $intype_cond.reassembly | |
36 #else if $intype_cond.intype_select == 'abdc': | |
37 -abund_list abund_list | |
38 #end if | |
39 #if $adv_cond.adv_select == 'yes': | |
40 -min_contig_length $adv_cond.min_contig_length | |
41 -max_iteration $adv_cond.max_iteration | |
42 -prob_threshold $adv_cond.prob_threshold | |
43 $adv_cond.plotmarker | |
44 -markerset $adv_cond.markerset | |
45 #end if | |
46 -thread \${GALAXY_SLOTS:-1} | |
47 | |
48 && tar -xf out.marker_of_each_bin.tar.gz | |
49 | |
50 ## forward the content of the IDBA stdout and stderr files to the stdout and stderr of the job | |
51 #if $intype_cond.intype_select == 'rds' and $intype_cond.reassembly | |
52 && echo "==== IDBA stdout ====" | |
53 && cat out.idba.out | |
54 && echo "==== IDBA stderr ====" 1>&2 | |
55 && cat out.idba.err 1>&2 | |
56 #end if | |
57 ]]></command> | |
58 <inputs> | |
59 <param argument="-contig" type="data" format="fasta,fasta.gz" label="Contig file"/> | |
60 <conditional name="intype_cond"> <!-- source of the abundance information: sequencing reads or precomputed abundance files --> | |
61 <param name="intype_select" type="select" label="Input type"> | |
62 <option value="rds" selected="true">Sequencing Reads</option> | |
63 <option value="abdc">Abundances</option> | |
64 </param> | |
65 <when value="rds"> | |
66 <param name="reads" argument="-read/-read2/..." type="data" format="fasta,fastq" multiple="true" label="Reads file(s)"/> | |
67 <param name="reassembly" argument="-reassembly" type="boolean" truevalue="-reassembly" falsevalue="" checked="false" label="Reassembly" help="Reassembly option is still highly experimental. To use this function, you need to feed MaxBin interleaved paired-end fastq or fasta file if you were to use this option." /> <!-- the name is given explicitly; the flag is spelled with a single dash like the truevalue --> | |
68 </when> | |
69 <when value="abdc"> | |
70 <param name="abund" argument="-abund/-abund2/..." type="data" format="tabular" multiple="true" label="Abundance file(s)"/> | |
71 </when> | |
72 </conditional> | |
73 <conditional name="adv_cond"> <!-- tuning options; they are only put on the command line when "Yes" is selected --> | |
74 <param name="adv_select" type="select" label="Advanced options"> | |
75 <option value="yes">Yes</option> | |
76 <option value="no" selected="true">No</option> | |
77 </param> | |
78 <when value="no"/> | |
79 <when value="yes"> | |
80 <param argument="-min_contig_length" type="integer" min="0" value="1000" label="minimum contig length" /> | |
81 <param argument="-max_iteration" type="integer" min="0" value="50" label="Maximum Expectation-Maximization algorithm iteration number" /> | |
82 <param argument="-prob_threshold" type="float" min="0" max="1.0" value="0.9" label="Probability threshold for EM final classification" /> | |
83 <param name="plotmarker" type="boolean" truevalue="-plotmarker" falsevalue="" checked="false" label="Generate visualization of the marker gene presence numbers" /> | |
84 <param argument="-markerset" type="select" label="Marker gene set"> | |
85 <option value="107" selected="true">107 marker genes present in >95% of bacteria</option> | |
86 <option value="40">40 marker gene sets that are universal among bacteria and archaea</option> | |
87 </param> | |
88 </when> | |
89 </conditional> | |
90 </inputs> | |
91 <outputs> | |
92 <collection name="bins" type="list" label="${tool.name} on ${on_string} (bins)"> | |
93 <discover_datasets pattern="out.(?P&lt;designation&gt;[0-9]+).fasta" format="fasta" visible="false" /> | |
94 </collection> | |
95 <data name="summary" format="tabular" label="${tool.name} on ${on_string} (summary)" from_work_dir="out.summary"/> | |
96 <data name="log" format="txt" label="${tool.name} on ${on_string} (log)" from_work_dir="out.log"/> | |
97 <data name="abundout" format="tabular" label="${tool.name} on ${on_string} (abundances)" from_work_dir="out.abund1"> | |
98 <filter>intype_cond['intype_select']=='rds'</filter> | |
99 </data> | |
100 <data name="marker" format="tabular" label="${tool.name} on ${on_string} (marker gene presence)" from_work_dir="out.marker"/> | |
101 <data name="plot" format="pdf" label="${tool.name} on ${on_string} (marker gene presence plot)" from_work_dir="out.marker.pdf"> | |
102 <filter>adv_cond['adv_select']=='yes' and adv_cond['plotmarker']</filter> | |
103 </data> | |
104 <data name="noclass" format="fasta" label="${tool.name} on ${on_string} (unclassified sequences)" from_work_dir="out.noclass"/> | |
105 <data name="toshort" format="fasta" label="${tool.name} on ${on_string} (too short sequences)" from_work_dir="out.tooshort"/> | |
106 <collection name="markers" type="list" label="${tool.name} on ${on_string} (markers predicted for bins)"> | |
107 <discover_datasets pattern="out.(?P&lt;designation&gt;[0-9]+).marker.fasta" format="fasta" visible="false" /> | |
108 </collection> | |
109 <!-- additional outputs in case of reassembly; the boolean is tested by its truth value (like plotmarker above) so that these outputs are skipped when reassembly is switched off --> | |
110 <collection name="reassembly_bins" type="list" label="${tool.name} on ${on_string} (reassembly bins)"> | |
111 <discover_datasets directory="out.reassem" pattern="out.(?P&lt;designation&gt;[0-9]+).fasta" format="fasta" visible="false" /> | |
112 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> | |
113 </collection> | |
114 <collection name="reassembly_reads" type="list" label="${tool.name} on ${on_string} (reassembly reads)"> | |
115 <discover_datasets directory="out.reassem" pattern="out.reads.(?P&lt;designation&gt;[0-9]+)" format="fasta" visible="false" /> | |
116 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> | |
117 </collection> | |
118 <data name="reassembly_noclass" format="fasta" label="${tool.name} on ${on_string} (reassembly unclassified sequences)" from_work_dir="out.reassem/out.reads.noclass"> | |
119 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> | |
120 </data> | |
121 <data name="reassembly_n50" format="txt" label="${tool.name} on ${on_string} (reassembly N50)" from_work_dir="out.reassem/N50.txt"> | |
122 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> | |
123 </data> | |
124 </outputs> | |
125 <tests> | |
126 <test><!-- test with contigs and reads as input, advanced options switched off --> | |
127 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> | |
128 <conditional name="intype_cond"> | |
129 <param name="intype_select" value="rds"/> | |
130 <param name="reads" value="interleavedPE_unmapped_Sample3_total.fasta" ftype="fasta"/> | |
131 </conditional> | |
132 <conditional name="adv_cond"> | |
133 <param name="adv_select" value="no"/> | |
134 </conditional> | |
135 <output_collection name="bins" type="list" count="2"> | |
136 <element name="001" file="1/out.001.fasta" ftype="fasta"/> | |
137 <element name="002" file="1/out.002.fasta" ftype="fasta"/> | |
138 </output_collection> | |
139 <output name="summary" file="1/out.summary" ftype="tabular" /> | |
140 <output name="log" file="1/out.log" ftype="txt" compare="diff" lines_diff="17" /> | |
141 <output name="abundout" file="1/out.abund1" ftype="tabular" /> | |
142 <output name="marker" file="1/out.marker" ftype="tabular" /> | |
143 <output name="noclass" file="1/out.noclass" ftype="fasta" /> | |
144 <output name="toshort" file="1/out.tooshort" ftype="fasta" /> | |
145 <output_collection name="markers" type="list" count="1"> | |
146 <element name="001" file="1/out.001.marker.fasta" ftype="fasta"/> | |
147 </output_collection> | |
148 </test> | |
149 <test><!-- test with contigs and abundances as input plus advanced options (including the marker plot) --> | |
150 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> | |
151 <conditional name="intype_cond"> | |
152 <param name="intype_select" value="abdc"/> | |
153 <param name="abund" value="abundances.tsv" ftype="tabular"/> | |
154 </conditional> | |
155 <conditional name="adv_cond"> | |
156 <param name="adv_select" value="yes"/> | |
157 <param name="min_contig_length" value="500"/> | |
158 <param name="max_iteration" value="10"/> | |
159 <param name="prob_threshold" value="0.95"/> | |
160 <param name="plotmarker" value="-plotmarker"/> | |
161 <param name="markerset" value="107"/> | |
162 </conditional> | |
163 <output_collection name="bins" type="list" count="2"> | |
164 <element name="001" file="2/out.001.fasta" ftype="fasta"/> | |
165 <element name="002" file="2/out.002.fasta" ftype="fasta"/> | |
166 </output_collection> | |
167 <output name="summary" file="2/out.summary" ftype="tabular" /> | |
168 <output name="log" file="2/out.log" ftype="txt" compare="diff" lines_diff="17" /> | |
169 <output name="marker" file="2/out.marker" ftype="tabular" /> | |
170 <output name="plot" file="2/out.marker.pdf" ftype="pdf" compare="sim_size" /> | |
171 <output name="noclass" file="2/out.noclass" ftype="fasta" /> | |
172 <output name="toshort" file="2/out.tooshort" ftype="fasta" /> | |
173 <output_collection name="markers" type="list" count="1"> | |
174 <element name="001" file="2/out.001.marker.fasta" ftype="fasta"/> | |
175 </output_collection> | |
176 </test> | |
177 <test><!-- test with contigs and reads as input plus reassembly; also checks the reassembly outputs --> | |
178 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> | |
179 <conditional name="intype_cond"> | |
180 <param name="intype_select" value="rds"/> | |
181 <param name="reads" value="interleavedPE_unmapped_Sample3_total.fasta" ftype="fasta"/> | |
182 <param name="reassembly" value="-reassembly"/> | |
183 </conditional> | |
184 <conditional name="adv_cond"> | |
185 <param name="adv_select" value="no"/> | |
186 </conditional> | |
187 <output_collection name="bins" type="list" count="2"> | |
188 <element name="001" file="3/out.001.fasta" ftype="fasta"/> | |
189 <element name="002" file="3/out.002.fasta" ftype="fasta"/> | |
190 </output_collection> | |
191 <output name="summary" file="3/out.summary" ftype="tabular" /> | |
192 <output name="log" file="3/out.log" ftype="txt" compare="diff" lines_diff="17" /> | |
193 <output name="abundout" file="3/out.abund1" ftype="tabular" /> | |
194 <output name="marker" file="3/out.marker" ftype="tabular" /> | |
195 <output name="noclass" file="3/out.noclass" ftype="fasta" /> | |
196 <output name="toshort" file="3/out.tooshort" ftype="fasta" /> | |
197 <output_collection name="markers" type="list" count="1"> | |
198 <element name="001" file="3/out.001.marker.fasta" ftype="fasta"/> | |
199 </output_collection> | |
200 <output_collection name="reassembly_bins" type="list" count="2"> | |
201 <element name="001" file="3/out.reassem/out.001.fasta" ftype="fasta"/> | |
202 <element name="002" file="3/out.reassem/out.002.fasta" ftype="fasta"/> | |
203 </output_collection> | |
204 <output_collection name="reassembly_reads" type="list" count="2"> | |
205 <element name="001" file="3/out.reassem/out.reads.001" ftype="fasta"/> | |
206 <element name="002" file="3/out.reassem/out.reads.002" ftype="fasta"/> | |
207 </output_collection> | |
208 <output name="reassembly_noclass" file="3/out.reassem/out.reads.noclass" ftype="fasta" /> | |
209 <output name="reassembly_n50" file="3/out.reassem/N50.txt" ftype="txt" /> | |
210 </test> | |
211 </tests> | |
212 <help><![CDATA[ | |
213 MaxBin is software that clusters metagenomic contigs into different bins, | |
214 each of which (hopefully) consists of contigs from one species. MaxBin uses the | |
215 nucleotide composition information and contig abundance information to | |
216 achieve binning through an Expectation-Maximization algorithm. | |
217 | |
218 | |
219 **Input**: | |
220 | |
221 MaxBin needs the contigs and contig abundance information. The contig abundance | |
222 information can be provided in two ways: the user can choose to provide | |
223 | |
224 - the abundance file or | |
225 - the sequencing reads in fasta or fastq format (MaxBin will then use Bowtie2 to map the | |
226 sequencing reads against the contigs and generate the abundance information) | |
227 | |
228 The abundance information can be provided as a tabular file: | |
229 | |
230 For example, given three contigs named A0001, A0002, and A0003, the abundance file will look like | |
231 | |
232 A0001 30.89 | |
233 A0002 20.02 | |
234 A0003 78.93 | |
235 | |
236 Reads/abundance information can be given in multiple files. | |
237 | |
238 By default MaxBin will look for 107 marker genes present in >95% of bacteria. | |
239 Alternatively you can also choose 40 marker gene sets that are universal among | |
240 bacteria and archaea (Wu et al., PLoS ONE 2013). This option may be better | |
241 suited for environments dominated by archaea; however, it tends to split genomes | |
242 into more bins. You can choose between different marker gene sets and see which | |
243 one works better. | |
244 | |
245 **Outputs** | |
246 | |
247 - bins: binned sequences | |
248 - summary: a summary file describing which contigs are being classified into which bin. | |
249 - log: a log file recording the core steps of MaxBin algorithm | |
250 - abundances (only if reads are used as input): the contig abundance information that MaxBin generated by mapping the sequencing reads against the contigs | |
251 - marker: marker gene presence numbers for each bin. This table is ready to be plotted by R or other 3rd-party software. | |
252 - marker plot (only present if selected in the advanced options, i.e. if -plotmarker is specified): visualization of the marker gene presence numbers using R. | |
253 - unclassified sequences: this file stores all sequences that pass the minimum length threshold but are not classified successfully. | |
254 - too short sequences: this file stores all sequences that do not meet the minimum length threshold. | |
255 - markers predicted for bins: these data sets store all markers predicted from the individual bins. | |
256 | |
257 **Reassembly** | |
258 | |
259 This is an experimental feature of MaxBin. For each bin of reads it calls IDBA_UD with the pre_correction parameter. This IDBA_UD call can also be performed separately with the corresponding Galaxy tool. | |
260 | |
261 ]]></help> | |
262 <citations> | |
263 <citation type="doi">10.1093/bioinformatics/btv638</citation> <!-- publication, referenced by DOI --> | |
264 <!-- website reference; the unfilled template fields (TODO author, year, title) were replaced --> <citation type="bibtex"> | |
265 @misc{maxbin_website, | |
266 title = {MaxBin}, | |
267 howpublished = {Website}, | |
268 note = {MaxBin project page}, | |
269 url = {https://downloads.jbei.org/data/microbial_communities/MaxBin/MaxBin.html}, | |
270 }</citation> | |
271 </citations> | |
272 </tool> |