comparison maxbin2.xml @ 2:6a638de7915c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/maxbin2/ commit 08beeabf7a8e91d0e95f5cdb71249dd6efe2bd46"
author iuc
date Tue, 29 Oct 2019 08:45:20 -0400
parents 864279a0d64b
children cfd50144a871
comparison
equal deleted inserted replaced
1:864279a0d64b 2:6a638de7915c
1 <tool id="maxbin2" name="MaxBin2" version="2.2.4_galaxy1"> 1 <tool id="maxbin2" name="MaxBin2" version="@MAXBIN_VERSION@+galaxy1">
2 <description>clusters metagenomic contigs into bins</description>
3 <macros>
4 <token name="@MAXBIN_VERSION@">2.2.7</token>
5 </macros>
2 <requirements> 6 <requirements>
3 <requirement type="package" version="2.2.4">maxbin2</requirement> 7 <requirement type="package" version="@MAXBIN_VERSION@">maxbin2</requirement>
4 </requirements> 8 </requirements>
5 <version_command><![CDATA[run_MaxBin.pl -version | head -n 1]]></version_command> 9 <version_command><![CDATA[run_MaxBin.pl -version | head -n 1]]></version_command>
6 <command detect_errors="exit_code"><![CDATA[ 10 <command detect_errors="exit_code"><![CDATA[
7 ## generate read or abundance files 11 ## generate read or abundance files
8 #if $intype_cond.intype_select == 'rds': 12 #if $intype_cond.intype_select == 'rds':
45 #end if 49 #end if
46 -thread \${GALAXY_SLOTS:-1} 50 -thread \${GALAXY_SLOTS:-1}
47 51
48 && tar -xf out.marker_of_each_bin.tar.gz 52 && tar -xf out.marker_of_each_bin.tar.gz
49 53
50 ## redirect the idba out and err file content to stdout and err 54 ## redirect the idba out and err file content to stdout and err
55 ## since this is also wanted in the error case, ';' is used here to
56 ## separate commands
51 #if $intype_cond.intype_select == 'rds' and $intype_cond.reassembly != "" 57 #if $intype_cond.intype_select == 'rds' and $intype_cond.reassembly != ""
52 && echo "==== IDBA stdout ====" 58 ; echo "==== IDBA stdout ===="
53 && cat out.idba.out 59 && cat out.idba.out
54 && echo "==== IDBA stderr ====" 1>&2 60 && echo "==== IDBA stderr ====" 1>&2
55 && cat out.idba.err 1>&2 61 && cat out.idba.err 1>&2
56 #end if 62 #end if
57 ]]></command> 63 ]]></command>
61 <param name="intype_select" type="select" label="Input type"> 67 <param name="intype_select" type="select" label="Input type">
62 <option value="rds" selected="true">Sequencing Reads</option> 68 <option value="rds" selected="true">Sequencing Reads</option>
63 <option value="abdc">Abundances</option> 69 <option value="abdc">Abundances</option>
64 </param> 70 </param>
65 <when value="rds"> 71 <when value="rds">
66 <param name="reads" argument="-read/-read2/..." type="data" format="fasta,fastq" multiple="true" label="Reads file(s)"/> 72 <param name="reads" type="data" format="fasta,fastq" multiple="true" label="Reads file(s)" help="(-read/-read2/...)"/>
67 <param argument="--reassembly" type="boolean" truevalue="-reassembly" falsevalue="" checked="false" label="" help="Reassembly option is still highly experimental. To use this function, you need to feed MaxBin interleaved paired-end fastq or fasta file." /> 73 <param name="output_abundances" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output abundances" help="" />
74 <param argument="--reassembly" type="boolean" truevalue="-reassembly" falsevalue="" checked="false" label="Reassembly" help="Reassembly option is still highly experimental. To use this function, you need to feed MaxBin interleaved paired-end fastq or fasta file." />
68 </when> 75 </when>
69 <when value="abdc"> 76 <when value="abdc">
70 <param name="abund" argument="-abund/-abund2/..." type="data" format="tabular" multiple="true" label="Abundance file(s)"/> 77 <param name="abund" type="data" format="tabular" multiple="true" label="Abundance file(s)" help="(-abund/-abund2/...)" />
71 </when> 78 </when>
72 </conditional> 79 </conditional>
73 <conditional name="adv_cond"> 80 <conditional name="adv_cond">
74 <param name="adv_select" type="select" label="Advanced options"> 81 <param name="adv_select" type="select" label="Advanced options">
75 <option value="yes">Yes</option> 82 <option value="yes">Yes</option>
77 </param> 84 </param>
78 <when value="no"/> 85 <when value="no"/>
79 <when value="yes"> 86 <when value="yes">
80 <param argument="-min_contig_length" type="integer" min="0" value="1000" label="minimum contig length" /> 87 <param argument="-min_contig_length" type="integer" min="0" value="1000" label="minimum contig length" />
81 <param argument="-max_iteration" type="integer" min="0" value="50" label="Maximum Expectation-Maximization algorithm iteration number" /> 88 <param argument="-max_iteration" type="integer" min="0" value="50" label="Maximum Expectation-Maximization algorithm iteration number" />
82 <param argument="-prob_threshold" type="float" min="0" max="1.0" value="0.9" label="Probability threshold for EM final classification" /> 89 <param argument="-prob_threshold" type="float" min="0" max="1.0" value="0.5" label="Probability threshold for EM final classification" />
83 <param name="plotmarker" type="boolean" truevalue="-plotmarker" falsevalue="" checked="false" label="Generate visualization of the marker gene presence numbers" /> 90 <param argument="-plotmarker" type="boolean" truevalue="-plotmarker" falsevalue="" checked="false" label="Generate visualization of the marker gene presence numbers" />
91 <param name="output_marker" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output marker gene presence for bins table" />
92 <param name="output_markers" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output marker genes for each bin as fasta" />
93 <param name="output_log" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output log" />
84 <param argument="-markerset" type="select" label="Marker gene set"> 94 <param argument="-markerset" type="select" label="Marker gene set">
85 <option value="107" selected="true">107 marker genes present in >95% of bacteria</option> 95 <option value="107" selected="true">107 marker genes present in >95% of bacteria</option>
86 <option value="40">40 marker gene sets that are universal among bacteria and archaea</option> 96 <option value="40">40 marker gene sets that are universal among bacteria and archaea</option>
87 </param> 97 </param>
88 </when> 98 </when>
89 </conditional> 99 </conditional>
90 </inputs> 100 </inputs>
91 <outputs> 101 <outputs>
102 <!-- default outputs -->
92 <collection name="bins" type="list" label="${tool.name} on ${on_string} (bins)"> 103 <collection name="bins" type="list" label="${tool.name} on ${on_string} (bins)">
93 <discover_datasets pattern="out.(?P&lt;designation&gt;[0-9]+).fasta" format="fasta" visible="false" /> 104 <discover_datasets pattern="out.(?P&lt;designation&gt;[0-9]+).fasta" format="fasta" visible="false" />
94 </collection> 105 </collection>
106 <data name="noclass" format="fasta" label="${tool.name} on ${on_string} (unclassified sequences)" from_work_dir="out.noclass"/>
107 <data name="toshort" format="fasta" label="${tool.name} on ${on_string} (to short sequences)" from_work_dir="out.tooshort"/>
95 <data name="summary" format="tabular" label="${tool.name} on ${on_string} (summary)" from_work_dir="out.summary"/> 108 <data name="summary" format="tabular" label="${tool.name} on ${on_string} (summary)" from_work_dir="out.summary"/>
96 <data name="log" format="txt" label="${tool.name} on ${on_string} (log)" from_work_dir="out.log"/> 109
110 <!-- optional outputs -->
111 <data name="log" format="txt" label="${tool.name} on ${on_string} (log)" from_work_dir="out.log">
112 <filter>adv_cond['adv_select']=='yes' and adv_cond['output_log']</filter>
113 </data>
114 <data name="marker" format="tabular" label="${tool.name} on ${on_string} (marker gene presence)" from_work_dir="out.marker">
115 <filter>adv_cond['adv_select']=='yes' and adv_cond['output_marker']</filter>
116 </data>
97 <data name="abundout" format="tabular" label="${tool.name} on ${on_string} (abundances)" from_work_dir="out.abund1"> 117 <data name="abundout" format="tabular" label="${tool.name} on ${on_string} (abundances)" from_work_dir="out.abund1">
98 <filter>intype_cond['intype_select']=='rds'</filter> 118 <filter>intype_cond['intype_select']=='rds' and intype_cond['output_abundances']</filter>
99 </data> 119 </data>
100 <data name="marker" format="tabular" label="${tool.name} on ${on_string} (marker gene presence)" from_work_dir="out.marker"/>
101 <data name="plot" format="pdf" label="${tool.name} on ${on_string} (marker gene presence plot)" from_work_dir="out.marker.pdf"> 120 <data name="plot" format="pdf" label="${tool.name} on ${on_string} (marker gene presence plot)" from_work_dir="out.marker.pdf">
102 <filter>adv_cond['adv_select']=='yes' and adv_cond['plotmarker']</filter> 121 <filter>adv_cond['adv_select']=='yes' and adv_cond['plotmarker']</filter>
103 </data> 122 </data>
104 <data name="noclass" format="fasta" label="${tool.name} on ${on_string} (unclassified sequences)" from_work_dir="out.noclass"/>
105 <data name="toshort" format="fasta" label="${tool.name} on ${on_string} (to short sequences)" from_work_dir="out.tooshort"/>
106 <collection name="markers" type="list" label="${tool.name} on ${on_string} (markers prediced for bins)"> 123 <collection name="markers" type="list" label="${tool.name} on ${on_string} (markers prediced for bins)">
107 <discover_datasets pattern="out.(?P&lt;designation&gt;[0-9]+).marker.fasta" format="fasta" visible="false" /> 124 <discover_datasets pattern="out.(?P&lt;designation&gt;[0-9]+).marker.fasta" format="fasta" visible="false" />
125 <filter>adv_cond['adv_select']=='yes' and adv_cond['output_markers']</filter>
108 </collection> 126 </collection>
127
109 <!-- additional output in case of reassembly --> 128 <!-- additional output in case of reassembly -->
110 <collection name="reassembly_bins" type="list" label="${tool.name} on ${on_string} (reassembly bins)"> 129 <collection name="reassembly_bins" type="list" label="${tool.name} on ${on_string} (reassembly bins)">
111 <discover_datasets directory="out.reassem" pattern="out.(?P&lt;designation&gt;[0-9]+).fasta" format="fasta" visible="false" /> 130 <discover_datasets directory="out.reassem" pattern="out.(?P&lt;designation&gt;[0-9]+).fasta" format="fasta" visible="false" />
112 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> 131 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter>
113 </collection> 132 </collection>
114 <collection name="reassembly_reads" type="list" label="${tool.name} on ${on_string} (reassembly reads)"> 133 <collection name="reassembly_reads" type="list" label="${tool.name} on ${on_string} (reassembly reads)">
115 <discover_datasets directory="out.reassem" pattern="out.reads.(?P&lt;designation&gt;[0-9]+)" format="fasta" visible="false" /> 134 <discover_datasets directory="out.reassem" pattern="out.reads.(?P&lt;designation&gt;[0-9]+)" format="fasta" visible="false" />
121 <data name="reassembly_n50" format="txt" label="${tool.name} on ${on_string} (reassembly N50)" from_work_dir="out.reassem/N50.txt"> 140 <data name="reassembly_n50" format="txt" label="${tool.name} on ${on_string} (reassembly N50)" from_work_dir="out.reassem/N50.txt">
122 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> 141 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter>
123 </data> 142 </data>
124 </outputs> 143 </outputs>
125 <tests> 144 <tests>
126 <test><!-- test w contigs and reads as input --> 145 <test expect_num_outputs="4"><!-- test w contigs and reads as input -->
127 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> 146 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" />
128 <conditional name="intype_cond"> 147 <conditional name="intype_cond">
129 <param name="intype_select" value="rds"/> 148 <param name="intype_select" value="rds"/>
130 <param name="reads" value="interleavedPE_unmapped_Sample3_total.fasta" ftype="fasta"/> 149 <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/>
131 </conditional> 150 </conditional>
132 <conditional name="adv_cond"> 151 <conditional name="adv_cond">
133 <param name="adv_select" value="no"/> 152 <param name="adv_select" value="no"/>
134 </conditional> 153 </conditional>
135 <output_collection name="bins" type="list" count="2"> 154 <output_collection name="bins" type="list" count="2">
136 <element name="001" file="1/out.001.fasta" ftype="fasta"/> 155 <element name="001" file="1/out.001.fasta" ftype="fasta"/>
137 <element name="002" file="1/out.002.fasta" ftype="fasta"/> 156 <element name="002" file="1/out.002.fasta" ftype="fasta"/>
138 </output_collection> 157 </output_collection>
139 <output name="summary" file="1/out.summary" ftype="tabular" /> 158 <output name="summary" file="1/out.summary" ftype="tabular" />
140 <output name="log" file="1/out.log" ftype="txt" compare="diff" lines_diff="17" /> 159 <output name="noclass" file="1/out.noclass" ftype="fasta" />
160 <output name="toshort" file="1/out.tooshort" ftype="fasta" />
161 </test>
162 <!-- test w contigs and reads as input test for optional outputs -->
163 <test expect_num_outputs="9">
164 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" />
165 <conditional name="intype_cond">
166 <param name="intype_select" value="rds"/>
167 <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/>
168 <param name="output_abundances" value="true" />
169 </conditional>
170 <conditional name="adv_cond">
171 <param name="adv_select" value="yes"/>
172 <param name="plotmarker" value="true" />
173 <param name="output_marker" value="true" />
174 <param name="output_markers" value="true" />
175 <param name="output_log" value="true" />
176 </conditional>
177 <output_collection name="bins" type="list" count="2">
178 <element name="001" file="1/out.001.fasta" ftype="fasta"/>
179 <element name="002" file="1/out.002.fasta" ftype="fasta"/>
180 </output_collection>
181 <output name="summary" file="1/out.summary" ftype="tabular" />
182 <output name="noclass" file="1/out.noclass" ftype="fasta" />
183 <output name="toshort" file="1/out.tooshort" ftype="fasta" />
184 <output name="log" file="1/out.log" ftype="txt" compare="diff" lines_diff="21" />
141 <output name="abundout" file="1/out.abund1" ftype="tabular" /> 185 <output name="abundout" file="1/out.abund1" ftype="tabular" />
142 <output name="marker" file="1/out.marker" ftype="tabular" /> 186 <output name="marker" file="1/out.marker" ftype="tabular" />
143 <output name="noclass" file="1/out.noclass" ftype="fasta" /> 187 <output name="plot" file="1/out.marker.pdf" ftype="pdf" compare="sim_size" />
144 <output name="toshort" file="1/out.tooshort" ftype="fasta" />
145 <output_collection name="markers" type="list" count="1"> 188 <output_collection name="markers" type="list" count="1">
146 <element name="001" file="1/out.001.marker.fasta" ftype="fasta"/> 189 <element name="001" file="1/out.001.marker.fasta" ftype="fasta"/>
147 </output_collection> 190 </output_collection>
148 </test> 191 </test>
149 <test><!--test w contigs and abundances as input + advanced options --> 192 <!--test w contigs and abundances as input + advanced options -->
193 <test expect_num_outputs="5">
150 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> 194 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" />
151 <conditional name="intype_cond"> 195 <conditional name="intype_cond">
152 <param name="intype_select" value="abdc"/> 196 <param name="intype_select" value="abdc"/>
153 <param name="abund" value="abundances.tsv" ftype="tabular"/> 197 <param name="abund" value="abundances.tsv" ftype="tabular"/>
154 </conditional> 198 </conditional>
163 <output_collection name="bins" type="list" count="2"> 207 <output_collection name="bins" type="list" count="2">
164 <element name="001" file="2/out.001.fasta" ftype="fasta"/> 208 <element name="001" file="2/out.001.fasta" ftype="fasta"/>
165 <element name="002" file="2/out.002.fasta" ftype="fasta"/> 209 <element name="002" file="2/out.002.fasta" ftype="fasta"/>
166 </output_collection> 210 </output_collection>
167 <output name="summary" file="2/out.summary" ftype="tabular" /> 211 <output name="summary" file="2/out.summary" ftype="tabular" />
168 <output name="log" file="2/out.log" ftype="txt" compare="diff" lines_diff="17" />
169 <output name="marker" file="2/out.marker" ftype="tabular" />
170 <output name="plot" file="2/out.marker.pdf" ftype="pdf" compare="sim_size" />
171 <output name="noclass" file="2/out.noclass" ftype="fasta" /> 212 <output name="noclass" file="2/out.noclass" ftype="fasta" />
172 <output name="toshort" file="2/out.tooshort" ftype="fasta" /> 213 <output name="toshort" file="2/out.tooshort" ftype="fasta" />
173 <output_collection name="markers" type="list" count="1"> 214 <output name="plot" file="2/out.marker.pdf" ftype="pdf" compare="sim_size" />
174 <element name="001" file="2/out.001.marker.fasta" ftype="fasta"/>
175 </output_collection>
176 </test> 215 </test>
177 <test><!-- test w contigs and reads as input + reassembly--> 216 <!-- test w contigs and reads as input + reassembly-->
217 <test expect_num_outputs="8">
178 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> 218 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" />
179 <conditional name="intype_cond"> 219 <conditional name="intype_cond">
180 <param name="intype_select" value="rds"/> 220 <param name="intype_select" value="rds"/>
181 <param name="reads" value="interleavedPE_unmapped_Sample3_total.fasta" ftype="fasta"/> 221 <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/>
182 <param name="reassembly" value="-reassembly"/> 222 <param name="reassembly" value="-reassembly"/>
183 </conditional> 223 </conditional>
184 <conditional name="adv_cond"> 224 <conditional name="adv_cond">
185 <param name="adv_select" value="no"/> 225 <param name="adv_select" value="no"/>
186 </conditional> 226 </conditional>
187 <output_collection name="bins" type="list" count="2"> 227 <output_collection name="bins" type="list" count="2">
188 <element name="001" file="3/out.001.fasta" ftype="fasta"/> 228 <element name="001" file="3/out.001.fasta" ftype="fasta"/>
189 <element name="002" file="3/out.002.fasta" ftype="fasta"/> 229 <element name="002" file="3/out.002.fasta" ftype="fasta"/>
190 </output_collection> 230 </output_collection>
191 <output name="summary" file="3/out.summary" ftype="tabular" /> 231 <output name="summary" file="3/out.summary" ftype="tabular" />
192 <output name="log" file="3/out.log" ftype="txt" compare="diff" lines_diff="17" />
193 <output name="abundout" file="3/out.abund1" ftype="tabular" />
194 <output name="marker" file="3/out.marker" ftype="tabular" />
195 <output name="noclass" file="3/out.noclass" ftype="fasta" /> 232 <output name="noclass" file="3/out.noclass" ftype="fasta" />
196 <output name="toshort" file="3/out.tooshort" ftype="fasta" /> 233 <output name="toshort" file="3/out.tooshort" ftype="fasta" />
197 <output_collection name="markers" type="list" count="1">
198 <element name="001" file="3/out.001.marker.fasta" ftype="fasta"/>
199 </output_collection>
200 <output_collection name="reassembly_bins" type="list" count="2"> 234 <output_collection name="reassembly_bins" type="list" count="2">
201 <element name="001" file="3/out.reassem/out.001.fasta" ftype="fasta"/> 235 <element name="001" file="3/out.reassem/out.001.fasta" ftype="fasta"/>
202 <element name="002" file="3/out.reassem/out.002.fasta" ftype="fasta"/> 236 <element name="002" file="3/out.reassem/out.002.fasta" ftype="fasta"/>
203 </output_collection> 237 </output_collection>
204 <output_collection name="reassembly_reads" type="list" count="2"> 238 <output_collection name="reassembly_reads" type="list" count="2">
213 MaxBin is software that clusters metagenomic contigs into different bins, 247 MaxBin is software that clusters metagenomic contigs into different bins,
214 each consisting (hopefully) of contigs from one species. MaxBin uses the 248 each consisting (hopefully) of contigs from one species. MaxBin uses the
215 nucleotide composition information and contig abundance information to 249 nucleotide composition information and contig abundance information to
216 achieve binning through an Expectation-Maximization algorithm. 250 achieve binning through an Expectation-Maximization algorithm.
217 251
218
219 **Input**: 252 **Input**:
220 253
221 MaxBin needs the contigs and contig abundance information. The contig abundance 254 MaxBin needs the contigs and contig abundance information. The contig abundance
222 information can be provided in two ways: the user can choose to provide 255 information can be provided in two ways: the user can choose to provide
223 256
256 289
257 **Reassembly** 290 **Reassembly**
258 291
259 This is an experimental feature of MaxBin. For each read bin, it calls IDBA_UD with the pre_correction parameter. This IDBA_UD call can also be made with the corresponding Galaxy tool. 292 This is an experimental feature of MaxBin. For each read bin, it calls IDBA_UD with the pre_correction parameter. This IDBA_UD call can also be made with the corresponding Galaxy tool.
260 293
294
295 **More information**
296
297 https://downloads.jbei.org/data/microbial_communities/MaxBin/MaxBin.html
298
261 ]]></help> 299 ]]></help>
262 <citations> 300 <citations>
263 <citation type="doi">10.1093/bioinformatics/btv638</citation> 301 <citation type="doi">10.1093/bioinformatics/btv638</citation>
264 <citation type="bibtex">
265 @misc{renameTODO,
266 author = {LastTODO, FirstTODO},
267 year = {TODO},
268 title = {TODO},
269 url = {https://downloads.jbei.org/data/microbial_communities/MaxBin/MaxBin.html},
270 }</citation>
271 </citations> 302 </citations>
272 </tool> 303 </tool>