comparison pick_otus.xml @ 0:f77fff416ea7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit c9bf747b23b4a9d6adc20c7740b9247c22654862
author iuc
date Thu, 18 May 2017 09:31:22 -0400
parents
children 127947ced93f
comparison
equal deleted inserted replaced
-1:000000000000 0:f77fff416ea7
1 <tool id="qiime_pick_otus" name="Pick OTUs" version="@WRAPPER_VERSION@.0">
<!-- Intentionally empty: no short description is displayed next to the tool name. -->
<description/>
<macros>
    <!-- The "requirements", "citations" and pick_otus_* macros expanded in this wrapper are not defined here; presumably they live in macros.xml. -->
    <import>macros.xml</import>
</macros>
<!-- Packages declared by this wrapper in addition to whatever the "requirements" macro provides. -->
<expand macro="requirements">
    <requirement type="package" version="2.1b">sortmerna</requirement>
    <!-- Disabled together with the matching "mothur" and "blast" method options: -->
    <!--<requirement type="package" version="1.38.1.1">mothur</requirement>-->
    <!--<requirement type="package" version="2.2.22">blast-legacy</requirement>-->
    <requirement type="package" version="4.6.6">cd-hit</requirement>
</expand>
<version_command>pick_otus.py --version</version_command>
<command detect_errors="aggressive"><![CDATA[
## Options shared by every OTU picking method; results are written to ./pick_otus.
pick_otus.py
    --input_seqs_filepath '$input_seqs_filepath'
    --output_dir pick_otus
    --otu_picking_method '$method.otu_picking_method'

## Method-specific options.
## Boolean parameters expand to either their flag or an empty string, so they
## are left unquoted on purpose: a quoted empty string would be handed to
## pick_otus.py as an empty positional argument.
#if $method.otu_picking_method == "sortmerna"
    ## Reference sequences: a history dataset or the path of a cached entry.
    #if $method.references.source_selector == 'history'
        --refseqs_fp '$method.references.refseqs_fp'
    #else if $method.references.source_selector == 'cached'
        --refseqs_fp '$method.references.refseqs_fp.fields.path'
    #end if
    --sortmerna_e_value '$method.sortmerna_e_value'
    --sortmerna_coverage '$method.sortmerna_coverage'
    #if $method.sortmerna_tabular.test == 'yes'
        --sortmerna_tabular
        --sortmerna_best_N_alignments '$method.sortmerna_tabular.sortmerna_best_N_alignments'
    #end if
    --sortmerna_max_pos '$method.sortmerna_max_pos'
    --similarity '$method.similarity'
    $method.suppress_prefilter_exact_match
    --threads \${GALAXY_SLOTS:-2}
#else if $method.otu_picking_method == "mothur"
    --clustering_algorithm '$method.clustering_algorithm'
#else if $method.otu_picking_method == "trie"
    $method.trie_reverse_seqs
#else if $method.otu_picking_method == "uclust_ref"
    #if $method.references.source_selector == 'history'
        --refseqs_fp '$method.references.refseqs_fp'
    #else if $method.references.source_selector == 'cached'
        --refseqs_fp '$method.references.refseqs_fp.fields.path'
    #end if
    --similarity '$method.similarity'
    $method.enable_rev_strand_match
    $method.suppress_presort_by_abundance_uclust
    $method.suppress_new_clusters
    --max_accepts '$method.max_accepts'
    --max_rejects '$method.max_rejects'
    --stepwords '$method.stepwords'
    --word_length '$method.word_length'
#else if $method.otu_picking_method == "blast"
    #if $method.references.source_selector == 'history'
        --refseqs_fp '$method.references.refseqs_fp'
    #else if $method.references.source_selector == 'cached'
        --refseqs_fp '$method.references.refseqs_fp.fields.path'
    #end if
    --max_e_value_blast '$method.max_e_value_blast'
    --min_aligned_percent '$method.min_aligned_percent'
    --similarity '$method.similarity'
#else if $method.otu_picking_method == "sumaclust"
    --similarity '$method.similarity'
    ## Both sumaclust booleans are emitted unquoted, like every other flag here.
    $method.sumaclust_exact
    $method.sumaclust_l
    --denovo_otu_id_prefix '$method.denovo_otu_id_prefix'
    $method.suppress_prefilter_exact_match
    --threads \${GALAXY_SLOTS:-2}
#else if $method.otu_picking_method == "swarm"
    --denovo_otu_id_prefix '$method.denovo_otu_id_prefix'
    --swarm_resolution '$method.swarm_resolution'
    --threads \${GALAXY_SLOTS:-2}
#else if $method.otu_picking_method == "prefix_suffix"
    --prefix_length '$method.prefix_length'
    --suffix_length '$method.suffix_length'
#else if $method.otu_picking_method == "cdhit"
    --similarity '$method.similarity'
#else if $method.otu_picking_method == "uclust"
    --similarity '$method.similarity'
    --denovo_otu_id_prefix '$method.denovo_otu_id_prefix'
    $method.enable_rev_strand_match
    $method.suppress_presort_by_abundance_uclust
    $method.optimal_uclust
    $method.exact_uclust
    $method.user_sort
    --max_accepts '$method.max_accepts'
    --max_rejects '$method.max_rejects'
    --stepwords '$method.stepwords'
    --word_length '$method.word_length'
    $method.suppress_uclust_stable_sort
    $method.suppress_prefilter_exact_match
#end if

## Options that apply regardless of the chosen method.
## The prefilter length is optional and only passed when a value was entered.
#if str($prefix_prefilter_length) != ''
    --prefix_prefilter_length '$prefix_prefilter_length'
#end if
$trie_prefilter
--non_chimeras_retention '$non_chimeras_retention'

#if $method.otu_picking_method == "sortmerna"
    ## Presumably removed so that the pick_otus/*_otus.log pattern used by the
    ## "log" output matches a single file -- TODO confirm.
    &&
    rm pick_otus/sortmerna_otus.log
#end if
]]></command>
<inputs>
    <param argument="--input_seqs_filepath" type="data" format="fasta" label="Input sequences file"/>
    <!-- One <when> per OTU picking method; each lists the options that only apply to that method. -->
    <conditional name="method">
        <param argument="--otu_picking_method" type="select" label="Method for picking OTUs">
            <option value="sortmerna">sortmerna</option>
            <!--<option value="mothur">mothur (requires an input file of aligned sequences)</option>-->
            <option value="trie">trie</option>
            <option value="uclust_ref">uclust_ref</option>
            <!--<option value="blast">blast</option>-->
            <!--<option value="sumaclust">sumaclust</option>-->
            <option value="swarm">swarm</option>
            <option value="prefix_suffix">prefix_suffix</option>
            <!--<option value="cdhit">cdhit</option>-->
            <option value="uclust" selected="true">uclust</option>
        </param>
        <when value="sortmerna">
            <expand macro="pick_otus_reference_source"/>
            <param argument="--sortmerna_e_value" type="float" value="1" label="Max E-value when clustering"/>
            <param argument="--sortmerna_coverage" type="float" value="0.97" min="0" max="1" label="Minimum percent query coverage (of an alignment) to consider a hit"/>
            <conditional name="sortmerna_tabular">
                <!-- Declared with "name" rather than "argument": this selector is not itself a pick_otus.py option, the command template only tests it against "yes". -->
                <param name="test" type="select" label="Output alignments in the Blast tabular format with two additional columns including the CIGAR string and the percent query coverage?">
                    <option value="yes">Yes</option>
                    <option value="no" selected="true">No</option>
                </param>
                <when value="yes">
                    <param argument="--sortmerna_best_N_alignments" type="integer" value="1" label="how many alignments per read will be written"/>
                </when>
                <when value="no"/>
            </conditional>
            <param argument="--sortmerna_max_pos" type="integer" value="10000" label="Maximum number of positions per seed to store in the indexed database"/>
            <expand macro="pick_otus_similarity"/>
            <expand macro="pick_otus_suppress_prefilter_exact_match"/>
        </when>
        <!-- The "mothur", "blast", "sumaclust" and "cdhit" branches are kept although their options are commented out in the selector above. -->
        <when value="mothur">
            <param argument="--clustering_algorithm" type="select" label="Clustering algorithm">
                <option value="furthest" selected="true">furthest</option>
                <option value="nearest">nearest</option>
                <option value="average">average</option>
            </param>
        </when>
        <when value="trie">
            <param argument="--trie_reverse_seqs" type="boolean" truevalue="--trie_reverse_seqs" falsevalue="" checked="false" label="Reverse seqs before picking OTUs for suffix (rather than prefix) collapsing?"/>
        </when>
        <when value="uclust_ref">
            <expand macro="pick_otus_reference_source"/>
            <expand macro="pick_otus_similarity"/>
            <expand macro="pick_otus_enable_rev_strand_match"/>
            <expand macro="pick_otus_suppress_presort_by_abundance_uclust"/>
            <param argument="--suppress_new_clusters" type="boolean" truevalue="--suppress_new_clusters" falsevalue="" checked="false" label="Suppress creation of new clusters using seqs that don't match reference?"/>
            <expand macro="pick_otus_max"/>
            <expand macro="pick_otus_stepwords"/>
            <expand macro="pick_otus_word_length"/>
        </when>
        <when value="blast">
            <expand macro="pick_otus_reference_source"/>
            <expand macro="pick_otus_similarity"/>
            <param argument="--max_e_value_blast" type="float" value="1e-10" label="Max E-value when clustering"/>
            <param argument="--min_aligned_percent" type="float" value="0.5" min="0" max="1" label="Minimum percent of query sequence that can be aligned to consider a hit"/>
        </when>
        <when value="sumaclust">
            <expand macro="pick_otus_similarity"/>
            <param argument="--sumaclust_exact" type="boolean" truevalue="--sumaclust_exact" falsevalue="" checked="false" label="Assign a sequence to the best matching seed rather than the first matching seed passing the similarity threshold?"/>
            <param argument="--sumaclust_l" type="boolean" truevalue="--sumaclust_l" falsevalue="" checked="true" label="Reference sequence length if the shortest?"/>
            <expand macro="pick_otus_denovo_otu_id_prefix"/>
            <expand macro="pick_otus_suppress_prefilter_exact_match"/>
        </when>
        <when value="swarm">
            <expand macro="pick_otus_denovo_otu_id_prefix"/>
            <param argument="--swarm_resolution" type="integer" value="1" label="Maximum number of differences allowed between two amplicons" help="Two amplicons will be grouped if they have integer (or less) differences"/>
        </when>
        <when value="prefix_suffix">
            <param argument="--prefix_length" type="integer" value="50" label="Prefix length"/>
            <param argument="--suffix_length" type="integer" value="50" label="Suffix length"/>
        </when>
        <when value="cdhit">
            <expand macro="pick_otus_similarity"/>
        </when>
        <when value="uclust">
            <expand macro="pick_otus_similarity"/>
            <expand macro="pick_otus_denovo_otu_id_prefix"/>
            <expand macro="pick_otus_enable_rev_strand_match"/>
            <expand macro="pick_otus_suppress_presort_by_abundance_uclust"/>
            <!-- The uclust flag names in the labels below are spelled with a plain double hyphen. -->
            <param argument="--optimal_uclust" type="boolean" truevalue="--optimal_uclust" falsevalue="" checked="false" label="Pass the --optimal flag to uclust?"/>
            <param argument="--exact_uclust" type="boolean" truevalue="--exact_uclust" falsevalue="" checked="false" label="Pass the --exact flag to uclust?"/>
            <param argument="--user_sort" type="boolean" truevalue="--user_sort" falsevalue="" checked="false" label="Pass the --user_sort flag to uclust?"/>
            <expand macro="pick_otus_max"/>
            <expand macro="pick_otus_stepwords"/>
            <expand macro="pick_otus_word_length"/>
            <param argument="--suppress_uclust_stable_sort" type="boolean" truevalue="--suppress_uclust_stable_sort" falsevalue="" checked="false" label="Don't pass --stable-sort to uclust?"/>
            <expand macro="pick_otus_suppress_prefilter_exact_match"/>
        </when>
    </conditional>
    <!-- Options available for every method. The prefilter length is optional and has no default value. -->
    <param argument="--prefix_prefilter_length" type="integer" label="Threshold to automatically group first identical prefix_prefilter_length into a single OTU" help="This is useful for large sequence collections where OTU picking doesn't scale well" optional="true"/>
    <param argument="--trie_prefilter" type="boolean" truevalue="--trie_prefilter" falsevalue="" checked="false" label="Prefilter data so seqs which are identical prefixes of a longer seq are automatically grouped into a single OTU?" help="This is useful for large sequence collections where OTU picking doesn't scale well"/>
    <param argument="--non_chimeras_retention" type="select" label="Selects subsets of sequences detected as non-chimeras to retain after de novo and reference based chimera detection">
        <option value="intersection">Intersection (retains only those sequences that are flagged as non-chimeras from both detection methods)</option>
        <option value="union" selected="true">Union (retains sequences that are flagged as non-chimeric from either filter)</option>
    </param>
</inputs>
<outputs>
    <!-- OTU map and log, collected from the pick_otus working sub-directory. -->
    <data name="otus"
          format="txt"
          from_work_dir="pick_otus/*_otus.txt"
          label="${tool.name} on ${on_string}: OTUs"/>
    <data name="log"
          format="txt"
          from_work_dir="pick_otus/*_otus.log"
          label="${tool.name} on ${on_string}: Log"/>
    <!-- Only created for the sortmerna and uclust_ref methods. -->
    <data name="failures"
          format="txt"
          from_work_dir="pick_otus/*_failures.txt"
          label="${tool.name} on ${on_string}: Failures">
        <filter>method['otu_picking_method'] == "sortmerna" or method['otu_picking_method'] == "uclust_ref"</filter>
    </data>
    <!-- Only created when sortmerna is run with the tabular Blast output switched on. -->
    <data name="blast_output"
          format="tabular"
          from_work_dir="pick_otus/sortmerna_otus.blast"
          label="${tool.name} on ${on_string}: SortMeRNA Blast output">
        <filter>method['otu_picking_method'] == "sortmerna" and method["sortmerna_tabular"]["test"] == "yes"</filter>
    </data>
</outputs>
<tests>
    <!-- Parameters are grouped by the conditional they belong to, mirroring the structure of the tool inputs. -->
    <!-- uclust (the default method) -->
    <test>
        <param name="input_seqs_filepath" value="pick_otus/seqs.fna"/>
        <conditional name="method">
            <param name="otu_picking_method" value="uclust"/>
            <param name="similarity" value="0.97"/>
            <param name="denovo_otu_id_prefix" value="denovo"/>
            <param name="enable_rev_strand_match" value=""/>
            <param name="suppress_presort_by_abundance_uclust" value=""/>
            <param name="optimal_uclust" value=""/>
            <param name="exact_uclust" value=""/>
            <param name="user_sort" value=""/>
            <param name="max_accepts" value="1"/>
            <param name="max_rejects" value="8"/>
            <param name="stepwords" value="8"/>
            <param name="word_length" value="8"/>
            <param name="suppress_uclust_stable_sort" value=""/>
            <param name="suppress_prefilter_exact_match" value=""/>
        </conditional>
        <param name="trie_prefilter" value=""/>
        <param name="non_chimeras_retention" value="union"/>
        <output name="otus" md5="2ca01ee393e8a795e5d09a15c5ca77c3"/>
        <output name="log">
            <assert_contents>
                <has_text text="UclustOtuPicker parameters"/>
            </assert_contents>
        </output>
    </test>
    <!-- sortmerna against a history reference, with the tabular Blast output enabled -->
    <test>
        <param name="input_seqs_filepath" value="pick_otus/seqs.fna"/>
        <conditional name="method">
            <param name="otu_picking_method" value="sortmerna"/>
            <conditional name="references">
                <param name="source_selector" value="history"/>
                <param name="refseqs_fp" value="pick_otus/refseqs.fasta"/>
            </conditional>
            <param name="sortmerna_e_value" value="1"/>
            <param name="sortmerna_coverage" value="0.97"/>
            <conditional name="sortmerna_tabular">
                <param name="test" value="yes"/>
                <param name="sortmerna_best_N_alignments" value="1"/>
            </conditional>
            <param name="sortmerna_max_pos" value="10000"/>
            <param name="similarity" value="0.97"/>
            <param name="suppress_prefilter_exact_match" value=""/>
        </conditional>
        <param name="trie_prefilter" value=""/>
        <param name="non_chimeras_retention" value="union"/>
        <output name="otus" md5="dbc3e13e7da742ac3df317054821f522"/>
        <output name="log">
            <assert_contents>
                <has_text text="OtuPicker parameters"/>
                <has_text text="SortMeRNA database"/>
            </assert_contents>
        </output>
        <output name="failures" md5="cce9699de546618df2db9de7a8098916"/>
        <output name="blast_output" md5="214b1674f5f84dcb68c0c0c1c8a001be"/>
    </test>
    <!-- trie -->
    <test>
        <param name="input_seqs_filepath" value="pick_otus/seqs.fna"/>
        <conditional name="method">
            <param name="otu_picking_method" value="trie"/>
            <param name="trie_reverse_seqs" value=""/>
        </conditional>
        <param name="trie_prefilter" value=""/>
        <param name="non_chimeras_retention" value="union"/>
        <output name="otus" md5="441c2bc34cd6766e75585dbe2ea09f9b"/>
        <output name="log">
            <assert_contents>
                <has_text text="TrieOtuPicker parameters:"/>
            </assert_contents>
        </output>
    </test>
    <!-- uclust_ref against a history reference -->
    <test>
        <param name="input_seqs_filepath" value="pick_otus/seqs.fna"/>
        <conditional name="method">
            <param name="otu_picking_method" value="uclust_ref"/>
            <conditional name="references">
                <param name="source_selector" value="history"/>
                <param name="refseqs_fp" value="pick_otus/refseqs.fasta"/>
            </conditional>
            <param name="similarity" value="0.97"/>
            <param name="enable_rev_strand_match" value=""/>
            <param name="suppress_presort_by_abundance_uclust" value=""/>
            <param name="suppress_new_clusters" value=""/>
            <param name="max_accepts" value="1"/>
            <param name="max_rejects" value="8"/>
            <param name="stepwords" value="8"/>
            <param name="word_length" value="8"/>
        </conditional>
        <param name="trie_prefilter" value=""/>
        <param name="non_chimeras_retention" value="union"/>
        <output name="otus" md5="e44b21022227e9fc213e13fd028efb81"/>
        <output name="log">
            <assert_contents>
                <has_text text="OtuPicker parameters"/>
                <has_text text="Application:uclust"/>
            </assert_contents>
        </output>
        <output name="failures" md5="d41d8cd98f00b204e9800998ecf8427e"/>
    </test>
    <!-- swarm -->
    <test>
        <param name="input_seqs_filepath" value="pick_otus/seqs.fna"/>
        <conditional name="method">
            <param name="otu_picking_method" value="swarm"/>
            <param name="denovo_otu_id_prefix" value="denovo"/>
            <param name="swarm_resolution" value="1"/>
        </conditional>
        <param name="trie_prefilter" value=""/>
        <param name="non_chimeras_retention" value="union"/>
        <output name="otus" md5="9b603183a1116071e762e31525928f94"/>
        <output name="log">
            <assert_contents>
                <has_text text="OtuPicker parameters"/>
            </assert_contents>
        </output>
    </test>
    <!-- prefix_suffix -->
    <test>
        <param name="input_seqs_filepath" value="pick_otus/seqs.fna"/>
        <conditional name="method">
            <param name="otu_picking_method" value="prefix_suffix"/>
            <param name="prefix_length" value="50"/>
            <param name="suffix_length" value="50"/>
        </conditional>
        <param name="trie_prefilter" value=""/>
        <param name="non_chimeras_retention" value="union"/>
        <output name="otus" md5="3bf79c70affb264a977fd8f2f34f2889"/>
        <output name="log">
            <assert_contents>
                <has_text text="PrefixSuffixOtuPicker parameters:"/>
            </assert_contents>
        </output>
    </test>
    <!-- uclust again: same parameters and expectations as the first test -->
    <test>
        <param name="input_seqs_filepath" value="pick_otus/seqs.fna"/>
        <conditional name="method">
            <param name="otu_picking_method" value="uclust"/>
            <param name="similarity" value="0.97"/>
            <param name="denovo_otu_id_prefix" value="denovo"/>
            <param name="enable_rev_strand_match" value=""/>
            <param name="suppress_presort_by_abundance_uclust" value=""/>
            <param name="optimal_uclust" value=""/>
            <param name="exact_uclust" value=""/>
            <param name="user_sort" value=""/>
            <param name="max_accepts" value="1"/>
            <param name="max_rejects" value="8"/>
            <param name="stepwords" value="8"/>
            <param name="word_length" value="8"/>
            <param name="suppress_uclust_stable_sort" value=""/>
            <param name="suppress_prefilter_exact_match" value=""/>
        </conditional>
        <param name="trie_prefilter" value=""/>
        <param name="non_chimeras_retention" value="union"/>
        <output name="otus" md5="2ca01ee393e8a795e5d09a15c5ca77c3"/>
        <output name="log">
            <assert_contents>
                <has_text text="UclustOtuPicker parameters"/>
            </assert_contents>
        </output>
    </test>
</tests>
354 <help><![CDATA[
355 **What it does**
356
357 The OTU picking step assigns similar sequences to operational taxonomic units, or OTUs, by clustering sequences based on a user-defined similarity threshold. Sequences which are similar at or above the threshold level are taken to represent the presence of a taxonomic unit (e.g., a genus, when the similarity threshold is set at 0.94) in the sequence collection.
358
359 Currently, the following clustering methods have been implemented in QIIME:
360
361 1. uclust, creates "seeds" of sequences which generate clusters based on percent identity.
362
363 2. uclust_ref, as uclust, but takes a reference database to use as seeds. New clusters can be toggled on or off.
364
365 3. usearch, creates "seeds" of sequences which generate clusters based on percent identity, filters low abundance clusters, performs de novo and reference based chimera detection.
366
367 4. usearch_ref, as usearch, but takes a reference database to use as seeds. New clusters can be toggled on or off.
368
369 5. sumaclust, creates "seeds" of sequences which generate clusters based on similarity threshold.
370
371 6. swarm, creates "seeds" of sequences which generate clusters based on a resolution threshold.
372
373
374 Chimera checking with usearch 6.X is implemented in identify_chimeric_seqs.py. Chimera checking should be done first with usearch 6.X, and the filtered resulting fasta file can then be clustered.
375
376
377 The primary inputs for pick_otus.py are:
378
379 1. A FASTA file containing sequences to be clustered
380
381 2. An OTU threshold (default is 0.97, roughly corresponding to species-level OTUs);
382
383 3. The method to be applied for clustering sequences into OTUs.
384
385 pick_otus.py takes a standard fasta file as input.
386
387
388 The output consists of two files (i.e. seqs_otus.txt and seqs_otus.log). The .txt file is composed of tab-delimited lines, where the first field on each line corresponds to an (arbitrary) cluster identifier, and the remaining fields correspond to sequence identifiers assigned to that cluster. Sequence identifiers correspond to those provided in the input FASTA file. Usearch (i.e. usearch quality filter) can additionally have log files for each intermediate call to usearch.
389
390 Example lines from the resulting .txt file:
391
392 = ==== ==== ====
393 0 seq1 seq5
394 1 seq2
395 2 seq3
396 3 seq4 seq6 seq7
397 = ==== ==== ====
398
399 This result implies that four clusters were created based on 7 input sequences.
400 The first cluster (cluster id 0) contains two sequences, sequence ids seq1 and
401 seq5; the second cluster (cluster id 1) contains one sequence, sequence id seq2;
402 the third cluster (cluster id 2) contains one sequence, sequence id seq3, and the
403 final cluster (cluster id 3) contains three sequences, sequence ids seq4, seq6,
404 and seq7.
405
406 The resulting .log file contains a list of parameters passed to the pick_otus.py
407 script along with the output location of the resulting .txt file.
408 ]]></help>
<citations>
    <!-- Citations supplied by the "citations" macro, which is not defined in this file. -->
    <expand macro="citations"/>
    <!-- Additional references listed directly by this wrapper. -->
    <citation type="doi">10.1093/bioinformatics/btv231</citation>
    <citation type="doi">10.1093/bioinformatics/btq461</citation>
    <citation type="doi">10.1093/bioinformatics/bts611</citation>
    <!-- NOTE(review): the "/supp-1" suffix looks like the DOI of a supplemental file rather than of the publication itself; confirm it resolves as intended. -->
    <citation type="doi">10.7287/peerj.preprints.386v1/supp-1</citation>
</citations>
416 </tool>