comparison mmseqs2_taxonomy_assignment.xml @ 0:d0acde079e2e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mmseqs2 commit 1400593429eb4e9c6e307df3621825a8b84a6fa7
author iuc
date Thu, 27 Mar 2025 14:38:20 +0000
parents
children 876d26806584
comparison
equal deleted inserted replaced
-1:000000000000 0:d0acde079e2e
1 <tool id="mmseqs2_taxonomy_assignment" name="MMseqs2 Taxonomy Assignments" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>
3 of sequences by comparing them to a reference database
4 </description>
5 <macros>
6 <import>macro.xml</import>
7 </macros>
8 <expand macro="biotools"/>
9 <expand macro="requirements"/>
10 <expand macro="version_command"/>
11 <command detect_errors="exit_code"><![CDATA[
12 ln -s -f '${createdb.input_fasta}' 'input' &&
13 mmseqs createdb
14 'input'
15 'sequenceDB'
16 --dbtype '$createdb.alph_type.dbtype'
17 --shuffle $createdb.shuffle &&
18 ## Copy the selected reference database files (database*) into the job working directory
19 cp -r '$createtaxdb.database_type.mmseqs2_db_select.fields.path'/database* . &&
20 ## Add taxonomy information to the copied reference database
21 mmseqs createtaxdb
22 database
23 'tmp'
24 #if $createtaxdb.tax_mapping_file
25 --tax-mapping-file '$createtaxdb.tax_mapping_file'
26 #end if
27 --tax-mapping-mode '$createtaxdb.tax_mapping_mode'
28 --threads "\${GALAXY_SLOTS:-1}" &&
29 ## Optional: keep only reference sequences belonging to the requested taxa
30 #if $filtertaxseqdb.taxon_list
31 mmseqs filtertaxseqdb
32 'database'
33 'database_filtered'
34 --taxon-list '$filtertaxseqdb.taxon_list'
35 &&
36 #end if
37 ## Taxonomy assignment of the query DB against the reference DB (the filtered copy when a taxon list was given)
38 mmseqs taxonomy
39 'sequenceDB'
40 #if $filtertaxseqdb.taxon_list
41 'database_filtered'
42 #else
43 'database'
44 #end if
45 'output_taxonomy'
46 'tmp'
47 #if str($createdb.alph_type.dbtype) == "1"
48 --comp-bias-corr-scale $createdb.alph_type.comp_bias_corr_scale
49 #elif str($createdb.alph_type.dbtype) == "2"
50 --zdrop $createdb.alph_type.zdrop
51 #end if
52 ##Pre-filter options
53 --add-self-matches $taxonomy.prefilter.add_self_matches
54 -s $taxonomy.prefilter.sensitivity
55 -k $taxonomy.prefilter.kmer_length
56 --target-search-mode $taxonomy.prefilter.target_search_mode
57 ##--k-score TWIN k-mer threshold for generating similar k-mer lists [seq:2147483647,prof:2147483647]
58 --max-seqs $taxonomy.prefilter.max_seqs
59 --split $taxonomy.prefilter.split
60 --split-mode $taxonomy.prefilter.split_mode
61 ##--split-memory-limit BYTE Set max memory per split. E.g. 800B, 5K, 10M, 1G. Default (0) to all available system memory [0]
62 --diag-score $taxonomy.prefilter.diag_score
63 --exact-kmer-matching $taxonomy.prefilter.exact_kmer_matching
64 --mask $taxonomy.prefilter.mask
65 --mask-prob $taxonomy.prefilter.mask_prob
66 --mask-lower-case $taxonomy.prefilter.mask_lower_case
67 --min-ungapped-score $taxonomy.prefilter.min_ungapped_score
68 --spaced-kmer-mode $taxonomy.prefilter.spaced_kmer_mode
69 ##--spaced-kmer-pattern STR User-specified spaced k-mer pattern []
70 ##--local-tmp STR Path where some of the temporary files will be created []
71 ##--disk-space-limit BYTE Set max disk space to use for reverse profile searches. E.g. 800B, 5K, 10M, 1G. Default (0) to all available disk space in the temp folder [0]
72
73 ##Align options
74 -a $taxonomy.align.convertalis
75 ##NOTE(review): --alignment-mode and --alignment-output-mode look similar; both are passed through as separate options
76 --alignment-mode $taxonomy.align.alignment_mode
77 --alignment-output-mode $taxonomy.align.alignment_output_mode
78 --wrapped-scoring $taxonomy.align.wrapped_scoring
79 -e $taxonomy.align.evalue
80 --min-seq-id $taxonomy.align.min_seq_id
81 --min-aln-len $taxonomy.align.min_aln_len
82 --seq-id-mode $taxonomy.align.seq_id_mode
83 --alt-ali $taxonomy.align.alt_ali
84 -c $taxonomy.align.cov
85 --cov-mode $taxonomy.align.cov_mode
86 --max-rejected $taxonomy.align.max_rejected
87 --max-accept $taxonomy.align.max_accept
88 --score-bias $taxonomy.align.score_bias
89 --realign $taxonomy.align.realign
90 --realign-score-bias $taxonomy.align.realign_score_bias
91 --realign-max-seqs $taxonomy.align.realign_max_seqs
92 --corr-score-weight $taxonomy.align.corr_score_weight
93 --exhaustive-search-filter $taxonomy.align.exhaustive_search_filter
94
95 ##Profile options
96 ##--pca Pseudo count admixture strength []
97 ##--pcb Pseudo counts: Neff at half of maximum admixture (range 0.0-inf) []
98 --mask-profile $taxonomy.profile.mask_profile
99 --e-profile $taxonomy.profile.e_profile
100 --wg $taxonomy.profile.wg
101 --filter-msa $taxonomy.profile.filter_msa
102 --filter-min-enable $taxonomy.profile.filter_min_enable
103 --max-seq-id $taxonomy.profile.max_seq_id
104 --qid '$taxonomy.profile.qid'
105 --qsc $taxonomy.profile.qsc
106 --cov $taxonomy.profile.cov
107 --diff $taxonomy.profile.diff
108 --pseudo-cnt-mode $taxonomy.profile.pseudo_cnt_mode
109 --exhaustive-search $taxonomy.profile.exhaustive_search
110 --lca-search $taxonomy.profile.lca_search
111
112 ##Misc options
113 ##--orf-filter INT Prefilter query ORFs with non-selective search
114 ## Only used during nucleotide-vs-protein classification
115 ## NOTE: Consider disabling when classifying short reads [1]
116 --orf-filter-e $taxonomy.misc.orf_filter_e
117 --orf-filter-s $taxonomy.misc.orf_filter_s
118 --lca-mode $taxonomy.misc.lca_mode
119 --tax-output-mode $taxonomy.misc.tax_output_mode
120 --majority $taxonomy.misc.majority
121 --vote-mode $taxonomy.misc.vote_mode
122 ##--lca-ranks STR Add column with specified ranks (',' separated) []
123 --tax-lineage $taxonomy.misc.tax_lineage
124 #if $taxonomy.misc.blacklist# --blacklist '$taxonomy.misc.blacklist' #end if#
125 #if $taxonomy.misc.taxon_list# --taxon-list '$taxonomy.misc.taxon_list' #end if#
126 --rescore-mode $taxonomy.misc.rescore_mode
127 --allow-deletion $taxonomy.misc.allow_deletion
128 --min-length $taxonomy.misc.min_length
129 --max-length $taxonomy.misc.max_length
130 --max-gaps $taxonomy.misc.max_gaps
131 --contig-start-mode $taxonomy.misc.contig_start_mode
132 --contig-end-mode $taxonomy.misc.contig_end_mode
133 --orf-start-mode $taxonomy.misc.orf_start_mode
134 --forward-frames '$taxonomy.misc.forward_frames'
135 --reverse-frames '$taxonomy.misc.reverse_frames'
136 --translation-table $taxonomy.misc.translation_table
137 --translate $taxonomy.misc.translate
138 --use-all-table-starts $taxonomy.misc.use_all_table_starts
139 --id-offset $taxonomy.misc.id_offset
140 --add-orf-stop $taxonomy.misc.add_orf_stop
141 --sequence-overlap $taxonomy.misc.sequence_overlap
142 --sequence-split-mode $taxonomy.misc.sequence_split_mode
143 --headers-split-mode $taxonomy.misc.headers_split_mode
144 --search-type $createtaxdb.database_type.search_type
145 --prefilter-mode $taxonomy.misc.prefilter_mode
146
147 ##Common options
148 ##--compressed INT Write compressed output [0]
149 --threads "\${GALAXY_SLOTS:-1}"
150 ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]
151 --max-seq-len $taxonomy.common.max_seq_len
152 ##--db-load-mode INT Database preload mode 0: auto, 1: fread, 2: mmap, 3: mmap+touch [0]
153 ##--mpi-runner STR Use MPI on compute cluster with this MPI command (e.g. "mpirun -np 42") []
154 ##--force-reuse BOOL Reuse tmp files in tmp/latest folder ignoring parameters and version changes [0]
155 ##--remove-tmp-files BOOL Delete temporary files [0]
156
157 ##Expert options
158 --filter-hits $taxonomy.expert.filter_hits
159 --sort-results $taxonomy.expert.sort_results
160 ##--create-lookup INT Create database lookup file (can be very large) [0]
161 --chain-alignments $taxonomy.expert.chain_alignments
162 --merge-query $taxonomy.expert.merge_query
163 ##--strand INT Strand selection only works for DNA/DNA search 0: reverse, 1: forward, 2: both [1]
164 &&
165 mmseqs createtsv
166 'sequenceDB'
167 'output_taxonomy'
168 'taxo_result.tsv'
169
170 --first-seq-as-repr $createtsv.first_seq_as_repr
171 --target-column $createtsv.target_column
172 --full-header $createtsv.full_header
173 --idx-seq-src $createtsv.idx_seq_src
174 --threads "\${GALAXY_SLOTS:-1}"
175 ##--compressed INT Write compressed output [0]
176 ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]
177 ##--db-output BOOL Return a result DB instead of a text file [0]
178 ## Optional reports: report-mode 0 writes taxo_result.txt (Kraken-style output), report-mode 1 writes taxo_result.html (Krona output)
179 #if str($kraken_report.keep_report) == "Yes"
180 &&
181 mmseqs taxonomyreport
182 #if $filtertaxseqdb.taxon_list
183 'database_filtered'
184 #else
185 'database'
186 #end if
187 'output_taxonomy'
188 'taxo_result.txt'
189 --report-mode 0
190 --threads "\${GALAXY_SLOTS:-1}"
191 #end if
192 #if str($krona_report.keep_report) == "Yes"
193 &&
194 mmseqs taxonomyreport
195 #if $filtertaxseqdb.taxon_list
196 'database_filtered'
197 #else
198 'database'
199 #end if
200 'output_taxonomy'
201 'taxo_result.html'
202 --report-mode 1
203 --threads "\${GALAXY_SLOTS:-1}"
204 #end if
205 ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]
206
207 ]]></command>
208 <inputs>
209 <section name="createdb" title="Convert FASTA/Q file(s) to MMseqs sequence DB format" expanded="true"> <!-- Query sequences, converted by "mmseqs createdb" in the command -->
210 <param name="input_fasta" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input FASTA/FASTQ file" help="" />
211 <conditional name="alph_type"> <!-- dbtype goes to createdb; the per-type option of each branch is passed to "mmseqs taxonomy" -->
212 <param argument="--dbtype" type="select" label="Input type" help="" >
213 <option value="0" selected="true">Auto</option>
214 <option value="1">Amino acid</option>
215 <option value="2">Nucleotides</option>
216 </param>
217 <when value="0"/>
218 <when value="1">
219 <param argument="--comp-bias-corr-scale" type="float" min="0" max="1" value="1" label="Scale composition bias correction" help=""/>
220 </when>
221 <when value="2">
222 <param argument="--zdrop" type="integer" min="0" value="40" label="Maximal allowed difference between score values before alignment is truncated" help=""/>
223 </when>
224 </conditional>
225 <param argument="--shuffle" type="boolean" checked="true" label="Shuffle input database" truevalue="1" falsevalue="0" optional="true" help="" />
226 </section>
227 <section expanded="true" name="createtaxdb" title="Add taxonomic labels to reference sequence DB"> <!-- Reference database choice and taxonomy mapping settings -->
228 <conditional name="database_type"> <!-- every branch lists the mmseqs2_databases entries matching its type and taxonomy columns -->
229 <param type="select" name="type" label="Database type" help="">
230 <option selected="true" value="amino_acid_tax">Amino acid with taxonomy information</option>
231 <option value="nucleotides_tax">Nucleotides with taxonomy information</option>
232 <option value="amino_acid">Amino acid without taxonomy information</option>
233 <option value="nucleotides">Nucleotides without taxonomy information</option>
234 </param>
235 <when value="amino_acid_tax">
236 <param type="select" name="mmseqs2_db_select" label="MMseqs2 databases">
237 <options from_data_table="mmseqs2_databases">
238 <filter column="type" type="static_value" value="aminoacid"/>
239 <filter column="taxonomy" type="static_value" value="yes"/>
240 <validator type="no_options" message="No mmseqs2 database is available"/>
241 </options>
242 </param>
243 <expand macro="search_type_aa"/>
244 </when>
245 <when value="nucleotides_tax">
246 <param type="select" name="mmseqs2_db_select" label="MMseqs2 databases">
247 <options from_data_table="mmseqs2_databases">
248 <filter column="type" type="static_value" value="nucleotide"/>
249 <filter column="taxonomy" type="static_value" value="yes"/>
250 <validator type="no_options" message="No mmseqs2 database is available"/>
251 </options>
252 </param>
253 <expand macro="search_type_nt"/>
254 </when>
255 <when value="amino_acid">
256 <param type="select" name="mmseqs2_db_select" label="MMseqs2 databases">
257 <options from_data_table="mmseqs2_databases">
258 <filter column="type" type="static_value" value="aminoacid"/>
259 <filter column="taxonomy" type="static_value" value="no"/>
260 <validator type="no_options" message="No mmseqs2 database is available"/>
261 </options>
262 </param>
263 <expand macro="search_type_aa"/>
264 </when>
265 <when value="nucleotides">
266 <param type="select" name="mmseqs2_db_select" label="MMseqs2 databases">
267 <options from_data_table="mmseqs2_databases">
268 <filter column="type" type="static_value" value="nucleotide"/>
269 <filter column="taxonomy" type="static_value" value="no"/>
270 <validator type="no_options" message="No mmseqs2 database is available"/>
271 </options>
272 </param>
273 <expand macro="search_type_nt"/>
274 </when>
275 </conditional>
276 <param argument="--tax-mapping-file" type="data" optional="true" format="tabular,tsv,txt" label="File to map sequence identifier to taxonomical identifier"/>
277 <param type="select" argument="--tax-mapping-mode" label="Map taxonomy based on sequence database" help="">
278 <option selected="true" value="0">0: .lookup file</option>
279 <option value="1">1: .source file</option>
280 </param>
281 </section>
282 <section title="Filter taxonomy sequence database" name="filtertaxseqdb"> <!-- when a taxon list is given, the command runs "mmseqs filtertaxseqdb" and uses the filtered database -->
283 <param type="text" argument="--taxon-list" optional="true" value="" label="Taxonomy ID" help="Possibly multiple values separated by ','"/>
284 </section>
285 <section name="taxonomy" title="Taxonomy assignment by computing the lowest common ancestor of homologs">
286 <section name="prefilter" title="Pre-filter"> <!-- Pre-filter options of "mmseqs taxonomy"; further parameters come from the prefilter_common_parameters macro -->
287 <expand macro="prefilter_common_parameters" />
288 <param argument="--spaced-kmer-mode" type="select" label="Spaced k-mer mode" help="">
289 <option value="0">Use consecutive positions in k-mers</option>
290 <option value="1" selected="true">Use spaced k-mers</option>
291 </param>
292 <param argument="--min-ungapped-score" type="integer" min="0" value="15" label="Accept only matches with ungapped alignment score above threshold" help=""/>
293 <param argument="-s" name="sensitivity" type="float" min="0" max="7.5" value="2" label="Sensitivity" help="1.0 faster; 4.0 fast; 7.5 sensitive"/>
294 <param argument="--target-search-mode" type="select" label="Target search mode" help="" >
295 <option value="0" selected="true">Regular k-mer</option>
296 <option value="1">Similar k-mer</option>
297 </param>
298 <param argument="--max-seqs" type="integer" min="0" value="300" label="Maximum results per query sequence allowed to pass the prefilter" help="Affects sensitivity"/>
299 <param argument="--split" type="integer" min="0" value="0" label="Split input into N equally distributed chunks" help="0: set the best split automatically"/>
300 <param argument="--split-mode" type="select" label="Split mode" help="" >
301 <option value="0">Split target db</option>
302 <option value="1">Split query db</option>
303 <option value="2" selected="true">Auto, depending on main memory</option>
304 </param>
305 <param argument="--diag-score" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Use ungapped diagonal scoring during prefilter" help=""/>
306 <param argument="--exact-kmer-matching" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Extract only exact k-mers for matching" help=""/> <!-- 0/1 switch, rendered as a checkbox like the other 0/1 flags; still emits 0 or 1 -->
307 </section>
308 <section name="align" title="Align"> <!-- Alignment options of "mmseqs taxonomy"; further parameters come from the align_common_parameters macro -->
309 <expand macro="align_common_parameters" />
310 <param argument="--alignment-mode" type="select" label="Alignment mode : How to compute the alignment" help="" >
311 <option value="0">Automatic</option>
312 <option value="1" selected="true">Only score and end_pos</option>
313 <option value="2">Also start_pos and cov</option>
314 <option value="3">Also seq.id</option>
315 <option value="4">Only ungapped alignment</option>
316 </param>
317 <param argument="-e" name="evalue" type="float" min="0" value="1" label="E-value threshold" help="List matches below this E-value"/>
318 <param argument="--min-seq-id" type="float" min="0" max="1" value="0" label="Minimum sequence identity" help="List matches above this sequence identity for clustering"/>
319 <param argument="-c" name="cov" type="float" min="0" max="1" value="0" label="List matches above this fraction of aligned (covered) residues" help=""/> <!-- a fraction, so bounded to [0, 1] like min-seq-id -->
320 <param argument="--cov-mode" type="select" label="Coverage mode" help="" >
321 <option value="0" selected="true">Coverage of query and target</option>
322 <option value="1">Coverage of target</option>
323 <option value="2">Coverage of query</option>
324 <option value="3">Target seq. length has to be at least x% of query length</option>
325 <option value="4">Query seq. length has to be at least x% of target length</option>
326 <option value="5">Short seq. needs to be at least x% of the other seq. length</option>
327 </param>
328 <param argument="--max-rejected" type="integer" min="0" value="5" label="Maximum rejected alignments before alignment calculation for a query is stopped" help=""/>
329 <param argument="--max-accept" type="integer" min="0" value="30" label="Maximum accepted alignments before alignment calculation for a query is stopped" help=""/>
330 <param argument="--exhaustive-search-filter" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Filter result during search ?" help=""/>
331 </section>
332 <section name="profile" title="Profile"> <!-- Profile and MSA filtering options passed to "mmseqs taxonomy" -->
333 <param argument="--mask-profile" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Mask query sequence of profile using tantan" help=""/>
334 <param argument="--e-profile" type="float" min="0" value="1e-03" label="Include sequence matches with an E-value below this threshold into the profile" help=""/>
335 <param argument="--wg" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use global sequence weighting for profile calculation" help=""/>
336 <param argument="--filter-msa" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Filter MSA" help=""/>
337 <param argument="--filter-min-enable" type="integer" min="0" value="0" label="Only filter MSAs with more than N sequences, 0 always filters" help=""/>
338 <param argument="--max-seq-id" type="float" min="0" max="1" value="0.9" label="Reduce redundancy of output MSA using max. pairwise sequence identity" help=""/>
339 <param argument="--qid" type="text" value="0" label="Reduce diversity of output MSAs using min.seq. identity with query sequences [0.0,1.0]" help="Alternatively, can be a list of multiple thresholds:
340 E.g.: 0.15,0.30,0.50 to define filter buckets of ]0.15-0.30] and ]0.30-0.50]"/>
341 <param argument="--qsc" type="float" min="-50" max="100" value="-20" label="Reduce diversity of output MSAs using min. score per aligned residue with query sequences" help=""/>
342 <param argument="--cov" type="float" min="0" max="1" value="0" label="Filter output MSAs using min. fraction of query residues covered by matched sequences" help=""/>
343 <param argument="--diff" type="integer" min="0" value="1000" label="Filter MSAs by selecting most diverse set of sequences, keeping at least this many seqs in each MSA block of length 50" help=""/>
344 <param argument="--pseudo-cnt-mode" type="select" label="Pseudo count mode" help="" >
345 <option value="0" selected="true">Substitution-matrix</option>
346 <option value="1">Context-specific pseudocounts</option>
347 </param>
348 <param argument="--exhaustive-search" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Exhaustive search" help=""/>
349 <param argument="--lca-search" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Efficient search for LCA candidates" help=""/>
350 </section>
351 <section name="misc" title="Misc"> <!-- LCA and taxonomy output settings plus ORF extraction and translation options passed to "mmseqs taxonomy" -->
352 <param argument="--orf-filter-e" type="float" min="0" value="1.000E+02" label="E-value threshold used for query ORF prefiltering" help=""/>
353 <param argument="--orf-filter-s" type="float" min="0" value="2" label="Sensitivity used for query ORF prefiltering" help=""/>
354 <param argument="--lca-mode" type="select" label="LCA mode" help="" >
355 <option value="1">Single search LCA</option>
356 <option value="3" selected="true">Approximate 2bLCA</option>
357 <option value="4">Top hit</option>
358 </param>
359 <param argument="--tax-output-mode" type="select" label="Taxonomy output mode" help="" >
360 <option value="0" selected="true">Output LCA</option>
361 <option value="1">Output alignment</option>
362 <option value="2">Output both</option>
363 </param>
364 <param argument="--majority" type="float" min="0" max="1" value="0.5" label="Minimal fraction of agreement among taxonomically assigned sequences of a set" help=""/> <!-- a fraction, so bounded to [0, 1] -->
365 <param argument="--vote-mode" type="select" label="Mode of assigning weights to compute majority" help="" >
366 <option value="0">Uniform</option>
367 <option value="1" selected="true">Minus log E-value</option>
368 <option value="2">Score</option>
369 </param>
370 <param argument="--tax-lineage" type="select" label="Taxonomy lineage" help="" >
371 <option value="0" selected="true">Don't show</option>
372 <option value="1">Add all lineage names</option>
373 <option value="2">Add all lineage taxids</option>
374 </param>
375 <param argument="--blacklist" type="text" value="" label="Comma separated list of ignored taxa in LCA computation" help=""/>
376 <param argument="--taxon-list" type="text" value="" label="Taxonomy ID, possibly multiple values separated by ','" help=""/>
377 <param argument="--rescore-mode" type="select" label="Rescore diagonals with" help="" >
378 <option value="0" selected="true">Hamming distance</option>
379 <option value="1">Local alignment (score only)</option>
380 <option value="2">Local alignment</option>
381 <option value="3">Global alignment</option>
382 <option value="4">Longest alignment fulfilling window quality criterion</option>
383 </param>
384 <param argument="--allow-deletion" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Allow deletions in a MSA" help=""/>
385 <param argument="--min-length" type="integer" min="0" value="30" label="Minimum codon number in open reading frames" help=""/>
386 <param argument="--max-length" type="integer" min="0" value="32734" label="Maximum codon number in open reading frames" help=""/>
387 <param argument="--max-gaps" type="integer" min="0" value="2147483647" label="Maximum number of codons with gaps or unknown residues before an open reading frame is rejected" help=""/>
388 <param argument="--contig-start-mode" type="select" label="Contig start can be" help="" >
389 <option value="0">Incomplete</option>
390 <option value="1">Complete</option>
391 <option value="2" selected="true">Both</option>
392 </param>
393 <param argument="--contig-end-mode" type="select" label="Contig end can be" help="" >
394 <option value="0">Incomplete</option>
395 <option value="1">Complete</option>
396 <option value="2" selected="true">Both</option>
397 </param>
398 <param argument="--orf-start-mode" type="select" label="ORF fragment can be" help="" >
399 <option value="0">From start to stop</option>
400 <option value="1" selected="true">From any to stop</option>
401 <option value="2">From last encountered start to stop (no start in the middle)</option>
402 </param>
403 <param argument="--forward-frames" type="text" value="1,2,3" label="Comma-separated list of frames on the forward strand to be extracted" help=""/>
404 <param argument="--reverse-frames" type="text" value="1,2,3" label="Comma-separated list of frames on the reverse strand to be extracted" help=""/>
405 <param argument="--translation-table" type="select" label="Translation table" help="">
406 <option value="1" selected="true">Canonical</option>
407 <option value="2">The Vertebrate Mitochondrial Code</option>
408 <option value="3">The Yeast Mitochondrial Code</option>
409 <option value="4">The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
410 <option value="5">The Invertebrate Mitochondrial Code</option>
411 <option value="6">The Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
412 <option value="9">The Echinoderm and Flatworm Mitochondrial Code</option>
413 <option value="10">The Euplotid Nuclear Code</option>
414 <option value="11">The Bacterial, Archaeal and Plant Plastid Code</option>
415 <option value="12">The Alternative Yeast Nuclear Code</option>
416 <option value="13">The Ascidian Mitochondrial Code</option>
417 <option value="14">The Alternative Flatworm Mitochondrial Code</option>
418 <option value="15">Blepharisma Nuclear Code</option>
419 <option value="16">Chlorophycean Mitochondrial Code</option>
420 <option value="21">Trematode Mitochondrial Code</option>
421 <option value="22">Scenedesmus obliquus Mitochondrial Code</option>
422 <option value="23">Thraustochytrium Mitochondrial Code</option>
423 <option value="24">Rhabdopleuridae Mitochondrial Code</option>
424 <option value="25">Candidate Division SR1 and Gracilibacteria Code</option>
425 <option value="26">Pachysolen tannophilus Nuclear Code</option>
426 <option value="27">Karyorelict Nuclear Code</option>
427 <option value="28">Condylostoma Nuclear Code</option>
428 <option value="29">Mesodinium Nuclear Code</option>
429 <option value="30">Peritrich Nuclear Code</option>
430 <option value="31">Blastocrithidia Nuclear Code</option>
431 </param>
432 <param argument="--translate" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Translate ORF to amino acid" help=""/>
433 <param argument="--use-all-table-starts" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use all alternatives for a start codon in the genetic table, if false - only ATG (AUG)" help=""/>
434 <param argument="--id-offset" type="integer" min="0" value="0" label="Numeric ids in index file are offset by this value" help=""/>
435 <param argument="--add-orf-stop" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Add stop codon '*' at complete start and end" help=""/>
436 <param argument="--sequence-overlap" type="integer" min="0" value="0" label="Overlap between sequences" help=""/>
437 <param argument="--sequence-split-mode" type="select" label="Sequence split mode" help="" >
438 <option value="0">Copy data</option>
439 <option value="1" selected="true">Soft link data and write new index</option>
440 </param>
441 <param argument="--headers-split-mode" type="select" label="Headers split mode" help="" >
442 <option value="0" selected="true">Split position</option>
443 <option value="1">Original header</option>
444 </param>
445 <param argument="--prefilter-mode" type="select" label="Prefilter mode" help="" >
446 <option value="0" selected="true">Kmer/ungapped</option>
447 <option value="1">Ungapped</option>
448 <option value="2">No filter</option>
449 </param>
450 </section>
451 <expand macro="common_section"/>
452 <section title="Expert" name="expert"> <!-- Expert options of "mmseqs taxonomy"; further parameters come from the expert_common_parameters macro -->
453 <expand macro="expert_common_parameters"/>
454 <param type="integer" argument="--chain-alignments" value="0" min="0" label="Chain alignments" help=""/>
455 <param type="integer" argument="--merge-query" value="1" min="0" label="Combine ORFs/split sequences to a single entry" help=""/>
456 </section>
457 </section>
458 <section title="Create a tsv report from taxonomy output " name="createtsv"> <!-- Options of the "mmseqs createtsv" call that writes taxo_result.tsv -->
459 <param type="boolean" argument="--first-seq-as-repr" truevalue="1" falsevalue="0" checked="false" label="Use the first sequence of the clustering result as representative sequence" help=""/>
460 <param type="integer" argument="--target-column" value="1" min="0" label="Select a target column, 0 if no target id exists" help=""/>
461 <param type="boolean" argument="--full-header" truevalue="1" falsevalue="0" checked="false" label="Replace DB ID by its corresponding Full Header" help=""/>
462 <param type="select" argument="--idx-seq-src" label="Index sequences source" help="">
463 <option selected="true" value="0">Auto</option>
464 <option value="1">Split/translated sequences</option>
465 <option value="2">Input sequences</option>
466 </param>
467 </section>
468 <conditional name="kraken_report"> <!-- "Yes" runs taxonomyreport with report-mode 0 and keeps the output_taxonomy_kraken dataset -->
469 <param type="select" name="keep_report" label="Do you want a Kraken style report" help="">
470 <option selected="true" value="Yes">Yes</option>
471 <option value="No">No</option>
472 </param>
473 <when value="Yes"></when>
474 <when value="No"></when>
475 </conditional>
476 <conditional name="krona_report"> <!-- "Yes" runs taxonomyreport with report-mode 1 and keeps the output_taxonomy_krona dataset -->
477 <param type="select" name="keep_report" label="Do you want a Krona style report" help="">
478 <option selected="true" value="Yes">Yes</option>
479 <option value="No">No</option>
480 </param>
481 <when value="Yes"></when>
482 <when value="No"></when>
483 </conditional>
484 </inputs>
485 <outputs> <!-- The TSV is always collected; each report is kept only when its selector is "Yes" -->
486 <data format="tabular" name="output_taxonomy_tsv" label="${tool.name} on ${on_string}: Taxonomy Report" from_work_dir="taxo_result.tsv"/>
487 <data format="txt" name="output_taxonomy_kraken" label="${tool.name} on ${on_string}: Kraken Report" from_work_dir="taxo_result.txt">
488 <filter>kraken_report['keep_report'] == "Yes"</filter>
489 </data>
490 <data format="html" name="output_taxonomy_krona" label="${tool.name} on ${on_string}: Krona Report" from_work_dir="taxo_result.html">
491 <filter>krona_report['keep_report'] == "Yes"</filter>
492 </data>
493 </outputs>
494 <tests>
495 <!-- Kraken report only: nucleotide query (dbtype 2), reference filtered to taxon 2, Krona report disabled -->
496 <test expect_num_outputs="2">
497 <section name="createdb">
498 <param ftype="fasta" name="input_fasta" value="light_mystery_reads.fasta"/>
499 <conditional name="alph_type">
500 <param value="2" name="dbtype"/>
501 </conditional>
502 </section>
503 <section name="createtaxdb">
504 <conditional name="database_type">
505 <param value="amino_acid_tax" name="type"/>
506 <param value="UniProtKB/Swiss-Prot-15.6f452-10022025" name="mmseqs2_db_select"/>
507 </conditional>
508 </section>
509 <section name="filtertaxseqdb">
510 <param value="2" name="taxon_list"/>
511 </section>
512 <conditional name="krona_report">
513 <param value="No" name="keep_report"/>
514 </conditional>
515 <output ftype="tabular" name="output_taxonomy_tsv">
516 <assert_contents>
517 <has_line line="MYSTERY.222&#009;1236&#009;class&#009;Gammaproteobacteria&#009;1&#009;1&#009;1&#009;1.000"/>
518 <has_line line="MYSTERY.64&#009;119060&#009;family&#009;Burkholderiaceae&#009;1&#009;1&#009;1&#009;1.000"/>
519 <has_n_columns n="8"/>
520 </assert_contents>
521 </output>
522 <output ftype="txt" name="output_taxonomy_kraken">
523 <assert_contents>
524 <has_text text="93.3333"/>
525 <has_text text="33.3333"/>
526 </assert_contents>
527 </output>
528 </test>
529 <test expect_num_outputs="2"> <!-- Krona report only: default input type and no taxon filter, Kraken report disabled -->
530 <section name="createdb">
531 <param ftype="fasta" name="input_fasta" value="light_mystery_reads.fasta"/>
532 </section>
533 <section name="createtaxdb">
534 <conditional name="database_type">
535 <param value="amino_acid_tax" name="type"/>
536 <param value="UniProtKB/Swiss-Prot-15.6f452-10022025" name="mmseqs2_db_select"/>
537 </conditional>
538 </section>
539 <conditional name="kraken_report">
540 <param value="No" name="keep_report"/>
541 </conditional>
542 <output ftype="tabular" name="output_taxonomy_tsv">
543 <assert_contents>
544 <has_line line="MYSTERY.222&#009;1236&#009;class&#009;Gammaproteobacteria&#009;1&#009;1&#009;1&#009;1.000"/>
545 <has_line line="MYSTERY.64&#009;119060&#009;family&#009;Burkholderiaceae&#009;1&#009;1&#009;1&#009;1.000"/>
546 <has_n_columns n="8"/>
547 </assert_contents>
548 </output>
549 <output ftype="html" name="output_taxonomy_krona">
550 <assert_contents>
551 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/>
552 </assert_contents>
553 </output>
554 </test>
555 <test expect_num_outputs="3"> <!-- Defaults: both report selectors stay at "Yes", so the TSV, Krona and Kraken outputs are all checked -->
556 <section name="createdb">
557 <param ftype="fasta" name="input_fasta" value="light_mystery_reads.fasta"/>
558 </section>
559 <section name="createtaxdb">
560 <conditional name="database_type">
561 <param value="amino_acid_tax" name="type"/>
562 <param value="UniProtKB/Swiss-Prot-15.6f452-10022025" name="mmseqs2_db_select"/>
563 </conditional>
564 </section>
565 <output ftype="tabular" name="output_taxonomy_tsv">
566 <assert_contents>
567 <has_line line="MYSTERY.222&#009;1236&#009;class&#009;Gammaproteobacteria&#009;1&#009;1&#009;1&#009;1.000"/>
568 <has_line line="MYSTERY.64&#009;119060&#009;family&#009;Burkholderiaceae&#009;1&#009;1&#009;1&#009;1.000"/>
569 <has_n_columns n="8"/>
570 </assert_contents>
571 </output>
572 <output ftype="html" name="output_taxonomy_krona">
573 <assert_contents>
574 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/>
575 </assert_contents>
576 </output>
577 <output ftype="txt" name="output_taxonomy_kraken">
578 <assert_contents>
579 <has_text text="93.3333"/>
580 <has_text text="33.3333"/>
581 </assert_contents>
582 </output>
583 </test>
584 </tests>
585 <help><![CDATA[
586 **MMseqs2: ultra fast and sensitive sequence search and clustering suite**
587
588 MMseqs2 (Many-against-Many sequence searching) is a software suite to search and cluster huge protein and nucleotide sequence sets.
589 MMseqs2 is open source GPL-licensed software implemented in C++ for Linux, MacOS, and (as beta version, via cygwin) Windows.
590 The software is designed to run on multiple cores and servers and exhibits very good scalability.
591 MMseqs2 can run 10000 times faster than BLAST. At 100 times its speed it achieves almost the same sensitivity.
592 It can perform profile searches with the same sensitivity as PSI-BLAST at over 400 times its speed.
593
594 **Usage**
595
596 * Convert FASTA/Q file(s) to MMseqs sequence DB format
597 *mmseqs createdb <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB> [options]*
598
599 * Add taxonomic labels to sequence DB
600 *mmseqs createtaxdb <i:sequenceDB> <tmpDir> [options]*
601
602 * Filter taxonomy sequence database
603 *mmseqs filtertaxseqdb <i:taxSeqDB> <o:taxSeqDB> [options]*
604
605 * Taxonomy assignment by computing the lowest common ancestor of homologs
606 *mmseqs taxonomy <i:queryDB> <i:targetDB> <o:taxaDB> <tmpDir> [options]*
607
608 * Convert result DB to tab-separated flat file
609 *mmseqs createtsv <i:queryDB> [<i:targetDB>] <i:resultDB> <o:tsvFile> [options]*
610
611 * Create a taxonomy report in Kraken or Krona format
612 *mmseqs taxonomyreport <i:seqTaxDB> <i:taxResultDB/resultDB/sequenceDB> <o:taxonomyReport> [options]*
613
614 https://github.com/soedinglab/MMseqs2
615
616 ]]></help>
617 <expand macro="citations"/>
618 </tool>