Mercurial > repos > iuc > mmseqs2_taxonomy_assignment
comparison mmseqs2_taxonomy_assignment.xml @ 0:d0acde079e2e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mmseqs2 commit 1400593429eb4e9c6e307df3621825a8b84a6fa7
author | iuc |
---|---|
date | Thu, 27 Mar 2025 14:38:20 +0000 |
parents | |
children | 876d26806584 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d0acde079e2e |
---|---|
1 <tool id="mmseqs2_taxonomy_assignment" name="MMseqs2 Taxonomy Assignments" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description> | |
3 of sequences by comparing them to a reference database | |
4 </description> | |
5 <macros> | |
6 <import>macro.xml</import> | |
7 </macros> | |
8 <expand macro="biotools"/> | |
9 <expand macro="requirements"/> | |
10 <expand macro="version_command"/> | |
11 <!-- Pipeline: createdb (query) -> createtaxdb (reference) -> optional filtertaxseqdb -> taxonomy -> createtsv -> optional taxonomyreport (Kraken / Krona) --> <command detect_errors="exit_code"><![CDATA[ | |
12 ln -s -f '${createdb.input_fasta}' 'input' && | |
13 mmseqs createdb | |
14 'input' | |
15 'sequenceDB' | |
16 --dbtype '$createdb.alph_type.dbtype' | |
17 --shuffle $createdb.shuffle && | |
18 ## Copy the selected reference database files into the job working directory | |
19 cp -r '$createtaxdb.database_type.mmseqs2_db_select.fields.path'/database* . && | |
20 ## Annotate the local copy of the reference database with taxonomy | |
21 mmseqs createtaxdb | |
22 database | |
23 'tmp' | |
24 #if $createtaxdb.tax_mapping_file | |
25 --tax-mapping-file '$createtaxdb.tax_mapping_file' | |
26 #end if | |
27 --tax-mapping-mode '$createtaxdb.tax_mapping_mode' | |
28 --threads "\${GALAXY_SLOTS:-1}" && | |
29 ## Optional: restrict the reference database to the requested taxa | |
30 #if $filtertaxseqdb.taxon_list | |
31 mmseqs filtertaxseqdb | |
32 'database' | |
33 'database_filtered' | |
34 --taxon-list '$filtertaxseqdb.taxon_list' | |
35 && | |
36 #end if | |
37 ## Assign taxonomy to the query sequences (against the filtered reference DB when a taxon list was given) | |
38 mmseqs taxonomy | |
39 'sequenceDB' | |
40 #if $filtertaxseqdb.taxon_list | |
41 'database_filtered' | |
42 #else | |
43 'database' | |
44 #end if | |
45 'output_taxonomy' | |
46 'tmp' | |
47 #if str($createdb.alph_type.dbtype) == "1" | |
48 --comp-bias-corr-scale $createdb.alph_type.comp_bias_corr_scale | |
49 #elif str($createdb.alph_type.dbtype) == "2" | |
50 --zdrop $createdb.alph_type.zdrop | |
51 #end if | |
52 ##Pre-filter options | |
53 --add-self-matches $taxonomy.prefilter.add_self_matches | |
54 -s $taxonomy.prefilter.sensitivity | |
55 -k $taxonomy.prefilter.kmer_length | |
56 --target-search-mode $taxonomy.prefilter.target_search_mode | |
57 ##--k-score TWIN k-mer threshold for generating similar k-mer lists [seq:2147483647,prof:2147483647] | |
58 --max-seqs $taxonomy.prefilter.max_seqs | |
59 --split $taxonomy.prefilter.split | |
60 --split-mode $taxonomy.prefilter.split_mode | |
61 ##--split-memory-limit BYTE Set max memory per split. E.g. 800B, 5K, 10M, 1G. Default (0) to all available system memory [0] | |
62 --diag-score $taxonomy.prefilter.diag_score | |
63 --exact-kmer-matching $taxonomy.prefilter.exact_kmer_matching | |
64 --mask $taxonomy.prefilter.mask | |
65 --mask-prob $taxonomy.prefilter.mask_prob | |
66 --mask-lower-case $taxonomy.prefilter.mask_lower_case | |
67 --min-ungapped-score $taxonomy.prefilter.min_ungapped_score | |
68 --spaced-kmer-mode $taxonomy.prefilter.spaced_kmer_mode | |
69 ##--spaced-kmer-pattern STR User-specified spaced k-mer pattern [] | |
70 ##--local-tmp STR Path where some of the temporary files will be created [] | |
71 ##--disk-space-limit BYTE Set max disk space to use for reverse profile searches. E.g. 800B, 5K, 10M, 1G. Default (0) to all available disk space in the temp folder [0] | |
72 | |
73 ##Align options | |
74 -a $taxonomy.align.convertalis | |
75 ##The next 2 parameters seem to be the same | |
76 --alignment-mode $taxonomy.align.alignment_mode | |
77 --alignment-output-mode $taxonomy.align.alignment_output_mode | |
78 --wrapped-scoring $taxonomy.align.wrapped_scoring | |
79 -e $taxonomy.align.evalue | |
80 --min-seq-id $taxonomy.align.min_seq_id | |
81 --min-aln-len $taxonomy.align.min_aln_len | |
82 --seq-id-mode $taxonomy.align.seq_id_mode | |
83 --alt-ali $taxonomy.align.alt_ali | |
84 -c $taxonomy.align.cov | |
85 --cov-mode $taxonomy.align.cov_mode | |
86 --max-rejected $taxonomy.align.max_rejected | |
87 --max-accept $taxonomy.align.max_accept | |
88 --score-bias $taxonomy.align.score_bias | |
89 --realign $taxonomy.align.realign | |
90 --realign-score-bias $taxonomy.align.realign_score_bias | |
91 --realign-max-seqs $taxonomy.align.realign_max_seqs | |
92 --corr-score-weight $taxonomy.align.corr_score_weight | |
93 --exhaustive-search-filter $taxonomy.align.exhaustive_search_filter | |
94 | |
95 ##Profile options | |
96 ##--pca Pseudo count admixture strength [] | |
97 ##--pcb Pseudo counts: Neff at half of maximum admixture (range 0.0-inf) [] | |
98 --mask-profile $taxonomy.profile.mask_profile | |
99 --e-profile $taxonomy.profile.e_profile | |
100 --wg $taxonomy.profile.wg | |
101 --filter-msa $taxonomy.profile.filter_msa | |
102 --filter-min-enable $taxonomy.profile.filter_min_enable | |
103 --max-seq-id $taxonomy.profile.max_seq_id | |
104 --qid '$taxonomy.profile.qid' | |
105 --qsc $taxonomy.profile.qsc | |
106 --cov $taxonomy.profile.cov | |
107 --diff $taxonomy.profile.diff | |
108 --pseudo-cnt-mode $taxonomy.profile.pseudo_cnt_mode | |
109 --exhaustive-search $taxonomy.profile.exhaustive_search | |
110 --lca-search $taxonomy.profile.lca_search | |
111 | |
112 ##Misc options | |
113 ##--orf-filter INT Prefilter query ORFs with non-selective search | |
114 ## Only used during nucleotide-vs-protein classification | |
115 ## NOTE: Consider disabling when classifying short reads [1] | |
116 --orf-filter-e $taxonomy.misc.orf_filter_e | |
117 --orf-filter-s $taxonomy.misc.orf_filter_s | |
118 --lca-mode $taxonomy.misc.lca_mode | |
119 --tax-output-mode $taxonomy.misc.tax_output_mode | |
120 --majority $taxonomy.misc.majority | |
121 --vote-mode $taxonomy.misc.vote_mode | |
122 ##--lca-ranks STR Add column with specified ranks (',' separated) [] | |
123 --tax-lineage $taxonomy.misc.tax_lineage | |
124 #if $taxonomy.misc.blacklist# --blacklist '$taxonomy.misc.blacklist' #end if# | |
125 #if $taxonomy.misc.taxon_list# --taxon-list '$taxonomy.misc.taxon_list' #end if# | |
126 --rescore-mode $taxonomy.misc.rescore_mode | |
127 --allow-deletion $taxonomy.misc.allow_deletion | |
128 --min-length $taxonomy.misc.min_length | |
129 --max-length $taxonomy.misc.max_length | |
130 --max-gaps $taxonomy.misc.max_gaps | |
131 --contig-start-mode $taxonomy.misc.contig_start_mode | |
132 --contig-end-mode $taxonomy.misc.contig_end_mode | |
133 --orf-start-mode $taxonomy.misc.orf_start_mode | |
134 --forward-frames '$taxonomy.misc.forward_frames' | |
135 --reverse-frames '$taxonomy.misc.reverse_frames' | |
136 --translation-table $taxonomy.misc.translation_table | |
137 --translate $taxonomy.misc.translate | |
138 --use-all-table-starts $taxonomy.misc.use_all_table_starts | |
139 --id-offset $taxonomy.misc.id_offset | |
140 --add-orf-stop $taxonomy.misc.add_orf_stop | |
141 --sequence-overlap $taxonomy.misc.sequence_overlap | |
142 --sequence-split-mode $taxonomy.misc.sequence_split_mode | |
143 --headers-split-mode $taxonomy.misc.headers_split_mode | |
144 --search-type $createtaxdb.database_type.search_type | |
145 --prefilter-mode $taxonomy.misc.prefilter_mode | |
146 | |
147 ##Common options | |
148 ##--compressed INT Write compressed output [0] | |
149 --threads "\${GALAXY_SLOTS:-1}" | |
150 ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3] | |
151 --max-seq-len $taxonomy.common.max_seq_len | |
152 ##--db-load-mode INT Database preload mode 0: auto, 1: fread, 2: mmap, 3: mmap+touch [0] | |
153 ##--mpi-runner STR Use MPI on compute cluster with this MPI command (e.g. "mpirun -np 42") [] | |
154 ##--force-reuse BOOL Reuse tmp files in tmp/latest folder ignoring parameters and version changes [0] | |
155 ##--remove-tmp-files BOOL Delete temporary files [0] | |
156 | |
157 ##Expert options | |
158 --filter-hits $taxonomy.expert.filter_hits | |
159 --sort-results $taxonomy.expert.sort_results | |
160 ##--create-lookup INT Create database lookup file (can be very large) [0] | |
161 --chain-alignments $taxonomy.expert.chain_alignments | |
162 --merge-query $taxonomy.expert.merge_query | |
163 ##--strand INT Strand selection only works for DNA/DNA search 0: reverse, 1: forward, 2: both [1] | |
164 && | |
165 mmseqs createtsv | |
166 'sequenceDB' | |
167 'output_taxonomy' | |
168 'taxo_result.tsv' | |
169 | |
170 --first-seq-as-repr $createtsv.first_seq_as_repr | |
171 --target-column $createtsv.target_column | |
172 --full-header $createtsv.full_header | |
173 --idx-seq-src $createtsv.idx_seq_src | |
174 --threads "\${GALAXY_SLOTS:-1}" | |
175 ##--compressed INT Write compressed output [0] | |
176 ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3] | |
177 ##--db-output BOOL Return a result DB instead of a text file [0] | |
178 ## Optional reports: --report-mode 0 writes taxo_result.txt (Kraken output), --report-mode 1 writes taxo_result.html (Krona output) | |
179 #if str($kraken_report.keep_report) == "Yes" | |
180 && | |
181 mmseqs taxonomyreport | |
182 #if $filtertaxseqdb.taxon_list | |
183 'database_filtered' | |
184 #else | |
185 'database' | |
186 #end if | |
187 'output_taxonomy' | |
188 'taxo_result.txt' | |
189 --report-mode 0 | |
190 --threads "\${GALAXY_SLOTS:-1}" | |
191 #end if | |
192 #if str($krona_report.keep_report) == "Yes" | |
193 && | |
194 mmseqs taxonomyreport | |
195 #if $filtertaxseqdb.taxon_list | |
196 'database_filtered' | |
197 #else | |
198 'database' | |
199 #end if | |
200 'output_taxonomy' | |
201 'taxo_result.html' | |
202 --report-mode 1 | |
203 --threads "\${GALAXY_SLOTS:-1}" | |
204 #end if | |
205 ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3] | |
206 | |
207 ]]></command> | |
208 <inputs> | |
209 <section name="createdb" title="Convert FASTA/Q file(s) to MMseqs sequence DB format" expanded="true"> <!-- Query input and options read by the `mmseqs createdb` step of the command --> | |
210 <param name="input_fasta" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input fasta file" help="" /> | |
211 <conditional name="alph_type"> <!-- dbtype is passed to createdb; the command also uses it to pick the extra `mmseqs taxonomy` option: comp_bias_corr_scale for "1", zdrop for "2", none for "0" --> | |
212 <param argument="--dbtype" type="select" label="Input type" help="" > | |
213 <option value="0" selected="true">Auto</option> | |
214 <option value="1">Amino acid</option> | |
215 <option value="2">Nucleotides</option> | |
216 </param> | |
217 <when value="0"/> | |
218 <when value="1"> | |
219 <param argument="--comp-bias-corr-scale" type="float" min="0" max="1" value="1" label="Scale composition bias correction" help=""/> | |
220 </when> | |
221 <when value="2"> | |
222 <param argument="--zdrop" type="integer" min="0" value="40" label="Maximal allowed difference between score values before alignment is truncated" help=""/> | |
223 </when> | |
224 </conditional> | |
225 <param argument="--shuffle" type="boolean" checked="true" label="Shuffle input database" truevalue="1" falsevalue="0" optional="true" help="" /> | |
226 </section> | |
227 <section name="createtaxdb" title="Add taxonomic labels to reference sequence DB" expanded="true"> <!-- Reference database selection and options read by the `mmseqs createtaxdb` step of the command --> | |
228 <conditional name="database_type"> <!-- Each branch lists the mmseqs2_databases entries whose "type" and "taxonomy" columns match the chosen database type --> | |
229 <param name="type" type="select" label="Database type" help="" > | |
230 <option value="amino_acid_tax" selected="true">Amino acid with taxonomy information</option> | |
231 <option value="nucleotides_tax">Nucleotides with taxonomy information</option> | |
232 <option value="amino_acid">Amino acid without taxonomy information</option> | |
233 <option value="nucleotides">Nucleotides without taxonomy information</option> | |
234 </param> | |
235 <when value="amino_acid_tax"> | |
236 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> | |
237 <options from_data_table="mmseqs2_databases"> | |
238 <filter type="static_value" value="aminoacid" column="type"/> | |
239 <filter type="static_value" value="yes" column="taxonomy"/> | |
240 <validator message="No mmseqs2 database is available" type="no_options"/> | |
241 </options> | |
242 </param> | |
243 <expand macro="search_type_aa" /> <!-- presumably defines the search_type param that the command passes as search-type; macro body is in macro.xml, confirm there --> | |
244 </when> | |
245 <when value="nucleotides_tax"> | |
246 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> | |
247 <options from_data_table="mmseqs2_databases"> | |
248 <filter type="static_value" value="nucleotide" column="type"/> | |
249 <filter type="static_value" value="yes" column="taxonomy"/> | |
250 <validator message="No mmseqs2 database is available" type="no_options"/> | |
251 </options> | |
252 </param> | |
253 <expand macro="search_type_nt" /> | |
254 </when> | |
255 <when value="amino_acid"> | |
256 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> | |
257 <options from_data_table="mmseqs2_databases"> | |
258 <filter type="static_value" value="aminoacid" column="type"/> | |
259 <filter type="static_value" value="no" column="taxonomy"/> | |
260 <validator message="No mmseqs2 database is available" type="no_options"/> | |
261 </options> | |
262 </param> | |
263 <expand macro="search_type_aa" /> | |
264 </when> | |
265 <when value="nucleotides"> | |
266 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> | |
267 <options from_data_table="mmseqs2_databases"> | |
268 <filter type="static_value" value="nucleotide" column="type"/> | |
269 <filter type="static_value" value="no" column="taxonomy"/> | |
270 <validator message="No mmseqs2 database is available" type="no_options"/> | |
271 </options> | |
272 </param> | |
273 <expand macro="search_type_nt" /> | |
274 </when> | |
275 </conditional> | |
276 <param argument="--tax-mapping-file" type="data" format="tabular,tsv,txt" label="File to map sequence identifier to taxonomical identifier" optional="true"/> <!-- only forwarded to createtaxdb when a file is supplied --> | |
277 <param argument="--tax-mapping-mode" type="select" label="Map taxonomy based on sequence database" help="" > | |
278 <option value="0" selected="true">0: .lookup file</option> | |
279 <option value="1">1: .source file</option> | |
280 </param> | |
281 </section> | |
282 <section title="Filter taxonomy sequence database" name="filtertaxseqdb"> <!-- A non-empty taxon list makes the command run `mmseqs filtertaxseqdb` and use the filtered database afterwards --> | |
283 <param argument="--taxon-list" type="text" value="" optional="true" help="Possibly multiple values separated by ','" label="Taxonomy ID"/> | |
284 </section> | |
285 <section name="taxonomy" title="Taxonomy assignment by computing the lowest common ancestor of homologs"> | |
286 <section name="prefilter" title="Pre-filter"> <!-- Pre-filter options; the command forwards them to `mmseqs taxonomy` as $taxonomy.prefilter.* --> | |
287 <expand macro="prefilter_common_parameters" /> <!-- presumably supplies the remaining prefilter params the command reads (kmer_length, mask, mask_prob, ...); defined outside this file, confirm in macro.xml --> | |
288 <param argument="--spaced-kmer-mode" type="select" label="Spaced k-mer mode" help=""> | |
289 <option value="0">Use consecutive positions in k-mers</option> | |
290 <option value="1" selected="true">Use spaced k-mers</option> | |
291 </param> | |
292 <param argument="--min-ungapped-score" type="integer" min="0" value="15" label="Accept only matches with ungapped alignment score above threshold" help=""/> | |
293 <param argument="-s" name="sensitivity" type="float" min="0" max="7.5" value="2" label="Sensitivity" help="1.0 faster; 4.0 fast; 7.5 sensitive"/> | |
294 <param argument="--target-search-mode" type="select" label="Target search mode" help="" > | |
295 <option value="0" selected="true">Regular k-mer</option> | |
296 <option value="1">Similar k-mer</option> | |
297 </param> | |
298 <param argument="--max-seqs" type="integer" min="0" value="300" label="Maximum results per query sequence allowed to pass the prefilter" help="Affects sensitivity"/> | |
299 <param argument="--split" type="integer" min="0" value="0" label="Split input into N equally distributed chunks" help="0: set the best split automatically"/> | |
300 <param argument="--split-mode" type="select" label="Split mode" help="" > | |
301 <option value="0">Split target db</option> | |
302 <option value="1">Split query db</option> | |
303 <option value="2" selected="true">Auto, depending on main memory</option> | |
304 </param> | |
305 <param argument="--diag-score" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Use ungapped diagonal scoring during prefilter" help=""/> | |
306 <param argument="--exact-kmer-matching" type="integer" min="0" max="1" value="0" label="Extract only exact k-mers for matching" help=""/> | |
307 </section> | |
308 <section name="align" title="Align"> <!-- Alignment options; the command forwards them to `mmseqs taxonomy` as $taxonomy.align.* --> | |
309 <expand macro="align_common_parameters" /> | |
310 <param argument="--alignment-mode" type="select" label="Alignment mode : How to compute the alignment" help="" > | |
311 <option value="0">Automatic</option> | |
312 <option value="1" selected="true">Only score and end_pos</option> | |
313 <option value="2">Also start_pos and cov</option> | |
314 <option value="3">Also seq.id</option> | |
315 <option value="4">Only ungapped alignment</option> | |
316 </param> | |
317 <param argument="-e" name="evalue" type="float" min="0" value="1" label="E-value threshold" help="List matches below this E-value"/> | |
318 <param argument="--min-seq-id" type="float" min="0" max="1" value="0" label="Minimum sequence identity" help="List matches above this sequence identity for clustering"/> | |
319 <param argument="-c" name="cov" type="float" min="0" max="1" value="0" label="List matches above this fraction of aligned (covered) residues" help=""/> <!-- a fraction, so bounded to [0,1] like min-seq-id --> | |
320 <param argument="--cov-mode" type="select" label="Coverage mode" help="" > | |
321 <option value="0" selected="true">Coverage of query and target</option> | |
322 <option value="1">Coverage of target</option> | |
323 <option value="2">Coverage of query</option> | |
324 <option value="3">Target seq. length has to be at least x% of query length</option> | |
325 <option value="4">Query seq. length has to be at least x% of target length</option> | |
326 <option value="5">Short seq. needs to be at least x% of the other seq. length</option> | |
327 </param> | |
328 <param argument="--max-rejected" type="integer" min="0" value="5" label="Maximum rejected alignments before alignment calculation for a query is stopped" help=""/> | |
329 <param argument="--max-accept" type="integer" min="0" value="30" label="Maximum accepted alignments before alignment calculation for a query is stopped" help=""/> | |
330 <param argument="--exhaustive-search-filter" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Filter result during search ?" help=""/> | |
331 </section> | |
332 <section name="profile" title="Profile"> <!-- Profile options; the command forwards them to `mmseqs taxonomy` as $taxonomy.profile.* --> | |
333 <param argument="--mask-profile" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Mask query sequence of profile using tantan" help=""/> | |
334 <param argument="--e-profile" type="float" min="0" value="1e-03" label="Include sequence matches with an E-value below this threshold into the profile" help=""/> | |
335 <param argument="--wg" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use global sequence weighting for profile calculation" help=""/> | |
336 <param argument="--filter-msa" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Filter MSA" help=""/> | |
337 <param argument="--filter-min-enable" type="integer" min="0" value="0" label="Only filter MSAs with more than N sequences, 0 always filters" help=""/> | |
338 <param argument="--max-seq-id" type="float" min="0" max="1" value="0.9" label="Reduce redundancy of output MSA using max. pairwise sequence identity" help=""/> | |
339 <param argument="--qid" type="text" value="0" label="Reduce diversity of output MSAs using min.seq. identity with query sequences [0.0,1.0]" help="Alternatively, can be a list of multiple thresholds: | |
340 E.g.: 0.15,0.30,0.50 defines filter buckets of ]0.15-0.30] and ]0.30-0.50]"/> | |
341 <param argument="--qsc" type="float" min="-50" max="100" value="-20" label="Reduce diversity of output MSAs using min. score per aligned residue with query sequences" help=""/> | |
342 <param argument="--cov" type="float" min="0" max="1" value="0" label="Filter output MSAs using min. fraction of query residues covered by matched sequences" help=""/> | |
343 <param argument="--diff" type="integer" min="0" value="1000" label="Filter MSAs by selecting most diverse set of sequences, keeping at least this many seqs in each MSA block of length 50" help=""/> | |
344 <param argument="--pseudo-cnt-mode" type="select" label="Pseudo count mode" help="" > | |
345 <option value="0" selected="true">Substitution-matrix</option> | |
346 <option value="1">Context-specific pseudocounts</option> | |
347 </param> | |
348 <param argument="--exhaustive-search" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Exhaustive search" help=""/> | |
349 <param argument="--lca-search" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Efficient search for LCA candidates" help=""/> | |
350 </section> | |
351 <section name="misc" title="Misc"> <!-- Miscellaneous LCA, ORF-extraction and splitting options; the command forwards them to `mmseqs taxonomy` as $taxonomy.misc.* --> | |
352 <param argument="--orf-filter-e" type="float" min="0" value="1.000E+02" label="E-value threshold used for query ORF prefiltering" help=""/> | |
353 <param argument="--orf-filter-s" type="float" min="0" value="2" label="Sensitivity used for query ORF prefiltering" help=""/> | |
354 <param argument="--lca-mode" type="select" label="LCA mode" help="" > | |
355 <option value="1">Single search LCA</option> | |
356 <option value="3" selected="true">Approximate 2bLCA</option> | |
357 <option value="4">Top hit</option> | |
358 </param> | |
359 <param argument="--tax-output-mode" type="select" label="Taxonomy output mode" help="" > | |
360 <option value="0" selected="true">Output LCA</option> | |
361 <option value="1">Output alignment</option> | |
362 <option value="2">Output both</option> | |
363 </param> | |
364 <param argument="--majority" type="float" min="0" value="0.5" label="Minimal fraction of agreement among taxonomically assigned sequences of a set" help=""/> | |
365 <param argument="--vote-mode" type="select" label="Mode of assigning weights to compute majority" help="" > | |
366 <option value="0">Uniform</option> | |
367 <option value="1" selected="true">Minus log E-value</option> | |
368 <option value="2">Score</option> | |
369 </param> | |
370 <param argument="--tax-lineage" type="select" label="Taxonomy lineage" help="" > | |
371 <option value="0" selected="true">Don't show</option> | |
372 <option value="1">Add all lineage names</option> | |
373 <option value="2">Add all lineage taxids</option> | |
374 </param> | |
375 <param argument="--blacklist" type="text" value="" label="Comma separated list of ignored taxa in LCA computation" help=""/> <!-- empty by default --> | |
376 <param argument="--taxon-list" type="text" value="" label="Taxonomy ID, possibly multiple values separated by ','" help=""/> <!-- empty by default; separate from the taxon_list of the filtertaxseqdb section --> | |
377 <param argument="--rescore-mode" type="select" label="Rescore diagonals with" help="" > | |
378 <option value="0" selected="true">Hamming distance</option> | |
379 <option value="1">Local alignment (score only)</option> | |
380 <option value="2">Local alignment</option> | |
381 <option value="3">Global alignment</option> | |
382 <option value="4">Longest alignment fulfilling window quality criterion</option> | |
383 </param> | |
384 <param argument="--allow-deletion" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Allow deletions in a MSA" help=""/> | |
385 <param argument="--min-length" type="integer" min="0" value="30" label="Minimum codon number in open reading frames" help=""/> | |
386 <param argument="--max-length" type="integer" min="0" value="32734" label="Maximum codon number in open reading frames" help=""/> | |
387 <param argument="--max-gaps" type="integer" min="0" value="2147483647" label="Maximum number of codons with gaps or unknown residues before an open reading frame is rejected" help=""/> | |
388 <param argument="--contig-start-mode" type="select" label="Contig start can be" help="" > | |
389 <option value="0">Incomplete</option> | |
390 <option value="1">Complete</option> | |
391 <option value="2" selected="true">Both</option> | |
392 </param> | |
393 <param argument="--contig-end-mode" type="select" label="Contig end can be" help="" > | |
394 <option value="0">Incomplete</option> | |
395 <option value="1">Complete</option> | |
396 <option value="2" selected="true">Both</option> | |
397 </param> | |
398 <param argument="--orf-start-mode" type="select" label="ORF fragment can be" help="" > | |
399 <option value="0">From start to stop</option> | |
400 <option value="1" selected="true">From any to stop</option> | |
401 <option value="2">From last encountered start to stop (no start in the middle)</option> | |
402 </param> | |
403 <param argument="--forward-frames" type="text" value="1,2,3" label="Comma-separated list of frames on the forward strand to be extracted" help=""/> | |
404 <param argument="--reverse-frames" type="text" value="1,2,3" label="Comma-separated list of frames on the reverse strand to be extracted" help=""/> | |
405 <param argument="--translation-table" type="select" label="Translation table" help=""> | |
406 <option value="1" selected="true">Canonical</option> | |
407 <option value="2">The Vertebrate Mitochondrial Code</option> | |
408 <option value="3">The Yeast Mitochondrial Code</option> | |
409 <option value="4">The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> | |
410 <option value="5">The Invertebrate Mitochondrial Code</option> | |
411 <option value="6">The Ciliate, Dasycladacean and Hexamita Nuclear Code</option> | |
412 <option value="9">The Echinoderm and Flatworm Mitochondrial Code</option> | |
413 <option value="10">The Euplotid Nuclear Code</option> | |
414 <option value="11">The Bacterial, Archaeal and Plant Plastid Code</option> | |
415 <option value="12">The Alternative Yeast Nuclear Code</option> | |
416 <option value="13">The Ascidian Mitochondrial Code</option> | |
417 <option value="14">The Alternative Flatworm Mitochondrial Code</option> | |
418 <option value="15">Blepharisma Nuclear Code</option> | |
419 <option value="16">Chlorophycean Mitochondrial Code</option> | |
420 <option value="21">Trematode Mitochondrial Code</option> | |
421 <option value="22">Scenedesmus obliquus Mitochondrial Code</option> | |
422 <option value="23">Thraustochytrium Mitochondrial Code</option> | |
423 <option value="24">Rhabdopleuridae Mitochondrial Code</option> | |
424 <option value="25">Candidate Division SR1 and Gracilibacteria Code</option> | |
425 <option value="26">Pachysolen tannophilus Nuclear Code</option> | |
426 <option value="27">Karyorelict Nuclear Code</option> | |
427 <option value="28">Condylostoma Nuclear Code</option> | |
428 <option value="29">Mesodinium Nuclear Code</option> | |
429 <option value="30">Peritrich Nuclear Code</option> | |
430 <option value="31">Blastocrithidia Nuclear Code</option> | |
431 </param> | |
432 <param argument="--translate" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Translate ORF to amino acid" help=""/> | |
433 <param argument="--use-all-table-starts" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use all alternatives for a start codon in the genetic table, if false - only ATG (AUG)" help=""/> | |
434 <param argument="--id-offset" type="integer" min="0" value="0" label="Numeric ids in index file are offset by this value" help=""/> | |
435 <param argument="--add-orf-stop" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Add stop codon '*' at complete start and end" help=""/> | |
436 <param argument="--sequence-overlap" type="integer" min="0" value="0" label="Overlap between sequences" help=""/> | |
437 <param argument="--sequence-split-mode" type="select" label="Sequence split mode" help="" > | |
438 <option value="0">Copy data</option> | |
439 <option value="1" selected="true">Soft link data and write new index</option> | |
440 </param> | |
441 <param argument="--headers-split-mode" type="select" label="Headers split mode" help="" > | |
442 <option value="0" selected="true">Split position</option> | |
443 <option value="1">Original header</option> | |
444 </param> | |
445 <param argument="--prefilter-mode" type="select" label="Prefilter mode" help="" > | |
446 <option value="0" selected="true">Kmer/ungapped</option> | |
447 <option value="1">Ungapped</option> | |
448 <option value="2">No filter</option> | |
449 </param> | |
450 </section> | |
451 <expand macro="common_section"/> | |
452 <section title="Expert" name="expert"> <!-- Expert options; the command forwards them to `mmseqs taxonomy` as $taxonomy.expert.* --> | |
453 <expand macro="expert_common_parameters"/> | |
454 <param argument="--chain-alignments" type="integer" value="0" min="0" label="Chain alignments" help=""/> | |
455 <param argument="--merge-query" type="integer" value="1" min="0" label="Combine ORFs/split sequences to a single entry" help=""/> | |
456 </section> | |
457 </section> | |
458 <section title="Create a tsv report from taxonomy output " name="createtsv"> <!-- Options read by the `mmseqs createtsv` step that writes taxo_result.tsv --> | |
459 <param argument="--first-seq-as-repr" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Use the first sequence of the clustering result as representative sequence" help=""/> | |
460 <param argument="--target-column" type="integer" value="1" min="0" label="Select a target column, 0 if no target id exists" help=""/> | |
461 <param argument="--full-header" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Replace DB ID by its corresponding Full Header" help=""/> | |
462 <param argument="--idx-seq-src" type="select" help="" label="Index sequences source"> | |
463 <option selected="true" value="0">Auto</option> | |
464 <option value="1">Split/translated sequences</option> | |
465 <option value="2">Input sequences</option> | |
466 </param> | |
467 </section> | |
468 <conditional name="kraken_report"> <!-- "Yes" makes the command run taxonomyreport in report mode 0 and enables the Kraken output dataset --> | |
469 <param type="select" name="keep_report" help="" label="Do you want a Kraken style report"> | |
470 <option selected="true" value="Yes">Yes</option> | |
471 <option value="No">No</option> | |
472 </param> | |
473 <when value="Yes"></when> | |
474 <when value="No"></when> | |
475 </conditional> | |
476 <conditional name="krona_report"> <!-- "Yes" makes the command run taxonomyreport in report mode 1 and enables the Krona output dataset --> | |
477 <param type="select" name="keep_report" help="" label="Do you want a Krona style report"> | |
478 <option selected="true" value="Yes">Yes</option> | |
479 <option value="No">No</option> | |
480 </param> | |
481 <when value="Yes"></when> | |
482 <when value="No"></when> | |
483 </conditional> | |
484 </inputs> | |
485 <outputs> <!-- The TSV report is always produced; each optional report is collected only when its keep_report selector is "Yes" --> | |
486 <data name="output_taxonomy_tsv" label="${tool.name} on ${on_string}: Taxonomy Report" format="tabular" from_work_dir="taxo_result.tsv"/> | |
487 <data name="output_taxonomy_kraken" label="${tool.name} on ${on_string}: Kraken Report" format="txt" from_work_dir="taxo_result.txt"> | |
488 <filter>kraken_report['keep_report'] == "Yes"</filter> | |
489 </data> | |
490 <data name="output_taxonomy_krona" label="${tool.name} on ${on_string}: Krona Report" format="html" from_work_dir="taxo_result.html"> | |
491 <filter>krona_report['keep_report'] == "Yes"</filter> | |
492 </data> | |
493 </outputs> | |
494 <tests> | |
495 <!-- Test with Kraken report only: input declared as nucleotides (dbtype 2), reference filtered to taxon 2, Krona report disabled, so two outputs are expected --> | |
496 <test expect_num_outputs="2"> | |
497 <section name="createdb"> | |
498 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> | |
499 <conditional name="alph_type"> | |
500 <param name="dbtype" value="2"/> | |
501 </conditional> | |
502 </section> | |
503 <section name="createtaxdb"> | |
504 <conditional name="database_type"> | |
505 <param name="type" value="amino_acid_tax"/> | |
506 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> | |
507 </conditional> | |
508 </section> | |
509 <section name="filtertaxseqdb"> | |
510 <param name="taxon_list" value="2" /> | |
511 </section> | |
512 <conditional name="krona_report"> | |
513 <param name="keep_report" value="No"/> | |
514 </conditional> | |
515 <output name="output_taxonomy_tsv" ftype="tabular"> | |
516 <assert_contents> | |
517 <has_line line="MYSTERY.222	1236	class	Gammaproteobacteria	1	1	1	1.000"/> | |
518 <has_line line="MYSTERY.64	119060	family	Burkholderiaceae	1	1	1	1.000"/> | |
519 <has_n_columns n="8"/> | |
520 </assert_contents> | |
521 </output> | |
522 <output name="output_taxonomy_kraken" ftype="txt"> | |
523 <assert_contents> | |
524 <has_text text="93.3333"/> | |
525 <has_text text="33.3333"/> | |
526 </assert_contents> | |
527 </output> | |
528 </test> | |
529 <test expect_num_outputs="2"> <!-- Test with Krona report only: default input type, no taxon filter, Kraken report disabled --> | |
530 <section name="createdb"> | |
531 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> | |
532 </section> | |
533 <section name="createtaxdb"> | |
534 <conditional name="database_type"> | |
535 <param name="type" value="amino_acid_tax"/> | |
536 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> | |
537 </conditional> | |
538 </section> | |
539 <conditional name="kraken_report"> | |
540 <param name="keep_report" value="No"/> | |
541 </conditional> | |
542 <output name="output_taxonomy_tsv" ftype="tabular"> | |
543 <assert_contents> | |
544 <has_line line="MYSTERY.222	1236	class	Gammaproteobacteria	1	1	1	1.000"/> | |
545 <has_line line="MYSTERY.64	119060	family	Burkholderiaceae	1	1	1	1.000"/> | |
546 <has_n_columns n="8"/> | |
547 </assert_contents> | |
548 </output> | |
549 <output name="output_taxonomy_krona" ftype="html"> | |
550 <assert_contents> | |
551 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/> | |
552 </assert_contents> | |
553 </output> | |
554 </test> | |
555 <test expect_num_outputs="3"> | |
556 <section name="createdb"> | |
557 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> | |
558 </section> | |
559 <section name="createtaxdb"> | |
560 <conditional name="database_type"> | |
561 <param name="type" value="amino_acid_tax"/> | |
562 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> | |
563 </conditional> | |
564 </section> | |
565 <output name="output_taxonomy_tsv" ftype="tabular"> | |
566 <assert_contents> | |
567 <has_line line="MYSTERY.222	1236	class	Gammaproteobacteria	1	1	1	1.000"/> | |
568 <has_line line="MYSTERY.64	119060	family	Burkholderiaceae	1	1	1	1.000"/> | |
569 <has_n_columns n="8"/> | |
570 </assert_contents> | |
571 </output> | |
572 <output name="output_taxonomy_krona" ftype="html"> | |
573 <assert_contents> | |
574 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/> | |
575 </assert_contents> | |
576 </output> | |
577 <output name="output_taxonomy_kraken" ftype="txt"> | |
578 <assert_contents> | |
579 <has_text text="93.3333"/> | |
580 <has_text text="33.3333"/> | |
581 </assert_contents> | |
582 </output> | |
583 </test> | |
584 </tests> | |
585 <help><![CDATA[ | |
586 **MMseqs2: ultra fast and sensitive sequence search and clustering suite** | |
587 | |
588 MMseqs2 (Many-against-Many sequence searching) is a software suite to search and cluster huge protein and nucleotide sequence sets. | |
589 MMseqs2 is open-source, GPL-licensed software implemented in C++ for Linux, macOS, and (as beta version, via Cygwin) Windows. | |
590 The software is designed to run on multiple cores and servers and exhibits very good scalability. | |
591 MMseqs2 can run 10000 times faster than BLAST. At 100 times its speed it achieves almost the same sensitivity. | |
592 It can perform profile searches with the same sensitivity as PSI-BLAST at over 400 times its speed. | |
593 | |
594 **Usage** | |
595 | |
596 * Convert FASTA/Q file(s) to MMseqs sequence DB format | |
597 *mmseqs createdb <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB> [options]* | |
598 | |
599 * Add taxonomic labels to sequence DB | |
600 *mmseqs createtaxdb <i:sequenceDB> <tmpDir> [options]* | |
601 | |
602 * Filter taxonomy sequence database | |
603 *mmseqs filtertaxseqdb <i:taxSeqDB> <o:taxSeqDB> [options]* | |
604 | |
605 * Taxonomy assignment by computing the lowest common ancestor of homologs | |
606 *mmseqs taxonomy <i:queryDB> <i:targetDB> <o:taxaDB> <tmpDir> [options]* | |
607 | |
608 * Convert result DB to tab-separated flat file | |
609 *mmseqs createtsv <i:queryDB> [<i:targetDB>] <i:resultDB> <o:tsvFile> [options]* | |
610 | |
611 * Create a taxonomy report in Kraken or Krona format | |
612 *mmseqs taxonomyreport <i:seqTaxDB> <i:taxResultDB/resultDB/sequenceDB> <o:taxonomyReport> [options]* | |
613 | |
614 https://github.com/soedinglab/MMseqs2 | |
615 | |
616 ]]></help> | |
617 <expand macro="citations"/> | |
618 </tool> |