Mercurial > repos > iuc > mmseqs2_taxonomy_assignment
comparison mmseqs2_taxonomy_assignment.xml @ 2:876d26806584 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mmseqs2 commit 611b90f1628037f05d85905c88629a422d0a2053
author | iuc |
---|---|
date | Mon, 14 Apr 2025 18:39:38 +0000 |
parents | d0acde079e2e |
children |
comparison
equal
deleted
inserted
replaced
1:9e47b28bff57 | 2:876d26806584 |
---|---|
14 'input' | 14 'input' |
15 'sequenceDB' | 15 'sequenceDB' |
16 --dbtype '$createdb.alph_type.dbtype' | 16 --dbtype '$createdb.alph_type.dbtype' |
17 --shuffle $createdb.shuffle && | 17 --shuffle $createdb.shuffle && |
18 | 18 |
19 cp -r '$createtaxdb.database_type.mmseqs2_db_select.fields.path'/database* . && | 19 ##Used only for test |
20 #if str($download_tax_db) == 'true': | |
21 cp -r '$database.database_type.mmseqs2_db_select.fields.path'/database* . && | |
20 | 22 |
21 mmseqs createtaxdb | 23 mmseqs createtaxdb |
22 database | 24 database |
23 'tmp' | 25 'tmp' && |
24 #if $createtaxdb.tax_mapping_file | 26 #end if |
25 --tax-mapping-file '$createtaxdb.tax_mapping_file' | 27 ## |
26 #end if | |
27 --tax-mapping-mode '$createtaxdb.tax_mapping_mode' | |
28 --threads "\${GALAXY_SLOTS:-1}" && | |
29 | 28 |
30 #if $filtertaxseqdb.taxon_list | 29 #if $filtertaxseqdb.taxon_list |
31 mmseqs filtertaxseqdb | 30 mmseqs filtertaxseqdb |
32 'database' | 31 ##Used only for test |
32 #if str($download_tax_db) == 'true': | |
33 'database' | |
34 ## | |
35 #else | |
36 '$database.database_type.mmseqs2_db_select.fields.path'/database | |
37 #end if | |
33 'database_filtered' | 38 'database_filtered' |
34 --taxon-list '$filtertaxseqdb.taxon_list' | 39 --taxon-list '$filtertaxseqdb.taxon_list' |
35 && | 40 && |
36 #end if | 41 #end if |
37 | 42 |
38 mmseqs taxonomy | 43 mmseqs taxonomy |
39 'sequenceDB' | 44 'sequenceDB' |
40 #if $filtertaxseqdb.taxon_list | 45 #if $filtertaxseqdb.taxon_list |
41 'database_filtered' | 46 'database_filtered' |
42 #else | 47 #else |
43 'database' | 48 ##Used only for test |
49 #if str($download_tax_db) == 'true': | |
50 'database' | |
51 ## | |
52 #else | |
53 '$database.database_type.mmseqs2_db_select.fields.path'/database | |
54 #end if | |
44 #end if | 55 #end if |
45 'output_taxonomy' | 56 'output_taxonomy' |
46 'tmp' | 57 'tmp' |
47 #if str($createdb.alph_type.dbtype) == "1" | 58 #if str($createdb.alph_type.dbtype) == "1" |
48 --comp-bias-corr-scale $createdb.alph_type.comp_bias_corr_scale | 59 --comp-bias-corr-scale $createdb.alph_type.comp_bias_corr_scale |
62 --diag-score $taxonomy.prefilter.diag_score | 73 --diag-score $taxonomy.prefilter.diag_score |
63 --exact-kmer-matching $taxonomy.prefilter.exact_kmer_matching | 74 --exact-kmer-matching $taxonomy.prefilter.exact_kmer_matching |
64 --mask $taxonomy.prefilter.mask | 75 --mask $taxonomy.prefilter.mask |
65 --mask-prob $taxonomy.prefilter.mask_prob | 76 --mask-prob $taxonomy.prefilter.mask_prob |
66 --mask-lower-case $taxonomy.prefilter.mask_lower_case | 77 --mask-lower-case $taxonomy.prefilter.mask_lower_case |
78 --mask-n-repeat $taxonomy.prefilter.mask_n_repeat | |
67 --min-ungapped-score $taxonomy.prefilter.min_ungapped_score | 79 --min-ungapped-score $taxonomy.prefilter.min_ungapped_score |
68 --spaced-kmer-mode $taxonomy.prefilter.spaced_kmer_mode | 80 --spaced-kmer-mode $taxonomy.prefilter.spaced_kmer_mode |
69 ##--spaced-kmer-pattern STR User-specified spaced k-mer pattern [] | 81 ##--spaced-kmer-pattern STR User-specified spaced k-mer pattern [] |
70 ##--local-tmp STR Path where some of the temporary files will be created [] | 82 ##--local-tmp STR Path where some of the temporary files will be created [] |
71 ##--disk-space-limit BYTE Set max disk space to use for reverse profile searches. E.g. 800B, 5K, 10M, 1G. Default (0) to all available disk space in the temp folder [0] | 83 ##--disk-space-limit BYTE Set max disk space to use for reverse profile searches. E.g. 800B, 5K, 10M, 1G. Default (0) to all available disk space in the temp folder [0] |
135 --reverse-frames $taxonomy.misc.reverse_frames | 147 --reverse-frames $taxonomy.misc.reverse_frames |
136 --translation-table $taxonomy.misc.translation_table | 148 --translation-table $taxonomy.misc.translation_table |
137 --translate $taxonomy.misc.translate | 149 --translate $taxonomy.misc.translate |
138 --use-all-table-starts $taxonomy.misc.use_all_table_starts | 150 --use-all-table-starts $taxonomy.misc.use_all_table_starts |
139 --id-offset $taxonomy.misc.id_offset | 151 --id-offset $taxonomy.misc.id_offset |
140 --add-orf-stop $taxonomy.misc.add_orf_stop | |
141 --sequence-overlap $taxonomy.misc.sequence_overlap | 152 --sequence-overlap $taxonomy.misc.sequence_overlap |
142 --sequence-split-mode $taxonomy.misc.sequence_split_mode | 153 --sequence-split-mode $taxonomy.misc.sequence_split_mode |
143 --headers-split-mode $taxonomy.misc.headers_split_mode | 154 --headers-split-mode $taxonomy.misc.headers_split_mode |
144 --search-type $createtaxdb.database_type.search_type | 155 --search-type $database.database_type.search_type |
145 --prefilter-mode $taxonomy.misc.prefilter_mode | 156 --prefilter-mode $taxonomy.misc.prefilter_mode |
146 | 157 |
147 ##Common options | 158 ##Common options |
148 ##--compressed INT Write compressed output [0] | 159 ##--compressed INT Write compressed output [0] |
149 --threads "\${GALAXY_SLOTS:-1}" | 160 --threads "\${GALAXY_SLOTS:-1}" |
180 && | 191 && |
181 mmseqs taxonomyreport | 192 mmseqs taxonomyreport |
182 #if $filtertaxseqdb.taxon_list | 193 #if $filtertaxseqdb.taxon_list |
183 'database_filtered' | 194 'database_filtered' |
184 #else | 195 #else |
185 'database' | 196 ##Used only for test |
197 #if str($download_tax_db) == 'true': | |
198 'database' | |
199 ## | |
200 #else | |
201 '$database.database_type.mmseqs2_db_select.fields.path'/database | |
202 #end if | |
186 #end if | 203 #end if |
187 'output_taxonomy' | 204 'output_taxonomy' |
188 'taxo_result.txt' | 205 'taxo_result.txt' |
189 --report-mode 0 | 206 --report-mode 0 |
190 --threads "\${GALAXY_SLOTS:-1}" | 207 --threads "\${GALAXY_SLOTS:-1}" |
193 && | 210 && |
194 mmseqs taxonomyreport | 211 mmseqs taxonomyreport |
195 #if $filtertaxseqdb.taxon_list | 212 #if $filtertaxseqdb.taxon_list |
196 'database_filtered' | 213 'database_filtered' |
197 #else | 214 #else |
198 'database' | 215 ##Used only for test |
216 #if str($download_tax_db) == 'true': | |
217 'database' | |
218 ## | |
219 #else | |
220 '$database.database_type.mmseqs2_db_select.fields.path'/database | |
221 #end if | |
199 #end if | 222 #end if |
200 'output_taxonomy' | 223 'output_taxonomy' |
201 'taxo_result.html' | 224 'taxo_result.html' |
202 --report-mode 1 | 225 --report-mode 1 |
203 --threads "\${GALAXY_SLOTS:-1}" | 226 --threads "\${GALAXY_SLOTS:-1}" |
204 #end if | 227 #end if |
205 ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3] | 228 ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3] |
206 | 229 |
207 ]]></command> | 230 ]]></command> |
208 <inputs> | 231 <inputs> |
232 <!-- Used only for tests: this makes it possible to download the taxonomy part of the database at test time instead of storing it in the test data directory, where it would be too large and cannot be reduced. --> | |
233 <param name="download_tax_db" type="hidden" value=""/> | |
234 <!-- --> | |
209 <section name="createdb" title="Convert FASTA/Q file(s) to MMseqs sequence DB format" expanded="true"> | 235 <section name="createdb" title="Convert FASTA/Q file(s) to MMseqs sequence DB format" expanded="true"> |
210 <param name="input_fasta" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input fasta file" help="" /> | 236 <param name="input_fasta" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input fasta file" help="" /> |
211 <conditional name="alph_type"> | 237 <conditional name="alph_type"> |
212 <param argument="--dbtype" type="select" label="Input type" help="" > | 238 <param argument="--dbtype" type="select" label="Input type" help="" > |
213 <option value="0" selected="true">Auto</option> | 239 <option value="0" selected="true">Auto</option> |
222 <param argument="--zdrop" type="integer" min="0" value="40" label="Maximal allowed difference between score values before alignment is truncated" help=""/> | 248 <param argument="--zdrop" type="integer" min="0" value="40" label="Maximal allowed difference between score values before alignment is truncated" help=""/> |
223 </when> | 249 </when> |
224 </conditional> | 250 </conditional> |
225 <param argument="--shuffle" type="boolean" checked="true" label="Shuffle input database" truevalue="1" falsevalue="0" optional="true" help="" /> | 251 <param argument="--shuffle" type="boolean" checked="true" label="Shuffle input database" truevalue="1" falsevalue="0" optional="true" help="" /> |
226 </section> | 252 </section> |
227 <section name="createtaxdb" title="Add taxonomic labels to reference sequence DB" expanded="true"> | 253 <section name="database" title="Choose the taxonomic reference database that you want to use" expanded="true"> |
228 <conditional name="database_type"> | 254 <conditional name="database_type"> |
229 <param name="type" type="select" label="Database type" help="" > | 255 <param name="type" type="select" label="Database type" help="" > |
230 <option value="amino_acid_tax" selected="true">Amino acid with taxonomy information</option> | 256 <option value="amino_acid_tax" selected="true">Amino acid with taxonomy information</option> |
231 <option value="nucleotides_tax">Nucleotides with taxonomy information</option> | 257 <option value="nucleotides_tax">Nucleotides with taxonomy information</option> |
232 <option value="amino_acid">Amino acid without taxonomy information</option> | |
233 <option value="nucleotides">Nucleotides without taxonomy information</option> | |
234 </param> | 258 </param> |
235 <when value="amino_acid_tax"> | 259 <when value="amino_acid_tax"> |
236 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> | 260 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> |
237 <options from_data_table="mmseqs2_databases"> | 261 <options from_data_table="mmseqs2_databases"> |
238 <filter type="static_value" value="aminoacid" column="type"/> | 262 <filter type="static_value" value="aminoacid" column="type"/> |
250 <validator message="No mmseqs2 database is available" type="no_options"/> | 274 <validator message="No mmseqs2 database is available" type="no_options"/> |
251 </options> | 275 </options> |
252 </param> | 276 </param> |
253 <expand macro="search_type_nt" /> | 277 <expand macro="search_type_nt" /> |
254 </when> | 278 </when> |
255 <when value="amino_acid"> | |
256 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> | |
257 <options from_data_table="mmseqs2_databases"> | |
258 <filter type="static_value" value="aminoacid" column="type"/> | |
259 <filter type="static_value" value="no" column="taxonomy"/> | |
260 <validator message="No mmseqs2 database is available" type="no_options"/> | |
261 </options> | |
262 </param> | |
263 <expand macro="search_type_aa" /> | |
264 </when> | |
265 <when value="nucleotides"> | |
266 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> | |
267 <options from_data_table="mmseqs2_databases"> | |
268 <filter type="static_value" value="nucleotide" column="type"/> | |
269 <filter type="static_value" value="no" column="taxonomy"/> | |
270 <validator message="No mmseqs2 database is available" type="no_options"/> | |
271 </options> | |
272 </param> | |
273 <expand macro="search_type_nt" /> | |
274 </when> | |
275 </conditional> | 279 </conditional> |
276 <param argument="--tax-mapping-file" type="data" format="tabular,tsv,txt" label="File to map sequence identifier to taxonomical identifier" optional="true"/> | |
277 <param argument="--tax-mapping-mode" type="select" label="Map taxonomy based on sequence database" help="" > | |
278 <option value="0" selected="true">0: .lookup file</option> | |
279 <option value="1">1: .source file</option> | |
280 </param> | |
281 </section> | 280 </section> |
282 <section name="filtertaxseqdb" title="Filter taxonomy sequence database"> | 281 <section name="filtertaxseqdb" title="Filter taxonomy sequence database"> |
283 <param argument="--taxon-list" type="text" optional="true" value="" label="Taxonomy ID" help="Possibly multiple values separated by ','"/> | 282 <param argument="--taxon-list" type="text" optional="true" value="" label="Taxonomy ID" help="Possibly multiple values separated by ','"/> |
284 </section> | 283 </section> |
285 <section name="taxonomy" title="Taxonomy assignment by computing the lowest common ancestor of homologs"> | 284 <section name="taxonomy" title="Taxonomy assignment by computing the lowest common ancestor of homologs"> |
430 <option value="31">Blastocrithidia Nuclear Code</option> | 429 <option value="31">Blastocrithidia Nuclear Code</option> |
431 </param> | 430 </param> |
432 <param argument="--translate" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Translate ORF to amino acid" help=""/> | 431 <param argument="--translate" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Translate ORF to amino acid" help=""/> |
433 <param argument="--use-all-table-starts" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use all alternatives for a start codon in the genetic table, if false - only ATG (AUG)" help=""/> | 432 <param argument="--use-all-table-starts" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use all alternatives for a start codon in the genetic table, if false - only ATG (AUG)" help=""/> |
434 <param argument="--id-offset" type="integer" min="0" value="0" label="Numeric ids in index file are offset by this value" help=""/> | 433 <param argument="--id-offset" type="integer" min="0" value="0" label="Numeric ids in index file are offset by this value" help=""/> |
435 <param argument="--add-orf-stop" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Add stop codon '*' at complete start and end" help=""/> | |
436 <param argument="--sequence-overlap" type="integer" min="0" value="0" label="Overlap between sequences" help=""/> | 434 <param argument="--sequence-overlap" type="integer" min="0" value="0" label="Overlap between sequences" help=""/> |
437 <param argument="--sequence-split-mode" type="select" label="Sequence split mode" help="" > | 435 <param argument="--sequence-split-mode" type="select" label="Sequence split mode" help="" > |
438 <option value="0">Copy data</option> | 436 <option value="0">Copy data</option> |
439 <option value="1" selected="true">Soft link data and write new index</option> | 437 <option value="1" selected="true">Soft link data and write new index</option> |
440 </param> | 438 </param> |
492 </data> | 490 </data> |
493 </outputs> | 491 </outputs> |
494 <tests> | 492 <tests> |
495 <!-- Test with Kraken report --> | 493 <!-- Test with Kraken report --> |
496 <test expect_num_outputs="2"> | 494 <test expect_num_outputs="2"> |
495 <param name="download_tax_db" value="true"/> | |
497 <section name="createdb"> | 496 <section name="createdb"> |
498 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> | 497 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> |
499 <conditional name="alph_type"> | 498 <conditional name="alph_type"> |
500 <param name="dbtype" value="2"/> | 499 <param name="dbtype" value="2"/> |
501 </conditional> | 500 </conditional> |
502 </section> | 501 </section> |
503 <section name="createtaxdb"> | 502 <section name="database"> |
504 <conditional name="database_type"> | 503 <conditional name="database_type"> |
505 <param name="type" value="amino_acid_tax"/> | 504 <param name="type" value="amino_acid_tax"/> |
506 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> | 505 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> |
507 </conditional> | 506 </conditional> |
508 </section> | 507 </section> |
509 <section name="filtertaxseqdb"> | 508 <section name="filtertaxseqdb"> |
510 <param name="taxon_list" value="2" /> | 509 <param name="taxon_list" value="2" /> |
510 </section> | |
511 <section name="taxonomy"> | |
512 <section name="prefilter"> | |
513 <param name="mask_n_repeat" value="1" /> | |
514 </section> | |
511 </section> | 515 </section> |
512 <conditional name="krona_report"> | 516 <conditional name="krona_report"> |
513 <param name="keep_report" value="No"/> | 517 <param name="keep_report" value="No"/> |
514 </conditional> | 518 </conditional> |
515 <output name="output_taxonomy_tsv" ftype="tabular"> | 519 <output name="output_taxonomy_tsv" ftype="tabular"> |
516 <assert_contents> | 520 <assert_contents> |
517 <has_line line="MYSTERY.222	1236	class	Gammaproteobacteria	1	1	1	1.000"/> | |
518 <has_line line="MYSTERY.64	119060	family	Burkholderiaceae	1	1	1	1.000"/> | 521 <has_line line="MYSTERY.64	119060	family	Burkholderiaceae	1	1	1	1.000"/> |
519 <has_n_columns n="8"/> | 522 <has_n_columns n="8"/> |
520 </assert_contents> | 523 </assert_contents> |
521 </output> | 524 </output> |
522 <output name="output_taxonomy_kraken" ftype="txt"> | 525 <output name="output_taxonomy_kraken" ftype="txt"> |
523 <assert_contents> | 526 <assert_contents> |
524 <has_text text="93.3333"/> | 527 <has_text text="kingdom"/> |
525 <has_text text="33.3333"/> | 528 <has_text text="Pseudomonadati"/> |
526 </assert_contents> | 529 </assert_contents> |
527 </output> | 530 </output> |
528 </test> | 531 </test> |
529 <test expect_num_outputs="2"> | 532 <test expect_num_outputs="2"> |
533 <param name="download_tax_db" value="true"/> | |
530 <section name="createdb"> | 534 <section name="createdb"> |
531 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> | 535 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> |
532 </section> | 536 </section> |
533 <section name="createtaxdb"> | 537 <section name="database"> |
534 <conditional name="database_type"> | 538 <conditional name="database_type"> |
535 <param name="type" value="amino_acid_tax"/> | 539 <param name="type" value="amino_acid_tax"/> |
536 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> | 540 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> |
537 </conditional> | 541 </conditional> |
538 </section> | 542 </section> |
551 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/> | 555 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/> |
552 </assert_contents> | 556 </assert_contents> |
553 </output> | 557 </output> |
554 </test> | 558 </test> |
555 <test expect_num_outputs="3"> | 559 <test expect_num_outputs="3"> |
560 <param name="download_tax_db" value="true"/> | |
556 <section name="createdb"> | 561 <section name="createdb"> |
557 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> | 562 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> |
558 </section> | 563 </section> |
559 <section name="createtaxdb"> | 564 <section name="database"> |
560 <conditional name="database_type"> | 565 <conditional name="database_type"> |
561 <param name="type" value="amino_acid_tax"/> | 566 <param name="type" value="amino_acid_tax"/> |
562 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> | 567 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> |
563 </conditional> | 568 </conditional> |
564 </section> | 569 </section> |
574 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/> | 579 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/> |
575 </assert_contents> | 580 </assert_contents> |
576 </output> | 581 </output> |
577 <output name="output_taxonomy_kraken" ftype="txt"> | 582 <output name="output_taxonomy_kraken" ftype="txt"> |
578 <assert_contents> | 583 <assert_contents> |
579 <has_text text="93.3333"/> | 584 <has_text text="kingdom"/> |
580 <has_text text="33.3333"/> | 585 <has_text text="Pseudomonadati"/> |
581 </assert_contents> | 586 </assert_contents> |
582 </output> | 587 </output> |
583 </test> | 588 </test> |
584 </tests> | 589 </tests> |
585 <help><![CDATA[ | 590 <help><![CDATA[ |
594 **Usage** | 599 **Usage** |
595 | 600 |
596 * Convert FASTA/Q file(s) to MMseqs sequence DB format | 601 * Convert FASTA/Q file(s) to MMseqs sequence DB format |
597 *mmseqs createdb <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB> [options]* | 602 *mmseqs createdb <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB> [options]* |
598 | 603 |
599 * Add taxonomic labels to sequence DB | |
600 *mmseqs createtaxdb <i:sequenceDB> <tmpDir> [options]* | |
601 | |
602 * Filter taxonomy sequence database | 604 * Filter taxonomy sequence database |
603 *mmseqs filtertaxseqdb <i:taxSeqDB> <o:taxSeqDB> [options]* | 605 *mmseqs filtertaxseqdb <i:taxSeqDB> <o:taxSeqDB> [options]* |
604 | 606 |
605 * Taxonomy assignment by computing the lowest common ancestor of homologs | 607 * Taxonomy assignment by computing the lowest common ancestor of homologs |
606 *mmseqs taxonomy <i:queryDB> <i:targetDB> <o:taxaDB> <tmpDir> [options]* | 608 *mmseqs taxonomy <i:queryDB> <i:targetDB> <o:taxaDB> <tmpDir> [options]* |