comparison mmseqs2_taxonomy_assignment.xml @ 2:876d26806584 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mmsesq2 commit 611b90f1628037f05d85905c88629a422d0a2053
author iuc
date Mon, 14 Apr 2025 18:39:38 +0000
parents d0acde079e2e
children
comparison
equal deleted inserted replaced
1:9e47b28bff57 2:876d26806584
14 'input' 14 'input'
15 'sequenceDB' 15 'sequenceDB'
16 --dbtype '$createdb.alph_type.dbtype' 16 --dbtype '$createdb.alph_type.dbtype'
17 --shuffle $createdb.shuffle && 17 --shuffle $createdb.shuffle &&
18 18
19 cp -r '$createtaxdb.database_type.mmseqs2_db_select.fields.path'/database* . && 19 ##Used only for test
20 #if str($download_tax_db) == 'true':
21 cp -r '$database.database_type.mmseqs2_db_select.fields.path'/database* . &&
20 22
21 mmseqs createtaxdb 23 mmseqs createtaxdb
22 database 24 database
23 'tmp' 25 'tmp' &&
24 #if $createtaxdb.tax_mapping_file 26 #end if
25 --tax-mapping-file '$createtaxdb.tax_mapping_file' 27 ##
26 #end if
27 --tax-mapping-mode '$createtaxdb.tax_mapping_mode'
28 --threads "\${GALAXY_SLOTS:-1}" &&
29 28
30 #if $filtertaxseqdb.taxon_list 29 #if $filtertaxseqdb.taxon_list
31 mmseqs filtertaxseqdb 30 mmseqs filtertaxseqdb
32 'database' 31 ##Used only for test
32 #if str($download_tax_db) == 'true':
33 'database'
34 ##
35 #else
36 '$database.database_type.mmseqs2_db_select.fields.path'/database
37 #end if
33 'database_filtered' 38 'database_filtered'
34 --taxon-list '$filtertaxseqdb.taxon_list' 39 --taxon-list '$filtertaxseqdb.taxon_list'
35 && 40 &&
36 #end if 41 #end if
37 42
38 mmseqs taxonomy 43 mmseqs taxonomy
39 'sequenceDB' 44 'sequenceDB'
40 #if $filtertaxseqdb.taxon_list 45 #if $filtertaxseqdb.taxon_list
41 'database_filtered' 46 'database_filtered'
42 #else 47 #else
43 'database' 48 ##Used only for test
49 #if str($download_tax_db) == 'true':
50 'database'
51 ##
52 #else
53 '$database.database_type.mmseqs2_db_select.fields.path'/database
54 #end if
44 #end if 55 #end if
45 'output_taxonomy' 56 'output_taxonomy'
46 'tmp' 57 'tmp'
47 #if str($createdb.alph_type.dbtype) == "1" 58 #if str($createdb.alph_type.dbtype) == "1"
48 --comp-bias-corr-scale $createdb.alph_type.comp_bias_corr_scale 59 --comp-bias-corr-scale $createdb.alph_type.comp_bias_corr_scale
62 --diag-score $taxonomy.prefilter.diag_score 73 --diag-score $taxonomy.prefilter.diag_score
63 --exact-kmer-matching $taxonomy.prefilter.exact_kmer_matching 74 --exact-kmer-matching $taxonomy.prefilter.exact_kmer_matching
64 --mask $taxonomy.prefilter.mask 75 --mask $taxonomy.prefilter.mask
65 --mask-prob $taxonomy.prefilter.mask_prob 76 --mask-prob $taxonomy.prefilter.mask_prob
66 --mask-lower-case $taxonomy.prefilter.mask_lower_case 77 --mask-lower-case $taxonomy.prefilter.mask_lower_case
78 --mask-n-repeat $taxonomy.prefilter.mask_n_repeat
67 --min-ungapped-score $taxonomy.prefilter.min_ungapped_score 79 --min-ungapped-score $taxonomy.prefilter.min_ungapped_score
68 --spaced-kmer-mode $taxonomy.prefilter.spaced_kmer_mode 80 --spaced-kmer-mode $taxonomy.prefilter.spaced_kmer_mode
69 ##--spaced-kmer-pattern STR User-specified spaced k-mer pattern [] 81 ##--spaced-kmer-pattern STR User-specified spaced k-mer pattern []
70 ##--local-tmp STR Path where some of the temporary files will be created [] 82 ##--local-tmp STR Path where some of the temporary files will be created []
71 ##--disk-space-limit BYTE Set max disk space to use for reverse profile searches. E.g. 800B, 5K, 10M, 1G. Default (0) to all available disk space in the temp folder [0] 83 ##--disk-space-limit BYTE Set max disk space to use for reverse profile searches. E.g. 800B, 5K, 10M, 1G. Default (0) to all available disk space in the temp folder [0]
135 --reverse-frames $taxonomy.misc.reverse_frames 147 --reverse-frames $taxonomy.misc.reverse_frames
136 --translation-table $taxonomy.misc.translation_table 148 --translation-table $taxonomy.misc.translation_table
137 --translate $taxonomy.misc.translate 149 --translate $taxonomy.misc.translate
138 --use-all-table-starts $taxonomy.misc.use_all_table_starts 150 --use-all-table-starts $taxonomy.misc.use_all_table_starts
139 --id-offset $taxonomy.misc.id_offset 151 --id-offset $taxonomy.misc.id_offset
140 --add-orf-stop $taxonomy.misc.add_orf_stop
141 --sequence-overlap $taxonomy.misc.sequence_overlap 152 --sequence-overlap $taxonomy.misc.sequence_overlap
142 --sequence-split-mode $taxonomy.misc.sequence_split_mode 153 --sequence-split-mode $taxonomy.misc.sequence_split_mode
143 --headers-split-mode $taxonomy.misc.headers_split_mode 154 --headers-split-mode $taxonomy.misc.headers_split_mode
144 --search-type $createtaxdb.database_type.search_type 155 --search-type $database.database_type.search_type
145 --prefilter-mode $taxonomy.misc.prefilter_mode 156 --prefilter-mode $taxonomy.misc.prefilter_mode
146 157
147 ##Common options 158 ##Common options
148 ##--compressed INT Write compressed output [0] 159 ##--compressed INT Write compressed output [0]
149 --threads "\${GALAXY_SLOTS:-1}" 160 --threads "\${GALAXY_SLOTS:-1}"
180 && 191 &&
181 mmseqs taxonomyreport 192 mmseqs taxonomyreport
182 #if $filtertaxseqdb.taxon_list 193 #if $filtertaxseqdb.taxon_list
183 'database_filtered' 194 'database_filtered'
184 #else 195 #else
185 'database' 196 ##Used only for test
197 #if str($download_tax_db) == 'true':
198 'database'
199 ##
200 #else
201 '$database.database_type.mmseqs2_db_select.fields.path'/database
202 #end if
186 #end if 203 #end if
187 'output_taxonomy' 204 'output_taxonomy'
188 'taxo_result.txt' 205 'taxo_result.txt'
189 --report-mode 0 206 --report-mode 0
190 --threads "\${GALAXY_SLOTS:-1}" 207 --threads "\${GALAXY_SLOTS:-1}"
193 && 210 &&
194 mmseqs taxonomyreport 211 mmseqs taxonomyreport
195 #if $filtertaxseqdb.taxon_list 212 #if $filtertaxseqdb.taxon_list
196 'database_filtered' 213 'database_filtered'
197 #else 214 #else
198 'database' 215 ##Used only for test
216 #if str($download_tax_db) == 'true':
217 'database'
218 ##
219 #else
220 '$database.database_type.mmseqs2_db_select.fields.path'/database
221 #end if
199 #end if 222 #end if
200 'output_taxonomy' 223 'output_taxonomy'
201 'taxo_result.html' 224 'taxo_result.html'
202 --report-mode 1 225 --report-mode 1
203 --threads "\${GALAXY_SLOTS:-1}" 226 --threads "\${GALAXY_SLOTS:-1}"
204 #end if 227 #end if
205 ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3] 228 ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]
206 229
207 ]]></command> 230 ]]></command>
208 <inputs> 231 <inputs>
232 <!-- used only for tests, this makes it possible to download the taxonomy part of the db without having to have it in the test data directory, which is too large and cannot be reduced -->
233 <param name="download_tax_db" type="hidden" value=""/>
234 <!-- -->
209 <section name="createdb" title="Convert FASTA/Q file(s) to MMseqs sequence DB format" expanded="true"> 235 <section name="createdb" title="Convert FASTA/Q file(s) to MMseqs sequence DB format" expanded="true">
210 <param name="input_fasta" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input fasta file" help="" /> 236 <param name="input_fasta" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input fasta file" help="" />
211 <conditional name="alph_type"> 237 <conditional name="alph_type">
212 <param argument="--dbtype" type="select" label="Input type" help="" > 238 <param argument="--dbtype" type="select" label="Input type" help="" >
213 <option value="0" selected="true">Auto</option> 239 <option value="0" selected="true">Auto</option>
222 <param argument="--zdrop" type="integer" min="0" value="40" label="Maximal allowed difference between score values before alignment is truncated" help=""/> 248 <param argument="--zdrop" type="integer" min="0" value="40" label="Maximal allowed difference between score values before alignment is truncated" help=""/>
223 </when> 249 </when>
224 </conditional> 250 </conditional>
225 <param argument="--shuffle" type="boolean" checked="true" label="Shuffle input database" truevalue="1" falsevalue="0" optional="true" help="" /> 251 <param argument="--shuffle" type="boolean" checked="true" label="Shuffle input database" truevalue="1" falsevalue="0" optional="true" help="" />
226 </section> 252 </section>
227 <section name="createtaxdb" title="Add taxonomic labels to reference sequence DB" expanded="true"> 253 <section name="database" title="Choose the taxonomic reference database that you want to use" expanded="true">
228 <conditional name="database_type"> 254 <conditional name="database_type">
229 <param name="type" type="select" label="Database type" help="" > 255 <param name="type" type="select" label="Database type" help="" >
230 <option value="amino_acid_tax" selected="true">Amino acid with taxonomy information</option> 256 <option value="amino_acid_tax" selected="true">Amino acid with taxonomy information</option>
231 <option value="nucleotides_tax">Nucleotides with taxonomy information</option> 257 <option value="nucleotides_tax">Nucleotides with taxonomy information</option>
232 <option value="amino_acid">Amino acid without taxonomy information</option>
233 <option value="nucleotides">Nucleotides without taxonomy information</option>
234 </param> 258 </param>
235 <when value="amino_acid_tax"> 259 <when value="amino_acid_tax">
236 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> 260 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
237 <options from_data_table="mmseqs2_databases"> 261 <options from_data_table="mmseqs2_databases">
238 <filter type="static_value" value="aminoacid" column="type"/> 262 <filter type="static_value" value="aminoacid" column="type"/>
250 <validator message="No mmseqs2 database is available" type="no_options"/> 274 <validator message="No mmseqs2 database is available" type="no_options"/>
251 </options> 275 </options>
252 </param> 276 </param>
253 <expand macro="search_type_nt" /> 277 <expand macro="search_type_nt" />
254 </when> 278 </when>
255 <when value="amino_acid">
256 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
257 <options from_data_table="mmseqs2_databases">
258 <filter type="static_value" value="aminoacid" column="type"/>
259 <filter type="static_value" value="no" column="taxonomy"/>
260 <validator message="No mmseqs2 database is available" type="no_options"/>
261 </options>
262 </param>
263 <expand macro="search_type_aa" />
264 </when>
265 <when value="nucleotides">
266 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
267 <options from_data_table="mmseqs2_databases">
268 <filter type="static_value" value="nucleotide" column="type"/>
269 <filter type="static_value" value="no" column="taxonomy"/>
270 <validator message="No mmseqs2 database is available" type="no_options"/>
271 </options>
272 </param>
273 <expand macro="search_type_nt" />
274 </when>
275 </conditional> 279 </conditional>
276 <param argument="--tax-mapping-file" type="data" format="tabular,tsv,txt" label="File to map sequence identifier to taxonomical identifier" optional="true"/>
277 <param argument="--tax-mapping-mode" type="select" label="Map taxonomy based on sequence database" help="" >
278 <option value="0" selected="true">0: .lookup file</option>
279 <option value="1">1: .source file</option>
280 </param>
281 </section> 280 </section>
282 <section name="filtertaxseqdb" title="Filter taxonomy sequence database"> 281 <section name="filtertaxseqdb" title="Filter taxonomy sequence database">
283 <param argument="--taxon-list" type="text" optional="true" value="" label="Taxonomy ID" help="Possibly multiple values separated by ','"/> 282 <param argument="--taxon-list" type="text" optional="true" value="" label="Taxonomy ID" help="Possibly multiple values separated by ','"/>
284 </section> 283 </section>
285 <section name="taxonomy" title="Taxonomy assignment by computing the lowest common ancestor of homologs"> 284 <section name="taxonomy" title="Taxonomy assignment by computing the lowest common ancestor of homologs">
430 <option value="31">Blastocrithidia Nuclear Code</option> 429 <option value="31">Blastocrithidia Nuclear Code</option>
431 </param> 430 </param>
432 <param argument="--translate" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Translate ORF to amino acid" help=""/> 431 <param argument="--translate" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Translate ORF to amino acid" help=""/>
433 <param argument="--use-all-table-starts" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use all alternatives for a start codon in the genetic table, if false - only ATG (AUG)" help=""/> 432 <param argument="--use-all-table-starts" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use all alternatives for a start codon in the genetic table, if false - only ATG (AUG)" help=""/>
434 <param argument="--id-offset" type="integer" min="0" value="0" label="Numeric ids in index file are offset by this value" help=""/> 433 <param argument="--id-offset" type="integer" min="0" value="0" label="Numeric ids in index file are offset by this value" help=""/>
435 <param argument="--add-orf-stop" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Add stop codon '*' at complete start and end" help=""/>
436 <param argument="--sequence-overlap" type="integer" min="0" value="0" label="Overlap between sequences" help=""/> 434 <param argument="--sequence-overlap" type="integer" min="0" value="0" label="Overlap between sequences" help=""/>
437 <param argument="--sequence-split-mode" type="select" label="Sequence split mode" help="" > 435 <param argument="--sequence-split-mode" type="select" label="Sequence split mode" help="" >
438 <option value="0">Copy data</option> 436 <option value="0">Copy data</option>
439 <option value="1" selected="true">Soft link data and write new index</option> 437 <option value="1" selected="true">Soft link data and write new index</option>
440 </param> 438 </param>
492 </data> 490 </data>
493 </outputs> 491 </outputs>
494 <tests> 492 <tests>
495 <!-- Test with Kraken report --> 493 <!-- Test with Kraken report -->
496 <test expect_num_outputs="2"> 494 <test expect_num_outputs="2">
495 <param name="download_tax_db" value="true"/>
497 <section name="createdb"> 496 <section name="createdb">
498 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> 497 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/>
499 <conditional name="alph_type"> 498 <conditional name="alph_type">
500 <param name="dbtype" value="2"/> 499 <param name="dbtype" value="2"/>
501 </conditional> 500 </conditional>
502 </section> 501 </section>
503 <section name="createtaxdb"> 502 <section name="database">
504 <conditional name="database_type"> 503 <conditional name="database_type">
505 <param name="type" value="amino_acid_tax"/> 504 <param name="type" value="amino_acid_tax"/>
506 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> 505 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" />
507 </conditional> 506 </conditional>
508 </section> 507 </section>
509 <section name="filtertaxseqdb"> 508 <section name="filtertaxseqdb">
510 <param name="taxon_list" value="2" /> 509 <param name="taxon_list" value="2" />
510 </section>
511 <section name="taxonomy">
512 <section name="prefilter">
513 <param name="mask_n_repeat" value="1" />
514 </section>
511 </section> 515 </section>
512 <conditional name="krona_report"> 516 <conditional name="krona_report">
513 <param name="keep_report" value="No"/> 517 <param name="keep_report" value="No"/>
514 </conditional> 518 </conditional>
515 <output name="output_taxonomy_tsv" ftype="tabular"> 519 <output name="output_taxonomy_tsv" ftype="tabular">
516 <assert_contents> 520 <assert_contents>
517 <has_line line="MYSTERY.222&#009;1236&#009;class&#009;Gammaproteobacteria&#009;1&#009;1&#009;1&#009;1.000"/>
518 <has_line line="MYSTERY.64&#009;119060&#009;family&#009;Burkholderiaceae&#009;1&#009;1&#009;1&#009;1.000"/> 521 <has_line line="MYSTERY.64&#009;119060&#009;family&#009;Burkholderiaceae&#009;1&#009;1&#009;1&#009;1.000"/>
519 <has_n_columns n="8"/> 522 <has_n_columns n="8"/>
520 </assert_contents> 523 </assert_contents>
521 </output> 524 </output>
522 <output name="output_taxonomy_kraken" ftype="txt"> 525 <output name="output_taxonomy_kraken" ftype="txt">
523 <assert_contents> 526 <assert_contents>
524 <has_text text="93.3333"/> 527 <has_text text="kingdom"/>
525 <has_text text="33.3333"/> 528 <has_text text="Pseudomonadati"/>
526 </assert_contents> 529 </assert_contents>
527 </output> 530 </output>
528 </test> 531 </test>
529 <test expect_num_outputs="2"> 532 <test expect_num_outputs="2">
533 <param name="download_tax_db" value="true"/>
530 <section name="createdb"> 534 <section name="createdb">
531 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> 535 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/>
532 </section> 536 </section>
533 <section name="createtaxdb"> 537 <section name="database">
534 <conditional name="database_type"> 538 <conditional name="database_type">
535 <param name="type" value="amino_acid_tax"/> 539 <param name="type" value="amino_acid_tax"/>
536 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> 540 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" />
537 </conditional> 541 </conditional>
538 </section> 542 </section>
551 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/> 555 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/>
552 </assert_contents> 556 </assert_contents>
553 </output> 557 </output>
554 </test> 558 </test>
555 <test expect_num_outputs="3"> 559 <test expect_num_outputs="3">
560 <param name="download_tax_db" value="true"/>
556 <section name="createdb"> 561 <section name="createdb">
557 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> 562 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/>
558 </section> 563 </section>
559 <section name="createtaxdb"> 564 <section name="database">
560 <conditional name="database_type"> 565 <conditional name="database_type">
561 <param name="type" value="amino_acid_tax"/> 566 <param name="type" value="amino_acid_tax"/>
562 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> 567 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" />
563 </conditional> 568 </conditional>
564 </section> 569 </section>
574 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/> 579 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/>
575 </assert_contents> 580 </assert_contents>
576 </output> 581 </output>
577 <output name="output_taxonomy_kraken" ftype="txt"> 582 <output name="output_taxonomy_kraken" ftype="txt">
578 <assert_contents> 583 <assert_contents>
579 <has_text text="93.3333"/> 584 <has_text text="kingdom"/>
580 <has_text text="33.3333"/> 585 <has_text text="Pseudomonadati"/>
581 </assert_contents> 586 </assert_contents>
582 </output> 587 </output>
583 </test> 588 </test>
584 </tests> 589 </tests>
585 <help><![CDATA[ 590 <help><![CDATA[
594 **Usage** 599 **Usage**
595 600
596 * Convert FASTA/Q file(s) to MMseqs sequence DB format 601 * Convert FASTA/Q file(s) to MMseqs sequence DB format
597 *mmseqs createdb <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB> [options]* 602 *mmseqs createdb <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB> [options]*
598 603
599 * Add taxonomic labels to sequence DB
600 *mmseqs createtaxdb <i:sequenceDB> <tmpDir> [options]*
601
602 * Filter taxonomy sequence database 604 * Filter taxonomy sequence database
603 *mmseqs filtertaxseqdb <i:taxSeqDB> <o:taxSeqDB> [options]* 605 *mmseqs filtertaxseqdb <i:taxSeqDB> <o:taxSeqDB> [options]*
604 606
605 * Taxonomy assignment by computing the lowest common ancestor of homologs 607 * Taxonomy assignment by computing the lowest common ancestor of homologs
606 *mmseqs taxonomy <i:queryDB> <i:targetDB> <o:taxaDB> <tmpDir> [options]* 608 *mmseqs taxonomy <i:queryDB> <i:targetDB> <o:taxaDB> <tmpDir> [options]*