mmseqs2_taxonomy_assignment: mmseqs2_taxonomy

comparison mmseqs2_taxonomy_assignment.xml @ 2:876d26806584 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mmsesq2 commit 611b90f1628037f05d85905c88629a422d0a2053

author	iuc
date	Mon, 14 Apr 2025 18:39:38 +0000
parents	d0acde079e2e
children

comparison

equal deleted inserted replaced

-:9e47b28bff57
+:876d26806584
 'input'
 'sequenceDB'
 --dbtype '$createdb.alph_type.dbtype'
 --shuffle $createdb.shuffle &&
-cp -r '$createtaxdb.database_type.mmseqs2_db_select.fields.path'/database* . &&
+##Used only for test
+#if str($download_tax_db) == 'true':
+cp -r '$database.database_type.mmseqs2_db_select.fields.path'/database* . &&
 mmseqs createtaxdb
 database
-'tmp'
+'tmp' &&
-#if $createtaxdb.tax_mapping_file
+#end if
---tax-mapping-file '$createtaxdb.tax_mapping_file'
+##
-#end if
---tax-mapping-mode '$createtaxdb.tax_mapping_mode'
---threads "\${GALAXY_SLOTS:-1}" &&
 #if $filtertaxseqdb.taxon_list
 mmseqs filtertaxseqdb
-'database'
+##Used only for test
+#if str($download_tax_db) == 'true':
+'database'
+##
+#else
+'$database.database_type.mmseqs2_db_select.fields.path'/database
+#end if
 'database_filtered'
 --taxon-list '$filtertaxseqdb.taxon_list'
 &&
 #end if
 mmseqs taxonomy
 'sequenceDB'
 #if $filtertaxseqdb.taxon_list
 'database_filtered'
 #else
-'database'
+##Used only for test
+#if str($download_tax_db) == 'true':
+'database'
+##
+#else
+'$database.database_type.mmseqs2_db_select.fields.path'/database
+#end if
 #end if
 'output_taxonomy'
 'tmp'
 #if str($createdb.alph_type.dbtype) == "1"
 --comp-bias-corr-scale $createdb.alph_type.comp_bias_corr_scale
 --diag-score $taxonomy.prefilter.diag_score
 --exact-kmer-matching $taxonomy.prefilter.exact_kmer_matching
 --mask $taxonomy.prefilter.mask
 --mask-prob $taxonomy.prefilter.mask_prob
 --mask-lower-case $taxonomy.prefilter.mask_lower_case
+--mask-n-repeat $taxonomy.prefilter.mask_n_repeat
 --min-ungapped-score $taxonomy.prefilter.min_ungapped_score
 --spaced-kmer-mode $taxonomy.prefilter.spaced_kmer_mode
 ##--spaced-kmer-pattern STR        User-specified spaced k-mer pattern []
 ##--local-tmp STR                  Path where some of the temporary files will be created []
 ##--disk-space-limit BYTE          Set max disk space to use for reverse profile searches. E.g. 800B, 5K, 10M, 1G. Default (0) to all available disk space in the temp folder [0]
 --reverse-frames $taxonomy.misc.reverse_frames
 --translation-table $taxonomy.misc.translation_table
 --translate $taxonomy.misc.translate
 --use-all-table-starts $taxonomy.misc.use_all_table_starts
 --id-offset $taxonomy.misc.id_offset
---add-orf-stop $taxonomy.misc.add_orf_stop
 --sequence-overlap $taxonomy.misc.sequence_overlap
 --sequence-split-mode $taxonomy.misc.sequence_split_mode
 --headers-split-mode $taxonomy.misc.headers_split_mode
---search-type $createtaxdb.database_type.search_type
+--search-type $database.database_type.search_type
 --prefilter-mode $taxonomy.misc.prefilter_mode
 ##Common options
 ##--compressed INT                 Write compressed output [0]
 --threads "\${GALAXY_SLOTS:-1}"
 &&
 mmseqs taxonomyreport
 #if $filtertaxseqdb.taxon_list
 'database_filtered'
 #else
-'database'
+##Used only for test
+#if str($download_tax_db) == 'true':
+'database'
+##
+#else
+'$database.database_type.mmseqs2_db_select.fields.path'/database
+#end if
 #end if
 'output_taxonomy'
 'taxo_result.txt'
 --report-mode 0
 --threads "\${GALAXY_SLOTS:-1}"
 &&
 mmseqs taxonomyreport
 #if $filtertaxseqdb.taxon_list
 'database_filtered'
 #else
-'database'
+##Used only for test
+#if str($download_tax_db) == 'true':
+'database'
+##
+#else
+'$database.database_type.mmseqs2_db_select.fields.path'/database
+#end if
 #end if
 'output_taxonomy'
 'taxo_result.html'
 --report-mode 1
 --threads "\${GALAXY_SLOTS:-1}"
 #end if
 ##-v INT              Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]
 ]]></command>
 <inputs>
+<!-- Used only for tests: when set to "true", the taxonomy part of the database is downloaded while the tool runs instead of being stored in the test data directory, because that data is too large and cannot be reduced. -->
+<param name="download_tax_db" type="hidden" value=""/>
+<!-- -->
 <section name="createdb" title="Convert FASTA/Q file(s) to MMseqs sequence DB format"  expanded="true">
 <param name="input_fasta" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input fasta file" help="" />
 <conditional name="alph_type">
 <param argument="--dbtype" type="select" label="Input type" help="" >
 <option value="0" selected="true">Auto</option>
 <param argument="--zdrop" type="integer" min="0" value="40" label="Maximal allowed difference between score values before alignment is truncated" help=""/>
 </when>
 </conditional>
 <param argument="--shuffle" type="boolean" checked="true" label="Shuffle input database" truevalue="1" falsevalue="0" optional="true" help="" />
 </section>
-<section name="createtaxdb" title="Add taxonomic labels to reference sequence DB" expanded="true">
+<section name="database" title="Choose the taxonomic reference database that you want to use" expanded="true">
 <conditional name="database_type">
 <param name="type" type="select" label="Database type" help="" >
 <option value="amino_acid_tax" selected="true">Amino acid with taxonomy information</option>
 <option value="nucleotides_tax">Nucleotides with taxonomy information</option>
-<option value="amino_acid">Amino acid without taxonomy information</option>
-<option value="nucleotides">Nucleotides without taxonomy information</option>
 </param>
 <when value="amino_acid_tax">
 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
 <options from_data_table="mmseqs2_databases">
 <filter type="static_value" value="aminoacid" column="type"/>
 <validator message="No mmseqs2 database is available" type="no_options"/>
 </options>
 </param>
 <expand macro="search_type_nt" />
 </when>
-<when value="amino_acid">
-<param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
-<options from_data_table="mmseqs2_databases">
-<filter type="static_value" value="aminoacid" column="type"/>
-<filter type="static_value" value="no" column="taxonomy"/>
-<validator message="No mmseqs2 database is available" type="no_options"/>
-</options>
-</param>
-<expand macro="search_type_aa" />
-</when>
-<when value="nucleotides">
-<param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
-<options from_data_table="mmseqs2_databases">
-<filter type="static_value" value="nucleotide" column="type"/>
-<filter type="static_value" value="no" column="taxonomy"/>
-<validator message="No mmseqs2 database is available" type="no_options"/>
-</options>
-</param>
-<expand macro="search_type_nt" />
-</when>
 </conditional>
-<param argument="--tax-mapping-file" type="data" format="tabular,tsv,txt" label="File to map sequence identifier to taxonomical identifier" optional="true"/>
-<param argument="--tax-mapping-mode" type="select" label="Map taxonomy based on sequence database" help="" >
-<option value="0" selected="true">0: .lookup file</option>
-<option value="1">1: .source file</option>
-</param>
 </section>
 <section name="filtertaxseqdb" title="Filter taxonomy sequence database">
 <param argument="--taxon-list" type="text" optional="true" value="" label="Taxonomy ID" help="Possibly multiple values separated by ','"/>
 </section>
 <section name="taxonomy" title="Taxonomy assignment by computing the lowest common ancestor of homologs">
 <option value="31">Blastocrithidia Nuclear Code</option>
 </param>
 <param argument="--translate" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Translate ORF to amino acid" help=""/>
 <param argument="--use-all-table-starts" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use all alternatives for a start codon in the genetic table, if false - only ATG (AUG)" help=""/>
 <param argument="--id-offset" type="integer" min="0" value="0" label="Numeric ids in index file are offset by this value" help=""/>
-<param argument="--add-orf-stop" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Add stop codon '*' at complete start and end" help=""/>
 <param argument="--sequence-overlap" type="integer" min="0" value="0" label="Overlap between sequences" help=""/>
 <param argument="--sequence-split-mode" type="select" label="Sequence split mode" help="" >
 <option value="0">Copy data</option>
 <option value="1" selected="true">Soft link data and write new index</option>
 </param>
 </data>
 </outputs>
 <tests>
 <!-- Test with Kraken report -->
 <test expect_num_outputs="2">
+<param name="download_tax_db" value="true"/>
 <section name="createdb">
 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/>
 <conditional name="alph_type">
 <param name="dbtype" value="2"/>
 </conditional>
 </section>
-<section name="createtaxdb">
+<section name="database">
 <conditional name="database_type">
 <param name="type" value="amino_acid_tax"/>
 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" />
 </conditional>
 </section>
 <section name="filtertaxseqdb">
 <param name="taxon_list" value="2" />
+</section>
+<section name="taxonomy">
+<section name="prefilter">
+<param name="mask_n_repeat" value="1" />
+</section>
 </section>
 <conditional name="krona_report">
 <param name="keep_report" value="No"/>
 </conditional>
 <output name="output_taxonomy_tsv" ftype="tabular">
 <assert_contents>
-<has_line line="MYSTERY.222&#009;1236&#009;class&#009;Gammaproteobacteria&#009;1&#009;1&#009;1&#009;1.000"/>
 <has_line line="MYSTERY.64&#009;119060&#009;family&#009;Burkholderiaceae&#009;1&#009;1&#009;1&#009;1.000"/>
 <has_n_columns n="8"/>
 </assert_contents>
 </output>
 <output name="output_taxonomy_kraken" ftype="txt">
 <assert_contents>
-<has_text text="93.3333"/>
+<has_text text="kingdom"/>
-<has_text text="33.3333"/>
+<has_text text="Pseudomonadati"/>
 </assert_contents>
 </output>
 </test>
 <test expect_num_outputs="2">
+<param name="download_tax_db" value="true"/>
 <section name="createdb">
 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/>
 </section>
-<section name="createtaxdb">
+<section name="database">
 <conditional name="database_type">
 <param name="type" value="amino_acid_tax"/>
 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" />
 </conditional>
 </section>
 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/>
 </assert_contents>
 </output>
 </test>
 <test expect_num_outputs="3">
+<param name="download_tax_db" value="true"/>
 <section name="createdb">
 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/>
 </section>
-<section name="createtaxdb">
+<section name="database">
 <conditional name="database_type">
 <param name="type" value="amino_acid_tax"/>
 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" />
 </conditional>
 </section>
 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/>
 </assert_contents>
 </output>
 <output name="output_taxonomy_kraken" ftype="txt">
 <assert_contents>
-<has_text text="93.3333"/>
+<has_text text="kingdom"/>
-<has_text text="33.3333"/>
+<has_text text="Pseudomonadati"/>
 </assert_contents>
 </output>
 </test>
 </tests>
 <help><![CDATA[
 **Usage**
 * Convert FASTA/Q file(s) to MMseqs sequence DB format
 *mmseqs createdb <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB> [options]*
-* Add taxonomic labels to sequence DB
-*mmseqs createtaxdb <i:sequenceDB> <tmpDir> [options]*
 * Filter taxonomy sequence database
 *mmseqs filtertaxseqdb <i:taxSeqDB> <o:taxSeqDB> [options]*
 * Taxonomy assignment by computing the lowest common ancestor of homologs
 *mmseqs taxonomy <i:queryDB> <i:targetDB> <o:taxaDB> <tmpDir> [options]*

Mercurial > repos > iuc > mmseqs2_taxonomy_assignment

comparison mmseqs2_taxonomy_assignment.xml @ 2:876d26806584 draft default tip