Mercurial > repos > iuc > mmseqs2_taxonomy_assignment
diff mmseqs2_taxonomy_assignment.xml @ 2:876d26806584 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mmsesq2 commit 611b90f1628037f05d85905c88629a422d0a2053
author | iuc |
---|---|
date | Mon, 14 Apr 2025 18:39:38 +0000 |
parents | d0acde079e2e |
children |
line wrap: on
line diff
--- a/mmseqs2_taxonomy_assignment.xml Thu Mar 27 16:43:10 2025 +0000 +++ b/mmseqs2_taxonomy_assignment.xml Mon Apr 14 18:39:38 2025 +0000 @@ -16,20 +16,25 @@ --dbtype '$createdb.alph_type.dbtype' --shuffle $createdb.shuffle && -cp -r '$createtaxdb.database_type.mmseqs2_db_select.fields.path'/database* . && +##Used only for test +#if str($download_tax_db) == 'true': +cp -r '$database.database_type.mmseqs2_db_select.fields.path'/database* . && mmseqs createtaxdb database - 'tmp' - #if $createtaxdb.tax_mapping_file - --tax-mapping-file '$createtaxdb.tax_mapping_file' - #end if - --tax-mapping-mode '$createtaxdb.tax_mapping_mode' - --threads "\${GALAXY_SLOTS:-1}" && + 'tmp' && +#end if +## #if $filtertaxseqdb.taxon_list mmseqs filtertaxseqdb - 'database' + ##Used only for test + #if str($download_tax_db) == 'true': + 'database' + ## + #else + '$database.database_type.mmseqs2_db_select.fields.path'/database + #end if 'database_filtered' --taxon-list '$filtertaxseqdb.taxon_list' && @@ -40,7 +45,13 @@ #if $filtertaxseqdb.taxon_list 'database_filtered' #else - 'database' + ##Used only for test + #if str($download_tax_db) == 'true': + 'database' + ## + #else + '$database.database_type.mmseqs2_db_select.fields.path'/database + #end if #end if 'output_taxonomy' 'tmp' @@ -64,6 +75,7 @@ --mask $taxonomy.prefilter.mask --mask-prob $taxonomy.prefilter.mask_prob --mask-lower-case $taxonomy.prefilter.mask_lower_case + --mask-n-repeat $taxonomy.prefilter.mask_n_repeat --min-ungapped-score $taxonomy.prefilter.min_ungapped_score --spaced-kmer-mode $taxonomy.prefilter.spaced_kmer_mode ##--spaced-kmer-pattern STR User-specified spaced k-mer pattern [] @@ -137,11 +149,10 @@ --translate $taxonomy.misc.translate --use-all-table-starts $taxonomy.misc.use_all_table_starts --id-offset $taxonomy.misc.id_offset - --add-orf-stop $taxonomy.misc.add_orf_stop --sequence-overlap $taxonomy.misc.sequence_overlap --sequence-split-mode $taxonomy.misc.sequence_split_mode --headers-split-mode $taxonomy.misc.headers_split_mode - --search-type $createtaxdb.database_type.search_type + --search-type $database.database_type.search_type --prefilter-mode $taxonomy.misc.prefilter_mode ##Common options @@ -182,7 +193,13 @@ #if $filtertaxseqdb.taxon_list 'database_filtered' #else - 'database' + ##Used only for test + #if str($download_tax_db) == 'true': + 'database' + ## + #else + '$database.database_type.mmseqs2_db_select.fields.path'/database + #end if #end if 'output_taxonomy' 'taxo_result.txt' @@ -195,7 +212,13 @@ #if $filtertaxseqdb.taxon_list 'database_filtered' #else - 'database' + ##Used only for test + #if str($download_tax_db) == 'true': + 'database' + ## + #else + '$database.database_type.mmseqs2_db_select.fields.path'/database + #end if #end if 'output_taxonomy' 'taxo_result.html' @@ -206,6 +229,9 @@ ]]></command> <inputs> + <!-- used only for tests, this makes it possible to download the taxonomy part of the db without having to have it in the test data directory, which is too large and cannot be reduced --> + <param name="download_tax_db" type="hidden" value=""/> + <!-- --> <section name="createdb" title="Convert FASTA/Q file(s) to MMseqs sequence DB format" expanded="true"> <param name="input_fasta" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input fasta file" help="" /> <conditional name="alph_type"> @@ -224,13 +250,11 @@ </conditional> <param argument="--shuffle" type="boolean" checked="true" label="Shuffle input database" truevalue="1" falsevalue="0" optional="true" help="" /> </section> - <section name="createtaxdb" title="Add taxonomic labels to reference sequence DB" expanded="true"> + <section name="database" title="Choose the taxonomic reference database that you want to use" expanded="true"> <conditional name="database_type"> <param name="type" type="select" label="Database type" help="" > <option value="amino_acid_tax" selected="true">Amino acid with taxonomy information</option> <option value="nucleotides_tax">Nucleotides with taxonomy information</option> - <option value="amino_acid">Amino acid without taxonomy information</option> - <option value="nucleotides">Nucleotides without taxonomy information</option> </param> <when value="amino_acid_tax"> <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> @@ -252,32 +276,7 @@ </param> <expand macro="search_type_nt" /> </when> - <when value="amino_acid"> - <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> - <options from_data_table="mmseqs2_databases"> - <filter type="static_value" value="aminoacid" column="type"/> - <filter type="static_value" value="no" column="taxonomy"/> - <validator message="No mmseqs2 database is available" type="no_options"/> - </options> - </param> - <expand macro="search_type_aa" /> - </when> - <when value="nucleotides"> - <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> - <options from_data_table="mmseqs2_databases"> - <filter type="static_value" value="nucleotide" column="type"/> - <filter type="static_value" value="no" column="taxonomy"/> - <validator message="No mmseqs2 database is available" type="no_options"/> - </options> - </param> - <expand macro="search_type_nt" /> - </when> </conditional> - <param argument="--tax-mapping-file" type="data" format="tabular,tsv,txt" label="File to map sequence identifier to taxonomical identifier" optional="true"/> - <param argument="--tax-mapping-mode" type="select" label="Map taxonomy based on sequence database" help="" > - <option value="0" selected="true">0: .lookup file</option> - <option value="1">1: .source file</option> - </param> </section> <section name="filtertaxseqdb" title="Filter taxonomy sequence database"> <param argument="--taxon-list" type="text" optional="true" value="" label="Taxonomy ID" help="Possibly multiple values separated by ','"/> @@ -432,7 +431,6 @@ <param argument="--translate" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Translate ORF to amino acid" help=""/> <param argument="--use-all-table-starts" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use all alternatives for a start codon in the genetic table, if false - only ATG (AUG)" help=""/> <param argument="--id-offset" type="integer" min="0" value="0" label="Numeric ids in index file are offset by this value" help=""/> - <param argument="--add-orf-stop" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Add stop codon '*' at complete start and end" help=""/> <param argument="--sequence-overlap" type="integer" min="0" value="0" label="Overlap between sequences" help=""/> <param argument="--sequence-split-mode" type="select" label="Sequence split mode" help="" > <option value="0">Copy data</option> @@ -494,13 +492,14 @@ <tests> <!-- Test with Kraken report --> <test expect_num_outputs="2"> + <param name="download_tax_db" value="true"/> <section name="createdb"> <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> <conditional name="alph_type"> <param name="dbtype" value="2"/> </conditional> </section> - <section name="createtaxdb"> + <section name="database"> <conditional name="database_type"> <param name="type" value="amino_acid_tax"/> <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> @@ -509,28 +508,33 @@ <section name="filtertaxseqdb"> <param name="taxon_list" value="2" /> </section> + <section name="taxonomy"> + <section name="prefilter"> + <param name="mask_n_repeat" value="1" /> + </section> + </section> <conditional name="krona_report"> <param name="keep_report" value="No"/> </conditional> <output name="output_taxonomy_tsv" ftype="tabular"> <assert_contents> - <has_line line="MYSTERY.222	1236	class	Gammaproteobacteria	1	1	1	1.000"/> <has_line line="MYSTERY.64	119060	family	Burkholderiaceae	1	1	1	1.000"/> <has_n_columns n="8"/> </assert_contents> </output> <output name="output_taxonomy_kraken" ftype="txt"> <assert_contents> - <has_text text="93.3333"/> - <has_text text="33.3333"/> + <has_text text="kingdom"/> + <has_text text="Pseudomonadati"/> </assert_contents> </output> </test> <test expect_num_outputs="2"> + <param name="download_tax_db" value="true"/> <section name="createdb"> <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> </section> - <section name="createtaxdb"> + <section name="database"> <conditional name="database_type"> <param name="type" value="amino_acid_tax"/> <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> @@ -553,10 +557,11 @@ </output> </test> <test expect_num_outputs="3"> + <param name="download_tax_db" value="true"/> <section name="createdb"> <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> </section> - <section name="createtaxdb"> + <section name="database"> <conditional name="database_type"> <param name="type" value="amino_acid_tax"/> <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> @@ -576,8 +581,8 @@ </output> <output name="output_taxonomy_kraken" ftype="txt"> <assert_contents> - <has_text text="93.3333"/> - <has_text text="33.3333"/> + <has_text text="kingdom"/> + <has_text text="Pseudomonadati"/> </assert_contents> </output> </test> @@ -596,9 +601,6 @@ * Convert FASTA/Q file(s) to MMseqs sequence DB format *mmseqs createdb <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB> [options]* -* Add taxonomic labels to sequence DB - *mmseqs createtaxdb <i:sequenceDB> <tmpDir> [options]* - * Filter taxonomy sequence database *mmseqs filtertaxseqdb <i:taxSeqDB> <o:taxSeqDB> [options]*