mmseqs2_taxonomy_assignment: mmseqs2_taxonomy

comparison mmseqs2_taxonomy_assignment.xml @ 0:d0acde079e2e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mmseqs2 commit 1400593429eb4e9c6e307df3621825a8b84a6fa7

author	iuc
date	Thu, 27 Mar 2025 14:38:20 +0000
parents
children	876d26806584

comparison

equal deleted inserted replaced

--1:000000000000
+:d0acde079e2e
+<tool id="mmseqs2_taxonomy_assignment" name="MMseqs2 Taxonomy Assignments" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+<description>
+of sequences by comparing them to a reference database
+</description>
+<macros> <!-- Shared definitions come from macro.xml (not shown here); the @TOKENS@ in the tool tag and every expand in this file presumably resolve there. -->
+<import>macro.xml</import>
+</macros>
+<expand macro="biotools"/>
+<expand macro="requirements"/>
+<expand macro="version_command"/>
+<command detect_errors="exit_code"><![CDATA[
+## Pipeline: createdb (query) -> copy reference DB + createtaxdb -> optional filtertaxseqdb
+## -> taxonomy -> createtsv -> optional taxonomyreport (Kraken and/or Krona).
+## Free-text parameters are single-quoted, and the ones that default to an empty string are
+## omitted when empty, so that an option never takes the following flag as its value.
+ln -s -f '${createdb.input_fasta}' 'input' &&
+mmseqs createdb
+'input'
+'sequenceDB'
+--dbtype '$createdb.alph_type.dbtype'
+--shuffle $createdb.shuffle &&
+## Work on a local copy of the selected reference database
+cp -r '$createtaxdb.database_type.mmseqs2_db_select.fields.path'/database* . &&
+mmseqs createtaxdb
+database
+'tmp'
+#if $createtaxdb.tax_mapping_file
+--tax-mapping-file '$createtaxdb.tax_mapping_file'
+#end if
+--tax-mapping-mode '$createtaxdb.tax_mapping_mode'
+--threads "\${GALAXY_SLOTS:-1}" &&
+## Optional restriction of the reference database to the requested taxa
+#if $filtertaxseqdb.taxon_list
+mmseqs filtertaxseqdb
+'database'
+'database_filtered'
+--taxon-list '$filtertaxseqdb.taxon_list'
+&&
+#end if
+mmseqs taxonomy
+'sequenceDB'
+#if $filtertaxseqdb.taxon_list
+'database_filtered'
+#else
+'database'
+#end if
+'output_taxonomy'
+'tmp'
+## These two options only exist in the matching branch of the dbtype conditional
+#if str($createdb.alph_type.dbtype) == "1"
+--comp-bias-corr-scale $createdb.alph_type.comp_bias_corr_scale
+#elif str($createdb.alph_type.dbtype) == "2"
+--zdrop $createdb.alph_type.zdrop
+#end if
+##Pre-filter options
+--add-self-matches $taxonomy.prefilter.add_self_matches
+-s $taxonomy.prefilter.sensitivity
+-k $taxonomy.prefilter.kmer_length
+--target-search-mode $taxonomy.prefilter.target_search_mode
+##--k-score TWIN                   k-mer threshold for generating similar k-mer lists [seq:2147483647,prof:2147483647]
+--max-seqs $taxonomy.prefilter.max_seqs
+--split $taxonomy.prefilter.split
+--split-mode $taxonomy.prefilter.split_mode
+##--split-memory-limit BYTE        Set max memory per split. E.g. 800B, 5K, 10M, 1G. Default (0) to all available system memory [0]
+--diag-score $taxonomy.prefilter.diag_score
+--exact-kmer-matching $taxonomy.prefilter.exact_kmer_matching
+--mask $taxonomy.prefilter.mask
+--mask-prob $taxonomy.prefilter.mask_prob
+--mask-lower-case $taxonomy.prefilter.mask_lower_case
+--min-ungapped-score $taxonomy.prefilter.min_ungapped_score
+--spaced-kmer-mode $taxonomy.prefilter.spaced_kmer_mode
+##--spaced-kmer-pattern STR        User-specified spaced k-mer pattern []
+##--local-tmp STR                  Path where some of the temporary files will be created []
+##--disk-space-limit BYTE          Set max disk space to use for reverse profile searches. E.g. 800B, 5K, 10M, 1G. Default (0) to all available disk space in the temp folder [0]
+##Align options
+-a $taxonomy.align.convertalis
+##The next 2 parameters seem to be the same
+--alignment-mode $taxonomy.align.alignment_mode
+--alignment-output-mode $taxonomy.align.alignment_output_mode
+--wrapped-scoring $taxonomy.align.wrapped_scoring
+-e $taxonomy.align.evalue
+--min-seq-id $taxonomy.align.min_seq_id
+--min-aln-len $taxonomy.align.min_aln_len
+--seq-id-mode $taxonomy.align.seq_id_mode
+--alt-ali $taxonomy.align.alt_ali
+-c $taxonomy.align.cov
+--cov-mode $taxonomy.align.cov_mode
+--max-rejected $taxonomy.align.max_rejected
+--max-accept $taxonomy.align.max_accept
+--score-bias $taxonomy.align.score_bias
+--realign $taxonomy.align.realign
+--realign-score-bias $taxonomy.align.realign_score_bias
+--realign-max-seqs $taxonomy.align.realign_max_seqs
+--corr-score-weight $taxonomy.align.corr_score_weight
+--exhaustive-search-filter $taxonomy.align.exhaustive_search_filter
+##Profile options
+##--pca                            Pseudo count admixture strength []
+##--pcb                            Pseudo counts: Neff at half of maximum admixture (range 0.0-inf) []
+--mask-profile $taxonomy.profile.mask_profile
+--e-profile $taxonomy.profile.e_profile
+--wg $taxonomy.profile.wg
+--filter-msa $taxonomy.profile.filter_msa
+--filter-min-enable $taxonomy.profile.filter_min_enable
+--max-seq-id $taxonomy.profile.max_seq_id
+## qid is a free-text field (single value or comma-separated list), hence the quotes
+--qid '$taxonomy.profile.qid'
+--qsc $taxonomy.profile.qsc
+--cov $taxonomy.profile.cov
+--diff $taxonomy.profile.diff
+--pseudo-cnt-mode $taxonomy.profile.pseudo_cnt_mode
+--exhaustive-search $taxonomy.profile.exhaustive_search
+--lca-search $taxonomy.profile.lca_search
+##Misc options
+##--orf-filter INT                 Prefilter query ORFs with non-selective search
+##                             Only used during nucleotide-vs-protein classification
+##                             NOTE: Consider disabling when classifying short reads [1]
+--orf-filter-e $taxonomy.misc.orf_filter_e
+--orf-filter-s $taxonomy.misc.orf_filter_s
+--lca-mode $taxonomy.misc.lca_mode
+--tax-output-mode $taxonomy.misc.tax_output_mode
+--majority $taxonomy.misc.majority
+--vote-mode $taxonomy.misc.vote_mode
+##--lca-ranks STR                  Add column with specified ranks (',' separated) []
+--tax-lineage $taxonomy.misc.tax_lineage
+## Both fields default to an empty string in the form: emit the option only when a value
+## was entered, otherwise the option would be rendered without any value
+#if str($taxonomy.misc.blacklist).strip()
+--blacklist '$taxonomy.misc.blacklist'
+#end if
+#if str($taxonomy.misc.taxon_list).strip()
+--taxon-list '$taxonomy.misc.taxon_list'
+#end if
+--rescore-mode $taxonomy.misc.rescore_mode
+--allow-deletion $taxonomy.misc.allow_deletion
+--min-length $taxonomy.misc.min_length
+--max-length $taxonomy.misc.max_length
+--max-gaps $taxonomy.misc.max_gaps
+--contig-start-mode $taxonomy.misc.contig_start_mode
+--contig-end-mode $taxonomy.misc.contig_end_mode
+--orf-start-mode $taxonomy.misc.orf_start_mode
+--forward-frames '$taxonomy.misc.forward_frames'
+--reverse-frames '$taxonomy.misc.reverse_frames'
+--translation-table $taxonomy.misc.translation_table
+--translate $taxonomy.misc.translate
+--use-all-table-starts $taxonomy.misc.use_all_table_starts
+--id-offset $taxonomy.misc.id_offset
+--add-orf-stop $taxonomy.misc.add_orf_stop
+--sequence-overlap $taxonomy.misc.sequence_overlap
+--sequence-split-mode $taxonomy.misc.sequence_split_mode
+--headers-split-mode $taxonomy.misc.headers_split_mode
+--search-type $createtaxdb.database_type.search_type
+--prefilter-mode $taxonomy.misc.prefilter_mode
+##Common options
+##--compressed INT                 Write compressed output [0]
+--threads "\${GALAXY_SLOTS:-1}"
+##-v INT                           Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]
+--max-seq-len $taxonomy.common.max_seq_len
+##--db-load-mode INT               Database preload mode 0: auto, 1: fread, 2: mmap, 3: mmap+touch [0]
+##--mpi-runner STR                 Use MPI on compute cluster with this MPI command (e.g. "mpirun -np 42") []
+##--force-reuse BOOL               Reuse tmp files in tmp/latest folder ignoring parameters and version changes [0]
+##--remove-tmp-files BOOL          Delete temporary files [0]
+##Expert options
+--filter-hits $taxonomy.expert.filter_hits
+--sort-results $taxonomy.expert.sort_results
+##--create-lookup INT              Create database lookup file (can be very large) [0]
+--chain-alignments $taxonomy.expert.chain_alignments
+--merge-query $taxonomy.expert.merge_query
+##--strand INT                     Strand selection only works for DNA/DNA search 0: reverse, 1: forward, 2: both [1]
+&&
+## Flat TSV export of the taxonomy result; always produced
+mmseqs createtsv
+'sequenceDB'
+'output_taxonomy'
+'taxo_result.tsv'
+--first-seq-as-repr $createtsv.first_seq_as_repr
+--target-column $createtsv.target_column
+--full-header $createtsv.full_header
+--idx-seq-src $createtsv.idx_seq_src
+--threads "\${GALAXY_SLOTS:-1}"
+##--compressed INT          Write compressed output [0]
+##-v INT                    Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]
+##--db-output BOOL          Return a result DB instead of a text file [0]
+## Kraken-style report (report mode 0), written to taxo_result.txt
+#if str($kraken_report.keep_report) == "Yes"
+&&
+mmseqs taxonomyreport
+#if $filtertaxseqdb.taxon_list
+'database_filtered'
+#else
+'database'
+#end if
+'output_taxonomy'
+'taxo_result.txt'
+--report-mode 0
+--threads "\${GALAXY_SLOTS:-1}"
+#end if
+## Krona-style report (report mode 1), written to taxo_result.html
+#if str($krona_report.keep_report) == "Yes"
+&&
+mmseqs taxonomyreport
+#if $filtertaxseqdb.taxon_list
+'database_filtered'
+#else
+'database'
+#end if
+'output_taxonomy'
+'taxo_result.html'
+--report-mode 1
+--threads "\${GALAXY_SLOTS:-1}"
+#end if
+##-v INT              Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]
+]]></command>
+<inputs>
+<section name="createdb" title="Convert FASTA/Q file(s) to MMseqs sequence DB format"  expanded="true"> <!-- Query side: the dataset chosen here is symlinked as 'input' and converted by mmseqs createdb in the command block. The dbtype branches 1 and 2 each add one option that the command block forwards to mmseqs taxonomy. -->
+<param name="input_fasta" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input fasta file" help="" />
+<conditional name="alph_type">
+<param argument="--dbtype" type="select" label="Input type" help="" >
+<option value="0" selected="true">Auto</option>
+<option value="1">Amino acid</option>
+<option value="2">Nucleotides</option>
+</param>
+<when value="0"/>
+<when value="1">
+<param argument="--comp-bias-corr-scale" type="float" min="0" max="1" value="1" label="Scale composition bias correction" help=""/>
+</when>
+<when value="2">
+<param argument="--zdrop" type="integer" min="0" value="40" label="Maximal allowed difference between score values before alignment is truncated" help=""/>
+</when>
+</conditional>
+<param argument="--shuffle" type="boolean" checked="true" label="Shuffle input database" truevalue="1" falsevalue="0" optional="true" help="" />
+</section>
+<section name="createtaxdb" title="Add taxonomic labels to reference sequence DB" expanded="true"> <!-- Reference side: every branch below offers entries of the mmseqs2_databases data table, filtered on its type and taxonomy columns. The search_type value read by the command block is not declared here; presumably the search_type_aa / search_type_nt macros provide it (confirm in macro.xml). -->
+<conditional name="database_type">
+<param name="type" type="select" label="Database type" help="" >
+<option value="amino_acid_tax" selected="true">Amino acid with taxonomy information</option>
+<option value="nucleotides_tax">Nucleotides with taxonomy information</option>
+<option value="amino_acid">Amino acid without taxonomy information</option>
+<option value="nucleotides">Nucleotides without taxonomy information</option>
+</param>
+<when value="amino_acid_tax">
+<param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
+<options from_data_table="mmseqs2_databases">
+<filter type="static_value" value="aminoacid" column="type"/>
+<filter type="static_value" value="yes" column="taxonomy"/>
+<validator message="No mmseqs2 database is available" type="no_options"/>
+</options>
+</param>
+<expand macro="search_type_aa" />
+</when>
+<when value="nucleotides_tax">
+<param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
+<options from_data_table="mmseqs2_databases">
+<filter type="static_value" value="nucleotide" column="type"/>
+<filter type="static_value" value="yes" column="taxonomy"/>
+<validator message="No mmseqs2 database is available" type="no_options"/>
+</options>
+</param>
+<expand macro="search_type_nt" />
+</when>
+<when value="amino_acid">
+<param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
+<options from_data_table="mmseqs2_databases">
+<filter type="static_value" value="aminoacid" column="type"/>
+<filter type="static_value" value="no" column="taxonomy"/>
+<validator message="No mmseqs2 database is available" type="no_options"/>
+</options>
+</param>
+<expand macro="search_type_aa" />
+</when>
+<when value="nucleotides">
+<param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
+<options from_data_table="mmseqs2_databases">
+<filter type="static_value" value="nucleotide" column="type"/>
+<filter type="static_value" value="no" column="taxonomy"/>
+<validator message="No mmseqs2 database is available" type="no_options"/>
+</options>
+</param>
+<expand macro="search_type_nt" />
+</when>
+</conditional> <!-- The two parameters below are passed to mmseqs createtaxdb; the mapping file is only passed when a dataset is selected. -->
+<param argument="--tax-mapping-file" type="data" format="tabular,tsv,txt" label="File to map sequence identifier to taxonomical identifier" optional="true"/>
+<param argument="--tax-mapping-mode" type="select" label="Map taxonomy based on sequence database" help="" >
+<option value="0" selected="true">0: .lookup file</option>
+<option value="1">1: .source file</option>
+</param>
+</section>
+<section name="filtertaxseqdb" title="Filter taxonomy sequence database"> <!-- Optional step: when a value is set, the command block runs mmseqs filtertaxseqdb and uses database_filtered instead of database for the search and the reports. -->
+<param argument="--taxon-list" type="text" optional="true" value="" label="Taxonomy ID" help="Possibly multiple values separated by ','"/>
+</section>
+<section name="taxonomy" title="Taxonomy assignment by computing the lowest common ancestor of homologs">
+<section name="prefilter" title="Pre-filter"> <!-- Pre-filter options forwarded to mmseqs taxonomy. The command block also reads add_self_matches, kmer_length, mask, mask_prob and mask_lower_case from this section; they are not declared here, so presumably the macro below supplies them (confirm in macro.xml). -->
+<expand macro="prefilter_common_parameters" />
+<param argument="--spaced-kmer-mode" type="select" label="Spaced k-mer mode" help="">
+<option value="0">Use consecutive positions in k-mers</option>
+<option value="1" selected="true">Use spaced k-mers</option>
+</param>
+<param argument="--min-ungapped-score" type="integer" min="0" value="15" label="Accept only matches with ungapped alignment score above threshold" help=""/>
+<param argument="-s" name="sensitivity" type="float" min="0" max="7.5" value="2" label="Sensitivity" help="1.0 faster; 4.0 fast; 7.5 sensitive"/>
+<param argument="--target-search-mode" type="select" label="Target search mode" help="" >
+<option value="0" selected="true">Regular k-mer</option>
+<option value="1">Similar k-mer</option>
+</param>
+<param argument="--max-seqs" type="integer" min="0" value="300" label="Maximum results per query sequence allowed to pass the prefilter" help="Affects sensitivity"/>
+<param argument="--split" type="integer" min="0" value="0" label="Split input into N equally distributed chunks" help="0: set the best split automatically"/>
+<param argument="--split-mode" type="select" label="Split mode" help="" >
+<option value="0">Split target db</option>
+<option value="1">Split query db</option>
+<option value="2" selected="true">Auto, depending on main memory</option>
+</param>
+<param argument="--diag-score" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Use ungapped diagonal scoring during prefilter" help=""/>
+<param argument="--exact-kmer-matching" type="integer" min="0" max="1" value="0" label="Extract only exact k-mers for matching" help=""/>
+</section>
+<section name="align" title="Align"> <!-- Alignment options forwarded to mmseqs taxonomy. Options read by the command block but not declared here (for example convertalis, alignment_output_mode, realign) presumably come from the macro below (confirm in macro.xml). -->
+<expand macro="align_common_parameters" />
+<param argument="--alignment-mode" type="select" label="Alignment mode : How to compute the alignment" help="" >
+<option value="0">Automatic</option>
+<option value="1" selected="true">Only score and end_pos</option>
+<option value="2">Also start_pos and cov</option>
+<option value="3">Also seq.id</option>
+<option value="4">Only ungapped alignment</option>
+</param>
+<param argument="-e" name="evalue" type="float" min="0" value="1" label="E-value threshold" help="List matches below this E-value"/>
+<param argument="--min-seq-id" type="float" min="0" max="1" value="0" label="Minimum sequence identity" help="List matches above this sequence identity for clustering"/>
+<!-- Coverage is a fraction, so it is bounded to [0, 1] like the sequence identity above -->
+<param argument="-c" name="cov" type="float" min="0" max="1" value="0" label="List matches above this fraction of aligned (covered) residues" help=""/>
+<param argument="--cov-mode" type="select" label="Coverage mode" help="" >
+<option value="0" selected="true">Coverage of query and target</option>
+<option value="1">Coverage of target</option>
+<option value="2">Coverage of query</option>
+<option value="3">Target seq. length has to be at least x% of query length</option>
+<option value="4">Query seq. length has to be at least x% of target length</option>
+<option value="5">Short seq. needs to be at least x% of the other seq. length</option>
+</param>
+<param argument="--max-rejected" type="integer" min="0" value="5" label="Maximum rejected alignments before alignment calculation for a query is stopped" help=""/>
+<param argument="--max-accept" type="integer" min="0" value="30" label="Maximum accepted alignments before alignment calculation for a query is stopped" help=""/>
+<param argument="--exhaustive-search-filter" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Filter result during search ?" help=""/>
+</section>
+<section name="profile" title="Profile"> <!-- Profile options forwarded to mmseqs taxonomy. qid is free text because it accepts either one value or a comma-separated list of thresholds. -->
+<param argument="--mask-profile" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Mask query sequence of profile using tantan" help=""/>
+<param argument="--e-profile" type="float" min="0" value="1e-03" label="Include sequence matches with an E-value below this threshold into the profile" help=""/>
+<param argument="--wg" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use global sequence weighting for profile calculation" help=""/>
+<param argument="--filter-msa" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Filter MSA" help=""/>
+<param argument="--filter-min-enable" type="integer" min="0" value="0" label="Only filter MSAs with more than N sequences, 0 always filters" help=""/>
+<param argument="--max-seq-id" type="float" min="0" max="1" value="0.9" label="Reduce redundancy of output MSA using max. pairwise sequence identity" help=""/>
+<param argument="--qid" type="text" value="0" label="Reduce diversity of output MSAs using min.seq. identity with query sequences [0.0,1.0]" help="Alternatively, can be a list of multiple thresholds, e.g. 0.15,0.30,0.50 defines the filter buckets ]0.15-0.30] and ]0.30-0.50]"/>
+<param argument="--qsc" type="float" min="-50" max="100" value="-20" label="Reduce diversity of output MSAs using min. score per aligned residue with query sequences" help=""/>
+<param argument="--cov" type="float" min="0" max="1" value="0" label="Filter output MSAs using min. fraction of query residues covered by matched sequences" help=""/>
+<param argument="--diff" type="integer" min="0" value="1000" label="Filter MSAs by selecting most diverse set of sequences, keeping at least this many seqs in each MSA block of length 50" help=""/>
+<param argument="--pseudo-cnt-mode" type="select" label="Pseudo count mode" help="" >
+<option value="0" selected="true">Substitution-matrix</option>
+<option value="1">Context-specific pseudocounts</option>
+</param>
+<param argument="--exhaustive-search" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Exhaustive search" help=""/>
+<param argument="--lca-search" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Efficient search for LCA candidates" help=""/>
+</section>
+<section name="misc" title="Misc"> <!-- Remaining mmseqs taxonomy options: ORF pre-filtering, LCA and voting, taxon lists, ORF extraction and translation, sequence splitting. All values are forwarded by the command block. -->
+<param argument="--orf-filter-e" type="float" min="0" value="1.000E+02" label="E-value threshold used for query ORF prefiltering" help=""/>
+<param argument="--orf-filter-s" type="float" min="0" value="2" label="Sensitivity used for query ORF prefiltering" help=""/>
+<param argument="--lca-mode" type="select" label="LCA mode" help="" >
+<option value="1">Single search LCA</option>
+<option value="3" selected="true">Approximate 2bLCA</option>
+<option value="4">Top hit</option>
+</param>
+<param argument="--tax-output-mode" type="select" label="Taxonomy output mode" help="" >
+<option value="0" selected="true">Output LCA</option>
+<option value="1">Output alignment</option>
+<option value="2">Output both</option>
+</param>
+<!-- The majority threshold is a fraction of agreement, so it is bounded to [0, 1] -->
+<param argument="--majority" type="float" min="0" max="1" value="0.5" label="Minimal fraction of agreement among taxonomically assigned sequences of a set" help=""/>
+<param argument="--vote-mode" type="select" label="Mode of assigning weights to compute majority" help="" >
+<option value="0">Uniform</option>
+<option value="1" selected="true">Minus log E-value</option>
+<option value="2">Score</option>
+</param>
+<param argument="--tax-lineage" type="select" label="Taxonomy lineage" help="" >
+<option value="0" selected="true">Don't show</option>
+<option value="1">Add all lineage names</option>
+<option value="2">Add all lineage taxids</option>
+</param>
+<param argument="--blacklist" type="text" value="" label="Comma separated list of ignored taxa in LCA computation" help=""/>
+<param argument="--taxon-list" type="text" value="" label="Taxonomy ID, possibly multiple values separated by ','" help=""/>
+<param argument="--rescore-mode" type="select" label="Rescore diagonals with" help="" >
+<option value="0" selected="true">Hamming distance</option>
+<option value="1">Local alignment (score only)</option>
+<option value="2">Local alignment</option>
+<option value="3">Global alignment</option>
+<option value="4">Longest alignment fulfilling window quality criterion</option>
+</param>
+<param argument="--allow-deletion" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Allow deletions in a MSA" help=""/>
+<param argument="--min-length" type="integer" min="0" value="30" label="Minimum codon number in open reading frames" help=""/>
+<param argument="--max-length" type="integer" min="0" value="32734" label="Maximum codon number in open reading frames" help=""/>
+<param argument="--max-gaps" type="integer" min="0" value="2147483647" label="Maximum number of codons with gaps or unknown residues before an open reading frame is rejected" help=""/>
+<param argument="--contig-start-mode" type="select" label="Contig start can be" help="" >
+<option value="0">Incomplete</option>
+<option value="1">Complete</option>
+<option value="2" selected="true">Both</option>
+</param>
+<param argument="--contig-end-mode" type="select" label="Contig end can be" help="" >
+<option value="0">Incomplete</option>
+<option value="1">Complete</option>
+<option value="2" selected="true">Both</option>
+</param>
+<param argument="--orf-start-mode" type="select" label="ORF fragment can be" help="" >
+<option value="0">From start to stop</option>
+<option value="1" selected="true">From any to stop</option>
+<option value="2">From last encountered start to stop (no start in the middle)</option>
+</param>
+<param argument="--forward-frames" type="text" value="1,2,3" label="Comma-separated list of frames on the forward strand to be extracted" help=""/>
+<param argument="--reverse-frames" type="text" value="1,2,3" label="Comma-separated list of frames on the reverse strand to be extracted" help=""/>
+<param argument="--translation-table" type="select" label="Translation table" help="">
+<option value="1" selected="true">Canonical</option>
+<option value="2">The Vertebrate Mitochondrial Code</option>
+<option value="3">The Yeast Mitochondrial Code</option>
+<option value="4">The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
+<option value="5">The Invertebrate Mitochondrial Code</option>
+<option value="6">The Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
+<option value="9">The Echinoderm and Flatworm Mitochondrial Code</option>
+<option value="10">The Euplotid Nuclear Code</option>
+<option value="11">The Bacterial, Archaeal and Plant Plastid Code</option>
+<option value="12">The Alternative Yeast Nuclear Code</option>
+<option value="13">The Ascidian Mitochondrial Code</option>
+<option value="14">The Alternative Flatworm Mitochondrial Code</option>
+<option value="15">Blepharisma Nuclear Code</option>
+<option value="16">Chlorophycean Mitochondrial Code</option>
+<option value="21">Trematode Mitochondrial Code</option>
+<option value="22">Scenedesmus obliquus Mitochondrial Code</option>
+<option value="23">Thraustochytrium Mitochondrial Code</option>
+<option value="24">Rhabdopleuridae Mitochondrial Code</option>
+<option value="25">Candidate Division SR1 and Gracilibacteria Code</option>
+<option value="26">Pachysolen tannophilus Nuclear Code</option>
+<option value="27">Karyorelict Nuclear Code</option>
+<option value="28">Condylostoma Nuclear Code</option>
+<option value="29">Mesodinium Nuclear Code</option>
+<option value="30">Peritrich Nuclear Code</option>
+<option value="31">Blastocrithidia Nuclear Code</option>
+</param>
+<param argument="--translate" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Translate ORF to amino acid" help=""/>
+<param argument="--use-all-table-starts" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use all alternatives for a start codon in the genetic table, if false - only ATG (AUG)" help=""/>
+<param argument="--id-offset" type="integer" min="0" value="0" label="Numeric ids in index file are offset by this value" help=""/>
+<param argument="--add-orf-stop" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Add stop codon '*' at complete start and end" help=""/>
+<param argument="--sequence-overlap" type="integer" min="0" value="0" label="Overlap between sequences" help=""/>
+<param argument="--sequence-split-mode" type="select" label="Sequence split mode" help="" >
+<option value="0">Copy data</option>
+<option value="1" selected="true">Soft link data and write new index</option>
+</param>
+<param argument="--headers-split-mode" type="select" label="Headers split mode" help="" >
+<option value="0" selected="true">Split position</option>
+<option value="1">Original header</option>
+</param>
+<param argument="--prefilter-mode" type="select" label="Prefilter mode" help="" >
+<option value="0" selected="true">Kmer/ungapped</option>
+<option value="1">Ungapped</option>
+<option value="2">No filter</option>
+</param>
+</section>
+<expand macro="common_section"/> <!-- The command block reads taxonomy.common.max_seq_len, so this macro presumably defines a section named common with that parameter (confirm in macro.xml). -->
+<section name="expert" title="Expert"> <!-- filter_hits and sort_results are read by the command block but not declared here; presumably the macro below supplies them (confirm in macro.xml). -->
+<expand macro="expert_common_parameters" />
+<param argument="--chain-alignments" type="integer" min="0" value="0" label="Chain alignments" help=""/>
+<param argument="--merge-query" type="integer" min="0" value="1" label="Combine ORFs/split sequences to a single entry" help=""/>
+</section>
+</section>
+<section name="createtsv" title="Create a tsv report from taxonomy output "> <!-- Options forwarded to mmseqs createtsv, which writes taxo_result.tsv (collected as the output_taxonomy_tsv dataset). -->
+<param argument="--first-seq-as-repr" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use the first sequence of the clustering result as representative sequence" help=""/>
+<param argument="--target-column" type="integer" min="0" value="1" label="Select a target column, 0 if no target id exists" help="" />
+<param argument="--full-header" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Replace DB ID by its corresponding Full Header" help=""/>
+<param argument="--idx-seq-src" type="select" label="Index sequences source" help="">
+<option value="0" selected="true">Auto</option>
+<option value="1">Split/translated sequences</option>
+<option value="2">Input sequences</option>
+</param>
+</section>
+<conditional name="kraken_report"> <!-- "Yes" makes the command block run mmseqs taxonomyreport with report mode 0 (taxo_result.txt) and enables the output_taxonomy_kraken dataset; neither branch adds parameters. -->
+<param name="keep_report" type="select" label="Do you want a Kraken style report" help="" >
+<option value="Yes" selected="true">Yes</option>
+<option value="No">No</option>
+</param>
+<when value="Yes"/>
+<when value="No"/>
+</conditional>
+<conditional name="krona_report"> <!-- "Yes" makes the command block run mmseqs taxonomyreport with report mode 1 (taxo_result.html) and enables the output_taxonomy_krona dataset; neither branch adds parameters. -->
+<param name="keep_report" type="select" label="Do you want a Krona style report" help="" >
+<option value="Yes" selected="true">Yes</option>
+<option value="No">No</option>
+</param>
+<when value="Yes"/>
+<when value="No"/>
+</conditional>
+</inputs>
+<outputs> <!-- Each dataset is collected from a file the command block writes in the working directory. The TSV is always produced; the two report datasets exist only when their keep_report select is "Yes". -->
+<data name="output_taxonomy_tsv" format="tabular" from_work_dir="taxo_result.tsv" label="${tool.name} on ${on_string}: Taxonomy Report"/>
+<data name="output_taxonomy_kraken" format="txt" from_work_dir="taxo_result.txt" label="${tool.name} on ${on_string}: Kraken Report">
+<filter>kraken_report['keep_report'] == "Yes"</filter>
+</data>
+<data name="output_taxonomy_krona" format="html" from_work_dir="taxo_result.html" label="${tool.name} on ${on_string}: Krona Report">
+<filter>krona_report['keep_report'] == "Yes"</filter>
+</data>
+</outputs>
+<tests>
+<!-- Test 1: nucleotide query (dbtype 2) against the Swiss-Prot test database, reference restricted to taxon 2 through filtertaxseqdb; Krona report switched off, so the two outputs are the TSV and the Kraken report -->
+<test expect_num_outputs="2">
+<section name="createdb">
+<param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/>
+<conditional name="alph_type">
+<param name="dbtype" value="2"/>
+</conditional>
+</section>
+<section name="createtaxdb">
+<conditional name="database_type">
+<param name="type" value="amino_acid_tax"/>
+<param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" />
+</conditional>
+</section>
+<section name="filtertaxseqdb">
+<param name="taxon_list" value="2" />
+</section>
+<conditional name="krona_report">
+<param name="keep_report" value="No"/>
+</conditional>
+<output name="output_taxonomy_tsv" ftype="tabular">
+<assert_contents>
+<has_line line="MYSTERY.222&#009;1236&#009;class&#009;Gammaproteobacteria&#009;1&#009;1&#009;1&#009;1.000"/>
+<has_line line="MYSTERY.64&#009;119060&#009;family&#009;Burkholderiaceae&#009;1&#009;1&#009;1&#009;1.000"/>
+<has_n_columns n="8"/>
+</assert_contents>
+</output>
+<output name="output_taxonomy_kraken" ftype="txt">
+<assert_contents>
+<has_text text="93.3333"/>
+<has_text text="33.3333"/>
+</assert_contents>
+</output>
+</test>
+<test expect_num_outputs="2"> <!-- Test 2: default input type and no taxon filter; Kraken report switched off, so the two outputs are the TSV and the Krona HTML report -->
+<section name="createdb">
+<param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/>
+</section>
+<section name="createtaxdb">
+<conditional name="database_type">
+<param name="type" value="amino_acid_tax"/>
+<param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" />
+</conditional>
+</section>
+<conditional name="kraken_report">
+<param name="keep_report" value="No"/>
+</conditional>
+<output name="output_taxonomy_tsv" ftype="tabular">
+<assert_contents>
+<has_line line="MYSTERY.222&#009;1236&#009;class&#009;Gammaproteobacteria&#009;1&#009;1&#009;1&#009;1.000"/>
+<has_line line="MYSTERY.64&#009;119060&#009;family&#009;Burkholderiaceae&#009;1&#009;1&#009;1&#009;1.000"/>
+<has_n_columns n="8"/>
+</assert_contents>
+</output>
+<output name="output_taxonomy_krona" ftype="html">
+<assert_contents>
+<has_line line="// Krona is a flexible tool for exploring the relative proportions of"/>
+</assert_contents>
+</output>
+</test>
+<test expect_num_outputs="3"> <!-- Test 3: all defaults, both reports kept, so the TSV, the Krona HTML report and the Kraken report are all checked -->
+<section name="createdb">
+<param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/>
+</section>
+<section name="createtaxdb">
+<conditional name="database_type">
+<param name="type" value="amino_acid_tax"/>
+<param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" />
+</conditional>
+</section>
+<output name="output_taxonomy_tsv" ftype="tabular">
+<assert_contents>
+<has_line line="MYSTERY.222&#009;1236&#009;class&#009;Gammaproteobacteria&#009;1&#009;1&#009;1&#009;1.000"/>
+<has_line line="MYSTERY.64&#009;119060&#009;family&#009;Burkholderiaceae&#009;1&#009;1&#009;1&#009;1.000"/>
+<has_n_columns n="8"/>
+</assert_contents>
+</output>
+<output name="output_taxonomy_krona" ftype="html">
+<assert_contents>
+<has_line line="// Krona is a flexible tool for exploring the relative proportions of"/>
+</assert_contents>
+</output>
+<output name="output_taxonomy_kraken" ftype="txt">
+<assert_contents>
+<has_text text="93.3333"/>
+<has_text text="33.3333"/>
+</assert_contents>
+</output>
+</test>
+</tests>
+<help><![CDATA[
+**MMseqs2: ultra fast and sensitive sequence search and clustering suite**
+MMseqs2 (Many-against-Many sequence searching) is a software suite to search and cluster huge protein and nucleotide sequence sets.
+MMseqs2 is open source GPL-licensed software implemented in C++ for Linux, MacOS, and (as beta version, via cygwin) Windows.
+The software is designed to run on multiple cores and servers and exhibits very good scalability.
+MMseqs2 can run 10000 times faster than BLAST. At 100 times its speed it achieves almost the same sensitivity.
+It can perform profile searches with the same sensitivity as PSI-BLAST at over 400 times its speed.
+**Usage**
+* Convert FASTA/Q file(s) to MMseqs sequence DB format
+*mmseqs createdb <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB> [options]*
+* Add taxonomic labels to sequence DB
+*mmseqs createtaxdb <i:sequenceDB> <tmpDir> [options]*
+* Filter taxonomy sequence database
+*mmseqs filtertaxseqdb <i:taxSeqDB> <o:taxSeqDB> [options]*
+* Taxonomy assignment by computing the lowest common ancestor of homologs
+*mmseqs taxonomy <i:queryDB> <i:targetDB> <o:taxaDB> <tmpDir> [options]*
+* Convert result DB to tab-separated flat file
+*mmseqs createtsv <i:queryDB> [<i:targetDB>] <i:resultDB> <o:tsvFile> [options]*
+* Create a taxonomy report in Kraken or Krona format
+*mmseqs taxonomyreport <i:seqTaxDB> <i:taxResultDB/resultDB/sequenceDB> <o:taxonomyReport> [options]*
+https://github.com/soedinglab/MMseqs2
+]]></help>
+<expand macro="citations"/>
+</tool>

Mercurial > repos > iuc > mmseqs2_taxonomy_assignment

comparison mmseqs2_taxonomy_assignment.xml @ 0:d0acde079e2e draft