Mercurial > repos > florianbegusch > qiime2_suite
diff qiime2/qiime_feature-classifier_classify-hybrid-vsearch-sklearn.xml @ 14:a0a8d77a991c draft
Uploaded
author | florianbegusch |
---|---|
date | Thu, 03 Sep 2020 09:51:29 +0000 |
parents | f190567fe3f6 |
children |
line wrap: on
line diff
--- a/qiime2/qiime_feature-classifier_classify-hybrid-vsearch-sklearn.xml Thu Sep 03 09:46:00 2020 +0000 +++ b/qiime2/qiime_feature-classifier_classify-hybrid-vsearch-sklearn.xml Thu Sep 03 09:51:29 2020 +0000 @@ -1,168 +1,126 @@ <?xml version="1.0" ?> -<tool id="qiime_feature-classifier_classify-hybrid-vsearch-sklearn" name="qiime feature-classifier classify-hybrid-vsearch-sklearn" version="2019.7"> - <description> - ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier</description> - <requirements> - <requirement type="package" version="2019.7">qiime2</requirement> - </requirements> - <command><![CDATA[ +<tool id="qiime_feature-classifier_classify-hybrid-vsearch-sklearn" name="qiime feature-classifier classify-hybrid-vsearch-sklearn" + version="2020.8"> + <description> ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier</description> + <requirements> + <requirement type="package" version="2020.8">qiime2</requirement> + </requirements> + <command><![CDATA[ qiime feature-classifier classify-hybrid-vsearch-sklearn - + --i-query=$iquery + --i-reference-reads=$ireferencereads - - - -#if str( $id_to_taxonomy_fp.selector ) == 'history' -#set $tax = $id_to_taxonomy_fp.taxonomy_fp ---i-reference-taxonomy '$tax' -#else: -#set $tax = $id_to_taxonomy_fp.taxonomy_fp.fields.path ---i-reference-taxonomy '$tax' -#end if - - - - +--i-reference-taxonomy=$ireferencetaxonomy -#if str( $id_to_classifier_fp.selector ) == 'history' -#set $classifier = $id_to_classifier_fp.classifier_fp ---i-classifier '$classifier' -#else: -#set $classifier = $id_to_classifier_fp.classifier_fp.fields.path ---i-classifier '$classifier' -#end if - +--i-classifier=$iclassifier - - - -#if str($pmaxaccepts): - --p-maxaccepts=$pmaxaccepts -#end if - -#if str($pconfidence): - --p-confidence=$pconfidence +#if str($pmaxaccepts) != 'None': +--p-maxaccepts=$pmaxaccepts #end if - - +--p-perc-identity=$ppercidentity -#if str($ppercidentity): - --p-perc-identity=$ppercidentity -#end if +--p-query-cov=$pquerycov -#if str($pquerycov): - --p-query-cov=$pquerycov +#if str($pstrand) != 'None': +--p-strand=$pstrand #end if -#if str($pstrand) != 'None': - --p-strand=$pstrand +--p-min-consensus=$pminconsensus + +#if str($pmaxhits) != 'None': +--p-maxhits=$pmaxhits #end if -#if str($pminconsensus): - --p-min-consensus=$pminconsensus -#end if - - -#if str($preadorientation) != 'None': - --p-read-orientation=$preadorientation +#if str($pmaxrejects) != 'None': +--p-maxrejects=$pmaxrejects #end if -#set $pthreads = '${GALAXY_SLOTS:-4}' - -#if str($pthreads): - -#if str($pthreads): - --p-threads="$pthreads" +#if str($pconfidence) != 'None': +--p-confidence=$pconfidence #end if +#if str($preadorientation) != 'None': +--p-read-orientation=$preadorientation #end if +--p-threads=$pthreads -#if $pprefilter: - --p-prefilter +#if $pnoprefilter: + --p-no-prefilter #end if -#if str($psamplesize): - --p-sample-size=$psamplesize -#end if +--p-sample-size=$psamplesize -#if str($prandseed): - --p-randseed=$prandseed -#end if - +--p-randseed=$prandseed --o-classification=oclassification +#if str($examples) != 'None': +--examples=$examples +#end if + ; cp oclassification.qza $oclassification - ]]></command> - <inputs> - <param format="qza,no_unzip.zip" label="--i-query: ARTIFACT FeatureData[Sequence] Sequences to classify taxonomically. [required]" name="iquery" optional="False" type="data"/> - <param format="qza,no_unzip.zip" label="--i-reference-reads: ARTIFACT FeatureData[Sequence] reference sequences. [required]" name="ireferencereads" optional="False" type="data"/> - - - <conditional name="id_to_taxonomy_fp" optional="True"> - <param name="selector" type="select" label="Reference taxonomy to query"> - <option value="cached">Public databases</option> - <option value="history">Databases from your history</option> - </param> - <when value="cached"> - <param argument="--taxonomy_fp" label="Reference taxonomy" type="select" optional="True"> - <options from_data_table="qiime_taxonomy" /> - </param> - </when> - <when value="history"> - <param argument="--taxonomy_fp" type="data" format="qza,no_unzip.zip" label="Reference databases" optional="True" /> - </when> - </conditional> - - <conditional name="id_to_classifier_fp" optional="True"> - <param name="selector" type="select" label="Reference classifier to query"> - <option value="cached">Public classifiers</option> - <option value="history">Classifiers from your history</option> - </param> - <when value="cached"> - <param name="classifier_fp" label="Reference classifier" type="select" optional="True"> - <options from_data_table="qiime_rep_set" /> - </param> - </when> - <when value="history"> - <param name="classifier_fp" type="data" format="qza,no_unzip.zip" label="Reference classifier" optional="True" /> - </when> - </conditional> - - - <param label="--p-maxaccepts: VALUE Int % Range(1, None) | Str % Choices('all') Maximum number of hits to keep for each query. Set to 'all' to keep all hits > perc-identity similarity. [default: 10]" name="pmaxaccepts" optional="True" type="text" value="10" /> - <param label="--p-confidence: VALUE Float % Range(0, 1, inclusive_end=True) | Str % Choices('disable') Confidence threshold for limiting taxonomic depth. Set to 'disable' to disable confidence calculation, or 0 to calculate confidence but not apply it to limit the taxonomic depth of the assignments. [default: 0.7]" name="pconfidence" optional="True" type="text" value="0.7" /> - + ]]></command> + <inputs> + <param format="qza,no_unzip.zip" label="--i-query: ARTIFACT FeatureData[Sequence] Sequences to classify taxonomically. [required]" name="iquery" optional="False" type="data" /> + <param format="qza,no_unzip.zip" label="--i-reference-reads: ARTIFACT FeatureData[Sequence] reference sequences. [required]" name="ireferencereads" optional="False" type="data" /> + <param format="qza,no_unzip.zip" label="--i-reference-taxonomy: ARTIFACT FeatureData[Taxonomy] reference taxonomy labels. [required]" name="ireferencetaxonomy" optional="False" type="data" /> + <param format="qza,no_unzip.zip" label="--i-classifier: ARTIFACT TaxonomicClassifier Pre-trained sklearn taxonomic classifier for classifying the reads. [required]" name="iclassifier" optional="False" type="data" /> + <param label="--p-maxaccepts: " name="pmaxaccepts" optional="True" type="select"> + <option selected="True" value="None">Selection is Optional</option> + <option value="Int % Range(1">Int % Range(1</option> + <option value="None">None</option> + </param> + <param exclude_max="False" label="--p-perc-identity: PROPORTION Range(0.0, 1.0, inclusive_end=True) Percent sequence similarity to use for PREFILTER. Reject match if percent identity to query is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.5]" max="1.0" min="0.0" name="ppercidentity" optional="True" type="float" value="0.5" /> + <param exclude_max="False" label="--p-query-cov: PROPORTION Range(0.0, 1.0, inclusive_end=True) Query coverage threshold to use for PREFILTER. Reject match if query alignment coverage per high-scoring pair is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.8]" max="1.0" min="0.0" name="pquerycov" optional="True" type="float" value="0.8" /> + <param label="--p-strand: " name="pstrand" optional="True" type="select"> + <option selected="True" value="None">Selection is Optional</option> + <option value="both">both</option> + <option value="plus">plus</option> + </param> + <param exclude_max="False" exclude_min="True" label="--p-min-consensus: NUMBER Range(0.5, 1.0, inclusive_start=False, inclusive_end=True) Minimum fraction of assignments must match top hit to be accepted as consensus assignment. [default: 0.51]" max="1.0" min="0.5" name="pminconsensus" optional="True" type="float" value="0.51" /> + <param label="--p-maxhits: " name="pmaxhits" optional="True" type="select"> + <option selected="True" value="None">Selection is Optional</option> + <option value="Int % Range(1">Int % Range(1</option> + <option value="None">None</option> + </param> + <param label="--p-maxrejects: " name="pmaxrejects" optional="True" type="select"> + <option selected="True" value="None">Selection is Optional</option> + <option value="Int % Range(1">Int % Range(1</option> + <option value="None">None</option> + </param> + <param label="--p-confidence: " name="pconfidence" optional="True" type="select"> + <option selected="True" value="None">Selection is Optional</option> + <option value="Float % Range(0">Float % Range(0</option> + <option value="1">1</option> + <option value="inclusive_end=True">inclusive_end=True</option> + </param> + <param label="--p-read-orientation: " name="preadorientation" optional="True" type="select"> + <option selected="True" value="None">Selection is Optional</option> + <option value="same">same</option> + <option value="reverse-complement">reverse-complement</option> + <option value="auto">auto</option> + </param> + <param label="--p-no-prefilter: Do not toggle positive filter of query sequences on or off. [default: True]" name="pnoprefilter" selected="False" type="boolean" /> + <param label="--p-sample-size: INTEGER Range(1, None) Randomly extract the given number of sequences from the reference database to use for prefiltering. This parameter is ignored if `prefilter` is disabled. [default: 1000]" min="1" name="psamplesize" optional="True" type="integer" value="1000" /> + <param label="--p-randseed: INTEGER Use integer as a seed for the pseudo-random generator Range(0, None) used during prefiltering. A given seed always produces the same output, which is useful for replicability. Set to 0 to use a pseudo-random seed. This parameter is ignored if `prefilter` is disabled. [default: 0]" min="0" name="prandseed" optional="True" type="integer" value="0" /> + <param label="--examples: Show usage examples and exit." name="examples" optional="False" type="data" /> + + </inputs> - <param label="--p-perc-identity: PROPORTION Range(0.0, 1.0, inclusive_end=True) Percent sequence similarity to use for PREFILTER. Reject match if percent identity to query is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.5]" name="ppercidentity" optional="True" type="float" value="0.5" min="0" max="1" exclusive_end="False" /> - <param label="--p-query-cov: PROPORTION Range(0.0, 1.0, inclusive_end=True) Query coverage threshold to use for PREFILTER. Reject match if query alignment coverage per high-scoring pair is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.8]" name="pquerycov" optional="True" type="float" value="0.8" min="0" max="1" exclusive_end="False" /> - <param label="--p-strand: " name="pstrand" optional="True" type="select"> - <option selected="True" value="None">Selection is Optional</option> - <option value="both">both</option> - <option value="plus">plus</option> - </param> - <param label="--p-min-consensus: NUMBER Range(0.5, 1.0, inclusive_start=False, inclusive_end=True) Minimum fraction of assignments must match top hit to be accepted as consensus assignment. [default: 0.51]" name="pminconsensus" optional="True" type="float" value="0.51" min="0.5" max="1" exclusive_end="True" /> - <param label="--p-read-orientation: TEXT Choices('same', 'reverse-complement', 'auto') Direction of reads with respect to reference sequences in pre-trained sklearn classifier. same will cause reads to be classified unchanged; reverse-complement will cause reads to be reversed and complemented prior to classification. 'auto' will autodetect orientation based on the confidence estimates for the first 100 reads. [default: 'auto'] " name="preadorientation" optional="True" type="select" > - <option value="None">Selection is Optional</option> - <option value="same">same</option> - <option value="reverse-complement">reverse-complement</option> - <option selected="True" value="auto">auto</option> - </param> - <param label="--p-prefilter: --p-no-prefilter Toggle positive filter of query sequences on or off. [default: True]" name="pprefilter" selected="False" type="boolean"/> - <param label="--p-sample-size: INTEGER Range(1, None) Randomly extract the given number of sequences from the reference database to use for prefiltering. This parameter is ignored if `prefilter` is disabled. [default: 1000]" name="psamplesize" optional="True" type="integer" value="1000" min="1"/> - <param label="--p-randseed: INTEGER Use integer as a seed for the pseudo-random generator Range(0, None) used during prefiltering. A given seed always produces the same output, which is useful for replicability. Set to 0 to use a pseudo-random seed. This parameter is ignored if `prefilter` is disabled. [default: 0]" name="prandseed" optional="True" type="integer" value="0" min="0"/> - </inputs> - <outputs> - <data format="qza" label="${tool.name} on ${on_string}: classification.qza" name="oclassification"/> - </outputs> - <help><![CDATA[ -ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier -################################################################## + <outputs> + <data format="qza" label="${tool.name} on ${on_string}: classification.qza" name="oclassification" /> + + </outputs> + + <help><![CDATA[ + ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier +############################################################### NOTE: THIS PIPELINE IS AN ALPHA RELEASE. Please report bugs to https://forum.qiime2.org! Assign taxonomy to query sequences using hybrid @@ -187,7 +145,18 @@ Pre-trained sklearn taxonomic classifier for classifying the reads. maxaccepts : Int % Range(1, None) | Str % Choices('all'), optional Maximum number of hits to keep for each query. Set to "all" to keep all - hits > perc_identity similarity. + hits > perc_identity similarity. Note that if strand=both, maxaccepts + will keep N hits for each direction (if searches in the opposite + direction yield results that exceed the minimum perc_identity). In + those cases use maxhits to control the total number of hits returned. + This option works in pair with maxrejects. The search process sorts + target sequences by decreasing number of k-mers they have in common + with the query sequence, using that information as a proxy for sequence + similarity. After pairwise alignments, if the first target sequence + passes the acceptation criteria, it is accepted as best hit and the + search process stops for that query. If maxaccepts is set to a higher + value, more hits are accepted. If maxaccepts and maxrejects are both + set to "all", the complete database is searched. perc_identity : Float % Range(0.0, 1.0, inclusive_end=True), optional Percent sequence similarity to use for PREFILTER. Reject match if percent identity to query is lower. Set to a lower value to perform a @@ -203,6 +172,8 @@ min_consensus : Float % Range(0.5, 1.0, inclusive_start=False, inclusive_end=True), optional Minimum fraction of assignments must match top hit to be accepted as consensus assignment. +maxhits : Int % Range(1, None) | Str % Choices('all'), optional +maxrejects : Int % Range(1, None) | Str % Choices('all'), optional reads_per_batch : Int % Range(0, None), optional Number of reads to process in each batch for sklearn classification. If "auto", this parameter is autoscaled to min(number of query sequences / @@ -217,6 +188,8 @@ reverse-complement will cause reads to be reversed and complemented prior to classification. "auto" will autodetect orientation based on the confidence estimates for the first 100 reads. +threads : Int % Range(1, None), optional + Number of threads to use for job parallelization. prefilter : Bool, optional Toggle positive filter of query sequences on or off. sample_size : Int % Range(1, None), optional @@ -233,9 +206,9 @@ ------- classification : FeatureData[Taxonomy] The resulting taxonomy classifications. - ]]></help> -<macros> + ]]></help> + <macros> <import>qiime_citation.xml</import> -</macros> -<expand macro="qiime_citation"/> -</tool> + </macros> + <expand macro="qiime_citation"/> +</tool> \ No newline at end of file