diff qiime2/qiime_feature-classifier_classify-hybrid-vsearch-sklearn.xml @ 14:a0a8d77a991c draft

Uploaded
author florianbegusch
date Thu, 03 Sep 2020 09:51:29 +0000
parents f190567fe3f6
children
line wrap: on
line diff
--- a/qiime2/qiime_feature-classifier_classify-hybrid-vsearch-sklearn.xml	Thu Sep 03 09:46:00 2020 +0000
+++ b/qiime2/qiime_feature-classifier_classify-hybrid-vsearch-sklearn.xml	Thu Sep 03 09:51:29 2020 +0000
@@ -1,168 +1,126 @@
 <?xml version="1.0" ?>
-<tool id="qiime_feature-classifier_classify-hybrid-vsearch-sklearn" name="qiime feature-classifier classify-hybrid-vsearch-sklearn" version="2019.7">
-	<description> -  ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier</description>
-	<requirements>
-		<requirement type="package" version="2019.7">qiime2</requirement>
-	</requirements>
-	<command><![CDATA[
+<tool id="qiime_feature-classifier_classify-hybrid-vsearch-sklearn" name="qiime feature-classifier classify-hybrid-vsearch-sklearn"
+      version="2020.8">
+  <description> ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier</description>
+  <requirements>
+    <requirement type="package" version="2020.8">qiime2</requirement>
+  </requirements>
+  <command><![CDATA[
 qiime feature-classifier classify-hybrid-vsearch-sklearn
-    
+
 --i-query=$iquery
+
 --i-reference-reads=$ireferencereads
 
-
-
-
-#if str( $id_to_taxonomy_fp.selector ) == 'history'
-#set $tax = $id_to_taxonomy_fp.taxonomy_fp
---i-reference-taxonomy '$tax'
-#else:
-#set $tax = $id_to_taxonomy_fp.taxonomy_fp.fields.path
---i-reference-taxonomy '$tax'
-#end if
-
-
-
-
+--i-reference-taxonomy='$ireferencetaxonomy'
 
-#if str( $id_to_classifier_fp.selector ) == 'history'
-#set $classifier = $id_to_classifier_fp.classifier_fp
---i-classifier '$classifier'
-#else:
-#set $classifier = $id_to_classifier_fp.classifier_fp.fields.path
---i-classifier '$classifier'
-#end if
-
+--i-classifier='$iclassifier'
 
-
-
-
-#if str($pmaxaccepts):
- --p-maxaccepts=$pmaxaccepts
-#end if
-
-#if str($pconfidence):
- --p-confidence=$pconfidence
+#if str($pmaxaccepts) != 'None':
+--p-maxaccepts=$pmaxaccepts
 #end if
 
-
-
+--p-perc-identity=$ppercidentity
 
-#if str($ppercidentity):
- --p-perc-identity=$ppercidentity
-#end if
+--p-query-cov=$pquerycov
 
-#if str($pquerycov):
- --p-query-cov=$pquerycov
+#if str($pstrand) != 'None':
+--p-strand=$pstrand
 #end if
 
-#if str($pstrand) != 'None':
- --p-strand=$pstrand
+--p-min-consensus=$pminconsensus
+
+#if str($pmaxhits) != 'None':
+--p-maxhits=$pmaxhits
 #end if
 
-#if str($pminconsensus):
- --p-min-consensus=$pminconsensus
-#end if
-
-
-#if str($preadorientation) != 'None':
- --p-read-orientation=$preadorientation
+#if str($pmaxrejects) != 'None':
+--p-maxrejects=$pmaxrejects
 #end if
 
-#set $pthreads = '${GALAXY_SLOTS:-4}'
-
-#if str($pthreads):
-
-#if str($pthreads):
- --p-threads="$pthreads"
+#if str($pconfidence) != 'None':
+--p-confidence=$pconfidence
 #end if
 
+#if str($preadorientation) != 'None':
+--p-read-orientation=$preadorientation
 #end if
 
+--p-threads="\${GALAXY_SLOTS:-4}"
 
-#if $pprefilter:
- --p-prefilter
+#if $pnoprefilter:
+ --p-no-prefilter
 #end if
 
-#if str($psamplesize):
- --p-sample-size=$psamplesize
-#end if
+--p-sample-size=$psamplesize
 
-#if str($prandseed):
- --p-randseed=$prandseed
-#end if
-
+--p-randseed=$prandseed
 
 --o-classification=oclassification
 
+#if str($examples) != 'None':
+--examples=$examples
+#end if
+
 ;
 cp oclassification.qza $oclassification
-	]]></command>
-	<inputs>
-		<param format="qza,no_unzip.zip" label="--i-query: ARTIFACT FeatureData[Sequence] Sequences to classify taxonomically.        [required]" name="iquery" optional="False" type="data"/>
-		<param format="qza,no_unzip.zip" label="--i-reference-reads: ARTIFACT FeatureData[Sequence] reference sequences.                        [required]" name="ireferencereads" optional="False" type="data"/>
-
-
-		<conditional name="id_to_taxonomy_fp" optional="True">
-		   <param name="selector" type="select" label="Reference taxonomy to query">
-			  <option value="cached">Public databases</option>
-			  <option value="history">Databases from your history</option>
-		   </param>
-		   <when value="cached">
-			  <param argument="--taxonomy_fp" label="Reference taxonomy" type="select" optional="True">
-				 <options from_data_table="qiime_taxonomy" />
-			  </param>
-		   </when>
-		   <when value="history">
-			  <param argument="--taxonomy_fp" type="data" format="qza,no_unzip.zip" label="Reference databases" optional="True" />
-		   </when>
-		</conditional>
-
 
-		<conditional name="id_to_classifier_fp" optional="True">
-		   <param name="selector" type="select" label="Reference classifier to query">
-			  <option value="cached">Public classifiers</option>
-			  <option value="history">Classifiers from your history</option>
-		   </param>
-		   <when value="cached">
-			  <param name="classifier_fp" label="Reference classifier" type="select" optional="True">
-				 <options from_data_table="qiime_rep_set" />
-			  </param>
-		   </when>
-		   <when value="history">
-			  <param name="classifier_fp" type="data" format="qza,no_unzip.zip" label="Reference classifier" optional="True" />
-		   </when>
-		</conditional>
-
-
-		<param label="--p-maxaccepts: VALUE Int % Range(1, None) | Str % Choices('all') Maximum number of hits to keep for each query. Set to 'all' to keep all hits > perc-identity similarity.  [default: 10]" name="pmaxaccepts" optional="True" type="text" value="10" />
-		<param label="--p-confidence: VALUE Float % Range(0, 1, inclusive_end=True) | Str % Choices('disable')  Confidence threshold for limiting taxonomic depth. Set to 'disable' to disable confidence calculation, or 0 to calculate confidence but not apply it to limit the taxonomic depth of the assignments. [default: 0.7]" name="pconfidence" optional="True" type="text" value="0.7" />
-
+  ]]></command>
+  <inputs>
+    <param format="qza,no_unzip.zip" label="--i-query: ARTIFACT FeatureData[Sequence] Sequences to classify taxonomically.        [required]" name="iquery" optional="False" type="data" />
+    <param format="qza,no_unzip.zip" label="--i-reference-reads: ARTIFACT FeatureData[Sequence] reference sequences.                        [required]" name="ireferencereads" optional="False" type="data" />
+    <param format="qza,no_unzip.zip" label="--i-reference-taxonomy: ARTIFACT FeatureData[Taxonomy] reference taxonomy labels.                  [required]" name="ireferencetaxonomy" optional="False" type="data" />
+    <param format="qza,no_unzip.zip" label="--i-classifier: ARTIFACT TaxonomicClassifier Pre-trained sklearn taxonomic classifier for classifying the reads.                      [required]" name="iclassifier" optional="False" type="data" />
+    <param label="--p-maxaccepts: VALUE Int % Range(1, None) | Str % Choices('all') Maximum number of hits to keep for each query. Set to 'all' to keep all hits > perc-identity similarity.  [default: 10]"
+           name="pmaxaccepts" optional="False" type="text" value="10">
+      <!-- The value is interpolated into the command line, so only a positive integer or the literal 'all' is accepted. -->
+      <validator message="Enter a positive integer or 'all'." type="regex">^([1-9][0-9]*|all)$</validator>
+    </param>
+    <param exclude_max="False" label="--p-perc-identity: PROPORTION Range(0.0, 1.0, inclusive_end=True) Percent sequence similarity to use for PREFILTER. Reject match if percent identity to query is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.5]" max="1.0" min="0.0" name="ppercidentity" optional="True" type="float" value="0.5" />
+    <param exclude_max="False" label="--p-query-cov: PROPORTION Range(0.0, 1.0, inclusive_end=True) Query coverage threshold to use for PREFILTER. Reject match if query alignment coverage per high-scoring pair is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled.                            [default: 0.8]" max="1.0" min="0.0" name="pquerycov" optional="True" type="float" value="0.8" />
+    <param label="--p-strand: TEXT Choices('both', 'plus') Align against reference sequences in forward ('plus') or both directions ('both'). Leave unset to use the QIIME 2 default." name="pstrand" optional="True" type="select">
+      <option selected="True" value="None">Selection is Optional</option>
+      <option value="both">both</option>
+      <option value="plus">plus</option>
+    </param>
+    <param exclude_max="False" exclude_min="True" label="--p-min-consensus: NUMBER Range(0.5, 1.0, inclusive_start=False, inclusive_end=True) Minimum fraction of assignments must match top hit to be accepted as consensus assignment.   [default: 0.51]" max="1.0" min="0.5" name="pminconsensus" optional="True" type="float" value="0.51" />
+    <param label="--p-maxhits: VALUE Int % Range(1, None) | Str % Choices('all') Maximum number of hits to show once the search is terminated. Set to 'all' for no limit. [default: 'all']"
+           name="pmaxhits" optional="False" type="text" value="all">
+      <!-- The value is interpolated into the command line, so only a positive integer or the literal 'all' is accepted. -->
+      <validator message="Enter a positive integer or 'all'." type="regex">^([1-9][0-9]*|all)$</validator>
+    </param>
+    <param label="--p-maxrejects: VALUE Int % Range(1, None) | Str % Choices('all') Maximum number of non-matching target sequences to consider before stopping the search. Works in pair with maxaccepts. [default: 'all']"
+           name="pmaxrejects" optional="False" type="text" value="all">
+      <!-- The value is interpolated into the command line, so only a positive integer or the literal 'all' is accepted. -->
+      <validator message="Enter a positive integer or 'all'." type="regex">^([1-9][0-9]*|all)$</validator>
+    </param>
+    <param label="--p-confidence: VALUE Float % Range(0, 1, inclusive_end=True) | Str % Choices('disable')  Confidence threshold for limiting taxonomic depth. Set to 'disable' to disable confidence calculation, or 0 to calculate confidence but not apply it to limit the taxonomic depth of the assignments. [default: 0.7]"
+           name="pconfidence"
+           optional="False" type="text" value="0.7">
+      <!-- The value is interpolated into the command line, so only a number in [0, 1] or the literal 'disable' is accepted. -->
+      <validator message="Enter a number between 0 and 1, or 'disable'." type="regex">^(0(\.[0-9]+)?|1(\.0+)?|disable)$</validator>
+    </param>
+    <param label="--p-read-orientation: TEXT Choices('same', 'reverse-complement', 'auto') Direction of reads with respect to reference sequences in pre-trained sklearn classifier. same will cause reads to be classified unchanged; reverse-complement will cause reads to be reversed and complemented prior to classification. 'auto' will autodetect orientation based on the confidence estimates for the first 100 reads.   [default: 'auto']" name="preadorientation" optional="True" type="select">
+      <option selected="True" value="None">Selection is Optional</option>
+      <option value="same">same</option>
+      <option value="reverse-complement">reverse-complement</option>
+      <option value="auto">auto</option>
+    </param>
+    <param checked="false" label="--p-no-prefilter: Disable the positive filter of query sequences (the prefilter is enabled by default)." name="pnoprefilter" type="boolean" />
+    <param label="--p-sample-size: INTEGER Range(1, None)      Randomly extract the given number of sequences from the reference database to use for prefiltering. This parameter is ignored if `prefilter` is disabled. [default: 1000]" min="1" name="psamplesize" optional="True" type="integer" value="1000" />
+    <param label="--p-randseed: INTEGER  Use integer as a seed for the pseudo-random generator Range(0, None)      used during prefiltering. A given seed always produces the same output, which is useful for replicability. Set to 0 to use a pseudo-random seed. This parameter is ignored if `prefilter` is disabled.    [default: 0]" min="0" name="prandseed" optional="True" type="integer" value="0" />
+    <param label="--examples: Show usage examples and exit." name="examples" optional="True" type="data" />
+    
+  </inputs>
 
-		<param label="--p-perc-identity: PROPORTION Range(0.0, 1.0, inclusive_end=True) Percent sequence similarity to use for PREFILTER. Reject match if percent identity to query is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.5]" name="ppercidentity" optional="True" type="float" value="0.5" min="0" max="1" exclusive_end="False" />
-		<param label="--p-query-cov: PROPORTION Range(0.0, 1.0, inclusive_end=True) Query coverage threshold to use for PREFILTER. Reject match if query alignment coverage per high-scoring pair is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled.                            [default: 0.8]" name="pquerycov" optional="True" type="float" value="0.8" min="0" max="1" exclusive_end="False" />
-		<param label="--p-strand: " name="pstrand" optional="True" type="select">
-			<option selected="True" value="None">Selection is Optional</option>
-			<option value="both">both</option>
-			<option value="plus">plus</option>
-		</param>
-		<param label="--p-min-consensus: NUMBER Range(0.5, 1.0, inclusive_start=False, inclusive_end=True) Minimum fraction of assignments must match top hit to be accepted as consensus assignment.   [default: 0.51]" name="pminconsensus" optional="True" type="float" value="0.51" min="0.5" max="1" exclusive_end="True" />
-		<param label="--p-read-orientation: TEXT Choices('same', 'reverse-complement', 'auto') Direction of reads with respect to reference sequences in pre-trained sklearn classifier. same will cause reads to be classified unchanged; reverse-complement will cause reads to be reversed and complemented prior to classification. 'auto' will autodetect orientation based on the confidence estimates for the first 100 reads.   [default: 'auto'] " name="preadorientation" optional="True" type="select" >
-			<option value="None">Selection is Optional</option>
-			<option value="same">same</option>
-			<option value="reverse-complement">reverse-complement</option>
-			<option selected="True" value="auto">auto</option>
-		</param>
-		<param label="--p-prefilter: --p-no-prefilter Toggle positive filter of query sequences on or off. [default: True]" name="pprefilter" selected="False" type="boolean"/>
-		<param label="--p-sample-size: INTEGER Range(1, None)      Randomly extract the given number of sequences from the reference database to use for prefiltering. This parameter is ignored if `prefilter` is disabled. [default: 1000]" name="psamplesize" optional="True" type="integer" value="1000" min="1"/>
-		<param label="--p-randseed: INTEGER  Use integer as a seed for the pseudo-random generator Range(0, None)      used during prefiltering. A given seed always produces the same output, which is useful for replicability. Set to 0 to use a pseudo-random seed. This parameter is ignored if `prefilter` is disabled.    [default: 0]" name="prandseed" optional="True" type="integer" value="0" min="0"/>
-	</inputs>
-	<outputs>
-		<data format="qza" label="${tool.name} on ${on_string}: classification.qza" name="oclassification"/>
-	</outputs>
-	<help><![CDATA[
-ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier
-##################################################################
+  <outputs>
+    <data format="qza" label="${tool.name} on ${on_string}: classification.qza" name="oclassification" />
+    
+  </outputs>
+
+  <help><![CDATA[
+ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier
+#################################################################
 
 NOTE: THIS PIPELINE IS AN ALPHA RELEASE. Please report bugs to
 https://forum.qiime2.org! Assign taxonomy to query sequences using hybrid
@@ -187,7 +145,18 @@
     Pre-trained sklearn taxonomic classifier for classifying the reads.
 maxaccepts : Int % Range(1, None) | Str % Choices('all'), optional
     Maximum number of hits to keep for each query. Set to "all" to keep all
-    hits > perc_identity similarity.
+    hits > perc_identity similarity. Note that if strand=both, maxaccepts
+    will keep N hits for each direction (if searches in the opposite
+    direction yield results that exceed the minimum perc_identity). In
+    those cases use maxhits to control the total number of hits returned.
+    This option works in pair with maxrejects. The search process sorts
+    target sequences by decreasing number of k-mers they have in common
+    with the query sequence, using that information as a proxy for sequence
+    similarity. After pairwise alignments, if the first target sequence
+    passes the acceptation criteria, it is accepted as best hit and the
+    search process stops for that query. If maxaccepts is set to a higher
+    value, more hits are accepted. If maxaccepts and maxrejects are both
+    set to "all", the complete database is searched.
 perc_identity : Float % Range(0.0, 1.0, inclusive_end=True), optional
     Percent sequence similarity to use for PREFILTER. Reject match if
     percent identity to query is lower. Set to a lower value to perform a
@@ -203,6 +172,8 @@
 min_consensus : Float % Range(0.5, 1.0, inclusive_start=False, inclusive_end=True), optional
     Minimum fraction of assignments must match top hit to be accepted as
     consensus assignment.
+maxhits : Int % Range(1, None) | Str % Choices('all'), optional
+maxrejects : Int % Range(1, None) | Str % Choices('all'), optional
 reads_per_batch : Int % Range(0, None), optional
     Number of reads to process in each batch for sklearn classification. If
     "auto", this parameter is autoscaled to min(number of query sequences /
@@ -217,6 +188,8 @@
     reverse-complement will cause reads to be reversed and complemented
     prior to classification. "auto" will autodetect orientation based on
     the confidence estimates for the first 100 reads.
+threads : Int % Range(1, None), optional
+    Number of threads to use for job parallelization.
 prefilter : Bool, optional
     Toggle positive filter of query sequences on or off.
 sample_size : Int % Range(1, None), optional
@@ -233,9 +206,9 @@
 -------
 classification : FeatureData[Taxonomy]
     The resulting taxonomy classifications.
-	]]></help>
-<macros>
+  ]]></help>
+  <macros>
     <import>qiime_citation.xml</import>
-</macros>
-<expand macro="qiime_citation"/>
-</tool>
+  </macros>
+  <expand macro="qiime_citation"/>
+</tool>
\ No newline at end of file