comparison qiime2-2020.8/qiime_feature-classifier_classify-hybrid-vsearch-sklearn.xml @ 20:d93d8888f0b0 draft

Uploaded
author florianbegusch
date Fri, 04 Sep 2020 12:44:24 +0000
parents
children
comparison
equal deleted inserted replaced
19:6c48f8d82424 20:d93d8888f0b0
1 <?xml version="1.0" ?>
2 <tool id="qiime_feature-classifier_classify-hybrid-vsearch-sklearn" name="qiime feature-classifier classify-hybrid-vsearch-sklearn"
3 version="2020.8">
4 <description> ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier</description>
5 <requirements> <!-- The package version below matches the tool's own version attribute (2020.8). -->
6 <requirement type="package" version="2020.8">qiime2</requirement>
7 </requirements>
8 <command><![CDATA[
9 qiime feature-classifier classify-hybrid-vsearch-sklearn
10 ## Input artifacts. Paths are single-quoted so whitespace or shell metacharacters cannot split an argument.
11 --i-query='$iquery'
12
13 --i-reference-reads='$ireferencereads'
14
15 --i-reference-taxonomy='$ireferencetaxonomy'
16
17 --i-classifier='$iclassifier'
18 ## Optional free-text values are passed only when set; the guards cover both an empty string and None.
19 #if str($pmaxaccepts) not in ('', 'None'):
20 --p-maxaccepts='$pmaxaccepts'
21 #end if
22
23 --p-perc-identity=$ppercidentity
24
25 --p-query-cov=$pquerycov
26
27 #if str($pstrand) != 'None':
28 --p-strand=$pstrand
29 #end if
30
31 --p-min-consensus=$pminconsensus
32
33 #if str($pmaxhits) not in ('', 'None'):
34 --p-maxhits='$pmaxhits'
35 #end if
36
37 #if str($pmaxrejects) not in ('', 'None'):
38 --p-maxrejects='$pmaxrejects'
39 #end if
40
41 #if str($pconfidence) not in ('', 'None'):
42 --p-confidence='$pconfidence'
43 #end if
44
45 #if str($preadorientation) != 'None':
46 --p-read-orientation=$preadorientation
47 #end if
48 ## Thread count comes from the job's allocated slots (falling back to 1); there is no user-facing threads input.
49 --p-threads=\${GALAXY_SLOTS:-1}
50
51 #if $pnoprefilter:
52 --p-no-prefilter
53 #end if
54
55 --p-sample-size=$psamplesize
56
57 --p-randseed=$prandseed
58
59 --o-classification=oclassification
60
61 ## The CLI examples flag is deliberately never passed: it only shows usage examples and exits,
62 ## so no classification would be written and the copy below would fail.
63
64 ## Chain with a logical AND so a failed qiime run is reported instead of being masked by the copy.
65 &&
66 cp oclassification.qza '$oclassification'
67
68 ]]></command>
69 <inputs>
70 <param format="qza,no_unzip.zip" label="--i-query: ARTIFACT FeatureData[Sequence] Sequences to classify taxonomically. [required]" name="iquery" optional="False" type="data" />
71 <param format="qza,no_unzip.zip" label="--i-reference-reads: ARTIFACT FeatureData[Sequence] reference sequences. [required]" name="ireferencereads" optional="False" type="data" />
72 <param format="qza,no_unzip.zip" label="--i-reference-taxonomy: ARTIFACT FeatureData[Taxonomy] reference taxonomy labels. [required]" name="ireferencetaxonomy" optional="False" type="data" />
73 <param format="qza,no_unzip.zip" label="--i-classifier: ARTIFACT TaxonomicClassifier Pre-trained sklearn taxonomic classifier for classifying the reads. [required]" name="iclassifier" optional="False" type="data" />
74 <!-- maxaccepts takes a positive integer or the word all (see help), so it is a validated text field rather than a select. -->
75 <param name="pmaxaccepts" optional="True" type="text"
76 label="--p-maxaccepts: INTEGER Range(1, None) or all. Maximum number of hits to keep for each query. Set to all to keep every hit above perc_identity similarity. Leave empty to use the QIIME 2 default.">
77 <validator type="regex" message="Enter a positive integer or all.">^([1-9][0-9]*|all)?$</validator>
78 </param>
79 <param exclude_max="False" label="--p-perc-identity: PROPORTION Range(0.0, 1.0, inclusive_end=True) Percent sequence similarity to use for PREFILTER. Reject match if percent identity to query is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.5]" max="1.0" min="0.0" name="ppercidentity" optional="True" type="float" value="0.5" />
80 <param exclude_max="False" label="--p-query-cov: PROPORTION Range(0.0, 1.0, inclusive_end=True) Query coverage threshold to use for PREFILTER. Reject match if query alignment coverage per high-scoring pair is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.8]" max="1.0" min="0.0" name="pquerycov" optional="True" type="float" value="0.8" />
81 <param label="--p-strand: Align against reference sequences in forward (plus) or both directions (both)." name="pstrand" optional="True" type="select">
82 <option selected="True" value="None">Selection is Optional</option>
83 <option value="both">both</option>
84 <option value="plus">plus</option>
85 </param>
86 <param exclude_max="False" exclude_min="True" label="--p-min-consensus: NUMBER Range(0.5, 1.0, inclusive_start=False, inclusive_end=True) Minimum fraction of assignments must match top hit to be accepted as consensus assignment. [default: 0.51]" max="1.0" min="0.5" name="pminconsensus" optional="True" type="float" value="0.51" />
87 <!-- maxhits takes a positive integer or the word all (see help). -->
88 <param name="pmaxhits" optional="True" type="text"
89 label="--p-maxhits: INTEGER Range(1, None) or all. Leave empty to use the QIIME 2 default.">
90 <validator type="regex" message="Enter a positive integer or all.">^([1-9][0-9]*|all)?$</validator>
91 </param>
92 <!-- maxrejects takes a positive integer or the word all (see help). -->
93 <param name="pmaxrejects" optional="True" type="text"
94 label="--p-maxrejects: INTEGER Range(1, None) or all. Leave empty to use the QIIME 2 default.">
95 <validator type="regex" message="Enter a positive integer or all.">^([1-9][0-9]*|all)?$</validator>
96 </param>
97 <!-- confidence takes a number between 0 and 1 inclusive, or the word disable (see help). -->
98 <param name="pconfidence" optional="True" type="text"
99 label="--p-confidence: NUMBER Range(0, 1, inclusive_end=True) or disable. Confidence threshold for limiting taxonomic depth. Set to disable to disable confidence calculation, or 0 to calculate confidence but not apply it to limit the taxonomic depth of the assignments. Leave empty to use the QIIME 2 default.">
100 <validator type="regex" message="Enter a number between 0 and 1, or disable.">^(0(\.[0-9]+)?|1(\.0+)?|\.[0-9]+|disable)?$</validator>
101 </param>
102
103 <param label="--p-read-orientation: Direction of reads with respect to reference sequences in pre-trained sklearn classifier. same classifies reads unchanged; reverse-complement reverses and complements reads prior to classification; auto autodetects orientation based on the confidence estimates for the first 100 reads." name="preadorientation" optional="True" type="select">
104 <option selected="True" value="None">Selection is Optional</option>
105 <option value="same">same</option>
106 <option value="reverse-complement">reverse-complement</option>
107 <option value="auto">auto</option>
108 </param>
109 <param checked="False" label="--p-no-prefilter: Skip the rough positive pre-filter of query sequences. The pre-filter runs unless this box is checked." name="pnoprefilter" type="boolean" />
110 <param label="--p-sample-size: INTEGER Range(1, None) Randomly extract the given number of sequences from the reference database to use for prefiltering. This parameter is ignored if `prefilter` is disabled. [default: 1000]" min="1" name="psamplesize" optional="True" type="integer" value="1000" />
111 <param label="--p-randseed: INTEGER Use integer as a seed for the pseudo-random generator Range(0, None) used during prefiltering. A given seed always produces the same output, which is useful for replicability. Set to 0 to use a pseudo-random seed. This parameter is ignored if `prefilter` is disabled. [default: 0]" min="0" name="prandseed" optional="True" type="integer" value="0" />
112 <!-- No input is offered for the CLI examples flag: it only shows usage examples and exits, so it cannot be part of a classification run. -->
113
114 </inputs>
115
116 <outputs>
117 <!-- Resulting taxonomy classifications; the command copies oclassification.qza to this dataset's path. -->
118 <data name="oclassification" format="qza" label="${tool.name} on ${on_string}: classification.qza" />
119 </outputs>
120
121 <help><![CDATA[
122 ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier
123 ###############################################################
124
125 NOTE: THIS PIPELINE IS AN ALPHA RELEASE. Please report bugs to
126 https://forum.qiime2.org! Assign taxonomy to query sequences using hybrid
127 classifier. First performs rough positive filter to remove artifact and
128 low-coverage sequences (use "prefilter" parameter to toggle this step on or
129 off). Second, performs VSEARCH exact match between query and
130 reference_reads to find exact matches, followed by least common ancestor
131 consensus taxonomy assignment from among maxaccepts top hits, min_consensus
132 of which share that taxonomic assignment. Query sequences without an exact
133 match are then classified with a pre-trained sklearn taxonomy classifier to
134 predict the most likely taxonomic lineage.
135
136 Parameters
137 ----------
138 query : FeatureData[Sequence]
139 Sequences to classify taxonomically.
140 reference_reads : FeatureData[Sequence]
141 reference sequences.
142 reference_taxonomy : FeatureData[Taxonomy]
143 reference taxonomy labels.
144 classifier : TaxonomicClassifier
145 Pre-trained sklearn taxonomic classifier for classifying the reads.
146 maxaccepts : Int % Range(1, None) | Str % Choices('all'), optional
147 Maximum number of hits to keep for each query. Set to "all" to keep all
148 hits > perc_identity similarity. Note that if strand=both, maxaccepts
149 will keep N hits for each direction (if searches in the opposite
150 direction yield results that exceed the minimum perc_identity). In
151 those cases use maxhits to control the total number of hits returned.
152 This option works in pair with maxrejects. The search process sorts
153 target sequences by decreasing number of k-mers they have in common
154 with the query sequence, using that information as a proxy for sequence
155 similarity. After pairwise alignments, if the first target sequence
156 passes the acceptance criteria, it is accepted as best hit and the
157 search process stops for that query. If maxaccepts is set to a higher
158 value, more hits are accepted. If maxaccepts and maxrejects are both
159 set to "all", the complete database is searched.
160 perc_identity : Float % Range(0.0, 1.0, inclusive_end=True), optional
161 Percent sequence similarity to use for PREFILTER. Reject match if
162 percent identity to query is lower. Set to a lower value to perform a
163 rough pre-filter. This parameter is ignored if `prefilter` is disabled.
164 query_cov : Float % Range(0.0, 1.0, inclusive_end=True), optional
165 Query coverage threshold to use for PREFILTER. Reject match if query
166 alignment coverage per high-scoring pair is lower. Set to a lower value
167 to perform a rough pre-filter. This parameter is ignored if `prefilter`
168 is disabled.
169 strand : Str % Choices('both', 'plus'), optional
170 Align against reference sequences in forward ("plus") or both
171 directions ("both").
172 min_consensus : Float % Range(0.5, 1.0, inclusive_start=False, inclusive_end=True), optional
173 Minimum fraction of assignments must match top hit to be accepted as
174 consensus assignment.
175 maxhits : Int % Range(1, None) | Str % Choices('all'), optional
176 maxrejects : Int % Range(1, None) | Str % Choices('all'), optional
177 reads_per_batch : Int % Range(0, None), optional
178 Number of reads to process in each batch for sklearn classification. If
179 "auto", this parameter is autoscaled to min(number of query sequences /
180 threads, 20000).
181 confidence : Float % Range(0, 1, inclusive_end=True) | Str % Choices('disable'), optional
182 Confidence threshold for limiting taxonomic depth. Set to "disable" to
183 disable confidence calculation, or 0 to calculate confidence but not
184 apply it to limit the taxonomic depth of the assignments.
185 read_orientation : Str % Choices('same', 'reverse-complement', 'auto'), optional
186 Direction of reads with respect to reference sequences in pre-trained
187 sklearn classifier. same will cause reads to be classified unchanged;
188 reverse-complement will cause reads to be reversed and complemented
189 prior to classification. "auto" will autodetect orientation based on
190 the confidence estimates for the first 100 reads.
191 threads : Int % Range(1, None), optional
192 Number of threads to use for job parallelization.
193 prefilter : Bool, optional
194 Toggle positive filter of query sequences on or off.
195 sample_size : Int % Range(1, None), optional
196 Randomly extract the given number of sequences from the reference
197 database to use for prefiltering. This parameter is ignored if
198 `prefilter` is disabled.
199 randseed : Int % Range(0, None), optional
200 Use integer as a seed for the pseudo-random generator used during
201 prefiltering. A given seed always produces the same output, which is
202 useful for replicability. Set to 0 to use a pseudo-random seed. This
203 parameter is ignored if `prefilter` is disabled.
204
205 Returns
206 -------
207 classification : FeatureData[Taxonomy]
208 The resulting taxonomy classifications.
209 ]]></help>
210 <macros> <!-- Imports qiime_citation.xml; the expand below references a macro named qiime_citation, presumably defined in that file. -->
211 <import>qiime_citation.xml</import>
212 </macros>
213 <expand macro="qiime_citation"/>
214 </tool>