Mercurial > repos > florianbegusch > qiime2_suite_zmf
comparison qiime2-2020.8/qiime_feature-classifier_classify-hybrid-vsearch-sklearn.xml @ 0:5c352d975ef7 draft
Uploaded
| author | florianbegusch |
|---|---|
| date | Thu, 03 Sep 2020 09:33:04 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:5c352d975ef7 |
|---|---|
| 1 <?xml version="1.0" ?> | |
| 2 <tool id="qiime_feature-classifier_classify-hybrid-vsearch-sklearn" name="qiime feature-classifier classify-hybrid-vsearch-sklearn" | |
| 3 version="2020.8"> | |
| 4 <description> ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier</description> | |
| 5 <requirements> <!-- Pins the QIIME 2 package to the same release as this wrapper's version attribute. --> | |
| 6 <requirement version="2020.8" type="package">qiime2</requirement> | |
| 7 </requirements> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 ## Cheetah template for the QIIME 2 call; every optional flag is emitted only when its parameter carries a value. | |
| 10 qiime feature-classifier classify-hybrid-vsearch-sklearn | |
| 11 --i-query='$iquery' | |
| 12 --i-reference-reads='$ireferencereads' | |
| 13 --i-reference-taxonomy='$ireferencetaxonomy' | |
| 14 --i-classifier='$iclassifier' | |
| 15 | |
| 16 ## An unset optional parameter renders as '' or 'None' depending on its type, so both are treated as "not given". | |
| 17 #if str($pmaxaccepts).strip() not in ('', 'None'): | |
| 18 --p-maxaccepts='$pmaxaccepts' | |
| 19 #end if | |
| 20 | |
| 21 #if str($ppercidentity).strip() not in ('', 'None'): | |
| 22 --p-perc-identity=$ppercidentity | |
| 23 #end if | |
| 24 | |
| 25 #if str($pquerycov).strip() not in ('', 'None'): | |
| 26 --p-query-cov=$pquerycov | |
| 27 #end if | |
| 28 | |
| 29 #if str($pstrand) != 'None': | |
| 30 --p-strand=$pstrand | |
| 31 #end if | |
| 32 | |
| 33 #if str($pminconsensus).strip() not in ('', 'None'): | |
| 34 --p-min-consensus=$pminconsensus | |
| 35 #end if | |
| 36 | |
| 37 #if str($pmaxhits).strip() not in ('', 'None'): | |
| 38 --p-maxhits='$pmaxhits' | |
| 39 #end if | |
| 40 | |
| 41 #if str($pmaxrejects).strip() not in ('', 'None'): | |
| 42 --p-maxrejects='$pmaxrejects' | |
| 43 #end if | |
| 44 | |
| 45 #if str($preadsperbatch).strip() not in ('', 'None'): | |
| 46 --p-reads-per-batch=$preadsperbatch | |
| 47 #end if | |
| 48 | |
| 49 #if str($pconfidence).strip() not in ('', 'None'): | |
| 50 --p-confidence='$pconfidence' | |
| 51 #end if | |
| 52 | |
| 53 #if str($preadorientation) != 'None': | |
| 54 --p-read-orientation=$preadorientation | |
| 55 #end if | |
| 56 | |
| 57 ## Thread count comes from the job's GALAXY_SLOTS allocation (1 when unset); no pthreads input is declared. | |
| 58 --p-threads=\${GALAXY_SLOTS:-1} | |
| 59 | |
| 60 #if $pnoprefilter: | |
| 61 --p-no-prefilter | |
| 62 #end if | |
| 63 | |
| 64 #if str($psamplesize).strip() not in ('', 'None'): | |
| 65 --p-sample-size=$psamplesize | |
| 66 #end if | |
| 67 #if str($prandseed).strip() not in ('', 'None'): | |
| 68 --p-randseed=$prandseed | |
| 69 #end if | |
| 70 | |
| 71 --o-classification=oclassification | |
| 72 ## Copy the artifact onto the Galaxy output dataset only when the classifier run succeeded. | |
| 73 && cp oclassification.qza '$oclassification' | |
| 74 ]]></command> | |
| 75 <inputs> | |
| 76 <!-- Required QIIME 2 artifacts. --> | |
| 77 <param format="qza,no_unzip.zip" label="--i-query: ARTIFACT FeatureData[Sequence] Sequences to classify taxonomically. [required]" name="iquery" optional="False" type="data" /> | |
| 78 <param format="qza,no_unzip.zip" label="--i-reference-reads: ARTIFACT FeatureData[Sequence] reference sequences. [required]" name="ireferencereads" optional="False" type="data" /> | |
| 79 <param format="qza,no_unzip.zip" label="--i-reference-taxonomy: ARTIFACT FeatureData[Taxonomy] reference taxonomy labels. [required]" name="ireferencetaxonomy" optional="False" type="data" /> | |
| 80 <param format="qza,no_unzip.zip" label="--i-classifier: ARTIFACT TaxonomicClassifier Pre-trained sklearn taxonomic classifier for classifying the reads. [required]" name="iclassifier" optional="False" type="data" /> | |
| 81 <!-- Parameters typed as "number or keyword" are free-text fields checked by a regex; leave them empty to keep the QIIME 2 default. --> | |
| 82 <param label="--p-maxaccepts: Int % Range(1, None) | Str % Choices('all') Maximum number of hits to keep for each query; 'all' keeps all hits above perc_identity similarity. [optional]" name="pmaxaccepts" optional="True" type="text"> | |
| 83 <validator type="regex" message="Enter a positive integer or 'all'.">^([1-9][0-9]*|all)$</validator> | |
| 84 </param> | |
| 85 <param label="--p-perc-identity: PROPORTION Range(0.0, 1.0, inclusive_end=True) Percent sequence similarity to use for PREFILTER. Reject match if percent identity to query is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.5]" max="1.0" min="0.0" name="ppercidentity" optional="True" type="float" value="0.5" /> | |
| 86 <param label="--p-query-cov: PROPORTION Range(0.0, 1.0, inclusive_end=True) Query coverage threshold to use for PREFILTER. Reject match if query alignment coverage per high-scoring pair is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.8]" max="1.0" min="0.0" name="pquerycov" optional="True" type="float" value="0.8" /> | |
| 87 <param label="--p-strand: Align against reference sequences in forward (plus) or both directions (both)." name="pstrand" optional="True" type="select"> | |
| 88 <option selected="True" value="None">Selection is Optional</option> | |
| 89 <option value="both">both</option> | |
| 90 <option value="plus">plus</option> | |
| 91 </param> | |
| 92 <param label="--p-min-consensus: NUMBER Range(0.5, 1.0, inclusive_start=False, inclusive_end=True) Minimum fraction of assignments that must match the top hit for it to be accepted as the consensus assignment. [default: 0.51]" max="1.0" min="0.5" name="pminconsensus" optional="True" type="float" value="0.51"> | |
| 93 <validator type="in_range" min="0.5" max="1.0" exclude_min="true" message="Must be greater than 0.5 and at most 1.0." /> | |
| 94 </param> | |
| 95 <param label="--p-maxhits: Int % Range(1, None) | Str % Choices('all') Controls the total number of hits returned. [optional]" name="pmaxhits" optional="True" type="text"> | |
| 96 <validator type="regex" message="Enter a positive integer or 'all'.">^([1-9][0-9]*|all)$</validator> | |
| 97 </param> | |
| 98 <param label="--p-maxrejects: Int % Range(1, None) | Str % Choices('all') Works in pair with maxaccepts; see the maxaccepts description in the help below. [optional]" name="pmaxrejects" optional="True" type="text"> | |
| 99 <validator type="regex" message="Enter a positive integer or 'all'.">^([1-9][0-9]*|all)$</validator> | |
| 100 </param> | |
| 101 <param label="--p-reads-per-batch: INTEGER Range(0, None) Number of reads to process in each batch for sklearn classification. [optional]" min="0" name="preadsperbatch" optional="True" type="integer" /> | |
| 102 <param label="--p-confidence: Float % Range(0, 1, inclusive_end=True) | Str % Choices('disable') Confidence threshold for limiting taxonomic depth; 'disable' turns confidence calculation off. [optional]" name="pconfidence" optional="True" type="text"> | |
| 103 <validator type="regex" message="Enter a number between 0 and 1, or 'disable'.">^(0(\.[0-9]*)?|1(\.0*)?|\.[0-9]+|disable)$</validator> | |
| 104 </param> | |
| 105 <param label="--p-read-orientation: Direction of reads with respect to reference sequences in the pre-trained sklearn classifier." name="preadorientation" optional="True" type="select"> | |
| 106 <option selected="True" value="None">Selection is Optional</option> | |
| 107 <option value="same">same</option> | |
| 108 <option value="reverse-complement">reverse-complement</option> | |
| 109 <option value="auto">auto</option> | |
| 110 </param> | |
| 111 <param checked="false" label="--p-no-prefilter: Switch off the positive filter of query sequences. [default: prefilter on]" name="pnoprefilter" type="boolean" /> | |
| 112 <param label="--p-sample-size: INTEGER Range(1, None) Randomly extract the given number of sequences from the reference database to use for prefiltering. This parameter is ignored if `prefilter` is disabled. [default: 1000]" min="1" name="psamplesize" optional="True" type="integer" value="1000" /> | |
| 113 <param label="--p-randseed: INTEGER Use integer as a seed for the pseudo-random generator Range(0, None) used during prefiltering. A given seed always produces the same output, which is useful for replicability. Set to 0 to use a pseudo-random seed. This parameter is ignored if `prefilter` is disabled. [default: 0]" min="0" name="prandseed" optional="True" type="integer" value="0" /> | |
| 114 </inputs> | |
| 115 | |
| 116 <outputs> | |
| 117 <!-- Single result: the classification artifact that the command copies from oclassification.qza. --> | |
| 118 <data name="oclassification" format="qza" label="${tool.name} on ${on_string}: classification.qza" /> | |
| 119 </outputs> | |
| 120 | |
| 121 <help><![CDATA[ | |
| 122 ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier | |
| 123 ############################################################### | |
| 124 | |
| 125 NOTE: THIS PIPELINE IS AN ALPHA RELEASE. Please report bugs to | |
| 126 https://forum.qiime2.org! Assign taxonomy to query sequences using a hybrid | |
| 127 classifier. First, performs a rough positive filter to remove artifact and | |
| 128 low-coverage sequences (use "prefilter" parameter to toggle this step on or | |
| 129 off). Second, performs VSEARCH exact match between query and | |
| 130 reference_reads to find exact matches, followed by least common ancestor | |
| 131 consensus taxonomy assignment from among maxaccepts top hits, min_consensus | |
| 132 of which share that taxonomic assignment. Query sequences without an exact | |
| 133 match are then classified with a pre-trained sklearn taxonomy classifier to | |
| 134 predict the most likely taxonomic lineage. | |
| 135 | |
| 136 Parameters | |
| 137 ---------- | |
| 138 query : FeatureData[Sequence] | |
| 139 Sequences to classify taxonomically. | |
| 140 reference_reads : FeatureData[Sequence] | |
| 141 reference sequences. | |
| 142 reference_taxonomy : FeatureData[Taxonomy] | |
| 143 reference taxonomy labels. | |
| 144 classifier : TaxonomicClassifier | |
| 145 Pre-trained sklearn taxonomic classifier for classifying the reads. | |
| 146 maxaccepts : Int % Range(1, None) | Str % Choices('all'), optional | |
| 147 Maximum number of hits to keep for each query. Set to "all" to keep all | |
| 148 hits > perc_identity similarity. Note that if strand=both, maxaccepts | |
| 149 will keep N hits for each direction (if searches in the opposite | |
| 150 direction yield results that exceed the minimum perc_identity). In | |
| 151 those cases use maxhits to control the total number of hits returned. | |
| 152 This option works in pair with maxrejects. The search process sorts | |
| 153 target sequences by decreasing number of k-mers they have in common | |
| 154 with the query sequence, using that information as a proxy for sequence | |
| 155 similarity. After pairwise alignments, if the first target sequence | |
| 156 passes the acceptation criteria, it is accepted as best hit and the | |
| 157 search process stops for that query. If maxaccepts is set to a higher | |
| 158 value, more hits are accepted. If maxaccepts and maxrejects are both | |
| 159 set to "all", the complete database is searched. | |
| 160 perc_identity : Float % Range(0.0, 1.0, inclusive_end=True), optional | |
| 161 Percent sequence similarity to use for PREFILTER. Reject match if | |
| 162 percent identity to query is lower. Set to a lower value to perform a | |
| 163 rough pre-filter. This parameter is ignored if `prefilter` is disabled. | |
| 164 query_cov : Float % Range(0.0, 1.0, inclusive_end=True), optional | |
| 165 Query coverage threshold to use for PREFILTER. Reject match if query | |
| 166 alignment coverage per high-scoring pair is lower. Set to a lower value | |
| 167 to perform a rough pre-filter. This parameter is ignored if `prefilter` | |
| 168 is disabled. | |
| 169 strand : Str % Choices('both', 'plus'), optional | |
| 170 Align against reference sequences in forward ("plus") or both | |
| 171 directions ("both"). | |
| 172 min_consensus : Float % Range(0.5, 1.0, inclusive_start=False, inclusive_end=True), optional | |
| 173 Minimum fraction of assignments that must match the top hit for it to be | |
| 174 accepted as the consensus assignment. | |
| 175 maxhits : Int % Range(1, None) | Str % Choices('all'), optional | |
| 176 maxrejects : Int % Range(1, None) | Str % Choices('all'), optional | |
| 177 reads_per_batch : Int % Range(0, None), optional | |
| 178 Number of reads to process in each batch for sklearn classification. If | |
| 179 "auto", this parameter is autoscaled to min(number of query sequences / | |
| 180 threads, 20000). | |
| 181 confidence : Float % Range(0, 1, inclusive_end=True) | Str % Choices('disable'), optional | |
| 182 Confidence threshold for limiting taxonomic depth. Set to "disable" to | |
| 183 disable confidence calculation, or 0 to calculate confidence but not | |
| 184 apply it to limit the taxonomic depth of the assignments. | |
| 185 read_orientation : Str % Choices('same', 'reverse-complement', 'auto'), optional | |
| 186 Direction of reads with respect to reference sequences in pre-trained | |
| 187 sklearn classifier. same will cause reads to be classified unchanged; | |
| 188 reverse-complement will cause reads to be reversed and complemented | |
| 189 prior to classification. "auto" will autodetect orientation based on | |
| 190 the confidence estimates for the first 100 reads. | |
| 191 threads : Int % Range(1, None), optional | |
| 192 Number of threads to use for job parallelization. | |
| 193 prefilter : Bool, optional | |
| 194 Toggle positive filter of query sequences on or off. | |
| 195 sample_size : Int % Range(1, None), optional | |
| 196 Randomly extract the given number of sequences from the reference | |
| 197 database to use for prefiltering. This parameter is ignored if | |
| 198 `prefilter` is disabled. | |
| 199 randseed : Int % Range(0, None), optional | |
| 200 Use integer as a seed for the pseudo-random generator used during | |
| 201 prefiltering. A given seed always produces the same output, which is | |
| 202 useful for replicability. Set to 0 to use a pseudo-random seed. This | |
| 203 parameter is ignored if `prefilter` is disabled. | |
| 204 | |
| 205 Returns | |
| 206 ------- | |
| 207 classification : FeatureData[Taxonomy] | |
| 208 The resulting taxonomy classifications. | |
| 209 ]]></help> | |
| 210 <macros> <!-- Imports macro definitions from the sibling file; presumably it supplies qiime_citation (confirm). --> | |
| 211 <import>qiime_citation.xml</import> | |
| 212 </macros> | |
| 213 <expand macro="qiime_citation"></expand> | |
| 214 </tool> |
