Mercurial > repos > iuc > dada2_assigntaxonomyaddspecies
comparison dada2_assignTaxonomyAddspecies.xml @ 0:18517edb4733 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
author | iuc |
---|---|
date | Fri, 08 Nov 2019 18:50:24 -0500 |
parents | |
children | 1c9715cef808 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:18517edb4733 |
---|---|
1 <tool id="dada2_assignTaxonomyAddspecies" name="dada2: assignTaxonomy and addSpecies" version="@DADA2_VERSION@+galaxy@WRAPPER_VERSION@" profile="19.09"> | |
2 <description>Assign taxonomy and species to sequence variants</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements"/> | |
7 <expand macro="stdio"/> | |
8 <expand macro="version_command"/> | |
9 <command detect_errors="exit_code"><![CDATA[ | |
10 Rscript '$dada2_script' \${GALAXY_SLOTS:-1} | |
11 ]]></command> | |
12 <configfiles> | |
13 <configfile name="dada2_script"><![CDATA[ | |
14 @READ_FOO@ | |
15 ## load dada2; the number of threads is passed as first command line argument | |
16 library(dada2, quietly = TRUE) | |
17 | |
18 args <- commandArgs(trailingOnly = TRUE) | |
19 nthreads <- as.integer(args[1]) | |
20 | |
21 seqs <- $read_data($seqs) | |
22 ## reference fasta and taxonomic level names: from a history dataset or from a data table entry | |
23 #if $reference_cond.reference_select == "history" | |
24 ref <- '$reference_cond.refFasta' | |
25 tl <- '$reference_cond.taxLevels' | |
26 #else | |
27 ref <- '$reference_cond.refFasta.fields.path' | |
28 tl <- '$reference_cond.refFasta.fields.taxlevels' | |
29 #end if | |
30 tl <- strsplit(tl, ",")[[1]] | |
31 ## the comma separated level names are passed to assignTaxonomy as a character vector | |
32 taxa <- assignTaxonomy(seqs, ref, minBoot = $minBoot, tryRC = $tryRC, | |
33 outputBootstraps = $outputBootstraps, | |
34 taxLevels = tl, multithread = nthreads, verbose = TRUE) | |
35 ## with outputBootstraps the result holds the bootstrap values (boot) and the taxonomy (tax) | |
36 #if $outputBootstraps | |
37 boot <- taxa\$boot | |
38 taxa <- taxa\$tax | |
39 #end if | |
40 ## optional species level assignment with addSpecies | |
41 #if $addSpecies_cond.addSpecies_select == "TRUE" | |
42 #if $addSpecies_cond.allowMultiple_cond.allowMultiple == "num" | |
43 aM <- $addSpecies_cond.allowMultiple_cond.num | |
44 #else | |
45 aM <- $addSpecies_cond.allowMultiple_cond.allowMultiple | |
46 #end if | |
47 #if $addSpecies_cond.speciesreference_cond.speciesreference_select == "history" | |
48 ref <- '$addSpecies_cond.speciesreference_cond.speciesrefFasta' | |
49 #else | |
50 ref <- '$addSpecies_cond.speciesreference_cond.speciesrefFasta.fields.path' | |
51 #end if | |
52 taxa <- addSpecies(taxa, ref, allowMultiple = aM, tryRC = $addSpecies_cond.tryRC) | |
53 #end if | |
54 write.table(taxa, file = '$output', quote = FALSE, sep = "\t", row.names = TRUE, col.names = NA) | |
55 ## the bootstrap values are written to a separate table | |
56 #if $outputBootstraps | |
57 write.table(boot, file = '$bootstraps', quote = FALSE, sep = "\t", row.names = TRUE, col.names = NA) | |
58 #end if | |
59 ]]></configfile> | |
60 </configfiles> | |
61 <inputs> | |
62 <param name="seqs" type="data" format="@DADA_UNIQUES@,dada2_sequencetable,dada2_uniques" label="sequences to be assigned" help=""/> | |
63 <conditional name="reference_cond"> | |
64 <param name="reference_select" type="select" label="Select a reference dataset from your history or use a built-in?"> | |
65 <option value="builtin">Use a built-in reference</option> | |
66 <option value="history">Use reference data from the history</option> | |
67 </param> | |
68 <when value="builtin"> | |
69 <param name="refFasta" type="select" label="Select reference data set" help="If a reference data set of interest is not listed, contact the Galaxy administrators"> | |
70 <options from_data_table="dada2_taxonomy"> | |
71 <filter type="sort_by" column="2"/> | |
72 <validator type="no_options" message="No indexes are available for the selected input dataset"/> | |
73 </options> | |
74 </param> | |
75 </when> | |
76 <when value="history"> | |
77 <param name="refFasta" type="data" format="fasta,fasta.gz" label="Reference data set" /> | |
78 <param argument="taxLevels" type="text" label="Names of the taxonomic levels in the data set" help="comma separated list" /> | |
79 </when> | |
80 </conditional> | |
81 <param argument="minBoot" type="integer" value="50" min="0" label="Minimum bootstrap confidence" help="for assigning a | |
82 taxonomic level"/> | |
83 <param argument="tryRC" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Try reverse complement" help="the reverse-complement of each sequence will be used for classification if it is a better match to the reference sequences than the forward sequence"/> | |
84 <param argument="outputBootstraps" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Output bootstrap values"/> | |
85 | |
86 <conditional name="addSpecies_cond"> | |
87 <param name="addSpecies_select" type="select" label="Add genus-species binomials to the taxonomic table"> | |
88 <option value="FALSE">No</option> | |
89 <option value="TRUE">Yes</option> | |
90 </param> | |
91 <when value="FALSE"/> | |
92 <when value="TRUE"> | |
93 <conditional name="speciesreference_cond"> | |
94 <param name="speciesreference_select" type="select" label="Select a reference dataset from your history or use a built-in?"> | |
95 <option value="builtin">Use a built-in reference</option> | |
96 <option value="history">Use reference data from the history</option> | |
97 </param> | |
98 <when value="builtin"> | |
99 <param name="speciesrefFasta" argument="refFasta" type="select" label="Select reference data set" help="If a reference data set of interest is not listed, contact the Galaxy administrators"> | |
100 <options from_data_table="dada2_species"> | |
101 <filter type="sort_by" column="2"/> | |
102 <validator type="no_options" message="No indexes are available for the selected input dataset"/> | |
103 </options> | |
104 </param> | |
105 </when> | |
106 <when value="history"> | |
107 <param name="speciesrefFasta" argument="refFasta" type="data" format="fasta,fasta.gz" label="Reference data set" /> | |
108 </when> | |
109 </conditional> | |
110 <conditional name="allowMultiple_cond"> | |
111 <param argument="allowMultiple" type="select" label="reporting options"> | |
112 <option value="FALSE">only unambiguous identifications</option> | |
113 <option value="TRUE">all exactly matched species</option> | |
114 <option value="num">specify the maximal number of reported exactly matched species</option> | |
115 </param> | |
116 <when value="FALSE"/> | |
117 <when value="TRUE"/> | |
118 <when value="num"> | |
119 <param name="num" type="integer" value="" min="1" label="Number of matched species"/> | |
120 </when> | |
121 </conditional> | |
122 <param argument="tryRC" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Try reverse complement" help="the reverse-complement of each sequence will be used for classification if it is a better match to the reference sequences than the forward sequence"/> | |
123 </when> | |
124 </conditional> | |
125 </inputs> | |
126 <outputs> | |
127 <data name="output" format="tabular" label="${tool.name} on ${on_string}"/> | |
128 <data name="bootstraps" format="tabular" label="${tool.name} on ${on_string}: bootstraps"> | |
129 <filter>outputBootstraps</filter> | |
130 </data> | |
131 </outputs> | |
132 <tests> | |
133 <!-- test w default params --> | |
134 <test expect_num_outputs="1"> | |
135 <param name="seqs" ftype="dada2_sequencetable" value="removeBimeraDenovo_F3D0.tab"/> | |
136 <param name="reference_cond|reference_select" value="history"/> | |
137 <param name="reference_cond|refFasta" ftype="fasta" value="reference.fa"/> | |
138 <param name="reference_cond|taxLevels" value="Level1,Level2,Level3,Level4,Level5" /> | |
139 <param name="addSpecies_cond|addSpecies_select" value="TRUE"/> | |
140 <param name="addSpecies_cond|speciesreference_cond|speciesreference_select" value="history"/> | |
141 <param name="addSpecies_cond|speciesreference_cond|speciesrefFasta" ftype="fasta" value="reference_species.fa" /> | |
142 <output name="output" value="assignTaxonomyAddspecies_F3D0.tab" ftype="tabular" /> | |
143 </test> | |
144 <!-- test w default params, built-in reference --> | |
145 <test expect_num_outputs="1"> | |
146 <param name="seqs" ftype="dada2_sequencetable" value="removeBimeraDenovo_F3D0.tab"/> | |
147 <param name="reference_cond|reference_select" value="builtin"/> | |
148 <param name="reference_cond|refFasta" value="test_buildid"/> | |
149 <param name="addSpecies_cond|addSpecies_select" value="TRUE"/> | |
150 <param name="addSpecies_cond|speciesreference_cond|speciesreference_select" value="builtin"/> | |
151 <param name="addSpecies_cond|speciesreference_cond|speciesrefFasta" value="test_buildid" /> | |
152 <output name="output" value="assignTaxonomyAddspecies_F3D0.tab" ftype="tabular" /> | |
153 </test> | |
154 <!-- test w output bootstraps, minBoot, tryRC, and allowMultiple; note: sim_size is used for the outputs because the bootstrap results are probabilistic --> | |
155 <test expect_num_outputs="2"> | |
156 <param name="seqs" ftype="dada2_sequencetable" value="removeBimeraDenovo_F3D0.tab"/> | |
157 <param name="reference_cond|reference_select" value="history"/> | |
158 <param name="reference_cond|refFasta" ftype="fasta" value="reference.fa"/> | |
159 <param name="reference_cond|taxLevels" value="Level1,Level2,Level3,Level4,Level5" /> | |
160 <param name="minBoot" value="42" /> | |
161 <param name="tryRC" value="TRUE" /> | |
162 <param name="outputBootstraps" value="TRUE" /> | |
163 <param name="addSpecies_cond|addSpecies_select" value="TRUE"/> | |
164 <param name="addSpecies_cond|speciesreference_cond|speciesreference_select" value="history"/> | |
165 <param name="addSpecies_cond|speciesreference_cond|speciesrefFasta" ftype="fasta" value="reference_species.fa" /> | |
166 <param name="addSpecies_cond|allowMultiple_cond|allowMultiple" value="TRUE"/> | |
167 <param name="addSpecies_cond|tryRC" value="TRUE" /> | |
168 <output name="output" value="assignTaxonomyAddspecies_F3D0.tab" ftype="tabular" compare="sim_size" /> | |
169 <output name="bootstraps" value="assignTaxonomyAddspecies_F3D0_boot.tab" ftype="tabular" compare="sim_size" /> | |
170 </test> | |
171 </tests> | |
172 <help><![CDATA[ | |
173 Description | |
174 ........... | |
175 | |
176 This tool implements dada2's assignTaxonomy and addSpecies functions (addSpecies uses assignSpecies for the species level assignment). | |
177 | |
178 - assignTaxonomy assigns taxonomy to the sequence variants. The DADA2 package provides a native implementation of the naive Bayesian classifier method for this purpose (see Wang et al. 2007, kmer size 8 and 100 bootstrap replicates). The assignTaxonomy function takes as input a set of sequences to be classified and a training set of reference sequences with known taxonomy, and outputs taxonomic assignments with at least minBoot bootstrap confidence. Properly formatted reference files for several popular taxonomic databases are available at http://benjjneb.github.io/dada2/training.html | |
179 - assignSpecies makes species level assignments based on exact matching between ASVs and sequenced reference strains. Recent analysis suggests that exact matching (or 100% identity) is the only appropriate way to assign species to 16S gene fragments. Currently, species-assignment training fastas are available for the Silva and RDP 16S databases. | |
180 | |
181 Usage | |
182 ..... | |
183 | |
184 **Input** | |
185 | |
186 - A list of sequences contained in the results of removeBimeraDenovo or sequenceTable (note that the results of dada and mergePairs are also accepted). | |
187 - Reference data bases for taxonomic and species/genus level assignment. Several cached data bases can be chosen (ask your Galaxy admin if they are missing). For using custom data bases see below. | |
188 | |
189 **Output** | |
190 | |
191 - A table containing the assigned taxonomies exceeding the minBoot level of bootstrapping confidence. Rows correspond to the provided sequences, columns to the taxonomic levels. NA indicates that the sequence was not consistently classified at that level at the minBoot threshold. | |
192 - Optionally two columns for the genus and species taxonomic levels can be added. NA indicates that the sequence was not classified at that level. | |
193 - If outputBootstraps is checked, the bootstrap values (named "boot") are returned as an additional table alongside the table containing the assigned taxonomies (named "taxa"). | |
194 | |
195 @HELP_OVERVIEW@ | |
196 | |
197 Custom Reference data sets | |
198 .......................... | |
199 | |
200 For **taxonomy assignment** the following is needed: | |
201 | |
202 - a reference fasta data base | |
203 - a comma separated list of taxonomic ranks present in the reference data base | |
204 | |
205 The reference fasta data base for taxonomic assignment (fasta or compressed fasta) needs to encode the taxonomy corresponding to each sequence in the fasta header lines in the following fashion (note, the second sequence is not assigned down to level 6): | |
206 | |
207 :: | |
208 | |
209 >Level1;Level2;Level3;Level4;Level5;Level6; | |
210 ACCTAGAAAGTCGTAGATCGAAGTTGAAGCATCGCCCGATGATCGTCTGAAGCTGTAGCATGAGTCGATTTTCACATTCAGGGATACCATAGGATAC | |
211 >Level1;Level2;Level3;Level4;Level5; | |
212 CGCTAGAAAGTCGTAGAAGGCTCGGAGGTTTGAAGCATCGCCCGATGGGATCTCGTTGCTGTAGCATGAGTACGGACATTCAGGGATCATAGGATAC | |
213 | |
214 The list of required taxonomic ranks could be for instance: "Kingdom,Phylum,Class,Order,Family,Genus" | |
215 | |
216 The reference data base for **species assignment** is a fasta file (or compressed fasta file), with the id line formatted as follows: | |
217 | |
218 :: | |
219 | |
220 >ID Genus species | |
221 ACCTAGAAAGTCGTAGATCGAAGTTGAAGCATCGCCCGATGATCGTCTGAAGCTGTAGCATGAGTCGATTTTCACATTCAGGGATACCATAGGATAC | |
222 >ID Genus species | |
223 CGCTAGAAAGTCGTAGAAGGCTCGGAGGTTTGAAGCATCGCCCGATGGGATCTCGTTGCTGTAGCATGAGTACGGACATTCAGGGATCATAGGATAC | |
224 | |
225 | |
226 ]]></help> | |
227 <expand macro="citations"> | |
228 <citation type="doi">10.1128/AEM.00062-07</citation> | |
229 </expand> | |
230 </tool> | |
231 | |
232 |