Mercurial > repos > frogs > frogs_2_0_0
comparison filters.xml @ 0:76c750c5f0d1 draft default tip
planemo upload for repository https://github.com/oinizan/FROGS-wrappers commit 0b900a51e220ce6f17c1e76292c06a5f4d934055-dirty
author | frogs |
---|---|
date | Thu, 25 Oct 2018 05:01:13 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:76c750c5f0d1 |
---|---|
1 <?xml version="1.0"?> | |
2 <!-- | |
3 # Copyright (C) 2015 INRA | |
4 # | |
5 # This program is free software: you can redistribute it and/or modify | |
6 # it under the terms of the GNU General Public License as published by | |
7 # the Free Software Foundation, either version 3 of the License, or | |
8 # (at your option) any later version. | |
9 # | |
10 # This program is distributed in the hope that it will be useful, | |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 # GNU General Public License for more details. | |
14 # | |
15 # You should have received a copy of the GNU General Public License | |
16 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
17 --> | |
18 <tool id="FROGS_filters" name="FROGS Filters" version="1.3.0"> | |
19 <description>Filters OTUs on several criteria.</description> | |
20 <requirements> <!-- Runtime dependency: the frogs package, pinned to version 2.0.1. --> | |
21 <requirement type="package" version="2.0.1">frogs</requirement> | |
22 </requirements> | |
23 <stdio> <!-- Declares every non-zero exit code (1 and above, -1 and below) as an error condition; no level attribute is given, so Galaxy's default severity applies. --> | |
24 <exit_code range="1:" /> | |
25 <exit_code range=":-1" /> | |
26 </stdio> | |
27 | |
28 <!-- Builds the filters.py call. File paths and free-text values are single-quoted for the shell. Each optional filter flag is emitted only when its section is switched to "yes" and its field is filled; the RDP option additionally requires both the rank and the bootstrap to be set, so that an incomplete "rank:bootstrap" pair is never passed. --> <command> | |
29 filters.py | |
30 --nb-cpus $nb_cpu | |
31 --input-biom '$input_biom' | |
32 --input-fasta '$input_fasta' | |
33 --output-fasta '$output_fasta' | |
34 --output-biom '$output_biom' | |
35 --excluded '$output_excluded' | |
36 --summary '$output_summary' | |
37 #if $contaminations_filter.contaminations_filter_isApplied == "yes" | |
38 --contaminant '$contaminations_filter.contaminants_db' | |
39 #end if | |
40 | |
41 #if $abundance_filters.abundance_filters_areApplied == "yes" | |
42 #if $abundance_filters.min_sample_presence | |
43 --min-sample-presence $abundance_filters.min_sample_presence | |
44 #end if | |
45 #if $abundance_filters.min_abundance | |
46 --min-abundance '$abundance_filters.min_abundance' | |
47 #end if | |
48 #if $abundance_filters.nb_biggest_otu | |
49 --nb-biggest-otu $abundance_filters.nb_biggest_otu | |
50 #end if | |
51 #end if | |
52 | |
53 #if $RDP_filters.RDP_filters_areApplied == "yes" and str($RDP_filters.rdp_rank) not in ("", "None") and str($RDP_filters.rdp_bootstrap) not in ("", "None") | |
54 --min-rdp-bootstrap '$RDP_filters.rdp_rank:$RDP_filters.rdp_bootstrap' | |
55 #end if | |
56 | |
57 #if $blast_filters.blast_filters_areApplied == "yes" | |
58 #if $blast_filters.min_blast_length | |
59 --min-blast-length $blast_filters.min_blast_length | |
60 #end if | |
61 #if $blast_filters.max_blast_evalue | |
62 --max-blast-evalue '$blast_filters.max_blast_evalue' | |
63 #end if | |
64 #if $blast_filters.min_blast_identity | |
65 --min-blast-identity $blast_filters.min_blast_identity | |
66 #end if | |
67 #if $blast_filters.min_blast_coverage | |
68 --min-blast-coverage $blast_filters.min_blast_coverage | |
69 #end if | |
70 #end if | |
71 </command> | |
72 <inputs> | |
73 <!-- Input files: the sequences (fasta) and the abundance table (biom1) to filter. --> | |
74 <param format="fasta" name="input_fasta" type="data" label="Sequences file" help="The sequence file to filter (format: fasta)." /> | |
75 <param format="biom1" name="input_biom" type="data" label="Abundance file" help="The abundance file to filter (format: BIOM)." /> | |
76 | |
77 <!-- Parameters: nb_cpu is a hidden field with value 1; the conditionals that follow each switch one filter family on or off. --> | |
78 <param name="nb_cpu" type="hidden" label="CPU number" help="The maximum number of CPUs used." value="1"></param> | |
79 | |
80 <conditional name="abundance_filters"> <!-- Abundance/occurrence filters: the three optional fields are only shown when the selector is "yes". --> | |
81 <param type="select" name="abundance_filters_areApplied" help="If you want to filter OTUs on their abundance and occurrence." label=" *** THE FILTERS ON OTUS IN SAMPLES, OTUS SIZE and SEQUENCE PERCENTAGE"> | |
82 <option value="no">No filters</option> | |
83 <option value="yes">Apply filters</option> | |
84 </param> | |
85 <when value="no" /> | |
86 <when value="yes"> | |
87 <param type="integer" name="min_sample_presence" optional="true" size="5" label="Minimum number of samples" help="Fill the field only if you want this treatment. Keep OTU present in at least this number of samples." /> | |
88 <param type="text" name="min_abundance" optional="true" size="5" label="Minimum proportion/number of sequences to keep OTU" help="Fill the field only if you want this treatment. Use decimal notation for proportion (example: 0.01 for keep OTU with at least 1% of all sequences) ; Use integer notation for number of sequence (example: 2 for keep OTU with at least 2 sequences, so remove single singleton)." /> | |
89 <param type="integer" name="nb_biggest_otu" optional="true" size="5" label="N biggest OTU" help="Fill the fields only if you want this treatment. Keep the N biggest OTU." /> | |
90 </when> | |
91 </conditional> | |
92 | |
93 <conditional name="RDP_filters"> <!-- RDP affiliation filter: a taxonomic rank plus a minimum bootstrap in [0, 1], shown only when the selector is "yes". --> | |
94 <param type="select" name="RDP_filters_areApplied" help="If you want to filter OTUs on their taxonomic affiliation produced by RDP." label=" *** THE FILTERS ON RDP"> | |
95 <option value="no">No filters</option> | |
96 <option value="yes">Apply filters</option> | |
97 </param> | |
98 <when value="no" /> | |
99 <when value="yes"> | |
100 <param type="select" name="rdp_rank" optional="true" label="Rank with the bootstrap filter"> | |
101 <option value="Domain">Domain</option> | |
102 <option value="Phylum">Phylum</option> | |
103 <option value="Class">Class</option> | |
104 <option value="Order">Order</option> | |
105 <option value="Family">Family</option> | |
106 <option value="Genus">Genus</option> | |
107 <option value="Species">Species</option> | |
108 </param> | |
109 <param type="float" name="rdp_bootstrap" optional="true" min="0.0" max="1.0" size="5" label="Minimum bootstrap % (between 0 and 1)" /> | |
110 </when> | |
111 </conditional> | |
112 | |
113 <conditional name="blast_filters"> <!-- Blast affiliation filters: e-value, identity, coverage and alignment length, shown only when the selector is "yes". --> | |
114 <param type="select" name="blast_filters_areApplied" help="If you want to filter OTUs on their taxonomic affiliation produced by Blast." label=" *** THE FILTERS ON BLAST"> | |
115 <option value="no">No filters</option> | |
116 <option value="yes">Apply filters</option> | |
117 </param> | |
118 <when value="no" /> | |
119 <when value="yes"> | |
120 <param type="text" name="max_blast_evalue" value="" size="5" label="Maximum e-value (between 0 and 1)" help="Fill the field only if you want this treatment" /> | |
121 <param type="float" name="min_blast_identity" optional="true" min="0.0" max="1.0" size="5" label="Minimum identity % (between 0 and 1)" help="Fill the field only if you want this treatment" /> | |
122 <param type="float" name="min_blast_coverage" optional="true" min="0.0" max="1.0" size="5" label="Minimum coverage % (between 0 and 1)" help="Fill the field only if you want this treatment" /> | |
123 <param type="integer" name="min_blast_length" optional="true" size="5" label="Minimum alignment length" help="Fill the field only if you want this treatment" /> | |
124 </when> | |
125 </conditional> | |
126 | |
127 <conditional name="contaminations_filter"> <!-- Contaminant screening: when the selector is "yes", the user picks a databank listed in phiX_db.loc (column 0 is the displayed name, column 1 the value). --> | |
128 <param name="contaminations_filter_isApplied" type="select" label=" *** THE FILTERS ON CONTAMINATIONS" help="If you want to filter OTUs on classical contaminations."> | |
129 <option value="no">No filters</option> | |
130 <option value="yes">Apply filters</option> | |
131 </param> | |
132 <when value="no"></when> | |
133 <when value="yes"> | |
134 <param name="contaminants_db" type="select" label="Contaminant databank" help="The phiX databank (the phiX is a control added in Illumina sequencing technologies)."> | |
135 <options from_file="phiX_db.loc"> | |
136 <column name="name" index="0"/> | |
137 <column name="value" index="1"/> | |
138 </options> | |
139 </param> | |
140 </when> | |
141 </conditional> | |
142 </inputs> | |
143 <outputs> <!-- Four datasets: filtered sequences, filtered abundance table, list of excluded OTUs, and the HTML report. --> | |
144 <data name="output_fasta" format="fasta" from_work_dir="sequences.fasta" label="${tool.name}: sequences.fasta" /> | |
145 <data name="output_biom" format="biom1" from_work_dir="abundance.biom" label="${tool.name}: abundance.biom" /> | |
146 <data name="output_excluded" format="tabular" from_work_dir="excluded.tsv" label="${tool.name}: excluded.tsv" /> | |
147 <data name="output_summary" format="html" from_work_dir="report.html" label="${tool.name}: report.html" /> | |
148 </outputs> | |
149 <tests> <!-- One test: abundance filters only (at least 3 samples, proportion 0.00005), checking the filtered fasta and the excluded list. --> | |
150 <test> | |
151 <param value="references/03-chimera.fasta" name="input_fasta" /> | |
152 <param value="references/03-chimera.biom" name="input_biom" /> | |
153 <conditional name="abundance_filters"> | |
154 <param value="yes" name="abundance_filters_areApplied" /> | |
155 <param value="3" name="min_sample_presence" /> | |
156 <param value="0.00005" name="min_abundance" /> | |
157 </conditional> | |
158 <output file="references/04-filters.fasta" name="output_fasta" /> | |
159 <output file="references/04-filters.excluded" name="output_excluded" /> | |
160 </test> | |
161 </tests> | |
162 <help> | |
163 | |
164 .. image:: static/images/FROGS_logo.png | |
165 :height: 144 | |
166 :width: 110 | |
167 | |
168 | |
169 | |
170 .. class:: infomark page-header h2 | |
171 | |
172 What it does | |
173 | |
174 Filter the OTUs of an abundance table according to: | |
175 | |
176 -The abundance and the occurrence of OTUs: presence in samples, OTU size and maximum number of OTUs. | |
177 | |
178 -The taxonomic affiliation produced by RDP: rank and bootstrap. | |
179 | |
180 -The taxonomic affiliation produced by Blast: e-value, percentage of identity, percentage of coverage and alignment length. | |
181 | |
182 -Contamination: phiX, a control added in Illumina sequencing technologies. | |
183 | |
184 | |
185 | |
186 .. class:: infomark page-header h2 | |
187 | |
188 Inputs/outputs | |
189 | |
190 | |
191 .. class:: h3 | |
192 | |
193 Inputs | |
194 | |
195 **Sequence file**: | |
196 | |
197 The sequences (format `FASTA <https://en.wikipedia.org/wiki/FASTA_format>`_). | |
198 | |
199 **Abundance file**: | |
200 | |
201 The abundance of each OTU in each sample (format `BIOM <http://biom-format.org/>`_). | |
202 | |
203 | |
204 .. class:: h3 | |
205 | |
206 Outputs | |
207 | |
208 **Sequence file** (sequences.fasta): | |
209 | |
210 The sequences after filtering (format `FASTA <https://en.wikipedia.org/wiki/FASTA_format>`_). | |
211 | |
212 **Abundance file** (abundance.biom): | |
213 | |
214 The abundance after filtering (format `BIOM <http://biom-format.org/>`_). | |
215 | |
216 **Excluded file** (excluded.tsv): | |
217 | |
218 The list of the OTUs deleted by filters (format `TSV <https://en.wikipedia.org/wiki/Tab-separated_values>`_). | |
219 | |
220 **Summary file** (report.html): | |
221 | |
222 The filters and the number of removed sequences (format `HTML <https://en.wikipedia.org/wiki/HTML>`_). | |
223 | |
224 | |
225 | |
226 .. class:: infomark page-header h2 | |
227 | |
228 How it works | |
229 | |
230 | |
231 | |
232 | |
233 The OTUs kept are the ones that, in the BIOM input file, satisfy the thresholds specified by the user. | |
234 | |
235 The BIOM abundance table and the fasta file are written again according to the OTUs kept. | |
236 | |
237 The OTUs discarded are listed in the excluded file. | |
238 | |
239 .. csv-table:: | |
240 :header: "Steps", "description" | |
241 :widths: 5, 150 | |
242 :class: table table-striped | |
243 | |
244 "1", "Except the filter to select the N most abundant OTUs, all the selected filters are run independently. For each filter, a list of the OTUs to remove is generated." | |
245 "2", "All the OTUs tagged to remove by at least one filter are removed." | |
246 "3", "If the filter to select the N most abundant OTUs is filled, it is then applied." | |
247 | |
248 | |
249 | |
250 .. class:: infomark page-header h2 | |
251 | |
252 Advice | |
253 | |
254 Please check that the input fasta file and the input BIOM file correspond to the same OTUs. | |
255 | |
256 Examples for the filters on abundance and occurrence of the OTUs: | |
257 | |
258 -To keep the OTUs that are present in at least 5 samples, fill the corresponding field with "5". | |
259 | |
260 -To keep the 20 biggest OTUs, fill the corresponding field with "20". | |
261 | |
262 -To filter on abundance, we advise you to specify 0.005% (enter 0.00005 in the field, since proportions use decimal notation). It seems to be the optimal threshold (`Bokulich *et al*, 2013 <http://www.nature.com/nmeth/journal/v10/n1/abs/nmeth.2276.html>`_ ). | |
263 | |
264 If you use "FROGS Filters" before "FROGS Affiliation", the filters on RDP and Blast are useless. | |
265 | |
266 ---- | |
267 | |
268 **Contact** | |
269 | |
270 Contacts: frogs@inra.fr | |
271 | |
272 Repository: https://github.com/geraldinepascal/FROGS | |
273 | |
274 Please cite the FROGS Publication: *Escudie F., Auer L., Bernard M., Cauquil L., Vidal K., Maman S., Mariadassou M., Hernandez-Raquet G., Pascal G., 2015. FROGS: Find Rapidly OTU with Galaxy Solution. In: The environmental genomic Conference, Montpellier, France,* http://bioinfo.genotoul.fr/fileadmin/user_upload/FROGS_2015_GE_Montpellier_poster.pdf | |
275 | |
276 Depending on the help provided you can cite us in acknowledgements, references or both. | |
277 </help> | |
278 <citations> <!-- DOI references attached to this tool. NOTE(review): the help text asks users to cite a 2015 conference poster instead; confirm which citation is authoritative and that each DOI points to the intended paper. --> | |
279 <citation type="doi">10.1093/bioinformatics/btx791</citation> | |
280 <citation type="doi">10.1128/AEM.01043-13</citation> | |
281 <citation type="doi">10.14806/ej.17.1.200</citation> | |
282 <citation type="doi">10.1093/bioinformatics/btr507</citation> | |
283 </citations> | |
284 | |
285 </tool> |