comparison filters.xml @ 0:76c750c5f0d1 draft default tip

planemo upload for repository https://github.com/oinizan/FROGS-wrappers commit 0b900a51e220ce6f17c1e76292c06a5f4d934055-dirty
author frogs
date Thu, 25 Oct 2018 05:01:13 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:76c750c5f0d1
1 <?xml version="1.0"?>
2 <!--
3 # Copyright (C) 2015 INRA
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 -->
18 <tool id="FROGS_filters" name="FROGS Filters" version="1.3.0">
19 <description>Filters OTUs on several criteria.</description>
20 <requirements> <!-- Package dependency declared for this tool. -->
21 <requirement version="2.0.1" type="package">frogs</requirement>
22 </requirements>
23 <stdio> <!-- Any non-zero exit code, positive or negative, is reported as a fatal error. -->
24 <exit_code level="fatal" range="1:" />
25 <exit_code level="fatal" range=":-1" />
26 </stdio>
27 <!-- Builds the filters.py call: I/O paths are always passed; each filter option is appended only when its section is enabled and its field is filled. Paths and free-text values are single-quoted for the shell. -->
28 <command><![CDATA[
29 filters.py
30 --nb-cpus $nb_cpu
31 --input-biom '$input_biom'
32 --input-fasta '$input_fasta'
33 --output-fasta '$output_fasta'
34 --output-biom '$output_biom'
35 --excluded '$output_excluded'
36 --summary '$output_summary'
37 #if $contaminations_filter.contaminations_filter_isApplied == "yes"
38 --contaminant '$contaminations_filter.contaminants_db'
39 #end if
40 ## Abundance/occurrence filters: every field is optional, an empty field adds no option.
41 #if $abundance_filters.abundance_filters_areApplied == "yes"
42 #if $abundance_filters.min_sample_presence
43 --min-sample-presence $abundance_filters.min_sample_presence
44 #end if
45 #if $abundance_filters.min_abundance
46 --min-abundance '$abundance_filters.min_abundance'
47 #end if
48 #if $abundance_filters.nb_biggest_otu
49 --nb-biggest-otu $abundance_filters.nb_biggest_otu
50 #end if
51 #end if
52 ## RDP filter: rank and bootstrap are both optional inputs, so emit the rank:threshold pair only when both are filled (otherwise the argument would be malformed).
53 #if $RDP_filters.RDP_filters_areApplied == "yes" and $RDP_filters.rdp_rank and str($RDP_filters.rdp_bootstrap) != ""
54 --min-rdp-bootstrap '${RDP_filters.rdp_rank}:${RDP_filters.rdp_bootstrap}'
55 #end if
56 ## Blast filters: every field is optional, an empty field adds no option.
57 #if $blast_filters.blast_filters_areApplied == "yes"
58 #if $blast_filters.min_blast_length
59 --min-blast-length $blast_filters.min_blast_length
60 #end if
61 #if $blast_filters.max_blast_evalue
62 --max-blast-evalue '$blast_filters.max_blast_evalue'
63 #end if
64 #if $blast_filters.min_blast_identity
65 --min-blast-identity $blast_filters.min_blast_identity
66 #end if
67 #if $blast_filters.min_blast_coverage
68 --min-blast-coverage $blast_filters.min_blast_coverage
69 #end if
70 #end if
71 ]]></command>
72 <inputs>
73 <!-- Files -->
74 <param format="fasta" name="input_fasta" type="data" label="Sequences file" help="The sequence file to filter (format: fasta)." />
75 <param format="biom1" name="input_biom" type="data" label="Abundance file" help="The abundance file to filter (format: BIOM)." />
76
77 <!-- Parameters -->
78 <param name="nb_cpu" type="hidden" label="CPU number" help="The maximum number of CPUs used." value="1"></param>
79 <!-- Abundance/occurrence filters: every field is optional; an empty field disables that criterion. -->
80 <conditional name="abundance_filters">
81 <param name="abundance_filters_areApplied" type="select" label=" *** THE FILTERS ON OTUS IN SAMPLES, OTUS SIZE and SEQUENCE PERCENTAGE" help="If you want to filter OTUs on their abundance and occurrence." >
82 <option value="no">No filters</option>
83 <option value="yes">Apply filters</option>
84 </param>
85 <when value="no"></when>
86 <when value="yes">
87 <param name="min_sample_presence" type="integer" optional="true" label="Minimum number of samples" size="5" help="Fill the field only if you want this treatment. Keep OTU present in at least this number of samples." />
88 <param name="min_abundance" type="text" optional="true" label="Minimum proportion/number of sequences to keep OTU" size="5" help="Fill the field only if you want this treatment. Use decimal notation for proportion (example: 0.01 for keep OTU with at least 1% of all sequences) ; Use integer notation for number of sequence (example: 2 for keep OTU with at least 2 sequences, so remove single singleton)." />
89 <param name="nb_biggest_otu" type="integer" optional="true" label="N biggest OTU" size="5" help="Fill the fields only if you want this treatment. Keep the N biggest OTU." />
90 </when>
91 </conditional>
92 <!-- RDP filter: the command combines the rank and the bootstrap into a single rank:threshold option, so both fields are needed. -->
93 <conditional name="RDP_filters">
94 <param name="RDP_filters_areApplied" type="select" label=" *** THE FILTERS ON RDP" help="If you want to filter OTUs on their taxonomic affiliation produced by RDP.">
95 <option value="no">No filters</option>
96 <option value="yes">Apply filters</option>
97 </param>
98 <when value="no"></when>
99 <when value="yes">
100 <param name="rdp_rank" type="select" label="Rank with the bootstrap filter" optional="true">
101 <option value="Domain">Domain</option>
102 <option value="Phylum">Phylum</option>
103 <option value="Class">Class</option>
104 <option value="Order">Order</option>
105 <option value="Family">Family</option>
106 <option value="Genus">Genus</option>
107 <option value="Species">Species</option>
108 </param>
109 <param name="rdp_bootstrap" type="float" min="0.0" max="1.0" label="Minimum bootstrap % (between 0 and 1)" size="5" optional="true" help="Both the rank and the bootstrap threshold must be filled to use this filter." />
110 </when>
111 </conditional>
112 <!-- Blast filters: every field is optional; an empty field disables that criterion. -->
113 <conditional name="blast_filters">
114 <param name="blast_filters_areApplied" type="select" label=" *** THE FILTERS ON BLAST" help="If you want to filter OTUs on their taxonomic affiliation produced by Blast.">
115 <option value="no">No filters</option>
116 <option value="yes">Apply filters</option>
117 </param>
118 <when value="no"></when>
119 <when value="yes">
120 <param name="max_blast_evalue" type="text" value="" label="Maximum e-value (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" />
121 <param name="min_blast_identity" type="float" min="0.0" max="1.0" optional="true" label="Minimum identity % (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" />
122 <param name="min_blast_coverage" type="float" min="0.0" max="1.0" optional="true" label="Minimum coverage % (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" />
123 <param name="min_blast_length" type="integer" optional="true" label="Minimum alignment length" size="5" help="Fill the field only if you want this treatment" />
124 </when>
125 </conditional>
126 <!-- Contaminant filter: the databank choices are read from phiX_db.loc (column 0 = displayed name, column 1 = value passed to the command). -->
127 <conditional name="contaminations_filter">
128 <param name="contaminations_filter_isApplied" type="select" label=" *** THE FILTERS ON CONTAMINATIONS" help="If you want to filter OTUs on classical contaminations.">
129 <option value="no">No filters</option>
130 <option value="yes">Apply filters</option>
131 </param>
132 <when value="no"></when>
133 <when value="yes">
134 <param name="contaminants_db" type="select" label="Contaminant databank" help="The phiX databank (the phiX is a control added in Illumina sequencing technologies).">
135 <options from_file="phiX_db.loc">
136 <column name="name" index="0"/>
137 <column name="value" index="1"/>
138 </options>
139 </param>
140 </when>
141 </conditional>
142 </inputs>
143 <outputs> <!-- The four datasets declared by the tool; each label is prefixed with the tool name. -->
144 <data name="output_fasta" format="fasta" from_work_dir="sequences.fasta" label="${tool.name}: sequences.fasta" />
145 <data name="output_biom" format="biom1" from_work_dir="abundance.biom" label="${tool.name}: abundance.biom" />
146 <data name="output_excluded" format="tabular" from_work_dir="excluded.tsv" label="${tool.name}: excluded.tsv" />
147 <data name="output_summary" format="html" from_work_dir="report.html" label="${tool.name}: report.html" />
148 </outputs>
149 <tests> <!-- One test case: abundance filters only, compared with the reference FASTA and excluded-OTU files. -->
150 <test>
151 <param value="references/03-chimera.fasta" name="input_fasta" />
152 <param value="references/03-chimera.biom" name="input_biom" />
153 <conditional name="abundance_filters">
154 <param value="yes" name="abundance_filters_areApplied" />
155 <param value="0.00005" name="min_abundance" />
156 <param value="3" name="min_sample_presence" />
157 </conditional>
158 <output file="references/04-filters.fasta" name="output_fasta" />
159 <output file="references/04-filters.excluded" name="output_excluded" />
160 </test>
161 </tests>
162 <help>
163
164 .. image:: static/images/FROGS_logo.png
165 :height: 144
166 :width: 110
167
168
169
170 .. class:: infomark page-header h2
171
172 What it does
173
174 Filter the OTUs of an abundance table according to:
175
176 -The abundance and the occurrence of OTUs: presence in samples, OTU size and maximum number of OTUs.
177
178 -The taxonomic affiliation produced by RDP: rank and bootstrap.
179
180 -The taxonomic affiliation produced by Blast: e-value, percentage of identity, percentage of coverage and alignment length.
181
182 -Contamination: phiX, a control added in Illumina sequencing technologies.
183
184
185
186 .. class:: infomark page-header h2
187
188 Inputs/outputs
189
190
191 .. class:: h3
192
193 Inputs
194
195 **Sequence file**:
196
197 The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
198
199 **Abundance file**:
200
201 The abundance of each OTU in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).
202
203
204 .. class:: h3
205
206 Outputs
207
208 **Sequence file** (sequences.fasta):
209
210 The sequences after filtering (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
211
212 **Abundance file** (abundance.biom):
213
214 The abundance after filtering (format `BIOM &lt;http://biom-format.org/&gt;`_).
215
216 **Excluded file** (excluded.tsv):
217
218 The list of the OTUs deleted by filters (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).
219
220 **Summary file** (report.html):
221
222 The filters and the number of removed sequences (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).
223
224
225
226 .. class:: infomark page-header h2
227
228 How it works
229
230
231
232
233 The OTUs kept are the ones from the BIOM input file that satisfy the thresholds specified by the user.
234
235 The BIOM abundance table and the fasta file are written again according to the OTUs kept.
236
237 The OTUs discarded are listed in the excluded file.
238
239 .. csv-table::
240 :header: "Steps", "description"
241 :widths: 5, 150
242 :class: table table-striped
243
244 "1", "Except the filter to select the N most abundant OTUs, all the selected filters are run independently. For each filter a list of the OTUs to remove is generated."
245 "2", "All the OTUs tagged to remove by at least one filter are removed."
246 "3", "If the filter to select the N most abundant OTUs is filled, it is applied."
247
248
249
250 .. class:: infomark page-header h2
251
252 Advice
253
254 Please check that the input fasta file and the input BIOM file correspond to the same OTUs.
255
256 Examples for the filters on abundance and occurrence of the OTUs:
257
258 -To keep the OTUs that are present in at least 5 samples, fill the corresponding field with "5".
259
260 -To keep only the 20 biggest OTUs, fill the corresponding field with "20".
261
262 -To filter on abundance, we advise you to specify 0.005% (enter 0.00005 in the field, as proportions use decimal notation). It seems to be the optimal threshold (`Bokulich *et al*, 2013 &lt;http://www.nature.com/nmeth/journal/v10/n1/abs/nmeth.2276.html&gt;`_ ).
263
264 If you use "FROGS Filters" before "FROGS Affiliation", the filters on RDP and Blast are useless.
265
266 ----
267
268 **Contact**
269
270 Contacts: frogs@inra.fr
271
272 Repository: https://github.com/geraldinepascal/FROGS
273
274 Please cite the FROGS Publication: *Escudie F., Auer L., Bernard M., Cauquil L., Vidal K., Maman S., Mariadassou M., Hernandez-Raquet G., Pascal G., 2015. FROGS: Find Rapidly OTU with Galaxy Solution. In: The environmental genomic Conference, Montpellier, France,* http://bioinfo.genotoul.fr/fileadmin/user_upload/FROGS_2015_GE_Montpellier_poster.pdf
275
276 Depending on the help provided you can cite us in acknowledgements, references or both.
277 </help>
278 <citations> <!-- Publications to cite for this tool, each given as a DOI. -->
279 <citation type="doi">10.1093/bioinformatics/btx791</citation>
280 <citation type="doi">10.1128/AEM.01043-13</citation>
281 <citation type="doi">10.14806/ej.17.1.200</citation>
282 <citation type="doi">10.1093/bioinformatics/btr507</citation>
283 </citations>
284
285 </tool>