Mercurial > repos > frogs > frogs_3_1_0
comparison filters.xml @ 0:59bc96331073 draft default tip
planemo upload for repository https://github.com/geraldinepascal/FROGS-wrappers/tree/v3.1.0 commit 08296fc88e3e938c482c631bd515b3b7a0499647
author | frogs |
---|---|
date | Thu, 28 Feb 2019 10:14:49 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:59bc96331073 |
---|---|
1 <?xml version="1.0"?> | |
2 <!-- | |
3 # Copyright (C) 2015 INRA | |
4 # | |
5 # This program is free software: you can redistribute it and/or modify | |
6 # it under the terms of the GNU General Public License as published by | |
7 # the Free Software Foundation, either version 3 of the License, or | |
8 # (at your option) any later version. | |
9 # | |
10 # This program is distributed in the hope that it will be useful, | |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 # GNU General Public License for more details. | |
14 # | |
15 # You should have received a copy of the GNU General Public License | |
16 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
17 --> | |
18 <tool id="FROGS_filters" name="FROGS Filters" version="3.1"> | |
19 <description>Filters OTUs on several criteria.</description> | |
20 <requirements> | |
21 <requirement type="package" version="3.1.0">frogs</requirement> | |
22 </requirements> | |
23 <stdio> | |
24 <exit_code range="1:" /> | |
25 <exit_code range=":-1" /> | |
26 </stdio> | |
27 | |
28 <command> | |
29 ## Build the filters.py call. Paths and free-text values are single-quoted so unusual characters cannot split or alter the command line. | |
30 filters.py | |
31 --nb-cpus \${GALAXY_SLOTS:-1} | |
32 --input-biom '$input_biom' | |
33 --input-fasta '$input_fasta' | |
34 --output-fasta '$output_fasta' | |
35 --output-biom '$output_biom' | |
36 --excluded '$output_excluded' | |
37 --summary '$output_summary' | |
38 #if $contaminations_filter.contaminations_filter_isApplied == "yes" | |
39 #set $contaminants_filename = str( $contaminations_filter.contaminants_db.fields.path ) | |
40 --contaminant '$contaminants_filename' | |
41 #end if | |
42 ## Abundance filters: each option is emitted only when its field is filled. | |
43 #if $abundance_filters.abundance_filters_areApplied == "yes" | |
44 #if $abundance_filters.min_sample_presence | |
45 --min-sample-presence $abundance_filters.min_sample_presence | |
46 #end if | |
47 #if $abundance_filters.min_abundance | |
48 --min-abundance '$abundance_filters.min_abundance' | |
49 #end if | |
50 #if $abundance_filters.nb_biggest_otu | |
51 --nb-biggest-otu $abundance_filters.nb_biggest_otu | |
52 #end if | |
53 #end if | |
54 ## RDP filter: rank and bootstrap are both optional, so emit the option only when both are set (otherwise "None:None" would be passed). | |
55 #if $RDP_filters.RDP_filters_areApplied == "yes" and $RDP_filters.rdp_rank and str( $RDP_filters.rdp_bootstrap ) not in ("", "None") | |
56 --min-rdp-bootstrap ${RDP_filters.rdp_rank}:${RDP_filters.rdp_bootstrap} | |
57 #end if | |
58 ## Blast filters: each option is emitted only when its field is filled. | |
59 #if $blast_filters.blast_filters_areApplied == "yes" | |
60 #if $blast_filters.min_blast_length | |
61 --min-blast-length $blast_filters.min_blast_length | |
62 #end if | |
63 #if $blast_filters.max_blast_evalue | |
64 --max-blast-evalue '$blast_filters.max_blast_evalue' | |
65 #end if | |
66 #if $blast_filters.min_blast_identity | |
67 --min-blast-identity $blast_filters.min_blast_identity | |
68 #end if | |
69 #if $blast_filters.min_blast_coverage | |
70 --min-blast-coverage $blast_filters.min_blast_coverage | |
71 #end if | |
72 #end if | |
73 </command> | |
74 <inputs> | |
75 <!-- Input files: the OTU sequences (FASTA) and their abundance table (BIOM) to filter. --> | |
76 <param format="fasta" name="input_fasta" type="data" label="Sequences file" help="The sequence file to filter (format: fasta)." /> | |
77 <param format="biom1" name="input_biom" type="data" label="Abundance file" help="The abundance file to filter (format: BIOM)." /> | |
78 <!-- Abundance and occurrence filters. When enabled, each optional field is forwarded to filters.py by the command template only if it is filled. --> | |
79 <conditional name="abundance_filters"> | |
80 <param name="abundance_filters_areApplied" type="select" label=" *** THE FILTERS ON OTUS IN SAMPLES, OTUS SIZE and SEQUENCE PERCENTAGE" help="If you want to filter OTUs on their abundance and occurrence." > | |
81 <option value="no">No filters</option> | |
82 <option value="yes">Apply filters</option> | |
83 </param> | |
84 <when value="no"></when> | |
85 <when value="yes"> | |
86 <param name="min_sample_presence" type="integer" optional="true" label="Minimum number of samples" size="5" help="Fill the field only if you want this treatment. Keep OTU present in at least this number of samples." /> | |
87 <param name="min_abundance" type="text" optional="true" label="Minimum proportion/number of sequences to keep OTU" size="5" help="Fill the field only if you want this treatment. Use decimal notation for proportion (example: 0.01 for keep OTU with at least 1% of all sequences) ; Use integer notation for number of sequence (example: 2 for keep OTU with at least 2 sequences, so remove single singleton)." /> | |
88 <param name="nb_biggest_otu" type="integer" optional="true" label="N biggest OTU" size="5" help="Fill the fields only if you want this treatment. Keep the N biggest OTU." /> | |
89 </when> | |
90 </conditional> | |
91 <!-- RDP affiliation filter. The command template joins the selected rank and the bootstrap threshold as "rank:bootstrap" for the min-rdp-bootstrap option; both fields are declared optional. --> | |
92 <conditional name="RDP_filters"> | |
93 <param name="RDP_filters_areApplied" type="select" label=" *** THE FILTERS ON RDP" help="If you want to filter OTUs on their taxonomic affiliation produced by RDP."> | |
94 <option value="no">No filters</option> | |
95 <option value="yes">Apply filters</option> | |
96 </param> | |
97 <when value="no"></when> | |
98 <when value="yes"> | |
99 <param name="rdp_rank" type="select" label="Rank with the bootstrap filter" optional="true"> | |
100 <option value="Domain">Domain</option> | |
101 <option value="Phylum">Phylum</option> | |
102 <option value="Class">Class</option> | |
103 <option value="Order">Order</option> | |
104 <option value="Family">Family</option> | |
105 <option value="Genus">Genus</option> | |
106 <option value="Species">Species</option> | |
107 </param> | |
108 <param name="rdp_bootstrap" type="float" min="0.0" max="1.0" label="Minimum bootstrap % (between 0 and 1)" size="5" optional="true" /> | |
109 </when> | |
110 </conditional> | |
111 <!-- Blast affiliation filters. When enabled, each field (e-value, identity, coverage, alignment length) is forwarded as its own filters.py option by the command template only if it is filled. --> | |
112 <conditional name="blast_filters"> | |
113 <param name="blast_filters_areApplied" type="select" label=" *** THE FILTERS ON BLAST" help="If you want to filter OTUs on their taxonomic affiliation produced by Blast."> | |
114 <option value="no">No filters</option> | |
115 <option value="yes">Apply filters</option> | |
116 </param> | |
117 <when value="no"></when> | |
118 <when value="yes"> | |
119 <param name="max_blast_evalue" type="text" value="" label="Maximum e-value (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" /> | |
120 <param name="min_blast_identity" type="float" min="0.0" max="1.0" optional="true" label="Minimum identity % (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" /> | |
121 <param name="min_blast_coverage" type="float" min="0.0" max="1.0" optional="true" label="Minimum coverage % (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" /> | |
122 <param name="min_blast_length" type="integer" optional="true" label="Minimum alignment length" size="5" help="Fill the field only if you want this treatment" /> | |
123 </when> | |
124 </conditional> | |
125 <!-- Contaminant filter. The databank is chosen from the phiX_db data table; the command template passes the "path" field of the selected entry as the contaminant option. --> | |
126 <conditional name="contaminations_filter"> | |
127 <param name="contaminations_filter_isApplied" type="select" label="Contaminant databank" help="The phiX databank (the phiX is a control added in Illumina sequencing technologies)."> | |
128 <option value="no">No filters</option> | |
129 <option value="yes">Apply filters</option> | |
130 </param> | |
131 <when value="no"></when> | |
132 <when value="yes"> | |
133 <param name="contaminants_db" type="select" label="Contaminant databank" help="The phiX databank (the phiX is a control added in Illumina sequencing technologies)."> | |
134 <options from_data_table="phiX_db"></options> | |
135 <validator type="no_options" message="A built-in database is not available" /> | |
136 </param> | |
137 </when> | |
138 </conditional> | |
139 </inputs> | |
140 <outputs> <!-- Four datasets: filtered sequences, filtered abundance table, list of excluded OTUs, and the HTML report. Each is handed to filters.py through the output-fasta, output-biom, excluded and summary options of the command. --> | |
141 <data format="fasta" name="output_fasta" label="${tool.name}: sequences.fasta" from_work_dir="sequences.fasta" /> | |
142 <data format="biom1" name="output_biom" label="${tool.name}: abundance.biom" from_work_dir="abundance.biom" /> | |
143 <data format="tabular" name="output_excluded" label="${tool.name}: excluded.tsv" from_work_dir="excluded.tsv" /> | |
144 <data format="html" name="output_summary" label="${tool.name}: report.html" from_work_dir="report.html" /> | |
145 </outputs> | |
146 <tests> <!-- Single test: abundance filters (min_abundance 0.00005, min_sample_presence 3) plus the phiX_test contaminant databank; the FASTA and excluded outputs are compared with reference files. --> | |
147 <test> | |
148 <param name="input_fasta" value="references/03-chimera.fasta" /> | |
149 <param name="input_biom" value="references/03-chimera.biom" /> | |
150 <conditional name="abundance_filters"> | |
151 <param name="abundance_filters_areApplied" value="yes" /> | |
152 <param name="min_abundance" value="0.00005" /> | |
153 <param name="min_sample_presence" value="3" /> | |
154 </conditional> | |
155 <conditional name="contaminations_filter"> | |
156 <param name="contaminations_filter_isApplied" value="yes" /> | |
157 <param name="contaminants_db" value="phiX_test" /> | |
158 </conditional> | |
159 <output name="output_fasta" file="references/04-filters.fasta" /> | |
160 <output name="output_excluded" file="references/04-filters.excluded" /> | |
161 </test> | |
162 </tests> | |
163 <help> | |
164 | |
165 .. image:: static/images/frogs_images/FROGS_logo.png | |
166 :height: 144 | |
167 :width: 110 | |
168 | |
169 | |
170 | |
171 .. class:: infomark page-header h2 | |
172 | |
173 What it does | |
174 | |
175 Filter the OTUs of an abundance table according to: | |
176 | |
177 -The abundance and the occurrence of OTUs: presence in samples, OTU size and maximum number of OTUs. | |
178 | |
179 -The taxonomic affiliation produced by RDP: rank and bootstrap. | |
180 | |
181 -The taxonomic affiliation produced by Blast: e-value, percentage of identity, percentage of coverage and alignment length. | |
182 | |
183 -Contamination: phiX a control added in Illumina sequencing technologies. | |
184 | |
185 | |
186 | |
187 .. class:: infomark page-header h2 | |
188 | |
189 Inputs/outputs | |
190 | |
191 | |
192 .. class:: h3 | |
193 | |
194 Inputs | |
195 | |
196 **Sequence file**: | |
197 | |
198 The sequences (format `FASTA <https://en.wikipedia.org/wiki/FASTA_format>`_). | |
199 | |
200 **Abundance file**: | |
201 | |
202 The abundance of each OTU in each sample (format `BIOM <http://biom-format.org/>`_). | |
203 | |
204 | |
205 .. class:: h3 | |
206 | |
207 Outputs | |
208 | |
209 **Sequence file** (sequences.fasta): | |
210 | |
211 The sequences after filtering (format `FASTA <https://en.wikipedia.org/wiki/FASTA_format>`_). | |
212 | |
213 **Abundance file** (abundance.biom): | |
214 | |
215 The abundance after filtering (format `BIOM <http://biom-format.org/>`_). | |
216 | |
217 **Excluded file** (excluded.tsv): | |
218 | |
219 The list of the OTUs deleted by filters (format `TSV <https://en.wikipedia.org/wiki/Tab-separated_values>`_). | |
220 | |
221 **Summary file** (report.html): | |
222 | |
223 The filters and the number of removed sequences (format `HTML <https://en.wikipedia.org/wiki/HTML>`_). | |
224 | |
225 | |
226 | |
227 .. class:: infomark page-header h2 | |
228 | |
229 How it works | |
230 | |
231 | |
232 | |
233 | |
234 The OTUs kept are the ones that, in the BIOM input file, satisfy the thresholds specified by the user. | |
235 | |
236 The BIOM abundance table and the fasta file are written again according to the OTUs kept. | |
237 | |
238 The OTUs discarded are listed in the excluded file. | |
239 | |
240 .. csv-table:: | |
241 :header: "Steps", "description" | |
242 :widths: 5, 150 | |
243 :class: table table-striped | |
244 | |
245 "1", "Except the filter to select the N most abundant OTUs, all the selected filters are run independently. For each filter a list of the OTUs to remove is generated." | |
246 "2", "All the OTUs tagged to remove by at least one filter are removed." | |
247 "3", "If the filter to select the N most abundant OTUs is filled, it is applied." | |
248 | |
249 | |
250 | |
251 .. class:: infomark page-header h2 | |
252 | |
253 Advice | |
254 | |
255 Please check that the input fasta file and the input BIOM file correspond to the same OTUs. | |
256 | |
257 Examples for the filters on abundance and occurrence of the OTUs: | |
258 | |
259 -To keep the OTUs that are present in at least 5 samples, fill the corresponding field with "5". | |
260 | |
261 -To keep the 20 biggest OTUs, fill the corresponding field with "20". | |
262 | |
263 -To filter on abundance, we advise you to specify 0.005% (enter 0.00005 in the field, which expects a proportion). It seems to be the optimal threshold (`Bokulich *et al*, 2013 <http://www.nature.com/nmeth/journal/v10/n1/abs/nmeth.2276.html>`_ ). | |
264 | |
265 If you use "FROGS Filters" before "FROGS Affiliation", the filters on RDP and Blast are useless. | |
266 | |
267 ---- | |
268 | |
269 **Contact** | |
270 | |
271 Contacts: frogs@inra.fr | |
272 | |
273 Repository: https://github.com/geraldinepascal/FROGS | |
274 website: http://frogs.toulouse.inra.fr/ | |
275 | |
276 Please cite the **FROGS article**: *Escudie F., et al. Bioinformatics, 2018. FROGS: Find, Rapidly, OTUs with Galaxy Solution.* | |
277 | |
278 </help> | |
279 <citations> | |
280 <citation type="doi">10.1093/bioinformatics/btx791</citation> | |
281 </citations> | |
282 </tool> |