Mercurial > repos > frogs > frogs_2_0_0
diff filters.xml @ 0:76c750c5f0d1 draft default tip
planemo upload for repository https://github.com/oinizan/FROGS-wrappers commit 0b900a51e220ce6f17c1e76292c06a5f4d934055-dirty
author | frogs |
---|---|
date | Thu, 25 Oct 2018 05:01:13 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filters.xml Thu Oct 25 05:01:13 2018 -0400 @@ -0,0 +1,285 @@ +<?xml version="1.0"?> +<!-- +# Copyright (C) 2015 INRA +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +--> +<tool id="FROGS_filters" name="FROGS Filters" version="1.3.0"> + <description>Filters OTUs on several criteria.</description> + <requirements> + <requirement type="package" version="2.0.1">frogs</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + <exit_code range=":-1" /> + </stdio> + + <command> + filters.py + --nb-cpus $nb_cpu + --input-biom $input_biom + --input-fasta $input_fasta + --output-fasta $output_fasta + --output-biom $output_biom + --excluded $output_excluded + --summary $output_summary + #if $contaminations_filter.contaminations_filter_isApplied == "yes" + --contaminant $contaminations_filter.contaminants_db + #end if + + #if $abundance_filters.abundance_filters_areApplied == "yes" + #if $abundance_filters.min_sample_presence + --min-sample-presence $abundance_filters.min_sample_presence + #end if + #if $abundance_filters.min_abundance + --min-abundance $abundance_filters.min_abundance + #end if + #if $abundance_filters.nb_biggest_otu + --nb-biggest-otu $abundance_filters.nb_biggest_otu + #end if + #end if + + #if $RDP_filters.RDP_filters_areApplied == "yes" + --min-rdp-bootstrap $RDP_filters.rdp_rank:$RDP_filters.rdp_bootstrap + #end if + + #if $blast_filters.blast_filters_areApplied == "yes" + #if $blast_filters.min_blast_length + --min-blast-length $blast_filters.min_blast_length + #end if + #if $blast_filters.max_blast_evalue + --max-blast-evalue $blast_filters.max_blast_evalue + #end if + #if $blast_filters.min_blast_identity + --min-blast-identity $blast_filters.min_blast_identity + #end if + #if $blast_filters.min_blast_coverage + --min-blast-coverage $blast_filters.min_blast_coverage + #end if + #end if + </command> + <inputs> + <!-- Files --> + <param format="fasta" name="input_fasta" type="data" label="Sequences file" help="The sequence file to filter (format: fasta)." /> + <param format="biom1" name="input_biom" type="data" label="Abundance file" help="The abundance file to filter (format: BIOM)." /> + + <!-- Parameters --> + <param name="nb_cpu" type="hidden" label="CPU number" help="The maximum number of CPUs used." value="1"></param> + + <conditional name="abundance_filters"> + <param name="abundance_filters_areApplied" type="select" label=" *** THE FILTERS ON OTUS IN SAMPLES, OTUS SIZE and SEQUENCE PERCENTAGE" help="If you want to filter OTUs on their abundance and occurrence." > + <option value="no">No filters</option> + <option value="yes">Apply filters</option> + </param> + <when value="no"></when> + <when value="yes"> + <param name="min_sample_presence" type="integer" optional="true" label="Minimum number of samples" size="5" help="Fill the field only if you want this treatment. Keep OTU present in at least this number of samples." /> + <param name="min_abundance" type="text" optional="true" label="Minimum proportion/number of sequences to keep OTU" size="5" help="Fill the field only if you want this treatment. Use decimal notation for proportion (example: 0.01 for keep OTU with at least 1% of all sequences) ; Use integer notation for number of sequence (example: 2 for keep OTU with at least 2 sequences, so remove single singleton)." /> + <param name="nb_biggest_otu" type="integer" optional="true" label="N biggest OTU" size="5" help="Fill the fields only if you want this treatment. Keep the N biggest OTU." /> + </when> + </conditional> + + <conditional name="RDP_filters"> + <param name="RDP_filters_areApplied" type="select" label=" *** THE FILTERS ON RDP" help="If you want to filter OTUs on their taxonomic affiliation produced by RDP."> + <option value="no">No filters</option> + <option value="yes">Apply filters</option> + </param> + <when value="no"></when> + <when value="yes"> + <param name="rdp_rank" type="select" label="Rank with the bootstrap filter" optional="true"> + <option value="Domain">Domain</option> + <option value="Phylum">Phylum</option> + <option value="Class">Class</option> + <option value="Order">Order</option> + <option value="Family">Family</option> + <option value="Genus">Genus</option> + <option value="Species">Species</option> + </param> + <param name="rdp_bootstrap" type="float" min="0.0" max="1.0" label="Minimum bootstrap % (between 0 and 1)" size="5" optional="true" /> + </when> + </conditional> + + <conditional name="blast_filters"> + <param name="blast_filters_areApplied" type="select" label=" *** THE FILTERS ON BLAST" help="If you want to filter OTUs on their taxonomic affiliation produced by Blast."> + <option value="no">No filters</option> + <option value="yes">Apply filters</option> + </param> + <when value="no"></when> + <when value="yes"> + <param name="max_blast_evalue" type="text" value="" label="Maximum e-value (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" /> + <param name="min_blast_identity" type="float" min="0.0" max="1.0" optional="true" label="Minimum identity % (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" /> + <param name="min_blast_coverage" type="float" min="0.0" max="1.0" optional="true" label="Minimum coverage % (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" /> + <param name="min_blast_length" type="integer" optional="true" label="Minimum alignment length" size="5" help="Fill the field only if you want this treatment" /> + </when> + </conditional> + + <conditional name="contaminations_filter"> + <param name="contaminations_filter_isApplied" type="select" label=" *** THE FILTERS ON CONTAMINATIONS" help="If you want to filter OTUs on classical contaminations."> + <option value="no">No filters</option> + <option value="yes">Apply filters</option> + </param> + <when value="no"></when> + <when value="yes"> + <param name="contaminants_db" type="select" label="Cotaminant databank" help="The phiX databank (the phiX is a control added in Illumina sequencing technologies)."> + <options from_file="phiX_db.loc"> + <column name="name" index="0"/> + <column name="value" index="1"/> + </options> + </param> + </when> + </conditional> + </inputs> + <outputs> + <data format="fasta" name="output_fasta" label="${tool.name}: sequences.fasta" from_work_dir="sequences.fasta" /> + <data format="biom1" name="output_biom" label="${tool.name}: abundance.biom" from_work_dir="abundance.biom" /> + <data format="tabular" name="output_excluded" label="${tool.name}: excluded.tsv" from_work_dir="excluded.tsv" /> + <data format="html" name="output_summary" label="${tool.name}: report.html" from_work_dir="report.html" /> + </outputs> + <tests> + <test> + <param name="input_fasta" value="references/03-chimera.fasta" /> + <param name="input_biom" value="references/03-chimera.biom" /> + <conditional name="abundance_filters"> + <param name="abundance_filters_areApplied" value="yes" /> + <param name="min_abundance" value="0.00005" /> + <param name="min_sample_presence" value="3" /> + </conditional> + <output name="output_fasta" file="references/04-filters.fasta" /> + <output name="output_excluded" file="references/04-filters.excluded" /> + </test> + </tests> + <help> + +.. image:: static/images/FROGS_logo.png + :height: 144 + :width: 110 + + + +.. class:: infomark page-header h2 + +What it does + +Filter the OTUs of an abundance table according : + + -The abundance and the occurence of OTUs: presence in samples, OTU size and maximum number of OTUs. + + -The taxonomic affiliation produced by RDP: rank and bootstrap. + + -The taxonomic affiliation produced by Blast: e-value, percentage of identity, percentage of coverage and alignment length. + + -Contamination: phiX a control added in Illumina sequencing technologies. + + + +.. class:: infomark page-header h2 + +Inputs/outputs + + +.. class:: h3 + +Inputs + +**Sequence file**: + +The sequences (format `FASTA <https://en.wikipedia.org/wiki/FASTA_format>`_). + +**Abundance file**: + +The abundance of each OTU in each sample (format `BIOM <http://biom-format.org/>`_). + + +.. class:: h3 + +Outputs + +**Sequence file** (sequences.fasta): + + The sequences after filtering (format `FASTA <https://en.wikipedia.org/wiki/FASTA_format>`_). + +**Abundance file** (abundance.biom): + + The abundance after filtering (format `BIOM <http://biom-format.org/>`_). + +**Excluded file** (excluded.txt): + + The list of the OTUs deleted by filters (format `TSV <https://en.wikipedia.org/wiki/Tab-separated_values>`_). + +**Summary file** (report.html): + + The filters and the number of removed sequences (format `HTML <https://en.wikipedia.org/wiki/HTML>`_). + + + +.. class:: infomark page-header h2 + +How it works + + + + +The OTUs kept are the ones that satisfy into the BIOM input file the thresholds specified by the user. + +The BIOM abundance table and the fasta file are written again according to the OTUs kept. + +The OTUs discarded are listed in the excluded file. + +.. csv-table:: + :header: "Steps", "description" + :widths: 5, 150 + :class: table table-striped + + "1", "Except the filter to select the n most abundant OTUs, all the selected filters are run independently. For each filters an list of the OTUs to remove is generated." + "2", "All the OTUs tagged to remove by at least one filter are removed." + "3", "If the filter to select the N most abundant OTUs is filled it is applied." + + + +.. class:: infomark page-header h2 + +Advices + +Please check that the input fasta file and the input BIOM file correspond to the same OTUs. + +Examples for the filters on abundance and occurence of the OTUs : + +-To keep the filters that are present in 5 samples, fill the corresponding field with "5". + +-To display the 20 biggest OTU, fill the corresponding field with "20". + +-To filter on abundance, we advise you to specify 0.005%. It seems to be the optimal threshold (`Bokulich *et al*, 2013 <http://www.nature.com/nmeth/journal/v10/n1/abs/nmeth.2276.html>`_ ). + +If you use "FROGS Filters" before "FROGS Affiliation", the filters on RDP and Blast are useless. + +---- + +**Contact** + +Contacts: frogs@inra.fr + +Repository: https://github.com/geraldinepascal/FROGS + +Please cite the FROGS Publication: *Escudie F., Auer L., Bernard M., Cauquil L., Vidal K., Maman S., Mariadassou M., Hernadez-Raquet G., Pascal G., 2015. FROGS: Find Rapidly OTU with Galaxy Solution. In: The environmental genomic Conference, Montpellier, France,* http://bioinfo.genotoul.fr/fileadmin/user_upload/FROGS_2015_GE_Montpellier_poster.pdf + +Depending on the help provided you can cite us in acknowledgements, references or both. + </help> + <citations> + <citation type="doi">10.1093/bioinformatics/btx791</citation> + <citation type="doi">10.1128/AEM.01043-13</citation> + <citation type="doi">10.14806/ej.17.1.200</citation> + <citation type="doi">10.1093/bioinformatics/btr507</citation> + </citations> + +</tool>