diff filters.xml @ 0:76c750c5f0d1 draft default tip

planemo upload for repository https://github.com/oinizan/FROGS-wrappers commit 0b900a51e220ce6f17c1e76292c06a5f4d934055-dirty
author frogs
date Thu, 25 Oct 2018 05:01:13 -0400
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filters.xml	Thu Oct 25 05:01:13 2018 -0400
@@ -0,0 +1,285 @@
+<?xml version="1.0"?>
+<!--
+# Copyright (C) 2015 INRA
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+-->
+<tool id="FROGS_filters" name="FROGS Filters" version="1.3.0">
+	<description>Filters OTUs on several criteria.</description>
+	<!-- Runtime dependency: the "frogs" package, pinned to version 2.0.1. -->
+	<requirements>
+		<requirement type="package" version="2.0.1">frogs</requirement>
+	</requirements>
+	<!-- Exit-code rules: the first range matches every code from 1 upwards,
+	     the second every code from -1 downwards, so any non-zero exit code is matched. -->
+	<stdio>
+		<exit_code range="1:"/>
+		<exit_code range=":-1"/>
+	</stdio>
+	<command><![CDATA[
+		## Cheetah template for the filters.py command line.
+		## Every substituted value is single-quoted so that a path or a free-text
+		## value (min_abundance, max_blast_evalue) always stays a single shell argument.
+		filters.py
+			--nb-cpus '$nb_cpu'
+			--input-biom '$input_biom'
+			--input-fasta '$input_fasta'
+			--output-fasta '$output_fasta'
+			--output-biom '$output_biom'
+			--excluded '$output_excluded'
+			--summary '$output_summary'
+			## Contamination filter: pass the databank selected by the user.
+			#if $contaminations_filter.contaminations_filter_isApplied == "yes"
+				--contaminant '$contaminations_filter.contaminants_db'
+			#end if
+			## Abundance filters: each option is emitted only when its field is filled.
+			#if $abundance_filters.abundance_filters_areApplied == "yes"
+				#if $abundance_filters.min_sample_presence
+					--min-sample-presence '$abundance_filters.min_sample_presence'
+				#end if
+				#if $abundance_filters.min_abundance
+					--min-abundance '$abundance_filters.min_abundance'
+				#end if
+				#if $abundance_filters.nb_biggest_otu
+					--nb-biggest-otu '$abundance_filters.nb_biggest_otu'
+				#end if
+			#end if
+			## RDP filter: the argument has the form RANK:VALUE. Both fields are optional in
+			## the form, so the option is emitted only when both are filled; otherwise a
+			## malformed value such as "None:" or "Domain:" would be passed to the script.
+			#if $RDP_filters.RDP_filters_areApplied == "yes"
+				#if str($RDP_filters.rdp_rank) not in ('', 'None') and str($RDP_filters.rdp_bootstrap) not in ('', 'None')
+					--min-rdp-bootstrap '${RDP_filters.rdp_rank}:${RDP_filters.rdp_bootstrap}'
+				#end if
+			#end if
+			## Blast filters: each option is emitted only when its field is filled.
+			#if $blast_filters.blast_filters_areApplied == "yes"
+				#if $blast_filters.min_blast_length
+					--min-blast-length '$blast_filters.min_blast_length'
+				#end if
+				#if $blast_filters.max_blast_evalue
+					--max-blast-evalue '$blast_filters.max_blast_evalue'
+				#end if
+				#if $blast_filters.min_blast_identity
+					--min-blast-identity '$blast_filters.min_blast_identity'
+				#end if
+				#if $blast_filters.min_blast_coverage
+					--min-blast-coverage '$blast_filters.min_blast_coverage'
+				#end if
+			#end if
+	]]></command>
+	<inputs>
+		<!-- Input files: the sequences and the abundance table to filter. -->
+		<param name="input_fasta" type="data" format="fasta" label="Sequences file" help="The sequence file to filter (format: fasta)." />
+		<param name="input_biom" type="data" format="biom1" label="Abundance file" help="The abundance file to filter (format: BIOM)." />
+		<!-- Hidden parameter: the CPU count handed to the script, fixed to 1. -->
+		<param name="nb_cpu" type="hidden" label="CPU number" help="The maximum number of CPUs used." value="1" />
+		<!-- Abundance and occurrence filters. Every field is optional; an empty field disables that filter. -->
+		<conditional name="abundance_filters">
+			<param name="abundance_filters_areApplied" type="select" label=" *** THE FILTERS ON OTUS IN SAMPLES, OTUS SIZE and SEQUENCE PERCENTAGE" help="If you want to filter OTUs on their abundance and occurrence." >
+				<option value="no">No filters</option>
+				<option value="yes">Apply filters</option>
+			</param>
+			<when value="no" />
+			<when value="yes">
+				<param name="min_sample_presence" type="integer" optional="true" label="Minimum number of samples" size="5" help="Fill the field only if you want this treatment. Keep OTU present in at least this number of samples." />
+				<!-- Text rather than numeric type: the same field accepts a proportion (decimal) or a count (integer). -->
+				<param name="min_abundance" type="text" optional="true" label="Minimum proportion/number of sequences to keep OTU" size="5" help="Fill the field only if you want this treatment. Use decimal notation for proportion (example: 0.01 for keep OTU with at least 1% of all sequences) ; Use integer notation for number of sequence (example: 2 for keep OTU with at least 2 sequences, so remove single singleton)." />
+				<param name="nb_biggest_otu" type="integer" optional="true" label="N biggest OTU" size="5" help="Fill the field only if you want this treatment. Keep the N biggest OTU." />
+			</when>
+		</conditional>
+		<!-- RDP affiliation filter. The rank and the bootstrap threshold are combined into one RANK:VALUE argument in the command section, so both must be filled for the filter to be meaningful. -->
+		<conditional name="RDP_filters">
+			<param name="RDP_filters_areApplied" type="select" label=" *** THE FILTERS ON RDP" help="If you want to filter OTUs on their taxonomic affiliation produced by RDP.">
+				<option value="no">No filters</option>
+				<option value="yes">Apply filters</option>
+			</param>
+			<when value="no" />
+			<when value="yes">
+				<param name="rdp_rank" type="select" label="Rank with the bootstrap filter" optional="true">
+					<option value="Domain">Domain</option>
+					<option value="Phylum">Phylum</option>
+					<option value="Class">Class</option>
+					<option value="Order">Order</option>
+					<option value="Family">Family</option>
+					<option value="Genus">Genus</option>
+					<option value="Species">Species</option>
+				</param>
+				<param name="rdp_bootstrap" type="float" min="0.0" max="1.0" label="Minimum bootstrap % (between 0 and 1)" size="5" optional="true" />
+			</when>
+		</conditional>
+		<!-- Blast affiliation filters. An empty field disables the corresponding filter. -->
+		<conditional name="blast_filters">
+			<param name="blast_filters_areApplied" type="select" label=" *** THE FILTERS ON BLAST" help="If you want to filter OTUs on their taxonomic affiliation produced by Blast.">
+				<option value="no">No filters</option>
+				<option value="yes">Apply filters</option>
+			</param>
+			<when value="no" />
+			<when value="yes">
+				<param name="max_blast_evalue" type="text" value="" label="Maximum e-value (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" />
+				<param name="min_blast_identity" type="float" min="0.0" max="1.0" optional="true" label="Minimum identity % (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" />
+				<param name="min_blast_coverage" type="float" min="0.0" max="1.0" optional="true" label="Minimum coverage % (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" />
+				<param name="min_blast_length" type="integer" optional="true" label="Minimum alignment length" size="5" help="Fill the field only if you want this treatment" />
+			</when>
+		</conditional>
+		<!-- Contamination filter. The databank choices are read from phiX_db.loc (column 0: display name, column 1: value). -->
+		<conditional name="contaminations_filter">
+			<param name="contaminations_filter_isApplied" type="select" label=" *** THE FILTERS ON CONTAMINATIONS" help="If you want to filter OTUs on classical contaminations.">
+				<option value="no">No filters</option>
+				<option value="yes">Apply filters</option>
+			</param>
+			<when value="no" />
+			<when value="yes">
+				<param name="contaminants_db" type="select" label="Contaminant databank" help="The phiX databank (the phiX is a control added in Illumina sequencing technologies).">
+					<options from_file="phiX_db.loc">
+						<column name="name" index="0"/>
+						<column name="value" index="1"/>
+					</options>
+				</param>
+			</when>
+		</conditional>
+	</inputs>
+	<outputs>
+		<!-- The four datasets produced by the tool. Each one is also passed to the script
+		     as an explicit output path in the command section. -->
+		<!-- Sequences kept after filtering. -->
+		<data name="output_fasta" format="fasta" from_work_dir="sequences.fasta" label="${tool.name}: sequences.fasta"/>
+		<!-- Abundance table kept after filtering. -->
+		<data name="output_biom" format="biom1" from_work_dir="abundance.biom" label="${tool.name}: abundance.biom"/>
+		<!-- Tabular list of the excluded OTUs. -->
+		<data name="output_excluded" format="tabular" from_work_dir="excluded.tsv" label="${tool.name}: excluded.tsv"/>
+		<!-- HTML summary report. -->
+		<data name="output_summary" format="html" from_work_dir="report.html" label="${tool.name}: report.html"/>
+	</outputs>
+	<tests>
+		<!-- Applies only the abundance filters (minimum proportion 0.00005, presence in at
+		     least 3 samples) to the references/03-chimera files, then compares the filtered
+		     sequences and the excluded-OTU list with the stored 04-filters references. -->
+		<test>
+			<param name="input_fasta" value="references/03-chimera.fasta"/>
+			<param name="input_biom" value="references/03-chimera.biom"/>
+			<conditional name="abundance_filters">
+				<param name="abundance_filters_areApplied" value="yes"/>
+				<param name="min_abundance" value="0.00005"/>
+				<param name="min_sample_presence" value="3"/>
+			</conditional>
+			<output name="output_fasta" file="references/04-filters.fasta"/>
+			<output name="output_excluded" file="references/04-filters.excluded"/>
+		</test>
+	</tests>
+	<help>
+.. image:: static/images/FROGS_logo.png
+   :height: 144
+   :width: 110
+.. class:: infomark page-header h2
+What it does
+Filter the OTUs of an abundance table according to:
+ -The abundance and the occurrence of OTUs: presence in samples, OTU size and maximum number of OTUs.
+ -The taxonomic affiliation produced by RDP: rank and bootstrap.
+ -The taxonomic affiliation produced by Blast: e-value, percentage of identity, percentage of coverage and alignment length.
+ -Contamination: phiX a control added in Illumina sequencing technologies.
+.. class:: infomark page-header h2
+Inputs/Outputs
+.. class:: h3
+Input
+**Sequence file**:
+The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
+**Abundance file**:
+The abundance of each OTU in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).
+.. class:: h3
+Outputs
+**Sequence file** (sequences.fasta):
+ The sequences after filtering (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
+**Abundance file** (abundance.biom):
+ The abundance after filtering (format `BIOM &lt;http://biom-format.org/&gt;`_).
+**Excluded file** (excluded.tsv):
+ The list of the OTUs deleted by filters (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).
+**Summary file** (report.html):
+ The filters and the number of removed sequences (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).
+.. class:: infomark page-header h2
+How it works
+The OTUs kept are the ones from the BIOM input file that satisfy the thresholds specified by the user.
+The BIOM abundance table and the fasta file are written again according to the OTUs kept.
+The OTUs discarded are listed in the excluded file.
+.. csv-table:: 
+   :header: "Steps", "description"
+   :widths: 5, 150
+   :class: table table-striped
+   "1", "Except the filter to select the N most abundant OTUs, all the selected filters are run independently. For each filter a list of the OTUs to remove is generated."
+   "2", "All the OTUs tagged to remove by at least one filter are removed."
+   "3", "If the filter to select the N most abundant OTUs is filled it is applied."
+.. class:: infomark page-header h2
+Advice
+Please check that the input fasta file and the input BIOM file correspond to the same OTUs.
+Examples for the filters on abundance and occurrence of the OTUs:
+-To keep the OTUs that are present in at least 5 samples, fill the corresponding field with "5".
+-To keep the 20 biggest OTUs, fill the corresponding field with "20".
+-To filter on abundance, we advise you to specify 0.005% (enter 0.00005 in the field, which expects a proportion in decimal notation). It seems to be the optimal threshold (`Bokulich *et al*, 2013 &lt;http://www.nature.com/nmeth/journal/v10/n1/abs/nmeth.2276.html&gt;`_ ).
+If you use "FROGS Filters" before "FROGS Affiliation", the filters on RDP and Blast are useless.
+Contacts: frogs@inra.fr
+Repository: https://github.com/geraldinepascal/FROGS
+Please cite the FROGS Publication: *Escudie F., Auer L., Bernard M., Cauquil L., Vidal K., Maman S., Mariadassou M., Hernandez-Raquet G., Pascal G., 2015. FROGS: Find Rapidly OTU with Galaxy Solution. In: The environmental genomic Conference, Montpellier, France,* http://bioinfo.genotoul.fr/fileadmin/user_upload/FROGS_2015_GE_Montpellier_poster.pdf
+Depending on the help provided you can cite us in acknowledgements, references or both.
+	</help> 
+        <citations>
+                <citation type="doi">10.1093/bioinformatics/btx791</citation>
+                <citation type="doi">10.1128/AEM.01043-13</citation>
+                <citation type="doi">10.14806/ej.17.1.200</citation>
+                <citation type="doi">10.1093/bioinformatics/btr507</citation>
+        </citations>