Mercurial > repos > frogs > frogs_3_1_0

diff filters.xml @ 0:59bc96331073 draft default tip
planemo upload for repository https://github.com/geraldinepascal/FROGS-wrappers/tree/v3.1.0 commit 08296fc88e3e938c482c631bd515b3b7a0499647
author: frogs
date: Thu, 28 Feb 2019 10:14:49 -0500
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filters.xml	Thu Feb 28 10:14:49 2019 -0500
@@ -0,0 +1,282 @@
+<?xml version="1.0"?>
+<!--
+# Copyright (C) 2015 INRA
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+-->
+<tool id="FROGS_filters" name="FROGS Filters" version="3.1">
+	<description>Filters OTUs on several criteria.</description>
+        <requirements>
+                <requirement type="package" version="3.1.0">frogs</requirement>
+        </requirements>
+        <stdio> 
+                <exit_code range="1:" />
+                <exit_code range=":-1" />
+        </stdio>
+
+	<command>
+
+		filters.py
+			--nb-cpus \${GALAXY_SLOTS:-1} 
+			--input-biom $input_biom  
+			--input-fasta $input_fasta
+			--output-fasta $output_fasta
+			--output-biom $output_biom
+			--excluded $output_excluded
+			--summary $output_summary
+			#if $contaminations_filter.contaminations_filter_isApplied == "yes"
+                		#set $contaminants_filename = str( $contaminations_filter.contaminants_db.fields.path )
+				--contaminant $contaminants_filename
+			#end if	      
+			
+			#if $abundance_filters.abundance_filters_areApplied == "yes"
+				#if $abundance_filters.min_sample_presence
+					--min-sample-presence $abundance_filters.min_sample_presence
+				#end if
+				#if $abundance_filters.min_abundance
+					--min-abundance $abundance_filters.min_abundance
+				#end if
+				#if $abundance_filters.nb_biggest_otu
+					--nb-biggest-otu $abundance_filters.nb_biggest_otu
+				#end if
+			#end if
+
+			#if $RDP_filters.RDP_filters_areApplied == "yes"
+				--min-rdp-bootstrap $RDP_filters.rdp_rank:$RDP_filters.rdp_bootstrap
+			#end if
+		
+			#if $blast_filters.blast_filters_areApplied == "yes"
+				#if $blast_filters.min_blast_length
+					--min-blast-length $blast_filters.min_blast_length
+				#end if
+				#if $blast_filters.max_blast_evalue
+					--max-blast-evalue $blast_filters.max_blast_evalue
+				#end if
+				#if $blast_filters.min_blast_identity
+					--min-blast-identity $blast_filters.min_blast_identity
+				#end if
+				#if $blast_filters.min_blast_coverage
+					--min-blast-coverage $blast_filters.min_blast_coverage
+				#end if
+			#end if
+	</command>
+	<inputs>
+		<!-- Files -->
+		<param format="fasta" name="input_fasta" type="data" label="Sequences file" help="The sequence file to filter (format: fasta)." />
+		<param format="biom1" name="input_biom" type="data" label="Abundance file" help="The abundance file to filter (format: BIOM)." />
+		
+		<conditional name="abundance_filters">
+			<param name="abundance_filters_areApplied" type="select" label=" *** THE FILTERS ON OTUS IN SAMPLES, OTUS SIZE and SEQUENCE PERCENTAGE" help="If you want to filter OTUs on their abundance and occurrence." >
+				<option value="no">No filters</option>
+				<option value="yes">Apply filters</option>
+			</param>
+			<when value="no"></when>
+			<when value="yes">
+				<param name="min_sample_presence" type="integer" optional="true" label="Minimum number of samples" size="5" help="Fill the field only if you want this treatment. Keep OTU present in at least this number of samples." />
+				<param name="min_abundance" type="text" optional="true" label="Minimum proportion/number of sequences to keep OTU" size="5" help="Fill the field only if you want this treatment. Use decimal notation for proportion (example: 0.01 for keep OTU with at least 1% of all sequences) ; Use integer notation for number of sequence (example: 2 for keep OTU with at least 2 sequences, so remove single singleton)." />
+				<param name="nb_biggest_otu" type="integer" optional="true" label="N biggest OTU" size="5" help="Fill the fields only if you want this treatment. Keep the N biggest OTU." /> 
+			</when>
+		</conditional>
+
+		<conditional name="RDP_filters">
+			<param name="RDP_filters_areApplied" type="select" label=" *** THE FILTERS ON RDP" help="If you want to filter OTUs on their taxonomic affiliation produced by RDP.">
+				<option value="no">No filters</option>
+				<option value="yes">Apply filters</option>
+			</param>
+			<when value="no"></when>
+			<when value="yes">
+				<param name="rdp_rank" type="select" label="Rank with the bootstrap filter" optional="true">
+					<option value="Domain">Domain</option>
+					<option value="Phylum">Phylum</option>
+					<option value="Class">Class</option>
+					<option value="Order">Order</option>
+					<option value="Family">Family</option>
+					<option value="Genus">Genus</option>
+					<option value="Species">Species</option>
+   				</param>
+				<param name="rdp_bootstrap" type="float" min="0.0" max="1.0" label="Minimum bootstrap % (between 0 and 1)" size="5" optional="true" />
+			</when>
+		</conditional>
+
+		<conditional name="blast_filters">
+			<param name="blast_filters_areApplied" type="select" label=" *** THE FILTERS ON BLAST" help="If you want to filter OTUs on their taxonomic affiliation produced by Blast.">
+				<option value="no">No filters</option>
+				<option value="yes">Apply filters</option>
+			</param>
+			<when value="no"></when>
+			<when value="yes">
+				<param name="max_blast_evalue" type="text" value="" label="Maximum e-value (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" /> 
+				<param name="min_blast_identity" type="float" min="0.0" max="1.0" optional="true" label="Minimum identity % (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" />
+				<param name="min_blast_coverage" type="float" min="0.0" max="1.0" optional="true" label="Minimum coverage % (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" />
+				<param name="min_blast_length" type="integer" optional="true" label="Minimum alignment length" size="5" help="Fill the field only if you want this treatment" />
+			</when>
+		</conditional>
+		
+		<conditional name="contaminations_filter">
+			<param name="contaminations_filter_isApplied" type="select" label="Contaminant databank" help="The phiX databank (the phiX is a control added in Illumina sequencing technologies).">
+				<option value="no">No filters</option>
+				<option value="yes">Apply filters</option>
+			</param>
+			<when value="no"></when>
+			<when value="yes">
+				<param name="contaminants_db" type="select" label="Contaminant databank" help="The phiX databank (the phiX is a control added in Illumina sequencing technologies).">
+					<options from_data_table="phiX_db"></options>
+                        		<validator type="no_options" message="A built-in database is not available" />
+		   		</param>
+			</when>
+		</conditional>
+	</inputs>
+	<outputs>
+		<data format="fasta" name="output_fasta" label="${tool.name}: sequences.fasta" from_work_dir="sequences.fasta" />
+		<data format="biom1" name="output_biom" label="${tool.name}: abundance.biom" from_work_dir="abundance.biom" />
+		<data format="tabular" name="output_excluded" label="${tool.name}: excluded.tsv" from_work_dir="excluded.tsv" />
+		<data format="html" name="output_summary" label="${tool.name}: report.html" from_work_dir="report.html" />
+	</outputs>
+	<tests>
+		<test>
+	        <param name="input_fasta" value="references/03-chimera.fasta" />
+            <param name="input_biom" value="references/03-chimera.biom" />    
+		    <conditional name="abundance_filters">
+	     	  	<param name="abundance_filters_areApplied" value="yes" />
+				<param name="min_abundance" value="0.00005" />	
+				<param name="min_sample_presence" value="3" />
+		    </conditional>
+		    <conditional name="contaminations_filter">
+	            <param name="contaminations_filter_isApplied" value="yes" />
+				<param name="contaminants_db" value="phiX_test" />	
+		    </conditional>
+	 	    <output name="output_fasta" file="references/04-filters.fasta" />
+		    <output name="output_excluded" file="references/04-filters.excluded" />
+	    </test>
+	</tests>
+	<help>
+
+.. image:: static/images/frogs_images/FROGS_logo.png
+   :height: 144
+   :width: 110
+
+
+
+.. class:: infomark page-header h2
+
+What it does
+
+Filter the OTUs of an abundance table according :
+
+ -The abundance and the occurence of OTUs: presence in samples, OTU size and maximum number of OTUs.
+
+ -The taxonomic affiliation produced by RDP: rank and bootstrap.
+
+ -The taxonomic affiliation produced by Blast: e-value, percentage of identity, percentage of coverage and alignment length.
+ 
+ -Contamination: phiX a control added in Illumina sequencing technologies.
+
+
+
+.. class:: infomark page-header h2
+
+Inputs/outputs
+
+
+.. class:: h3
+
+Inputs
+
+**Sequence file**:
+
+The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
+
+**Abundance file**:
+
+The abundance of each OTU in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).
+
+
+.. class:: h3
+
+Outputs
+
+**Sequence file** (sequences.fasta):
+
+ The sequences after filtering (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
+
+**Abundance file** (abundance.biom):
+
+ The abundance after filtering (format `BIOM &lt;http://biom-format.org/&gt;`_).
+
+**Excluded file** (excluded.txt):
+
+ The list of the OTUs deleted by filters (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).
+
+**Summary file** (report.html):
+
+ The filters and the number of removed sequences (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).
+
+
+
+.. class:: infomark page-header h2
+
+How it works
+
+
+
+
+The OTUs kept are the ones that satisfy into the BIOM input file the thresholds specified by the user.
+
+The BIOM abundance table and the fasta file are written again according to the OTUs kept.
+
+The OTUs discarded are listed in the excluded file.
+
+.. csv-table:: 
+   :header: "Steps", "description"
+   :widths: 5, 150
+   :class: table table-striped
+
+   "1", "Except the filter to select the n most abundant OTUs, all the selected filters are run independently. For each filters an list of the OTUs to remove is generated."
+   "2", "All the OTUs tagged to remove by at least one filter are removed."
+   "3", "If the filter to select the N most abundant OTUs is filled it is applied."
+
+
+
+.. class:: infomark page-header h2
+
+Advices
+
+Please check that the input fasta file and the input BIOM file correspond to the same OTUs.
+
+Examples for the filters on abundance and occurence of the OTUs : 
+
+-To keep the filters that are present in 5 samples, fill the corresponding field with "5".
+
+-To display the 20 biggest OTU, fill the corresponding field with "20".
+
+-To filter on abundance, we advise you to specify 0.005%. It seems to be the optimal threshold (`Bokulich *et al*, 2013 &lt;http://www.nature.com/nmeth/journal/v10/n1/abs/nmeth.2276.html&gt;`_ ).
+
+If you use "FROGS Filters" before "FROGS Affiliation", the filters on RDP and Blast are useless.
+
+----
+
+**Contact**
+
+Contacts: frogs@inra.fr
+
+Repository: https://github.com/geraldinepascal/FROGS
+website: http://frogs.toulouse.inra.fr/
+
+Please cite the **FROGS article**: *Escudie F., et al. Bioinformatics, 2018. FROGS: Find, Rapidly, OTUs with Galaxy Solution.*
+
+	</help>
+	<citations>
+		<citation type="doi">10.1093/bioinformatics/btx791</citation>
+	</citations>
+</tool>
author	frogs
date	Thu, 28 Feb 2019 10:14:49 -0500
parents
children