sortmerna: sortmerna_wrapper.xml comparison

comparison sortmerna_wrapper.xml @ 0:2e7f0da431e3 draft default tip

Uploaded version 1.0

author	bonsai
date	Tue, 30 Apr 2013 13:12:35 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:2e7f0da431e3
+<?xml version="1.0" encoding="utf-8"?>
+<tool id="sortmerna_wrapper" version="1.0" name="Filter with SortMeRNA">
+<!-- Declares a dependency on the sortmerna package, version 1.7. -->
+<requirements>
+  <requirement version="1.7" type="package">sortmerna</requirement>
+</requirements>
+<!-- One-line summary of what the tool does. -->
+<description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description>
+<!--
+  Command template (Cheetah), run through the python interpreter on sortmerna_wrapper.py.
+  The double-quoted string after the wrapper's "sortmerna" option collects, in order:
+  the strand flag, the read-family flag followed by the input reads (when "other" is
+  chosen the Illumina flag is used together with "-r" and the user-supplied ratio), the
+  pairing flag for paired reads, the "accept" / "other" output names for each checked
+  output, the log flag, and "-a" with the thread count when "more" options are shown.
+  After the quoted string come the databases: history datasets are listed after the
+  wrapper's "buildtrie" flag; cached selections are translated to column index 2 of the
+  data table rows through a lookup keyed on column index 0.
+-->
+<command interpreter="python">
+sortmerna_wrapper.py
+--sortmerna "
+$strand_search
+#if str( $read_family.read_family_selector ) == 'other':
+--I $input_reads -r $read_family.ratio_parameter
+#else:
+	$read_family.read_family_selector $input_reads
+#end if
+#if str( $sequencing_type.sequencing_type_selector ) == 'paired':
+$sequencing_type.paired_type
+#end if
+#if $outputs_selected:
+#if 'accept' in $outputs_selected.value:
+	--accept accept_file
+#end if
+#if 'other' in $outputs_selected.value:
+	--other other_file
+#end if
+#end if
+$log
+#if str( $options.options_type_selector ) == 'more':
+	-a $options.number_of_threads
+#end if
+"
+#if str( $databases_type.databases_selector ) == 'history':
+--buildtrie
+#for $db in $databases_type.input_databases
+$db.database_name
+#end for
+#else:
+## databases path is not directly accessible, must match by hand with LOC file contents
+${' '.join([dict([(x[0], x[2]) for x in $databases_type.input_databases.input.options.tool_data_table.data])[y]
+for y in  $databases_type.input_databases.value])}
+#end if
+</command>
+<inputs>
+<!-- Read technology selector. The chosen option value is written to the command line as the
+     read flag, except for "other", which also asks for an explicit ratio parameter.
+     Every option has an explicit <when> case (empty where there is nothing to ask), matching
+     the "options" conditional further down. The stray format attribute, which is not
+     meaningful on a select parameter, has been dropped. -->
+<conditional name="read_family">
+<param name="read_family_selector" type="select"
+	   help="The Illumina platform is more common for large scale metatranscriptomic projects requiring a high throughput.">
+	<label>Sequencing technology of querying sequences (reads)</label>
+	<option value="--I">Illumina Solexa</option>
+	<option value="--454">454 Roche</option>
+	<option value="other">Other</option>
+</param>
+<when value="--I">
+	<!-- no options -->
+</when>
+<when value="--454">
+	<!-- no options -->
+</when>
+<when value="other">
+	<!-- Ratio is constrained to the closed interval [0, 1]. -->
+	<param name="ratio_parameter" type="float" value="1" min="0" max="1"
+	       label="Ratio parameter (the number of hits on the read / read length)"
+	       help="The ratio parameter for SortMeRNA has been set to r=0.25 for Illumina Solexa reads and to r=0.15 for 454 Roche reads.
+		     For other read types, if the sequencing technology produces high quality reads with a low substitution error rate
+		     (0.1 substitutions per 100 bases, such as Illumina), then the ratio parameter can be set to r=[0.23,0.27].
+		     If the sequencing technology has a high indel error rate (1-2 indels per 100 bases, such as 454 or Ion Torrent),
+		     then the ratio parameter can be set to r=[0.13,0.17]."/>
+</when>
+</conditional>
+<!-- Reads to filter; datasets in fasta or fastq format are accepted. -->
+<param name="input_reads" type="data" format="fasta,fastq"
+       label="Querying sequences (reads)" help=""/>
+<!-- Pairing selector. Only the "paired" case asks a follow-up question; its option value is
+     the flag written to the command line. The "not_paired" case is declared explicitly
+     (and left empty), matching the "options" conditional further down. -->
+<conditional name="sequencing_type">
+<param name="sequencing_type_selector" type="select" label="Sequencing type">
+	<option value="not_paired">Reads are not paired</option>
+	<option value="paired">Reads are paired</option>
+</param>
+<when value="not_paired">
+	<!-- no options -->
+</when>
+<when value="paired">
+	<param name="paired_type" type="select" label="If one read of a pair is accepted and the other not, output both reads" display="radio"
+	       help="SortMeRNA does not use the pairing information for filtering RNA,
+		     however if one read of a pair is accepted and the other is not,
+		     the resulting output may break apart the pair into two separate files.
+		     The purpose of 'Reads are paired' option is to preserve the pairing of the reads.">
+	  <option value="--paired-in">to accepted file</option>
+	  <option value="--paired-out">to rejected file</option>
+	</param>
+</when>
+</conditional>
+<!-- Strand restriction. The option value is the flag placed on the command line;
+     the empty value leaves the flag out. -->
+<param name="strand_search" type="select" display="radio" label="Which strands to search">
+  <option value="">Search both strands</option>
+  <option value="-F">Search only the forward strand</option>
+  <option value="-R">Search only the reverse-complementary strand</option>
+</param>
+<!-- Database source: either entries of the rRNA_databases data table ("cached") or one or
+     more fasta datasets from the history, which the command template passes to buildtrie. -->
+<conditional name="databases_type">
+<param name="databases_selector" type="select" label="Databases to query"
+	     help="Public rRNA databases provided with SortMeRNA have been indexed.
+		   On the contrary, personal databases must be indexed each time SortMeRNA is launched.
+		   Please be patient, this may take some time depending on the size of the given database.">
+	<option value="cached" selected="true">Public ribosomal databases</option>
+	<option value="history">Databases from your history</option>
+</param>
+<when value="cached">
+	<!-- optional="false": the command template iterates over the selected values, so an
+	     empty selection must be rejected by the form rather than reach the template.
+	     The no_options validator only covers the case where the data table is empty. -->
+	<param name="input_databases" label="rRNA database"
+	       type="select" display="checkboxes" multiple="true" optional="false">
+	  <options from_data_table="rRNA_databases" />
+	  <validator type="no_options" message="Select at least one database"/>
+	</param>
+</when>
+<when value="history">
+	<!-- min="1" guarantees at least one database entry. -->
+	<repeat name="input_databases" title="Database" min="1">
+	  <param name="database_name" type="data" format="fasta" label="rRNA database"
+		 help="Your database will be indexed first, which may take up to several minutes."/>
+	</repeat>
+</when>
+</conditional>
+<!-- Outputs: which read sets to produce. The checked values are tested by the command
+     template and by the filters in the outputs section. -->
+<param name="outputs_selected" type="select" multiple="true" display="checkboxes"
+       label="Output options">
+  <option value="accept" selected="true">Reads matching to at least one database</option>
+  <option value="other">Reads not found in any database</option>
+</param>
+<!-- Statistics toggle: when checked, the truevalue text is what the command template receives. -->
+<param name="log" type="boolean" checked="false"
+       truevalue="--log log_file" falsevalue=""
+       label="Statistics file"
+       help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution."/>
+<!-- Advanced options: the thread count is only asked for when "more" is chosen. -->
+<conditional name="options">
+  <param name="options_type_selector" type="select" label="Advanced Options">
+    <option value="less" selected="true">Less options</option>
+    <option value="more">More options</option>
+  </param>
+  <when value="less">
+    <!-- no options -->
+  </when>
+  <when value="more">
+    <param name="number_of_threads" type="integer" value="1" min="1"
+           label="Number of threads to use"/>
+  </when>
+</conditional>
+</inputs>
+<!-- Result datasets. Each one is collected from a fixed file name in the job working
+     directory and only created when its filter expression is true. The two read outputs
+     take their format from the input reads. -->
+<outputs>
+  <!-- Reads that matched at least one database. -->
+  <data name="output_accept" format="input" format_source="input_reads"
+        from_work_dir="accept_file.dat"
+        label="Matching reads on ${on_string} (${input_reads.datatype.file_ext})">
+    <filter>outputs_selected and 'accept' in outputs_selected</filter>
+  </data>
+  <!-- Reads that matched no database. -->
+  <data name="output_other" format="input" format_source="input_reads"
+        from_work_dir="other_file.dat"
+        label="Reads not found on ${on_string} (${input_reads.datatype.file_ext})">
+    <filter>outputs_selected and 'other' in outputs_selected</filter>
+  </data>
+  <!-- Statistics file, produced only when the log option is checked. -->
+  <data name="output_log" format="txt" from_work_dir="log_file.log"
+        label="${tool.name} statistics (txt)">
+    <filter>log</filter>
+  </data>
+</outputs>
+<!-- Error detection: a match of either pattern on stdout or stderr is treated as fatal.
+     Descriptions are plain text and are not run through the command template, so they
+     must not contain parameter placeholders. -->
+<stdio>
+<!-- The pattern is the opening sentence of the buildtrie description quoted in the help text. -->
+<regex match="This program builds a Burst trie on an input rRNA database"
+	   source="both"
+	   level="fatal"
+	   description="Buildtrie program failed to execute." />
+<regex match="The database name"
+	   source="both"
+	   level="fatal"
+	   description="The selected database has not been preprocessed using buildtrie before using SortMeRNA." />
+</stdio>
+<!-- Functional test: unpaired Illumina reads searched on both strands against two cached
+     databases, with both read outputs requested and no statistics file. -->
+<tests>
+<test>
+<!-- Select values must equal an option value declared in the inputs section; the Illumina
+     option value includes its leading dashes. -->
+<param name="read_family_selector" value="--I" />
+<param name="input_reads" value="sortmerna_wrapper_in1.fastq" />
+<param name="sequencing_type_selector" value="not_paired" />
+<param name="strand_search" value="" />
+<param name="databases_selector" value="cached" />
+<param name="input_databases" value="rfam-5.8s,rfam-5s" />
+<param name="outputs_selected" value="accept,other" />
+<param name="log" value="" />
+<param name="options_type_selector" value="less" />
+<output name="output_accept" file="sortmerna_wrapper_accept1.fastq" />
+<output name="output_other" file="sortmerna_wrapper_other1.fastq" />
+</test>
+</tests>
+<help>
+**Overview**
+SortMeRNA_ is software designed to rapidly filter ribosomal RNA fragments
+from metatranscriptomic data produced by next-generation sequencers.
+It is capable of handling large RNA databases and sorting out all fragments
+matching to the database with high accuracy and specificity.
+.. _SortMeRNA: http://bioinfo.lifl.fr/RNA/sortmerna/
+If you use this tool, please cite Kopylova E., Noé L. and Touzet H.,
+`"SortMeRNA: Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data"`__,
+Bioinformatics (2012), doi: 10.1093/bioinformatics/bts611.
+.. __: http://bioinformatics.oxfordjournals.org/content/28/24/3211
+------
+**Input**
+The input is one file of reads in FASTA or FASTQ format and any number of rRNA databases to search against.
+If the user has two forward-reverse paired-sequencing read files, they may use
+the script "merge_paired_reads.sh" to interleave the reads into one file, preserving their order.
+If the sequencing type for the reads is paired-ended, the user has two options under
+"Sequencing type" to filter the reads and preserve their order in the file.
+For a further example of each option, please refer to Section 4.2.3 in the `SortMeRNA User Manual`_.
+.. _sortmerna user manual: http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf
+------
+**Output**
+The output will follow the same format (FASTA or FASTQ) as the reads.
+In the standalone version of SortMeRNA, the user may output the matching reads in a separate file per database (--bydbs option). This option will be made available in a future version of Galaxy.
+------
+**rRNA databases**
+SortMeRNA is distributed with 8 representative rRNA databases, which were
+all constructed from the SILVA SSU,LSU (version 111) and the RFAM 5/5.8S
+(version 11.0) databases using the tool UCLUST.
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| Representative database  | id % | average id % | # seq | Origin                 |  # seq | filtered to remove |
++==========================+======+==============+=======+========================+========+====================+
+| SILVA 16S bacteria       |   85 |         91.6 |  8174 | SILVA SSU Ref NR v.111 | 244077 | 23s                |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| SILVA 16S archaea        |   95 |         96.7 |  3845 | SILVA SSU Ref NR v.111 |  10919 | 23s                |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| SILVA 18S eukarya        |   95 |         96.7 |  4512 | SILVA SSU Ref NR v.111 |  31862 | 26s,28s,23s        |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+|                                                                                                               |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| SILVA 23S bacteria       |   98 |         99.4 |  3055 | SILVA LSU Ref v.111    |  19580 | 16s,26s,28s        |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| SILVA 23S archaea        |   98 |         99.5 |   164 | SILVA LSU Ref v.111    |    405 | 16s,26s,28s        |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| SILVA 28S eukarya        |   98 |         99.1 |  4578 | SILVA LSU Ref v.111    |   9321 | 18s                |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+|                                                                                                               |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| Rfam 5S archaea/bacteria |   98 |         99.2 | 59513 | RFAM                   | 116760 |                    |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| Rfam 5.8S eukarya        |   98 |         98.9 | 13034 | RFAM                   | 225185 |                    |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+id % :
+members of the cluster must have at least 'id %' identity with the representative sequence
+average id % :
+average identity of a cluster member to the representative sequence
+The user may also choose to use their own rRNA databases.
+.. class:: warningmark
+Note that your personal databases are indexed each time, and that
+this may take some time depending on the size of the given database.
+------
+**SortMeRNA parameter list**
+The standalone, command-line version of SortMeRNA uses the following parameters.
+For indexing (buildtrie):
+This program builds a Burst trie on an input rRNA database file in fasta format
+and stores the material in binary files under the folder '/automata'::
+./buildtrie --db [path to rrnas database file name {.fasta}]  {OPTIONS}
+The list of OPTIONS can be left blank, the default values will be used::
+-L  length of the sliding window (the seed)
+(default: 18)
+-F  search only the forward strand
+-R  search only the reverse-complementary strand
+(default: both strands are searched)
+-h  help
+For sorting (sortmerna):
+To run SortMeRNA, type in any order after 'sortmerna'::
+--I      [illumina reads file name {fasta/fastq}]
+--454    [roche 454 reads file name {fasta/fastq}]
+-n       number of databases to use (must precede --db)
+--db     [rrnas database name(s)]
+One database,
+ex 1. -n 1 --db /path1/database1.fasta
+Multiple databases,
+ex 2. -n 2 --db /path2/database2.fasta /path3/database3.fasta
+{OPTIONS}
+The list of OPTIONS can be left blank, the default values will be used::
+--accept      [accepted reads file name]
+--other       [rejected reads file name]
+(default: no output file is created)
+--bydbs       output the accepted reads by database
+(default: concatenated file of reads)
+--log         [overall statistics file name]
+(default: no statistics file created)
+--paired-in   put both paired-end reads into --accept file
+--paired-out  put both paired-end reads into --other file
+(default: if one read is accepted and the other is not,
+separate the reads into --accept and --other files)
+-r            ratio of the number of hits on the read / read length
+(default Illumina: 0.25, Roche 454: 0.15)
+-F            search only the forward strand
+-R            search only the reverse-complementary strand
+(default: both strands are searched)
+-a            number of threads to use
+(default: 1)
+-m            (m x 4096 bytes) for loading the reads into memory
+ex. '-m 4' means 4*4096 = 16384 bytes will be allocated for the reads
+note: maximum -m is 1020039
+(default: m = 262144 = 1GB)
+-v            verbose
+(default: deactivated)
+-h            help
+--version     version number
+------
+**Bibliography**
+[1] Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO (2013) The SILVA ribosomal RNA gene database project: improved data processing and web-based tools, Nucleic Acids Research, 41 (D1): D590-D596.
+[2] Rfam 11.0: 10 years of RNA families. S.W. Burge, J. Daub, R. Eberhardt, J. Tate, L. Barquist, E.P. Nawrocki, S.R. Eddy, P.P. Gardner, A. Bateman. Nucleic Acids Research (2012),  doi: 10.1093/nar/gks1005
+[3] Edgar, R.C. (2010) Search and clustering orders of magnitude faster than BLAST, Bioinformatics 26(19), 2460-2461, doi: 10.1093/bioinformatics/btq461
+[4] Loman, N. J. and Misra, Raju V and Dallman, Timothy J and Constantinidou, Chrystala and Gharbia, Saheer E and Wain, John and Pallen, Mark J., Performance comparison of benchtop high-throughput sequencing platforms (2012), Nature Biotechnology, 30 (5). pp. 434-439
+</help>
+</tool>

Mercurial > repos > bonsai > sortmerna

comparison sortmerna_wrapper.xml @ 0:2e7f0da431e3 draft default tip