Mercurial > repos > rnateam > sortmerna
comparison sortmerna.xml @ 0:a8ac09e937f3 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sortmerna commit 04cfb5475292e4fd1f7c0ca86d8d0d5e5f886c3d-dirty
author | rnateam |
---|---|
date | Mon, 03 Aug 2015 08:18:26 -0400 |
parents | |
children | b482293b2987 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a8ac09e937f3 |
---|---|
1 <tool id="bg_sortmerna" name="Filter with SortMeRNA" version="1.9.0"> | |
2 <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description> | |
3 <requirements> | |
4 <requirement type="package" version="1.9">sortmerna</requirement> | |
5 </requirements> | |
6 <stdio> <!-- output matching either pattern, on stdout or stderr, is reported as a fatal error --> | |
7 <regex match="This program builds a Burst trie on an input rRNA database" | |
8 source="both" | |
9 level="fatal" | |
10 description="Buildtrie program failed to execute." /> | |
11 <regex match="The database name" | |
12 source="both" | |
13 level="fatal" | |
14 description="A selected database has not been preprocessed using buildtrie before using SortMeRNA." /> | |
15 </stdio> | |
16 <version_command> | |
17 <![CDATA[ | |
18 sortmerna --version 2>&1|grep 'SortMeRNA version' | |
19 ]]> | |
20 </version_command> | |
21 <command interpreter="python"> | |
22 <![CDATA[ | |
23 sortmerna.py | |
24 --sortmerna " | |
25 $strand_search | |
26 #if str( $read_family.read_family_selector ) == 'other': | |
27 --I $input_reads -r $read_family.ratio_parameter | |
28 #else: | |
29 $read_family.read_family_selector $input_reads | |
30 #end if | |
31 | |
32 #if str( $sequencing_type.sequencing_type_selector ) == 'paired': | |
33 $sequencing_type.paired_type | |
34 #end if | |
35 | |
36 #if $outputs_selected: | |
37 #if 'accept' in $outputs_selected.value: | |
38 --accept accept_file | |
39 #end if | |
40 #if 'other' in $outputs_selected.value: | |
41 --other other_file | |
42 #end if | |
43 #end if | |
44 | |
45 $log | |
46 -a \${GALAXY_SLOTS:-4} | |
47 " | |
48 #if str( $databases_type.databases_selector ) == 'history': | |
49 --buildtrie | |
50 #for $db in $databases_type.input_databases | |
51 $db.database_name | |
52 #end for | |
53 #else: | |
54 ## The select value carries only database ids, not paths: look each selected id up by hand (column 0 of the data table rows) and emit column 2 of its row, presumably the indexed database path from the LOC file (TODO confirm) | |
55 ${' '.join([dict([(x[0], x[2]) for x in $databases_type.input_databases.input.options.tool_data_table.data])[y] | |
56 for y in $databases_type.input_databases.value])} | |
57 #end if | |
58 ]]> | |
59 </command> | |
60 <inputs> | |
61 <conditional name="read_family"> <!-- chooses the read-type flag placed before the reads file; "other" exposes a custom -r ratio --> | |
62 <param name="read_family_selector" type="select" label="Sequencing technology of querying sequences (reads)" | |
63 help="The Illumina platform is more common for large scale metatranscriptomic projects requiring a high throughput."> | |
64 <option value="--I">Illumina Solexa</option> | |
65 <option value="--454">454 Roche</option> | |
66 <option value="other">Other</option> | |
67 </param> | |
68 <when value="other"> | |
69 <param name="ratio_parameter" type="float" value="1" min="0" max="1" | |
70 label="Ratio parameter (the number of hits on the read / read length)" | |
71 help="The ratio parameter for SortMeRNA has been set to r=0.25 for Illumina Solexa reads and to r=0.15 for 454 Roche reads. | |
72 For other read types, if the sequencing technology produces high quality reads with a low substitution error rate | |
73 (0.1 substitutions per 100 bases, such as Illumina), then the ratio parameter can be set to r=[0.23,0.27]. | |
74 If the sequencing technology has a high indel error rate (1-2 indels per 100 bases, such as 454 or Ion Torrent), | |
75 then the ratio parameter can be set to r=[0.13,0.17] (-r)."/> | |
76 </when> | |
77 </conditional> | |
78 <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences (reads)" help=""/> | |
79 <conditional name="sequencing_type"> | |
80 <param name="sequencing_type_selector" type="select" label="Sequencing type"> | |
81 <option value="not_paired">Reads are not paired</option> | |
82 <option value="paired">Reads are paired</option> | |
83 </param> | |
84 <when value="paired"> | |
85 <param name="paired_type" type="select" display="radio" label="If one read of a pair is accepted and the other not, output both reads" | |
86 help="SortMeRNA does not use the pairing information for filtering RNA, | |
87 however if one read of a pair is accepted and the other is not, | |
88 the resulting output may break apart the pair into two separate files. | |
89 The purpose of 'Reads are paired' option is to preserve the pairing of the reads."> | |
90 <option value="--paired-in">to accepted file (--paired-in)</option> | |
91 <option value="--paired-out">to rejected file (--paired-out)</option> | |
92 </param> | |
93 </when> | |
94 </conditional> | |
95 | |
96 <param name="strand_search" type="select" label="Which strands to search" display="radio"> | |
97 <option value="">Search both strands</option> | |
98 <option value="-F">Search only the forward strand (-F)</option> | |
99 <option value="-R">Search only the reverse-complementary strand (-R)</option> | |
100 </param> | |
101 | |
102 <conditional name="databases_type"> | |
103 <param name="databases_selector" type="select" label="Databases to query" | |
104 help="Public rRNA databases provided with SortMeRNA have been indexed. | |
105 On the contrary, personal databases must be indexed each time SortMeRNA is launched. | |
106 Please be patient, this may take some time depending on the size of the given database."> | |
107 <option value="cached" selected="true">Public ribosomal databases</option> | |
108 <option value="history">Databases from your history</option> | |
109 </param> | |
110 <when value="cached"> | |
111 <param name="input_databases" label="rRNA database" type="select" display="checkboxes" multiple="true"> | |
112 <options from_data_table="rRNA_databases" /> | |
113 <validator type="no_options" message="Select at least one database"/> | |
114 </param> | |
115 </when> | |
116 <when value="history"> | |
117 <repeat name="input_databases" title="Database" min="1"> | |
118 <param name="database_name" type="data" format="fasta" label="rRNA database" | |
119 help="Your database will be indexed first, which may take up to several minutes."/> | |
120 </repeat> | |
121 </when> | |
122 </conditional> | |
123 | |
124 <!-- Outputs --> | |
125 <param name="outputs_selected" type="select" display="checkboxes" multiple="true" label="Output options"> | |
126 <option value="accept" selected="True">Reads matching to at least one database</option> | |
127 <option value="other">Reads not found in any database</option> | |
128 </param> | |
129 <param name="log" type="boolean" checked="False" truevalue="--log log_file" falsevalue="" label="Statistics file" | |
130 help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)"> | |
131 </param> | |
132 | |
133 </inputs> | |
134 <outputs> | |
135 <data format_source="input_reads" name="output_accept" from_work_dir="accept_file.dat" | |
136 label="Matching reads on ${on_string} (${input_reads.datatype.file_ext})"> | |
137 <filter>outputs_selected and 'accept' in outputs_selected</filter> | |
138 </data> | |
139 <data format_source="input_reads" name="output_other" from_work_dir="other_file.dat" | |
140 label="Reads not found on ${on_string} (${input_reads.datatype.file_ext})"> | |
141 <filter>outputs_selected and 'other' in outputs_selected</filter> | |
142 </data> | |
143 <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="log_file.log"> | |
144 <filter>log</filter> | |
145 </data> | |
146 </outputs> | |
147 <tests> | |
148 <test> | |
149 <!-- Unpaired Illumina reads searched on both strands against two cached Rfam databases; both read outputs are compared, no statistics file. --> | |
150 <param name="read_family_selector" value="--I" /> | |
151 <param name="input_reads" value="sortmerna_wrapper_in1.fastq" /> | |
152 <param name="sequencing_type_selector" value="not_paired" /> | |
153 <param name="strand_search" value="" /> | |
154 <param name="databases_selector" value="cached" /> | |
155 <param name="input_databases" value="rfam-5.8s,rfam-5s" /> | |
156 <param name="outputs_selected" value="accept,other" /> | |
157 <param name="log" value="false" /> | |
158 <output name="output_accept" file="sortmerna_wrapper_accept1.fastq" /> | |
159 <output name="output_other" file="sortmerna_wrapper_other1.fastq" /> | |
160 </test> | |
161 </tests> | |
162 <help> | |
163 <![CDATA[ | |
164 **What it does** | |
165 | |
166 SortMeRNA_ is software designed to rapidly filter ribosomal RNA fragments | |
167 from metatranscriptomic data produced by next-generation sequencers. | |
168 It is capable of handling large RNA databases and sorting out all fragments | |
169 matching to the database with high accuracy and specificity. | |
170 | |
171 .. _SortMeRNA: http://bioinfo.lifl.fr/RNA/sortmerna/ | |
172 | |
173 | |
174 **Input** | |
175 | |
176 The input is one file of reads in FASTA or FASTQ format and any number of rRNA databases to search against. | |
177 If the user has two forward-reverse paired-sequencing read files, they may use | |
178 the script "merge_paired_reads.sh" to interleave the reads into one file, preserving their order. | |
179 | |
180 If the sequencing type for the reads is paired-end, the user has two options under | |
181 "Sequencing type" to filter the reads and preserve their order in the file. | |
182 For a further example of each option, please refer to Section 4.2.3 in the `SortMeRNA User Manual`_. | |
183 | |
184 .. _sortmerna user manual: http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf | |
185 | |
186 | |
187 **Output** | |
188 | |
189 The output will follow the same format (FASTA or FASTQ) as the reads. Optionally, a statistics file for the rRNA content of reads, as well as rRNA subunit distribution, can be generated. | |
190 | |
191 | |
192 **rRNA databases** | |
193 | |
194 SortMeRNA is distributed with 8 representative rRNA databases, which were | |
195 all constructed from the SILVA SSU,LSU (version 111) and the RFAM 5/5.8S | |
196 (version 11.0) databases using the tool UCLUST. | |
197 | |
198 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
199 | Representative database | id % | average id% | # seq (clustered) | Origin | # seq (original) | | |
200 +==========================+======+=============+===================+========================+===================+ | |
201 | SILVA 16S bacteria | 85 | 91.6 | 8174 | SILVA SSU Ref NR v.111 | 244077 | | |
202 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
203 | SILVA 16S archaea | 95 | 96.7 | 3845 | SILVA SSU Ref NR v.111 | 10919 | | |
204 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
205 | SILVA 18S eukarya | 95 | 96.7 | 4512 | SILVA SSU Ref NR v.111 | 31862 | | |
206 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
207 | SILVA 23S bacteria | 98 | 99.4 | 3055 | SILVA LSU Ref v.111 | 19580 | | |
208 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
209 | SILVA 23S archaea | 98 | 99.5 | 164 | SILVA LSU Ref v.111 | 405 | | |
210 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
211 | SILVA 28S eukarya | 98 | 99.1 | 4578 | SILVA LSU Ref v.111 | 9321 | | |
212 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
213 | Rfam 5S archaea/bacteria | 98 | 99.2 | 59513 | RFAM | 116760 | | |
214 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
215 | Rfam 5.8S eukarya | 98 | 98.9 | 13034 | RFAM | 225185 | | |
216 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
217 | |
218 id %: members of the cluster must have at least 'id %' identity with the representative sequence | |
219 | |
220 average id %: average identity of a cluster member to the representative sequence | |
221 | |
222 The user may also choose to use their own rRNA databases. | |
223 | |
224 .. class:: warningmark | |
225 | |
226 Note that your personal databases are indexed each time, and that | |
227 this may take some time depending on the size of the given database. | |
228 ]]> | |
229 </help> | |
230 | |
231 <citations> | |
232 <citation type="doi">10.1093/bioinformatics/bts611</citation> | |
233 <citation type="doi">10.1093/nar/gks1219</citation> | |
234 <citation type="doi">10.1093/nar/gks1005</citation> | |
235 <citation type="doi">10.1093/bioinformatics/btq461</citation> | |
236 <citation type="doi">10.1038/nbt.2198</citation> | |
237 </citations> | |
238 </tool> |