|
8
|
1 <tool id="glassgo" name="GLASSgo" version="@VERSION@">
|
|
|
2 <description>sRNA homolog finder</description>
|
|
|
3 <macros>
|
|
|
4 <import>glassgo_macros.xml</import>
|
|
|
5 </macros>
|
|
|
6 <expand macro="requirements"/>
|
|
|
<command detect_errors="aggressive">
    <![CDATA[
        ## Build the GLASSgo.py call from the tool form.
        GLASSgo.py
            ## Threads: the slot count Galaxy exports, falling back to 1 when
            ## GALAXY_SLOTS is unset (the fallback must be a number).
            -t \${GALAXY_SLOTS:-1}
            ## Paths are single-quoted so unusual characters cannot split the argument.
            -i '${input_data.input}'
            -d '${input_data.database}'
            ## -e/-p/-l/-a are only emitted when the user chose the manual parameter setup.
            #if str($search.cond_param_setup.param_setup) == "manual":
                -e ${search.cond_param_setup.evalue}
                -p ${search.cond_param_setup.identity}
                ## Structure-based clustering "off" maps to -l 0, anything else to -l 2.
                #if str($search.cond_param_setup.cond_clust_setup.clust_setup) == "off":
                    -l 0
                #else:
                    -l 2
                    ## Manual filtering passes the user's value; otherwise -1 is passed.
                    #if str($search.cond_param_setup.cond_clust_setup.clustering.conditional_filtering_setup.filtering_setup) == "manual":
                        -a ${search.cond_param_setup.cond_clust_setup.clustering.conditional_filtering_setup.filtering}
                    #else:
                        -a -1
                    #end if
                #end if
            #end if
            -u ${additional_setting.upstream_region}
            ## The taxon value "global" means no accession list is passed.
            #if str($search.acclist) != "global":
                -g '${search.acclist}'
            #end if
            -n 500
            -o '$output'
    ]]>
</command>
|
|
|
<inputs>
    <!-- Query sequence and the BLAST database to search against. -->
    <section name="input_data" title="Input" expanded="true">
        <param name="input" type="data" format="fasta" label="Query sRNA in FASTA"/>
        <!-- Choices are read from blastdb.loc: column 1 is shown, column 2 is passed to -d. -->
        <param name="database" type="select" display="radio" label="NCBI-nt database">
            <options from_file="blastdb.loc">
                <column name="name" index="1"/>
                <column name="value" index="2"/>
            </options>
        </param>
    </section>

    <section name="search" title="Search Parameters" expanded="true">
        <!-- Choices are read from glassgo_accession_list.txt: column 0 is shown, column 1 is the value.
             The command template skips the -g option when the value is "global". -->
        <param name="acclist" type="select" label="Choose taxon">
            <options from_file="glassgo_accession_list.txt">
                <column name="name" index="0"/>
                <column name="value" index="1"/>
            </options>
        </param>

        <conditional name="cond_param_setup">
            <param name="param_setup" type="select" display="radio" label="Parameter Setup">
                <option value="automatic">automatic</option>
                <option value="manual">manual</option>
            </param>
            <!-- Every select option needs a matching "when"; automatic has no extra inputs. -->
            <when value="automatic"/>
            <when value="manual">
                <param name="evalue" type="float" value="1.0" label="Maximum allowed E-Value"/>
                <param name="identity" type="integer" value="52" label="Minimum allowed identity [%]"/>
                <conditional name="cond_clust_setup">
                    <param name="clust_setup" type="select" display="radio" label="Structure-based clustering">
                        <option value="off">off</option>
                        <option value="on" selected="true">on</option>
                    </param>
                    <!-- Clustering switched off: no further inputs. -->
                    <when value="off"/>
                    <when value="on">
                        <section name="clustering" title="Structure-based Clustering" expanded="true">
                            <conditional name="conditional_filtering_setup">
                                <param name="filtering_setup" type="select" display="radio" label="Structure-based filtering">
                                    <option value="automatic">automatic</option>
                                    <option value="manual">manual</option>
                                </param>
                                <!-- Automatic filtering: no further inputs. -->
                                <when value="automatic"/>
                                <when value="manual">
                                    <param name="filtering" type="integer" value="2" label="manual value for filtering"/>
                                </when>
                            </conditional>
                        </section>
                    </when>
                </conditional>
            </when>
        </conditional>
    </section>

    <section name="additional_setting" title="Additional Settings" expanded="true">
        <param name="upstream_region" type="integer" value="0" label="Include upstream region">
            <!-- 0 is accepted, so the message says "not negative" rather than "positive". -->
            <validator type="in_range" min="0" message="Value must not be negative"/>
        </param>
    </section>
</inputs>
|
|
|
<outputs>
    <!-- Single result dataset; its datatype is taken from the "input" parameter. -->
    <data format_source="input" name="output"/>
</outputs>
|
|
|
<tests>
    <!-- Automatic parameter setup, taxon "global", no upstream region. -->
    <test>
        <param name="input" value="NsiR4_Synechocystis_sp_PCC6803.fa"/>
        <!-- The taxon select is named "acclist"; no "taxon_setup" parameter exists. -->
        <param name="acclist" value="global"/>
        <param name="param_setup" value="automatic"/>
        <param name="upstream_region" value="0"/>
        <output name="output" file="glassgo_NsiR4_Synechocystis_sp_PCC6803.fa"/>
    </test>
</tests>
|
|
|
100 <help>
|
|
|
101 <![CDATA[
|
|
|
102 **Introduction**
|
|
|
103
|
|
|
104 GLASSgo (GLobal Automated sRNA Search go) combines iterative BLAST searches, pairwise identity filtering, and structure based clustering in an automated prediction pipeline to find sRNA homologs from scratch. The web server provides predefined parameter sets for a non-expert usage as well as enables a manual setup of the query parameters. The returned GLASSgo result is in FASTA format, whereby the first entry represents the input sequence.
|
|
|
105
|
|
|
106
|
|
|
107 **Input**
|
|
|
108
|
|
|
109 - **Query sRNA in FASTA**
|
|
|
110 The (single) sRNA sequence has to be provided in FASTA format. Input can be given either as direct text input or by uploading a file. A sequence in FASTA format begins with a single-line sequence
|
|
|
111 identifier that starts with a greater-than (">") symbol, followed by lines of sequence data. For readability, it is recommended that each line is at most 80 characters in length.
|
|
|
112
|
|
|
113 .. class:: warningmark
|
|
|
114
|
|
|
115 The parameter constraints are: The input has to be in valid FASTA format. The number of sequences has to be at least 1 and at most 1. Sequence lengths have to be in the range 20-800. The allowed sequence alphabet is 'ACGUTacgut'.
|
|
|
116
|
|
|
117 - **NCBI Nucleotide database**
|
|
|
118 The (latest) NCBI Nucleotide database.
|
|
|
119
|
|
|
120 **Search Parameters**
|
|
|
121
|
|
|
122 - **Taxon selection**
|
|
|
123 The GLASSgo search is by default based on the complete NCBI Nucleotide database. In general, sRNAs show a limited distribution across the phylogenetic tree, such that a targeted search in a specific taxonomic group is likely to perform better. For that, we provide accession lists for the taxonomic groups the search should
|
|
9
|
124 be limited to.
|
|
8
|
125
|
|
13
|
126
|
|
|
127
|
|
8
|
128 - **Parameter Setup**
|
|
|
129 You can run GLASSgo either in automated mode or you can manually set the advanced parameters.
|
|
|
130
|
|
|
131 - **Maximum allowed E-value**
|
|
|
132 The E-value mainly influences the sensitivity of GLASSgo. A relaxed E-value (>1.0) increases the chance to get more sequences, but also increases computation time.
|
|
|
133
|
|
|
134 .. class:: warningmark
|
|
|
135
|
|
|
136 The parameter constraints are: Input value has to be parsable as a Double. The value must be smaller than or equal to 50.
|
|
|
137
|
|
|
138 - **Minimum allowed identity[%]**
|
|
|
139 Each sRNA candidate is compared to the query sRNA on sequence level and should have a percent identity larger than the value of this parameter to be kept for further
|
|
|
140 analysis. Please note that values lower than 65% increase the total number of hits, but also slightly increase the probability of false positives.
|
|
|
141
|
|
|
142 .. class:: warningmark
|
|
|
143
|
|
|
144 The parameter constraints are: Input value has to be parsable as a Double. The value must be greater than or equal to 10 and must be smaller than or equal to 75.
|
|
|
145
|
|
|
146 - **Structure-based clustering**
|
|
|
147 Defines whether or not structural clustering (via Londen) is to be applied.
|
|
|
148
|
|
|
149 .. class:: warningmark
|
|
|
150
|
|
|
151
|
|
|
152 **Structure-based clustering**
|
|
|
153
|
|
|
154 - **Structure-based filtering**
|
|
|
155 Structure-based filtering can either be done automatically, or you can manually set a structure-based filtering value (see the corresponding parameter below).
|
|
|
156
|
|
|
157 - **Manual value for filtering**
|
|
|
158 The structure-based filtering represents the third filtering step of GLASSgo and is applied to the candidate hits with medium percent identity (80% > %ID > min_identity).
|
|
|
159 Lowering the parameter value will result in a more strict analysis (less false positives) and vice versa.
|
|
|
160
|
|
|
161 .. class:: warningmark
|
|
|
162
|
|
|
163 The parameter constraints are: Input value has to be parsable as a Double. The value must be greater than or equal to 0 and must be smaller than or equal to 3.
|
|
|
164
|
|
|
165 **Additional Settings**
|
|
|
166
|
|
|
167 - **Include upstream region**
|
|
|
168 Setting the parameter 'Upstream Region' to 100 extracts 100 nucleotides upstream for each predicted GLASSgo hit. This additional sequence information is directly
|
|
|
169 concatenated with the corresponding GLASSgo hit and therefore an integral part of the returned GLASSgo results. Note: The upstream region is not considered while the
|
|
|
170 similarity value [%] is computed! In addition, the FASTA header (e.g. start position) for each GLASSgo hit will be updated (if upstream region is activated), whereas
|
|
|
171 the upstream region is additionally mentioned like -UTR-REGION-100nt:1002422-1002521-. You can find further information about the GLASSgo results in the output help
|
|
|
172 section. (0 == no consideration).
|
|
|
173
|
|
|
174
|
|
|
175 **Output Description**
|
|
|
176
|
|
|
177 The output of GLASSgo is a file in multi-FASTA format where the input sequence (query) is followed by the identified homologs. If no homologs could be found, only the input sequence is shown. In the following, the output format will be discussed using two examples. Both examples show a partial result of GLASSgo applying EcpR1. In the first example, the upstream region was turned off, while the value for the upstream region was set to 100 nt for the second. For this very reason, the headers as well as the sequence sizes are unequal.
|
|
|
178
|
|
|
179 The following header shows the Accession number of the respective genome followed by the genomic coordinates of the proposed sRNA homolog (no upstream region included).
|
|
|
180
|
|
|
181 >CP013051.1:1422247-1422417 Sinorhizobium americanum CCGM7, complete genome-p.c.VAL:80.75%-taxID:1408224
|
|
|
182 AAAGGAAGTGAGACTTCCACGATCGATCGGTTACCCCATGATGCTCAGGTCCGCCGCATCTCCTGGGTCGTGGGGTCGGTCGGCTGGCTTCCGACATCCGCGGATTCCTCGTGCCGCAGTCGGAGCCAGCCGACCCCCTTTCAAAACGCCGCTTCAAAAGAGGCGGCGTTT
|
|
|
183
|
|
|
184 In contrast, the next header shows the genomic coordinates of the combined upstream region (100nt) and the proposed sRNA. The exact coordinates of the upstream region are given later in the header (UPSTREAM-REGION-100nt:1422147-1422246).
|
|
|
185
|
|
|
186 >CP013051.1:1422147-1422417 Sinorhizobium americanum CCGM7, complete genome-UPSTREAM-REGION-100nt:1422147-1422246-p.c.VAL:80.75%-taxID:1408224
|
|
|
187 ATTTGTCCGAATACGAGACAGAATTAACCAAACGCCGAGCAACCCGCTTCGGCGATTAAGAATTCGTTGATTTTTTTTTATTTTCAAGCAATGCTGATATAAAGGAAGTGAGACTTCCACGATCGATCGGTTACCCCATGATGCTCAGGTCCGCCGCATCTCCTGGGTCGTGGGGTCGGTCGGCTGGCTTCCGACATCCGCGGATTCCTCGTGCCGCAGTCGGAGCCAGCCGACCCCCTTTCAAAACGCCGCTTCAAAAGAGGCGGCGTTT
|
|
|
188
|
|
|
189 Both examples contain the name of the genome entry and a pairwise similarity value of p.c.VAL:80.75% (query vs. GLASSgo hit) as well as their corresponding taxonomic number taxID:1408224.
|
|
|
190
|
|
|
191 ]]>
|
|
|
192 </help>
|
|
|
<citations>
    <!-- GLASSgo: Automated and Reliable Detection of sRNA Homologs From a Single Input Sequence
         (Frontiers in Genetics, 2018). -->
    <citation type="doi">10.3389/fgene.2018.00124</citation>
    <!-- Freiburg RNA tools: a central online resource for RNA-focused research and teaching
         (Nucleic Acids Research, 2018). -->
    <citation type="doi">10.1093/nar/gky329</citation>
</citations>
|
|
|
221
|
|
|
222
|
|
|
223 </tool>
|