Mercurial > repos > rnateam > graphclust_mlocarna
comparison locarna_best_subtree.xml @ 4:51261dff08a5 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/LocARNAGraphClust commit f971832d2b34a182314e5201ea6895dd207c5923
author | rnateam |
---|---|
date | Mon, 13 Mar 2017 18:02:55 -0400 |
parents | 606440a3852d |
children | 859ee7d8f207 |
comparison
equal
deleted
inserted
replaced
3:606440a3852d | 4:51261dff08a5 |
---|---|
1 <tool id="locarna_best_subtree" name="locarna_graphclust" version="0.1.0" > | 1 <tool id="locarna_best_subtree" name="locarna_graphclust" version="0.1.0" > |
2 <requirements> | 2 <requirements> |
3 <requirement type="package" version="0.1.9">graphclust-wrappers</requirement> | 3 <requirement type="package" version="0.1.10">graphclust-wrappers</requirement> |
4 <requirement type="package" version='1.8.10'>locarna</requirement> | 4 <requirement type="package" version='1.8.10'>locarna</requirement> |
5 <requirement type="package" version='2.1'>rnaz</requirement> | 5 <requirement type="package" version='2.1'>rnaz</requirement> |
6 <requirement type="package" version='0.07'>perl-math-round</requirement> | 6 <requirement type="package" version='0.07'>perl-math-round</requirement> |
7 </requirements> | 7 </requirements> |
8 <stdio> | 8 <stdio> |
9 <exit_code range="1:" /> | 9 <exit_code range="1:" /> |
10 </stdio> | 10 </stdio> |
11 <command> | 11 <command> |
12 <![CDATA[ | 12 <![CDATA[ |
13 | 13 |
14 'locARNAGraphClust.pl' '$center_fa_file' '$tree_file' '$tree_matrix' '$data_map' $allow_overlap | 14 locARNAGraphClust.pl |
15 '$center_fa_file' | |
16 '$tree_file' | |
17 '$tree_matrix' | |
18 '$data_map' | |
19 $allow_overlap | |
20 $free_endgaps | |
21 | |
15 #if str($param_type.param_type_selector) == "gclust" | 22 #if str($param_type.param_type_selector) == "gclust" |
16 $param_type.p | 23 $param_type.p |
17 $param_type.max_diff_am | 24 $param_type.max_diff_am |
18 $param_type.max_diff | 25 $param_type.max_diff |
19 $param_type.tau | 26 $param_type.tau |
20 $param_type.plfold_minlen | 27 $param_type.plfold_minlen |
21 $param_type.struct_weight | 28 $param_type.struct_weight |
32 <param type="data" name="tree_file" label="trees" format="text" help="text format" /> | 39 <param type="data" name="tree_file" label="trees" format="text" help="text format" /> |
33 <param type="data" name="tree_matrix" label="tree_matrix" format="text" help="text format" /> | 40 <param type="data" name="tree_matrix" label="tree_matrix" format="text" help="text format" /> |
34 <param type="data" name="data_map" label="data_map" format="txt" help="text format" /> | 41 <param type="data" name="data_map" label="data_map" format="txt" help="text format" /> |
35 <param name="allow_overlap" type="boolean" truevalue="1" falsevalue="0" label="Allow overlap in subtrees" help="otherwise ignore subtree if it contains overlapping sequences"/> | 42 <param name="allow_overlap" type="boolean" truevalue="1" falsevalue="0" label="Allow overlap in subtrees" help="otherwise ignore subtree if it contains overlapping sequences"/> |
36 | 43 |
44 <param name="free_endgaps" type="select" label="Free endgaps" | |
45 help="Specify whether gaps at the ends (all, 5', or 3' ends) | |
46 of the sequences should be penalized or allowed for free."> | |
47 <option value="0">No free endgaps</option> | |
48 <option value="--free-endgaps">Free endgaps</option> | |
49 <option value="--free-endgaps-5">Free endgaps, only 5'</option> | |
50 <option value="--free-endgaps-3">Free endgaps, only 3'</option> | |
51 </param> | |
37 <conditional name="param_type"> | 52 <conditional name="param_type"> |
38 <param name="param_type_selector" type="select" label="Choose the type of parameters"> | 53 <param name="param_type_selector" type="select" label="Choose the type of parameters"> |
39 <option value="locarna">LocARNA defaults</option> | 54 <option value="locarna">LocARNA defaults</option> |
40 <option value="gclust" selected="True">GrapClust defaults(changeable)</option> | 55 <option value="gclust" selected="True">GrapClust defaults(changeable)</option> |
41 </param> | 56 </param> |
42 <when value="gclust"> | 57 <when value="gclust"> |
43 | 58 <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/> |
44 <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/> | 59 <param name="max_diff_am" type="integer" value="50" size="5" label=" maximal difference for sizes of matched arcs" help="--max-diff-am"/> |
45 <param name="max_diff_am" type="integer" value="50" size="5" label=" maximal difference for sizes of matched arcs" help="--max-diff-am"/> | 60 <param argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/> |
46 <param argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/> | 61 <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/> |
47 <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/> | 62 <param name="plfold_minlen" type="integer" value="210" size="5" label="Minimal length of a sequences for which RNAplfold is used" /> |
48 <param name="plfold_minlen" type="integer" value="210" size="5" label="Minimal length of a sequences for which RNAplfold is used" /> | 63 <param name="struct_weight" argument="struct-weight" |
49 | 64 label="Structure weight" type="integer" |
50 <param name="struct_weight" argument="struct-weight" | 65 value="180" min="0" max="800" /> |
51 label="Structure weight" type="integer" | 66 <param name="indel_opening" argument="indel-opening" |
52 value="180" min="0" max="800" /> | 67 label="Indel opening score" type="integer" |
53 <param name="indel_opening" argument="indel-opening" | 68 value="-400" max="0" min="-1500" /> |
54 label="Indel opening score" type="integer" | 69 <param argument="indel" label="Indel score" type="integer" |
55 value="-400" max="0" min="-1500" /> | 70 value="-200" min="-1000" max="0" /> |
56 <param argument="indel" label="Indel score" type="integer" | 71 <param name="alifold_consensus_dp" |
57 value="-200" min="-1000" max="0" /> | 72 type="boolean" checked="True" |
58 | 73 truevalue="--alifold-consensus-dp" falsevalue=" " |
59 <param name="alifold_consensus_dp" | 74 label="Compute consensus dot plot by alifold" /> |
60 type="boolean" checked="True" | 75 <param name="plfold_span" |
61 truevalue="--alifold-consensus-dp" falsevalue=" " | 76 type="integer" value="150" min="-1" max="400" |
62 label="Compute consensus dot plot by alifold" /> | 77 label="Maximum basepair span by RNAplfold (local folding); -1 for global folding" /> |
63 | 78 <param name="plfold_winsize" |
64 <param name="plfold_span" | |
65 type="integer" value="150" min="-1" max="400" | |
66 label="Maximum basepair span by RNAplfold (local folding); -1 for global folding" /> | |
67 | |
68 <param name="plfold_winsize" | |
69 type="integer" value="300" min="-1" max="800" | 79 type="integer" value="300" min="-1" max="800" |
70 label="Window size for local folding" /> | 80 label="Window size for local folding" /> |
71 | 81 </when> |
72 | 82 <when value="locarna" /> |
73 </when> | 83 </conditional> |
74 <when value="locarna"> | 84 </inputs> |
75 </when> | 85 <outputs> |
76 </conditional> | 86 <data name="model_tree_stk" format="stockholm" label="model.tree.stk" from_work_dir="MODEL/best_subtree.aln" /> |
77 | 87 </outputs> |
78 </inputs> | 88 <tests> |
79 <outputs> | 89 <test> |
80 <data name="model_tree_stk" format="stockholm" label="model.tree.stk" from_work_dir="MODEL/best_subtree.aln" /> | 90 <param name="tree_file" value="1.1.tree"/> |
81 </outputs> | 91 <param name="center_fa_file" value="1.1.center.fa"/> |
82 <tests> | 92 <param name="data_map" value="data.map"/> |
83 <test> | 93 <param name="tree_matrix" value="1.1.matrix.tree"/> |
84 <param name="tree_file" value="1.1.tree"/> | 94 <param name="allow_overlap" value="0"/> |
85 <param name="center_fa_file" value="1.1.center.fa"/> | 95 <param name="free_endgaps" value="0"/> |
86 <param name="data_map" value="data.map"/> | 96 <param name="param_type_selector" value="gclust"/> |
87 <param name="tree_matrix" value="1.1.matrix.tree"/> | 97 <param name="p" value="0.001"/> |
88 <param name="allow_overlap" value="0"/> | 98 <param name="max_diff_am" value="50"/> |
89 <conditional name="param_type"> | 99 <param name="tau" value="50"/> |
90 <param name="iteration_num_selector" value="gclust"/> | 100 <param name="max_diff" value="100"/> |
91 <param name="p" value="0.001"/> | 101 <param name="plfold_minlen" value="210"/> |
92 <param name="max_diff_am" value="50"/> | 102 <param name="struct_weight" value="180"/> |
93 <param name="tau" value="50"/> | 103 <param name="indel_opening" value="-400"/> |
94 <param name="max_diff" value="100"/> | 104 <param name="indel" value="-200"/> |
95 <param name="plfold_minlen" value="210"/> | 105 <param name="alifold_consensus_dp" value="--alifold-consensus-dp"/> |
96 <param name="struct_weight" value="180"/> | 106 <param name="plfold_span" value="150"/> |
97 <param name="indel_opening" value="-400"/> | 107 <param name="plfold_winsize" value="300"/> |
98 <param name="indel" value="-200"/> | 108 <output name="model_tree_stk" file="best_subtree.aln"/> |
99 <param name="alifold_consensus_dp" value="--alifold-consensus-dp"/> | 109 </test> |
100 <param name="plfold_span" value="150"/> | 110 </tests> |
101 <param name="plfold_winsize" value="300"/> | 111 <help> |
102 </conditional> | |
103 <output name="model_tree_stk" file="best_subtree.aln"/> | |
104 </test> | |
105 </tests> | |
106 <help> | |
107 <![CDATA[ | 112 <![CDATA[ |
108 **What it does** | 113 **What it does** |
109 | 114 |
110 MLocARNA computes a multiple sequence-structure alignment of RNA sequences. | 115 MLocARNA computes a multiple sequence-structure alignment of RNA sequences. |
111 It uses *treefile* - file with guide tree in NEWICK format. The given tree is used as guide tree for the progressive alignment. | 116 It uses *treefile* - file with guide tree in NEWICK format. The given tree is used as guide tree for the progressive alignment. |
112 This saves the calculation of pairwise all-vs-all similarities and construction of the guide tree. | 117 This saves the calculation of pairwise all-vs-all similarities and construction of the guide tree. |
113 | |
114 | 118 |
115 | 119 |
116 ]]> | 120 ]]> |
117 </help> | 121 </help> |
118 <citations> | 122 <citations> |
123 pages={255--262}, | 127 pages={255--262}, |
124 year={2010}, | 128 year={2010}, |
125 organization={Omnipress} | 129 organization={Omnipress} |
126 } | 130 } |
127 </citation> | 131 </citation> |
128 <citation type="bibtex">@Article{Will_Joshi_Hofacker-LocAR_Accur_bound-2012, | 132 <citation type="doi">10.1261/rna.029041.111</citation> |
129 author = {Will, Sebastian and Joshi, Tejal and Hofacker, Ivo L. and | 133 <citation type="doi">10.1371/journal.pcbi.0030065</citation> |
130 Stadler, Peter F. and Backofen, Rolf}, | 134 <citation type="doi">10.1093/nar/gkq316</citation> |
131 title = {{LocARNA}-{P}: {Accurate} boundary prediction and improved | |
132 detection of structural {RNAs}}, | |
133 journal = {RNA}, | |
134 year = {2012}, | |
135 volume = {18}, | |
136 number = {5}, | |
137 pages = {900-14}, | |
138 user = {will}, | |
139 pmid = {22450757}, | |
140 doi = {10.1261/rna.029041.111}, | |
141 issn = {1469-9001}, | |
142 issn = {1355-8382}, | |
143 abstract = {Current genomic screens for noncoding RNAs (ncRNAs) predict | |
144 a large number of genomic regions containing potential | |
145 structural ncRNAs. The analysis of these data requires | |
146 highly accurate prediction of ncRNA boundaries and | |
147 discrimination of promising candidate ncRNAs from weak | |
148 predictions. Existing methods struggle with these goals | |
149 because they rely on sequence-based multiple sequence | |
150 alignments, which regularly misalign RNA structure and | |
151 therefore do not support identification of structural | |
152 similarities. To overcome this limitation, we compute | |
153 columnwise and global reliabilities of alignments based on | |
154 sequence and structure similarity; we refer to these | |
155 structure-based alignment reliabilities as STARs. The | |
156 columnwise STARs of alignments, or STAR profiles, provide a | |
157 versatile tool for the manual and automatic analysis of | |
158 ncRNAs. In particular, we improve the boundary prediction of | |
159 the widely used ncRNA gene finder RNAz by a factor of 3 from | |
160 a median deviation of 47 to 13 nt. Post-processing RNAz | |
161 predictions, LocARNA-P's STAR score allows much stronger | |
162 discrimination between true- and false-positive predictions | |
163 than RNAz's own evaluation. The improved accuracy, in this | |
164 scenario increased from AUC 0.71 to AUC 0.87, significantly | |
165 reduces the cost of successive analysis steps. The | |
166 ready-to-use software tool LocARNA-P produces | |
167 structure-based multiple RNA alignments with associated | |
168 columnwise STARs and predicts ncRNA boundaries. We provide | |
169 additional results, a web server for LocARNA/LocARNA-P, and | |
170 the software package, including documentation and a pipeline | |
171 for refining screens for structural ncRNA, at | |
172 http://www.bioinf.uni-freiburg.de/Supplements/LocARNA-P/.} | |
173 } | |
174 </citation> | |
175 <citation type="bibtex">@Article{Will:etal:_infer_non_codin_rna_famil:PLOS2007, | |
176 author = {Sebastian Will and Kristin Reiche and Ivo L. Hofacker and | |
177 Peter F. Stadler and Rolf Backofen}, | |
178 title = {Inferring Non-Coding {RNA} Families and Classes by Means of | |
179 Genome-Scale Structure-Based Clustering}, | |
180 journal = {PLoS Comput Biol}, | |
181 year = 2007, | |
182 volume = {3}, | |
183 number = {4}, | |
184 pages = {e65}, | |
185 issn = {1553-7358}, | |
186 issn = {1553-734X}, | |
187 pmid = {17432929}, | |
188 doi = {10.1371/journal.pcbi.0030065}, | |
189 user = {will}, | |
190 abstract = {The RFAM database defines families of ncRNAs by means of | |
191 sequence similarities that are sufficient to establish | |
192 homology. In some cases, such as microRNAs, box H/ACA | |
193 snoRNAs, functional commonalities define classes of RNAs | |
194 that are characterized by structural similarities, and | |
195 typically consist of multiple RNA families. Recent advances | |
196 in high-throughput transcriptomics and comparative genomics | |
197 have produced very large sets of putative non-coding RNAs | |
198 and regulatory RNA signals. For many of them, evidence for | |
199 stabilizing selection acting on their secondary structures | |
200 has been derived, and at least approximate models of their | |
201 structures have been computed. The overwhelming majority of | |
202 these hypothetical RNAs cannot be assigned to established | |
203 families or classes. We present here a structure-based | |
204 clustering approach that is capable of extracting putative | |
205 RNA classes from genome-wide surveys for structured RNAs. The | |
206 LocARNA tool implements a novel variant of the Sankoff | |
207 algorithm that is sufficiently fast to deal with several | |
208 thousand candidate sequences. The method is also robust | |
209 against false positive predictions, i.e., a contamination of | |
210 the input data with unstructured or non-conserved | |
211 sequences. We have successfully tested the LocARNA-based | |
212 clustering approach on the sequences of the | |
213 RFAM-seed alignments. Furthermore, we have applied it to a | |
214 previously published set of 3332 predicted structured | |
215 elements in the Ciona intestinalis genomes (Missal et al., | |
216 Bioinformatics 21(S2), i77-i78). In addition to recovering | |
217 e.g. tRNAs as a structure-based class, the method identifies | |
218 several RNA families, including microRNA and snoRNA | |
219 candidates, and suggests several novel classes of ncRNAs for | |
220 which to-date no representative has been experimentally | |
221 characterized.} | |
222 } | |
223 | |
224 </citation> | |
225 <citation type="bibtex">@Article{Smith:Heyne:Richter:Freib_RNA_Tools:NAR2010, | |
226 author = {Smith, Cameron and Heyne, Steffen and Richter, Andreas S. | |
227 and Will, Sebastian and Backofen, Rolf}, | |
228 title = {Freiburg {RNA} {Tools}: a web server integrating {IntaRNA}, | |
229 {ExpaRNA} and {LocARNA}}, | |
230 journal = NAR, | |
231 year = {2010}, | |
232 volume = {38 Suppl}, | |
233 number = {}, | |
234 pages = {W373-7}, | |
235 user = {arichter}, | |
236 pmid = {20444875}, | |
237 doi = {10.1093/nar/gkq316}, | |
238 issn = {0305-1048}, | |
239 issn = {1362-4962}, | |
240 abstract = {The Freiburg RNA tools web server integrates three tools | |
241 for the advanced analysis of RNA in a common web-based user | |
242 interface. The tools IntaRNA, ExpaRNA and LocARNA support | |
243 the prediction of RNA-RNA interaction, exact RNA matching | |
244 and alignment of RNA, respectively. The Freiburg RNA tools | |
245 web server and the software packages of the stand-alone | |
246 tools are freely accessible at | |
247 http://rna.informatik.uni-freiburg.de.} | |
248 } | |
249 </citation> | |
250 </citations> | 135 </citations> |
251 </tool> | 136 </tool> |