Mercurial > repos > rnateam > graphclust_mlocarna
comparison locarna_best_subtree.xml @ 0:15bd4fb05e5c draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/LocARNAGraphClust commit 21aaee40723b5341b4236edeb0e72995c2054053
author | rnateam |
---|---|
date | Fri, 16 Dec 2016 07:35:29 -0500 |
parents | |
children | c6c4a7adb099 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:15bd4fb05e5c |
---|---|
1 <tool id="locarna_best_subtree" name="locarna_best_subtree" version="0.1.0" > | |
2 <!-- Packages, each pinned to an exact version, that must be available when the tool runs. --> <requirements> | |
3 <requirement type="package" version="0.1">graphclust-wrappers</requirement> | |
4 <requirement type="package" version="1.8.10">locarna</requirement> | |
5 <requirement type="package" version="2.1">rnaz</requirement> | |
6 <requirement type="package" version="0.07">perl-math-round</requirement> | |
7 </requirements> | |
8 <!-- Matches every exit code from 1 upward (open-ended range); no level is set here, so Galaxy's default for exit_code rules applies. --> <stdio> | |
9 <exit_code range="1:" /> | |
10 </stdio> | |
11 <!-- Positional call of locARNAGraphClust.pl. Argument order: center_fa_file, tree_file, tree_matrix, p, max_diff_am, tau, max_diff, an empty-string placeholder, data_map, plfold_minlen. NOTE(review): the meaning of the empty placeholder is defined by the script, not visible here. --> <command> | |
12 <![CDATA[ | |
13 | |
14 'locARNAGraphClust.pl' '$center_fa_file' '$tree_file' '$tree_matrix' $p $max_diff_am $tau $max_diff '' '$data_map' $plfold_minlen | |
15 ]]> | |
16 </command> | |
17 <!-- Input datasets and numeric options. Each name is the variable referenced by the command template. Dataset formats use registered Galaxy datatype extensions: "fasta" for sequences and "txt" for plain text ("text" and "fa" are not datatype extensions). --> <inputs> | |
18 <param type="data" name="center_fa_file" label="centers" format="fasta" help="fasta format" /> | |
19 <param type="data" name="tree_file" label="trees" format="txt" help="text format" /> | |
20 <param type="data" name="tree_matrix" label="tree_matrix" format="txt" help="text format" /> | |
21 <param type="data" name="data_map" label="data_map" format="txt" help="text format" /> | |
22 <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/> | |
23 <param name="max_diff_am" type="integer" value="50" size="5" label=" maximal difference for sizes of matched arcs" help="--max-diff-am"/> | |
24 <param name="tau" type="integer" value="50" size="5" label="Tau factor in percent" help="--tau"/> | |
25 <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/> | |
26 <param name="plfold_minlen" type="integer" value="210" size="5" label="Minimal length of a sequences for which RNAplfold is used" /> | |
27 </inputs> | |
28 <!-- Single output: a Stockholm-format dataset collected from MODEL/best_subtree.aln in the job working directory. --> <outputs> | |
29 | |
30 <data name="model_tree_stk" format="stockholm" label="model.tree.stk" from_work_dir="MODEL/best_subtree.aln" /> | |
31 </outputs> | |
32 <!-- Functional test. Every param name must match a name declared in the inputs section; the two alignment limits are max_diff_am (50) and max_diff (100). plfold_minlen is not set and keeps its declared default. --> <tests> | |
33 <test> | |
34 <param name="tree_file" value="1.1.tree"/> | |
35 <param name="center_fa_file" value="1.1.center.fa"/> | |
36 <param name="data_map" value="data.map"/> | |
37 <param name="tree_matrix" value="1.1.matrix.tree"/> | |
38 <param name="p" value="0.001"/> | |
39 <param name="max_diff_am" value="50"/> | |
40 <param name="tau" value="50"/> | |
41 <param name="max_diff" value="100"/> | |
42 <output name="model_tree_stk" file="best_subtree.aln"/> | |
43 </test> | |
44 </tests> | |
45 <help> | |
46 <![CDATA[ | |
47 **What it does** | |
48 | |
49 MLocARNA computes a multiple sequence-structure alignment of RNA sequences. | |
50 It uses *treefile*, a file containing the guide tree in NEWICK format. The given tree is used as the guide tree for the progressive alignment. | |
51 This saves the calculation of pairwise all-vs-all similarities and construction of the guide tree. | |
52 | |
53 | |
54 | |
55 ]]> | |
56 </help> | |
57 <citations> | |
58 <citation type="bibtex">@inproceedings{costa2010fast, | |
59 title={Fast neighborhood subgraph pairwise distance kernel}, | |
60 author={Costa, Fabrizio and De Grave, Kurt}, | |
61 booktitle={Proceedings of the 26th International Conference on Machine Learning}, | |
62 pages={255--262}, | |
63 year={2010}, | |
64 organization={Omnipress} | |
65 } | |
66 </citation> | |
67 <citation type="bibtex">@Article{Will_Joshi_Hofacker-LocAR_Accur_bound-2012, | |
68 author = {Will, Sebastian and Joshi, Tejal and Hofacker, Ivo L. and | |
69 Stadler, Peter F. and Backofen, Rolf}, | |
70 title = {{LocARNA}-{P}: {Accurate} boundary prediction and improved | |
71 detection of structural {RNAs}}, | |
72 journal = {RNA}, | |
73 year = {2012}, | |
74 volume = {18}, | |
75 number = {5}, | |
76 pages = {900-14}, | |
77 user = {will}, | |
78 pmid = {22450757}, | |
79 doi = {10.1261/rna.029041.111}, | |
80 issn = {1469-9001}, | |
81 issn = {1355-8382}, | |
82 abstract = {Current genomic screens for noncoding RNAs (ncRNAs) predict | |
83 a large number of genomic regions containing potential | |
84 structural ncRNAs. The analysis of these data requires | |
85 highly accurate prediction of ncRNA boundaries and | |
86 discrimination of promising candidate ncRNAs from weak | |
87 predictions. Existing methods struggle with these goals | |
88 because they rely on sequence-based multiple sequence | |
89 alignments, which regularly misalign RNA structure and | |
90 therefore do not support identification of structural | |
91 similarities. To overcome this limitation, we compute | |
92 columnwise and global reliabilities of alignments based on | |
93 sequence and structure similarity; we refer to these | |
94 structure-based alignment reliabilities as STARs. The | |
95 columnwise STARs of alignments, or STAR profiles, provide a | |
96 versatile tool for the manual and automatic analysis of | |
97 ncRNAs. In particular, we improve the boundary prediction of | |
98 the widely used ncRNA gene finder RNAz by a factor of 3 from | |
99 a median deviation of 47 to 13 nt. Post-processing RNAz | |
100 predictions, LocARNA-P's STAR score allows much stronger | |
101 discrimination between true- and false-positive predictions | |
102 than RNAz's own evaluation. The improved accuracy, in this | |
103 scenario increased from AUC 0.71 to AUC 0.87, significantly | |
104 reduces the cost of successive analysis steps. The | |
105 ready-to-use software tool LocARNA-P produces | |
106 structure-based multiple RNA alignments with associated | |
107 columnwise STARs and predicts ncRNA boundaries. We provide | |
108 additional results, a web server for LocARNA/LocARNA-P, and | |
109 the software package, including documentation and a pipeline | |
110 for refining screens for structural ncRNA, at | |
111 http://www.bioinf.uni-freiburg.de/Supplements/LocARNA-P/.} | |
112 } | |
113 </citation> | |
114 <citation type="bibtex">@Article{Will:etal:_infer_non_codin_rna_famil:PLOS2007, | |
115 author = {Sebastian Will and Kristin Reiche and Ivo L. Hofacker and | |
116 Peter F. Stadler and Rolf Backofen}, | |
117 title = {Inferring Non-Coding {RNA} Families and Classes by Means of | |
118 Genome-Scale Structure-Based Clustering}, | |
119 journal = {PLoS Comput Biol}, | |
120 year = 2007, | |
121 volume = {3}, | |
122 number = {4}, | |
123 pages = {e65}, | |
124 issn = {1553-7358}, | |
125 issn = {1553-734X}, | |
126 pmid = {17432929}, | |
127 doi = {10.1371/journal.pcbi.0030065}, | |
128 user = {will}, | |
129 abstract = {The RFAM database defines families of ncRNAs by means of | |
130 sequence similarities that are sufficient to establish | |
131 homology. In some cases, such as microRNAs, box H/ACA | |
132 snoRNAs, functional commonalities define classes of RNAs | |
133 that are characterized by structural similarities, and | |
134 typically consist of multiple RNA families. Recent advances | |
135 in high-throughput transcriptomics and comparative genomics | |
136 have produced very large sets of putative non-coding RNAs | |
137 and regulatory RNA signals. For many of them, evidence for | |
138 stabilizing selection acting on their secondary structures | |
139 has been derived, and at least approximate models of their | |
140 structures have been computed. The overwhelming majority of | |
141 these hypothetical RNAs cannot be assigned to established | |
142 families or classes. We present here a structure-based | |
143 clustering approach that is capable of extracting putative | |
144 RNA classes from genome-wide surveys for structured RNAs. The | |
145 LocARNA tool implements a novel variant of the Sankoff | |
146 algorithm that is sufficiently fast to deal with several | |
147 thousand candidate sequences. The method is also robust | |
148 against false positive predictions, i.e., a contamination of | |
149 the input data with unstructured or non-conserved | |
150 sequences. We have successfully tested the LocARNA-based | |
151 clustering approach on the sequences of the | |
152 RFAM-seed alignments. Furthermore, we have applied it to a | |
153 previously published set of 3332 predicted structured | |
154 elements in the Ciona intestinalis genomes (Missal et al., | |
155 Bioinformatics 21(S2), i77-i78). In addition to recovering | |
156 e.g. tRNAs as a structure-based class, the method identifies | |
157 several RNA families, including microRNA and snoRNA | |
158 candidates, and suggests several novel classes of ncRNAs for | |
159 which to-date no representative has been experimentally | |
160 characterized.} | |
161 } | |
162 | |
163 </citation> | |
164 <citation type="bibtex">@Article{Smith:Heyne:Richter:Freib_RNA_Tools:NAR2010, | |
165 author = {Smith, Cameron and Heyne, Steffen and Richter, Andreas S. | |
166 and Will, Sebastian and Backofen, Rolf}, | |
167 title = {Freiburg {RNA} {Tools}: a web server integrating {IntaRNA}, | |
168 {ExpaRNA} and {LocARNA}}, | |
169 journal = {Nucleic Acids Research}, | |
170 year = {2010}, | |
171 volume = {38 Suppl}, | |
172 number = {}, | |
173 pages = {W373-7}, | |
174 user = {arichter}, | |
175 pmid = {20444875}, | |
176 doi = {10.1093/nar/gkq316}, | |
177 issn = {0305-1048}, | |
178 issn = {1362-4962}, | |
179 abstract = {The Freiburg RNA tools web server integrates three tools | |
180 for the advanced analysis of RNA in a common web-based user | |
181 interface. The tools IntaRNA, ExpaRNA and LocARNA support | |
182 the prediction of RNA-RNA interaction, exact RNA matching | |
183 and alignment of RNA, respectively. The Freiburg RNA tools | |
184 web server and the software packages of the stand-alone | |
185 tools are freely accessible at | |
186 http://rna.informatik.uni-freiburg.de.} | |
187 } | |
188 </citation> | |
189 </citations> | |
190 </tool> |