comparison locarna_best_subtree.xml @ 4:51261dff08a5 draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/LocARNAGraphClust commit f971832d2b34a182314e5201ea6895dd207c5923
author rnateam
date Mon, 13 Mar 2017 18:02:55 -0400
parents 606440a3852d
children 859ee7d8f207
comparison
equal deleted inserted replaced
3:606440a3852d 4:51261dff08a5
1 <tool id="locarna_best_subtree" name="locarna_graphclust" version="0.1.0" > 1 <tool id="locarna_best_subtree" name="locarna_graphclust" version="0.1.0" >
2 <requirements> 2 <requirements>
3 <requirement type="package" version="0.1.9">graphclust-wrappers</requirement> 3 <requirement type="package" version="0.1.10">graphclust-wrappers</requirement>
4 <requirement type="package" version='1.8.10'>locarna</requirement> 4 <requirement type="package" version='1.8.10'>locarna</requirement>
5 <requirement type="package" version='2.1'>rnaz</requirement> 5 <requirement type="package" version='2.1'>rnaz</requirement>
6 <requirement type="package" version='0.07'>perl-math-round</requirement> 6 <requirement type="package" version='0.07'>perl-math-round</requirement>
7 </requirements> 7 </requirements>
8 <stdio> 8 <stdio>
9 <exit_code range="1:" /> 9 <exit_code range="1:" />
10 </stdio> 10 </stdio>
11 <command> 11 <command>
12 <![CDATA[ 12 <![CDATA[
13 13
14 'locARNAGraphClust.pl' '$center_fa_file' '$tree_file' '$tree_matrix' '$data_map' $allow_overlap 14 locARNAGraphClust.pl
15 '$center_fa_file'
16 '$tree_file'
17 '$tree_matrix'
18 '$data_map'
19 $allow_overlap
20 $free_endgaps
21
15 #if str($param_type.param_type_selector) == "gclust" 22 #if str($param_type.param_type_selector) == "gclust"
16 $param_type.p 23 $param_type.p
17 $param_type.max_diff_am 24 $param_type.max_diff_am
18 $param_type.max_diff 25 $param_type.max_diff
19 $param_type.tau 26 $param_type.tau
20 $param_type.plfold_minlen 27 $param_type.plfold_minlen
21 $param_type.struct_weight 28 $param_type.struct_weight
32 <param type="data" name="tree_file" label="trees" format="text" help="text format" /> 39 <param type="data" name="tree_file" label="trees" format="text" help="text format" />
33 <param type="data" name="tree_matrix" label="tree_matrix" format="text" help="text format" /> 40 <param type="data" name="tree_matrix" label="tree_matrix" format="text" help="text format" />
34 <param type="data" name="data_map" label="data_map" format="txt" help="text format" /> 41 <param type="data" name="data_map" label="data_map" format="txt" help="text format" />
35 <param name="allow_overlap" type="boolean" truevalue="1" falsevalue="0" label="Allow overlap in subtrees" help="otherwise ignore subtree if it contains overlapping sequences"/> 42 <param name="allow_overlap" type="boolean" truevalue="1" falsevalue="0" label="Allow overlap in subtrees" help="otherwise ignore subtree if it contains overlapping sequences"/>
36 43
44 <param name="free_endgaps" type="select" label="Free endgaps"
45 help="Specify whether gaps at the ends (all, 5', or 3' ends)
46 of the sequences should be penalized or allowed for free.">
47 <option value="0">No free endgaps</option>
48 <option value="--free-endgaps">Free endgaps</option>
49 <option value="--free-endgaps-5">Free endgaps, only 5'</option>
50 <option value="--free-endgaps-3">Free endgaps, only 3'</option>
51 </param>
37 <conditional name="param_type"> 52 <conditional name="param_type">
38 <param name="param_type_selector" type="select" label="Choose the type of parameters"> 53 <param name="param_type_selector" type="select" label="Choose the type of parameters">
39 <option value="locarna">LocARNA defaults</option> 54 <option value="locarna">LocARNA defaults</option>
40 <option value="gclust" selected="True">GrapClust defaults(changeable)</option> 55 <option value="gclust" selected="True">GrapClust defaults(changeable)</option>
41 </param> 56 </param>
42 <when value="gclust"> 57 <when value="gclust">
43 58 <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/>
44 <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/> 59 <param name="max_diff_am" type="integer" value="50" size="5" label=" maximal difference for sizes of matched arcs" help="--max-diff-am"/>
45 <param name="max_diff_am" type="integer" value="50" size="5" label=" maximal difference for sizes of matched arcs" help="--max-diff-am"/> 60 <param argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/>
46 <param argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/> 61 <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/>
47 <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/> 62 <param name="plfold_minlen" type="integer" value="210" size="5" label="Minimal length of a sequences for which RNAplfold is used" />
48 <param name="plfold_minlen" type="integer" value="210" size="5" label="Minimal length of a sequences for which RNAplfold is used" /> 63 <param name="struct_weight" argument="struct-weight"
49 64 label="Structure weight" type="integer"
50 <param name="struct_weight" argument="struct-weight" 65 value="180" min="0" max="800" />
51 label="Structure weight" type="integer" 66 <param name="indel_opening" argument="indel-opening"
52 value="180" min="0" max="800" /> 67 label="Indel opening score" type="integer"
53 <param name="indel_opening" argument="indel-opening" 68 value="-400" max="0" min="-1500" />
54 label="Indel opening score" type="integer" 69 <param argument="indel" label="Indel score" type="integer"
55 value="-400" max="0" min="-1500" /> 70 value="-200" min="-1000" max="0" />
56 <param argument="indel" label="Indel score" type="integer" 71 <param name="alifold_consensus_dp"
57 value="-200" min="-1000" max="0" /> 72 type="boolean" checked="True"
58 73 truevalue="--alifold-consensus-dp" falsevalue=" "
59 <param name="alifold_consensus_dp" 74 label="Compute consensus dot plot by alifold" />
60 type="boolean" checked="True" 75 <param name="plfold_span"
61 truevalue="--alifold-consensus-dp" falsevalue=" " 76 type="integer" value="150" min="-1" max="400"
62 label="Compute consensus dot plot by alifold" /> 77 label="Maximum basepair span by RNAplfold (local folding); -1 for global folding" />
63 78 <param name="plfold_winsize"
64 <param name="plfold_span"
65 type="integer" value="150" min="-1" max="400"
66 label="Maximum basepair span by RNAplfold (local folding); -1 for global folding" />
67
68 <param name="plfold_winsize"
69 type="integer" value="300" min="-1" max="800" 79 type="integer" value="300" min="-1" max="800"
70 label="Window size for local folding" /> 80 label="Window size for local folding" />
71 81 </when>
72 82 <when value="locarna" />
73 </when> 83 </conditional>
74 <when value="locarna"> 84 </inputs>
75 </when> 85 <outputs>
76 </conditional> 86 <data name="model_tree_stk" format="stockholm" label="model.tree.stk" from_work_dir="MODEL/best_subtree.aln" />
77 87 </outputs>
78 </inputs> 88 <tests>
79 <outputs> 89 <test>
80 <data name="model_tree_stk" format="stockholm" label="model.tree.stk" from_work_dir="MODEL/best_subtree.aln" /> 90 <param name="tree_file" value="1.1.tree"/>
81 </outputs> 91 <param name="center_fa_file" value="1.1.center.fa"/>
82 <tests> 92 <param name="data_map" value="data.map"/>
83 <test> 93 <param name="tree_matrix" value="1.1.matrix.tree"/>
84 <param name="tree_file" value="1.1.tree"/> 94 <param name="allow_overlap" value="0"/>
85 <param name="center_fa_file" value="1.1.center.fa"/> 95 <param name="free_endgaps" value="0"/>
86 <param name="data_map" value="data.map"/> 96 <param name="param_type_selector" value="gclust"/>
87 <param name="tree_matrix" value="1.1.matrix.tree"/> 97 <param name="p" value="0.001"/>
88 <param name="allow_overlap" value="0"/> 98 <param name="max_diff_am" value="50"/>
89 <conditional name="param_type"> 99 <param name="tau" value="50"/>
90 <param name="iteration_num_selector" value="gclust"/> 100 <param name="max_diff" value="100"/>
91 <param name="p" value="0.001"/> 101 <param name="plfold_minlen" value="210"/>
92 <param name="max_diff_am" value="50"/> 102 <param name="struct_weight" value="180"/>
93 <param name="tau" value="50"/> 103 <param name="indel_opening" value="-400"/>
94 <param name="max_diff" value="100"/> 104 <param name="indel" value="-200"/>
95 <param name="plfold_minlen" value="210"/> 105 <param name="alifold_consensus_dp" value="--alifold-consensus-dp"/>
96 <param name="struct_weight" value="180"/> 106 <param name="plfold_span" value="150"/>
97 <param name="indel_opening" value="-400"/> 107 <param name="plfold_winsize" value="300"/>
98 <param name="indel" value="-200"/> 108 <output name="model_tree_stk" file="best_subtree.aln"/>
99 <param name="alifold_consensus_dp" value="--alifold-consensus-dp"/> 109 </test>
100 <param name="plfold_span" value="150"/> 110 </tests>
101 <param name="plfold_winsize" value="300"/> 111 <help>
102 </conditional>
103 <output name="model_tree_stk" file="best_subtree.aln"/>
104 </test>
105 </tests>
106 <help>
107 <![CDATA[ 112 <![CDATA[
108 **What it does** 113 **What it does**
109 114
110 MLocARNA computes a multiple sequence-structure alignment of RNA sequences. 115 MLocARNA computes a multiple sequence-structure alignment of RNA sequences.
111 It uses *treefile*, a file containing the guide tree in NEWICK format. The given tree is used as the guide tree for the progressive alignment. 116 It uses *treefile*, a file containing the guide tree in NEWICK format. The given tree is used as the guide tree for the progressive alignment.
112 This saves the calculation of pairwise all-vs-all similarities and construction of the guide tree. 117 This saves the calculation of pairwise all-vs-all similarities and construction of the guide tree.
113
114 118
115 119
116 ]]> 120 ]]>
117 </help> 121 </help>
118 <citations> 122 <citations>
123 pages={255--262}, 127 pages={255--262},
124 year={2010}, 128 year={2010},
125 organization={Omnipress} 129 organization={Omnipress}
126 } 130 }
127 </citation> 131 </citation>
128 <citation type="bibtex">@Article{Will_Joshi_Hofacker-LocAR_Accur_bound-2012, 132 <citation type="doi">10.1261/rna.029041.111</citation>
129 author = {Will, Sebastian and Joshi, Tejal and Hofacker, Ivo L. and 133 <citation type="doi">10.1371/journal.pcbi.0030065</citation>
130 Stadler, Peter F. and Backofen, Rolf}, 134 <citation type="doi">10.1093/nar/gkq316</citation>
131 title = {{LocARNA}-{P}: {Accurate} boundary prediction and improved
132 detection of structural {RNAs}},
133 journal = {RNA},
134 year = {2012},
135 volume = {18},
136 number = {5},
137 pages = {900-14},
138 user = {will},
139 pmid = {22450757},
140 doi = {10.1261/rna.029041.111},
141 issn = {1469-9001},
142 issn = {1355-8382},
143 abstract = {Current genomic screens for noncoding RNAs (ncRNAs) predict
144 a large number of genomic regions containing potential
145 structural ncRNAs. The analysis of these data requires
146 highly accurate prediction of ncRNA boundaries and
147 discrimination of promising candidate ncRNAs from weak
148 predictions. Existing methods struggle with these goals
149 because they rely on sequence-based multiple sequence
150 alignments, which regularly misalign RNA structure and
151 therefore do not support identification of structural
152 similarities. To overcome this limitation, we compute
153 columnwise and global reliabilities of alignments based on
154 sequence and structure similarity; we refer to these
155 structure-based alignment reliabilities as STARs. The
156 columnwise STARs of alignments, or STAR profiles, provide a
157 versatile tool for the manual and automatic analysis of
158 ncRNAs. In particular, we improve the boundary prediction of
159 the widely used ncRNA gene finder RNAz by a factor of 3 from
160 a median deviation of 47 to 13 nt. Post-processing RNAz
161 predictions, LocARNA-P's STAR score allows much stronger
162 discrimination between true- and false-positive predictions
163 than RNAz's own evaluation. The improved accuracy, in this
164 scenario increased from AUC 0.71 to AUC 0.87, significantly
165 reduces the cost of successive analysis steps. The
166 ready-to-use software tool LocARNA-P produces
167 structure-based multiple RNA alignments with associated
168 columnwise STARs and predicts ncRNA boundaries. We provide
169 additional results, a web server for LocARNA/LocARNA-P, and
170 the software package, including documentation and a pipeline
171 for refining screens for structural ncRNA, at
172 http://www.bioinf.uni-freiburg.de/Supplements/LocARNA-P/.}
173 }
174 </citation>
175 <citation type="bibtex">@Article{Will:etal:_infer_non_codin_rna_famil:PLOS2007,
176 author = {Sebastian Will and Kristin Reiche and Ivo L. Hofacker and
177 Peter F. Stadler and Rolf Backofen},
178 title = {Inferring Non-Coding {RNA} Families and Classes by Means of
179 Genome-Scale Structure-Based Clustering},
180 journal = {PLoS Comput Biol},
181 year = 2007,
182 volume = {3},
183 number = {4},
184 pages = {e65},
185 issn = {1553-7358},
186 issn = {1553-734X},
187 pmid = {17432929},
188 doi = {10.1371/journal.pcbi.0030065},
189 user = {will},
190 abstract = {The RFAM database defines families of ncRNAs by means of
191 sequence similarities that are sufficient to establish
192 homology. In some cases, such as microRNAs, box H/ACA
193 snoRNAs, functional commonalities define classes of RNAs
194 that are characterized by structural similarities, and
195 typically consist of multiple RNA families. Recent advances
196 in high-throughput transcriptomics and comparative genomics
197 have produced very large sets of putative non-coding RNAs
198 and regulatory RNA signals. For many of them, evidence for
199 stabilizing selection acting on their secondary structures
200 has been derived, and at least approximate models of their
201 structures have been computed. The overwhelming majority of
202 these hypothetical RNAs cannot be assigned to established
203 families or classes. We present here a structure-based
204 clustering approach that is capable of extracting putative
205 RNA classes from genome-wide surveys for structured RNAs. The
206 LocARNA tool implements a novel variant of the Sankoff
207 algorithm that is sufficiently fast to deal with several
208 thousand candidate sequences. The method is also robust
209 against false positive predictions, i.e., a contamination of
210 the input data with unstructured or non-conserved
211 sequences. We have successfully tested the LocARNA-based
212 clustering approach on the sequences of the
213 RFAM-seed alignments. Furthermore, we have applied it to a
214 previously published set of 3332 predicted structured
215 elements in the Ciona intestinalis genomes (Missal et al.,
216 Bioinformatics 21(S2), i77-i78). In addition to recovering
217 e.g. tRNAs as a structure-based class, the method identifies
218 several RNA families, including microRNA and snoRNA
219 candidates, and suggests several novel classes of ncRNAs for
220 which to date no representative has been experimentally
221 characterized.}
222 }
223
224 </citation>
225 <citation type="bibtex">@Article{Smith:Heyne:Richter:Freib_RNA_Tools:NAR2010,
226 author = {Smith, Cameron and Heyne, Steffen and Richter, Andreas S.
227 and Will, Sebastian and Backofen, Rolf},
228 title = {Freiburg {RNA} {Tools}: a web server integrating {IntaRNA},
229 {ExpaRNA} and {LocARNA}},
230 journal = NAR,
231 year = {2010},
232 volume = {38 Suppl},
233 number = {},
234 pages = {W373-7},
235 user = {arichter},
236 pmid = {20444875},
237 doi = {10.1093/nar/gkq316},
238 issn = {0305-1048},
239 issn = {1362-4962},
240 abstract = {The Freiburg RNA tools web server integrates three tools
241 for the advanced analysis of RNA in a common web-based user
242 interface. The tools IntaRNA, ExpaRNA and LocARNA support
243 the prediction of RNA-RNA interaction, exact RNA matching
244 and alignment of RNA, respectively. The Freiburg RNA tools
245 web server and the software packages of the stand-alone
246 tools are freely accessible at
247 http://rna.informatik.uni-freiburg.de.}
248 }
249 </citation>
250 </citations> 135 </citations>
251 </tool> 136 </tool>