Mercurial > repos > rnateam > graphclust_mlocarna
view locarna_best_subtree.xml @ 1:c6c4a7adb099 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/LocARNAGraphClust commit 80c721dcfe02a2b8baf8e2c64b76cbcd71b23d86
author | rnateam |
---|---|
date | Sat, 21 Jan 2017 17:39:36 -0500 |
parents | 15bd4fb05e5c |
children | 5970f6f2a36b |
line wrap: on
line source
<tool id="locarna_best_subtree" name="locarna_best_subtree" version="0.1.0">
    <!--
        Galaxy wrapper around locARNAGraphClust.pl.
        Inputs: a centers FASTA file, a tree file, a tree matrix and a data map.
        Output: the alignment the script writes to MODEL/best_subtree.aln.
    -->
    <requirements>
        <requirement type="package" version="0.1.7">graphclust-wrappers</requirement>
        <requirement type="package" version="1.8.10">locarna</requirement>
        <requirement type="package" version="2.1">rnaz</requirement>
        <requirement type="package" version="0.07">perl-math-round</requirement>
    </requirements>
    <stdio>
        <!-- Any non-zero exit code marks the job as failed. -->
        <exit_code range="1:" />
    </stdio>
    <command>
        <![CDATA[
        'locARNAGraphClust.pl'
            '$center_fa_file'
            '$tree_file'
            '$tree_matrix'
            '$data_map'
            #if str($param_type.param_type_selector) == "gclust"
                $param_type.p
                $param_type.max_diff_am
                $param_type.max_diff
                $param_type.tau
                $param_type.plfold_minlen
                $param_type.struct_weight
                $param_type.indel_opening
                $param_type.indel
                $param_type.alifold_consensus_dp
                $param_type.plfold_span
                $param_type.plfold_winsize
            #end if
        ]]>
    </command>
    <inputs>
        <param type="data" name="center_fa_file" label="centers" format="fasta" help="fasta format" />
        <param type="data" name="tree_file" label="trees" format="txt" help="text format" />
        <param type="data" name="tree_matrix" label="tree_matrix" format="txt" help="text format" />
        <param type="data" name="data_map" label="data_map" format="txt" help="text format" />
        <conditional name="param_type">
            <param name="param_type_selector" type="select" label="Choose the type of parameters">
                <option value="locarna">LocARNA defaults</option>
                <option value="gclust" selected="True">GraphClust defaults(changeable)</option>
            </param>
            <!--
                The values below are handed to the script as positional arguments,
                in the order used in the command section (not the order shown here).
            -->
            <when value="gclust">
                <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/>
                <param name="max_diff_am" type="integer" value="50" size="5" label=" maximal difference for sizes of matched arcs" help="--max-diff-am"/>
                <param name="tau" argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/>
                <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/>
                <param name="plfold_minlen" type="integer" value="210" size="5" label="Minimal length of a sequences for which RNAplfold is used" />
                <param name="struct_weight" argument="struct-weight" label="Structure weight" type="integer" value="180" min="0" max="800" />
                <param name="indel_opening" argument="indel-opening" label="Indel opening score" type="integer" value="-400" max="0" min="-1500" />
                <param name="indel" argument="indel" label="Indel score" type="integer" value="-200" min="-1000" max="0" />
                <!--
                    NOTE(review): when unchecked this expands to whitespace only, so
                    plfold_span and plfold_winsize move one position to the left on the
                    command line. Confirm that locARNAGraphClust.pl tolerates this.
                -->
                <param name="alifold_consensus_dp" type="boolean" checked="True" truevalue="--alifold-consensus-dp" falsevalue=" " label="Compute consensus dot plot by alifold" />
                <param name="plfold_span" type="integer" value="150" min="-1" max="400" label="Maximum basepair span by RNAplfold (local folding); -1 for global folding" />
                <param name="plfold_winsize" type="integer" value="300" min="-1" max="800" label="Window size for local folding" />
            </when>
            <when value="locarna">
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="model_tree_stk" format="stockholm" label="model.tree.stk" from_work_dir="MODEL/best_subtree.aln" />
    </outputs>
    <tests>
        <test>
            <param name="tree_file" value="1.1.tree"/>
            <param name="center_fa_file" value="1.1.center.fa"/>
            <param name="data_map" value="data.map"/>
            <param name="tree_matrix" value="1.1.matrix.tree"/>
            <conditional name="param_type">
                <!-- Must use the select param's real name so the gclust branch is chosen explicitly. -->
                <param name="param_type_selector" value="gclust"/>
                <param name="p" value="0.001"/>
                <param name="max_diff_am" value="50"/>
                <param name="tau" value="50"/>
                <param name="max_diff" value="100"/>
                <param name="plfold_minlen" value="210"/>
                <param name="struct_weight" value="180"/>
                <param name="indel_opening" value="-400"/>
                <param name="indel" value="-200"/>
                <!-- Boolean test values are given as true/false, not as the truevalue string. -->
                <param name="alifold_consensus_dp" value="true"/>
                <param name="plfold_span" value="150"/>
                <param name="plfold_winsize" value="300"/>
            </conditional>
            <output name="model_tree_stk" file="best_subtree.aln"/>
        </test>
    </tests>
    <help>
        <![CDATA[

**What it does**

MLocARNA computes a multiple sequence-structure alignment of RNA sequences.
It uses *treefile* - file with guide tree in NEWICK format. The given tree is used as guide tree for the
progressive alignment. This saves the calculation of pairwise all-vs-all similarities and construction of the guide tree.

        ]]>
    </help>
    <citations>
        <citation type="bibtex">@inproceedings{costa2010fast,
            title={Fast neighborhood subgraph pairwise distance kernel},
            author={Costa, Fabrizio and De Grave, Kurt},
            booktitle={Proceedings of the 26th International Conference on Machine Learning},
            pages={255--262},
            year={2010},
            organization={Omnipress}
            }
        </citation>
        <citation type="bibtex">@Article{Will_Joshi_Hofacker-LocAR_Accur_bound-2012,
            author = {Will, Sebastian and Joshi, Tejal and Hofacker, Ivo L. and Stadler, Peter F. and Backofen, Rolf},
            title = {{LocARNA}-{P}: {Accurate} boundary prediction and improved detection of structural {RNAs}},
            journal = {RNA},
            year = {2012},
            volume = {18},
            number = {5},
            pages = {900-14},
            user = {will},
            pmid = {22450757},
            doi = {10.1261/rna.029041.111},
            issn = {1469-9001},
            issn = {1355-8382},
            abstract = {Current genomic screens for noncoding RNAs (ncRNAs) predict a large number of genomic regions containing potential structural ncRNAs. The analysis of these data requires highly accurate prediction of ncRNA boundaries and discrimination of promising candidate ncRNAs from weak predictions. Existing methods struggle with these goals because they rely on sequence-based multiple sequence alignments, which regularly misalign RNA structure and therefore do not support identification of structural similarities. To overcome this limitation, we compute columnwise and global reliabilities of alignments based on sequence and structure similarity; we refer to these structure-based alignment reliabilities as STARs. The columnwise STARs of alignments, or STAR profiles, provide a versatile tool for the manual and automatic analysis of ncRNAs. In particular, we improve the boundary prediction of the widely used ncRNA gene finder RNAz by a factor of 3 from a median deviation of 47 to 13 nt. Post-processing RNAz predictions, LocARNA-P's STAR score allows much stronger discrimination between true- and false-positive predictions than RNAz's own evaluation. The improved accuracy, in this scenario increased from AUC 0.71 to AUC 0.87, significantly reduces the cost of successive analysis steps. The ready-to-use software tool LocARNA-P produces structure-based multiple RNA alignments with associated columnwise STARs and predicts ncRNA boundaries. We provide additional results, a web server for LocARNA/LocARNA-P, and the software package, including documentation and a pipeline for refining screens for structural ncRNA, at http://www.bioinf.uni-freiburg.de/Supplements/LocARNA-P/.}
            }
        </citation>
        <citation type="bibtex">@Article{Will:etal:_infer_non_codin_rna_famil:PLOS2007,
            author = {Sebastian Will and Kristin Reiche and Ivo L. Hofacker and Peter F. Stadler and Rolf Backofen},
            title = {Inferring Non-Coding {RNA} Families and Classes by Means of Genome-Scale Structure-Based Clustering},
            journal = {PLoS Comput Biol},
            year = 2007,
            volume = {3},
            number = {4},
            pages = {e65},
            issn = {1553-7358},
            issn = {1553-734X},
            pmid = {17432929},
            doi = {10.1371/journal.pcbi.0030065},
            user = {will},
            abstract = {The RFAM database defines families of ncRNAs by means of sequence similarities that are sufficient to establish homology. In some cases, such as microRNAs, box H/ACA snoRNAs, functional commonalities define classes of RNAs that are characterized by structural similarities, and typically consist of multiple RNA families. Recent advances in high-throughput transcriptomics and comparative genomics have produced very large sets of putative non-coding RNAs and regulatory RNA signals. For many of them, evidence for stabilizing selection acting on their secondary structures has been derived, and at least approximate models of their structures have been computed. The overwhelming majority of these hypothetical RNAs cannot be assigned to established families or classes. We present here a structure-based clustering approach that is capable of extracting putative RNA classes from genome-wide surveys for structured RNAs. The LocARNA tool implements a novel variant of the Sankoff algorithm that is sufficiently fast to deal with several thousand candidate sequences. The method is also robust against false positive predictions, i.e., a contamination of the input data with unstructured or non-conserved sequences. We have successfully tested the LocARNA-based clustering approach on the sequences of the RFAM-seed alignments. Furthermore, we have applied it to a previously published set of 3332 predicted structured elements in the Ciona intestinalis genomes (Missal et al., Bioinformatics 21(S2), i77-i78). In addition to recovering e.g. tRNAs as a structure-based class, the method identifies several RNA families, including microRNA and snoRNA candidates, and suggests several novel classes of ncRNAs for which to-date no representative has been experimentally characterized.}
            }
        </citation>
        <citation type="bibtex">@Article{Smith:Heyne:Richter:Freib_RNA_Tools:NAR2010,
            author = {Smith, Cameron and Heyne, Steffen and Richter, Andreas S. and Will, Sebastian and Backofen, Rolf},
            title = {Freiburg {RNA} {Tools}: a web server integrating {IntaRNA}, {ExpaRNA} and {LocARNA}},
            journal = {Nucleic Acids Research},
            year = {2010},
            volume = {38 Suppl},
            number = {},
            pages = {W373-7},
            user = {arichter},
            pmid = {20444875},
            doi = {10.1093/nar/gkq316},
            issn = {0305-1048},
            issn = {1362-4962},
            abstract = {The Freiburg RNA tools web server integrates three tools for the advanced analysis of RNA in a common web-based user interface. The tools IntaRNA, ExpaRNA and LocARNA support the prediction of RNA-RNA interaction, exact RNA matching and alignment of RNA, respectively. The Freiburg RNA tools web server and the software packages of the stand-alone tools are freely accessible at http://rna.informatik.uni-freiburg.de.}
            }
        </citation>
    </citations>
</tool>