Mercurial > repos > rnateam > graphclust_mlocarna
changeset 4:51261dff08a5 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/LocARNAGraphClust commit f971832d2b34a182314e5201ea6895dd207c5923
author | rnateam |
---|---|
date | Mon, 13 Mar 2017 18:02:55 -0400 |
parents | 606440a3852d |
children | 859ee7d8f207 |
files | locarna_best_subtree.xml |
diffstat | 1 files changed, 78 insertions(+), 193 deletions(-) [+] |
line wrap: on
line diff
--- a/locarna_best_subtree.xml Mon Feb 27 12:02:38 2017 -0500 +++ b/locarna_best_subtree.xml Mon Mar 13 18:02:55 2017 -0400 @@ -1,6 +1,6 @@ <tool id="locarna_best_subtree" name="locarna_graphclust" version="0.1.0" > <requirements> - <requirement type="package" version="0.1.9">graphclust-wrappers</requirement> + <requirement type="package" version="0.1.10">graphclust-wrappers</requirement> <requirement type="package" version='1.8.10'>locarna</requirement> <requirement type="package" version='2.1'>rnaz</requirement> <requirement type="package" version='0.07'>perl-math-round</requirement> @@ -11,9 +11,16 @@ <command> <![CDATA[ - 'locARNAGraphClust.pl' '$center_fa_file' '$tree_file' '$tree_matrix' '$data_map' $allow_overlap + locARNAGraphClust.pl + '$center_fa_file' + '$tree_file' + '$tree_matrix' + '$data_map' + $allow_overlap + $free_endgaps + #if str($param_type.param_type_selector) == "gclust" - $param_type.p + $param_type.p $param_type.max_diff_am $param_type.max_diff $param_type.tau @@ -34,76 +41,74 @@ <param type="data" name="data_map" label="data_map" format="txt" help="text format" /> <param name="allow_overlap" type="boolean" truevalue="1" falsevalue="0" label="Allow overlap in subtrees" help="otherwise ignore subtree if it contains overlapping sequences"/> - <conditional name="param_type"> - <param name="param_type_selector" type="select" label="Choose the type of parameters"> - <option value="locarna">LocARNA defaults</option> - <option value="gclust" selected="True">GrapClust defaults(changeable)</option> + <param name="free_endgaps" type="select" label="Free endgaps" + help="Specify whether gaps at the ends (all, 5', or 3' ends) + of the sequences should be penalized or allowed for free."> + <option value="0">No free endgaps</option> + <option value="--free-endgaps">Free endgaps</option> + <option value="--free-endgaps-5">Free endgaps, only 5'</option> + <option value="--free-endgaps-3">Free endgaps, only 3'</option> </param> - <when value="gclust"> - - <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/> - <param name="max_diff_am" type="integer" value="50" size="5" label=" maximal difference for sizes of matched arcs" help="--max-diff-am"/> - <param argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/> - <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/> - <param name="plfold_minlen" type="integer" value="210" size="5" label="Minimal length of a sequences for which RNAplfold is used" /> - - <param name="struct_weight" argument="struct-weight" - label="Structure weight" type="integer" - value="180" min="0" max="800" /> - <param name="indel_opening" argument="indel-opening" - label="Indel opening score" type="integer" - value="-400" max="0" min="-1500" /> - <param argument="indel" label="Indel score" type="integer" - value="-200" min="-1000" max="0" /> - - <param name="alifold_consensus_dp" - type="boolean" checked="True" - truevalue="--alifold-consensus-dp" falsevalue=" " - label="Compute consensus dot plot by alifold" /> - - <param name="plfold_span" - type="integer" value="150" min="-1" max="400" - label="Maximum basepair span by RNAplfold (local folding); -1 for global folding" /> - - <param name="plfold_winsize" + <conditional name="param_type"> + <param name="param_type_selector" type="select" label="Choose the type of parameters"> + <option value="locarna">LocARNA defaults</option> + <option value="gclust" selected="True">GrapClust defaults(changeable)</option> + </param> + <when value="gclust"> + <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/> + <param name="max_diff_am" type="integer" value="50" size="5" label=" maximal difference for sizes of matched arcs" help="--max-diff-am"/> + <param argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/> + <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/> + <param name="plfold_minlen" type="integer" value="210" size="5" label="Minimal length of a sequences for which RNAplfold is used" /> + <param name="struct_weight" argument="struct-weight" + label="Structure weight" type="integer" + value="180" min="0" max="800" /> + <param name="indel_opening" argument="indel-opening" + label="Indel opening score" type="integer" + value="-400" max="0" min="-1500" /> + <param argument="indel" label="Indel score" type="integer" + value="-200" min="-1000" max="0" /> + <param name="alifold_consensus_dp" + type="boolean" checked="True" + truevalue="--alifold-consensus-dp" falsevalue=" " + label="Compute consensus dot plot by alifold" /> + <param name="plfold_span" + type="integer" value="150" min="-1" max="400" + label="Maximum basepair span by RNAplfold (local folding); -1 for global folding" /> + <param name="plfold_winsize" type="integer" value="300" min="-1" max="800" label="Window size for local folding" /> - - - </when> - <when value="locarna"> - </when> -</conditional> - - </inputs> - <outputs> - <data name="model_tree_stk" format="stockholm" label="model.tree.stk" from_work_dir="MODEL/best_subtree.aln" /> - </outputs> - <tests> - <test> - <param name="tree_file" value="1.1.tree"/> - <param name="center_fa_file" value="1.1.center.fa"/> - <param name="data_map" value="data.map"/> - <param name="tree_matrix" value="1.1.matrix.tree"/> - <param name="allow_overlap" value="0"/> - <conditional name="param_type"> - <param name="iteration_num_selector" value="gclust"/> - <param name="p" value="0.001"/> - <param name="max_diff_am" value="50"/> - <param name="tau" value="50"/> - <param name="max_diff" value="100"/> - <param name="plfold_minlen" value="210"/> - <param name="struct_weight" value="180"/> - <param name="indel_opening" value="-400"/> - <param name="indel" value="-200"/> - <param name="alifold_consensus_dp" value="--alifold-consensus-dp"/> - <param name="plfold_span" value="150"/> - <param name="plfold_winsize" value="300"/> - </conditional> - <output name="model_tree_stk" file="best_subtree.aln"/> - </test> - </tests> - <help> + </when> + <when value="locarna" /> + </conditional> + </inputs> + <outputs> + <data name="model_tree_stk" format="stockholm" label="model.tree.stk" from_work_dir="MODEL/best_subtree.aln" /> + </outputs> + <tests> + <test> + <param name="tree_file" value="1.1.tree"/> + <param name="center_fa_file" value="1.1.center.fa"/> + <param name="data_map" value="data.map"/> + <param name="tree_matrix" value="1.1.matrix.tree"/> + <param name="allow_overlap" value="0"/> + <param name="free_endgaps" value="0"/> + <param name="param_type_selector" value="gclust"/> + <param name="p" value="0.001"/> + <param name="max_diff_am" value="50"/> + <param name="tau" value="50"/> + <param name="max_diff" value="100"/> + <param name="plfold_minlen" value="210"/> + <param name="struct_weight" value="180"/> + <param name="indel_opening" value="-400"/> + <param name="indel" value="-200"/> + <param name="alifold_consensus_dp" value="--alifold-consensus-dp"/> + <param name="plfold_span" value="150"/> + <param name="plfold_winsize" value="300"/> + <output name="model_tree_stk" file="best_subtree.aln"/> + </test> + </tests> + <help> <![CDATA[ **What it does** @@ -112,7 +117,6 @@ This saves the calculation of pairwise all-vs-all similarities and construction of the guide tree. - ]]> </help> <citations> @@ -125,127 +129,8 @@ organization={Omnipress} } </citation> - <citation type="bibtex">@Article{Will_Joshi_Hofacker-LocAR_Accur_bound-2012, - author = {Will, Sebastian and Joshi, Tejal and Hofacker, Ivo L. and - Stadler, Peter F. and Backofen, Rolf}, - title = {{LocARNA}-{P}: {Accurate} boundary prediction and improved - detection of structural {RNAs}}, - journal = {RNA}, - year = {2012}, - volume = {18}, - number = {5}, - pages = {900-14}, - user = {will}, - pmid = {22450757}, - doi = {10.1261/rna.029041.111}, - issn = {1469-9001}, - issn = {1355-8382}, - abstract = {Current genomic screens for noncoding RNAs (ncRNAs) predict - a large number of genomic regions containing potential - structural ncRNAs. The analysis of these data requires - highly accurate prediction of ncRNA boundaries and - discrimination of promising candidate ncRNAs from weak - predictions. Existing methods struggle with these goals - because they rely on sequence-based multiple sequence - alignments, which regularly misalign RNA structure and - therefore do not support identification of structural - similarities. To overcome this limitation, we compute - columnwise and global reliabilities of alignments based on - sequence and structure similarity; we refer to these - structure-based alignment reliabilities as STARs. The - columnwise STARs of alignments, or STAR profiles, provide a - versatile tool for the manual and automatic analysis of - ncRNAs. In particular, we improve the boundary prediction of - the widely used ncRNA gene finder RNAz by a factor of 3 from - a median deviation of 47 to 13 nt. Post-processing RNAz - predictions, LocARNA-P's STAR score allows much stronger - discrimination between true- and false-positive predictions - than RNAz's own evaluation. The improved accuracy, in this - scenario increased from AUC 0.71 to AUC 0.87, significantly - reduces the cost of successive analysis steps. The - ready-to-use software tool LocARNA-P produces - structure-based multiple RNA alignments with associated - columnwise STARs and predicts ncRNA boundaries. We provide - additional results, a web server for LocARNA/LocARNA-P, and - the software package, including documentation and a pipeline - for refining screens for structural ncRNA, at - http://www.bioinf.uni-freiburg.de/Supplements/LocARNA-P/.} -} - </citation> - <citation type="bibtex">@Article{Will:etal:_infer_non_codin_rna_famil:PLOS2007, - author = {Sebastian Will and Kristin Reiche and Ivo L. Hofacker and - Peter F. Stadler and Rolf Backofen}, - title = {Inferring Non-Coding {RNA} Families and Classes by Means of - Genome-Scale Structure-Based Clustering}, - journal = {PLoS Comput Biol}, - year = 2007, - volume = {3}, - number = {4}, - pages = {e65}, - issn = {1553-7358}, - issn = {1553-734X}, - pmid = {17432929}, - doi = {10.1371/journal.pcbi.0030065}, - user = {will}, - abstract = {The RFAM database defines families of ncRNAs by means of - sequence similarities that are sufficientto establish - homology. In some cases, such as microRNAs, box H/ACA - snoRNAs, functional commonalities define classes of RNAs - that are characterized by structural similarities, and - typically consist ofmultiple RNA families. Recent advances - in high-throughput transcriptomics and comparative genomics - have produced very large sets of putative non-coding RNAs - and regulatory RNA signals. For many ofthem, evidence for - stabilizing selection acting on their secondary structures - has been derived, and at least approximate models of their - structures have been computed. The overwhelming majority of - these hypo-thetical RNAs cannot be assigned to established - families or classes. We present here a structure-based - clustering approach that is capable of extracting putative - RNA classesfrom genome-wide surveys for structured RNAs. The - LocARNA tool implements a novel variant of theSankoff - algorithm that is sufficiently fast to deal with several - thousand candidate sequences. The method is also robust - against false positive predictions, i.e., a contamination of - the input data with unstructured ornon-conserved - sequences. We have successfully tested the LocARNA-based - clustering approach on the sequences of the - RFAM-seedalignments. Furthermore, we have applied it to a - previously published set of 3332 predicted structured - elements in the Ciona intestinalis genomes (Missal et al., - Bioinformatics 21(S2), i77-i78). In addition torecovering - e.g. tRNAs as a structure-based class, the method identifies - several RNA families, including microRNA and snoRNA - candidates, and suggests several novel classes of ncRNAs for - which to-date norepresentative has been experimentally - characterized.} -} - - </citation> - <citation type="bibtex">@Article{Smith:Heyne:Richter:Freib_RNA_Tools:NAR2010, - author = {Smith, Cameron and Heyne, Steffen and Richter, Andreas S. - and Will, Sebastian and Backofen, Rolf}, - title = {Freiburg {RNA} {Tools}: a web server integrating {IntaRNA}, - {ExpaRNA} and {LocARNA}}, - journal = NAR, - year = {2010}, - volume = {38 Suppl}, - number = {}, - pages = {W373-7}, - user = {arichter}, - pmid = {20444875}, - doi = {10.1093/nar/gkq316}, - issn = {0305-1048}, - issn = {1362-4962}, - abstract = {The Freiburg RNA tools web server integrates three tools - for the advanced analysis of RNA in a common web-based user - interface. The tools IntaRNA, ExpaRNA and LocARNA support - the prediction of RNA-RNA interaction, exact RNA matching - and alignment of RNA, respectively. The Freiburg RNA tools - web server and the software packages of the stand-alone - tools are freely accessible at - http://rna.informatik.uni-freiburg.de.} -} - </citation> + <citation type="doi">10.1261/rna.029041.111</citation> + <citation type="doi">10.1371/journal.pcbi.0030065</citation> + <citation type="doi">10.1093/nar/gkq316</citation> </citations> </tool>