# HG changeset patch # User rnateam # Date 1481891729 18000 # Node ID 15bd4fb05e5c73dcd6e522602cece70d6046ae93 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/LocARNAGraphClust commit 21aaee40723b5341b4236edeb0e72995c2054053 diff -r 000000000000 -r 15bd4fb05e5c locarna_best_subtree.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/locarna_best_subtree.xml Fri Dec 16 07:35:29 2016 -0500 @@ -0,0 +1,190 @@ + + + graphclust-wrappers + locarna + rnaz + perl-math-round + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @inproceedings{costa2010fast, + title={Fast neighborhood subgraph pairwise distance kernel}, + author={Costa, Fabrizio and De Grave, Kurt}, + booktitle={Proceedings of the 26th International Conference on Machine Learning}, + pages={255--262}, + year={2010}, + organization={Omnipress} + } + + @Article{Will_Joshi_Hofacker-LocAR_Accur_bound-2012, + author = {Will, Sebastian and Joshi, Tejal and Hofacker, Ivo L. and + Stadler, Peter F. and Backofen, Rolf}, + title = {{LocARNA}-{P}: {Accurate} boundary prediction and improved + detection of structural {RNAs}}, + journal = {RNA}, + year = {2012}, + volume = {18}, + number = {5}, + pages = {900-14}, + user = {will}, + pmid = {22450757}, + doi = {10.1261/rna.029041.111}, + issn = {1469-9001}, + issn = {1355-8382}, + abstract = {Current genomic screens for noncoding RNAs (ncRNAs) predict + a large number of genomic regions containing potential + structural ncRNAs. The analysis of these data requires + highly accurate prediction of ncRNA boundaries and + discrimination of promising candidate ncRNAs from weak + predictions. Existing methods struggle with these goals + because they rely on sequence-based multiple sequence + alignments, which regularly misalign RNA structure and + therefore do not support identification of structural + similarities. 
To overcome this limitation, we compute + columnwise and global reliabilities of alignments based on + sequence and structure similarity; we refer to these + structure-based alignment reliabilities as STARs. The + columnwise STARs of alignments, or STAR profiles, provide a + versatile tool for the manual and automatic analysis of + ncRNAs. In particular, we improve the boundary prediction of + the widely used ncRNA gene finder RNAz by a factor of 3 from + a median deviation of 47 to 13 nt. Post-processing RNAz + predictions, LocARNA-P's STAR score allows much stronger + discrimination between true- and false-positive predictions + than RNAz's own evaluation. The improved accuracy, in this + scenario increased from AUC 0.71 to AUC 0.87, significantly + reduces the cost of successive analysis steps. The + ready-to-use software tool LocARNA-P produces + structure-based multiple RNA alignments with associated + columnwise STARs and predicts ncRNA boundaries. We provide + additional results, a web server for LocARNA/LocARNA-P, and + the software package, including documentation and a pipeline + for refining screens for structural ncRNA, at + http://www.bioinf.uni-freiburg.de/Supplements/LocARNA-P/.} +} + + @Article{Will:etal:_infer_non_codin_rna_famil:PLOS2007, + author = {Sebastian Will and Kristin Reiche and Ivo L. Hofacker and + Peter F. Stadler and Rolf Backofen}, + title = {Inferring Non-Coding {RNA} Families and Classes by Means of + Genome-Scale Structure-Based Clustering}, + journal = {PLoS Comput Biol}, + year = 2007, + volume = {3}, + number = {4}, + pages = {e65}, + issn = {1553-7358}, + issn = {1553-734X}, + pmid = {17432929}, + doi = {10.1371/journal.pcbi.0030065}, + user = {will}, + abstract = {The RFAM database defines families of ncRNAs by means of + sequence similarities that are sufficient to establish + homology. 
In some cases, such as microRNAs, box H/ACA + snoRNAs, functional commonalities define classes of RNAs + that are characterized by structural similarities, and + typically consist of multiple RNA families. Recent advances + in high-throughput transcriptomics and comparative genomics + have produced very large sets of putative non-coding RNAs + and regulatory RNA signals. For many of them, evidence for + stabilizing selection acting on their secondary structures + has been derived, and at least approximate models of their + structures have been computed. The overwhelming majority of + these hypothetical RNAs cannot be assigned to established + families or classes. We present here a structure-based + clustering approach that is capable of extracting putative + RNA classes from genome-wide surveys for structured RNAs. The + LocARNA tool implements a novel variant of the Sankoff + algorithm that is sufficiently fast to deal with several + thousand candidate sequences. The method is also robust + against false positive predictions, i.e., a contamination of + the input data with unstructured or non-conserved + sequences. We have successfully tested the LocARNA-based + clustering approach on the sequences of the + RFAM-seed alignments. Furthermore, we have applied it to a + previously published set of 3332 predicted structured + elements in the Ciona intestinalis genomes (Missal et al., + Bioinformatics 21(S2), i77-i78). In addition to recovering + e.g. tRNAs as a structure-based class, the method identifies + several RNA families, including microRNA and snoRNA + candidates, and suggests several novel classes of ncRNAs for + which to-date no representative has been experimentally + characterized.} +} + + + @Article{Smith:Heyne:Richter:Freib_RNA_Tools:NAR2010, + author = {Smith, Cameron and Heyne, Steffen and Richter, Andreas S. 
+ and Will, Sebastian and Backofen, Rolf}, + title = {Freiburg {RNA} {Tools}: a web server integrating {IntaRNA}, + {ExpaRNA} and {LocARNA}}, + journal = {Nucleic Acids Research}, + year = {2010}, + volume = {38 Suppl}, + number = {}, + pages = {W373--W377}, + user = {arichter}, + pmid = {20444875}, + doi = {10.1093/nar/gkq316}, + issn = {0305-1048}, + issn = {1362-4962}, + abstract = {The Freiburg RNA tools web server integrates three tools + for the advanced analysis of RNA in a common web-based user + interface. The tools IntaRNA, ExpaRNA and LocARNA support + the prediction of RNA-RNA interaction, exact RNA matching + and alignment of RNA, respectively. The Freiburg RNA tools + web server and the software packages of the stand-alone + tools are freely accessible at + http://rna.informatik.uni-freiburg.de.} +} + + + diff -r 000000000000 -r 15bd4fb05e5c test-data/1.1.center.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.1.center.fa Fri Dec 16 07:35:29 2016 -0500 @@ -0,0 +1,40 @@ +>7 SEQ7#1#83#+ ORIGID RF00005_rep.12_AC108081.2/59868-59786_7 ORIGHEAD RF00005_rep.12 +GUCAGGAUGGCCGAGCGGUCUAAGGCGCUGCGUUCAGGUCGCAGUCUCCCCUGGAGGCGUGGGUUCGAAUCCCACUUCUGACA +>9 SEQ9#1#73#+ ORIGID RF00005_rep.14_AL021808.2/65570-65498_9 ORIGHEAD RF00005_rep.14 +GCUUCUGUAGUGUAGUGGUUAUCACGUUCGCCUCACACGCGAAAGGUCCCCGGUUCGAAACCGGGCAGAAGCA +>10 SEQ10#1#73#+ ORIGID RF00005_rep.15_AC008443.10/42590-42518_10 ORIGHEAD RF00005_rep.15 +GCCCGGCUAGCUCAGUCGGUAGAGCAUGAGACUCUUAAUCUCAGGGUCGUGGGUUCGAGCCCCACGUUGGGCG +>15 SEQ15#1#73#+ ORIGID RF00005_rep.1_AC005329.1/7043-6971_15 ORIGHEAD RF00005_rep.1 +GCCGAAAUAGCUCAGUUGGGAGAGCGUUAGACUGAAGAUCUAAAGGUCCCUGGUUCGAUCCCGGGUUUCGGCA +>16 SEQ16#1#72#+ ORIGID RF00005_rep.20_AL671879.2/100356-100285_16 ORIGHEAD RF00005_rep.20 +GGGGAUGUAGCUCAGUGGUAGAGCGCAUGCUUCGCAUGUAUGAGGCCCCGGGUUCGAUCCCCGGCAUCUCCA +>17 SEQ17#1#71#+ ORIGID RF00005_rep.21_AL355149.13/15278-15208_17 ORIGHEAD RF00005_rep.21 +GCAUUGGUGGUUCAGUGGUAGAAUUCUCGCCUCCCACGCGGGAGACCCGGGUUCAAUUCCCGGCCAAUGCA +>18 SEQ18#1#72#+ 
ORIGID RF00005_rep.22_AL590385.23/26487-26416_18 ORIGHEAD RF00005_rep.22 +GCGUUGGUGGUAUAGUGGUGAGCAUAGCUGCCUUCCAAGCAGUUGACCCGGGUUCGAUUCCCGGCCAACGCA +>23 SEQ23#1#74#+ ORIGID RF00005_rep.27_AL352978.6/119697-119770_23 ORIGHEAD RF00005_rep.27 +GGCCGGUUAGCUCAGUUGGUUAGAGCGUGGUGCUAAUAACGCCAAGGUCGCGGGUUCGAUCCCCGUACGGGCCA +>28 SEQ28#1#71#+ ORIGID RF00005_rep.31_AC092686.3/29631-29561_28 ORIGHEAD RF00005_rep.31 +GCAUUGGUGGUUCAGUGGUAGAAUUCUCGCCUGCCACGCGGGAGGCCCGGGUUCGAUUCCCGGCCAAUGCA +>30 SEQ30#1#72#+ ORIGID RF00005_rep.33_AC018638.5/4694-4623_30 ORIGHEAD RF00005_rep.33 +GGCUCGUUGGUCUAGGGGUAUGAUUCUCGCUUAGGGUGCGAGAGGUCCCGGGUUCAAAUCCCGGACGAGCCC +>31 SEQ31#1#73#+ ORIGID RF00005_rep.34_AC008443.10/43006-42934_31 ORIGHEAD RF00005_rep.34 +GUUUCCGUAGUGUAGUGGUUAUCACGUUCGCCUCACACGCGAAAGGUCCCCGGUUCGAAACCGGGCGGAAACA +>32 SEQ32#1#73#+ ORIGID RF00005_rep.35_AC005783.1/27398-27326_32 ORIGHEAD RF00005_rep.35 +GUUUCCGUAGUGUAGCGGUUAUCACAUUCGCCUCACACGCGAAAGGUCCCCGGUUCGAUCCCGGGCGGAAACA +>33 SEQ33#1#72#+ ORIGID RF00005_rep.36_AC007298.17/145366-145295_33 ORIGHEAD RF00005_rep.36 +UCCUCGUUAGUAUAGUGGUGAGUAUCCCCGCCUGUCACGCGGGAGACCGGGGUUCGAUUCCCCGACGGGGAG +>35 SEQ35#1#72#+ ORIGID RF00005_rep.38_J00309.1/356-427_35 ORIGHEAD RF00005_rep.38 +UCCCUGGUGGUCUAGUGGCUAGGAUUCGGCGCUUUCACCGCCGCGCCCCGGGUUCGAUUCCCGGCCAGGAAU +>37 SEQ37#1#82#+ ORIGID RF00005_rep.3_Z54587.1/126-45_37 ORIGHEAD RF00005_rep.3 +GGUAGCGUGGCCGAGCGGUCUAAGGCGCUGGAUUUAGGCUCCAGUCUCUUCGGAGGCGUGGGUUCGAAUCCCACCGCUGCCA +>46 SEQ46#1#72#+ ORIGID RF00005_rep.5_AL590385.23/26129-26058_46 ORIGHEAD RF00005_rep.5 +UCCCUGGUGGUCUAGUGGUUAGGAUUCGGCGCUCUCACCGCCGCGGCCCGGGUUCGAUUCCCGGUCAGGGAA +>51 SEQ51#1#88#+ ORIGID RF00006_rep.0_AF045145.1/1-88_51 ORIGHEAD RF00006_rep.0 +GGCUGGCUUUAGCUCAGCGGUUACUUCGCGUGUCAUCAAACCACCUCUCUGGGUUGUUCGAGACCCGCGGGCGCUCUCCAGCCCUCUU +>52 SEQ52#1#101#+ ORIGID RF00006_rep.1_AC005219.1/49914-50014_52 ORIGHEAD RF00006_rep.1 +GGGUCGGAGUUAGCUCAAGCGGUUACCUCCUCAUGCCGGACUUUCUAUCUGUCCAUCUCUGUGCUGGGGUUCGAGACCCGCGGGUGCUUACUGACCCUUUU +>53 
SEQ53#1#98#+ ORIGID RF00006_rep.2_AF045143.1/1-98_53 ORIGHEAD RF00006_rep.2 +GGCUGGCUUUAGCUCAGCGGUUACUUCGACAGUUCUUUAAUUGAAACAAGCAACCUGUCUGGGUUGUUCGAGACCCGCGGGCGCUCUCCAGUCCUUUU +>54 SEQ54#1#88#+ ORIGID RF00006_rep.3_AF045144.1/1-88_54 ORIGHEAD RF00006_rep.3 +GGCUGGCUUUAGCUCAGCGGUUACUUCGAGUACAUUGUAACCACCUCUCUGGGUGGUUCGAGACCCGCGGGUGCUUUCCAGCUCUUUU diff -r 000000000000 -r 15bd4fb05e5c test-data/1.1.matrix.tree --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.1.matrix.tree Fri Dec 16 07:35:29 2016 -0500 @@ -0,0 +1,20 @@ +0.000000 0.399566 0.449067 0.416346 0.422581 0.421691 0.420328 0.433314 0.421864 0.415656 0.388584 0.396080 0.395237 0.425138 0.525331 0.428431 0.423873 0.426227 0.430509 0.421103 +0.399566 0.000000 0.413478 0.410598 0.415992 0.448487 0.444776 0.413799 0.454424 0.422079 0.758362 0.628040 0.449935 0.432423 0.395294 0.432108 0.413869 0.394038 0.410417 0.404625 +0.449067 0.413478 0.000000 0.447753 0.452406 0.432149 0.429802 0.446808 0.434943 0.437722 0.401281 0.400204 0.406763 0.418486 0.433646 0.417759 0.427363 0.424735 0.425622 0.422313 +0.416346 0.410598 0.447753 0.000000 0.432407 0.413443 0.406226 0.453570 0.424393 0.406074 0.403132 0.413663 0.412748 0.409772 0.411737 0.422837 0.408136 0.419974 0.412883 0.409892 +0.422581 0.415992 0.452406 0.432407 0.000000 0.445562 0.440880 0.457374 0.453192 0.448142 0.396150 0.415425 0.416902 0.440906 0.424893 0.433323 0.417320 0.420319 0.412897 0.417445 +0.421691 0.448487 0.432149 0.413443 0.445562 0.000000 0.473887 0.422858 0.692833 0.442948 0.434741 0.428054 0.474703 0.466744 0.423109 0.468801 0.428776 0.430573 0.428577 0.424501 +0.420328 0.444776 0.429802 0.406226 0.440880 0.473887 0.000000 0.423941 0.488936 0.425014 0.412393 0.417502 0.429722 0.471749 0.413970 0.438547 0.402300 0.405816 0.416556 0.412890 +0.433314 0.413799 0.446808 0.453570 0.457374 0.422858 0.423941 0.000000 0.427433 0.436563 0.407055 0.414622 0.415155 0.429164 0.444031 0.426024 0.435089 0.420686 0.422545 0.422448 +0.421864 
0.454424 0.434943 0.424393 0.453192 0.692833 0.488936 0.427433 0.000000 0.434849 0.439028 0.434897 0.467830 0.489166 0.434846 0.473733 0.418076 0.412067 0.415370 0.421449 +0.415656 0.422079 0.437722 0.406074 0.448142 0.442948 0.425014 0.436563 0.434849 0.000000 0.414830 0.419767 0.423499 0.443441 0.420867 0.440759 0.423357 0.422084 0.413487 0.413927 +0.388584 0.758362 0.401281 0.403132 0.396150 0.434741 0.412393 0.407055 0.439028 0.414830 0.000000 0.826283 0.458338 0.420348 0.389570 0.431953 0.402238 0.389074 0.397992 0.391832 +0.396080 0.628040 0.400204 0.413663 0.415425 0.428054 0.417502 0.414622 0.434897 0.419767 0.826283 0.000000 0.444033 0.419958 0.392449 0.423827 0.410142 0.395175 0.397606 0.395594 +0.395237 0.449935 0.406763 0.412748 0.416902 0.474703 0.429722 0.415155 0.467830 0.423499 0.458338 0.444033 0.000000 0.431310 0.401528 0.447342 0.406519 0.423555 0.410966 0.407189 +0.425138 0.432423 0.418486 0.409772 0.440906 0.466744 0.471749 0.429164 0.489166 0.443441 0.420348 0.419958 0.431310 0.000000 0.442879 0.626901 0.429861 0.419845 0.414696 0.426070 +0.525331 0.395294 0.433646 0.411737 0.424893 0.423109 0.413970 0.444031 0.434846 0.420867 0.389570 0.392449 0.401528 0.442879 0.000000 0.452328 0.441888 0.432467 0.434868 0.431986 +0.428431 0.432108 0.417759 0.422837 0.433323 0.468801 0.438547 0.426024 0.473733 0.440759 0.431953 0.423827 0.447342 0.626901 0.452328 0.000000 0.423858 0.419329 0.419532 0.421059 +0.423873 0.413869 0.427363 0.408136 0.417320 0.428776 0.402300 0.435089 0.418076 0.423357 0.402238 0.410142 0.406519 0.429861 0.441888 0.423858 0.000000 0.474825 0.594380 0.571671 +0.426227 0.394038 0.424735 0.419974 0.420319 0.430573 0.405816 0.420686 0.412067 0.422084 0.389074 0.395175 0.423555 0.419845 0.432467 0.419329 0.474825 0.000000 0.466968 0.489649 +0.430509 0.410417 0.425622 0.412883 0.412897 0.428577 0.416556 0.422545 0.415370 0.413487 0.397992 0.397606 0.410966 0.414696 0.434868 0.419532 0.594380 0.466968 0.000000 0.548030 +0.421103 0.404625 
0.422313 0.409892 0.417445 0.424501 0.412890 0.422448 0.421449 0.413927 0.391832 0.395594 0.407189 0.426070 0.431986 0.421059 0.571671 0.489649 0.548030 0.000000 diff -r 000000000000 -r 15bd4fb05e5c test-data/1.1.tree --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.1.tree Fri Dec 16 07:35:29 2016 -0500 @@ -0,0 +1,1 @@ +(((((54:0.133216,(53:0.115952,51:0.115952):0.0172647):0.0413516,52:0.174568):0.0233911,(37:0.150476,7:0.150476):0.047483):0.00361419,(((23:0.184455,16:0.184455):0.0038835,10:0.188338):0.00251517,15:0.190853):0.01072):0.00340405,((((46:0.099691,35:0.099691):0.0793888,((28:0.066725,17:0.066725):0.105711,18:0.172436):0.00664407):0.0153606,30:0.19444):0.00190942,(33:0.187757,((32:0,31:0):0.066541,9:0.066541):0.121216):0.00859264):0.00862737):0 diff -r 000000000000 -r 15bd4fb05e5c test-data/best_subtree.aln --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/best_subtree.aln Fri Dec 16 07:35:29 2016 -0500 @@ -0,0 +1,8 @@ +CLUSTAL W --- LocARNA 1.8.10 + +18 GCGUUGGUGGUAUAGUGGUGAGCAUAGCUGCCUUCCAAGCAGUUGA-CCCGGGUUCGAUUCCCGGCCAACGCA +17 GCAUUGGUGGUUCAGUGGU-AGAAUUCUCGCCUCCCACGCGGGAGA-CCCGGGUUCAAUUCCCGGCCAAUGCA +28 GCAUUGGUGGUUCAGUGGU-AGAAUUCUCGCCUGCCACGCGGGAGG-CCCGGGUUCGAUUCCCGGCCAAUGCA +46 UCCCUGGUGGUCUAGUGGUUAGGAUUCGGCGCUCUCACCGCCGCGG-CCCGGGUUCGAUUCCCGGUCAGGGAA +35 UCCCUGGUGGUCUAGUGGCUAGGAUUCGGCGCUUUCACCGCCGCGC-CCCGGGUUCGAUUCCCGGCCAGGAAU +30 GGCUCGUUGGUCUAGGGGU-AUGAUUCUCGCUUAGGGUGCGAGAGGUCCCGGGUUCAAAUCCCGGACGAGCCC diff -r 000000000000 -r 15bd4fb05e5c test-data/data.map --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/data.map Fri Dec 16 07:35:29 2016 -0500 @@ -0,0 +1,92 @@ +1 SEQ1#1#120#+ +2 SEQ2#1#118#+ +3 SEQ3#1#104#+ +4 SEQ4#1#73#+ +5 SEQ5#1#72#+ +6 SEQ6#1#66#+ +7 SEQ7#1#83#+ +8 SEQ8#1#70#+ +9 SEQ9#1#73#+ +10 SEQ10#1#73#+ +11 SEQ11#1#82#+ +12 SEQ12#1#82#+ +13 SEQ13#1#82#+ +14 SEQ14#1#73#+ +15 SEQ15#1#73#+ +16 SEQ16#1#72#+ +17 SEQ17#1#71#+ +18 SEQ18#1#72#+ +19 SEQ19#1#82#+ +20 SEQ20#1#72#+ +21 SEQ21#1#74#+ +22 
SEQ22#1#72#+ +23 SEQ23#1#74#+ +24 SEQ24#1#73#+ +25 SEQ25#1#69#+ +26 SEQ26#1#72#+ +27 SEQ27#1#69#+ +28 SEQ28#1#71#+ +29 SEQ29#1#66#+ +30 SEQ30#1#72#+ +31 SEQ31#1#73#+ +32 SEQ32#1#73#+ +33 SEQ33#1#72#+ +34 SEQ34#1#68#+ +35 SEQ35#1#72#+ +36 SEQ36#1#73#+ +37 SEQ37#1#82#+ +38 SEQ38#1#65#+ +39 SEQ39#1#69#+ +40 SEQ40#1#66#+ +41 SEQ41#1#68#+ +42 SEQ42#1#69#+ +43 SEQ43#1#71#+ +44 SEQ44#1#69#+ +45 SEQ45#1#72#+ +46 SEQ46#1#72#+ +47 SEQ47#1#68#+ +48 SEQ48#1#71#+ +49 SEQ49#1#68#+ +50 SEQ50#1#73#+ +51 SEQ51#1#88#+ +52 SEQ52#1#101#+ +53 SEQ53#1#98#+ +54 SEQ54#1#88#+ +55 SEQ55#1#113#+ +56 SEQ56#1#95#+ +57 SEQ57#1#102#+ +58 SEQ58#1#84#+ +59 SEQ59#1#79#+ +60 SEQ60#1#87#+ +61 SEQ61#1#76#+ +62 SEQ62#1#62#+ +63 SEQ63#1#68#+ +64 SEQ64#1#62#+ +65 SEQ65#1#57#+ +66 SEQ66#1#71#+ +67 SEQ67#1#66#+ +68 SEQ68#1#64#+ +69 SEQ69#1#82#+ +70 SEQ70#1#68#+ +71 SEQ71#1#76#+ +72 SEQ72#1#126#+ +73 SEQ73#1#126#+ +74 SEQ74#1#127#+ +75 SEQ75#1#126#+ +76 SEQ76#1#79#+ +77 SEQ77#1#77#+ +78 SEQ78#1#80#+ +79 SEQ79#1#79#+ +80 SEQ80#1#79#+ +81 SEQ81#1#77#+ +82 SEQ82#1#81#+ +83 SEQ83#1#79#+ +84 SEQ84#1#79#+ +85 SEQ85#1#79#+ +86 SEQ86#1#79#+ +87 SEQ87#1#78#+ +88 SEQ88#1#81#+ +89 SEQ89#1#80#+ +90 SEQ90#1#66#+ +91 SEQ91#1#69#+ +92 SEQ92#1#69#+