changeset 0:05bc31ccc323 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/miranda commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
author earlhaminst
date Fri, 11 Nov 2016 07:03:25 -0500
parents
children 89d595ffa9db
files miranda.xml miranda_parser.py test-data/microrna.fasta test-data/query.fasta test-data/raw_output.txt test-data/tabular_output.txt tool_dependencies.xml
diffstat 7 files changed, 236 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/miranda.xml	Fri Nov 11 07:03:25 2016 -0500
@@ -0,0 +1,56 @@
+<tool id="miranda" name="miRanda" version="3.3a">
+    <description>finds potential target sites for miRNAs in genomic sequences</description>
+    <requirements>
+        <requirement type="package" version="3.3a">miranda</requirement>
+    </requirements>
+    <version_command>miranda --version|grep 'miranda v'</version_command>
+    <!-- Scan the miRNAs against the query sequences, then extract the per-hit rows into the tabular output. -->
+    <!-- -sc/-en are only passed when the optional parameter holds a value; a non-zero exit code fails the job. -->
+    <command detect_errors="exit_code"><![CDATA[
+miranda '$microrna' '$query'
+#if str($sc)
+    -sc $sc
+#end if
+#if str($en)
+    -en $en
+#end if
+-out '$raw_output'
+&&
+python '$__tool_directory__/miranda_parser.py' '$raw_output' '$tabular_output'
+]]></command>
+    <inputs>
+        <param name="microrna" type="data" format="fasta" label="MicroRNA sequences" help="FASTA format" />
+        <param name="query" type="data" format="fasta" label="Query sequences" help="FASTA format" />
+        <param name="sc" type="float" min="0.0" value="140.0" optional="true" label="Alignment score threshold" help="Only alignments with score greater or equal to this value will be used for further analysis (-sc)" />
+        <param name="en" type="float" value="1.0" optional="true" label="Set energy threshold to -E kcal/mol" help="Only alignments with energy less or equal to this value will be used for further analysis. A negative value is required for filtering to occur (-en)" />
+    </inputs>
+    <outputs>
+        <data name="raw_output" format="txt" label="${tool.name} on ${on_string}: raw output" />
+        <data name="tabular_output" format="tabular" label="${tool.name} on ${on_string}: tabular output" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="microrna" ftype="fasta" value="microrna.fasta" />
+            <param name="query" ftype="fasta" value="query.fasta" />
+            <!-- lines_diff="4": the two "... Filename:" lines of the raw report contain per-run dataset paths -->
+            <output name="raw_output" file="raw_output.txt" lines_diff="4" />
+            <output name="tabular_output" file="tabular_output.txt" />
+        </test>
+    </tests>
+    <help><![CDATA[
+`miRanda`_ is an algorithm for the detection of potential microRNA target sites in genomic sequences.
+
+One or more miRNA sequences are scanned against all query sequences and potential target sites are reported. Potential target sites are identified using a two-step strategy. First a dynamic programming local alignment is carried out between the query miRNA sequence and the reference sequence. This alignment procedure scores based on sequence complementarity and not on sequence identity. In other words we look for A:U and G:C matches instead of A:A, G:G, etc. The G:U wobble pair is also permitted, but generally scores less than the more optimal matches.
+
+The second phase of the algorithm takes high-scoring alignments (i.e. those above the alignment score threshold) detected from phase 1 and estimates the thermodynamic stability of RNA duplexes based on these alignments. This second phase of the method utilizes folding routines from the RNAlib library, which is part of the ViennaRNA package written by Ivo Hofacker. At this stage we generate a constrained fictional single-stranded RNA composed of the query sequence, a linker and the reference sequence (reversed). This structure is then folded using RNAlib and the minimum free energy (DG kcal/mol) is calculated for that structure.
+
+Finally, detected targets with energies less than a chosen energy threshold are selected as potential target sites. Target site alignments passing both thresholds and other information are produced as output.
+
+View the original miRanda manual: http://cbio.mskcc.org/microrna_data/manual.html
+
+.. _miRanda: http://www.microrna.org/
+]]></help>
+    <citations>
+        <citation type="doi">10.1186/gb-2003-5-1-r1</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/miranda_parser.py	Fri Nov 11 07:03:25 2016 -0500
@@ -0,0 +1,8 @@
+import sys
+
+# get hits from miranda scans
+with open(sys.argv[1]) as infile1:
+    with open(sys.argv[2], "w") as outfile:
+        for line1 in infile1:
+            if "%" in line1 and "Forward:" not in line1:
+                outfile.write(line1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/microrna.fasta	Fri Nov 11 07:03:25 2016 -0500
@@ -0,0 +1,2 @@
+>gi|29565487|emb|AJ550546.1| Drosophila melanogaster microRNA miR-bantam
+GTGAGATCATTTTGAAAGCTG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query.fasta	Fri Nov 11 07:03:25 2016 -0500
@@ -0,0 +1,57 @@
+>gi|945100|gb|U31226.1|DMU31226 Drosophila melanogaster head involution defective protein (hid) mRNA, complete cds (3'UTR only)
+TGACAAAAAATAAAAAACGAAATCCATCGTGAACAGTTTTGTGTTTTTAAATCAGTTCTAAACACGAAAA
+GGGTTGATGAAAAACGCAGAAGAATCCGAAAAACTAACTAACCGAGCAAAAACTTGACTTGAGTGTTGTT
+TGACAAATCAGGAAAGATAAAAAACAAATCATAAGAAAAAACTGCACGAAAAATGAAAAAGTTTCTAATA
+TTCAAAATCTTGCACAAGAAATACAAAATCAATTAAAGTGAACTCTAACCAAAAGTTGTACACAAAATAA
+AAAGCAAAACAAAGCAGCGAAGAACAATCACAAGAAGAGCAAAGTGCCAACAAAGTGCAGGAAGGAAGGA
+AGCGGATAAGGACAAAAAGGAAGCCAGCACACACACACACACCCACACAATGGCCGTGCCCTTTTATTTG
+CCCGAGGGCGGCGCCGATGACGTAGCGTCGAGTTCATCGGGAGCCTCGGGCAACTCCTCCCCCCACAACC
+ACCCACTTCCCTCGAGCGCATCCTCGTCCGTCTCCTCCTCGGGCGTGTCCTCGGCCTCCGCCTCCTCGGC
+CTCATCTTCGTCATCCGCATCGTCGGACGGCGCCAGCAGCGCCGCCTCGCAATCGCCGAACACCACCACC
+TCGTCGGCCACGCAGACGCCGATGCAGTCTCCACTGCCCACCGACCAAGTGCTATACGCCCTCTACGAGT
+GGGTCAGGATGTACCAGAGCCAGCAGAGTGCCCCGCAAATCTTCCAGTATCCGCCGCCAAGCCCCTCTTG
+CAATTTCACTGGCGGCGATGTGTTCTTTCCGCACGGCCATCCGAATCCGAACTCGAATCCCCATCCGCGC
+ACCCCCCGAACCAGCGTGAGCTTCTCCTCCGGCGAGGAGTACAACTTCTTCCGGCAGCAGCAGCCGCAAC
+CACATCCGTCATATCCGGCGCCATCAACACCGCAGCCAATGCCACCGCAGTCAGCGCCGCCGATGCACTG
+CAGCCACAGCTACCCGCAGCAGTCGGCGCACATGATGCCACACCATTCCGCTCCCTTCGGAATGGGCGGT
+ACCTACTACGCCGGCTACACGCCACCACCCACTCCGAACACGGCCAGTGCGGGCACCTCCAGCTCATCGG
+CGGCCTTCGGCTGGCACGGCCACCCCCACAGCCCCTTCACGTCGACCTCCACGCCGTTATCGGCGCCAGT
+GGCGCCCAAGATGCGCCTGCAGCGCAGCCAGTCGGATGCGGCCAGACGCAAGCGATTGACCTCGACGGGC
+GAGGATGAGCGCGAGTACCAGAGCGATCATGAGGCCACTTGGGACGAGTTTGGCGATCGCTACGACAACT
+TTACGGCCGGCCGGGAGCGTCTGCAGGAGTTCAATGGACGCATCCCGCCCCGGAAGAAGAAGAGCTCCAA
+TAGCCACTCGAGCAGCAGCAATAATCCAGTCTGCCATACCGACAGCCAGTCCGGTGGTACATCCCAAGCG
+GAGAGCGGTGCCATCCATGGCCACATCAGTCAGCAGCGACAGGTGGAGCGAGAACGACAAAAGGCGAAGG
+CCGAGAAGAAGAAACCACAGAGCTTCACTTGGCCAACTGTTGTGACCGTTTTCGTTTTGGCCATGGGCTG
+TGGCTTCTTTGCGGCGCGATGAAAGCGCAGGAGACGTGTAATCGAATGATCTATAGTGAAATCAGCTAGC
+CCTTAAGATATATGCCGATCTAAACATAGTTGTAGTTAAACCGTACATAAGTGCAACGAATTTATTGAAC
+TGCAGGAGCGAAAGCAGAAAGTCATTAATTCGTAAACGGATTGTTAGATACACAAACAGCCAACATACAC
+GAAGAGTGTGCCTAAGATTAAGAAGGTTGACGGGACACAAGAACAATATATTCTATCTGTCTATGGTAAC
+TGCATTTGTATTTCTAAAACGAAACGAAAGATAACAATCTTAACTGCTCAAAGTAATGAAAACTCTTAGA
+CTGGCAAGAGACTCAAATCACACTTATTTTTTTGCTGATCCATATTTTTGTACAACCTTTTGAGCGATAT
+TTACAAATTATACTAGTACAAAAAAAAGAGAGAGAGAGATAAGCAAAAGAAAACTGCCACTTTTGAGATA
+CTTTTGATAATCTTTGATTTGCATTTAATCATTTCCACACTTGCATTTTTTATAAACAACAAACAAAATT
+ACTTCCATTGTAGAACAAAGTAAACTGCAATTTCAATGTCTTCGCATTTGTAATTCCGAATTGCAAGAAA
+AACAAAAATATTTTAAATATGTTTAACTAGTAGAATTTTTTAAACGTAAGTCCACAAAAACAAGCACATC
+TAGCTTTAATTGTTGAAACAAAAGCAGAAAAAACGCAACAAAAAAATGAATGAAAATCATTAAATTAATT
+TTGTATATAGTTTTTATGCCATTTTTGTGATGTTTTGTGTCTACGGTTTATGTCATGTTATTTTAGTTAA
+ATTTCTTATGATTTATGTTTATTTGTAATATTTTTTGTCATTGTTTGTTCATCATCATATTCAAATTGGT
+CTCACAATATAATAGTTTTAAGCTCCACGCCCGGGAGATTGATGGCAAAACGATTGAAATTTGGCCAGAA
+GAGAGATAGTTTTCCCCATTCGTACACAGTCTTTTTTGGAATGCACATTAATGATCTCTCACAATGGAAA
+TTAATGAAAATTGATCTCCGCAGCTAGCCAAAGTTAAAAAAGAAATGAAGAGGAAAACATATTCTATAGG
+CAATTTTCACTATATGCTAGAATTTCCCGGGCGTTTCAATGCTAATCGAATACAGTGACATGAAAGCAAA
+CATAGCGAAAATATTAAGAAAATCAATCAAAAAGAAAGAAAAACCAATTCCCAAAAATCGCATTGATCTC
+ATGGATTTATACAATACAATTACATCAACCGTTTTTTTACAATGAGAAATGTTATAAAAAGCAGAAAGTG
+AAACACAGAAACATAAACAAAAATTAACGAAAAGCTTAGATATAAGTTCGCCAAGCGTTTTAGTTCTATT
+TTCTAGAATGTCTAAGTCGGTTTAGTGAGTTTATTAAGCTGTCTTCGGACACAAGTTTATTTGTATATAA
+GCAATATTATTTGTGTAGCCTAAGTGACAGTCCCAATCAAATCCAATCCAATATCACCCAGTCCCGGACA
+TTTCCCAGCAAAACAATAGACTATTCTCGCGTTCACATGTATCAATCTTAATTTGAATTACCACAAAATG
+AAATGAAATACTAAAACCATACACAAATGAAAAATTATTTTTGTAAATTGTTTGCATCAAGTGAGCAAGG
+GGATTAGATTAAGGAATCATCCTTGCTTTATCCCCTGCTTATTGCTAATTAGTTTTCACAATGATCTCGG
+TAAAGTTTTGTGGCCTTGCGCCCAAAAGTCGTACAGATTTTTGGTTTGCCATAAATACTCGAACAAAAAG
+TTAATGAAAAACGAAGCAAATGGAAAAAAAATCAGAATGAAACACAAGAAATTTATATTTTTGACCCAAT
+GCTACTTAATCCGTTTTTGTAATTTAAGTATCTTTACTCGACCTTGTATATAGCGCAGTTCGAATCACAG
+AATCAAATGCCATTTTTGTATAGAATTTTATTTGGTGCCAAAACAGTGACAGATAATTAAATGTCTATGA
+ACCCGTGTATTTCGCATATTATACATTTATACATATATCGTAACTTCAATGATAAGTTTGATTCTGAAAT
+TTTGTCAACTCAATTTAAGAAACATTTCTGTTGTAGTTTAGTGATTGCTAGCAGAAAGCACTTTGTTTAA
+TTGTACATTTTATATTATGCTGTAATATTTTAATATACATAAATATCATTATTGATCTCATGAATATGTT
+CATAAGACAACAAAAATTATATATATGAATACATCTATGTGTATGTGTAAAG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/raw_output.txt	Fri Nov 11 07:03:25 2016 -0500
@@ -0,0 +1,92 @@
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+miranda v3.3a    microRNA Target Scanning Algorithm
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+(c) 2003 Memorial Sloan-Kettering Cancer Center, New York
+
+Authors: Anton Enright, Bino John, Chris Sander and Debora Marks
+(mirnatargets (at) cbio.mskcc.org - reaches all authors)
+
+Software written by: Anton Enright
+Distributed for anyone to use under the GNU Public License (GPL),
+See the files 'COPYING' and 'LICENSE' for details
+
+If you use this software please cite:
+Enright AJ, John B, Gaul U, Tuschl T, Sander C and Marks DS;
+(2003) Genome Biology; 5(1):R1.
+
+   miranda comes with ABSOLUTELY NO WARRANTY;
+   This is free software, and you are welcome to redistribute it
+   under certain conditions; type `miranda --license' for details.
+
+Current Settings:
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+Query Filename: 	/tmp/tmpCIUFbf/files/000/dataset_109.dat
+Reference Filename:	/tmp/tmpCIUFbf/files/000/dataset_110.dat
+Gap Open Penalty:	-9.000000
+Gap Extend Penalty:	-4.000000
+Score Threshold:	140.000000
+Energy Threshold:	1.000000 kcal/mol
+Scaling Parameter:	4.000000
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+Read Sequence:gi|29565487|emb|AJ550546.1| Drosophila melanogaster microRNA miR-bantam(21 nt)
+Read Sequence:gi|945100|gb|U31226.1|DMU31226 Drosophila melanogaster head involution defective protein (hid) mRNA, complete cds (3'UTR only)(3902 nt)
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+Performing Scan: gi|29565487|emb|AJ550546.1| vs gi|945100|gb|U31226.1|DMU31226
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+
+   Forward:	Score: 167.000000  Q:2 to 20  R:3340 to 3360 Align Len (18) (83.33%) (94.44%)
+
+   Query:    3' gtCGAAAGTTTTACTAGAGTg 5'
+                  |:||||| |||||||||: 
+   Ref:      5' taGTTTTCACAATGATCTCGg 3'
+
+   Energy:  -24.540001 kCal/Mol
+
+Scores for this hit:
+>gi|29565487|emb|AJ550546.1|	gi|945100|gb|U31226.1|DMU31226	167.00	-24.54	2 20	3340 3360	18	83.33%	94.44%
+
+
+   Forward:	Score: 156.000000  Q:2 to 17  R:2505 to 2525 Align Len (15) (86.67%) (93.33%)
+
+   Query:    3' gtcgaAAGTTTTACTAGAGTg 5'
+                     |||||| ||:||||| 
+   Ref:      5' tcataTTCAAATTGGTCTCAc 3'
+
+   Energy:  -20.030001 kCal/Mol
+
+Scores for this hit:
+>gi|29565487|emb|AJ550546.1|	gi|945100|gb|U31226.1|DMU31226	156.00	-20.03	2 17	2505 2525	15	86.67%	93.33%
+
+
+   Forward:	Score: 155.000000  Q:2 to 16  R:2852 to 2872 Align Len (14) (78.57%) (85.71%)
+
+   Query:    3' gtcgaaAGTTTTACTAGAGTg 5'
+                      ||: | |||||||| 
+   Ref:      5' caaaaaTCGCATTGATCTCAt 3'
+
+   Energy:  -14.570000 kCal/Mol
+
+Scores for this hit:
+>gi|29565487|emb|AJ550546.1|	gi|945100|gb|U31226.1|DMU31226	155.00	-14.57	2 16	2852 2872	14	78.57%	85.71%
+
+
+   Forward:	Score: 152.000000  Q:2 to 18  R:3820 to 3841 Align Len (17) (76.47%) (76.47%)
+
+   Query:    3' gtcgAAAGT-TTTACTAGAGTg 5'
+                    | |||  | |||||||| 
+   Ref:      5' taaaTATCATTATTGATCTCAt 3'
+
+   Energy:  -14.180000 kCal/Mol
+
+Scores for this hit:
+>gi|29565487|emb|AJ550546.1|	gi|945100|gb|U31226.1|DMU31226	152.00	-14.18	2 18	3820 3841	17	76.47%	76.47%
+
+Score for this Scan:
+Seq1,Seq2,Tot Score,Tot Energy,Max Score,Max Energy,Strand,Len1,Len2,Positions
+>>gi|29565487|emb|AJ550546.1|	gi|945100|gb|U31226.1|DMU31226	630.00	-73.32	167.00	-24.54	1	21	3902	 3340 2505 2852 3820
+Complete
+
+Scan Complete
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tabular_output.txt	Fri Nov 11 07:03:25 2016 -0500
@@ -0,0 +1,4 @@
+>gi|29565487|emb|AJ550546.1|	gi|945100|gb|U31226.1|DMU31226	167.00	-24.54	2 20	3340 3360	18	83.33%	94.44%
+>gi|29565487|emb|AJ550546.1|	gi|945100|gb|U31226.1|DMU31226	156.00	-20.03	2 17	2505 2525	15	86.67%	93.33%
+>gi|29565487|emb|AJ550546.1|	gi|945100|gb|U31226.1|DMU31226	155.00	-14.57	2 16	2852 2872	14	78.57%	85.71%
+>gi|29565487|emb|AJ550546.1|	gi|945100|gb|U31226.1|DMU31226	152.00	-14.18	2 18	3820 3841	17	76.47%	76.47%
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Fri Nov 11 07:03:25 2016 -0500
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="miranda" version="3.3a"> <!-- name/version match the <requirement> declared in miranda.xml -->
+        <install version="1.0">
+            <actions> <!-- fetch the sha256-checked source tarball, run the "autoconf" action, then put $INSTALL_DIR/bin on PATH -->
+                <action type="download_by_url" target_filename="miRanda-3.3a.tar.gz" sha256sum="a671da562cf4636ef5085b27349df2df2f335774663fd423deb08f31212ec778">http://cbio.mskcc.org/microrna_data/miRanda-aug2010.tar.gz</action>
+                <action type="autoconf" />
+                <action type="set_environment"> <!-- presumably makes the installed miranda executable resolvable by the tool command; confirm -->
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+miRanda finds potential target sites for miRNAs in genomic sequences.
+        </readme>
+    </package>
+</tool_dependency>