# HG changeset patch
# User yhoogstrate
# Date 1434468072 14400
# Node ID bf022d3751fa8ecd02942a85d94159f10165379a
planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/unafold_galaxy_wrapper commit 84b70c01144fa018db45215941fb395798376100-dirty
diff -r 000000000000 -r bf022d3751fa datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Tue Jun 16 11:21:12 2015 -0400
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r bf022d3751fa lib/galaxy/datatypes/RNAStructure.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/RNAStructure.py Tue Jun 16 11:21:12 2015 -0400
@@ -0,0 +1,132 @@
+import logging
+log = logging.getLogger(__name__)
+
+from galaxy import util
+import galaxy
+import galaxy.model
+import galaxy.datatypes
+import galaxy.datatypes.data
+
+from galaxy.datatypes.metadata import MetadataElement
+
+from galaxy.datatypes.sequence import Sequence
+from galaxy.datatypes.tabular import Tabular
+from galaxy.datatypes.xml import GenericXml
+
+from galaxy.datatypes.data import Data
+
+
+import re
+
+class DotBracket(Sequence):
+    """Dot-bracket (dbn) datatype: '>' header, sequence and structure lines."""
+    edam_format = "format_1457"
+    file_ext = "dbn"
+
+    # Anchored at both ends so that the whole line has to match.
+    sequence_regexp = re.compile(r"^[ACGTURYKMSWBDHVN]*$")
+    structure_regexp = re.compile(r"^[\(\)\.]*$")
+
+    def set_meta(self, dataset, **kwd):
+        """
+        Set the number of sequences and the number of data lines
+        in dataset. The metadata is reset to None when the dataset exceeds
+        max_optional_metadata_filesize (a negative limit disables this).
+        """
+        if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize:
+            dataset.metadata.data_lines = None
+            dataset.metadata.sequences = None
+            dataset.metadata.secondary_structures = None
+            return
+
+        data_lines = 0
+        sequences = 0
+        with open(dataset.file_name) as handle:
+            for line in handle:
+                data_lines += 1
+                # Every record starts with a '>' header line.
+                if line.strip().startswith('>'):
+                    sequences += 1
+
+        dataset.metadata.data_lines = data_lines
+        dataset.metadata.sequences = sequences
+
+    def sniff(self, filename):
+        """
+        Return True if filename holds one or more complete dot-bracket
+        records and nothing else; blank lines are ignored.
+
+        The format is as follows, although it remains unclear whether
+        the Dot-Bracket format may contain multiple sequences per file:
+
+        >sequenceName1
+        CCCaaaGGG
+        (((...)))
+        >sequenceName2
+        GGGuuuCCC
+        (((...)))
+        """
+        i = 0  # number of non-blank lines seen so far
+        with open(filename) as handle:
+            for line in handle:
+                line = line.strip()
+                if not line:
+                    continue
+                state = i % 3
+                if state == 0:  # header line
+                    if not line.startswith('>'):
+                        return False
+                elif state == 1:  # sequence line (case-insensitive)
+                    if not self.sequence_regexp.match(line.upper()):
+                        return False
+                    sequence_size = len(line)
+                elif sequence_size != len(line) or not self.structure_regexp.match(line):
+                    return False  # malformed dot-bracket structure line
+                i += 1
+        # An empty or truncated file is not a valid dot-bracket file.
+        return i > 0 and i % 3 == 0
+
+class ConnectivityTable(Tabular):
+    """Connectivity table (ct) datatype: a header line, then one row per base."""
+    edam_format = "format_3309"
+    file_ext = "ct"
+
+    header_regexp = re.compile( "^[0-9]+" + "(?:\t|[ ]+)" + "[^ \t]+")
+    structure_regexp = re.compile( "^[0-9]+" + "(?:\t|[ ]+)" + "[ACGTURYKMSWBDHVN]+" + "(?:\t|[ ]+)" + "[^\t]+" + "(?:\t|[ ]+)" + "[^\t]+" + "(?:\t|[ ]+)" + "[^\t]+" + "(?:\t|[ ]+)" + "[^\t]+")
+
+    def __init__(self, **kwd):
+        """Initialise the Tabular base and declare the six ct columns."""
+        Tabular.__init__(self, **kwd)
+        self.columns = 6
+        self.column_names = ['base_index', 'base', 'neighbor_left', 'neighbor_right', 'partner', 'natural_numbering']
+        self.column_types = ['int', 'str', 'int', 'int', 'int', 'int']
+
+    def set_meta(self, dataset, **kwd):
+        """Store the number of lines in the dataset as data_lines metadata."""
+        with open(dataset.file_name) as handle:
+            dataset.metadata.data_lines = sum(1 for _ in handle)
+
+    def sniff(self, filename):
+        """
+        Return True if the first non-blank line matches header_regexp and
+        every later one matches structure_regexp. filename is a path, or
+        an object exposing the path as its file_name attribute.
+        """
+        filename = getattr(filename, 'file_name', filename)
+        i = 0  # number of non-blank lines seen so far
+        with open(filename) as handle:
+            for line in handle:
+                line = line.strip()
+                if not line:
+                    continue
+                # Only base rows are upper-cased before matching.
+                regexp = self.header_regexp if i == 0 else self.structure_regexp
+                if not regexp.match(line if i == 0 else line.upper()):
+                    return False
+                i += 1
+        return i > 0  # an empty file is not a connectivity table
+
+
+class RNAML(GenericXml):  # RNAML datatype, built on the generic XML datatype
+    file_ext = "rnaml"
+    edam_format = "format_3311"
diff -r 000000000000 -r bf022d3751fa test-data/test1_input.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1_input.fa Tue Jun 16 11:21:12 2015 -0400
@@ -0,0 +1,2 @@
+>seq1
+GGGGGaaaCCCCC
\ No newline at end of file
diff -r 000000000000 -r bf022d3751fa test-data/test1_output.ct
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1_output.ct Tue Jun 16 11:21:12 2015 -0400
@@ -0,0 +1,14 @@
+13 dG = -8.9 seq1
+1 G 0 2 12 1 0 2
+2 G 1 3 11 2 1 3
+3 G 2 4 10 3 2 4
+4 G 3 5 9 4 3 0
+5 G 4 6 0 5 0 0
+6 a 5 7 0 6 0 0
+7 a 6 8 0 7 0 0
+8 a 7 9 0 8 0 0
+9 C 8 10 4 9 0 10
+10 C 9 11 3 10 9 11
+11 C 10 12 2 11 10 12
+12 C 11 13 1 12 11 13
+13 C 12 0 0 13 12 0
diff -r 000000000000 -r bf022d3751fa tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Jun 16 11:21:12 2015 -0400
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff -r 000000000000 -r bf022d3751fa unafold.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/unafold.xml Tue Jun 16 11:21:12 2015 -0400
@@ -0,0 +1,101 @@
+
+
+ UNAFold RNA and DNA structure prediction
+
+
+ unafold
+
+
+
+
+
+
+ UNAFold.pl --version
+
+
+ UNAFold.pl
+ -n $n.a
+ -t $temp
+
+ #if $n.a == "DNA"
+ -N $sodium
+ -M $magnesium
+ #end if
+
+ $input_file &&
+ output=\$(ls | grep \.ct | sort -r | head -n 1) &&
+ mv "\$output" "$output_ct"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Usage: UNAFold.pl [options] file [file]
+
+ Options:
+ -V, --version
+ -h, --help
+ -n, --NA=(RNA | DNA) (defaults to RNA)
+ -t, --temp=<temperature> (defaults to 37)
+ -N, --sodium=<[Na+] in M> (defaults to 1)
+ -M, --magnesium=<[Mg++] in M> (defaults to 0)
+ -p, --polymer
+ -C, --Ct=<total strand concentration>
+ -I, --noisolate
+ -m, --maxbp=<maximum basepair distance>
+ -c, --constraints=<name of constraints file> (defaults to prefix.aux)
+ -P, --percent=<energy increment percent> (defaults to 5)
+ -W, --window=<window size> (default set by sequence length)
+ -X, --max=<maximum number of foldings> (defaults to 100)
+ --ann=(none | p-num | ss-count) (defaults to none)
+ --mode=(auto | bases | lines) (defaults to auto)
+ --label=<base numbering frequency>
+ --rotate=<structure rotation angle>
+ --run-type=(text | html) (defaults to text)
+ --model=(EM | PF) (defaults to EM)
+ --circular
+ Obscure options:
+ --allpairs
+ --maxloop=<maximum bulge/interior loop size> (defaults to 30)
+ --nodangle
+ --simple
+ --prefilter=<filter value>
+
+ Report bugs to markhn@rpi.edu
+
+
+
+ 10.1007/978-1-60327-429-6_1
+
+