# HG changeset patch
# User yhoogstrate
# Date 1434468072 14400
# Node ID bf022d3751fa8ecd02942a85d94159f10165379a
planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/unafold_galaxy_wrapper commit 84b70c01144fa018db45215941fb395798376100-dirty

diff -r 000000000000 -r bf022d3751fa datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Tue Jun 16 11:21:12 2015 -0400
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r bf022d3751fa lib/galaxy/datatypes/RNAStructure.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/RNAStructure.py Tue Jun 16 11:21:12 2015 -0400
@@ -0,0 +1,153 @@
+import logging
+log = logging.getLogger(__name__)
+
+from galaxy import util
+import galaxy
+import galaxy.model
+import galaxy.datatypes
+import galaxy.datatypes.data
+
+from galaxy.datatypes.metadata import MetadataElement
+
+from galaxy.datatypes.sequence import Sequence
+from galaxy.datatypes.tabular import Tabular
+from galaxy.datatypes.xml import GenericXml
+
+from galaxy.datatypes.data import Data
+
+
+import re
+
+class DotBracket ( Sequence ):
+    """Dot-Bracket notation: records of a header, a sequence and a structure line."""
+    edam_format = "format_1457"
+    file_ext = "dbn"
+
+    # Anchored at both ends: the former "^[...]*" patterns matched any line.
+    sequence_regexp = re.compile( r"^[ACGTURYKMSWBDHVN]+$" )
+    structure_regexp = re.compile( r"^[\(\)\.]+$" )
+
+    def set_meta( self, dataset, **kwd ):
+        """
+        Set the number of sequences and the number of data lines
+        in dataset.
+        """
+        if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize:
+            dataset.metadata.data_lines = None
+            dataset.metadata.sequences = None
+            # NOTE(review): 'seconday_structures' looks like a typo for
+            # 'secondary_structures'; left unchanged until the intended
+            # metadata element is confirmed.
+            dataset.metadata.seconday_structures = None
+            return
+
+        data_lines = 0
+        sequences = 0
+
+        # open() in a with-block closes the handle; file() leaked it.
+        with open( dataset.file_name ) as handle:
+            for line in handle:
+                data_lines += 1
+                if line.strip().startswith( '>' ):
+                    sequences += 1
+
+        dataset.metadata.data_lines = data_lines
+        dataset.metadata.sequences = sequences
+
+    def sniff( self, filename ):
+        """
+        Return True if the file at filename looks like Dot-Bracket.
+
+        The format is as follows, although it remains unclear whether
+        the Dot-Bracket format may contain multiple sequences per file:
+
+        >sequenceName1
+        CCCaaaGGG
+        (((...)))
+        >sequenceName2
+        GGGuuuCCC
+        (((...)))
+
+        Blank lines are skipped. Files without a complete record, or
+        ending in the middle of a record, are rejected.
+        """
+        state = 0  # 0: header line, 1: sequence line, 2: structure line
+        records = 0
+        sequence_size = 0
+
+        with open( filename ) as handle:
+            for line in handle:
+                line = line.strip()
+                if not line:
+                    continue
+
+                if state == 0:
+                    if not line.startswith( '>' ):
+                        return False
+                elif state == 1:
+                    if not self.sequence_regexp.match( line.upper() ):
+                        return False
+                    sequence_size = len( line )
+                else:
+                    if sequence_size != len( line ) or not self.structure_regexp.match( line ):
+                        return False
+                    records += 1
+
+                state = ( state + 1 ) % 3
+
+        return records > 0 and state == 0
+
+class ConnectivityTable( Tabular ):
+    """Connectivity Table: a header line followed by structure rows."""
+    edam_format = "format_3309"
+    file_ext = "ct"
+
+    header_regexp = re.compile( "^[0-9]+" + "(?:\t|[ ]+)" + "[^ \t]+")
+    structure_regexp = re.compile( "^[0-9]+" + "(?:\t|[ ]+)" + "[ACGTURYKMSWBDHVN]+" + "(?:\t|[ ]+)" + "[^\t]+" + "(?:\t|[ ]+)" + "[^\t]+" + "(?:\t|[ ]+)" + "[^\t]+" + "(?:\t|[ ]+)" + "[^\t]+")
+
+    def __init__( self, **kwd ):
+        Tabular.__init__( self, **kwd )
+
+        self.columns = 6
+        self.column_names = ['base_index', 'base', 'neighbor_left', 'neighbor_right', 'partner', 'natural_numbering']
+        self.column_types = ['int', 'str', 'int', 'int', 'int', 'int']
+
+    def set_meta( self, dataset, **kwd ):
+        """Set the number of data lines in dataset."""
+        with open( dataset.file_name ) as handle:
+            data_lines = sum( 1 for _ in handle )
+
+        dataset.metadata.data_lines = data_lines
+
+    def sniff( self, filename ):
+        """
+        Return True if the first non-blank line matches header_regexp
+        and every later non-blank line matches structure_regexp.
+        """
+        # Accept a path string as well as an object with a file_name
+        # attribute; the former unconditional '.file_name' lookup
+        # raised AttributeError for path strings.
+        filename = getattr( filename, 'file_name', filename )
+
+        seen_header = False
+        with open( filename ) as handle:
+            for line in handle:
+                line = line.strip()
+                if not line:
+                    continue
+
+                if not seen_header:
+                    if not self.header_regexp.match( line ):
+                        return False
+                    seen_header = True
+                elif not self.structure_regexp.match( line.upper() ):
+                    return False
+
+        # Without a header (e.g. an empty file) this is no Connectivity Table.
+        return seen_header
+
+
+class RNAML( GenericXml ):
+    """RNAML, handled as generic XML."""
+    edam_format = "format_3311"
+    file_ext = "rnaml"
diff -r 000000000000 -r bf022d3751fa test-data/test1_input.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1_input.fa Tue Jun 16 11:21:12 2015 -0400
@@ -0,0 +1,2 @@
+>seq1
+GGGGGaaaCCCCC
\ No newline at end of file
diff -r 000000000000 -r bf022d3751fa test-data/test1_output.ct
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1_output.ct Tue Jun 16 11:21:12 2015 -0400
@@ -0,0 +1,14 @@
+13 dG = -8.9 seq1
+1 G 0 2 12 1 0 2
+2 G 1 3 11 2 1 3
+3 G 2 4 10 3 2 4
+4 G 3 5 9 4 3 0
+5 G 4 6 0 5 0 0
+6 a 5 7 0 6 0 0
+7 a 6 8 0 7 0 0
+8 a 7 9 0 8 0 0
+9 C 8 10 4 9 0 10
+10 C 9 11 3 10 9 11
+11 C 10 12 2 11 10 12
+12 C 11 13 1 12 11 13
+13 C 12 0 0 13 12 0
diff -r 000000000000 -r bf022d3751fa tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Jun 16 11:21:12 2015 -0400
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff -r 000000000000 -r bf022d3751fa unafold.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/unafold.xml Tue Jun 16 11:21:12 2015 -0400
@@ -0,0 +1,101 @@
+
+
+ UNAFold RNA and DNA structure prediction
+
+
+ unafold
+
+
+
+
+
+
+ UNAFold.pl --version
+
+
+ UNAFold.pl
+ -n $n.a
+ -t $temp
+
+ #if $n.a == "DNA"
+ -N $sodium
+ -M $magnesium
+ #end if
+
+ $input_file &&
+ output=\$(ls | grep \.ct | sort -r | head -n 1) &&
+ mv "\$output" "$output_ct"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Usage: UNAFold.pl [options] file [file]
+
+ Options:
+ -V, --version
+ -h, --help
+ -n, --NA=(RNA | DNA) (defaults to RNA)
+ -t, --temp=<temperature> (defaults to 37)
+ -N, --sodium=<[Na+] in M> (defaults to 1)
+ -M, --magnesium=<[Mg++] in M> (defaults to 0)
+ -p, --polymer
+ -C, --Ct=<total strand concentration>
+ -I, --noisolate
+ -m, --maxbp=<maximum basepair distance>
+ -c, --constraints=<name of constraints file> (defaults to prefix.aux)
+ -P, --percent=<energy increment percent> (defaults to 5)
+ -W, --window=<window size> (default set by sequence length)
+ -X, --max=<maximum number of foldings> (defaults to 100)
+ --ann=(none | p-num | ss-count) (defaults to none)
+ --mode=(auto | bases | lines) (defaults to auto)
+ --label=<base numbering frequency>
+ --rotate=<structure rotation angle>
+ --run-type=(text | html) (defaults to text)
+ --model=(EM | PF) (defaults to EM)
+ --circular
+ Obscure options:
+ --allpairs
+ --maxloop=<maximum bulge/interior loop size> (defaults to 30)
+ --nodangle
+ --simple
+ --prefilter=<filter value>
+
+ Report bugs to markhn@rpi.edu
+
+
+
+ 10.1007/978-1-60327-429-6_1
+
+