changeset 1:37198dc9311c draft default tip

planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/unafold_galaxy_wrapper commit 3d01452f19416679047a05e4c0a05c15d11f34ea
author yhoogstrate
date Wed, 24 Feb 2016 11:46:54 -0500
parents bf022d3751fa
children
files README.rst datatypes_conf.xml lib/galaxy/datatypes/RNAStructure.py unafold.xml
diffstat 4 files changed, 178 insertions(+), 205 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Wed Feb 24 11:46:54 2016 -0500
@@ -0,0 +1,78 @@
+# UNAFold Galaxy wrapper #
+
+# License #
+
+UNAFold 3.x ACADEMIC NON-COMMERCIAL USE LICENSE AGREEMENT
+
+1. UNAFold 3.x is the work of Drs. Nicholas R. Markham and Michael Zuker.  The
+   copyright in UNAFold 3.x is owned by RENSSELAER POLYTECHNIC INSTITUTE.
+
+2. This is a legal agreement between you, RECIPIENT, and RENSSELAER POLYTECHNIC
+   INSTITUTE.  By accepting, receiving, and using UNAFold 3.x, you agree to be
+   bound by all of its terms.  If you do not agree to all of the terms of this
+   Agreement, please DO NOT download the software; or, if you have already done
+   so, please delete all of the source code, documentation and compiled programs
+   associated with UNAFold 3.x.
+
+3. UNAFold 3.x is provided to RECIPIENT as source code.  RENSSELAER POLYTECHNIC
+   INSTITUTE grants to RECIPIENT a royalty free, non-exclusive, and non-
+   transferable license to compile, install, use UNAFold for internal research
+   only.  RECIPIENT acknowledges that UNAFold 3.x is a research tool that is
+   provided free of charge and only provided "as is." RENSSELAER POLYTECHNIC
+   INSTITUTE, its faculty, employees, or students, or former employees who have
+   developed UNAFold 3.x, have no obligation to assist RECIPIENT in its use,
+   correction, modification, or enhancement and are without any obligation to
+   provide any updates.  RECIPIENT may also offer UNAFold 3.x to the public over
+   a web server provided the program resides on a server at RECIPIENT's
+   institution and users are not able to download or modify the program.
+
+4. The title and copyright to UNAFold 3.x and any associated programs and
+   documentation shall remain with RENSSELAER POLYTECHNIC INSTITUTE.  RECIPIENT
+   agrees to preserve same.  RECIPIENT agrees not to make any copies of UNAFold
+   except for use in RECIPIENT's laboratory without RENSSELAER POLYTECHNIC
+   INSTITUTE's prior written permission.  Written permission can be obtained by
+   contacting Rensselaer Polytechnic Institute's Office of Technology
+   Commercialization by e-mail at burtok2@rpi.edu, or by telephone at 518-276-
+   3675.  RECIPIENT agrees to place the appropriate copyright notice on any such
+   copies.
+
+5. RECIPIENT may not modify UNAFold 3.x, except to fix minor errors, or create
+   derivative works without RENSSELAER POLYTECHNIC INSTITUTE's permission.
+   Please send all requests for modification to markhn@rpi.edu and
+   zukerm@rpi.edu.  Errors and bugs that are found should be reported to Nick
+   Markham and Michael Zuker, whether or not they are corrected.
+
+6. RECIPIENT shall not distribute UNAFold 3.x to other laboratories within
+   RECIPIENT's institution.  RECIPIENT shall not transfer UNAFold 3.x to another
+   location or person outside of RECIPIENT's institution without RENSSELAER
+   POLYTECHNIC INSTITUTE's prior written permission.  Please send all requests
+   for distribution to markhn@rpi.edu and zukerm@rpi.edu.
+
+7. If RENSSELAER POLYTECHNIC INSTITUTE grants RECIPIENT permission to distribute
+   UNAFold 3.x, under Paragraph 6, RECIPIENT shall only export UNAFold 3.x or
+   any part thereof, directly or indirectly, to any country where such export or
+   reexport is authorized in full compliance with the laws of the United States
+   of America.
+
+8. RECIPIENT shall cite the following publications in any abstract, paper, or
+   presentation referencing UNAFold:
+
+a. N. Markham & M. Zuker. (2003) DINAMelt web server for nucleic acid melting
+   prediction.  Nucleic Acids Res. 33:W577-W581.
+
+9. RECIPIENT acknowledges that as UNAFold 3.x is a research tool and provided
+   free of charge, it is only provided "as is."  RENSSELAER POLYTECHNIC
+   INSTITUTE, its faculty, employees, or students, have no obligation to assist
+   RECIPIENT in its use, correction, modification, or enhancement and are
+   without any obligation to provide any updates.
+
+10. RENSSELAER POLYTECHNIC INSTITUTE MAKES NO REPRESENTATIONS AND EXTENDS NO
+    WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED. THERE ARE NO EXPRESS OR
+    IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS OF UNAFold 3.x FOR A
+    PARTICULAR PURPOSE, OR THAT THE USE OF UNAFold 3.x WILL NOT INFRINGE ANY
+    PATENT, COPYRIGHT, TRADEMARK, TRADE SECRET, OR OTHER INTELLECTUAL PROPERTY
+    RIGHTS OF ANOTHER PARTY, OR ANY OTHER EXPRESS OR IMPLIED WARRANTIES.
+    RENSSELAER POLYTECHNIC INSTITUTE WILL NOT BE LIABLE TO RECIPIENT FOR ANY
+    CLAIMS OR DAMAGES ARISING FROM RECIPIENT'S USE OF UNAFold 3.x, ANY CLAIM
+    FOR ANY LOSS OR INTERRUPTION OF BUSINESS, OR FOR ANY INDIRECT, SPECIAL, OR
+    CONSEQUENTIAL DAMAGES OF ANY KIND.
\ No newline at end of file
--- a/datatypes_conf.xml	Tue Jun 16 11:21:12 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-<?xml version="1.0"?>
-<datatypes>
-    <datatype_files>
-        <datatype_file name="RNAStructure.py"/>
-    </datatype_files>
-
-    <registration>
-        <datatype
-            extension="ct"
-            type="galaxy.datatypes.RNAStructure:ConnectivityTable"
-            display_in_upload="True"
-            subclass="True"
-            description="ConnectivityTable format is a text-based column wise format for storing both an RNA sequence and its corresponding 2D structure." />
-        <datatype
-            extension="rnaml"
-            type="galaxy.datatypes.RNAStructure:RNAML"
-            display_in_upload="True"
-            subclass="True"
-            description="RNAML: a standard syntax for exchanging RNA information."
-            url="http://www.ncbi.nlm.nih.gov/pubmed/12088144" />
-    </registration>
-    
-    <sniffers>
-    </sniffers>
-</datatypes>
--- a/lib/galaxy/datatypes/RNAStructure.py	Tue Jun 16 11:21:12 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,132 +0,0 @@
-import logging
-log = logging.getLogger(__name__)
-
-from galaxy import util
-import galaxy
-import galaxy.model
-import galaxy.datatypes
-import galaxy.datatypes.data
-
-from galaxy.datatypes.metadata import MetadataElement
-
-from galaxy.datatypes.sequence import Sequence
-from galaxy.datatypes.tabular import Tabular
-from galaxy.datatypes.xml import GenericXml
-
-from galaxy.datatypes.data import Data
-
-
-import re
-
-class DotBracket ( Sequence ):
-    edam_format = "format_1457"
-    file_ext = "dbn"
-    
-    sequence_regexp = re.compile( "^[ACGTURYKMSWBDHVN]*" )
-    structure_regexp = re.compile( "^[\(\)\.]*" )
-    
-    def set_meta( self, dataset, **kwd ):
-        """
-        Set the number of sequences and the number of data lines
-        in dataset.
-        """
-        if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize:
-            dataset.metadata.data_lines = None
-            dataset.metadata.sequences = None
-            dataset.metadata.seconday_structures = None
-            return
-        
-        data_lines = 0
-        sequences = 0
-        
-        for line in file( dataset.file_name ):
-            line = line.strip()
-            data_lines += 1
-            
-            if line and line.startswith( '>' ):
-                sequences += 1
-        
-        dataset.metadata.data_lines = data_lines
-        dataset.metadata.sequences = sequences
-    
-    def sniff(self, filename):
-        """
-        The format is as follows, although it remains unclear whether
-        the Dot-Bracket format may contain multiple sequences per file:
-        
-        >sequenceName1
-        CCCaaaGGG
-        (((...)))
-        >sequenceName2
-        GGGuuuCCC
-        (((...)))
-        """
-        
-        i = 0
-        pairs = False
-        
-        with open( filename ) as handle:
-            for line in handle:
-                line = line.strip()
-                
-                state = i % 3
-                
-                if state == 0:#header line
-                    if(line[0] != '>'):
-                        return False
-                elif state == 1:#sequence line
-                    if not sequence_regexp.match(line.upper()):
-                        return False
-                    else:
-                        sequence_size = len(line)
-                elif state == 2:#dot-bracket structure line
-                    if (sequence_size != len(line)) or (not structure_regexp.match(line)):
-                        return False
-                
-                i += 1
-        return True
-
-class ConnectivityTable( Tabular ):
-    edam_format = "format_3309"
-    file_ext = "ct"
-    
-    header_regexp = re.compile( "^[0-9]+" + "(?:\t|[ ]+)" + "[^ \t]+")
-    structure_regexp = re.compile( "^[0-9]+" + "(?:\t|[ ]+)" +  "[ACGTURYKMSWBDHVN]+" + "(?:\t|[ ]+)" + "[^\t]+" + "(?:\t|[ ]+)" + "[^\t]+" + "(?:\t|[ ]+)" + "[^\t]+" + "(?:\t|[ ]+)" + "[^\t]+")
-    
-    def __init__(self, **kwd):
-        Tabular.__init__( self, **kwd )
-        
-        self.columns = 6
-        self.column_names = ['base_index', 'base', 'neighbor_left', 'neighbor_right', 'partner', 'natural_numbering']
-        self.column_types = ['int', 'str', 'int', 'int', 'int', 'int']
-
-    def set_meta( self, dataset, **kwd ):
-        data_lines = 0
-        
-        for line in file( dataset.file_name ):
-            data_lines += 1
-        
-        dataset.metadata.data_lines = data_lines
-    
-    def sniff(self, filename):
-        
-        filename = filename.file_name
-        
-        i = 0
-        with open( filename ) as handle:
-            for line in handle:
-                line = line.strip()
-                
-                if(i == 0):
-                    if not self.header_regexp.match(line):
-                        return False
-                else:
-                    if not self.structure_regexp.match(line.upper()):
-                        return False
-                i += 1
-        return True
-
-
-class RNAML( GenericXml ):
-    edam_format = "format_3311"
-    file_ext = "rnaml"
--- a/unafold.xml	Tue Jun 16 11:21:12 2015 -0400
+++ b/unafold.xml	Wed Feb 24 11:46:54 2016 -0500
@@ -1,34 +1,54 @@
 <?xml version="1.0"?>
-<tool id="unafold" name="UNAFold" version="1.0.0">
+<tool id="unafold" name="UNAFold" version="1.1.0">
     <description>UNAFold RNA and DNA structure prediction</description>
-    
+
     <requirements>
         <requirement type="package" version="3.8">unafold</requirement>
     </requirements>
-    
+
     <stdio>
         <regex match="mv: cannot stat ..: No such file or directory" source="stderr" level="fatal" description="Could not find CT output file\n" />
     </stdio>
-    
+
     <version_command>UNAFold.pl --version</version_command>
-    
-    <command>
+
+    <command><![CDATA[
+        #if str($input_source.select_fasta) == "false"
+            echo ">Sequence"                      >  "input.fasta" &&
+            echo "${input_source.input_sequence}" >> "input.fasta" &&
+        #end if
+        
         UNAFold.pl
             -n $n.a
             -t $temp
-            
             #if $n.a == "DNA"
                 -N $sodium
                 -M $magnesium
             #end if
+            
+            #if str($input_source.select_fasta) == "false"
+                "input.fasta"
+            #else
+                "${input_source.input_file}"
+            #end if
+            &&
         
-        $input_file &amp;&amp;
-        output=\$(ls | grep &#92;.ct | sort -r | head -n 1) &amp;&amp;
+        output=\$(ls | grep \.ct | sort -r | head -n 1) &&
+        
         mv "\$output" "$output_ct"
-    </command>
-    
+    ]]></command>
+
     <inputs>
-        <param format="fasta" name="input_file" type="data" label="Input sequence (FASTA)"/>
+        <conditional name="input_source">
+            <param name="select_fasta" type="boolean" truevalue="true" falsevalue="false" label="Input from FASTA file" selected="false" />
+            
+            <when value="true">
+                <param format="fasta" name="input_file" type="data" label="Input sequence (FASTA)"/>
+            </when>
+            <when value="false">
+                <param name="input_sequence" type="text" label="Input sequence"/>
+            </when>
+        </conditional>
     
         <conditional name="n">
             <param name="a" type="select" label="Nucleic Acid Type">
@@ -46,55 +66,87 @@
         
         <param name="temp" type="integer" size="3" value="37" min="0" max="100" label="Temperature (&#176;C)"/>
     </inputs>
-    
+
     <outputs>
-        <data format="ct" name="output_ct" label="${tool.name} on ${input_file.hid}: ${input_file.name}"/>
+        <data format="ct" name="output_ct" label="${tool.name}"/>
     </outputs>
-    
+
     <tests>
         <test>
+            <param name="select_fasta" value="true" />
             <param name="input_file" value="test1_input.fa" ftype="fasta" />
             <param name="temp" value="37" />
             
             <output name="output_ct" file="test1_output.ct" />
         </test>
+        <test>
+            <param name="select_fasta" value="false" />
+            <param name="input_sequence" value="GGGGGaaaCCCCC" />
+            <param name="temp" value="37" />
+            
+            <output name="output_ct" file="test1_output.ct" lines_diff="2" /><!-- Sequence name (header) differs -->
+        </test>
     </tests>
 
-    <help>
-        Usage: UNAFold.pl [options] file [file]
+    <help><![CDATA[
+``Usage: UNAFold.pl [options] file [file]``
+
+``Options:``
+``-V, --version``
+
+``-h, --help``
+
+``-n, --NA=(RNA | DNA) (defaults to RNA)``
+
+``-t, --temp=<temperature> (defaults to 37)``
+
+``-N, --sodium=<[Na+] in M> (defaults to 1)``
+
+``-M, --magnesium=<[Mg++] in M> (defaults to 0)``
+
+``-p, --polymer``
+
+``-C, --Ct=<total strand concentration>``
+
+``-I, --noisolate``
+
+``-m, --maxbp=<maximum basepair distance>``
+
+``-c, --constraints=<name of constraints file> (defaults to prefix.aux)``
+
+``-P, --percent=<energy increment percent> (defaults to 5)``
+
+``-W, --window=<window size> (default set by sequence length)``
 
-        Options:
-        -V, --version
-        -h, --help
-        -n, --NA=(RNA | DNA) (defaults to RNA)
-        -t, --temp=&lt;temperature&gt; (defaults to 37)
-        -N, --sodium=&lt;[Na+] in M&gt; (defaults to 1)
-        -M, --magnesium=&lt;[Mg++] in M&gt; (defaults to 0)
-        -p, --polymer
-        -C, --Ct=&lt;total strand concentration&gt;
-        -I, --noisolate
-        -m, --maxbp=&lt;maximum basepair distance&gt;
-        -c, --constraints=&lt;name of constraints file&gt; (defaults to prefix.aux)
-        -P, --percent=&lt;energy increment percent&gt; (defaults to 5)
-        -W, --window=&lt;window size&gt; (default set by sequence length)
-        -X, --max=&lt;maximum number of foldings&gt; (defaults to 100)
-            --ann=(none | p-num | ss-count) (defaults to none)
-            --mode=(auto | bases | lines) (defaults to auto)
-            --label=&lt;base numbering frequency&gt;
-            --rotate=&lt;structure rotation angle&gt;
-            --run-type=(text | html) (defaults to text)
-            --model=(EM | PF) (defaults to EM)
-            --circular
-        Obscure options:
-            --allpairs
-            --maxloop=&lt;maximum bulge/interior loop size&gt; (defaults to 30)
-            --nodangle
-            --simple
-            --prefilter=&lt;filter value&gt;
+``-X, --max=<maximum number of foldings> (defaults to 100)``
+
+``--ann=(none | p-num | ss-count) (defaults to none)``
+
+``--mode=(auto | bases | lines) (defaults to auto)``
+
+``--label=<base numbering frequency>``
+
+``--rotate=<structure rotation angle>``
+
+``--run-type=(text | html) (defaults to text)``
+
+``--model=(EM | PF) (defaults to EM)``
 
-        Report bugs to markhn@rpi.edu
-    </help>
-    
+``--circular``
+
+``Obscure options:``
+
+``--allpairs``
+
+``--maxloop=<maximum bulge/interior loop size> (defaults to 30)``
+
+``--nodangle``
+
+``--simple``
+
+``--prefilter=<filter value>``
+    ]]></help>
+
     <citations>
         <citation type="doi">10.1007/978-1-60327-429-6_1</citation>
     </citations>