Repository 'gap_datatypes'
hg clone https://toolshed.g2.bx.psu.edu/repos/chrisb/gap_datatypes

Changeset 0:0e941a69a6fa (2016-03-23)
Commit message:
Uploaded
added:
datatypes/README.md
datatypes/datatypes_conf.xml
datatypes/glycan.py
diff -r 000000000000 -r 0e941a69a6fa datatypes/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/README.md Wed Mar 23 14:34:50 2016 -0400
@@ -0,0 +1,19 @@
+
+Custom glycan data types for Galaxy
+===================================
+
+New glycan data types for Galaxy, included as part of the glycan tools repository instead of being added to Galaxy manually (as was done previously).
+Ideas from http://gregvonkuster.org/galaxy-tool-shed-including-custom-datatypes-repositories/
+
+Supported data types include (copied from datatypes_conf.xml):
+
+    <sniffer type="galaxy.datatypes.glycan:kcf"/>
+    <sniffer type="galaxy.datatypes.glycan:glycoct"/>
+    <sniffer type="galaxy.datatypes.glycan:glycoct_xml"/>
+    <sniffer type="galaxy.datatypes.glycan:glydeii"/>
+    <sniffer type="galaxy.datatypes.glycan:linucs"/>
+    <sniffer type="galaxy.datatypes.glycan:iupac"/>
+    <sniffer type="galaxy.datatypes.glycan:linearcode"/>
+    <sniffer type="galaxy.datatypes.glycan:msa"/>
+    <sniffer type="galaxy.datatypes.glycan:wurcs"/>
+
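
Each sniffer listed above points at a class in glycan.py whose sniff() method inspects the start of an uploaded file. As a minimal standalone sketch of the idea (not code from this repository; the file name is hypothetical), the WURCS check from the glycan.py diff further down reduces to:

    # Standalone sketch of the WURCS sniff logic shown in the glycan.py
    # diff below; 'example.wurcs' is a hypothetical file name.
    def looks_like_wurcs(filename):
        try:
            with open(filename, "r") as f:
                firstline = f.readline().upper()  # uppercase to ignore case
            return "WURCS" in firstline
        except IOError:
            return False

    print(looks_like_wurcs("example.wurcs"))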
diff -r 000000000000 -r 0e941a69a6fa datatypes/datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/datatypes_conf.xml Wed Mar 23 14:34:50 2016 -0400
@@ -0,0 +1,30 @@
+<?xml version="1.0"?>
+<datatypes>
+    <datatype_files>
+        <datatype_file name="glycan.py"/>
+    </datatype_files>
+    <registration>
+        <datatype extension="kcf" type="galaxy.datatypes.glycan:kcf" mimetype="text/plain" display_in_upload="true"/>
+        <datatype extension="glycoct" type="galaxy.datatypes.glycan:glycoct" mimetype="text/plain" display_in_upload="true"/>
+        <datatype extension="glycoct_xml" type="galaxy.datatypes.glycan:glycoct_xml" mimetype="text/xml" display_in_upload="true"/>
+        <datatype extension="glydeii" type="galaxy.datatypes.glycan:glydeii" mimetype="text/xml" display_in_upload="true"/>
+        <datatype extension="linucs" type="galaxy.datatypes.glycan:linucs" mimetype="text/plain" display_in_upload="true"/>
+        <datatype extension="iupac" type="galaxy.datatypes.glycan:iupac" mimetype="text/plain" display_in_upload="true"/>
+        <datatype extension="linearcode" type="galaxy.datatypes.glycan:linearcode" mimetype="text/plain" display_in_upload="true"/>
+        <datatype extension="msa" type="galaxy.datatypes.glycan:msa" mimetype="text/plain" display_in_upload="true"/>
+        <datatype extension="wurcs" type="galaxy.datatypes.glycan:wurcs" mimetype="text/plain" display_in_upload="true"/>
+
+    </registration>
+    <sniffers>
+        <sniffer type="galaxy.datatypes.glycan:kcf"/>
+        <sniffer type="galaxy.datatypes.glycan:glycoct"/>
+        <sniffer type="galaxy.datatypes.glycan:glycoct_xml"/>
+        <sniffer type="galaxy.datatypes.glycan:glydeii"/>
+        <sniffer type="galaxy.datatypes.glycan:linucs"/>
+        <sniffer type="galaxy.datatypes.glycan:iupac"/>
+        <sniffer type="galaxy.datatypes.glycan:linearcode"/>
+        <sniffer type="galaxy.datatypes.glycan:msa"/>
+        <sniffer type="galaxy.datatypes.glycan:wurcs"/>
+    </sniffers>
+</datatypes>
+
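Each <datatype> entry above names a class that glycan.py must define: <datatype_file> tells Galaxy to load the module, <registration> maps file extensions to classes, and <sniffers> gives the order in which the classes' sniff() methods are tried during upload. A minimal sketch of the class shape one registration entry expects (modeled on the kcf class in the diff below; Python 2, circa-2016 Galaxy APIs):

    # Sketch of the class behind <datatype extension="kcf" .../>; mirrors
    # the opening of the real kcf class in glycan.py below.
    from galaxy.datatypes import data

    class kcf(data.Data):
        file_ext = 'kcf'           # matches extension="kcf"

        def get_mime(self):
            return 'text/plain'    # matches mimetype="text/plain"

        def sniff(self, filename):
            # Return True when the file looks like this type; sniffers run
            # in the order listed in the <sniffers> section.
            return False
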
diff -r 000000000000 -r 0e941a69a6fa datatypes/glycan.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/glycan.py Wed Mar 23 14:34:50 2016 -0400
@@ -0,0 +1,1964 @@
+__license__ = "MIT"
+
+import logging
+from galaxy.datatypes import metadata
+import mimetypes
+import os
+import shutil
+import sys
+import traceback
+import tempfile
+import zipfile
+from cgi import escape
+from inspect import isclass
+import galaxy.util as util
+from galaxy.datatypes import data
+from galaxy.datatypes.data import get_file_peek  # used by set_peek() below
+from galaxy.datatypes.metadata import \
+    MetadataElement  # import directly to maintain ease of use in Datatype class definitions
+from galaxy.util import inflector
+from galaxy.util.bunch import Bunch
+from galaxy.util.odict import odict
+from galaxy.util.sanitize_html import sanitize_html
+
+from galaxy.datatypes import dataproviders
+
+from galaxy import eggs
+
+eggs.require("Paste")
+import paste
+
+log = logging.getLogger(__name__)  # module-level logger used by split() below
+
+
+class kcf(data.Data):
+    file_ext = 'kcf'
+    line_class = 'line'
+
+    """Add metadata elements"""
+    MetadataElement(name="data_lines", default=0, desc="Number of data lines", readonly=True, optional=True,
+                    visible=False, no_value=0)
+
+    def write_from_stream(self, dataset, stream):
+        """Writes data from a stream"""
+        # write it twice for now
+        fd, temp_name = tempfile.mkstemp()
+        while 1:
+            chunk = stream.read(1048576)
+            if not chunk:
+                break
+            os.write(fd, chunk)
+        os.close(fd)
+        # rewrite the file with unix newlines
+        fp = open(dataset.file_name, 'wt')
+        for line in file(temp_name, "U"):
+            line = line.strip() + '\n'
+            fp.write(line)
+        fp.close()
+
+    def set_raw_data(self, dataset, data):
+        """Saves the data on the disc"""
+        fd, temp_name = tempfile.mkstemp()
+        os.write(fd, data)
+        os.close(fd)
+        # rewrite the file with unix newlines
+        fp = open(dataset.file_name, 'wt')
+        for line in file(temp_name, "U"):
+            line = line.strip() + '\n'
+            fp.write(line)
+        fp.close()
+        os.remove(temp_name)
+
+    def get_mime(self):
+        """Returns the mime type of the datatype"""
+        return 'text/plain'
+
+    def set_meta(self, dataset, **kwd):
+        """
+        Set the number of lines of data in dataset.
+        """
+        dataset.metadata.data_lines = self.count_data_lines(dataset)
+
+    def estimate_file_lines(self, dataset):
+        """
+        Perform a rough estimate by extrapolating the number of lines from a small read.
+        """
+        sample_size = 1048576
+        dataset_fh = open(dataset.file_name)
+        dataset_read = dataset_fh.read(sample_size)
+        dataset_fh.close()
+        sample_lines = dataset_read.count('\n')
+        est_lines = int(sample_lines * (float(dataset.get_size()) / float(sample_size)))
+        return est_lines
+
+    def count_data_lines(self, dataset):
+        """
+        Count the number of lines of data in dataset,
+        skipping all blank lines and comments.
+        """
+        data_lines = 0
+        for line in file(dataset.file_name):
+            line = line.strip()
+            if line and not line.startswith('#'):
+                data_lines += 1
+        return data_lines
+
+    def set_peek(self, dataset, line_count=None, is_multi_byte=False, WIDTH=256, skipchars=[]):
+        """
+        Set the peek.  This method is used by various subclasses of Text.
+        """
+        if not dataset.dataset.purged:
+            # The file must exist on disk for the get_file_peek() method
+            dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte, WIDTH=WIDTH,
+                                         skipchars=skipchars)
+            if line_count is None:
+                # See if line_count is stored in the metadata
+                if dataset.metadata.data_lines:
+                    dataset.blurb = "%s %s" % (util.commaify(str(dataset.metadata.data_lines)),
+                                               inflector.cond_plural(dataset.metadata.data_lines, self.line_class)

[... the diff is truncated at this point in the source dump; the elided lines cover the rest of this class and the glycoct, glycoct_xml, glydeii, linucs, iupac, linearcode, and msa classes registered in datatypes_conf.xml. The excerpt resumes mid-statement inside what appears to be the wurcs class ...]

+                                               inflector.cond_plural(est_lines, self.line_class) )
+            else:
+                dataset.blurb = "%s %s" % (
+                    util.commaify(str(line_count)), inflector.cond_plural(line_count, self.line_class) )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def sniff(self, filename):
+        """All WURCS files start with WURCS= followed by the version number. See http://www.wurcs-wg.org/definition.php and http://rings.t.soka.ac.jp/
+WURCS=2.0/4,3/[x2112h+1:x|1,5|2*NCC/3=O|4*OSO/3=O/3=O][12122a+1:b|1,5][12112h+1:b|1,5|2*NCC/3=O|6*OSO/3=O/3=O][12122a+1:b|1,5]1+3,2+1|2+4,3+1|3+3,4+1"""
+        try:
+            f = open(filename, "r")
+            firstline = f.readline().upper()  # note we are uppercasing here to avoid CasE SenSitIVity
+            f.close()
+            if "WURCS" in firstline:
+                return True
+            else:
+                return False
+        except:
+            traceback.print_exc(file=sys.stdout)
+            return False
+
+
+    def split(cls, input_datasets, subdir_generator_function, split_params):
+        """
+        Split the input files by line.
+        """
+        if split_params is None:
+            return
+
+        if len(input_datasets) > 1:
+            raise Exception("Text file splitting does not support multiple files")
+        input_files = [ds.file_name for ds in input_datasets]
+
+        lines_per_file = None
+        chunk_size = None
+        if split_params['split_mode'] == 'number_of_parts':
+            lines_per_file = []
+            # Computing the length is expensive!
+            def _file_len(fname):
+                i = 0
+                f = open(fname)
+                for i, l in enumerate(f):
+                    pass
+                f.close()
+                return i + 1
+
+            length = _file_len(input_files[0])
+            parts = int(split_params['split_size'])
+            if length < parts:
+                parts = length
+            len_each, remainder = divmod(length, parts)
+            while length > 0:
+                chunk = len_each
+                if remainder > 0:
+                    chunk += 1
+                lines_per_file.append(chunk)
+                remainder -= 1
+                length -= chunk
+        elif split_params['split_mode'] == 'to_size':
+            chunk_size = int(split_params['split_size'])
+        else:
+            raise Exception('Unsupported split mode %s' % split_params['split_mode'])
+
+        f = open(input_files[0], 'rt')
+        try:
+            chunk_idx = 0
+            file_done = False
+            part_file = None
+            while not file_done:
+                if lines_per_file is None:
+                    this_chunk_size = chunk_size
+                elif chunk_idx < len(lines_per_file):
+                    this_chunk_size = lines_per_file[chunk_idx]
+                    chunk_idx += 1
+                lines_remaining = this_chunk_size
+                part_file = None
+                while lines_remaining > 0:
+                    a_line = f.readline()
+                    if a_line == '':
+                        file_done = True
+                        break
+                    if part_file is None:
+                        part_dir = subdir_generator_function()
+                        part_path = os.path.join(part_dir, os.path.basename(input_files[0]))
+                        part_file = open(part_path, 'w')
+                    part_file.write(a_line)
+                    lines_remaining -= 1
+                if part_file is not None:
+                    part_file.close()
+        except Exception, e:
+            log.error('Unable to split files: %s' % str(e))
+            f.close()
+            if part_file is not None:
+                part_file.close()
+            raise
+        f.close()
+
+    split = classmethod(split)
+
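
The split() classmethod's 'number_of_parts' mode spreads lines across parts with divmod, handing out the remainder one extra line at a time. A small standalone sketch of that arithmetic (the 10-lines-into-4-parts numbers are a made-up example):

    # Sketch of split()'s line-distribution arithmetic in 'number_of_parts'
    # mode; length and parts are hypothetical values.
    length, parts = 10, 4
    len_each, remainder = divmod(length, parts)  # 2 per part, 2 left over
    lines_per_file = []
    while length > 0:
        chunk = len_each
        if remainder > 0:
            chunk += 1
        lines_per_file.append(chunk)
        remainder -= 1
        length -= chunk
    print(lines_per_file)  # [3, 3, 2, 2]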