Repository 'chado_feature_load_go'
hg clone https://toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_go

Changeset 0:cdf222d77b32 (2018-11-05)
Next changeset 1:bc1940bf58f2 (2018-11-05)
Commit message:
planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/chado commit 3c4fb887c809face4fbe4602617d6dc094b15864
added:
README.rst
chado.py
feature_load_go.xml
macros.xml
b
diff -r 000000000000 -r cdf222d77b32 README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Mon Nov 05 08:06:39 2018 -0500
b
@@ -0,0 +1,43 @@
+Galaxy-chado
+=============
+
+Galaxy tools to interface with a Chado database using python-chado
+
+Dependencies
+------------
+
+You will need to install some python modules in the Galaxy virtualenv for these
+tools to be fully functional:
+
+.. code:: bash
+
+    . /path/to/galaxy/.venv/bin/activate
+    pip install future chado
+    deactivate
+
+Environment
+-----------
+
+The following environment variables must be set:
+
++--------------------------------+-----------------------------------------------------------+
+| ENV                            | Use                                                       |
++================================+===========================================================+
+| ``$GALAXY_CHADO_DBHOST``       | Host of the Chado database                                |
++--------------------------------+-----------------------------------------------------------+
+| ``$GALAXY_CHADO_DBNAME``       | Name of the Chado database                                |
++--------------------------------+-----------------------------------------------------------+
+| ``$GALAXY_CHADO_DBUSER``       | Username to connect to the database                       |
++--------------------------------+-----------------------------------------------------------+
+| ``$GALAXY_CHADO_DBPASS``       | Password to connect to the database                       |
++--------------------------------+-----------------------------------------------------------+
+| ``$GALAXY_CHADO_DBSCHEMA``     | Schema of the Chado database                              |
++--------------------------------+-----------------------------------------------------------+
+| ``$GALAXY_CHADO_DBPORT``       | Port of the Chado database                                |
++--------------------------------+-----------------------------------------------------------+
+
+
+License
+-------
+
+All Python scripts and wrappers are licensed under the MIT license.
b
diff -r 000000000000 -r cdf222d77b32 chado.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chado.py Mon Nov 05 08:06:39 2018 -0500
[
b'@@ -0,0 +1,470 @@\n+import collections\n+import os\n+import time\n+from abc import abstractmethod\n+\n+import chado\n+\n+\n+#############################################\n+#      BEGIN IMPORT OF CACHING LIBRARY      #\n+#############################################\n+# This code is licensed under the MIT       #\n+# License and is a copy of code publicly    #\n+# available in rev.                         #\n+# e27332bc82f4e327aedaec17c9b656ae719322ed  #\n+# of https://github.com/tkem/cachetools/    #\n+#############################################\n+class DefaultMapping(collections.MutableMapping):\n+\n+    __slots__ = ()\n+\n+    @abstractmethod\n+    def __contains__(self, key):  # pragma: nocover\n+        return False\n+\n+    @abstractmethod\n+    def __getitem__(self, key):  # pragma: nocover\n+        if hasattr(self.__class__, \'__missing__\'):\n+            return self.__class__.__missing__(self, key)\n+        else:\n+            raise KeyError(key)\n+\n+    def get(self, key, default=None):\n+        if key in self:\n+            return self[key]\n+        else:\n+            return default\n+\n+    __marker = object()\n+\n+    def pop(self, key, default=__marker):\n+        if key in self:\n+            value = self[key]\n+            del self[key]\n+        elif default is self.__marker:\n+            raise KeyError(key)\n+        else:\n+            value = default\n+        return value\n+\n+    def setdefault(self, key, default=None):\n+        if key in self:\n+            value = self[key]\n+        else:\n+            self[key] = value = default\n+        return value\n+\n+\n+DefaultMapping.register(dict)\n+\n+\n+class _DefaultSize(object):\n+    def __getitem__(self, _):\n+        return 1\n+\n+    def __setitem__(self, _, value):\n+        assert value == 1\n+\n+    def pop(self, _):\n+        return 1\n+\n+\n+class Cache(DefaultMapping):\n+    """Mutable mapping to serve as a simple cache or cache base class."""\n+\n+    __size = 
_DefaultSize()\n+\n+    def __init__(self, maxsize, missing=None, getsizeof=None):\n+        if missing:\n+            self.__missing = missing\n+        if getsizeof:\n+            self.__getsizeof = getsizeof\n+            self.__size = dict()\n+        self.__data = dict()\n+        self.__currsize = 0\n+        self.__maxsize = maxsize\n+\n+    def __repr__(self):\n+        return \'%s(%r, maxsize=%r, currsize=%r)\' % (\n+            self.__class__.__name__,\n+            list(self.__data.items()),\n+            self.__maxsize,\n+            self.__currsize,\n+        )\n+\n+    def __getitem__(self, key):\n+        try:\n+            return self.__data[key]\n+        except KeyError:\n+            return self.__missing__(key)\n+\n+    def __setitem__(self, key, value):\n+        maxsize = self.__maxsize\n+        size = self.getsizeof(value)\n+        if size > maxsize:\n+            raise ValueError(\'value too large\')\n+        if key not in self.__data or self.__size[key] < size:\n+            while self.__currsize + size > maxsize:\n+                self.popitem()\n+        if key in self.__data:\n+            diffsize = size - self.__size[key]\n+        else:\n+            diffsize = size\n+        self.__data[key] = value\n+        self.__size[key] = size\n+        self.__currsize += diffsize\n+\n+    def __delitem__(self, key):\n+        size = self.__size.pop(key)\n+        del self.__data[key]\n+        self.__currsize -= size\n+\n+    def __contains__(self, key):\n+        return key in self.__data\n+\n+    def __missing__(self, key):\n+        value = self.__missing(key)\n+        try:\n+            self.__setitem__(key, value)\n+        except ValueError:\n+            pass  # value too large\n+        return value\n+\n+    def __iter__(self):\n+        return iter(self.__data)\n+\n+    def __len__(self):\n+        return len(self.__data)\n+\n+    @staticmethod\n+    def __getsizeof(value):\n+        return 1\n+\n+    @staticmethod\n+    def 
__missing(key):\n+        raise KeyError(key)\n+\n+    @property\n+    def maxsize(self):\n+        """The maximum size of the cache."""\n+        return self.__maxsize\n+\n+    '..b'""\n+        with self.__timer as time:\n+            self.expire(time)\n+            try:\n+                key = next(iter(self.__links))\n+            except StopIteration:\n+                raise KeyError(\'%s is empty\' % self.__class__.__name__)\n+            else:\n+                return (key, self.pop(key))\n+\n+    if hasattr(collections.OrderedDict, \'move_to_end\'):\n+        def __getlink(self, key):\n+            value = self.__links[key]\n+            self.__links.move_to_end(key)\n+            return value\n+    else:\n+        def __getlink(self, key):\n+            value = self.__links.pop(key)\n+            self.__links[key] = value\n+            return value\n+\n+\n+#############################################\n+#       END IMPORT OF CACHING LIBRARY       #\n+#############################################\n+\n+cache = TTLCache(\n+    100,  # Up to 100 items\n+    1 * 60  # 5 minute cache life\n+)\n+\n+\n+def _get_instance():\n+    return chado.ChadoInstance(\n+        os.environ[\'GALAXY_CHADO_DBHOST\'],\n+        os.environ[\'GALAXY_CHADO_DBNAME\'],\n+        os.environ[\'GALAXY_CHADO_DBUSER\'],\n+        os.environ[\'GALAXY_CHADO_DBPASS\'],\n+        os.environ[\'GALAXY_CHADO_DBSCHEMA\'],\n+        os.environ[\'GALAXY_CHADO_DBPORT\'],\n+        no_reflect=True\n+    )\n+\n+\n+def list_organisms(*args, **kwargs):\n+\n+    ci = _get_instance()\n+\n+    # Key for cached data\n+    cacheKey = \'orgs\'\n+    # We don\'t want to trust "if key in cache" because between asking and fetch\n+    # it might through key error.\n+    if cacheKey not in cache:\n+        # However if it ISN\'T there, we know we\'re safe to fetch + put in\n+        # there.\n+        data = _list_organisms(ci, *args, **kwargs)\n+        cache[cacheKey] = data\n+        return data\n+    try:\n+    
    # The cache key may or may not be in the cache at this point, it\n+        # /likely/ is. However we take no chances that it wasn\'t evicted between\n+        # when we checked above and now, so we reference the object from the\n+        # cache in preparation to return.\n+        data = cache[cacheKey]\n+        return data\n+    except KeyError:\n+        # If access fails due to eviction, we will fail over and can ensure that\n+        # data is inserted.\n+        data = _list_organisms(ci, *args, **kwargs)\n+        cache[cacheKey] = data\n+        return data\n+\n+\n+def _list_organisms(ci, *args, **kwargs):\n+    # Fetch the orgs.\n+    orgs_data = []\n+    for org in ci.organism.get_organisms():\n+        clean_name = \'%s %s\' % (org[\'genus\'], org[\'species\'])\n+        if \'infraspecific_name\' in org and org[\'infraspecific_name\']:\n+            clean_name += \' (%s)\' % (org[\'infraspecific_name\'])\n+        orgs_data.append((clean_name, str(org[\'organism_id\']), False))\n+    return orgs_data\n+\n+\n+def list_analyses(*args, **kwargs):\n+\n+    ci = _get_instance()\n+\n+    # Key for cached data\n+    cacheKey = \'analyses\'\n+    # We don\'t want to trust "if key in cache" because between asking and fetch\n+    # it might through key error.\n+    if cacheKey not in cache:\n+        # However if it ISN\'T there, we know we\'re safe to fetch + put in\n+        # there.<?xml version="1.0"?>\n+\n+        data = _list_analyses(ci, *args, **kwargs)\n+        cache[cacheKey] = data\n+        return data\n+    try:\n+        # The cache key may or may not be in the cache at this point, it\n+        # /likely/ is. 
However we take no chances that it wasn\'t evicted between\n+        # when we checked above and now, so we reference the object from the\n+        # cache in preparation to return.\n+        data = cache[cacheKey]\n+        return data\n+    except KeyError:\n+        # If access fails due to eviction, we will fail over and can ensure that\n+        # data is inserted.\n+        data = _list_analyses(ci, *args, **kwargs)\n+        cache[cacheKey] = data\n+        return data\n+\n+\n+def _list_analyses(ci, *args, **kwargs):\n+    ans_data = []\n+    for an in ci.analysis.get_analyses():\n+        ans_data.append((an[\'name\'], str(an[\'analysis_id\']), False))\n+    return ans_data\n'
b
diff -r 000000000000 -r cdf222d77b32 feature_load_go.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/feature_load_go.xml Mon Nov 05 08:06:39 2018 -0500
[
@@ -0,0 +1,63 @@
+<?xml version="1.0"?>
+<tool id="feature_load_go" name="Chado load GO annotation" version="@WRAPPER_VERSION@.0">
+    <description></description>
+    <macros>
+      <import>macros.xml</import>
+    </macros>
+    <code file="chado.py"/>
+    <expand macro="requirements"/>
+    <command detect_errors="aggressive"><![CDATA[
+@AUTH@
+
+chakin feature load_go
+'$input'
+'$organism_id'
+'$analysis_id'
+
+--query_type '$query_type'
+
+$match_on_name
+
+--name_column '$name_column'
+--go_column '$go_column'
+
+#if $re_name:
+  --re_name '$re_name'
+#end if
+
+$skip_missing
+
+ > $results
+    ]]></command>
+   <inputs>
+          <!-- arguments -->
+       <param name="input" label="GO annotation" argument="input" type="data" format="tabular" help="Path to the GO annotation file to load" />
+       <param argument="analysis_id" type="select" dynamic_options="list_analyses()"  label="Analysis" />
+     <param argument="organism_id" type="select" dynamic_options="list_organisms()" label="Organism" />
+
+        <!-- options -->
+        <param name="query_type" label="Query type" argument="query_type" type="text" help="The feature type (e.g. 'gene', 'mRNA', 'polypeptide', 'contig') of the query. It must be a valid Sequence Ontology term." value="polypeptide" />
+
+     <param name="match_on_name" label="Match On Name" argument="match_on_name" type="boolean" truevalue="--match_on_name" falsevalue="" help="Match features using their name instead of their uniquename" />
+
+     <param name="name_column" label="Identifier column" argument="name_column" type="integer" value="2" help="Column containing the feature identifiers." />
+
+     <param name="go_column" label="GO column" argument="go_column" type="integer" value="5" help="Column containing the GO id." />
+
+     <param name="re_name" label="Name regular expression" argument="re_name" type="text" help="Regular expression to extract the feature name from the input file (first capturing group will be used)." optional="true">
+            <expand macro="sanitized"/>
+        </param>
+
+     <param name="skip_missing" label="Skip Missing" argument="skip_missing" type="boolean" truevalue="--skip_missing" falsevalue="" help="Skip lines with unknown features or GO id instead of aborting everything." />
+
+        <expand macro="wait_for"/>
+   </inputs>
+   <outputs>
+     <data format="txt" name="results"/>
+   </outputs>
+   <help>
+Load GO annotation from a tabular file
+
+@HELP@
+   </help>
+</tool>
b
diff -r 000000000000 -r cdf222d77b32 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Nov 05 08:06:39 2018 -0500
[
@@ -0,0 +1,86 @@
+<?xml version="1.0"?>
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="2.2.2">python-chado</requirement>
+            <requirement type="package" version="1.5">jq</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+
+    <xml name="stdio">
+        <stdio>
+            <regex level="fatal" match="Exception:" source="stderr" />
+            <regex level="fatal" match="error" source="stderr" />
+            <exit_code range="1:" />
+        </stdio>
+    </xml>
+
+    <token name="@WRAPPER_VERSION@">2.1.5</token>
+
+    <xml name="citation">
+        <citations>
+        </citations>
+    </xml>
+
+    <token name="@HELP_OVERVIEW@"><![CDATA[
+        **Python-chado Overview**
+
+        Python-chado provides several tools for loading data into a remote Chado database.
+    ]]></token>
+
+    <token name="@HELP@"><![CDATA[
+    ]]></token>
+
+    <token name="@AUTH@"><![CDATA[
+        echo "__default: local" > '.auth.yml' &&
+        echo "local:" >> '.auth.yml' &&
+        echo "    dbhost: \"\$GALAXY_CHADO_DBHOST\"" >> '.auth.yml' &&
+        echo "    dbname: \"\$GALAXY_CHADO_DBNAME\"" >> '.auth.yml' &&
+        echo "    dbpass: \"\$GALAXY_CHADO_DBPASS\"" >> '.auth.yml' &&
+        echo "    dbuser: \"\$GALAXY_CHADO_DBUSER\"" >> '.auth.yml' &&
+        echo "    dbschema: \"\$GALAXY_CHADO_DBSCHEMA\"" >> '.auth.yml' &&
+        echo "    dbport: \"\$GALAXY_CHADO_DBPORT\"" >> '.auth.yml' &&
+
+        CHAKIN_GLOBAL_CONFIG_PATH='.auth.yml'
+    ]]></token>
+
+    <xml name="sanitized">
+        <sanitizer>
+            <valid initial="string.printable">
+                <remove value="&apos;"/>
+            </valid>
+            <mapping initial="none">
+                <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/>
+                <add source="(" target="\("/>
+                <add source=")" target="\)"/>
+            </mapping>
+        </sanitizer>
+    </xml>
+
+    <!-- Workaround needed for workflows: an optional, otherwise unused dataset input that delays this tool until that dataset is ready -->
+    <xml name="wait_for">
+        <param name="wait_for"
+               type="data"
+               format="data"
+               optional="true"
+               label="Run this only after the following dataset is ready"
+               help="Use this if you want to delay the job execution until some data is already loaded. The selected dataset will not be used for anything else."/>
+    </xml>
+
+    <xml name="feature_rel">
+        <param name="rel_subject_re"
+               argument="--rel-subject-re"
+               type="text"
+               label="Regular expression to extract the unique name of the parent feature"
+               help="this regex will be applied on the fasta definition line to generate the unique name of the parent feature">
+            <expand macro="sanitized"/>
+        </param>
+
+        <param name="rel_subject_type"
+               argument="--rel-subject-type"
+               type="text"
+               label="Sequence type of the parent"
+               help="this should be a Sequence Ontology term" />
+    </xml>
+</macros>