Next changeset 1:bc1940bf58f2 (2018-11-05) |
Commit message:
planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/chado commit 3c4fb887c809face4fbe4602617d6dc094b15864 |
added:
README.rst chado.py feature_load_go.xml macros.xml |
b |
diff -r 000000000000 -r cdf222d77b32 README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Mon Nov 05 08:06:39 2018 -0500 |
b |
@@ -0,0 +1,43 @@ +Galaxy-chado +============= + +Galaxy tools to interface with Chado using python-chado + +Dependencies +------------ + +You will need to install some Python modules in the Galaxy virtualenv for these +tools to be fully functional: + +.. code:: bash + + . /path/to/galaxy/.venv/bin/activate + pip install future chado + deactivate + +Environment +----------- + +The following environment variables must be set: + ++--------------------------------+-----------------------------------------------------------+ +| ENV | Use | ++================================+===========================================================+ +| ``$GALAXY_CHADO_DBHOST`` | Host of the Chado database | ++--------------------------------+-----------------------------------------------------------+ +| ``$GALAXY_CHADO_DBNAME`` | Name of the Chado database | ++--------------------------------+-----------------------------------------------------------+ +| ``$GALAXY_CHADO_DBUSER`` | Username to connect to the database | ++--------------------------------+-----------------------------------------------------------+ +| ``$GALAXY_CHADO_DBPASS`` | Password to connect to the database | ++--------------------------------+-----------------------------------------------------------+ +| ``$GALAXY_CHADO_DBSCHEMA`` | Schema of the Chado database | ++--------------------------------+-----------------------------------------------------------+ +| ``$GALAXY_CHADO_DBPORT`` | Port of the Chado database | ++--------------------------------+-----------------------------------------------------------+ + + +License +------- + +All Python scripts and wrappers are licensed under the MIT license. |
b |
diff -r 000000000000 -r cdf222d77b32 chado.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chado.py Mon Nov 05 08:06:39 2018 -0500 |
[ |
b'@@ -0,0 +1,470 @@\n+import collections\n+import os\n+import time\n+from abc import abstractmethod\n+\n+import chado\n+\n+\n+#############################################\n+# BEGIN IMPORT OF CACHING LIBRARY #\n+#############################################\n+# This code is licensed under the MIT #\n+# License and is a copy of code publicly #\n+# available in rev. #\n+# e27332bc82f4e327aedaec17c9b656ae719322ed #\n+# of https://github.com/tkem/cachetools/ #\n+#############################################\n+class DefaultMapping(collections.MutableMapping):\n+\n+ __slots__ = ()\n+\n+ @abstractmethod\n+ def __contains__(self, key): # pragma: nocover\n+ return False\n+\n+ @abstractmethod\n+ def __getitem__(self, key): # pragma: nocover\n+ if hasattr(self.__class__, \'__missing__\'):\n+ return self.__class__.__missing__(self, key)\n+ else:\n+ raise KeyError(key)\n+\n+ def get(self, key, default=None):\n+ if key in self:\n+ return self[key]\n+ else:\n+ return default\n+\n+ __marker = object()\n+\n+ def pop(self, key, default=__marker):\n+ if key in self:\n+ value = self[key]\n+ del self[key]\n+ elif default is self.__marker:\n+ raise KeyError(key)\n+ else:\n+ value = default\n+ return value\n+\n+ def setdefault(self, key, default=None):\n+ if key in self:\n+ value = self[key]\n+ else:\n+ self[key] = value = default\n+ return value\n+\n+\n+DefaultMapping.register(dict)\n+\n+\n+class _DefaultSize(object):\n+ def __getitem__(self, _):\n+ return 1\n+\n+ def __setitem__(self, _, value):\n+ assert value == 1\n+\n+ def pop(self, _):\n+ return 1\n+\n+\n+class Cache(DefaultMapping):\n+ """Mutable mapping to serve as a simple cache or cache base class."""\n+\n+ __size = _DefaultSize()\n+\n+ def __init__(self, maxsize, missing=None, getsizeof=None):\n+ if missing:\n+ self.__missing = missing\n+ if getsizeof:\n+ self.__getsizeof = getsizeof\n+ self.__size = dict()\n+ self.__data = dict()\n+ self.__currsize = 0\n+ self.__maxsize = maxsize\n+\n+ def __repr__(self):\n+ return \'%s(%r, 
maxsize=%r, currsize=%r)\' % (\n+ self.__class__.__name__,\n+ list(self.__data.items()),\n+ self.__maxsize,\n+ self.__currsize,\n+ )\n+\n+ def __getitem__(self, key):\n+ try:\n+ return self.__data[key]\n+ except KeyError:\n+ return self.__missing__(key)\n+\n+ def __setitem__(self, key, value):\n+ maxsize = self.__maxsize\n+ size = self.getsizeof(value)\n+ if size > maxsize:\n+ raise ValueError(\'value too large\')\n+ if key not in self.__data or self.__size[key] < size:\n+ while self.__currsize + size > maxsize:\n+ self.popitem()\n+ if key in self.__data:\n+ diffsize = size - self.__size[key]\n+ else:\n+ diffsize = size\n+ self.__data[key] = value\n+ self.__size[key] = size\n+ self.__currsize += diffsize\n+\n+ def __delitem__(self, key):\n+ size = self.__size.pop(key)\n+ del self.__data[key]\n+ self.__currsize -= size\n+\n+ def __contains__(self, key):\n+ return key in self.__data\n+\n+ def __missing__(self, key):\n+ value = self.__missing(key)\n+ try:\n+ self.__setitem__(key, value)\n+ except ValueError:\n+ pass # value too large\n+ return value\n+\n+ def __iter__(self):\n+ return iter(self.__data)\n+\n+ def __len__(self):\n+ return len(self.__data)\n+\n+ @staticmethod\n+ def __getsizeof(value):\n+ return 1\n+\n+ @staticmethod\n+ def __missing(key):\n+ raise KeyError(key)\n+\n+ @property\n+ def maxsize(self):\n+ """The maximum size of the cache."""\n+ return self.__maxsize\n+\n+ '..b'""\n+ with self.__timer as time:\n+ self.expire(time)\n+ try:\n+ key = next(iter(self.__links))\n+ except StopIteration:\n+ raise KeyError(\'%s is empty\' % self.__class__.__name__)\n+ else:\n+ return (key, self.pop(key))\n+\n+ if hasattr(collections.OrderedDict, \'move_to_end\'):\n+ def __getlink(self, key):\n+ value = self.__links[key]\n+ self.__links.move_to_end(key)\n+ return value\n+ else:\n+ def __getlink(self, key):\n+ value = self.__links.pop(key)\n+ self.__links[key] = value\n+ return value\n+\n+\n+#############################################\n+# END IMPORT OF CACHING 
LIBRARY #\n+#############################################\n+\n+cache = TTLCache(\n+ 100, # Up to 100 items\n+ 1 * 60 # 1 minute cache life\n+)\n+\n+\n+def _get_instance():\n+ return chado.ChadoInstance(\n+ os.environ[\'GALAXY_CHADO_DBHOST\'],\n+ os.environ[\'GALAXY_CHADO_DBNAME\'],\n+ os.environ[\'GALAXY_CHADO_DBUSER\'],\n+ os.environ[\'GALAXY_CHADO_DBPASS\'],\n+ os.environ[\'GALAXY_CHADO_DBSCHEMA\'],\n+ os.environ[\'GALAXY_CHADO_DBPORT\'],\n+ no_reflect=True\n+ )\n+\n+\n+def list_organisms(*args, **kwargs):\n+\n+ ci = _get_instance()\n+\n+ # Key for cached data\n+ cacheKey = \'orgs\'\n+ # We don\'t want to trust "if key in cache" because between asking and fetch\n+ # it might throw a KeyError.\n+ if cacheKey not in cache:\n+ # However if it ISN\'T there, we know we\'re safe to fetch + put in\n+ # there.\n+ data = _list_organisms(ci, *args, **kwargs)\n+ cache[cacheKey] = data\n+ return data\n+ try:\n+ # The cache key may or may not be in the cache at this point, it\n+ # /likely/ is. However we take no chances that it wasn\'t evicted between\n+ # when we checked above and now, so we reference the object from the\n+ # cache in preparation to return.\n+ data = cache[cacheKey]\n+ return data\n+ except KeyError:\n+ # If access fails due to eviction, we will fail over and can ensure that\n+ # data is inserted.\n+ data = _list_organisms(ci, *args, **kwargs)\n+ cache[cacheKey] = data\n+ return data\n+\n+\n+def _list_organisms(ci, *args, **kwargs):\n+ # Fetch the orgs.\n+ orgs_data = []\n+ for org in ci.organism.get_organisms():\n+ clean_name = \'%s %s\' % (org[\'genus\'], org[\'species\'])\n+ if \'infraspecific_name\' in org and org[\'infraspecific_name\']:\n+ clean_name += \' (%s)\' % (org[\'infraspecific_name\'])\n+ orgs_data.append((clean_name, str(org[\'organism_id\']), False))\n+ return orgs_data\n+\n+\n+def list_analyses(*args, **kwargs):\n+\n+ ci = _get_instance()\n+\n+ # Key for cached data\n+ cacheKey = \'analyses\'\n+ # We don\'t want to trust "if key in cache" 
because between asking and fetch\n+ # it might throw a KeyError.\n+ if cacheKey not in cache:\n+ # However if it ISN\'T there, we know we\'re safe to fetch + put in\n+ # there.\n+\n+ data = _list_analyses(ci, *args, **kwargs)\n+ cache[cacheKey] = data\n+ return data\n+ try:\n+ # The cache key may or may not be in the cache at this point, it\n+ # /likely/ is. However we take no chances that it wasn\'t evicted between\n+ # when we checked above and now, so we reference the object from the\n+ # cache in preparation to return.\n+ data = cache[cacheKey]\n+ return data\n+ except KeyError:\n+ # If access fails due to eviction, we will fail over and can ensure that\n+ # data is inserted.\n+ data = _list_analyses(ci, *args, **kwargs)\n+ cache[cacheKey] = data\n+ return data\n+\n+\n+def _list_analyses(ci, *args, **kwargs):\n+ ans_data = []\n+ for an in ci.analysis.get_analyses():\n+ ans_data.append((an[\'name\'], str(an[\'analysis_id\']), False))\n+ return ans_data\n' |
b |
diff -r 000000000000 -r cdf222d77b32 feature_load_go.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/feature_load_go.xml Mon Nov 05 08:06:39 2018 -0500 |
[ |
@@ -0,0 +1,63 @@ +<?xml version="1.0"?> +<tool id="feature_load_go" name="Chado load GO annotation" version="@WRAPPER_VERSION@.0"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <code file="chado.py"/> + <expand macro="requirements"/> + <command detect_errors="aggressive"><![CDATA[ +@AUTH@ + +chakin feature load_go +'$input' +'$organism_id' +'$analysis_id' + +--query_type '$query_type' + +$match_on_name + +--name_column '$name_column' +--go_column '$go_column' + +#if $re_name: + --re_name '$re_name' +#end if + +$skip_missing + + > $results + ]]></command> + <inputs> + <!-- arguments --> + <param name="input" label="GO annotation" argument="input" type="data" format="tabular" help="Path to the GO annotation file to load" /> + <param argument="analysis_id" type="select" dynamic_options="list_analyses()" label="Analysis" /> + <param argument="organism_id" type="select" dynamic_options="list_organisms()" label="Organism" /> + + <!-- options --> + <param name="query_type" label="Query type" argument="query_type" type="text" help="The feature type (e.g. 'gene', 'mRNA', 'polypeptide', 'contig') of the query. It must be a valid Sequence Ontology term." value="polypeptide" /> + + <param name="match_on_name" label="Match On Name" argument="match_on_name" type="boolean" truevalue="--match_on_name" falsevalue="" help="Match features using their name instead of their uniquename" /> + + <param name="name_column" label="Identifier column" argument="name_column" type="integer" value="2" help="Column containing the feature identifiers." /> + + <param name="go_column" label="GO column" argument="go_column" type="integer" value="5" help="Column containing the GO id." /> + + <param name="re_name" label="Name regular expression" argument="re_name" type="text" help="Regular expression to extract the feature name from the input file (first capturing group will be used)." 
optional="true"> + <expand macro="sanitized"/> + </param> + + <param name="skip_missing" label="Skip Missing" argument="skip_missing" type="boolean" truevalue="--skip_missing" falsevalue="" help="Skip lines with unknown features or GO id instead of aborting everything." /> + + <expand macro="wait_for"/> + </inputs> + <outputs> + <data format="txt" name="results"/> + </outputs> + <help> +Load GO annotation from a tabular file + +@HELP@ + </help> +</tool> |
b |
diff -r 000000000000 -r cdf222d77b32 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Nov 05 08:06:39 2018 -0500 |
[ |
@@ -0,0 +1,86 @@ +<?xml version="1.0"?> +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="2.2.2">python-chado</requirement> + <requirement type="package" version="1.5">jq</requirement> + <yield/> + </requirements> + </xml> + + <xml name="stdio"> + <stdio> + <regex level="fatal" match="Exception:" source="stderr" /> + <regex level="fatal" match="error" source="stderr" /> + <exit_code range="1:" /> + </stdio> + </xml> + + <token name="@WRAPPER_VERSION@">2.1.5</token> + + <xml name="citation"> + <citations> + </citations> + </xml> + + <token name="@HELP_OVERVIEW@"><![CDATA[ + **Python-chado Overview** + + Python-chado provides several tools allowing you to load data into a remote Chado database. + ]]></token> + + <token name="@HELP@"><![CDATA[ + ]]></token> + + <token name="@AUTH@"><![CDATA[ + echo "__default: local" > '.auth.yml' && + echo "local:" >> '.auth.yml' && + echo " dbhost: \"\$GALAXY_CHADO_DBHOST\"" >> '.auth.yml' && + echo " dbname: \"\$GALAXY_CHADO_DBNAME\"" >> '.auth.yml' && + echo " dbpass: \"\$GALAXY_CHADO_DBPASS\"" >> '.auth.yml' && + echo " dbuser: \"\$GALAXY_CHADO_DBUSER\"" >> '.auth.yml' && + echo " dbschema: \"\$GALAXY_CHADO_DBSCHEMA\"" >> '.auth.yml' && + echo " dbport: \"\$GALAXY_CHADO_DBPORT\"" >> '.auth.yml' && + + CHAKIN_GLOBAL_CONFIG_PATH='.auth.yml' + ]]></token> + + <xml name="sanitized"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + <mapping initial="none"> + <add source="'" target="'"'"'"/> + <add source="(" target="\("/> + <add source=")" target="\)"/> + </mapping> + </sanitizer> + </xml> + + <!-- I'm not proud of it, but it is needed for workflows --> + <xml name="wait_for"> + <param name="wait_for" + type="data" + format="data" + optional="true" + label="Run this only after the following dataset is ready" + help="Use this if you want to delay the job execution until some data is already loaded. 
The selected dataset will not be used for anything else."/> + </xml> + + <xml name="feature_rel"> + <param name="rel_subject_re" + argument="--rel-subject-re" + type="text" + label="Regular expression to extract the unique name of the parent feature" + help="this regex will be applied on the fasta definition line to generate the unique name of the parent feature"> + <expand macro="sanitized"/> + </param> + + <param name="rel_subject_type" + argument="--rel-subject-type" + type="text" + label="Sequence type of the parent" + help="this should be a Sequence Ontology term" /> + </xml> +</macros> |