# HG changeset patch
# User iuc
# Date 1480624100 18000
# Node ID bb7989bd88baec2f1f537d853cbf793669881764
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ebi_tools commit 7a9c88c1c80b80aaa63e55e9d9125b6a4dd695ac

diff -r 000000000000 -r bb7989bd88ba README.md
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Thu Dec 01 15:28:20 2016 -0500
@@ -0,0 +1,14 @@
+EBI Search
+==========
+
+EBI Search provides text search functionality and uniform access to the resources and services hosted at the European Bioinformatics Institute (EBI).
+
+Because EBI Search exposes a large number of options, the `macros.xml` for this wrapper, which lists all of them, is generated automatically using the [`ebeye_urllib3.py`](http://www.ebi.ac.uk/Tools/webservices/download_clients/python/urllib/ebeye_urllib3.py) client from EBI and a Python script ([`generate_macros.py`](generate_macros.py)).
+
+To change `macros.xml`, edit [`generate_macros.py`](generate_macros.py) and regenerate `macros.xml` with:
+
+```
+$ conda env create -f environment.yml
+$ source activate ebeye_urllib
+(ebeye_urllib) $ python generate_macros.py
+```
\ No newline at end of file

diff -r 000000000000 -r bb7989bd88ba download_ebi_metagenomics_run_data
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/download_ebi_metagenomics_run_data	Thu Dec 01 15:28:20 2016 -0500
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+set -e
+
+python_script=$1
+run_id=$2
+information_type=$3
+information_to_download=$4
+chunk_type=$5
+output_file=$6
+
+touch "$output_file"
+
+run_link=$(python "$python_script" getRunLink "$run_id")
+run_link="$run_link/$information_type/$information_to_download"
+
+if [[ $chunk_type == 'multiple_chunks' ]]; then
+    chunk_nb=$(curl "$run_link/chunks")
+
+    if [[ "$chunk_nb" -lt 1 ]]; then
+        >&2 echo "-----"
+        >&2 echo "ERROR"
+        >&2 echo "-----"
+        >&2 echo "No data found for this link:"
+        >&2 echo "$run_link/chunks"
+        >&2 echo "-----"
+        exit 6
+    fi
+
+    for i in $(seq "$chunk_nb"); do
+        curl "$run_link/chunks/$i" | gunzip >> "$output_file"
+    done
+else
+    curl "$run_link" >> "$output_file"
+fi
+
+if [ ! -s "$output_file" ]; then
+    >&2 echo "-----"
+    >&2 echo "ERROR"
+    >&2 echo "-----"
+    >&2 echo "The output file is empty, probably because the following link is not working:"
+    >&2 echo "$run_link"
+    >&2 echo "-----"
+    exit 6
+fi
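For orientation, here is a hypothetical invocation of the download script above. All six values are illustrative placeholders (the information type and information to download are simply appended to the run link as path segments); none of them is a verified EBI endpoint:

```
# Arguments, in order: python_script, run_id, information_type,
# information_to_download, chunk_type, output_file
# (all values are illustrative placeholders)
bash download_ebi_metagenomics_run_data \
    ebeye_urllib.py \
    ERR675518 \
    sequences \
    ProcessedReads \
    multiple_chunks \
    output.fasta
```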
diff -r 000000000000 -r bb7989bd88ba ebeye_urllib.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ebeye_urllib.py	Thu Dec 01 15:28:20 2016 -0500
@@ -0,0 +1,321 @@
+#!/usr/bin/env python
+# ======================================================================
+# Script derived from the EB-eye (REST) Python client available at
+# http://www.ebi.ac.uk/Tools/webservices/services/eb-eye_rest
+# and distributed under the Apache License
+# ======================================================================
+# Load libraries
+import platform
+import os
+import urllib
+import re
+from optparse import OptionParser
+from gzip import GzipFile
+from xmltramp2 import xmltramp
+# python2
+from StringIO import StringIO
+import urllib2
+# python3
+# import urllib.request as urllib2
+
+
+# Service base URL
+baseUrl = 'http://www.ebi.ac.uk/ebisearch/ws/rest'
+
+# Debug level
+debugLevel = 0
+
+
+# Debug print
+def printDebugMessage(functionName, message, level):
+    if (level <= debugLevel):
+        print('[' + functionName + '] ' + message)
+
+
+# User-agent for request
+def getUserAgent():
+    printDebugMessage('getUserAgent', 'Begin', 11)
+    urllib_agent = 'Python-urllib/%s' % urllib2.__version__
+    clientRevision = '$Revision: 2468 $'
+    clientVersion = '0'
+    if len(clientRevision) > 11:
+        clientVersion = clientRevision[11:-2]
+    user_agent = 'EBI-Sample-Client/%s (%s; Python %s; %s) %s' % (
+        clientVersion, os.path.basename(__file__),
+        platform.python_version(), platform.system(),
+        urllib_agent
+    )
+    printDebugMessage('getUserAgent', 'user_agent: ' + user_agent, 12)
+    printDebugMessage('getUserAgent', 'End', 11)
+    return user_agent
+
+
+# Wrapper for a REST (HTTP GET) request
+def restRequest(url):
+    printDebugMessage('restRequest', 'Begin', 11)
+    printDebugMessage('restRequest', 'url: ' + url, 11)
+    # python2
+    url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")
+    # python3
+    # url = urllib.request.quote(url, safe="%/:=&?~#+!$,;'@()*[]")
+
+    try:
+        user_agent = getUserAgent()
+        http_headers = {
+            'User-Agent': user_agent,
+            'Accept-Encoding': 'gzip'
+        }
+        req = urllib2.Request(url, None, http_headers)
+        resp = urllib2.urlopen(req)
+        # python2
+        encoding = resp.info().getheader('Content-Encoding')
+        # python3
+        # encoding = resp.info().__getitem__('Content-Encoding')
+        result = None
+        if encoding is None or encoding == 'identity':
+            # python2
+            result = resp.read()
+            # python3
+            # result = str(resp.read(), 'utf-8')
+        elif encoding == 'gzip':
+            result = resp.read()
+            printDebugMessage('restRequest', 'result: ' + str(result), 21)
+            # python2
+            gz = GzipFile(
+                fileobj=StringIO(result),
+                mode="r")
+            result = gz.read()
+            # python3
+            # result = str(gzip.decompress(result), 'utf-8')
+        else:
+            raise Exception('Unsupported Content-Encoding')
+        resp.close()
+    except urllib2.HTTPError as ex:
+        raise ex
+    printDebugMessage('restRequest', 'result: ' + result, 11)
+    printDebugMessage('restRequest', 'End', 11)
+    return result
+
+
+def hasSubdomains(domainInfo):
+    for dir in domainInfo._dir:
+        if dir._name == 'subdomains':
+            return True
+    return False
+
+
+def extractUsefulFields(fieldInfos):
+    searchable = []
+    retrievable = []
+
+    for fieldInfo in fieldInfos:
+        if fieldInfo('id') == "$facets":
+            continue
+
+        options = fieldInfo['options']['option':]
+        for option in options:
+            if option("name") == "searchable" and str(option) == "true":
+                searchable.append(fieldInfo('id'))
+            if option("name") == "retrievable" and str(option) == "true":
+                retrievable.append(fieldInfo('id'))
+    return searchable, retrievable
+
+
+def extractLowerLevelDomains(domainInfo, domains):
+    if hasSubdomains(domainInfo):
+        subdomains = domainInfo['subdomains']['domain':]
+        for subdomain in subdomains:
+            domains = extractLowerLevelDomains(subdomain, domains)
+    else:
+        searchable, retrievable = extractUsefulFields(
+            domainInfo['fieldInfos']['fieldInfo':])
+
+        domain_id = domainInfo('id')
+        domains.setdefault(domain_id, {})
+        domains[domain_id]["name"] = domainInfo('name')
+        domains[domain_id]["searchable_fields"] = sorted(searchable)
+        domains[domain_id]["retrievable_fields"] = sorted(retrievable)
+    return domains
+
+
+# Get domain hierarchy
+def getDomainHierarchy():
+    requestUrl = baseUrl + '/allebi'
+    xmlDoc = restRequest(requestUrl)
+    doc = xmltramp.parse(xmlDoc)
+    allebi = doc['domains']['domain']
+    lower_level_domains = extractLowerLevelDomains(allebi, {})
+    printDebugMessage('getDomainHierarchy', 'End', 1)
+    return lower_level_domains
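+
+
+# Illustrative note, not generated by the EBI service: getDomainHierarchy()
+# returns a dict keyed by leaf-domain id. A hypothetical entry (field lists
+# are placeholders) could look like:
+#   {'metagenomics_runs': {'name': 'Metagenomics runs',
+#                          'searchable_fields': ['id', ...],
+#                          'retrievable_fields': ['id', ...]}}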
+
+
+# Check if a databaseInfo matches a database name
+def is_database(dbInfo, dbName):
+    printDebugMessage('is_database', 'Begin', 11)
+    retVal = False
+    if str(dbInfo.name) == dbName:
+        retVal = True
+    else:
+        for dbAlias in dbInfo.aliasList:
+            if str(dbAlias) == dbName:
+                retVal = True
+    printDebugMessage('is_database', 'retVal: ' + str(retVal), 11)
+    printDebugMessage('is_database', 'End', 11)
+    return retVal
+
+
+# Get number of results
+def getNumberOfResults(domain, query):
+    printDebugMessage('getNumberOfResults', 'Begin', 1)
+    requestUrl = baseUrl + '/' + domain + '?query=' + query + '&size=0'
+    printDebugMessage('getNumberOfResults', requestUrl, 2)
+    xmlDoc = restRequest(requestUrl)
+    doc = xmltramp.parse(xmlDoc)
+    numberOfResults = int(str(doc['hitCount']))
+    printDebugMessage('getNumberOfResults', 'End', 1)
+    return numberOfResults
+
+
+def makeRequest(requestUrl):
+    xmlDoc = restRequest(requestUrl)
+    doc = xmltramp.parse(xmlDoc)
+    entries = doc['entries']['entry':]
+    formatted_output = printEntries(entries)
+    return formatted_output
+
+
+# Get search results
+def getResults(domain, query, fields):
+    numberOfResults = getNumberOfResults(domain, query)
+    maximum_size = 100
+    # Explicit integer division: number of full pages of results
+    quotient = numberOfResults // maximum_size
+    start = 0
+
+    printDebugMessage('getResults', 'Begin', 1)
+    request_output = "%s\tlink\n" % (fields.replace(",", "\t"))
+    for i in range(quotient):
+        start = maximum_size * i
+        requestUrl = baseUrl + '/' + domain + '?query=' + query
+        requestUrl += '&fields=' + fields + '&size=' + str(maximum_size)
+        requestUrl += '&start=' + str(start) + '&fieldurl=true'
+        request_output += makeRequest(requestUrl)
+
+    if (numberOfResults % maximum_size) > 0:
+        start = maximum_size * quotient
+        remainder = numberOfResults - start
+        requestUrl = baseUrl + '/' + domain + '?query=' + query
+        requestUrl += '&fields=' + fields + '&size=' + str(remainder)
+        requestUrl += '&start=' + str(start) + '&fieldurl=true'
+        request_output += makeRequest(requestUrl)
+
+    print(request_output)
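+
+
+# Worked example of the paging in getResults (illustrative numbers): with
+# 257 hits and maximum_size = 100, quotient == 2, so two full pages are
+# fetched at start=0 and start=100; 257 % 100 == 57 > 0, so one final
+# request fetches the remaining 57 records at start=200.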
+
+
+def printEntries(entries):
+    output = ""
+    printDebugMessage('printEntries', 'Begin', 1)
+    for entry in entries:
+        sep = ""
+        for field in entry['fields']['field':]:
+            output += "%s" % (sep)
+            fields = field['values']['value':]
+            if len(fields) > 0:
+                sub_sep = ""
+                for value in field['values']['value':]:
+                    output += "%s%s" % (sub_sep, value)
+                    sub_sep = ","
+            sep = "\t"
+
+        if hasFieldUrls(entry):
+            output += "%s" % (sep)
+            sub_sep = ""
+            for fieldurl in entry['fieldURLs']['fieldURL':]:
+                output += "%s%s" % (sub_sep, str(fieldurl))
+                sub_sep = ","
+            sep = "\t"
+        if hasViewUrls(entry):
+            output += "%s" % (sep)
+            sub_sep = ""
+            for viewurl in entry['viewURLs']['viewURL':]:
+                output += "%s%s" % (sub_sep, str(viewurl))
+                sub_sep = ","
+        output += "\n"
+    printDebugMessage('printEntries', 'End', 1)
+    return output
+
+
+def hasFieldUrls(entry):
+    for dir in entry._dir:
+        if dir._name == 'fieldURLs':
+            return True
+    return False
+
+
+def hasViewUrls(entry):
+    for dir in entry._dir:
+        if dir._name == 'viewURLs':
+            return True
+    return False
+
+
+def getRunLink(run_id):
+    printDebugMessage('getRunLink', 'Begin', 1)
+    requestUrl = baseUrl + '/metagenomics_runs/entry/' + run_id + '?fieldurl=true'
+    printDebugMessage('getRunLink', requestUrl, 2)
+    xmlDoc = restRequest(requestUrl)
+    doc = xmltramp.parse(xmlDoc)
+    entries = doc['entries']['entry':]
+    fieldURL = ''
+    for entry in entries:
+        for fieldurl in entry['fieldURLs']['fieldURL':]:
+            fieldURL += str(fieldurl)
+    printDebugMessage('getRunLink', 'End', 1)
+    # Rewrite only the scheme, so a URL that is already https is untouched
+    p = re.compile('^http://')
+    fieldURL = p.sub('https://', fieldURL)
+    print(fieldURL)
+
+
+if __name__ == '__main__':
+    # Usage message
+    usage = """
+    %prog getDomainHierarchy
+    %prog getResults <domain> <query> <fields>
+    %prog getRunLink <run_id>
+    """
+
+    description = "Tools to query and download data from several EMBL-EBI databases. "
+    description += "The search tools use the EB-eye search engine: "
+    description += "http://www.ebi.ac.uk/ebisearch/"
+    # Process command-line options
+    parser = OptionParser(
+        usage=usage,
+        description=description,
+        version='1.0')
+    (options, args) = parser.parse_args()
+
+    # No arguments, print usage
+    if len(args) < 1:
+        parser.print_help()
+
+    # Get domain hierarchy
+    elif args[0] == 'getDomainHierarchy':
+        getDomainHierarchy()
+
+    # Get search results
+    elif args[0] == 'getResults':
+        if len(args) < 4:
+            print('domain, query and fields should be given.')
+        else:
+            getResults(args[1], args[2], args[3])
+
+    # Get run link results
+    elif args[0] == 'getRunLink':
+        if len(args) < 2:
+            print('run id should be given.')
+        else:
+            getRunLink(args[1])
+
+    # Unknown argument combination, display usage
+    else:
+        print('Error: unrecognised argument combination')
+        parser.print_help()
diff -r 000000000000 -r bb7989bd88ba ebi_search_rest_results.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ebi_search_rest_results.xml	Thu Dec 01 15:28:20 2016 -0500
@@ -0,0 +1,140 @@
+[Galaxy tool XML, 140 lines; the markup did not survive extraction. The
+recoverable fragments are: the tool description "to obtain search results
+on resources and services hosted at the EBI", an import of macros.xml, and
+a command block (CDATA) that writes its output to '$search_results'.]
diff -r 000000000000 -r bb7989bd88ba environment.yml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/environment.yml	Thu Dec 01 15:28:20 2016 -0500
@@ -0,0 +1,19 @@
+name: ebeye_urllib
+channels: !!python/tuple
+- biocore
+- bioconda
+- defaults
+dependencies:
+- bioconda::urllib3=1.12=py27_0
+- bioconda::xmltramp2=3.1.1=py27_0
+- openssl=1.0.2j=0
+- pip=9.0.1=py27_0
+- python=2.7.12=1
+- readline=6.2=2
+- setuptools=27.2.0=py27_0
+- six=1.10.0=py27_0
+- sqlite=3.13.0=0
+- tk=8.5.18=0
+- wheel=0.29.0=py27_0
+- zlib=1.2.8=3
+
diff -r 000000000000 -r bb7989bd88ba generate_macros.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/generate_macros.py	Thu Dec 01 15:28:20 2016 -0500
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+
+import ebeye_urllib
+
+
+def add_option(value, name, selected=False):
+    to_write = '
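For reference, a sketch of how the EB-eye client is driven from the command line, following its usage message. The query, field list, and run accession below are illustrative placeholders, and the commands assume the Python 2 environment pinned in environment.yml:

```
# Activate the pinned environment, then call one of the three subcommands
source activate ebeye_urllib
# Print the domain hierarchy (leaf domains with searchable/retrievable fields)
python ebeye_urllib.py getDomainHierarchy
# Search a domain; fields is a comma-separated list of retrievable fields
python ebeye_urllib.py getResults metagenomics_runs 'experiment_type:metagenomic' 'id,name'
# Print the HTTPS download link for a run entry (accession is illustrative)
python ebeye_urllib.py getRunLink ERR675518
```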