Repository 'ebi_search_rest_results'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ebi_search_rest_results

Changeset 0:bb7989bd88ba (2016-12-01)
Next changeset 1:a2c83f79f253 (2016-12-12)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ebi_tools commit 7a9c88c1c80b80aaa63e55e9d9125b6a4dd695ac
added:
README.md
download_ebi_metagenomics_run_data
ebeye_urllib.py
ebi_search_rest_results.xml
environment.yml
generate_macros.py
macros.xml
test-data/ERR675640_qc-stats_stats
b
diff -r 000000000000 -r bb7989bd88ba README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Thu Dec 01 15:28:20 2016 -0500
[
@@ -0,0 +1,14 @@
+EBI Search
+==========
+
+EBI Search is a tool to provide text search functionality and uniform access to resources and services hosted at the European Bioinformatics Institute.
+
+As the possible options in EBI Search are numerous, the `macros.xml` for this wrapper, with all its options, is automatically generated using the [`ebeye_urllib3.py`](http://www.ebi.ac.uk/Tools/webservices/download_clients/python/urllib/ebeye_urllib3.py) client from EBI and a Python script ([`generate_macros.py`](generate_macros.py)).
+
+To change `macros.xml`, edit [`generate_macros.py`](generate_macros.py) and regenerate `macros.xml` with:
+
+```
+$ conda env create -f environment.yml
+$ source activate ebeye_urllib
+(ebeye_urllib) $ python generate_macros.py
+```
\ No newline at end of file
b
diff -r 000000000000 -r bb7989bd88ba download_ebi_metagenomics_run_data
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/download_ebi_metagenomics_run_data Thu Dec 01 15:28:20 2016 -0500
[
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# Download one result file of an EBI Metagenomics run into an output file.
+#
+# Usage:
+#   download_ebi_metagenomics_run_data <python_script> <run_id> \
+#       <information_type> <information_to_download> <chunk_type> <output_file>
+#
+# <python_script> is invoked as `python <python_script> getRunLink <run_id>`
+# and its standard output is used as the base URL of the run. When
+# <chunk_type> is 'multiple_chunks', the reply of "<url>/chunks" is treated as
+# the number of chunks; each chunk is fetched, gunzipped and appended to
+# <output_file>. Otherwise the URL is fetched directly.
+# Exits with status 6 when no chunk is announced or the output stays empty.
+set -e
+
+python_script=$1
+run_id=$2
+information_type=$3
+information_to_download=$4
+chunk_type=$5
+output_file=$6
+
+# Quote every expansion so paths containing whitespace stay single arguments.
+touch "$output_file"
+
+run_link=$(python "$python_script" getRunLink "$run_id")
+run_link="$run_link/$information_type/$information_to_download"
+
+if [[ $chunk_type == 'multiple_chunks' ]]; then
+    chunk_nb=$(curl "$run_link/chunks")
+
+    # Inside [[ ]], `<` compares strings lexicographically, so the count is
+    # validated as digits first and then compared arithmetically. `10#` forces
+    # base 10 so a reply with leading zeros is not read as octal.
+    if ! [[ $chunk_nb =~ ^[0-9]+$ ]] || (( 10#$chunk_nb < 1 )); then
+        >&2 echo "-----"
+        >&2 echo "ERROR"
+        >&2 echo "-----"
+        >&2 echo "No data are found for this link:"
+        >&2 echo "$run_link/chunks"
+        >&2 echo "-----"
+        exit 6
+    fi
+
+    for i in $(seq "$chunk_nb"); do
+        curl "$run_link/chunks/$i" | gunzip >> "$output_file"
+    done
+else
+    curl "$run_link" >> "$output_file"
+fi
+
+# An empty file means nothing was downloaded: report it instead of succeeding.
+if [ ! -s "$output_file" ]; then
+    >&2 echo "-----"
+    >&2 echo "ERROR"
+    >&2 echo "-----"
+    >&2 echo "The output file is empty probably because the following link is not working:"
+    >&2 echo "$run_link"
+    >&2 echo "-----"
+    exit 6
+fi
b
diff -r 000000000000 -r bb7989bd88ba ebeye_urllib.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ebeye_urllib.py Thu Dec 01 15:28:20 2016 -0500
[
b'@@ -0,0 +1,321 @@\n+#!/usr/bin/env python\n+# ======================================================================\n+# Script derived from the EB-eye (REST) Python client available at\n+# http://www.ebi.ac.uk/Tools/webservices/services/eb-eye_rest\n+# and distributed under the Apache License\n+# ======================================================================\n+# Load libraries\n+import platform\n+import os\n+import urllib\n+import re\n+from optparse import OptionParser\n+from gzip import GzipFile\n+from xmltramp2 import xmltramp\n+# python2\n+from StringIO import StringIO\n+import urllib2\n+# python3\n+# import urllib.request as urllib2\n+\n+\n+# Service base URL\n+baseUrl = \'http://www.ebi.ac.uk/ebisearch/ws/rest\'\n+\n+# Debug level\n+debugLevel = 0\n+\n+\n+# Debug print\n+def printDebugMessage(functionName, message, level):\n+    if(level <= debugLevel):\n+        print (\'[\' + functionName + \'] \' + message)\n+\n+\n+# User-agent for request.\n+def getUserAgent():\n+    printDebugMessage(\'getUserAgent\', \'Begin\', 11)\n+    urllib_agent = \'Python-urllib/%s\' % urllib2.__version__\n+    clientRevision = \'$Revision: 2468 $\'\n+    clientVersion = \'0\'\n+    if len(clientRevision) > 11:\n+        clientVersion = clientRevision[11:-2]\n+    user_agent = \'EBI-Sample-Client/%s (%s; Python %s; %s) %s\' % (\n+        clientVersion, os.path.basename(__file__),\n+        platform.python_version(), platform.system(),\n+        urllib_agent\n+    )\n+    printDebugMessage(\'getUserAgent\', \'user_agent: \' + user_agent, 12)\n+    printDebugMessage(\'getUserAgent\', \'End\', 11)\n+    return user_agent\n+\n+\n+# Wrapper for a REST (HTTP GET) request\n+def restRequest(url):\n+    printDebugMessage(\'restRequest\', \'Begin\', 11)\n+    printDebugMessage(\'restRequest\', \'url: \' + url, 11)\n+    # python 2\n+    url = urllib.quote(url, safe="%/:=&?~#+!$,;\'@()*[]")\n+    # python 3\n+    # url = urllib.request.quote(url, safe="%/:=&?~#+!$,;\'@()*[]")\n+\n+ 
   try:\n+        user_agent = getUserAgent()\n+        http_headers = {\n+            \'User-Agent\': user_agent,\n+            \'Accept-Encoding\': \'gzip\'\n+        }\n+        req = urllib2.Request(url, None, http_headers)\n+        resp = urllib2.urlopen(req)\n+        # python2\n+        encoding = resp.info().getheader(\'Content-Encoding\')\n+        # python3\n+        # encoding = resp.info().__getitem__(\'Content-Encoding\')\n+        result = None\n+        if encoding is None or encoding == \'identity\':\n+            # python2\n+            result = resp.read()\n+            # python3\n+            # result = str(resp.read(), \'utf-8\')\n+        elif encoding == \'gzip\':\n+            result = resp.read()\n+            printDebugMessage(\'restRequest\', \'result: \' + str(result), 21)\n+            # python2\n+            gz = GzipFile(\n+                fileobj=StringIO(result),\n+                mode="r")\n+            result = gz.read()\n+            # python3\n+            # result = str(gzip.decompress(result), \'utf-8\')\n+        else:\n+            raise Exception(\'Unsupported Content-Encoding\')\n+        resp.close()\n+    except urllib2.HTTPError as ex:\n+        raise ex\n+    printDebugMessage(\'restRequest\', \'result: \' + result, 11)\n+    printDebugMessage(\'restRequest\', \'End\', 11)\n+    return result\n+\n+\n+def hasSubdomains(domainInfo):\n+    for dir in domainInfo._dir:\n+        if dir._name == \'subdomains\':\n+            return True\n+    return False\n+\n+\n+def extractUsefulFields(fieldInfos):\n+    searchable = []\n+    retrievable = []\n+\n+    for fieldInfo in fieldInfos:\n+        if fieldInfo(\'id\') == "$facets":\n+            continue\n+\n+        options = fieldInfo[\'options\'][\'option\':]\n+        for option in options:\n+            if option("name") == "searchable" and str(option) == "true":\n+                searchable.append(fieldInfo(\'id\'))\n+            if option("name") == "retrievable" and 
str(option) == "true":\n+                retrievable.append(fieldInfo(\'id\'))\n+    return searchable, retrievable\n+\n+\n+def extractLowerLevelDomains(domainInfo, domains):\n+    if hasSubdomains(domainInfo):\n+  '..b'baseUrl + \'/\' + domain + \'?query=\' + query\n+        requestUrl += \'&fields=\' + fields + \'&size=\' + str(maximum_size)\n+        requestUrl += \'&start=\' + str(start) + \'&fieldurl=true\'\n+        request_output += makeRequest(requestUrl)\n+\n+    if (numberOfResults % 100) > 0:\n+        start = maximum_size * quotient\n+        remainder = numberOfResults - start\n+        requestUrl = baseUrl + \'/\' + domain + \'?query=\' + query\n+        requestUrl += \'&fields=\' + fields + \'&size=\' + str(remainder)\n+        requestUrl += \'&start=\' + str(start) + \'&fieldurl=true\'\n+        request_output += makeRequest(requestUrl)\n+\n+    print(request_output)\n+\n+\n+def printEntries(entries):\n+    output = ""\n+    printDebugMessage(\'printEntries\', \'Begin\', 1)\n+    for entry in entries:\n+        sep = ""\n+        for field in entry[\'fields\'][\'field\':]:\n+            output += "%s" % (sep)\n+            fields = field[\'values\'][\'value\':]\n+            if len(fields) > 0:\n+                sub_sep = ""\n+                for value in field[\'values\'][\'value\':]:\n+                    output += "%s%s" % (sub_sep, value)\n+                    sub_sep = ","\n+            sep = "\\t"\n+\n+        if hasFieldUrls(entry):\n+            output += "%s" % (sep)\n+            sub_sep = ""\n+            for fieldurl in entry[\'fieldURLs\'][\'fieldURL\':]:\n+                output += "%s%s" % (sub_sep, str(fieldurl))\n+                sub_sep = ","\n+            sep = "\\t"\n+        if hasViewUrls(entry):\n+            output += "%s" % (sep)\n+            sub_sep = ""\n+            for viewurl in entry[\'viewURLs\'][\'viewURL\':]:\n+                output += "%s%s" % (sub_sep, str(viewurl))\n+                sub_sep = ","\n+        
output += "\\n"\n+    printDebugMessage(\'printEntries\', \'End\', 1)\n+    return output\n+\n+\n+def hasFieldUrls(entry):\n+    for dir in entry._dir:\n+        if dir._name == \'fieldURLs\':\n+            return True\n+    return False\n+\n+\n+def hasViewUrls(entry):\n+    for dir in entry._dir:\n+        if dir._name == \'viewURLs\':\n+            return True\n+    return False\n+\n+\n+def getRunLink(run_id):\n+    printDebugMessage(\'getEntries\', \'Begin\', 1)\n+    requestUrl = baseUrl + \'/metagenomics_runs/entry/\' + run_id + \'?fieldurl=true\'\n+    printDebugMessage(\'getEntries\', requestUrl, 2)\n+    xmlDoc = restRequest(requestUrl)\n+    doc = xmltramp.parse(xmlDoc)\n+    entries = doc[\'entries\'][\'entry\':]\n+    fieldURL = \'\'\n+    for entry in entries:\n+        for fieldurl in entry[\'fieldURLs\'][\'fieldURL\':]:\n+            fieldURL += str(fieldurl)\n+    printDebugMessage(\'getEntries\', \'End\', 1)\n+    p = re.compile(\'http\')\n+    fieldURL = p.sub(\'https\', fieldURL)\n+    print fieldURL\n+\n+\n+if __name__ == \'__main__\':\n+    # Usage message\n+    usage = """\n+      %prog getDomainHierarchy\n+      %prog getResults <domain> <query> <fields>\n+      %prog getRunLink <runId>\n+      """\n+\n+    description = "Tools to query and download data from several EMBL-EBI databases"\n+    description += "The searching tools are using the EB-eye search engine. 
"\n+    description += "http://www.ebi.ac.uk/ebisearch/"\n+    # Process command-line options\n+    parser = OptionParser(\n+        usage=usage,\n+        description=description,\n+        version=\'1.0\')\n+    (options, args) = parser.parse_args()\n+\n+    # No arguments, print usage\n+    if len(args) < 1:\n+        parser.print_help()\n+\n+    # Get domain hierarchy\n+    elif args[0] == \'getDomainHierarchy\':\n+        getDomainHierarchy()\n+\n+    # Get search results\n+    elif args[0] == \'getResults\':\n+        if len(args) < 4:\n+            print (\'domain, query and fields should be given.\')\n+        else:\n+            getResults(args[1], args[2], args[3])\n+\n+    # Get run link results\n+    elif args[0] == \'getRunLink\':\n+        if len(args) < 2:\n+            print (\'run id should be given.\')\n+        else:\n+            getRunLink(args[1])\n+\n+    # Unknown argument combination, display usage\n+    else:\n+        print (\'Error: unrecognised argument combination\')\n+        parser.print_help()\n'
b
diff -r 000000000000 -r bb7989bd88ba ebi_search_rest_results.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ebi_search_rest_results.xml Thu Dec 01 15:28:20 2016 -0500
[
@@ -0,0 +1,140 @@
+<tool id="ebi_search_rest_results" name="EBI Search" version="0.1.0">
+    <description>to obtain search results on resources and services hosted at the EBI</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="requirements" />
+
+    <command detect_errors="exit_code"><![CDATA[
+        #set $queries=''
+        #set $sep=''
+        #for query in $searched_domain.queries:
+            #if $sep == ''
+                #set $queries = $queries
+                #set $sep = str($query.combination_operation)
+            #else
+                #set $sep = str($query.combination_operation)
+                #set $queries += ' %s ' % ($sep)
+            #end if
+
+            #set $queries += '%s:' % (str($query.query_field))
+            #if str($query.comp_operation.operation) == 'equal'
+                #set $queries += '(%s)' % (str($query.comp_operation.query_text))
+            #else if str($query.comp_operation.operation) == 'not'
+                #set $queries += '(%s NOT %s)' % (str($query.comp_operation.query_text), str($query.comp_operation.not_query_text))
+            #else if str($query.comp_operation.operation) == 'range'
+                #set $queries += '[%s TO %s]' % (str($query.comp_operation.min), str($query.comp_operation.max))
+            #end if
+        #end for
+
+        python '$__tool_directory__/ebeye_urllib.py'
+            getResults
+            '$searched_domain.domain'
+            '$queries'
+            '$searched_domain.fields'
+            > '$search_results'
+    ]]></command>
+
+    <inputs>
+        <expand macro="inputs" />
+    </inputs>
+
+    <outputs>
+        <data name="search_results" format="tabular" label="${tool.name} on ${searched_domain.domain}" />
+    </outputs>
+
+    <tests>
+        <test>
+            <conditional name="searched_domain">
+                <param name="domain" value="metagenomics_runs" />
+                <param name="fields" value="id,experiment_type" />
+                <repeat name="queries">
+                    <param name="combination_operation" value="AND" />
+                    <param name="query_field" value="biome" />
+                    <conditional name="comp_operation">
+                        <param name="operation" value="equal" />
+                        <param name="query_text" value="gut" />
+                    </conditional>
+                </repeat>
+            </conditional>
+            <output name="search_results">
+                <assert_contents>
+                    <has_line_matching expression="^ERR762518.*" />
+                    <has_line_matching expression="^ERR1337677.*" />
+                    <has_line_matching expression="^ERR489293.*" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <conditional name="searched_domain">
+                <param name="domain" value="metagenomics_runs" />
+                <param name="fields" value="id,experiment_type" />
+                <repeat name="queries">
+                    <param name="combination_operation" value="AND" />
+                    <param name="query_field" value="project_name" />
+                    <conditional name="comp_operation">
+                        <param name="operation" value="not" />
+                        <param name="query_text" value="cheese" />
+                        <param name="not_query_text" value="microb" />
+                    </conditional>
+                </repeat>
+            </conditional>
+            <output name="search_results">
+                <assert_contents>
+                    <has_line_matching expression="^SRR1205272.*" />
+                    <has_line_matching expression="^ERR364233, ERR364234.*" />
+                    <has_line_matching expression="^SRR2126380.*" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <conditional name="searched_domain">
+                <param name="domain" value="metagenomics_runs" />
+                <param name="fields" value="id,experiment_type" />
+                <repeat name="queries">
+                    <param name="combination_operation" value="AND" />
+                    <param name="query_field" value="biome" />
+                    <conditional name="comp_operation">
+                        <param name="operation" value="equal" />
+                        <param name="query_text" value="marine" />
+                    </conditional>
+                </repeat>
+                <repeat name="queries">
+                    <param name="combination_operation" value="AND" />
+                    <param name="query_field" value="depth" />
+                    <conditional name="comp_operation">
+                        <param name="operation" value="range" />
+                        <param name="min" value="0" />
+                        <param name="max" value="200" />
+                    </conditional>
+                </repeat>
+            </conditional>
+            <output name="search_results">
+                <assert_contents>
+                    <has_line_matching expression="^ERR091545,ERR091546,ERR091547,ERR091548.*" />
+                    <has_line_matching expression="^ERR771081.*" />
+                    <has_line_matching expression="^ERR1278020.*" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+The European Bioinformatics Institute (EMBL-EBI) maintains the world’s most comprehensive range of freely available and up-to-date molecular databases.
+
+EBI Search, also known as 'EB-eye', is a scalable search engine that:
+
+- provides text search functionality and uniform access to resources and services hosted at the European Bioinformatics Institute (EMBL-EBI)
+- is based on the consolidated Apache Lucene technology
+- exposes both Web and RESTful Web Services interfaces
+- provides inter-domain navigation via a network of cross-references
+
+Here, the sample clients provided by EBI are used.
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r bb7989bd88ba environment.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/environment.yml Thu Dec 01 15:28:20 2016 -0500
b
@@ -0,0 +1,19 @@
+# Conda environment used to regenerate macros.xml
+# (create it with `conda env create -f environment.yml`).
+name: ebeye_urllib
+# Plain YAML sequence: the former `!!python/tuple` tag is PyYAML-specific and
+# is rejected by safe YAML loaders.
+channels:
+- biocore
+- bioconda
+- defaults
+dependencies:
+- bioconda::urllib3=1.12=py27_0
+- bioconda::xmltramp2=3.1.1=py27_0
+- openssl=1.0.2j=0
+- pip=9.0.1=py27_0
+- python=2.7.12=1
+- readline=6.2=2
+- setuptools=27.2.0=py27_0
+- six=1.10.0=py27_0
+- sqlite=3.13.0=0
+- tk=8.5.18=0
+- wheel=0.29.0=py27_0
+- zlib=1.2.8=3
+
b
diff -r 000000000000 -r bb7989bd88ba generate_macros.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/generate_macros.py Thu Dec 01 15:28:20 2016 -0500
[
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+
+import ebeye_urllib
+
+
+def add_option(value, name, selected=False):
+    """Return one ``<option>`` element as a string ending with a newline.
+
+    ``value`` fills the ``value`` attribute and ``name`` is the displayed
+    text. When ``selected`` is true, ``selected="true"`` is added.
+
+    Both strings are XML-escaped so that ``&``, ``<``, ``>`` (and ``"`` in
+    the attribute) cannot produce a malformed macros file. Strings without
+    those characters are emitted unchanged.
+    """
+    # Function-scope import keeps this block self-contained.
+    from xml.sax.saxutils import escape
+
+    # Format first so that non-string inputs are rendered exactly as the
+    # plain '%s' interpolation would render them, then escape the result.
+    value_text = escape('%s' % (value,), {'"': '&quot;'})
+    name_text = escape('%s' % (name,))
+    selected_attr = ' selected="true"' if selected else ''
+    return '<option value="%s"%s>%s</option>\n' % (
+        value_text, selected_attr, name_text)
+
+
+def add_select_parameter(name, label, multiple=False):
+    """Return the opening tag of a ``type="select"`` ``<param>`` element.
+
+    The returned string ends with a newline. Only the opening tag is built;
+    the closing ``</param>`` is not emitted here. ``multiple=True`` adds the
+    ``multiple="true"`` attribute.
+    """
+    pieces = [
+        '<param ',
+        'name="%s" ' % (name),
+        'type="select" ',
+        'label="%s" ' % (label),
+    ]
+    if multiple:
+        pieces.append('multiple=\"true\"')
+    pieces.append('>\n')
+    return ''.join(pieces)
+
+
+def write_macros_file(macros_filepath, domains_fields):
+    """Build the Galaxy macros XML and write it to ``macros_filepath``.
+
+    ``domains_fields`` is indexed by domain id; each entry is read through
+    its ``'name'``, ``'retrievable_fields'`` and ``'searchable_fields'``
+    keys. Domains are listed sorted by their ``'name'``. The file is opened
+    in ``'w'`` mode, so existing content is replaced.
+    """
+    indent = '    '
+    chunks = []
+
+    def emit(depth, text):
+        # Queue ``text`` preceded by ``depth`` indentation levels.
+        chunks.append('%s%s' % (depth * indent, text))
+
+    chunks.append('<macros>\n')
+
+    # Tool requirements.
+    emit(1, '<xml name="requirements">\n')
+    emit(2, '<requirements>\n')
+    emit(3, '<requirement type="package" version="3.1.1">xmltramp2</requirement>\n')
+    emit(3, '<requirement type="package" version="1.12">urllib3</requirement>\n')
+    emit(3, '<yield/>\n')
+    emit(2, '</requirements>\n')
+    emit(1, '</xml>\n')
+
+    # Inputs: a domain selector, then one <when> branch per domain.
+    emit(1, '<xml name="inputs">\n')
+    emit(2, '<conditional name="searched_domain">\n')
+    emit(3, add_select_parameter('domain', 'Domain to query'))
+
+    sorted_domains = sorted(
+        [(key, domains_fields[key]['name']) for key in domains_fields.keys()],
+        key=lambda pair: pair[1])
+    for domain_id, domain_name in sorted_domains:
+        emit(4, add_option(domain_id, domain_name))
+
+    emit(3, '</param>\n\n')
+
+    for domain, _ in sorted_domains:
+        info = domains_fields[domain]
+        emit(3, '<when value="%s">\n' % (domain,))
+
+        # Retrievable fields, all preselected.
+        emit(4, add_select_parameter(
+            'fields', 'Fields to extract', multiple=True))
+        for field in info['retrievable_fields']:
+            emit(5, add_option(field, field, selected=True))
+        emit(5, '<validator type="no_options" message="Please select at least one field" />\n')
+        emit(4, '</param>\n')
+
+        emit(4, '<repeat name="queries" title="Add a query">\n')
+
+        emit(5, add_select_parameter(
+            'combination_operation', 'Combination operation'))
+        for operator in ('AND', 'OR', 'NOT'):
+            emit(6, add_option(operator, operator))
+        emit(5, '</param>\n')
+
+        emit(5, add_select_parameter('query_field', 'Fields'))
+        for field in info['searchable_fields']:
+            emit(6, add_option(field, field))
+        emit(5, '</param>\n')
+
+        emit(5, '<conditional name="comp_operation">\n')
+        emit(6, add_select_parameter('operation', 'Comparison operation'))
+        for comparison in ('equal', 'not', 'range'):
+            emit(7, add_option(comparison, comparison))
+        emit(6, '</param>\n')
+
+        emit(6, '<when value="equal">\n')
+        emit(7, '<param name="query_text" type="text" label="Searched term"/>\n')
+        emit(6, '</when>\n')
+
+        emit(6, '<when value="not">\n')
+        emit(7, '<param name="query_text" type="text" label="Searched term"/>\n')
+        emit(7, '<param name="not_query_text" type="text" label="Limiting term"/>\n')
+        emit(6, '</when>\n')
+
+        emit(6, '<when value="range">\n')
+        emit(7, '<param name="min" type="text" label="From"/>\n')
+        emit(7, '<param name="max" type="text" label="To"/>\n')
+        emit(6, '</when>\n')
+
+        emit(5, '</conditional>\n')
+        emit(4, '</repeat>\n')
+        emit(3, '</when>\n\n')
+
+    emit(2, '</conditional>\n')
+    emit(1, '</xml>\n')
+
+    # Citations.
+    emit(1, '<xml name="citations">\n')
+    emit(2, '<citations>\n')
+    emit(3, '<citation type="doi">10.1093/bib/bbp065</citation>\n')
+    emit(3, '<citation type="doi">10.1093/nar/gkv316</citation>\n')
+    emit(2, '</citations>\n')
+    emit(1, '</xml>\n')
+
+    chunks.append('</macros>\n')
+
+    with open(macros_filepath, 'w') as macros_file:
+        macros_file.write(''.join(chunks))
+
+
+def generate_macros():
+    """Write ``macros.xml`` from ``ebeye_urllib.getDomainHierarchy()``."""
+    write_macros_file('macros.xml', ebeye_urllib.getDomainHierarchy())
+
+
+if __name__ == '__main__':
+    generate_macros()
b
diff -r 000000000000 -r bb7989bd88ba macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Dec 01 15:28:20 2016 -0500
b
b'@@ -0,0 +1,9383 @@\n+<macros>\n+    <xml name="requirements">\n+        <requirements>\n+            <requirement type="package" version="3.1.1">xmltramp2</requirement>\n+            <requirement type="package" version="1.12">urllib3</requirement>\n+            <yield/>\n+        </requirements>\n+    </xml>\n+    <xml name="inputs">\n+        <conditional name="searched_domain">\n+            <param name="domain" type="select" label="Domain to query" >\n+                <option value="sra-analysis">Analysis</option>\n+                <option value="arrayexpress-repository">ArrayExpress</option>\n+                <option value="genome_assembly">Assembly</option>\n+                <option value="wgs_masters">Assembly contig set</option>\n+                <option value="emblrelease_con">Assembly scaffold (Release)</option>\n+                <option value="emblnew_con">Assembly scaffold (Update)</option>\n+                <option value="atlas-genes">Baseline Expression Atlas Genes</option>\n+                <option value="biomodels">BioModels</option>\n+                <option value="biosamples">BioSamples</option>\n+                <option value="biosamples-groups">BioSamples groups</option>\n+                <option value="chebi">ChEBI</option>\n+                <option value="chembl-activity">ChEMBL Activity</option>\n+                <option value="chembl-assay">ChEMBL Assay</option>\n+                <option value="chembl-document">ChEMBL Document</option>\n+                <option value="chembl-molecule">ChEMBL Molecule</option>\n+                <option value="chembl-target">ChEMBL Target</option>\n+                <option value="chembl-target_component">ChEMBL Target Component</option>\n+                <option value="coding_release">Coding (Release)</option>\n+                <option value="coding_update">Coding (Update)</option>\n+                <option value="dgva">DGVa</option>\n+                <option value="atlas-genes-differential">Differential 
Expression Atlas Genes</option>\n+                <option value="efo">EFO</option>\n+                <option value="ega">EGA</option>\n+                <option value="emdb">EMDB</option>\n+                <option value="epo">EPO</option>\n+                <option value="elixir-registry">Elixir registry</option>\n+                <option value="ensembl_gene">Ensembl Gene</option>\n+                <option value="ensemblGenomes_gene">Ensembl Genomes Gene</option>\n+                <option value="ensemblGenomes_genome">Ensembl Genomes Genome</option>\n+                <option value="ensemblGenomes_seqRegion">Ensembl Genomes Sequence Region</option>\n+                <option value="ensemblGenomes_variant">Ensembl Genomes Variant</option>\n+                <option value="enzymeportal">Enzyme Portal</option>\n+                <option value="enzymeportal_enzymes">Enzyme Portal - Enzymes</option>\n+                <option value="atlas-experiments">Expression Atlas Experiments</option>\n+                <option value="gnps">GNPS</option>\n+                <option value="go">GO</option>\n+                <option value="gpcrdb">GPCRDB</option>\n+                <option value="gpmdb">GPMdb</option>\n+                <option value="hgnc">HGNC</option>\n+                <option value="human_diseases">Human diseases</option>\n+                <option value="imgt-hla">IMGT/HLA</option>\n+                <option value="ipd-kir">IPD-KIR</option>\n+                <option value="ipd-mhc">IPD-MHC</option>\n+                <option value="intact-complexes">IntAct Complexes</option>\n+                <option value="intact-experiments">IntAct Experiments</option>\n+                <option value="intact-interactions">IntAct Interactions</option>\n+                <option value="intact-interactors">IntAct Interactors</option>\n+                <option value="intenz">IntEnz</option>\n+                <option value="interpro">InterPro</option>\n+                <option 
value="iprmatches">Iprmatches</option>\n+                <option value="jpo">JPO</option>\n+'..b'EFSEQ_MRNA_PREDICTED">REFSEQ_MRNA_PREDICTED</option>\n+                        <option value="REFSEQ_NCRNA">REFSEQ_NCRNA</option>\n+                        <option value="REFSEQ_PEPTIDE">REFSEQ_PEPTIDE</option>\n+                        <option value="REFSEQ_PEPTIDE_PREDICTED">REFSEQ_PEPTIDE_PREDICTED</option>\n+                        <option value="SCHISTODB">SCHISTODB</option>\n+                        <option value="UNIGENE">UNIGENE</option>\n+                        <option value="UNIPARC">UNIPARC</option>\n+                        <option value="UNIPATHWAY">UNIPATHWAY</option>\n+                        <option value="UNIPROT">UNIPROT</option>\n+                        <option value="WIKIGENE">WIKIGENE</option>\n+                        <option value="WORMBASE_GENE">WORMBASE_GENE</option>\n+                        <option value="WORMBASE_GSEQNAME">WORMBASE_GSEQNAME</option>\n+                        <option value="WORMBASE_LOCUS">WORMBASE_LOCUS</option>\n+                        <option value="WORMBASE_TRANSCRIPT">WORMBASE_TRANSCRIPT</option>\n+                        <option value="WORMPEP_ID">WORMPEP_ID</option>\n+                        <option value="collection">collection</option>\n+                        <option value="database">database</option>\n+                        <option value="description">description</option>\n+                        <option value="domain">domain</option>\n+                        <option value="domain_count">domain_count</option>\n+                        <option value="domain_source">domain_source</option>\n+                        <option value="exon">exon</option>\n+                        <option value="gene_name">gene_name</option>\n+                        <option value="gene_synonym">gene_synonym</option>\n+                        <option value="genetree">genetree</option>\n+                        <option 
value="genomic_unit">genomic_unit</option>\n+                        <option value="haplotype">haplotype</option>\n+                        <option value="id">id</option>\n+                        <option value="id_noversion">id_noversion</option>\n+                        <option value="name">name</option>\n+                        <option value="peptide">peptide</option>\n+                        <option value="seq_region_name">seq_region_name</option>\n+                        <option value="source">source</option>\n+                        <option value="species">species</option>\n+                        <option value="system_name">system_name</option>\n+                        <option value="transcript">transcript</option>\n+                    </param>\n+                    <conditional name="comp_operation">\n+                        <param name="operation" type="select" label="Comparison operation" >\n+                            <option value="equal">equal</option>\n+                            <option value="not">not</option>\n+                            <option value="range">range</option>\n+                        </param>\n+                        <when value="equal">\n+                            <param name="query_text" type="text" label="Searched term"/>\n+                        </when>\n+                        <when value="not">\n+                            <param name="query_text" type="text" label="Searched term"/>\n+                            <param name="not_query_text" type="text" label="Limiting term"/>\n+                        </when>\n+                        <when value="range">\n+                            <param name="min" type="text" label="From"/>\n+                            <param name="max" type="text" label="To"/>\n+                        </when>\n+                    </conditional>\n+                </repeat>\n+            </when>\n+\n+        </conditional>\n+    </xml>\n+    <xml name="citations">\n+        
<citations>\n+            <citation type="doi">10.1093/bib/bbp065</citation>\n+            <citation type="doi">10.1093/nar/gkv316</citation>\n+        </citations>\n+    </xml>\n+</macros>\n'
b
diff -r 000000000000 -r bb7989bd88ba test-data/ERR675640_qc-stats_stats
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ERR675640_qc-stats_stats Thu Dec 01 15:28:20 2016 -0500
b
@@ -0,0 +1,13 @@
+bp_count 85213813
+sequence_count 670470
+average_length 127.096
+standard_deviation_length 17.309
+length_min 100
+length_max 159
+average_gc_content 49.226
+standard_deviation_gc_content 9.333
+average_gc_ratio 1.117
+standard_deviation_gc_ratio 0.470
+ambig_char_count 6
+ambig_sequence_count 6
+average_ambig_chars 0.000