Repository 'ebi_metagenomics_run_downloader'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ebi_metagenomics_run_downloader

Changeset 0:e2e9fae080ad (2016-12-01)
Next changeset 1:d4e7e110dfdb (2016-12-12)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ebi_tools commit 7a9c88c1c80b80aaa63e55e9d9125b6a4dd695ac
added:
README.md
download_ebi_metagenomics_run_data
ebeye_urllib.py
ebi_metagenomics_run_downloader.xml
environment.yml
generate_macros.py
macros.xml
test-data/ERR675640_qc-stats_stats
b
diff -r 000000000000 -r e2e9fae080ad README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Thu Dec 01 15:27:59 2016 -0500
[
@@ -0,0 +1,14 @@
+EBI Search
+==========
+
+EBI Search is a tool to provide text search functionality and uniform access to resources and services hosted at the European Bioinformatics Institute.
+
+Because EBI Search offers a very large number of options, the `macros.xml` of this wrapper, which lists all of them, is generated automatically with the [`ebeye_urllib3.py`](http://www.ebi.ac.uk/Tools/webservices/download_clients/python/urllib/ebeye_urllib3.py) client from EBI and a Python script ([`generate_macros.py`](generate_macros.py)).
+
+To change `macros.xml`, edit [`generate_macros.py`](generate_macros.py) and regenerate `macros.xml` with:
+
+```
+$ conda env create -f environment.yml
+$ source activate ebeye_urllib
+(ebeye_urllib) $ python generate_macros.py
+```
\ No newline at end of file
b
diff -r 000000000000 -r e2e9fae080ad download_ebi_metagenomics_run_data
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/download_ebi_metagenomics_run_data Thu Dec 01 15:27:59 2016 -0500
[
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+set -e
+
+python_script=$1
+run_id=$2
+information_type=$3
+information_to_download=$4
+chunk_type=$5
+output_file=$6
+
+touch $output_file
+
+run_link=$(python $python_script getRunLink $run_id)
+run_link="$run_link/$information_type/$information_to_download"
+
+if [[ $chunk_type == 'multiple_chunks' ]]; then
+    chunk_nb=$(curl "$run_link/chunks")
+
+    if [[ "$chunk_nb" < 1 ]]; then
+        >&2 echo "-----"
+        >&2 echo "ERROR"
+        >&2 echo "-----"
+        >&2 echo "No data are found for this link:"
+        >&2 echo "$run_link/chunks"
+        >&2 echo "-----"
+        exit 6
+    fi
+
+    for i in $(seq "$chunk_nb"); do
+        curl "$run_link/chunks/$i" | gunzip >> $output_file
+    done
+else
+    curl "$run_link" >> $output_file
+fi
+
+if [ ! -s $output_file ]; then
+    >&2 echo "-----"
+    >&2 echo "ERROR"
+    >&2 echo "-----"
+    >&2 echo "The output file is empty probably because the following link is not working:"
+    >&2 echo "$run_link"
+    >&2 echo "-----"
+    exit 6
+fi
b
diff -r 000000000000 -r e2e9fae080ad ebeye_urllib.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ebeye_urllib.py Thu Dec 01 15:27:59 2016 -0500
[
b'@@ -0,0 +1,321 @@\n+#!/usr/bin/env python\n+# ======================================================================\n+# Script derived from the EB-eye (REST) Python client available at\n+# http://www.ebi.ac.uk/Tools/webservices/services/eb-eye_rest\n+# and distributed under the Apache License\n+# ======================================================================\n+# Load libraries\n+import platform\n+import os\n+import urllib\n+import re\n+from optparse import OptionParser\n+from gzip import GzipFile\n+from xmltramp2 import xmltramp\n+# python2\n+from StringIO import StringIO\n+import urllib2\n+# python3\n+# import urllib.request as urllib2\n+\n+\n+# Service base URL\n+baseUrl = \'http://www.ebi.ac.uk/ebisearch/ws/rest\'\n+\n+# Debug level\n+debugLevel = 0\n+\n+\n+# Debug print\n+def printDebugMessage(functionName, message, level):\n+    if(level <= debugLevel):\n+        print (\'[\' + functionName + \'] \' + message)\n+\n+\n+# User-agent for request.\n+def getUserAgent():\n+    printDebugMessage(\'getUserAgent\', \'Begin\', 11)\n+    urllib_agent = \'Python-urllib/%s\' % urllib2.__version__\n+    clientRevision = \'$Revision: 2468 $\'\n+    clientVersion = \'0\'\n+    if len(clientRevision) > 11:\n+        clientVersion = clientRevision[11:-2]\n+    user_agent = \'EBI-Sample-Client/%s (%s; Python %s; %s) %s\' % (\n+        clientVersion, os.path.basename(__file__),\n+        platform.python_version(), platform.system(),\n+        urllib_agent\n+    )\n+    printDebugMessage(\'getUserAgent\', \'user_agent: \' + user_agent, 12)\n+    printDebugMessage(\'getUserAgent\', \'End\', 11)\n+    return user_agent\n+\n+\n+# Wrapper for a REST (HTTP GET) request\n+def restRequest(url):\n+    printDebugMessage(\'restRequest\', \'Begin\', 11)\n+    printDebugMessage(\'restRequest\', \'url: \' + url, 11)\n+    # python 2\n+    url = urllib.quote(url, safe="%/:=&?~#+!$,;\'@()*[]")\n+    # python 3\n+    # url = urllib.request.quote(url, safe="%/:=&?~#+!$,;\'@()*[]")\n+\n+ 
   try:\n+        user_agent = getUserAgent()\n+        http_headers = {\n+            \'User-Agent\': user_agent,\n+            \'Accept-Encoding\': \'gzip\'\n+        }\n+        req = urllib2.Request(url, None, http_headers)\n+        resp = urllib2.urlopen(req)\n+        # python2\n+        encoding = resp.info().getheader(\'Content-Encoding\')\n+        # python3\n+        # encoding = resp.info().__getitem__(\'Content-Encoding\')\n+        result = None\n+        if encoding is None or encoding == \'identity\':\n+            # python2\n+            result = resp.read()\n+            # python3\n+            # result = str(resp.read(), \'utf-8\')\n+        elif encoding == \'gzip\':\n+            result = resp.read()\n+            printDebugMessage(\'restRequest\', \'result: \' + str(result), 21)\n+            # python2\n+            gz = GzipFile(\n+                fileobj=StringIO(result),\n+                mode="r")\n+            result = gz.read()\n+            # python3\n+            # result = str(gzip.decompress(result), \'utf-8\')\n+        else:\n+            raise Exception(\'Unsupported Content-Encoding\')\n+        resp.close()\n+    except urllib2.HTTPError as ex:\n+        raise ex\n+    printDebugMessage(\'restRequest\', \'result: \' + result, 11)\n+    printDebugMessage(\'restRequest\', \'End\', 11)\n+    return result\n+\n+\n+def hasSubdomains(domainInfo):\n+    for dir in domainInfo._dir:\n+        if dir._name == \'subdomains\':\n+            return True\n+    return False\n+\n+\n+def extractUsefulFields(fieldInfos):\n+    searchable = []\n+    retrievable = []\n+\n+    for fieldInfo in fieldInfos:\n+        if fieldInfo(\'id\') == "$facets":\n+            continue\n+\n+        options = fieldInfo[\'options\'][\'option\':]\n+        for option in options:\n+            if option("name") == "searchable" and str(option) == "true":\n+                searchable.append(fieldInfo(\'id\'))\n+            if option("name") == "retrievable" and 
str(option) == "true":\n+                retrievable.append(fieldInfo(\'id\'))\n+    return searchable, retrievable\n+\n+\n+def extractLowerLevelDomains(domainInfo, domains):\n+    if hasSubdomains(domainInfo):\n+  '..b'baseUrl + \'/\' + domain + \'?query=\' + query\n+        requestUrl += \'&fields=\' + fields + \'&size=\' + str(maximum_size)\n+        requestUrl += \'&start=\' + str(start) + \'&fieldurl=true\'\n+        request_output += makeRequest(requestUrl)\n+\n+    if (numberOfResults % 100) > 0:\n+        start = maximum_size * quotient\n+        remainder = numberOfResults - start\n+        requestUrl = baseUrl + \'/\' + domain + \'?query=\' + query\n+        requestUrl += \'&fields=\' + fields + \'&size=\' + str(remainder)\n+        requestUrl += \'&start=\' + str(start) + \'&fieldurl=true\'\n+        request_output += makeRequest(requestUrl)\n+\n+    print(request_output)\n+\n+\n+def printEntries(entries):\n+    output = ""\n+    printDebugMessage(\'printEntries\', \'Begin\', 1)\n+    for entry in entries:\n+        sep = ""\n+        for field in entry[\'fields\'][\'field\':]:\n+            output += "%s" % (sep)\n+            fields = field[\'values\'][\'value\':]\n+            if len(fields) > 0:\n+                sub_sep = ""\n+                for value in field[\'values\'][\'value\':]:\n+                    output += "%s%s" % (sub_sep, value)\n+                    sub_sep = ","\n+            sep = "\\t"\n+\n+        if hasFieldUrls(entry):\n+            output += "%s" % (sep)\n+            sub_sep = ""\n+            for fieldurl in entry[\'fieldURLs\'][\'fieldURL\':]:\n+                output += "%s%s" % (sub_sep, str(fieldurl))\n+                sub_sep = ","\n+            sep = "\\t"\n+        if hasViewUrls(entry):\n+            output += "%s" % (sep)\n+            sub_sep = ""\n+            for viewurl in entry[\'viewURLs\'][\'viewURL\':]:\n+                output += "%s%s" % (sub_sep, str(viewurl))\n+                sub_sep = ","\n+        
output += "\\n"\n+    printDebugMessage(\'printEntries\', \'End\', 1)\n+    return output\n+\n+\n+def hasFieldUrls(entry):\n+    for dir in entry._dir:\n+        if dir._name == \'fieldURLs\':\n+            return True\n+    return False\n+\n+\n+def hasViewUrls(entry):\n+    for dir in entry._dir:\n+        if dir._name == \'viewURLs\':\n+            return True\n+    return False\n+\n+\n+def getRunLink(run_id):\n+    printDebugMessage(\'getEntries\', \'Begin\', 1)\n+    requestUrl = baseUrl + \'/metagenomics_runs/entry/\' + run_id + \'?fieldurl=true\'\n+    printDebugMessage(\'getEntries\', requestUrl, 2)\n+    xmlDoc = restRequest(requestUrl)\n+    doc = xmltramp.parse(xmlDoc)\n+    entries = doc[\'entries\'][\'entry\':]\n+    fieldURL = \'\'\n+    for entry in entries:\n+        for fieldurl in entry[\'fieldURLs\'][\'fieldURL\':]:\n+            fieldURL += str(fieldurl)\n+    printDebugMessage(\'getEntries\', \'End\', 1)\n+    p = re.compile(\'http\')\n+    fieldURL = p.sub(\'https\', fieldURL)\n+    print fieldURL\n+\n+\n+if __name__ == \'__main__\':\n+    # Usage message\n+    usage = """\n+      %prog getDomainHierarchy\n+      %prog getResults <domain> <query> <fields>\n+      %prog getRunLink <runId>\n+      """\n+\n+    description = "Tools to query and download data from several EMBL-EBI databases"\n+    description += "The searching tools are using the EB-eye search engine. 
"\n+    description += "http://www.ebi.ac.uk/ebisearch/"\n+    # Process command-line options\n+    parser = OptionParser(\n+        usage=usage,\n+        description=description,\n+        version=\'1.0\')\n+    (options, args) = parser.parse_args()\n+\n+    # No arguments, print usage\n+    if len(args) < 1:\n+        parser.print_help()\n+\n+    # Get domain hierarchy\n+    elif args[0] == \'getDomainHierarchy\':\n+        getDomainHierarchy()\n+\n+    # Get search results\n+    elif args[0] == \'getResults\':\n+        if len(args) < 4:\n+            print (\'domain, query and fields should be given.\')\n+        else:\n+            getResults(args[1], args[2], args[3])\n+\n+    # Get run link results\n+    elif args[0] == \'getRunLink\':\n+        if len(args) < 2:\n+            print (\'run id should be given.\')\n+        else:\n+            getRunLink(args[1])\n+\n+    # Unknown argument combination, display usage\n+    else:\n+        print (\'Error: unrecognised argument combination\')\n+        parser.print_help()\n'
b
diff -r 000000000000 -r e2e9fae080ad ebi_metagenomics_run_downloader.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ebi_metagenomics_run_downloader.xml Thu Dec 01 15:27:59 2016 -0500
[
@@ -0,0 +1,139 @@
+<tool id="ebi_metagenomics_run_downloader" name="Download run data" version="0.1.0">
+    <description>from EBI Metagenomics database</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="requirements">
+        <requirement type="package" version="7.49.0">curl</requirement>
+    </expand>
+    <!-- Runs the bundled download script with, in order: the EBI search client script, the run id, the information type, the item to download, the chunk mode and the file name 'output.dat', which the outputs pick up through from_work_dir. The chunk mode is 'multiple_chunks' for every sequence download except 'ncRNA-tRNA-FASTA' and for the 'InterProScan' function download. -->
+    <command detect_errors="exit_code"><![CDATA[
+        '$__tool_directory__/download_ebi_metagenomics_run_data'
+            '$__tool_directory__/ebeye_urllib.py'
+            '$run_id'
+            '$information.type'
+            '$information.to_download'
+
+        #if ( $information.type == 'sequences' and $information.to_download != 'ncRNA-tRNA-FASTA') or ($information.type == 'function' and $information.to_download == 'InterProScan')
+            'multiple_chunks'
+        #else
+            'single_chunk'
+        #end if
+
+            'output.dat'
+    ]]></command>
+
+    <inputs><!-- The selected 'type' and 'to_download' values are passed verbatim to the download script, which appends them to the run link as path components. -->
+        <param name="run_id" type="text" label="Identifier of the run in EBI Metagenomics" />
+
+        <conditional name="information">
+            <param name="type" type="select" label="Type of information to download">
+                <option value="sequences" selected="true">Sequence data</option>
+                <option value="qc-stats">Quality control statistics</option>
+                <option value="taxonomy">Taxonomic analysis</option>
+                <option value="function">Functional analysis</option>
+            </param>
+
+            <when value="sequences">
+                <param name="to_download" type="select" label="Sequences to download">
+                    <option value="ProcessedReads" selected="true">Processed nucleotide reads</option>
+                    <option value="ReadsWithPredictedCDS">Processed reads with pCDS</option>
+                    <option value="ReadsWithMatches">Processed reads with annotation</option>
+                    <option value="ReadsWithoutMatches">Processed reads without annotation</option>
+                    <option value="PredictedCDSWithAnnotation">Predicted CDS with annotation</option>
+                    <option value="PredictedCDSWithoutAnnotation">Predicted CDS without annotation</option>
+                    <option value="PredictedORFWithoutAnnotation">Predicted ORF without annotation</option>
+                    <option value="ncRNA-tRNA-FASTA">Predicted tRNAs</option>
+                </param>
+            </when>
+
+            <when value="qc-stats">
+                <param name="to_download" type="select" label="Quality control statistics to download">
+                    <option value="summary" selected="true">Number of sequence reads per Quality Control step</option>
+                    <option value="stats">Quality control statistics</option>
+                    <option value="length">Read length distribution</option>
+                    <option value="gc_bin">Read GC distribution</option>
+                    <option value="base">Nucleotide position distribution</option>
+                </param>
+            </when>
+
+            <when value="taxonomy">
+                <param name="to_download" type="select" label="Taxonomic analysis to download">
+                    <option value="5S-rRNA-FASTA" selected="true">Reads encoding 5S rRNA</option>
+                    <option value="16S-rRNA-FASTA">Reads encoding 16S rRNA</option>
+                    <option value="23S-rRNA-FASTA">Reads encoding 23S rRNA</option>
+                    <option value="OTU-TSV">OTUs, reads and taxonomic assignments (TSV)</option>
+                    <option value="NewickPrunedTree">Phylogenetic tree</option>
+                </param>
+            </when>
+
+            <when value="function">
+                <param name="to_download" type="select" label="Functional analysis to download">
+                    <option value="InterProScan" selected="true">InterPro matches</option>
+                    <option value="GOAnnotations">Complete GO annotation</option>
+                    <option value="GOSlimAnnotations">GO slim annotation</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs><!-- The filters are mutually exclusive: each job keeps the single dataset whose format matches the selected download. The strings compared here must equal the option values declared in the inputs section. -->
+        <data name="output_file_fasta" format="fasta" label="${tool.name} for ${run_id}" from_work_dir="output.dat">
+            <filter>( information['type'] == 'sequences' or
+                ( information['type'] == 'taxonomy' and ( information['to_download'] == '5S-rRNA-FASTA' or
+                information['to_download'] == '16S-rRNA-FASTA' or information['to_download'] == '23S-rRNA-FASTA' )))
+            </filter>
+        </data>
+        <data name="output_file_tsv" format="tsv" label="${tool.name} for ${run_id}" from_work_dir="output.dat">
+            <filter>( information['type'] == 'qc-stats' or
+                ( information['type'] == 'taxonomy' and information['to_download'] == 'OTU-TSV' ) or
+                ( information['type'] == 'function' and information['to_download'] == 'InterProScan' ))
+            </filter>
+        </data>
+        <data name="output_file_csv" format="csv" label="${tool.name} for ${run_id}" from_work_dir="output.dat">
+            <filter>( information['type'] == 'function' and ( information['to_download'] == 'GOAnnotations' or information['to_download'] == 'GOSlimAnnotations' ))
+            </filter>
+        </data>
+        <data name="output_file_newick" format="newick" label="${tool.name} for ${run_id}" from_work_dir="output.dat">
+            <filter>( information['type'] == 'taxonomy' and information['to_download'] == 'NewickPrunedTree')
+            </filter>
+        </data>
+    </outputs>
+
+    <tests><!-- Both tests run the download command against the live service. In the second test the '|' of the FASTA header is escaped so that it is matched literally instead of acting as a regex alternation. -->
+        <test>
+            <param name="run_id" value="ERR675640" />
+            <conditional name="information">
+                <param name="type" value="qc-stats" />
+                <param name="to_download" value="stats" />
+            </conditional>
+            <output name="output_file_tsv" file="ERR675640_qc-stats_stats" />
+        </test>
+        <test>
+            <param name="run_id" value="ERR675573" />
+            <conditional name="information">
+                <param name="type" value="sequences" />
+                <param name="to_download" value="PredictedCDSWithAnnotation" />
+            </conditional>
+            <output name="output_file_fasta">
+                <assert_contents>
+                    <has_line_matching expression="^>ERR675573.1772-FC81EB0ABXX:7:1101:19215:2564-ATCACGAT-1_1_117_- IPR010690/PF06898/2-34$" />
+                    <has_line_matching expression="^>ERR675573.6692248-FC81EB0ABXX:7:1202:20007:191300-ATCACGAT-1_1_127_- IPR004089/PF00015/2-41\|G3DSA:1.10.287.950/2-41$" />
+                    <has_line_matching expression="^>ERR675573.21754698-FC81EB0ABXX:7:2208:13328:189792-ATCACGAT-1_1_150_- IPR027414/G3DSA:2.70.98.50/7-45$" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+The European Bioinformatics Institute (EMBL-EBI) maintains the world’s most comprehensive range of freely available and up-to-date molecular databases.
+
+This tool downloads data related to a run from the EBI Metagenomics database.
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r e2e9fae080ad environment.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/environment.yml Thu Dec 01 15:27:59 2016 -0500
b
@@ -0,0 +1,19 @@
+name: ebeye_urllib  # environment name used by `source activate` in the README
+channels:  # plain YAML list; the Python-only !!python/tuple tag is rejected by safe YAML loaders
+- biocore
+- bioconda
+- defaults
+dependencies:
+- bioconda::urllib3=1.12=py27_0
+- bioconda::xmltramp2=3.1.1=py27_0
+- openssl=1.0.2j=0
+- pip=9.0.1=py27_0
+- python=2.7.12=1
+- readline=6.2=2
+- setuptools=27.2.0=py27_0
+- six=1.10.0=py27_0
+- sqlite=3.13.0=0
+- tk=8.5.18=0
+- wheel=0.29.0=py27_0
+- zlib=1.2.8=3
+
b
diff -r 000000000000 -r e2e9fae080ad generate_macros.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/generate_macros.py Thu Dec 01 15:27:59 2016 -0500
[
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+
+import ebeye_urllib
+
+
+def add_option(value, name, selected=False):
+    to_write = '<option '
+    to_write += 'value="%s"' % (value)
+    if selected:
+        to_write += ' selected="true"'
+    to_write += '>%s' % (name)
+    to_write += '</option>\n'
+    return to_write
+
+
+def add_select_parameter(name, label, multiple=False):
+    to_write = '<param '
+    to_write += 'name="%s" ' % (name)
+    to_write += 'type="select" '
+    to_write += 'label="%s" ' % (label)
+    if multiple:
+        to_write += 'multiple=\"true\"'
+    to_write += '>\n'
+    return to_write
+
+
+def write_macros_file(macros_filepath, domains_fields):
+    """Build the whole `<macros>` document and write it to `macros_filepath`.
+
+    The document holds three `<xml>` macros, in this order: "requirements",
+    "inputs" and "citations".
+
+    macros_filepath: path of the file to (over)write.
+    domains_fields: mapping keyed by domain id; each value is indexed here
+        with 'name', 'retrievable_fields' and 'searchable_fields'.
+    """
+    # One indentation level of the generated XML.
+    spaces = '    '
+    to_write = '<macros>\n'
+
+    # "requirements" macro; the <yield/> is where a tool expanding the macro
+    # can insert its own nested <requirement> elements.
+    to_write += '%s<xml name="requirements">\n' % (spaces)
+    to_write += '%s<requirements>\n' % (2 * spaces)
+    to_write += '%s<requirement type="package" version="3.1.1">xmltramp2</requirement>\n' % (3 * spaces)
+    to_write += '%s<requirement type="package" version="1.12">urllib3</requirement>\n' % (3 * spaces)
+    to_write += '%s<yield/>\n' % (3 * spaces)
+    to_write += '%s</requirements>\n' % (2 * spaces)
+    to_write += '%s</xml>\n' % (spaces)
+
+    # "inputs" macro: a conditional keyed on the domain to query.
+    to_write += '%s<xml name="inputs">\n' % (spaces)
+
+    to_write += '%s<conditional name="searched_domain">\n' % (2 * spaces)
+    to_write += '%s%s' % (3 * spaces, add_select_parameter(
+        'domain',
+        'Domain to query'))
+
+    # (domain id, display name) pairs, ordered by display name.
+    sorted_domains = [(d, domains_fields[d]['name']) for d in domains_fields.keys()]
+    sorted_domains = sorted(sorted_domains, key=lambda tup: tup[1])
+    for domain in sorted_domains:
+        to_write += '%s%s' % (4 * spaces, add_option(
+            domain[0],
+            domain[1]))
+
+    to_write += '%s</param>\n\n' % (3 * spaces)
+
+    # One <when> per domain, in the same order as the options above.
+    for d in sorted_domains:
+        domain = d[0]
+        to_write += '%s<when value="%s">\n' % (3 * spaces, domain)
+
+        # Multi-select of the retrievable fields, all preselected.
+        to_write += '%s%s' % (4 * spaces, add_select_parameter(
+            'fields',
+            'Fields to extract',
+            multiple=True))
+        for field in domains_fields[domain]['retrievable_fields']:
+            to_write += '%s%s' % (5 * spaces, add_option(
+                field,
+                field,
+                selected=True))
+        to_write += '%s<validator type="no_options" message="Please select at least one field" />\n' % (5 * spaces)
+        to_write += '%s</param>\n' % (4 * spaces)
+
+        # Repeatable query: combination operator, searchable field, comparison.
+        to_write += '%s<repeat name="queries" title="Add a query">\n' % (
+            4 * spaces)
+
+        to_write += '%s%s' % (5 * spaces, add_select_parameter(
+            'combination_operation',
+            'Combination operation'))
+        to_write += '%s%s' % (6 * spaces, add_option('AND', 'AND'))
+        to_write += '%s%s' % (6 * spaces, add_option('OR', 'OR'))
+        to_write += '%s%s' % (6 * spaces, add_option('NOT', 'NOT'))
+        to_write += '%s</param>\n' % (5 * spaces)
+
+        to_write += '%s%s' % (5 * spaces, add_select_parameter(
+            'query_field',
+            'Fields'))
+        for field in domains_fields[domain]['searchable_fields']:
+            to_write += '%s%s' % (6 * spaces, add_option(field, field))
+        to_write += '%s</param>\n' % (5 * spaces)
+
+        # The comparison operation decides which text inputs are shown.
+        to_write += '%s<conditional name="comp_operation">\n' % (5 * spaces)
+        to_write += '%s%s' % (6 * spaces, add_select_parameter(
+            'operation',
+            'Comparison operation'))
+        to_write += '%s%s' % (7 * spaces, add_option('equal', 'equal'))
+        to_write += '%s%s' % (7 * spaces, add_option('not', 'not'))
+        to_write += '%s%s' % (7 * spaces, add_option('range', 'range'))
+        to_write += '%s</param>\n' % (6 * spaces)
+
+        to_write += '%s<when value="equal">\n' % (6 * spaces)
+        to_write += '%s<param name="query_text" type="text" label="Searched term"/>\n' % (7 * spaces)
+        to_write += '%s</when>\n' % (6 * spaces)
+
+        to_write += '%s<when value="not">\n' % (6 * spaces)
+        to_write += '%s<param name="query_text" type="text" label="Searched term"/>\n' % (7 * spaces)
+        to_write += '%s<param name="not_query_text" type="text" label="Limiting term"/>\n' % (7 * spaces)
+        to_write += '%s</when>\n' % (6 * spaces)
+
+        to_write += '%s<when value="range">\n' % (6 * spaces)
+        to_write += '%s<param name="min" type="text" label="From"/>\n' % (7 * spaces)
+        to_write += '%s<param name="max" type="text" label="To"/>\n' % (
+            7 * spaces)
+        to_write += '%s</when>\n' % (6 * spaces)
+
+        to_write += '%s</conditional>\n' % (5 * spaces)
+
+        to_write += '%s</repeat>\n' % (4 * spaces)
+
+        to_write += '%s</when>\n\n' % (3 * spaces)
+
+    to_write += '%s</conditional>\n' % (2 * spaces)
+    to_write += '%s</xml>\n' % (spaces)
+
+    # "citations" macro with two fixed DOIs.
+    to_write += '%s<xml name="citations">\n' % (spaces)
+    to_write += '%s<citations>\n' % (2 * spaces)
+    to_write += '%s<citation type="doi">10.1093/bib/bbp065</citation>\n' % (3 * spaces)
+    to_write += '%s<citation type="doi">10.1093/nar/gkv316</citation>\n' % (3 * spaces)
+    to_write += '%s</citations>\n' % (2 * spaces)
+    to_write += '%s</xml>\n' % (spaces)
+
+    to_write += '</macros>\n'
+
+    # The document is written in one go, replacing any existing file.
+    with open(macros_filepath, 'w') as macros_file:
+        macros_file.write(to_write)
+
+
+def generate_macros():
+    domains_fields = ebeye_urllib.getDomainHierarchy()
+    write_macros_file('macros.xml', domains_fields)
+
+
+if __name__ == '__main__':
+    generate_macros()
b
diff -r 000000000000 -r e2e9fae080ad macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Dec 01 15:27:59 2016 -0500
b
b'@@ -0,0 +1,9383 @@\n+<macros>\n+    <xml name="requirements">\n+        <requirements>\n+            <requirement type="package" version="3.1.1">xmltramp2</requirement>\n+            <requirement type="package" version="1.12">urllib3</requirement>\n+            <yield/>\n+        </requirements>\n+    </xml>\n+    <xml name="inputs">\n+        <conditional name="searched_domain">\n+            <param name="domain" type="select" label="Domain to query" >\n+                <option value="sra-analysis">Analysis</option>\n+                <option value="arrayexpress-repository">ArrayExpress</option>\n+                <option value="genome_assembly">Assembly</option>\n+                <option value="wgs_masters">Assembly contig set</option>\n+                <option value="emblrelease_con">Assembly scaffold (Release)</option>\n+                <option value="emblnew_con">Assembly scaffold (Update)</option>\n+                <option value="atlas-genes">Baseline Expression Atlas Genes</option>\n+                <option value="biomodels">BioModels</option>\n+                <option value="biosamples">BioSamples</option>\n+                <option value="biosamples-groups">BioSamples groups</option>\n+                <option value="chebi">ChEBI</option>\n+                <option value="chembl-activity">ChEMBL Activity</option>\n+                <option value="chembl-assay">ChEMBL Assay</option>\n+                <option value="chembl-document">ChEMBL Document</option>\n+                <option value="chembl-molecule">ChEMBL Molecule</option>\n+                <option value="chembl-target">ChEMBL Target</option>\n+                <option value="chembl-target_component">ChEMBL Target Component</option>\n+                <option value="coding_release">Coding (Release)</option>\n+                <option value="coding_update">Coding (Update)</option>\n+                <option value="dgva">DGVa</option>\n+                <option value="atlas-genes-differential">Differential 
Expression Atlas Genes</option>\n+                <option value="efo">EFO</option>\n+                <option value="ega">EGA</option>\n+                <option value="emdb">EMDB</option>\n+                <option value="epo">EPO</option>\n+                <option value="elixir-registry">Elixir registry</option>\n+                <option value="ensembl_gene">Ensembl Gene</option>\n+                <option value="ensemblGenomes_gene">Ensembl Genomes Gene</option>\n+                <option value="ensemblGenomes_genome">Ensembl Genomes Genome</option>\n+                <option value="ensemblGenomes_seqRegion">Ensembl Genomes Sequence Region</option>\n+                <option value="ensemblGenomes_variant">Ensembl Genomes Variant</option>\n+                <option value="enzymeportal">Enzyme Portal</option>\n+                <option value="enzymeportal_enzymes">Enzyme Portal - Enzymes</option>\n+                <option value="atlas-experiments">Expression Atlas Experiments</option>\n+                <option value="gnps">GNPS</option>\n+                <option value="go">GO</option>\n+                <option value="gpcrdb">GPCRDB</option>\n+                <option value="gpmdb">GPMdb</option>\n+                <option value="hgnc">HGNC</option>\n+                <option value="human_diseases">Human diseases</option>\n+                <option value="imgt-hla">IMGT/HLA</option>\n+                <option value="ipd-kir">IPD-KIR</option>\n+                <option value="ipd-mhc">IPD-MHC</option>\n+                <option value="intact-complexes">IntAct Complexes</option>\n+                <option value="intact-experiments">IntAct Experiments</option>\n+                <option value="intact-interactions">IntAct Interactions</option>\n+                <option value="intact-interactors">IntAct Interactors</option>\n+                <option value="intenz">IntEnz</option>\n+                <option value="interpro">InterPro</option>\n+                <option 
value="iprmatches">Iprmatches</option>\n+                <option value="jpo">JPO</option>\n+'..b'EFSEQ_MRNA_PREDICTED">REFSEQ_MRNA_PREDICTED</option>\n+                        <option value="REFSEQ_NCRNA">REFSEQ_NCRNA</option>\n+                        <option value="REFSEQ_PEPTIDE">REFSEQ_PEPTIDE</option>\n+                        <option value="REFSEQ_PEPTIDE_PREDICTED">REFSEQ_PEPTIDE_PREDICTED</option>\n+                        <option value="SCHISTODB">SCHISTODB</option>\n+                        <option value="UNIGENE">UNIGENE</option>\n+                        <option value="UNIPARC">UNIPARC</option>\n+                        <option value="UNIPATHWAY">UNIPATHWAY</option>\n+                        <option value="UNIPROT">UNIPROT</option>\n+                        <option value="WIKIGENE">WIKIGENE</option>\n+                        <option value="WORMBASE_GENE">WORMBASE_GENE</option>\n+                        <option value="WORMBASE_GSEQNAME">WORMBASE_GSEQNAME</option>\n+                        <option value="WORMBASE_LOCUS">WORMBASE_LOCUS</option>\n+                        <option value="WORMBASE_TRANSCRIPT">WORMBASE_TRANSCRIPT</option>\n+                        <option value="WORMPEP_ID">WORMPEP_ID</option>\n+                        <option value="collection">collection</option>\n+                        <option value="database">database</option>\n+                        <option value="description">description</option>\n+                        <option value="domain">domain</option>\n+                        <option value="domain_count">domain_count</option>\n+                        <option value="domain_source">domain_source</option>\n+                        <option value="exon">exon</option>\n+                        <option value="gene_name">gene_name</option>\n+                        <option value="gene_synonym">gene_synonym</option>\n+                        <option value="genetree">genetree</option>\n+                        <option 
value="genomic_unit">genomic_unit</option>\n+                        <option value="haplotype">haplotype</option>\n+                        <option value="id">id</option>\n+                        <option value="id_noversion">id_noversion</option>\n+                        <option value="name">name</option>\n+                        <option value="peptide">peptide</option>\n+                        <option value="seq_region_name">seq_region_name</option>\n+                        <option value="source">source</option>\n+                        <option value="species">species</option>\n+                        <option value="system_name">system_name</option>\n+                        <option value="transcript">transcript</option>\n+                    </param>\n+                    <conditional name="comp_operation">\n+                        <param name="operation" type="select" label="Comparison operation" >\n+                            <option value="equal">equal</option>\n+                            <option value="not">not</option>\n+                            <option value="range">range</option>\n+                        </param>\n+                        <when value="equal">\n+                            <param name="query_text" type="text" label="Searched term"/>\n+                        </when>\n+                        <when value="not">\n+                            <param name="query_text" type="text" label="Searched term"/>\n+                            <param name="not_query_text" type="text" label="Limiting term"/>\n+                        </when>\n+                        <when value="range">\n+                            <param name="min" type="text" label="From"/>\n+                            <param name="max" type="text" label="To"/>\n+                        </when>\n+                    </conditional>\n+                </repeat>\n+            </when>\n+\n+        </conditional>\n+    </xml>\n+    <xml name="citations">\n+        
<citations>\n+            <citation type="doi">10.1093/bib/bbp065</citation>\n+            <citation type="doi">10.1093/nar/gkv316</citation>\n+        </citations>\n+    </xml>\n+</macros>\n'
b
diff -r 000000000000 -r e2e9fae080ad test-data/ERR675640_qc-stats_stats
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ERR675640_qc-stats_stats Thu Dec 01 15:27:59 2016 -0500
b
@@ -0,0 +1,13 @@
+bp_count 85213813
+sequence_count 670470
+average_length 127.096
+standard_deviation_length 17.309
+length_min 100
+length_max 159
+average_gc_content 49.226
+standard_deviation_gc_content 9.333
+average_gc_ratio 1.117
+standard_deviation_gc_ratio 0.470
+ambig_char_count 6
+ambig_sequence_count 6
+average_ambig_chars 0.000