Next changeset 1:d4e7e110dfdb (2016-12-12) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ebi_tools commit 7a9c88c1c80b80aaa63e55e9d9125b6a4dd695ac |
added:
README.md download_ebi_metagenomics_run_data ebeye_urllib.py ebi_metagenomics_run_downloader.xml environment.yml generate_macros.py macros.xml test-data/ERR675640_qc-stats_stats |
b |
diff -r 000000000000 -r e2e9fae080ad README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Thu Dec 01 15:27:59 2016 -0500 |
[ |
@@ -0,0 +1,14 @@ +EBI Search +========== + +EBI Search is a tool that provides text search functionality and uniform access to resources and services hosted at the European Bioinformatics Institute. + +As the possible options in EBI Search are numerous, the `macros.xml` for this wrapper, with all options, is automatically generated using the [`ebeye_urllib3.py`](http://www.ebi.ac.uk/Tools/webservices/download_clients/python/urllib/ebeye_urllib3.py) tool from EBI and a Python script ([`generate_macros.py`](generate_macros.py)). + +To change `macros.xml`, edit [`generate_macros.py`](generate_macros.py) and regenerate `macros.xml` with: + +``` +$ conda env create -f environment.yml +$ source activate ebeye_urllib +(ebeye_urllib) $ python generate_macros.py +``` \ No newline at end of file |
b |
diff -r 000000000000 -r e2e9fae080ad download_ebi_metagenomics_run_data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/download_ebi_metagenomics_run_data Thu Dec 01 15:27:59 2016 -0500 |
[ |
#!/usr/bin/env bash
# Download one data file for a run of the EBI Metagenomics database.
#
# Usage:
#   download_ebi_metagenomics_run_data <python_script> <run_id> \
#       <information_type> <information_to_download> <chunk_type> <output_file>
#
#   python_script            script called as `python <script> getRunLink <run_id>`;
#                            its standard output is used as the base URL of the run
#   run_id                   identifier of the run
#   information_type         first path component appended to the run URL
#   information_to_download  second path component appended to the run URL
#   chunk_type               'multiple_chunks' to fetch and gunzip every chunk listed
#                            under <url>/chunks; any other value fetches <url> directly
#   output_file              file the downloaded data is appended to
#
# Exit status: 6 when no chunk is reported or when the output file ends up
# empty; otherwise the status of the first failing command (set -e).
set -e

python_script=$1
run_id=$2
information_type=$3
information_to_download=$4
chunk_type=$5
output_file=$6

# Every expansion is quoted so that paths containing spaces stay one argument.
touch "$output_file"

run_link=$(python "$python_script" getRunLink "$run_id")
run_link="$run_link/$information_type/$information_to_download"

if [[ "$chunk_type" == 'multiple_chunks' ]]; then
    chunk_nb=$(curl "$run_link/chunks")

    # The reply is fed to `seq`, so it must be a positive integer. Inside
    # [[ ]] the `<` operator compares strings, which lets a non-numeric reply
    # (for instance an HTML error page) through; validate the digits first and
    # then compare arithmetically (10# avoids octal parsing of leading zeros).
    if [[ ! "$chunk_nb" =~ ^[0-9]+$ ]] || (( 10#$chunk_nb < 1 )); then
        >&2 echo "-----"
        >&2 echo "ERROR"
        >&2 echo "-----"
        >&2 echo "No data are found for this link:"
        >&2 echo "$run_link/chunks"
        >&2 echo "-----"
        exit 6
    fi

    # Chunks are gzip-compressed; concatenate them uncompressed, in order.
    for i in $(seq "$chunk_nb"); do
        curl "$run_link/chunks/$i" | gunzip >> "$output_file"
    done
else
    curl "$run_link" >> "$output_file"
fi

# An empty result is reported as a failure rather than handed on silently.
if [ ! -s "$output_file" ]; then
    >&2 echo "-----"
    >&2 echo "ERROR"
    >&2 echo "-----"
    >&2 echo "The output file is empty probably because the following link is not working:"
    >&2 echo "$run_link"
    >&2 echo "-----"
    exit 6
fi
b |
diff -r 000000000000 -r e2e9fae080ad ebeye_urllib.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ebeye_urllib.py Thu Dec 01 15:27:59 2016 -0500 |
[ |
b'@@ -0,0 +1,321 @@\n+#!/usr/bin/env python\n+# ======================================================================\n+# Script derived from the EB-eye (REST) Python client available at\n+# http://www.ebi.ac.uk/Tools/webservices/services/eb-eye_rest\n+# and distributed under the Apache License\n+# ======================================================================\n+# Load libraries\n+import platform\n+import os\n+import urllib\n+import re\n+from optparse import OptionParser\n+from gzip import GzipFile\n+from xmltramp2 import xmltramp\n+# python2\n+from StringIO import StringIO\n+import urllib2\n+# python3\n+# import urllib.request as urllib2\n+\n+\n+# Service base URL\n+baseUrl = \'http://www.ebi.ac.uk/ebisearch/ws/rest\'\n+\n+# Debug level\n+debugLevel = 0\n+\n+\n+# Debug print\n+def printDebugMessage(functionName, message, level):\n+ if(level <= debugLevel):\n+ print (\'[\' + functionName + \'] \' + message)\n+\n+\n+# User-agent for request.\n+def getUserAgent():\n+ printDebugMessage(\'getUserAgent\', \'Begin\', 11)\n+ urllib_agent = \'Python-urllib/%s\' % urllib2.__version__\n+ clientRevision = \'$Revision: 2468 $\'\n+ clientVersion = \'0\'\n+ if len(clientRevision) > 11:\n+ clientVersion = clientRevision[11:-2]\n+ user_agent = \'EBI-Sample-Client/%s (%s; Python %s; %s) %s\' % (\n+ clientVersion, os.path.basename(__file__),\n+ platform.python_version(), platform.system(),\n+ urllib_agent\n+ )\n+ printDebugMessage(\'getUserAgent\', \'user_agent: \' + user_agent, 12)\n+ printDebugMessage(\'getUserAgent\', \'End\', 11)\n+ return user_agent\n+\n+\n+# Wrapper for a REST (HTTP GET) request\n+def restRequest(url):\n+ printDebugMessage(\'restRequest\', \'Begin\', 11)\n+ printDebugMessage(\'restRequest\', \'url: \' + url, 11)\n+ # python 2\n+ url = urllib.quote(url, safe="%/:=&?~#+!$,;\'@()*[]")\n+ # python 3\n+ # url = urllib.request.quote(url, safe="%/:=&?~#+!$,;\'@()*[]")\n+\n+ try:\n+ user_agent = getUserAgent()\n+ http_headers = {\n+ \'User-Agent\': 
user_agent,\n+ \'Accept-Encoding\': \'gzip\'\n+ }\n+ req = urllib2.Request(url, None, http_headers)\n+ resp = urllib2.urlopen(req)\n+ # python2\n+ encoding = resp.info().getheader(\'Content-Encoding\')\n+ # python3\n+ # encoding = resp.info().__getitem__(\'Content-Encoding\')\n+ result = None\n+ if encoding is None or encoding == \'identity\':\n+ # python2\n+ result = resp.read()\n+ # python3\n+ # result = str(resp.read(), \'utf-8\')\n+ elif encoding == \'gzip\':\n+ result = resp.read()\n+ printDebugMessage(\'restRequest\', \'result: \' + str(result), 21)\n+ # python2\n+ gz = GzipFile(\n+ fileobj=StringIO(result),\n+ mode="r")\n+ result = gz.read()\n+ # python3\n+ # result = str(gzip.decompress(result), \'utf-8\')\n+ else:\n+ raise Exception(\'Unsupported Content-Encoding\')\n+ resp.close()\n+ except urllib2.HTTPError as ex:\n+ raise ex\n+ printDebugMessage(\'restRequest\', \'result: \' + result, 11)\n+ printDebugMessage(\'restRequest\', \'End\', 11)\n+ return result\n+\n+\n+def hasSubdomains(domainInfo):\n+ for dir in domainInfo._dir:\n+ if dir._name == \'subdomains\':\n+ return True\n+ return False\n+\n+\n+def extractUsefulFields(fieldInfos):\n+ searchable = []\n+ retrievable = []\n+\n+ for fieldInfo in fieldInfos:\n+ if fieldInfo(\'id\') == "$facets":\n+ continue\n+\n+ options = fieldInfo[\'options\'][\'option\':]\n+ for option in options:\n+ if option("name") == "searchable" and str(option) == "true":\n+ searchable.append(fieldInfo(\'id\'))\n+ if option("name") == "retrievable" and str(option) == "true":\n+ retrievable.append(fieldInfo(\'id\'))\n+ return searchable, retrievable\n+\n+\n+def extractLowerLevelDomains(domainInfo, domains):\n+ if hasSubdomains(domainInfo):\n+ '..b'baseUrl + \'/\' + domain + \'?query=\' + query\n+ requestUrl += \'&fields=\' + fields + \'&size=\' + str(maximum_size)\n+ requestUrl += \'&start=\' + str(start) + \'&fieldurl=true\'\n+ request_output += makeRequest(requestUrl)\n+\n+ if (numberOfResults % 100) > 0:\n+ start = maximum_size * 
quotient\n+ remainder = numberOfResults - start\n+ requestUrl = baseUrl + \'/\' + domain + \'?query=\' + query\n+ requestUrl += \'&fields=\' + fields + \'&size=\' + str(remainder)\n+ requestUrl += \'&start=\' + str(start) + \'&fieldurl=true\'\n+ request_output += makeRequest(requestUrl)\n+\n+ print(request_output)\n+\n+\n+def printEntries(entries):\n+ output = ""\n+ printDebugMessage(\'printEntries\', \'Begin\', 1)\n+ for entry in entries:\n+ sep = ""\n+ for field in entry[\'fields\'][\'field\':]:\n+ output += "%s" % (sep)\n+ fields = field[\'values\'][\'value\':]\n+ if len(fields) > 0:\n+ sub_sep = ""\n+ for value in field[\'values\'][\'value\':]:\n+ output += "%s%s" % (sub_sep, value)\n+ sub_sep = ","\n+ sep = "\\t"\n+\n+ if hasFieldUrls(entry):\n+ output += "%s" % (sep)\n+ sub_sep = ""\n+ for fieldurl in entry[\'fieldURLs\'][\'fieldURL\':]:\n+ output += "%s%s" % (sub_sep, str(fieldurl))\n+ sub_sep = ","\n+ sep = "\\t"\n+ if hasViewUrls(entry):\n+ output += "%s" % (sep)\n+ sub_sep = ""\n+ for viewurl in entry[\'viewURLs\'][\'viewURL\':]:\n+ output += "%s%s" % (sub_sep, str(viewurl))\n+ sub_sep = ","\n+ output += "\\n"\n+ printDebugMessage(\'printEntries\', \'End\', 1)\n+ return output\n+\n+\n+def hasFieldUrls(entry):\n+ for dir in entry._dir:\n+ if dir._name == \'fieldURLs\':\n+ return True\n+ return False\n+\n+\n+def hasViewUrls(entry):\n+ for dir in entry._dir:\n+ if dir._name == \'viewURLs\':\n+ return True\n+ return False\n+\n+\n+def getRunLink(run_id):\n+ printDebugMessage(\'getEntries\', \'Begin\', 1)\n+ requestUrl = baseUrl + \'/metagenomics_runs/entry/\' + run_id + \'?fieldurl=true\'\n+ printDebugMessage(\'getEntries\', requestUrl, 2)\n+ xmlDoc = restRequest(requestUrl)\n+ doc = xmltramp.parse(xmlDoc)\n+ entries = doc[\'entries\'][\'entry\':]\n+ fieldURL = \'\'\n+ for entry in entries:\n+ for fieldurl in entry[\'fieldURLs\'][\'fieldURL\':]:\n+ fieldURL += str(fieldurl)\n+ printDebugMessage(\'getEntries\', \'End\', 1)\n+ p = re.compile(\'http\')\n+ 
fieldURL = p.sub(\'https\', fieldURL)\n+ print fieldURL\n+\n+\n+if __name__ == \'__main__\':\n+ # Usage message\n+ usage = """\n+ %prog getDomainHierarchy\n+ %prog getResults <domain> <query> <fields>\n+ %prog getRunLink <runId>\n+ """\n+\n+ description = "Tools to query and download data from several EMBL-EBI databases"\n+ description += "The searching tools are using the EB-eye search engine. "\n+ description += "http://www.ebi.ac.uk/ebisearch/"\n+ # Process command-line options\n+ parser = OptionParser(\n+ usage=usage,\n+ description=description,\n+ version=\'1.0\')\n+ (options, args) = parser.parse_args()\n+\n+ # No arguments, print usage\n+ if len(args) < 1:\n+ parser.print_help()\n+\n+ # Get domain hierarchy\n+ elif args[0] == \'getDomainHierarchy\':\n+ getDomainHierarchy()\n+\n+ # Get search results\n+ elif args[0] == \'getResults\':\n+ if len(args) < 4:\n+ print (\'domain, query and fields should be given.\')\n+ else:\n+ getResults(args[1], args[2], args[3])\n+\n+ # Get run link results\n+ elif args[0] == \'getRunLink\':\n+ if len(args) < 2:\n+ print (\'run id should be given.\')\n+ else:\n+ getRunLink(args[1])\n+\n+ # Unknown argument combination, display usage\n+ else:\n+ print (\'Error: unrecognised argument combination\')\n+ parser.print_help()\n' |
b |
diff -r 000000000000 -r e2e9fae080ad ebi_metagenomics_run_downloader.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ebi_metagenomics_run_downloader.xml Thu Dec 01 15:27:59 2016 -0500 |
[ |
<tool id="ebi_metagenomics_run_downloader" name="Download run data" version="0.1.0">
    <description>from EBI Metagenomics database</description>

    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="requirements">
        <requirement type="package" version="7.49.0">curl</requirement>
    </expand>

    <!-- Arguments, in order: helper Python script, run id, information type,
         item to download, chunk mode, output file. Sequence files (except the
         predicted tRNAs) and the InterProScan matches are fetched chunk by
         chunk; everything else is a single download. -->
    <command detect_errors="exit_code"><![CDATA[
        '$__tool_directory__/download_ebi_metagenomics_run_data'
            '$__tool_directory__/ebeye_urllib.py'
            '$run_id'
            '$information.type'
            '$information.to_download'

            #if ( $information.type == 'sequences' and $information.to_download != 'ncRNA-tRNA-FASTA') or ($information.type == 'function' and $information.to_download == 'InterProScan')
                'multiple_chunks'
            #else
                'single_chunk'
            #end if

            'output.dat'
    ]]></command>

    <inputs>
        <param name="run_id" type="text" label="Identifier of the run in EBI Metagenomics" />

        <conditional name="information">
            <param name="type" type="select" label="Type of information to download">
                <option value="sequences" selected="true">Sequence data</option>
                <option value="qc-stats">Quality control statistics</option>
                <option value="taxonomy">Taxonomic analysis</option>
                <option value="function">Functional analysis</option>
            </param>

            <when value="sequences">
                <param name="to_download" type="select" label="Sequences to download">
                    <option value="ProcessedReads" selected="true">Processed nucleotide reads</option>
                    <option value="ReadsWithPredictedCDS">Processed reads with pCDS</option>
                    <option value="ReadsWithMatches">Processed reads with annotation</option>
                    <option value="ReadsWithoutMatches">Processed reads without annotation</option>
                    <option value="PredictedCDSWithAnnotation">Predicted CDS with annotation</option>
                    <option value="PredictedCDSWithoutAnnotation">Predicted CDS without annotation</option>
                    <option value="PredictedORFWithoutAnnotation">Predicted ORF without annotation</option>
                    <option value="ncRNA-tRNA-FASTA">Predicted tRNAs</option>
                </param>
            </when>

            <when value="qc-stats">
                <param name="to_download" type="select" label="Quality control statistics to download">
                    <option value="summary" selected="true">Number of sequence reads per Quality Control step</option>
                    <option value="stats">Quality control statistics</option>
                    <option value="length">Read length distribution</option>
                    <option value="gc_bin">Read GC distribution</option>
                    <option value="base">Nucleotide position distribution</option>
                </param>
            </when>

            <when value="taxonomy">
                <param name="to_download" type="select" label="Taxonomic analysis to download">
                    <option value="5S-rRNA-FASTA" selected="true">Reads encoding 5S rRNA</option>
                    <option value="16S-rRNA-FASTA">Reads encoding 16S rRNA</option>
                    <option value="23S-rRNA-FASTA">Reads encoding 23S rRNA</option>
                    <option value="OTU-TSV">OTUs, reads and taxonomic assignments (TSV)</option>
                    <option value="NewickPrunedTree">Phylogenetic tree</option>
                </param>
            </when>

            <when value="function">
                <param name="to_download" type="select" label="Functional analysis to download">
                    <option value="InterProScan" selected="true">InterPro matches</option>
                    <option value="GOAnnotations">Complete GO annotation</option>
                    <option value="GOSlimAnnotations">GO slim annotation</option>
                </param>
            </when>
        </conditional>
    </inputs>

    <!-- Exactly one of the outputs below must match every (type, to_download)
         pair offered above; the filters compare against the option values. -->
    <outputs>
        <data name="output_file_fasta" format="fasta" label="${tool.name} for ${run_id}" from_work_dir="output.dat">
            <filter>( information['type'] == 'sequences' or
                ( information['type'] == 'taxonomy' and ( information['to_download'] == '5S-rRNA-FASTA' or
                information['to_download'] == '16S-rRNA-FASTA' or information['to_download'] == '23S-rRNA-FASTA' )))
            </filter>
        </data>
        <data name="output_file_tsv" format="tsv" label="${tool.name} for ${run_id}" from_work_dir="output.dat">
            <!-- The InterPro option value is 'InterProScan'; comparing against
                 'InterProScan/chunks/1' never matched and left that choice
                 without any output dataset. -->
            <filter>( information['type'] == 'qc-stats' or
                ( information['type'] == 'taxonomy' and information['to_download'] == 'OTU-TSV' ) or
                ( information['type'] == 'function' and information['to_download'] == 'InterProScan' ))
            </filter>
        </data>
        <data name="output_file_csv" format="csv" label="${tool.name} for ${run_id}" from_work_dir="output.dat">
            <filter>( information['type'] == 'function' and ( information['to_download'] == 'GOAnnotations' or information['to_download'] == 'GOSlimAnnotations' ))
            </filter>
        </data>
        <data name="output_file_newick" format="newick" label="${tool.name} for ${run_id}" from_work_dir="output.dat">
            <filter>( information['type'] == 'taxonomy' and information['to_download'] == 'NewickPrunedTree')
            </filter>
        </data>
    </outputs>

    <tests>
        <test>
            <param name="run_id" value="ERR675640" />
            <conditional name="information">
                <param name="type" value="qc-stats" />
                <param name="to_download" value="stats" />
            </conditional>
            <output name="output_file_tsv" file="ERR675640_qc-stats_stats" />
        </test>
        <test>
            <param name="run_id" value="ERR675573" />
            <conditional name="information">
                <param name="type" value="sequences" />
                <param name="to_download" value="PredictedCDSWithAnnotation" />
            </conditional>
            <output name="output_file_fasta">
                <assert_contents>
                    <has_line_matching expression="^>ERR675573.1772-FC81EB0ABXX:7:1101:19215:2564-ATCACGAT-1_1_117_- IPR010690/PF06898/2-34$" />
                    <has_line_matching expression="^>ERR675573.6692248-FC81EB0ABXX:7:1202:20007:191300-ATCACGAT-1_1_127_- IPR004089/PF00015/2-41|G3DSA:1.10.287.950/2-41$" />
                    <has_line_matching expression="^>ERR675573.21754698-FC81EB0ABXX:7:2208:13328:189792-ATCACGAT-1_1_150_- IPR027414/G3DSA:2.70.98.50/7-45$" />
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
**What it does**

The European Bioinformatics Institute (EMBL-EBI) maintains the world’s most comprehensive range of freely available and up-to-date molecular databases.

This tool downloads data related to a run in the EBI Metagenomics database.
    ]]></help>

    <expand macro="citations" />
</tool>
b |
diff -r 000000000000 -r e2e9fae080ad environment.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/environment.yml Thu Dec 01 15:27:59 2016 -0500 |
b |
@@ -0,0 +1,19 @@ +name: ebeye_urllib +channels: !!python/tuple +- biocore +- bioconda +- defaults +dependencies: +- bioconda::urllib3=1.12=py27_0 +- bioconda::xmltramp2=3.1.1=py27_0 +- openssl=1.0.2j=0 +- pip=9.0.1=py27_0 +- python=2.7.12=1 +- readline=6.2=2 +- setuptools=27.2.0=py27_0 +- six=1.10.0=py27_0 +- sqlite=3.13.0=0 +- tk=8.5.18=0 +- wheel=0.29.0=py27_0 +- zlib=1.2.8=3 + |
b |
diff -r 000000000000 -r e2e9fae080ad generate_macros.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/generate_macros.py Thu Dec 01 15:27:59 2016 -0500 |
[ |
#!/usr/bin/env python
"""Generate the ``macros.xml`` file shared by the EBI Search Galaxy wrappers.

The requirements and citations macros are static; the ``inputs`` macro is
built from the domain description returned by
``ebeye_urllib.getDomainHierarchy()``.

The shebang names plain ``python`` because ``ebeye_urllib`` uses Python 2
syntax; the code in this module itself runs on Python 2 and Python 3.
"""

from operator import itemgetter
from xml.sax.saxutils import escape

# One indentation level in the generated file.
# NOTE(review): this is the value as it appears in the reviewed copy of the
# script, where runs of whitespace may have been collapsed; confirm the
# intended width against the repository before regenerating macros.xml.
_INDENT = ' '


def _escape_text(text):
    """Return *text* rendered with ``%s`` and escaped for XML element content."""
    return escape('%s' % (text,))


def _escape_attr(text):
    """Return *text* rendered with ``%s`` and escaped for a double-quoted attribute."""
    return escape('%s' % (text,), {'"': '&quot;'})


def _line(depth, text):
    """Return *text* prefixed with *depth* indentation levels."""
    return '%s%s' % (depth * _INDENT, text)


def add_option(value, name, selected=False):
    """Return one ``<option>`` element, newline-terminated.

    :param value: content of the ``value`` attribute
    :param name: text displayed for the option
    :param selected: when true, add ``selected="true"``
    :returns: the XML fragment; ``&``, ``<``, ``>`` (and ``"`` inside the
        attribute) are escaped so the fragment is always well-formed
    """
    selected_attr = ' selected="true"' if selected else ''
    return '<option value="%s"%s>%s</option>\n' % (
        _escape_attr(value), selected_attr, _escape_text(name))


def add_select_parameter(name, label, multiple=False):
    """Return the opening tag of a select ``<param>``, newline-terminated.

    The caller is responsible for appending the options and ``</param>``.

    :param name: content of the ``name`` attribute
    :param label: content of the ``label`` attribute
    :param multiple: when true, add ``multiple="true"``
    :returns: the XML fragment, with attribute values escaped
    """
    multiple_attr = 'multiple="true"' if multiple else ''
    # The space after the label attribute is emitted even without a
    # ``multiple`` attribute so the generated text stays stable.
    return '<param name="%s" type="select" label="%s" %s>\n' % (
        _escape_attr(name), _escape_attr(label), multiple_attr)


def _requirements_macro():
    """Return the lines of the static ``requirements`` macro."""
    return [
        _line(1, '<xml name="requirements">\n'),
        _line(2, '<requirements>\n'),
        _line(3, '<requirement type="package" version="3.1.1">xmltramp2</requirement>\n'),
        _line(3, '<requirement type="package" version="1.12">urllib3</requirement>\n'),
        _line(3, '<yield/>\n'),
        _line(2, '</requirements>\n'),
        _line(1, '</xml>\n'),
    ]


def _citations_macro():
    """Return the lines of the static ``citations`` macro."""
    return [
        _line(1, '<xml name="citations">\n'),
        _line(2, '<citations>\n'),
        _line(3, '<citation type="doi">10.1093/bib/bbp065</citation>\n'),
        _line(3, '<citation type="doi">10.1093/nar/gkv316</citation>\n'),
        _line(2, '</citations>\n'),
        _line(1, '</xml>\n'),
    ]


def _queries_repeat(searchable_fields):
    """Return the lines of the ``queries`` repeat block for one domain."""
    lines = [
        _line(4, '<repeat name="queries" title="Add a query">\n'),
        _line(5, add_select_parameter('combination_operation', 'Combination operation')),
    ]
    lines.extend(_line(6, add_option(op, op)) for op in ('AND', 'OR', 'NOT'))
    lines.append(_line(5, '</param>\n'))

    lines.append(_line(5, add_select_parameter('query_field', 'Fields')))
    lines.extend(_line(6, add_option(field, field)) for field in searchable_fields)
    lines.append(_line(5, '</param>\n'))

    lines.append(_line(5, '<conditional name="comp_operation">\n'))
    lines.append(_line(6, add_select_parameter('operation', 'Comparison operation')))
    lines.extend(_line(7, add_option(op, op)) for op in ('equal', 'not', 'range'))
    lines.append(_line(6, '</param>\n'))
    lines.extend([
        _line(6, '<when value="equal">\n'),
        _line(7, '<param name="query_text" type="text" label="Searched term"/>\n'),
        _line(6, '</when>\n'),
        _line(6, '<when value="not">\n'),
        _line(7, '<param name="query_text" type="text" label="Searched term"/>\n'),
        _line(7, '<param name="not_query_text" type="text" label="Limiting term"/>\n'),
        _line(6, '</when>\n'),
        _line(6, '<when value="range">\n'),
        _line(7, '<param name="min" type="text" label="From"/>\n'),
        _line(7, '<param name="max" type="text" label="To"/>\n'),
        _line(6, '</when>\n'),
        _line(5, '</conditional>\n'),
        _line(4, '</repeat>\n'),
    ])
    return lines


def _domain_when(domain, fields):
    """Return the lines of the ``<when>`` block of one domain."""
    lines = [
        _line(3, '<when value="%s">\n' % _escape_attr(domain)),
        _line(4, add_select_parameter('fields', 'Fields to extract', multiple=True)),
    ]
    lines.extend(
        _line(5, add_option(field, field, selected=True))
        for field in fields['retrievable_fields'])
    lines.append(_line(
        5, '<validator type="no_options" message="Please select at least one field" />\n'))
    lines.append(_line(4, '</param>\n'))
    lines.extend(_queries_repeat(fields['searchable_fields']))
    lines.append(_line(3, '</when>\n\n'))
    return lines


def _inputs_macro(domains_fields):
    """Return the lines of the ``inputs`` macro, domains sorted by display name."""
    sorted_domains = sorted(
        ((domain, domains_fields[domain]['name']) for domain in domains_fields),
        key=itemgetter(1))

    lines = [
        _line(1, '<xml name="inputs">\n'),
        _line(2, '<conditional name="searched_domain">\n'),
        _line(3, add_select_parameter('domain', 'Domain to query')),
    ]
    lines.extend(
        _line(4, add_option(domain, name)) for domain, name in sorted_domains)
    lines.append(_line(3, '</param>\n\n'))
    for domain, _name in sorted_domains:
        lines.extend(_domain_when(domain, domains_fields[domain]))
    lines.append(_line(2, '</conditional>\n'))
    lines.append(_line(1, '</xml>\n'))
    return lines


def write_macros_file(macros_filepath, domains_fields):
    """Write the complete macros file.

    :param macros_filepath: path of the file to create or overwrite
    :param domains_fields: mapping of domain id to a mapping providing
        ``'name'`` (display name), ``'retrievable_fields'`` and
        ``'searchable_fields'`` (iterables of field ids)
    """
    parts = ['<macros>\n']
    parts.extend(_requirements_macro())
    parts.extend(_inputs_macro(domains_fields))
    parts.extend(_citations_macro())
    parts.append('</macros>\n')

    with open(macros_filepath, 'w') as macros_file:
        macros_file.write(''.join(parts))


def generate_macros():
    """Fetch the EBI Search domain hierarchy and write ``macros.xml``."""
    # Imported here rather than at module level so that the formatting
    # helpers above stay importable without the EBI client and its
    # dependencies.
    import ebeye_urllib

    domains_fields = ebeye_urllib.getDomainHierarchy()
    write_macros_file('macros.xml', domains_fields)


if __name__ == '__main__':
    generate_macros()
b |
diff -r 000000000000 -r e2e9fae080ad macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Dec 01 15:27:59 2016 -0500 |
b |
b'@@ -0,0 +1,9383 @@\n+<macros>\n+ <xml name="requirements">\n+ <requirements>\n+ <requirement type="package" version="3.1.1">xmltramp2</requirement>\n+ <requirement type="package" version="1.12">urllib3</requirement>\n+ <yield/>\n+ </requirements>\n+ </xml>\n+ <xml name="inputs">\n+ <conditional name="searched_domain">\n+ <param name="domain" type="select" label="Domain to query" >\n+ <option value="sra-analysis">Analysis</option>\n+ <option value="arrayexpress-repository">ArrayExpress</option>\n+ <option value="genome_assembly">Assembly</option>\n+ <option value="wgs_masters">Assembly contig set</option>\n+ <option value="emblrelease_con">Assembly scaffold (Release)</option>\n+ <option value="emblnew_con">Assembly scaffold (Update)</option>\n+ <option value="atlas-genes">Baseline Expression Atlas Genes</option>\n+ <option value="biomodels">BioModels</option>\n+ <option value="biosamples">BioSamples</option>\n+ <option value="biosamples-groups">BioSamples groups</option>\n+ <option value="chebi">ChEBI</option>\n+ <option value="chembl-activity">ChEMBL Activity</option>\n+ <option value="chembl-assay">ChEMBL Assay</option>\n+ <option value="chembl-document">ChEMBL Document</option>\n+ <option value="chembl-molecule">ChEMBL Molecule</option>\n+ <option value="chembl-target">ChEMBL Target</option>\n+ <option value="chembl-target_component">ChEMBL Target Component</option>\n+ <option value="coding_release">Coding (Release)</option>\n+ <option value="coding_update">Coding (Update)</option>\n+ <option value="dgva">DGVa</option>\n+ <option value="atlas-genes-differential">Differential Expression Atlas Genes</option>\n+ <option value="efo">EFO</option>\n+ <option value="ega">EGA</option>\n+ <option value="emdb">EMDB</option>\n+ <option value="epo">EPO</option>\n+ <option value="elixir-registry">Elixir registry</option>\n+ <option value="ensembl_gene">Ensembl Gene</option>\n+ <option value="ensemblGenomes_gene">Ensembl Genomes Gene</option>\n+ <option 
value="ensemblGenomes_genome">Ensembl Genomes Genome</option>\n+ <option value="ensemblGenomes_seqRegion">Ensembl Genomes Sequence Region</option>\n+ <option value="ensemblGenomes_variant">Ensembl Genomes Variant</option>\n+ <option value="enzymeportal">Enzyme Portal</option>\n+ <option value="enzymeportal_enzymes">Enzyme Portal - Enzymes</option>\n+ <option value="atlas-experiments">Expression Atlas Experiments</option>\n+ <option value="gnps">GNPS</option>\n+ <option value="go">GO</option>\n+ <option value="gpcrdb">GPCRDB</option>\n+ <option value="gpmdb">GPMdb</option>\n+ <option value="hgnc">HGNC</option>\n+ <option value="human_diseases">Human diseases</option>\n+ <option value="imgt-hla">IMGT/HLA</option>\n+ <option value="ipd-kir">IPD-KIR</option>\n+ <option value="ipd-mhc">IPD-MHC</option>\n+ <option value="intact-complexes">IntAct Complexes</option>\n+ <option value="intact-experiments">IntAct Experiments</option>\n+ <option value="intact-interactions">IntAct Interactions</option>\n+ <option value="intact-interactors">IntAct Interactors</option>\n+ <option value="intenz">IntEnz</option>\n+ <option value="interpro">InterPro</option>\n+ <option value="iprmatches">Iprmatches</option>\n+ <option value="jpo">JPO</option>\n+'..b'EFSEQ_MRNA_PREDICTED">REFSEQ_MRNA_PREDICTED</option>\n+ <option value="REFSEQ_NCRNA">REFSEQ_NCRNA</option>\n+ <option value="REFSEQ_PEPTIDE">REFSEQ_PEPTIDE</option>\n+ <option value="REFSEQ_PEPTIDE_PREDICTED">REFSEQ_PEPTIDE_PREDICTED</option>\n+ <option value="SCHISTODB">SCHISTODB</option>\n+ <option value="UNIGENE">UNIGENE</option>\n+ <option value="UNIPARC">UNIPARC</option>\n+ <option value="UNIPATHWAY">UNIPATHWAY</option>\n+ <option value="UNIPROT">UNIPROT</option>\n+ <option value="WIKIGENE">WIKIGENE</option>\n+ <option value="WORMBASE_GENE">WORMBASE_GENE</option>\n+ <option value="WORMBASE_GSEQNAME">WORMBASE_GSEQNAME</option>\n+ <option value="WORMBASE_LOCUS">WORMBASE_LOCUS</option>\n+ <option 
value="WORMBASE_TRANSCRIPT">WORMBASE_TRANSCRIPT</option>\n+ <option value="WORMPEP_ID">WORMPEP_ID</option>\n+ <option value="collection">collection</option>\n+ <option value="database">database</option>\n+ <option value="description">description</option>\n+ <option value="domain">domain</option>\n+ <option value="domain_count">domain_count</option>\n+ <option value="domain_source">domain_source</option>\n+ <option value="exon">exon</option>\n+ <option value="gene_name">gene_name</option>\n+ <option value="gene_synonym">gene_synonym</option>\n+ <option value="genetree">genetree</option>\n+ <option value="genomic_unit">genomic_unit</option>\n+ <option value="haplotype">haplotype</option>\n+ <option value="id">id</option>\n+ <option value="id_noversion">id_noversion</option>\n+ <option value="name">name</option>\n+ <option value="peptide">peptide</option>\n+ <option value="seq_region_name">seq_region_name</option>\n+ <option value="source">source</option>\n+ <option value="species">species</option>\n+ <option value="system_name">system_name</option>\n+ <option value="transcript">transcript</option>\n+ </param>\n+ <conditional name="comp_operation">\n+ <param name="operation" type="select" label="Comparison operation" >\n+ <option value="equal">equal</option>\n+ <option value="not">not</option>\n+ <option value="range">range</option>\n+ </param>\n+ <when value="equal">\n+ <param name="query_text" type="text" label="Searched term"/>\n+ </when>\n+ <when value="not">\n+ <param name="query_text" type="text" label="Searched term"/>\n+ <param name="not_query_text" type="text" label="Limiting term"/>\n+ </when>\n+ <when value="range">\n+ <param name="min" type="text" label="From"/>\n+ <param name="max" type="text" label="To"/>\n+ </when>\n+ </conditional>\n+ </repeat>\n+ </when>\n+\n+ </conditional>\n+ </xml>\n+ <xml name="citations">\n+ <citations>\n+ <citation type="doi">10.1093/bib/bbp065</citation>\n+ <citation type="doi">10.1093/nar/gkv316</citation>\n+ </citations>\n+ 
</xml>\n+</macros>\n' |
b |
diff -r 000000000000 -r e2e9fae080ad test-data/ERR675640_qc-stats_stats --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ERR675640_qc-stats_stats Thu Dec 01 15:27:59 2016 -0500 |
b |
@@ -0,0 +1,13 @@ +bp_count 85213813 +sequence_count 670470 +average_length 127.096 +standard_deviation_length 17.309 +length_min 100 +length_max 159 +average_gc_content 49.226 +standard_deviation_gc_content 9.333 +average_gc_ratio 1.117 +standard_deviation_gc_ratio 0.470 +ambig_char_count 6 +ambig_sequence_count 6 +average_ambig_chars 0.000 |