Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_entrez_eutils commit 15bcc5104c577b4b9c761f2854fc686c07ffa9db
added:
README.rst __efetch_build_options.py ecitmatch.py ecitmatch.xml macros.xml efetch.py egquery.py einfo.py elink.py epost.py esearch.py esummary.py eutils.py eutils.pyc test-data/ecitmatch.results.tsv test-data/ecitmatch.tsv test-data/egquery.1.xml test-data/esearch.pubmed.2014-01-pnas.xml test-data/esearch.pubmed.xml test-data/esummary.tax.xml test-data/example.history.json test-data/pm-tax-neighbor.xml test-data/pubmed.metadata.xml test-data/viruses.tax.xml tool_dependencies.xml
diff -r 000000000000 -r 68cd8d564e0a README.rst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,38 @@
+Galaxy NCBI Entrez Tools
+========================
+
+This repo requires a readme, as administrators should be very aware of some
+restrictions NCBI places on the use of the Entrez service.
+
+NCBI requests that you please limit large jobs to either weekends or
+between 9:00 PM and 5:00 AM Eastern time during weekdays. This is not a
+request that the Galaxy tool can easily enforce, so we've included it
+quite prominently in the disclaimer on every tool.
+
+Failure to comply with NCBI's policies may result in a block until
+you/the user contacts NCBI and registers the tool ID and their email.
+
+Note that these are *IP*-level blocks, so the Galaxy tools use a
+concatenation of the administrators' emails and the user's email, in
+hopes that NCBI will contact all relevant parties should their system be
+abused.
+
+Additionally, since these are IP-level blocks, the Galaxy tool author
+(@erasche) recommends using the following ``job_conf.xml`` snippet in
+order to place a system-wide restriction of 1 concurrent Entrez job
+amongst all users.
+
+.. code:: xml
+
+    <destination id="entrez" runner="local">
+    </destination>
+    <limit type="concurrent_jobs" id="entrez">1</limit>
+    <tools>
+        <tool id="ncbi.eutils.efetch" destination="entrez" />
+        <tool id="ncbi.eutils.esearch" destination="entrez" />
+        <tool id="ncbi.eutils.epost" destination="entrez" />
+        <tool id="ncbi.eutils.elink" destination="entrez" />
+        <tool id="ncbi.eutils.einfo" destination="entrez" />
+        <tool id="ncbi.eutils.esummary" destination="entrez" />
+    </tools>
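The email concatenation mentioned above happens client-side in eutils.py (included later in this changeset); a minimal sketch of that behavior, with placeholder addresses:

    # Sketch of the admin/user email concatenation described in the README
    # (placeholder addresses; mirrors the logic in eutils.py below).
    admin_email = "admin@example.org"
    user_email = "user@example.org"
    contact = ';'.join((admin_email, user_email))
    print(contact)  # admin@example.org;user@example.org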
diff -r 000000000000 -r 68cd8d564e0a __efetch_build_options.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/__efetch_build_options.py	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,267 @@
+#!/usr/bin/env python
+# Daniel Blankenberg
+# Creates the options for tool interface
+import re
+
+# http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi
+db_list = '''
+<DbName>annotinfo</DbName>
+<DbName>assembly</DbName>
+<DbName>bioproject</DbName>
+<DbName>biosample</DbName>
+<DbName>biosystems</DbName>
+<DbName>blastdbinfo</DbName>
+<DbName>books</DbName>
+<DbName>cdd</DbName>
+<DbName>clinvar</DbName>
+<DbName>clone</DbName>
+<DbName>dbvar</DbName>
+<DbName>gap</DbName>
+<DbName>gapplus</DbName>
+<DbName>gds</DbName>
+<DbName>gencoll</DbName>
+<DbName>gene</DbName>
+<DbName>genome</DbName>
+<DbName>geoprofiles</DbName>
+<DbName>grasp</DbName>
+<DbName>gtr</DbName>
+<DbName>homologene</DbName>
+<DbName>medgen</DbName>
+<DbName>mesh</DbName>
+<DbName>ncbisearch</DbName>
+<DbName>nlmcatalog</DbName>
+<DbName>nuccore</DbName>
+<DbName>nucest</DbName>
+<DbName>nucgss</DbName>
+<DbName>nucleotide</DbName>
+<DbName>omim</DbName>
+<DbName>orgtrack</DbName>
+<DbName>pcassay</DbName>
+<DbName>pccompound</DbName>
+<DbName>pcsubstance</DbName>
+<DbName>pmc</DbName>
+<DbName>popset</DbName>
+<DbName>probe</DbName>
+<DbName>protein</DbName>
+<DbName>proteinclusters</DbName>
+<DbName>pubmed</DbName>
+<DbName>pubmedhealth</DbName>
+<DbName>seqannot</DbName>
+<DbName>snp</DbName>
+<DbName>sra</DbName>
+<DbName>structure</DbName>
+<DbName>taxonomy</DbName>
+<DbName>unigene</DbName>'''.replace( "<DbName>", "").replace( "</DbName>", "").split("\n")
+
+
+help = ''' (all)
+ docsum            xml       Document Summary
+ docsum            json      Document Summary
+ full              text      Full Document
+ uilist            xml       Unique Identifier List
+ uilist            text      Unique Identifier List
+ full              xml       Full Document
+
+ bioproject
+ native            BioProject Report
+ native            xml       RecordSet
+
+ biosample
+ native            BioSample Report
+ native            xml       BioSampleSet
+
+ biosystems
+ native            xml       Sys-set
+
+ gds
+ native            xml       RecordSet
+ summary           text      Summary
+
+ gene
+ gene_table        xml       Gene Table
+ native            text      Gene Report
+ native            asn.1     Entrezgene
+ native            xml       Entrezgene-Set
+ tabular           tabular   Tabular Report
+
+ homologene
+ alignmentscores   text      Alignment Scores
+ fasta             fasta     FASTA
+ homologene        text      Homologene Report
+ native            text      Homologene List
+ native            asn.1     HG-Entry
+ native            xml       Entrez-Homologene-Set
+
+ mesh
+ full              text      Full Record
+ native            text      MeSH Report
+ native            xml       RecordSet
+
+ nlmcatalog
+ native            text      Full Record
+ native            xml       NLMCatalogRecordSet
+
+ pmc
+ medline           text      MEDLINE
+ native            xml       pmc-articleset
+
+ pubmed
+ abstract          xml       Abstract
+ medline           text      MEDLINE
+ native            asn.1     Pubmed-entry
+ native            xml       PubmedArticleSet
+
+ (sequences)
+ acc               text      Accession Number
+ est               xml       EST Report
+ fasta             fasta     FASTA
+ fasta             xml       TinySeq
+ fasta_cds_aa [...]
+ [...]SDSet
+ gss               text      GSS Report
+ ipg               text      Identical Protein Report
+ ipg               xml       IPGReportSet
+ native            text      Seq-entry
+ native            xml       Bioseq-set
+ seqid             asn.1     Seq-id
+
+ snp
+ chr               text      Chromosome Report
+ docset            text      Summary
+ fasta             fasta     FASTA
+ flt               text      Flat File
+ native            asn.1     Rs
+ native            xml       ExchangeSet
+ rsr               tabular   RS Cluster Report
+ ssexemplar        text      SS Exemplar List
+
+ sra
+ native            xml       EXPERIMENT_PACKAGE_SET
+ runinfo           xml       SraRunInfo
+
+ structure
+ mmdb              asn.1     Ncbi-mime-asn1 strucseq
+ native            text      MMDB Report
+ native            xml       RecordSet
+
+ taxonomy
+ native            text      Taxonomy List
+ native            xml       TaxaSet'''.split("\n")
+
+
+db = {}
+for db_name in db_list:
+    db[db_name] = []
+
+section = None
+for line in help:
+    line = re.split('\s{2,}', line.strip())
+    # Ignore empties
+    if len(line) == 0:
+        continue
+    # Section headers have one item
+    elif len(line) == 1:
+        section = line[0]
+        db[section] = []
+    # Format lines have 2+
+    elif len(line) == 2:
+        parent_format = line[0]
+        description = line[1]
+
+        if parent_format not in db[section]:
+            db[section].append((parent_format, None, description))
+    elif len(line) == 3:
+        parent_format = line[0]
+        format_modifier = line[1]
+        description = line[2]
+
+        if parent_format not in db[section]:
+            db[section].append((parent_format, format_modifier, description))
+
+
+all_formats = db['(all)']
+del db['(all)']
+sequences_formats = db['(sequences)']
+del db['(sequences)']
+del db['']
+
+for key in db:
+    db[key] += all_formats
+
+for key in ('nuccore', 'nucest', 'nucgss', 'nucleotide'):
+    db[key] += sequences_formats
+
+MACRO_TPL = '''
+
+'''
+
+WHEN_TPL = '''        <when value="{format}">
+            <param name="output_format" type="select" label="Output Format">
+                {format_options}
+            </param>
+        </when>'''
+
+FORMAT_OPTION_TPL = '''<option value="{name_type}">{name_type_human}</option>'''
+
+format_names = {}
+
+print '''    <xml name="db">
+        <conditional name="db">
+            <expand macro="dbselect" />'''
+for key in sorted(db):
+    format_options = []
+
+    for (parent_format, format_modifier, description) in sorted(db[key]):
+        name_human = description
+        if format_modifier:
+            name_human += ' (%s)' % format_modifier
+        format_string = '%s-%s' % (parent_format, format_modifier)
+
+        format_options.append(FORMAT_OPTION_TPL.format(
+            name_type=format_string,
+            name_type_human=name_human,
+        ))
+
+        format_names[format_string] = format_modifier
+
+    print WHEN_TPL.format(
+        format=key,
+        format_options='\n                '.join(format_options)
+    )
+
+print '''        </conditional>
+    </xml>'''
+
+CHANGE_FORMAT_TPL = '''
+    <xml name="efetch_formats">
+        <change_format>
+            {formats}
+        </change_format>
+    </xml>
+'''
+
+CHANGE_FORMAT_WHEN_TPL = '''<when input="output_format" value="{key}" format="{value}"/>'''
+# Format options
+
+
+whens = []
+for (k, v) in format_names.items():
+    if v is None:
+        v = 'text'
+    elif v == 'asn.1':
+        v = 'asn1'
+
+    whens.append(CHANGE_FORMAT_WHEN_TPL.format(
+        key=k, value=v
+    ))
+
+print CHANGE_FORMAT_TPL.format(formats='\n            '.join(whens))
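The parser above keys on runs of two or more spaces to separate the format, modifier, and description columns of the pasted efetch help text; a standalone sketch of that splitting rule (the sample line is illustrative):

    import re

    # Columns are separated by 2+ spaces, so single spaces inside
    # descriptions like 'Document Summary' survive the split.
    line = ' docsum            xml       Document Summary'
    print(re.split(r'\s{2,}', line.strip()))
    # -> ['docsum', 'xml', 'Document Summary']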
diff -r 000000000000 -r 68cd8d564e0a ecitmatch.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ecitmatch.py	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+import argparse
+import eutils
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='ECitMatch', epilog='')
+    parser.add_argument('--file', type=argparse.FileType('r'), help='Tabular file containing citations to search')
+
+    parser.add_argument('--key', nargs='*', help='Citation Key')
+    parser.add_argument('--journal_title', nargs='*', help='Journal Title')
+    parser.add_argument('--year', nargs='*', help='Year')
+    parser.add_argument('--volume', nargs='*', help='Volume')
+    parser.add_argument('--first_page', nargs='*', help='First Page')
+    parser.add_argument('--author_name', nargs='*', help='Author name')
+
+    # Emails
+    parser.add_argument('--user_email', help="User email")
+    parser.add_argument('--admin_email', help="Admin email")
+    args = parser.parse_args()
+
+    c = eutils.Client(user_email=args.user_email, admin_email=args.admin_email)
+
+    citations = []
+    if args.file is None:
+        for key, journal, year, volume, first_page, author_name in \
+                zip(args.key, args.journal_title, args.year, args.volume, args.first_page, args.author_name):
+            citations.append({
+                'key': key,
+                'journal': journal,
+                'year': year,
+                'volume': volume,
+                'first_page': first_page,
+                'author_name': author_name,
+            })
+    else:
+        for line in args.file:
+            line = line.strip()
+            if not line.startswith('#'):
+                tmp = line.split('\t')
+                try:
+                    citations.append({
+                        'journal': tmp[0],
+                        'year': tmp[1],
+                        'volume': tmp[2],
+                        'first_page': tmp[3],
+                        'author_name': tmp[4],
+                        'key': tmp[5],
+                    })
+                except IndexError:
+                    print "Could not parse line: %s" % line
+
+    payload = {
+        'db': 'pubmed',
+        'bdata': citations
+    }
+
+    results = c.citmatch(**payload)
+    # We get data back as pipe separated, so just replace those with tabs
+    print results.replace('|', '\t')
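For comparison, the ecitmatch example documented in the Biopython tutorial (Biopython >= 1.66) builds its citation dictionaries as below; note that it names the journal field journal_title, while the script above uses journal:

    from Bio import Entrez

    Entrez.email = "a.n.other@example.org"  # placeholder address

    # Citation keys as documented in Biopython's ecitmatch example;
    # the values match test-data/ecitmatch.tsv.
    citation = {
        "journal_title": "proc natl acad sci u s a",
        "year": "1991", "volume": "88", "first_page": "3248",
        "author_name": "mann bj", "key": "citation_1",
    }
    handle = Entrez.ecitmatch(db="pubmed", bdata=[citation])
    print(handle.read())  # pipe-delimited, ends with PMID 2014248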
diff -r 000000000000 -r 68cd8d564e0a ecitmatch.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ecitmatch.xml	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,105 @@
+<?xml version="1.0"?>
+<tool id="ncbi_eutils_ecitmatch" name="NCBI ECitMatch" version="@WRAPPER_VERSION@">
+  <description>search NCBI for citations in PubMed</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <version_command>python ecitmatch.py --version</version_command>
+  <command detect_errors="aggressive" interpreter="python"><![CDATA[ecitmatch.py
+
+#if str($input.method) == "file":
+    --file $input.citation_file
+#else
+    #set keys = '" "'.join( [ str( $citation.key ) for $citation in $input.citations ] )
+    #set journal_title = '" "'.join( [ str( $citation.journal_title ) for $citation in $input.citations ] )
+    #set year = '" "'.join( [ str( $citation.year ) for $citation in $input.citations ] )
+    #set volume = '" "'.join( [ str( $citation.volume ) for $citation in $input.citations ] )
+    #set first_page = '" "'.join( [ str( $citation.first_page ) for $citation in $input.citations ] )
+    #set author = '" "'.join( [ str( $citation.author ) for $citation in $input.citations ] )
+
+    --key "$keys"
+    --journal_title "$journal_title"
+    --year "$year"
+    --volume "$volume"
+    --first_page "$first_page"
+    --author_name "$author"
+#end if
+
+@EMAIL_ARGUMENTS@
+> $default]]></command>
+  <inputs>
+    <conditional name="input">
+      <param name="method" type="select" label="Input method">
+        <option value="file">Load citations from a formatted table</option>
+        <option value="direct">Direct Input</option>
+      </param>
+      <when value="file">
+        <param label="Citation table" name="citation_file" type="data"
+               format="tabular" help="Columns must be in a specific order, see help documentation"/>
+      </when>
+      <when value="direct">
+        <repeat name="citations" title="Citations">
+          <param name="journal_title" type="text" label="Journal Title"
+                 help="E.g. proc natl acad sci u s a" />
+          <param name="year" type="integer" label="Year" value="2000"/>
+          <param name="volume" type="integer" label="Volume" value="88"/>
+          <param name="first_page" type="integer" label="First Page" value="1"/>
+          <param name="author" type="text" label="Author's Name" />
+          <param name="key" type="text" label="Citation key"
+                 help="Used to match input results to NCBI's output" />
+        </repeat>
+      </when>
+    </conditional>
+
+  </inputs>
+  <outputs>
+    <data format="tabular" name="default" label="ECitMatch Results"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="method" value="file"/>
+      <param name="citation_file" value="ecitmatch.tsv"/>
+      <output name="default" file="ecitmatch.results.tsv" ftype="tabular"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+NCBI ECitMatch
+==============
+
+Look up the PubMed IDs of citations. Citations can be provided via a tabular
+file or via direct input. If provided via file, the columns must be ordered:
+
+1. Journal Name
+2. Year
+3. Volume
+4. First Page
+5. Author Name
+6. Citation Key
+
+An example query:
+
++---------------+--------------------------+
+| Parameter     | Value                    |
++===============+==========================+
+| Journal Title | proc natl acad sci u s a |
++---------------+--------------------------+
+| Year          | 1991                     |
++---------------+--------------------------+
+| Volume        | 88                       |
++---------------+--------------------------+
+| First Page    | 3248                     |
++---------------+--------------------------+
+| Author Name   | mann bj                  |
++---------------+--------------------------+
+| Citation Key  | citation_1               |
++---------------+--------------------------+
+
+@REFERENCES@
+
+@DISCLAIMER@
+  ]]></help>
+  <expand macro="citations"/>
+</tool>
diff -r 000000000000 -r 68cd8d564e0a efetch.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/efetch.py	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+import argparse
+import eutils
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='EFetch', epilog='')
+    parser.add_argument('db', help='Database to use')
+    parser.add_argument('--user_email', help="User email")
+    parser.add_argument('--admin_email', help="Admin email")
+
+    # ID source
+    parser.add_argument('--id_list', help='list of ids')
+    parser.add_argument('--id', help='Comma separated individual IDs')
+    parser.add_argument('--history_file', help='Fetch results from previous query')
+
+    # Output
+    parser.add_argument('--retmode', help='Retmode')
+    parser.add_argument('--rettype', help='Rettype')
+    args = parser.parse_args()
+
+    c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
+    merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)
+
+    payload = {}
+    if args.history_file is not None:
+        payload.update(c.get_history())
+    else:
+        payload['id'] = ','.join(merged_ids)
+
+    for attr in ('retmode', 'rettype'):
+        if getattr(args, attr, None) is not None:
+            payload[attr] = getattr(args, attr)
+
+    c.fetch(args.db, ftype=args.retmode, **payload)
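The --retmode/--rettype pair is forwarded untouched to Biopython; a minimal direct equivalent of one fetch (PMID taken from test-data/pm-tax-neighbor.xml):

    from Bio import Entrez

    Entrez.email = "a.n.other@example.org"  # placeholder address

    # Same parameters the wrapper places in its payload dict.
    handle = Entrez.efetch(db="pubmed", id="22241621",
                           rettype="abstract", retmode="text")
    print(handle.read())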
diff -r 000000000000 -r 68cd8d564e0a egquery.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/egquery.py	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+import argparse
+import eutils
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='EGQuery', epilog='')
+    parser.add_argument('term', help='Query')
+    #
+    parser.add_argument('--user_email', help="User email")
+    parser.add_argument('--admin_email', help="Admin email")
+    args = parser.parse_args()
+
+    c = eutils.Client(user_email=args.user_email, admin_email=args.admin_email)
+
+    payload = {
+        'term': args.term,
+    }
+    results = c.gquery(**payload)
+    print results
diff -r 000000000000 -r 68cd8d564e0a einfo.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/einfo.py	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+import argparse
+import eutils
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='EInfo', epilog='')
+    parser.add_argument('--db', help='Database to use')
+    parser.add_argument('--user_email', help="User email")
+    parser.add_argument('--admin_email', help="Admin email")
+    args = parser.parse_args()
+
+    c = eutils.Client(user_email=args.user_email, admin_email=args.admin_email)
+    payload = {}
+    if args.db is not None:
+        payload['db'] = args.db
+        payload['version'] = '2.0'
+    print c.info(**payload)
diff -r 000000000000 -r 68cd8d564e0a elink.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/elink.py	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+import argparse
+import json
+
+import eutils
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='ELink', epilog='')
+    parser.add_argument('db', help='Database to use, sometimes "none" (e.g. *check)')
+    parser.add_argument('dbfrom', help='Database containing input UIDs')
+    parser.add_argument('cmd', choices=['neighbor', 'neighbor_score',
+                                        'neighbor_history', 'acheck', 'ncheck', 'lcheck',
+                                        'llinks', 'llinkslib', 'prlinks'],
+                        help='ELink command mode')
+    # Only used in case of neighbor_history
+    parser.add_argument('--history_out', type=argparse.FileType('w'),
+                        help='Output history file', default='-')
+
+    parser.add_argument('--user_email', help="User email")
+    parser.add_argument('--admin_email', help="Admin email")
+    # ID Sources
+    parser.add_argument('--id_list', help='list of ids')
+    parser.add_argument('--id', help='Comma separated individual IDs')
+    parser.add_argument('--history_file', help='Fetch results from previous query')
+
+    # TODO: dates, linkname, term, holding
+    # neighbor or neighbor_history and dbfrom is pubmed
+    # parser.add_argument('--datetype', help='Date type')
+    # parser.add_argument('--reldate', help='In past N days')
+    # parser.add_argument('--mindate', help='Minimum date')
+    # parser.add_argument('--maxdate', help='Maximum date')
+
+    # Output
+    args = parser.parse_args()
+
+    c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
+    merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)
+
+    payload = {
+        'dbfrom': args.dbfrom,
+        'cmd': args.cmd,
+    }
+    if args.history_file is not None:
+        payload.update(c.get_history())
+    else:
+        payload['id'] = ','.join(merged_ids)
+
+    # DB can be 'none' in a few cases.
+    if args.db != "none":
+        payload['db'] = args.db
+
+    results = c.link(**payload)
+
+    if args.cmd == "neighbor_history":
+        history = c.extract_history(results)
+        args.history_out.write(json.dumps(history, indent=4))
+
+    print results
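The test file pm-tax-neighbor.xml corresponds to a plain neighbor query from taxonomy to pubmed; a direct Biopython sketch of that request:

    from Bio import Entrez

    Entrez.email = "a.n.other@example.org"  # placeholder address

    # Mirrors the wrapper's payload for test-data/pm-tax-neighbor.xml:
    # link taxonomy ID 510899 to its PubMed neighbors.
    handle = Entrez.elink(dbfrom="taxonomy", db="pubmed",
                          id="510899", cmd="neighbor")
    print(handle.read())  # eLinkResult XML containing <Id>22241621</Id>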
diff -r 000000000000 -r 68cd8d564e0a epost.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/epost.py	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+import argparse
+import eutils
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='EPost', epilog='')
+    parser.add_argument('db', help='Database to use')
+    parser.add_argument('--id_list', help='list of ids')
+    parser.add_argument('--id', help='Comma separated individual IDs')
+    parser.add_argument('--history_file', help='Post to new QueryKey in an existing WebEnv')
+    parser.add_argument('--user_email', help="User email")
+    parser.add_argument('--admin_email', help="Admin email")
+
+    args = parser.parse_args()
+
+    c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
+    merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)
+
+    payload = {}
+    if args.history_file is not None:
+        payload.update(c.get_history())
+    else:
+        payload['id'] = ','.join(merged_ids)
+        payload['WebEnv'] = ''
+
+    print c.post(args.db, **payload)
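An EPost round trip returns exactly the QueryKey/WebEnv pair that these wrappers persist as a JSON history file; per documented Biopython usage (IDs taken from test-data/esearch.pubmed.xml):

    from Bio import Entrez

    Entrez.email = "a.n.other@example.org"  # placeholder address

    # EPost uploads IDs and returns a history point on NCBI's side.
    result = Entrez.read(Entrez.epost("pubmed", id="16578858,11186225"))
    print(result["QueryKey"], result["WebEnv"])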
diff -r 000000000000 -r 68cd8d564e0a esearch.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/esearch.py	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+import json
+import argparse
+import eutils
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='ESearch', epilog='')
+    parser.add_argument('db', help='Database to use')
+    parser.add_argument('term', help='Query')
+    parser.add_argument('--history_file', help='Filter existing history')
+    parser.add_argument('--datetype', help='Date type')
+    parser.add_argument('--reldate', help='In past N days')
+    parser.add_argument('--mindate', help='Minimum date')
+    parser.add_argument('--maxdate', help='Maximum date')
+    # History
+    parser.add_argument('--history_out', type=argparse.FileType('w'),
+                        help='Output history file')
+    parser.add_argument('--user_email', help="User email")
+    parser.add_argument('--admin_email', help="Admin email")
+    args = parser.parse_args()
+
+    c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
+
+    payload = {
+        'db': args.db,
+        'term': args.term,
+        'retstart': 0,
+        'retmax': 20,
+        # hmmm @ retmax
+    }
+    if args.history_file is not None:
+        payload.update(c.get_history())
+    if args.history_out is not None:
+        payload['usehistory'] = 'y'
+
+    for attr in ('datetype', 'reldate', 'mindate', 'maxdate'):
+        if getattr(args, attr, None) is not None:
+            payload[attr] = getattr(args, attr)
+
+    results = c.search(**payload)
+
+    if args.history_out is not None:
+        history = c.extract_history(results)
+        args.history_out.write(json.dumps(history, indent=4))
+
+    print results
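When --history_out is given, the wrapper sets usehistory='y' so the response carries a QueryKey/WebEnv pair; a compact sketch of the resulting search-then-fetch chain (illustrative query term):

    from Bio import Entrez

    Entrez.email = "a.n.other@example.org"  # placeholder address

    # Search with history enabled, which is what --history_out triggers...
    search = Entrez.read(Entrez.esearch(db="pubmed", term="bacteriophage",
                                        usehistory="y"))

    # ...then fetch against the stored result set instead of raw IDs.
    handle = Entrez.efetch(db="pubmed", query_key=search["QueryKey"],
                           WebEnv=search["WebEnv"],
                           rettype="abstract", retmode="text", retmax=5)
    print(handle.read())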
diff -r 000000000000 -r 68cd8d564e0a esummary.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/esummary.py	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+import argparse
+import eutils
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='ESummary', epilog='')
+    parser.add_argument('db', help='Database to use')
+    parser.add_argument('--id_list', help='list of ids')
+    parser.add_argument('--id', help='Comma separated individual IDs')
+    parser.add_argument('--history_file', help='Filter existing history')
+    parser.add_argument('--user_email', help="User email")
+    parser.add_argument('--admin_email', help="Admin email")
+    args = parser.parse_args()
+
+    c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
+
+    merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)
+
+    payload = {
+        'db': args.db,
+    }
+
+    if args.history_file is not None:
+        payload.update(c.get_history())
+    else:
+        payload['id'] = ','.join(merged_ids)
+
+    print c.summary(**payload)
diff -r 000000000000 -r 68cd8d564e0a eutils.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eutils.py	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,127 @@
+import os
+import json
+import StringIO
+from Bio import Entrez
+Entrez.tool = "GalaxyEutils_1_0"
+BATCH_SIZE = 200
+
+
+class Client(object):
+
+    def __init__(self, history_file=None, user_email=None, admin_email=None):
+        self.using_history = False
+
+        if user_email is not None and admin_email is not None:
+            Entrez.email = ';'.join((admin_email, user_email))
+        elif user_email is not None:
+            Entrez.email = user_email
+        elif admin_email is not None:
+            Entrez.email = admin_email
+        else:
+            Entrez.email = os.environ.get('NCBI_EUTILS_CONTACT', None)
+
+        if Entrez.email is None:
+            raise Exception("Cannot continue without an email; please set "
+                            "administrator email in NCBI_EUTILS_CONTACT")
+
+        if history_file is not None:
+            with open(history_file, 'r') as handle:
+                data = json.loads(handle.read())
+                self.query_key = data['QueryKey']
+                self.webenv = data['WebEnv']
+                self.using_history = True
+
+    def get_history(self):
+        if not self.using_history:
+            return {}
+        else:
+            return {
+                'query_key': self.query_key,
+                'WebEnv': self.webenv,
+            }
+
+    def post(self, database, **payload):
+        return json.dumps(Entrez.read(Entrez.epost(database, **payload)), indent=4)
+
+    def fetch(self, db, ftype=None, **payload):
+        os.makedirs("downloads")
+
+        if 'id' in payload:
+            summary = self.id_summary(db, payload['id'])
+        else:
+            summary = self.history_summary(db)
+
+        count = len(summary)
+        payload['retmax'] = BATCH_SIZE
+
+        # This may be bad. I'm not sure yet. I think it will be ... but UGH.
+        for i in range(0, count, BATCH_SIZE):
+            payload['retstart'] = i
+            file_path = os.path.join('downloads', 'EFetch Results Chunk %s.%s' % (i, ftype))
+            with open(file_path, 'w') as handle:
+                handle.write(Entrez.efetch(db, **payload).read())
+
+    def id_summary(self, db, id_list):
+        payload = {
+            'db': db,
+            'id': id_list,
+        }
+        return Entrez.read(Entrez.esummary(**payload))
+
+    def history_summary(self, db):
+        if not self.using_history:
+            raise Exception("History must be available for this method")
+
+        payload = {
+            'db': db,
+            'query_key': self.query_key,
+            'WebEnv': self.webenv,
+        }
+        return Entrez.read(Entrez.esummary(**payload))
+
+    def summary(self, **payload):
+        return Entrez.esummary(**payload).read()
+
+    def link(self, **payload):
+        return Entrez.elink(**payload).read()
+
+    def extract_history(self, xml_data):
+        parsed_data = Entrez.read(StringIO.StringIO(xml_data))
+        history = {}
+        for key in ('QueryKey', 'WebEnv'):
+            if key in parsed_data:
+                history[key] = parsed_data[key]
+
+        return history
+
+    def search(self, **payload):
+        return Entrez.esearch(**payload).read()
+
+    def info(self, **kwargs):
+        return Entrez.einfo(**kwargs).read()
+
+    def gquery(self, **kwargs):
+        return Entrez.egquery(**kwargs).read()
+
+    def citmatch(self, **kwargs):
+        return Entrez.ecitmatch(**kwargs).read()
+
+    @classmethod
+    def parse_ids(cls, id_list, id, history_file):
+        """Parse IDs passed on --cli or in a file passed to the cli
+        """
+        merged_ids = []
+        if id is not None:
+            for pid in id.replace('__cn__', ',').replace('\n', ',').split(','):
+                if pid is not None and len(pid) > 0:
+                    merged_ids.append(pid)
+
+        if id_list is not None:
+            with open(id_list, 'r') as handle:
+                merged_ids += [x.strip() for x in handle.readlines()]
+
+        # Exception handled here for uniformity
+        if len(merged_ids) == 0 and history_file is None:
+            raise Exception("Must provide history file or IDs")
+
+        return merged_ids
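Galaxy encodes newlines in text parameters as __cn__, which is why parse_ids normalizes them before splitting; since it is a classmethod, it can be exercised without constructing a Client (assuming this eutils.py is the module on the path; IDs from the test data):

    from eutils import Client

    # '__cn__' (Galaxy's encoded newline) and commas both act as separators.
    ids = Client.parse_ids(None, '10239__cn__510899,22241621', None)
    print(ids)  # ['10239', '510899', '22241621']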
diff -r 000000000000 -r 68cd8d564e0a eutils.pyc |
Binary file eutils.pyc has changed |
diff -r 000000000000 -r 68cd8d564e0a macros.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,847 @@
+<?xml version="1.0"?>
+<macros>
+  <token name="@WRAPPER_VERSION@">1.1</token>
+  <token name="@EMAIL_ARGUMENTS@">
+--user_email "$__user_email__"
+#set admin_emails = ';'.join(str($__admin_users__).split(','))
+--admin_email "$admin_emails"
+  </token>
+  <!-- TODO: citation -->
+  <token name="@REFERENCES@"><![CDATA[
+  ]]></token>
+  <token name="@DISCLAIMER@"><![CDATA[
+Usage Guidelines and Requirements
+=================================
+
+Frequency, Timing, and Registration of E-utility URL Requests
+-------------------------------------------------------------
+
+In order not to overload the E-utility servers, NCBI recommends that users
+limit large jobs to either weekends or between 9:00 PM and 5:00 AM Eastern time
+during weekdays. Failure to comply with this policy may result in an IP address
+being blocked from accessing NCBI.
+
+Minimizing the Number of Requests
+---------------------------------
+
+If a task requires searching for and/or downloading a large number of
+records, it is much more efficient to use the Entrez History to upload
+and/or retrieve these records in batches rather than using separate
+requests for each record. Please refer to Application 3 in Chapter 3
+for an example. Many thousands of IDs can be uploaded using a single
+EPost request, and several hundred records can be downloaded using one
+EFetch request.
+
+
+Disclaimer and Copyright Issues
+-------------------------------
+
+In accordance with requirements of NCBI's E-Utilities, we must provide
+the following disclaimer:
+
+Please note that abstracts in PubMed may incorporate material that may
+be protected by U.S. and foreign copyright laws. All persons
+reproducing, redistributing, or making commercial use of this
+information are expected to adhere to the terms and conditions asserted
+by the copyright holder. Transmission or reproduction of protected
+items beyond that allowed by fair use (PDF) as defined in the copyright
+laws requires the written permission of the copyright owners. NLM
+provides no legal advice concerning distribution of copyrighted
+materials. Please consult your legal counsel. If you wish to do a large
+data mining project on PubMed data, you can enter into a licensing
+agreement and lease the data for free from NLM. For more information on
+this please see `http://www.nlm.nih.gov/databases/leased.html <http://www.nlm.nih.gov/databases/leased.html>`__
+
+The `full disclaimer <http://www.ncbi.nlm.nih.gov/About/disclaimer.html>`__ is available on
+their website.
+
+Liability
+~~~~~~~~~
+
+For documents and software available from this server, the
+U.S. Government does not warrant or assume any legal liability or
+responsibility for the accuracy, completeness, or usefulness of any
+information, apparatus, product, or process disclosed.
+
+Endorsement
+~~~~~~~~~~~
+
+NCBI does not endorse or recommend any commercial
+products, processes, or services. The views and opinions of authors
+expressed on NCBI's Web sites do not necessarily state or reflect those
+of the U.S. Government, and they may not be used for advertising or
+product endorsement purposes.
+
+External Links
+~~~~~~~~~~~~~~
+
+Some NCBI Web pages may provide links to other Internet
+sites for the convenience of users. NCBI is not responsible for the
+availability or content of these external sites, nor does NCBI endorse,
+warrant, or guarantee the products, services, or information described
+or offered at these other Internet sites. Users cannot assume that the
+external sites will abide by the same Privacy Policy to which NCBI
+adheres. It is the responsibility of the user to examine the copyright
+and licensing restrictions of linked pages and to secure all necessary
+permissions.
+  ]]></token>
+  <xml name="dbselect"
+       token_name="db_select"
+       token_label="NCBI Database to Use"
+  >
+    <param name="@NAME@" type="select" label="@LABEL@">
+      <option value="annotinfo">Annotation Information</option>
+[...]
+      <when input="output_format" value="ipg-text" format="text"/>
+      <when input="output_format" value="uilist-xml" format="xml"/>
+      <when input="output_format" value="docsum-xml" format="xml"/>
+      <when input="output_format" value="rsr-tabular" format="tabular"/>
+      <when input="output_format" value="uilist-text" format="text"/>
+      <when input="output_format" value="gb-text" format="text"/>
+      <when input="output_format" value="chr-text" format="text"/>
+      <when input="output_format" value="alignmentscores-text" format="text"/>
+      <when input="output_format" value="native-asn.1" format="asn1"/>
+      <when input="output_format" value="gp-xml" format="xml"/>
+      <when input="output_format" value="tabular-tabular" format="tabular"/>
+      <when input="output_format" value="ssexemplar-text" format="text"/>
+      <when input="output_format" value="docsum-json" format="json"/>
+      <when input="output_format" value="fasta-xml" format="xml"/>
+      <when input="output_format" value="runinfo-xml" format="xml"/>
+      <when input="output_format" value="flt-text" format="text"/>
+      <when input="output_format" value="fasta-fasta" format="fasta"/>
+      <when input="output_format" value="full-text" format="text"/>
+      <when input="output_format" value="gb-xml" format="xml"/>
+      <when input="output_format" value="abstract-xml" format="xml"/>
+      <when input="output_format" value="full-xml" format="xml"/>
+      <when input="output_format" value="ft-text" format="text"/>
+      <when input="output_format" value="homologene-text" format="text"/>
+      <when input="output_format" value="est-xml" format="xml"/>
+      <when input="output_format" value="gene_table-xml" format="xml"/>
+      <when input="output_format" value="docset-text" format="text"/>
+      <when input="output_format" value="native-xml" format="xml"/>
+    </change_format>
+  </xml>
+  <token name="@LIST_OR_HIST@">
+#if $query_source.qss == "history":
+    --history_file $query_source.history_file
+#else if $query_source.qss == "id_file":
+    --id_list $query_source.id_file
+#else if $query_source.qss == "id_list":
+    --id $query_source.id_list
+#end if
+  </token>
+  <xml name="list_or_hist">
+    <conditional name="query_source">
+      <param name="qss" type="select" label="Select source for IDs">
+        <option value="history">NCBI WebEnv History</option>
+        <option value="id_file">File containing IDs (one per line)</option>
+        <option value="id_list">Direct Entry</option>
+      </param>
+      <when value="history">
+        <param label="History File" name="history_file" type="data" format="json"/>
+      </when>
+      <when value="id_file">
+        <param label="ID List" name="id_file" type="data" format="text,tabular"/>
+      </when>
+      <when value="id_list">
+        <param label="ID List" name="id_list" type="text" area="true" help="Newline/Comma separated list of IDs"/>
+      </when>
+    </conditional>
+  </xml>
+  <xml name="history_out">
+    <data format="json" name="history" label="NCBI Entrez WebEnv History">
+      <yield/>
+    </data>
+  </xml>
+  <xml name="citations">
+    <citations>
+      <citation type="bibtex">@Book{ncbiEutils,
+        author = {Eric Sayers},
+        title = {Entrez Programming Utilities Help},
+        year = {2010},
+        publisher = {National Center for Biotechnology Information, Bethesda, Maryland},
+        note = {http://www.ncbi.nlm.nih.gov/books/NBK25500/}
+      }</citation>
+    </citations>
+  </xml>
+  <xml name="requirements">
+    <requirements>
+      <requirement type="package" version="2.7">python</requirement>
+      <requirement type="package" version="1.66">biopython</requirement>
+    </requirements>
+  </xml>
+  <xml name="linkname">
+    <param name="linkname" type="select" label="To NCBI Database">
+      <!-- TODO: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/entrezlinks.html -->
+    </param>
+  </xml>
+</macros>
diff -r 000000000000 -r 68cd8d564e0a test-data/ecitmatch.results.tsv
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ecitmatch.results.tsv	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,2 @@
+	1991	88	3248	mann bj	citation_1	2014248
+
diff -r 000000000000 -r 68cd8d564e0a test-data/ecitmatch.tsv
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ecitmatch.tsv	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,2 @@
+#journal	year	volume	first page	author	key
+proc natl acad sci u s a	1991	88	3248	mann bj	citation_1
diff -r 000000000000 -r 68cd8d564e0a test-data/egquery.1.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/egquery.1.xml	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE Result PUBLIC "-//NLM//DTD eSearchResult, January 2004//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/egquery.dtd">
+<Result>
+
+    <Term>bacteriophage</Term>
+
+    <eGQueryResult>
diff -r 000000000000 -r 68cd8d564e0a test-data/esearch.pubmed.2014-01-pnas.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/esearch.pubmed.2014-01-pnas.xml	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD esearch 20060628//EN" "http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20060628/esearch.dtd">
+<eSearchResult><Count>524</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
+<Id>24620368</Id>
+<Id>24613929</Id>
+<Id>24596955</Id>
+<Id>24596954</Id>
+<Id>24571024</Id>
+<Id>24555201</Id>
+<Id>24555200</Id>
+<Id>24550301</Id>
+<Id>24520173</Id>
+<Id>24520172</Id>
+<Id>24497494</Id>
+<Id>24497493</Id>
+<Id>24488973</Id>
+<Id>24488972</Id>
+<Id>24488971</Id>
+<Id>24481254</Id>
+<Id>24481253</Id>
+<Id>24481252</Id>
+<Id>24477693</Id>
+<Id>24477692</Id>
+</IdList><TranslationSet><Translation> <From>PNAS[ta]</From> <To>"Proc Natl Acad Sci U S A"[Journal]</To> </Translation></TranslationSet><TranslationStack> <TermSet> <Term>"Proc Natl Acad Sci U S A"[Journal]</Term> <Field>Journal</Field> <Count>124812</Count> <Explode>N</Explode> </TermSet> <TermSet> <Term>2014/01/01[PDAT]</Term> <Field>PDAT</Field> <Count>0</Count> <Explode>N</Explode> </TermSet> <TermSet> <Term>2014/02/01[PDAT]</Term> <Field>PDAT</Field> <Count>0</Count> <Explode>N</Explode> </TermSet> <OP>RANGE</OP> <OP>AND</OP> </TranslationStack><QueryTranslation>"Proc Natl Acad Sci U S A"[Journal] AND 2014/01/01[PDAT] : 2014/02/01[PDAT]</QueryTranslation></eSearchResult>
+
diff -r 000000000000 -r 68cd8d564e0a test-data/esearch.pubmed.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/esearch.pubmed.xml	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD esearch 20060628//EN" "http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20060628/esearch.dtd">
+<eSearchResult><Count>2651</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
+<Id>16578858</Id>
+<Id>11186225</Id>
+<Id>11121081</Id>
+<Id>11121080</Id>
+<Id>11121079</Id>
+<Id>11121078</Id>
+<Id>11121077</Id>
+<Id>11121076</Id>
+<Id>11121075</Id>
+<Id>11121074</Id>
+<Id>11121073</Id>
+<Id>11121072</Id>
+<Id>11121071</Id>
+<Id>11121070</Id>
+<Id>11121069</Id>
+<Id>11121068</Id>
+<Id>11121067</Id>
+<Id>11121066</Id>
+<Id>11121065</Id>
+<Id>11121064</Id>
+</IdList><TranslationSet><Translation> <From>PNAS[ta]</From> <To>"Proc Natl Acad Sci U S A"[Journal]</To> </Translation></TranslationSet><TranslationStack> <TermSet> <Term>"Proc Natl Acad Sci U S A"[Journal]</Term> <Field>Journal</Field> <Count>124812</Count> <Explode>N</Explode> </TermSet> <TermSet> <Term>97[vi]</Term> <Field>vi</Field> <Count>77218</Count> <Explode>N</Explode> </TermSet> <OP>AND</OP> <OP>GROUP</OP> </TranslationStack><QueryTranslation>"Proc Natl Acad Sci U S A"[Journal] AND 97[vi]</QueryTranslation></eSearchResult>
+
diff -r 000000000000 -r 68cd8d564e0a test-data/esummary.tax.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/esummary.tax.xml	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE eSummaryResult PUBLIC "-//NLM//DTD esummary v1 20041029//EN" "http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20041029/esummary-v1.dtd">
+<eSummaryResult>
+<DocSum>
+    <Id>10239</Id>
+    <Item Name="Status" Type="String">active</Item>
+    <Item Name="Rank" Type="String">superkingdom</Item>
+    <Item Name="Division" Type="String">viruses</Item>
+    <Item Name="ScientificName" Type="String">Viruses</Item>
+    <Item Name="CommonName" Type="String"></Item>
+    <Item Name="TaxId" Type="Integer">10239</Item>
+    <Item Name="AkaTaxId" Type="Integer">0</Item>
+    <Item Name="Genus" Type="String"></Item>
+    <Item Name="Species" Type="String"></Item>
+    <Item Name="Subsp" Type="String"></Item>
+    <Item Name="ModificationDate" Type="Date">2010/11/23 00:00</Item>
+</DocSum>
+
+</eSummaryResult>
+
diff -r 000000000000 -r 68cd8d564e0a test-data/example.history.json
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/example.history.json	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,4 @@
+{
+    "QueryKey": "1",
+    "WebEnv": "NCID_1_9485527_130.14.22.215_9001_1430928295_33285243_0MetA0_S_MegaStore_F_1"
+}
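This file is the shape Client.extract_history writes and the --history_file options consume; a round-trip sketch of how eutils.py turns it back into request parameters:

    import json

    # Load the history file the way eutils.Client.__init__ does, then
    # build the fields get_history() contributes to the next request.
    with open('test-data/example.history.json') as handle:
        data = json.load(handle)

    payload = {'query_key': data['QueryKey'], 'WebEnv': data['WebEnv']}
    print(payload)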
diff -r 000000000000 -r 68cd8d564e0a test-data/pm-tax-neighbor.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pm-tax-neighbor.xml	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD elink 20101123//EN" "http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20101123/elink.dtd">
+<eLinkResult>
+
+  <LinkSet>
+    <DbFrom>taxonomy</DbFrom>
+    <IdList>
+      <Id>510899</Id>
+    </IdList>
+
+    <LinkSetDb>
+      <DbTo>pubmed</DbTo>
+      <LinkName>taxonomy_pubmed_entrez</LinkName>
+
+      <Link>
+        <Id>22241621</Id>
+      </Link>
+
+    </LinkSetDb>
+
+
+  </LinkSet>
+</eLinkResult>
+
diff -r 000000000000 -r 68cd8d564e0a test-data/pubmed.metadata.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pubmed.metadata.xml	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE eInfoResult PUBLIC "-//NLM//DTD einfo 20130322//EN" "http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20130322/einfo.dtd">
+<eInfoResult>
+    <DbInfo>
+    <DbName>pubmed</DbName>
+    <MenuName>PubMed</MenuName>
+    <Description>PubMed bibliographic record</Description>
diff -r 000000000000 -r 68cd8d564e0a test-data/viruses.tax.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/viruses.tax.xml	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,29 @@
+<?xml version="1.0"?>
+<!DOCTYPE TaxaSet PUBLIC "-//NLM//DTD Taxon, 14th January 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/taxon.dtd">
+<TaxaSet><Taxon>
+    <TaxId>10239</TaxId>
+    <ScientificName>Viruses</ScientificName>
+    <OtherNames>
+        <BlastName>viruses</BlastName>
+        <Synonym>Vira</Synonym>
+        <Synonym>Viridae</Synonym>
+    </OtherNames>
+    <ParentTaxId>1</ParentTaxId>
+    <Rank>superkingdom</Rank>
+    <Division>Viruses</Division>
+    <GeneticCode>
+        <GCId>1</GCId>
+        <GCName>Standard</GCName>
+    </GeneticCode>
+    <MitoGeneticCode>
+        <MGCId>0</MGCId>
+        <MGCName>Unspecified</MGCName>
+    </MitoGeneticCode>
+    <Lineage/>
+    <CreateDate>1995/02/27 09:24:00</CreateDate>
+    <UpdateDate>2010/11/23 11:40:11</UpdateDate>
+    <PubDate>1993/04/20 01:00:00</PubDate>
+</Taxon>
+
+</TaxaSet>
+
diff -r 000000000000 -r 68cd8d564e0a tool_dependencies.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <set_environment version="1.0">
+    <environment_variable action="set_to" name="NCBI_EUTILS_CONTACT">/please set the administrator's contact email in the corresponding env.sh file/</environment_variable>
+  </set_environment>
+  <package name="biopython" version="1.66">
+    <repository changeset_revision="8433ee4531ff" name="package_biopython_1_66" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
+  </package>
+</tool_dependency>