# HG changeset patch
# User iuc
# Date 1600854506 0
# Node ID e267701c187b5c7be610fc8734c93752bbd2cdf9
# Parent c6096cd971201a8821d69b62aa82c38f97becb28
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_entrez_eutils commit dae34e5e182b4cceb808d7353080f14aa9a78ca9"
diff -r c6096cd97120 -r e267701c187b __efetch_build_options.py
--- a/__efetch_build_options.py Wed Mar 11 04:03:14 2020 -0400
+++ b/__efetch_build_options.py Wed Sep 23 09:48:26 2020 +0000
@@ -53,7 +53,7 @@
sra
structure
taxonomy
-unigene'''.replace( "", "").replace( "", "").split("\n")
+unigene'''.replace("", "").replace("", "").split("\n")
help = ''' (all)
diff -r c6096cd97120 -r e267701c187b ecitmatch.py
--- a/ecitmatch.py Wed Mar 11 04:03:14 2020 -0400
+++ b/ecitmatch.py Wed Sep 23 09:48:26 2020 +0000
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-from __future__ import print_function
import argparse
@@ -17,6 +16,8 @@
parser.add_argument('--first_page', nargs='*', help='First Page')
parser.add_argument('--author_name', nargs='*', help='Author name')
+ parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)')
+
# Emails
parser.add_argument('--user_email', help="User email")
parser.add_argument('--admin_email', help="Admin email")
diff -r c6096cd97120 -r e267701c187b efetch.py
--- a/efetch.py Wed Mar 11 04:03:14 2020 -0400
+++ b/efetch.py Wed Sep 23 09:48:26 2020 +0000
@@ -1,36 +1,111 @@
#!/usr/bin/env python
+
import argparse
+import glob
+import json
+import logging
+import os
+
import eutils
+logging.basicConfig(level=logging.INFO)
+
+
+def handleEfetchException(e, db, payload):
+ logging.error('No results returned. This could either be due to no records matching the supplied IDs for the query database or it could be an error due to invalid parameters. The reported exception was "%s".\n\nPayload used for the efetch query to database "%s"\n\n%s', e, db, json.dumps(payload, indent=4))
+
+ # Create a file in the downloads folder so that the user can access run information
+ current_directory = os.getcwd()
+ final_directory = os.path.join(current_directory, r'downloads')
+ if not os.path.exists(final_directory):
+ os.makedirs(final_directory)
+
+ print('The following files were downloaded:')
+ print(os.listdir(final_directory))
+
+ file_path = os.path.join('downloads', 'no_results.txt')
+ with open(file_path, 'w') as handle:
+ handle.write('No results')
+
+
+def localFetch(db, gformat, newname, **payload):
+ problem = None
+ try:
+ c.fetch(db, **payload)
+
+ for chunk, file in enumerate(glob.glob('downloads/EFetch *')):
+ os.rename(file, '%s%s.%s' % (newname, chunk + 1, gformat))
+
+ except Exception as e:
+ problem = e
+ handleEfetchException(e, db, payload)
+ else:
+ print('The following files were downloaded:')
+ print(os.listdir('downloads'))
+
+ return problem
+
+
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='EFetch', epilog='')
parser.add_argument('db', help='Database to use')
parser.add_argument('--user_email', help="User email")
parser.add_argument('--admin_email', help="Admin email")
+ parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)')
+
# ID source
+ parser.add_argument('--id_json', help='list of ids in a json file as returned by esearch or elink')
+ parser.add_argument('--id_xml', help='list of ids in an xml file as returned by esearch or elink')
parser.add_argument('--id_list', help='list of ids')
parser.add_argument('--id', help='Comma separated individual IDs')
- parser.add_argument('--history_file', help='Fetch results from previous query')
+ parser.add_argument('--history_file', help='Fetch results from previous query (JSON)')
+ parser.add_argument('--history_xml', help='Fetch results from previous query (XML)')
# Output
parser.add_argument('--retmode', help='Retmode')
parser.add_argument('--rettype', help='Rettype')
+ parser.add_argument('--galaxy_format', help='Galaxy format')
args = parser.parse_args()
c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
- merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)
payload = {}
- if args.history_file is not None:
- payload.update(c.get_history())
- else:
- payload['id'] = ','.join(merged_ids)
-
for attr in ('retmode', 'rettype'):
if getattr(args, attr, None) is not None:
payload[attr] = getattr(args, attr)
- c.fetch(args.db, ftype=args.retmode, **payload)
+ if args.history_file is not None or args.history_xml is not None:
+ if args.history_file is not None:
+ input_histories = c.get_histories()
+ else:
+ input_histories = c.extract_histories_from_xml_file(args.history_xml)
+
+ problem = None
+ for hist in input_histories:
+ qkey = hist['query_key']
+ tmp_payload = payload
+ tmp_payload.update(hist)
+ newname = 'downloads/EFetch-%s-%s-querykey%s-chunk' % (args.rettype, args.retmode, qkey)
+ problem = localFetch(args.db, args.galaxy_format, newname, **tmp_payload)
+
+ if os.path.exists('downloads'):
+ os.rename('downloads', 'downloads-qkey%s' % (qkey))
+
+ if not os.path.exists('downloads'):
+ os.makedirs('downloads')
+
+ for relpath in glob.glob('downloads-qkey*/*'):
+ file = os.path.basename(relpath)
+ os.rename(relpath, 'downloads/%s' % (file))
+
+ if problem is not None:
+ raise(problem)
+
+ else:
+ merged_ids = c.parse_ids(args.id_list, args.id, args.history_file, args.id_xml, args.id_json)
+ payload['id'] = ','.join(merged_ids)
+ newname = 'downloads/EFetch-%s-%s-chunk' % (args.rettype, args.retmode)
+ localFetch(args.db, args.galaxy_format, newname, **payload)
diff -r c6096cd97120 -r e267701c187b egquery.py
--- a/egquery.py Wed Mar 11 04:03:14 2020 -0400
+++ b/egquery.py Wed Sep 23 09:48:26 2020 +0000
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-from __future__ import print_function
import argparse
@@ -9,9 +8,12 @@
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='EGQuery', epilog='')
parser.add_argument('term', help='Query')
- #
+
parser.add_argument('--user_email', help="User email")
parser.add_argument('--admin_email', help="Admin email")
+
+ parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)')
+
args = parser.parse_args()
c = eutils.Client(user_email=args.user_email, admin_email=args.admin_email)
diff -r c6096cd97120 -r e267701c187b einfo.py
--- a/einfo.py Wed Mar 11 04:03:14 2020 -0400
+++ b/einfo.py Wed Sep 23 09:48:26 2020 +0000
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-from __future__ import print_function
import argparse
@@ -11,6 +10,7 @@
parser.add_argument('--db', help='Database to use')
parser.add_argument('--user_email', help="User email")
parser.add_argument('--admin_email', help="Admin email")
+ parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)')
args = parser.parse_args()
c = eutils.Client(user_email=args.user_email, admin_email=args.admin_email)
diff -r c6096cd97120 -r e267701c187b elink.py
--- a/elink.py Wed Mar 11 04:03:14 2020 -0400
+++ b/elink.py Wed Sep 23 09:48:26 2020 +0000
@@ -1,12 +1,16 @@
#!/usr/bin/env python
-from __future__ import print_function
import argparse
import json
+import logging
+import os
import eutils
+logging.basicConfig(level=logging.INFO)
+
+
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='EFetch', epilog='')
parser.add_argument('db', help='Database to use, sometimes "none" (e.g. *check)')
@@ -15,16 +19,23 @@
'neighbor_history', 'acheck', 'ncheck', 'lcheck',
'llinks', 'llinkslib', 'prlinks'],
help='ELink command mode')
- # Only used in case of neighbor_history
- parser.add_argument('--history_out', type=argparse.FileType('w'),
- help='Output history file', default='-')
+
+ parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)')
parser.add_argument('--user_email', help="User email")
parser.add_argument('--admin_email', help="Admin email")
+
# ID Sources
+ parser.add_argument('--id_xml', help='list of ids in an xml file as returned by esearch or elink')
+ parser.add_argument('--id_json', help='list of ids in a json file as returned by esearch or elink')
parser.add_argument('--id_list', help='list of ids')
parser.add_argument('--id', help='Comma separated individual IDs')
parser.add_argument('--history_file', help='Fetch results from previous query')
+ parser.add_argument('--history_xml', help='Fetch results from previous query')
+
+ # Optional
+ parser.add_argument('--linkname', help='Restrict results to a specific link source')
+ parser.add_argument('--retmode', choices=['xml', 'json', 'uilist'], help='Output format')
# TODO: dates, linkname, term, holding
# neighbor or neighbor_history and dbfrom is pubmed
@@ -37,25 +48,91 @@
args = parser.parse_args()
c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
- merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)
payload = {
'dbfrom': args.dbfrom,
'cmd': args.cmd,
}
- if args.history_file is not None:
- payload.update(c.get_history())
- else:
- payload['id'] = ','.join(merged_ids)
# DB can be 'none' in a few cases.
if args.db != "none":
payload['db'] = args.db
- results = c.link(**payload)
+ if args.linkname is not None:
+ payload['linkname'] = args.linkname
+
+ results = []
+ qkeys = []
+ if args.history_file is not None or args.history_xml is not None:
+ payload['retmode'] = args.retmode
+ if args.history_file is not None:
+ input_histories = c.get_histories()
+ else:
+ input_histories = c.extract_histories_from_xml_file(args.history_xml)
+ for hist in input_histories:
+ qkeys += [hist['query_key']]
+ tmp_payload = payload
+ tmp_payload.update(hist)
+ results += [c.link(**tmp_payload)]
+ else:
+ # ELink has no uilist retmode, so request xml here; the result is converted to an ID list on output
+ if args.retmode == "uilist":
+ payload['retmode'] = 'xml'
+ else:
+ payload['retmode'] = args.retmode
+ merged_ids = c.parse_ids(args.id_list, args.id, args.history_file, args.id_xml, args.id_json)
+ payload['id'] = ','.join(merged_ids)
+ qkeys += [1]
+ results += [c.link(**payload)]
+
+ # There could be multiple sets of results if a history was supplied
+ if args.history_file is not None or args.history_xml is not None:
+ # Multiple result sets can be returned
+ # Create a directory for the output files
+ current_directory = os.getcwd()
+ final_directory = os.path.join(current_directory, r'downloads')
+ if not os.path.exists(final_directory):
+ os.makedirs(final_directory)
- if args.cmd == "neighbor_history":
- history = c.extract_history(results)
- args.history_out.write(json.dumps(history, indent=4))
-
- print(results)
+ logging.info("Writing files:")
+ # When retmode is uilist, convert to text format (which elink does not do)
+ count = 0
+ if args.retmode == 'uilist':
+ for result in results:
+ qkey = qkeys[count]
+ count += 1
+ ids = c.xmlstring2UIlist(result)
+ file_path = os.path.join('downloads', '%s-querykey%s.tabular' % (args.db, qkey))
+ logging.info('%s.tabular' % (args.db))
+ with open(file_path, 'w') as handle:
+ for id in ids:
+ handle.write(id)
+ handle.write(os.linesep)
+ elif args.retmode == 'json':
+ for result in results:
+ qkey = qkeys[count]
+ count += 1
+ file_path = os.path.join('downloads', '%s-querykey%s.json' % (args.db, qkey))
+ logging.info('%s-link%s.json' % (args.db, count))
+ with open(file_path, 'w') as handle:
+ json_data = c.jsonstring2jsondata(result)
+ handle.write(json.dumps(json_data, indent=4))
+ else:
+ for result in results:
+ qkey = qkeys[count]
+ count += 1
+ file_path = os.path.join('downloads', '%s-querykey%s.xml' % (args.db, qkey))
+ logging.info('%s-link%s.xml' % (args.db, count))
+ with open(file_path, 'w') as handle:
+ handle.write(result)
+ else:
+ # When retmode is uilist, convert to text format (which elink does not do)
+ if args.retmode == 'uilist':
+ ids = c.xmlstring2UIlist(results[0])
+ for id in ids:
+ print(id)
+ elif args.retmode == 'json':
+ json_data = c.jsonstring2jsondata(results[0])
+ print(json.dumps(json_data, indent=4))
+ else:
+ print(results[0])
diff -r c6096cd97120 -r e267701c187b epost.py
--- a/epost.py Wed Mar 11 04:03:14 2020 -0400
+++ b/epost.py Wed Sep 23 09:48:26 2020 +0000
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-from __future__ import print_function
import argparse
@@ -9,22 +8,37 @@
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='EPost', epilog='')
parser.add_argument('db', help='Database to use')
+ parser.add_argument('--user_email', help="User email")
+ parser.add_argument('--admin_email', help="Admin email")
+
+ parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)')
+
+ # ID source
parser.add_argument('--id_list', help='list of ids')
parser.add_argument('--id', help='Comma separated individual IDs')
- parser.add_argument('--history_file', help='Post to new QueryKey in an existing WebEnv')
- parser.add_argument('--user_email', help="User email")
- parser.add_argument('--admin_email', help="Admin email")
+ parser.add_argument('--id_json', help='list of ids in a json file as returned by esearch or elink')
+ parser.add_argument('--id_xml', help='list of ids in an xml file as returned by esearch or elink')
+
+ # Target history
+ parser.add_argument('--history_xml', help='Post to new QueryKey in an existing WebEnv (XML)')
+ parser.add_argument('--history_file', help='Post to new QueryKey in an existing WebEnv (JSON)')
+ parser.add_argument('--webenv', help='Post to new WebEnv (History ID)')
args = parser.parse_args()
c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
- merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)
payload = {}
if args.history_file is not None:
- payload.update(c.get_history())
- else:
- payload['id'] = ','.join(merged_ids)
- payload['WebEnv'] = ''
+ hist = c.get_history()
+ payload['WebEnv'] = hist['WebEnv']
+ elif args.history_xml is not None:
+ hist = c.extract_history_from_xml_file(args.history_xml)
+ payload['WebEnv'] = hist['WebEnv']
+ elif args.webenv is not None:
+ payload['WebEnv'] = args.webenv
+
+ merged_ids = c.parse_ids(args.id_list, args.id, None, args.id_xml, args.id_json)
+ payload['id'] = ','.join(merged_ids)
print(c.post(args.db, **payload))
diff -r c6096cd97120 -r e267701c187b esearch.py
--- a/esearch.py Wed Mar 11 04:03:14 2020 -0400
+++ b/esearch.py Wed Sep 23 09:48:26 2020 +0000
@@ -1,12 +1,16 @@
#!/usr/bin/env python
-from __future__ import print_function
import argparse
import json
+import logging
+
import eutils
+logging.basicConfig(level=logging.INFO)
+
+
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='ESearch', epilog='')
parser.add_argument('db', help='Database to use')
@@ -17,34 +21,54 @@
parser.add_argument('--mindate', help='Minimum date')
parser.add_argument('--maxdate', help='maximum date')
# History
- parser.add_argument('--history_out', type=argparse.FileType('w'),
- help='Output history file')
+ parser.add_argument('--history_out', action="store_true", help='Output history file')
parser.add_argument('--user_email', help="User email")
parser.add_argument('--admin_email', help="Admin email")
+
+ parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)')
+
+ # Output
+ parser.add_argument('--retmode', help='Retmode')
+ parser.add_argument('--rettype', help='Rettype')
+ parser.add_argument('--retstart', type=int, default=0, help='Retstart - Starting rec number (0)')
+ parser.add_argument('--retmax', type=int, default=20, help='Retmax - max number of recs returned (20, max 100000)')
+
args = parser.parse_args()
c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
+ max_retmax = 100000
+ min_retmax = 1
+ max = max(min(args.retmax, max_retmax), min_retmax)
+
payload = {
'db': args.db,
'term': args.term,
- 'retstart': 0,
- 'retmax': 20,
- # hmmm @ retmax
}
if args.history_file is not None:
payload.update(c.get_history())
- if args.history_out is not None:
+
+ # if args.history_out is not None:
+ if args.history_out:
payload['usehistory'] = 'y'
- for attr in ('datetype', 'reldate', 'mindate', 'maxdate'):
+ payload['retmode'] = args.retmode
+
+ for attr in ('datetype', 'reldate', 'mindate', 'maxdate', 'rettype', 'retmax', 'retstart'):
if getattr(args, attr, None) is not None:
payload[attr] = getattr(args, attr)
+ logging.info("Payload used for query:" + json.dumps(payload, indent=4))
+
results = c.search(**payload)
- if args.history_out is not None:
- history = c.extract_history(results)
- args.history_out.write(json.dumps(history, indent=4))
-
- print(results)
+ # When retmode is 'text', convert the XML result to a plain list of IDs (which esearch does not do)
+ if args.retmode == 'text':
+ ids = c.xmlstring2UIlist(results)
+ for id in ids:
+ print(id)
+ elif args.retmode == 'json':
+ json_data = c.jsonstring2jsondata(results)
+ print(json.dumps(json_data, indent=4))
+ else:
+ print(results)
diff -r c6096cd97120 -r e267701c187b esearch.xml
--- a/esearch.xml Wed Mar 11 04:03:14 2020 -0400
+++ b/esearch.xml Wed Sep 23 09:48:26 2020 +0000
@@ -6,14 +6,21 @@
python esearch.py --version
-
+ $default]]>
+ #if $output_format == 'history_xml':
+ --history_out
+ --retmode xml
+ #elif $output_format == 'history_json':
+ --history_out
+ --retmode json
+ #elif $output_format == 'id_xml':
+ --retmode xml
+ #elif $output_format == 'id_json':
+ --retmode json
+ #elif $output_format == 'id_text':
+ --retmode text
+ #end if
+
+ @EMAIL_ARGUMENTS@
+
+ > $default
+
+ ]]>
+
@@ -45,11 +74,18 @@
+
-
+
+
+
+
+
+
+
@@ -67,32 +103,73 @@
+
+
-
+
-
+
+
+
+
+
-
- use_history
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
macros.xml
+
+#Note, this script uses einfo.py to get database info. It also uses manually compiled data stored at the bottom of this script that is based on: https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly
+#The data in the table on that page was manipulated to replace nulls with 'none', remove duplicates, and add missing formats based on correspondence with MLN.
+
+##
+## use einfo to retrieve all the valid databases
+##
+
+print STDERR "Retrieving database list\n";
+
+my $dbxml = `python einfo.py --user_email "planemo@galaxyproject.org" --admin_email "planemo@galaxyproject.org;test@bx.psu.edu"`;
+
+my(@dblist);
+my $dbs = {};
+my $dbfroms = {};
+my $dbnames = {};
+foreach(split(/\n/,$dbxml))
+ {
+ if(/(.+)<\/DbName>/)
+ {
+ my $db = $1;
+ push(@dblist,$db);
+ $dbs->{$db} = 0;
+ $dbfroms->{$db} = 0;
+ $dbnames->{$db} = $_;
+ }
+ }
+
+##
+## Use einfo to retrieve all the valid links for each database (Note: some databases are not linked)
+##
+
+my $h = {};
+foreach my $db (sort {$dbnames->{$a} cmp $dbnames->{$b}} @dblist)
+ {
+ sleep(2);
+
+ print STDERR "Retrieving info for $db\n";
+
+ my $response = `python einfo.py --db $db --user_email "planemo\@galaxyproject.org" --admin_email "planemo\@galaxyproject.org;test\@bx.psu.edu"`;
+
+ my $dolinks = 0;
+ my $link = "";
+ my $name = "";
+
+ foreach(split(/\n/,$response))
+ {
+ if(//)
+ {
+ $dolinks = 1;
+ #Save whether there exist links from this database
+ $dbfroms->{$db} = 1;
+ }
+ elsif(!$dolinks)
+ {
+ if(/(.+)<\/MenuName>/)
+ {$dbnames->{$db} = "$1 ($db)"}
+ }
+ elsif($dolinks)
+ {
+ if(/(.+)<\/Name>/)
+ {$link=$1}
+ elsif(/