diff uniprotxml_downloader.py @ 5:265c35540faa draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit ba0d41c2dc0dbc0a0d3d200f51e67c6598c7e4e9
author galaxyp
date Fri, 04 Nov 2022 15:08:37 +0000
parents 12692567c7f9
children a371252a2cf6
line wrap: on
line diff
--- a/uniprotxml_downloader.py	Tue Jun 01 11:54:47 2021 +0000
+++ b/uniprotxml_downloader.py	Fri Nov 04 15:08:37 2022 +0000
@@ -52,6 +52,7 @@
     parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download')
     parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries')
     parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta'], default='xml', help='output format')
+    parser.add_option('-k', '--field', dest='field', choices=['taxonomy_name', 'taxonomy_id'], default='taxonomy_name', help='query field')
     parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml')
     parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
     (options, args) = parser.parse_args()
@@ -66,7 +67,7 @@
                     taxid = fields[options.column].strip()
                     if taxid:
                         taxids.add(taxid)
-    taxon_queries = ['taxonomy:"%s"' % taxid for taxid in taxids]
+    taxon_queries = [f'{options.field}:"{taxid}"' for taxid in taxids]
     taxon_query = ' OR '.join(taxon_queries)
     if options.output:
         dest_path = options.output
@@ -74,20 +75,26 @@
         dest_path = "uniprot_%s.xml" % '_'.join(taxids)
     reviewed = " reviewed:%s" % options.reviewed if options.reviewed else ''
     try:
-        url = 'https://www.uniprot.org/uniprot/'
+        url = 'https://rest.uniprot.org/uniprotkb/stream'
         query = "%s%s" % (taxon_query, reviewed)
-        params = {'query': query, 'force': 'yes', 'format': options.format}
+        params = {'query': query, 'format': options.format}
         if options.debug:
             print("%s ? %s" % (url, params), file=sys.stderr)
         data = parse.urlencode(params)
-        print(f"Retrieving: {url+data}")
+        print(f"Retrieving: {url}?{data}")
         adapter = TimeoutHTTPAdapter(max_retries=retry_strategy)
+
         http = requests.Session()
         http.mount("https://", adapter)
-        response = http.post(url, data=params)
+        response = http.get(url, params=params)
         http.close()
+
+        if response.status_code != 200:
+            exit(f"Request failed with status code {response.status_code}:\n{response.text}")
+
         with open(dest_path, 'w') as fh:
             fh.write(response.text)
+
         if options.format == 'xml':
             with open(dest_path, 'r') as contents:
                 while True: