# HG changeset patch
# User galaxyp
# Date 1667574517 0
# Node ID 265c35540faa8119c4565d6db18aef35a0e73073
# Parent 12692567c7f9e4ac9bfbfe63eab6b882580929ae
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit ba0d41c2dc0dbc0a0d3d200f51e67c6598c7e4e9
diff -r 12692567c7f9 -r 265c35540faa macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Nov 04 15:08:37 2022 +0000
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
diff -r 12692567c7f9 -r 265c35540faa uniprotxml_downloader.py
--- a/uniprotxml_downloader.py Tue Jun 01 11:54:47 2021 +0000
+++ b/uniprotxml_downloader.py Fri Nov 04 15:08:37 2022 +0000
@@ -52,6 +52,7 @@
parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download')
parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries')
parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta'], default='xml', help='output format')
+ parser.add_option('-k', '--field', dest='field', choices=['taxonomy_name', 'taxonomy_id'], default='taxonomy_name', help='query field')
parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml')
parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
(options, args) = parser.parse_args()
@@ -66,7 +67,7 @@
taxid = fields[options.column].strip()
if taxid:
taxids.add(taxid)
- taxon_queries = ['taxonomy:"%s"' % taxid for taxid in taxids]
+ taxon_queries = [f'{options.field}:"{taxid}"' for taxid in taxids]
taxon_query = ' OR '.join(taxon_queries)
if options.output:
dest_path = options.output
@@ -74,20 +75,26 @@
dest_path = "uniprot_%s.xml" % '_'.join(taxids)
reviewed = " reviewed:%s" % options.reviewed if options.reviewed else ''
try:
- url = 'https://www.uniprot.org/uniprot/'
+ url = 'https://rest.uniprot.org/uniprotkb/stream'
query = "%s%s" % (taxon_query, reviewed)
- params = {'query': query, 'force': 'yes', 'format': options.format}
+ params = {'query': query, 'format': options.format}
if options.debug:
print("%s ? %s" % (url, params), file=sys.stderr)
data = parse.urlencode(params)
- print(f"Retrieving: {url+data}")
+ print(f"Retrieving: {url}?{data}")
adapter = TimeoutHTTPAdapter(max_retries=retry_strategy)
+
http = requests.Session()
http.mount("https://", adapter)
- response = http.post(url, data=params)
+ response = http.get(url, params=params)
http.close()
+
+ if response.status_code != 200:
+ exit(f"Request failed with status code {response.status_code}:\n{response.text}")
+
with open(dest_path, 'w') as fh:
fh.write(response.text)
+
if options.format == 'xml':
with open(dest_path, 'r') as contents:
while True:
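
Note: the hunk above replaces the retired www.uniprot.org/uniprot endpoint (POST with a 'force' parameter) with the new UniProt REST streaming endpoint queried via GET. A minimal standalone sketch of that request pattern, without the TimeoutHTTPAdapter/retry wiring the tool keeps; the taxon ID, format, and output filename are illustrative:

    # Sketch of the request pattern this patch switches to (illustrative values).
    import requests

    url = "https://rest.uniprot.org/uniprotkb/stream"
    params = {
        "query": 'taxonomy_id:"9606" reviewed:true',  # same query shape the script builds
        "format": "fasta",                            # the script also supports 'xml'
    }
    response = requests.get(url, params=params)       # performs a live download
    if response.status_code != 200:
        raise SystemExit(f"Request failed with status code {response.status_code}:\n{response.text}")
    with open("uniprot_9606.fasta", "w") as fh:
        fh.write(response.text)
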
diff -r 12692567c7f9 -r 265c35540faa uniprotxml_downloader.xml
--- a/uniprotxml_downloader.xml Tue Jun 01 11:54:47 2021 +0000
+++ b/uniprotxml_downloader.xml Fri Nov 04 15:08:37 2022 +0000
@@ -1,5 +1,8 @@
-
+
     <description>download proteome as XML or fasta</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
requests
@@ -11,14 +14,17 @@
python '$__tool_directory__/uniprotxml_downloader.py'
#if $taxid.input_choice == 'common':
--taxon $taxid.organism
+ --field taxonomy_id
#if $taxid.reviewed:
--reviewed=$taxid.reviewed
#end if
#elif $taxid.input_choice == 'taxids':
+ --field $taxid.field
#for $id in $taxid.taxons.split(','):
-t '$id'
#end for
#elif $taxid.input_choice == 'history':
+ --field $taxid.field
--input='${taxid.taxon_file}'
--column=#echo int(str($taxid.column)) - 1#
#end if
@@ -58,10 +64,12 @@
help="Enter one or more Organsim IDs (separated by commas) from http://www.uniprot.org/proteomes/">
^\w+( \w+)*(,\w+( \w+)*)*$
+
+
@@ -89,7 +97,20 @@
+
+
+
+
+
+
+
+
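
Note: the wrapper changes above forward the new --field choice to the script. For reference, this is how that choice shapes the query string built in uniprotxml_downloader.py (the taxon IDs below are illustrative):

    # Sketch: effect of the -k/--field option on the UniProt query string.
    taxids = ["9606", "10090"]                 # illustrative NCBI taxon IDs
    field = "taxonomy_id"                      # or "taxonomy_name" (the default)
    taxon_query = " OR ".join(f'{field}:"{taxid}"' for taxid in taxids)
    print(taxon_query)                         # taxonomy_id:"9606" OR taxonomy_id:"10090"
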