# HG changeset patch
# User galaxyp
# Date 1667574517 0
# Node ID 265c35540faa8119c4565d6db18aef35a0e73073
# Parent 12692567c7f9e4ac9bfbfe63eab6b882580929ae
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit ba0d41c2dc0dbc0a0d3d200f51e67c6598c7e4e9

diff -r 12692567c7f9 -r 265c35540faa macros.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Nov 04 15:08:37 2022 +0000
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
diff -r 12692567c7f9 -r 265c35540faa uniprotxml_downloader.py
--- a/uniprotxml_downloader.py	Tue Jun 01 11:54:47 2021 +0000
+++ b/uniprotxml_downloader.py	Fri Nov 04 15:08:37 2022 +0000
@@ -52,6 +52,7 @@
     parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download')
     parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries')
     parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta'], default='xml', help='output format')
+    parser.add_option('-k', '--field', dest='field', choices=['taxonomy_name', 'taxonomy_id'], default='taxonomy_name', help='query field')
     parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml')
     parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
     (options, args) = parser.parse_args()
@@ -66,7 +67,7 @@
                     taxid = fields[options.column].strip()
                     if taxid:
                         taxids.add(taxid)
-    taxon_queries = ['taxonomy:"%s"' % taxid for taxid in taxids]
+    taxon_queries = [f'{options.field}:"{taxid}"' for taxid in taxids]
     taxon_query = ' OR '.join(taxon_queries)
     if options.output:
         dest_path = options.output
@@ -74,20 +75,26 @@
         dest_path = "uniprot_%s.xml" % '_'.join(taxids)
     reviewed = " reviewed:%s" % options.reviewed if options.reviewed else ''
     try:
-        url = 'https://www.uniprot.org/uniprot/'
+        url = 'https://rest.uniprot.org/uniprotkb/stream'
         query = "%s%s" % (taxon_query, reviewed)
-        params = {'query': query, 'force': 'yes', 'format': options.format}
+        params = {'query': query, 'format': options.format}
         if options.debug:
             print("%s ? %s" % (url, params), file=sys.stderr)
         data = parse.urlencode(params)
-        print(f"Retrieving: {url+data}")
+        print(f"Retrieving: {url}?{data}")
         adapter = TimeoutHTTPAdapter(max_retries=retry_strategy)
+        http = requests.Session()
         http.mount("https://", adapter)
-        response = http.post(url, data=params)
+        response = http.get(url, params=params)
         http.close()
+
+        if response.status_code != 200:
+            exit(f"Request failed with status code {response.status_code}:\n{response.text}")
+
         with open(dest_path, 'w') as fh:
             fh.write(response.text)
+
        if options.format == 'xml':
             with open(dest_path, 'r') as contents:
                 while True:
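For context, the Python change above moves the download from the legacy www.uniprot.org query endpoint to UniProt's current REST API and switches the request from POST to GET, with an explicit status check before writing output. A minimal standalone sketch of the new flow follows; it is illustrative only: the retry settings, query, and output path are assumptions, and the script's own TimeoutHTTPAdapter/retry_strategy are defined outside the hunks shown in this patch.

    # Sketch of the new download flow (illustrative values, not the tool's exact code).
    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    # Assumed retry policy; the real retry_strategy/TimeoutHTTPAdapter live
    # elsewhere in uniprotxml_downloader.py and are not part of these hunks.
    retry_strategy = Retry(total=5, backoff_factor=2,
                           status_forcelist=[429, 500, 502, 503, 504])

    http = requests.Session()
    http.mount("https://", HTTPAdapter(max_retries=retry_strategy))

    # The new REST API streams whole result sets from /uniprotkb/stream via GET.
    url = "https://rest.uniprot.org/uniprotkb/stream"
    params = {"query": 'taxonomy_id:"9606" reviewed:true', "format": "fasta"}

    response = http.get(url, params=params)
    http.close()

    if response.status_code != 200:
        raise SystemExit(f"Request failed with status code {response.status_code}:\n{response.text}")

    with open("uniprot_9606.fasta", "w") as fh:
        fh.write(response.text)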
%s" % (url, params), file=sys.stderr) data = parse.urlencode(params) - print(f"Retrieving: {url+data}") + print(f"Retrieving: {url}?{data}") adapter = TimeoutHTTPAdapter(max_retries=retry_strategy) + http = requests.Session() http.mount("https://", adapter) - response = http.post(url, data=params) + response = http.get(url, params=params) http.close() + + if response.status_code != 200: + exit(f"Request failed with status code {response.status_code}:\n{response.text}") + with open(dest_path, 'w') as fh: fh.write(response.text) + if options.format == 'xml': with open(dest_path, 'r') as contents: while True: diff -r 12692567c7f9 -r 265c35540faa uniprotxml_downloader.xml --- a/uniprotxml_downloader.xml Tue Jun 01 11:54:47 2021 +0000 +++ b/uniprotxml_downloader.xml Fri Nov 04 15:08:37 2022 +0000 @@ -1,5 +1,8 @@ - + download proteome as XML or fasta + + macros.xml + requests @@ -11,14 +14,17 @@ python '$__tool_directory__/uniprotxml_downloader.py' #if $taxid.input_choice == 'common': --taxon $taxid.organism + --field taxonomy_id #if $taxid.reviewed: --reviewed=$taxid.reviewed #end if #elif $taxid.input_choice == 'taxids': + --field $taxid.field #for $id in $taxid.taxons.split(','): -t '$id' #end for #elif $taxid.input_choice == 'history': + --field $taxid.field --input='${taxid.taxon_file}' --column=#echo int(str($taxid.column)) - 1# #end if @@ -58,10 +64,12 @@ help="Enter one or more Organsim IDs (separated by commas) from http://www.uniprot.org/proteomes/"> ^\w+( \w+)*(,\w+( \w+)*)*$ + + @@ -89,7 +97,20 @@ + + + + + + + + + + + + + @@ -102,6 +123,7 @@ + @@ -114,6 +136,7 @@ +