Mercurial > repos > galaxyp > uniprotxml_downloader
annotate uniprotxml_downloader.py @ 4:12692567c7f9 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
author | galaxyp |
---|---|
date | Tue, 01 Jun 2021 11:54:47 +0000 |
parents | 1a5690a5eedc |
children | 265c35540faa |
rev | line source |
---|---|
0
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
1 #!/usr/bin/env python |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
2 """ |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
3 # |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
4 #------------------------------------------------------------------------------ |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
5 # University of Minnesota |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
6 # Copyright 2016, Regents of the University of Minnesota |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
7 #------------------------------------------------------------------------------ |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
8 # Author: |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
9 # |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
10 # James E Johnson |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
11 # |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
12 #------------------------------------------------------------------------------ |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
13 """ |
4
12692567c7f9
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
3
diff
changeset
|
14 import optparse |
0
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
15 import re |
4
12692567c7f9
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
3
diff
changeset
|
16 import sys |
12692567c7f9
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
3
diff
changeset
|
17 from urllib import parse |
12692567c7f9
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
3
diff
changeset
|
18 |
12692567c7f9
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
3
diff
changeset
|
19 import requests |
12692567c7f9
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
3
diff
changeset
|
20 from requests.adapters import HTTPAdapter |
12692567c7f9
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
3
diff
changeset
|
21 from requests.packages.urllib3.util.retry import Retry |
12692567c7f9
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
3
diff
changeset
|
22 |
12692567c7f9
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
3
diff
changeset
|
# Timeout applied to a request when the caller does not pass one explicitly.
DEFAULT_TIMEOUT = 5  # seconds

# Shared retry policy handed to the HTTP adapter: up to five attempts with a
# backoff factor of 2, retried on the listed status codes for the listed
# methods (POST is included because the download itself is a POST).
retry_strategy = Retry(
    total=5,
    backoff_factor=2,
    status_forcelist=[429, 500, 502, 503, 504],
    allowed_methods=["HEAD", "GET", "OPTIONS", "POST"],
)
12692567c7f9
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
3
diff
changeset
|
30 |
12692567c7f9
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
3
diff
changeset
|
31 |
12692567c7f9
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
3
diff
changeset
|
class TimeoutHTTPAdapter(HTTPAdapter):
    """HTTPAdapter that supplies a default timeout to every request.

    An optional ``timeout`` keyword given to the constructor is stored on the
    adapter (falling back to ``DEFAULT_TIMEOUT``) and is not forwarded to
    ``HTTPAdapter.__init__``; all other arguments are passed through.
    """

    def __init__(self, *args, **kwargs):
        # Remove our own keyword before delegating to the base constructor.
        self.timeout = kwargs.pop("timeout", DEFAULT_TIMEOUT)
        super().__init__(*args, **kwargs)

    def send(self, request, **kwargs):
        """Send ``request``, using the adapter timeout when none is given."""
        # A missing or explicit ``None`` timeout is replaced by the adapter's.
        if kwargs.get("timeout") is None:
            kwargs["timeout"] = self.timeout
        return super().send(request, **kwargs)
0
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
45 |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
46 |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
def __main__():
    """Download UniProt entries for a set of NCBI taxon IDs.

    Taxon IDs come from repeated ``--taxon`` options and/or one column of a
    tab-separated ``--input`` file (lines starting with ``#`` are skipped).
    The entries are requested from UniProt in ``--format`` (``xml`` or
    ``fasta``) and written to ``--output``, or to
    ``uniprot_<taxids>.<format>`` when no output path is given.

    For ``xml`` output the downloaded file must start (after any ``<?...``
    declaration lines) with a ``uniprot`` root element; otherwise a message
    is printed to stderr and the process exits with status 1.  Any other
    error, including a non-success HTTP status, terminates the process with
    the error text as the exit message.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-i', '--input', dest='input', default=None, help='Tabular file containing a column of NCBI Taxon IDs')
    parser.add_option('-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains Taxon IDs')
    parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download')
    parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries')
    parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta'], default='xml', help='output format')
    parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml')
    parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
    (options, args) = parser.parse_args()

    taxids = set(options.taxon)
    if options.input:
        with open(options.input, 'r') as input_file:
            for line in input_file:
                if line.startswith('#'):
                    continue
                fields = line.rstrip('\r\n').split('\t')
                # Let indexing decide whether the column exists, so that a
                # negative column is handled correctly as well.
                try:
                    taxid = fields[options.column].strip()
                except IndexError:
                    continue
                if taxid:
                    taxids.add(taxid)

    # Sets iterate in arbitrary order; sort so the query string and the
    # default file name are reproducible between runs.
    ordered_taxids = sorted(taxids)
    taxon_query = ' OR '.join('taxonomy:"%s"' % taxid for taxid in ordered_taxids)
    if options.output:
        dest_path = options.output
    else:
        # Use the requested format as the extension instead of always ".xml".
        dest_path = "uniprot_%s.%s" % ('_'.join(ordered_taxids), options.format)
    reviewed = " reviewed:%s" % options.reviewed if options.reviewed else ''
    try:
        url = 'https://www.uniprot.org/uniprot/'
        query = "%s%s" % (taxon_query, reviewed)
        params = {'query': query, 'force': 'yes', 'format': options.format}
        if options.debug:
            print("%s ? %s" % (url, params), file=sys.stderr)
        data = parse.urlencode(params)
        print(f"Retrieving: {url+data}")
        adapter = TimeoutHTTPAdapter(max_retries=retry_strategy)
        # The context manager closes the session even when the request raises.
        with requests.Session() as http:
            http.mount("https://", adapter)
            response = http.post(url, data=params)
        # Do not save an HTTP error page as if it were the requested data.
        response.raise_for_status()
        # Write the payload bytes unchanged rather than decoding with a
        # guessed charset and re-encoding with the locale's encoding.
        with open(dest_path, 'wb') as fh:
            fh.write(response.content)
        if options.format == 'xml' and not _has_uniprot_root(dest_path, options.debug):
            print("failed: Not a uniprot xml file", file=sys.stderr)
            sys.exit(1)
        print("NCBI Taxon ID:%s" % taxids, file=sys.stdout)
        if 'X-UniProt-Release' in response.headers:
            print("UniProt-Release:%s" % response.headers['X-UniProt-Release'], file=sys.stdout)
        if 'X-Total-Results' in response.headers:
            print("Entries:%s" % response.headers['X-Total-Results'], file=sys.stdout)
    except Exception as e:
        # SystemExit is not an Exception, so the exit(1) above passes through.
        sys.exit("%s" % e)


def _has_uniprot_root(path, debug=False):
    """Return True if the first non-declaration line of *path* opens a uniprot element.

    Lines starting with ``<?`` are skipped.  The next line must match
    ``<uniprot`` or ``<ns:uniprot`` for any namespace prefix.  An empty file,
    or one holding only declaration lines, yields False.  When *debug* is
    true every line read is echoed to stderr.
    """
    # pattern match <root or <ns:root for any ns string
    root_pattern = re.compile(r'^<(\w*:)?uniprot')
    with open(path, 'r', encoding='utf-8', errors='replace') as contents:
        for line in contents:
            if debug:
                print(line, file=sys.stderr)
            if line.startswith('<?'):
                continue
            return root_pattern.match(line) is not None
    return False
0
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
115 |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
116 |
0bd2688166a5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff
changeset
|
# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    __main__()