Mercurial > repos > bgruening > get_online_data
comparison get_online_data.py @ 0:2538366eb8fb draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
author | bgruening |
---|---|
date | Wed, 22 May 2019 07:43:41 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2538366eb8fb |
---|---|
1 import os | |
2 import urllib.request | |
3 import gzip, tempfile | |
4 import zipfile | |
5 import subprocess | |
6 import shutil | |
7 import argparse | |
8 from io import BytesIO | |
9 | |
def unescape(cond_text):
    """Restore characters that were escaped into ``__xx__`` placeholder tokens.

    Every placeholder (for example ``__gt__`` or ``__cn__``) found in
    *cond_text* is substituted by the literal character it stands for. The
    substitutions are applied one after another, in the order listed below,
    and the resulting string is returned.
    """
    # (placeholder token, literal character) pairs.
    replacements = (
        ('__gt__', '>'),
        ('__lt__', '<'),
        ('__sq__', "'"),
        ('__dq__', '"'),
        ('__ob__', '['),
        ('__cb__', ']'),
        ('__oc__', '{'),
        ('__cc__', '}'),
        ('__at__', '@'),
        ('__cn__', '\n'),
        ('__cr__', '\r'),
        ('__tc__', '\t'),
    )
    restored = cond_text
    for token, char in replacements:
        restored = restored.replace(token, char)
    return restored
28 | |
def _append_zip_members(payload, allowed_extensions, out):
    """Append the whitelisted files of the zip archive *payload* to *out*.

    payload            -- raw bytes of a zip archive
    allowed_extensions -- list of file extensions (compared against the
                          lower-cased extension of each member); an empty
                          list lets every file through
    out                -- binary file object the selected files are copied to
    """
    # The temporary directory is removed on exit, even if extraction fails.
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(BytesIO(payload), allowZip64=True) as zf:
            # A single call extracts every member of the archive.
            zf.extractall(tmpdir)
        for root, _dirs, files in os.walk(tmpdir):
            for filename in files:
                extension = os.path.splitext(filename)[-1].lower()
                if not allowed_extensions or extension in allowed_extensions:
                    with open(os.path.join(root, filename), 'rb') as member:
                        shutil.copyfileobj(member, out)


def get_files(options):
    """Download every URL in ``options.url`` and concatenate the data into ``options.out``.

    ``options.url`` holds one URL per line and ``options.whitelist`` (optional)
    one file extension per line; both are passed through ``unescape`` first.
    When no whitelist is given, '.sdf', '.smi', '.inchi' and '.mol' are used.

    For each download the first two bytes decide how it is handled:
      * gzip magic number -- the decompressed data is written;
      * zip magic number  -- files with a whitelisted extension are written;
      * anything else     -- the data is written unchanged.
    """
    urls = unescape(options.url)
    if options.whitelist:
        allowed_extensions = [ext.strip() for ext in unescape(options.whitelist).split('\n')]
    else:
        allowed_extensions = ['.sdf', '.smi', '.inchi', '.mol']

    with open(options.out, 'wb+') as out:
        for url in urls.split('\n'):
            url = url.strip()
            if not url:
                # Blank lines (e.g. a trailing newline) are not URLs; skip them
                # instead of failing with "unknown url type".
                continue
            request = urllib.request.Request(url)
            with urllib.request.urlopen(request) as response:
                payload = response.read()
            if payload[:2] == b'\x1f\x8b':  # test magic number for gzipped files
                out.write(gzip.decompress(payload))
            elif payload[:2] == b'PK':  # test magic number for zipped files
                _append_zip_members(payload, allowed_extensions, out)
            else:
                out.write(payload)
64 | |
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to get_files.
    parser = argparse.ArgumentParser(
        description="Download compressed files and extract files with chosen extensions")
    # --url and --out are mandatory: without them get_files cannot run, so let
    # argparse report a usage error instead of failing later with a traceback.
    parser.add_argument('--url', dest='url', required=True, help='URL')
    parser.add_argument('--whitelist', dest='whitelist', default=None, help='whitelist')
    parser.add_argument('--out', dest='out', required=True, help='output')

    options = parser.parse_args()
    get_files(options)