comparison get_online_data.py @ 0:2538366eb8fb draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
author bgruening
date Wed, 22 May 2019 07:43:41 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2538366eb8fb
1 import os
2 import urllib.request
3 import gzip, tempfile
4 import zipfile
5 import subprocess
6 import shutil
7 import argparse
8 from io import BytesIO
9
def unescape(cond_text):
    """Replace ``__xx__`` escape tokens in ``cond_text`` with literal characters.

    The tokens are substituted one after another, in the order listed
    below, using plain ``str.replace``.  Text that contains none of the
    tokens is returned unchanged.
    """
    # (escape token, literal character) pairs, applied top to bottom.
    replacements = (
        ('__gt__', '>'),
        ('__lt__', '<'),
        ('__sq__', "'"),
        ('__dq__', '"'),
        ('__ob__', '['),
        ('__cb__', ']'),
        ('__oc__', '{'),
        ('__cc__', '}'),
        ('__at__', '@'),
        ('__cn__', '\n'),
        ('__cr__', '\r'),
        ('__tc__', '\t'),
    )
    unescaped = cond_text
    for token, literal in replacements:
        unescaped = unescaped.replace(token, literal)
    return unescaped
28
def _append_zip_members(payload, allowed_extensions, out):
    """Unpack the ZIP archive held in ``payload`` and append allowed members to ``out``.

    The archive is extracted once into a temporary directory, which is
    removed again when this function returns (also on error).  A member is
    copied to ``out`` when its lower-cased extension is in
    ``allowed_extensions``, or unconditionally when that list is empty.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(BytesIO(payload), allowZip64=True) as zf:
            # A single extractall() unpacks every member.
            zf.extractall(tmpdir)

        for root, dirs, files in os.walk(tmpdir):
            for filename in files:
                extension = os.path.splitext(filename)[-1].lower()
                if not allowed_extensions or extension in allowed_extensions:
                    # Close each extracted file as soon as it has been copied.
                    with open(os.path.join(root, filename), 'rb') as member:
                        shutil.copyfileobj(member, out)


def get_files(options):
    """Download the URLs in ``options.url`` and concatenate them into ``options.out``.

    Attributes read from ``options``:

    * ``url`` -- newline-separated URLs; escape tokens are undone with
      ``unescape`` first.  Blank lines are skipped.
    * ``whitelist`` -- optional newline-separated file extensions used to
      filter the members of ZIP archives.  When it is empty or ``None``
      the defaults ``.sdf``, ``.smi``, ``.inchi`` and ``.mol`` are used.
    * ``out`` -- path of the output file, opened in binary mode and
      truncated.

    Each downloaded payload is classified by its first two bytes: gzip
    data is decompressed, ZIP archives are unpacked and filtered by
    extension, anything else is written unchanged.
    """
    urls = unescape(options.url)
    with open(options.out, 'wb+') as out:
        if options.whitelist:
            allowed_extensions = [ext.strip() for ext in unescape(options.whitelist).split('\n')]
        else:
            allowed_extensions = ['.sdf', '.smi', '.inchi', '.mol']

        for url in urls.split('\n'):
            url = url.strip()
            if not url:
                # A blank line (e.g. from a trailing newline) is not a URL.
                continue

            # Fetch the payload exactly once and release the connection.
            with urllib.request.urlopen(urllib.request.Request(url)) as response:
                resp_read = response.read()

            if resp_read[:2] == b'\x1f\x8b':  # magic number for gzipped files
                out.write(gzip.decompress(resp_read))
            elif resp_read[:2] == b'PK':  # magic number for zipped files
                _append_zip_members(resp_read, allowed_extensions, out)
            else:
                out.write(resp_read)
64
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to get_files().
    parser = argparse.ArgumentParser(description="""Download compressed files and extract files of with chosen extensions
""")
    # --url and --out are mandatory: get_files() calls unescape(options.url)
    # and open(options.out, ...), both of which fail on the None default.
    parser.add_argument('--url', dest='url', required=True, help='URL')
    parser.add_argument('--whitelist', dest='whitelist', default=None, help='whitelist')
    parser.add_argument('--out', dest='out', required=True, help='output')

    options = parser.parse_args()
    get_files(options)