0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 __author__ = 'Bjoern Gruening'
|
|
4 __version__ = '0.1'
|
|
5 __date__ = '2012'
|
|
6 __license__ = 'GLP3+'
|
|
7
|
|
8 import os, sys
|
|
9 import urllib2
|
|
10 import gzip, tempfile
|
|
11 import zipfile
|
|
12 import subprocess
|
|
13 import shutil
|
|
14
|
|
def unescape(cond_text):
    """Return *cond_text* with escaped placeholder tokens restored.

    Every occurrence of a token such as ``__gt__`` or ``__cn__`` is replaced
    by the literal character it stands for (``>``, newline, ...).  Text that
    contains no token is returned unchanged.
    """
    # (token, character) pairs.  A tuple rather than a dict keeps the
    # replacement order fixed, so input with overlapping tokens (for example
    # '__gt__lt__') always yields the same result regardless of the
    # interpreter's dict ordering.
    mapped_chars = (
        ('__gt__', '>'),
        ('__lt__', '<'),
        ('__sq__', "'"),
        ('__dq__', '"'),
        ('__ob__', '['),
        ('__cb__', ']'),
        ('__oc__', '{'),
        ('__cc__', '}'),
        ('__at__', '@'),
        ('__cn__', '\n'),
        ('__cr__', '\r'),
        ('__tc__', '\t'),
    )
    for token, char in mapped_chars:
        cond_text = cond_text.replace(token, char)
    return cond_text
|
|
33
|
|
def _save_to_tempfile(response):
    """Spool the body of *response* to a temporary file and return its path.

    The file is created with ``delete=False``, so the caller must remove it.
    If the download fails, the partial file is removed before re-raising.
    """
    temp = tempfile.NamedTemporaryFile(delete=False)
    try:
        try:
            shutil.copyfileobj(response, temp)
        finally:
            temp.close()
    except BaseException:
        os.remove(temp.name)
        raise
    return temp.name


def _append_gzip(response, out):
    """Download a gzip stream and append its decompressed content to *out*."""
    archive_path = _save_to_tempfile(response)
    try:
        # Do not call this handle 'zipfile': that would shadow the zipfile
        # module, which is still needed for .zip URLs later in the loop.
        gz_file = gzip.open(archive_path, 'rb')
        try:
            shutil.copyfileobj(gz_file, out)
        finally:
            gz_file.close()
    finally:
        os.remove(archive_path)


def _append_zip(response, out, allowed_extensions):
    """Download a zip archive and append its matching members to *out*.

    A member is appended when its lower-cased file extension is listed in
    *allowed_extensions*; an empty list accepts every member.
    """
    archive_path = _save_to_tempfile(response)
    tmpdir = None
    try:
        tmpdir = tempfile.mkdtemp()
        zf = zipfile.ZipFile(archive_path, allowZip64=True)
        try:
            # extractall() unpacks every member, so a single call suffices.
            # NOTE(review): the archive comes from a user-supplied URL; the
            # zipfile documentation warns about extracting untrusted
            # archives without inspecting member paths first.
            zf.extractall(tmpdir)
        finally:
            zf.close()

        for root, dirs, files in os.walk(tmpdir):
            for filename in files:
                extension = os.path.splitext(filename)[-1].lower()
                if not allowed_extensions or extension in allowed_extensions:
                    with open(os.path.join(root, filename), 'rb') as molfile:
                        shutil.copyfileobj(molfile, out)
    finally:
        os.remove(archive_path)
        if tmpdir is not None:
            shutil.rmtree(tmpdir)


def _append_rar(response, out):
    """Download a rar archive and append its unpacked content to *out*.

    Runs the external ``unrar`` program; OSError is raised if it cannot be
    started.  A non-zero exit status is reported on stderr.
    """
    archive_path = _save_to_tempfile(response)
    try:
        # unrar writes directly to the file descriptor behind *out*, so
        # flush Python's own buffer first to keep the output in order.
        out.flush()
        # Argument list without a shell: the path is never shell-parsed.
        # call() waits for unrar to finish, so the archive is not deleted
        # while it is still being read.
        status = subprocess.call(['unrar', 'p', '-inul', archive_path],
                                 stdout=out)
        if status != 0:
            sys.stderr.write('unrar exited with status %d\n' % status)
    finally:
        os.remove(archive_path)


# Command line: <urls> <output file> [<allowed extensions>]
# The first and third arguments are newline-separated lists that may arrive
# in escaped form (e.g. '__cn__' for a newline), hence the unescape() calls.
urls = unescape(sys.argv[1])
out = open(sys.argv[2], 'wb')

try:
    if len(sys.argv) > 3:
        # Blank entries (e.g. from an empty argument or a trailing newline)
        # are dropped, so that "no extensions given" results in [] -- which
        # accepts every file -- rather than [''].
        allowed_extensions = [ext.strip()
                              for ext in unescape(sys.argv[3]).split('\n')
                              if ext.strip()]
    else:
        allowed_extensions = ['.sdf', '.smi', '.inchi', '.mol']

    for url in urls.split('\n'):
        url = url.strip()
        if not url:
            # Skip blank lines instead of failing on an empty URL.
            continue

        request = urllib2.Request(url)
        # NOTE(review): urllib2 keeps one value per header name, so the
        # second call appears to replace the first and only 'gz' is sent.
        # Left unchanged to avoid altering what servers return -- confirm
        # whether 'gzip' was the intended value.
        request.add_header('Accept-encoding', 'gzip')
        request.add_header('Accept-encoding', 'gz')
        response = urllib2.urlopen(request)
        try:
            content_encoding = response.info().get('Content-Encoding')
            extension = os.path.splitext(url)[-1]

            if content_encoding in ['gz', 'gzip'] or extension in ['.gz', '.gzip']:
                _append_gzip(response, out)
            elif content_encoding in ['zip'] or extension in ['.zip']:
                _append_zip(response, out, allowed_extensions)
            elif content_encoding == 'rar' or extension in ['.rar']:
                _append_rar(response, out)
            else:
                # Anything else is appended to the output verbatim.
                shutil.copyfileobj(response, out)
        finally:
            response.close()
finally:
    out.close()
|