Mercurial > repos > bgruening > chemical_data_sources
comparison get_pubchem/get_pubchem_as_smiles.py @ 0:f653fd06f055 draft
Uploaded
author | bgruening |
---|---|
date | Thu, 15 Aug 2013 03:23:17 -0400 |
parents | |
children | b65518a007fa |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f653fd06f055 |
---|---|
1 #!/usr/bin/env python | |
2 | |
"""Download the PubChem compound SDF dump and convert it to canonical SMILES.

The script lists the files of the PubChem FTP directory, fetches and converts
them in parallel with ``wget`` and ``obabel``, and concatenates the converted
parts into a single output file.
"""

__author__ = 'Bjoern Gruening'
__version__ = '0.1'
__date__ = '2012'
# Fixed transposed letters: the licence identifier is GPL, not "GLP".
__license__ = 'GPL3+'
7 | |
8 import ftplib | |
9 import os, sys | |
10 import argparse | |
11 import subprocess | |
12 from multiprocessing import Pool | |
13 import tempfile | |
14 import shutil | |
15 | |
def main(output, processors = 10):
    """Fetch every file of the PubChem SDF dump and merge the conversions.

    The file names found in ``/pubchem/Compound/CURRENT-Full/SDF/`` on
    ``ftp.ncbi.nih.gov`` are handed to ``fetch_convert`` through a process
    pool; each worker leaves one converted file in a private temporary
    directory. Those files are then appended to ``output``.

    :param output: path of the file that receives the concatenated result.
    :param processors: number of worker processes used for the
        download/convert step.
    """
    # The destination is opened first so an unwritable path fails before any
    # download starts. It is opened in binary mode because the parts are read
    # back as bytes; mixing text and binary handles breaks on Python 3.
    with open(output, 'wb') as output_handle:
        td = tempfile.mkdtemp()
        try:
            ftp = ftplib.FTP('ftp.ncbi.nih.gov')
            try:
                ftp.login()
                ftp.cwd('/pubchem/Compound/CURRENT-Full/SDF/')
                filelist = ftp.nlst()
            finally:
                # The listing is all that is needed from this connection;
                # the actual downloads are done by wget in the workers.
                ftp.close()

            jobs = [(filename, td) for filename in filelist]

            pool = Pool(processes = processors)
            try:
                result = pool.map_async(fetch_convert, jobs)
                result.get()
            finally:
                # After get() returned all work is done, and after a failure
                # the remaining jobs should not keep running.
                pool.terminate()
                pool.join()

            # Sorted so the order of the merged output is reproducible;
            # os.listdir() gives no ordering guarantee.
            for filename in sorted(os.listdir(td)):
                path = os.path.join(td, filename)
                with open(path, 'rb') as part:
                    shutil.copyfileobj(part, output_handle)
        finally:
            # Remove the scratch directory even when a step above failed.
            shutil.rmtree( td )
37 | |
def fetch_convert(args):
    """Download one PubChem SDF file and convert it to canonical SMILES.

    Intended as a ``Pool.map`` worker, hence the single tuple argument.

    :param args: ``(filename, td)`` -- the name of the remote file and the
        directory in which the converted file (same name) is written.

    Failures of ``wget`` or ``obabel`` are reported on stderr and do not
    raise, so one bad file does not abort the whole run. The downloaded
    temporary file is always removed.
    """
    (filename, td) = args

    # A URL is not a filesystem path, so it is concatenated instead of being
    # built with os.path.join (which would use the OS path separator).
    url = 'ftp://ftp.ncbi.nih.gov/pubchem/Compound/CURRENT-Full/SDF/' + filename
    tmp_name = os.path.join( tempfile.gettempdir(), filename)
    output = os.path.join(td, filename)

    try:
        if subprocess.call( ['wget', '-O', tmp_name, url] ) != 0:
            # Do not feed a missing or truncated download to the converter.
            sys.stderr.write('Download of %s failed, skipping conversion.\n' % url)
            return
        if subprocess.call(["obabel", "-isdf", tmp_name, "-ocan", '-O', output]) != 0:
            sys.stderr.write('Conversion of %s failed.\n' % filename)
    finally:
        # Clean up the download even when a subprocess call raised
        # (e.g. the executable is not installed).
        if os.path.exists(tmp_name):
            os.remove(tmp_name)
46 | |
47 | |
if __name__ == '__main__':
    # Command-line entry point: collect the output path and the worker
    # count, then run the download/convert pipeline.
    cli = argparse.ArgumentParser(
        description='Download the whole PubChem and converts it to canonical SMILES on the fly.'
    )
    cli.add_argument(
        '-o', '--output',
        dest='output',
        required=True,
        help='Path to the output file.',
    )
    cli.add_argument(
        '-p', '--processors',
        dest='processors',
        type=int,
        default=10,
        help='How many processors you want to use.',
    )

    parsed = cli.parse_args()
    main(parsed.output, parsed.processors)
59 |