# HG changeset patch
# User bgruening
# Date 1603118541 0
# Node ID 2cd8aee0d830377db1fe55e3aa85d0760a54a57e
# Parent 3ecaa96341266a1801e5bb085e0d45515ab83b71
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 1fe240ef0064a1a4a66d9be1ccace53824280b75"
diff -r 3ecaa9634126 -r 2cd8aee0d830 change_title_to_metadata_value.py
--- a/change_title_to_metadata_value.py Tue Jul 28 08:39:22 2020 -0400
+++ b/change_title_to_metadata_value.py Mon Oct 19 14:42:21 2020 +0000
@@ -6,29 +6,27 @@
value of a given-id of the same molecule file.
"""
-import os
-import sys
import argparse
import random
import string
-
from openbabel import openbabel, pybel
openbabel.obErrorLog.StopLogging()
+
def main():
parser = argparse.ArgumentParser(
description="Change the title from a molecule file to metadata \
-value of a given-id of the same molecule file.",
+ value of a given-id of the same molecule file.",
)
- parser.add_argument('--infile', '-i',
- required=True, help="path to the input file")
- parser.add_argument('--outfile', '-o',
- required=True, help="path to the output file")
- parser.add_argument('--key', '-k',
- required=True, help="the metadata key from the sdf file which should inlcude the new title")
- parser.add_argument('--random', '-r',
- action="store_true", help="Add random suffix to the title.")
+ parser.add_argument('--infile', '-i', required=True,
+ help="path to the input file")
+ parser.add_argument('--outfile', '-o', required=True,
+ help="path to the output file")
+ parser.add_argument('--key', '-k', required=True,
+ help="the metadata key from the sdf file which should inlcude the new title")
+ parser.add_argument('--random', '-r', action="store_true",
+ help="Add random suffix to the title.")
args = parser.parse_args()
@@ -39,11 +37,10 @@
if args.random:
suffix = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(13))
mol.title += '__%s' % suffix
- output.write( mol )
+ output.write(mol)
output.close()
if __name__ == "__main__":
main()
-
diff -r 3ecaa9634126 -r 2cd8aee0d830 cheminfolib.py
--- a/cheminfolib.py Tue Jul 28 08:39:22 2020 -0400
+++ b/cheminfolib.py Mon Oct 19 14:42:21 2020 +0000
@@ -4,31 +4,37 @@
Copyright 2012, Bjoern Gruening and Xavier Lucas
"""
-import os, sys
+import glob
+import re
+import subprocess
+import sys
+import tempfile
+from multiprocessing import Pool
+
try:
from galaxy import eggs
eggs.require('psycopg2')
-except:
+except ImportError:
+ psycopg2 = None
print('psycopg2 is not available. It is currently used in the pgchem wrappers, that are not shipped with default CTB')
try:
from openbabel import openbabel, pybel
openbabel.obErrorLog.StopLogging()
-except:
+except ImportError:
+ openbabel, pybel = None, None
print('OpenBabel could not be found. A few functions are not available without OpenBabel.')
-from multiprocessing import Pool
-import glob, tempfile, re
-import subprocess
-def CountLines( path ):
+def CountLines(path):
out = subprocess.Popen(['wc', '-l', path],
- stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT
- ).communicate()[0]
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT
+ ).communicate()[0]
return int(out.partition(b' ')[0])
+
def grep(pattern, file_obj):
grepper = re.compile(pattern)
for line in file_obj:
@@ -36,6 +42,7 @@
return True
return False
+
def check_filetype(filepath):
mol = False
possible_inchi = True
@@ -50,76 +57,78 @@
return 'drf'
elif possible_inchi and re.findall('^InChI=', line):
return 'inchi'
- elif re.findall('^M\s+END', line):
+ elif re.findall(r'^M\s+END', line):
mol = True
# first line is not an InChI, so it can't be an InChI file
possible_inchi = False
if mol:
- # END can occures before $$$$, so and SDF file will
+ # END can occur before $$$$, so an SDF file will
# be recognised as mol, if you not using this hack'
return 'mol'
return 'smi'
+
def db_connect(args):
try:
- db_conn = psycopg2.connect("dbname=%s user=%s host=%s password=%s" % (args.dbname, args.dbuser, args.dbhost, args.dbpasswd));
+ db_conn = psycopg2.connect("dbname=%s user=%s host=%s password=%s" % (args.dbname, args.dbuser, args.dbhost, args.dbpasswd))
return db_conn
- except:
+ except psycopg2.Error:
sys.exit('Unable to connect to the db')
+
ColumnNames = {
- 'can_smiles' : 'Canonical SMILES',
- 'can' : 'Canonical SMILES',
- 'inchi' : 'InChI',
- 'inchi_key' : 'InChI key',
- 'inchi_key_first' : 'InChI key first',
- 'inchi_key_last' : 'InChI key last',
- 'molwt' : 'Molecular weight',
- 'hbd' : 'Hydrogen-bond donors',
- 'donors' : 'Hydrogen-bond donors',
- 'hba' : 'Hydrogen-bond acceptors',
- 'acceptors' : 'Hydrogen-bond acceptors',
- 'rotbonds' : 'Rotatable bonds',
- 'logp' : 'logP',
- 'psa' : 'Polar surface area',
- 'mr' : 'Molecular refractivity',
- 'atoms' : 'Number of heavy atoms',
- 'rings' : 'Number of rings',
- 'set_bits' : 'FP2 bits',
- 'id' : 'Internal identifier',
- 'tani' : 'Tanimoto coefficient',
- 'spectrophore' : 'Spectrophores(TM)',
- 'dist_spectrophore' : 'Spectrophores(TM) distance to target',
- 'synonym' : 'Entry id',
+ 'can_smiles': 'Canonical SMILES',
+ 'can': 'Canonical SMILES',
+ 'inchi': 'InChI',
+ 'inchi_key': 'InChI key',
+ 'inchi_key_first': 'InChI key first',
+ 'inchi_key_last': 'InChI key last',
+ 'molwt': 'Molecular weight',
+ 'hbd': 'Hydrogen-bond donors',
+ 'donors': 'Hydrogen-bond donors',
+ 'hba': 'Hydrogen-bond acceptors',
+ 'acceptors': 'Hydrogen-bond acceptors',
+ 'rotbonds': 'Rotatable bonds',
+ 'logp': 'logP',
+ 'psa': 'Polar surface area',
+ 'mr': 'Molecular refractivity',
+ 'atoms': 'Number of heavy atoms',
+ 'rings': 'Number of rings',
+ 'set_bits': 'FP2 bits',
+ 'id': 'Internal identifier',
+ 'tani': 'Tanimoto coefficient',
+ 'spectrophore': 'Spectrophores(TM)',
+ 'dist_spectrophore': 'Spectrophores(TM) distance to target',
+ 'synonym': 'Entry id',
}
OBDescriptor = {
- 'atoms': ["atoms","Number of atoms"],
- 'hatoms': ["hatoms","Number of heavy atoms"], # self defined tag hatoms in plugindefines.txt
- 'can_smiles' : ["cansmi","Canonical SMILES"],
- 'can_smilesNS' : ["cansmiNS","Canonical SMILES without isotopes or stereo"],
- #["abonds","Number of aromatic bonds"],
- #["bonds","Number of bonds"],
- #["dbonds","Number of double bonds"],
- #["formula","Chemical formula"],
- 'hba': ["HBA1","Number of Hydrogen Bond Acceptors 1 (JoelLib)"],
- 'hba2': ["HBA2","Number of Hydrogen Bond Acceptors 2 (JoelLib)"],
- 'hbd': ["HBD","Number of Hydrogen Bond Donors (JoelLib)"],
- 'inchi': ["InChI","IUPAC InChI identifier"],
- 'inchi_key': ["InChIKey","InChIKey"],
- #["L5","Lipinski Rule of Five"],
- 'logp': ["logP","octanol/water partition coefficient"],
- 'mr': ["MR","molar refractivity"],
- 'molwt': ["MW","Molecular Weight filter"],
- #["nF","Number of Fluorine Atoms"],
- #["s","SMARTS filter"],
- #["sbonds","Number of single bonds"],
- #["smarts","SMARTS filter"],
- #["tbonds","Number of triple bonds"],
- #["title","For comparing a molecule's title"],
- 'psa': ["TPSA","topological polar surface area"],
- 'rotbonds' : ['ROTATABLE_BOND', 'rotatable bonds'],
+ 'atoms': ["atoms", "Number of atoms"],
+ 'hatoms': ["hatoms", "Number of heavy atoms"], # self defined tag hatoms in plugindefines.txt
+ 'can_smiles': ["cansmi", "Canonical SMILES"],
+ 'can_smilesNS': ["cansmiNS", "Canonical SMILES without isotopes or stereo"],
+ # ["abonds", "Number of aromatic bonds"],
+ # ["bonds", "Number of bonds"],
+ # ["dbonds", "Number of double bonds"],
+ # ["formula", "Chemical formula"],
+ 'hba': ["HBA1", "Number of Hydrogen Bond Acceptors 1 (JoelLib)"],
+ 'hba2': ["HBA2", "Number of Hydrogen Bond Acceptors 2 (JoelLib)"],
+ 'hbd': ["HBD", "Number of Hydrogen Bond Donors (JoelLib)"],
+ 'inchi': ["InChI", "IUPAC InChI identifier"],
+ 'inchi_key': ["InChIKey", "InChIKey"],
+ # ["L5", "Lipinski Rule of Five"],
+ 'logp': ["logP", "octanol/water partition coefficient"],
+ 'mr': ["MR", "molar refractivity"],
+ 'molwt': ["MW", "Molecular Weight filter"],
+ # ["nF", "Number of Fluorine Atoms"],
+ # ["s", "SMARTS filter"],
+ # ["sbonds", "Number of single bonds"],
+ # ["smarts", "SMARTS filter"],
+ # ["tbonds", "Number of triple bonds"],
+ # ["title", "For comparing a molecule's title"],
+ 'psa': ["TPSA", "topological polar surface area"],
+ 'rotbonds': ['ROTATABLE_BOND', 'rotatable bonds'],
}
@@ -128,9 +137,9 @@
outfile = open(args.output, 'w')
requested_fields = (filter(lambda x: x not in ["[", "]", "'"], args.fetch)).split(', ')
if args.header:
- outfile.write( 'Identifier\t' + '\t'.join( [ColumnNames[key] for key in requested_fields] ) + '\n' )
+ outfile.write('Identifier\t' + '\t'.join([ColumnNames[key] for key in requested_fields]) + '\n')
for row in rows:
- outfile.write( row['synonym'] + '\t' + '\t'.join( [str(row[key]) for key in requested_fields] ) + '\n' )
+ outfile.write(row['synonym'] + '\t' + '\t'.join([str(row[key]) for key in requested_fields]) + '\n')
elif args.oformat in ['sdf', 'mol2']:
outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
@@ -139,103 +148,102 @@
mol = pybel.readstring('sdf', row['mol'])
if args.oformat == 'sdf':
keys = filter(lambda x: x not in ["[", "]", "'"], args.fetch).split(', ')
- mol.data.update( { ColumnNames['synonym'] : row['synonym'] } )
+ mol.data.update({ColumnNames['synonym']: row['synonym']})
if 'inchi_key' in keys:
- keys = (', '.join(keys).replace( "inchi_key", "inchi_key_first, inchi_key_last" )).split(', ')
- [ mol.data.update( { ColumnNames[key] : row[key] } ) for key in keys if key]
+ keys = (', '.join(keys).replace("inchi_key", "inchi_key_first, inchi_key_last")).split(', ')
+ [mol.data.update({ColumnNames[key]: row[key]}) for key in keys if key]
outfile.write(mol)
- except:
+ except OSError:
pass
else:
outfile = open(args.output, 'w')
- outfile.write( '\n'.join( [ '%s\t%s' % (row[args.oformat], row['synonym'] ) for row in rows ] ) )
+ outfile.write('\n'.join(['%s\t%s' % (row[args.oformat], row['synonym']) for row in rows]))
outfile.close()
+
def pybel_stop_logging():
openbabel.obErrorLog.StopLogging()
+
def get_properties_ext(mol):
-
HBD = pybel.Smarts("[!#6;!H0]")
- HBA = pybel.Smarts("[$([$([#8,#16]);!$(*=N~O);" +
- "!$(*~N=O);X1,X2]),$([#7;v3;" +
- "!$([nH]);!$(*(-a)-a)])]"
- )
+ HBA = pybel.Smarts(("[$([$([#8,#16]);!$(*=N~O);"
+ "!$(*~N=O);X1,X2]),$([#7;v3;"
+ "!$([nH]);!$(*(-a)-a)])]"
+ ))
calc_desc_dict = mol.calcdesc()
try:
logp = calc_desc_dict['logP']
- except:
+ except KeyError:
logp = calc_desc_dict['LogP']
return {"molwt": mol.molwt,
"logp": logp,
"donors": len(HBD.findall(mol)),
- "acceptors": len(HBA.findall(mol)),
+ "acceptors": len(HBA.findall(mol)),
"psa": calc_desc_dict['TPSA'],
"mr": calc_desc_dict['MR'],
"rotbonds": mol.OBMol.NumRotors(),
- "can": mol.write("can").split()[0].strip(), ### tthis one works fine for both zinc and chembl (no ZINC code added after can descriptor string)
+ "can": mol.write("can").split()[0].strip(), # tthis one works fine for both zinc and chembl (no ZINC code added after can descriptor string)
"inchi": mol.write("inchi").strip(),
"inchi_key": get_inchikey(mol).strip(),
"rings": len(mol.sssr),
"atoms": mol.OBMol.NumHvyAtoms(),
- "spectrophore" : OBspectrophore(mol),
- }
+ "spectrophore": OBspectrophore(mol),
+ }
+
def get_inchikey(mol):
conv = openbabel.OBConversion()
conv.SetInAndOutFormats("mol", "inchi")
conv.SetOptions("K", conv.OUTOPTIONS)
- inchikey = conv.WriteString( mol.OBMol )
+ inchikey = conv.WriteString(mol.OBMol)
return inchikey
+
def OBspectrophore(mol):
spectrophore = pybel.ob.OBSpectrophore()
# Parameters: rotation angle = 20, normalization for mean and sd, accuracy = 3.0 A and non-stereospecific cages.
- spectrophore.SetNormalization( spectrophore.NormalizationTowardsZeroMeanAndUnitStd )
- return ', '.join( [ "%.3f" % value for value in spectrophore.GetSpectrophore( mol.OBMol ) ] )
+ spectrophore.SetNormalization(spectrophore.NormalizationTowardsZeroMeanAndUnitStd)
+ return ', '.join(["%.3f" % value for value in spectrophore.GetSpectrophore(mol.OBMol)])
+
-def squared_euclidean_distance(a, b):
- try:
- return ((np.asarray( a ) - np.asarray( b ))**2).sum()
- except ValueError:
- return 0
-
-def split_library( lib_path, lib_format = 'sdf', package_size = None ):
+def split_library(lib_path, lib_format='sdf', package_size=None):
"""
- Split a library of compounds. Usage: split_library( lib_path, lib_format, package_size )
- IT currently ONLY WORKS FOR SD-Files
+ Split a library of compounds. Usage: split_library(lib_path, lib_format, package_size)
+ IT currently ONLY WORKS FOR SD-Files
"""
pack = 1
mol_counter = 0
- outfile = open('/%s/%s_pack_%i.%s' % ( '/'.join(lib_path.split('/')[:-1]), lib_path.split('/')[-1].split('.')[0], pack, 'sdf'), 'w' )
+ outfile = open('/%s/%s_pack_%i.%s' % ('/'.join(lib_path.split('/')[:-1]), lib_path.split('/')[-1].split('.')[0], pack, 'sdf'), 'w')
for line in open(lib_path, 'r'):
- outfile.write( line )
+ outfile.write(line)
if line.strip() == '$$$$':
mol_counter += 1
if mol_counter % package_size == 0:
outfile.close()
pack += 1
- outfile = open('/%s/%s_pack_%i.%s' % ( '/'.join(lib_path.split('/')[:-1]), lib_path.split('/')[-1].split('.')[0], pack, 'sdf'), 'w' )
- if mol_counter*10 % package_size == 0:
- print('%i molecules parsed, starting pack nr. %i' % ( mol_counter, pack - 1 ))
+ outfile = open('/%s/%s_pack_%i.%s' % ('/'.join(lib_path.split('/')[:-1]), lib_path.split('/')[-1].split('.')[0], pack, 'sdf'), 'w')
+ if mol_counter * 10 % package_size == 0:
+ print('%i molecules parsed, starting pack nr. %i' % (mol_counter, pack - 1))
outfile.close()
return True
-def split_smi_library( smiles_file, structures_in_one_file ):
+
+def split_smi_library(smiles_file, structures_in_one_file):
"""
- Split a file with SMILES to several files for multiprocessing usage.
- Usage: split_smi_library( smiles_file, 10 )
+ Split a file with SMILES to several files for multiprocessing usage.
+ Usage: split_smi_library(smiles_file, 10)
"""
output_files = []
tfile = tempfile.NamedTemporaryFile(delete=False)
smiles_handle = open(smiles_file, 'r')
- for count, line in enumerate( smiles_handle ):
+ for count, line in enumerate(smiles_handle):
if count % structures_in_one_file == 0 and count != 0:
tfile.close()
output_files.append(tfile.name)
@@ -247,9 +255,9 @@
return output_files
-def mp_run(input_path, regex, PROCESSES, function_to_call ):
+def mp_run(input_path, regex, PROCESSES, function_to_call):
paths = []
- [ paths.append(compound_file) for compound_file in glob.glob(str(input_path) + str(regex)) ]
+ [paths.append(compound_file) for compound_file in glob.glob(str(input_path) + str(regex))]
paths.sort()
pool = Pool(processes=PROCESSES)
@@ -259,6 +267,6 @@
return paths
+
if __name__ == '__main__':
print(check_filetype(sys.argv[1]))
-
diff -r 3ecaa9634126 -r 2cd8aee0d830 distance_finder.py
--- a/distance_finder.py Tue Jul 28 08:39:22 2020 -0400
+++ b/distance_finder.py Mon Oct 19 14:42:21 2020 +0000
@@ -11,7 +11,9 @@
# a property named distance1 where the numeric part is the index (starting from 1) of the points (in that example
# there would be properties for distance1, distance2 and distance3.
-import argparse, os, sys, math
+import argparse
+import math
+import sys
from openbabel import pybel
@@ -30,7 +32,6 @@
:return:
"""
-
points = []
# read the points
@@ -41,7 +42,7 @@
p = line.split()
if len(p) == 3:
points.append((float(p[0]), float(p[1]), float(p[2])))
- log("Read points",p)
+ log("Read points", p)
continue
log("Failed to read line:", line)
log('Found', len(points), 'atom points')
@@ -56,7 +57,6 @@
try:
# print("Processing mol", mol.title)
-
clone = pybel.Molecule(mol)
clone.removeh()
@@ -82,7 +82,7 @@
sdf_writer.write(mol)
except Exception as e:
- log('Failed to handle molecule: '+ str(e))
+ log('Failed to handle molecule: ' + str(e))
continue
sdf_writer.close()
@@ -93,12 +93,10 @@
global work_dir
parser = argparse.ArgumentParser(description='XChem distances - measure distances to particular points')
-
parser.add_argument('-i', '--input', help="SDF containing the 3D molecules to score)")
parser.add_argument('-p', '--points', help="PDB format file with atoms")
parser.add_argument('-o', '--outfile', default='output.sdf', help="File name for results")
-
args = parser.parse_args()
log("XChem distances args: ", args)
diff -r 3ecaa9634126 -r 2cd8aee0d830 macros.xml
--- a/macros.xml Tue Jul 28 08:39:22 2020 -0400
+++ b/macros.xml Mon Oct 19 14:42:21 2020 +0000
@@ -18,6 +18,11 @@
help="Valid file types are: SDF, MOL, MOL2, CML, InChI, SMILES, and PDB"/>
+
+
+
+
diff -r 3ecaa9634126 -r 2cd8aee0d830 multi_obgrep.py
--- a/multi_obgrep.py Tue Jul 28 08:39:22 2020 -0400
+++ b/multi_obgrep.py Mon Oct 19 14:42:21 2020 +0000
@@ -4,40 +4,43 @@
Output: Molecule file filtered with obgrep.
Copyright 2013, Bjoern Gruening and Xavier Lucas
"""
-import sys, os
import argparse
import multiprocessing
-import tempfile
-import subprocess
-import shutil
+import os
import shlex
+import shutil
+import subprocess
+import tempfile
-from openbabel import openbabel, pybel
-openbabel.obErrorLog.StopLogging()
+
def parse_command_line():
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--infile', required=True, help='Molecule file.')
- parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.')
+ parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.')
parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi")
parser.add_argument("--n-times", dest="n_times", type=int,
- default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.")
+ default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.")
parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count())
parser.add_argument("--invert-matches", dest="invert_matches", action="store_true",
- default=False, help="Invert the matching, print non-matching molecules.")
+ default=False, help="Invert the matching, print non-matching molecules.")
parser.add_argument("--only-name", dest="only_name", action="store_true",
- default=False, help="Only print the name of the molecules.")
+ default=False, help="Only print the name of the molecules.")
parser.add_argument("--full-match", dest="full_match", action="store_true",
- default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.")
+ default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.")
parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true",
- default=False, help="Print the number of matches.")
+ default=False, help="Print the number of matches.")
return parser.parse_args()
+
results = list()
+
+
def mp_callback(res):
results.append(res)
-def mp_helper( query, args ):
+
+def mp_helper(query, args):
"""
Helper function for multiprocessing.
That function is a wrapper around obgrep.
@@ -57,44 +60,44 @@
tmp = tempfile.NamedTemporaryFile(delete=False)
cmd = 'obgrep %s "%s" %s' % (' '.join(cmd_list), query, args.infile)
- child = subprocess.Popen(shlex.split(cmd),
- stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE)
+ child = subprocess.Popen(shlex.split(cmd), stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE)
stdout, stderr = child.communicate()
return (tmp.name, query)
-def obgrep( args ):
-
+def obgrep(args):
temp_file = tempfile.NamedTemporaryFile()
temp_link = "%s.%s" % (temp_file.name, args.iformat)
temp_file.close()
os.symlink(args.infile, temp_link)
args.infile = temp_link
- pool = multiprocessing.Pool( args.processors )
- for query in open( args.query ):
+ pool = multiprocessing.Pool(args.processors)
+ for query in open(args.query):
pool.apply_async(mp_helper, args=(query.strip(), args), callback=mp_callback)
- #mp_callback( mp_helper(query.strip(), args) )
+ # mp_callback(mp_helper(query.strip(), args))
pool.close()
pool.join()
- out_handle = open( args.outfile, 'wb' )
+ out_handle = open(args.outfile, 'wb')
for result_file, query in results:
- res_handle = open(result_file,'rb')
- shutil.copyfileobj( res_handle, out_handle )
+ res_handle = open(result_file, 'rb')
+ shutil.copyfileobj(res_handle, out_handle)
res_handle.close()
- os.remove( result_file )
+ os.remove(result_file)
out_handle.close()
- os.remove( temp_link )
+ os.remove(temp_link)
+
def __main__():
"""
Multiprocessing obgrep search.
"""
args = parse_command_line()
- obgrep( args )
+ obgrep(args)
+
-if __name__ == "__main__" :
+if __name__ == "__main__":
__main__()
diff -r 3ecaa9634126 -r 2cd8aee0d830 ob_addh.py
--- a/ob_addh.py Tue Jul 28 08:39:22 2020 -0400
+++ b/ob_addh.py Mon Oct 19 14:42:21 2020 +0000
@@ -3,21 +3,23 @@
Input: Molecule file
Output: Molecule file with hydrogen atoms added at the target pH.
"""
-import sys, os
import argparse
+import sys
from openbabel import openbabel, pybel
openbabel.obErrorLog.StopLogging()
+
def parse_command_line(argv):
parser = argparse.ArgumentParser()
- parser.add_argument('--iformat', type=str, default='sdf' , help='input file format')
+ parser.add_argument('--iformat', type=str, default='sdf', help='input file format')
parser.add_argument('-i', '--input', type=str, required=True, help='input file name')
parser.add_argument('-o', '--output', type=str, required=True, help='output file name')
parser.add_argument('--polar', action="store_true", default=False, help='Add hydrogen atoms only to polar atoms')
parser.add_argument('--pH', type=float, default="7.4", help='Specify target pH value')
return parser.parse_args()
+
def addh(args):
outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
for mol in pybel.readfile(args.iformat, args.input):
@@ -27,6 +29,7 @@
outfile.write(mol)
outfile.close()
+
def __main__():
"""
Add hydrogen atoms at a certain pH value
@@ -34,5 +37,6 @@
args = parse_command_line(sys.argv)
addh(args)
-if __name__ == "__main__" :
+
+if __name__ == "__main__":
__main__()
diff -r 3ecaa9634126 -r 2cd8aee0d830 ob_filter.py
--- a/ob_filter.py Tue Jul 28 08:39:22 2020 -0400
+++ b/ob_filter.py Mon Oct 19 14:42:21 2020 +0000
@@ -6,35 +6,32 @@
TODO: AND/OR conditions?
"""
-import sys, os
import argparse
+import json
+import shlex
+import subprocess
+import sys
+
import cheminfolib
-import json
-import shlex, subprocess
-
from openbabel import pybel
cheminfolib.pybel_stop_logging()
+
def parse_command_line():
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', help='Input file name')
parser.add_argument('-iformat', help='Input file format')
- parser.add_argument('-oformat',
- default='smi',
- help='Output file format')
- parser.add_argument('-o', '--output',
- help='Output file name',
- required=True)
- parser.add_argument('--filters',
- help="Specify the filters to apply",
- required=True,
- )
- parser.add_argument('--list_of_names',
- help="A file with list of molecule names to extract. Every name is in one line.",
- required=False,
- )
+ parser.add_argument('-oformat', default='smi',
+ help='Output file format')
+ parser.add_argument('-o', '--output', help='Output file name',
+ required=True)
+ parser.add_argument('--filters', help="Specify the filters to apply",
+ required=True)
+ parser.add_argument('--list_of_names', required=False,
+ help="A file with list of molecule names to extract. Every name is in one line.")
return parser.parse_args()
+
def filter_precalculated_compounds(args, filters):
outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
for mol in pybel.readfile('sdf', args.input):
@@ -53,6 +50,7 @@
outfile.write(mol)
outfile.close()
+
def filter_new_compounds(args, filters):
if args.iformat == args.oformat:
@@ -70,10 +68,9 @@
filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max)
args = shlex.split('%s "%s"' % (cmd, filter_cmd))
- #print '%s "%s"' % (cmd, filter_cmd)
+ # print '%s "%s"' % (cmd, filter_cmd)
# calling openbabel with subprocess and pipe potential errors occuring in openbabel to stdout
- child = subprocess.Popen(args,
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = child.communicate()
return_code = child.returncode
@@ -87,6 +84,7 @@
sys.stdout.write(stdout.decode('utf-8'))
sys.stdout.write(stderr.decode('utf-8'))
+
def filter_by_name(args):
outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
for mol in pybel.readfile('sdf', args.input):
@@ -95,16 +93,17 @@
outfile.write(mol)
outfile.close()
+
def __main__():
"""
Select compounds with certain properties from a small library
"""
args = parse_command_line()
-
+
if args.filters == '__filter_by_name__':
filter_by_name(args)
return
-
+
# Its a small trick to get the parameters in an easy way from the xml file.
# To keep it readable in the xml file, many white-spaces are included in that string it needs to be removed.
# Also the last loop creates a ',{' that is not an valid jason expression.
@@ -114,7 +113,7 @@
mol = next(pybel.readfile('sdf', args.input))
for key, elem in filters.items():
property = cheminfolib.ColumnNames.get(key, key)
- if not property in mol.data:
+ if property not in mol.data:
break
else:
# if the for loop finishes in a normal way, we should habe all properties at least in the first molecule
@@ -124,5 +123,5 @@
filter_new_compounds(args, filters)
-if __name__ == "__main__" :
+if __name__ == "__main__":
__main__()
diff -r 3ecaa9634126 -r 2cd8aee0d830 ob_genProp.py
--- a/ob_genProp.py Tue Jul 28 08:39:22 2020 -0400
+++ b/ob_genProp.py Mon Oct 19 14:42:21 2020 +0000
@@ -4,23 +4,25 @@
Output: Physico-chemical properties are computed and stored as metadata in the sdf output file.
Copyright 2012, Bjoern Gruening and Xavier Lucas
"""
-import sys, os
import argparse
+import sys
+
+import cheminfolib
import openbabel
+from openbabel import pybel
openbabel.obErrorLog.StopLogging()
-import cheminfolib
-from openbabel import pybel
def parse_command_line(argv):
parser = argparse.ArgumentParser()
- parser.add_argument('--iformat', default='sdf' , help='input file format')
+ parser.add_argument('--iformat', default='sdf', help='input file format')
parser.add_argument('-i', '--input', required=True, help='input file name')
- parser.add_argument('--oformat', default='sdf', choices = ['sdf', 'table'] , help='output file format')
+ parser.add_argument('--oformat', default='sdf', choices=['sdf', 'table'], help='output file format')
parser.add_argument('--header', type=bool, help='Include the header as the first line of the output table')
parser.add_argument('-o', '--output', required=True, help='output file name')
return parser.parse_args()
+
def compute_properties(args):
if args.oformat == 'sdf':
outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
@@ -29,18 +31,19 @@
if args.header:
mol = next(pybel.readfile(args.iformat, args.input))
metadata = cheminfolib.get_properties_ext(mol)
- outfile.write( '%s\n' % '\t'.join( [ cheminfolib.ColumnNames[key] for key in metadata ] ) )
+ outfile.write('%s\n' % '\t'.join([cheminfolib.ColumnNames[key] for key in metadata]))
for mol in pybel.readfile(args.iformat, args.input):
if mol.OBMol.NumHvyAtoms() > 5:
metadata = cheminfolib.get_properties_ext(mol)
if args.oformat == 'sdf':
- [ mol.data.update( { cheminfolib.ColumnNames[key] : metadata[key] } ) for key in metadata ]
+ [mol.data.update({cheminfolib.ColumnNames[key]: metadata[key]}) for key in metadata]
outfile.write(mol)
else:
- outfile.write( '%s\n' % ('\t'.join( [ str(metadata[key]) for key in metadata ] ) ) )
+ outfile.write('%s\n' % ('\t'.join([str(metadata[key]) for key in metadata])))
outfile.close()
+
def __main__():
"""
Physico-chemical properties are computed and stored as metadata in the sdf output file
@@ -48,5 +51,6 @@
args = parse_command_line(sys.argv)
compute_properties(args)
-if __name__ == "__main__" :
+
+if __name__ == "__main__":
__main__()
diff -r 3ecaa9634126 -r 2cd8aee0d830 ob_remIons.py
--- a/ob_remIons.py Tue Jul 28 08:39:22 2020 -0400
+++ b/ob_remIons.py Mon Oct 19 14:42:21 2020 +0000
@@ -4,29 +4,33 @@
Output: Molecule file with removed ions and fragments.
Copyright 2012, Bjoern Gruening and Xavier Lucas
"""
-import sys, os
import argparse
from openbabel import openbabel, pybel
openbabel.obErrorLog.StopLogging()
+
def parse_command_line():
parser = argparse.ArgumentParser()
- parser.add_argument('-iformat', default='sdf' , help='input file format')
+ parser.add_argument('-iformat', default='sdf', help='input file format')
parser.add_argument('-i', '--input', required=True, help='input file name')
parser.add_argument('-o', '--output', required=True, help='output file name')
return parser.parse_args()
+
def remove_ions(args):
outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
for mol in pybel.readfile(args.iformat, args.input):
if mol.OBMol.NumHvyAtoms() > 5:
mol.OBMol.StripSalts(0)
+ if 'inchi' in mol.data:
+ del mol.data['inchi'] # remove inchi cache so modified mol is saved
# Check if new small fragments have been created and remove them
if mol.OBMol.NumHvyAtoms() > 5:
outfile.write(mol)
outfile.close()
+
def __main__():
"""
Remove any counterion and delete any fragment but the largest one for each molecule.
@@ -34,5 +38,6 @@
args = parse_command_line()
remove_ions(args)
-if __name__ == "__main__" :
+
+if __name__ == "__main__":
__main__()
diff -r 3ecaa9634126 -r 2cd8aee0d830 ob_spectrophore_search.py
--- a/ob_spectrophore_search.py Tue Jul 28 08:39:22 2020 -0400
+++ b/ob_spectrophore_search.py Mon Oct 19 14:42:21 2020 +0000
@@ -4,18 +4,17 @@
Output: parse the target file using the same protocol used to generate the databases in our servers. Physico-chemical properties are computed and stored as metadata in the sdf output file.
Copyright 2012, Bjoern Gruening and Xavier Lucas
"""
-import sys, os
import argparse
-import math
+
import numpy as np
-
from openbabel import openbabel, pybel
openbabel.obErrorLog.StopLogging()
-#TODO get rid of eval()
+# TODO get rid of eval()
global spectrophore
spectrophore = pybel.ob.OBSpectrophore()
+
def parse_command_line():
parser = argparse.ArgumentParser()
parser.add_argument('--target', required=True, help='target file name in sdf format with Spectrophores(TM) descriptors stored as meta-data')
@@ -28,26 +27,29 @@
parser.add_argument('-r', '--resolution', type=float, default="3.0", help='Resolution')
return parser.parse_args()
+
def set_parameters(args):
if args.normalization == 'No':
- spectrophore.SetNormalization( spectrophore.NoNormalization )
+ spectrophore.SetNormalization(spectrophore.NoNormalization)
else:
- spectrophore.SetNormalization( eval('spectrophore.NormalizationTowards' + args.normalization) )
- spectrophore.SetAccuracy( eval('spectrophore.AngStepSize' + args.accuracy) )
- spectrophore.SetStereo( eval('spectrophore.' + args.stereo + 'StereoSpecificProbes') )
- spectrophore.SetResolution( args.resolution )
+ spectrophore.SetNormalization(eval('spectrophore.NormalizationTowards' + args.normalization))
+ spectrophore.SetAccuracy(eval('spectrophore.AngStepSize' + args.accuracy))
+ spectrophore.SetStereo(eval('spectrophore.' + args.stereo + 'StereoSpecificProbes'))
+ spectrophore.SetResolution(args.resolution)
return True
+
def Compute_Spectrophores_distance(target_spectrophore, args):
outfile = open(args.output, 'w')
for mol in open(args.library, 'r'):
try:
- distance = ( ( np.asarray( target_spectrophore, dtype=float ) - np.asarray( mol.split('\t')[ args.column - 1 ].strip().split(', '), dtype=float) )**2).sum()
+ distance = ((np.asarray(target_spectrophore, dtype=float) - np.asarray(mol.split('\t')[args.column - 1].strip().split(', '), dtype=float))**2).sum()
except ValueError:
distance = 0
- outfile.write( '%s\t%f\n' % (mol.strip(), distance ) )
+ outfile.write('%s\t%f\n' % (mol.strip(), distance))
outfile.close()
+
def __main__():
"""
Computation of Spectrophores(TM) distances to a target molecule.
@@ -59,7 +61,8 @@
mol = next(pybel.readfile('sdf', args.target))
target_spectrophore = mol.data["Spectrophores(TM)"].strip().split(', ')
# Compute the paired-distance between every molecule in the library and the target
- distances = Compute_Spectrophores_distance(target_spectrophore, args)
+ Compute_Spectrophores_distance(target_spectrophore, args)
+
-if __name__ == "__main__" :
+if __name__ == "__main__":
__main__()
diff -r 3ecaa9634126 -r 2cd8aee0d830 remove_protonation_state.py
--- a/remove_protonation_state.py Tue Jul 28 08:39:22 2020 -0400
+++ b/remove_protonation_state.py Mon Oct 19 14:42:21 2020 +0000
@@ -4,32 +4,37 @@
Output: Molecule file with removed ions and fragments.
Copyright 2013, Bjoern Gruening and Xavier Lucas
"""
-import sys, os
import argparse
from openbabel import openbabel, pybel
openbabel.obErrorLog.StopLogging()
+
def parse_command_line():
parser = argparse.ArgumentParser()
- parser.add_argument('--iformat', default='sdf' , help='input file format')
+ parser.add_argument('--iformat', default='sdf', help='input file format')
parser.add_argument('-i', '--input', required=True, help='input file name')
parser.add_argument('-o', '--output', required=True, help='output file name')
return parser.parse_args()
-def remove_protonation( args ):
+
+def remove_protonation(args):
outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
for mol in pybel.readfile(args.iformat, args.input):
[atom.OBAtom.SetFormalCharge(0) for atom in mol.atoms]
- outfile.write( mol )
+ if 'inchi' in mol.data:
+ del mol.data['inchi'] # remove inchi cache so modified mol is saved
+ outfile.write(mol)
outfile.close()
+
def __main__():
"""
Remove any protonation state from each atom in each molecule.
"""
args = parse_command_line()
- remove_protonation( args )
+ remove_protonation(args)
+
-if __name__ == "__main__" :
+if __name__ == "__main__":
__main__()
diff -r 3ecaa9634126 -r 2cd8aee0d830 subsearch.py
--- a/subsearch.py Tue Jul 28 08:39:22 2020 -0400
+++ b/subsearch.py Mon Oct 19 14:42:21 2020 +0000
@@ -4,36 +4,41 @@
Output: Moleculs filtered with specified substructures.
Copyright 2013, Bjoern Gruening and Xavier Lucas
"""
-import sys, os
import argparse
import multiprocessing
-import tempfile
+import os
+import shutil
import subprocess
-import shutil
+import sys
+import tempfile
from openbabel import openbabel, pybel
openbabel.obErrorLog.StopLogging()
+
def parse_command_line():
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--infile', required=True, help='Molecule file.')
parser.add_argument('--iformat', help='Input format.')
- parser.add_argument('--fastsearch-index', dest="fastsearch_index",
- required=True, help='Path to the openbabel fastsearch index.')
+ parser.add_argument('--fastsearch-index', dest="fastsearch_index", required=True,
+ help='Path to the openbabel fastsearch index.')
parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
- parser.add_argument('--oformat',
- default='smi', help='Output file format')
- parser.add_argument("--max-candidates", dest="max_candidates", type=int,
- default=4000, help="The maximum number of candidates.")
- parser.add_argument('-p', '--processors', type=int,
- default=multiprocessing.cpu_count())
+ parser.add_argument('--oformat', default='smi', help='Output file format')
+ parser.add_argument("--max-candidates", dest="max_candidates", type=int, default=4000,
+ help="The maximum number of candidates.")
+ parser.add_argument('-p', '--processors', type=int,
+ default=multiprocessing.cpu_count())
return parser.parse_args()
+
results = list()
+
+
def mp_callback(res):
results.append(res)
-def mp_helper( query, args ):
+
+def mp_helper(query, args):
"""
Helper function for multiprocessing.
That function is a wrapper around the following command:
@@ -48,8 +53,7 @@
tmp = tempfile.NamedTemporaryFile(delete=False)
cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates)
- child = subprocess.Popen(cmd.split(),
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ child = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = child.communicate()
return_code = child.returncode
@@ -65,43 +69,43 @@
return (tmp.name, query)
-def get_smiles_or_smarts( args ):
+def get_smiles_or_smarts(args):
"""
Wrapper to retrieve a striped SMILES or SMARTS string from different input formats.
"""
if args.iformat in ['smi', 'text', 'tabular']:
- with open( args.infile ) as text_file:
+ with open(args.infile) as text_file:
for line in text_file:
yield line.split('\t')[0].strip()
else:
# inchi or sdf files
- for mol in pybel.readfile( args.iformat, args.infile ):
+ for mol in pybel.readfile(args.iformat, args.infile):
yield mol.write('smiles').split('\t')[0]
-def substructure_search( args ):
- pool = multiprocessing.Pool( args.processors )
- for query in get_smiles_or_smarts( args ):
+def substructure_search(args):
+ pool = multiprocessing.Pool(args.processors)
+ for query in get_smiles_or_smarts(args):
pool.apply_async(mp_helper, args=(query, args), callback=mp_callback)
- #mp_callback( mp_helper(query, args) )
+ # mp_callback(mp_helper(query, args))
pool.close()
pool.join()
if args.oformat == 'names':
- out_handle = open( args.outfile, 'w' )
+ out_handle = open(args.outfile, 'w')
for result_file, query in results:
with open(result_file) as res_handle:
for line in res_handle:
- out_handle.write('%s\t%s\n' % ( line.strip(), query ))
- os.remove( result_file )
+ out_handle.write('%s\t%s\n' % (line.strip(), query))
+ os.remove(result_file)
out_handle.close()
else:
- out_handle = open( args.outfile, 'wb' )
+ out_handle = open(args.outfile, 'wb')
for result_file, query in results:
- res_handle = open(result_file,'rb')
- shutil.copyfileobj( res_handle, out_handle )
+ res_handle = open(result_file, 'rb')
+ shutil.copyfileobj(res_handle, out_handle)
res_handle.close()
- os.remove( result_file )
+ os.remove(result_file)
out_handle.close()
@@ -110,7 +114,8 @@
Multiprocessing Open Babel Substructure Search.
"""
args = parse_command_line()
- substructure_search( args )
+ substructure_search(args)
+
-if __name__ == "__main__" :
+if __name__ == "__main__":
__main__()
diff -r 3ecaa9634126 -r 2cd8aee0d830 test-data/na-sal.inchi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/na-sal.inchi Mon Oct 19 14:42:21 2020 +0000
@@ -0,0 +1,1 @@
+InChI=1S/C7H6O3.Na/c8-6-4-2-1-3-5(6)7(9)10;/h1-4,8H,(H,9,10);/q;+1/p-1
diff -r 3ecaa9634126 -r 2cd8aee0d830 test-data/na-sal_obrmions.inchi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/na-sal_obrmions.inchi Mon Oct 19 14:42:21 2020 +0000
@@ -0,0 +1,1 @@
+InChI=1S/C7H6O3/c8-6-4-2-1-3-5(6)7(9)10/h1-4,8H,(H,9,10)/p-1
diff -r 3ecaa9634126 -r 2cd8aee0d830 test-data/ob_remove_protonation_state.inchi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ob_remove_protonation_state.inchi Mon Oct 19 14:42:21 2020 +0000
@@ -0,0 +1,1 @@
+InChI=1S/C7H5O3.Na/c8-6-4-2-1-3-5(6)7(9)10;/h1-4H,(H,9,10);