openbabel_svg_depiction: cheminfolib.py comparison

comparison cheminfolib.py @ 15:75ee9c410951 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit d9c51279c061a1da948a2582d5b502ca7573adbf

author	bgruening
date	Thu, 15 Aug 2024 11:02:03 +0000
parents	a66827fc370d
children

comparison

equal deleted inserted replaced

-:7672039a0bb0
+:75ee9c410951
 import subprocess
 import sys
 import tempfile
 from multiprocessing import Pool
 try:
 from galaxy import eggs
-eggs.require('psycopg2')
+eggs.require("psycopg2")
 except ImportError:
 psycopg2 = None
-print('psycopg2 is not available. It is currently used in the pgchem wrappers, that are not shipped with default CTB')
+print(
+"psycopg2 is not available. It is currently used in the pgchem wrappers, that are not shipped with default CTB"
+)
 try:
 from openbabel import openbabel, pybel
 openbabel.obErrorLog.StopLogging()
 except ImportError:
 openbabel, pybel = None, None
-print('OpenBabel could not be found. A few functions are not available without OpenBabel.')
+print(
+"OpenBabel could not be found. A few functions are not available without OpenBabel."
+)
 def CountLines(path):
-out = subprocess.Popen(['wc', '-l', path],
+out = subprocess.Popen(
-stdout=subprocess.PIPE,
+["wc", "-l", path], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
-stderr=subprocess.STDOUT
+).communicate()[0]
-).communicate()[0]
+return int(out.partition(b" ")[0])
-return int(out.partition(b' ')[0])
 def grep(pattern, file_obj):
 grepper = re.compile(pattern)
 for line in file_obj:
 mol = False
 possible_inchi = True
 for line_counter, line in enumerate(open(filepath)):
 if line_counter > 10000:
 break
-if line.find('$$$$') != -1:
+if line.find("$$$$") != -1:
-return 'sdf'
+return "sdf"
-elif line.find('@<TRIPOS>MOLECULE') != -1:
+elif line.find("@<TRIPOS>MOLECULE") != -1:
-return 'mol2'
+return "mol2"
-elif line.find('ligand id') != -1:
+elif line.find("ligand id") != -1:
-return 'drf'
+return "drf"
-elif possible_inchi and re.findall('^InChI=', line):
+elif possible_inchi and re.findall("^InChI=", line):
-return 'inchi'
+return "inchi"
-elif re.findall(r'^M\s+END', line):
+elif re.findall(r"^M\s+END", line):
 mol = True
 # first line is not an InChI, so it can't be an InChI file
 possible_inchi = False
 if mol:
 # END can occures before $$$$, so and SDF file will
 # be recognised as mol, if you not using this hack'
-return 'mol'
+return "mol"
-return 'smi'
+return "smi"
 def db_connect(args):
 try:
-db_conn = psycopg2.connect("dbname=%s user=%s host=%s password=%s" % (args.dbname, args.dbuser, args.dbhost, args.dbpasswd))
+db_conn = psycopg2.connect(
+"dbname=%s user=%s host=%s password=%s"
+% (args.dbname, args.dbuser, args.dbhost, args.dbpasswd)
+)
 return db_conn
 except psycopg2.Error:
-sys.exit('Unable to connect to the db')
+sys.exit("Unable to connect to the db")
 ColumnNames = {
-'can_smiles': 'Canonical SMILES',
+"can_smiles": "Canonical SMILES",
-'can': 'Canonical SMILES',
+"can": "Canonical SMILES",
-'inchi': 'InChI',
+"inchi": "InChI",
-'inchi_key': 'InChI key',
+"inchi_key": "InChI key",
-'inchi_key_first': 'InChI key first',
+"inchi_key_first": "InChI key first",
-'inchi_key_last': 'InChI key last',
+"inchi_key_last": "InChI key last",
-'molwt': 'Molecular weight',
+"molwt": "Molecular weight",
-'hbd': 'Hydrogen-bond donors',
+"hbd": "Hydrogen-bond donors",
-'donors': 'Hydrogen-bond donors',
+"donors": "Hydrogen-bond donors",
-'hba': 'Hydrogen-bond acceptors',
+"hba": "Hydrogen-bond acceptors",
-'acceptors': 'Hydrogen-bond acceptors',
+"acceptors": "Hydrogen-bond acceptors",
-'rotbonds': 'Rotatable bonds',
+"rotbonds": "Rotatable bonds",
-'logp': 'logP',
+"logp": "logP",
-'psa': 'Polar surface area',
+"psa": "Polar surface area",
-'mr': 'Molecular refractivity',
+"mr": "Molecular refractivity",
-'atoms': 'Number of heavy atoms',
+"atoms": "Number of heavy atoms",
-'rings': 'Number of rings',
+"rings": "Number of rings",
-'set_bits': 'FP2 bits',
+"set_bits": "FP2 bits",
-'id': 'Internal identifier',
+"id": "Internal identifier",
-'tani': 'Tanimoto coefficient',
+"tani": "Tanimoto coefficient",
-'spectrophore': 'Spectrophores(TM)',
+"spectrophore": "Spectrophores(TM)",
-'dist_spectrophore': 'Spectrophores(TM) distance to target',
+"dist_spectrophore": "Spectrophores(TM) distance to target",
-'synonym': 'Entry id',
+"synonym": "Entry id",
 }
 OBDescriptor = {
-'atoms': ["atoms", "Number of atoms"],
+"atoms": ["atoms", "Number of atoms"],
-'hatoms': ["hatoms", "Number of heavy atoms"],  # self defined tag hatoms in plugindefines.txt
+"hatoms": [
-'can_smiles': ["cansmi", "Canonical SMILES"],
+"hatoms",
-'can_smilesNS': ["cansmiNS", "Canonical SMILES without isotopes or stereo"],
+"Number of heavy atoms",
+],  # self defined tag hatoms in plugindefines.txt
+"can_smiles": ["cansmi", "Canonical SMILES"],
+"can_smilesNS": ["cansmiNS", "Canonical SMILES without isotopes or stereo"],
 # ["abonds", "Number of aromatic bonds"],
 # ["bonds", "Number of bonds"],
 # ["dbonds", "Number of double bonds"],
 # ["formula", "Chemical formula"],
-'hba': ["HBA1", "Number of Hydrogen Bond Acceptors 1 (JoelLib)"],
+"hba": ["HBA1", "Number of Hydrogen Bond Acceptors 1 (JoelLib)"],
-'hba2': ["HBA2", "Number of Hydrogen Bond Acceptors 2 (JoelLib)"],
+"hba2": ["HBA2", "Number of Hydrogen Bond Acceptors 2 (JoelLib)"],
-'hbd': ["HBD", "Number of Hydrogen Bond Donors (JoelLib)"],
+"hbd": ["HBD", "Number of Hydrogen Bond Donors (JoelLib)"],
-'inchi': ["InChI", "IUPAC InChI identifier"],
+"inchi": ["InChI", "IUPAC InChI identifier"],
-'inchi_key': ["InChIKey", "InChIKey"],
+"inchi_key": ["InChIKey", "InChIKey"],
 # ["L5", "Lipinski Rule of Five"],
-'logp': ["logP", "octanol/water partition coefficient"],
+"logp": ["logP", "octanol/water partition coefficient"],
-'mr': ["MR", "molar refractivity"],
+"mr": ["MR", "molar refractivity"],
-'molwt': ["MW", "Molecular Weight filter"],
+"molwt": ["MW", "Molecular Weight filter"],
 # ["nF", "Number of Fluorine Atoms"],
 # ["s", "SMARTS filter"],
 # ["sbonds", "Number of single bonds"],
 # ["smarts", "SMARTS filter"],
 # ["tbonds", "Number of triple bonds"],
 # ["title", "For comparing a molecule's title"],
-'psa': ["TPSA", "topological polar surface area"],
+"psa": ["TPSA", "topological polar surface area"],
-'rotbonds': ['ROTATABLE_BOND', 'rotatable bonds'],
+"rotbonds": ["ROTATABLE_BOND", "rotatable bonds"],
 }
 def print_output(args, rows):
-if args.oformat == 'table':
+if args.oformat == "table":
-outfile = open(args.output, 'w')
+outfile = open(args.output, "w")
-requested_fields = (filter(lambda x: x not in ["[", "]", "'"], args.fetch)).split(', ')
+requested_fields = (
+filter(lambda x: x not in ["[", "]", "'"], args.fetch)
+).split(", ")
 if args.header:
-outfile.write('Identifier\t' + '\t'.join([ColumnNames[key] for key in requested_fields]) + '\n')
+outfile.write(
+"Identifier\t"
++ "\t".join([ColumnNames[key] for key in requested_fields])
++ "\n"
+)
 for row in rows:
-outfile.write(row['synonym'] + '\t' + '\t'.join([str(row[key]) for key in requested_fields]) + '\n')
+outfile.write(
+row["synonym"]
-elif args.oformat in ['sdf', 'mol2']:
++ "\t"
++ "\t".join([str(row[key]) for key in requested_fields])
++ "\n"
+)
+elif args.oformat in ["sdf", "mol2"]:
 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
 for row in rows:
 try:
-mol = pybel.readstring('sdf', row['mol'])
+mol = pybel.readstring("sdf", row["mol"])
-if args.oformat == 'sdf':
+if args.oformat == "sdf":
-keys = filter(lambda x: x not in ["[", "]", "'"], args.fetch).split(', ')
+keys = filter(lambda x: x not in ["[", "]", "'"], args.fetch).split(
-mol.data.update({ColumnNames['synonym']: row['synonym']})
+", "
-if 'inchi_key' in keys:
+)
-keys = (', '.join(keys).replace("inchi_key", "inchi_key_first, inchi_key_last")).split(', ')
+mol.data.update({ColumnNames["synonym"]: row["synonym"]})
-[mol.data.update({ColumnNames[key]: row[key]}) for key in keys if key]
+if "inchi_key" in keys:
+keys = (
+", ".join(keys).replace(
+"inchi_key", "inchi_key_first, inchi_key_last"
+)
+).split(", ")
+[
+mol.data.update({ColumnNames[key]: row[key]})
+for key in keys
+if key
+]
 outfile.write(mol)
 except OSError:
 pass
 else:
-outfile = open(args.output, 'w')
+outfile = open(args.output, "w")
-outfile.write('\n'.join(['%s\t%s' % (row[args.oformat], row['synonym']) for row in rows]))
+outfile.write(
+"\n".join(["%s\t%s" % (row[args.oformat], row["synonym"]) for row in rows])
+)
 outfile.close()
 def pybel_stop_logging():
 openbabel.obErrorLog.StopLogging()
 def get_properties_ext(mol):
 HBD = pybel.Smarts("[!#6;!H0]")
-HBA = pybel.Smarts(("[$([$([#8,#16]);!$(*=N~O);"
+HBA = pybel.Smarts(
-"!$(*~N=O);X1,X2]),$([#7;v3;"
+(
-"!$([nH]);!$(*(-a)-a)])]"
+"[$([$([#8,#16]);!$(*=N~O);"
-))
+"!$(*~N=O);X1,X2]),$([#7;v3;"
+"!$([nH]);!$(*(-a)-a)])]"
+)
+)
 calc_desc_dict = mol.calcdesc()
 try:
-logp = calc_desc_dict['logP']
+logp = calc_desc_dict["logP"]
 except KeyError:
-logp = calc_desc_dict['LogP']
+logp = calc_desc_dict["LogP"]
-return {"molwt": mol.molwt,
+return {
-"logp": logp,
+"molwt": mol.molwt,
-"donors": len(HBD.findall(mol)),
+"logp": logp,
-"acceptors": len(HBA.findall(mol)),
+"donors": len(HBD.findall(mol)),
-"psa": calc_desc_dict['TPSA'],
+"acceptors": len(HBA.findall(mol)),
-"mr": calc_desc_dict['MR'],
+"psa": calc_desc_dict["TPSA"],
-"rotbonds": mol.OBMol.NumRotors(),
+"mr": calc_desc_dict["MR"],
-"can": mol.write("can").split()[0].strip(),  # tthis one works fine for both zinc and chembl (no ZINC code added after can descriptor string)
+"rotbonds": mol.OBMol.NumRotors(),
-"inchi": mol.write("inchi").strip(),
+"can": mol.write("can")
-"inchi_key": get_inchikey(mol).strip(),
+.split()[0]
-"rings": len(mol.sssr),
+.strip(),  # tthis one works fine for both zinc and chembl (no ZINC code added after can descriptor string)
-"atoms": mol.OBMol.NumHvyAtoms(),
+"inchi": mol.write("inchi").strip(),
-"spectrophore": OBspectrophore(mol),
+"inchi_key": get_inchikey(mol).strip(),
-}
+"rings": len(mol.sssr),
+"atoms": mol.OBMol.NumHvyAtoms(),
+"spectrophore": OBspectrophore(mol),
+}
 def get_inchikey(mol):
 conv = openbabel.OBConversion()
 conv.SetInAndOutFormats("mol", "inchi")
 def OBspectrophore(mol):
 spectrophore = pybel.ob.OBSpectrophore()
 # Parameters: rotation angle = 20, normalization for mean and sd, accuracy = 3.0 A and non-stereospecific cages.
 spectrophore.SetNormalization(spectrophore.NormalizationTowardsZeroMeanAndUnitStd)
-return ', '.join(["%.3f" % value for value in spectrophore.GetSpectrophore(mol.OBMol)])
+return ", ".join(
+["%.3f" % value for value in spectrophore.GetSpectrophore(mol.OBMol)]
+)
-def split_library(lib_path, lib_format='sdf', package_size=None):
+def split_library(lib_path, lib_format="sdf", package_size=None):
 """
 Split a library of compounds. Usage: split_library(lib_path, lib_format, package_size)
 IT currently ONLY WORKS FOR SD-Files
 """
 pack = 1
 mol_counter = 0
-outfile = open('/%s/%s_pack_%i.%s' % ('/'.join(lib_path.split('/')[:-1]), lib_path.split('/')[-1].split('.')[0], pack, 'sdf'), 'w')
+outfile = open(
+"/%s/%s_pack_%i.%s"
-for line in open(lib_path, 'r'):
+% (
+"/".join(lib_path.split("/")[:-1]),
+lib_path.split("/")[-1].split(".")[0],
+pack,
+"sdf",
+),
+"w",
+)
+for line in open(lib_path, "r"):
 outfile.write(line)
-if line.strip() == '$$$$':
+if line.strip() == "$$$$":
 mol_counter += 1
 if mol_counter % package_size == 0:
 outfile.close()
 pack += 1
-outfile = open('/%s/%s_pack_%i.%s' % ('/'.join(lib_path.split('/')[:-1]), lib_path.split('/')[-1].split('.')[0], pack, 'sdf'), 'w')
+outfile = open(
+"/%s/%s_pack_%i.%s"
+% (
+"/".join(lib_path.split("/")[:-1]),
+lib_path.split("/")[-1].split(".")[0],
+pack,
+"sdf",
+),
+"w",
+)
 if mol_counter * 10 % package_size == 0:
-print('%i molecules parsed, starting pack nr. %i' % (mol_counter, pack - 1))
+print(
+"%i molecules parsed, starting pack nr. %i"
+% (mol_counter, pack - 1)
+)
 outfile.close()
 return True
 Usage: split_smi_library(smiles_file, 10)
 """
 output_files = []
 tfile = tempfile.NamedTemporaryFile(delete=False)
-smiles_handle = open(smiles_file, 'r')
+smiles_handle = open(smiles_file, "r")
 for count, line in enumerate(smiles_handle):
 if count % structures_in_one_file == 0 and count != 0:
 tfile.close()
 output_files.append(tfile.name)
 tfile = tempfile.NamedTemporaryFile(delete=False)
 return output_files
 def mp_run(input_path, regex, PROCESSES, function_to_call):
 paths = []
-[paths.append(compound_file) for compound_file in glob.glob(str(input_path) + str(regex))]
+[
+paths.append(compound_file)
+for compound_file in glob.glob(str(input_path) + str(regex))
+]
 paths.sort()
 pool = Pool(processes=PROCESSES)
-print('Process initialized with', PROCESSES, 'processors')
+print("Process initialized with", PROCESSES, "processors")
 result = pool.map_async(function_to_call, paths)
 result.get()
 return paths
-if __name__ == '__main__':
+if __name__ == "__main__":
 print(check_filetype(sys.argv[1]))

Mercurial > repos > bgruening > openbabel_svg_depiction

comparison cheminfolib.py @ 15:75ee9c410951 draft default tip