Previous changeset 12:d3b48303045b (2020-07-28) Next changeset 14:7672039a0bb0 (2020-11-10) |
Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 1fe240ef0064a1a4a66d9be1ccace53824280b75" |
modified:
change_title_to_metadata_value.py cheminfolib.py distance_finder.py macros.xml multi_obgrep.py ob_addh.py ob_filter.py ob_genProp.py ob_remIons.py ob_spectrophore_search.py remove_protonation_state.py subsearch.py |
added:
test-data/na-sal.inchi test-data/na-sal_obrmions.inchi test-data/ob_remove_protonation_state.inchi |
b |
diff -r d3b48303045b -r a66827fc370d change_title_to_metadata_value.py --- a/change_title_to_metadata_value.py Tue Jul 28 08:35:16 2020 -0400 +++ b/change_title_to_metadata_value.py Mon Oct 19 14:45:37 2020 +0000 |
b |
@@ -6,29 +6,27 @@ value of a given-id of the same molecule file. """ -import os -import sys import argparse import random import string - from openbabel import openbabel, pybel openbabel.obErrorLog.StopLogging() + def main(): parser = argparse.ArgumentParser( description="Change the title from a molecule file to metadata \ -value of a given-id of the same molecule file.", + value of a given-id of the same molecule file.", ) - parser.add_argument('--infile', '-i', - required=True, help="path to the input file") - parser.add_argument('--outfile', '-o', - required=True, help="path to the output file") - parser.add_argument('--key', '-k', - required=True, help="the metadata key from the sdf file which should inlcude the new title") - parser.add_argument('--random', '-r', - action="store_true", help="Add random suffix to the title.") + parser.add_argument('--infile', '-i', required=True, + help="path to the input file") + parser.add_argument('--outfile', '-o', required=True, + help="path to the output file") + parser.add_argument('--key', '-k', required=True, + help="the metadata key from the sdf file which should inlcude the new title") + parser.add_argument('--random', '-r', action="store_true", + help="Add random suffix to the title.") args = parser.parse_args() @@ -39,11 +37,10 @@ if args.random: suffix = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(13)) mol.title += '__%s' % suffix - output.write( mol ) + output.write(mol) output.close() if __name__ == "__main__": main() - |
b |
diff -r d3b48303045b -r a66827fc370d cheminfolib.py --- a/cheminfolib.py Tue Jul 28 08:35:16 2020 -0400 +++ b/cheminfolib.py Mon Oct 19 14:45:37 2020 +0000 |
[ |
b'@@ -4,31 +4,37 @@\n Copyright 2012, Bjoern Gruening and Xavier Lucas\n """\n \n-import os, sys\n+import glob\n+import re\n+import subprocess\n+import sys\n+import tempfile\n+from multiprocessing import Pool\n+\n \n try:\n from galaxy import eggs\n eggs.require(\'psycopg2\')\n-except:\n+except ImportError:\n+ psycopg2 = None\n print(\'psycopg2 is not available. It is currently used in the pgchem wrappers, that are not shipped with default CTB\')\n \n try:\n from openbabel import openbabel, pybel\n openbabel.obErrorLog.StopLogging()\n-except:\n+except ImportError:\n+ openbabel, pybel = None, None\n print(\'OpenBabel could not be found. A few functions are not available without OpenBabel.\')\n \n-from multiprocessing import Pool\n-import glob, tempfile, re\n-import subprocess\n \n-def CountLines( path ):\n+def CountLines(path):\n out = subprocess.Popen([\'wc\', \'-l\', path],\n- stdout=subprocess.PIPE,\n- stderr=subprocess.STDOUT\n- ).communicate()[0]\n+ stdout=subprocess.PIPE,\n+ stderr=subprocess.STDOUT\n+ ).communicate()[0]\n return int(out.partition(b\' \')[0])\n \n+\n def grep(pattern, file_obj):\n grepper = re.compile(pattern)\n for line in file_obj:\n@@ -36,6 +42,7 @@\n return True\n return False\n \n+\n def check_filetype(filepath):\n mol = False\n possible_inchi = True\n@@ -50,76 +57,78 @@\n return \'drf\'\n elif possible_inchi and re.findall(\'^InChI=\', line):\n return \'inchi\'\n- elif re.findall(\'^M\\s+END\', line):\n+ elif re.findall(r\'^M\\s+END\', line):\n mol = True\n # first line is not an InChI, so it can\'t be an InChI file\n possible_inchi = False\n \n if mol:\n- # END can occures before $$$$, so and SDF file will \n+ # END can occures before $$$$, so and SDF file will\n # be recognised as mol, if you not using this hack\'\n return \'mol\'\n return \'smi\'\n \n+\n def db_connect(args):\n try:\n- db_conn = psycopg2.connect("dbname=%s user=%s host=%s password=%s" % (args.dbname, args.dbuser, args.dbhost, args.dbpasswd));\n+ db_conn = psycopg2.connect("dbname=%s user=%s host=%s password=%s" % (args.dbname, args.dbuser, args.dbhost, args.dbpasswd))\n return db_conn\n- except:\n+ except psycopg2.Error:\n sys.exit(\'Unable to connect to the db\')\n \n+\n ColumnNames = {\n- \'can_smiles\' : \'Canonical SMILES\',\n- \'can\' : \'Canonical SMILES\',\n- \'inchi\' : \'InChI\',\n- \'inchi_key\' : \'InChI key\',\n- \'inchi_key_first\' : \'InChI key first\',\n- \'inchi_key_last\' : \'InChI key last\',\n- \'molwt\' : \'Molecular weight\',\n- \'hbd\' : \'Hydrogen-bond donors\',\n- \'donors\' : \'Hydrogen-bond donors\',\n- \'hba\' : \'Hydrogen-bond acceptors\',\n- \'acceptors\' : \'Hydrogen-bond acceptors\',\n- \'rotbonds\' : \'Rotatable bonds\',\n- \'logp\' : \'logP\',\n- \'psa\' : \'Polar surface area\',\n- \'mr\' : \'Molecular refractivity\',\n- \'atoms\' : \'Number of heavy atoms\',\n- \'rings\' : \'Number of rings\',\n- \'set_bits\' : \'FP2 bits\',\n- \'id\' : \'Internal identifier\',\n- \'tani\' : \'Tanimoto coefficient\',\n- \'spectrophore\' : \'Spectrophores(TM)\',\n- \'dist_spectrophore\' : \'Spectrophores(TM) distance to target\',\n- \'synonym\' : \'Entry id\',\n+ \'can_smiles\': \'Canonical SMILES\',\n+ \'can\': \'Canonical SMILES\',\n+ \'inchi\': \'InChI\',\n+ \'inchi_key\': \'InChI key\',\n+ \'inchi_key_first\': \'InChI key first\',\n+ \'inchi_key_last\': \'InChI key last\',\n+ \'molwt\': \'Molecular weight\',\n+ \'hbd\': \'Hydrogen-bond donors\',\n+ \'donors\': \'Hydrogen-bond donors\',\n+ \'hba\': \'Hydrogen-bond acceptors\',\n+ \'acceptors\': \'Hydrogen-bond acceptors\',\n+ \'rotbonds\': \'Rotatable bonds\',\n+ \'logp\': \'logP\',\n+ \'psa\': \'Polar surface area\',\n+ \'mr\': \'Molecular refractivity\',\n+ \'atoms\': \'Number of heavy atoms\',\n+ \'rings\': \'Number of rings\',\n+ \'set_bits\': \'FP2 bits\',\n+ \'id\': \'Internal iden'..b'ol),\n+ }\n+\n \n def get_inchikey(mol):\n conv = openbabel.OBConversion()\n conv.SetInAndOutFormats("mol", "inchi")\n conv.SetOptions("K", conv.OUTOPTIONS)\n- inchikey = conv.WriteString( mol.OBMol )\n+ inchikey = conv.WriteString(mol.OBMol)\n return inchikey\n \n+\n def OBspectrophore(mol):\n spectrophore = pybel.ob.OBSpectrophore()\n # Parameters: rotation angle = 20, normalization for mean and sd, accuracy = 3.0 A and non-stereospecific cages.\n- spectrophore.SetNormalization( spectrophore.NormalizationTowardsZeroMeanAndUnitStd )\n- return \', \'.join( [ "%.3f" % value for value in spectrophore.GetSpectrophore( mol.OBMol ) ] )\n+ spectrophore.SetNormalization(spectrophore.NormalizationTowardsZeroMeanAndUnitStd)\n+ return \', \'.join(["%.3f" % value for value in spectrophore.GetSpectrophore(mol.OBMol)])\n+\n \n-def squared_euclidean_distance(a, b):\n- try:\n- return ((np.asarray( a ) - np.asarray( b ))**2).sum()\n- except ValueError:\n- return 0\n-\n-def split_library( lib_path, lib_format = \'sdf\', package_size = None ):\n+def split_library(lib_path, lib_format=\'sdf\', package_size=None):\n """\n- Split a library of compounds. Usage: split_library( lib_path, lib_format, package_size )\n- IT currently ONLY WORKS FOR SD-Files\n+ Split a library of compounds. Usage: split_library(lib_path, lib_format, package_size)\n+ IT currently ONLY WORKS FOR SD-Files\n """\n pack = 1\n mol_counter = 0\n \n- outfile = open(\'/%s/%s_pack_%i.%s\' % ( \'/\'.join(lib_path.split(\'/\')[:-1]), lib_path.split(\'/\')[-1].split(\'.\')[0], pack, \'sdf\'), \'w\' )\n+ outfile = open(\'/%s/%s_pack_%i.%s\' % (\'/\'.join(lib_path.split(\'/\')[:-1]), lib_path.split(\'/\')[-1].split(\'.\')[0], pack, \'sdf\'), \'w\')\n \n for line in open(lib_path, \'r\'):\n- outfile.write( line )\n+ outfile.write(line)\n if line.strip() == \'$$$$\':\n mol_counter += 1\n if mol_counter % package_size == 0:\n outfile.close()\n pack += 1\n- outfile = open(\'/%s/%s_pack_%i.%s\' % ( \'/\'.join(lib_path.split(\'/\')[:-1]), lib_path.split(\'/\')[-1].split(\'.\')[0], pack, \'sdf\'), \'w\' )\n- if mol_counter*10 % package_size == 0:\n- print(\'%i molecules parsed, starting pack nr. %i\' % ( mol_counter, pack - 1 ))\n+ outfile = open(\'/%s/%s_pack_%i.%s\' % (\'/\'.join(lib_path.split(\'/\')[:-1]), lib_path.split(\'/\')[-1].split(\'.\')[0], pack, \'sdf\'), \'w\')\n+ if mol_counter * 10 % package_size == 0:\n+ print(\'%i molecules parsed, starting pack nr. %i\' % (mol_counter, pack - 1))\n outfile.close()\n \n return True\n \n-def split_smi_library( smiles_file, structures_in_one_file ):\n+\n+def split_smi_library(smiles_file, structures_in_one_file):\n """\n- Split a file with SMILES to several files for multiprocessing usage. \n- Usage: split_smi_library( smiles_file, 10 )\n+ Split a file with SMILES to several files for multiprocessing usage.\n+ Usage: split_smi_library(smiles_file, 10)\n """\n output_files = []\n tfile = tempfile.NamedTemporaryFile(delete=False)\n \n smiles_handle = open(smiles_file, \'r\')\n- for count, line in enumerate( smiles_handle ):\n+ for count, line in enumerate(smiles_handle):\n if count % structures_in_one_file == 0 and count != 0:\n tfile.close()\n output_files.append(tfile.name)\n@@ -247,9 +255,9 @@\n return output_files\n \n \n-def mp_run(input_path, regex, PROCESSES, function_to_call ):\n+def mp_run(input_path, regex, PROCESSES, function_to_call):\n paths = []\n- [ paths.append(compound_file) for compound_file in glob.glob(str(input_path) + str(regex)) ]\n+ [paths.append(compound_file) for compound_file in glob.glob(str(input_path) + str(regex))]\n paths.sort()\n \n pool = Pool(processes=PROCESSES)\n@@ -259,6 +267,6 @@\n \n return paths\n \n+\n if __name__ == \'__main__\':\n print(check_filetype(sys.argv[1]))\n-\n' |
b |
diff -r d3b48303045b -r a66827fc370d distance_finder.py --- a/distance_finder.py Tue Jul 28 08:35:16 2020 -0400 +++ b/distance_finder.py Mon Oct 19 14:45:37 2020 +0000 |
[ |
@@ -11,7 +11,9 @@ # a property named distance1 where the numeric part is the index (starting from 1) of the points (in that example # there would be properties for distance1, distance2 and distance3. -import argparse, os, sys, math +import argparse +import math +import sys from openbabel import pybel @@ -30,7 +32,6 @@ :return: """ - points = [] # read the points @@ -41,7 +42,7 @@ p = line.split() if len(p) == 3: points.append((float(p[0]), float(p[1]), float(p[2]))) - log("Read points",p) + log("Read points", p) continue log("Failed to read line:", line) log('Found', len(points), 'atom points') @@ -56,7 +57,6 @@ try: # print("Processing mol", mol.title) - clone = pybel.Molecule(mol) clone.removeh() @@ -82,7 +82,7 @@ sdf_writer.write(mol) except Exception as e: - log('Failed to handle molecule: '+ str(e)) + log('Failed to handle molecule: ' + str(e)) continue sdf_writer.close() @@ -93,12 +93,10 @@ global work_dir parser = argparse.ArgumentParser(description='XChem distances - measure distances to particular points') - parser.add_argument('-i', '--input', help="SDF containing the 3D molecules to score)") parser.add_argument('-p', '--points', help="PDB format file with atoms") parser.add_argument('-o', '--outfile', default='output.sdf', help="File name for results") - args = parser.parse_args() log("XChem distances args: ", args) |
b |
diff -r d3b48303045b -r a66827fc370d macros.xml --- a/macros.xml Tue Jul 28 08:35:16 2020 -0400 +++ b/macros.xml Mon Oct 19 14:45:37 2020 +0000 |
b |
@@ -18,6 +18,11 @@ help="Valid file types are: SDF, MOL, MOL2, CML, InChI, SMILES, and PDB"/> </xml> + <xml name="infile_all_types_except_inchi"> + <param name="infile" format="sdf,mol,mol2,cml,smi,pdb" type="data" label="Molecular input file" + help="Valid file types are: SDF, MOL, MOL2, CML, SMILES, and PDB"/> + </xml> + <xml name="2D_3D_opts"> <param name="gen2d" type="boolean" truevalue="--gen2d" falsevalue="" checked="false" label="Generate 2D coordinates" help="(--gen2d)" /> |
b |
diff -r d3b48303045b -r a66827fc370d multi_obgrep.py --- a/multi_obgrep.py Tue Jul 28 08:35:16 2020 -0400 +++ b/multi_obgrep.py Mon Oct 19 14:45:37 2020 +0000 |
b |
@@ -4,40 +4,43 @@ Output: Molecule file filtered with obgrep. Copyright 2013, Bjoern Gruening and Xavier Lucas """ -import sys, os import argparse import multiprocessing -import tempfile -import subprocess -import shutil +import os import shlex +import shutil +import subprocess +import tempfile -from openbabel import openbabel, pybel -openbabel.obErrorLog.StopLogging() + def parse_command_line(): parser = argparse.ArgumentParser() parser.add_argument('-i', '--infile', required=True, help='Molecule file.') - parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.') + parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.') parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi") parser.add_argument("--n-times", dest="n_times", type=int, - default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.") + default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.") parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count()) parser.add_argument("--invert-matches", dest="invert_matches", action="store_true", - default=False, help="Invert the matching, print non-matching molecules.") + default=False, help="Invert the matching, print non-matching molecules.") parser.add_argument("--only-name", dest="only_name", action="store_true", - default=False, help="Only print the name of the molecules.") + default=False, help="Only print the name of the molecules.") parser.add_argument("--full-match", dest="full_match", action="store_true", - default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.") + default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.") parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true", - default=False, help="Print the number of matches.") + default=False, help="Print the number of matches.") return parser.parse_args() + results = list() + + def mp_callback(res): results.append(res) -def mp_helper( query, args ): + +def mp_helper(query, args): """ Helper function for multiprocessing. That function is a wrapper around obgrep. @@ -57,44 +60,44 @@ tmp = tempfile.NamedTemporaryFile(delete=False) cmd = 'obgrep %s "%s" %s' % (' '.join(cmd_list), query, args.infile) - child = subprocess.Popen(shlex.split(cmd), - stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE) + child = subprocess.Popen(shlex.split(cmd), stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE) stdout, stderr = child.communicate() return (tmp.name, query) -def obgrep( args ): - +def obgrep(args): temp_file = tempfile.NamedTemporaryFile() temp_link = "%s.%s" % (temp_file.name, args.iformat) temp_file.close() os.symlink(args.infile, temp_link) args.infile = temp_link - pool = multiprocessing.Pool( args.processors ) - for query in open( args.query ): + pool = multiprocessing.Pool(args.processors) + for query in open(args.query): pool.apply_async(mp_helper, args=(query.strip(), args), callback=mp_callback) - #mp_callback( mp_helper(query.strip(), args) ) + # mp_callback(mp_helper(query.strip(), args)) pool.close() pool.join() - out_handle = open( args.outfile, 'wb' ) + out_handle = open(args.outfile, 'wb') for result_file, query in results: - res_handle = open(result_file,'rb') - shutil.copyfileobj( res_handle, out_handle ) + res_handle = open(result_file, 'rb') + shutil.copyfileobj(res_handle, out_handle) res_handle.close() - os.remove( result_file ) + os.remove(result_file) out_handle.close() - os.remove( temp_link ) + os.remove(temp_link) + def __main__(): """ Multiprocessing obgrep search. """ args = parse_command_line() - obgrep( args ) + obgrep(args) + -if __name__ == "__main__" : +if __name__ == "__main__": __main__() |
b |
diff -r d3b48303045b -r a66827fc370d ob_addh.py --- a/ob_addh.py Tue Jul 28 08:35:16 2020 -0400 +++ b/ob_addh.py Mon Oct 19 14:45:37 2020 +0000 |
b |
@@ -3,21 +3,23 @@ Input: Molecule file Output: Molecule file with hydrogen atoms added at the target pH. """ -import sys, os import argparse +import sys from openbabel import openbabel, pybel openbabel.obErrorLog.StopLogging() + def parse_command_line(argv): parser = argparse.ArgumentParser() - parser.add_argument('--iformat', type=str, default='sdf' , help='input file format') + parser.add_argument('--iformat', type=str, default='sdf', help='input file format') parser.add_argument('-i', '--input', type=str, required=True, help='input file name') parser.add_argument('-o', '--output', type=str, required=True, help='output file name') parser.add_argument('--polar', action="store_true", default=False, help='Add hydrogen atoms only to polar atoms') parser.add_argument('--pH', type=float, default="7.4", help='Specify target pH value') return parser.parse_args() + def addh(args): outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True) for mol in pybel.readfile(args.iformat, args.input): @@ -27,6 +29,7 @@ outfile.write(mol) outfile.close() + def __main__(): """ Add hydrogen atoms at a certain pH value @@ -34,5 +37,6 @@ args = parse_command_line(sys.argv) addh(args) -if __name__ == "__main__" : + +if __name__ == "__main__": __main__() |
b |
diff -r d3b48303045b -r a66827fc370d ob_filter.py --- a/ob_filter.py Tue Jul 28 08:35:16 2020 -0400 +++ b/ob_filter.py Mon Oct 19 14:45:37 2020 +0000 |
b |
@@ -6,35 +6,32 @@ TODO: AND/OR conditions? """ -import sys, os import argparse +import json +import shlex +import subprocess +import sys + import cheminfolib -import json -import shlex, subprocess - from openbabel import pybel cheminfolib.pybel_stop_logging() + def parse_command_line(): parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help='Input file name') parser.add_argument('-iformat', help='Input file format') - parser.add_argument('-oformat', - default='smi', - help='Output file format') - parser.add_argument('-o', '--output', - help='Output file name', - required=True) - parser.add_argument('--filters', - help="Specify the filters to apply", - required=True, - ) - parser.add_argument('--list_of_names', - help="A file with list of molecule names to extract. Every name is in one line.", - required=False, - ) + parser.add_argument('-oformat', default='smi', + help='Output file format') + parser.add_argument('-o', '--output', help='Output file name', + required=True) + parser.add_argument('--filters', help="Specify the filters to apply", + required=True) + parser.add_argument('--list_of_names', required=False, + help="A file with list of molecule names to extract. Every name is in one line.") return parser.parse_args() + def filter_precalculated_compounds(args, filters): outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) for mol in pybel.readfile('sdf', args.input): @@ -53,6 +50,7 @@ outfile.write(mol) outfile.close() + def filter_new_compounds(args, filters): if args.iformat == args.oformat: @@ -70,10 +68,9 @@ filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max) args = shlex.split('%s "%s"' % (cmd, filter_cmd)) - #print '%s "%s"' % (cmd, filter_cmd) + # print '%s "%s"' % (cmd, filter_cmd) # calling openbabel with subprocess and pipe potential errors occuring in openbabel to stdout - child = subprocess.Popen(args, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = child.communicate() return_code = child.returncode @@ -87,6 +84,7 @@ sys.stdout.write(stdout.decode('utf-8')) sys.stdout.write(stderr.decode('utf-8')) + def filter_by_name(args): outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) for mol in pybel.readfile('sdf', args.input): @@ -95,16 +93,17 @@ outfile.write(mol) outfile.close() + def __main__(): """ Select compounds with certain properties from a small library """ args = parse_command_line() - + if args.filters == '__filter_by_name__': filter_by_name(args) return - + # Its a small trick to get the parameters in an easy way from the xml file. # To keep it readable in the xml file, many white-spaces are included in that string it needs to be removed. # Also the last loop creates a ',{' that is not an valid jason expression. @@ -114,7 +113,7 @@ mol = next(pybel.readfile('sdf', args.input)) for key, elem in filters.items(): property = cheminfolib.ColumnNames.get(key, key) - if not property in mol.data: + if property not in mol.data: break else: # if the for loop finishes in a normal way, we should habe all properties at least in the first molecule @@ -124,5 +123,5 @@ filter_new_compounds(args, filters) -if __name__ == "__main__" : +if __name__ == "__main__": __main__() |
b |
diff -r d3b48303045b -r a66827fc370d ob_genProp.py --- a/ob_genProp.py Tue Jul 28 08:35:16 2020 -0400 +++ b/ob_genProp.py Mon Oct 19 14:45:37 2020 +0000 |
[ |
@@ -4,23 +4,25 @@ Output: Physico-chemical properties are computed and stored as metadata in the sdf output file. Copyright 2012, Bjoern Gruening and Xavier Lucas """ -import sys, os import argparse +import sys + +import cheminfolib import openbabel +from openbabel import pybel openbabel.obErrorLog.StopLogging() -import cheminfolib -from openbabel import pybel def parse_command_line(argv): parser = argparse.ArgumentParser() - parser.add_argument('--iformat', default='sdf' , help='input file format') + parser.add_argument('--iformat', default='sdf', help='input file format') parser.add_argument('-i', '--input', required=True, help='input file name') - parser.add_argument('--oformat', default='sdf', choices = ['sdf', 'table'] , help='output file format') + parser.add_argument('--oformat', default='sdf', choices=['sdf', 'table'], help='output file format') parser.add_argument('--header', type=bool, help='Include the header as the first line of the output table') parser.add_argument('-o', '--output', required=True, help='output file name') return parser.parse_args() + def compute_properties(args): if args.oformat == 'sdf': outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) @@ -29,18 +31,19 @@ if args.header: mol = next(pybel.readfile(args.iformat, args.input)) metadata = cheminfolib.get_properties_ext(mol) - outfile.write( '%s\n' % '\t'.join( [ cheminfolib.ColumnNames[key] for key in metadata ] ) ) + outfile.write('%s\n' % '\t'.join([cheminfolib.ColumnNames[key] for key in metadata])) for mol in pybel.readfile(args.iformat, args.input): if mol.OBMol.NumHvyAtoms() > 5: metadata = cheminfolib.get_properties_ext(mol) if args.oformat == 'sdf': - [ mol.data.update( { cheminfolib.ColumnNames[key] : metadata[key] } ) for key in metadata ] + [mol.data.update({cheminfolib.ColumnNames[key]: metadata[key]}) for key in metadata] outfile.write(mol) else: - outfile.write( '%s\n' % ('\t'.join( [ str(metadata[key]) for key in metadata ] ) ) ) + outfile.write('%s\n' % ('\t'.join([str(metadata[key]) for key in metadata]))) outfile.close() + def __main__(): """ Physico-chemical properties are computed and stored as metadata in the sdf output file @@ -48,5 +51,6 @@ args = parse_command_line(sys.argv) compute_properties(args) -if __name__ == "__main__" : + +if __name__ == "__main__": __main__() |
b |
diff -r d3b48303045b -r a66827fc370d ob_remIons.py --- a/ob_remIons.py Tue Jul 28 08:35:16 2020 -0400 +++ b/ob_remIons.py Mon Oct 19 14:45:37 2020 +0000 |
[ |
@@ -4,29 +4,33 @@ Output: Molecule file with removed ions and fragments. Copyright 2012, Bjoern Gruening and Xavier Lucas """ -import sys, os import argparse from openbabel import openbabel, pybel openbabel.obErrorLog.StopLogging() + def parse_command_line(): parser = argparse.ArgumentParser() - parser.add_argument('-iformat', default='sdf' , help='input file format') + parser.add_argument('-iformat', default='sdf', help='input file format') parser.add_argument('-i', '--input', required=True, help='input file name') parser.add_argument('-o', '--output', required=True, help='output file name') return parser.parse_args() + def remove_ions(args): outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True) for mol in pybel.readfile(args.iformat, args.input): if mol.OBMol.NumHvyAtoms() > 5: mol.OBMol.StripSalts(0) + if 'inchi' in mol.data: + del mol.data['inchi'] # remove inchi cache so modified mol is saved # Check if new small fragments have been created and remove them if mol.OBMol.NumHvyAtoms() > 5: outfile.write(mol) outfile.close() + def __main__(): """ Remove any counterion and delete any fragment but the largest one for each molecule. @@ -34,5 +38,6 @@ args = parse_command_line() remove_ions(args) -if __name__ == "__main__" : + +if __name__ == "__main__": __main__() |
b |
diff -r d3b48303045b -r a66827fc370d ob_spectrophore_search.py --- a/ob_spectrophore_search.py Tue Jul 28 08:35:16 2020 -0400 +++ b/ob_spectrophore_search.py Mon Oct 19 14:45:37 2020 +0000 |
[ |
@@ -4,18 +4,17 @@ Output: parse the target file using the same protocol used to generate the databases in our servers. Physico-chemical properties are computed and stored as metadata in the sdf output file. Copyright 2012, Bjoern Gruening and Xavier Lucas """ -import sys, os import argparse -import math + import numpy as np - from openbabel import openbabel, pybel openbabel.obErrorLog.StopLogging() -#TODO get rid of eval() +# TODO get rid of eval() global spectrophore spectrophore = pybel.ob.OBSpectrophore() + def parse_command_line(): parser = argparse.ArgumentParser() parser.add_argument('--target', required=True, help='target file name in sdf format with Spectrophores(TM) descriptors stored as meta-data') @@ -28,26 +27,29 @@ parser.add_argument('-r', '--resolution', type=float, default="3.0", help='Resolution') return parser.parse_args() + def set_parameters(args): if args.normalization == 'No': - spectrophore.SetNormalization( spectrophore.NoNormalization ) + spectrophore.SetNormalization(spectrophore.NoNormalization) else: - spectrophore.SetNormalization( eval('spectrophore.NormalizationTowards' + args.normalization) ) - spectrophore.SetAccuracy( eval('spectrophore.AngStepSize' + args.accuracy) ) - spectrophore.SetStereo( eval('spectrophore.' + args.stereo + 'StereoSpecificProbes') ) - spectrophore.SetResolution( args.resolution ) + spectrophore.SetNormalization(eval('spectrophore.NormalizationTowards' + args.normalization)) + spectrophore.SetAccuracy(eval('spectrophore.AngStepSize' + args.accuracy)) + spectrophore.SetStereo(eval('spectrophore.' + args.stereo + 'StereoSpecificProbes')) + spectrophore.SetResolution(args.resolution) return True + def Compute_Spectrophores_distance(target_spectrophore, args): outfile = open(args.output, 'w') for mol in open(args.library, 'r'): try: - distance = ( ( np.asarray( target_spectrophore, dtype=float ) - np.asarray( mol.split('\t')[ args.column - 1 ].strip().split(', '), dtype=float) )**2).sum() + distance = ((np.asarray(target_spectrophore, dtype=float) - np.asarray(mol.split('\t')[args.column - 1].strip().split(', '), dtype=float))**2).sum() except ValueError: distance = 0 - outfile.write( '%s\t%f\n' % (mol.strip(), distance ) ) + outfile.write('%s\t%f\n' % (mol.strip(), distance)) outfile.close() + def __main__(): """ Computation of Spectrophores(TM) distances to a target molecule. @@ -59,7 +61,8 @@ mol = next(pybel.readfile('sdf', args.target)) target_spectrophore = mol.data["Spectrophores(TM)"].strip().split(', ') # Compute the paired-distance between every molecule in the library and the target - distances = Compute_Spectrophores_distance(target_spectrophore, args) + Compute_Spectrophores_distance(target_spectrophore, args) + -if __name__ == "__main__" : +if __name__ == "__main__": __main__() |
b |
diff -r d3b48303045b -r a66827fc370d remove_protonation_state.py --- a/remove_protonation_state.py Tue Jul 28 08:35:16 2020 -0400 +++ b/remove_protonation_state.py Mon Oct 19 14:45:37 2020 +0000 |
[ |
@@ -4,32 +4,37 @@ Output: Molecule file with removed ions and fragments. Copyright 2013, Bjoern Gruening and Xavier Lucas """ -import sys, os import argparse from openbabel import openbabel, pybel openbabel.obErrorLog.StopLogging() + def parse_command_line(): parser = argparse.ArgumentParser() - parser.add_argument('--iformat', default='sdf' , help='input file format') + parser.add_argument('--iformat', default='sdf', help='input file format') parser.add_argument('-i', '--input', required=True, help='input file name') parser.add_argument('-o', '--output', required=True, help='output file name') return parser.parse_args() -def remove_protonation( args ): + +def remove_protonation(args): outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True) for mol in pybel.readfile(args.iformat, args.input): [atom.OBAtom.SetFormalCharge(0) for atom in mol.atoms] - outfile.write( mol ) + if 'inchi' in mol.data: + del mol.data['inchi'] # remove inchi cache so modified mol is saved + outfile.write(mol) outfile.close() + def __main__(): """ Remove any protonation state from each atom in each molecule. """ args = parse_command_line() - remove_protonation( args ) + remove_protonation(args) + -if __name__ == "__main__" : +if __name__ == "__main__": __main__() |
b |
diff -r d3b48303045b -r a66827fc370d subsearch.py --- a/subsearch.py Tue Jul 28 08:35:16 2020 -0400 +++ b/subsearch.py Mon Oct 19 14:45:37 2020 +0000 |
[ |
@@ -4,36 +4,41 @@ Output: Moleculs filtered with specified substructures. Copyright 2013, Bjoern Gruening and Xavier Lucas """ -import sys, os import argparse import multiprocessing -import tempfile +import os +import shutil import subprocess -import shutil +import sys +import tempfile from openbabel import openbabel, pybel openbabel.obErrorLog.StopLogging() + def parse_command_line(): parser = argparse.ArgumentParser() parser.add_argument('-i', '--infile', required=True, help='Molecule file.') parser.add_argument('--iformat', help='Input format.') - parser.add_argument('--fastsearch-index', dest="fastsearch_index", - required=True, help='Path to the openbabel fastsearch index.') + parser.add_argument('--fastsearch-index', dest="fastsearch_index", required=True, + help='Path to the openbabel fastsearch index.') parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') - parser.add_argument('--oformat', - default='smi', help='Output file format') - parser.add_argument("--max-candidates", dest="max_candidates", type=int, - default=4000, help="The maximum number of candidates.") - parser.add_argument('-p', '--processors', type=int, - default=multiprocessing.cpu_count()) + parser.add_argument('--oformat', default='smi', help='Output file format') + parser.add_argument("--max-candidates", dest="max_candidates", type=int, default=4000, + help="The maximum number of candidates.") + parser.add_argument('-p', '--processors', type=int, + default=multiprocessing.cpu_count()) return parser.parse_args() + results = list() + + def mp_callback(res): results.append(res) -def mp_helper( query, args ): + +def mp_helper(query, args): """ Helper function for multiprocessing. That function is a wrapper around the following command: @@ -48,8 +53,7 @@ tmp = tempfile.NamedTemporaryFile(delete=False) cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates) - child = subprocess.Popen(cmd.split(), - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + child = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = child.communicate() return_code = child.returncode @@ -65,43 +69,43 @@ return (tmp.name, query) -def get_smiles_or_smarts( args ): +def get_smiles_or_smarts(args): """ Wrapper to retrieve a striped SMILES or SMARTS string from different input formats. """ if args.iformat in ['smi', 'text', 'tabular']: - with open( args.infile ) as text_file: + with open(args.infile) as text_file: for line in text_file: yield line.split('\t')[0].strip() else: # inchi or sdf files - for mol in pybel.readfile( args.iformat, args.infile ): + for mol in pybel.readfile(args.iformat, args.infile): yield mol.write('smiles').split('\t')[0] -def substructure_search( args ): - pool = multiprocessing.Pool( args.processors ) - for query in get_smiles_or_smarts( args ): +def substructure_search(args): + pool = multiprocessing.Pool(args.processors) + for query in get_smiles_or_smarts(args): pool.apply_async(mp_helper, args=(query, args), callback=mp_callback) - #mp_callback( mp_helper(query, args) ) + # mp_callback(mp_helper(query, args)) pool.close() pool.join() if args.oformat == 'names': - out_handle = open( args.outfile, 'w' ) + out_handle = open(args.outfile, 'w') for result_file, query in results: with open(result_file) as res_handle: for line in res_handle: - out_handle.write('%s\t%s\n' % ( line.strip(), query )) - os.remove( result_file ) + out_handle.write('%s\t%s\n' % (line.strip(), query)) + os.remove(result_file) out_handle.close() else: - out_handle = open( args.outfile, 'wb' ) + out_handle = open(args.outfile, 'wb') for result_file, query in results: - res_handle = open(result_file,'rb') - shutil.copyfileobj( res_handle, out_handle ) + res_handle = open(result_file, 'rb') + shutil.copyfileobj(res_handle, out_handle) res_handle.close() - os.remove( result_file ) + os.remove(result_file) out_handle.close() @@ -110,7 +114,8 @@ Multiprocessing Open Babel Substructure Search. """ args = parse_command_line() - substructure_search( args ) + substructure_search(args) + -if __name__ == "__main__" : +if __name__ == "__main__": __main__() |
b |
diff -r d3b48303045b -r a66827fc370d test-data/na-sal.inchi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/na-sal.inchi Mon Oct 19 14:45:37 2020 +0000 |
b |
@@ -0,0 +1,1 @@ +InChI=1S/C7H6O3.Na/c8-6-4-2-1-3-5(6)7(9)10;/h1-4,8H,(H,9,10);/q;+1/p-1 |
b |
diff -r d3b48303045b -r a66827fc370d test-data/na-sal_obrmions.inchi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/na-sal_obrmions.inchi Mon Oct 19 14:45:37 2020 +0000 |
b |
@@ -0,0 +1,1 @@ +InChI=1S/C7H6O3/c8-6-4-2-1-3-5(6)7(9)10/h1-4,8H,(H,9,10)/p-1 |
b |
diff -r d3b48303045b -r a66827fc370d test-data/ob_remove_protonation_state.inchi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ob_remove_protonation_state.inchi Mon Oct 19 14:45:37 2020 +0000 |
b |
@@ -0,0 +1,1 @@ +InChI=1S/C7H5O3.Na/c8-6-4-2-1-3-5(6)7(9)10;/h1-4H,(H,9,10); |