Galaxy |

Changeset 5:a5f4b80e6769 (2020-10-19)

Previous changeset 4:de4c80d17527 (2020-07-28) Next changeset 6:5486f7a2b0cb (2020-11-10)

Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 1fe240ef0064a1a4a66d9be1ccace53824280b75"

modified:
change_title_to_metadata_value.py
cheminfolib.py
distance_finder.py
macros.xml
multi_obgrep.py
ob_addh.py
ob_filter.py
ob_genProp.py
ob_remIons.py
ob_spectrophore_search.py
remove_protonation_state.py
subsearch.py

added:
test-data/na-sal.inchi
test-data/na-sal_obrmions.inchi
test-data/ob_remove_protonation_state.inchi

diff -r de4c80d17527 -r a5f4b80e6769 change_title_to_metadata_value.py
--- a/change_title_to_metadata_value.py Tue Jul 28 08:40:43 2020 -0400
+++ b/change_title_to_metadata_value.py Mon Oct 19 14:48:46 2020 +0000

@@ -6,29 +6,27 @@
     value of a given-id of the same molecule file.
"""

-import os
-import sys
import argparse
import random
import string

-
from openbabel import openbabel, pybel
openbabel.obErrorLog.StopLogging()

+
def main():
     parser = argparse.ArgumentParser(
         description="Change the title from a molecule file to metadata \
-value of a given-id of the same molecule file.",
+                     value of a given-id of the same molecule file.",
     )
-    parser.add_argument('--infile', '-i',
-        required=True, help="path to the input file")
-    parser.add_argument('--outfile', '-o',
-        required=True, help="path to the output file")
-    parser.add_argument('--key', '-k',
-        required=True, help="the metadata key from the sdf file which should inlcude the new title")
-    parser.add_argument('--random', '-r',
-        action="store_true", help="Add random suffix to the title.")
+    parser.add_argument('--infile', '-i', required=True,
+                        help="path to the input file")
+    parser.add_argument('--outfile', '-o', required=True,
+                        help="path to the output file")
+    parser.add_argument('--key', '-k', required=True,
+                        help="the metadata key from the sdf file which should inlcude the new title")
+    parser.add_argument('--random', '-r', action="store_true",
+                        help="Add random suffix to the title.")

     args = parser.parse_args()

@@ -39,11 +37,10 @@
             if args.random:
                 suffix = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(13))
                 mol.title += '__%s' % suffix
-        output.write( mol )
+        output.write(mol)

     output.close()

if __name__ == "__main__":
     main()
-

diff -r de4c80d17527 -r a5f4b80e6769 cheminfolib.py
--- a/cheminfolib.py Tue Jul 28 08:40:43 2020 -0400
+++ b/cheminfolib.py Mon Oct 19 14:48:46 2020 +0000

[

b'@@ -4,31 +4,37 @@\n Copyright 2012, Bjoern Gruening and Xavier Lucas\n """\n \n-import os, sys\n+import glob\n+import re\n+import subprocess\n+import sys\n+import tempfile\n+from multiprocessing import Pool\n+\n \n try:\n from galaxy import eggs\n eggs.require(\'psycopg2\')\n-except:\n+except ImportError:\n+ psycopg2 = None\n print(\'psycopg2 is not available. It is currently used in the pgchem wrappers, that are not shipped with default CTB\')\n \n try:\n from openbabel import openbabel, pybel\n openbabel.obErrorLog.StopLogging()\n-except:\n+except ImportError:\n+ openbabel, pybel = None, None\n print(\'OpenBabel could not be found. A few functions are not available without OpenBabel.\')\n \n-from multiprocessing import Pool\n-import glob, tempfile, re\n-import subprocess\n \n-def CountLines( path ):\n+def CountLines(path):\n out = subprocess.Popen([\'wc\', \'-l\', path],\n- stdout=subprocess.PIPE,\n- stderr=subprocess.STDOUT\n- ).communicate()[0]\n+ stdout=subprocess.PIPE,\n+ stderr=subprocess.STDOUT\n+ ).communicate()[0]\n return int(out.partition(b\' \')[0])\n \n+\n def grep(pattern, file_obj):\n grepper = re.compile(pattern)\n for line in file_obj:\n@@ -36,6 +42,7 @@\n return True\n return False\n \n+\n def check_filetype(filepath):\n mol = False\n possible_inchi = True\n@@ -50,76 +57,78 @@\n return \'drf\'\n elif possible_inchi and re.findall(\'^InChI=\', line):\n return \'inchi\'\n- elif re.findall(\'^M\\s+END\', line):\n+ elif re.findall(r\'^M\\s+END\', line):\n mol = True\n # first line is not an InChI, so it can\'t be an InChI file\n possible_inchi = False\n \n if mol:\n- # END can occures before $$$$, so and SDF file will \n+ # END can occures before $$$$, so and SDF file will\n # be recognised as mol, if you not using this hack\'\n return \'mol\'\n return \'smi\'\n \n+\n def db_connect(args):\n try:\n- db_conn = psycopg2.connect("dbname=%s user=%s host=%s password=%s" % (args.dbname, args.dbuser, args.dbhost, args.dbpasswd));\n+ db_conn = psycopg2.connect("dbname=%s user=%s host=%s password=%s" % (args.dbname, args.dbuser, args.dbhost, args.dbpasswd))\n return db_conn\n- except:\n+ except psycopg2.Error:\n sys.exit(\'Unable to connect to the db\')\n \n+\n ColumnNames = {\n- \'can_smiles\' : \'Canonical SMILES\',\n- \'can\' : \'Canonical SMILES\',\n- \'inchi\' : \'InChI\',\n- \'inchi_key\' : \'InChI key\',\n- \'inchi_key_first\' : \'InChI key first\',\n- \'inchi_key_last\' : \'InChI key last\',\n- \'molwt\' : \'Molecular weight\',\n- \'hbd\' : \'Hydrogen-bond donors\',\n- \'donors\' : \'Hydrogen-bond donors\',\n- \'hba\' : \'Hydrogen-bond acceptors\',\n- \'acceptors\' : \'Hydrogen-bond acceptors\',\n- \'rotbonds\' : \'Rotatable bonds\',\n- \'logp\' : \'logP\',\n- \'psa\' : \'Polar surface area\',\n- \'mr\' : \'Molecular refractivity\',\n- \'atoms\' : \'Number of heavy atoms\',\n- \'rings\' : \'Number of rings\',\n- \'set_bits\' : \'FP2 bits\',\n- \'id\' : \'Internal identifier\',\n- \'tani\' : \'Tanimoto coefficient\',\n- \'spectrophore\' : \'Spectrophores(TM)\',\n- \'dist_spectrophore\' : \'Spectrophores(TM) distance to target\',\n- \'synonym\' : \'Entry id\',\n+ \'can_smiles\': \'Canonical SMILES\',\n+ \'can\': \'Canonical SMILES\',\n+ \'inchi\': \'InChI\',\n+ \'inchi_key\': \'InChI key\',\n+ \'inchi_key_first\': \'InChI key first\',\n+ \'inchi_key_last\': \'InChI key last\',\n+ \'molwt\': \'Molecular weight\',\n+ \'hbd\': \'Hydrogen-bond donors\',\n+ \'donors\': \'Hydrogen-bond donors\',\n+ \'hba\': \'Hydrogen-bond acceptors\',\n+ \'acceptors\': \'Hydrogen-bond acceptors\',\n+ \'rotbonds\': \'Rotatable bonds\',\n+ \'logp\': \'logP\',\n+ \'psa\': \'Polar surface area\',\n+ \'mr\': \'Molecular refractivity\',\n+ \'atoms\': \'Number of heavy atoms\',\n+ \'rings\': \'Number of rings\',\n+ \'set_bits\': \'FP2 bits\',\n+ \'id\': \'Internal iden'..b'ol),\n+ }\n+\n \n def get_inchikey(mol):\n conv = openbabel.OBConversion()\n conv.SetInAndOutFormats("mol", "inchi")\n conv.SetOptions("K", conv.OUTOPTIONS)\n- inchikey = conv.WriteString( mol.OBMol )\n+ inchikey = conv.WriteString(mol.OBMol)\n return inchikey\n \n+\n def OBspectrophore(mol):\n spectrophore = pybel.ob.OBSpectrophore()\n # Parameters: rotation angle = 20, normalization for mean and sd, accuracy = 3.0 A and non-stereospecific cages.\n- spectrophore.SetNormalization( spectrophore.NormalizationTowardsZeroMeanAndUnitStd )\n- return \', \'.join( [ "%.3f" % value for value in spectrophore.GetSpectrophore( mol.OBMol ) ] )\n+ spectrophore.SetNormalization(spectrophore.NormalizationTowardsZeroMeanAndUnitStd)\n+ return \', \'.join(["%.3f" % value for value in spectrophore.GetSpectrophore(mol.OBMol)])\n+\n \n-def squared_euclidean_distance(a, b):\n- try:\n- return ((np.asarray( a ) - np.asarray( b ))**2).sum()\n- except ValueError:\n- return 0\n-\n-def split_library( lib_path, lib_format = \'sdf\', package_size = None ):\n+def split_library(lib_path, lib_format=\'sdf\', package_size=None):\n """\n- Split a library of compounds. Usage: split_library( lib_path, lib_format, package_size )\n- IT currently ONLY WORKS FOR SD-Files\n+ Split a library of compounds. Usage: split_library(lib_path, lib_format, package_size)\n+ IT currently ONLY WORKS FOR SD-Files\n """\n pack = 1\n mol_counter = 0\n \n- outfile = open(\'/%s/%s_pack_%i.%s\' % ( \'/\'.join(lib_path.split(\'/\')[:-1]), lib_path.split(\'/\')[-1].split(\'.\')[0], pack, \'sdf\'), \'w\' )\n+ outfile = open(\'/%s/%s_pack_%i.%s\' % (\'/\'.join(lib_path.split(\'/\')[:-1]), lib_path.split(\'/\')[-1].split(\'.\')[0], pack, \'sdf\'), \'w\')\n \n for line in open(lib_path, \'r\'):\n- outfile.write( line )\n+ outfile.write(line)\n if line.strip() == \'$$$$\':\n mol_counter += 1\n if mol_counter % package_size == 0:\n outfile.close()\n pack += 1\n- outfile = open(\'/%s/%s_pack_%i.%s\' % ( \'/\'.join(lib_path.split(\'/\')[:-1]), lib_path.split(\'/\')[-1].split(\'.\')[0], pack, \'sdf\'), \'w\' )\n- if mol_counter*10 % package_size == 0:\n- print(\'%i molecules parsed, starting pack nr. %i\' % ( mol_counter, pack - 1 ))\n+ outfile = open(\'/%s/%s_pack_%i.%s\' % (\'/\'.join(lib_path.split(\'/\')[:-1]), lib_path.split(\'/\')[-1].split(\'.\')[0], pack, \'sdf\'), \'w\')\n+ if mol_counter * 10 % package_size == 0:\n+ print(\'%i molecules parsed, starting pack nr. %i\' % (mol_counter, pack - 1))\n outfile.close()\n \n return True\n \n-def split_smi_library( smiles_file, structures_in_one_file ):\n+\n+def split_smi_library(smiles_file, structures_in_one_file):\n """\n- Split a file with SMILES to several files for multiprocessing usage. \n- Usage: split_smi_library( smiles_file, 10 )\n+ Split a file with SMILES to several files for multiprocessing usage.\n+ Usage: split_smi_library(smiles_file, 10)\n """\n output_files = []\n tfile = tempfile.NamedTemporaryFile(delete=False)\n \n smiles_handle = open(smiles_file, \'r\')\n- for count, line in enumerate( smiles_handle ):\n+ for count, line in enumerate(smiles_handle):\n if count % structures_in_one_file == 0 and count != 0:\n tfile.close()\n output_files.append(tfile.name)\n@@ -247,9 +255,9 @@\n return output_files\n \n \n-def mp_run(input_path, regex, PROCESSES, function_to_call ):\n+def mp_run(input_path, regex, PROCESSES, function_to_call):\n paths = []\n- [ paths.append(compound_file) for compound_file in glob.glob(str(input_path) + str(regex)) ]\n+ [paths.append(compound_file) for compound_file in glob.glob(str(input_path) + str(regex))]\n paths.sort()\n \n pool = Pool(processes=PROCESSES)\n@@ -259,6 +267,6 @@\n \n return paths\n \n+\n if __name__ == \'__main__\':\n print(check_filetype(sys.argv[1]))\n-\n'

diff -r de4c80d17527 -r a5f4b80e6769 distance_finder.py
--- a/distance_finder.py Tue Jul 28 08:40:43 2020 -0400
+++ b/distance_finder.py Mon Oct 19 14:48:46 2020 +0000

[

@@ -11,7 +11,9 @@
# a property named distance1 where the numeric part is the index (starting from 1) of the points (in that example
# there would be properties for distance1, distance2 and distance3.

-import argparse, os, sys, math
+import argparse
+import math
+import sys

from openbabel import pybel

@@ -30,7 +32,6 @@
     :return:
     """

-
     points = []

     # read the points
@@ -41,7 +42,7 @@
                 p = line.split()
                 if len(p) == 3:
                     points.append((float(p[0]), float(p[1]), float(p[2])))
-                    log("Read points",p)
+                    log("Read points", p)
                     continue
             log("Failed to read line:", line)
     log('Found', len(points), 'atom points')
@@ -56,7 +57,6 @@

         try:
             # print("Processing mol", mol.title)
-
             clone = pybel.Molecule(mol)
             clone.removeh()

@@ -82,7 +82,7 @@
             sdf_writer.write(mol)

         except Exception as e:
-            log('Failed to handle molecule: '+ str(e))
+            log('Failed to handle molecule: ' + str(e))
             continue

     sdf_writer.close()
@@ -93,12 +93,10 @@
     global work_dir

     parser = argparse.ArgumentParser(description='XChem distances - measure distances to particular points')
-
     parser.add_argument('-i', '--input', help="SDF containing the 3D molecules to score)")
     parser.add_argument('-p', '--points', help="PDB format file with atoms")
     parser.add_argument('-o', '--outfile', default='output.sdf', help="File name for results")

-
     args = parser.parse_args()
     log("XChem distances args: ", args)

diff -r de4c80d17527 -r a5f4b80e6769 macros.xml
--- a/macros.xml Tue Jul 28 08:40:43 2020 -0400
+++ b/macros.xml Mon Oct 19 14:48:46 2020 +0000

@@ -18,6 +18,11 @@
         help="Valid file types are: SDF, MOL, MOL2, CML, InChI, SMILES, and PDB"/>
     </xml>

+    <xml name="infile_all_types_except_inchi">
+        <param name="infile" format="sdf,mol,mol2,cml,smi,pdb" type="data" label="Molecular input file"
+        help="Valid file types are: SDF, MOL, MOL2, CML, SMILES, and PDB"/>
+    </xml>
+
     <xml name="2D_3D_opts">
         <param name="gen2d" type="boolean" truevalue="--gen2d" falsevalue="" checked="false"
             label="Generate 2D coordinates" help="(--gen2d)" />

diff -r de4c80d17527 -r a5f4b80e6769 multi_obgrep.py
--- a/multi_obgrep.py Tue Jul 28 08:40:43 2020 -0400
+++ b/multi_obgrep.py Mon Oct 19 14:48:46 2020 +0000

@@ -4,40 +4,43 @@
     Output: Molecule file filtered with obgrep.
     Copyright 2013, Bjoern Gruening and Xavier Lucas
"""
-import sys, os
import argparse
import multiprocessing
-import tempfile
-import subprocess
-import shutil
+import os
import shlex
+import shutil
+import subprocess
+import tempfile

-from openbabel import openbabel, pybel
-openbabel.obErrorLog.StopLogging()
+
def parse_command_line():
     parser = argparse.ArgumentParser()
     parser.add_argument('-i', '--infile', required=True, help='Molecule file.')
-    parser.add_argument('-q', '--query',  required=True, help='Query file, containing different SMARTS in each line.')
+    parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.')
     parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
     parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi")
     parser.add_argument("--n-times", dest="n_times", type=int,
-                    default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.")
+                        default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.")
     parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count())
     parser.add_argument("--invert-matches", dest="invert_matches", action="store_true",
-                    default=False, help="Invert the matching, print non-matching molecules.")
+                        default=False, help="Invert the matching, print non-matching molecules.")
     parser.add_argument("--only-name", dest="only_name", action="store_true",
-                    default=False, help="Only print the name of the molecules.")
+                        default=False, help="Only print the name of the molecules.")
     parser.add_argument("--full-match", dest="full_match", action="store_true",
-                    default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.")
+                        default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.")
     parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true",
-                    default=False, help="Print the number of matches.")
+                        default=False, help="Print the number of matches.")
     return parser.parse_args()

+
results = list()
+
+
def mp_callback(res):
     results.append(res)

-def mp_helper( query, args ):
+
+def mp_helper(query, args):
     """
         Helper function for multiprocessing.
         That function is a wrapper around obgrep.
@@ -57,44 +60,44 @@

     tmp = tempfile.NamedTemporaryFile(delete=False)
     cmd = 'obgrep %s "%s" %s' % (' '.join(cmd_list), query, args.infile)
-    child = subprocess.Popen(shlex.split(cmd),
-        stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE)
+    child = subprocess.Popen(shlex.split(cmd), stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE)

     stdout, stderr = child.communicate()
     return (tmp.name, query)

-def obgrep( args ):
-
+def obgrep(args):
     temp_file = tempfile.NamedTemporaryFile()
     temp_link = "%s.%s" % (temp_file.name, args.iformat)
     temp_file.close()
     os.symlink(args.infile, temp_link)
     args.infile = temp_link

-    pool = multiprocessing.Pool( args.processors )
-    for query in open( args.query ):
+    pool = multiprocessing.Pool(args.processors)
+    for query in open(args.query):
         pool.apply_async(mp_helper, args=(query.strip(), args), callback=mp_callback)
-        #mp_callback( mp_helper(query.strip(), args) )
+        # mp_callback(mp_helper(query.strip(), args))
     pool.close()
     pool.join()

-    out_handle = open( args.outfile, 'wb' )
+    out_handle = open(args.outfile, 'wb')
     for result_file, query in results:
-        res_handle = open(result_file,'rb')
-        shutil.copyfileobj( res_handle, out_handle )
+        res_handle = open(result_file, 'rb')
+        shutil.copyfileobj(res_handle, out_handle)
         res_handle.close()
-        os.remove( result_file )
+        os.remove(result_file)
     out_handle.close()

-    os.remove( temp_link )
+    os.remove(temp_link)
+

def __main__():
     """
         Multiprocessing obgrep search.
     """
     args = parse_command_line()
-    obgrep( args )
+    obgrep(args)
+

-if __name__ == "__main__" :
+if __name__ == "__main__":
     __main__()

diff -r de4c80d17527 -r a5f4b80e6769 ob_addh.py
--- a/ob_addh.py Tue Jul 28 08:40:43 2020 -0400
+++ b/ob_addh.py Mon Oct 19 14:48:46 2020 +0000

@@ -3,21 +3,23 @@
     Input:  Molecule file
     Output: Molecule file with hydrogen atoms added at the target pH.
"""
-import sys, os
import argparse
+import sys

from openbabel import openbabel, pybel
openbabel.obErrorLog.StopLogging()

+
def parse_command_line(argv):
     parser = argparse.ArgumentParser()
-    parser.add_argument('--iformat', type=str, default='sdf' , help='input file format')
+    parser.add_argument('--iformat', type=str, default='sdf', help='input file format')
     parser.add_argument('-i', '--input', type=str, required=True, help='input file name')
     parser.add_argument('-o', '--output', type=str, required=True, help='output file name')
     parser.add_argument('--polar', action="store_true", default=False, help='Add hydrogen atoms only to polar atoms')
     parser.add_argument('--pH', type=float, default="7.4", help='Specify target pH value')
     return parser.parse_args()

+
def addh(args):
     outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
     for mol in pybel.readfile(args.iformat, args.input):
@@ -27,6 +29,7 @@
             outfile.write(mol)
     outfile.close()

+
def __main__():
     """
         Add hydrogen atoms at a certain pH value
@@ -34,5 +37,6 @@
     args = parse_command_line(sys.argv)
     addh(args)

-if __name__ == "__main__" :
+
+if __name__ == "__main__":
     __main__()

diff -r de4c80d17527 -r a5f4b80e6769 ob_filter.py
--- a/ob_filter.py Tue Jul 28 08:40:43 2020 -0400
+++ b/ob_filter.py Mon Oct 19 14:48:46 2020 +0000

@@ -6,35 +6,32 @@

     TODO: AND/OR conditions?
"""
-import sys, os
import argparse
+import json
+import shlex
+import subprocess
+import sys
+
import cheminfolib
-import json
-import shlex, subprocess
-
from openbabel import pybel
cheminfolib.pybel_stop_logging()

+
def parse_command_line():
     parser = argparse.ArgumentParser()
     parser.add_argument('-i', '--input', help='Input file name')
     parser.add_argument('-iformat', help='Input file format')
-    parser.add_argument('-oformat',
-        default='smi',
-        help='Output file format')
-    parser.add_argument('-o', '--output',
-        help='Output file name',
-        required=True)
-    parser.add_argument('--filters',
-        help="Specify the filters to apply",
-        required=True,
-        )
-    parser.add_argument('--list_of_names',
-        help="A file with list of molecule names to extract. Every name is in one line.",
-        required=False,
-        )
+    parser.add_argument('-oformat', default='smi',
+                        help='Output file format')
+    parser.add_argument('-o', '--output', help='Output file name',
+                        required=True)
+    parser.add_argument('--filters', help="Specify the filters to apply",
+                        required=True)
+    parser.add_argument('--list_of_names', required=False,
+                        help="A file with list of molecule names to extract. Every name is in one line.")
     return parser.parse_args()

+
def filter_precalculated_compounds(args, filters):
     outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
     for mol in pybel.readfile('sdf', args.input):
@@ -53,6 +50,7 @@
             outfile.write(mol)
     outfile.close()

+
def filter_new_compounds(args, filters):

     if args.iformat == args.oformat:
@@ -70,10 +68,9 @@
         filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max)

     args = shlex.split('%s "%s"' % (cmd, filter_cmd))
-    #print '%s "%s"' % (cmd, filter_cmd)
+    # print '%s "%s"' % (cmd, filter_cmd)
     # calling openbabel with subprocess and pipe potential errors occuring in openbabel to stdout
-    child = subprocess.Popen(args,
-        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

     stdout, stderr = child.communicate()
     return_code = child.returncode
@@ -87,6 +84,7 @@
         sys.stdout.write(stdout.decode('utf-8'))
         sys.stdout.write(stderr.decode('utf-8'))

+
def filter_by_name(args):
     outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
     for mol in pybel.readfile('sdf', args.input):
@@ -95,16 +93,17 @@
                 outfile.write(mol)
     outfile.close()

+
def __main__():
     """
         Select compounds with certain properties from a small library
     """
     args = parse_command_line()
-
+
     if args.filters == '__filter_by_name__':
         filter_by_name(args)
         return
-
+
     # Its a small trick to get the parameters in an easy way from the xml file.
     # To keep it readable in the xml file, many white-spaces are included in that string it needs to be removed.
     # Also the last loop creates a ',{' that is not an valid jason expression.
@@ -114,7 +113,7 @@
         mol = next(pybel.readfile('sdf', args.input))
         for key, elem in filters.items():
             property = cheminfolib.ColumnNames.get(key, key)
-            if not property in mol.data:
+            if property not in mol.data:
                 break
         else:
             # if the for loop finishes in a normal way, we should habe all properties at least in the first molecule
@@ -124,5 +123,5 @@
     filter_new_compounds(args, filters)

-if __name__ == "__main__" :
+if __name__ == "__main__":
     __main__()

diff -r de4c80d17527 -r a5f4b80e6769 ob_genProp.py
--- a/ob_genProp.py Tue Jul 28 08:40:43 2020 -0400
+++ b/ob_genProp.py Mon Oct 19 14:48:46 2020 +0000

[

@@ -4,23 +4,25 @@
     Output: Physico-chemical properties are computed and stored as metadata in the sdf output file.
     Copyright 2012, Bjoern Gruening and Xavier Lucas
"""
-import sys, os
import argparse
+import sys
+
+import cheminfolib
import openbabel
+from openbabel import pybel
openbabel.obErrorLog.StopLogging()
-import cheminfolib

-from openbabel import pybel

def parse_command_line(argv):
     parser = argparse.ArgumentParser()
-    parser.add_argument('--iformat', default='sdf' , help='input file format')
+    parser.add_argument('--iformat', default='sdf', help='input file format')
     parser.add_argument('-i', '--input', required=True, help='input file name')
-    parser.add_argument('--oformat', default='sdf', choices = ['sdf', 'table'] , help='output file format')
+    parser.add_argument('--oformat', default='sdf', choices=['sdf', 'table'], help='output file format')
     parser.add_argument('--header', type=bool, help='Include the header as the first line of the output table')
     parser.add_argument('-o', '--output', required=True, help='output file name')
     return parser.parse_args()

+
def compute_properties(args):
     if args.oformat == 'sdf':
         outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
@@ -29,18 +31,19 @@
         if args.header:
             mol = next(pybel.readfile(args.iformat, args.input))
             metadata = cheminfolib.get_properties_ext(mol)
-            outfile.write( '%s\n' % '\t'.join( [ cheminfolib.ColumnNames[key] for key in metadata ] ) )
+            outfile.write('%s\n' % '\t'.join([cheminfolib.ColumnNames[key] for key in metadata]))

     for mol in pybel.readfile(args.iformat, args.input):
         if mol.OBMol.NumHvyAtoms() > 5:
             metadata = cheminfolib.get_properties_ext(mol)
             if args.oformat == 'sdf':
-                [ mol.data.update( { cheminfolib.ColumnNames[key] : metadata[key] } ) for key in metadata ]
+                [mol.data.update({cheminfolib.ColumnNames[key]: metadata[key]}) for key in metadata]
                 outfile.write(mol)
             else:
-                outfile.write( '%s\n' % ('\t'.join( [ str(metadata[key]) for key in metadata ] ) ) )
+                outfile.write('%s\n' % ('\t'.join([str(metadata[key]) for key in metadata])))
     outfile.close()

+
def __main__():
     """
         Physico-chemical properties are computed and stored as metadata in the sdf output file
@@ -48,5 +51,6 @@
     args = parse_command_line(sys.argv)
     compute_properties(args)

-if __name__ == "__main__" :
+
+if __name__ == "__main__":
     __main__()

diff -r de4c80d17527 -r a5f4b80e6769 ob_remIons.py
--- a/ob_remIons.py Tue Jul 28 08:40:43 2020 -0400
+++ b/ob_remIons.py Mon Oct 19 14:48:46 2020 +0000

[

@@ -4,29 +4,33 @@
     Output: Molecule file with removed ions and fragments.
     Copyright 2012, Bjoern Gruening and Xavier Lucas
"""
-import sys, os
import argparse

from openbabel import openbabel, pybel
openbabel.obErrorLog.StopLogging()

+
def parse_command_line():
     parser = argparse.ArgumentParser()
-    parser.add_argument('-iformat', default='sdf' , help='input file format')
+    parser.add_argument('-iformat', default='sdf', help='input file format')
     parser.add_argument('-i', '--input', required=True, help='input file name')
     parser.add_argument('-o', '--output', required=True, help='output file name')
     return parser.parse_args()

+
def remove_ions(args):
     outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
     for mol in pybel.readfile(args.iformat, args.input):
         if mol.OBMol.NumHvyAtoms() > 5:
             mol.OBMol.StripSalts(0)
+            if 'inchi' in mol.data:
+                del mol.data['inchi']  # remove inchi cache so modified mol is saved
             # Check if new small fragments have been created and remove them
             if mol.OBMol.NumHvyAtoms() > 5:
                 outfile.write(mol)
     outfile.close()

+
def __main__():
     """
         Remove any counterion and delete any fragment but the largest one for each molecule.
@@ -34,5 +38,6 @@
     args = parse_command_line()
     remove_ions(args)

-if __name__ == "__main__" :
+
+if __name__ == "__main__":
     __main__()

diff -r de4c80d17527 -r a5f4b80e6769 ob_spectrophore_search.py
--- a/ob_spectrophore_search.py Tue Jul 28 08:40:43 2020 -0400
+++ b/ob_spectrophore_search.py Mon Oct 19 14:48:46 2020 +0000

[

@@ -4,18 +4,17 @@
     Output: parse the target file using the same protocol used to generate the databases in our servers. Physico-chemical properties are computed and stored as metadata in the sdf output file.
     Copyright 2012, Bjoern Gruening and Xavier Lucas
"""
-import sys, os
import argparse
-import math
+
import numpy as np
-
from openbabel import openbabel, pybel
openbabel.obErrorLog.StopLogging()
-#TODO get rid of eval()
+# TODO get rid of eval()

global spectrophore
spectrophore = pybel.ob.OBSpectrophore()

+
def parse_command_line():
     parser = argparse.ArgumentParser()
     parser.add_argument('--target', required=True, help='target file name in sdf format with Spectrophores(TM) descriptors stored as meta-data')
@@ -28,26 +27,29 @@
     parser.add_argument('-r', '--resolution', type=float, default="3.0", help='Resolution')
     return parser.parse_args()

+
def set_parameters(args):
     if args.normalization == 'No':
-        spectrophore.SetNormalization( spectrophore.NoNormalization )
+        spectrophore.SetNormalization(spectrophore.NoNormalization)
     else:
-        spectrophore.SetNormalization( eval('spectrophore.NormalizationTowards' + args.normalization) )
-    spectrophore.SetAccuracy( eval('spectrophore.AngStepSize' + args.accuracy) )
-    spectrophore.SetStereo( eval('spectrophore.' + args.stereo + 'StereoSpecificProbes') )
-    spectrophore.SetResolution( args.resolution )
+        spectrophore.SetNormalization(eval('spectrophore.NormalizationTowards' + args.normalization))
+    spectrophore.SetAccuracy(eval('spectrophore.AngStepSize' + args.accuracy))
+    spectrophore.SetStereo(eval('spectrophore.' + args.stereo + 'StereoSpecificProbes'))
+    spectrophore.SetResolution(args.resolution)
     return True

+
def Compute_Spectrophores_distance(target_spectrophore, args):
     outfile = open(args.output, 'w')
     for mol in open(args.library, 'r'):
         try:
-            distance = ( ( np.asarray( target_spectrophore, dtype=float ) - np.asarray( mol.split('\t')[ args.column - 1 ].strip().split(', '), dtype=float) )**2).sum()
+            distance = ((np.asarray(target_spectrophore, dtype=float) - np.asarray(mol.split('\t')[args.column - 1].strip().split(', '), dtype=float))**2).sum()
         except ValueError:
             distance = 0
-        outfile.write( '%s\t%f\n' % (mol.strip(), distance ) )
+        outfile.write('%s\t%f\n' % (mol.strip(), distance))
     outfile.close()

+
def __main__():
     """
         Computation of Spectrophores(TM) distances to a target molecule.
@@ -59,7 +61,8 @@
     mol = next(pybel.readfile('sdf', args.target))
     target_spectrophore = mol.data["Spectrophores(TM)"].strip().split(', ')
     # Compute the paired-distance between every molecule in the library and the target
-    distances = Compute_Spectrophores_distance(target_spectrophore, args)
+    Compute_Spectrophores_distance(target_spectrophore, args)
+

-if __name__ == "__main__" :
+if __name__ == "__main__":
     __main__()

diff -r de4c80d17527 -r a5f4b80e6769 remove_protonation_state.py
--- a/remove_protonation_state.py Tue Jul 28 08:40:43 2020 -0400
+++ b/remove_protonation_state.py Mon Oct 19 14:48:46 2020 +0000

[

@@ -4,32 +4,37 @@
     Output: Molecule file with removed ions and fragments.
     Copyright 2013, Bjoern Gruening and Xavier Lucas
"""
-import sys, os
import argparse

from openbabel import openbabel, pybel
openbabel.obErrorLog.StopLogging()

+
def parse_command_line():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--iformat', default='sdf' , help='input file format')
+    parser.add_argument('--iformat', default='sdf', help='input file format')
     parser.add_argument('-i', '--input', required=True, help='input file name')
     parser.add_argument('-o', '--output', required=True, help='output file name')
     return parser.parse_args()

-def remove_protonation( args ):
+
+def remove_protonation(args):
     outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
     for mol in pybel.readfile(args.iformat, args.input):
         [atom.OBAtom.SetFormalCharge(0) for atom in mol.atoms]
-        outfile.write( mol )
+        if 'inchi' in mol.data:
+            del mol.data['inchi']  # remove inchi cache so modified mol is saved
+        outfile.write(mol)
     outfile.close()

+
def __main__():
     """
         Remove any protonation state from each atom in each molecule.
     """
     args = parse_command_line()
-    remove_protonation( args )
+    remove_protonation(args)
+

-if __name__ == "__main__" :
+if __name__ == "__main__":
     __main__()

diff -r de4c80d17527 -r a5f4b80e6769 subsearch.py
--- a/subsearch.py Tue Jul 28 08:40:43 2020 -0400
+++ b/subsearch.py Mon Oct 19 14:48:46 2020 +0000

[

@@ -4,36 +4,41 @@
     Output: Moleculs filtered with specified substructures.
     Copyright 2013, Bjoern Gruening and Xavier Lucas
"""
-import sys, os
import argparse
import multiprocessing
-import tempfile
+import os
+import shutil
import subprocess
-import shutil
+import sys
+import tempfile

from openbabel import openbabel, pybel
openbabel.obErrorLog.StopLogging()

+
def parse_command_line():
     parser = argparse.ArgumentParser()
     parser.add_argument('-i', '--infile', required=True, help='Molecule file.')
     parser.add_argument('--iformat', help='Input format.')
-    parser.add_argument('--fastsearch-index', dest="fastsearch_index",
-        required=True, help='Path to the openbabel fastsearch index.')
+    parser.add_argument('--fastsearch-index', dest="fastsearch_index", required=True,
+                        help='Path to the openbabel fastsearch index.')
     parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
-    parser.add_argument('--oformat',
-        default='smi', help='Output file format')
-    parser.add_argument("--max-candidates", dest="max_candidates", type=int,
-                    default=4000, help="The maximum number of candidates.")
-    parser.add_argument('-p', '--processors', type=int,
-        default=multiprocessing.cpu_count())
+    parser.add_argument('--oformat', default='smi', help='Output file format')
+    parser.add_argument("--max-candidates", dest="max_candidates", type=int, default=4000,
+                        help="The maximum number of candidates.")
+    parser.add_argument('-p', '--processors', type=int,
+                        default=multiprocessing.cpu_count())
     return parser.parse_args()

+
results = list()
+
+
def mp_callback(res):
     results.append(res)

-def mp_helper( query, args ):
+
+def mp_helper(query, args):
     """
         Helper function for multiprocessing.
         That function is a wrapper around the following command:
@@ -48,8 +53,7 @@
     tmp = tempfile.NamedTemporaryFile(delete=False)
     cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates)

-    child = subprocess.Popen(cmd.split(),
-        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    child = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)

     stdout, stderr = child.communicate()
     return_code = child.returncode
@@ -65,43 +69,43 @@
     return (tmp.name, query)

-def get_smiles_or_smarts( args ):
+def get_smiles_or_smarts(args):
     """
     Wrapper to retrieve a striped SMILES or SMARTS string from different input formats.
     """
     if args.iformat in ['smi', 'text', 'tabular']:
-        with open( args.infile ) as text_file:
+        with open(args.infile) as text_file:
             for line in text_file:
                 yield line.split('\t')[0].strip()
     else:
         # inchi or sdf files
-        for mol in pybel.readfile( args.iformat, args.infile ):
+        for mol in pybel.readfile(args.iformat, args.infile):
             yield mol.write('smiles').split('\t')[0]

-def substructure_search( args ):

-    pool = multiprocessing.Pool( args.processors )
-    for query in get_smiles_or_smarts( args ):
+def substructure_search(args):
+    pool = multiprocessing.Pool(args.processors)
+    for query in get_smiles_or_smarts(args):
         pool.apply_async(mp_helper, args=(query, args), callback=mp_callback)
-        #mp_callback( mp_helper(query, args) )
+        # mp_callback(mp_helper(query, args))
     pool.close()
     pool.join()

     if args.oformat == 'names':
-        out_handle = open( args.outfile, 'w' )
+        out_handle = open(args.outfile, 'w')
         for result_file, query in results:
             with open(result_file) as res_handle:
                 for line in res_handle:
-                    out_handle.write('%s\t%s\n' % ( line.strip(), query ))
-            os.remove( result_file )
+                    out_handle.write('%s\t%s\n' % (line.strip(), query))
+            os.remove(result_file)
         out_handle.close()
     else:
-        out_handle = open( args.outfile, 'wb' )
+        out_handle = open(args.outfile, 'wb')
         for result_file, query in results:
-            res_handle = open(result_file,'rb')
-            shutil.copyfileobj( res_handle, out_handle )
+            res_handle = open(result_file, 'rb')
+            shutil.copyfileobj(res_handle, out_handle)
             res_handle.close()
-            os.remove( result_file )
+            os.remove(result_file)
         out_handle.close()

@@ -110,7 +114,8 @@
         Multiprocessing Open Babel Substructure Search.
     """
     args = parse_command_line()
-    substructure_search( args )
+    substructure_search(args)
+

-if __name__ == "__main__" :
+if __name__ == "__main__":
     __main__()

diff -r de4c80d17527 -r a5f4b80e6769 test-data/na-sal.inchi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/na-sal.inchi Mon Oct 19 14:48:46 2020 +0000

@@ -0,0 +1,1 @@
+InChI=1S/C7H6O3.Na/c8-6-4-2-1-3-5(6)7(9)10;/h1-4,8H,(H,9,10);/q;+1/p-1

diff -r de4c80d17527 -r a5f4b80e6769 test-data/na-sal_obrmions.inchi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/na-sal_obrmions.inchi Mon Oct 19 14:48:46 2020 +0000

@@ -0,0 +1,1 @@
+InChI=1S/C7H6O3/c8-6-4-2-1-3-5(6)7(9)10/h1-4,8H,(H,9,10)/p-1

diff -r de4c80d17527 -r a5f4b80e6769 test-data/ob_remove_protonation_state.inchi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ob_remove_protonation_state.inchi Mon Oct 19 14:48:46 2020 +0000

@@ -0,0 +1,1 @@
+InChI=1S/C7H5O3.Na/c8-6-4-2-1-3-5(6)7(9)10;/h1-4H,(H,9,10);