Repository 'ctb_rdkit_descriptors'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/ctb_rdkit_descriptors

Changeset 8:a1c53f0533b0 (2021-02-17)
Previous changeset 7:cf725c82c865 (2020-07-28) Next changeset 9:0993ac4f4a23 (2021-12-04)
Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
modified:
dimorphite_dl.py
rdkit_descriptors.py
rdkit_descriptors.xml
sdf_to_tab.py
test-data/rdkit_descriptors_result1.tab
added:
test-data/mol.pdb
test-data/mol_pdb_charges.tab
test-data/rdkit_descriptors_subset.tab
b
diff -r cf725c82c865 -r a1c53f0533b0 dimorphite_dl.py
--- a/dimorphite_dl.py Tue Jul 28 08:43:19 2020 -0400
+++ b/dimorphite_dl.py Wed Feb 17 12:59:43 2021 +0000
b
@@ -1,3 +1,4 @@
+# flake8: noqa
 # Copyright 2018 Jacob D. Durrant
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,7 +19,6 @@
 """
 
 from __future__ import print_function
-import copy
 import os
 import argparse
 import sys
b
diff -r cf725c82c865 -r a1c53f0533b0 rdkit_descriptors.py
--- a/rdkit_descriptors.py Tue Jul 28 08:43:19 2020 -0400
+++ b/rdkit_descriptors.py Wed Feb 17 12:59:43 2021 +0000
[
@@ -1,44 +1,49 @@
 #!/usr/bin/env python
 
-from rdkit.Chem import Descriptors
-from rdkit import Chem
-import sys, os, re
 import argparse
 import inspect
+import sys
 
-def get_supplier( infile, format = 'smiles' ):
+from rdkit import Chem
+from rdkit.Chem import Descriptors
+
+
+def get_supplier(infile, format='smiles'):
     """
-    Returns a generator over a SMILES or InChI file. Every element is of RDKit 
+    Returns a generator over a SMILES or InChI file. Every element is of RDKit
     molecule and has its original string as _Name property.
     """
     with open(infile) as handle:
         for line in handle:
             line = line.strip()
             if format == 'smiles':
-                mol = Chem.MolFromSmiles( line, sanitize=True )
+                mol = Chem.MolFromSmiles(line, sanitize=True)
             elif format == 'inchi':
-                mol = Chem.inchi.MolFromInchi( line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False )
+                mol = Chem.inchi.MolFromInchi(line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False)
             if mol is None:
                 yield False
             else:
-                mol.SetProp( '_Name', line.split('\t')[0] )
+                mol.SetProp('_Name', line.split('\t')[0])
                 yield mol
 
+
 def get_rdkit_descriptor_functions():
     """
     Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function)
     """
-    ret = [ (name, f) for name, f in inspect.getmembers( Descriptors ) if inspect.isfunction( f ) and not name.startswith( '_' ) ]
+    ret = [(name, f) for name, f in inspect.getmembers(Descriptors) if inspect.isfunction(f) and not name.startswith('_')]
+    # some which are not in the official Descriptors module we need to add manually
+    ret.extend([('FormalCharge', Chem.GetFormalCharge), ('SSSR', Chem.GetSSSR)])
     ret.sort()
     return ret
 
 
-def descriptors( mol, functions ):
+def descriptors(mol, functions):
     """
     Calculates the descriptors of a given molecule.
     """
     for name, function in functions:
-        yield (name, function( mol ))
+        yield (name, function(mol))
 
 
 if __name__ == "__main__":
@@ -46,31 +51,44 @@
     parser.add_argument('-i', '--infile', required=True, help='Path to the input file.')
     parser.add_argument("--iformat", help="Specify the input file format.")
 
-    parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), 
-        default=sys.stdout, help="path to the result file, default it sdtout")
+    parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'),
+                        default=sys.stdout,
+                        help="path to the result file, default is stdout")
+
+    parser.add_argument('-s', '--select', default=None,
+                        help="select a subset of comma-separated descriptors to use")
 
     parser.add_argument("--header", dest="header", action="store_true",
-                    default=False,
-                    help="Write header line.")
+                        default=False,
+                        help="Write header line.")
 
     args = parser.parse_args()
 
     if args.iformat == 'sdf':
-        supplier = Chem.SDMolSupplier( args.infile )
-    elif args.iformat =='smi':
-        supplier = get_supplier( args.infile, format = 'smiles' )
+        supplier = Chem.SDMolSupplier(args.infile)
+    elif args.iformat == 'smi':
+        supplier = get_supplier(args.infile, format='smiles')
     elif args.iformat == 'inchi':
-        supplier = get_supplier( args.infile, format = 'inchi' )
+        supplier = get_supplier(args.infile, format='inchi')
+    elif args.iformat == 'pdb':
+        supplier = [Chem.MolFromPDBFile(args.infile)]
+    elif args.iformat == 'mol2':
+        supplier = [Chem.MolFromMol2File(args.infile)]
 
     functions = get_rdkit_descriptor_functions()
+    if args.select and args.select != 'None':
+        selected = args.select.split(',')
+        functions = [(name, f) for name, f in functions if name in selected]
 
     if args.header:
-        args.outfile.write( '%s\n' % '\t'.join( ['MoleculeID'] + [name for name, f in functions] ) )
+        args.outfile.write('%s\n' % '\t'.join(['MoleculeID'] + [name for name, f in functions]))
 
     for mol in supplier:
         if not mol:
             continue
-        descs = descriptors( mol, functions )
-        molecule_id = mol.GetProp("_Name")
-        args.outfile.write( "%s\n" % '\t'.join( [molecule_id]+ [str(round(res, 6)) for name, res in descs] ) )
-
+        descs = descriptors(mol, functions)
+        try:
+            molecule_id = mol.GetProp("_Name")
+        except KeyError:
+            molecule_id = Chem.MolToSmiles(mol)
+        args.outfile.write("%s\n" % '\t'.join([molecule_id] + [str(round(res, 6)) for name, res in descs]))
b
diff -r cf725c82c865 -r a1c53f0533b0 rdkit_descriptors.xml
--- a/rdkit_descriptors.xml Tue Jul 28 08:43:19 2020 -0400
+++ b/rdkit_descriptors.xml Wed Feb 17 12:59:43 2021 +0000
b
b'@@ -1,9 +1,9 @@\n <tool id="ctb_rdkit_descriptors" name="Descriptors" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">\n+    <description>calculated with RDKit</description>\n     <macros>\n         <token name="@TOOL_VERSION@">2020.03.4</token>\n-        <token name="@GALAXY_VERSION@">0</token>\n+        <token name="@GALAXY_VERSION@">1</token>\n     </macros>\n-    <description>calculated with RDKit</description>\n     <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism-->\n     <requirements>\n         <requirement type="package" version="@TOOL_VERSION@">rdkit</requirement>\n@@ -14,14 +14,222 @@\n             -i \'${infile}\'\n             --iformat \'${infile.ext}\'\n             -o \'${outfile}\'\n+            #if $select_multiple:\n+                -s \'${select_multiple}\'\n+            #end if\n             $header\n ]]>\n     </command>\n     <inputs>\n-        <param name="infile" format="smi,sdf,mol2" type="data" label="Molecule data"\n-            help="In SDF or SMILES format"/>\n+        <param name="infile" format="smi,sdf,mol2,pdb,inchi" type="data" label="Molecule data"\n+            help="In SDF, SMILES, MOL2, PDB or InChI format"/>\n         <param name="header" type="boolean" label="Include the descriptor name as header"\n             truevalue="--header" falsevalue="" checked="false" />\n+        <param name="select_multiple" type="select" label="Calculate only a subset of the descriptors"\n+            multiple="true" help="If left blank, all descriptors will be calculated">\n+            <option value="BalabanJ">BalabanJ</option>\n+            <option value="BertzCT">BertzCT</option>\n+            <option value="Chi0">Chi0</option>\n+            <option value="Chi0n">Chi0n</option>\n+            <option value="Chi0v">Chi0v</option>\n+            <option value="Chi1">Chi1</option>\n+            <option value="Chi1n">Chi1n</option>\n+            <option value="Chi1v">Chi1v</option>\n+            <option value="Chi2n">Chi2n</option>\n+            <option value="Chi2v">Chi2v</option>\n+            <option value="Chi3n">Chi3n</option>\n+            <option value="Chi3v">Chi3v</option>\n+            <option value="Chi4n">Chi4n</option>\n+            <option value="Chi4v">Chi4v</option>\n+            <option value="EState_VSA1">EState_VSA1</option>\n+            <option value="EState_VSA10">EState_VSA10</option>\n+            <option value="EState_VSA11">EState_VSA11</option>\n+            <option value="EState_VSA2">EState_VSA2</option>\n+            <option value="EState_VSA3">EState_VSA3</option>\n+            <option value="EState_VSA4">EState_VSA4</option>\n+            <option value="EState_VSA5">EState_VSA5</option>\n+            <option value="EState_VSA6">EState_VSA6</option>\n+            <option value="EState_VSA7">EState_VSA7</option>\n+            <option value="EState_VSA8">EState_VSA8</option>\n+            <option value="EState_VSA9">EState_VSA9</option>\n+            <option value="ExactMolWt">ExactMolWt</option>\n+            <option value="FormalCharge">FormalCharge</option>\n+            <option value="FpDensityMorgan1">FpDensityMorgan1</option>\n+            <option value="FpDensityMorgan2">FpDensityMorgan2</option>\n+            <option value="FpDensityMorgan3">FpDensityMorgan3</option>\n+            <option value="FractionCSP3">FractionCSP3</option>\n+            <option value="HallKierAlpha">HallKierAlpha</option>\n+            <option value="HeavyAtomCount">HeavyAtomCount</option>\n+            <option value="HeavyAtomMolWt">HeavyAtomMolWt</option>\n+            <option value="Ipc">Ipc</option>\n+            <option value="Kappa1">Kappa1</option>\n+            <option value="Kappa2">Kappa2</option>\n+            <option value="Kappa3">Kappa3</option>\n+            <option value="LabuteASA">LabuteASA</option>\n+            <option value="MaxAbsEStateIndex">MaxAbsEStateIndex</option>\n+            <option value="MaxAbsPartialCharge">MaxAbsPartialCharge</option>\n+        '..b'--------+------------+\n-|          HallKierAlpha            |   Python   |\n-+-----------------------------------+------------+\n-|         Kappa1 - Kappa3           |   Python   |\n-+-----------------------------------+------------+\n-|            Chi0, Chi1             |   Python   |\n-+-----------------------------------+------------+\n-|           Chi0n - Chi4n           |   Python   |\n-+-----------------------------------+------------+\n-|           Chi0v - Chi4v           |   Python   |\n-+-----------------------------------+------------+\n-|              MolLogP              |     C++    |\n-+-----------------------------------+------------+\n-|               MolMR               |     C++    |\n-+-----------------------------------+------------+\n-|               MolWt               |     C++    |\n-+-----------------------------------+------------+\n-|           HeavyAtomCount          |   Python   |\n-+-----------------------------------+------------+\n-|           HeavyAtomMolWt          |   Python   |\n-+-----------------------------------+------------+\n-|             NHOHCount             |     C++    |\n-+-----------------------------------+------------+\n-|              NOCount              |     C++    |\n-+-----------------------------------+------------+\n-|            NumHAcceptors          |     C++    |\n-+-----------------------------------+------------+\n-|             NumHDonors            |     C++    |\n-+-----------------------------------+------------+\n-|            NumHeteroatoms         |     C++    |\n-+-----------------------------------+------------+\n-|          NumRotatableBonds        |     C++    |\n-+-----------------------------------+------------+\n-|         NumValenceElectrons       |   Python   |\n-+-----------------------------------+------------+\n-|              RingCount            |     C++    |\n-+-----------------------------------+------------+\n-|                 TPSA              |     C++    |\n-+-----------------------------------+------------+\n-|              LabuteASA            |     C++    |\n-+-----------------------------------+------------+\n-|       PEOE_VSA1 - PEOE_VSA14      | Python/C++ |\n-+-----------------------------------+------------+\n-|         SMR_VSA1 - SMR_VSA10      | Python/C++ |\n-+-----------------------------------+------------+\n-|      SlogP_VSA1 - SlogP_VSA12     | Python/C++ |\n-+-----------------------------------+------------+\n-|     EState_VSA1 - EState_VSA11    |   Python   |\n-+-----------------------------------+------------+\n-|     VSA_EState1 - VSA_EState10    |   Python   |\n-+-----------------------------------+------------+\n-|           Topliss fragments       |   Python   |\n-+-----------------------------------+------------+\n-\n+| - Gasteiger/Marsili Partial Charges\n+| - BalabanJ\n+| - BertzCT\n+| - Ipc\n+| - HallKierAlpha\n+| - Kappa1 - Kappa3\n+| - Chi0, Chi1\n+| - Chi0n - Chi4n\n+| - Chi0v - Chi4v\n+| - FormalCharge\n+| - MolLogP\n+| - MolMR\n+| - MolWt\n+| - HeavyAtomCount\n+| - HeavyAtomMolWt\n+| - NHOHCount\n+| - NOCount\n+| - NumHAcceptors\n+| - NumHDonors\n+| - NumHeteroatoms\n+| - NumRotatableBonds\n+| - NumValenceElectrons\n+| - RingCount\n+| - SSSR\n+| - TPSA\n+| - LabuteASA\n+| - PEOE_VSA1 - PEOE_VSA14\n+| - SMR_VSA1 - SMR_VSA10\n+| - SlogP_VSA1 - SlogP_VSA12\n+| - EState_VSA1 - EState_VSA11\n+| - VSA_EState1 - VSA_EState10\n+| - Topliss fragments\n |\n | A full list of the descriptors can be obtained here_.\n \n@@ -119,7 +306,7 @@\n \n **Hint**\n \n-Use the **cut columns from a table** tool to select just the desired descriptors.\n+Use the **cut columns from a table** tool to select just the desired descriptors. Alternatively, if you just want to calculate a subset of the descriptors, there is an option available to do so.\n \n -----\n \n@@ -130,6 +317,8 @@\n | - `SDF Format`_\n | - `SMILES Format`_\n | - `Corina MOL2`_\n+| - PDB\n+| - InChi\n \n .. _SDF Format: http://en.wikipedia.org/wiki/Chemical_table_file\n .. _SMILES Format: http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification\n'
b
diff -r cf725c82c865 -r a1c53f0533b0 sdf_to_tab.py
--- a/sdf_to_tab.py Tue Jul 28 08:43:19 2020 -0400
+++ b/sdf_to_tab.py Wed Feb 17 12:59:43 2021 +0000
b
@@ -1,8 +1,10 @@
 #!/usr/bin/env python3
 import argparse
+
 import pandas as pd
 from rdkit import Chem
 
+
 def sdf_to_tab(vars):
     mols = Chem.SDMolSupplier(vars.inp, sanitize=False)
     df = pd.DataFrame()  # for output
@@ -29,6 +31,7 @@
     sorted_cols = sorted(df.columns.values.tolist())
     df.to_csv(vars.out, sep='\t', header=vars.header, columns=sorted_cols)
 
+
 def main():
     parser = argparse.ArgumentParser(description="Convert SDF to tabular")
     parser.add_argument('--inp', '-i', help="The input file", required=True)
@@ -41,7 +44,7 @@
     parser.add_argument('--name', '-n', action='store_true',
                         help="Include molecule name in output.")
     sdf_to_tab(parser.parse_args())
-    
+
 
 if __name__ == "__main__":
     main()
b
diff -r cf725c82c865 -r a1c53f0533b0 test-data/mol.pdb
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mol.pdb Wed Feb 17 12:59:43 2021 +0000
b
@@ -0,0 +1,72 @@
+COMPND    CNCC(O)CCCc1ccccc1 
+AUTHOR    GENERATED BY OPEN BABEL 3.1.0
+HETATM    1  C   UNL     1       9.206   6.617  23.375  1.00  0.00           C  
+HETATM    2  N   UNL     1       9.288   5.239  22.843  1.00  0.00           N1+
+HETATM    3  C   UNL     1       9.901   4.245  23.787  1.00  0.00           C  
+HETATM    4  C   UNL     1       9.552   2.837  23.275  1.00  0.00           C  
+HETATM    5  O   UNL     1      10.280   2.666  22.057  1.00  0.00           O  
+HETATM    6  C   UNL     1       9.877   1.780  24.331  1.00  0.00           C  
+HETATM    7  C   UNL     1       9.398   0.387  23.922  1.00  0.00           C  
+HETATM    8  C   UNL     1      10.424  -0.687  24.293  1.00  0.00           C  
+HETATM    9  C   UNL     1      11.616  -0.605  23.380  1.00  0.00           C  
+HETATM   10  C   UNL     1      11.773  -1.516  22.327  1.00  0.00           C  
+HETATM   11  C   UNL     1      12.918  -1.487  21.533  1.00  0.00           C  
+HETATM   12  C   UNL     1      13.916  -0.552  21.786  1.00  0.00           C  
+HETATM   13  C   UNL     1      13.767   0.367  22.824  1.00  0.00           C  
+HETATM   14  C   UNL     1      12.623   0.342  23.620  1.00  0.00           C  
+HETATM   15  H   UNL     1       8.759   7.256  22.643  1.00  0.00           H  
+HETATM   16  H   UNL     1      10.189   6.970  23.605  1.00  0.00           H  
+HETATM   17  H   UNL     1       8.609   6.620  24.264  1.00  0.00           H  
+HETATM   18  H   UNL     1       9.849   5.259  21.991  1.00  0.00           H  
+HETATM   19  H   UNL     1       8.329   4.932  22.679  1.00  0.00           H  
+HETATM   20  H   UNL     1       9.504   4.384  24.771  1.00  0.00           H  
+HETATM   21  H   UNL     1      10.962   4.375  23.832  1.00  0.00           H  
+HETATM   22  H   UNL     1       8.505   2.722  23.087  1.00  0.00           H  
+HETATM   23  H   UNL     1      11.228   2.771  22.229  1.00  0.00           H  
+HETATM   24  H   UNL     1       9.401   2.052  25.249  1.00  0.00           H  
+HETATM   25  H   UNL     1      10.941   1.741  24.440  1.00  0.00           H  
+HETATM   26  H   UNL     1       9.242   0.370  22.864  1.00  0.00           H  
+HETATM   27  H   UNL     1       8.487   0.178  24.443  1.00  0.00           H  
+HETATM   28  H   UNL     1       9.974  -1.653  24.199  1.00  0.00           H  
+HETATM   29  H   UNL     1      10.746  -0.530  25.301  1.00  0.00           H  
+HETATM   30  H   UNL     1      11.037  -2.214  22.138  1.00  0.00           H  
+HETATM   31  H   UNL     1      13.025  -2.159  20.758  1.00  0.00           H  
+HETATM   32  H   UNL     1      14.769  -0.538  21.204  1.00  0.00           H  
+HETATM   33  H   UNL     1      14.504   1.066  23.003  1.00  0.00           H  
+HETATM   34  H   UNL     1      12.517   1.022  24.389  1.00  0.00           H  
+CONECT    1    2   15   16   17                                       
+CONECT    2    1    3   18   19                                       
+CONECT    3    2    4   20   21                                       
+CONECT    4    3    5    6   22                                       
+CONECT    5    4   23                                                 
+CONECT    6    4    7   24   25                                       
+CONECT    7    6    8   26   27                                       
+CONECT    8    7    9   28   29                                       
+CONECT    9    8   10   10   14                                       
+CONECT   10    9    9   11   30                                       
+CONECT   11   10   12   12   31                                       
+CONECT   12   11   11   13   32                                       
+CONECT   13   12   14   14   33                                       
+CONECT   14    9   13   13   34                                       
+CONECT   15    1                                                      
+CONECT   16    1                                                      
+CONECT   17    1                                                      
+CONECT   18    2                                                      
+CONECT   19    2                                                      
+CONECT   20    3                                                      
+CONECT   21    3                                                      
+CONECT   22    4                                                      
+CONECT   23    5                                                      
+CONECT   24    6                                                      
+CONECT   25    6                                                      
+CONECT   26    7                                                      
+CONECT   27    7                                                      
+CONECT   28    8                                                      
+CONECT   29    8                                                      
+CONECT   30   10                                                      
+CONECT   31   11                                                      
+CONECT   32   12                                                      
+CONECT   33   13                                                      
+CONECT   34   14                                                      
+MASTER        0    0    0    0    0    0    0    0   34    0   34    0
+END
b
diff -r cf725c82c865 -r a1c53f0533b0 test-data/mol_pdb_charges.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mol_pdb_charges.tab Wed Feb 17 12:59:43 2021 +0000
b
@@ -0,0 +1,1 @@
+CNCC(O)CCCc1ccccc1 1
b
diff -r cf725c82c865 -r a1c53f0533b0 test-data/rdkit_descriptors_result1.tab
--- a/test-data/rdkit_descriptors_result1.tab Tue Jul 28 08:43:19 2020 -0400
+++ b/test-data/rdkit_descriptors_result1.tab Wed Feb 17 12:59:43 2021 +0000
b
@@ -1,2 +1,2 @@
-MoleculeID BalabanJ BertzCT Chi0 Chi0n Chi0v Chi1 Chi1n Chi1v Chi2n Chi2v Chi3n Chi3v Chi4n Chi4v EState_VSA1 EState_VSA10 EState_VSA11 EState_VSA2 EState_VSA3 EState_VSA4 EState_VSA5 EState_VSA6 EState_VSA7 EState_VSA8 EState_VSA9 ExactMolWt FpDensityMorgan1 FpDensityMorgan2 FpDensityMorgan3 FractionCSP3 HallKierAlpha HeavyAtomCount HeavyAtomMolWt Ipc Kappa1 Kappa2 Kappa3 LabuteASA MaxAbsEStateIndex MaxAbsPartialCharge MaxEStateIndex MaxPartialCharge MinAbsEStateIndex MinAbsPartialCharge MinEStateIndex MinPartialCharge MolLogP MolMR MolWt NHOHCount NOCount NumAliphaticCarbocycles NumAliphaticHeterocycles NumAliphaticRings NumAromaticCarbocycles NumAromaticHeterocycles NumAromaticRings NumHAcceptors NumHDonors NumHeteroatoms NumRadicalElectrons NumRotatableBonds NumSaturatedCarbocycles NumSaturatedHeterocycles NumSaturatedRings NumValenceElectrons PEOE_VSA1 PEOE_VSA10 PEOE_VSA11 PEOE_VSA12 PEOE_VSA13 PEOE_VSA14 PEOE_VSA2 PEOE_VSA3 PEOE_VSA4 PEOE_VSA5 PEOE_VSA6 PEOE_VSA7 PEOE_VSA8 PEOE_VSA9 RingCount SMR_VSA1 SMR_VSA10 SMR_VSA2 SMR_VSA3 SMR_VSA4 SMR_VSA5 SMR_VSA6 SMR_VSA7 SMR_VSA8 SMR_VSA9 SlogP_VSA1 SlogP_VSA10 SlogP_VSA11 SlogP_VSA12 SlogP_VSA2 SlogP_VSA3 SlogP_VSA4 SlogP_VSA5 SlogP_VSA6 SlogP_VSA7 SlogP_VSA8 SlogP_VSA9 TPSA VSA_EState1 VSA_EState10 VSA_EState2 VSA_EState3 VSA_EState4 VSA_EState5 VSA_EState6 VSA_EState7 VSA_EState8 VSA_EState9 fr_Al_COO fr_Al_OH fr_Al_OH_noTert fr_ArN fr_Ar_COO fr_Ar_N fr_Ar_NH fr_Ar_OH fr_COO fr_COO2 fr_C_O fr_C_O_noCOO fr_C_S fr_HOCCN fr_Imine fr_NH0 fr_NH1 fr_NH2 fr_N_O fr_Ndealkylation1 fr_Ndealkylation2 fr_Nhpyrrole fr_SH fr_aldehyde fr_alkyl_carbamate fr_alkyl_halide fr_allylic_oxid fr_amide fr_amidine fr_aniline fr_aryl_methyl fr_azide fr_azo fr_barbitur fr_benzene fr_benzodiazepine fr_bicyclic fr_diazo fr_dihydropyridine fr_epoxide fr_ester fr_ether fr_furan fr_guanido fr_halogen fr_hdrzine fr_hdrzone fr_imidazole fr_imide fr_isocyan fr_isothiocyan fr_ketone fr_ketone_Topliss fr_lactam fr_lactone fr_methoxy fr_morpholine fr_nitrile fr_nitro fr_nitro_arom fr_nitro_arom_nonortho fr_nitroso fr_oxazole fr_oxime fr_para_hydroxylation fr_phenol fr_phenol_noOrthoHbond fr_phos_acid fr_phos_ester fr_piperdine fr_piperzine fr_priamide fr_prisulfonamd fr_pyridine fr_quatN fr_sulfide fr_sulfonamd fr_sulfone fr_term_acetylene fr_tetrazole fr_thiazole fr_thiocyan fr_thiophene fr_unbrch_alkane fr_urea qed
-3037 2.370228 503.61088 12.413849 8.821565 10.333422 8.058551 5.008353 5.764282 3.722845 4.595717 2.463985 2.934179 1.596526 1.985926 0.0 10.213055 0.0 11.499024 27.592991 0.0 12.132734 24.265468 0.0 0.0 23.20188 268.005785 0.764706 1.176471 1.588235 0.076923 -1.38 17 259.047 6943.4452 12.086867 4.861181 2.842672 109.048439 9.683208 0.507662 9.683208 0.118709 0.147014 0.118709 0.147014 -0.507662 3.9954 69.0396 269.127 2 2 0 0 0 2 0 2 2 2 4 0 2 0 0 0 88 10.213055 11.499024 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 23.20188 47.525105 16.466088 0.0 2 10.213055 23.20188 0.0 0.0 0.0 6.420822 0.0 57.570372 0.0 11.499024 0.0 0.0 11.499024 23.20188 10.213055 6.420822 0.0 11.126903 36.398202 10.045267 0.0 0.0 40.46 0.0 11.70887 0.0 20.448487 1.29642 0.294029 9.600621 0.373796 0.0 0.0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.864713
+MoleculeID BalabanJ BertzCT Chi0 Chi0n Chi0v Chi1 Chi1n Chi1v Chi2n Chi2v Chi3n Chi3v Chi4n Chi4v EState_VSA1 EState_VSA10 EState_VSA11 EState_VSA2 EState_VSA3 EState_VSA4 EState_VSA5 EState_VSA6 EState_VSA7 EState_VSA8 EState_VSA9 ExactMolWt FormalCharge FpDensityMorgan1 FpDensityMorgan2 FpDensityMorgan3 FractionCSP3 HallKierAlpha HeavyAtomCount HeavyAtomMolWt Ipc Kappa1 Kappa2 Kappa3 LabuteASA MaxAbsEStateIndex MaxAbsPartialCharge MaxEStateIndex MaxPartialCharge MinAbsEStateIndex MinAbsPartialCharge MinEStateIndex MinPartialCharge MolLogP MolMR MolWt NHOHCount NOCount NumAliphaticCarbocycles NumAliphaticHeterocycles NumAliphaticRings NumAromaticCarbocycles NumAromaticHeterocycles NumAromaticRings NumHAcceptors NumHDonors NumHeteroatoms NumRadicalElectrons NumRotatableBonds NumSaturatedCarbocycles NumSaturatedHeterocycles NumSaturatedRings NumValenceElectrons PEOE_VSA1 PEOE_VSA10 PEOE_VSA11 PEOE_VSA12 PEOE_VSA13 PEOE_VSA14 PEOE_VSA2 PEOE_VSA3 PEOE_VSA4 PEOE_VSA5 PEOE_VSA6 PEOE_VSA7 PEOE_VSA8 PEOE_VSA9 RingCount SMR_VSA1 SMR_VSA10 SMR_VSA2 SMR_VSA3 SMR_VSA4 SMR_VSA5 SMR_VSA6 SMR_VSA7 SMR_VSA8 SMR_VSA9 SSSR SlogP_VSA1 SlogP_VSA10 SlogP_VSA11 SlogP_VSA12 SlogP_VSA2 SlogP_VSA3 SlogP_VSA4 SlogP_VSA5 SlogP_VSA6 SlogP_VSA7 SlogP_VSA8 SlogP_VSA9 TPSA VSA_EState1 VSA_EState10 VSA_EState2 VSA_EState3 VSA_EState4 VSA_EState5 VSA_EState6 VSA_EState7 VSA_EState8 VSA_EState9 fr_Al_COO fr_Al_OH fr_Al_OH_noTert fr_ArN fr_Ar_COO fr_Ar_N fr_Ar_NH fr_Ar_OH fr_COO fr_COO2 fr_C_O fr_C_O_noCOO fr_C_S fr_HOCCN fr_Imine fr_NH0 fr_NH1 fr_NH2 fr_N_O fr_Ndealkylation1 fr_Ndealkylation2 fr_Nhpyrrole fr_SH fr_aldehyde fr_alkyl_carbamate fr_alkyl_halide fr_allylic_oxid fr_amide fr_amidine fr_aniline fr_aryl_methyl fr_azide fr_azo fr_barbitur fr_benzene fr_benzodiazepine fr_bicyclic fr_diazo fr_dihydropyridine fr_epoxide fr_ester fr_ether fr_furan fr_guanido fr_halogen fr_hdrzine fr_hdrzone fr_imidazole fr_imide fr_isocyan fr_isothiocyan fr_ketone fr_ketone_Topliss fr_lactam fr_lactone fr_methoxy fr_morpholine fr_nitrile fr_nitro fr_nitro_arom fr_nitro_arom_nonortho fr_nitroso fr_oxazole fr_oxime fr_para_hydroxylation fr_phenol fr_phenol_noOrthoHbond fr_phos_acid fr_phos_ester fr_piperdine fr_piperzine fr_priamide fr_prisulfonamd fr_pyridine fr_quatN fr_sulfide fr_sulfonamd fr_sulfone fr_term_acetylene fr_tetrazole fr_thiazole fr_thiocyan fr_thiophene fr_unbrch_alkane fr_urea qed
+3037 2.370228 503.61088 12.413849 8.821565 10.333422 8.058551 5.008353 5.764282 3.722845 4.595717 2.463985 2.934179 1.596526 1.985926 0.0 10.213055 0.0 11.499024 27.592991 0.0 12.132734 24.265468 0.0 0.0 23.20188 268.005785 0 0.764706 1.176471 1.588235 0.076923 -1.38 17 259.047 6943.4452 12.086867 4.861181 2.842672 109.048439 9.683208 0.507662 9.683208 0.118709 0.147014 0.118709 0.147014 -0.507662 3.9954 69.0396 269.127 2 2 0 0 0 2 0 2 2 2 4 0 2 0 0 0 88 10.213055 11.499024 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 23.20188 47.525105 16.466088 0.0 2 10.213055 23.20188 0.0 0.0 0.0 6.420822 0.0 57.570372 0.0 11.499024 2 0.0 0.0 11.499024 23.20188 10.213055 6.420822 0.0 11.126903 36.398202 10.045267 0.0 0.0 40.46 0.0 11.70887 0.0 20.448487 1.29642 0.294029 9.600621 0.373796 0.0 0.0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.864713
b
diff -r cf725c82c865 -r a1c53f0533b0 test-data/rdkit_descriptors_subset.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rdkit_descriptors_subset.tab Wed Feb 17 12:59:43 2021 +0000
b
@@ -0,0 +1,2 @@
+MoleculeID FormalCharge MolWt qed
+3037 0 269.127 0.864713