changeset 0:5ccd3a432785 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/silicos-it/qed commit 4379e712f76f2bb12ee2cc270dd8a0e806df2cd6
author bgruening
date Tue, 23 May 2017 03:57:14 -0400
parents
children ab73abead7fa
files errors.pyc qed.py silicos_qed.xml test-data/qed_test.smi test-data/qed_test_max.tab test-data/qed_test_mean.tab test-data/qed_test_unweighted.tab
diffstat 7 files changed, 654 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file errors.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qed.py	Tue May 23 03:57:14 2017 -0400
@@ -0,0 +1,428 @@
+#!/usr/bin/env python
+__all__ = ['weights_max', 'weights_mean', 'weights_none', 'default']
+
+# RDKit
+from rdkit.Chem import Descriptors
+from rdkit import Chem
+
+# General
+from copy import deepcopy
+from math import exp, log
+import sys, os, re
+import argparse
+
+
+class SilicosItError(Exception):
+    """Common ancestor of all exceptions raised by the Silicos-it code."""
+
+class WrongArgument(SilicosItError):
+    """
+    Exception raised when argument to function is not of correct type.
+
+    Attributes:
+        function -- function in which error occurred
+        msg      -- explanation of the error
+    """
+    def __init__(self, function, msg):
+        # Initialise the base class as well, so that both values are stored
+        # in ``args`` and the exception does not print as an empty message.
+        super(WrongArgument, self).__init__(function, msg)
+        self.function = function
+        self.msg = msg
+
+    def __str__(self):
+        # Readable text for tracebacks and log output.
+        return "%s: %s" % (self.function, self.msg)
+
+def check_filetype(filepath):
+    """
+    Guess the chemical file format of ``filepath`` from its content.
+
+    At most the first 10001 lines are inspected. The first line that carries
+    one of the following markers decides the result:
+
+        '$$$$'                  -> 'sdf'
+        '@<TRIPOS>MOLECULE'     -> 'mol2'
+        'ligand id'             -> 'drf'
+        'InChI=' at line start  -> 'inchi' (only checked on the first line)
+
+    If no marker is found, 'mol' is returned when a line starting with
+    'M  END' was seen, and 'smi' otherwise.
+    """
+    seen_mol_end = False
+    # The context manager closes the file on every return path.
+    with open(filepath) as handle:
+        for line_counter, line in enumerate(handle):
+            if line_counter > 10000:
+                break
+            if '$$$$' in line:
+                return 'sdf'
+            if '@<TRIPOS>MOLECULE' in line:
+                return 'mol2'
+            if 'ligand id' in line:
+                return 'drf'
+            # If the first line is not an InChI, it can't be an InChI file.
+            if line_counter == 0 and line.startswith('InChI='):
+                return 'inchi'
+            if re.match(r'M\s+END', line):
+                # 'M  END' can occur before '$$$$', so an SD file would be
+                # recognised as 'mol' if we returned here. Remember the hit
+                # and keep looking for the other markers instead.
+                seen_mol_end = True
+
+    return 'mol' if seen_mol_end else 'smi'
+
+# Query for the aliphatic ring atoms that properties() deletes before it
+# counts the remaining rings (AROM).
+AliphaticRings = Chem.MolFromSmarts('[$([A;R][!a])]')
+
+# SMARTS definitions of the atom types counted as hydrogen-bond acceptors (HBA).
+AcceptorSmarts = [
+    '[oH0;X2]',
+    '[OH1;X2;v2]',
+    '[OH0;X2;v2]',
+    '[OH0;X1;v2]',
+    '[O-;X1]',
+    '[SH0;X2;v2]',
+    '[SH0;X1;v2]',
+    '[S-;X1]',
+    '[nH0;X2]',
+    '[NH0;X1;v3]',
+    '[$([N;+0;X3;v3]);!$(N[C,S]=O)]'
+]
+# Compiled query molecules, in the same order as AcceptorSmarts.
+Acceptors = [Chem.MolFromSmarts(pattern) for pattern in AcceptorSmarts]
+
+# SMARTS definitions of the substructures counted as structural alerts (ALERTS).
+StructuralAlertSmarts = [
+    '*1[O,S,N]*1',
+    '[S,C](=[O,S])[F,Br,Cl,I]',
+    '[CX4][Cl,Br,I]',
+    '[C,c]S(=O)(=O)O[C,c]',
+    '[$([CH]),$(CC)]#CC(=O)[C,c]',
+    '[$([CH]),$(CC)]#CC(=O)O[C,c]',
+    'n[OH]',
+    '[$([CH]),$(CC)]#CS(=O)(=O)[C,c]',
+    'C=C(C=O)C=O',
+    'n1c([F,Cl,Br,I])cccc1',
+    '[CH1](=O)',
+    '[O,o][O,o]',
+    '[C;!R]=[N;!R]',
+    '[N!R]=[N!R]',
+    '[#6](=O)[#6](=O)',
+    '[S,s][S,s]',
+    '[N,n][NH2]',
+    'C(=O)N[NH2]',
+    '[C,c]=S',
+    '[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]',
+    'C1(=[O,N])C=CC(=[O,N])C=C1',
+    'C1(=[O,N])C(=[O,N])C=CC=C1',
+    'a21aa3a(aa1aaaa2)aaaa3',
+    'a31a(a2a(aa1)aaaa2)aaaa3',
+    'a1aa2a3a(a1)A=AA=A3=AA=A2',
+    'c1cc([NH2])ccc1',
+    '[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si,Na,Ca,Ge,Ag,Mg,K,Ba,Sr,Be,Ti,Mo,Mn,Ru,Pd,Ni,Cu,Au,Cd,Al,Ga,Sn,Rh,Tl,Bi,Nb,Li,Pb,Hf,Ho]',
+    'I',
+    'OS(=O)(=O)[O-]',
+    '[N+](=O)[O-]',
+    'C(=O)N[OH]',
+    'C1NC(=O)NC(=O)1',
+    '[SH]',
+    '[S-]',
+    'c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]',
+    'c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]',
+    '[CR1]1[CR1][CR1][CR1][CR1][CR1][CR1]1',
+    '[CR1]1[CR1][CR1]cc[CR1][CR1]1',
+    '[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1',
+    '[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1',
+    '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1',
+    '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1',
+    'C#C',
+    '[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]',
+    '[$([N+R]),$([n+R]),$([N+]=C)][O-]',
+    '[C,c]=N[OH]',
+    '[C,c]=NOC=O',
+    '[C,c](=O)[CX4,CR0X3,O][C,c](=O)',
+    'c1ccc2c(c1)ccc(=O)o2',
+    '[O+,o+,S+,s+]',
+    'N=C=O',
+    '[NX3,NX4][F,Cl,Br,I]',
+    'c1ccccc1OC(=O)[#6]',
+    '[CR0]=[CR0][CR0]=[CR0]',
+    '[C+,c+,C-,c-]',
+    'N=[N+]=[N-]',
+    'C12C(NC(N1)=O)CSC2',
+    'c1c([OH])c([OH,NH2,NH])ccc1',
+    'P',
+    '[N,O,S]C#N',
+    'C=C=O',
+    '[Si][F,Cl,Br,I]',
+    '[SX2]O',
+    '[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)',
+    'O1CCCCC1OC2CCC3CCCCC3C2',
+    'N=[CR0][N,n,O,S]',
+    '[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2]2',
+    'C=[C!r]C#N',
+    '[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1',
+    '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1',
+    '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])',
+    '[OH]c1ccc([OH,NH2,NH])cc1',
+    'c1ccccc1OC(=O)O',
+    '[SX2H0][N]',
+    'c12ccccc1(SC(S)=N2)',
+    'c12ccccc1(SC(=S)N2)',
+    'c1nnnn1C=O',
+    's1c(S)nnc1NC=O',
+    'S1C=CSC1=S',
+    'C(=O)Onnn',
+    'OS(=O)(=O)C(F)(F)F',
+    'N#CC[OH]',
+    'N#CC(=O)',
+    'S(=O)(=O)C#N',
+    'N[CH2]C#N',
+    'C1(=O)NCC1',
+    'S(=O)(=O)[O-,OH]',
+    'NC[F,Cl,Br,I]',
+    'C=[C!r]O',
+    '[NX2+0]=[O+0]',
+    '[OR0,NR0][OR0,NR0]',
+    'C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]',
+    '[CX2R0][NX3R0]',
+    'c1ccccc1[C;!R]=[C;!R]c2ccccc2',
+    '[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]',
+    '[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,C,n,N,o,O]',
+    '[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]',
+    '[*]=[N+]=[*]',
+    '[SX3](=O)[O-,OH]',
+    'N#N',
+    'F.F.F.F',
+    '[R0;D2][R0;D2][R0;D2][R0;D2]',
+    '[cR,CR]~C(=O)NC(=O)~[cR,CR]',
+    'C=!@CC=[O,S]',
+    '[#6,#8,#16][C,c](=O)O[C,c]',
+    'c[C;R0](=[O,S])[C,c]',
+    'c[SX2][C;!R]',
+    'C=C=C',
+    'c1nc([F,Cl,Br,I,S])ncc1',
+    'c1ncnc([F,Cl,Br,I,S])c1',
+    'c1nc(c2c(n1)nc(n2)[F,Cl,Br,I])',
+    '[C,c]S(=O)(=O)c1ccc(cc1)F',
+    '[15N]',
+    '[13C]',
+    '[18O]',
+    '[34S]'
+]
+
+# Compiled query molecules, in the same order as StructuralAlertSmarts.
+StructuralAlerts = [Chem.MolFromSmarts(pattern) for pattern in StructuralAlertSmarts]
+
+
+# ADS parameters for the 8 molecular properties: [row][column]
+#     rows[8]:    MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS
+#     columns[7]: A, B, C, D, E, F, DMAX
+# pads1: ALOGP parameters from Gregory Gerebtzoff (2012, Roche)
+pads1 = [
+    [2.817065973, 392.5754953, 290.7489764, 2.419764353, 49.22325677, 65.37051707, 104.9805561],  # MW
+    [0.486849448, 186.2293718, 2.066177165, 3.902720615, 1.027025453, 0.913012565, 145.4314800],  # ALOGP
+    [2.948620388, 160.4605972, 3.615294657, 4.435986202, 0.290141953, 1.300669958, 148.7763046],  # HBA
+    [1.618662227, 1010.051101, 0.985094388, 0.000000001, 0.713820843, 0.920922555, 258.1632616],  # HBD
+    [1.876861559, 125.2232657, 62.90773554, 87.83366614, 12.01999824, 28.51324732, 104.5686167],  # PSA
+    [0.010000000, 272.4121427, 2.558379970, 1.565547684, 1.271567166, 2.758063707, 105.4420403],  # ROTB
+    [3.217788970, 957.7374108, 2.274627939, 0.000000001, 1.317690384, 0.375760881, 312.3372610],  # AROM
+    [0.010000000, 1199.094025, -0.09002883, 0.000000001, 0.185904477, 0.875193782, 417.7253140],  # ALERTS
+]
+# pads2: ALOGP parameters from the original publication. Only the ALOGP row
+# differs from pads1; the other rows are copied so that both tables stay
+# independent lists.
+pads2 = [list(parameters) for parameters in pads1]
+pads2[1] = [3.172690585, 137.8624751, 2.534937431, 4.581497897, 0.822739154, 0.576295591, 131.3186604]
+
+def ads(x, a, b, c, d, e, f, dmax):
+    """
+    Evaluate the ADS function for the value x.
+
+    The result is a plus the product of a rising sigmoid (scaled by b, centred
+    at c - d/2, width e) and a falling sigmoid (centred at c + d/2, width f),
+    divided by dmax.
+    """
+    rising = b / (1 + exp(-1 * (x - c + d / 2) / e))
+    falling = 1 - 1 / (1 + exp(-1 * (x - c - d / 2) / f))
+    return (a + rising * falling) / dmax
+
+def properties(mol):
+    """
+    Compute the molecular properties that the QED descriptor is built from.
+
+    Returns a list of nine values in this order:
+    MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS and the number of failed
+    rule-of-five checks. Raises WrongArgument if mol is None.
+    """
+    if mol is None:
+        raise WrongArgument("properties(mol)", "mol argument is \'None\'")
+
+    mw = Descriptors.MolWt(mol)
+    alogp = Descriptors.MolLogP(mol)
+    # HBA: number of matches summed over all acceptor patterns
+    hba = sum(len(mol.GetSubstructMatches(acceptor)) for acceptor in Acceptors)
+    hbd = Descriptors.NumHDonors(mol)
+    psa = Descriptors.TPSA(mol)
+    rotb = Descriptors.NumRotatableBonds(mol)
+    # AROM: rings left after the AliphaticRings matches were deleted from a copy
+    without_aliphatic_rings = Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings)
+    arom = Chem.GetSSSR(without_aliphatic_rings)
+    # ALERTS: every alert pattern counts once, however often it matches
+    alerts = sum(1 for alert in StructuralAlerts if mol.HasSubstructMatch(alert))
+
+    # Rule-of-five checks: HBD, HBA, MW and ALOGP
+    ro5_failed = sum([hbd > 5, hba > 10, mw >= 500, alogp > 5])
+
+    return [mw, alogp, hba, hbd, psa, rotb, arom, alerts, ro5_failed]
+
+
+def qed(w, p, gerebtzoff):
+    """
+    Calculate the QED value from the weights w and the properties p.
+
+    The first eight entries of p are converted with ads(), using the pads1
+    parameters if gerebtzoff is true and the pads2 parameters otherwise. The
+    result is the exponential of the weighted mean of their logarithms.
+    """
+    parameters = pads1 if gerebtzoff else pads2
+    desirabilities = [ads(p[i], *parameters[i][:7]) for i in range(8)]
+    weighted_log_sum = 0.0
+    for i in range(8):
+        weighted_log_sum += w[i] * log(desirabilities[i])
+    return exp(weighted_log_sum / sum(w))
+
+
+def weights_max(mol, gerebtzoff = True, props = False):
+    """
+    Return the QED descriptor calculated with the maximal descriptor weights.
+    A props list, if given, is used as is; otherwise the properties are
+    calculated from mol first.
+    """
+    max_weights = [0.50, 0.25, 0.00, 0.50, 0.00, 0.50, 0.25, 1.00]
+    if props:
+        return qed(max_weights, props, gerebtzoff)
+    return qed(max_weights, properties(mol), gerebtzoff)
+
+
+def weights_mean(mol, gerebtzoff = True, props = False):
+    """
+    Return the QED descriptor calculated with the average descriptor weights.
+    A props list, if given, is used as is; otherwise the properties are
+    calculated from mol first.
+    """
+    mean_weights = [0.66, 0.46, 0.05, 0.61, 0.06, 0.65, 0.48, 0.95]
+    if props:
+        return qed(mean_weights, props, gerebtzoff)
+    return qed(mean_weights, properties(mol), gerebtzoff)
+
+
+def weights_none(mol, gerebtzoff = True, props = False):
+    """
+    Return the QED descriptor calculated with unit weights.
+    A props list, if given, is used as is; otherwise the properties are
+    calculated from mol first.
+    """
+    unit_weights = [1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00]
+    if props:
+        return qed(unit_weights, props, gerebtzoff)
+    return qed(unit_weights, properties(mol), gerebtzoff)
+
+
+def default(mol, gerebtzoff = True):
+    """
+    Return the QED descriptor of mol calculated with the average descriptor
+    weights; the Gregory Gerebtzoff parameters are used unless gerebtzoff is false.
+    """
+    qed_value = weights_mean(mol, gerebtzoff)
+    return qed_value
+
+
+if __name__ == "__main__":
+    # Command line entry point: calculate the properties and the QED value for
+    # every molecule of an SD or SMILES file and write one tab-separated row
+    # per molecule.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', '--input',
+                required=True,
+                help='path to the input file name')
+    parser.add_argument("-m", "--method",
+                dest="method",
+                choices=['max', 'mean', 'unweighted'],
+                default="mean",
+                help="Specify the method you want to use.")
+    parser.add_argument("--iformat",
+                help="Input format, 'sdf' or 'smi'. It is guessed from the file content if omitted.")
+    parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'),
+                default=sys.stdout,
+                help="path to the result file, default is stdout")
+    parser.add_argument("--header", dest="header", action="store_true",
+                default=False,
+                help="Write header line.")
+
+    args = parser.parse_args()
+
+    # Make sure the input is a readable file; isfile() already implies that
+    # the path exists.
+    ifile = os.path.abspath(args.input)
+    if not os.path.isfile(ifile):
+        print "Error: ", ifile, " is not a file or cannot be found."
+        sys.exit(1)
+    if not os.access(ifile, os.R_OK):
+        print "Error: ", ifile, " is not readable."
+        sys.exit(1)
+
+    # Use the requested format, or try to guess the filetype
+    filetype = args.iformat if args.iformat else check_filetype(ifile)
+
+    # Weighting scheme selected with --method (argparse restricts the choices)
+    calculate_qed = {
+        'max': weights_max,
+        'mean': weights_mean,
+        'unweighted': weights_none,
+    }[args.method]
+
+    # Columns shared by both output flavours: the nine properties and the QED value
+    header = "MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME"
+    row = "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s"
+
+    if filetype == 'sdf':
+        if args.header:
+            args.outfile.write(header + "\n")
+        for count, mol in enumerate(Chem.SDMolSupplier(ifile), 1):
+            if mol is None:
+                print "Warning: skipping molecule ", count, " and continuing with next."
+                continue
+            props = properties(mol)
+            calc_qed = calculate_qed(mol, True, props)
+            values = tuple(props) + (calc_qed, mol.GetProp("_Name"))
+            args.outfile.write((row + "\n") % values)
+    elif filetype == 'smi':
+        # We want to store the original SMILES in the output, so the file is
+        # read line by line and each line is converted separately.
+        if args.header:
+            args.outfile.write(header + "\tSMILES\n")
+        with open(ifile) as handle:
+            for count, line in enumerate(handle, 1):
+                tokens = line.strip().split('\t')
+                smiles = tokens[0]
+                if not smiles:
+                    # Blank line: there is no molecule to calculate
+                    continue
+                # Only the first two columns (SMILES and title) are used, so
+                # additional columns do not break the parsing.
+                title = tokens[1] if len(tokens) > 1 else ''
+                mol = Chem.MolFromSmiles(smiles)
+                if mol is None:
+                    print "Warning: skipping molecule ", count, " and continuing with next."
+                    continue
+                props = properties(mol)
+                calc_qed = calculate_qed(mol, True, props)
+                values = tuple(props) + (calc_qed, title, smiles)
+                args.outfile.write((row + "\t%s\n") % values)
+    else:
+        sys.exit("Error: unknown file-type: %s" % filetype)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/silicos_qed.xml	Tue May 23 03:57:14 2017 -0400
@@ -0,0 +1,123 @@
+<tool id="ctb_silicos_qed" name="Drug-likeness" version="0.1">
+    <description>quantitative estimation (QED)</description>
+    <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism-->
+    <requirements>
+        <requirement type="package" version="2015.09.2">rdkit</requirement>
+    </requirements>
+    <command detect_errors="aggressive">
+<![CDATA[
+    python '$__tool_directory__/qed.py'
+        -i '${infile}'
+        --method '${method}'
+        --iformat ${infile.ext}
+        -o '${outfile}'
+        $header
+]]>
+    </command>
+    <inputs>
+        <param format="smi,sdf" name="infile" type="data" label="Molecule data in SD- or SMILES-format" help="Dataset missing? See TIP below"/>
+        <param name="method" type="select" label="Method">
+            <option value="max">Max weight (QEDw,max)</option>
+            <option value="mean">Mean weight (QEDw,mo)</option>
+            <option value="unweighted">unweighted (QEDw,u)</option>
+        </param>
+        <param name="header" type="boolean" label="Include the descriptor name as header" truevalue="--header" falsevalue="" checked="false" />
+    </inputs>
+    <outputs>
+        <data format="tabular" name="outfile" />
+    </outputs>
+    <tests>
+        <!--
+        Run each weighting method (max, mean, unweighted) on the same SMILES input with the header line enabled
+        -->
+        <test>
+          <param name="infile" value="qed_test.smi" ftype="smi"/>
+          <param name="method" value="max"/>
+          <param name="header" value="True"/>
+          <output name="outfile" file="qed_test_max.tab" ftype="tabular"/>
+        </test>
+        <test>
+          <param name="infile" value="qed_test.smi" ftype="smi"/>
+          <param name="method" value="mean"/>
+          <param name="header" value="True"/>
+          <output name="outfile" file="qed_test_mean.tab" ftype="tabular"/>
+        </test>
+        <test>
+          <param name="infile" value="qed_test.smi" ftype="smi" />
+          <param name="method" value="unweighted"/>
+          <param name="header" value="True"/>
+          <output name="outfile" file="qed_test_unweighted.tab" ftype="tabular" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+.. class:: infomark
+
+**What this tool does**
+
+Estimates the drug-likeness of molecules and reports a score. Comes with three applicable varieties (QED\ :sub:`w,mo`\ , QED\ :sub:`w,max`\ , QED\ :sub:`w,u` ).
+
+-----
+
+.. class:: warningmark
+
+**HINT**
+
+- All invalid, blank and comment lines are skipped when performing computations. The number of skipped lines is displayed in the resulting history item.
+
+- QED\ :sub:`w,max` using the set of weights that give maximal information content
+
+- QED\ :sub:`w,mo` using the mean weights of the optimal 1,000 weight combinations that give the highest information content
+
+- QED\ :sub:`w,u` with all weights as unity, hence unweighted.
+
+-----
+
+.. class:: infomark
+
+**Input**
+
+
+| - `SD-Format`_
+| - `SMILES Format`_
+
+.. _SD-Format: http://en.wikipedia.org/wiki/Chemical_table_file
+.. _SMILES Format: http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification
+
+-----
+
+.. class:: infomark
+
+**Output**
+
++--------+-------+-----+-----+--------+------+------+--------+-------+----------------+-----+
+|   MW   | ALOGP | HBA | HBD |   PSA  | ROTB | AROM | ALERTS |  QED  |      NAME      | Ro5 |
++========+=======+=====+=====+========+======+======+========+=======+================+=====+
+| 286.34 | 1.092 |  6  |  3  | 101.88 |   4  |   2  |    1   | 0.737 | Abacavir       |  0  |
++--------+-------+-----+-----+--------+------+------+--------+-------+----------------+-----+
+| 181.21 | 0.481 |  4  |  2  |  83.47 |   5  |   0  |    2   | 0.487 | Acamprosate    |  0  |
++--------+-------+-----+-----+--------+------+------+--------+-------+----------------+-----+
+| 336.43 | 2.365 |  5  |  3  |  87.66 |  11  |   1  |    1   | 0.540 | Acebutolol     |  0  |
++--------+-------+-----+-----+--------+------+------+--------+-------+----------------+-----+
+| 151.16 | 1.351 |  2  |  2  |  49.33 |   2  |   1  |    1   | 0.633 | Acetaminophen  |  0  |
++--------+-------+-----+-----+--------+------+------+--------+-------+----------------+-----+
+| 222.25 | 0.225 |  5  |  2  | 115.04 |   3  |   1  |    1   | 0.727 | Acetazolamide  |  0  |
++--------+-------+-----+-----+--------+------+------+--------+-------+----------------+-----+
+| 324.40 | 3.291 |  4  |  2  |  92.34 |   6  |   1  |    1   | 0.772 | Acetohexamide  |  0  |
++--------+-------+-----+-----+--------+------+------+--------+-------+----------------+-----+
+| 411.57 | 3.492 |  6  |  1  |  47.02 |   7  |   2  |    1   | 0.688 | Acetophenazine |  0  |
++--------+-------+-----+-----+--------+------+------+--------+-------+----------------+-----+
+| 329.37 | 3.327 |  4  |  1  |  39.72 |   4  |   2  |    0   | 0.917 | Paroxetine     |  0  |
++--------+-------+-----+-----+--------+------+------+--------+-------+----------------+-----+
+| 270.21 | 3.146 |  3  |  1  |  55.13 |   4  |   2  |    0   | 0.915 | Leflunomide    |  0  |
++--------+-------+-----+-----+--------+------+------+--------+-------+----------------+-----+
+
+
+
+]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1038/nchem.1243</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/qed_test.smi	Tue May 23 03:57:14 2017 -0400
@@ -0,0 +1,25 @@
+Nc1nc(NC2CC2)c2ncn([C@@H]3C[C@H](CO)C=C3)c2n1	Abacavir
+CC(=O)NCCCS(O)(=O)=O	Acamprosate
+CCCC(=O)Nc1ccc(OCC(O)CNC(C)C)c(c1)C(C)=O	Acebutolol
+CC(=O)Nc1ccc(O)cc1	Acetaminophen
+CC(=O)Nc1nnc(s1)S(N)(=O)=O	Acetazolamide
+CC(=O)c1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1	Acetohexamide
+CC(=O)c1ccc2Sc3ccccc3N(CCCN3CCN(CCO)CC3)c2c1	Acetophenazine
+Fc4ccc(C1CCNCC1COc3ccc2OCOc2c3)cc4	Paroxetine
+Cc1oncc1C(=O)Nc2ccc(C(F)(F)F)cc2	Leflunomide
+CN1C4CCCC1CC(NC(=O)c2nn(C)c3ccccc23)C4	Granisetron
+CCCN2CC(CSC)CC1c3cccc4[nH]cc(CC12)c34	Pergolide
+CCc3c(C)[nH]c2CCC(CN1CCOCC1)C(=O)c23	Molindone
+CCCCCCCCCCCCCCCC(=O)OCC(NC(=O)C(Cl)Cl)C(O)c1ccc([N+]([O-])=O)cc1	ChloramphenicalPalmitate
+CCCCCCCCCCCCCCCOC(=O)C2C(O)C(O)C(C(NC(=O)C1CC(CCC)CN1C)C(C)Cl)OC2SC	ClindamycinPalmitate
+CCOc3nc2cccc(C(=O)OC(C)OC(=O)OC1CCCCC1)c2n3Cc6ccc(c4ccccc4c5nn[nH]n5)cc6	CandesartanCilexetil
+CN(C)CCC=c2c1ccccc1sc3ccc(Cl)cc23	Chlorprothixene
+O=c3c(O)c(C2CCC(c1ccc(Cl)cc1)CC2)c(=O)c4ccccc34	Atovaquone
+CN(C)CCCN3c1ccccc1CCc2ccc(Cl)cc23	Clomipramine
+CN4CCCC(CC3c1ccccc1Sc2ccccc23)C4	Methixene
+CCN(CC)C(C)Cn3c1ccccc1sc2ccccc23	Ethopropazine
+N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O	Famotidine
+CNC(=NCCSCc1nc[nH]c1C)NC#N	Cimetidine
+CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12	Tegaserod
+C=CC3=C(C(=O)O)N2C(=O)C(NC(=O)C(=NO)c1csc(N)n1)C2SC3	Cefdinir
+CC5(C)SC4C(NC(=O)C(C(=O)Oc2ccc1CCCc1c2)c3ccccc3)C(=O)N4C5C(=O)O	CarbenicillinIndanyl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/qed_test_max.tab	Tue May 23 03:57:14 2017 -0400
@@ -0,0 +1,26 @@
+MW	ALOGP	HBA	HBD	PSA	ROTB	AROM	ALERTS	LRo5	QED	NAME	SMILES
+286.34	1.092	6	3	101.88	4	2	1	0	0.715	Abacavir	Nc1nc(NC2CC2)c2ncn([C@@H]3C[C@H](CO)C=C3)c2n1
+181.21	-0.600	4	2	83.47	4	0	2	0	0.436	Acamprosate	CC(=O)NCCCS(O)(=O)=O
+336.43	2.365	5	3	87.66	10	1	1	0	0.550	Acebutolol	CCCC(=O)Nc1ccc(OCC(O)CNC(C)C)c(c1)C(C)=O
+151.16	1.351	2	2	49.33	1	1	1	0	0.583	Acetaminophen	CC(=O)Nc1ccc(O)cc1
+222.25	-0.856	5	2	115.04	2	1	1	0	0.671	Acetazolamide	CC(=O)Nc1nnc(s1)S(N)(=O)=O
+324.40	2.210	4	2	92.34	4	1	1	0	0.808	Acetohexamide	CC(=O)c1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1
+411.57	3.492	6	1	47.02	7	2	1	0	0.674	Acetophenazine	CC(=O)c1ccc2Sc3ccccc3N(CCCN3CCN(CCO)CC3)c2c1
+329.37	3.327	4	1	39.72	4	2	0	0	0.913	Paroxetine	Fc4ccc(C1CCNCC1COc3ccc2OCOc2c3)cc4
+270.21	3.254	3	1	55.13	2	2	0	0	0.889	Leflunomide	Cc1oncc1C(=O)Nc2ccc(C(F)(F)F)cc2
+312.42	2.318	3	1	50.16	2	2	0	0	0.915	Granisetron	CN1C4CCCC1CC(NC(=O)c2nn(C)c3ccccc23)C4
+314.50	4.271	2	1	19.03	4	2	0	0	0.886	Pergolide	CCCN2CC(CSC)CC1c3cccc4[nH]cc(CC12)c34
+276.38	1.963	3	1	45.33	3	1	0	0	0.916	Molindone	CCc3c(C)[nH]c2CCC(CN1CCOCC1)C(=O)c23
+561.55	6.941	6	2	118.77	21	1	5	2	0.041	ChloramphenicalPalmitate	CCCCCCCCCCCCCCCC(=O)OCC(NC(=O)C(Cl)Cl)C(O)c1ccc([N+]([O-])=O)cc1
+663.41	6.279	8	3	108.33	22	0	3	2	0.064	ClindamycinPalmitate	CCCCCCCCCCCCCCCOC(=O)C2C(O)C(O)C(C(NC(=O)C1CC(CCC)CN1C)C(C)Cl)OC2SC
+610.67	6.319	10	1	143.34	10	5	2	2	0.169	CandesartanCilexetil	CCOc3nc2cccc(C(=O)OC(C)OC(=O)OC1CCCCC1)c2n3Cc6ccc(c4ccccc4c5nn[nH]n5)cc6
+315.87	5.188	2	0	3.24	3	3	0	1	0.693	Chlorprothixene	CN(C)CCC=c2c1ccccc1sc3ccc(Cl)cc23
+366.84	5.505	3	1	54.37	2	2	0	1	0.771	Atovaquone	O=c3c(O)c(C2CCC(c1ccc(Cl)cc1)CC2)c(=O)c4ccccc34
+314.86	4.528	2	0	6.48	4	2	0	0	0.802	Clomipramine	CN(C)CCCN3c1ccccc1CCc2ccc(Cl)cc23
+309.48	5.015	2	0	3.24	2	2	0	1	0.765	Methixene	CN4CCCC(CC3c1ccccc1Sc2ccccc23)C4
+312.48	5.020	3	0	6.48	5	2	0	1	0.761	Ethopropazine	CCN(CC)C(C)Cn3c1ccccc1sc2ccccc23
+337.46	-0.558	6	5	173.33	7	1	3	0	0.219	Famotidine	N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O
+252.35	0.597	5	3	88.89	5	1	5	0	0.157	Cimetidine	CNC(=NCCSCc1nc[nH]c1C)NC#N
+301.39	2.298	3	5	96.29	7	2	4	0	0.165	Tegaserod	CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12
+395.42	-0.172	8	4	158.21	5	1	4	0	0.181	Cefdinir	C=CC3=C(C(=O)O)N2C(=O)C(NC(=O)C(=NO)c1csc(N)n1)C2SC3
+494.57	2.496	7	2	113.01	6	2	4	0	0.202	CarbenicillinIndanyl	CC5(C)SC4C(NC(=O)C(C(=O)Oc2ccc1CCCc1c2)c3ccccc3)C(=O)N4C5C(=O)O
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/qed_test_mean.tab	Tue May 23 03:57:14 2017 -0400
@@ -0,0 +1,26 @@
+MW	ALOGP	HBA	HBD	PSA	ROTB	AROM	ALERTS	LRo5	QED	NAME	SMILES
+286.34	1.092	6	3	101.88	4	2	1	0	0.737	Abacavir	Nc1nc(NC2CC2)c2ncn([C@@H]3C[C@H](CO)C=C3)c2n1
+181.21	-0.600	4	2	83.47	4	0	2	0	0.467	Acamprosate	CC(=O)NCCCS(O)(=O)=O
+336.43	2.365	5	3	87.66	10	1	1	0	0.571	Acebutolol	CCCC(=O)Nc1ccc(OCC(O)CNC(C)C)c(c1)C(C)=O
+151.16	1.351	2	2	49.33	1	1	1	0	0.602	Acetaminophen	CC(=O)Nc1ccc(O)cc1
+222.25	-0.856	5	2	115.04	2	1	1	0	0.662	Acetazolamide	CC(=O)Nc1nnc(s1)S(N)(=O)=O
+324.40	2.210	4	2	92.34	4	1	1	0	0.833	Acetohexamide	CC(=O)c1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1
+411.57	3.492	6	1	47.02	7	2	1	0	0.688	Acetophenazine	CC(=O)c1ccc2Sc3ccccc3N(CCCN3CCN(CCO)CC3)c2c1
+329.37	3.327	4	1	39.72	4	2	0	0	0.917	Paroxetine	Fc4ccc(C1CCNCC1COc3ccc2OCOc2c3)cc4
+270.21	3.254	3	1	55.13	2	2	0	0	0.896	Leflunomide	Cc1oncc1C(=O)Nc2ccc(C(F)(F)F)cc2
+312.42	2.318	3	1	50.16	2	2	0	0	0.927	Granisetron	CN1C4CCCC1CC(NC(=O)c2nn(C)c3ccccc23)C4
+314.50	4.271	2	1	19.03	4	2	0	0	0.871	Pergolide	CCCN2CC(CSC)CC1c3cccc4[nH]cc(CC12)c34
+276.38	1.963	3	1	45.33	3	1	0	0	0.923	Molindone	CCc3c(C)[nH]c2CCC(CN1CCOCC1)C(=O)c23
+561.55	6.941	6	2	118.77	21	1	5	2	0.056	ChloramphenicalPalmitate	CCCCCCCCCCCCCCCC(=O)OCC(NC(=O)C(Cl)Cl)C(O)c1ccc([N+]([O-])=O)cc1
+663.41	6.279	8	3	108.33	22	0	3	2	0.071	ClindamycinPalmitate	CCCCCCCCCCCCCCCOC(=O)C2C(O)C(O)C(C(NC(=O)C1CC(CCC)CN1C)C(C)Cl)OC2SC
+610.67	6.319	10	1	143.34	10	5	2	2	0.141	CandesartanCilexetil	CCOc3nc2cccc(C(=O)OC(C)OC(=O)OC1CCCCC1)c2n3Cc6ccc(c4ccccc4c5nn[nH]n5)cc6
+315.87	5.188	2	0	3.24	3	3	0	1	0.629	Chlorprothixene	CN(C)CCC=c2c1ccccc1sc3ccc(Cl)cc23
+366.84	5.505	3	1	54.37	2	2	0	1	0.741	Atovaquone	O=c3c(O)c(C2CCC(c1ccc(Cl)cc1)CC2)c(=O)c4ccccc34
+314.86	4.528	2	0	6.48	4	2	0	0	0.782	Clomipramine	CN(C)CCCN3c1ccccc1CCc2ccc(Cl)cc23
+309.48	5.015	2	0	3.24	2	2	0	1	0.735	Methixene	CN4CCCC(CC3c1ccccc1Sc2ccccc23)C4
+312.48	5.020	3	0	6.48	5	2	0	1	0.734	Ethopropazine	CCN(CC)C(C)Cn3c1ccccc1sc2ccccc23
+337.46	-0.558	6	5	173.33	7	1	3	0	0.263	Famotidine	N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O
+252.35	0.597	5	3	88.89	5	1	5	0	0.239	Cimetidine	CNC(=NCCSCc1nc[nH]c1C)NC#N
+301.39	2.298	3	5	96.29	7	2	4	0	0.235	Tegaserod	CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12
+395.42	-0.172	8	4	158.21	5	1	4	0	0.239	Cefdinir	C=CC3=C(C(=O)O)N2C(=O)C(NC(=O)C(=NO)c1csc(N)n1)C2SC3
+494.57	2.496	7	2	113.01	6	2	4	0	0.274	CarbenicillinIndanyl	CC5(C)SC4C(NC(=O)C(C(=O)Oc2ccc1CCCc1c2)c3ccccc3)C(=O)N4C5C(=O)O
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/qed_test_unweighted.tab	Tue May 23 03:57:14 2017 -0400
@@ -0,0 +1,26 @@
+MW	ALOGP	HBA	HBD	PSA	ROTB	AROM	ALERTS	LRo5	QED	NAME	SMILES
+286.34	1.092	6	3	101.88	4	2	1	0	0.713	Abacavir	Nc1nc(NC2CC2)c2ncn([C@@H]3C[C@H](CO)C=C3)c2n1
+181.21	-0.600	4	2	83.47	4	0	2	0	0.559	Acamprosate	CC(=O)NCCCS(O)(=O)=O
+336.43	2.365	5	3	87.66	10	1	1	0	0.624	Acebutolol	CCCC(=O)Nc1ccc(OCC(O)CNC(C)C)c(c1)C(C)=O
+151.16	1.351	2	2	49.33	1	1	1	0	0.688	Acetaminophen	CC(=O)Nc1ccc(O)cc1
+222.25	-0.856	5	2	115.04	2	1	1	0	0.645	Acetazolamide	CC(=O)Nc1nnc(s1)S(N)(=O)=O
+324.40	2.210	4	2	92.34	4	1	1	0	0.848	Acetohexamide	CC(=O)c1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1
+411.57	3.492	6	1	47.02	7	2	1	0	0.711	Acetophenazine	CC(=O)c1ccc2Sc3ccccc3N(CCCN3CCN(CCO)CC3)c2c1
+329.37	3.327	4	1	39.72	4	2	0	0	0.919	Paroxetine	Fc4ccc(C1CCNCC1COc3ccc2OCOc2c3)cc4
+270.21	3.254	3	1	55.13	2	2	0	0	0.921	Leflunomide	Cc1oncc1C(=O)Nc2ccc(C(F)(F)F)cc2
+312.42	2.318	3	1	50.16	2	2	0	0	0.950	Granisetron	CN1C4CCCC1CC(NC(=O)c2nn(C)c3ccccc23)C4
+314.50	4.271	2	1	19.03	4	2	0	0	0.830	Pergolide	CCCN2CC(CSC)CC1c3cccc4[nH]cc(CC12)c34
+276.38	1.963	3	1	45.33	3	1	0	0	0.941	Molindone	CCc3c(C)[nH]c2CCC(CN1CCOCC1)C(=O)c23
+561.55	6.941	6	2	118.77	21	1	5	2	0.114	ChloramphenicalPalmitate	CCCCCCCCCCCCCCCC(=O)OCC(NC(=O)C(Cl)Cl)C(O)c1ccc([N+]([O-])=O)cc1
+663.41	6.279	8	3	108.33	22	0	3	2	0.110	ClindamycinPalmitate	CCCCCCCCCCCCCCCOC(=O)C2C(O)C(O)C(C(NC(=O)C1CC(CCC)CN1C)C(C)Cl)OC2SC
+610.67	6.319	10	1	143.34	10	5	2	2	0.124	CandesartanCilexetil	CCOc3nc2cccc(C(=O)OC(C)OC(=O)OC1CCCCC1)c2n3Cc6ccc(c4ccccc4c5nn[nH]n5)cc6
+315.87	5.188	2	0	3.24	3	3	0	1	0.553	Chlorprothixene	CN(C)CCC=c2c1ccccc1sc3ccc(Cl)cc23
+366.84	5.505	3	1	54.37	2	2	0	1	0.759	Atovaquone	O=c3c(O)c(C2CCC(c1ccc(Cl)cc1)CC2)c(=O)c4ccccc34
+314.86	4.528	2	0	6.48	4	2	0	0	0.707	Clomipramine	CN(C)CCCN3c1ccccc1CCc2ccc(Cl)cc23
+309.48	5.015	2	0	3.24	2	2	0	1	0.651	Methixene	CN4CCCC(CC3c1ccccc1Sc2ccccc23)C4
+312.48	5.020	3	0	6.48	5	2	0	1	0.669	Ethopropazine	CCN(CC)C(C)Cn3c1ccccc1sc2ccccc23
+337.46	-0.558	6	5	173.33	7	1	3	0	0.292	Famotidine	N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O
+252.35	0.597	5	3	88.89	5	1	5	0	0.411	Cimetidine	CNC(=NCCSCc1nc[nH]c1C)NC#N
+301.39	2.298	3	5	96.29	7	2	4	0	0.389	Tegaserod	CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12
+395.42	-0.172	8	4	158.21	5	1	4	0	0.275	Cefdinir	C=CC3=C(C(=O)O)N2C(=O)C(NC(=O)C(=NO)c1csc(N)n1)C2SC3
+494.57	2.496	7	2	113.01	6	2	4	0	0.382	CarbenicillinIndanyl	CC5(C)SC4C(NC(=O)C(C(=O)Oc2ccc1CCCc1c2)c3ccccc3)C(=O)N4C5C(=O)O