changeset 1:ab73abead7fa draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/silicos-it/qed commit ed9b6859de648aa5f7cde483732f5df20aaff90e
author bgruening
date Tue, 07 May 2019 13:41:23 -0400
parents 5ccd3a432785
children fc45bf8b6e01
files errors.pyc qed.py silicos_qed.xml
diffstat 3 files changed, 40 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
Binary file errors.pyc has changed
--- a/qed.py	Tue May 23 03:57:14 2017 -0400
+++ b/qed.py	Tue May 07 13:41:23 2019 -0400
@@ -330,18 +330,18 @@
     # Elucidate filetype and open supplier
     ifile = os.path.abspath(args.input)
     if not os.path.isfile(ifile):
-        print "Error: ", ifile, " is not a file or cannot be found."
+        print("Error: ", ifile, " is not a file or cannot be found.")
         sys.exit(1)
     if not os.path.exists(ifile):
-        print "Error: ", ifile, " does not exist or cannot be found."
+        print("Error: ", ifile, " does not exist or cannot be found.")
         sys.exit(1)
     if not os.access(ifile, os.R_OK):
-        print "Error: ", ifile, " is not readable."
+        print("Error: ", ifile, " is not readable.")
         sys.exit(1)
 
     if not args.iformat:
         # try to guess the filetype
-        filetype = check_filetype( ifile )
+        filetype = check_filetype(ifile)
     else:
         filetype = args.iformat # sdf or smi
 
@@ -350,7 +350,7 @@
         We want to store the original SMILES in the output. So in case of a SMILES file iterate over the file and convert each line separate.
     """
     if filetype == 'sdf':
-        supplier = Chem.SDMolSupplier( ifile )
+        supplier = Chem.SDMolSupplier(ifile)
         # Process file
         if args.header:
             args.outfile.write("MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\n")
@@ -358,7 +358,7 @@
         for mol in supplier:
             count += 1
             if mol is None:
-                print "Warning: skipping molecule ", count, " and continuing with next."
+                print("Warning: skipping molecule ", count, " and continuing with next.")
                 continue
             props = properties(mol)
 
@@ -399,7 +399,7 @@
             mol = Chem.MolFromSmiles(smiles)
             count += 1
             if mol is None:
-                print "Warning: skipping molecule ", count, " and continuing with next."
+                print("Warning: skipping molecule ", count, " and continuing with next.")
                 continue
             props = properties(mol)
 
--- a/silicos_qed.xml	Tue May 23 03:57:14 2017 -0400
+++ b/silicos_qed.xml	Tue May 07 13:41:23 2019 -0400
@@ -1,8 +1,8 @@
-<tool id="ctb_silicos_qed" name="Drug-likeness" version="0.1">
+<tool id="ctb_silicos_qed" name="Drug-likeness" version="0.2">
     <description>quantitative estimation (QED)</description>
     <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism-->
     <requirements>
-        <requirement type="package" version="2015.09.2">rdkit</requirement>
+        <requirement type="package" version="2019.03.1">rdkit</requirement>
     </requirements>
     <command detect_errors="aggressive">
 <![CDATA[
@@ -15,13 +15,13 @@
 ]]>
     </command>
     <inputs>
-        <param format="smi,sdf" name="infile" type="data" label="Molecule data in SD- or SMILES-format" help="Dataset missing? See TIP below"/>
-        <param name="method" type="select" label="Method">
+        <param format="smi,sdf" name="infile" type="data" label="Molecule data in SDF or SMILES format" help="Dataset missing? See TIP below"/>
+        <param name="method" type="select" label="Method" help="Method for weighting features">
             <option value="max">Max weight (QEDw,max)</option>
             <option value="mean">Mean weight (QEDw,mo)</option>
-            <option value="unweighted">unweighted (QEDw,u)</option>
+            <option value="unweighted">Unweighted (QEDw,u)</option>
         </param>
-        <param name="header" type="boolean" label="Include the descriptor name as header" truevalue="--header" falsevalue="" checked="false" />
+        <param name="header" type="boolean" label="Include the descriptor names as header" truevalue="--header" falsevalue="" checked="false" />
     </inputs>
     <outputs>
         <data format="tabular" name="outfile" />
@@ -56,21 +56,37 @@
 
 **What this tool does**
 
-Estimates the drug-likeness of molecules and reports a score. Comes with three applicable varieties (QED\ :sub:`w,mo`\ , QED\ :sub:`w,max`\ , QED\ :sub:`w,u` ).
+Estimates the drug-likeness of molecules, based on eight commonly used molecular
+properties, and reports a score between 0 (all properties unfavourable) and 1 (all
+properties favourable). Two possible methods to weight the features are available
+(QED\ :sub:`w,mo`\ , QED\ :sub:`w,max`\), as well as an option to leave features
+unweighted (QED\ :sub:`w,u`).
+
+The eight properties used are: molecular weight (MW), octanol–water partition 
+coefficient (ALOGP), number of hydrogen bond donors (HBDs), number of hydrogen
+bond acceptors (HBAs), molecular polar surface area (PSA), number of rotatable
+bonds (ROTBs), number of aromatic rings (AROMs) and number of structural alerts
+(ALERTS).
 
 -----
 
 .. class:: warningmark
 
-**HINT**
+**Hint**
 
-- All invalid, blank and comment lines are skipped when performing computations. The number of skipped lines is displayed in the resulting history item.
+All invalid, blank and comment lines are skipped when performing computations. The
+number of skipped lines is displayed in the resulting history item. The method refers
+to a set of weights that can be applied to the features. These are derived in the
+`original paper`_ describing QED.
+
 
 - QED\ :sub:`w,max` using the set of weights that give maximal information content
 
 - QED\ :sub:`w,mo` using the mean weights of the optimal 1,000 weight combinations that give the highest information content
 
-- QED\ :sub:`w,u` with all weights as unity, hence unweighted.
+- QED\ :sub:`w,u` with all weights as unity, i.e. unweighted.
+
+.. _original paper: https://www.nature.com/articles/nchem.1243
 
 -----
 
@@ -79,11 +95,11 @@
 **Input**
 
 
-| - `SD-Format`_
-| - `SMILES Format`_
+| - `SDF format`_
+| - `SMILES format`_
 
-.. _SD-Format: http://en.wikipedia.org/wiki/Chemical_table_file
-.. _SMILES Format: http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification
+.. _SDF format: http://en.wikipedia.org/wiki/Chemical_table_file
+.. _SMILES format: http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification
 
 -----
 
@@ -91,6 +107,9 @@
 
 **Output**
 
+A table listing the values of the eight features, the QED score, the name of the
+molecule, and the number of Lipinski rules which the molecule obeys. 
+
 +--------+-------+-----+-----+--------+------+------+--------+-------+----------------+-----+
 |   MW   | ALOGP | HBA | HBD |   PSA  | ROTB | AROM | ALERTS |  QED  |      NAME      | Ro5 |
 +========+=======+=====+=====+========+======+======+========+=======+================+=====+