changeset 0:a2369e86bc48 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
author bgruening
date Mon, 30 Sep 2019 17:38:26 -0400
parents
children 0e330829de40
files sygma.xml sygma_metabolites.py test-data/i.sdf test-data/i.smi test-data/o.smi test-data/o2.smi
diffstat 6 files changed, 356 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sygma.xml	Mon Sep 30 17:38:26 2019 -0400
@@ -0,0 +1,94 @@
+<tool id="sygma" name="Generate possible metabolites with SyGMa" version="@VERSION@">
+    <macros>
+        <token name="@VERSION@">1.1.1</token>
+    </macros>
+    <description>by performing common reactions on one or more parent molecule(s)</description>
+    <requirements>
+        <requirement type="package" version="@VERSION@">sygma</requirement>
+        <requirement type="package" version="2019.03.4">rdkit</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        python '$__tool_directory__/sygma_metabolites.py' 
+            -i '$input' 
+            --iformat '$input.ext' 
+            -o '$output' 
+            --phase1 '$phase1' 
+            --phase2 '$phase2'
+    ]]></command>
+    <inputs>
+        <param type="data" name="input" format="smi,sdf" label="Parent molecule(s)" help="Upload an SDF or SMILES file."/>
+        <param type="integer" name="phase1" value="1" min="0" label="Number of reaction cycles to apply for phase 1" help="Phase 1 metabolism rules include different types of oxidation, reduction, hydrolysis and condensation reactions."/>
+        <param type="integer" name="phase2" value="1" min="0" label="Number of reaction cycles to apply for phase 2" help="Phase 2 metabolism rules include several conjugation reactions, e.g. with glucuronyl, sulfate, methyl or acetyl."/>
+    </inputs>
+    <outputs>
+        <data name="output" format="smi"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="i.smi" ftype="smi"/>
+            <param name="phase1" value="1"/>
+            <param name="phase2" value="1"/>
+            <output name="output" file="o.smi"/>
+        </test>
+        <test>
+            <param name="input" value="i.sdf" ftype="sdf"/>
+            <param name="phase1" value="2"/>
+            <param name="phase2" value="0"/>
+            <output name="output" file="o2.smi"/>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+.. class:: infomark
+
+**What this tool does**
+
+SyGMa (Systematic Generation of potential Metabolites) is a tool to generate 
+possible metabolic products of an input parent structure. The tool provides 
+two rulesets to cover both phase 1 and 2 metabolism.
+
+-----
+
+.. class:: infomark
+
+**Input**
+
+A file in SMILES or SDF format. Files may contain multiple molecule 
+entries; in this case outputs are distinguished by the code included in the 
+output file (e.g. SYGMA0MOL0 vs SYGMA1MOL0).
+
+The number of reaction cycles to be performed for both phase 1 and phase 2 
+metabolism should also be specified.
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+A single SMILES file is produced. For each molecule in the input file it 
+contains the SMILES strings of the predicted metabolites, each with a generated 
+ID code and an empirical probability score (corresponding to an estimated 
+probability that a product is actually metabolically produced in humans). The 
+first entry for each input molecule (MOL0) is always the parent molecule itself::
+
+    Oc1ccccc1   SYGMA0MOL0    1.0
+    O=C(O)C1OC(Oc2ccccc2)C(O)C(O)C1O    SYGMA0MOL1 0.25
+    O=S(=O)(O)Oc1ccccc1 SYGMA0MOL2  0.119
+    Oc1ccc(O)cc1    SYGMA0MOL3 0.056
+    COc1ccccc1  SYGMA0MOL4   0.054
+    Oc1ccccc1O  SYGMA0MOL5   0.032
+    O=C(O)C1OC(Oc2ccc(O)cc2)C(O)C(O)C1O SYGMA0MOL6  0.014
+    O=C(O)C1OC(Oc2ccccc2O)C(O)C(O)C1O   SYGMA0MOL7    0.008
+    O=S(=O)(O)Oc1ccc(O)cc1  SYGMA0MOL8   0.00666
+    O=S(=O)(O)Oc1ccccc1O    SYGMA0MOL9 0.00381
+    COc1ccc(O)cc1   SYGMA0MOL10   0.00302
+    COc1ccccc1O SYGMA0MOL11 0.00173
+
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1002/cmdc.200700312</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sygma_metabolites.py	Mon Sep 30 17:38:26 2019 -0400
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+
+import argparse
+import csv
+import sygma
+import numpy as np
+from rdkit import Chem
+from rdkit.Chem.rdmolfiles import SDMolSupplier, SmilesMolSupplier
+
+def mol_supplier(filename, ext):
+    """
+    Based on the file extension, use the appropriate RDKit function to
+    load a chemical data file (SMILES or SDF) containing multiple molecules
+    and return a list of RDKit Mol objects
+    """
+    if ext == 'sdf':
+        return [n for n in SDMolSupplier(filename)]
+    with open(filename) as f: 
+        mols = f.read().split('\n') 
+    if ext == 'smi' or ext == 'inchi':
+        return [Chem.MolFromSmiles(mol, sanitize=True) for mol in mols if mol != '']
+
+def predict_metabolites(parent, phase1_cycles, phase2_cycles):
+    """
+    Prediction of metabolites derived from a parent molecule
+    """
+    scenario = sygma.Scenario([
+        [sygma.ruleset['phase1'], int(phase1_cycles)],
+        [sygma.ruleset['phase2'], int(phase2_cycles)]])
+    metabolic_tree = scenario.run(parent)
+    metabolic_tree.calc_scores()
+    return metabolic_tree.to_smiles()
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', '--infile', required=True, help='Path to the input file.')
+    parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
+    parser.add_argument("--iformat", help="Specify the input file format.")
+    parser.add_argument("--phase1", help="Number of phase1 cycles.")
+    parser.add_argument("--phase2", help="Number of phase2 cycles.")
+    args = parser.parse_args()
+
+    mols = mol_supplier(args.infile, args.iformat)
+    outp = np.zeros((0,3))
+    for n in range(len(mols)):
+        metabs = np.array(predict_metabolites(mols[n], args.phase1, args.phase2))
+        metabs = np.column_stack((
+            metabs[:,0],  # SMILES
+            ['SYGMA{}MOL{}'.format(n, m) for m in range(metabs.shape[0])],  # SMILES label
+            np.round(np.array(metabs[:,1], dtype=float), decimals=5)  # score rounded to 5 dp
+        ))
+        outp = np.vstack((outp, metabs))
+    np.savetxt(args.outfile, outp, fmt="%s")
+
+
+# Run the command-line interface only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/i.sdf	Mon Sep 30 17:38:26 2019 -0400
@@ -0,0 +1,154 @@
+2244
+ OpenBabel07101213142D
+
+ 21 21  0  0  0  0  0  0  0  0999 V2000
+    3.7320   -0.0600    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301    1.4400    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981    1.4400    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    2.8660   -1.5600    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -0.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981   -1.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -2.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301   -1.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641    0.9400    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.8660   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.0000   -0.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.0611   -1.8700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.8671   -0.2500    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -2.6800    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.8671   -1.8700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    2.3100    0.4769    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.4631    0.2500    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.6900   -0.5969    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301    2.0600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+  1  5  1  0  0  0  0
+  1 12  1  0  0  0  0
+  2 11  1  0  0  0  0
+  2 21  1  0  0  0  0
+  3 11  2  0  0  0  0
+  4 12  2  0  0  0  0
+  5  6  1  0  0  0  0
+  5  7  2  0  0  0  0
+  6  8  2  0  0  0  0
+  6 11  1  0  0  0  0
+  7  9  1  0  0  0  0
+  7 14  1  0  0  0  0
+  8 10  1  0  0  0  0
+  8 15  1  0  0  0  0
+  9 10  2  0  0  0  0
+  9 16  1  0  0  0  0
+ 10 17  1  0  0  0  0
+ 12 13  1  0  0  0  0
+ 13 18  1  0  0  0  0
+ 13 19  1  0  0  0  0
+ 13 20  1  0  0  0  0
+M  END
+>  <PUBCHEM_COMPOUND_CID>
+2244
+
+>  <PUBCHEM_COMPOUND_CANONICALIZED>
+1
+
+>  <PUBCHEM_CACTVS_COMPLEXITY>
+212
+
+>  <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
+4
+
+>  <PUBCHEM_CACTVS_HBOND_DONOR>
+1
+
+>  <PUBCHEM_CACTVS_ROTATABLE_BOND>
+3
+
+>  <PUBCHEM_CACTVS_SUBSKEYS>
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA==
+
+>  <PUBCHEM_IUPAC_OPENEYE_NAME>
+2-acetoxybenzoic acid
+
+>  <PUBCHEM_IUPAC_CAS_NAME>
+2-acetyloxybenzoic acid
+
+>  <PUBCHEM_IUPAC_NAME>
+2-acetyloxybenzoic acid
+
+>  <PUBCHEM_IUPAC_SYSTEMATIC_NAME>
+2-acetyloxybenzoic acid
+
+>  <PUBCHEM_IUPAC_TRADITIONAL_NAME>
+2-acetoxybenzoic acid
+
+>  <PUBCHEM_IUPAC_INCHI>
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
+
+>  <PUBCHEM_IUPAC_INCHIKEY>
+BSYNRYMUTXBXSQ-UHFFFAOYSA-N
+
+>  <PUBCHEM_XLOGP3>
+1.2
+
+>  <PUBCHEM_EXACT_MASS>
+180.042259
+
+>  <PUBCHEM_MOLECULAR_FORMULA>
+C9H8O4
+
+>  <PUBCHEM_MOLECULAR_WEIGHT>
+180.15742
+
+>  <PUBCHEM_OPENEYE_CAN_SMILES>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+>  <PUBCHEM_OPENEYE_ISO_SMILES>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+>  <PUBCHEM_CACTVS_TPSA>
+63.6
+
+>  <PUBCHEM_MONOISOTOPIC_WEIGHT>
+180.042259
+
+>  <PUBCHEM_TOTAL_CHARGE>
+0
+
+>  <PUBCHEM_HEAVY_ATOM_COUNT>
+13
+
+>  <PUBCHEM_ATOM_DEF_STEREO_COUNT>
+0
+
+>  <PUBCHEM_ATOM_UDEF_STEREO_COUNT>
+0
+
+>  <PUBCHEM_BOND_DEF_STEREO_COUNT>
+0
+
+>  <PUBCHEM_BOND_UDEF_STEREO_COUNT>
+0
+
+>  <PUBCHEM_ISOTOPIC_ATOM_COUNT>
+0
+
+>  <PUBCHEM_COMPONENT_COUNT>
+1
+
+>  <PUBCHEM_CACTVS_TAUTO_COUNT>
+1
+
+>  <PUBCHEM_COORDINATE_TYPE>
+1
+5
+255
+
+>  <PUBCHEM_BONDANNOTATIONS>
+5  6  8
+5  7  8
+6  8  8
+7  9  8
+8  10  8
+9  10  8
+
+$$$$
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/i.smi	Mon Sep 30 17:38:26 2019 -0400
@@ -0,0 +1,2 @@
+c1ccccc1O
+CCOCC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/o.smi	Mon Sep 30 17:38:26 2019 -0400
@@ -0,0 +1,16 @@
+Oc1ccccc1 SYGMA0MOL0 1.0
+O=C(O)C1OC(Oc2ccccc2)C(O)C(O)C1O SYGMA0MOL1 0.25
+O=S(=O)(O)Oc1ccccc1 SYGMA0MOL2 0.119
+Oc1ccc(O)cc1 SYGMA0MOL3 0.056
+COc1ccccc1 SYGMA0MOL4 0.054
+Oc1ccccc1O SYGMA0MOL5 0.032
+O=C(O)C1OC(Oc2ccc(O)cc2)C(O)C(O)C1O SYGMA0MOL6 0.014
+O=C(O)C1OC(Oc2ccccc2O)C(O)C(O)C1O SYGMA0MOL7 0.008
+O=S(=O)(O)Oc1ccc(O)cc1 SYGMA0MOL8 0.00666
+O=S(=O)(O)Oc1ccccc1O SYGMA0MOL9 0.00381
+COc1ccc(O)cc1 SYGMA0MOL10 0.00302
+COc1ccccc1O SYGMA0MOL11 0.00173
+CCOCC SYGMA1MOL0 1.0
+CCO SYGMA1MOL1 0.087
+CCOC1OC(C(=O)O)C(O)C(O)C1O SYGMA1MOL2 0.00879
+CCOS(=O)(=O)O SYGMA1MOL3 0.00157
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/o2.smi	Mon Sep 30 17:38:26 2019 -0400
@@ -0,0 +1,32 @@
+CC(=O)Oc1ccccc1C(=O)O SYGMA0MOL0 1.0
+O=C(O)c1ccccc1O SYGMA0MOL1 0.529
+CC(=O)Oc1cc(O)ccc1C(=O)O SYGMA0MOL2 0.061
+CC(=O)Oc1ccc(O)cc1C(=O)O SYGMA0MOL3 0.056
+O=C(CO)Oc1ccccc1C(=O)O SYGMA0MOL4 0.049
+O=C(O)c1ccc(O)cc1O SYGMA0MOL5 0.03227
+CC(=O)Oc1c(O)cccc1C(=O)O SYGMA0MOL6 0.032
+O=C(O)c1cc(O)ccc1O SYGMA0MOL7 0.02962
+CC(=O)Oc1ccccc1 SYGMA0MOL8 0.023
+O=C(O)c1cccc(O)c1O SYGMA0MOL9 0.01693
+O=C(O)C(=O)Oc1ccccc1C(=O)O SYGMA0MOL10 0.016
+O=C(O)CO SYGMA0MOL11 0.01333
+Oc1ccccc1 SYGMA0MOL12 0.01217
+O=C(O)C(=O)O SYGMA0MOL13 0.00435
+O=C(O)Oc1ccccc1C(=O)O SYGMA0MOL14 0.00355
+CC(=O)Oc1cc(O)c(O)cc1C(=O)O SYGMA0MOL15 0.00342
+O=C(CO)Oc1cc(O)ccc1C(=O)O SYGMA0MOL16 0.00299
+O=C(CO)Oc1ccc(O)cc1C(=O)O SYGMA0MOL17 0.00274
+CC(=O)Oc1c(C(=O)O)ccc(O)c1O SYGMA0MOL18 0.00195
+CC(=O)Oc1c(O)cc(O)cc1C(=O)O SYGMA0MOL19 0.00179
+CC(=O)Oc1c(O)ccc(O)c1C(=O)O SYGMA0MOL20 0.00179
+O=C(CO)Oc1c(O)cccc1C(=O)O SYGMA0MOL21 0.00157
+CC(=O)Oc1cccc(O)c1 SYGMA0MOL22 0.0014
+CC(=O)Oc1ccc(O)cc1 SYGMA0MOL23 0.00129
+O=C(CO)Oc1ccccc1 SYGMA0MOL24 0.00113
+O=C(O)C(=O)Oc1cc(O)ccc1C(=O)O SYGMA0MOL25 0.00098
+O=C(O)C(=O)Oc1ccc(O)cc1C(=O)O SYGMA0MOL26 0.0009
+CC(=O)Oc1ccccc1O SYGMA0MOL27 0.00074
+CC(=O)Oc1ccc(O)c(O)c1C(=O)O SYGMA0MOL28 0.00073
+O=C(O)C(=O)Oc1c(O)cccc1C(=O)O SYGMA0MOL29 0.00051
+O=COc1ccccc1C(=O)O SYGMA0MOL30 0.00037
+O=C(O)C(=O)Oc1ccccc1 SYGMA0MOL31 0.00037