Mercurial > repos > bgruening > sygma
changeset 0:a2369e86bc48 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
author | bgruening |
---|---|
date | Mon, 30 Sep 2019 17:38:26 -0400 |
parents | |
children | 0e330829de40 |
files | sygma.xml sygma_metabolites.py test-data/i.sdf test-data/i.smi test-data/o.smi test-data/o2.smi |
diffstat | 6 files changed, 356 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sygma.xml Mon Sep 30 17:38:26 2019 -0400 @@ -0,0 +1,94 @@ +<tool id="sygma" name="Generate possible metabolites with SyGMa" version="@VERSION@"> + <macros> + <token name="@VERSION@">1.1.1</token> + </macros> + <description>by performing common reactions on one or more parent molecule(s)</description> + <requirements> + <requirement type="package" version="@VERSION@">sygma</requirement> + <requirement type="package" version="2019.03.4">rdkit</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + python '$__tool_directory__/sygma_metabolites.py' + -i '$input' + --iformat '$input.ext' + -o '$output' + --phase1 '$phase1' + --phase2 '$phase2' + ]]></command> + <inputs> + <param type="data" name="input" format="smi,sdf" label="Parent molecule(s)" help="Upload an SDF or SMILES file."/> + <param type="integer" name="phase1" value="1" min="0" label="Number of reaction cycles to apply for phase 1" help="Phase 1 metabolism rules include different types of oxidation, reduction, hydrolysis and condensation reactions."/> + <param type="integer" name="phase2" value="1" min="0" label="Number of reaction cycles to apply for phase 2" help="Phase 2 metabolism rules include several conjugation reactions, e.g. with glucuronyl, sulfate, methyl or acetyl."/> + </inputs> + <outputs> + <data name="output" format="smi"/> + </outputs> + <tests> + <test> + <param name="input" value="i.smi" ftype="smi"/> + <param name="phase1" value="1"/> + <param name="phase2" value="1"/> + <output name="output" file="o.smi"/> + </test> + <test> + <param name="input" value="i.sdf" ftype="sdf"/> + <param name="phase1" value="2"/> + <param name="phase2" value="0"/> + <output name="output" file="o2.smi"/> + </test> + </tests> + <help> +<![CDATA[ + +.. class:: infomark + +**What this tool does** + +SyGMa (Systematic Generation of potential Metabolites) is a tool to generate +possible metabolic products of an input parent structure. 
The tool provides +two rulesets to cover both phase 1 and 2 metabolism. + +----- + +.. class:: infomark + +**Input** + +A file in SMILES or SDF format. Files may contain multiple molecule +entries; in this case outputs are distinguished by the code included in the +output file (e.g. SYGMA0MOL0 vs SYGMA1MOL0). + +The number of reaction cycles to be performed for both phase 1 and phase 2 +metabolism should also be specified. + +----- + +.. class:: infomark + +**Output** + +A single SMILES file is produced. For each molecule in the input file it contains +the SMILES strings of the predicted metabolites, a generated ID code, and an empirical +probability score (corresponding to an estimated probability that a product is +actually metabolically produced in humans). The first line for each input molecule is always the parent +molecule itself:: + + Oc1ccccc1 SYGMA0MOL0 1.0 + O=C(O)C1OC(Oc2ccccc2)C(O)C(O)C1O SYGMA0MOL1 0.25 + O=S(=O)(O)Oc1ccccc1 SYGMA0MOL2 0.119 + Oc1ccc(O)cc1 SYGMA0MOL3 0.056 + COc1ccccc1 SYGMA0MOL4 0.054 + Oc1ccccc1O SYGMA0MOL5 0.032 + O=C(O)C1OC(Oc2ccc(O)cc2)C(O)C(O)C1O SYGMA0MOL6 0.014 + O=C(O)C1OC(Oc2ccccc2O)C(O)C(O)C1O SYGMA0MOL7 0.008 + O=S(=O)(O)Oc1ccc(O)cc1 SYGMA0MOL8 0.00666 + O=S(=O)(O)Oc1ccccc1O SYGMA0MOL9 0.00381 + COc1ccc(O)cc1 SYGMA0MOL10 0.00302 + COc1ccccc1O SYGMA0MOL11 0.00173 + + + ]]></help> + <citations> + <citation type="doi">10.1002/cmdc.200700312</citation> + </citations> +</tool>
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sygma_metabolites.py Mon Sep 30 17:38:26 2019 -0400 @@ -0,0 +1,58 @@
#!/usr/bin/env python3
"""Command-line wrapper that runs SyGMa metabolite prediction on a molecule file.

Reads parent molecules from an SDF, SMILES or InChI file, applies the SyGMa
phase 1 and phase 2 rulesets for the requested number of cycles, and writes
one whitespace-separated row (SMILES, label, score) per predicted metabolite.
"""

import argparse
import csv  # noqa: F401  (kept from the original file; currently unused)
import sys

import numpy as np
import sygma
from rdkit import Chem
from rdkit.Chem.rdmolfiles import SDMolSupplier, SmilesMolSupplier  # noqa: F401


def mol_supplier(filename, ext):
    """
    Load every molecule from a chemical data file as RDKit Mol objects.

    Args:
        filename: Path of the file to read.
        ext: Input format; one of 'sdf', 'smi' or 'inchi'.

    Returns:
        A list with one entry per record / non-blank line. Entries are
        whatever the RDKit reader returned, so a record RDKit could not
        parse is represented by None and keeps its position in the list.

    Raises:
        ValueError: if ``ext`` is not one of the supported formats.
    """
    if ext == 'sdf':
        return list(SDMolSupplier(filename))

    # One molecule per line; choose the parser that matches the notation.
    if ext == 'smi':
        parse = Chem.MolFromSmiles
    elif ext == 'inchi':
        parse = Chem.MolFromInchi
    else:
        # Validate before touching the file so the caller gets a clear error
        # instead of a None result that fails later.
        raise ValueError(
            "Unsupported input format {!r}; expected 'sdf', 'smi' or "
            "'inchi'.".format(ext))

    with open(filename) as handle:
        # strip() also removes a trailing '\r' from CRLF files; blank lines
        # are skipped.
        stripped = (line.strip() for line in handle)
        return [parse(entry) for entry in stripped if entry]


def predict_metabolites(parent, phase1_cycles, phase2_cycles):
    """
    Predict metabolites derived from a parent molecule.

    Args:
        parent: Parent molecule handed to ``sygma.Scenario.run``.
        phase1_cycles: Number of cycles for the 'phase1' ruleset (cast to int).
        phase2_cycles: Number of cycles for the 'phase2' ruleset (cast to int).

    Returns:
        The result of ``to_smiles()`` on the scored metabolic tree. ``main``
        consumes it as rows whose first item is a SMILES string and whose
        second item is the score.
    """
    scenario = sygma.Scenario([
        [sygma.ruleset['phase1'], int(phase1_cycles)],
        [sygma.ruleset['phase2'], int(phase2_cycles)]])
    metabolic_tree = scenario.run(parent)
    metabolic_tree.calc_scores()
    return metabolic_tree.to_smiles()


def _label_metabolites(mol_index, metabolites):
    """Return an (n, 3) array of SMILES, 'SYGMA<i>MOL<j>' label and rounded score."""
    table = np.array(metabolites)
    labels = ['SYGMA{}MOL{}'.format(mol_index, m) for m in range(table.shape[0])]
    scores = np.round(np.array(table[:, 1], dtype=float), decimals=5)  # 5 dp
    return np.column_stack((table[:, 0], labels, scores))


def main():
    """Parse the command line, predict metabolites and write the output file."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', required=True, help='Path to the input file.')
    parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
    parser.add_argument("--iformat", help="Specify the input file format.")
    # Defaults mirror the tool wrapper (one cycle each) so omitting the
    # options no longer fails on int(None).
    parser.add_argument("--phase1", type=int, default=1, help="Number of phase1 cycles.")
    parser.add_argument("--phase2", type=int, default=1, help="Number of phase2 cycles.")
    args = parser.parse_args()

    mols = mol_supplier(args.infile, args.iformat)
    tables = []
    for n, mol in enumerate(mols):
        if mol is None:
            # RDKit signals an unparsable record with None. Skip it but keep
            # the index so labels still refer to the input position.
            print('Skipping input molecule {}: could not be parsed.'.format(n),
                  file=sys.stderr)
            continue
        tables.append(
            _label_metabolites(n, predict_metabolites(mol, args.phase1, args.phase2)))

    # Stack once at the end; an empty (0, 3) array yields an empty output file.
    outp = np.vstack(tables) if tables else np.zeros((0, 3))
    np.savetxt(args.outfile, outp, fmt="%s")


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/i.sdf Mon Sep 30 17:38:26 2019 -0400 @@ -0,0 +1,154 @@ +2244 + OpenBabel07101213142D + + 21 21 0 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> <PUBCHEM_COMPOUND_CID> +2244 + +> <PUBCHEM_COMPOUND_CANONICALIZED> +1 + +> <PUBCHEM_CACTVS_COMPLEXITY> +212 + +> <PUBCHEM_CACTVS_HBOND_ACCEPTOR> +4 + +> <PUBCHEM_CACTVS_HBOND_DONOR> +1 + +> <PUBCHEM_CACTVS_ROTATABLE_BOND> +3 + +> <PUBCHEM_CACTVS_SUBSKEYS> 
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> <PUBCHEM_IUPAC_OPENEYE_NAME> +2-acetoxybenzoic acid + +> <PUBCHEM_IUPAC_CAS_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_SYSTEMATIC_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_TRADITIONAL_NAME> +2-acetoxybenzoic acid + +> <PUBCHEM_IUPAC_INCHI> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> <PUBCHEM_IUPAC_INCHIKEY> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> <PUBCHEM_XLOGP3> +1.2 + +> <PUBCHEM_EXACT_MASS> +180.042259 + +> <PUBCHEM_MOLECULAR_FORMULA> +C9H8O4 + +> <PUBCHEM_MOLECULAR_WEIGHT> +180.15742 + +> <PUBCHEM_OPENEYE_CAN_SMILES> +CC(=O)OC1=CC=CC=C1C(=O)O + +> <PUBCHEM_OPENEYE_ISO_SMILES> +CC(=O)OC1=CC=CC=C1C(=O)O + +> <PUBCHEM_CACTVS_TPSA> +63.6 + +> <PUBCHEM_MONOISOTOPIC_WEIGHT> +180.042259 + +> <PUBCHEM_TOTAL_CHARGE> +0 + +> <PUBCHEM_HEAVY_ATOM_COUNT> +13 + +> <PUBCHEM_ATOM_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_ISOTOPIC_ATOM_COUNT> +0 + +> <PUBCHEM_COMPONENT_COUNT> +1 + +> <PUBCHEM_CACTVS_TAUTO_COUNT> +1 + +> <PUBCHEM_COORDINATE_TYPE> +1 +5 +255 + +> <PUBCHEM_BONDANNOTATIONS> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +$$$$
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/i.smi Mon Sep 30 17:38:26 2019 -0400 @@ -0,0 +1,2 @@ +c1ccccc1O +CCOCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/o.smi Mon Sep 30 17:38:26 2019 -0400 @@ -0,0 +1,16 @@ +Oc1ccccc1 SYGMA0MOL0 1.0 +O=C(O)C1OC(Oc2ccccc2)C(O)C(O)C1O SYGMA0MOL1 0.25 +O=S(=O)(O)Oc1ccccc1 SYGMA0MOL2 0.119 +Oc1ccc(O)cc1 SYGMA0MOL3 0.056 +COc1ccccc1 SYGMA0MOL4 0.054 +Oc1ccccc1O SYGMA0MOL5 0.032 +O=C(O)C1OC(Oc2ccc(O)cc2)C(O)C(O)C1O SYGMA0MOL6 0.014 +O=C(O)C1OC(Oc2ccccc2O)C(O)C(O)C1O SYGMA0MOL7 0.008 +O=S(=O)(O)Oc1ccc(O)cc1 SYGMA0MOL8 0.00666 +O=S(=O)(O)Oc1ccccc1O SYGMA0MOL9 0.00381 +COc1ccc(O)cc1 SYGMA0MOL10 0.00302 +COc1ccccc1O SYGMA0MOL11 0.00173 +CCOCC SYGMA1MOL0 1.0 +CCO SYGMA1MOL1 0.087 +CCOC1OC(C(=O)O)C(O)C(O)C1O SYGMA1MOL2 0.00879 +CCOS(=O)(=O)O SYGMA1MOL3 0.00157
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/o2.smi Mon Sep 30 17:38:26 2019 -0400 @@ -0,0 +1,32 @@ +CC(=O)Oc1ccccc1C(=O)O SYGMA0MOL0 1.0 +O=C(O)c1ccccc1O SYGMA0MOL1 0.529 +CC(=O)Oc1cc(O)ccc1C(=O)O SYGMA0MOL2 0.061 +CC(=O)Oc1ccc(O)cc1C(=O)O SYGMA0MOL3 0.056 +O=C(CO)Oc1ccccc1C(=O)O SYGMA0MOL4 0.049 +O=C(O)c1ccc(O)cc1O SYGMA0MOL5 0.03227 +CC(=O)Oc1c(O)cccc1C(=O)O SYGMA0MOL6 0.032 +O=C(O)c1cc(O)ccc1O SYGMA0MOL7 0.02962 +CC(=O)Oc1ccccc1 SYGMA0MOL8 0.023 +O=C(O)c1cccc(O)c1O SYGMA0MOL9 0.01693 +O=C(O)C(=O)Oc1ccccc1C(=O)O SYGMA0MOL10 0.016 +O=C(O)CO SYGMA0MOL11 0.01333 +Oc1ccccc1 SYGMA0MOL12 0.01217 +O=C(O)C(=O)O SYGMA0MOL13 0.00435 +O=C(O)Oc1ccccc1C(=O)O SYGMA0MOL14 0.00355 +CC(=O)Oc1cc(O)c(O)cc1C(=O)O SYGMA0MOL15 0.00342 +O=C(CO)Oc1cc(O)ccc1C(=O)O SYGMA0MOL16 0.00299 +O=C(CO)Oc1ccc(O)cc1C(=O)O SYGMA0MOL17 0.00274 +CC(=O)Oc1c(C(=O)O)ccc(O)c1O SYGMA0MOL18 0.00195 +CC(=O)Oc1c(O)cc(O)cc1C(=O)O SYGMA0MOL19 0.00179 +CC(=O)Oc1c(O)ccc(O)c1C(=O)O SYGMA0MOL20 0.00179 +O=C(CO)Oc1c(O)cccc1C(=O)O SYGMA0MOL21 0.00157 +CC(=O)Oc1cccc(O)c1 SYGMA0MOL22 0.0014 +CC(=O)Oc1ccc(O)cc1 SYGMA0MOL23 0.00129 +O=C(CO)Oc1ccccc1 SYGMA0MOL24 0.00113 +O=C(O)C(=O)Oc1cc(O)ccc1C(=O)O SYGMA0MOL25 0.00098 +O=C(O)C(=O)Oc1ccc(O)cc1C(=O)O SYGMA0MOL26 0.0009 +CC(=O)Oc1ccccc1O SYGMA0MOL27 0.00074 +CC(=O)Oc1ccc(O)c(O)c1C(=O)O SYGMA0MOL28 0.00073 +O=C(O)C(=O)Oc1c(O)cccc1C(=O)O SYGMA0MOL29 0.00051 +O=COc1ccccc1C(=O)O SYGMA0MOL30 0.00037 +O=C(O)C(=O)Oc1ccccc1 SYGMA0MOL31 0.00037