# HG changeset patch # User bgruening # Date 1569879506 14400 # Node ID a2369e86bc480d273572cbb81c33cda88000f50a "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e" diff -r 000000000000 -r a2369e86bc48 sygma.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sygma.xml Mon Sep 30 17:38:26 2019 -0400 @@ -0,0 +1,94 @@ + + + 1.1.1 + + by performing common reactions on one or more parent molecule(s) + + sygma + rdkit + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1002/cmdc.200700312 + + diff -r 000000000000 -r a2369e86bc48 sygma_metabolites.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sygma_metabolites.py Mon Sep 30 17:38:26 2019 -0400 @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +import argparse +import csv +import sygma +import numpy as np +from rdkit import Chem +from rdkit.Chem.rdmolfiles import SDMolSupplier, SmilesMolSupplier + +def mol_supplier(filename, ext): + """ + Based on the file extension, use the appropriate RDKit function to + load a chemical data file (SMILES or SDF) containing multiple molecules + and return a list of RDKit Mol objects + """ + if ext == 'sdf': + return [n for n in SDMolSupplier(filename)] + with open(filename) as f: + mols = f.read().split('\n') + if ext == 'smi' or ext == 'inchi': + return [Chem.MolFromSmiles(mol, sanitize=True) for mol in mols if mol != ''] + +def predict_metabolites(parent, phase1_cycles, phase2_cycles): + """ + Prediction of metabolites derived from a parent molecule + """ + scenario = sygma.Scenario([ + [sygma.ruleset['phase1'], int(phase1_cycles)], + [sygma.ruleset['phase2'], int(phase2_cycles)]]) + metabolic_tree = scenario.run(parent) + metabolic_tree.calc_scores() + return metabolic_tree.to_smiles() + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') + parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') + parser.add_argument("--iformat", help="Specify the input file format.") + parser.add_argument("--phase1", help="Number of phase1 cycles.") + parser.add_argument("--phase2", help="Number of phase2 cycles.") + args = parser.parse_args() + + mols = mol_supplier(args.infile, args.iformat) + outp = np.zeros((0,3)) + for n in range(len(mols)): + metabs = np.array(predict_metabolites(mols[n], args.phase1, args.phase2)) + metabs = np.column_stack(( + metabs[:,0], # SMILES + ['SYGMA{}MOL{}'.format(n, m) for m in range(metabs.shape[0])], # SMILES label + np.round(np.array(metabs[:,1], dtype=float), decimals=5) # score rounded to 5 dp + )) + outp = np.vstack((outp, metabs)) + np.savetxt(args.outfile, outp, fmt="%s") + + +if __name__ == "__main__": + main() diff -r 000000000000 -r a2369e86bc48 test-data/i.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/i.sdf Mon Sep 30 17:38:26 2019 -0400 @@ -0,0 +1,154 @@ +2244 + OpenBabel07101213142D + + 21 21 0 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> +2244 + +> +1 + +> +212 + +> +4 + +> +1 + +> +3 + +> +AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> +2-acetoxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetoxybenzoic acid + +> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> +1.2 + +> +180.042259 + +> +C9H8O4 + +> +180.15742 + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +63.6 + +> +180.042259 + +> +0 + +> +13 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +1 +5 +255 + +> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +$$$$ diff -r 000000000000 -r a2369e86bc48 test-data/i.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/i.smi Mon Sep 30 17:38:26 2019 -0400 @@ -0,0 +1,2 @@ +c1ccccc1O +CCOCC diff -r 000000000000 -r a2369e86bc48 test-data/o.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/o.smi Mon Sep 30 17:38:26 2019 -0400 @@ -0,0 +1,16 @@ +Oc1ccccc1 SYGMA0MOL0 1.0 +O=C(O)C1OC(Oc2ccccc2)C(O)C(O)C1O SYGMA0MOL1 0.25 +O=S(=O)(O)Oc1ccccc1 SYGMA0MOL2 0.119 +Oc1ccc(O)cc1 SYGMA0MOL3 0.056 +COc1ccccc1 SYGMA0MOL4 0.054 +Oc1ccccc1O SYGMA0MOL5 0.032 +O=C(O)C1OC(Oc2ccc(O)cc2)C(O)C(O)C1O SYGMA0MOL6 0.014 +O=C(O)C1OC(Oc2ccccc2O)C(O)C(O)C1O SYGMA0MOL7 0.008 +O=S(=O)(O)Oc1ccc(O)cc1 SYGMA0MOL8 0.00666 +O=S(=O)(O)Oc1ccccc1O SYGMA0MOL9 0.00381 +COc1ccc(O)cc1 SYGMA0MOL10 0.00302 +COc1ccccc1O SYGMA0MOL11 0.00173 +CCOCC SYGMA1MOL0 1.0 +CCO SYGMA1MOL1 0.087 +CCOC1OC(C(=O)O)C(O)C(O)C1O SYGMA1MOL2 0.00879 +CCOS(=O)(=O)O SYGMA1MOL3 0.00157 diff -r 000000000000 -r a2369e86bc48 test-data/o2.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/o2.smi Mon Sep 30 17:38:26 2019 -0400 @@ -0,0 +1,32 @@ +CC(=O)Oc1ccccc1C(=O)O SYGMA0MOL0 1.0 +O=C(O)c1ccccc1O SYGMA0MOL1 0.529 +CC(=O)Oc1cc(O)ccc1C(=O)O SYGMA0MOL2 0.061 +CC(=O)Oc1ccc(O)cc1C(=O)O SYGMA0MOL3 0.056 +O=C(CO)Oc1ccccc1C(=O)O SYGMA0MOL4 0.049 +O=C(O)c1ccc(O)cc1O SYGMA0MOL5 0.03227 +CC(=O)Oc1c(O)cccc1C(=O)O SYGMA0MOL6 0.032 +O=C(O)c1cc(O)ccc1O SYGMA0MOL7 0.02962 +CC(=O)Oc1ccccc1 SYGMA0MOL8 0.023 +O=C(O)c1cccc(O)c1O SYGMA0MOL9 0.01693 +O=C(O)C(=O)Oc1ccccc1C(=O)O SYGMA0MOL10 0.016 +O=C(O)CO SYGMA0MOL11 0.01333 +Oc1ccccc1 SYGMA0MOL12 0.01217 +O=C(O)C(=O)O SYGMA0MOL13 0.00435 +O=C(O)Oc1ccccc1C(=O)O SYGMA0MOL14 0.00355 +CC(=O)Oc1cc(O)c(O)cc1C(=O)O SYGMA0MOL15 0.00342 +O=C(CO)Oc1cc(O)ccc1C(=O)O SYGMA0MOL16 0.00299 +O=C(CO)Oc1ccc(O)cc1C(=O)O SYGMA0MOL17 0.00274 +CC(=O)Oc1c(C(=O)O)ccc(O)c1O SYGMA0MOL18 0.00195 +CC(=O)Oc1c(O)cc(O)cc1C(=O)O SYGMA0MOL19 0.00179 +CC(=O)Oc1c(O)ccc(O)c1C(=O)O SYGMA0MOL20 0.00179 +O=C(CO)Oc1c(O)cccc1C(=O)O SYGMA0MOL21 0.00157 +CC(=O)Oc1cccc(O)c1 SYGMA0MOL22 0.0014 +CC(=O)Oc1ccc(O)cc1 SYGMA0MOL23 0.00129 +O=C(CO)Oc1ccccc1 SYGMA0MOL24 0.00113 +O=C(O)C(=O)Oc1cc(O)ccc1C(=O)O SYGMA0MOL25 0.00098 +O=C(O)C(=O)Oc1ccc(O)cc1C(=O)O SYGMA0MOL26 0.0009 +CC(=O)Oc1ccccc1O SYGMA0MOL27 0.00074 +CC(=O)Oc1ccc(O)c(O)c1C(=O)O SYGMA0MOL28 0.00073 +O=C(O)C(=O)Oc1c(O)cccc1C(=O)O SYGMA0MOL29 0.00051 +O=COc1ccccc1C(=O)O SYGMA0MOL30 0.00037 +O=C(O)C(=O)Oc1ccccc1 SYGMA0MOL31 0.00037