# HG changeset patch
# User bgruening
# Date 1569879506 14400
# Node ID a2369e86bc480d273572cbb81c33cda88000f50a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
diff -r 000000000000 -r a2369e86bc48 sygma.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sygma.xml Mon Sep 30 17:38:26 2019 -0400
@@ -0,0 +1,94 @@
+
+
+ 1.1.1
+
+ by performing common reactions on one or more parent molecule(s)
+
+ sygma
+ rdkit
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10.1002/cmdc.200700312
+
+
diff -r 000000000000 -r a2369e86bc48 sygma_metabolites.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sygma_metabolites.py Mon Sep 30 17:38:26 2019 -0400
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+
+import argparse
+import csv
+import sygma
+import numpy as np
+from rdkit import Chem
+from rdkit.Chem.rdmolfiles import SDMolSupplier, SmilesMolSupplier
+
+def mol_supplier(filename, ext):
+ """
+ Based on the file extension, use the appropriate RDKit function to
+ load a chemical data file (SMILES or SDF) containing multiple molecules
+ and return a list of RDKit Mol objects
+ """
+ if ext == 'sdf':
+ return [n for n in SDMolSupplier(filename)]
+ with open(filename) as f:
+ mols = f.read().split('\n')
+ if ext == 'smi' or ext == 'inchi':
+ return [Chem.MolFromSmiles(mol, sanitize=True) for mol in mols if mol != '']
+
+def predict_metabolites(parent, phase1_cycles, phase2_cycles):
+ """
+ Prediction of metabolites derived from a parent molecule
+ """
+ scenario = sygma.Scenario([
+ [sygma.ruleset['phase1'], int(phase1_cycles)],
+ [sygma.ruleset['phase2'], int(phase2_cycles)]])
+ metabolic_tree = scenario.run(parent)
+ metabolic_tree.calc_scores()
+ return metabolic_tree.to_smiles()
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-i', '--infile', required=True, help='Path to the input file.')
+ parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
+ parser.add_argument("--iformat", help="Specify the input file format.")
+ parser.add_argument("--phase1", help="Number of phase1 cycles.")
+ parser.add_argument("--phase2", help="Number of phase2 cycles.")
+ args = parser.parse_args()
+
+ mols = mol_supplier(args.infile, args.iformat)
+ outp = np.zeros((0,3))
+ for n in range(len(mols)):
+ metabs = np.array(predict_metabolites(mols[n], args.phase1, args.phase2))
+ metabs = np.column_stack((
+ metabs[:,0], # SMILES
+ ['SYGMA{}MOL{}'.format(n, m) for m in range(metabs.shape[0])], # SMILES label
+ np.round(np.array(metabs[:,1], dtype=float), decimals=5) # score rounded to 5 dp
+ ))
+ outp = np.vstack((outp, metabs))
+ np.savetxt(args.outfile, outp, fmt="%s")
+
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+ main()
diff -r 000000000000 -r a2369e86bc48 test-data/i.sdf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/i.sdf Mon Sep 30 17:38:26 2019 -0400
@@ -0,0 +1,154 @@
+2244
+ OpenBabel07101213142D
+
+ 21 21 0 0 0 0 0 0 0 0999 V2000
+ 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 1 5 1 0 0 0 0
+ 1 12 1 0 0 0 0
+ 2 11 1 0 0 0 0
+ 2 21 1 0 0 0 0
+ 3 11 2 0 0 0 0
+ 4 12 2 0 0 0 0
+ 5 6 1 0 0 0 0
+ 5 7 2 0 0 0 0
+ 6 8 2 0 0 0 0
+ 6 11 1 0 0 0 0
+ 7 9 1 0 0 0 0
+ 7 14 1 0 0 0 0
+ 8 10 1 0 0 0 0
+ 8 15 1 0 0 0 0
+ 9 10 2 0 0 0 0
+ 9 16 1 0 0 0 0
+ 10 17 1 0 0 0 0
+ 12 13 1 0 0 0 0
+ 13 18 1 0 0 0 0
+ 13 19 1 0 0 0 0
+ 13 20 1 0 0 0 0
+M END
+>
+2244
+
+>
+1
+
+>
+212
+
+>
+4
+
+>
+1
+
+>
+3
+
+>
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA==
+
+>
+2-acetoxybenzoic acid
+
+>
+2-acetyloxybenzoic acid
+
+>
+2-acetyloxybenzoic acid
+
+>
+2-acetyloxybenzoic acid
+
+>
+2-acetoxybenzoic acid
+
+>
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
+
+>
+BSYNRYMUTXBXSQ-UHFFFAOYSA-N
+
+>
+1.2
+
+>
+180.042259
+
+>
+C9H8O4
+
+>
+180.15742
+
+>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+>
+63.6
+
+>
+180.042259
+
+>
+0
+
+>
+13
+
+>
+0
+
+>
+0
+
+>
+0
+
+>
+0
+
+>
+0
+
+>
+1
+
+>
+1
+
+>
+1
+5
+255
+
+>
+5 6 8
+5 7 8
+6 8 8
+7 9 8
+8 10 8
+9 10 8
+
+$$$$
diff -r 000000000000 -r a2369e86bc48 test-data/i.smi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/i.smi Mon Sep 30 17:38:26 2019 -0400
@@ -0,0 +1,2 @@
+c1ccccc1O
+CCOCC
diff -r 000000000000 -r a2369e86bc48 test-data/o.smi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/o.smi Mon Sep 30 17:38:26 2019 -0400
@@ -0,0 +1,16 @@
+Oc1ccccc1 SYGMA0MOL0 1.0
+O=C(O)C1OC(Oc2ccccc2)C(O)C(O)C1O SYGMA0MOL1 0.25
+O=S(=O)(O)Oc1ccccc1 SYGMA0MOL2 0.119
+Oc1ccc(O)cc1 SYGMA0MOL3 0.056
+COc1ccccc1 SYGMA0MOL4 0.054
+Oc1ccccc1O SYGMA0MOL5 0.032
+O=C(O)C1OC(Oc2ccc(O)cc2)C(O)C(O)C1O SYGMA0MOL6 0.014
+O=C(O)C1OC(Oc2ccccc2O)C(O)C(O)C1O SYGMA0MOL7 0.008
+O=S(=O)(O)Oc1ccc(O)cc1 SYGMA0MOL8 0.00666
+O=S(=O)(O)Oc1ccccc1O SYGMA0MOL9 0.00381
+COc1ccc(O)cc1 SYGMA0MOL10 0.00302
+COc1ccccc1O SYGMA0MOL11 0.00173
+CCOCC SYGMA1MOL0 1.0
+CCO SYGMA1MOL1 0.087
+CCOC1OC(C(=O)O)C(O)C(O)C1O SYGMA1MOL2 0.00879
+CCOS(=O)(=O)O SYGMA1MOL3 0.00157
diff -r 000000000000 -r a2369e86bc48 test-data/o2.smi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/o2.smi Mon Sep 30 17:38:26 2019 -0400
@@ -0,0 +1,32 @@
+CC(=O)Oc1ccccc1C(=O)O SYGMA0MOL0 1.0
+O=C(O)c1ccccc1O SYGMA0MOL1 0.529
+CC(=O)Oc1cc(O)ccc1C(=O)O SYGMA0MOL2 0.061
+CC(=O)Oc1ccc(O)cc1C(=O)O SYGMA0MOL3 0.056
+O=C(CO)Oc1ccccc1C(=O)O SYGMA0MOL4 0.049
+O=C(O)c1ccc(O)cc1O SYGMA0MOL5 0.03227
+CC(=O)Oc1c(O)cccc1C(=O)O SYGMA0MOL6 0.032
+O=C(O)c1cc(O)ccc1O SYGMA0MOL7 0.02962
+CC(=O)Oc1ccccc1 SYGMA0MOL8 0.023
+O=C(O)c1cccc(O)c1O SYGMA0MOL9 0.01693
+O=C(O)C(=O)Oc1ccccc1C(=O)O SYGMA0MOL10 0.016
+O=C(O)CO SYGMA0MOL11 0.01333
+Oc1ccccc1 SYGMA0MOL12 0.01217
+O=C(O)C(=O)O SYGMA0MOL13 0.00435
+O=C(O)Oc1ccccc1C(=O)O SYGMA0MOL14 0.00355
+CC(=O)Oc1cc(O)c(O)cc1C(=O)O SYGMA0MOL15 0.00342
+O=C(CO)Oc1cc(O)ccc1C(=O)O SYGMA0MOL16 0.00299
+O=C(CO)Oc1ccc(O)cc1C(=O)O SYGMA0MOL17 0.00274
+CC(=O)Oc1c(C(=O)O)ccc(O)c1O SYGMA0MOL18 0.00195
+CC(=O)Oc1c(O)cc(O)cc1C(=O)O SYGMA0MOL19 0.00179
+CC(=O)Oc1c(O)ccc(O)c1C(=O)O SYGMA0MOL20 0.00179
+O=C(CO)Oc1c(O)cccc1C(=O)O SYGMA0MOL21 0.00157
+CC(=O)Oc1cccc(O)c1 SYGMA0MOL22 0.0014
+CC(=O)Oc1ccc(O)cc1 SYGMA0MOL23 0.00129
+O=C(CO)Oc1ccccc1 SYGMA0MOL24 0.00113
+O=C(O)C(=O)Oc1cc(O)ccc1C(=O)O SYGMA0MOL25 0.00098
+O=C(O)C(=O)Oc1ccc(O)cc1C(=O)O SYGMA0MOL26 0.0009
+CC(=O)Oc1ccccc1O SYGMA0MOL27 0.00074
+CC(=O)Oc1ccc(O)c(O)c1C(=O)O SYGMA0MOL28 0.00073
+O=C(O)C(=O)Oc1c(O)cccc1C(=O)O SYGMA0MOL29 0.00051
+O=COc1ccccc1C(=O)O SYGMA0MOL30 0.00037
+O=C(O)C(=O)Oc1ccccc1 SYGMA0MOL31 0.00037