annotate sygma_metabolites.py @ 1:0e330829de40 draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 5b2d7437ba0875c0913630fd2165c82ed933422c"
author bgruening
date Sun, 15 Mar 2020 13:18:38 -0400
parents a2369e86bc48
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
1 #!/usr/bin/env python3
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
2
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
3 import argparse
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
4 import csv
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
5 import sygma
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
6 import numpy as np
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
7 from rdkit import Chem
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
8 from rdkit.Chem.rdmolfiles import SDMolSupplier, SmilesMolSupplier
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
9
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
def mol_supplier(filename, ext):
    """
    Load every molecule of a chemical data file as RDKit Mol objects.

    Parameters:
        filename: path of the file to read.
        ext: format identifier, one of 'sdf', 'smi' or 'inchi'.

    Returns:
        A list with one entry per SDF record, or per non-blank line for
        the line-based formats ('smi', 'inchi'). The RDKit parsers return
        None for input they cannot parse; such entries are kept so that
        list positions keep matching the order of the input.

    Raises:
        ValueError: if ``ext`` is not one of the supported formats.
    """
    # Validate first so that an unknown format fails loudly instead of
    # silently returning None (which only blows up later in the caller).
    if ext not in ('sdf', 'smi', 'inchi'):
        raise ValueError(
            "Unsupported input format {!r}; expected 'sdf', 'smi' or "
            "'inchi'.".format(ext))

    if ext == 'sdf':
        return [mol for mol in SDMolSupplier(filename)]

    # Line-based formats: one structure per line, blank lines ignored.
    with open(filename) as f:
        lines = [line.strip() for line in f]
    lines = [line for line in lines if line]

    if ext == 'inchi':
        # InChI strings are not valid SMILES, so they need their own parser.
        return [Chem.MolFromInchi(line, sanitize=True) for line in lines]
    return [Chem.MolFromSmiles(line, sanitize=True) for line in lines]
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
22
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
def predict_metabolites(parent, phase1_cycles, phase2_cycles):
    """
    Run a two-stage SyGMa scenario on one parent molecule.

    The scenario applies the 'phase1' ruleset for ``phase1_cycles`` cycles
    followed by the 'phase2' ruleset for ``phase2_cycles`` cycles. Both
    cycle counts are passed through ``int()``, so numeric strings are
    accepted. Scores are calculated on the resulting tree before it is
    returned in the form produced by its ``to_list()`` method.
    """
    phases = [
        [sygma.ruleset['phase1'], int(phase1_cycles)],
        [sygma.ruleset['phase2'], int(phase2_cycles)],
    ]
    tree = sygma.Scenario(phases).run(parent)
    # Scores must be computed before the tree is exported.
    tree.calc_scores()
    return tree.to_list()
0
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
33
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
34
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
def main():
    """
    Command-line entry point: predict metabolites for every input molecule
    and write them to a tab-separated file.

    The output always has the columns ``smiles``, ``compound_id`` and
    ``sygma_score`` (rounded to 5 decimals). With ``--detailed`` the columns
    ``molecular_formula``, ``sygma_n`` and ``sygma_pathway`` are appended.
    Compound IDs have the form ``SYGMA<n>MOL<m>`` where ``n`` is the
    zero-based position of the parent molecule in the input and ``m`` the
    position of the metabolite in that parent's result list.

    Parent molecules that could not be parsed (``None`` entries) are
    skipped with a warning on stderr; their index is still consumed so the
    IDs of the remaining molecules do not shift.
    """
    import sys  # local import: only needed for the warning below

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Path to the input file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--iformat", required=True, help="Specify the input file format.")
    parser.add_argument("--phase1", required=True, help="Number of phase1 cycles.")
    parser.add_argument("--phase2", required=True, help="Number of phase2 cycles.")
    parser.add_argument("--detailed", dest="detailed",
                        action="store_true", help="Returns more detailed output")
    args = parser.parse_args()

    columns = ["smiles", "compound_id", "sygma_score"]
    if args.detailed:
        columns += ["molecular_formula", "sygma_n", "sygma_pathway"]

    mols = mol_supplier(args.infile, args.iformat)

    # Start with an empty (0, ncols) array so that an input without any
    # metabolites still produces a header-only file; the one-row chunks are
    # collected here and stacked once at the end instead of re-copying the
    # whole table for every metabolite.
    chunks = [np.zeros((0, len(columns)))]
    for n, parent in enumerate(mols):
        if parent is None:
            print("Skipping input molecule {}: could not be parsed.".format(n),
                  file=sys.stderr)
            continue
        metabs = predict_metabolites(parent, args.phase1, args.phase2)
        for entry, metab in enumerate(metabs):
            smiles = Chem.MolToSmiles(metab['SyGMa_metabolite'])
            fields = [
                smiles,  # SMILES
                'SYGMA{}MOL{}'.format(n, entry),  # SMILES label
                np.round(np.array(metab['SyGMa_score'], dtype=float),
                         decimals=5),  # score rounded to 5 dp
            ]
            if args.detailed:
                pathway = metab["SyGMa_pathway"]
                fields += [
                    Chem.rdMolDescriptors.CalcMolFormula(Chem.MolFromSmiles(smiles)),  # Molecular formula
                    len(pathway.split("\n")),  # number of newline-separated pathway segments
                    pathway.replace("\n", ""),  # pathway flattened onto one line
                ]
            chunks.append(np.column_stack(fields))

    outp = np.vstack(chunks)
    np.savetxt(args.outfile, outp, fmt="%s", delimiter="\t",
               header="\t".join(columns), comments="")
0
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
79
a2369e86bc48 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff changeset
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()