Mercurial > repos > bgruening > sygma
annotate sygma_metabolites.py @ 1:0e330829de40 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 5b2d7437ba0875c0913630fd2165c82ed933422c"
author | bgruening |
---|---|
date | Sun, 15 Mar 2020 13:18:38 -0400 |
parents | a2369e86bc48 |
children |
rev | line source |
---|---|
0
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
2 |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
3 import argparse |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
4 import csv |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
5 import sygma |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
6 import numpy as np |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
7 from rdkit import Chem |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
8 from rdkit.Chem.rdmolfiles import SDMolSupplier, SmilesMolSupplier |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
9 |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
def mol_supplier(filename, ext):
    """
    Load every molecule from a chemical data file as RDKit Mol objects.

    :param filename: path of the file to read
    :param ext: input format; one of 'sdf', 'smi' or 'inchi'
    :return: list of RDKit Mol objects, one per SDF record or per non-blank
        line of a SMILES/InChI file. RDKit yields None for a record it
        cannot parse, so the list may contain None entries.
    :raises ValueError: if ext is not a supported format
    """
    # Reject unknown formats before touching the file: previously such a call
    # read the whole file and then fell through, returning None implicitly.
    if ext not in ('sdf', 'smi', 'inchi'):
        raise ValueError(
            "Unsupported input format '{}'; expected 'sdf', 'smi' or 'inchi'.".format(ext))

    if ext == 'sdf':
        return list(SDMolSupplier(filename))

    # One record per line. Stripping removes '\r' left by Windows line endings
    # and lets whitespace-only lines be dropped together with empty ones.
    with open(filename) as f:
        records = [line.strip() for line in f]
    records = [record for record in records if record]

    if ext == 'inchi':
        # InChI strings are not valid SMILES, so they need the InChI parser.
        return [Chem.MolFromInchi(record, sanitize=True) for record in records]
    return [Chem.MolFromSmiles(record, sanitize=True) for record in records]
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
22 |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
def predict_metabolites(parent, phase1_cycles, phase2_cycles):
    """
    Predict the metabolites of a single parent molecule with SyGMa.

    :param parent: parent molecule handed to the SyGMa scenario
    :param phase1_cycles: number of phase 1 cycles (anything int() accepts)
    :param phase2_cycles: number of phase 2 cycles (anything int() accepts)
    :return: the scored metabolic tree converted with its to_list() method
    """
    # Phase 1 rules are applied first, then phase 2, each for its own
    # number of cycles.
    steps = [
        [sygma.ruleset[phase], int(cycles)]
        for phase, cycles in (('phase1', phase1_cycles), ('phase2', phase2_cycles))
    ]
    tree = sygma.Scenario(steps).run(parent)
    # Scores must be calculated before the tree is exported.
    tree.calc_scores()
    return tree.to_list()
0
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
33 |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
34 |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
def main():
    """
    Command-line entry point: predict metabolites for every input molecule
    and write them to a tab-separated file.

    Each output row holds the metabolite SMILES, a compound id of the form
    SYGMA<parent index>MOL<metabolite index> (both zero-based) and the SyGMa
    score rounded to 5 decimals. With --detailed, the molecular formula, the
    number of lines in the SyGMa pathway and the pathway itself (newlines
    removed) are appended.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Path to the input file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--iformat", required=True, help="Specify the input file format.")
    parser.add_argument("--phase1", required=True, help="Number of phase1 cycles.")
    parser.add_argument("--phase2", required=True, help="Number of phase2 cycles.")
    parser.add_argument("--detailed", dest="detailed",
                        action="store_true", help="Returns more detailed output")
    args = parser.parse_args()

    mols = mol_supplier(args.infile, args.iformat)

    if args.detailed:
        header = "smiles\tcompound_id\tsygma_score\tmolecular_formula\tsygma_n\tsygma_pathway"
        n_columns = 6
    else:
        header = "smiles\tcompound_id\tsygma_score"
        n_columns = 3

    # Collect one (1, n_columns) array per metabolite and stack them once at
    # the end; stacking inside the loop recopied the whole table every time.
    # The empty seed array keeps the output a valid 2-D table (header only)
    # when no metabolite is produced.
    rows = [np.zeros((0, n_columns))]
    for n, mol in enumerate(mols):
        metabs = predict_metabolites(mol, args.phase1, args.phase2)
        for entry, metab in enumerate(metabs):
            smiles = Chem.MolToSmiles(metab['SyGMa_metabolite'])
            fields = [
                smiles,  # SMILES
                'SYGMA{}MOL{}'.format(n, entry),  # SMILES label
                np.round(np.array(metab['SyGMa_score'], dtype=float),
                         decimals=5),  # score rounded to 5 dp
            ]
            if args.detailed:
                pathway = metab["SyGMa_pathway"]
                fields += [
                    Chem.rdMolDescriptors.CalcMolFormula(Chem.MolFromSmiles(smiles)),  # Molecular formula
                    len(pathway.split("\n")),  # SyGMa_n Sygma pathway length
                    pathway.replace("\n", ""),  # SyGMa pathway
                ]
            rows.append(np.column_stack(fields))
    outp = np.vstack(rows)

    np.savetxt(args.outfile, outp, fmt="%s", delimiter="\t",
               header=header, comments="")
0
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
79 |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
80 if __name__ == "__main__": |
a2369e86bc48
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 2bf5c0cc96e8768a36219297eab1e6cf3766651e"
bgruening
parents:
diff
changeset
|
81 main() |