Mercurial > repos > bgruening > sygma
comparison sygma_metabolites.py @ 1:0e330829de40 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 5b2d7437ba0875c0913630fd2165c82ed933422c"
author | bgruening |
---|---|
date | Sun, 15 Mar 2020 13:18:38 -0400 |
parents | a2369e86bc48 |
children |
comparison
equal
deleted
inserted
replaced
0:a2369e86bc48 | 1:0e330829de40 |
---|---|
13 load a chemical data file (SMILES or SDF) containing multiple molecules | 13 load a chemical data file (SMILES or SDF) containing multiple molecules |
14 and return a list of RDKit Mol objects | 14 and return a list of RDKit Mol objects |
15 """ | 15 """ |
16 if ext == 'sdf': | 16 if ext == 'sdf': |
17 return [n for n in SDMolSupplier(filename)] | 17 return [n for n in SDMolSupplier(filename)] |
18 with open(filename) as f: | 18 with open(filename) as f: |
19 mols = f.read().split('\n') | 19 mols = f.read().split('\n') |
20 if ext == 'smi' or ext == 'inchi': | 20 if ext == 'smi' or ext == 'inchi': |
21 return [Chem.MolFromSmiles(mol, sanitize=True) for mol in mols if mol != ''] | 21 return [Chem.MolFromSmiles(mol, sanitize=True) for mol in mols if mol != ''] |
22 | 22 |
23 def predict_metabolites(parent, phase1_cycles, phase2_cycles): | 23 def predict_metabolites(parent, phase1_cycles, phase2_cycles): |
24 """ | 24 """ |
27 scenario = sygma.Scenario([ | 27 scenario = sygma.Scenario([ |
28 [sygma.ruleset['phase1'], int(phase1_cycles)], | 28 [sygma.ruleset['phase1'], int(phase1_cycles)], |
29 [sygma.ruleset['phase2'], int(phase2_cycles)]]) | 29 [sygma.ruleset['phase2'], int(phase2_cycles)]]) |
30 metabolic_tree = scenario.run(parent) | 30 metabolic_tree = scenario.run(parent) |
31 metabolic_tree.calc_scores() | 31 metabolic_tree.calc_scores() |
32 return metabolic_tree.to_smiles() | 32 return metabolic_tree.to_list() |
33 | 33 |
34 | 34 |
35 def main(): | 35 def main(): |
36 parser = argparse.ArgumentParser() | 36 parser = argparse.ArgumentParser() |
37 parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') | 37 parser.add_argument("-i", "--infile", required=True, help="Path to the input file.") |
38 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') | 38 parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.") |
39 parser.add_argument("--iformat", help="Specify the input file format.") | 39 parser.add_argument("--iformat", required=True, help="Specify the input file format.") |
40 parser.add_argument("--phase1", help="Number of phase1 cycles.") | 40 parser.add_argument("--phase1", required=True, help="Number of phase1 cycles.") |
41 parser.add_argument("--phase2", help="Number of phase2 cycles.") | 41 parser.add_argument("--phase2", required=True, help="Number of phase2 cycles.") |
| | 42 parser.add_argument("--detailed", dest="detailed", |
| | 43 action="store_true", help="Returns more detailed output") |
42 args = parser.parse_args() | 44 args = parser.parse_args() |
43 | 45 |
44 mols = mol_supplier(args.infile, args.iformat) | 46 mols = mol_supplier(args.infile, args.iformat) |
45 outp = np.zeros((0,3)) | 47 if args.detailed: |
| | 48 outp = np.zeros((0,6)) |
| | 49 else: |
| | 50 outp = np.zeros((0,3)) |
46 for n in range(len(mols)): | 51 for n in range(len(mols)): |
47 metabs = np.array(predict_metabolites(mols[n], args.phase1, args.phase2)) | 52 metabs = predict_metabolites(mols[n], args.phase1, args.phase2) |
48 metabs = np.column_stack(( | 53 for entry in range(len(metabs)): |
49 metabs[:,0], # SMILES | 54 smiles = Chem.MolToSmiles(metabs[entry]['SyGMa_metabolite']) |
50 ['SYGMA{}MOL{}'.format(n, m) for m in range(metabs.shape[0])], # SMILES label | 55 if args.detailed: |
51 np.round(np.array(metabs[:,1], dtype=float), decimals=5) # score rounded to 5 dp | 56 out = np.column_stack(( |
52 )) | 57 smiles, # SMILES |
53 outp = np.vstack((outp, metabs)) | 58 'SYGMA{}MOL{}'.format(n, entry), # SMILES label |
54 np.savetxt(args.outfile, outp, fmt="%s") | 59 np.round(np.array(metabs[entry]['SyGMa_score'], dtype=float), |
55 | 60 decimals=5), # score rounded to 5 dp |
| | 61 Chem.rdMolDescriptors.CalcMolFormula(Chem.MolFromSmiles(smiles)), # Molecular formula |
| | 62 len(metabs[entry]["SyGMa_pathway"].split("\n")), # SyGMa_n Sygma pathway length |
| | 63 metabs[entry]["SyGMa_pathway"].replace("\n", "") # SyGMa pathway |
| | 64 )) |
| | 65 else: |
| | 66 out = np.column_stack(( |
| | 67 smiles, # SMILES |
| | 68 'SYGMA{}MOL{}'.format(n, entry), # SMILES label |
| | 69 np.round(np.array(metabs[entry]['SyGMa_score'], dtype=float), |
| | 70 decimals=5) # score rounded to 5 dp |
| | 71 )) |
| | 72 outp = np.vstack((outp, out)) |
| | 73 if args.detailed: |
| | 74 np.savetxt(args.outfile, outp, fmt="%s", delimiter="\t", |
| | 75 header="smiles\tcompound_id\tsygma_score\tmolecular_formula\tsygma_n\tsygma_pathway", comments="") |
| | 76 else: |
| | 77 np.savetxt(args.outfile, outp, fmt="%s", delimiter="\t", |
| | 78 header="smiles\tcompound_id\tsygma_score", comments="") |
56 | 79 |
57 if __name__ == "__main__": | 80 if __name__ == "__main__": |
58 main() | 81 main() |