comparison sygma_metabolites.py @ 1:0e330829de40 draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 5b2d7437ba0875c0913630fd2165c82ed933422c"
author bgruening
date Sun, 15 Mar 2020 13:18:38 -0400
parents a2369e86bc48
children
comparison
equal deleted inserted replaced
0:a2369e86bc48 1:0e330829de40
13 load a chemical data file (SMILES or SDF) containing multiple molecules 13 load a chemical data file (SMILES or SDF) containing multiple molecules
14 and return a list of RDKit Mol objects 14 and return a list of RDKit Mol objects
15 """ 15 """
16 if ext == 'sdf': 16 if ext == 'sdf':
17 return [n for n in SDMolSupplier(filename)] 17 return [n for n in SDMolSupplier(filename)]
18 with open(filename) as f: 18 with open(filename) as f:
19 mols = f.read().split('\n') 19 mols = f.read().split('\n')
20 if ext == 'smi' or ext == 'inchi': 20 if ext == 'smi' or ext == 'inchi':
21 return [Chem.MolFromSmiles(mol, sanitize=True) for mol in mols if mol != ''] 21 return [Chem.MolFromSmiles(mol, sanitize=True) for mol in mols if mol != '']
22 22
23 def predict_metabolites(parent, phase1_cycles, phase2_cycles): 23 def predict_metabolites(parent, phase1_cycles, phase2_cycles):
24 """ 24 """
27 scenario = sygma.Scenario([ 27 scenario = sygma.Scenario([
28 [sygma.ruleset['phase1'], int(phase1_cycles)], 28 [sygma.ruleset['phase1'], int(phase1_cycles)],
29 [sygma.ruleset['phase2'], int(phase2_cycles)]]) 29 [sygma.ruleset['phase2'], int(phase2_cycles)]])
30 metabolic_tree = scenario.run(parent) 30 metabolic_tree = scenario.run(parent)
31 metabolic_tree.calc_scores() 31 metabolic_tree.calc_scores()
32 return metabolic_tree.to_smiles() 32 return metabolic_tree.to_list()
33 33
34 34
35 def main(): 35 def main():
36 parser = argparse.ArgumentParser() 36 parser = argparse.ArgumentParser()
37 parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') 37 parser.add_argument("-i", "--infile", required=True, help="Path to the input file.")
38 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') 38 parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
39 parser.add_argument("--iformat", help="Specify the input file format.") 39 parser.add_argument("--iformat", required=True, help="Specify the input file format.")
40 parser.add_argument("--phase1", help="Number of phase1 cycles.") 40 parser.add_argument("--phase1", required=True, help="Number of phase1 cycles.")
41 parser.add_argument("--phase2", help="Number of phase2 cycles.") 41 parser.add_argument("--phase2", required=True, help="Number of phase2 cycles.")
42 parser.add_argument("--detailed", dest="detailed",
43 action="store_true", help="Returns more detailed output")
42 args = parser.parse_args() 44 args = parser.parse_args()
43 45
44 mols = mol_supplier(args.infile, args.iformat) 46 mols = mol_supplier(args.infile, args.iformat)
45 outp = np.zeros((0,3)) 47 if args.detailed:
48 outp = np.zeros((0,6))
49 else:
50 outp = np.zeros((0,3))
46 for n in range(len(mols)): 51 for n in range(len(mols)):
47 metabs = np.array(predict_metabolites(mols[n], args.phase1, args.phase2)) 52 metabs = predict_metabolites(mols[n], args.phase1, args.phase2)
48 metabs = np.column_stack(( 53 for entry in range(len(metabs)):
49 metabs[:,0], # SMILES 54 smiles = Chem.MolToSmiles(metabs[entry]['SyGMa_metabolite'])
50 ['SYGMA{}MOL{}'.format(n, m) for m in range(metabs.shape[0])], # SMILES label 55 if args.detailed:
51 np.round(np.array(metabs[:,1], dtype=float), decimals=5) # score rounded to 5 dp 56 out = np.column_stack((
52 )) 57 smiles, # SMILES
53 outp = np.vstack((outp, metabs)) 58 'SYGMA{}MOL{}'.format(n, entry), # SMILES label
54 np.savetxt(args.outfile, outp, fmt="%s") 59 np.round(np.array(metabs[entry]['SyGMa_score'], dtype=float),
55 60 decimals=5), # score rounded to 5 dp
61 Chem.rdMolDescriptors.CalcMolFormula(Chem.MolFromSmiles(smiles)), # Molecular formula
62 len(metabs[entry]["SyGMa_pathway"].split("\n")), # SyGMa_n Sygma pathway length
63 metabs[entry]["SyGMa_pathway"].replace("\n", "") # SyGMa pathway
64 ))
65 else:
66 out = np.column_stack((
67 smiles, # SMILES
68 'SYGMA{}MOL{}'.format(n, entry), # SMILES label
69 np.round(np.array(metabs[entry]['SyGMa_score'], dtype=float),
70 decimals=5) # score rounded to 5 dp
71 ))
72 outp = np.vstack((outp, out))
73 if args.detailed:
74 np.savetxt(args.outfile, outp, fmt="%s", delimiter="\t",
75 header="smiles\tcompound_id\tsygma_score\tmolecular_formula\tsygma_n\tsygma_pathway", comments="")
76 else:
77 np.savetxt(args.outfile, outp, fmt="%s", delimiter="\t",
78 header="smiles\tcompound_id\tsygma_score", comments="")
56 79
57 if __name__ == "__main__": 80 if __name__ == "__main__":
58 main() 81 main()