Mercurial > repos > bgruening > sygma
view sygma_metabolites.py @ 1:0e330829de40 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sygma commit 5b2d7437ba0875c0913630fd2165c82ed933422c"
author | bgruening |
---|---|
date | Sun, 15 Mar 2020 13:18:38 -0400 |
parents | a2369e86bc48 |
children |
line wrap: on
line source
#!/usr/bin/env python3 import argparse import csv import sygma import numpy as np from rdkit import Chem from rdkit.Chem.rdmolfiles import SDMolSupplier, SmilesMolSupplier def mol_supplier(filename, ext): """ Based on the file extension, use the appropriate RDKit function to load a chemical data file (SMILES or SDF) containing multiple molecules and return a list of RDKit Mol objects """ if ext == 'sdf': return [n for n in SDMolSupplier(filename)] with open(filename) as f: mols = f.read().split('\n') if ext == 'smi' or ext == 'inchi': return [Chem.MolFromSmiles(mol, sanitize=True) for mol in mols if mol != ''] def predict_metabolites(parent, phase1_cycles, phase2_cycles): """ Prediction of metabolites derived from a parent molecule """ scenario = sygma.Scenario([ [sygma.ruleset['phase1'], int(phase1_cycles)], [sygma.ruleset['phase2'], int(phase2_cycles)]]) metabolic_tree = scenario.run(parent) metabolic_tree.calc_scores() return metabolic_tree.to_list() def main(): parser = argparse.ArgumentParser() parser.add_argument("-i", "--infile", required=True, help="Path to the input file.") parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.") parser.add_argument("--iformat", required=True, help="Specify the input file format.") parser.add_argument("--phase1", required=True, help="Number of phase1 cycles.") parser.add_argument("--phase2", required=True, help="Number of phase2 cycles.") parser.add_argument("--detailed", dest="detailed", action="store_true", help="Returns more detailed output") args = parser.parse_args() mols = mol_supplier(args.infile, args.iformat) if args.detailed: outp = np.zeros((0,6)) else: outp = np.zeros((0,3)) for n in range(len(mols)): metabs = predict_metabolites(mols[n], args.phase1, args.phase2) for entry in range(len(metabs)): smiles = Chem.MolToSmiles(metabs[entry]['SyGMa_metabolite']) if args.detailed: out = np.column_stack(( smiles, # SMILES 'SYGMA{}MOL{}'.format(n, entry), # SMILES label np.round(np.array(metabs[entry]['SyGMa_score'], dtype=float), decimals=5), # score rounded to 5 dp Chem.rdMolDescriptors.CalcMolFormula(Chem.MolFromSmiles(smiles)), # Molecular formula len(metabs[entry]["SyGMa_pathway"].split("\n")), # SyGMa_n Sygma pathway length metabs[entry]["SyGMa_pathway"].replace("\n", "") # SyGMa pathway )) else: out = np.column_stack(( smiles, # SMILES 'SYGMA{}MOL{}'.format(n, entry), # SMILES label np.round(np.array(metabs[entry]['SyGMa_score'], dtype=float), decimals=5) # score rounded to 5 dp )) outp = np.vstack((outp, out)) if args.detailed: np.savetxt(args.outfile, outp, fmt="%s", delimiter="\t", header="smiles\tcompound_id\tsygma_score\tmolecular_formula\tsygma_n\tsygma_pathway", comments="") else: np.savetxt(args.outfile, outp, fmt="%s", delimiter="\t", header="smiles\tcompound_id\tsygma_score", comments="") if __name__ == "__main__": main()