Mercurial > repos > bgruening > sucos_clustering
comparison sucos_max.py @ 2:58d18838e244 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit 6f1ee2812cca091561a2b2e464498dae2f913b8d"
author | bgruening |
---|---|
date | Thu, 19 Mar 2020 07:25:25 -0400 |
parents | f80cfac80c53 |
children | bd12f4b4c3a8 |
comparison
equal
deleted
inserted
replaced
1:334ad24525db | 2:58d18838e244 |
---|---|
37 import sucos, utils | 37 import sucos, utils |
38 import argparse, gzip, os | 38 import argparse, gzip, os |
39 from rdkit import Chem | 39 from rdkit import Chem |
40 | 40 |
41 | 41 |
42 def process(inputfilename, clusterfilenames, outputfilename, mode): | 42 def process(inputfilename, clusterfilenames, outputfilename): |
43 | |
44 all_clusters = {} | 43 all_clusters = {} |
45 for filename in clusterfilenames: | 44 for filename in clusterfilenames: |
46 cluster = [] | 45 cluster = [] |
47 cluster_file = utils.open_file_for_reading(filename) | 46 cluster_file = utils.open_file_for_reading(filename) |
48 suppl = Chem.ForwardSDMolSupplier(cluster_file) | 47 suppl = Chem.ForwardSDMolSupplier(cluster_file) |
77 try: | 76 try: |
78 query_features = sucos.getRawFeatures(mol) | 77 query_features = sucos.getRawFeatures(mol) |
79 except: | 78 except: |
80 utils.log("WARNING: failed to generate features for molecule", mol_num, "in input") | 79 utils.log("WARNING: failed to generate features for molecule", mol_num, "in input") |
81 continue | 80 continue |
82 scores = [0, 0, 0] | 81 scores_max = [0, 0, 0] |
| | 82 scores_cum = [0, 0, 0] |
83 for clusterfilename in all_clusters: | 83 for clusterfilename in all_clusters: |
84 cluster = all_clusters[clusterfilename] | 84 cluster = all_clusters[clusterfilename] |
85 index = 0 | 85 index = 0 |
86 for entry in cluster: | 86 for entry in cluster: |
87 hit = entry[0] | 87 hit = entry[0] |
88 ref_features = entry[1] | 88 ref_features = entry[1] |
89 index += 1 | 89 index += 1 |
90 comparisons += 1 | 90 comparisons += 1 |
91 sucos_score, fm_score, vol_score = sucos.get_SucosScore(hit, mol, | 91 sucos_score, fm_score, vol_score = sucos.get_SucosScore(hit, mol, |
92 tani=False, ref_features=ref_features, query_features=query_features) | 92 tani=False, ref_features=ref_features, |
93 if mode == 'max': | 93 query_features=query_features) |
94 if sucos_score > scores[0]: | |
95 scores[0] = sucos_score | |
96 scores[1] = fm_score | |
97 scores[2] = vol_score | |
98 cluster_name = clusterfilename | |
99 cluster_index = index | |
100 elif mode == 'cum': | |
101 scores[0] += sucos_score | |
102 scores[1] += fm_score | |
103 scores[2] += vol_score | |
104 else: | |
105 raise ValueError("Invalid mode: " + mode) | |
106 | 94 |
107 if scores[0] > 0: | 95 if sucos_score > scores_max[0]: |
108 if mode == 'max': | 96 scores_max[0] = sucos_score |
109 cluster_file_name_only = cluster_name.split(os.sep)[-1] | 97 scores_max[1] = fm_score |
110 #utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) | 98 scores_max[2] = vol_score |
111 mol.SetDoubleProp("Max_SuCOS_Score", scores[0]) | 99 cluster_name = clusterfilename |
112 mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores[1]) | 100 cluster_index = index |
113 mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores[2]) | |
114 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) | |
115 mol.SetIntProp("Max_SuCOS_Index", cluster_index) | |
116 | 101 |
117 else: | 102 scores_cum[0] += sucos_score |
118 #utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) | 103 scores_cum[1] += fm_score |
119 mol.SetDoubleProp("Cum_SuCOS_Score", scores[0]) | 104 scores_cum[2] += vol_score |
120 mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores[1]) | |
121 mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores[2]) | |
122 | 105 |
123 writer.write(mol) | 106 if scores_max[0] > 0: |
| | 107 cluster_file_name_only = cluster_name.split(os.sep)[-1] |
| | 108 # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) |
| | 109 mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0]) |
| | 110 mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores_max[1]) |
| | 111 mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores_max[2]) |
| | 112 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) |
| | 113 mol.SetIntProp("Max_SuCOS_Index", cluster_index) |
124 | 114 |
125 else: | 115 # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) |
126 utils.log("Molecule", mol_num, "did not overlay. Omitting from results") | 116 mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0]) |
| | 117 mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1]) |
| | 118 mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2]) |
127 | 119 |
| | 120 writer.write(mol) |
128 | 121 |
129 input_file.close() | 122 input_file.close() |
130 writer.flush() | 123 writer.flush() |
131 writer.close() | 124 writer.close() |
132 output_file.close() | 125 output_file.close() |
138 | 131 |
139 def main(): | 132 def main(): |
140 parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit') | 133 parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit') |
141 parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).') | 134 parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).') |
142 parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).') | 135 parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).') |
143 parser.add_argument('-m', '--mode', choices=['max', 'cum'], | |
144 default='max', help='Score mode: max = best score, cum = sum of all scores') | |
145 parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits") | 136 parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits") |
146 | 137 |
147 args = parser.parse_args() | 138 args = parser.parse_args() |
148 utils.log("Max SuCOS Args: ", args) | 139 utils.log("Max SuCOS Args: ", args) |
149 | 140 |
150 process(args.input, args.clusters, args.output, args.mode) | 141 process(args.input, args.clusters, args.output) |
151 | 142 |
152 | 143 |
153 if __name__ == "__main__": | 144 if __name__ == "__main__": |
154 main() | 145 main() |