Mercurial > repos > bgruening > sucos_clustering
comparison sucos_max.py @ 6:b8725fec8c7b draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit 05dc325ce687441e5d3bdbdedcc0e3529cd5e070"
author | bgruening |
---|---|
date | Wed, 14 Apr 2021 09:30:48 +0000 |
parents | 791c86130585 |
children |
comparison
equal
deleted
inserted
replaced
5:12725d4b90f3 | 6:b8725fec8c7b |
---|---|
32 SuCOS is the work of Susan Leung. | 32 SuCOS is the work of Susan Leung. |
33 GitHub: https://github.com/susanhleung/SuCOS | 33 GitHub: https://github.com/susanhleung/SuCOS |
34 Publication: https://doi.org/10.26434/chemrxiv.8100203.v1 | 34 Publication: https://doi.org/10.26434/chemrxiv.8100203.v1 |
35 """ | 35 """ |
36 | 36 |
37 import sucos, utils | 37 import argparse |
38 import argparse, gzip, os | 38 import os |
39 | |
40 import sucos | |
41 import utils | |
39 from rdkit import Chem | 42 from rdkit import Chem |
40 | 43 |
41 | 44 |
42 def process(inputfilename, clusterfilenames, outputfilename, filter_value, filter_field): | 45 def process( |
46 inputfilename, clusterfilenames, outputfilename, filter_value, filter_field | |
47 ): | |
43 all_clusters = {} | 48 all_clusters = {} |
44 for filename in clusterfilenames: | 49 for filename in clusterfilenames: |
45 cluster = [] | 50 cluster = [] |
46 cluster_file = utils.open_file_for_reading(filename) | 51 cluster_file = utils.open_file_for_reading(filename) |
47 suppl = Chem.ForwardSDMolSupplier(cluster_file) | 52 suppl = Chem.ForwardSDMolSupplier(cluster_file) |
48 i = 0 | 53 i = 0 |
49 for mol in suppl: | 54 for mol in suppl: |
50 i += 1 | 55 i += 1 |
51 if not mol: | 56 if not mol: |
52 utils.log("WARNING: failed to generate molecule", i, "in cluster", filename) | 57 utils.log( |
58 "WARNING: failed to generate molecule", i, "in cluster", filename | |
59 ) | |
53 continue | 60 continue |
54 try: | 61 try: |
55 features = sucos.getRawFeatures(mol) | 62 features = sucos.getRawFeatures(mol) |
56 cluster.append((mol, features)) | 63 cluster.append((mol, features)) |
57 except: | 64 except Exception: |
58 utils.log("WARNING: failed to generate features for molecule", i, "in cluster", filename) | 65 utils.log( |
66 "WARNING: failed to generate features for molecule", | |
67 i, | |
68 "in cluster", | |
69 filename, | |
70 ) | |
59 | 71 |
60 cluster_file.close() | 72 cluster_file.close() |
61 all_clusters[filename] = cluster | 73 all_clusters[filename] = cluster |
62 | 74 |
63 input_file = utils.open_file_for_reading(inputfilename) | 75 input_file = utils.open_file_for_reading(inputfilename) |
73 if not mol: | 85 if not mol: |
74 utils.log("WARNING: failed to generate molecule", mol_num, "in input") | 86 utils.log("WARNING: failed to generate molecule", mol_num, "in input") |
75 continue | 87 continue |
76 try: | 88 try: |
77 query_features = sucos.getRawFeatures(mol) | 89 query_features = sucos.getRawFeatures(mol) |
78 except: | 90 except Exception: |
79 utils.log("WARNING: failed to generate features for molecule", mol_num, "in input") | 91 utils.log( |
92 "WARNING: failed to generate features for molecule", mol_num, "in input" | |
93 ) | |
80 continue | 94 continue |
81 scores_max = [0, 0, 0] | 95 scores_max = [0, 0, 0] |
82 scores_cum = [0, 0, 0] | 96 scores_cum = [0, 0, 0] |
83 cluster_name = None | 97 cluster_name = None |
84 for clusterfilename in all_clusters: | 98 for clusterfilename in all_clusters: |
87 for entry in cluster: | 101 for entry in cluster: |
88 hit = entry[0] | 102 hit = entry[0] |
89 ref_features = entry[1] | 103 ref_features = entry[1] |
90 index += 1 | 104 index += 1 |
91 comparisons += 1 | 105 comparisons += 1 |
92 sucos_score, fm_score, vol_score = sucos.get_SucosScore(hit, mol, | 106 sucos_score, fm_score, vol_score = sucos.get_SucosScore( |
93 tani=False, ref_features=ref_features, | 107 hit, |
94 query_features=query_features) | 108 mol, |
109 tani=False, | |
110 ref_features=ref_features, | |
111 query_features=query_features, | |
112 ) | |
95 | 113 |
96 if sucos_score > scores_max[0]: | 114 if sucos_score > scores_max[0]: |
97 scores_max[0] = sucos_score | 115 scores_max[0] = sucos_score |
98 scores_max[1] = fm_score | 116 scores_max[1] = fm_score |
99 scores_max[2] = vol_score | 117 scores_max[2] = vol_score |
102 | 120 |
103 scores_cum[0] += sucos_score | 121 scores_cum[0] += sucos_score |
104 scores_cum[1] += fm_score | 122 scores_cum[1] += fm_score |
105 scores_cum[2] += vol_score | 123 scores_cum[2] += vol_score |
106 | 124 |
107 | |
108 # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) | 125 # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) |
109 mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0] if scores_max[0] > 0 else 0) | 126 mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0] if scores_max[0] > 0 else 0) |
110 mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores_max[1] if scores_max[1] > 0 else 0) | 127 mol.SetDoubleProp( |
111 mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores_max[2] if scores_max[2] > 0 else 0) | 128 "Max_SuCOS_FeatureMap_Score", scores_max[1] if scores_max[1] > 0 else 0 |
129 ) | |
130 mol.SetDoubleProp( | |
131 "Max_SuCOS_Protrude_Score", scores_max[2] if scores_max[2] > 0 else 0 | |
132 ) | |
112 | 133 |
113 if cluster_name: | 134 if cluster_name: |
114 cluster_file_name_only = cluster_name.split(os.sep)[-1] | 135 cluster_file_name_only = cluster_name.split(os.sep)[-1] |
115 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) | 136 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) |
116 mol.SetIntProp("Max_SuCOS_Index", cluster_index) | 137 mol.SetIntProp("Max_SuCOS_Index", cluster_index) |
117 | 138 |
118 # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) | 139 # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) |
119 mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0] if scores_cum[0] > 0 else 0) | 140 mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0] if scores_cum[0] > 0 else 0) |
120 mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1] if scores_cum[1] > 0 else 0) | 141 mol.SetDoubleProp( |
121 mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2] if scores_cum[2] > 0 else 0) | 142 "Cum_SuCOS_FeatureMap_Score", scores_cum[1] if scores_cum[1] > 0 else 0 |
143 ) | |
144 mol.SetDoubleProp( | |
145 "Cum_SuCOS_Protrude_Score", scores_cum[2] if scores_cum[2] > 0 else 0 | |
146 ) | |
122 | 147 |
123 if filter_value and filter_field: | 148 if filter_value and filter_field: |
124 if mol.HasProp(filter_field): | 149 if mol.HasProp(filter_field): |
125 val = mol.GetDoubleProp(filter_field) | 150 val = mol.GetDoubleProp(filter_field) |
126 if val > filter_value: | 151 if val > filter_value: |
134 output_file.close() | 159 output_file.close() |
135 | 160 |
136 utils.log("Completed", comparisons, "comparisons") | 161 utils.log("Completed", comparisons, "comparisons") |
137 | 162 |
138 | 163 |
139 ### start main execution ######################################### | 164 # start main execution ######################################### |
165 | |
140 | 166 |
141 def main(): | 167 def main(): |
142 parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit') | 168 parser = argparse.ArgumentParser(description="Max SuCOS scores with RDKit") |
143 parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).') | 169 parser.add_argument( |
144 parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).') | 170 "-i", |
145 parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits") | 171 "--input", |
146 parser.add_argument('--filter-value', type=float, help='Filter out values with scores less than this.') | 172 help="Input file to score in SDF format. Can be gzipped (*.gz).", |
147 parser.add_argument('--filter-field', help='Field to use to filter values.') | 173 ) |
174 parser.add_argument( | |
175 "-o", "--output", help="Output file in SDF format. Can be gzipped (*.gz)." | |
176 ) | |
177 parser.add_argument( | |
178 "clusters", nargs="*", help="One or more SDF files with the clustered hits" | |
179 ) | |
180 parser.add_argument( | |
181 "--filter-value", | |
182 type=float, | |
183 help="Filter out values with scores less than this.", | |
184 ) | |
185 parser.add_argument("--filter-field", help="Field to use to filter values.") | |
148 | 186 |
149 args = parser.parse_args() | 187 args = parser.parse_args() |
150 utils.log("Max SuCOS Args: ", args) | 188 utils.log("Max SuCOS Args: ", args) |
151 | 189 |
152 process(args.input, args.clusters, args.output, args.filter_value, args.filter_field) | 190 process( |
191 args.input, args.clusters, args.output, args.filter_value, args.filter_field | |
192 ) | |
153 | 193 |
154 | 194 |
155 if __name__ == "__main__": | 195 if __name__ == "__main__": |
156 main() | 196 main() |