comparison sucos_max.py @ 6:4f1896782f7c draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit 05dc325ce687441e5d3bdbdedcc0e3529cd5e070"
author bgruening
date Wed, 14 Apr 2021 09:31:11 +0000
parents a574f6e8b909
children
comparison
equal deleted inserted replaced
5:fe318c648502 6:4f1896782f7c
32 SuCOS is the work of Susan Leung. 32 SuCOS is the work of Susan Leung.
33 GitHub: https://github.com/susanhleung/SuCOS 33 GitHub: https://github.com/susanhleung/SuCOS
34 Publication: https://doi.org/10.26434/chemrxiv.8100203.v1 34 Publication: https://doi.org/10.26434/chemrxiv.8100203.v1
35 """ 35 """
36 36
37 import sucos, utils 37 import argparse
38 import argparse, gzip, os 38 import os
39
40 import sucos
41 import utils
39 from rdkit import Chem 42 from rdkit import Chem
40 43
41 44
42 def process(inputfilename, clusterfilenames, outputfilename, filter_value, filter_field): 45 def process(
46 inputfilename, clusterfilenames, outputfilename, filter_value, filter_field
47 ):
43 all_clusters = {} 48 all_clusters = {}
44 for filename in clusterfilenames: 49 for filename in clusterfilenames:
45 cluster = [] 50 cluster = []
46 cluster_file = utils.open_file_for_reading(filename) 51 cluster_file = utils.open_file_for_reading(filename)
47 suppl = Chem.ForwardSDMolSupplier(cluster_file) 52 suppl = Chem.ForwardSDMolSupplier(cluster_file)
48 i = 0 53 i = 0
49 for mol in suppl: 54 for mol in suppl:
50 i += 1 55 i += 1
51 if not mol: 56 if not mol:
52 utils.log("WARNING: failed to generate molecule", i, "in cluster", filename) 57 utils.log(
58 "WARNING: failed to generate molecule", i, "in cluster", filename
59 )
53 continue 60 continue
54 try: 61 try:
55 features = sucos.getRawFeatures(mol) 62 features = sucos.getRawFeatures(mol)
56 cluster.append((mol, features)) 63 cluster.append((mol, features))
57 except: 64 except Exception:
58 utils.log("WARNING: failed to generate features for molecule", i, "in cluster", filename) 65 utils.log(
66 "WARNING: failed to generate features for molecule",
67 i,
68 "in cluster",
69 filename,
70 )
59 71
60 cluster_file.close() 72 cluster_file.close()
61 all_clusters[filename] = cluster 73 all_clusters[filename] = cluster
62 74
63 input_file = utils.open_file_for_reading(inputfilename) 75 input_file = utils.open_file_for_reading(inputfilename)
73 if not mol: 85 if not mol:
74 utils.log("WARNING: failed to generate molecule", mol_num, "in input") 86 utils.log("WARNING: failed to generate molecule", mol_num, "in input")
75 continue 87 continue
76 try: 88 try:
77 query_features = sucos.getRawFeatures(mol) 89 query_features = sucos.getRawFeatures(mol)
78 except: 90 except Exception:
79 utils.log("WARNING: failed to generate features for molecule", mol_num, "in input") 91 utils.log(
92 "WARNING: failed to generate features for molecule", mol_num, "in input"
93 )
80 continue 94 continue
81 scores_max = [0, 0, 0] 95 scores_max = [0, 0, 0]
82 scores_cum = [0, 0, 0] 96 scores_cum = [0, 0, 0]
83 cluster_name = None 97 cluster_name = None
84 for clusterfilename in all_clusters: 98 for clusterfilename in all_clusters:
87 for entry in cluster: 101 for entry in cluster:
88 hit = entry[0] 102 hit = entry[0]
89 ref_features = entry[1] 103 ref_features = entry[1]
90 index += 1 104 index += 1
91 comparisons += 1 105 comparisons += 1
92 sucos_score, fm_score, vol_score = sucos.get_SucosScore(hit, mol, 106 sucos_score, fm_score, vol_score = sucos.get_SucosScore(
93 tani=False, ref_features=ref_features, 107 hit,
94 query_features=query_features) 108 mol,
109 tani=False,
110 ref_features=ref_features,
111 query_features=query_features,
112 )
95 113
96 if sucos_score > scores_max[0]: 114 if sucos_score > scores_max[0]:
97 scores_max[0] = sucos_score 115 scores_max[0] = sucos_score
98 scores_max[1] = fm_score 116 scores_max[1] = fm_score
99 scores_max[2] = vol_score 117 scores_max[2] = vol_score
102 120
103 scores_cum[0] += sucos_score 121 scores_cum[0] += sucos_score
104 scores_cum[1] += fm_score 122 scores_cum[1] += fm_score
105 scores_cum[2] += vol_score 123 scores_cum[2] += vol_score
106 124
107
108 # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) 125 # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index)
109 mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0] if scores_max[0] > 0 else 0) 126 mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0] if scores_max[0] > 0 else 0)
110 mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores_max[1] if scores_max[1] > 0 else 0) 127 mol.SetDoubleProp(
111 mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores_max[2] if scores_max[2] > 0 else 0) 128 "Max_SuCOS_FeatureMap_Score", scores_max[1] if scores_max[1] > 0 else 0
129 )
130 mol.SetDoubleProp(
131 "Max_SuCOS_Protrude_Score", scores_max[2] if scores_max[2] > 0 else 0
132 )
112 133
113 if cluster_name: 134 if cluster_name:
114 cluster_file_name_only = cluster_name.split(os.sep)[-1] 135 cluster_file_name_only = cluster_name.split(os.sep)[-1]
115 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) 136 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only)
116 mol.SetIntProp("Max_SuCOS_Index", cluster_index) 137 mol.SetIntProp("Max_SuCOS_Index", cluster_index)
117 138
118 # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) 139 # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2])
119 mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0] if scores_cum[0] > 0 else 0) 140 mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0] if scores_cum[0] > 0 else 0)
120 mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1] if scores_cum[1] > 0 else 0) 141 mol.SetDoubleProp(
121 mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2] if scores_cum[2] > 0 else 0) 142 "Cum_SuCOS_FeatureMap_Score", scores_cum[1] if scores_cum[1] > 0 else 0
143 )
144 mol.SetDoubleProp(
145 "Cum_SuCOS_Protrude_Score", scores_cum[2] if scores_cum[2] > 0 else 0
146 )
122 147
123 if filter_value and filter_field: 148 if filter_value and filter_field:
124 if mol.HasProp(filter_field): 149 if mol.HasProp(filter_field):
125 val = mol.GetDoubleProp(filter_field) 150 val = mol.GetDoubleProp(filter_field)
126 if val > filter_value: 151 if val > filter_value:
134 output_file.close() 159 output_file.close()
135 160
136 utils.log("Completed", comparisons, "comparisons") 161 utils.log("Completed", comparisons, "comparisons")
137 162
138 163
139 ### start main execution ######################################### 164 # start main execution #########################################
165
140 166
141 def main(): 167 def main():
142 parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit') 168 parser = argparse.ArgumentParser(description="Max SuCOS scores with RDKit")
143 parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).') 169 parser.add_argument(
144 parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).') 170 "-i",
145 parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits") 171 "--input",
146 parser.add_argument('--filter-value', type=float, help='Filter out values with scores less than this.') 172 help="Input file to score in SDF format. Can be gzipped (*.gz).",
147 parser.add_argument('--filter-field', help='Field to use to filter values.') 173 )
174 parser.add_argument(
175 "-o", "--output", help="Output file in SDF format. Can be gzipped (*.gz)."
176 )
177 parser.add_argument(
178 "clusters", nargs="*", help="One or more SDF files with the clustered hits"
179 )
180 parser.add_argument(
181 "--filter-value",
182 type=float,
183 help="Filter out values with scores less than this.",
184 )
185 parser.add_argument("--filter-field", help="Field to use to filter values.")
148 186
149 args = parser.parse_args() 187 args = parser.parse_args()
150 utils.log("Max SuCOS Args: ", args) 188 utils.log("Max SuCOS Args: ", args)
151 189
152 process(args.input, args.clusters, args.output, args.filter_value, args.filter_field) 190 process(
191 args.input, args.clusters, args.output, args.filter_value, args.filter_field
192 )
153 193
154 194
155 if __name__ == "__main__": 195 if __name__ == "__main__":
156 main() 196 main()