Mercurial > repos > bgruening > sucos_clustering
comparison sucos_max.py @ 4:791c86130585 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit c35334ca80c87a5078da1a6df85b34e23b80d837"
author | bgruening |
---|---|
date | Wed, 15 Apr 2020 09:26:30 -0400 |
parents | bd12f4b4c3a8 |
children | b8725fec8c7b |
comparison
equal
deleted
inserted
replaced
3:bd12f4b4c3a8 | 4:791c86130585 |
---|---|
37 import sucos, utils | 37 import sucos, utils |
38 import argparse, gzip, os | 38 import argparse, gzip, os |
39 from rdkit import Chem | 39 from rdkit import Chem |
40 | 40 |
41 | 41 |
42 def process(inputfilename, clusterfilenames, outputfilename): | 42 def process(inputfilename, clusterfilenames, outputfilename, filter_value, filter_field): |
43 all_clusters = {} | 43 all_clusters = {} |
44 for filename in clusterfilenames: | 44 for filename in clusterfilenames: |
45 cluster = [] | 45 cluster = [] |
46 cluster_file = utils.open_file_for_reading(filename) | 46 cluster_file = utils.open_file_for_reading(filename) |
47 suppl = Chem.ForwardSDMolSupplier(cluster_file) | 47 suppl = Chem.ForwardSDMolSupplier(cluster_file) |
78 except: | 78 except: |
79 utils.log("WARNING: failed to generate features for molecule", mol_num, "in input") | 79 utils.log("WARNING: failed to generate features for molecule", mol_num, "in input") |
80 continue | 80 continue |
81 scores_max = [0, 0, 0] | 81 scores_max = [0, 0, 0] |
82 scores_cum = [0, 0, 0] | 82 scores_cum = [0, 0, 0] |
| | 83 cluster_name = None |
83 for clusterfilename in all_clusters: | 84 for clusterfilename in all_clusters: |
84 cluster = all_clusters[clusterfilename] | 85 cluster = all_clusters[clusterfilename] |
85 index = 0 | 86 index = 0 |
86 for entry in cluster: | 87 for entry in cluster: |
87 hit = entry[0] | 88 hit = entry[0] |
102 scores_cum[0] += sucos_score | 103 scores_cum[0] += sucos_score |
103 scores_cum[1] += fm_score | 104 scores_cum[1] += fm_score |
104 scores_cum[2] += vol_score | 105 scores_cum[2] += vol_score |
105 | 106 |
106 | 107 |
107 cluster_file_name_only = cluster_name.split(os.sep)[-1] | |
108 | |
109 # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) | 108 # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) |
110 mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0] if scores_max[0] > 0 else 0) | 109 mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0] if scores_max[0] > 0 else 0) |
111 mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores_max[1] if scores_max[1] > 0 else 0) | 110 mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores_max[1] if scores_max[1] > 0 else 0) |
112 mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores_max[2] if scores_max[2] > 0 else 0) | 111 mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores_max[2] if scores_max[2] > 0 else 0) |
113 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) | 112 |
114 mol.SetIntProp("Max_SuCOS_Index", cluster_index) | 113 if cluster_name: |
| | 114 cluster_file_name_only = cluster_name.split(os.sep)[-1] |
| | 115 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) |
| | 116 mol.SetIntProp("Max_SuCOS_Index", cluster_index) |
115 | 117 |
116 # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) | 118 # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) |
117 mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0] if scores_cum[0] > 0 else 0) | 119 mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0] if scores_cum[0] > 0 else 0) |
118 mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1] if scores_cum[1] > 0 else 0) | 120 mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1] if scores_cum[1] > 0 else 0) |
119 mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2] if scores_cum[2] > 0 else 0) | 121 mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2] if scores_cum[2] > 0 else 0) |
120 | 122 |
121 writer.write(mol) | 123 if filter_value and filter_field: |
| | 124 if mol.HasProp(filter_field): |
| | 125 val = mol.GetDoubleProp(filter_field) |
| | 126 if val > filter_value: |
| | 127 writer.write(mol) |
| | 128 else: |
| | 129 writer.write(mol) |
122 | 130 |
123 input_file.close() | 131 input_file.close() |
124 writer.flush() | 132 writer.flush() |
125 writer.close() | 133 writer.close() |
126 output_file.close() | 134 output_file.close() |
133 def main(): | 141 def main(): |
134 parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit') | 142 parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit') |
135 parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).') | 143 parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).') |
136 parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).') | 144 parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).') |
137 parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits") | 145 parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits") |
| | 146 parser.add_argument('--filter-value', type=float, help='Filter out values with scores less than this.') |
| | 147 parser.add_argument('--filter-field', help='Field to use to filter values.') |
138 | 148 |
139 args = parser.parse_args() | 149 args = parser.parse_args() |
140 utils.log("Max SuCOS Args: ", args) | 150 utils.log("Max SuCOS Args: ", args) |
141 | 151 |
142 process(args.input, args.clusters, args.output) | 152 process(args.input, args.clusters, args.output, args.filter_value, args.filter_field) |
143 | 153 |
144 | 154 |
145 if __name__ == "__main__": | 155 if __name__ == "__main__": |
146 main() | 156 main() |