Mercurial > repos > bgruening > sucos_clustering
annotate sucos_cluster.py @ 4:791c86130585 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit c35334ca80c87a5078da1a6df85b34e23b80d837"
author | bgruening |
---|---|
date | Wed, 15 Apr 2020 09:26:30 -0400 |
parents | f80cfac80c53 |
children | b8725fec8c7b |
rev | line source |
---|---|
0
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
2 """ |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
3 Cluster a set of molecules based on their 3D overlays as determined by the SuCOS score. |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
4 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
5 This will generate a set of SD files, one for each cluster of molecules (presumably corresponding to a |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
6 binding pocket in the protein target). |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
7 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
8 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
9 SuCOS is the work of Susan Leung. |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
10 GitHub: https://github.com/susanhleung/SuCOS |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
11 Publication: https://doi.org/10.26434/chemrxiv.8100203.v1 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
12 """ |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
13 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
14 import sucos, utils |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
15 import argparse, gzip |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
16 from rdkit import Chem |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
17 import numpy as np |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
18 import pandas as pd |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
19 from scipy.cluster.hierarchy import linkage, fcluster |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
20 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
21 ### start main execution ######################################### |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
22 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
23 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
24 def calc_distance_matrix(mols): |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
25 """ |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
26 Calculate a full distance matrix for the given molecules. Identical molecules get a score of 0.0 with the maximum |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
27 distance possible being 1.0. |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
28 :param mols: A list of molecules. It must be possible to iterate through this list multiple times |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
29 :return: A NxN 2D array of distance scores, with N being the number of molecules in the input |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
30 """ |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
31 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
32 # TODO - do we need to calculate both sides of the matrix? Tanimoto is supposed to be a symmetric distance measure, |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
33 # but the matrix that is generated does not seem to be symmetric. |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
34 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
35 mol_fm_tuples = [] |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
36 for mol in mols: |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
37 features = sucos.getRawFeatures(mol) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
38 mol_fm_tuples.append((mol, features)) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
39 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
40 matrix = [] |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
41 for tuple1 in mol_fm_tuples: |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
42 tmp = [] |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
43 for tuple2 in mol_fm_tuples: |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
44 if tuple1[0] == tuple2[0]: |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
45 tmp.append(0.0) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
46 else: |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
47 #utils.log("Calculating SuCOS between", mol1, mol2) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
48 sucos_score, fm_score, tani_score = sucos.get_SucosScore(tuple1[0], tuple2[0], |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
49 tani=True, ref_features=tuple1[1], query_features=tuple2[1]) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
50 tmp.append(1.0 - sucos_score) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
51 matrix.append(tmp) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
52 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
53 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
54 return matrix |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
55 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
56 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
57 def cluster(matrix, threshold=0.8): |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
58 """ |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
59 Cluster the supplied distance matrix returning an array of clusters. |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
60 :param matrix: the distance matrix, as calculated with the calc_distance_matrix function. |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
61 :param threshold: The clustering cuttoff. The default of 0.8 is a reasonable value to use. |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
62 :return: An array of clusters, each cluster being an array of the indices from the matrix. |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
63 """ |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
64 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
65 indexes = [x for x in range(0, len(matrix))] |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
66 cols = [x for x in range(0, len(matrix[0]))] |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
67 #utils.log("indexes", indexes) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
68 #utils.log("cols", cols) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
69 df = pd.DataFrame(matrix, columns=cols, index=indexes) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
70 utils.log("DataFrame:", df.shape) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
71 #utils.log(df) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
72 indices = np.triu_indices(df.shape[0], k=1) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
73 #utils.log("Indices:", indices) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
74 t = np.array(df)[indices] |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
75 Z = linkage(t, 'average') |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
76 lig_clusters = [] |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
77 cluster_arr = fcluster(Z, t=threshold, criterion='distance') |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
78 for i in range(np.amax(cluster_arr)): |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
79 clus = df.columns[np.argwhere(cluster_arr==i+1)] |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
80 lig_clusters.append([x[0] for x in clus.tolist()]) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
81 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
82 utils.log("Clusters", lig_clusters) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
83 return lig_clusters |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
84 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
85 def write_clusters_to_sdfs(mols, clusters, basename, gzip=False): |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
86 """ |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
87 Write the molecules to SDF files, 1 file for each cluster. |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
88 :param mols The molecules to write: |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
89 :param clusters The clusters, as returned by the cluster function: |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
90 :param basename The basename for the file name. e.g. if basename is 'output' then files like |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
91 output1.sdf, output2.sdf will be written: |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
92 :param gzip Whether to gzip the output |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
93 :return: |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
94 """ |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
95 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
96 i = 0 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
97 for cluster in clusters: |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
98 i += 1 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
99 filename = basename + str(i) + ".sdf" |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
100 if gzip: |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
101 filename += ".gz" |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
102 utils.log("Writing ", len(cluster), "molecules in cluster", i, "to file", filename) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
103 output_file = utils.open_file_for_writing(filename) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
104 writer = Chem.SDWriter(output_file) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
105 for index in cluster: |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
106 mol = mols[index] |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
107 writer.write(mol) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
108 writer.flush() |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
109 writer.close() |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
110 output_file.close() |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
111 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
112 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
113 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
114 def main(): |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
115 parser = argparse.ArgumentParser(description='Clustering with SuCOS and RDKit') |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
116 parser.add_argument('-i', '--input', help='Input file in SDF format. Can be gzipped (*.gz).') |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
117 parser.add_argument('-o', '--output', default="cluster", help="Base name for output files in SDF format. " + |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
118 "e.g. if value is 'output' then files like output1.sdf, output2.sdf will be created") |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
119 parser.add_argument('--gzip', action='store_true', help='Gzip the outputs generating files like output1.sdf.gz, output2.sdf.gz') |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
120 parser.add_argument('-t', '--threshold', type=float, default=0.8, help='Clustering threshold') |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
121 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
122 args = parser.parse_args() |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
123 utils.log("SuCOS Cluster Args: ", args) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
124 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
125 input_file = utils.open_file_for_reading(args.input) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
126 suppl = Chem.ForwardSDMolSupplier(input_file) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
127 mols = list(suppl) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
128 matrix = calc_distance_matrix(mols) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
129 clusters = cluster(matrix, threshold=args.threshold) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
130 write_clusters_to_sdfs(mols, clusters, args.output, gzip=args.gzip) |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
131 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
132 |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
133 if __name__ == "__main__": |
f80cfac80c53
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit ef86cfa5f7ab5043de420511211579d03df58645"
bgruening
parents:
diff
changeset
|
134 main() |