annotate get_clusters.py @ 1:d9f8cc3258f9 draft

"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
author chemteam
date Mon, 24 Aug 2020 06:09:11 -0400
parents
children 9ca30ad95444
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
1 import argparse
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
2 import collections
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
3 import json
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
4
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
5 import numpy as np
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
6
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
7 from scipy.cluster.hierarchy import fcluster
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
8
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
9
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
def separate_clusters(Z_fpath, threshold, min_members, output):
    """Cut a hierarchical clustering into flat clusters and save them as JSON.

    The linkage array is read from a text file, flat clusters are formed
    with ``fcluster`` using the ``'distance'`` criterion, clusters with
    fewer than ``min_members`` members are dropped, and the remainder is
    written to ``output``.

    Parameters
    ----------
    Z_fpath : str
        Path to a text file readable by ``numpy.loadtxt`` that holds the
        cluster linkage array.
    threshold : int or float
        Distance cutoff handed to ``fcluster``.
    min_members : int
        Minimum number of members a cluster needs in order to be kept.
    output : str
        Path of the JSON file to write. It maps each flat-cluster id to
        the list of zero-based positions (in ``fcluster``'s output) of the
        observations assigned to it.
    """
    # ndmin=2 keeps a one-row linkage (only two observations) as a 2-D
    # array; without it loadtxt returns a 1-D array that fcluster rejects.
    Z = np.loadtxt(Z_fpath, ndmin=2)
    branch_assignments = fcluster(Z, threshold, criterion='distance')

    cluster_dict = collections.defaultdict(list)
    for n, val in enumerate(branch_assignments):
        # Convert the NumPy integer label to a plain int so that it is a
        # valid, sortable JSON object key.
        cluster_dict[int(val)].append(n)

    cluster_dict = {k: v for k, v in cluster_dict.items()
                    if len(v) >= min_members}
    with open(output, 'w') as f:
        json.dump(cluster_dict, f, indent=4, sort_keys=True)
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
20
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
21
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
def main():
    """Parse the command-line options and run ``separate_clusters``.

    All four options (``--Z``, ``--threshold``, ``--min-members`` and
    ``--output``) are required.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--Z', required=True,
                        help='File for cluster linkage array.')
    # The cutoff is a distance, so accept fractional values; integer input
    # such as "5" is still parsed without error.
    parser.add_argument('--threshold', type=float, required=True,
                        help='Distance cutoff.')
    parser.add_argument('--min-members', type=int, required=True,
                        help='Minimum number of members of the cluster.')
    parser.add_argument('--output', required=True,
                        help='Output file.')
    args = parser.parse_args()

    separate_clusters(args.Z, args.threshold,
                      args.min_members, args.output)
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
36
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
37
d9f8cc3258f9 "planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff changeset
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()