Mercurial > repos > bgruening > chemfp
annotate butina_clustering.py @ 2:70b071de9bee draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
author | bgruening |
---|---|
date | Sat, 20 May 2017 08:31:44 -0400 |
parents | |
children | 3b14765c22ee |
rev | line source |
---|---|
2
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
2 """ |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
3 Modified version of code examples from the chemfp project. |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
4 http://code.google.com/p/chem-fingerprints/ |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
5 Thanks to Andrew Dalke of Andrew Dalke Scientific! |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
6 """ |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
7 |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
8 import chemfp |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
9 import sys |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
10 import os |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
11 import tempfile |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
12 import argparse |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
13 import subprocess |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
14 from chemfp import search |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
15 |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
def unix_sort(results):
    """Yield ``(size, fp_idx)`` pairs ordered by descending neighbour count.

    One ``"<size> <index>"`` line is written per entry of
    ``results.iter_indices()`` and the external ``sort`` utility
    (``sort -n -r -k 1,1``) orders them, so the ordering work is done
    outside of the Python process.

    :param results: object providing ``iter_indices()``, which yields one
        sized collection of neighbour indices per fingerprint.
    :returns: generator of ``(int size, int fp_idx)`` tuples, largest
        ``size`` first.
    :raises subprocess.CalledProcessError: if ``sort`` exits with a
        non-zero status.
    """
    sort_cmd = ['sort', '-n', '-r', '-k', '1,1']
    unsorted_path = None
    sorted_path = None
    try:
        # Text mode so that writing str works on Python 2 and Python 3.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_unsorted:
            unsorted_path = temp_unsorted.name
            for i, indices in enumerate(results.iter_indices()):
                temp_unsorted.write('%s %s\n' % (len(indices), i))

        # Reserve a name for the sorted output; sort writes into it below.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_sorted:
            sorted_path = temp_sorted.name

        with open(unsorted_path) as sort_in, open(sorted_path, 'w') as sort_out:
            # stderr is inherited, so sort's own diagnostics stay visible.
            return_code = subprocess.call(sort_cmd, stdin=sort_in, stdout=sort_out)

        if return_code:
            sys.stderr.write("Return error code %i from command:\n" % return_code)
            sys.stderr.write("%s\n" % " ".join(sort_cmd))
            raise subprocess.CalledProcessError(return_code, sort_cmd)

        with open(sorted_path) as sorted_lines:
            for line in sorted_lines:
                size, fp_idx = line.split()
                yield (int(size), int(fp_idx))
    finally:
        # Runs on normal exhaustion, on errors, and when the generator is
        # closed early, so neither temp file is left behind.
        for path in (unsorted_path, sorted_path):
            if path is not None and os.path.exists(path):
                os.remove(path)
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
39 |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
def _assign_clusters(results, sorted_ids):
    """Greedily split fingerprints into singletons and clusters.

    :param results: indexable search results; ``results[i].get_indices()``
        gives the neighbour indices of fingerprint ``i``.
    :param sorted_ids: iterable of ``(size, fp_idx)`` pairs in the order the
        candidate centroids should be visited.
    :returns: ``(true_singletons, false_singletons, clusters)`` where the
        first two are lists of fingerprint indices and ``clusters`` is a
        list of ``(centroid_idx, set_of_member_indices)`` tuples.
    """
    true_singletons = []
    false_singletons = []
    clusters = []

    seen = set()
    for size, fp_idx in sorted_ids:
        if fp_idx in seen:
            # Can't use a centroid which is already assigned
            continue
        seen.add(fp_idx)

        if size == 0:
            # The only fingerprint in the exclusion sphere is itself
            true_singletons.append(fp_idx)
            continue

        # Only look the neighbours up for rows that can still start a
        # cluster; figure out which ones haven't yet been assigned.
        unassigned = set(results[fp_idx].get_indices()) - seen

        if not unassigned:
            false_singletons.append(fp_idx)
            continue

        # this is a new cluster
        clusters.append((fp_idx, unassigned))
        seen.update(unassigned)

    return true_singletons, false_singletons, clusters


def butina(args):
    """
    Taylor-Butina clustering from the chemfp help.

    Reads the FPS file at ``args.input_path``, runs a symmetric Tanimoto
    threshold search with ``args.tanimoto_threshold`` using
    ``args.processors`` threads, and writes the result to
    ``args.output_path`` (a writable file object):

    * three ``#`` header lines with the number of true singletons, false
      singletons and clusters,
    * one ``centroid<TAB>size<TAB>space separated member ids`` line per
      cluster, largest cluster first,
    * one ``id<TAB>0`` line per true singleton.

    The output handle is closed when done, unless it is ``sys.stdout``,
    which is only flushed.
    """
    out = args.output_path
    try:
        targets = chemfp.open(args.input_path, format='fps')
        arena = chemfp.load_fingerprints(targets)

        chemfp.set_num_threads(args.processors)
        results = search.threshold_tanimoto_search_symmetric(arena, threshold=args.tanimoto_threshold)
        results.reorder_all("move-closest-first")

        sorted_ids = unix_sort(results)

        # Determine the true/false singletons and the clusters
        true_singletons, false_singletons, clusters = _assign_clusters(results, sorted_ids)

        out.write("#%s true singletons\n" % len(true_singletons))
        out.write("#%s false singletons\n" % len(false_singletons))
        out.write("#clusters: %s\n" % len(clusters))

        # Sort so the cluster with the most compounds comes first,
        # then by alphabetically smallest id
        def cluster_sort_key(cluster):
            centroid_idx, members = cluster
            return -len(members), arena.ids[centroid_idx]

        clusters.sort(key=cluster_sort_key)

        for centroid_idx, members in clusters:
            centroid_name = arena.ids[centroid_idx]
            out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(arena.ids[idx] for idx in members)))
            #ToDo: len(members) need to be some biggest top 90% or something ...

        for idx in true_singletons:
            out.write("%s\t%s\n" % (arena.ids[idx], 0))
    finally:
        # Release the output even when clustering fails, but never close the
        # interpreter's stdout (the command-line default).
        if out is sys.stdout:
            out.flush()
        else:
            out.close()
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
110 |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
111 |
70b071de9bee
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
bgruening
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: collect the options and run the clustering.
    cli = argparse.ArgumentParser(description="""Taylor-Butina clustering for fps files.
For more details please see the original publication or the chemfp documentation:
http://www.chemomine.co.uk/dbclus-paper.pdf
https://chemfp.readthedocs.org
""")

    # Fingerprint file to cluster (mandatory).
    cli.add_argument(
        "-i", "--input",
        dest="input_path",
        required=True,
        help="Path to the input file.",
    )

    # Destination of the cluster report; standard output when omitted.
    cli.add_argument(
        "-o", "--output",
        dest="output_path",
        type=argparse.FileType('w'),
        default=sys.stdout,
        help="Path to the output file.",
    )

    # Similarity cut-off handed to the Tanimoto search.
    cli.add_argument(
        "-t", "--threshold",
        dest="tanimoto_threshold",
        type=float,
        default=0.8,
        help="Tanimoto threshold [0.8]",
    )

    # Number of threads chemfp is told to use.
    cli.add_argument('-p', '--processors', type=int, default=4)

    butina(cli.parse_args())