# HG changeset patch
# User bgruening
# Date 1495299426 14400
# Node ID 57a1a58056a6b95903966aabf9a2f6bf28540b73
# Parent 685a138131f0cc8b55278bf7e02f67836be67eae
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit d786052cd04f8b25eb4aff80b1b9724f62031b61
diff -r 685a138131f0 -r 57a1a58056a6 butina_clustering.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/butina_clustering.py Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+"""
+ Modified version of code examples from the chemfp project.
+ http://code.google.com/p/chem-fingerprints/
+ Thanks to Andrew Dalke of Andrew Dalke Scientific!
+"""
+
+import chemfp
+import sys
+import os
+import tempfile
+import argparse
+import subprocess
+from chemfp import search
+
+def unix_sort(results):
+    """Yield (hit_count, fp_index) for each row of results.iter_indices(),
+    ordered by the Unix `sort` command with the largest hit count first."""
+    temp_unsorted = tempfile.NamedTemporaryFile(mode='w', delete=False)
+    temp_sorted = tempfile.NamedTemporaryFile(mode='w', delete=False)
+    temp_sorted.close()
+    try:
+        for (i, indices) in enumerate(results.iter_indices()):
+            temp_unsorted.write('%s %s\n' % (len(indices), i))
+        temp_unsorted.close()
+        # sort opens both files by name, so no Python handles are left dangling.
+        cmd = ['sort', '-n', '-r', '-k', '1,1', '-o', temp_sorted.name, temp_unsorted.name]
+        p = subprocess.Popen(cmd, stderr=subprocess.PIPE, universal_newlines=True)
+        stderr = p.communicate()[1]
+        if p.returncode:  # stderr is piped: the old unpiped None made write() raise
+            sys.stderr.write(stderr + "Return error code %i from command:\n" % p.returncode)
+        with open(temp_sorted.name) as handle:
+            for line in handle:
+                size, fp_idx = line.strip().split()
+                yield (int(size), int(fp_idx))
+    finally:
+        os.remove(temp_unsorted.name)
+        os.remove(temp_sorted.name)
+
+def butina( args ):
+    """
+    Taylor-Butina clustering from the chemfp help.
+
+    Loads the fps file at args.input_path, runs a symmetric Tanimoto
+    threshold search (args.tanimoto_threshold, args.processors threads)
+    and writes the singleton/cluster summary to the open file object
+    args.output_path, which is closed afterwards unless it is sys.stdout.
+    """
+    out = args.output_path
+    targets = chemfp.open( args.input_path, format='fps' )
+    arena = chemfp.load_fingerprints( targets )
+
+    chemfp.set_num_threads( args.processors )
+    results = search.threshold_tanimoto_search_symmetric(arena, threshold = args.tanimoto_threshold)
+    results.reorder_all("move-closest-first")
+
+    # (hit count, fingerprint index) pairs, largest count first
+    sorted_ids = unix_sort(results)
+
+    # Determine the true/false singletons and the clusters
+    true_singletons = []
+    false_singletons = []
+    clusters = []
+
+    seen = set()
+    for (size, fp_idx) in sorted_ids:
+        if fp_idx in seen:
+            # Can't use a centroid which is already assigned
+            continue
+        seen.add(fp_idx)
+
+        if size == 0:
+            # The only fingerprint in the exclusion sphere is itself
+            true_singletons.append( fp_idx )
+            continue
+
+        # Figure out which ones haven't yet been assigned. The neighbour list
+        # is fetched only here, so skipped fingerprints cost no lookup.
+        unassigned = set(results[fp_idx].get_indices()) - seen
+        if not unassigned:
+            false_singletons.append(fp_idx)
+            continue
+
+        # this is a new cluster
+        clusters.append( (fp_idx, unassigned) )
+        seen.update(unassigned)
+
+    out.write( "#%s true singletons\n" % len(true_singletons) )
+    out.write( "#%s false singletons\n" % len(false_singletons) )
+    out.write( "#clusters: %s\n" % len(clusters) )
+
+    # Sort so the cluster with the most compounds comes first,
+    # then by alphabetically smallest id
+    def cluster_sort_key(cluster):
+        centroid_idx, members = cluster
+        return -len(members), arena.ids[centroid_idx]
+
+    clusters.sort(key=cluster_sort_key)
+
+    for centroid_idx, members in clusters:
+        centroid_name = arena.ids[centroid_idx]
+        out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(arena.ids[idx] for idx in members)))
+        #ToDo: len(members) need to be some biggest top 90% or something ...
+
+    for idx in true_singletons:
+        out.write("%s\t%s\n" % (arena.ids[idx], 0))
+
+    if out is not sys.stdout:  # keep the interpreter's stdout usable for the caller
+        out.close()
+
+
+if __name__ == "__main__":
+    # Command-line entry point: collect the options and run the clustering.
+    description = """Taylor-Butina clustering for fps files.
+For more details please see the original publication or the chemfp documentation:
+http://www.chemomine.co.uk/dbclus-paper.pdf
+https://chemfp.readthedocs.org
+"""
+    cli = argparse.ArgumentParser(description=description)
+
+    # fps file holding the fingerprints to cluster
+    cli.add_argument("-i", "--input", dest="input_path", required=True,
+                     help="Path to the input file.")
+    # opened for writing by argparse; defaults to standard output
+    cli.add_argument("-o", "--output", dest="output_path", default=sys.stdout,
+                     type=argparse.FileType('w'), help="Path to the output file.")
+    # Tanimoto threshold handed to the symmetric threshold search
+    cli.add_argument("-t", "--threshold", dest="tanimoto_threshold", default=0.8,
+                     type=float, help="Tanimoto threshold [0.8]")
+    # thread count passed to chemfp.set_num_threads
+    cli.add_argument('-p', '--processors', type=int, default=4)
+
+    options = cli.parse_args()
+    butina( options )
diff -r 685a138131f0 -r 57a1a58056a6 butina_clustering.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/butina_clustering.xml Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,93 @@
+
+ of molecular fingerprints
+
+ chemfp
+ openbabel
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 0 false singletons
+ =>
+
+ 1 clusters
+ 55091849 has 12 other members
+ => 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823
+
+
+]]>
+
+
+ 10.1186/1758-2946-5-S1-P36
+
+
diff -r 685a138131f0 -r 57a1a58056a6 mol2fps.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mol2fps.xml Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,276 @@
+
+ with different fingerprint types
+
+
+ chemfp
+ rdkit
+ openbabel
+
+
+&1
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+PUBCHEM_COMPOUND_CID<
+ 28434379
+
+ >
+ 1
+
+ >
+ 280
+
+ >
+ 2
+
+ >
+ 2
+
+ >
+ 2
+
+ >
+ AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
+
+ >
+
+ - type : FP2
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+* Example::
+
+ #FPS1
+ #num_bits=1021
+ #type=OpenBabel-FP2/1
+ #software=OpenBabel/2.3.0
+ #source=/tmp/dataset_409.dat.sdf
+ #date=2012-02-03T11:13:39
+ c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c
+ 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300
+ 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379
+
+
+]]>
+
+
+ 10.1186/1758-2946-3-33
+ 10.1186/1758-2946-5-S1-P36
+
+ @electronic{rdkit,
+ title = {RDKit: Open-source cheminformatics},
+ url ={http://www.rdkit.org}
+ }
+
+
+
diff -r 685a138131f0 -r 57a1a58056a6 nxn_clustering.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nxn_clustering.py Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+"""
+ Modified version of code examples from the chemfp project.
+ http://code.google.com/p/chem-fingerprints/
+ Thanks to Andrew Dalke of Andrew Dalke Scientific!
+"""
+import matplotlib
+matplotlib.use('Agg')
+import argparse
+import os
+import chemfp
+import scipy.cluster.hierarchy as hcluster
+import pylab
+import numpy
+
+def distance_matrix(arena, tanimoto_threshold = 0.0, processors = None):
+    """Return the n x n matrix of 1.0 - Tanimoto similarity for arena.
+
+    Pairs the search does not report keep similarity 0.0. processors=None
+    falls back to the script's module-level args.processors if present.
+    """
+    n = len(arena)
+    try:
+        # Start off a similarity matrix with 1.0s along the diagonal
+        similarities = numpy.identity(n, "d")
+    except (MemoryError, ValueError):
+        raise Exception('Input dataset is too large!')
+    if processors is None:  # keeps old two-argument calls working without a NameError
+        processors = getattr(globals().get('args'), 'processors', None)
+    if processors is not None:
+        chemfp.set_num_threads( processors )
+    # Symmetric search: upper triangle mirrored into the lower; no diagonal terms.
+    results = chemfp.search.threshold_tanimoto_search_symmetric(arena, threshold=tanimoto_threshold)
+    for row_index, row in enumerate(results.iter_indices_and_scores()):
+        for target_index, target_score in row:
+            similarities[row_index, target_index] = target_score
+    return 1.0 - similarities
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the options, build the distance matrix,
+    # then write whichever outputs were requested.
+    description = """NxN clustering for fps files.
+For more details please see the chemfp documentation:
+https://chemfp.readthedocs.org
+"""
+    cli = argparse.ArgumentParser(description=description)
+
+    cli.add_argument("-i", "--input", dest="input_path", required=True,
+                     help="Path to the input file.")
+    cli.add_argument("-c", "--cluster", dest="cluster_image",
+                     help="Path to the output cluster image.")
+    cli.add_argument("-s", "--smatrix", dest="similarity_matrix",
+                     help="Path to the similarity matrix output file.")
+    cli.add_argument("-t", "--threshold", dest="tanimoto_threshold", default=0.0,
+                     type=float, help="Tanimoto threshold [0.0]")
+    cli.add_argument("--oformat", default='png',
+                     help="Output format (png, svg)")
+    cli.add_argument('-p', '--processors', default=4, type=int)
+
+    # Must stay a module-level name called `args`: distance_matrix() reads
+    # args.processors from it.
+    args = cli.parse_args()
+
+    fps_reader = chemfp.open(args.input_path, format='fps')
+    arena = chemfp.load_fingerprints(fps_reader)
+    distances = distance_matrix(arena, args.tanimoto_threshold)
+
+    # Despite the option name, the array written here holds distances
+    # (1.0 - similarity), dumped with the array's tofile() method.
+    if args.similarity_matrix:
+        distances.tofile(args.similarity_matrix)
+
+    if args.cluster_image:
+        # Single-method linkage over the distance array, drawn as a
+        # dendrogram labelled with the arena ids.
+        tree = hcluster.linkage(distances, method="single", metric="euclidean")
+        hcluster.dendrogram(tree, labels=arena.ids)
+        pylab.savefig(args.cluster_image, format=args.oformat)
+
diff -r 685a138131f0 -r 57a1a58056a6 nxn_clustering.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nxn_clustering.xml Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,120 @@
+
+ of molecular fingerprints
+
+ chemfp
+ python
+ matplotlib
+ scipy
+ openbabel
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ output_files == "both" or output_files == "image"
+
+
+
+
+
+ output_files == "both" or output_files == "matrix"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10.1186/1758-2946-5-S1-P36
+
+
diff -r 685a138131f0 -r 57a1a58056a6 sdf2fps.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sdf2fps.xml Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,116 @@
+
+ extract fingerprints from sdf files metadata
+
+
+ chemfp
+ openbabel
+
+
+ '${outfile}'
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+PUBCHEM_COMPOUND_CID<
+ 28434379
+
+ >
+ 1
+
+ >
+ 280
+
+ >
+ 2
+
+ >
+ 2
+
+ >
+ 2
+
+ >
+ AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
+
+ >
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+* Example::
+
+ #FPS1
+ #num_bits=881
+ #type=CACTVS-E_SCREEN/1.0 extended=2
+ #software=CACTVS/unknown
+ #source=/home/mohammed/galaxy-central/database/files/000/dataset_409.dat
+ #date=2012-02-03T10:44:12
+ 07ce04000000000000000000000000000080060000000c0600
+ 00000000001a800f0000780008100000101487e9608c0bed32
+ 48000580644626204101b4844805901b041c2e19511e45039b
+ 8b2924101609401b13e4080000000000010020000004008000
+ 0010000002000000000000 28434379
+
+
+]]>
+
+
+ 10.1186/1758-2946-5-S1-P36
+
+
diff -r 685a138131f0 -r 57a1a58056a6 simsearch.xml
--- a/simsearch.xml Sat May 20 12:45:01 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-
- of fingerprint data sets
-
- chemfp
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 10.1186/1758-2946-3-33
- 10.1186/1758-2946-5-S1-P36
-
-
diff -r 685a138131f0 -r 57a1a58056a6 static/images/NxN_clustering.png
Binary file static/images/NxN_clustering.png has changed
diff -r 685a138131f0 -r 57a1a58056a6 static/images/NxN_clustering.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/static/images/NxN_clustering.svg Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,2275 @@
+
+
+
+
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244.can
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244.can Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,1 @@
+CC(=O)Oc1ccccc1C(=O)O 2244
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244.inchi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244.inchi Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,1 @@
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244.sdf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244.sdf Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,154 @@
+2244
+ -OEChem-05151212332D
+
+ 21 21 0 0 0 0 0 0 0999 V2000
+ 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 1 5 1 0 0 0 0
+ 1 12 1 0 0 0 0
+ 2 11 1 0 0 0 0
+ 2 21 1 0 0 0 0
+ 3 11 2 0 0 0 0
+ 4 12 2 0 0 0 0
+ 5 6 1 0 0 0 0
+ 5 7 2 0 0 0 0
+ 6 8 2 0 0 0 0
+ 6 11 1 0 0 0 0
+ 7 9 1 0 0 0 0
+ 7 14 1 0 0 0 0
+ 8 10 1 0 0 0 0
+ 8 15 1 0 0 0 0
+ 9 10 2 0 0 0 0
+ 9 16 1 0 0 0 0
+ 10 17 1 0 0 0 0
+ 12 13 1 0 0 0 0
+ 13 18 1 0 0 0 0
+ 13 19 1 0 0 0 0
+ 13 20 1 0 0 0 0
+M END
+>
+2244
+
+>
+1
+
+>
+212
+
+>
+4
+
+>
+1
+
+>
+3
+
+>
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA==
+
+>
+2-acetoxybenzoic acid
+
+>
+2-acetyloxybenzoic acid
+
+>
+2-acetyloxybenzoic acid
+
+>
+2-acetyloxybenzoic acid
+
+>
+2-acetoxybenzoic acid
+
+>
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
+
+>
+BSYNRYMUTXBXSQ-UHFFFAOYSA-N
+
+>
+1.2
+
+>
+180.042259
+
+>
+C9H8O4
+
+>
+180.15742
+
+>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+>
+63.6
+
+>
+180.042259
+
+>
+0
+
+>
+13
+
+>
+0
+
+>
+0
+
+>
+0
+
+>
+0
+
+>
+0
+
+>
+1
+
+>
+1
+
+>
+1
+5
+255
+
+>
+5 6 8
+5 7 8
+6 8 8
+7 9 8
+8 10 8
+9 10 8
+
+$$$$
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244.smi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244.smi Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,1 @@
+O(c1c(cccc1)C(=O)O)C(=O)C 2244
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_FP2.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_FP2.fps Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.4.1
+#source=/tmp/tmptaAke4/files/000/dataset_3.dat
+#date=2017-05-19T13:52:59
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_FP3.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_FP3.fps Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.4.1
+#source=/tmp/tmptaAke4/files/000/dataset_7.dat
+#date=2017-05-19T13:53:45
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_FP4.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_FP4.fps Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.4.1
+#source=/tmp/tmptaAke4/files/000/dataset_11.dat
+#date=2017-05-19T13:54:39
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_MACCS.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_MACCS.fps Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.4.1
+#source=/tmp/tmptaAke4/files/000/dataset_15.dat
+#date=2017-05-19T13:55:30
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_maccs.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_maccs.fps Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=166
+#type=OpenBabel-MACCS/2
+#software=OpenBabel/2.3.1
+#source=CID_2244.sdf
+#date=2012-05-15T17:00:39
+0000000000000000000000010000016480cca2d21e 2244
diff -r 685a138131f0 -r 57a1a58056a6 test-data/NxN_Clustering_on_q.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/NxN_Clustering_on_q.svg Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,707 @@
+
+
+
+
diff -r 685a138131f0 -r 57a1a58056a6 test-data/Taylor-Butina_Clustering_on_data_q.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Taylor-Butina_Clustering_on_data_q.txt Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,4 @@
+#0 true singletons
+#0 false singletons
+#clusters: 1
+55091752 12 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091467 55168823 55091849
diff -r 685a138131f0 -r 57a1a58056a6 test-data/sdf2fps_result1.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sdf2fps_result1.fps Sat May 20 12:57:06 2017 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=881
+#type=CACTVS-E_SCREEN/1.0 extended=2
+#software=CACTVS/unknown
+#source=/tmp/tmpN2w37z/files/000/dataset_1.dat
+#date=2017-05-19T14:27:41
+030e1c000000000000000000000000000000000000000c00000000000000008000000058000010000030200119004c70010060001140044b100040100024040010118060101330e46c21ac5841de06a50788109de11113047100000000001000000000000000080000000000000000 2244
diff -r 685a138131f0 -r 57a1a58056a6 test-data/simsearch_on_tragets_and_q.tabular
--- a/test-data/simsearch_on_tragets_and_q.tabular Sat May 20 12:45:01 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-#Simsearch/1
-#num_bits=881
-#type=Tanimoto k=all threshold=0.7
-#software=chemfp/1.1p1
-#queries=./query.fps
-#targets=./targets.fps
-#query_sources=CID_28434379.sdf
-#target_sources=Desktop/3579363516810334491.sdf
-13 28434379 6499094 0.9615 6485578 0.9679 55079807 0.9313 3153534 0.9557 55102353 0.9682 55091466 0.9682 55091416 0.9682 6485577 0.9497 55169009 0.9560 55091752 0.9684 55091467 0.9623 55168823 0.9563 55091849 0.9563