# HG changeset patch
# User bgruening
# Date 1495284081 14400
# Node ID 892811a1f12c36e017e92c5e217774c21ed044b0
# Parent 70b071de9bee7e6efb52be9796cb19233ce4343f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/simsearch commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
diff -r 70b071de9bee -r 892811a1f12c butina_clustering.py
--- a/butina_clustering.py Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-#!/usr/bin/env python
-"""
- Modified version of code examples from the chemfp project.
- http://code.google.com/p/chem-fingerprints/
- Thanks to Andrew Dalke of Andrew Dalke Scientific!
-"""
-
-import chemfp
-import sys
-import os
-import tempfile
-import argparse
-import subprocess
-from chemfp import search
-
-def unix_sort(results):
- temp_unsorted = tempfile.NamedTemporaryFile(delete=False)
- for (i,indices) in enumerate( results.iter_indices() ):
- temp_unsorted.write('%s %s\n' % (len(indices), i))
- temp_unsorted.close()
- temp_sorted = tempfile.NamedTemporaryFile(delete=False)
- temp_sorted.close()
- p = subprocess.Popen(['sort', '-n', '-r', '-k', '1,1'], stdin=open(temp_unsorted.name), stdout=open(temp_sorted.name, 'w+'))
- stdout, stderr = p.communicate()
- return_code = p.returncode
-
- if return_code:
- sys.stdout.write(stdout)
- sys.stderr.write(stderr)
- sys.stderr.write("Return error code %i from command:\n" % return_code)
- temp_sorted.close()
- os.remove(temp_unsorted.name)
-
- for line in open(temp_sorted.name):
- size, fp_idx = line.strip().split()
- yield (int(size), int(fp_idx))
-
- os.remove(temp_sorted.name)
-
-def butina( args ):
- """
- Taylor-Butina clustering from the chemfp help.
- """
- out = args.output_path
- targets = chemfp.open( args.input_path, format='fps' )
- arena = chemfp.load_fingerprints( targets )
-
- chemfp.set_num_threads( args.processors )
- results = search.threshold_tanimoto_search_symmetric(arena, threshold = args.tanimoto_threshold)
- results.reorder_all("move-closest-first")
-
- sorted_ids = unix_sort(results)
-
- # Determine the true/false singletons and the clusters
- true_singletons = []
- false_singletons = []
- clusters = []
-
- seen = set()
- #for (size, fp_idx, members) in results:
- for (size, fp_idx) in sorted_ids:
- members = results[fp_idx].get_indices()
- #print arena.ids[ fp_idx ], [arena.ids[ m ] for m in members]
- if fp_idx in seen:
- # Can't use a centroid which is already assigned
- continue
- seen.add(fp_idx)
-
- if size == 0:
- # The only fingerprint in the exclusion sphere is itself
- true_singletons.append( fp_idx )
- continue
-
- # Figure out which ones haven't yet been assigned
- unassigned = set(members) - seen
-
- if not unassigned:
- false_singletons.append(fp_idx)
- continue
-
- # this is a new cluster
- clusters.append( (fp_idx, unassigned) )
- seen.update(unassigned)
-
- len_cluster = len(clusters)
- #out.write( "#%s true singletons: %s\n" % ( len(true_singletons), " ".join(sorted(arena.ids[idx] for idx in true_singletons)) ) )
- #out.write( "#%s false singletons: %s\n" % ( len(false_singletons), " ".join(sorted(arena.ids[idx] for idx in false_singletons)) ) )
-
- out.write( "#%s true singletons\n" % len(true_singletons) )
- out.write( "#%s false singletons\n" % len(false_singletons) )
- out.write( "#clusters: %s\n" % len_cluster )
-
- # Sort so the cluster with the most compounds comes first,
- # then by alphabetically smallest id
- def cluster_sort_key(cluster):
- centroid_idx, members = cluster
- return -len(members), arena.ids[centroid_idx]
-
- clusters.sort(key=cluster_sort_key)
-
- for centroid_idx, members in clusters:
- centroid_name = arena.ids[centroid_idx]
- out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(arena.ids[idx] for idx in members)))
- #ToDo: len(members) need to be some biggest top 90% or something ...
-
- for idx in true_singletons:
- out.write("%s\t%s\n" % (arena.ids[idx], 0))
-
- out.close()
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="""Taylor-Butina clustering for fps files.
-For more details please see the original publication or the chemfp documentation:
-http://www.chemomine.co.uk/dbclus-paper.pdf
-https://chemfp.readthedocs.org
-""")
-
- parser.add_argument("-i", "--input", dest="input_path",
- required=True,
- help="Path to the input file.")
-
- parser.add_argument("-o", "--output", dest="output_path", type=argparse.FileType('w'),
- default=sys.stdout,
- help="Path to the output file.")
-
- parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", type=float,
- default=0.8,
- help="Tanimoto threshold [0.8]")
-
- parser.add_argument('-p', '--processors', type=int, default=4)
-
- options = parser.parse_args()
- butina( options )
diff -r 70b071de9bee -r 892811a1f12c butina_clustering.xml
--- a/butina_clustering.xml Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,93 +0,0 @@
-
- of molecular fingerprints
-
- chemfp
- openbabel
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 0 false singletons
- =>
-
- 1 clusters
- 55091849 has 12 other members
- => 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823
-
-
-]]>
-
-
- 10.1186/1758-2946-5-S1-P36
-
-
diff -r 70b071de9bee -r 892811a1f12c mol2fps.xml
--- a/mol2fps.xml Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,276 +0,0 @@
-
- with different fingerprint types
-
-
- chemfp
- rdkit
- openbabel
-
-
-&1
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-PUBCHEM_COMPOUND_CID<
- 28434379
-
- >
- 1
-
- >
- 280
-
- >
- 2
-
- >
- 2
-
- >
- 2
-
- >
- AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
-
- >
-
- - type : FP2
-
------
-
-.. class:: infomark
-
-**Output**
-
-* Example::
-
- #FPS1
- #num_bits=1021
- #type=OpenBabel-FP2/1
- #software=OpenBabel/2.3.0
- #source=/tmp/dataset_409.dat.sdf
- #date=2012-02-03T11:13:39
- c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c
- 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300
- 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379
-
-
-]]>
-
-
- 10.1186/1758-2946-3-33
- 10.1186/1758-2946-5-S1-P36
-
- @electronic{rdkit,
- title = {RDKit: Open-source cheminformatics},
- url ={http://www.rdkit.org}
- }
-
-
-
diff -r 70b071de9bee -r 892811a1f12c nxn_clustering.py
--- a/nxn_clustering.py Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-#!/usr/bin/env python
-"""
- Modified version of code examples from the chemfp project.
- http://code.google.com/p/chem-fingerprints/
- Thanks to Andrew Dalke of Andrew Dalke Scientific!
-"""
-import matplotlib
-matplotlib.use('Agg')
-import argparse
-import os
-import chemfp
-import scipy.cluster.hierarchy as hcluster
-import pylab
-import numpy
-
-def distance_matrix(arena, tanimoto_threshold = 0.0):
- n = len(arena)
- # Start off a similarity matrix with 1.0s along the diagonal
- try:
- similarities = numpy.identity(n, "d")
- except:
- raise Exception('Input dataset is to large!')
- chemfp.set_num_threads( args.processors )
-
- ## Compute the full similarity matrix.
- # The implementation computes the upper-triangle then copies
- # the upper-triangle into lower-triangle. It does not include
- # terms for the diagonal.
- results = chemfp.search.threshold_tanimoto_search_symmetric(arena, threshold=tanimoto_threshold)
-
- # Copy the results into the NumPy array.
- for row_index, row in enumerate(results.iter_indices_and_scores()):
- for target_index, target_score in row:
- similarities[row_index, target_index] = target_score
-
- # Return the distance matrix using the similarity matrix
- return 1.0 - similarities
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="""NxN clustering for fps files.
-For more details please see the chemfp documentation:
-https://chemfp.readthedocs.org
-""")
-
- parser.add_argument("-i", "--input", dest="input_path",
- required=True,
- help="Path to the input file.")
-
- parser.add_argument("-c", "--cluster", dest="cluster_image",
- help="Path to the output cluster image.")
-
- parser.add_argument("-s", "--smatrix", dest="similarity_matrix",
- help="Path to the similarity matrix output file.")
-
- parser.add_argument("-t", "--threshold", dest="tanimoto_threshold",
- type=float, default=0.0,
- help="Tanimoto threshold [0.0]")
-
- parser.add_argument("--oformat", default='png', help="Output format (png, svg)")
-
- parser.add_argument('-p', '--processors', type=int,
- default=4)
-
- args = parser.parse_args()
-
- targets = chemfp.open( args.input_path, format='fps' )
- arena = chemfp.load_fingerprints( targets )
- distances = distance_matrix( arena, args.tanimoto_threshold )
-
- if args.similarity_matrix:
- distances.tofile( args.similarity_matrix )
-
- if args.cluster_image:
- linkage = hcluster.linkage( distances, method="single", metric="euclidean" )
-
- hcluster.dendrogram(linkage, labels=arena.ids)
-
- pylab.savefig( args.cluster_image, format=args.oformat )
-
diff -r 70b071de9bee -r 892811a1f12c nxn_clustering.xml
--- a/nxn_clustering.xml Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,120 +0,0 @@
-
- of molecular fingerprints
-
- chemfp
- python
- matplotlib
- scipy
- openbabel
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- output_files == "both" or output_files == "image"
-
-
-
-
-
- output_files == "both" or output_files == "matrix"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 10.1186/1758-2946-5-S1-P36
-
-
diff -r 70b071de9bee -r 892811a1f12c sdf2fps.xml
--- a/sdf2fps.xml Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,116 +0,0 @@
-
- extract fingerprints from sdf files metadata
-
-
- chemfp
- openbabel
-
-
- '${outfile}'
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-PUBCHEM_COMPOUND_CID<
- 28434379
-
- >
- 1
-
- >
- 280
-
- >
- 2
-
- >
- 2
-
- >
- 2
-
- >
- AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
-
- >
-
------
-
-.. class:: infomark
-
-**Output**
-
-* Example::
-
- #FPS1
- #num_bits=881
- #type=CACTVS-E_SCREEN/1.0 extended=2
- #software=CACTVS/unknown
- #source=/home/mohammed/galaxy-central/database/files/000/dataset_409.dat
- #date=2012-02-03T10:44:12
- 07ce04000000000000000000000000000080060000000c0600
- 00000000001a800f0000780008100000101487e9608c0bed32
- 48000580644626204101b4844805901b041c2e19511e45039b
- 8b2924101609401b13e4080000000000010020000004008000
- 0010000002000000000000 28434379
-
-
-]]>
-
-
- 10.1186/1758-2946-5-S1-P36
-
-
diff -r 70b071de9bee -r 892811a1f12c simsearch.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/simsearch.xml Sat May 20 08:41:21 2017 -0400
@@ -0,0 +1,131 @@
+
+ of fingerprint data sets
+
+ chemfp
+
+
+&1;
+ rm $temp_link
+ #end if
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10.1186/1758-2946-3-33
+
+
diff -r 70b071de9bee -r 892811a1f12c static/images/NxN_clustering.png
Binary file static/images/NxN_clustering.png has changed
diff -r 70b071de9bee -r 892811a1f12c static/images/NxN_clustering.svg
--- a/static/images/NxN_clustering.svg Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,2275 +0,0 @@
-
-
-
-
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244.can
--- a/test-data/CID_2244.can Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-CC(=O)Oc1ccccc1C(=O)O 2244
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244.inchi
--- a/test-data/CID_2244.inchi Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244.sdf
--- a/test-data/CID_2244.sdf Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,154 +0,0 @@
-2244
- -OEChem-05151212332D
-
- 21 21 0 0 0 0 0 0 0999 V2000
- 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
- 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
- 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
- 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
- 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
- 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
- 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
- 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
- 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
- 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
- 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
- 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
- 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
- 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
- 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
- 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
- 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
- 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
- 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
- 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
- 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
- 1 5 1 0 0 0 0
- 1 12 1 0 0 0 0
- 2 11 1 0 0 0 0
- 2 21 1 0 0 0 0
- 3 11 2 0 0 0 0
- 4 12 2 0 0 0 0
- 5 6 1 0 0 0 0
- 5 7 2 0 0 0 0
- 6 8 2 0 0 0 0
- 6 11 1 0 0 0 0
- 7 9 1 0 0 0 0
- 7 14 1 0 0 0 0
- 8 10 1 0 0 0 0
- 8 15 1 0 0 0 0
- 9 10 2 0 0 0 0
- 9 16 1 0 0 0 0
- 10 17 1 0 0 0 0
- 12 13 1 0 0 0 0
- 13 18 1 0 0 0 0
- 13 19 1 0 0 0 0
- 13 20 1 0 0 0 0
-M END
->
-2244
-
->
-1
-
->
-212
-
->
-4
-
->
-1
-
->
-3
-
->
-AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA==
-
->
-2-acetoxybenzoic acid
-
->
-2-acetyloxybenzoic acid
-
->
-2-acetyloxybenzoic acid
-
->
-2-acetyloxybenzoic acid
-
->
-2-acetoxybenzoic acid
-
->
-InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
-
->
-BSYNRYMUTXBXSQ-UHFFFAOYSA-N
-
->
-1.2
-
->
-180.042259
-
->
-C9H8O4
-
->
-180.15742
-
->
-CC(=O)OC1=CC=CC=C1C(=O)O
-
->
-CC(=O)OC1=CC=CC=C1C(=O)O
-
->
-63.6
-
->
-180.042259
-
->
-0
-
->
-13
-
->
-0
-
->
-0
-
->
-0
-
->
-0
-
->
-0
-
->
-1
-
->
-1
-
->
-1
-5
-255
-
->
-5 6 8
-5 7 8
-6 8 8
-7 9 8
-8 10 8
-9 10 8
-
-$$$$
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244.smi
--- a/test-data/CID_2244.smi Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-O(c1c(cccc1)C(=O)O)C(=O)C 2244
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244_FP2.fps
--- a/test-data/CID_2244_FP2.fps Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-#FPS1
-#num_bits=1021
-#type=OpenBabel-FP2/1
-#software=OpenBabel/2.4.1
-#source=/tmp/tmptaAke4/files/000/dataset_3.dat
-#date=2017-05-19T13:52:59
-00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244_FP3.fps
--- a/test-data/CID_2244_FP3.fps Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-#FPS1
-#num_bits=1021
-#type=OpenBabel-FP2/1
-#software=OpenBabel/2.4.1
-#source=/tmp/tmptaAke4/files/000/dataset_7.dat
-#date=2017-05-19T13:53:45
-00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244_FP4.fps
--- a/test-data/CID_2244_FP4.fps Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-#FPS1
-#num_bits=1021
-#type=OpenBabel-FP2/1
-#software=OpenBabel/2.4.1
-#source=/tmp/tmptaAke4/files/000/dataset_11.dat
-#date=2017-05-19T13:54:39
-00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244_MACCS.fps
--- a/test-data/CID_2244_MACCS.fps Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-#FPS1
-#num_bits=1021
-#type=OpenBabel-FP2/1
-#software=OpenBabel/2.4.1
-#source=/tmp/tmptaAke4/files/000/dataset_15.dat
-#date=2017-05-19T13:55:30
-00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244_maccs.fps
--- a/test-data/CID_2244_maccs.fps Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-#FPS1
-#num_bits=166
-#type=OpenBabel-MACCS/2
-#software=OpenBabel/2.3.1
-#source=CID_2244.sdf
-#date=2012-05-15T17:00:39
-0000000000000000000000010000016480cca2d21e 2244
diff -r 70b071de9bee -r 892811a1f12c test-data/NxN_Clustering_on_q.svg
--- a/test-data/NxN_Clustering_on_q.svg Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,707 +0,0 @@
-
-
-
-
diff -r 70b071de9bee -r 892811a1f12c test-data/Taylor-Butina_Clustering_on_data_q.txt
--- a/test-data/Taylor-Butina_Clustering_on_data_q.txt Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-#0 true singletons
-#0 false singletons
-#clusters: 1
-55091752 12 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091467 55168823 55091849
diff -r 70b071de9bee -r 892811a1f12c test-data/sdf2fps_result1.fps
--- a/test-data/sdf2fps_result1.fps Sat May 20 08:31:44 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-#FPS1
-#num_bits=881
-#type=CACTVS-E_SCREEN/1.0 extended=2
-#software=CACTVS/unknown
-#source=/tmp/tmpN2w37z/files/000/dataset_1.dat
-#date=2017-05-19T14:27:41
-030e1c000000000000000000000000000000000000000c00000000000000008000000058000010000030200119004c70010060001140044b100040100024040010118060101330e46c21ac5841de06a50788109de11113047100000000001000000000000000080000000000000000 2244
diff -r 70b071de9bee -r 892811a1f12c test-data/simsearch_on_tragets_and_q.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/simsearch_on_tragets_and_q.tabular Sat May 20 08:41:21 2017 -0400
@@ -0,0 +1,9 @@
+#Simsearch/1
+#num_bits=881
+#type=Tanimoto k=all threshold=0.7
+#software=chemfp/1.1p1
+#queries=./query.fps
+#targets=./targets.fps
+#query_sources=CID_28434379.sdf
+#target_sources=Desktop/3579363516810334491.sdf
+13 28434379 6499094 0.9615 6485578 0.9679 55079807 0.9313 3153534 0.9557 55102353 0.9682 55091466 0.9682 55091416 0.9682 6485577 0.9497 55169009 0.9560 55091752 0.9684 55091467 0.9623 55168823 0.9563 55091849 0.9563