Repository 'chemfp'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/chemfp

Changeset 2:70b071de9bee (2017-05-20)
Previous changeset 1:43a9e7d9b24f (2015-11-01) Next changeset 3:892811a1f12c (2017-05-20)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
added:
butina_clustering.py
butina_clustering.xml
mol2fps.xml
nxn_clustering.py
nxn_clustering.xml
sdf2fps.xml
test-data/CID_2244.can
test-data/CID_2244.inchi
test-data/CID_2244.sdf
test-data/CID_2244.smi
test-data/CID_2244_FP2.fps
test-data/CID_2244_FP3.fps
test-data/CID_2244_FP4.fps
test-data/CID_2244_MACCS.fps
test-data/CID_2244_maccs.fps
test-data/NxN_Clustering_on_q.svg
test-data/Taylor-Butina_Clustering_on_data_q.txt
test-data/q.fps
test-data/sdf2fps_result1.fps
test-data/targets.fps
removed:
chemfp_clustering/butina_clustering.py
chemfp_clustering/butina_clustering.xml
chemfp_clustering/nxn_clustering.py
chemfp_clustering/nxn_clustering.xml
chemfp_clustering/old/butina_clustering_old.py
chemfp_clustering/old/in.fps
chemfp_clustering/old/new.txt
chemfp_clustering/old/old.res
chemfp_clustering/old/old.txt
chemfp_clustering/old/oldm.res
chemfp_clustering/old/out.txt
chemfp_clustering/old/u_new.txt
chemfp_clustering/old/u_old.txt
chemfp_clustering/test-data/NxN_Clustering_on_q.svg
chemfp_clustering/test-data/Taylor-Butina_Clustering_on_data_q.txt
chemfp_mol2fps/mol2fps.xml
chemfp_mol2fps/test-data/CID_2244.can
chemfp_mol2fps/test-data/CID_2244.inchi
chemfp_mol2fps/test-data/CID_2244.sdf
chemfp_mol2fps/test-data/CID_2244.smi
chemfp_mol2fps/test-data/CID_2244_FP2.fps
chemfp_mol2fps/test-data/CID_2244_FP3.fps
chemfp_mol2fps/test-data/CID_2244_FP4.fps
chemfp_mol2fps/test-data/CID_2244_maccs.fps
chemfp_sdf2fps/sdf2fps.xml
repository_dependencies.xml
tool_dependencies.xml
b
diff -r 43a9e7d9b24f -r 70b071de9bee butina_clustering.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/butina_clustering.py Sat May 20 08:31:44 2017 -0400
[
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+"""
+    Modified version of code examples from the chemfp project.
+    http://code.google.com/p/chem-fingerprints/
+    Thanks to Andrew Dalke of Andrew Dalke Scientific!
+"""
+
+import chemfp
+import sys
+import os
+import tempfile
+import argparse
+import subprocess
+from chemfp import search
+
+def unix_sort(results):  # generator: yields (hit count, row index) int pairs, ordered by hit count (largest first) via the external `sort` command
+    temp_unsorted = tempfile.NamedTemporaryFile(delete=False)  # delete=False: the file must survive close() so `sort` can read it by name
+    for (i,indices) in enumerate( results.iter_indices() ):  # one row of hit indices per entry of results
+        temp_unsorted.write('%s %s\n' % (len(indices), i))  # line format: "<hit count> <row index>"
+    temp_unsorted.close()
+    temp_sorted = tempfile.NamedTemporaryFile(delete=False)  # created only to reserve a file name for the sorted output
+    temp_sorted.close()
+    p = subprocess.Popen(['sort', '-n', '-r', '-k', '1,1'], stdin=open(temp_unsorted.name), stdout=open(temp_sorted.name, 'w+'))  # numeric, descending sort on field 1 (the hit count); the two open() handles are never closed explicitly
+    stdout, stderr = p.communicate()  # NOTE(review): no PIPE is passed to Popen, so both values are None here
+    return_code = p.returncode
+
+    if return_code:  # NOTE(review): the two write() calls below receive None (see above) and would raise TypeError before the message is printed
+        sys.stdout.write(stdout)
+        sys.stderr.write(stderr)
+        sys.stderr.write("Return error code %i from command:\n" % return_code)
+    temp_sorted.close()  # already closed above; a second close() has no effect
+    os.remove(temp_unsorted.name)
+
+    for line in open(temp_sorted.name):  # stream the sorted "<hit count> <row index>" lines back
+        size, fp_idx = line.strip().split()
+        yield (int(size), int(fp_idx))
+
+    os.remove(temp_sorted.name)  # runs only once the caller has consumed every pair; otherwise the temp file is left behind
+
+def butina( args ):  # args: parsed options; reads input_path, output_path, tanimoto_threshold and processors
+    """
+        Taylor-Butina clustering from the chemfp help.
+    """
+    out = args.output_path  # used below as an open, writable file object (write/close), not as a path string
+    targets = chemfp.open( args.input_path, format='fps' )
+    arena = chemfp.load_fingerprints( targets )
+
+    chemfp.set_num_threads( args.processors )
+    results = search.threshold_tanimoto_search_symmetric(arena, threshold = args.tanimoto_threshold)  # all-against-all search of the arena with itself
+    results.reorder_all("move-closest-first")  # presumably moves each row's best hit to the front -- confirm against the chemfp docs
+
+    sorted_ids = unix_sort(results)  # lazy generator of (size, fp_idx) pairs, rows with the most hits first
+
+    # Determine the true/false singletons and the clusters
+    true_singletons = []
+    false_singletons = []
+    clusters = []
+
+    seen = set()  # indices already used as a centroid or claimed as a cluster member
+    #for (size, fp_idx, members) in results:
+    for (size, fp_idx) in sorted_ids:
+        members = results[fp_idx].get_indices()
+        #print arena.ids[ fp_idx ], [arena.ids[ m ] for m in members]
+        if fp_idx in seen:
+            # Can't use a centroid which is already assigned
+            continue
+        seen.add(fp_idx)
+
+        if size == 0:
+            # No hits at all for this fingerprint: nothing else is within the threshold
+            true_singletons.append( fp_idx )
+            continue
+
+        # Figure out which ones haven't yet been assigned
+        unassigned = set(members) - seen
+
+        if not unassigned:  # it has hits, but every one of them was already claimed by an earlier cluster
+            false_singletons.append(fp_idx)
+            continue
+
+        # this is a new cluster
+        clusters.append( (fp_idx, unassigned) )  # the centroid index itself is not part of the stored member set
+        seen.update(unassigned)
+
+    len_cluster = len(clusters)
+    #out.write( "#%s true singletons: %s\n" % ( len(true_singletons), " ".join(sorted(arena.ids[idx] for idx in true_singletons)) ) )
+    #out.write( "#%s false singletons: %s\n" % ( len(false_singletons), " ".join(sorted(arena.ids[idx] for idx in false_singletons)) ) )
+
+    out.write( "#%s true singletons\n" % len(true_singletons) )  # header: three '#'-prefixed summary lines
+    out.write( "#%s false singletons\n" % len(false_singletons) )
+    out.write( "#clusters: %s\n" % len_cluster )
+
+    # Sort so the cluster with the most compounds comes first,
+    # then by alphabetically smallest id
+    def cluster_sort_key(cluster):
+        centroid_idx, members = cluster
+        return -len(members), arena.ids[centroid_idx]  # negated size gives descending size under an ascending sort
+
+    clusters.sort(key=cluster_sort_key)
+
+    for centroid_idx, members in clusters:
+        centroid_name = arena.ids[centroid_idx]
+        out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(arena.ids[idx] for idx in members)))  # members is a set, so the id order within a line follows set iteration order
+        #ToDo: len(members) need to be some biggest top 90% or something ...
+
+    for idx in true_singletons:  # true singletons are listed with a member count of 0; false singletons appear only as a count in the header
+        out.write("%s\t%s\n" % (arena.ids[idx], 0))
+
+    out.close()  # NOTE(review): when -o is omitted the argparse default is sys.stdout, which this closes as well
+
+
+if __name__ == "__main__":  # command-line entry point: parse the options and run the clustering once
+    parser = argparse.ArgumentParser(description="""Taylor-Butina clustering for fps files.
+For more details please see the original publication or the chemfp documentation:
+http://www.chemomine.co.uk/dbclus-paper.pdf
+https://chemfp.readthedocs.org
+""")
+
+    parser.add_argument("-i", "--input", dest="input_path",  # passed to chemfp.open() with format='fps'
+                    required=True,
+                    help="Path to the input file.")
+
+    parser.add_argument("-o", "--output", dest="output_path", type=argparse.FileType('w'),  # argparse opens the file, so butina() receives a file object rather than a path
+                    default=sys.stdout,
+                    help="Path to the output file.")
+
+    parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", type=float,  # similarity cutoff handed to the symmetric Tanimoto search
+                    default=0.8,
+                    help="Tanimoto threshold [0.8]")
+
+    parser.add_argument('-p', '--processors', type=int, default=4)  # thread count given to chemfp.set_num_threads()
+
+    options = parser.parse_args()
+    butina( options )
b
diff -r 43a9e7d9b24f -r 70b071de9bee butina_clustering.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/butina_clustering.xml Sat May 20 08:31:44 2017 -0400
[
@@ -0,0 +1,93 @@
+<tool id="ctb_chemfp_butina_clustering" name="Taylor-Butina Clustering" version="0.2">
+    <description>of molecular fingerprints</description>
+    <requirements>
+        <requirement type="package" version="1.1p1">chemfp</requirement>
+        <requirement type="package" version="2.4.1">openbabel</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+<![CDATA[
+        python '$__tool_directory__/butina_clustering.py'
+            -i '$infile'
+            -t $threshold
+            -o '$outfile'
+            -p \${GALAXY_SLOTS:-1}
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="fps" label="Fingerprint dataset" help="Dataset missing? See TIP below"/>
+        <param name='threshold' type='float' value='0.8'/>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="outfile"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" ftype="fps" value="targets.fps"/>
+            <param name='threshold' value='0.8' ></param>
+            <output name="outfile" ftype="tabular"  file='Taylor-Butina_Clustering_on_data_q.txt'/>
+        </test>
+    </tests>
+<help>
+<![CDATA[
+
+
+.. class:: infomark
+
+**What this tool does**
+
+Unsupervised non-hierarchical clustering method based on the Taylor-Butina algorithm, which guarantees that every cluster contains molecules which are within a distance cutoff of the central molecule. This tool is based on the chemfp_ project.
+
+.. _chemfp: http://chemfp.com/
+
+-----
+
+.. class:: infomark
+
+**Input**
+
+| Molecular fingerprints in FPS format.
+| Open Babel Fastsearch index is not supported.
+
+* Example::
+
+ -  fingerprints in FPS format
+
+ #FPS1
+ #num_bits=881
+ #type=CACTVS-E_SCREEN/1.0 extended=2
+ #software=CACTVS/unknown
+ #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat
+ #date=2012-02-09T13:20:37
+ 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e
+ 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009
+ 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e
+ 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807
+ ........
+
+ - Tanimoto threshold : 0.8 (between 0 and 1)
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+* Example::
+
+ 0 true singletons
+ =>
+
+ 0 false singletons
+ =>
+
+ 1 clusters
+ 55091849 has 12 other members
+ => 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823
+
+
+]]>
+ </help>
+    <citations>
+        <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>
+    </citations>
+</tool>
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/butina_clustering.py
--- a/chemfp_clustering/butina_clustering.py Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,135 +0,0 @@
-#!/usr/bin/env python
-"""
-    Modified version of code examples from the chemfp project.
-    http://code.google.com/p/chem-fingerprints/
-    Thanks to Andrew Dalke of Andrew Dalke Scientific!
-"""
-
-import chemfp
-import sys
-import os
-import tempfile
-import argparse
-import subprocess
-from chemfp import search
-
-def unix_sort(results):
-    temp_unsorted = tempfile.NamedTemporaryFile(delete=False)
-    for (i,indices) in enumerate( results.iter_indices() ):
-        temp_unsorted.write('%s %s\n' % (len(indices), i))
-    temp_unsorted.close()
-    temp_sorted = tempfile.NamedTemporaryFile(delete=False)
-    temp_sorted.close()
-    p = subprocess.Popen(['sort', '-n', '-r', '-k', '1,1'], stdin=open(temp_unsorted.name), stdout=open(temp_sorted.name, 'w+'))
-    stdout, stderr = p.communicate()
-    return_code = p.returncode
-
-    if return_code:
-        sys.stdout.write(stdout)
-        sys.stderr.write(stderr)
-        sys.stderr.write("Return error code %i from command:\n" % return_code)
-    temp_sorted.close()
-    os.remove(temp_unsorted.name)
-
-    for line in open(temp_sorted.name):
-        size, fp_idx = line.strip().split()
-        yield (int(size), int(fp_idx))
-
-    os.remove(temp_sorted.name)
-
-def butina( args ):
-    """
-        Taylor-Butina clustering from the chemfp help.
-    """
-    out = args.output_path
-    targets = chemfp.open( args.input_path, format='fps' )
-    arena = chemfp.load_fingerprints( targets )
-
-    chemfp.set_num_threads( args.processors )
-    results = search.threshold_tanimoto_search_symmetric(arena, threshold = args.tanimoto_threshold)
-    results.reorder_all("move-closest-first")
-
-    sorted_ids = unix_sort(results)
-
-    # Determine the true/false singletons and the clusters
-    true_singletons = []
-    false_singletons = []
-    clusters = []
-
-    seen = set()
-    #for (size, fp_idx, members) in results:
-    for (size, fp_idx) in sorted_ids:
-        members = results[fp_idx].get_indices()
-        #print arena.ids[ fp_idx ], [arena.ids[ m ] for m in members]
-        if fp_idx in seen:
-            # Can't use a centroid which is already assigned
-            continue
-        seen.add(fp_idx)
-
-        if size == 0:
-            # The only fingerprint in the exclusion sphere is itself
-            true_singletons.append( fp_idx )
-            continue
-
-        # Figure out which ones haven't yet been assigned
-        unassigned = set(members) - seen
-
-        if not unassigned:
-            false_singletons.append(fp_idx)
-            continue
-
-        # this is a new cluster
-        clusters.append( (fp_idx, unassigned) )
-        seen.update(unassigned)
-
-    len_cluster = len(clusters)
-    #out.write( "#%s true singletons: %s\n" % ( len(true_singletons), " ".join(sorted(arena.ids[idx] for idx in true_singletons)) ) )
-    #out.write( "#%s false singletons: %s\n" % ( len(false_singletons), " ".join(sorted(arena.ids[idx] for idx in false_singletons)) ) )
-
-    out.write( "#%s true singletons\n" % len(true_singletons) )
-    out.write( "#%s false singletons\n" % len(false_singletons) )
-    out.write( "#clusters: %s\n" % len_cluster )
-
-    # Sort so the cluster with the most compounds comes first,
-    # then by alphabetically smallest id
-    def cluster_sort_key(cluster):
-        centroid_idx, members = cluster
-        return -len(members), arena.ids[centroid_idx]
-
-    clusters.sort(key=cluster_sort_key)
-
-    for centroid_idx, members in clusters:
-        centroid_name = arena.ids[centroid_idx]
-        out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(arena.ids[idx] for idx in members)))
-        #ToDo: len(members) need to be some biggest top 90% or something ...
-
-    for idx in true_singletons:
-        out.write("%s\t%s\n" % (arena.ids[idx], 0))
-
-    out.close()
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="""Taylor-Butina clustering for fps files.
-For more details please see the original publication or the chemfp documentation:
-http://www.chemomine.co.uk/dbclus-paper.pdf
-https://chemfp.readthedocs.org
-""")
-
-    parser.add_argument("-i", "--input", dest="input_path",
-                    required=True,
-                    help="Path to the input file.")
-
-    parser.add_argument("-o", "--output", dest="output_path", type=argparse.FileType('w'),
-                    default=sys.stdout,
-                    help="Path to the output file.")
-
-    parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", type=float,
-                    default=0.8,
-                    help="Tanimoto threshold [0.8]")
-
-    parser.add_argument('-p', '--processors', type=int, 
-        default=4)
-
-    options = parser.parse_args()
-    butina( options )
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/butina_clustering.xml
--- a/chemfp_clustering/butina_clustering.xml Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,100 +0,0 @@
-<tool id="ctb_chemfp_butina_clustering" name="Taylor-Butina Clustering" version="0.1">
-    <description>of molecular fingerprints</description>
-    <requirements>
-        <requirement type="package" version="1.1p1">chemfp</requirement>
-        <requirement type="package" version="2.3.2">openbabel</requirement>
-    </requirements>
-    <command interpreter='python'>
-<![CDATA[
-        butina_clustering.py
-            -i $infile
-            -t $threshold
-            -o $outfile
-            -p 4
-]]>
-    </command>
-    <inputs>
-        <param name="infile" type="data" format="fps" label="Finperprint dataset" help="Dataset missing? See TIP below"/>
-        <param name='threshold' type='float' value='0.8'/>
-    </inputs>
-    <outputs>
-        <data format="tabular" name="outfile" label="${tool.name} on ${on_string}"/>
-    </outputs>
-    <tests>
-        <test>
-            <param name="infile" ftype="fps" value="q.fps"/>
-            <param name='threshold' value='0.8' ></param>
-            <output name="outfile" ftype="tabular"  file='Taylor-Butina_Clustering_on_data_q.txt'/>
-        </test>
-    </tests>
-<help>
-<![CDATA[
-
-
-.. class:: infomark
-
-**What this tool does**
-
-Unsupervised non-hierarchical clustering method based on the Taylor-Butina algorithm, which guarantees that every cluster contains molecules which are within a distance cutoff of the central molecule. This tool is based on the chemfp_ project.
-
-.. _chemfp: http://chemfp.com/
-
------
-
-.. class:: infomark
-
-**Input**
-
-| Molecular fingerprints in FPS format.
-| Open Babel Fastsearch index is not supported.
-
-* Example::
-
- -  fingerprints in FPS format
-
- #FPS1
- #num_bits=881
- #type=CACTVS-E_SCREEN/1.0 extended=2
- #software=CACTVS/unknown
- #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat
- #date=2012-02-09T13:20:37
- 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e
- 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009
- 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e
- 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807
- ........
-
- - Tanimoto threshold : 0.8 (between 0 and 1)
-
------
-
-.. class:: infomark
-
-**Output**
-
-* Example::
-
- 0 true singletons
- =>
-
- 0 false singletons
- =>
-
- 1 clusters
- 55091849 has 12 other members
- => 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823
-
------
-
-.. class:: infomark
-
-**Cite**
-
-The chemfp_ project from Andrew Dalke!
-
-.. _chemfp: http://chemfp.com/
-
-]]>
- </help>
-
-</tool>
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/nxn_clustering.py
--- a/chemfp_clustering/nxn_clustering.py Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,80 +0,0 @@
-#!/usr/bin/env python
-"""
-    Modified version of code examples from the chemfp project.
-    http://code.google.com/p/chem-fingerprints/
-    Thanks to Andrew Dalke of Andrew Dalke Scientific!
-"""
-import matplotlib
-matplotlib.use('Agg')
-import argparse
-import os
-import chemfp
-import scipy.cluster.hierarchy as hcluster
-import pylab
-import numpy
-
-def distance_matrix(arena, tanimoto_threshold = 0.0):
-    n = len(arena)
-    # Start off a similarity matrix with 1.0s along the diagonal
-    try:
-        similarities = numpy.identity(n, "d")
-    except:
-        raise Exception('Input dataset is to large!')
-    chemfp.set_num_threads( args.processors )
-
-    ## Compute the full similarity matrix.
-    # The implementation computes the upper-triangle then copies
-    # the upper-triangle into lower-triangle. It does not include
-    # terms for the diagonal.
-    results = chemfp.search.threshold_tanimoto_search_symmetric(arena, threshold=tanimoto_threshold)
-
-    # Copy the results into the NumPy array.
-    for row_index, row in enumerate(results.iter_indices_and_scores()):
-        for target_index, target_score in row:
-            similarities[row_index, target_index] = target_score
-
-    # Return the distance matrix using the similarity matrix
-    return 1.0 - similarities
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="""NxN clustering for fps files.
-For more details please see the chemfp documentation:
-https://chemfp.readthedocs.org
-""")
-
-    parser.add_argument("-i", "--input", dest="input_path",
-                    required=True,
-                    help="Path to the input file.")
-
-    parser.add_argument("-c", "--cluster", dest="cluster_image",
-                    help="Path to the output cluster image.")
-
-    parser.add_argument("-s", "--smatrix", dest="similarity_matrix",
-                    help="Path to the similarity matrix output file.")
-
-    parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", 
-                    type=float, default=0.0,
-                    help="Tanimoto threshold [0.0]")
-
-    parser.add_argument("--oformat", default='png', help="Output format (png, svg)")
-
-    parser.add_argument('-p', '--processors', type=int, 
-        default=4)
-
-    args = parser.parse_args()
-
-    targets = chemfp.open( args.input_path, format='fps' )
-    arena = chemfp.load_fingerprints( targets )
-    distances  = distance_matrix( arena, args.tanimoto_threshold )
-
-    if args.similarity_matrix:
-        distances.tofile( args.similarity_matrix )
-
-    if args.cluster_image:
-        linkage = hcluster.linkage( distances, method="single", metric="euclidean" )
-
-        hcluster.dendrogram(linkage, labels=arena.ids)
-
-        pylab.savefig( args.cluster_image, format=args.oformat )
-
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/nxn_clustering.xml
--- a/chemfp_clustering/nxn_clustering.xml Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,126 +0,0 @@
-<tool id="ctb_chemfp_nxn_clustering" name="NxN Clustering" version="0.2">
-    <description>of molecular fingerprints</description>
-    <requirements>
-        <requirement type="package" version="1.7.0">numpy</requirement>
-        <requirement type="package" version="1.1p1">chemfp</requirement>
-        <requirement type="package" version="1.2.1">matplotlib</requirement>
-        <requirement type="package" version="0.12.0">scipy</requirement>
-        <requirement type="package" version="2.3.2">openbabel</requirement>
-    </requirements>
-    <command interpreter='python'>
-<![CDATA[
-        nxn_clustering.py
-            -i $infile
-            -t $threshold
-            #if str($output_files) in ['both', 'image']:
-                --cluster $image
-            #end if
-            #if str($output_files) in ['both', 'matrix']:
-                --smatrix $smilarity_matrix
-            #end if
-            --oformat $oformat
-]]>
-    </command>
-    <inputs>
-        <param name="infile" type="data" format="fps" label="Finperprint dataset" help="Dataset missing? See TIP below"/>
-        <param name='threshold' type='float' value='0.0' />
-        <param name='oformat' type='select' format='text' label="Format of the resulting picture">
-            <option value='png'>PNG</option>
-            <option value='svg'>SVG</option>
-        </param>
-        <param name='output_files' type='select' format='text' label="Output options">
-            <option value='both'>NxN matrix and Image</option>
-            <option value='image'>Image</option>
-            <option value='matrix'>NxN Matrix</option>
-        </param>
-
-    </inputs>
-    <outputs>
-        <data name="image" type="data" format="svg" label="${tool.name} on ${on_string} - Cluster Image">
-            <filter>output_files == "both" or output_files == "image"</filter>
-            <change_format>
-                <when input="oformat" value="png" format="png"/>
-            </change_format>
-        </data>
-        <data name="smilarity_matrix" format="binary" label="${tool.name} on ${on_string} - Similarity Matrix">
-            <filter>output_files == "both" or output_files == "matrix"</filter>
-        </data>
-    </outputs>
-    <tests>
-        <test>
-            <param name="infile" ftype="fps" value="q.fps" />
-            <param name='treshold' value='0.75' />
-            <param name='output_files' value='image' />
-            <output ftype="svg" name="outfile" file='NxN_Clustering_on_q.svg' />
-        </test>
-    </tests>
-    <help>
-<![CDATA[
-
-.. class:: infomark
-
-**What this tool does**
-
-Generating hierarchical clusters and visualizing clusters with dendrograms.
-For the clustering and the fingerprint handling the chemfp_ project is used.
-
-.. _chemfp: http://chemfp.com/
-
------
-
-.. class:: warningmark
-
-**Hint**
-
-The plotting of the cluster image is sensible only with a small dataset.
-
------
-
-.. class:: infomark
-
-**Input**
-
-Molecular fingerprints in FPS format. Open Babel Fastsearch index is not supported.
-
-* Example::
-
- -  fingerprints in FPS format
-
- #FPS1
- #num_bits=881
- #type=CACTVS-E_SCREEN/1.0 extended=2
- #software=CACTVS/unknown
- #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat
- #date=2012-02-09T13:20:37
- 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e
- 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009
- 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e
- 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807
- ........
-
- - Tanimoto threshold : 0.8 (between 0 and 1)
-
------
-
-.. class:: informark
-
-**Output**
-
-* Example::
-
- .. image:: $PATH_TO_IMAGES/NxN_clustering.png
-
------
-
-.. class:: infomark
-
-**Cite**
-
-The chemfp_ project from Andrew Dalke!
-
-.. _chemfp: http://chemfp.com/
-
-]]>
-    </help>
-
-</tool>
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/old/butina_clustering_old.py
--- a/chemfp_clustering/old/butina_clustering_old.py Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,97 +0,0 @@
-#!/usr/bin/env python
-"""
-    Modified version of code examples from the chemfp project.
-    http://code.google.com/p/chem-fingerprints/
-    Thanks to Andrew Dalke of Andrew Dalke Scientific!
-"""
-
-import chemfp
-import sys
-import os
-import tempfile
-
-temp_file = tempfile.NamedTemporaryFile()
-temp_link = "%s.%s" % (temp_file.name, 'fps')
-temp_file.close()
-os.system('ln -s %s %s' % (os.path.realpath(sys.argv[1]), temp_link) )
-
-
-chemfp_fingerprint_file = temp_link
-tanimoto_threshold = float(sys.argv[2])
-outfile = sys.argv[3]
-processors = int(sys.argv[4])
-
-
-def get_hit_indicies(hits):
-    return [id for (id, score) in hits]
-
-out = open(outfile, 'w')
-dataset = chemfp.load_fingerprints( chemfp_fingerprint_file )
-
-chemfp.set_num_threads( processors )
-search = dataset.threshold_tanimoto_search_arena(dataset, threshold = tanimoto_threshold)
-#search = chemfp.search.threshold_tanimoto_search_symmetric (dataset, threshold = tanimoto_threshold)
-
-# Reorder so the centroid with the most hits comes first.
-# (That's why I do a reverse search.)
-# Ignore the arbitrariness of breaking ties by fingerprint index
-results = sorted( (  (len(hits), i, hits) for (i, hits) in enumerate(search.iter_indices_and_scores())  ),reverse=True)
-
-
-# Determine the true/false singletons and the clusters
-true_singletons = []
-false_singletons = []
-clusters = []
-
-seen = set()
-
-for (size, fp_idx, hits) in results:
-    if fp_idx in seen:
-        # Can't use a centroid which is already assigned
-        continue
-    seen.add(fp_idx)
-    print size, fp_idx, hits
-    if size == 1:
-        # The only fingerprint in the exclusion sphere is itself
-        true_singletons.append(fp_idx)
-        continue
-
-    members = get_hit_indicies(hits)
-    # Figure out which ones haven't yet been assigned
-    unassigned = [target_idx for target_idx in members if target_idx not in seen]
-
-    if not unassigned:
-        false_singletons.append(fp_idx)
-        continue
-
-    # this is a new cluster
-    clusters.append( (fp_idx, unassigned) )
-    seen.update(unassigned)
-
-len_cluster = len(clusters)
-#out.write( "#%s true singletons: %s\n" % ( len(true_singletons), " ".join(sorted(dataset.ids[idx] for idx in true_singletons)) ) )
-#out.write( "#%s false singletons: %s\n" % ( len(false_singletons), " ".join(sorted(dataset.ids[idx] for idx in false_singletons)) ) )
-
-out.write( "#%s true singletons\n" % len(true_singletons) )
-out.write( "#%s false singletons\n" % len(false_singletons) )
-out.write( "#clusters: %s\n" % len_cluster )
-
-# Sort so the cluster with the most compounds comes first,
-# then by alphabetically smallest id
-def cluster_sort_key(cluster):
-    centroid_idx, members = cluster
-    return -len(members), dataset.ids[centroid_idx]
-
-clusters.sort(key=cluster_sort_key)
-
-
-for centroid_idx, members in clusters:
-    centroid_name = dataset.ids[centroid_idx]
-    out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(sorted(dataset.ids[idx] for idx in members))))
-    #ToDo: len(members) need to be some biggest top 90% or something ...
-
-for idx in sorted(true_singletons):
-    out.write("%s\t%s\n" % (dataset.ids[idx], 0))
-
-out.close()
-os.remove( temp_link )
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/old/in.fps
--- a/chemfp_clustering/old/in.fps Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,8 +0,0 @@
-#FPS1
-#num_bits=1021
-#type=OpenBabel-FP2/1
-#software=OpenBabel/2.3.90
-#source=/media/data/web/galaxy-central/database/job_working_directory/065/65986/task_0/dataset_69208.dat
-#date=2013-04-12T10:40:57
-00220160a00006e20000e6060f0300e4841084982840200c014e002c0410040014011115004010279800311100043605210244501832c000a2a801162080000144000c302065020881082000c2040738a2088150018a210090800b088049500019b00c08006440811300500021010120104c801102081842100003010a024200 a
-00220160a00006e20000e6060f0300e4841084982840200c014e002c0410040014011115004010279800311100043605210244501832c000a2a801162080000144000c302065020881082000c2040738a2088150018a210090800b088049500019b00c08006440811300500021010120104c801102081842100003010a024200 b
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/old/new.txt
--- a/chemfp_clustering/old/new.txt Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,10315 +0,0 @@\n-#4831 true singletons\n-#7412 false singletons\n-#clusters: 5481\n-ZINC71602893\t661\tZINC04765100 ZINC05104846 ZINC04769792 ZINC04769794 ZINC04769797 ZINC04769800 ZINC05191386 ZINC04786883 ZINC04786885 ZINC04786888 ZINC04786890 ZINC04787909 ZINC01765469 ZINC05275407 ZINC05275425 ZINC05276235 ZINC05276256 ZINC02575335 ZINC05046463 ZINC01730614 ZINC05442645 ZINC02003567 ZINC05184936 ZINC05742758 ZINC05211501 ZINC05211510 ZINC05225291 ZINC02026704 ZINC05309659 ZINC05309665 ZINC05309998 ZINC05310000 ZINC01736727 ZINC02032622 ZINC02034633 ZINC05191648 ZINC02037188 ZINC01687049 ZINC02039429 ZINC02039430 ZINC12407621 ZINC06020484 ZINC15222822 ZINC15880088 ZINC06036257 ZINC16951501 ZINC02164095 ZINC17013835 ZINC17020625 ZINC02164129 ZINC02164131 ZINC17835665 ZINC18716182 ZINC08602617 ZINC08602618 ZINC18716185 ZINC02164168 ZINC02166571 ZINC12405013 ZINC02508039 ZINC02508043 ZINC02508044 ZINC02508087 ZINC02508096 ZINC02539331 ZINC13541814 ZINC13765172 ZINC13765175 ZINC13765178 ZINC14588590 ZINC14807062 ZINC14807075 ZINC03860614 ZINC03860615 ZINC16137972 ZINC16889870 ZINC16926751 ZINC04582342 ZINC04582343 ZINC04582344 ZINC05539679 ZINC34582485 ZINC05225259 ZINC05225340 ZINC05225465 ZINC05225470 ZINC05225476 ZINC05226159 ZINC05226163 ZINC05226167 ZINC05651120 ZINC04261946 ZINC04261947 ZINC22013264 ZINC22013268 ZINC22013273 ZINC22013279 ZINC31317854 ZINC31319331 ZINC31319335 ZINC31740234 ZINC31740239 ZINC15119988 ZINC32152665 ZINC32214132 ZINC43761699 ZINC43761700 ZINC43761701 ZINC43761702 ZINC01481755 ZINC34303159 ZINC34303160 ZINC34303161 ZINC34303162 ZINC45070295 ZINC02031628 ZINC02032404 ZINC34349782 ZINC02032619 ZINC34582222 ZINC71785898 ZINC71786843 ZINC01571260 ZINC01571261 ZINC43763722 ZINC45069745 ZINC45069748 ZINC45069752 ZINC34156324 ZINC01577370 ZINC01529247 ZINC01529451 ZINC01529452 ZINC70651338 ZINC70651339 ZINC70651340 ZINC01532765 ZINC01561953 ZINC01561960 ZINC01574321 ZINC01577208 ZINC71786720 ZINC01587683 ZINC01587688 ZINC02242645 
ZINC00158103 ZINC00162386 ZINC01598495 ZINC01598496 ZINC01598497 ZINC01598498 ZINC01598518 ZINC01587614 ZINC00393753 ZINC00393754 ZINC00393755 ZINC00393756 ZINC01641048 ZINC16991119 ZINC00394761 ZINC00394806 ZINC00394807 ZINC00394808 ZINC02164097 ZINC01662386 ZINC00399241 ZINC02164128 ZINC01591808 ZINC01672871 ZINC01674839 ZINC01674842 ZINC00406932 ZINC00406933 ZINC02164132 ZINC00502094 ZINC00507608 ZINC01680238 ZINC02164158 ZINC18716183 ZINC01685148 ZINC18716184 ZINC01687047 ZINC01687048 ZINC00967533 ZINC01687050 ZINC02164167 ZINC00968099 ZINC00968100 ZINC00968101 ZINC00968128 ZINC01081099 ZINC01081323 ZINC01694726 ZINC01696964 ZINC01319163 ZINC01436122 ZINC01436123 ZINC01436124 ZINC01436125 ZINC01699906 ZINC01701838 ZINC01716732 ZINC01724736 ZINC01571243 ZINC01571245 ZINC02003566 ZINC02011663 ZINC02011664 ZINC01590023 ZINC02013559 ZINC02013560 ZINC02014257 ZINC02014258 ZINC02014867 ZINC02014868 ZINC02016526 ZINC02016527 ZINC02024203 ZINC02030897 ZINC01664354 ZINC01664389 ZINC02031663 ZINC02031664 ZINC02036681 ZINC02037295 ZINC02038302 ZINC02038947 ZINC02039356 ZINC01684585 ZINC01684714 ZINC02041115 ZINC02041265 ZINC02042933 ZINC02043763 ZINC02045088 ZINC02046918 ZINC02164102 ZINC02164138 ZINC01696943 ZINC02164171 ZINC01696963 ZINC02164174 ZINC02164175 ZINC02164177 ZINC02164178 ZINC02164180 ZINC02164183 ZINC02164185 ZINC02164186 ZINC02164188 ZINC02324820 ZINC01729357 ZINC01632734 ZINC02504418 ZINC01783052 ZINC02508035 ZINC02508046 ZINC02508063 ZINC02508071 ZINC02508078 ZINC01850974 ZINC02545280 ZINC02559331 ZINC02569878 ZINC02043148 ZINC02129241 ZINC02129243 ZINC03164162 ZINC03860308 ZINC02242637 ZINC03861506 ZINC02508249 ZINC04528636 ZINC04532208 ZINC04532209 ZINC04532210 ZINC02530724 ZINC04783008 ZINC02534413 ZINC02534432 ZINC02545294 ZINC02164169 ZINC02560409 ZINC05133736 ZINC02164172 ZINC02566228 ZINC02571329 ZINC05225162 ZINC05225172 ZINC05225175 ZINC05225381 ZINC05225388 ZINC05225399 ZINC05225403 ZINC05225491 ZINC05225496 ZINC05225500 ZINC05225504 
ZINC03096510 ZINC058201'..b'ZINC37632182\t0\n-ZINC37631885\t0\n-ZINC37628972\t0\n-ZINC37628816\t0\n-ZINC34939445\t0\n-ZINC34939444\t0\n-ZINC34327972\t0\n-ZINC33362792\t0\n-ZINC33359474\t0\n-ZINC19387498\t0\n-ZINC19331291\t0\n-ZINC19331287\t0\n-ZINC16159295\t0\n-ZINC16159293\t0\n-ZINC16082659\t0\n-ZINC15771889\t0\n-ZINC14983445\t0\n-ZINC14628482\t0\n-ZINC12480368\t0\n-ZINC12153803\t0\n-ZINC06590260\t0\n-ZINC04721345\t0\n-ZINC04620596\t0\n-ZINC04352993\t0\n-ZINC04284434\t0\n-ZINC04284404\t0\n-ZINC03860745\t0\n-ZINC02585927\t0\n-ZINC02584614\t0\n-ZINC02575364\t0\n-ZINC02572120\t0\n-ZINC02563326\t0\n-ZINC02555213\t0\n-ZINC02170245\t0\n-ZINC02034508\t0\n-ZINC01997857\t0\n-ZINC01997856\t0\n-ZINC01682521\t0\n-ZINC01675291\t0\n-ZINC01657319\t0\n-ZINC01641030\t0\n-ZINC01608901\t0\n-ZINC01594670\t0\n-ZINC01586656\t0\n-ZINC01555332\t0\n-ZINC01439400\t0\n-ZINC00396158\t0\n-ZINC71621196\t0\n-ZINC66381592\t0\n-ZINC63148589\t0\n-ZINC59754680\t0\n-ZINC51074718\t0\n-ZINC51074717\t0\n-ZINC37632186\t0\n-ZINC37629044\t0\n-ZINC36533269\t0\n-ZINC34337607\t0\n-ZINC34337605\t0\n-ZINC34330539\t0\n-ZINC34330538\t0\n-ZINC31414746\t0\n-ZINC26896049\t0\n-ZINC26896046\t0\n-ZINC16138636\t0\n-ZINC16034387\t0\n-ZINC15777627\t0\n-ZINC15115331\t0\n-ZINC12416533\t0\n-ZINC05663502\t0\n-ZINC05576053\t0\n-ZINC05283247\t0\n-ZINC04051396\t0\n-ZINC03875370\t0\n-ZINC03612758\t0\n-ZINC02539354\t0\n-ZINC02390369\t0\n-ZINC02041062\t0\n-ZINC02034873\t0\n-ZINC02030980\t0\n-ZINC01733027\t0\n-ZINC01693132\t0\n-ZINC01684527\t0\n-ZINC01680393\t0\n-ZINC01674462\t0\n-ZINC01609947\t0\n-ZINC01603496\t0\n-ZINC01482803\t0\n-ZINC71617645\t0\n-ZINC71257177\t0\n-ZINC66351573\t0\n-ZINC66351572\t0\n-ZINC64370185\t0\n-ZINC63148423\t0\n-ZINC63146279\t0\n-ZINC53993715\t0\n-ZINC53993714\t0\n-ZINC39242270\t0\n-ZINC39242269\t0\n-ZINC37633669\t0\n-ZINC37633667\t0\n-ZINC37633133\t0\n-ZINC34539501\t0\n-ZINC33753907\t0\n-ZINC32182223\t0\n-ZINC30678251\t0\n-ZINC20475278\t0\n-ZINC19324731\t0\n-ZINC19319501\t0\n-ZINC19166968\t0\n-ZINC148071
13\t0\n-ZINC14448389\t0\n-ZINC04984177\t0\n-ZINC04895939\t0\n-ZINC04804787\t0\n-ZINC04775232\t0\n-ZINC04706620\t0\n-ZINC04521315\t0\n-ZINC04261780\t0\n-ZINC03875935\t0\n-ZINC03861020\t0\n-ZINC03140714\t0\n-ZINC02584263\t0\n-ZINC02527961\t0\n-ZINC02168531\t0\n-ZINC01845619\t0\n-ZINC01682053\t0\n-ZINC01680814\t0\n-ZINC01611664\t0\n-ZINC01598156\t0\n-ZINC01586364\t0\n-ZINC01575687\t0\n-ZINC66381590\t0\n-ZINC66377722\t0\n-ZINC57217534\t0\n-ZINC39274309\t0\n-ZINC31938007\t0\n-ZINC06403302\t0\n-ZINC04744395\t0\n-ZINC04742364\t0\n-ZINC04722803\t0\n-ZINC03860254\t0\n-ZINC03070145\t0\n-ZINC01730558\t0\n-ZINC01690285\t0\n-ZINC01621716\t0\n-ZINC71614813\t0\n-ZINC66377720\t0\n-ZINC64370193\t0\n-ZINC39369735\t0\n-ZINC39221831\t0\n-ZINC36720865\t0\n-ZINC32163098\t0\n-ZINC19321044\t0\n-ZINC06734738\t0\n-ZINC05178397\t0\n-ZINC05167654\t0\n-ZINC05157937\t0\n-ZINC04963934\t0\n-ZINC04268917\t0\n-ZINC04097424\t0\n-ZINC03875757\t0\n-ZINC03875372\t0\n-ZINC03860600\t0\n-ZINC02040420\t0\n-ZINC02037807\t0\n-ZINC02037390\t0\n-ZINC02037305\t0\n-ZINC02015871\t0\n-ZINC02004049\t0\n-ZINC01845620\t0\n-ZINC01712063\t0\n-ZINC01702289\t0\n-ZINC01693334\t0\n-ZINC01693329\t0\n-ZINC01648252\t0\n-ZINC01627348\t0\n-ZINC00400095\t0\n-ZINC71647667\t0\n-ZINC57988933\t0\n-ZINC55345805\t0\n-ZINC39278760\t0\n-ZINC25695466\t0\n-ZINC12350099\t0\n-ZINC05650406\t0\n-ZINC05177750\t0\n-ZINC04742917\t0\n-ZINC04722893\t0\n-ZINC03861678\t0\n-ZINC03861677\t0\n-ZINC03860808\t0\n-ZINC02039983\t0\n-ZINC02034592\t0\n-ZINC01845913\t0\n-ZINC01734876\t0\n-ZINC01658753\t0\n-ZINC01648381\t0\n-ZINC01613078\t0\n-ZINC01601383\t0\n-ZINC01577259\t0\n-ZINC38343415\t0\n-ZINC34593547\t0\n-ZINC34115094\t0\n-ZINC33505753\t0\n-ZINC32163202\t0\n-ZINC32163200\t0\n-ZINC13410577\t0\n-ZINC06691140\t0\n-ZINC05158078\t0\n-ZINC05112484\t0\n-ZINC05019026\t0\n-ZINC04899414\t0\n-ZINC04712486\t0\n-ZINC03958467\t0\n-ZINC02036753\t0\n-ZINC01699944\t0\n-ZINC01690289\t0\n-ZINC01585463\t0\n-ZINC24718443\t0\n-ZINC19169386\t0\n-ZINC19167376\t0\n-ZINC05700932
\t0\n-ZINC03957736\t0\n-ZINC02388228\t0\n-ZINC02168687\t0\n-ZINC01754610\t0\n-ZINC01677558\t0\n-ZINC01591817\t0\n-ZINC01491944\t0\n-ZINC01439067\t0\n-ZINC00967522\t0\n-ZINC71602724\t0\n-ZINC57989955\t0\n-ZINC40454334\t0\n-ZINC31667387\t0\n-ZINC31297995\t0\n-ZINC15440412\t0\n-ZINC05225679\t0\n-ZINC04658606\t0\n-ZINC01632445\t0\n-ZINC00967532\t0\n-ZINC19324737\t0\n-ZINC19230186\t0\n-ZINC08221057\t0\n-ZINC05224188\t0\n-ZINC01688358\t0\n-ZINC71769114\t0\n-ZINC60272425\t0\n-ZINC01846023\t0\n-ZINC71769112\t0\n-ZINC08214586\t0\n'
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/old/old.res
--- a/chemfp_clustering/old/old.res Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,5790 +0,0 @@\n-#1432 true singletons\n-#1581 false singletons\n-#clusters: 4355\n-ZINC71621761\t1604\tZINC00002687 ZINC00025200 ZINC00025201 ZINC00033574 ZINC00033576 ZINC00050696 ZINC00051186 ZINC00093403 ZINC00093407 ZINC00103395 ZINC00128696 ZINC00135212 ZINC00140739 ZINC00140746 ZINC00153929 ZINC00154430 ZINC00154431 ZINC00154439 ZINC00154441 ZINC00157589 ZINC00158261 ZINC00158393 ZINC00158439 ZINC00159317 ZINC00160268 ZINC00160270 ZINC00201438 ZINC00201439 ZINC00201442 ZINC00332029 ZINC00332032 ZINC00332774 ZINC00336705 ZINC00388102 ZINC00388107 ZINC00388243 ZINC00388912 ZINC00389642 ZINC00389643 ZINC00393998 ZINC00394259 ZINC00394260 ZINC00394261 ZINC00394917 ZINC00407037 ZINC00409877 ZINC00409878 ZINC00500705 ZINC00586785 ZINC00896223 ZINC01055289 ZINC01057001 ZINC01075984 ZINC01075986 ZINC01075987 ZINC01075990 ZINC01233190 ZINC01235041 ZINC01235043 ZINC01235045 ZINC01235046 ZINC01235047 ZINC01235048 ZINC01296023 ZINC01296024 ZINC01296026 ZINC01296039 ZINC01296064 ZINC01296065 ZINC01296077 ZINC01296165 ZINC01296167 ZINC01296263 ZINC01299913 ZINC01299914 ZINC01299915 ZINC01420527 ZINC01433213 ZINC01481910 ZINC01482094 ZINC01482137 ZINC01486612 ZINC01486613 ZINC01504039 ZINC01529080 ZINC01529277 ZINC01530366 ZINC01536989 ZINC01555363 ZINC01556128 ZINC01559667 ZINC01559720 ZINC01562012 ZINC01562014 ZINC01562159 ZINC01570850 ZINC01574316 ZINC01576547 ZINC01576864 ZINC01577159 ZINC01581015 ZINC01581722 ZINC01581723 ZINC01584638 ZINC01585343 ZINC01585344 ZINC01585345 ZINC01585350 ZINC01585353 ZINC01585537 ZINC01587671 ZINC01593180 ZINC01601226 ZINC01609647 ZINC01609650 ZINC01609652 ZINC01609656 ZINC01609756 ZINC01609966 ZINC01611324 ZINC01614479 ZINC01620991 ZINC01621724 ZINC01621741 ZINC01622139 ZINC01622140 ZINC01622141 ZINC01627037 ZINC01627101 ZINC01627245 ZINC01634320 ZINC01640007 ZINC01640941 ZINC01641394 ZINC01646213 ZINC01646411 ZINC01646412 ZINC01646417 ZINC01646418 ZINC01646652 ZINC01648180 ZINC01648181 ZINC01655612 ZINC01657466 ZINC01666118 
ZINC01666121 ZINC01666746 ZINC01672866 ZINC01672868 ZINC01672869 ZINC01676195 ZINC01676196 ZINC01676197 ZINC01676198 ZINC01677095 ZINC01678748 ZINC01680076 ZINC01680839 ZINC01681220 ZINC01681221 ZINC01681222 ZINC01682062 ZINC01682364 ZINC01683616 ZINC01685639 ZINC01690112 ZINC01691308 ZINC01691716 ZINC01693264 ZINC01693265 ZINC01693266 ZINC01693288 ZINC01693289 ZINC01693365 ZINC01693374 ZINC01693404 ZINC01693790 ZINC01693922 ZINC01693923 ZINC01693935 ZINC01693939 ZINC01697445 ZINC01698140 ZINC01698146 ZINC01698151 ZINC01698152 ZINC01698304 ZINC01698610 ZINC01699081 ZINC01699660 ZINC01700488 ZINC01700500 ZINC01701707 ZINC01704927 ZINC01705504 ZINC01706211 ZINC01706731 ZINC01706732 ZINC01706733 ZINC01706734 ZINC01709997 ZINC01711517 ZINC01711575 ZINC01713244 ZINC01717647 ZINC01718834 ZINC01720072 ZINC01721844 ZINC01721845 ZINC01721846 ZINC01721847 ZINC01721941 ZINC01721942 ZINC01727082 ZINC01728235 ZINC01733260 ZINC01736410 ZINC01736777 ZINC01747064 ZINC01748855 ZINC01748864 ZINC01760818 ZINC01766324 ZINC01769286 ZINC01845714 ZINC01848438 ZINC01999085 ZINC02001666 ZINC02004000 ZINC02009340 ZINC02013388 ZINC02013389 ZINC02014674 ZINC02014828 ZINC02019373 ZINC02019562 ZINC02019563 ZINC02019687 ZINC02023663 ZINC02024055 ZINC02026895 ZINC02031501 ZINC02031579 ZINC02032759 ZINC02035003 ZINC02035126 ZINC02036278 ZINC02036712 ZINC02037813 ZINC02040895 ZINC02041084 ZINC02041846 ZINC02042499 ZINC02043705 ZINC02145025 ZINC02146751 ZINC02163878 ZINC02164545 ZINC02168757 ZINC02169075 ZINC02169383 ZINC02170256 ZINC02171744 ZINC02171746 ZINC02171748 ZINC02193161 ZINC02243193 ZINC02381215 ZINC02382431 ZINC02382432 ZINC02382434 ZINC02390167 ZINC02391957 ZINC02504715 ZINC02506576 ZINC02506577 ZINC02506773 ZINC02507088 ZINC02507950 ZINC02509146 ZINC02509233 ZINC02510141 ZINC02510505 ZINC02524905 ZINC02524906 ZINC02524907 ZINC02525333 ZINC02528070 ZINC02529769 ZINC02534017 ZINC02534288 ZINC02534452 ZINC02539250 ZINC02539359 ZINC02541773 ZINC02545051 ZINC02545183 ZINC02545398 
ZINC02553963 ZINC025553'..b'ZINC02454447\t0\n-ZINC04272152\t0\n-ZINC05323064\t0\n-ZINC05482163\t0\n-ZINC05500597\t0\n-ZINC25950018\t0\n-ZINC37037703\t0\n-ZINC39057707\t0\n-ZINC39246041\t0\n-ZINC40164004\t0\n-ZINC59366355\t0\n-ZINC00126217\t0\n-ZINC00135670\t0\n-ZINC01705111\t0\n-ZINC04417004\t0\n-ZINC05964436\t0\n-ZINC13213074\t0\n-ZINC15042097\t0\n-ZINC15778418\t0\n-ZINC25924587\t0\n-ZINC35655573\t0\n-ZINC70461052\t0\n-ZINC00334260\t0\n-ZINC01850507\t0\n-ZINC02539766\t0\n-ZINC17005549\t0\n-ZINC32599880\t0\n-ZINC32915063\t0\n-ZINC34545636\t0\n-ZINC36047670\t0\n-ZINC53314439\t0\n-ZINC68577201\t0\n-ZINC06667833\t0\n-ZINC11802711\t0\n-ZINC19481036\t0\n-ZINC45331647\t0\n-ZINC01592536\t0\n-ZINC06452776\t0\n-ZINC20975313\t0\n-ZINC23670817\t0\n-ZINC33942624\t0\n-ZINC36382846\t0\n-ZINC38538387\t0\n-ZINC00087666\t0\n-ZINC00141689\t0\n-ZINC01052727\t0\n-ZINC02266593\t0\n-ZINC08729741\t0\n-ZINC18421965\t0\n-ZINC21993376\t0\n-ZINC24429398\t0\n-ZINC38700948\t0\n-ZINC39080406\t0\n-ZINC00331562\t0\n-ZINC12367568\t0\n-ZINC12530959\t0\n-ZINC19735685\t0\n-ZINC38583367\t0\n-ZINC39059338\t0\n-ZINC00165268\t0\n-ZINC01406334\t0\n-ZINC01607175\t0\n-ZINC03954328\t0\n-ZINC05522038\t0\n-ZINC12506829\t0\n-ZINC24730285\t0\n-ZINC00165059\t0\n-ZINC00170582\t0\n-ZINC00191688\t0\n-ZINC05427278\t0\n-ZINC06424096\t0\n-ZINC19801135\t0\n-ZINC71618323\t0\n-ZINC01052718\t0\n-ZINC01700846\t0\n-ZINC31777136\t0\n-ZINC34566820\t0\n-ZINC35270593\t0\n-ZINC40845396\t0\n-ZINC00152432\t0\n-ZINC00155922\t0\n-ZINC08667498\t0\n-ZINC12406232\t0\n-ZINC16846962\t0\n-ZINC20311077\t0\n-ZINC22010892\t0\n-ZINC39712197\t0\n-ZINC45893683\t0\n-ZINC49455550\t0\n-ZINC00163649\t0\n-ZINC01703705\t0\n-ZINC02149591\t0\n-ZINC02504676\t0\n-ZINC21995258\t0\n-ZINC24552078\t0\n-ZINC32599882\t0\n-ZINC04180505\t0\n-ZINC05427731\t0\n-ZINC06764016\t0\n-ZINC15781594\t0\n-ZINC31658722\t0\n-ZINC38292434\t0\n-ZINC42750300\t0\n-ZINC13284871\t0\n-ZINC40174031\t0\n-ZINC01734287\t0\n-ZINC02173450\t0\n-ZINC02577510\t0\n-ZINC03399145\t0\n-ZINC042349
75\t0\n-ZINC05580386\t0\n-ZINC24532930\t0\n-ZINC71785329\t0\n-ZINC01617258\t0\n-ZINC01666514\t0\n-ZINC02029233\t0\n-ZINC05778321\t0\n-ZINC15544684\t0\n-ZINC26724637\t0\n-ZINC66347421\t0\n-ZINC04992560\t0\n-ZINC05567619\t0\n-ZINC26423850\t0\n-ZINC33906245\t0\n-ZINC62152155\t0\n-ZINC04360611\t0\n-ZINC12397068\t0\n-ZINC05282717\t0\n-ZINC05306779\t0\n-ZINC17173521\t0\n-ZINC17216448\t0\n-ZINC26898312\t0\n-ZINC36533596\t0\n-ZINC64032801\t0\n-ZINC00528478\t0\n-ZINC01601586\t0\n-ZINC06494942\t0\n-ZINC06691779\t0\n-ZINC07951901\t0\n-ZINC13805946\t0\n-ZINC21298025\t0\n-ZINC49478753\t0\n-ZINC49587279\t0\n-ZINC00390270\t0\n-ZINC00492735\t0\n-ZINC01616887\t0\n-ZINC19798253\t0\n-ZINC22004903\t0\n-ZINC26057270\t0\n-ZINC39224594\t0\n-ZINC01595963\t0\n-ZINC01736214\t0\n-ZINC05331463\t0\n-ZINC21996808\t0\n-ZINC45328778\t0\n-ZINC57218941\t0\n-ZINC69986382\t0\n-ZINC01666582\t0\n-ZINC36447927\t0\n-ZINC01665624\t0\n-ZINC16939872\t0\n-ZINC26894428\t0\n-ZINC39062193\t0\n-ZINC67173025\t0\n-ZINC00500527\t0\n-ZINC14985637\t0\n-ZINC01648606\t0\n-ZINC06472858\t0\n-ZINC20445365\t0\n-ZINC16124660\t0\n-ZINC33414686\t0\n-ZINC39386927\t0\n-ZINC01443271\t0\n-ZINC12396623\t0\n-ZINC34478730\t0\n-ZINC39038272\t0\n-ZINC67175713\t0\n-ZINC01614602\t0\n-ZINC17204105\t0\n-ZINC33378689\t0\n-ZINC39565220\t0\n-ZINC01401979\t0\n-ZINC64541683\t0\n-ZINC00241747\t0\n-ZINC01614601\t0\n-ZINC21990499\t0\n-ZINC24544114\t0\n-ZINC41128072\t0\n-ZINC63228997\t0\n-ZINC05462676\t0\n-ZINC71618739\t0\n-ZINC05604749\t0\n-ZINC26898871\t0\n-ZINC54954869\t0\n-ZINC14983158\t0\n-ZINC38531914\t0\n-ZINC39565221\t0\n-ZINC06378775\t0\n-ZINC39565222\t0\n-ZINC52608109\t0\n-ZINC05518327\t0\n-ZINC36473086\t0\n-ZINC15767207\t0\n-ZINC28001275\t0\n-ZINC28005310\t0\n-ZINC39050847\t0\n-ZINC06169744\t0\n-ZINC05517404\t0\n-ZINC06564386\t0\n-ZINC00049756\t0\n-ZINC01655490\t0\n-ZINC01872049\t0\n-ZINC26054553\t0\n-ZINC26897718\t0\n-ZINC40448253\t0\n-ZINC63506102\t0\n-ZINC71405364\t0\n-ZINC01613042\t0\n-ZINC21982657\t0\n-ZINC01598330\t0\n-ZINC16958770
\t0\n-ZINC06244388\t0\n-ZINC39943253\t0\n-ZINC24190825\t0\n-ZINC38803854\t0\n-ZINC71611076\t0\n-ZINC12501389\t0\n-ZINC40148296\t0\n-ZINC71257224\t0\n-ZINC39712204\t0\n-ZINC38541361\t0\n-ZINC39706311\t0\n-ZINC65336223\t0\n-ZINC38644172\t0\n-ZINC63766449\t0\n-ZINC39711916\t0\n-ZINC39964231\t0\n-ZINC66339773\t0\n-ZINC03204866\t0\n-ZINC63932368\t0\n-ZINC38665416\t0\n-ZINC65356652\t0\n-ZINC12493517\t0\n-ZINC39048596\t0\n-ZINC21984044\t0\n-ZINC39937104\t0\n-ZINC39280057\t0\n-ZINC67175959\t0\n-ZINC38618062\t0\n-ZINC38600121\t0\n'
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/old/old.txt
--- a/chemfp_clustering/old/old.txt Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,13577 +0,0 @@\n-#6111 true singletons\n-#2168 false singletons\n-#clusters: 7463\n-ZINC71602893\t661\tZINC00158103 ZINC00162386 ZINC00388163 ZINC00393753 ZINC00393754 ZINC00393755 ZINC00393756 ZINC00394761 ZINC00394806 ZINC00394807 ZINC00394808 ZINC00399241 ZINC00404394 ZINC00404395 ZINC00404396 ZINC00406932 ZINC00406933 ZINC00502094 ZINC00507608 ZINC00967511 ZINC00967515 ZINC00967533 ZINC00968099 ZINC00968100 ZINC00968101 ZINC00968128 ZINC00968131 ZINC01057145 ZINC01057147 ZINC01081099 ZINC01081323 ZINC01271111 ZINC01295991 ZINC01297856 ZINC01319163 ZINC01319182 ZINC01420477 ZINC01436122 ZINC01436123 ZINC01436124 ZINC01436125 ZINC01481755 ZINC01482164 ZINC01529247 ZINC01529451 ZINC01529452 ZINC01532735 ZINC01532765 ZINC01555566 ZINC01561953 ZINC01561960 ZINC01562407 ZINC01571243 ZINC01571245 ZINC01571260 ZINC01571261 ZINC01571613 ZINC01574321 ZINC01577208 ZINC01577368 ZINC01577369 ZINC01577370 ZINC01577376 ZINC01586761 ZINC01587612 ZINC01587613 ZINC01587614 ZINC01587683 ZINC01587688 ZINC01589668 ZINC01589674 ZINC01589675 ZINC01589680 ZINC01590023 ZINC01591808 ZINC01597124 ZINC01597139 ZINC01598495 ZINC01598496 ZINC01598497 ZINC01598498 ZINC01598511 ZINC01598518 ZINC01609500 ZINC01609502 ZINC01609504 ZINC01609505 ZINC01609506 ZINC01609507 ZINC01613077 ZINC01615755 ZINC01615757 ZINC01615759 ZINC01622090 ZINC01627036 ZINC01627091 ZINC01632733 ZINC01632734 ZINC01632735 ZINC01632736 ZINC01632740 ZINC01632751 ZINC01640788 ZINC01640835 ZINC01640836 ZINC01640837 ZINC01640838 ZINC01641013 ZINC01641048 ZINC01648182 ZINC01653215 ZINC01656270 ZINC01662386 ZINC01662505 ZINC01662506 ZINC01664354 ZINC01664389 ZINC01666986 ZINC01669114 ZINC01672810 ZINC01672871 ZINC01672941 ZINC01672943 ZINC01672944 ZINC01674839 ZINC01674842 ZINC01676554 ZINC01677105 ZINC01679949 ZINC01680042 ZINC01680043 ZINC01680044 ZINC01680045 ZINC01680046 ZINC01680238 ZINC01680686 ZINC01680688 ZINC01680819 ZINC01680831 ZINC01682988 ZINC01684585 ZINC01684714 ZINC01685148 ZINC01687031 ZINC01687047 
ZINC01687048 ZINC01687049 ZINC01687050 ZINC01690973 ZINC01690974 ZINC01690975 ZINC01690976 ZINC01690977 ZINC01691022 ZINC01691023 ZINC01691024 ZINC01691025 ZINC01691303 ZINC01693862 ZINC01694726 ZINC01696148 ZINC01696943 ZINC01696951 ZINC01696956 ZINC01696963 ZINC01696964 ZINC01696981 ZINC01699882 ZINC01699906 ZINC01700002 ZINC01700004 ZINC01701702 ZINC01701703 ZINC01701704 ZINC01701705 ZINC01701828 ZINC01701830 ZINC01701838 ZINC01702386 ZINC01706120 ZINC01712308 ZINC01712309 ZINC01712310 ZINC01716732 ZINC01722901 ZINC01724736 ZINC01729357 ZINC01730614 ZINC01733114 ZINC01736672 ZINC01736727 ZINC01758465 ZINC01758746 ZINC01758747 ZINC01758749 ZINC01758751 ZINC01758760 ZINC01765469 ZINC01769331 ZINC01783052 ZINC01841118 ZINC01845715 ZINC01845720 ZINC01850485 ZINC01850974 ZINC02000305 ZINC02003566 ZINC02003567 ZINC02011663 ZINC02011664 ZINC02013559 ZINC02013560 ZINC02014257 ZINC02014258 ZINC02014867 ZINC02014868 ZINC02016526 ZINC02016527 ZINC02024203 ZINC02026704 ZINC02030897 ZINC02031628 ZINC02031629 ZINC02031630 ZINC02031663 ZINC02031664 ZINC02032404 ZINC02032619 ZINC02032622 ZINC02034633 ZINC02036681 ZINC02037188 ZINC02037295 ZINC02038302 ZINC02038603 ZINC02038947 ZINC02039356 ZINC02039429 ZINC02039430 ZINC02039905 ZINC02040440 ZINC02041115 ZINC02041265 ZINC02042927 ZINC02042933 ZINC02043048 ZINC02043148 ZINC02043763 ZINC02045088 ZINC02045405 ZINC02046381 ZINC02046918 ZINC02129241 ZINC02129243 ZINC02145095 ZINC02145096 ZINC02145099 ZINC02145257 ZINC02145258 ZINC02145599 ZINC02145601 ZINC02145618 ZINC02162281 ZINC02162299 ZINC02162301 ZINC02164067 ZINC02164068 ZINC02164095 ZINC02164097 ZINC02164102 ZINC02164128 ZINC02164129 ZINC02164131 ZINC02164132 ZINC02164138 ZINC02164146 ZINC02164148 ZINC02164149 ZINC02164151 ZINC02164158 ZINC02164163 ZINC02164165 ZINC02164167 ZINC02164168 ZINC02164169 ZINC02164171 ZINC02164172 ZINC02164174 ZINC02164175 ZINC02164177 ZINC02164178 ZINC02164180 ZINC02164183 ZINC02164185 ZINC02164186 ZINC02164188 ZINC02166518 ZINC02166520 
ZINC02166521 ZINC021665'..b'ZINC02383174\t0\n-ZINC05231820\t0\n-ZINC13284871\t0\n-ZINC17061891\t0\n-ZINC33419160\t0\n-ZINC33804383\t0\n-ZINC35269972\t0\n-ZINC40174031\t0\n-ZINC52627064\t0\n-ZINC01675069\t0\n-ZINC01734287\t0\n-ZINC02173450\t0\n-ZINC02577510\t0\n-ZINC03399145\t0\n-ZINC04234975\t0\n-ZINC05580386\t0\n-ZINC24532930\t0\n-ZINC39213118\t0\n-ZINC39326082\t0\n-ZINC71785329\t0\n-ZINC01617258\t0\n-ZINC01666514\t0\n-ZINC02029233\t0\n-ZINC02559724\t0\n-ZINC04697251\t0\n-ZINC04786960\t0\n-ZINC05778321\t0\n-ZINC06678070\t0\n-ZINC15544684\t0\n-ZINC16951413\t0\n-ZINC26724637\t0\n-ZINC30677778\t0\n-ZINC54956961\t0\n-ZINC66347421\t0\n-ZINC04992560\t0\n-ZINC05464544\t0\n-ZINC05567619\t0\n-ZINC05725942\t0\n-ZINC16124649\t0\n-ZINC26423850\t0\n-ZINC33906245\t0\n-ZINC36393676\t0\n-ZINC62152155\t0\n-ZINC00001555\t0\n-ZINC01235980\t0\n-ZINC04360611\t0\n-ZINC04748751\t0\n-ZINC05418159\t0\n-ZINC06504607\t0\n-ZINC06750293\t0\n-ZINC11919855\t0\n-ZINC12397068\t0\n-ZINC15021969\t0\n-ZINC03883058\t0\n-ZINC05282717\t0\n-ZINC05306779\t0\n-ZINC05420133\t0\n-ZINC17173521\t0\n-ZINC17216448\t0\n-ZINC26898312\t0\n-ZINC36533596\t0\n-ZINC64032801\t0\n-ZINC00528478\t0\n-ZINC01601586\t0\n-ZINC01708119\t0\n-ZINC04759937\t0\n-ZINC06494942\t0\n-ZINC06691779\t0\n-ZINC07951901\t0\n-ZINC12363450\t0\n-ZINC13805946\t0\n-ZINC21298025\t0\n-ZINC36378113\t0\n-ZINC42379509\t0\n-ZINC49478753\t0\n-ZINC49587279\t0\n-ZINC71186650\t0\n-ZINC00390270\t0\n-ZINC00492735\t0\n-ZINC01616887\t0\n-ZINC01756949\t0\n-ZINC01781195\t0\n-ZINC19798253\t0\n-ZINC22004903\t0\n-ZINC26057270\t0\n-ZINC39224594\t0\n-ZINC40385485\t0\n-ZINC00148116\t0\n-ZINC01595963\t0\n-ZINC01736214\t0\n-ZINC05331463\t0\n-ZINC12505966\t0\n-ZINC13281467\t0\n-ZINC21996808\t0\n-ZINC29753875\t0\n-ZINC45069296\t0\n-ZINC45328778\t0\n-ZINC57218941\t0\n-ZINC69986382\t0\n-ZINC01666582\t0\n-ZINC01727902\t0\n-ZINC03650937\t0\n-ZINC30678133\t0\n-ZINC36447927\t0\n-ZINC39136870\t0\n-ZINC39217225\t0\n-ZINC01665624\t0\n-ZINC16939872\t0\n-ZINC18532101\t0\n-ZINC268944
28\t0\n-ZINC36473085\t0\n-ZINC39062193\t0\n-ZINC67173025\t0\n-ZINC00500527\t0\n-ZINC14985637\t0\n-ZINC01594777\t0\n-ZINC01648606\t0\n-ZINC01700208\t0\n-ZINC06472858\t0\n-ZINC20272872\t0\n-ZINC20445365\t0\n-ZINC20446544\t0\n-ZINC22014911\t0\n-ZINC24721853\t0\n-ZINC03013985\t0\n-ZINC16124660\t0\n-ZINC25924368\t0\n-ZINC33414686\t0\n-ZINC39062151\t0\n-ZINC39386927\t0\n-ZINC50329708\t0\n-ZINC01443271\t0\n-ZINC12396623\t0\n-ZINC34478730\t0\n-ZINC39038272\t0\n-ZINC52627062\t0\n-ZINC67175713\t0\n-ZINC67176050\t0\n-ZINC01614602\t0\n-ZINC12138157\t0\n-ZINC17204105\t0\n-ZINC24539714\t0\n-ZINC33378689\t0\n-ZINC39565220\t0\n-ZINC01401979\t0\n-ZINC04261784\t0\n-ZINC39712143\t0\n-ZINC64541683\t0\n-ZINC00241747\t0\n-ZINC01614601\t0\n-ZINC04428983\t0\n-ZINC13518519\t0\n-ZINC17020701\t0\n-ZINC21990499\t0\n-ZINC24544114\t0\n-ZINC38595221\t0\n-ZINC41128072\t0\n-ZINC63228997\t0\n-ZINC05462676\t0\n-ZINC38594993\t0\n-ZINC39698292\t0\n-ZINC66054468\t0\n-ZINC71618739\t0\n-ZINC05604749\t0\n-ZINC23583823\t0\n-ZINC26898871\t0\n-ZINC54930860\t0\n-ZINC54954869\t0\n-ZINC14983158\t0\n-ZINC38531914\t0\n-ZINC39565221\t0\n-ZINC40435253\t0\n-ZINC06378775\t0\n-ZINC39565222\t0\n-ZINC39732582\t0\n-ZINC52608109\t0\n-ZINC05518327\t0\n-ZINC20941280\t0\n-ZINC36473086\t0\n-ZINC38810696\t0\n-ZINC64032657\t0\n-ZINC01648910\t0\n-ZINC31776302\t0\n-ZINC52627059\t0\n-ZINC68576688\t0\n-ZINC11850542\t0\n-ZINC15767207\t0\n-ZINC28001275\t0\n-ZINC28005310\t0\n-ZINC39050847\t0\n-ZINC06169744\t0\n-ZINC36472944\t0\n-ZINC67175971\t0\n-ZINC05517404\t0\n-ZINC06564386\t0\n-ZINC16951267\t0\n-ZINC64032971\t0\n-ZINC66054197\t0\n-ZINC00049756\t0\n-ZINC01655490\t0\n-ZINC01872049\t0\n-ZINC15042492\t0\n-ZINC26054553\t0\n-ZINC26897718\t0\n-ZINC36474304\t0\n-ZINC40448253\t0\n-ZINC63506102\t0\n-ZINC71405364\t0\n-ZINC01613042\t0\n-ZINC21982657\t0\n-ZINC01598330\t0\n-ZINC16958770\t0\n-ZINC06244388\t0\n-ZINC39943253\t0\n-ZINC24190825\t0\n-ZINC38803854\t0\n-ZINC71611076\t0\n-ZINC12501389\t0\n-ZINC25982230\t0\n-ZINC39112865\t0\n-ZINC25979031
\t0\n-ZINC40148296\t0\n-ZINC71257224\t0\n-ZINC08618967\t0\n-ZINC39712204\t0\n-ZINC38541361\t0\n-ZINC39706311\t0\n-ZINC65336223\t0\n-ZINC38644172\t0\n-ZINC63766449\t0\n-ZINC01563950\t0\n-ZINC39711916\t0\n-ZINC39964231\t0\n-ZINC66339773\t0\n-ZINC03204866\t0\n-ZINC63932368\t0\n-ZINC67172217\t0\n-ZINC38665416\t0\n-ZINC65356652\t0\n-ZINC38618253\t0\n-ZINC12493517\t0\n-ZINC39048596\t0\n-ZINC21984044\t0\n-ZINC39937104\t0\n-ZINC01563952\t0\n-ZINC39051079\t0\n-ZINC39280057\t0\n-ZINC67175959\t0\n-ZINC38618062\t0\n-ZINC38600121\t0\n'
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/old/oldm.res
--- a/chemfp_clustering/old/oldm.res Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,3952 +0,0 @@\n-#787 true singletons\n-#1827 false singletons\n-#clusters: 3162\n-ZINC71621761\t2105\tZINC00001592 ZINC00002687 ZINC00025200 ZINC00025201 ZINC00033574 ZINC00033576 ZINC00050696 ZINC00051186 ZINC00074846 ZINC00093403 ZINC00093407 ZINC00103395 ZINC00128696 ZINC00135212 ZINC00140739 ZINC00140746 ZINC00153929 ZINC00154430 ZINC00154431 ZINC00154439 ZINC00154441 ZINC00157259 ZINC00157541 ZINC00157589 ZINC00158261 ZINC00158393 ZINC00158421 ZINC00158439 ZINC00159317 ZINC00160268 ZINC00160270 ZINC00161414 ZINC00173567 ZINC00201438 ZINC00201439 ZINC00201442 ZINC00332029 ZINC00332032 ZINC00332774 ZINC00336705 ZINC00336723 ZINC00388102 ZINC00388107 ZINC00388243 ZINC00388246 ZINC00388248 ZINC00388340 ZINC00388341 ZINC00388342 ZINC00388343 ZINC00388344 ZINC00388346 ZINC00388699 ZINC00388910 ZINC00388912 ZINC00389642 ZINC00389643 ZINC00392996 ZINC00393134 ZINC00393639 ZINC00393998 ZINC00394259 ZINC00394260 ZINC00394261 ZINC00394917 ZINC00399457 ZINC00399458 ZINC00399459 ZINC00399460 ZINC00402074 ZINC00407019 ZINC00407037 ZINC00409877 ZINC00409878 ZINC00500705 ZINC00507886 ZINC00507888 ZINC00586785 ZINC00896223 ZINC00967784 ZINC01055289 ZINC01057001 ZINC01075984 ZINC01075986 ZINC01075987 ZINC01075990 ZINC01233190 ZINC01235041 ZINC01235043 ZINC01235045 ZINC01235046 ZINC01235047 ZINC01235048 ZINC01296023 ZINC01296024 ZINC01296026 ZINC01296039 ZINC01296064 ZINC01296065 ZINC01296077 ZINC01296165 ZINC01296167 ZINC01296208 ZINC01296263 ZINC01299913 ZINC01299914 ZINC01299915 ZINC01420527 ZINC01420540 ZINC01433213 ZINC01456875 ZINC01481910 ZINC01482094 ZINC01482137 ZINC01486612 ZINC01486613 ZINC01504039 ZINC01529080 ZINC01529277 ZINC01530365 ZINC01530366 ZINC01536989 ZINC01555363 ZINC01556128 ZINC01556391 ZINC01559667 ZINC01559720 ZINC01562012 ZINC01562014 ZINC01562159 ZINC01570850 ZINC01574316 ZINC01576547 ZINC01576864 ZINC01577159 ZINC01577754 ZINC01577755 ZINC01577756 ZINC01580145 ZINC01580298 ZINC01580299 ZINC01580300 ZINC01580301 ZINC01581015 ZINC01581722 
ZINC01581723 ZINC01584638 ZINC01585343 ZINC01585344 ZINC01585345 ZINC01585350 ZINC01585353 ZINC01585537 ZINC01587671 ZINC01593180 ZINC01597739 ZINC01601226 ZINC01609647 ZINC01609650 ZINC01609652 ZINC01609656 ZINC01609756 ZINC01609966 ZINC01611324 ZINC01614479 ZINC01620991 ZINC01621724 ZINC01621741 ZINC01622139 ZINC01622140 ZINC01622141 ZINC01627037 ZINC01627101 ZINC01627245 ZINC01631220 ZINC01634320 ZINC01640007 ZINC01640921 ZINC01640941 ZINC01641394 ZINC01646213 ZINC01646411 ZINC01646412 ZINC01646417 ZINC01646418 ZINC01646652 ZINC01648180 ZINC01648181 ZINC01653153 ZINC01655612 ZINC01657466 ZINC01666118 ZINC01666121 ZINC01666746 ZINC01672866 ZINC01672868 ZINC01672869 ZINC01676195 ZINC01676196 ZINC01676197 ZINC01676198 ZINC01677095 ZINC01677414 ZINC01678748 ZINC01680076 ZINC01680839 ZINC01681220 ZINC01681221 ZINC01681222 ZINC01682062 ZINC01682364 ZINC01682443 ZINC01683616 ZINC01685639 ZINC01686739 ZINC01687368 ZINC01690112 ZINC01690416 ZINC01691308 ZINC01691716 ZINC01691724 ZINC01692996 ZINC01692997 ZINC01692998 ZINC01692999 ZINC01693264 ZINC01693265 ZINC01693266 ZINC01693288 ZINC01693289 ZINC01693365 ZINC01693374 ZINC01693400 ZINC01693404 ZINC01693790 ZINC01693920 ZINC01693922 ZINC01693923 ZINC01693935 ZINC01693939 ZINC01697445 ZINC01697751 ZINC01698140 ZINC01698146 ZINC01698151 ZINC01698152 ZINC01698304 ZINC01698610 ZINC01698748 ZINC01699081 ZINC01699660 ZINC01700296 ZINC01700488 ZINC01700500 ZINC01701707 ZINC01702522 ZINC01704927 ZINC01705504 ZINC01706211 ZINC01706731 ZINC01706732 ZINC01706733 ZINC01706734 ZINC01708361 ZINC01709997 ZINC01711517 ZINC01711575 ZINC01713244 ZINC01716818 ZINC01716840 ZINC01717259 ZINC01717647 ZINC01718834 ZINC01720072 ZINC01721844 ZINC01721845 ZINC01721846 ZINC01721847 ZINC01721941 ZINC01721942 ZINC01727082 ZINC01728235 ZINC01729030 ZINC01732507 ZINC01733260 ZINC01734094 ZINC01736410 ZINC01736777 ZINC01744525 ZINC01747064 ZINC01748855 ZINC01748864 ZINC01760647 ZINC01760818 ZINC01766324 ZINC01769286 ZINC01845714 ZINC01848438 
ZINC01854990 ZINC0199908'..b'ZINC13560683\t0\n-ZINC15767202\t0\n-ZINC16125028\t0\n-ZINC19802594\t0\n-ZINC33347253\t0\n-ZINC38227842\t0\n-ZINC39565322\t0\n-ZINC40757072\t0\n-ZINC55167611\t0\n-ZINC55167616\t0\n-ZINC04476324\t0\n-ZINC12364192\t0\n-ZINC22003624\t0\n-ZINC22003628\t0\n-ZINC22130346\t0\n-ZINC37382703\t0\n-ZINC55045520\t0\n-ZINC00025100\t0\n-ZINC00493973\t0\n-ZINC03785742\t0\n-ZINC04692648\t0\n-ZINC25194012\t0\n-ZINC25194020\t0\n-ZINC35569149\t0\n-ZINC39570046\t0\n-ZINC40148112\t0\n-ZINC40506013\t0\n-ZINC40506016\t0\n-ZINC66362206\t0\n-ZINC00334382\t0\n-ZINC02046049\t0\n-ZINC05377586\t0\n-ZINC05377590\t0\n-ZINC34016761\t0\n-ZINC67172163\t0\n-ZINC67172164\t0\n-ZINC00107550\t0\n-ZINC00507988\t0\n-ZINC01397496\t0\n-ZINC01436106\t0\n-ZINC05799585\t0\n-ZINC13598177\t0\n-ZINC39191743\t0\n-ZINC39191745\t0\n-ZINC50909683\t0\n-ZINC00153021\t0\n-ZINC25463280\t0\n-ZINC35269729\t0\n-ZINC00165711\t0\n-ZINC13516868\t0\n-ZINC17431007\t0\n-ZINC20216425\t0\n-ZINC20829009\t0\n-ZINC12341163\t0\n-ZINC13560682\t0\n-ZINC39289435\t0\n-ZINC00253679\t0\n-ZINC00337472\t0\n-ZINC16983356\t0\n-ZINC38234614\t0\n-ZINC63767576\t0\n-ZINC01744350\t0\n-ZINC13402109\t0\n-ZINC01580007\t0\n-ZINC05642224\t0\n-ZINC00391038\t0\n-ZINC00870181\t0\n-ZINC00870182\t0\n-ZINC05964436\t0\n-ZINC13213074\t0\n-ZINC63767579\t0\n-ZINC70461052\t0\n-ZINC26892863\t0\n-ZINC26892868\t0\n-ZINC32915063\t0\n-ZINC32918853\t0\n-ZINC39224728\t0\n-ZINC68571807\t0\n-ZINC68571809\t0\n-ZINC01618618\t0\n-ZINC71257192\t0\n-ZINC71257193\t0\n-ZINC71618955\t0\n-ZINC71618956\t0\n-ZINC05800308\t0\n-ZINC71785293\t0\n-ZINC71785294\t0\n-ZINC02266593\t0\n-ZINC04175772\t0\n-ZINC22002154\t0\n-ZINC22002159\t0\n-ZINC24429398\t0\n-ZINC12530959\t0\n-ZINC05800410\t0\n-ZINC42040502\t0\n-ZINC42040505\t0\n-ZINC06424096\t0\n-ZINC13763633\t0\n-ZINC13763636\t0\n-ZINC65335373\t0\n-ZINC65335375\t0\n-ZINC01416783\t0\n-ZINC01694736\t0\n-ZINC01694737\t0\n-ZINC32181645\t0\n-ZINC32181647\t0\n-ZINC39950109\t0\n-ZINC05416298\t0\n-ZINC13125180\t0\n-ZINC01606
359\t0\n-ZINC01674121\t0\n-ZINC01674122\t0\n-ZINC01703705\t0\n-ZINC24552078\t0\n-ZINC39566237\t0\n-ZINC39566238\t0\n-ZINC59360192\t0\n-ZINC71259679\t0\n-ZINC02383174\t0\n-ZINC17061891\t0\n-ZINC37625847\t0\n-ZINC37625848\t0\n-ZINC02285757\t0\n-ZINC06130715\t0\n-ZINC02029233\t0\n-ZINC39978019\t0\n-ZINC39978020\t0\n-ZINC52514012\t0\n-ZINC52514013\t0\n-ZINC01190625\t0\n-ZINC01190626\t0\n-ZINC05726443\t0\n-ZINC36393676\t0\n-ZINC00001555\t0\n-ZINC06750293\t0\n-ZINC38312150\t0\n-ZINC36294308\t0\n-ZINC39337374\t0\n-ZINC39337375\t0\n-ZINC00528478\t0\n-ZINC01410075\t0\n-ZINC03874615\t0\n-ZINC03874616\t0\n-ZINC25173253\t0\n-ZINC42379509\t0\n-ZINC01756949\t0\n-ZINC00148116\t0\n-ZINC13281467\t0\n-ZINC41167167\t0\n-ZINC41167169\t0\n-ZINC45069296\t0\n-ZINC63763762\t0\n-ZINC63763763\t0\n-ZINC39136870\t0\n-ZINC25949941\t0\n-ZINC39256850\t0\n-ZINC00500527\t0\n-ZINC20272872\t0\n-ZINC40148443\t0\n-ZINC40148444\t0\n-ZINC45067986\t0\n-ZINC45067989\t0\n-ZINC03013985\t0\n-ZINC04995369\t0\n-ZINC05767777\t0\n-ZINC39188342\t0\n-ZINC39188343\t0\n-ZINC39211839\t0\n-ZINC39211840\t0\n-ZINC40434630\t0\n-ZINC34478730\t0\n-ZINC48095370\t0\n-ZINC48095371\t0\n-ZINC17204105\t0\n-ZINC66354610\t0\n-ZINC04204274\t0\n-ZINC24718515\t0\n-ZINC24718517\t0\n-ZINC39712143\t0\n-ZINC39566974\t0\n-ZINC39566975\t0\n-ZINC39698292\t0\n-ZINC63785028\t0\n-ZINC63785030\t0\n-ZINC39732582\t0\n-ZINC63514826\t0\n-ZINC63514827\t0\n-ZINC38810696\t0\n-ZINC01648910\t0\n-ZINC12504135\t0\n-ZINC38545492\t0\n-ZINC38545493\t0\n-ZINC01391572\t0\n-ZINC28001275\t0\n-ZINC28005310\t0\n-ZINC36472944\t0\n-ZINC39246331\t0\n-ZINC39246333\t0\n-ZINC43214656\t0\n-ZINC43214657\t0\n-ZINC67175971\t0\n-ZINC14984294\t0\n-ZINC39223588\t0\n-ZINC39223590\t0\n-ZINC71251042\t0\n-ZINC71251047\t0\n-ZINC05444916\t0\n-ZINC05444917\t0\n-ZINC36474304\t0\n-ZINC18187616\t0\n-ZINC44136841\t0\n-ZINC44136843\t0\n-ZINC13516897\t0\n-ZINC18102838\t0\n-ZINC64033587\t0\n-ZINC03165666\t0\n-ZINC39964232\t0\n-ZINC39964233\t0\n-ZINC39978691\t0\n-ZINC39978692\t0\n-ZINC1185054
1\t0\n-ZINC63763770\t0\n-ZINC63763771\t0\n-ZINC00397791\t0\n-ZINC12341552\t0\n-ZINC25982230\t0\n-ZINC39764356\t0\n-ZINC39764357\t0\n-ZINC25979031\t0\n-ZINC71613556\t0\n-ZINC71613557\t0\n-ZINC39030248\t0\n-ZINC39030249\t0\n-ZINC39062189\t0\n-ZINC39062191\t0\n-ZINC39712195\t0\n-ZINC39712196\t0\n-ZINC01563950\t0\n-ZINC64032659\t0\n-ZINC64032660\t0\n-ZINC67172217\t0\n-ZINC39978671\t0\n-ZINC39978672\t0\n-ZINC06816747\t0\n-ZINC12493517\t0\n-ZINC39052135\t0\n-ZINC01563952\t0\n-ZINC39293947\t0\n-ZINC39293948\t0\n-ZINC36378581\t0\n'
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/old/out.txt
--- a/chemfp_clustering/old/out.txt Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-#1 true singletons
-#0 false singletons
-#clusters: 1
-b 1 a
-c 0
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/old/u_new.txt
--- a/chemfp_clustering/old/u_new.txt Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,10309 +0,0 @@\n-#4830 true singletons\n-#7419 false singletons\n-#clusters: 5476\n-ZINC71602893\t661\tZINC04765100 ZINC05104846 ZINC04769792 ZINC04769794 ZINC04769797 ZINC04769800 ZINC05191386 ZINC04786883 ZINC04786885 ZINC04786888 ZINC04786890 ZINC04787909 ZINC01765469 ZINC05275407 ZINC05275425 ZINC05276235 ZINC05276256 ZINC02575335 ZINC05046463 ZINC01730614 ZINC05442645 ZINC02003567 ZINC05184936 ZINC05742758 ZINC05211501 ZINC05211510 ZINC05225291 ZINC02026704 ZINC05309659 ZINC05309665 ZINC05309998 ZINC05310000 ZINC01736727 ZINC02032622 ZINC02034633 ZINC05191648 ZINC02037188 ZINC01687049 ZINC02039429 ZINC02039430 ZINC12407621 ZINC06020484 ZINC15222822 ZINC15880088 ZINC06036257 ZINC16951501 ZINC02164095 ZINC17013835 ZINC17020625 ZINC02164129 ZINC02164131 ZINC17835665 ZINC18716182 ZINC08602617 ZINC08602618 ZINC18716185 ZINC02164168 ZINC02166571 ZINC12405013 ZINC02508039 ZINC02508043 ZINC02508044 ZINC02508087 ZINC02508096 ZINC02539331 ZINC13541814 ZINC13765172 ZINC13765175 ZINC13765178 ZINC14588590 ZINC14807062 ZINC14807075 ZINC03860614 ZINC03860615 ZINC16137972 ZINC16889870 ZINC16926751 ZINC04582342 ZINC04582343 ZINC04582344 ZINC05539679 ZINC34582485 ZINC05225259 ZINC05225340 ZINC05225465 ZINC05225470 ZINC05225476 ZINC05226159 ZINC05226163 ZINC05226167 ZINC05651120 ZINC04261946 ZINC04261947 ZINC22013264 ZINC22013268 ZINC22013273 ZINC22013279 ZINC31317854 ZINC31319331 ZINC31319335 ZINC31740234 ZINC31740239 ZINC15119988 ZINC32152665 ZINC32214132 ZINC43761699 ZINC43761700 ZINC43761701 ZINC43761702 ZINC01481755 ZINC34303159 ZINC34303160 ZINC34303161 ZINC34303162 ZINC45070295 ZINC02031628 ZINC02032404 ZINC34349782 ZINC02032619 ZINC34582222 ZINC71785898 ZINC71786843 ZINC01571260 ZINC01571261 ZINC43763722 ZINC45069745 ZINC45069748 ZINC45069752 ZINC34156324 ZINC01577370 ZINC01529247 ZINC01529451 ZINC01529452 ZINC70651338 ZINC70651339 ZINC70651340 ZINC01532765 ZINC01561953 ZINC01561960 ZINC01574321 ZINC01577208 ZINC71786720 ZINC01587683 ZINC01587688 ZINC02242645 
ZINC00158103 ZINC00162386 ZINC01598495 ZINC01598496 ZINC01598497 ZINC01598498 ZINC01598518 ZINC01587614 ZINC00393753 ZINC00393754 ZINC00393755 ZINC00393756 ZINC01641048 ZINC16991119 ZINC00394761 ZINC00394806 ZINC00394807 ZINC00394808 ZINC02164097 ZINC01662386 ZINC00399241 ZINC02164128 ZINC01591808 ZINC01672871 ZINC01674839 ZINC01674842 ZINC00406932 ZINC00406933 ZINC02164132 ZINC00502094 ZINC00507608 ZINC01680238 ZINC02164158 ZINC18716183 ZINC01685148 ZINC18716184 ZINC01687047 ZINC01687048 ZINC00967533 ZINC01687050 ZINC02164167 ZINC00968099 ZINC00968100 ZINC00968101 ZINC00968128 ZINC01081099 ZINC01081323 ZINC01694726 ZINC01696964 ZINC01319163 ZINC01436122 ZINC01436123 ZINC01436124 ZINC01436125 ZINC01699906 ZINC01701838 ZINC01716732 ZINC01724736 ZINC01571243 ZINC01571245 ZINC02003566 ZINC02011663 ZINC02011664 ZINC01590023 ZINC02013559 ZINC02013560 ZINC02014257 ZINC02014258 ZINC02014867 ZINC02014868 ZINC02016526 ZINC02016527 ZINC02024203 ZINC02030897 ZINC01664354 ZINC01664389 ZINC02031663 ZINC02031664 ZINC02036681 ZINC02037295 ZINC02038302 ZINC02038947 ZINC02039356 ZINC01684585 ZINC01684714 ZINC02041115 ZINC02041265 ZINC02042933 ZINC02043763 ZINC02045088 ZINC02046918 ZINC02164102 ZINC02164138 ZINC01696943 ZINC02164171 ZINC01696963 ZINC02164174 ZINC02164175 ZINC02164177 ZINC02164178 ZINC02164180 ZINC02164183 ZINC02164185 ZINC02164186 ZINC02164188 ZINC02324820 ZINC01729357 ZINC01632734 ZINC02504418 ZINC01783052 ZINC02508035 ZINC02508046 ZINC02508063 ZINC02508071 ZINC02508078 ZINC01850974 ZINC02545280 ZINC02559331 ZINC02569878 ZINC02043148 ZINC02129241 ZINC02129243 ZINC03164162 ZINC03860308 ZINC02242637 ZINC03861506 ZINC02508249 ZINC04528636 ZINC04532208 ZINC04532209 ZINC04532210 ZINC02530724 ZINC04783008 ZINC02534413 ZINC02534432 ZINC02545294 ZINC02164169 ZINC02560409 ZINC05133736 ZINC02164172 ZINC02566228 ZINC02571329 ZINC05225162 ZINC05225172 ZINC05225175 ZINC05225381 ZINC05225388 ZINC05225399 ZINC05225403 ZINC05225491 ZINC05225496 ZINC05225500 ZINC05225504 
ZINC03096510 ZINC058201'..b'ZINC36419582\t0\n-ZINC36419581\t0\n-ZINC35569014\t0\n-ZINC33359467\t0\n-ZINC33359466\t0\n-ZINC32915144\t0\n-ZINC32626813\t0\n-ZINC32156202\t0\n-ZINC00400095\t0\n-ZINC31613973\t0\n-ZINC28628771\t0\n-ZINC26898079\t0\n-ZINC26898076\t0\n-ZINC25623654\t0\n-ZINC22056810\t0\n-ZINC19949184\t0\n-ZINC19909645\t0\n-ZINC16124713\t0\n-ZINC14684052\t0\n-ZINC13488083\t0\n-ZINC71647667\t0\n-ZINC12405149\t0\n-ZINC08698275\t0\n-ZINC08294972\t0\n-ZINC08294972\t0\n-ZINC06816635\t0\n-ZINC06491845\t0\n-ZINC06090867\t0\n-ZINC05810854\t0\n-ZINC05783309\t0\n-ZINC05380238\t0\n-ZINC05379471\t0\n-ZINC04896115\t0\n-ZINC04802660\t0\n-ZINC04776448\t0\n-ZINC04771902\t0\n-ZINC57988933\t0\n-ZINC04530237\t0\n-ZINC04501380\t0\n-ZINC04276948\t0\n-ZINC03886878\t0\n-ZINC03886877\t0\n-ZINC55345805\t0\n-ZINC02585418\t0\n-ZINC02572153\t0\n-ZINC02555299\t0\n-ZINC02525243\t0\n-ZINC39278760\t0\n-ZINC02168360\t0\n-ZINC02168358\t0\n-ZINC02037793\t0\n-ZINC02037792\t0\n-ZINC02037586\t0\n-ZINC02034789\t0\n-ZINC02015855\t0\n-ZINC02015854\t0\n-ZINC01995111\t0\n-ZINC01870441\t0\n-ZINC01814067\t0\n-ZINC01736719\t0\n-ZINC01724764\t0\n-ZINC01708305\t0\n-ZINC01693339\t0\n-ZINC01679740\t0\n-ZINC01628846\t0\n-ZINC01435792\t0\n-ZINC01387005\t0\n-ZINC00967324\t0\n-ZINC00409357\t0\n-ZINC00404010\t0\n-ZINC00393601\t0\n-ZINC00393600\t0\n-ZINC00388603\t0\n-ZINC25695466\t0\n-ZINC66377680\t0\n-ZINC66345673\t0\n-ZINC59244808\t0\n-ZINC40448569\t0\n-ZINC39342816\t0\n-ZINC39342815\t0\n-ZINC39095069\t0\n-ZINC37935192\t0\n-ZINC37632172\t0\n-ZINC37631091\t0\n-ZINC37631089\t0\n-ZINC34926230\t0\n-ZINC32162209\t0\n-ZINC32162207\t0\n-ZINC31318812\t0\n-ZINC31318809\t0\n-ZINC26950508\t0\n-ZINC26895198\t0\n-ZINC26895195\t0\n-ZINC22211523\t0\n-ZINC22143666\t0\n-ZINC22004600\t0\n-ZINC22004594\t0\n-ZINC00967532\t0\n-ZINC19426011\t0\n-ZINC19321006\t0\n-ZINC17014696\t0\n-ZINC16158775\t0\n-ZINC13541480\t0\n-ZINC12350099\t0\n-ZINC05997859\t0\n-ZINC05997614\t0\n-ZINC05493298\t0\n-ZINC05481663\t0\n-ZINC05439988\t0\n-ZINC053804
40\t0\n-ZINC05379646\t0\n-ZINC05377860\t0\n-ZINC05260521\t0\n-ZINC04682861\t0\n-ZINC04501359\t0\n-ZINC04118774\t0\n-ZINC03882602\t0\n-ZINC05650406\t0\n-ZINC03075416\t0\n-ZINC03075415\t0\n-ZINC02570855\t0\n-ZINC02562433\t0\n-ZINC02510647\t0\n-ZINC02040627\t0\n-ZINC02040626\t0\n-ZINC02034061\t0\n-ZINC05177750\t0\n-ZINC01704500\t0\n-ZINC01693399\t0\n-ZINC01692445\t0\n-ZINC01684358\t0\n-ZINC01677538\t0\n-ZINC01666987\t0\n-ZINC01628237\t0\n-ZINC01595132\t0\n-ZINC01593931\t0\n-ZINC01577237\t0\n-ZINC01574399\t0\n-ZINC01572084\t0\n-ZINC04742917\t0\n-ZINC01235150\t0\n-ZINC04722893\t0\n-ZINC00967322\t0\n-ZINC00397809\t0\n-ZINC00396003\t0\n-ZINC00396002\t0\n-ZINC00163535\t0\n-ZINC00163531\t0\n-ZINC00155424\t0\n-ZINC03861678\t0\n-ZINC71606607\t0\n-ZINC71606604\t0\n-ZINC03861677\t0\n-ZINC68577577\t0\n-ZINC03860808\t0\n-ZINC66381569\t0\n-ZINC44124059\t0\n-ZINC42390337\t0\n-ZINC39326059\t0\n-ZINC37628380\t0\n-ZINC36533214\t0\n-ZINC36384628\t0\n-ZINC36384627\t0\n-ZINC34239535\t0\n-ZINC33374159\t0\n-ZINC32600272\t0\n-ZINC32228391\t0\n-ZINC22061540\t0\n-ZINC22013353\t0\n-ZINC22009776\t0\n-ZINC22009771\t0\n-ZINC19423344\t0\n-ZINC19090218\t0\n-ZINC19090215\t0\n-ZINC16083110\t0\n-ZINC16083109\t0\n-ZINC16082792\t0\n-ZINC15045247\t0\n-ZINC13513507\t0\n-ZINC13404423\t0\n-ZINC12417434\t0\n-ZINC12398213\t0\n-ZINC01846023\t0\n-ZINC08100986\t0\n-ZINC06157404\t0\n-ZINC06090864\t0\n-ZINC05308095\t0\n-ZINC05286604\t0\n-ZINC05193890\t0\n-ZINC02039983\t0\n-ZINC04280916\t0\n-ZINC19324737\t0\n-ZINC03099484\t0\n-ZINC03099483\t0\n-ZINC02530944\t0\n-ZINC02034592\t0\n-ZINC02512338\t0\n-ZINC02383335\t0\n-ZINC02170597\t0\n-ZINC02170595\t0\n-ZINC19230186\t0\n-ZINC02163516\t0\n-ZINC02140860\t0\n-ZINC02034399\t0\n-ZINC01846017\t0\n-ZINC01733026\t0\n-ZINC01682600\t0\n-ZINC01676370\t0\n-ZINC01673301\t0\n-ZINC01669338\t0\n-ZINC01662472\t0\n-ZINC01628264\t0\n-ZINC01580392\t0\n-ZINC01555606\t0\n-ZINC01529480\t0\n-ZINC01529479\t0\n-ZINC01845913\t0\n-ZINC00333845\t0\n-ZINC00159189\t0\n-ZINC01734876\t0\n-ZINC68576821
\t0\n-ZINC68576819\t0\n-ZINC66340177\t0\n-ZINC66340171\t0\n-ZINC47476061\t0\n-ZINC44564099\t0\n-ZINC39326057\t0\n-ZINC38948801\t0\n-ZINC08221057\t0\n-ZINC37628991\t0\n-ZINC37628989\t0\n-ZINC37628824\t0\n-ZINC37169896\t0\n-ZINC37169895\t0\n-ZINC34428922\t0\n-ZINC33986378\t0\n-ZINC33373265\t0\n-ZINC33364639\t0\n-ZINC32629181\t0\n-ZINC32629179\t0\n-ZINC32600868\t0\n-ZINC31308455\t0\n-ZINC22002836\t0\n-ZINC21997893\t0\n-ZINC21997889\t0\n-ZINC21989051\t0\n-ZINC21303952\t0\n-ZINC19851381\t0\n-ZINC19364945\t0\n-ZINC08214586\t0\n'
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/old/u_old.txt
--- a/chemfp_clustering/old/u_old.txt Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,10315 +0,0 @@\n-#4831 true singletons\n-#7412 false singletons\n-#clusters: 5481\n-ZINC71602893\t661\tZINC04765100 ZINC05104846 ZINC04769792 ZINC04769794 ZINC04769797 ZINC04769800 ZINC05191386 ZINC04786883 ZINC04786885 ZINC04786888 ZINC04786890 ZINC04787909 ZINC01765469 ZINC05275407 ZINC05275425 ZINC05276235 ZINC05276256 ZINC02575335 ZINC05046463 ZINC01730614 ZINC05442645 ZINC02003567 ZINC05184936 ZINC05742758 ZINC05211501 ZINC05211510 ZINC05225291 ZINC02026704 ZINC05309659 ZINC05309665 ZINC05309998 ZINC05310000 ZINC01736727 ZINC02032622 ZINC02034633 ZINC05191648 ZINC02037188 ZINC01687049 ZINC02039429 ZINC02039430 ZINC12407621 ZINC06020484 ZINC15222822 ZINC15880088 ZINC06036257 ZINC16951501 ZINC02164095 ZINC17013835 ZINC17020625 ZINC02164129 ZINC02164131 ZINC17835665 ZINC18716182 ZINC08602617 ZINC08602618 ZINC18716185 ZINC02164168 ZINC02166571 ZINC12405013 ZINC02508039 ZINC02508043 ZINC02508044 ZINC02508087 ZINC02508096 ZINC02539331 ZINC13541814 ZINC13765172 ZINC13765175 ZINC13765178 ZINC14588590 ZINC14807062 ZINC14807075 ZINC03860614 ZINC03860615 ZINC16137972 ZINC16889870 ZINC16926751 ZINC04582342 ZINC04582343 ZINC04582344 ZINC05539679 ZINC34582485 ZINC05225259 ZINC05225340 ZINC05225465 ZINC05225470 ZINC05225476 ZINC05226159 ZINC05226163 ZINC05226167 ZINC05651120 ZINC04261946 ZINC04261947 ZINC22013264 ZINC22013268 ZINC22013273 ZINC22013279 ZINC31317854 ZINC31319331 ZINC31319335 ZINC31740234 ZINC31740239 ZINC15119988 ZINC32152665 ZINC32214132 ZINC43761699 ZINC43761700 ZINC43761701 ZINC43761702 ZINC01481755 ZINC34303159 ZINC34303160 ZINC34303161 ZINC34303162 ZINC45070295 ZINC02031628 ZINC02032404 ZINC34349782 ZINC02032619 ZINC34582222 ZINC71785898 ZINC71786843 ZINC01571260 ZINC01571261 ZINC43763722 ZINC45069745 ZINC45069748 ZINC45069752 ZINC34156324 ZINC01577370 ZINC01529247 ZINC01529451 ZINC01529452 ZINC70651338 ZINC70651339 ZINC70651340 ZINC01532765 ZINC01561953 ZINC01561960 ZINC01574321 ZINC01577208 ZINC71786720 ZINC01587683 ZINC01587688 ZINC02242645 
ZINC00158103 ZINC00162386 ZINC01598495 ZINC01598496 ZINC01598497 ZINC01598498 ZINC01598518 ZINC01587614 ZINC00393753 ZINC00393754 ZINC00393755 ZINC00393756 ZINC01641048 ZINC16991119 ZINC00394761 ZINC00394806 ZINC00394807 ZINC00394808 ZINC02164097 ZINC01662386 ZINC00399241 ZINC02164128 ZINC01591808 ZINC01672871 ZINC01674839 ZINC01674842 ZINC00406932 ZINC00406933 ZINC02164132 ZINC00502094 ZINC00507608 ZINC01680238 ZINC02164158 ZINC18716183 ZINC01685148 ZINC18716184 ZINC01687047 ZINC01687048 ZINC00967533 ZINC01687050 ZINC02164167 ZINC00968099 ZINC00968100 ZINC00968101 ZINC00968128 ZINC01081099 ZINC01081323 ZINC01694726 ZINC01696964 ZINC01319163 ZINC01436122 ZINC01436123 ZINC01436124 ZINC01436125 ZINC01699906 ZINC01701838 ZINC01716732 ZINC01724736 ZINC01571243 ZINC01571245 ZINC02003566 ZINC02011663 ZINC02011664 ZINC01590023 ZINC02013559 ZINC02013560 ZINC02014257 ZINC02014258 ZINC02014867 ZINC02014868 ZINC02016526 ZINC02016527 ZINC02024203 ZINC02030897 ZINC01664354 ZINC01664389 ZINC02031663 ZINC02031664 ZINC02036681 ZINC02037295 ZINC02038302 ZINC02038947 ZINC02039356 ZINC01684585 ZINC01684714 ZINC02041115 ZINC02041265 ZINC02042933 ZINC02043763 ZINC02045088 ZINC02046918 ZINC02164102 ZINC02164138 ZINC01696943 ZINC02164171 ZINC01696963 ZINC02164174 ZINC02164175 ZINC02164177 ZINC02164178 ZINC02164180 ZINC02164183 ZINC02164185 ZINC02164186 ZINC02164188 ZINC02324820 ZINC01729357 ZINC01632734 ZINC02504418 ZINC01783052 ZINC02508035 ZINC02508046 ZINC02508063 ZINC02508071 ZINC02508078 ZINC01850974 ZINC02545280 ZINC02559331 ZINC02569878 ZINC02043148 ZINC02129241 ZINC02129243 ZINC03164162 ZINC03860308 ZINC02242637 ZINC03861506 ZINC02508249 ZINC04528636 ZINC04532208 ZINC04532209 ZINC04532210 ZINC02530724 ZINC04783008 ZINC02534413 ZINC02534432 ZINC02545294 ZINC02164169 ZINC02560409 ZINC05133736 ZINC02164172 ZINC02566228 ZINC02571329 ZINC05225162 ZINC05225172 ZINC05225175 ZINC05225381 ZINC05225388 ZINC05225399 ZINC05225403 ZINC05225491 ZINC05225496 ZINC05225500 ZINC05225504 
ZINC03096510 ZINC058201'..b'ZINC37632182\t0\n-ZINC37631885\t0\n-ZINC37628972\t0\n-ZINC37628816\t0\n-ZINC34939445\t0\n-ZINC34939444\t0\n-ZINC34327972\t0\n-ZINC33362792\t0\n-ZINC33359474\t0\n-ZINC19387498\t0\n-ZINC19331291\t0\n-ZINC19331287\t0\n-ZINC16159295\t0\n-ZINC16159293\t0\n-ZINC16082659\t0\n-ZINC15771889\t0\n-ZINC14983445\t0\n-ZINC14628482\t0\n-ZINC12480368\t0\n-ZINC12153803\t0\n-ZINC06590260\t0\n-ZINC04721345\t0\n-ZINC04620596\t0\n-ZINC04352993\t0\n-ZINC04284434\t0\n-ZINC04284404\t0\n-ZINC03860745\t0\n-ZINC02585927\t0\n-ZINC02584614\t0\n-ZINC02575364\t0\n-ZINC02572120\t0\n-ZINC02563326\t0\n-ZINC02555213\t0\n-ZINC02170245\t0\n-ZINC02034508\t0\n-ZINC01997857\t0\n-ZINC01997856\t0\n-ZINC01682521\t0\n-ZINC01675291\t0\n-ZINC01657319\t0\n-ZINC01641030\t0\n-ZINC01608901\t0\n-ZINC01594670\t0\n-ZINC01586656\t0\n-ZINC01555332\t0\n-ZINC01439400\t0\n-ZINC00396158\t0\n-ZINC71621196\t0\n-ZINC66381592\t0\n-ZINC63148589\t0\n-ZINC59754680\t0\n-ZINC51074718\t0\n-ZINC51074717\t0\n-ZINC37632186\t0\n-ZINC37629044\t0\n-ZINC36533269\t0\n-ZINC34337607\t0\n-ZINC34337605\t0\n-ZINC34330539\t0\n-ZINC34330538\t0\n-ZINC31414746\t0\n-ZINC26896049\t0\n-ZINC26896046\t0\n-ZINC16138636\t0\n-ZINC16034387\t0\n-ZINC15777627\t0\n-ZINC15115331\t0\n-ZINC12416533\t0\n-ZINC05663502\t0\n-ZINC05576053\t0\n-ZINC05283247\t0\n-ZINC04051396\t0\n-ZINC03875370\t0\n-ZINC03612758\t0\n-ZINC02539354\t0\n-ZINC02390369\t0\n-ZINC02041062\t0\n-ZINC02034873\t0\n-ZINC02030980\t0\n-ZINC01733027\t0\n-ZINC01693132\t0\n-ZINC01684527\t0\n-ZINC01680393\t0\n-ZINC01674462\t0\n-ZINC01609947\t0\n-ZINC01603496\t0\n-ZINC01482803\t0\n-ZINC71617645\t0\n-ZINC71257177\t0\n-ZINC66351573\t0\n-ZINC66351572\t0\n-ZINC64370185\t0\n-ZINC63148423\t0\n-ZINC63146279\t0\n-ZINC53993715\t0\n-ZINC53993714\t0\n-ZINC39242270\t0\n-ZINC39242269\t0\n-ZINC37633669\t0\n-ZINC37633667\t0\n-ZINC37633133\t0\n-ZINC34539501\t0\n-ZINC33753907\t0\n-ZINC32182223\t0\n-ZINC30678251\t0\n-ZINC20475278\t0\n-ZINC19324731\t0\n-ZINC19319501\t0\n-ZINC19166968\t0\n-ZINC148071
13\t0\n-ZINC14448389\t0\n-ZINC04984177\t0\n-ZINC04895939\t0\n-ZINC04804787\t0\n-ZINC04775232\t0\n-ZINC04706620\t0\n-ZINC04521315\t0\n-ZINC04261780\t0\n-ZINC03875935\t0\n-ZINC03861020\t0\n-ZINC03140714\t0\n-ZINC02584263\t0\n-ZINC02527961\t0\n-ZINC02168531\t0\n-ZINC01845619\t0\n-ZINC01682053\t0\n-ZINC01680814\t0\n-ZINC01611664\t0\n-ZINC01598156\t0\n-ZINC01586364\t0\n-ZINC01575687\t0\n-ZINC66381590\t0\n-ZINC66377722\t0\n-ZINC57217534\t0\n-ZINC39274309\t0\n-ZINC31938007\t0\n-ZINC06403302\t0\n-ZINC04744395\t0\n-ZINC04742364\t0\n-ZINC04722803\t0\n-ZINC03860254\t0\n-ZINC03070145\t0\n-ZINC01730558\t0\n-ZINC01690285\t0\n-ZINC01621716\t0\n-ZINC71614813\t0\n-ZINC66377720\t0\n-ZINC64370193\t0\n-ZINC39369735\t0\n-ZINC39221831\t0\n-ZINC36720865\t0\n-ZINC32163098\t0\n-ZINC19321044\t0\n-ZINC06734738\t0\n-ZINC05178397\t0\n-ZINC05167654\t0\n-ZINC05157937\t0\n-ZINC04963934\t0\n-ZINC04268917\t0\n-ZINC04097424\t0\n-ZINC03875757\t0\n-ZINC03875372\t0\n-ZINC03860600\t0\n-ZINC02040420\t0\n-ZINC02037807\t0\n-ZINC02037390\t0\n-ZINC02037305\t0\n-ZINC02015871\t0\n-ZINC02004049\t0\n-ZINC01845620\t0\n-ZINC01712063\t0\n-ZINC01702289\t0\n-ZINC01693334\t0\n-ZINC01693329\t0\n-ZINC01648252\t0\n-ZINC01627348\t0\n-ZINC00400095\t0\n-ZINC71647667\t0\n-ZINC57988933\t0\n-ZINC55345805\t0\n-ZINC39278760\t0\n-ZINC25695466\t0\n-ZINC12350099\t0\n-ZINC05650406\t0\n-ZINC05177750\t0\n-ZINC04742917\t0\n-ZINC04722893\t0\n-ZINC03861678\t0\n-ZINC03861677\t0\n-ZINC03860808\t0\n-ZINC02039983\t0\n-ZINC02034592\t0\n-ZINC01845913\t0\n-ZINC01734876\t0\n-ZINC01658753\t0\n-ZINC01648381\t0\n-ZINC01613078\t0\n-ZINC01601383\t0\n-ZINC01577259\t0\n-ZINC38343415\t0\n-ZINC34593547\t0\n-ZINC34115094\t0\n-ZINC33505753\t0\n-ZINC32163202\t0\n-ZINC32163200\t0\n-ZINC13410577\t0\n-ZINC06691140\t0\n-ZINC05158078\t0\n-ZINC05112484\t0\n-ZINC05019026\t0\n-ZINC04899414\t0\n-ZINC04712486\t0\n-ZINC03958467\t0\n-ZINC02036753\t0\n-ZINC01699944\t0\n-ZINC01690289\t0\n-ZINC01585463\t0\n-ZINC24718443\t0\n-ZINC19169386\t0\n-ZINC19167376\t0\n-ZINC05700932
\t0\n-ZINC03957736\t0\n-ZINC02388228\t0\n-ZINC02168687\t0\n-ZINC01754610\t0\n-ZINC01677558\t0\n-ZINC01591817\t0\n-ZINC01491944\t0\n-ZINC01439067\t0\n-ZINC00967522\t0\n-ZINC71602724\t0\n-ZINC57989955\t0\n-ZINC40454334\t0\n-ZINC31667387\t0\n-ZINC31297995\t0\n-ZINC15440412\t0\n-ZINC05225679\t0\n-ZINC04658606\t0\n-ZINC01632445\t0\n-ZINC00967532\t0\n-ZINC19324737\t0\n-ZINC19230186\t0\n-ZINC08221057\t0\n-ZINC05224188\t0\n-ZINC01688358\t0\n-ZINC71769114\t0\n-ZINC60272425\t0\n-ZINC01846023\t0\n-ZINC71769112\t0\n-ZINC08214586\t0\n'
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/test-data/NxN_Clustering_on_q.svg
--- a/chemfp_clustering/test-data/NxN_Clustering_on_q.svg Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,793 +0,0 @@\n-<?xml version="1.0" encoding="utf-8" standalone="no"?>\n-<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"\n-  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n-<!-- Created with matplotlib (http://matplotlib.sourceforge.net/) -->\n-<svg height="432pt" version="1.1" viewBox="0 0 576 432" width="576pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">\n- <defs>\n-  <style type="text/css">\n-*{stroke-linecap:square;stroke-linejoin:round;}\n-  </style>\n- </defs>\n- <g id="figure_1">\n-  <g id="patch_1">\n-   <path d="\n-M0 432\n-L576 432\n-L576 0\n-L0 0\n-z\n-" style="fill:#ffffff;"/>\n-  </g>\n-  <g id="axes_1">\n-   <g id="patch_2">\n-    <path d="\n-M72 388.8\n-L518.4 388.8\n-L518.4 43.2\n-L72 43.2\n-z\n-" style="fill:#ffffff;"/>\n-   </g>\n-   <g id="LineCollection_1">\n-    <defs>\n-     <path d="\n-M123.508 -43.2\n-L123.508 -234.55\n-L157.846 -234.55\n-L157.846 -43.2" id="C0_0_36e0ca0abb"/>\n-    </defs>\n-    <g clip-path="url(#p7ff5b81e1d)">\n-     <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_0_36e0ca0abb" y="432.0"/>\n-    </g>\n-   </g>\n-   <g id="LineCollection_2">\n-    <defs>\n-     <path d="\n-M260.862 -43.2\n-L260.862 -43.2\n-L295.2 -43.2\n-L295.2 -43.2" id="C1_0_d55749c544"/>\n-     <path d="\n-M226.523 -43.2\n-L226.523 -43.2\n-L278.031 -43.2\n-L278.031 -43.2" id="C1_1_f284ff091a"/>\n-     <path d="\n-M329.538 -43.2\n-L329.538 -151.689\n-L363.877 -151.689\n-L363.877 -43.2" id="C1_2_ad9c2700c6"/>\n-     <path d="\n-M252.277 -43.2\n-L252.277 -180.048\n-L346.708 -180.048\n-L346.708 -151.689" id="C1_3_59bcda1988"/>\n-    </defs>\n-    <g clip-path="url(#p7ff5b81e1d)">\n-     <use style="fill:none;stroke:#ff0000;stroke-linecap:butt;" x="0" xlink:href="#C1_0_d55749c544" y="432.0"/>\n-    </g>\n-    <g clip-path="url(#p7ff5b81e1d)">\n-     <use style="fill:none;stroke:#ff0000;stroke-linecap:butt;" x="0" xlink:href="#C1_1_f284ff091a" y="432.0"/>\n-    </g>\n-    <g 
clip-path="url(#p7ff5b81e1d)">\n-     <use style="fill:none;stroke:#ff0000;stroke-linecap:butt;" x="0" xlink:href="#C1_2_ad9c2700c6" y="432.0"/>\n-    </g>\n-    <g clip-path="url(#p7ff5b81e1d)">\n-     <use style="fill:none;stroke:#ff0000;stroke-linecap:butt;" x="0" xlink:href="#C1_3_59bcda1988" y="432.0"/>\n-    </g>\n-   </g>\n-   <g id="LineCollection_3">\n-    <defs>\n-     <path d="\n-M398.215 -43.2\n-L398.215 -147.208\n-L432.554 -147.208\n-L432.554 -43.2" id="C2_0_63eb41fae6"/>\n-     <path d="\n-M466.892 -43.2\n-L466.892 -149.207\n-L501.231 -149.207\n-L501.231 -43.2" id="C2_1_2114d8afff"/>\n-     <path d="\n-M415.385 -147.208\n-L415.385 -210.283\n-L484.062 -210.283\n-L484.062 -149.207" id="C2_2_580dfac2d3"/>\n-    </defs>\n-    <g clip-path="url(#p7ff5b81e1d)">\n-     <use style="fill:none;stroke:#00bfbf;stroke-linecap:butt;" x="0" xlink:href="#C2_0_63eb41fae6" y="432.0"/>\n-    </g>\n-    <g clip-path="url(#p7ff5b81e1d)">\n-     <use style="fill:none;stroke:#00bfbf;stroke-linecap:butt;" x="0" xlink:href="#C2_1_2114d8afff" y="432.0"/>\n-    </g>\n-    <g clip-path="url(#p7ff5b81e1d)">\n-     <use style="fill:none;stroke:#00bfbf;stroke-linecap:butt;" x="0" xlink:href="#C2_2_580dfac2d3" y="432.0"/>\n-    </g>\n-   </g>\n-   <g id="LineCollection_4">\n-    <defs>\n-     <path d="\n-M299.492 -180.048\n-L299.492 -278.97\n-L449.723 -278.97\n-L449.723 -210.283" id="C3_0_351ea019a2"/>\n-     <path d="\n-M192.185 -43.2\n-L192.185 -315.042\n-L374.608 -315.042\n-L374.608 -278.97" id="C3_1_f2cbe41b26"/>\n-     <path d="\n-M140.677 -234.55\n-L140.677 -322.212\n-L283.396 -322.212\n-L283.396 -315.042" id="C3_2_0ff010f580"/>\n-     <path d="\n-M89.1692 -43.2\n-L89.1692 -372.343\n-L212.037 -372.343\n-L212.037 -322.212" id="C3_3_64df8a0051"/>\n-    </defs>\n-    <g clip-path="url(#p7ff5b81e1d)">\n-     <use style="fill:none;stroke:#0000ff;stroke-linecap:butt;" x="0" xlink:href="#C3_0_351ea019a2" y="432.0"/>\n-    </g>\n-    <g clip-path="url(#p7ff5b81e1d)">\n-     <use 
style="fill:none;stroke:#0000ff;stroke-linecap:butt;" x="0" xlink:href="#C3_1_f2cbe41b26" y="432.0"/>\n-    </g>\n-    <g clip-path="url(#p7ff5b81e1d)'..b'<g transform="translate(43.034375 251.644127867)scale(0.12 -0.12)">\n-       <use xlink:href="#DejaVuSans-30"/>\n-       <use x="63.623046875" xlink:href="#DejaVuSans-2e"/>\n-       <use x="95.41015625" xlink:href="#DejaVuSans-30"/>\n-       <use x="159.033203125" xlink:href="#DejaVuSans-33"/>\n-      </g>\n-     </g>\n-    </g>\n-    <g id="ytick_5">\n-     <g id="line2d_9">\n-      <g>\n-       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="200.101753823"/>\n-      </g>\n-     </g>\n-     <g id="line2d_10">\n-      <g>\n-       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="200.101753823"/>\n-      </g>\n-     </g>\n-     <g id="text_18">\n-      <!-- 0.04 -->\n-      <g transform="translate(42.745625 204.469566323)scale(0.12 -0.12)">\n-       <use xlink:href="#DejaVuSans-30"/>\n-       <use x="63.623046875" xlink:href="#DejaVuSans-2e"/>\n-       <use x="95.41015625" xlink:href="#DejaVuSans-30"/>\n-       <use x="159.033203125" xlink:href="#DejaVuSans-34"/>\n-      </g>\n-     </g>\n-    </g>\n-    <g id="ytick_6">\n-     <g id="line2d_11">\n-      <g>\n-       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="152.927192279"/>\n-      </g>\n-     </g>\n-     <g id="line2d_12">\n-      <g>\n-       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="152.927192279"/>\n-      </g>\n-     </g>\n-     <g id="text_19">\n-      <!-- 0.05 -->\n-      <g transform="translate(43.120625 157.295004779)scale(0.12 -0.12)">\n-       <use xlink:href="#DejaVuSans-30"/>\n-       <use x="63.623046875" xlink:href="#DejaVuSans-2e"/>\n-       <use x="95.41015625" xlink:href="#DejaVuSans-30"/>\n-       <use 
x="159.033203125" xlink:href="#DejaVuSans-35"/>\n-      </g>\n-     </g>\n-    </g>\n-    <g id="ytick_7">\n-     <g id="line2d_13">\n-      <g>\n-       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="105.752630735"/>\n-      </g>\n-     </g>\n-     <g id="line2d_14">\n-      <g>\n-       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="105.752630735"/>\n-      </g>\n-     </g>\n-     <g id="text_20">\n-      <!-- 0.06 -->\n-      <g transform="translate(42.828125 110.120443235)scale(0.12 -0.12)">\n-       <use xlink:href="#DejaVuSans-30"/>\n-       <use x="63.623046875" xlink:href="#DejaVuSans-2e"/>\n-       <use x="95.41015625" xlink:href="#DejaVuSans-30"/>\n-       <use x="159.033203125" xlink:href="#DejaVuSans-36"/>\n-      </g>\n-     </g>\n-    </g>\n-    <g id="ytick_8">\n-     <g id="line2d_15">\n-      <g>\n-       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="58.5780691907"/>\n-      </g>\n-     </g>\n-     <g id="line2d_16">\n-      <g>\n-       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="58.5780691907"/>\n-      </g>\n-     </g>\n-     <g id="text_21">\n-      <!-- 0.07 -->\n-      <g transform="translate(43.098125 62.9458816907)scale(0.12 -0.12)">\n-       <use xlink:href="#DejaVuSans-30"/>\n-       <use x="63.623046875" xlink:href="#DejaVuSans-2e"/>\n-       <use x="95.41015625" xlink:href="#DejaVuSans-30"/>\n-       <use x="159.033203125" xlink:href="#DejaVuSans-37"/>\n-      </g>\n-     </g>\n-    </g>\n-   </g>\n-   <g id="patch_3">\n-    <path d="\n-M72 43.2\n-L518.4 43.2" style="fill:none;stroke:#000000;"/>\n-   </g>\n-   <g id="patch_4">\n-    <path d="\n-M518.4 388.8\n-L518.4 43.2" style="fill:none;stroke:#000000;"/>\n-   </g>\n-   <g id="patch_5">\n-    <path d="\n-M72 388.8\n-L518.4 388.8" 
style="fill:none;stroke:#000000;"/>\n-   </g>\n-   <g id="patch_6">\n-    <path d="\n-M72 388.8\n-L72 43.2" style="fill:none;stroke:#000000;"/>\n-   </g>\n-  </g>\n- </g>\n- <defs>\n-  <clipPath id="p7ff5b81e1d">\n-   <rect height="345.6" width="446.4" x="72.0" y="43.2"/>\n-  </clipPath>\n- </defs>\n-</svg>\n'
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_clustering/test-data/Taylor-Butina_Clustering_on_data_q.txt
--- a/chemfp_clustering/test-data/Taylor-Butina_Clustering_on_data_q.txt Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-#0 true singletons
-#0 false singletons
-#clusters: 1
-55091849 12 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_mol2fps/mol2fps.xml
--- a/chemfp_mol2fps/mol2fps.xml Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,280 +0,0 @@\n-<tool id="ctb_chemfp_mol2fps" name="Molecules to Fingerprints" version="0.2.0">\n-    <description>with different fingerprint types</description>\n-    <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism>\n-    <requirements>\n-        <requirement type="package" version="1.1p1">chemfp</requirement>\n-        <requirement type="package" version="2012_12_1">rdkit</requirement>\n-        <requirement type="package" version="2.3.2">openbabel</requirement>\n-    </requirements>\n-    <command>\n-<![CDATA[\n-        #set $fptype = $fp_opts.fp_opts_selector\n-\n-        #if $fptype in [\'--FP2\', \'--FP3\', \'--FP4\', \'--MACCS\']:\n-            ## Open Babel fingerprints\n-            ob2fps $fptype --in "${infile.ext}" "${infile}" -o "${outfile}"\n-        #else:\n-            ## RDKit fingerprints\n-            rdkit2fps --in "${infile.ext}" "${infile}" -o "${outfile}"\n-            #if $fp_opts.fp_opts_selector == "--RDK":\n-                --RDK\n-                --fpSize $fp_opts.fpSize\n-                --minPath $fp_opts.minPath\n-                --maxPath $fp_opts.maxPath\n-                --nBitsPerHash $fp_opts.nBitsPerHash\n-                $fp_opts.useHs\n-            #elif $fp_opts.fp_opts_selector == "--torsions":\n-                --torsions\n-                --fpSize $fp_opts.fpSize\n-                --targetSize $fp_opts.targetSize\n-            #elif $fp_opts.fp_opts_selector == "--morgan":\n-                --morgan\n-                --fpSize $fp_opts.fpSize\n-                --radius $fp_opts.radius\n-                $fp_opts.useFeatures\n-                $fp_opts.useChirality\n-                $fp_opts.useBondTypes\n-            #elif $fp_opts.fp_opts_selector == "--pairs":\n-                --paris\n-                --fpSize $fp_opts.fpSize\n-                --minLength $fp_opts.minLength\n-                --maxLength 
$fp_opts.maxLength\n-            #elif $fp_opts.fp_opts_selector == "--maccs166":\n-                --maccs166\n-            #elif $fp_opts.fp_opts_selector == "--substruct":\n-                --substruct\n-            #end if\n-        #end if\n-        --errors report 2>&1\n-]]>\n-    </command>\n-    <inputs>\n-        <param name="infile" type=\'data\' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/>\n-        <conditional name="fp_opts">\n-            <param name="fp_opts_selector" type="select" label="Type of fingerprint">\n-                <option value=\'--FP2\' selected="True">Open Babel FP2 fingerprints</option>\n-                <option value=\'--FP3\'>Open Babel FP3 fingerprints</option>\n-                <option value=\'--FP4\'>Open Babel FP4 fingerprints</option>\n-                <option value=\'--MACCS\'>Open Babel MACCS fingerprints</option>\n-                <option value=\'--RDK\'>RDKit topological fingerprint</option>\n-                <option value=\'--torsions\'>RDKit topological Torsion fingerprints</option>\n-                <option value=\'--morgan\'>RDKit Morgan fingerprints</option>\n-                <option value=\'--pairs\'>RDKit Atom Pair fingerprints</option>\n-                <option value=\'--maccs166\'>RDKit MACCS fingerprints</option>\n-                <option value=\'--substruct\'>RDKit substructure fingerprints</option>\n-            </param>\n-            <when value="--FP2" />\n-            <when value="--FP3" />\n-            <when value="--FP4" />\n-            <when value="--MACCS" />\n-            <when value="--RDK">\n-                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">\n-                    <validator type="in_range" min="1" />\n-                </param>\n-                <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help="">\n-                    <validator type="in_range" min="1" />\n-  
              </param>\n-                <param name="maxPath" type="integer" value="7" label="maximum number of bonds to incl'..b'\n-        </test>\n-        <test>\n-            <param name="infile" value="CID_2244.smi" ftype="smi" />\n-            <param name="fp_opts.fp_opts_selector" value="--FP3" />\n-            <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" />\n-        </test>\n-        <!-- FP4 -->\n-        <test>\n-            <param name="infile" value="CID_2244.sdf" ftype="sdf" />\n-            <param name="fp_opts.fp_opts_selector" value="--FP4" />\n-            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />\n-        </test>\n-        <test>\n-            <param name="infile" value="CID_2244.smi" ftype="smi" />\n-            <param name="fp_opts.fp_opts_selector" value="--FP4" />\n-            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />\n-        </test>\n-        <!-- MACCS -->\n-        <test>\n-            <param name="infile" value="CID_2244.sdf" ftype="sdf" />\n-            <param name="fp_opts.fp_opts_selector" value="--MACCS" />\n-            <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" />\n-        </test>\n-        <test>\n-            <param name="infile" value="CID_2244.smi" ftype="smi" />\n-            <param name="fp_opts.fp_opts_selector" value="--MACCS" />\n-            <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" />\n-        </test>\n-    </tests>\n-    <help>\n-<![CDATA[\n-\n-.. class:: infomark\n-\n-**What this tool does**\n-\n-This tool uses chemfp_ to calculate 10 different fingerprints of common file formats. Chemfp uses `Open Babel`_, OpenEye_ and RDKit_.\n-\n-For more information check the websites listed below::\n-\n-\t- http://code.google.com/p/rdkit/wiki/FingerprintsInTheRDKit\n-\t- http://openbabel.org/wiki/Tutorial:Fingerprints\n-\n------\n-\n-.. 
class:: infomark\n-\n-**Input**\n-\n-FPS fingerprint file format\n-\n-* Example::\n-\n-\t      - SDF File\n-\n-\t\t28434379\n-\t\t  -OEChem-02031205132D\n-\n-\t\t 37 39  0     0  0  0  0  0  0999 V2000\n-\t\t    8.1648   -1.8842    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n-\t\t    6.0812   -0.2134    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0\n-\t\t    6.0812   -1.8229    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0\n-\t\t    2.5369   -2.0182    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0\n-\t\t    6.3919    0.7371    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n-\t\t    7.3704    0.9433    0.0000 C   0  0  0  0\n-\t\t    ......\n-\t\t  1 15  1  0  0  0  0\n-\t\t  1 35  1  0  0  0  0\n-\t\t  2  5  1  0  0  0  0\n-\t\t  2 11  1  0  0  0  0\n-\t\t  2 12  1  0  0  0  0\n-\t\t  3 12  2  0  0  0  0\n-\t\t  3 13  1  0  0  0  0\n-\t\t  4 18  1  0  0  0  0\n-\t\t  ......\n-\n-\t\t\t>PUBCHEM_COMPOUND_CID<\n-\t\t\t28434379\n-\n-\t\t\t> <PUBCHEM_COMPOUND_CANONICALIZED>\n-\t\t\t1\n-\n-\t\t\t> <PUBCHEM_CACTVS_COMPLEXITY>\n-\t\t\t280\n-\n-\t\t\t> <PUBCHEM_CACTVS_HBOND_ACCEPTOR>\n-\t\t\t2\n-\n-\t\t\t> <PUBCHEM_CACTVS_HBOND_DONOR>\n-\t\t\t2\n-\n-\t\t\t> <PUBCHEM_CACTVS_ROTATABLE_BOND>\n-\t\t\t2\n-\n-\t\t\t> <PUBCHEM_CACTVS_SUBSKEYS>\n-\t\t\tAAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==\n-\n-\t\t\t>\n-\n-\t\t- type : FP2\n-\n------\n-\n-.. class:: infomark\n-\n-**Output**\n-\n-* Example::\n-\n-\t#FPS1\n-\t#num_bits=1021\n-\t#type=OpenBabel-FP2/1\n-\t#software=OpenBabel/2.3.0\n-\t#source=/tmp/dataset_409.dat.sdf\n-\t#date=2012-02-03T11:13:39\n-\tc0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c\n-\t0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300\n-\t10000000000080000000c0000060000c0000060810000010000000800102000000\t28434379\n-\n------\n-\n-.. 
class:: infomark\n-\n-**Cite**\n-\n-| `Open Babel`_\n-| RDKit_ project\n-| chemfp_ project.\n-|\n-| N M O\'Boyle, M Banck, C A James, C Morley, T Vandermeersch and G R Hutchison. `Open Babel: An open chemical toolbox.`_\n-\n-.. _`Open Babel: An open chemical toolbox.`: http://www.jcheminf.com/content/3/1/33\n-.. _OpenEye: http://www.eyesopen.com/\n-.. _chemfp: http://chemfp.com/\n-.. _RDKit: http://www.rdkit.org/\n-.. _`Open Babel`: http://openbabel.org/\n-\n-\n-]]>\n-    </help>\n-</tool>\n'
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_mol2fps/test-data/CID_2244.can
--- a/chemfp_mol2fps/test-data/CID_2244.can Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-CC(=O)Oc1ccccc1C(=O)O 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_mol2fps/test-data/CID_2244.inchi
--- a/chemfp_mol2fps/test-data/CID_2244.inchi Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_mol2fps/test-data/CID_2244.sdf
--- a/chemfp_mol2fps/test-data/CID_2244.sdf Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,155 +0,0 @@
-2244
-  -OEChem-05151212332D
-
- 21 21  0     0  0  0  0  0  0999 V2000
-    3.7320   -0.0600    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
-    6.3301    1.4400    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
-    4.5981    1.4400    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
-    2.8660   -1.5600    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
-    4.5981   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
-    5.4641   -0.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
-    4.5981   -1.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
-    6.3301   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
-    5.4641   -2.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
-    6.3301   -1.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
-    5.4641    0.9400    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
-    2.8660   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
-    2.0000   -0.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
-    4.0611   -1.8700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
-    6.8671   -0.2500    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
-    5.4641   -2.6800    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
-    6.8671   -1.8700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
-    2.3100    0.4769    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
-    1.4631    0.2500    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
-    1.6900   -0.5969    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
-    6.3301    2.0600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
-  1  5  1  0  0  0  0
-  1 12  1  0  0  0  0
-  2 11  1  0  0  0  0
-  2 21  1  0  0  0  0
-  3 11  2  0  0  0  0
-  4 12  2  0  0  0  0
-  5  6  1  0  0  0  0
-  5  7  2  0  0  0  0
-  6  8  2  0  0  0  0
-  6 11  1  0  0  0  0
-  7  9  1  0  0  0  0
-  7 14  1  0  0  0  0
-  8 10  1  0  0  0  0
-  8 15  1  0  0  0  0
-  9 10  2  0  0  0  0
-  9 16  1  0  0  0  0
- 10 17  1  0  0  0  0
- 12 13  1  0  0  0  0
- 13 18  1  0  0  0  0
- 13 19  1  0  0  0  0
- 13 20  1  0  0  0  0
-M  END
-> <PUBCHEM_COMPOUND_CID>
-2244
-
-> <PUBCHEM_COMPOUND_CANONICALIZED>
-1
-
-> <PUBCHEM_CACTVS_COMPLEXITY>
-212
-
-> <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
-4
-
-> <PUBCHEM_CACTVS_HBOND_DONOR>
-1
-
-> <PUBCHEM_CACTVS_ROTATABLE_BOND>
-3
-
-> <PUBCHEM_CACTVS_SUBSKEYS>
-AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA==
-
-> <PUBCHEM_IUPAC_OPENEYE_NAME>
-2-acetoxybenzoic acid
-
-> <PUBCHEM_IUPAC_CAS_NAME>
-2-acetyloxybenzoic acid
-
-> <PUBCHEM_IUPAC_NAME>
-2-acetyloxybenzoic acid
-
-> <PUBCHEM_IUPAC_SYSTEMATIC_NAME>
-2-acetyloxybenzoic acid
-
-> <PUBCHEM_IUPAC_TRADITIONAL_NAME>
-2-acetoxybenzoic acid
-
-> <PUBCHEM_IUPAC_INCHI>
-InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
-
-> <PUBCHEM_IUPAC_INCHIKEY>
-BSYNRYMUTXBXSQ-UHFFFAOYSA-N
-
-> <PUBCHEM_XLOGP3>
-1.2
-
-> <PUBCHEM_EXACT_MASS>
-180.042259
-
-> <PUBCHEM_MOLECULAR_FORMULA>
-C9H8O4
-
-> <PUBCHEM_MOLECULAR_WEIGHT>
-180.15742
-
-> <PUBCHEM_OPENEYE_CAN_SMILES>
-CC(=O)OC1=CC=CC=C1C(=O)O
-
-> <PUBCHEM_OPENEYE_ISO_SMILES>
-CC(=O)OC1=CC=CC=C1C(=O)O
-
-> <PUBCHEM_CACTVS_TPSA>
-63.6
-
-> <PUBCHEM_MONOISOTOPIC_WEIGHT>
-180.042259
-
-> <PUBCHEM_TOTAL_CHARGE>
-0
-
-> <PUBCHEM_HEAVY_ATOM_COUNT>
-13
-
-> <PUBCHEM_ATOM_DEF_STEREO_COUNT>
-0
-
-> <PUBCHEM_ATOM_UDEF_STEREO_COUNT>
-0
-
-> <PUBCHEM_BOND_DEF_STEREO_COUNT>
-0
-
-> <PUBCHEM_BOND_UDEF_STEREO_COUNT>
-0
-
-> <PUBCHEM_ISOTOPIC_ATOM_COUNT>
-0
-
-> <PUBCHEM_COMPONENT_COUNT>
-1
-
-> <PUBCHEM_CACTVS_TAUTO_COUNT>
-1
-
-> <PUBCHEM_COORDINATE_TYPE>
-1
-5
-255
-
-> <PUBCHEM_BONDANNOTATIONS>
-5  6  8
-5  7  8
-6  8  8
-7  9  8
-8  10  8
-9  10  8
-
-$$$$
-
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_mol2fps/test-data/CID_2244.smi
--- a/chemfp_mol2fps/test-data/CID_2244.smi Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-O(c1c(cccc1)C(=O)O)C(=O)C 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_mol2fps/test-data/CID_2244_FP2.fps
--- a/chemfp_mol2fps/test-data/CID_2244_FP2.fps Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,7 +0,0 @@
-#FPS1
-#num_bits=1021
-#type=OpenBabel-FP2/1
-#software=OpenBabel/2.3.1
-#source=CID_2244.sdf
-#date=2012-05-15T16:40:38
-00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_mol2fps/test-data/CID_2244_FP3.fps
--- a/chemfp_mol2fps/test-data/CID_2244_FP3.fps Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,7 +0,0 @@
-#FPS1
-#num_bits=55
-#type=OpenBabel-FP3/1
-#software=OpenBabel/2.3.1
-#source=CID_2244.sdf
-#date=2012-05-15T16:59:15
-0400000c50b007 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_mol2fps/test-data/CID_2244_FP4.fps
--- a/chemfp_mol2fps/test-data/CID_2244_FP4.fps Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,7 +0,0 @@
-#FPS1
-#num_bits=307
-#type=OpenBabel-FP4/1
-#software=OpenBabel/2.3.1
-#source=CID_2244.sdf
-#date=2012-05-15T16:59:22
-010000000000000000009800000000004001000000000000000000000000000000000240402801 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_mol2fps/test-data/CID_2244_maccs.fps
--- a/chemfp_mol2fps/test-data/CID_2244_maccs.fps Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,7 +0,0 @@
-#FPS1
-#num_bits=166
-#type=OpenBabel-MACCS/2
-#software=OpenBabel/2.3.1
-#source=CID_2244.sdf
-#date=2012-05-15T17:00:39
-0000000000000000000000010000016480cca2d21e 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee chemfp_sdf2fps/sdf2fps.xml
--- a/chemfp_sdf2fps/sdf2fps.xml Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,119 +0,0 @@
-<tool id="ctb_sdf2fps" name="SDF to Fingerprint" version="0.1.1">
-    <description>extract fingerprints from sdf files metadata</description>
-    <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism>
-    <requirements>
-        <requirement type="package" version="1.1p1">chemfp</requirement>
-        <requirement type="package" version="2.3.2">openbabel</requirement>
-    </requirements>
-    <command>
-<![CDATA[
-        sdf2fps --pubchem "${infile}" > "${outfile}"
-]]>
-    </command>
-    <inputs>
-        <param name="infile" type='data' format="sdf" label="SDF file with fingerprints as metadata"/>
-    </inputs>
-    <outputs>
-        <data name="outfile" format="fps"/>
-    </outputs>
-    <tests>
-    </tests>
-    <help>
-<![CDATA[
-
-.. class:: infomark
-
-**What this tool does**
-
-Read an input SD file, extract the fingerprints and store them in a FPS-file.
-
------
-
-.. class:: infomark
-
-**Input**
-
-`SD-Format`_
-
-.. _`SD-Format`: http://en.wikipedia.org/wiki/Chemical_table_file
-
-* Example::
-
- 28434379
-   -OEChem-02031205132D
-
-  37 39  0     0  0  0  0  0  0999 V2000
-     8.1648   -1.8842    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
-     6.0812   -0.2134    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
-     6.0812   -1.8229    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
-     2.5369   -2.0182    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
-     6.3919    0.7371    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
-     7.3704    0.9433    0.0000 C   0  0  0  0
-     ......
-   1 15  1  0  0  0  0
-   1 35  1  0  0  0  0
-   2  5  1  0  0  0  0
-   2 11  1  0  0  0  0
-   2 12  1  0  0  0  0
-   3 12  2  0  0  0  0
-   3 13  1  0  0  0  0
-   4 18  1  0  0  0  0
-   ......
-
- >PUBCHEM_COMPOUND_CID<
- 28434379
-
- > <PUBCHEM_COMPOUND_CANONICALIZED>
- 1
-
- > <PUBCHEM_CACTVS_COMPLEXITY>
- 280
-
- > <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
- 2
-
- > <PUBCHEM_CACTVS_HBOND_DONOR>
- 2
-
- > <PUBCHEM_CACTVS_ROTATABLE_BOND>
- 2
-
- > <PUBCHEM_CACTVS_SUBSKEYS>
- AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
-
- >
-
------
-
-.. class:: infomark
-
-**Output**
-
-* Example::
-
- #FPS1
- #num_bits=881
- #type=CACTVS-E_SCREEN/1.0 extended=2
- #software=CACTVS/unknown
- #source=/home/mohammed/galaxy-central/database/files/000/dataset_409.dat
- #date=2012-02-03T10:44:12
- 07ce04000000000000000000000000000080060000000c0600
- 00000000001a800f0000780008100000101487e9608c0bed32
- 48000580644626204101b4844805901b041c2e19511e45039b
- 8b2924101609401b13e4080000000000010020000004008000
- 0010000002000000000000 28434379
-
------
-
-.. class:: infomark
-
-**Cite**
-
-chemfp_ project
-
-.. _chemfp: http://chemfp.com/
-
-
-]]>
-    </help>
-</tool>
b
diff -r 43a9e7d9b24f -r 70b071de9bee mol2fps.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mol2fps.xml Sat May 20 08:31:44 2017 -0400
[
b'@@ -0,0 +1,276 @@\n+<tool id="ctb_chemfp_mol2fps" name="Molecules to Fingerprints" version="0.3.0">\n+    <description>with different fingerprint types</description>\n+    <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" merge_outputs="outfile"></parallelism-->\n+    <requirements>\n+        <requirement type="package" version="1.1p1">chemfp</requirement>\n+        <requirement type="package" version="2016.03.3">rdkit</requirement>\n+        <requirement type="package" version="2.4.1">openbabel</requirement>\n+    </requirements>\n+    <command>\n+<![CDATA[\n+        #set $fptype = $fp_opts.fp_opts_selector\n+\n+        #if $fptype in [\'--FP2\', \'--FP3\', \'--FP4\', \'--MACCS\']:\n+            ## Open Babel fingerprints\n+            ob2fps $fptype --in \'${infile.ext}\' \'${infile}\' -o \'${outfile}\'\n+        #else:\n+            ## RDKit fingerprints\n+            rdkit2fps --in \'${infile.ext}\' \'${infile}\' -o \'${outfile}\'\n+            #if $fp_opts.fp_opts_selector == "--RDK":\n+                --RDK\n+                --fpSize $fp_opts.fpSize\n+                --minPath $fp_opts.minPath\n+                --maxPath $fp_opts.maxPath\n+                --nBitsPerHash $fp_opts.nBitsPerHash\n+                $fp_opts.useHs\n+            #elif $fp_opts.fp_opts_selector == "--torsions":\n+                --torsions\n+                --fpSize $fp_opts.fpSize\n+                --targetSize $fp_opts.targetSize\n+            #elif $fp_opts.fp_opts_selector == "--morgan":\n+                --morgan\n+                --fpSize $fp_opts.fpSize\n+                --radius $fp_opts.radius\n+                $fp_opts.useFeatures\n+                $fp_opts.useChirality\n+                $fp_opts.useBondTypes\n+            #elif $fp_opts.fp_opts_selector == "--pairs":\n+                --paris\n+                --fpSize $fp_opts.fpSize\n+                --minLength $fp_opts.minLength\n+                --maxLength 
$fp_opts.maxLength\n+            #elif $fp_opts.fp_opts_selector == "--maccs166":\n+                --maccs166\n+            #elif $fp_opts.fp_opts_selector == "--substruct":\n+                --substruct\n+            #end if\n+        #end if\n+        --errors report 2>&1\n+]]>\n+    </command>\n+    <inputs>\n+        <param name="infile" type=\'data\' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/>\n+        <conditional name="fp_opts">\n+            <param name="fp_opts_selector" type="select" label="Type of fingerprint">\n+                <option value=\'--FP2\' selected="True">Open Babel FP2 fingerprints</option>\n+                <option value=\'--FP3\'>Open Babel FP3 fingerprints</option>\n+                <option value=\'--FP4\'>Open Babel FP4 fingerprints</option>\n+                <option value=\'--MACCS\'>Open Babel MACCS fingerprints</option>\n+                <option value=\'--RDK\'>RDKit topological fingerprint</option>\n+                <option value=\'--torsions\'>RDKit topological Torsion fingerprints</option>\n+                <option value=\'--morgan\'>RDKit Morgan fingerprints</option>\n+                <option value=\'--pairs\'>RDKit Atom Pair fingerprints</option>\n+                <option value=\'--maccs166\'>RDKit MACCS fingerprints</option>\n+                <option value=\'--substruct\'>RDKit substructure fingerprints</option>\n+            </param>\n+            <when value="--FP2" />\n+            <when value="--FP3" />\n+            <when value="--FP4" />\n+            <when value="--MACCS" />\n+            <when value="--RDK">\n+                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">\n+                    <validator type="in_range" min="1" />\n+                </param>\n+                <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help="">\n+                    <validator type="in_range" min="1" />\n+  
              </param>\n+                <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the s'..b'    <test>\n+            <param name="infile" value="CID_2244.smi" ftype="smi" />\n+            <param name="fp_opts.fp_opts_selector" value="--FP3" />\n+            <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" lines_diff="4"/>\n+        </test>\n+        <!-- FP4 -->\n+        <test>\n+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />\n+            <param name="fp_opts.fp_opts_selector" value="--FP4" />\n+            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/>\n+        </test>\n+        <test>\n+            <param name="infile" value="CID_2244.smi" ftype="smi" />\n+            <param name="fp_opts.fp_opts_selector" value="--FP4" />\n+            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/>\n+        </test>\n+        <!-- MACCS -->\n+        <test>\n+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />\n+            <param name="fp_opts.fp_opts_selector" value="--MACCS" />\n+            <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/>\n+        </test>\n+        <test>\n+            <param name="infile" value="CID_2244.smi" ftype="smi" />\n+            <param name="fp_opts.fp_opts_selector" value="--MACCS" />\n+            <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/>\n+        </test>\n+    </tests>\n+    <help>\n+<![CDATA[\n+\n+.. class:: infomark\n+\n+**What this tool does**\n+\n+This tool uses chemfp_ to calculate 10 different fingerprints of common file formats. Chemfp uses `Open Babel`_, OpenEye_ and RDKit_.\n+\n+For more information check the websites listed below::\n+\n+\t- http://www.rdkit.org/docs/GettingStartedInPython.html#fingerprinting-and-molecular-similarity\n+\t- http://openbabel.org/wiki/Tutorial:Fingerprints\n+\n+-----\n+\n+.. 
class:: infomark\n+\n+**Input**\n+\n+FPS fingerprint file format\n+\n+* Example::\n+\n+\t      - SDF File\n+\n+\t\t28434379\n+\t\t  -OEChem-02031205132D\n+\n+\t\t 37 39  0     0  0  0  0  0  0999 V2000\n+\t\t    8.1648   -1.8842    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n+\t\t    6.0812   -0.2134    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0\n+\t\t    6.0812   -1.8229    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0\n+\t\t    2.5369   -2.0182    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0\n+\t\t    6.3919    0.7371    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n+\t\t    7.3704    0.9433    0.0000 C   0  0  0  0\n+\t\t    ......\n+\t\t  1 15  1  0  0  0  0\n+\t\t  1 35  1  0  0  0  0\n+\t\t  2  5  1  0  0  0  0\n+\t\t  2 11  1  0  0  0  0\n+\t\t  2 12  1  0  0  0  0\n+\t\t  3 12  2  0  0  0  0\n+\t\t  3 13  1  0  0  0  0\n+\t\t  4 18  1  0  0  0  0\n+\t\t  ......\n+\n+\t\t\t>PUBCHEM_COMPOUND_CID<\n+\t\t\t28434379\n+\n+\t\t\t> <PUBCHEM_COMPOUND_CANONICALIZED>\n+\t\t\t1\n+\n+\t\t\t> <PUBCHEM_CACTVS_COMPLEXITY>\n+\t\t\t280\n+\n+\t\t\t> <PUBCHEM_CACTVS_HBOND_ACCEPTOR>\n+\t\t\t2\n+\n+\t\t\t> <PUBCHEM_CACTVS_HBOND_DONOR>\n+\t\t\t2\n+\n+\t\t\t> <PUBCHEM_CACTVS_ROTATABLE_BOND>\n+\t\t\t2\n+\n+\t\t\t> <PUBCHEM_CACTVS_SUBSKEYS>\n+\t\t\tAAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==\n+\n+\t\t\t>\n+\n+\t\t- type : FP2\n+\n+-----\n+\n+.. 
class:: infomark\n+\n+**Output**\n+\n+* Example::\n+\n+\t#FPS1\n+\t#num_bits=1021\n+\t#type=OpenBabel-FP2/1\n+\t#software=OpenBabel/2.3.0\n+\t#source=/tmp/dataset_409.dat.sdf\n+\t#date=2012-02-03T11:13:39\n+\tc0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c\n+\t0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300\n+\t10000000000080000000c0000060000c0000060810000010000000800102000000\t28434379\n+\n+\n+]]>\n+    </help>\n+    <citations>\n+        <citation type="doi">10.1186/1758-2946-3-33</citation>\n+        <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>\n+        <citation type="bibtex">\n+            @electronic{rdkit,\n+                title = {RDKit: Open-source cheminformatics},\n+                url ={http://www.rdkit.org}\n+            }\n+        </citation>\n+    </citations>\n+</tool>\n'
b
diff -r 43a9e7d9b24f -r 70b071de9bee nxn_clustering.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nxn_clustering.py Sat May 20 08:31:44 2017 -0400
[
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+"""
+    Modified version of code examples from the chemfp project.
+    http://code.google.com/p/chem-fingerprints/
+    Thanks to Andrew Dalke of Andrew Dalke Scientific!
+"""
+import matplotlib
+matplotlib.use('Agg')
+import argparse
+import os
+import chemfp
+import scipy.cluster.hierarchy as hcluster
+import pylab
+import numpy
+
+def distance_matrix(arena, tanimoto_threshold = 0.0):  # build the len(arena) x len(arena) matrix of (1.0 - Tanimoto similarity)
+    n = len(arena)
+    # Start off an n x n similarity matrix of doubles ("d") with 1.0s along the diagonal and 0.0 elsewhere
+    try:
+        similarities = numpy.identity(n, "d")
+    except:  # NOTE(review): bare except relabels every failure here as "too large"; presumably MemoryError is the intended case
+        raise Exception('Input dataset is to large!')
+    chemfp.set_num_threads( args.processors )  # NOTE: 'args' is the module-level namespace assigned in the __main__ block, not a parameter
+
+    ## Compute the full similarity matrix.
+    # The implementation computes the upper-triangle then copies
+    # the upper-triangle into lower-triangle. It does not include
+    # terms for the diagonal.
+    results = chemfp.search.threshold_tanimoto_search_symmetric(arena, threshold=tanimoto_threshold)
+
+    # Copy the returned scores into the NumPy array; cells with no hit keep their initial 0.0 (distance 1.0 below).
+    for row_index, row in enumerate(results.iter_indices_and_scores()):
+        for target_index, target_score in row:
+            similarities[row_index, target_index] = target_score
+
+    # Convert similarities to distances: distance = 1.0 - similarity (the diagonal becomes 0.0)
+    return 1.0 - similarities
+
+
+if __name__ == "__main__":  # command-line entry point: parse options, compute the distance matrix, write the requested outputs
+    parser = argparse.ArgumentParser(description="""NxN clustering for fps files.
+For more details please see the chemfp documentation:
+https://chemfp.readthedocs.org
+""")
+
+    parser.add_argument("-i", "--input", dest="input_path",
+                    required=True,
+                    help="Path to the input file.")
+
+    parser.add_argument("-c", "--cluster", dest="cluster_image",
+                    help="Path to the output cluster image.")
+
+    parser.add_argument("-s", "--smatrix", dest="similarity_matrix",
+                    help="Path to the similarity matrix output file.")
+
+    parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", 
+                    type=float, default=0.0,
+                    help="Tanimoto threshold [0.0]")
+
+    parser.add_argument("--oformat", default='png', help="Output format (png, svg)")  # forwarded to pylab.savefig(format=...) below
+
+    parser.add_argument('-p', '--processors', type=int, 
+        default=4)  # thread count handed to chemfp.set_num_threads() inside distance_matrix()
+
+    args = parser.parse_args()  # module-level name: distance_matrix() reads args.processors from it
+
+    targets = chemfp.open( args.input_path, format='fps' )  # the input is always read as FPS
+    arena = chemfp.load_fingerprints( targets )
+    distances  = distance_matrix( arena, args.tanimoto_threshold )
+
+    if args.similarity_matrix:
+        distances.tofile( args.similarity_matrix )  # NOTE: despite the option name, this writes the distance matrix (1.0 - similarity) through ndarray.tofile()
+
+    if args.cluster_image:
+        linkage = hcluster.linkage( distances, method="single", metric="euclidean" )  # NOTE(review): the full square matrix is passed; SciPy documents a 2-D input as observation vectors, not precomputed distances - confirm intent
+
+        hcluster.dendrogram(linkage, labels=arena.ids)  # leaves are labelled with the fingerprint ids from the arena
+
+        pylab.savefig( args.cluster_image, format=args.oformat )  # saves the current pylab figure, i.e. the dendrogram drawn above
+
b
diff -r 43a9e7d9b24f -r 70b071de9bee nxn_clustering.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nxn_clustering.xml Sat May 20 08:31:44 2017 -0400
[
@@ -0,0 +1,120 @@
+<tool id="ctb_chemfp_nxn_clustering" name="NxN Clustering" version="0.4">
+    <description>of molecular fingerprints</description>
+    <requirements>
+        <requirement type="package" version="1.1p1">chemfp</requirement>
+        <requirement type="package" version="2">python</requirement>
+        <requirement type="package" version="2.0.2">matplotlib</requirement>
+        <requirement type="package" version="0.19.0">scipy</requirement>
+        <requirement type="package" version="2.4.1">openbabel</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+<![CDATA[
+        python '$__tool_directory__/nxn_clustering.py'
+            -i '$infile'
+            -t $threshold
+            #if str($output_files) in ['both', 'image']:
+                --cluster '$image'
+            #end if
+            #if str($output_files) in ['both', 'matrix']:
+                --smatrix '$smilarity_matrix'
+            #end if
+            --oformat '$oformat'
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="fps" label="Fingerprint dataset" help="Dataset missing? See TIP below"/>
+        <param name='threshold' type='float' value='0.0' />
+        <param name='oformat' type='select' format='text' label="Format of the resulting picture">
+            <option value='png'>PNG</option>
+            <option value='svg'>SVG</option>
+        </param>
+        <param name='output_files' type='select' format='text' label="Output options">
+            <option value='both'>NxN matrix and Image</option>
+            <option value='image'>Image</option>
+            <option value='matrix'>NxN Matrix</option>
+        </param>
+
+    </inputs>
+    <outputs>
+        <data name="image" format="svg" label="${tool.name} on ${on_string} - Cluster Image">
+            <filter>output_files == "both" or output_files == "image"</filter>
+            <change_format>
+                <when input="oformat" value="png" format="png"/>
+            </change_format>
+        </data>
+        <data name="smilarity_matrix" format="binary" label="${tool.name} on ${on_string} - Similarity Matrix">
+            <filter>output_files == "both" or output_files == "matrix"</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" ftype="fps" value="targets.fps" />
+            <param name='threshold' value='0.75' />
+            <param name='oformat' value='svg' />
+            <param name='output_files' value='image' />
+            <output name="image" file='NxN_Clustering_on_q.svg' ftype="svg" compare="sim_size"/>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+.. class:: infomark
+
+**What this tool does**
+
+Generating hierarchical clusters and visualizing clusters with dendrograms.
+For the clustering and the fingerprint handling the chemfp_ project is used.
+
+.. _chemfp: http://chemfp.com/
+
+-----
+
+.. class:: warningmark
+
+**Hint**
+
+The plotting of the cluster image is sensible only with a small dataset.
+
+-----
+
+.. class:: infomark
+
+**Input**
+
+Molecular fingerprints in FPS format. Open Babel Fastsearch index is not supported.
+
+* Example::
+
+ -  fingerprints in FPS format
+
+ #FPS1
+ #num_bits=881
+ #type=CACTVS-E_SCREEN/1.0 extended=2
+ #software=CACTVS/unknown
+ #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat
+ #date=2012-02-09T13:20:37
+ 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e
+ 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009
+ 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e
+ 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807
+ ........
+
+ - Tanimoto threshold : 0.8 (between 0 and 1)
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+* Example::
+
+ .. image:: $PATH_TO_IMAGES/NxN_clustering.png
+
+
+]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>
+    </citations>
+</tool>
b
diff -r 43a9e7d9b24f -r 70b071de9bee repository_dependencies.xml
--- a/repository_dependencies.xml Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-<?xml version="1.0"?>
-<repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format) and the python numpy package.">
-    <repository changeset_revision="85eca06eefc6" name="molecule_datatypes" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
-</repositories>
b
diff -r 43a9e7d9b24f -r 70b071de9bee sdf2fps.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sdf2fps.xml Sat May 20 08:31:44 2017 -0400
[
@@ -0,0 +1,116 @@
+<tool id="ctb_sdf2fps" name="SDF to Fingerprint" version="0.2">
+    <description>extract fingerprints from sdf files metadata</description>
+    <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism-->
+    <requirements>
+        <requirement type="package" version="1.1p1">chemfp</requirement>
+        <requirement type="package" version="2.4.1">openbabel</requirement>
+    </requirements>
+    <command>
+<![CDATA[
+        sdf2fps --pubchem '${infile}' > '${outfile}'
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type='data' format="sdf" label="SDF file with fingerprints as metadata"/>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="fps"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" ftype="sdf" value="CID_2244.sdf" />
+            <output name="outfile" file='sdf2fps_result1.fps' ftype="fps" lines_diff="4" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+.. class:: infomark
+
+**What this tool does**
+
+Read an input SD file, extract the fingerprints and store them in an FPS file.
+
+-----
+
+.. class:: infomark
+
+**Input**
+
+`SD-Format`_
+
+.. _`SD-Format`: http://en.wikipedia.org/wiki/Chemical_table_file
+
+* Example::
+
+ 28434379
+   -OEChem-02031205132D
+
+  37 39  0     0  0  0  0  0  0999 V2000
+     8.1648   -1.8842    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+     6.0812   -0.2134    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+     6.0812   -1.8229    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+     2.5369   -2.0182    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+     6.3919    0.7371    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+     7.3704    0.9433    0.0000 C   0  0  0  0
+     ......
+   1 15  1  0  0  0  0
+   1 35  1  0  0  0  0
+   2  5  1  0  0  0  0
+   2 11  1  0  0  0  0
+   2 12  1  0  0  0  0
+   3 12  2  0  0  0  0
+   3 13  1  0  0  0  0
+   4 18  1  0  0  0  0
+   ......
+
+ > <PUBCHEM_COMPOUND_CID>
+ 28434379
+
+ > <PUBCHEM_COMPOUND_CANONICALIZED>
+ 1
+
+ > <PUBCHEM_CACTVS_COMPLEXITY>
+ 280
+
+ > <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
+ 2
+
+ > <PUBCHEM_CACTVS_HBOND_DONOR>
+ 2
+
+ > <PUBCHEM_CACTVS_ROTATABLE_BOND>
+ 2
+
+ > <PUBCHEM_CACTVS_SUBSKEYS>
+ AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
+
+ >
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+* Example::
+
+ #FPS1
+ #num_bits=881
+ #type=CACTVS-E_SCREEN/1.0 extended=2
+ #software=CACTVS/unknown
+ #source=/home/mohammed/galaxy-central/database/files/000/dataset_409.dat
+ #date=2012-02-03T10:44:12
+ 07ce04000000000000000000000000000080060000000c0600
+ 00000000001a800f0000780008100000101487e9608c0bed32
+ 48000580644626204101b4844805901b041c2e19511e45039b
+ 8b2924101609401b13e4080000000000010020000004008000
+ 0010000002000000000000 28434379
+
+
+]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>
+    </citations>
+</tool>
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/CID_2244.can
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244.can Sat May 20 08:31:44 2017 -0400
b
@@ -0,0 +1,1 @@
+CC(=O)Oc1ccccc1C(=O)O 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/CID_2244.inchi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244.inchi Sat May 20 08:31:44 2017 -0400
b
@@ -0,0 +1,1 @@
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/CID_2244.sdf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244.sdf Sat May 20 08:31:44 2017 -0400
b
@@ -0,0 +1,154 @@
+2244
+  -OEChem-05151212332D
+
+ 21 21  0     0  0  0  0  0  0999 V2000
+    3.7320   -0.0600    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301    1.4400    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981    1.4400    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    2.8660   -1.5600    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -0.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981   -1.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -2.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301   -1.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641    0.9400    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.8660   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.0000   -0.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.0611   -1.8700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.8671   -0.2500    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -2.6800    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.8671   -1.8700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    2.3100    0.4769    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.4631    0.2500    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.6900   -0.5969    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301    2.0600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+  1  5  1  0  0  0  0
+  1 12  1  0  0  0  0
+  2 11  1  0  0  0  0
+  2 21  1  0  0  0  0
+  3 11  2  0  0  0  0
+  4 12  2  0  0  0  0
+  5  6  1  0  0  0  0
+  5  7  2  0  0  0  0
+  6  8  2  0  0  0  0
+  6 11  1  0  0  0  0
+  7  9  1  0  0  0  0
+  7 14  1  0  0  0  0
+  8 10  1  0  0  0  0
+  8 15  1  0  0  0  0
+  9 10  2  0  0  0  0
+  9 16  1  0  0  0  0
+ 10 17  1  0  0  0  0
+ 12 13  1  0  0  0  0
+ 13 18  1  0  0  0  0
+ 13 19  1  0  0  0  0
+ 13 20  1  0  0  0  0
+M  END
+> <PUBCHEM_COMPOUND_CID>
+2244
+
+> <PUBCHEM_COMPOUND_CANONICALIZED>
+1
+
+> <PUBCHEM_CACTVS_COMPLEXITY>
+212
+
+> <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
+4
+
+> <PUBCHEM_CACTVS_HBOND_DONOR>
+1
+
+> <PUBCHEM_CACTVS_ROTATABLE_BOND>
+3
+
+> <PUBCHEM_CACTVS_SUBSKEYS>
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA==
+
+> <PUBCHEM_IUPAC_OPENEYE_NAME>
+2-acetoxybenzoic acid
+
+> <PUBCHEM_IUPAC_CAS_NAME>
+2-acetyloxybenzoic acid
+
+> <PUBCHEM_IUPAC_NAME>
+2-acetyloxybenzoic acid
+
+> <PUBCHEM_IUPAC_SYSTEMATIC_NAME>
+2-acetyloxybenzoic acid
+
+> <PUBCHEM_IUPAC_TRADITIONAL_NAME>
+2-acetoxybenzoic acid
+
+> <PUBCHEM_IUPAC_INCHI>
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
+
+> <PUBCHEM_IUPAC_INCHIKEY>
+BSYNRYMUTXBXSQ-UHFFFAOYSA-N
+
+> <PUBCHEM_XLOGP3>
+1.2
+
+> <PUBCHEM_EXACT_MASS>
+180.042259
+
+> <PUBCHEM_MOLECULAR_FORMULA>
+C9H8O4
+
+> <PUBCHEM_MOLECULAR_WEIGHT>
+180.15742
+
+> <PUBCHEM_OPENEYE_CAN_SMILES>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+> <PUBCHEM_OPENEYE_ISO_SMILES>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+> <PUBCHEM_CACTVS_TPSA>
+63.6
+
+> <PUBCHEM_MONOISOTOPIC_WEIGHT>
+180.042259
+
+> <PUBCHEM_TOTAL_CHARGE>
+0
+
+> <PUBCHEM_HEAVY_ATOM_COUNT>
+13
+
+> <PUBCHEM_ATOM_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ATOM_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ISOTOPIC_ATOM_COUNT>
+0
+
+> <PUBCHEM_COMPONENT_COUNT>
+1
+
+> <PUBCHEM_CACTVS_TAUTO_COUNT>
+1
+
+> <PUBCHEM_COORDINATE_TYPE>
+1
+5
+255
+
+> <PUBCHEM_BONDANNOTATIONS>
+5  6  8
+5  7  8
+6  8  8
+7  9  8
+8  10  8
+9  10  8
+
+$$$$
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/CID_2244.smi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244.smi Sat May 20 08:31:44 2017 -0400
b
@@ -0,0 +1,1 @@
+O(c1c(cccc1)C(=O)O)C(=O)C 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/CID_2244_FP2.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_FP2.fps Sat May 20 08:31:44 2017 -0400
b
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.4.1
+#source=/tmp/tmptaAke4/files/000/dataset_3.dat
+#date=2017-05-19T13:52:59
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/CID_2244_FP3.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_FP3.fps Sat May 20 08:31:44 2017 -0400
b
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.4.1
+#source=/tmp/tmptaAke4/files/000/dataset_7.dat
+#date=2017-05-19T13:53:45
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/CID_2244_FP4.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_FP4.fps Sat May 20 08:31:44 2017 -0400
b
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.4.1
+#source=/tmp/tmptaAke4/files/000/dataset_11.dat
+#date=2017-05-19T13:54:39
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/CID_2244_MACCS.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_MACCS.fps Sat May 20 08:31:44 2017 -0400
b
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.4.1
+#source=/tmp/tmptaAke4/files/000/dataset_15.dat
+#date=2017-05-19T13:55:30
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/CID_2244_maccs.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_maccs.fps Sat May 20 08:31:44 2017 -0400
b
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=166
+#type=OpenBabel-MACCS/2
+#software=OpenBabel/2.3.1
+#source=CID_2244.sdf
+#date=2012-05-15T17:00:39
+0000000000000000000000010000016480cca2d21e 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/NxN_Clustering_on_q.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/NxN_Clustering_on_q.svg Sat May 20 08:31:44 2017 -0400
b
b'@@ -0,0 +1,707 @@\n+<?xml version="1.0" encoding="utf-8" standalone="no"?>\n+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"\n+  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n+<!-- Created with matplotlib (http://matplotlib.org/) -->\n+<svg height="345pt" version="1.1" viewBox="0 0 460 345" width="460pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">\n+ <defs>\n+  <style type="text/css">\n+*{stroke-linecap:butt;stroke-linejoin:round;}\n+  </style>\n+ </defs>\n+ <g id="figure_1">\n+  <g id="patch_1">\n+   <path d="M 0 345.6 \n+L 460.8 345.6 \n+L 460.8 0 \n+L 0 0 \n+z\n+" style="fill:#ffffff;"/>\n+  </g>\n+  <g id="axes_1">\n+   <g id="patch_2">\n+    <path d="M 57.6 307.584 \n+L 414.72 307.584 \n+L 414.72 41.472 \n+L 57.6 41.472 \n+z\n+" style="fill:#ffffff;"/>\n+   </g>\n+   <g id="matplotlib.axis_1">\n+    <g id="xtick_1">\n+     <g id="text_1">\n+      <!-- 55079807 -->\n+      <defs>\n+       <path d="M 10.796875 72.90625 \n+L 49.515625 72.90625 \n+L 49.515625 64.59375 \n+L 19.828125 64.59375 \n+L 19.828125 46.734375 \n+Q 21.96875 47.46875 24.109375 47.828125 \n+Q 26.265625 48.1875 28.421875 48.1875 \n+Q 40.625 48.1875 47.75 41.5 \n+Q 54.890625 34.8125 54.890625 23.390625 \n+Q 54.890625 11.625 47.5625 5.09375 \n+Q 40.234375 -1.421875 26.90625 -1.421875 \n+Q 22.3125 -1.421875 17.546875 -0.640625 \n+Q 12.796875 0.140625 7.71875 1.703125 \n+L 7.71875 11.625 \n+Q 12.109375 9.234375 16.796875 8.0625 \n+Q 21.484375 6.890625 26.703125 6.890625 \n+Q 35.15625 6.890625 40.078125 11.328125 \n+Q 45.015625 15.765625 45.015625 23.390625 \n+Q 45.015625 31 40.078125 35.4375 \n+Q 35.15625 39.890625 26.703125 39.890625 \n+Q 22.75 39.890625 18.8125 39.015625 \n+Q 14.890625 38.140625 10.796875 36.28125 \n+z\n+" id="DejaVuSans-35"/>\n+       <path d="M 31.78125 66.40625 \n+Q 24.171875 66.40625 20.328125 58.90625 \n+Q 16.5 51.421875 16.5 36.375 \n+Q 16.5 21.390625 20.328125 13.890625 \n+Q 24.171875 6.390625 31.78125 6.390625 \n+Q 39.453125 
6.390625 43.28125 13.890625 \n+Q 47.125 21.390625 47.125 36.375 \n+Q 47.125 51.421875 43.28125 58.90625 \n+Q 39.453125 66.40625 31.78125 66.40625 \n+z\n+M 31.78125 74.21875 \n+Q 44.046875 74.21875 50.515625 64.515625 \n+Q 56.984375 54.828125 56.984375 36.375 \n+Q 56.984375 17.96875 50.515625 8.265625 \n+Q 44.046875 -1.421875 31.78125 -1.421875 \n+Q 19.53125 -1.421875 13.0625 8.265625 \n+Q 6.59375 17.96875 6.59375 36.375 \n+Q 6.59375 54.828125 13.0625 64.515625 \n+Q 19.53125 74.21875 31.78125 74.21875 \n+z\n+" id="DejaVuSans-30"/>\n+       <path d="M 8.203125 72.90625 \n+L 55.078125 72.90625 \n+L 55.078125 68.703125 \n+L 28.609375 0 \n+L 18.3125 0 \n+L 43.21875 64.59375 \n+L 8.203125 64.59375 \n+z\n+" id="DejaVuSans-37"/>\n+       <path d="M 10.984375 1.515625 \n+L 10.984375 10.5 \n+Q 14.703125 8.734375 18.5 7.8125 \n+Q 22.3125 6.890625 25.984375 6.890625 \n+Q 35.75 6.890625 40.890625 13.453125 \n+Q 46.046875 20.015625 46.78125 33.40625 \n+Q 43.953125 29.203125 39.59375 26.953125 \n+Q 35.25 24.703125 29.984375 24.703125 \n+Q 19.046875 24.703125 12.671875 31.3125 \n+Q 6.296875 37.9375 6.296875 49.421875 \n+Q 6.296875 60.640625 12.9375 67.421875 \n+Q 19.578125 74.21875 30.609375 74.21875 \n+Q 43.265625 74.21875 49.921875 64.515625 \n+Q 56.59375 54.828125 56.59375 36.375 \n+Q 56.59375 19.140625 48.40625 8.859375 \n+Q 40.234375 -1.421875 26.421875 -1.421875 \n+Q 22.703125 -1.421875 18.890625 -0.6875 \n+Q 15.09375 0.046875 10.984375 1.515625 \n+z\n+M 30.609375 32.421875 \n+Q 37.25 32.421875 41.125 36.953125 \n+Q 45.015625 41.5 45.015625 49.421875 \n+Q 45.015625 57.28125 41.125 61.84375 \n+Q 37.25 66.40625 30.609375 66.40625 \n+Q 23.96875 66.40625 20.09375 61.84375 \n+Q 16.21875 57.28125 16.21875 49.421875 \n+Q 16.21875 41.5 20.09375 36.953125 \n+Q 23.96875 32.421875 30.609375 32.421875 \n+z\n+" id="DejaVuSans-39"/>\n+       <path d="M 31.78125 34.625 \n+Q 24.75 34.625 20.71875 30.859375 \n+Q 16.703125 27.09375 16.703125 20.515625 \n+Q 16.703125 13.921875 20.71875 
10.15625 \n+Q 24.75 6.390625 31.78125 6.390625 \n+Q 38.8125 6.390625 42.859375 10.171875 \n+Q 46.921875 13.96875 46.921875 20.515625 \n+Q 4'..b'59.033203" xlink:href="#DejaVuSans-36"/>\n+      </g>\n+     </g>\n+    </g>\n+    <g id="ytick_8">\n+     <g id="line2d_8">\n+      <g>\n+       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#me28c3a741e" y="53.313113"/>\n+      </g>\n+     </g>\n+     <g id="text_21">\n+      <!-- 0.07 -->\n+      <g transform="translate(28.334375 57.112332)scale(0.1 -0.1)">\n+       <use xlink:href="#DejaVuSans-30"/>\n+       <use x="63.623047" xlink:href="#DejaVuSans-2e"/>\n+       <use x="95.410156" xlink:href="#DejaVuSans-30"/>\n+       <use x="159.033203" xlink:href="#DejaVuSans-37"/>\n+      </g>\n+     </g>\n+    </g>\n+   </g>\n+   <g id="LineCollection_1">\n+    <path clip-path="url(#p7a554818f3)" d="M 98.806154 307.584 \n+L 98.806154 160.244138 \n+L 126.276923 160.244138 \n+L 126.276923 307.584 \n+" style="fill:none;stroke:#008000;stroke-width:1.5;"/>\n+   </g>\n+   <g id="LineCollection_2">\n+    <path clip-path="url(#p7a554818f3)" d="M 208.689231 307.584 \n+L 208.689231 307.584 \n+L 236.16 307.584 \n+L 236.16 307.584 \n+" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 181.218462 307.584 \n+L 181.218462 307.584 \n+L 222.424615 307.584 \n+L 222.424615 307.584 \n+" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 263.630769 307.584 \n+L 263.630769 224.047744 \n+L 291.101538 224.047744 \n+L 291.101538 307.584 \n+" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 201.821538 307.584 \n+L 201.821538 202.211048 \n+L 277.366154 202.211048 \n+L 277.366154 224.047744 \n+" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n+   </g>\n+   <g id="LineCollection_3">\n+    <path clip-path="url(#p7a554818f3)" d="M 318.572308 307.584 \n+L 318.572308 227.498079 \n+L 
346.043077 227.498079 \n+L 346.043077 307.584 \n+" style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 373.513846 307.584 \n+L 373.513846 225.958341 \n+L 400.984615 225.958341 \n+L 400.984615 307.584 \n+" style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 332.307692 227.498079 \n+L 332.307692 178.92987 \n+L 387.249231 178.92987 \n+L 387.249231 225.958341 \n+" style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>\n+   </g>\n+   <g id="LineCollection_4">\n+    <path clip-path="url(#p7a554818f3)" d="M 239.593846 202.211048 \n+L 239.593846 126.040908 \n+L 359.778462 126.040908 \n+L 359.778462 178.92987 \n+" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 153.747692 307.584 \n+L 153.747692 98.265487 \n+L 299.686154 98.265487 \n+L 299.686154 126.040908 \n+" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 112.541538 160.244138 \n+L 112.541538 92.745033 \n+L 226.716923 92.745033 \n+L 226.716923 98.265487 \n+" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 71.335385 307.584 \n+L 71.335385 54.144 \n+L 169.629231 54.144 \n+L 169.629231 92.745033 \n+" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n+   </g>\n+   <g id="patch_3">\n+    <path d="M 57.6 307.584 \n+L 57.6 41.472 \n+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n+   </g>\n+   <g id="patch_4">\n+    <path d="M 414.72 307.584 \n+L 414.72 41.472 \n+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n+   </g>\n+   <g id="patch_5">\n+    <path d="M 57.6 307.584 \n+L 414.72 307.584 \n+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n+   </g>\n+   <g id="patch_6">\n+    <path d="M 57.6 41.472 \n+L 414.72 41.472 \n+" 
style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n+   </g>\n+  </g>\n+ </g>\n+ <defs>\n+  <clipPath id="p7a554818f3">\n+   <rect height="266.112" width="357.12" x="57.6" y="41.472"/>\n+  </clipPath>\n+ </defs>\n+</svg>\n'
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/Taylor-Butina_Clustering_on_data_q.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Taylor-Butina_Clustering_on_data_q.txt Sat May 20 08:31:44 2017 -0400
b
@@ -0,0 +1,4 @@
+#0 true singletons
+#0 false singletons
+#clusters: 1
+55091752 12 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091467 55168823 55091849
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/q.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/q.fps Sat May 20 08:31:44 2017 -0400
b
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=881
+#type=CACTVS-E_SCREEN/1.0 extended=2
+#software=CACTVS/unknown
+#source=CID_28434379.sdf
+#date=2012-02-03T13:08:39
+07ce04000000000000000000000000000080060000000c060000000000001a800f0000780008100000101487e9608c0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 28434379
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/sdf2fps_result1.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sdf2fps_result1.fps Sat May 20 08:31:44 2017 -0400
b
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=881
+#type=CACTVS-E_SCREEN/1.0 extended=2
+#software=CACTVS/unknown
+#source=/tmp/tmpN2w37z/files/000/dataset_1.dat
+#date=2017-05-19T14:27:41
+030e1c000000000000000000000000000000000000000c00000000000000008000000058000010000030200119004c70010060001140044b100040100024040010118060101330e46c21ac5841de06a50788109de11113047100000000001000000000000000080000000000000000 2244
b
diff -r 43a9e7d9b24f -r 70b071de9bee test-data/targets.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/targets.fps Sat May 20 08:31:44 2017 -0400
b
@@ -0,0 +1,19 @@
+#FPS1
+#num_bits=881
+#type=CACTVS-E_SCREEN/1.0 extended=2
+#software=CACTVS/unknown
+#source=Desktop/3579363516810334491.sdf
+#date=2012-02-03T13:07:47
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 3153534
+07ce0c000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55168823
+07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 55102353
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800003000000100200000040080000010000002000000000000 55091849
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 55091752
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000000100200000040080000010000002000000000000 55091467
+07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 55091466
+07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 55091416
+03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 6499094
+03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 6485578
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000000100200000040080000010000002000000000000 6485577
b
diff -r 43a9e7d9b24f -r 70b071de9bee tool_dependencies.xml
--- a/tool_dependencies.xml Sun Nov 01 10:27:01 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,21 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="rdkit" version="2012_12_1">
-        <repository changeset_revision="2ab9cdc5cd14" name="package_rdkit_2012_12" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="numpy" version="1.7.1">
-        <repository changeset_revision="300877695495" name="package_numpy_1_7" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="matplotlib" version="1.2.1">
-        <repository changeset_revision="dddf79f7a6f9" name="package_matplotlib_1_2" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="chemfp" version="1.1p1">
-        <repository changeset_revision="cb2b38ca9d96" name="package_chemfp_1_1" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="scipy" version="0.12.0">
-        <repository changeset_revision="cfbbe183f8a7" name="package_scipy_0_12" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="openbabel" version="2.3.2">
-        <repository changeset_revision="e5ef70185d24" name="package_openbabel_2_3" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>