Repository 'chemfp'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/chemfp

Changeset 5:57a1a58056a6 (2017-05-20)
Previous changeset 4:685a138131f0 (2017-05-20) Next changeset 6:e3a7d6cc87af (2018-03-23)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit d786052cd04f8b25eb4aff80b1b9724f62031b61
added:
butina_clustering.py
butina_clustering.xml
mol2fps.xml
nxn_clustering.py
nxn_clustering.xml
sdf2fps.xml
static/images/NxN_clustering.png
static/images/NxN_clustering.svg
test-data/CID_2244.can
test-data/CID_2244.inchi
test-data/CID_2244.sdf
test-data/CID_2244.smi
test-data/CID_2244_FP2.fps
test-data/CID_2244_FP3.fps
test-data/CID_2244_FP4.fps
test-data/CID_2244_MACCS.fps
test-data/CID_2244_maccs.fps
test-data/NxN_Clustering_on_q.svg
test-data/Taylor-Butina_Clustering_on_data_q.txt
test-data/sdf2fps_result1.fps
removed:
simsearch.xml
test-data/simsearch_on_tragets_and_q.tabular
b
diff -r 685a138131f0 -r 57a1a58056a6 butina_clustering.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/butina_clustering.py Sat May 20 12:57:06 2017 -0400
[
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+"""
+    Modified version of code examples from the chemfp project.
+    http://code.google.com/p/chem-fingerprints/
+    Thanks to Andrew Dalke of Andrew Dalke Scientific!
+"""
+
+import chemfp
+import sys
+import os
+import tempfile
+import argparse
+import subprocess
+from chemfp import search
+
+def unix_sort(results):
+    temp_unsorted = tempfile.NamedTemporaryFile(delete=False)
+    for (i,indices) in enumerate( results.iter_indices() ):
+        temp_unsorted.write('%s %s\n' % (len(indices), i))
+    temp_unsorted.close()
+    temp_sorted = tempfile.NamedTemporaryFile(delete=False)
+    temp_sorted.close()
+    p = subprocess.Popen(['sort', '-n', '-r', '-k', '1,1'], stdin=open(temp_unsorted.name), stdout=open(temp_sorted.name, 'w+'))
+    stdout, stderr = p.communicate()
+    return_code = p.returncode
+
+    if return_code:
+        sys.stdout.write(stdout)
+        sys.stderr.write(stderr)
+        sys.stderr.write("Return error code %i from command:\n" % return_code)
+    temp_sorted.close()
+    os.remove(temp_unsorted.name)
+
+    for line in open(temp_sorted.name):
+        size, fp_idx = line.strip().split()
+        yield (int(size), int(fp_idx))
+
+    os.remove(temp_sorted.name)
+
+def butina( args ):
+    """
+        Taylor-Butina clustering from the chemfp help.
+    """
+    out = args.output_path
+    targets = chemfp.open( args.input_path, format='fps' )
+    arena = chemfp.load_fingerprints( targets )
+
+    chemfp.set_num_threads( args.processors )
+    results = search.threshold_tanimoto_search_symmetric(arena, threshold = args.tanimoto_threshold)
+    results.reorder_all("move-closest-first")
+
+    sorted_ids = unix_sort(results)
+
+    # Determine the true/false singletons and the clusters
+    true_singletons = []
+    false_singletons = []
+    clusters = []
+
+    seen = set()
+    #for (size, fp_idx, members) in results:
+    for (size, fp_idx) in sorted_ids:
+        members = results[fp_idx].get_indices()
+        #print arena.ids[ fp_idx ], [arena.ids[ m ] for m in members]
+        if fp_idx in seen:
+            # Can't use a centroid which is already assigned
+            continue
+        seen.add(fp_idx)
+
+        if size == 0:
+            # The only fingerprint in the exclusion sphere is itself
+            true_singletons.append( fp_idx )
+            continue
+
+        # Figure out which ones haven't yet been assigned
+        unassigned = set(members) - seen
+
+        if not unassigned:
+            false_singletons.append(fp_idx)
+            continue
+
+        # this is a new cluster
+        clusters.append( (fp_idx, unassigned) )
+        seen.update(unassigned)
+
+    len_cluster = len(clusters)
+    #out.write( "#%s true singletons: %s\n" % ( len(true_singletons), " ".join(sorted(arena.ids[idx] for idx in true_singletons)) ) )
+    #out.write( "#%s false singletons: %s\n" % ( len(false_singletons), " ".join(sorted(arena.ids[idx] for idx in false_singletons)) ) )
+
+    out.write( "#%s true singletons\n" % len(true_singletons) )
+    out.write( "#%s false singletons\n" % len(false_singletons) )
+    out.write( "#clusters: %s\n" % len_cluster )
+
+    # Sort so the cluster with the most compounds comes first,
+    # then by alphabetically smallest id
+    def cluster_sort_key(cluster):
+        centroid_idx, members = cluster
+        return -len(members), arena.ids[centroid_idx]
+
+    clusters.sort(key=cluster_sort_key)
+
+    for centroid_idx, members in clusters:
+        centroid_name = arena.ids[centroid_idx]
+        out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(arena.ids[idx] for idx in members)))
+        #ToDo: len(members) need to be some biggest top 90% or something ...
+
+    for idx in true_singletons:
+        out.write("%s\t%s\n" % (arena.ids[idx], 0))
+
+    out.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="""Taylor-Butina clustering for fps files.
+For more details please see the original publication or the chemfp documentation:
+http://www.chemomine.co.uk/dbclus-paper.pdf
+https://chemfp.readthedocs.org
+""")
+
+    parser.add_argument("-i", "--input", dest="input_path",
+                    required=True,
+                    help="Path to the input file.")
+
+    parser.add_argument("-o", "--output", dest="output_path", type=argparse.FileType('w'),
+                    default=sys.stdout,
+                    help="Path to the output file.")
+
+    parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", type=float,
+                    default=0.8,
+                    help="Tanimoto threshold [0.8]")
+
+    parser.add_argument('-p', '--processors', type=int, default=4)
+
+    options = parser.parse_args()
+    butina( options )
b
diff -r 685a138131f0 -r 57a1a58056a6 butina_clustering.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/butina_clustering.xml Sat May 20 12:57:06 2017 -0400
[
@@ -0,0 +1,93 @@
+<tool id="ctb_chemfp_butina_clustering" name="Taylor-Butina Clustering" version="0.2">
+    <description>of molecular fingerprints</description>
+    <requirements>
+        <requirement type="package" version="1.1p1">chemfp</requirement>
+        <requirement type="package" version="2.4.1">openbabel</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+<![CDATA[
+        python '$__tool_directory__/butina_clustering.py'
+            -i '$infile'
+            -t $threshold
+            -o '$outfile'
+            -p \${GALAXY_SLOTS:-1}
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="fps" label="Fingerprint dataset" help="Dataset missing? See TIP below"/>
+        <param name='threshold' type='float' value='0.8'/>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="outfile"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" ftype="fps" value="targets.fps"/>
+            <param name='threshold' value='0.8' ></param>
+            <output name="outfile" ftype="tabular"  file='Taylor-Butina_Clustering_on_data_q.txt'/>
+        </test>
+    </tests>
+<help>
+<![CDATA[
+
+
+.. class:: infomark
+
+**What this tool does**
+
+Unsupervised non-hierarchical clustering method based on the Taylor-Butina algorithm, which guarantees that every cluster contains molecules which are within a distance cutoff of the central molecule. This tool is based on the chemfp_ project.
+
+.. _chemfp: http://chemfp.com/
+
+-----
+
+.. class:: infomark
+
+**Input**
+
+| Molecular fingerprints in FPS format.
+| Open Babel Fastsearch index is not supported.
+
+* Example::
+
+ -  fingerprints in FPS format
+
+ #FPS1
+ #num_bits=881
+ #type=CACTVS-E_SCREEN/1.0 extended=2
+ #software=CACTVS/unknown
+ #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat
+ #date=2012-02-09T13:20:37
+ 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e
+ 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009
+ 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e
+ 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807
+ ........
+
+ - Tanimoto threshold : 0.8 (between 0 and 1)
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+* Example::
+
+ 0 true singletons
+ =>
+
+ 0 false singletons
+ =>
+
+ 1 clusters
+ 55091849 has 12 other members
+ => 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823
+
+
+]]>
+ </help>
+    <citations>
+        <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>
+    </citations>
+</tool>
b
diff -r 685a138131f0 -r 57a1a58056a6 mol2fps.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mol2fps.xml Sat May 20 12:57:06 2017 -0400
[
b'@@ -0,0 +1,276 @@\n+<tool id="ctb_chemfp_mol2fps" name="Molecules to Fingerprints" version="0.3.0">\n+    <description>with different fingerprint types</description>\n+    <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" merge_outputs="outfile"></parallelism-->\n+    <requirements>\n+        <requirement type="package" version="1.1p1">chemfp</requirement>\n+        <requirement type="package" version="2016.03.3">rdkit</requirement>\n+        <requirement type="package" version="2.4.1">openbabel</requirement>\n+    </requirements>\n+    <command>\n+<![CDATA[\n+        #set $fptype = $fp_opts.fp_opts_selector\n+\n+        #if $fptype in [\'--FP2\', \'--FP3\', \'--FP4\', \'--MACCS\']:\n+            ## Open Babel fingerprints\n+            ob2fps $fptype --in \'${infile.ext}\' \'${infile}\' -o \'${outfile}\'\n+        #else:\n+            ## RDKit fingerprints\n+            rdkit2fps --in \'${infile.ext}\' \'${infile}\' -o \'${outfile}\'\n+            #if $fp_opts.fp_opts_selector == "--RDK":\n+                --RDK\n+                --fpSize $fp_opts.fpSize\n+                --minPath $fp_opts.minPath\n+                --maxPath $fp_opts.maxPath\n+                --nBitsPerHash $fp_opts.nBitsPerHash\n+                $fp_opts.useHs\n+            #elif $fp_opts.fp_opts_selector == "--torsions":\n+                --torsions\n+                --fpSize $fp_opts.fpSize\n+                --targetSize $fp_opts.targetSize\n+            #elif $fp_opts.fp_opts_selector == "--morgan":\n+                --morgan\n+                --fpSize $fp_opts.fpSize\n+                --radius $fp_opts.radius\n+                $fp_opts.useFeatures\n+                $fp_opts.useChirality\n+                $fp_opts.useBondTypes\n+            #elif $fp_opts.fp_opts_selector == "--pairs":\n+                --paris\n+                --fpSize $fp_opts.fpSize\n+                --minLength $fp_opts.minLength\n+                --maxLength 
$fp_opts.maxLength\n+            #elif $fp_opts.fp_opts_selector == "--maccs166":\n+                --maccs166\n+            #elif $fp_opts.fp_opts_selector == "--substruct":\n+                --substruct\n+            #end if\n+        #end if\n+        --errors report 2>&1\n+]]>\n+    </command>\n+    <inputs>\n+        <param name="infile" type=\'data\' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/>\n+        <conditional name="fp_opts">\n+            <param name="fp_opts_selector" type="select" label="Type of fingerprint">\n+                <option value=\'--FP2\' selected="True">Open Babel FP2 fingerprints</option>\n+                <option value=\'--FP3\'>Open Babel FP3 fingerprints</option>\n+                <option value=\'--FP4\'>Open Babel FP4 fingerprints</option>\n+                <option value=\'--MACCS\'>Open Babel MACCS fingerprints</option>\n+                <option value=\'--RDK\'>RDKit topological fingerprint</option>\n+                <option value=\'--torsions\'>RDKit topological Torsion fingerprints</option>\n+                <option value=\'--morgan\'>RDKit Morgan fingerprints</option>\n+                <option value=\'--pairs\'>RDKit Atom Pair fingerprints</option>\n+                <option value=\'--maccs166\'>RDKit MACCS fingerprints</option>\n+                <option value=\'--substruct\'>RDKit substructure fingerprints</option>\n+            </param>\n+            <when value="--FP2" />\n+            <when value="--FP3" />\n+            <when value="--FP4" />\n+            <when value="--MACCS" />\n+            <when value="--RDK">\n+                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">\n+                    <validator type="in_range" min="1" />\n+                </param>\n+                <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help="">\n+                    <validator type="in_range" min="1" />\n+  
              </param>\n+                <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the s'..b'    <test>\n+            <param name="infile" value="CID_2244.smi" ftype="smi" />\n+            <param name="fp_opts.fp_opts_selector" value="--FP3" />\n+            <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" lines_diff="4"/>\n+        </test>\n+        <!-- FP4 -->\n+        <test>\n+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />\n+            <param name="fp_opts.fp_opts_selector" value="--FP4" />\n+            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/>\n+        </test>\n+        <test>\n+            <param name="infile" value="CID_2244.smi" ftype="smi" />\n+            <param name="fp_opts.fp_opts_selector" value="--FP4" />\n+            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/>\n+        </test>\n+        <!-- MACCS -->\n+        <test>\n+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />\n+            <param name="fp_opts.fp_opts_selector" value="--MACCS" />\n+            <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/>\n+        </test>\n+        <test>\n+            <param name="infile" value="CID_2244.smi" ftype="smi" />\n+            <param name="fp_opts.fp_opts_selector" value="--MACCS" />\n+            <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/>\n+        </test>\n+    </tests>\n+    <help>\n+<![CDATA[\n+\n+.. class:: infomark\n+\n+**What this tool does**\n+\n+This tool uses chemfp_ to calculate 10 different fingerprints of common file formats. Chemfp uses `Open Babel`_, OpenEye_ and RDKit_.\n+\n+For more information check the websites listed below::\n+\n+\t- http://www.rdkit.org/docs/GettingStartedInPython.html#fingerprinting-and-molecular-similarity\n+\t- http://openbabel.org/wiki/Tutorial:Fingerprints\n+\n+-----\n+\n+.. 
class:: infomark\n+\n+**Input**\n+\n+FPS fingerprint file format\n+\n+* Example::\n+\n+\t      - SDF File\n+\n+\t\t28434379\n+\t\t  -OEChem-02031205132D\n+\n+\t\t 37 39  0     0  0  0  0  0  0999 V2000\n+\t\t    8.1648   -1.8842    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n+\t\t    6.0812   -0.2134    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0\n+\t\t    6.0812   -1.8229    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0\n+\t\t    2.5369   -2.0182    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0\n+\t\t    6.3919    0.7371    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n+\t\t    7.3704    0.9433    0.0000 C   0  0  0  0\n+\t\t    ......\n+\t\t  1 15  1  0  0  0  0\n+\t\t  1 35  1  0  0  0  0\n+\t\t  2  5  1  0  0  0  0\n+\t\t  2 11  1  0  0  0  0\n+\t\t  2 12  1  0  0  0  0\n+\t\t  3 12  2  0  0  0  0\n+\t\t  3 13  1  0  0  0  0\n+\t\t  4 18  1  0  0  0  0\n+\t\t  ......\n+\n+\t\t\t>PUBCHEM_COMPOUND_CID<\n+\t\t\t28434379\n+\n+\t\t\t> <PUBCHEM_COMPOUND_CANONICALIZED>\n+\t\t\t1\n+\n+\t\t\t> <PUBCHEM_CACTVS_COMPLEXITY>\n+\t\t\t280\n+\n+\t\t\t> <PUBCHEM_CACTVS_HBOND_ACCEPTOR>\n+\t\t\t2\n+\n+\t\t\t> <PUBCHEM_CACTVS_HBOND_DONOR>\n+\t\t\t2\n+\n+\t\t\t> <PUBCHEM_CACTVS_ROTATABLE_BOND>\n+\t\t\t2\n+\n+\t\t\t> <PUBCHEM_CACTVS_SUBSKEYS>\n+\t\t\tAAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==\n+\n+\t\t\t>\n+\n+\t\t- type : FP2\n+\n+-----\n+\n+.. 
class:: infomark\n+\n+**Output**\n+\n+* Example::\n+\n+\t#FPS1\n+\t#num_bits=1021\n+\t#type=OpenBabel-FP2/1\n+\t#software=OpenBabel/2.3.0\n+\t#source=/tmp/dataset_409.dat.sdf\n+\t#date=2012-02-03T11:13:39\n+\tc0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c\n+\t0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300\n+\t10000000000080000000c0000060000c0000060810000010000000800102000000\t28434379\n+\n+\n+]]>\n+    </help>\n+    <citations>\n+        <citation type="doi">10.1186/1758-2946-3-33</citation>\n+        <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>\n+        <citation type="bibtex">\n+            @electronic{rdkit,\n+                title = {RDKit: Open-source cheminformatics},\n+                url ={http://www.rdkit.org}\n+            }\n+        </citation>\n+    </citations>\n+</tool>\n'
b
diff -r 685a138131f0 -r 57a1a58056a6 nxn_clustering.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nxn_clustering.py Sat May 20 12:57:06 2017 -0400
[
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+"""
+    Modified version of code examples from the chemfp project.
+    http://code.google.com/p/chem-fingerprints/
+    Thanks to Andrew Dalke of Andrew Dalke Scientific!
+"""
+import matplotlib
+matplotlib.use('Agg')
+import argparse
+import os
+import chemfp
+import scipy.cluster.hierarchy as hcluster
+import pylab
+import numpy
+
+def distance_matrix(arena, tanimoto_threshold = 0.0):
+    n = len(arena)
+    # Start off a similarity matrix with 1.0s along the diagonal
+    try:
+        similarities = numpy.identity(n, "d")
+    except:
+        raise Exception('Input dataset is to large!')
+    chemfp.set_num_threads( args.processors )
+
+    ## Compute the full similarity matrix.
+    # The implementation computes the upper-triangle then copies
+    # the upper-triangle into lower-triangle. It does not include
+    # terms for the diagonal.
+    results = chemfp.search.threshold_tanimoto_search_symmetric(arena, threshold=tanimoto_threshold)
+
+    # Copy the results into the NumPy array.
+    for row_index, row in enumerate(results.iter_indices_and_scores()):
+        for target_index, target_score in row:
+            similarities[row_index, target_index] = target_score
+
+    # Return the distance matrix using the similarity matrix
+    return 1.0 - similarities
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="""NxN clustering for fps files.
+For more details please see the chemfp documentation:
+https://chemfp.readthedocs.org
+""")
+
+    parser.add_argument("-i", "--input", dest="input_path",
+                    required=True,
+                    help="Path to the input file.")
+
+    parser.add_argument("-c", "--cluster", dest="cluster_image",
+                    help="Path to the output cluster image.")
+
+    parser.add_argument("-s", "--smatrix", dest="similarity_matrix",
+                    help="Path to the similarity matrix output file.")
+
+    parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", 
+                    type=float, default=0.0,
+                    help="Tanimoto threshold [0.0]")
+
+    parser.add_argument("--oformat", default='png', help="Output format (png, svg)")
+
+    parser.add_argument('-p', '--processors', type=int, 
+        default=4)
+
+    args = parser.parse_args()
+
+    targets = chemfp.open( args.input_path, format='fps' )
+    arena = chemfp.load_fingerprints( targets )
+    distances  = distance_matrix( arena, args.tanimoto_threshold )
+
+    if args.similarity_matrix:
+        distances.tofile( args.similarity_matrix )
+
+    if args.cluster_image:
+        linkage = hcluster.linkage( distances, method="single", metric="euclidean" )
+
+        hcluster.dendrogram(linkage, labels=arena.ids)
+
+        pylab.savefig( args.cluster_image, format=args.oformat )
+
b
diff -r 685a138131f0 -r 57a1a58056a6 nxn_clustering.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nxn_clustering.xml Sat May 20 12:57:06 2017 -0400
[
@@ -0,0 +1,120 @@
+<tool id="ctb_chemfp_nxn_clustering" name="NxN Clustering" version="0.4">
+    <description>of molecular fingerprints</description>
+    <requirements>
+        <requirement type="package" version="1.1p1">chemfp</requirement>
+        <requirement type="package" version="2">python</requirement>
+        <requirement type="package" version="2.0.2">matplotlib</requirement>
+        <requirement type="package" version="0.19.0">scipy</requirement>
+        <requirement type="package" version="2.4.1">openbabel</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+<![CDATA[
+        python '$__tool_directory__/nxn_clustering.py'
+            -i '$infile'
+            -t $threshold
+            #if str($output_files) in ['both', 'image']:
+                --cluster '$image'
+            #end if
+            #if str($output_files) in ['both', 'matrix']:
+                --smatrix '$smilarity_matrix'
+            #end if
+            --oformat '$oformat'
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="fps" label="Fingerprint dataset" help="Dataset missing? See TIP below"/>
+        <param name='threshold' type='float' value='0.0' />
+        <param name='oformat' type='select' format='text' label="Format of the resulting picture">
+            <option value='png'>PNG</option>
+            <option value='svg'>SVG</option>
+        </param>
+        <param name='output_files' type='select' format='text' label="Output options">
+            <option value='both'>NxN matrix and Image</option>
+            <option value='image'>Image</option>
+            <option value='matrix'>NxN Matrix</option>
+        </param>
+
+    </inputs>
+    <outputs>
+        <data name="image" format="svg" label="${tool.name} on ${on_string} - Cluster Image">
+            <filter>output_files == "both" or output_files == "image"</filter>
+            <change_format>
+                <when input="oformat" value="png" format="png"/>
+            </change_format>
+        </data>
+        <data name="smilarity_matrix" format="binary" label="${tool.name} on ${on_string} - Similarity Matrix">
+            <filter>output_files == "both" or output_files == "matrix"</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" ftype="fps" value="targets.fps" />
+            <param name='treshold' value='0.75' />
+            <param name='oformat' value='svg' />
+            <param name='output_files' value='image' />
+            <output name="image" file='NxN_Clustering_on_q.svg' ftype="svg" compare="sim_size"/>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+.. class:: infomark
+
+**What this tool does**
+
+Generating hierarchical clusters and visualizing clusters with dendrograms.
+For the clustering and the fingerprint handling the chemfp_ project is used.
+
+.. _chemfp: http://chemfp.com/
+
+-----
+
+.. class:: warningmark
+
+**Hint**
+
+The plotting of the cluster image is sensible only with a small dataset.
+
+-----
+
+.. class:: infomark
+
+**Input**
+
+Molecular fingerprints in FPS format. Open Babel Fastsearch index is not supported.
+
+* Example::
+
+ -  fingerprints in FPS format
+
+ #FPS1
+ #num_bits=881
+ #type=CACTVS-E_SCREEN/1.0 extended=2
+ #software=CACTVS/unknown
+ #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat
+ #date=2012-02-09T13:20:37
+ 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e
+ 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009
+ 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e
+ 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807
+ ........
+
+ - Tanimoto threshold : 0.8 (between 0 and 1)
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+* Example::
+
+ .. image:: $PATH_TO_IMAGES/NxN_clustering.png
+
+
+]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>
+    </citations>
+</tool>
b
diff -r 685a138131f0 -r 57a1a58056a6 sdf2fps.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sdf2fps.xml Sat May 20 12:57:06 2017 -0400
[
@@ -0,0 +1,116 @@
+<tool id="ctb_sdf2fps" name="SDF to Fingerprint" version="0.2">
+    <description>extract fingerprints from sdf files metadata</description>
+    <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism-->
+    <requirements>
+        <requirement type="package" version="1.1p1">chemfp</requirement>
+        <requirement type="package" version="2.4.1">openbabel</requirement>
+    </requirements>
+    <command>
+<![CDATA[
+        sdf2fps --pubchem '${infile}' > '${outfile}'
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type='data' format="sdf" label="SDF file with fingerprints as metadata"/>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="fps"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" ftype="sdf" value="CID_2244.sdf" />
+            <output name="outfile" file='sdf2fps_result1.fps' ftype="fps" lines_diff="4" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+.. class:: infomark
+
+**What this tool does**
+
+Reads an input SD file, extracts the fingerprints and stores them in an FPS file.
+
+-----
+
+.. class:: infomark
+
+**Input**
+
+`SD-Format`_
+
+.. _`SD-Format`: http://en.wikipedia.org/wiki/Chemical_table_file
+
+* Example::
+
+ 28434379
+   -OEChem-02031205132D
+
+  37 39  0     0  0  0  0  0  0999 V2000
+     8.1648   -1.8842    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+     6.0812   -0.2134    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+     6.0812   -1.8229    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+     2.5369   -2.0182    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+     6.3919    0.7371    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+     7.3704    0.9433    0.0000 C   0  0  0  0
+     ......
+   1 15  1  0  0  0  0
+   1 35  1  0  0  0  0
+   2  5  1  0  0  0  0
+   2 11  1  0  0  0  0
+   2 12  1  0  0  0  0
+   3 12  2  0  0  0  0
+   3 13  1  0  0  0  0
+   4 18  1  0  0  0  0
+   ......
+
+ >PUBCHEM_COMPOUND_CID<
+ 28434379
+
+ > <PUBCHEM_COMPOUND_CANONICALIZED>
+ 1
+
+ > <PUBCHEM_CACTVS_COMPLEXITY>
+ 280
+
+ > <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
+ 2
+
+ > <PUBCHEM_CACTVS_HBOND_DONOR>
+ 2
+
+ > <PUBCHEM_CACTVS_ROTATABLE_BOND>
+ 2
+
+ > <PUBCHEM_CACTVS_SUBSKEYS>
+ AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
+
+ >
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+* Example::
+
+ #FPS1
+ #num_bits=881
+ #type=CACTVS-E_SCREEN/1.0 extended=2
+ #software=CACTVS/unknown
+ #source=/home/mohammed/galaxy-central/database/files/000/dataset_409.dat
+ #date=2012-02-03T10:44:12
+ 07ce04000000000000000000000000000080060000000c0600
+ 00000000001a800f0000780008100000101487e9608c0bed32
+ 48000580644626204101b4844805901b041c2e19511e45039b
+ 8b2924101609401b13e4080000000000010020000004008000
+ 0010000002000000000000 28434379
+
+
+]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>
+    </citations>
+</tool>
b
diff -r 685a138131f0 -r 57a1a58056a6 simsearch.xml
--- a/simsearch.xml Sat May 20 12:45:01 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,121 +0,0 @@
-<tool id="ctb_simsearch" name="Similarity Search" version="0.2">
-    <description>of fingerprint data sets</description>
-    <requirements>
-        <requirement type="package" version="1.1p1">chemfp</requirement>
-    </requirements>
-    <command>
-<![CDATA[
-        #if $method_opts.method_opts_selector == "chemfp":
-            ln -s '${method_opts.query_opts.targets}' ./targets.fps &&
-
-            #if $method_opts.query_opts.query_opts_selector == "normal":
-                ln -s '${method_opts.query_opts.query}' ./query.fps &&
-            #end if
-
-            simsearch
-                #if int($method_opts.knn) == 0:
-                    #set $k = 'all'
-                    ## count is only available if k nearest neighbor search is disabled
-                    $method_opts.counts
-                #else:
-                    #set $k = int($method_opts.knn)
-                #end if
-
-                -k $k
-                --threshold $method_opts.threshold
-                -o ./output.fps
-
-                ## build and search an in-memory data structure (faster for multiple queries)
-                --memory
-
-                #if $method_opts.query_opts.query_opts_selector == "normal":
-                    -q ./query.fps
-                #else:
-                    --NxN
-                #end if
-
-                ./targets.fps
-                &&
-                mv ./output.fps '${outfile}'
-        #else:
-            ## OpenBabel needs the original molecule file (molecule.'fileformat') next to the fastsearch index (molecule.fs). We use a composite datatype to accomplish that.
-            ## Furthermore OpenBabel is really picky with file extensions. We need to specify every datatype. I did not find a solution to specify the query-filetype.
-            ## A workaround is to create a symlink with a proper file-extension.
-            ln -s '$method_opts.query' ./temp_query.$method_opts.query.ext
-            obabel 
-                -i fs '$method_opts.fastsearch.files_path/molecule.fs'
-                -S ./temp_query.$method_opts.query.ext
-                -at${method_opts.threshold}
-                -O '${outfile}'
-                -osmi
-                -aa
-        #end if
-]]>
-    </command>
-    <inputs>
-        <conditional name="method_opts">
-            <param name="method_opts_selector" type="select" label="Subject database/sequences">
-              <option value="chemfp">Chemfp fingerprint file</option>
-              <option value="obabel">OpenBabel Fastsearch Index</option>
-            </param>
-            <when value="chemfp">
-                <conditional name="query_opts">
-                    <param name="query_opts_selector" type="select" label="Query Mode">
-                      <option value="normal">Query molecules are stores in a separate file</option>
-                      <option value="nxn">Target molecules are also queries (NxN)</option>
-                    </param>
-                    <when value="normal">
-                        <param name='query' type='data' format="fps" label='Query molecules'/>
-                        <param name='targets' type='data' format="fps" label='Target molecules'/>
-                    </when>
-                    <when value="nxn">
-                        <param name='targets' type='data' format="fps" label='Target moleculs'/>
-                    </when>
-                </conditional>
-                <param name='knn' type='integer' value='0' label='select the k nearest neighbors' help='0 means all neighbors'>
-                   <validator type="in_range" min="0" />
-                </param>
-                <param name='threshold' type='float' value='0.7' label='threshold' />
-                <param name="counts" type="boolean" truevalue="-c" falsevalue="" checked="false" label="report counts (-c)" help="Is ignored if k nearest neighbor search is enabled" />
-            </when>
-            <when value="obabel">
-                <param name="query" type='data' format="smi,mol,sdf,inchi" label="query"/>
-                <param name="fastsearch" type='data' format="obfs" label="OpenBabel Fastsearch Index"/>
-                <param name="threshold" type='float' label="threshold" value='0.7'/>
-            </when>
-        </conditional>
-
-    </inputs>
-    <outputs>
-        <data name="outfile" format="tabular" />
-    </outputs>
-    <tests>
-        <test>
-            <param name="targets" ftype="fps" value="targets.fps"/>
-            <param name="query" ftype="fps" value="q.fps"/>
-            <param name="k" value='4'/>
-            <param name="th" value='0.7'/>
-            <output name="outfile" ftype="tabular" file="simsearch_on_tragets_and_q.tabular"/>
-        </test>
-    </tests>
-    <help>
-<![CDATA[
-
-
-.. class:: infomark
-
-**What this tool does**
-
-Similarity searches using a variety of different fingerprints using either the chemfp_ FPS type or the Open Babel FastSearch_ index.
-
-.. _chemfp: http://chemfp.com/
-.. _FastSearch: http://openbabel.org/wiki/FastSearch
-
-
-]]>
-    </help>
-    <citations>
-        <citation type="doi">10.1186/1758-2946-3-33</citation>
-        <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>
-    </citations>
-</tool>
b
diff -r 685a138131f0 -r 57a1a58056a6 static/images/NxN_clustering.png
b
Binary file static/images/NxN_clustering.png has changed
b
diff -r 685a138131f0 -r 57a1a58056a6 static/images/NxN_clustering.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/static/images/NxN_clustering.svg Sat May 20 12:57:06 2017 -0400
b
b'@@ -0,0 +1,2275 @@\n+<?xml version="1.0" encoding="utf-8" standalone="no"?>\n+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"\n+  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n+<!-- Created with matplotlib (http://matplotlib.org/) -->\n+<svg height="432pt" version="1.1" viewBox="0 0 576 432" width="576pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">\n+ <defs>\n+  <style type="text/css">\n+*{stroke-linecap:square;stroke-linejoin:round;}\n+  </style>\n+ </defs>\n+ <g id="figure_1">\n+  <g id="patch_1">\n+   <path d="\n+M0 432\n+L576 432\n+L576 0\n+L0 0\n+z\n+" style="fill:#ffffff;"/>\n+  </g>\n+  <g id="axes_1">\n+   <g id="patch_2">\n+    <path d="\n+M72 388.8\n+L518.4 388.8\n+L518.4 43.2\n+L72 43.2\n+z\n+" style="fill:#ffffff;"/>\n+   </g>\n+   <g id="LineCollection_1">\n+    <defs>\n+     <path d="\n+M80.4759 -43.2\n+L80.4759 -89.4738\n+L86.1266 -89.4738\n+L86.1266 -43.2" id="C0_0_a27cbf3dad"/>\n+     <path d="\n+M74.8253 -43.2\n+L74.8253 -128.527\n+L83.3013 -128.527\n+L83.3013 -89.4738" id="C0_1_0365ccf33e"/>\n+     <path d="\n+M227.392 -43.2\n+L227.392 -110.195\n+L233.043 -110.195\n+L233.043 -43.2" id="C0_2_16a64a88b9"/>\n+     <path d="\n+M221.742 -43.2\n+L221.742 -111.088\n+L230.218 -111.088\n+L230.218 -110.195" id="C0_3_1e06391595"/>\n+     <path d="\n+M244.344 -43.2\n+L244.344 -178.829\n+L249.995 -178.829\n+L249.995 -43.2" id="C0_4_9522133b75"/>\n+     <path d="\n+M238.694 -43.2\n+L238.694 -187.132\n+L247.17 -187.132\n+L247.17 -178.829" id="C0_5_e4f3e58d26"/>\n+     <path d="\n+M225.98 -111.088\n+L225.98 -262.87\n+L242.932 -262.87\n+L242.932 -187.132" id="C0_6_ff944847e7"/>\n+     <path d="\n+M396.911 -43.2\n+L396.911 -224.631\n+L402.562 -224.631\n+L402.562 -43.2" id="C0_7_0906a9df02"/>\n+     <path d="\n+M391.261 -43.2\n+L391.261 -233.371\n+L399.737 -233.371\n+L399.737 -224.631" id="C0_8_ed58b0afb2"/>\n+     <path d="\n+M408.213 -43.2\n+L408.213 -243.035\n+L413.863 -243.035\n+L413.863 -43.2" 
id="C0_9_84c2cf03f5"/>\n+     <path d="\n+M395.499 -233.371\n+L395.499 -269.685\n+L411.038 -269.685\n+L411.038 -243.035" id="C0_a_6ef56ffb7b"/>\n+     <path d="\n+M385.61 -43.2\n+L385.61 -270.198\n+L403.268 -270.198\n+L403.268 -269.685" id="C0_b_c4ff70daa4"/>\n+    </defs>\n+    <g clip-path="url(#p7ff5b81e1d)">\n+     <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_0_a27cbf3dad" y="432.0"/>\n+    </g>\n+    <g clip-path="url(#p7ff5b81e1d)">\n+     <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_1_0365ccf33e" y="432.0"/>\n+    </g>\n+    <g clip-path="url(#p7ff5b81e1d)">\n+     <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_2_16a64a88b9" y="432.0"/>\n+    </g>\n+    <g clip-path="url(#p7ff5b81e1d)">\n+     <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_3_1e06391595" y="432.0"/>\n+    </g>\n+    <g clip-path="url(#p7ff5b81e1d)">\n+     <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_4_9522133b75" y="432.0"/>\n+    </g>\n+    <g clip-path="url(#p7ff5b81e1d)">\n+     <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_5_e4f3e58d26" y="432.0"/>\n+    </g>\n+    <g clip-path="url(#p7ff5b81e1d)">\n+     <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_6_ff944847e7" y="432.0"/>\n+    </g>\n+    <g clip-path="url(#p7ff5b81e1d)">\n+     <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_7_0906a9df02" y="432.0"/>\n+    </g>\n+    <g clip-path="url(#p7ff5b81e1d)">\n+     <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_8_ed58b0afb2" y="432.0"/>\n+    </g>\n+    <g clip-path="url(#p7ff5b81e1d)">\n+     <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_9_84c2cf03f5" y="432.0"/>\n+    </g>\n+    <g clip-path="url(#p7ff5b81e1d)">\n+     <use 
style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_a_6ef56ffb7b" y="432.0"/>\n+    </g>\n+    <g clip-path="url(#p7ff5b81e1d)">\n+     <use style="fill:n'..b'xlink:href="#m0d5b0a6425" y="286.871994251"/>\n+      </g>\n+     </g>\n+     <g id="text_82">\n+      <!-- 0.4 -->\n+      <g transform="translate(50.380625 291.239806751)scale(0.12 -0.12)">\n+       <use xlink:href="#BitstreamVeraSans-Roman-30"/>\n+       <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n+       <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-34"/>\n+      </g>\n+     </g>\n+    </g>\n+    <g id="ytick_4">\n+     <g id="line2d_7">\n+      <g>\n+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#mc8fcea1516" y="235.907991376"/>\n+      </g>\n+     </g>\n+     <g id="line2d_8">\n+      <g>\n+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m0d5b0a6425" y="235.907991376"/>\n+      </g>\n+     </g>\n+     <g id="text_83">\n+      <!-- 0.6 -->\n+      <g transform="translate(50.463125 240.275803876)scale(0.12 -0.12)">\n+       <use xlink:href="#BitstreamVeraSans-Roman-30"/>\n+       <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n+       <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-36"/>\n+      </g>\n+     </g>\n+    </g>\n+    <g id="ytick_5">\n+     <g id="line2d_9">\n+      <g>\n+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#mc8fcea1516" y="184.943988502"/>\n+      </g>\n+     </g>\n+     <g id="line2d_10">\n+      <g>\n+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m0d5b0a6425" y="184.943988502"/>\n+      </g>\n+     </g>\n+     <g id="text_84">\n+      <!-- 0.8 -->\n+      <g transform="translate(50.52875 189.311801002)scale(0.12 -0.12)">\n+       <use xlink:href="#BitstreamVeraSans-Roman-30"/>\n+       <use 
x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n+       <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-38"/>\n+      </g>\n+     </g>\n+    </g>\n+    <g id="ytick_6">\n+     <g id="line2d_11">\n+      <g>\n+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#mc8fcea1516" y="133.979985627"/>\n+      </g>\n+     </g>\n+     <g id="line2d_12">\n+      <g>\n+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m0d5b0a6425" y="133.979985627"/>\n+      </g>\n+     </g>\n+     <g id="text_85">\n+      <!-- 1.0 -->\n+      <g transform="translate(51.03125 138.347798127)scale(0.12 -0.12)">\n+       <use xlink:href="#BitstreamVeraSans-Roman-31"/>\n+       <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n+       <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-30"/>\n+      </g>\n+     </g>\n+    </g>\n+    <g id="ytick_7">\n+     <g id="line2d_13">\n+      <g>\n+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#mc8fcea1516" y="83.0159827526"/>\n+      </g>\n+     </g>\n+     <g id="line2d_14">\n+      <g>\n+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m0d5b0a6425" y="83.0159827526"/>\n+      </g>\n+     </g>\n+     <g id="text_86">\n+      <!-- 1.2 -->\n+      <g transform="translate(51.43625 87.4691077526)scale(0.12 -0.12)">\n+       <use xlink:href="#BitstreamVeraSans-Roman-31"/>\n+       <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n+       <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-32"/>\n+      </g>\n+     </g>\n+    </g>\n+   </g>\n+   <g id="patch_3">\n+    <path d="\n+M72 43.2\n+L518.4 43.2" style="fill:none;stroke:#000000;"/>\n+   </g>\n+   <g id="patch_4">\n+    <path d="\n+M518.4 388.8\n+L518.4 43.2" style="fill:none;stroke:#000000;"/>\n+   </g>\n+   <g id="patch_5">\n+    <path d="\n+M72 
388.8\n+L518.4 388.8" style="fill:none;stroke:#000000;"/>\n+   </g>\n+   <g id="patch_6">\n+    <path d="\n+M72 388.8\n+L72 43.2" style="fill:none;stroke:#000000;"/>\n+   </g>\n+  </g>\n+ </g>\n+ <defs>\n+  <clipPath id="p7ff5b81e1d">\n+   <rect height="345.6" width="446.4" x="72.0" y="43.2"/>\n+  </clipPath>\n+ </defs>\n+</svg>\n'
b
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244.can
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244.can Sat May 20 12:57:06 2017 -0400
b
@@ -0,0 +1,1 @@
+CC(=O)Oc1ccccc1C(=O)O 2244
b
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244.inchi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244.inchi Sat May 20 12:57:06 2017 -0400
b
@@ -0,0 +1,1 @@
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
b
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244.sdf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244.sdf Sat May 20 12:57:06 2017 -0400
b
@@ -0,0 +1,154 @@
+2244
+  -OEChem-05151212332D
+
+ 21 21  0     0  0  0  0  0  0999 V2000
+    3.7320   -0.0600    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301    1.4400    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981    1.4400    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    2.8660   -1.5600    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -0.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981   -1.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -2.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301   -1.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641    0.9400    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.8660   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.0000   -0.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.0611   -1.8700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.8671   -0.2500    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -2.6800    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.8671   -1.8700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    2.3100    0.4769    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.4631    0.2500    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.6900   -0.5969    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301    2.0600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+  1  5  1  0  0  0  0
+  1 12  1  0  0  0  0
+  2 11  1  0  0  0  0
+  2 21  1  0  0  0  0
+  3 11  2  0  0  0  0
+  4 12  2  0  0  0  0
+  5  6  1  0  0  0  0
+  5  7  2  0  0  0  0
+  6  8  2  0  0  0  0
+  6 11  1  0  0  0  0
+  7  9  1  0  0  0  0
+  7 14  1  0  0  0  0
+  8 10  1  0  0  0  0
+  8 15  1  0  0  0  0
+  9 10  2  0  0  0  0
+  9 16  1  0  0  0  0
+ 10 17  1  0  0  0  0
+ 12 13  1  0  0  0  0
+ 13 18  1  0  0  0  0
+ 13 19  1  0  0  0  0
+ 13 20  1  0  0  0  0
+M  END
+> <PUBCHEM_COMPOUND_CID>
+2244
+
+> <PUBCHEM_COMPOUND_CANONICALIZED>
+1
+
+> <PUBCHEM_CACTVS_COMPLEXITY>
+212
+
+> <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
+4
+
+> <PUBCHEM_CACTVS_HBOND_DONOR>
+1
+
+> <PUBCHEM_CACTVS_ROTATABLE_BOND>
+3
+
+> <PUBCHEM_CACTVS_SUBSKEYS>
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA==
+
+> <PUBCHEM_IUPAC_OPENEYE_NAME>
+2-acetoxybenzoic acid
+
+> <PUBCHEM_IUPAC_CAS_NAME>
+2-acetyloxybenzoic acid
+
+> <PUBCHEM_IUPAC_NAME>
+2-acetyloxybenzoic acid
+
+> <PUBCHEM_IUPAC_SYSTEMATIC_NAME>
+2-acetyloxybenzoic acid
+
+> <PUBCHEM_IUPAC_TRADITIONAL_NAME>
+2-acetoxybenzoic acid
+
+> <PUBCHEM_IUPAC_INCHI>
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
+
+> <PUBCHEM_IUPAC_INCHIKEY>
+BSYNRYMUTXBXSQ-UHFFFAOYSA-N
+
+> <PUBCHEM_XLOGP3>
+1.2
+
+> <PUBCHEM_EXACT_MASS>
+180.042259
+
+> <PUBCHEM_MOLECULAR_FORMULA>
+C9H8O4
+
+> <PUBCHEM_MOLECULAR_WEIGHT>
+180.15742
+
+> <PUBCHEM_OPENEYE_CAN_SMILES>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+> <PUBCHEM_OPENEYE_ISO_SMILES>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+> <PUBCHEM_CACTVS_TPSA>
+63.6
+
+> <PUBCHEM_MONOISOTOPIC_WEIGHT>
+180.042259
+
+> <PUBCHEM_TOTAL_CHARGE>
+0
+
+> <PUBCHEM_HEAVY_ATOM_COUNT>
+13
+
+> <PUBCHEM_ATOM_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ATOM_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ISOTOPIC_ATOM_COUNT>
+0
+
+> <PUBCHEM_COMPONENT_COUNT>
+1
+
+> <PUBCHEM_CACTVS_TAUTO_COUNT>
+1
+
+> <PUBCHEM_COORDINATE_TYPE>
+1
+5
+255
+
+> <PUBCHEM_BONDANNOTATIONS>
+5  6  8
+5  7  8
+6  8  8
+7  9  8
+8  10  8
+9  10  8
+
+$$$$
b
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244.smi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244.smi Sat May 20 12:57:06 2017 -0400
b
@@ -0,0 +1,1 @@
+O(c1c(cccc1)C(=O)O)C(=O)C 2244
b
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_FP2.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_FP2.fps Sat May 20 12:57:06 2017 -0400
b
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.4.1
+#source=/tmp/tmptaAke4/files/000/dataset_3.dat
+#date=2017-05-19T13:52:59
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
b
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_FP3.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_FP3.fps Sat May 20 12:57:06 2017 -0400
b
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.4.1
+#source=/tmp/tmptaAke4/files/000/dataset_7.dat
+#date=2017-05-19T13:53:45
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
b
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_FP4.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_FP4.fps Sat May 20 12:57:06 2017 -0400
b
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.4.1
+#source=/tmp/tmptaAke4/files/000/dataset_11.dat
+#date=2017-05-19T13:54:39
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
b
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_MACCS.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_MACCS.fps Sat May 20 12:57:06 2017 -0400
b
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.4.1
+#source=/tmp/tmptaAke4/files/000/dataset_15.dat
+#date=2017-05-19T13:55:30
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
b
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_maccs.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_maccs.fps Sat May 20 12:57:06 2017 -0400
b
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=166
+#type=OpenBabel-MACCS/2
+#software=OpenBabel/2.3.1
+#source=CID_2244.sdf
+#date=2012-05-15T17:00:39
+0000000000000000000000010000016480cca2d21e 2244
b
diff -r 685a138131f0 -r 57a1a58056a6 test-data/NxN_Clustering_on_q.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/NxN_Clustering_on_q.svg Sat May 20 12:57:06 2017 -0400
b
b'@@ -0,0 +1,707 @@\n+<?xml version="1.0" encoding="utf-8" standalone="no"?>\n+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"\n+  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n+<!-- Created with matplotlib (http://matplotlib.org/) -->\n+<svg height="345pt" version="1.1" viewBox="0 0 460 345" width="460pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">\n+ <defs>\n+  <style type="text/css">\n+*{stroke-linecap:butt;stroke-linejoin:round;}\n+  </style>\n+ </defs>\n+ <g id="figure_1">\n+  <g id="patch_1">\n+   <path d="M 0 345.6 \n+L 460.8 345.6 \n+L 460.8 0 \n+L 0 0 \n+z\n+" style="fill:#ffffff;"/>\n+  </g>\n+  <g id="axes_1">\n+   <g id="patch_2">\n+    <path d="M 57.6 307.584 \n+L 414.72 307.584 \n+L 414.72 41.472 \n+L 57.6 41.472 \n+z\n+" style="fill:#ffffff;"/>\n+   </g>\n+   <g id="matplotlib.axis_1">\n+    <g id="xtick_1">\n+     <g id="text_1">\n+      <!-- 55079807 -->\n+      <defs>\n+       <path d="M 10.796875 72.90625 \n+L 49.515625 72.90625 \n+L 49.515625 64.59375 \n+L 19.828125 64.59375 \n+L 19.828125 46.734375 \n+Q 21.96875 47.46875 24.109375 47.828125 \n+Q 26.265625 48.1875 28.421875 48.1875 \n+Q 40.625 48.1875 47.75 41.5 \n+Q 54.890625 34.8125 54.890625 23.390625 \n+Q 54.890625 11.625 47.5625 5.09375 \n+Q 40.234375 -1.421875 26.90625 -1.421875 \n+Q 22.3125 -1.421875 17.546875 -0.640625 \n+Q 12.796875 0.140625 7.71875 1.703125 \n+L 7.71875 11.625 \n+Q 12.109375 9.234375 16.796875 8.0625 \n+Q 21.484375 6.890625 26.703125 6.890625 \n+Q 35.15625 6.890625 40.078125 11.328125 \n+Q 45.015625 15.765625 45.015625 23.390625 \n+Q 45.015625 31 40.078125 35.4375 \n+Q 35.15625 39.890625 26.703125 39.890625 \n+Q 22.75 39.890625 18.8125 39.015625 \n+Q 14.890625 38.140625 10.796875 36.28125 \n+z\n+" id="DejaVuSans-35"/>\n+       <path d="M 31.78125 66.40625 \n+Q 24.171875 66.40625 20.328125 58.90625 \n+Q 16.5 51.421875 16.5 36.375 \n+Q 16.5 21.390625 20.328125 13.890625 \n+Q 24.171875 6.390625 31.78125 6.390625 \n+Q 39.453125 
6.390625 43.28125 13.890625 \n+Q 47.125 21.390625 47.125 36.375 \n+Q 47.125 51.421875 43.28125 58.90625 \n+Q 39.453125 66.40625 31.78125 66.40625 \n+z\n+M 31.78125 74.21875 \n+Q 44.046875 74.21875 50.515625 64.515625 \n+Q 56.984375 54.828125 56.984375 36.375 \n+Q 56.984375 17.96875 50.515625 8.265625 \n+Q 44.046875 -1.421875 31.78125 -1.421875 \n+Q 19.53125 -1.421875 13.0625 8.265625 \n+Q 6.59375 17.96875 6.59375 36.375 \n+Q 6.59375 54.828125 13.0625 64.515625 \n+Q 19.53125 74.21875 31.78125 74.21875 \n+z\n+" id="DejaVuSans-30"/>\n+       <path d="M 8.203125 72.90625 \n+L 55.078125 72.90625 \n+L 55.078125 68.703125 \n+L 28.609375 0 \n+L 18.3125 0 \n+L 43.21875 64.59375 \n+L 8.203125 64.59375 \n+z\n+" id="DejaVuSans-37"/>\n+       <path d="M 10.984375 1.515625 \n+L 10.984375 10.5 \n+Q 14.703125 8.734375 18.5 7.8125 \n+Q 22.3125 6.890625 25.984375 6.890625 \n+Q 35.75 6.890625 40.890625 13.453125 \n+Q 46.046875 20.015625 46.78125 33.40625 \n+Q 43.953125 29.203125 39.59375 26.953125 \n+Q 35.25 24.703125 29.984375 24.703125 \n+Q 19.046875 24.703125 12.671875 31.3125 \n+Q 6.296875 37.9375 6.296875 49.421875 \n+Q 6.296875 60.640625 12.9375 67.421875 \n+Q 19.578125 74.21875 30.609375 74.21875 \n+Q 43.265625 74.21875 49.921875 64.515625 \n+Q 56.59375 54.828125 56.59375 36.375 \n+Q 56.59375 19.140625 48.40625 8.859375 \n+Q 40.234375 -1.421875 26.421875 -1.421875 \n+Q 22.703125 -1.421875 18.890625 -0.6875 \n+Q 15.09375 0.046875 10.984375 1.515625 \n+z\n+M 30.609375 32.421875 \n+Q 37.25 32.421875 41.125 36.953125 \n+Q 45.015625 41.5 45.015625 49.421875 \n+Q 45.015625 57.28125 41.125 61.84375 \n+Q 37.25 66.40625 30.609375 66.40625 \n+Q 23.96875 66.40625 20.09375 61.84375 \n+Q 16.21875 57.28125 16.21875 49.421875 \n+Q 16.21875 41.5 20.09375 36.953125 \n+Q 23.96875 32.421875 30.609375 32.421875 \n+z\n+" id="DejaVuSans-39"/>\n+       <path d="M 31.78125 34.625 \n+Q 24.75 34.625 20.71875 30.859375 \n+Q 16.703125 27.09375 16.703125 20.515625 \n+Q 16.703125 13.921875 20.71875 
10.15625 \n+Q 24.75 6.390625 31.78125 6.390625 \n+Q 38.8125 6.390625 42.859375 10.171875 \n+Q 46.921875 13.96875 46.921875 20.515625 \n+Q 4'..b'59.033203" xlink:href="#DejaVuSans-36"/>\n+      </g>\n+     </g>\n+    </g>\n+    <g id="ytick_8">\n+     <g id="line2d_8">\n+      <g>\n+       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#me28c3a741e" y="53.313113"/>\n+      </g>\n+     </g>\n+     <g id="text_21">\n+      <!-- 0.07 -->\n+      <g transform="translate(28.334375 57.112332)scale(0.1 -0.1)">\n+       <use xlink:href="#DejaVuSans-30"/>\n+       <use x="63.623047" xlink:href="#DejaVuSans-2e"/>\n+       <use x="95.410156" xlink:href="#DejaVuSans-30"/>\n+       <use x="159.033203" xlink:href="#DejaVuSans-37"/>\n+      </g>\n+     </g>\n+    </g>\n+   </g>\n+   <g id="LineCollection_1">\n+    <path clip-path="url(#p7a554818f3)" d="M 98.806154 307.584 \n+L 98.806154 160.244138 \n+L 126.276923 160.244138 \n+L 126.276923 307.584 \n+" style="fill:none;stroke:#008000;stroke-width:1.5;"/>\n+   </g>\n+   <g id="LineCollection_2">\n+    <path clip-path="url(#p7a554818f3)" d="M 208.689231 307.584 \n+L 208.689231 307.584 \n+L 236.16 307.584 \n+L 236.16 307.584 \n+" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 181.218462 307.584 \n+L 181.218462 307.584 \n+L 222.424615 307.584 \n+L 222.424615 307.584 \n+" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 263.630769 307.584 \n+L 263.630769 224.047744 \n+L 291.101538 224.047744 \n+L 291.101538 307.584 \n+" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 201.821538 307.584 \n+L 201.821538 202.211048 \n+L 277.366154 202.211048 \n+L 277.366154 224.047744 \n+" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n+   </g>\n+   <g id="LineCollection_3">\n+    <path clip-path="url(#p7a554818f3)" d="M 318.572308 307.584 \n+L 318.572308 227.498079 \n+L 
346.043077 227.498079 \n+L 346.043077 307.584 \n+" style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 373.513846 307.584 \n+L 373.513846 225.958341 \n+L 400.984615 225.958341 \n+L 400.984615 307.584 \n+" style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 332.307692 227.498079 \n+L 332.307692 178.92987 \n+L 387.249231 178.92987 \n+L 387.249231 225.958341 \n+" style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>\n+   </g>\n+   <g id="LineCollection_4">\n+    <path clip-path="url(#p7a554818f3)" d="M 239.593846 202.211048 \n+L 239.593846 126.040908 \n+L 359.778462 126.040908 \n+L 359.778462 178.92987 \n+" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 153.747692 307.584 \n+L 153.747692 98.265487 \n+L 299.686154 98.265487 \n+L 299.686154 126.040908 \n+" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 112.541538 160.244138 \n+L 112.541538 92.745033 \n+L 226.716923 92.745033 \n+L 226.716923 98.265487 \n+" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n+    <path clip-path="url(#p7a554818f3)" d="M 71.335385 307.584 \n+L 71.335385 54.144 \n+L 169.629231 54.144 \n+L 169.629231 92.745033 \n+" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n+   </g>\n+   <g id="patch_3">\n+    <path d="M 57.6 307.584 \n+L 57.6 41.472 \n+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n+   </g>\n+   <g id="patch_4">\n+    <path d="M 414.72 307.584 \n+L 414.72 41.472 \n+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n+   </g>\n+   <g id="patch_5">\n+    <path d="M 57.6 307.584 \n+L 414.72 307.584 \n+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n+   </g>\n+   <g id="patch_6">\n+    <path d="M 57.6 41.472 \n+L 414.72 41.472 \n+" 
style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n+   </g>\n+  </g>\n+ </g>\n+ <defs>\n+  <clipPath id="p7a554818f3">\n+   <rect height="266.112" width="357.12" x="57.6" y="41.472"/>\n+  </clipPath>\n+ </defs>\n+</svg>\n'
b
diff -r 685a138131f0 -r 57a1a58056a6 test-data/Taylor-Butina_Clustering_on_data_q.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Taylor-Butina_Clustering_on_data_q.txt Sat May 20 12:57:06 2017 -0400
b
@@ -0,0 +1,4 @@
+#0 true singletons
+#0 false singletons
+#clusters: 1
+55091752 12 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091467 55168823 55091849
b
diff -r 685a138131f0 -r 57a1a58056a6 test-data/sdf2fps_result1.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sdf2fps_result1.fps Sat May 20 12:57:06 2017 -0400
b
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=881
+#type=CACTVS-E_SCREEN/1.0 extended=2
+#software=CACTVS/unknown
+#source=/tmp/tmpN2w37z/files/000/dataset_1.dat
+#date=2017-05-19T14:27:41
+030e1c000000000000000000000000000000000000000c00000000000000008000000058000010000030200119004c70010060001140044b100040100024040010118060101330e46c21ac5841de06a50788109de11113047100000000001000000000000000080000000000000000 2244
b
diff -r 685a138131f0 -r 57a1a58056a6 test-data/simsearch_on_tragets_and_q.tabular
--- a/test-data/simsearch_on_tragets_and_q.tabular Sat May 20 12:45:01 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,9 +0,0 @@
-#Simsearch/1
-#num_bits=881
-#type=Tanimoto k=all threshold=0.7
-#software=chemfp/1.1p1
-#queries=./query.fps
-#targets=./targets.fps
-#query_sources=CID_28434379.sdf
-#target_sources=Desktop/3579363516810334491.sdf
-13 28434379 6499094 0.9615 6485578 0.9679 55079807 0.9313 3153534 0.9557 55102353 0.9682 55091466 0.9682 55091416 0.9682 6485577 0.9497 55169009 0.9560 55091752 0.9684 55091467 0.9623 55168823 0.9563 55091849 0.9563