Previous changeset 2:70b071de9bee (2017-05-20) Next changeset 4:685a138131f0 (2017-05-20) |
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/simsearch commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502 |
added:
simsearch.xml test-data/simsearch_on_tragets_and_q.tabular |
removed:
butina_clustering.py butina_clustering.xml mol2fps.xml nxn_clustering.py nxn_clustering.xml sdf2fps.xml static/images/NxN_clustering.png static/images/NxN_clustering.svg test-data/CID_2244.can test-data/CID_2244.inchi test-data/CID_2244.sdf test-data/CID_2244.smi test-data/CID_2244_FP2.fps test-data/CID_2244_FP3.fps test-data/CID_2244_FP4.fps test-data/CID_2244_MACCS.fps test-data/CID_2244_maccs.fps test-data/NxN_Clustering_on_q.svg test-data/Taylor-Butina_Clustering_on_data_q.txt test-data/sdf2fps_result1.fps |
b |
diff -r 70b071de9bee -r 892811a1f12c butina_clustering.py --- a/butina_clustering.py Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,134 +0,0 @@ -#!/usr/bin/env python -""" - Modified version of code examples from the chemfp project. - http://code.google.com/p/chem-fingerprints/ - Thanks to Andrew Dalke of Andrew Dalke Scientific! -""" - -import chemfp -import sys -import os -import tempfile -import argparse -import subprocess -from chemfp import search - -def unix_sort(results): - temp_unsorted = tempfile.NamedTemporaryFile(delete=False) - for (i,indices) in enumerate( results.iter_indices() ): - temp_unsorted.write('%s %s\n' % (len(indices), i)) - temp_unsorted.close() - temp_sorted = tempfile.NamedTemporaryFile(delete=False) - temp_sorted.close() - p = subprocess.Popen(['sort', '-n', '-r', '-k', '1,1'], stdin=open(temp_unsorted.name), stdout=open(temp_sorted.name, 'w+')) - stdout, stderr = p.communicate() - return_code = p.returncode - - if return_code: - sys.stdout.write(stdout) - sys.stderr.write(stderr) - sys.stderr.write("Return error code %i from command:\n" % return_code) - temp_sorted.close() - os.remove(temp_unsorted.name) - - for line in open(temp_sorted.name): - size, fp_idx = line.strip().split() - yield (int(size), int(fp_idx)) - - os.remove(temp_sorted.name) - -def butina( args ): - """ - Taylor-Butina clustering from the chemfp help. - """ - out = args.output_path - targets = chemfp.open( args.input_path, format='fps' ) - arena = chemfp.load_fingerprints( targets ) - - chemfp.set_num_threads( args.processors ) - results = search.threshold_tanimoto_search_symmetric(arena, threshold = args.tanimoto_threshold) - results.reorder_all("move-closest-first") - - sorted_ids = unix_sort(results) - - # Determine the true/false singletons and the clusters - true_singletons = [] - false_singletons = [] - clusters = [] - - seen = set() - #for (size, fp_idx, members) in results: - for (size, fp_idx) in sorted_ids: - members = results[fp_idx].get_indices() - #print arena.ids[ fp_idx ], [arena.ids[ m ] for m in members] - if fp_idx in seen: - # Can't use a centroid which is already assigned - continue - seen.add(fp_idx) - - if size == 0: - # The only fingerprint in the exclusion sphere is itself - true_singletons.append( fp_idx ) - continue - - # Figure out which ones haven't yet been assigned - unassigned = set(members) - seen - - if not unassigned: - false_singletons.append(fp_idx) - continue - - # this is a new cluster - clusters.append( (fp_idx, unassigned) ) - seen.update(unassigned) - - len_cluster = len(clusters) - #out.write( "#%s true singletons: %s\n" % ( len(true_singletons), " ".join(sorted(arena.ids[idx] for idx in true_singletons)) ) ) - #out.write( "#%s false singletons: %s\n" % ( len(false_singletons), " ".join(sorted(arena.ids[idx] for idx in false_singletons)) ) ) - - out.write( "#%s true singletons\n" % len(true_singletons) ) - out.write( "#%s false singletons\n" % len(false_singletons) ) - out.write( "#clusters: %s\n" % len_cluster ) - - # Sort so the cluster with the most compounds comes first, - # then by alphabetically smallest id - def cluster_sort_key(cluster): - centroid_idx, members = cluster - return -len(members), arena.ids[centroid_idx] - - clusters.sort(key=cluster_sort_key) - - for centroid_idx, members in clusters: - centroid_name = arena.ids[centroid_idx] - out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(arena.ids[idx] for idx in members))) - #ToDo: len(members) need to be some biggest top 90% or something ... - - for idx in true_singletons: - out.write("%s\t%s\n" % (arena.ids[idx], 0)) - - out.close() - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="""Taylor-Butina clustering for fps files. -For more details please see the original publication or the chemfp documentation: -http://www.chemomine.co.uk/dbclus-paper.pdf -https://chemfp.readthedocs.org -""") - - parser.add_argument("-i", "--input", dest="input_path", - required=True, - help="Path to the input file.") - - parser.add_argument("-o", "--output", dest="output_path", type=argparse.FileType('w'), - default=sys.stdout, - help="Path to the output file.") - - parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", type=float, - default=0.8, - help="Tanimoto threshold [0.8]") - - parser.add_argument('-p', '--processors', type=int, default=4) - - options = parser.parse_args() - butina( options ) |
b |
diff -r 70b071de9bee -r 892811a1f12c butina_clustering.xml --- a/butina_clustering.xml Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,93 +0,0 @@ -<tool id="ctb_chemfp_butina_clustering" name="Taylor-Butina Clustering" version="0.2"> - <description>of molecular fingerprints</description> - <requirements> - <requirement type="package" version="1.1p1">chemfp</requirement> - <requirement type="package" version="2.4.1">openbabel</requirement> - </requirements> - <command detect_errors="exit_code"> -<![CDATA[ - python '$__tool_directory__/butina_clustering.py' - -i '$infile' - -t $threshold - -o '$outfile' - -p \${GALAXY_SLOTS:-1} -]]> - </command> - <inputs> - <param name="infile" type="data" format="fps" label="Finperprint dataset" help="Dataset missing? See TIP below"/> - <param name='threshold' type='float' value='0.8'/> - </inputs> - <outputs> - <data format="tabular" name="outfile"/> - </outputs> - <tests> - <test> - <param name="infile" ftype="fps" value="targets.fps"/> - <param name='threshold' value='0.8' ></param> - <output name="outfile" ftype="tabular" file='Taylor-Butina_Clustering_on_data_q.txt'/> - </test> - </tests> -<help> -<![CDATA[ - - -.. class:: infomark - -**What this tool does** - -Unsupervised non-hierarchical clustering method based on the Taylor-Butina algorithm, which guarantees that every cluster contains molecules which are within a distance cutoff of the central molecule. This tool is based on the chemfp_ project. - -.. _chemfp: http://chemfp.com/ - ------ - -.. class:: infomark - -**Input** - -| Molecular fingerprints in FPS format. -| Open Babel Fastsearch index is not supported. - -* Example:: - - - fingerprints in FPS format - - #FPS1 - #num_bits=881 - #type=CACTVS-E_SCREEN/1.0 extended=2 - #software=CACTVS/unknown - #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat - #date=2012-02-09T13:20:37 - 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e - 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009 - 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e - 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807 - ........ - - - Tanimoto threshold : 0.8 (between 0 and 1) - ------ - -.. class:: infomark - -**Output** - -* Example:: - - 0 true singletons - => - - 0 false singletons - => - - 1 clusters - 55091849 has 12 other members - => 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823 - - -]]> - </help> - <citations> - <citation type="doi">10.1186/1758-2946-5-S1-P36</citation> - </citations> -</tool> |
b |
diff -r 70b071de9bee -r 892811a1f12c mol2fps.xml --- a/mol2fps.xml Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,276 +0,0 @@\n-<tool id="ctb_chemfp_mol2fps" name="Molecules to Fingerprints" version="0.3.0">\n- <description>with different fingerprint types</description>\n- <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" merge_outputs="outfile"></parallelism-->\n- <requirements>\n- <requirement type="package" version="1.1p1">chemfp</requirement>\n- <requirement type="package" version="2016.03.3">rdkit</requirement>\n- <requirement type="package" version="2.4.1">openbabel</requirement>\n- </requirements>\n- <command>\n-<![CDATA[\n- #set $fptype = $fp_opts.fp_opts_selector\n-\n- #if $fptype in [\'--FP2\', \'--FP3\', \'--FP4\', \'--MACCS\']:\n- ## Open Babel fingerprints\n- ob2fps $fptype --in \'${infile.ext}\' \'${infile}\' -o \'${outfile}\'\n- #else:\n- ## RDKit fingerprints\n- rdkit2fps --in \'${infile.ext}\' \'${infile}\' -o \'${outfile}\'\n- #if $fp_opts.fp_opts_selector == "--RDK":\n- --RDK\n- --fpSize $fp_opts.fpSize\n- --minPath $fp_opts.minPath\n- --maxPath $fp_opts.maxPath\n- --nBitsPerHash $fp_opts.nBitsPerHash\n- $fp_opts.useHs\n- #elif $fp_opts.fp_opts_selector == "--torsions":\n- --torsions\n- --fpSize $fp_opts.fpSize\n- --targetSize $fp_opts.targetSize\n- #elif $fp_opts.fp_opts_selector == "--morgan":\n- --morgan\n- --fpSize $fp_opts.fpSize\n- --radius $fp_opts.radius\n- $fp_opts.useFeatures\n- $fp_opts.useChirality\n- $fp_opts.useBondTypes\n- #elif $fp_opts.fp_opts_selector == "--pairs":\n- --paris\n- --fpSize $fp_opts.fpSize\n- --minLength $fp_opts.minLength\n- --maxLength $fp_opts.maxLength\n- #elif $fp_opts.fp_opts_selector == "--maccs166":\n- --maccs166\n- #elif $fp_opts.fp_opts_selector == "--substruct":\n- --substruct\n- #end if\n- #end if\n- --errors report 2>&1\n-]]>\n- </command>\n- <inputs>\n- <param name="infile" type=\'data\' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/>\n- <conditional name="fp_opts">\n- <param name="fp_opts_selector" type="select" label="Type of fingerprint">\n- <option value=\'--FP2\' selected="True">Open Babel FP2 fingerprints</option>\n- <option value=\'--FP3\'>Open Babel FP3 fingerprints</option>\n- <option value=\'--FP4\'>Open Babel FP4 fingerprints</option>\n- <option value=\'--MACCS\'>Open Babel MACCS fingerprints</option>\n- <option value=\'--RDK\'>RDKit topological fingerprint</option>\n- <option value=\'--torsions\'>RDKit topological Torsion fingerprints</option>\n- <option value=\'--morgan\'>RDKit Morgan fingerprints</option>\n- <option value=\'--pairs\'>RDKit Atom Pair fingerprints</option>\n- <option value=\'--maccs166\'>RDKit MACCS fingerprints</option>\n- <option value=\'--substruct\'>RDKit substructure fingerprints</option>\n- </param>\n- <when value="--FP2" />\n- <when value="--FP3" />\n- <when value="--FP4" />\n- <when value="--MACCS" />\n- <when value="--RDK">\n- <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">\n- <validator type="in_range" min="1" />\n- </param>\n- <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help="">\n- <validator type="in_range" min="1" />\n- </param>\n- <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the s'..b' <test>\n- <param name="infile" value="CID_2244.smi" ftype="smi" />\n- <param name="fp_opts.fp_opts_selector" value="--FP3" />\n- <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" lines_diff="4"/>\n- </test>\n- <!-- FP4 -->\n- <test>\n- <param name="infile" value="CID_2244.sdf" ftype="sdf" />\n- <param name="fp_opts.fp_opts_selector" value="--FP4" />\n- <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/>\n- </test>\n- <test>\n- <param name="infile" value="CID_2244.smi" ftype="smi" />\n- <param name="fp_opts.fp_opts_selector" value="--FP4" />\n- <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/>\n- </test>\n- <!-- MACCS -->\n- <test>\n- <param name="infile" value="CID_2244.sdf" ftype="sdf" />\n- <param name="fp_opts.fp_opts_selector" value="--MACCS" />\n- <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/>\n- </test>\n- <test>\n- <param name="infile" value="CID_2244.smi" ftype="smi" />\n- <param name="fp_opts.fp_opts_selector" value="--MACCS" />\n- <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/>\n- </test>\n- </tests>\n- <help>\n-<![CDATA[\n-\n-.. class:: infomark\n-\n-**What this tool does**\n-\n-This tool uses chemfp_ to calculate 10 different fingerprints of common file formats. Chemfp uses `Open Babel`_, OpenEye_ and RDKit_.\n-\n-For more information check the websites listed below::\n-\n-\t- http://www.rdkit.org/docs/GettingStartedInPython.html#fingerprinting-and-molecular-similarity\n-\t- http://openbabel.org/wiki/Tutorial:Fingerprints\n-\n------\n-\n-.. class:: infomark\n-\n-**Input**\n-\n-FPS fingerprint file format\n-\n-* Example::\n-\n-\t - SDF File\n-\n-\t\t28434379\n-\t\t -OEChem-02031205132D\n-\n-\t\t 37 39 0 0 0 0 0 0 0999 V2000\n-\t\t 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n-\t\t 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n-\t\t 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n-\t\t 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n-\t\t 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n-\t\t 7.3704 0.9433 0.0000 C 0 0 0 0\n-\t\t ......\n-\t\t 1 15 1 0 0 0 0\n-\t\t 1 35 1 0 0 0 0\n-\t\t 2 5 1 0 0 0 0\n-\t\t 2 11 1 0 0 0 0\n-\t\t 2 12 1 0 0 0 0\n-\t\t 3 12 2 0 0 0 0\n-\t\t 3 13 1 0 0 0 0\n-\t\t 4 18 1 0 0 0 0\n-\t\t ......\n-\n-\t\t\t>PUBCHEM_COMPOUND_CID<\n-\t\t\t28434379\n-\n-\t\t\t> <PUBCHEM_COMPOUND_CANONICALIZED>\n-\t\t\t1\n-\n-\t\t\t> <PUBCHEM_CACTVS_COMPLEXITY>\n-\t\t\t280\n-\n-\t\t\t> <PUBCHEM_CACTVS_HBOND_ACCEPTOR>\n-\t\t\t2\n-\n-\t\t\t> <PUBCHEM_CACTVS_HBOND_DONOR>\n-\t\t\t2\n-\n-\t\t\t> <PUBCHEM_CACTVS_ROTATABLE_BOND>\n-\t\t\t2\n-\n-\t\t\t> <PUBCHEM_CACTVS_SUBSKEYS>\n-\t\t\tAAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==\n-\n-\t\t\t>\n-\n-\t\t- type : FP2\n-\n------\n-\n-.. class:: infomark\n-\n-**Output**\n-\n-* Example::\n-\n-\t#FPS1\n-\t#num_bits=1021\n-\t#type=OpenBabel-FP2/1\n-\t#software=OpenBabel/2.3.0\n-\t#source=/tmp/dataset_409.dat.sdf\n-\t#date=2012-02-03T11:13:39\n-\tc0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c\n-\t0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300\n-\t10000000000080000000c0000060000c0000060810000010000000800102000000\t28434379\n-\n-\n-]]>\n- </help>\n- <citations>\n- <citation type="doi">10.1186/1758-2946-3-33</citation>\n- <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>\n- <citation type="bibtex">\n- @electronic{rdkit,\n- title = {RDKit: Open-source cheminformatics},\n- url ={http://www.rdkit.org}\n- }\n- </citation>\n- </citations>\n-</tool>\n' |
b |
diff -r 70b071de9bee -r 892811a1f12c nxn_clustering.py --- a/nxn_clustering.py Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,80 +0,0 @@ -#!/usr/bin/env python -""" - Modified version of code examples from the chemfp project. - http://code.google.com/p/chem-fingerprints/ - Thanks to Andrew Dalke of Andrew Dalke Scientific! -""" -import matplotlib -matplotlib.use('Agg') -import argparse -import os -import chemfp -import scipy.cluster.hierarchy as hcluster -import pylab -import numpy - -def distance_matrix(arena, tanimoto_threshold = 0.0): - n = len(arena) - # Start off a similarity matrix with 1.0s along the diagonal - try: - similarities = numpy.identity(n, "d") - except: - raise Exception('Input dataset is to large!') - chemfp.set_num_threads( args.processors ) - - ## Compute the full similarity matrix. - # The implementation computes the upper-triangle then copies - # the upper-triangle into lower-triangle. It does not include - # terms for the diagonal. - results = chemfp.search.threshold_tanimoto_search_symmetric(arena, threshold=tanimoto_threshold) - - # Copy the results into the NumPy array. - for row_index, row in enumerate(results.iter_indices_and_scores()): - for target_index, target_score in row: - similarities[row_index, target_index] = target_score - - # Return the distance matrix using the similarity matrix - return 1.0 - similarities - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="""NxN clustering for fps files. -For more details please see the chemfp documentation: -https://chemfp.readthedocs.org -""") - - parser.add_argument("-i", "--input", dest="input_path", - required=True, - help="Path to the input file.") - - parser.add_argument("-c", "--cluster", dest="cluster_image", - help="Path to the output cluster image.") - - parser.add_argument("-s", "--smatrix", dest="similarity_matrix", - help="Path to the similarity matrix output file.") - - parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", - type=float, default=0.0, - help="Tanimoto threshold [0.0]") - - parser.add_argument("--oformat", default='png', help="Output format (png, svg)") - - parser.add_argument('-p', '--processors', type=int, - default=4) - - args = parser.parse_args() - - targets = chemfp.open( args.input_path, format='fps' ) - arena = chemfp.load_fingerprints( targets ) - distances = distance_matrix( arena, args.tanimoto_threshold ) - - if args.similarity_matrix: - distances.tofile( args.similarity_matrix ) - - if args.cluster_image: - linkage = hcluster.linkage( distances, method="single", metric="euclidean" ) - - hcluster.dendrogram(linkage, labels=arena.ids) - - pylab.savefig( args.cluster_image, format=args.oformat ) - |
b |
diff -r 70b071de9bee -r 892811a1f12c nxn_clustering.xml --- a/nxn_clustering.xml Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,120 +0,0 @@ -<tool id="ctb_chemfp_nxn_clustering" name="NxN Clustering" version="0.4"> - <description>of molecular fingerprints</description> - <requirements> - <requirement type="package" version="1.1p1">chemfp</requirement> - <requirement type="package" version="2">python</requirement> - <requirement type="package" version="2.0.2">matplotlib</requirement> - <requirement type="package" version="0.19.0">scipy</requirement> - <requirement type="package" version="2.4.1">openbabel</requirement> - </requirements> - <command detect_errors="exit_code"> -<![CDATA[ - python '$__tool_directory__/nxn_clustering.py' - -i '$infile' - -t $threshold - #if str($output_files) in ['both', 'image']: - --cluster '$image' - #end if - #if str($output_files) in ['both', 'matrix']: - --smatrix '$smilarity_matrix' - #end if - --oformat '$oformat' -]]> - </command> - <inputs> - <param name="infile" type="data" format="fps" label="Finperprint dataset" help="Dataset missing? See TIP below"/> - <param name='threshold' type='float' value='0.0' /> - <param name='oformat' type='select' format='text' label="Format of the resulting picture"> - <option value='png'>PNG</option> - <option value='svg'>SVG</option> - </param> - <param name='output_files' type='select' format='text' label="Output options"> - <option value='both'>NxN matrix and Image</option> - <option value='image'>Image</option> - <option value='matrix'>NxN Matrix</option> - </param> - - </inputs> - <outputs> - <data name="image" format="svg" label="${tool.name} on ${on_string} - Cluster Image"> - <filter>output_files == "both" or output_files == "image"</filter> - <change_format> - <when input="oformat" value="png" format="png"/> - </change_format> - </data> - <data name="smilarity_matrix" format="binary" label="${tool.name} on ${on_string} - Similarity Matrix"> - <filter>output_files == "both" or output_files == "matrix"</filter> - </data> - </outputs> - <tests> - <test> - <param name="infile" ftype="fps" value="targets.fps" /> - <param name='treshold' value='0.75' /> - <param name='oformat' value='svg' /> - <param name='output_files' value='image' /> - <output name="image" file='NxN_Clustering_on_q.svg' ftype="svg" compare="sim_size"/> - </test> - </tests> - <help> -<![CDATA[ - -.. class:: infomark - -**What this tool does** - -Generating hierarchical clusters and visualizing clusters with dendrograms. -For the clustering and the fingerprint handling the chemfp_ project is used. - -.. _chemfp: http://chemfp.com/ - ------ - -.. class:: warningmark - -**Hint** - -The plotting of the cluster image is sensible only with a small dataset. - ------ - -.. class:: infomark - -**Input** - -Molecular fingerprints in FPS format. Open Babel Fastsearch index is not supported. - -* Example:: - - - fingerprints in FPS format - - #FPS1 - #num_bits=881 - #type=CACTVS-E_SCREEN/1.0 extended=2 - #software=CACTVS/unknown - #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat - #date=2012-02-09T13:20:37 - 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e - 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009 - 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e - 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807 - ........ - - - Tanimoto threshold : 0.8 (between 0 and 1) - ------ - -.. class:: informark - -**Output** - -* Example:: - - .. image:: $PATH_TO_IMAGES/NxN_clustering.png - - -]]> - </help> - <citations> - <citation type="doi">10.1186/1758-2946-5-S1-P36</citation> - </citations> -</tool> |
b |
diff -r 70b071de9bee -r 892811a1f12c sdf2fps.xml --- a/sdf2fps.xml Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,116 +0,0 @@ -<tool id="ctb_sdf2fps" name="SDF to Fingerprint" version="0.2"> - <description>extract fingerprints from sdf files metadata</description> - <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism--> - <requirements> - <requirement type="package" version="1.1p1">chemfp</requirement> - <requirement type="package" version="2.4.1">openbabel</requirement> - </requirements> - <command> -<![CDATA[ - sdf2fps --pubchem '${infile}' > '${outfile}' -]]> - </command> - <inputs> - <param name="infile" type='data' format="sdf" label="SDF file with fingerprints as metadata"/> - </inputs> - <outputs> - <data name="outfile" format="fps"/> - </outputs> - <tests> - <test> - <param name="infile" ftype="sdf" value="CID_2244.sdf" /> - <output name="outfile" file='sdf2fps_result1.fps' ftype="fps" lines_diff="4" /> - </test> - </tests> - <help> -<![CDATA[ - -.. class:: infomark - -**What this tool does** - -Read an input SD file, extract the fingerprints and store them in a FPS-file. - ------ - -.. class:: infomark - -**Input** - -`SD-Format`_ - -.. _`SD-Format`: http://en.wikipedia.org/wiki/Chemical_table_file - -* Example:: - - 28434379 - -OEChem-02031205132D - - 37 39 0 0 0 0 0 0 0999 V2000 - 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 - 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 - 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 7.3704 0.9433 0.0000 C 0 0 0 0 - ...... - 1 15 1 0 0 0 0 - 1 35 1 0 0 0 0 - 2 5 1 0 0 0 0 - 2 11 1 0 0 0 0 - 2 12 1 0 0 0 0 - 3 12 2 0 0 0 0 - 3 13 1 0 0 0 0 - 4 18 1 0 0 0 0 - ...... - - >PUBCHEM_COMPOUND_CID< - 28434379 - - > <PUBCHEM_COMPOUND_CANONICALIZED> - 1 - - > <PUBCHEM_CACTVS_COMPLEXITY> - 280 - - > <PUBCHEM_CACTVS_HBOND_ACCEPTOR> - 2 - - > <PUBCHEM_CACTVS_HBOND_DONOR> - 2 - - > <PUBCHEM_CACTVS_ROTATABLE_BOND> - 2 - - > <PUBCHEM_CACTVS_SUBSKEYS> - AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== - - > - ------ - -.. class:: infomark - -**Output** - -* Example:: - - #FPS1 - #num_bits=881 - #type=CACTVS-E_SCREEN/1.0 extended=2 - #software=CACTVS/unknown - #source=/home/mohammed/galaxy-central/database/files/000/dataset_409.dat - #date=2012-02-03T10:44:12 - 07ce04000000000000000000000000000080060000000c0600 - 00000000001a800f0000780008100000101487e9608c0bed32 - 48000580644626204101b4844805901b041c2e19511e45039b - 8b2924101609401b13e4080000000000010020000004008000 - 0010000002000000000000 28434379 - - -]]> - </help> - <citations> - <citation type="doi">10.1186/1758-2946-5-S1-P36</citation> - </citations> -</tool> |
b |
diff -r 70b071de9bee -r 892811a1f12c simsearch.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/simsearch.xml Sat May 20 08:41:21 2017 -0400 |
[ |
@@ -0,0 +1,131 @@ +<tool id="ctb_simsearch" name="Similarity Search" version="0.1.1"> + <description>of fingerprint data sets</description> + <requirements> + <requirement type="package" version="1.1p1">chemfp</requirement> + </requirements> + <command> +<![CDATA[ + #if $method_opts.method_opts_selector == "chemfp": + ln -s "${method_opts.query_opts.targets}" ./targets.fps && + #if $method_opts.query_opts.query_opts_selector == "normal": + ln -s "${method_opts.query_opts.query}" ./query.fps && + #end if + + simsearch + #if int($method_opts.knn) == 0: + #set $k = 'all' + ## count is only available if k nearest neighbor search is disabled + $method_opts.counts + #else: + #set $k = int($method_opts.knn) + #end if + + -k $k + --threshold $method_opts.threshold + -o ./output.fps + + ## build and search an in-memory data structure (faster for multiple queries) + --memory + + #if $method_opts.query_opts.query_opts_selector == "normal": + -q ./query.fps + #else: + --NxN + #end if + + ./targets.fps + && + mv ./output.fps "${outfile}" + #else: + ## OpenBabel needs the original molecule file (molecule.'fileformat') next to the fastsearch index (molecule.fs). We use a composite datatype to accomplish that. + ## Furthermore OpenBabel is really picky with file extensions. We need to specify every datatype. I did not find a solution to specify the query-filetype. + ## A workaround is to create a symlink with a proper file-extension. + #import tempfile + #set $temp_file = tempfile.NamedTemporaryFile() + #set $temp_link = "%s.%s" % ($temp_file.name, $method_opts.query.ext) + $temp_file.close() + ln -s $method_opts.query $temp_link; + obabel -i fs "${os.path.join($method_opts.fastsearch.files_path,'molecule.fs')}" -S "${temp_link}" -at${method_opts.threshold} -O "${outfile}" -osmi -aa 2>&1; + rm $temp_link + #end if +]]> + </command> + <inputs> + + <conditional name="method_opts"> + <param name="method_opts_selector" type="select" label="Subject database/sequences"> + <option value="chemfp">Chemfp fingerprint file</option> + <option value="obabel">OpenBabel Fastsearch Index</option> + </param> + <when value="chemfp"> + <conditional name="query_opts"> + <param name="query_opts_selector" type="select" label="Query Mode"> + <option value="normal">Query molecules are stores in a separate file</option> + <option value="nxn">Target molecules are also queries (NxN)</option> + </param> + <when value="normal"> + <param name='query' type='data' format="fps" label='Query molecules'/> + <param name='targets' type='data' format="fps" label='Target molecules'/> + </when> + <when value="nxn"> + <param name='targets' type='data' format="fps" label='Target moleculs'/> + </when> + </conditional> + <param name='knn' type='integer' value='0' label='select the k nearest neighbors' help='0 means all neighbors'> + <validator type="in_range" min="0" /> + </param> + <param name='threshold' type='float' value='0.7' label='threshold' /> + <param name="counts" type="boolean" truevalue="-c" falsevalue="" checked="false" label="report counts (-c)" help="Is ignored if k nearest neighbor search is enabled" /> + </when> + <when value="obabel"> + <param name="query" type='data' format="smi,mol,sdf,inchi" label="query"/> + <param name="fastsearch" type='data' format="obfs" label="OpenBabel Fastsearch Index"/> + <param name="threshold" type='float' label="threshold" value='0.7'/> + </when> + </conditional> + + </inputs> + <outputs> + <data name="outfile" format="tabular" /> + </outputs> + <tests> + <test> + <param name="targets" ftype="fps" value="targets.fps"/> + <param name="query" ftype="fps" value="q.fps"/> + <param name="k" value='4'/> + <param name="th" value='0.7'/> + <output name="outfile" ftype="tabular" file="simsearch_on_tragets_and_q.tabular"/> + </test> + </tests> + <help> +<![CDATA[ + + +.. class:: infomark + +**What this tool does** + +Similarity searches using a variety of different fingerprints using either the chemfp_ FPS type or the Open Babel FastSearch_ index. + +.. _chemfp: http://chemfp.com/ +.. _FastSearch: http://openbabel.org/wiki/FastSearch + +----- + +.. class:: infomark + +**Cite** + +| The chemfp_ project +| +| N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch and G R Hutchison - `Open Babel: An open chemical toolbox`_ + +.. _`Open Babel: An open chemical toolbox`: http://www.jcheminf.com/content/3/1/33 + + +]]> + </help> + <citations> + <citation type="doi">10.1186/1758-2946-3-33</citation> + </citations> +</tool> |
b |
diff -r 70b071de9bee -r 892811a1f12c static/images/NxN_clustering.png |
b |
Binary file static/images/NxN_clustering.png has changed |
b |
diff -r 70b071de9bee -r 892811a1f12c static/images/NxN_clustering.svg --- a/static/images/NxN_clustering.svg Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,2275 +0,0 @@\n-<?xml version="1.0" encoding="utf-8" standalone="no"?>\n-<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"\n- "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n-<!-- Created with matplotlib (http://matplotlib.org/) -->\n-<svg height="432pt" version="1.1" viewBox="0 0 576 432" width="576pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">\n- <defs>\n- <style type="text/css">\n-*{stroke-linecap:square;stroke-linejoin:round;}\n- </style>\n- </defs>\n- <g id="figure_1">\n- <g id="patch_1">\n- <path d="\n-M0 432\n-L576 432\n-L576 0\n-L0 0\n-z\n-" style="fill:#ffffff;"/>\n- </g>\n- <g id="axes_1">\n- <g id="patch_2">\n- <path d="\n-M72 388.8\n-L518.4 388.8\n-L518.4 43.2\n-L72 43.2\n-z\n-" style="fill:#ffffff;"/>\n- </g>\n- <g id="LineCollection_1">\n- <defs>\n- <path d="\n-M80.4759 -43.2\n-L80.4759 -89.4738\n-L86.1266 -89.4738\n-L86.1266 -43.2" id="C0_0_a27cbf3dad"/>\n- <path d="\n-M74.8253 -43.2\n-L74.8253 -128.527\n-L83.3013 -128.527\n-L83.3013 -89.4738" id="C0_1_0365ccf33e"/>\n- <path d="\n-M227.392 -43.2\n-L227.392 -110.195\n-L233.043 -110.195\n-L233.043 -43.2" id="C0_2_16a64a88b9"/>\n- <path d="\n-M221.742 -43.2\n-L221.742 -111.088\n-L230.218 -111.088\n-L230.218 -110.195" id="C0_3_1e06391595"/>\n- <path d="\n-M244.344 -43.2\n-L244.344 -178.829\n-L249.995 -178.829\n-L249.995 -43.2" id="C0_4_9522133b75"/>\n- <path d="\n-M238.694 -43.2\n-L238.694 -187.132\n-L247.17 -187.132\n-L247.17 -178.829" id="C0_5_e4f3e58d26"/>\n- <path d="\n-M225.98 -111.088\n-L225.98 -262.87\n-L242.932 -262.87\n-L242.932 -187.132" id="C0_6_ff944847e7"/>\n- <path d="\n-M396.911 -43.2\n-L396.911 -224.631\n-L402.562 -224.631\n-L402.562 -43.2" id="C0_7_0906a9df02"/>\n- <path d="\n-M391.261 -43.2\n-L391.261 -233.371\n-L399.737 -233.371\n-L399.737 -224.631" id="C0_8_ed58b0afb2"/>\n- <path d="\n-M408.213 -43.2\n-L408.213 -243.035\n-L413.863 -243.035\n-L413.863 -43.2" id="C0_9_84c2cf03f5"/>\n- <path d="\n-M395.499 -233.371\n-L395.499 -269.685\n-L411.038 -269.685\n-L411.038 -243.035" id="C0_a_6ef56ffb7b"/>\n- <path d="\n-M385.61 -43.2\n-L385.61 -270.198\n-L403.268 -270.198\n-L403.268 -269.685" id="C0_b_c4ff70daa4"/>\n- </defs>\n- <g clip-path="url(#p7ff5b81e1d)">\n- <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_0_a27cbf3dad" y="432.0"/>\n- </g>\n- <g clip-path="url(#p7ff5b81e1d)">\n- <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_1_0365ccf33e" y="432.0"/>\n- </g>\n- <g clip-path="url(#p7ff5b81e1d)">\n- <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_2_16a64a88b9" y="432.0"/>\n- </g>\n- <g clip-path="url(#p7ff5b81e1d)">\n- <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_3_1e06391595" y="432.0"/>\n- </g>\n- <g clip-path="url(#p7ff5b81e1d)">\n- <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_4_9522133b75" y="432.0"/>\n- </g>\n- <g clip-path="url(#p7ff5b81e1d)">\n- <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_5_e4f3e58d26" y="432.0"/>\n- </g>\n- <g clip-path="url(#p7ff5b81e1d)">\n- <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_6_ff944847e7" y="432.0"/>\n- </g>\n- <g clip-path="url(#p7ff5b81e1d)">\n- <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_7_0906a9df02" y="432.0"/>\n- </g>\n- <g clip-path="url(#p7ff5b81e1d)">\n- <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_8_ed58b0afb2" y="432.0"/>\n- </g>\n- <g clip-path="url(#p7ff5b81e1d)">\n- <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_9_84c2cf03f5" y="432.0"/>\n- </g>\n- <g clip-path="url(#p7ff5b81e1d)">\n- <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_a_6ef56ffb7b" y="432.0"/>\n- </g>\n- <g clip-path="url(#p7ff5b81e1d)">\n- <use style="fill:n'..b'xlink:href="#m0d5b0a6425" y="286.871994251"/>\n- </g>\n- </g>\n- <g id="text_82">\n- <!-- 0.4 -->\n- <g transform="translate(50.380625 291.239806751)scale(0.12 -0.12)">\n- <use xlink:href="#BitstreamVeraSans-Roman-30"/>\n- <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n- <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-34"/>\n- </g>\n- </g>\n- </g>\n- <g id="ytick_4">\n- <g id="line2d_7">\n- <g>\n- <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#mc8fcea1516" y="235.907991376"/>\n- </g>\n- </g>\n- <g id="line2d_8">\n- <g>\n- <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m0d5b0a6425" y="235.907991376"/>\n- </g>\n- </g>\n- <g id="text_83">\n- <!-- 0.6 -->\n- <g transform="translate(50.463125 240.275803876)scale(0.12 -0.12)">\n- <use xlink:href="#BitstreamVeraSans-Roman-30"/>\n- <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n- <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-36"/>\n- </g>\n- </g>\n- </g>\n- <g id="ytick_5">\n- <g id="line2d_9">\n- <g>\n- <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#mc8fcea1516" y="184.943988502"/>\n- </g>\n- </g>\n- <g id="line2d_10">\n- <g>\n- <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m0d5b0a6425" y="184.943988502"/>\n- </g>\n- </g>\n- <g id="text_84">\n- <!-- 0.8 -->\n- <g transform="translate(50.52875 189.311801002)scale(0.12 -0.12)">\n- <use xlink:href="#BitstreamVeraSans-Roman-30"/>\n- <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n- <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-38"/>\n- </g>\n- </g>\n- </g>\n- <g id="ytick_6">\n- <g id="line2d_11">\n- <g>\n- <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#mc8fcea1516" y="133.979985627"/>\n- </g>\n- </g>\n- <g id="line2d_12">\n- <g>\n- <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m0d5b0a6425" y="133.979985627"/>\n- </g>\n- </g>\n- <g id="text_85">\n- <!-- 1.0 -->\n- <g transform="translate(51.03125 138.347798127)scale(0.12 -0.12)">\n- <use xlink:href="#BitstreamVeraSans-Roman-31"/>\n- <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n- <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-30"/>\n- </g>\n- </g>\n- </g>\n- <g id="ytick_7">\n- <g id="line2d_13">\n- <g>\n- <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#mc8fcea1516" y="83.0159827526"/>\n- </g>\n- </g>\n- <g id="line2d_14">\n- <g>\n- <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m0d5b0a6425" y="83.0159827526"/>\n- </g>\n- </g>\n- <g id="text_86">\n- <!-- 1.2 -->\n- <g transform="translate(51.43625 87.4691077526)scale(0.12 -0.12)">\n- <use xlink:href="#BitstreamVeraSans-Roman-31"/>\n- <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n- <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-32"/>\n- </g>\n- </g>\n- </g>\n- </g>\n- <g id="patch_3">\n- <path d="\n-M72 43.2\n-L518.4 43.2" style="fill:none;stroke:#000000;"/>\n- </g>\n- <g id="patch_4">\n- <path d="\n-M518.4 388.8\n-L518.4 43.2" style="fill:none;stroke:#000000;"/>\n- </g>\n- <g id="patch_5">\n- <path d="\n-M72 388.8\n-L518.4 388.8" style="fill:none;stroke:#000000;"/>\n- </g>\n- <g id="patch_6">\n- <path d="\n-M72 388.8\n-L72 43.2" style="fill:none;stroke:#000000;"/>\n- </g>\n- </g>\n- </g>\n- <defs>\n- <clipPath id="p7ff5b81e1d">\n- <rect height="345.6" width="446.4" x="72.0" y="43.2"/>\n- </clipPath>\n- </defs>\n-</svg>\n' |
b |
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244.can --- a/test-data/CID_2244.can Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,1 +0,0 @@ -CC(=O)Oc1ccccc1C(=O)O 2244 |
b |
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244.inchi --- a/test-data/CID_2244.inchi Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,1 +0,0 @@ -InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) |
b |
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244.sdf --- a/test-data/CID_2244.sdf Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,154 +0,0 @@ -2244 - -OEChem-05151212332D - - 21 21 0 0 0 0 0 0 0999 V2000 - 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1 5 1 0 0 0 0 - 1 12 1 0 0 0 0 - 2 11 1 0 0 0 0 - 2 21 1 0 0 0 0 - 3 11 2 0 0 0 0 - 4 12 2 0 0 0 0 - 5 6 1 0 0 0 0 - 5 7 2 0 0 0 0 - 6 8 2 0 0 0 0 - 6 11 1 0 0 0 0 - 7 9 1 0 0 0 0 - 7 14 1 0 0 0 0 - 8 10 1 0 0 0 0 - 8 15 1 0 0 0 0 - 9 10 2 0 0 0 0 - 9 16 1 0 0 0 0 - 10 17 1 0 0 0 0 - 12 13 1 0 0 0 0 - 13 18 1 0 0 0 0 - 13 19 1 0 0 0 0 - 13 20 1 0 0 0 0 -M END -> <PUBCHEM_COMPOUND_CID> -2244 - -> <PUBCHEM_COMPOUND_CANONICALIZED> -1 - -> <PUBCHEM_CACTVS_COMPLEXITY> -212 - -> <PUBCHEM_CACTVS_HBOND_ACCEPTOR> -4 - -> <PUBCHEM_CACTVS_HBOND_DONOR> -1 - -> <PUBCHEM_CACTVS_ROTATABLE_BOND> -3 - -> <PUBCHEM_CACTVS_SUBSKEYS> -AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== - -> <PUBCHEM_IUPAC_OPENEYE_NAME> -2-acetoxybenzoic acid - -> <PUBCHEM_IUPAC_CAS_NAME> -2-acetyloxybenzoic acid - -> <PUBCHEM_IUPAC_NAME> -2-acetyloxybenzoic acid - -> <PUBCHEM_IUPAC_SYSTEMATIC_NAME> -2-acetyloxybenzoic acid - -> <PUBCHEM_IUPAC_TRADITIONAL_NAME> -2-acetoxybenzoic acid - -> <PUBCHEM_IUPAC_INCHI> -InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) - -> <PUBCHEM_IUPAC_INCHIKEY> -BSYNRYMUTXBXSQ-UHFFFAOYSA-N - -> <PUBCHEM_XLOGP3> -1.2 - -> <PUBCHEM_EXACT_MASS> -180.042259 - -> <PUBCHEM_MOLECULAR_FORMULA> -C9H8O4 - -> <PUBCHEM_MOLECULAR_WEIGHT> -180.15742 - -> <PUBCHEM_OPENEYE_CAN_SMILES> -CC(=O)OC1=CC=CC=C1C(=O)O - -> <PUBCHEM_OPENEYE_ISO_SMILES> -CC(=O)OC1=CC=CC=C1C(=O)O - -> <PUBCHEM_CACTVS_TPSA> -63.6 - -> <PUBCHEM_MONOISOTOPIC_WEIGHT> -180.042259 - -> <PUBCHEM_TOTAL_CHARGE> -0 - -> <PUBCHEM_HEAVY_ATOM_COUNT> -13 - -> <PUBCHEM_ATOM_DEF_STEREO_COUNT> -0 - -> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> -0 - -> <PUBCHEM_BOND_DEF_STEREO_COUNT> -0 - -> <PUBCHEM_BOND_UDEF_STEREO_COUNT> -0 - -> <PUBCHEM_ISOTOPIC_ATOM_COUNT> -0 - -> <PUBCHEM_COMPONENT_COUNT> -1 - -> <PUBCHEM_CACTVS_TAUTO_COUNT> -1 - -> <PUBCHEM_COORDINATE_TYPE> -1 -5 -255 - -> <PUBCHEM_BONDANNOTATIONS> -5 6 8 -5 7 8 -6 8 8 -7 9 8 -8 10 8 -9 10 8 - -$$$$ |
b |
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244.smi --- a/test-data/CID_2244.smi Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,1 +0,0 @@ -O(c1c(cccc1)C(=O)O)C(=O)C 2244 |
b |
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244_FP2.fps --- a/test-data/CID_2244_FP2.fps Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,7 +0,0 @@ -#FPS1 -#num_bits=1021 -#type=OpenBabel-FP2/1 -#software=OpenBabel/2.4.1 -#source=/tmp/tmptaAke4/files/000/dataset_3.dat -#date=2017-05-19T13:52:59 -00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 |
b |
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244_FP3.fps --- a/test-data/CID_2244_FP3.fps Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,7 +0,0 @@ -#FPS1 -#num_bits=1021 -#type=OpenBabel-FP2/1 -#software=OpenBabel/2.4.1 -#source=/tmp/tmptaAke4/files/000/dataset_7.dat -#date=2017-05-19T13:53:45 -00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 |
b |
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244_FP4.fps --- a/test-data/CID_2244_FP4.fps Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,7 +0,0 @@ -#FPS1 -#num_bits=1021 -#type=OpenBabel-FP2/1 -#software=OpenBabel/2.4.1 -#source=/tmp/tmptaAke4/files/000/dataset_11.dat -#date=2017-05-19T13:54:39 -00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 |
b |
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244_MACCS.fps --- a/test-data/CID_2244_MACCS.fps Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,7 +0,0 @@ -#FPS1 -#num_bits=1021 -#type=OpenBabel-FP2/1 -#software=OpenBabel/2.4.1 -#source=/tmp/tmptaAke4/files/000/dataset_15.dat -#date=2017-05-19T13:55:30 -00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 |
b |
diff -r 70b071de9bee -r 892811a1f12c test-data/CID_2244_maccs.fps --- a/test-data/CID_2244_maccs.fps Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,7 +0,0 @@ -#FPS1 -#num_bits=166 -#type=OpenBabel-MACCS/2 -#software=OpenBabel/2.3.1 -#source=CID_2244.sdf -#date=2012-05-15T17:00:39 -0000000000000000000000010000016480cca2d21e 2244 |
b |
diff -r 70b071de9bee -r 892811a1f12c test-data/NxN_Clustering_on_q.svg --- a/test-data/NxN_Clustering_on_q.svg Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,707 +0,0 @@\n-<?xml version="1.0" encoding="utf-8" standalone="no"?>\n-<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"\n- "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n-<!-- Created with matplotlib (http://matplotlib.org/) -->\n-<svg height="345pt" version="1.1" viewBox="0 0 460 345" width="460pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">\n- <defs>\n- <style type="text/css">\n-*{stroke-linecap:butt;stroke-linejoin:round;}\n- </style>\n- </defs>\n- <g id="figure_1">\n- <g id="patch_1">\n- <path d="M 0 345.6 \n-L 460.8 345.6 \n-L 460.8 0 \n-L 0 0 \n-z\n-" style="fill:#ffffff;"/>\n- </g>\n- <g id="axes_1">\n- <g id="patch_2">\n- <path d="M 57.6 307.584 \n-L 414.72 307.584 \n-L 414.72 41.472 \n-L 57.6 41.472 \n-z\n-" style="fill:#ffffff;"/>\n- </g>\n- <g id="matplotlib.axis_1">\n- <g id="xtick_1">\n- <g id="text_1">\n- <!-- 55079807 -->\n- <defs>\n- <path d="M 10.796875 72.90625 \n-L 49.515625 72.90625 \n-L 49.515625 64.59375 \n-L 19.828125 64.59375 \n-L 19.828125 46.734375 \n-Q 21.96875 47.46875 24.109375 47.828125 \n-Q 26.265625 48.1875 28.421875 48.1875 \n-Q 40.625 48.1875 47.75 41.5 \n-Q 54.890625 34.8125 54.890625 23.390625 \n-Q 54.890625 11.625 47.5625 5.09375 \n-Q 40.234375 -1.421875 26.90625 -1.421875 \n-Q 22.3125 -1.421875 17.546875 -0.640625 \n-Q 12.796875 0.140625 7.71875 1.703125 \n-L 7.71875 11.625 \n-Q 12.109375 9.234375 16.796875 8.0625 \n-Q 21.484375 6.890625 26.703125 6.890625 \n-Q 35.15625 6.890625 40.078125 11.328125 \n-Q 45.015625 15.765625 45.015625 23.390625 \n-Q 45.015625 31 40.078125 35.4375 \n-Q 35.15625 39.890625 26.703125 39.890625 \n-Q 22.75 39.890625 18.8125 39.015625 \n-Q 14.890625 38.140625 10.796875 36.28125 \n-z\n-" id="DejaVuSans-35"/>\n- <path d="M 31.78125 66.40625 \n-Q 24.171875 66.40625 20.328125 58.90625 \n-Q 16.5 51.421875 16.5 36.375 \n-Q 16.5 21.390625 20.328125 13.890625 \n-Q 24.171875 6.390625 31.78125 6.390625 \n-Q 39.453125 6.390625 43.28125 13.890625 \n-Q 47.125 21.390625 47.125 36.375 \n-Q 47.125 51.421875 43.28125 58.90625 \n-Q 39.453125 66.40625 31.78125 66.40625 \n-z\n-M 31.78125 74.21875 \n-Q 44.046875 74.21875 50.515625 64.515625 \n-Q 56.984375 54.828125 56.984375 36.375 \n-Q 56.984375 17.96875 50.515625 8.265625 \n-Q 44.046875 -1.421875 31.78125 -1.421875 \n-Q 19.53125 -1.421875 13.0625 8.265625 \n-Q 6.59375 17.96875 6.59375 36.375 \n-Q 6.59375 54.828125 13.0625 64.515625 \n-Q 19.53125 74.21875 31.78125 74.21875 \n-z\n-" id="DejaVuSans-30"/>\n- <path d="M 8.203125 72.90625 \n-L 55.078125 72.90625 \n-L 55.078125 68.703125 \n-L 28.609375 0 \n-L 18.3125 0 \n-L 43.21875 64.59375 \n-L 8.203125 64.59375 \n-z\n-" id="DejaVuSans-37"/>\n- <path d="M 10.984375 1.515625 \n-L 10.984375 10.5 \n-Q 14.703125 8.734375 18.5 7.8125 \n-Q 22.3125 6.890625 25.984375 6.890625 \n-Q 35.75 6.890625 40.890625 13.453125 \n-Q 46.046875 20.015625 46.78125 33.40625 \n-Q 43.953125 29.203125 39.59375 26.953125 \n-Q 35.25 24.703125 29.984375 24.703125 \n-Q 19.046875 24.703125 12.671875 31.3125 \n-Q 6.296875 37.9375 6.296875 49.421875 \n-Q 6.296875 60.640625 12.9375 67.421875 \n-Q 19.578125 74.21875 30.609375 74.21875 \n-Q 43.265625 74.21875 49.921875 64.515625 \n-Q 56.59375 54.828125 56.59375 36.375 \n-Q 56.59375 19.140625 48.40625 8.859375 \n-Q 40.234375 -1.421875 26.421875 -1.421875 \n-Q 22.703125 -1.421875 18.890625 -0.6875 \n-Q 15.09375 0.046875 10.984375 1.515625 \n-z\n-M 30.609375 32.421875 \n-Q 37.25 32.421875 41.125 36.953125 \n-Q 45.015625 41.5 45.015625 49.421875 \n-Q 45.015625 57.28125 41.125 61.84375 \n-Q 37.25 66.40625 30.609375 66.40625 \n-Q 23.96875 66.40625 20.09375 61.84375 \n-Q 16.21875 57.28125 16.21875 49.421875 \n-Q 16.21875 41.5 20.09375 36.953125 \n-Q 23.96875 32.421875 30.609375 32.421875 \n-z\n-" id="DejaVuSans-39"/>\n- <path d="M 31.78125 34.625 \n-Q 24.75 34.625 20.71875 30.859375 \n-Q 16.703125 27.09375 16.703125 20.515625 \n-Q 16.703125 13.921875 20.71875 10.15625 \n-Q 24.75 6.390625 31.78125 6.390625 \n-Q 38.8125 6.390625 42.859375 10.171875 \n-Q 46.921875 13.96875 46.921875 20.515625 \n-Q 4'..b'59.033203" xlink:href="#DejaVuSans-36"/>\n- </g>\n- </g>\n- </g>\n- <g id="ytick_8">\n- <g id="line2d_8">\n- <g>\n- <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#me28c3a741e" y="53.313113"/>\n- </g>\n- </g>\n- <g id="text_21">\n- <!-- 0.07 -->\n- <g transform="translate(28.334375 57.112332)scale(0.1 -0.1)">\n- <use xlink:href="#DejaVuSans-30"/>\n- <use x="63.623047" xlink:href="#DejaVuSans-2e"/>\n- <use x="95.410156" xlink:href="#DejaVuSans-30"/>\n- <use x="159.033203" xlink:href="#DejaVuSans-37"/>\n- </g>\n- </g>\n- </g>\n- </g>\n- <g id="LineCollection_1">\n- <path clip-path="url(#p7a554818f3)" d="M 98.806154 307.584 \n-L 98.806154 160.244138 \n-L 126.276923 160.244138 \n-L 126.276923 307.584 \n-" style="fill:none;stroke:#008000;stroke-width:1.5;"/>\n- </g>\n- <g id="LineCollection_2">\n- <path clip-path="url(#p7a554818f3)" d="M 208.689231 307.584 \n-L 208.689231 307.584 \n-L 236.16 307.584 \n-L 236.16 307.584 \n-" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n- <path clip-path="url(#p7a554818f3)" d="M 181.218462 307.584 \n-L 181.218462 307.584 \n-L 222.424615 307.584 \n-L 222.424615 307.584 \n-" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n- <path clip-path="url(#p7a554818f3)" d="M 263.630769 307.584 \n-L 263.630769 224.047744 \n-L 291.101538 224.047744 \n-L 291.101538 307.584 \n-" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n- <path clip-path="url(#p7a554818f3)" d="M 201.821538 307.584 \n-L 201.821538 202.211048 \n-L 277.366154 202.211048 \n-L 277.366154 224.047744 \n-" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n- </g>\n- <g id="LineCollection_3">\n- <path clip-path="url(#p7a554818f3)" d="M 318.572308 307.584 \n-L 318.572308 227.498079 \n-L 346.043077 227.498079 \n-L 346.043077 307.584 \n-" style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>\n- <path clip-path="url(#p7a554818f3)" d="M 373.513846 307.584 \n-L 373.513846 225.958341 \n-L 400.984615 225.958341 \n-L 400.984615 307.584 \n-" style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>\n- <path clip-path="url(#p7a554818f3)" d="M 332.307692 227.498079 \n-L 332.307692 178.92987 \n-L 387.249231 178.92987 \n-L 387.249231 225.958341 \n-" style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>\n- </g>\n- <g id="LineCollection_4">\n- <path clip-path="url(#p7a554818f3)" d="M 239.593846 202.211048 \n-L 239.593846 126.040908 \n-L 359.778462 126.040908 \n-L 359.778462 178.92987 \n-" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n- <path clip-path="url(#p7a554818f3)" d="M 153.747692 307.584 \n-L 153.747692 98.265487 \n-L 299.686154 98.265487 \n-L 299.686154 126.040908 \n-" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n- <path clip-path="url(#p7a554818f3)" d="M 112.541538 160.244138 \n-L 112.541538 92.745033 \n-L 226.716923 92.745033 \n-L 226.716923 98.265487 \n-" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n- <path clip-path="url(#p7a554818f3)" d="M 71.335385 307.584 \n-L 71.335385 54.144 \n-L 169.629231 54.144 \n-L 169.629231 92.745033 \n-" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n- </g>\n- <g id="patch_3">\n- <path d="M 57.6 307.584 \n-L 57.6 41.472 \n-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n- </g>\n- <g id="patch_4">\n- <path d="M 414.72 307.584 \n-L 414.72 41.472 \n-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n- </g>\n- <g id="patch_5">\n- <path d="M 57.6 307.584 \n-L 414.72 307.584 \n-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n- </g>\n- <g id="patch_6">\n- <path d="M 57.6 41.472 \n-L 414.72 41.472 \n-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n- </g>\n- </g>\n- </g>\n- <defs>\n- <clipPath id="p7a554818f3">\n- <rect height="266.112" width="357.12" x="57.6" y="41.472"/>\n- </clipPath>\n- </defs>\n-</svg>\n' |
b |
diff -r 70b071de9bee -r 892811a1f12c test-data/Taylor-Butina_Clustering_on_data_q.txt --- a/test-data/Taylor-Butina_Clustering_on_data_q.txt Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -#0 true singletons -#0 false singletons -#clusters: 1 -55091752 12 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091467 55168823 55091849 |
b |
diff -r 70b071de9bee -r 892811a1f12c test-data/sdf2fps_result1.fps --- a/test-data/sdf2fps_result1.fps Sat May 20 08:31:44 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,7 +0,0 @@ -#FPS1 -#num_bits=881 -#type=CACTVS-E_SCREEN/1.0 extended=2 -#software=CACTVS/unknown -#source=/tmp/tmpN2w37z/files/000/dataset_1.dat -#date=2017-05-19T14:27:41 -030e1c000000000000000000000000000000000000000c00000000000000008000000058000010000030200119004c70010060001140044b100040100024040010118060101330e46c21ac5841de06a50788109de11113047100000000001000000000000000080000000000000000 2244 |
b |
diff -r 70b071de9bee -r 892811a1f12c test-data/simsearch_on_tragets_and_q.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/simsearch_on_tragets_and_q.tabular Sat May 20 08:41:21 2017 -0400 |
b |
@@ -0,0 +1,9 @@ +#Simsearch/1 +#num_bits=881 +#type=Tanimoto k=all threshold=0.7 +#software=chemfp/1.1p1 +#queries=./query.fps +#targets=./targets.fps +#query_sources=CID_28434379.sdf +#target_sources=Desktop/3579363516810334491.sdf +13 28434379 6499094 0.9615 6485578 0.9679 55079807 0.9313 3153534 0.9557 55102353 0.9682 55091466 0.9682 55091416 0.9682 6485577 0.9497 55169009 0.9560 55091752 0.9684 55091467 0.9623 55168823 0.9563 55091849 0.9563 |