Previous changeset 4:685a138131f0 (2017-05-20) Next changeset 6:e3a7d6cc87af (2018-03-23) |
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit d786052cd04f8b25eb4aff80b1b9724f62031b61 |
added:
butina_clustering.py butina_clustering.xml mol2fps.xml nxn_clustering.py nxn_clustering.xml sdf2fps.xml static/images/NxN_clustering.png static/images/NxN_clustering.svg test-data/CID_2244.can test-data/CID_2244.inchi test-data/CID_2244.sdf test-data/CID_2244.smi test-data/CID_2244_FP2.fps test-data/CID_2244_FP3.fps test-data/CID_2244_FP4.fps test-data/CID_2244_MACCS.fps test-data/CID_2244_maccs.fps test-data/NxN_Clustering_on_q.svg test-data/Taylor-Butina_Clustering_on_data_q.txt test-data/sdf2fps_result1.fps |
removed:
simsearch.xml test-data/simsearch_on_tragets_and_q.tabular |
b |
diff -r 685a138131f0 -r 57a1a58056a6 butina_clustering.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/butina_clustering.py Sat May 20 12:57:06 2017 -0400 |
[ |
#!/usr/bin/env python
"""
    Modified version of code examples from the chemfp project.
    http://code.google.com/p/chem-fingerprints/
    Thanks to Andrew Dalke of Andrew Dalke Scientific!
"""

import chemfp
import sys
import os
import tempfile
import argparse
import subprocess
from chemfp import search

# External command used to order "<neighbour count> <fingerprint index>" lines:
# numeric (-n), descending (-r), keyed on the first column only (-k 1,1).
SORT_COMMAND = ['sort', '-n', '-r', '-k', '1,1']


def _remove_if_exists(path):
    """Delete the file at *path*; do nothing if it is missing or was never created."""
    if path is None:
        return
    try:
        os.remove(path)
    except OSError:
        pass


def unix_sort(results):
    """
    Yield ``(size, fp_idx)`` tuples ordered by decreasing neighbour count.

    One "<count> <index>" line per fingerprint is written to a temporary
    file, which is ordered with the external ``sort`` command rather than
    in memory. Both temporary files are removed when the generator finishes
    or is discarded.

    results -- search result object providing ``iter_indices()``, yielding
               one sequence of neighbour indices per fingerprint.

    If ``sort`` fails, its stderr and return code are reported on stderr and
    the interpreter exits with that return code.
    """
    unsorted_path = None
    sorted_path = None
    try:
        # Text mode so that writing str works on Python 2 and Python 3 alike.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_unsorted:
            unsorted_path = temp_unsorted.name
            for (i, indices) in enumerate(results.iter_indices()):
                temp_unsorted.write('%s %s\n' % (len(indices), i))

        with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_sorted:
            sorted_path = temp_sorted.name
            with open(unsorted_path) as unsorted_handle:
                # stdout goes straight into the temp file; only stderr is
                # piped, so it is the only stream communicate() returns.
                p = subprocess.Popen(SORT_COMMAND,
                                     stdin=unsorted_handle,
                                     stdout=temp_sorted,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)
                _, stderr = p.communicate()
        return_code = p.returncode

        if return_code:
            sys.stderr.write(stderr)
            sys.stderr.write("Return error code %i from command:\n" % return_code)
            sys.stderr.write("%s\n" % " ".join(SORT_COMMAND))
            sys.exit(return_code)

        with open(sorted_path) as sorted_handle:
            for line in sorted_handle:
                size, fp_idx = line.strip().split()
                yield (int(size), int(fp_idx))
    finally:
        _remove_if_exists(unsorted_path)
        _remove_if_exists(sorted_path)


def butina(args):
    """
    Taylor-Butina clustering from the chemfp help.

    args -- parsed command line options providing ``input_path`` (FPS file),
            ``output_path`` (writable file object, closed on return),
            ``tanimoto_threshold`` and ``processors``.

    Writes three '#' header lines (true singletons, false singletons,
    clusters), then one tab-separated line per cluster (centroid id, member
    count, space-separated member ids), then one line per true singleton
    with a member count of 0.
    """
    out = args.output_path
    targets = chemfp.open(args.input_path, format='fps')
    arena = chemfp.load_fingerprints(targets)

    chemfp.set_num_threads(args.processors)
    results = search.threshold_tanimoto_search_symmetric(arena, threshold=args.tanimoto_threshold)
    results.reorder_all("move-closest-first")

    sorted_ids = unix_sort(results)

    # Determine the true/false singletons and the clusters
    true_singletons = []
    false_singletons = []
    clusters = []

    seen = set()
    for (size, fp_idx) in sorted_ids:
        if fp_idx in seen:
            # Can't use a centroid which is already assigned
            continue
        seen.add(fp_idx)

        if size == 0:
            # The only fingerprint in the exclusion sphere is itself
            true_singletons.append(fp_idx)
            continue

        # Figure out which ones haven't yet been assigned
        members = results[fp_idx].get_indices()
        unassigned = set(members) - seen

        if not unassigned:
            false_singletons.append(fp_idx)
            continue

        # this is a new cluster
        clusters.append((fp_idx, unassigned))
        seen.update(unassigned)

    out.write("#%s true singletons\n" % len(true_singletons))
    out.write("#%s false singletons\n" % len(false_singletons))
    out.write("#clusters: %s\n" % len(clusters))

    # Sort so the cluster with the most compounds comes first,
    # then by alphabetically smallest id
    def cluster_sort_key(cluster):
        centroid_idx, members = cluster
        return -len(members), arena.ids[centroid_idx]

    clusters.sort(key=cluster_sort_key)

    for centroid_idx, members in clusters:
        centroid_name = arena.ids[centroid_idx]
        out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(arena.ids[idx] for idx in members)))
        # TODO: len(members) need to be some biggest top 90% or something ...

    for idx in true_singletons:
        out.write("%s\t%s\n" % (arena.ids[idx], 0))

    out.close()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="""Taylor-Butina clustering for fps files.
For more details please see the original publication or the chemfp documentation:
http://www.chemomine.co.uk/dbclus-paper.pdf
https://chemfp.readthedocs.org
""")

    parser.add_argument("-i", "--input", dest="input_path",
                        required=True,
                        help="Path to the input file.")

    parser.add_argument("-o", "--output", dest="output_path", type=argparse.FileType('w'),
                        default=sys.stdout,
                        help="Path to the output file.")

    parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", type=float,
                        default=0.8,
                        help="Tanimoto threshold [0.8]")

    parser.add_argument('-p', '--processors', type=int, default=4)

    options = parser.parse_args()
    butina(options)
b |
diff -r 685a138131f0 -r 57a1a58056a6 butina_clustering.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/butina_clustering.xml Sat May 20 12:57:06 2017 -0400 |
[ |
@@ -0,0 +1,93 @@ +<tool id="ctb_chemfp_butina_clustering" name="Taylor-Butina Clustering" version="0.2"> + <description>of molecular fingerprints</description> + <requirements> + <requirement type="package" version="1.1p1">chemfp</requirement> + <requirement type="package" version="2.4.1">openbabel</requirement> + </requirements> + <command detect_errors="exit_code"> +<![CDATA[ + python '$__tool_directory__/butina_clustering.py' + -i '$infile' + -t $threshold + -o '$outfile' + -p \${GALAXY_SLOTS:-1} +]]> + </command> + <inputs> + <param name="infile" type="data" format="fps" label="Fingerprint dataset" help="Dataset missing? See TIP below"/> + <param name='threshold' type='float' value='0.8'/> + </inputs> + <outputs> + <data format="tabular" name="outfile"/> + </outputs> + <tests> + <test> + <param name="infile" ftype="fps" value="targets.fps"/> + <param name='threshold' value='0.8' ></param> + <output name="outfile" ftype="tabular" file='Taylor-Butina_Clustering_on_data_q.txt'/> + </test> + </tests> +<help> +<![CDATA[ + + +.. class:: infomark + +**What this tool does** + +Unsupervised non-hierarchical clustering method based on the Taylor-Butina algorithm, which guarantees that every cluster contains molecules which are within a distance cutoff of the central molecule. This tool is based on the chemfp_ project. + +.. _chemfp: http://chemfp.com/ + +----- + +.. class:: infomark + +**Input** + +| Molecular fingerprints in FPS format. +| Open Babel Fastsearch index is not supported. 
+ +* Example:: + + - fingerprints in FPS format + + #FPS1 + #num_bits=881 + #type=CACTVS-E_SCREEN/1.0 extended=2 + #software=CACTVS/unknown + #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat + #date=2012-02-09T13:20:37 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e + 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e + 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807 + ........ + + - Tanimoto threshold : 0.8 (between 0 and 1) + +----- + +.. class:: infomark + +**Output** + +Tab-separated text. Three header lines report the number of true singletons, false singletons and clusters. Each following line holds a cluster centroid, the number of cluster members and the space-separated member IDs; true singletons are listed last with a member count of 0. + +* Example:: + + #0 true singletons + #0 false singletons + #clusters: 1 + 55091849 12 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823 + + +]]> + </help> + <citations> + <citation type="doi">10.1186/1758-2946-5-S1-P36</citation> + </citations> +</tool> |
b |
diff -r 685a138131f0 -r 57a1a58056a6 mol2fps.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mol2fps.xml Sat May 20 12:57:06 2017 -0400 |
[ |
b'@@ -0,0 +1,276 @@\n+<tool id="ctb_chemfp_mol2fps" name="Molecules to Fingerprints" version="0.3.0">\n+ <description>with different fingerprint types</description>\n+ <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" merge_outputs="outfile"></parallelism-->\n+ <requirements>\n+ <requirement type="package" version="1.1p1">chemfp</requirement>\n+ <requirement type="package" version="2016.03.3">rdkit</requirement>\n+ <requirement type="package" version="2.4.1">openbabel</requirement>\n+ </requirements>\n+ <command>\n+<![CDATA[\n+ #set $fptype = $fp_opts.fp_opts_selector\n+\n+ #if $fptype in [\'--FP2\', \'--FP3\', \'--FP4\', \'--MACCS\']:\n+ ## Open Babel fingerprints\n+ ob2fps $fptype --in \'${infile.ext}\' \'${infile}\' -o \'${outfile}\'\n+ #else:\n+ ## RDKit fingerprints\n+ rdkit2fps --in \'${infile.ext}\' \'${infile}\' -o \'${outfile}\'\n+ #if $fp_opts.fp_opts_selector == "--RDK":\n+ --RDK\n+ --fpSize $fp_opts.fpSize\n+ --minPath $fp_opts.minPath\n+ --maxPath $fp_opts.maxPath\n+ --nBitsPerHash $fp_opts.nBitsPerHash\n+ $fp_opts.useHs\n+ #elif $fp_opts.fp_opts_selector == "--torsions":\n+ --torsions\n+ --fpSize $fp_opts.fpSize\n+ --targetSize $fp_opts.targetSize\n+ #elif $fp_opts.fp_opts_selector == "--morgan":\n+ --morgan\n+ --fpSize $fp_opts.fpSize\n+ --radius $fp_opts.radius\n+ $fp_opts.useFeatures\n+ $fp_opts.useChirality\n+ $fp_opts.useBondTypes\n+ #elif $fp_opts.fp_opts_selector == "--pairs":\n+ --pairs\n+ --fpSize $fp_opts.fpSize\n+ --minLength $fp_opts.minLength\n+ --maxLength $fp_opts.maxLength\n+ #elif $fp_opts.fp_opts_selector == "--maccs166":\n+ --maccs166\n+ #elif $fp_opts.fp_opts_selector == "--substruct":\n+ --substruct\n+ #end if\n+ #end if\n+ --errors report 2>&1\n+]]>\n+ </command>\n+ <inputs>\n+ <param name="infile" type=\'data\' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/>\n+ <conditional name="fp_opts">\n+ <param name="fp_opts_selector" type="select" label="Type of fingerprint">\n+ 
<option value=\'--FP2\' selected="True">Open Babel FP2 fingerprints</option>\n+ <option value=\'--FP3\'>Open Babel FP3 fingerprints</option>\n+ <option value=\'--FP4\'>Open Babel FP4 fingerprints</option>\n+ <option value=\'--MACCS\'>Open Babel MACCS fingerprints</option>\n+ <option value=\'--RDK\'>RDKit topological fingerprint</option>\n+ <option value=\'--torsions\'>RDKit topological Torsion fingerprints</option>\n+ <option value=\'--morgan\'>RDKit Morgan fingerprints</option>\n+ <option value=\'--pairs\'>RDKit Atom Pair fingerprints</option>\n+ <option value=\'--maccs166\'>RDKit MACCS fingerprints</option>\n+ <option value=\'--substruct\'>RDKit substructure fingerprints</option>\n+ </param>\n+ <when value="--FP2" />\n+ <when value="--FP3" />\n+ <when value="--FP4" />\n+ <when value="--MACCS" />\n+ <when value="--RDK">\n+ <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">\n+ <validator type="in_range" min="1" />\n+ </param>\n+ <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help="">\n+ <validator type="in_range" min="1" />\n+ </param>\n+ <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the s'..b' <test>\n+ <param name="infile" value="CID_2244.smi" ftype="smi" />\n+ <param name="fp_opts.fp_opts_selector" value="--FP3" />\n+ <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" lines_diff="4"/>\n+ </test>\n+ <!-- FP4 -->\n+ <test>\n+ <param name="infile" value="CID_2244.sdf" ftype="sdf" />\n+ <param name="fp_opts.fp_opts_selector" value="--FP4" />\n+ <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/>\n+ </test>\n+ <test>\n+ <param name="infile" value="CID_2244.smi" ftype="smi" />\n+ <param name="fp_opts.fp_opts_selector" value="--FP4" />\n+ <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/>\n+ </test>\n+ <!-- MACCS -->\n+ <test>\n+ <param name="infile" 
value="CID_2244.sdf" ftype="sdf" />\n+ <param name="fp_opts.fp_opts_selector" value="--MACCS" />\n+ <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/>\n+ </test>\n+ <test>\n+ <param name="infile" value="CID_2244.smi" ftype="smi" />\n+ <param name="fp_opts.fp_opts_selector" value="--MACCS" />\n+ <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/>\n+ </test>\n+ </tests>\n+ <help>\n+<![CDATA[\n+\n+.. class:: infomark\n+\n+**What this tool does**\n+\n+This tool uses chemfp_ to calculate 10 different fingerprints of common file formats. Chemfp uses `Open Babel`_, OpenEye_ and RDKit_.\n+\n+For more information check the websites listed below::\n+\n+\t- http://www.rdkit.org/docs/GettingStartedInPython.html#fingerprinting-and-molecular-similarity\n+\t- http://openbabel.org/wiki/Tutorial:Fingerprints\n+\n+-----\n+\n+.. class:: infomark\n+\n+**Input**\n+\n+FPS fingerprint file format\n+\n+* Example::\n+\n+\t - SDF File\n+\n+\t\t28434379\n+\t\t -OEChem-02031205132D\n+\n+\t\t 37 39 0 0 0 0 0 0 0999 V2000\n+\t\t 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n+\t\t 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n+\t\t 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n+\t\t 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n+\t\t 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n+\t\t 7.3704 0.9433 0.0000 C 0 0 0 0\n+\t\t ......\n+\t\t 1 15 1 0 0 0 0\n+\t\t 1 35 1 0 0 0 0\n+\t\t 2 5 1 0 0 0 0\n+\t\t 2 11 1 0 0 0 0\n+\t\t 2 12 1 0 0 0 0\n+\t\t 3 12 2 0 0 0 0\n+\t\t 3 13 1 0 0 0 0\n+\t\t 4 18 1 0 0 0 0\n+\t\t ......\n+\n+\t\t\t>PUBCHEM_COMPOUND_CID<\n+\t\t\t28434379\n+\n+\t\t\t> <PUBCHEM_COMPOUND_CANONICALIZED>\n+\t\t\t1\n+\n+\t\t\t> <PUBCHEM_CACTVS_COMPLEXITY>\n+\t\t\t280\n+\n+\t\t\t> <PUBCHEM_CACTVS_HBOND_ACCEPTOR>\n+\t\t\t2\n+\n+\t\t\t> <PUBCHEM_CACTVS_HBOND_DONOR>\n+\t\t\t2\n+\n+\t\t\t> <PUBCHEM_CACTVS_ROTATABLE_BOND>\n+\t\t\t2\n+\n+\t\t\t> 
<PUBCHEM_CACTVS_SUBSKEYS>\n+\t\t\tAAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==\n+\n+\t\t\t>\n+\n+\t\t- type : FP2\n+\n+-----\n+\n+.. class:: infomark\n+\n+**Output**\n+\n+* Example::\n+\n+\t#FPS1\n+\t#num_bits=1021\n+\t#type=OpenBabel-FP2/1\n+\t#software=OpenBabel/2.3.0\n+\t#source=/tmp/dataset_409.dat.sdf\n+\t#date=2012-02-03T11:13:39\n+\tc0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c\n+\t0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300\n+\t10000000000080000000c0000060000c0000060810000010000000800102000000\t28434379\n+\n+\n+]]>\n+ </help>\n+ <citations>\n+ <citation type="doi">10.1186/1758-2946-3-33</citation>\n+ <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>\n+ <citation type="bibtex">\n+ @electronic{rdkit,\n+ title = {RDKit: Open-source cheminformatics},\n+ url ={http://www.rdkit.org}\n+ }\n+ </citation>\n+ </citations>\n+</tool>\n' |
b |
diff -r 685a138131f0 -r 57a1a58056a6 nxn_clustering.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nxn_clustering.py Sat May 20 12:57:06 2017 -0400 |
[ |
#!/usr/bin/env python
"""
    Modified version of code examples from the chemfp project.
    http://code.google.com/p/chem-fingerprints/
    Thanks to Andrew Dalke of Andrew Dalke Scientific!
"""
import matplotlib
matplotlib.use('Agg')  # select the non-interactive backend before pylab is imported
import argparse
import os
import chemfp
import scipy.cluster.hierarchy as hcluster
import pylab
import numpy
from chemfp import search


def distance_matrix(arena, tanimoto_threshold=0.0, processors=None):
    """
    Return the NxN matrix of ``1.0 - Tanimoto similarity`` for *arena*.

    arena              -- fingerprint arena as returned by
                          ``chemfp.load_fingerprints``.
    tanimoto_threshold -- pairs scoring below this value are not reported by
                          the search and keep similarity 0.0 (distance 1.0).
    processors         -- number of threads handed to
                          ``chemfp.set_num_threads``; ``None`` leaves the
                          current chemfp setting untouched.

    Raises ``Exception`` if the dense matrix cannot be allocated.
    """
    n = len(arena)
    # Start off a similarity matrix with 1.0s along the diagonal
    try:
        similarities = numpy.identity(n, "d")
    except (MemoryError, ValueError):
        raise Exception('Input dataset is too large!')
    if processors is not None:
        chemfp.set_num_threads(processors)

    # Compute the full similarity matrix.
    # The implementation computes the upper-triangle then copies
    # the upper-triangle into lower-triangle. It does not include
    # terms for the diagonal.
    results = search.threshold_tanimoto_search_symmetric(arena, threshold=tanimoto_threshold)

    # Copy the results into the NumPy array.
    for row_index, row in enumerate(results.iter_indices_and_scores()):
        for target_index, target_score in row:
            similarities[row_index, target_index] = target_score

    # Return the distance matrix using the similarity matrix
    return 1.0 - similarities


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="""NxN clustering for fps files.
For more details please see the chemfp documentation:
https://chemfp.readthedocs.org
""")

    parser.add_argument("-i", "--input", dest="input_path",
                        required=True,
                        help="Path to the input file.")

    parser.add_argument("-c", "--cluster", dest="cluster_image",
                        help="Path to the output cluster image.")

    parser.add_argument("-s", "--smatrix", dest="similarity_matrix",
                        help="Path to the similarity matrix output file.")

    parser.add_argument("-t", "--threshold", dest="tanimoto_threshold",
                        type=float, default=0.0,
                        help="Tanimoto threshold [0.0]")

    parser.add_argument("--oformat", default='png', help="Output format (png, svg)")

    parser.add_argument('-p', '--processors', type=int,
                        default=4)

    args = parser.parse_args()

    targets = chemfp.open(args.input_path, format='fps')
    arena = chemfp.load_fingerprints(targets)
    distances = distance_matrix(arena, args.tanimoto_threshold, args.processors)

    if args.similarity_matrix:
        # NOTE(review): despite the option name, the values written are the
        # distances (1 - similarity), dumped as raw binary by numpy's tofile().
        distances.tofile(args.similarity_matrix)

    if args.cluster_image:
        # NOTE(review): linkage() receives the square matrix, so scipy treats
        # each row as an observation vector. Kept as is so existing images
        # stay reproducible -- confirm whether a condensed matrix was meant.
        linkage = hcluster.linkage(distances, method="single", metric="euclidean")

        hcluster.dendrogram(linkage, labels=arena.ids)

        pylab.savefig(args.cluster_image, format=args.oformat)
b |
diff -r 685a138131f0 -r 57a1a58056a6 nxn_clustering.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nxn_clustering.xml Sat May 20 12:57:06 2017 -0400 |
[ |
@@ -0,0 +1,120 @@ +<tool id="ctb_chemfp_nxn_clustering" name="NxN Clustering" version="0.4"> + <description>of molecular fingerprints</description> + <requirements> + <requirement type="package" version="1.1p1">chemfp</requirement> + <requirement type="package" version="2">python</requirement> + <requirement type="package" version="2.0.2">matplotlib</requirement> + <requirement type="package" version="0.19.0">scipy</requirement> + <requirement type="package" version="2.4.1">openbabel</requirement> + </requirements> + <command detect_errors="exit_code"> +<![CDATA[ + python '$__tool_directory__/nxn_clustering.py' + -i '$infile' + -t $threshold + #if str($output_files) in ['both', 'image']: + --cluster '$image' + #end if + #if str($output_files) in ['both', 'matrix']: + --smatrix '$smilarity_matrix' + #end if + --oformat '$oformat' + -p \${GALAXY_SLOTS:-1} +]]> + </command> + <inputs> + <param name="infile" type="data" format="fps" label="Fingerprint dataset" help="Dataset missing? See TIP below"/> + <param name='threshold' type='float' value='0.0' /> + <param name='oformat' type='select' format='text' label="Format of the resulting picture"> + <option value='png'>PNG</option> + <option value='svg'>SVG</option> + </param> + <param name='output_files' type='select' format='text' label="Output options"> + <option value='both'>NxN matrix and Image</option> + <option value='image'>Image</option> + <option value='matrix'>NxN Matrix</option> + </param> + + </inputs> + <outputs> + <data name="image" format="svg" label="${tool.name} on ${on_string} - Cluster Image"> + <filter>output_files == "both" or output_files == "image"</filter> + <change_format> + <when input="oformat" value="png" format="png"/> + </change_format> + </data> + <data name="smilarity_matrix" format="binary" label="${tool.name} on ${on_string} - Similarity Matrix"> + <filter>output_files == "both" or output_files == "matrix"</filter> + </data> + </outputs> + <tests> + <test> + <param name="infile" ftype="fps" value="targets.fps" /> 
+ <param name='threshold' value='0.75' /> + <param name='oformat' value='svg' /> + <param name='output_files' value='image' /> + <output name="image" file='NxN_Clustering_on_q.svg' ftype="svg" compare="sim_size"/> + </test> + </tests> + <help> +<![CDATA[ + +.. class:: infomark + +**What this tool does** + +Generating hierarchical clusters and visualizing clusters with dendrograms. +For the clustering and the fingerprint handling the chemfp_ project is used. + +.. _chemfp: http://chemfp.com/ + +----- + +.. class:: warningmark + +**Hint** + +The plotting of the cluster image is sensible only with a small dataset. + +----- + +.. class:: infomark + +**Input** + +Molecular fingerprints in FPS format. Open Babel Fastsearch index is not supported. + +* Example:: + + - fingerprints in FPS format + + #FPS1 + #num_bits=881 + #type=CACTVS-E_SCREEN/1.0 extended=2 + #software=CACTVS/unknown + #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat + #date=2012-02-09T13:20:37 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e + 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e + 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807 + ........ + + - Tanimoto threshold : 0.8 (between 0 and 1) + +----- + +.. class:: infomark + +**Output** + +* Example:: + + .. image:: $PATH_TO_IMAGES/NxN_clustering.png + + +]]> + </help> + <citations> + <citation type="doi">10.1186/1758-2946-5-S1-P36</citation> + </citations> +</tool> |
b |
diff -r 685a138131f0 -r 57a1a58056a6 sdf2fps.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sdf2fps.xml Sat May 20 12:57:06 2017 -0400 |
[ |
@@ -0,0 +1,116 @@ +<tool id="ctb_sdf2fps" name="SDF to Fingerprint" version="0.2"> + <description>extract fingerprints from sdf files metadata</description> + <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism--> + <requirements> + <requirement type="package" version="1.1p1">chemfp</requirement> + <requirement type="package" version="2.4.1">openbabel</requirement> + </requirements> + <command> +<![CDATA[ + sdf2fps --pubchem '${infile}' > '${outfile}' +]]> + </command> + <inputs> + <param name="infile" type='data' format="sdf" label="SDF file with fingerprints as metadata"/> + </inputs> + <outputs> + <data name="outfile" format="fps"/> + </outputs> + <tests> + <test> + <param name="infile" ftype="sdf" value="CID_2244.sdf" /> + <output name="outfile" file='sdf2fps_result1.fps' ftype="fps" lines_diff="4" /> + </test> + </tests> + <help> +<![CDATA[ + +.. class:: infomark + +**What this tool does** + +Read an input SD file, extract the fingerprints and store them in a FPS-file. + +----- + +.. class:: infomark + +**Input** + +`SD-Format`_ + +.. _`SD-Format`: http://en.wikipedia.org/wiki/Chemical_table_file + +* Example:: + + 28434379 + -OEChem-02031205132D + + 37 39 0 0 0 0 0 0 0999 V2000 + 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 7.3704 0.9433 0.0000 C 0 0 0 0 + ...... + 1 15 1 0 0 0 0 + 1 35 1 0 0 0 0 + 2 5 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 12 1 0 0 0 0 + 3 12 2 0 0 0 0 + 3 13 1 0 0 0 0 + 4 18 1 0 0 0 0 + ...... 
+ + > <PUBCHEM_COMPOUND_CID> + 28434379 + + > <PUBCHEM_COMPOUND_CANONICALIZED> + 1 + + > <PUBCHEM_CACTVS_COMPLEXITY> + 280 + + > <PUBCHEM_CACTVS_HBOND_ACCEPTOR> + 2 + + > <PUBCHEM_CACTVS_HBOND_DONOR> + 2 + + > <PUBCHEM_CACTVS_ROTATABLE_BOND> + 2 + + > <PUBCHEM_CACTVS_SUBSKEYS> + AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== + + > + +----- + +.. class:: infomark + +**Output** + +* Example:: + + #FPS1 + #num_bits=881 + #type=CACTVS-E_SCREEN/1.0 extended=2 + #software=CACTVS/unknown + #source=/home/mohammed/galaxy-central/database/files/000/dataset_409.dat + #date=2012-02-03T10:44:12 + 07ce04000000000000000000000000000080060000000c0600 + 00000000001a800f0000780008100000101487e9608c0bed32 + 48000580644626204101b4844805901b041c2e19511e45039b + 8b2924101609401b13e4080000000000010020000004008000 + 0010000002000000000000 28434379 + + +]]> + </help> + <citations> + <citation type="doi">10.1186/1758-2946-5-S1-P36</citation> + </citations> +</tool> |
b |
diff -r 685a138131f0 -r 57a1a58056a6 simsearch.xml --- a/simsearch.xml Sat May 20 12:45:01 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,121 +0,0 @@ -<tool id="ctb_simsearch" name="Similarity Search" version="0.2"> - <description>of fingerprint data sets</description> - <requirements> - <requirement type="package" version="1.1p1">chemfp</requirement> - </requirements> - <command> -<![CDATA[ - #if $method_opts.method_opts_selector == "chemfp": - ln -s '${method_opts.query_opts.targets}' ./targets.fps && - - #if $method_opts.query_opts.query_opts_selector == "normal": - ln -s '${method_opts.query_opts.query}' ./query.fps && - #end if - - simsearch - #if int($method_opts.knn) == 0: - #set $k = 'all' - ## count is only available if k nearest neighbor search is disabled - $method_opts.counts - #else: - #set $k = int($method_opts.knn) - #end if - - -k $k - --threshold $method_opts.threshold - -o ./output.fps - - ## build and search an in-memory data structure (faster for multiple queries) - --memory - - #if $method_opts.query_opts.query_opts_selector == "normal": - -q ./query.fps - #else: - --NxN - #end if - - ./targets.fps - && - mv ./output.fps '${outfile}' - #else: - ## OpenBabel needs the original molecule file (molecule.'fileformat') next to the fastsearch index (molecule.fs). We use a composite datatype to accomplish that. - ## Furthermore OpenBabel is really picky with file extensions. We need to specify every datatype. I did not find a solution to specify the query-filetype. - ## A workaround is to create a symlink with a proper file-extension. 
- ln -s '$method_opts.query' ./temp_query.$method_opts.query.ext - obabel - -i fs '$method_opts.fastsearch.files_path/molecule.fs' - -S ./temp_query.$method_opts.query.ext - -at${method_opts.threshold} - -O '${outfile}' - -osmi - -aa - #end if -]]> - </command> - <inputs> - <conditional name="method_opts"> - <param name="method_opts_selector" type="select" label="Subject database/sequences"> - <option value="chemfp">Chemfp fingerprint file</option> - <option value="obabel">OpenBabel Fastsearch Index</option> - </param> - <when value="chemfp"> - <conditional name="query_opts"> - <param name="query_opts_selector" type="select" label="Query Mode"> - <option value="normal">Query molecules are stores in a separate file</option> - <option value="nxn">Target molecules are also queries (NxN)</option> - </param> - <when value="normal"> - <param name='query' type='data' format="fps" label='Query molecules'/> - <param name='targets' type='data' format="fps" label='Target molecules'/> - </when> - <when value="nxn"> - <param name='targets' type='data' format="fps" label='Target moleculs'/> - </when> - </conditional> - <param name='knn' type='integer' value='0' label='select the k nearest neighbors' help='0 means all neighbors'> - <validator type="in_range" min="0" /> - </param> - <param name='threshold' type='float' value='0.7' label='threshold' /> - <param name="counts" type="boolean" truevalue="-c" falsevalue="" checked="false" label="report counts (-c)" help="Is ignored if k nearest neighbor search is enabled" /> - </when> - <when value="obabel"> - <param name="query" type='data' format="smi,mol,sdf,inchi" label="query"/> - <param name="fastsearch" type='data' format="obfs" label="OpenBabel Fastsearch Index"/> - <param name="threshold" type='float' label="threshold" value='0.7'/> - </when> - </conditional> - - </inputs> - <outputs> - <data name="outfile" format="tabular" /> - </outputs> - <tests> - <test> - <param name="targets" ftype="fps" value="targets.fps"/> - <param 
name="query" ftype="fps" value="q.fps"/> - <param name="k" value='4'/> - <param name="th" value='0.7'/> - <output name="outfile" ftype="tabular" file="simsearch_on_tragets_and_q.tabular"/> - </test> - </tests> - <help> -<![CDATA[ - - -.. class:: infomark - -**What this tool does** - -Similarity searches using a variety of different fingerprints using either the chemfp_ FPS type or the Open Babel FastSearch_ index. - -.. _chemfp: http://chemfp.com/ -.. _FastSearch: http://openbabel.org/wiki/FastSearch - - -]]> - </help> - <citations> - <citation type="doi">10.1186/1758-2946-3-33</citation> - <citation type="doi">10.1186/1758-2946-5-S1-P36</citation> - </citations> -</tool> |
b |
diff -r 685a138131f0 -r 57a1a58056a6 static/images/NxN_clustering.png |
b |
Binary file static/images/NxN_clustering.png has changed |
b |
diff -r 685a138131f0 -r 57a1a58056a6 static/images/NxN_clustering.svg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/static/images/NxN_clustering.svg Sat May 20 12:57:06 2017 -0400 |
b |
b'@@ -0,0 +1,2275 @@\n+<?xml version="1.0" encoding="utf-8" standalone="no"?>\n+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"\n+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n+<!-- Created with matplotlib (http://matplotlib.org/) -->\n+<svg height="432pt" version="1.1" viewBox="0 0 576 432" width="576pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">\n+ <defs>\n+ <style type="text/css">\n+*{stroke-linecap:square;stroke-linejoin:round;}\n+ </style>\n+ </defs>\n+ <g id="figure_1">\n+ <g id="patch_1">\n+ <path d="\n+M0 432\n+L576 432\n+L576 0\n+L0 0\n+z\n+" style="fill:#ffffff;"/>\n+ </g>\n+ <g id="axes_1">\n+ <g id="patch_2">\n+ <path d="\n+M72 388.8\n+L518.4 388.8\n+L518.4 43.2\n+L72 43.2\n+z\n+" style="fill:#ffffff;"/>\n+ </g>\n+ <g id="LineCollection_1">\n+ <defs>\n+ <path d="\n+M80.4759 -43.2\n+L80.4759 -89.4738\n+L86.1266 -89.4738\n+L86.1266 -43.2" id="C0_0_a27cbf3dad"/>\n+ <path d="\n+M74.8253 -43.2\n+L74.8253 -128.527\n+L83.3013 -128.527\n+L83.3013 -89.4738" id="C0_1_0365ccf33e"/>\n+ <path d="\n+M227.392 -43.2\n+L227.392 -110.195\n+L233.043 -110.195\n+L233.043 -43.2" id="C0_2_16a64a88b9"/>\n+ <path d="\n+M221.742 -43.2\n+L221.742 -111.088\n+L230.218 -111.088\n+L230.218 -110.195" id="C0_3_1e06391595"/>\n+ <path d="\n+M244.344 -43.2\n+L244.344 -178.829\n+L249.995 -178.829\n+L249.995 -43.2" id="C0_4_9522133b75"/>\n+ <path d="\n+M238.694 -43.2\n+L238.694 -187.132\n+L247.17 -187.132\n+L247.17 -178.829" id="C0_5_e4f3e58d26"/>\n+ <path d="\n+M225.98 -111.088\n+L225.98 -262.87\n+L242.932 -262.87\n+L242.932 -187.132" id="C0_6_ff944847e7"/>\n+ <path d="\n+M396.911 -43.2\n+L396.911 -224.631\n+L402.562 -224.631\n+L402.562 -43.2" id="C0_7_0906a9df02"/>\n+ <path d="\n+M391.261 -43.2\n+L391.261 -233.371\n+L399.737 -233.371\n+L399.737 -224.631" id="C0_8_ed58b0afb2"/>\n+ <path d="\n+M408.213 -43.2\n+L408.213 -243.035\n+L413.863 -243.035\n+L413.863 -43.2" id="C0_9_84c2cf03f5"/>\n+ <path d="\n+M395.499 -233.371\n+L395.499 
-269.685\n+L411.038 -269.685\n+L411.038 -243.035" id="C0_a_6ef56ffb7b"/>\n+ <path d="\n+M385.61 -43.2\n+L385.61 -270.198\n+L403.268 -270.198\n+L403.268 -269.685" id="C0_b_c4ff70daa4"/>\n+ </defs>\n+ <g clip-path="url(#p7ff5b81e1d)">\n+ <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_0_a27cbf3dad" y="432.0"/>\n+ </g>\n+ <g clip-path="url(#p7ff5b81e1d)">\n+ <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_1_0365ccf33e" y="432.0"/>\n+ </g>\n+ <g clip-path="url(#p7ff5b81e1d)">\n+ <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_2_16a64a88b9" y="432.0"/>\n+ </g>\n+ <g clip-path="url(#p7ff5b81e1d)">\n+ <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_3_1e06391595" y="432.0"/>\n+ </g>\n+ <g clip-path="url(#p7ff5b81e1d)">\n+ <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_4_9522133b75" y="432.0"/>\n+ </g>\n+ <g clip-path="url(#p7ff5b81e1d)">\n+ <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_5_e4f3e58d26" y="432.0"/>\n+ </g>\n+ <g clip-path="url(#p7ff5b81e1d)">\n+ <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_6_ff944847e7" y="432.0"/>\n+ </g>\n+ <g clip-path="url(#p7ff5b81e1d)">\n+ <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_7_0906a9df02" y="432.0"/>\n+ </g>\n+ <g clip-path="url(#p7ff5b81e1d)">\n+ <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_8_ed58b0afb2" y="432.0"/>\n+ </g>\n+ <g clip-path="url(#p7ff5b81e1d)">\n+ <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_9_84c2cf03f5" y="432.0"/>\n+ </g>\n+ <g clip-path="url(#p7ff5b81e1d)">\n+ <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_a_6ef56ffb7b" y="432.0"/>\n+ </g>\n+ <g clip-path="url(#p7ff5b81e1d)">\n+ <use style="fill:n'..b'xlink:href="#m0d5b0a6425" 
y="286.871994251"/>\n+ </g>\n+ </g>\n+ <g id="text_82">\n+ <!-- 0.4 -->\n+ <g transform="translate(50.380625 291.239806751)scale(0.12 -0.12)">\n+ <use xlink:href="#BitstreamVeraSans-Roman-30"/>\n+ <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n+ <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-34"/>\n+ </g>\n+ </g>\n+ </g>\n+ <g id="ytick_4">\n+ <g id="line2d_7">\n+ <g>\n+ <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#mc8fcea1516" y="235.907991376"/>\n+ </g>\n+ </g>\n+ <g id="line2d_8">\n+ <g>\n+ <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m0d5b0a6425" y="235.907991376"/>\n+ </g>\n+ </g>\n+ <g id="text_83">\n+ <!-- 0.6 -->\n+ <g transform="translate(50.463125 240.275803876)scale(0.12 -0.12)">\n+ <use xlink:href="#BitstreamVeraSans-Roman-30"/>\n+ <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n+ <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-36"/>\n+ </g>\n+ </g>\n+ </g>\n+ <g id="ytick_5">\n+ <g id="line2d_9">\n+ <g>\n+ <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#mc8fcea1516" y="184.943988502"/>\n+ </g>\n+ </g>\n+ <g id="line2d_10">\n+ <g>\n+ <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m0d5b0a6425" y="184.943988502"/>\n+ </g>\n+ </g>\n+ <g id="text_84">\n+ <!-- 0.8 -->\n+ <g transform="translate(50.52875 189.311801002)scale(0.12 -0.12)">\n+ <use xlink:href="#BitstreamVeraSans-Roman-30"/>\n+ <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n+ <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-38"/>\n+ </g>\n+ </g>\n+ </g>\n+ <g id="ytick_6">\n+ <g id="line2d_11">\n+ <g>\n+ <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#mc8fcea1516" y="133.979985627"/>\n+ </g>\n+ </g>\n+ <g id="line2d_12">\n+ <g>\n+ <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" 
xlink:href="#m0d5b0a6425" y="133.979985627"/>\n+ </g>\n+ </g>\n+ <g id="text_85">\n+ <!-- 1.0 -->\n+ <g transform="translate(51.03125 138.347798127)scale(0.12 -0.12)">\n+ <use xlink:href="#BitstreamVeraSans-Roman-31"/>\n+ <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n+ <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-30"/>\n+ </g>\n+ </g>\n+ </g>\n+ <g id="ytick_7">\n+ <g id="line2d_13">\n+ <g>\n+ <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#mc8fcea1516" y="83.0159827526"/>\n+ </g>\n+ </g>\n+ <g id="line2d_14">\n+ <g>\n+ <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m0d5b0a6425" y="83.0159827526"/>\n+ </g>\n+ </g>\n+ <g id="text_86">\n+ <!-- 1.2 -->\n+ <g transform="translate(51.43625 87.4691077526)scale(0.12 -0.12)">\n+ <use xlink:href="#BitstreamVeraSans-Roman-31"/>\n+ <use x="63.623046875" xlink:href="#BitstreamVeraSans-Roman-2e"/>\n+ <use x="95.41015625" xlink:href="#BitstreamVeraSans-Roman-32"/>\n+ </g>\n+ </g>\n+ </g>\n+ </g>\n+ <g id="patch_3">\n+ <path d="\n+M72 43.2\n+L518.4 43.2" style="fill:none;stroke:#000000;"/>\n+ </g>\n+ <g id="patch_4">\n+ <path d="\n+M518.4 388.8\n+L518.4 43.2" style="fill:none;stroke:#000000;"/>\n+ </g>\n+ <g id="patch_5">\n+ <path d="\n+M72 388.8\n+L518.4 388.8" style="fill:none;stroke:#000000;"/>\n+ </g>\n+ <g id="patch_6">\n+ <path d="\n+M72 388.8\n+L72 43.2" style="fill:none;stroke:#000000;"/>\n+ </g>\n+ </g>\n+ </g>\n+ <defs>\n+ <clipPath id="p7ff5b81e1d">\n+ <rect height="345.6" width="446.4" x="72.0" y="43.2"/>\n+ </clipPath>\n+ </defs>\n+</svg>\n' |
b |
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244.can --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244.can Sat May 20 12:57:06 2017 -0400 |
b |
@@ -0,0 +1,1 @@ +CC(=O)Oc1ccccc1C(=O)O 2244 |
b |
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244.inchi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244.inchi Sat May 20 12:57:06 2017 -0400 |
b |
@@ -0,0 +1,1 @@ +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) |
b |
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244.sdf Sat May 20 12:57:06 2017 -0400 |
b |
@@ -0,0 +1,154 @@ +2244 + -OEChem-05151212332D + + 21 21 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> <PUBCHEM_COMPOUND_CID> +2244 + +> <PUBCHEM_COMPOUND_CANONICALIZED> +1 + +> <PUBCHEM_CACTVS_COMPLEXITY> +212 + +> <PUBCHEM_CACTVS_HBOND_ACCEPTOR> +4 + +> <PUBCHEM_CACTVS_HBOND_DONOR> +1 + +> <PUBCHEM_CACTVS_ROTATABLE_BOND> +3 + +> <PUBCHEM_CACTVS_SUBSKEYS> +AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> <PUBCHEM_IUPAC_OPENEYE_NAME> +2-acetoxybenzoic acid + +> <PUBCHEM_IUPAC_CAS_NAME> 
+2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_SYSTEMATIC_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_TRADITIONAL_NAME> +2-acetoxybenzoic acid + +> <PUBCHEM_IUPAC_INCHI> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> <PUBCHEM_IUPAC_INCHIKEY> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> <PUBCHEM_XLOGP3> +1.2 + +> <PUBCHEM_EXACT_MASS> +180.042259 + +> <PUBCHEM_MOLECULAR_FORMULA> +C9H8O4 + +> <PUBCHEM_MOLECULAR_WEIGHT> +180.15742 + +> <PUBCHEM_OPENEYE_CAN_SMILES> +CC(=O)OC1=CC=CC=C1C(=O)O + +> <PUBCHEM_OPENEYE_ISO_SMILES> +CC(=O)OC1=CC=CC=C1C(=O)O + +> <PUBCHEM_CACTVS_TPSA> +63.6 + +> <PUBCHEM_MONOISOTOPIC_WEIGHT> +180.042259 + +> <PUBCHEM_TOTAL_CHARGE> +0 + +> <PUBCHEM_HEAVY_ATOM_COUNT> +13 + +> <PUBCHEM_ATOM_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_ISOTOPIC_ATOM_COUNT> +0 + +> <PUBCHEM_COMPONENT_COUNT> +1 + +> <PUBCHEM_CACTVS_TAUTO_COUNT> +1 + +> <PUBCHEM_COORDINATE_TYPE> +1 +5 +255 + +> <PUBCHEM_BONDANNOTATIONS> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +$$$$ |
b |
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244.smi Sat May 20 12:57:06 2017 -0400 |
b |
@@ -0,0 +1,1 @@ +O(c1c(cccc1)C(=O)O)C(=O)C 2244 |
b |
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_FP2.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244_FP2.fps Sat May 20 12:57:06 2017 -0400 |
b |
@@ -0,0 +1,7 @@ +#FPS1 +#num_bits=1021 +#type=OpenBabel-FP2/1 +#software=OpenBabel/2.4.1 +#source=/tmp/tmptaAke4/files/000/dataset_3.dat +#date=2017-05-19T13:52:59 +00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 |
b |
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_FP3.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244_FP3.fps Sat May 20 12:57:06 2017 -0400 |
b |
@@ -0,0 +1,7 @@ +#FPS1 +#num_bits=1021 +#type=OpenBabel-FP2/1 +#software=OpenBabel/2.4.1 +#source=/tmp/tmptaAke4/files/000/dataset_7.dat +#date=2017-05-19T13:53:45 +00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 |
b |
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_FP4.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244_FP4.fps Sat May 20 12:57:06 2017 -0400 |
b |
@@ -0,0 +1,7 @@ +#FPS1 +#num_bits=1021 +#type=OpenBabel-FP2/1 +#software=OpenBabel/2.4.1 +#source=/tmp/tmptaAke4/files/000/dataset_11.dat +#date=2017-05-19T13:54:39 +00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 |
b |
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_MACCS.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244_MACCS.fps Sat May 20 12:57:06 2017 -0400 |
b |
@@ -0,0 +1,7 @@ +#FPS1 +#num_bits=1021 +#type=OpenBabel-FP2/1 +#software=OpenBabel/2.4.1 +#source=/tmp/tmptaAke4/files/000/dataset_15.dat +#date=2017-05-19T13:55:30 +00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 |
b |
diff -r 685a138131f0 -r 57a1a58056a6 test-data/CID_2244_maccs.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244_maccs.fps Sat May 20 12:57:06 2017 -0400 |
b |
@@ -0,0 +1,7 @@ +#FPS1 +#num_bits=166 +#type=OpenBabel-MACCS/2 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T17:00:39 +0000000000000000000000010000016480cca2d21e 2244 |
b |
diff -r 685a138131f0 -r 57a1a58056a6 test-data/NxN_Clustering_on_q.svg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/NxN_Clustering_on_q.svg Sat May 20 12:57:06 2017 -0400 |
b |
b'@@ -0,0 +1,707 @@\n+<?xml version="1.0" encoding="utf-8" standalone="no"?>\n+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"\n+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n+<!-- Created with matplotlib (http://matplotlib.org/) -->\n+<svg height="345pt" version="1.1" viewBox="0 0 460 345" width="460pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">\n+ <defs>\n+ <style type="text/css">\n+*{stroke-linecap:butt;stroke-linejoin:round;}\n+ </style>\n+ </defs>\n+ <g id="figure_1">\n+ <g id="patch_1">\n+ <path d="M 0 345.6 \n+L 460.8 345.6 \n+L 460.8 0 \n+L 0 0 \n+z\n+" style="fill:#ffffff;"/>\n+ </g>\n+ <g id="axes_1">\n+ <g id="patch_2">\n+ <path d="M 57.6 307.584 \n+L 414.72 307.584 \n+L 414.72 41.472 \n+L 57.6 41.472 \n+z\n+" style="fill:#ffffff;"/>\n+ </g>\n+ <g id="matplotlib.axis_1">\n+ <g id="xtick_1">\n+ <g id="text_1">\n+ <!-- 55079807 -->\n+ <defs>\n+ <path d="M 10.796875 72.90625 \n+L 49.515625 72.90625 \n+L 49.515625 64.59375 \n+L 19.828125 64.59375 \n+L 19.828125 46.734375 \n+Q 21.96875 47.46875 24.109375 47.828125 \n+Q 26.265625 48.1875 28.421875 48.1875 \n+Q 40.625 48.1875 47.75 41.5 \n+Q 54.890625 34.8125 54.890625 23.390625 \n+Q 54.890625 11.625 47.5625 5.09375 \n+Q 40.234375 -1.421875 26.90625 -1.421875 \n+Q 22.3125 -1.421875 17.546875 -0.640625 \n+Q 12.796875 0.140625 7.71875 1.703125 \n+L 7.71875 11.625 \n+Q 12.109375 9.234375 16.796875 8.0625 \n+Q 21.484375 6.890625 26.703125 6.890625 \n+Q 35.15625 6.890625 40.078125 11.328125 \n+Q 45.015625 15.765625 45.015625 23.390625 \n+Q 45.015625 31 40.078125 35.4375 \n+Q 35.15625 39.890625 26.703125 39.890625 \n+Q 22.75 39.890625 18.8125 39.015625 \n+Q 14.890625 38.140625 10.796875 36.28125 \n+z\n+" id="DejaVuSans-35"/>\n+ <path d="M 31.78125 66.40625 \n+Q 24.171875 66.40625 20.328125 58.90625 \n+Q 16.5 51.421875 16.5 36.375 \n+Q 16.5 21.390625 20.328125 13.890625 \n+Q 24.171875 6.390625 31.78125 6.390625 \n+Q 39.453125 6.390625 43.28125 13.890625 \n+Q 47.125 21.390625 
47.125 36.375 \n+Q 47.125 51.421875 43.28125 58.90625 \n+Q 39.453125 66.40625 31.78125 66.40625 \n+z\n+M 31.78125 74.21875 \n+Q 44.046875 74.21875 50.515625 64.515625 \n+Q 56.984375 54.828125 56.984375 36.375 \n+Q 56.984375 17.96875 50.515625 8.265625 \n+Q 44.046875 -1.421875 31.78125 -1.421875 \n+Q 19.53125 -1.421875 13.0625 8.265625 \n+Q 6.59375 17.96875 6.59375 36.375 \n+Q 6.59375 54.828125 13.0625 64.515625 \n+Q 19.53125 74.21875 31.78125 74.21875 \n+z\n+" id="DejaVuSans-30"/>\n+ <path d="M 8.203125 72.90625 \n+L 55.078125 72.90625 \n+L 55.078125 68.703125 \n+L 28.609375 0 \n+L 18.3125 0 \n+L 43.21875 64.59375 \n+L 8.203125 64.59375 \n+z\n+" id="DejaVuSans-37"/>\n+ <path d="M 10.984375 1.515625 \n+L 10.984375 10.5 \n+Q 14.703125 8.734375 18.5 7.8125 \n+Q 22.3125 6.890625 25.984375 6.890625 \n+Q 35.75 6.890625 40.890625 13.453125 \n+Q 46.046875 20.015625 46.78125 33.40625 \n+Q 43.953125 29.203125 39.59375 26.953125 \n+Q 35.25 24.703125 29.984375 24.703125 \n+Q 19.046875 24.703125 12.671875 31.3125 \n+Q 6.296875 37.9375 6.296875 49.421875 \n+Q 6.296875 60.640625 12.9375 67.421875 \n+Q 19.578125 74.21875 30.609375 74.21875 \n+Q 43.265625 74.21875 49.921875 64.515625 \n+Q 56.59375 54.828125 56.59375 36.375 \n+Q 56.59375 19.140625 48.40625 8.859375 \n+Q 40.234375 -1.421875 26.421875 -1.421875 \n+Q 22.703125 -1.421875 18.890625 -0.6875 \n+Q 15.09375 0.046875 10.984375 1.515625 \n+z\n+M 30.609375 32.421875 \n+Q 37.25 32.421875 41.125 36.953125 \n+Q 45.015625 41.5 45.015625 49.421875 \n+Q 45.015625 57.28125 41.125 61.84375 \n+Q 37.25 66.40625 30.609375 66.40625 \n+Q 23.96875 66.40625 20.09375 61.84375 \n+Q 16.21875 57.28125 16.21875 49.421875 \n+Q 16.21875 41.5 20.09375 36.953125 \n+Q 23.96875 32.421875 30.609375 32.421875 \n+z\n+" id="DejaVuSans-39"/>\n+ <path d="M 31.78125 34.625 \n+Q 24.75 34.625 20.71875 30.859375 \n+Q 16.703125 27.09375 16.703125 20.515625 \n+Q 16.703125 13.921875 20.71875 10.15625 \n+Q 24.75 6.390625 31.78125 6.390625 \n+Q 38.8125 6.390625 
42.859375 10.171875 \n+Q 46.921875 13.96875 46.921875 20.515625 \n+Q 4'..b'59.033203" xlink:href="#DejaVuSans-36"/>\n+ </g>\n+ </g>\n+ </g>\n+ <g id="ytick_8">\n+ <g id="line2d_8">\n+ <g>\n+ <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#me28c3a741e" y="53.313113"/>\n+ </g>\n+ </g>\n+ <g id="text_21">\n+ <!-- 0.07 -->\n+ <g transform="translate(28.334375 57.112332)scale(0.1 -0.1)">\n+ <use xlink:href="#DejaVuSans-30"/>\n+ <use x="63.623047" xlink:href="#DejaVuSans-2e"/>\n+ <use x="95.410156" xlink:href="#DejaVuSans-30"/>\n+ <use x="159.033203" xlink:href="#DejaVuSans-37"/>\n+ </g>\n+ </g>\n+ </g>\n+ </g>\n+ <g id="LineCollection_1">\n+ <path clip-path="url(#p7a554818f3)" d="M 98.806154 307.584 \n+L 98.806154 160.244138 \n+L 126.276923 160.244138 \n+L 126.276923 307.584 \n+" style="fill:none;stroke:#008000;stroke-width:1.5;"/>\n+ </g>\n+ <g id="LineCollection_2">\n+ <path clip-path="url(#p7a554818f3)" d="M 208.689231 307.584 \n+L 208.689231 307.584 \n+L 236.16 307.584 \n+L 236.16 307.584 \n+" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n+ <path clip-path="url(#p7a554818f3)" d="M 181.218462 307.584 \n+L 181.218462 307.584 \n+L 222.424615 307.584 \n+L 222.424615 307.584 \n+" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n+ <path clip-path="url(#p7a554818f3)" d="M 263.630769 307.584 \n+L 263.630769 224.047744 \n+L 291.101538 224.047744 \n+L 291.101538 307.584 \n+" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n+ <path clip-path="url(#p7a554818f3)" d="M 201.821538 307.584 \n+L 201.821538 202.211048 \n+L 277.366154 202.211048 \n+L 277.366154 224.047744 \n+" style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>\n+ </g>\n+ <g id="LineCollection_3">\n+ <path clip-path="url(#p7a554818f3)" d="M 318.572308 307.584 \n+L 318.572308 227.498079 \n+L 346.043077 227.498079 \n+L 346.043077 307.584 \n+" style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>\n+ <path clip-path="url(#p7a554818f3)" d="M 373.513846 307.584 \n+L 373.513846 
225.958341 \n+L 400.984615 225.958341 \n+L 400.984615 307.584 \n+" style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>\n+ <path clip-path="url(#p7a554818f3)" d="M 332.307692 227.498079 \n+L 332.307692 178.92987 \n+L 387.249231 178.92987 \n+L 387.249231 225.958341 \n+" style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>\n+ </g>\n+ <g id="LineCollection_4">\n+ <path clip-path="url(#p7a554818f3)" d="M 239.593846 202.211048 \n+L 239.593846 126.040908 \n+L 359.778462 126.040908 \n+L 359.778462 178.92987 \n+" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n+ <path clip-path="url(#p7a554818f3)" d="M 153.747692 307.584 \n+L 153.747692 98.265487 \n+L 299.686154 98.265487 \n+L 299.686154 126.040908 \n+" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n+ <path clip-path="url(#p7a554818f3)" d="M 112.541538 160.244138 \n+L 112.541538 92.745033 \n+L 226.716923 92.745033 \n+L 226.716923 98.265487 \n+" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n+ <path clip-path="url(#p7a554818f3)" d="M 71.335385 307.584 \n+L 71.335385 54.144 \n+L 169.629231 54.144 \n+L 169.629231 92.745033 \n+" style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>\n+ </g>\n+ <g id="patch_3">\n+ <path d="M 57.6 307.584 \n+L 57.6 41.472 \n+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n+ </g>\n+ <g id="patch_4">\n+ <path d="M 414.72 307.584 \n+L 414.72 41.472 \n+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n+ </g>\n+ <g id="patch_5">\n+ <path d="M 57.6 307.584 \n+L 414.72 307.584 \n+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n+ </g>\n+ <g id="patch_6">\n+ <path d="M 57.6 41.472 \n+L 414.72 41.472 \n+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/>\n+ </g>\n+ </g>\n+ </g>\n+ <defs>\n+ <clipPath id="p7a554818f3">\n+ <rect height="266.112" width="357.12" x="57.6" y="41.472"/>\n+ 
</clipPath>\n+ </defs>\n+</svg>\n' |
b |
diff -r 685a138131f0 -r 57a1a58056a6 test-data/Taylor-Butina_Clustering_on_data_q.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Taylor-Butina_Clustering_on_data_q.txt Sat May 20 12:57:06 2017 -0400 |
b |
@@ -0,0 +1,4 @@ +#0 true singletons +#0 false singletons +#clusters: 1 +55091752 12 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091467 55168823 55091849 |
b |
diff -r 685a138131f0 -r 57a1a58056a6 test-data/sdf2fps_result1.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sdf2fps_result1.fps Sat May 20 12:57:06 2017 -0400 |
b |
@@ -0,0 +1,7 @@ +#FPS1 +#num_bits=881 +#type=CACTVS-E_SCREEN/1.0 extended=2 +#software=CACTVS/unknown +#source=/tmp/tmpN2w37z/files/000/dataset_1.dat +#date=2017-05-19T14:27:41 +030e1c000000000000000000000000000000000000000c00000000000000008000000058000010000030200119004c70010060001140044b100040100024040010118060101330e46c21ac5841de06a50788109de11113047100000000001000000000000000080000000000000000 2244 |
b |
diff -r 685a138131f0 -r 57a1a58056a6 test-data/simsearch_on_tragets_and_q.tabular --- a/test-data/simsearch_on_tragets_and_q.tabular Sat May 20 12:45:01 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,9 +0,0 @@ -#Simsearch/1 -#num_bits=881 -#type=Tanimoto k=all threshold=0.7 -#software=chemfp/1.1p1 -#queries=./query.fps -#targets=./targets.fps -#query_sources=CID_28434379.sdf -#target_sources=Desktop/3579363516810334491.sdf -13 28434379 6499094 0.9615 6485578 0.9679 55079807 0.9313 3153534 0.9557 55102353 0.9682 55091466 0.9682 55091416 0.9682 6485577 0.9497 55169009 0.9560 55091752 0.9684 55091467 0.9623 55168823 0.9563 55091849 0.9563 |