changeset 0:52e5c0f2907f draft

Uploaded
author bgruening
date Thu, 15 Aug 2013 03:36:37 -0400
parents
children 0892f7ced10c
files simsearch.xml test-data/q.fps test-data/simsearch_on_tragets_and_q.tabular test-data/target.fps tool_dependencies.xml
diffstat 5 files changed, 165 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/simsearch.xml	Thu Aug 15 03:36:37 2013 -0400
@@ -0,0 +1,121 @@
+<tool id="ctb_simsearch" name="Similarity Search" version="0.1.1">
+    <description>of fingerprint data sets</description>
+    <requirements> <!-- packages referenced by the two branches of the command template -->
+        <requirement version="1.1p1" type="package">chemfp</requirement> <!-- chemfp branch (simsearch call) -->
+        <requirement version="2.3.2" type="package">openbabel</requirement> <!-- FastSearch branch (obabel call) -->
+    </requirements>
+    <command>
+        #if $method_opts.method_opts_selector == "chemfp":
+            simsearch
+                #if int($method_opts.knn) == 0:
+                    #set $k = 'all'
+                    ## -c (report counts) is only passed when the k-nearest-neighbor search is disabled (knn == 0)
+                    $method_opts.counts
+                #else:
+                    #set $k = int($method_opts.knn)
+                #end if
+
+                -k $k
+
+                --threshold $method_opts.threshold
+                --query-format fps
+                --target-format fps
+                -o "${outfile}"
+
+                ## build and search an in-memory data structure (faster for multiple queries)
+                --memory
+
+                #if $method_opts.query_opts.query_opts_selector == "normal":
+                    -q "${method_opts.query_opts.query}"
+                #else:
+                    --NxN
+                #end if
+
+                "${method_opts.query_opts.targets}"
+        #else:
+            ## Open Babel needs the original molecule file (molecule.'fileformat') next to the FastSearch index (molecule.fs); a composite datatype is used to accomplish that.
+            ## Open Babel is also picky about file extensions and the author found no way to pass the query file type explicitly,
+            ## so the query is symlinked to a temporary name that carries its extension. NamedTemporaryFile is used only to obtain a unique name and is closed right away.
+            #import tempfile
+            #set $temp_file = tempfile.NamedTemporaryFile()
+            #set $temp_link = "%s.%s" % ($temp_file.name, $method_opts.query.ext)
+            #silent $temp_file.close()
+            ln -s "${method_opts.query}" "${temp_link}";
+            obabel -i fs "${os.path.join($method_opts.fastsearch.extra_files_path,'molecule.fs')}" -S "${temp_link}" -at${method_opts.threshold} -O "${outfile}" -osmi -aa 2>&#38;1;
+            rm "${temp_link}"
+        #end if
+    </command>
+    <inputs>
+
+        <conditional name="method_opts"> <!-- selects between the chemfp and the Open Babel FastSearch branch of the command -->
+            <param name="method_opts_selector" type="select" label="Subject database/sequences">
+              <option value="chemfp">Chemfp fingerprint file</option>
+              <option value="obabel">OpenBabel Fastsearch Index</option>
+            </param>
+            <when value="chemfp">
+                <conditional name="query_opts"> <!-- "normal" passes -q with a query file, "nxn" passes -NxN instead -->
+                    <param name="query_opts_selector" type="select" label="Query Mode">
+                      <option value="normal">Query molecules are stored in a separate file</option>
+                      <option value="nxn">Target molecules are also queries (NxN)</option>
+                    </param>
+                    <when value="normal">
+                        <param name="query" type="data" format="fps" label="Query molecules"/>
+                        <param name="targets" type="data" format="fps" label="Target molecules"/>
+                    </when>
+                    <when value="nxn">
+                        <param name="targets" type="data" format="fps" label="Target molecules"/>
+                    </when>
+                </conditional>
+                <param name="knn" type="integer" value="0" label="select the k nearest neighbors" help="0 means all neighbors">
+                   <validator type="in_range" min="0" />
+                </param>
+                <param name="threshold" type="float" value="0.7" label="threshold" />
+                <param name="counts" type="boolean" truevalue="-c" falsevalue="" checked="false" label="report counts (-c)" help="Is ignored if k nearest neighbor search is enabled" />
+            </when>
+            <when value="obabel">
+                <param name="query" type="data" format="smi,mol,sdf,inchi" label="query"/>
+                <param name="fastsearch" type="data" format="obfs" label="OpenBabel Fastsearch Index"/>
+                <param name="threshold" type="float" label="threshold" value="0.7"/>
+            </when>
+        </conditional>
+
+    </inputs>
+    <outputs> <!-- single dataset; both command branches write their result to ${outfile} -->
+        <data format="tabular" name="outfile"/>
+    </outputs>
+    <tests>
+        <test> <!-- chemfp search with a separate query file; the two selectors are left unset (presumably defaulting to their first option) -->
+            <param name="targets" ftype="fps" value="target.fps"/>
+            <param name="query" ftype="fps" value="q.fps"/>
+            <param name="knn" value="4"/>
+            <param name="threshold" value="0.7"/>
+            <output name="outfile" ftype="tabular" file="simsearch_on_tragets_and_q.tabular"/>
+        </test>
+    </tests>
+    <help>
+
+
+.. class:: infomark
+
+**What this tool does**
+
+Performs similarity searches on fingerprint data sets, using either a chemfp_ FPS file or an Open Babel FastSearch_ index as the target database.
+
+.. _chemfp: http://chemfp.com/
+.. _FastSearch: http://openbabel.org/wiki/FastSearch
+
+-----
+
+.. class:: infomark
+
+**Cite**
+
+| The chemfp_ project
+| 
+| N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch and G R Hutchison - `Open Babel: An open chemical toolbox`_
+
+.. _`Open Babel: An open chemical toolbox`: http://www.jcheminf.com/content/3/1/33
+
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/q.fps	Thu Aug 15 03:36:37 2013 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=881
+#type=CACTVS-E_SCREEN/1.0 extended=2
+#software=CACTVS/unknown
+#source=CID_28434379.sdf
+#date=2012-02-03T13:08:39
+07ce04000000000000000000000000000080060000000c060000000000001a800f0000780008100000101487e9608c0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	28434379
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/simsearch_on_tragets_and_q.tabular	Thu Aug 15 03:36:37 2013 -0400
@@ -0,0 +1,9 @@
+#Simsearch/1
+#num_bits=881
+#type=Tanimoto k=4 threshold=0.7
+#software=chemfp/1.0
+#queries=q.fps
+#targets=target.fps
+#query_sources=CID_28434379.sdf
+#target_sources=Desktop/3579363516810334491.sdf
+4	28434379	55091752	0.9684	55091466	0.9682	55091416	0.9682	55102353	0.9682
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/target.fps	Thu Aug 15 03:36:37 2013 -0400
@@ -0,0 +1,19 @@
+#FPS1
+#num_bits=881
+#type=CACTVS-E_SCREEN/1.0 extended=2
+#software=CACTVS/unknown
+#source=Desktop/3579363516810334491.sdf
+#date=2012-02-03T13:07:47
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000	55169009
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000	55079807
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	3153534
+07ce0c000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000	55168823
+07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	55102353
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800003000000100200000040080000010000002000000000000	55091849
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	55091752
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000000100200000040080000010000002000000000000	55091467
+07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	55091466
+07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	55091416
+03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	6499094
+03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	6485578
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000000100200000040080000010000002000000000000	6485577
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Aug 15 03:36:37 2013 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<tool_dependency> <!-- each package is resolved from a repository pinned by changeset_revision -->
+    <package version="2.3.2" name="openbabel">
+        <repository toolshed="http://toolshed.g2.bx.psu.edu" owner="iuc" name="package_openbabel_2_3" changeset_revision="99a10425de93" />
+    </package>
+    <package version="1.1p1" name="chemfp">
+        <repository toolshed="http://toolshed.g2.bx.psu.edu" owner="iuc" name="package_chemfp_1_1" changeset_revision="f2676c09c535" />
+    </package>
+</tool_dependency>