view simsearch.xml @ 1:0892f7ced10c draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/simsearch commit d786052cd04f8b25eb4aff80b1b9724f62031b61
author bgruening
date Sat, 20 May 2017 13:06:07 -0400
parents 52e5c0f2907f
children 497254c6c09e
line wrap: on
line source

<tool id="ctb_simsearch" name="Similarity Search" version="0.2">
    <description>of fingerprint data sets</description>
    <requirements>
        <!-- Packages resolved by Galaxy before the command runs; chemfp is the only one declared.
             NOTE(review): the "obabel" branch of the command template invokes the `obabel` executable,
             but no Open Babel package is declared here. Confirm it is provided transitively or add an
             explicit requirement. -->
        <requirement type="package" version="1.1p1">chemfp</requirement>
    </requirements>
    <command>
<![CDATA[
        ## Builds one of two shell command lines, depending on the selected backend:
        ##   chemfp : run `simsearch` on FPS fingerprint files
        ##   obabel : run `obabel` against an Open Babel FastSearch index
        ## In both cases the result is written to the `outfile` dataset.
        ## Galaxy joins the rendered lines into a single shell line, so every
        ## separate shell command below must be chained explicitly with `&&`.
        #if $method_opts.method_opts_selector == "chemfp":
            ## Expose the selected datasets under fixed *.fps names in the working directory.
            ln -s '${method_opts.query_opts.targets}' ./targets.fps &&

            #if $method_opts.query_opts.query_opts_selector == "normal":
                ln -s '${method_opts.query_opts.query}' ./query.fps &&
            #end if

            simsearch
                #if int($method_opts.knn) == 0:
                    ## knn == 0 means "no limit": search for all neighbors above the threshold
                    #set $k = 'all'
                    ## count is only available if k nearest neighbor search is disabled
                    $method_opts.counts
                #else:
                    #set $k = int($method_opts.knn)
                #end if

                -k $k
                --threshold $method_opts.threshold
                -o ./output.fps

                ## build and search an in-memory data structure (faster for multiple queries)
                --memory

                #if $method_opts.query_opts.query_opts_selector == "normal":
                    -q ./query.fps
                #else:
                    ## no separate query file: the targets are searched against themselves
                    --NxN
                #end if

                ./targets.fps
                &&
                mv ./output.fps '${outfile}'
        #else:
            ## OpenBabel needs the original molecule file (molecule.'fileformat') next to the fastsearch index (molecule.fs). We use a composite datatype to accomplish that.
            ## Furthermore OpenBabel is really picky with file extensions. We need to specify every datatype. I did not find a solution to specify the query-filetype.
            ## A workaround is to create a symlink with a proper file-extension.
            ## The trailing `&&` is required: without it `obabel` and all of its options
            ## would be passed to `ln` as additional arguments and the search would never run.
            ln -s '${method_opts.query}' ./temp_query.${method_opts.query.ext} &&
            obabel
                -i fs '${method_opts.fastsearch.files_path}/molecule.fs'
                -S ./temp_query.${method_opts.query.ext}
                -at${method_opts.threshold}
                -O '${outfile}'
                -osmi
                -aa
        #end if
]]>
    </command>
    <inputs>
        <!-- Top-level switch between the two search backends handled by the command template. -->
        <conditional name="method_opts">
            <param name="method_opts_selector" type="select" label="Subject database/sequences">
              <option value="chemfp">Chemfp fingerprint file</option>
              <option value="obabel">OpenBabel Fastsearch Index</option>
            </param>
            <when value="chemfp">
                <!-- Either search a separate query file against the targets ("normal"),
                     or compare the target file against itself ("nxn"). -->
                <conditional name="query_opts">
                    <param name="query_opts_selector" type="select" label="Query Mode">
                      <option value="normal">Query molecules are stored in a separate file</option>
                      <option value="nxn">Target molecules are also queries (NxN)</option>
                    </param>
                    <when value="normal">
                        <param name='query' type='data' format="fps" label='Query molecules'/>
                        <param name='targets' type='data' format="fps" label='Target molecules'/>
                    </when>
                    <when value="nxn">
                        <param name='targets' type='data' format="fps" label='Target molecules'/>
                    </when>
                </conditional>
                <!-- 0 is translated to "-k all" by the command template; any other value is passed as k. -->
                <param name='knn' type='integer' value='0' label='select the k nearest neighbors' help='0 means all neighbors'>
                   <validator type="in_range" min="0" />
                </param>
                <param name='threshold' type='float' value='0.7' label='threshold' />
                <!-- The command template only emits this flag when knn is 0. -->
                <param name="counts" type="boolean" truevalue="-c" falsevalue="" checked="false" label="report counts (-c)" help="Is ignored if k nearest neighbor search is enabled" />
            </when>
            <when value="obabel">
                <!-- The query is symlinked under its own datatype extension before obabel is run. -->
                <param name="query" type='data' format="smi,mol,sdf,inchi" label="query"/>
                <param name="fastsearch" type='data' format="obfs" label="OpenBabel Fastsearch Index"/>
                <param name="threshold" type='float' label="threshold" value='0.7'/>
            </when>
        </conditional>

    </inputs>
    <outputs>
        <!-- Single result dataset. The chemfp branch moves ./output.fps onto it; the obabel branch
             writes to it directly via the -O option. -->
        <data name="outfile" format="tabular" />
    </outputs>
    <tests>
        <!-- chemfp search of a separate query file against a target file.
             The parameter names must match the declared inputs ("knn", "threshold");
             the previous names "k" and "th" do not exist in this tool, so their values
             were never applied.
             NOTE(review): confirm that the expected file reflects knn=4 and regenerate it if needed. -->
        <test>
            <param name="method_opts_selector" value="chemfp"/>
            <param name="query_opts_selector" value="normal"/>
            <param name="targets" ftype="fps" value="targets.fps"/>
            <param name="query" ftype="fps" value="q.fps"/>
            <param name="knn" value='4'/>
            <param name="threshold" value='0.7'/>
            <output name="outfile" ftype="tabular" file="simsearch_on_tragets_and_q.tabular"/>
        </test>
    </tests>
    <help>
<![CDATA[


.. class:: infomark

**What this tool does**

Performs similarity searches on fingerprint data sets, using either chemfp_ FPS files or an Open Babel FastSearch_ index.

.. _chemfp: http://chemfp.com/
.. _FastSearch: http://openbabel.org/wiki/FastSearch


]]>
    </help>
    <citations>
        <!-- References shown to users of this tool.
             NOTE(review): presumably the Open Babel and chemfp publications; verify the DOIs resolve as expected. -->
        <citation type="doi">10.1186/1758-2946-3-33</citation>
        <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>
    </citations>
</tool>