view galaxy_stubs/FingerprintSimilaritySearch.xml @ 2:605370bc1def draft default tip

Uploaded
author luis
date Tue, 12 Jul 2016 12:33:33 -0400
parents
children
line wrap: on
line source

<?xml version='1.0' encoding='UTF-8'?>
<!--This is a configuration file for the integration of a tools into Galaxy (https://galaxyproject.org/). This file was automatically generated using CTD2Galaxy.-->
<!--Proposed Tool Section: [Chemoinformatics]-->
<tool id="FingerprintSimilaritySearch" name="FingerprintSimilaritySearch" version="1.1.0">
  <description>calculate similar molecules in a library</description>
  <!-- Macro setup: defines the @EXECUTABLE@ token for this tool and imports the shared macros.xml file. -->
  <macros>
    <token name="@EXECUTABLE@">FingerprintSimilaritySearch</token>
    <import>macros.xml</import>
  </macros>
  <!-- NOTE(review): the "stdio" and "requirements" macros are not defined in this file; presumably they are provided by macros.xml. Confirm before editing either. -->
  <expand macro="stdio"/>
  <expand macro="requirements"/>
  <!-- Cheetah command template: builds the FingerprintSimilaritySearch command line from the tool parameters.
       Every value is wrapped in single quotes so that the shell never expands it; this is safe because the
       sanitizers on the text parameters strip the single quote character.
       Numeric options that may legitimately be zero are tested with str() instead of truthiness. -->
  <command>FingerprintSimilaritySearch

## Target library, query library and result file (dataset paths).
#if $param_t:
  -t '$param_t'
#end if
#if $param_q:
  -q '$param_q'
#end if
#if $param_o:
  -o '$param_o'
#end if
## Fingerprint format (1 or 2).
#if $param_f:
  -f $param_f
#end if
## Column numbers: compare as strings so that an explicit 0 is still forwarded.
#if str($param_fp_col) != '':
  -fp_col $param_fp_col
#end if
#if str($param_id_col) != '':
  -id_col $param_id_col
#end if
## SD tag names: skip values that are empty or consist only of whitespace.
#if str($param_fp_tag).strip():
  -fp_tag '$param_fp_tag'
#end if
#if str($param_id_tag).strip():
  -id_tag '$param_id_tag'
#end if
## Tanimoto cutoff: 0.0 is a meaningful value (full similarity matrix) and must not be dropped.
#if str($param_tc) != '':
  -tc $param_tc
#end if
## Thread count is free text because the literal max keyword is accepted as well as a number.
#if str($param_nt).strip():
  -nt '$param_nt'
#end if
#if $param_bs:
  -bs $param_bs
#end if
## Only forwarded when set to 1; with 0 the option is omitted entirely.
#if $param_sdf_out:
  -sdf_out $param_sdf_out
#end if
</command>
  <inputs>
    <!-- Required input datasets. Dataset parameters take no default value, so none is declared. -->
    <param name="param_t" type="data" format="smi.gz,csv,sdf.gz,sdf,txt.gz,smi,txt,csv.gz" optional="False" label="Target library input file" help="(-t) "/>
    <param name="param_q" type="data" format="smi.gz,csv,sdf.gz,sdf,txt.gz,smi,txt,csv.gz" optional="False" label="Query library input file" help="(-q) "/>
    <!-- Fingerprint encoding; the default must lie inside the declared 1..2 range. -->
    <param name="param_f" type="integer" min="1" max="2" optional="False" value="1" label="Fingerprint format [1 = binary bitstring, 2 = comma separated feature list]" help="(-f) "/>
    <!-- Column selectors for comma separated input; both default to -1. -->
    <param name="param_fp_col" type="integer" value="-1" label="Column number for comma separated smiles input which contains the fingerprint" help="(-fp_col) "/>
    <param name="param_id_col" type="integer" value="-1" label="Column number for comma separated smiles input which contains the molecule identifier" help="(-id_col) "/>
    <!-- Tag selectors for SDF input. The sanitizers drop both quote characters from the entered text. -->
    <param name="param_fp_tag" type="text" size="30" value=" " label="Tag name for SDF input which contains the fingerprint" help="(-fp_tag) ">
      <sanitizer>
        <valid initial="string.printable">
          <remove value="'"/>
          <remove value="&quot;"/>
        </valid>
      </sanitizer>
    </param>
    <param name="param_id_tag" type="text" size="30" value=" " label="Tag name for SDF input which contains the molecule identifier" help="(-id_tag) ">
      <sanitizer>
        <valid initial="string.printable">
          <remove value="'"/>
          <remove value="&quot;"/>
        </valid>
      </sanitizer>
    </param>
    <!-- Search settings. -->
    <param name="param_tc" type="float" value="0.7" label="Tanimoto cutoff [default: 0.7]" help="(-tc) "/>
    <!-- Free text rather than integer because the help text allows a keyword in place of a number. -->
    <param name="param_nt" type="text" size="30" value="1" label="Number of parallel threads to use" help="(-nt) To use all possible threads enter &lt;max&gt; [default: 1]">
      <sanitizer>
        <valid initial="string.printable">
          <remove value="'"/>
          <remove value="&quot;"/>
        </valid>
      </sanitizer>
    </param>
    <param name="param_bs" type="integer" value="500" label="Block size [default: 500]" help="(-bs) "/>
    <!-- 0/1 switch for the tagged SD output described in the label. -->
    <param name="param_sdf_out" type="integer" min="0" max="1" optional="True" value="0" label="If query file has SD format, this flag activates writing of nearest neighbours as a new CSV tag in a copy of the query SD file" help="(-sdf_out) "/>
  </inputs>
  <!-- NOTE(review): this macro is expanded outside the inputs element and its definition is not in this file (presumably macros.xml). If it emits parameters it likely needs to sit inside inputs; confirm. -->
  <expand macro="advanced_options"/>
  <outputs>
    <!-- Result dataset passed to the tool via -o. Its datatype and metadata are both copied explicitly from
         the target library input; format_source replaces the deprecated and ambiguous format="input". -->
    <data name="param_o" metadata_source="param_t" format_source="param_t"/>
  </outputs>
  <help>This tool calculates all nearest neighbours above a similarity cutoff for given query molecules in a compound library on the basis of 2D binary fingerprints.
The first library to specify (i1) is the compound library to be searched, the second library (i2) is conseiderd as the query compounds.
Both files have to be comma separated values (csv) files and the binary fingerprints have to be encoded as feature lists or as binary bit strings.

WARNING: If similarity cutoff is chosen to be 0.0, the output will be the entire similarity matrix and has a size of n*m with n=|i1| and m=|i2|. 

======================================================================================================================================================

Examples:

$ FingerprintSimilaritySearch -t target.sdf -q query.sdf -o results -fp_tag FPRINT -f 1 -id_tag NAME
  tries to extract fingerprints as binary bitstrings (-f 1) from tag &lt;FPRINT&gt; and compound IDs from tag &lt;NAME&gt; of target.sdf and query.sdf.
  A similarity search is performed for all query molecules against all target molecules and pairs with similarity above Tanimoto cutoff 0.7 are written to outfile (results).

$ FingerprintSimilaritySearch -t target.sdf -q query.sdf -o results -fp_tag FPRINT -f 1 -id_tag NAME -sdf_out
  tries to extract fingerprints as binary bitstrings (-f 1) from tag &lt;FPRINT&gt; and compound IDs from tag &lt;NAME&gt; of target.sdf and query.sdf.
  A similarity search is performed for all query molecules against all target molecules and pairs with similarity above Tanimoto cutoff 0.7
  are added as a new SD tag to output file 'NN_TAGGED_query.sdf' as a list of TargetID:Similarity pairs.

$ FingerprintSimilaritySearch -t target.sdf -q query.smi -o results -fp_tag FPRINT -f 1 -id_tag NAME -fp_col 2
  tries to extract fingerprints as binary bitstrings (-f 1) from tag &lt;FPRINT&gt; and compound IDs from tag &lt;NAME&gt; of target.sdf
  and fingerprints as binary bitstrings of space separated query file from column 2 (-fp_col 2).
  A similarity search is performed for all query molecules against all target molecules and pairs with similarity above Tanimoto cutoff 0.7 are written to outfile (results).

</help>
</tool>