view nxn_clustering.xml @ 8:198b1e30c739 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit e78367b77f2294891914151f642685644d43a5b7"
author bgruening
date Tue, 10 Sep 2019 08:16:29 -0400
parents 0d88631bb7de
children 97899048dfa1
line wrap: on
line source

<tool id="ctb_chemfp_nxn_clustering" name="NxN clustering" version="1.5">
    <description>of molecular fingerprints</description>
    <!-- Packages this tool declares as dependencies, each pinned to an exact version. -->
    <requirements>
        <requirement type="package" version="1.5">chemfp</requirement>
        <requirement type="package" version="2.2.3">matplotlib</requirement>
        <requirement type="package" version="1.2.1">scipy</requirement>
        <requirement type="package" version="2.4.1">openbabel</requirement>
    </requirements>
    <!--
        Runs nxn_clustering.py from the tool directory on the selected fingerprint
        dataset with the chosen threshold. The "cluster" (image) and "smatrix"
        (similarity matrix) options are only appended when the "output_files"
        selection asks for that output; the "oformat" option is always passed.
        A failure is detected from the exit code of the command.
    -->
    <command detect_errors="exit_code">
<![CDATA[
        python '$__tool_directory__/nxn_clustering.py'
            -i '$infile'
            -t $threshold
            #if str($output_files) in ['both', 'image']:
                --cluster '$image'
            #end if
            #if str($output_files) in ['both', 'matrix']:
                --smatrix '$similarity_matrix'
            #end if
            --oformat '$oformat'
]]>
    </command>
    <inputs>
        <!-- Fingerprint dataset to cluster; only datasets of type "fps" are accepted. -->
        <param name="infile" type="data" format="fps" label="Fingerprint dataset" help="Dataset missing? See TIP below"/>
        <!-- Similarity cut-off passed to the script via -t; the help text documents it as a value between 0 and 1, so the form enforces that range. -->
        <param name='threshold' label='Threshold' type='float' value='0.0' min='0.0' max='1.0' />
        <!-- File format of the dendrogram image; also drives the datatype of the "image" output. -->
        <param name='oformat' type='select' label="Format of the resulting picture">
            <option value='png'>PNG</option>
            <option value='svg'>SVG</option>
        </param>
        <!-- Which outputs to produce; the values are matched in the command template and in the output filters. -->
        <param name='output_files' type='select' label="Output options">
            <option value='both'>NxN matrix and image</option>
            <option value='image'>Image</option>
            <option value='matrix'>NxN matrix</option>
        </param>

    </inputs>
    <outputs>
        <!-- Dendrogram picture: created for the "both" and "image" selections; SVG unless PNG was chosen as picture format. -->
        <data name="image" format="svg" label="${tool.name} on ${on_string} - Dendrogram Image">
            <filter>output_files in ("both", "image")</filter>
            <change_format>
                <when input="oformat" value="png" format="png" />
            </change_format>
        </data>
        <!-- Similarity matrix as plain text: created for the "both" and "matrix" selections. -->
        <data name="similarity_matrix" format="txt" label="${tool.name} on ${on_string} - Similarity Matrix">
            <filter>output_files in ("both", "matrix")</filter>
        </data>
    </outputs>
    <tests>
        <!-- Matrix-only run: a picture format is set, but no image output is requested or compared. -->
        <test>
            <param name="infile" value="targets.fps" ftype="fps"/>
            <param name="threshold" value="0.75"/>
            <param name="oformat" value="svg"/>
            <param name="output_files" value="matrix"/>
            <output name="similarity_matrix" file="NxN_Clustering_on_q.txt" ftype="txt"/>
        </test>
    </tests>
    <help>
<![CDATA[

.. class:: infomark

**What this tool does**

Based on a set of fingerprints, generates a square self-similarity (NxN) matrix, as well as a dendrogram visualizing the clusters derived from it using hierarchical clustering. For the clustering and the fingerprint handling the chemfp_ project is used.

.. _chemfp: http://chemfp.com/

-----

.. class:: warningmark

**Hint**

The plotting of the dendrogram is sensible only with a small dataset - if more than around 20 fingerprints are used the plot will be unreadable.

-----

.. class:: infomark

**Input**

Molecular fingerprints in FPS format. Open Babel Fastsearch index is not supported.

* Example::

	-  fingerprints in FPS format

		#FPS1
		#num_bits=881
		#type=CACTVS-E_SCREEN/1.0 extended=2
		#software=CACTVS/unknown
		#source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat
		#date=2012-02-09T13:20:37
		07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e
		19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000	55169009
		07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e
		19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000	55079807
		........

	- Tanimoto threshold : 0.8 (between 0 and 1)

-----

.. class:: infomark

**Output**

* Example:

  .. image:: $PATH_TO_IMAGES/NxN_clustering.png


]]>
    </help>
    <!-- Publication to cite for this tool, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>
    </citations>
</tool>