view scHicClusterMinHash.xml @ 0:1c2e79e9311a draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/schicexplorer commit 2a80f777c0221752232882c0d43b55f2b1dcd223"
author iuc
date Thu, 23 Jan 2020 16:04:57 -0500
parents
children 68648299ffc4
line wrap: on
line source

<tool id="schicexplorer_schicclusterminhash" name="@BINARY@" version="@WRAPPER_VERSION@.0">
    <!-- Short summary of what the tool does. -->
    <description>clusters single-cell Hi-C interaction matrices with MinHash dimension reduction</description>
    <macros>
        <!-- @BINARY@ is substituted into the tool name attribute and used as the executable in the command template. -->
        <token name="@BINARY@">scHicClusterMinHash</token>
        <!-- Shared definitions. NOTE(review): the macros and tokens expanded in this file but not defined here
             (requirements, citations, matrix_mcooler_macro, @THREADS@, @WRAPPER_VERSION@) presumably come from this import; confirm. -->
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## Runs the clustering on the input matrix. The result is written to
        ## cluster_list.txt, which the outputs section picks up via from_work_dir.
        @BINARY@

        --matrix '$matrix_mcooler'

        --numberOfClusters $numberOfClusters

        --clusterMethod $clusterMethod_selector
        --numberOfHashFunctions $numberOfHashFunctions

        ## Optional chromosome restriction. The value is split on any run of
        ## whitespace, so repeated, leading or trailing spaces never yield empty
        ## quoted arguments, and a whitespace-only value omits the option entirely.
        #if $chromosomes and str($chromosomes).strip():
            #set $chromosome = ' '.join([ "'%s'" % $chrom for $chrom in str($chromosomes).split() ])
            --chromosomes $chromosome
        #end if

        ## When the checkbox is enabled the parameter renders as its truevalue,
        ## which is the command line flag itself.
        #if $exactModeMinhash:
            $exactModeMinhash
        #end if

        --outFileName cluster_list.txt

        --threads @THREADS@
    ]]></command>
    <inputs>
        <!-- NOTE(review): this macro is expected to declare the matrix_mcooler parameter
             referenced by the command template and the tests; confirm in macros.xml. -->
        <expand macro="matrix_mcooler_macro"/>

        <!-- Clustering algorithm applied to the dimension-reduced data. -->
        <param name="clusterMethod_selector" type="select" label="Cluster method:">
            <option value="kmeans" selected="True">K-means</option>
            <option value="spectral">Spectral clustering</option>
        </param>

        <!-- Both counts must be positive, so the form rejects zero and negative values up front. -->
        <param name="numberOfClusters" type="integer" value="7" min="1" label="Number of clusters" help="How many clusters should be computed by kmeans or spectral clustering"/>
        <param name="numberOfHashFunctions" type="integer" value="800" min="1" label="Number of hash functions" help="How many hash functions the minHash algorithm uses."/>

        <!-- Free-text, space-separated list; leaving it empty omits the chromosome option from the command. -->
        <param name="chromosomes" type="text" label="List of chromosomes to consider" help="Please separate the chromosomes by space"/>

        <!-- The truevalue is the command line flag itself. The explicit empty falsevalue makes the
             unchecked state render as an empty string instead of the default literal value. -->
        <param name="exactModeMinhash" type="boolean" truevalue="--exactModeMinHash" falsevalue="" label="The MinHash algorithm additionally computes the exact Euclidean distance."/>
    </inputs>
    <outputs>
        <!-- The command writes cluster_list.txt into the job working directory; it is collected here as the single text output. -->
        <data name="outFileName"
              format="txt"
              from_work_dir="cluster_list.txt"
              label="${tool.name} on ${on_string}: Cluster results"/>
    </outputs>
    <tests>
        <!-- Every case compares the result against its reference file by size only
             (sim_size), allowing a difference of up to 4000 bytes. -->

        <!-- K-means clustering with default optional settings. -->
        <test>
            <param name="matrix_mcooler" value="test_matrix.mcool"/>
            <param name="clusterMethod_selector" value="kmeans"/>
            <param name="numberOfClusters" value="3"/>
            <param name="numberOfHashFunctions" value="800"/>
            <output name="outFileName" file="scHicClusterMinHash/cluster_kmeans.txt" ftype="txt" compare="sim_size" delta="4000"/>
        </test>

        <!-- Spectral clustering with default optional settings. -->
        <test>
            <param name="matrix_mcooler" value="test_matrix.mcool"/>
            <param name="clusterMethod_selector" value="spectral"/>
            <param name="numberOfClusters" value="3"/>
            <param name="numberOfHashFunctions" value="800"/>
            <output name="outFileName" file="scHicClusterMinHash/cluster_spectral.txt" ftype="txt" compare="sim_size" delta="4000"/>
        </test>

        <!-- K-means clustering with the exact mode checkbox enabled. -->
        <test>
            <param name="matrix_mcooler" value="test_matrix.mcool"/>
            <param name="clusterMethod_selector" value="kmeans"/>
            <param name="numberOfClusters" value="3"/>
            <param name="numberOfHashFunctions" value="800"/>
            <param name="exactModeMinhash" value="true"/>
            <output name="outFileName" file="scHicClusterMinHash/cluster_kmeans_exact.txt" ftype="txt" compare="sim_size" delta="4000"/>
        </test>

        <!-- Spectral clustering restricted to two chromosomes. -->
        <test>
            <param name="matrix_mcooler" value="test_matrix.mcool"/>
            <param name="clusterMethod_selector" value="spectral"/>
            <param name="numberOfClusters" value="3"/>
            <param name="numberOfHashFunctions" value="800"/>
            <param name="chromosomes" value="chr1 chr2"/>
            <output name="outFileName" file="scHicClusterMinHash/cluster_spectral_chromosomes.txt" ftype="txt" compare="sim_size" delta="4000"/>
        </test>
    </tests>
    <help><![CDATA[

Clustering with dimension reduction via MinHash
===============================================

scHicClusterMinHash uses k-means or spectral clustering to assign each cell to a cluster and therefore to its cell cycle.
The clustering is applied to dimension-reduced data based on an approximate kNN search with the locality-sensitive hashing technique MinHash. This approach reduces the number of dimensions from samples * (number of bins)^2 to samples * samples.
Please also consider the other clustering and dimension reduction approaches of the scHiCExplorer suite. They may give you better results,
be faster or be less memory demanding.

For more information about scHiCExplorer, please see our documentation on readthedocs.io_

.. _readthedocs.io: https://schicexplorer.readthedocs.io/
]]></help>
    <expand macro="citations" />

</tool>