diff scHicClusterMinHash.xml @ 0:1c2e79e9311a draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/schicexplorer commit 2a80f777c0221752232882c0d43b55f2b1dcd223"
author iuc
date Thu, 23 Jan 2020 16:04:57 -0500
parents
children 68648299ffc4
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scHicClusterMinHash.xml	Thu Jan 23 16:04:57 2020 -0500
@@ -0,0 +1,106 @@
+<tool id="schicexplorer_schicclusterminhash" name="@BINARY@" version="@WRAPPER_VERSION@.0">
+    <description>clusters single-cell Hi-C interaction matrices with MinHash dimension reduction</description>
+    <macros>
+        <token name="@BINARY@">scHicClusterMinHash</token>  <!-- substituted into the tool name attribute and used as the executable in the command block -->
+        <import>macros.xml</import>  <!-- presumably defines the requirements, citations and matrix input macros expanded in this file; confirm in macros.xml -->
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+        ## Invoke the executable named by the BINARY token on the selected matrix.
+        @BINARY@
+
+        --matrix '$matrix_mcooler'
+        --numberOfClusters $numberOfClusters
+        --clusterMethod $clusterMethod_selector
+        --numberOfHashFunctions $numberOfHashFunctions
+
+        ## Optional chromosome restriction: split on any whitespace run so that
+        ## repeated, leading or trailing blanks never yield empty '' arguments.
+        #if $chromosomes and str($chromosomes).strip():
+            #set $chromosome = ' '.join([ '\'%s\'' % $chrom for $chrom in str($chromosomes).split() ])
+            --chromosomes $chromosome
+        #end if
+
+        ## The boolean parameter renders as its truevalue flag when it is checked.
+        #if $exactModeMinhash:
+            $exactModeMinhash
+        #end if
+
+        ## Fixed file name in the job working directory; the output is collected via from_work_dir.
+        --outFileName cluster_list.txt
+        --threads @THREADS@
+    ]]></command>
+    <inputs>
+        <!-- Matrix input; this macro is expected to define the matrix_mcooler parameter read by the command block (presumably in macros.xml). -->
+        <expand macro="matrix_mcooler_macro"/>
+        <param name="clusterMethod_selector" type="select" label="Cluster method:">
+            <option value="kmeans" selected="True">K-means</option>
+            <option value="spectral">Spectral clustering</option>
+        </param>
+
+        <param name="numberOfClusters" type="integer" value="7" min="1" label="Number of clusters" help="How many clusters should be computed by kmeans or spectral clustering"/>
+        <param name="numberOfHashFunctions" type="integer" value="800" min="1" label="Number of hash functions" help="How many hash functions the minHash algorithm uses."/>
+
+        <param name="chromosomes" type="text" label="List of chromosomes to consider" help="Please separate the chromosomes by space"/>
+        <param name="exactModeMinhash" type="boolean" truevalue="--exactModeMinHash" falsevalue="" label="The MinHash algorithm additionally computes the exact Euclidean distance."/>
+
+    </inputs>
+    <outputs>  <!-- the command block writes cluster_list.txt into the working directory; from_work_dir collects it as this dataset -->
+        <data format="txt" name="outFileName" from_work_dir="cluster_list.txt" label="${tool.name} on ${on_string}: Cluster results"/>
+    </outputs>
+    <tests>
+        <!-- K-means, 3 clusters, 800 hash functions; output is checked by size only (sim_size, delta 4000). -->
+        <test>
+            <param name="matrix_mcooler" value="test_matrix.mcool"/>
+            <param name="clusterMethod_selector" value="kmeans"/>
+            <param name="numberOfClusters" value="3"/>
+            <param name="numberOfHashFunctions" value="800"/>
+            <output name="outFileName" file="scHicClusterMinHash/cluster_kmeans.txt" ftype="txt" compare="sim_size" delta="4000"/>
+        </test>
+
+        <!-- Same settings with spectral clustering instead of K-means. -->
+        <test>
+            <param name="matrix_mcooler" value="test_matrix.mcool"/>
+            <param name="clusterMethod_selector" value="spectral"/>
+            <param name="numberOfClusters" value="3"/>
+            <param name="numberOfHashFunctions" value="800"/>
+            <output name="outFileName" file="scHicClusterMinHash/cluster_spectral.txt" ftype="txt" compare="sim_size" delta="4000"/>
+        </test>
+
+        <!-- K-means with the exact mode boolean switched on. -->
+        <test>
+            <param name="matrix_mcooler" value="test_matrix.mcool"/>
+            <param name="clusterMethod_selector" value="kmeans"/>
+            <param name="numberOfClusters" value="3"/>
+            <param name="numberOfHashFunctions" value="800"/>
+            <param name="exactModeMinhash" value="true"/>
+            <output name="outFileName" file="scHicClusterMinHash/cluster_kmeans_exact.txt" ftype="txt" compare="sim_size" delta="4000"/>
+        </test>
+
+        <!-- Spectral clustering restricted to the chromosomes chr1 and chr2. -->
+        <test>
+            <param name="matrix_mcooler" value="test_matrix.mcool"/>
+            <param name="clusterMethod_selector" value="spectral"/>
+            <param name="numberOfClusters" value="3"/>
+            <param name="numberOfHashFunctions" value="800"/>
+            <param name="chromosomes" value="chr1 chr2"/>
+            <output name="outFileName" file="scHicClusterMinHash/cluster_spectral_chromosomes.txt" ftype="txt" compare="sim_size" delta="4000"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Clustering with dimension reduction via MinHash
+===============================================
+
+scHicClusterMinHash uses kmeans or spectral clustering to associate each cell with a cluster and therefore with its cell cycle.
+The clustering is applied to dimension-reduced data based on an approximate kNN search with the locality-sensitive hashing technique MinHash. This approach reduces the number of dimensions from samples * (number of bins)^2 to samples * samples.
+Please also consider the other clustering and dimension reduction approaches of the scHiCExplorer suite. They can give you better results,
+can be faster or less memory demanding.
+
+For more information about scHiCExplorer please see our documentation on readthedocs.io_
+
+.. _readthedocs.io: http://schicexplorer.readthedocs.io/
+]]></help>
+    <expand macro="citations" />
+
+</tool>