Mercurial > repos > iuc > schicexplorer_schicclusterminhash

--- a/macros.xml	Thu Jan 23 16:04:57 2020 -0500
+++ b/macros.xml	Tue Mar 10 15:11:23 2020 -0400
@@ -1,6 +1,6 @@
 <macros>
     <token name="@THREADS@">\${GALAXY_SLOTS:-4}</token>
-    <token name="@WRAPPER_VERSION@">1</token>
+    <token name="@WRAPPER_VERSION@">4</token>

      <xml name="requirements">
         <requirements>
@@ -9,8 +9,8 @@
         </requirements>
         <version_command>@BINARY@ --version</version_command>
     </xml>
-    <xml name='matrix_mcooler_macro'>
-        <param name='matrix_mcooler' type="data" format="mcool"
+    <xml name='matrix_scooler_macro'>
+        <param name='matrix_scooler' type="data" format="scool"
             label="Matrix to compute on"/>
     </xml>
     <xml name='matrix_cooler_multiple_macro'>
@@ -18,7 +18,7 @@
             label="Matricies to compute on" multiple="true"/>
     </xml>
     <token name="@ESCAPE_IDENTIFIER_FASTQ@"><![CDATA[re.sub('[^\s\w\.]', '_', str($fastq.element_identifier))]]></token>
-    <token name="@ESCAPE_IDENTIFIER_MCOOL@"><![CDATA[re.sub('[^\s\w\.]', '_', str($matrix_mcooler.element_identifier))]]></token>
+    <token name="@ESCAPE_IDENTIFIER_SCOOL@"><![CDATA[re.sub('[^\s\w\.]', '_', str($matrix_scooler.element_identifier))]]></token>
     <token name="@ESCAPE_IDENTIFIER_M@"><![CDATA[re.sub('[^\s\w\-\.]', '_', str($m.element_identifier))]]></token>

     <xml name="citations">
--- a/scHicClusterMinHash.xml	Thu Jan 23 16:04:57 2020 -0500
+++ b/scHicClusterMinHash.xml	Tue Mar 10 15:11:23 2020 -0400
@@ -8,7 +8,7 @@
     <command detect_errors="exit_code"><![CDATA[
         @BINARY@

-        --matrix '$matrix_mcooler'
+        --matrix '$matrix_scooler'

         --numberOfClusters $numberOfClusters

@@ -24,6 +24,7 @@
         #end if

         --outFileName cluster_list.txt
+        --numberOfNearestNeighbors $numberOfNearestNeighbors

         --threads @THREADS@

@@ -32,7 +33,7 @@
     ]]></command>
     <inputs>

-        <expand macro="matrix_mcooler_macro"/>
+        <expand macro="matrix_scooler_macro"/>
         <param name="clusterMethod_selector" type="select" label="Cluster method:">
                 <option value="kmeans" selected="True">K-means</option>
                 <option value="spectral" >Spectral clustering</option>
@@ -40,6 +41,7 @@

         <param name="numberOfClusters" type="integer" value="7"  label="Number of clusters" help='How many clusters should be computed by kmeans or spectral clustering' />
         <param name="numberOfHashFunctions" type="integer" value="800"  label="Number of hash functions" help='How many hash functions the minHash algorithm uses.' />
+        <param name="numberOfNearestNeighbors" type="integer" value="100"  label="Number of nearest neighbors" help='How many nearest neighbors should be computed for the k-nn graph?' />

         <param name='chromosomes' type='text' label='List of chromosomes to consider' help='Please separate the chromosomes by space'/>
         <param name='exactModeMinhash' type='boolean' truevalue='--exactModeMinHash' label='The MinHash algorithm computes additional the exact euclidean distance.'/>
@@ -50,7 +52,7 @@
     </outputs>
     <tests>
         <test>
-            <param name='matrix_mcooler' value='test_matrix.mcool' />
+            <param name='matrix_scooler' value='test_matrix.scool' />
             <param name='clusterMethod_selector' value='kmeans' />
             <param name='numberOfClusters' value='3' />
             <param name='numberOfHashFunctions' value='800' />
@@ -58,7 +60,7 @@
             <output name="outFileName" file="scHicClusterMinHash/cluster_kmeans.txt" ftype="txt" compare="sim_size" delta="4000"/>
         </test>
         <test>
-            <param name='matrix_mcooler' value='test_matrix.mcool' />
+            <param name='matrix_scooler' value='test_matrix.scool' />
             <param name='clusterMethod_selector' value='spectral' />
             <param name='numberOfClusters' value='3' />
             <param name='numberOfHashFunctions' value='800' />
@@ -66,7 +68,7 @@
             <output name="outFileName" file="scHicClusterMinHash/cluster_spectral.txt" ftype="txt" compare="sim_size" delta="4000"/>
         </test>
         <test>
-            <param name='matrix_mcooler' value='test_matrix.mcool' />
+            <param name='matrix_scooler' value='test_matrix.scool' />
             <param name='clusterMethod_selector' value='kmeans' />
             <param name='numberOfClusters' value='3' />
             <param name='numberOfHashFunctions' value='800' />
@@ -75,7 +77,7 @@
             <output name="outFileName" file="scHicClusterMinHash/cluster_kmeans_exact.txt" ftype="txt" compare="sim_size" delta="4000"/>
         </test>
         <test>
-            <param name='matrix_mcooler' value='test_matrix.mcool' />
+            <param name='matrix_scooler' value='test_matrix.scool' />
             <param name='clusterMethod_selector' value='spectral' />
             <param name='numberOfClusters' value='3' />
             <param name='numberOfHashFunctions' value='800' />
@@ -92,10 +94,10 @@
 Clustering with dimension reduction via MinHash
 ===============================================

-scHicClusterMinHash uses kmeans or spectral clustering to associate each cell to a cluster and therefore to its cell cycle.
-The clustering is applied on dimension reduced data based on an approximate kNN search with the local sensitive hashing technique MinHash. This approach reduces the number of dimensions from samples * (number of bins)^2 to samples * samples.
-Please consider also the other clustering and dimension reduction approaches of the scHicExplorer suite. They can give you better results,
-can be faster or less memory demanding.
+scHicClusterMinHash uses kmeans or spectral clustering to associate each cell to a cluster and therefore to its cell cycle.
+The clustering is applied on dimension reduced data based on an approximate kNN search with the local sensitive hashing technique MinHash. This approach reduces the number of dimensions from samples * (number of bins)^2 to samples * samples.
+Please consider also the other clustering and dimension reduction approaches of the scHicExplorer suite such as `scHicCluster`, `scHicClusterCompartments` and `scHicClusterSVL`. They can give you better results,
+can be faster or less memory demanding.

 For more information about scHiCExplorer please consider our documentation on readthedocs.io_
Binary file test-data/scHicConsensusMatrices/consensus_matrix.mcool has changed
Binary file test-data/scHicConsensusMatrices/consensus_matrix.scool has changed
Binary file test-data/scHicCreateBulkMatrix/bulk_matrix.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_1_CGTACTAG_TATCCTCT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_2_AAGAGGCA_AAGGAGTA_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_2_AAGAGGCA_ACTGCATA_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_2_AAGAGGCA_CTAAGCCT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_2_AAGAGGCA_CTCTCTAT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_2_AAGAGGCA_GTAAGGAG_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_2_AAGAGGCA_TATCCTCT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToMCool/Diploid_2_AGGCAGAA_AAGGAGTA_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_1_CGTACTAG_TATCCTCT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_2_AAGAGGCA_AAGGAGTA_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_2_AAGAGGCA_ACTGCATA_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_2_AAGAGGCA_CTAAGCCT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_2_AAGAGGCA_CTCTCTAT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_2_AAGAGGCA_GTAAGGAG_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_2_AAGAGGCA_TATCCTCT_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz.cool has changed
Binary file test-data/scHicMergeToSCool/Diploid_2_AGGCAGAA_AAGGAGTA_R1fastqgz.cool has changed
Binary file test-data/scHicQualityControl/coverage.png has changed
Binary file test-data/scHicQualityControl/density.png has changed
--- a/test-data/scHicQualityControl/qc_report.txt	Thu Jan 23 16:04:57 2020 -0500
+++ b/test-data/scHicQualityControl/qc_report.txt	Tue Mar 10 15:11:23 2020 -0400
@@ -1,6 +1,6 @@
-# QC report for single-cell Hi-C data generated by scHiCExplorer 1-dev
+# QC report for single-cell Hi-C data generated by scHiCExplorer 4
 scHi-C sample contained 20 cells:
 Number of removed matrices containing bad chromosomes 0
-Number of removed matrices due to low read coverage (< 100000): 8
+Number of removed matrices due to low read coverage (< 100000): 10
 Number of removed matrices due to too many zero bins (< 0.001 density, within 30000000 relative genomic distance): 0
-12 samples passed the quality control. Please consider matrices with a low read coverage may be the matrices with a low density and overlap therefore.
\ No newline at end of file
+10 samples passed the quality control. Please consider matrices with a low read coverage may be the matrices with a low density and overlap therefore.
\ No newline at end of file
Binary file test-data/test_matrix.mcool has changed
Binary file test-data/test_matrix.scool has changed