Mercurial > repos > iuc > schicexplorer_schicclusterminhash
changeset 1:68648299ffc4 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/schicexplorer commit 72e1e90ac05a32dbd6fc675073429c0086048b18"
line wrap: on
line diff
--- a/macros.xml Thu Jan 23 16:04:57 2020 -0500 +++ b/macros.xml Tue Mar 10 15:11:23 2020 -0400 @@ -1,6 +1,6 @@ <macros> <token name="@THREADS@">\${GALAXY_SLOTS:-4}</token> - <token name="@WRAPPER_VERSION@">1</token> + <token name="@WRAPPER_VERSION@">4</token> <xml name="requirements"> <requirements> @@ -9,8 +9,8 @@ </requirements> <version_command>@BINARY@ --version</version_command> </xml> - <xml name='matrix_mcooler_macro'> - <param name='matrix_mcooler' type="data" format="mcool" + <xml name='matrix_scooler_macro'> + <param name='matrix_scooler' type="data" format="scool" label="Matrix to compute on"/> </xml> <xml name='matrix_cooler_multiple_macro'> @@ -18,7 +18,7 @@ label="Matricies to compute on" multiple="true"/> </xml> <token name="@ESCAPE_IDENTIFIER_FASTQ@"><![CDATA[re.sub('[^\s\w\.]', '_', str($fastq.element_identifier))]]></token> - <token name="@ESCAPE_IDENTIFIER_MCOOL@"><![CDATA[re.sub('[^\s\w\.]', '_', str($matrix_mcooler.element_identifier))]]></token> + <token name="@ESCAPE_IDENTIFIER_SCOOL@"><![CDATA[re.sub('[^\s\w\.]', '_', str($matrix_scooler.element_identifier))]]></token> <token name="@ESCAPE_IDENTIFIER_M@"><![CDATA[re.sub('[^\s\w\-\.]', '_', str($m.element_identifier))]]></token> <xml name="citations">
--- a/scHicClusterMinHash.xml Thu Jan 23 16:04:57 2020 -0500 +++ b/scHicClusterMinHash.xml Tue Mar 10 15:11:23 2020 -0400 @@ -8,7 +8,7 @@ <command detect_errors="exit_code"><![CDATA[ @BINARY@ - --matrix '$matrix_mcooler' + --matrix '$matrix_scooler' --numberOfClusters $numberOfClusters @@ -24,6 +24,7 @@ #end if --outFileName cluster_list.txt + --numberOfNearestNeighbors $numberOfNearestNeighbors --threads @THREADS@ @@ -32,7 +33,7 @@ ]]></command> <inputs> - <expand macro="matrix_mcooler_macro"/> + <expand macro="matrix_scooler_macro"/> <param name="clusterMethod_selector" type="select" label="Cluster method:"> <option value="kmeans" selected="True">K-means</option> <option value="spectral" >Spectral clustering</option> @@ -40,6 +41,7 @@ <param name="numberOfClusters" type="integer" value="7" label="Number of clusters" help='How many clusters should be computed by kmeans or spectral clustering' /> <param name="numberOfHashFunctions" type="integer" value="800" label="Number of hash functions" help='How many hash functions the minHash algorithm uses.' /> + <param name="numberOfNearestNeighbors" type="integer" value="100" label="Number of nearest neighbors" help='How many nearest neighbors should be computed for the k-nn graph?' 
/> <param name='chromosomes' type='text' label='List of chromosomes to consider' help='Please separate the chromosomes by space'/> <param name='exactModeMinhash' type='boolean' truevalue='--exactModeMinHash' label='The MinHash algorithm computes additional the exact euclidean distance.'/> @@ -50,7 +52,7 @@ </outputs> <tests> <test> - <param name='matrix_mcooler' value='test_matrix.mcool' /> + <param name='matrix_scooler' value='test_matrix.scool' /> <param name='clusterMethod_selector' value='kmeans' /> <param name='numberOfClusters' value='3' /> <param name='numberOfHashFunctions' value='800' /> @@ -58,7 +60,7 @@ <output name="outFileName" file="scHicClusterMinHash/cluster_kmeans.txt" ftype="txt" compare="sim_size" delta="4000"/> </test> <test> - <param name='matrix_mcooler' value='test_matrix.mcool' /> + <param name='matrix_scooler' value='test_matrix.scool' /> <param name='clusterMethod_selector' value='spectral' /> <param name='numberOfClusters' value='3' /> <param name='numberOfHashFunctions' value='800' /> @@ -66,7 +68,7 @@ <output name="outFileName" file="scHicClusterMinHash/cluster_spectral.txt" ftype="txt" compare="sim_size" delta="4000"/> </test> <test> - <param name='matrix_mcooler' value='test_matrix.mcool' /> + <param name='matrix_scooler' value='test_matrix.scool' /> <param name='clusterMethod_selector' value='kmeans' /> <param name='numberOfClusters' value='3' /> <param name='numberOfHashFunctions' value='800' /> @@ -75,7 +77,7 @@ <output name="outFileName" file="scHicClusterMinHash/cluster_kmeans_exact.txt" ftype="txt" compare="sim_size" delta="4000"/> </test> <test> - <param name='matrix_mcooler' value='test_matrix.mcool' /> + <param name='matrix_scooler' value='test_matrix.scool' /> <param name='clusterMethod_selector' value='spectral' /> <param name='numberOfClusters' value='3' /> <param name='numberOfHashFunctions' value='800' /> @@ -92,10 +94,10 @@ Clustering with dimension reduction via MinHash =============================================== 
-scHicClusterMinHash uses kmeans or spectral clustering to associate each cell to a cluster and therefore to its cell cycle. -The clustering is applied on dimension reduced data based on an approximate kNN search with the local sensitive hashing technique MinHash. This approach reduces the number of dimensions from samples * (number of bins)^2 to samples * samples. -Please consider also the other clustering and dimension reduction approaches of the scHicExplorer suite. They can give you better results, -can be faster or less memory demanding. +scHicClusterMinHash uses kmeans or spectral clustering to associate each cell to a cluster and therefore to its cell cycle. +The clustering is applied on dimension reduced data based on an approximate kNN search with the local sensitive hashing technique MinHash. This approach reduces the number of dimensions from samples * (number of bins)^2 to samples * samples. +Please consider also the other clustering and dimension reduction approaches of the scHicExplorer suite such as `scHicCluster`, `scHicClusterCompartments` and `scHicClusterSVL`. They can give you better results, +can be faster or less memory demanding. For more information about scHiCExplorer please consider our documentation on readthedocs.io_
--- a/test-data/scHicQualityControl/qc_report.txt Thu Jan 23 16:04:57 2020 -0500 +++ b/test-data/scHicQualityControl/qc_report.txt Tue Mar 10 15:11:23 2020 -0400 @@ -1,6 +1,6 @@ -# QC report for single-cell Hi-C data generated by scHiCExplorer 1-dev +# QC report for single-cell Hi-C data generated by scHiCExplorer 4 scHi-C sample contained 20 cells: Number of removed matrices containing bad chromosomes 0 -Number of removed matrices due to low read coverage (< 100000): 8 +Number of removed matrices due to low read coverage (< 100000): 10 Number of removed matrices due to too many zero bins (< 0.001 density, within 30000000 relative genomic distance): 0 -12 samples passed the quality control. Please consider matrices with a low read coverage may be the matrices with a low density and overlap therefore. \ No newline at end of file +10 samples passed the quality control. Please consider matrices with a low read coverage may be the matrices with a low density and overlap therefore. \ No newline at end of file