Mercurial > repos > iuc > clustering_from_distmat
diff clustering_from_distmat.xml @ 1:c0b01c55a0e0 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
author | iuc |
---|---|
date | Mon, 19 Aug 2024 15:33:16 +0000 |
parents | 8192b416f945 |
children | f8ee933de3ca |
line wrap: on
line diff
--- a/clustering_from_distmat.xml Thu Aug 08 19:34:36 2024 +0000
+++ b/clustering_from_distmat.xml Mon Aug 19 15:33:16 2024 +0000
@@ -1,5 +1,11 @@
-<tool id="clustering_from_distmat" name="Distance matrix-based hierarchical clustering" version="1.0" profile="23.0">
+<tool id="clustering_from_distmat" name="Distance matrix-based hierarchical clustering" version="1.1" profile="23.0">
 <description>using Scipy</description>
+ <macros>
+ <xml name="cluster_assignment_options">
+ <param name="min_cluster_size" type="integer" value="2" min="1" label="Mask clusters with less than this number of samples" help="Samples assigned to clusters smaller than this threshold will have '-' in the corresponding cluster ID column" />
+ <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" />
+ </xml>
+ </macros>
 <edam_topics>
 <edam_topic>topic_2269</edam_topic>
 <edam_topic>topic_0084</edam_topic>
@@ -16,11 +22,15 @@
 '$distmat'
 result
 --method $method
+ $missing_names
 #if str($cluster_assignment.select) == 'n-cluster':
 --n-clusters $cluster_assignment.n_cluster
 #elif str($cluster_assignment.select) == 'height':
 --height $cluster_assignment.height
 #end if
+ #if str($cluster_assignment.select) != 'dendrogram-only' and $cluster_assignment.min_cluster_size != 2:
+ --min-cluster-size $cluster_assignment.min_cluster_size
+ #end if
 ]]></command>
 <inputs>
 <param name="distmat" type="data" format="tabular" label="Distance matrix" />
@@ -33,6 +43,11 @@
 <option value="median">WPGMC (scipy 'median' method)</option>
 <option value="ward">Ward/Incremental (scipy 'ward' method)</option>
 </param>
+ <param name="missing_names" type="select" label="How does the input specify sample names?">
+ <option value="">First line and first column specify sample names (fully symmetric input)</option>
+ <option value="--nr">First line specifies sample names, subsequent lines only data</option>
+ <option value="--nc">Each line specifies sample name in first column, first line is not special</option>
+ </param>
 <conditional name="cluster_assignment">
 <param name="select" type="select" label="Generate cluster assignments?">
 <option value="dendrogram-only">No, just generate the dendrogram of clustering results</option>
@@ -42,11 +57,11 @@
 <when value="dendrogram-only" />
 <when value="n-cluster">
 <param name="n_cluster" type="integer" value="5" min="1" label="How many clusters to divide into?" />
- <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" />
+ <expand macro="cluster_assignment_options" />
 </when>
 <when value="height">
 <param name="height" type="float" value="5.0" label="Distance threshold for clusters to be reported" />
- <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" />
+ <expand macro="cluster_assignment_options" />
 </when>
 </conditional>
 </inputs>
@@ -74,14 +89,46 @@
 <conditional name="cluster_assignment">
 <param name="select" value="height" />
 <param name="height" value="18" />
+ <param name="min_cluster_size" value="1" />
 </conditional>
 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_h18.tsv" />
 </test>
+ <test expect_num_outputs="1">
+ <param name="distmat" value="test_matrix.tsv"/>
+ <conditional name="cluster_assignment">
+ <param name="select" value="height" />
+ <param name="height" value="18" />
+ </conditional>
+ <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_h18_s2.tsv" />
+ </test>
 <test expect_num_outputs="2">
 <param name="distmat" value="test_matrix.tsv"/>
 <conditional name="cluster_assignment">
 <param name="select" value="n-cluster" />
 <param name="n_cluster" value="4" />
+ <param name="min_cluster_size" value="1" />
+ <param name="generate_dendrogram" value="true" />
+ </conditional>
+ <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" />
+ </test>
+ <test expect_num_outputs="2">
+ <param name="distmat" value="test_matrix_nr.tsv" />
+ <param name="missing_names" value="--nr" />
+ <conditional name="cluster_assignment">
+ <param name="select" value="n-cluster" />
+ <param name="n_cluster" value="4" />
+ <param name="min_cluster_size" value="1" />
+ <param name="generate_dendrogram" value="true" />
+ </conditional>
+ <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" />
+ </test>
+ <test expect_num_outputs="2">
+ <param name="distmat" value="test_matrix_nc.tsv" />
+ <param name="missing_names" value="--nc" />
+ <conditional name="cluster_assignment">
+ <param name="select" value="n-cluster" />
+ <param name="n_cluster" value="4" />
+ <param name="min_cluster_size" value="1" />
 <param name="generate_dendrogram" value="true" />
 </conditional>
 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" />
@@ -95,7 +142,7 @@
 This tool lets you perform hierarchical clustering of samples using the `scipy.cluster.hierarchy.linkage`_ function and any of the clustering methods supported by it.
-As input it expects a symmetrical distance matrix with sample names on the first row and in the first column.
+As input it expects a symmetrical distance matrix with sample names on the first row and/or in the first column.
 The clustering result can be reported in the form of a dendrogram in newick format.