diff clustering_from_distmat.xml @ 1:c0b01c55a0e0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
author iuc
date Mon, 19 Aug 2024 15:33:16 +0000
parents 8192b416f945
children f8ee933de3ca
line wrap: on
line diff
--- a/clustering_from_distmat.xml	Thu Aug 08 19:34:36 2024 +0000
+++ b/clustering_from_distmat.xml	Mon Aug 19 15:33:16 2024 +0000
@@ -1,5 +1,11 @@
-<tool id="clustering_from_distmat" name="Distance matrix-based hierarchical clustering" version="1.0" profile="23.0">
+<tool id="clustering_from_distmat" name="Distance matrix-based hierarchical clustering" version="1.1" profile="23.0">
     <description>using Scipy</description>
+    <macros>
+        <xml name="cluster_assignment_options">
+            <param name="min_cluster_size" type="integer" value="2" min="1" label="Mask clusters with fewer than this number of samples" help="Samples assigned to clusters smaller than this threshold will have '-' in the corresponding cluster ID column" />
+            <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" />
+        </xml>
+    </macros>
     <edam_topics>
         <edam_topic>topic_2269</edam_topic>
         <edam_topic>topic_0084</edam_topic>
@@ -16,11 +22,15 @@
   '$distmat'
   result
   --method $method
+  $missing_names
   #if str($cluster_assignment.select) == 'n-cluster':
     --n-clusters $cluster_assignment.n_cluster
   #elif str($cluster_assignment.select) == 'height':
     --height $cluster_assignment.height
   #end if
+  #if str($cluster_assignment.select) != 'dendrogram-only' and $cluster_assignment.min_cluster_size != 2:
+    --min-cluster-size $cluster_assignment.min_cluster_size
+  #end if
     ]]></command>
     <inputs>
         <param name="distmat" type="data" format="tabular" label="Distance matrix" />
@@ -33,6 +43,11 @@
             <option value="median">WPGMC (scipy 'median' method)</option>
             <option value="ward">Ward/Incremental (scipy 'ward' method)</option>
         </param>
+        <param name="missing_names" type="select" label="How does the input specify sample names?">
+            <option value="">First line and first column specify sample names (fully symmetric input)</option>
+            <option value="--nr">First line specifies sample names, subsequent lines only data</option>
+            <option value="--nc">Each line specifies sample name in first column, first line is not special</option>
+        </param>
         <conditional name="cluster_assignment">
             <param name="select" type="select" label="Generate cluster assignments?">
                 <option value="dendrogram-only">No, just generate the dendrogram of clustering results</option>
@@ -42,11 +57,11 @@
             <when value="dendrogram-only" />
             <when value="n-cluster">
                 <param name="n_cluster" type="integer" value="5" min="1" label="How many clusters to divide into?" />
-                <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" />
+                <expand macro="cluster_assignment_options" />
             </when>
             <when value="height">
                 <param name="height" type="float" value="5.0" label="Distance threshold for clusters to be reported" />
-                <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" />
+                <expand macro="cluster_assignment_options" />
             </when>
         </conditional>
     </inputs>
@@ -74,14 +89,46 @@
             <conditional name="cluster_assignment">
                 <param name="select" value="height" />
                 <param name="height" value="18" />
+                <param name="min_cluster_size" value="1" />
             </conditional>
             <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_h18.tsv" />
         </test>
+        <test expect_num_outputs="1">
+            <param name="distmat" value="test_matrix.tsv"/>
+            <conditional name="cluster_assignment">
+                <param name="select" value="height" />
+                <param name="height" value="18" />
+            </conditional>
+            <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_h18_s2.tsv" />
+        </test>
         <test expect_num_outputs="2">
             <param name="distmat" value="test_matrix.tsv"/>
             <conditional name="cluster_assignment">
                 <param name="select" value="n-cluster" />
                 <param name="n_cluster" value="4" />
+                <param name="min_cluster_size" value="1" />
+                <param name="generate_dendrogram" value="true" />
+            </conditional>
+            <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="distmat" value="test_matrix_nr.tsv" />
+            <param name="missing_names" value="--nr" />
+            <conditional name="cluster_assignment">
+                <param name="select" value="n-cluster" />
+                <param name="n_cluster" value="4" />
+                <param name="min_cluster_size" value="1" />
+                <param name="generate_dendrogram" value="true" />
+            </conditional>
+            <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="distmat" value="test_matrix_nc.tsv" />
+            <param name="missing_names" value="--nc" />
+            <conditional name="cluster_assignment">
+                <param name="select" value="n-cluster" />
+                <param name="n_cluster" value="4" />
+                <param name="min_cluster_size" value="1" />
                 <param name="generate_dendrogram" value="true" />
             </conditional>
             <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" />
@@ -95,7 +142,7 @@
 
 This tool lets you perform hierarchical clustering of samples using the `scipy.cluster.hierarchy.linkage`_ function and any of the clustering methods supported by it.
 
-As input it expects a symmetrical distance matrix with sample names on the first row and in the first column.
+As input it expects a symmetrical distance matrix with sample names in the first row and/or in the first column.
 
 The clustering result can be reported in the form of a dendrogram in newick format.