comparison clustering_from_distmat.xml @ 1:c0b01c55a0e0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
author iuc
date Mon, 19 Aug 2024 15:33:16 +0000
parents 8192b416f945
children f8ee933de3ca
comparison
equal deleted inserted replaced
0:8192b416f945 1:c0b01c55a0e0
1 <tool id="clustering_from_distmat" name="Distance matrix-based hierarchical clustering" version="1.0" profile="23.0"> 1 <tool id="clustering_from_distmat" name="Distance matrix-based hierarchical clustering" version="1.1" profile="23.0">
2 <description>using Scipy</description> 2 <description>using Scipy</description>
3 <macros>
4 <xml name="cluster_assignment_options">
5 <param name="min_cluster_size" type="integer" value="2" min="1" label="Mask clusters with less than this number of samples" help="Samples assigned to clusters smaller than this threshold will have '-' in the corresponding cluster ID column" />
6 <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" />
7 </xml>
8 </macros>
3 <edam_topics> 9 <edam_topics>
4 <edam_topic>topic_2269</edam_topic> 10 <edam_topic>topic_2269</edam_topic>
5 <edam_topic>topic_0084</edam_topic> 11 <edam_topic>topic_0084</edam_topic>
6 </edam_topics> 12 </edam_topics>
7 <edam_operations> 13 <edam_operations>
14 <command detect_errors="exit_code"><![CDATA[ 20 <command detect_errors="exit_code"><![CDATA[
15 python '$__tool_directory__/clustering_from_distmat.py' 21 python '$__tool_directory__/clustering_from_distmat.py'
16 '$distmat' 22 '$distmat'
17 result 23 result
18 --method $method 24 --method $method
25 $missing_names
19 #if str($cluster_assignment.select) == 'n-cluster': 26 #if str($cluster_assignment.select) == 'n-cluster':
20 --n-clusters $cluster_assignment.n_cluster 27 --n-clusters $cluster_assignment.n_cluster
21 #elif str($cluster_assignment.select) == 'height': 28 #elif str($cluster_assignment.select) == 'height':
22 --height $cluster_assignment.height 29 --height $cluster_assignment.height
30 #end if
31 #if str($cluster_assignment.select) != 'dendrogram-only' and $cluster_assignment.min_cluster_size != 2:
32 --min-cluster-size $cluster_assignment.min_cluster_size
23 #end if 33 #end if
24 ]]></command> 34 ]]></command>
25 <inputs> 35 <inputs>
26 <param name="distmat" type="data" format="tabular" label="Distance matrix" /> 36 <param name="distmat" type="data" format="tabular" label="Distance matrix" />
27 <param name="method" type="select" label="Clustering method"> 37 <param name="method" type="select" label="Clustering method">
31 <option value="weighted">WPGMA (scipy 'weighted' method)</option> 41 <option value="weighted">WPGMA (scipy 'weighted' method)</option>
32 <option value="centroid">UPGMC (scipy 'centroid' method)</option> 42 <option value="centroid">UPGMC (scipy 'centroid' method)</option>
33 <option value="median">WPGMC (scipy 'median' method)</option> 43 <option value="median">WPGMC (scipy 'median' method)</option>
34 <option value="ward">Ward/Incremental (scipy 'ward' method)</option> 44 <option value="ward">Ward/Incremental (scipy 'ward' method)</option>
35 </param> 45 </param>
46 <param name="missing_names" type="select" label="How does the input specify sample names?">
47 <option value="">First line and first column specify sample names (fully symmetric input)</option>
48 <option value="--nr">First line specifies sample names, subsequent lines only data</option>
49 <option value="--nc">Each line specifies sample name in first column, first line is not special</option>
50 </param>
36 <conditional name="cluster_assignment"> 51 <conditional name="cluster_assignment">
37 <param name="select" type="select" label="Generate cluster assignments?"> 52 <param name="select" type="select" label="Generate cluster assignments?">
38 <option value="dendrogram-only">No, just generate the dendrogram of clustering results</option> 53 <option value="dendrogram-only">No, just generate the dendrogram of clustering results</option>
39 <option value="n-cluster">Yes, and divide into specified number of clusters </option> 54 <option value="n-cluster">Yes, and divide into specified number of clusters </option>
40 <option value="height">Yes, and use distance threshold to divide into clusters</option> 55 <option value="height">Yes, and use distance threshold to divide into clusters</option>
41 </param> 56 </param>
42 <when value="dendrogram-only" /> 57 <when value="dendrogram-only" />
43 <when value="n-cluster"> 58 <when value="n-cluster">
44 <param name="n_cluster" type="integer" value="5" min="1" label="How many clusters to divide into?" /> 59 <param name="n_cluster" type="integer" value="5" min="1" label="How many clusters to divide into?" />
45 <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" /> 60 <expand macro="cluster_assignment_options" />
46 </when> 61 </when>
47 <when value="height"> 62 <when value="height">
48 <param name="height" type="float" value="5.0" label="Distance threshold for clusters to be reported" /> 63 <param name="height" type="float" value="5.0" label="Distance threshold for clusters to be reported" />
49 <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" /> 64 <expand macro="cluster_assignment_options" />
50 </when> 65 </when>
51 </conditional> 66 </conditional>
52 </inputs> 67 </inputs>
53 <outputs> 68 <outputs>
54 <data name="clustering_dendrogram" format="newick" from_work_dir="result.tree.newick" label="${tool.name} on ${on_string}: Dendrogram"> 69 <data name="clustering_dendrogram" format="newick" from_work_dir="result.tree.newick" label="${tool.name} on ${on_string}: Dendrogram">
72 <test expect_num_outputs="1"> 87 <test expect_num_outputs="1">
73 <param name="distmat" value="test_matrix.tsv"/> 88 <param name="distmat" value="test_matrix.tsv"/>
74 <conditional name="cluster_assignment"> 89 <conditional name="cluster_assignment">
75 <param name="select" value="height" /> 90 <param name="select" value="height" />
76 <param name="height" value="18" /> 91 <param name="height" value="18" />
92 <param name="min_cluster_size" value="1" />
77 </conditional> 93 </conditional>
78 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_h18.tsv" /> 94 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_h18.tsv" />
95 </test>
96 <test expect_num_outputs="1">
97 <param name="distmat" value="test_matrix.tsv"/>
98 <conditional name="cluster_assignment">
99 <param name="select" value="height" />
100 <param name="height" value="18" />
101 </conditional>
102 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_h18_s2.tsv" />
79 </test> 103 </test>
80 <test expect_num_outputs="2"> 104 <test expect_num_outputs="2">
81 <param name="distmat" value="test_matrix.tsv"/> 105 <param name="distmat" value="test_matrix.tsv"/>
82 <conditional name="cluster_assignment"> 106 <conditional name="cluster_assignment">
83 <param name="select" value="n-cluster" /> 107 <param name="select" value="n-cluster" />
84 <param name="n_cluster" value="4" /> 108 <param name="n_cluster" value="4" />
109 <param name="min_cluster_size" value="1" />
110 <param name="generate_dendrogram" value="true" />
111 </conditional>
112 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" />
113 </test>
114 <test expect_num_outputs="2">
115 <param name="distmat" value="test_matrix_nr.tsv" />
116 <param name="missing_names" value="--nr" />
117 <conditional name="cluster_assignment">
118 <param name="select" value="n-cluster" />
119 <param name="n_cluster" value="4" />
120 <param name="min_cluster_size" value="1" />
121 <param name="generate_dendrogram" value="true" />
122 </conditional>
123 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" />
124 </test>
125 <test expect_num_outputs="2">
126 <param name="distmat" value="test_matrix_nc.tsv" />
127 <param name="missing_names" value="--nc" />
128 <conditional name="cluster_assignment">
129 <param name="select" value="n-cluster" />
130 <param name="n_cluster" value="4" />
131 <param name="min_cluster_size" value="1" />
85 <param name="generate_dendrogram" value="true" /> 132 <param name="generate_dendrogram" value="true" />
86 </conditional> 133 </conditional>
87 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" /> 134 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" />
88 </test> 135 </test>
89 </tests> 136 </tests>
93 140
94 **What it does** 141 **What it does**
95 142
96 This tool lets you perform hierarchical clustering of samples using the `scipy.cluster.hierarchy.linkage`_ function and any of the clustering methods supported by it. 143 This tool lets you perform hierarchical clustering of samples using the `scipy.cluster.hierarchy.linkage`_ function and any of the clustering methods supported by it.
97 144
98 As input it expects a symmetrical distance matrix with sample names on the first row and in the first column. 145 As input it expects a symmetrical distance matrix with sample names on the first row and/or in the first column.
99 146
100 The clustering result can be reported in the form of a dendrogram in newick format. 147 The clustering result can be reported in the form of a dendrogram in newick format.
101 148
102 Additionally, the tool can report the assignment of the samples to clusters "cut" from the clustering tree using the `scipy.cluster.hierarchy.cut_tree`_ function. 149 Additionally, the tool can report the assignment of the samples to clusters "cut" from the clustering tree using the `scipy.cluster.hierarchy.cut_tree`_ function.
103 Reflecting the parameters of that function, you can specify *how* to cut the tree by specifying either the number of clusters to cut into or a distance threshold, i.e., the height at which to cut the tree as SciPy calls it. 150 Reflecting the parameters of that function, you can specify *how* to cut the tree by specifying either the number of clusters to cut into or a distance threshold, i.e., the height at which to cut the tree as SciPy calls it.