comparison clustering_from_distmat.xml @ 0:8192b416f945 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
author iuc
date Thu, 08 Aug 2024 19:34:36 +0000
parents
children c0b01c55a0e0
comparison
equal deleted inserted replaced
-1:000000000000 0:8192b416f945
1 <tool id="clustering_from_distmat" name="Distance matrix-based hierarchical clustering" version="1.0" profile="23.0">
<!-- Short tagline for the tool; complements the name attribute on the tool element. -->
2 <description>using Scipy</description>
<!-- EDAM ontology annotations classifying the tool by topic and by operation.
     NOTE(review): the numeric IDs are opaque here; look them up in EDAM to confirm they
     match the intended terms. -->
3 <edam_topics>
4 <edam_topic>topic_2269</edam_topic>
5 <edam_topic>topic_0084</edam_topic>
6 </edam_topics>
7 <edam_operations>
8 <edam_operation>operation_3432</edam_operation>
9 </edam_operations>
<!-- Run-time dependencies; both are pinned to explicit versions. -->
10 <requirements>
11 <requirement type="package" version="3.12">python</requirement>
12 <requirement type="package" version="1.14.0">scipy</requirement>
13 </requirements>
<!-- Command template (Cheetah). Runs the bundled clustering_from_distmat.py with the
     distance matrix, the literal word "result" as second positional argument, and the
     selected linkage method. "result" is presumably the output file prefix, since the
     outputs section collects result.tree.newick and result.cluster_assignments.tsv
     (confirm against the script).
     The n-clusters option is appended only in "n-cluster" mode and the height option only
     in "height" mode; in "dendrogram-only" mode neither is passed.
     detect_errors="exit_code" ties job failure to the exit status of the command. -->
14 <command detect_errors="exit_code"><![CDATA[
15 python '$__tool_directory__/clustering_from_distmat.py'
16 '$distmat'
17 result
18 --method $method
19 #if str($cluster_assignment.select) == 'n-cluster':
20 --n-clusters $cluster_assignment.n_cluster
21 #elif str($cluster_assignment.select) == 'height':
22 --height $cluster_assignment.height
23 #end if
24 ]]></command>
<!-- User-facing parameters: the distance matrix, the linkage method, and whether/how to
     cut the resulting tree into clusters. -->
25 <inputs>
26 <param name="distmat" type="data" format="tabular" label="Distance matrix" />
<!-- Option values are inserted verbatim into the command line as the method argument;
     "average" (UPGMA) is preselected. -->
27 <param name="method" type="select" label="Clustering method">
28 <option value="single">Nearest Point (scipy 'single' method)</option>
29 <option value="complete">Farthest Point (scipy 'complete' method)</option>
30 <option value="average" selected="true">UPGMA (scipy 'average' method)</option>
31 <option value="weighted">WPGMA (scipy 'weighted' method)</option>
32 <option value="centroid">UPGMC (scipy 'centroid' method)</option>
33 <option value="median">WPGMC (scipy 'median' method)</option>
34 <option value="ward">Ward/Incremental (scipy 'ward' method)</option>
35 </param>
<!-- Chooses the cluster-assignment mode. The first option (dendrogram-only) has no
     further parameters; the two clustering modes each add their cut parameter plus an
     opt-in switch for also keeping the dendrogram. -->
36 <conditional name="cluster_assignment">
37 <param name="select" type="select" label="Generate cluster assignments?">
38 <option value="dendrogram-only">No, just generate the dendrogram of clustering results</option>
39 <option value="n-cluster">Yes, and divide into specified number of clusters </option>
40 <option value="height">Yes, and use distance threshold to divide into clusters</option>
41 </param>
42 <when value="dendrogram-only" />
43 <when value="n-cluster">
44 <param name="n_cluster" type="integer" value="5" min="1" label="How many clusters to divide into?" />
45 <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" />
46 </when>
47 <when value="height">
<!-- min="0": a negative distance threshold is not meaningful, so it is rejected in the
     form instead of being handed to the script (parallel to min="1" on n_cluster). -->
48 <param name="height" type="float" value="5.0" min="0" label="Distance threshold for clusters to be reported" />
49 <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" />
50 </when>
51 </conditional>
52 </inputs>
<!-- Both outputs are collected from files named result.* in the job working directory;
     the filters decide which of them become datasets for a given run. -->
53 <outputs>
<!-- Dendrogram (newick): kept in dendrogram-only mode, or when generate_dendrogram is
     switched on in one of the clustering modes. The "or" short-circuits, so
     generate_dendrogram is only looked up in modes that define it. -->
54 <data name="clustering_dendrogram" format="newick" from_work_dir="result.tree.newick" label="${tool.name} on ${on_string}: Dendrogram">
55 <filter>cluster_assignment["select"] == "dendrogram-only" or cluster_assignment["generate_dendrogram"]</filter>
56 </data>
<!-- Cluster assignment table (tabular): kept only in the n-cluster and height modes. -->
57 <data name="clustering_assignment" format="tabular" from_work_dir="result.cluster_assignments.tsv" label="${tool.name} on ${on_string}: Cluster assignment">
58 <filter>cluster_assignment["select"] in ["n-cluster", "height"]</filter>
59 </data>
60 </outputs>
<!-- Functional tests. All use the same small matrix; the clustering method defaults to
     "average" unless a test sets it explicitly. -->
61 <tests>
62 <!-- Test data and expected results taken from https://en.wikipedia.org/wiki/UPGMA#Working_example -->
<!-- Defaults only: average linkage, dendrogram-only mode, so the tree is the sole output. -->
63 <test expect_num_outputs="1">
64 <param name="distmat" value="test_matrix.tsv"/>
65 <output name="clustering_dendrogram" ftype="newick" file="test_tree_average.newick" />
66 </test>
<!-- Same as above but with complete linkage. -->
67 <test expect_num_outputs="1">
68 <param name="distmat" value="test_matrix.tsv" />
69 <param name="method" value="complete" />
70 <output name="clustering_dendrogram" ftype="newick" file="test_tree_complete.newick" />
71 </test>
<!-- Cut by distance threshold without requesting the dendrogram: only the assignment
     table is produced. -->
72 <test expect_num_outputs="1">
73 <param name="distmat" value="test_matrix.tsv"/>
74 <conditional name="cluster_assignment">
75 <param name="select" value="height" />
76 <param name="height" value="18" />
77 </conditional>
78 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_h18.tsv" />
79 </test>
<!-- Cut into a fixed number of clusters and also keep the dendrogram: both outputs are
     produced and both are now checked. The tree is compared against the expected file of
     the first test, because matrix and (default) method are the same.
     NOTE(review): this assumes the script writes the same tree regardless of the cut
     options; confirm against clustering_from_distmat.py. -->
80 <test expect_num_outputs="2">
81 <param name="distmat" value="test_matrix.tsv"/>
82 <conditional name="cluster_assignment">
83 <param name="select" value="n-cluster" />
84 <param name="n_cluster" value="4" />
85 <param name="generate_dendrogram" value="true" />
86 </conditional>
87 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" />
<output name="clustering_dendrogram" ftype="newick" file="test_tree_average.newick" />
88 </test>
89 </tests>
90 <help><![CDATA[
91
92 .. class:: infomark
93
94 **What it does**
95
96 This tool lets you perform hierarchical clustering of samples using the `scipy.cluster.hierarchy.linkage`_ function and any of the clustering methods supported by it.
97
98 As input it expects a symmetrical distance matrix with sample names on the first row and in the first column.
99
100 The clustering result can be reported in the form of a dendrogram in newick format.
101
102 Additionally, the tool can report the assignment of the samples to clusters "cut" from the clustering tree using the `scipy.cluster.hierarchy.cut_tree`_ function.
103 Reflecting the parameters of that function, you can choose *how* to cut the tree by specifying either the number of clusters to cut into or a distance threshold, i.e., the *height* at which to cut the tree, as SciPy calls it.
104
105 .. _`scipy.cluster.hierarchy.linkage`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html
106 .. _`scipy.cluster.hierarchy.cut_tree`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.cut_tree.html
107 ]]></help>
<!-- Publication to cite when using this tool, given as a DOI.
     NOTE(review): presumably the SciPy reference paper, matching the scipy requirement;
     confirm the DOI resolves to it. -->
108 <citations>
109 <citation type="doi">10.1038/s41592-019-0686-2</citation>
110 </citations>
111 </tool>