Mercurial > repos > iuc > clustering_from_distmat
comparison clustering_from_distmat.xml @ 1:c0b01c55a0e0 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
author | iuc |
---|---|
date | Mon, 19 Aug 2024 15:33:16 +0000 |
parents | 8192b416f945 |
children | f8ee933de3ca |
comparison
equal
deleted
inserted
replaced
0:8192b416f945 | 1:c0b01c55a0e0 |
---|---|
1 <tool id="clustering_from_distmat" name="Distance matrix-based hierarchical clustering" version="1.0" profile="23.0"> | 1 <tool id="clustering_from_distmat" name="Distance matrix-based hierarchical clustering" version="1.1" profile="23.0"> |
2 <description>using Scipy</description> | 2 <description>using Scipy</description> |
3 <macros> | |
4 <xml name="cluster_assignment_options"> | |
5 <param name="min_cluster_size" type="integer" value="2" min="1" label="Mask clusters with less than this number of samples" help="Samples assigned to clusters smaller than this threshold will have '-' in the corresponding cluster ID column" /> | |
6 <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" /> | |
7 </xml> | |
8 </macros> | |
3 <edam_topics> | 9 <edam_topics> |
4 <edam_topic>topic_2269</edam_topic> | 10 <edam_topic>topic_2269</edam_topic> |
5 <edam_topic>topic_0084</edam_topic> | 11 <edam_topic>topic_0084</edam_topic> |
6 </edam_topics> | 12 </edam_topics> |
7 <edam_operations> | 13 <edam_operations> |
14 <command detect_errors="exit_code"><![CDATA[ | 20 <command detect_errors="exit_code"><![CDATA[ |
15 python '$__tool_directory__/clustering_from_distmat.py' | 21 python '$__tool_directory__/clustering_from_distmat.py' |
16 '$distmat' | 22 '$distmat' |
17 result | 23 result |
18 --method $method | 24 --method $method |
25 $missing_names | |
19 #if str($cluster_assignment.select) == 'n-cluster': | 26 #if str($cluster_assignment.select) == 'n-cluster': |
20 --n-clusters $cluster_assignment.n_cluster | 27 --n-clusters $cluster_assignment.n_cluster |
21 #elif str($cluster_assignment.select) == 'height': | 28 #elif str($cluster_assignment.select) == 'height': |
22 --height $cluster_assignment.height | 29 --height $cluster_assignment.height |
30 #end if | |
31 #if str($cluster_assignment.select) != 'dendrogram-only' and $cluster_assignment.min_cluster_size != 2: | |
32 --min-cluster-size $cluster_assignment.min_cluster_size | |
23 #end if | 33 #end if |
24 ]]></command> | 34 ]]></command> |
25 <inputs> | 35 <inputs> |
26 <param name="distmat" type="data" format="tabular" label="Distance matrix" /> | 36 <param name="distmat" type="data" format="tabular" label="Distance matrix" /> |
27 <param name="method" type="select" label="Clustering method"> | 37 <param name="method" type="select" label="Clustering method"> |
31 <option value="weighted">WPGMA (scipy 'weighted' method)</option> | 41 <option value="weighted">WPGMA (scipy 'weighted' method)</option> |
32 <option value="centroid">UPGMC (scipy 'centroid' method)</option> | 42 <option value="centroid">UPGMC (scipy 'centroid' method)</option> |
33 <option value="median">WPGMC (scipy 'median' method)</option> | 43 <option value="median">WPGMC (scipy 'median' method)</option> |
34 <option value="ward">Ward/Incremental (scipy 'ward' method)</option> | 44 <option value="ward">Ward/Incremental (scipy 'ward' method)</option> |
35 </param> | 45 </param> |
46 <param name="missing_names" type="select" label="How does the input specify sample names?"> | |
47 <option value="">First line and first column specify sample names (fully symmetric input)</option> | |
48 <option value="--nr">First line specifies sample names, subsequent lines only data</option> | |
49 <option value="--nc">Each line specifies sample name in first column, first line is not special</option> | |
50 </param> | |
36 <conditional name="cluster_assignment"> | 51 <conditional name="cluster_assignment"> |
37 <param name="select" type="select" label="Generate cluster assignments?"> | 52 <param name="select" type="select" label="Generate cluster assignments?"> |
38 <option value="dendrogram-only">No, just generate the dendrogram of clustering results</option> | 53 <option value="dendrogram-only">No, just generate the dendrogram of clustering results</option> |
39 <option value="n-cluster">Yes, and divide into specified number of clusters </option> | 54 <option value="n-cluster">Yes, and divide into specified number of clusters </option> |
40 <option value="height">Yes, and use distance threshold to divide into clusters</option> | 55 <option value="height">Yes, and use distance threshold to divide into clusters</option> |
41 </param> | 56 </param> |
42 <when value="dendrogram-only" /> | 57 <when value="dendrogram-only" /> |
43 <when value="n-cluster"> | 58 <when value="n-cluster"> |
44 <param name="n_cluster" type="integer" value="5" min="1" label="How many clusters to divide into?" /> | 59 <param name="n_cluster" type="integer" value="5" min="1" label="How many clusters to divide into?" /> |
45 <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" /> | 60 <expand macro="cluster_assignment_options" /> |
46 </when> | 61 </when> |
47 <when value="height"> | 62 <when value="height"> |
48 <param name="height" type="float" value="5.0" label="Distance threshold for clusters to be reported" /> | 63 <param name="height" type="float" value="5.0" label="Distance threshold for clusters to be reported" /> |
49 <param name="generate_dendrogram" type="boolean" label="Produce also the dendrogram of clustering results" /> | 64 <expand macro="cluster_assignment_options" /> |
50 </when> | 65 </when> |
51 </conditional> | 66 </conditional> |
52 </inputs> | 67 </inputs> |
53 <outputs> | 68 <outputs> |
54 <data name="clustering_dendrogram" format="newick" from_work_dir="result.tree.newick" label="${tool.name} on ${on_string}: Dendrogram"> | 69 <data name="clustering_dendrogram" format="newick" from_work_dir="result.tree.newick" label="${tool.name} on ${on_string}: Dendrogram"> |
72 <test expect_num_outputs="1"> | 87 <test expect_num_outputs="1"> |
73 <param name="distmat" value="test_matrix.tsv"/> | 88 <param name="distmat" value="test_matrix.tsv"/> |
74 <conditional name="cluster_assignment"> | 89 <conditional name="cluster_assignment"> |
75 <param name="select" value="height" /> | 90 <param name="select" value="height" /> |
76 <param name="height" value="18" /> | 91 <param name="height" value="18" /> |
92 <param name="min_cluster_size" value="1" /> | |
77 </conditional> | 93 </conditional> |
78 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_h18.tsv" /> | 94 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_h18.tsv" /> |
95 </test> | |
96 <test expect_num_outputs="1"> | |
97 <param name="distmat" value="test_matrix.tsv"/> | |
98 <conditional name="cluster_assignment"> | |
99 <param name="select" value="height" /> | |
100 <param name="height" value="18" /> | |
101 </conditional> | |
102 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_h18_s2.tsv" /> | |
79 </test> | 103 </test> |
80 <test expect_num_outputs="2"> | 104 <test expect_num_outputs="2"> |
81 <param name="distmat" value="test_matrix.tsv"/> | 105 <param name="distmat" value="test_matrix.tsv"/> |
82 <conditional name="cluster_assignment"> | 106 <conditional name="cluster_assignment"> |
83 <param name="select" value="n-cluster" /> | 107 <param name="select" value="n-cluster" /> |
84 <param name="n_cluster" value="4" /> | 108 <param name="n_cluster" value="4" /> |
109 <param name="min_cluster_size" value="1" /> | |
110 <param name="generate_dendrogram" value="true" /> | |
111 </conditional> | |
112 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" /> | |
113 </test> | |
114 <test expect_num_outputs="2"> | |
115 <param name="distmat" value="test_matrix_nr.tsv" /> | |
116 <param name="missing_names" value="--nr" /> | |
117 <conditional name="cluster_assignment"> | |
118 <param name="select" value="n-cluster" /> | |
119 <param name="n_cluster" value="4" /> | |
120 <param name="min_cluster_size" value="1" /> | |
121 <param name="generate_dendrogram" value="true" /> | |
122 </conditional> | |
123 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" /> | |
124 </test> | |
125 <test expect_num_outputs="2"> | |
126 <param name="distmat" value="test_matrix_nc.tsv" /> | |
127 <param name="missing_names" value="--nc" /> | |
128 <conditional name="cluster_assignment"> | |
129 <param name="select" value="n-cluster" /> | |
130 <param name="n_cluster" value="4" /> | |
131 <param name="min_cluster_size" value="1" /> | |
85 <param name="generate_dendrogram" value="true" /> | 132 <param name="generate_dendrogram" value="true" /> |
86 </conditional> | 133 </conditional> |
87 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" /> | 134 <output name="clustering_assignment" ftype="tabular" file="test_assignment_average_n4.tsv" /> |
88 </test> | 135 </test> |
89 </tests> | 136 </tests> |
93 | 140 |
94 **What it does** | 141 **What it does** |
95 | 142 |
96 This tool lets you perform hierarchical clustering of samples using the `scipy.cluster.hierarchy.linkage`_ function and any of the clustering methods supported by it. | 143 This tool lets you perform hierarchical clustering of samples using the `scipy.cluster.hierarchy.linkage`_ function and any of the clustering methods supported by it. |
97 | 144 |
98 As input it expects a symmetrical distance matrix with sample names on the first row and in the first column. | 145 As input it expects a symmetrical distance matrix with sample names on the first row and/or in the first column. |
99 | 146 |
100 The clustering result can be reported in the form of a dendrogram in newick format. | 147 The clustering result can be reported in the form of a dendrogram in newick format. |
101 | 148 |
102 Additionally, the tool can report the assignment of the samples to clusters "cut" from the clustering tree using the `scipy.cluster.hierarchy.cut_tree`_ function. | 149 Additionally, the tool can report the assignment of the samples to clusters "cut" from the clustering tree using the `scipy.cluster.hierarchy.cut_tree`_ function. |
103 Reflecting the parameters of that function, you can specify *how* to cut the tree by specifying either the number of clusters to cut into or a distance threshold, i.e., the height at which to cut the tree, as SciPy calls it. | 150 Reflecting the parameters of that function, you can specify *how* to cut the tree by specifying either the number of clusters to cut into or a distance threshold, i.e., the height at which to cut the tree, as SciPy calls it. |