annotate COBRAxy/marea_cluster.xml @ 516:7726a4a7173f draft

Uploaded
author luca_milaz
date Thu, 09 Oct 2025 09:32:41 +0000
parents 4a385fdb9e58
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
1 <tool id="MaREAcluster" name="Cluster Analysis" version="2.0.0">
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
2 <description></description>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
3 <macros>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
4 <import>marea_macros.xml</import>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
5 </macros>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
6 <requirements>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
7 <requirement type="package" version="1.24.4">numpy</requirement>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
8 <requirement type="package" version="2.0.3">pandas</requirement>
307
d905439271fa Uploaded
francesco_lapi
parents: 4
diff changeset
9 <requirement type="package" version="1.11">scipy</requirement>
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
10 <requirement type="package" version="1.3.2">scikit-learn</requirement>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
11 <requirement type="package" version="3.7.3">matplotlib</requirement>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
12 <requirement type="package" version="5.2.2">lxml</requirement>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
13 </requirements>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
14 <command detect_errors="exit_code">
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
15 <![CDATA[
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
16 python '$__tool_directory__/marea_cluster.py'
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
17 --input '$input'
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
18 --tool_dir '$__tool_directory__'
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
19 --out_log '$log'
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
20 --best_cluster '$best_cluster'
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
21 --cluster_type ${data.clust_type}
427
4a385fdb9e58 Uploaded
francesco_lapi
parents: 333
diff changeset
22 --scaling $scaling
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
23 #if $data.clust_type == 'kmeans':
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
24 --k_min ${data.k_min}
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
25 --k_max ${data.k_max}
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
26 --elbow ${data.elbow}
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
27 --silhouette ${data.silhouette}
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
28 #end if
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
29 #if $data.clust_type == 'dbscan':
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
30 #if $data.dbscan_advanced.advanced == 'true'
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
31 --eps ${data.dbscan_advanced.eps}
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
32 --min_samples ${data.dbscan_advanced.min_samples}
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
33 #end if
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
34 #end if
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
35 #if $data.clust_type == 'hierarchy':
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
36 --k_min ${data.k_min}
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
37 --k_max ${data.k_max}
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
38 --silhouette ${data.silhouette}
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
39 #end if
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
40 ]]>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
41 </command>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
42 <inputs>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
43 <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" />
427
4a385fdb9e58 Uploaded
francesco_lapi
parents: 333
diff changeset
44 <param name="scaling" argument="--scaling" type="boolean" value="true" label="Apply scaling to the dataset before clustering" />
4a385fdb9e58 Uploaded
francesco_lapi
parents: 333
diff changeset
45
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
46 <conditional name="data">
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
47 <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
48 <option value="kmeans" selected="true">KMeans</option>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
49 <option value="dbscan">DBSCAN</option>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
50 <option value="hierarchy">Agglomerative Hierarchical</option>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
51 </param>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
52 <when value="kmeans">
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
53 <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
54 <param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" />
316
10e9a5a86d37 Uploaded
francesco_lapi
parents: 315
diff changeset
55 <param name="elbow" argument="--elbow" type="boolean" value="true" label="Draw the elbow plot from k-min to k-max"/>
10e9a5a86d37 Uploaded
francesco_lapi
parents: 315
diff changeset
56 <param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/>
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
57 </when>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
58 <when value="dbscan">
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
59 <conditional name="dbscan_advanced">
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
60 <param name="advanced" type="boolean" value="false" label="Want to use custom params for DBSCAN? (if not optimal values will be used)">
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
61 <option value="true">Yes</option>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
62 <option value="false">No</option>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
63 </param>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
64 <when value="false"></when>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
65 <when value="true">
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
66 <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
67 <param name="min_samples" argument="--min_samples" type="integer" min="1" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/> <!-- argument mirrors the --min_samples flag emitted in the command template; the count includes the point itself, so it must be at least 1 -->
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
68
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
69 </when>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
70 </conditional>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
71 </when>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
72 <when value="hierarchy">
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
73 <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
74 <param name="k_max" argument="--k_max" type="integer" min="3" max="20" value="3" label="Max number of clusters (k) to be tested" />
316
10e9a5a86d37 Uploaded
francesco_lapi
parents: 315
diff changeset
75 <param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/>
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
76 </when>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
77 </conditional>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
78 </inputs>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
79
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
80 <outputs>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
81 <data format="txt" name="log" label="${tool.name} - Log" />
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
82 <data format="tabular" name="best_cluster" label="${tool.name} - best cluster assignment" />
333
a7d52bde3efe Uploaded
francesco_lapi
parents: 316
diff changeset
83 <collection name="clustering" type="list" label="${tool.name} - Plots and results">
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
84 <discover_datasets pattern="__name_and_ext__" directory="clustering"/>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
85 <filter>data['clust_type'] == "kmeans" or data['clust_type'] == "hierarchy"</filter>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
86 </collection>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
87 </outputs>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
88 <help>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
89 <![CDATA[
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
90
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
91 What it does
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
92 -------------
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
93
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
94 The tool performs cluster analysis of any dataset, using the most widely used algorithms: K-means, agglomerative
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
95 clustering and DBSCAN (Density Based Spatial Clustering of Applications with Noise).
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
96
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
97 Accepted files are:
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
98 - Tabular files in which rows indicate different variables and columns different observations. The first row reports the observations’ labels.
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
99
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
100
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
101 Example of input dataset:
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
102 -------------------------
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
103
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
104 +----------+----------+----------+
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
105 |TCGAA62670|TCGAA62671|TCGAA62672|
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
106 +==========+==========+==========+
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
107 | 0.523167 | 0.371355 | 0.925661 |
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
108 +----------+----------+----------+
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
109 | 0.568765 | 0.765567 | 0.456789 |
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
110 +----------+----------+----------+
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
111 | 0.876545 | 0.768933 | 0.987654 |
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
112 +----------+----------+----------+
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
113 | 0.456788 | 0.876543 | 0.876542 |
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
114 +----------+----------+----------+
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
115 | 0.876543 | 0.786543 | 0.897654 |
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
116 +----------+----------+----------+
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
117
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
118 .
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
119
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
120
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
121 Options:
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
122 --------
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
123
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
124 The following clustering types can be chosen:
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
125 - K-means. This option requires the number of clusters (k) to be set. Different values of k can be tested.
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
126 - Agglomerative clustering. Different values of k can be set, to cut the resulting dendrogram.
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
127 - DBSCAN. The DBSCAN method chooses the number of clusters based on parameters that define when a region is to be considered dense. Custom parameters may be used, namely the maximum distance between two samples for one to be considered as in the neighborhood of the other and the number of samples in a neighborhood for a point to be considered as a core point.
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
128
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
129 The tool generates:
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
130 - a tab-separated file reporting the assignment of each observation to a cluster. When different numbers of clusters have been tested, the best cluster assignment is reported according to the maximum average silhouette score. If desired, the elbow plot is generated, as well as a silhouette plot for each k.
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
131 - a list of items, including: 1) the cluster assignment for each tested number of clusters; 2) the dendrogram in case of agglomerative clustering; 3) elbow and silhouette plots in case of k-means clustering.
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
132 - a log file (.txt).
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
133
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
134
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
135 .. class:: infomark
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
136
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
137 **TIP**: This tool has been conceived to cluster gene expression data, by using the RAS scores computed by `Ras tool`_.
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
138
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
139 .. class:: infomark
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
140
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
141 **TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
142
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
143 @REFERENCE@
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
144
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
145 .. _Ras tool: http://bimib.disco.unimib.it:5555/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fbimib%2Fmarea%2FMaREA+RAS+Generator%2F1.0.6&version=1.0.6&__identifer=auulv6gbp76
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
146 .. _Convert delimiters to TAB: http://bimib.disco.unimib.it:5555/?tool_id=Convert+characters1&version=1.0.0&__identifer=76g7trea4j6
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
147
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
148 ]]>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
149 </help>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
150 <expand macro="citations" />
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
151 </tool>
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
152
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
153