comparison heatmap_config.xml @ 0:ad06aeed02c9 draft

planemo upload for repository https://github.com/workflow4metabolomics/heatmap.git commit 7e599d006e53fefb7e1b923ba8894b4fb19f9cfa-dirty
author ethevenot
date Tue, 02 Aug 2016 06:26:41 -0400
parents
children db1d80e89156
comparison
equal deleted inserted replaced
-1:000000000000 0:ad06aeed02c9
1 <tool id="Heatmap" name="Heatmap" version="2.2.0">
2 <description>Heatmap of the dataMatrix</description>
3
4 <requirements> <!-- Software dependencies declared for this tool -->
5 <requirement version="3.2.2" type="package">R</requirement>
6 <requirement type="package">r-batch</requirement> <!-- NOTE(review): no version is pinned here, unlike R above; confirm this is intended -->
7 </requirements>
8
9 <command><![CDATA[
10 Rscript '$__tool_directory__/heatmap_wrapper.R'
11 dataMatrix_in '$dataMatrix_in'
12 sampleMetadata_in '$sampleMetadata_in'
13 variableMetadata_in '$variableMetadata_in'
14 ## Clustering settings that are always forwarded to the wrapper (name/value pairs)
15 disC '$disC'
16 cutSamN '$cutSamN'
17 cutVarN '$cutVarN'
18 ## Advanced settings are forwarded only when the "Full list" branch of advPar is selected
19 #if $advPar.oppC == "full"
20 corMetC '$advPar.corMetC'
21 aggMetC '$advPar.aggMetC'
22 colC '$advPar.colC'
23 scaL '$advPar.scaL'
24 cexN '$advPar.cexN'
25 #end if
26 ## Output dataset paths handed to the wrapper; single quotes keep the shell from expanding any value
27 dataMatrix_out '$dataMatrix_out'
28 sampleMetadata_out '$sampleMetadata_out'
29 variableMetadata_out '$variableMetadata_out'
30 figure '$figure'
31 information '$information'
32 ]]></command>
33
34 <inputs>
35 <param name="dataMatrix_in" type="data" format="tabular" label="Data matrix file"/> <!-- the three tabular input tables forwarded to the wrapper -->
36 <param name="sampleMetadata_in" type="data" format="tabular" label="Sample metadata file"/>
37 <param name="variableMetadata_in" type="data" format="tabular" label="Variable metadata file"/>
38
39 <param name="disC" type="select" label="Dissimilarity to be used for clustering" help="If correlation is selected, the pearson method will be used by default unless a specific method is selected in the advanced parameters below">
40 <option value="euclidean">euclidean</option> <!-- option values are forwarded verbatim to the wrapper as disC -->
41 <option value="maximum">maximum</option>
42 <option value="manhattan">manhattan</option>
43 <option value="canberra">canberra</option>
44 <option value="binary">binary</option>
45 <option value="minkowski">minkowski</option>
46 <option selected="true" value="1-cor">1-correlation</option>
47 <option value="1-abs(cor)">1-abs(correlation)</option>
48 </param>
49
50 <param name="cutSamN" type="select" label="Number of sample clusters to identify"> <!-- 1 (default) means only dendrograms are displayed, per the help text -->
51 <option selected="true" value="1">1</option>
52 <option value="2">2</option>
53 <option value="3">3</option>
54 <option value="4">4</option>
55 <option value="5">5</option>
56 <option value="6">6</option>
57 <option value="7">7</option>
58 <option value="8">8</option>
59 <option value="9">9</option>
60 <option value="10">10</option>
61 <option value="11">11</option>
62 <option value="12">12</option>
63 <option value="13">13</option>
64 <option value="14">14</option>
65 <option value="15">15</option>
66 </param>
67
68 <param name="cutVarN" type="select" label="Number of variable clusters to identify"> <!-- same choices as cutSamN, applied to the variables -->
69 <option selected="true" value="1">1</option>
70 <option value="2">2</option>
71 <option value="3">3</option>
72 <option value="4">4</option>
73 <option value="5">5</option>
74 <option value="6">6</option>
75 <option value="7">7</option>
76 <option value="8">8</option>
77 <option value="9">9</option>
78 <option value="10">10</option>
79 <option value="11">11</option>
80 <option value="12">12</option>
81 <option value="13">13</option>
82 <option value="14">14</option>
83 <option value="15">15</option>
84 </param>
85
86 <conditional name="advPar"> <!-- switches between hidden defaults and user-selectable advanced settings -->
87 <param name="oppC" type="select" label="Advanced parameters">
88 <option selected="true" value="default">Use default</option>
89 <option value="full">Full list</option>
90 </param>
91 <when value="default"> <!-- hidden values mirror the selected options of the "full" branch; the command does not forward them in this branch -->
92 <param type="hidden" name="corMetC" value="pearson"/>
93 <param type="hidden" name="aggMetC" value="ward"/>
94 <param type="hidden" name="colC" value="blueOrangeRed"/>
95 <param type="hidden" name="scaL" value="TRUE"/>
96 <param type="hidden" name="cexN" value="0.8"/>
97 </when>
98 <when value="full"> <!-- each value below is forwarded to the wrapper under the same name -->
99 <param name="corMetC" type="select" label="Method of correlation to be used">
100 <option selected="true" value="pearson">pearson</option>
101 <option value="spearman">spearman</option>
102 <option value="kendall">kendall</option>
103 </param>
104 <param name="aggMetC" type="select" label="Method of agglomeration to be used">
105 <option selected="true" value="ward">ward</option>
106 <option value="single">single</option>
107 <option value="complete">complete</option>
108 <option value="average">average</option>
109 <option value="mcquitty">mcquitty</option>
110 <option value="median">median</option>
111 <option value="centroid">centroid</option>
112 </param>
113 <param name="colC" type="select" label="Color scale">
114 <option selected="true" value="blueOrangeRed">blue-orange-red</option>
115 <option value="redBlackGreen">red-black-green</option>
116 </param>
117 <param name="scaL" type="select" label="Variable standardization (for plotting only)" help="Standardization is performed after the clustering for display only (may enhance contrast) and does not modify cluster computation nor intensities in the output files">
118 <option selected="true" value="TRUE">yes</option>
119 <option value="FALSE">no</option>
120 </param>
121 <param name="cexN" type="select" label="Size of labels">
122 <option value="0.5">0.5</option>
123 <option value="0.6">0.6</option>
124 <option value="0.7">0.7</option>
125 <option selected="true" value="0.8">0.8</option>
126 <option value="0.9">0.9</option>
127 <option value="1">1</option>
128 </param>
129 </when>
130 </conditional>
131
132 </inputs>
133
134 <outputs> <!-- three reordered tables plus the heatmap figure and a message log, as listed in the help text -->
135 <data name="dataMatrix_out" format="tabular" label="${tool.name}_${dataMatrix_in.name}"/>
136 <data name="sampleMetadata_out" format="tabular" label="${tool.name}_${sampleMetadata_in.name}"/>
137 <data name="variableMetadata_out" format="tabular" label="${tool.name}_${variableMetadata_in.name}"/>
138 <data name="figure" format="pdf" label="${tool.name}_figure.pdf"/>
139 <data name="information" format="txt" label="${tool.name}_information.txt"/>
140 </outputs>
141
142 <tests>
143 <test> <!-- "Full list" run: 1-cor dissimilarity with spearman correlation, 4 sample clusters and 3 variable clusters -->
144 <param name="dataMatrix_in" value="input-dataMatrix.tsv"/>
145 <param name="sampleMetadata_in" value="input-sampleMetadata.tsv"/>
146 <param name="variableMetadata_in" value="input-variableMetadata.tsv"/>
147 <param name="disC" value="1-cor"/>
148 <param name="cutSamN" value="4"/>
149 <param name="cutVarN" value="3"/>
150 <param name="oppC" value="full"/> <!-- NOTE(review): oppC and the five params below belong to the "advPar" conditional but are given at the top level of the test; confirm the targeted Galaxy version resolves them by bare name -->
151 <param name="corMetC" value="spearman"/>
152 <param name="aggMetC" value="ward"/>
153 <param name="colC" value="blueOrangeRed"/>
154 <param name="scaL" value="TRUE"/>
155 <param name="cexN" value="0.8"/>
156 <output name="variableMetadata_out" file="output-variableMetadata.tsv"/> <!-- only this output is compared with a reference file; the other four outputs are not checked -->
157 </test>
158 </tests>
159
160 <help>
161
162 .. class:: infomark
163
164 | **Tool update: See the 'NEWS' section at the bottom of the page**
165
166 ---------------------------------------------------
167
168 .. class:: infomark
169
170 **Author** Etienne Thevenot (W4M Core Development Team, MetaboHUB Paris, CEA)
171
172 ---------------------------------------------------
173
174 .. class:: infomark
175
176 **References**
177
178 | Etienne A. Thevenot, Aurelie Roux, Ying Xu, Eric Ezan, and Christophe Junot (2015). Analysis of the human adult urinary metabolome variations with age, body mass index and gender by implementing a comprehensive workflow for univariate and OPLS statistical analyses. *Journal of Proteome Research*, **14**:3322-3335 (http://dx.doi.org/10.1021/acs.jproteome.5b00354).
179 | R Core Team (2013). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria (http://www.r-project.org)
180 |
181
182 ---------------------------------------------------
183
184 .. class:: infomark
185
186 **Tool updates**
187
188 See the **NEWS** section at the bottom of this page
189
190 ---------------------------------------------------
191
192
193 ========================
194 Heatmap
195 ========================
196
197 -----------
198 Description
199 -----------
200
201 | Performs hierarchical clustering on both the samples (rows) and variables (columns) of the dataMatrix
202 | Displays the dataMatrix with sorted rows and columns and the dendrograms (heatmap)
203 | In the output dataMatrix, sampleMetadata and variableMetadata files, samples and variables are sorted according to the dendrograms
204 | Optionally, indicates the groups of samples and/or variables obtained by cutting the dendrograms into a specific number of partitions
205 |
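206 | Note: 1) Computations rely on the 'hclust' function. By default, the dissimilarity is 1 - cor (where cor is the Pearson correlation) and the 'ward.D' aggregating method is used; another dissimilarity can be selected in the parameters, and the correlation and agglomeration methods can be changed in the advanced parameters.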
207 | 2) A "blue-orange-red" palette is generated with the function 'colorRampPalette'; **By default, variables are standardized (mean-centered and unit-scaled) to enhance contrast on the figure**; standardization can be turned off by using the full list of parameters; in any case, standardizing is performed after the computation of clusters, for display only
208 | 3) When a specific number of sample and/or variable groups (i.e. > 1) is selected, the group numbers are indicated on the plot and in an additional "heat_clust" column in the sampleMetadata and/or variableMetadata
209 | 4) Example of computation times: for 126 variables: a few seconds; for 4324 variables: 30 min
210 |
211
212
213 -----------------
214 Workflow position
215 -----------------
216
217 | In the workflow example below, the structure of the dataset (dataMatrix) is visualized by using first the "Quality Metrics" (for checking potential signal drift, sample outliers, etc.), then the "Heatmap" (for correlations between samples or variables), and finally the "Multivariate" (for PCA or PLS) modules.
218 |
219
220 .. image:: heatmap_workflowPositionImage.png
221 :width: 600
222
223
224
225 -----------
226 Input files
227 -----------
228
229 +--------------------------+-------------+
230 | File type | Format |
231 +==========================+=============+
232 | 1 : Data matrix | tabular |
233 +--------------------------+-------------+
234 | 2 : Sample metadata | tabular |
235 +--------------------------+-------------+
236 | 3 : Variable metadata | tabular |
237 +--------------------------+-------------+
238
239 |
240 | Required formats for the dataMatrix, sampleMetadata and variableMetadata files are described in the HowTo entitled 'Format Data For Postprocessing' available on the main page of Workflow4Metabolomics.org; formats of the three files can be further checked with the 'Check Data' module (in the 'Quality Control' section)
241 |
242
243 ----------
244 Parameters
245 ----------
246
247 Number of sample clusters
248 | By default (cluster = 1), only dendrograms are displayed; when a specific number of sample clusters is selected, the sample dendrogram is cut at the corresponding level: the sample groups are displayed on the dendrogram and a "heat_clust" column is added in the sampleMetadata file with the group of each sample
249 |
250
251 Number of variable clusters
252 | Same as above for variables
253 |
254
255 Standardization (Full list)
256 | By default, variables are standardized for display to enhance contrast of the heatmap (note that standardization is performed after the clustering for display only and does not modify cluster computation nor intensities in the output files)
257 |
258
259 Size of labels (Full list)
260 | By default, the size of sample and variable names on the heatmap is 0.8 (note that names with more than 14 characters are truncated); this number may be lowered (or raised) in case of many (few) names to display
261
262
263 ------------
264 Output files
265 ------------
266
267 dataMatrix_out.tabular
268 | dataMatrix file with rows and columns sorted according to the dendrogram
269 |
270
271 sampleMetadata_out.tabular
272 | sampleMetadata file with rows sorted according to the sample dendrogram; in case a number of sample groups is specified, an additional "heat_clust" column is added with the cluster group of each sample
273 |
274
275 variableMetadata_out.tabular
276 | variableMetadata file with rows sorted according to the variable dendrogram; in case a number of variable groups is specified, an additional "heat_clust" column is added with the cluster group of each variable
277 |
278
279 figure.pdf
280 | Heatmap
281 |
282
283 information.txt
284 | File with all messages and warnings generated during the computation
285 |
286
287 ---------------------------------------------------
288
289 ---------------
290 Working example
291 ---------------
292
293 .. class:: infomark
294
295 See the **W4M00001a_sacurine-subset-statistics** shared history in the **Shared Data/Published Histories** menu
296
297 ---------------------------------------------------
298
299 ----
300 NEWS
301 ----
302
303 CHANGES IN VERSION 2.2.0
304 ========================
305
306 NEW FEATURES
307
308 Default method for the correlation coefficient is now 'pearson', instead of 'spearman' previously (the latter can still be selected in the advanced parameters)
309
310 The 1-abs(correlation) dissimilarity is now available (in addition to the default '1-correlation') in case the sign of correlations between samples and between variables does not matter, as well as the euclidean, maximum, manhattan, canberra, binary, and minkowski dissimilarities
311
312 A new red-black-green color scale is available
313
314
315 CHANGES IN VERSION 2.1.2
316 ========================
317
318 INTERNAL MODIFICATIONS
319
320 Creating additional files for planemo and travis running and installation validation
321
322 CHANGES IN VERSION 2.1.1
323 ========================
324
325 Internal replacement of the as.hclust function which happened to produce error messages
326
327 </help>
328
329 <citations> <!-- BibTeX entry for the publication cited in the help text (same DOI as in the References section) -->
330 <citation type="bibtex">@Article{Thevenot2015,
331 Title = {Analysis of the human adult urinary metabolome variations with age, body mass index and gender by implementing a comprehensive workflow for univariate and OPLS statistical analyses},
332 Author = {Thevenot, Etienne A. and Roux, Aurelie and Xu, Ying and Ezan, Eric and Junot, Christophe},
333 Journal = {Journal of Proteome Research},
334 Year = {2015},
335 Note = {PMID: 26088811},
336 Number = {8},
337 Pages = {3322-3335},
338 Volume = {14},
339
340 Doi = {10.1021/acs.jproteome.5b00354},
341 Url = {http://pubs.acs.org/doi/full/10.1021/acs.jproteome.5b00354}
342 }</citation>
343 </citations>
344
345 </tool>