Mercurial > repos > artbio > gsc_high_dimensions_visualisation
comparison high_dim_visu.xml @ 0:cad0001b9cfb draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_high_dimension_visualization commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe
author | artbio |
---|---|
date | Mon, 24 Jun 2019 13:39:11 -0400 |
parents | |
children | c756ab726a85 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cad0001b9cfb |
---|---|
1 <tool id="high_dimensions_visualisation" name="Generate PCA, tSNE and HCPC" version="0.9.0"> | |
2 <description>from highly dimensional expression data</description> | |
3 <requirements> <!-- Packages required to run the tool; every version is pinned together with its build suffix (r3.3.2_0) --> | |
4 <requirement version="1.3.2=r3.3.2_0" type="package">r-optparse</requirement> | |
5 <requirement version="1.39=r3.3.2_0" type="package">r-factominer</requirement> | |
6 <requirement version="1.0.5=r3.3.2_0" type="package">r-factoextra</requirement> | |
7 <requirement version="0.13=r3.3.2_0" type="package">r-rtsne</requirement> | |
8 <requirement version="2.2.1=r3.3.2_0" type="package">r-ggplot2</requirement> | |
9 <requirement version="0.4.1=r3.3.2_0" type="package">r-ggfortify</requirement> | |
10 </requirements> | |
11 <stdio> <!-- Any exit code of 1 or above is reported as a fatal "Tool exception" --> | |
12 <exit_code description="Tool exception" level="fatal" range="1:" /> | |
13 </stdio> | |
14 <!-- Assembles the Rscript call; method-specific options are only emitted for the selected visu_choice --> <command detect_errors="exit_code"><![CDATA[ | |
15 Rscript '$__tool_directory__/high_dim_visu.R' | |
16 --data '$input' | |
17 --sep '$input_sep' | |
18 --colnames '$input_header' | |
19 #if $factor_condition.factor_choice == 'Yes': | |
20 --factor '$factor_condition.factor' | |
21 #end if | |
22 #if $labels == "yes": | |
23 --labels 'TRUE' | |
24 #else | |
25 --labels 'FALSE' | |
26 #end if | |
27 #if $coord == "yes": | |
28 --table_coordinates '$table_coordinates' | |
29 #end if | |
30 --visu_choice '$visualisation.visu_choice' | |
31 #if $visualisation.visu_choice == "tSNE": | |
32 --Rtsne_seed '$visualisation.Rtsne_seed' | |
33 --Rtsne_perplexity '$visualisation.Rtsne_perplexity' | |
34 --Rtsne_theta '$visualisation.Rtsne_theta' | |
35 --Rtsne_max_iter '$visualisation.Rtsne_max_iter' | |
36 --Rtsne_dims '$visualisation.Rtsne_dims' | |
37 --Rtsne_initial_dims '$visualisation.Rtsne_initial_dims' | |
38 --Rtsne_pca '$visualisation.Rtsne_pca' | |
39 --Rtsne_pca_center '$visualisation.Rtsne_pca_center' | |
40 --Rtsne_pca_scale '$visualisation.Rtsne_pca_scale' | |
41 --Rtsne_normalize '$visualisation.Rtsne_normalize' | |
42 --Rtsne_exaggeration_factor '$visualisation.Rtsne_exaggeration_factor' | |
43 #end if | |
44 ## HCPC-specific options | |
45 #if $visualisation.visu_choice == "HCPC": | |
46 --HCPC_ncluster '$visualisation.HCPC_ncluster' | |
47 --HCPC_npc '$visualisation.HCPC_npc' | |
48 --HCPC_metric '$visualisation.HCPC_metric' | |
49 --HCPC_method '$visualisation.HCPC_method' | |
50 --HCPC_consol '$visualisation.HCPC_consol' | |
51 --HCPC_itermax '$visualisation.HCPC_itermax' | |
52 --HCPC_min '$visualisation.HCPC_min' | |
53 --HCPC_max '$visualisation.HCPC_max' | |
54 --HCPC_clusterCA '$visualisation.HCPC_clusterCA' | |
55 --HCPC_kk '$visualisation.HCPC_kk' | |
56 #end if | |
57 ## PCA-specific option | |
58 #if $visualisation.visu_choice == "PCA": | |
59 --PCA_npc '$visualisation.PCA_npc' | |
60 #end if | |
61 | |
62 ## The PDF plot is requested for every visualisation method | |
63 --pdf_out '$pdf_out' | |
64 | |
65 ]]></command> | |
66 <inputs> <!-- Input matrix and parsing options, optional contrast factor, visualisation method with its own settings, coordinates-table switch --> | |
67 <param name="input" type="data" format="txt,tabular" label="expression data"/> | |
68 <param name="input_sep" type="select" label="Input column separator"> | |
69 <option value="tab" selected="true">Tabs</option> | |
70 <option value=",">Comma</option> | |
71 </param> | |
72 <param name="input_header" type="select" label="Consider first line of input file as header?"> | |
73 <option value="TRUE" selected="true">Yes</option> | |
74 <option value="FALSE">No</option> | |
75 </param> | |
76 <param name="labels" type="select" label="Add sample labels to scatter plot" > | |
77 <option value="no" selected="true">No Labels</option> | |
78 <option value="yes" >Label points</option> | |
79 </param> | |
80 <conditional name="factor_condition"> <!-- the factor dataset is only requested when factor_choice is Yes --> | |
81 <param label="Do you wish to contrast cells with a factor" name="factor_choice" type="select"> | |
82 <option value="Yes">Yes</option> | |
83 <option value="No" selected="true">No</option> | |
84 </param> | |
85 <when value="Yes"> | |
86 <param name="factor" type="data" format="tabular" label="Factor to contrast data" | |
87 help="A two-column data frame, first column contains data labels, second column contains the levels of a factor to contrast visualisation" /> | |
88 </when> | |
89 <when value="No"> <!-- no additional parameter --> | |
90 </when> | |
91 </conditional> | |
92 <conditional name="visualisation"> <!-- each method exposes its own parameter set; the command reads them as $visualisation.* --> | |
93 <param label="Choose visualisation method" name="visu_choice" type="select"> | |
94 <option value="PCA" selected="true">PCA</option> | |
95 <option value="HCPC">HCPC</option> | |
96 <option value="tSNE">t-SNE</option> | |
97 </param> | |
98 <when value="tSNE"> | |
99 <param name="Rtsne_seed" value="42" type="integer" label="Seed value for reproducibility of t-SNE" help="Set to 42 as default" /> | |
100 <param name="Rtsne_dims" value="2" type="integer" label="dims (t-SNE)" help="Output dimensionality (should not be greater than 3)" /> | |
101 <param name="Rtsne_pca" type="select" label="pca (t-SNE)" help="Whether an initial PCA step should be performed" > | |
102 <option value="TRUE" selected="true">Yes</option> | |
103 <option value="FALSE">No</option> | |
104 </param> | |
105 <param name="Rtsne_initial_dims" value="50" type="integer" label="initial dims (t-SNE)" help="The number of dimensions that should be retained in the initial PCA step" /> | |
106 <param name="Rtsne_pca_center" type="select" label="Centering data" help="Should data be centered before pca is applied? " > | |
107 <option value="TRUE" selected="true">Yes</option> | |
108 <option value="FALSE">No</option> | |
109 </param> | |
110 <param name="Rtsne_pca_scale" type="select" label="Scaling data" help="Should data be scaled before pca is applied? " > | |
111 <option value="TRUE">Yes</option> | |
112 <option value="FALSE" selected="true">No</option> | |
113 </param> | |
114 <param name="Rtsne_normalize" type="select" label="Normalisation of data" | |
115 help="Should variables (gene expressions) be normalized internally prior to distance calculations? " > | |
116 <option value="TRUE" selected="true">Yes</option> | |
117 <option value="FALSE">No</option> | |
118 </param> | |
119 <param name="Rtsne_perplexity" value="10.0" type="float" label="perplexity (t-SNE)" help="should be less than ((nbr observations)-1)/3" /> | |
120 <param name="Rtsne_theta" value="1.0" type="float" label="theta (t-SNE)"/> | |
121 <param name="Rtsne_exaggeration_factor" value="12.0" type="float" label="Exaggeration factor" help="Exaggeration factor used to multiply the P matrix in the first part of the optimization" /> | |
122 <param name="Rtsne_max_iter" value="1000" type="integer" label="Number of iterations (default: 1000)" | |
123 help="The number of iterations that Rtsne executes to improve low dim representation (gradient descent optimization)" /> | |
124 </when> | |
125 <when value="HCPC"> | |
126 <param name="HCPC_npc" value="5" type="integer" label="Number of principal components to keep" | |
127 help="The number of dimensions which are kept for HCPC analysis (default=5)" /> | |
128 <param name="HCPC_ncluster" value="-1" type="integer" label="Number of clusters in Hierar. Clustering" | |
129 help="nb.clust, the number of clusters to consider in the hierarchical clustering. (default : -1, let HCPC optimize the number)" /> | |
130 <param name="HCPC_metric" type="select" label="Dissimilarity metric" help="Metric to be used for calculating dissimilarities between observations, available 'euclidian' or 'manhattan'? " > | |
131 <option value="euclidian" selected="true">euclidian</option> | |
132 <option value="manhattan">manhattan</option> | |
133 </param> | |
134 <param name="HCPC_method" type="select" label="Clustering method" help="Clustering method between 'ward', 'average', 'single', 'complete', 'weighted' " > | |
135 <option value="ward" selected="true">ward</option> | |
136 <option value="average">average</option> | |
137 <option value="single">single</option> | |
138 <option value="complete">complete</option> | |
139 <option value="weighted">weighted</option> | |
140 </param> | |
141 <param name="HCPC_consol" type="select" label="k-means consolidation" help="If TRUE, a k-means consolidation is performed" > | |
142 <option value="TRUE" selected="true">Yes</option> | |
143 <option value="FALSE">No</option> | |
144 </param> | |
145 <param name="HCPC_itermax" value="10" type="integer" label="Maximum number of iterations for consolidation" | |
146 help=" (default=10)" /> | |
147 <param name="HCPC_min" value="3" type="integer" label="min number of clusters" | |
148 help=" The least possible number of clusters suggested (default=3)" /> | |
149 <param name="HCPC_max" value="-1" type="text" label="max number of clusters" | |
150 help=" The highest possible number of clusters suggested, by default the minimum between 10 and the number of individuals divided by 2. (default=-1)" /> | |
151 <param name="HCPC_clusterCA" type="select" label="clusterCA, Clustering against rows or columns" help="default(rows)" > | |
152 <option value="rows" selected="true">Rows</option> | |
153 <option value="cols">Columns</option> | |
154 </param> | |
155 <param name="HCPC_kk" value="-1" type="text" label="kk, Number of clusters used in a Kmeans preprocessing " | |
156 help="No k-means consolidation is done if a kk value is provided (default=-1)" /> | |
157 </when> | |
158 <when value="PCA"> | |
159 <param name="PCA_npc" value="5" type="integer" label="Number of principal components to keep" help="The number of dimensions which are kept for PCA analysis (default=5)" /> | |
160 </when> | |
161 </conditional> | |
162 <param label="Return scatter plot table coordinates" name="coord" type="select"> | |
163 <option value="no" selected="true">No</option> | |
164 <option value="yes">Yes</option> | |
165 </param> | |
166 | |
167 </inputs> | |
168 <outputs> <!-- pdf_out has no filter; table_coordinates is kept only when the coord parameter equals 'yes' --> | |
169 <data format="pdf" name="pdf_out" label="${visualisation.visu_choice} on ${on_string}" /> | |
170 <data format="tabular" name="table_coordinates" label="Scatter plot coordinates from ${tool.name} on ${on_string}" > | |
171 <filter>coord == 'yes'</filter> | |
172 </data> | |
173 </outputs> | |
174 <tests> | |
175 <!-- test PCA with sample labels, no factor --> | |
176 <test> | |
177 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
178 <param name="labels" value="yes" /> | |
179 <param name="visu_choice" value="PCA" /> | |
180 <param name="factor_choice" value="No" /> | |
181 <output name="pdf_out" file="pca.labels.pdf" ftype="pdf"/> | |
182 </test> | |
183 <!-- test PCA without sample labels, no factor --> <test> | |
184 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
185 <param name="labels" value="no" /> | |
186 <param name="visu_choice" value="PCA" /> | |
187 <param name="factor_choice" value="No" /> | |
188 <output name="pdf_out" file="pca.nolabels.pdf" ftype="pdf"/> | |
189 </test> | |
190 <!-- test coordinates table output on PCA (coord set to yes) --> | |
191 <test> | |
192 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
193 <param name="labels" value="no" /> | |
194 <param name="visu_choice" value="PCA" /> | |
195 <param name="coord" value="yes" /> | |
196 <param name="factor_choice" value="No" /> | |
197 <output name="pdf_out" file="pca.nolabels.pdf" ftype="pdf"/> | |
198 <output name="table_coordinates" file="pca.coord.tab" ftype="tabular"/> | |
199 </test> | |
200 <!-- test factor contrasting on PCA, using factor.tsv --> | |
201 <test> | |
202 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
203 <param name="labels" value="no" /> | |
204 <param name="visu_choice" value="PCA" /> | |
205 <param name="factor_choice" value="Yes" /> | |
206 <param name="factor" value="factor.tsv" ftype="txt"/> | |
207 <output name="pdf_out" file="pca.nolabels.factors.pdf" ftype="pdf"/> | |
208 </test> | |
209 <!-- same settings with shuffled_factor.tsv; compared against the same expected PDF as the previous test --> <test> | |
210 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
211 <param name="labels" value="no" /> | |
212 <param name="visu_choice" value="PCA" /> | |
213 <param name="factor_choice" value="Yes" /> | |
214 <param name="factor" value="shuffled_factor.tsv" ftype="txt"/> | |
215 <output name="pdf_out" file="pca.nolabels.factors.pdf" ftype="pdf"/> | |
216 </test> | |
217 <!-- test HCPC with sample labels --> | |
218 <test> | |
219 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
220 <param name="labels" value="yes" /> | |
221 <param name="visu_choice" value="HCPC" /> | |
222 <param name="HCPC_npc" value="5"/> | |
223 <param name="HCPC_ncluster" value="-1"/> | |
224 <output name="pdf_out" file="hcpc.labels.pdf" ftype="pdf"/> | |
225 </test> | |
226 <!-- test factor contrasting on HCPC (applies to the next test only) --> | |
227 <test> | |
228 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
229 <param name="labels" value="no" /> | |
230 <param name="visu_choice" value="HCPC" /> | |
231 <param name="HCPC_npc" value="5"/> | |
232 <param name="HCPC_ncluster" value="-1"/> | |
233 <param name="factor_choice" value="Yes" /> | |
234 <param name="factor" value="factor.tsv" ftype="txt"/> | |
235 <output name="pdf_out" file="hcpc.nolabels.factor.pdf" ftype="pdf"/> | |
236 </test> | |
237 <!-- test HCPC without sample labels; factor_choice is not set in this test --> <test> | |
238 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
239 <param name="labels" value="no" /> | |
240 <param name="HCPC_npc" value="5"/> | |
241 <param name="HCPC_ncluster" value="-1"/> | |
242 <param name="visu_choice" value="HCPC" /> | |
243 <output name="pdf_out" file="hcpc.nolabels.pdf" ftype="pdf"/> | |
244 </test> | |
245 <!-- test HCPC with average method, manhattan metric, 4 components, plus coordinates table --> <test> | |
246 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
247 <param name="labels" value="yes" /> | |
248 <param name="visu_choice" value="HCPC" /> | |
249 <param name="coord" value="yes" /> | |
250 <param name="HCPC_method" value="average"/> | |
251 <param name="HCPC_metric" value="manhattan"/> | |
252 <param name="HCPC_npc" value="4" /> | |
253 <output name="pdf_out" file="hcpc-2.labels.pdf" ftype="pdf"/> | |
254 <output name="table_coordinates" file="hcpc-2.coord.tab" ftype="tabular"/> | |
255 </test> | |
256 <!-- test HCPC with single method, euclidian metric, clusterCA on columns, plus coordinates table --> <test> | |
257 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
258 <param name="labels" value="yes" /> | |
259 <param name="visu_choice" value="HCPC" /> | |
260 <param name="coord" value="yes" /> | |
261 <param name="HCPC_method" value="single"/> | |
262 <param name="HCPC_metric" value="euclidian"/> | |
263 <param name="HCPC_npc" value="4" /> | |
264 <param name="HCPC_clusterCA" value="cols" /> | |
265 <output name="pdf_out" file="hcpc-3.labels.pdf" ftype="pdf"/> | |
266 <output name="table_coordinates" file="hcpc-3.coord.tab" ftype="tabular"/> | |
267 </test> | |
268 <!-- test t-SNE with sample labels (seed 49) --> | |
269 <test> | |
270 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
271 <param name="labels" value="yes" /> | |
272 <param name="visu_choice" value="tSNE" /> | |
273 <param name="Rtsne_seed" value="49"/> | |
274 <param name="Rtsne_perplexity" value="10"/> | |
275 <param name="Rtsne_theta" value="1" /> | |
276 <output name="pdf_out" file="tsne.labels.pdf" ftype="pdf"/> | |
277 </test> | |
278 <!-- test t-SNE without sample labels (seed 49) --> <test> | |
279 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
280 <param name="labels" value="no" /> | |
281 <param name="visu_choice" value="tSNE" /> | |
282 <param name="Rtsne_seed" value="49"/> | |
283 <param name="Rtsne_perplexity" value="10"/> | |
284 <param name="Rtsne_theta" value="1" /> | |
285 <output name="pdf_out" file="tsne.nolabels.pdf" ftype="pdf"/> | |
286 </test> | |
287 <!-- test t-SNE with 3 output dimensions, perplexity 5 and exaggeration factor 15, plus coordinates table --> <test> | |
288 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
289 <param name="labels" value="no" /> | |
290 <param name="visu_choice" value="tSNE" /> | |
291 <param name="coord" value="yes" /> | |
292 <param name="Rtsne_seed" value="42"/> | |
293 <param name="Rtsne_perplexity" value="5.0"/> | |
294 <param name="Rtsne_theta" value="1.0" /> | |
295 <param name="Rtsne_dims" value="3" /> | |
296 <param name="Rtsne_exaggeration_factor" value="15.0" /> | |
297 <output name="pdf_out" file="tsne-2.nolabels.pdf" ftype="pdf"/> | |
298 <output name="table_coordinates" file="tsne-2.coord.tab" ftype="tabular"/> | |
299 </test> | |
300 <!-- test factor contrasting on t-SNE, using shuffled_factor.tsv --> | |
301 <test> | |
302 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
303 <param name="labels" value="yes" /> | |
304 <param name="visu_choice" value="tSNE" /> | |
305 <param name="factor_choice" value="Yes" /> | |
306 <param name="factor" value="shuffled_factor.tsv" ftype="txt"/> | |
307 <param name="Rtsne_seed" value="49"/> | |
308 <param name="Rtsne_perplexity" value="10"/> | |
309 <param name="Rtsne_theta" value="1" /> | |
310 <output name="pdf_out" file="tsne.labels.factor.pdf" ftype="pdf"/> | |
311 </test> | |
312 </tests> | |
313 <help> | |
314 | |
315 **What it does** | |
316 | |
317 Takes as an input a matrix of n observations (columns, generally n RNAseq libraries) of k variables | |
318 (rows, generally k genes). | |
319 | |
320 k variables define a space of k dimensions. Any observation | |
321 of k expression values for k genes (the purpose of one RNAseq experiment) can be assigned | |
322 to a position in the k-dim space, of coordinates c1, c2, c3, ..., ck. | |
323 | |
324 Since visualisation in more than 3 dimensions is not easy for a human being, there are | |
325 a number of methods to "reduce" or "project" a k-dim space in a space of 2 or 3 dimensions. | |
326 This is of great help, not only to summarise the data, but also to find similarities, common trends | |
327 between the data (under the hypothesis that similar data are closer in the k-dimension space). | |
328 | |
329 This tool returns the visualisation of a dimensional reduction using either: | |
330 | |
331 * Principal Components Analysis (PCA) | |
332 * Hierarchical Clustering of Principal Components (HCPC) | |
333 * t-distributed Stochastic Neighbor Embedding (t-SNE) | |
334 | |
335 The tool returns in addition the table of the coordinates of the observations (eg RNAseq libraries) | |
336 in the low dim space, which can be used for post-treatment or to further adjust the provided visualisation. | |
337 | |
338 **Contrast data with a factor** | |
339 The tool offers the possibility to colour data points according to the levels of a factor. | |
340 To use the option "Factor to contrast data", provide a tab-separated, two-column table | |
341 with first column containing the cell/data library identifiers (same identifiers as those | |
342 provided as column headers in the input data table) and second column containing the corresponding | |
343 factor levels value. This table does not need to be sorted in the same order as in the data | |
344 table. It may also contain more identifiers than those provided in the data table. | |
345 | |
346 </help> | |
347 <citations> <!-- t-SNE method papers, then the Rtsne package manual; the manual note matches the r-rtsne version pinned in requirements --> | |
348 <citation type="bibtex">@Article{, | |
349 title = {Visualizing High-Dimensional Data Using t-SNE}, | |
350 volume = {9}, | |
351 pages = {2579-2605}, | |
352 year = {2008}, | |
353 author = {L.J.P. {van der Maaten} and G.E. Hinton}, | |
354 journal = {Journal of Machine Learning Research}, | |
355 } | |
356 </citation> | |
357 <citation type="bibtex">@Article{, | |
358 title = {Accelerating t-SNE using Tree-Based Algorithms}, | |
359 volume = {15}, | |
360 pages = {3221-3245}, | |
361 year = {2014}, | |
362 author = {L.J.P. {van der Maaten}}, | |
363 journal = {Journal of Machine Learning Research}, | |
364 } | |
365 </citation> | |
366 <citation type="bibtex">@Manual{, | |
367 title = {{Rtsne}: T-Distributed Stochastic Neighbor Embedding using | |
368 Barnes-Hut Implementation}, | |
369 author = {Jesse H. Krijthe}, | |
370 year = {2015}, | |
371 note = {R package version 0.13}, | |
372 url = {https://github.com/jkrijthe/Rtsne}, | |
373 } | |
374 </citation> | |
375 </citations> | |
376 </tool> |