comparison high_dim_visu.xml @ 8:fe6f76030168 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_high_dimension_visualization commit a3dc683410fc240f428c8fbee3c63aa9965fbf38
author artbio
date Wed, 29 Nov 2023 17:28:18 +0000
parents 18a1dc4aec4a
children
comparison
equal deleted inserted replaced
7:18a1dc4aec4a 8:fe6f76030168
1 <tool id="high_dimensions_visualisation" name="Generate PCA, tSNE and HCPC" version="0.9.6"> 1 <tool id="high_dimensions_visualisation" name="Generate PCA, tSNE and HCPC" version="4.3+galaxy0" profile="20.01">
2 <description>from highly dimensional expression data</description> 2 <description>from highly dimensional expression data</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.6.2=r35h6115d3f_0">r-optparse</requirement> 4 <requirement type="package" version="1.7.3=r43hc72bb7e_2">r-optparse</requirement>
5 <requirement type="package" version="1.42=r35h6115d3f_0">r-factominer</requirement> 5 <requirement type="package" version="2.9=r43h57805ef_0">r-factominer</requirement>
6 <requirement type="package" version="1.0.5">r-factoextra</requirement> 6 <requirement type="package" version="1.0.7=r43hc72bb7e_3">r-factoextra</requirement>
7 <requirement type="package" version="0.15=r351he1b5a44_0">r-rtsne</requirement> 7 <requirement type="package" version="0.16=r43h7ce84a7_2">r-rtsne</requirement>
8 <requirement type="package" version="0.4.7=r351h6115d3f_0">r-ggfortify</requirement> 8 <requirement type="package" version="0.4.16=r43hc72bb7e_1">r-ggfortify</requirement>
9 <requirement type="package" version="1.1.9=r351h0357c0b_0">r-clusterr</requirement> 9 <requirement type="package" version="1.3.1=r43h08d816e_1">r-clusterr</requirement>
10 <requirement type="package" version="1.2.5=r35h6115d3f_0">r-polychrome</requirement> 10 <requirement type="package" version="1.5.1=r43hc72bb7e_2">r-polychrome</requirement>
11 </requirements> 11 </requirements>
12 <stdio> 12 <stdio>
13 <exit_code range="1:" level="fatal" description="Tool exception" /> 13 <exit_code range="1:" level="fatal" description="Tool exception" />
14 </stdio> 14 </stdio>
15 <command detect_errors="exit_code"><![CDATA[ 15 <command detect_errors="exit_code"><![CDATA[
16 Rscript $__tool_directory__/high_dim_visu.R 16 Rscript $__tool_directory__/high_dim_visu.R
17 --data '$input' 17 --data '$input'
18 --sep '$input_sep'
19 --colnames '$input_header'
20 #if $factor_condition.factor_choice == 'Yes' 18 #if $factor_condition.factor_choice == 'Yes'
21 --factor '$factor_condition.factor' 19 --factor '$factor_condition.factor'
22 #end if 20 #end if
23 #if $labels == "yes": 21 #if $labels == "yes":
24 --labels 'TRUE' 22 --labels 'TRUE'
25 #else 23 #else
26 --labels 'FALSE' 24 --labels 'FALSE'
27 #end if 25 #end if
28 #if $coord == "yes":
29 --table_coordinates '$table_coordinates'
30 #end if
31 --visu_choice '$visualisation.visu_choice' 26 --visu_choice '$visualisation.visu_choice'
27
32 #if $visualisation.visu_choice == "tSNE": 28 #if $visualisation.visu_choice == "tSNE":
33 --Rtsne_seed '$visualisation.Rtsne_seed' 29 --Rtsne_seed '$visualisation.Rtsne_seed'
34 --Rtsne_perplexity '$visualisation.Rtsne_perplexity' 30 --Rtsne_perplexity '$visualisation.Rtsne_perplexity'
35 --Rtsne_theta '$visualisation.Rtsne_theta' 31 --Rtsne_theta '$visualisation.Rtsne_theta'
36 --Rtsne_max_iter '$visualisation.Rtsne_max_iter' 32 --Rtsne_max_iter '$visualisation.Rtsne_max_iter'
52 --HCPC_itermax '$visualisation.HCPC_itermax' 48 --HCPC_itermax '$visualisation.HCPC_itermax'
53 --HCPC_min '$visualisation.HCPC_min' 49 --HCPC_min '$visualisation.HCPC_min'
54 --HCPC_max '$visualisation.HCPC_max' 50 --HCPC_max '$visualisation.HCPC_max'
55 --HCPC_clusterCA '$visualisation.HCPC_clusterCA' 51 --HCPC_clusterCA '$visualisation.HCPC_clusterCA'
56 --HCPC_kk '$visualisation.HCPC_kk' 52 --HCPC_kk '$visualisation.HCPC_kk'
57 --HCPC_cluster_description '$HCPC_cluster_description' 53 --HCPC_contributions '$HCPC_contributions'
58 #if $visualisation.res_clustering == "yes": 54 --HCPC_cell_clust '$HCPC_cell_clust'
59 --HCPC_clust '$HCPC_clust' 55 #if $factor_condition.factor_choice == "Yes":
60 #end if 56 --HCPC_mutual_info '$HCPC_mutual_info'
57 #end if
61 #end if 58 #end if
62 59
63 #if $visualisation.visu_choice == "PCA": 60 #if $visualisation.visu_choice == "PCA":
64 --PCA_npc '$visualisation.PCA_npc' 61 --PCA_npc '$visualisation.PCA_npc'
65 --PCA_x_axis '$visualisation.PCA_x_axis' 62 --x_axis '$visualisation.x_axis'
66 --PCA_y_axis '$visualisation.PCA_y_axis' 63 --y_axis '$visualisation.y_axis'
64 --item_size '$visualisation.item_size'
67 #end if 65 #end if
68
69 #if $visualisation.visu_choice == "HCPC" and $factor_condition.factor_choice == "Yes":
70 --HCPC_mutual_info '$HCPC_mutual_info'
71 #end if
72
73 --pdf_out '$pdf_out' 66 --pdf_out '$pdf_out'
74
75 ]]></command> 67 ]]></command>
76 <inputs> 68 <inputs>
77 <param name="input" type="data" format="txt,tabular" label="expression data"/> 69 <param name="input" type="data" format="txt,tabular" label="expression data"/>
78 <param name="input_sep" type="select" label="Input column separator">
79 <option value="tab" selected="true">Tabs</option>
80 <option value=",">Comma</option>
81 </param>
82 <param name="input_header" type="select" label="Consider first line of input file as header?">
83 <option value="TRUE" selected="true">Yes</option>
84 <option value="FALSE">No</option>
85 </param>
86 <param name="labels" type="select" label="Add sample labels to scatter plot" > 70 <param name="labels" type="select" label="Add sample labels to scatter plot" >
87 <option value="no" selected="true">No Labels</option> 71 <option value="no" selected="true">No Labels</option>
88 <option value="yes" >Label points</option> 72 <option value="yes" >Label points</option>
89 </param> 73 </param>
90 <conditional name="factor_condition"> 74 <conditional name="factor_condition">
92 <option value="Yes">Yes</option> 76 <option value="Yes">Yes</option>
93 <option value="No" selected="true">No</option> 77 <option value="No" selected="true">No</option>
94 </param> 78 </param>
95 <when value="Yes"> 79 <when value="Yes">
96 <param name="factor" type="data" format="tabular" label="Factor to constrast data" 80 <param name="factor" type="data" format="tabular" label="Factor to constrast data"
97 help="A two-column data frame, first column contains data labels, second column contains the levels of a factor to contrast visualisation" /> 81 help="A two-column data frame, with column headers, first column contains data labels,
82 second column contains the levels of a factor to contrast visualisation" />
98 </when> 83 </when>
99 <when value="No"> 84 <when value="No">
100 </when> 85 </when>
101 </conditional> 86 </conditional>
102 <conditional name="visualisation"> 87 <conditional name="visualisation">
104 <option value="PCA" selected="True">PCA</option> 89 <option value="PCA" selected="True">PCA</option>
105 <option value="HCPC">HCPC</option> 90 <option value="HCPC">HCPC</option>
106 <option value="tSNE">t-SNE</option> 91 <option value="tSNE">t-SNE</option>
107 </param> 92 </param>
108 <when value="tSNE"> 93 <when value="tSNE">
109 <param name="Rtsne_seed" value="42" type="integer" label="Seed value for reproducibility of t-SNE" help="Set to 42 as default" /> 94 <param name="Rtsne_seed" value="42" type="integer" label="Seed value for reproducibility of t-SNE" help="Set to 42 as default"/>
110 <param name="Rtsne_dims" value="2" type="integer" label="dims (t-SNE)" help="Output dimensionality (should not be greater than 3)" /> 95 <param name="Rtsne_dims" value="2" type="integer" label="dims (t-SNE)" help="Output dimensionality (should not be greater than 3)"/>
111 <param name="Rtsne_pca" type="select" label="pca (t-SNE)" help="Whether an initial PCA step should be performed" > 96 <param name="Rtsne_pca" type="select" label="pca (t-SNE)" help="Whether an initial PCA step should be performed" >
112 <option value="TRUE" selected="true">Yes</option> 97 <option value="TRUE" selected="true">Yes</option>
113 <option value="FALSE">False</option> 98 <option value="FALSE">False</option>
114 </param> 99 </param>
115 <param name="Rtsne_initial_dims" value="50" type="integer" label="initial dims (t-SNE)" help="The number of dimensions that should be retained in the initial PCA step" /> 100 <param name="Rtsne_initial_dims" value="50" type="integer" label="initial dims (t-SNE)"
116 <param name="Rtsne_pca_center" type="select" label="Centering data" help="Should data be centered before pca is applied? " > 101 help="The number of dimensions that should be retained in the initial PCA step"/>
117 <option value="TRUE" selected="true">Yes</option> 102 <param name="Rtsne_pca_center" type="select" label="Centering data" help="Should data be centered before pca is applied?">
118 <option value="FALSE">False</option> 103 <option value="TRUE" selected="true">Yes</option>
119 </param> 104 <option value="FALSE">False</option>
120 <param name="Rtsne_pca_scale" type="select" label="Scalling data" help="Should data be scaled before pca is applied? " > 105 </param>
121 <option value="TRUE">Yes</option> 106 <param name="Rtsne_pca_scale" type="select" label="Scalling data" help="Should data be scaled before pca is applied?">
122 <option value="FALSE" selected="true">False</option> 107 <option value="TRUE">Yes</option>
123 </param> 108 <option value="FALSE" selected="true">False</option>
124 <param name="Rtsne_normalize" type="select" label="Normalisation of data" 109 </param>
125 help="Should variables (gene expressions) be normalized internally prior to distance calculations? " > 110 <param name="Rtsne_normalize" type="select" label="Normalisation of data"
126 <option value="TRUE" selected="true">Yes</option> 111 help="Should variables (gene expressions) be normalized internally prior to distance calculations?">
127 <option value="FALSE">False</option> 112 <option value="TRUE" selected="true">Yes</option>
128 </param> 113 <option value="FALSE">False</option>
129 <param name="Rtsne_perplexity" value="10.0" type="float" label="perplexity (t-SNE)" help="should be less than ((nbr observations)-1)/3" /> 114 </param>
115 <param name="Rtsne_perplexity" value="10.0" type="float" label="perplexity (t-SNE)" help="should be less than ((nbr observations)-1)/3"/>
130 <param name="Rtsne_theta" value="1.0" type="float" label="theta (t-SNE)"/> 116 <param name="Rtsne_theta" value="1.0" type="float" label="theta (t-SNE)"/>
131 <param name="Rtsne_exaggeration_factor" value="12.0" type="float" label="Exageration factor" help="Exaggeration factor used to multiply the P matrix in the first part of the optimization" /> 117 <param name="Rtsne_exaggeration_factor" value="12.0" type="float" label="Exageration factor"
118 help="Exaggeration factor used to multiply the P matrix in the first part of the optimization"/>
132 <param name="Rtsne_max_iter" value="1000" type="integer" label="Number of iterations (default: 1000)" 119 <param name="Rtsne_max_iter" value="1000" type="integer" label="Number of iterations (default: 1000)"
133 help="The number of iterations that Rtsne executes to improve low dim representation (gradient descent optimization)" /> 120 help="The number of iterations that Rtsne executes to improve low dim representation (gradient descent optimization)"/>
134 </when> 121 </when>
135 <when value="HCPC"> 122 <when value="HCPC">
136 <param name="HCPC_npc" value="5" type="integer" label="Number of principal components to keep" 123 <param name="HCPC_npc" value="5" type="integer" label="Number of principal components to keep"
137 help="The number of dimensions which are kept for HCPC analysis (default=5)" /> 124 help="The number of dimensions which are kept for HCPC analysis (default=5)"/>
138 <param name="HCPC_ncluster" value="-1" type="integer" label="Number of clusters in Hierar. Clustering" 125 <param name="HCPC_ncluster" value="-1" type="integer" label="Number of clusters in Hierar. Clustering"
139 help="nb.clust - an integer. If 0, the tree is cut at the level the user clicks on (not working in Galaxy). If -1, the tree is 126 help="nb.clust - an integer. If 0, the tree is cut at the level the user clicks on (not working in Galaxy). If -1, the tree is
140 automatically cut at the suggested level (see details). If a (positive) integer, the tree is cut with nb.cluters clusters." /> 127 automatically cut at the suggested level (see details). If a (positive) integer, the tree is cut with nb.cluters clusters."/>
141 <param name="HCPC_metric" type="select" label="Dissimilarity metric" help="Metric to be used for calculating dissimilarities between observations, can be 'euclidean' or 'manhattan' " > 128 <param name="HCPC_metric" type="select" label="Dissimilarity metric"
142 <option value="euclidean" selected="true">euclidean</option> 129 help="Metric to be used for calculating dissimilarities between observations, can be 'euclidean' or 'manhattan'">
143 <option value="manhattan">manhattan</option> 130 <option value="euclidean" selected="true">euclidean</option>
144 </param> 131 <option value="manhattan">manhattan</option>
145 <param name="HCPC_method" type="select" label="Clustering method" help="character string defining the clustering method. 132 </param>
146 The four methods implemented are 'average' ([unweighted pair-]group [arithMetic] average method, aka ‘UPGMA’), 133 <param name="HCPC_method" type="select" label="Clustering method"
147 'single' (single linkage), 'complete' (complete linkage), and 'ward' (Ward's method). 134 help="character string defining the clustering method. The four methods implemented are 'average'
148 The default with this Galaxy tool is is 'ward'." > 135 ([unweighted pair-]group [arithMetic] average method, aka ‘UPGMA’), 'single' (single linkage), 'complete'
149 <option value="ward" selected="true">ward</option> 136 (complete linkage), and 'ward' (Ward's method). The default with this Galaxy tool is is 'ward'.">
150 <option value="average">average</option> 137 <option value="ward" selected="true">ward</option>
151 <option value="single">single</option> 138 <option value="average">average</option>
152 <option value="complete">complete</option> 139 <option value="single">single</option>
153 </param> 140 <option value="complete">complete</option>
154 <param name="HCPC_consol" type="select" label="k-means consolidation" help="a boolean. If TRUE, a k-means consolidation is performed 141 </param>
155 (consolidation cannot be performed if kk is used and equals a number)." > 142 <param name="HCPC_consol" type="select" label="k-means consolidation"
156 <option value="TRUE" selected="true">Yes</option> 143 help="A boolean. If TRUE, a k-means consolidation is performed (consolidation cannot be performed
157 <option value="FALSE">False</option> 144 if kk is used and equals a number).">
158 </param> 145 <option value="TRUE" selected="true">Yes</option>
159 <param name="HCPC_itermax" value="10" type="integer" label="Maximum number of iterations for consolidation" 146 <option value="FALSE">False</option>
160 help="An integer. The maximum number of iterations for the consolidation. (default=10)" /> 147 </param>
148 <param name="HCPC_itermax" value="10" type="integer" label="Maximum number of iterations for consolidation"
149 help="An integer. The maximum number of iterations for the consolidation. (default=10)"/>
161 <param name="HCPC_min" value="3" type="integer" label="min number of clusters" 150 <param name="HCPC_min" value="3" type="integer" label="min number of clusters"
162 help="an integer. The least possible number of clusters suggested. (default=3)" /> 151 help="an integer. The least possible number of clusters suggested. (default=3)"/>
163 <param name="HCPC_max" value="-1" type="text" label="max number of clusters" 152 <param name="HCPC_max" value="-1" type="text" label="max number of clusters"
164 help="The higher possible number of clusters suggested, by default the minimum between 10 and the number of individuals divided by 2. (default=-1)" /> 153 help="The higher possible number of clusters suggested, by default the minimum between 10
165 <param name="HCPC_clusterCA" type="select" label="cluster.CA, Clustering against rows or columns" 154 and the number of individuals divided by 2. (default = NULL)"/>
166 help="A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results.default(rows)"> 155 <param name="HCPC_clusterCA" type="select" label="cluster.CA, Clustering against rows or columns"
167 <option value="rows" selected="true">Rows</option> 156 help="A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results.default(rows)">
168 <option value="cols">Columns</option> 157 <option value="rows" selected="true">Rows</option>
169 </param> 158 <option value="cols">Columns</option>
170 <param name="HCPC_kk" value="Inf" type="text" label="kk, Number of clusters used in a Kmeans preprocessing " 159 </param>
160 <param name="HCPC_kk" value="Inf" type="text" label="kk, Number of clusters used in a Kmeans preprocessing"
171 help="An integer corresponding to the number of clusters used in a Kmeans preprocessing before the 161 help="An integer corresponding to the number of clusters used in a Kmeans preprocessing before the
172 hierarchical clustering; the top of the hierarchical tree is then constructed from this partition. 162 hierarchical clustering; the top of the hierarchical tree is then constructed from this partition.
173 This is very useful if the number of individuals is high. Note that consolidation cannot be performed 163 This is very useful if the number of individuals is high. Note that consolidation cannot be performed
174 if kk is different from Inf and some graphics are not drawn. Inf is used by default and no preprocessing 164 if kk is different from Inf and some graphics are not drawn. Inf is used by default and no preprocessing
175 is done, all the graphical outputs are then given." /> 165 is done, all the graphical outputs are then given."/>
176 <param label="Return HCPC clustering table" name="res_clustering" type="select">
177 <option value="no" selected="True">No</option>
178 <option value="yes">Yes</option>
179 </param>
180 </when> 166 </when>
181 <when value="PCA"> 167 <when value="PCA">
182 <param name="PCA_npc" value="5" type="integer" label="Number of principal components to keep" help="The number of dimensions which are kept for PCA analysis (default=5)" /> 168 <param name="PCA_npc" value="5" type="integer" label="Number of principal components to keep"
183 <param name="PCA_x_axis" value="1" type="integer" label="First principal component to plot" help="X axis for PCA plot (default=1)" /> 169 help="The number of dimensions which are kept for PCA analysis (default=5)"/>
184 <param name="PCA_y_axis" value="2" type="integer" label="Second principal component to plot" help="Y axis for PCA plot (default=2)" /> 170 <param name="item_size" value="1" type="float" label="Adjust size of points/labels in PCA graph" help="size of points/labels (default=1)"/>
171 <param name="x_axis" value="1" type="integer" label="Principal component to plot to x axis" help="PC to plot as x (default=1)"/>
172 <param name="y_axis" value="2" type="integer" label="Principal component to plot to y axis" help="PCA plot as y (default=2)"/>
185 </when> 173 </when>
186 </conditional> 174 </conditional>
187 <param label="Return scatter plot table coordinates" name="coord" type="select">
188 <option value="no" selected="True">No</option>
189 <option value="yes">Yes</option>
190 </param>
191
192 </inputs> 175 </inputs>
193 <outputs> 176 <outputs>
194 <data name="pdf_out" format="pdf" label="${visualisation.visu_choice} of ${on_string}" /> 177 <data name="pdf_out" format="pdf" label="${visualisation.visu_choice} of ${on_string}"/>
195 <data name="table_coordinates" format="tabular" label="Scatter plot coordinates from ${visualisation.visu_choice} of ${on_string}" > 178 <data name="HCPC_cell_clust" format="tabular" label="Clustering table from ${visualisation.visu_choice} of ${on_string}">
196 <filter>coord == 'yes'</filter> 179 <filter>visualisation['visu_choice'] == 'HCPC'</filter>
197 </data> 180 </data>
198 <data name="HCPC_mutual_info" format="txt" label="External validation of clustering from ${visualisation.visu_choice} of ${on_string}" > 181 <data name="HCPC_contributions" format="tabular" label="Cluster information from ${visualisation.visu_choice}">
182 <filter>visualisation['visu_choice'] == 'HCPC'</filter>
183 </data>
184 <data name="HCPC_mutual_info" format="txt" label="External validation of clustering from ${visualisation.visu_choice} of ${on_string}">
199 <filter>visualisation['visu_choice'] == 'HCPC' and factor_condition['factor_choice'] == 'Yes'</filter> 185 <filter>visualisation['visu_choice'] == 'HCPC' and factor_condition['factor_choice'] == 'Yes'</filter>
200 </data>
201 <data name="HCPC_clust" format="tabular" label="Clustering table from ${visualisation.visu_choice} of ${on_string}" >
202 <filter>visualisation['visu_choice'] == 'HCPC' and visualisation['res_clustering'] == 'yes'</filter>
203 </data>
204 <data name="HCPC_cluster_description" format="tabular" label="Cluster information from ${visualisation.visu_choice}" >
205 <filter>visualisation['visu_choice'] == 'HCPC' </filter>
206 </data> 186 </data>
207 </outputs> 187 </outputs>
208 <tests> 188 <tests>
209 <!-- test first (for developpers) --> 189 <!-- test tSNE -->
210 <!-- test PCA --> 190 <test expect_num_outputs="1">
211 <test>
212 <param name="input" value="cpm_input.tsv" ftype="txt"/>
213 <param name="labels" value="yes" />
214 <param name="visu_choice" value="PCA" />
215 <param name="factor_choice" value="No" />
216 <output name="pdf_out" file="pca.labels.pdf" ftype="pdf"/>
217 </test>
218 <test>
219 <param name="input" value="cpm_input.tsv" ftype="txt"/>
220 <param name="labels" value="no" />
221 <param name="visu_choice" value="PCA" />
222 <param name="factor_choice" value="No" />
223 <output name="pdf_out" file="pca.nolabels.pdf" ftype="pdf"/>
224 </test>
225 <!-- test PCA PC2 vs PC3 -->
226 <test>
227 <param name="input" value="cpm_input.tsv" ftype="txt"/>
228 <param name="labels" value="no" />
229 <param name="visu_choice" value="PCA" />
230 <param name="factor_choice" value="No" />
231 <param name="PCA_x_axis" value="2" />
232 <param name="PCA_y_axis" value="3" />
233 <output name="pdf_out" file="pca.2vs3.pdf" ftype="pdf"/>
234 </test>
235
236 <!-- test Coordinates tables on PCA -->
237 <test>
238 <param name="input" value="cpm_input.tsv" ftype="txt"/>
239 <param name="labels" value="no" />
240 <param name="visu_choice" value="PCA" />
241 <param name="coord" value="yes" />
242 <param name="factor_choice" value="No" />
243 <output name="pdf_out" file="pca.nolabels.pdf" ftype="pdf"/>
244 <output name="table_coordinates" file="pca.coord.tab" ftype="tabular"/>
245 </test>
246 <!-- test factor contrasting on PCA -->
247 <test>
248 <param name="input" value="cpm_input.tsv" ftype="txt"/>
249 <param name="labels" value="no" />
250 <param name="visu_choice" value="PCA" />
251 <param name="factor_choice" value="Yes" />
252 <param name="factor" value="factor.tsv" ftype="txt"/>
253 <output name="pdf_out" file="pca.nolabels.factors.pdf" ftype="pdf"/>
254 </test>
255 <!-- test two-level factor contrasting on PCA -->
256 <test>
257 <param name="input" value="cpm_input.tsv" ftype="txt"/>
258 <param name="labels" value="no" />
259 <param name="visu_choice" value="PCA" />
260 <param name="factor_choice" value="Yes" />
261 <param name="factor" value="2-lev_factor.tsv" ftype="txt"/>
262 <output name="pdf_out" file="pca.nolabels.2-lev-factor.pdf" compare="sim_size" ftype="pdf"/>
263 </test>
264 <!-- test numerical factor contrasting on PCA -->
265 <test>
266 <param name="input" value="cpm_input.tsv" ftype="txt"/>
267 <param name="labels" value="no" />
268 <param name="visu_choice" value="PCA" />
269 <param name="factor_choice" value="Yes" />
270 <param name="factor" value="numeric_factor.tsv" ftype="txt"/>
271 <output name="pdf_out" file="pca.nolabels.numerical-factor.pdf" compare="sim_size" ftype="pdf"/>
272 </test>
273 <test>
274 <param name="input" value="cpm_input.tsv" ftype="txt"/>
275 <param name="labels" value="no" />
276 <param name="visu_choice" value="PCA" />
277 <param name="factor_choice" value="Yes" />
278 <param name="factor" value="shuffled_factor.tsv" ftype="txt"/>
279 <output name="pdf_out" file="pca.nolabels.factors.pdf" compare="sim_size" ftype="pdf"/>
280 </test>
281 <!-- test HCPC -->
282 <test>
283 <param name="input" value="cpm_input.tsv" ftype="txt"/>
284 <param name="labels" value="yes" />
285 <param name="visu_choice" value="HCPC" />
286 <param name="HCPC_npc" value="5"/>
287 <param name="HCPC_ncluster" value="-1"/>
288 <output name="pdf_out" file="hcpc.labels.pdf" compare="sim_size" ftype="pdf"/>
289 <output name="HCPC_cluster_description" file="hcpc.cluster_description.1.tab" ftype="tabular"/>
290 </test>
291 <!-- test factor contrasting on HCPC -->
292 <test>
293 <param name="input" value="cpm_input.tsv" ftype="txt"/>
294 <param name="labels" value="no" />
295 <param name="visu_choice" value="HCPC" />
296 <param name="HCPC_npc" value="5"/>
297 <param name="HCPC_ncluster" value="-1"/>
298 <param name="res_clustering" value="yes"/>
299 <param name="factor_choice" value="Yes" />
300 <param name="factor" value="factor.tsv" ftype="txt"/>
301 <output name="pdf_out" file="hcpc.nolabels.factor.pdf" compare="sim_size" ftype="pdf"/>
302 <output name="HCPC_mutual_info" file="hcpc.factor.extval.txt" ftype="txt"/>
303 <output name="HCPC_clust" file="hcpc.clusters.tab" ftype="tabular"/>
304 <output name="HCPC_cluster_description" file="hcpc.cluster_description.1.tab" ftype="tabular"/>
305 </test>
306 <test>
307 <param name="input" value="cpm_input.tsv" ftype="txt"/>
308 <param name="labels" value="no" />
309 <param name="HCPC_npc" value="5"/>
310 <param name="HCPC_ncluster" value="-1"/>
311 <param name="visu_choice" value="HCPC" />
312 <output name="pdf_out" file="hcpc.nolabels.pdf" compare="sim_size" ftype="pdf"/>
313 <output name="HCPC_cluster_description" file="hcpc.cluster_description.1.tab" ftype="tabular"/>
314 </test>
315 <test>
316 <param name="input" value="cpm_input.tsv" ftype="txt"/>
317 <param name="labels" value="yes" />
318 <param name="visu_choice" value="HCPC" />
319 <param name="coord" value="yes" />
320 <param name="HCPC_method" value="average"/>
321 <param name="HCPC_metric" value="manhattan"/>
322 <param name="HCPC_npc" value="4" />
323 <output name="pdf_out" file="hcpc-2.labels.pdf" ftype="pdf"/>
324 <output name="table_coordinates" file="hcpc-2.coord.tab" ftype="tabular"/>
325 <output name="HCPC_cluster_description" file="hcpc.cluster_description.4.tab" ftype="tabular"/>
326 </test>
327 <test>
328 <param name="input" value="cpm_input.tsv" ftype="txt"/>
329 <param name="labels" value="yes" />
330 <param name="visu_choice" value="HCPC" />
331 <param name="coord" value="yes" />
332 <param name="HCPC_method" value="single"/>
333 <param name="HCPC_metric" value="euclidean"/>
334 <param name="HCPC_npc" value="4" />
335 <param name="HCPC_clusterCA" value="cols" />
336 <output name="pdf_out" file="hcpc-3.labels.pdf" compare="sim_size" ftype="pdf"/>
337 <output name="table_coordinates" file="hcpc-3.coord.tab" ftype="tabular"/>
338 <output name="HCPC_cluster_description" file="hcpc.cluster_description.5.tab" ftype="tabular"/>
339 </test>
340 <!-- test t-SNE -->
341 <test>
342 <param name="input" value="cpm_input.tsv" ftype="txt"/> 191 <param name="input" value="cpm_input.tsv" ftype="txt"/>
343 <param name="labels" value="yes" /> 192 <param name="labels" value="yes" />
344 <param name="visu_choice" value="tSNE" /> 193 <param name="visu_choice" value="tSNE" />
345 <param name="Rtsne_seed" value="49"/> 194 <param name="Rtsne_seed" value="49"/>
346 <param name="Rtsne_perplexity" value="10"/> 195 <param name="Rtsne_perplexity" value="10"/>
347 <param name="Rtsne_theta" value="1" /> 196 <param name="Rtsne_theta" value="1" />
348 <output name="pdf_out" file="tsne.labels.pdf" ftype="pdf" compare="sim_size" delta="500"/> 197 <output name="pdf_out" file="tsne.1.pdf" ftype="pdf" compare="sim_size" delta="500"/>
349 </test> 198 </test>
350 <test> 199 <test expect_num_outputs="1">
351 <param name="input" value="cpm_input.tsv" ftype="txt"/> 200 <param name="input" value="cpm_input.tsv" ftype="txt"/>
352 <param name="labels" value="no" /> 201 <param name="labels" value="no" />
353 <param name="visu_choice" value="tSNE" /> 202 <param name="visu_choice" value="tSNE" />
203 <param name="factor_choice" value="Yes" />
204 <param name="factor" value="numeric_factor.tsv" ftype="txt"/>
354 <param name="Rtsne_seed" value="49"/> 205 <param name="Rtsne_seed" value="49"/>
355 <param name="Rtsne_perplexity" value="10"/> 206 <param name="Rtsne_perplexity" value="10"/>
356 <param name="Rtsne_theta" value="1" /> 207 <param name="Rtsne_theta" value="1" />
357 <output name="pdf_out" file="tsne.nolabels.pdf" ftype="pdf" compare="sim_size" delta="500"/> 208 <output name="pdf_out" file="tsne.2.pdf" ftype="pdf" compare="sim_size" delta="500"/>
358 </test> 209 </test>
359 <!-- test factor contrasting on t-SNE --> 210 <test expect_num_outputs="1">
360 <test>
361 <param name="input" value="cpm_input.tsv" ftype="txt"/> 211 <param name="input" value="cpm_input.tsv" ftype="txt"/>
362 <param name="labels" value="yes" /> 212 <param name="labels" value="yes" />
363 <param name="visu_choice" value="tSNE" /> 213 <param name="visu_choice" value="tSNE" />
364 <param name="factor_choice" value="Yes" /> 214 <param name="factor_choice" value="Yes" />
365 <param name="factor" value="shuffled_factor.tsv" ftype="txt"/> 215 <param name="factor" value="shuffled_factor.tsv" ftype="txt"/>
366 <param name="Rtsne_seed" value="49"/> 216 <param name="Rtsne_seed" value="49"/>
367 <param name="Rtsne_perplexity" value="10"/> 217 <param name="Rtsne_perplexity" value="10"/>
368 <param name="Rtsne_theta" value="1" /> 218 <param name="Rtsne_theta" value="1" />
369 <output name="pdf_out" file="tsne.labels.factor.pdf" ftype="pdf" compare="sim_size" delta="500"/> 219 <output name="pdf_out" file="tsne.3.pdf" ftype="pdf" compare="sim_size" delta="500"/>
370 </test> 220 </test>
371 <test> 221 <test expect_num_outputs="1">
372 <param name="input" value="cpm_input.tsv" ftype="txt"/> 222 <param name="input" value="cpm_input.tsv" ftype="txt"/>
373 <param name="labels" value="no" /> 223 <param name="labels" value="no" />
374 <param name="visu_choice" value="tSNE" /> 224 <param name="visu_choice" value="tSNE" />
375 <param name="Rtsne_seed" value="49" /> 225 <param name="Rtsne_seed" value="49" />
376 <param name="coord" value="yes" />
377 <param name="Rtsne_dims" value="3" /> 226 <param name="Rtsne_dims" value="3" />
378 <param name="Rtsne_perplexity" value="10"/> 227 <param name="Rtsne_perplexity" value="10"/>
379 <param name="Rtsne_theta" value="1" /> 228 <param name="Rtsne_theta" value="1" />
380 <param name="Rtsne_normalize" value="FALSE" /> 229 <param name="Rtsne_normalize" value="FALSE" />
381 <output name="pdf_out" file="tsne-2.nolabels.pdf" ftype="pdf" compare="sim_size" delta="1000"/> 230 <output name="pdf_out" file="tsne.4.pdf" ftype="pdf" compare="sim_size" delta="1000"/>
382 <output name="table_coordinates" file="tsne-2.coord.tab" ftype="tabular" compare="sim_size" delta="1000"/> 231 </test>
232 <!-- test PCA -->
233 <test expect_num_outputs="1">
234 <param name="input" value="cpm_input.tsv" ftype="txt"/>
235 <param name="labels" value="yes" />
236 <param name="visu_choice" value="PCA" />
237 <param name="factor_choice" value="No" />
238 <param name="item_size" value="0.5" />
239 <output name="pdf_out" file="pca.1.pdf" ftype="pdf"/>
240 </test>
241 <!-- test PCA PC2 vs PC3 -->
242 <test expect_num_outputs="1">
243 <param name="input" value="cpm_input.tsv" ftype="txt"/>
244 <param name="labels" value="no" />
245 <param name="visu_choice" value="PCA" />
246 <param name="factor_choice" value="No" />
247 <param name="x_axis" value="2" />
248 <param name="y_axis" value="3" />
249 <output name="pdf_out" file="pca.2.pdf" ftype="pdf"/>
250 </test>
251 <!-- test factor contrasting on PCA -->
252 <test expect_num_outputs="1">
253 <param name="input" value="cpm_input.tsv" ftype="txt"/>
254 <param name="labels" value="no" />
255 <param name="visu_choice" value="PCA" />
256 <param name="factor_choice" value="Yes" />
257 <param name="factor" value="factor.tsv" ftype="txt"/>
258 <output name="pdf_out" file="pca.3.pdf" ftype="pdf"/>
259 </test>
260 <!-- test numerical factor contrasting on PCA -->
261 <test expect_num_outputs="1">
262 <param name="input" value="cpm_input.tsv" ftype="txt"/>
263 <param name="labels" value="no" />
264 <param name="visu_choice" value="PCA" />
265 <param name="factor_choice" value="Yes" />
266 <param name="factor" value="numeric_factor.tsv" ftype="txt"/>
267 <output name="pdf_out" file="pca.4.pdf" compare="sim_size" ftype="pdf"/>
268 </test>
269 <test expect_num_outputs="1">
270 <param name="input" value="cpm_input.tsv" ftype="txt"/>
271 <param name="labels" value="no" />
272 <param name="visu_choice" value="PCA" />
273 <param name="factor_choice" value="Yes" />
274 <param name="factor" value="shuffled_factor.tsv" ftype="txt"/>
275 <output name="pdf_out" file="pca.5.pdf" compare="sim_size" ftype="pdf"/>
276 </test>
277 <!-- HCPC tests -->
278 <test expect_num_outputs="3">
279 <param name="input" value="cpm_input.tsv" ftype="txt"/>
280 <param name="labels" value="yes" />
281 <param name="visu_choice" value="HCPC" />
282 <param name="HCPC_npc" value="5"/>
283 <param name="HCPC_ncluster" value="-1"/>
284 <output name="pdf_out" file="hcpc.1.pdf" compare="sim_size" ftype="pdf"/>
285 <output name="HCPC_cell_clust" file="hcpc.cell-cluster.1.tsv" ftype="tabular"/>
286 <output name="HCPC_contributions" file="hcpc.component-impact.1.tsv" ftype="tabular"/>
287 </test>
288 <test expect_num_outputs="4">
289 <param name="input" value="cpm_input.tsv" ftype="txt"/>
290 <param name="labels" value="no" />
291 <param name="visu_choice" value="HCPC" />
292 <param name="HCPC_npc" value="5"/>
293 <param name="HCPC_ncluster" value="-1"/>
294 <param name="factor_choice" value="Yes" />
295 <param name="factor" value="factor.tsv" ftype="txt"/>
296 <output name="pdf_out" file="hcpc.2.pdf" compare="sim_size" ftype="pdf"/>
297 <output name="HCPC_mutual_info" file="hcpc.factor.extval.txt" ftype="txt"/>
298 <output name="HCPC_cell_clust" file="hcpc.cell-cluster.2.tsv" ftype="tabular"/>
299 <output name="HCPC_contributions" file="hcpc.component-impact.2.tsv" ftype="tabular"/>
300 </test>
301 <test expect_num_outputs="4">
302 <param name="input" value="cpm_input.tsv" ftype="txt"/>
303 <param name="labels" value="yes" />
304 <param name="visu_choice" value="HCPC" />
305 <param name="factor_choice" value="Yes" />
306 <param name="factor" value="factor.tsv" ftype="txt"/>
307 <param name="HCPC_method" value="average"/>
308 <param name="HCPC_metric" value="manhattan"/>
309 <param name="HCPC_npc" value="4" />
310 <output name="pdf_out" file="hcpc.3.pdf" ftype="pdf"/>
311 <output name="HCPC_mutual_info" file="hcpc.extval.1.txt" ftype="txt"/>
312 <output name="HCPC_cell_clust" file="hcpc.cell-cluster.3.tsv" ftype="tabular"/>
313 <output name="HCPC_contributions" file="hcpc.component-impact.3.tsv" ftype="tabular"/>
314 </test>
315 <test expect_num_outputs="3">
316 <param name="input" value="cpm_input.tsv" ftype="txt"/>
317 <param name="labels" value="no" />
318 <param name="visu_choice" value="HCPC" />
319 <param name="HCPC_method" value="single"/>
320 <param name="HCPC_metric" value="euclidean"/>
321 <param name="HCPC_npc" value="4" />
322 <param name="HCPC_clusterCA" value="cols" />
323 <output name="pdf_out" file="hcpc.4.pdf" compare="sim_size" ftype="pdf"/>
324 <output name="HCPC_cell_clust" file="hcpc.cell-cluster.4.tsv" ftype="tabular"/>
325 <output name="HCPC_contributions" file="hcpc.component-impact.4.tsv" ftype="tabular"/>
383 </test> 326 </test>
384 </tests> 327 </tests>
385 <help> 328 <help>
386 329
387 **What it does** 330 **What it does**
388 331
389 Takes as an input a matrix of n observations (columns, generally n RNAseq library) of k variables 332 **Inputs**
390 (rows, generally k genes). 333 Takes as an input a tab-separated values (TSV) file of n observations (columns,
334 generally n RNAseq libraries) of k variables (rows, generally k genes).
335
336 The table must contain a header, i.e. the first line describes the content of each column.
391 337
392 k variables define a space of k dimensions. Any observation 338 k variables define a space of k dimensions. Any observation
393 of k expression values for k genes (the purpose of one RNAseq experiment) can be assigned 339 of k expression values for k genes (the purpose of one RNAseq experiment) can be assigned
394 to a position in the k-dim space, of coordinates c1, c2, c3, ..., ck. 340 to a position in the k-dim space, of coordinates c1, c2, c3, ..., ck.
395 341
396 Since visualisation in more than 3 dimensions is not easy for a human being, there is 342 Since visualisation in more than 3 dimensions is not easy for a human being, there is
397 a number of methods to "reduce" or "project" a k-dim space in a space of 2 or 3 dimensions. 343 a number of methods to "reduce" or "project" a k-dim space in a space of 2 or 3 dimensions.
398 This is of great help, not only to summarise the data, but also to find similarities, common trends 344 This is of great help, not only to summarise the data, but also to find similarities, common trends
399 between the data (under the hypothesis that similar data are closer in the k-dimension space). 345 between the data (under the hypothesis that similar data are closer in the k-dimension space).
400 346
347 **Outputs**
348
401 This tool returns the visualisation of a dimensional reduction using either: 349 This tool returns the visualisation of a dimensional reduction using either:
402 350
403 * Principal Components Analysis (PCA) 351 * Principal Components Analysis (PCA)
404 * Hierarchical Clustering of Principal Components (HCPC) 352 * Hierarchical Clustering of Principal Components (HCPC)
405 * t-distributed Stochastic Neighbor Embedding (t-SNE) 353 * t-distributed Stochastic Neighbor Embedding (t-SNE)
406 354
407 The tool returns in addition the table of the coordinates of the observations (eg RNAseq libraries) 355 If HCPC is used, this tool can also return a 2-column cluster correspondence table:
408 in the low dim space, which can be used for post-treatment or to further adjust the provided visualisation.
409
410 If HCPC is used, this tool can also return the clustering table. It contains two columns of n observations :
411 356
412 * Observation labels 357 * Observation labels
413 * Cluster labels 358 * Cluster labels
414 359
415 ** Contrast data with a factor ** 360 **Contrast data with a factor**
361
416 The tool offers the possibility to colour data points according to the levels of a factor. 362 The tool offers the possibility to colour data points according to the levels of a factor.
417 To use the option "Factor to contrast data", provide a tab-separated, two-column table 363 To use the option "Factor to contrast data", provide a tab-separated, two-column table
418 with first column containing the cell/data library identifiers (same identifiers as those 364 with first column containing the cell/data library identifiers (same identifiers as those
419 provided as column headers in the input data table) and second column containing the corresponding 365 provided as column headers in the input data table) and second column containing the corresponding
420 factor levels value (if this vector is numerical, then the color palette used is quantitative). 366 factor levels value (if this vector is numerical, then the color palette used is quantitative).
421 This table does not need to be sorted in the same order as in the data 367 This table does not need to be sorted in the same order as in the data
422 table. It may also contain more identifiers than those provided in the data table. 368 table. It may also contain more identifiers than those provided in the data table.
423 369
424 If HCPC visualisation and contrasting data are chosen, an additional text file is given. It contains 370 If HCPC visualisation is chosen and a contrasting factor is provided, a text file containing metrics
425 several metrics of external validation of clustering. It will compare the capacity of HCPC clustering 371 of external validation of the clustering is returned.
426 to recreate classes contained in the factor data file. If the contrasting factor is quantitative, 372 These metrics measure the capacity of HCPC clustering to find classes overlapping the levels
427 the file will be empty. 373 of the provided factor.
428
429 374
430 </help> 375 </help>
431 <citations> 376 <citations>
432 <citation type="bibtex">@Article{, 377 <citation type="bibtex">@Article{,
433 title = {Visualizing High-Dimensional Data Using t-SNE}, 378 title = {Visualizing High-Dimensional Data Using t-SNE},