comparison segmentation.xml @ 0:e56a955cd1c0 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 0825a4ccd3ebf4ca8a298326d14f3e7b25ae8415
author galaxyp
date Mon, 01 Oct 2018 01:05:00 -0400
parents
children 98d48f081ad9
comparison
equal deleted inserted replaced
-1:000000000000 0:e56a955cd1c0
1 <tool id="cardinal_segmentations" name="MSI segmentation" version="@VERSION@.0">
2 <description>mass spectrometry imaging spatial clustering</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements">
7 <requirement type="package" version="2.2.1">r-gridextra</requirement>
8 <requirement type="package" version="0.20-35">r-lattice</requirement>
9 </expand>
10 <command detect_errors="exit_code">
11 <![CDATA[
12 ## The INPUT_LINKING macro token is defined outside this file (presumably it stages the input files; confirm in macros.xml). The generated R script is then printed to stdout and executed.
13 @INPUT_LINKING@
14 cat '${MSI_segmentation}' &&
15 Rscript '${MSI_segmentation}'
16
17 ]]>
18 </command>
19 <configfiles>
20 <configfile name="MSI_segmentation"><![CDATA[
21
22
23 ################################# load libraries and read file #################
24 ## Cardinal provides the MSI analysis functions used below, gridExtra provides grid.table() and lattice provides strip.custom()
25 library(Cardinal)
26 library(gridExtra)
27 library(lattice)
28 ## the READING_MSIDATA macro token is defined outside this file; presumably it loads the input into `msidata` (confirm in macros.xml)
29 @READING_MSIDATA@
30
31
32 ## create full matrix to make processed imzML files compatible with segmentation
33 iData(msidata) <- iData(msidata)[]
34 ## the DATA_PROPERTIES macro token is defined outside this file; presumably it defines property_df, npeaks, pixelcount, minimumy and maximumy, which are used below but never assigned in this script (confirm in macros.xml)
35 @DATA_PROPERTIES@
36
37 ######################################## PDF ###################################
38 ################################################################################
39 ################################################################################
40
41 ## open the PDF device that receives everything drawn below
42 pdf(file = "segmentationpdf.pdf", fonts = "Times", pointsize = 12)
43 plot(0, type = "n", axes = FALSE, ann = FALSE)
44 ## the blank plot above only serves as a canvas for the title
45 title(main = paste0("Spatial segmentation for file: \n\n", "$infile.display_name"))
46
47
48 ############################# I) numbers ####################################
49 #############################################################################
50 grid.table(property_df, rows = NULL)
51
52 if (npeaks > 0)
53 {
54
55 ######################## II) segmentation tools #############################
56 #############################################################################
57 #set $color_string = ','.join(['"%s"' % $color.feature_color for $color in $colours])
58 colourvector <- c($color_string)
59
60 ### plotting switches shared by all image() and plot() calls below
61 #if str($image_cond.image_type) == "standard_image":
62 print("standard image")
63
64 lattice_input <- FALSE
65 strip_input <- TRUE
66
67 #elif str($image_cond.image_type) == "lattice_image":
68 print("lattice image")
69
70 lattice_input <- TRUE
71 strip_input <- strip.custom(bg = "lightgrey", par.strip.text = list(col = "black", cex = 0.9))
72
73 #end if
74
75 ## fix the random seed so that repeated runs give the same result
76 set.seed($setseed)
77
78 #if str( $segm_cond.segmentationtool ) == 'pca':
79 print('pca')
80 ## principal component analysis
81 ## build the component labels "PC1" ... "PCn" in one vectorised call; seq_len() gives an empty
82 ## sequence for a count of 0, where 1:0 would iterate over 1 and 0
83 component_vector = paste0("PC", seq_len($segm_cond.pca_ncomp))
84 ## NOTE(review): `ncomp` in `layout = c(ncomp, 1)` below is not assigned anywhere in this script; confirm that it is never evaluated
85 pca_result = PCA(msidata, ncomp=$segm_cond.pca_ncomp, column = component_vector, superpose = FALSE,
86 method = "$segm_cond.pca_method", scale = $segm_cond.pca_scale, layout = c(ncomp, 1))
87
88 ### images in pdf file: one overview image, then one image per component
89 print(image(pca_result, main="PCA image", lattice=lattice_input, strip = strip_input, col=colourvector, ylim=c(maximumy+2, minimumy-2)))
90 for (PCs in seq_len($segm_cond.pca_ncomp)){
91 print(image(pca_result, column = c(paste0("PC",PCs)), lattice=lattice_input, superpose = FALSE, col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))}
92 ### plots in pdf file: one overview plot, then one plot per component
93 print(plot(pca_result, main="PCA plot", lattice=lattice_input, col= colourvector, strip = strip_input))
94 for (PCs in seq_len($segm_cond.pca_ncomp)){
95 print(plot(pca_result, column = c(paste0("PC",PCs)),superpose = FALSE))}
96
97 ### values in tabular files: the loadings go to the m/z feature output, the scores to the pixel output
98 pcaloadings = (pca_result@resultData\$ncomp\$loadings) ### loading for each m/z value
99 pcaloadings2 = cbind(matrix(unlist(strsplit(rownames(pcaloadings), " = ")), ncol=2, byrow=TRUE)[,2], pcaloadings) ## the part after " = " of each loading row name becomes the first column
100 colnames(pcaloadings2) = c("mz", colnames(pcaloadings))
101 pcascores = (pca_result@resultData\$ncomp\$scores) ### scores for each pixel
102 ## the two gsub() calls turn ", y = " into "_" and then " = " into "y_", so every name splits on "_" into three fields; fields 2 and 3 are used as x and y
103 ## pixel names and coordinates
104 pixel_names = gsub(", y = ", "_", rownames(pcascores))
105 pixel_names = gsub(" = ", "y_", pixel_names)
106 x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2]
107 y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3]
108 pcascores2 = data.frame(pixel_names, x_coordinates, y_coordinates, pcascores)
109 colnames(pcascores2) = c("pixel names", "x", "y", colnames(pcascores))
110 write.table(pcaloadings2, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
111 write.table(pcascores2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
112
113 ## optional output as .RData
114 #if $output_rdata:
115 ## save the PCA result; in this branch the object is called pca_result (no object named pca is ever created)
116 save(pca_result, file="$segmentation_rdata")
117
118 #end if
119
120 #elif str( $segm_cond.segmentationtool ) == 'kmeans':
121 print('kmeans')
122 ## spatially-aware k-means clustering
123
124 skm = spatialKMeans(msidata, r=c($segm_cond.kmeans_r), k=c($segm_cond.kmeans_k), method="$segm_cond.kmeans_method")
125 print(image(skm, key=TRUE, main="K-means clustering", lattice=lattice_input, strip=strip_input, col= colourvector, layout=c(1,1), ylim=c(maximumy+2, minimumy-2)))
126
127 print(plot(skm, main="K-means plot", lattice=lattice_input, col= colourvector, strip=strip_input, layout=c(1,1)))
128 ## one column of cluster assignments per entry of resultData; seq_along() iterates zero times on an empty list, where 1:length() would iterate over 1 and 0
129 skm_clusters = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
130 for (iteration in seq_along(skm@resultData)){
131 skm_cluster = ((skm@resultData)[[iteration]]\$cluster)
132 skm_clusters = cbind(skm_clusters, skm_cluster) }
133
134 ## pixel names and coordinates: ", y = " becomes "_" and " = " becomes "y_", so splitting on "_" gives three fields of which the 2nd is x and the 3rd is y
135 pixel_names = gsub(", y = ", "_", rownames(skm_clusters))
136 pixel_names = gsub(" = ", "y_", pixel_names)
137 x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2]
138 y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3]
139 skm_clusters2 = data.frame(pixel_names, x_coordinates, y_coordinates, skm_clusters)
140 colnames(skm_clusters2) = c("pixel names", "x", "y",names(skm@resultData))
141
142 skm_toplabels = topLabels(skm, n=$segm_cond.kmeans_toplabels)
143 ## the toplabels go to the m/z feature output, the per-pixel clusters to the pixel output
144 write.table(skm_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
145 write.table(skm_clusters2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
146
147 ## optional output as .RData
148 #if $output_rdata:
149
150 ## save the spatialKMeans result object
151 save(skm, file="$segmentation_rdata")
152
153 #end if
154
155 #elif str( $segm_cond.segmentationtool ) == 'centroids':
156 print('centroids')
157 ## spatial shrunken centroids
158
159 ssc = spatialShrunkenCentroids(msidata, r=c($segm_cond.centroids_r), k=c($segm_cond.centroids_k), s=c($segm_cond.centroids_s), method="$segm_cond.centroids_method")
160 print(image(ssc, key=TRUE, main="Spatial shrunken centroids", lattice=lattice_input, strip = strip_input, col= colourvector,layout=c(1,1), ylim=c(maximumy+2, minimumy-2)))
161 print(plot(ssc, main="Spatial shrunken centroids plot", lattice=lattice_input, col= colourvector, strip = strip_input,layout=c(1,1)))
162 print(plot(ssc, mode = "tstatistics",key = TRUE, lattice=lattice_input, layout = c(1,1), main="t-statistics", col=colourvector))
163 plot(summary(ssc), main = "Number of segments")
164 ## one column of class assignments per entry of resultData; seq_along() iterates zero times on an empty list, where 1:length() would iterate over 1 and 0
165 ssc_classes = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
166 for (iteration in seq_along(ssc@resultData)){
167 ssc_class = ((ssc@resultData)[[iteration]]\$classes)
168 ssc_classes = cbind(ssc_classes, ssc_class) }
169
170 ## pixel names and coordinates: ", y = " becomes "_" and " = " becomes "y_", so splitting on "_" gives three fields of which the 2nd is x and the 3rd is y
171 pixel_names = gsub(", y = ", "_", rownames(ssc_classes))
172 pixel_names = gsub(" = ", "y_", pixel_names)
173 x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2]
174 y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3]
175 ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes)
176 colnames(ssc_classes2) = c("pixel names", "x", "y", names(ssc@resultData))
177
178 ssc_toplabels = topLabels(ssc, n=$segm_cond.centroids_toplabels)
179 ## the toplabels go to the m/z feature output, the per-pixel classes to the pixel output
180 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
181 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
182
183 ## optional output as .RData
184 #if $output_rdata:
185
186 ## save the spatialShrunkenCentroids result object
187 save(ssc, file="$segmentation_rdata")
188
189 #end if
190
191 #end if
192
193 dev.off() ## close the PDF device after the selected analysis has drawn its pages
194
195 }else{
196 print("Inputfile has no intensities > 0")
197 dev.off() ## close the PDF device; in this branch it only holds what was drawn before the npeaks check
198 }
199
200 ]]></configfile>
201 </configfiles>
202 <inputs>
203 <expand macro="reading_msidata"/>
204 <conditional name="segm_cond">
205 <param name="segmentationtool" type="select" label="Select the tool for spatial clustering">
206 <option value="pca" selected="True">pca</option>
207 <option value="kmeans">k-means</option>
208 <option value="centroids">spatial shrunken centroids</option>
209 </param>
210 <when value="pca"> <!-- PCA options: the values are inserted into the PCA() call of the R script -->
211 <param name="pca_ncomp" type="integer" value="2"
212 label="The number of principal components to calculate"/>
213 <param name="pca_method" type="select"
214 label="The function used to calculate the singular value decomposition">
215 <option value="irlba" selected="True">irlba</option>
216 <option value="svd">svd</option>
217 </param>
218 <param name="pca_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Scaling of data before analysis"/>
219 </when>
220 <!-- k-means options: r, k and method are inserted into the spatialKMeans() call, kmeans_toplabels into topLabels() -->
221 <when value="kmeans">
222 <param name="kmeans_r" type="text" value="2"
223 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
224 <expand macro="sanitizer_multiple_digits"/>
225 </param>
226 <param name="kmeans_k" type="text" value="3"
227 label="The number of clusters (k)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
228 <expand macro="sanitizer_multiple_digits"/>
229 </param>
230 <param name="kmeans_method" type="select" display="radio"
231 label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) clustering, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) clustering">
232 <option value="gaussian">gaussian</option>
233 <option value="adaptive" selected="True">adaptive</option>
234 </param>
235 <param name="kmeans_toplabels" type="integer" value="500"
236 label="Number of toplabels (m/z) which should be written in tabular output"/>
237 </when>
238 <!-- spatial shrunken centroids options: r, k, s and method are inserted into the spatialShrunkenCentroids() call, centroids_toplabels into topLabels() -->
239 <when value="centroids">
240 <param name="centroids_r" type="text" value="2"
241 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
242 <expand macro="sanitizer_multiple_digits"/>
243 </param>
244 <param name="centroids_k" type="text" value="5"
245 label="The initial number of clusters (k)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
246 <expand macro="sanitizer_multiple_digits"/>
247 </param>
248 <param name="centroids_s" type="text" value="2"
249 label="The sparsity thresholding parameter by which to shrink the t-statistics (s)"
250 help="As s increases, fewer m/z features (m/z values) will be used in the spatial segmentation, and only the informative m/z features will be retained. Multiple values are allowed (e.g. 1,2,3 or 2:5)">
251 <expand macro="sanitizer_multiple_digits"/>
252 </param>
253 <param name="centroids_method" type="select" display="radio" label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights">
254 <option value="gaussian">gaussian</option>
255 <option value="adaptive" selected="True">adaptive</option>
256 </param>
257 <param name="centroids_toplabels" type="integer" value="500"
258 label="Number of toplabels (m/z) which should be written in tabular output"/>
259 </when>
260 </conditional>
261 <conditional name="image_cond"> <!-- selects which template branch of the R script sets lattice_input and strip_input -->
262 <param name="image_type" type="select" label="Select the image type">
263 <option value="standard_image" selected="True">standard</option>
264 <option value="lattice_image">lattice</option>
265 </param>
266 <when value="standard_image"/>
267 <when value="lattice_image"/>
268 </conditional>
269 <repeat name="colours" title="Colours for the plots" min="1" max="50"> <!-- each entry contributes one quoted colour to the colourvector built in the R script -->
270 <param name="feature_color" type="color" label="Colours" value="#ff00ff" help="Numbers of colours should be the same as number of components">
271 <sanitizer>
272 <valid initial="string.letters,string.digits">
273 <add value="#" />
274 </valid>
275 </sanitizer>
276 </param>
277 </repeat>
278 <param name="output_rdata" type="boolean" label="Results as .RData output"/> <!-- enables the save() calls in the R script and the filtered segmentation_rdata output -->
279 <param name="setseed" type="integer" value="1" label="set seed" help="Use same value to reproduce previous results"/> <!-- passed to set.seed() before the analysis -->
280 </inputs>
281 <outputs>
282 <data format="pdf" name="segmentationimages" from_work_dir="segmentationpdf.pdf" label = "${tool.name} on ${on_string}"/> <!-- the file opened by pdf() in the R script -->
283 <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/> <!-- written by write.table(): PCA loadings or toplabels -->
284 <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/> <!-- written by write.table(): PCA scores, clusters or classes per pixel -->
285 <data format="rdata" name="segmentation_rdata" label="${tool.name} on ${on_string}: results.RData"> <!-- only present when output_rdata is set -->
286 <filter>output_rdata</filter>
287 </data>
288 </outputs>
289 <tests>
290 <test> <!-- PCA with lattice images and two colours; the PDF is compared by size only -->
291 <expand macro="infile_imzml"/>
292 <param name="segmentationtool" value="pca"/>
293 <param name="image_type" value="lattice_image"/>
294 <repeat name="colours">
295 <param name="feature_color" value="#ff00ff"/>
296 </repeat>
297 <repeat name="colours">
298 <param name="feature_color" value="#0000FF"/>
299 </repeat>
300 <output name="segmentationimages" file="pca_imzml.pdf" compare="sim_size"/>
301 <output name="mzfeatures" file="loadings_pca.tabular"/>
302 <output name="pixeloutput" file="scores_pca.tabular"/>
303 </test>
304 <test> <!-- k-means with r = 1:3 and k = 2,3, three colours and the RData output enabled; PDF and RData are compared by size only -->
305 <expand macro="infile_analyze75"/>
306 <param name="segmentationtool" value="kmeans"/>
307 <param name="kmeans_r" value="1:3"/>
308 <param name="kmeans_k" value="2,3"/>
309 <param name="kmeans_toplabels" value="20"/>
310 <repeat name="colours">
311 <param name="feature_color" value="#ff00ff"/>
312 </repeat>
313 <repeat name="colours">
314 <param name="feature_color" value="#0000FF"/>
315 </repeat>
316 <repeat name="colours">
317 <param name="feature_color" value="#00C957"/>
318 </repeat>
319 <param name="output_rdata" value="True"/>
320 <output name="segmentationimages" file="kmeans_analyze.pdf" compare="sim_size"/>
321 <output name="mzfeatures" file="toplabels_skm.tabular"/>
322 <output name="pixeloutput" file="cluster_skm.tabular"/>
323 <output name="segmentation_rdata" file="cluster_skm.RData" compare="sim_size"/>
324 </test>
325 <test> <!-- spatial shrunken centroids on an RData input with r = 1,2 and k = 3 (centroids_s is not set here), five colours; the PDF is compared by size only -->
326 <param name="infile" value="preprocessed.RData" ftype="rdata"/>
327 <param name="segmentationtool" value="centroids"/>
328 <param name="centroids_r" value="1,2"/>
329 <param name="centroids_k" value="3"/>
330 <param name="centroids_toplabels" value="50"/>
331 <repeat name="colours">
332 <param name="feature_color" value="#0000FF"/>
333 </repeat>
334 <repeat name="colours">
335 <param name="feature_color" value="#00C957"/>
336 </repeat>
337 <repeat name="colours">
338 <param name="feature_color" value="#B0171F"/>
339 </repeat>
340 <repeat name="colours">
341 <param name="feature_color" value="#FFD700"/>
342 </repeat>
343 <repeat name="colours">
344 <param name="feature_color" value="#848484"/>
345 </repeat>
346 <output name="segmentationimages" file="centroids_rdata.pdf" compare="sim_size"/>
347 <output name="mzfeatures" file="toplabels_ssc.tabular"/>
348 <output name="pixeloutput" file="classes_ssc.tabular"/>
349 </test>
350 </tests>
351 <help>
352 <![CDATA[
353
354 @CARDINAL_DESCRIPTION@
355
356 -----
357
358 This tool provides three different Cardinal functions for unsupervised clustering/spatial segmentation of mass spectrometry imaging data.
359
360 @MSIDATA_INPUT_DESCRIPTION@
361
362 **Options**
363
364 - PCA: principal component analysis
365 - k-means: spatially-aware k-means clustering
366 - spatial shrunken centroids: Allows the number of segments to decrease according to the data. This allows automatic selection of the number of clusters
367
368 **Output**
369
370 - PDF with the heatmaps and plots for the segmentation
371 - Two tabular files, one with information on the m/z features and one with information on the pixels: loadings and scores (PCA), toplabels and clusters (k-means), toplabels and classes (spatial shrunken centroids)
372 - Optional .RData file which contains the segmentation results and can be used for further exploration in R using the Cardinal package
373
374 ]]>
375 </help>
376 <expand macro="citations"/>
377 </tool>