comparison segmentation_tool.xml @ 4:aec189b0c64d draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_segmentation commit 1c808d60243bb1eeda0cd26cb4b0a17ab05de2c0
author galaxyp
date Mon, 28 May 2018 12:39:28 -0400
parents 830c6df59603
children cee9cf693709
comparison
equal deleted inserted replaced
3:830c6df59603 4:aec189b0c64d
1 <tool id="mass_spectrometry_imaging_segmentations" name="MSI segmentation" version="1.7.0.3"> 1 <tool id="mass_spectrometry_imaging_segmentations" name="MSI segmentation" version="1.10.0.0">
2 <description>tool for spatial clustering</description> 2 <description>tool for spatial clustering</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement> 4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
5 <requirement type="package" version="2.2.1">r-gridextra</requirement> 5 <requirement type="package" version="2.2.1">r-gridextra</requirement>
6 <requirement type="package" version="2.23-15">r-kernsmooth</requirement>
7 <requirement type="package" version="0.20-35">r-lattice</requirement> 6 <requirement type="package" version="0.20-35">r-lattice</requirement>
8 </requirements> 7 </requirements>
9 <command detect_errors="exit_code"> 8 <command detect_errors="exit_code">
10 <![CDATA[ 9 <![CDATA[
11 10
12 #if $infile.ext == 'imzml' 11 #if $infile.ext == 'imzml'
13 cp '${infile.extra_files_path}/imzml' infile.imzML && 12 ln -s '${infile.extra_files_path}/imzml' infile.imzML &&
14 cp '${infile.extra_files_path}/ibd' infile.ibd && 13 ln -s '${infile.extra_files_path}/ibd' infile.ibd &&
15 #elif $infile.ext == 'analyze75' 14 #elif $infile.ext == 'analyze75'
16 cp '${infile.extra_files_path}/hdr' infile.hdr && 15 ln -s '${infile.extra_files_path}/hdr' infile.hdr &&
17 cp '${infile.extra_files_path}/img' infile.img && 16 ln -s '${infile.extra_files_path}/img' infile.img &&
18 cp '${infile.extra_files_path}/t2m' infile.t2m && 17 ln -s '${infile.extra_files_path}/t2m' infile.t2m &&
19 #else 18 #else
20 ln -s $infile infile.RData && 19 ln -s $infile infile.RData &&
21 #end if 20 #end if
22 cat '${MSI_segmentation}' && 21 cat '${MSI_segmentation}' &&
23 echo ${MSI_segmentation} && 22 echo ${MSI_segmentation} &&
32 ################################# load libraries and read file ######################### 31 ################################# load libraries and read file #########################
33 32
34 33
35 library(Cardinal) 34 library(Cardinal)
36 library(gridExtra) 35 library(gridExtra)
37 library(KernSmooth)
38 library(lattice) 36 library(lattice)
39 37
40 ## Read MALDI Imaging dataset 38 ## Read MALDI Imaging dataset
41 39
42 #if $infile.ext == 'imzml' 40 #if $infile.ext == 'imzml'
43 msidata = readMSIData('infile.imzML') 41 msidata = readImzML('infile')
44 #elif $infile.ext == 'analyze75' 42 #elif $infile.ext == 'analyze75'
45 msidata = readMSIData('infile.hdr') 43 msidata = readAnalyze('infile')
46 #else 44 #else
47 load('infile.RData') 45 load('infile.RData')
48 #end if 46 #end if
49 47
50 ###################################### file properties in numbers ###################### 48 ###################################### file properties in numbers ##############
51 49
52 ## Number of features (mz) 50 ## Number of features (mz)
53 maxfeatures = length(features(msidata)) 51 maxfeatures = length(features(msidata))
54 ## Range mz 52 ## Range mz
55 minmz = round(min(mz(msidata)), digits=2) 53 minmz = round(min(mz(msidata)), digits=2)
143 paste0(centroidedinfo)) 141 paste0(centroidedinfo))
144 142
145 property_df = data.frame(properties, values) 143 property_df = data.frame(properties, values)
146 144
147 145
148 ######################################## PDF ############################################# 146 ######################################## PDF ###################################
149 ########################################################################################## 147 ################################################################################
150 ########################################################################################## 148 ################################################################################
151 149
152 150
153 pdf("segmentationpdf.pdf", fonts = "Times", pointsize = 12) 151 pdf("segmentationpdf.pdf", fonts = "Times", pointsize = 12)
154 plot(0,type='n',axes=FALSE,ann=FALSE) 152 plot(0,type='n',axes=FALSE,ann=FALSE)
155 153
178 for (numberofcomponents in 1:$segm_cond.pca_ncomp) 176 for (numberofcomponents in 1:$segm_cond.pca_ncomp)
179 {component_vector[numberofcomponents]= paste0("PC", numberofcomponents)} 177 {component_vector[numberofcomponents]= paste0("PC", numberofcomponents)}
180 pca = PCA(msidata, ncomp=$segm_cond.pca_ncomp, column = component_vector, superpose = FALSE, 178 pca = PCA(msidata, ncomp=$segm_cond.pca_ncomp, column = component_vector, superpose = FALSE,
181 method = "$segm_cond.pca_method", scale = $segm_cond.pca_scale, layout = c(ncomp, 1)) 179 method = "$segm_cond.pca_method", scale = $segm_cond.pca_scale, layout = c(ncomp, 1))
182 180
183 print(image(pca, main="PCA image", lattice=TRUE, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), col=colourvector, ylim=c(maximumy+2, 0))) 181 print(image(pca, main="PCA image", lattice=TRUE, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), col=colourvector))
184 print(plot(pca, main="PCA plot", lattice=TRUE, col= colourvector, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)))) 182 print(plot(pca, main="PCA plot", lattice=TRUE, col= colourvector, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9))))
185 183
186 184
187 pcaloadings = (pca@resultData\$ncomp\$loadings) ### loading for each mz value 185 pcaloadings = (pca@resultData\$ncomp\$loadings) ### loading for each mz value
188 pcascores = (pca@resultData\$ncomp\$scores) ### scores for each pixel 186 pcascores = (pca@resultData\$ncomp\$scores) ### scores for each pixel
193 #elif str( $segm_cond.segmentationtool ) == 'kmeans': 191 #elif str( $segm_cond.segmentationtool ) == 'kmeans':
194 print('kmeans') 192 print('kmeans')
195 ##k-means 193 ##k-means
196 194
197 skm = spatialKMeans(msidata, r=c($segm_cond.kmeans_r), k=c($segm_cond.kmeans_k), method="$segm_cond.kmeans_method") 195 skm = spatialKMeans(msidata, r=c($segm_cond.kmeans_r), k=c($segm_cond.kmeans_k), method="$segm_cond.kmeans_method")
198 print(image(skm, key=TRUE, main="K-means clustering", lattice=TRUE, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), col= colourvector, ylim=c(maximumy+2, 0), layout=c(1,1))) 196 print(image(skm, key=TRUE, main="K-means clustering", lattice=TRUE, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), col= colourvector, layout=c(1,1)))
199 197
200 print(plot(skm, main="K-means plot", lattice=TRUE, col= colourvector, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), layout=c($segm_cond.kmeans_layout))) 198 print(plot(skm, main="K-means plot", lattice=TRUE, col= colourvector, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), layout=c($segm_cond.kmeans_layout)))
201 199
202 200
203 skm_clusters = data.frame(matrix(NA, nrow = pixelcount, ncol = 0)) 201 skm_clusters = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
215 #elif str( $segm_cond.segmentationtool ) == 'centroids': 213 #elif str( $segm_cond.segmentationtool ) == 'centroids':
216 print('centroids') 214 print('centroids')
217 ##centroids 215 ##centroids
218 216
219 ssc = spatialShrunkenCentroids(msidata, r=c($segm_cond.centroids_r), k=c($segm_cond.centroids_k), s=c($segm_cond.centroids_s), method="$segm_cond.centroids_method") 217 ssc = spatialShrunkenCentroids(msidata, r=c($segm_cond.centroids_r), k=c($segm_cond.centroids_k), s=c($segm_cond.centroids_s), method="$segm_cond.centroids_method")
220 print(image(ssc, key=TRUE, main="Spatial shrunken centroids", lattice=TRUE, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), col= colourvector, ylim=c(maximumy+2, 0),layout=c(1,1))) 218 print(image(ssc, key=TRUE, main="Spatial shrunken centroids", lattice=TRUE, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), col= colourvector,layout=c(1,1)))
221 print(plot(ssc, main="Spatial shrunken centroids plot", lattice=TRUE, col= colourvector, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)),layout=c($segm_cond.centroids_layout))) 219 print(plot(ssc, main="Spatial shrunken centroids plot", lattice=TRUE, col= colourvector, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)),layout=c($segm_cond.centroids_layout)))
222 220
223 ssc_classes = data.frame(matrix(NA, nrow = pixelcount, ncol = 0)) 221 ssc_classes = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
224 for (iteration in 1:length(ssc@resultData)){ 222 for (iteration in 1:length(ssc@resultData)){
225 ssc_class = ((ssc@resultData)[[iteration]]\$classes) 223 ssc_class = ((ssc@resultData)[[iteration]]\$classes)
246 <inputs> 244 <inputs>
247 <param name="infile" type="data" format="imzml, rdata, analyze75" 245 <param name="infile" type="data" format="imzml, rdata, analyze75"
248 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" 246 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
249 help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/> 247 help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
250 <conditional name="segm_cond"> 248 <conditional name="segm_cond">
251 <param name="segmentationtool" type="select" label="Select the tool for spatial clustering."> 249 <param name="segmentationtool" type="select" label="Select the tool for spatial clustering">
252 <option value="pca" selected="True">pca</option> 250 <option value="pca" selected="True">pca</option>
253 <option value="kmeans">k-means</option> 251 <option value="kmeans">k-means</option>
254 <option value="centroids">shrunken centroids</option> 252 <option value="centroids">spatial shrunken centroids</option>
255 </param> 253 </param>
256 <when value="pca"> 254 <when value="pca">
257 <param name="pca_ncomp" type="integer" value="2" 255 <param name="pca_ncomp" type="integer" value="2"
258 label="The number of principal components to calculate."/> 256 label="The number of principal components to calculate"/>
259 <param name="pca_method" type="select" 257 <param name="pca_method" type="select"
260 label="The function used to calculate the singular value decomposition."> 258 label="The function used to calculate the singular value decomposition">
261 <option value="irlba" selected="True">irlba</option> 259 <option value="irlba" selected="True">irlba</option>
262 <option value="svd">svd</option> 260 <option value="svd">svd</option>
263 </param> 261 </param>
264 <param name="pca_scale" type="select" display="radio" optional="False" 262 <param name="pca_scale" type="select" display="radio" optional="False"
265 label="Shoud the data be scaled first?"> 263 label="Scaling of data before analysis">
266 <option value="TRUE">yes</option> 264 <option value="TRUE">yes</option>
267 <option value="FALSE" selected="True">no</option> 265 <option value="FALSE" selected="True">no</option>
268 </param> 266 </param>
269 </when> 267 </when>
270 268
271 <when value="kmeans"> 269 <when value="kmeans">
272 <param name="kmeans_r" type="text" value="2" 270 <param name="kmeans_r" type="text" value="2"
273 label="The spatial neighborhood radius of nearby pixels to consider (r)." help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/> 271 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
274 <param name="kmeans_k" type="text" value="3" 272 <param name="kmeans_k" type="text" value="3"
275 label="The number of clusters (k)." help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/> 273 label="The number of clusters (k)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
276 <param name="kmeans_method" type="select" display="radio" 274 <param name="kmeans_method" type="select" display="radio"
277 label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) clustering, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) clustering."> 275 label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) clustering, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) clustering">
278 <option value="gaussian">gaussian</option> 276 <option value="gaussian">gaussian</option>
279 <option value="adaptive" selected="True">adaptive</option> 277 <option value="adaptive" selected="True">adaptive</option>
280 </param> 278 </param>
281 <param name="kmeans_toplabels" type="integer" value="500" 279 <param name="kmeans_toplabels" type="integer" value="500"
282 label="Number of toplabels (masses) which should be written in tabular output"/> 280 label="Number of toplabels (masses) which should be written in tabular output"/>
284 label="Number of rows and columns to plot pictures in pdf output" help="e.g. 1,1 means 1 plot per page; 2,3 means 2 rows with 3 plots each = 6 plots per page"/> 282 label="Number of rows and columns to plot pictures in pdf output" help="e.g. 1,1 means 1 plot per page; 2,3 means 2 rows with 3 plots each = 6 plots per page"/>
285 </when> 283 </when>
286 284
287 <when value="centroids"> 285 <when value="centroids">
288 <param name="centroids_r" type="text" value="2" 286 <param name="centroids_r" type="text" value="2"
289 label="The spatial neighborhood radius of nearby pixels to consider (r)." help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/> 287 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
290 <param name="centroids_k" type="text" value="5" 288 <param name="centroids_k" type="text" value="5"
291 label="The initial number of clusters (k)." help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/> 289 label="The initial number of clusters (k)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
292 <param name="centroids_s" type="text" value="2" 290 <param name="centroids_s" type="text" value="2"
293 label="The sparsity thresholding parameter by which to shrink the t-statistics (s)." 291 label="The sparsity thresholding parameter by which to shrink the t-statistics (s)"
294 help="As s increases, fewer mass features (m/z values) will be used in the spatial segmentation, and only the informative mass features will be retained. Multiple values are allowed (e.g. 1,2,3 or 2:5)."/> 292 help="As s increases, fewer mass features (m/z values) will be used in the spatial segmentation, and only the informative mass features will be retained. Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
295 <param name="centroids_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights."> 293 <param name="centroids_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights">
296 <option value="gaussian" selected="True">gaussian</option> 294 <option value="gaussian" selected="True">gaussian</option>
297 <option value="adaptive">adaptive</option> 295 <option value="adaptive">adaptive</option>
298 </param> 296 </param>
299 <param name="centroids_toplabels" type="integer" value="500" 297 <param name="centroids_toplabels" type="integer" value="500"
300 label="Number of toplabels (masses) which should be written in tabular output"/> 298 label="Number of toplabels (masses) which should be written in tabular output"/>
341 <composite_data value="Analyze75.t2m" /> 339 <composite_data value="Analyze75.t2m" />
342 </param> 340 </param>
343 <param name="segmentationtool" value="kmeans"/> 341 <param name="segmentationtool" value="kmeans"/>
344 <param name="kmeans_r" value="1:3"/> 342 <param name="kmeans_r" value="1:3"/>
345 <param name="kmeans_k" value="2,3"/> 343 <param name="kmeans_k" value="2,3"/>
344 <param name="kmeans_toplabels" value="20"/>
346 <repeat name="colours"> 345 <repeat name="colours">
347 <param name="feature_color" value="#ff00ff"/> 346 <param name="feature_color" value="#ff00ff"/>
348 </repeat> 347 </repeat>
349 <repeat name="colours"> 348 <repeat name="colours">
350 <param name="feature_color" value="#0000FF"/> 349 <param name="feature_color" value="#0000FF"/>
351 </repeat> 350 </repeat>
352 <repeat name="colours"> 351 <repeat name="colours">
353 <param name="feature_color" value="#00C957"/> 352 <param name="feature_color" value="#00C957"/>
354 </repeat> 353 </repeat>
355 <output name="segmentationimages" file="kmeans_imzml.pdf" compare="sim_size" delta="20000"/> 354 <output name="segmentationimages" file="kmeans_analyze.pdf" compare="sim_size" delta="20000"/>
356 <output name="mzfeatures" file="toplabels_skm.tabular" compare="sim_size"/> 355 <output name="mzfeatures" file="toplabels_skm.tabular" compare="sim_size"/>
357 <output name="pixeloutput" file="cluster_skm.tabular" compare="sim_size"/> 356 <output name="pixeloutput" file="cluster_skm.tabular" compare="sim_size"/>
358 </test> 357 </test>
359 <test> 358 <test>
360 <param name="infile" value="preprocessing_results1.RData" ftype="rdata"/> 359 <param name="infile" value="preprocessed.RData" ftype="rdata"/>
361 <param name="segmentationtool" value="centroids"/> 360 <param name="segmentationtool" value="centroids"/>
362 <param name="centroids_r" ftype="text" value="1,2"/> 361 <param name="centroids_r" value="1,2"/>
363 <param name="centroids_k" ftype="text" value="5"/> 362 <param name="centroids_k" value="5"/>
364 <param name="centroids_toplabels" ftype="integer" value="100"/> 363 <param name="centroids_toplabels" value="50"/>
365 <repeat name="colours"> 364 <repeat name="colours">
366 <param name="feature_color" value="#0000FF"/> 365 <param name="feature_color" value="#0000FF"/>
367 </repeat> 366 </repeat>
368 <repeat name="colours"> 367 <repeat name="colours">
369 <param name="feature_color" value="#00C957"/> 368 <param name="feature_color" value="#00C957"/>
375 <param name="feature_color" value="#FFD700"/> 374 <param name="feature_color" value="#FFD700"/>
376 </repeat> 375 </repeat>
377 <repeat name="colours"> 376 <repeat name="colours">
378 <param name="feature_color" value="#848484"/> 377 <param name="feature_color" value="#848484"/>
379 </repeat> 378 </repeat>
380 <output name="segmentationimages" file="centroids_imzml.pdf" compare="sim_size" delta="20000"/> 379 <output name="segmentationimages" file="centroids_rdata.pdf" compare="sim_size" delta="20000"/>
381 <output name="mzfeatures" file="toplabels_ssc.tabular" compare="sim_size"/> 380 <output name="mzfeatures" file="toplabels_ssc.tabular" compare="sim_size"/>
382 <output name="pixeloutput" file="classes_ssc.tabular" compare="sim_size"/> 381 <output name="pixeloutput" file="classes_ssc.tabular" compare="sim_size"/>
383 </test> 382 </test>
384 </tests> 383 </tests>
385 <help> 384 <help>
386 <![CDATA[ 385 <![CDATA[
387 386
388 Spatially aware segmentation of mass-spectrometry imaging data by unsupervised clustering algorithms. Underlying structures can be identified with the following tools: pca, k-means clustering and spatial shrunken centroids. The spatialShrunkenCentroids method allows the number of segments to decrease according to the data. This allows automatic selection of the number 387 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org/>`_
389 of clusters. 388
389 This tool provides three different Cardinal functions for unsupervised clustering/spatial segmentation of mass-spectrometry imaging data.
390 390
391 Input data: 3 types of input data can be used: 391 Input data: 3 types of input data can be used:
392 392
393 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_ 393 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
394 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) 394 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
395 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) 395 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
396 396
397 The output of this tool contains a pdf with plots from the segmentation tools. 397 Options:
398
399 - PCA: principal component analysis
400 - k-means: spatially-aware k-means clustering
401 - spatial shrunken centroids: Allows the number of segments to decrease according to the data. This allows automatic selection of the number of clusters
402
403 Output:
404
405 - Pdf with the heatmaps and plots for the segmentation
406 - Tabular file with information on masses and pixels: loadings/scores (PCA), toplabels/clusters (k-means), toplabels/classes (spatial shrunken centroids)
407
398 ]]> 408 ]]>
399 </help> 409 </help>
400 <citations> 410 <citations>
401 <citation type="doi">10.1093/bioinformatics/btv146</citation> 411 <citation type="doi">10.1093/bioinformatics/btv146</citation>
402 </citations> 412 </citations>