comparison segmentation_tool.xml @ 0:0c1a9b68f436 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/msi_segmentation commit da5a0723327f7cce689b230ccd69f3edecb1bc6b
author galaxyp
date Sat, 24 Feb 2018 13:51:32 -0500
parents
children d4158c9955ea
comparison
equal deleted inserted replaced
-1:000000000000 0:0c1a9b68f436
1 <tool id="mass_spectrometry_imaging_segmentations" name="MSI segmentation" version="1.7.0">
2 <description>tool for spatial clustering</description>
3 <requirements>
4 <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement>
5 <requirement type="package" version="2.2.1">r-gridextra</requirement>
6 <requirement type="package" version="2.23-15">r-kernsmooth</requirement>
7 <requirement type="package" version="0.20-35">r-lattice</requirement>
8 </requirements>
9 <command detect_errors="exit_code">
10 <![CDATA[
11 ## Stage the input under the fixed local name the R script reads, print the generated script and its path to stdout, then run it.
12 #if $infile.ext == 'imzml'
13 cp '${infile.extra_files_path}/imzml' infile.imzML &&
14 cp '${infile.extra_files_path}/ibd' infile.ibd &&
15 #elif $infile.ext == 'analyze75'
16 cp '${infile.extra_files_path}/hdr' infile.hdr &&
17 cp '${infile.extra_files_path}/img' infile.img &&
18 cp '${infile.extra_files_path}/t2m' infile.t2m &&
19 #else
20 ln -s '$infile' infile.RData &&
21 #end if
22 cat '${MSI_segmentation}' &&
23 echo '${MSI_segmentation}' &&
24 Rscript '${MSI_segmentation}'
25
26 ]]>
27 </command>
28 <configfiles>
29 <configfile name="MSI_segmentation"><![CDATA[
30
31
32 ################################# load libraries and read file #########################
33
34
35 library(Cardinal)
36 library(gridExtra)
37 library(KernSmooth)
38 library(lattice)
39
40 ## Read the MSI dataset into `msidata`; the branch is chosen by the datatype of the Galaxy input.
41 ## The file names are the fixed local names staged by the command section. The RData branch only calls load(), so the saved file itself has to define `msidata`.
42 #if $infile.ext == 'imzml'
43 msidata <- readMSIData('infile.imzML')
44 #elif $infile.ext == 'analyze75'
45 msidata <- readMSIData('infile.hdr')
46 #else
47 load('infile.RData')
48 #end if
49
50 ###################################### file properties in numbers ######################
51 spectramatrix <- spectra(msidata)[] ## extract the intensity matrix once; every intensity statistic below reuses it instead of re-extracting it per call
52 ## Number of features (mz)
53 maxfeatures <- length(features(msidata))
54 ## Range mz
55 minmz <- round(min(mz(msidata)), digits=2)
56 maxmz <- round(max(mz(msidata)), digits=2)
57 ## Number of spectra (pixels)
58 pixelcount <- length(pixels(msidata))
59 ## Range x coordinates (first coordinate column)
60 minimumx <- min(coord(msidata)[,1])
61 maximumx <- max(coord(msidata)[,1])
62 ## Range y coordinates (second coordinate column)
63 minimumy <- min(coord(msidata)[,2])
64 maximumy <- max(coord(msidata)[,2])
65 ## Range and median of intensities, rounded to two digits
66 minint <- round(min(spectramatrix), digits=2)
67 maxint <- round(max(spectramatrix), digits=2)
68 medint <- round(median(spectramatrix), digits=2)
69 ## Number of intensities > 0; also decides further down whether segmentation is run at all
70 npeaks <- sum(spectramatrix > 0)
71 ## Number of cells in the intensity matrix (potential number of peaks)
72 numpeaks <- ncol(spectramatrix) * nrow(spectramatrix)
73 ## Percentage of intensities > 0
74 percpeaks <- round(npeaks/numpeaks*100, digits=2)
75 ## Number of empty TICs (columns of the intensity matrix that sum to zero)
76 TICs <- colSums(spectramatrix)
77 NumemptyTIC <- sum(TICs == 0)
78
79
80 ## Processing information stored with the dataset
81 processinginfo <- processingData(msidata)
82 centroidedinfo <- processinginfo@centroided # TRUE or FALSE
83
84 ## Each preprocessing slot is reported as the string 'FALSE' when it is empty (length 0) and as its stored value otherwise
85
86 ## normalization
87 normalizationinfo <- if (length(processinginfo@normalization) == 0) {
88 'FALSE'
89 } else {
90 processinginfo@normalization
91 }
92 ## smoothing
93 smoothinginfo <- if (length(processinginfo@smoothing) == 0) {
94 'FALSE'
95 } else {
96 processinginfo@smoothing
97 }
98 ## baseline
99 baselinereductioninfo <- if (length(processinginfo@baselineReduction) == 0) {
100 'FALSE'
101 } else {
102 processinginfo@baselineReduction
103 }
104 ## peak picking
105 peakpickinginfo <- if (length(processinginfo@peakPicking) == 0) {
106 'FALSE'
107 } else {
108 processinginfo@peakPicking
109 }
110
111 #############################################################################
112 ## Row labels of the summary table; "Preprocessing" is a heading row for the five entries after it
113 properties <- c("Number of mz features",
114 "Range of mz values [Da]",
115 "Number of pixels",
116 "Range of x coordinates",
117 "Range of y coordinates",
118 "Range of intensities",
119 "Median of intensities",
120 "Intensities > 0",
121 "Number of zero TICs",
122 "Preprocessing",
123 "Normalization",
124 "Smoothing",
125 "Baseline reduction",
126 "Peak picking",
127 "Centroided")
128 ## One display string per label above, in the same order; the heading row gets a single blank
129 values <- c(as.character(maxfeatures),
130 paste(minmz, maxmz, sep=" - "),
131 as.character(pixelcount),
132 paste(minimumx, maximumx, sep=" - "),
133 paste(minimumy, maximumy, sep=" - "),
134 paste(minint, maxint, sep=" - "),
135 as.character(medint),
136 paste(percpeaks, "%"),
137 as.character(NumemptyTIC),
138 " ",
139 as.character(normalizationinfo),
140 as.character(smoothinginfo),
141 as.character(baselinereductioninfo),
142 as.character(peakpickinginfo),
143 as.character(centroidedinfo))
144 ## Two-column table that grid.table() draws into the PDF
145 property_df <- data.frame(properties, values)
146
147
148 ######################################## PDF #############################################
149 ##########################################################################################
150 ##########################################################################################
151
152
153 pdf("segmentationpdf.pdf", fonts = "Times", pointsize = 12)
154 plot(0,type='n',axes=FALSE,ann=FALSE)
155 ## plot() with type='n' and no axes or annotation draws nothing itself; it only starts the page that the title and the table are placed on
156 title(main=paste0("Spatial segmentation for file: \n\n", "$infile.display_name"))
157 ## NOTE(review): the dataset name is interpolated unescaped into an R string literal - a name containing a double quote would presumably break the script; confirm
158
159 ############################# I) numbers ####################################
160 #############################################################################
161 grid.table(property_df, rows= NULL)
162 ## The summary table above is drawn in every case; segmentation only runs when at least one intensity is > 0 (npeaks)
163 if (npeaks > 0)
164 {
165 ## colourvector: the colours of the 'colours' repeat, joined by the Cheetah set directive below into a quoted, comma-separated list
166
167 ######################## II) segmentation tools #############################
168 #############################################################################
169 #set $color_string = ','.join(['"%s"' % $color.feature_color for $color in $colours])
170 colourvector = c($color_string)
171
172
173 #if str( $segm_cond.segmentationtool ) == 'pca':
174 print('pca')
175 ## PCA branch: fit the model, print the PCA image and the PCA plot into the PDF, export loadings and scores
176
177 ## Component labels "PC1".."PCn", built in one vectorised call; seq_len() yields no labels for a count of 0, where 1:0 would count down
178 component_vector = paste0("PC", seq_len($segm_cond.pca_ncomp))
179 ## NOTE(review): column/superpose/layout look like image() arguments, and the ncomp inside layout is not defined by this script - confirm PCA() ignores them
180 pca <- PCA(msidata, ncomp=$segm_cond.pca_ncomp, column = component_vector, superpose = FALSE,
181 method = "$segm_cond.pca_method", scale = $segm_cond.pca_scale, layout = c(ncomp, 1))
182
183 print(image(pca, main="PCA image", lattice=TRUE, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), contrast.enhance = "$segm_cond.pca_imagecontrast", smooth.image = "$segm_cond.pca_imagesmoothing", col=colourvector))
184 print(plot(pca, main="PCA plot", lattice=TRUE, col= colourvector, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9))))
185
186 ## Loadings are written to the mzfeatures output, scores to the pixeloutput output
187 pcaloadings = (pca@resultData\$ncomp\$loadings) ### loading for each mz value
188 pcascores = (pca@resultData\$ncomp\$scores) ### scores for each pixel
189
190 write.table(pcaloadings, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
191 write.table(pcascores, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
192
193 #elif str( $segm_cond.segmentationtool ) == 'kmeans':
194 print('kmeans')
195 ## k-means branch: run spatialKMeans, print its image and plot into the PDF, export the topLabels table and the cluster assignments
196 strip_style <- strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)) ## panel strip look shared by both figures
197 skm <- spatialKMeans(msidata, r=$segm_cond.kmeans_r, k=$segm_cond.kmeans_k, method="$segm_cond.kmeans_method")
198 print(image(skm, key=TRUE, main="K-means clustering", lattice=TRUE, strip = strip_style, contrast.enhance = "$segm_cond.kmeans_imagecontrast", col= colourvector, smooth.image = "$segm_cond.kmeans_imagesmoothing"))
199 print(plot(skm, main="K-means plot", lattice=TRUE, col= colourvector, strip = strip_style))
200
201 ## NOTE(review): the r accessor on resultData presumably resolves by partial name matching - confirm it still works when several r or k values are supplied
202 skm_clusters <- skm@resultData\$r\$cluster
203 skm_toplabels <- topLabels(skm, n=500)
204 ## The topLabels table goes to the mzfeatures output, the cluster assignments to the pixeloutput output
205 write.table(skm_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
206 write.table(skm_clusters, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
207
208
209 #elif str( $segm_cond.segmentationtool ) == 'centroids':
210 print('centroids')
211 ## centroids branch: run spatialShrunkenCentroids, print its image and plot into the PDF, export the topLabels table and the class assignments
212 strip_style <- strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)) ## panel strip look shared by both figures
213 ssc <- spatialShrunkenCentroids(msidata, r=$segm_cond.centroids_r, k=$segm_cond.centroids_k, s=$segm_cond.centroids_s, method="$segm_cond.centroids_method")
214 print(image(ssc, key=TRUE, main="Spatial shrunken centroids", lattice=TRUE, strip = strip_style, contrast.enhance = "$segm_cond.centroids_imagecontrast", col= colourvector, smooth.image = "$segm_cond.centroids_imagesmoothing"))
215 print(plot(ssc, main="Spatial shrunken centroids plot", lattice=TRUE, col= colourvector, strip = strip_style))
216
217 ssc_classes <- ssc@resultData\$r\$classes
218 ssc_toplabels <- topLabels(ssc, n=500)
219 ## The topLabels table goes to the mzfeatures output, the class assignments to the pixeloutput output
220 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
221 write.table(ssc_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
222
223
224 #end if
225 ## Close the PDF device after the selected segmentation branch has printed its figures
226 dev.off()
227 ## No intensity > 0: skip segmentation, report it on stdout and close the PDF, which then holds only the title and the summary table
228 }else{
229 print("Inputfile has no intensities > 0")
230 dev.off()
231 }
232
233 ]]></configfile>
234 </configfiles>
235 <inputs>
236 <param name="infile" type="data" format="imzml, rdata, analyze75"
237 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
238 help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
239 <conditional name="segm_cond">
240 <param name="segmentationtool" type="select" label="Select the tool for spatial clustering.">
241 <option value="pca" selected="True">pca</option>
242 <option value="kmeans">k-means</option>
243 <option value="centroids">shrunken centroids</option>
244 </param>
245 <when value="pca">
246 <param name="pca_ncomp" type="integer" value="2" min="1"
247 label="The number of principal components to calculate."/>
248 <param name="pca_method" type="select"
249 label="The function used to calculate the singular value decomposition.">
250 <option value="irlba" selected="True">irlba</option>
251 <option value="svd">svd</option>
252 </param>
253 <param name="pca_scale" type="select" display="radio" optional="False"
254 label="Should the data be scaled first?">
255 <option value="TRUE">yes</option>
256 <option value="FALSE" selected="True">no</option>
257 </param>
258 <param name="pca_imagecontrast" type="select" label="Select a contrast enhancement function." help="The 'histogram' equalization method flattens the distribution of intensities. The hotspot 'suppression' method uses thresholding to reduce the intensities of hotspots">
259 <option value="none" selected="True">none</option>
260 <option value="suppression">suppression</option>
261 <option value="histogram">histogram</option>
262 </param>
263 <param name="pca_imagesmoothing" type="select" label="Select an image smoothing function." help="The 'gaussian' smoothing method smooths images with a simple gaussian kernel. The 'adaptive' method uses bilateral filtering to preserve edges.">
264 <option value="none" selected="True">none</option>
265 <option value="gaussian">gaussian</option>
266 <option value="adaptive">adaptive</option>
267 </param>
268 </when>
269
270 <when value="kmeans">
271 <param name="kmeans_r" type="text" value="2"
272 label="The spatial neighborhood radius of nearby pixels to consider (r)."/>
273 <param name="kmeans_k" type="text" value="3"
274 label="The number of clusters (k)."/>
275 <param name="kmeans_method" type="select" display="radio"
276 label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) clustering, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) clustering.">
277 <option value="gaussian">gaussian</option>
278 <option value="adaptive" selected="True">adaptive</option>
279 </param>
280 <param name="kmeans_imagecontrast" type="select" label="Select a contrast enhancement function." help="The 'histogram' equalization method flattens the distribution of intensities. The hotspot 'suppression' method uses thresholding to reduce the intensities of hotspots">
281 <option value="none" selected="True">none</option>
282 <option value="suppression">suppression</option>
283 <option value="histogram">histogram</option>
284 </param>
285 <param name="kmeans_imagesmoothing" type="select" label="Select an image smoothing function." help="The 'gaussian' smoothing method smooths images with a simple gaussian kernel. The 'adaptive' method uses bilateral filtering to preserve edges.">
286 <option value="none" selected="True">none</option>
287 <option value="gaussian">gaussian</option>
288 <option value="adaptive">adaptive</option>
289 </param>
290 </when>
291
292 <when value="centroids">
293 <param name="centroids_r" type="text" value="2"
294 label="The spatial neighborhood radius of nearby pixels to consider (r)."/>
295 <param name="centroids_k" type="text" value="5"
296 label="The initial number of clusters (k)."/>
297 <param name="centroids_s" type="integer" value="2"
298 label="The sparsity thresholding parameter by which to shrink the t-statistics (s)."
299 help="As s increases, fewer mass features (m/z values) will be used in the spatial segmentation, and only the informative mass features will be retained."/>
300 <param name="centroids_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights.">
301 <option value="gaussian" selected="True">gaussian</option>
302 <option value="adaptive">adaptive</option>
303 </param>
304 <param name="centroids_imagecontrast" type="select" label="Select a contrast enhancement function." help="The 'histogram' equalization method flattens the distribution of intensities. The hotspot 'suppression' method uses thresholding to reduce the intensities of hotspots">
305 <option value="none" selected="True">none</option>
306 <option value="suppression">suppression</option>
307 <option value="histogram">histogram</option>
308 </param>
309 <param name="centroids_imagesmoothing" type="select" label="Select an image smoothing function." help="The 'gaussian' smoothing method smooths images with a simple gaussian kernel. The 'adaptive' method uses bilateral filtering to preserve edges.">
310 <option value="none" selected="True">none</option>
311 <option value="gaussian">gaussian</option>
312 <option value="adaptive">adaptive</option>
313 </param>
314 </when>
315 </conditional>
316 <repeat name="colours" title="Colours for the plots" min="1" max="50">
317 <param name="feature_color" type="color" label="Colours" value="#ff00ff" help="The number of colours should be the same as the number of components or clusters">
318 <sanitizer>
319 <valid initial="string.letters,string.digits">
320 <add value="#" />
321 </valid>
322 </sanitizer>
323 </param>
324 </repeat>
325 </inputs>
326 <outputs>
327 <data format="pdf" name="segmentationimages" from_work_dir="segmentationpdf.pdf" label = "${tool.name} on $infile.display_name"/>
328 <data format="tabular" name="mzfeatures" label="mzfeatures ${tool.name} on $infile.display_name"/>
329 <data format="tabular" name="pixeloutput" label="pixels ${tool.name} on $infile.display_name"/>
330 </outputs>
331 <tests>
332 <test>
333 <param name="infile" value="" ftype="imzml">
334 <composite_data value="Example_Continuous.imzML"/>
335 <composite_data value="Example_Continuous.ibd"/>
336 </param>
337 <param name="segmentationtool" value="pca"/>
338 <repeat name="colours">
339 <param name="feature_color" value="#ff00ff"/>
340 </repeat>
341 <repeat name="colours">
342 <param name="feature_color" value="#0000FF"/>
343 </repeat>
344 <output name="segmentationimages" file="pca_imzml.pdf" compare="sim_size" delta="20000"/>
345 <output name="mzfeatures" file="pcaloadings_results1.txt" compare="sim_size"/>
346 <output name="pixeloutput" file="pcascores_results1.txt" compare="sim_size"/>
347 </test>
348 <test>
349 <param name="infile" value="" ftype="analyze75">
350 <composite_data value="Analyze75.hdr" />
351 <composite_data value="Analyze75.img" />
352 <composite_data value="Analyze75.t2m" />
353 </param>
354 <param name="segmentationtool" value="kmeans"/>
355 <repeat name="colours">
356 <param name="feature_color" value="#ff00ff"/>
357 </repeat>
358 <repeat name="colours">
359 <param name="feature_color" value="#0000FF"/>
360 </repeat>
361 <repeat name="colours">
362 <param name="feature_color" value="#00C957"/>
363 </repeat>
364 <output name="segmentationimages" file="kmeans_imzml.pdf" compare="sim_size" delta="20000"/>
365 <output name="mzfeatures" file="toplabels_results1.txt" compare="sim_size"/>
366 <output name="pixeloutput" file="cluster_results1.txt" compare="sim_size"/>
367 </test>
368 <test>
369 <param name="infile" value="preprocessing_results1.RData" ftype="rdata"/>
370 <param name="segmentationtool" value="centroids"/>
371 <repeat name="colours">
372 <param name="feature_color" value="#0000FF"/>
373 </repeat>
374 <repeat name="colours">
375 <param name="feature_color" value="#00C957"/>
376 </repeat>
377 <repeat name="colours">
378 <param name="feature_color" value="#B0171F"/>
379 </repeat>
380 <repeat name="colours">
381 <param name="feature_color" value="#FFD700"/>
382 </repeat>
383 <repeat name="colours">
384 <param name="feature_color" value="#848484"/>
385 </repeat>
386 <output name="segmentationimages" file="centroids_imzml.pdf" compare="sim_size" delta="20000"/>
387 <output name="mzfeatures" file="toplabels_results1.txt" compare="sim_size"/>
388 <output name="pixeloutput" file="classes_results1.txt" compare="sim_size"/>
389 </test>
390 </tests>
391 <help>
392 <![CDATA[
393
394 Spatially aware segmentation of mass-spectrometry imaging data by unsupervised clustering algorithms. Underlying structures can be identified with the following tools: pca, k-means clustering and spatial shrunken centroids. The spatialShrunkenCentroids method allows the number of segments to decrease according to the data. This allows automatic selection of the number
395 of clusters.
396
397 Input data: 3 types of input data can be used:
398
399 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_
400 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
401 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
402
403 The output of this tool contains a pdf with plots from the segmentation tools.
404 ]]>
405 </help>
406 <citations>
407 <citation type="doi">10.1093/bioinformatics/btv146</citation>
408 </citations>
409 </tool>