Mercurial > repos > galaxyp > mass_spectrometry_imaging_segmentations
view segmentation_tool.xml @ 6:80b6b96a175c draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_segmentation commit 37da74ed68228b16efbdbde776e7c38cc06eb5d5
author | galaxyp |
---|---|
date | Tue, 19 Jun 2018 18:08:36 -0400 |
parents | cee9cf693709 |
children | adfef12c7e31 |
line wrap: on
line source
<tool id="mass_spectrometry_imaging_segmentations" name="MSI segmentation" version="1.10.0.2">
    <!--
        Galaxy wrapper around three Cardinal functions for unsupervised spatial
        segmentation of mass spectrometry imaging data: PCA, spatialKMeans and
        spatialShrunkenCentroids.

        Flow:
          1. The command section links the input dataset into the working
             directory under the fixed base name "infile".
          2. The Cheetah-templated R script (configfile MSI_segmentation) reads
             the data, prints a property table into a PDF, runs the selected
             method, and writes a PDF, two tabular files and optionally an
             .RData file.

        Inside the configfile, lines starting with two hash signs are Cheetah
        comments and are not part of the generated R script; "\$" is an escaped
        dollar sign that reaches R as a literal "$".
    -->
    <description>mass spectrometry imaging spatial clustering</description>
    <requirements>
        <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
        <requirement type="package" version="2.2.1">r-gridextra</requirement>
        <requirement type="package" version="0.20-35">r-lattice</requirement>
    </requirements>
    <command detect_errors="exit_code">
    <![CDATA[

        ## Link the input into the working directory under the base name
        ## 'infile' that the R script reads from.
        #if $infile.ext == 'imzml'
            ln -s '${infile.extra_files_path}/imzml' infile.imzML &&
            ln -s '${infile.extra_files_path}/ibd' infile.ibd &&
        #elif $infile.ext == 'analyze75'
            ln -s '${infile.extra_files_path}/hdr' infile.hdr &&
            ln -s '${infile.extra_files_path}/img' infile.img &&
            ln -s '${infile.extra_files_path}/t2m' infile.t2m &&
        #else
            ln -s '$infile' infile.RData &&
        #end if
        ## Dump the generated R script and its path to stdout before running it.
        cat '${MSI_segmentation}' &&
        echo '${MSI_segmentation}' &&
        Rscript '${MSI_segmentation}'

    ]]>
    </command>
    <configfiles>
        <configfile name="MSI_segmentation"><![CDATA[

################################# load libraries and read file #################

library(Cardinal)
library(gridExtra)
library(lattice)

## Read MALDI Imaging dataset
## For the RData case the loaded file is expected to define 'msidata'
## (see the tool help); every later step uses that variable name.

#if $infile.ext == 'imzml'
    msidata <- readImzML('infile', mass.accuracy=$accuracy, units.accuracy = "$units")
#elif $infile.ext == 'analyze75'
    msidata = readAnalyze('infile')
#else
    load('infile.RData')
#end if

## create full matrix to make processed imzML files compatible with segmentation
iData(msidata) <- iData(msidata)[]

###################################### file properties in numbers ##############

## Number of features (m/z)
maxfeatures = length(features(msidata))
## Range m/z
minmz = round(min(mz(msidata)), digits=2)
maxmz = round(max(mz(msidata)), digits=2)
## Number of spectra (pixels)
pixelcount = length(pixels(msidata))
## Range x coordinates
minimumx = min(coord(msidata)[,1])
maximumx = max(coord(msidata)[,1])
## Range y coordinates
minimumy = min(coord(msidata)[,2])
maximumy = max(coord(msidata)[,2])
## Range of intensities
minint = round(min(spectra(msidata)[]), digits=2)
maxint = round(max(spectra(msidata)[]), digits=2)
medint = round(median(spectra(msidata)[]), digits=2)
## Number of intensities > 0
npeaks= sum(spectra(msidata)[]>0)
## Spectra multiplied with m/z (potential number of peaks)
numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
## Percentage of intensities > 0
percpeaks = round(npeaks/numpeaks*100, digits=2)
## Number of empty TICs
TICs = colSums(spectra(msidata)[])
NumemptyTIC = sum(TICs == 0)

## Processing informations
processinginfo = processingData(msidata)
centroidedinfo = processinginfo@centroided # TRUE or FALSE

## if TRUE write processinginfo if FALSE write FALSE

## normalization
if (length(processinginfo@normalization) == 0) {
    normalizationinfo='FALSE'
} else {
    normalizationinfo=processinginfo@normalization
}
## smoothing
if (length(processinginfo@smoothing) == 0) {
    smoothinginfo='FALSE'
} else {
    smoothinginfo=processinginfo@smoothing
}
## baseline
if (length(processinginfo@baselineReduction) == 0) {
    baselinereductioninfo='FALSE'
} else {
    baselinereductioninfo=processinginfo@baselineReduction
}
## peak picking
if (length(processinginfo@peakPicking) == 0) {
    peakpickinginfo='FALSE'
} else {
    peakpickinginfo=processinginfo@peakPicking
}

## Two parallel vectors: row labels and their values for the summary table
properties = c("Number of m/z features",
               "Range of m/z values",
               "Number of pixels",
               "Range of x coordinates",
               "Range of y coordinates",
               "Range of intensities",
               "Median of intensities",
               "Intensities > 0",
               "Number of zero TICs",
               "Preprocessing",
               "Normalization",
               "Smoothing",
               "Baseline reduction",
               "Peak picking",
               "Centroided")

values = c(paste0(maxfeatures),
           paste0(minmz, " - ", maxmz),
           paste0(pixelcount),
           paste0(minimumx, " - ", maximumx),
           paste0(minimumy, " - ", maximumy),
           paste0(minint, " - ", maxint),
           paste0(medint),
           paste0(percpeaks, " %"),
           paste0(NumemptyTIC),
           paste0(" "),
           paste0(normalizationinfo),
           paste0(smoothinginfo),
           paste0(baselinereductioninfo),
           paste0(peakpickinginfo),
           paste0(centroidedinfo))

property_df = data.frame(properties, values)

######################################## PDF ###################################
################################################################################
################################################################################

## The PDF is written to the working directory; the pdf output picks it up
## through from_work_dir.
pdf("segmentationpdf.pdf", fonts = "Times", pointsize = 12)
plot(0,type='n',axes=FALSE,ann=FALSE)
title(main=paste0("Spatial segmentation for file: \n\n", "$infile.display_name"))

############################# I) numbers ####################################
#############################################################################
grid.table(property_df, rows= NULL)

## Segmentation is only attempted when at least one intensity is > 0
if (npeaks > 0)
{

    ######################## II) segmentation tools #############################
    #############################################################################

    ## Build a quoted, comma separated list of the colours chosen in the repeat
    #set $color_string = ','.join(['"%s"' % $color.feature_color for $color in $colours])
    colourvector = c($color_string)

    ### preparation for images and plots:
    #if str($image_cond.image_type) == "standard_image":
        print("standard image")
        strip_input = TRUE
        lattice_input = FALSE
    #elif str($image_cond.image_type) == "lattice_image":
        print("lattice image")
        strip_input = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9))
        lattice_input = TRUE
    #end if

    #if str( $segm_cond.segmentationtool ) == 'pca':
        print('pca')
        ##pca

        ## Component names PC1..PCn; seq_len() avoids the c(1, 0) sequence
        ## that 1:n produces for n = 0.
        component_vector = paste0("PC", seq_len($segm_cond.pca_ncomp))

        ## NOTE(review): 'ncomp' in layout = c(ncomp, 1) is not defined anywhere
        ## in this script; column/superpose/layout look like plotting arguments.
        ## Confirm that PCA() ignores them before relying on or removing them.
        pca_result = PCA(msidata, ncomp=$segm_cond.pca_ncomp, column = component_vector, superpose = FALSE,
                         method = "$segm_cond.pca_method", scale = $segm_cond.pca_scale, layout = c(ncomp, 1))

        ### images in pdf file
        print(image(pca_result, main="PCA image", lattice=lattice_input, strip = strip_input, col=colourvector))
        for (PCs in seq_len($segm_cond.pca_ncomp)){
            print(image(pca_result, column = c(paste0("PC",PCs)), superpose = FALSE, col.regions = risk.colors(100)))}

        ### plots in pdf file
        print(plot(pca_result, main="PCA plot", lattice=lattice_input, col= colourvector, strip = strip_input))
        for (PCs in seq_len($segm_cond.pca_ncomp)){
            print(plot(pca_result, column = c(paste0("PC",PCs)), superpose = FALSE))}

        ### values in tabular files
        pcaloadings = (pca_result@resultData\$ncomp\$loadings) ### loading for each m/z value
        pcascores = (pca_result@resultData\$ncomp\$scores) ### scores for each pixel

        write.table(pcaloadings, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
        write.table(pcascores, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")

        ## optional output as .RData
        #if $output_rdata:

            ## save as (.RData)
            ## The PCA result object is called pca_result; no object named
            ## 'pca' exists in this script.
            save(pca_result, file="$segmentation_rdata")

        #end if

    #elif str( $segm_cond.segmentationtool ) == 'kmeans':
        print('kmeans')
        ##k-means

        skm = spatialKMeans(msidata, r=c($segm_cond.kmeans_r), k=c($segm_cond.kmeans_k), method="$segm_cond.kmeans_method")
        print(image(skm, key=TRUE, main="K-means clustering", lattice=lattice_input, strip=strip_input, col= colourvector, layout=c(1,1)))
        print(plot(skm, main="K-means plot", lattice=lattice_input, col= colourvector, strip=strip_input, layout=c($segm_cond.kmeans_layout)))

        ## One column of cluster assignments per entry in skm@resultData;
        ## starts from a data frame with pixelcount rows and no columns.
        skm_clusters = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
        for (iteration in seq_along(skm@resultData)){
            skm_cluster = ((skm@resultData)[[iteration]]\$cluster)
            skm_clusters = cbind(skm_clusters, skm_cluster) }
        colnames(skm_clusters) = names((skm@resultData))

        skm_toplabels = topLabels(skm, n=$segm_cond.kmeans_toplabels)

        write.table(skm_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
        write.table(skm_clusters, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")

        ## optional output as .RData
        #if $output_rdata:

            ## save as (.RData)
            save(skm, file="$segmentation_rdata")

        #end if

    #elif str( $segm_cond.segmentationtool ) == 'centroids':
        print('centroids')
        ##centroids

        ssc = spatialShrunkenCentroids(msidata, r=c($segm_cond.centroids_r), k=c($segm_cond.centroids_k), s=c($segm_cond.centroids_s), method="$segm_cond.centroids_method")
        print(image(ssc, key=TRUE, main="Spatial shrunken centroids", lattice=lattice_input, strip = strip_input, col= colourvector,layout=c(1,1)))
        print(plot(ssc, main="Spatial shrunken centroids plot", lattice=lattice_input, col= colourvector, strip = strip_input,layout=c($segm_cond.centroids_layout)))
        print(plot(ssc, mode = "tstatistics",key = TRUE, lattice=lattice_input, layout = c($segm_cond.centroids_layout), main="t-statistics", col=colourvector))
        print(plot(summary(ssc), main = "Number of segments",lattice=lattice_input))

        ## One column of class assignments per entry in ssc@resultData
        ssc_classes = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
        for (iteration in seq_along(ssc@resultData)){
            ssc_class = ((ssc@resultData)[[iteration]]\$classes)
            ssc_classes = cbind(ssc_classes, ssc_class) }
        colnames(ssc_classes) = names((ssc@resultData))

        ssc_toplabels = topLabels(ssc, n=$segm_cond.centroids_toplabels)

        write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
        write.table(ssc_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")

        ## optional output as .RData
        #if $output_rdata:

            ## save as (.RData)
            save(ssc, file="$segmentation_rdata")

        #end if

    #end if

    dev.off()

}else{
    print("Inputfile has no intensities > 0")
    dev.off()
}

    ]]></configfile>
    </configfiles>
    <inputs>
        <param name="infile" type="data" format="imzml,rdata,analyze75" label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
            help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
        <param name="accuracy" type="float" value="50" label="Only for processed imzML files: enter mass accuracy to which the m/z values will be binned" help="This should be set to the native accuracy of the mass spectrometer, if known"/>
        <param name="units" display="radio" type="select" label="Only for processed imzML files: unit of the mass accuracy" help="either m/z or ppm">
            <option value="mz" >mz</option>
            <option value="ppm" selected="True" >ppm</option>
        </param>
        <conditional name="segm_cond">
            <param name="segmentationtool" type="select" label="Select the tool for spatial clustering">
                <option value="pca" selected="True">pca</option>
                <option value="kmeans">k-means</option>
                <option value="centroids">spatial shrunken centroids</option>
            </param>
            <when value="pca">
                <!-- min="1": the R script builds one component name and one plot page per component -->
                <param name="pca_ncomp" type="integer" value="2" min="1" label="The number of principal components to calculate"/>
                <param name="pca_method" type="select" label="The function used to calculate the singular value decomposition">
                    <option value="irlba" selected="True">irlba</option>
                    <option value="svd">svd</option>
                </param>
                <param name="pca_scale" type="select" display="radio" optional="False" label="Scaling of data before analysis">
                    <option value="TRUE">yes</option>
                    <option value="FALSE" selected="True">no</option>
                </param>
            </when>
            <when value="kmeans">
                <param name="kmeans_r" type="text" value="2" label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
                <param name="kmeans_k" type="text" value="3" label="The number of clusters (k)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
                <param name="kmeans_method" type="select" display="radio"
                       label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) clustering, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) clustering">
                    <option value="gaussian">gaussian</option>
                    <option value="adaptive" selected="True">adaptive</option>
                </param>
                <param name="kmeans_toplabels" type="integer" value="500" label="Number of toplabels (m/z) which should be written in tabular output"/>
                <param name="kmeans_layout" type="text" value="1,1" label="Number of rows and columns to plot pictures in pdf output" help="e.g. 1,1 means 1 plot per page; 2,3 means 2 rows with 3 plots each = 6 plots per page"/>
            </when>
            <when value="centroids">
                <param name="centroids_r" type="text" value="2" label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
                <param name="centroids_k" type="text" value="5" label="The initial number of clusters (k)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
                <param name="centroids_s" type="text" value="2" label="The sparsity thresholding parameter by which to shrink the t-statistics (s)"
                       help="As s increases, fewer m/z features (m/z values) will be used in the spatial segmentation, and only the informative m/z features will be retained. Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
                <param name="centroids_method" type="select" display="radio"
                       label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights">
                    <option value="gaussian">gaussian</option>
                    <option value="adaptive" selected="True">adaptive</option>
                </param>
                <param name="centroids_toplabels" type="integer" value="500" label="Number of toplabels (m/z) which should be written in tabular output"/>
                <param name="centroids_layout" type="text" value="1,1" label="Number of rows and columns to plot pictures in pdf output" help="e.g. 1,1 means 1 plot per page; 2,3 means 2 rows with 3 plots each = 6 plots per page"/>
            </when>
        </conditional>
        <conditional name="image_cond">
            <param name="image_type" type="select" label="Select the image type">
                <option value="standard_image" selected="True">standard</option>
                <option value="lattice_image">lattice</option>
            </param>
            <when value="standard_image"/>
            <when value="lattice_image"/>
        </conditional>
        <repeat name="colours" title="Colours for the plots" min="1" max="50">
            <param name="feature_color" type="color" label="Colours" value="#ff00ff" help="Numbers of columns should be the same as number of components">
                <!-- only letters, digits and '#' may reach the R script -->
                <sanitizer>
                    <valid initial="string.letters,string.digits">
                        <add value="#" />
                    </valid>
                </sanitizer>
            </param>
        </repeat>
        <param name="output_rdata" type="boolean" display="radio" label="Results as .RData output"/>
    </inputs>
    <outputs>
        <data format="pdf" name="segmentationimages" from_work_dir="segmentationpdf.pdf" label="$infile.display_name segmentation"/>
        <data format="tabular" name="mzfeatures" label="$infile.display_name m/z features"/>
        <data format="tabular" name="pixeloutput" label="$infile.display_name pixels"/>
        <!-- only created when the user asks for the .RData result -->
        <data format="rdata" name="segmentation_rdata" label="$infile.display_name segmentation">
            <filter>output_rdata</filter>
        </data>
    </outputs>
    <tests>
        <!-- PCA on a continuous imzML file, lattice images -->
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
            <param name="segmentationtool" value="pca"/>
            <param name="image_type" value="lattice_image"/>
            <repeat name="colours">
                <param name="feature_color" value="#ff00ff"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#0000FF"/>
            </repeat>
            <output name="segmentationimages" file="pca_imzml.pdf" compare="sim_size" delta="20000"/>
            <output name="mzfeatures" file="loadings_pca.tabular" compare="sim_size"/>
            <output name="pixeloutput" file="scores_pca.tabular" compare="sim_size"/>
        </test>
        <!-- spatial k-means on Analyze7.5 input, with .RData output -->
        <test>
            <param name="infile" value="" ftype="analyze75">
                <composite_data value="Analyze75.hdr" />
                <composite_data value="Analyze75.img" />
                <composite_data value="Analyze75.t2m" />
            </param>
            <param name="segmentationtool" value="kmeans"/>
            <param name="kmeans_r" value="1:3"/>
            <param name="kmeans_k" value="2,3"/>
            <param name="kmeans_toplabels" value="20"/>
            <repeat name="colours">
                <param name="feature_color" value="#ff00ff"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#0000FF"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#00C957"/>
            </repeat>
            <param name="output_rdata" value="True"/>
            <output name="segmentationimages" file="kmeans_analyze.pdf" compare="sim_size" delta="20000"/>
            <output name="mzfeatures" file="toplabels_skm.tabular" compare="sim_size"/>
            <output name="pixeloutput" file="cluster_skm.tabular" compare="sim_size"/>
            <output name="segmentation_rdata" file="cluster_skm.RData" compare="sim_size"/>
        </test>
        <!-- spatial shrunken centroids on a Cardinal object stored as RData -->
        <test>
            <param name="infile" value="preprocessed.RData" ftype="rdata"/>
            <param name="segmentationtool" value="centroids"/>
            <param name="centroids_r" value="1,2"/>
            <param name="centroids_k" value="3"/>
            <param name="centroids_toplabels" value="50"/>
            <repeat name="colours">
                <param name="feature_color" value="#0000FF"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#00C957"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#B0171F"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#FFD700"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#848484"/>
            </repeat>
            <output name="segmentationimages" file="centroids_rdata.pdf" compare="sim_size" delta="20000"/>
            <output name="mzfeatures" file="toplabels_ssc.tabular" compare="sim_size"/>
            <output name="pixeloutput" file="classes_ssc.tabular" compare="sim_size"/>
        </test>
    </tests>
    <help>
        <![CDATA[

Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org/>`_

This tool provides three different Cardinal functions for unsupervised clustering/spatial segmentation of mass spectrometry imaging data.

Input data: 3 types of input data can be used:

- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
- Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)

Options:

- PCA: principal component analysis
- k-means: spatially-aware k-means clustering
- spatial shrunken centroids: Allows the number of segments to decrease according to the data. This allows automatic selection of the number of clusters

Output:

- Pdf with the heatmaps and plots for the segmentation
- Tabular file with information on m/z and pixels: loadings/scores (PCA), toplabels/clusters (k-means), toplabels/classes (spatial shrunken centroids)
- Optional .RData file which contains the segmentation results and can be used for further exploration in R

        ]]>
    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btv146</citation>
    </citations>
</tool>