Mercurial > repos > galaxyp > cardinal_segmentations
view segmentation.xml @ 10:f4fcd7c81b5b draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit ca89f8e007c6b17f7c30066729e05b8686ab975a"
author | galaxyp |
---|---|
date | Sun, 27 Sep 2020 10:55:52 +0000 |
parents | b591450b3d1c |
children | 050bcc806da2 |
line wrap: on
line source
<tool id="cardinal_segmentations" name="MSI segmentation" version="@VERSION@.0">
    <!--
        Galaxy wrapper around three unsupervised Cardinal methods for mass
        spectrometry imaging data: PCA, spatially-aware k-means and spatial
        shrunken centroids.

        The R script below is a Cheetah template: "#if / #elif / #end if" and
        "#set" lines are evaluated by Galaxy before R runs, "$name" is replaced
        by the parameter value, and a literal dollar sign for R is written with
        a leading backslash. Lines starting with two hashes are template
        comments.

        Tokens of the form @NAME@ and the <expand macro="..."/> elements are
        defined in macros.xml, which is not part of this file.
    -->
    <description>mass spectrometry imaging spatial clustering</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <requirement type="package" version="2.3">r-gridextra</requirement>
    </expand>
    <!-- Print the generated R script to the job log, then run it. -->
    <command detect_errors="exit_code">
    <![CDATA[

        @INPUT_LINKING@
        cat '${MSI_segmentation}' &&
        Rscript '${MSI_segmentation}'

    ]]>
    </command>
    <configfiles>
        <configfile name="MSI_segmentation"><![CDATA[

################################# load libraries and read file #################

library(Cardinal)
library(gridExtra)

@READING_MSIDATA@

msidata = as(msidata, "MSImageSet") ##coercion to MSImageSet

## remove duplicated coordinates
msidata <- msidata[,!duplicated(coord(msidata))]

## NOTE(review): property_df, npeaks, pixelcount, minimumy, maximumy and
## maximumx are used below but not defined in this file; presumably they are
## created by the macro on the next line - confirm against macros.xml
@DATA_PROPERTIES_INRAM@

######################################## PDF ###################################
################################################################################
################################################################################

pdf("segmentationpdf.pdf", fonts = "Times", pointsize = 12)
plot(0,type='n',axes=FALSE,ann=FALSE)

title(main=paste0("Spatial segmentation for file: \n\n", "$infile.display_name"))

############################# I) numbers ####################################
#############################################################################

grid.table(property_df, rows= NULL)

## segmentation only runs when there are peaks and no NA intensities
if (npeaks > 0 && sum(is.na(spectra(msidata)))==0) {

    ######################## II) segmentation tools #############################
    #############################################################################

    ## build an R character vector from the colours chosen in the repeat block
    #set $color_string = ','.join(['"%s"' % $color.feature_color for $color in $colours])
    colourvector = c($color_string)

    ## set seed to make analysis reproducible
    set.seed($setseed)

    #if str( $segm_cond.segmentationtool ) == 'pca':
        print('pca')
        ##pca

        ## names of the requested components: "PC1", "PC2", ...
        component_vector = paste0("PC", seq_len($segm_cond.pca_ncomp))

        ## NOTE(review): 'ncomp' inside layout is not defined in the visible
        ## script; the call is kept exactly as it was - verify it is intended
        pca_result = PCA(msidata, ncomp=$segm_cond.pca_ncomp, column = component_vector, superpose = FALSE,
                         method = "$segm_cond.pca_method", scale = $segm_cond.pca_scale, layout = c(ncomp, 1))

        ## remove msidata to clean up RAM space
        rm(msidata)
        gc()

        ### table in pdf file
        plot(0,type='n',axes=FALSE,ann=FALSE)
        sd_table = as.data.frame(round(pca_result@resultData\$ncomp\$sdev, digits=2))
        colnames(sd_table) = "Standard deviation"
        PC_vector = paste0("PC", seq_len($segm_cond.pca_ncomp))
        sd_table = cbind(PC_vector, sd_table)
        colnames(sd_table)[1] = "Principal components"
        grid.table(sd_table, rows=NULL)

        ### images in pdf file
        print(image(pca_result, main="PCA image", strip = FALSE, col=colourvector, ylim=c(maximumy+2, minimumy-2)))
        for (PCs in seq_len($segm_cond.pca_ncomp)){
            print(image(pca_result, column = c(paste0("PC",PCs)),strip = FALSE, superpose = FALSE, main=paste0("PC", PCs), col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))
        }

        ### plots in pdf file
        print(plot(pca_result, main="PCA plot", col= colourvector, strip = FALSE))
        for (PCs in seq_len($segm_cond.pca_ncomp)){
            print(plot(pca_result, column = c(paste0("PC",PCs)),main=paste0("PC", PCs),strip = FALSE,superpose = FALSE))
        }

        ### values in tabular files
        ### loading for each m/z value
        pcaloadings = formatC(pca_result@resultData\$ncomp\$loadings, format = "e", digits = 6)
        ## the text after " = " in each row name becomes the mz column
        pcaloadings2 = cbind(matrix(unlist(strsplit(rownames(pcaloadings), " = ")), ncol=2, byrow=TRUE)[,2], pcaloadings)
        colnames(pcaloadings2) = c("mz", colnames(pcaloadings))
        ### scores for each pixel
        pcascores = round(pca_result@resultData\$ncomp\$scores, digits=6)

        ## pixel names and coordinates
        ## to remove potential sample names and z dimension, split at comma and take only x and y
        x_coords = unlist(lapply(strsplit(rownames(pcascores), ","), `[[`, 1))
        y_coords = unlist(lapply(strsplit(rownames(pcascores), ","), `[[`, 2))
        x_coordinates = gsub("x = ","",x_coords)
        y_coordinates = gsub(" y = ","",y_coords)
        pixel_names = paste0("xy_", x_coordinates, "_", y_coordinates)
        pcascores2 = data.frame(pixel_names, x_coordinates, y_coordinates, pcascores)
        colnames(pcascores2) = c("pixel names", "x", "y", colnames(pcascores))
        write.table(pcaloadings2, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
        write.table(pcascores2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")

        ## optional output as .RData
        #if $output_rdata:

            ## save as (.RData)
            ## the PCA result lives in pca_result; no object called pca exists
            save(pca_result, file="$segmentation_rdata")

        #end if

    #elif str( $segm_cond.segmentationtool ) == 'kmeans':
        print('kmeans')
        ##k-means

        skm = spatialKMeans(msidata, r=c($segm_cond.kmeans_r), k=c($segm_cond.kmeans_k), method="$segm_cond.kmeans_method")

        ## remove msidata to clean up RAM space
        rm(msidata)
        gc()

        print(image(skm, key=TRUE, main="K-means clustering", strip=FALSE, col= colourvector, layout=c(1,1), ylim=c(maximumy+2, minimumy-2)))
        print(plot(skm, main="K-means plot", col= colourvector, strip=FALSE, layout=c(1,1)))

        ## one column of cluster assignments per entry of skm@resultData;
        ## the row names of the combined table are parsed for coordinates
        ## below, so the cbind construction is kept as it was
        skm_clusters = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
        for (iteration in seq_along(skm@resultData)){
            skm_cluster = ((skm@resultData)[[iteration]]\$cluster)
            skm_clusters = cbind(skm_clusters, skm_cluster)
        }

        ## pixel names and coordinates
        ## to remove potential sample names and z dimension, split at comma and take only x and y
        x_coords = unlist(lapply(strsplit(rownames(skm_clusters), ","), `[[`, 1))
        y_coords = unlist(lapply(strsplit(rownames(skm_clusters), ","), `[[`, 2))
        x_coordinates = gsub("x = ","",x_coords)
        y_coordinates = gsub(" y = ","",y_coords)
        pixel_names = paste0("xy_", x_coordinates, "_", y_coordinates)
        skm_clusters2 = data.frame(pixel_names, x_coordinates, y_coordinates, skm_clusters)
        colnames(skm_clusters2) = c("pixel names", "x", "y",names(skm@resultData))

        skm_toplabels = topFeatures(skm, n=$segm_cond.kmeans_toplabels)

        write.table(skm_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
        write.table(skm_clusters2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")

        ## optional output as .RData
        #if $output_rdata:

            ## save as (.RData)
            save(skm, file="$segmentation_rdata")

        #end if

    #elif str( $segm_cond.segmentationtool ) == 'centroids':
        print('centroids')
        ##centroids

        ssc = spatialShrunkenCentroids(msidata, r=c($segm_cond.centroids_r), k=c($segm_cond.centroids_k), s=c($segm_cond.centroids_s), method="$segm_cond.centroids_method")

        ## remove msidata to clean up RAM space
        rm(msidata)
        gc()

        print(image(ssc, key=TRUE, main="Spatial shrunken centroids", strip = TRUE, col= colourvector,layout=c(1,1), ylim=c(maximumy+2, minimumy-2)))
        print(plot(ssc, main="Spatial shrunken centroids plot", col= colourvector, strip = TRUE,layout=c(1,1)))
        print(plot(ssc, mode = "tstatistics",key = TRUE, layout = c(1,1), main="t-statistics", col=colourvector))

        plot(summary(ssc), main = "Number of segments")

        ## one column of class assignments per entry of ssc@resultData;
        ## the row names of the combined table are parsed for coordinates
        ## below, so the cbind construction is kept as it was
        ssc_classes = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
        for (iteration in seq_along(ssc@resultData)){
            ssc_class = ((ssc@resultData)[[iteration]]\$classes)
            ssc_classes = cbind(ssc_classes, ssc_class)
        }

        ## pixel names and coordinates
        ## to remove potential sample names and z dimension, split at comma and take only x and y
        x_coords = unlist(lapply(strsplit(rownames(ssc_classes), ","), `[[`, 1))
        y_coords = unlist(lapply(strsplit(rownames(ssc_classes), ","), `[[`, 2))
        x_coordinates = gsub("x = ","",x_coords)
        y_coordinates = gsub(" y = ","",y_coords)
        pixel_names = paste0("xy_", x_coordinates, "_", y_coordinates)
        ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes)
        colnames(ssc_classes2) = c("pixel names", "x", "y", names(ssc@resultData))

        ssc_toplabels = topFeatures(ssc, n=$segm_cond.centroids_toplabels)

        write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
        write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")

        ## optional output as .RData
        #if $output_rdata:

            ## save as (.RData)
            save(ssc, file="$segmentation_rdata")

        #end if

    #end if

    ## close the pdf device
    dev.off()

    ## optional svg output with original coordinates
    #if $svg_pixelimage:
        print("svg image")
        ## reverse y axis for svg output = correct order and nice svg image

        svg(file="svg_pixel_output.svg", width=maximumx, height=maximumy)
        par(mar=c(0,0,0,0))
        #if str( $segm_cond.segmentationtool ) == 'pca':
            coord(pca_result)\$y <- max(coord(pca_result)\$y) - coord(pca_result)\$y + 1
            image(pca_result, strip = FALSE, colorkey=FALSE, axes=FALSE, xlab=NA, ylab=NA, col=colourvector)
        #elif str( $segm_cond.segmentationtool ) == 'kmeans':
            coord(skm)\$y <- max(coord(skm)\$y) - coord(skm)\$y + 1
            image(skm, key=FALSE, strip=FALSE, col= colourvector)
        #elif str( $segm_cond.segmentationtool ) == 'centroids':
            coord(ssc)\$y <- max(coord(ssc)\$y) - coord(ssc)\$y + 1
            image(ssc, key=FALSE, strip = FALSE, col= colourvector)
        #end if
        ## close the svg device
        dev.off()
    #end if

}else{
    ## reached when there are no peaks or when any intensity is NA
    print("Inputfile has no intensities > 0")
    dev.off()
}

    ]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="reading_msidata"/>
        <!-- Method selection; each branch exposes only that method's parameters. -->
        <conditional name="segm_cond">
            <param name="segmentationtool" type="select" label="Select the tool for spatial clustering">
                <option value="pca" selected="True">pca</option>
                <option value="kmeans">k-means</option>
                <option value="centroids">spatial shrunken centroids</option>
            </param>
            <when value="pca">
                <param name="pca_ncomp" type="integer" value="2" label="The number of principal components to calculate"/>
                <param name="pca_method" type="select" label="The function used to calculate the singular value decomposition">
                    <option value="irlba" selected="True">irlba</option>
                    <option value="svd">svd</option>
                </param>
                <param name="pca_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Scaling of data before analysis"/>
            </when>
            <when value="kmeans">
                <!-- r and k are text fields; their content is pasted into c(...) in the R script. -->
                <param name="kmeans_r" type="text" value="2" label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="kmeans_k" type="text" value="3" label="The number of clusters (k)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="kmeans_method" type="select" display="radio" label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) clustering, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) clustering">
                    <option value="gaussian">gaussian</option>
                    <option value="adaptive" selected="True">adaptive</option>
                </param>
                <param name="kmeans_toplabels" type="integer" value="500" label="Number of toplabels (m/z) which should be written in tabular output"/>
            </when>
            <when value="centroids">
                <!-- r, k and s are text fields; their content is pasted into c(...) in the R script. -->
                <param name="centroids_r" type="text" value="2" label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="centroids_k" type="text" value="5" label="The initial number of clusters (k)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="centroids_s" type="text" value="2" label="The sparsity thresholding parameter by which to shrink the t-statistics (s)" help="As s increases, fewer m/z features (m/z values) will be used in the spatial segmentation, and only the informative m/z features will be retained. Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="centroids_method" type="select" display="radio" label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights">
                    <option value="gaussian">gaussian</option>
                    <option value="adaptive" selected="True">adaptive</option>
                </param>
                <param name="centroids_toplabels" type="integer" value="500" label="Number of toplabels (m/z) which should be written in tabular output"/>
            </when>
        </conditional>
        <param name="svg_pixelimage" type="boolean" label="Export first segmentation image as svg"/>
        <!-- Each repeat entry contributes one quoted colour to colourvector in the R script. -->
        <repeat name="colours" title="Colours for the plots" min="1" max="50">
            <param name="feature_color" type="color" label="Colours" value="#ff00ff" help="Numbers of colours should be the same as number of components">
                <sanitizer>
                    <valid initial="string.letters,string.digits">
                        <add value="#" />
                    </valid>
                </sanitizer>
            </param>
        </repeat>
        <param name="output_rdata" type="boolean" label="Results as .RData output"/>
        <param name="setseed" type="integer" value="1" label="set seed" help="Use same value to reproduce previous results"/>
    </inputs>
    <outputs>
        <data format="pdf" name="segmentationimages" from_work_dir="segmentationpdf.pdf" label = "${tool.name} on ${on_string}: results"/>
        <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/>
        <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/>
        <!-- The two outputs below are only created when the matching boolean input is enabled. -->
        <data format="rdata" name="segmentation_rdata" label="${tool.name} on ${on_string}: results.RData">
            <filter>output_rdata</filter>
        </data>
        <data format="svg" name="svg_output" from_work_dir="svg_pixel_output.svg" label="${tool.name} on ${on_string}: image.svg">
            <filter>svg_pixelimage</filter>
        </data>
    </outputs>
    <tests>
        <!-- PCA on imzML input -->
        <test>
            <expand macro="infile_imzml"/>
            <param name="segmentationtool" value="pca"/>
            <repeat name="colours">
                <param name="feature_color" value="#ff00ff"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#0000FF"/>
            </repeat>
            <output name="segmentationimages" file="pca_imzml.pdf" compare="sim_size"/>
            <output name="mzfeatures">
                <assert_contents>
                    <has_text text="300.1667" />
                    <has_text text="300.25" />
                    <has_text text="-4.234458e-04" />
                    <has_text text="3.878545e-10" />
                    <has_n_columns n="3" />
                </assert_contents>
            </output>
            <output name="pixeloutput" file="scores_pca.tabular"/>
        </test>
        <!-- k-means with several r and k values and RData export -->
        <test>
            <expand macro="infile_imzml"/>
            <param name="segmentationtool" value="kmeans"/>
            <param name="kmeans_r" value="1:3"/>
            <param name="kmeans_k" value="2,3"/>
            <param name="kmeans_toplabels" value="20"/>
            <repeat name="colours">
                <param name="feature_color" value="#ff00ff"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#0000FF"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#00C957"/>
            </repeat>
            <param name="output_rdata" value="True"/>
            <output name="segmentationimages" file="kmeans_analyze.pdf" compare="sim_size"/>
            <output name="mzfeatures" file="toplabels_skm.tabular"/>
            <output name="pixeloutput" file="cluster_skm.tabular"/>
            <output name="segmentation_rdata" file="cluster_skm.RData" compare="sim_size"/>
        </test>
        <!-- spatial shrunken centroids on RData input -->
        <test>
            <param name="infile" value="preprocessed.RData" ftype="rdata"/>
            <param name="segmentationtool" value="centroids"/>
            <param name="centroids_r" value="1,2"/>
            <param name="centroids_k" value="3"/>
            <param name="centroids_toplabels" value="50"/>
            <repeat name="colours">
                <param name="feature_color" value="#0000FF"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#00C957"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#B0171F"/>
            </repeat>
            <output name="segmentationimages" file="centroids_rdata.pdf" compare="sim_size"/>
            <output name="mzfeatures" file="toplabels_ssc.tabular"/>
            <output name="pixeloutput" file="classes_ssc.tabular"/>
        </test>
        <!-- spatial shrunken centroids on processed imzML input -->
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="preprocessing_results3.imzml"/>
                <composite_data value="preprocessing_results3.ibd"/>
            </param>
            <conditional name="processed_cond">
                <param name="processed_file" value="processed"/>
                <param name="accuracy" value="1"/>
                <param name="units" value="mz"/>
            </conditional>
            <param name="segmentationtool" value="centroids"/>
            <param name="centroids_r" value="1"/>
            <param name="centroids_k" value="2,3"/>
            <param name="centroids_s" value="0,3"/>
            <param name="centroids_toplabels" value="100"/>
            <repeat name="colours">
                <param name="feature_color" value="#0000FF"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#00C957"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#B0171F"/>
            </repeat>
            <output name="segmentationimages" file="centroids_proc.pdf" compare="sim_size"/>
            <output name="pixeloutput" file="classes_proc.tabular"/>
            <output name="mzfeatures">
                <assert_contents>
                    <has_text text="1212" />
                    <has_text text="1297" />
                    <has_text text="1199" />
                    <has_text text="1298" />
                    <has_n_columns n="9" />
                    <has_n_lines n="101" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
        <![CDATA[

@CARDINAL_DESCRIPTION@

-----

This tool provides three different Cardinal functions for unsupervised clustering/spatial segmentation of mass spectrometry imaging data.

@MSIDATA_INPUT_DESCRIPTION@
- NA intensities are not allowed
- duplicated coordinates will be removed

**Options**

- PCA: principal component analysis
- k-means: spatially-aware k-means clustering (adopted from `Alexandrov and Kobarg <https://doi.org/10.1093/bioinformatics/btr246>`_)
- spatial shrunken centroids: Allows the number of segments to decrease according to the data. This allows selection of the number of clusters (more details in `Bemis et al. <https://doi.org/10.1074/mcp.O115.053918>`_)

**Output**

- Pdf with the heatmaps and plots for the segmentation
- Tabular file with information on m/z and pixels: loadings/scores (PCA), toplabels/clusters (k-means), toplabels/classes (spatial shrunken centroids)
- Optional .RData file which contains the segmentation results and can be used for further exploration in R using the Cardinal package
- Optional: svg file with the first segmentation image

        ]]>
    </help>
    <expand macro="citations"/>
</tool>