# HG changeset patch # User galaxyp # Date 1532422374 14400 # Node ID bab12ded74a50004c478642e5f9401f1dcbd72b2 # Parent 3d5ac78fb2b09ea3ca09190ce3a06332afa07f7a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_filtering commit 5bceedc3a11c950790692a4c64bbb83d46897bee diff -r 3d5ac78fb2b0 -r bab12ded74a5 msi_filtering.xml --- a/msi_filtering.xml Fri Jul 06 14:13:22 2018 -0400 +++ b/msi_filtering.xml Tue Jul 24 04:52:54 2018 -0400 @@ -1,8 +1,9 @@ - + tool for filtering mass spectrometry imaging data bioconductor-cardinal r-gridextra + r-ggplot2 0, na.rm=TRUE) > 0) -{ - #if $outputs.outputs_select == "quality_control": +########################### QC numbers ######################## ## Number of features (m/z) maxfeatures = length(features(msidata)) @@ -80,7 +78,15 @@ ## Store features for QC plot featuresinfile = mz(msidata) - #end if +## Next steps will only run if there are more than 0 intensities/pixels/features in the file + +if (sum(spectra(msidata)[]>0, na.rm=TRUE) > 0) +{ + + + ## prepare dataframe for QC of pixel distribution (will be overwritten in filtering of pixels condition) + position_df = cbind(coord(msidata)[,1:2], rep("$infile.element_identifier", times=ncol(msidata))) + ###################################### Filtering of pixels ##################### ################################################################################ @@ -90,14 +96,22 @@ #if str($pixels_cond.pixel_filtering) == "single_column": print("single column") + ## read tabular file, count number of rows (= number of pixels), count how many pixels are valid input_list = read.delim("$pixels_cond.single_pixels", header = FALSE, stringsAsFactors = FALSE) - numberpixels = length(input_list[,$pixels_cond.pixel_column]) - valid_entries = input_list[,$pixels_cond.pixel_column] %in% names(pixels(msidata)) + startingrow = $pixels_cond.pixel_header+1 + numberpixels = length(startingrow:nrow(input_list)) + valid_entries = input_list[startingrow:nrow(input_list),$pixels_cond.pixel_column] %in% names(pixels(msidata)) validpixels = sum(valid_entries) + valid_annotations = input_list[valid_entries,c($pixels_cond.pixel_column, $pixels_cond.annotation_column)] + ## for valid pixels: filter file for pixels and create dataframe with x,y,annotation for QC if (validpixels != 0){ - pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% input_list[valid_entries,$pixels_cond.pixel_column]] + ## filter file for pixels + pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% valid_annotations[,1]] msidata = msidata[,pixelsofinterest] + ## position_df for QC + pixel_coords = coord(msidata)[names(pixels(msidata)) %in% valid_annotations[,1],1:2] + position_df = cbind(pixel_coords, valid_annotations[,2]) }else{ msidata = msidata[,0] validpixels=0} @@ -107,21 +121,22 @@ #elif str($pixels_cond.pixel_filtering) == "two_columns": print("two columns") + ## read tabular file, count number of rows (= number of pixels), extract dataframe with x,y,annotation (for QC), count number of valid pixels input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE, stringsAsFactors = FALSE) - numberpixels = length(input_list[,$pixels_cond.pixel_column_x]) + startingrow = $pixels_cond.pixel_header+1 + numberpixels = length(startingrow:nrow(input_list)) + inputpixels = input_list[startingrow:nrow(input_list),c($pixels_cond.pixel_column_x, $pixels_cond.pixel_column_y, $pixels_cond.annotation_column_xy)] + colnames(inputpixels) = c("x", "y", "annotation") + position_df = merge(coord(msidata)[,1:2], inputpixels, by=c("x", "y"), all.x=TRUE) - inputpixel_x = input_list[,$pixels_cond.pixel_column_x] - inputpixel_y = input_list[,$pixels_cond.pixel_column_y] - inputpixels = cbind(inputpixel_x, inputpixel_y) - colnames(inputpixels) = c("x", "y") - valid_rows = merge(inputpixels, coord(msidata)[,1:2]) - validpixels = nrow(valid_rows) + validpixels = nrow(position_df) + ## for valid pixels: filter file for pixels if (validpixels != 0){ pixelvector = character() - for (pixel in 1:nrow(valid_rows)){ - pixelvector[pixel] = paste0("x = ", valid_rows[pixel,1],", ", "y = ", valid_rows[pixel,2])} + for (pixel in 1:nrow(position_df)){ + pixelvector[pixel] = paste0("x = ", position_df[pixel,1],", ", "y = ", position_df[pixel,2])} pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector] msidata = msidata[,pixelsofinterest] }else{ @@ -135,8 +150,7 @@ numberpixels = "range" validpixels = "range" - ## only filter pixels if at least one pixel will be left - + ## only filter pixels if at least one pixel will be left if (sum(coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range) > 0 & sum(coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range) > 0){ msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range] @@ -153,35 +167,42 @@ #end if - }else{ print("Inputfile has no intensities > 0") - } -###################################### filtering of features ###################### -################################################################################## + ################################# filtering of features ###################### + ############################################################################## -######################## Keep m/z from tabular file ######################### + ####################### Keep m/z from tabular file ######################### -if (sum(spectra(msidata)[], na.rm=TRUE) > 0){ +## feature filtering only when pixels/features/intensities are left +if (sum(spectra(msidata)[], na.rm=TRUE) > 0) +{ #if str($features_cond.features_filtering) == "features_list": print("feature list") + ## read tabular file, define starting row, extract and count valid features input_features = read.delim("$inputfeatures", header = FALSE, stringsAsFactors = FALSE) startingrow = $features_cond.feature_header+1 extracted_features = input_features[startingrow:nrow(input_features),$features_cond.feature_column] numberfeatures = length(extracted_features) + ## find out type of tabular file (numeric or character format) if (grepl("m/z = ", input_features[startingrow,$features_cond.feature_column])==FALSE){ ### if input is in numeric format if (class(extracted_features) == "numeric"){ - ### max digits given in the input file will be used to match m/z + ### max digits given in the input file will be used to match m/z but the maximum is 4 max_digits = max(nchar(matrix(unlist(strsplit(as.character(extracted_features), "\\.")), ncol=2, byrow=TRUE)[,2])) - validfeatures = extracted_features %in% round(mz(msidata),max_digits) - featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% extracted_features[validfeatures]] + if (max_digits >4) + { + max_digits = 4 + } + + validfeatures = round(extracted_features, max_digits) %in% round(mz(msidata),max_digits) + featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% round(extracted_features[validfeatures], max_digits)] validmz = length(unique(featuresofinterest)) }else{ validmz = 0 @@ -198,7 +219,7 @@ msidata = msidata[featuresofinterest,] - ############### features within a given range are kept ######################### + ############### features within a given range are kept ##################### #elif str($features_cond.features_filtering) == "features_range": print("feature range") @@ -270,9 +291,14 @@ ## save msidata as Rfile save(msidata, file="$msidata_filtered") - #################### optional QC numbers ####################### +}else{ + print("Inputfile or file filtered for pixels has no intensities > 0") + numberfeatures = NA + validmz = NA +} - #if $outputs.outputs_select == "quality_control": + #################### QC numbers ####################### + ## Number of features (m/z) maxfeatures2 = length(features(msidata)) @@ -334,22 +360,37 @@ property_df = data.frame(properties, before, filtered) - ############################### optional PDF QC ################################ + ############################### PDF QC ################################ + pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12) plot(0,type='n',axes=FALSE,ann=FALSE) title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name")) grid.table(property_df, rows= NULL) - ### heatmap image as visual pixel control - if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ - image(msidata, mz=$outputs.inputmz, plusminus = $outputs.plusminus_dalton, contrast.enhance = "none", - main= paste0($outputs.inputmz," ± ", $outputs.plusminus_dalton, " Da"), ylim = c(maximumy2+0.2*maximumy2,minimumy2-0.2*minimumy2)) +## QC report with more than value-table: only when pixels/features/intensities are left +if (sum(spectra(msidata)[], na.rm=TRUE) > 0) +{ + ### visual pixel control + + colnames(position_df)[3] = "annotation_name" + pixel_image = ggplot(position_df, aes(x=x, y=y, fill=annotation_name))+ + geom_tile() + + coord_fixed()+ + ggtitle("Spatial orientation of combined data")+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=4,byrow=TRUE)) + coord_labels = aggregate(cbind(x,y)~annotation_name, data=position_df, mean, na.rm=TRUE, na.action="na.pass") + coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$annotation_name) + + print(pixel_image) ### control features which are removed hist(mz(msidata), xlab="m/z", main="Kept m/z values") #if str($features_cond.features_filtering) == "none": - print("no difference histogram as no m/z filtering took place") + print("no difference histogram as no m/z filtering took place") #else: if (isTRUE(all.equal(featuresinfile, mz(msidata)))){ @@ -357,13 +398,9 @@ }else{ hist(setdiff(featuresinfile, mz(msidata)), xlab="m/z", main="Removed m/z values")} #end if - }else{ - print("file has no features or pixels left")} dev.off() - #end if - ############################### optional intensity matrix ###################### #if $output_matrix: @@ -376,7 +413,8 @@ #end if }else{ - print("Inputfile or file filtered for pixels has no intensities > 0") + print("Inputfile or filtered file has no intensities > 0") + dev.off() } ]]> @@ -398,6 +436,7 @@ + @@ -410,12 +449,16 @@ + + + + @@ -424,6 +467,7 @@ + @@ -450,26 +494,14 @@ - + - - - - - - - - - - - + - - outputs["outputs_select"] == "quality_control" - + output_matrix @@ -487,9 +519,6 @@ - - - @@ -503,9 +532,6 @@ - - - @@ -522,9 +548,6 @@ - - - @@ -539,13 +562,11 @@ + - - - @@ -563,9 +584,6 @@ - - - @@ -581,11 +599,6 @@ - - - - - @@ -597,15 +610,10 @@ - - - - - - + @@ -613,6 +621,7 @@ + @@ -631,9 +640,9 @@ Options: -- pixel filtering: can use a tabular file containing x and y coordinates or by defining a range for x and y by hand +- pixel filtering/annotation: either with a tabular file containing x and y coordinates and pixel annotations or by defining a range for x and y by hand (for the latter no annotation is possible) - m/z feature filtering: can use a tabular file containing m/z of interest or by defining a range for the m/z values (! numeric input will be rounded to 2 digits before matching to m/z!) -- m/z feature removing: infering m/z such as matrix contaminants can be removed by specifying their m/z in a tabular file and optionally set a window (window in ppm or m/z in which peaks should be removed) +- m/z feature removing: perturbing m/z such as matrix contaminants can be removed by specifying their m/z in a tabular file and optionally set a window (window in ppm or m/z in which peaks should be removed) Output: diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/analyze75_filtered2.pdf Binary file test-data/analyze75_filtered2.pdf has changed diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/analyze_filtered.RData Binary file test-data/analyze_filtered.RData has changed diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/analyze_filtered.pdf Binary file test-data/analyze_filtered.pdf has changed diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/analyze_filteredoutside.RData Binary file test-data/analyze_filteredoutside.RData has changed diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/imzml_filtered.RData Binary file test-data/imzml_filtered.RData has changed diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/imzml_filtered.pdf Binary file test-data/imzml_filtered.pdf has changed diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/imzml_filtered2.pdf Binary file test-data/imzml_filtered2.pdf has changed diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/imzml_filtered3.RData Binary file test-data/imzml_filtered3.RData has changed diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/imzml_filtered3.pdf Binary file test-data/imzml_filtered3.pdf has changed diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/imzml_filtered4.RData Binary file test-data/imzml_filtered4.RData has changed diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/imzml_filtered4.pdf Binary file test-data/imzml_filtered4.pdf has changed diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/imzml_filtered5.RData Binary file test-data/imzml_filtered5.RData has changed diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/imzml_filtered5.pdf Binary file test-data/imzml_filtered5.pdf has changed diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/inputpixels.tabular --- a/test-data/inputpixels.tabular Fri Jul 06 14:13:22 2018 -0400 +++ b/test-data/inputpixels.tabular Tue Jul 24 04:52:54 2018 -0400 @@ -1,6 +1,6 @@ -x = 1, y = 1 -x = 1, y = 2 -x = 1, y = 3 -x = 3, y = 1 -x = 3, y = 2 -x = 3, y = 3 +x = 1, y = 1 ROI1 +x = 1, y = 2 ROI1 +x = 1, y = 3 ROI1 +x = 3, y = 1 ROI1 +x = 3, y = 2 ROI1 +x = 3, y = 3 ROI1 diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/inputpixels_2column_challenge.tabular --- a/test-data/inputpixels_2column_challenge.tabular Fri Jul 06 14:13:22 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -12 A 1 -13 B 23 - diff -r 3d5ac78fb2b0 -r bab12ded74a5 test-data/rdata_notfiltered.pdf Binary file test-data/rdata_notfiltered.pdf has changed