# HG changeset patch # User galaxyp # Date 1532422359 14400 # Node ID f4aafc565aa33f3f38183ab6341210d17375248a # Parent ff91e78b5c5c187bc9c0d818daab9b1d27db1178 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_combine commit 5bceedc3a11c950790692a4c64bbb83d46897bee diff -r ff91e78b5c5c -r f4aafc565aa3 msi_combine.xml --- a/msi_combine.xml Fri Jul 06 14:13:08 2018 -0400 +++ b/msi_combine.xml Tue Jul 24 04:52:39 2018 -0400 @@ -1,4 +1,4 @@ - + combine several mass spectrometry imaging datasets into one @@ -20,6 +20,10 @@ ln -s '$infile' infile_${i}.RData && #end if #end for + #for $i, $annotation_file in enumerate($annotation_files): + ln -s '$annotation_file' annotation_file_${i}.tabular && + #end for + cat '${msi_combine}' && Rscript '${msi_combine}' @@ -67,25 +71,52 @@ #for $i, $infile in enumerate($infiles): -#if $infile.ext == 'imzml' - #if str($processed_cond.processed_file) == "processed": - msidata_$i <- readImzML('infile_${i}', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units") + ## read MSI data + + #if $infile.ext == 'imzml' + #if str($processed_cond.processed_file) == "processed": + msidata_$i <- readImzML('infile_${i}', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units") + #else + msidata_$i <- readImzML('infile_${i}') + #end if + #elif $infile.ext == 'analyze75' + msidata_$i <- readAnalyze('infile_${i}') #else - msidata <- readImzML('infile') + msidata_$i = loadRData('infile_${i}.RData') #end if -#elif $infile.ext == 'analyze75' - msidata_$i <- readAnalyze('infile_${i}') -#else - msidata_$i = loadRData('infile_${i}.RData') -#end if - sampleNames(msidata_$i) = "msidata" ## same name necessary to combine data in one single coordinate system + ## read annotation data, up to 5 annotations can be used for now + + ## read annotation tabular, set first two columns as x and y, merge with coordinates dataframe and order according to pixelorder in msidata + input_annotation = read.delim("annotation_file_${i}.tabular", header = TRUE, + stringsAsFactors = FALSE) + colnames(input_annotation)[1:2] = c("x", "y") + msidata_coordinates = cbind(coord(msidata_$i)[,1:2], 1:ncol(msidata_$i)) + colnames(msidata_coordinates)[3] = "pixel_index" + ## only first 5 annotation columns are kept + if (ncol(input_annotation) > 7){ + input_annotation = input_annotation[,1:7]} + + annotation_df = merge(msidata_coordinates, input_annotation, by=c("x", "y"), all.x=TRUE) + annotation_df_8 = cbind(annotation_df, data.frame(matrix(NA,ncol=8-ncol(annotation_df), nrow=ncol(msidata_$i)))) + annotation_df_8_sorted = annotation_df_8[order(annotation_df_8\$pixel_index),]## orders pixel according to msidata + + ## each annotation column is assigned to the pixel in the pData slot of the MSIdata + msidata_$i\$column1 = annotation_df_8_sorted[,4] + msidata_$i\$column2 = annotation_df_8_sorted[,5] + msidata_$i\$column3 = annotation_df_8_sorted[,6] + msidata_$i\$column4 = annotation_df_8_sorted[,7] + msidata_$i\$column5 = annotation_df_8_sorted[,8] + + ## same name for MSI data files necessary to combine data in one single coordinate system + sampleNames(msidata_$i) = "msidata" ################### preparation xy shifts ########################## #if str( $combine_conditional.combine_method ) == 'xy_shifts': + ## shift coordinates according to input tabular file and store file names coord(msidata_$i)\$x = coord(msidata_$i)\$x + input_list[$i+1,$combine_conditional.column_x] ## shifts x coordinates according to tabular file coord(msidata_$i)\$y = coord(msidata_$i)\$y + input_list[$i+1,$combine_conditional.column_y] ## shifts y coordinates according to tabular file pixel_vector = append(pixel_vector, rep(paste($i+1, input_list[$i+1,$combine_conditional.column_names], sep="_"),times=ncol(msidata_$i))) ## stores file name for each pixel @@ -132,6 +163,8 @@ #end for +## extract columnnames from (last) annotation tabular (for QC plot names) +annotation_colnames = colnames(input_annotation)[-c(1,2)] ###################### automatic combination ################################### ################################################################################ @@ -154,7 +187,7 @@ combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+ geom_tile() + coord_fixed()+ - ggtitle("Spatial orientation of combined data")+ + ggtitle("Spatial orientation of combined data (sample names)")+ theme_bw()+ theme(text=element_text(family="ArialMT", face="bold", size=15))+ theme(legend.position="bottom",legend.direction="vertical")+ @@ -179,82 +212,174 @@ #elif str( $combine_conditional.combine_method ) == 'xy_shifts': print("xy_shifts") - #if str($combine_conditional.combination_true) == "yes_combi": - print("combination with xy shifts") - - ## find duplicated coordinates - all_coordinates = do.call(rbind, list(#echo ','.join($pixelcoords)#)) - duplicated_coordinates= duplicated(all_coordinates[,1:2])| duplicated(all_coordinates[,1:2], fromLast=TRUE) + ## find duplicated coordinates + all_coordinates = do.call(rbind, list(#echo ','.join($pixelcoords)#)) + duplicated_coordinates= duplicated(all_coordinates[,1:2])| duplicated(all_coordinates[,1:2], fromLast=TRUE) print(paste0("Number of removed duplicated coordinates: ", sum(duplicated_coordinates)/2)) - unique_coordinates = all_coordinates[!duplicated_coordinates,] + unique_coordinates = all_coordinates[!duplicated_coordinates,] - ## remove duplicated coordinates - datasetlist = list() - count = 1 - for (usable_dataset in list(#echo ','.join($msidata)#)){ - pixelsofinterest = pixels(usable_dataset)[names(pixels(usable_dataset)) %in% rownames(unique_coordinates)] - filtered_dataset = usable_dataset[,pixelsofinterest] - if (ncol(filtered_dataset) > 0 ){ - datasetlist[[count]] = filtered_dataset} - count = count +1} + ## remove duplicated coordinates + datasetlist = list() + count = 1 + for (usable_dataset in list(#echo ','.join($msidata)#)){ + pixelsofinterest = pixels(usable_dataset)[names(pixels(usable_dataset)) %in% rownames(unique_coordinates)] + filtered_dataset = usable_dataset[,pixelsofinterest] + if (ncol(filtered_dataset) > 0 ){ + datasetlist[[count]] = filtered_dataset} + count = count +1} + + msidata_combined = do.call(combine, datasetlist) - msidata_combined = do.call(combine, datasetlist) + ## save as (.RData) + + msidata = msidata_combined + save(msidata, file="$msidata_combined") - ## save as (.RData) + ## create x,y,sample_name dataframe for QC pdf + + position_df = cbind(coord(msidata), msidata\$combined_sample) + colnames(position_df)[3] = "sample_name" + +#end if + - msidata = msidata_combined - save(msidata, file="$msidata_combined") - - ## create x,y,sample_name dataframe for QC pdf +################################## outputs #################################### +################################################################################ - position_df = cbind(coord(msidata), msidata\$combined_sample) - colnames(position_df)[3] = "sample_name" +########### QC with pixels and their annotations ################################ - #else: - print("no combination, only testing xy shifts") +pdf("Combined_qc.pdf", width=15, height=15) - position_df = do.call(rbind, list(#echo ','.join($pixelcoords)#)) - position_df\$sample_name = as.factor(pixel_vector) - -print(paste0("Number of duplicated coordinates: ", sum(duplicated(position_df[,1:2])))) +## combined plot +combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+ + geom_tile() + + coord_fixed()+ + ggtitle("Spatial orientation of combined data")+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE)) +coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean) +coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name) +for(file_count in 1:nrow(coord_labels)) +{combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"], +y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))} +print(combine_plot) - #end if + + ## annotation plots + + ## plot 1 - ## create PDF to show all pixels in PDF as QC + column1_df = cbind(coord(msidata), msidata\$column1) + colnames(column1_df)[3] = "column1" - pdf("Combined_qc.pdf", width=15, height=15) - combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+ + if (sum(is.na(column1_df[3])) < nrow(column1_df)){ + column1_plot = ggplot(column1_df, aes(x=x, y=y, fill=column1))+ geom_tile() + coord_fixed()+ - ggtitle("Spatial orientation of combined data")+ + ggtitle(paste0(annotation_colnames[1]))+ theme_bw()+ theme(text=element_text(family="ArialMT", face="bold", size=15))+ theme(legend.position="bottom",legend.direction="vertical")+ - guides(fill=guide_legend(ncol=5,byrow=TRUE)) - coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean) - coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name) - for(file_count in 1:nrow(coord_labels)) - {combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"], - y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))} - print(combine_plot) + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[1])) + print(column1_plot)} + ##rename columnname for output tabular file + colnames(column1_df)[3] = annotation_colnames[1] + + ## plot 2 + column2_df = cbind(coord(msidata), msidata\$column2) + colnames(column2_df)[3] = "column2" + + if (sum(is.na(column2_df[3])) < nrow(column2_df)){ + column2_plot = ggplot(column2_df, aes(x=x, y=y, fill=column2))+ + geom_tile() + + coord_fixed()+ + ggtitle(paste0(annotation_colnames[2]))+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[2])) + print(column2_plot)} + ##rename columnname for output tabular file + colnames(column2_df)[3] = annotation_colnames[2] + + ## plot 3 + column3_df = cbind(coord(msidata), msidata\$column3) + colnames(column3_df)[3] = "column3" + if (sum(is.na(column3_df[3])) < nrow(column3_df)){ + column3_plot = ggplot(column3_df, aes(x=x, y=y, fill=column3))+ + geom_tile() + + coord_fixed()+ + ggtitle(paste0(annotation_colnames[3]))+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[3])) + print(column3_plot)} + ##rename columnname for output tabular file + colnames(column3_df)[3] = annotation_colnames[3] + + ## plot 4 + column4_df = cbind(coord(msidata), msidata\$column4) + colnames(column4_df)[3] = "column4" + + if (sum(is.na(column4_df[3])) < nrow(column4_df)){ + column4_plot = ggplot(column4_df, aes(x=x, y=y, fill=column4))+ + geom_tile() + + coord_fixed()+ + ggtitle(paste0(annotation_colnames[4]))+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[4])) + print(column4_plot)} + ##rename columnname for output tabular file + colnames(column4_df)[3] = annotation_colnames[4] + + ## plot5 + + column5_df = cbind(coord(msidata), msidata\$column5) + colnames(column5_df)[3] = "column5" + if (sum(is.na(column5_df[3])) < nrow(column5_df)){ + column5_plot = ggplot(column5_df, aes(x=x, y=y, fill=column5))+ + geom_tile() + + coord_fixed()+ + ggtitle(paste0(annotation_colnames[5]))+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[5])) + print(column5_plot)} + ##rename columnname for output tabular file + colnames(column5_df)[3] = annotation_colnames[5] + dev.off() -#end if - -####################### optional matrix output ################################# - -#if $output_matrix: +##################### annotation tabular output ################################ if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ - spectramatrix = spectra(msidata)[] - spectramatrix = cbind(mz(msidata),spectramatrix) - newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix) - write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") + annotation_df_list = list(position_df, column1_df, column2_df, column3_df, column4_df, column5_df) + combined_annotations = Reduce(function(...) merge(..., by=c("x", "y"), all=TRUE), annotation_df_list) + write.table(combined_annotations, file="$annotation_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") }else{ - print("file has no features or pixels left") + print("No annotation tabular output because file has no features or pixels left") } -#end if + ####################### optional matrix output ################################# + + #if $output_matrix: + + if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ + spectramatrix = spectra(msidata)[] + spectramatrix = cbind(mz(msidata),spectramatrix) + newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix) + write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") + }else{ + print("No intensity matrix output because file has no features or pixels left") + } + + #end if ]]> @@ -276,19 +401,21 @@ + - + - + - @@ -296,47 +423,42 @@ + output_matrix - + + - + - - - - - - - - - - - - + + + - + + + @@ -354,20 +476,25 @@ - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) -Prerequisite: + +Input: -- m/z values need to be the same across all datasets (before using this tool), this can be achieved with the filtering tool (use same m/z range) and the preprocessing tool (use same binning parameter) +- MSI data files with same m/z values (to obtain same m/z values for different files: filtering tool same m/z range and preprocessing tool same binning width) +- Tabular files with pixel annotations need to have the x values in the first column, y values in the second column and then up to five annotations in the next columns. The order of the annotations in the columns must be the same for all files (x and y in column 1 and 2; annotation1 in column3, annotation2 in column4,...) +- The order and the number of MSI data files and annotation tabular files must be the same +- For xy shifts with tabular file: Tabular file with x and y coordinates shift and file name (see below) Options: -- "automatic combination": files are automatically arranged in a grid, subfiles are named according to input file name -- "xy shifts by hand": each file can be moved in x and y direction according to the users need (define one tabular file in the order in which the files are loaded in the history (bottom to top) and define for each file the x and y coordinates shifts in separate columns and the file name in a third column). To test if the pixels are correctly shifted use "combine datasets: No". -Combine datasets: Yes - Combines all datasets and removes all duplicated pixels (same x and y coordinates). +- "automatic combination": files are automatically arranged in a grid (duplicated pixels are allowed), subfiles are named according to the input file name +- "xy shifts by hand": each file can be moved in x and y direction according to the users need (define one tabular file in the order in which the files are loaded in the history (bottom to top) and define for each file the x and y coordinates shifts in separate columns and the file name in a third column). The xy shift option combines all datasets and removes all duplicated pixels (same x and y coordinates). + Output: -- imzML file containing multiple subfiles -- pdf that shows the pixel positions of the combined files +- single imzML file +- pdf that shows the pixel positions and annotations of the combined files +- Tabular file with pixel annotations (x,y,column with input file names, up to five annotation columns) - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns) diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/112_annotation_output.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/112_annotation_output.tabular Tue Jul 24 04:52:39 2018 -0400 @@ -0,0 +1,16 @@ +x y sample_name file_name column_name NA.x NA.y NA +1 1 1_msidata_1.RData file_one col1 NA NA NA +1 2 1_msidata_1.RData file_one col1 NA NA NA +1 3 1_msidata_1.RData file_one col1 NA NA NA +10 1 2_123_combined.RData file_two col3 NA NA NA +10 2 2_123_combined.RData file_two col3 NA NA NA +10 3 2_123_combined.RData file_two col3 NA NA NA +15 1 2_123_combined.RData file_three col3 NA NA NA +15 2 2_123_combined.RData file_three col3 NA NA NA +15 3 2_123_combined.RData file_three col3 NA NA NA +7 1 2_123_combined.RData file_one col1 NA NA NA +7 2 2_123_combined.RData file_one col1 NA NA NA +7 3 2_123_combined.RData file_one col1 NA NA NA +9 1 2_123_combined.RData file_two col2 NA NA NA +9 2 2_123_combined.RData file_two col2 NA NA NA +9 3 2_123_combined.RData file_two col2 NA NA NA diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/112_auto_combined.RData Binary file test-data/112_auto_combined.RData has changed diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/112_auto_combined_QC.pdf Binary file test-data/112_auto_combined_QC.pdf has changed diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/123_annotation.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/123_annotation.tabular Tue Jul 24 04:52:39 2018 -0400 @@ -0,0 +1,13 @@ +x y file_name column_name +1 1 file_one col1 +1 2 file_one col1 +1 3 file_one col1 +3 1 file_two col2 +3 2 file_two col2 +3 3 file_two col2 +4 1 file_two col3 +4 2 file_two col3 +4 3 file_two col3 +9 1 file_three col3 +9 2 file_three col3 +9 3 file_three col3 diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/123_annotation_output.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/123_annotation_output.tabular Tue Jul 24 04:52:39 2018 -0400 @@ -0,0 +1,13 @@ +x y sample_name file_name column_name NA.x NA.y NA +1 1 1_File1 file_one col1 NA NA NA +1 2 1_File1 file_one col1 NA NA NA +1 3 1_File1 file_one col1 NA NA NA +3 1 2_File2 file_two col2 NA NA NA +3 2 2_File2 file_two col2 NA NA NA +3 3 2_File2 file_two col2 NA NA NA +4 1 2_File2 file_two col3 NA NA NA +4 2 2_File2 file_two col3 NA NA NA +4 3 2_File2 file_two col3 NA NA NA +9 1 3_File3 file_three col3 NA NA NA +9 2 3_File3 file_three col3 NA NA NA +9 3 3_File3 file_three col3 NA NA NA diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/123_combined.RData Binary file test-data/123_combined.RData has changed diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/123_combined_QC.pdf Binary file test-data/123_combined_QC.pdf has changed diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/123_no_combi.RData diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/123_no_combi_QC.pdf Binary file test-data/123_no_combi_QC.pdf has changed diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/12_annotation_output.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/12_annotation_output.tabular Tue Jul 24 04:52:39 2018 -0400 @@ -0,0 +1,10 @@ +x y sample_name file_name column_name NA.x NA.y NA +1 1 1_msidata_1.RData file_one col1 NA NA NA +1 2 1_msidata_1.RData file_one col1 NA NA NA +1 3 1_msidata_1.RData file_one col1 NA NA NA +7 1 2_msidata_2.RData file_two col2 NA NA NA +7 2 2_msidata_2.RData file_two col2 NA NA NA +7 3 2_msidata_2.RData file_two col2 NA NA NA +8 1 2_msidata_2.RData file_two col3 NA NA NA +8 2 2_msidata_2.RData file_two col3 NA NA NA +8 3 2_msidata_2.RData file_two col3 NA NA NA diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/12_combined.RData Binary file test-data/12_combined.RData has changed diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/12_combined_QC.pdf Binary file test-data/12_combined_QC.pdf has changed diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/annotations_file1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotations_file1.tabular Tue Jul 24 04:52:39 2018 -0400 @@ -0,0 +1,10 @@ +X Y file_name column_name +1 1 file_one col1 +1 2 file_one col1 +1 3 file_one col1 +2 1 file_one col2 +2 2 file_one col2 +2 3 file_one col2 +3 1 file_one col3 +3 2 file_one col3 +3 3 file_one col3 diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/annotations_file2.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotations_file2.tabular Tue Jul 24 04:52:39 2018 -0400 @@ -0,0 +1,10 @@ +X Y file_name column_name +1 1 file_two col1 +1 2 file_two col1 +1 3 file_two col1 +2 1 file_two col2 +2 2 file_two col2 +2 3 file_two col2 +3 1 file_two col3 +3 2 file_two col3 +3 3 file_two col3 diff -r ff91e78b5c5c -r f4aafc565aa3 test-data/annotations_file3.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotations_file3.tabular Tue Jul 24 04:52:39 2018 -0400 @@ -0,0 +1,10 @@ +X Y file_name column_name +1 1 file_three col1 +1 2 file_three col1 +1 3 file_three col1 +2 1 file_three col2 +2 2 file_three col2 +2 3 file_three col2 +3 1 file_three col3 +3 2 file_three col3 +3 3 file_three col3