comparison data_exporter.xml @ 4:e521b5767819 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit ecdc3a64aa245d80dbc5487b2bf10a85a43adc6d
author galaxyp
date Fri, 22 Mar 2019 08:16:20 -0400
parents d94770c22f13
children 350a84ea795c
comparison
equal deleted inserted replaced
3:d94770c22f13 4:e521b5767819
1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.1"> 1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.2">
2 <description> 2 <description>
3 exports imzML and Analyze7.5 to tabular files 3 exports imzML and Analyze7.5 to tabular files
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>macros.xml</import> 6 <import>macros.xml</import>
22 22
23 library(Cardinal) 23 library(Cardinal)
24 24
25 @READING_MSIDATA_INRAM@ 25 @READING_MSIDATA_INRAM@
26 26
27 ## to make sure that processed files work as well:
28 iData(msidata) = iData(msidata)[]
29 27
30 ###################### Intensity matrix output ################################ 28 ###################### Intensity matrix output ################################
31 29
32 #if "int_matrix" in str($output_options).split(","): 30 #if "int_matrix" in str($output_options).split(","):
33 print("intensity matrix output") 31 print("intensity matrix output")
35 mz_names = gsub(" = ", "_", names(features(msidata))) 33 mz_names = gsub(" = ", "_", names(features(msidata)))
36 mz_names = gsub("/", "", mz_names) 34 mz_names = gsub("/", "", mz_names)
37 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) 35 pixel_names = gsub(", y = ", "_", names(pixels(msidata)))
38 pixel_names = gsub(" = ", "y_", pixel_names) 36 pixel_names = gsub(" = ", "y_", pixel_names)
39 37
40 spectramatrix = cbind(mz_names,spectra(msidata)[]) 38 ##spectramatrix = cbind(mz_names,spectra(msidata))
41 newmatrix = rbind(c("mz_name", pixel_names), spectramatrix) 39 newmatrix = rbind(c("mz_name", pixel_names), cbind(mz_names,spectra(msidata)))
42 write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") 40 write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
43 ## free up RAM space in case further steps will be run: 41 ## free up RAM space in case further steps will be run:
44 rm(newmatrix) 42 rm(newmatrix)
45 rm(spectramatrix)
46 gc() 43 gc()
47 44
48 #end if 45 #end if
49 46
50 47
54 51
55 mz_names = gsub(" = ", "_", names(features(msidata))) 52 mz_names = gsub(" = ", "_", names(features(msidata)))
56 mz_names = gsub("/", "", mz_names) 53 mz_names = gsub("/", "", mz_names)
57 54
58 ## mean, median, sd and SEM intensity per file and mz 55 ## mean, median, sd and SEM intensity per file and mz
59 full_sample_mean = apply(spectra(msidata)[],1,mean, na.rm=TRUE) 56 full_sample_mean = rowMeans(spectra(msidata), na.rm=TRUE)
60 full_sample_median = apply(spectra(msidata)[],1,median, na.rm=TRUE) 57 full_sample_median = apply(spectra(msidata),1,median, na.rm=TRUE)
61 full_sample_sd = apply(spectra(msidata)[],1,sd, na.rm=TRUE) 58 full_sample_sd = apply(spectra(msidata),1,sd, na.rm=TRUE)
62 full_sample_sem = full_sample_sd/full_sample_mean*100 59 full_sample_sem = full_sample_sd/full_sample_mean*100
63 ## npeaks and sum of all intensities per spectrum and mz 60 ## npeaks and sum of all intensities per spectrum and mz
64 npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE) 61 npeaks= sum(spectra(msidata)>0, na.rm=TRUE)
65 mzTIC = rowSums(spectra(msidata)[], na.rm=TRUE) ## calculate intensity sum for each m/z 62 mzTIC = rowSums(spectra(msidata), na.rm=TRUE) ## calculate intensity sum for each m/z
66 peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra) 63 peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra)
67 64
68 ## combine into dataframe, order is the same for all vectors 65 ## combine into dataframe, order is the same for all vectors
69 mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz) 66 mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz)
70 colnames(mz_df) = c("mz_names", "mz", "sample_mean", "sample_median", "sample_sd", "sample_sem", "intensity_sum", "number_peaks") 67 colnames(mz_df) = c("mz_names", "mz", "sample_mean", "sample_median", "sample_sd", "sample_sem", "intensity_sum", "number_peaks")
71 write.table(mz_df, file="$feature_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 68 write.table(mz_df, file="$feature_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
69 ## free up RAM space in case further steps will be run:
70 rm(mz_df)
71 gc()
72 #end if 72 #end if
73 73
74 ###################### summarized m/z feature output ####################### 74 ###################### summarized m/z feature output #######################
75 75
76 #if str($tabular_annotation.load_annotation) == 'yes_annotation': 76 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
99 ## calculate mean per annotation group 99 ## calculate mean per annotation group
100 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) 100 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
101 count = 1 101 count = 1
102 for (subsample in levels(msidata\$annotation)){ 102 for (subsample in levels(msidata\$annotation)){
103 subsample_pixels = msidata[,msidata\$annotation == subsample] 103 subsample_pixels = msidata[,msidata\$annotation == subsample]
104 subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE) 104 subsample_calc = rowMeans(spectra(subsample_pixels), na.rm=TRUE)
105 sample_matrix = cbind(sample_matrix, subsample_calc) 105 sample_matrix = cbind(sample_matrix, subsample_calc)
106 count = count+1} 106 count = count+1}
107 sample_matrix_mean = cbind(mz_names,sample_matrix) 107 sample_matrix_mean = cbind(mz_names,sample_matrix)
108 sample_matrix_mean = rbind(c("mz_name", levels(msidata\$annotation)), sample_matrix_mean) 108 sample_matrix_mean = rbind(c("mz_name", levels(msidata\$annotation)), sample_matrix_mean)
109 write.table(sample_matrix_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") 109 write.table(sample_matrix_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
114 114
115 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) 115 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
116 count = 1 116 count = 1
117 for (subsample in levels(msidata\$annotation)){ 117 for (subsample in levels(msidata\$annotation)){
118 subsample_pixels = msidata[,msidata\$annotation == subsample] 118 subsample_pixels = msidata[,msidata\$annotation == subsample]
119 subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE) 119 subsample_calc = apply(spectra(subsample_pixels),1,median, na.rm=TRUE)
120 sample_matrix = cbind(sample_matrix, subsample_calc) 120 sample_matrix = cbind(sample_matrix, subsample_calc)
121 count = count+1} 121 count = count+1}
122 sample_matrix_median = cbind(mz_names,sample_matrix) 122 sample_matrix_median = cbind(mz_names,sample_matrix)
123 sample_matrix_median = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_median) 123 sample_matrix_median = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_median)
124 write.table(sample_matrix_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") 124 write.table(sample_matrix_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
129 129
130 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) 130 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
131 count = 1 131 count = 1
132 for (subsample in levels(msidata\$annotation)){ 132 for (subsample in levels(msidata\$annotation)){
133 subsample_pixels = msidata[,msidata\$annotation == subsample] 133 subsample_pixels = msidata[,msidata\$annotation == subsample]
134 subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE) 134 subsample_calc = apply(spectra(subsample_pixels),1,sd, na.rm=TRUE)
135 sample_matrix = cbind(sample_matrix, subsample_calc) 135 sample_matrix = cbind(sample_matrix, subsample_calc)
136 count = count+1} 136 count = count+1}
137 sample_matrix_sd = cbind(mz_names,sample_matrix) 137 sample_matrix_sd = cbind(mz_names,sample_matrix)
138 sample_matrix_sd = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_sd) 138 sample_matrix_sd = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_sd)
139 write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") 139 write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
154 154
155 ## pixel order 155 ## pixel order
156 pixelxyarray=1:length(pixels(msidata)) 156 pixelxyarray=1:length(pixels(msidata))
157 157
158 ## number of peaks per spectrum: every intensity value > 0 counts as peak 158 ## number of peaks per spectrum: every intensity value > 0 counts as peak
159 peaksperpixel = apply(spectra(msidata)[]> 0, 2, sum, na.rm=TRUE) 159 peaksperpixel = colSums(spectra(msidata)>0, na.rm=TRUE)
160 160
161 ## Total ion chromatogram per spectrum 161 ## Total ion chromatogram per spectrum
162 TICs = round(apply(spectra(msidata)[],2, sum, na.rm=TRUE), digits = 2) 162 TICs = round(colSums(spectra(msidata), na.rm=TRUE), digits = 2)
163 163
164 ## Median ion intensity per spectrum 164 ## Median ion intensity per spectrum
165 med_int = round(apply(spectra(msidata)[], 2, median, na.rm=TRUE), digits = 2) 165 med_int = round(apply(spectra(msidata), 2, median, na.rm=TRUE), digits = 2)
166 166
167 ## Maximum ion intensity per spectrum 167 ## Maximum ion intensity per spectrum
168 max_int = round(apply(spectra(msidata)[], 2, max, na.rm=TRUE), digits = 2) 168 max_int = round(apply(spectra(msidata), 2, max, na.rm=TRUE), digits = 2)
169 169
170 ## m/z with the highest intensity per spectrum 170 ## m/z with the highest intensity per spectrum
171 highestmz = apply(spectra(msidata)[],2,which.max) 171 highestmz = apply(spectra(msidata),2,which.max)
172 highestmz_data = mz(msidata)[highestmz] 172 highestmz_data = mz(msidata)[highestmz]
173 173
174 ## Combine into dataframe; order is the same for all vectors 174 ## Combine into dataframe; order is the same for all vectors
175 spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, med_int, TICs, max_int, highestmz_data) 175 spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, med_int, TICs, max_int, highestmz_data)
176 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz") 176 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz")
195 195
196 ## filter for m/z window of each calibrant and calculate if sum of peak intensities > 0 196 ## filter for m/z window of each calibrant and calculate if sum of peak intensities > 0
197 197
198 for (mass in 1:length(inputcalibrantmasses)){ 198 for (mass in 1:length(inputcalibrantmasses)){
199 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] 199 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
200 if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){ 200 if (nrow(filtered_data) > 1 & sum(spectra(filtered_data),na.rm=TRUE) > 0){
201 ## intensity of all m/z > 0 201 ## intensity of all m/z > 0
202 intensity_sum = apply(spectra(filtered_data)[],2,sum, na.rm=TRUE) > 0 202 intensity_sum = colSums(spectra(filtered_data), na.rm=TRUE) > 0
203 203
204 }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){ 204 }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data), na.rm=TRUE) > 0){
205 ## intensity of only m/z > 0 205 ## intensity of only m/z > 0
206 intensity_sum = spectra(filtered_data)[] > 0 206 intensity_sum = spectra(filtered_data) > 0
207 }else{ 207 }else{
208 intensity_sum = rep(FALSE, ncol(filtered_data))} 208 intensity_sum = rep(FALSE, ncol(filtered_data))}
209 ## for each pixel add sum of intensities > 0 in the given m/z range 209 ## for each pixel add sum of intensities > 0 in the given m/z range
210 pixelmatrix = rbind(pixelmatrix, intensity_sum) 210 pixelmatrix = rbind(pixelmatrix, intensity_sum)
211 } 211 }