Mercurial > repos > galaxyp > msi_preprocessing
comparison msi_preprocessing.xml @ 9:4d5578b57a77 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 5feaf3d0e0da8cef1241fecc1f4d6f81324594e6
author | galaxyp |
---|---|
date | Wed, 22 Aug 2018 13:43:04 -0400 |
parents | d77c5228fd1a |
children | df8d7f6f210b |
comparison
equal
deleted
inserted
replaced
8:d77c5228fd1a | 9:4d5578b57a77 |
---|---|
1 <tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.5"> | 1 <tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.6"> |
2 <description> | 2 <description> |
3 mass spectrometry imaging preprocessing | 3 mass spectrometry imaging preprocessing |
4 </description> | 4 </description> |
5 <requirements> | 5 <requirements> |
6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> | 6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> |
43 msidata <- readImzML('infile', attach.only=TRUE) | 43 msidata <- readImzML('infile', attach.only=TRUE) |
44 #end if | 44 #end if |
45 #elif $infile.ext == 'analyze75' | 45 #elif $infile.ext == 'analyze75' |
46 msidata = readAnalyze('infile', attach.only=TRUE) | 46 msidata = readAnalyze('infile', attach.only=TRUE) |
47 #else | 47 #else |
48 load('infile.RData') | 48 loadRData <- function(fileName){ |
49 load(fileName) | |
50 get(ls()[ls() != "fileName"]) | |
51 } | |
52 msidata = loadRData('infile.RData') | |
49 #end if | 53 #end if |
50 | 54 |
51 print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[])))) | 55 print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[])))) |
52 | 56 |
53 ## function to later read RData reference files in | 57 ## function to later read RData reference files in |
62 ######################### preparations for QC report ################# | 66 ######################### preparations for QC report ################# |
63 | 67 |
64 maxfeatures = length(features(msidata)) | 68 maxfeatures = length(features(msidata)) |
65 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 69 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
66 medint = round(median(spectra(msidata)[],na.rm=TRUE), digits=2) | 70 medint = round(median(spectra(msidata)[],na.rm=TRUE), digits=2) |
67 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 71 minmz = round(min(mz(msidata)), digits=2) |
68 QC_numbers= data.frame(inputdata = c(maxfeatures, medianpeaks, medint, TICs)) | 72 maxmz = round(max(mz(msidata)), digits=2) |
73 QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medianpeaks, medint)) | |
69 vectorofactions = "inputdata" | 74 vectorofactions = "inputdata" |
70 | 75 |
71 ############################### Preprocessing steps ########################### | 76 ############################### Preprocessing steps ########################### |
72 ############################################################################### | 77 ############################################################################### |
73 | 78 |
84 ############################### QC ########################### | 89 ############################### QC ########################### |
85 | 90 |
86 maxfeatures = length(features(msidata)) | 91 maxfeatures = length(features(msidata)) |
87 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE),) | 92 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE),) |
88 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 93 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
89 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 94 minmz = round(min(mz(msidata)), digits=2) |
90 normalized = c(maxfeatures, medianpeaks, medint, TICs) | 95 maxmz = round(max(mz(msidata)), digits=2) |
96 normalized = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
91 QC_numbers= cbind(QC_numbers, normalized) | 97 QC_numbers= cbind(QC_numbers, normalized) |
92 vectorofactions = append(vectorofactions, "normalized") | 98 vectorofactions = append(vectorofactions, "normalized") |
93 | 99 |
94 ############################### Baseline reduction ########################### | 100 ############################### Baseline reduction ########################### |
95 | 101 |
102 ############################### QC ########################### | 108 ############################### QC ########################### |
103 | 109 |
104 maxfeatures = length(features(msidata)) | 110 maxfeatures = length(features(msidata)) |
105 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 111 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
106 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 112 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
107 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 113 minmz = round(min(mz(msidata)), digits=2) |
108 baseline= c(maxfeatures, medianpeaks, medint, TICs) | 114 maxmz = round(max(mz(msidata)), digits=2) |
115 baseline = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
109 QC_numbers= cbind(QC_numbers, baseline) | 116 QC_numbers= cbind(QC_numbers, baseline) |
110 vectorofactions = append(vectorofactions, "baseline red.") | 117 vectorofactions = append(vectorofactions, "baseline red.") |
111 | 118 |
112 ############################### Smoothing ########################### | 119 ############################### Smoothing ########################### |
113 | 120 |
134 ############################### QC ########################### | 141 ############################### QC ########################### |
135 | 142 |
136 maxfeatures = length(features(msidata)) | 143 maxfeatures = length(features(msidata)) |
137 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 144 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
138 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 145 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
139 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 146 minmz = round(min(mz(msidata)), digits=2) |
140 smoothed= c(maxfeatures, medianpeaks, medint, TICs) | 147 maxmz = round(max(mz(msidata)), digits=2) |
148 smoothed = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
141 QC_numbers= cbind(QC_numbers, smoothed) | 149 QC_numbers= cbind(QC_numbers, smoothed) |
142 vectorofactions = append(vectorofactions, "smoothed") | 150 vectorofactions = append(vectorofactions, "smoothed") |
143 | 151 |
144 ############################### Peak picking ########################### | 152 ############################### Peak picking ########################### |
145 | 153 |
146 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking': | 154 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking': |
147 print('Peak_picking') | 155 print('Peak_picking') |
148 ## Peakpicking | 156 ## Peakpicking |
149 | 157 |
150 | |
151 ## remove duplicated coordinates, otherwise peak picking will fail | 158 ## remove duplicated coordinates, otherwise peak picking will fail |
152 print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed")) | 159 print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed")) |
153 msidata <- msidata[,!duplicated(coord(msidata))] | 160 msidata <- msidata[,!duplicated(coord(msidata))] |
154 | 161 |
155 #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive': | 162 #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive': |
172 ############################### QC ########################### | 179 ############################### QC ########################### |
173 | 180 |
174 maxfeatures = length(features(msidata)) | 181 maxfeatures = length(features(msidata)) |
175 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 182 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
176 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 183 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
177 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 184 minmz = round(min(mz(msidata)), digits=2) |
178 picked= c(maxfeatures, medianpeaks, medint, TICs) | 185 maxmz = round(max(mz(msidata)), digits=2) |
186 picked = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
179 QC_numbers= cbind(QC_numbers, picked) | 187 QC_numbers= cbind(QC_numbers, picked) |
180 vectorofactions = append(vectorofactions, "picked") | 188 vectorofactions = append(vectorofactions, "picked") |
181 | 189 |
182 ############################### Peak alignment ########################### | 190 ############################### Peak alignment ########################### |
183 | 191 |
218 ############################### QC ########################### | 226 ############################### QC ########################### |
219 | 227 |
220 maxfeatures = length(features(msidata)) | 228 maxfeatures = length(features(msidata)) |
221 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 229 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
222 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 230 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
223 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 231 minmz = round(min(mz(msidata)), digits=2) |
224 aligned= c(maxfeatures, medianpeaks, medint, TICs) | 232 maxmz = round(max(mz(msidata)), digits=2) |
233 aligned = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
225 QC_numbers= cbind(QC_numbers, aligned) | 234 QC_numbers= cbind(QC_numbers, aligned) |
226 vectorofactions = append(vectorofactions, "aligned") | 235 vectorofactions = append(vectorofactions, "aligned") |
227 | 236 |
228 ############################### Peak filtering ########################### | 237 ############################### Peak filtering ########################### |
229 | 238 |
235 ############################### QC ########################### | 244 ############################### QC ########################### |
236 | 245 |
237 maxfeatures = length(features(msidata)) | 246 maxfeatures = length(features(msidata)) |
238 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 247 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
239 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 248 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
240 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 249 minmz = round(min(mz(msidata)), digits=2) |
241 filtered= c(maxfeatures, medianpeaks, medint, TICs) | 250 maxmz = round(max(mz(msidata)), digits=2) |
251 filtered = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
242 QC_numbers= cbind(QC_numbers, filtered) | 252 QC_numbers= cbind(QC_numbers, filtered) |
243 vectorofactions = append(vectorofactions, "filtered") | 253 vectorofactions = append(vectorofactions, "filtered") |
244 | 254 |
245 ############################### Data reduction ########################### | 255 ############################### Data reduction ########################### |
246 | 256 |
277 ############################### QC ########################### | 287 ############################### QC ########################### |
278 | 288 |
279 maxfeatures = length(features(msidata)) | 289 maxfeatures = length(features(msidata)) |
280 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 290 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
281 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 291 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
282 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 292 minmz = round(min(mz(msidata)), digits=2) |
283 reduced= c(maxfeatures, medianpeaks, medint, TICs) | 293 maxmz = round(max(mz(msidata)), digits=2) |
294 reduced = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
284 QC_numbers= cbind(QC_numbers, reduced) | 295 QC_numbers= cbind(QC_numbers, reduced) |
285 vectorofactions = append(vectorofactions, "reduced") | 296 vectorofactions = append(vectorofactions, "reduced") |
286 | 297 |
287 ############################### Transformation ########################### | 298 ############################### Transformation ########################### |
288 | 299 |
306 ############################### QC ########################### | 317 ############################### QC ########################### |
307 | 318 |
308 maxfeatures = length(features(msidata)) | 319 maxfeatures = length(features(msidata)) |
309 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 320 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
310 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 321 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
311 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 322 minmz = round(min(mz(msidata)), digits=2) |
312 transformed= c(maxfeatures, medianpeaks, medint, TICs) | 323 maxmz = round(max(mz(msidata)), digits=2) |
324 transformed = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
313 QC_numbers= cbind(QC_numbers, transformed) | 325 QC_numbers= cbind(QC_numbers, transformed) |
314 vectorofactions = append(vectorofactions, "transformed") | 326 vectorofactions = append(vectorofactions, "transformed") |
315 | 327 |
316 #end if | 328 #end if |
317 #end for | 329 #end for |
434 ## save QC report | 446 ## save QC report |
435 | 447 |
436 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12) | 448 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12) |
437 plot(0,type='n',axes=FALSE,ann=FALSE) | 449 plot(0,type='n',axes=FALSE,ann=FALSE) |
438 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name")) | 450 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name")) |
439 rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "median TIC") | 451 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median\n# peaks", "median\nintensity") |
440 grid.table(t(QC_numbers)) | 452 grid.table(t(QC_numbers)) |
441 | 453 |
442 #if str($tabular_annotation.load_annotation) == 'yes_annotation': | 454 #if str($tabular_annotation.load_annotation) == 'yes_annotation': |
443 | 455 |
444 ## the more annotation groups a file has the smaller will be the legend | 456 ## the more annotation groups a file has the smaller will be the legend |
889 | 901 |
890 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_ | 902 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_ |
891 | 903 |
892 This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data. | 904 This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data. |
893 | 905 |
894 Input data: 3 types of input data can be used: | 906 Input data: 3 types of MSI data can be used: |
895 | 907 |
896 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ | 908 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ |
897 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) | 909 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) |
898 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) | 910 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) |
911 - Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed before peak picking. | |
899 - optional: tabular file with pixel annotations: x and y values in separate columns and the corresponding annotation in a third column | 912 - optional: tabular file with pixel annotations: x and y values in separate columns and the corresponding annotation in a third column |
900 | 913 |
901 Options: | 914 Options: |
902 | 915 |
903 - Normalization: Normalization of intensities to total ion current (TIC) | 916 - Normalization: Normalization of intensities to total ion current (TIC) |
904 - Baseline reduction: Baseline reduction removes backgroundintensity generated by chemical noise (common in MALDI datasets) | 917 - Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets) |
905 - Smoothening: Smoothing of the peaks reduces noise and improves peak detection | 918 - Smoothing: Smoothing of the peaks reduces noise and improves peak detection |
906 - Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards) | 919 - Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards) |
907 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value | 920 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value |
908 - Peak filtering: works only on centroided data (after peak picking and alignment or data reduction with peak filtering), removes peaks that occur only in a small proportion of pixels. If not sure which cutoff to choose, run qualitycontrol first and decide according to the zero value plot. | 921 - Peak filtering: works only on centroided data (after peak picking and alignment or data reduction with peak filtering), removes peaks that occur only in a small proportion of pixels. If not sure which cutoff to choose, run qualitycontrol first and decide according to the zero value plot. |
909 - Data reduction: binning, resampling or peak filtering to reduce data | 922 - Data reduction: binning, resampling or peak filtering to reduce data |
910 - Transformation: log2 or square root transformation of all intensities; when using log2 transformation, zero intensities will become NA, which can lead to compatibility problems. | 923 - Transformation: log2 or square root transformation of all intensities; when using log2 transformation, zero intensities will become NA, which can lead to compatibility problems. |
912 | 925 |
913 Output: | 926 Output: |
914 | 927 |
915 - imzML file, preprocessed | 928 - imzML file, preprocessed |
916 - pdf with key values after each processing step, in case of loaded annotations file overview plot of pixel annotations | 929 - pdf with key values after each processing step, in case of loaded annotations file overview plot of pixel annotations |
917 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns) | 930 - optional: intensity matrix as tabular file (m/z in rows and pixel in columns, filled with intensity values) |
918 - optional: summarized intensity matrix: mean, median or standard deviation for each m/z feature; in case pixel annotations are provided the intensity values are summarized for each pixel group | 931 - optional: summarized intensity matrix: mean, median or standard deviation for each m/z feature; in case pixel annotations are provided the intensity values are summarized for each pixel group |
919 | 932 |
920 Tip: | 933 Tip: |
921 | 934 |
922 - Peak alignment works only after peak picking | 935 - Peak alignment works only after peak picking |