comparison msi_preprocessing.xml @ 7:1a3d477bc54a draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 8087490eb4dcaf4ead0f03eae4126780d21e5503
author galaxyp
date Fri, 06 Jul 2018 14:13:48 -0400
parents d3fd539f477e
children d77c5228fd1a
comparison
equal deleted inserted replaced
6:d3fd539f477e 7:1a3d477bc54a
1 <tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.3"> 1 <tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.4">
2 <description> 2 <description>
3 mass spectrometry imaging preprocessing 3 mass spectrometry imaging preprocessing
4 </description> 4 </description>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> 6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
36 36
37 #if $infile.ext == 'imzml' 37 #if $infile.ext == 'imzml'
38 #if str($processed_cond.processed_file) == "processed": 38 #if str($processed_cond.processed_file) == "processed":
39 msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units") 39 msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units")
40 #else 40 #else
41 msidata <- readImzML('infile') 41 msidata <- readImzML('infile', attach.only=TRUE)
42 #end if 42 #end if
43 #elif $infile.ext == 'analyze75' 43 #elif $infile.ext == 'analyze75'
44 msidata = readAnalyze('infile') 44 msidata = readAnalyze('infile', attach.only=TRUE)
45 #else 45 #else
46 load('infile.RData') 46 load('infile.RData')
47 #end if 47 #end if
48
49 print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[]))))
48 50
49 ## function to later read RData reference files in 51 ## function to later read RData reference files in
50 52
51 loadRData <- function(fileName){ 53 loadRData <- function(fileName){
52 #loads an RData file, and returns it 54 #loads an RData file, and returns it
53 load(fileName) 55 load(fileName)
54 get(ls()[ls() != "fileName"]) 56 get(ls()[ls() != "fileName"])
55 } 57 }
56 58
57 ######################### preparations for QC report ################# 59 if (sum(spectra(msidata)[]>0, na.rm=TRUE)> 0){
58 60 ######################### preparations for QC report #################
59 maxfeatures = length(features(msidata)) 61
60 medianpeaks = median(colSums(spectra(msidata)[]>0)) 62 maxfeatures = length(features(msidata))
61 medint = round(median(spectra(msidata)[]), digits=2) 63 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
62 TICs = round(mean(colSums(spectra(msidata)[])), digits=1) 64 medint = round(median(spectra(msidata)[],na.rm=TRUE), digits=2)
63 QC_numbers= data.frame(inputdata = c(maxfeatures, medianpeaks, medint, TICs)) 65 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
64 vectorofactions = "inputdata" 66 QC_numbers= data.frame(inputdata = c(maxfeatures, medianpeaks, medint, TICs))
65 67 vectorofactions = "inputdata"
66 ############################### Preprocessing steps ########################### 68
67 ############################################################################### 69 ############################### Preprocessing steps ###########################
68 70 ###############################################################################
69 #for $method in $methods: 71
70 72 #for $method in $methods:
71 ############################### Normalization ########################### 73
72 74 ############################### Normalization ###########################
73 #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization': 75
74 print('Normalization') 76 #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization':
75 ##normalization 77 print('Normalization')
76 78 ##normalization
77 msidata = normalize(msidata, method="tic") 79
78 80 msidata = normalize(msidata, method="tic")
79 ############################### QC ########################### 81
80 82 ############################### QC ###########################
81 maxfeatures = length(features(msidata)) 83
82 medianpeaks = median(colSums(spectra(msidata)[]>0)) 84 maxfeatures = length(features(msidata))
83 medint = round(median(spectra(msidata)[]), digits=2) 85 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE),)
84 TICs = round(mean(colSums(spectra(msidata)[])), digits=1) 86 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
85 normalized = c(maxfeatures, medianpeaks, medint, TICs) 87 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
86 QC_numbers= cbind(QC_numbers, normalized) 88 normalized = c(maxfeatures, medianpeaks, medint, TICs)
87 vectorofactions = append(vectorofactions, "normalized") 89 QC_numbers= cbind(QC_numbers, normalized)
88 90 vectorofactions = append(vectorofactions, "normalized")
89 ############################### Baseline reduction ########################### 91
90 92 ############################### Baseline reduction ###########################
91 #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction': 93
92 print('Baseline_reduction') 94 #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction':
93 ##baseline reduction 95 print('Baseline_reduction')
94 96 ##baseline reduction
95 msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline) 97
96 98 msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline)
97 ############################### QC ########################### 99
98 100 ############################### QC ###########################
99 maxfeatures = length(features(msidata)) 101
100 medianpeaks = median(colSums(spectra(msidata)[]>0)) 102 maxfeatures = length(features(msidata))
101 medint = round(median(spectra(msidata)[]), digits=2) 103 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
102 TICs = round(mean(colSums(spectra(msidata)[])), digits=1) 104 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
103 baseline= c(maxfeatures, medianpeaks, medint, TICs) 105 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
104 QC_numbers= cbind(QC_numbers, baseline) 106 baseline= c(maxfeatures, medianpeaks, medint, TICs)
105 vectorofactions = append(vectorofactions, "baseline red.") 107 QC_numbers= cbind(QC_numbers, baseline)
106 108 vectorofactions = append(vectorofactions, "baseline red.")
107 ############################### Smoothing ########################### 109
108 110 ############################### Smoothing ###########################
109 #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing': 111
110 print('Smoothing') 112 #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing':
111 ## Smoothing 113 print('Smoothing')
112 114 ## Smoothing
113 #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian': 115
114 print('gaussian smoothing') 116 #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian':
115 117 print('gaussian smoothing')
116 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian) 118
117 119 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian)
118 #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay': 120
119 print('sgolay smoothing') 121 #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay':
120 122 print('sgolay smoothing')
121 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters) 123
122 #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma': 124 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters)
123 print('sgolay smoothing') 125 #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma':
124 126 print('sgolay smoothing')
125 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter) 127
126 128 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)
127 #end if
128
129 ############################### QC ###########################
130
131 maxfeatures = length(features(msidata))
132 medianpeaks = median(colSums(spectra(msidata)[]>0))
133 medint = round(median(spectra(msidata)[]), digits=2)
134 TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
135 smoothed= c(maxfeatures, medianpeaks, medint, TICs)
136 QC_numbers= cbind(QC_numbers, smoothed)
137 vectorofactions = append(vectorofactions, "smoothed")
138
139 ############################### Peak picking ###########################
140
141 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking':
142 print('Peak_picking')
143 ## Peakpicking
144
145 #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive':
146 print('adaptive peakpicking')
147
148 msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, spar=$method.methods_conditional.methods_for_picking.spar_picking)
149
150 #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'limpic':
151 print('limpic peakpicking')
152
153 msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, thresh=$method.methods_conditional.methods_for_picking.tresh_picking)
154
155 #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'simple':
156 print('simple peakpicking')
157
158 msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method)
159
160 #end if
161
162 ############################### QC ###########################
163
164 maxfeatures = length(features(msidata))
165 medianpeaks = median(colSums(spectra(msidata)[]>0))
166 medint = round(median(spectra(msidata)[]), digits=2)
167 TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
168 picked= c(maxfeatures, medianpeaks, medint, TICs)
169 QC_numbers= cbind(QC_numbers, picked)
170 vectorofactions = append(vectorofactions, "picked")
171
172 ############################### Peak alignment ###########################
173
174 #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment':
175 print('Peak_alignment')
176 ## Peakalignment
177
178 #if str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_noref':
179
180 align_peak_reference = msidata
181
182 #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table':
183
184 align_reference_table = read.delim("$method.methods_conditional.align_ref_type.align_peaks_table", header = FALSE, stringsAsFactors = FALSE)
185 align_reference_column = align_reference_table[,$method.methods_conditional.align_ref_type.align_mass_column]
186 align_peak_reference = align_reference_column[align_reference_column>=min(mz(msidata)) & align_reference_column<=max(mz(msidata))]
187 if (length(align_peak_reference) == 0)
188 {align_peak_reference = 0}
189
190 #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_msidata_ref':
191
192 align_peak_reference = loadRData('$method.methods_conditional.align_ref_type.align_peaks_msidata')
193
194 #end if
195
196 #if str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'diff':
197 print('diff peakalignment')
198
199 msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',diff.max =$method.methods_conditional.methods_for_alignment.value_diffalignment, units = "$method.methods_conditional.methods_for_alignment.units_diffalignment", ref=align_peak_reference)
200
201 #elif str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'DP':
202 print('DPpeakalignment')
203
204 msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',gap = $method.methods_conditional.methods_for_alignment.gap_DPalignment, ref=align_peak_reference)
205
206 #end if
207
208 ############################### QC ###########################
209
210 maxfeatures = length(features(msidata))
211 medianpeaks = median(colSums(spectra(msidata)[]>0))
212 medint = round(median(spectra(msidata)[]), digits=2)
213 TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
214 aligned= c(maxfeatures, medianpeaks, medint, TICs)
215 QC_numbers= cbind(QC_numbers, aligned)
216 vectorofactions = append(vectorofactions, "aligned")
217
218 ############################### Peak filtering ###########################
219
220 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering':
221 print('Peak_filtering')
222
223 msidata = peakFilter(msidata, method='freq', freq.min = $method.methods_conditional.frequ_filtering)
224
225 ############################### QC ###########################
226
227 maxfeatures = length(features(msidata))
228 medianpeaks = median(colSums(spectra(msidata)[]>0))
229 medint = round(median(spectra(msidata)[]), digits=2)
230 TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
231 filtered= c(maxfeatures, medianpeaks, medint, TICs)
232 QC_numbers= cbind(QC_numbers, filtered)
233 vectorofactions = append(vectorofactions, "filtered")
234
235 ############################### Data reduction ###########################
236
237 #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction':
238 print('Data_reduction')
239
240 #if str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'bin':
241 print('bin reduction')
242
243 msidata = reduceDimension(msidata, method="bin", width=$method.methods_conditional.methods_for_reduction.bin_width, units="$method.methods_conditional.methods_for_reduction.bin_units", fun=$method.methods_conditional.methods_for_reduction.bin_fun)
244
245 #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample':
246 print('resample reduction')
247
248 msidata = reduceDimension(msidata, method="resample", step=$method.methods_conditional.methods_for_reduction.resample_step)
249
250 #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'peaks':
251 print('peaks reduction')
252
253 #if str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'table':
254
255 reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.peaks_table", header = FALSE, stringsAsFactors = FALSE)
256 reference_column = reference_table[,$method.methods_conditional.methods_for_reduction.ref_type.mass_column]
257 peak_reference = reference_column[reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))]
258
259 #elif str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'msidata_ref':
260
261 peak_reference = loadRData('$method.methods_conditional.methods_for_reduction.ref_type.peaks_msidata')
262 129
263 #end if 130 #end if
264 131
265 msidata = reduceDimension(msidata, method="peaks", ref=peak_reference, type="$method.methods_conditional.methods_for_reduction.peaks_type") 132 ############################### QC ###########################
266 #end if 133
267 ############################### QC ########################### 134 maxfeatures = length(features(msidata))
268 135 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
269 maxfeatures = length(features(msidata)) 136 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
270 medianpeaks = median(colSums(spectra(msidata)[]>0)) 137 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
271 medint = round(median(spectra(msidata)[]), digits=2) 138 smoothed= c(maxfeatures, medianpeaks, medint, TICs)
272 TICs = round(mean(colSums(spectra(msidata)[])), digits=1) 139 QC_numbers= cbind(QC_numbers, smoothed)
273 reduced= c(maxfeatures, medianpeaks, medint, TICs) 140 vectorofactions = append(vectorofactions, "smoothed")
274 QC_numbers= cbind(QC_numbers, reduced) 141
275 vectorofactions = append(vectorofactions, "reduced") 142 ############################### Peak picking ###########################
276 143
277 ############################### Transformation ########################### 144 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking':
278 145 print('Peak_picking')
279 #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation': 146 ## Peakpicking
280 print('Transformation') 147
281 148
282 #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2': 149 ## remove duplicated coordinates, otherwise peak picking will fail
283 print('log2 transformation') 150 print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed"))
284 151 msidata <- msidata[,!duplicated(coord(msidata))]
285 spectra(msidata)[][spectra(msidata)[] ==0] = NA 152
286 print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra(msidata)[])))) 153 #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive':
287 spectra(msidata)[] = log2(spectra(msidata)[]) 154 print('adaptive peakpicking')
288 155
289 #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt': 156 msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, spar=$method.methods_conditional.methods_for_picking.spar_picking)
290 print('squareroot transformation') 157
291 158 #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'limpic':
292 spectra(msidata)[] = sqrt(spectra(msidata)[]) 159 print('limpic peakpicking')
293 160
294 #end if 161 msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, thresh=$method.methods_conditional.methods_for_picking.tresh_picking)
295 162
296 ############################### QC ########################### 163 #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'simple':
297 164 print('simple peakpicking')
298 maxfeatures = length(features(msidata)) 165
299 medianpeaks = median(colSums(spectra(msidata)[]>0), na.rm=TRUE) 166 msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method)
300 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) 167
301 TICs = round(mean(colSums(spectra(msidata)[]), na.rm=TRUE), digits=1) 168 #end if
302 transformed= c(maxfeatures, medianpeaks, medint, TICs) 169
303 QC_numbers= cbind(QC_numbers, transformed) 170 ############################### QC ###########################
304 vectorofactions = append(vectorofactions, "transformed") 171
305 172 maxfeatures = length(features(msidata))
306 #end if 173 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
307 #end for 174 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
308 175 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
309 ############# Outputs: summar matrix, RData, tabular and QC report ############# 176 picked= c(maxfeatures, medianpeaks, medint, TICs)
310 ################################################################################ 177 QC_numbers= cbind(QC_numbers, picked)
311 ## optional summarized matrix 178 vectorofactions = append(vectorofactions, "picked")
312 print('Summarized matrix') 179
313 180 ############################### Peak alignment ###########################
314 #if "mean" in str($summary_type).split(","): 181
315 print("mean matrix") 182 #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment':
316 if (!is.null(levels(msidata\$combined_sample))){ 183 print('Peak_alignment')
317 184 ## Peakalignment
318 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) 185
319 count = 1 186 #if str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_noref':
320 for (subsample in levels(msidata\$combined_sample)){ 187
321 subsample_pixels = msidata[,msidata\$combined_sample == subsample] 188 align_peak_reference = msidata
322 subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE) 189
323 sample_matrix = cbind(sample_matrix, subsample_calc) 190 #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table':
324 count = count+1 191
192 align_reference_table = read.delim("$method.methods_conditional.align_ref_type.align_peaks_table", header = FALSE, stringsAsFactors = FALSE)
193 align_reference_column = align_reference_table[,$method.methods_conditional.align_ref_type.align_mass_column]
194 align_peak_reference = align_reference_column[align_reference_column>=min(mz(msidata)) & align_reference_column<=max(mz(msidata))]
195 if (length(align_peak_reference) == 0)
196 {align_peak_reference = 0}
197
198 #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_msidata_ref':
199
200 align_peak_reference = loadRData('$method.methods_conditional.align_ref_type.align_peaks_msidata')
201
202 #end if
203
204 #if str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'diff':
205 print('diff peakalignment')
206
207 msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',diff.max =$method.methods_conditional.methods_for_alignment.value_diffalignment, units = "$method.methods_conditional.methods_for_alignment.units_diffalignment", ref=align_peak_reference)
208
209 #elif str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'DP':
210 print('DPpeakalignment')
211
212 msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',gap = $method.methods_conditional.methods_for_alignment.gap_DPalignment, ref=align_peak_reference)
213
214 #end if
215
216 ############################### QC ###########################
217
218 maxfeatures = length(features(msidata))
219 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
220 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
221 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
222 aligned= c(maxfeatures, medianpeaks, medint, TICs)
223 QC_numbers= cbind(QC_numbers, aligned)
224 vectorofactions = append(vectorofactions, "aligned")
225
226 ############################### Peak filtering ###########################
227
228 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering':
229 print('Peak_filtering')
230
231 msidata = peakFilter(msidata, method='freq', freq.min = $method.methods_conditional.frequ_filtering)
232
233 ############################### QC ###########################
234
235 maxfeatures = length(features(msidata))
236 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
237 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
238 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
239 filtered= c(maxfeatures, medianpeaks, medint, TICs)
240 QC_numbers= cbind(QC_numbers, filtered)
241 vectorofactions = append(vectorofactions, "filtered")
242
243 ############################### Data reduction ###########################
244
245 #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction':
246 print('Data_reduction')
247
248 #if str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'bin':
249 print('bin reduction')
250
251 msidata = reduceDimension(msidata, method="bin", width=$method.methods_conditional.methods_for_reduction.bin_width, units="$method.methods_conditional.methods_for_reduction.bin_units", fun=$method.methods_conditional.methods_for_reduction.bin_fun)
252
253 #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample':
254 print('resample reduction')
255
256 msidata = reduceDimension(msidata, method="resample", step=$method.methods_conditional.methods_for_reduction.resample_step)
257
258 #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'peaks':
259 print('peaks reduction')
260
261 #if str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'table':
262
263 reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.peaks_table", header = FALSE, stringsAsFactors = FALSE)
264 reference_column = reference_table[,$method.methods_conditional.methods_for_reduction.ref_type.mass_column]
265 peak_reference = reference_column[reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))]
266
267 #elif str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'msidata_ref':
268
269 peak_reference = loadRData('$method.methods_conditional.methods_for_reduction.ref_type.peaks_msidata')
270
271 #end if
272
273 msidata = reduceDimension(msidata, method="peaks", ref=peak_reference, type="$method.methods_conditional.methods_for_reduction.peaks_type")
274 #end if
275 ############################### QC ###########################
276
277 maxfeatures = length(features(msidata))
278 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
279 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
280 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
281 reduced= c(maxfeatures, medianpeaks, medint, TICs)
282 QC_numbers= cbind(QC_numbers, reduced)
283 vectorofactions = append(vectorofactions, "reduced")
284
285 ############################### Transformation ###########################
286
287 #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation':
288 print('Transformation')
289
290 #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2':
291 print('log2 transformation')
292
293 spectra(msidata)[][spectra(msidata)[] ==0] = NA
294 print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra(msidata)[]))))
295 spectra(msidata)[] = log2(spectra(msidata)[])
296
297 #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt':
298 print('squareroot transformation')
299
300 spectra(msidata)[] = sqrt(spectra(msidata)[])
301
302 #end if
303
304 ############################### QC ###########################
305
306 maxfeatures = length(features(msidata))
307 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
308 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
309 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
310 transformed= c(maxfeatures, medianpeaks, medint, TICs)
311 QC_numbers= cbind(QC_numbers, transformed)
312 vectorofactions = append(vectorofactions, "transformed")
313
314 #end if
315 #end for
316
317 ############# Outputs: summar matrix, RData, tabular and QC report #############
318 ################################################################################
319 ## optional summarized matrix
320 print('Summarized matrix')
321
322 #if "mean" in str($summary_type).split(","):
323 print("mean matrix")
324 if (!is.null(levels(msidata\$combined_sample))){
325
326 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
327 count = 1
328 for (subsample in levels(msidata\$combined_sample)){
329 subsample_pixels = msidata[,msidata\$combined_sample == subsample]
330 subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE)
331 sample_matrix = cbind(sample_matrix, subsample_calc)
332 count = count+1
333 }
334 rownames(sample_matrix) = mz(msidata)
335 colnames(sample_matrix) = levels(msidata\$combined_sample)
336 write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
337 }else{
338 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE))
339 rownames(full_sample_calc) = mz(msidata)
340 colnames(full_sample_calc) = "$infile.display_name"
341 write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
325 } 342 }
326 rownames(sample_matrix) = mz(msidata) 343
327 colnames(sample_matrix) = levels(msidata\$combined_sample) 344 #end if
328 write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") 345
329 }else{ 346 #if "median" in str($summary_type).split(","):
330 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE)) 347 print("median matrix")
331 rownames(full_sample_calc) = mz(msidata) 348 if (!is.null(levels(msidata\$combined_sample))){
332 colnames(full_sample_calc) = "$infile.display_name" 349 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
333 write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") 350 count = 1
334 } 351 for (subsample in levels(msidata\$combined_sample)){
335 352 subsample_pixels = msidata[,msidata\$combined_sample == subsample]
336 #end if 353 subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE)
337 354 sample_matrix = cbind(sample_matrix, subsample_calc)
338 #if "median" in str($summary_type).split(","): 355 count = count+1
339 print("median matrix") 356 }
340 if (!is.null(levels(msidata\$combined_sample))){ 357
341 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) 358 rownames(sample_matrix) = mz(msidata)
342 count = 1 359 colnames(sample_matrix) = levels(msidata\$combined_sample)
343 for (subsample in levels(msidata\$combined_sample)){ 360 write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
344 subsample_pixels = msidata[,msidata\$combined_sample == subsample] 361 }else{
345 subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE) 362 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE))
346 sample_matrix = cbind(sample_matrix, subsample_calc) 363 rownames(full_sample_calc) = mz(msidata)
347 count = count+1 364 colnames(full_sample_calc) = "$infile.display_name"
365 write.table(full_sample_calc, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
348 } 366 }
349 367 #end if
350 rownames(sample_matrix) = mz(msidata) 368
351 colnames(sample_matrix) = levels(msidata\$combined_sample) 369 #if "sd" in str($summary_type).split(","):
352 write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") 370 print("sd matrix")
353 }else{ 371 if (!is.null(levels(msidata\$combined_sample))){
354 full_sample_calc = apply(spectra(msidata)[],1,median, na.rm=TRUE) 372 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
355 rownames(full_sample_calc) = mz(msidata) 373 count = 1
356 colnames(full_sample_calc) = "$infile.display_name" 374 for (subsample in levels(msidata\$combined_sample)){
357 write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") 375 subsample_pixels = msidata[,msidata\$combined_sample == subsample]
358 } 376 subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE)
359 #end if 377 sample_matrix = cbind(sample_matrix, subsample_calc)
360 378 count = count+1
361 #if "sd" in str($summary_type).split(","): 379 }
362 print("sd matrix") 380
363 if (!is.null(levels(msidata\$combined_sample))){ 381 rownames(sample_matrix) = mz(msidata)
364 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) 382 colnames(sample_matrix) = levels(msidata\$combined_sample)
365 count = 1 383 write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
366 for (subsample in levels(msidata\$combined_sample)){ 384 }else{
367 subsample_pixels = msidata[,msidata\$combined_sample == subsample] 385
368 subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE) 386 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,sd, na.rm=TRUE))
369 sample_matrix = cbind(sample_matrix, subsample_calc) 387 rownames(full_sample_calc) = mz(msidata)
370 count = count+1 388 colnames(full_sample_calc) = "$infile.display_name"
389 write.table(full_sample_calc, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
371 } 390 }
372 391 #end if
373 rownames(sample_matrix) = mz(msidata) 392 print(paste0("Number of NA in output file: ",sum(is.na(spectra(msidata)[]))))
374 colnames(sample_matrix) = levels(msidata\$combined_sample) 393
375 write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") 394 ## save as (.RData)
376 }else{ 395 save(msidata, file="$msidata_preprocessed")
377 full_sample_calc = apply(spectra(msidata)[],1,sd, na.rm=TRUE) 396
378 rownames(full_sample_calc) = mz(msidata) 397 ## save output matrix
379 colnames(full_sample_calc) = "$infile.display_name" 398 #if $output_matrix:
380 write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") 399
381 } 400 if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){
382 #end if 401 spectramatrix = spectra(msidata)[]
383 402 spectramatrix = cbind(mz(msidata),spectramatrix)
384 ## save as (.RData) 403 newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix)
385 save(msidata, file="$msidata_preprocessed") 404 write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
386
387 print(paste0("Number of NAs in intensity matrix: ", sum(is.na(spectra(msidata)[]))))
388
389 ## save output matrix
390 #if $output_matrix:
391
392 if (length(features(msidata))> 0)
393 {
394 ## save as intensity matrix
395 spectramatrix = spectra(msidata)[]
396 rownames(spectramatrix) = mz(msidata)
397 newmatrix = rbind(pixels(msidata), spectramatrix)
398 write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
399
400 }else{ 405 }else{
401 print("file has no features left") 406 print("file has no features or pixels left")
402 write.table(matrix(rownames(coord(msidata)), ncol=ncol(msidata), nrow=1), file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
403 } 407 }
404 408 #end if
405 #end if 409
406 410 ## save QC report
407 ## save QC report
408 411
409 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12) 412 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
410 plot(0,type='n',axes=FALSE,ann=FALSE) 413 plot(0,type='n',axes=FALSE,ann=FALSE)
411 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name")) 414 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name"))
412 rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "median TIC") 415 rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "median TIC")
413 grid.table(t(QC_numbers)) 416 grid.table(t(QC_numbers))
414 dev.off() 417 dev.off()
415 418
419 }else{
420 print("inputfile has no intensities > 0")
421 }
422
416 ]]></configfile> 423 ]]></configfile>
417 </configfiles> 424 </configfiles>
418 <inputs> 425 <inputs>
419 <param name="infile" type="data" format="imzml,rdata,danalyze75" 426 <param name="infile" type="data" format="imzml,rdata,analyze75"
420 label="MSI data as imzml, analyze7.5 or Cardinal MSImageSet saved as RData" 427 label="MSI data as imzml, analyze7.5 or Cardinal MSImageSet saved as RData"
421 help="load imzml and ibd file by uploading composite datatype imzml"/> 428 help="load imzml and ibd file by uploading composite datatype imzml"/>
422 <conditional name="processed_cond"> 429 <conditional name="processed_cond">
423 <param name="processed_file" type="select" label="Is the input file a processed imzML file "> 430 <param name="processed_file" type="select" label="Is the input file a processed imzML file ">
424 <option value="no_processed" selected="True">not a processed imzML</option> 431 <option value="no_processed" selected="True">not a processed imzML</option>
608 </param> 615 </param>
609 <param name="output_matrix" type="boolean" label="Intensity matrix output"/> 616 <param name="output_matrix" type="boolean" label="Intensity matrix output"/>
610 </inputs> 617 </inputs>
611 <outputs> 618 <outputs>
612 <data format="rdata" name="msidata_preprocessed" label="$infile.display_name preprocessed"/> 619 <data format="rdata" name="msidata_preprocessed" label="$infile.display_name preprocessed"/>
613 <data format="pdf" name="QC_plots" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"/> 620 <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"/>
614 <data format="tabular" name="summarized_output_mean" label="$infile.display_name mean_matrix"> 621 <data format="tabular" name="summarized_output_mean" label="$infile.display_name mean_matrix">
615 <filter>summary_type and "mean" in summary_type</filter> 622 <filter>summary_type and "mean" in summary_type</filter>
616 </data> 623 </data>
617 <data format="tabular" name="summarized_output_median" label="$infile.display_name median_matrix"> 624 <data format="tabular" name="summarized_output_median" label="$infile.display_name median_matrix">
618 <filter>summary_type and "median" in summary_type</filter> 625 <filter>summary_type and "median" in summary_type</filter>
676 </conditional> 683 </conditional>
677 </repeat> 684 </repeat>
678 <param name="output_matrix" value="True"/> 685 <param name="output_matrix" value="True"/>
679 <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/> 686 <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/>
680 <output name="matrixasoutput" file="preprocessing_results1.txt"/> 687 <output name="matrixasoutput" file="preprocessing_results1.txt"/>
681 <output name="QC_plots" file="preprocessing_results1.pdf" compare="sim_size"/> 688 <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/>
682 </test> 689 </test>
683 <test expect_num_outputs="4"> 690 <test expect_num_outputs="4">
684 <param name="infile" value="123_combined.RData" ftype="rdata"/> 691 <param name="infile" value="123_combined.RData" ftype="rdata"/>
685 <repeat name="methods"> 692 <repeat name="methods">
686 <conditional name="methods_conditional"> 693 <conditional name="methods_conditional">
703 </repeat> 710 </repeat>
704 <param name="summary_type" value="median,sd"/> 711 <param name="summary_type" value="median,sd"/>
705 <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/> 712 <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/>
706 <output name="summarized_output_median" file="preprocessing_median2.txt" lines_diff="2"/> 713 <output name="summarized_output_median" file="preprocessing_median2.txt" lines_diff="2"/>
707 <output name="summarized_output_sd" file="preprocessing_sd2.txt" lines_diff="2"/> 714 <output name="summarized_output_sd" file="preprocessing_sd2.txt" lines_diff="2"/>
708 <output name="QC_plots" file="preprocessing_results2.pdf" compare="sim_size"/> 715 <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/>
709 </test> 716 </test>
710 <test expect_num_outputs="3"> 717 <test expect_num_outputs="3">
711 <param name="infile" value="" ftype="analyze75"> 718 <param name="infile" value="" ftype="analyze75">
712 <composite_data value="Analyze75.hdr"/> 719 <composite_data value="Analyze75.hdr"/>
713 <composite_data value="Analyze75.img"/> 720 <composite_data value="Analyze75.img"/>
734 </conditional> 741 </conditional>
735 </conditional> 742 </conditional>
736 </repeat> 743 </repeat>
737 <param name="summary_type" value="mean"/> 744 <param name="summary_type" value="mean"/>
738 <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/> 745 <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/>
739 <output name="QC_plots" file="preprocessing_results3.pdf" compare="sim_size"/> 746 <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/>
740 <output name="summarized_output_mean" file="preprocessing_mean3.txt" lines_diff="2"/> 747 <output name="summarized_output_mean" file="preprocessing_mean3.txt" lines_diff="2"/>
741 </test> 748 </test>
742 <test expect_num_outputs="3"> 749 <test expect_num_outputs="3">
743 <param name="infile" value="" ftype="analyze75"> 750 <param name="infile" value="" ftype="analyze75">
744 <composite_data value="Analyze75.hdr"/> 751 <composite_data value="Analyze75.hdr"/>
757 </conditional> 764 </conditional>
758 </repeat> 765 </repeat>
759 <param name="output_matrix" value="True"/> 766 <param name="output_matrix" value="True"/>
760 <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/> 767 <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/>
761 <output name="matrixasoutput" file="preprocessing_results4.txt"/> 768 <output name="matrixasoutput" file="preprocessing_results4.txt"/>
762 <output name="QC_plots" file="preprocessing_results4.pdf" compare="sim_size"/> 769 <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/>
763 </test> 770 </test>
764 <test expect_num_outputs="2"> 771 <test expect_num_outputs="2">
765 <param name="infile" value="" ftype="imzml"> 772 <param name="infile" value="" ftype="imzml">
766 <composite_data value="Example_Continuous.imzML"/> 773 <composite_data value="Example_Continuous.imzML"/>
767 <composite_data value="Example_Continuous.ibd"/> 774 <composite_data value="Example_Continuous.ibd"/>
774 <param name="step_width" value="0.1"/> 781 <param name="step_width" value="0.1"/>
775 </conditional> 782 </conditional>
776 </conditional> 783 </conditional>
777 </repeat> 784 </repeat>
778 <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/> 785 <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/>
779 <output name="QC_plots" file="preprocessing_results5.pdf" compare="sim_size"/> 786 <output name="QC_overview" file="preprocessing_results5.pdf" compare="sim_size"/>
780 </test> 787 </test>
781 </tests> 788 </tests>
782 <help> 789 <help>
783 <![CDATA[ 790 <![CDATA[
784 791