Mercurial > repos > marie-tremblay-metatoul > nmr_preprocessing
comparison NmrPreprocessing_script.R @ 2:5e64657b4fe5 draft
planemo upload for repository https://github.com/workflow4metabolomics/nmr_preprocessing commit 22ca8782d7c4c0211e13c95b425d4f29f53f995e
author | lecorguille |
---|---|
date | Wed, 28 Mar 2018 08:05:12 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:cbea5e9fd0b4 | 2:5e64657b4fe5 |
---|---|
1 ## ========================== | |
2 # Internal functions | |
3 ## ========================== | |
4 | |
5 # beginTreatment | |
6 beginTreatment <- function(name, Signal_data = NULL, Signal_info = NULL, | |
7 force.real = FALSE) { | |
8 | |
9 cat("Begin", name, "\n") | |
10 | |
11 | |
12 # Formatting the Signal_data and Signal_info ----------------------- | |
13 | |
14 vec <- is.vector(Signal_data) | |
15 if (vec) { | |
16 Signal_data <- vec2mat(Signal_data) | |
17 } | |
18 if (is.vector(Signal_info)) { | |
19 Signal_info <- vec2mat(Signal_info) | |
20 } | |
21 if (!is.null(Signal_data)) { | |
22 if (!is.matrix(Signal_data)) { | |
23 stop("Signal_data is not a matrix.") | |
24 } | |
25 if (!is.complex(Signal_data) && !is.numeric(Signal_data)) { | |
26 stop("Signal_data contains non-numerical values.") | |
27 } | |
28 } | |
29 if (!is.null(Signal_info) && !is.matrix(Signal_info)) { | |
30 stop("Signal_info is not a matrix.") | |
31 } | |
32 | |
33 | |
34 Original_data <- Signal_data | |
35 | |
36 # Extract the real part of the spectrum --------------------------- | |
37 | |
38 if (force.real) { | |
39 if (is.complex(Signal_data)) { | |
40 Signal_data <- Re(Signal_data) | |
41 } else { | |
42 # The signal is numeric Im(Signal_data) is zero anyway so let's avoid | |
43 # using complex(real=...,imaginary=0) which would give a complex signal | |
44 # in endTreatment() | |
45 force.real <- FALSE | |
46 } | |
47 } | |
48 | |
49 | |
50 # Return the formatted data and metadata entries -------------------- | |
51 | |
52 return(list(start = proc.time(), vec = vec, force.real = force.real, | |
53 Original_data = Original_data, Signal_data = Signal_data, Signal_info = Signal_info)) | |
54 } | |
55 | |
56 # endTreatment | |
57 endTreatment <- function(name, begin_info, Signal_data) { | |
58 | |
59 # begin_info: object outputted from beginTreatment | |
60 | |
61 | |
62 # Formatting the entries and printing process time ----------------------- | |
63 end_time <- proc.time() # record it as soon as possible | |
64 start_time <- begin_info[["start"]] | |
65 delta_time <- end_time - start_time | |
66 delta <- delta_time[] | |
67 cat("End", name, "\n") | |
68 cat("It lasted", round(delta["user.self"], 3), "s user time,", round(delta["sys.self"],3), | |
69 "s system time and", round(delta["elapsed"], 3), "s elapsed time.\n") | |
70 | |
71 | |
72 if (begin_info[["force.real"]]) { | |
73 # The imaginary part is left untouched | |
74 i <- complex(real = 0, imaginary = 1) | |
75 Signal_data <- Signal_data + i * Im(begin_info[["Original_data"]]) | |
76 } | |
77 | |
78 if (begin_info[["vec"]]) { | |
79 Signal_data <- Signal_data[1, ] | |
80 } | |
81 | |
82 # Return the formatted data and metadata entries -------------------- | |
83 return(Signal_data) | |
84 } | |
85 | |
86 # checkArg | |
87 checkArg <- function(arg, checks, can.be.null=FALSE) { | |
88 check.list <- list(bool=c(is.logical, "a boolean"), | |
89 int =c(function(x){x%%1==0}, "an integer"), | |
90 num =c(is.numeric, "a numeric"), | |
91 str =c(is.character, "a string"), | |
92 pos =c(function(x){x>0}, "positive"), | |
93 pos0=c(function(x){x>=0}, "positive or zero"), | |
94 l1 =c(function(x){length(x)==1}, "of length 1") | |
95 ) | |
96 if (is.null(arg)) { | |
97 if (!can.be.null) { | |
98 stop(deparse(substitute(arg)), " is null.") | |
99 } | |
100 } else { | |
101 if (is.matrix(arg)) { | |
102 stop(deparse(substitute(arg)), " is not scalar.") | |
103 } | |
104 for (c in checks) { | |
105 if (!check.list[[c]][[1]](arg)) { | |
106 stop(deparse(substitute(arg)), " is not ", check.list[[c]][[2]], ".") | |
107 } | |
108 } | |
109 } | |
110 } | |
111 | |
112 # getArg | |
113 getArg <- function(arg, info, argname, can.be.absent=FALSE) { | |
114 if (is.null(arg)) { | |
115 start <- paste("impossible to get argument", argname, "it was not given directly and"); | |
116 if (!is.matrix(info)) { | |
117 stop(paste(start, "the info matrix was not given")) | |
118 } | |
119 if (!(argname %in% colnames(info))) { | |
120 if (can.be.absent) { | |
121 return(NULL) | |
122 } else { | |
123 stop(paste(start, "is not in the info matrix")) | |
124 } | |
125 } | |
126 if (nrow(info) < 1) { | |
127 stop(paste(start, "the info matrix has no row")) | |
128 } | |
129 arg <- info[1,argname] | |
130 if (is.na(arg)) { | |
131 stop(paste(start, "it is NA in the info matrix")) | |
132 } | |
133 } | |
134 return(arg) | |
135 } | |
136 | |
137 # binarySearch | |
138 binarySearch <- function(a, target, lower = TRUE) { | |
139 # search the index i in a such that a[i] == target | |
140 # if it doesn't exists and lower, it searches the closer a[i] such that a[i] < target | |
141 # if !lower, it seraches the closer a[i] such that a[i] > target | |
142 # a should be monotone but can be increasing or decreasing | |
143 | |
144 # if a is increasing INVARIANT: a[amin] < target < a[amax] | |
145 N <- length(a) | |
146 if ((a[N] - target) * (a[N] - a[1]) <= 0) { | |
147 return(N) | |
148 } | |
149 if ((a[1] - target) * (a[N] - a[1]) >= 0) { | |
150 return(1) | |
151 } | |
152 amin <- 1 | |
153 amax <- N | |
154 while (amin + 1 < amax) { | |
155 amid <- floor((amin + amax)/2) | |
156 if ((a[amid] - target) * (a[amax] - a[amid]) < 0) { | |
157 amin <- amid | |
158 } else if ((a[amid] - target) * (a[amax] - a[amid]) > 0) { | |
159 amax <- amid | |
160 } else { | |
161 # a[amid] == a[amax] or a[amid] == target In both cases, a[amid] == | |
162 # target | |
163 return(amid) | |
164 } | |
165 } | |
166 if (xor(lower, a[amin] > a[amax])) { | |
167 # (lower && a[amin] < a[amax]) || (!lower && a[min] > a[max]) | |
168 # If increasing and we want the lower, we take amin | |
169 # If decreasing and we want the bigger, we take amin too | |
170 return(amin) | |
171 } else { | |
172 return(amax) | |
173 } | |
174 } | |
175 | |
176 # Interpol | |
177 Interpol <- function(t, y) { | |
178 # y: sample | |
179 # t : warping function | |
180 | |
181 m <- length(y) | |
182 # t <= m-1 | |
183 # because if t > m-1, y[ti+1] will be NA when we compute g | |
184 valid <- 1 <= t & t <= m-1 # FIXME it was '<' in Bubble v2 | |
185 s <- (1:m)[valid] | |
186 ti <- floor(t[s]) | |
187 tr <- t[s] - ti | |
188 g <- y[ti + 1] - y[ti] | |
189 f <- y[ti] + tr * g | |
190 list(f=f, s=s, g=g) | |
191 } | |
192 | |
193 # vec2mat | |
194 vec2mat <- function(vec) { | |
195 return(matrix(vec, nrow = 1, dimnames = list(c(1), names(vec)))) | |
196 | |
197 } | |
198 | |
199 # binarySearch | |
200 binarySearch <- function(a, target, lower = TRUE) { | |
201 # search the index i in a such that a[i] == target | |
202 # if it doesn't exists and lower, it searches the closer a[i] such that a[i] < target | |
203 # if !lower, it seraches the closer a[i] such that a[i] > target | |
204 # a should be monotone but can be increasing or decreasing | |
205 | |
206 # if a is increasing INVARIANT: a[amin] < target < a[amax] | |
207 N <- length(a) | |
208 if ((a[N] - target) * (a[N] - a[1]) <= 0) { | |
209 return(N) | |
210 } | |
211 if ((a[1] - target) * (a[N] - a[1]) >= 0) { | |
212 return(1) | |
213 } | |
214 amin <- 1 | |
215 amax <- N | |
216 while (amin + 1 < amax) { | |
217 amid <- floor((amin + amax)/2) | |
218 if ((a[amid] - target) * (a[amax] - a[amid]) < 0) { | |
219 amin <- amid | |
220 } else if ((a[amid] - target) * (a[amax] - a[amid]) > 0) { | |
221 amax <- amid | |
222 } else { | |
223 # a[amid] == a[amax] or a[amid] == target In both cases, a[amid] == | |
224 # target | |
225 return(amid) | |
226 } | |
227 } | |
228 if (xor(lower, a[amin] > a[amax])) { | |
229 # (lower && a[amin] < a[amax]) || (!lower && a[min] > a[max]) | |
230 # If increasing and we want the lower, we take amin | |
231 # If decreasing and we want the bigger, we take amin too | |
232 return(amin) | |
233 } else { | |
234 return(amax) | |
235 } | |
236 } | |
237 | |
238 | |
239 # indexInterval | |
240 indexInterval <- function (a, from, to, inclusive=TRUE) { | |
241 # If inclusive and from <= to, we need to take the lower | |
242 # If not inclusive and from > to, we need to take the lower too | |
243 lowerFrom <- xor(inclusive, from > to) | |
244 fromIndex <- binarySearch(a, from, lowerFrom) | |
245 toIndex <- binarySearch(a, to, !lowerFrom) | |
246 return(fromIndex:toIndex) | |
247 } | |
248 | |
249 | |
250 | |
251 ## ========================== | |
252 # GroupDelayCorrection | |
253 ## ========================== | |
254 GroupDelayCorrection <- function(Fid_data, Fid_info = NULL, group_delay = NULL) { | |
255 | |
256 | |
257 # Data initialisation and checks ---------------------------------------------- | |
258 | |
259 begin_info <- beginTreatment("GroupDelayCorrection", Fid_data, Fid_info) | |
260 Fid_data <- begin_info[["Signal_data"]] | |
261 dimension_names <- dimnames(Fid_data) | |
262 Fid_info <- begin_info[["Signal_info"]] | |
263 checkArg(group_delay, c("num", "pos0"), can.be.null = TRUE) | |
264 # if Fid_info and group_delay are NULL, getArg will generate an error | |
265 | |
266 group_delay <- getArg(group_delay, Fid_info, "GRPDLY", can.be.absent = TRUE) | |
267 | |
268 if (is.null(group_delay)) { | |
269 | |
270 # See DetermineBrukerDigitalFilter.m in matNMR MATLAB library | |
271 group_delay_matrix <- matrix(c(44.75, 46, 46.311, 33.5, 36.5, 36.53, 66.625, | |
272 48, 47.87, 59.0833, 50.1667, 50.229, 68.5625, 53.25, 53.289, 60.375, | |
273 69.5, 69.551, 69.5313, 72.25, 71.6, 61.0208, 70.1667, 70.184, 70.0156, | |
274 72.75, 72.138, 61.3438, 70.5, 70.528, 70.2578, 73, 72.348, 61.5052, 70.6667, | |
275 70.7, 70.3789, 72.5, 72.524, 61.5859, 71.3333, NA, 70.4395, 72.25, NA, | |
276 61.6263, 71.6667, NA, 70.4697, 72.125, NA, 61.6465, 71.8333, NA, 70.4849, | |
277 72.0625, NA, 61.6566, 71.9167, NA, 70.4924, 72.0313, NA), nrow = 21, | |
278 ncol = 3, byrow = TRUE, dimnames = list(c(2, 3, 4, 6, 8, 12, 16, 24, | |
279 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048), | |
280 c(10, 11, 12))) | |
281 decim <- Fid_info[1, "DECIM"] | |
282 dspfvs <- Fid_info[1, "DSPFVS"] | |
283 if (!(toString(decim) %in% rownames(group_delay_matrix))) { | |
284 stop(paste("Invalid DECIM", decim, "it should be one of", rownames(group_delay_matrix))) | |
285 } | |
286 if (!(toString(dspfvs) %in% colnames(group_delay_matrix))) { | |
287 stop(paste("Invalid DSPFVS", dspfvs, "it should be one of", colnames(group_delay_matrix))) | |
288 } | |
289 group_delay <- group_delay_matrix[toString(decim), toString(dspfvs)] | |
290 if (is.na(group_delay)) { | |
291 stop(paste("Invalid DECIM", decim, "for DSPFVS", dspfvs)) | |
292 } | |
293 } | |
294 m <- ncol(Fid_data) | |
295 n <- nrow(Fid_data) | |
296 | |
297 # GroupDelayCorrection ---------------------------------------------- | |
298 | |
299 # We do the shifting in the Fourier domain because the shift can be non-integer. | |
300 # That way we automatically have the circular behaviour of the shift and the | |
301 # interpolation if it is non-integer. | |
302 | |
303 Spectrum <- t(stats::mvfft(t(Fid_data))) | |
304 | |
305 # Spectrum <- FourierTransform(Fid_data, Fid_info) | |
306 p <- ceiling(m/2) | |
307 new_index <- c((p + 1):m, 1:p) | |
308 Spectrum <- Spectrum[,new_index] | |
309 Spectrum <- matrix(data = Spectrum, ncol = m, nrow = n) | |
310 | |
311 Omega <- (0:(m - 1))/m | |
312 i <- complex(real = 0, imaginary = 1) | |
313 | |
314 if (n>1) { | |
315 Spectrum <- sweep(Spectrum, MARGIN = 2, exp(i * group_delay * 2 * pi * Omega), `*`) | |
316 Spectrum <- Spectrum[,new_index] | |
317 }else { | |
318 Spectrum <- Spectrum* exp(i * group_delay * 2 * pi * Omega) | |
319 Spectrum <- Spectrum[new_index] | |
320 Spectrum <- matrix(data = Spectrum, ncol = m, nrow = n) | |
321 } | |
322 | |
323 | |
324 Fid_data <- t(stats::mvfft(t(Spectrum), inverse = TRUE))/m | |
325 colnames(Fid_data) <- dimension_names[[2]] | |
326 rownames(Fid_data) <- dimension_names[[1]] | |
327 | |
328 # Data finalisation ---------------------------------------------- | |
329 | |
330 return(endTreatment("GroupDelayCorrection", begin_info, Fid_data)) | |
331 } | |
332 | |
333 ## ========================== | |
334 # SolventSuppression | |
335 ## ========================== | |
336 SolventSuppression <- function(Fid_data, lambda.ss = 1e+06, ptw.ss = TRUE, | |
337 plotSolvent = F, returnSolvent = F) { | |
338 | |
339 # Data initialisation and checks ---------------------------------------------- | |
340 | |
341 begin_info <- beginTreatment("SolventSuppression", Fid_data) | |
342 Fid_data <- begin_info[["Signal_data"]] | |
343 checkArg(ptw.ss, c("bool")) | |
344 checkArg(lambda.ss, c("num", "pos0")) | |
345 | |
346 | |
347 # difsm function definition for the smoother ----------------------------------- | |
348 | |
349 if (ptw.ss) { | |
350 # Use of the function in ptw that smoothes signals with a finite difference | |
351 # penalty of order 2 | |
352 difsm <- ptw::difsm | |
353 } else { | |
354 # Or manual implementation based on sparse matrices for large data series (cf. | |
355 # Eilers, 2003. 'A perfect smoother') | |
356 difsm <- function(y, d = 2, lambda) { | |
357 | |
358 m <- length(y) | |
359 # Sparse identity matrix m x m | |
360 E <- Matrix::Diagonal(m) | |
361 D <- Matrix::diff(E, differences = d) | |
362 A <- E + lambda.ss * Matrix::t(D) %*% D | |
363 # base::chol does not take into account that A is sparse and is extremely slow | |
364 C <- Matrix::chol(A) | |
365 x <- Matrix::solve(C, Matrix::solve(Matrix::t(C), y)) | |
366 return(as.numeric(x)) | |
367 } | |
368 } | |
369 | |
370 # Solvent Suppression ---------------------------------------------- | |
371 | |
372 n <- dim(Fid_data)[1] | |
373 if (returnSolvent) { | |
374 SolventRe <- Fid_data | |
375 SolventIm <- Fid_data | |
376 } | |
377 for (i in 1:n) { | |
378 FidRe <- Re(Fid_data[i, ]) | |
379 FidIm <- Im(Fid_data[i, ]) | |
380 solventRe <- difsm(y = FidRe, lambda = lambda.ss) | |
381 solventIm <- difsm(y = FidIm, lambda = lambda.ss) | |
382 | |
383 if (plotSolvent) { | |
384 m <- length(FidRe) | |
385 graphics::plot(1:m, FidRe, type = "l", col = "red") | |
386 graphics::lines(1:m, solventRe, type = "l", col = "blue") | |
387 graphics::plot(1:m, FidIm, type = "l", col = "red") | |
388 graphics::lines(1:m, solventIm, type = "l", col = "blue") | |
389 } | |
390 FidRe <- FidRe - solventRe | |
391 FidIm <- FidIm - solventIm | |
392 Fid_data[i, ] <- complex(real = FidRe, imaginary = FidIm) | |
393 if (returnSolvent) { | |
394 SolventRe[i, ] <- solventRe | |
395 SolventIm[i, ] <- solventIm | |
396 } | |
397 } | |
398 | |
399 | |
400 # Data finalisation ---------------------------------------------- | |
401 | |
402 Fid_data <- endTreatment("SolventSuppression", begin_info, Fid_data) | |
403 if (returnSolvent) { | |
404 return(list(Fid_data = Fid_data, SolventRe = SolventRe, SolventIm = SolventIm)) | |
405 } else { | |
406 return(Fid_data) | |
407 } | |
408 } | |
409 | |
410 | |
411 ## ========================== | |
412 # Apodization | |
413 # ============================= | |
414 Apodization <- function(Fid_data, Fid_info = NULL, DT = NULL, | |
415 type.apod = c("exp","cos2", "blockexp", "blockcos2", | |
416 "gauss", "hanning", "hamming"), phase = 0, rectRatio = 1/2, | |
417 gaussLB = 1, expLB = 1, plotWindow = F, returnFactor = F) { | |
418 | |
419 # Data initialisation and checks ---------------------------------------------- | |
420 begin_info <- beginTreatment("Apodization", Fid_data, Fid_info) | |
421 Fid_data <- begin_info[["Signal_data"]] | |
422 Fid_info <- begin_info[["Signal_info"]] | |
423 # Data check | |
424 type.apod <- match.arg(type.apod) | |
425 checkArg(DT, c("num", "pos"), can.be.null = TRUE) | |
426 checkArg(phase, c("num")) | |
427 | |
428 # Apodization ---------------------------------------------- | |
429 DT <- getArg(DT, Fid_info, "DT") # Dwell Time | |
430 m <- ncol(Fid_data) | |
431 t <- (1:m) * DT # Time | |
432 rectSize <- ceiling(rectRatio * m) | |
433 gaussLB <- (gaussLB/(sqrt(8 * log(2)))) | |
434 # Define the types of apodization: | |
435 switch(type.apod, exp = { | |
436 # exponential | |
437 Factor <- exp(-expLB * t) | |
438 }, cos2 = { | |
439 # cos^2 | |
440 c <- cos((1:m) * pi/(2 * m) - phase * pi/2) | |
441 Factor <- c * c | |
442 }, blockexp = { | |
443 # block and exponential | |
444 Factor <- c(rep.int(1, rectSize), rep.int(0, m - rectSize)) | |
445 # | rectSize | 1 ___________ | \ 0 \____ | |
446 Factor[(rectSize + 1):m] <- exp(-expLB * t[1:(m - rectSize)]) | |
447 }, blockcos2 = { | |
448 # block and cos^2 | |
449 Factor <- c(rep.int(1, rectSize), rep.int(0, m - rectSize)) | |
450 c <- cos((1:(m - rectSize)) * pi/(2 * (m - rectSize))) | |
451 Factor[(rectSize + 1):m] <- c * c | |
452 }, gauss = { | |
453 # gaussian | |
454 Factor <- exp(-(gaussLB * t)^2/2) | |
455 Factor <- Factor/max(Factor) | |
456 }, hanning = { | |
457 # Hanning | |
458 Factor <- 0.5 + 0.5 * cos((1:m) * pi/m - phase * pi) | |
459 }, hamming = { | |
460 # Hamming | |
461 Factor <- 0.54 + 0.46 * cos((1:m) * pi/m - phase * pi) | |
462 }) | |
463 if (plotWindow) { | |
464 graphics::plot(1:m, Factor, "l") | |
465 # dev.off() # device independent, it is the responsability of the | |
466 # caller to do it | |
467 } | |
468 # Apply the apodization factor on the spectra | |
469 Fid_data <- sweep(Fid_data, MARGIN = 2, Factor, `*`) | |
470 | |
471 # Data finalisation ---------------------------------------------- | |
472 Fid_data <- endTreatment("Apodization", begin_info, Fid_data) | |
473 if (returnFactor) { | |
474 return(list(Fid_data = Fid_data, Factor = Factor)) | |
475 } else { | |
476 return(Fid_data) | |
477 } | |
478 } | |
479 | |
480 | |
481 ## ==================================================== | |
482 # FourierTransform | |
483 ## ==================================================== | |
484 | |
485 | |
486 # fftshift1D2D | |
487 fftshift1D2D <- function(x) { | |
488 vec <- F | |
489 if (is.vector(x)) { | |
490 x <- vec2mat(x) | |
491 vec <- T | |
492 } | |
493 m <- dim(x)[2] | |
494 p <- ceiling(m/2) | |
495 new_index <- c((p + 1):m, 1:p) | |
496 y <- x[, new_index, drop = vec] | |
497 } | |
498 | |
499 # FourierTransform | |
500 FourierTransform <- function(Fid_data, Fid_info = NULL, SW_h = NULL, SW = NULL, O1 = NULL, reverse.axis = TRUE) { | |
501 | |
502 # Data initialisation and checks ---------------------------------------------- | |
503 begin_info <- beginTreatment("FourierTransform", Fid_data, Fid_info) | |
504 Fid_data <- begin_info[["Signal_data"]] | |
505 Fid_info <- begin_info[["Signal_info"]] | |
506 | |
507 m <- ncol(Fid_data) | |
508 n <- nrow(Fid_data) | |
509 | |
510 if (is.null(SW_h)) { | |
511 SW_h <- getArg(SW_h, Fid_info, "SW_h") | |
512 } | |
513 | |
514 if (is.null(SW)) { | |
515 SW <- getArg(SW, Fid_info, "SW") # Sweep Width in ppm (semi frequency scale in ppm) | |
516 } | |
517 | |
518 | |
519 if (is.null(O1)) { | |
520 O1 <- getArg(O1, Fid_info, "O1") | |
521 } | |
522 | |
523 | |
524 checkArg(reverse.axis, c("bool")) | |
525 | |
526 # Fourier Transformation ---------------------------------------------- | |
527 # mvfft does the unnormalized fourier transform (see ?mvfft), so we need divide | |
528 # by m. It does not matter a lot in our case since the spectrum will be | |
529 # normalized. | |
530 | |
531 # FT | |
532 RawSpect_data <- fftshift1D2D(t(stats::mvfft(t(Fid_data)))) | |
533 # recover the frequencies values | |
534 f <- ((0:(m - 1)) - floor(m/2)) * Fid_info[1, "SW_h"]/(m-1) | |
535 | |
536 if(reverse.axis == TRUE) { | |
537 revind <- rev(1:m) | |
538 RawSpect_data <- RawSpect_data[,revind] # reverse the spectrum | |
539 } | |
540 | |
541 RawSpect_data <- matrix(RawSpect_data, nrow = n, ncol = m) | |
542 colnames(RawSpect_data) <- f | |
543 rownames(RawSpect_data) <- rownames(Fid_data) | |
544 | |
545 # PPM conversion ---------------------------------------------- | |
546 | |
547 # The Sweep Width has to be the same since the column names are the same | |
548 | |
549 ppmInterval <- SW/(m-1) | |
550 | |
551 O1index = round((m+1)/2+O1*(m - 1) / SW_h) | |
552 | |
553 end <- O1index - m | |
554 start <- O1index -1 | |
555 ppmScale <- (start:end) * ppmInterval | |
556 RawSpect_data <- matrix(RawSpect_data, nrow = n, ncol = -(end - start) + 1, dimnames = | |
557 list(rownames(RawSpect_data), ppmScale)) | |
558 | |
559 | |
560 # Data finalisation ---------------------------------------------- | |
561 return(endTreatment("FourierTransform", begin_info, RawSpect_data)) | |
562 } | |
563 | |
564 ## ==================================================== | |
565 # InternalReferencing | |
566 ## ==================================================== | |
567 | |
568 InternalReferencing <- function(Spectrum_data, Fid_info, method = c("max", "thres"), | |
569 range = c("nearvalue", "all", "window"), ppm.value = 0, | |
570 direction = "left", shiftHandling = c("zerofilling", "cut", | |
571 "NAfilling", "circular"), c = 2, pc = 0.02, fromto.RC = NULL, | |
572 ppm.ir = TRUE, rowindex_graph = NULL) { | |
573 | |
574 | |
575 | |
576 # Data initialisation and checks ---------------------------------------------- | |
577 | |
578 begin_info <- beginTreatment("InternalReferencing", Spectrum_data, Fid_info) | |
579 Spectrum_data <- begin_info[["Signal_data"]] | |
580 Fid_info <- begin_info[["Signal_info"]] | |
581 | |
582 | |
583 # Check input arguments | |
584 range <- match.arg(range) | |
585 shiftHandling <- match.arg(shiftHandling) | |
586 method <- match.arg(method) | |
587 plots <- NULL | |
588 | |
589 | |
590 checkArg(ppm.ir, c("bool")) | |
591 checkArg(unlist(fromto.RC), c("num"), can.be.null = TRUE) | |
592 checkArg(pc, c("num")) | |
593 checkArg(ppm.value, c("num")) | |
594 checkArg(rowindex_graph, "num", can.be.null = TRUE) | |
595 | |
596 # fromto.RC | |
597 if (!is.null(fromto.RC)) { | |
598 diff <- diff(unlist(fromto.RC))[1:length(diff(unlist(fromto.RC)))%%2 !=0] | |
599 for (i in 1:length(diff)) { | |
600 if (diff[i] >= 0) { | |
601 fromto <- c(fromto.RC[[i]][2], fromto.RC[[i]][1]) | |
602 fromto.RC[[i]] <- fromto | |
603 } | |
604 } | |
605 } | |
606 | |
607 | |
608 # findTMSPpeak function ---------------------------------------------- | |
609 findTMSPpeak <- function(ft, c = 2, direction = "left") { | |
610 ft <- Re(ft) # extraction de la partie réelle | |
611 N <- length(ft) | |
612 if (direction == "left") { | |
613 newindex <- rev(1:N) | |
614 ft <- rev(ft) | |
615 } | |
616 thres <- 99999 | |
617 i <- 1000 # Start at point 1000 to find the peak | |
618 vect <- ft[1:i] | |
619 | |
620 while (vect[i] <= (c * thres)) { | |
621 cumsd <- stats::sd(vect) | |
622 cummean <- mean(vect) | |
623 thres <- cummean + 3 * cumsd | |
624 i <- i + 1 | |
625 vect <- ft[1:i] | |
626 } | |
627 if (direction == "left") { | |
628 v <- newindex[i] | |
629 } else {v <- i} | |
630 | |
631 if (is.na(v)) { | |
632 warning("No peak found, need to lower the threshold.") | |
633 return(NA) | |
634 } else { | |
635 # recherche dans les 1% de points suivants du max trouve pour etre au sommet du | |
636 # pic | |
637 d <- which.max(ft[v:(v + N * 0.01)]) | |
638 new.peak <- v + d - 1 # nouveau pic du TMSP si d > 0 | |
639 | |
640 if (names(which.max(ft[v:(v + N * 0.01)])) != names(which.max(ft[v:(v + N * 0.03)]))) { | |
641 # recherche dans les 3% de points suivants du max trouve pour eviter un faux | |
642 # positif | |
643 warning("the TMSP peak might be located further away, increase the threshold to check.") | |
644 } | |
645 return(new.peak) | |
646 } | |
647 } | |
648 | |
649 | |
650 # Apply the method ('thres' or 'max') on spectra | |
651 # ---------------------------------------------- | |
652 | |
653 n <- nrow(Spectrum_data) | |
654 m <- ncol(Spectrum_data) | |
655 | |
656 # The Sweep Width has to be the same since the column names are the same | |
657 SW <- Fid_info[1, "SW"] # Sweep Width in ppm (semi frequency scale in ppm) | |
658 ppmInterval <- SW/(m-1) | |
659 | |
660 if (range == "all") { | |
661 Data <- Spectrum_data | |
662 } else { # range = "nearvalue" or "window" | |
663 | |
664 if (range == "nearvalue") { | |
665 fromto.RC <- list(c(-(SW * pc)/2 + ppm.value, (SW * pc)/2 + ppm.value)) # automatic fromto values in ppm | |
666 colindex <- as.numeric(colnames(Spectrum_data)) | |
667 } else { | |
668 if (ppm.ir == TRUE) { | |
669 colindex <- as.numeric(colnames(Spectrum_data)) | |
670 } else { | |
671 colindex <- 1:m | |
672 } | |
673 } | |
674 | |
675 | |
676 Int <- vector("list", length(fromto.RC)) | |
677 for (i in 1:length(fromto.RC)) { | |
678 Int[[i]] <- indexInterval(colindex, from = fromto.RC[[i]][1], | |
679 to = fromto.RC[[i]][2], inclusive = TRUE) | |
680 } | |
681 | |
682 vector <- rep(0, m) | |
683 vector[unlist(Int)] <- 1 | |
684 if (n > 1) { | |
685 Data <- sweep(Spectrum_data, MARGIN = 2, FUN = "*", vector) # Cropped_Spectrum | |
686 } else { | |
687 Data <- Spectrum_data * vector | |
688 } # Cropped_Spectrum | |
689 } | |
690 | |
691 | |
692 if (method == "thres") { | |
693 TMSPpeaks <- apply(Data, 1, findTMSPpeak, c = c, direction = direction) | |
694 } else { | |
695 TMSPpeaks <- apply(abs(Re(Data)), 1, which.max) | |
696 } | |
697 | |
698 # TMSPpeaks is an column index | |
699 maxpeak <- max(TMSPpeaks) | |
700 minpeak <- min(TMSPpeaks) | |
701 | |
702 | |
703 | |
704 # Shift spectra according to the TMSPpeaks found -------------------------------- | |
705 # Depends on the shiftHandling | |
706 | |
707 if (shiftHandling %in% c("zerofilling", "NAfilling", "cut")) { | |
708 fill <- NA | |
709 if (shiftHandling == "zerofilling") { | |
710 fill <- 0 | |
711 } | |
712 | |
713 start <- maxpeak - 1 | |
714 end <- minpeak - m | |
715 | |
716 ppmScale <- (start:end) * ppmInterval | |
717 | |
718 # check if ppm.value is in the ppmScale interval | |
719 if(ppm.value < min(ppmScale) | ppm.value > max(ppmScale)) { | |
720 warning("ppm.value = ", ppm.value, " is not in the ppm interval [", | |
721 round(min(ppmScale),2), ",", round(max(ppmScale),2), "], and is set to its default ppm.value 0") | |
722 ppm.value = 0 | |
723 } | |
724 | |
725 ppmScale <- ppmScale + ppm.value | |
726 | |
727 Spectrum_data_calib <- matrix(fill, nrow = n, ncol = -(end - start) + 1, | |
728 dimnames = list(rownames(Spectrum_data), ppmScale)) | |
729 for (i in 1:n) { | |
730 shift <- (1 - TMSPpeaks[i]) + start | |
731 Spectrum_data_calib[i, (1 + shift):(m + shift)] <- Spectrum_data[i, ] | |
732 } | |
733 | |
734 if (shiftHandling == "cut") { | |
735 Spectrum_data_calib = as.matrix(stats::na.omit(t(Spectrum_data_calib))) | |
736 Spectrum_data_calib = t(Spectrum_data_calib) | |
737 base::attr(Spectrum_data_calib, "na.action") <- NULL | |
738 } | |
739 | |
740 | |
741 } else { | |
742 # circular | |
743 start <- 1 - maxpeak | |
744 end <- m - maxpeak | |
745 | |
746 ppmScale <- (start:end) * ppmInterval | |
747 | |
748 # check if ppm.value in is the ppmScale interval | |
749 if(ppm.value < min(ppmScale) | ppm.value > max(ppmScale)) { | |
750 warning("ppm.value = ", ppm.value, " is not in the ppm interval [", | |
751 round(min(ppmScale),2), ",", round(max(ppmScale),2), "], and is set to its default ppm.value 0") | |
752 ppm.value = 0 | |
753 } | |
754 ppmScale <- ppmScale + ppm.value | |
755 | |
756 Spectrum_data_calib <- matrix(nrow=n, ncol=end-start+1, | |
757 dimnames=list(rownames(Spectrum_data), ppmScale)) | |
758 for (i in 1:n) { | |
759 shift <- (maxpeak-TMSPpeaks[i]) | |
760 Spectrum_data_calib[i,(1+shift):m] <- Spectrum_data[i,1:(m-shift)] | |
761 if (shift > 0) { | |
762 Spectrum_data_calib[i,1:shift] <- Spectrum_data[i,(m-shift+1):m] | |
763 } | |
764 } | |
765 } | |
766 | |
767 | |
768 | |
769 | |
770 # Plot of the spectra --------------------------------------------------- | |
771 | |
772 ppm = xstart = value = xend = Legend = NULL # only for R CMD check | |
773 | |
774 | |
775 # with the search zone for TMSP and the location of the peaks just found | |
776 if (!is.null(rowindex_graph)) { | |
777 | |
778 if (range == "window") { | |
779 if (ppm.ir == TRUE) { | |
780 fromto <- fromto.RC | |
781 } else { | |
782 fromto <- list() | |
783 idcol <- as.numeric(colnames(Spectrum_data)) | |
784 for (i in 1:length(fromto.RC)) { | |
785 fromto[[i]] <- as.numeric(colnames(Spectrum_data))[fromto.RC[[i]]] | |
786 } | |
787 } | |
788 } else { | |
789 fromto <- fromto.RC | |
790 } | |
791 | |
792 # TMSPloc in ppm | |
793 TMSPloc <- as.numeric(colnames(Spectrum_data))[TMSPpeaks[rowindex_graph]] | |
794 | |
795 # num plot per window | |
796 num.stacked <- 6 | |
797 | |
798 # rectanglar bands of color for the search zone | |
799 rects <- data.frame(xstart = sapply(fromto, function(x) x[[1]]), | |
800 xend = sapply(fromto, function(x) x[[2]]), | |
801 Legend = "TMSP search zone and location") | |
802 | |
803 # vlines for TMSP peak | |
804 addlines <- data.frame(rowname = rownames(Spectrum_data)[rowindex_graph],TMSPloc) | |
805 | |
806 nn <- length(rowindex_graph) | |
807 i <- 1 | |
808 j <- 1 | |
809 plots <- vector(mode = "list", length = ceiling(nn/num.stacked)) | |
810 | |
811 while (i <= nn) { | |
812 | |
813 last <- min(i + num.stacked - 1, nn) | |
814 | |
815 melted <- reshape2::melt(Re(Spectrum_data[i:last, ]), | |
816 varnames = c("rowname", "ppm")) | |
817 | |
818 plots[[j]] <- ggplot2::ggplot() + ggplot2::theme_bw() + | |
819 ggplot2::geom_line(data = melted, | |
820 ggplot2::aes(x = ppm, y = value)) + | |
821 ggplot2::geom_rect(data = rects, ggplot2::aes(xmin = xstart, xmax = xend, | |
822 ymin = -Inf, ymax = Inf, fill = Legend), alpha = 0.4) + | |
823 ggplot2::facet_grid(rowname ~ ., scales = "free_y") + | |
824 ggplot2::theme(legend.position = "none") + | |
825 ggplot2::geom_vline(data = addlines, ggplot2::aes(xintercept = TMSPloc), | |
826 color = "red", show.legend = TRUE) + | |
827 ggplot2::ggtitle("TMSP peak search zone and location") + | |
828 ggplot2::theme(legend.position = "top", legend.text = ggplot2::element_text()) | |
829 | |
830 | |
831 | |
832 if ((melted[1, "ppm"] - melted[(dim(melted)[1]), "ppm"]) > 0) { | |
833 plots[[j]] <- plots[[j]] + ggplot2::scale_x_reverse() | |
834 } | |
835 | |
836 i <- last + 1 | |
837 j <- j + 1 | |
838 } | |
839 | |
840 plots | |
841 } | |
842 | |
843 | |
844 # Return the results ---------------------------------------------- | |
845 Spectrum_data <- endTreatment("InternalReferencing", begin_info, Spectrum_data_calib) | |
846 | |
847 if (is.null(plots)) { | |
848 return(Spectrum_data) | |
849 } else { | |
850 return(list(Spectrum_data = Spectrum_data, plots = plots)) | |
851 } | |
852 | |
853 } | |
854 | |
855 ## ==================================================== | |
856 # ZeroOrderPhaseCorrection | |
857 ## ==================================================== | |
858 | |
859 ZeroOrderPhaseCorrection <- function(Spectrum_data, type.zopc = c("rms", "manual", "max"), | |
860 plot_rms = NULL, returnAngle = FALSE, createWindow = TRUE, | |
861 angle = NULL, plot_spectra = FALSE, | |
862 ppm.zopc = TRUE, exclude.zopc = list(c(5.1,4.5))) { | |
863 | |
864 | |
865 # Data initialisation and checks ---------------------------------------------- | |
866 | |
867 # Entry arguments definition: | |
868 # plot_rms : graph of rms criterion returnAngle : if TRUE, returns avector of | |
869 # optimal angles createWindow : for plot_rms plots angle : If angle is not NULL, | |
870 # spectra are rotated according to the angle vector values | |
871 # plot_spectra : if TRUE, plot rotated spectra | |
872 | |
873 | |
874 | |
875 begin_info <- beginTreatment("ZeroOrderPhaseCorrection", Spectrum_data) | |
876 Spectrum_data <- begin_info[["Signal_data"]] | |
877 n <- nrow(Spectrum_data) | |
878 m <- ncol(Spectrum_data) | |
879 | |
880 rnames <- rownames(Spectrum_data) | |
881 | |
882 # Check input arguments | |
883 type.zopc <- match.arg(type.zopc) | |
884 checkArg(ppm.zopc, c("bool")) | |
885 checkArg(unlist(exclude.zopc), c("num"), can.be.null = TRUE) | |
886 | |
887 | |
888 # type.zopc in c("max", "rms") ----------------------------------------- | |
889 if (type.zopc %in% c("max", "rms")) { | |
890 # angle is found by optimization | |
891 | |
892 # rms function to be optimised | |
893 rms <- function(ang, y, meth = c("max", "rms")) { | |
894 # if (debug_plot) { graphics::abline(v=ang, col='gray60') } | |
895 roty <- y * exp(complex(real = 0, imaginary = ang)) # spectrum rotation | |
896 Rey <- Re(roty) | |
897 | |
898 if (meth == "rms") { | |
899 ReyPos <- Rey[Rey >= 0] # select positive intensities | |
900 POSss <- sum((ReyPos)^2, na.rm = TRUE) # SS for positive intensities | |
901 ss <- sum((Rey)^2, na.rm = TRUE) # SS for all intensities | |
902 return(POSss/ss) # criterion : SS for positive values / SS for all intensities | |
903 } else { | |
904 maxi <- max(Rey, na.rm = TRUE) | |
905 return(maxi) | |
906 } | |
907 } | |
908 | |
909 | |
910 # Define the interval where to search for (by defining Data) | |
911 if (is.null(exclude.zopc)) { | |
912 Data <- Spectrum_data | |
913 } else { | |
914 | |
915 # if ppm.zopc == TRUE, then exclude.zopc is in the colnames values, else, in the column | |
916 # index | |
917 if (ppm.zopc == TRUE) { | |
918 colindex <- as.numeric(colnames(Spectrum_data)) | |
919 } else { | |
920 colindex <- 1:m | |
921 } | |
922 | |
923 # Second check for the argument exclude.zopc | |
924 diff <- diff(unlist(exclude.zopc))[1:length(diff(unlist(exclude.zopc)))%%2 !=0] | |
925 for (i in 1:length(diff)) { | |
926 if (ppm.zopc == TRUE & diff[i] >= 0) { | |
927 stop(paste("Invalid region removal because from <= to in ppm.zopc")) | |
928 } else if (ppm.zopc == FALSE & diff[i] <= 0) {stop(paste("Invalid region removal because from >= to in column index"))} | |
929 } | |
930 | |
931 | |
932 Int <- vector("list", length(exclude.zopc)) | |
933 for (i in 1:length(exclude.zopc)) { | |
934 Int[[i]] <- indexInterval(colindex, from = exclude.zopc[[i]][1], | |
935 to = exclude.zopc[[i]][2], inclusive = TRUE) | |
936 } | |
937 | |
938 vector <- rep(1, m) | |
939 vector[unlist(Int)] <- 0 | |
940 if (n > 1) { | |
941 Data <- sweep(Spectrum_data, MARGIN = 2, FUN = "*", vector) # Cropped_Spectrum | |
942 } else { | |
943 Data <- Spectrum_data * vector | |
944 } # Cropped_Spectrum | |
945 } | |
946 | |
947 | |
948 # angles computation | |
949 Angle <- c() | |
950 for (k in 1:n) | |
951 { | |
952 # The function is rms is periodic (period 2pi) and it seems that there is a phase | |
953 # x such that rms is unimodal (i.e. decreasing then increasing) on the interval | |
954 # [x; x+2pi]. However, if we do the optimization for example on [x-pi; x+pi], | |
955 # instead of being decreasing then increasing, it might be increasing then | |
956 # decreasing in which case optimize, thinking it is a valley will have to choose | |
957 # between the left or the right of this hill and if it chooses wrong, it will end | |
958 # up at like x-pi while the minimum is close to x+pi. | |
959 | |
960 # Supposing that rms is unimodal, the classical 1D unimodal optimization will | |
961 # work in either [-pi;pi] or [0;2pi] (this is not easy to be convinced by that I | |
962 # agree) and we can check which one it is simply by the following trick | |
963 | |
964 f0 <- rms(0, Data[k, ],meth = type.zopc) | |
965 fpi <- rms(pi, Data[k, ], meth = type.zopc) | |
966 if (f0 < fpi) { | |
967 interval <- c(-pi, pi) | |
968 } else { | |
969 interval <- c(0, 2 * pi) | |
970 } | |
971 | |
972 # graphs of rms criteria | |
973 debug_plot <- F # rms should not plot anything now, only when called by optimize | |
974 if (!is.null(plot_rms) && rnames[k] %in% plot_rms) { | |
975 x <- seq(min(interval), max(interval), length.out = 100) | |
976 y <- rep(1, 100) | |
977 for (K in (1:100)) { | |
978 y[K] <- rms(x[K], Data[k, ], meth = type.zopc) | |
979 } | |
980 if (createWindow == TRUE) { | |
981 grDevices::dev.new(noRStudioGD = FALSE) | |
982 } | |
983 graphics::plot(x, y, main = paste("Criterion maximization \n", | |
984 rownames(Data)[k]), ylim = c(0, 1.1), | |
985 ylab = "positiveness criterion", xlab = "angle ") | |
986 debug_plot <- T | |
987 } | |
988 | |
989 # Best angle | |
990 best <- stats::optimize(rms, interval = interval, maximum = TRUE, | |
991 y = Data[k,], meth = type.zopc) | |
992 ang <- best[["maximum"]] | |
993 | |
994 | |
995 if (debug_plot) { | |
996 graphics::abline(v = ang, col = "black") | |
997 graphics::text(x = (ang+0.1*ang), y = (y[ang]-0.1*y[ang]), labels = round(ang, 3)) | |
998 } | |
999 | |
1000 # Spectrum rotation | |
1001 Spectrum_data[k, ] <- Spectrum_data[k, ] * exp(complex(real = 0, imaginary = ang)) | |
1002 Angle <- c(Angle, ang) | |
1003 } | |
1004 | |
1005 | |
1006 | |
1007 | |
1008 } else { | |
1009 # type.zopc is "manual" ------------------------------------------------------- | |
1010 # if Angle is already specified and no optimisation is needed | |
1011 Angle <- angle | |
1012 | |
1013 if (!is.vector(angle)) { | |
1014 stop("angle is not a vector") | |
1015 } | |
1016 | |
1017 if (!is.numeric(angle)) { | |
1018 stop("angle is not a numeric") | |
1019 } | |
1020 | |
1021 if (length(angle) != n) { | |
1022 stop(paste("angle has length", length(angle), "and there are", n, "spectra to rotate.")) | |
1023 } | |
1024 for (k in 1:n) { | |
1025 Spectrum_data[k, ] <- Spectrum_data[k, ] * exp(complex(real = 0, imaginary = - angle[k])) | |
1026 } | |
1027 } | |
1028 | |
1029 | |
1030 # Draw spectra | |
1031 if (plot_spectra == TRUE) { | |
1032 nn <- ceiling(n/4) | |
1033 i <- 1 | |
1034 for (k in 1:nn) { | |
1035 if (createWindow == TRUE) { | |
1036 grDevices::dev.new(noRStudioGD = FALSE) | |
1037 } | |
1038 graphics::par(mfrow = c(4, 2)) | |
1039 while (i <= n) { | |
1040 last <- min(i + 4 - 1, n) | |
1041 graphics::plot(Re(Spectrum_data[i, ]), type = "l", ylab = "intensity", | |
1042 xlab = "Index", main = paste0(rownames(Spectrum_data)[i], " - Real part")) | |
1043 graphics::plot(Im(Spectrum_data[i, ]), type = "l", ylab = "intensity", | |
1044 xlab = "Index", main = paste0(rownames(Spectrum_data)[i], " - Imaginary part")) | |
1045 i <- i + 1 | |
1046 } | |
1047 i <- last + 1 | |
1048 } | |
1049 } | |
1050 | |
1051 | |
1052 # Data finalisation ---------------------------------------------- | |
1053 | |
1054 Spectrum_data <- endTreatment("ZeroOrderPhaseCorrection", begin_info, Spectrum_data) | |
1055 if (returnAngle) { | |
1056 return(list(Spectrum_data = Spectrum_data, Angle = Angle)) | |
1057 } else { | |
1058 return(Spectrum_data) | |
1059 } | |
1060 } | |
1061 | |
1062 | |
1063 ## ==================================================== | |
1064 # Baseline Correction | |
1065 ## ==================================================== | |
1066 BaselineCorrection <- function(Spectrum_data, ptw.bc = TRUE, maxIter = 42, | |
1067 lambda.bc = 1e+07, p.bc = 0.05, eps = 1e-08, | |
1068 ppm.bc = TRUE, exclude.bc = list(c(5.1,4.5)), | |
1069 returnBaseline = F) { | |
1070 | |
1071 # Data initialisation ---------------------------------------------- | |
1072 begin_info <- beginTreatment("BaselineCorrection", Spectrum_data, force.real = T) | |
1073 Spectrum_data <- begin_info[["Signal_data"]] | |
1074 p <- p.bc | |
1075 lambda <- lambda.bc | |
1076 n <- dim(Spectrum_data)[1] | |
1077 m <- dim(Spectrum_data)[2] | |
1078 | |
1079 | |
1080 # Data check | |
1081 checkArg(ptw.bc, c("bool")) | |
1082 checkArg(maxIter, c("int", "pos")) | |
1083 checkArg(lambda, c("num", "pos0")) | |
1084 checkArg(p.bc, c("num", "pos0")) | |
1085 checkArg(eps, c("num", "pos0")) | |
1086 checkArg(returnBaseline, c("bool")) | |
1087 checkArg(ppm.bc, c("bool")) | |
1088 checkArg(unlist(exclude.bc), c("num"), can.be.null = TRUE) | |
1089 | |
1090 # Define the interval where to search for (by defining Data) | |
1091 if (is.null(exclude.bc)) { | |
1092 exclude_index <- NULL | |
1093 } else { | |
1094 # if ppm.bc == TRUE, then exclude.bc is in the colnames values, else, in the column | |
1095 # index | |
1096 if (ppm.bc == TRUE) { | |
1097 colindex <- as.numeric(colnames(Spectrum_data)) | |
1098 } else { | |
1099 colindex <- 1:m | |
1100 } | |
1101 | |
1102 Int <- vector("list", length(exclude.bc)) | |
1103 for (i in 1:length(exclude.bc)) { | |
1104 Int[[i]] <- indexInterval(colindex, from = exclude.bc[[i]][1], | |
1105 to = exclude.bc[[i]][2], inclusive = TRUE) | |
1106 } | |
1107 exclude_index <- unlist(Int) | |
1108 } | |
1109 | |
1110 # Baseline Correction implementation definition ---------------------- | |
1111 | |
1112 # 2 Ways: either use the function asysm from the ptw package or by | |
1113 # built-in functions | |
1114 if (ptw.bc) { | |
1115 asysm <- ptw::asysm | |
1116 } else { | |
1117 difsmw <- function(y, lambda, w, d) { | |
1118 # Weighted smoothing with a finite difference penalty cf Eilers, 2003. | |
1119 # (A perfect smoother) | |
1120 # y: signal to be smoothed | |
1121 # lambda: smoothing parameter | |
1122 # w: weights (use0 zeros for missing values) | |
1123 # d: order of differences in penalty (generally 2) | |
1124 m <- length(y) | |
1125 W <- Matrix::Diagonal(x=w) | |
1126 E <- Matrix::Diagonal(m) | |
1127 D <- Matrix::diff(E, differences = d) | |
1128 C <- Matrix::chol(W + lambda * t(D) %*% D) | |
1129 x <- Matrix::solve(C, Matrix::solve(t(C), w * y)) | |
1130 return(as.numeric(x)) | |
1131 | |
1132 } | |
1133 asysm <- function(y, lambda, p, eps, exclude_index) { | |
1134 # Baseline estimation with asymmetric least squares | |
1135 # y: signal | |
1136 # lambda: smoothing parameter (generally 1e5 to 1e8) | |
1137 # p: asymmetry parameter (generally 0.001) | |
1138 # d: order of differences in penalty (generally 2) | |
1139 # eps: 1e-8 in ptw package | |
1140 m <- length(y) | |
1141 w <- rep(1, m) | |
1142 i <- 1 | |
1143 repeat { | |
1144 z <- difsmw(y, lambda, w, d = 2) | |
1145 w0 <- w | |
1146 p_vect <- rep((1-p), m) # if y <= z + eps | |
1147 p_vect[y > z + eps | y < 0] <- p # if y > z + eps | y < 0 | |
1148 if(!is.null(exclude_index)){ | |
1149 p_vect[exclude_index] <- 0 # if exclude area | |
1150 } | |
1151 | |
1152 w <- p_vect | |
1153 # w <- p * (y > z + eps | y < 0) + (1 - p) * (y <= z + eps) | |
1154 | |
1155 if (sum(abs(w - w0)) == 0) { | |
1156 break | |
1157 } | |
1158 i <- i + 1 | |
1159 if (i > maxIter) { | |
1160 warning("cannot find Baseline estimation in asysm") | |
1161 break | |
1162 } | |
1163 } | |
1164 return(z) | |
1165 } | |
1166 } | |
1167 | |
1168 # Baseline estimation ---------------------------------------------- | |
1169 Baseline <- matrix(NA, nrow = nrow(Spectrum_data), ncol = ncol(Spectrum_data)) | |
1170 | |
1171 # for (k in 1:n) { | |
1172 # Baseline[k, ] <- asysm(y = Spectrum_data[k, ], lambda = lambda, p = p, eps = eps) | |
1173 | |
1174 if (ptw.bc ){ | |
1175 Baseline <- apply(Spectrum_data,1, asysm, lambda = lambda, p = p, | |
1176 eps = eps) | |
1177 }else { | |
1178 Baseline <- apply(Spectrum_data,1, asysm, lambda = lambda, p = p, | |
1179 eps = eps, exclude_index = exclude_index) | |
1180 } | |
1181 | |
1182 | |
1183 Spectrum_data <- Spectrum_data - t(Baseline) | |
1184 # } | |
1185 | |
1186 # Data finalisation ---------------------------------------------- | |
1187 Spectrum_data <- endTreatment("BaselineCorrection", begin_info, Spectrum_data) # FIXME create removeImaginary filter ?? | |
1188 | |
1189 if (returnBaseline) { | |
1190 return(list(Spectrum_data = Spectrum_data, Baseline = Baseline)) | |
1191 } else { | |
1192 return(Spectrum_data) | |
1193 } | |
1194 } | |
1195 | |
1196 | |
1197 | |
1198 ## ==================================================== | |
1199 # NegativeValuesZeroing | |
1200 ## ==================================================== | |
1201 | |
1202 NegativeValuesZeroing <- function(Spectrum_data) { | |
1203 # Data initialisation and checks ---------------------------------------------- | |
1204 begin_info <- beginTreatment("NegativeValuesZeroing", Spectrum_data, force.real = T) | |
1205 Spectrum_data <- begin_info[["Signal_data"]] | |
1206 | |
1207 # NegativeValuesZeroing ---------------------------------------------- | |
1208 Spectrum_data[Spectrum_data < 0] <- 0 | |
1209 | |
1210 # Data finalisation ---------------------------------------------- | |
1211 return(endTreatment("NegativeValuesZeroing", begin_info, Spectrum_data)) | |
1212 } | |
1213 | |
1214 |