Mercurial > repos > marie-tremblay-metatoul > nmr_annotation
comparison nmr_preprocessing/NmrPreprocessing_script.R @ 2:7304ec2c9ab7 draft
Uploaded
author | marie-tremblay-metatoul |
---|---|
date | Mon, 30 Jul 2018 10:33:03 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:b55559a2854f | 2:7304ec2c9ab7 |
---|---|
1 ## ========================== | |
2 # Internal functions | |
3 ## ========================== | |
4 | |
5 # beginTreatment | |
6 beginTreatment <- function(name, Signal_data = NULL, Signal_info = NULL, | |
7 force.real = FALSE) { | |
8 | |
9 cat("Begin", name, "\n") | |
10 | |
11 | |
12 # Formatting the Signal_data and Signal_info ----------------------- | |
13 | |
14 vec <- is.vector(Signal_data) | |
15 if (vec) { | |
16 Signal_data <- vec2mat(Signal_data) | |
17 } | |
18 if (is.vector(Signal_info)) { | |
19 Signal_info <- vec2mat(Signal_info) | |
20 } | |
21 if (!is.null(Signal_data)) { | |
22 if (!is.matrix(Signal_data)) { | |
23 stop("Signal_data is not a matrix.") | |
24 } | |
25 if (!is.complex(Signal_data) && !is.numeric(Signal_data)) { | |
26 stop("Signal_data contains non-numerical values.") | |
27 } | |
28 } | |
29 if (!is.null(Signal_info) && !is.matrix(Signal_info)) { | |
30 stop("Signal_info is not a matrix.") | |
31 } | |
32 | |
33 | |
34 Original_data <- Signal_data | |
35 | |
36 # Extract the real part of the spectrum --------------------------- | |
37 | |
38 if (force.real) { | |
39 if (is.complex(Signal_data)) { | |
40 Signal_data <- Re(Signal_data) | |
41 } else { | |
42 # The signal is numeric Im(Signal_data) is zero anyway so let's avoid | |
43 # using complex(real=...,imaginary=0) which would give a complex signal | |
44 # in endTreatment() | |
45 force.real <- FALSE | |
46 } | |
47 } | |
48 | |
49 | |
50 # Return the formatted data and metadata entries -------------------- | |
51 | |
52 return(list(start = proc.time(), vec = vec, force.real = force.real, | |
53 Original_data = Original_data, Signal_data = Signal_data, Signal_info = Signal_info)) | |
54 } | |
55 | |
56 # endTreatment | |
57 endTreatment <- function(name, begin_info, Signal_data) { | |
58 | |
59 # begin_info: object outputted from beginTreatment | |
60 | |
61 | |
62 # Formatting the entries and printing process time ----------------------- | |
63 end_time <- proc.time() # record it as soon as possible | |
64 start_time <- begin_info[["start"]] | |
65 delta_time <- end_time - start_time | |
66 delta <- delta_time[] | |
67 cat("End", name, "\n") | |
68 cat("It lasted", round(delta["user.self"], 3), "s user time,", round(delta["sys.self"],3), | |
69 "s system time and", round(delta["elapsed"], 3), "s elapsed time.\n") | |
70 | |
71 | |
72 if (begin_info[["force.real"]]) { | |
73 # The imaginary part is left untouched | |
74 i <- complex(real = 0, imaginary = 1) | |
75 Signal_data <- Signal_data + i * Im(begin_info[["Original_data"]]) | |
76 } | |
77 | |
78 if (begin_info[["vec"]]) { | |
79 Signal_data <- Signal_data[1, ] | |
80 } | |
81 | |
82 # Return the formatted data and metadata entries -------------------- | |
83 return(Signal_data) | |
84 } | |
85 | |
86 # checkArg | |
87 checkArg <- function(arg, checks, can.be.null=FALSE) { | |
88 check.list <- list(bool=c(is.logical, "a boolean"), | |
89 int =c(function(x){x%%1==0}, "an integer"), | |
90 num =c(is.numeric, "a numeric"), | |
91 str =c(is.character, "a string"), | |
92 pos =c(function(x){x>0}, "positive"), | |
93 pos0=c(function(x){x>=0}, "positive or zero"), | |
94 l1 =c(function(x){length(x)==1}, "of length 1") | |
95 ) | |
96 if (is.null(arg)) { | |
97 if (!can.be.null) { | |
98 stop(deparse(substitute(arg)), " is null.") | |
99 } | |
100 } else { | |
101 if (is.matrix(arg)) { | |
102 stop(deparse(substitute(arg)), " is not scalar.") | |
103 } | |
104 for (c in checks) { | |
105 if (!check.list[[c]][[1]](arg)) { | |
106 stop(deparse(substitute(arg)), " is not ", check.list[[c]][[2]], ".") | |
107 } | |
108 } | |
109 } | |
110 } | |
111 | |
112 # getArg | |
113 getArg <- function(arg, info, argname, can.be.absent=FALSE) { | |
114 if (is.null(arg)) { | |
115 start <- paste("impossible to get argument", argname, "it was not given directly and"); | |
116 if (!is.matrix(info)) { | |
117 stop(paste(start, "the info matrix was not given")) | |
118 } | |
119 if (!(argname %in% colnames(info))) { | |
120 if (can.be.absent) { | |
121 return(NULL) | |
122 } else { | |
123 stop(paste(start, "is not in the info matrix")) | |
124 } | |
125 } | |
126 if (nrow(info) < 1) { | |
127 stop(paste(start, "the info matrix has no row")) | |
128 } | |
129 arg <- info[1,argname] | |
130 if (is.na(arg)) { | |
131 stop(paste(start, "it is NA in the info matrix")) | |
132 } | |
133 } | |
134 return(arg) | |
135 } | |
136 | |
137 # binarySearch | |
138 binarySearch <- function(a, target, lower = TRUE) { | |
139 # search the index i in a such that a[i] == target | |
140 # if it doesn't exists and lower, it searches the closer a[i] such that a[i] < target | |
141 # if !lower, it seraches the closer a[i] such that a[i] > target | |
142 # a should be monotone but can be increasing or decreasing | |
143 | |
144 # if a is increasing INVARIANT: a[amin] < target < a[amax] | |
145 N <- length(a) | |
146 if ((a[N] - target) * (a[N] - a[1]) <= 0) { | |
147 return(N) | |
148 } | |
149 if ((a[1] - target) * (a[N] - a[1]) >= 0) { | |
150 return(1) | |
151 } | |
152 amin <- 1 | |
153 amax <- N | |
154 while (amin + 1 < amax) { | |
155 amid <- floor((amin + amax)/2) | |
156 if ((a[amid] - target) * (a[amax] - a[amid]) < 0) { | |
157 amin <- amid | |
158 } else if ((a[amid] - target) * (a[amax] - a[amid]) > 0) { | |
159 amax <- amid | |
160 } else { | |
161 # a[amid] == a[amax] or a[amid] == target In both cases, a[amid] == | |
162 # target | |
163 return(amid) | |
164 } | |
165 } | |
166 if (xor(lower, a[amin] > a[amax])) { | |
167 # (lower && a[amin] < a[amax]) || (!lower && a[min] > a[max]) | |
168 # If increasing and we want the lower, we take amin | |
169 # If decreasing and we want the bigger, we take amin too | |
170 return(amin) | |
171 } else { | |
172 return(amax) | |
173 } | |
174 } | |
175 | |
176 # Interpol | |
177 Interpol <- function(t, y) { | |
178 # y: sample | |
179 # t : warping function | |
180 | |
181 m <- length(y) | |
182 # t <= m-1 | |
183 # because if t > m-1, y[ti+1] will be NA when we compute g | |
184 valid <- 1 <= t & t <= m-1 # FIXME it was '<' in Bubble v2 | |
185 s <- (1:m)[valid] | |
186 ti <- floor(t[s]) | |
187 tr <- t[s] - ti | |
188 g <- y[ti + 1] - y[ti] | |
189 f <- y[ti] + tr * g | |
190 list(f=f, s=s, g=g) | |
191 } | |
192 | |
193 # vec2mat | |
194 vec2mat <- function(vec) { | |
195 return(matrix(vec, nrow = 1, dimnames = list(c(1), names(vec)))) | |
196 | |
197 } | |
198 | |
199 # binarySearch | |
200 binarySearch <- function(a, target, lower = TRUE) { | |
201 # search the index i in a such that a[i] == target | |
202 # if it doesn't exists and lower, it searches the closer a[i] such that a[i] < target | |
203 # if !lower, it seraches the closer a[i] such that a[i] > target | |
204 # a should be monotone but can be increasing or decreasing | |
205 | |
206 # if a is increasing INVARIANT: a[amin] < target < a[amax] | |
207 N <- length(a) | |
208 if ((a[N] - target) * (a[N] - a[1]) <= 0) { | |
209 return(N) | |
210 } | |
211 if ((a[1] - target) * (a[N] - a[1]) >= 0) { | |
212 return(1) | |
213 } | |
214 amin <- 1 | |
215 amax <- N | |
216 while (amin + 1 < amax) { | |
217 amid <- floor((amin + amax)/2) | |
218 if ((a[amid] - target) * (a[amax] - a[amid]) < 0) { | |
219 amin <- amid | |
220 } else if ((a[amid] - target) * (a[amax] - a[amid]) > 0) { | |
221 amax <- amid | |
222 } else { | |
223 # a[amid] == a[amax] or a[amid] == target In both cases, a[amid] == | |
224 # target | |
225 return(amid) | |
226 } | |
227 } | |
228 if (xor(lower, a[amin] > a[amax])) { | |
229 # (lower && a[amin] < a[amax]) || (!lower && a[min] > a[max]) | |
230 # If increasing and we want the lower, we take amin | |
231 # If decreasing and we want the bigger, we take amin too | |
232 return(amin) | |
233 } else { | |
234 return(amax) | |
235 } | |
236 } | |
237 | |
238 | |
239 # indexInterval | |
240 indexInterval <- function (a, from, to, inclusive=TRUE) { | |
241 # If inclusive and from <= to, we need to take the lower | |
242 # If not inclusive and from > to, we need to take the lower too | |
243 lowerFrom <- xor(inclusive, from > to) | |
244 fromIndex <- binarySearch(a, from, lowerFrom) | |
245 toIndex <- binarySearch(a, to, !lowerFrom) | |
246 return(fromIndex:toIndex) | |
247 } | |
248 | |
249 | |
250 | |
251 ## ========================== | |
252 # GroupDelayCorrection | |
253 ## ========================== | |
254 GroupDelayCorrection <- function(Fid_data, Fid_info = NULL, group_delay = NULL) { | |
255 | |
256 | |
257 # Data initialisation and checks ---------------------------------------------- | |
258 | |
259 begin_info <- beginTreatment("GroupDelayCorrection", Fid_data, Fid_info) | |
260 Fid_data <- begin_info[["Signal_data"]] | |
261 dimension_names <- dimnames(Fid_data) | |
262 Fid_info <- begin_info[["Signal_info"]] | |
263 checkArg(group_delay, c("num", "pos0"), can.be.null = TRUE) | |
264 # if Fid_info and group_delay are NULL, getArg will generate an error | |
265 | |
266 group_delay <- getArg(group_delay, Fid_info, "GRPDLY", can.be.absent = TRUE) | |
267 | |
268 if (is.null(group_delay)) { | |
269 | |
270 # See DetermineBrukerDigitalFilter.m in matNMR MATLAB library | |
271 group_delay_matrix <- matrix(c(44.75, 46, 46.311, 33.5, 36.5, 36.53, 66.625, | |
272 48, 47.87, 59.0833, 50.1667, 50.229, 68.5625, 53.25, 53.289, 60.375, | |
273 69.5, 69.551, 69.5313, 72.25, 71.6, 61.0208, 70.1667, 70.184, 70.0156, | |
274 72.75, 72.138, 61.3438, 70.5, 70.528, 70.2578, 73, 72.348, 61.5052, 70.6667, | |
275 70.7, 70.3789, 72.5, 72.524, 61.5859, 71.3333, NA, 70.4395, 72.25, NA, | |
276 61.6263, 71.6667, NA, 70.4697, 72.125, NA, 61.6465, 71.8333, NA, 70.4849, | |
277 72.0625, NA, 61.6566, 71.9167, NA, 70.4924, 72.0313, NA), nrow = 21, | |
278 ncol = 3, byrow = TRUE, dimnames = list(c(2, 3, 4, 6, 8, 12, 16, 24, | |
279 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048), | |
280 c(10, 11, 12))) | |
281 decim <- Fid_info[1, "DECIM"] | |
282 dspfvs <- Fid_info[1, "DSPFVS"] | |
283 if (!(toString(decim) %in% rownames(group_delay_matrix))) { | |
284 stop(paste("Invalid DECIM", decim, "it should be one of", rownames(group_delay_matrix))) | |
285 } | |
286 if (!(toString(dspfvs) %in% colnames(group_delay_matrix))) { | |
287 stop(paste("Invalid DSPFVS", dspfvs, "it should be one of", colnames(group_delay_matrix))) | |
288 } | |
289 group_delay <- group_delay_matrix[toString(decim), toString(dspfvs)] | |
290 if (is.na(group_delay)) { | |
291 stop(paste("Invalid DECIM", decim, "for DSPFVS", dspfvs)) | |
292 } | |
293 } | |
294 m <- ncol(Fid_data) | |
295 n <- nrow(Fid_data) | |
296 | |
297 # GroupDelayCorrection ---------------------------------------------- | |
298 | |
299 # We do the shifting in the Fourier domain because the shift can be non-integer. | |
300 # That way we automatically have the circular behaviour of the shift and the | |
301 # interpolation if it is non-integer. | |
302 | |
303 Spectrum <- t(stats::mvfft(t(Fid_data))) | |
304 | |
305 # Spectrum <- FourierTransform(Fid_data, Fid_info) | |
306 p <- ceiling(m/2) | |
307 new_index <- c((p + 1):m, 1:p) | |
308 Spectrum <- Spectrum[,new_index] | |
309 Spectrum <- matrix(data = Spectrum, ncol = m, nrow = n) | |
310 | |
311 Omega <- (0:(m - 1))/m | |
312 i <- complex(real = 0, imaginary = 1) | |
313 | |
314 if (n>1) { | |
315 Spectrum <- sweep(Spectrum, MARGIN = 2, exp(i * group_delay * 2 * pi * Omega), `*`) | |
316 Spectrum <- Spectrum[,new_index] | |
317 }else { | |
318 Spectrum <- Spectrum* exp(i * group_delay * 2 * pi * Omega) | |
319 Spectrum <- Spectrum[new_index] | |
320 Spectrum <- matrix(data = Spectrum, ncol = m, nrow = n) | |
321 } | |
322 | |
323 | |
324 Fid_data <- t(stats::mvfft(t(Spectrum), inverse = TRUE))/m | |
325 colnames(Fid_data) <- dimension_names[[2]] | |
326 rownames(Fid_data) <- dimension_names[[1]] | |
327 | |
328 # Data finalisation ---------------------------------------------- | |
329 | |
330 return(endTreatment("GroupDelayCorrection", begin_info, Fid_data)) | |
331 } | |
332 | |
333 ## ========================== | |
334 # SolventSuppression | |
335 ## ========================== | |
336 SolventSuppression <- function(Fid_data, lambda.ss = 1e+06, ptw.ss = TRUE, | |
337 plotSolvent = F, returnSolvent = F) { | |
338 | |
339 # Data initialisation and checks ---------------------------------------------- | |
340 | |
341 begin_info <- beginTreatment("SolventSuppression", Fid_data) | |
342 Fid_data <- begin_info[["Signal_data"]] | |
343 checkArg(ptw.ss, c("bool")) | |
344 checkArg(lambda.ss, c("num", "pos0")) | |
345 | |
346 | |
347 # difsm function definition for the smoother ----------------------------------- | |
348 | |
349 if (ptw.ss) { | |
350 # Use of the function in ptw that smoothes signals with a finite difference | |
351 # penalty of order 2 | |
352 difsm <- ptw::difsm | |
353 } else { | |
354 # Or manual implementation based on sparse matrices for large data series (cf. | |
355 # Eilers, 2003. 'A perfect smoother') | |
356 difsm <- function(y, d = 2, lambda) { | |
357 | |
358 m <- length(y) | |
359 # Sparse identity matrix m x m | |
360 E <- Matrix::Diagonal(m) | |
361 D <- Matrix::diff(E, differences = d) | |
362 A <- E + lambda.ss * Matrix::t(D) %*% D | |
363 # base::chol does not take into account that A is sparse and is extremely slow | |
364 C <- Matrix::chol(A) | |
365 x <- Matrix::solve(C, Matrix::solve(Matrix::t(C), y)) | |
366 return(as.numeric(x)) | |
367 } | |
368 } | |
369 | |
370 # Solvent Suppression ---------------------------------------------- | |
371 | |
372 n <- dim(Fid_data)[1] | |
373 if (returnSolvent) { | |
374 SolventRe <- Fid_data | |
375 SolventIm <- Fid_data | |
376 } | |
377 for (i in 1:n) { | |
378 FidRe <- Re(Fid_data[i, ]) | |
379 FidIm <- Im(Fid_data[i, ]) | |
380 solventRe <- difsm(y = FidRe, lambda = lambda.ss) | |
381 solventIm <- difsm(y = FidIm, lambda = lambda.ss) | |
382 | |
383 if (plotSolvent) { | |
384 m <- length(FidRe) | |
385 graphics::plot(1:m, FidRe, type = "l", col = "red") | |
386 graphics::lines(1:m, solventRe, type = "l", col = "blue") | |
387 graphics::plot(1:m, FidIm, type = "l", col = "red") | |
388 graphics::lines(1:m, solventIm, type = "l", col = "blue") | |
389 } | |
390 FidRe <- FidRe - solventRe | |
391 FidIm <- FidIm - solventIm | |
392 Fid_data[i, ] <- complex(real = FidRe, imaginary = FidIm) | |
393 if (returnSolvent) { | |
394 SolventRe[i, ] <- solventRe | |
395 SolventIm[i, ] <- solventIm | |
396 } | |
397 } | |
398 | |
399 | |
400 # Data finalisation ---------------------------------------------- | |
401 | |
402 Fid_data <- endTreatment("SolventSuppression", begin_info, Fid_data) | |
403 if (returnSolvent) { | |
404 return(list(Fid_data = Fid_data, SolventRe = SolventRe, SolventIm = SolventIm)) | |
405 } else { | |
406 return(Fid_data) | |
407 } | |
408 } | |
409 | |
410 | |
411 ## ========================== | |
412 # Apodization | |
413 # ============================= | |
414 Apodization <- function(Fid_data, Fid_info = NULL, DT = NULL, | |
415 type.apod = c("exp","cos2", "blockexp", "blockcos2", | |
416 "gauss", "hanning", "hamming"), phase = 0, rectRatio = 1/2, | |
417 gaussLB = 1, expLB = 1, plotWindow = F, returnFactor = F) { | |
418 | |
419 # Data initialisation and checks ---------------------------------------------- | |
420 begin_info <- beginTreatment("Apodization", Fid_data, Fid_info) | |
421 Fid_data <- begin_info[["Signal_data"]] | |
422 Fid_info <- begin_info[["Signal_info"]] | |
423 # Data check | |
424 type.apod <- match.arg(type.apod) | |
425 checkArg(DT, c("num", "pos"), can.be.null = TRUE) | |
426 checkArg(phase, c("num")) | |
427 | |
428 # Apodization ---------------------------------------------- | |
429 DT <- getArg(DT, Fid_info, "DT") # Dwell Time | |
430 m <- ncol(Fid_data) | |
431 t <- (1:m) * DT # Time | |
432 rectSize <- ceiling(rectRatio * m) | |
433 gaussLB <- (gaussLB/(sqrt(8 * log(2)))) | |
434 # Define the types of apodization: | |
435 switch(type.apod, exp = { | |
436 # exponential | |
437 Factor <- exp(-expLB * t) | |
438 }, cos2 = { | |
439 # cos^2 | |
440 c <- cos((1:m) * pi/(2 * m) - phase * pi/2) | |
441 Factor <- c * c | |
442 }, blockexp = { | |
443 # block and exponential | |
444 Factor <- c(rep.int(1, rectSize), rep.int(0, m - rectSize)) | |
445 # | rectSize | 1 ___________ | \ 0 \____ | |
446 Factor[(rectSize + 1):m] <- exp(-expLB * t[1:(m - rectSize)]) | |
447 }, blockcos2 = { | |
448 # block and cos^2 | |
449 Factor <- c(rep.int(1, rectSize), rep.int(0, m - rectSize)) | |
450 c <- cos((1:(m - rectSize)) * pi/(2 * (m - rectSize))) | |
451 Factor[(rectSize + 1):m] <- c * c | |
452 }, gauss = { | |
453 # gaussian | |
454 Factor <- exp(-(gaussLB * t)^2/2) | |
455 Factor <- Factor/max(Factor) | |
456 }, hanning = { | |
457 # Hanning | |
458 Factor <- 0.5 + 0.5 * cos((1:m) * pi/m - phase * pi) | |
459 }, hamming = { | |
460 # Hamming | |
461 Factor <- 0.54 + 0.46 * cos((1:m) * pi/m - phase * pi) | |
462 }) | |
463 if (plotWindow) { | |
464 graphics::plot(1:m, Factor, "l") | |
465 # dev.off() # device independent, it is the responsability of the | |
466 # caller to do it | |
467 } | |
468 # Apply the apodization factor on the spectra | |
469 Fid_data <- sweep(Fid_data, MARGIN = 2, Factor, `*`) | |
470 | |
471 # Data finalisation ---------------------------------------------- | |
472 Fid_data <- endTreatment("Apodization", begin_info, Fid_data) | |
473 if (returnFactor) { | |
474 return(list(Fid_data = Fid_data, Factor = Factor)) | |
475 } else { | |
476 return(Fid_data) | |
477 } | |
478 } | |
479 | |
480 | |
481 ## ==================================================== | |
482 # FourierTransform | |
483 ## ==================================================== | |
484 | |
485 | |
486 # fftshift1D2D | |
487 fftshift1D2D <- function(x) { | |
488 vec <- F | |
489 if (is.vector(x)) { | |
490 x <- vec2mat(x) | |
491 vec <- T | |
492 } | |
493 m <- dim(x)[2] | |
494 p <- ceiling(m/2) | |
495 new_index <- c((p + 1):m, 1:p) | |
496 y <- x[, new_index, drop = vec] | |
497 } | |
498 | |
499 # FourierTransform | |
500 FourierTransform <- function(Fid_data, Fid_info = NULL, SW_h = NULL, SW = NULL, O1 = NULL, reverse.axis = TRUE) { | |
501 | |
502 # Data initialisation and checks ---------------------------------------------- | |
503 begin_info <- beginTreatment("FourierTransform", Fid_data, Fid_info) | |
504 Fid_data <- begin_info[["Signal_data"]] | |
505 Fid_info <- begin_info[["Signal_info"]] | |
506 | |
507 m <- ncol(Fid_data) | |
508 n <- nrow(Fid_data) | |
509 | |
510 if (is.null(SW_h)) { | |
511 SW_h <- getArg(SW_h, Fid_info, "SW_h") | |
512 } | |
513 | |
514 if (is.null(SW)) { | |
515 SW <- getArg(SW, Fid_info, "SW") # Sweep Width in ppm (semi frequency scale in ppm) | |
516 } | |
517 | |
518 | |
519 if (is.null(O1)) { | |
520 O1 <- getArg(O1, Fid_info, "O1") | |
521 } | |
522 | |
523 | |
524 checkArg(reverse.axis, c("bool")) | |
525 | |
526 # Fourier Transformation ---------------------------------------------- | |
527 # mvfft does the unnormalized fourier transform (see ?mvfft), so we need divide | |
528 # by m. It does not matter a lot in our case since the spectrum will be | |
529 # normalized. | |
530 | |
531 # FT | |
532 RawSpect_data <- fftshift1D2D(t(stats::mvfft(t(Fid_data)))) | |
533 # recover the frequencies values | |
534 f <- ((0:(m - 1)) - floor(m/2)) * Fid_info[1, "SW_h"]/(m-1) | |
535 | |
536 if(reverse.axis == TRUE) { | |
537 revind <- rev(1:m) | |
538 RawSpect_data <- RawSpect_data[,revind] # reverse the spectrum | |
539 } | |
540 | |
541 RawSpect_data <- matrix(RawSpect_data, nrow = n, ncol = m) | |
542 colnames(RawSpect_data) <- f | |
543 rownames(RawSpect_data) <- rownames(Fid_data) | |
544 | |
545 # PPM conversion ---------------------------------------------- | |
546 | |
547 # The Sweep Width has to be the same since the column names are the same | |
548 | |
549 ppmInterval <- SW/(m-1) | |
550 | |
551 O1index = round((m+1)/2+O1*(m - 1) / SW_h) | |
552 | |
553 end <- O1index - m | |
554 start <- O1index -1 | |
555 ppmScale <- (start:end) * ppmInterval | |
556 RawSpect_data <- matrix(RawSpect_data, nrow = n, ncol = -(end - start) + 1, dimnames = | |
557 list(rownames(RawSpect_data), ppmScale)) | |
558 | |
559 | |
560 # Data finalisation ---------------------------------------------- | |
561 return(endTreatment("FourierTransform", begin_info, RawSpect_data)) | |
562 } | |
563 | |
564 ## ==================================================== | |
565 # InternalReferencing | |
566 ## ==================================================== | |
567 | |
568 InternalReferencing <- function(Spectrum_data, Fid_info, method = c("max", "thres"), | |
569 range = c("nearvalue", "all", "window"), ppm.value = 0, | |
570 direction = "left", shiftHandling = c("zerofilling", "cut", | |
571 "NAfilling", "circular"), c = 2, pc = 0.02, fromto.RC = NULL, | |
572 ppm.ir = TRUE, rowindex_graph = NULL) { | |
573 | |
574 | |
575 | |
576 # Data initialisation and checks ---------------------------------------------- | |
577 | |
578 begin_info <- beginTreatment("InternalReferencing", Spectrum_data, Fid_info) | |
579 Spectrum_data <- begin_info[["Signal_data"]] | |
580 Fid_info <- begin_info[["Signal_info"]] | |
581 | |
582 | |
583 ######## Check input arguments | |
584 | |
585 range <- match.arg(range) | |
586 shiftHandling <- match.arg(shiftHandling) | |
587 method <- match.arg(method) | |
588 plots <- NULL | |
589 | |
590 checkArg(ppm.ir, c("bool")) | |
591 checkArg(unlist(fromto.RC), c("num"), can.be.null = TRUE) | |
592 checkArg(pc, c("num")) | |
593 checkArg(ppm.value, c("num")) | |
594 checkArg(rowindex_graph, "num", can.be.null = TRUE) | |
595 | |
596 # fromto.RC : if range == "window", | |
597 # fromto.RC defines the spectral window where to search for the peak | |
598 if (!is.null(fromto.RC)) { | |
599 diff <- diff(unlist(fromto.RC))[1:length(diff(unlist(fromto.RC)))%%2 !=0] | |
600 for (i in 1:length(diff)) { | |
601 if (diff[i] >= 0) { | |
602 fromto <- c(fromto.RC[[i]][2], fromto.RC[[i]][1]) | |
603 fromto.RC[[i]] <- fromto | |
604 } | |
605 } | |
606 } | |
607 | |
608 | |
609 # findTMSPpeak function ---------------------------------------------- | |
610 # If method == "tresh", findTMSPpeak will find the position of the first | |
611 # peak (from left or right) which is higher than a predefined threshold | |
612 # and is computed as: c*(cumulated_mean/cumulated_sd) | |
613 findTMSPpeak <- function(ft, c = 2, direction = "left") { | |
614 ft <- Re(ft) # extraction de la partie réelle | |
615 N <- length(ft) | |
616 if (direction == "left") { | |
617 newindex <- rev(1:N) | |
618 ft <- rev(ft) | |
619 } | |
620 thres <- 99999 | |
621 i <- 1000 # Start at point 1000 to find the peak | |
622 vect <- ft[1:i] | |
623 | |
624 while (vect[i] <= (c * thres)) { | |
625 cumsd <- stats::sd(vect) | |
626 cummean <- mean(vect) | |
627 thres <- cummean + 3 * cumsd | |
628 i <- i + 1 | |
629 vect <- ft[1:i] | |
630 } | |
631 if (direction == "left") { | |
632 v <- newindex[i] | |
633 } else {v <- i} | |
634 | |
635 if (is.na(v)) { | |
636 warning("No peak found, need to lower the threshold.") | |
637 return(NA) | |
638 } else { | |
639 # recherche dans les 1% de points suivants du max trouve pour etre au sommet du | |
640 # pic | |
641 d <- which.max(ft[v:(v + N * 0.01)]) | |
642 new.peak <- v + d - 1 # nouveau pic du TMSP si d > 0 | |
643 | |
644 if (names(which.max(ft[v:(v + N * 0.01)])) != names(which.max(ft[v:(v + N * 0.03)]))) { | |
645 # recherche dans les 3% de points suivants du max trouve pour eviter un faux | |
646 # positif | |
647 warning("the TMSP peak might be located further away, increase the threshold to check.") | |
648 } | |
649 return(new.peak) | |
650 } | |
651 } | |
652 | |
653 | |
654 # Define the search zone ---------------------------------------- | |
655 | |
656 n <- nrow(Spectrum_data) | |
657 m <- ncol(Spectrum_data) | |
658 | |
659 # The Sweep Width (SW) has to be the same since the column names are the same | |
660 SW <- Fid_info[1, "SW"] # Sweep Width in ppm | |
661 ppmInterval <- SW/(m-1) # size of a ppm interval | |
662 | |
663 # range: How the search zone is defined ("all", "nearvalue" or "window") | |
664 if (range == "all") { | |
665 | |
666 Data <- Spectrum_data | |
667 | |
668 } else { # range = "nearvalue" or "window" | |
669 # Need to define colindex (column indexes) to apply indexInterval on it | |
670 | |
671 if (range == "nearvalue") { | |
672 | |
673 fromto.RC <- list(c(-(SW * pc)/2 + ppm.value, (SW * pc)/2 + ppm.value)) # automatic fromto values in ppm | |
674 colindex <- as.numeric(colnames(Spectrum_data)) | |
675 | |
676 } else { | |
677 # range == "window" | |
678 # fromto.RC is already user-defined | |
679 if (ppm.ir == TRUE) { | |
680 colindex <- as.numeric(colnames(Spectrum_data)) | |
681 } else { | |
682 colindex <- 1:m | |
683 } | |
684 } | |
685 | |
686 # index intervals taking into account the different elements in the list fromto.RC | |
687 Int <- vector("list", length(fromto.RC)) | |
688 for (i in 1:length(fromto.RC)) { | |
689 Int[[i]] <- indexInterval(colindex, from = fromto.RC[[i]][1], | |
690 to = fromto.RC[[i]][2], inclusive = TRUE) | |
691 } | |
692 | |
693 # define Data as the cropped spectrum including the index intervals | |
694 # outside the research zone, the intensities are set to the minimal | |
695 # intensity of the research zone | |
696 | |
697 if (n > 1){ | |
698 Data <- apply(Re(Spectrum_data[,unlist(Int)]),1, function(x) rep(min(x), m)) | |
699 Data <- t(Data) | |
700 Data[,unlist(Int)] <- Re(Spectrum_data[,unlist(Int)]) | |
701 } else { | |
702 Data <- rep(min(Re(Spectrum_data)) ,m) | |
703 Data[unlist(Int)] <- Re(Spectrum_data[unlist(Int)]) | |
704 } | |
705 | |
706 } | |
707 | |
708 | |
709 # Apply the peak location search method ('thres' or 'max') on spectra | |
710 # ----------------------------------------------------------------------- | |
711 | |
712 if (method == "thres") { | |
713 TMSPpeaks <- apply(Data, 1, findTMSPpeak, c = c, direction = direction) | |
714 } else { # method == "max | |
715 TMSPpeaks <- apply(Re(Data), 1, which.max) | |
716 } | |
717 | |
718 | |
719 # Shift spectra according to the TMSPpeaks found -------------------------------- | |
720 # Depends on the shiftHandling | |
721 | |
722 # TMSPpeaks is a column index | |
723 maxpeak <- max(TMSPpeaks) # max accross spectra | |
724 minpeak <- min(TMSPpeaks) # min accross spectra | |
725 | |
726 | |
727 if (shiftHandling %in% c("zerofilling", "NAfilling", "cut")) { | |
728 fill <- NA | |
729 if (shiftHandling == "zerofilling") { | |
730 fill <- 0 | |
731 } | |
732 | |
733 start <- maxpeak - 1 | |
734 end <- minpeak - m | |
735 | |
736 # ppm values of each interval for the whole spectral range of the spectral matrix | |
737 ppmScale <- (start:end) * ppmInterval | |
738 | |
739 # check if ppm.value is in the ppmScale interval | |
740 if(ppm.value < min(ppmScale) | ppm.value > max(ppmScale)) { | |
741 warning("ppm.value = ", ppm.value, " is not in the ppm interval [", | |
742 round(min(ppmScale),2), ",", round(max(ppmScale),2), "], and is set to its default ppm.value 0") | |
743 ppm.value = 0 | |
744 } | |
745 | |
746 # if ppm.value != 0, ppmScale is adapted | |
747 ppmScale <- ppmScale + ppm.value | |
748 | |
749 # create the spectral matrix with realigned spectra | |
750 Spectrum_data_calib <- matrix(fill, nrow = n, ncol = -(end - start) + 1, | |
751 dimnames = list(rownames(Spectrum_data), ppmScale)) | |
752 | |
753 # fills in Spectrum_data_calib with shifted spectra | |
754 for (i in 1:n) { | |
755 shift <- (1 - TMSPpeaks[i]) + start | |
756 Spectrum_data_calib[i, (1 + shift):(m + shift)] <- Spectrum_data[i, ] | |
757 } | |
758 | |
759 if (shiftHandling == "cut") { | |
760 Spectrum_data_calib = as.matrix(stats::na.omit(t(Spectrum_data_calib))) | |
761 Spectrum_data_calib = t(Spectrum_data_calib) | |
762 base::attr(Spectrum_data_calib, "na.action") <- NULL | |
763 } | |
764 | |
765 | |
766 } else { | |
767 # circular | |
768 start <- 1 - maxpeak | |
769 end <- m - maxpeak | |
770 | |
771 ppmScale <- (start:end) * ppmInterval | |
772 | |
773 # check if ppm.value in is the ppmScale interval | |
774 if(ppm.value < min(ppmScale) | ppm.value > max(ppmScale)) { | |
775 warning("ppm.value = ", ppm.value, " is not in the ppm interval [", | |
776 round(min(ppmScale),2), ",", round(max(ppmScale),2), "], and is set to its default ppm.value 0") | |
777 ppm.value = 0 | |
778 } | |
779 | |
780 # if ppm.value != 0, ppmScale is adapted | |
781 ppmScale <- ppmScale + ppm.value | |
782 | |
783 # create the spectral matrix with realigned spectra | |
784 Spectrum_data_calib <- matrix(nrow=n, ncol=end-start+1, | |
785 dimnames=list(rownames(Spectrum_data), ppmScale)) | |
786 | |
787 # fills in Spectrum_data_calib with shifted spectra | |
788 for (i in 1:n) { | |
789 shift <- (maxpeak-TMSPpeaks[i]) | |
790 Spectrum_data_calib[i,(1+shift):m] <- Spectrum_data[i,1:(m-shift)] | |
791 if (shift > 0) { | |
792 Spectrum_data_calib[i,1:shift] <- Spectrum_data[i,(m-shift+1):m] | |
793 } | |
794 } | |
795 } | |
796 | |
797 | |
798 | |
799 | |
800 # Plot of the spectra (depending on rowindex_graph) --------------------------------------------------- | |
801 | |
802 ppm = xstart = value = xend = Legend = NULL # only for R CMD check | |
803 | |
804 | |
805 # with the search zone for TMSP and the location of the peaks just found | |
806 if (!is.null(rowindex_graph)) { | |
807 | |
808 if (range == "window") { | |
809 if (ppm.ir == TRUE) { | |
810 fromto <- fromto.RC | |
811 } else { | |
812 fromto <- list() | |
813 idcol <- as.numeric(colnames(Spectrum_data)) | |
814 for (i in 1:length(fromto.RC)) { | |
815 fromto[[i]] <- as.numeric(colnames(Spectrum_data))[fromto.RC[[i]]] | |
816 } | |
817 } | |
818 } else { | |
819 fromto <- fromto.RC | |
820 } | |
821 | |
822 # TMSPloc in ppm | |
823 TMSPloc <- as.numeric(colnames(Spectrum_data))[TMSPpeaks[rowindex_graph]] | |
824 | |
825 # num plot per window | |
826 num.stacked <- 6 | |
827 | |
828 # rectanglar bands of color for the search zone | |
829 rects <- data.frame(xstart = sapply(fromto, function(x) x[[1]]), | |
830 xend = sapply(fromto, function(x) x[[2]]), | |
831 Legend = "Peak search zone and location") | |
832 | |
833 # vlines for TMSP peak | |
834 addlines <- data.frame(rowname = rownames(Spectrum_data)[rowindex_graph],TMSPloc) | |
835 | |
836 nn <- length(rowindex_graph) | |
837 i <- 1 | |
838 j <- 1 | |
839 plots <- vector(mode = "list", length = ceiling(nn/num.stacked)) | |
840 | |
841 while (i <= nn) { | |
842 | |
843 last <- min(i + num.stacked - 1, nn) | |
844 | |
845 melted <- reshape2::melt(Re(Spectrum_data[i:last, ]), | |
846 varnames = c("rowname", "ppm")) | |
847 | |
848 plots[[j]] <- ggplot2::ggplot() + ggplot2::theme_bw() + | |
849 ggplot2::geom_line(data = melted, | |
850 ggplot2::aes(x = ppm, y = value)) + | |
851 ggplot2::geom_rect(data = rects, ggplot2::aes(xmin = xstart, xmax = xend, | |
852 ymin = -Inf, ymax = Inf, fill = Legend), alpha = 0.4) + | |
853 ggplot2::facet_grid(rowname ~ ., scales = "free_y") + | |
854 ggplot2::theme(legend.position = "none") + | |
855 ggplot2::geom_vline(data = addlines, ggplot2::aes(xintercept = TMSPloc), | |
856 color = "red", show.legend = TRUE) + | |
857 ggplot2::ggtitle("Peak search zone and location") + | |
858 ggplot2::theme(legend.position = "top", legend.text = ggplot2::element_text()) | |
859 | |
860 | |
861 | |
862 if ((melted[1, "ppm"] - melted[(dim(melted)[1]), "ppm"]) > 0) { | |
863 plots[[j]] <- plots[[j]] + ggplot2::scale_x_reverse() | |
864 } | |
865 | |
866 i <- last + 1 | |
867 j <- j + 1 | |
868 } | |
869 | |
870 plots | |
871 } | |
872 | |
873 | |
874 # Return the results ---------------------------------------------- | |
875 Spectrum_data <- endTreatment("InternalReferencing", begin_info, Spectrum_data_calib) | |
876 | |
877 if (is.null(plots)) { | |
878 return(Spectrum_data) | |
879 } else { | |
880 return(list(Spectrum_data = Spectrum_data, plots = plots)) | |
881 } | |
882 | |
883 } | |
884 | |
885 ## ==================================================== | |
886 # ZeroOrderPhaseCorrection | |
887 ## ==================================================== | |
888 | |
889 ZeroOrderPhaseCorrection <- function(Spectrum_data, type.zopc = c("rms", "manual", "max"), | |
890 plot_rms = NULL, returnAngle = FALSE, createWindow = TRUE, | |
891 angle = NULL, plot_spectra = FALSE, | |
892 ppm.zopc = TRUE, exclude.zopc = list(c(5.1,4.5))) { | |
893 | |
894 | |
895 # Data initialisation and checks ---------------------------------------------- | |
896 | |
897 # Entry arguments definition: | |
898 # plot_rms : graph of rms criterion returnAngle : if TRUE, returns avector of | |
899 # optimal angles createWindow : for plot_rms plots angle : If angle is not NULL, | |
900 # spectra are rotated according to the angle vector values | |
901 # plot_spectra : if TRUE, plot rotated spectra | |
902 | |
903 | |
904 | |
905 begin_info <- beginTreatment("ZeroOrderPhaseCorrection", Spectrum_data) | |
906 Spectrum_data <- begin_info[["Signal_data"]] | |
907 n <- nrow(Spectrum_data) | |
908 m <- ncol(Spectrum_data) | |
909 | |
910 rnames <- rownames(Spectrum_data) | |
911 | |
912 # Check input arguments | |
913 type.zopc <- match.arg(type.zopc) | |
914 checkArg(ppm.zopc, c("bool")) | |
915 checkArg(unlist(exclude.zopc), c("num"), can.be.null = TRUE) | |
916 | |
917 | |
918 # type.zopc in c("max", "rms") ----------------------------------------- | |
919 if (type.zopc %in% c("max", "rms")) { | |
920 # angle is found by optimization | |
921 | |
922 # rms function to be optimised | |
923 rms <- function(ang, y, meth = c("max", "rms")) { | |
924 # if (debug_plot) { graphics::abline(v=ang, col='gray60') } | |
925 roty <- y * exp(complex(real = 0, imaginary = ang)) # spectrum rotation | |
926 Rey <- Re(roty) | |
927 | |
928 if (meth == "rms") { | |
929 ReyPos <- Rey[Rey >= 0] # select positive intensities | |
930 POSss <- sum((ReyPos)^2, na.rm = TRUE) # SS for positive intensities | |
931 ss <- sum((Rey)^2, na.rm = TRUE) # SS for all intensities | |
932 return(POSss/ss) # criterion : SS for positive values / SS for all intensities | |
933 } else { | |
934 maxi <- max(Rey, na.rm = TRUE) | |
935 return(maxi) | |
936 } | |
937 } | |
938 | |
939 | |
940 # Define the interval where to search for (by defining Data) | |
941 if (is.null(exclude.zopc)) { | |
942 Data <- Spectrum_data | |
943 } else { | |
944 | |
945 # if ppm.zopc == TRUE, then exclude.zopc is in the colnames values, else, in the column | |
946 # index | |
947 if (ppm.zopc == TRUE) { | |
948 colindex <- as.numeric(colnames(Spectrum_data)) | |
949 } else { | |
950 colindex <- 1:m | |
951 } | |
952 | |
953 # Second check for the argument exclude.zopc | |
954 diff <- diff(unlist(exclude.zopc))[1:length(diff(unlist(exclude.zopc)))%%2 !=0] | |
955 for (i in 1:length(diff)) { | |
956 if (ppm.zopc == TRUE & diff[i] >= 0) { | |
957 stop(paste("Invalid region removal because from <= to in ppm.zopc")) | |
958 } else if (ppm.zopc == FALSE & diff[i] <= 0) {stop(paste("Invalid region removal because from >= to in column index"))} | |
959 } | |
960 | |
961 | |
962 Int <- vector("list", length(exclude.zopc)) | |
963 for (i in 1:length(exclude.zopc)) { | |
964 Int[[i]] <- indexInterval(colindex, from = exclude.zopc[[i]][1], | |
965 to = exclude.zopc[[i]][2], inclusive = TRUE) | |
966 } | |
967 | |
968 vector <- rep(1, m) | |
969 vector[unlist(Int)] <- 0 | |
970 if (n > 1) { | |
971 Data <- sweep(Spectrum_data, MARGIN = 2, FUN = "*", vector) # Cropped_Spectrum | |
972 } else { | |
973 Data <- Spectrum_data * vector | |
974 } # Cropped_Spectrum | |
975 } | |
976 | |
977 | |
978 # angles computation | |
979 Angle <- c() | |
980 for (k in 1:n) | |
981 { | |
982 # The function is rms is periodic (period 2pi) and it seems that there is a phase | |
983 # x such that rms is unimodal (i.e. decreasing then increasing) on the interval | |
984 # [x; x+2pi]. However, if we do the optimization for example on [x-pi; x+pi], | |
985 # instead of being decreasing then increasing, it might be increasing then | |
986 # decreasing in which case optimize, thinking it is a valley will have to choose | |
987 # between the left or the right of this hill and if it chooses wrong, it will end | |
988 # up at like x-pi while the minimum is close to x+pi. | |
989 | |
990 # Supposing that rms is unimodal, the classical 1D unimodal optimization will | |
991 # work in either [-pi;pi] or [0;2pi] (this is not easy to be convinced by that I | |
992 # agree) and we can check which one it is simply by the following trick | |
993 | |
994 f0 <- rms(0, Data[k, ],meth = type.zopc) | |
995 fpi <- rms(pi, Data[k, ], meth = type.zopc) | |
996 if (f0 < fpi) { | |
997 interval <- c(-pi, pi) | |
998 } else { | |
999 interval <- c(0, 2 * pi) | |
1000 } | |
1001 | |
1002 # graphs of rms criteria | |
1003 debug_plot <- F # rms should not plot anything now, only when called by optimize | |
1004 if (!is.null(plot_rms) && rnames[k] %in% plot_rms) { | |
1005 x <- seq(min(interval), max(interval), length.out = 100) | |
1006 y <- rep(1, 100) | |
1007 for (K in (1:100)) { | |
1008 y[K] <- rms(x[K], Data[k, ], meth = type.zopc) | |
1009 } | |
1010 if (createWindow == TRUE) { | |
1011 grDevices::dev.new(noRStudioGD = FALSE) | |
1012 } | |
1013 graphics::plot(x, y, main = paste("Criterion maximization \n", | |
1014 rownames(Data)[k]), ylim = c(0, 1.1), | |
1015 ylab = "positiveness criterion", xlab = "angle ") | |
1016 debug_plot <- T | |
1017 } | |
1018 | |
1019 # Best angle | |
1020 best <- stats::optimize(rms, interval = interval, maximum = TRUE, | |
1021 y = Data[k,], meth = type.zopc) | |
1022 ang <- best[["maximum"]] | |
1023 | |
1024 | |
1025 if (debug_plot) { | |
1026 graphics::abline(v = ang, col = "black") | |
1027 graphics::text(x = (ang+0.1*ang), y = (y[ang]-0.1*y[ang]), labels = round(ang, 3)) | |
1028 } | |
1029 | |
1030 # Spectrum rotation | |
1031 Spectrum_data[k, ] <- Spectrum_data[k, ] * exp(complex(real = 0, imaginary = ang)) | |
1032 Angle <- c(Angle, ang) | |
1033 } | |
1034 | |
1035 | |
1036 | |
1037 | |
1038 } else { | |
1039 # type.zopc is "manual" ------------------------------------------------------- | |
1040 # if Angle is already specified and no optimisation is needed | |
1041 Angle <- angle | |
1042 | |
1043 if (!is.vector(angle)) { | |
1044 stop("angle is not a vector") | |
1045 } | |
1046 | |
1047 if (!is.numeric(angle)) { | |
1048 stop("angle is not a numeric") | |
1049 } | |
1050 | |
1051 if (length(angle) != n) { | |
1052 stop(paste("angle has length", length(angle), "and there are", n, "spectra to rotate.")) | |
1053 } | |
1054 for (k in 1:n) { | |
1055 Spectrum_data[k, ] <- Spectrum_data[k, ] * exp(complex(real = 0, imaginary = - angle[k])) | |
1056 } | |
1057 } | |
1058 | |
1059 | |
1060 # Draw spectra | |
1061 if (plot_spectra == TRUE) { | |
1062 nn <- ceiling(n/4) | |
1063 i <- 1 | |
1064 for (k in 1:nn) { | |
1065 if (createWindow == TRUE) { | |
1066 grDevices::dev.new(noRStudioGD = FALSE) | |
1067 } | |
1068 graphics::par(mfrow = c(4, 2)) | |
1069 while (i <= n) { | |
1070 last <- min(i + 4 - 1, n) | |
1071 graphics::plot(Re(Spectrum_data[i, ]), type = "l", ylab = "intensity", | |
1072 xlab = "Index", main = paste0(rownames(Spectrum_data)[i], " - Real part")) | |
1073 graphics::plot(Im(Spectrum_data[i, ]), type = "l", ylab = "intensity", | |
1074 xlab = "Index", main = paste0(rownames(Spectrum_data)[i], " - Imaginary part")) | |
1075 i <- i + 1 | |
1076 } | |
1077 i <- last + 1 | |
1078 } | |
1079 } | |
1080 | |
1081 | |
1082 # Data finalisation ---------------------------------------------- | |
1083 | |
1084 Spectrum_data <- endTreatment("ZeroOrderPhaseCorrection", begin_info, Spectrum_data) | |
1085 if (returnAngle) { | |
1086 return(list(Spectrum_data = Spectrum_data, Angle = Angle)) | |
1087 } else { | |
1088 return(Spectrum_data) | |
1089 } | |
1090 } | |
1091 | |
1092 | |
1093 ## ==================================================== | |
1094 # Baseline Correction | |
1095 ## ==================================================== | |
1096 BaselineCorrection <- function(Spectrum_data, ptw.bc = TRUE, maxIter = 42, | |
1097 lambda.bc = 1e+07, p.bc = 0.05, eps = 1e-08, | |
1098 ppm.bc = TRUE, exclude.bc = list(c(5.1,4.5)), | |
1099 returnBaseline = F) { | |
1100 | |
1101 # Data initialisation ---------------------------------------------- | |
1102 begin_info <- beginTreatment("BaselineCorrection", Spectrum_data, force.real = T) | |
1103 Spectrum_data <- begin_info[["Signal_data"]] | |
1104 p <- p.bc | |
1105 lambda <- lambda.bc | |
1106 n <- dim(Spectrum_data)[1] | |
1107 m <- dim(Spectrum_data)[2] | |
1108 | |
1109 | |
1110 # Data check | |
1111 checkArg(ptw.bc, c("bool")) | |
1112 checkArg(maxIter, c("int", "pos")) | |
1113 checkArg(lambda, c("num", "pos0")) | |
1114 checkArg(p.bc, c("num", "pos0")) | |
1115 checkArg(eps, c("num", "pos0")) | |
1116 checkArg(returnBaseline, c("bool")) | |
1117 checkArg(ppm.bc, c("bool")) | |
1118 checkArg(unlist(exclude.bc), c("num"), can.be.null = TRUE) | |
1119 | |
1120 # Define the interval where to search for (by defining Data) | |
1121 if (is.null(exclude.bc)) { | |
1122 exclude_index <- NULL | |
1123 } else { | |
1124 # if ppm.bc == TRUE, then exclude.bc is in the colnames values, else, in the column | |
1125 # index | |
1126 if (ppm.bc == TRUE) { | |
1127 colindex <- as.numeric(colnames(Spectrum_data)) | |
1128 } else { | |
1129 colindex <- 1:m | |
1130 } | |
1131 | |
1132 Int <- vector("list", length(exclude.bc)) | |
1133 for (i in 1:length(exclude.bc)) { | |
1134 Int[[i]] <- indexInterval(colindex, from = exclude.bc[[i]][1], | |
1135 to = exclude.bc[[i]][2], inclusive = TRUE) | |
1136 } | |
1137 exclude_index <- unlist(Int) | |
1138 } | |
1139 | |
1140 # Baseline Correction implementation definition ---------------------- | |
1141 | |
1142 # 2 Ways: either use the function asysm from the ptw package or by | |
1143 # built-in functions | |
1144 if (ptw.bc) { | |
1145 asysm <- ptw::asysm | |
1146 } else { | |
1147 difsmw <- function(y, lambda, w, d) { | |
1148 # Weighted smoothing with a finite difference penalty cf Eilers, 2003. | |
1149 # (A perfect smoother) | |
1150 # y: signal to be smoothed | |
1151 # lambda: smoothing parameter | |
1152 # w: weights (use0 zeros for missing values) | |
1153 # d: order of differences in penalty (generally 2) | |
1154 m <- length(y) | |
1155 W <- Matrix::Diagonal(x=w) | |
1156 E <- Matrix::Diagonal(m) | |
1157 D <- Matrix::diff(E, differences = d) | |
1158 C <- Matrix::chol(W + lambda * t(D) %*% D) | |
1159 x <- Matrix::solve(C, Matrix::solve(t(C), w * y)) | |
1160 return(as.numeric(x)) | |
1161 | |
1162 } | |
1163 asysm <- function(y, lambda, p, eps, exclude_index) { | |
1164 # Baseline estimation with asymmetric least squares | |
1165 # y: signal | |
1166 # lambda: smoothing parameter (generally 1e5 to 1e8) | |
1167 # p: asymmetry parameter (generally 0.001) | |
1168 # d: order of differences in penalty (generally 2) | |
1169 # eps: 1e-8 in ptw package | |
1170 m <- length(y) | |
1171 w <- rep(1, m) | |
1172 i <- 1 | |
1173 repeat { | |
1174 z <- difsmw(y, lambda, w, d = 2) | |
1175 w0 <- w | |
1176 p_vect <- rep((1-p), m) # if y <= z + eps | |
1177 p_vect[y > z + eps | y < 0] <- p # if y > z + eps | y < 0 | |
1178 if(!is.null(exclude_index)){ | |
1179 p_vect[exclude_index] <- 0 # if exclude area | |
1180 } | |
1181 | |
1182 w <- p_vect | |
1183 # w <- p * (y > z + eps | y < 0) + (1 - p) * (y <= z + eps) | |
1184 | |
1185 if (sum(abs(w - w0)) == 0) { | |
1186 break | |
1187 } | |
1188 i <- i + 1 | |
1189 if (i > maxIter) { | |
1190 warning("cannot find Baseline estimation in asysm") | |
1191 break | |
1192 } | |
1193 } | |
1194 return(z) | |
1195 } | |
1196 } | |
1197 | |
1198 # Baseline estimation ---------------------------------------------- | |
1199 Baseline <- matrix(NA, nrow = nrow(Spectrum_data), ncol = ncol(Spectrum_data)) | |
1200 | |
1201 # for (k in 1:n) { | |
1202 # Baseline[k, ] <- asysm(y = Spectrum_data[k, ], lambda = lambda, p = p, eps = eps) | |
1203 | |
1204 if (ptw.bc ){ | |
1205 Baseline <- apply(Spectrum_data,1, asysm, lambda = lambda, p = p, | |
1206 eps = eps) | |
1207 }else { | |
1208 Baseline <- apply(Spectrum_data,1, asysm, lambda = lambda, p = p, | |
1209 eps = eps, exclude_index = exclude_index) | |
1210 } | |
1211 | |
1212 | |
1213 Spectrum_data <- Spectrum_data - t(Baseline) | |
1214 # } | |
1215 | |
1216 # Data finalisation ---------------------------------------------- | |
1217 Spectrum_data <- endTreatment("BaselineCorrection", begin_info, Spectrum_data) # FIXME create removeImaginary filter ?? | |
1218 | |
1219 if (returnBaseline) { | |
1220 return(list(Spectrum_data = Spectrum_data, Baseline = Baseline)) | |
1221 } else { | |
1222 return(Spectrum_data) | |
1223 } | |
1224 } | |
1225 | |
1226 | |
1227 | |
1228 ## ==================================================== | |
1229 # NegativeValuesZeroing | |
1230 ## ==================================================== | |
1231 | |
1232 NegativeValuesZeroing <- function(Spectrum_data) { | |
1233 # Data initialisation and checks ---------------------------------------------- | |
1234 begin_info <- beginTreatment("NegativeValuesZeroing", Spectrum_data, force.real = T) | |
1235 Spectrum_data <- begin_info[["Signal_data"]] | |
1236 | |
1237 # NegativeValuesZeroing ---------------------------------------------- | |
1238 Spectrum_data[Spectrum_data < 0] <- 0 | |
1239 | |
1240 # Data finalisation ---------------------------------------------- | |
1241 return(endTreatment("NegativeValuesZeroing", begin_info, Spectrum_data)) | |
1242 } | |
1243 | |
1244 |