comparison nmr_preprocessing/NmrPreprocessing_script.R @ 2:7304ec2c9ab7 draft

Uploaded
author marie-tremblay-metatoul
date Mon, 30 Jul 2018 10:33:03 -0400
parents
children
comparison
equal deleted inserted replaced
1:b55559a2854f 2:7304ec2c9ab7
1 ## ==========================
2 # Internal functions
3 ## ==========================
4
5 # beginTreatment
6 beginTreatment <- function(name, Signal_data = NULL, Signal_info = NULL,
7 force.real = FALSE) {
8
9 cat("Begin", name, "\n")
10
11
12 # Formatting the Signal_data and Signal_info -----------------------
13
14 vec <- is.vector(Signal_data)
15 if (vec) {
16 Signal_data <- vec2mat(Signal_data)
17 }
18 if (is.vector(Signal_info)) {
19 Signal_info <- vec2mat(Signal_info)
20 }
21 if (!is.null(Signal_data)) {
22 if (!is.matrix(Signal_data)) {
23 stop("Signal_data is not a matrix.")
24 }
25 if (!is.complex(Signal_data) && !is.numeric(Signal_data)) {
26 stop("Signal_data contains non-numerical values.")
27 }
28 }
29 if (!is.null(Signal_info) && !is.matrix(Signal_info)) {
30 stop("Signal_info is not a matrix.")
31 }
32
33
34 Original_data <- Signal_data
35
36 # Extract the real part of the spectrum ---------------------------
37
38 if (force.real) {
39 if (is.complex(Signal_data)) {
40 Signal_data <- Re(Signal_data)
41 } else {
42 # The signal is numeric Im(Signal_data) is zero anyway so let's avoid
43 # using complex(real=...,imaginary=0) which would give a complex signal
44 # in endTreatment()
45 force.real <- FALSE
46 }
47 }
48
49
50 # Return the formatted data and metadata entries --------------------
51
52 return(list(start = proc.time(), vec = vec, force.real = force.real,
53 Original_data = Original_data, Signal_data = Signal_data, Signal_info = Signal_info))
54 }
55
56 # endTreatment
57 endTreatment <- function(name, begin_info, Signal_data) {
58
59 # begin_info: object outputted from beginTreatment
60
61
62 # Formatting the entries and printing process time -----------------------
63 end_time <- proc.time() # record it as soon as possible
64 start_time <- begin_info[["start"]]
65 delta_time <- end_time - start_time
66 delta <- delta_time[]
67 cat("End", name, "\n")
68 cat("It lasted", round(delta["user.self"], 3), "s user time,", round(delta["sys.self"],3),
69 "s system time and", round(delta["elapsed"], 3), "s elapsed time.\n")
70
71
72 if (begin_info[["force.real"]]) {
73 # The imaginary part is left untouched
74 i <- complex(real = 0, imaginary = 1)
75 Signal_data <- Signal_data + i * Im(begin_info[["Original_data"]])
76 }
77
78 if (begin_info[["vec"]]) {
79 Signal_data <- Signal_data[1, ]
80 }
81
82 # Return the formatted data and metadata entries --------------------
83 return(Signal_data)
84 }
85
86 # checkArg
87 checkArg <- function(arg, checks, can.be.null=FALSE) {
88 check.list <- list(bool=c(is.logical, "a boolean"),
89 int =c(function(x){x%%1==0}, "an integer"),
90 num =c(is.numeric, "a numeric"),
91 str =c(is.character, "a string"),
92 pos =c(function(x){x>0}, "positive"),
93 pos0=c(function(x){x>=0}, "positive or zero"),
94 l1 =c(function(x){length(x)==1}, "of length 1")
95 )
96 if (is.null(arg)) {
97 if (!can.be.null) {
98 stop(deparse(substitute(arg)), " is null.")
99 }
100 } else {
101 if (is.matrix(arg)) {
102 stop(deparse(substitute(arg)), " is not scalar.")
103 }
104 for (c in checks) {
105 if (!check.list[[c]][[1]](arg)) {
106 stop(deparse(substitute(arg)), " is not ", check.list[[c]][[2]], ".")
107 }
108 }
109 }
110 }
111
112 # getArg
113 getArg <- function(arg, info, argname, can.be.absent=FALSE) {
114 if (is.null(arg)) {
115 start <- paste("impossible to get argument", argname, "it was not given directly and");
116 if (!is.matrix(info)) {
117 stop(paste(start, "the info matrix was not given"))
118 }
119 if (!(argname %in% colnames(info))) {
120 if (can.be.absent) {
121 return(NULL)
122 } else {
123 stop(paste(start, "is not in the info matrix"))
124 }
125 }
126 if (nrow(info) < 1) {
127 stop(paste(start, "the info matrix has no row"))
128 }
129 arg <- info[1,argname]
130 if (is.na(arg)) {
131 stop(paste(start, "it is NA in the info matrix"))
132 }
133 }
134 return(arg)
135 }
136
137 # binarySearch
138 binarySearch <- function(a, target, lower = TRUE) {
139 # search the index i in a such that a[i] == target
140 # if it doesn't exists and lower, it searches the closer a[i] such that a[i] < target
141 # if !lower, it seraches the closer a[i] such that a[i] > target
142 # a should be monotone but can be increasing or decreasing
143
144 # if a is increasing INVARIANT: a[amin] < target < a[amax]
145 N <- length(a)
146 if ((a[N] - target) * (a[N] - a[1]) <= 0) {
147 return(N)
148 }
149 if ((a[1] - target) * (a[N] - a[1]) >= 0) {
150 return(1)
151 }
152 amin <- 1
153 amax <- N
154 while (amin + 1 < amax) {
155 amid <- floor((amin + amax)/2)
156 if ((a[amid] - target) * (a[amax] - a[amid]) < 0) {
157 amin <- amid
158 } else if ((a[amid] - target) * (a[amax] - a[amid]) > 0) {
159 amax <- amid
160 } else {
161 # a[amid] == a[amax] or a[amid] == target In both cases, a[amid] ==
162 # target
163 return(amid)
164 }
165 }
166 if (xor(lower, a[amin] > a[amax])) {
167 # (lower && a[amin] < a[amax]) || (!lower && a[min] > a[max])
168 # If increasing and we want the lower, we take amin
169 # If decreasing and we want the bigger, we take amin too
170 return(amin)
171 } else {
172 return(amax)
173 }
174 }
175
176 # Interpol
177 Interpol <- function(t, y) {
178 # y: sample
179 # t : warping function
180
181 m <- length(y)
182 # t <= m-1
183 # because if t > m-1, y[ti+1] will be NA when we compute g
184 valid <- 1 <= t & t <= m-1 # FIXME it was '<' in Bubble v2
185 s <- (1:m)[valid]
186 ti <- floor(t[s])
187 tr <- t[s] - ti
188 g <- y[ti + 1] - y[ti]
189 f <- y[ti] + tr * g
190 list(f=f, s=s, g=g)
191 }
192
193 # vec2mat
194 vec2mat <- function(vec) {
195 return(matrix(vec, nrow = 1, dimnames = list(c(1), names(vec))))
196
197 }
198
199 # binarySearch
200 binarySearch <- function(a, target, lower = TRUE) {
201 # search the index i in a such that a[i] == target
202 # if it doesn't exists and lower, it searches the closer a[i] such that a[i] < target
203 # if !lower, it seraches the closer a[i] such that a[i] > target
204 # a should be monotone but can be increasing or decreasing
205
206 # if a is increasing INVARIANT: a[amin] < target < a[amax]
207 N <- length(a)
208 if ((a[N] - target) * (a[N] - a[1]) <= 0) {
209 return(N)
210 }
211 if ((a[1] - target) * (a[N] - a[1]) >= 0) {
212 return(1)
213 }
214 amin <- 1
215 amax <- N
216 while (amin + 1 < amax) {
217 amid <- floor((amin + amax)/2)
218 if ((a[amid] - target) * (a[amax] - a[amid]) < 0) {
219 amin <- amid
220 } else if ((a[amid] - target) * (a[amax] - a[amid]) > 0) {
221 amax <- amid
222 } else {
223 # a[amid] == a[amax] or a[amid] == target In both cases, a[amid] ==
224 # target
225 return(amid)
226 }
227 }
228 if (xor(lower, a[amin] > a[amax])) {
229 # (lower && a[amin] < a[amax]) || (!lower && a[min] > a[max])
230 # If increasing and we want the lower, we take amin
231 # If decreasing and we want the bigger, we take amin too
232 return(amin)
233 } else {
234 return(amax)
235 }
236 }
237
238
239 # indexInterval
240 indexInterval <- function (a, from, to, inclusive=TRUE) {
241 # If inclusive and from <= to, we need to take the lower
242 # If not inclusive and from > to, we need to take the lower too
243 lowerFrom <- xor(inclusive, from > to)
244 fromIndex <- binarySearch(a, from, lowerFrom)
245 toIndex <- binarySearch(a, to, !lowerFrom)
246 return(fromIndex:toIndex)
247 }
248
249
250
251 ## ==========================
252 # GroupDelayCorrection
253 ## ==========================
254 GroupDelayCorrection <- function(Fid_data, Fid_info = NULL, group_delay = NULL) {
255
256
257 # Data initialisation and checks ----------------------------------------------
258
259 begin_info <- beginTreatment("GroupDelayCorrection", Fid_data, Fid_info)
260 Fid_data <- begin_info[["Signal_data"]]
261 dimension_names <- dimnames(Fid_data)
262 Fid_info <- begin_info[["Signal_info"]]
263 checkArg(group_delay, c("num", "pos0"), can.be.null = TRUE)
264 # if Fid_info and group_delay are NULL, getArg will generate an error
265
266 group_delay <- getArg(group_delay, Fid_info, "GRPDLY", can.be.absent = TRUE)
267
268 if (is.null(group_delay)) {
269
270 # See DetermineBrukerDigitalFilter.m in matNMR MATLAB library
271 group_delay_matrix <- matrix(c(44.75, 46, 46.311, 33.5, 36.5, 36.53, 66.625,
272 48, 47.87, 59.0833, 50.1667, 50.229, 68.5625, 53.25, 53.289, 60.375,
273 69.5, 69.551, 69.5313, 72.25, 71.6, 61.0208, 70.1667, 70.184, 70.0156,
274 72.75, 72.138, 61.3438, 70.5, 70.528, 70.2578, 73, 72.348, 61.5052, 70.6667,
275 70.7, 70.3789, 72.5, 72.524, 61.5859, 71.3333, NA, 70.4395, 72.25, NA,
276 61.6263, 71.6667, NA, 70.4697, 72.125, NA, 61.6465, 71.8333, NA, 70.4849,
277 72.0625, NA, 61.6566, 71.9167, NA, 70.4924, 72.0313, NA), nrow = 21,
278 ncol = 3, byrow = TRUE, dimnames = list(c(2, 3, 4, 6, 8, 12, 16, 24,
279 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048),
280 c(10, 11, 12)))
281 decim <- Fid_info[1, "DECIM"]
282 dspfvs <- Fid_info[1, "DSPFVS"]
283 if (!(toString(decim) %in% rownames(group_delay_matrix))) {
284 stop(paste("Invalid DECIM", decim, "it should be one of", rownames(group_delay_matrix)))
285 }
286 if (!(toString(dspfvs) %in% colnames(group_delay_matrix))) {
287 stop(paste("Invalid DSPFVS", dspfvs, "it should be one of", colnames(group_delay_matrix)))
288 }
289 group_delay <- group_delay_matrix[toString(decim), toString(dspfvs)]
290 if (is.na(group_delay)) {
291 stop(paste("Invalid DECIM", decim, "for DSPFVS", dspfvs))
292 }
293 }
294 m <- ncol(Fid_data)
295 n <- nrow(Fid_data)
296
297 # GroupDelayCorrection ----------------------------------------------
298
299 # We do the shifting in the Fourier domain because the shift can be non-integer.
300 # That way we automatically have the circular behaviour of the shift and the
301 # interpolation if it is non-integer.
302
303 Spectrum <- t(stats::mvfft(t(Fid_data)))
304
305 # Spectrum <- FourierTransform(Fid_data, Fid_info)
306 p <- ceiling(m/2)
307 new_index <- c((p + 1):m, 1:p)
308 Spectrum <- Spectrum[,new_index]
309 Spectrum <- matrix(data = Spectrum, ncol = m, nrow = n)
310
311 Omega <- (0:(m - 1))/m
312 i <- complex(real = 0, imaginary = 1)
313
314 if (n>1) {
315 Spectrum <- sweep(Spectrum, MARGIN = 2, exp(i * group_delay * 2 * pi * Omega), `*`)
316 Spectrum <- Spectrum[,new_index]
317 }else {
318 Spectrum <- Spectrum* exp(i * group_delay * 2 * pi * Omega)
319 Spectrum <- Spectrum[new_index]
320 Spectrum <- matrix(data = Spectrum, ncol = m, nrow = n)
321 }
322
323
324 Fid_data <- t(stats::mvfft(t(Spectrum), inverse = TRUE))/m
325 colnames(Fid_data) <- dimension_names[[2]]
326 rownames(Fid_data) <- dimension_names[[1]]
327
328 # Data finalisation ----------------------------------------------
329
330 return(endTreatment("GroupDelayCorrection", begin_info, Fid_data))
331 }
332
333 ## ==========================
334 # SolventSuppression
335 ## ==========================
336 SolventSuppression <- function(Fid_data, lambda.ss = 1e+06, ptw.ss = TRUE,
337 plotSolvent = F, returnSolvent = F) {
338
339 # Data initialisation and checks ----------------------------------------------
340
341 begin_info <- beginTreatment("SolventSuppression", Fid_data)
342 Fid_data <- begin_info[["Signal_data"]]
343 checkArg(ptw.ss, c("bool"))
344 checkArg(lambda.ss, c("num", "pos0"))
345
346
347 # difsm function definition for the smoother -----------------------------------
348
349 if (ptw.ss) {
350 # Use of the function in ptw that smoothes signals with a finite difference
351 # penalty of order 2
352 difsm <- ptw::difsm
353 } else {
354 # Or manual implementation based on sparse matrices for large data series (cf.
355 # Eilers, 2003. 'A perfect smoother')
356 difsm <- function(y, d = 2, lambda) {
357
358 m <- length(y)
359 # Sparse identity matrix m x m
360 E <- Matrix::Diagonal(m)
361 D <- Matrix::diff(E, differences = d)
362 A <- E + lambda.ss * Matrix::t(D) %*% D
363 # base::chol does not take into account that A is sparse and is extremely slow
364 C <- Matrix::chol(A)
365 x <- Matrix::solve(C, Matrix::solve(Matrix::t(C), y))
366 return(as.numeric(x))
367 }
368 }
369
370 # Solvent Suppression ----------------------------------------------
371
372 n <- dim(Fid_data)[1]
373 if (returnSolvent) {
374 SolventRe <- Fid_data
375 SolventIm <- Fid_data
376 }
377 for (i in 1:n) {
378 FidRe <- Re(Fid_data[i, ])
379 FidIm <- Im(Fid_data[i, ])
380 solventRe <- difsm(y = FidRe, lambda = lambda.ss)
381 solventIm <- difsm(y = FidIm, lambda = lambda.ss)
382
383 if (plotSolvent) {
384 m <- length(FidRe)
385 graphics::plot(1:m, FidRe, type = "l", col = "red")
386 graphics::lines(1:m, solventRe, type = "l", col = "blue")
387 graphics::plot(1:m, FidIm, type = "l", col = "red")
388 graphics::lines(1:m, solventIm, type = "l", col = "blue")
389 }
390 FidRe <- FidRe - solventRe
391 FidIm <- FidIm - solventIm
392 Fid_data[i, ] <- complex(real = FidRe, imaginary = FidIm)
393 if (returnSolvent) {
394 SolventRe[i, ] <- solventRe
395 SolventIm[i, ] <- solventIm
396 }
397 }
398
399
400 # Data finalisation ----------------------------------------------
401
402 Fid_data <- endTreatment("SolventSuppression", begin_info, Fid_data)
403 if (returnSolvent) {
404 return(list(Fid_data = Fid_data, SolventRe = SolventRe, SolventIm = SolventIm))
405 } else {
406 return(Fid_data)
407 }
408 }
409
410
411 ## ==========================
412 # Apodization
413 # =============================
414 Apodization <- function(Fid_data, Fid_info = NULL, DT = NULL,
415 type.apod = c("exp","cos2", "blockexp", "blockcos2",
416 "gauss", "hanning", "hamming"), phase = 0, rectRatio = 1/2,
417 gaussLB = 1, expLB = 1, plotWindow = F, returnFactor = F) {
418
419 # Data initialisation and checks ----------------------------------------------
420 begin_info <- beginTreatment("Apodization", Fid_data, Fid_info)
421 Fid_data <- begin_info[["Signal_data"]]
422 Fid_info <- begin_info[["Signal_info"]]
423 # Data check
424 type.apod <- match.arg(type.apod)
425 checkArg(DT, c("num", "pos"), can.be.null = TRUE)
426 checkArg(phase, c("num"))
427
428 # Apodization ----------------------------------------------
429 DT <- getArg(DT, Fid_info, "DT") # Dwell Time
430 m <- ncol(Fid_data)
431 t <- (1:m) * DT # Time
432 rectSize <- ceiling(rectRatio * m)
433 gaussLB <- (gaussLB/(sqrt(8 * log(2))))
434 # Define the types of apodization:
435 switch(type.apod, exp = {
436 # exponential
437 Factor <- exp(-expLB * t)
438 }, cos2 = {
439 # cos^2
440 c <- cos((1:m) * pi/(2 * m) - phase * pi/2)
441 Factor <- c * c
442 }, blockexp = {
443 # block and exponential
444 Factor <- c(rep.int(1, rectSize), rep.int(0, m - rectSize))
445 # | rectSize | 1 ___________ | \ 0 \____
446 Factor[(rectSize + 1):m] <- exp(-expLB * t[1:(m - rectSize)])
447 }, blockcos2 = {
448 # block and cos^2
449 Factor <- c(rep.int(1, rectSize), rep.int(0, m - rectSize))
450 c <- cos((1:(m - rectSize)) * pi/(2 * (m - rectSize)))
451 Factor[(rectSize + 1):m] <- c * c
452 }, gauss = {
453 # gaussian
454 Factor <- exp(-(gaussLB * t)^2/2)
455 Factor <- Factor/max(Factor)
456 }, hanning = {
457 # Hanning
458 Factor <- 0.5 + 0.5 * cos((1:m) * pi/m - phase * pi)
459 }, hamming = {
460 # Hamming
461 Factor <- 0.54 + 0.46 * cos((1:m) * pi/m - phase * pi)
462 })
463 if (plotWindow) {
464 graphics::plot(1:m, Factor, "l")
465 # dev.off() # device independent, it is the responsability of the
466 # caller to do it
467 }
468 # Apply the apodization factor on the spectra
469 Fid_data <- sweep(Fid_data, MARGIN = 2, Factor, `*`)
470
471 # Data finalisation ----------------------------------------------
472 Fid_data <- endTreatment("Apodization", begin_info, Fid_data)
473 if (returnFactor) {
474 return(list(Fid_data = Fid_data, Factor = Factor))
475 } else {
476 return(Fid_data)
477 }
478 }
479
480
481 ## ====================================================
482 # FourierTransform
483 ## ====================================================
484
485
486 # fftshift1D2D
487 fftshift1D2D <- function(x) {
488 vec <- F
489 if (is.vector(x)) {
490 x <- vec2mat(x)
491 vec <- T
492 }
493 m <- dim(x)[2]
494 p <- ceiling(m/2)
495 new_index <- c((p + 1):m, 1:p)
496 y <- x[, new_index, drop = vec]
497 }
498
499 # FourierTransform
500 FourierTransform <- function(Fid_data, Fid_info = NULL, SW_h = NULL, SW = NULL, O1 = NULL, reverse.axis = TRUE) {
501
502 # Data initialisation and checks ----------------------------------------------
503 begin_info <- beginTreatment("FourierTransform", Fid_data, Fid_info)
504 Fid_data <- begin_info[["Signal_data"]]
505 Fid_info <- begin_info[["Signal_info"]]
506
507 m <- ncol(Fid_data)
508 n <- nrow(Fid_data)
509
510 if (is.null(SW_h)) {
511 SW_h <- getArg(SW_h, Fid_info, "SW_h")
512 }
513
514 if (is.null(SW)) {
515 SW <- getArg(SW, Fid_info, "SW") # Sweep Width in ppm (semi frequency scale in ppm)
516 }
517
518
519 if (is.null(O1)) {
520 O1 <- getArg(O1, Fid_info, "O1")
521 }
522
523
524 checkArg(reverse.axis, c("bool"))
525
526 # Fourier Transformation ----------------------------------------------
527 # mvfft does the unnormalized fourier transform (see ?mvfft), so we need divide
528 # by m. It does not matter a lot in our case since the spectrum will be
529 # normalized.
530
531 # FT
532 RawSpect_data <- fftshift1D2D(t(stats::mvfft(t(Fid_data))))
533 # recover the frequencies values
534 f <- ((0:(m - 1)) - floor(m/2)) * Fid_info[1, "SW_h"]/(m-1)
535
536 if(reverse.axis == TRUE) {
537 revind <- rev(1:m)
538 RawSpect_data <- RawSpect_data[,revind] # reverse the spectrum
539 }
540
541 RawSpect_data <- matrix(RawSpect_data, nrow = n, ncol = m)
542 colnames(RawSpect_data) <- f
543 rownames(RawSpect_data) <- rownames(Fid_data)
544
545 # PPM conversion ----------------------------------------------
546
547 # The Sweep Width has to be the same since the column names are the same
548
549 ppmInterval <- SW/(m-1)
550
551 O1index = round((m+1)/2+O1*(m - 1) / SW_h)
552
553 end <- O1index - m
554 start <- O1index -1
555 ppmScale <- (start:end) * ppmInterval
556 RawSpect_data <- matrix(RawSpect_data, nrow = n, ncol = -(end - start) + 1, dimnames =
557 list(rownames(RawSpect_data), ppmScale))
558
559
560 # Data finalisation ----------------------------------------------
561 return(endTreatment("FourierTransform", begin_info, RawSpect_data))
562 }
563
564 ## ====================================================
565 # InternalReferencing
566 ## ====================================================
567
568 InternalReferencing <- function(Spectrum_data, Fid_info, method = c("max", "thres"),
569 range = c("nearvalue", "all", "window"), ppm.value = 0,
570 direction = "left", shiftHandling = c("zerofilling", "cut",
571 "NAfilling", "circular"), c = 2, pc = 0.02, fromto.RC = NULL,
572 ppm.ir = TRUE, rowindex_graph = NULL) {
573
574
575
576 # Data initialisation and checks ----------------------------------------------
577
578 begin_info <- beginTreatment("InternalReferencing", Spectrum_data, Fid_info)
579 Spectrum_data <- begin_info[["Signal_data"]]
580 Fid_info <- begin_info[["Signal_info"]]
581
582
583 ######## Check input arguments
584
585 range <- match.arg(range)
586 shiftHandling <- match.arg(shiftHandling)
587 method <- match.arg(method)
588 plots <- NULL
589
590 checkArg(ppm.ir, c("bool"))
591 checkArg(unlist(fromto.RC), c("num"), can.be.null = TRUE)
592 checkArg(pc, c("num"))
593 checkArg(ppm.value, c("num"))
594 checkArg(rowindex_graph, "num", can.be.null = TRUE)
595
596 # fromto.RC : if range == "window",
597 # fromto.RC defines the spectral window where to search for the peak
598 if (!is.null(fromto.RC)) {
599 diff <- diff(unlist(fromto.RC))[1:length(diff(unlist(fromto.RC)))%%2 !=0]
600 for (i in 1:length(diff)) {
601 if (diff[i] >= 0) {
602 fromto <- c(fromto.RC[[i]][2], fromto.RC[[i]][1])
603 fromto.RC[[i]] <- fromto
604 }
605 }
606 }
607
608
609 # findTMSPpeak function ----------------------------------------------
610 # If method == "tresh", findTMSPpeak will find the position of the first
611 # peak (from left or right) which is higher than a predefined threshold
612 # and is computed as: c*(cumulated_mean/cumulated_sd)
613 findTMSPpeak <- function(ft, c = 2, direction = "left") {
614 ft <- Re(ft) # extraction de la partie réelle
615 N <- length(ft)
616 if (direction == "left") {
617 newindex <- rev(1:N)
618 ft <- rev(ft)
619 }
620 thres <- 99999
621 i <- 1000 # Start at point 1000 to find the peak
622 vect <- ft[1:i]
623
624 while (vect[i] <= (c * thres)) {
625 cumsd <- stats::sd(vect)
626 cummean <- mean(vect)
627 thres <- cummean + 3 * cumsd
628 i <- i + 1
629 vect <- ft[1:i]
630 }
631 if (direction == "left") {
632 v <- newindex[i]
633 } else {v <- i}
634
635 if (is.na(v)) {
636 warning("No peak found, need to lower the threshold.")
637 return(NA)
638 } else {
639 # recherche dans les 1% de points suivants du max trouve pour etre au sommet du
640 # pic
641 d <- which.max(ft[v:(v + N * 0.01)])
642 new.peak <- v + d - 1 # nouveau pic du TMSP si d > 0
643
644 if (names(which.max(ft[v:(v + N * 0.01)])) != names(which.max(ft[v:(v + N * 0.03)]))) {
645 # recherche dans les 3% de points suivants du max trouve pour eviter un faux
646 # positif
647 warning("the TMSP peak might be located further away, increase the threshold to check.")
648 }
649 return(new.peak)
650 }
651 }
652
653
654 # Define the search zone ----------------------------------------
655
656 n <- nrow(Spectrum_data)
657 m <- ncol(Spectrum_data)
658
659 # The Sweep Width (SW) has to be the same since the column names are the same
660 SW <- Fid_info[1, "SW"] # Sweep Width in ppm
661 ppmInterval <- SW/(m-1) # size of a ppm interval
662
663 # range: How the search zone is defined ("all", "nearvalue" or "window")
664 if (range == "all") {
665
666 Data <- Spectrum_data
667
668 } else { # range = "nearvalue" or "window"
669 # Need to define colindex (column indexes) to apply indexInterval on it
670
671 if (range == "nearvalue") {
672
673 fromto.RC <- list(c(-(SW * pc)/2 + ppm.value, (SW * pc)/2 + ppm.value)) # automatic fromto values in ppm
674 colindex <- as.numeric(colnames(Spectrum_data))
675
676 } else {
677 # range == "window"
678 # fromto.RC is already user-defined
679 if (ppm.ir == TRUE) {
680 colindex <- as.numeric(colnames(Spectrum_data))
681 } else {
682 colindex <- 1:m
683 }
684 }
685
686 # index intervals taking into account the different elements in the list fromto.RC
687 Int <- vector("list", length(fromto.RC))
688 for (i in 1:length(fromto.RC)) {
689 Int[[i]] <- indexInterval(colindex, from = fromto.RC[[i]][1],
690 to = fromto.RC[[i]][2], inclusive = TRUE)
691 }
692
693 # define Data as the cropped spectrum including the index intervals
694 # outside the research zone, the intensities are set to the minimal
695 # intensity of the research zone
696
697 if (n > 1){
698 Data <- apply(Re(Spectrum_data[,unlist(Int)]),1, function(x) rep(min(x), m))
699 Data <- t(Data)
700 Data[,unlist(Int)] <- Re(Spectrum_data[,unlist(Int)])
701 } else {
702 Data <- rep(min(Re(Spectrum_data)) ,m)
703 Data[unlist(Int)] <- Re(Spectrum_data[unlist(Int)])
704 }
705
706 }
707
708
709 # Apply the peak location search method ('thres' or 'max') on spectra
710 # -----------------------------------------------------------------------
711
712 if (method == "thres") {
713 TMSPpeaks <- apply(Data, 1, findTMSPpeak, c = c, direction = direction)
714 } else { # method == "max
715 TMSPpeaks <- apply(Re(Data), 1, which.max)
716 }
717
718
719 # Shift spectra according to the TMSPpeaks found --------------------------------
720 # Depends on the shiftHandling
721
722 # TMSPpeaks is a column index
723 maxpeak <- max(TMSPpeaks) # max accross spectra
724 minpeak <- min(TMSPpeaks) # min accross spectra
725
726
727 if (shiftHandling %in% c("zerofilling", "NAfilling", "cut")) {
728 fill <- NA
729 if (shiftHandling == "zerofilling") {
730 fill <- 0
731 }
732
733 start <- maxpeak - 1
734 end <- minpeak - m
735
736 # ppm values of each interval for the whole spectral range of the spectral matrix
737 ppmScale <- (start:end) * ppmInterval
738
739 # check if ppm.value is in the ppmScale interval
740 if(ppm.value < min(ppmScale) | ppm.value > max(ppmScale)) {
741 warning("ppm.value = ", ppm.value, " is not in the ppm interval [",
742 round(min(ppmScale),2), ",", round(max(ppmScale),2), "], and is set to its default ppm.value 0")
743 ppm.value = 0
744 }
745
746 # if ppm.value != 0, ppmScale is adapted
747 ppmScale <- ppmScale + ppm.value
748
749 # create the spectral matrix with realigned spectra
750 Spectrum_data_calib <- matrix(fill, nrow = n, ncol = -(end - start) + 1,
751 dimnames = list(rownames(Spectrum_data), ppmScale))
752
753 # fills in Spectrum_data_calib with shifted spectra
754 for (i in 1:n) {
755 shift <- (1 - TMSPpeaks[i]) + start
756 Spectrum_data_calib[i, (1 + shift):(m + shift)] <- Spectrum_data[i, ]
757 }
758
759 if (shiftHandling == "cut") {
760 Spectrum_data_calib = as.matrix(stats::na.omit(t(Spectrum_data_calib)))
761 Spectrum_data_calib = t(Spectrum_data_calib)
762 base::attr(Spectrum_data_calib, "na.action") <- NULL
763 }
764
765
766 } else {
767 # circular
768 start <- 1 - maxpeak
769 end <- m - maxpeak
770
771 ppmScale <- (start:end) * ppmInterval
772
773 # check if ppm.value in is the ppmScale interval
774 if(ppm.value < min(ppmScale) | ppm.value > max(ppmScale)) {
775 warning("ppm.value = ", ppm.value, " is not in the ppm interval [",
776 round(min(ppmScale),2), ",", round(max(ppmScale),2), "], and is set to its default ppm.value 0")
777 ppm.value = 0
778 }
779
780 # if ppm.value != 0, ppmScale is adapted
781 ppmScale <- ppmScale + ppm.value
782
783 # create the spectral matrix with realigned spectra
784 Spectrum_data_calib <- matrix(nrow=n, ncol=end-start+1,
785 dimnames=list(rownames(Spectrum_data), ppmScale))
786
787 # fills in Spectrum_data_calib with shifted spectra
788 for (i in 1:n) {
789 shift <- (maxpeak-TMSPpeaks[i])
790 Spectrum_data_calib[i,(1+shift):m] <- Spectrum_data[i,1:(m-shift)]
791 if (shift > 0) {
792 Spectrum_data_calib[i,1:shift] <- Spectrum_data[i,(m-shift+1):m]
793 }
794 }
795 }
796
797
798
799
800 # Plot of the spectra (depending on rowindex_graph) ---------------------------------------------------
801
802 ppm = xstart = value = xend = Legend = NULL # only for R CMD check
803
804
805 # with the search zone for TMSP and the location of the peaks just found
806 if (!is.null(rowindex_graph)) {
807
808 if (range == "window") {
809 if (ppm.ir == TRUE) {
810 fromto <- fromto.RC
811 } else {
812 fromto <- list()
813 idcol <- as.numeric(colnames(Spectrum_data))
814 for (i in 1:length(fromto.RC)) {
815 fromto[[i]] <- as.numeric(colnames(Spectrum_data))[fromto.RC[[i]]]
816 }
817 }
818 } else {
819 fromto <- fromto.RC
820 }
821
822 # TMSPloc in ppm
823 TMSPloc <- as.numeric(colnames(Spectrum_data))[TMSPpeaks[rowindex_graph]]
824
825 # num plot per window
826 num.stacked <- 6
827
828 # rectanglar bands of color for the search zone
829 rects <- data.frame(xstart = sapply(fromto, function(x) x[[1]]),
830 xend = sapply(fromto, function(x) x[[2]]),
831 Legend = "Peak search zone and location")
832
833 # vlines for TMSP peak
834 addlines <- data.frame(rowname = rownames(Spectrum_data)[rowindex_graph],TMSPloc)
835
836 nn <- length(rowindex_graph)
837 i <- 1
838 j <- 1
839 plots <- vector(mode = "list", length = ceiling(nn/num.stacked))
840
841 while (i <= nn) {
842
843 last <- min(i + num.stacked - 1, nn)
844
845 melted <- reshape2::melt(Re(Spectrum_data[i:last, ]),
846 varnames = c("rowname", "ppm"))
847
848 plots[[j]] <- ggplot2::ggplot() + ggplot2::theme_bw() +
849 ggplot2::geom_line(data = melted,
850 ggplot2::aes(x = ppm, y = value)) +
851 ggplot2::geom_rect(data = rects, ggplot2::aes(xmin = xstart, xmax = xend,
852 ymin = -Inf, ymax = Inf, fill = Legend), alpha = 0.4) +
853 ggplot2::facet_grid(rowname ~ ., scales = "free_y") +
854 ggplot2::theme(legend.position = "none") +
855 ggplot2::geom_vline(data = addlines, ggplot2::aes(xintercept = TMSPloc),
856 color = "red", show.legend = TRUE) +
857 ggplot2::ggtitle("Peak search zone and location") +
858 ggplot2::theme(legend.position = "top", legend.text = ggplot2::element_text())
859
860
861
862 if ((melted[1, "ppm"] - melted[(dim(melted)[1]), "ppm"]) > 0) {
863 plots[[j]] <- plots[[j]] + ggplot2::scale_x_reverse()
864 }
865
866 i <- last + 1
867 j <- j + 1
868 }
869
870 plots
871 }
872
873
874 # Return the results ----------------------------------------------
875 Spectrum_data <- endTreatment("InternalReferencing", begin_info, Spectrum_data_calib)
876
877 if (is.null(plots)) {
878 return(Spectrum_data)
879 } else {
880 return(list(Spectrum_data = Spectrum_data, plots = plots))
881 }
882
883 }
884
885 ## ====================================================
886 # ZeroOrderPhaseCorrection
887 ## ====================================================
888
889 ZeroOrderPhaseCorrection <- function(Spectrum_data, type.zopc = c("rms", "manual", "max"),
890 plot_rms = NULL, returnAngle = FALSE, createWindow = TRUE,
891 angle = NULL, plot_spectra = FALSE,
892 ppm.zopc = TRUE, exclude.zopc = list(c(5.1,4.5))) {
893
894
895 # Data initialisation and checks ----------------------------------------------
896
897 # Entry arguments definition:
898 # plot_rms : graph of rms criterion returnAngle : if TRUE, returns avector of
899 # optimal angles createWindow : for plot_rms plots angle : If angle is not NULL,
900 # spectra are rotated according to the angle vector values
901 # plot_spectra : if TRUE, plot rotated spectra
902
903
904
905 begin_info <- beginTreatment("ZeroOrderPhaseCorrection", Spectrum_data)
906 Spectrum_data <- begin_info[["Signal_data"]]
907 n <- nrow(Spectrum_data)
908 m <- ncol(Spectrum_data)
909
910 rnames <- rownames(Spectrum_data)
911
912 # Check input arguments
913 type.zopc <- match.arg(type.zopc)
914 checkArg(ppm.zopc, c("bool"))
915 checkArg(unlist(exclude.zopc), c("num"), can.be.null = TRUE)
916
917
918 # type.zopc in c("max", "rms") -----------------------------------------
919 if (type.zopc %in% c("max", "rms")) {
920 # angle is found by optimization
921
922 # rms function to be optimised
923 rms <- function(ang, y, meth = c("max", "rms")) {
924 # if (debug_plot) { graphics::abline(v=ang, col='gray60') }
925 roty <- y * exp(complex(real = 0, imaginary = ang)) # spectrum rotation
926 Rey <- Re(roty)
927
928 if (meth == "rms") {
929 ReyPos <- Rey[Rey >= 0] # select positive intensities
930 POSss <- sum((ReyPos)^2, na.rm = TRUE) # SS for positive intensities
931 ss <- sum((Rey)^2, na.rm = TRUE) # SS for all intensities
932 return(POSss/ss) # criterion : SS for positive values / SS for all intensities
933 } else {
934 maxi <- max(Rey, na.rm = TRUE)
935 return(maxi)
936 }
937 }
938
939
940 # Define the interval where to search for (by defining Data)
941 if (is.null(exclude.zopc)) {
942 Data <- Spectrum_data
943 } else {
944
945 # if ppm.zopc == TRUE, then exclude.zopc is in the colnames values, else, in the column
946 # index
947 if (ppm.zopc == TRUE) {
948 colindex <- as.numeric(colnames(Spectrum_data))
949 } else {
950 colindex <- 1:m
951 }
952
953 # Second check for the argument exclude.zopc
954 diff <- diff(unlist(exclude.zopc))[1:length(diff(unlist(exclude.zopc)))%%2 !=0]
955 for (i in 1:length(diff)) {
956 if (ppm.zopc == TRUE & diff[i] >= 0) {
957 stop(paste("Invalid region removal because from <= to in ppm.zopc"))
958 } else if (ppm.zopc == FALSE & diff[i] <= 0) {stop(paste("Invalid region removal because from >= to in column index"))}
959 }
960
961
962 Int <- vector("list", length(exclude.zopc))
963 for (i in 1:length(exclude.zopc)) {
964 Int[[i]] <- indexInterval(colindex, from = exclude.zopc[[i]][1],
965 to = exclude.zopc[[i]][2], inclusive = TRUE)
966 }
967
968 vector <- rep(1, m)
969 vector[unlist(Int)] <- 0
970 if (n > 1) {
971 Data <- sweep(Spectrum_data, MARGIN = 2, FUN = "*", vector) # Cropped_Spectrum
972 } else {
973 Data <- Spectrum_data * vector
974 } # Cropped_Spectrum
975 }
976
977
978 # angles computation
979 Angle <- c()
980 for (k in 1:n)
981 {
982 # The function is rms is periodic (period 2pi) and it seems that there is a phase
983 # x such that rms is unimodal (i.e. decreasing then increasing) on the interval
984 # [x; x+2pi]. However, if we do the optimization for example on [x-pi; x+pi],
985 # instead of being decreasing then increasing, it might be increasing then
986 # decreasing in which case optimize, thinking it is a valley will have to choose
987 # between the left or the right of this hill and if it chooses wrong, it will end
988 # up at like x-pi while the minimum is close to x+pi.
989
990 # Supposing that rms is unimodal, the classical 1D unimodal optimization will
991 # work in either [-pi;pi] or [0;2pi] (this is not easy to be convinced by that I
992 # agree) and we can check which one it is simply by the following trick
993
994 f0 <- rms(0, Data[k, ],meth = type.zopc)
995 fpi <- rms(pi, Data[k, ], meth = type.zopc)
996 if (f0 < fpi) {
997 interval <- c(-pi, pi)
998 } else {
999 interval <- c(0, 2 * pi)
1000 }
1001
1002 # graphs of rms criteria
1003 debug_plot <- F # rms should not plot anything now, only when called by optimize
1004 if (!is.null(plot_rms) && rnames[k] %in% plot_rms) {
1005 x <- seq(min(interval), max(interval), length.out = 100)
1006 y <- rep(1, 100)
1007 for (K in (1:100)) {
1008 y[K] <- rms(x[K], Data[k, ], meth = type.zopc)
1009 }
1010 if (createWindow == TRUE) {
1011 grDevices::dev.new(noRStudioGD = FALSE)
1012 }
1013 graphics::plot(x, y, main = paste("Criterion maximization \n",
1014 rownames(Data)[k]), ylim = c(0, 1.1),
1015 ylab = "positiveness criterion", xlab = "angle ")
1016 debug_plot <- T
1017 }
1018
1019 # Best angle
1020 best <- stats::optimize(rms, interval = interval, maximum = TRUE,
1021 y = Data[k,], meth = type.zopc)
1022 ang <- best[["maximum"]]
1023
1024
1025 if (debug_plot) {
1026 graphics::abline(v = ang, col = "black")
1027 graphics::text(x = (ang+0.1*ang), y = (y[ang]-0.1*y[ang]), labels = round(ang, 3))
1028 }
1029
1030 # Spectrum rotation
1031 Spectrum_data[k, ] <- Spectrum_data[k, ] * exp(complex(real = 0, imaginary = ang))
1032 Angle <- c(Angle, ang)
1033 }
1034
1035
1036
1037
1038 } else {
1039 # type.zopc is "manual" -------------------------------------------------------
1040 # if Angle is already specified and no optimisation is needed
1041 Angle <- angle
1042
1043 if (!is.vector(angle)) {
1044 stop("angle is not a vector")
1045 }
1046
1047 if (!is.numeric(angle)) {
1048 stop("angle is not a numeric")
1049 }
1050
1051 if (length(angle) != n) {
1052 stop(paste("angle has length", length(angle), "and there are", n, "spectra to rotate."))
1053 }
1054 for (k in 1:n) {
1055 Spectrum_data[k, ] <- Spectrum_data[k, ] * exp(complex(real = 0, imaginary = - angle[k]))
1056 }
1057 }
1058
1059
1060 # Draw spectra
1061 if (plot_spectra == TRUE) {
1062 nn <- ceiling(n/4)
1063 i <- 1
1064 for (k in 1:nn) {
1065 if (createWindow == TRUE) {
1066 grDevices::dev.new(noRStudioGD = FALSE)
1067 }
1068 graphics::par(mfrow = c(4, 2))
1069 while (i <= n) {
1070 last <- min(i + 4 - 1, n)
1071 graphics::plot(Re(Spectrum_data[i, ]), type = "l", ylab = "intensity",
1072 xlab = "Index", main = paste0(rownames(Spectrum_data)[i], " - Real part"))
1073 graphics::plot(Im(Spectrum_data[i, ]), type = "l", ylab = "intensity",
1074 xlab = "Index", main = paste0(rownames(Spectrum_data)[i], " - Imaginary part"))
1075 i <- i + 1
1076 }
1077 i <- last + 1
1078 }
1079 }
1080
1081
1082 # Data finalisation ----------------------------------------------
1083
1084 Spectrum_data <- endTreatment("ZeroOrderPhaseCorrection", begin_info, Spectrum_data)
1085 if (returnAngle) {
1086 return(list(Spectrum_data = Spectrum_data, Angle = Angle))
1087 } else {
1088 return(Spectrum_data)
1089 }
1090 }
1091
1092
1093 ## ====================================================
1094 # Baseline Correction
1095 ## ====================================================
1096 BaselineCorrection <- function(Spectrum_data, ptw.bc = TRUE, maxIter = 42,
1097 lambda.bc = 1e+07, p.bc = 0.05, eps = 1e-08,
1098 ppm.bc = TRUE, exclude.bc = list(c(5.1,4.5)),
1099 returnBaseline = F) {
1100
1101 # Data initialisation ----------------------------------------------
1102 begin_info <- beginTreatment("BaselineCorrection", Spectrum_data, force.real = T)
1103 Spectrum_data <- begin_info[["Signal_data"]]
1104 p <- p.bc
1105 lambda <- lambda.bc
1106 n <- dim(Spectrum_data)[1]
1107 m <- dim(Spectrum_data)[2]
1108
1109
1110 # Data check
1111 checkArg(ptw.bc, c("bool"))
1112 checkArg(maxIter, c("int", "pos"))
1113 checkArg(lambda, c("num", "pos0"))
1114 checkArg(p.bc, c("num", "pos0"))
1115 checkArg(eps, c("num", "pos0"))
1116 checkArg(returnBaseline, c("bool"))
1117 checkArg(ppm.bc, c("bool"))
1118 checkArg(unlist(exclude.bc), c("num"), can.be.null = TRUE)
1119
1120 # Define the interval where to search for (by defining Data)
1121 if (is.null(exclude.bc)) {
1122 exclude_index <- NULL
1123 } else {
1124 # if ppm.bc == TRUE, then exclude.bc is in the colnames values, else, in the column
1125 # index
1126 if (ppm.bc == TRUE) {
1127 colindex <- as.numeric(colnames(Spectrum_data))
1128 } else {
1129 colindex <- 1:m
1130 }
1131
1132 Int <- vector("list", length(exclude.bc))
1133 for (i in 1:length(exclude.bc)) {
1134 Int[[i]] <- indexInterval(colindex, from = exclude.bc[[i]][1],
1135 to = exclude.bc[[i]][2], inclusive = TRUE)
1136 }
1137 exclude_index <- unlist(Int)
1138 }
1139
1140 # Baseline Correction implementation definition ----------------------
1141
1142 # 2 Ways: either use the function asysm from the ptw package or by
1143 # built-in functions
1144 if (ptw.bc) {
1145 asysm <- ptw::asysm
1146 } else {
1147 difsmw <- function(y, lambda, w, d) {
1148 # Weighted smoothing with a finite difference penalty cf Eilers, 2003.
1149 # (A perfect smoother)
1150 # y: signal to be smoothed
1151 # lambda: smoothing parameter
1152 # w: weights (use0 zeros for missing values)
1153 # d: order of differences in penalty (generally 2)
1154 m <- length(y)
1155 W <- Matrix::Diagonal(x=w)
1156 E <- Matrix::Diagonal(m)
1157 D <- Matrix::diff(E, differences = d)
1158 C <- Matrix::chol(W + lambda * t(D) %*% D)
1159 x <- Matrix::solve(C, Matrix::solve(t(C), w * y))
1160 return(as.numeric(x))
1161
1162 }
1163 asysm <- function(y, lambda, p, eps, exclude_index) {
1164 # Baseline estimation with asymmetric least squares
1165 # y: signal
1166 # lambda: smoothing parameter (generally 1e5 to 1e8)
1167 # p: asymmetry parameter (generally 0.001)
1168 # d: order of differences in penalty (generally 2)
1169 # eps: 1e-8 in ptw package
1170 m <- length(y)
1171 w <- rep(1, m)
1172 i <- 1
1173 repeat {
1174 z <- difsmw(y, lambda, w, d = 2)
1175 w0 <- w
1176 p_vect <- rep((1-p), m) # if y <= z + eps
1177 p_vect[y > z + eps | y < 0] <- p # if y > z + eps | y < 0
1178 if(!is.null(exclude_index)){
1179 p_vect[exclude_index] <- 0 # if exclude area
1180 }
1181
1182 w <- p_vect
1183 # w <- p * (y > z + eps | y < 0) + (1 - p) * (y <= z + eps)
1184
1185 if (sum(abs(w - w0)) == 0) {
1186 break
1187 }
1188 i <- i + 1
1189 if (i > maxIter) {
1190 warning("cannot find Baseline estimation in asysm")
1191 break
1192 }
1193 }
1194 return(z)
1195 }
1196 }
1197
1198 # Baseline estimation ----------------------------------------------
1199 Baseline <- matrix(NA, nrow = nrow(Spectrum_data), ncol = ncol(Spectrum_data))
1200
1201 # for (k in 1:n) {
1202 # Baseline[k, ] <- asysm(y = Spectrum_data[k, ], lambda = lambda, p = p, eps = eps)
1203
1204 if (ptw.bc ){
1205 Baseline <- apply(Spectrum_data,1, asysm, lambda = lambda, p = p,
1206 eps = eps)
1207 }else {
1208 Baseline <- apply(Spectrum_data,1, asysm, lambda = lambda, p = p,
1209 eps = eps, exclude_index = exclude_index)
1210 }
1211
1212
1213 Spectrum_data <- Spectrum_data - t(Baseline)
1214 # }
1215
1216 # Data finalisation ----------------------------------------------
1217 Spectrum_data <- endTreatment("BaselineCorrection", begin_info, Spectrum_data) # FIXME create removeImaginary filter ??
1218
1219 if (returnBaseline) {
1220 return(list(Spectrum_data = Spectrum_data, Baseline = Baseline))
1221 } else {
1222 return(Spectrum_data)
1223 }
1224 }
1225
1226
1227
1228 ## ====================================================
1229 # NegativeValuesZeroing
1230 ## ====================================================
1231
1232 NegativeValuesZeroing <- function(Spectrum_data) {
1233 # Data initialisation and checks ----------------------------------------------
1234 begin_info <- beginTreatment("NegativeValuesZeroing", Spectrum_data, force.real = T)
1235 Spectrum_data <- begin_info[["Signal_data"]]
1236
1237 # NegativeValuesZeroing ----------------------------------------------
1238 Spectrum_data[Spectrum_data < 0] <- 0
1239
1240 # Data finalisation ----------------------------------------------
1241 return(endTreatment("NegativeValuesZeroing", begin_info, Spectrum_data))
1242 }
1243
1244