comparison NmrPreprocessing_script.R @ 2:5e64657b4fe5 draft

planemo upload for repository https://github.com/workflow4metabolomics/nmr_preprocessing commit 22ca8782d7c4c0211e13c95b425d4f29f53f995e
author lecorguille
date Wed, 28 Mar 2018 08:05:12 -0400
parents
children
comparison
equal deleted inserted replaced
1:cbea5e9fd0b4 2:5e64657b4fe5
1 ## ==========================
2 # Internal functions
3 ## ==========================
4
5 # beginTreatment
6 beginTreatment <- function(name, Signal_data = NULL, Signal_info = NULL,
7 force.real = FALSE) {
8
9 cat("Begin", name, "\n")
10
11
12 # Formatting the Signal_data and Signal_info -----------------------
13
14 vec <- is.vector(Signal_data)
15 if (vec) {
16 Signal_data <- vec2mat(Signal_data)
17 }
18 if (is.vector(Signal_info)) {
19 Signal_info <- vec2mat(Signal_info)
20 }
21 if (!is.null(Signal_data)) {
22 if (!is.matrix(Signal_data)) {
23 stop("Signal_data is not a matrix.")
24 }
25 if (!is.complex(Signal_data) && !is.numeric(Signal_data)) {
26 stop("Signal_data contains non-numerical values.")
27 }
28 }
29 if (!is.null(Signal_info) && !is.matrix(Signal_info)) {
30 stop("Signal_info is not a matrix.")
31 }
32
33
34 Original_data <- Signal_data
35
36 # Extract the real part of the spectrum ---------------------------
37
38 if (force.real) {
39 if (is.complex(Signal_data)) {
40 Signal_data <- Re(Signal_data)
41 } else {
42 # The signal is numeric Im(Signal_data) is zero anyway so let's avoid
43 # using complex(real=...,imaginary=0) which would give a complex signal
44 # in endTreatment()
45 force.real <- FALSE
46 }
47 }
48
49
50 # Return the formatted data and metadata entries --------------------
51
52 return(list(start = proc.time(), vec = vec, force.real = force.real,
53 Original_data = Original_data, Signal_data = Signal_data, Signal_info = Signal_info))
54 }
55
56 # endTreatment
57 endTreatment <- function(name, begin_info, Signal_data) {
58
59 # begin_info: object outputted from beginTreatment
60
61
62 # Formatting the entries and printing process time -----------------------
63 end_time <- proc.time() # record it as soon as possible
64 start_time <- begin_info[["start"]]
65 delta_time <- end_time - start_time
66 delta <- delta_time[]
67 cat("End", name, "\n")
68 cat("It lasted", round(delta["user.self"], 3), "s user time,", round(delta["sys.self"],3),
69 "s system time and", round(delta["elapsed"], 3), "s elapsed time.\n")
70
71
72 if (begin_info[["force.real"]]) {
73 # The imaginary part is left untouched
74 i <- complex(real = 0, imaginary = 1)
75 Signal_data <- Signal_data + i * Im(begin_info[["Original_data"]])
76 }
77
78 if (begin_info[["vec"]]) {
79 Signal_data <- Signal_data[1, ]
80 }
81
82 # Return the formatted data and metadata entries --------------------
83 return(Signal_data)
84 }
85
86 # checkArg
87 checkArg <- function(arg, checks, can.be.null=FALSE) {
88 check.list <- list(bool=c(is.logical, "a boolean"),
89 int =c(function(x){x%%1==0}, "an integer"),
90 num =c(is.numeric, "a numeric"),
91 str =c(is.character, "a string"),
92 pos =c(function(x){x>0}, "positive"),
93 pos0=c(function(x){x>=0}, "positive or zero"),
94 l1 =c(function(x){length(x)==1}, "of length 1")
95 )
96 if (is.null(arg)) {
97 if (!can.be.null) {
98 stop(deparse(substitute(arg)), " is null.")
99 }
100 } else {
101 if (is.matrix(arg)) {
102 stop(deparse(substitute(arg)), " is not scalar.")
103 }
104 for (c in checks) {
105 if (!check.list[[c]][[1]](arg)) {
106 stop(deparse(substitute(arg)), " is not ", check.list[[c]][[2]], ".")
107 }
108 }
109 }
110 }
111
112 # getArg
113 getArg <- function(arg, info, argname, can.be.absent=FALSE) {
114 if (is.null(arg)) {
115 start <- paste("impossible to get argument", argname, "it was not given directly and");
116 if (!is.matrix(info)) {
117 stop(paste(start, "the info matrix was not given"))
118 }
119 if (!(argname %in% colnames(info))) {
120 if (can.be.absent) {
121 return(NULL)
122 } else {
123 stop(paste(start, "is not in the info matrix"))
124 }
125 }
126 if (nrow(info) < 1) {
127 stop(paste(start, "the info matrix has no row"))
128 }
129 arg <- info[1,argname]
130 if (is.na(arg)) {
131 stop(paste(start, "it is NA in the info matrix"))
132 }
133 }
134 return(arg)
135 }
136
137 # binarySearch
138 binarySearch <- function(a, target, lower = TRUE) {
139 # search the index i in a such that a[i] == target
140 # if it doesn't exists and lower, it searches the closer a[i] such that a[i] < target
141 # if !lower, it seraches the closer a[i] such that a[i] > target
142 # a should be monotone but can be increasing or decreasing
143
144 # if a is increasing INVARIANT: a[amin] < target < a[amax]
145 N <- length(a)
146 if ((a[N] - target) * (a[N] - a[1]) <= 0) {
147 return(N)
148 }
149 if ((a[1] - target) * (a[N] - a[1]) >= 0) {
150 return(1)
151 }
152 amin <- 1
153 amax <- N
154 while (amin + 1 < amax) {
155 amid <- floor((amin + amax)/2)
156 if ((a[amid] - target) * (a[amax] - a[amid]) < 0) {
157 amin <- amid
158 } else if ((a[amid] - target) * (a[amax] - a[amid]) > 0) {
159 amax <- amid
160 } else {
161 # a[amid] == a[amax] or a[amid] == target In both cases, a[amid] ==
162 # target
163 return(amid)
164 }
165 }
166 if (xor(lower, a[amin] > a[amax])) {
167 # (lower && a[amin] < a[amax]) || (!lower && a[min] > a[max])
168 # If increasing and we want the lower, we take amin
169 # If decreasing and we want the bigger, we take amin too
170 return(amin)
171 } else {
172 return(amax)
173 }
174 }
175
176 # Interpol
177 Interpol <- function(t, y) {
178 # y: sample
179 # t : warping function
180
181 m <- length(y)
182 # t <= m-1
183 # because if t > m-1, y[ti+1] will be NA when we compute g
184 valid <- 1 <= t & t <= m-1 # FIXME it was '<' in Bubble v2
185 s <- (1:m)[valid]
186 ti <- floor(t[s])
187 tr <- t[s] - ti
188 g <- y[ti + 1] - y[ti]
189 f <- y[ti] + tr * g
190 list(f=f, s=s, g=g)
191 }
192
193 # vec2mat
194 vec2mat <- function(vec) {
195 return(matrix(vec, nrow = 1, dimnames = list(c(1), names(vec))))
196
197 }
198
199 # binarySearch
200 binarySearch <- function(a, target, lower = TRUE) {
201 # search the index i in a such that a[i] == target
202 # if it doesn't exists and lower, it searches the closer a[i] such that a[i] < target
203 # if !lower, it seraches the closer a[i] such that a[i] > target
204 # a should be monotone but can be increasing or decreasing
205
206 # if a is increasing INVARIANT: a[amin] < target < a[amax]
207 N <- length(a)
208 if ((a[N] - target) * (a[N] - a[1]) <= 0) {
209 return(N)
210 }
211 if ((a[1] - target) * (a[N] - a[1]) >= 0) {
212 return(1)
213 }
214 amin <- 1
215 amax <- N
216 while (amin + 1 < amax) {
217 amid <- floor((amin + amax)/2)
218 if ((a[amid] - target) * (a[amax] - a[amid]) < 0) {
219 amin <- amid
220 } else if ((a[amid] - target) * (a[amax] - a[amid]) > 0) {
221 amax <- amid
222 } else {
223 # a[amid] == a[amax] or a[amid] == target In both cases, a[amid] ==
224 # target
225 return(amid)
226 }
227 }
228 if (xor(lower, a[amin] > a[amax])) {
229 # (lower && a[amin] < a[amax]) || (!lower && a[min] > a[max])
230 # If increasing and we want the lower, we take amin
231 # If decreasing and we want the bigger, we take amin too
232 return(amin)
233 } else {
234 return(amax)
235 }
236 }
237
238
239 # indexInterval
240 indexInterval <- function (a, from, to, inclusive=TRUE) {
241 # If inclusive and from <= to, we need to take the lower
242 # If not inclusive and from > to, we need to take the lower too
243 lowerFrom <- xor(inclusive, from > to)
244 fromIndex <- binarySearch(a, from, lowerFrom)
245 toIndex <- binarySearch(a, to, !lowerFrom)
246 return(fromIndex:toIndex)
247 }
248
249
250
251 ## ==========================
252 # GroupDelayCorrection
253 ## ==========================
254 GroupDelayCorrection <- function(Fid_data, Fid_info = NULL, group_delay = NULL) {
255
256
257 # Data initialisation and checks ----------------------------------------------
258
259 begin_info <- beginTreatment("GroupDelayCorrection", Fid_data, Fid_info)
260 Fid_data <- begin_info[["Signal_data"]]
261 dimension_names <- dimnames(Fid_data)
262 Fid_info <- begin_info[["Signal_info"]]
263 checkArg(group_delay, c("num", "pos0"), can.be.null = TRUE)
264 # if Fid_info and group_delay are NULL, getArg will generate an error
265
266 group_delay <- getArg(group_delay, Fid_info, "GRPDLY", can.be.absent = TRUE)
267
268 if (is.null(group_delay)) {
269
270 # See DetermineBrukerDigitalFilter.m in matNMR MATLAB library
271 group_delay_matrix <- matrix(c(44.75, 46, 46.311, 33.5, 36.5, 36.53, 66.625,
272 48, 47.87, 59.0833, 50.1667, 50.229, 68.5625, 53.25, 53.289, 60.375,
273 69.5, 69.551, 69.5313, 72.25, 71.6, 61.0208, 70.1667, 70.184, 70.0156,
274 72.75, 72.138, 61.3438, 70.5, 70.528, 70.2578, 73, 72.348, 61.5052, 70.6667,
275 70.7, 70.3789, 72.5, 72.524, 61.5859, 71.3333, NA, 70.4395, 72.25, NA,
276 61.6263, 71.6667, NA, 70.4697, 72.125, NA, 61.6465, 71.8333, NA, 70.4849,
277 72.0625, NA, 61.6566, 71.9167, NA, 70.4924, 72.0313, NA), nrow = 21,
278 ncol = 3, byrow = TRUE, dimnames = list(c(2, 3, 4, 6, 8, 12, 16, 24,
279 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048),
280 c(10, 11, 12)))
281 decim <- Fid_info[1, "DECIM"]
282 dspfvs <- Fid_info[1, "DSPFVS"]
283 if (!(toString(decim) %in% rownames(group_delay_matrix))) {
284 stop(paste("Invalid DECIM", decim, "it should be one of", rownames(group_delay_matrix)))
285 }
286 if (!(toString(dspfvs) %in% colnames(group_delay_matrix))) {
287 stop(paste("Invalid DSPFVS", dspfvs, "it should be one of", colnames(group_delay_matrix)))
288 }
289 group_delay <- group_delay_matrix[toString(decim), toString(dspfvs)]
290 if (is.na(group_delay)) {
291 stop(paste("Invalid DECIM", decim, "for DSPFVS", dspfvs))
292 }
293 }
294 m <- ncol(Fid_data)
295 n <- nrow(Fid_data)
296
297 # GroupDelayCorrection ----------------------------------------------
298
299 # We do the shifting in the Fourier domain because the shift can be non-integer.
300 # That way we automatically have the circular behaviour of the shift and the
301 # interpolation if it is non-integer.
302
303 Spectrum <- t(stats::mvfft(t(Fid_data)))
304
305 # Spectrum <- FourierTransform(Fid_data, Fid_info)
306 p <- ceiling(m/2)
307 new_index <- c((p + 1):m, 1:p)
308 Spectrum <- Spectrum[,new_index]
309 Spectrum <- matrix(data = Spectrum, ncol = m, nrow = n)
310
311 Omega <- (0:(m - 1))/m
312 i <- complex(real = 0, imaginary = 1)
313
314 if (n>1) {
315 Spectrum <- sweep(Spectrum, MARGIN = 2, exp(i * group_delay * 2 * pi * Omega), `*`)
316 Spectrum <- Spectrum[,new_index]
317 }else {
318 Spectrum <- Spectrum* exp(i * group_delay * 2 * pi * Omega)
319 Spectrum <- Spectrum[new_index]
320 Spectrum <- matrix(data = Spectrum, ncol = m, nrow = n)
321 }
322
323
324 Fid_data <- t(stats::mvfft(t(Spectrum), inverse = TRUE))/m
325 colnames(Fid_data) <- dimension_names[[2]]
326 rownames(Fid_data) <- dimension_names[[1]]
327
328 # Data finalisation ----------------------------------------------
329
330 return(endTreatment("GroupDelayCorrection", begin_info, Fid_data))
331 }
332
333 ## ==========================
334 # SolventSuppression
335 ## ==========================
336 SolventSuppression <- function(Fid_data, lambda.ss = 1e+06, ptw.ss = TRUE,
337 plotSolvent = F, returnSolvent = F) {
338
339 # Data initialisation and checks ----------------------------------------------
340
341 begin_info <- beginTreatment("SolventSuppression", Fid_data)
342 Fid_data <- begin_info[["Signal_data"]]
343 checkArg(ptw.ss, c("bool"))
344 checkArg(lambda.ss, c("num", "pos0"))
345
346
347 # difsm function definition for the smoother -----------------------------------
348
349 if (ptw.ss) {
350 # Use of the function in ptw that smoothes signals with a finite difference
351 # penalty of order 2
352 difsm <- ptw::difsm
353 } else {
354 # Or manual implementation based on sparse matrices for large data series (cf.
355 # Eilers, 2003. 'A perfect smoother')
356 difsm <- function(y, d = 2, lambda) {
357
358 m <- length(y)
359 # Sparse identity matrix m x m
360 E <- Matrix::Diagonal(m)
361 D <- Matrix::diff(E, differences = d)
362 A <- E + lambda.ss * Matrix::t(D) %*% D
363 # base::chol does not take into account that A is sparse and is extremely slow
364 C <- Matrix::chol(A)
365 x <- Matrix::solve(C, Matrix::solve(Matrix::t(C), y))
366 return(as.numeric(x))
367 }
368 }
369
370 # Solvent Suppression ----------------------------------------------
371
372 n <- dim(Fid_data)[1]
373 if (returnSolvent) {
374 SolventRe <- Fid_data
375 SolventIm <- Fid_data
376 }
377 for (i in 1:n) {
378 FidRe <- Re(Fid_data[i, ])
379 FidIm <- Im(Fid_data[i, ])
380 solventRe <- difsm(y = FidRe, lambda = lambda.ss)
381 solventIm <- difsm(y = FidIm, lambda = lambda.ss)
382
383 if (plotSolvent) {
384 m <- length(FidRe)
385 graphics::plot(1:m, FidRe, type = "l", col = "red")
386 graphics::lines(1:m, solventRe, type = "l", col = "blue")
387 graphics::plot(1:m, FidIm, type = "l", col = "red")
388 graphics::lines(1:m, solventIm, type = "l", col = "blue")
389 }
390 FidRe <- FidRe - solventRe
391 FidIm <- FidIm - solventIm
392 Fid_data[i, ] <- complex(real = FidRe, imaginary = FidIm)
393 if (returnSolvent) {
394 SolventRe[i, ] <- solventRe
395 SolventIm[i, ] <- solventIm
396 }
397 }
398
399
400 # Data finalisation ----------------------------------------------
401
402 Fid_data <- endTreatment("SolventSuppression", begin_info, Fid_data)
403 if (returnSolvent) {
404 return(list(Fid_data = Fid_data, SolventRe = SolventRe, SolventIm = SolventIm))
405 } else {
406 return(Fid_data)
407 }
408 }
409
410
411 ## ==========================
412 # Apodization
413 # =============================
414 Apodization <- function(Fid_data, Fid_info = NULL, DT = NULL,
415 type.apod = c("exp","cos2", "blockexp", "blockcos2",
416 "gauss", "hanning", "hamming"), phase = 0, rectRatio = 1/2,
417 gaussLB = 1, expLB = 1, plotWindow = F, returnFactor = F) {
418
419 # Data initialisation and checks ----------------------------------------------
420 begin_info <- beginTreatment("Apodization", Fid_data, Fid_info)
421 Fid_data <- begin_info[["Signal_data"]]
422 Fid_info <- begin_info[["Signal_info"]]
423 # Data check
424 type.apod <- match.arg(type.apod)
425 checkArg(DT, c("num", "pos"), can.be.null = TRUE)
426 checkArg(phase, c("num"))
427
428 # Apodization ----------------------------------------------
429 DT <- getArg(DT, Fid_info, "DT") # Dwell Time
430 m <- ncol(Fid_data)
431 t <- (1:m) * DT # Time
432 rectSize <- ceiling(rectRatio * m)
433 gaussLB <- (gaussLB/(sqrt(8 * log(2))))
434 # Define the types of apodization:
435 switch(type.apod, exp = {
436 # exponential
437 Factor <- exp(-expLB * t)
438 }, cos2 = {
439 # cos^2
440 c <- cos((1:m) * pi/(2 * m) - phase * pi/2)
441 Factor <- c * c
442 }, blockexp = {
443 # block and exponential
444 Factor <- c(rep.int(1, rectSize), rep.int(0, m - rectSize))
445 # | rectSize | 1 ___________ | \ 0 \____
446 Factor[(rectSize + 1):m] <- exp(-expLB * t[1:(m - rectSize)])
447 }, blockcos2 = {
448 # block and cos^2
449 Factor <- c(rep.int(1, rectSize), rep.int(0, m - rectSize))
450 c <- cos((1:(m - rectSize)) * pi/(2 * (m - rectSize)))
451 Factor[(rectSize + 1):m] <- c * c
452 }, gauss = {
453 # gaussian
454 Factor <- exp(-(gaussLB * t)^2/2)
455 Factor <- Factor/max(Factor)
456 }, hanning = {
457 # Hanning
458 Factor <- 0.5 + 0.5 * cos((1:m) * pi/m - phase * pi)
459 }, hamming = {
460 # Hamming
461 Factor <- 0.54 + 0.46 * cos((1:m) * pi/m - phase * pi)
462 })
463 if (plotWindow) {
464 graphics::plot(1:m, Factor, "l")
465 # dev.off() # device independent, it is the responsability of the
466 # caller to do it
467 }
468 # Apply the apodization factor on the spectra
469 Fid_data <- sweep(Fid_data, MARGIN = 2, Factor, `*`)
470
471 # Data finalisation ----------------------------------------------
472 Fid_data <- endTreatment("Apodization", begin_info, Fid_data)
473 if (returnFactor) {
474 return(list(Fid_data = Fid_data, Factor = Factor))
475 } else {
476 return(Fid_data)
477 }
478 }
479
480
481 ## ====================================================
482 # FourierTransform
483 ## ====================================================
484
485
486 # fftshift1D2D
487 fftshift1D2D <- function(x) {
488 vec <- F
489 if (is.vector(x)) {
490 x <- vec2mat(x)
491 vec <- T
492 }
493 m <- dim(x)[2]
494 p <- ceiling(m/2)
495 new_index <- c((p + 1):m, 1:p)
496 y <- x[, new_index, drop = vec]
497 }
498
499 # FourierTransform
500 FourierTransform <- function(Fid_data, Fid_info = NULL, SW_h = NULL, SW = NULL, O1 = NULL, reverse.axis = TRUE) {
501
502 # Data initialisation and checks ----------------------------------------------
503 begin_info <- beginTreatment("FourierTransform", Fid_data, Fid_info)
504 Fid_data <- begin_info[["Signal_data"]]
505 Fid_info <- begin_info[["Signal_info"]]
506
507 m <- ncol(Fid_data)
508 n <- nrow(Fid_data)
509
510 if (is.null(SW_h)) {
511 SW_h <- getArg(SW_h, Fid_info, "SW_h")
512 }
513
514 if (is.null(SW)) {
515 SW <- getArg(SW, Fid_info, "SW") # Sweep Width in ppm (semi frequency scale in ppm)
516 }
517
518
519 if (is.null(O1)) {
520 O1 <- getArg(O1, Fid_info, "O1")
521 }
522
523
524 checkArg(reverse.axis, c("bool"))
525
526 # Fourier Transformation ----------------------------------------------
527 # mvfft does the unnormalized fourier transform (see ?mvfft), so we need divide
528 # by m. It does not matter a lot in our case since the spectrum will be
529 # normalized.
530
531 # FT
532 RawSpect_data <- fftshift1D2D(t(stats::mvfft(t(Fid_data))))
533 # recover the frequencies values
534 f <- ((0:(m - 1)) - floor(m/2)) * Fid_info[1, "SW_h"]/(m-1)
535
536 if(reverse.axis == TRUE) {
537 revind <- rev(1:m)
538 RawSpect_data <- RawSpect_data[,revind] # reverse the spectrum
539 }
540
541 RawSpect_data <- matrix(RawSpect_data, nrow = n, ncol = m)
542 colnames(RawSpect_data) <- f
543 rownames(RawSpect_data) <- rownames(Fid_data)
544
545 # PPM conversion ----------------------------------------------
546
547 # The Sweep Width has to be the same since the column names are the same
548
549 ppmInterval <- SW/(m-1)
550
551 O1index = round((m+1)/2+O1*(m - 1) / SW_h)
552
553 end <- O1index - m
554 start <- O1index -1
555 ppmScale <- (start:end) * ppmInterval
556 RawSpect_data <- matrix(RawSpect_data, nrow = n, ncol = -(end - start) + 1, dimnames =
557 list(rownames(RawSpect_data), ppmScale))
558
559
560 # Data finalisation ----------------------------------------------
561 return(endTreatment("FourierTransform", begin_info, RawSpect_data))
562 }
563
564 ## ====================================================
565 # InternalReferencing
566 ## ====================================================
567
568 InternalReferencing <- function(Spectrum_data, Fid_info, method = c("max", "thres"),
569 range = c("nearvalue", "all", "window"), ppm.value = 0,
570 direction = "left", shiftHandling = c("zerofilling", "cut",
571 "NAfilling", "circular"), c = 2, pc = 0.02, fromto.RC = NULL,
572 ppm.ir = TRUE, rowindex_graph = NULL) {
573
574
575
576 # Data initialisation and checks ----------------------------------------------
577
578 begin_info <- beginTreatment("InternalReferencing", Spectrum_data, Fid_info)
579 Spectrum_data <- begin_info[["Signal_data"]]
580 Fid_info <- begin_info[["Signal_info"]]
581
582
583 # Check input arguments
584 range <- match.arg(range)
585 shiftHandling <- match.arg(shiftHandling)
586 method <- match.arg(method)
587 plots <- NULL
588
589
590 checkArg(ppm.ir, c("bool"))
591 checkArg(unlist(fromto.RC), c("num"), can.be.null = TRUE)
592 checkArg(pc, c("num"))
593 checkArg(ppm.value, c("num"))
594 checkArg(rowindex_graph, "num", can.be.null = TRUE)
595
596 # fromto.RC
597 if (!is.null(fromto.RC)) {
598 diff <- diff(unlist(fromto.RC))[1:length(diff(unlist(fromto.RC)))%%2 !=0]
599 for (i in 1:length(diff)) {
600 if (diff[i] >= 0) {
601 fromto <- c(fromto.RC[[i]][2], fromto.RC[[i]][1])
602 fromto.RC[[i]] <- fromto
603 }
604 }
605 }
606
607
608 # findTMSPpeak function ----------------------------------------------
609 findTMSPpeak <- function(ft, c = 2, direction = "left") {
610 ft <- Re(ft) # extraction de la partie réelle
611 N <- length(ft)
612 if (direction == "left") {
613 newindex <- rev(1:N)
614 ft <- rev(ft)
615 }
616 thres <- 99999
617 i <- 1000 # Start at point 1000 to find the peak
618 vect <- ft[1:i]
619
620 while (vect[i] <= (c * thres)) {
621 cumsd <- stats::sd(vect)
622 cummean <- mean(vect)
623 thres <- cummean + 3 * cumsd
624 i <- i + 1
625 vect <- ft[1:i]
626 }
627 if (direction == "left") {
628 v <- newindex[i]
629 } else {v <- i}
630
631 if (is.na(v)) {
632 warning("No peak found, need to lower the threshold.")
633 return(NA)
634 } else {
635 # recherche dans les 1% de points suivants du max trouve pour etre au sommet du
636 # pic
637 d <- which.max(ft[v:(v + N * 0.01)])
638 new.peak <- v + d - 1 # nouveau pic du TMSP si d > 0
639
640 if (names(which.max(ft[v:(v + N * 0.01)])) != names(which.max(ft[v:(v + N * 0.03)]))) {
641 # recherche dans les 3% de points suivants du max trouve pour eviter un faux
642 # positif
643 warning("the TMSP peak might be located further away, increase the threshold to check.")
644 }
645 return(new.peak)
646 }
647 }
648
649
650 # Apply the method ('thres' or 'max') on spectra
651 # ----------------------------------------------
652
653 n <- nrow(Spectrum_data)
654 m <- ncol(Spectrum_data)
655
656 # The Sweep Width has to be the same since the column names are the same
657 SW <- Fid_info[1, "SW"] # Sweep Width in ppm (semi frequency scale in ppm)
658 ppmInterval <- SW/(m-1)
659
660 if (range == "all") {
661 Data <- Spectrum_data
662 } else { # range = "nearvalue" or "window"
663
664 if (range == "nearvalue") {
665 fromto.RC <- list(c(-(SW * pc)/2 + ppm.value, (SW * pc)/2 + ppm.value)) # automatic fromto values in ppm
666 colindex <- as.numeric(colnames(Spectrum_data))
667 } else {
668 if (ppm.ir == TRUE) {
669 colindex <- as.numeric(colnames(Spectrum_data))
670 } else {
671 colindex <- 1:m
672 }
673 }
674
675
676 Int <- vector("list", length(fromto.RC))
677 for (i in 1:length(fromto.RC)) {
678 Int[[i]] <- indexInterval(colindex, from = fromto.RC[[i]][1],
679 to = fromto.RC[[i]][2], inclusive = TRUE)
680 }
681
682 vector <- rep(0, m)
683 vector[unlist(Int)] <- 1
684 if (n > 1) {
685 Data <- sweep(Spectrum_data, MARGIN = 2, FUN = "*", vector) # Cropped_Spectrum
686 } else {
687 Data <- Spectrum_data * vector
688 } # Cropped_Spectrum
689 }
690
691
692 if (method == "thres") {
693 TMSPpeaks <- apply(Data, 1, findTMSPpeak, c = c, direction = direction)
694 } else {
695 TMSPpeaks <- apply(abs(Re(Data)), 1, which.max)
696 }
697
698 # TMSPpeaks is an column index
699 maxpeak <- max(TMSPpeaks)
700 minpeak <- min(TMSPpeaks)
701
702
703
704 # Shift spectra according to the TMSPpeaks found --------------------------------
705 # Depends on the shiftHandling
706
707 if (shiftHandling %in% c("zerofilling", "NAfilling", "cut")) {
708 fill <- NA
709 if (shiftHandling == "zerofilling") {
710 fill <- 0
711 }
712
713 start <- maxpeak - 1
714 end <- minpeak - m
715
716 ppmScale <- (start:end) * ppmInterval
717
718 # check if ppm.value is in the ppmScale interval
719 if(ppm.value < min(ppmScale) | ppm.value > max(ppmScale)) {
720 warning("ppm.value = ", ppm.value, " is not in the ppm interval [",
721 round(min(ppmScale),2), ",", round(max(ppmScale),2), "], and is set to its default ppm.value 0")
722 ppm.value = 0
723 }
724
725 ppmScale <- ppmScale + ppm.value
726
727 Spectrum_data_calib <- matrix(fill, nrow = n, ncol = -(end - start) + 1,
728 dimnames = list(rownames(Spectrum_data), ppmScale))
729 for (i in 1:n) {
730 shift <- (1 - TMSPpeaks[i]) + start
731 Spectrum_data_calib[i, (1 + shift):(m + shift)] <- Spectrum_data[i, ]
732 }
733
734 if (shiftHandling == "cut") {
735 Spectrum_data_calib = as.matrix(stats::na.omit(t(Spectrum_data_calib)))
736 Spectrum_data_calib = t(Spectrum_data_calib)
737 base::attr(Spectrum_data_calib, "na.action") <- NULL
738 }
739
740
741 } else {
742 # circular
743 start <- 1 - maxpeak
744 end <- m - maxpeak
745
746 ppmScale <- (start:end) * ppmInterval
747
748 # check if ppm.value in is the ppmScale interval
749 if(ppm.value < min(ppmScale) | ppm.value > max(ppmScale)) {
750 warning("ppm.value = ", ppm.value, " is not in the ppm interval [",
751 round(min(ppmScale),2), ",", round(max(ppmScale),2), "], and is set to its default ppm.value 0")
752 ppm.value = 0
753 }
754 ppmScale <- ppmScale + ppm.value
755
756 Spectrum_data_calib <- matrix(nrow=n, ncol=end-start+1,
757 dimnames=list(rownames(Spectrum_data), ppmScale))
758 for (i in 1:n) {
759 shift <- (maxpeak-TMSPpeaks[i])
760 Spectrum_data_calib[i,(1+shift):m] <- Spectrum_data[i,1:(m-shift)]
761 if (shift > 0) {
762 Spectrum_data_calib[i,1:shift] <- Spectrum_data[i,(m-shift+1):m]
763 }
764 }
765 }
766
767
768
769
770 # Plot of the spectra ---------------------------------------------------
771
772 ppm = xstart = value = xend = Legend = NULL # only for R CMD check
773
774
775 # with the search zone for TMSP and the location of the peaks just found
776 if (!is.null(rowindex_graph)) {
777
778 if (range == "window") {
779 if (ppm.ir == TRUE) {
780 fromto <- fromto.RC
781 } else {
782 fromto <- list()
783 idcol <- as.numeric(colnames(Spectrum_data))
784 for (i in 1:length(fromto.RC)) {
785 fromto[[i]] <- as.numeric(colnames(Spectrum_data))[fromto.RC[[i]]]
786 }
787 }
788 } else {
789 fromto <- fromto.RC
790 }
791
792 # TMSPloc in ppm
793 TMSPloc <- as.numeric(colnames(Spectrum_data))[TMSPpeaks[rowindex_graph]]
794
795 # num plot per window
796 num.stacked <- 6
797
798 # rectanglar bands of color for the search zone
799 rects <- data.frame(xstart = sapply(fromto, function(x) x[[1]]),
800 xend = sapply(fromto, function(x) x[[2]]),
801 Legend = "TMSP search zone and location")
802
803 # vlines for TMSP peak
804 addlines <- data.frame(rowname = rownames(Spectrum_data)[rowindex_graph],TMSPloc)
805
806 nn <- length(rowindex_graph)
807 i <- 1
808 j <- 1
809 plots <- vector(mode = "list", length = ceiling(nn/num.stacked))
810
811 while (i <= nn) {
812
813 last <- min(i + num.stacked - 1, nn)
814
815 melted <- reshape2::melt(Re(Spectrum_data[i:last, ]),
816 varnames = c("rowname", "ppm"))
817
818 plots[[j]] <- ggplot2::ggplot() + ggplot2::theme_bw() +
819 ggplot2::geom_line(data = melted,
820 ggplot2::aes(x = ppm, y = value)) +
821 ggplot2::geom_rect(data = rects, ggplot2::aes(xmin = xstart, xmax = xend,
822 ymin = -Inf, ymax = Inf, fill = Legend), alpha = 0.4) +
823 ggplot2::facet_grid(rowname ~ ., scales = "free_y") +
824 ggplot2::theme(legend.position = "none") +
825 ggplot2::geom_vline(data = addlines, ggplot2::aes(xintercept = TMSPloc),
826 color = "red", show.legend = TRUE) +
827 ggplot2::ggtitle("TMSP peak search zone and location") +
828 ggplot2::theme(legend.position = "top", legend.text = ggplot2::element_text())
829
830
831
832 if ((melted[1, "ppm"] - melted[(dim(melted)[1]), "ppm"]) > 0) {
833 plots[[j]] <- plots[[j]] + ggplot2::scale_x_reverse()
834 }
835
836 i <- last + 1
837 j <- j + 1
838 }
839
840 plots
841 }
842
843
844 # Return the results ----------------------------------------------
845 Spectrum_data <- endTreatment("InternalReferencing", begin_info, Spectrum_data_calib)
846
847 if (is.null(plots)) {
848 return(Spectrum_data)
849 } else {
850 return(list(Spectrum_data = Spectrum_data, plots = plots))
851 }
852
853 }
854
855 ## ====================================================
856 # ZeroOrderPhaseCorrection
857 ## ====================================================
858
859 ZeroOrderPhaseCorrection <- function(Spectrum_data, type.zopc = c("rms", "manual", "max"),
860 plot_rms = NULL, returnAngle = FALSE, createWindow = TRUE,
861 angle = NULL, plot_spectra = FALSE,
862 ppm.zopc = TRUE, exclude.zopc = list(c(5.1,4.5))) {
863
864
865 # Data initialisation and checks ----------------------------------------------
866
867 # Entry arguments definition:
868 # plot_rms : graph of rms criterion returnAngle : if TRUE, returns avector of
869 # optimal angles createWindow : for plot_rms plots angle : If angle is not NULL,
870 # spectra are rotated according to the angle vector values
871 # plot_spectra : if TRUE, plot rotated spectra
872
873
874
875 begin_info <- beginTreatment("ZeroOrderPhaseCorrection", Spectrum_data)
876 Spectrum_data <- begin_info[["Signal_data"]]
877 n <- nrow(Spectrum_data)
878 m <- ncol(Spectrum_data)
879
880 rnames <- rownames(Spectrum_data)
881
882 # Check input arguments
883 type.zopc <- match.arg(type.zopc)
884 checkArg(ppm.zopc, c("bool"))
885 checkArg(unlist(exclude.zopc), c("num"), can.be.null = TRUE)
886
887
888 # type.zopc in c("max", "rms") -----------------------------------------
889 if (type.zopc %in% c("max", "rms")) {
890 # angle is found by optimization
891
892 # rms function to be optimised
893 rms <- function(ang, y, meth = c("max", "rms")) {
894 # if (debug_plot) { graphics::abline(v=ang, col='gray60') }
895 roty <- y * exp(complex(real = 0, imaginary = ang)) # spectrum rotation
896 Rey <- Re(roty)
897
898 if (meth == "rms") {
899 ReyPos <- Rey[Rey >= 0] # select positive intensities
900 POSss <- sum((ReyPos)^2, na.rm = TRUE) # SS for positive intensities
901 ss <- sum((Rey)^2, na.rm = TRUE) # SS for all intensities
902 return(POSss/ss) # criterion : SS for positive values / SS for all intensities
903 } else {
904 maxi <- max(Rey, na.rm = TRUE)
905 return(maxi)
906 }
907 }
908
909
910 # Define the interval where to search for (by defining Data)
911 if (is.null(exclude.zopc)) {
912 Data <- Spectrum_data
913 } else {
914
915 # if ppm.zopc == TRUE, then exclude.zopc is in the colnames values, else, in the column
916 # index
917 if (ppm.zopc == TRUE) {
918 colindex <- as.numeric(colnames(Spectrum_data))
919 } else {
920 colindex <- 1:m
921 }
922
923 # Second check for the argument exclude.zopc
924 diff <- diff(unlist(exclude.zopc))[1:length(diff(unlist(exclude.zopc)))%%2 !=0]
925 for (i in 1:length(diff)) {
926 if (ppm.zopc == TRUE & diff[i] >= 0) {
927 stop(paste("Invalid region removal because from <= to in ppm.zopc"))
928 } else if (ppm.zopc == FALSE & diff[i] <= 0) {stop(paste("Invalid region removal because from >= to in column index"))}
929 }
930
931
932 Int <- vector("list", length(exclude.zopc))
933 for (i in 1:length(exclude.zopc)) {
934 Int[[i]] <- indexInterval(colindex, from = exclude.zopc[[i]][1],
935 to = exclude.zopc[[i]][2], inclusive = TRUE)
936 }
937
938 vector <- rep(1, m)
939 vector[unlist(Int)] <- 0
940 if (n > 1) {
941 Data <- sweep(Spectrum_data, MARGIN = 2, FUN = "*", vector) # Cropped_Spectrum
942 } else {
943 Data <- Spectrum_data * vector
944 } # Cropped_Spectrum
945 }
946
947
948 # angles computation
949 Angle <- c()
950 for (k in 1:n)
951 {
952 # The function is rms is periodic (period 2pi) and it seems that there is a phase
953 # x such that rms is unimodal (i.e. decreasing then increasing) on the interval
954 # [x; x+2pi]. However, if we do the optimization for example on [x-pi; x+pi],
955 # instead of being decreasing then increasing, it might be increasing then
956 # decreasing in which case optimize, thinking it is a valley will have to choose
957 # between the left or the right of this hill and if it chooses wrong, it will end
958 # up at like x-pi while the minimum is close to x+pi.
959
960 # Supposing that rms is unimodal, the classical 1D unimodal optimization will
961 # work in either [-pi;pi] or [0;2pi] (this is not easy to be convinced by that I
962 # agree) and we can check which one it is simply by the following trick
963
964 f0 <- rms(0, Data[k, ],meth = type.zopc)
965 fpi <- rms(pi, Data[k, ], meth = type.zopc)
966 if (f0 < fpi) {
967 interval <- c(-pi, pi)
968 } else {
969 interval <- c(0, 2 * pi)
970 }
971
972 # graphs of rms criteria
973 debug_plot <- F # rms should not plot anything now, only when called by optimize
974 if (!is.null(plot_rms) && rnames[k] %in% plot_rms) {
975 x <- seq(min(interval), max(interval), length.out = 100)
976 y <- rep(1, 100)
977 for (K in (1:100)) {
978 y[K] <- rms(x[K], Data[k, ], meth = type.zopc)
979 }
980 if (createWindow == TRUE) {
981 grDevices::dev.new(noRStudioGD = FALSE)
982 }
983 graphics::plot(x, y, main = paste("Criterion maximization \n",
984 rownames(Data)[k]), ylim = c(0, 1.1),
985 ylab = "positiveness criterion", xlab = "angle ")
986 debug_plot <- T
987 }
988
989 # Best angle
990 best <- stats::optimize(rms, interval = interval, maximum = TRUE,
991 y = Data[k,], meth = type.zopc)
992 ang <- best[["maximum"]]
993
994
995 if (debug_plot) {
996 graphics::abline(v = ang, col = "black")
997 graphics::text(x = (ang+0.1*ang), y = (y[ang]-0.1*y[ang]), labels = round(ang, 3))
998 }
999
1000 # Spectrum rotation
1001 Spectrum_data[k, ] <- Spectrum_data[k, ] * exp(complex(real = 0, imaginary = ang))
1002 Angle <- c(Angle, ang)
1003 }
1004
1005
1006
1007
1008 } else {
1009 # type.zopc is "manual" -------------------------------------------------------
1010 # if Angle is already specified and no optimisation is needed
1011 Angle <- angle
1012
1013 if (!is.vector(angle)) {
1014 stop("angle is not a vector")
1015 }
1016
1017 if (!is.numeric(angle)) {
1018 stop("angle is not a numeric")
1019 }
1020
1021 if (length(angle) != n) {
1022 stop(paste("angle has length", length(angle), "and there are", n, "spectra to rotate."))
1023 }
1024 for (k in 1:n) {
1025 Spectrum_data[k, ] <- Spectrum_data[k, ] * exp(complex(real = 0, imaginary = - angle[k]))
1026 }
1027 }
1028
1029
1030 # Draw spectra
1031 if (plot_spectra == TRUE) {
1032 nn <- ceiling(n/4)
1033 i <- 1
1034 for (k in 1:nn) {
1035 if (createWindow == TRUE) {
1036 grDevices::dev.new(noRStudioGD = FALSE)
1037 }
1038 graphics::par(mfrow = c(4, 2))
1039 while (i <= n) {
1040 last <- min(i + 4 - 1, n)
1041 graphics::plot(Re(Spectrum_data[i, ]), type = "l", ylab = "intensity",
1042 xlab = "Index", main = paste0(rownames(Spectrum_data)[i], " - Real part"))
1043 graphics::plot(Im(Spectrum_data[i, ]), type = "l", ylab = "intensity",
1044 xlab = "Index", main = paste0(rownames(Spectrum_data)[i], " - Imaginary part"))
1045 i <- i + 1
1046 }
1047 i <- last + 1
1048 }
1049 }
1050
1051
1052 # Data finalisation ----------------------------------------------
1053
1054 Spectrum_data <- endTreatment("ZeroOrderPhaseCorrection", begin_info, Spectrum_data)
1055 if (returnAngle) {
1056 return(list(Spectrum_data = Spectrum_data, Angle = Angle))
1057 } else {
1058 return(Spectrum_data)
1059 }
1060 }
1061
1062
1063 ## ====================================================
1064 # Baseline Correction
1065 ## ====================================================
1066 BaselineCorrection <- function(Spectrum_data, ptw.bc = TRUE, maxIter = 42,
1067 lambda.bc = 1e+07, p.bc = 0.05, eps = 1e-08,
1068 ppm.bc = TRUE, exclude.bc = list(c(5.1,4.5)),
1069 returnBaseline = F) {
1070
1071 # Data initialisation ----------------------------------------------
1072 begin_info <- beginTreatment("BaselineCorrection", Spectrum_data, force.real = T)
1073 Spectrum_data <- begin_info[["Signal_data"]]
1074 p <- p.bc
1075 lambda <- lambda.bc
1076 n <- dim(Spectrum_data)[1]
1077 m <- dim(Spectrum_data)[2]
1078
1079
1080 # Data check
1081 checkArg(ptw.bc, c("bool"))
1082 checkArg(maxIter, c("int", "pos"))
1083 checkArg(lambda, c("num", "pos0"))
1084 checkArg(p.bc, c("num", "pos0"))
1085 checkArg(eps, c("num", "pos0"))
1086 checkArg(returnBaseline, c("bool"))
1087 checkArg(ppm.bc, c("bool"))
1088 checkArg(unlist(exclude.bc), c("num"), can.be.null = TRUE)
1089
1090 # Define the interval where to search for (by defining Data)
1091 if (is.null(exclude.bc)) {
1092 exclude_index <- NULL
1093 } else {
1094 # if ppm.bc == TRUE, then exclude.bc is in the colnames values, else, in the column
1095 # index
1096 if (ppm.bc == TRUE) {
1097 colindex <- as.numeric(colnames(Spectrum_data))
1098 } else {
1099 colindex <- 1:m
1100 }
1101
1102 Int <- vector("list", length(exclude.bc))
1103 for (i in 1:length(exclude.bc)) {
1104 Int[[i]] <- indexInterval(colindex, from = exclude.bc[[i]][1],
1105 to = exclude.bc[[i]][2], inclusive = TRUE)
1106 }
1107 exclude_index <- unlist(Int)
1108 }
1109
1110 # Baseline Correction implementation definition ----------------------
1111
1112 # 2 Ways: either use the function asysm from the ptw package or by
1113 # built-in functions
1114 if (ptw.bc) {
1115 asysm <- ptw::asysm
1116 } else {
1117 difsmw <- function(y, lambda, w, d) {
1118 # Weighted smoothing with a finite difference penalty cf Eilers, 2003.
1119 # (A perfect smoother)
1120 # y: signal to be smoothed
1121 # lambda: smoothing parameter
1122 # w: weights (use0 zeros for missing values)
1123 # d: order of differences in penalty (generally 2)
1124 m <- length(y)
1125 W <- Matrix::Diagonal(x=w)
1126 E <- Matrix::Diagonal(m)
1127 D <- Matrix::diff(E, differences = d)
1128 C <- Matrix::chol(W + lambda * t(D) %*% D)
1129 x <- Matrix::solve(C, Matrix::solve(t(C), w * y))
1130 return(as.numeric(x))
1131
1132 }
1133 asysm <- function(y, lambda, p, eps, exclude_index) {
1134 # Baseline estimation with asymmetric least squares
1135 # y: signal
1136 # lambda: smoothing parameter (generally 1e5 to 1e8)
1137 # p: asymmetry parameter (generally 0.001)
1138 # d: order of differences in penalty (generally 2)
1139 # eps: 1e-8 in ptw package
1140 m <- length(y)
1141 w <- rep(1, m)
1142 i <- 1
1143 repeat {
1144 z <- difsmw(y, lambda, w, d = 2)
1145 w0 <- w
1146 p_vect <- rep((1-p), m) # if y <= z + eps
1147 p_vect[y > z + eps | y < 0] <- p # if y > z + eps | y < 0
1148 if(!is.null(exclude_index)){
1149 p_vect[exclude_index] <- 0 # if exclude area
1150 }
1151
1152 w <- p_vect
1153 # w <- p * (y > z + eps | y < 0) + (1 - p) * (y <= z + eps)
1154
1155 if (sum(abs(w - w0)) == 0) {
1156 break
1157 }
1158 i <- i + 1
1159 if (i > maxIter) {
1160 warning("cannot find Baseline estimation in asysm")
1161 break
1162 }
1163 }
1164 return(z)
1165 }
1166 }
1167
1168 # Baseline estimation ----------------------------------------------
1169 Baseline <- matrix(NA, nrow = nrow(Spectrum_data), ncol = ncol(Spectrum_data))
1170
1171 # for (k in 1:n) {
1172 # Baseline[k, ] <- asysm(y = Spectrum_data[k, ], lambda = lambda, p = p, eps = eps)
1173
1174 if (ptw.bc ){
1175 Baseline <- apply(Spectrum_data,1, asysm, lambda = lambda, p = p,
1176 eps = eps)
1177 }else {
1178 Baseline <- apply(Spectrum_data,1, asysm, lambda = lambda, p = p,
1179 eps = eps, exclude_index = exclude_index)
1180 }
1181
1182
1183 Spectrum_data <- Spectrum_data - t(Baseline)
1184 # }
1185
1186 # Data finalisation ----------------------------------------------
1187 Spectrum_data <- endTreatment("BaselineCorrection", begin_info, Spectrum_data) # FIXME create removeImaginary filter ??
1188
1189 if (returnBaseline) {
1190 return(list(Spectrum_data = Spectrum_data, Baseline = Baseline))
1191 } else {
1192 return(Spectrum_data)
1193 }
1194 }
1195
1196
1197
1198 ## ====================================================
1199 # NegativeValuesZeroing
1200 ## ====================================================
1201
1202 NegativeValuesZeroing <- function(Spectrum_data) {
1203 # Data initialisation and checks ----------------------------------------------
1204 begin_info <- beginTreatment("NegativeValuesZeroing", Spectrum_data, force.real = T)
1205 Spectrum_data <- begin_info[["Signal_data"]]
1206
1207 # NegativeValuesZeroing ----------------------------------------------
1208 Spectrum_data[Spectrum_data < 0] <- 0
1209
1210 # Data finalisation ----------------------------------------------
1211 return(endTreatment("NegativeValuesZeroing", begin_info, Spectrum_data))
1212 }
1213
1214