Mercurial > repos > marie-tremblay-metatoul > nmr_preprocessing

## ==========================
# Internal functions
## ==========================

# beginTreatment
beginTreatment <- function(name, Signal_data = NULL, Signal_info = NULL,
                           force.real = FALSE) {

  cat("Begin", name, "\n")


  # Formatting the Signal_data and Signal_info -----------------------

  vec <- is.vector(Signal_data)
  if (vec) {
    Signal_data <- vec2mat(Signal_data)
  }
  if (is.vector(Signal_info)) {
    Signal_info <- vec2mat(Signal_info)
  }
  if (!is.null(Signal_data)) {
    if (!is.matrix(Signal_data)) {
      stop("Signal_data is not a matrix.")
    }
    if (!is.complex(Signal_data) && !is.numeric(Signal_data)) {
      stop("Signal_data contains non-numerical values.")
    }
  }
  if (!is.null(Signal_info) && !is.matrix(Signal_info)) {
    stop("Signal_info is not a matrix.")
  }


  Original_data <- Signal_data

  # Extract the real part of the spectrum ---------------------------

  if (force.real) {
    if (is.complex(Signal_data)) {
      Signal_data <- Re(Signal_data)
    } else {
      # The signal is numeric Im(Signal_data) is zero anyway so let's avoid
      # using complex(real=...,imaginary=0) which would give a complex signal
      # in endTreatment()
      force.real <- FALSE
    }
  }


  # Return the formatted data and metadata entries --------------------

  return(list(start = proc.time(), vec = vec, force.real = force.real,
              Original_data = Original_data, Signal_data = Signal_data, Signal_info = Signal_info))
}

# endTreatment
endTreatment <- function(name, begin_info, Signal_data) {

  # begin_info: object outputted from beginTreatment


  # Formatting the entries and printing process time -----------------------
  end_time <- proc.time()  # record it as soon as possible
  start_time <- begin_info[["start"]]
  delta_time <- end_time - start_time
  delta <- delta_time[]
  cat("End", name, "\n")
  cat("It lasted", round(delta["user.self"], 3), "s user time,", round(delta["sys.self"],3),
      "s system time and", round(delta["elapsed"], 3), "s elapsed time.\n")


  if (begin_info[["force.real"]]) {
    # The imaginary part is left untouched
    i <- complex(real = 0, imaginary = 1)
    Signal_data <- Signal_data + i * Im(begin_info[["Original_data"]])
  }

  if (begin_info[["vec"]]) {
    Signal_data <- Signal_data[1, ]
  }

  # Return the formatted data and metadata entries --------------------
  return(Signal_data)
}

# checkArg
checkArg <- function(arg, checks, can.be.null=FALSE) {
  check.list <- list(bool=c(is.logical, "a boolean"),
                     int =c(function(x){x%%1==0}, "an integer"),
                     num =c(is.numeric, "a numeric"),
                     str =c(is.character, "a string"),
                     pos =c(function(x){x>0}, "positive"),
                     pos0=c(function(x){x>=0}, "positive or zero"),
                     l1 =c(function(x){length(x)==1}, "of length 1")
  )
  if (is.null(arg)) {
    if (!can.be.null) {
      stop(deparse(substitute(arg)), " is null.")
    }
  } else {
    if (is.matrix(arg)) {
      stop(deparse(substitute(arg)), " is not scalar.")
    }
    for (c in checks) {
      if (!check.list[[c]][[1]](arg)) {
        stop(deparse(substitute(arg)), " is not ", check.list[[c]][[2]], ".")
      }
    }
  }
}

# getArg
getArg <- function(arg, info, argname, can.be.absent=FALSE) {
  if (is.null(arg)) {
    start <- paste("impossible to get argument", argname, "it was not given directly and");
    if (!is.matrix(info)) {
      stop(paste(start, "the info matrix was not given"))
    }
    if (!(argname %in% colnames(info))) {
      if (can.be.absent) {
        return(NULL)
      } else {
        stop(paste(start, "is not in the info matrix"))
      }
    }
    if (nrow(info) < 1) {
      stop(paste(start, "the info matrix has no row"))
    }
    arg <- info[1,argname]
    if (is.na(arg)) {
      stop(paste(start, "it is NA in the info matrix"))
    }
  }
  return(arg)
}

# binarySearch
binarySearch <- function(a, target, lower = TRUE) {
  # search the index i in a such that a[i] == target
  # if it doesn't exists and lower, it searches the closer a[i] such that a[i] < target
  # if !lower, it seraches the closer a[i] such that a[i] > target
  # a should be monotone but can be increasing or decreasing

  # if a is increasing INVARIANT: a[amin] < target < a[amax]
  N <- length(a)
  if ((a[N] - target) * (a[N] - a[1]) <= 0) {
    return(N)
  }
  if ((a[1] - target) * (a[N] - a[1]) >= 0) {
    return(1)
  }
  amin <- 1
  amax <- N
  while (amin + 1 < amax) {
    amid <- floor((amin + amax)/2)
    if ((a[amid] - target) * (a[amax] - a[amid]) < 0) {
      amin <- amid
    } else if ((a[amid] - target) * (a[amax] - a[amid]) > 0) {
      amax <- amid
    } else {
      # a[amid] == a[amax] or a[amid] == target In both cases, a[amid] ==
      # target
      return(amid)
    }
  }
  if (xor(lower, a[amin] > a[amax])) {
    # (lower && a[amin] < a[amax]) || (!lower && a[min] > a[max])
    # If increasing and we want the lower, we take amin
    # If decreasing and we want the bigger, we take amin too
    return(amin)
  } else {
    return(amax)
  }
}

# Interpol
Interpol <- function(t, y) {
  # y: sample
  # t : warping function

  m <- length(y)
  # t <= m-1
  # because if t > m-1, y[ti+1] will be NA when we compute g
  valid <- 1 <= t & t <= m-1 # FIXME it was '<' in Bubble v2
  s <- (1:m)[valid]
  ti <- floor(t[s])
  tr <- t[s] - ti
  g <- y[ti + 1] - y[ti]
  f <- y[ti] + tr * g
  list(f=f, s=s, g=g)
}

# vec2mat
vec2mat <- function(vec) {
  return(matrix(vec, nrow = 1, dimnames = list(c(1), names(vec))))

}

# binarySearch
binarySearch <- function(a, target, lower = TRUE) {
  # search the index i in a such that a[i] == target
  # if it doesn't exists and lower, it searches the closer a[i] such that a[i] < target
  # if !lower, it seraches the closer a[i] such that a[i] > target
  # a should be monotone but can be increasing or decreasing

  # if a is increasing INVARIANT: a[amin] < target < a[amax]
  N <- length(a)
  if ((a[N] - target) * (a[N] - a[1]) <= 0) {
    return(N)
  }
  if ((a[1] - target) * (a[N] - a[1]) >= 0) {
    return(1)
  }
  amin <- 1
  amax <- N
  while (amin + 1 < amax) {
    amid <- floor((amin + amax)/2)
    if ((a[amid] - target) * (a[amax] - a[amid]) < 0) {
      amin <- amid
    } else if ((a[amid] - target) * (a[amax] - a[amid]) > 0) {
      amax <- amid
    } else {
      # a[amid] == a[amax] or a[amid] == target In both cases, a[amid] ==
      # target
      return(amid)
    }
  }
  if (xor(lower, a[amin] > a[amax])) {
    # (lower && a[amin] < a[amax]) || (!lower && a[min] > a[max])
    # If increasing and we want the lower, we take amin
    # If decreasing and we want the bigger, we take amin too
    return(amin)
  } else {
    return(amax)
  }
}


# indexInterval
indexInterval <- function (a, from, to, inclusive=TRUE) {
  # If inclusive and from <= to, we need to take the lower
  # If not inclusive and from > to, we need to take the lower too
  lowerFrom <- xor(inclusive, from > to)
  fromIndex <- binarySearch(a, from, lowerFrom)
  toIndex <- binarySearch(a, to, !lowerFrom)
  return(fromIndex:toIndex)
}


## ==========================
# GroupDelayCorrection
## ==========================
GroupDelayCorrection <- function(Fid_data, Fid_info = NULL, group_delay = NULL) {


    # Data initialisation and checks ----------------------------------------------

    begin_info <- beginTreatment("GroupDelayCorrection", Fid_data, Fid_info)
    Fid_data <- begin_info[["Signal_data"]]
    dimension_names <- dimnames(Fid_data)
    Fid_info <- begin_info[["Signal_info"]]
    checkArg(group_delay, c("num", "pos0"), can.be.null = TRUE)
    # if Fid_info and group_delay are NULL, getArg will generate an error

    group_delay <- getArg(group_delay, Fid_info, "GRPDLY", can.be.absent = TRUE)

    if (is.null(group_delay)) {

      # See DetermineBrukerDigitalFilter.m in matNMR MATLAB library
      group_delay_matrix <- matrix(c(44.75, 46, 46.311, 33.5, 36.5, 36.53, 66.625,
                                     48, 47.87, 59.0833, 50.1667, 50.229, 68.5625, 53.25, 53.289, 60.375,
                                     69.5, 69.551, 69.5313, 72.25, 71.6, 61.0208, 70.1667, 70.184, 70.0156,
                                     72.75, 72.138, 61.3438, 70.5, 70.528, 70.2578, 73, 72.348, 61.5052, 70.6667,
                                     70.7, 70.3789, 72.5, 72.524, 61.5859, 71.3333, NA, 70.4395, 72.25, NA,
                                     61.6263, 71.6667, NA, 70.4697, 72.125, NA, 61.6465, 71.8333, NA, 70.4849,
                                     72.0625, NA, 61.6566, 71.9167, NA, 70.4924, 72.0313, NA), nrow = 21,
                                   ncol = 3, byrow = TRUE, dimnames = list(c(2, 3, 4, 6, 8, 12, 16, 24,
                                                                             32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048),
                                                                           c(10, 11, 12)))
      decim <- Fid_info[1, "DECIM"]
      dspfvs <- Fid_info[1, "DSPFVS"]
      if (!(toString(decim) %in% rownames(group_delay_matrix)))  {
        stop(paste("Invalid DECIM", decim, "it should be one of", rownames(group_delay_matrix)))
      }
      if (!(toString(dspfvs) %in% colnames(group_delay_matrix)))  {
        stop(paste("Invalid DSPFVS", dspfvs, "it should be one of", colnames(group_delay_matrix)))
      }
      group_delay <- group_delay_matrix[toString(decim), toString(dspfvs)]
      if (is.na(group_delay))  {
        stop(paste("Invalid DECIM", decim, "for DSPFVS", dspfvs))
      }
    }
    m <- ncol(Fid_data)
    n <- nrow(Fid_data)

    # GroupDelayCorrection ----------------------------------------------

    # We do the shifting in the Fourier domain because the shift can be non-integer.
    # That way we automatically have the circular behaviour of the shift and the
    # interpolation if it is non-integer.

    Spectrum <- t(stats::mvfft(t(Fid_data)))

    # Spectrum <- FourierTransform(Fid_data, Fid_info)
    p <- ceiling(m/2)
    new_index <- c((p + 1):m, 1:p)
    Spectrum <- Spectrum[,new_index]
    Spectrum <- matrix(data = Spectrum, ncol = m, nrow = n)

    Omega <- (0:(m - 1))/m
    i <- complex(real = 0, imaginary = 1)

    if (n>1) {
      Spectrum <- sweep(Spectrum, MARGIN = 2, exp(i * group_delay * 2 * pi * Omega), `*`)
      Spectrum <- Spectrum[,new_index]
    }else {
      Spectrum <- Spectrum* exp(i * group_delay * 2 * pi * Omega)
      Spectrum <- Spectrum[new_index]
      Spectrum <- matrix(data = Spectrum, ncol = m, nrow = n)
    }


    Fid_data <- t(stats::mvfft(t(Spectrum), inverse = TRUE))/m
    colnames(Fid_data) <- dimension_names[[2]]
    rownames(Fid_data) <- dimension_names[[1]]

    # Data finalisation ----------------------------------------------

    return(endTreatment("GroupDelayCorrection", begin_info, Fid_data))
  }

## ==========================
# SolventSuppression
## ==========================
SolventSuppression <- function(Fid_data, lambda.ss = 1e+06, ptw.ss = TRUE,
                               plotSolvent = F, returnSolvent = F) {

  # Data initialisation and checks ----------------------------------------------

  begin_info <- beginTreatment("SolventSuppression", Fid_data)
  Fid_data <- begin_info[["Signal_data"]]
  checkArg(ptw.ss, c("bool"))
  checkArg(lambda.ss, c("num", "pos0"))


  # difsm function definition for the smoother -----------------------------------

  if (ptw.ss) {
    # Use of the function in ptw that smoothes signals with a finite difference
    # penalty of order 2
    difsm <- ptw::difsm
  } else  {
    # Or manual implementation based on sparse matrices for large data series (cf.
    # Eilers, 2003. 'A perfect smoother')
    difsm <- function(y, d = 2, lambda)  {

      m <- length(y)
      # Sparse identity matrix m x m
      E <- Matrix::Diagonal(m)
      D <- Matrix::diff(E, differences = d)
      A <- E + lambda.ss * Matrix::t(D) %*% D
      # base::chol does not take into account that A is sparse and is extremely slow
      C <- Matrix::chol(A)
      x <- Matrix::solve(C, Matrix::solve(Matrix::t(C), y))
      return(as.numeric(x))
    }
  }

  # Solvent Suppression ----------------------------------------------

  n <- dim(Fid_data)[1]
  if (returnSolvent)  {
    SolventRe <- Fid_data
    SolventIm <- Fid_data
  }
  for (i in 1:n) {
    FidRe <- Re(Fid_data[i, ])
    FidIm <- Im(Fid_data[i, ])
    solventRe <- difsm(y = FidRe, lambda = lambda.ss)
    solventIm <- difsm(y = FidIm, lambda = lambda.ss)

    if (plotSolvent)  {
      m <- length(FidRe)
      graphics::plot(1:m, FidRe, type = "l", col = "red")
      graphics::lines(1:m, solventRe, type = "l", col = "blue")
      graphics::plot(1:m, FidIm, type = "l", col = "red")
      graphics::lines(1:m, solventIm, type = "l", col = "blue")
    }
    FidRe <- FidRe - solventRe
    FidIm <- FidIm - solventIm
    Fid_data[i, ] <- complex(real = FidRe, imaginary = FidIm)
    if (returnSolvent) {
      SolventRe[i, ] <- solventRe
      SolventIm[i, ] <- solventIm
    }
  }


  # Data finalisation ----------------------------------------------

  Fid_data <- endTreatment("SolventSuppression", begin_info, Fid_data)
  if (returnSolvent) {
    return(list(Fid_data = Fid_data, SolventRe = SolventRe, SolventIm = SolventIm))
  } else {
    return(Fid_data)
  }
}


## ==========================
# Apodization
# =============================
Apodization <- function(Fid_data, Fid_info = NULL, DT = NULL,
                        type.apod = c("exp","cos2", "blockexp", "blockcos2",
                                      "gauss", "hanning", "hamming"), phase = 0, rectRatio = 1/2,
                        gaussLB = 1, expLB = 1, plotWindow = F, returnFactor = F) {

  # Data initialisation and checks ----------------------------------------------
  begin_info <- beginTreatment("Apodization", Fid_data, Fid_info)
  Fid_data <- begin_info[["Signal_data"]]
  Fid_info <- begin_info[["Signal_info"]]
  # Data check
  type.apod <- match.arg(type.apod)
  checkArg(DT, c("num", "pos"), can.be.null = TRUE)
  checkArg(phase, c("num"))

  # Apodization ----------------------------------------------
  DT <- getArg(DT, Fid_info, "DT")  # Dwell Time
  m <- ncol(Fid_data)
  t <- (1:m) * DT  # Time
  rectSize <- ceiling(rectRatio * m)
  gaussLB <- (gaussLB/(sqrt(8 * log(2))))
  # Define the types of apodization:
  switch(type.apod, exp = {
    # exponential
    Factor <- exp(-expLB * t)
  }, cos2 = {
    # cos^2
    c <- cos((1:m) * pi/(2 * m) - phase * pi/2)
    Factor <- c * c
  }, blockexp = {
    # block and exponential
    Factor <- c(rep.int(1, rectSize), rep.int(0, m - rectSize))
    # | rectSize | 1 ___________ | \ 0 \____
    Factor[(rectSize + 1):m] <- exp(-expLB * t[1:(m - rectSize)])
  }, blockcos2 = {
    # block and cos^2
    Factor <- c(rep.int(1, rectSize), rep.int(0, m - rectSize))
    c <- cos((1:(m - rectSize)) * pi/(2 * (m - rectSize)))
    Factor[(rectSize + 1):m] <- c * c
  }, gauss = {
    # gaussian
    Factor <- exp(-(gaussLB * t)^2/2)
    Factor <- Factor/max(Factor)
  }, hanning = {
    # Hanning
    Factor <- 0.5 + 0.5 * cos((1:m) * pi/m - phase * pi)
  }, hamming = {
    # Hamming
    Factor <- 0.54 + 0.46 * cos((1:m) * pi/m - phase * pi)
  })
  if (plotWindow) {
    graphics::plot(1:m, Factor, "l")
    # dev.off() # device independent, it is the responsability of the
    # caller to do it
  }
  # Apply the apodization factor on the spectra
  Fid_data <- sweep(Fid_data, MARGIN = 2, Factor, `*`)

  # Data finalisation ----------------------------------------------
  Fid_data <- endTreatment("Apodization", begin_info, Fid_data)
  if (returnFactor) {
    return(list(Fid_data = Fid_data, Factor = Factor))
  } else {
    return(Fid_data)
  }
}


## ====================================================
# FourierTransform
## ====================================================


# fftshift1D2D
fftshift1D2D <- function(x) {
  vec <- F
  if (is.vector(x)) {
    x <- vec2mat(x)
    vec <- T
  }
  m <- dim(x)[2]
  p <- ceiling(m/2)
  new_index <- c((p + 1):m, 1:p)
  y <- x[, new_index, drop = vec]
}

# FourierTransform
FourierTransform <- function(Fid_data, Fid_info = NULL, SW_h = NULL, SW = NULL, O1 = NULL, reverse.axis = TRUE) {

  # Data initialisation and checks ----------------------------------------------
  begin_info <- beginTreatment("FourierTransform", Fid_data, Fid_info)
  Fid_data <- begin_info[["Signal_data"]]
  Fid_info <- begin_info[["Signal_info"]]

  m <- ncol(Fid_data)
  n <- nrow(Fid_data)

  if (is.null(SW_h)) {
    SW_h <- getArg(SW_h, Fid_info, "SW_h")
  }

  if (is.null(SW)) {
    SW <- getArg(SW, Fid_info, "SW")  # Sweep Width in ppm (semi frequency scale in ppm)
  }


  if (is.null(O1)) {
    O1 <- getArg(O1, Fid_info, "O1")
  }


  checkArg(reverse.axis, c("bool"))

  # Fourier Transformation ----------------------------------------------
  # mvfft does the unnormalized fourier transform (see ?mvfft), so we need divide
  # by m.  It does not matter a lot in our case since the spectrum will be
  # normalized.

  # FT
  RawSpect_data <- fftshift1D2D(t(stats::mvfft(t(Fid_data))))
  # recover the frequencies values
  f <- ((0:(m - 1)) - floor(m/2)) * Fid_info[1, "SW_h"]/(m-1)

  if(reverse.axis == TRUE) {
    revind <- rev(1:m)
    RawSpect_data <- RawSpect_data[,revind] # reverse the spectrum
  }

  RawSpect_data <- matrix(RawSpect_data, nrow = n, ncol = m)
  colnames(RawSpect_data) <- f
  rownames(RawSpect_data) <- rownames(Fid_data)

  # PPM conversion ----------------------------------------------

  # The Sweep Width has to be the same since the column names are the same

  ppmInterval <- SW/(m-1)

  O1index = round((m+1)/2+O1*(m - 1) / SW_h)

  end <- O1index - m
  start <- O1index -1
  ppmScale <- (start:end) * ppmInterval
  RawSpect_data <- matrix(RawSpect_data, nrow = n, ncol =  -(end - start) + 1, dimnames =
                            list(rownames(RawSpect_data), ppmScale))


  # Data finalisation ----------------------------------------------
  return(endTreatment("FourierTransform", begin_info, RawSpect_data))
}

## ====================================================
#   InternalReferencing
## ====================================================

InternalReferencing <- function(Spectrum_data, Fid_info, method = c("max", "thres"),
                                range = c("nearvalue", "all", "window"), ppm.value = 0,
                                direction = "left", shiftHandling = c("zerofilling", "cut",
                                                                      "NAfilling", "circular"), c = 2, pc = 0.02, fromto.RC = NULL,
                                ppm.ir = TRUE, rowindex_graph = NULL) {


  # Data initialisation and checks ----------------------------------------------

  begin_info <- beginTreatment("InternalReferencing", Spectrum_data, Fid_info)
  Spectrum_data <- begin_info[["Signal_data"]]
  Fid_info <- begin_info[["Signal_info"]]


  ######## Check input arguments

  range <- match.arg(range)
  shiftHandling <- match.arg(shiftHandling)
  method <- match.arg(method)
  plots <- NULL

  checkArg(ppm.ir, c("bool"))
  checkArg(unlist(fromto.RC), c("num"), can.be.null = TRUE)
  checkArg(pc, c("num"))
  checkArg(ppm.value, c("num"))
  checkArg(rowindex_graph, "num", can.be.null = TRUE)

  # fromto.RC : if range == "window",
  # fromto.RC defines the spectral window where to search for the peak
  if (!is.null(fromto.RC)) {
    diff <- diff(unlist(fromto.RC))[1:length(diff(unlist(fromto.RC)))%%2 !=0]
    for (i in 1:length(diff)) {
      if (diff[i] >= 0)  {
        fromto <- c(fromto.RC[[i]][2], fromto.RC[[i]][1])
        fromto.RC[[i]] <- fromto
      }
    }
  }


  # findTMSPpeak function ----------------------------------------------
  # If method == "tresh", findTMSPpeak will find the position of the first
  # peak (from left or right) which is higher than a predefined threshold
  # and is computed as: c*(cumulated_mean/cumulated_sd)
  findTMSPpeak <- function(ft, c = 2, direction = "left") {
    ft <- Re(ft)  # extraction de la partie réelle
    N <- length(ft)
    if (direction == "left") {
      newindex <- rev(1:N)
      ft <- rev(ft)
    }
    thres <- 99999
    i <- 1000  # Start at point 1000 to find the peak
    vect <- ft[1:i]

    while (vect[i] <= (c * thres)) {
      cumsd <- stats::sd(vect)
      cummean <- mean(vect)
      thres <- cummean + 3 * cumsd
      i <- i + 1
      vect <- ft[1:i]
    }
    if (direction == "left") {
      v <- newindex[i]
    } else {v <- i}

    if (is.na(v))  {
      warning("No peak found, need to lower the threshold.")
      return(NA)
    } else  {
      # recherche dans les 1% de points suivants du max trouve pour etre au sommet du
      # pic
      d <- which.max(ft[v:(v + N * 0.01)])
      new.peak <- v + d - 1  # nouveau pic du TMSP si d > 0

      if (names(which.max(ft[v:(v + N * 0.01)])) != names(which.max(ft[v:(v + N * 0.03)])))   {
        # recherche dans les 3% de points suivants du max trouve pour eviter un faux
        # positif
        warning("the TMSP peak might be located further away, increase the threshold to check.")
      }
      return(new.peak)
    }
  }


  # Define the search zone  ----------------------------------------

  n <- nrow(Spectrum_data)
  m <- ncol(Spectrum_data)

  # The Sweep Width (SW) has to be the same since the column names are the same
  SW <- Fid_info[1, "SW"]  # Sweep Width in ppm
  ppmInterval <- SW/(m-1)  # size of a ppm interval

  # range: How the search zone is defined ("all", "nearvalue" or "window")
  if (range == "all") {

    Data <- Spectrum_data

  } else { # range = "nearvalue" or "window"
    # Need to define colindex (column indexes) to apply indexInterval on it

    if (range == "nearvalue")  {

      fromto.RC <- list(c(-(SW * pc)/2 + ppm.value, (SW * pc)/2 + ppm.value))  # automatic fromto values in ppm
      colindex <- as.numeric(colnames(Spectrum_data))

    } else {
      # range == "window"
      # fromto.RC is already user-defined
      if (ppm.ir == TRUE)   {
        colindex <- as.numeric(colnames(Spectrum_data))
      } else   {
        colindex <- 1:m
      }
    }

    # index intervals taking into account the different elements in the list fromto.RC
    Int <- vector("list", length(fromto.RC))
    for (i in 1:length(fromto.RC))  {
      Int[[i]] <- indexInterval(colindex, from = fromto.RC[[i]][1],
                                to = fromto.RC[[i]][2], inclusive = TRUE)
    }

    # define Data as the cropped spectrum including the index intervals
    # outside the research zone, the intensities are set to the minimal
    # intensity of the research zone

    if (n > 1){
      Data <- apply(Re(Spectrum_data[,unlist(Int)]),1, function(x) rep(min(x), m))
      Data <- t(Data)
      Data[,unlist(Int)] <- Re(Spectrum_data[,unlist(Int)])
    } else {
      Data <- rep(min(Re(Spectrum_data)) ,m)
      Data[unlist(Int)] <- Re(Spectrum_data[unlist(Int)])
    }

  }


  # Apply the peak location search method ('thres' or 'max') on spectra
  # -----------------------------------------------------------------------

  if (method == "thres") {
    TMSPpeaks <- apply(Data, 1, findTMSPpeak, c = c, direction = direction)
  } else { # method == "max
    TMSPpeaks <- apply(Re(Data), 1, which.max)
  }


  # Shift spectra according to the TMSPpeaks found --------------------------------
  # Depends on the shiftHandling

  # TMSPpeaks is a column index
  maxpeak <- max(TMSPpeaks) # max accross spectra
  minpeak <- min(TMSPpeaks) # min accross spectra


  if (shiftHandling %in% c("zerofilling", "NAfilling",  "cut")) {
    fill <- NA
    if (shiftHandling == "zerofilling")  {
      fill <- 0
    }

    start <-  maxpeak - 1
    end <- minpeak - m

    # ppm values of each interval for the whole spectral range of the spectral matrix
    ppmScale <- (start:end) * ppmInterval

    # check if ppm.value is in the ppmScale interval
    if(ppm.value < min(ppmScale) | ppm.value > max(ppmScale)) {
      warning("ppm.value = ", ppm.value, " is not in the ppm interval [",
              round(min(ppmScale),2), ",", round(max(ppmScale),2), "], and is set to its default ppm.value 0")
      ppm.value = 0
    }

    # if ppm.value != 0, ppmScale is adapted
    ppmScale <- ppmScale + ppm.value

    # create the spectral matrix with realigned spectra
    Spectrum_data_calib <- matrix(fill, nrow = n, ncol =  -(end - start) + 1,
                                  dimnames = list(rownames(Spectrum_data), ppmScale))

    # fills in Spectrum_data_calib with shifted spectra
    for (i in 1:n)  {
      shift <- (1 - TMSPpeaks[i]) + start
      Spectrum_data_calib[i, (1 + shift):(m + shift)] <- Spectrum_data[i, ]
    }

    if (shiftHandling == "cut")  {
      Spectrum_data_calib = as.matrix(stats::na.omit(t(Spectrum_data_calib)))
      Spectrum_data_calib = t(Spectrum_data_calib)
      base::attr(Spectrum_data_calib, "na.action") <- NULL
    }


  } else {
    # circular
    start <- 1 - maxpeak
    end <- m - maxpeak

    ppmScale <- (start:end) * ppmInterval

    # check if ppm.value in is the ppmScale interval
    if(ppm.value < min(ppmScale) | ppm.value > max(ppmScale)) {
      warning("ppm.value = ", ppm.value, " is not in the ppm interval [",
              round(min(ppmScale),2), ",", round(max(ppmScale),2), "], and is set to its default ppm.value 0")
      ppm.value = 0
    }

    # if ppm.value != 0, ppmScale is adapted
    ppmScale <- ppmScale + ppm.value

    # create the spectral matrix with realigned spectra
    Spectrum_data_calib <- matrix(nrow=n, ncol=end-start+1,
                                  dimnames=list(rownames(Spectrum_data), ppmScale))

    # fills in Spectrum_data_calib with shifted spectra
    for (i in 1:n) {
      shift <- (maxpeak-TMSPpeaks[i])
      Spectrum_data_calib[i,(1+shift):m] <- Spectrum_data[i,1:(m-shift)]
      if (shift > 0) {
        Spectrum_data_calib[i,1:shift] <- Spectrum_data[i,(m-shift+1):m]
      }
    }
  }


  # Plot of the spectra (depending on rowindex_graph) ---------------------------------------------------

  ppm = xstart = value = xend = Legend = NULL # only for R CMD check


  # with the search zone for TMSP and the location of the peaks just found
  if (!is.null(rowindex_graph)) {

    if (range == "window")  {
      if (ppm.ir == TRUE)   {
        fromto <- fromto.RC
      } else  {
        fromto <- list()
        idcol <- as.numeric(colnames(Spectrum_data))
        for (i in 1:length(fromto.RC)) {
          fromto[[i]] <- as.numeric(colnames(Spectrum_data))[fromto.RC[[i]]]
        }
      }
    } else {
      fromto <- fromto.RC
    }

    # TMSPloc in ppm
    TMSPloc <- as.numeric(colnames(Spectrum_data))[TMSPpeaks[rowindex_graph]]

    # num plot per window
    num.stacked <- 6

    # rectanglar bands of color for the search zone
    rects <- data.frame(xstart = sapply(fromto, function(x) x[[1]]),
                        xend = sapply(fromto, function(x) x[[2]]),
                        Legend = "Peak search zone and location")

    # vlines for TMSP peak
    addlines <- data.frame(rowname = rownames(Spectrum_data)[rowindex_graph],TMSPloc)

    nn <- length(rowindex_graph)
    i <- 1
    j <- 1
    plots <- vector(mode = "list", length = ceiling(nn/num.stacked))

    while (i <= nn) {

      last <- min(i + num.stacked - 1, nn)

      melted <- reshape2::melt(Re(Spectrum_data[i:last, ]),
                               varnames = c("rowname", "ppm"))

      plots[[j]] <- ggplot2::ggplot() + ggplot2::theme_bw() +
        ggplot2::geom_line(data = melted,
                           ggplot2::aes(x = ppm, y = value)) +
        ggplot2::geom_rect(data = rects, ggplot2::aes(xmin = xstart, xmax = xend,
                                                      ymin = -Inf, ymax = Inf, fill = Legend), alpha = 0.4) +
        ggplot2::facet_grid(rowname ~ ., scales = "free_y") +
        ggplot2::theme(legend.position = "none") +
        ggplot2::geom_vline(data = addlines, ggplot2::aes(xintercept = TMSPloc),
                            color = "red", show.legend = TRUE) +
        ggplot2::ggtitle("Peak search zone and location") +
        ggplot2::theme(legend.position = "top", legend.text = ggplot2::element_text())


      if ((melted[1, "ppm"] - melted[(dim(melted)[1]), "ppm"]) > 0) {
        plots[[j]] <- plots[[j]] + ggplot2::scale_x_reverse()
      }

      i <- last + 1
      j <- j + 1
    }

    plots
  }


  # Return the results ----------------------------------------------
  Spectrum_data <- endTreatment("InternalReferencing", begin_info, Spectrum_data_calib)

  if (is.null(plots)) {
    return(Spectrum_data)
  } else {
    return(list(Spectrum_data = Spectrum_data, plots = plots))
  }

}

## ====================================================
# ZeroOrderPhaseCorrection
## ====================================================

ZeroOrderPhaseCorrection <- function(Spectrum_data, type.zopc = c("rms", "manual", "max"),
                                     plot_rms = NULL, returnAngle = FALSE, createWindow = TRUE,
                                     angle = NULL, plot_spectra = FALSE,
                                     ppm.zopc = TRUE, exclude.zopc = list(c(5.1,4.5))) {


  # Data initialisation and checks ----------------------------------------------

  # Entry arguments definition:
  # plot_rms : graph of rms criterion returnAngle : if TRUE, returns avector of
  # optimal angles createWindow : for plot_rms plots angle : If angle is not NULL,
  # spectra are rotated according to the angle vector values
  # plot_spectra : if TRUE, plot rotated spectra


  begin_info <- beginTreatment("ZeroOrderPhaseCorrection", Spectrum_data)
  Spectrum_data <- begin_info[["Signal_data"]]
  n <- nrow(Spectrum_data)
  m <- ncol(Spectrum_data)

  rnames <- rownames(Spectrum_data)

  # Check input arguments
  type.zopc <- match.arg(type.zopc)
  checkArg(ppm.zopc, c("bool"))
  checkArg(unlist(exclude.zopc), c("num"), can.be.null = TRUE)


  # type.zopc in c("max", "rms") -----------------------------------------
  if (type.zopc %in% c("max", "rms")) {
    # angle is found by optimization

    # rms function to be optimised
    rms <- function(ang, y, meth = c("max", "rms"))  {
      # if (debug_plot) { graphics::abline(v=ang, col='gray60') }
      roty <- y * exp(complex(real = 0, imaginary = ang))  # spectrum rotation
      Rey <- Re(roty)

      if (meth == "rms")  {
        ReyPos <- Rey[Rey >= 0]  # select positive intensities
        POSss <- sum((ReyPos)^2, na.rm = TRUE)  # SS for positive intensities
        ss <- sum((Rey)^2, na.rm = TRUE)  #  SS for all intensities
        return(POSss/ss)  # criterion : SS for positive values / SS for all intensities
      } else  {
        maxi <- max(Rey, na.rm = TRUE)
        return(maxi)
      }
    }


    # Define the interval where to search for (by defining Data)
    if (is.null(exclude.zopc)) {
      Data <- Spectrum_data
    } else  {

      # if ppm.zopc == TRUE, then exclude.zopc is in the colnames values, else, in the column
      # index
      if (ppm.zopc == TRUE)  {
        colindex <- as.numeric(colnames(Spectrum_data))
      } else  {
        colindex <- 1:m
      }

      # Second check for the argument exclude.zopc
      diff <- diff(unlist(exclude.zopc))[1:length(diff(unlist(exclude.zopc)))%%2 !=0]
      for (i in 1:length(diff)) {
        if (ppm.zopc == TRUE & diff[i] >= 0)  {
          stop(paste("Invalid region removal because from <= to in ppm.zopc"))
        } else if (ppm.zopc == FALSE & diff[i] <= 0) {stop(paste("Invalid region removal because from >= to in column index"))}
      }


      Int <- vector("list", length(exclude.zopc))
      for (i in 1:length(exclude.zopc))  {
        Int[[i]] <- indexInterval(colindex, from = exclude.zopc[[i]][1],
                                  to = exclude.zopc[[i]][2], inclusive = TRUE)
      }

      vector <- rep(1, m)
      vector[unlist(Int)] <- 0
      if (n > 1)  {
        Data <- sweep(Spectrum_data, MARGIN = 2, FUN = "*", vector)  # Cropped_Spectrum
      } else   {
        Data <- Spectrum_data * vector
      }  # Cropped_Spectrum
    }


    # angles computation
    Angle <- c()
    for (k in 1:n)
    {
      # The function is rms is periodic (period 2pi) and it seems that there is a phase
      # x such that rms is unimodal (i.e. decreasing then increasing) on the interval
      # [x; x+2pi].  However, if we do the optimization for example on [x-pi; x+pi],
      # instead of being decreasing then increasing, it might be increasing then
      # decreasing in which case optimize, thinking it is a valley will have to choose
      # between the left or the right of this hill and if it chooses wrong, it will end
      # up at like x-pi while the minimum is close to x+pi.

      # Supposing that rms is unimodal, the classical 1D unimodal optimization will
      # work in either [-pi;pi] or [0;2pi] (this is not easy to be convinced by that I
      # agree) and we can check which one it is simply by the following trick

      f0 <- rms(0, Data[k, ],meth = type.zopc)
      fpi <- rms(pi, Data[k, ], meth = type.zopc)
      if (f0 < fpi) {
        interval <- c(-pi, pi)
      } else {
        interval <- c(0, 2 * pi)
      }

      # graphs of rms criteria
      debug_plot <- F  # rms should not plot anything now, only when called by optimize
      if (!is.null(plot_rms) && rnames[k] %in% plot_rms) {
        x <- seq(min(interval), max(interval), length.out = 100)
        y <- rep(1, 100)
        for (K in (1:100))   {
          y[K] <- rms(x[K], Data[k, ],  meth = type.zopc)
        }
        if (createWindow == TRUE)  {
          grDevices::dev.new(noRStudioGD = FALSE)
        }
        graphics::plot(x, y, main = paste("Criterion maximization \n",
                                          rownames(Data)[k]), ylim = c(0, 1.1),
                       ylab = "positiveness criterion", xlab = "angle ")
        debug_plot <- T
      }

      # Best angle
      best <- stats::optimize(rms, interval = interval, maximum = TRUE,
                              y = Data[k,],  meth = type.zopc)
      ang <- best[["maximum"]]


      if (debug_plot)  {
        graphics::abline(v = ang, col = "black")
        graphics::text(x = (ang+0.1*ang), y = (y[ang]-0.1*y[ang]), labels = round(ang, 3))
      }

      # Spectrum rotation
      Spectrum_data[k, ] <- Spectrum_data[k, ] * exp(complex(real = 0, imaginary = ang))
      Angle <- c(Angle, ang)
    }


  } else {
    # type.zopc is "manual" -------------------------------------------------------
    # if Angle is already specified and no optimisation is needed
    Angle <- angle

    if (!is.vector(angle)) {
      stop("angle is not a vector")
    }

    if (!is.numeric(angle))  {
      stop("angle is not a numeric")
    }

    if (length(angle) != n) {
      stop(paste("angle has length", length(angle), "and there are", n, "spectra to rotate."))
    }
    for (k in 1:n)  {
      Spectrum_data[k, ] <- Spectrum_data[k, ] * exp(complex(real = 0, imaginary = - angle[k]))
    }
  }


  #  Draw spectra
  if (plot_spectra == TRUE) {
    nn <- ceiling(n/4)
    i <- 1
    for (k in 1:nn)  {
      if (createWindow == TRUE)  {
        grDevices::dev.new(noRStudioGD = FALSE)
      }
      graphics::par(mfrow = c(4, 2))
      while (i <= n)   {
        last <- min(i + 4 - 1, n)
        graphics::plot(Re(Spectrum_data[i, ]), type = "l", ylab = "intensity",
                       xlab = "Index", main = paste0(rownames(Spectrum_data)[i], " - Real part"))
        graphics::plot(Im(Spectrum_data[i, ]), type = "l", ylab = "intensity",
                       xlab = "Index", main = paste0(rownames(Spectrum_data)[i], " - Imaginary part"))
        i <- i + 1
      }
      i <- last + 1
    }
  }


  # Data finalisation ----------------------------------------------

  Spectrum_data <- endTreatment("ZeroOrderPhaseCorrection", begin_info, Spectrum_data)
  if (returnAngle) {
    return(list(Spectrum_data = Spectrum_data, Angle = Angle))
  } else {
    return(Spectrum_data)
  }
}


## ====================================================
# Baseline Correction
## ====================================================
BaselineCorrection <- function(Spectrum_data, ptw.bc = TRUE, maxIter = 42,
                               lambda.bc = 1e+07, p.bc = 0.05, eps = 1e-08,
                               ppm.bc = TRUE, exclude.bc = list(c(5.1,4.5)),
                               returnBaseline = F) {

  # Data initialisation ----------------------------------------------
  begin_info <- beginTreatment("BaselineCorrection", Spectrum_data, force.real = T)
  Spectrum_data <- begin_info[["Signal_data"]]
  p <- p.bc
  lambda <- lambda.bc
  n <- dim(Spectrum_data)[1]
  m <- dim(Spectrum_data)[2]


  # Data check
  checkArg(ptw.bc, c("bool"))
  checkArg(maxIter, c("int", "pos"))
  checkArg(lambda, c("num", "pos0"))
  checkArg(p.bc, c("num", "pos0"))
  checkArg(eps, c("num", "pos0"))
  checkArg(returnBaseline, c("bool"))
  checkArg(ppm.bc, c("bool"))
  checkArg(unlist(exclude.bc), c("num"), can.be.null = TRUE)

  # Define the interval where to search for (by defining Data)
  if (is.null(exclude.bc)) {
    exclude_index <- NULL
  } else  {
    # if ppm.bc == TRUE, then exclude.bc is in the colnames values, else, in the column
    # index
    if (ppm.bc == TRUE)  {
      colindex <- as.numeric(colnames(Spectrum_data))
    } else  {
      colindex <- 1:m
    }

    Int <- vector("list", length(exclude.bc))
    for (i in 1:length(exclude.bc))  {
      Int[[i]] <- indexInterval(colindex, from = exclude.bc[[i]][1],
                                to = exclude.bc[[i]][2], inclusive = TRUE)
    }
    exclude_index <- unlist(Int)
  }

  # Baseline Correction implementation definition ----------------------

  # 2 Ways: either use the function asysm from the ptw package or by
  # built-in functions
  if (ptw.bc) {
    asysm <- ptw::asysm
  } else {
    difsmw <- function(y, lambda, w, d) {
      # Weighted smoothing with a finite difference penalty cf Eilers, 2003.
      # (A perfect smoother)
      # y: signal to be smoothed
      # lambda: smoothing parameter
      # w: weights (use0 zeros for missing values)
      # d: order of differences in penalty (generally 2)
      m <- length(y)
      W <- Matrix::Diagonal(x=w)
      E <- Matrix::Diagonal(m)
      D <- Matrix::diff(E, differences = d)
      C <- Matrix::chol(W + lambda * t(D) %*% D)
      x <- Matrix::solve(C, Matrix::solve(t(C), w * y))
      return(as.numeric(x))

    }
    asysm <- function(y, lambda, p, eps, exclude_index) {
      # Baseline estimation with asymmetric least squares
      # y: signal
      # lambda: smoothing parameter (generally 1e5 to 1e8)
      # p: asymmetry parameter (generally 0.001)
      # d: order of differences in penalty (generally 2)
      # eps: 1e-8 in ptw package
      m <- length(y)
      w <- rep(1, m)
      i <- 1
      repeat {
        z <- difsmw(y, lambda, w, d = 2)
        w0 <- w
        p_vect <- rep((1-p), m) # if y <= z + eps
        p_vect[y > z + eps | y < 0] <- p  # if y > z + eps | y < 0
        if(!is.null(exclude_index)){
          p_vect[exclude_index] <- 0 # if exclude area
        }

        w <- p_vect
        # w <- p * (y > z + eps | y < 0) + (1 - p) * (y <= z + eps)

        if (sum(abs(w - w0)) == 0) {
          break
        }
        i <- i + 1
        if (i > maxIter) {
          warning("cannot find Baseline estimation in asysm")
          break
        }
      }
      return(z)
    }
  }

  # Baseline estimation ----------------------------------------------
  Baseline <- matrix(NA, nrow = nrow(Spectrum_data), ncol = ncol(Spectrum_data))

  # for (k in 1:n) {
  # Baseline[k, ] <- asysm(y = Spectrum_data[k, ], lambda = lambda, p = p, eps = eps)

  if (ptw.bc ){
    Baseline <- apply(Spectrum_data,1, asysm, lambda = lambda, p = p,
                      eps = eps)
  }else {
    Baseline <- apply(Spectrum_data,1, asysm, lambda = lambda, p = p,
                      eps = eps, exclude_index = exclude_index)
  }


  Spectrum_data <- Spectrum_data - t(Baseline)
  # }

  # Data finalisation ----------------------------------------------
  Spectrum_data <- endTreatment("BaselineCorrection", begin_info, Spectrum_data)  # FIXME create removeImaginary filter ??

  if (returnBaseline) {
    return(list(Spectrum_data = Spectrum_data, Baseline = Baseline))
  } else {
    return(Spectrum_data)
  }
}


## ====================================================
# NegativeValuesZeroing
## ====================================================

NegativeValuesZeroing <- function(Spectrum_data) {
  # Data initialisation and checks ----------------------------------------------
  begin_info <- beginTreatment("NegativeValuesZeroing", Spectrum_data, force.real = T)
  Spectrum_data <- begin_info[["Signal_data"]]

  # NegativeValuesZeroing ----------------------------------------------
  Spectrum_data[Spectrum_data < 0] <- 0

  # Data finalisation ----------------------------------------------
  return(endTreatment("NegativeValuesZeroing", begin_info, Spectrum_data))
}
author	workflow4metabolomics
date	Wed, 13 May 2020 03:52:09 -0400
parents	5b06800f3449
children