Mercurial > repos > prog > lcmsmatching
diff spec-dist.R @ 2:20d69a062da3 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author | prog |
---|---|
date | Thu, 02 Mar 2017 08:55:00 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spec-dist.R Thu Mar 02 08:55:00 2017 -0500 @@ -0,0 +1,196 @@ +#dyn.load('src/closeMatchPpm.dll') +# commented out for refactoring as package +#dyn.load('src/closeMatchPpm.so') + +matchPpm <- function(x, y, ppm = 3, mzmin = 0) { + if (any(is.na(y))) + stop("NA's are not allowed in y !\n") + ok <- !(is.na(x)) + ans <- order(x) + keep <- seq_along(ok)[ok] + xidx <- ans[ans %in% keep] + xs <- x[xidx] + yidx <- order(y) + ys <- y[yidx] + if (!is.double(xs)) + xs <- as.double(xs) + if (!is.double(ys)) + ys <- as.double(ys) + if (!is.integer(xidx)) + xidx <- as.integer(xidx) + if (!is.integer(yidx)) + yidx <- as.integer(yidx) + + fm <- + .Call( + "closeMatchPpm", + xs, + ys, + xidx, + yidx, + as.integer(length(x)), + as.double(ppm), + as.double(mzmin) + ) + fm +} + + +simpList <- function(v) { + vapply(v, function(x) { + if (is.null(x)) { + -1 + } else{ + x + } + }, FUN.VALUE = -1) +} + + +##Stein and scott values : mzexp 3 intexp 0.6 +##Massbank values : mzexp 2 intexp 0.5 + + +cosine <- + function(mz1, + mz2, + int1, + int2, + mzexp = 2, + intexp = 0.5, + ppm, + dmz = 0.005) { + matchList <- matchPpm(mz1, mz2, ppm, dmz) + ###Weigthed intensity + pfound <- which(!sapply(matchList, is.null, simplify = TRUE)) + + ###If no peak is found. + if (length(pfound) == 0) + return(list(measure = 0, matched = rep(-1, length(mz1)))) + w1 <- int1 ^ intexp * mz1 ^ mzexp + w2 <- int2 ^ intexp * mz2 ^ mzexp + cat(w1[pfound], w2[unlist(matchList[pfound])],'\n') + cos_value <- + sum((w1[pfound] * w2[unlist(matchList[pfound])]) ^ 2) / (sum(w1[pfound] ^ + 2) * sum(w2[unlist(matchList[pfound])] ^ 2)) + + ####Adding the penality if needed. + list(measure = cos_value, matched = simpList(matchList)) + } + + +###penalized cosine + +wcosine <- + function(mz1, + mz2, + int1, + int2, + mzexp = 2, + intexp = 0.5, + ppm, + dmz = 0.005, + penality = c("rweigth")) { + penality <- match.arg(penality) + matchList <- matchPpm(mz1, mz2, ppm, dmz) + ###Weigthed intensity + pfound <- which(!sapply(matchList, is.null, simplify = TRUE)) + ###If no peak is found. + if (length(pfound) == 0) + return(list(measure = 0, matched = rep(-1, length(mz1)))) + w1 <- int1 ^ intexp * mz1 ^ mzexp + w2 <- int2 ^ intexp * mz2 ^ mzexp + + cos_value <- + sum((w1[pfound] * w2[unlist(matchList[pfound])]) ^ 2) / (sum(w1[pfound] ^ + 2) * sum(w2[unlist(matchList[pfound])] ^ 2)) + + if(is.nan(cos_value)) cos_value <- 0 + ####Adding the penality if needed. + div = 1 + if (penality == "rweigth") { + p <- + (sum(w1[pfound]) / sum(w1) + sum(w2[unlist(matchList[pfound])]) / sum(w2)) / + 2 + div = 2 + } else{ + p <- 0 + } + + measure <- (cos_value + p) / div + if(is.nan(measure)) measure <- (cos_value) / div + list(measure = measure, + matched = simpList(matchList)) + } + +##A gaussian of the two spectra seen as a mixture of gaussian, derived form Heinonen et al 2012 +pkernel <- + function(mz1, + mz2, + int1, + int2, + mzexp = 2, + intexp = 0.5, + ppm, + dmz = 0.005, + sigint = 0.5, + penality = c("rweigth")) { + ###We first match the peak + matchList <- matchPpm(mz1, mz2, ppm, dmz) + # ###Weigthed intensity + pfound <- which(!sapply(matchList, is.null, simplify = TRUE)) + # + ###If no peak is found. + if (length(pfound) == 0) + return(list(measure = 0, matched = rep(-1, length(mz1)))) + w1 <- int1 ^ intexp * mz1 ^ mzexp + w2 <- int2 ^ intexp * mz2 ^ mzexp + w1 <- w1 * 1 / sum(w1) + w2 <- w2 * 1 / sum(w2) + l1 <- length(w1) + l2 <- length(w2) + ###The mz dev + vsig1 = mz1 * ppm * 3 * 10 ^ -6 + vsig1 = sapply(vsig1, function(x, y) { + return(max(x, y)) + }, y = dmz) + + vsig2 = mz2 * ppm * 3 * 10 ^ -6 + vsig2 = sapply(vsig2, function(x, y) { + return(max(x, y)) + }, y = dmz) + accu = 0 + ###TO DO rcopder en C + for (i in 1:l1) { + for (j in 1:l2) { + divisor = max(stats::dnorm( + mz1[i], + mean = mz1[i], + sd = sqrt(vsig1[i] ^ 2 + vsig1[i] ^ 2) + ), + stats::dnorm( + mz2[j], + mean = mz2[j], + sd = sqrt(vsig2[j] ^ 2 + vsig2[j] ^ 2) + )) + if (divisor == 0) + next + scalet = stats::dnorm(mz1[i], + mean = mz2[j], + sd = sqrt(vsig1[i] ^ 2 + vsig2[j] ^ 2)) + accu = accu + scalet / divisor + } + } + div = 1 + if (penality == "rweigth") { + p <- + (sum(w1[pfound]) / sum(w1) + sum(w2[unlist(matchList[pfound])]) / sum(w2)) / + 2 + div = 2 + } else{ + p <- 0 + } + accu = accu / (l2 * l1) + list(measure = (accu + p) / div, + matched = simpList(matchList)) + }