comparison spec-dist.R @ 2:20d69a062da3 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author prog
date Thu, 02 Mar 2017 08:55:00 -0500
parents
children
comparison
equal deleted inserted replaced
1:253d531a0193 2:20d69a062da3
1 #dyn.load('src/closeMatchPpm.dll')
2 # commented out for refactoring as package
3 #dyn.load('src/closeMatchPpm.so')
4
5 matchPpm <- function(x, y, ppm = 3, mzmin = 0) {
6 if (any(is.na(y)))
7 stop("NA's are not allowed in y !\n")
8 ok <- !(is.na(x))
9 ans <- order(x)
10 keep <- seq_along(ok)[ok]
11 xidx <- ans[ans %in% keep]
12 xs <- x[xidx]
13 yidx <- order(y)
14 ys <- y[yidx]
15 if (!is.double(xs))
16 xs <- as.double(xs)
17 if (!is.double(ys))
18 ys <- as.double(ys)
19 if (!is.integer(xidx))
20 xidx <- as.integer(xidx)
21 if (!is.integer(yidx))
22 yidx <- as.integer(yidx)
23
24 fm <-
25 .Call(
26 "closeMatchPpm",
27 xs,
28 ys,
29 xidx,
30 yidx,
31 as.integer(length(x)),
32 as.double(ppm),
33 as.double(mzmin)
34 )
35 fm
36 }
37
38
39 simpList <- function(v) {
40 vapply(v, function(x) {
41 if (is.null(x)) {
42 -1
43 } else{
44 x
45 }
46 }, FUN.VALUE = -1)
47 }
48
49
50 ##Stein and scott values : mzexp 3 intexp 0.6
51 ##Massbank values : mzexp 2 intexp 0.5
52
53
54 cosine <-
55 function(mz1,
56 mz2,
57 int1,
58 int2,
59 mzexp = 2,
60 intexp = 0.5,
61 ppm,
62 dmz = 0.005) {
63 matchList <- matchPpm(mz1, mz2, ppm, dmz)
64 ###Weigthed intensity
65 pfound <- which(!sapply(matchList, is.null, simplify = TRUE))
66
67 ###If no peak is found.
68 if (length(pfound) == 0)
69 return(list(measure = 0, matched = rep(-1, length(mz1))))
70 w1 <- int1 ^ intexp * mz1 ^ mzexp
71 w2 <- int2 ^ intexp * mz2 ^ mzexp
72 cat(w1[pfound], w2[unlist(matchList[pfound])],'\n')
73 cos_value <-
74 sum((w1[pfound] * w2[unlist(matchList[pfound])]) ^ 2) / (sum(w1[pfound] ^
75 2) * sum(w2[unlist(matchList[pfound])] ^ 2))
76
77 ####Adding the penality if needed.
78 list(measure = cos_value, matched = simpList(matchList))
79 }
80
81
82 ###penalized cosine
83
84 wcosine <-
85 function(mz1,
86 mz2,
87 int1,
88 int2,
89 mzexp = 2,
90 intexp = 0.5,
91 ppm,
92 dmz = 0.005,
93 penality = c("rweigth")) {
94 penality <- match.arg(penality)
95 matchList <- matchPpm(mz1, mz2, ppm, dmz)
96 ###Weigthed intensity
97 pfound <- which(!sapply(matchList, is.null, simplify = TRUE))
98 ###If no peak is found.
99 if (length(pfound) == 0)
100 return(list(measure = 0, matched = rep(-1, length(mz1))))
101 w1 <- int1 ^ intexp * mz1 ^ mzexp
102 w2 <- int2 ^ intexp * mz2 ^ mzexp
103
104 cos_value <-
105 sum((w1[pfound] * w2[unlist(matchList[pfound])]) ^ 2) / (sum(w1[pfound] ^
106 2) * sum(w2[unlist(matchList[pfound])] ^ 2))
107
108 if(is.nan(cos_value)) cos_value <- 0
109 ####Adding the penality if needed.
110 div = 1
111 if (penality == "rweigth") {
112 p <-
113 (sum(w1[pfound]) / sum(w1) + sum(w2[unlist(matchList[pfound])]) / sum(w2)) /
114 2
115 div = 2
116 } else{
117 p <- 0
118 }
119
120 measure <- (cos_value + p) / div
121 if(is.nan(measure)) measure <- (cos_value) / div
122 list(measure = measure,
123 matched = simpList(matchList))
124 }
125
126 ##A gaussian of the two spectra seen as a mixture of gaussian, derived form Heinonen et al 2012
127 pkernel <-
128 function(mz1,
129 mz2,
130 int1,
131 int2,
132 mzexp = 2,
133 intexp = 0.5,
134 ppm,
135 dmz = 0.005,
136 sigint = 0.5,
137 penality = c("rweigth")) {
138 ###We first match the peak
139 matchList <- matchPpm(mz1, mz2, ppm, dmz)
140 # ###Weigthed intensity
141 pfound <- which(!sapply(matchList, is.null, simplify = TRUE))
142 #
143 ###If no peak is found.
144 if (length(pfound) == 0)
145 return(list(measure = 0, matched = rep(-1, length(mz1))))
146 w1 <- int1 ^ intexp * mz1 ^ mzexp
147 w2 <- int2 ^ intexp * mz2 ^ mzexp
148 w1 <- w1 * 1 / sum(w1)
149 w2 <- w2 * 1 / sum(w2)
150 l1 <- length(w1)
151 l2 <- length(w2)
152 ###The mz dev
153 vsig1 = mz1 * ppm * 3 * 10 ^ -6
154 vsig1 = sapply(vsig1, function(x, y) {
155 return(max(x, y))
156 }, y = dmz)
157
158 vsig2 = mz2 * ppm * 3 * 10 ^ -6
159 vsig2 = sapply(vsig2, function(x, y) {
160 return(max(x, y))
161 }, y = dmz)
162 accu = 0
163 ###TO DO rcopder en C
164 for (i in 1:l1) {
165 for (j in 1:l2) {
166 divisor = max(stats::dnorm(
167 mz1[i],
168 mean = mz1[i],
169 sd = sqrt(vsig1[i] ^ 2 + vsig1[i] ^ 2)
170 ),
171 stats::dnorm(
172 mz2[j],
173 mean = mz2[j],
174 sd = sqrt(vsig2[j] ^ 2 + vsig2[j] ^ 2)
175 ))
176 if (divisor == 0)
177 next
178 scalet = stats::dnorm(mz1[i],
179 mean = mz2[j],
180 sd = sqrt(vsig1[i] ^ 2 + vsig2[j] ^ 2))
181 accu = accu + scalet / divisor
182 }
183 }
184 div = 1
185 if (penality == "rweigth") {
186 p <-
187 (sum(w1[pfound]) / sum(w1) + sum(w2[unlist(matchList[pfound])]) / sum(w2)) /
188 2
189 div = 2
190 } else{
191 p <- 0
192 }
193 accu = accu / (l2 * l1)
194 list(measure = (accu + p) / div,
195 matched = simpList(matchList))
196 }