Mercurial > repos > prog > lcmsmatching
comparison spec-dist.R @ 2:20d69a062da3 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author | prog |
---|---|
date | Thu, 02 Mar 2017 08:55:00 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:253d531a0193 | 2:20d69a062da3 |
---|---|
1 #dyn.load('src/closeMatchPpm.dll') | |
2 # commented out for refactoring as package | |
3 #dyn.load('src/closeMatchPpm.so') | |
4 | |
5 matchPpm <- function(x, y, ppm = 3, mzmin = 0) { | |
6 if (any(is.na(y))) | |
7 stop("NA's are not allowed in y !\n") | |
8 ok <- !(is.na(x)) | |
9 ans <- order(x) | |
10 keep <- seq_along(ok)[ok] | |
11 xidx <- ans[ans %in% keep] | |
12 xs <- x[xidx] | |
13 yidx <- order(y) | |
14 ys <- y[yidx] | |
15 if (!is.double(xs)) | |
16 xs <- as.double(xs) | |
17 if (!is.double(ys)) | |
18 ys <- as.double(ys) | |
19 if (!is.integer(xidx)) | |
20 xidx <- as.integer(xidx) | |
21 if (!is.integer(yidx)) | |
22 yidx <- as.integer(yidx) | |
23 | |
24 fm <- | |
25 .Call( | |
26 "closeMatchPpm", | |
27 xs, | |
28 ys, | |
29 xidx, | |
30 yidx, | |
31 as.integer(length(x)), | |
32 as.double(ppm), | |
33 as.double(mzmin) | |
34 ) | |
35 fm | |
36 } | |
37 | |
38 | |
39 simpList <- function(v) { | |
40 vapply(v, function(x) { | |
41 if (is.null(x)) { | |
42 -1 | |
43 } else{ | |
44 x | |
45 } | |
46 }, FUN.VALUE = -1) | |
47 } | |
48 | |
49 | |
50 ##Stein and scott values : mzexp 3 intexp 0.6 | |
51 ##Massbank values : mzexp 2 intexp 0.5 | |
52 | |
53 | |
54 cosine <- | |
55 function(mz1, | |
56 mz2, | |
57 int1, | |
58 int2, | |
59 mzexp = 2, | |
60 intexp = 0.5, | |
61 ppm, | |
62 dmz = 0.005) { | |
63 matchList <- matchPpm(mz1, mz2, ppm, dmz) | |
64 ###Weigthed intensity | |
65 pfound <- which(!sapply(matchList, is.null, simplify = TRUE)) | |
66 | |
67 ###If no peak is found. | |
68 if (length(pfound) == 0) | |
69 return(list(measure = 0, matched = rep(-1, length(mz1)))) | |
70 w1 <- int1 ^ intexp * mz1 ^ mzexp | |
71 w2 <- int2 ^ intexp * mz2 ^ mzexp | |
72 cat(w1[pfound], w2[unlist(matchList[pfound])],'\n') | |
73 cos_value <- | |
74 sum((w1[pfound] * w2[unlist(matchList[pfound])]) ^ 2) / (sum(w1[pfound] ^ | |
75 2) * sum(w2[unlist(matchList[pfound])] ^ 2)) | |
76 | |
77 ####Adding the penality if needed. | |
78 list(measure = cos_value, matched = simpList(matchList)) | |
79 } | |
80 | |
81 | |
82 ###penalized cosine | |
83 | |
84 wcosine <- | |
85 function(mz1, | |
86 mz2, | |
87 int1, | |
88 int2, | |
89 mzexp = 2, | |
90 intexp = 0.5, | |
91 ppm, | |
92 dmz = 0.005, | |
93 penality = c("rweigth")) { | |
94 penality <- match.arg(penality) | |
95 matchList <- matchPpm(mz1, mz2, ppm, dmz) | |
96 ###Weigthed intensity | |
97 pfound <- which(!sapply(matchList, is.null, simplify = TRUE)) | |
98 ###If no peak is found. | |
99 if (length(pfound) == 0) | |
100 return(list(measure = 0, matched = rep(-1, length(mz1)))) | |
101 w1 <- int1 ^ intexp * mz1 ^ mzexp | |
102 w2 <- int2 ^ intexp * mz2 ^ mzexp | |
103 | |
104 cos_value <- | |
105 sum((w1[pfound] * w2[unlist(matchList[pfound])]) ^ 2) / (sum(w1[pfound] ^ | |
106 2) * sum(w2[unlist(matchList[pfound])] ^ 2)) | |
107 | |
108 if(is.nan(cos_value)) cos_value <- 0 | |
109 ####Adding the penality if needed. | |
110 div = 1 | |
111 if (penality == "rweigth") { | |
112 p <- | |
113 (sum(w1[pfound]) / sum(w1) + sum(w2[unlist(matchList[pfound])]) / sum(w2)) / | |
114 2 | |
115 div = 2 | |
116 } else{ | |
117 p <- 0 | |
118 } | |
119 | |
120 measure <- (cos_value + p) / div | |
121 if(is.nan(measure)) measure <- (cos_value) / div | |
122 list(measure = measure, | |
123 matched = simpList(matchList)) | |
124 } | |
125 | |
126 ##A gaussian of the two spectra seen as a mixture of gaussian, derived form Heinonen et al 2012 | |
127 pkernel <- | |
128 function(mz1, | |
129 mz2, | |
130 int1, | |
131 int2, | |
132 mzexp = 2, | |
133 intexp = 0.5, | |
134 ppm, | |
135 dmz = 0.005, | |
136 sigint = 0.5, | |
137 penality = c("rweigth")) { | |
138 ###We first match the peak | |
139 matchList <- matchPpm(mz1, mz2, ppm, dmz) | |
140 # ###Weigthed intensity | |
141 pfound <- which(!sapply(matchList, is.null, simplify = TRUE)) | |
142 # | |
143 ###If no peak is found. | |
144 if (length(pfound) == 0) | |
145 return(list(measure = 0, matched = rep(-1, length(mz1)))) | |
146 w1 <- int1 ^ intexp * mz1 ^ mzexp | |
147 w2 <- int2 ^ intexp * mz2 ^ mzexp | |
148 w1 <- w1 * 1 / sum(w1) | |
149 w2 <- w2 * 1 / sum(w2) | |
150 l1 <- length(w1) | |
151 l2 <- length(w2) | |
152 ###The mz dev | |
153 vsig1 = mz1 * ppm * 3 * 10 ^ -6 | |
154 vsig1 = sapply(vsig1, function(x, y) { | |
155 return(max(x, y)) | |
156 }, y = dmz) | |
157 | |
158 vsig2 = mz2 * ppm * 3 * 10 ^ -6 | |
159 vsig2 = sapply(vsig2, function(x, y) { | |
160 return(max(x, y)) | |
161 }, y = dmz) | |
162 accu = 0 | |
163 ###TO DO rcopder en C | |
164 for (i in 1:l1) { | |
165 for (j in 1:l2) { | |
166 divisor = max(stats::dnorm( | |
167 mz1[i], | |
168 mean = mz1[i], | |
169 sd = sqrt(vsig1[i] ^ 2 + vsig1[i] ^ 2) | |
170 ), | |
171 stats::dnorm( | |
172 mz2[j], | |
173 mean = mz2[j], | |
174 sd = sqrt(vsig2[j] ^ 2 + vsig2[j] ^ 2) | |
175 )) | |
176 if (divisor == 0) | |
177 next | |
178 scalet = stats::dnorm(mz1[i], | |
179 mean = mz2[j], | |
180 sd = sqrt(vsig1[i] ^ 2 + vsig2[j] ^ 2)) | |
181 accu = accu + scalet / divisor | |
182 } | |
183 } | |
184 div = 1 | |
185 if (penality == "rweigth") { | |
186 p <- | |
187 (sum(w1[pfound]) / sum(w1) + sum(w2[unlist(matchList[pfound])]) / sum(w2)) / | |
188 2 | |
189 div = 2 | |
190 } else{ | |
191 p <- 0 | |
192 } | |
193 accu = accu / (l2 * l1) | |
194 list(measure = (accu + p) / div, | |
195 matched = simpList(matchList)) | |
196 } |