Mercurial > repos > prog > lcmsmatching
annotate massdb-helper.R @ 2:20d69a062da3 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author | prog |
---|---|
date | Thu, 02 Mar 2017 08:55:00 -0500 |
parents | |
children |
rev | line source |
---|---|
2
20d69a062da3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff
changeset
|
# Reduce a spectrum to two columns: m/z and relative intensity (in percent).
#
# spec: a matrix or data frame of peaks. With exactly two columns they are
#       taken as (m/z, intensity) in that order. With any other number of
#       columns, the table must contain a column named BIODB.PEAK.MZ and at
#       least one column whose name appears in BIODB.GROUP.INTENSITY; the
#       first such name (in BIODB.GROUP.INTENSITY order) is used.
#
# Returns NA_real_ when `spec` is empty (zero length or zero rows). Otherwise
# returns the two-column table with the intensity rescaled so that its
# maximum is 100, and with columns renamed to BIODB.PEAK.MZ and
# BIODB.PEAK.RELATIVE.INTENSITY.
#
# Stops with an error when a table with more than two columns lacks the m/z
# column or every candidate intensity column.
simplifySpectrum <- function(spec) {
  if (length(spec) == 0) {
    return(NA_real_)
  }
  if (nrow(spec) == 0) {
    return(NA_real_)
  }

  if (ncol(spec) != 2) {
    if (!(BIODB.PEAK.MZ %in% colnames(spec))) {
      stop(
        "No m/z column found; if there are more than 2 columns, ",
        "the m/z column should be named: ",
        BIODB.PEAK.MZ
      )
    }

    # Consider every candidate intensity name, not only the first one, and
    # keep the first candidate that is actually present.
    available <-
      BIODB.GROUP.INTENSITY[BIODB.GROUP.INTENSITY %in% colnames(spec)]
    if (length(available) == 0) {
      stop(
        "No intensity column found; if there are more than 2 columns, ",
        "columns should be named: ",
        paste0(BIODB.GROUP.INTENSITY, collapse = ", ")
      )
    }

    # drop = FALSE keeps a single-peak matrix two-dimensional so that the
    # column indexing below still works.
    spec <- spec[, c(BIODB.PEAK.MZ, available[1]), drop = FALSE]
  }

  # Normalize the intensities to a percentage of the most intense peak.
  intensity <- as.numeric(spec[, 2])
  spec[, 2] <- intensity * 100 / max(intensity)
  colnames(spec) <- c(BIODB.PEAK.MZ, BIODB.PEAK.RELATIVE.INTENSITY)
  spec
}
20d69a062da3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff
changeset
|
24 |
20d69a062da3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff
changeset
|
25 |
20d69a062da3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff
changeset
|
26 |
20d69a062da3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff
changeset
|
# Compute the similarity between two spectra with a pluggable metric.
#
# spec1, spec2: spectra accepted by simplifySpectrum().
# npmin:        minimum number of matched peaks required to report a
#               non-zero similarity.
# fun:          the distance function, or its name, as accepted by do.call().
#               It is called with the elements of `params` plus `mz1`, `mz2`,
#               `int1` and `int2`, and must return a list with elements
#               `matched` and `measure`.
# params:       extra named arguments forwarded to `fun`.
#
# Returns a list with `matched` (as returned by `fun`, or numeric(0) when a
# spectrum is empty) and `similarity` (the `measure` returned by `fun`, or 0
# when a spectrum is empty or fewer than `npmin` entries of `matched` differ
# from -1).
calcDistance <-
  function(spec1,
           spec2,
           npmin = 2,
           fun = c("wcosine"),
           params = list()) {
    # Spectra are always normalized to percentages to avoid scale issues.
    spec1 <- simplifySpectrum(spec1)
    spec2 <- simplifySpectrum(spec2)

    # simplifySpectrum() signals an empty spectrum with a scalar NA. Test for
    # that sentinel explicitly: calling is.na() on a whole table yields a
    # logical matrix, which `||` rejects (an error since R 4.3).
    is_empty_spectrum <- function(s) {
      is.atomic(s) && length(s) == 1 && is.na(s)
    }
    if (is_empty_spectrum(spec1) || is_empty_spectrum(spec2)) {
      return(list(matched = numeric(0), similarity = 0))
    }

    params$mz1 <- as.numeric(spec1[, BIODB.PEAK.MZ])
    params$mz2 <- as.numeric(spec2[, BIODB.PEAK.MZ])
    params$int1 <- as.numeric(spec1[, BIODB.PEAK.RELATIVE.INTENSITY])
    params$int2 <- as.numeric(spec2[, BIODB.PEAK.RELATIVE.INTENSITY])

    res <- do.call(fun, args = params)

    # Entries equal to -1 are not counted as matches; with too few matches
    # the similarity is forced to 0.
    if (sum(res$matched != -1) < npmin) {
      return(list(matched = res$matched, similarity = 0))
    }
    list(matched = res$matched, similarity = res$measure)
  }
20d69a062da3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff
changeset
|
49 |
20d69a062da3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff
changeset
|
50 |
20d69a062da3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff
changeset
|
51 |
20d69a062da3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff
changeset
|
52 ###The returned sim list is not ordered |
20d69a062da3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff
changeset
|
# Compare one spectrum against every spectrum of a library.
#
# spec:       the query spectrum (a table with rows; see calcDistance()).
# libspec:    a list of library spectra.
# npmin:      minimum number of matched peaks, forwarded to calcDistance().
# fun:        distance function (or its name), forwarded to calcDistance().
# params:     extra arguments for `fun`, forwarded to calcDistance().
# decreasing: passed to order() when ranking the similarities.
#
# Returns NULL when `libspec` is empty or `spec` has no rows. Otherwise a
# list with:
#   ord        - indices of `libspec` ordered by similarity,
#   matched    - per-library-spectrum `matched` element from calcDistance(),
#   similarity - per-library-spectrum similarity, in `libspec` order (NOT
#                reordered; use `ord` to rank it).
compareSpectra <-
  function(spec,
           libspec,
           npmin = 2,
           fun = BIODB.MSMS.DIST.WCOSINE,
           params = list(),
           decreasing = TRUE) {
    if (length(libspec) == 0) {
      return(NULL)
    }
    if (nrow(spec) == 0) {
      return(NULL)
    }

    # Each library spectrum is passed as `spec2`; normalization happens
    # inside calcDistance(). `npmin` must be forwarded explicitly, otherwise
    # calcDistance() silently falls back to its own default.
    distances <- lapply(
      libspec,
      calcDistance,
      spec1 = spec,
      npmin = npmin,
      params = params,
      fun = fun
    )

    similarity <- vapply(distances, `[[`, i = "similarity",
                         FUN.VALUE = numeric(1))
    matched <- lapply(distances, `[[`, i = "matched")

    list(
      ord = order(similarity, decreasing = decreasing),
      matched = matched,
      similarity = similarity
    )
  }