annotate massdb-helper.R @ 2:20d69a062da3 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author prog
date Thu, 02 Mar 2017 08:55:00 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
1 simplifySpectrum <- function(spec) {
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
2 if(length(spec) == 0){
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
3 return(NA_real_)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
4 }
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
5 #print(spec)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
6 if (nrow(spec) == 0)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
7 return(NA_real_)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
8 if (ncol(spec) != 2) {
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
9 spec[, BIODB.PEAK.MZ]
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
10 mint <- BIODB.GROUP.INTENSITY %in% colnames(spec)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
11 pint <- which(mint[1])
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
12 if (length(pint) == 0)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
13 stop(
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
14 "No intensity column founds, if there is more than 2 column, columns should be named",
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
15 paste0(BIODB.GROUP.INTENSITY, collapse = ", ")
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
16 )
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
17 spec <- spec[, c(BIODB.PEAK.MZ, BIODB.GROUP.INTENSITY[pint[1]])]
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
18 ###Normalizing the intenities.
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
19 }
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
20 spec[, 2] <- as.numeric(spec[, 2]) * 100 / max(as.numeric(spec[, 2]))
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
21 colnames(spec) <- c(BIODB.PEAK.MZ, BIODB.PEAK.RELATIVE.INTENSITY)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
22 spec
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
23 }
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
24
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
25
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
26
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
27 calcDistance <-
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
28 function(spec1 ,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
29 spec2,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
30 npmin = 2,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
31 fun = c("wcosine"),
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
32 params = list()) {
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
33 #fun <- match.arg(fun)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
34
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
35 #SPec are always notmlized in pourcentage toa voir issues;
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
36 spec1 <- simplifySpectrum(spec1)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
37 spec2 <- simplifySpectrum(spec2)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
38 if(is.na(spec1)||is.na(spec2)) return(list(matched=numeric(0),similarity=0))
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
39 params$mz1 <- as.numeric(spec1[, BIODB.PEAK.MZ])
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
40 params$mz2 <- as.numeric(spec2[, BIODB.PEAK.MZ])
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
41 params$int1 <- as.numeric(spec1[, BIODB.PEAK.RELATIVE.INTENSITY])
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
42 params$int2 <- as.numeric(spec2[, BIODB.PEAK.RELATIVE.INTENSITY])
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
43 res <- do.call(fun, args = params)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
44 if (sum(res$matched != -1) < npmin)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
45 return(list(matched = res$matched, similarity = 0))
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
46 list(matched = res$matched,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
47 similarity = res$measure)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
48 }
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
49
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
50
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
51
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
52 ###The returned sim list is not ordered
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
53 compareSpectra <-
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
54 function(spec,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
55 libspec,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
56 npmin = 2,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
57 fun = BIODB.MSMS.DIST.WCOSINE,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
58 params = list(),
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
59 decreasing = TRUE) {
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
60 #fun <- match.arg(fun)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
61 if (length(libspec) == 0) {
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
62 return(NULL)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
63 }
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
64 if (nrow(spec) == 0) {
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
65 return(NULL)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
66 }
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
67
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
68 ####spec is directly normalized.
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
69 vall <-
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
70 sapply(
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
71 libspec,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
72 calcDistance,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
73 spec1 = spec,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
74 params = params,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
75 fun = fun,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
76 simplify = FALSE
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
77 )
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
78 ####the list is ordered with the chosen metric.
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
79 sim <-
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
80 vapply(vall,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
81 '[[',
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
82 i = "similarity",
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
83 FUN.VALUE = ifelse(decreasing, 0, 1))
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
84 osim <- order(sim, decreasing = decreasing)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
85 matched <- sapply(vall, '[[', i = "matched", simplify = FALSE)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
86
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
87 return(list(
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
88 ord = osim,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
89 matched = matched,
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
90 similarity = sim
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
91 ))
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents:
diff changeset
92 }