Mercurial > repos > prog > lcmsmatching
comparison MassbankSpectrum.R @ 1:253d531a0193 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 36c9d8099c20a1ae848f1337c16564335dd8fb2b
author | prog |
---|---|
date | Sat, 03 Sep 2016 17:02:01 -0400 |
parents | e66bb061af06 |
children |
comparison
equal
deleted
inserted
replaced
0:e66bb061af06 | 1:253d531a0193 |
---|---|
19 | 19 |
20 spectra <- list() | 20 spectra <- list() |
21 | 21 |
22 # Define fields regex | 22 # Define fields regex |
23 regex <- character() | 23 regex <- character() |
24 regex[[RBIODB.ACCESSION]] <- "^ACCESSION: (.+)$" | 24 regex[[BIODB.ACCESSION]] <- "^ACCESSION: (.+)$" |
25 regex[[RBIODB.MSDEV]] <- "^AC\\$INSTRUMENT: (.+)$" | 25 regex[[BIODB.MSDEV]] <- "^AC\\$INSTRUMENT: (.+)$" |
26 regex[[RBIODB.MSDEVTYPE]] <- "^AC\\$INSTRUMENT_TYPE: (.+)$" | 26 regex[[BIODB.MSDEVTYPE]] <- "^AC\\$INSTRUMENT_TYPE: (.+)$" |
27 regex[[RBIODB.MSTYPE]] <- "^AC\\$MASS_SPECTROMETRY: MS_TYPE (.+)$" | 27 regex[[BIODB.MSTYPE]] <- "^AC\\$MASS_SPECTROMETRY: MS_TYPE (.+)$" |
28 regex[[RBIODB.MSPRECMZ]] <- "^MS\\$FOCUSED_ION: PRECURSOR_M/Z (.+)$" | 28 regex[[BIODB.MSPRECMZ]] <- "^MS\\$FOCUSED_ION: PRECURSOR_M/Z (.+)$" |
29 regex[[RBIODB.NB.PEAKS]] <- "^PK\\$NUM_PEAK: ([0-9]+)$" | 29 regex[[BIODB.NB.PEAKS]] <- "^PK\\$NUM_PEAK: ([0-9]+)$" |
30 regex[[RBIODB.MSPRECANNOT]] <- "^MS\\$FOCUSED_ION: PRECURSOR_TYPE (.+)$" | 30 regex[[BIODB.MSPRECANNOT]] <- "^MS\\$FOCUSED_ION: PRECURSOR_TYPE (.+)$" |
31 | 31 |
32 for (text in contents) { | 32 for (text in contents) { |
33 | 33 |
34 # Create instance | 34 # Create instance |
35 spectrum <- MassbankSpectrum$new() | 35 spectrum <- MassbankSpectrum$new() |
52 next | 52 next |
53 | 53 |
54 # MS MODE | 54 # MS MODE |
55 g <- str_match(s, "^AC\\$MASS_SPECTROMETRY: ION_MODE (.+)$") | 55 g <- str_match(s, "^AC\\$MASS_SPECTROMETRY: ION_MODE (.+)$") |
56 if ( ! is.na(g[1,1])) { | 56 if ( ! is.na(g[1,1])) { |
57 spectrum$setField(RBIODB.MSMODE, if (g[1,2] == 'POSITIVE') RBIODB.MSMODE.POS else RBIODB.MSMODE.NEG) | 57 spectrum$setField(BIODB.MSMODE, if (g[1,2] == 'POSITIVE') BIODB.MSMODE.POS else BIODB.MSMODE.NEG) |
58 next | 58 next |
59 } | 59 } |
60 | 60 |
61 # PEAKS | 61 # PEAKS |
62 if (.parse.peak.line(spectrum, s)) | 62 if (.parse.peak.line(spectrum, s)) |
65 | 65 |
66 spectra <- c(spectra, spectrum) | 66 spectra <- c(spectra, spectrum) |
67 } | 67 } |
68 | 68 |
69 # Replace elements with no accession id by NULL | 69 # Replace elements with no accession id by NULL |
70 spectra <- lapply(spectra, function(x) if (is.na(x$getField(RBIODB.ACCESSION))) NULL else x) | 70 spectra <- lapply(spectra, function(x) if (is.na(x$getField(BIODB.ACCESSION))) NULL else x) |
71 | 71 |
72 # Set associated compounds | 72 # Set associated compounds |
73 compounds <- createMassbankCompoundFromTxt(contents) | 73 compounds <- createMassbankCompoundFromTxt(contents) |
74 for (i in seq(spectra)) | 74 for (i in seq(spectra)) |
75 if ( ! is.null(spectra[[i]])) | 75 if ( ! is.null(spectra[[i]])) |
76 spectra[[i]]$setField(RBIODB.COMPOUND, compounds[[i]]) | 76 spectra[[i]]$setField(BIODB.COMPOUND, compounds[[i]]) |
77 | 77 |
78 # If the input was a single element, then output a single object | 78 # If the input was a single element, then output a single object |
79 if (drop && length(contents) == 1) | 79 if (drop && length(contents) == 1) |
80 spectra <- spectra[[1]] | 80 spectra <- spectra[[1]] |
81 | 81 |
86 # PARSE PEAK LINE # | 86 # PARSE PEAK LINE # |
87 ################### | 87 ################### |
88 | 88 |
89 .parse.peak.line <- function(spectrum, line) { | 89 .parse.peak.line <- function(spectrum, line) { |
90 | 90 |
91 peaks <- RBIODB.PEAK.DF.EXAMPLE | 91 peaks <- BIODB.PEAK.DF.EXAMPLE |
92 | 92 |
93 # Annotation | 93 # Annotation |
94 g <- str_match(line, "^\\s+([0-9][0-9.]*) ([A-Z0-9+-]+) ([0-9]+) ([0-9][0-9.]*) ([0-9][0-9.]*)$") | 94 g <- str_match(line, "^\\s+([0-9][0-9.]*) ([A-Z0-9+-]+) ([0-9]+) ([0-9][0-9.]*) ([0-9][0-9.]*)$") |
95 if ( ! is.na(g[1,1])) | 95 if ( ! is.na(g[1,1])) |
96 peaks[1, c(RBIODB.PEAK.MZ, RBIODB.PEAK.FORMULA, RBIODB.PEAK.FORMULA.COUNT, RBIODB.PEAK.MASS, RBIODB.PEAK.ERROR.PPM)] <- list(as.double(g[1,2]), g[1,3], as.integer(g[1,4]), as.double(g[1,5]), as.double(g[1,6])) | 96 peaks[1, c(BIODB.PEAK.MZ, BIODB.PEAK.FORMULA, BIODB.PEAK.FORMULA.COUNT, BIODB.PEAK.MASS, BIODB.PEAK.ERROR.PPM)] <- list(as.double(g[1,2]), g[1,3], as.integer(g[1,4]), as.double(g[1,5]), as.double(g[1,6])) |
97 | 97 |
98 # Peak | 98 # Peak |
99 g <- str_match(line, "^\\s+([0-9][0-9.]*) ([0-9][0-9.]*) ([0-9]+)$") | 99 g <- str_match(line, "^\\s+([0-9][0-9.]*) ([0-9][0-9.]*) ([0-9]+)$") |
100 if ( ! is.na(g[1,1])) | 100 if ( ! is.na(g[1,1])) |
101 peaks[1, c(RBIODB.PEAK.MZ, RBIODB.PEAK.INTENSITY, RBIODB.PEAK.RELATIVE.INTENSITY)] <- list(as.double(g[1,2]), as.double(g[1,3]), as.integer(g[1,4])) | 101 peaks[1, c(BIODB.PEAK.MZ, BIODB.PEAK.INTENSITY, BIODB.PEAK.RELATIVE.INTENSITY)] <- list(as.double(g[1,2]), as.double(g[1,3]), as.integer(g[1,4])) |
102 | 102 |
103 if (nrow(peaks) > 0) { | 103 if (nrow(peaks) > 0) { |
104 | 104 |
105 # Get current peaks and merge with new peaks | 105 # Get current peaks and merge with new peaks |
106 current.peaks <- spectrum$getField(RBIODB.PEAKS) | 106 current.peaks <- spectrum$getField(BIODB.PEAKS) |
107 if ( ! is.null(current.peaks)) | 107 if ( ! is.null(current.peaks)) |
108 peaks <- rbind(current.peaks, peaks) | 108 peaks <- rbind(current.peaks, peaks) |
109 | 109 |
110 spectrum$setField(RBIODB.PEAKS, peaks) | 110 spectrum$setField(BIODB.PEAKS, peaks) |
111 | 111 |
112 return(TRUE) | 112 return(TRUE) |
113 } | 113 } |
114 | 114 |
115 return(FALSE) | 115 return(FALSE) |