Mercurial > repos > prog > lcmsmatching
comparison MassbankSpectrum.R @ 1:253d531a0193 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 36c9d8099c20a1ae848f1337c16564335dd8fb2b
| author | prog |
|---|---|
| date | Sat, 03 Sep 2016 17:02:01 -0400 |
| parents | e66bb061af06 |
| children |
comparison
equal
deleted
inserted
replaced
| 0:e66bb061af06 | 1:253d531a0193 |
|---|---|
| 19 | 19 |
| 20 spectra <- list() | 20 spectra <- list() |
| 21 | 21 |
| 22 # Define fields regex | 22 # Define fields regex |
| 23 regex <- character() | 23 regex <- character() |
| 24 regex[[RBIODB.ACCESSION]] <- "^ACCESSION: (.+)$" | 24 regex[[BIODB.ACCESSION]] <- "^ACCESSION: (.+)$" |
| 25 regex[[RBIODB.MSDEV]] <- "^AC\\$INSTRUMENT: (.+)$" | 25 regex[[BIODB.MSDEV]] <- "^AC\\$INSTRUMENT: (.+)$" |
| 26 regex[[RBIODB.MSDEVTYPE]] <- "^AC\\$INSTRUMENT_TYPE: (.+)$" | 26 regex[[BIODB.MSDEVTYPE]] <- "^AC\\$INSTRUMENT_TYPE: (.+)$" |
| 27 regex[[RBIODB.MSTYPE]] <- "^AC\\$MASS_SPECTROMETRY: MS_TYPE (.+)$" | 27 regex[[BIODB.MSTYPE]] <- "^AC\\$MASS_SPECTROMETRY: MS_TYPE (.+)$" |
| 28 regex[[RBIODB.MSPRECMZ]] <- "^MS\\$FOCUSED_ION: PRECURSOR_M/Z (.+)$" | 28 regex[[BIODB.MSPRECMZ]] <- "^MS\\$FOCUSED_ION: PRECURSOR_M/Z (.+)$" |
| 29 regex[[RBIODB.NB.PEAKS]] <- "^PK\\$NUM_PEAK: ([0-9]+)$" | 29 regex[[BIODB.NB.PEAKS]] <- "^PK\\$NUM_PEAK: ([0-9]+)$" |
| 30 regex[[RBIODB.MSPRECANNOT]] <- "^MS\\$FOCUSED_ION: PRECURSOR_TYPE (.+)$" | 30 regex[[BIODB.MSPRECANNOT]] <- "^MS\\$FOCUSED_ION: PRECURSOR_TYPE (.+)$" |
| 31 | 31 |
| 32 for (text in contents) { | 32 for (text in contents) { |
| 33 | 33 |
| 34 # Create instance | 34 # Create instance |
| 35 spectrum <- MassbankSpectrum$new() | 35 spectrum <- MassbankSpectrum$new() |
| 52 next | 52 next |
| 53 | 53 |
| 54 # MS MODE | 54 # MS MODE |
| 55 g <- str_match(s, "^AC\\$MASS_SPECTROMETRY: ION_MODE (.+)$") | 55 g <- str_match(s, "^AC\\$MASS_SPECTROMETRY: ION_MODE (.+)$") |
| 56 if ( ! is.na(g[1,1])) { | 56 if ( ! is.na(g[1,1])) { |
| 57 spectrum$setField(RBIODB.MSMODE, if (g[1,2] == 'POSITIVE') RBIODB.MSMODE.POS else RBIODB.MSMODE.NEG) | 57 spectrum$setField(BIODB.MSMODE, if (g[1,2] == 'POSITIVE') BIODB.MSMODE.POS else BIODB.MSMODE.NEG) |
| 58 next | 58 next |
| 59 } | 59 } |
| 60 | 60 |
| 61 # PEAKS | 61 # PEAKS |
| 62 if (.parse.peak.line(spectrum, s)) | 62 if (.parse.peak.line(spectrum, s)) |
| 65 | 65 |
| 66 spectra <- c(spectra, spectrum) | 66 spectra <- c(spectra, spectrum) |
| 67 } | 67 } |
| 68 | 68 |
| 69 # Replace elements with no accession id by NULL | 69 # Replace elements with no accession id by NULL |
| 70 spectra <- lapply(spectra, function(x) if (is.na(x$getField(RBIODB.ACCESSION))) NULL else x) | 70 spectra <- lapply(spectra, function(x) if (is.na(x$getField(BIODB.ACCESSION))) NULL else x) |
| 71 | 71 |
| 72 # Set associated compounds | 72 # Set associated compounds |
| 73 compounds <- createMassbankCompoundFromTxt(contents) | 73 compounds <- createMassbankCompoundFromTxt(contents) |
| 74 for (i in seq(spectra)) | 74 for (i in seq(spectra)) |
| 75 if ( ! is.null(spectra[[i]])) | 75 if ( ! is.null(spectra[[i]])) |
| 76 spectra[[i]]$setField(RBIODB.COMPOUND, compounds[[i]]) | 76 spectra[[i]]$setField(BIODB.COMPOUND, compounds[[i]]) |
| 77 | 77 |
| 78 # If the input was a single element, then output a single object | 78 # If the input was a single element, then output a single object |
| 79 if (drop && length(contents) == 1) | 79 if (drop && length(contents) == 1) |
| 80 spectra <- spectra[[1]] | 80 spectra <- spectra[[1]] |
| 81 | 81 |
| 86 # PARSE PEAK LINE # | 86 # PARSE PEAK LINE # |
| 87 ################### | 87 ################### |
| 88 | 88 |
| 89 .parse.peak.line <- function(spectrum, line) { | 89 .parse.peak.line <- function(spectrum, line) { |
| 90 | 90 |
| 91 peaks <- RBIODB.PEAK.DF.EXAMPLE | 91 peaks <- BIODB.PEAK.DF.EXAMPLE |
| 92 | 92 |
| 93 # Annotation | 93 # Annotation |
| 94 g <- str_match(line, "^\\s+([0-9][0-9.]*) ([A-Z0-9+-]+) ([0-9]+) ([0-9][0-9.]*) ([0-9][0-9.]*)$") | 94 g <- str_match(line, "^\\s+([0-9][0-9.]*) ([A-Z0-9+-]+) ([0-9]+) ([0-9][0-9.]*) ([0-9][0-9.]*)$") |
| 95 if ( ! is.na(g[1,1])) | 95 if ( ! is.na(g[1,1])) |
| 96 peaks[1, c(RBIODB.PEAK.MZ, RBIODB.PEAK.FORMULA, RBIODB.PEAK.FORMULA.COUNT, RBIODB.PEAK.MASS, RBIODB.PEAK.ERROR.PPM)] <- list(as.double(g[1,2]), g[1,3], as.integer(g[1,4]), as.double(g[1,5]), as.double(g[1,6])) | 96 peaks[1, c(BIODB.PEAK.MZ, BIODB.PEAK.FORMULA, BIODB.PEAK.FORMULA.COUNT, BIODB.PEAK.MASS, BIODB.PEAK.ERROR.PPM)] <- list(as.double(g[1,2]), g[1,3], as.integer(g[1,4]), as.double(g[1,5]), as.double(g[1,6])) |
| 97 | 97 |
| 98 # Peak | 98 # Peak |
| 99 g <- str_match(line, "^\\s+([0-9][0-9.]*) ([0-9][0-9.]*) ([0-9]+)$") | 99 g <- str_match(line, "^\\s+([0-9][0-9.]*) ([0-9][0-9.]*) ([0-9]+)$") |
| 100 if ( ! is.na(g[1,1])) | 100 if ( ! is.na(g[1,1])) |
| 101 peaks[1, c(RBIODB.PEAK.MZ, RBIODB.PEAK.INTENSITY, RBIODB.PEAK.RELATIVE.INTENSITY)] <- list(as.double(g[1,2]), as.double(g[1,3]), as.integer(g[1,4])) | 101 peaks[1, c(BIODB.PEAK.MZ, BIODB.PEAK.INTENSITY, BIODB.PEAK.RELATIVE.INTENSITY)] <- list(as.double(g[1,2]), as.double(g[1,3]), as.integer(g[1,4])) |
| 102 | 102 |
| 103 if (nrow(peaks) > 0) { | 103 if (nrow(peaks) > 0) { |
| 104 | 104 |
| 105 # Get current peaks and merge with new peaks | 105 # Get current peaks and merge with new peaks |
| 106 current.peaks <- spectrum$getField(RBIODB.PEAKS) | 106 current.peaks <- spectrum$getField(BIODB.PEAKS) |
| 107 if ( ! is.null(current.peaks)) | 107 if ( ! is.null(current.peaks)) |
| 108 peaks <- rbind(current.peaks, peaks) | 108 peaks <- rbind(current.peaks, peaks) |
| 109 | 109 |
| 110 spectrum$setField(RBIODB.PEAKS, peaks) | 110 spectrum$setField(BIODB.PEAKS, peaks) |
| 111 | 111 |
| 112 return(TRUE) | 112 return(TRUE) |
| 113 } | 113 } |
| 114 | 114 |
| 115 return(FALSE) | 115 return(FALSE) |
