Mercurial > repos > prog > lcmsmatching
comparison MsDbOutputDataFrameStream.R @ 6:f86fec07f392 draft default tip
planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
| author | prog |
|---|---|
| date | Fri, 22 Feb 2019 16:04:22 -0500 |
| parents | fb9c0409d85c |
| children |
comparison
equal
deleted
inserted
replaced
| 5:fb9c0409d85c | 6:f86fec07f392 |
|---|---|
| 1 if ( ! exists('MsDbOutputDataFrameStream')) { # Do not load again if already loaded | |
| 2 | |
| 3 library(methods) | |
| 4 source('MsDbOutputStream.R') | |
| 5 source('dfhlp.R', chdir = TRUE) | |
| 6 | |
| 7 ##################### | |
| 8 # CLASS DECLARATION # | |
| 9 ##################### | |
| 10 | |
| 11 MsDbOutputDataFrameStream <- setRefClass("MsDbOutputDataFrameStream", contains = 'MsDbOutputStream', fields = list( .df = "ANY", .output.fields = "ANY")) | |
| 12 | |
| 13 ############### | |
| 14 # CONSTRUCTOR # | |
| 15 ############### | |
| 16 | |
| 17 MsDbOutputDataFrameStream$methods( initialize = function(keep.unused = FALSE, one.line = FALSE, match.sep = MSDB.DFT.MATCH.SEP, output.fields = NULL, multval.field.sep = MSDB.DFT.OUTPUT.MULTIVAL.FIELD.SEP, first.val = FALSE, ascii = FALSE, noapostrophe = FALSE, noplusminus = FALSE, nogreek = FALSE, ...) { | |
| 18 | |
| 19 callSuper(keep.unused = keep.unused, one.line = one.line, match.sep = match.sep, multval.field.sep = multval.field.sep, first.val = first.val, ascii = ascii, noapostrophe = noapostrophe, noplusminus = noplusminus, nogreek = nogreek, ...) | |
| 20 | |
| 21 .df <<- data.frame() | |
| 22 .output.fields <<- output.fields | |
| 23 }) | |
| 24 | |
| 25 ################## | |
| 26 # GET DATA FRAME # | |
| 27 ################## | |
| 28 | |
| 29 MsDbOutputDataFrameStream$methods( getDataFrame = function() { | |
| 30 | |
| 31 # Put at least a column name if empty | |
| 32 if (nrow(.self$.df) == 0) | |
| 33 .self$.df[[.self$.output.fields[[MSDB.TAG.MZ]]]] <- numeric() | |
| 34 | |
| 35 return(.self$.df) | |
| 36 }) | |
| 37 | |
| 38 # Move columns to beginning {{{1 | |
| 39 | |
| 40 MsDbOutputDataFrameStream$methods( moveColumnsToBeginning = function(cols) { | |
| 41 all.cols <- colnames(.self$.df) | |
| 42 other.cols <- all.cols[ ! all.cols %in% cols] | |
| 43 cols <- cols[cols %in% all.cols] | |
| 44 .df <<- .self$.df[c(cols, other.cols)] | |
| 45 }) | |
| 46 | |
| 47 ################# | |
| 48 # MATCHED PEAKS # | |
| 49 ################# | |
| 50 | |
| 51 MsDbOutputDataFrameStream$methods( matchedPeaks = function(mz, rt = NULL, unused = NULL, peaks = NULL) { | |
| 52 | |
| 53 library(plyr) | |
| 54 | |
| 55 # Set input values | |
| 56 x <- data.frame(mz = mz) | |
| 57 colnames(x) <- MSDB.TAG.MZ | |
| 58 if ( ! is.null(rt)) { | |
| 59 x.rt <- data.frame(rt = rt) | |
| 60 colnames(x.rt) <- MSDB.TAG.RT | |
| 61 if (.self$.rtunit == MSDB.RTUNIT.MIN) | |
| 62 x.rt[[MSDB.TAG.RT]] <- x.rt[[MSDB.TAG.RT]] / 60 | |
| 63 x <- cbind(x, x.rt) | |
| 64 } | |
| 65 | |
| 66 | |
| 67 # Merge input values with matched peaks | |
| 68 if ( ! is.null(peaks)) { | |
| 69 | |
| 70 # No rows | |
| 71 if (nrow(peaks) == 0) { | |
| 72 # Add NA values | |
| 73 peaks[1, ] <- NA | |
| 74 | |
| 75 # Process existing rows | |
| 76 } else { | |
| 77 | |
| 78 # Convert RT | |
| 79 if (.self$.rtunit == MSDB.RTUNIT.MIN) | |
| 80 if (MSDB.TAG.COLRT %in% colnames(peaks)) | |
| 81 peaks[[MSDB.TAG.COLRT]] <- peaks[[MSDB.TAG.COLRT]] / 60 | |
| 82 | |
| 83 # Process multi-value fields | |
| 84 for (c in colnames(peaks)) | |
| 85 if (c %in% MSDB.MULTIVAL.FIELDS) { | |
| 86 | |
| 87 # Keep only first value in multi-value fields | |
| 88 if (.self$.first.val) | |
| 89 peaks[[c]] <- vapply(peaks[[c]], function(s) split.str(s, sep = MSDB.MULTIVAL.FIELD.SEP, unlist = TRUE)[[1]], FUN.VALUE = '') | |
| 90 | |
| 91 # Change separator | |
| 92 else | |
| 93 peaks[[c]] <- vapply(peaks[[c]], function(s) paste0(split.str(s, sep = MSDB.MULTIVAL.FIELD.SEP, unlist = TRUE), collapse = .self$.multval.field.sep), FUN.VALUE = '') | |
| 94 | |
| 95 } | |
| 96 | |
| 97 # Concatenate results in one line | |
| 98 if (.self$.one.line) { | |
| 99 # For each column, concatenate all values in one string. | |
| 100 for (c in seq(peaks)) { | |
| 101 v <- peaks[[c]] | |
| 102 v <- v[ ! is.na(v)] # remove NA values | |
| 103 v <- v[ ! duplicated(v)] # remove duplicates | |
| 104 peaks[1, c] <- paste0(v, collapse = .self$.match.sep, FUN.VALUE = '') | |
| 105 } | |
| 106 peaks <- peaks[1, ] # Keep only first line | |
| 107 } | |
| 108 } | |
| 109 | |
| 110 # Merge | |
| 111 x <- cbind(x, peaks, row.names = NULL) | |
| 112 } | |
| 113 | |
| 114 # Rename columns for output | |
| 115 x <- rename.col(x, names(.self$.output.fields), .self$.output.fields) | |
| 116 | |
| 117 # Add unused columns | |
| 118 if ( .self$.keep.unused && ! is.null(unused)) { | |
| 119 x <- cbind(x, unused, row.names = NULL) | |
| 120 } | |
| 121 | |
| 122 # Convert strings to ASCII | |
| 123 if (.self$.ascii || .self$.noapostrophe || .self$.noplusminus || .self$.nogreek) | |
| 124 for (c in seq(x)) | |
| 125 if (class(x[[c]]) == 'character') { | |
| 126 if (.self$.noapostrophe) | |
| 127 x[[c]] <- gsub("'", 'prime', x[[c]], perl = TRUE) | |
| 128 if (.self$.noplusminus) | |
| 129 x[[c]] <- gsub('±', '+-', x[[c]], perl = TRUE) | |
| 130 if (.self$.nogreek) { | |
| 131 x[[c]] <- gsub('α', 'alpha', x[[c]], perl = TRUE) | |
| 132 x[[c]] <- gsub('β', 'beta', x[[c]], perl = TRUE) | |
| 133 x[[c]] <- gsub('γ', 'gamma', x[[c]], perl = TRUE) | |
| 134 x[[c]] <- gsub('δ', 'delta', x[[c]], perl = TRUE) | |
| 135 } | |
| 136 if (.self$.ascii) { | |
| 137 x[[c]] <- gsub('[^\u0001-\u007F]', '_', x[[c]], perl = TRUE) | |
| 138 } | |
| 139 } | |
| 140 | |
| 141 # Add new rows to data frame | |
| 142 .df <<- rbind.fill(.self$.df, x) | |
| 143 }) | |
| 144 | |
| 145 } # end of load safe guard |
