comparison MsDbOutputDataFrameStream.R @ 6:f86fec07f392 draft default tip

planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author prog
date Fri, 22 Feb 2019 16:04:22 -0500
parents fb9c0409d85c
children
comparison
equal deleted inserted replaced
5:fb9c0409d85c 6:f86fec07f392
1 if ( ! exists('MsDbOutputDataFrameStream')) { # Do not load again if already loaded
2
3 library(methods)
4 source('MsDbOutputStream.R')
5 source('dfhlp.R', chdir = TRUE)
6
7 #####################
8 # CLASS DECLARATION #
9 #####################
10
11 MsDbOutputDataFrameStream <- setRefClass("MsDbOutputDataFrameStream", contains = 'MsDbOutputStream', fields = list( .df = "ANY", .output.fields = "ANY"))
12
13 ###############
14 # CONSTRUCTOR #
15 ###############
16
17 MsDbOutputDataFrameStream$methods( initialize = function(keep.unused = FALSE, one.line = FALSE, match.sep = MSDB.DFT.MATCH.SEP, output.fields = NULL, multval.field.sep = MSDB.DFT.OUTPUT.MULTIVAL.FIELD.SEP, first.val = FALSE, ascii = FALSE, noapostrophe = FALSE, noplusminus = FALSE, nogreek = FALSE, ...) {
18
19 callSuper(keep.unused = keep.unused, one.line = one.line, match.sep = match.sep, multval.field.sep = multval.field.sep, first.val = first.val, ascii = ascii, noapostrophe = noapostrophe, noplusminus = noplusminus, nogreek = nogreek, ...)
20
21 .df <<- data.frame()
22 .output.fields <<- output.fields
23 })
24
25 ##################
26 # GET DATA FRAME #
27 ##################
28
29 MsDbOutputDataFrameStream$methods( getDataFrame = function() {
30
31 # Put at least a column name if empty
32 if (nrow(.self$.df) == 0)
33 .self$.df[[.self$.output.fields[[MSDB.TAG.MZ]]]] <- numeric()
34
35 return(.self$.df)
36 })
37
38 # Move columns to beginning {{{1
39
40 MsDbOutputDataFrameStream$methods( moveColumnsToBeginning = function(cols) {
41 all.cols <- colnames(.self$.df)
42 other.cols <- all.cols[ ! all.cols %in% cols]
43 cols <- cols[cols %in% all.cols]
44 .df <<- .self$.df[c(cols, other.cols)]
45 })
46
47 #################
48 # MATCHED PEAKS #
49 #################
50
51 MsDbOutputDataFrameStream$methods( matchedPeaks = function(mz, rt = NULL, unused = NULL, peaks = NULL) {
52
53 library(plyr)
54
55 # Set input values
56 x <- data.frame(mz = mz)
57 colnames(x) <- MSDB.TAG.MZ
58 if ( ! is.null(rt)) {
59 x.rt <- data.frame(rt = rt)
60 colnames(x.rt) <- MSDB.TAG.RT
61 if (.self$.rtunit == MSDB.RTUNIT.MIN)
62 x.rt[[MSDB.TAG.RT]] <- x.rt[[MSDB.TAG.RT]] / 60
63 x <- cbind(x, x.rt)
64 }
65
66
67 # Merge input values with matched peaks
68 if ( ! is.null(peaks)) {
69
70 # No rows
71 if (nrow(peaks) == 0) {
72 # Add NA values
73 peaks[1, ] <- NA
74
75 # Process existing rows
76 } else {
77
78 # Convert RT
79 if (.self$.rtunit == MSDB.RTUNIT.MIN)
80 if (MSDB.TAG.COLRT %in% colnames(peaks))
81 peaks[[MSDB.TAG.COLRT]] <- peaks[[MSDB.TAG.COLRT]] / 60
82
83 # Process multi-value fields
84 for (c in colnames(peaks))
85 if (c %in% MSDB.MULTIVAL.FIELDS) {
86
87 # Keep only first value in multi-value fields
88 if (.self$.first.val)
89 peaks[[c]] <- vapply(peaks[[c]], function(s) split.str(s, sep = MSDB.MULTIVAL.FIELD.SEP, unlist = TRUE)[[1]], FUN.VALUE = '')
90
91 # Change separator
92 else
93 peaks[[c]] <- vapply(peaks[[c]], function(s) paste0(split.str(s, sep = MSDB.MULTIVAL.FIELD.SEP, unlist = TRUE), collapse = .self$.multval.field.sep), FUN.VALUE = '')
94
95 }
96
97 # Concatenate results in one line
98 if (.self$.one.line) {
99 # For each column, concatenate all values in one string.
100 for (c in seq(peaks)) {
101 v <- peaks[[c]]
102 v <- v[ ! is.na(v)] # remove NA values
103 v <- v[ ! duplicated(v)] # remove duplicates
104 peaks[1, c] <- paste0(v, collapse = .self$.match.sep, FUN.VALUE = '')
105 }
106 peaks <- peaks[1, ] # Keep only first line
107 }
108 }
109
110 # Merge
111 x <- cbind(x, peaks, row.names = NULL)
112 }
113
114 # Rename columns for output
115 x <- rename.col(x, names(.self$.output.fields), .self$.output.fields)
116
117 # Add unused columns
118 if ( .self$.keep.unused && ! is.null(unused)) {
119 x <- cbind(x, unused, row.names = NULL)
120 }
121
122 # Convert strings to ASCII
123 if (.self$.ascii || .self$.noapostrophe || .self$.noplusminus || .self$.nogreek)
124 for (c in seq(x))
125 if (class(x[[c]]) == 'character') {
126 if (.self$.noapostrophe)
127 x[[c]] <- gsub("'", 'prime', x[[c]], perl = TRUE)
128 if (.self$.noplusminus)
129 x[[c]] <- gsub('±', '+-', x[[c]], perl = TRUE)
130 if (.self$.nogreek) {
131 x[[c]] <- gsub('α', 'alpha', x[[c]], perl = TRUE)
132 x[[c]] <- gsub('β', 'beta', x[[c]], perl = TRUE)
133 x[[c]] <- gsub('γ', 'gamma', x[[c]], perl = TRUE)
134 x[[c]] <- gsub('δ', 'delta', x[[c]], perl = TRUE)
135 }
136 if (.self$.ascii) {
137 x[[c]] <- gsub('[^\u0001-\u007F]', '_', x[[c]], perl = TRUE)
138 }
139 }
140
141 # Add new rows to data frame
142 .df <<- rbind.fill(.self$.df, x)
143 })
144
145 } # end of load safe guard