comparison MassFiledbConn.R @ 6:f86fec07f392 draft default tip

planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author prog
date Fri, 22 Feb 2019 16:04:22 -0500
parents fb9c0409d85c
children
comparison
equal deleted inserted replaced
5:fb9c0409d85c 6:f86fec07f392
1 # LCMS File db.
2 # In this type of database, a single file is provided in CSV format. The default separator is the tab character.
3 # Each line is an MS peak measurement.
4 # The file contains molecule and spectrum information. Each spectrum has an accession id.
5
6 # TODO Rename setField into setFieldName + addNewField, and setMsMode into setMsModeValue
7
8 #############
9 # CONSTANTS #
10 #############
11
12 # Default database fields
# Every supported field tag initially maps to a column that carries the same
# name as the tag itself.
.BIODB.DFT.DB.FIELDS <- list()
for (f in c(BIODB.ACCESSION, BIODB.NAME, BIODB.FULLNAMES,
            BIODB.COMPOUND.ID, BIODB.MSMODE,
            BIODB.PEAK.MZEXP, BIODB.PEAK.MZTHEO,
            BIODB.PEAK.COMP, BIODB.PEAK.ATTR,
            BIODB.CHROM.COL, BIODB.CHROM.COL.RT,
            BIODB.FORMULA, BIODB.MASS)) {
    .BIODB.DFT.DB.FIELDS[[f]] <- f
}
16
17 #####################
18 # CLASS DECLARATION #
19 #####################
20
# Connection to a mass spectra database stored in a single flat file.
# Extends MassdbConn.
#
# .file               Path of the database file.
# .file.sep           Field separator used when reading the file.
# .file.quote         Quoting character(s) used when reading the file.
# .field.multval.sep  Separator for fields holding several values.
# .db                 Loaded table, or NULL while the file has not been read.
# .db.orig.colnames   Column names as found in the file.
# .fields             Named list mapping field tags to column names.
# .ms.modes           Named character vector of MS mode values.
MassFiledbConn <- methods::setRefClass(
    "MassFiledbConn",
    contains = "MassdbConn",
    fields = list(
        .file              = "character",
        .file.sep          = "character",
        .file.quote        = "character",
        .field.multval.sep = 'character',
        .db                = "ANY",
        .db.orig.colnames  = "character",
        .fields            = "list",
        .ms.modes          = "character"
    )
)
22
23 ###############
24 # CONSTRUCTOR #
25 ###############
26
# Constructor.
#
# file        Path of the database file. Mandatory; the file must exist.
# file.sep    Field separator of the file (tab by default).
# file.quote  Quoting character of the file (double quote by default).
# ...         Forwarded to the initializer of the parent class.
MassFiledbConn$methods( initialize = function(file = NA_character_, file.sep = "\t", file.quote = "\"", ...) {

    # Refuse to go any further without a usable file path
    if (is.null(file) || is.na(file))
        stop("You must specify a file database to load.")
    if ( ! file.exists(file))
        stop(paste0("Cannot locate the file database \"", file ,"\"."))

    # File description
    .file <<- file
    .file.sep <<- file.sep
    .file.quote <<- file.quote

    # The table is only read on first use (see .init.db)
    .db <<- NULL
    .db.orig.colnames <<- NA_character_

    # Field definitions
    .fields <<- .BIODB.DFT.DB.FIELDS
    .field.multval.sep <<- ';'

    # MS modes: each mode name initially stands for itself
    modes <- c(BIODB.MSMODE.NEG, BIODB.MSMODE.POS)
    names(modes) <- modes
    .ms.modes <<- modes

    callSuper(...)
})
46
47 ######################
48 # Is valid field tag #
49 ######################
50
# Tell whether a tag is one of the field tags known to this connection.
#
# tag  The tag(s) to test.
# Returns a logical vector, TRUE where the tag is a name of the .fields list.
MassFiledbConn$methods( isValidFieldTag = function(tag) {
    known.tags <- names(.self$.fields)
    tag %in% known.tags
})
54
55 ###########
56 # INIT DB #
57 ###########
58
# Load the database file into memory on first use. Does nothing when the table
# has already been loaded.
MassFiledbConn$methods( .init.db = function() {

    # Already loaded: nothing to do
    if ( ! is.null(.self$.db))
        return(invisible(NULL))

    # Read the whole file. The quoting characters are passed by name; they
    # bind to the same 'quote' formal of read.table() as before.
    .db <<- read.table(.self$.file,
                       header = TRUE,
                       sep = .self$.file.sep,
                       quote = .self$.file.quote,
                       comment.char = '',
                       row.names = NULL,
                       stringsAsFactors = FALSE)

    # Remember the column names found in the file
    .db.orig.colnames <<- colnames(.self$.db)
})
70
71 #############
72 # Set field #
73 #############
74
# Map a field tag onto one or several columns of the database file.
#
# tag      A field tag; must be one of the names of the .fields list.
# colname  Name of the column that holds the values of the field. When several
#          names are given, these columns are pasted together row by row with
#          a dot separator into a column named after the dot-joined column
#          names, and the tag is mapped onto that combined column.
#
# Stops when the tag or the column name is missing, when the tag is not valid,
# or when one of the columns is not found in the loaded table.
# Once the mapping is recorded, the columns of the loaded table are renamed so
# that each mapped column carries its tag name.
MassFiledbConn$methods( setField = function(tag, colname) {

    ( ! is.null(tag) && ! is.na(tag)) || stop("No tag specified.")
    # colname may hold several names: anyNA() keeps the test scalar, whereas
    # is.na() inside && is an error for length > 1 since R 4.3.
    ( ! is.null(colname) && length(colname) > 0 && ! anyNA(colname)) || stop("No column name specified.")

    # Load database file
    .self$.init.db()

    # Check that this field tag is defined in the fields list
    .self$isValidFieldTag(tag) || stop(paste0("Database field tag \"", tag, "\" is not valid."))

    # Work on a local copy of the table and store it back once at the end
    db <- .self$.db

    # Check that columns are defined in database file
    all(colname %in% names(db)) || stop(paste0("One or more columns among ", paste(colname, collapse = ", "), " are not defined in database file."))

    # Set new definition
    if (length(colname) == 1)
        .fields[[tag]] <<- colname
    else {
        new.col <- paste(colname, collapse = ".")

        # Vectorised row-wise paste; also correct for a table without rows.
        # unname() prevents a column called "sep" or "collapse" from being
        # taken for an argument of paste().
        merged <- do.call(paste, c(unname(as.list(db[colname])), sep = "."))

        # Keep the list of original column names aligned with the columns of
        # the table, otherwise the renaming below would leave the combined
        # column with an NA name.
        pos <- match(new.col, .self$.db.orig.colnames)
        if (is.na(pos)) {
            db[[new.col]] <- merged
            .db.orig.colnames <<- c(.self$.db.orig.colnames, new.col)
        } else
            db[[pos]] <- merged

        .fields[[tag]] <<- new.col
    }

    # Update data frame column names: a column mapped to a tag takes the name
    # of the tag, any other column keeps its original name.
    # NOTE(review): if two tags are mapped to the same column, the lookup
    # yields two names and vapply() stops; decide which tag should win.
    field.cols <- unlist(.self$.fields)
    colnames(db) <- vapply(.self$.db.orig.colnames, function(c) if (c %in% field.cols) names(field.cols)[field.cols %in% c] else c, FUN.VALUE = '', USE.NAMES = FALSE)
    .db <<- db

    invisible(NULL)
})
101
102 ######################################
103 # SET FIELD MULTIPLE VALUE SEPARATOR #
104 ######################################
105
# Define the separator used inside fields that hold several values.
#
# sep  The new separator.
MassFiledbConn$methods( setFieldMultValSep = function(sep) {
    .self$.field.multval.sep <- sep
})
109
110 ################
111 # SET MS MODES #
112 ################
113
# Define the value stored in the .ms.modes vector for an MS mode.
#
# mode   Name of the MS mode, used as the key in .ms.modes.
# value  The value to associate with this mode.
MassFiledbConn$methods( setMsMode = function(mode, value) {
    .ms.modes[[mode]] <<- value
})
117
118 ##########################
119 # GET ENTRY CONTENT TYPE #
120 ##########################
121
# Content type of the entries: always BIODB.DATAFRAME, whatever the value
# of 'type'.
MassFiledbConn$methods( getEntryContentType = function(type) {
    BIODB.DATAFRAME
})
125
126 ################
127 # CHECK FIELDS #
128 ################
129
# Check that field tags are known and present as columns of the database.
#
# fields  Character vector of field tags. An empty vector or a single NA is
#         accepted and skips all checks.
#
# Stops when a tag is not one of the names of the .fields list, or when a tag
# is not a column of the loaded table. The error message lists only the
# offending tags.
MassFiledbConn$methods( .check.fields = function(fields) {

    # Nothing to check. return() must be called: a bare `return` merely
    # evaluates to the function object and execution carries on.
    if (length(fields) == 0 || (length(fields) == 1 && is.na(fields)))
        return(invisible(NULL))

    # Check if fields are known
    unknown.fields <- fields[ ! fields %in% names(.self$.fields)]
    if (length(unknown.fields) > 0)
        stop(paste0("Field(s) ", paste(unknown.fields, collapse = ", "), " is/are unknown."))

    # Init db
    .self$.init.db()

    # Check if fields are defined in file database
    undefined.fields <- fields[ ! fields %in% colnames(.self$.db)]
    if (length(undefined.fields) > 0)
        stop(paste0("Column(s) ", paste(undefined.fields, collapse = ", "), " is/are undefined in file database."))

    invisible(NULL)
})
148
149 ##########
150 # SELECT #
151 ##########
152
153 # Select data from database
# Select data from the database.
#
# cols          Field tags of the columns to extract. NULL or a single NA
#               selects all columns.
# mode          MS mode to keep, as one of the names of .ms.modes. NULL or NA
#               disables the filter.
# compound.ids  Compound IDs to keep. NULL disables the filter.
# drop          Passed to the column extraction: when TRUE a single column is
#               returned as a vector instead of a data frame.
# uniq          Remove duplicated values (only applied to a vector result).
# sort          Sort the values (only applied to a vector result).
# max.rows      Maximum number of rows or values to return. NA means no limit.
#
# Returns a data frame, or a vector when 'drop' reduces the selection to one.
#
# Columns are addressed by tag name, which is the name .check.fields() looks
# for and the name setField() gives to mapped columns.
MassFiledbConn$methods( .select = function(cols = NULL, mode = NULL, compound.ids = NULL, drop = FALSE, uniq = FALSE, sort = FALSE, max.rows = NA_integer_) {

    # Init db
    .self$.init.db()

    # Get db
    db <- .self$.db

    # Filter db on mode
    if ( ! is.null(mode) && ! is.na(mode)) {

        # Check mode value
        mode %in% names(.self$.ms.modes) || stop(paste0("Unknown mode value '", mode, "'."))
        .self$.check.fields(BIODB.MSMODE)

        # Filter on mode
        db <- db[db[[BIODB.MSMODE]] %in% .self$.ms.modes[[mode]], , drop = FALSE]
    }

    # Filter db on compound ids
    if ( ! is.null(compound.ids)) {
        .self$.check.fields(BIODB.COMPOUND.ID)
        db <- db[db[[BIODB.COMPOUND.ID]] %in% compound.ids, , drop = FALSE]
    }

    # Scalar test that stays valid when several columns are requested
    # (is.na() on a vector inside && or || is an error since R 4.3).
    has.cols <- length(cols) > 0 && ! (length(cols) == 1 && is.na(cols))

    # Get subset
    if (has.cols) {
        .self$.check.fields(cols)
        x <- db[, cols, drop = drop]
    } else
        x <- db

    # Rearrange
    if (drop && is.vector(x)) {
        if (uniq)
            x <- x[ ! duplicated(x)]
        if (sort)
            x <- base::sort(x) # the 'sort' argument is a flag, not the function
    }

    # Cut. head() never pads with NA when fewer values are available, copes
    # with a limit of 0 and keeps a one-column data frame a data frame.
    if ( ! is.na(max.rows))
        x <- utils::head(x, max.rows)

    return(x)
})
201
202 #################
203 # GET ENTRY IDS #
204 #################
205
# List the IDs of the entries of a given type.
#
# type  BIODB.SPECTRUM to get accession numbers, BIODB.COMPOUND to get
#       compound IDs.
#
# Returns the distinct IDs, sorted, as a character vector. Any other type
# yields NA_character_.
MassFiledbConn$methods( getEntryIds = function(type) {

    # Unsupported entry type
    if ( ! type %in% c(BIODB.SPECTRUM, BIODB.COMPOUND))
        return(NA_character_)

    # Pick the field that identifies this type of entry
    id.field <- if (type == BIODB.SPECTRUM) BIODB.ACCESSION else BIODB.COMPOUND.ID

    as.character(.self$.select(cols = id.field, drop = TRUE, uniq = TRUE, sort = TRUE))
})
215
216 ##################
217 # GET NB ENTRIES #
218 ##################
219
# Count the entries of a given type.
#
# type  Entry type, as accepted by getEntryIds().
#
# Returns the number of IDs returned by getEntryIds(). The NA placeholder that
# getEntryIds() returns for an unsupported type is not counted, so such a type
# yields 0 instead of 1.
MassFiledbConn$methods( getNbEntries = function(type) {
    ids <- .self$getEntryIds(type)
    sum( ! is.na(ids))
})
223
224 ###############################
225 # GET CHROMATOGRAPHIC COLUMNS #
226 ###############################
227
228 # Inherited from MassdbConn.
# List the chromatographic columns found in the database.
#
# compound.ids  When not NULL, only the rows of these compounds are used.
#
# Returns a data frame with two columns, BIODB.ID and BIODB.TITLE, both filled
# with the distinct chromatographic column names.
MassFiledbConn$methods( getChromCol = function(compound.ids = NULL) {

    # Compound IDs along with their chromatographic columns
    selection <- .self$.select(cols = c(BIODB.COMPOUND.ID, BIODB.CHROM.COL))

    # Restrict to the requested compounds
    if ( ! is.null(compound.ids)) {
        keep <- selection[[BIODB.COMPOUND.ID]] %in% compound.ids
        selection <- selection[keep, ]
    }

    # Distinct chromatographic column names, in order of first appearance
    col.names <- unique(selection[[BIODB.CHROM.COL]])

    # The name serves both as identifier and as title
    chrom.cols <- data.frame(col.names, col.names, stringsAsFactors = FALSE)
    names(chrom.cols) <- c(BIODB.ID, BIODB.TITLE)

    chrom.cols
})
250
251 #################
252 # GET MZ VALUES #
253 #################
254
255 # Inherited from MassdbConn.
# List the M/Z values of the database.
#
# mode         MS mode to restrict the values to; NULL for no restriction.
# max.results  Maximum number of values to return; NA for no limit.
#
# Returns the distinct M/Z values, sorted.
#
# The values are read from the BIODB.PEAK.MZTHEO field, the same field that
# getNbPeaks() uses. The tag requested before, BIODB.PEAK.MZ, is not among the
# default field tags of this class.
# NOTE(review): confirm that theoretical M/Z (rather than BIODB.PEAK.MZEXP) is
# the intended source.
MassFiledbConn$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) {

    # Get mz values
    mz <- .self$.select(cols = BIODB.PEAK.MZTHEO, mode = mode, drop = TRUE, uniq = TRUE, sort = TRUE, max.rows = max.results)

    return(mz)
})
263
264 ################
265 # GET NB PEAKS #
266 ################
267
268 # Inherited from MassdbConn.
# Count the peaks of the database.
#
# mode          MS mode to restrict the count to; NULL for no restriction.
# compound.ids  Compound IDs passed on to .select(); NULL for no restriction.
#
# Returns the number of rows selected.
MassFiledbConn$methods( getNbPeaks = function(mode = NULL, compound.ids = NULL) {

    # Get peaks
    peaks <- .self$.select(cols = BIODB.PEAK.MZTHEO, mode = mode, compound.ids = compound.ids)

    # .select() returns a one-column data frame here, whose length() is its
    # number of columns (always 1). NROW() counts rows of a data frame and
    # elements of a vector alike.
    return(NROW(peaks))
})