comparison MassFiledbConn.R @ 1:253d531a0193 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 36c9d8099c20a1ae848f1337c16564335dd8fb2b
author prog
date Sat, 03 Sep 2016 17:02:01 -0400
parents
children 20d69a062da3
comparison
equal deleted inserted replaced
0:e66bb061af06 1:253d531a0193
1 if ( ! exists('MassFiledbConn')) {
2
3 source('MassdbConn.R')
4
5 # LCMS File db.
6 # In this type of database, a single file is provided in CSV format. The default separator is the tab character.
7 # Each line is one MS peak measurement.
8 # The file contains molecule and spectrum information. Each spectrum has an accession id.
9
10 # TODO Rename setField into setFieldName + addNewField, and setMsMode into setMsModeValue
11
12 #############
13 # CONSTANTS #
14 #############
15
16 # Default database fields
# Maps each field tag to the name of the column that holds it in the database
# file. By default a column is expected to be named exactly like its tag.
# Built inside local() so that no helper variable is left behind in the
# environment this file is sourced into.
.BIODB.DFT.DB.FIELDS <- local({
    tags <- c(BIODB.ACCESSION, BIODB.NAME, BIODB.FULLNAMES, BIODB.COMPOUND.ID, BIODB.MSMODE, BIODB.PEAK.MZ, BIODB.PEAK.COMP, BIODB.PEAK.ATTR, BIODB.CHROM.COL, BIODB.CHROM.COL.RT, BIODB.FORMULA, BIODB.MASS)
    # unique() keeps one entry per tag, as repeated `[[<-` assignments would.
    tags <- unique(tags)
    structure(as.list(tags), names = tags)
})
20
21 #####################
22 # CLASS DECLARATION #
23 #####################
24
# Reference class for a mass spectrometry database stored in a single
# delimited text file. Extends MassdbConn.
MassFiledbConn <- setRefClass("MassFiledbConn",
    contains = "MassdbConn",
    fields = list(
        .file = "character",              # path of the database file
        .file.sep = "character",          # column separator used when reading the file
        .file.quote = "character",        # quoting characters used when reading the file
        .field.multval.sep = 'character', # separator for multiple values inside a field
        .db = "ANY",                      # loaded table, NULL until the file is read
        .fields = "list",                 # field tag -> column name
        .ms.modes = "character"           # MS mode tag -> value used in the file
    ))
26
27 ###############
28 # CONSTRUCTOR #
29 ###############
30
# Constructor.
# file:       path to the database file (mandatory, must exist).
# file.sep:   column separator of the file.
# file.quote: quoting characters of the file.
# ...:        forwarded to the parent class constructor.
MassFiledbConn$methods( initialize = function(file = NA_character_, file.sep = "\t", file.quote = "\"", ...) {

    # A database file is mandatory and has to exist on disk.
    if (is.null(file) || is.na(file))
        stop("You must specify a file database to load.")
    if ( ! file.exists(file))
        stop(paste0("Cannot locate the file database \"", file ,"\"."))

    # By default each MS mode is encoded in the file by its own tag.
    default.modes <- c(BIODB.MSMODE.NEG, BIODB.MSMODE.POS)
    names(default.modes) <- default.modes

    # Store settings; the file itself is only read on first use.
    .file <<- file
    .file.sep <<- file.sep
    .file.quote <<- file.quote
    .field.multval.sep <<- ';'
    .fields <<- .BIODB.DFT.DB.FIELDS
    .ms.modes <<- default.modes
    .db <<- NULL

    callSuper(...)
})
49
50 ######################
51 # Is valid field tag #
52 ######################
53
# Tells whether `tag` is one of the field tags known to this connection.
# Returns a logical, one value per element of `tag`.
MassFiledbConn$methods( isValidFieldTag = function(tag) {
    known.tags <- names(.self$.fields)
    tag %in% known.tags
})
57
58 #############
59 # Set field #
60 #############
61
# Associates a field tag with one or several columns of the database file.
# tag:     a field tag, must be one of the known tags (see isValidFieldTag()).
# colname: one or more column names of the database file. When several are
#          given, a new column is added to the loaded table whose values are
#          the dot-separated concatenation of those columns, and the tag is
#          mapped onto that new column.
# Stops if the tag is unknown or if a column is missing from the file.
MassFiledbConn$methods( setField = function(tag, colname) {

    ( ! is.null(tag) && ! is.na(tag)) || stop("No tag specified.")
    # colname may hold several names, so it must not be fed directly to `&&`
    # (an error for length > 1 operands since R 4.3).
    ( ! is.null(colname) && length(colname) > 0 && ! anyNA(colname)) || stop("No column name specified.")

    # Load database file
    .self$.init.db()

    # Check that this field tag is defined in the fields list
    .self$isValidFieldTag(tag) || stop(paste0("Database field tag \"", tag, "\" is not valid."))

    # Check that columns are defined in database file
    all(colname %in% names(.self$.db)) || stop(paste0("One or more columns among ", paste(colname, collapse = ", "), " are not defined in database file."))

    # Set new definition
    if (length(colname) == 1)
        .fields[[tag]] <<- colname
    else {
        new.col <- paste(colname, collapse = ".")
        # Paste the columns together in one vectorised call. This also gives a
        # zero-length result for an empty table. unname() prevents a column
        # called "sep" or "collapse" from being taken as a paste() argument.
        .self$.db[[new.col]] <- do.call(paste, c(unname(as.list(.self$.db[colname])), sep = '.'))
        .fields[[tag]] <<- new.col
    }
})
85
86 ######################################
87 # SET FIELD MULTIPLE VALUE SEPARATOR #
88 ######################################
89
# Sets the separator used between multiple values stored in a single field.
MassFiledbConn$methods( setFieldMultValSep = function(sep) {
    .self$.field.multval.sep <- sep
})
93
94 ################
95 # SET MS MODES #
96 ################
97
# Defines the value that stands for MS mode `mode` in the database file.
# mode:  the MS mode tag.
# value: the corresponding value as written in the file.
MassFiledbConn$methods( setMsMode = function(mode, value) {
    .ms.modes[[mode]] <<- value
})
101
102 ##########################
103 # GET ENTRY CONTENT TYPE #
104 ##########################
105
# Returns the content type of entries; always BIODB.DATAFRAME, whatever
# the value of `type`.
MassFiledbConn$methods( getEntryContentType = function(type) {
    BIODB.DATAFRAME
})
109
110 ###########
111 # INIT DB #
112 ###########
113
# Loads the database file into the .db field on first call; later calls do
# nothing. After loading, any column whose name is the value of an entry of
# the field map is renamed to that entry's tag.
MassFiledbConn$methods( .init.db = function() {

    if (is.null(.self$.db)) {

        # Load database. `quote` is passed by name: as a bare positional
        # argument it only reached the right parameter by accident of the
        # order of read.table()'s formals.
        .db <<- read.table(.self$.file, sep = .self$.file.sep, quote = .self$.file.quote, header = TRUE, stringsAsFactors = FALSE, row.names = NULL)

        # Rename columns. match() gives the first tag mapped onto each column
        # (NA when there is none), so a column shared by two tags no longer
        # aborts the loading.
        cols <- colnames(.self$.db)
        tag.index <- match(cols, .self$.fields)
        has.tag <- ! is.na(tag.index)
        cols[has.tag] <- names(.self$.fields)[tag.index[has.tag]]
        colnames(.self$.db) <- cols
    }
})
125
126 ################
127 # CHECK FIELDS #
128 ################
129
# Checks that field tags are usable with this database.
# fields: a character vector of field tags.
# Stops if a tag is not part of the field map, or if the column a tag is
# mapped onto does not exist in the database file. The error message lists
# only the offending tags/columns.
MassFiledbConn$methods( .check.fields = function(fields) {

    # Check if fields are known
    unknown.fields <- fields[ ! fields %in% names(.self$.fields)]
    if (length(unknown.fields) > 0)
        stop(paste0("Field(s) ", paste(unknown.fields, collapse = ", "), " is/are unknown."))

    # Init db
    .self$.init.db()

    # Check if fields are defined in file database. The columns must be looked
    # up in the loaded table (.db), not in the .init.db method.
    wanted.cols <- unlist(.self$.fields[fields])
    undefined.cols <- wanted.cols[ ! wanted.cols %in% colnames(.self$.db)]
    if (length(undefined.cols) > 0)
        stop(paste0("Column(s) ", paste(undefined.cols, collapse = ", "), " is/are undefined in file database."))
})
145
146 ################
147 # EXTRACT COLS #
148 ################
149
# Extracts columns from the database.
# cols:     field tags of the columns to extract.
# mode:     if not NULL, an MS mode tag; only rows of that mode are kept.
# drop:     passed to the data frame subsetting; if TRUE a single column is
#           returned as a vector.
# uniq:     if TRUE and the result is a vector, remove duplicated values.
# sort:     if TRUE and the result is a vector, sort the values.
# max.rows: if not NA, keep at most that many rows/values.
# Returns a data frame whose columns are named after `cols`, or a vector,
# or NULL when `cols` is NULL, empty or contains NA.
MassFiledbConn$methods( .extract.cols = function(cols, mode = NULL, drop = FALSE, uniq = FALSE, sort = FALSE, max.rows = NA_integer_) {

    x <- NULL

    # cols may hold several tags, so it must not be fed directly to `&&`
    # (an error for length > 1 operands since R 4.3).
    if ( ! is.null(cols) && length(cols) > 0 && ! anyNA(cols)) {

        # Init db
        .self$.init.db()

        # TODO check existence of cols/fields

        # Get db, eventually filtering it.
        if (is.null(mode))
            db <- .self$.db
        else {
            # Check mode value
            mode %in% names(.self$.ms.modes) || stop(paste0("Unknown mode value '", mode, "'."))
            .self$.check.fields(BIODB.MSMODE)

            # Filter on mode
            db <- .self$.db[.self$.db[[unlist(.self$.fields[BIODB.MSMODE])]] %in% .self$.ms.modes[[mode]], ]
        }

        # Get subset
        x <- db[, unlist(.self$.fields[cols]), drop = drop]

        # Rename columns
        if (is.data.frame(x))
            colnames(x) <- cols

        # Rearrange
        if (drop && is.vector(x)) {
            if (uniq)
                x <- x[ ! duplicated(x)]
            if (sort)
                x <- sort(x)
        }

        # Cut. Never index past the end (that would pad the result with NA),
        # and keep a data frame a data frame even if it has a single column.
        if ( ! is.na(max.rows)) {
            is.df <- is.data.frame(x)
            n.avail <- if (is.df) nrow(x) else length(x)
            keep <- seq_len(max(0, min(max.rows, n.avail)))
            x <- if (is.df) x[keep, , drop = FALSE] else x[keep]
        }
    }

    return(x)
})
195
196 #################
197 # GET ENTRY IDS #
198 #################
199
# Returns the sorted, duplicate-free IDs of all entries of the given type,
# as a character vector: accession numbers for BIODB.SPECTRUM, compound IDs
# for BIODB.COMPOUND. Any other type yields a single NA.
MassFiledbConn$methods( getEntryIds = function(type) {

    if ( ! type %in% c(BIODB.SPECTRUM, BIODB.COMPOUND))
        return(NA_character_)

    id.field <- if (type == BIODB.SPECTRUM) BIODB.ACCESSION else BIODB.COMPOUND.ID
    as.character(.self$.extract.cols(id.field, drop = TRUE, uniq = TRUE, sort = TRUE))
})
209
210 ##################
211 # GET NB ENTRIES #
212 ##################
213
# Returns the number of entries of the given type, i.e. the number of IDs
# returned by getEntryIds().
# NOTE(review): getEntryIds() returns a single NA for an unsupported type, so
# the count is 1 in that case -- confirm this is what callers expect.
MassFiledbConn$methods( getNbEntries = function(type) {
    entry.ids <- .self$getEntryIds(type)
    length(entry.ids)
})
217
218 ###############################
219 # GET CHROMATOGRAPHIC COLUMNS #
220 ###############################
221
222 # Inherited from MassdbConn.
# Lists the chromatographic columns found in the database.
# compound.ids: if not NULL, only rows of these compounds are considered.
# Returns a data frame with columns BIODB.ID and BIODB.TITLE, both holding
# the distinct chromatographic column names.
MassFiledbConn$methods( getChromCol = function(compound.ids = NULL) {

    # Compound ID and chromatographic column of every row
    peaks <- .self$.extract.cols(c(BIODB.COMPOUND.ID, BIODB.CHROM.COL))

    # Restrict to the requested compounds
    if ( ! is.null(compound.ids)) {
        wanted <- peaks[[BIODB.COMPOUND.ID]] %in% compound.ids
        peaks <- peaks[wanted, ]
    }

    # Distinct column names, in order of first appearance
    col.names <- unique(peaks[[BIODB.CHROM.COL]])

    # The name serves both as ID and as title
    setNames(data.frame(col.names, col.names, stringsAsFactors = FALSE), c(BIODB.ID, BIODB.TITLE))
})
244
245 #################
246 # GET MZ VALUES #
247 #################
248
249 # Inherited from MassdbConn.
# Returns the sorted, duplicate-free M/Z values of the database.
# mode:        if not NULL, an MS mode tag used to filter the peaks.
# max.results: if not NA, maximum number of values to return.
MassFiledbConn$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) {
    .self$.extract.cols(BIODB.PEAK.MZ, mode = mode, drop = TRUE, uniq = TRUE, sort = TRUE, max.rows = max.results)
})
257
258 }