Mercurial > repos > prog > lcmsmatching
comparison MassFiledbConn.R @ 6:f86fec07f392 draft default tip
planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author | prog |
---|---|
date | Fri, 22 Feb 2019 16:04:22 -0500 |
parents | fb9c0409d85c |
children |
comparison
equal
deleted
inserted
replaced
5:fb9c0409d85c | 6:f86fec07f392 |
---|---|
1 # LCMS File db. | |
2 # In this type of database, a single file is provided in CSV format. Default separator is tabulation. | |
3 # Each line is an MS peak measurement. | |
4 # The file contains molecule and spectrum information. Each spectrum has an accession id. | |
5 | |
6 # TODO Rename setField into setFieldName + addNewField, and setMsMode into setMsModeValue | |
7 | |
8 ############# | |
9 # CONSTANTS # | |
10 ############# | |
11 | |
12 # Default database fields | |
# Default mapping from field tag to database column name: each tag maps to a
# column of the same name. Built inside local() so that no helper variable is
# left behind in the enclosing environment.
.BIODB.DFT.DB.FIELDS <- local({
    tags <- unique(c(BIODB.ACCESSION, BIODB.NAME, BIODB.FULLNAMES, BIODB.COMPOUND.ID, BIODB.MSMODE, BIODB.PEAK.MZEXP, BIODB.PEAK.MZTHEO, BIODB.PEAK.COMP, BIODB.PEAK.ATTR, BIODB.CHROM.COL, BIODB.CHROM.COL.RT, BIODB.FORMULA, BIODB.MASS))
    fields <- as.list(tags)
    names(fields) <- tags
    fields
})
16 | |
17 ##################### | |
18 # CLASS DECLARATION # | |
19 ##################### | |
20 | |
# Reference class for a mass spectrometry database stored in a single
# character-separated file. Extends MassdbConn.
MassFiledbConn <- methods::setRefClass(
    "MassFiledbConn",
    contains = "MassdbConn",
    fields = list(
        .file              = "character", # Path of the database file.
        .file.sep          = "character", # Column separator used when reading the file.
        .file.quote        = "character", # Quoting characters used when reading the file.
        .field.multval.sep = "character", # Separator for multiple values inside one field.
        .db                = "ANY",       # Loaded data frame, or NULL while not loaded.
        .db.orig.colnames  = "character", # Column names as read from the file.
        .fields            = "list",      # Field tag -> column name.
        .ms.modes          = "character"  # MS mode tag -> value used in the file.
    )
)
22 | |
23 ############### | |
24 # CONSTRUCTOR # | |
25 ############### | |
26 | |
# Constructor.
#   file        Path to the database file. Mandatory, and the file must exist.
#   file.sep    Column separator of the file (tabulation by default).
#   file.quote  Quoting characters of the file (double quote by default).
#   ...         Forwarded to the parent class constructor.
# The file itself is not read here; loading is deferred to .init.db().
MassFiledbConn$methods( initialize = function(file = NA_character_, file.sep = "\t", file.quote = "\"", ...) {

    # A usable, existing file is required
    if (is.null(file) || is.na(file))
        stop("You must specify a file database to load.")
    if ( ! file.exists(file))
        stop(paste0("Cannot locate the file database \"", file ,"\"."))

    # File access parameters
    .file <<- file
    .file.sep <<- file.sep
    .file.quote <<- file.quote

    # Database content is loaded lazily
    .db <<- NULL
    .db.orig.colnames <<- NA_character_

    # Default field mapping and multi-value separator
    .fields <<- .BIODB.DFT.DB.FIELDS
    .field.multval.sep <<- ';'

    # By default each MS mode tag is represented in the file by the tag itself
    modes <- c(BIODB.MSMODE.NEG, BIODB.MSMODE.POS)
    names(modes) <- modes
    .ms.modes <<- modes

    callSuper(...)
})
46 | |
47 ###################### | |
48 # Is valid field tag # | |
49 ###################### | |
50 | |
# Tells whether `tag` is one of the field tags known to this connection,
# i.e. one of the names of the .fields list. Vectorized over `tag`.
MassFiledbConn$methods( isValidFieldTag = function(tag) {
    known.tags <- names(.self$.fields)
    tag %in% known.tags
})
54 | |
55 ########### | |
56 # INIT DB # | |
57 ########### | |
58 | |
# Loads the database file into .db on first use and remembers the column
# names found in the file. Does nothing when the database is already loaded.
MassFiledbConn$methods( .init.db = function() {

    if (is.null(.self$.db)) {

        # Load database.
        # The quote characters are passed by name: leaving them unnamed only
        # reached `quote` through positional matching of the remaining formals.
        .db <<- read.table(.self$.file, sep = .self$.file.sep, quote = .self$.file.quote, header = TRUE, stringsAsFactors = FALSE, row.names = NULL, comment.char = '')

        # Save column names
        .db.orig.colnames <<- colnames(.self$.db)
    }
})
70 | |
71 ############# | |
72 # Set field # | |
73 ############# | |
74 | |
# Associates a field tag with one or several columns of the database file.
#   tag      A field tag known to this connection (see isValidFieldTag()).
#   colname  One column name, or several column names. With several names a
#            new column is created whose values are the row-wise values of
#            those columns joined with '.', and the tag is bound to it.
# Stops if the tag or column names are missing, if the tag is unknown, or if
# a column is not present in the loaded database.
# On exit the data frame columns bound to a tag are named after that tag.
MassFiledbConn$methods( setField = function(tag, colname) {

    ( ! is.null(tag) && ! is.na(tag)) || stop("No tag specified.")
    # colname may hold several names, so test it as a whole: a vector inside
    # `&&` is an error in recent R versions.
    ( ! is.null(colname) && length(colname) > 0 && ! anyNA(colname)) || stop("No column name specified.")

    # Load database file
    .self$.init.db()

    # Check that this field tag is defined in the fields list
    .self$isValidFieldTag(tag) || stop(paste0("Database field tag \"", tag, "\" is not valid."))

    # Check that columns are defined in database file
    all(colname %in% names(.self$.db)) || stop(paste0("One or more columns among ", paste(colname, collapse = ", "), " are not defined in database file."))

    # Set new definition
    if (length(colname) == 1)
        .fields[[tag]] <<- colname
    else {
        new.col <- paste(colname, collapse = ".")

        # seq_len() keeps this correct for a database with no rows
        merged <- vapply(seq_len(nrow(.self$.db)), function(i) { paste(.self$.db[i, colname], collapse = '.') }, FUN.VALUE = '')

        # Register the merged column among the original column names, so the
        # renaming step below produces exactly one name per column. If the
        # same merge was already registered, refresh that column in place.
        pos <- match(new.col, .self$.db.orig.colnames)
        if (is.na(pos)) {
            .self$.db[[new.col]] <- merged
            .db.orig.colnames <<- c(.self$.db.orig.colnames, new.col)
        } else
            .self$.db[[pos]] <- merged

        .fields[[tag]] <<- new.col
    }

    # Update data frame column names: a column bound to a tag takes the tag
    # as its name, any other column keeps its original name.
    colnames(.self$.db) <- vapply(.self$.db.orig.colnames, function(orig.name) if (orig.name %in% .self$.fields) names(.self$.fields)[.self$.fields %in% orig.name] else orig.name, FUN.VALUE = '', USE.NAMES = FALSE)
})
101 | |
102 ###################################### | |
103 # SET FIELD MULTIPLE VALUE SEPARATOR # | |
104 ###################################### | |
105 | |
# Sets the separator used between multiple values stored in a single field.
#   sep  The new separator.
MassFiledbConn$methods( setFieldMultValSep = function(sep) {
    .self$.field.multval.sep <- sep
})
109 | |
110 ################ | |
111 # SET MS MODES # | |
112 ################ | |
113 | |
# Sets the value that represents an MS mode inside the database file.
#   mode   The MS mode tag to (re)define.
#   value  The value used for that mode in the file.
MassFiledbConn$methods( setMsMode = function(mode, value) {
    modes <- .self$.ms.modes
    modes[[mode]] <- value
    .ms.modes <<- modes
    invisible(value)
})
117 | |
118 ########################## | |
119 # GET ENTRY CONTENT TYPE # | |
120 ########################## | |
121 | |
# Returns the content type of entries for this connection, which is always
# BIODB.DATAFRAME. The `type` argument is not used.
MassFiledbConn$methods( getEntryContentType = function(type) {
    BIODB.DATAFRAME
})
125 | |
126 ################ | |
127 # CHECK FIELDS # | |
128 ################ | |
129 | |
# Validates a vector of field tags.
#   fields  Field tags to check. An empty vector or a single NA is accepted
#           silently.
# Stops if a tag is not a known field tag, or if no column of the loaded
# database carries the tag as its name. Returns NULL invisibly otherwise.
MassFiledbConn$methods( .check.fields = function(fields) {

    # Nothing to check. `return` must be called: a bare `return` only
    # evaluates the function object and execution carries on.
    if (length(fields) == 0 || (length(fields) == 1 && is.na(fields)))
        return(invisible(NULL))

    # Check if fields are known
    unknown.fields <- fields[ ! fields %in% names(.self$.fields)]
    if (length(unknown.fields) > 0)
        stop(paste0("Field(s) ", paste(unknown.fields, collapse = ", "), " is/are unknown."))

    # Init db
    .self$.init.db()

    # Check if fields are defined in file database
    undefined.fields <- fields[ ! fields %in% colnames(.self$.db)]
    if (length(undefined.fields) > 0)
        stop(paste0("Column(s) ", paste(undefined.fields, collapse = ", "), " is/are undefined in file database."))

    invisible(NULL)
})
148 | |
149 ########## | |
150 # SELECT # | |
151 ########## | |
152 | |
153 # Select data from database | |
# Selects data from the database.
#   cols          Field tags of the columns to return. NULL or NA returns all
#                 columns.
#   mode          MS mode tag to filter on. NULL or NA disables the filter.
#   compound.ids  Compound IDs to filter on. NULL or a single NA disables the
#                 filter.
#   drop          Passed to `[` when extracting columns: if TRUE a single
#                 column is returned as a vector.
#   uniq          Remove duplicated values (only applied to a vector result).
#   sort          Sort values (only applied to a vector result).
#   max.rows      Maximum number of rows or values returned. NA means no limit.
# Returns a data frame, or a vector when `drop` is TRUE and one column is
# selected.
# Columns are addressed by field tag: setField() renames the data frame
# columns to their tags, and the default mapping binds each tag to a column
# of the same name.
MassFiledbConn$methods( .select = function(cols = NULL, mode = NULL, compound.ids = NULL, drop = FALSE, uniq = FALSE, sort = FALSE, max.rows = NA_integer_) {

    # Init db
    .self$.init.db()

    # Get db
    db <- .self$.db

    # Filter db on mode
    if ( ! is.null(mode) && ! is.na(mode)) {

        # Check mode value
        mode %in% names(.self$.ms.modes) || stop(paste0("Unknown mode value '", mode, "'."))
        .self$.check.fields(BIODB.MSMODE)

        # Filter on mode
        db <- db[db[[BIODB.MSMODE]] %in% .self$.ms.modes[[mode]], , drop = FALSE]
    }

    # Filter db on compound ids
    if ( ! is.null(compound.ids) && ! (length(compound.ids) == 1 && is.na(compound.ids))) {
        .self$.check.fields(BIODB.COMPOUND.ID)
        db <- db[db[[BIODB.COMPOUND.ID]] %in% compound.ids, , drop = FALSE]
    }

    # Get subset. `cols` may hold several tags, so it is tested as a whole
    # rather than placed directly inside `&&` / `||`.
    has.cols <- ! is.null(cols) && ! all(is.na(cols))
    if (has.cols) {
        .self$.check.fields(cols)
        x <- db[, cols, drop = drop]
    } else
        x <- db

    # Rearrange
    if (drop && is.vector(x)) {
        if (uniq)
            x <- x[ ! duplicated(x)]
        if (sort)
            x <- base::sort(x)
    }

    # Cut, without padding with NA when fewer rows than max.rows are available
    if ( ! is.na(max.rows)) {
        if (is.vector(x))
            x <- x[seq_len(max(0, min(max.rows, length(x))))]
        else
            x <- x[seq_len(max(0, min(max.rows, nrow(x)))), , drop = FALSE]
    }

    return(x)
})
201 | |
202 ################# | |
203 # GET ENTRY IDS # | |
204 ################# | |
205 | |
# Returns the sorted, de-duplicated entry IDs of the given entry type, as a
# character vector.
#   type  BIODB.SPECTRUM (accession IDs) or BIODB.COMPOUND (compound IDs).
# Any other type yields NA_character_.
MassFiledbConn$methods( getEntryIds = function(type) {

    # Unsupported entry type
    if ( ! type %in% c(BIODB.SPECTRUM, BIODB.COMPOUND))
        return(NA_character_)

    # Pick the field holding the IDs for this entry type
    id.field <- if (type == BIODB.SPECTRUM) BIODB.ACCESSION else BIODB.COMPOUND.ID

    as.character(.self$.select(cols = id.field, drop = TRUE, uniq = TRUE, sort = TRUE))
})
215 | |
216 ################## | |
217 # GET NB ENTRIES # | |
218 ################## | |
219 | |
# Returns the number of entries of the given type, computed as the length of
# the vector returned by getEntryIds().
# NOTE(review): getEntryIds() returns a single NA for an unsupported type, so
# the count is 1 in that case -- confirm this is intended.
MassFiledbConn$methods( getNbEntries = function(type) {
    ids <- .self$getEntryIds(type)
    length(ids)
})
223 | |
224 ############################### | |
225 # GET CHROMATOGRAPHIC COLUMNS # | |
226 ############################### | |
227 | |
228 # Inherited from MassdbConn. | |
# Returns the chromatographic columns found in the database.
#   compound.ids  If not NULL, only rows whose compound ID is in this vector
#                 are considered.
# Returns a data frame with two columns, BIODB.ID and BIODB.TITLE, both
# holding the distinct chromatographic column values.
MassFiledbConn$methods( getChromCol = function(compound.ids = NULL) {

    # Fetch compound IDs together with chromatographic columns
    wanted <- c(BIODB.COMPOUND.ID, BIODB.CHROM.COL)
    peaks <- .self$.select(cols = wanted)

    # Restrict to the requested compounds
    if ( ! is.null(compound.ids)) {
        keep <- peaks[[BIODB.COMPOUND.ID]] %in% compound.ids
        peaks <- peaks[keep, ]
    }

    # Distinct chromatographic column values
    col.names <- unique(peaks[[BIODB.CHROM.COL]])

    # The same values serve as both identifier and title
    chrom.cols <- data.frame(col.names, col.names, stringsAsFactors = FALSE)
    colnames(chrom.cols) <- c(BIODB.ID, BIODB.TITLE)

    chrom.cols
})
250 | |
251 ################# | |
252 # GET MZ VALUES # | |
253 ################# | |
254 | |
255 # Inherited from MassdbConn. | |
# Returns the sorted, de-duplicated M/Z values of the database as a vector.
#   mode         MS mode tag to filter on, or NULL for no filtering.
#   max.results  Maximum number of values returned, NA for no limit.
# NOTE(review): BIODB.PEAK.MZ is not among the tags used to build the default
# field list in this file (which has BIODB.PEAK.MZEXP and BIODB.PEAK.MZTHEO)
# -- confirm this tag is accepted by .check.fields().
MassFiledbConn$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) {
    .self$.select(cols = BIODB.PEAK.MZ, mode = mode, drop = TRUE, uniq = TRUE, sort = TRUE, max.rows = max.results)
})
263 | |
264 ################ | |
265 # GET NB PEAKS # | |
266 ################ | |
267 | |
268 # Inherited from MassdbConn. | |
# Returns the number of peaks in the database.
#   mode          MS mode tag, forwarded to .select().
#   compound.ids  Compound IDs, forwarded to .select().
MassFiledbConn$methods( getNbPeaks = function(mode = NULL, compound.ids = NULL) {

    # Get peaks as a plain vector. Without drop = TRUE, .select() returns a
    # one-column data frame, whose length() is its number of columns (1)
    # rather than the number of peaks.
    peaks <- .self$.select(cols = BIODB.PEAK.MZTHEO, mode = mode, compound.ids = compound.ids, drop = TRUE)

    return(length(peaks))
})