Mercurial > repos > prog > lcmsmatching
comparison MassFiledbConn.R @ 2:20d69a062da3 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author | prog |
---|---|
date | Thu, 02 Mar 2017 08:55:00 -0500 |
parents | 253d531a0193 |
children | fb9c0409d85c |
comparison
equal
deleted
inserted
replaced
1:253d531a0193 | 2:20d69a062da3 |
---|---|
1 if ( ! exists('MassFiledbConn')) { | 1 # LCMS File db. |
2 | 2 # In this type of database, a single file is provided in CSV format. The default separator is the tab character. |
3 source('MassdbConn.R') | 3 # Each line is an MS peak measurement. |
4 | 4 # The file contains molecule and spectrum information. Each spectrum has an accession id. |
5 # LCMS File db. | 5 |
6 # In this type of database, a single file is provided in CSV format. The default separator is the tab character. | 6 # TODO Rename setField into setFieldName + addNewField, and setMsMode into setMsModeValue |
7 # Each line is an MS peak measurement. | 7 |
8 # The file contains molecule and spectrum information. Each spectrum has an accession id. | 8 ############# |
9 | 9 # CONSTANTS # |
10 # TODO Rename setField into setFieldName + addNewField, and setMsMode into setMsModeValue | 10 ############# |
11 | 11 |
12 ############# | 12 # Default database fields |
13 # CONSTANTS # | 13 .BIODB.DFT.DB.FIELDS <- list() |
14 ############# | 14 for (f in c(BIODB.ACCESSION, BIODB.NAME, BIODB.FULLNAMES, BIODB.COMPOUND.ID, BIODB.MSMODE, BIODB.PEAK.MZEXP, BIODB.PEAK.MZTHEO, BIODB.PEAK.COMP, BIODB.PEAK.ATTR, BIODB.CHROM.COL, BIODB.CHROM.COL.RT, BIODB.FORMULA, BIODB.MASS)) |
15 | 15 .BIODB.DFT.DB.FIELDS[[f]] <- f |
16 # Default database fields | 16 |
17 .BIODB.DFT.DB.FIELDS <- list() | 17 ##################### |
18 for (f in c(BIODB.ACCESSION, BIODB.NAME, BIODB.FULLNAMES, BIODB.COMPOUND.ID, BIODB.MSMODE, BIODB.PEAK.MZ, BIODB.PEAK.COMP, BIODB.PEAK.ATTR, BIODB.CHROM.COL, BIODB.CHROM.COL.RT, BIODB.FORMULA, BIODB.MASS)) | 18 # CLASS DECLARATION # |
19 .BIODB.DFT.DB.FIELDS[[f]] <- f | 19 ##################### |
20 | 20 |
21 ##################### | 21 MassFiledbConn <- methods::setRefClass("MassFiledbConn", contains = "MassdbConn", fields = list(.file = "character", .file.sep = "character", .file.quote = "character", .field.multval.sep = 'character', .db = "ANY", .db.orig.colnames = "character", .fields = "list", .ms.modes = "character")) |
22 # CLASS DECLARATION # | 22 |
23 ##################### | 23 ############### |
24 | 24 # CONSTRUCTOR # |
25 MassFiledbConn <- setRefClass("MassFiledbConn", contains = "MassdbConn", fields = list(.file = "character", .file.sep = "character", .file.quote = "character", .field.multval.sep = 'character', .db = "ANY", .fields = "list", .ms.modes = "character")) | 25 ############### |
26 | 26 |
27 ############### | 27 MassFiledbConn$methods( initialize = function(file = NA_character_, file.sep = "\t", file.quote = "\"", ...) { |
28 # CONSTRUCTOR # | 28 |
29 ############### | 29 # Check file |
30 | 30 (! is.null(file) && ! is.na(file)) || stop("You must specify a file database to load.") |
31 MassFiledbConn$methods( initialize = function(file = NA_character_, file.sep = "\t", file.quote = "\"", ...) { | 31 file.exists(file) || stop(paste0("Cannot locate the file database \"", file ,"\".")) |
32 | 32 |
33 # Check file | 33 # Set fields |
34 (! is.null(file) && ! is.na(file)) || stop("You must specify a file database to load.") | 34 .db <<- NULL |
35 file.exists(file) || stop(paste0("Cannot locate the file database \"", file ,"\".")) | 35 .db.orig.colnames <<- NA_character_ |
36 | 36 .file <<- file |
37 # Set fields | 37 .file.sep <<- file.sep |
38 .db <<- NULL | 38 .file.quote <<- file.quote |
39 .file <<- file | 39 .fields <<- .BIODB.DFT.DB.FIELDS |
40 .file.sep <<- file.sep | 40 .field.multval.sep <<- ';' |
41 .file.quote <<- file.quote | 41 .ms.modes <<- c(BIODB.MSMODE.NEG, BIODB.MSMODE.POS) |
42 .fields <<- .BIODB.DFT.DB.FIELDS | 42 names(.self$.ms.modes) <- .self$.ms.modes |
43 .field.multval.sep <<- ';' | 43 |
44 .ms.modes <<- c(BIODB.MSMODE.NEG, BIODB.MSMODE.POS) | 44 callSuper(...) |
45 names(.self$.ms.modes) <- .self$.ms.modes | 45 }) |
46 | 46 |
47 callSuper(...) | 47 ###################### |
48 }) | 48 # Is valid field tag # |
49 | 49 ###################### |
50 ###################### | 50 |
51 # Is valid field tag # | 51 MassFiledbConn$methods( isValidFieldTag = function(tag) { |
52 ###################### | 52 return (tag %in% names(.self$.fields)) |
53 | 53 }) |
54 MassFiledbConn$methods( isValidFieldTag = function(tag) { | 54 |
55 return (tag %in% names(.self$.fields)) | 55 ########### |
56 }) | 56 # INIT DB # |
57 | 57 ########### |
58 ############# | 58 |
59 # Set field # | 59 MassFiledbConn$methods( .init.db = function() { |
60 ############# | 60 |
61 | 61 if (is.null(.self$.db)) { |
62 MassFiledbConn$methods( setField = function(tag, colname) { | 62 |
63 | 63 # Load database |
64 ( ! is.null(tag) && ! is.na(tag)) || stop("No tag specified.") | 64 .db <<- read.table(.self$.file, sep = .self$.file.sep, .self$.file.quote, header = TRUE, stringsAsFactors = FALSE, row.names = NULL) |
65 ( ! is.null(colname) && ! is.na(colname)) || stop("No column name specified.") | 65 |
66 | 66 # Save column names |
67 # Load database file | 67 .db.orig.colnames <<- colnames(.self$.db) |
68 .self$.init.db() | 68 } |
69 | 69 }) |
70 # Check that this field tag is defined in the fields list | 70 |
71 .self$isValidFieldTag(tag) || stop(paste0("Database field tag \"", tag, "\" is not valid.")) | 71 ############# |
72 | 72 # Set field # |
73 # Check that columns are defined in database file | 73 ############# |
74 all(colname %in% names(.self$.db)) || stop(paste0("One or more columns among ", paste(colname, collapse = ", "), " are not defined in database file.")) | 74 |
75 | 75 MassFiledbConn$methods( setField = function(tag, colname) { |
76 # Set new definition | 76 |
77 if (length(colname) == 1) | 77 ( ! is.null(tag) && ! is.na(tag)) || stop("No tag specified.") |
78 .fields[[tag]] <<- colname | 78 ( ! is.null(colname) && ! is.na(colname)) || stop("No column name specified.") |
79 else { | 79 |
80 new.col <- paste(colname, collapse = ".") | 80 # Load database file |
81 .self$.db[[new.col]] <- vapply(seq(nrow(.self$.db)), function(i) { paste(.self$.db[i, colname], collapse = '.') }, FUN.VALUE = '') | 81 .self$.init.db() |
82 .fields[[tag]] <<- new.col | 82 |
83 } | 83 # Check that this field tag is defined in the fields list |
84 }) | 84 .self$isValidFieldTag(tag) || stop(paste0("Database field tag \"", tag, "\" is not valid.")) |
85 | 85 |
86 ###################################### | 86 # Check that columns are defined in database file |
87 # SET FIELD MULTIPLE VALUE SEPARATOR # | 87 all(colname %in% names(.self$.db)) || stop(paste0("One or more columns among ", paste(colname, collapse = ", "), " are not defined in database file.")) |
88 ###################################### | 88 |
89 | 89 # Set new definition |
90 MassFiledbConn$methods( setFieldMultValSep = function(sep) { | 90 if (length(colname) == 1) |
91 .field.multval.sep <<- sep | 91 .fields[[tag]] <<- colname |
92 }) | 92 else { |
93 | 93 new.col <- paste(colname, collapse = ".") |
94 ################ | 94 .self$.db[[new.col]] <- vapply(seq(nrow(.self$.db)), function(i) { paste(.self$.db[i, colname], collapse = '.') }, FUN.VALUE = '') |
95 # SET MS MODES # | 95 .fields[[tag]] <<- new.col |
96 ################ | 96 } |
97 | 97 |
98 MassFiledbConn$methods( setMsMode = function(mode, value) { | 98 # Update data frame column names |
99 .self$.ms.modes[[mode]] <- value | 99 colnames(.self$.db) <- vapply(.self$.db.orig.colnames, function(c) if (c %in% .self$.fields) names(.self$.fields)[.self$.fields %in% c] else c, FUN.VALUE = '') |
100 }) | 100 }) |
101 | 101 |
102 ########################## | 102 ###################################### |
103 # GET ENTRY CONTENT TYPE # | 103 # SET FIELD MULTIPLE VALUE SEPARATOR # |
104 ########################## | 104 ###################################### |
105 | 105 |
106 MassFiledbConn$methods( getEntryContentType = function(type) { | 106 MassFiledbConn$methods( setFieldMultValSep = function(sep) { |
107 return(BIODB.DATAFRAME) | 107 .field.multval.sep <<- sep |
108 }) | 108 }) |
109 | 109 |
110 ########### | 110 ################ |
111 # INIT DB # | 111 # SET MS MODES # |
112 ########### | 112 ################ |
113 | 113 |
114 MassFiledbConn$methods( .init.db = function() { | 114 MassFiledbConn$methods( setMsMode = function(mode, value) { |
115 | 115 .self$.ms.modes[[mode]] <- value |
116 if (is.null(.self$.db)) { | 116 }) |
117 | 117 |
118 # Load database | 118 ########################## |
119 .db <<- read.table(.self$.file, sep = .self$.file.sep, .self$.file.quote, header = TRUE, stringsAsFactors = FALSE, row.names = NULL) | 119 # GET ENTRY CONTENT TYPE # |
120 | 120 ########################## |
121 # Rename columns | 121 |
122 colnames(.self$.db) <- vapply(colnames(.self$.db), function(c) if (c %in% .self$.fields) names(.self$.fields)[.self$.fields %in% c] else c, FUN.VALUE = '') | 122 MassFiledbConn$methods( getEntryContentType = function(type) { |
123 } | 123 return(BIODB.DATAFRAME) |
124 }) | 124 }) |
125 | 125 |
126 ################ | 126 ################ |
127 # CHECK FIELDS # | 127 # CHECK FIELDS # |
128 ################ | 128 ################ |
129 | 129 |
130 MassFiledbConn$methods( .check.fields = function(fields) { | 130 MassFiledbConn$methods( .check.fields = function(fields) { |
131 | 131 |
132 # Check if fields are known | 132 if (length(fields) ==0 || (length(fields) == 1 && is.na(fields))) |
133 unknown.fields <- names(.self$.fields)[ ! fields %in% names(.self$.fields)] | 133 return |
134 if (length(unknown.fields) > 0) | 134 |
135 stop(paste0("Field(s) ", paste(fields, collapse = ", "), " is/are unknown.")) | 135 # Check if fields are known |
136 | 136 unknown.fields <- names(.self$.fields)[ ! fields %in% names(.self$.fields)] |
137 # Init db | 137 if (length(unknown.fields) > 0) |
138 .self$.init.db() | 138 stop(paste0("Field(s) ", paste(fields, collapse = ", "), " is/are unknown.")) |
139 | 139 |
140 # Check if fields are defined in file database | 140 # Init db |
141 undefined.fields <- colnames(.self$.init.db)[ ! unlist(.self$.fields[fields]) %in% colnames(.self$.init.db)] | 141 .self$.init.db() |
142 if (length(undefined.fields) > 0) | 142 |
143 stop(paste0("Column(s) ", paste(unlist(.self$.fields[fields]), collapse = ", "), " is/are undefined in file database.")) | 143 # Check if fields are defined in file database |
144 }) | 144 undefined.fields <- colnames(.self$.db)[ ! fields %in% colnames(.self$.db)] |
145 | 145 if (length(undefined.fields) > 0) |
146 ################ | 146 stop(paste0("Column(s) ", paste(fields), collapse = ", "), " is/are undefined in file database.") |
147 # EXTRACT COLS # | 147 }) |
148 ################ | 148 |
149 | 149 ########## |
150 MassFiledbConn$methods( .extract.cols = function(cols, mode = NULL, drop = FALSE, uniq = FALSE, sort = FALSE, max.rows = NA_integer_) { | 150 # SELECT # |
151 | 151 ########## |
152 x <- NULL | 152 |
153 | 153 # Select data from database |
154 if ( ! is.null(cols) && ! is.na(cols)) { | 154 MassFiledbConn$methods( .select = function(cols = NULL, mode = NULL, compound.ids = NULL, drop = FALSE, uniq = FALSE, sort = FALSE, max.rows = NA_integer_) { |
155 | 155 |
156 # Init db | 156 x <- NULL |
157 .self$.init.db() | 157 |
158 | 158 # Init db |
159 # TODO check existence of cols/fields | 159 .self$.init.db() |
160 | 160 |
161 # Get db, optionally filtering it. | 161 # Get db |
162 if (is.null(mode)) | 162 db <- .self$.db |
163 db <- .self$.db | 163 |
164 else { | 164 # Filter db on mode |
165 # Check mode value | 165 if ( ! is.null(mode) && ! is.na(mode)) { |
166 mode %in% names(.self$.ms.modes) || stop(paste0("Unknown mode value '", mode, "'.")) | 166 |
167 .self$.check.fields(BIODB.MSMODE) | 167 # Check mode value |
168 | 168 mode %in% names(.self$.ms.modes) || stop(paste0("Unknown mode value '", mode, "'.")) |
169 # Filter on mode | 169 .self$.check.fields(BIODB.MSMODE) |
170 db <- .self$.db[.self$.db[[unlist(.self$.fields[BIODB.MSMODE])]] %in% .self$.ms.modes[[mode]], ] | 170 |
171 } | 171 # Filter on mode |
172 | 172 db <- db[db[[unlist(.self$.fields[BIODB.MSMODE])]] %in% .self$.ms.modes[[mode]], ] |
173 # Get subset | 173 } |
174 x <- db[, unlist(.self$.fields[cols]), drop = drop] | 174 |
175 | 175 # Filter db on compound ids |
176 # Rename columns | 176 # TODO |
177 if (is.data.frame(x)) | 177 |
178 colnames(x) <- cols | 178 if ( ! is.null(cols) && ! is.na(cols)) |
179 | 179 .self$.check.fields(cols) |
180 # Rearrange | 180 |
181 if (drop && is.vector(x)) { | 181 # Get subset |
182 if (uniq) | 182 if (is.null(cols) || is.na(cols)) |
183 x <- x[ ! duplicated(x)] | 183 x <- db |
184 if (sort) | 184 else |
185 x <- sort(x) | 185 x <- db[, unlist(.self$.fields[cols]), drop = drop] |
186 } | 186 |
187 | 187 # Rearrange |
188 # Cut | 188 if (drop && is.vector(x)) { |
189 if ( ! is.na(max.rows)) | 189 if (uniq) |
190 x <- if (is.vector(x)) x[1:max.rows] else x[1:max.rows, ] | 190 x <- x[ ! duplicated(x)] |
191 } | 191 if (sort) |
192 | 192 x <- sort(x) |
193 return(x) | 193 } |
194 }) | 194 |
195 | 195 # Cut |
196 ################# | 196 if ( ! is.na(max.rows)) |
197 # GET ENTRY IDS # | 197 x <- if (is.vector(x)) x[1:max.rows] else x[1:max.rows, ] |
198 ################# | 198 |
199 | 199 return(x) |
200 MassFiledbConn$methods( getEntryIds = function(type) { | 200 }) |
201 | 201 |
202 ids <- NA_character_ | 202 ################# |
203 | 203 # GET ENTRY IDS # |
204 if (type %in% c(BIODB.SPECTRUM, BIODB.COMPOUND)) | 204 ################# |
205 ids <- as.character(.self$.extract.cols(if (type == BIODB.SPECTRUM) BIODB.ACCESSION else BIODB.COMPOUND.ID, drop = TRUE, uniq = TRUE, sort = TRUE)) | 205 |
206 | 206 MassFiledbConn$methods( getEntryIds = function(type) { |
207 return(ids) | 207 |
208 }) | 208 ids <- NA_character_ |
209 | 209 |
210 ################## | 210 if (type %in% c(BIODB.SPECTRUM, BIODB.COMPOUND)) |
211 # GET NB ENTRIES # | 211 ids <- as.character(.self$.select(cols = if (type == BIODB.SPECTRUM) BIODB.ACCESSION else BIODB.COMPOUND.ID, drop = TRUE, uniq = TRUE, sort = TRUE)) |
212 ################## | 212 |
213 | 213 return(ids) |
214 MassFiledbConn$methods( getNbEntries = function(type) { | 214 }) |
215 return(length(.self$getEntryIds(type))) | 215 |
216 }) | 216 ################## |
217 | 217 # GET NB ENTRIES # |
218 ############################### | 218 ################## |
219 # GET CHROMATOGRAPHIC COLUMNS # | 219 |
220 ############################### | 220 MassFiledbConn$methods( getNbEntries = function(type) { |
221 | 221 return(length(.self$getEntryIds(type))) |
222 # Inherited from MassdbConn. | 222 }) |
223 MassFiledbConn$methods( getChromCol = function(compound.ids = NULL) { | 223 |
224 | 224 ############################### |
225 # Extract needed columns | 225 # GET CHROMATOGRAPHIC COLUMNS # |
226 db <- .self$.extract.cols(c(BIODB.COMPOUND.ID, BIODB.CHROM.COL)) | 226 ############################### |
227 | 227 |
228 # Filter on molecule IDs | 228 # Inherited from MassdbConn. |
229 if ( ! is.null(compound.ids)) | 229 MassFiledbConn$methods( getChromCol = function(compound.ids = NULL) { |
230 db <- db[db[[BIODB.COMPOUND.ID]] %in% compound.ids, ] | 230 |
231 | 231 # Extract needed columns |
232 # Get column names | 232 db <- .self$.select(cols = c(BIODB.COMPOUND.ID, BIODB.CHROM.COL)) |
233 cols <- db[[BIODB.CHROM.COL]] | 233 |
234 | 234 # Filter on molecule IDs |
235 # Remove duplicates | 235 if ( ! is.null(compound.ids)) |
236 cols <- cols[ ! duplicated(cols)] | 236 db <- db[db[[BIODB.COMPOUND.ID]] %in% compound.ids, ] |
237 | 237 |
238 # Make data frame | 238 # Get column names |
239 chrom.cols <- data.frame(cols, cols, stringsAsFactors = FALSE) | 239 cols <- db[[BIODB.CHROM.COL]] |
240 colnames(chrom.cols) <- c(BIODB.ID, BIODB.TITLE) | 240 |
241 | 241 # Remove duplicates |
242 return(chrom.cols) | 242 cols <- cols[ ! duplicated(cols)] |
243 }) | 243 |
244 | 244 # Make data frame |
245 ################# | 245 chrom.cols <- data.frame(cols, cols, stringsAsFactors = FALSE) |
246 # GET MZ VALUES # | 246 colnames(chrom.cols) <- c(BIODB.ID, BIODB.TITLE) |
247 ################# | 247 |
248 | 248 return(chrom.cols) |
249 # Inherited from MassdbConn. | 249 }) |
250 MassFiledbConn$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) { | 250 |
251 | 251 ################# |
252 # Get mz values | 252 # GET MZ VALUES # |
253 mz <- .self$.extract.cols(BIODB.PEAK.MZ, mode = mode, drop = TRUE, uniq = TRUE, sort = TRUE, max.rows = max.results) | 253 ################# |
254 | 254 |
255 return(mz) | 255 # Inherited from MassdbConn. |
256 }) | 256 MassFiledbConn$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) { |
257 | 257 |
258 } | 258 # Get mz values |
259 mz <- .self$.select(cols = BIODB.PEAK.MZ, mode = mode, drop = TRUE, uniq = TRUE, sort = TRUE, max.rows = max.results) | |
260 | |
261 return(mz) | |
262 }) | |
263 | |
264 ################ | |
265 # GET NB PEAKS # | |
266 ################ | |
267 | |
268 # Inherited from MassdbConn. | |
269 MassFiledbConn$methods( getNbPeaks = function(mode = NULL, compound.ids = NULL) { | |
270 | |
271 # Get peaks | |
272 peaks <- .self$.select(cols = BIODB.PEAK.MZTHEO, mode = mode, compound.ids = compound.ids) | |
273 | |
274 return(length(peaks)) | |
275 }) |