0
|
1
|
|
2
|
19
|
3 assign("TOOL_NAME", "XSeekerPreparator", envir = globalenv())
|
|
4 lockBinding("TOOL_NAME", globalenv())
|
|
5 assign("VERSION", "1.3.0", envir = globalenv())
|
|
6 lockBinding("VERSION", globalenv())
|
|
7 assign("DEBUG_FAST", FALSE, envir = globalenv())
|
|
8 lockBinding("DEBUG_FAST", globalenv())
|
|
9 assign("DEBUG_FAST_IGNORE_SLOW_OP", DEBUG_FAST, envir = globalenv())
|
|
10 lockBinding("DEBUG_FAST_IGNORE_SLOW_OP", globalenv())
|
|
11 assign("PROCESS_SMOL_BATCH", DEBUG_FAST, envir = globalenv())
|
|
12 lockBinding("PROCESS_SMOL_BATCH", globalenv())
|
|
13 assign("FAST_FEATURE_RATIO", 10, envir = globalenv())
|
|
14 lockBinding("FAST_FEATURE_RATIO", globalenv())
|
|
15 assign("OUTPUT_SPECIFIC_TOOL", "XSeeker_Galaxy", envir = globalenv())
|
|
16 lockBinding("OUTPUT_SPECIFIC_TOOL", globalenv())
|
14
|
17
|
19
|
18 assign(
|
|
19 "ENRICHED_RDATA_VERSION",
|
|
20 paste(VERSION, OUTPUT_SPECIFIC_TOOL, sep = "-"),
|
|
21 envir = globalenv()
|
|
22 )
|
|
23 lockBinding("ENRICHED_RDATA_VERSION", globalenv())
|
|
24 assign("ENRICHED_RDATA_DOC", sprintf("
|
0
|
25 Welcome to the enriched <Version %s> of the output of CAMERA/xcms.
|
|
26 This doc was generated by the tool: %s - Version %s
|
|
27 To show the different variables contained in this rdata, type:
|
|
28 - `load('this_rdata.rdata', rdata_env <- new.env())`
|
|
29 - `names(rdata_env)`
|
|
30
|
|
31 Sections
|
|
32 ######
|
|
33
|
|
34
|
|
35 This tools helpers
|
|
36 ------
|
|
37 The version number is somewhat special because the evolution of the
|
|
38 rdata's format is non-linear.
|
|
39 There may be different branches, each evolving separatly.
|
|
40 To reflect these branches's diversions, there may be a prepended
|
|
41 branch name following this format:
|
|
42 major.minor.patch-branch_name
|
|
43 Like this, we can process rdata with the same tool, and output
|
|
44 rdata formated differently, for each tool.
|
|
45
|
|
46
|
|
47 - enriched_rdata:
|
|
48 - Description: flag created by that tool to tell it was enriched.
|
|
49 - Retrieval method: enriched_rdata <- TRUE
|
|
50
|
|
51 - enriched_rdata_version:
|
|
52 - Description: A flag created by that tool to tell which version of
|
|
53 this tool has enriched the rdata.
|
19
|
54 - Retrieval method:
|
|
55 enriched_rdata_version <- sprintf(
|
|
56 \"%s\",
|
|
57 ENRICHED_RDATA_VERSION
|
|
58 )
|
0
|
59
|
|
60 - enriched_rdata_doc:
|
|
61 - Description: Contains the documentation string.
|
|
62
|
|
63 Data from original mzxml file
|
|
64 ------
|
|
65 - tic:
|
|
66 - Description: Those are the tic values from the original mzxml
|
|
67 file, extracted using xcms 2.
|
|
68 - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@tic
|
|
69 - xcms version: 2.0
|
|
70
|
|
71 - mz:
|
|
72 - Description: Those are the m/z values from the original mzxml
|
|
73 file, extracted using xcms 2.
|
|
74 - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@env$mz
|
|
75 - xcms version: 2.0
|
|
76
|
|
77 - scanindex:
|
|
78 - Description: Those are the scanindex values from the original mzxml
|
|
79 file, extracted using xcms 2.
|
|
80 - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@scanindex
|
|
81 - xcms version: 2.0
|
|
82
|
|
83 - scantime:
|
|
84 - Description: Those are the scantime values from the original mzxml
|
|
85 file, extracted using xcms 2.
|
|
86 - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@scantime
|
|
87 - xcms version: 2.0
|
|
88
|
|
89 - intensity:
|
|
90 - Description: Those are the intensity values from the original mzxml
|
|
91 file, extracted using xcms 2.
|
|
92 - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@env$intensity
|
|
93 - xcms version: 2.0
|
|
94
|
|
95 - polarity:
|
|
96 - Description: Those are the polarity values from the original mzxml
|
|
97 file, extracted using xcms 2.
|
19
|
98 - Retrieval method:
|
|
99 as.character(xcms::xcmsRaw(
|
|
100 'original_file.mzxml'
|
|
101 )@polarity[[1]])
|
0
|
102 - xcms version: 2.0
|
|
103
|
|
104 Data taken from incoming rdata
|
|
105 ------
|
|
106 - variableMetadata:
|
|
107 - Description: Unmodified copy of variableMetadata from incoming rdata.
|
|
108 - Retrieval method: rdata_file$variableMetadata
|
|
109
|
|
110 - process_params:
|
|
111 - Description: Those are the processing parameters values from the
|
|
112 curent rdata. They have been simplified to allow easy access like:
|
|
113 for (params in process_params) {
|
|
114 if (params[[\"xfunction\"]] == \"annotatediff\") {
|
|
115 process_peak_picking_params(params)
|
|
116 }
|
|
117 }
|
|
118 - Retrieval method:
|
|
119 ## just he same list, but simplified
|
|
120 process_params <- list()
|
|
121 for (list_name in names(rdata_file$listOFlistArguments)) {
|
|
122 param_list <- list()
|
19
|
123 for (param_name in names(
|
|
124 rdata_file$listOFlistArguments[[list_name]]
|
|
125 )) {
|
|
126 param_list[[param_name]] <- rdata_file$listOFlistArguments[[
|
|
127 list_name
|
|
128 ]][[param_name]]
|
0
|
129 }
|
|
130 process_params[[length(process_params)+1]] <- param_list
|
|
131 }
|
19
|
132 ", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION),
|
|
133 envir = globalenv())
|
|
134 lockBinding("ENRICHED_RDATA_DOC", globalenv())
|
0
|
135
|
|
136
|
|
137
|
|
138 get_models <- function(path) {
|
|
139 if (is.null(path)) {
|
|
140 stop("No models to define the database schema")
|
|
141 } else {
|
|
142 message(sprintf("Loading models from %s", path))
|
|
143 }
|
|
144 ## galaxy mangles the "@" to a "__at__"
|
|
145 if (substr(path, 1, 9) == "git__at__") {
|
19
|
146 path <- sub("^git__at__", "git@", path, perl = TRUE)
|
0
|
147 }
|
|
148 if (
|
|
149 substr(path, 1, 4) == "git@"
|
19
|
150 || substr(path, length(path) - 4, 4) == ".git"
|
0
|
151 ) {
|
19
|
152 return(get_models_from_git(path))
|
0
|
153 }
|
|
154 if (substr(path, 1, 4) == "http") {
|
19
|
155 return(get_models_from_url(path))
|
0
|
156 }
|
19
|
157 return(source(path)$value)
|
0
|
158 }
|
|
159
|
19
|
160 get_models_from_git <- function(url, target_file = "models.R", rm = TRUE) {
|
0
|
161 tmp <- tempdir()
|
|
162 message(sprintf("Cloning %s", url))
|
|
163 system2("git", c("clone", url, tmp))
|
|
164 result <- search_tree(file.path(tmp, dir), target_file)
|
|
165 if (!is.null(result)) {
|
|
166 models <- source(result)$value
|
|
167 if (rm) {
|
19
|
168 unlink(tmp, recursive = TRUE)
|
0
|
169 }
|
19
|
170 return(models)
|
0
|
171 }
|
|
172 if (rm) {
|
19
|
173 unlink(tmp, recursive = TRUE)
|
0
|
174 }
|
|
175 stop(sprintf(
|
|
176 "Could not find any file named \"%s\" in this repo",
|
|
177 target_file
|
|
178 ))
|
|
179 }
|
|
180
|
19
|
181 get_models_from_url <- function(url, target_file = "models.R", rm = TRUE) {
|
0
|
182 tmp <- tempdir()
|
|
183 message(sprintf("Downloading %s", url))
|
|
184 result <- file.path(tmp, target_file)
|
19
|
185 if (download.file(url, destfile = result) == 0) {
|
0
|
186 models <- source(result)$value
|
|
187 if (rm) {
|
19
|
188 unlink(tmp, recursive = TRUE)
|
0
|
189 }
|
19
|
190 return(models)
|
0
|
191 }
|
|
192 if (rm) {
|
19
|
193 unlink(tmp, recursive = TRUE)
|
0
|
194 }
|
|
195 stop("Could not download any file at this adress.")
|
|
196 }
|
|
197
|
|
198 search_tree <- function(path, target) {
|
|
199 target <- tolower(target)
|
|
200 for (file in list.files(path)) {
|
20
|
201 if (fs::is.dir(file)) {
|
0
|
202 result <- search_tree(file.path(path, file), target)
|
|
203 if (!is.null(result)) {
|
19
|
204 return(result)
|
0
|
205 }
|
|
206 } else if (tolower(file) == target) {
|
19
|
207 return(file.path(path, file))
|
0
|
208 }
|
|
209 }
|
19
|
210 return(NULL)
|
0
|
211 }
|
|
212
|
|
213 create_database <- function(orm) {
|
19
|
214 orm$recreate_database(no_exists = FALSE)
|
0
|
215 set_database_version(orm, "created")
|
|
216 }
|
|
217
|
|
218 insert_adducts <- function(orm) {
|
|
219 message("Creating adducts...")
|
|
220 adducts <- list(
|
19
|
221 list("[M-H2O-H]-", 1, -1, -48.992020312000001069, 1, 0, 0.5, "H0", "H1O3"),
|
|
222 list("[M-H-Cl+O]-", 1, -1, -19.981214542000000022, 2, 0, 0.5, "O1", "H1Cl1"),
|
|
223 list("[M-Cl+O]-", 1, -1, -18.973389510000000512, 3, 0, 0.5, "O1", "Cl1"),
|
|
224 list("[M-3H]3-", 1, -3, -3.0218293560000000219, 4, 0, 1.0, "H0", "H3"),
|
|
225 list("[2M-3H]3-", 2, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"),
|
|
226 list("[3M-3H]3-", 3, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"),
|
|
227 list("[M-2H]2-", 1, -2, -2.0145529039999998666, 5, 0, 1.0, "H0", "H2"),
|
|
228 list("[2M-2H]2-", 2, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"),
|
|
229 list("[3M-2H]2-", 3, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"),
|
|
230 list("[M-H]-", 1, -1, -1.0072764519999999333, 6, 1, 1.0, "H0", "H1"),
|
|
231 list("[2M-H]-", 2, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"),
|
|
232 list("[3M-H]-", 3, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"),
|
|
233 list("[M]+", 1, 1, -0.00054858000000000000945, 7, 1, 1.0, "H0", "H0"),
|
|
234 list("[M]-", 1, -1, 0.00054858000000000000945, 8, 1, 1.0, "H0", "H0"),
|
|
235 list("[M+H]+", 1, 1, 1.0072764519999999333, 9, 1, 1.0, "H1", "H0"),
|
|
236 list("[2M+H]+", 2, 1, 1.0072764519999999333, 9, 0, 0.5, "H1", "H0"),
|
|
237 list("[3M+H]+", 3, 1, 1.0072764519999999333, 9, 0, 0.25, "H1", "H0"),
|
|
238 list("[M+2H]2+", 1, 2, 2.0145529039999998666, 10, 0, 0.75, "H2", "H0"),
|
|
239 list("[2M+2H]2+", 2, 2, 2.0145529039999998666, 10, 0, 0.5, "H2", "H0"),
|
|
240 list("[3M+2H]2+", 3, 2, 2.0145529039999998666, 10, 0, 0.25, "H2", "H0"),
|
|
241 list("[M+3H]3+", 1, 3, 3.0218293560000000219, 11, 0, 0.75, "H3", "H0"),
|
|
242 list("[2M+3H]3+", 2, 3, 3.0218293560000000219, 11, 0, 0.5, "H3", "H0"),
|
|
243 list("[3M+3H]3+", 3, 3, 3.0218293560000000219, 11, 0, 0.25, "H3", "H0"),
|
|
244 list("[M-2H+NH4]-", 1, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"),
|
|
245 list("[2M-2H+NH4]-", 2, -1, 16.019272654000001665, 12, 0, 0.0, "N1H4", "H2"),
|
|
246 list("[3M-2H+NH4]-", 3, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"),
|
|
247 list("[M+NH4]+", 1, 1, 18.033825558000000199, 13, 1, 1.0, "N1H4", "H0"),
|
|
248 list("[2M+NH4]+", 2, 1, 18.033825558000000199, 13, 0, 0.5, "N1H4", "H0"),
|
|
249 list("[3M+NH4]+", 3, 1, 18.033825558000000199, 13, 0, 0.25, "N1H4", "H0"),
|
|
250 list("[M+H+NH4]2+", 1, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"),
|
|
251 list("[2M+H+NH4]2+", 2, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"),
|
|
252 list("[3M+H+NH4]2+", 3, 2, 19.041102009999999467, 14, 0, 0.25, "N1H5", "H0"),
|
|
253 list("[M+Na-2H]-", 1, -1, 20.974668176000001551, 15, 0, 0.75, "Na1", "H2"),
|
|
254 list("[2M-2H+Na]-", 2, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"),
|
|
255 list("[3M-2H+Na]-", 3, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"),
|
|
256 list("[M+Na]+", 1, 1, 22.989221080000000086, 16, 1, 1.0, "Na1", "H0"),
|
|
257 list("[2M+Na]+", 2, 1, 22.989221080000000086, 16, 0, 0.5, "Na1", "H0"),
|
|
258 list("[3M+Na]+", 3, 1, 22.989221080000000086, 16, 0, 0.25, "Na1", "H0"),
|
|
259 list("[M+H+Na]2+", 1, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"),
|
|
260 list("[2M+H+Na]2+", 2, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"),
|
|
261 list("[3M+H+Na]2+", 3, 2, 23.996497531999999353, 17, 0, 0.25, "Na1H1", "H0"),
|
|
262 list("[M+2H+Na]3+", 1, 3, 25.003773983999998619, 18, 0, 0.25, "H2Na1", "H0"),
|
|
263 list("[M+CH3OH+H]+", 1, 1, 33.033491200000000276, 19, 0, 0.25, "C1O1H5", "H0"),
|
|
264 list("[M-H+Cl]2-", 1, -2, 33.962124838000001148, 20, 0, 1.0, "Cl1", "H1"),
|
|
265 list("[2M-H+Cl]2-", 2, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"),
|
|
266 list("[3M-H+Cl]2-", 3, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"),
|
|
267 list("[M+Cl]-", 1, -1, 34.969401290000000416, 21, 1, 1.0, "Cl1", "H0"),
|
|
268 list("[2M+Cl]-", 2, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"),
|
|
269 list("[3M+Cl]-", 3, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"),
|
|
270 list("[M+K-2H]-", 1, -1, 36.948605415999999479, 22, 0, 0.5, "K1", "H2"),
|
|
271 list("[2M-2H+K]-", 2, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"),
|
|
272 list("[3M-2H+K]-", 3, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"),
|
|
273 list("[M+K]+", 1, 1, 38.963158319999998013, 23, 1, 1.0, "K1", "H0"),
|
|
274 list("[2M+K]+", 2, 1, 38.963158319999998013, 23, 0, 0.5, "K1", "H0"),
|
|
275 list("[3M+K]+", 3, 1, 38.963158319999998013, 23, 0, 0.25, "K1", "H0"),
|
|
276 list("[M+H+K]2+", 1, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"),
|
|
277 list("[2M+H+K]2+", 2, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"),
|
|
278 list("[3M+H+K]2+", 3, 2, 39.970434771999997281, 24, 0, 0.25, "K1H1", "H0"),
|
|
279 list("[M+ACN+H]+", 1, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"),
|
|
280 list("[2M+ACN+H]+", 2, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"),
|
|
281 list("[M+2Na-H]+", 1, 1, 44.971165708000000902, 26, 0, 0.5, "Na2", "H1"),
|
|
282 list("[2M+2Na-H]+", 2, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"),
|
|
283 list("[3M+2Na-H]+", 3, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"),
|
|
284 list("[2M+FA-H]-", 2, -1, 44.998202851999998586, 27, 0, 0.25, "C1O2H2", "H1"),
|
|
285 list("[M+FA-H]-", 1, -1, 44.998202851999998586, 27, 0, 0.5, "C1O2H2", "H1"),
|
|
286 list("[M+2Na]2+", 1, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"),
|
|
287 list("[2M+2Na]2+", 2, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"),
|
|
288 list("[3M+2Na]2+", 3, 2, 45.978442160000000172, 28, 0, 0.25, "Na2", "H0"),
|
|
289 list("[M+H+2Na]3+", 1, 3, 46.985718611999999438, 29, 0, 0.25, "H1Na2", "H0"),
|
|
290 list("[M+H+FA]+", 1, 1, 47.012755755999997122, 30, 0, 0.25, "C1O2H3", "H0"),
|
|
291 list("[M+Hac-H]-", 1, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"),
|
|
292 list("[2M+Hac-H]-", 2, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"),
|
|
293 list("[M+IsoProp+H]+", 1, 1, 61.064791327999998317, 32, 0, 0.25, "C3H9O1", "H0"),
|
|
294 list("[M+Na+K]2+", 1, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"),
|
|
295 list("[2M+Na+K]2+", 2, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"),
|
|
296 list("[3M+Na+K]2+", 3, 2, 61.9523793999999981, 33, 0, 0.25, "Na1K1", "H0"),
|
|
297 list("[M+NO3]-", 1, -1, 61.988366450000000895, 34, 0, 0.5, "N1O3", "H0"),
|
|
298 list("[M+ACN+Na]+", 1, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"),
|
|
299 list("[2M+ACN+Na]+", 2, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"),
|
|
300 list("[M+NH4+FA]+", 1, 1, 64.039304861999994502, 36, 0, 0.25, "N1C1O2H6", "H0"),
|
|
301 list("[M-2H+Na+FA]-", 1, -1, 66.980147479999999405, 37, 0, 0.5, "NaC1O2H2", "H2"),
|
|
302 list("[M+3Na]3+", 1, 3, 68.967663239999993153, 38, 0, 0.25, "Na3", "H0"),
|
|
303 list("[M+Na+FA]+", 1, 1, 68.99470038399999794, 39, 0, 0.25, "Na1C1O2H2", "H0"),
|
|
304 list("[M+2Cl]2-", 1, -2, 69.938802580000000832, 40, 0, 1.0, "Cl2", "H0"),
|
|
305 list("[2M+2Cl]2-", 2, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"),
|
|
306 list("[3M+2Cl]2-", 3, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"),
|
|
307 list("[M+2K-H]+", 1, 1, 76.919040187999996758, 41, 0, 0.5, "K2", "H1"),
|
|
308 list("[2M+2K-H]+", 2, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"),
|
|
309 list("[3M+2K-H]+", 3, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"),
|
|
310 list("[M+2K]2+", 1, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"),
|
|
311 list("[2M+2K]2+", 2, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"),
|
|
312 list("[3M+2K]2+", 3, 2, 77.926316639999996028, 42, 0, 0.25, "K2", "H0"),
|
|
313 list("[M+Br]-", 1, -1, 78.918886479999997619, 43, 1, 1.0, "Br1", "H0"),
|
|
314 list("[M+Cl+FA]-", 1, -1, 80.974880593999998268, 44, 0, 0.5, "Cl1C1O2H2", "H0"),
|
|
315 list("[M+AcNa-H]-", 1, -1, 80.995797543999998426, 45, 0, 0.25, "C2H3Na1O2", "H1"),
|
|
316 list("[M+2ACN+2H]2+", 1, 2, 84.067651115999993292, 46, 0, 0.25, "C4H8N2", "H0"),
|
|
317 list("[M+K+FA]+", 1, 1, 84.968637623999995868, 47, 0, 0.25, "K1C1O2H2", "H0"),
|
|
318 list("[M+Cl+Na+FA-H]-", 1, -1, 102.95682522200000619, 48, 0, 0.5, "Cl1Na1C1O2H2", "H1"),
|
|
319 list("[2M+3H2O+2H]+", 2, 1, 104.03153939599999944, 49, 0, 0.25, "H8O6", "H0"),
|
|
320 list("[M+TFA-H]-", 1, -1, 112.98558742000000165, 50, 0, 0.5, "C2F3O2H1", "H1"),
|
|
321 list("[M+H+TFA]+", 1, 1, 115.00014032400000019, 51, 0, 0.25, "C2F3O2H2", "H0"),
|
|
322 list("[M+3ACN+2H]2+", 1, 2, 125.09420022199999778, 52, 0, 0.25, "C6H11N3", "H0"),
|
|
323 list("[M+NH4+TFA]+", 1, 1, 132.02668943000000468, 53, 0, 0.25, "N1C2F3O2H5", "H0"),
|
|
324 list("[M+Na+TFA]+", 1, 1, 136.98208495200000811, 54, 0, 0.25, "Na1C2F3O2H1", "H0"),
|
|
325 list("[M+Cl+TFA]-", 1, -1, 148.96226516199999423, 55, 0, 0.5, "Cl1C2F3O2H1", "H0"),
|
|
326 list("[M+K+TFA]+", 1, 1, 152.95602219200000604, 56, 0, 0.25, "K1C2F3O2H1","H0")
|
0
|
327 )
|
|
328 dummy_adduct <- orm$adduct()
|
|
329 for (adduct in adducts) {
|
|
330 i <- 0
|
19
|
331 dummy_adduct$set_name(adduct[[i <- i + 1]])
|
|
332 dummy_adduct$set_multi(adduct[[i <- i + 1]])
|
|
333 dummy_adduct$set_charge(adduct[[i <- i + 1]])
|
|
334 dummy_adduct$set_mass(adduct[[i <- i + 1]])
|
|
335 dummy_adduct$set_oidscore(adduct[[i <- i + 1]])
|
|
336 dummy_adduct$set_quasi(adduct[[i <- i + 1]])
|
|
337 dummy_adduct$set_ips(adduct[[i <- i + 1]])
|
|
338 dummy_adduct$set_formula_add(adduct[[i <- i + 1]])
|
|
339 dummy_adduct$set_formula_ded(adduct[[i <- i + 1]])
|
11
|
340 invisible(dummy_adduct$save())
|
19
|
341 dummy_adduct$clear(unset_id = TRUE)
|
0
|
342 }
|
|
343 message("Adducts created")
|
|
344 }
|
|
345
|
19
|
346 insert_base_data <- function(orm, path, archetype = FALSE) {
|
0
|
347 if (archetype) {
|
|
348 ## not implemented yet
|
19
|
349 return()
|
0
|
350 }
|
|
351 base_data <- readLines(path)
|
19
|
352 for (sql in strsplit(paste(base_data, collapse = " "), ";")[[1]]) {
|
0
|
353 orm$execute(sql)
|
|
354 }
|
|
355 set_database_version(orm, "enriched")
|
|
356 }
|
|
357
|
|
358 insert_compounds <- function(orm, compounds_path) {
|
19
|
359 compounds <- read.csv(file = compounds_path, sep = "\t")
|
0
|
360 if (is.null(compounds <- translate_compounds(compounds))) {
|
|
361 stop("Could not find asked compound's attributes in csv file.")
|
|
362 }
|
|
363 dummy_compound <- orm$compound()
|
|
364 compound_list <- list()
|
|
365 for (i in seq_len(nrow(compounds))) {
|
|
366 dummy_compound$set_mz(compounds[i, "mz"])
|
|
367 dummy_compound$set_name(compounds[i, "name"])
|
|
368 dummy_compound$set_common_name(compounds[i, "common_name"])
|
|
369 dummy_compound$set_formula(compounds[i, "formula"])
|
19
|
370 compound_list[[length(compound_list) + 1]] <- as.list(
|
0
|
371 dummy_compound,
|
|
372 c("mz", "name", "common_name", "formula")
|
|
373 )
|
19
|
374 dummy_compound$clear(unset_id = TRUE)
|
0
|
375 }
|
19
|
376 invisible(dummy_compound$save(bulk = compound_list))
|
0
|
377 }
|
|
378
|
|
379 translate_compounds <- function(compounds) {
|
|
380 recognized_headers <- list(
|
19
|
381 c(
|
|
382 "HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1",
|
|
383 "MetName", "ChemFormula", "INChIkey"
|
|
384 )
|
0
|
385 )
|
|
386 header_translators <- list(
|
|
387 hmdb_header_translator
|
|
388 )
|
|
389 for (index in seq_along(recognized_headers)) {
|
|
390 headers <- recognized_headers[[index]]
|
|
391 if (identical(colnames(compounds), headers)) {
|
19
|
392 return(header_translators[[index]](compounds))
|
0
|
393 }
|
|
394 }
|
|
395 if (is.null(translator <- guess_translator(colnames(compounds)))) {
|
19
|
396 return(NULL)
|
0
|
397 }
|
19
|
398 return(csv_header_translator(translator, compounds))
|
0
|
399 }
|
|
400
|
|
401 guess_translator <- function(header) {
|
|
402 result <- list(
|
19
|
403 mz = NULL,
|
|
404 name = NULL,
|
|
405 common_name = NULL,
|
20
|
406 formula = NULL
|
0
|
407 )
|
|
408 asked_cols <- names(result)
|
|
409 for (asked_col in asked_cols) {
|
|
410 for (col in header) {
|
|
411 if ((twisted <- tolower(col)) == asked_col
|
|
412 || gsub("-", "_", twisted) == asked_col
|
|
413 || gsub(" ", "_", twisted) == asked_col
|
|
414 || tolower(gsub("(.)([A-Z])", "\\1_\\2", col)) == asked_col
|
|
415 ) {
|
|
416 result[[asked_col]] <- col
|
|
417 next
|
|
418 }
|
|
419 }
|
|
420 }
|
|
421 if (any(mapply(is.null, result))) {
|
19
|
422 return(NULL)
|
0
|
423 }
|
19
|
424 return(result)
|
0
|
425 }
|
|
426
|
|
427 hmdb_header_translator <- function(compounds) {
|
19
|
428 return(csv_header_translator(
|
0
|
429 list(
|
19
|
430 HMDB_ID = "HMDB_ID",
|
|
431 mz = "MzBank",
|
|
432 name = "MetName",
|
|
433 common_name = "MetName",
|
|
434 formula = "ChemFormula",
|
|
435 inchi_key = "INChIkey"
|
0
|
436 ), compounds
|
|
437 ))
|
|
438 }
|
|
439
|
|
440 csv_header_translator <- function(translation_table, csv) {
|
|
441 header_names <- names(translation_table)
|
19
|
442 result <- data.frame(seq_len(nrow(csv)))
|
0
|
443 for (i in seq_along(header_names)) {
|
|
444 result[, header_names[[i]]] <- csv[, translation_table[[i]]]
|
|
445 }
|
|
446 result[, "mz"] <- as.numeric(result[, "mz"])
|
19
|
447 return(result)
|
0
|
448 }
|
|
449
|
|
450 set_database_version <- function(orm, version) {
|
|
451 orm$set_tag(
|
|
452 version,
|
19
|
453 tag_name = "database_version",
|
|
454 tag_table_name = "XSeeker_tagging_table"
|
0
|
455 )
|
|
456 }
|
|
457
|
|
458 process_rdata <- function(orm, rdata, options) {
|
|
459 mzml_tmp_dir <- gather_mzml_files(rdata)
|
|
460 samples <- names(rdata$singlefile)
|
|
461 if (!is.null(options$samples)) {
|
|
462 samples <- samples[options$samples %in% samples]
|
|
463 }
|
|
464 show_percent <- (
|
|
465 is.null(options$`not-show-percent`)
|
|
466 || options$`not-show-percent` == FALSE
|
|
467 )
|
|
468 error <- tryCatch({
|
|
469 process_sample_list(
|
|
470 orm, rdata, samples,
|
19
|
471 show_percent = show_percent,
|
20
|
472 file_grouping_var = options$class,
|
|
473 options = options
|
0
|
474 )
|
|
475 NULL
|
19
|
476 }, error = function(e) {
|
20
|
477 return(e)
|
0
|
478 })
|
|
479 if (!is.null(mzml_tmp_dir)) {
|
19
|
480 unlink(mzml_tmp_dir, recursive = TRUE)
|
0
|
481 }
|
|
482 if (!is.null(error)) {
|
|
483 stop(error)
|
|
484 }
|
20
|
485 return(!is.null(error))
|
0
|
486 }
|
|
487
|
|
488 gather_mzml_files <- function(rdata) {
|
|
489 if (is.null(rdata$singlefile)) {
|
|
490 message("Extracting mxml files")
|
|
491 tmp <- tempdir()
|
19
|
492 rdata$singlefile <- utils::unzip(rdata$zipfile, exdir = tmp)
|
|
493 names(rdata$singlefile) <- tools::file_path_sans_ext(
|
|
494 basename(rdata$singlefile)
|
|
495 )
|
0
|
496 message("Extracted")
|
19
|
497 return(tmp)
|
0
|
498 } else {
|
19
|
499 message(sprintf(
|
|
500 "Not a zip file, loading files directly from path: %s",
|
|
501 paste(rdata$singlefile, collapse = " ; ")
|
|
502 ))
|
0
|
503 }
|
19
|
504 return(NULL)
|
0
|
505 }
|
|
506
|
19
|
507 process_sample_list <- function(
|
|
508 orm,
|
|
509 rdata,
|
|
510 sample_names,
|
|
511 show_percent,
|
20
|
512 file_grouping_var = NULL,
|
|
513 options = list()
|
19
|
514 ) {
|
|
515 if (is.null(file_grouping_var)) {
|
|
516 file_grouping_var <- find_grouping_var(rdata$variableMetadata)
|
|
517 if (is.null(file_grouping_var)) {
|
|
518 stop("Malformed variableMetada.")
|
|
519 }
|
|
520 }
|
|
521 tryCatch({
|
|
522 headers <- colnames(rdata$variableMetadata)
|
|
523 file_grouping_var <- headers[[as.numeric(file_grouping_var)]]
|
|
524 }, error = function(e) NULL)
|
|
525 if (
|
|
526 is.null(file_grouping_var)
|
|
527 || !(file_grouping_var %in% colnames(rdata$variableMetadata))
|
|
528 ) {
|
|
529 stop(sprintf(
|
|
530 "Could not find grouping variable %s in var meta file.",
|
|
531 file_grouping_var
|
|
532 ))
|
|
533 }
|
0
|
534 message("Processing samples.")
|
|
535 message(sprintf("File grouping variable: %s", file_grouping_var))
|
|
536
|
11
|
537 context <- new.env()
|
|
538 context$samples <- list()
|
|
539 context$peaks <- rdata$xa@xcmsSet@peaks
|
|
540 context$groupidx <- rdata$xa@xcmsSet@groupidx
|
|
541 xcms_set <- rdata$xa@xcmsSet
|
|
542 singlefile <- rdata$singlefile
|
0
|
543 process_arg_list <- rdata$listOFlistArguments
|
11
|
544 var_meta <- rdata$variableMetadata
|
|
545
|
0
|
546 process_params <- list()
|
12
|
547 if (is.null(process_arg_list)) {
|
|
548 for (history in xcms_set@.processHistory) {
|
|
549 if (
|
|
550 class(history@param) == "CentWaveParam"
|
|
551 && history@type == "Peak detection"
|
|
552 ) {
|
|
553 params <- history@param
|
|
554 process_params <- list(list(
|
19
|
555 xfunction = "annotatediff",
|
|
556 ppm = params@ppm,
|
|
557 peakwidth = sprintf(
|
|
558 "%s - %s",
|
|
559 params@peakwidth[[1]],
|
|
560 params@peakwidth[[2]]
|
|
561 ),
|
|
562 snthresh = params@snthresh,
|
|
563 prefilterStep = params@prefilter[[1]],
|
|
564 prefilterLevel = params@prefilter[[2]],
|
|
565 mzdiff = params@mzdiff,
|
|
566 fitgauss = params@fitgauss,
|
|
567 noise = params@noise,
|
|
568 mzCenterFun = params@mzCenterFun,
|
|
569 integrate = params@integrate,
|
|
570 firstBaselineCheck = params@firstBaselineCheck,
|
|
571 snthreshIsoROIs = !identical(params@roiScales, numeric(0))
|
12
|
572 ))
|
|
573 break
|
|
574 }
|
0
|
575 }
|
12
|
576 } else {
|
|
577 for (list_name in names(process_arg_list)) {
|
|
578 param_list <- list()
|
|
579 for (param_name in names(process_arg_list[[list_name]])) {
|
19
|
580 param_list[[param_name]] <- process_arg_list[[
|
|
581 list_name
|
|
582 ]][[param_name]]
|
12
|
583 }
|
19
|
584 process_params[[length(process_params) + 1]] <- param_list
|
12
|
585 }
|
0
|
586 }
|
12
|
587
|
0
|
588 message("Parameters from previous processes extracted.")
|
|
589
|
|
590 smol_xcms_set <- orm$smol_xcms_set()
|
|
591 mz_tab_info <- new.env()
|
|
592 g <- xcms::groups(xcms_set)
|
|
593 mz_tab_info$group_length <- nrow(g)
|
|
594 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set)
|
|
595 mz_tab_info$sampnames <- xcms::sampnames(xcms_set)
|
|
596 mz_tab_info$sampclass <- xcms::sampclass(xcms_set)
|
19
|
597 mz_tab_info$rtmed <- g[, "rtmed"]
|
|
598 mz_tab_info$mzmed <- g[, "mzmed"]
|
|
599 mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(
|
|
600 xcms_set,
|
|
601 value = "into"
|
|
602 )
|
|
603 blogified <- blob::blob(fst::compress_fst(
|
|
604 serialize(mz_tab_info, NULL),
|
|
605 compression = 100
|
|
606 ))
|
11
|
607 rm(mz_tab_info)
|
|
608
|
|
609 invisible(smol_xcms_set$set_raw(blogified)$save())
|
|
610 smol_xcms_set_id <- smol_xcms_set$get_id()
|
|
611 rm(smol_xcms_set)
|
|
612
|
20
|
613 for (no in seq_along(names(singlefile))) {
|
11
|
614 sample_name <- names(singlefile)[[no]]
|
|
615 sample_path <- singlefile[[no]]
|
0
|
616 if (
|
|
617 is.na(no)
|
|
618 || is.null(sample_path)
|
|
619 || !(sample_name %in% sample_names)
|
|
620 ) {
|
|
621 next
|
|
622 }
|
|
623 env <- new.env()
|
11
|
624
|
14
|
625 if (!DEBUG_FAST_IGNORE_SLOW_OP) {
|
|
626 ms_file <- xcms::xcmsRaw(sample_path)
|
|
627 env$tic <- ms_file@tic
|
|
628 env$mz <- ms_file@env$mz
|
|
629 env$scanindex <- ms_file@scanindex
|
|
630 env$scantime <- ms_file@scantime * 60
|
|
631 env$intensity <- ms_file@env$intensity
|
|
632 env$polarity <- as.character(ms_file@polarity[[1]])
|
11
|
633
|
14
|
634 ## Again, ms file is huge, so we get rid of it quickly.
|
|
635 rm(ms_file)
|
|
636
|
|
637 }
|
0
|
638 env$sample_name <- sample_name
|
|
639 env$dataset_path <- sample_path
|
|
640 env$process_params <- process_params
|
|
641 env$enriched_rdata <- TRUE
|
|
642 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION
|
|
643 env$tool_name <- TOOL_NAME
|
|
644 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
|
14
|
645
|
11
|
646 sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id)
|
19
|
647 rm(env)
|
11
|
648 context$samples[no] <- sample$get_id()
|
19
|
649 rm(sample)
|
0
|
650 }
|
11
|
651 context$clusters <- list()
|
|
652 context$show_percent <- show_percent
|
|
653 context$cluster_mean_rt_abundance <- list()
|
|
654 context$central_feature <- list()
|
12
|
655 context$adducts <- list()
|
11
|
656 load_variable_metadata(orm, var_meta, context)
|
|
657 clusters <- context$clusters
|
|
658 rm(context)
|
0
|
659 message("Features enrichment")
|
11
|
660 complete_features(orm, clusters, show_percent)
|
0
|
661 message("Features enrichment done.")
|
19
|
662 return(NULL)
|
0
|
663 }
|
|
664
|
|
665 find_grouping_var <- function(var_meta) {
|
19
|
666 known_colnames <- c(
|
6
|
667 "name", "namecustom", "mz", "mzmin", "mzmax",
|
19
|
668 "rt", "rtmin", "rtmax", "npeaks", "isotopes", "adduct",
|
|
669 "pcgroup", "ms_level"
|
6
|
670 )
|
|
671 col_names <- colnames(var_meta)
|
19
|
672 classes <- list()
|
6
|
673 for (name in col_names) {
|
|
674 if (!(name %in% known_colnames)) {
|
19
|
675 classes[[length(classes) + 1]] <- name
|
0
|
676 }
|
|
677 }
|
6
|
678 if (length(classes) > 1) {
|
19
|
679 stop(sprintf(
|
|
680 "Only one class expected in the variable metadata. Found %d .",
|
|
681 length(classes)
|
|
682 ))
|
6
|
683 }
|
7
|
684 if (length(classes) == 0) {
|
6
|
685 stop("Could not find any class column in your variableMetadata.")
|
|
686 }
|
19
|
687 return(classes[[1]])
|
0
|
688 }
|
|
689
|
11
|
690 add_sample_to_database <- function(orm, env, context, smol_xcms_set_id) {
|
0
|
691 message(sprintf("Processing sample %s", env$sample_name))
|
|
692 sample <- (
|
|
693 orm$sample()
|
|
694 $set_name(env$sample_name)
|
|
695 $set_path(env$dataset_path)
|
|
696 $set_kind("enriched_rdata")
|
|
697 $set_polarity(
|
19
|
698 if (
|
|
699 is.null(env$polarity)
|
|
700 || identical(env$polarity, character(0))
|
|
701 ) ""
|
0
|
702 else env$polarity
|
|
703 )
|
|
704 $set_raw(blob::blob(fst::compress_fst(
|
|
705 serialize(env, NULL),
|
19
|
706 compression = 100
|
0
|
707 )))
|
|
708 )
|
11
|
709 sample[["smol_xcms_set_id"]] <- smol_xcms_set_id
|
|
710 sample$modified__[["smol_xcms_set_id"]] <- smol_xcms_set_id
|
|
711 sample <- sample$save()
|
0
|
712 load_process_params(orm, sample, env$process_params)
|
|
713 message(sprintf("Sample %s inserted.", env$sample_name))
|
19
|
714 return(sample)
|
0
|
715 }
|
|
716
|
|
717
|
11
|
718 load_variable_metadata <- function(orm, var_meta, context) {
|
0
|
719 all_clusters <- orm$cluster()$all()
|
|
720
|
11
|
721 next_feature_id <- get_next_id(orm$feature()$all(), "featureID") + 1
|
|
722 next_cluster_id <- 0
|
0
|
723 next_pc_group <- get_next_id(all_clusters, "pc_group")
|
11
|
724 next_align_group <- get_next_id(all_clusters, "align_group") + 1
|
0
|
725 message("Extracting features")
|
|
726 invisible(create_features(
|
11
|
727 orm, var_meta, context,
|
0
|
728 next_feature_id, next_cluster_id,
|
|
729 next_pc_group, next_align_group
|
|
730 ))
|
|
731 message("Extracting features done.")
|
19
|
732 return(NULL)
|
0
|
733 }
|
|
734
|
|
735 get_next_id <- function(models, attribute) {
|
|
736 if ((id <- models$max(attribute)) == Inf || id == -Inf) {
|
19
|
737 return(0)
|
0
|
738 }
|
19
|
739 return(id)
|
0
|
740 }
|
|
741
|
|
742 create_features <- function(
|
11
|
743 orm, var_meta, context,
|
0
|
744 next_feature_id, next_cluster_id,
|
|
745 next_pc_group, next_align_group
|
|
746 ) {
|
|
747 field_names <- as.list(names(orm$feature()$fields__))
|
19
|
748 field_names[field_names == "id"] <- NULL
|
0
|
749
|
|
750 dummy_feature <- orm$feature()
|
|
751
|
|
752 if (show_percent <- context$show_percent) {
|
|
753 percent <- -1
|
|
754 total <- nrow(var_meta)
|
|
755 }
|
14
|
756 rows <- seq_len(nrow(var_meta))
|
|
757 if (PROCESS_SMOL_BATCH) {
|
|
758
|
19
|
759 rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))]
|
14
|
760 }
|
20
|
761 # features <- list()
|
|
762 features <- as.list(rows) ## allocate all memory before processing
|
|
763 # cluster_row <- list()
|
|
764 cluster_row <- as.list(rows) ## allocate all memory before processing
|
14
|
765 for (row in rows) {
|
0
|
766 if (show_percent && (row / total) * 100 > percent) {
|
|
767 percent <- percent + 1
|
19
|
768 message("\r", sprintf("\r%d %%", percent), appendLF = FALSE)
|
0
|
769 }
|
|
770
|
11
|
771 dummy_feature$set_featureID(next_feature_id)
|
|
772 next_feature_id <- next_feature_id + 1
|
14
|
773
|
|
774 curent_var_meta <- var_meta[row, ]
|
|
775 set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
|
11
|
776 fake_iso <- dummy_feature$get_iso()
|
|
777 iso <- extract_iso(fake_iso)
|
|
778 clusterID <- extract_clusterID(fake_iso, next_cluster_id)
|
|
779 context$clusterID <- clusterID
|
|
780 dummy_feature$set_iso(iso)
|
|
781
|
0
|
782 peak_list <- context$peaks[context$groupidx[[row]], ]
|
10
|
783 if (! ("matrix" %in% class(peak_list))) {
|
19
|
784 peak_list <- matrix(
|
|
785 peak_list,
|
|
786 nrow = 1,
|
|
787 ncol = length(peak_list),
|
|
788 dimnames = list(c(), names(peak_list))
|
|
789 )
|
10
|
790 }
|
11
|
791
|
|
792 clusterID <- as.character(clusterID)
|
|
793 if (is.null(context$central_feature[[clusterID]])) {
|
|
794 int_o <- extract_peak_var(peak_list, "into")
|
|
795 context$central_feature[[clusterID]] <- (
|
19
|
796 peak_list[peak_list[, "into"] == int_o, ]["sample"]
|
11
|
797 )
|
|
798 }
|
|
799
|
14
|
800 if (!DEBUG_FAST_IGNORE_SLOW_OP) {
|
19
|
801 central_feature <- context$central_feature[[clusterID]]
|
|
802 sample_peak_list <- peak_list[
|
|
803 as.integer(peak_list[, "sample"]) == central_feature,
|
|
804 ,
|
|
805 drop = FALSE
|
|
806 ]
|
|
807 if (
|
|
808 !identical(sample_peak_list, numeric(0))
|
|
809 && !is.null(nrow(sample_peak_list))
|
|
810 && nrow(sample_peak_list) != 0
|
|
811 ) {
|
|
812 int_o <- extract_peak_var(sample_peak_list, "into")
|
|
813 if (!is.na(int_o)) {
|
14
|
814 dummy_feature$set_int_o(int_o)
|
|
815 }
|
19
|
816 int_b <- extract_peak_var(sample_peak_list, "intb")
|
|
817 if (!is.na(int_b)) {
|
14
|
818 dummy_feature$set_int_b(int_b)
|
|
819 }
|
19
|
820 max_o <- extract_peak_var(sample_peak_list, "maxo")
|
|
821 if (!is.na(max_o)) {
|
14
|
822 dummy_feature$set_max_o(max_o)
|
|
823 }
|
0
|
824 }
|
|
825 }
|
14
|
826
|
|
827 cluster_row[[row]] <- create_associated_cluster(
|
12
|
828 orm,
|
14
|
829 context$samples[context$central_feature[[clusterID]]][[1]],
|
11
|
830 dummy_feature, clusterID,
|
0
|
831 context, curent_var_meta, next_pc_group,
|
|
832 next_align_group
|
|
833 )
|
|
834 next_align_group <- next_align_group + 1
|
20
|
835 features[[row]] <- as.list(dummy_feature, field_names)
|
|
836 # features[[length(features) + 1]] <- as.list(dummy_feature, field_names)
|
0
|
837 dummy_feature$clear()
|
|
838 }
|
14
|
839 rm(var_meta)
|
|
840 message("")
|
0
|
841 message("Saving features")
|
19
|
842 invisible(dummy_feature$save(bulk = features))
|
14
|
843
|
|
844 ## We link manually clusters to the sample they're in.
|
|
845 link_cache <- list()
|
|
846 for (row in rows) {
|
|
847 sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"])
|
|
848 for (sample_id in context$samples[sample_nos]) {
|
|
849 cluster_id <- cluster_row[[row]]$get_id()
|
19
|
850 id <- paste(sample_id, cluster_id, sep = ";")
|
|
851 if (is.null(link_cache[[id]])) {
|
14
|
852 link_cache[[id]] <- 1
|
|
853 orm$cluster_sample(
|
19
|
854 sample_id = sample_id,
|
|
855 cluster_id = cluster_id
|
14
|
856 )$save()
|
|
857 }
|
|
858 }
|
|
859 }
|
|
860
|
0
|
861 message("Saved.")
|
19
|
862 return(context$clusters)
|
0
|
863 }
|
|
864
|
19
|
865 extract_peak_var <- function(peak_list, var_name, selector = max) {
|
0
|
866 value <- peak_list[, var_name]
|
|
867 names(value) <- NULL
|
19
|
868 return(selector(value))
|
0
|
869 }
|
|
870
|
|
871 set_feature_fields_from_var_meta <- function(feature, var_meta) {
|
|
872 if (!is.null(mz <- var_meta[["mz"]]) && !is.na(mz)) {
|
|
873 feature$set_mz(mz)
|
|
874 }
|
|
875 if (!is.null(mzmin <- var_meta[["mzmin"]]) && !is.na(mzmin)) {
|
|
876 feature$set_mz_min(mzmin)
|
|
877 }
|
|
878 if (!is.null(mzmax <- var_meta[["mzmax"]]) && !is.na(mzmax)) {
|
|
879 feature$set_mz_max(mzmax)
|
|
880 }
|
|
881 if (!is.null(rt <- var_meta[["rt"]]) && !is.na(rt)) {
|
|
882 feature$set_rt(rt)
|
|
883 }
|
|
884 if (!is.null(rtmin <- var_meta[["rtmin"]]) && !is.na(rtmin)) {
|
|
885 feature$set_rt_min(rtmin)
|
|
886 }
|
|
887 if (!is.null(rtmax <- var_meta[["rtmax"]]) && !is.na(rtmax)) {
|
|
888 feature$set_rt_max(rtmax)
|
|
889 }
|
|
890 if (!is.null(isotopes <- var_meta[["isotopes"]]) && !is.na(isotopes)) {
|
|
891 feature$set_iso(isotopes)
|
|
892 }
|
19
|
893 return(feature)
|
0
|
894 }
|
|
895
|
|
896 extract_iso <- function(weird_data) {
|
|
897 if (grepl("^\\[\\d+\\]", weird_data)[[1]]) {
|
19
|
898 return(sub("^\\[\\d+\\]", "", weird_data, perl = TRUE))
|
0
|
899 }
|
19
|
900 return(weird_data)
|
0
|
901 }
|
|
902
|
19
|
903 extract_clusterID <- function(weird_data, next_cluster_id) {
|
0
|
904 if (grepl("^\\[\\d+\\]", weird_data)[[1]]) {
|
|
905 clusterID <- stringr::str_extract(weird_data, "^\\[\\d+\\]")
|
|
906 clusterID <- as.numeric(stringr::str_extract(clusterID, "\\d+"))
|
|
907 } else {
|
|
908 clusterID <- 0
|
|
909 }
|
19
|
910 return(clusterID + next_cluster_id)
|
0
|
911 }
|
|
912
|
|
913 create_associated_cluster <- function(
|
12
|
914 orm,
|
14
|
915 main_sample_id, feature, clusterID,
|
0
|
916 context, curent_var_meta, next_pc_group, next_align_group
|
|
917 ) {
|
11
|
918 clusterID <- as.character(clusterID)
|
|
919 if (is.null(cluster <- context$clusters[[clusterID]])) {
|
12
|
920 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]])
|
|
921 adduct_name <- as.character(curent_var_meta[["adduct"]])
|
|
922 annotation <- curent_var_meta[["isotopes"]]
|
11
|
923 cluster <- context$clusters[[clusterID]] <- orm$cluster(
|
19
|
924 pc_group = pcgroup + next_pc_group,
|
12
|
925 # adduct=adduct,
|
19
|
926 align_group = next_align_group,
|
0
|
927 # curent_group=curent_group,
|
19
|
928 clusterID = context$clusterID,
|
|
929 annotation = annotation
|
11
|
930 )
|
12
|
931 if (is.null(adduct <- context$adducts[[adduct_name]])) {
|
19
|
932 context$adducts[[adduct_name]] <- orm$adduct()$load_by(
|
|
933 name = adduct_name
|
|
934 )$first()
|
12
|
935 if (is.null(adduct <- context$adducts[[adduct_name]])) {
|
19
|
936 adduct <- context$adducts[[adduct_name]] <- orm$adduct(
|
|
937 name = adduct_name,
|
|
938 charge = 0
|
|
939 )
|
12
|
940 adduct$save()
|
|
941 }
|
|
942 }
|
|
943 cluster$set_adduct(adduct)
|
19
|
944 ## Crappy hack to assign sample id to cluster without loading the
|
|
945 ## sample. Samples are too big (their sample$env) and slows the
|
|
946 ## process, and eat all the menory so we dont't want to load them.
|
14
|
947 cluster[["sample_id"]] <- main_sample_id
|
|
948 cluster$modified__[["sample_id"]] <- main_sample_id
|
0
|
949 } else {
|
|
950 if (context$clusterID != 0 && cluster$get_clusterID() == 0) {
|
|
951 cluster$set_clusterID(context$clusterID)
|
|
952 }
|
|
953 }
|
|
954 cluster$save()
|
|
955 feature$set_cluster(cluster)
|
20
|
956 feature$save()
|
19
|
957 return(cluster)
|
0
|
958 }
|
|
959
|
11
|
960 complete_features <- function(orm, clusters, show_percent) {
|
|
961 total <- length(clusters)
|
|
962 percent <- -1
|
|
963 i <- 0
|
|
964 for (cluster in clusters) {
|
19
|
965 i <- i + 1
|
11
|
966 if (show_percent && (i / total) * 100 > percent) {
|
|
967 percent <- percent + 1
|
19
|
968 message("\r", sprintf("\r%d %%", percent), appendLF = FALSE)
|
11
|
969 }
|
19
|
970 features <- orm$feature()$load_by(cluster_id = cluster$get_id())
|
0
|
971 if (features$any()) {
|
|
972 if (!is.null(rt <- features$mean("rt"))) {
|
|
973 cluster$set_mean_rt(rt)$save()
|
|
974 }
|
|
975 features_df <- as.data.frame(features)
|
19
|
976 central_feature <- features_df[
|
|
977 grepl("^\\[M\\]", features_df[, "iso"]),
|
|
978 ]
|
0
|
979 central_feature_into <- central_feature[["int_o"]]
|
19
|
980 if (
|
|
981 !identical(central_feature_into, numeric(0))
|
|
982 && central_feature_into != 0
|
|
983 ) {
|
0
|
984 for (feature in as.vector(features)) {
|
|
985 feature$set_abundance(
|
|
986 feature$get_int_o() / central_feature_into * 100
|
|
987 )$save()
|
|
988 }
|
|
989 }
|
|
990 }
|
|
991 }
|
19
|
992 return(NULL)
|
0
|
993 }
|
|
994
|
|
995 load_process_params <- function(orm, sample, params) {
|
|
996 for (param_list in params) {
|
|
997 if (is.null(param_list[["xfunction"]])) {
|
|
998 next
|
|
999 }
|
|
1000 if (param_list[["xfunction"]] == "annotatediff") {
|
|
1001 load_process_params_peak_picking(orm, sample, param_list)
|
|
1002 }
|
|
1003 }
|
19
|
1004 return(sample)
|
0
|
1005 }
|
|
1006
|
19
|
1007 load_process_params_peak_picking <- function(
|
|
1008 orm,
|
|
1009 sample,
|
|
1010 peak_picking_params
|
|
1011 ) {
|
|
1012 return(add_sample_process_parameters(
|
|
1013 params = peak_picking_params,
|
|
1014 params_translation = list(
|
|
1015 ppm = "ppm",
|
|
1016 maxcharge = "maxCharge",
|
|
1017 maxiso = "maxIso"
|
0
|
1018 ),
|
19
|
1019 param_model_generator = orm$peak_picking_parameters,
|
|
1020 sample_param_setter = sample$set_peak_picking_parameters
|
0
|
1021 ))
|
|
1022 }
|
|
1023
|
|
1024 add_sample_process_parameters <- function(
|
|
1025 params,
|
|
1026 params_translation,
|
|
1027 param_model_generator,
|
|
1028 sample_param_setter
|
|
1029 ) {
|
|
1030 model_params <- list()
|
|
1031 for (rdata_param_name in names(params_translation)) {
|
|
1032 database_param_name <- params_translation[[rdata_param_name]]
|
|
1033 if (is.null(rdata_param <- params[[rdata_param_name]])) {
|
|
1034 next
|
|
1035 }
|
|
1036 model_params[[database_param_name]] <- rdata_param
|
|
1037 }
|
|
1038 params_models <- do.call(param_model_generator()$load_by, model_params)
|
|
1039 if (params_models$any()) {
|
|
1040 params_model <- params_models$first()
|
|
1041 } else {
|
|
1042 params_model <- do.call(param_model_generator, model_params)
|
|
1043 params_model$save()
|
|
1044 }
|
19
|
1045 return(sample_param_setter(params_model)$save())
|
0
|
1046 }
|
|
1047
|
|
1048
|
|
1049 library(optparse)
|
|
1050
|
|
1051 option_list <- list(
|
|
1052 optparse::make_option(
|
|
1053 c("-v", "--version"),
|
19
|
1054 action = "store_true",
|
|
1055 help = "Display this tool's version and exits"
|
0
|
1056 ),
|
|
1057 optparse::make_option(
|
20
|
1058 c("-V", "--verbose"),
|
|
1059 action = "store_true",
|
|
1060 help = "Does more verbose outputs",
|
|
1061 default = FALSE
|
|
1062 ),
|
|
1063 optparse::make_option(
|
0
|
1064 c("-i", "--input"),
|
19
|
1065 type = "character",
|
|
1066 help = "The rdata path to import in XSeeker"
|
0
|
1067 ),
|
|
1068 optparse::make_option(
|
|
1069 c("-s", "--samples"),
|
19
|
1070 type = "character",
|
|
1071 help = "Samples to visualise in XSeeker"
|
0
|
1072 ),
|
|
1073 optparse::make_option(
|
|
1074 c("-B", "--archetype"),
|
19
|
1075 type = "character",
|
|
1076 help = "The name of the base database"
|
0
|
1077 ),
|
|
1078 optparse::make_option(
|
|
1079 c("-b", "--database"),
|
19
|
1080 type = "character",
|
|
1081 help = "The base database's path"
|
0
|
1082 ),
|
|
1083 optparse::make_option(
|
|
1084 c("-c", "--compounds-csv"),
|
19
|
1085 type = "character",
|
|
1086 help = "The csv containing compounds"
|
0
|
1087 ),
|
|
1088 optparse::make_option(
|
|
1089 c("-m", "--models"),
|
19
|
1090 type = "character",
|
|
1091 help = paste(
|
|
1092 "The path or url (must begin with http[s]:// or git@) to",
|
|
1093 "the database's models"
|
|
1094 )
|
0
|
1095 ),
|
|
1096 optparse::make_option(
|
19
|
1097 c("-k", "--class"),
|
|
1098 type = "character",
|
|
1099 help = "The name of the column containing the classes"
|
|
1100 ),
|
|
1101 optparse::make_option(
|
0
|
1102 c("-o", "--output"),
|
19
|
1103 type = "character",
|
|
1104 help = "The path where to output sqlite"
|
0
|
1105 ),
|
|
1106 optparse::make_option(
|
|
1107 c("-P", "--not-show-percent"),
|
19
|
1108 action = "store_true",
|
|
1109 help = "Flag not to show the percents",
|
|
1110 default = FALSE
|
0
|
1111 )
|
|
1112 )
|
|
1113
|
19
|
1114 options(error = function(){traceback(3)})
|
0
|
1115
|
19
|
1116 parser <- OptionParser(
|
|
1117 usage = "%prog [options] file",
|
|
1118 option_list = option_list
|
|
1119 )
|
|
1120 args <- parse_args(parser, positional_arguments = 0)
|
0
|
1121
|
|
1122 err_code <- 0
|
|
1123
|
|
1124 if (!is.null(args$options$version)) {
|
|
1125 message(sprintf("%s %s", TOOL_NAME, VERSION))
|
|
1126 quit()
|
|
1127 }
|
|
1128
|
|
1129 models <- get_models(args$options$models)
|
|
1130 orm <- DBModelR::ORM(
|
19
|
1131 connection_params = list(dbname=args$options$output),
|
|
1132 dbms = "SQLite"
|
0
|
1133 )
|
|
1134
|
|
1135 invisible(orm$models(models))
|
|
1136 invisible(create_database(orm))
|
|
1137
|
|
1138 message("Database model created")
|
|
1139
|
|
1140 insert_adducts(orm)
|
|
1141
|
|
1142 if (!is.null(args$options$database)) {
|
|
1143 insert_base_data(orm, args$options$database)
|
|
1144 }
|
|
1145 message(sprintf("Base data inserted using %s.", args$options$database))
|
|
1146
|
|
1147 if (!is.null(args$options$archetype)) {
|
19
|
1148 insert_base_data(orm, args$options$archetype, archetype = TRUE)
|
0
|
1149 }
|
|
1150 if (!is.null(args$options$`compounds-csv`)) {
|
|
1151 insert_compounds(orm, args$options$`compounds-csv`)
|
|
1152 }
|
|
1153
|
|
1154 # if (!is.null(args$options$rdata)) {
|
|
1155 # load_rdata_in_base(args$options$rdata, args$options$samples, args$options$`not-show-percent`)
|
|
1156 # }
|
|
1157
|
|
1158
|
|
1159 load(args$options$input, rdata <- new.env())
|
|
1160
|
20
|
1161 args$options$verbose <- (
|
|
1162 if (args$options$verbose) {
|
|
1163 message("Verbose outputs.")
|
|
1164 \(...) {
|
|
1165 message(sprintf(...))
|
|
1166 }
|
|
1167 } else {
|
|
1168 \(...) {
|
|
1169 }
|
|
1170 }
|
|
1171 )
|
|
1172
|
|
1173 err_code <- process_rdata(orm, rdata, args$options)
|
0
|
1174
|
19
|
1175 quit(status = err_code)
|