Mercurial > repos > lain > xseekerpreparator
comparison galaxy/tools/LC-MSMS/XSeekerPreparator.R @ 19:2937e72e5891 draft
" master branch Updating"
author | lain |
---|---|
date | Tue, 18 Oct 2022 12:57:28 +0000 |
parents | 2c7e7fd1f740 |
children | ce94e7a141bb |
comparison
equal
deleted
inserted
replaced
18:2c7e7fd1f740 | 19:2937e72e5891 |
---|---|
1 | 1 |
2 | 2 |
3 TOOL_NAME <- "XSeekerPreparator" | 3 assign("TOOL_NAME", "XSeekerPreparator", envir = globalenv()) |
4 VERSION <- "1.2.4" | 4 lockBinding("TOOL_NAME", globalenv()) |
5 | 5 assign("VERSION", "1.3.0", envir = globalenv()) |
6 DEBUG_FAST <- FALSE | 6 lockBinding("VERSION", globalenv()) |
7 DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST | 7 assign("DEBUG_FAST", FALSE, envir = globalenv()) |
8 PROCESS_SMOL_BATCH <- DEBUG_FAST | 8 lockBinding("DEBUG_FAST", globalenv()) |
9 FAST_FEATURE_RATIO <- 10 | 9 assign("DEBUG_FAST_IGNORE_SLOW_OP", DEBUG_FAST, envir = globalenv()) |
10 | 10 lockBinding("DEBUG_FAST_IGNORE_SLOW_OP", globalenv()) |
11 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy" | 11 assign("PROCESS_SMOL_BATCH", DEBUG_FAST, envir = globalenv()) |
12 | 12 lockBinding("PROCESS_SMOL_BATCH", globalenv()) |
13 ENRICHED_RDATA_VERSION <- paste("1.2.4", OUTPUT_SPECIFIC_TOOL, sep="-") | 13 assign("FAST_FEATURE_RATIO", 10, envir = globalenv()) |
14 ENRICHED_RDATA_DOC <- sprintf(" | 14 lockBinding("FAST_FEATURE_RATIO", globalenv()) |
15 assign("OUTPUT_SPECIFIC_TOOL", "XSeeker_Galaxy", envir = globalenv()) | |
16 lockBinding("OUTPUT_SPECIFIC_TOOL", globalenv()) | |
17 | |
18 assign( | |
19 "ENRICHED_RDATA_VERSION", | |
20 paste(VERSION, OUTPUT_SPECIFIC_TOOL, sep = "-"), | |
21 envir = globalenv() | |
22 ) | |
23 lockBinding("ENRICHED_RDATA_VERSION", globalenv()) | |
24 assign("ENRICHED_RDATA_DOC", sprintf(" | |
15 Welcome to the enriched <Version %s> of the output of CAMERA/xcms. | 25 Welcome to the enriched <Version %s> of the output of CAMERA/xcms. |
16 This doc was generated by the tool: %s - Version %s | 26 This doc was generated by the tool: %s - Version %s |
17 To show the different variables contained in this rdata, type: | 27 To show the different variables contained in this rdata, type: |
18 - `load('this_rdata.rdata', rdata_env <- new.env())` | 28 - `load('this_rdata.rdata', rdata_env <- new.env())` |
19 - `names(rdata_env)` | 29 - `names(rdata_env)` |
39 - Retrieval method: enriched_rdata <- TRUE | 49 - Retrieval method: enriched_rdata <- TRUE |
40 | 50 |
41 - enriched_rdata_version: | 51 - enriched_rdata_version: |
42 - Description: A flag created by that tool to tell which version of | 52 - Description: A flag created by that tool to tell which version of |
43 this tool has enriched the rdata. | 53 this tool has enriched the rdata. |
44 - Retrieval method: enriched_rdata_version <- sprintf(\"%s\", ENRICHED_RDATA_VERSION) | 54 - Retrieval method: |
55 enriched_rdata_version <- sprintf( | |
56 \"%s\", | |
57 ENRICHED_RDATA_VERSION | |
58 ) | |
45 | 59 |
46 - enriched_rdata_doc: | 60 - enriched_rdata_doc: |
47 - Description: Contains the documentation string. | 61 - Description: Contains the documentation string. |
48 | 62 |
49 Data from original mzxml file | 63 Data from original mzxml file |
79 - xcms version: 2.0 | 93 - xcms version: 2.0 |
80 | 94 |
81 - polarity: | 95 - polarity: |
82 - Description: Those are the polarity values from the original mzxml | 96 - Description: Those are the polarity values from the original mzxml |
83 file, extracted using xcms 2. | 97 file, extracted using xcms 2. |
84 - Retrieval method: as.character(xcms::xcmsRaw('original_file.mzxml')@polarity[[1]]) | 98 - Retrieval method: |
99 as.character(xcms::xcmsRaw( | |
100 'original_file.mzxml' | |
101 )@polarity[[1]]) | |
85 - xcms version: 2.0 | 102 - xcms version: 2.0 |
86 | 103 |
87 Data taken from incoming rdata | 104 Data taken from incoming rdata |
88 ------ | 105 ------ |
89 - variableMetadata: | 106 - variableMetadata: |
101 - Retrieval method: | 118 - Retrieval method: |
102 ## just he same list, but simplified | 119 ## just he same list, but simplified |
103 process_params <- list() | 120 process_params <- list() |
104 for (list_name in names(rdata_file$listOFlistArguments)) { | 121 for (list_name in names(rdata_file$listOFlistArguments)) { |
105 param_list <- list() | 122 param_list <- list() |
106 for (param_name in names(rdata_file$listOFlistArguments[[list_name]])) { | 123 for (param_name in names( |
107 param_list[[param_name]] <- rdata_file$listOFlistArguments[[list_name]][[param_name]] | 124 rdata_file$listOFlistArguments[[list_name]] |
125 )) { | |
126 param_list[[param_name]] <- rdata_file$listOFlistArguments[[ | |
127 list_name | |
128 ]][[param_name]] | |
108 } | 129 } |
109 process_params[[length(process_params)+1]] <- param_list | 130 process_params[[length(process_params)+1]] <- param_list |
110 } | 131 } |
111 ", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION) | 132 ", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION), |
133 envir = globalenv()) | |
134 lockBinding("ENRICHED_RDATA_DOC", globalenv()) | |
112 | 135 |
113 | 136 |
114 | 137 |
115 get_models <- function(path) { | 138 get_models <- function(path) { |
116 if (is.null(path)) { | 139 if (is.null(path)) { |
118 } else { | 141 } else { |
119 message(sprintf("Loading models from %s", path)) | 142 message(sprintf("Loading models from %s", path)) |
120 } | 143 } |
121 ## galaxy mangles the "@" to a "__at__" | 144 ## galaxy mangles the "@" to a "__at__" |
122 if (substr(path, 1, 9) == "git__at__") { | 145 if (substr(path, 1, 9) == "git__at__") { |
123 path <- sub("^git__at__", "git@", path, perl=TRUE) | 146 path <- sub("^git__at__", "git@", path, perl = TRUE) |
124 } | 147 } |
125 if ( | 148 if ( |
126 substr(path, 1, 4) == "git@" | 149 substr(path, 1, 4) == "git@" |
127 || substr(path, length(path)-4, 4) == ".git" | 150 || substr(path, length(path) - 4, 4) == ".git" |
128 ) { | 151 ) { |
129 return (get_models_from_git(path)) | 152 return(get_models_from_git(path)) |
130 } | 153 } |
131 if (substr(path, 1, 4) == "http") { | 154 if (substr(path, 1, 4) == "http") { |
132 return (get_models_from_url(path)) | 155 return(get_models_from_url(path)) |
133 } | 156 } |
134 return (source(path)$value) | 157 return(source(path)$value) |
135 } | 158 } |
136 | 159 |
137 get_models_from_git <- function (url, target_file="models.R", rm=TRUE) { | 160 get_models_from_git <- function(url, target_file = "models.R", rm = TRUE) { |
138 tmp <- tempdir() | 161 tmp <- tempdir() |
139 message(sprintf("Cloning %s", url)) | 162 message(sprintf("Cloning %s", url)) |
140 system2("git", c("clone", url, tmp)) | 163 system2("git", c("clone", url, tmp)) |
141 result <- search_tree(file.path(tmp, dir), target_file) | 164 result <- search_tree(file.path(tmp, dir), target_file) |
142 if (!is.null(result)) { | 165 if (!is.null(result)) { |
143 models <- source(result)$value | 166 models <- source(result)$value |
144 if (rm) { | 167 if (rm) { |
145 unlink(tmp, recursive=TRUE) | 168 unlink(tmp, recursive = TRUE) |
146 } | 169 } |
147 return (models) | 170 return(models) |
148 } | 171 } |
149 if (rm) { | 172 if (rm) { |
150 unlink(tmp, recursive=TRUE) | 173 unlink(tmp, recursive = TRUE) |
151 } | 174 } |
152 stop(sprintf( | 175 stop(sprintf( |
153 "Could not find any file named \"%s\" in this repo", | 176 "Could not find any file named \"%s\" in this repo", |
154 target_file | 177 target_file |
155 )) | 178 )) |
156 } | 179 } |
157 | 180 |
158 get_models_from_url <- function (url, target_file="models.R", rm=TRUE) { | 181 get_models_from_url <- function(url, target_file = "models.R", rm = TRUE) { |
159 tmp <- tempdir() | 182 tmp <- tempdir() |
160 message(sprintf("Downloading %s", url)) | 183 message(sprintf("Downloading %s", url)) |
161 result <- file.path(tmp, target_file) | 184 result <- file.path(tmp, target_file) |
162 if (download.file(url, destfile=result) == 0) { | 185 if (download.file(url, destfile = result) == 0) { |
163 models <- source(result)$value | 186 models <- source(result)$value |
164 if (rm) { | 187 if (rm) { |
165 unlink(tmp, recursive=TRUE) | 188 unlink(tmp, recursive = TRUE) |
166 } | 189 } |
167 return (models) | 190 return(models) |
168 } | 191 } |
169 if (rm) { | 192 if (rm) { |
170 unlink(tmp, recursive=TRUE) | 193 unlink(tmp, recursive = TRUE) |
171 } | 194 } |
172 stop("Could not download any file at this adress.") | 195 stop("Could not download any file at this adress.") |
173 } | 196 } |
174 | 197 |
175 search_tree <- function(path, target) { | 198 search_tree <- function(path, target) { |
176 target <- tolower(target) | 199 target <- tolower(target) |
177 for (file in list.files(path)) { | 200 for (file in list.files(path)) { |
178 if (is.dir(file)) { | 201 if (is.dir(file)) { |
179 result <- search_tree(file.path(path, file), target) | 202 result <- search_tree(file.path(path, file), target) |
180 if (!is.null(result)) { | 203 if (!is.null(result)) { |
181 return (result) | 204 return(result) |
182 } | 205 } |
183 } else if (tolower(file) == target) { | 206 } else if (tolower(file) == target) { |
184 return (file.path(path, file)) | 207 return(file.path(path, file)) |
185 } | 208 } |
186 } | 209 } |
187 return (NULL) | 210 return(NULL) |
188 } | 211 } |
189 | 212 |
190 create_database <- function(orm) { | 213 create_database <- function(orm) { |
191 orm$recreate_database(no_exists=FALSE) | 214 orm$recreate_database(no_exists = FALSE) |
192 set_database_version(orm, "created") | 215 set_database_version(orm, "created") |
193 } | 216 } |
194 | 217 |
195 insert_adducts <- function(orm) { | 218 insert_adducts <- function(orm) { |
196 message("Creating adducts...") | 219 message("Creating adducts...") |
197 adducts <- list( | 220 adducts <- list( |
198 list("[M-H2O-H]-",1,-1,-48.992020312000001069,1,0,0.5,"H0","H1O3"), | 221 list("[M-H2O-H]-", 1, -1, -48.992020312000001069, 1, 0, 0.5, "H0", "H1O3"), |
199 list("[M-H-Cl+O]-",1,-1,-19.981214542000000022,2,0,0.5,"O1","H1Cl1"), | 222 list("[M-H-Cl+O]-", 1, -1, -19.981214542000000022, 2, 0, 0.5, "O1", "H1Cl1"), |
200 list("[M-Cl+O]-",1,-1,-18.973389510000000512,3,0,0.5,"O1","Cl1"), | 223 list("[M-Cl+O]-", 1, -1, -18.973389510000000512, 3, 0, 0.5, "O1", "Cl1"), |
201 list("[M-3H]3-",1,-3,-3.0218293560000000219,4,0,1.0,"H0","H3"), | 224 list("[M-3H]3-", 1, -3, -3.0218293560000000219, 4, 0, 1.0, "H0", "H3"), |
202 list("[2M-3H]3-",2,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"), | 225 list("[2M-3H]3-", 2, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"), |
203 list("[3M-3H]3-",3,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"), | 226 list("[3M-3H]3-", 3, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"), |
204 list("[M-2H]2-",1,-2,-2.0145529039999998666,5,0,1.0,"H0","H2"), | 227 list("[M-2H]2-", 1, -2, -2.0145529039999998666, 5, 0, 1.0, "H0", "H2"), |
205 list("[2M-2H]2-",2,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"), | 228 list("[2M-2H]2-", 2, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"), |
206 list("[3M-2H]2-",3,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"), | 229 list("[3M-2H]2-", 3, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"), |
207 list("[M-H]-",1,-1,-1.0072764519999999333,6,1,1.0,"H0","H1"), | 230 list("[M-H]-", 1, -1, -1.0072764519999999333, 6, 1, 1.0, "H0", "H1"), |
208 list("[2M-H]-",2,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"), | 231 list("[2M-H]-", 2, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"), |
209 list("[3M-H]-",3,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"), | 232 list("[3M-H]-", 3, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"), |
210 list("[M]+",1,1,-0.00054858000000000000945,7,1,1.0,"H0","H0"), | 233 list("[M]+", 1, 1, -0.00054858000000000000945, 7, 1, 1.0, "H0", "H0"), |
211 list("[M]-",1,-1,0.00054858000000000000945,8,1,1.0,"H0","H0"), | 234 list("[M]-", 1, -1, 0.00054858000000000000945, 8, 1, 1.0, "H0", "H0"), |
212 list("[M+H]+",1,1,1.0072764519999999333,9,1,1.0,"H1","H0"), | 235 list("[M+H]+", 1, 1, 1.0072764519999999333, 9, 1, 1.0, "H1", "H0"), |
213 list("[2M+H]+",2,1,1.0072764519999999333,9,0,0.5,"H1","H0"), | 236 list("[2M+H]+", 2, 1, 1.0072764519999999333, 9, 0, 0.5, "H1", "H0"), |
214 list("[3M+H]+",3,1,1.0072764519999999333,9,0,0.25,"H1","H0"), | 237 list("[3M+H]+", 3, 1, 1.0072764519999999333, 9, 0, 0.25, "H1", "H0"), |
215 list("[M+2H]2+",1,2,2.0145529039999998666,10,0,0.75,"H2","H0"), | 238 list("[M+2H]2+", 1, 2, 2.0145529039999998666, 10, 0, 0.75, "H2", "H0"), |
216 list("[2M+2H]2+",2,2,2.0145529039999998666,10,0,0.5,"H2","H0"), | 239 list("[2M+2H]2+", 2, 2, 2.0145529039999998666, 10, 0, 0.5, "H2", "H0"), |
217 list("[3M+2H]2+",3,2,2.0145529039999998666,10,0,0.25,"H2","H0"), | 240 list("[3M+2H]2+", 3, 2, 2.0145529039999998666, 10, 0, 0.25, "H2", "H0"), |
218 list("[M+3H]3+",1,3,3.0218293560000000219,11,0,0.75,"H3","H0"), | 241 list("[M+3H]3+", 1, 3, 3.0218293560000000219, 11, 0, 0.75, "H3", "H0"), |
219 list("[2M+3H]3+",2,3,3.0218293560000000219,11,0,0.5,"H3","H0"), | 242 list("[2M+3H]3+", 2, 3, 3.0218293560000000219, 11, 0, 0.5, "H3", "H0"), |
220 list("[3M+3H]3+",3,3,3.0218293560000000219,11,0,0.25,"H3","H0"), | 243 list("[3M+3H]3+", 3, 3, 3.0218293560000000219, 11, 0, 0.25, "H3", "H0"), |
221 list("[M-2H+NH4]-",1,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"), | 244 list("[M-2H+NH4]-", 1, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"), |
222 list("[2M-2H+NH4]-",2,-1,16.019272654000001665,12,0,0.0,"N1H4","H2"), | 245 list("[2M-2H+NH4]-", 2, -1, 16.019272654000001665, 12, 0, 0.0, "N1H4", "H2"), |
223 list("[3M-2H+NH4]-",3,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"), | 246 list("[3M-2H+NH4]-", 3, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"), |
224 list("[M+NH4]+",1,1,18.033825558000000199,13,1,1.0,"N1H4","H0"), | 247 list("[M+NH4]+", 1, 1, 18.033825558000000199, 13, 1, 1.0, "N1H4", "H0"), |
225 list("[2M+NH4]+",2,1,18.033825558000000199,13,0,0.5,"N1H4","H0"), | 248 list("[2M+NH4]+", 2, 1, 18.033825558000000199, 13, 0, 0.5, "N1H4", "H0"), |
226 list("[3M+NH4]+",3,1,18.033825558000000199,13,0,0.25,"N1H4","H0"), | 249 list("[3M+NH4]+", 3, 1, 18.033825558000000199, 13, 0, 0.25, "N1H4", "H0"), |
227 list("[M+H+NH4]2+",1,2,19.041102009999999467,14,0,0.5,"N1H5","H0"), | 250 list("[M+H+NH4]2+", 1, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"), |
228 list("[2M+H+NH4]2+",2,2,19.041102009999999467,14,0,0.5,"N1H5","H0"), | 251 list("[2M+H+NH4]2+", 2, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"), |
229 list("[3M+H+NH4]2+",3,2,19.041102009999999467,14,0,0.25,"N1H5","H0"), | 252 list("[3M+H+NH4]2+", 3, 2, 19.041102009999999467, 14, 0, 0.25, "N1H5", "H0"), |
230 list("[M+Na-2H]-",1,-1,20.974668176000001551,15,0,0.75,"Na1","H2"), | 253 list("[M+Na-2H]-", 1, -1, 20.974668176000001551, 15, 0, 0.75, "Na1", "H2"), |
231 list("[2M-2H+Na]-",2,-1,20.974668176000001551,15,0,0.25,"Na1","H2"), | 254 list("[2M-2H+Na]-", 2, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"), |
232 list("[3M-2H+Na]-",3,-1,20.974668176000001551,15,0,0.25,"Na1","H2"), | 255 list("[3M-2H+Na]-", 3, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"), |
233 list("[M+Na]+",1,1,22.989221080000000086,16,1,1.0,"Na1","H0"), | 256 list("[M+Na]+", 1, 1, 22.989221080000000086, 16, 1, 1.0, "Na1", "H0"), |
234 list("[2M+Na]+",2,1,22.989221080000000086,16,0,0.5,"Na1","H0"), | 257 list("[2M+Na]+", 2, 1, 22.989221080000000086, 16, 0, 0.5, "Na1", "H0"), |
235 list("[3M+Na]+",3,1,22.989221080000000086,16,0,0.25,"Na1","H0"), | 258 list("[3M+Na]+", 3, 1, 22.989221080000000086, 16, 0, 0.25, "Na1", "H0"), |
236 list("[M+H+Na]2+",1,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"), | 259 list("[M+H+Na]2+", 1, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"), |
237 list("[2M+H+Na]2+",2,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"), | 260 list("[2M+H+Na]2+", 2, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"), |
238 list("[3M+H+Na]2+",3,2,23.996497531999999353,17,0,0.25,"Na1H1","H0"), | 261 list("[3M+H+Na]2+", 3, 2, 23.996497531999999353, 17, 0, 0.25, "Na1H1", "H0"), |
239 list("[M+2H+Na]3+",1,3,25.003773983999998619,18,0,0.25,"H2Na1","H0"), | 262 list("[M+2H+Na]3+", 1, 3, 25.003773983999998619, 18, 0, 0.25, "H2Na1", "H0"), |
240 list("[M+CH3OH+H]+",1,1,33.033491200000000276,19,0,0.25,"C1O1H5","H0"), | 263 list("[M+CH3OH+H]+", 1, 1, 33.033491200000000276, 19, 0, 0.25, "C1O1H5", "H0"), |
241 list("[M-H+Cl]2-",1,-2,33.962124838000001148,20,0,1.0,"Cl1","H1"), | 264 list("[M-H+Cl]2-", 1, -2, 33.962124838000001148, 20, 0, 1.0, "Cl1", "H1"), |
242 list("[2M-H+Cl]2-",2,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"), | 265 list("[2M-H+Cl]2-", 2, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"), |
243 list("[3M-H+Cl]2-",3,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"), | 266 list("[3M-H+Cl]2-", 3, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"), |
244 list("[M+Cl]-",1,-1,34.969401290000000416,21,1,1.0,"Cl1","H0"), | 267 list("[M+Cl]-", 1, -1, 34.969401290000000416, 21, 1, 1.0, "Cl1", "H0"), |
245 list("[2M+Cl]-",2,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"), | 268 list("[2M+Cl]-", 2, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"), |
246 list("[3M+Cl]-",3,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"), | 269 list("[3M+Cl]-", 3, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"), |
247 list("[M+K-2H]-",1,-1,36.948605415999999479,22,0,0.5,"K1","H2"), | 270 list("[M+K-2H]-", 1, -1, 36.948605415999999479, 22, 0, 0.5, "K1", "H2"), |
248 list("[2M-2H+K]-",2,-1,36.948605415999999479,22,0,0.0,"K1","H2"), | 271 list("[2M-2H+K]-", 2, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"), |
249 list("[3M-2H+K]-",3,-1,36.948605415999999479,22,0,0.0,"K1","H2"), | 272 list("[3M-2H+K]-", 3, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"), |
250 list("[M+K]+",1,1,38.963158319999998013,23,1,1.0,"K1","H0"), | 273 list("[M+K]+", 1, 1, 38.963158319999998013, 23, 1, 1.0, "K1", "H0"), |
251 list("[2M+K]+",2,1,38.963158319999998013,23,0,0.5,"K1","H0"), | 274 list("[2M+K]+", 2, 1, 38.963158319999998013, 23, 0, 0.5, "K1", "H0"), |
252 list("[3M+K]+",3,1,38.963158319999998013,23,0,0.25,"K1","H0"), | 275 list("[3M+K]+", 3, 1, 38.963158319999998013, 23, 0, 0.25, "K1", "H0"), |
253 list("[M+H+K]2+",1,2,39.970434771999997281,24,0,0.5,"K1H1","H0"), | 276 list("[M+H+K]2+", 1, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"), |
254 list("[2M+H+K]2+",2,2,39.970434771999997281,24,0,0.5,"K1H1","H0"), | 277 list("[2M+H+K]2+", 2, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"), |
255 list("[3M+H+K]2+",3,2,39.970434771999997281,24,0,0.25,"K1H1","H0"), | 278 list("[3M+H+K]2+", 3, 2, 39.970434771999997281, 24, 0, 0.25, "K1H1", "H0"), |
256 list("[M+ACN+H]+",1,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"), | 279 list("[M+ACN+H]+", 1, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"), |
257 list("[2M+ACN+H]+",2,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"), | 280 list("[2M+ACN+H]+", 2, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"), |
258 list("[M+2Na-H]+",1,1,44.971165708000000902,26,0,0.5,"Na2","H1"), | 281 list("[M+2Na-H]+", 1, 1, 44.971165708000000902, 26, 0, 0.5, "Na2", "H1"), |
259 list("[2M+2Na-H]+",2,1,44.971165708000000902,26,0,0.25,"Na2","H1"), | 282 list("[2M+2Na-H]+", 2, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"), |
260 list("[3M+2Na-H]+",3,1,44.971165708000000902,26,0,0.25,"Na2","H1"), | 283 list("[3M+2Na-H]+", 3, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"), |
261 list("[2M+FA-H]-",2,-1,44.998202851999998586,27,0,0.25,"C1O2H2","H1"), | 284 list("[2M+FA-H]-", 2, -1, 44.998202851999998586, 27, 0, 0.25, "C1O2H2", "H1"), |
262 list("[M+FA-H]-",1,-1,44.998202851999998586,27,0,0.5,"C1O2H2","H1"), | 285 list("[M+FA-H]-", 1, -1, 44.998202851999998586, 27, 0, 0.5, "C1O2H2", "H1"), |
263 list("[M+2Na]2+",1,2,45.978442160000000172,28,0,0.5,"Na2","H0"), | 286 list("[M+2Na]2+", 1, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"), |
264 list("[2M+2Na]2+",2,2,45.978442160000000172,28,0,0.5,"Na2","H0"), | 287 list("[2M+2Na]2+", 2, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"), |
265 list("[3M+2Na]2+",3,2,45.978442160000000172,28,0,0.25,"Na2","H0"), | 288 list("[3M+2Na]2+", 3, 2, 45.978442160000000172, 28, 0, 0.25, "Na2", "H0"), |
266 list("[M+H+2Na]3+",1,3,46.985718611999999438,29,0,0.25,"H1Na2","H0"), | 289 list("[M+H+2Na]3+", 1, 3, 46.985718611999999438, 29, 0, 0.25, "H1Na2", "H0"), |
267 list("[M+H+FA]+",1,1,47.012755755999997122,30,0,0.25,"C1O2H3","H0"), | 290 list("[M+H+FA]+", 1, 1, 47.012755755999997122, 30, 0, 0.25, "C1O2H3", "H0"), |
268 list("[M+Hac-H]-",1,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"), | 291 list("[M+Hac-H]-", 1, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"), |
269 list("[2M+Hac-H]-",2,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"), | 292 list("[2M+Hac-H]-", 2, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"), |
270 list("[M+IsoProp+H]+",1,1,61.064791327999998317,32,0,0.25,"C3H9O1","H0"), | 293 list("[M+IsoProp+H]+", 1, 1, 61.064791327999998317, 32, 0, 0.25, "C3H9O1", "H0"), |
271 list("[M+Na+K]2+",1,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"), | 294 list("[M+Na+K]2+", 1, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"), |
272 list("[2M+Na+K]2+",2,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"), | 295 list("[2M+Na+K]2+", 2, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"), |
273 list("[3M+Na+K]2+",3,2,61.9523793999999981,33,0,0.25,"Na1K1","H0"), | 296 list("[3M+Na+K]2+", 3, 2, 61.9523793999999981, 33, 0, 0.25, "Na1K1", "H0"), |
274 list("[M+NO3]-",1,-1,61.988366450000000895,34,0,0.5,"N1O3","H0"), | 297 list("[M+NO3]-", 1, -1, 61.988366450000000895, 34, 0, 0.5, "N1O3", "H0"), |
275 list("[M+ACN+Na]+",1,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"), | 298 list("[M+ACN+Na]+", 1, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"), |
276 list("[2M+ACN+Na]+",2,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"), | 299 list("[2M+ACN+Na]+", 2, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"), |
277 list("[M+NH4+FA]+",1,1,64.039304861999994502,36,0,0.25,"N1C1O2H6","H0"), | 300 list("[M+NH4+FA]+", 1, 1, 64.039304861999994502, 36, 0, 0.25, "N1C1O2H6", "H0"), |
278 list("[M-2H+Na+FA]-",1,-1,66.980147479999999405,37,0,0.5,"NaC1O2H2","H2"), | 301 list("[M-2H+Na+FA]-", 1, -1, 66.980147479999999405, 37, 0, 0.5, "NaC1O2H2", "H2"), |
279 list("[M+3Na]3+",1,3,68.967663239999993153,38,0,0.25,"Na3","H0"), | 302 list("[M+3Na]3+", 1, 3, 68.967663239999993153, 38, 0, 0.25, "Na3", "H0"), |
280 list("[M+Na+FA]+",1,1,68.99470038399999794,39,0,0.25,"Na1C1O2H2","H0"), | 303 list("[M+Na+FA]+", 1, 1, 68.99470038399999794, 39, 0, 0.25, "Na1C1O2H2", "H0"), |
281 list("[M+2Cl]2-",1,-2,69.938802580000000832,40,0,1.0,"Cl2","H0"), | 304 list("[M+2Cl]2-", 1, -2, 69.938802580000000832, 40, 0, 1.0, "Cl2", "H0"), |
282 list("[2M+2Cl]2-",2,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"), | 305 list("[2M+2Cl]2-", 2, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"), |
283 list("[3M+2Cl]2-",3,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"), | 306 list("[3M+2Cl]2-", 3, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"), |
284 list("[M+2K-H]+",1,1,76.919040187999996758,41,0,0.5,"K2","H1"), | 307 list("[M+2K-H]+", 1, 1, 76.919040187999996758, 41, 0, 0.5, "K2", "H1"), |
285 list("[2M+2K-H]+",2,1,76.919040187999996758,41,0,0.25,"K2","H1"), | 308 list("[2M+2K-H]+", 2, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"), |
286 list("[3M+2K-H]+",3,1,76.919040187999996758,41,0,0.25,"K2","H1"), | 309 list("[3M+2K-H]+", 3, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"), |
287 list("[M+2K]2+",1,2,77.926316639999996028,42,0,0.5,"K2","H0"), | 310 list("[M+2K]2+", 1, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"), |
288 list("[2M+2K]2+",2,2,77.926316639999996028,42,0,0.5,"K2","H0"), | 311 list("[2M+2K]2+", 2, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"), |
289 list("[3M+2K]2+",3,2,77.926316639999996028,42,0,0.25,"K2","H0"), | 312 list("[3M+2K]2+", 3, 2, 77.926316639999996028, 42, 0, 0.25, "K2", "H0"), |
290 list("[M+Br]-",1,-1,78.918886479999997619,43,1,1.0,"Br1","H0"), | 313 list("[M+Br]-", 1, -1, 78.918886479999997619, 43, 1, 1.0, "Br1", "H0"), |
291 list("[M+Cl+FA]-",1,-1,80.974880593999998268,44,0,0.5,"Cl1C1O2H2","H0"), | 314 list("[M+Cl+FA]-", 1, -1, 80.974880593999998268, 44, 0, 0.5, "Cl1C1O2H2", "H0"), |
292 list("[M+AcNa-H]-",1,-1,80.995797543999998426,45,0,0.25,"C2H3Na1O2","H1"), | 315 list("[M+AcNa-H]-", 1, -1, 80.995797543999998426, 45, 0, 0.25, "C2H3Na1O2", "H1"), |
293 list("[M+2ACN+2H]2+",1,2,84.067651115999993292,46,0,0.25,"C4H8N2","H0"), | 316 list("[M+2ACN+2H]2+", 1, 2, 84.067651115999993292, 46, 0, 0.25, "C4H8N2", "H0"), |
294 list("[M+K+FA]+",1,1,84.968637623999995868,47,0,0.25,"K1C1O2H2","H0"), | 317 list("[M+K+FA]+", 1, 1, 84.968637623999995868, 47, 0, 0.25, "K1C1O2H2", "H0"), |
295 list("[M+Cl+Na+FA-H]-",1,-1,102.95682522200000619,48,0,0.5,"Cl1Na1C1O2H2","H1"), | 318 list("[M+Cl+Na+FA-H]-", 1, -1, 102.95682522200000619, 48, 0, 0.5, "Cl1Na1C1O2H2", "H1"), |
296 list("[2M+3H2O+2H]+",2,1,104.03153939599999944,49,0,0.25,"H8O6","H0"), | 319 list("[2M+3H2O+2H]+", 2, 1, 104.03153939599999944, 49, 0, 0.25, "H8O6", "H0"), |
297 list("[M+TFA-H]-",1,-1,112.98558742000000165,50,0,0.5,"C2F3O2H1","H1"), | 320 list("[M+TFA-H]-", 1, -1, 112.98558742000000165, 50, 0, 0.5, "C2F3O2H1", "H1"), |
298 list("[M+H+TFA]+",1,1,115.00014032400000019,51,0,0.25,"C2F3O2H2","H0"), | 321 list("[M+H+TFA]+", 1, 1, 115.00014032400000019, 51, 0, 0.25, "C2F3O2H2", "H0"), |
299 list("[M+3ACN+2H]2+",1,2,125.09420022199999778,52,0,0.25,"C6H11N3","H0"), | 322 list("[M+3ACN+2H]2+", 1, 2, 125.09420022199999778, 52, 0, 0.25, "C6H11N3", "H0"), |
300 list("[M+NH4+TFA]+",1,1,132.02668943000000468,53,0,0.25,"N1C2F3O2H5","H0"), | 323 list("[M+NH4+TFA]+", 1, 1, 132.02668943000000468, 53, 0, 0.25, "N1C2F3O2H5", "H0"), |
301 list("[M+Na+TFA]+",1,1,136.98208495200000811,54,0,0.25,"Na1C2F3O2H1","H0"), | 324 list("[M+Na+TFA]+", 1, 1, 136.98208495200000811, 54, 0, 0.25, "Na1C2F3O2H1", "H0"), |
302 list("[M+Cl+TFA]-",1,-1,148.96226516199999423,55,0,0.5,"Cl1C2F3O2H1","H0"), | 325 list("[M+Cl+TFA]-", 1, -1, 148.96226516199999423, 55, 0, 0.5, "Cl1C2F3O2H1", "H0"), |
303 list("[M+K+TFA]+",1,1,152.95602219200000604,56,0,0.25,"K1C2F3O2H1","H0") | 326 list("[M+K+TFA]+", 1, 1, 152.95602219200000604, 56, 0, 0.25, "K1C2F3O2H1","H0") |
304 ) | 327 ) |
305 dummy_adduct <- orm$adduct() | 328 dummy_adduct <- orm$adduct() |
306 for (adduct in adducts) { | 329 for (adduct in adducts) { |
307 i <- 0 | 330 i <- 0 |
308 dummy_adduct$set_name(adduct[[i <- i+1]]) | 331 dummy_adduct$set_name(adduct[[i <- i + 1]]) |
309 dummy_adduct$set_multi(adduct[[i <- i+1]]) | 332 dummy_adduct$set_multi(adduct[[i <- i + 1]]) |
310 dummy_adduct$set_charge(adduct[[i <- i+1]]) | 333 dummy_adduct$set_charge(adduct[[i <- i + 1]]) |
311 dummy_adduct$set_mass(adduct[[i <- i+1]]) | 334 dummy_adduct$set_mass(adduct[[i <- i + 1]]) |
312 dummy_adduct$set_oidscore(adduct[[i <- i+1]]) | 335 dummy_adduct$set_oidscore(adduct[[i <- i + 1]]) |
313 dummy_adduct$set_quasi(adduct[[i <- i+1]]) | 336 dummy_adduct$set_quasi(adduct[[i <- i + 1]]) |
314 dummy_adduct$set_ips(adduct[[i <- i+1]]) | 337 dummy_adduct$set_ips(adduct[[i <- i + 1]]) |
315 dummy_adduct$set_formula_add(adduct[[i <- i+1]]) | 338 dummy_adduct$set_formula_add(adduct[[i <- i + 1]]) |
316 dummy_adduct$set_formula_ded(adduct[[i <- i+1]]) | 339 dummy_adduct$set_formula_ded(adduct[[i <- i + 1]]) |
317 invisible(dummy_adduct$save()) | 340 invisible(dummy_adduct$save()) |
318 dummy_adduct$clear(unset_id=TRUE) | 341 dummy_adduct$clear(unset_id = TRUE) |
319 } | 342 } |
320 message("Adducts created") | 343 message("Adducts created") |
321 } | 344 } |
322 | 345 |
323 insert_base_data <- function(orm, path, archetype=FALSE) { | 346 insert_base_data <- function(orm, path, archetype = FALSE) { |
324 if (archetype) { | 347 if (archetype) { |
325 ## not implemented yet | 348 ## not implemented yet |
326 return () | 349 return() |
327 } | 350 } |
328 base_data <- readLines(path) | 351 base_data <- readLines(path) |
329 for (sql in strsplit(paste(base_data, collapse=" "), ";")[[1]]) { | 352 for (sql in strsplit(paste(base_data, collapse = " "), ";")[[1]]) { |
330 orm$execute(sql) | 353 orm$execute(sql) |
331 } | 354 } |
332 set_database_version(orm, "enriched") | 355 set_database_version(orm, "enriched") |
333 } | 356 } |
334 | 357 |
335 insert_compounds <- function(orm, compounds_path) { | 358 insert_compounds <- function(orm, compounds_path) { |
336 compounds <- read.csv(file=compounds_path, sep="\t") | 359 compounds <- read.csv(file = compounds_path, sep = "\t") |
337 if (is.null(compounds <- translate_compounds(compounds))) { | 360 if (is.null(compounds <- translate_compounds(compounds))) { |
338 stop("Could not find asked compound's attributes in csv file.") | 361 stop("Could not find asked compound's attributes in csv file.") |
339 } | 362 } |
340 dummy_compound <- orm$compound() | 363 dummy_compound <- orm$compound() |
341 compound_list <- list() | 364 compound_list <- list() |
342 for (i in seq_len(nrow(compounds))) { | 365 for (i in seq_len(nrow(compounds))) { |
343 dummy_compound$set_mz(compounds[i, "mz"]) | 366 dummy_compound$set_mz(compounds[i, "mz"]) |
344 dummy_compound$set_name(compounds[i, "name"]) | 367 dummy_compound$set_name(compounds[i, "name"]) |
345 dummy_compound$set_common_name(compounds[i, "common_name"]) | 368 dummy_compound$set_common_name(compounds[i, "common_name"]) |
346 dummy_compound$set_formula(compounds[i, "formula"]) | 369 dummy_compound$set_formula(compounds[i, "formula"]) |
347 compound_list[[length(compound_list)+1]] <- as.list( | 370 compound_list[[length(compound_list) + 1]] <- as.list( |
348 dummy_compound, | 371 dummy_compound, |
349 c("mz", "name", "common_name", "formula") | 372 c("mz", "name", "common_name", "formula") |
350 ) | 373 ) |
351 dummy_compound$clear(unset_id=TRUE) | 374 dummy_compound$clear(unset_id = TRUE) |
352 } | 375 } |
353 invisible(dummy_compound$save(bulk=compound_list)) | 376 invisible(dummy_compound$save(bulk = compound_list)) |
354 } | 377 } |
355 | 378 |
356 translate_compounds <- function(compounds) { | 379 translate_compounds <- function(compounds) { |
357 recognized_headers <- list( | 380 recognized_headers <- list( |
358 c("HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", "MetName", "ChemFormula", "INChIkey") | 381 c( |
382 "HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", | |
383 "MetName", "ChemFormula", "INChIkey" | |
384 ) | |
359 ) | 385 ) |
360 header_translators <- list( | 386 header_translators <- list( |
361 hmdb_header_translator | 387 hmdb_header_translator |
362 ) | 388 ) |
363 for (index in seq_along(recognized_headers)) { | 389 for (index in seq_along(recognized_headers)) { |
364 headers <- recognized_headers[[index]] | 390 headers <- recognized_headers[[index]] |
365 if (identical(colnames(compounds), headers)) { | 391 if (identical(colnames(compounds), headers)) { |
366 return (header_translators[[index]](compounds)) | 392 return(header_translators[[index]](compounds)) |
367 } | 393 } |
368 } | 394 } |
369 if (is.null(translator <- guess_translator(colnames(compounds)))) { | 395 if (is.null(translator <- guess_translator(colnames(compounds)))) { |
370 return (NULL) | 396 return(NULL) |
371 } | 397 } |
372 return (csv_header_translator(translator, compounds)) | 398 return(csv_header_translator(translator, compounds)) |
373 } | 399 } |
374 | 400 |
375 guess_translator <- function(header) { | 401 guess_translator <- function(header) { |
376 result <- list( | 402 result <- list( |
377 # HMDB_ID=NULL, | 403 # HMDB_ID = NULL, |
378 mz=NULL, | 404 mz = NULL, |
379 name=NULL, | 405 name = NULL, |
380 common_name=NULL, | 406 common_name = NULL, |
381 formula=NULL, | 407 formula = NULL, |
382 # inchi_key=NULL | 408 # inchi_key = NULL |
383 ) | 409 ) |
384 asked_cols <- names(result) | 410 asked_cols <- names(result) |
385 for (asked_col in asked_cols) { | 411 for (asked_col in asked_cols) { |
386 for (col in header) { | 412 for (col in header) { |
387 if ((twisted <- tolower(col)) == asked_col | 413 if ((twisted <- tolower(col)) == asked_col |
393 next | 419 next |
394 } | 420 } |
395 } | 421 } |
396 } | 422 } |
397 if (any(mapply(is.null, result))) { | 423 if (any(mapply(is.null, result))) { |
398 return (NULL) | 424 return(NULL) |
399 } | 425 } |
400 return (result) | 426 return(result) |
401 } | 427 } |
402 | 428 |
403 hmdb_header_translator <- function(compounds) { | 429 hmdb_header_translator <- function(compounds) { |
404 return (csv_header_translator( | 430 return(csv_header_translator( |
405 list( | 431 list( |
406 HMDB_ID="HMDB_ID", | 432 HMDB_ID = "HMDB_ID", |
407 mz="MzBank", | 433 mz = "MzBank", |
408 name="MetName", | 434 name = "MetName", |
409 common_name="MetName", | 435 common_name = "MetName", |
410 formula="ChemFormula", | 436 formula = "ChemFormula", |
411 inchi_key="INChIkey" | 437 inchi_key = "INChIkey" |
412 ), compounds | 438 ), compounds |
413 )) | 439 )) |
414 } | 440 } |
415 | 441 |
416 csv_header_translator <- function(translation_table, csv) { | 442 csv_header_translator <- function(translation_table, csv) { |
417 header_names <- names(translation_table) | 443 header_names <- names(translation_table) |
418 result <- data.frame(1:nrow(csv)) | 444 result <- data.frame(seq_len(nrow(csv))) |
419 for (i in seq_along(header_names)) { | 445 for (i in seq_along(header_names)) { |
420 result[, header_names[[i]]] <- csv[, translation_table[[i]]] | 446 result[, header_names[[i]]] <- csv[, translation_table[[i]]] |
421 } | 447 } |
422 result[, "mz"] <- as.numeric(result[, "mz"]) | 448 result[, "mz"] <- as.numeric(result[, "mz"]) |
423 return (result) | 449 return(result) |
424 } | 450 } |
425 | 451 |
426 set_database_version <- function(orm, version) { | 452 set_database_version <- function(orm, version) { |
427 orm$set_tag( | 453 orm$set_tag( |
428 version, | 454 version, |
429 tag_name="database_version", | 455 tag_name = "database_version", |
430 tag_table_name="XSeeker_tagging_table" | 456 tag_table_name = "XSeeker_tagging_table" |
431 ) | 457 ) |
432 } | 458 } |
433 | 459 |
434 process_rdata <- function(orm, rdata, options) { | 460 process_rdata <- function(orm, rdata, options) { |
435 mzml_tmp_dir <- gather_mzml_files(rdata) | 461 mzml_tmp_dir <- gather_mzml_files(rdata) |
442 || options$`not-show-percent` == FALSE | 468 || options$`not-show-percent` == FALSE |
443 ) | 469 ) |
444 error <- tryCatch({ | 470 error <- tryCatch({ |
445 process_sample_list( | 471 process_sample_list( |
446 orm, rdata, samples, | 472 orm, rdata, samples, |
447 show_percent=show_percent | 473 show_percent = show_percent, |
474 file_grouping_var = options$class | |
448 ) | 475 ) |
449 NULL | 476 NULL |
450 }, error=function(e) { | 477 }, error = function(e) { |
451 message(e) | 478 message(e) |
452 e | 479 e |
453 }) | 480 }) |
454 if (!is.null(mzml_tmp_dir)) { | 481 if (!is.null(mzml_tmp_dir)) { |
455 unlink(mzml_tmp_dir, recursive=TRUE) | 482 unlink(mzml_tmp_dir, recursive = TRUE) |
456 } | 483 } |
457 if (!is.null(error)) { | 484 if (!is.null(error)) { |
458 stop(error) | 485 stop(error) |
459 } | 486 } |
460 } | 487 } |
461 | 488 |
462 gather_mzml_files <- function(rdata) { | 489 gather_mzml_files <- function(rdata) { |
463 if (is.null(rdata$singlefile)) { | 490 if (is.null(rdata$singlefile)) { |
464 message("Extracting mxml files") | 491 message("Extracting mxml files") |
465 tmp <- tempdir() | 492 tmp <- tempdir() |
466 rdata$singlefile <- utils::unzip(rdata$zipfile, exdir=tmp) | 493 rdata$singlefile <- utils::unzip(rdata$zipfile, exdir = tmp) |
467 names(rdata$singlefile) <- tools::file_path_sans_ext(basename(rdata$singlefile)) | 494 names(rdata$singlefile) <- tools::file_path_sans_ext( |
495 basename(rdata$singlefile) | |
496 ) | |
468 message("Extracted") | 497 message("Extracted") |
469 return (tmp) | 498 return(tmp) |
470 } else { | 499 } else { |
471 message(sprintf("Not a zip file, loading files directly from path: %s", paste(rdata$singlefile, collapse=" ; "))) | 500 message(sprintf( |
472 } | 501 "Not a zip file, loading files directly from path: %s", |
473 return (NULL) | 502 paste(rdata$singlefile, collapse = " ; ") |
474 } | 503 )) |
475 | 504 } |
476 process_sample_list <- function(orm, radta, sample_names, show_percent) { | 505 return(NULL) |
477 file_grouping_var <- find_grouping_var(rdata$variableMetadata) | 506 } |
507 | |
508 process_sample_list <- function( | |
509 orm, | |
510 rdata, | |
511 sample_names, | |
512 show_percent, | |
513 file_grouping_var = NULL | |
514 ) { | |
515 if (is.null(file_grouping_var)) { | |
516 file_grouping_var <- find_grouping_var(rdata$variableMetadata) | |
517 if (is.null(file_grouping_var)) { | |
518 stop("Malformed variableMetada.") | |
519 } | |
520 } | |
521 tryCatch({ | |
522 headers <- colnames(rdata$variableMetadata) | |
523 file_grouping_var <- headers[[as.numeric(file_grouping_var)]] | |
524 }, error = function(e) NULL) | |
525 if ( | |
526 is.null(file_grouping_var) | |
527 || !(file_grouping_var %in% colnames(rdata$variableMetadata)) | |
528 ) { | |
529 stop(sprintf( | |
530 "Could not find grouping variable %s in var meta file.", | |
531 file_grouping_var | |
532 )) | |
533 } | |
478 message("Processing samples.") | 534 message("Processing samples.") |
479 message(sprintf("File grouping variable: %s", file_grouping_var)) | 535 message(sprintf("File grouping variable: %s", file_grouping_var)) |
480 if(is.null(file_grouping_var)) { | |
481 stop("Malformed variableMetada.") | |
482 } | |
483 | 536 |
484 context <- new.env() | 537 context <- new.env() |
485 context$samples <- list() | 538 context$samples <- list() |
486 context$peaks <- rdata$xa@xcmsSet@peaks | 539 context$peaks <- rdata$xa@xcmsSet@peaks |
487 context$groupidx <- rdata$xa@xcmsSet@groupidx | 540 context$groupidx <- rdata$xa@xcmsSet@groupidx |
490 process_arg_list <- rdata$listOFlistArguments | 543 process_arg_list <- rdata$listOFlistArguments |
491 var_meta <- rdata$variableMetadata | 544 var_meta <- rdata$variableMetadata |
492 | 545 |
493 process_params <- list() | 546 process_params <- list() |
494 if (is.null(process_arg_list)) { | 547 if (is.null(process_arg_list)) { |
495 histories <- list() | |
496 for (history in xcms_set@.processHistory) { | 548 for (history in xcms_set@.processHistory) { |
497 if ( | 549 if ( |
498 class(history@param) == "CentWaveParam" | 550 class(history@param) == "CentWaveParam" |
499 && history@type == "Peak detection" | 551 && history@type == "Peak detection" |
500 ) { | 552 ) { |
501 params <- history@param | 553 params <- history@param |
502 process_params <- list(list( | 554 process_params <- list(list( |
503 xfunction="annotatediff", | 555 xfunction = "annotatediff", |
504 ppm=params@ppm, | 556 ppm = params@ppm, |
505 peakwidth=sprintf("%s - %s", params@peakwidth[[1]], params@peakwidth[[2]]), | 557 peakwidth = sprintf( |
506 snthresh=params@snthresh, | 558 "%s - %s", |
507 prefilterStep=params@prefilter[[1]], | 559 params@peakwidth[[1]], |
508 prefilterLevel=params@prefilter[[2]], | 560 params@peakwidth[[2]] |
509 mzdiff=params@mzdiff, | 561 ), |
510 fitgauss=params@fitgauss, | 562 snthresh = params@snthresh, |
511 noise=params@noise, | 563 prefilterStep = params@prefilter[[1]], |
512 mzCenterFun=params@mzCenterFun, | 564 prefilterLevel = params@prefilter[[2]], |
513 integrate=params@integrate, | 565 mzdiff = params@mzdiff, |
514 firstBaselineCheck=params@firstBaselineCheck, | 566 fitgauss = params@fitgauss, |
515 snthreshIsoROIs=!identical(params@roiScales, numeric(0)) | 567 noise = params@noise, |
568 mzCenterFun = params@mzCenterFun, | |
569 integrate = params@integrate, | |
570 firstBaselineCheck = params@firstBaselineCheck, | |
571 snthreshIsoROIs = !identical(params@roiScales, numeric(0)) | |
516 )) | 572 )) |
517 break | 573 break |
518 } | 574 } |
519 } | 575 } |
520 } else { | 576 } else { |
521 for (list_name in names(process_arg_list)) { | 577 for (list_name in names(process_arg_list)) { |
522 param_list <- list() | 578 param_list <- list() |
523 for (param_name in names(process_arg_list[[list_name]])) { | 579 for (param_name in names(process_arg_list[[list_name]])) { |
524 param_list[[param_name]] <- process_arg_list[[list_name]][[param_name]] | 580 param_list[[param_name]] <- process_arg_list[[ |
581 list_name | |
582 ]][[param_name]] | |
525 } | 583 } |
526 process_params[[length(process_params)+1]] <- param_list | 584 process_params[[length(process_params) + 1]] <- param_list |
527 } | 585 } |
528 } | 586 } |
529 | 587 |
530 message("Parameters from previous processes extracted.") | 588 message("Parameters from previous processes extracted.") |
531 | 589 |
532 | 590 |
533 indices <- as.numeric(unique(var_meta[, file_grouping_var])) | 591 indices <- as.numeric(unique(var_meta[, file_grouping_var])) |
592 if (any(is.null(names(singlefile)[indices]))) { | |
593 stop(sprintf( | |
594 paste( | |
595 "Indices defined by grouping variable %s are not all present", | |
596 "in singlefile names (%s).\nCannot continue. Indices: %s" | |
597 ), | |
598 file_grouping_var, | |
599 paste(names(singlefile), collapse = ", "), | |
600 paste(indices, collapse = ", ") | |
601 )) | |
602 } | |
534 smol_xcms_set <- orm$smol_xcms_set() | 603 smol_xcms_set <- orm$smol_xcms_set() |
535 mz_tab_info <- new.env() | 604 mz_tab_info <- new.env() |
536 g <- xcms::groups(xcms_set) | 605 g <- xcms::groups(xcms_set) |
537 mz_tab_info$group_length <- nrow(g) | 606 mz_tab_info$group_length <- nrow(g) |
538 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set) | 607 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set) |
539 mz_tab_info$sampnames <- xcms::sampnames(xcms_set) | 608 mz_tab_info$sampnames <- xcms::sampnames(xcms_set) |
540 mz_tab_info$sampclass <- xcms::sampclass(xcms_set) | 609 mz_tab_info$sampclass <- xcms::sampclass(xcms_set) |
541 mz_tab_info$rtmed <- g[,"rtmed"] | 610 mz_tab_info$rtmed <- g[, "rtmed"] |
542 mz_tab_info$mzmed <- g[,"mzmed"] | 611 mz_tab_info$mzmed <- g[, "mzmed"] |
543 mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(xcms_set, value="into") | 612 mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval( |
544 blogified <- blob::blob(fst::compress_fst(serialize(mz_tab_info, NULL), compression=100)) | 613 xcms_set, |
614 value = "into" | |
615 ) | |
616 blogified <- blob::blob(fst::compress_fst( | |
617 serialize(mz_tab_info, NULL), | |
618 compression = 100 | |
619 )) | |
545 rm(mz_tab_info) | 620 rm(mz_tab_info) |
546 | 621 |
547 invisible(smol_xcms_set$set_raw(blogified)$save()) | 622 invisible(smol_xcms_set$set_raw(blogified)$save()) |
548 smol_xcms_set_id <- smol_xcms_set$get_id() | 623 smol_xcms_set_id <- smol_xcms_set$get_id() |
549 rm(smol_xcms_set) | 624 rm(smol_xcms_set) |
580 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION | 655 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION |
581 env$tool_name <- TOOL_NAME | 656 env$tool_name <- TOOL_NAME |
582 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC | 657 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC |
583 | 658 |
584 sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id) | 659 sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id) |
585 rm (env) | 660 rm(env) |
586 context$samples[no] <- sample$get_id() | 661 context$samples[no] <- sample$get_id() |
587 rm (sample) | 662 rm(sample) |
588 } | 663 } |
589 context$clusters <- list() | 664 context$clusters <- list() |
590 context$show_percent <- show_percent | 665 context$show_percent <- show_percent |
591 context$cluster_mean_rt_abundance <- list() | 666 context$cluster_mean_rt_abundance <- list() |
592 context$central_feature <- list() | 667 context$central_feature <- list() |
595 clusters <- context$clusters | 670 clusters <- context$clusters |
596 rm(context) | 671 rm(context) |
597 message("Features enrichment") | 672 message("Features enrichment") |
598 complete_features(orm, clusters, show_percent) | 673 complete_features(orm, clusters, show_percent) |
599 message("Features enrichment done.") | 674 message("Features enrichment done.") |
600 return (NULL) | 675 return(NULL) |
601 } | 676 } |
602 | 677 |
603 find_grouping_var <- function(var_meta) { | 678 find_grouping_var <- function(var_meta) { |
604 known_colnames = c( | 679 known_colnames <- c( |
605 "name", "namecustom", "mz", "mzmin", "mzmax", | 680 "name", "namecustom", "mz", "mzmin", "mzmax", |
606 "rt", "rtmin", "rtmax", "npeaks", "isotopes", "adduct", "pcgroup" | 681 "rt", "rtmin", "rtmax", "npeaks", "isotopes", "adduct", |
682 "pcgroup", "ms_level" | |
607 ) | 683 ) |
608 col_names <- colnames(var_meta) | 684 col_names <- colnames(var_meta) |
609 classes = list() | 685 classes <- list() |
610 for (name in col_names) { | 686 for (name in col_names) { |
611 if (!(name %in% known_colnames)) { | 687 if (!(name %in% known_colnames)) { |
612 classes[[length(classes)+1]] = name | 688 classes[[length(classes) + 1]] <- name |
613 } | 689 } |
614 } | 690 } |
615 if (length(classes) > 1) { | 691 if (length(classes) > 1) { |
616 stop(sprintf("Only one class expected in the variable metadata. Found %d .", length(classes))) | 692 stop(sprintf( |
693 "Only one class expected in the variable metadata. Found %d .", | |
694 length(classes) | |
695 )) | |
617 } | 696 } |
618 if (length(classes) == 0) { | 697 if (length(classes) == 0) { |
619 stop("Could not find any class column in your variableMetadata.") | 698 stop("Could not find any class column in your variableMetadata.") |
620 } | 699 } |
621 return (classes[[1]]) | 700 return(classes[[1]]) |
622 } | 701 } |
623 | 702 |
624 add_sample_to_database <- function(orm, env, context, smol_xcms_set_id) { | 703 add_sample_to_database <- function(orm, env, context, smol_xcms_set_id) { |
625 message(sprintf("Processing sample %s", env$sample_name)) | 704 message(sprintf("Processing sample %s", env$sample_name)) |
626 sample <- ( | 705 sample <- ( |
627 orm$sample() | 706 orm$sample() |
628 $set_name(env$sample_name) | 707 $set_name(env$sample_name) |
629 $set_path(env$dataset_path) | 708 $set_path(env$dataset_path) |
630 $set_kind("enriched_rdata") | 709 $set_kind("enriched_rdata") |
631 $set_polarity( | 710 $set_polarity( |
632 if (is.null(env$polarity) || identical(env$polarity, character(0))) "" | 711 if ( |
712 is.null(env$polarity) | |
713 || identical(env$polarity, character(0)) | |
714 ) "" | |
633 else env$polarity | 715 else env$polarity |
634 ) | 716 ) |
635 $set_raw(blob::blob(fst::compress_fst( | 717 $set_raw(blob::blob(fst::compress_fst( |
636 serialize(env, NULL), | 718 serialize(env, NULL), |
637 compression=100 | 719 compression = 100 |
638 ))) | 720 ))) |
639 ) | 721 ) |
640 sample[["smol_xcms_set_id"]] <- smol_xcms_set_id | 722 sample[["smol_xcms_set_id"]] <- smol_xcms_set_id |
641 sample$modified__[["smol_xcms_set_id"]] <- smol_xcms_set_id | 723 sample$modified__[["smol_xcms_set_id"]] <- smol_xcms_set_id |
642 sample <- sample$save() | 724 sample <- sample$save() |
643 load_process_params(orm, sample, env$process_params) | 725 load_process_params(orm, sample, env$process_params) |
644 message(sprintf("Sample %s inserted.", env$sample_name)) | 726 message(sprintf("Sample %s inserted.", env$sample_name)) |
645 return (sample) | 727 return(sample) |
646 } | 728 } |
647 | 729 |
648 | 730 |
649 load_variable_metadata <- function(orm, var_meta, context) { | 731 load_variable_metadata <- function(orm, var_meta, context) { |
650 all_clusters <- orm$cluster()$all() | 732 all_clusters <- orm$cluster()$all() |
658 orm, var_meta, context, | 740 orm, var_meta, context, |
659 next_feature_id, next_cluster_id, | 741 next_feature_id, next_cluster_id, |
660 next_pc_group, next_align_group | 742 next_pc_group, next_align_group |
661 )) | 743 )) |
662 message("Extracting features done.") | 744 message("Extracting features done.") |
663 return (NULL) | 745 return(NULL) |
664 } | 746 } |
665 | 747 |
666 get_next_id <- function(models, attribute) { | 748 get_next_id <- function(models, attribute) { |
667 if ((id <- models$max(attribute)) == Inf || id == -Inf) { | 749 if ((id <- models$max(attribute)) == Inf || id == -Inf) { |
668 return (0) | 750 return(0) |
669 } | 751 } |
670 return (id) | 752 return(id) |
671 } | 753 } |
672 | 754 |
673 create_features <- function( | 755 create_features <- function( |
674 orm, var_meta, context, | 756 orm, var_meta, context, |
675 next_feature_id, next_cluster_id, | 757 next_feature_id, next_cluster_id, |
676 next_pc_group, next_align_group | 758 next_pc_group, next_align_group |
677 ) { | 759 ) { |
678 field_names <- as.list(names(orm$feature()$fields__)) | 760 field_names <- as.list(names(orm$feature()$fields__)) |
679 field_names[field_names=="id"] <- NULL | 761 field_names[field_names == "id"] <- NULL |
680 | 762 |
681 features <- list() | 763 features <- list() |
682 dummy_feature <- orm$feature() | 764 dummy_feature <- orm$feature() |
683 | 765 |
684 if (show_percent <- context$show_percent) { | 766 if (show_percent <- context$show_percent) { |
686 total <- nrow(var_meta) | 768 total <- nrow(var_meta) |
687 } | 769 } |
688 rows <- seq_len(nrow(var_meta)) | 770 rows <- seq_len(nrow(var_meta)) |
689 if (PROCESS_SMOL_BATCH) { | 771 if (PROCESS_SMOL_BATCH) { |
690 | 772 |
691 rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))] | 773 rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))] |
692 } | 774 } |
693 cluster_row <- list() | 775 cluster_row <- list() |
694 for (row in rows) { | 776 for (row in rows) { |
695 if (show_percent && (row / total) * 100 > percent) { | 777 if (show_percent && (row / total) * 100 > percent) { |
696 percent <- percent + 1 | 778 percent <- percent + 1 |
697 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) | 779 message("\r", sprintf("\r%d %%", percent), appendLF = FALSE) |
698 } | 780 } |
699 | 781 |
700 dummy_feature$set_featureID(next_feature_id) | 782 dummy_feature$set_featureID(next_feature_id) |
701 next_feature_id <- next_feature_id + 1 | 783 next_feature_id <- next_feature_id + 1 |
702 | 784 |
708 context$clusterID <- clusterID | 790 context$clusterID <- clusterID |
709 dummy_feature$set_iso(iso) | 791 dummy_feature$set_iso(iso) |
710 | 792 |
711 peak_list <- context$peaks[context$groupidx[[row]], ] | 793 peak_list <- context$peaks[context$groupidx[[row]], ] |
712 if (! ("matrix" %in% class(peak_list))) { | 794 if (! ("matrix" %in% class(peak_list))) { |
713 peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list))) | 795 peak_list <- matrix( |
796 peak_list, | |
797 nrow = 1, | |
798 ncol = length(peak_list), | |
799 dimnames = list(c(), names(peak_list)) | |
800 ) | |
714 } | 801 } |
715 | 802 |
716 clusterID <- as.character(clusterID) | 803 clusterID <- as.character(clusterID) |
717 if (is.null(context$central_feature[[clusterID]])) { | 804 if (is.null(context$central_feature[[clusterID]])) { |
718 int_o <- extract_peak_var(peak_list, "into") | 805 int_o <- extract_peak_var(peak_list, "into") |
719 context$central_feature[[clusterID]] <- ( | 806 context$central_feature[[clusterID]] <- ( |
720 peak_list[peak_list[, "into"] == int_o,]["sample"] | 807 peak_list[peak_list[, "into"] == int_o, ]["sample"] |
721 ) | 808 ) |
722 } | 809 } |
723 | 810 |
724 if (!DEBUG_FAST_IGNORE_SLOW_OP) { | 811 if (!DEBUG_FAST_IGNORE_SLOW_OP) { |
725 sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE] | 812 central_feature <- context$central_feature[[clusterID]] |
726 if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { | 813 sample_peak_list <- peak_list[ |
727 if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { | 814 as.integer(peak_list[, "sample"]) == central_feature, |
815 , | |
816 drop = FALSE | |
817 ] | |
818 if ( | |
819 !identical(sample_peak_list, numeric(0)) | |
820 && !is.null(nrow(sample_peak_list)) | |
821 && nrow(sample_peak_list) != 0 | |
822 ) { | |
823 int_o <- extract_peak_var(sample_peak_list, "into") | |
824 if (!is.na(int_o)) { | |
728 dummy_feature$set_int_o(int_o) | 825 dummy_feature$set_int_o(int_o) |
729 } | 826 } |
730 if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { | 827 int_b <- extract_peak_var(sample_peak_list, "intb") |
828 if (!is.na(int_b)) { | |
731 dummy_feature$set_int_b(int_b) | 829 dummy_feature$set_int_b(int_b) |
732 } | 830 } |
733 if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) { | 831 max_o <- extract_peak_var(sample_peak_list, "maxo") |
832 if (!is.na(max_o)) { | |
734 dummy_feature$set_max_o(max_o) | 833 dummy_feature$set_max_o(max_o) |
735 } | 834 } |
736 } | 835 } |
737 } | 836 } |
738 | 837 |
742 dummy_feature, clusterID, | 841 dummy_feature, clusterID, |
743 context, curent_var_meta, next_pc_group, | 842 context, curent_var_meta, next_pc_group, |
744 next_align_group | 843 next_align_group |
745 ) | 844 ) |
746 next_align_group <- next_align_group + 1 | 845 next_align_group <- next_align_group + 1 |
747 features[[length(features)+1]] <- as.list(dummy_feature, field_names) | 846 features[[length(features) + 1]] <- as.list(dummy_feature, field_names) |
748 dummy_feature$clear() | 847 dummy_feature$clear() |
749 } | 848 } |
750 rm(var_meta) | 849 rm(var_meta) |
751 message("") | 850 message("") |
752 message("Saving features") | 851 message("Saving features") |
753 invisible(dummy_feature$save(bulk=features)) | 852 invisible(dummy_feature$save(bulk = features)) |
754 | 853 |
755 ## We link manually clusters to the sample they're in. | 854 ## We link manually clusters to the sample they're in. |
756 link_cache <- list() | 855 link_cache <- list() |
757 for (row in rows) { | 856 for (row in rows) { |
758 sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"]) | 857 sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"]) |
759 for (sample_id in context$samples[sample_nos]) { | 858 for (sample_id in context$samples[sample_nos]) { |
760 cluster_id <- cluster_row[[row]]$get_id() | 859 cluster_id <- cluster_row[[row]]$get_id() |
761 if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) { | 860 id <- paste(sample_id, cluster_id, sep = ";") |
861 if (is.null(link_cache[[id]])) { | |
762 link_cache[[id]] <- 1 | 862 link_cache[[id]] <- 1 |
763 orm$cluster_sample( | 863 orm$cluster_sample( |
764 sample_id=sample_id, | 864 sample_id = sample_id, |
765 cluster_id=cluster_id | 865 cluster_id = cluster_id |
766 )$save() | 866 )$save() |
767 } | 867 } |
768 } | 868 } |
769 } | 869 } |
770 | 870 |
771 message("Saved.") | 871 message("Saved.") |
772 return (context$clusters) | 872 return(context$clusters) |
773 } | 873 } |
774 | 874 |
775 extract_peak_var <- function(peak_list, var_name, selector=max) { | 875 extract_peak_var <- function(peak_list, var_name, selector = max) { |
776 value <- peak_list[, var_name] | 876 value <- peak_list[, var_name] |
777 names(value) <- NULL | 877 names(value) <- NULL |
778 return (selector(value)) | 878 return(selector(value)) |
779 } | 879 } |
780 | 880 |
781 set_feature_fields_from_var_meta <- function(feature, var_meta) { | 881 set_feature_fields_from_var_meta <- function(feature, var_meta) { |
782 if (!is.null(mz <- var_meta[["mz"]]) && !is.na(mz)) { | 882 if (!is.null(mz <- var_meta[["mz"]]) && !is.na(mz)) { |
783 feature$set_mz(mz) | 883 feature$set_mz(mz) |
798 feature$set_rt_max(rtmax) | 898 feature$set_rt_max(rtmax) |
799 } | 899 } |
800 if (!is.null(isotopes <- var_meta[["isotopes"]]) && !is.na(isotopes)) { | 900 if (!is.null(isotopes <- var_meta[["isotopes"]]) && !is.na(isotopes)) { |
801 feature$set_iso(isotopes) | 901 feature$set_iso(isotopes) |
802 } | 902 } |
803 return (feature) | 903 return(feature) |
804 } | 904 } |
805 | 905 |
806 extract_iso <- function(weird_data) { | 906 extract_iso <- function(weird_data) { |
807 if (grepl("^\\[\\d+\\]", weird_data)[[1]]) { | 907 if (grepl("^\\[\\d+\\]", weird_data)[[1]]) { |
808 return (sub("^\\[\\d+\\]", "", weird_data, perl=TRUE)) | 908 return(sub("^\\[\\d+\\]", "", weird_data, perl = TRUE)) |
809 } | 909 } |
810 return (weird_data) | 910 return(weird_data) |
811 } | 911 } |
812 | 912 |
813 extract_clusterID <- function(weird_data, next_cluster_id){ | 913 extract_clusterID <- function(weird_data, next_cluster_id) { |
814 if (grepl("^\\[\\d+\\]", weird_data)[[1]]) { | 914 if (grepl("^\\[\\d+\\]", weird_data)[[1]]) { |
815 clusterID <- stringr::str_extract(weird_data, "^\\[\\d+\\]") | 915 clusterID <- stringr::str_extract(weird_data, "^\\[\\d+\\]") |
816 clusterID <- as.numeric(stringr::str_extract(clusterID, "\\d+")) | 916 clusterID <- as.numeric(stringr::str_extract(clusterID, "\\d+")) |
817 } else { | 917 } else { |
818 clusterID <- 0 | 918 clusterID <- 0 |
819 } | 919 } |
820 return (clusterID + next_cluster_id) | 920 return(clusterID + next_cluster_id) |
821 } | 921 } |
822 | 922 |
823 create_associated_cluster <- function( | 923 create_associated_cluster <- function( |
824 orm, | 924 orm, |
825 main_sample_id, feature, clusterID, | 925 main_sample_id, feature, clusterID, |
829 if (is.null(cluster <- context$clusters[[clusterID]])) { | 929 if (is.null(cluster <- context$clusters[[clusterID]])) { |
830 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]]) | 930 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]]) |
831 adduct_name <- as.character(curent_var_meta[["adduct"]]) | 931 adduct_name <- as.character(curent_var_meta[["adduct"]]) |
832 annotation <- curent_var_meta[["isotopes"]] | 932 annotation <- curent_var_meta[["isotopes"]] |
833 cluster <- context$clusters[[clusterID]] <- orm$cluster( | 933 cluster <- context$clusters[[clusterID]] <- orm$cluster( |
834 pc_group=pcgroup + next_pc_group, | 934 pc_group = pcgroup + next_pc_group, |
835 # adduct=adduct, | 935 # adduct=adduct, |
836 align_group=next_align_group, | 936 align_group = next_align_group, |
837 # curent_group=curent_group, | 937 # curent_group=curent_group, |
838 clusterID=context$clusterID, | 938 clusterID = context$clusterID, |
839 annotation=annotation | 939 annotation = annotation |
840 ) | 940 ) |
841 if (is.null(adduct <- context$adducts[[adduct_name]])) { | 941 if (is.null(adduct <- context$adducts[[adduct_name]])) { |
842 context$adducts[[adduct_name]] <- orm$adduct()$load_by(name=adduct_name)$first() | 942 context$adducts[[adduct_name]] <- orm$adduct()$load_by( |
943 name = adduct_name | |
944 )$first() | |
843 if (is.null(adduct <- context$adducts[[adduct_name]])) { | 945 if (is.null(adduct <- context$adducts[[adduct_name]])) { |
844 adduct <- context$adducts[[adduct_name]] <- orm$adduct(name=adduct_name, charge=0) | 946 adduct <- context$adducts[[adduct_name]] <- orm$adduct( |
947 name = adduct_name, | |
948 charge = 0 | |
949 ) | |
845 adduct$save() | 950 adduct$save() |
846 } | 951 } |
847 } | 952 } |
848 cluster$set_adduct(adduct) | 953 cluster$set_adduct(adduct) |
849 ## Crappy hack to assign sample id to cluster without loading the sample. | 954 ## Crappy hack to assign sample id to cluster without loading the |
850 ## Samples are too big (their sample$env) and slows the process, and eat all the menory | 955 ## sample. Samples are too big (their sample$env) and slows the |
851 ## so we dont't want to load them. | 956 ## process, and eat all the menory so we dont't want to load them. |
852 cluster[["sample_id"]] <- main_sample_id | 957 cluster[["sample_id"]] <- main_sample_id |
853 cluster$modified__[["sample_id"]] <- main_sample_id | 958 cluster$modified__[["sample_id"]] <- main_sample_id |
854 } else { | 959 } else { |
855 if (context$clusterID != 0 && cluster$get_clusterID() == 0) { | 960 if (context$clusterID != 0 && cluster$get_clusterID() == 0) { |
856 cluster$set_clusterID(context$clusterID) | 961 cluster$set_clusterID(context$clusterID) |
857 } | 962 } |
858 } | 963 } |
859 cluster$save() | 964 cluster$save() |
860 feature$set_cluster(cluster) | 965 feature$set_cluster(cluster) |
861 return (cluster) | 966 return(cluster) |
862 } | 967 } |
863 | 968 |
864 complete_features <- function(orm, clusters, show_percent) { | 969 complete_features <- function(orm, clusters, show_percent) { |
865 total <- length(clusters) | 970 total <- length(clusters) |
866 percent <- -1 | 971 percent <- -1 |
867 i <- 0 | 972 i <- 0 |
868 for (cluster in clusters) { | 973 for (cluster in clusters) { |
869 i <- i+1 | 974 i <- i + 1 |
870 if (show_percent && (i / total) * 100 > percent) { | 975 if (show_percent && (i / total) * 100 > percent) { |
871 percent <- percent + 1 | 976 percent <- percent + 1 |
872 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) | 977 message("\r", sprintf("\r%d %%", percent), appendLF = FALSE) |
873 } | 978 } |
874 features <- orm$feature()$load_by(cluster_id=cluster$get_id()) | 979 features <- orm$feature()$load_by(cluster_id = cluster$get_id()) |
875 if (features$any()) { | 980 if (features$any()) { |
876 if (!is.null(rt <- features$mean("rt"))) { | 981 if (!is.null(rt <- features$mean("rt"))) { |
877 cluster$set_mean_rt(rt)$save() | 982 cluster$set_mean_rt(rt)$save() |
878 } | 983 } |
879 features_df <- as.data.frame(features) | 984 features_df <- as.data.frame(features) |
880 central_feature <- features_df[grepl("^\\[M\\]", features_df[, "iso"]), ] | 985 central_feature <- features_df[ |
986 grepl("^\\[M\\]", features_df[, "iso"]), | |
987 ] | |
881 central_feature_into <- central_feature[["int_o"]] | 988 central_feature_into <- central_feature[["int_o"]] |
882 if (!identical(central_feature_into, numeric(0)) && central_feature_into != 0) { | 989 if ( |
990 !identical(central_feature_into, numeric(0)) | |
991 && central_feature_into != 0 | |
992 ) { | |
883 for (feature in as.vector(features)) { | 993 for (feature in as.vector(features)) { |
884 feature$set_abundance( | 994 feature$set_abundance( |
885 feature$get_int_o() / central_feature_into * 100 | 995 feature$get_int_o() / central_feature_into * 100 |
886 )$save() | 996 )$save() |
887 } | 997 } |
888 } | 998 } |
889 } | 999 } |
890 } | 1000 } |
891 return (NULL) | 1001 return(NULL) |
892 } | 1002 } |
893 | 1003 |
894 load_process_params <- function(orm, sample, params) { | 1004 load_process_params <- function(orm, sample, params) { |
895 for (param_list in params) { | 1005 for (param_list in params) { |
896 if (is.null(param_list[["xfunction"]])) { | 1006 if (is.null(param_list[["xfunction"]])) { |
898 } | 1008 } |
899 if (param_list[["xfunction"]] == "annotatediff") { | 1009 if (param_list[["xfunction"]] == "annotatediff") { |
900 load_process_params_peak_picking(orm, sample, param_list) | 1010 load_process_params_peak_picking(orm, sample, param_list) |
901 } | 1011 } |
902 } | 1012 } |
903 return (sample) | 1013 return(sample) |
904 } | 1014 } |
905 | 1015 |
906 load_process_params_peak_picking <- function(orm, sample, peak_picking_params) { | 1016 load_process_params_peak_picking <- function( |
907 return (add_sample_process_parameters( | 1017 orm, |
908 params=peak_picking_params, | 1018 sample, |
909 params_translation=list( | 1019 peak_picking_params |
910 ppm="ppm", | 1020 ) { |
911 maxcharge="maxCharge", | 1021 return(add_sample_process_parameters( |
912 maxiso="maxIso" | 1022 params = peak_picking_params, |
1023 params_translation = list( | |
1024 ppm = "ppm", | |
1025 maxcharge = "maxCharge", | |
1026 maxiso = "maxIso" | |
913 ), | 1027 ), |
914 param_model_generator=orm$peak_picking_parameters, | 1028 param_model_generator = orm$peak_picking_parameters, |
915 sample_param_setter=sample$set_peak_picking_parameters | 1029 sample_param_setter = sample$set_peak_picking_parameters |
916 )) | 1030 )) |
917 } | 1031 } |
918 | 1032 |
919 add_sample_process_parameters <- function( | 1033 add_sample_process_parameters <- function( |
920 params, | 1034 params, |
935 params_model <- params_models$first() | 1049 params_model <- params_models$first() |
936 } else { | 1050 } else { |
937 params_model <- do.call(param_model_generator, model_params) | 1051 params_model <- do.call(param_model_generator, model_params) |
938 params_model$save() | 1052 params_model$save() |
939 } | 1053 } |
940 return (sample_param_setter(params_model)$save()) | 1054 return(sample_param_setter(params_model)$save()) |
941 } | 1055 } |
942 | 1056 |
943 | 1057 |
944 library(optparse) | 1058 library(optparse) |
945 | 1059 |
946 option_list <- list( | 1060 option_list <- list( |
947 optparse::make_option( | 1061 optparse::make_option( |
948 c("-v", "--version"), | 1062 c("-v", "--version"), |
949 action="store_true", | 1063 action = "store_true", |
950 help="Display this tool's version and exits" | 1064 help = "Display this tool's version and exits" |
951 ), | 1065 ), |
952 optparse::make_option( | 1066 optparse::make_option( |
953 c("-i", "--input"), | 1067 c("-i", "--input"), |
954 type="character", | 1068 type = "character", |
955 help="The rdata path to import in XSeeker" | 1069 help = "The rdata path to import in XSeeker" |
956 ), | 1070 ), |
957 optparse::make_option( | 1071 optparse::make_option( |
958 c("-s", "--samples"), | 1072 c("-s", "--samples"), |
959 type="character", | 1073 type = "character", |
960 help="Samples to visualise in XSeeker" | 1074 help = "Samples to visualise in XSeeker" |
961 ), | 1075 ), |
962 optparse::make_option( | 1076 optparse::make_option( |
963 c("-B", "--archetype"), | 1077 c("-B", "--archetype"), |
964 type="character", | 1078 type = "character", |
965 help="The name of the base database" | 1079 help = "The name of the base database" |
966 ), | 1080 ), |
967 optparse::make_option( | 1081 optparse::make_option( |
968 c("-b", "--database"), | 1082 c("-b", "--database"), |
969 type="character", | 1083 type = "character", |
970 help="The base database's path" | 1084 help = "The base database's path" |
971 ), | 1085 ), |
972 optparse::make_option( | 1086 optparse::make_option( |
973 c("-c", "--compounds-csv"), | 1087 c("-c", "--compounds-csv"), |
974 type="character", | 1088 type = "character", |
975 help="The csv containing compounds" | 1089 help = "The csv containing compounds" |
976 ), | 1090 ), |
977 optparse::make_option( | 1091 optparse::make_option( |
978 c("-m", "--models"), | 1092 c("-m", "--models"), |
979 type="character", | 1093 type = "character", |
980 help="The path or url (must begin with http[s]:// or git@) to the database's models" | 1094 help = paste( |
1095 "The path or url (must begin with http[s]:// or git@) to", | |
1096 "the database's models" | |
1097 ) | |
981 ), | 1098 ), |
982 optparse::make_option( | 1099 optparse::make_option( |
1100 c("-k", "--class"), | |
1101 type = "character", | |
1102 help = "The name of the column containing the classes" | |
1103 ), | |
1104 optparse::make_option( | |
983 c("-o", "--output"), | 1105 c("-o", "--output"), |
984 type="character", | 1106 type = "character", |
985 help="The path where to output sqlite" | 1107 help = "The path where to output sqlite" |
986 ), | 1108 ), |
987 optparse::make_option( | 1109 optparse::make_option( |
988 c("-P", "--not-show-percent"), | 1110 c("-P", "--not-show-percent"), |
989 action="store_true", | 1111 action = "store_true", |
990 help="Flag not to show the percents", | 1112 help = "Flag not to show the percents", |
991 default=FALSE | 1113 default = FALSE |
992 ) | 1114 ) |
993 ) | 1115 ) |
994 | 1116 |
995 options(error=function(){traceback(3)}) | 1117 options(error = function(){traceback(3)}) |
996 | 1118 |
997 parser <- OptionParser(usage="%prog [options] file", option_list=option_list) | 1119 parser <- OptionParser( |
998 args <- parse_args(parser, positional_arguments=0) | 1120 usage = "%prog [options] file", |
1121 option_list = option_list | |
1122 ) | |
1123 args <- parse_args(parser, positional_arguments = 0) | |
999 | 1124 |
1000 err_code <- 0 | 1125 err_code <- 0 |
1001 | 1126 |
1002 if (!is.null(args$options$version)) { | 1127 if (!is.null(args$options$version)) { |
1003 message(sprintf("%s %s", TOOL_NAME, VERSION)) | 1128 message(sprintf("%s %s", TOOL_NAME, VERSION)) |
1004 quit() | 1129 quit() |
1005 } | 1130 } |
1006 | 1131 |
1007 models <- get_models(args$options$models) | 1132 models <- get_models(args$options$models) |
1008 orm <- DBModelR::ORM( | 1133 orm <- DBModelR::ORM( |
1009 connection_params=list(dbname=args$options$output), | 1134 connection_params = list(dbname=args$options$output), |
1010 dbms="SQLite" | 1135 dbms = "SQLite" |
1011 ) | 1136 ) |
1012 | 1137 |
1013 invisible(orm$models(models)) | 1138 invisible(orm$models(models)) |
1014 invisible(create_database(orm)) | 1139 invisible(create_database(orm)) |
1015 | 1140 |
1021 insert_base_data(orm, args$options$database) | 1146 insert_base_data(orm, args$options$database) |
1022 } | 1147 } |
1023 message(sprintf("Base data inserted using %s.", args$options$database)) | 1148 message(sprintf("Base data inserted using %s.", args$options$database)) |
1024 | 1149 |
1025 if (!is.null(args$options$archetype)) { | 1150 if (!is.null(args$options$archetype)) { |
1026 insert_base_data(orm, args$options$archetype, archetype=TRUE) | 1151 insert_base_data(orm, args$options$archetype, archetype = TRUE) |
1027 } | 1152 } |
1028 if (!is.null(args$options$`compounds-csv`)) { | 1153 if (!is.null(args$options$`compounds-csv`)) { |
1029 insert_compounds(orm, args$options$`compounds-csv`) | 1154 insert_compounds(orm, args$options$`compounds-csv`) |
1030 } | 1155 } |
1031 | 1156 |
1036 | 1161 |
1037 load(args$options$input, rdata <- new.env()) | 1162 load(args$options$input, rdata <- new.env()) |
1038 | 1163 |
1039 process_rdata(orm, rdata, args$options) | 1164 process_rdata(orm, rdata, args$options) |
1040 | 1165 |
1041 quit(status=err_code) | 1166 quit(status = err_code) |
1042 | |
1043 |