xseekerpreparator: XSeekerPreparator.R comparison

comparison XSeekerPreparator.R @ 19:2937e72e5891 draft

" master branch Updating"

author	lain
date	Tue, 18 Oct 2022 12:57:28 +0000
parents	2c7e7fd1f740
children	ce94e7a141bb

comparison

equal deleted inserted replaced

-:2c7e7fd1f740
+:2937e72e5891
-TOOL_NAME <- "XSeekerPreparator"
+assign("TOOL_NAME", "XSeekerPreparator", envir = globalenv())
-VERSION <- "1.2.4"
+lockBinding("TOOL_NAME", globalenv())
+assign("VERSION", "1.3.0", envir = globalenv())
-DEBUG_FAST <- FALSE
+lockBinding("VERSION", globalenv())
-DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST
+assign("DEBUG_FAST", FALSE, envir = globalenv())
-PROCESS_SMOL_BATCH <- DEBUG_FAST
+lockBinding("DEBUG_FAST", globalenv())
-FAST_FEATURE_RATIO <- 10
+assign("DEBUG_FAST_IGNORE_SLOW_OP", DEBUG_FAST, envir = globalenv())
+lockBinding("DEBUG_FAST_IGNORE_SLOW_OP", globalenv())
-OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy"
+assign("PROCESS_SMOL_BATCH", DEBUG_FAST, envir = globalenv())
+lockBinding("PROCESS_SMOL_BATCH", globalenv())
-ENRICHED_RDATA_VERSION <- paste("1.2.4", OUTPUT_SPECIFIC_TOOL, sep="-")
+assign("FAST_FEATURE_RATIO", 10, envir = globalenv())
-ENRICHED_RDATA_DOC <- sprintf("
+lockBinding("FAST_FEATURE_RATIO", globalenv())
+assign("OUTPUT_SPECIFIC_TOOL", "XSeeker_Galaxy", envir = globalenv())
+lockBinding("OUTPUT_SPECIFIC_TOOL", globalenv())
+assign(
+"ENRICHED_RDATA_VERSION",
+paste(VERSION, OUTPUT_SPECIFIC_TOOL, sep = "-"),
+envir = globalenv()
+)
+lockBinding("ENRICHED_RDATA_VERSION", globalenv())
+assign("ENRICHED_RDATA_DOC", sprintf("
 Welcome to the enriched <Version %s> of the output of CAMERA/xcms.
 This doc was generated by the tool: %s - Version %s
 To show the different variables contained in this rdata, type:
 - `load('this_rdata.rdata', rdata_env <- new.env())`
 - `names(rdata_env)`
 - Retrieval method: enriched_rdata <- TRUE
 - enriched_rdata_version:
 - Description: A flag created by that tool to tell which version of
 this tool has enriched the rdata.
-- Retrieval method: enriched_rdata_version <- sprintf(\"%s\", ENRICHED_RDATA_VERSION)
+- Retrieval method:
+enriched_rdata_version <- sprintf(
+\"%s\",
+ENRICHED_RDATA_VERSION
+)
 - enriched_rdata_doc:
 - Description: Contains the documentation string.
 Data from original mzxml file
 - xcms version: 2.0
 - polarity:
 - Description: Those are the polarity values from the original mzxml
 file, extracted using xcms 2.
-- Retrieval method: as.character(xcms::xcmsRaw('original_file.mzxml')@polarity[[1]])
+- Retrieval method:
+as.character(xcms::xcmsRaw(
+'original_file.mzxml'
+)@polarity[[1]])
 - xcms version: 2.0
 Data taken from incoming rdata
 ------
 - variableMetadata:
 - Retrieval method:
 ## just he same list, but simplified
 process_params <- list()
 for (list_name in names(rdata_file$listOFlistArguments)) {
 param_list <- list()
-for (param_name in names(rdata_file$listOFlistArguments[[list_name]])) {
+for (param_name in names(
-param_list[[param_name]] <- rdata_file$listOFlistArguments[[list_name]][[param_name]]
+rdata_file$listOFlistArguments[[list_name]]
+)) {
+param_list[[param_name]] <- rdata_file$listOFlistArguments[[
+list_name
+]][[param_name]]
 }
 process_params[[length(process_params)+1]] <- param_list
 }
-", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION)
+", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION),
+envir = globalenv())
+lockBinding("ENRICHED_RDATA_DOC", globalenv())
 ## Load the model definitions from a local file, a URL or a git repository.
 ##
 ## path: character scalar. Either a local path to an R file, an address
 ##   starting with "http", or a git address (starting with "git@" or
 ##   ending with ".git"). Addresses starting with "git__at__" are
 ##   rewritten to "git@" first.
 ## Returns the value of the last expression of the sourced models file.
 ## Stops when no path is given.
 get_models <- function(path) {
   if (is.null(path)) {
     stop("No path to the models file was provided.")
   }
   message(sprintf("Loading models from %s", path))
   ## galaxy mangles the "@" to a "__at__"
   if (startsWith(path, "git__at__")) {
     path <- sub("^git__at__", "git@", path, perl = TRUE)
   }
   ## substr() takes start/stop positions and length() of a scalar is 1,
   ## so the suffix is tested with endsWith() instead
   if (startsWith(path, "git@") || endsWith(path, ".git")) {
     return(get_models_from_git(path))
   }
   if (startsWith(path, "http")) {
     return(get_models_from_url(path))
   }
   source(path)$value
 }
## Clone a git repository and load the models file found inside it.
##
## url: repository address handed to `git clone`.
## target_file: file name looked up in the cloned tree with search_tree().
## rm: when TRUE, the clone is deleted before returning or failing.
## Returns the value of the last expression of the sourced models file.
## Stops when the clone fails or when no file named `target_file` is found.
get_models_from_git <- function(url, target_file = "models.R", rm = TRUE) {
  ## clone into a dedicated directory: tempdir() is the session-wide
  ## temporary directory and must not be deleted recursively
  clone_dir <- tempfile(pattern = "models_git_")
  if (rm) {
    on.exit(unlink(clone_dir, recursive = TRUE), add = TRUE)
  }
  message(sprintf("Cloning %s", url))
  ## system2() builds a shell command line: quote what comes from outside
  status <- system2("git", c("clone", shQuote(url), shQuote(clone_dir)))
  if (!identical(as.integer(status), 0L)) {
    stop(sprintf("Could not clone %s (git exit status: %s)", url, status))
  }
  result <- search_tree(clone_dir, target_file)
  if (is.null(result)) {
    stop(sprintf(
      "Could not find any file named \"%s\" in this repo",
      target_file
    ))
  }
  source(result)$value
}
## Download a models file and load it.
##
## url: address handed to download.file().
## target_file: name given to the downloaded file.
## rm: when TRUE, the download directory is deleted before returning or
##   failing.
## Returns the value of the last expression of the sourced models file.
## Stops when download.file() reports a non-zero status.
get_models_from_url <- function(url, target_file = "models.R", rm = TRUE) {
  ## download into a dedicated directory: tempdir() is the session-wide
  ## temporary directory and must not be deleted recursively
  download_dir <- tempfile(pattern = "models_url_")
  dir.create(download_dir)
  if (rm) {
    on.exit(unlink(download_dir, recursive = TRUE), add = TRUE)
  }
  message(sprintf("Downloading %s", url))
  result <- file.path(download_dir, target_file)
  if (download.file(url, destfile = result) != 0) {
    stop("Could not download any file at this adress.")
  }
  source(result)$value
}
 ## Depth-first, case-insensitive search of a file name in a directory tree.
 ##
 ## path: directory to search.
 ## target: file name to look for (compared lower-cased).
 ## Returns the path of the first match (built with file.path() from
 ## `path`), or NULL when nothing matches.
 search_tree <- function(path, target) {
   target <- tolower(target)
   for (entry in list.files(path)) {
     ## test the entry under `path`, not the bare name, which would be
     ## resolved against the working directory
     full_path <- file.path(path, entry)
     if (dir.exists(full_path)) {
       found <- search_tree(full_path, target)
       if (!is.null(found)) {
         return(found)
       }
     } else if (tolower(entry) == target) {
       return(full_path)
     }
   }
   NULL
 }
 ## Recreate the database through the ORM, then tag it as "created".
 ##
 ## orm: ORM object exposing recreate_database(); it is also handed to
 ##   set_database_version().
 ## Returns whatever set_database_version() returns.
 create_database <- function(orm) {
   orm$recreate_database(no_exists = FALSE)
   set_database_version(orm, "created")
 }
 ## Insert the built-in adduct table into the database.
 ##
 ## orm: ORM object; orm$adduct() must return an object exposing the
 ##   set_*(), save() and clear() methods used below.
 ## A single adduct object is reused: it is filled, saved, then cleared
 ## (with its id unset) before the next entry.
 insert_adducts <- function(orm) {
   message("Creating adducts...")
   ## one entry per adduct; fields are positional, in this order:
   ## name, multi, charge, mass, oidscore, quasi, ips,
   ## formula_add, formula_ded
   adducts <- list(
     list("[M-H2O-H]-", 1, -1, -48.992020312000001069, 1, 0, 0.5, "H0", "H1O3"),
     list("[M-H-Cl+O]-", 1, -1, -19.981214542000000022, 2, 0, 0.5, "O1", "H1Cl1"),
     list("[M-Cl+O]-", 1, -1, -18.973389510000000512, 3, 0, 0.5, "O1", "Cl1"),
     list("[M-3H]3-", 1, -3, -3.0218293560000000219, 4, 0, 1.0, "H0", "H3"),
     list("[2M-3H]3-", 2, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"),
     list("[3M-3H]3-", 3, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"),
     list("[M-2H]2-", 1, -2, -2.0145529039999998666, 5, 0, 1.0, "H0", "H2"),
     list("[2M-2H]2-", 2, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"),
     list("[3M-2H]2-", 3, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"),
     list("[M-H]-", 1, -1, -1.0072764519999999333, 6, 1, 1.0, "H0", "H1"),
     list("[2M-H]-", 2, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"),
     list("[3M-H]-", 3, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"),
     list("[M]+", 1, 1, -0.00054858000000000000945, 7, 1, 1.0, "H0", "H0"),
     list("[M]-", 1, -1, 0.00054858000000000000945, 8, 1, 1.0, "H0", "H0"),
     list("[M+H]+", 1, 1, 1.0072764519999999333, 9, 1, 1.0, "H1", "H0"),
     list("[2M+H]+", 2, 1, 1.0072764519999999333, 9, 0, 0.5, "H1", "H0"),
     list("[3M+H]+", 3, 1, 1.0072764519999999333, 9, 0, 0.25, "H1", "H0"),
     list("[M+2H]2+", 1, 2, 2.0145529039999998666, 10, 0, 0.75, "H2", "H0"),
     list("[2M+2H]2+", 2, 2, 2.0145529039999998666, 10, 0, 0.5, "H2", "H0"),
     list("[3M+2H]2+", 3, 2, 2.0145529039999998666, 10, 0, 0.25, "H2", "H0"),
     list("[M+3H]3+", 1, 3, 3.0218293560000000219, 11, 0, 0.75, "H3", "H0"),
     list("[2M+3H]3+", 2, 3, 3.0218293560000000219, 11, 0, 0.5, "H3", "H0"),
     list("[3M+3H]3+", 3, 3, 3.0218293560000000219, 11, 0, 0.25, "H3", "H0"),
     list("[M-2H+NH4]-", 1, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"),
     list("[2M-2H+NH4]-", 2, -1, 16.019272654000001665, 12, 0, 0.0, "N1H4", "H2"),
     list("[3M-2H+NH4]-", 3, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"),
     list("[M+NH4]+", 1, 1, 18.033825558000000199, 13, 1, 1.0, "N1H4", "H0"),
     list("[2M+NH4]+", 2, 1, 18.033825558000000199, 13, 0, 0.5, "N1H4", "H0"),
     list("[3M+NH4]+", 3, 1, 18.033825558000000199, 13, 0, 0.25, "N1H4", "H0"),
     list("[M+H+NH4]2+", 1, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"),
     list("[2M+H+NH4]2+", 2, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"),
     list("[3M+H+NH4]2+", 3, 2, 19.041102009999999467, 14, 0, 0.25, "N1H5", "H0"),
     list("[M+Na-2H]-", 1, -1, 20.974668176000001551, 15, 0, 0.75, "Na1", "H2"),
     list("[2M-2H+Na]-", 2, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"),
     list("[3M-2H+Na]-", 3, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"),
     list("[M+Na]+", 1, 1, 22.989221080000000086, 16, 1, 1.0, "Na1", "H0"),
     list("[2M+Na]+", 2, 1, 22.989221080000000086, 16, 0, 0.5, "Na1", "H0"),
     list("[3M+Na]+", 3, 1, 22.989221080000000086, 16, 0, 0.25, "Na1", "H0"),
     list("[M+H+Na]2+", 1, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"),
     list("[2M+H+Na]2+", 2, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"),
     list("[3M+H+Na]2+", 3, 2, 23.996497531999999353, 17, 0, 0.25, "Na1H1", "H0"),
     list("[M+2H+Na]3+", 1, 3, 25.003773983999998619, 18, 0, 0.25, "H2Na1", "H0"),
     list("[M+CH3OH+H]+", 1, 1, 33.033491200000000276, 19, 0, 0.25, "C1O1H5", "H0"),
     list("[M-H+Cl]2-", 1, -2, 33.962124838000001148, 20, 0, 1.0, "Cl1", "H1"),
     list("[2M-H+Cl]2-", 2, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"),
     list("[3M-H+Cl]2-", 3, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"),
     list("[M+Cl]-", 1, -1, 34.969401290000000416, 21, 1, 1.0, "Cl1", "H0"),
     list("[2M+Cl]-", 2, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"),
     list("[3M+Cl]-", 3, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"),
     list("[M+K-2H]-", 1, -1, 36.948605415999999479, 22, 0, 0.5, "K1", "H2"),
     list("[2M-2H+K]-", 2, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"),
     list("[3M-2H+K]-", 3, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"),
     list("[M+K]+", 1, 1, 38.963158319999998013, 23, 1, 1.0, "K1", "H0"),
     list("[2M+K]+", 2, 1, 38.963158319999998013, 23, 0, 0.5, "K1", "H0"),
     list("[3M+K]+", 3, 1, 38.963158319999998013, 23, 0, 0.25, "K1", "H0"),
     list("[M+H+K]2+", 1, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"),
     list("[2M+H+K]2+", 2, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"),
     list("[3M+H+K]2+", 3, 2, 39.970434771999997281, 24, 0, 0.25, "K1H1", "H0"),
     list("[M+ACN+H]+", 1, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"),
     list("[2M+ACN+H]+", 2, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"),
     list("[M+2Na-H]+", 1, 1, 44.971165708000000902, 26, 0, 0.5, "Na2", "H1"),
     list("[2M+2Na-H]+", 2, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"),
     list("[3M+2Na-H]+", 3, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"),
     list("[2M+FA-H]-", 2, -1, 44.998202851999998586, 27, 0, 0.25, "C1O2H2", "H1"),
     list("[M+FA-H]-", 1, -1, 44.998202851999998586, 27, 0, 0.5, "C1O2H2", "H1"),
     list("[M+2Na]2+", 1, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"),
     list("[2M+2Na]2+", 2, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"),
     list("[3M+2Na]2+", 3, 2, 45.978442160000000172, 28, 0, 0.25, "Na2", "H0"),
     list("[M+H+2Na]3+", 1, 3, 46.985718611999999438, 29, 0, 0.25, "H1Na2", "H0"),
     list("[M+H+FA]+", 1, 1, 47.012755755999997122, 30, 0, 0.25, "C1O2H3", "H0"),
     list("[M+Hac-H]-", 1, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"),
     list("[2M+Hac-H]-", 2, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"),
     list("[M+IsoProp+H]+", 1, 1, 61.064791327999998317, 32, 0, 0.25, "C3H9O1", "H0"),
     list("[M+Na+K]2+", 1, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"),
     list("[2M+Na+K]2+", 2, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"),
     list("[3M+Na+K]2+", 3, 2, 61.9523793999999981, 33, 0, 0.25, "Na1K1", "H0"),
     list("[M+NO3]-", 1, -1, 61.988366450000000895, 34, 0, 0.5, "N1O3", "H0"),
     list("[M+ACN+Na]+", 1, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"),
     list("[2M+ACN+Na]+", 2, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"),
     list("[M+NH4+FA]+", 1, 1, 64.039304861999994502, 36, 0, 0.25, "N1C1O2H6", "H0"),
     list("[M-2H+Na+FA]-", 1, -1, 66.980147479999999405, 37, 0, 0.5, "NaC1O2H2", "H2"),
     list("[M+3Na]3+", 1, 3, 68.967663239999993153, 38, 0, 0.25, "Na3", "H0"),
     list("[M+Na+FA]+", 1, 1, 68.99470038399999794, 39, 0, 0.25, "Na1C1O2H2", "H0"),
     list("[M+2Cl]2-", 1, -2, 69.938802580000000832, 40, 0, 1.0, "Cl2", "H0"),
     list("[2M+2Cl]2-", 2, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"),
     list("[3M+2Cl]2-", 3, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"),
     list("[M+2K-H]+", 1, 1, 76.919040187999996758, 41, 0, 0.5, "K2", "H1"),
     list("[2M+2K-H]+", 2, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"),
     list("[3M+2K-H]+", 3, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"),
     list("[M+2K]2+", 1, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"),
     list("[2M+2K]2+", 2, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"),
     list("[3M+2K]2+", 3, 2, 77.926316639999996028, 42, 0, 0.25, "K2", "H0"),
     list("[M+Br]-", 1, -1, 78.918886479999997619, 43, 1, 1.0, "Br1", "H0"),
     list("[M+Cl+FA]-", 1, -1, 80.974880593999998268, 44, 0, 0.5, "Cl1C1O2H2", "H0"),
     list("[M+AcNa-H]-", 1, -1, 80.995797543999998426, 45, 0, 0.25, "C2H3Na1O2", "H1"),
     list("[M+2ACN+2H]2+", 1, 2, 84.067651115999993292, 46, 0, 0.25, "C4H8N2", "H0"),
     list("[M+K+FA]+", 1, 1, 84.968637623999995868, 47, 0, 0.25, "K1C1O2H2", "H0"),
     list("[M+Cl+Na+FA-H]-", 1, -1, 102.95682522200000619, 48, 0, 0.5, "Cl1Na1C1O2H2", "H1"),
     list("[2M+3H2O+2H]+", 2, 1, 104.03153939599999944, 49, 0, 0.25, "H8O6", "H0"),
     list("[M+TFA-H]-", 1, -1, 112.98558742000000165, 50, 0, 0.5, "C2F3O2H1", "H1"),
     list("[M+H+TFA]+", 1, 1, 115.00014032400000019, 51, 0, 0.25, "C2F3O2H2", "H0"),
     list("[M+3ACN+2H]2+", 1, 2, 125.09420022199999778, 52, 0, 0.25, "C6H11N3", "H0"),
     list("[M+NH4+TFA]+", 1, 1, 132.02668943000000468, 53, 0, 0.25, "N1C2F3O2H5", "H0"),
     list("[M+Na+TFA]+", 1, 1, 136.98208495200000811, 54, 0, 0.25, "Na1C2F3O2H1", "H0"),
     list("[M+Cl+TFA]-", 1, -1, 148.96226516199999423, 55, 0, 0.5, "Cl1C2F3O2H1", "H0"),
     list("[M+K+TFA]+", 1, 1, 152.95602219200000604, 56, 0, 0.25, "K1C2F3O2H1", "H0")
   )
   adduct_row <- orm$adduct()
   for (fields in adducts) {
     ## explicit positions instead of a counter bumped inside the subscript
     adduct_row$set_name(fields[[1]])
     adduct_row$set_multi(fields[[2]])
     adduct_row$set_charge(fields[[3]])
     adduct_row$set_mass(fields[[4]])
     adduct_row$set_oidscore(fields[[5]])
     adduct_row$set_quasi(fields[[6]])
     adduct_row$set_ips(fields[[7]])
     adduct_row$set_formula_add(fields[[8]])
     adduct_row$set_formula_ded(fields[[9]])
     invisible(adduct_row$save())
     adduct_row$clear(unset_id = TRUE)
   }
   message("Adducts created")
 }
## Run the SQL statements of a file against the database, then tag the
## database as "enriched".
##
## orm: ORM object exposing execute(); also handed to
##   set_database_version().
## path: file whose lines are joined with spaces and split on ";".
## archetype: when TRUE nothing is done and NULL is returned.
## Note: the split on ";" is purely textual, so a ";" inside an SQL string
## literal would be split as well.
insert_base_data <- function(orm, path, archetype = FALSE) {
  if (archetype) {
    ## not implemented yet
    return(NULL)
  }
  statements <- strsplit(paste(readLines(path), collapse = " "), ";")[[1]]
  ## blank lines after the last ";" leave whitespace-only chunks: skip them
  statements <- statements[nzchar(trimws(statements))]
  for (sql in statements) {
    orm$execute(sql)
  }
  set_database_version(orm, "enriched")
}
 ## Read a tab-separated compound file and bulk-insert its rows.
 ##
 ## orm: ORM object; orm$compound() must return an object exposing the
 ##   setters, clear() and save(bulk = ...) used below.
 ## compounds_path: path of the tab-separated file.
 ## Stops when translate_compounds() cannot map the file's columns.
 insert_compounds <- function(orm, compounds_path) {
   raw_compounds <- read.csv(file = compounds_path, sep = "\t")
   compounds <- translate_compounds(raw_compounds)
   if (is.null(compounds)) {
     stop("Could not find asked compound's attributes in csv file.")
   }
   dummy_compound <- orm$compound()
   ## a single compound object is reused: filled, snapshotted as a list,
   ## then cleared before the next row
   compound_list <- lapply(seq_len(nrow(compounds)), function(i) {
     dummy_compound$set_mz(compounds[i, "mz"])
     dummy_compound$set_name(compounds[i, "name"])
     dummy_compound$set_common_name(compounds[i, "common_name"])
     dummy_compound$set_formula(compounds[i, "formula"])
     fields <- as.list(
       dummy_compound,
       c("mz", "name", "common_name", "formula")
     )
     dummy_compound$clear(unset_id = TRUE)
     fields
   })
   invisible(dummy_compound$save(bulk = compound_list))
 }
 ## Translate a compound table to the column names used by this tool.
 ##
 ## compounds: data frame read from the compound file.
 ## When the column names are exactly one of the recognized layouts, the
 ## translator at the same position in `header_translators` is used.
 ## Otherwise guess_translator() is asked for a mapping.
 ## Returns the translated table, or NULL when no mapping is found.
 translate_compounds <- function(compounds) {
   recognized_headers <- list(
     c(
       "HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1",
       "MetName", "ChemFormula", "INChIkey"
     )
   )
   header_translators <- list(
     hmdb_header_translator
   )
   current_headers <- colnames(compounds)
   for (index in seq_along(recognized_headers)) {
     if (identical(current_headers, recognized_headers[[index]])) {
       return(header_translators[[index]](compounds))
     }
   }
   translator <- guess_translator(current_headers)
   if (is.null(translator)) {
     return(NULL)
   }
   csv_header_translator(translator, compounds)
 }
 guess_translator <- function(header) {
 result <- list(
-# HMDB_ID=NULL,
+# HMDB_ID = NULL,
-mz=NULL,
+mz = NULL,
-name=NULL,
+name = NULL,
-common_name=NULL,
+common_name = NULL,
-formula=NULL,
+formula = NULL,
-# inchi_key=NULL
+# inchi_key = NULL
 )
 asked_cols <- names(result)
 for (asked_col in asked_cols) {
 for (col in header) {
 if ((twisted <- tolower(col)) == asked_col
 next
 }
 }
 }
 if (any(mapply(is.null, result))) {
-return (NULL)
+return(NULL)
 }
-return (result)
+return(result)
 }
 ## Translate a table using the HMDB column layout.
 ##
 ## compounds: data frame holding the "HMDB_ID", "MzBank", "MetName",
 ##   "ChemFormula" and "INChIkey" columns.
 ## "MetName" feeds both `name` and `common_name`.
 ## Returns the result of csv_header_translator() for that mapping.
 hmdb_header_translator <- function(compounds) {
   hmdb_mapping <- list(
     HMDB_ID = "HMDB_ID",
     mz = "MzBank",
     name = "MetName",
     common_name = "MetName",
     formula = "ChemFormula",
     inchi_key = "INChIkey"
   )
   csv_header_translator(hmdb_mapping, compounds)
 }
 ## Build a table whose columns are renamed after a translation table.
 ##
 ## translation_table: named list; names are the output column names,
 ##   values are the matching column names in `csv`. Must map "mz".
 ## csv: data frame to read the columns from.
 ## Returns a data frame whose first column is a row index (kept so the
 ## result has the same columns as before), followed by one column per
 ## translation entry; "mz" is converted to numeric.
 csv_header_translator <- function(translation_table, csv) {
   header_names <- names(translation_table)
   result <- data.frame(seq_len(nrow(csv)))
   for (i in seq_along(header_names)) {
     result[, header_names[[i]]] <- csv[, translation_table[[i]]]
   }
   mz <- result[, "mz"]
   ## as.numeric() on a factor returns the level codes, not the values
   if (is.factor(mz)) {
     mz <- as.character(mz)
   }
   result[, "mz"] <- as.numeric(mz)
   result
 }
 ## Store `version` under the "database_version" tag of the
 ## "XSeeker_tagging_table" table.
 ##
 ## orm: ORM object exposing set_tag().
 ## version: value to store.
 ## Returns whatever orm$set_tag() returns.
 set_database_version <- function(orm, version) {
   orm$set_tag(
     version,
     tag_name = "database_version",
     tag_table_name = "XSeeker_tagging_table"
   )
 }
 process_rdata <- function(orm, rdata, options) {
 mzml_tmp_dir <- gather_mzml_files(rdata)
 || options$`not-show-percent` == FALSE
 )
 error <- tryCatch({
 process_sample_list(
 orm, rdata, samples,
-show_percent=show_percent
+show_percent = show_percent,
+file_grouping_var = options$class
 )
 NULL
-}, error=function(e) {
+}, error = function(e) {
 message(e)
 e
 })
 if (!is.null(mzml_tmp_dir)) {
-unlink(mzml_tmp_dir, recursive=TRUE)
+unlink(mzml_tmp_dir, recursive = TRUE)
 }
 if (!is.null(error)) {
 stop(error)
 }
 }
 ## Make sure `rdata$singlefile` lists the raw files, unzipping them when
 ## only a zip archive is available.
 ##
 ## rdata: object holding `singlefile` and/or `zipfile`.
 ##   NOTE(review): the assignment to rdata$singlefile only reaches the
 ##   caller if rdata has reference semantics (an environment) - confirm.
 ## Returns the directory the archive was extracted to, or NULL when
 ## `singlefile` was already set and nothing was extracted.
 gather_mzml_files <- function(rdata) {
   if (!is.null(rdata$singlefile)) {
     message(sprintf(
       "Not a zip file, loading files directly from path: %s",
       paste(rdata$singlefile, collapse = " ; ")
     ))
     return(NULL)
   }
   message("Extracting mxml files")
   ## extract into a dedicated directory: process_rdata() unlinks the
   ## returned directory recursively, which must not hit tempdir() itself
   extract_dir <- tempfile(pattern = "mzml_")
   rdata$singlefile <- utils::unzip(rdata$zipfile, exdir = extract_dir)
   names(rdata$singlefile) <- tools::file_path_sans_ext(
     basename(rdata$singlefile)
   )
   message("Extracted")
   extract_dir
 }
+process_sample_list <- function(
+orm,
+rdata,
+sample_names,
+show_percent,
+file_grouping_var = NULL
+) {
+if (is.null(file_grouping_var)) {
+file_grouping_var <- find_grouping_var(rdata$variableMetadata)
+if (is.null(file_grouping_var)) {
+stop("Malformed variableMetada.")
+}
+}
+tryCatch({
+headers <- colnames(rdata$variableMetadata)
+file_grouping_var <- headers[[as.numeric(file_grouping_var)]]
+}, error = function(e) NULL)
+if (
+is.null(file_grouping_var)
+|| !(file_grouping_var %in% colnames(rdata$variableMetadata))
+) {
+stop(sprintf(
+"Could not find grouping variable %s in var meta file.",
+file_grouping_var
+))
+}
 message("Processing samples.")
 message(sprintf("File grouping variable: %s", file_grouping_var))
-if(is.null(file_grouping_var)) {
-stop("Malformed variableMetada.")
-}
 context <- new.env()
 context$samples <- list()
 context$peaks <- rdata$xa@xcmsSet@peaks
 context$groupidx <- rdata$xa@xcmsSet@groupidx
 process_arg_list <- rdata$listOFlistArguments
 var_meta <- rdata$variableMetadata
 process_params <- list()
 if (is.null(process_arg_list)) {
-histories <- list()
 for (history in xcms_set@.processHistory) {
 if (
 class(history@param) == "CentWaveParam"
 && history@type == "Peak detection"
 ) {
 params <- history@param
 process_params <- list(list(
-xfunction="annotatediff",
+xfunction = "annotatediff",
-ppm=params@ppm,
+ppm = params@ppm,
-peakwidth=sprintf("%s - %s", params@peakwidth[[1]], params@peakwidth[[2]]),
+peakwidth = sprintf(
-snthresh=params@snthresh,
+"%s - %s",
-prefilterStep=params@prefilter[[1]],
+params@peakwidth[[1]],
-prefilterLevel=params@prefilter[[2]],
+params@peakwidth[[2]]
-mzdiff=params@mzdiff,
+),
-fitgauss=params@fitgauss,
+snthresh = params@snthresh,
-noise=params@noise,
+prefilterStep = params@prefilter[[1]],
-mzCenterFun=params@mzCenterFun,
+prefilterLevel = params@prefilter[[2]],
-integrate=params@integrate,
+mzdiff = params@mzdiff,
-firstBaselineCheck=params@firstBaselineCheck,
+fitgauss = params@fitgauss,
-snthreshIsoROIs=!identical(params@roiScales, numeric(0))
+noise = params@noise,
+mzCenterFun = params@mzCenterFun,
+integrate = params@integrate,
+firstBaselineCheck = params@firstBaselineCheck,
+snthreshIsoROIs = !identical(params@roiScales, numeric(0))
 ))
 break
 }
 }
 } else {
 for (list_name in names(process_arg_list)) {
 param_list <- list()
 for (param_name in names(process_arg_list[[list_name]])) {
-param_list[[param_name]] <- process_arg_list[[list_name]][[param_name]]
+param_list[[param_name]] <- process_arg_list[[
+list_name
+]][[param_name]]
 }
-process_params[[length(process_params)+1]] <- param_list
+process_params[[length(process_params) + 1]] <- param_list
 }
 }
 message("Parameters from previous processes extracted.")
 indices <- as.numeric(unique(var_meta[, file_grouping_var]))
+if (any(is.null(names(singlefile)[indices]))) {
+stop(sprintf(
+paste(
+"Indices defined by grouping variable %s are not all present",
+"in singlefile names (%s).\nCannot continue. Indices: %s"
+),
+file_grouping_var,
+paste(names(singlefile), collapse = ", "),
+paste(indices, collapse = ", ")
+))
+}
 smol_xcms_set <- orm$smol_xcms_set()
 mz_tab_info <- new.env()
 g <- xcms::groups(xcms_set)
 mz_tab_info$group_length <- nrow(g)
 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set)
 mz_tab_info$sampnames <- xcms::sampnames(xcms_set)
 mz_tab_info$sampclass <- xcms::sampclass(xcms_set)
-mz_tab_info$rtmed <- g[,"rtmed"]
+mz_tab_info$rtmed <- g[, "rtmed"]
-mz_tab_info$mzmed <- g[,"mzmed"]
+mz_tab_info$mzmed <- g[, "mzmed"]
-mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(xcms_set, value="into")
+mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(
-blogified <- blob::blob(fst::compress_fst(serialize(mz_tab_info, NULL), compression=100))
+xcms_set,
+value = "into"
+)
+blogified <- blob::blob(fst::compress_fst(
+serialize(mz_tab_info, NULL),
+compression = 100
+))
 rm(mz_tab_info)
 invisible(smol_xcms_set$set_raw(blogified)$save())
 smol_xcms_set_id <- smol_xcms_set$get_id()
 rm(smol_xcms_set)
 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION
 env$tool_name <- TOOL_NAME
 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
 sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id)
-rm (env)
+rm(env)
 context$samples[no] <- sample$get_id()
-rm (sample)
+rm(sample)
 }
 context$clusters <- list()
 context$show_percent <- show_percent
 context$cluster_mean_rt_abundance <- list()
 context$central_feature <- list()
 clusters <- context$clusters
 rm(context)
 message("Features enrichment")
 complete_features(orm, clusters, show_percent)
 message("Features enrichment done.")
-return (NULL)
+return(NULL)
 }
 ## Find the single class (grouping) column of a variable metadata table.
 ##
 ## Every column whose name is not in the list of known peak-table columns
 ## below is counted as a class column.
 ##
 ## @param var_meta Object whose column names are read with `colnames()`.
 ## @return The name of the only column that is not a known column.
 ##   Stops when no such column, or more than one, is found.
 find_grouping_var <- function(var_meta) {
   known_colnames <- c(
     "name", "namecustom", "mz", "mzmin", "mzmax",
     "rt", "rtmin", "rtmax", "npeaks", "isotopes", "adduct",
     "pcgroup", "ms_level"
   )
   col_names <- colnames(var_meta)
   ## Keep the unknown columns, in their original order.
   class_columns <- col_names[!(col_names %in% known_colnames)]
   class_count <- length(class_columns)
   if (class_count == 0) {
     stop("Could not find any class column in your variableMetadata.")
   }
   if (class_count > 1) {
     stop(sprintf(
       "Only one class expected in the variable metadata. Found %d .",
       class_count
     ))
   }
   class_columns[[1]]
 }
 ## Create, fill and save the ORM record of one sample, then load its
 ## process parameters.
 ##
 ## @param orm Object providing the `sample()` model constructor.
 ## @param env Sample data; `sample_name`, `dataset_path`, `polarity` and
 ##   `process_params` are read from it, and the whole object is serialized
 ##   into the record's raw field.
 ## @param context Not used in this function body.
 ## @param smol_xcms_set_id Id stored in the record's `smol_xcms_set_id`.
 ## @return The sample object returned by `save()`.
 add_sample_to_database <- function(orm, env, context, smol_xcms_set_id) {
   message(sprintf("Processing sample %s", env$sample_name))
   ## A missing or empty polarity is stored as an empty string.
   polarity <- env$polarity
   if (is.null(polarity) || identical(polarity, character(0))) {
     polarity <- ""
   }
   sample <- orm$sample()
   sample <- sample$set_name(env$sample_name)
   sample <- sample$set_path(env$dataset_path)
   sample <- sample$set_kind("enriched_rdata")
   sample <- sample$set_polarity(polarity)
   sample <- sample$set_raw(blob::blob(fst::compress_fst(
     serialize(env, NULL),
     compression = 100
   )))
   ## The id is written straight into the record and flagged in
   ## `modified__` (presumably so that `save()` persists it).
   sample[["smol_xcms_set_id"]] <- smol_xcms_set_id
   sample$modified__[["smol_xcms_set_id"]] <- smol_xcms_set_id
   sample <- sample$save()
   load_process_params(orm, sample, env$process_params)
   message(sprintf("Sample %s inserted.", env$sample_name))
   sample
 }
 load_variable_metadata <- function(orm, var_meta, context) {
 all_clusters <- orm$cluster()$all()
 orm, var_meta, context,
 next_feature_id, next_cluster_id,
 next_pc_group, next_align_group
 ))
 message("Extracting features done.")
-return (NULL)
+return(NULL)
 }
 ## Return the current maximum of `attribute` among `models`.
 ##
 ## @param models Object exposing a `max(attribute)` method.
 ## @param attribute Name of the attribute handed to `models$max()`.
 ## @return The value of `models$max(attribute)`, or 0 when that value
 ##   is `Inf` or `-Inf`.
 get_next_id <- function(models, attribute) {
   highest <- models$max(attribute)
   if (highest == Inf || highest == -Inf) {
     0
   } else {
     highest
   }
 }
 create_features <- function(
 orm, var_meta, context,
 next_feature_id, next_cluster_id,
 next_pc_group, next_align_group
 ) {
 field_names <- as.list(names(orm$feature()$fields__))
-field_names[field_names=="id"] <- NULL
+field_names[field_names == "id"] <- NULL
 features <- list()
 dummy_feature <- orm$feature()
 if (show_percent <- context$show_percent) {
 total <- nrow(var_meta)
 }
 rows <- seq_len(nrow(var_meta))
 if (PROCESS_SMOL_BATCH) {
-rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))]
+rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))]
 }
 cluster_row <- list()
 for (row in rows) {
 if (show_percent && (row / total) * 100 > percent) {
 percent <- percent + 1
-message("\r", sprintf("\r%d %%", percent), appendLF=FALSE)
+message("\r", sprintf("\r%d %%", percent), appendLF = FALSE)
 }
 dummy_feature$set_featureID(next_feature_id)
 next_feature_id <- next_feature_id + 1
 context$clusterID <- clusterID
 dummy_feature$set_iso(iso)
 peak_list <- context$peaks[context$groupidx[[row]], ]
 if (! ("matrix" %in% class(peak_list))) {
-peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list)))
+peak_list <- matrix(
+peak_list,
+nrow = 1,
+ncol = length(peak_list),
+dimnames = list(c(), names(peak_list))
+)
 }
 clusterID <- as.character(clusterID)
 if (is.null(context$central_feature[[clusterID]])) {
 int_o <- extract_peak_var(peak_list, "into")
 context$central_feature[[clusterID]] <- (
-peak_list[peak_list[, "into"] == int_o,]["sample"]
+peak_list[peak_list[, "into"] == int_o, ]["sample"]
 )
 }
 if (!DEBUG_FAST_IGNORE_SLOW_OP) {
-sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE]
+central_feature <- context$central_feature[[clusterID]]
-if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
+sample_peak_list <- peak_list[
-if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
+as.integer(peak_list[, "sample"]) == central_feature,
+,
+drop = FALSE
+]
+if (
+!identical(sample_peak_list, numeric(0))
+&& !is.null(nrow(sample_peak_list))
+&& nrow(sample_peak_list) != 0
+) {
+int_o <- extract_peak_var(sample_peak_list, "into")
+if (!is.na(int_o)) {
 dummy_feature$set_int_o(int_o)
 }
-if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
+int_b <- extract_peak_var(sample_peak_list, "intb")
+if (!is.na(int_b)) {
 dummy_feature$set_int_b(int_b)
 }
-if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
+max_o <- extract_peak_var(sample_peak_list, "maxo")
+if (!is.na(max_o)) {
 dummy_feature$set_max_o(max_o)
 }
 }
 }
 dummy_feature, clusterID,
 context, curent_var_meta, next_pc_group,
 next_align_group
 )
 next_align_group <- next_align_group + 1
-features[[length(features)+1]] <- as.list(dummy_feature, field_names)
+features[[length(features) + 1]] <- as.list(dummy_feature, field_names)
 dummy_feature$clear()
 }
 rm(var_meta)
 message("")
 message("Saving features")
-invisible(dummy_feature$save(bulk=features))
+invisible(dummy_feature$save(bulk = features))
 ## We link manually clusters to the sample they're in.
 link_cache <- list()
 for (row in rows) {
 sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"])
 for (sample_id in context$samples[sample_nos]) {
 cluster_id <- cluster_row[[row]]$get_id()
-if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) {
+id <- paste(sample_id, cluster_id, sep = ";")
+if (is.null(link_cache[[id]])) {
 link_cache[[id]] <- 1
 orm$cluster_sample(
-sample_id=sample_id,
+sample_id = sample_id,
-cluster_id=cluster_id
+cluster_id = cluster_id
 )$save()
 }
 }
 }
 message("Saved.")
-return (context$clusters)
+return(context$clusters)
 }
## Reduce one column of a peak table to a single value.
##
## @param peak_list Two-dimensional peak table indexed as
##   `peak_list[, var_name]`.
## @param var_name Name of the column to read.
## @param selector Function applied to the unnamed column values
##   (default: `max`).
## @return Whatever `selector` returns for that column.
extract_peak_var <- function(peak_list, var_name, selector = max) {
  column <- peak_list[, var_name]
  ## Names are dropped before the values reach the selector.
  names(column) <- NULL
  selector(column)
}
 set_feature_fields_from_var_meta <- function(feature, var_meta) {
 if (!is.null(mz <- var_meta[["mz"]]) && !is.na(mz)) {
 feature$set_mz(mz)
 feature$set_rt_max(rtmax)
 }
 if (!is.null(isotopes <- var_meta[["isotopes"]]) && !is.na(isotopes)) {
 feature$set_iso(isotopes)
 }
-return (feature)
+return(feature)
 }
 ## Strip a leading "[<digits>]" prefix from an annotation string.
 ##
 ## @param weird_data Annotation string; only its first element decides
 ##   whether the prefix is removed.
 ## @return `weird_data` without the leading "[<digits>]" part, or
 ##   `weird_data` unchanged when its first element has no such prefix.
 extract_iso <- function(weird_data) {
   has_prefix <- grepl("^\\[\\d+\\]", weird_data)[[1]]
   if (!has_prefix) {
     return(weird_data)
   }
   sub("^\\[\\d+\\]", "", weird_data, perl = TRUE)
 }
## Compute a cluster id from the "[<digits>]" prefix of an annotation.
##
## @param weird_data Annotation string; only its first element decides
##   whether a prefix is present.
## @param next_cluster_id Offset added to the extracted number.
## @return The number found in the leading "[<digits>]" prefix (0 when
##   there is no such prefix) plus `next_cluster_id`.
extract_clusterID <- function(weird_data, next_cluster_id) {
  local_id <- 0
  if (grepl("^\\[\\d+\\]", weird_data)[[1]]) {
    bracketed <- stringr::str_extract(weird_data, "^\\[\\d+\\]")
    local_id <- as.numeric(stringr::str_extract(bracketed, "\\d+"))
  }
  local_id + next_cluster_id
}
 create_associated_cluster <- function(
 orm,
 main_sample_id, feature, clusterID,
 if (is.null(cluster <- context$clusters[[clusterID]])) {
 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]])
 adduct_name <- as.character(curent_var_meta[["adduct"]])
 annotation <- curent_var_meta[["isotopes"]]
 cluster <- context$clusters[[clusterID]] <- orm$cluster(
-pc_group=pcgroup + next_pc_group,
+pc_group = pcgroup + next_pc_group,
 # adduct=adduct,
-align_group=next_align_group,
+align_group = next_align_group,
 # curent_group=curent_group,
-clusterID=context$clusterID,
+clusterID = context$clusterID,
-annotation=annotation
+annotation = annotation
 )
 if (is.null(adduct <- context$adducts[[adduct_name]])) {
-context$adducts[[adduct_name]] <- orm$adduct()$load_by(name=adduct_name)$first()
+context$adducts[[adduct_name]] <- orm$adduct()$load_by(
+name = adduct_name
+)$first()
 if (is.null(adduct <- context$adducts[[adduct_name]])) {
-adduct <- context$adducts[[adduct_name]] <- orm$adduct(name=adduct_name, charge=0)
+adduct <- context$adducts[[adduct_name]] <- orm$adduct(
+name = adduct_name,
+charge = 0
+)
 adduct$save()
 }
 }
 cluster$set_adduct(adduct)
-## Crappy hack to assign sample id to cluster without loading the sample.
+## Crappy hack to assign sample id to cluster without loading the
-## Samples are too big (their sample$env) and slows the process, and eat all the menory
+## sample. Samples are too big (their sample$env) and slow the
-## so we dont't want to load them.
+## process, and eat all the memory so we don't want to load them.
 cluster[["sample_id"]] <- main_sample_id
 cluster$modified__[["sample_id"]] <- main_sample_id
 } else {
 if (context$clusterID != 0 && cluster$get_clusterID() == 0) {
 cluster$set_clusterID(context$clusterID)
 }
 }
 cluster$save()
 feature$set_cluster(cluster)
-return (cluster)
+return(cluster)
 }
 ## For every cluster, store the mean rt of its features and set each
 ## feature's abundance relative to the cluster's "[M]" feature.
 ##
 ## @param orm Object providing the `feature()` model constructor.
 ## @param clusters Collection of cluster objects; `get_id()`,
 ##   `set_mean_rt()` and `save()` are called on them.
 ## @param show_percent When TRUE, a progress counter is printed with
 ##   `message()`.
 ## @return NULL; the results are written through the `save()` calls.
 complete_features <- function(orm, clusters, show_percent) {
 total <- length(clusters)
 percent <- -1
 i <- 0
 for (cluster in clusters) {
-i <- i+1
+i <- i + 1
 ## Progress display: the counter goes up by at most one per cluster.
 if (show_percent && (i / total) * 100 > percent) {
 percent <- percent + 1
-message("\r", sprintf("\r%d %%", percent), appendLF=FALSE)
+message("\r", sprintf("\r%d %%", percent), appendLF = FALSE)
 }
-features <- orm$feature()$load_by(cluster_id=cluster$get_id())
+features <- orm$feature()$load_by(cluster_id = cluster$get_id())
 if (features$any()) {
 if (!is.null(rt <- features$mean("rt"))) {
 cluster$set_mean_rt(rt)$save()
 }
 features_df <- as.data.frame(features)
 ## Rows whose "iso" value starts with "[M]" give the reference int_o.
-central_feature <- features_df[grepl("^\\[M\\]", features_df[, "iso"]), ]
+central_feature <- features_df[
+grepl("^\\[M\\]", features_df[, "iso"]),
+]
 central_feature_into <- central_feature[["int_o"]]
 ## NOTE(review): the condition below has to be a single TRUE/FALSE.
 ## It is not when several rows matched "[M]" or when int_o is NA;
 ## confirm that neither can happen for a saved cluster.
-if (!identical(central_feature_into, numeric(0)) && central_feature_into != 0) {
+if (
+!identical(central_feature_into, numeric(0))
+&& central_feature_into != 0
+) {
 ## Abundance is the feature's int_o as a percentage of the reference.
 for (feature in as.vector(features)) {
 feature$set_abundance(
 feature$get_int_o() / central_feature_into * 100
 )$save()
 }
 }
 }
 }
-return (NULL)
+return(NULL)
 }
 load_process_params <- function(orm, sample, params) {
 for (param_list in params) {
 if (is.null(param_list[["xfunction"]])) {
 }
 if (param_list[["xfunction"]] == "annotatediff") {
 load_process_params_peak_picking(orm, sample, param_list)
 }
 }
-return (sample)
+return(sample)
 }
## Attach the peak picking parameters of a process to a sample.
##
## @param orm Object providing the `peak_picking_parameters` model
##   generator.
## @param sample Sample object providing `set_peak_picking_parameters`.
## @param peak_picking_params Parameter list of the process.
## @return The result of `add_sample_process_parameters()`.
load_process_params_peak_picking <- function(
  orm,
  sample,
  peak_picking_params
) {
  ## Names on the left and right are handed over as the translation
  ## table; their exact roles are defined by the callee.
  translation <- list(
    ppm = "ppm",
    maxcharge = "maxCharge",
    maxiso = "maxIso"
  )
  linked <- add_sample_process_parameters(
    params = peak_picking_params,
    params_translation = translation,
    param_model_generator = orm$peak_picking_parameters,
    sample_param_setter = sample$set_peak_picking_parameters
  )
  return(linked)
}
 add_sample_process_parameters <- function(
 params,
 params_model <- params_models$first()
 } else {
 params_model <- do.call(param_model_generator, model_params)
 params_model$save()
 }
-return (sample_param_setter(params_model)$save())
+return(sample_param_setter(params_model)$save())
 }
 library(optparse)
 ## Command line options of the tool, in the order they are declared.
 ## Built inside `local()` so the helper below stays private.
 option_list <- local({
   ## Every value-carrying option is a plain character option.
   character_option <- function(flags, help) {
     optparse::make_option(flags, type = "character", help = help)
   }
   list(
     optparse::make_option(
       c("-v", "--version"),
       action = "store_true",
       help = "Display this tool's version and exits"
     ),
     character_option(
       c("-i", "--input"),
       "The rdata path to import in XSeeker"
     ),
     character_option(
       c("-s", "--samples"),
       "Samples to visualise in XSeeker"
     ),
     character_option(
       c("-B", "--archetype"),
       "The name of the base database"
     ),
     character_option(
       c("-b", "--database"),
       "The base database's path"
     ),
     character_option(
       c("-c", "--compounds-csv"),
       "The csv containing compounds"
     ),
     character_option(
       c("-m", "--models"),
       paste(
         "The path or url (must begin with http[s]:// or git@) to",
         "the database's models"
       )
     ),
     character_option(
       c("-k", "--class"),
       "The name of the column containing the classes"
     ),
     character_option(
       c("-o", "--output"),
       "The path where to output sqlite"
     ),
     optparse::make_option(
       c("-P", "--not-show-percent"),
       action = "store_true",
       help = "Flag not to show the percents",
       default = FALSE
     )
   )
 })
## Print a traceback on an uncaught error, then leave with a failure
## status. When a custom `error` option is set, a non-interactive R
## session does not halt by itself once the handler returns: the script
## would go on with the next top-level statement and could still exit
## with status 0. The handler therefore quits explicitly.
options(error = function() {
  traceback(3)
  quit(save = "no", status = 1, runLast = FALSE)
})
parser <- OptionParser(
  usage = "%prog [options] file",
  option_list = option_list
)
args <- parse_args(parser, positional_arguments = 0)
## Exit status used by the final `quit()` of the script.
err_code <- 0
## Only print the tool name and version, then leave, when the version
## flag is set.
if (!is.null(args$options$version)) {
  message(sprintf("%s %s", TOOL_NAME, VERSION))
  quit()
}
 models <- get_models(args$options$models)
 orm <- DBModelR::ORM(
-connection_params=list(dbname=args$options$output),
+connection_params = list(dbname=args$options$output),
-dbms="SQLite"
+dbms = "SQLite"
 )
 invisible(orm$models(models))
 invisible(create_database(orm))
 insert_base_data(orm, args$options$database)
 }
 message(sprintf("Base data inserted using %s.", args$options$database))
 if (!is.null(args$options$archetype)) {
-insert_base_data(orm, args$options$archetype, archetype=TRUE)
+insert_base_data(orm, args$options$archetype, archetype = TRUE)
 }
 if (!is.null(args$options$`compounds-csv`)) {
 insert_compounds(orm, args$options$`compounds-csv`)
 }
 load(args$options$input, rdata <- new.env())
 process_rdata(orm, rdata, args$options)
-quit(status=err_code)
+quit(status = err_code)

Mercurial > repos > lain > xseekerpreparator

comparison XSeekerPreparator.R @ 19:2937e72e5891 draft