Mercurial > repos > lain > xseekerpreparator

diff XSeekerPreparator.R @ 19:2937e72e5891 draft
" master branch Updating"
author: lain
date: Tue, 18 Oct 2022 12:57:28 +0000
parents: 2c7e7fd1f740
children: ce94e7a141bb
--- a/XSeekerPreparator.R	Tue Feb 01 18:09:11 2022 +0000
+++ b/XSeekerPreparator.R	Tue Oct 18 12:57:28 2022 +0000
@@ -1,17 +1,27 @@
 
 
-TOOL_NAME <- "XSeekerPreparator"
-VERSION <- "1.2.4"
+assign("TOOL_NAME", "XSeekerPreparator", envir = globalenv())
+lockBinding("TOOL_NAME", globalenv())
+assign("VERSION", "1.3.0", envir = globalenv())
+lockBinding("VERSION", globalenv())
+assign("DEBUG_FAST", FALSE, envir = globalenv())
+lockBinding("DEBUG_FAST", globalenv())
+assign("DEBUG_FAST_IGNORE_SLOW_OP", DEBUG_FAST, envir = globalenv())
+lockBinding("DEBUG_FAST_IGNORE_SLOW_OP", globalenv())
+assign("PROCESS_SMOL_BATCH", DEBUG_FAST, envir = globalenv())
+lockBinding("PROCESS_SMOL_BATCH", globalenv())
+assign("FAST_FEATURE_RATIO", 10, envir = globalenv())
+lockBinding("FAST_FEATURE_RATIO", globalenv())
+assign("OUTPUT_SPECIFIC_TOOL", "XSeeker_Galaxy", envir = globalenv())
+lockBinding("OUTPUT_SPECIFIC_TOOL", globalenv())
 
-DEBUG_FAST <- FALSE
-DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST
-PROCESS_SMOL_BATCH <- DEBUG_FAST
-FAST_FEATURE_RATIO <- 10
-
-OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy"
-
-ENRICHED_RDATA_VERSION <- paste("1.2.4", OUTPUT_SPECIFIC_TOOL, sep="-")
-ENRICHED_RDATA_DOC <- sprintf("
+assign(
+    "ENRICHED_RDATA_VERSION",
+    paste(VERSION, OUTPUT_SPECIFIC_TOOL, sep = "-"),
+    envir = globalenv()
+)
+lockBinding("ENRICHED_RDATA_VERSION", globalenv())
+assign("ENRICHED_RDATA_DOC", sprintf("
 Welcome to the enriched <Version %s> of the output of CAMERA/xcms.
 This doc was generated by the tool: %s - Version %s
 To show the different variables contained in this rdata, type:
@@ -41,7 +51,11 @@
   - enriched_rdata_version:
     - Description: A flag created by that tool to tell which version of
         this tool has enriched the rdata.
-    - Retrieval method: enriched_rdata_version <- sprintf(\"%s\", ENRICHED_RDATA_VERSION)
+    - Retrieval method:
+      enriched_rdata_version <- sprintf(
+          \"%s\",
+          ENRICHED_RDATA_VERSION
+      )
 
   - enriched_rdata_doc:
     - Description: Contains the documentation string.
@@ -81,7 +95,10 @@
   - polarity:
     - Description: Those are the polarity values from the original mzxml
         file, extracted using xcms 2.
-    - Retrieval method: as.character(xcms::xcmsRaw('original_file.mzxml')@polarity[[1]])
+    - Retrieval method:
+        as.character(xcms::xcmsRaw(
+            'original_file.mzxml'
+        )@polarity[[1]])
     - xcms version: 2.0
 
 Data taken from incoming rdata
@@ -103,12 +120,18 @@
         process_params <- list()
         for (list_name in names(rdata_file$listOFlistArguments)) {
             param_list <- list()
-            for (param_name in names(rdata_file$listOFlistArguments[[list_name]])) {
-                param_list[[param_name]] <- rdata_file$listOFlistArguments[[list_name]][[param_name]]
+            for (param_name in names(
+                    rdata_file$listOFlistArguments[[list_name]]
+            )) {
+                param_list[[param_name]] <- rdata_file$listOFlistArguments[[
+                    list_name
+                ]][[param_name]]
             }
             process_params[[length(process_params)+1]] <- param_list
         }
-", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION)
+", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION),
+envir = globalenv())
+lockBinding("ENRICHED_RDATA_DOC", globalenv())
 
 
 
@@ -120,21 +143,21 @@
     }
     ## galaxy mangles the "@" to a "__at__"
     if (substr(path, 1, 9) == "git__at__") {
-        path <- sub("^git__at__", "git@", path, perl=TRUE)
+        path <- sub("^git__at__", "git@", path, perl = TRUE)
     }
     if (
         substr(path, 1, 4) == "git@"
-        || substr(path, length(path)-4, 4) == ".git"
+        || substr(path, length(path) - 4, 4) == ".git"
     ) {
-        return (get_models_from_git(path))
+        return(get_models_from_git(path))
     }
     if (substr(path, 1, 4) == "http") {
-        return (get_models_from_url(path))
+        return(get_models_from_url(path))
     }
-    return (source(path)$value)
+    return(source(path)$value)
 }
 
-get_models_from_git <- function (url, target_file="models.R", rm=TRUE) {
+get_models_from_git <- function(url, target_file = "models.R", rm = TRUE) {
     tmp <- tempdir()
     message(sprintf("Cloning %s", url))
     system2("git", c("clone", url, tmp))
@@ -142,12 +165,12 @@
     if (!is.null(result)) {
         models <- source(result)$value
         if (rm) {
-            unlink(tmp, recursive=TRUE)
+            unlink(tmp, recursive = TRUE)
         }
-        return (models)
+        return(models)
     }
     if (rm) {
-        unlink(tmp, recursive=TRUE)
+        unlink(tmp, recursive = TRUE)
     }
     stop(sprintf(
         "Could not find any file named \"%s\" in this repo",
@@ -155,19 +178,19 @@
     ))
 }
 
-get_models_from_url <- function (url, target_file="models.R", rm=TRUE) {
+get_models_from_url <- function(url, target_file = "models.R", rm = TRUE) {
     tmp <- tempdir()
     message(sprintf("Downloading %s", url))
     result <- file.path(tmp, target_file)
-    if (download.file(url, destfile=result) == 0) {
+    if (download.file(url, destfile = result) == 0) {
         models <- source(result)$value
         if (rm) {
-            unlink(tmp, recursive=TRUE)
+            unlink(tmp, recursive = TRUE)
         }
-        return (models)
+        return(models)
     }
     if (rm) {
-        unlink(tmp, recursive=TRUE)
+        unlink(tmp, recursive = TRUE)
     }
     stop("Could not download any file at this adress.")
 }
@@ -178,162 +201,162 @@
         if (is.dir(file)) {
             result <- search_tree(file.path(path, file), target)
             if (!is.null(result)) {
-                return (result)
+                return(result)
             }
         } else if (tolower(file) == target) {
-            return (file.path(path, file))
+            return(file.path(path, file))
         }
     }
-    return (NULL)
+    return(NULL)
 }
 
 create_database <- function(orm) {
-    orm$recreate_database(no_exists=FALSE)
+    orm$recreate_database(no_exists = FALSE)
     set_database_version(orm, "created")
 }
 
 insert_adducts <- function(orm) {
     message("Creating adducts...")
     adducts <- list(
-        list("[M-H2O-H]-",1,-1,-48.992020312000001069,1,0,0.5,"H0","H1O3"),
-        list("[M-H-Cl+O]-",1,-1,-19.981214542000000022,2,0,0.5,"O1","H1Cl1"),
-        list("[M-Cl+O]-",1,-1,-18.973389510000000512,3,0,0.5,"O1","Cl1"),
-        list("[M-3H]3-",1,-3,-3.0218293560000000219,4,0,1.0,"H0","H3"),
-        list("[2M-3H]3-",2,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"),
-        list("[3M-3H]3-",3,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"),
-        list("[M-2H]2-",1,-2,-2.0145529039999998666,5,0,1.0,"H0","H2"),
-        list("[2M-2H]2-",2,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"),
-        list("[3M-2H]2-",3,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"),
-        list("[M-H]-",1,-1,-1.0072764519999999333,6,1,1.0,"H0","H1"),
-        list("[2M-H]-",2,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"),
-        list("[3M-H]-",3,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"),
-        list("[M]+",1,1,-0.00054858000000000000945,7,1,1.0,"H0","H0"),
-        list("[M]-",1,-1,0.00054858000000000000945,8,1,1.0,"H0","H0"),
-        list("[M+H]+",1,1,1.0072764519999999333,9,1,1.0,"H1","H0"),
-        list("[2M+H]+",2,1,1.0072764519999999333,9,0,0.5,"H1","H0"),
-        list("[3M+H]+",3,1,1.0072764519999999333,9,0,0.25,"H1","H0"),
-        list("[M+2H]2+",1,2,2.0145529039999998666,10,0,0.75,"H2","H0"),
-        list("[2M+2H]2+",2,2,2.0145529039999998666,10,0,0.5,"H2","H0"),
-        list("[3M+2H]2+",3,2,2.0145529039999998666,10,0,0.25,"H2","H0"),
-        list("[M+3H]3+",1,3,3.0218293560000000219,11,0,0.75,"H3","H0"),
-        list("[2M+3H]3+",2,3,3.0218293560000000219,11,0,0.5,"H3","H0"),
-        list("[3M+3H]3+",3,3,3.0218293560000000219,11,0,0.25,"H3","H0"),
-        list("[M-2H+NH4]-",1,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"),
-        list("[2M-2H+NH4]-",2,-1,16.019272654000001665,12,0,0.0,"N1H4","H2"),
-        list("[3M-2H+NH4]-",3,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"),
-        list("[M+NH4]+",1,1,18.033825558000000199,13,1,1.0,"N1H4","H0"),
-        list("[2M+NH4]+",2,1,18.033825558000000199,13,0,0.5,"N1H4","H0"),
-        list("[3M+NH4]+",3,1,18.033825558000000199,13,0,0.25,"N1H4","H0"),
-        list("[M+H+NH4]2+",1,2,19.041102009999999467,14,0,0.5,"N1H5","H0"),
-        list("[2M+H+NH4]2+",2,2,19.041102009999999467,14,0,0.5,"N1H5","H0"),
-        list("[3M+H+NH4]2+",3,2,19.041102009999999467,14,0,0.25,"N1H5","H0"),
-        list("[M+Na-2H]-",1,-1,20.974668176000001551,15,0,0.75,"Na1","H2"),
-        list("[2M-2H+Na]-",2,-1,20.974668176000001551,15,0,0.25,"Na1","H2"),
-        list("[3M-2H+Na]-",3,-1,20.974668176000001551,15,0,0.25,"Na1","H2"),
-        list("[M+Na]+",1,1,22.989221080000000086,16,1,1.0,"Na1","H0"),
-        list("[2M+Na]+",2,1,22.989221080000000086,16,0,0.5,"Na1","H0"),
-        list("[3M+Na]+",3,1,22.989221080000000086,16,0,0.25,"Na1","H0"),
-        list("[M+H+Na]2+",1,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"),
-        list("[2M+H+Na]2+",2,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"),
-        list("[3M+H+Na]2+",3,2,23.996497531999999353,17,0,0.25,"Na1H1","H0"),
-        list("[M+2H+Na]3+",1,3,25.003773983999998619,18,0,0.25,"H2Na1","H0"),
-        list("[M+CH3OH+H]+",1,1,33.033491200000000276,19,0,0.25,"C1O1H5","H0"),
-        list("[M-H+Cl]2-",1,-2,33.962124838000001148,20,0,1.0,"Cl1","H1"),
-        list("[2M-H+Cl]2-",2,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"),
-        list("[3M-H+Cl]2-",3,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"),
-        list("[M+Cl]-",1,-1,34.969401290000000416,21,1,1.0,"Cl1","H0"),
-        list("[2M+Cl]-",2,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"),
-        list("[3M+Cl]-",3,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"),
-        list("[M+K-2H]-",1,-1,36.948605415999999479,22,0,0.5,"K1","H2"),
-        list("[2M-2H+K]-",2,-1,36.948605415999999479,22,0,0.0,"K1","H2"),
-        list("[3M-2H+K]-",3,-1,36.948605415999999479,22,0,0.0,"K1","H2"),
-        list("[M+K]+",1,1,38.963158319999998013,23,1,1.0,"K1","H0"),
-        list("[2M+K]+",2,1,38.963158319999998013,23,0,0.5,"K1","H0"),
-        list("[3M+K]+",3,1,38.963158319999998013,23,0,0.25,"K1","H0"),
-        list("[M+H+K]2+",1,2,39.970434771999997281,24,0,0.5,"K1H1","H0"),
-        list("[2M+H+K]2+",2,2,39.970434771999997281,24,0,0.5,"K1H1","H0"),
-        list("[3M+H+K]2+",3,2,39.970434771999997281,24,0,0.25,"K1H1","H0"),
-        list("[M+ACN+H]+",1,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"),
-        list("[2M+ACN+H]+",2,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"),
-        list("[M+2Na-H]+",1,1,44.971165708000000902,26,0,0.5,"Na2","H1"),
-        list("[2M+2Na-H]+",2,1,44.971165708000000902,26,0,0.25,"Na2","H1"),
-        list("[3M+2Na-H]+",3,1,44.971165708000000902,26,0,0.25,"Na2","H1"),
-        list("[2M+FA-H]-",2,-1,44.998202851999998586,27,0,0.25,"C1O2H2","H1"),
-        list("[M+FA-H]-",1,-1,44.998202851999998586,27,0,0.5,"C1O2H2","H1"),
-        list("[M+2Na]2+",1,2,45.978442160000000172,28,0,0.5,"Na2","H0"),
-        list("[2M+2Na]2+",2,2,45.978442160000000172,28,0,0.5,"Na2","H0"),
-        list("[3M+2Na]2+",3,2,45.978442160000000172,28,0,0.25,"Na2","H0"),
-        list("[M+H+2Na]3+",1,3,46.985718611999999438,29,0,0.25,"H1Na2","H0"),
-        list("[M+H+FA]+",1,1,47.012755755999997122,30,0,0.25,"C1O2H3","H0"),
-        list("[M+Hac-H]-",1,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"),
-        list("[2M+Hac-H]-",2,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"),
-        list("[M+IsoProp+H]+",1,1,61.064791327999998317,32,0,0.25,"C3H9O1","H0"),
-        list("[M+Na+K]2+",1,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"),
-        list("[2M+Na+K]2+",2,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"),
-        list("[3M+Na+K]2+",3,2,61.9523793999999981,33,0,0.25,"Na1K1","H0"),
-        list("[M+NO3]-",1,-1,61.988366450000000895,34,0,0.5,"N1O3","H0"),
-        list("[M+ACN+Na]+",1,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"),
-        list("[2M+ACN+Na]+",2,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"),
-        list("[M+NH4+FA]+",1,1,64.039304861999994502,36,0,0.25,"N1C1O2H6","H0"),
-        list("[M-2H+Na+FA]-",1,-1,66.980147479999999405,37,0,0.5,"NaC1O2H2","H2"),
-        list("[M+3Na]3+",1,3,68.967663239999993153,38,0,0.25,"Na3","H0"),
-        list("[M+Na+FA]+",1,1,68.99470038399999794,39,0,0.25,"Na1C1O2H2","H0"),
-        list("[M+2Cl]2-",1,-2,69.938802580000000832,40,0,1.0,"Cl2","H0"),
-        list("[2M+2Cl]2-",2,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"),
-        list("[3M+2Cl]2-",3,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"),
-        list("[M+2K-H]+",1,1,76.919040187999996758,41,0,0.5,"K2","H1"),
-        list("[2M+2K-H]+",2,1,76.919040187999996758,41,0,0.25,"K2","H1"),
-        list("[3M+2K-H]+",3,1,76.919040187999996758,41,0,0.25,"K2","H1"),
-        list("[M+2K]2+",1,2,77.926316639999996028,42,0,0.5,"K2","H0"),
-        list("[2M+2K]2+",2,2,77.926316639999996028,42,0,0.5,"K2","H0"),
-        list("[3M+2K]2+",3,2,77.926316639999996028,42,0,0.25,"K2","H0"),
-        list("[M+Br]-",1,-1,78.918886479999997619,43,1,1.0,"Br1","H0"),
-        list("[M+Cl+FA]-",1,-1,80.974880593999998268,44,0,0.5,"Cl1C1O2H2","H0"),
-        list("[M+AcNa-H]-",1,-1,80.995797543999998426,45,0,0.25,"C2H3Na1O2","H1"),
-        list("[M+2ACN+2H]2+",1,2,84.067651115999993292,46,0,0.25,"C4H8N2","H0"),
-        list("[M+K+FA]+",1,1,84.968637623999995868,47,0,0.25,"K1C1O2H2","H0"),
-        list("[M+Cl+Na+FA-H]-",1,-1,102.95682522200000619,48,0,0.5,"Cl1Na1C1O2H2","H1"),
-        list("[2M+3H2O+2H]+",2,1,104.03153939599999944,49,0,0.25,"H8O6","H0"),
-        list("[M+TFA-H]-",1,-1,112.98558742000000165,50,0,0.5,"C2F3O2H1","H1"),
-        list("[M+H+TFA]+",1,1,115.00014032400000019,51,0,0.25,"C2F3O2H2","H0"),
-        list("[M+3ACN+2H]2+",1,2,125.09420022199999778,52,0,0.25,"C6H11N3","H0"),
-        list("[M+NH4+TFA]+",1,1,132.02668943000000468,53,0,0.25,"N1C2F3O2H5","H0"),
-        list("[M+Na+TFA]+",1,1,136.98208495200000811,54,0,0.25,"Na1C2F3O2H1","H0"),
-        list("[M+Cl+TFA]-",1,-1,148.96226516199999423,55,0,0.5,"Cl1C2F3O2H1","H0"),
-        list("[M+K+TFA]+",1,1,152.95602219200000604,56,0,0.25,"K1C2F3O2H1","H0")
+        list("[M-H2O-H]-", 1, -1, -48.992020312000001069, 1, 0, 0.5, "H0", "H1O3"),
+        list("[M-H-Cl+O]-", 1, -1, -19.981214542000000022, 2, 0, 0.5, "O1", "H1Cl1"),
+        list("[M-Cl+O]-", 1, -1, -18.973389510000000512, 3, 0, 0.5, "O1", "Cl1"),
+        list("[M-3H]3-", 1, -3, -3.0218293560000000219, 4, 0, 1.0, "H0", "H3"),
+        list("[2M-3H]3-", 2, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"),
+        list("[3M-3H]3-", 3, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"),
+        list("[M-2H]2-", 1, -2, -2.0145529039999998666, 5, 0, 1.0, "H0", "H2"),
+        list("[2M-2H]2-", 2, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"),
+        list("[3M-2H]2-", 3, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"),
+        list("[M-H]-", 1, -1, -1.0072764519999999333, 6, 1, 1.0, "H0", "H1"),
+        list("[2M-H]-", 2, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"),
+        list("[3M-H]-", 3, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"),
+        list("[M]+", 1, 1, -0.00054858000000000000945, 7, 1, 1.0, "H0", "H0"),
+        list("[M]-", 1, -1, 0.00054858000000000000945, 8, 1, 1.0, "H0", "H0"),
+        list("[M+H]+", 1, 1, 1.0072764519999999333, 9, 1, 1.0, "H1", "H0"),
+        list("[2M+H]+", 2, 1, 1.0072764519999999333, 9, 0, 0.5, "H1", "H0"),
+        list("[3M+H]+", 3, 1, 1.0072764519999999333, 9, 0, 0.25, "H1", "H0"),
+        list("[M+2H]2+", 1, 2, 2.0145529039999998666, 10, 0, 0.75, "H2", "H0"),
+        list("[2M+2H]2+", 2, 2, 2.0145529039999998666, 10, 0, 0.5, "H2", "H0"),
+        list("[3M+2H]2+", 3, 2, 2.0145529039999998666, 10, 0, 0.25, "H2", "H0"),
+        list("[M+3H]3+", 1, 3, 3.0218293560000000219, 11, 0, 0.75, "H3", "H0"),
+        list("[2M+3H]3+", 2, 3, 3.0218293560000000219, 11, 0, 0.5, "H3", "H0"),
+        list("[3M+3H]3+", 3, 3, 3.0218293560000000219, 11, 0, 0.25, "H3", "H0"),
+        list("[M-2H+NH4]-", 1, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"),
+        list("[2M-2H+NH4]-", 2, -1, 16.019272654000001665, 12, 0, 0.0, "N1H4", "H2"),
+        list("[3M-2H+NH4]-", 3, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"),
+        list("[M+NH4]+", 1, 1, 18.033825558000000199, 13, 1, 1.0, "N1H4", "H0"),
+        list("[2M+NH4]+", 2, 1, 18.033825558000000199, 13, 0, 0.5, "N1H4", "H0"),
+        list("[3M+NH4]+", 3, 1, 18.033825558000000199, 13, 0, 0.25, "N1H4", "H0"),
+        list("[M+H+NH4]2+", 1, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"),
+        list("[2M+H+NH4]2+", 2, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"),
+        list("[3M+H+NH4]2+", 3, 2, 19.041102009999999467, 14, 0, 0.25, "N1H5", "H0"),
+        list("[M+Na-2H]-", 1, -1, 20.974668176000001551, 15, 0, 0.75, "Na1", "H2"),
+        list("[2M-2H+Na]-", 2, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"),
+        list("[3M-2H+Na]-", 3, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"),
+        list("[M+Na]+", 1, 1, 22.989221080000000086, 16, 1, 1.0, "Na1", "H0"),
+        list("[2M+Na]+", 2, 1, 22.989221080000000086, 16, 0, 0.5, "Na1", "H0"),
+        list("[3M+Na]+", 3, 1, 22.989221080000000086, 16, 0, 0.25, "Na1", "H0"),
+        list("[M+H+Na]2+", 1, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"),
+        list("[2M+H+Na]2+", 2, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"),
+        list("[3M+H+Na]2+", 3, 2, 23.996497531999999353, 17, 0, 0.25, "Na1H1", "H0"),
+        list("[M+2H+Na]3+", 1, 3, 25.003773983999998619, 18, 0, 0.25, "H2Na1", "H0"),
+        list("[M+CH3OH+H]+", 1, 1, 33.033491200000000276, 19, 0, 0.25, "C1O1H5", "H0"),
+        list("[M-H+Cl]2-", 1, -2, 33.962124838000001148, 20, 0, 1.0, "Cl1", "H1"),
+        list("[2M-H+Cl]2-", 2, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"),
+        list("[3M-H+Cl]2-", 3, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"),
+        list("[M+Cl]-", 1, -1, 34.969401290000000416, 21, 1, 1.0, "Cl1", "H0"),
+        list("[2M+Cl]-", 2, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"),
+        list("[3M+Cl]-", 3, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"),
+        list("[M+K-2H]-", 1, -1, 36.948605415999999479, 22, 0, 0.5, "K1", "H2"),
+        list("[2M-2H+K]-", 2, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"),
+        list("[3M-2H+K]-", 3, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"),
+        list("[M+K]+", 1, 1, 38.963158319999998013, 23, 1, 1.0, "K1", "H0"),
+        list("[2M+K]+", 2, 1, 38.963158319999998013, 23, 0, 0.5, "K1", "H0"),
+        list("[3M+K]+", 3, 1, 38.963158319999998013, 23, 0, 0.25, "K1", "H0"),
+        list("[M+H+K]2+", 1, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"),
+        list("[2M+H+K]2+", 2, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"),
+        list("[3M+H+K]2+", 3, 2, 39.970434771999997281, 24, 0, 0.25, "K1H1", "H0"),
+        list("[M+ACN+H]+", 1, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"),
+        list("[2M+ACN+H]+", 2, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"),
+        list("[M+2Na-H]+", 1, 1, 44.971165708000000902, 26, 0, 0.5, "Na2", "H1"),
+        list("[2M+2Na-H]+", 2, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"),
+        list("[3M+2Na-H]+", 3, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"),
+        list("[2M+FA-H]-", 2, -1, 44.998202851999998586, 27, 0, 0.25, "C1O2H2", "H1"),
+        list("[M+FA-H]-", 1, -1, 44.998202851999998586, 27, 0, 0.5, "C1O2H2", "H1"),
+        list("[M+2Na]2+", 1, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"),
+        list("[2M+2Na]2+", 2, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"),
+        list("[3M+2Na]2+", 3, 2, 45.978442160000000172, 28, 0, 0.25, "Na2", "H0"),
+        list("[M+H+2Na]3+", 1, 3, 46.985718611999999438, 29, 0, 0.25, "H1Na2", "H0"),
+        list("[M+H+FA]+", 1, 1, 47.012755755999997122, 30, 0, 0.25, "C1O2H3", "H0"),
+        list("[M+Hac-H]-", 1, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"),
+        list("[2M+Hac-H]-", 2, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"),
+        list("[M+IsoProp+H]+", 1, 1, 61.064791327999998317, 32, 0, 0.25, "C3H9O1", "H0"),
+        list("[M+Na+K]2+", 1, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"),
+        list("[2M+Na+K]2+", 2, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"),
+        list("[3M+Na+K]2+", 3, 2, 61.9523793999999981, 33, 0, 0.25, "Na1K1", "H0"),
+        list("[M+NO3]-", 1, -1, 61.988366450000000895, 34, 0, 0.5, "N1O3", "H0"),
+        list("[M+ACN+Na]+", 1, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"),
+        list("[2M+ACN+Na]+", 2, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"),
+        list("[M+NH4+FA]+", 1, 1, 64.039304861999994502, 36, 0, 0.25, "N1C1O2H6", "H0"),
+        list("[M-2H+Na+FA]-", 1, -1, 66.980147479999999405, 37, 0, 0.5, "NaC1O2H2", "H2"),
+        list("[M+3Na]3+", 1, 3, 68.967663239999993153, 38, 0, 0.25, "Na3", "H0"),
+        list("[M+Na+FA]+", 1, 1, 68.99470038399999794, 39, 0, 0.25, "Na1C1O2H2", "H0"),
+        list("[M+2Cl]2-", 1, -2, 69.938802580000000832, 40, 0, 1.0, "Cl2", "H0"),
+        list("[2M+2Cl]2-", 2, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"),
+        list("[3M+2Cl]2-", 3, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"),
+        list("[M+2K-H]+", 1, 1, 76.919040187999996758, 41, 0, 0.5, "K2", "H1"),
+        list("[2M+2K-H]+", 2, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"),
+        list("[3M+2K-H]+", 3, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"),
+        list("[M+2K]2+", 1, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"),
+        list("[2M+2K]2+", 2, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"),
+        list("[3M+2K]2+", 3, 2, 77.926316639999996028, 42, 0, 0.25, "K2", "H0"),
+        list("[M+Br]-", 1, -1, 78.918886479999997619, 43, 1, 1.0, "Br1", "H0"),
+        list("[M+Cl+FA]-", 1, -1, 80.974880593999998268, 44, 0, 0.5, "Cl1C1O2H2", "H0"),
+        list("[M+AcNa-H]-", 1, -1, 80.995797543999998426, 45, 0, 0.25, "C2H3Na1O2", "H1"),
+        list("[M+2ACN+2H]2+", 1, 2, 84.067651115999993292, 46, 0, 0.25, "C4H8N2", "H0"),
+        list("[M+K+FA]+", 1, 1, 84.968637623999995868, 47, 0, 0.25, "K1C1O2H2", "H0"),
+        list("[M+Cl+Na+FA-H]-", 1, -1, 102.95682522200000619, 48, 0, 0.5, "Cl1Na1C1O2H2", "H1"),
+        list("[2M+3H2O+2H]+", 2, 1, 104.03153939599999944, 49, 0, 0.25, "H8O6", "H0"),
+        list("[M+TFA-H]-", 1, -1, 112.98558742000000165, 50, 0, 0.5, "C2F3O2H1", "H1"),
+        list("[M+H+TFA]+", 1, 1, 115.00014032400000019, 51, 0, 0.25, "C2F3O2H2", "H0"),
+        list("[M+3ACN+2H]2+", 1, 2, 125.09420022199999778, 52, 0, 0.25, "C6H11N3", "H0"),
+        list("[M+NH4+TFA]+", 1, 1, 132.02668943000000468, 53, 0, 0.25, "N1C2F3O2H5", "H0"),
+        list("[M+Na+TFA]+", 1, 1, 136.98208495200000811, 54, 0, 0.25, "Na1C2F3O2H1", "H0"),
+        list("[M+Cl+TFA]-", 1, -1, 148.96226516199999423, 55, 0, 0.5, "Cl1C2F3O2H1", "H0"),
+        list("[M+K+TFA]+", 1, 1, 152.95602219200000604, 56, 0, 0.25, "K1C2F3O2H1","H0")
     )
     dummy_adduct <- orm$adduct()
     for (adduct in adducts) {
         i <- 0
-        dummy_adduct$set_name(adduct[[i <- i+1]])
-        dummy_adduct$set_multi(adduct[[i <- i+1]])
-        dummy_adduct$set_charge(adduct[[i <- i+1]])
-        dummy_adduct$set_mass(adduct[[i <- i+1]])
-        dummy_adduct$set_oidscore(adduct[[i <- i+1]])
-        dummy_adduct$set_quasi(adduct[[i <- i+1]])
-        dummy_adduct$set_ips(adduct[[i <- i+1]])
-        dummy_adduct$set_formula_add(adduct[[i <- i+1]])
-        dummy_adduct$set_formula_ded(adduct[[i <- i+1]])
+        dummy_adduct$set_name(adduct[[i <- i + 1]])
+        dummy_adduct$set_multi(adduct[[i <- i + 1]])
+        dummy_adduct$set_charge(adduct[[i <- i + 1]])
+        dummy_adduct$set_mass(adduct[[i <- i + 1]])
+        dummy_adduct$set_oidscore(adduct[[i <- i + 1]])
+        dummy_adduct$set_quasi(adduct[[i <- i + 1]])
+        dummy_adduct$set_ips(adduct[[i <- i + 1]])
+        dummy_adduct$set_formula_add(adduct[[i <- i + 1]])
+        dummy_adduct$set_formula_ded(adduct[[i <- i + 1]])
         invisible(dummy_adduct$save())
-        dummy_adduct$clear(unset_id=TRUE)
+        dummy_adduct$clear(unset_id = TRUE)
     }
     message("Adducts created")
 }
 
-insert_base_data <- function(orm, path, archetype=FALSE) {
+insert_base_data <- function(orm, path, archetype = FALSE) {
     if (archetype) {
         ## not implemented yet
-        return ()
+        return()
     }
     base_data <- readLines(path)
-    for (sql in strsplit(paste(base_data, collapse=" "), ";")[[1]]) {
+    for (sql in strsplit(paste(base_data, collapse = " "), ";")[[1]]) {
         orm$execute(sql)
     }
     set_database_version(orm, "enriched")
 }
 
 insert_compounds <- function(orm, compounds_path) {
-    compounds <- read.csv(file=compounds_path, sep="\t")
+    compounds <- read.csv(file = compounds_path, sep = "\t")
     if (is.null(compounds <- translate_compounds(compounds))) {
         stop("Could not find asked compound's attributes in csv file.")
     }
@@ -344,18 +367,21 @@
         dummy_compound$set_name(compounds[i, "name"])
         dummy_compound$set_common_name(compounds[i, "common_name"])
         dummy_compound$set_formula(compounds[i, "formula"])
-        compound_list[[length(compound_list)+1]] <- as.list(
+        compound_list[[length(compound_list) + 1]] <- as.list(
             dummy_compound,
             c("mz", "name", "common_name", "formula")
         )
-        dummy_compound$clear(unset_id=TRUE)
+        dummy_compound$clear(unset_id = TRUE)
     }
-    invisible(dummy_compound$save(bulk=compound_list))
+    invisible(dummy_compound$save(bulk = compound_list))
 }
 
 translate_compounds <- function(compounds) {
     recognized_headers <- list(
-        c("HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", "MetName", "ChemFormula", "INChIkey")
+        c(
+            "HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1",
+            "MetName", "ChemFormula", "INChIkey"
+        )
     )
     header_translators <- list(
         hmdb_header_translator
@@ -363,23 +389,23 @@
     for (index in seq_along(recognized_headers)) {
         headers <- recognized_headers[[index]]
         if (identical(colnames(compounds), headers)) {
-            return (header_translators[[index]](compounds))
+            return(header_translators[[index]](compounds))
         }
     }
     if (is.null(translator <- guess_translator(colnames(compounds)))) {
-        return (NULL)
+        return(NULL)
     }
-    return (csv_header_translator(translator, compounds))
+    return(csv_header_translator(translator, compounds))
 }
 
 guess_translator <- function(header) {
     result <- list(
-        # HMDB_ID=NULL,
-        mz=NULL,
-        name=NULL,
-        common_name=NULL,
-        formula=NULL,
-        # inchi_key=NULL
+        # HMDB_ID = NULL,
+        mz = NULL,
+        name = NULL,
+        common_name = NULL,
+        formula = NULL,
+        # inchi_key = NULL
     )
     asked_cols <- names(result)
     for (asked_col in asked_cols) {
@@ -395,39 +421,39 @@
         }
     }
     if (any(mapply(is.null, result))) {
-        return (NULL)
+        return(NULL)
     }
-    return (result)
+    return(result)
 }
 
 hmdb_header_translator <- function(compounds) {
-    return (csv_header_translator(
+    return(csv_header_translator(
         list(
-            HMDB_ID="HMDB_ID",
-            mz="MzBank",
-            name="MetName",
-            common_name="MetName",
-            formula="ChemFormula",
-            inchi_key="INChIkey"
+            HMDB_ID = "HMDB_ID",
+            mz = "MzBank",
+            name = "MetName",
+            common_name = "MetName",
+            formula = "ChemFormula",
+            inchi_key = "INChIkey"
         ), compounds
     ))
 }
 
 csv_header_translator <- function(translation_table, csv) {
     header_names <- names(translation_table)
-    result <- data.frame(1:nrow(csv))
+    result <- data.frame(seq_len(nrow(csv)))
     for (i in seq_along(header_names)) {
         result[, header_names[[i]]] <- csv[, translation_table[[i]]]
     }
     result[, "mz"] <- as.numeric(result[, "mz"])
-    return (result)
+    return(result)
 }
 
 set_database_version <- function(orm, version) {
     orm$set_tag(
         version,
-        tag_name="database_version",
-        tag_table_name="XSeeker_tagging_table"
+        tag_name = "database_version",
+        tag_table_name = "XSeeker_tagging_table"
     )
 }
 
@@ -444,15 +470,16 @@
     error <- tryCatch({
         process_sample_list(
             orm, rdata, samples,
-            show_percent=show_percent
+            show_percent = show_percent,
+            file_grouping_var = options$class
         )
         NULL
-    }, error=function(e) {
+    }, error = function(e) {
         message(e)
         e
     })
     if (!is.null(mzml_tmp_dir)) {
-        unlink(mzml_tmp_dir, recursive=TRUE)
+        unlink(mzml_tmp_dir, recursive = TRUE)
     }
     if (!is.null(error)) {
         stop(error)
@@ -463,23 +490,49 @@
     if (is.null(rdata$singlefile)) {
         message("Extracting mxml files")
         tmp <- tempdir()
-        rdata$singlefile <- utils::unzip(rdata$zipfile, exdir=tmp)
-        names(rdata$singlefile) <- tools::file_path_sans_ext(basename(rdata$singlefile))
+        rdata$singlefile <- utils::unzip(rdata$zipfile, exdir = tmp)
+        names(rdata$singlefile) <- tools::file_path_sans_ext(
+            basename(rdata$singlefile)
+        )
         message("Extracted")
-        return (tmp)
+        return(tmp)
     } else {
-        message(sprintf("Not a zip file, loading files directly from path: %s", paste(rdata$singlefile, collapse=" ; ")))
+        message(sprintf(
+            "Not a zip file, loading files directly from path: %s",
+            paste(rdata$singlefile, collapse = " ; ")
+        ))
     }
-    return (NULL)
+    return(NULL)
 }
 
-process_sample_list <- function(orm, radta, sample_names, show_percent) {
-    file_grouping_var <- find_grouping_var(rdata$variableMetadata)
+process_sample_list <- function(
+    orm,
+    rdata,
+    sample_names,
+    show_percent,
+    file_grouping_var = NULL
+) {
+    if (is.null(file_grouping_var)) {
+        file_grouping_var <- find_grouping_var(rdata$variableMetadata)
+        if (is.null(file_grouping_var)) {
+            stop("Malformed variableMetada.")
+        }
+    }
+    tryCatch({
+        headers <- colnames(rdata$variableMetadata)
+        file_grouping_var <- headers[[as.numeric(file_grouping_var)]]
+    }, error = function(e) NULL)
+    if (
+        is.null(file_grouping_var)
+        || !(file_grouping_var %in% colnames(rdata$variableMetadata))
+    ) {
+        stop(sprintf(
+            "Could not find grouping variable %s in var meta file.",
+            file_grouping_var
+        ))
+    }
     message("Processing samples.")
     message(sprintf("File grouping variable: %s", file_grouping_var))
-    if(is.null(file_grouping_var)) {
-        stop("Malformed variableMetada.")
-    }
 
     context <- new.env()
     context$samples <- list()
@@ -492,7 +545,6 @@
 
     process_params <- list()
     if (is.null(process_arg_list)) {
-        histories <- list()
         for (history in xcms_set@.processHistory) {
             if (
                 class(history@param) == "CentWaveParam"
@@ -500,19 +552,23 @@
             ) {
                 params <- history@param
                 process_params <- list(list(
-                    xfunction="annotatediff",
-                    ppm=params@ppm,
-                    peakwidth=sprintf("%s - %s", params@peakwidth[[1]], params@peakwidth[[2]]),
-                    snthresh=params@snthresh,
-                    prefilterStep=params@prefilter[[1]],
-                    prefilterLevel=params@prefilter[[2]],
-                    mzdiff=params@mzdiff,
-                    fitgauss=params@fitgauss,
-                    noise=params@noise,
-                    mzCenterFun=params@mzCenterFun,
-                    integrate=params@integrate,
-                    firstBaselineCheck=params@firstBaselineCheck,
-                    snthreshIsoROIs=!identical(params@roiScales, numeric(0))
+                    xfunction = "annotatediff",
+                    ppm = params@ppm,
+                    peakwidth = sprintf(
+                        "%s - %s",
+                        params@peakwidth[[1]],
+                        params@peakwidth[[2]]
+                    ),
+                    snthresh = params@snthresh,
+                    prefilterStep = params@prefilter[[1]],
+                    prefilterLevel = params@prefilter[[2]],
+                    mzdiff = params@mzdiff,
+                    fitgauss = params@fitgauss,
+                    noise = params@noise,
+                    mzCenterFun = params@mzCenterFun,
+                    integrate = params@integrate,
+                    firstBaselineCheck = params@firstBaselineCheck,
+                    snthreshIsoROIs = !identical(params@roiScales, numeric(0))
                 ))
                 break
             }
@@ -521,9 +577,11 @@
         for (list_name in names(process_arg_list)) {
             param_list <- list()
             for (param_name in names(process_arg_list[[list_name]])) {
-                param_list[[param_name]] <- process_arg_list[[list_name]][[param_name]]
+                param_list[[param_name]] <- process_arg_list[[
+                    list_name
+                ]][[param_name]]
             }
-            process_params[[length(process_params)+1]] <- param_list
+            process_params[[length(process_params) + 1]] <- param_list
         }
     }
 
@@ -531,6 +589,17 @@
 
 
     indices <- as.numeric(unique(var_meta[, file_grouping_var]))
+    if (any(is.null(names(singlefile)[indices]))) {
+        stop(sprintf(
+            paste(
+                "Indices defined by grouping variable %s are not all present",
+                "in singlefile names (%s).\nCannot continue. Indices: %s"
+            ),
+            file_grouping_var,
+            paste(names(singlefile), collapse = ", "),
+            paste(indices, collapse = ", ")
+        ))
+    }
     smol_xcms_set <- orm$smol_xcms_set()
     mz_tab_info <- new.env()
     g <- xcms::groups(xcms_set)
@@ -538,10 +607,16 @@
     mz_tab_info$dataset_path <- xcms::filepaths(xcms_set)
     mz_tab_info$sampnames <- xcms::sampnames(xcms_set)
     mz_tab_info$sampclass <- xcms::sampclass(xcms_set)
-    mz_tab_info$rtmed <- g[,"rtmed"]
-    mz_tab_info$mzmed <- g[,"mzmed"]
-    mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(xcms_set, value="into")
-    blogified <- blob::blob(fst::compress_fst(serialize(mz_tab_info, NULL), compression=100))
+    mz_tab_info$rtmed <- g[, "rtmed"]
+    mz_tab_info$mzmed <- g[, "mzmed"]
+    mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(
+        xcms_set,
+        value = "into"
+    )
+    blogified <- blob::blob(fst::compress_fst(
+        serialize(mz_tab_info, NULL),
+        compression = 100
+    ))
     rm(mz_tab_info)
 
     invisible(smol_xcms_set$set_raw(blogified)$save())
@@ -582,9 +657,9 @@
         env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
 
         sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id)
-        rm (env)
+        rm(env)
         context$samples[no] <- sample$get_id()
-        rm (sample)
+        rm(sample)
     }
     context$clusters <- list()
     context$show_percent <- show_percent
@@ -597,28 +672,32 @@
     message("Features enrichment")
     complete_features(orm, clusters, show_percent)
     message("Features enrichment done.")
-    return (NULL)
+    return(NULL)
 }
 
 find_grouping_var <- function(var_meta) {
-    known_colnames = c(
+    known_colnames <- c(
         "name", "namecustom", "mz", "mzmin", "mzmax",
-        "rt", "rtmin", "rtmax", "npeaks", "isotopes", "adduct", "pcgroup"
+        "rt", "rtmin", "rtmax", "npeaks", "isotopes", "adduct",
+        "pcgroup", "ms_level"
     )
     col_names <- colnames(var_meta)
-    classes = list()
+    classes <- list()
     for (name in col_names) {
         if (!(name %in% known_colnames)) {
-            classes[[length(classes)+1]] = name
+            classes[[length(classes) + 1]] <- name
         }
     }
     if (length(classes) > 1) {
-        stop(sprintf("Only one class expected in the variable metadata. Found %d .", length(classes)))
+        stop(sprintf(
+            "Only one class expected in the variable metadata. Found %d .",
+            length(classes)
+        ))
     }
     if (length(classes) == 0) {
         stop("Could not find any class column in your variableMetadata.")
     }
-    return (classes[[1]])
+    return(classes[[1]])
 }
 
 add_sample_to_database <- function(orm, env, context, smol_xcms_set_id) {
@@ -629,12 +708,15 @@
         $set_path(env$dataset_path)
         $set_kind("enriched_rdata")
         $set_polarity(
-            if (is.null(env$polarity) || identical(env$polarity, character(0))) ""
+            if (
+                is.null(env$polarity)
+                || identical(env$polarity, character(0))
+            ) ""
             else env$polarity
         )
         $set_raw(blob::blob(fst::compress_fst(
             serialize(env, NULL),
-            compression=100
+            compression = 100
         )))
     )
     sample[["smol_xcms_set_id"]] <- smol_xcms_set_id
@@ -642,7 +724,7 @@
     sample <- sample$save()
     load_process_params(orm, sample, env$process_params)
     message(sprintf("Sample %s inserted.", env$sample_name))
-    return (sample)
+    return(sample)
 }
 
 
@@ -660,14 +742,14 @@
         next_pc_group, next_align_group
     ))
     message("Extracting features done.")
-    return (NULL)
+    return(NULL)
 }
 
 get_next_id <- function(models, attribute) {
     if ((id <- models$max(attribute)) == Inf || id == -Inf) {
-        return (0)
+        return(0)
     }
-    return (id)
+    return(id)
 }
 
 create_features <- function(
@@ -676,7 +758,7 @@
     next_pc_group, next_align_group
 ) {
     field_names <- as.list(names(orm$feature()$fields__))
-    field_names[field_names=="id"] <- NULL
+    field_names[field_names == "id"] <- NULL
 
     features <- list()
     dummy_feature <- orm$feature()
@@ -688,13 +770,13 @@
     rows <- seq_len(nrow(var_meta))
     if (PROCESS_SMOL_BATCH) {
 
-        rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))]
+        rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))]
     }
     cluster_row <- list()
     for (row in rows) {
         if (show_percent && (row / total) * 100 > percent) {
             percent <- percent + 1
-            message("\r", sprintf("\r%d %%", percent), appendLF=FALSE)
+            message("\r", sprintf("\r%d %%", percent), appendLF = FALSE)
         }
 
         dummy_feature$set_featureID(next_feature_id)
@@ -710,27 +792,44 @@
 
         peak_list <- context$peaks[context$groupidx[[row]], ]
         if (! ("matrix" %in% class(peak_list))) {
-            peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list)))
+            peak_list <- matrix(
+                peak_list,
+                nrow = 1,
+                ncol = length(peak_list),
+                dimnames = list(c(), names(peak_list))
+            )
         }
 
         clusterID <- as.character(clusterID)
         if (is.null(context$central_feature[[clusterID]])) {
             int_o <- extract_peak_var(peak_list, "into")
             context$central_feature[[clusterID]] <- (
-                peak_list[peak_list[, "into"] == int_o,]["sample"]
+                peak_list[peak_list[, "into"] == int_o, ]["sample"]
             )
         }
 
         if (!DEBUG_FAST_IGNORE_SLOW_OP) {
-            sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE]
-            if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
-                if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
+            central_feature <- context$central_feature[[clusterID]]
+            sample_peak_list <- peak_list[
+                as.integer(peak_list[, "sample"]) == central_feature,
+                ,
+                drop = FALSE
+            ]
+            if (
+                !identical(sample_peak_list, numeric(0))
+                && !is.null(nrow(sample_peak_list))
+                && nrow(sample_peak_list) != 0
+            ) {
+                int_o <- extract_peak_var(sample_peak_list, "into")
+                if (!is.na(int_o)) {
                     dummy_feature$set_int_o(int_o)
                 }
-                if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
+                int_b <- extract_peak_var(sample_peak_list, "intb")
+                if (!is.na(int_b)) {
                     dummy_feature$set_int_b(int_b)
                 }
-                if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
+                max_o <- extract_peak_var(sample_peak_list, "maxo")
+                if (!is.na(max_o)) {
                     dummy_feature$set_max_o(max_o)
                 }
             }
@@ -744,13 +843,13 @@
             next_align_group
         )
         next_align_group <- next_align_group + 1
-        features[[length(features)+1]] <- as.list(dummy_feature, field_names)
+        features[[length(features) + 1]] <- as.list(dummy_feature, field_names)
         dummy_feature$clear()
     }
     rm(var_meta)
     message("")
     message("Saving features")
-    invisible(dummy_feature$save(bulk=features))
+    invisible(dummy_feature$save(bulk = features))
 
     ## We link manually clusters to the sample they're in.
     link_cache <- list()
@@ -758,24 +857,25 @@
         sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"])
         for (sample_id in context$samples[sample_nos]) {
             cluster_id <- cluster_row[[row]]$get_id()
-            if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) {
+            id <- paste(sample_id, cluster_id, sep = ";")
+            if (is.null(link_cache[[id]])) {
                 link_cache[[id]] <- 1
                 orm$cluster_sample(
-                    sample_id=sample_id,
-                    cluster_id=cluster_id
+                    sample_id = sample_id,
+                    cluster_id = cluster_id
                 )$save()
             }
         }
     }
 
     message("Saved.")
-    return (context$clusters)
+    return(context$clusters)
 }
 
-extract_peak_var <- function(peak_list, var_name, selector=max) {
+extract_peak_var <- function(peak_list, var_name, selector = max) {
     value <- peak_list[, var_name]
     names(value) <- NULL
-    return (selector(value))
+    return(selector(value))
 }
 
 set_feature_fields_from_var_meta <- function(feature, var_meta) {
@@ -800,24 +900,24 @@
     if (!is.null(isotopes <- var_meta[["isotopes"]]) && !is.na(isotopes)) {
         feature$set_iso(isotopes)
     }
-    return (feature)
+    return(feature)
 }
 
 extract_iso  <- function(weird_data) {
     if (grepl("^\\[\\d+\\]", weird_data)[[1]]) {
-        return (sub("^\\[\\d+\\]", "", weird_data, perl=TRUE))
+        return(sub("^\\[\\d+\\]", "", weird_data, perl = TRUE))
     }
-    return (weird_data)
+    return(weird_data)
 }
 
-extract_clusterID <- function(weird_data, next_cluster_id){
+extract_clusterID <- function(weird_data, next_cluster_id) {
     if (grepl("^\\[\\d+\\]", weird_data)[[1]]) {
         clusterID <- stringr::str_extract(weird_data, "^\\[\\d+\\]")
         clusterID <- as.numeric(stringr::str_extract(clusterID, "\\d+"))
     } else {
         clusterID <- 0
     }
-    return (clusterID + next_cluster_id)
+    return(clusterID + next_cluster_id)
 }
 
 create_associated_cluster <- function(
@@ -831,24 +931,29 @@
         adduct_name <- as.character(curent_var_meta[["adduct"]])
         annotation <- curent_var_meta[["isotopes"]]
         cluster <- context$clusters[[clusterID]] <- orm$cluster(
-            pc_group=pcgroup + next_pc_group,
+            pc_group = pcgroup + next_pc_group,
             # adduct=adduct,
-            align_group=next_align_group,
+            align_group = next_align_group,
             # curent_group=curent_group,
-            clusterID=context$clusterID,
-            annotation=annotation
+            clusterID = context$clusterID,
+            annotation = annotation
         )
         if (is.null(adduct <- context$adducts[[adduct_name]])) {
-            context$adducts[[adduct_name]] <- orm$adduct()$load_by(name=adduct_name)$first()
+            context$adducts[[adduct_name]] <- orm$adduct()$load_by(
+                name = adduct_name
+            )$first()
             if (is.null(adduct <- context$adducts[[adduct_name]])) {
-                adduct <- context$adducts[[adduct_name]] <- orm$adduct(name=adduct_name, charge=0)
+                adduct <- context$adducts[[adduct_name]] <- orm$adduct(
+                    name = adduct_name,
+                    charge = 0
+                )
                 adduct$save()
             }
         }
         cluster$set_adduct(adduct)
-        ## Crappy hack to assign sample id to cluster without loading the sample.
-        ## Samples are too big (their sample$env) and slows the process, and eat all the menory
-        ## so we dont't want to load them.
+        ## Crappy hack to assign sample id to cluster without loading the
+        ## sample. Samples are too big (their sample$env): they slow the
+        ## process and eat all the memory, so we don't want to load them.
         cluster[["sample_id"]] <- main_sample_id
         cluster$modified__[["sample_id"]] <- main_sample_id
     } else {
@@ -858,7 +963,7 @@
     }
     cluster$save()
     feature$set_cluster(cluster)
-    return (cluster)
+    return(cluster)
 }
 
 complete_features <- function(orm, clusters, show_percent) {
@@ -866,20 +971,25 @@
     percent <- -1
     i <- 0
     for (cluster in clusters) {
-        i <- i+1
+        i <- i + 1
         if (show_percent && (i / total) * 100 > percent) {
             percent <- percent + 1
-            message("\r", sprintf("\r%d %%", percent), appendLF=FALSE)
+            message("\r", sprintf("\r%d %%", percent), appendLF = FALSE)
         }
-        features <- orm$feature()$load_by(cluster_id=cluster$get_id())
+        features <- orm$feature()$load_by(cluster_id = cluster$get_id())
         if (features$any()) {
             if (!is.null(rt <- features$mean("rt"))) {
                 cluster$set_mean_rt(rt)$save()
             }
             features_df <- as.data.frame(features)
-            central_feature <- features_df[grepl("^\\[M\\]", features_df[, "iso"]), ]
+            central_feature <- features_df[
+                grepl("^\\[M\\]", features_df[, "iso"]),
+            ]
             central_feature_into <- central_feature[["int_o"]]
-            if (!identical(central_feature_into, numeric(0)) && central_feature_into != 0) {
+            if (
+                !identical(central_feature_into, numeric(0))
+                && central_feature_into != 0
+            ) {
                 for (feature in as.vector(features)) {
                     feature$set_abundance(
                         feature$get_int_o() / central_feature_into * 100
@@ -888,7 +998,7 @@
             }
         }
     }
-    return (NULL)
+    return(NULL)
 }
 
 load_process_params <- function(orm, sample, params) {
@@ -900,19 +1010,23 @@
             load_process_params_peak_picking(orm, sample, param_list)
         }
     }
-    return (sample)
+    return(sample)
 }
 
-load_process_params_peak_picking <- function(orm, sample, peak_picking_params) {
-    return (add_sample_process_parameters(
-        params=peak_picking_params,
-        params_translation=list(
-            ppm="ppm",
-            maxcharge="maxCharge",
-            maxiso="maxIso"
+load_process_params_peak_picking <- function(
+    orm,
+    sample,
+    peak_picking_params
+) {
+    return(add_sample_process_parameters(
+        params = peak_picking_params,
+        params_translation = list(
+            ppm = "ppm",
+            maxcharge = "maxCharge",
+            maxiso = "maxIso"
         ),
-        param_model_generator=orm$peak_picking_parameters,
-        sample_param_setter=sample$set_peak_picking_parameters
+        param_model_generator = orm$peak_picking_parameters,
+        sample_param_setter = sample$set_peak_picking_parameters
     ))
 }
 
@@ -937,7 +1051,7 @@
         params_model <- do.call(param_model_generator, model_params)
         params_model$save()
     }
-    return (sample_param_setter(params_model)$save())
+    return(sample_param_setter(params_model)$save())
 }
 
 
@@ -946,56 +1060,67 @@
 option_list <- list(
     optparse::make_option(
         c("-v", "--version"),
-        action="store_true",
-        help="Display this tool's version and exits"
+        action = "store_true",
+        help = "Display this tool's version and exits"
     ),
     optparse::make_option(
         c("-i", "--input"),
-        type="character",
-        help="The rdata path to import in XSeeker"
+        type = "character",
+        help = "The rdata path to import in XSeeker"
     ),
     optparse::make_option(
         c("-s", "--samples"),
-        type="character",
-        help="Samples to visualise in XSeeker"
+        type = "character",
+        help = "Samples to visualise in XSeeker"
     ),
     optparse::make_option(
         c("-B", "--archetype"),
-        type="character",
-        help="The name of the base database"
+        type = "character",
+        help = "The name of the base database"
     ),
     optparse::make_option(
         c("-b", "--database"),
-        type="character",
-        help="The base database's path"
+        type = "character",
+        help = "The base database's path"
     ),
     optparse::make_option(
         c("-c", "--compounds-csv"),
-        type="character",
-        help="The csv containing compounds"
+        type = "character",
+        help = "The csv containing compounds"
     ),
     optparse::make_option(
         c("-m", "--models"),
-        type="character",
-        help="The path or url (must begin with http[s]:// or git@) to the database's models"
+        type = "character",
+        help = paste(
+            "The path or url (must begin with http[s]:// or git@) to",
+            "the database's models"
+        )
     ),
     optparse::make_option(
+        c("-k", "--class"),
+        type = "character",
+        help = "The name of the column containing the classes" 
+   ),
+    optparse::make_option(
         c("-o", "--output"),
-        type="character",
-        help="The path where to output sqlite"
+        type = "character",
+        help = "The path where to output sqlite"
     ),
     optparse::make_option(
         c("-P", "--not-show-percent"),
-        action="store_true",
-        help="Flag not to show the percents",
-        default=FALSE
+        action = "store_true",
+        help = "Flag not to show the percents",
+        default = FALSE
     )
 )
 
-options(error=function(){traceback(3)})
+options(error = function(){traceback(3)})
 
-parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
-args <- parse_args(parser, positional_arguments=0)
+parser <- OptionParser(
+    usage = "%prog [options] file",
+    option_list = option_list
+)
+args <- parse_args(parser, positional_arguments = 0)
 
 err_code <- 0
 
@@ -1006,8 +1131,8 @@
 
 models <- get_models(args$options$models)
 orm <- DBModelR::ORM(
-    connection_params=list(dbname=args$options$output),
-    dbms="SQLite"
+    connection_params = list(dbname=args$options$output),
+    dbms = "SQLite"
 )
 
 invisible(orm$models(models))
@@ -1023,7 +1148,7 @@
 message(sprintf("Base data inserted using %s.", args$options$database))
 
 if (!is.null(args$options$archetype)) {
-    insert_base_data(orm, args$options$archetype, archetype=TRUE)
+    insert_base_data(orm, args$options$archetype, archetype = TRUE)
 }
 if (!is.null(args$options$`compounds-csv`)) {
     insert_compounds(orm, args$options$`compounds-csv`)
@@ -1038,6 +1163,4 @@
 
 process_rdata(orm, rdata, args$options)
 
-quit(status=err_code)
-
-
+quit(status = err_code)
author	lain
date	Tue, 18 Oct 2022 12:57:28 +0000
parents	2c7e7fd1f740
children	ce94e7a141bb