Mercurial > repos > fgiacomoni > xseekerpreparator

diff galaxy/tools/LC-MSMS/XSeekerPreparator.R @ 1:c5d524c1db86 draft default tip
Deleted selected files - deprecated repo
author: fgiacomoni
date: Wed, 25 Nov 2020 13:19:41 +0000
parents: 1660665c081e
--- a/galaxy/tools/LC-MSMS/XSeekerPreparator.R	Thu Nov 19 14:28:46 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,917 +0,0 @@
-
-
-TOOL_NAME <- "XSeekerPreparator"
-VERSION <- "1.1.2"
-
-OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy"
-
-ENRICHED_RDATA_VERSION <- paste("1.1.2", OUTPUT_SPECIFIC_TOOL, sep="-")
-ENRICHED_RDATA_DOC <- sprintf("
-Welcome to the enriched <Version %s> of the output of CAMERA/xcms.
-This doc was generated by the tool: %s - Version %s
-To show the different variables contained in this rdata, type:
- - `load('this_rdata.rdata', rdata_env <- new.env())`
- - `names(rdata_env)`
-
-Sections
-######
-
-
-This tools helpers
-------
-    The version number is somewhat special because the evolution of the
-    rdata's format is non-linear.
-    There may be different branches, each evolving separatly.
-    To reflect these branches's diversions, there may be a prepended
-    branch name following this format:
-        major.minor.patch-branch_name
-    Like this, we can process rdata with the same tool, and output
-    rdata formated differently, for each tool.
-
-
-  - enriched_rdata:
-    - Description: flag created by that tool to tell it was enriched.
-    - Retrieval method: enriched_rdata <- TRUE
-
-  - enriched_rdata_version:
-    - Description: A flag created by that tool to tell which version of
-        this tool has enriched the rdata.
-    - Retrieval method: enriched_rdata_version <- sprintf(\"%s\", ENRICHED_RDATA_VERSION)
-
-  - enriched_rdata_doc:
-    - Description: Contains the documentation string.
-
-Data from original mzxml file
-------
-  - tic:
-    - Description: Those are the tic values from the original mzxml
-        file, extracted using xcms 2.
-    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@tic
-    - xcms version: 2.0
-
-  - mz:
-    - Description: Those are the m/z values from the original mzxml
-        file, extracted using xcms 2.
-    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@env$mz
-    - xcms version: 2.0
-
-  - scanindex:
-    - Description: Those are the scanindex values from the original mzxml
-        file, extracted using xcms 2.
-    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@scanindex
-    - xcms version: 2.0
-
-  - scantime:
-    - Description: Those are the scantime values from the original mzxml
-        file, extracted using xcms 2.
-    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@scantime
-    - xcms version: 2.0
-
-  - intensity:
-    - Description: Those are the intensity values from the original mzxml
-        file, extracted using xcms 2.
-    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@env$intensity
-    - xcms version: 2.0
-
-  - polarity:
-    - Description: Those are the polarity values from the original mzxml
-        file, extracted using xcms 2.
-    - Retrieval method: as.character(xcms::xcmsRaw('original_file.mzxml')@polarity[[1]])
-    - xcms version: 2.0
-
-Data taken from incoming rdata
-------
-  - variableMetadata:
-    - Description: Unmodified copy of variableMetadata from incoming rdata.
-    - Retrieval method: rdata_file$variableMetadata
-
-  - process_params:
-    - Description: Those are the processing parameters values from the
-        curent rdata. They have been simplified to allow easy access like:
-        for (params in process_params) {
-            if (params[[\"xfunction\"]] == \"annotatediff\") {
-                process_peak_picking_params(params)
-            }
-        }
-    - Retrieval method:
-        ## just he same list, but simplified
-        process_params <- list()
-        for (list_name in names(rdata_file$listOFlistArguments)) {
-            param_list <- list()
-            for (param_name in names(rdata_file$listOFlistArguments[[list_name]])) {
-                param_list[[param_name]] <- rdata_file$listOFlistArguments[[list_name]][[param_name]]
-            }
-            process_params[[length(process_params)+1]] <- param_list
-        }
-", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION)
-
-
-
-get_models <- function(path) {
-    if (is.null(path)) {
-        stop("No models to define the database schema")
-    } else {
-        message(sprintf("Loading models from %s", path))
-    }
-    ## galaxy mangles the "@" to a "__at__"
-    if (substr(path, 1, 9) == "git__at__") {
-        path <- sub("^git__at__", "git@", path, perl=TRUE)
-    }
-    if (
-        substr(path, 1, 4) == "git@"
-        || substr(path, length(path)-4, 4) == ".git"
-    ) {
-        return (get_models_from_git(path))
-    }
-    if (substr(path, 1, 4) == "http") {
-        return (get_models_from_url(path))
-    }
-    return (source(path)$value)
-}
-
-get_models_from_git <- function (url, target_file="models.R", rm=TRUE) {
-    tmp <- tempdir()
-    message(sprintf("Cloning %s", url))
-    system2("git", c("clone", url, tmp))
-    result <- search_tree(file.path(tmp, dir), target_file)
-    if (!is.null(result)) {
-        models <- source(result)$value
-        if (rm) {
-            unlink(tmp, recursive=TRUE)
-        }
-        return (models)
-    }
-    if (rm) {
-        unlink(tmp, recursive=TRUE)
-    }
-    stop(sprintf(
-        "Could not find any file named \"%s\" in this repo",
-        target_file
-    ))
-}
-
-get_models_from_url <- function (url, target_file="models.R", rm=TRUE) {
-    tmp <- tempdir()
-    message(sprintf("Downloading %s", url))
-    result <- file.path(tmp, target_file)
-    if (download.file(url, destfile=result) == 0) {
-        models <- source(result)$value
-        if (rm) {
-            unlink(tmp, recursive=TRUE)
-        }
-        return (models)
-    }
-    if (rm) {
-        unlink(tmp, recursive=TRUE)
-    }
-    stop("Could not download any file at this adress.")
-}
-
-search_tree <- function(path, target) {
-    target <- tolower(target)
-    for (file in list.files(path)) {
-        if (is.dir(file)) {
-            result <- search_tree(file.path(path, file), target)
-            if (!is.null(result)) {
-                return (result)
-            }
-        } else if (tolower(file) == target) {
-            return (file.path(path, file))
-        }
-    }
-    return (NULL)
-}
-
-create_database <- function(orm) {
-    orm$recreate_database(no_exists=FALSE)
-    set_database_version(orm, "created")
-}
-
-insert_adducts <- function(orm) {
-    message("Creating adducts...")
-    adducts <- list(
-        list("[M-H2O-H]-",1,-1,-48.992020312000001069,1,0,0.5,"H0","H1O3"),
-        list("[M-H-Cl+O]-",1,-1,-19.981214542000000022,2,0,0.5,"O1","H1Cl1"),
-        list("[M-Cl+O]-",1,-1,-18.973389510000000512,3,0,0.5,"O1","Cl1"),
-        list("[M-3H]3-",1,-3,-3.0218293560000000219,4,0,1.0,"H0","H3"),
-        list("[2M-3H]3-",2,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"),
-        list("[3M-3H]3-",3,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"),
-        list("[M-2H]2-",1,-2,-2.0145529039999998666,5,0,1.0,"H0","H2"),
-        list("[2M-2H]2-",2,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"),
-        list("[3M-2H]2-",3,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"),
-        list("[M-H]-",1,-1,-1.0072764519999999333,6,1,1.0,"H0","H1"),
-        list("[2M-H]-",2,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"),
-        list("[3M-H]-",3,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"),
-        list("[M]+",1,1,-0.00054858000000000000945,7,1,1.0,"H0","H0"),
-        list("[M]-",1,-1,0.00054858000000000000945,8,1,1.0,"H0","H0"),
-        list("[M+H]+",1,1,1.0072764519999999333,9,1,1.0,"H1","H0"),
-        list("[2M+H]+",2,1,1.0072764519999999333,9,0,0.5,"H1","H0"),
-        list("[3M+H]+",3,1,1.0072764519999999333,9,0,0.25,"H1","H0"),
-        list("[M+2H]2+",1,2,2.0145529039999998666,10,0,0.75,"H2","H0"),
-        list("[2M+2H]2+",2,2,2.0145529039999998666,10,0,0.5,"H2","H0"),
-        list("[3M+2H]2+",3,2,2.0145529039999998666,10,0,0.25,"H2","H0"),
-        list("[M+3H]3+",1,3,3.0218293560000000219,11,0,0.75,"H3","H0"),
-        list("[2M+3H]3+",2,3,3.0218293560000000219,11,0,0.5,"H3","H0"),
-        list("[3M+3H]3+",3,3,3.0218293560000000219,11,0,0.25,"H3","H0"),
-        list("[M-2H+NH4]-",1,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"),
-        list("[2M-2H+NH4]-",2,-1,16.019272654000001665,12,0,0.0,"N1H4","H2"),
-        list("[3M-2H+NH4]-",3,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"),
-        list("[M+NH4]+",1,1,18.033825558000000199,13,1,1.0,"N1H4","H0"),
-        list("[2M+NH4]+",2,1,18.033825558000000199,13,0,0.5,"N1H4","H0"),
-        list("[3M+NH4]+",3,1,18.033825558000000199,13,0,0.25,"N1H4","H0"),
-        list("[M+H+NH4]2+",1,2,19.041102009999999467,14,0,0.5,"N1H5","H0"),
-        list("[2M+H+NH4]2+",2,2,19.041102009999999467,14,0,0.5,"N1H5","H0"),
-        list("[3M+H+NH4]2+",3,2,19.041102009999999467,14,0,0.25,"N1H5","H0"),
-        list("[M+Na-2H]-",1,-1,20.974668176000001551,15,0,0.75,"Na1","H2"),
-        list("[2M-2H+Na]-",2,-1,20.974668176000001551,15,0,0.25,"Na1","H2"),
-        list("[3M-2H+Na]-",3,-1,20.974668176000001551,15,0,0.25,"Na1","H2"),
-        list("[M+Na]+",1,1,22.989221080000000086,16,1,1.0,"Na1","H0"),
-        list("[2M+Na]+",2,1,22.989221080000000086,16,0,0.5,"Na1","H0"),
-        list("[3M+Na]+",3,1,22.989221080000000086,16,0,0.25,"Na1","H0"),
-        list("[M+H+Na]2+",1,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"),
-        list("[2M+H+Na]2+",2,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"),
-        list("[3M+H+Na]2+",3,2,23.996497531999999353,17,0,0.25,"Na1H1","H0"),
-        list("[M+2H+Na]3+",1,3,25.003773983999998619,18,0,0.25,"H2Na1","H0"),
-        list("[M+CH3OH+H]+",1,1,33.033491200000000276,19,0,0.25,"C1O1H5","H0"),
-        list("[M-H+Cl]2-",1,-2,33.962124838000001148,20,0,1.0,"Cl1","H1"),
-        list("[2M-H+Cl]2-",2,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"),
-        list("[3M-H+Cl]2-",3,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"),
-        list("[M+Cl]-",1,-1,34.969401290000000416,21,1,1.0,"Cl1","H0"),
-        list("[2M+Cl]-",2,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"),
-        list("[3M+Cl]-",3,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"),
-        list("[M+K-2H]-",1,-1,36.948605415999999479,22,0,0.5,"K1","H2"),
-        list("[2M-2H+K]-",2,-1,36.948605415999999479,22,0,0.0,"K1","H2"),
-        list("[3M-2H+K]-",3,-1,36.948605415999999479,22,0,0.0,"K1","H2"),
-        list("[M+K]+",1,1,38.963158319999998013,23,1,1.0,"K1","H0"),
-        list("[2M+K]+",2,1,38.963158319999998013,23,0,0.5,"K1","H0"),
-        list("[3M+K]+",3,1,38.963158319999998013,23,0,0.25,"K1","H0"),
-        list("[M+H+K]2+",1,2,39.970434771999997281,24,0,0.5,"K1H1","H0"),
-        list("[2M+H+K]2+",2,2,39.970434771999997281,24,0,0.5,"K1H1","H0"),
-        list("[3M+H+K]2+",3,2,39.970434771999997281,24,0,0.25,"K1H1","H0"),
-        list("[M+ACN+H]+",1,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"),
-        list("[2M+ACN+H]+",2,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"),
-        list("[M+2Na-H]+",1,1,44.971165708000000902,26,0,0.5,"Na2","H1"),
-        list("[2M+2Na-H]+",2,1,44.971165708000000902,26,0,0.25,"Na2","H1"),
-        list("[3M+2Na-H]+",3,1,44.971165708000000902,26,0,0.25,"Na2","H1"),
-        list("[2M+FA-H]-",2,-1,44.998202851999998586,27,0,0.25,"C1O2H2","H1"),
-        list("[M+FA-H]-",1,-1,44.998202851999998586,27,0,0.5,"C1O2H2","H1"),
-        list("[M+2Na]2+",1,2,45.978442160000000172,28,0,0.5,"Na2","H0"),
-        list("[2M+2Na]2+",2,2,45.978442160000000172,28,0,0.5,"Na2","H0"),
-        list("[3M+2Na]2+",3,2,45.978442160000000172,28,0,0.25,"Na2","H0"),
-        list("[M+H+2Na]3+",1,3,46.985718611999999438,29,0,0.25,"H1Na2","H0"),
-        list("[M+H+FA]+",1,1,47.012755755999997122,30,0,0.25,"C1O2H3","H0"),
-        list("[M+Hac-H]-",1,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"),
-        list("[2M+Hac-H]-",2,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"),
-        list("[M+IsoProp+H]+",1,1,61.064791327999998317,32,0,0.25,"C3H9O1","H0"),
-        list("[M+Na+K]2+",1,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"),
-        list("[2M+Na+K]2+",2,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"),
-        list("[3M+Na+K]2+",3,2,61.9523793999999981,33,0,0.25,"Na1K1","H0"),
-        list("[M+NO3]-",1,-1,61.988366450000000895,34,0,0.5,"N1O3","H0"),
-        list("[M+ACN+Na]+",1,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"),
-        list("[2M+ACN+Na]+",2,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"),
-        list("[M+NH4+FA]+",1,1,64.039304861999994502,36,0,0.25,"N1C1O2H6","H0"),
-        list("[M-2H+Na+FA]-",1,-1,66.980147479999999405,37,0,0.5,"NaC1O2H2","H2"),
-        list("[M+3Na]3+",1,3,68.967663239999993153,38,0,0.25,"Na3","H0"),
-        list("[M+Na+FA]+",1,1,68.99470038399999794,39,0,0.25,"Na1C1O2H2","H0"),
-        list("[M+2Cl]2-",1,-2,69.938802580000000832,40,0,1.0,"Cl2","H0"),
-        list("[2M+2Cl]2-",2,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"),
-        list("[3M+2Cl]2-",3,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"),
-        list("[M+2K-H]+",1,1,76.919040187999996758,41,0,0.5,"K2","H1"),
-        list("[2M+2K-H]+",2,1,76.919040187999996758,41,0,0.25,"K2","H1"),
-        list("[3M+2K-H]+",3,1,76.919040187999996758,41,0,0.25,"K2","H1"),
-        list("[M+2K]2+",1,2,77.926316639999996028,42,0,0.5,"K2","H0"),
-        list("[2M+2K]2+",2,2,77.926316639999996028,42,0,0.5,"K2","H0"),
-        list("[3M+2K]2+",3,2,77.926316639999996028,42,0,0.25,"K2","H0"),
-        list("[M+Br]-",1,-1,78.918886479999997619,43,1,1.0,"Br1","H0"),
-        list("[M+Cl+FA]-",1,-1,80.974880593999998268,44,0,0.5,"Cl1C1O2H2","H0"),
-        list("[M+AcNa-H]-",1,-1,80.995797543999998426,45,0,0.25,"C2H3Na1O2","H1"),
-        list("[M+2ACN+2H]2+",1,2,84.067651115999993292,46,0,0.25,"C4H8N2","H0"),
-        list("[M+K+FA]+",1,1,84.968637623999995868,47,0,0.25,"K1C1O2H2","H0"),
-        list("[M+Cl+Na+FA-H]-",1,-1,102.95682522200000619,48,0,0.5,"Cl1Na1C1O2H2","H1"),
-        list("[2M+3H2O+2H]+",2,1,104.03153939599999944,49,0,0.25,"H8O6","H0"),
-        list("[M+TFA-H]-",1,-1,112.98558742000000165,50,0,0.5,"C2F3O2H1","H1"),
-        list("[M+H+TFA]+",1,1,115.00014032400000019,51,0,0.25,"C2F3O2H2","H0"),
-        list("[M+3ACN+2H]2+",1,2,125.09420022199999778,52,0,0.25,"C6H11N3","H0"),
-        list("[M+NH4+TFA]+",1,1,132.02668943000000468,53,0,0.25,"N1C2F3O2H5","H0"),
-        list("[M+Na+TFA]+",1,1,136.98208495200000811,54,0,0.25,"Na1C2F3O2H1","H0"),
-        list("[M+Cl+TFA]-",1,-1,148.96226516199999423,55,0,0.5,"Cl1C2F3O2H1","H0"),
-        list("[M+K+TFA]+",1,1,152.95602219200000604,56,0,0.25,"K1C2F3O2H1","H0")
-    )
-    dummy_adduct <- orm$adduct()
-    for (adduct in adducts) {
-        i <- 0
-        dummy_adduct$set_name(adduct[[i <- i+1]])
-        dummy_adduct$set_multi(adduct[[i <- i+1]])
-        dummy_adduct$set_charge(adduct[[i <- i+1]])
-        dummy_adduct$set_mass(adduct[[i <- i+1]])
-        dummy_adduct$set_oidscore(adduct[[i <- i+1]])
-        dummy_adduct$set_quasi(adduct[[i <- i+1]])
-        dummy_adduct$set_ips(adduct[[i <- i+1]])
-        dummy_adduct$set_formula_add(adduct[[i <- i+1]])
-        dummy_adduct$set_formula_ded(adduct[[i <- i+1]])
-        dummy_adduct$save()
-        dummy_adduct$clear(unset_id=TRUE)
-    }
-    message("Adducts created")
-}
-
-insert_base_data <- function(orm, path, archetype=FALSE) {
-    if (archetype) {
-        ## not implemented yet
-        return ()
-    }
-    base_data <- readLines(path)
-    for (sql in strsplit(paste(base_data, collapse=" "), ";")[[1]]) {
-        orm$execute(sql)
-    }
-    set_database_version(orm, "enriched")
-}
-
-insert_compounds <- function(orm, compounds_path) {
-    compounds <- read.csv(file=compounds_path, sep="\t")
-    if (is.null(compounds <- translate_compounds(compounds))) {
-        stop("Could not find asked compound's attributes in csv file.")
-    }
-    dummy_compound <- orm$compound()
-    compound_list <- list()
-    for (i in seq_len(nrow(compounds))) {
-        dummy_compound$set_mz(compounds[i, "mz"])
-        dummy_compound$set_name(compounds[i, "name"])
-        dummy_compound$set_common_name(compounds[i, "common_name"])
-        dummy_compound$set_formula(compounds[i, "formula"])
-        # dummy_compound$set_mz(compounds[i, "mz"])
-        # dummy_compound$set_mz(compounds[i, "mz"])
-        compound_list[[length(compound_list)+1]] <- as.list(
-            dummy_compound,
-            c("mz", "name", "common_name", "formula")
-        )
-        dummy_compound$clear(unset_id=TRUE)
-    }
-    dummy_compound$save(bulk=compound_list)
-}
-
-translate_compounds <- function(compounds) {
-    recognized_headers <- list(
-        c("HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", "MetName", "ChemFormula", "INChIkey")
-    )
-    header_translators <- list(
-        hmdb_header_translator
-    )
-    for (index in seq_along(recognized_headers)) {
-        headers <- recognized_headers[[index]]
-        if (identical(colnames(compounds), headers)) {
-            return (header_translators[[index]](compounds))
-        }
-    }
-    if (is.null(translator <- guess_translator(colnames(compounds)))) {
-        return (NULL)
-    }
-    return (csv_header_translator(translator, compounds))
-}
-
-guess_translator <- function(header) {
-    result <- list(
-        # HMDB_ID=NULL,<
-        mz=NULL,
-        name=NULL,
-        common_name=NULL,
-        formula=NULL,
-        # inchi_key=NULL
-    )
-    asked_cols <- names(result)
-    for (asked_col in asked_cols) {
-        for (col in header) {
-            if ((twisted <- tolower(col)) == asked_col
-                || gsub("-", "_", twisted) == asked_col
-                || gsub(" ", "_", twisted) == asked_col
-                || tolower(gsub("(.)([A-Z])", "\\1_\\2", col)) == asked_col
-            ) {
-                result[[asked_col]] <- col
-                next
-            }
-        }
-    }
-    if (any(mapply(is.null, result))) {
-        return (NULL)
-    }
-    return (result)
-}
-
-hmdb_header_translator <- function(compounds) {
-    return (csv_header_translator(
-        list(
-            HMDB_ID="HMDB_ID",
-            mz="MzBank",
-            name="MetName",
-            common_name="MetName",
-            formula="ChemFormula",
-            inchi_key="INChIkey"
-        ), compounds
-    ))
-}
-
-csv_header_translator <- function(translation_table, csv) {
-    header_names <- names(translation_table)
-    result <- data.frame(1:nrow(csv))
-    # colnames(result) <- header_names
-    for (i in seq_along(header_names)) {
-        result[, header_names[[i]]] <- csv[, translation_table[[i]]]
-    }
-    print(result[, "mz"])
-    result[, "mz"] <- as.numeric(result[, "mz"])
-    print(result[, "mz"])
-    return (result)
-}
-
-set_database_version <- function(orm, version) {
-    orm$set_tag(
-        version,
-        tag_name="database_version",
-        tag_table_name="XSeeker_tagging_table"
-    )
-}
-
-process_rdata <- function(orm, rdata, options) {
-    mzml_tmp_dir <- gather_mzml_files(rdata)
-    samples <- names(rdata$singlefile)
-    if (!is.null(options$samples)) {
-        samples <- samples[options$samples %in% samples]
-    }
-    show_percent <- (
-        is.null(options$`not-show-percent`)
-        || options$`not-show-percent` == FALSE
-    )
-    error <- tryCatch({
-        process_sample_list(
-            orm, rdata, samples,
-            show_percent=show_percent
-        )
-        NULL
-    }, error=function(e) {
-        message(e)
-        e
-    })
-    if (!is.null(mzml_tmp_dir)) {
-        unlink(mzml_tmp_dir, recursive=TRUE)
-    }
-    if (!is.null(error)) {
-        stop(error)
-    }
-}
-
-gather_mzml_files <- function(rdata) {
-    if (is.null(rdata$singlefile)) {
-        message("Extracting mxml files")
-        tmp <- tempdir()
-        rdata$singlefile <- utils::unzip(rdata$zipfile, exdir=tmp)
-        names(rdata$singlefile) <- tools::file_path_sans_ext(basename(rdata$singlefile))
-        message("Extracted")
-        return (tmp)
-    }
-    return (NULL)
-}
-
-process_sample_list <- function(orm, radta, sample_names, show_percent) {
-    file_grouping_var <- find_grouping_var(rdata$variableMetadata)
-    message("Processing samples.")
-    message(sprintf("File grouping variable: %s", file_grouping_var))
-    if(is.null(file_grouping_var)) {
-        stop("Malformed variableMetada.")
-    }
-
-    process_arg_list <- rdata$listOFlistArguments
-    process_params <- list()
-    for (list_name in names(process_arg_list)) {
-        param_list <- list()
-        for (param_name in names(process_arg_list[[list_name]])) {
-            param_list[[param_name]] <- process_arg_list[[list_name]][[param_name]]
-        }
-        process_params[[length(process_params)+1]] <- param_list
-    }
-    message("Parameters from previous processes extracted.")
-
-    var_meta <- rdata$variableMetadata
-    align_group <- rep(0, nrow(var_meta))
-    var_meta <- cbind(var_meta, align_group)
-    context <- new.env()
-    context$clusters <- list()
-    context$groupidx <- rdata$xa@xcmsSet@groupidx
-    context$peaks <- rdata$xa@xcmsSet@peaks
-    context$show_percent <- show_percent
-
-    indices <- as.numeric(unique(var_meta[, file_grouping_var]))
-    smol_xcms_set <- orm$smol_xcms_set()
-    mz_tab_info <- new.env()
-    xcms_set <- rdata$xa@xcmsSet
-    g <- xcms::groups(xcms_set)
-    mz_tab_info$group_length <- nrow(g)
-    mz_tab_info$dataset_path <- xcms::filepaths(xcms_set)
-    mz_tab_info$sampnames <- xcms::sampnames(xcms_set)
-    mz_tab_info$sampclass <- xcms::sampclass(xcms_set)
-    mz_tab_info$rtmed <- g[,"rtmed"]
-    mz_tab_info$mzmed <- g[,"mzmed"]
-    mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(xcms_set, value="into")
-    blogified <- blob::blob(fst::compress_fst(serialize(mz_tab_info, NULL), compression=100))
-    smol_xcms_set$set_raw(blogified)$save()
-    for (no in indices) {
-        sample_name <- names(rdata$singlefile)[[no]]
-        sample_path <- rdata$singlefile[[no]]
-        if (
-            is.na(no)
-            || is.null(sample_path)
-            || !(sample_name %in% sample_names)
-        ) {
-            next
-        }
-        ms_file=xcms::xcmsRaw(sample_path)
-        env <- new.env()
-        env$variableMetadata <- var_meta[var_meta[, file_grouping_var]==no,]
-        env$tic <- ms_file@tic
-        env$mz <- ms_file@env$mz
-        env$scanindex <- ms_file@scanindex
-        env$scantime <- ms_file@scantime
-        env$intensity <- ms_file@env$intensity
-        env$polarity <- as.character(ms_file@polarity[[1]])
-        env$sample_name <- sample_name
-        env$dataset_path <- sample_path
-        env$process_params <- process_params
-        env$enriched_rdata <- TRUE
-        env$enriched_rdata_version <- ENRICHED_RDATA_VERSION
-        env$tool_name <- TOOL_NAME
-        env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
-        context$sample_no <- no
-        add_sample_to_database(orm, env, context, smol_xcms_set)
-    }
-    message("Features enrichment")
-    complete_features(orm, context)
-    message("Features enrichment done.")
-    return (NULL)
-}
-
-find_grouping_var <- function(var_meta) {
-    for (grouping_var in c(".", "Bio")) {
-        if (!is.null(rdata$variableMetadata[[grouping_var]])) {
-            return (grouping_var)
-        }
-    }
-    return (NULL)
-}
-
-add_sample_to_database <- function(orm, env, context, smol_xcms_set) {
-    message(sprintf("Processing sample %s", env$sample_name))
-    sample <- (
-        orm$sample()
-        $set_name(env$sample_name)
-        $set_path(env$dataset_path)
-        $set_kind("enriched_rdata")
-        $set_polarity(
-            if (is.null(env$polarity) || identical(env$polarity, character(0))) ""
-            else env$polarity
-        )
-        $set_smol_xcms_set(smol_xcms_set)
-        $set_raw(blob::blob(fst::compress_fst(
-            serialize(env, NULL),
-            compression=100
-        )))
-        $save()
-    )
-    load_variable_metadata(orm, sample, env$variableMetadata, context)
-    load_process_params(orm, sample, env$process_params)
-    message(sprintf("Sample %s inserted.", env$sample_name))
-    return (sample)
-}
-
-
-load_variable_metadata <- function(orm, sample, var_meta, context) {
-    all_clusters <- orm$cluster()$all()
-
-    next_feature_id <- get_next_id(orm$feature()$all(), "featureID")
-    next_cluster_id <- get_next_id(all_clusters, "clusterID")
-    next_pc_group <- get_next_id(all_clusters, "pc_group")
-    next_align_group <- get_next_id(all_clusters, "align_group")
-    message("Extracting features")
-    invisible(create_features(
-        orm, sample, var_meta, context,
-        next_feature_id, next_cluster_id,
-        next_pc_group, next_align_group
-    ))
-    message("Extracting features done.")
-    return (NULL)
-}
-
-get_next_id <- function(models, attribute) {
-    if ((id <- models$max(attribute)) == Inf || id == -Inf) {
-        return (1)
-    }
-    return (id + 1)
-}
-
-create_features <- function(
-    orm, sample, var_meta, context,
-    next_feature_id, next_cluster_id,
-    next_pc_group, next_align_group
-) {
-    field_names <- as.list(names(orm$feature()$fields__))
-    field_names[field_names=="id"] <- NULL
-
-    features <- list()
-    dummy_feature <- orm$feature()
-
-    if (show_percent <- context$show_percent) {
-        percent <- -1
-        total <- nrow(var_meta)
-    }
-    for (row in seq_len(nrow(var_meta))) {
-        if (show_percent && (row / total) * 100 > percent) {
-            percent <- percent + 1
-            message("\r", sprintf("\r%d %%", percent), appendLF=FALSE)
-        }
-
-        curent_var_meta <- var_meta[row, ]
-
-        peak_list <- context$peaks[context$groupidx[[row]], ]
-        sample_peak_list <- peak_list[peak_list[, "sample"] == context$sample_no, , drop=FALSE]
-        if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
-            if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
-                dummy_feature$set_int_o(int_o)
-            }
-            if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
-                dummy_feature$set_int_b(int_b)
-            }
-            if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
-                dummy_feature$set_max_o(max_o)
-            }
-        }
-
-        set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
-
-        dummy_feature$set_featureID(next_feature_id)
-        next_feature_id <- next_feature_id + 1
-        fake_iso <- dummy_feature$get_iso()
-        iso <- extract_iso(fake_iso)
-        clusterID <- extract_clusterID(fake_iso, next_cluster_id)
-        context$clusterID <- clusterID
-        dummy_feature$set_iso(iso)
-        create_associated_cluster(
-            sample, dummy_feature, clusterID,
-            context, curent_var_meta, next_pc_group,
-            next_align_group
-        )
-        next_align_group <- next_align_group + 1
-        features[[length(features)+1]] <- as.list(dummy_feature, field_names)
-        dummy_feature$clear()
-    }
-    message("")## +\n for previous message 
-    message("Saving features")
-    dummy_feature$save(bulk=features)
-    message("Saved.")
-    return (context$clusters)
-}
-
-extract_peak_var <- function(peak_list, var_name, selector=max) {
-    value <- peak_list[, var_name]
-    names(value) <- NULL
-    return (selector(value))
-}
-
-set_feature_fields_from_var_meta <- function(feature, var_meta) {
-    if (!is.null(mz <- var_meta[["mz"]]) && !is.na(mz)) {
-        feature$set_mz(mz)
-    }
-    if (!is.null(mzmin <- var_meta[["mzmin"]]) && !is.na(mzmin)) {
-        feature$set_mz_min(mzmin)
-    }
-    if (!is.null(mzmax <- var_meta[["mzmax"]]) && !is.na(mzmax)) {
-        feature$set_mz_max(mzmax)
-    }
-    if (!is.null(rt <- var_meta[["rt"]]) && !is.na(rt)) {
-        feature$set_rt(rt)
-    }
-    if (!is.null(rtmin <- var_meta[["rtmin"]]) && !is.na(rtmin)) {
-        feature$set_rt_min(rtmin)
-    }
-    if (!is.null(rtmax <- var_meta[["rtmax"]]) && !is.na(rtmax)) {
-        feature$set_rt_max(rtmax)
-    }
-    if (!is.null(isotopes <- var_meta[["isotopes"]]) && !is.na(isotopes)) {
-        feature$set_iso(isotopes)
-    }
-    return (feature)
-}
-
-extract_iso  <- function(weird_data) {
-    if (grepl("^\\[\\d+\\]", weird_data)[[1]]) {
-        return (sub("^\\[\\d+\\]", "", weird_data, perl=TRUE))
-    }
-    return (weird_data)
-}
-
-extract_clusterID <- function(weird_data, next_cluster_id){
-    if (grepl("^\\[\\d+\\]", weird_data)[[1]]) {
-        clusterID <- stringr::str_extract(weird_data, "^\\[\\d+\\]")
-        clusterID <- as.numeric(stringr::str_extract(clusterID, "\\d+"))
-    } else {
-        clusterID <- 0
-    }
-    return (clusterID + next_cluster_id)
-}
-
-create_associated_cluster <- function(
-    sample, feature, grouping_variable,
-    context, curent_var_meta, next_pc_group, next_align_group
-) {
-    pcgroup <- as.numeric(curent_var_meta[["pcgroup"]])
-    adduct <- as.character(curent_var_meta[["adduct"]])
-    annotation <- curent_var_meta[["isotopes"]]
-    grouping_variable <- as.character(grouping_variable)
-    if (is.null(cluster <- context$clusters[[grouping_variable]])) {
-        cluster <- context$clusters[[grouping_variable]] <- orm$cluster(
-            pc_group=pcgroup + next_pc_group,
-            adduct=adduct,
-            align_group=next_align_group,
-            # curent_group=curent_group,
-            clusterID=context$clusterID,
-            annotation=annotation
-        )$set_sample(sample)
-    } else {
-        if (context$clusterID != 0 && cluster$get_clusterID() == 0) {
-            cluster$set_clusterID(context$clusterID)
-        }
-    }
-    cluster$save()
-    feature$set_cluster(cluster)
-    return (feature)
-}
-
-complete_features <- function(orm, context) {
-    for (cluster in context$clusters) {
-        features <- orm$feature()$load_by(cluster_id=cluster$get_id())
-        if (features$any()) {
-            if (!is.null(rt <- features$mean("rt"))) {
-                cluster$set_mean_rt(rt)$save()
-            }
-            features_df <- as.data.frame(features)
-            central_feature <- features_df[grepl("^\\[M\\]", features_df[, "iso"]), ]
-            central_feature_into <- central_feature[["int_o"]]
-            if (!identical(central_feature_into, numeric(0)) && central_feature_into != 0) {
-                for (feature in as.vector(features)) {
-                    feature$set_abundance(
-                        feature$get_int_o() / central_feature_into * 100
-                    )$save()
-                }
-            }
-        }
-    }
-    return (NULL)
-}
-
-load_process_params <- function(orm, sample, params) {
-    for (param_list in params) {
-        if (is.null(param_list[["xfunction"]])) {
-            next
-        }
-        if (param_list[["xfunction"]] == "annotatediff") {
-            load_process_params_peak_picking(orm, sample, param_list)
-        }
-    }
-    return (sample)
-}
-
-load_process_params_peak_picking <- function(orm, sample, peak_picking_params) {
-    return (add_sample_process_parameters(
-        params=peak_picking_params,
-        params_translation=list(
-            ppm="ppm",
-            maxcharge="maxCharge",
-            maxiso="maxIso"
-        ),
-        param_model_generator=orm$peak_picking_parameters,
-        sample_param_setter=sample$set_peak_picking_parameters
-    ))
-}
-
-add_sample_process_parameters <- function(
-    params,
-    params_translation,
-    param_model_generator,
-    sample_param_setter
-) {
-    model_params <- list()
-    for (rdata_param_name in names(params_translation)) {
-        database_param_name <- params_translation[[rdata_param_name]]
-        if (is.null(rdata_param <- params[[rdata_param_name]])) {
-            next
-        }
-        model_params[[database_param_name]] <- rdata_param
-    }
-    params_models <- do.call(param_model_generator()$load_by, model_params)
-    if (params_models$any()) {
-        params_model <- params_models$first()
-    } else {
-        params_model <- do.call(param_model_generator, model_params)
-        params_model$save()
-    }
-    return (sample_param_setter(params_model)$save())
-}
-
-
-library(optparse)
-
-option_list <- list(
-    optparse::make_option(
-        c("-v", "--version"),
-        action="store_true",
-        help="Display this tool's version and exits"
-    ),
-    optparse::make_option(
-        c("-i", "--input"),
-        type="character",
-        help="The rdata path to import in XSeeker"
-    ),
-    optparse::make_option(
-        c("-s", "--samples"),
-        type="character",
-        help="Samples to visualise in XSeeker"
-    ),
-    optparse::make_option(
-        c("-B", "--archetype"),
-        type="character",
-        help="The name of the base database"
-    ),
-    optparse::make_option(
-        c("-b", "--database"),
-        type="character",
-        help="The base database's path"
-    ),
-    optparse::make_option(
-        c("-c", "--compounds-csv"),
-        type="character",
-        help="The csv containing compounds"
-    ),
-    optparse::make_option(
-        c("-m", "--models"),
-        type="character",
-        help="The path or url (must begin with http[s]:// or git@) to the database's models"
-    ),
-    optparse::make_option(
-        c("-o", "--output"),
-        type="character",
-        help="The path where to output sqlite"
-    ),
-    optparse::make_option(
-        c("-P", "--not-show-percent"),
-        action="store_true",
-        help="Flag not to show the percents",
-        default=FALSE
-    )
-)
-
-options(error=function(){traceback(3)})
-
-parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
-args <- parse_args(parser, positional_arguments=0)
-
-err_code <- 0
-
-if (!is.null(args$options$version)) {
-    message(sprintf("%s %s", TOOL_NAME, VERSION))
-    quit()
-}
-
-models <- get_models(args$options$models)
-orm <- DBModelR::ORM(
-    connection_params=list(dbname=args$options$output),
-    dbms="SQLite"
-)
-
-invisible(orm$models(models))
-invisible(create_database(orm))
-
-message("Database model created")
-
-insert_adducts(orm)
-
-if (!is.null(args$options$database)) {
-    insert_base_data(orm, args$options$database)
-}
-message(sprintf("Base data inserted using %s.", args$options$database))
-
-if (!is.null(args$options$archetype)) {
-    insert_base_data(orm, args$options$archetype, archetype=TRUE)
-}
-if (!is.null(args$options$`compounds-csv`)) {
-    insert_compounds(orm, args$options$`compounds-csv`)
-}
-
-# if (!is.null(args$options$rdata)) {
-#     load_rdata_in_base(args$options$rdata, args$options$samples, args$options$`not-show-percent`)
-# }
-
-
-load(args$options$input, rdata <- new.env())
-
-process_rdata(orm, rdata, args$options)
-
-quit(status=err_code)
-
-
author	fgiacomoni
date	Wed, 25 Nov 2020 13:19:41 +0000
parents	1660665c081e
children