Mercurial > repos > lain > xseekerpreparator
changeset 20:ce94e7a141bb draft default tip
" master branch Updating"
author | lain |
---|---|
date | Tue, 06 Dec 2022 10:18:10 +0000 |
parents | 2937e72e5891 |
children | |
files | README.md XSeekerPreparator.R XSeekerPreparator.xml format_versionning.MD galaxy/tools/LC-MSMS/XSeekerPreparator.R |
diffstat | 5 files changed, 267 insertions(+), 166 deletions(-) [+] |
line wrap: on
line diff
--- a/README.md Tue Oct 18 12:57:28 2022 +0000 +++ b/README.md Tue Dec 06 10:18:10 2022 +0000 @@ -96,7 +96,7 @@ -------- - **@name**: XSeekerPreparator - - **@version**: 1.3.0 + - **@version**: 1.3.1 - **@authors**: Lain Pavot - **@date creation**: 15/09/2020
--- a/XSeekerPreparator.R Tue Oct 18 12:57:28 2022 +0000 +++ b/XSeekerPreparator.R Tue Dec 06 10:18:10 2022 +0000 @@ -198,7 +198,7 @@ search_tree <- function(path, target) { target <- tolower(target) for (file in list.files(path)) { - if (is.dir(file)) { + if (fs::is.dir(file)) { result <- search_tree(file.path(path, file), target) if (!is.null(result)) { return(result) @@ -400,12 +400,10 @@ guess_translator <- function(header) { result <- list( - # HMDB_ID = NULL, mz = NULL, name = NULL, common_name = NULL, - formula = NULL, - # inchi_key = NULL + formula = NULL ) asked_cols <- names(result) for (asked_col in asked_cols) { @@ -471,12 +469,12 @@ process_sample_list( orm, rdata, samples, show_percent = show_percent, - file_grouping_var = options$class + file_grouping_var = options$class, + options = options ) NULL }, error = function(e) { - message(e) - e + return(e) }) if (!is.null(mzml_tmp_dir)) { unlink(mzml_tmp_dir, recursive = TRUE) @@ -484,6 +482,7 @@ if (!is.null(error)) { stop(error) } + return(!is.null(error)) } gather_mzml_files <- function(rdata) { @@ -510,7 +509,8 @@ rdata, sample_names, show_percent, - file_grouping_var = NULL + file_grouping_var = NULL, + options = list() ) { if (is.null(file_grouping_var)) { file_grouping_var <- find_grouping_var(rdata$variableMetadata) @@ -587,19 +587,6 @@ message("Parameters from previous processes extracted.") - - indices <- as.numeric(unique(var_meta[, file_grouping_var])) - if (any(is.null(names(singlefile)[indices]))) { - stop(sprintf( - paste( - "Indices defined by grouping variable %s are not all present", - "in singlefile names (%s).\nCannot continue. 
Indices: %s" - ), - file_grouping_var, - paste(names(singlefile), collapse = ", "), - paste(indices, collapse = ", ") - )) - } smol_xcms_set <- orm$smol_xcms_set() mz_tab_info <- new.env() g <- xcms::groups(xcms_set) @@ -623,7 +610,7 @@ smol_xcms_set_id <- smol_xcms_set$get_id() rm(smol_xcms_set) - for (no in indices) { + for (no in seq_along(names(singlefile))) { sample_name <- names(singlefile)[[no]] sample_path <- singlefile[[no]] if ( @@ -760,7 +747,6 @@ field_names <- as.list(names(orm$feature()$fields__)) field_names[field_names == "id"] <- NULL - features <- list() dummy_feature <- orm$feature() if (show_percent <- context$show_percent) { @@ -772,7 +758,10 @@ rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))] } - cluster_row <- list() + # features <- list() + features <- as.list(rows) ## allocate all memory before processing + # cluster_row <- list() + cluster_row <- as.list(rows) ## allocate all memory before processing for (row in rows) { if (show_percent && (row / total) * 100 > percent) { percent <- percent + 1 @@ -843,7 +832,8 @@ next_align_group ) next_align_group <- next_align_group + 1 - features[[length(features) + 1]] <- as.list(dummy_feature, field_names) + features[[row]] <- as.list(dummy_feature, field_names) + # features[[length(features) + 1]] <- as.list(dummy_feature, field_names) dummy_feature$clear() } rm(var_meta) @@ -963,6 +953,7 @@ } cluster$save() feature$set_cluster(cluster) + feature$save() return(cluster) } @@ -1064,6 +1055,12 @@ help = "Display this tool's version and exits" ), optparse::make_option( + c("-V", "--verbose"), + action = "store_true", + help = "Does more verbose outputs", + default = FALSE + ), + optparse::make_option( c("-i", "--input"), type = "character", help = "The rdata path to import in XSeeker" @@ -1161,6 +1158,18 @@ load(args$options$input, rdata <- new.env()) -process_rdata(orm, rdata, args$options) +args$options$verbose <- ( + if (args$options$verbose) { + message("Verbose outputs.") + 
\(...) { + message(sprintf(...)) + } + } else { + \(...) { + } + } +) + +err_code <- process_rdata(orm, rdata, args$options) quit(status = err_code)
--- a/XSeekerPreparator.xml Tue Oct 18 12:57:28 2022 +0000 +++ b/XSeekerPreparator.xml Tue Dec 06 10:18:10 2022 +0000 @@ -1,34 +1,26 @@ -<tool id="xseeker_preparator" name="XSeeker Preparator" version="1.3.0"> +<tool id="xseeker_preparator" name="XSeeker Preparator" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="22.01"> <description>prepares RData file from XCMS+CAMERA for XSeeker</description> + <macros> + <token name="@VERSION@">1.3.1</token> + <token name="@VERSION_SUFFIX@">0</token> + </macros> <edam_operations> <edam_operation>operation_1812</edam_operation> <edam_operation>operation_0335</edam_operation> </edam_operations> <requirements> - <requirement type="package" >bioconductor-xcms</requirement> - <requirement type="package" version="1.48.0">bioconductor-camera</requirement> - <requirement type="package" >git</requirement> - <requirement type="package" >r-blob</requirement> - <requirement type="package" >r-dbi</requirement> - <requirement type="package" >r-fst</requirement> - <requirement type="package" >r-optparse</requirement> - <requirement type="package" >r-stringr</requirement> - <requirement type="package" >r-purrr</requirement> - <requirement type="package" >r-rsqlite</requirement> - <requirement type="package" >r-dbmodelr</requirement> - <!-- - <requirement type="package" version="3.12.0">bioconductor-xcms</requirement> + <requirement type="package" version="4.1.3">r-base</requirement> + <requirement type="package" version="3.14.0">bioconductor-xcms</requirement> <requirement type="package" version="1.48.0">bioconductor-camera</requirement> <requirement type="package" version="2.29.2">git</requirement> - <requirement type="package" version="1.2.1">r-blob</requirement> - <requirement type="package" version="1.1.0">r-dbi</requirement> - <requirement type="package" version="0.9.4">r-fst</requirement> + <requirement type="package" version="1.2.3">r-blob</requirement> + <requirement type="package" version="1.1.3">r-dbi</requirement> + <requirement 
type="package" version="0.9.8">r-fst</requirement> <requirement type="package" version="1.6.6">r-optparse</requirement> <requirement type="package" version="1.4.0">r-stringr</requirement> - <requirement type="package" version="0.3.4">r-purrr</requirement> - <requirement type="package" version="2.2.1">r-rsqlite</requirement> + <requirement type="package" version="0.3.5">r-purrr</requirement> + <requirement type="package" version="2.2.18">r-rsqlite</requirement> <requirement type="package" version="0.2.0">r-dbmodelr</requirement> - --> </requirements> <stdio> <exit_code @@ -67,104 +59,14 @@ #else --models '${database.models.url}' #end if - #if $class_column - --class '${class_column}' + #if $class + --class '${class}' #end if ]]> </command> - <inputs> - <param - name="input" - type="data" - multiple="false" - label="Rdata to prepare" - optional="false" - format="rdata" - > - </param> - <param - name="class_column" - type="text" - value="" - label="Column class name" - help=" - The name of the column containing the classes - - leave empty to let xsprep guess - " - optional="true" - > - </param> - <section name="samples" title="Samples Options" expanded="false"> - <param - name="selected" - type="data" - multiple="true" - label="Samples to visualize" - optional="true" - format="mzml" - > - </param> - </section> - <section name="database" title="Database Options" expanded="false"> - <param - name="archetypes" - type="select" - multiple="true" - label="Molecule family (for database's compounds enrichment)" - > - <option value="G" selected="true">General</option> - <option value="H">Halogenates</option> - </param> - <conditional name="base"> - <param name="kind" type="select" label="File containing compound's type"> - <option value="none" selected="true">None (deafult)</option> - <option value="tabular">tabular</option> - <option value="sql">sql</option> - </param> - <when value="tabular"> - <param - name="tabular" - type="data" - multiple="true" - label="Tabular file 
containing compound to use in XSeeker" - optional="true" - format="tabular" - > - </param> - </when> - <when value="sql"> - <param - name="sql" - type="data" - multiple="true" - label="SQL file containing compound to use in XSeeker" - optional="true" - format="sql" - > - </param> - </when> - </conditional> - <conditional name="models"> - <param name="kind" type="select" label="How is the database's model defined"> - <option value="default" selected="true">Default (regular XSeeker Database)</option> - <option value="url">Download model file</option> - <option value="git">Get versionned model file</option> - </param> - <when value="url"> - <param name="url" type="text" format="url" label="File URL"/> - </when> - <when value="git"> - <param name="url" type="text" format="url" label="Repo URL"/> - </when> - </conditional> - </section> - </inputs> - <outputs> - <data format="sqlite" name="output" /> - <!-- <data format="xseeker.sqlite" name="output" /> --> - </outputs> <configfiles> <configfile name="base_config"> +<![CDATA[ tryCatch({ DBModelR::ModelDefinition(table="yui", fields=list(yui="INTEGER")) }, error=function(e) { @@ -340,6 +242,179 @@ ) ) ) + ]]> </configfile> </configfiles> + <inputs> + <param + argument="input" + type="data" + multiple="false" + optional="false" + format="rdata" + label="Rdata to prepare" + help=" + This rdata must be produced by a xcms+camera processing + and original files must still be in the history. 
+ " + > + </param> + <param + argument="class" + type="text" + value="" + label="Column class name" + help=" + The name of the column containing the classes - + leave empty to let XSeeker Preparator guess + " + optional="true" + > + </param> + <section name="samples" title="Samples Options" expanded="false"> + <param + name="selected" + type="data" + multiple="true" + label="Samples to visualize" + optional="true" + format="mzml" + > + </param> + </section> + <section name="database" title="Database Options" expanded="false"> + <param + name="archetypes" + type="select" + multiple="true" + label="Molecule family (for database's compounds enrichment)" + > + <option value="G" selected="true">General</option> + <option value="H">Halogenates</option> + </param> + <conditional name="base"> + <param name="kind" type="select" label="File containing compound's type"> + <option value="none" selected="true">None (default)</option> + <option value="tabular">tabular</option> + <option value="sql">sql</option> + </param> + <when value="none" /> + <when value="sql" /> + <when value="tabular"> + <param + name="tabular" + type="data" + multiple="true" + label="Tabular file containing compound to use in XSeeker" + optional="true" + format="tabular" + > + </param> + </when> + <when value="sql"> + <param + name="sql" + type="data" + multiple="true" + label="SQL file containing compound to use in XSeeker" + optional="true" + format="sql" + > + </param> + </when> + </conditional> + <conditional name="models"> + <param name="kind" type="select" label="How is the database's model defined"> + <option value="default" selected="true">Default (regular XSeeker Database)</option> + <option value="url">Download model file</option> + <option value="git">Get versionned model file</option> + </param> + <when value="default" /> + <when value="url"> + <param name="url" type="text" format="url" label="File URL"/> + </when> + <when value="git"> + <param name="url" type="text" format="url" label="Repo 
URL"/> + </when> + </conditional> + </section> + </inputs> + <outputs> + <data format="sqlite" name="output" /> + <!-- <data format="xseeker.sqlite" name="output" /> --> + </outputs> + <help> + +.. class:: infomark + +**Authors** Lain Pavot (lain.pavot@inrae.fr) + +-------------------------------------- + +================== +XSeeker Preparator +================== + +----------- +Description +----------- + +A preparation tool to precalculate and reorganize data from XCMS+CAMERA so that the +XSeeker displayer tool can display them faster (from 30 minutes or several hours down to a few seconds). + + +----------------- +Workflow position +----------------- + +**Upstream tools** + +================ ======================== =========== =============== + Name output file format parameter +================ ======================== =========== =============== + CAMERA rdata.camera.quick rdata +================ ======================== =========== =============== + + +**Downstream tools** + +================ ======================== =========== =============== + Name output file format parameter +================ ======================== =========== =============== + XSeeker sqlite sqlite3 NA +================ ======================== =========== =============== + + + +----------- +Input files +----------- + +Takes an rdata file output by CAMERA (after XCMS) as input. The rdata must have +been produced using some mzml files - mzml files which must still be present +in the history when running xseekerpreparator: +The rdata contains only some data and the paths to the original mzml files. +These mzml files contain a lot of useful information needed to prepare +the data for XSeeker. These files are not given as input, but are still +needed. + +---------- +Parameters +---------- +Column class name: + - the name of the column that defines classes in your sample metadata. +Sample options: + - provide mzml file names to process. 
Other files defined in the rdata will + not be processed, and will not be available in XSeeker. +Database Options: + - provide the molecular family you want to annotate and, optionally, a compound file. + - The database's model can be re-defined - for developers only. + +------------ +Output files +------------ +An SQLite3 file is a database file that organizes data in such a way that it is +easily stored, filtered, modified and retrieved. + </help> + <citations></citations> </tool>
--- a/format_versionning.MD Tue Oct 18 12:57:28 2022 +0000 +++ b/format_versionning.MD Tue Dec 06 10:18:10 2022 +0000 @@ -9,6 +9,14 @@ The first version (the older one) is at the bottom of this file, and the modifications provided by the newest versions are on top of the file. +Also, the changelog will be merged into this file + + +VERSION 1.3.1 +===== +Bugfix in file processing: some files were not processed due to +a misunderstanding of the meaning of a field in the rdata. + VERSION 1.3.0 =====
--- a/galaxy/tools/LC-MSMS/XSeekerPreparator.R Tue Oct 18 12:57:28 2022 +0000 +++ b/galaxy/tools/LC-MSMS/XSeekerPreparator.R Tue Dec 06 10:18:10 2022 +0000 @@ -198,7 +198,7 @@ search_tree <- function(path, target) { target <- tolower(target) for (file in list.files(path)) { - if (is.dir(file)) { + if (fs::is.dir(file)) { result <- search_tree(file.path(path, file), target) if (!is.null(result)) { return(result) @@ -400,12 +400,10 @@ guess_translator <- function(header) { result <- list( - # HMDB_ID = NULL, mz = NULL, name = NULL, common_name = NULL, - formula = NULL, - # inchi_key = NULL + formula = NULL ) asked_cols <- names(result) for (asked_col in asked_cols) { @@ -471,12 +469,12 @@ process_sample_list( orm, rdata, samples, show_percent = show_percent, - file_grouping_var = options$class + file_grouping_var = options$class, + options = options ) NULL }, error = function(e) { - message(e) - e + return(e) }) if (!is.null(mzml_tmp_dir)) { unlink(mzml_tmp_dir, recursive = TRUE) @@ -484,6 +482,7 @@ if (!is.null(error)) { stop(error) } + return(!is.null(error)) } gather_mzml_files <- function(rdata) { @@ -510,7 +509,8 @@ rdata, sample_names, show_percent, - file_grouping_var = NULL + file_grouping_var = NULL, + options = list() ) { if (is.null(file_grouping_var)) { file_grouping_var <- find_grouping_var(rdata$variableMetadata) @@ -587,19 +587,6 @@ message("Parameters from previous processes extracted.") - - indices <- as.numeric(unique(var_meta[, file_grouping_var])) - if (any(is.null(names(singlefile)[indices]))) { - stop(sprintf( - paste( - "Indices defined by grouping variable %s are not all present", - "in singlefile names (%s).\nCannot continue. 
Indices: %s" - ), - file_grouping_var, - paste(names(singlefile), collapse = ", "), - paste(indices, collapse = ", ") - )) - } smol_xcms_set <- orm$smol_xcms_set() mz_tab_info <- new.env() g <- xcms::groups(xcms_set) @@ -623,7 +610,7 @@ smol_xcms_set_id <- smol_xcms_set$get_id() rm(smol_xcms_set) - for (no in indices) { + for (no in seq_along(names(singlefile))) { sample_name <- names(singlefile)[[no]] sample_path <- singlefile[[no]] if ( @@ -760,7 +747,6 @@ field_names <- as.list(names(orm$feature()$fields__)) field_names[field_names == "id"] <- NULL - features <- list() dummy_feature <- orm$feature() if (show_percent <- context$show_percent) { @@ -772,7 +758,10 @@ rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))] } - cluster_row <- list() + # features <- list() + features <- as.list(rows) ## allocate all memory before processing + # cluster_row <- list() + cluster_row <- as.list(rows) ## allocate all memory before processing for (row in rows) { if (show_percent && (row / total) * 100 > percent) { percent <- percent + 1 @@ -843,7 +832,8 @@ next_align_group ) next_align_group <- next_align_group + 1 - features[[length(features) + 1]] <- as.list(dummy_feature, field_names) + features[[row]] <- as.list(dummy_feature, field_names) + # features[[length(features) + 1]] <- as.list(dummy_feature, field_names) dummy_feature$clear() } rm(var_meta) @@ -963,6 +953,7 @@ } cluster$save() feature$set_cluster(cluster) + feature$save() return(cluster) } @@ -1064,6 +1055,12 @@ help = "Display this tool's version and exits" ), optparse::make_option( + c("-V", "--verbose"), + action = "store_true", + help = "Does more verbose outputs", + default = FALSE + ), + optparse::make_option( c("-i", "--input"), type = "character", help = "The rdata path to import in XSeeker" @@ -1161,6 +1158,18 @@ load(args$options$input, rdata <- new.env()) -process_rdata(orm, rdata, args$options) +args$options$verbose <- ( + if (args$options$verbose) { + message("Verbose outputs.") + 
\(...) { + message(sprintf(...)) + } + } else { + \(...) { + } + } +) + +err_code <- process_rdata(orm, rdata, args$options) quit(status = err_code)