Mercurial > repos > recetox > recetox_aplcms_compute_clusters
diff utils.R @ 7:550667ce03f1 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 97249a1af94ac5c387e1ede274dec5364f71cde9
author | recetox |
---|---|
date | Wed, 11 Oct 2023 11:17:39 +0000 |
parents | 149b9cc7499b |
children | b9b19a74ac01 |
line wrap: on
line diff
--- a/utils.R Wed Jul 19 00:27:18 2023 +0000 +++ b/utils.R Wed Oct 11 11:17:39 2023 +0000 @@ -1,94 +1,98 @@ library(recetox.aplcms) get_env_sample_name <- function() { - sample_name <- Sys.getenv("SAMPLE_NAME", unset = NA) - if (nchar(sample_name) == 0) { - sample_name <- NA - } - if (is.na(sample_name)) { - message("The mzML file does not contain run ID.") - } - return(sample_name) + sample_name <- Sys.getenv("SAMPLE_NAME", unset = NA) + if (nchar(sample_name) == 0) { + sample_name <- NA + } + if (is.na(sample_name)) { + message("The mzML file does not contain run ID.") + } + return(sample_name) } save_sample_name <- function(df, sample_name) { - attr(df, "sample_name") <- sample_name - return(df) + attr(df, "sample_name") <- sample_name + return(df) } restore_sample_name <- function(df) { - return(df$sample_id[1]) + return(df$sample_id[1]) } load_sample_name <- function(df) { - sample_name <- attr(df, "sample_name") - if (is.null(sample_name)) { - return(NA) - } else { - return(sample_name) - } + sample_name <- attr(df, "sample_name") + if (is.null(sample_name)) { + return(NA) + } else { + return(sample_name) + } } save_data_as_parquet_file <- function(data, filename) { - arrow::write_parquet(data, filename) + arrow::write_parquet(data, filename) } load_data_from_parquet_file <- function(filename) { - return(arrow::read_parquet(filename)) + return(arrow::read_parquet(filename)) } load_parquet_collection <- function(files) { - features <- lapply(files, arrow::read_parquet) - features <- lapply(features, tibble::as_tibble) - return(features) + features <- lapply(files, arrow::read_parquet) + features <- lapply(features, tibble::as_tibble) + return(features) } save_parquet_collection <- function(feature_tables, sample_names, subdir) { - dir.create(subdir) - for (i in seq_len(length(feature_tables))) { - filename <- file.path(subdir, paste0(sample_names[i], ".parquet")) - feature_table <- as.data.frame(feature_tables[[i]]) - feature_table <- save_sample_name(feature_table, sample_names[i]) - arrow::write_parquet(feature_table, filename) - } + dir.create(subdir) + for (i in seq_len(length(feature_tables))) { + filename <- file.path(subdir, paste0(sample_names[i], ".parquet")) + feature_table <- as.data.frame(feature_tables[[i]]) + feature_table <- save_sample_name(feature_table, sample_names[i]) + arrow::write_parquet(feature_table, filename) + } } sort_by_sample_name <- function(tables, sample_names) { - return(tables[order(sample_names)]) + return(tables[order(sample_names)]) } save_tolerances <- function(table, tol_file) { - mz_tolerance <- c(table$mz_tol_relative) - rt_tolerance <- c(table$rt_tol_relative) - arrow::write_parquet(data.frame(mz_tolerance, rt_tolerance), tol_file) + mz_tolerance <- c(table$mz_tol_relative) + rt_tolerance <- c(table$rt_tol_relative) + arrow::write_parquet(data.frame(mz_tolerance, rt_tolerance), tol_file) } save_aligned_features <- function(aligned_features, metadata_file, rt_file, intensity_file) { - save_data_as_parquet_file(aligned_features$metadata, metadata_file) - save_data_as_parquet_file(aligned_features$rt, rt_file) - save_data_as_parquet_file(aligned_features$intensity, intensity_file) + save_data_as_parquet_file(aligned_features$metadata, metadata_file) + save_data_as_parquet_file(aligned_features$rt, rt_file) + save_data_as_parquet_file(aligned_features$intensity, intensity_file) } select_table_with_sample_name <- function(tables, sample_name) { - sample_names <- lapply(tables, load_sample_name) - index <- which(sample_names == sample_name) - if (length(index) > 0) { - return(tables[[index]]) - } else { - stop(sprintf("Mismatch - sample name '%s' not present in %s", - sample_name, paste(sample_names, collapse = ", "))) - } + sample_names <- lapply(tables, load_sample_name) + index <- which(sample_names == sample_name) + if (length(index) > 0) { + return(tables[[index]]) + } else { + stop(sprintf( + "Mismatch - sample name '%s' not present in %s", + sample_name, paste(sample_names, collapse = ", ") + )) + } } select_adjusted <- function(recovered_features) { - return(recovered_features$adjusted_features) + return(recovered_features$adjusted_features) } known_table_columns <- function() { - c("chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type", + c( + "chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type", "m.z", "Number_profiles_processed", "Percent_found", "mz_min", "mz_max", "RT_mean", "RT_sd", "RT_min", "RT_max", "int_mean(log)", "int_sd(log)", - "int_min(log)", "int_max(log)") + "int_min(log)", "int_max(log)" + ) } save_known_table <- function(table, filename) { @@ -101,7 +105,9 @@ } save_pairing <- function(table, filename) { - df <- table$pairing %>% as_tibble() %>% setNames(c("new", "old")) + df <- table$pairing %>% + as_tibble() %>% + setNames(c("new", "old")) arrow::write_parquet(df, filename) } @@ -114,18 +120,20 @@ } validate_sample_names <- function(sample_names) { - if ((any(is.na(sample_names))) || (length(unique(sample_names)) != length(sample_names))) { - stop(sprintf("Sample names absent or not unique - provided sample names: %s", - paste(sample_names, collapse = ", "))) - } + if ((any(is.na(sample_names))) || (length(unique(sample_names)) != length(sample_names))) { + stop(sprintf( + "Sample names absent or not unique - provided sample names: %s", + paste(sample_names, collapse = ", ") + )) + } } determine_sigma_ratios <- function(sigma_ratio_lim_min = NA, sigma_ratio_lim_max = NA) { - if (is.na(sigma_ratio_lim_min)) { - sigma_ratio_lim_min <- 0 - } - if (is.na(sigma_ratio_lim_max)) { - sigma_ratio_lim_max <- Inf - } - return(c(sigma_ratio_lim_min, sigma_ratio_lim_max)) + if (is.na(sigma_ratio_lim_min)) { + sigma_ratio_lim_min <- 0 + } + if (is.na(sigma_ratio_lim_max)) { + sigma_ratio_lim_max <- Inf + } + return(c(sigma_ratio_lim_min, sigma_ratio_lim_max)) }