Mercurial > repos > recetox > recetox_aplcms_compute_clusters
comparison utils.R @ 0:82737757f3d5 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 506df2aef355b3791567283e1a175914f06b405a
| author | recetox |
|---|---|
| date | Mon, 13 Feb 2023 10:27:56 +0000 |
| parents | |
| children | 092bbb03a217 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:82737757f3d5 |
|---|---|
| 1 library(recetox.aplcms) | |
| 2 | |
| 3 get_env_sample_name <- function() { | |
| 4 sample_name <- Sys.getenv("SAMPLE_NAME", unset = NA) | |
| 5 if (nchar(sample_name) == 0) { | |
| 6 sample_name <- NA | |
| 7 } | |
| 8 if (is.na(sample_name)) { | |
| 9 message("The mzML file does not contain run ID.") | |
| 10 } | |
| 11 return(sample_name) | |
| 12 } | |
| 13 | |
| 14 save_sample_name <- function(df, sample_name) { | |
| 15 attr(df, "sample_name") <- sample_name | |
| 16 return(df) | |
| 17 } | |
| 18 | |
| 19 load_sample_name <- function(df) { | |
| 20 sample_name <- attr(df, "sample_name") | |
| 21 if (is.null(sample_name)) { | |
| 22 return(NA) | |
| 23 } else { | |
| 24 return(sample_name) | |
| 25 } | |
| 26 } | |
| 27 | |
| 28 save_data_as_parquet_file <- function(data, filename) { | |
| 29 arrow::write_parquet(data, filename) | |
| 30 } | |
| 31 | |
| 32 load_data_from_parquet_file <- function(filename) { | |
| 33 return(arrow::read_parquet(filename)) | |
| 34 } | |
| 35 | |
| 36 load_parquet_collection <- function(files) { | |
| 37 features <- lapply(files, arrow::read_parquet) | |
| 38 features <- lapply(features, tibble::as_tibble) | |
| 39 return(features) | |
| 40 } | |
| 41 | |
| 42 save_parquet_collection <- function(table, sample_names, subdir) { | |
| 43 dir.create(subdir) | |
| 44 for (i in seq_len(length(table$feature_tables))) { | |
| 45 filename <- file.path(subdir, paste0(subdir, "_", sample_names[i], ".parquet")) | |
| 46 feature_table <- as.data.frame(table$feature_tables[[i]]) | |
| 47 feature_table <- save_sample_name(feature_table, sample_names[i]) | |
| 48 arrow::write_parquet(feature_table, filename) | |
| 49 } | |
| 50 } | |
| 51 | |
| 52 sort_by_sample_name <- function(tables, sample_names) { | |
| 53 return(tables[order(sample_names)]) | |
| 54 } | |
| 55 | |
| 56 save_tolerances <- function(table, tol_file) { | |
| 57 mz_tolerance <- c(table$mz_tol_relative) | |
| 58 rt_tolerance <- c(table$rt_tol_relative) | |
| 59 arrow::write_parquet(data.frame(mz_tolerance, rt_tolerance), tol_file) | |
| 60 } | |
| 61 | |
| 62 get_mz_tol <- function(tolerances) { | |
| 63 return(tolerances$mz_tolerance) | |
| 64 } | |
| 65 | |
| 66 get_rt_tol <- function(tolerances) { | |
| 67 return(tolerances$rt_tolerance) | |
| 68 } | |
| 69 | |
| 70 save_aligned_features <- function(aligned_features, metadata_file, rt_file, intensity_file) { | |
| 71 save_data_as_parquet_file(aligned_features$metadata, metadata_file) | |
| 72 save_data_as_parquet_file(aligned_features$rt, rt_file) | |
| 73 save_data_as_parquet_file(aligned_features$intensity, intensity_file) | |
| 74 } | |
| 75 | |
| 76 select_table_with_sample_name <- function(tables, sample_name) { | |
| 77 sample_names <- lapply(tables, load_sample_name) | |
| 78 index <- which(sample_names == sample_name) | |
| 79 if (length(index) > 0) { | |
| 80 return(tables[[index]]) | |
| 81 } else { | |
| 82 stop(sprintf("Mismatch - sample name '%s' not present in %s", | |
| 83 sample_name, paste(sample_names, collapse = ", "))) | |
| 84 } | |
| 85 } | |
| 86 | |
| 87 select_adjusted <- function(recovered_features) { | |
| 88 return(recovered_features$adjusted_features) | |
| 89 } | |
| 90 | |
| 91 known_table_columns <- function() { | |
| 92 c("chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type", | |
| 93 "m.z", "Number_profiles_processed", "Percent_found", "mz_min", "mz_max", | |
| 94 "RT_mean", "RT_sd", "RT_min", "RT_max", "int_mean(log)", "int_sd(log)", | |
| 95 "int_min(log)", "int_max(log)") | |
| 96 } | |
| 97 | |
| 98 save_known_table <- function(table, filename) { | |
| 99 columns <- known_table_columns() | |
| 100 arrow::write_parquet(table$known_table[columns], filename) | |
| 101 } | |
| 102 | |
| 103 read_known_table <- function(filename) { | |
| 104 arrow::read_parquet(filename, col_select = known_table_columns()) | |
| 105 } | |
| 106 | |
| 107 save_pairing <- function(table, filename) { | |
| 108 df <- table$pairing %>% as_tibble() %>% setNames(c("new", "old")) | |
| 109 arrow::write_parquet(df, filename) | |
| 110 } | |
| 111 | |
| 112 join_tables_to_list <- function(metadata, rt_table, intensity_table) { | |
| 113 features <- new("list") | |
| 114 features$metadata <- metadata | |
| 115 features$intensity <- intensity_table | |
| 116 features$rt <- rt_table | |
| 117 return(features) | |
| 118 } | |
| 119 | |
| 120 validate_sample_names <- function(sample_names) { | |
| 121 if ((any(is.na(sample_names))) || (length(unique(sample_names)) != length(sample_names))) { | |
| 122 stop(sprintf("Sample names absent or not unique - provided sample names: %s", | |
| 123 paste(sample_names, collapse = ", "))) | |
| 124 } | |
| 125 } |
