comparison utils.R @ 0:2810c956ec39 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 506df2aef355b3791567283e1a175914f06b405a
author recetox
date Mon, 13 Feb 2023 10:26:41 +0000
parents
children e8962c8340f4
comparison
equal deleted inserted replaced
-1:000000000000 0:2810c956ec39
1 library(recetox.aplcms)
2
3 get_env_sample_name <- function() {
4 sample_name <- Sys.getenv("SAMPLE_NAME", unset = NA)
5 if (nchar(sample_name) == 0) {
6 sample_name <- NA
7 }
8 if (is.na(sample_name)) {
9 message("The mzML file does not contain run ID.")
10 }
11 return(sample_name)
12 }
13
14 save_sample_name <- function(df, sample_name) {
15 attr(df, "sample_name") <- sample_name
16 return(df)
17 }
18
19 load_sample_name <- function(df) {
20 sample_name <- attr(df, "sample_name")
21 if (is.null(sample_name)) {
22 return(NA)
23 } else {
24 return(sample_name)
25 }
26 }
27
28 save_data_as_parquet_file <- function(data, filename) {
29 arrow::write_parquet(data, filename)
30 }
31
32 load_data_from_parquet_file <- function(filename) {
33 return(arrow::read_parquet(filename))
34 }
35
36 load_parquet_collection <- function(files) {
37 features <- lapply(files, arrow::read_parquet)
38 features <- lapply(features, tibble::as_tibble)
39 return(features)
40 }
41
42 save_parquet_collection <- function(table, sample_names, subdir) {
43 dir.create(subdir)
44 for (i in seq_len(length(table$feature_tables))) {
45 filename <- file.path(subdir, paste0(subdir, "_", sample_names[i], ".parquet"))
46 feature_table <- as.data.frame(table$feature_tables[[i]])
47 feature_table <- save_sample_name(feature_table, sample_names[i])
48 arrow::write_parquet(feature_table, filename)
49 }
50 }
51
52 sort_by_sample_name <- function(tables, sample_names) {
53 return(tables[order(sample_names)])
54 }
55
56 save_tolerances <- function(table, tol_file) {
57 mz_tolerance <- c(table$mz_tol_relative)
58 rt_tolerance <- c(table$rt_tol_relative)
59 arrow::write_parquet(data.frame(mz_tolerance, rt_tolerance), tol_file)
60 }
61
62 get_mz_tol <- function(tolerances) {
63 return(tolerances$mz_tolerance)
64 }
65
66 get_rt_tol <- function(tolerances) {
67 return(tolerances$rt_tolerance)
68 }
69
70 save_aligned_features <- function(aligned_features, metadata_file, rt_file, intensity_file) {
71 save_data_as_parquet_file(aligned_features$metadata, metadata_file)
72 save_data_as_parquet_file(aligned_features$rt, rt_file)
73 save_data_as_parquet_file(aligned_features$intensity, intensity_file)
74 }
75
76 select_table_with_sample_name <- function(tables, sample_name) {
77 sample_names <- lapply(tables, load_sample_name)
78 index <- which(sample_names == sample_name)
79 if (length(index) > 0) {
80 return(tables[[index]])
81 } else {
82 stop(sprintf("Mismatch - sample name '%s' not present in %s",
83 sample_name, paste(sample_names, collapse = ", ")))
84 }
85 }
86
87 select_adjusted <- function(recovered_features) {
88 return(recovered_features$adjusted_features)
89 }
90
91 known_table_columns <- function() {
92 c("chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type",
93 "m.z", "Number_profiles_processed", "Percent_found", "mz_min", "mz_max",
94 "RT_mean", "RT_sd", "RT_min", "RT_max", "int_mean(log)", "int_sd(log)",
95 "int_min(log)", "int_max(log)")
96 }
97
98 save_known_table <- function(table, filename) {
99 columns <- known_table_columns()
100 arrow::write_parquet(table$known_table[columns], filename)
101 }
102
103 read_known_table <- function(filename) {
104 arrow::read_parquet(filename, col_select = known_table_columns())
105 }
106
107 save_pairing <- function(table, filename) {
108 df <- table$pairing %>% as_tibble() %>% setNames(c("new", "old"))
109 arrow::write_parquet(df, filename)
110 }
111
112 join_tables_to_list <- function(metadata, rt_table, intensity_table) {
113 features <- new("list")
114 features$metadata <- metadata
115 features$intensity <- intensity_table
116 features$rt <- rt_table
117 return(features)
118 }
119
120 validate_sample_names <- function(sample_names) {
121 if ((any(is.na(sample_names))) || (length(unique(sample_names)) != length(sample_names))) {
122 stop(sprintf("Sample names absent or not unique - provided sample names: %s",
123 paste(sample_names, collapse = ", ")))
124 }
125 }