comparison main.R @ 0:c67f0eb6a909 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit b3e0c34f12c9c38fb34bf48197974620d60b50c5
author recetox
date Thu, 26 May 2022 16:47:09 +0000
parents
children 87b669bdb51a
comparison
equal deleted inserted replaced
-1:000000000000 0:c67f0eb6a909
1 library(recetox.aplcms)
2 library(dplyr)
3
4 save_extracted_features <- function(df, filename) {
5 df <- as.data.frame(df)
6 columns <- c("mz", "pos", "sd1", "sd2", "area")
7 arrow::write_parquet(df[columns], filename)
8 }
9
10 save_aligned_feature_table <- function(df, filename) {
11 columns <- c("feature", "mz", "rt", "sample", "sample_rt", "sample_intensity")
12 arrow::write_parquet(df[columns], filename)
13 }
14
15 save_recovered_feature_table <- function(df, filename, out_format) {
16 columns <- c("feature", "mz", "rt", "sample", "sample_rt", "sample_intensity")
17 if (out_format == "recetox") {
18 peak_table <- df[columns]
19 recetox_peak_table <- rcx_aplcms_to_rcx_xmsannotator(peak_table)
20 arrow::write_parquet(recetox_peak_table, filename)
21 } else {
22 arrow::write_parquet(df[columns], filename)
23 }
24 }
25
26 rcx_aplcms_to_rcx_xmsannotator <- function(peak_table) {
27 col_base <- c("feature", "mz", "rt")
28 output_table <- peak_table %>% distinct(across(any_of(col_base)))
29
30 for (level in levels(factor(peak_table$sample))) {
31 subdata <- peak_table %>%
32 filter(sample == level) %>%
33 select(any_of(c(col_base, "sample_intensity"))) %>%
34 rename(!!level := "sample_intensity")
35 output_table <- inner_join(output_table, subdata, by = col_base)
36 }
37 output_table <- output_table %>% rename(peak = feature)
38 return(output_table)
39 }
40
41 known_table_columns <- function() {
42 c("chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type",
43 "m.z", "Number_profiles_processed", "Percent_found", "mz_min", "mz_max",
44 "RT_mean", "RT_sd", "RT_min", "RT_max", "int_mean(log)", "int_sd(log)",
45 "int_min(log)", "int_max(log)")
46 }
47
48 save_known_table <- function(df, filename) {
49 columns <- known_table_columns()
50 arrow::write_parquet(df[columns], filename)
51 }
52
53 read_known_table <- function(filename) {
54 arrow::read_parquet(filename, col_select = known_table_columns())
55 }
56
57 save_pairing <- function(df, filename) {
58 write.table(df, filename, row.names = FALSE, col.names = c("new", "old"))
59 }
60
61 save_all_extracted_features <- function(dfs, filenames) {
62 filenames <- tools::file_path_sans_ext(basename(filenames))
63 filenames <- paste0(filenames, ".parquet")
64 filenames <- file.path("extracted", filenames)
65 dir.create("extracted")
66 mapply(save_extracted_features, dfs, filenames)
67 }
68
69 save_all_corrected_features <- function(dfs, filenames) {
70 filenames <- tools::file_path_sans_ext(basename(filenames))
71 filenames <- paste0(filenames, ".parquet")
72 filenames <- file.path("corrected", filenames)
73 dir.create("corrected")
74 mapply(save_extracted_features, dfs, filenames)
75 }
76
77 unsupervised_main <- function(sample_files, aligned_file, recovered_file, out_format, ...) {
78 sample_files <- sort_samples_by_acquisition_number(sample_files)
79
80 res <- unsupervised(filenames = sample_files, ...)
81
82 save_all_extracted_features(res$extracted_features, sample_files)
83 save_all_corrected_features(res$corrected_features, sample_files)
84
85 save_aligned_feature_table(res$aligned_feature_sample_table, aligned_file)
86 save_recovered_feature_table(res$recovered_feature_sample_table, recovered_file, out_format)
87 }
88
89 hybrid_main <- function(sample_files, known_table_file, updated_known_table_file, pairing_file, aligned_file, recovered_file, out_format, ...) {
90 sample_files <- sort_samples_by_acquisition_number(sample_files)
91
92 known <- read_known_table(known_table_file)
93 res <- hybrid(filenames = sample_files, known_table = known, ...)
94
95 save_known_table(res$updated_known_table, updated_known_table_file)
96 save_pairing(res$features_known_table_pairing, pairing_file)
97
98 save_all_extracted_features(res$extracted_features, sample_files)
99 save_all_corrected_features(res$corrected_features, sample_files)
100
101 save_aligned_feature_table(res$aligned_feature_sample_table, aligned_file)
102 save_recovered_feature_table(res$recovered_feature_sample_table, recovered_file, out_format)
103 }
104
105 two_step_hybrid_main <- function(sample_files, known_table_file, updated_known_table_file, recovered_file, aligned_file, out_format, metadata, ...) {
106 sample_files <- sort_samples_by_acquisition_number(sample_files)
107 metadata <- read.table(metadata, sep = ",", header = TRUE)
108
109 known_table <- read_known_table(known_table_file)
110 res <- two.step.hybrid(filenames = sample_files, known.table = known_table, work_dir = getwd(), metadata = metadata, ...)
111
112 save_known_table(res$known_table, updated_known_table_file)
113 save_aligned_feature_table(res$aligned_features, aligned_file)
114 save_recovered_feature_table(res$final_features, recovered_file, out_format)
115 }