Mercurial > repos > recetox > recetox_aplcms_unsupervised
view main.R @ 1:4e6281945270 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit b3e0c34f12c9c38fb34bf48197974620d60b50c5
author | recetox |
---|---|
date | Thu, 26 May 2022 16:46:13 +0000 |
parents | d740352d272a |
children | 10093dea9b3f |
line wrap: on
line source
library(recetox.aplcms) library(dplyr) save_extracted_features <- function(df, filename) { df <- as.data.frame(df) columns <- c("mz", "pos", "sd1", "sd2", "area") arrow::write_parquet(df[columns], filename) } save_aligned_feature_table <- function(df, filename) { columns <- c("feature", "mz", "rt", "sample", "sample_rt", "sample_intensity") arrow::write_parquet(df[columns], filename) } save_recovered_feature_table <- function(df, filename, out_format) { columns <- c("feature", "mz", "rt", "sample", "sample_rt", "sample_intensity") if (out_format == "recetox") { peak_table <- df[columns] recetox_peak_table <- rcx_aplcms_to_rcx_xmsannotator(peak_table) arrow::write_parquet(recetox_peak_table, filename) } else { arrow::write_parquet(df[columns], filename) } } rcx_aplcms_to_rcx_xmsannotator <- function(peak_table) { col_base <- c("feature", "mz", "rt") output_table <- peak_table %>% distinct(across(any_of(col_base))) for (level in levels(factor(peak_table$sample))) { subdata <- peak_table %>% filter(sample == level) %>% select(any_of(c(col_base, "sample_intensity"))) %>% rename(!!level := "sample_intensity") output_table <- inner_join(output_table, subdata, by = col_base) } output_table <- output_table %>% rename(peak = feature) return(output_table) } known_table_columns <- function() { c("chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type", "m.z", "Number_profiles_processed", "Percent_found", "mz_min", "mz_max", "RT_mean", "RT_sd", "RT_min", "RT_max", "int_mean(log)", "int_sd(log)", "int_min(log)", "int_max(log)") } save_known_table <- function(df, filename) { columns <- known_table_columns() arrow::write_parquet(df[columns], filename) } read_known_table <- function(filename) { arrow::read_parquet(filename, col_select = known_table_columns()) } save_pairing <- function(df, filename) { write.table(df, filename, row.names = FALSE, col.names = c("new", "old")) } save_all_extracted_features <- function(dfs, filenames) { filenames <- tools::file_path_sans_ext(basename(filenames)) filenames <- paste0(filenames, ".parquet") filenames <- file.path("extracted", filenames) dir.create("extracted") mapply(save_extracted_features, dfs, filenames) } save_all_corrected_features <- function(dfs, filenames) { filenames <- tools::file_path_sans_ext(basename(filenames)) filenames <- paste0(filenames, ".parquet") filenames <- file.path("corrected", filenames) dir.create("corrected") mapply(save_extracted_features, dfs, filenames) } unsupervised_main <- function(sample_files, aligned_file, recovered_file, out_format, ...) { sample_files <- sort_samples_by_acquisition_number(sample_files) res <- unsupervised(filenames = sample_files, ...) save_all_extracted_features(res$extracted_features, sample_files) save_all_corrected_features(res$corrected_features, sample_files) save_aligned_feature_table(res$aligned_feature_sample_table, aligned_file) save_recovered_feature_table(res$recovered_feature_sample_table, recovered_file, out_format) } hybrid_main <- function(sample_files, known_table_file, updated_known_table_file, pairing_file, aligned_file, recovered_file, out_format, ...) { sample_files <- sort_samples_by_acquisition_number(sample_files) known <- read_known_table(known_table_file) res <- hybrid(filenames = sample_files, known_table = known, ...) save_known_table(res$updated_known_table, updated_known_table_file) save_pairing(res$features_known_table_pairing, pairing_file) save_all_extracted_features(res$extracted_features, sample_files) save_all_corrected_features(res$corrected_features, sample_files) save_aligned_feature_table(res$aligned_feature_sample_table, aligned_file) save_recovered_feature_table(res$recovered_feature_sample_table, recovered_file, out_format) } two_step_hybrid_main <- function(sample_files, known_table_file, updated_known_table_file, recovered_file, aligned_file, out_format, metadata, ...) { sample_files <- sort_samples_by_acquisition_number(sample_files) metadata <- read.table(metadata, sep = ",", header = TRUE) known_table <- read_known_table(known_table_file) res <- two.step.hybrid(filenames = sample_files, known.table = known_table, work_dir = getwd(), metadata = metadata, ...) save_known_table(res$known_table, updated_known_table_file) save_aligned_feature_table(res$aligned_features, aligned_file) save_recovered_feature_table(res$final_features, recovered_file, out_format) }