# HG changeset patch
# User recetox
# Date 1648035330 0
# Node ID 3287108909633e793bb5595416f6d625eaa41b97
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
diff -r 000000000000 -r 328710890963 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Mar 23 11:35:30 2022 +0000
@@ -0,0 +1,127 @@
+
+ 0.2.0
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ `_;
+ .. [2] When using 'Multiple batches', please cite the WaveICA (2019) paper; otherwise, cite the WaveICA 2.0 (2021) paper.
+ ]]>
+
+
diff -r 000000000000 -r 328710890963 test-data/incomplete_metadata_data.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/incomplete_metadata_data.csv Wed Mar 23 11:35:30 2022 +0000
@@ -0,0 +1,6 @@
+sampleName,class,sampleType,batch,M85T34,M86T41,M86T518,M86T539
+VT_160120_002,sample,sample,1,228520.06430737,35646729.2154397,2386896.97966461,1026645.83653468
+VT_160120_004,sample,sample,1,90217.384387202,35735702.457216,2456290.69621518,1089246.46040563
+VT_160120_006,sample,sample,1,235656.752883839,37021134.4527116,8873450.40260241,837856.449608585
+VT_160120_008,sample,sample,1,16622.9351783435,44302499.262606,2466946.89667101,994979.069689685
+VT_160120_010,sample,sample,1,62385.0742465736,44639738.0735709,2389372.85729467,954938.131337246
diff -r 000000000000 -r 328710890963 test-data/input_data.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_data.csv Wed Mar 23 11:35:30 2022 +0000
@@ -0,0 +1,6 @@
+sampleName,class,sampleType,injectionOrder,batch,M85T34,M86T41,M86T518,M86T539
+VT_160120_002,sample,sample,1,1,228520.06430737,35646729.21543971,2386896.97966461,1026645.83653468
+VT_160120_004,sample,sample,2,1,90217.384387202,35735702.457215995,2456290.69621518,1089246.46040563
+VT_160120_006,sample,sample,3,1,235656.75288383896,37021134.452711605,8873450.40260241,837856.449608585
+VT_160120_008,sample,sample,4,1,16622.9351783435,44302499.262606,2466946.89667101,994979.069689685
+VT_160120_010,sample,sample,5,1,62385.0742465736,44639738.0735709,2389372.85729467,954938.131337246
diff -r 000000000000 -r 328710890963 test-data/input_data_nobatch.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_data_nobatch.csv Wed Mar 23 11:35:30 2022 +0000
@@ -0,0 +1,6 @@
+sampleName,class,sampleType,injectionOrder,M85T34,M86T41,M86T518,M86T539
+VT_160120_002,sample,sample,1,228520.06430737,35646729.21543971,2386896.97966461,1026645.83653468
+VT_160120_004,sample,sample,2,90217.384387202,35735702.457215995,2456290.69621518,1089246.46040563
+VT_160120_006,sample,sample,3,235656.75288383896,37021134.452711605,8873450.40260241,837856.449608585
+VT_160120_008,sample,sample,4,16622.9351783435,44302499.262606,2466946.89667101,994979.069689685
+VT_160120_010,sample,sample,5,62385.0742465736,44639738.0735709,2389372.85729467,954938.131337246
diff -r 000000000000 -r 328710890963 test-data/na_data.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/na_data.csv Wed Mar 23 11:35:30 2022 +0000
@@ -0,0 +1,6 @@
+sampleName,class,sampleType,injectionOrder,batch,M85T34,M86T41,M86T518,M86T539
+VT_160120_002,sample,sample,1,1,NA,35646729.2154397,2386896.97966461,1026645.83653468
+VT_160120_004,sample,sample,2,1,90217.384387202,35735702.457216,2456290.69621518,1089246.46040563
+VT_160120_006,sample,sample,3,1,235656.752883839,37021134.4527116,8873450.40260241,837856.449608585
+VT_160120_008,sample,sample,4,1,16622.9351783435,44302499.262606,2466946.89667101,994979.069689685
+VT_160120_010,sample,sample,5,1,62385.0742465736,44639738.0735709,2389372.85729467,954938.131337246
diff -r 000000000000 -r 328710890963 test-data/normalized_data.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/normalized_data.tsv Wed Mar 23 11:35:30 2022 +0000
@@ -0,0 +1,6 @@
+sampleName class sampleType injectionOrder batch M85T34 M86T41 M86T518 M86T539
+VT_160120_002 sample sample 1 1 355200.506508035 75115889.9077485 6101488.54615418 2007379.02604984
+VT_160120_004 sample sample 2 1 216897.826587868 75204863.1495248 6170882.26270475 2069979.64992079
+VT_160120_006 sample sample 3 1 362337.195084504 76490295.1450204 12588041.969092 1818589.63912375
+VT_160120_008 sample sample 4 1 143303.377379009 83771659.9549148 6181538.46316058 1975712.25920485
+VT_160120_010 sample sample 5 1 189065.516447239 84108898.7658797 6103964.42378424 1935671.32085241
diff -r 000000000000 -r 328710890963 test-data/normalized_data_nobatch.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/normalized_data_nobatch.tsv Wed Mar 23 11:35:30 2022 +0000
@@ -0,0 +1,6 @@
+sampleName class sampleType injectionOrder M85T34 M86T41 M86T518 M86T539
+VT_160120_002 sample sample 1 -9795801.68327296 29546678.5668352 -6207890.55898405 -8941748.93595845
+VT_160120_004 sample sample 2 -9798910.74239713 29543569.5077111 -6210999.61810821 -8944857.99508262
+VT_160120_006 sample sample 3 -9797307.93141959 29545172.3186886 -6209396.80713068 -8943255.18410509
+VT_160120_008 sample sample 4 -9793706.69204694 29548773.5580612 -6205795.56775803 -8939653.94473244
+VT_160120_010 sample sample 5 -9800711.45464277 29541768.7954654 -6212800.33035386 -8946658.70732827
diff -r 000000000000 -r 328710890963 waveica.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/waveica.xml Wed Mar 23 11:35:30 2022 +0000
@@ -0,0 +1,105 @@
+
+ removal of batch effects for untargeted metabolomics data
+
+ macros.xml
+
+
+
+
+ r-recetox-waveica
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10.1016/j.aca.2019.02.010
+ 10.1007/s11306-021-01839-7
+
+
+
diff -r 000000000000 -r 328710890963 waveica_wrapper.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/waveica_wrapper.R Wed Mar 23 11:35:30 2022 +0000
@@ -0,0 +1,155 @@
+# Remove batch effects from a feature table with batch-wise WaveICA.
+#
+# `data` is passed to read.csv(); the table must hold the metadata columns
+# sampleName, class, sampleType, injectionOrder and batch, and every other
+# column is treated as a feature. `wavelet_filter` and `wavelet_length` are
+# combined by get_wf(); `k`, `t`, `t2` and `alpha` are forwarded unchanged to
+# recetox.waveica::waveica(). With `exclude_blanks` set, rows labelled as
+# blanks are dropped afterwards.
+#
+# Returns the sorted data frame with corrected feature columns.
+waveica <- function(data, wavelet_filter, wavelet_length, k, t, t2, alpha,
+                    exclude_blanks) {
+  metadata_columns <- c(
+    "sampleName", "class", "sampleType", "injectionOrder", "batch"
+  )
+
+  # load the table from Galaxy, validate it, order rows by batch/injection
+  data <- read.csv(data, header = TRUE)
+  verify_input_dataframe(data, metadata_columns)
+  data <- sort_by_injection_order(data)
+
+  # every column that is not metadata is treated as a feature
+  is_metadata <- colnames(data) %in% metadata_columns
+  feature_columns <- colnames(data)[!is_metadata]
+  sample_groups <- enumerate_groups(as.character(data$sampleType))
+
+  # run WaveICA and write the corrected values back in place
+  data[, feature_columns] <- recetox.waveica::waveica(
+    data = data[, feature_columns],
+    wf = get_wf(wavelet_filter, wavelet_length),
+    batch = data$batch,
+    group = sample_groups,
+    K = k,
+    t = t,
+    t2 = t2,
+    alpha = alpha
+  )
+
+  # optionally drop blank samples from the result
+  if (exclude_blanks) {
+    data <- exclude_group(data, sample_groups)
+  }
+  data
+}
+
+
+# Normalize a feature table without using batch information (single batch).
+#
+# `data` is passed to read.csv(); the table must hold the metadata columns
+# sampleName, class, sampleType and injectionOrder (a batch column may be
+# present and is not treated as a feature). `wavelet_filter`/`wavelet_length`
+# are combined by get_wf(); `k`, `alpha` and `cutoff` are forwarded unchanged
+# to recetox.waveica::waveica_nonbatchwise(). With `exclude_blanks` set, rows
+# labelled as blanks are dropped. Returns the sorted, corrected data frame.
+waveica_singlebatch <- function(data, wavelet_filter, wavelet_length, k,
+                                alpha, cutoff, exclude_blanks) {
+  # get input from the Galaxy, preprocess data
+  data <- read.csv(data, header = TRUE)
+
+  required_columns <- c("sampleName", "class", "sampleType", "injectionOrder")
+  optional_columns <- c("batch")
+  verify_input_dataframe(data, required_columns)
+
+  data <- sort_by_injection_order(data)
+
+  metadata_columns <- c(required_columns, optional_columns)
+  feature_columns <- colnames(data)[!colnames(data) %in% metadata_columns]
+  # labels of the sorted rows; exclude_group() needs them to locate blanks
+  group <- enumerate_groups(as.character(data$sampleType))
+
+  # run WaveICA and write the corrected values back in place
+  data[, feature_columns] <- recetox.waveica::waveica_nonbatchwise(
+    data = data[, feature_columns],
+    wf = get_wf(wavelet_filter, wavelet_length),
+    injection_order = data$injectionOrder,
+    K = k,
+    alpha = alpha,
+    cutoff = cutoff
+  )
+
+  # remove blanks from dataset
+  if (exclude_blanks) {
+    data <- exclude_group(data, group)
+  }
+  data
+}
+
+
+# Sort the rows of `data` into ascending injection order.
+#
+# When a "batch" column is present it is the primary sort key, so rows are
+# ordered by batch first and by injectionOrder within each batch.
+sort_by_injection_order <- function(data) {
+  has_batch <- "batch" %in% colnames(data)
+  row_order <- if (has_batch) {
+    order(data[, "batch"], data[, "injectionOrder"], decreasing = FALSE)
+  } else {
+    order(data[, "injectionOrder"], decreasing = FALSE)
+  }
+  data[row_order, ]
+}
+
+
+# Stop with an error if `data` has NA cells or lacks a required column.
+verify_input_dataframe <- function(data, required_columns) {
+  columns_present <- required_columns %in% colnames(data)
+  # NA cells take precedence: they are reported before missing columns
+  if (anyNA(data)) stop("Error: dataframe cannot contain NULL values!
+Make sure that your dataframe does not contain empty cells")
+  if (!all(columns_present)) stop("Error: missing metadata!
+Make sure that the following columns are present in your dataframe: ", paste(required_columns, collapse = ", "))
+}
+
+
+# Recode labels containing blank/sample/qc (any case) as 0/1/2, in that order.
+enumerate_groups <- function(group) {
+  codes <- c(blank = 0, sample = 1, qc = 2)
+  for (label in names(codes)) {
+    group[grepl(label, tolower(group))] <- codes[[label]]
+  }
+  group
+}
+
+
+# Build the wavelet filter name passed on to the R wavelets function.
+#
+# The filter family and length are concatenated (e.g. "d" and 4 give "d4");
+# the one exception is "d2", which is replaced by "haar".
+get_wf <- function(wavelet_filter, wavelet_length) {
+  filter_name <- paste0(wavelet_filter, wavelet_length)
+  if (filter_name == "d2") {
+    return("haar")
+  }
+  filter_name
+}
+
+
+# Drop the rows of `data` whose entry in `group` is 0 (the code for blanks);
+# a notice is printed when at least one row is removed.
+exclude_group <- function(data, group) {
+  blank_rows <- which(group %in% 0)
+  if (length(blank_rows) == 0) {
+    return(data)
+  }
+  kept <- data[-blank_rows, ]
+  cat("Blank samples have been excluded from the dataframe.\n")
+  kept
+}
+
+
+# Write `data` to `output` as a tab-separated file, then report completion.
+store_data <- function(data, output) {
+  utils::write.table(data, output, quote = FALSE, sep = "\t", row.names = FALSE)
+  cat("Normalization has been completed.\n")
+}