Mercurial > repos > recetox > waveica
comparison waveica_wrapper.R @ 2:6480c6d5fa36 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 77bea6c84c5a932fa203723d4d5fea6147f06505
| author | recetox |
|---|---|
| date | Fri, 23 Jun 2023 07:27:29 +0000 |
| parents | b77023c41c76 |
| children | dbbedb14b44c |
comparison legend: equal | deleted | inserted | replaced
| 1:b77023c41c76 | 2:6480c6d5fa36 |
|---|---|
| 1 read_file <- function(file, metadata, ft_ext, mt_ext) { | 1 read_file <- function(file, metadata, ft_ext, mt_ext, transpose) { |
| 2 data <- read_data(file, ft_ext) | 2 data <- read_data(file, ft_ext) |
| 3 | |
| 4 if (transpose) { | |
| 5 col_names <- c("sampleName", data[[1]]) | |
| 6 t_data <- data[-1] | |
| 7 t_data <- t(t_data) | |
| 8 data <- data.frame(rownames(t_data), t_data) | |
| 9 colnames(data) <- col_names | |
| 10 } | |
| 3 | 11 |
| 4 if (!is.na(metadata)) { | 12 if (!is.na(metadata)) { |
| 5 mt_data <- read_data(metadata, mt_ext) | 13 mt_data <- read_data(metadata, mt_ext) |
| 6 data <- merge(mt_data, data, by = "sampleName") | 14 data <- merge(mt_data, data, by = "sampleName") |
| 7 } | 15 } |
| 22 } | 30 } |
| 23 | 31 |
| 24 waveica <- function(file, | 32 waveica <- function(file, |
| 25 metadata = NA, | 33 metadata = NA, |
| 26 ext, | 34 ext, |
| 35 transpose = FALSE, | |
| 27 wavelet_filter, | 36 wavelet_filter, |
| 28 wavelet_length, | 37 wavelet_length, |
| 29 k, | 38 k, |
| 30 t, | 39 t, |
| 31 t2, | 40 t2, |
| 32 alpha, | 41 alpha, |
| 33 exclude_blanks) { | 42 exclude_blanks) { |
| 34 | |
| 35 # get input from the Galaxy, preprocess data | 43 # get input from the Galaxy, preprocess data |
| 36 ext <- strsplit(x = ext, split = "\\,")[[1]] | 44 ext <- strsplit(x = ext, split = "\\,")[[1]] |
| 37 | 45 |
| 38 ft_ext <- ext[1] | 46 ft_ext <- ext[1] |
| 39 mt_ext <- ext[2] | 47 mt_ext <- ext[2] |
| 40 | 48 |
| 41 data <- read_file(file, metadata, ft_ext, mt_ext) | 49 data <- read_file(file, metadata, ft_ext, mt_ext, transpose) |
| 42 | 50 |
| 43 required_columns <- c("sampleName", "class", "sampleType", "injectionOrder", "batch") | 51 required_columns <- c( |
| 44 verify_input_dataframe(data, required_columns) | 52 "sampleName", "class", "sampleType", |
| 53 "injectionOrder", "batch" | |
| 54 ) | |
| 55 data <- verify_input_dataframe(data, required_columns) | |
| 45 | 56 |
| 46 data <- sort_by_injection_order(data) | 57 data <- sort_by_injection_order(data) |
| 47 | 58 |
| 48 # separate data into features, batch and group | 59 # separate data into features, batch and group |
| 49 feature_columns <- colnames(data)[!colnames(data) %in% required_columns] | 60 feature_columns <- colnames(data)[!colnames(data) %in% required_columns] |
| 74 } | 85 } |
| 75 | 86 |
| 76 waveica_singlebatch <- function(file, | 87 waveica_singlebatch <- function(file, |
| 77 metadata = NA, | 88 metadata = NA, |
| 78 ext, | 89 ext, |
| 90 transpose = FALSE, | |
| 79 wavelet_filter, | 91 wavelet_filter, |
| 80 wavelet_length, | 92 wavelet_length, |
| 81 k, | 93 k, |
| 82 alpha, | 94 alpha, |
| 83 cutoff, | 95 cutoff, |
| 84 exclude_blanks) { | 96 exclude_blanks) { |
| 85 | |
| 86 # get input from the Galaxy, preprocess data | 97 # get input from the Galaxy, preprocess data |
| 87 ext <- strsplit(x = ext, split = "\\,")[[1]] | 98 ext <- strsplit(x = ext, split = "\\,")[[1]] |
| 88 | 99 |
| 89 ft_ext <- ext[1] | 100 ft_ext <- ext[1] |
| 90 mt_ext <- ext[2] | 101 mt_ext <- ext[2] |
| 91 | 102 |
| 92 data <- read_file(file, metadata, ft_ext, mt_ext) | 103 data <- read_file(file, metadata, ft_ext, mt_ext, transpose) |
| 93 | 104 |
| 94 required_columns <- c("sampleName", "class", "sampleType", "injectionOrder") | 105 required_columns <- c("sampleName", "class", "sampleType", "injectionOrder") |
| 95 optional_columns <- c("batch") | 106 optional_columns <- c("batch") |
| 96 verify_input_dataframe(data, required_columns) | 107 |
| 108 data <- verify_input_dataframe(data, required_columns) | |
| 97 | 109 |
| 98 data <- sort_by_injection_order(data) | 110 data <- sort_by_injection_order(data) |
| 99 | 111 |
| 100 feature_columns <- colnames(data)[!colnames(data) %in% c(required_columns, optional_columns)] | 112 feature_columns <- colnames(data)[!colnames(data) %in% |
| 113 c(required_columns, optional_columns)] | |
| 101 features <- data[, feature_columns] | 114 features <- data[, feature_columns] |
| 102 injection_order <- data$injectionOrder | 115 injection_order <- data$injectionOrder |
| 103 | 116 |
| 104 # run WaveICA | 117 # run WaveICA |
| 105 features <- recetox.waveica::waveica_nonbatchwise( | 118 features <- recetox.waveica::waveica_nonbatchwise( |
| 110 alpha = alpha, | 123 alpha = alpha, |
| 111 cutoff = cutoff | 124 cutoff = cutoff |
| 112 ) | 125 ) |
| 113 | 126 |
| 114 data[, feature_columns] <- features | 127 data[, feature_columns] <- features |
| 115 | 128 group <- enumerate_groups(as.character(data$sampleType)) |
| 116 # remove blanks from dataset | 129 # remove blanks from dataset |
| 117 if (exclude_blanks) { | 130 if (exclude_blanks) { |
| 118 data <- exclude_group(data, group) | 131 data <- exclude_group(data, group) |
| 119 } | 132 } |
| 120 | 133 |
| 140 verify_input_dataframe <- function(data, required_columns) { | 153 verify_input_dataframe <- function(data, required_columns) { |
| 141 if (anyNA(data)) { | 154 if (anyNA(data)) { |
| 142 stop("Error: dataframe cannot contain NULL values! | 155 stop("Error: dataframe cannot contain NULL values! |
| 143 Make sure that your dataframe does not contain empty cells") | 156 Make sure that your dataframe does not contain empty cells") |
| 144 } else if (!all(required_columns %in% colnames(data))) { | 157 } else if (!all(required_columns %in% colnames(data))) { |
| 145 stop("Error: missing metadata! | 158 stop( |
| 146 Make sure that the following columns are present in your dataframe: ", paste(required_columns, collapse = ", ")) | 159 "Error: missing metadata! |
| 147 } | 160 Make sure that the following columns are present in your dataframe: ", |
| 161 paste(required_columns, collapse = ", ") | |
| 162 ) | |
| 163 } | |
| 164 | |
| 165 data <- verify_column_types(data, required_columns) | |
| 166 | |
| 167 return(data) | |
| 168 } | |
| 169 | |
| 170 verify_column_types <- function(data, required_columns) { | |
| 171 # Specify the column names and their expected types | |
| 172 column_types <- list( | |
| 173 "sampleName" = c("character", "factor"), | |
| 174 "class" = c("character", "factor"), | |
| 175 "sampleType" = c("character", "factor"), | |
| 176 "injectionOrder" = "integer", | |
| 177 "batch" = "integer" | |
| 178 ) | |
| 179 | |
| 180 column_types <- column_types[required_columns] | |
| 181 | |
| 182 for (col_name in names(data)) { | |
| 183 actual_type <- class(data[[col_name]]) | |
| 184 if (col_name %in% names(column_types)) { | |
| 185 expected_types <- column_types[[col_name]] | |
| 186 | |
| 187 if (!actual_type %in% expected_types) { | |
| 188 stop( | |
| 189 "Column ", col_name, " is of type ", actual_type, | |
| 190 " but expected type is ", | |
| 191 paste(expected_types, collapse = " or "), "\n" | |
| 192 ) | |
| 193 } | |
| 194 } else { | |
| 195 if (actual_type != "numeric") { | |
| 196 data[[col_name]] <- as.numeric(as.character(data[[col_name]])) | |
| 197 } | |
| 198 } | |
| 199 } | |
| 200 return(data) | |
| 148 } | 201 } |
| 149 | 202 |
| 150 | 203 |
| 151 # Match group labels with [blank/sample/qc] and enumerate them | 204 # Match group labels with [blank/sample/qc] and enumerate them |
| 152 enumerate_groups <- function(group) { | 205 enumerate_groups <- function(group) { |
| 185 | 238 |
| 186 store_data <- function(data, output, ext) { | 239 store_data <- function(data, output, ext) { |
| 187 if (ext == "csv") { | 240 if (ext == "csv") { |
| 188 write.csv(data, file = output, row.names = FALSE, quote = FALSE) | 241 write.csv(data, file = output, row.names = FALSE, quote = FALSE) |
| 189 } else if (ext == "tsv") { | 242 } else if (ext == "tsv") { |
| 190 write.table(data, file = output, sep = "\t", row.names = FALSE, quote = FALSE) | 243 write.table(data, |
| 244 file = output, sep = "\t", | |
| 245 row.names = FALSE, quote = FALSE | |
| 246 ) | |
| 191 } else { | 247 } else { |
| 192 arrow::write_parquet(data, sink = output) | 248 arrow::write_parquet(data, sink = output) |
| 193 } | 249 } |
| 194 cat("Normalization has been completed.\n") | 250 cat("Normalization has been completed.\n") |
| 195 } | 251 } |
