Mercurial > repos > recetox > isolib
comparison isolib.R @ 4:2b1118bce0b1 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/isolib commit e21ab1b7f16bc0a58b33b8e46f828e150372c307
| author | recetox |
|---|---|
| date | Fri, 01 Nov 2024 08:45:59 +0000 |
| parents | 6b0fef8a77c0 |
| children | 964b4559eb1b |
comparison
equal
deleted
inserted
replaced
| 3:6b0fef8a77c0 | 4:2b1118bce0b1 |
|---|---|
| 1 library(enviPat) | 1 library(enviPat) |
| 2 library(Spectra) | 2 library(Spectra) |
| 3 library(MsBackendMsp) | 3 library(MsBackendMsp) |
| 4 library(MetaboCoreUtils) | 4 library(MetaboCoreUtils) |
| 5 library(readr) | 5 library(readr) |
| 6 library(tidyselect) | |
| 6 | 7 |
| 7 #' @param args A list of command line arguments. | 8 |
| 8 main <- function() { | 9 parse_args <- function() { |
| 10 args <- commandArgs(trailingOnly = TRUE) | |
| 11 | |
| 12 compound_table <- read_tsv( | |
| 13 file = args[1], | |
| 14 col_types = "ccd", | |
| 15 col_select = all_of(c("name", "formula")) | any_of("rt") | |
| 16 ) | |
| 17 | |
| 18 parsed <- list( | |
| 19 compound_table = compound_table, | |
| 20 adducts_to_use = c(unlist(strsplit(args[2], ",", fixed = TRUE))), | |
| 21 threshold = as.numeric(args[3]), | |
| 22 append_adducts = args[4], | |
| 23 append_isotopes = args[5], | |
| 24 out_format = args[6], | |
| 25 outfile = args[7] | |
| 26 ) | |
| 27 return(parsed) | |
| 28 } | |
| 29 | |
| 30 generate_isotope_spectra <- function(compound_table, | |
| 31 adducts_to_use, | |
| 32 append_adducts, | |
| 33 threshold) { | |
| 9 data(isotopes) | 34 data(isotopes) |
| 10 data(adducts) | 35 data(adducts) |
| 11 | 36 |
| 12 args <- commandArgs(trailingOnly = TRUE) | 37 monoisotopic <- isotopes |> |
| 13 compound_table <- read_tsv( | 38 dplyr::group_by(element) |> |
| 14 file = args[1], | 39 dplyr::slice_max(abundance, n = 1) |> |
| 15 col_types = "ccd", | 40 dplyr::filter(!stringr::str_detect(element, "\\[|\\]")) |
| 16 col_select = tidyselect::all_of(c("name", "formula")) | tidyselect::any_of("rt") | |
| 17 ) | |
| 18 adducts_to_use <- c(unlist(strsplit(args[2], ",", fixed = TRUE))) | |
| 19 | 41 |
| 20 chemforms <- compound_table$formula | 42 chemforms <- check_chemform(isotopes, compound_table$formula)[, 2] |
| 21 chemforms <- check_chemform(isotopes, chemforms)[, 2] | |
| 22 | |
| 23 spectra <- data.frame() | 43 spectra <- data.frame() |
| 24 | 44 |
| 25 for (current in adducts_to_use) { | 45 for (current in adducts_to_use) { |
| 26 adduct <- adducts[adducts$Name == current, ] | 46 adduct <- adducts[adducts$Name == current, ] |
| 27 multiplied_chemforms <- multiform(chemforms, adduct$Mult) | 47 multiplied_chemforms <- multiform(chemforms, adduct$Mult) |
| 30 merged_chemforms <- subform(multiplied_chemforms, adduct$Formula_ded) | 50 merged_chemforms <- subform(multiplied_chemforms, adduct$Formula_ded) |
| 31 } else { | 51 } else { |
| 32 merged_chemforms <- mergeform(multiplied_chemforms, adduct$Formula_add) | 52 merged_chemforms <- mergeform(multiplied_chemforms, adduct$Formula_add) |
| 33 } | 53 } |
| 34 | 54 |
| 35 charge_string <- paste0(if (adduct$Charge > 0) "+" else "-", if (abs(adduct$Charge) > 1) abs(adduct$Charge) else "") | 55 charge_string <- paste0( |
| 56 if (adduct$Charge > 0) "+" else "-", | |
| 57 if (abs(adduct$Charge) > 1) abs(adduct$Charge) else "" | |
| 58 ) | |
| 36 adduct_string <- paste0("[", adduct$Name, "]", charge_string) | 59 adduct_string <- paste0("[", adduct$Name, "]", charge_string) |
| 37 precursor_mz <- calculateMass(multiplied_chemforms) + adduct$Mass | 60 precursor_mz <- calculateMass(multiplied_chemforms) + adduct$Mass |
| 38 | 61 |
| 39 if (args[4] == TRUE) { | 62 if (append_adducts == TRUE) { |
| 40 names <- paste(compound_table$name, paste0("(", adduct$Name, ")"), sep = " ") | 63 names <- paste( |
| 64 compound_table$name, | |
| 65 paste0("(", adduct$Name, ")"), | |
| 66 sep = " " | |
| 67 ) | |
| 41 } else { | 68 } else { |
| 42 names <- compound_table$name | 69 names <- compound_table$name |
| 43 } | 70 } |
| 44 | 71 |
| 45 spectra_df <- data.frame( | 72 spectra_df <- data.frame( |
| 58 | 85 |
| 59 patterns <- enviPat::isopattern( | 86 patterns <- enviPat::isopattern( |
| 60 isotopes = isotopes, | 87 isotopes = isotopes, |
| 61 chemforms = merged_chemforms, | 88 chemforms = merged_chemforms, |
| 62 charge = adduct$Charge, | 89 charge = adduct$Charge, |
| 63 threshold = as.numeric(args[3]), | 90 threshold = threshold, |
| 64 ) | 91 ) |
| 65 | 92 |
| 66 mzs <- list() | 93 mzs <- list() |
| 67 intensities <- list() | 94 intensities <- list() |
| 95 isos <- list() | |
| 96 | |
| 68 for (i in seq_along(patterns)) { | 97 for (i in seq_along(patterns)) { |
| 69 mzs <- append(mzs, list(patterns[[i]][, 1])) | 98 mzs <- append(mzs, list(patterns[[i]][, 1])) |
| 70 intensities <- append(intensities, list(patterns[[i]][, 2])) | 99 intensities <- append(intensities, list(patterns[[i]][, 2])) |
| 100 | |
| 101 # select all columns which describe the elemental composition | |
| 102 # remove all 12C, 35Cl etc. | |
| 103 # remove isotopes which don't occur | |
| 104 compositions <- as.data.frame(patterns[[i]][, -c(1, 2)]) |> | |
| 105 dplyr::select(-tidyselect::any_of(monoisotopic$isotope)) |> | |
| 106 dplyr::select_if(~ !all(. == 0)) | |
| 107 | |
| 108 # combine elemental composition into single string | |
| 109 compositions <- compositions |> | |
| 110 dplyr::rowwise() |> | |
| 111 dplyr::mutate(isotopes = paste( | |
| 112 purrr::map2_chr( | |
| 113 names(compositions), | |
| 114 dplyr::c_across(everything()), | |
| 115 ~ paste(.x, .y, sep = ":") | |
| 116 ), | |
| 117 collapse = ", " | |
| 118 )) |> | |
| 119 dplyr::ungroup() |> | |
| 120 dplyr::select(isotopes) | |
| 121 isos <- append(isos, list(compositions$isotopes)) | |
| 71 } | 122 } |
| 72 | 123 |
| 73 spectra_df$mz <- mzs | 124 spectra_df$mz <- mzs |
| 74 spectra_df$intensity <- intensities | 125 spectra_df$intensity <- intensities |
| 126 spectra_df$isotopes <- isos | |
| 75 spectra <- rbind(spectra, spectra_df) | 127 spectra <- rbind(spectra, spectra_df) |
| 76 } | 128 } |
| 77 | 129 return(spectra) |
| 78 sps <- Spectra(spectra) | |
| 79 export(sps, MsBackendMsp(), file = args[5]) | |
| 80 } | 130 } |
| 81 | 131 |
| 82 # Get the command line arguments | 132 write_to_msp <- function(spectra, file) { |
| 83 args <- commandArgs(trailingOnly = TRUE) | 133 sps <- Spectra(dplyr::select(spectra, -isotopes)) |
| 134 export(sps, MsBackendMsp(), file = file) | |
| 135 } | |
| 136 | |
| 137 write_to_table <- function(spectra, file, append_isotopes) { | |
| 138 entries <- spectra |> | |
| 139 dplyr::rowwise() |> | |
| 140 dplyr::mutate(peaks = paste(unlist(mz), collapse = ";")) |> | |
| 141 dplyr::mutate(isos = paste(unlist(isotopes), collapse = ";")) | |
| 142 result <- tidyr::separate_longer_delim( | |
| 143 entries, | |
| 144 all_of(c("peaks", "isos")), | |
| 145 ";" | |
| 146 ) | |
| 147 result <- result |> | |
| 148 dplyr::select(-c("mz", "intensity", "isotopes")) |> | |
| 149 dplyr::rename(mz = peaks, isotopes = isos, rt = retention_time) | |
| 150 | |
| 151 if (append_isotopes) { | |
| 152 result <- result |> | |
| 153 dplyr::mutate(result, | |
| 154 full_formula = paste0(formula, " (", isotopes, ")") | |
| 155 ) |> | |
| 156 dplyr::select(-all_of(c("formula", "isotopes"))) |> | |
| 157 dplyr::rename(formula = full_formula) |> | |
| 158 dplyr::relocate(formula, .after = name) | |
| 159 } | |
| 160 readr::write_tsv(result, file = file) | |
| 161 } | |
| 162 | |
| 163 main <- function() { | |
| 164 args <- parse_args() | |
| 165 spectra <- generate_isotope_spectra( | |
| 166 args$compound_table, | |
| 167 args$adducts_to_use, | |
| 168 args$append_adducts, | |
| 169 args$threshold | |
| 170 ) | |
| 171 | |
| 172 if (args$out_format == "msp") { | |
| 173 write_to_msp(spectra, args$outfile) | |
| 174 } else if (args$out_format == "tabular") { | |
| 175 write_to_table(spectra, args$outfile, args$append_isotopes) | |
| 176 } | |
| 177 } | |
| 178 | |
| 84 # Call the main function | 179 # Call the main function |
| 85 main() | 180 main() |
