diff isolib.R @ 4:2b1118bce0b1 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/isolib commit e21ab1b7f16bc0a58b33b8e46f828e150372c307
author recetox
date Fri, 01 Nov 2024 08:45:59 +0000
parents 6b0fef8a77c0
children 964b4559eb1b
line wrap: on
line diff
--- a/isolib.R	Thu May 30 14:52:02 2024 +0000
+++ b/isolib.R	Fri Nov 01 08:45:59 2024 +0000
@@ -3,23 +3,43 @@
 library(MsBackendMsp)
 library(MetaboCoreUtils)
 library(readr)
+library(tidyselect)
 
-#' @param args A list of command line arguments.
-main <- function() {
+
+parse_args <- function() {
+    args <- commandArgs(trailingOnly = TRUE)
+
+    compound_table <- read_tsv(
+        file = args[1],
+        col_types = "ccd",
+        col_select = all_of(c("name", "formula")) | any_of("rt")
+    )
+
+    parsed <- list(
+        compound_table = compound_table,
+        adducts_to_use = c(unlist(strsplit(args[2], ",", fixed = TRUE))),
+        threshold = as.numeric(args[3]),
+        append_adducts = args[4],
+        append_isotopes = args[5],
+        out_format = args[6],
+        outfile = args[7]
+    )
+    return(parsed)
+}
+
+generate_isotope_spectra <- function(compound_table,
+                                     adducts_to_use,
+                                     append_adducts,
+                                     threshold) {
     data(isotopes)
     data(adducts)
 
-    args <- commandArgs(trailingOnly = TRUE)
-    compound_table <- read_tsv(
-        file = args[1],
-        col_types = "ccd",
-        col_select = tidyselect::all_of(c("name", "formula")) | tidyselect::any_of("rt")
-    )
-    adducts_to_use <- c(unlist(strsplit(args[2], ",", fixed = TRUE)))
+    monoisotopic <- isotopes |>
+        dplyr::group_by(element) |>
+        dplyr::slice_max(abundance, n = 1) |>
+        dplyr::filter(!stringr::str_detect(element, "\\[|\\]"))
 
-    chemforms <- compound_table$formula
-    chemforms <- check_chemform(isotopes, chemforms)[, 2]
-
+    chemforms <- check_chemform(isotopes, compound_table$formula)[, 2]
     spectra <- data.frame()
 
     for (current in adducts_to_use) {
@@ -32,12 +52,19 @@
             merged_chemforms <- mergeform(multiplied_chemforms, adduct$Formula_add)
         }
 
-        charge_string <- paste0(if (adduct$Charge > 0) "+" else "-", if (abs(adduct$Charge) > 1) abs(adduct$Charge) else "")
+        charge_string <- paste0(
+            if (adduct$Charge > 0) "+" else "-",
+            if (abs(adduct$Charge) > 1) abs(adduct$Charge) else ""
+        )
         adduct_string <- paste0("[", adduct$Name, "]", charge_string)
         precursor_mz <- calculateMass(multiplied_chemforms) + adduct$Mass
 
-        if (args[4] == TRUE) {
-            names <- paste(compound_table$name, paste0("(", adduct$Name, ")"), sep = " ")
+        if (append_adducts == TRUE) {
+            names <- paste(
+                compound_table$name,
+                paste0("(", adduct$Name, ")"),
+                sep = " "
+            )
         } else {
             names <- compound_table$name
         }
@@ -60,26 +87,94 @@
             isotopes = isotopes,
             chemforms = merged_chemforms,
             charge = adduct$Charge,
-            threshold = as.numeric(args[3]),
+            threshold = threshold,
         )
 
         mzs <- list()
         intensities <- list()
+        isos <- list()
+
         for (i in seq_along(patterns)) {
             mzs <- append(mzs, list(patterns[[i]][, 1]))
             intensities <- append(intensities, list(patterns[[i]][, 2]))
+
+            # select all columns which describe the elemental composition
+            # remove all 12C, 35Cl etc.
+            # remove isotopes which don't occur
+            compositions <- as.data.frame(patterns[[i]][, -c(1, 2)]) |>
+                dplyr::select(-tidyselect::any_of(monoisotopic$isotope)) |>
+                dplyr::select_if(~ !all(. == 0))
+
+            # combine elemental composition into single string
+            compositions <- compositions |>
+                dplyr::rowwise() |>
+                dplyr::mutate(isotopes = paste(
+                    purrr::map2_chr(
+                        names(compositions),
+                        dplyr::c_across(everything()),
+                        ~ paste(.x, .y, sep = ":")
+                    ),
+                    collapse = ", "
+                )) |>
+                dplyr::ungroup() |>
+                dplyr::select(isotopes)
+            isos <- append(isos, list(compositions$isotopes))
         }
 
         spectra_df$mz <- mzs
         spectra_df$intensity <- intensities
+        spectra_df$isotopes <- isos
         spectra <- rbind(spectra, spectra_df)
     }
+    return(spectra)
+}
 
-    sps <- Spectra(spectra)
-    export(sps, MsBackendMsp(), file = args[5])
+write_to_msp <- function(spectra, file) {
+    sps <- Spectra(dplyr::select(spectra, -isotopes))
+    export(sps, MsBackendMsp(), file = file)
 }
 
-# Get the command line arguments
-args <- commandArgs(trailingOnly = TRUE)
+write_to_table <- function(spectra, file, append_isotopes) {
+    entries <- spectra |>
+        dplyr::rowwise() |>
+        dplyr::mutate(peaks = paste(unlist(mz), collapse = ";")) |>
+        dplyr::mutate(isos = paste(unlist(isotopes), collapse = ";"))
+    result <- tidyr::separate_longer_delim(
+        entries,
+        all_of(c("peaks", "isos")),
+        ";"
+    )
+    result <- result |>
+        dplyr::select(-c("mz", "intensity", "isotopes")) |>
+        dplyr::rename(mz = peaks, isotopes = isos, rt = retention_time)
+
+    if (append_isotopes) {
+        result <- result |>
+            dplyr::mutate(result,
+                full_formula = paste0(formula, " (", isotopes, ")")
+            ) |>
+            dplyr::select(-all_of(c("formula", "isotopes"))) |>
+            dplyr::rename(formula = full_formula) |>
+            dplyr::relocate(formula, .after = name)
+    }
+    readr::write_tsv(result, file = file)
+}
+
+main <- function() {
+    args <- parse_args()
+    spectra <- generate_isotope_spectra(
+        args$compound_table,
+        args$adducts_to_use,
+        args$append_adducts,
+        args$threshold
+    )
+
+    if (args$out_format == "msp") {
+        write_to_msp(spectra, args$outfile)
+    } else if (args$out_format == "tabular") {
+        write_to_table(spectra, args$outfile, args$append_isotopes)
+    }
+}
+
 # Call the main function
 main()