comparison isolib.R @ 4:2b1118bce0b1 draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/isolib commit e21ab1b7f16bc0a58b33b8e46f828e150372c307
author recetox
date Fri, 01 Nov 2024 08:45:59 +0000
parents 6b0fef8a77c0
children
comparison
equal deleted inserted replaced
3:6b0fef8a77c0 4:2b1118bce0b1
1 library(enviPat) 1 library(enviPat)
2 library(Spectra) 2 library(Spectra)
3 library(MsBackendMsp) 3 library(MsBackendMsp)
4 library(MetaboCoreUtils) 4 library(MetaboCoreUtils)
5 library(readr) 5 library(readr)
6 library(tidyselect)
6 7
7 #' @param args A list of command line arguments. 8
8 main <- function() { 9 parse_args <- function() {
10 args <- commandArgs(trailingOnly = TRUE)
11
12 compound_table <- read_tsv(
13 file = args[1],
14 col_types = "ccd",
15 col_select = all_of(c("name", "formula")) | any_of("rt")
16 )
17
18 parsed <- list(
19 compound_table = compound_table,
20 adducts_to_use = c(unlist(strsplit(args[2], ",", fixed = TRUE))),
21 threshold = as.numeric(args[3]),
22 append_adducts = args[4],
23 append_isotopes = args[5],
24 out_format = args[6],
25 outfile = args[7]
26 )
27 return(parsed)
28 }
29
30 generate_isotope_spectra <- function(compound_table,
31 adducts_to_use,
32 append_adducts,
33 threshold) {
9 data(isotopes) 34 data(isotopes)
10 data(adducts) 35 data(adducts)
11 36
12 args <- commandArgs(trailingOnly = TRUE) 37 monoisotopic <- isotopes |>
13 compound_table <- read_tsv( 38 dplyr::group_by(element) |>
14 file = args[1], 39 dplyr::slice_max(abundance, n = 1) |>
15 col_types = "ccd", 40 dplyr::filter(!stringr::str_detect(element, "\\[|\\]"))
16 col_select = tidyselect::all_of(c("name", "formula")) | tidyselect::any_of("rt")
17 )
18 adducts_to_use <- c(unlist(strsplit(args[2], ",", fixed = TRUE)))
19 41
20 chemforms <- compound_table$formula 42 chemforms <- check_chemform(isotopes, compound_table$formula)[, 2]
21 chemforms <- check_chemform(isotopes, chemforms)[, 2]
22
23 spectra <- data.frame() 43 spectra <- data.frame()
24 44
25 for (current in adducts_to_use) { 45 for (current in adducts_to_use) {
26 adduct <- adducts[adducts$Name == current, ] 46 adduct <- adducts[adducts$Name == current, ]
27 multiplied_chemforms <- multiform(chemforms, adduct$Mult) 47 multiplied_chemforms <- multiform(chemforms, adduct$Mult)
30 merged_chemforms <- subform(multiplied_chemforms, adduct$Formula_ded) 50 merged_chemforms <- subform(multiplied_chemforms, adduct$Formula_ded)
31 } else { 51 } else {
32 merged_chemforms <- mergeform(multiplied_chemforms, adduct$Formula_add) 52 merged_chemforms <- mergeform(multiplied_chemforms, adduct$Formula_add)
33 } 53 }
34 54
35 charge_string <- paste0(if (adduct$Charge > 0) "+" else "-", if (abs(adduct$Charge) > 1) abs(adduct$Charge) else "") 55 charge_string <- paste0(
56 if (adduct$Charge > 0) "+" else "-",
57 if (abs(adduct$Charge) > 1) abs(adduct$Charge) else ""
58 )
36 adduct_string <- paste0("[", adduct$Name, "]", charge_string) 59 adduct_string <- paste0("[", adduct$Name, "]", charge_string)
37 precursor_mz <- calculateMass(multiplied_chemforms) + adduct$Mass 60 precursor_mz <- calculateMass(multiplied_chemforms) + adduct$Mass
38 61
39 if (args[4] == TRUE) { 62 if (append_adducts == TRUE) {
40 names <- paste(compound_table$name, paste0("(", adduct$Name, ")"), sep = " ") 63 names <- paste(
64 compound_table$name,
65 paste0("(", adduct$Name, ")"),
66 sep = " "
67 )
41 } else { 68 } else {
42 names <- compound_table$name 69 names <- compound_table$name
43 } 70 }
44 71
45 spectra_df <- data.frame( 72 spectra_df <- data.frame(
58 85
59 patterns <- enviPat::isopattern( 86 patterns <- enviPat::isopattern(
60 isotopes = isotopes, 87 isotopes = isotopes,
61 chemforms = merged_chemforms, 88 chemforms = merged_chemforms,
62 charge = adduct$Charge, 89 charge = adduct$Charge,
63 threshold = as.numeric(args[3]), 90 threshold = threshold,
64 ) 91 )
65 92
66 mzs <- list() 93 mzs <- list()
67 intensities <- list() 94 intensities <- list()
95 isos <- list()
96
68 for (i in seq_along(patterns)) { 97 for (i in seq_along(patterns)) {
69 mzs <- append(mzs, list(patterns[[i]][, 1])) 98 mzs <- append(mzs, list(patterns[[i]][, 1]))
70 intensities <- append(intensities, list(patterns[[i]][, 2])) 99 intensities <- append(intensities, list(patterns[[i]][, 2]))
100
101 # select all columns which describe the elemental composition
102 # remove all 12C, 35Cl etc.
103 # remove isotopes which don't occur
104 compositions <- as.data.frame(patterns[[i]][, -c(1, 2)]) |>
105 dplyr::select(-tidyselect::any_of(monoisotopic$isotope)) |>
106 dplyr::select_if(~ !all(. == 0))
107
108 # combine elemental composition into single string
109 compositions <- compositions |>
110 dplyr::rowwise() |>
111 dplyr::mutate(isotopes = paste(
112 purrr::map2_chr(
113 names(compositions),
114 dplyr::c_across(everything()),
115 ~ paste(.x, .y, sep = ":")
116 ),
117 collapse = ", "
118 )) |>
119 dplyr::ungroup() |>
120 dplyr::select(isotopes)
121 isos <- append(isos, list(compositions$isotopes))
71 } 122 }
72 123
73 spectra_df$mz <- mzs 124 spectra_df$mz <- mzs
74 spectra_df$intensity <- intensities 125 spectra_df$intensity <- intensities
126 spectra_df$isotopes <- isos
75 spectra <- rbind(spectra, spectra_df) 127 spectra <- rbind(spectra, spectra_df)
76 } 128 }
77 129 return(spectra)
78 sps <- Spectra(spectra)
79 export(sps, MsBackendMsp(), file = args[5])
80 } 130 }
81 131
82 # Get the command line arguments 132 write_to_msp <- function(spectra, file) {
83 args <- commandArgs(trailingOnly = TRUE) 133 sps <- Spectra(dplyr::select(spectra, -isotopes))
134 export(sps, MsBackendMsp(), file = file)
135 }
136
137 write_to_table <- function(spectra, file, append_isotopes) {
138 entries <- spectra |>
139 dplyr::rowwise() |>
140 dplyr::mutate(peaks = paste(unlist(mz), collapse = ";")) |>
141 dplyr::mutate(isos = paste(unlist(isotopes), collapse = ";"))
142 result <- tidyr::separate_longer_delim(
143 entries,
144 all_of(c("peaks", "isos")),
145 ";"
146 )
147 result <- result |>
148 dplyr::select(-c("mz", "intensity", "isotopes")) |>
149 dplyr::rename(mz = peaks, isotopes = isos, rt = retention_time)
150
151 if (append_isotopes) {
152 result <- result |>
153 dplyr::mutate(result,
154 full_formula = paste0(formula, " (", isotopes, ")")
155 ) |>
156 dplyr::select(-all_of(c("formula", "isotopes"))) |>
157 dplyr::rename(formula = full_formula) |>
158 dplyr::relocate(formula, .after = name)
159 }
160 readr::write_tsv(result, file = file)
161 }
162
163 main <- function() {
164 args <- parse_args()
165 spectra <- generate_isotope_spectra(
166 args$compound_table,
167 args$adducts_to_use,
168 args$append_adducts,
169 args$threshold
170 )
171
172 if (args$out_format == "msp") {
173 write_to_msp(spectra, args$outfile)
174 } else if (args$out_format == "tabular") {
175 write_to_table(spectra, args$outfile, args$append_isotopes)
176 }
177 }
178
84 # Call the main function 179 # Call the main function
85 main() 180 main()