Mercurial > repos > recetox > isolib
comparison isolib.R @ 4:2b1118bce0b1 draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/isolib commit e21ab1b7f16bc0a58b33b8e46f828e150372c307
author | recetox |
---|---|
date | Fri, 01 Nov 2024 08:45:59 +0000 |
parents | 6b0fef8a77c0 |
children |
comparison
equal
deleted
inserted
replaced
3:6b0fef8a77c0 | 4:2b1118bce0b1 |
---|---|
1 library(enviPat) | 1 library(enviPat) |
2 library(Spectra) | 2 library(Spectra) |
3 library(MsBackendMsp) | 3 library(MsBackendMsp) |
4 library(MetaboCoreUtils) | 4 library(MetaboCoreUtils) |
5 library(readr) | 5 library(readr) |
6 library(tidyselect) | |
6 | 7 |
7 #' @param args A list of command line arguments. | 8 |
8 main <- function() { | 9 parse_args <- function() { |
10 args <- commandArgs(trailingOnly = TRUE) | |
11 | |
12 compound_table <- read_tsv( | |
13 file = args[1], | |
14 col_types = "ccd", | |
15 col_select = all_of(c("name", "formula")) | any_of("rt") | |
16 ) | |
17 | |
18 parsed <- list( | |
19 compound_table = compound_table, | |
20 adducts_to_use = c(unlist(strsplit(args[2], ",", fixed = TRUE))), | |
21 threshold = as.numeric(args[3]), | |
22 append_adducts = args[4], | |
23 append_isotopes = args[5], | |
24 out_format = args[6], | |
25 outfile = args[7] | |
26 ) | |
27 return(parsed) | |
28 } | |
29 | |
30 generate_isotope_spectra <- function(compound_table, | |
31 adducts_to_use, | |
32 append_adducts, | |
33 threshold) { | |
9 data(isotopes) | 34 data(isotopes) |
10 data(adducts) | 35 data(adducts) |
11 | 36 |
12 args <- commandArgs(trailingOnly = TRUE) | 37 monoisotopic <- isotopes |> |
13 compound_table <- read_tsv( | 38 dplyr::group_by(element) |> |
14 file = args[1], | 39 dplyr::slice_max(abundance, n = 1) |> |
15 col_types = "ccd", | 40 dplyr::filter(!stringr::str_detect(element, "\\[|\\]")) |
16 col_select = tidyselect::all_of(c("name", "formula")) | tidyselect::any_of("rt") | |
17 ) | |
18 adducts_to_use <- c(unlist(strsplit(args[2], ",", fixed = TRUE))) | |
19 | 41 |
20 chemforms <- compound_table$formula | 42 chemforms <- check_chemform(isotopes, compound_table$formula)[, 2] |
21 chemforms <- check_chemform(isotopes, chemforms)[, 2] | |
22 | |
23 spectra <- data.frame() | 43 spectra <- data.frame() |
24 | 44 |
25 for (current in adducts_to_use) { | 45 for (current in adducts_to_use) { |
26 adduct <- adducts[adducts$Name == current, ] | 46 adduct <- adducts[adducts$Name == current, ] |
27 multiplied_chemforms <- multiform(chemforms, adduct$Mult) | 47 multiplied_chemforms <- multiform(chemforms, adduct$Mult) |
30 merged_chemforms <- subform(multiplied_chemforms, adduct$Formula_ded) | 50 merged_chemforms <- subform(multiplied_chemforms, adduct$Formula_ded) |
31 } else { | 51 } else { |
32 merged_chemforms <- mergeform(multiplied_chemforms, adduct$Formula_add) | 52 merged_chemforms <- mergeform(multiplied_chemforms, adduct$Formula_add) |
33 } | 53 } |
34 | 54 |
35 charge_string <- paste0(if (adduct$Charge > 0) "+" else "-", if (abs(adduct$Charge) > 1) abs(adduct$Charge) else "") | 55 charge_string <- paste0( |
56 if (adduct$Charge > 0) "+" else "-", | |
57 if (abs(adduct$Charge) > 1) abs(adduct$Charge) else "" | |
58 ) | |
36 adduct_string <- paste0("[", adduct$Name, "]", charge_string) | 59 adduct_string <- paste0("[", adduct$Name, "]", charge_string) |
37 precursor_mz <- calculateMass(multiplied_chemforms) + adduct$Mass | 60 precursor_mz <- calculateMass(multiplied_chemforms) + adduct$Mass |
38 | 61 |
39 if (args[4] == TRUE) { | 62 if (append_adducts == TRUE) { |
40 names <- paste(compound_table$name, paste0("(", adduct$Name, ")"), sep = " ") | 63 names <- paste( |
64 compound_table$name, | |
65 paste0("(", adduct$Name, ")"), | |
66 sep = " " | |
67 ) | |
41 } else { | 68 } else { |
42 names <- compound_table$name | 69 names <- compound_table$name |
43 } | 70 } |
44 | 71 |
45 spectra_df <- data.frame( | 72 spectra_df <- data.frame( |
58 | 85 |
59 patterns <- enviPat::isopattern( | 86 patterns <- enviPat::isopattern( |
60 isotopes = isotopes, | 87 isotopes = isotopes, |
61 chemforms = merged_chemforms, | 88 chemforms = merged_chemforms, |
62 charge = adduct$Charge, | 89 charge = adduct$Charge, |
63 threshold = as.numeric(args[3]), | 90 threshold = threshold, |
64 ) | 91 ) |
65 | 92 |
66 mzs <- list() | 93 mzs <- list() |
67 intensities <- list() | 94 intensities <- list() |
95 isos <- list() | |
96 | |
68 for (i in seq_along(patterns)) { | 97 for (i in seq_along(patterns)) { |
69 mzs <- append(mzs, list(patterns[[i]][, 1])) | 98 mzs <- append(mzs, list(patterns[[i]][, 1])) |
70 intensities <- append(intensities, list(patterns[[i]][, 2])) | 99 intensities <- append(intensities, list(patterns[[i]][, 2])) |
100 | |
101 # select all columns which describe the elemental composition | |
102 # remove all 12C, 35Cl etc. | |
103 # remove isotopes which don't occur | |
104 compositions <- as.data.frame(patterns[[i]][, -c(1, 2)]) |> | |
105 dplyr::select(-tidyselect::any_of(monoisotopic$isotope)) |> | |
106 dplyr::select_if(~ !all(. == 0)) | |
107 | |
108 # combine elemental composition into single string | |
109 compositions <- compositions |> | |
110 dplyr::rowwise() |> | |
111 dplyr::mutate(isotopes = paste( | |
112 purrr::map2_chr( | |
113 names(compositions), | |
114 dplyr::c_across(everything()), | |
115 ~ paste(.x, .y, sep = ":") | |
116 ), | |
117 collapse = ", " | |
118 )) |> | |
119 dplyr::ungroup() |> | |
120 dplyr::select(isotopes) | |
121 isos <- append(isos, list(compositions$isotopes)) | |
71 } | 122 } |
72 | 123 |
73 spectra_df$mz <- mzs | 124 spectra_df$mz <- mzs |
74 spectra_df$intensity <- intensities | 125 spectra_df$intensity <- intensities |
126 spectra_df$isotopes <- isos | |
75 spectra <- rbind(spectra, spectra_df) | 127 spectra <- rbind(spectra, spectra_df) |
76 } | 128 } |
77 | 129 return(spectra) |
78 sps <- Spectra(spectra) | |
79 export(sps, MsBackendMsp(), file = args[5]) | |
80 } | 130 } |
81 | 131 |
82 # Get the command line arguments | 132 write_to_msp <- function(spectra, file) { |
83 args <- commandArgs(trailingOnly = TRUE) | 133 sps <- Spectra(dplyr::select(spectra, -isotopes)) |
134 export(sps, MsBackendMsp(), file = file) | |
135 } | |
136 | |
137 write_to_table <- function(spectra, file, append_isotopes) { | |
138 entries <- spectra |> | |
139 dplyr::rowwise() |> | |
140 dplyr::mutate(peaks = paste(unlist(mz), collapse = ";")) |> | |
141 dplyr::mutate(isos = paste(unlist(isotopes), collapse = ";")) | |
142 result <- tidyr::separate_longer_delim( | |
143 entries, | |
144 all_of(c("peaks", "isos")), | |
145 ";" | |
146 ) | |
147 result <- result |> | |
148 dplyr::select(-c("mz", "intensity", "isotopes")) |> | |
149 dplyr::rename(mz = peaks, isotopes = isos, rt = retention_time) | |
150 | |
151 if (append_isotopes) { | |
152 result <- result |> | |
153 dplyr::mutate(result, | |
154 full_formula = paste0(formula, " (", isotopes, ")") | |
155 ) |> | |
156 dplyr::select(-all_of(c("formula", "isotopes"))) |> | |
157 dplyr::rename(formula = full_formula) |> | |
158 dplyr::relocate(formula, .after = name) | |
159 } | |
160 readr::write_tsv(result, file = file) | |
161 } | |
162 | |
163 main <- function() { | |
164 args <- parse_args() | |
165 spectra <- generate_isotope_spectra( | |
166 args$compound_table, | |
167 args$adducts_to_use, | |
168 args$append_adducts, | |
169 args$threshold | |
170 ) | |
171 | |
172 if (args$out_format == "msp") { | |
173 write_to_msp(spectra, args$outfile) | |
174 } else if (args$out_format == "tabular") { | |
175 write_to_table(spectra, args$outfile, args$append_isotopes) | |
176 } | |
177 } | |
178 | |
84 # Call the main function | 179 # Call the main function |
85 main() | 180 main() |