# HG changeset patch # User recetox # Date 1687787756 0 # Node ID cfd2e19f00a9b33b7ea400a7bcc67555b863e4f8 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox-xmsannotator commit 1ab1a1dabfcebe11720de1411927a7438c1b64c1 diff -r 000000000000 -r cfd2e19f00a9 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Jun 26 13:55:56 2023 +0000 @@ -0,0 +1,144 @@ + +0.10.0 + + + + r-recetox-xmsannotator + + + + + + + + + + + + + + + + + + + + + + + + compound_id, monoisotopic_mass, and molecular_formula. + ]]> + + + + adduct, charge, mass, and n_molecules. + ]]> + + + + + A weight-by-adduct table. + + + + + + + + + + + + + + + + Mass tolerance in ppm for database matching. + + + + + +Description +=========== + +Annotate the peak intensity table (e.g. from an apLCMS run) with compounds from the compounds database +using advanced methods. + +The annotation process generates all possible compound-adduct pairs and matches those pairs to the measured +peaks. A compound-adduct pair is considered a match to a certain peak when the difference of their masses is +within some tolerance. + +Then, a score and a confidence level are assigned to each match based on peak correlation +clustering, metabolite pathway associations, adducts expectations, and isotope conformations. + +Input tables description +------------------------ + +(*) Metadata table +~~~~~~~~~~~~~~~~~~ + +The output from recetox-aplcms tool. +The `npeaks` column denotes the number of peaks which have been grouped into this feature. +The columns with the sample names indicate whether this feature is present in the sample. +Only id, mz, and rt columns are required to be present. 
+ ++-------+--------------+--------------+---------------+----------------+---------------+---------------+-----------+------------------------+------------------------+------------------------+ +| id | mz | mzmin | mzmax | rt | rtmin | rtmax | npeaks | 21_qc_no_dil_milliq | 29_qc_no_dil_milliq | 8_qc_no_dil_milliq | ++=======+==============+==============+===============+================+===============+===============+===========+========================+========================+========================+ +| 1 | 70.03707021 | 70.037066 | 70.0370750 | 294.1038014 | 294.0634942 | 294.149985 | 3 | 1 | 1 | 1 | ++-------+--------------+--------------+---------------+----------------+---------------+---------------+-----------+------------------------+------------------------+------------------------+ +| 2 | 70.06505677 | 70.065045 | 70.0650676 | 141.9560055 | 140.5762528 | 143.335758 | 2 | 1 | 0 | 1 | ++-------+--------------+--------------+---------------+----------------+---------------+---------------+-----------+------------------------+------------------------+------------------------+ +| 57 | 78.04643252 | 78.046429 | 78.0464325 | 294.0063397 | 293.9406777 | 294.072001 | 2 | 1 | 1 | 0 | ++-------+--------------+--------------+---------------+----------------+---------------+---------------+-----------+------------------------+------------------------+------------------------+ +| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ++-------+--------------+--------------+---------------+----------------+---------------+---------------+-----------+------------------------+------------------------+------------------------+ + +(**) Intensity table +~~~~~~~~~~~~~~~~~~~~ + +The output from recetox-aplcms tool. +This table contains the peak area for aligned features in all samples. 
+ ++-------+------------------------+------------------------+------------------------+ +| id | 21_qc_no_dil_milliq | 29_qc_no_dil_milliq | 8_qc_no_dil_milliq | ++=======+========================+========================+========================+ +| 1 | 13187487.20482895 | 7957395.699119729 | 11700594.397257797 | ++-------+------------------------+------------------------+------------------------+ +| 2 | 2075168.6398983458 | 0 | 2574362.159289044 | ++-------+------------------------+------------------------+------------------------+ +| 57 | 2934524.4406785755 | 1333044.5065971944 | 0 | ++-------+------------------------+------------------------+------------------------+ +| ... | ... | ... | ... | ++-------+------------------------+------------------------+------------------------+ + + + + 10.1021/acs.analchem.6b01214 + + diff -r 000000000000 -r cfd2e19f00a9 recetox_xmsannotator_advanced.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/recetox_xmsannotator_advanced.xml Mon Jun 26 13:55:56 2023 +0000 @@ -0,0 +1,169 @@ + + + annotate peak intensity table including scores and confidence levels + + macros.xml + + + + recetox-xmsannotator + + + + + + + + + + + + + + + Retention time tolerance in seconds for finding peaks derived from the same parent compound. + + + +
+ + + Correlation threshold between peaks to qualify as adducts/isotopes of the same metabolite. + + + + The minimum number of nodes to be considered as a cluster. + + + + + Deep split provides a rough control over sensitivity to cluster splitting. The higher the value, + the more and smaller clusters will be produced (see WGCNA package documentation). + + + + + + Network type parameter affects how the network's adjacency matrix is created from the correlation + matrix (see WGCNA package documentation). + + + + +
+
+ + + + Boost the scores of metabolites that belong not only to the same pathway but also to the same + cluster. Otherwise, do not account for cluster membership. + + + + + + Minimum number of adducts/isotopes to be present for a match to be considered as a high confidence match. + + + + + + Maximum number of adducts/isotopes to be present for a match to be considered as a high confidence match. + + + + + Whether to filter out low-scoring multiple matches or not. + +
+ + + A numeric threshold by which an intensity ratio of two isotopic peaks may differ from their actual abundance ratio. + + + + Maximum difference in mass defect between two peaks of the same compound. + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
# diff -r 000000000000 -r cfd2e19f00a9 utils.R
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/utils.R Mon Jun 26 13:55:56 2023 +0000
# @@ -0,0 +1,37 @@
library(recetox.xmsannotator)
library(dplyr)

#' Load a table from a CSV or Parquet file.
#'
#' @param filename Path of the file to read. `NULL` or the literal string
#'   "None" means "no file" (presumably the placeholder the calling tool
#'   passes for an unset optional input -- confirm against the caller).
#' @param filetype "csv" selects `read.csv()`; any other value is read with
#'   `arrow::read_parquet()`.
#' @return A data.frame, or `NULL` when no file was given.
load_table <- function(filename, filetype) {
  # Check is.null() first: `NULL == "None"` has length zero and breaks `if`.
  if (is.null(filename) || filename == "None") {
    return(NULL)
  }
  if (filetype == "csv") {
    as.data.frame(read.csv(filename))
  } else {
    as.data.frame(arrow::read_parquet(filename))
  }
}

#' Write a table to a CSV or Parquet file.
#'
#' @param table The table to write.
#' @param filename Destination path.
#' @param filetype "csv" writes with `write.csv()` (without row names); any
#'   other value writes with `arrow::write_parquet()`.
save_table <- function(table, filename, filetype) {
  if (filetype == "csv") {
    write.csv(table, filename, row.names = FALSE)
  } else {
    arrow::write_parquet(table, filename)
  }
}

#' Turn a comma-separated string into a character vector of values.
#'
#' @param comma_separated_values A single string such as "M+H, M+Na", or
#'   `NULL` / the literal string "None" for "no filter".
#' @return `NA` when no filter was given, otherwise a character vector with
#'   one whitespace-trimmed element per comma-separated entry.
create_filter_by_adducts <- function(comma_separated_values) {
  if (is.null(comma_separated_values) || comma_separated_values == "None") {
    return(NA)
  }
  # Trim every element, not just the whole string: "M+H, M+Na" must yield
  # "M+Na" rather than " M+Na".
  trimws(strsplit(comma_separated_values, ",", fixed = TRUE)[[1]])
}

#' Combine the metadata and intensity tables into a single peak table.
#'
#' Keeps only the `id`, `mz` and `rt` columns of the metadata table,
#' inner-joins them with the intensity table on `id`, renames `id` to `peak`
#' and coerces `peak` to integer.
#'
#' @param metadata_table Table with at least the columns `id`, `mz`, `rt`.
#' @param intensity_table Table with an `id` column (the join key).
#' @return The joined table, keyed by the integer column `peak`.
create_peak_table <- function(metadata_table, intensity_table) {
  # Namespace-qualified so a later-attached package cannot mask these verbs.
  metadata_table <- dplyr::select(
    metadata_table,
    dplyr::all_of(c("id", "mz", "rt"))
  )
  peak_table <- dplyr::inner_join(metadata_table, intensity_table, by = "id")
  peak_table <- dplyr::rename(peak_table, peak = "id")
  peak_table$peak <- as.integer(peak_table$peak)
  peak_table
}