Repository 'mspurity_createdatabase'
hg clone https://toolshed.g2.bx.psu.edu/repos/computational-metabolomics/mspurity_createdatabase

Changeset 8:efd14b326007 (2024-06-12)
Previous changeset 7:0cc6b67dccb8 (2022-02-08) Next changeset 9:751b7378a683 (2024-06-13)
Commit message:
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 7e1748612a9f9dce11a9e54ff36752b600e7aea3
modified:
README.rst
averageFragSpectra.R
combineAnnotations.R
createDatabase.R
createMSP.R
dimsPredictPuritySingle.R
filterFragSpectra.R
flagRemove.R
frag4feature.R
macros.xml
purityA.R
purityX.R
test-data/combineAnnotations_combined_annotations.sqlite
test-data/combineAnnotations_combined_annotations.tsv
test-data/purityX_output.tsv
added:
test-data/purityX_output.RData
removed:
test-data/purityX_output.Rdata
b
diff -r 0cc6b67dccb8 -r efd14b326007 README.rst
--- a/README.rst Tue Feb 08 13:55:12 2022 +0000
+++ b/README.rst Wed Jun 12 16:05:01 2024 +0000
b
@@ -1,15 +1,15 @@
 msPurity for Galaxy
 ========================
-|Build Status (Travis)| |Git| |Bioconda| |License|
+|Git| |Bioconda| |License|
 
 
-Version v1.16.2+galaxy1
+Version v1.16.2+galaxy2
 ------------------------
 
   - msPurity
      - bioconductor-mspurity v1.16.2
   - Galaxy tools
-     - v1
+     - v2
 
 About
 ------
@@ -54,6 +54,13 @@
 
 Changes
 -------------------------
+v1.16.2-galaxy2 
+  - Fix for purityX galaxy tool (https://github.com/computational-metabolomics/mspurity-galaxy/issues/53) 
+  - Cleanup of xml based on updated lint requirements
+  - Fix combineAnnotation tests
+  - Cleanup of repository folders
+  - github actions temp update (lintr removed)
+
 v1.16.2-galaxy1
   - Fix for "scan" option for spectral matching
   - Add allfrag option for filterFragSpectra
b
diff -r 0cc6b67dccb8 -r efd14b326007 averageFragSpectra.R
--- a/averageFragSpectra.R Tue Feb 08 13:55:12 2022 +0000
+++ b/averageFragSpectra.R Wed Jun 12 16:05:01 2024 +0000
[
@@ -5,30 +5,28 @@
 
 
 get_av_spectra <- function(x) {
-
   if (length(x$av_intra) > 0) {
     av_intra_df <- plyr::ldply(x$av_intra)
 
     if (nrow(av_intra_df) == 0) {
       av_intra_df <- NULL
-    }else{
+    } else {
       av_intra_df$method <- "intra"
     }
-
-  }else{
+  } else {
     av_intra_df <- NULL
   }
 
   if ((is.null(x$av_inter)) || (nrow(x$av_inter) == 0)) {
     av_inter_df <- NULL
-  }else{
+  } else {
     av_inter_df <- x$av_inter
     av_inter_df$method <- "inter"
   }
 
   if ((is.null(x$av_all)) || (nrow(x$av_all) == 0)) {
     av_all_df <- NULL
-  }else{
+  } else {
     av_all_df <- x$av_all
     av_all_df$method <- "all"
   }
@@ -60,9 +58,9 @@
 
 
 load_r_data <- function(rdata_path, name) {
-    #loads an RData file, and returns the named xset object if it is there
-    load(rdata_path)
-    return(get(ls()[ls() %in% name]))
+  # loads an RData file, and returns the named xset object if it is there
+  load(rdata_path)
+  return(get(ls()[ls() %in% name]))
 }
 
 # Requires
@@ -72,59 +70,58 @@
 
 if (is.null(opt$rmp)) {
   rmp <- FALSE
-}else{
+} else {
   rmp <- TRUE
 }
 
 if (is.null(opt$sumi)) {
   sumi <- FALSE
-}else{
+} else {
   sumi <- TRUE
 }
 
 if (opt$av_level == "intra") {
   pa <- msPurity::averageIntraFragSpectra(pa,
-                                          minfrac = opt$minfrac,
-                                          minnum = opt$minnum,
-                                          ppm = opt$ppm,
-                                          snr = opt$snr,
-                                          ra = opt$ra,
-                                          av = opt$av,
-                                          sumi = sumi,
-                                          rmp = rmp,
-                                          cores = opt$cores)
-
+    minfrac = opt$minfrac,
+    minnum = opt$minnum,
+    ppm = opt$ppm,
+    snr = opt$snr,
+    ra = opt$ra,
+    av = opt$av,
+    sumi = sumi,
+    rmp = rmp,
+    cores = opt$cores
+  )
 } else if (opt$av_level == "inter") {
-
   pa <- msPurity::averageInterFragSpectra(pa,
-                                          minfrac = opt$minfrac,
-                                          minnum = opt$minnum,
-                                          ppm = opt$ppm,
-                                          snr = opt$snr,
-                                          ra = opt$ra,
-                                          av = opt$av,
-                                          sumi = sumi,
-                                          rmp = rmp,
-                                          cores = opt$cores)
+    minfrac = opt$minfrac,
+    minnum = opt$minnum,
+    ppm = opt$ppm,
+    snr = opt$snr,
+    ra = opt$ra,
+    av = opt$av,
+    sumi = sumi,
+    rmp = rmp,
+    cores = opt$cores
+  )
 } else if (opt$av_level == "all") {
-
   pa <- msPurity::averageAllFragSpectra(pa,
-                                        minfrac = opt$minfrac,
-                                        minnum = opt$minnum,
-                                        ppm = opt$ppm,
-                                        snr = opt$snr,
-                                        ra = opt$ra,
-                                        av = opt$av,
-                                        sumi = sumi,
-                                        rmp = rmp,
-                                        cores = opt$cores)
+    minfrac = opt$minfrac,
+    minnum = opt$minnum,
+    ppm = opt$ppm,
+    snr = opt$snr,
+    ra = opt$ra,
+    av = opt$av,
+    sumi = sumi,
+    rmp = rmp,
+    cores = opt$cores
+  )
 }
 
 print(pa)
 save(pa, file = opt$out_rdata)
 
 if (length(pa) > 0) {
-
   av_spectra <- plyr::ldply(pa@av_spectra, get_av_spectra)
 
   if (nrow(av_spectra) == 0) {
@@ -138,18 +135,20 @@
       colnames(av_spectra)[2] <- "fileid"
       av_spectra$avid <- seq_len(nrow(av_spectra))
 
-      filenames <- sapply(av_spectra$fileid,
-                          function(x) names(pa@fileList)[as.integer(x)])
+      filenames <- sapply(
+        av_spectra$fileid,
+        function(x) names(pa@fileList)[as.integer(x)]
+      )
       # filenames_galaxy <- sapply(
       #    av_spectra$fileid, function(x) basename(pa@fileList[as.integer(x)]))
 
-        av_spectra <- as.data.frame(
-          append(av_spectra, list(filename = filenames), after = 2))
+      av_spectra <- as.data.frame(
+        append(av_spectra, list(filename = filenames), after = 2)
+      )
     }
 
 
     print(head(av_spectra))
     write.table(av_spectra, opt$out_peaklist, row.names = FALSE, sep = "\t")
-
   }
 }
b
diff -r 0cc6b67dccb8 -r efd14b326007 combineAnnotations.R
--- a/combineAnnotations.R Tue Feb 08 13:55:12 2022 +0000
+++ b/combineAnnotations.R Wed Jun 12 16:05:01 2024 +0000
[
@@ -9,23 +9,18 @@
   make_option(c("-c", "--sirius_csi_resultPth"), type = "character"),
   make_option(c("-p", "--probmetab_resultPth"), type = "character"),
   make_option(c("-l", "--ms1_lookup_resultPth"), type = "character"),
-
   make_option("--ms1_lookup_checkAdducts", action = "store_true"),
   make_option("--ms1_lookup_keepAdducts", type = "character", default = NA),
   make_option("--ms1_lookup_dbSource", type = "character", default = "hmdb"),
-
   make_option("--sm_weight", type = "numeric"),
   make_option("--metfrag_weight", type = "numeric"),
   make_option("--sirius_csi_weight", type = "numeric"),
   make_option("--probmetab_weight", type = "numeric"),
   make_option("--ms1_lookup_weight", type = "numeric"),
   make_option("--biosim_weight", type = "numeric"),
-
   make_option("--summaryOutput", action = "store_true"),
-
   make_option("--create_new_database", action = "store_true"),
   make_option("--outdir", type = "character", default = "."),
-
   make_option("--compoundDbType", type = "character", default = "sqlite"),
   make_option("--compoundDbPth", type = "character", default = NA),
   make_option("--compoundDbHost", type = "character", default = NA)
@@ -37,7 +32,7 @@
 if (!is.null(opt$create_new_database)) {
   sm_resultPth <- file.path(opt$outdir, "combined_annotations.sqlite")
   file.copy(opt$sm_resultPth, sm_resultPth)
-}else{
+} else {
   sm_resultPth <- opt$sm_resultPth
 }
 
@@ -45,18 +40,19 @@
   opt$ms1_lookup_checkAdducts <- FALSE
 }
 if (!is.null(opt$ms1_lookup_keepAdducts)) {
-    opt$ms1_lookup_keepAdducts <- gsub("__ob__", "[", opt$ms1_lookup_keepAdducts)
-    opt$ms1_lookup_keepAdducts <- gsub("__cb__", "]", opt$ms1_lookup_keepAdducts)
-    ms1_lookup_keepAdducts <- strsplit(opt$ms1_lookup_keepAdducts, ",")[[1]]
+  opt$ms1_lookup_keepAdducts <- gsub("__ob__", "[", opt$ms1_lookup_keepAdducts)
+  opt$ms1_lookup_keepAdducts <- gsub("__cb__", "]", opt$ms1_lookup_keepAdducts)
+  ms1_lookup_keepAdducts <- strsplit(opt$ms1_lookup_keepAdducts, ",")[[1]]
 }
 
-weights <- list("sm" = opt$sm_weight,
-               "metfrag" = opt$metfrag_weight,
-               "sirius_csifingerid" = opt$sirius_csi_weight,
-               "probmetab" = opt$probmetab_weight,
-               "ms1_lookup" = opt$ms1_lookup_weight,
-               "biosim" = opt$biosim_weight
-               )
+weights <- list(
+  "sm" = opt$sm_weight,
+  "metfrag" = opt$metfrag_weight,
+  "sirius_csifingerid" = opt$sirius_csi_weight,
+  "probmetab" = opt$probmetab_weight,
+  "ms1_lookup" = opt$ms1_lookup_weight,
+  "biosim" = opt$biosim_weight
+)
 print(weights)
 
 if (is.null(opt$probmetab_resultPth)) {
@@ -69,8 +65,8 @@
 
 if (is.null(opt$summaryOutput)) {
   summaryOutput <- FALSE
-}else{
- summaryOutput <- TRUE
+} else {
+  summaryOutput <- TRUE
 }
 
 if (opt$compoundDbType == "local_config") {
@@ -82,7 +78,7 @@
     source(paste(base_dir, fname, sep = "/"))
   }
   source_local("dbconfig.R")
-}else{
+} else {
   compoundDbPth <- opt$compoundDbPth
   compoundDbType <- opt$compoundDbType
   compoundDbName <- NA
@@ -93,31 +89,33 @@
 }
 
 summary_output <- msPurity::combineAnnotations(
-                          sm_resultPth = sm_resultPth,
-                          compoundDbPth = compoundDbPth,
-                          metfrag_resultPth = opt$metfrag_resultPth,
-                          sirius_csi_resultPth = opt$sirius_csi_resultPth,
-                          probmetab_resultPth = opt$probmetab_resultPth,
-                          ms1_lookup_resultPth = opt$ms1_lookup_resultPth,
-                          ms1_lookup_keepAdducts = ms1_lookup_keepAdducts,
-                          ms1_lookup_checkAdducts = opt$ms1_lookup_checkAdducts,
-
-                         compoundDbType = compoundDbType,
-                         compoundDbName = compoundDbName,
-                         compoundDbHost = compoundDbHost,
-                         compoundDbPort = compoundDbPort,
-                         compoundDbUser = compoundDbUser,
-                         compoundDbPass = compoundDbPass,
-                         weights = weights,
-                         summaryOutput = summaryOutput)
+  sm_resultPth = sm_resultPth,
+  compoundDbPth = compoundDbPth,
+  metfrag_resultPth = opt$metfrag_resultPth,
+  sirius_csi_resultPth = opt$sirius_csi_resultPth,
+  probmetab_resultPth = opt$probmetab_resultPth,
+  ms1_lookup_resultPth = opt$ms1_lookup_resultPth,
+  ms1_lookup_keepAdducts = ms1_lookup_keepAdducts,
+  ms1_lookup_checkAdducts = opt$ms1_lookup_checkAdducts,
+  compoundDbType = compoundDbType,
+  compoundDbName = compoundDbName,
+  compoundDbHost = compoundDbHost,
+  compoundDbPort = compoundDbPort,
+  compoundDbUser = compoundDbUser,
+  compoundDbPass = compoundDbPass,
+  weights = weights,
+  summaryOutput = summaryOutput
+)
 if (summaryOutput) {
   write.table(summary_output,
-              file.path(opt$outdir, "combined_annotations.tsv"),
-              sep = "\t", row.names = FALSE)
+    file.path(opt$outdir, "combined_annotations.tsv"),
+    sep = "\t", row.names = FALSE
+  )
 }
 
 write.table(summary_output,
-            file.path(opt$outdir, "combined_annotations.tsv"),
-            sep = "\t", row.names = FALSE)
+  file.path(opt$outdir, "combined_annotations.tsv"),
+  sep = "\t", row.names = FALSE
+)
 
 closeAllConnections()
b
diff -r 0cc6b67dccb8 -r efd14b326007 createDatabase.R
--- a/createDatabase.R Tue Feb 08 13:55:12 2022 +0000
+++ b/createDatabase.R Wed Jun 12 16:05:01 2024 +0000
[
@@ -6,7 +6,6 @@
 print("CREATING DATABASE")
 
 xset_pa_filename_fix <- function(opt, pa, xset) {
-
   if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)) {
     # NOTE: Relies on the pa@fileList having the names of files given as 'names' of the variables
     # needs to be done due to Galaxy moving the files around and screwing up any links to files
@@ -18,7 +17,7 @@
     galaxy_names <- galaxy_names[galaxy_names != ""]
 
     nsave <- names(pa@fileList)
-    old_filenames  <- basename(pa@fileList)
+    old_filenames <- basename(pa@fileList)
     pa@fileList <- filepaths[match(names(pa@fileList), galaxy_names)]
     names(pa@fileList) <- nsave
 
@@ -27,12 +26,12 @@
   }
 
 
- if (!all(basename(pa@fileList) == basename(xset@filepaths))) {
+  if (!all(basename(pa@fileList) == basename(xset@filepaths))) {
     if (!all(names(pa@fileList) == basename(xset@filepaths))) {
-       print("FILELISTS DO NOT MATCH")
-       message("FILELISTS DO NOT MATCH")
-       quit(status = 1)
-    }else{
+      print("FILELISTS DO NOT MATCH")
+      message("FILELISTS DO NOT MATCH")
+      quit(status = 1)
+    } else {
       xset@filepaths <- unname(pa@fileList)
     }
   }
@@ -64,22 +63,23 @@
 print(opt)
 
 loadRData <- function(rdata_path, name) {
-#loads an RData file, and returns the named xset object if it is there
-    load(rdata_path)
-    return(get(ls()[ls() %in% name]))
+  # loads an RData file, and returns the named xset object if it is there
+  load(rdata_path)
+  return(get(ls()[ls() %in% name]))
 }
 
 getxcmsSetObject <- function(xobject) {
-    # XCMS 1.x
-    if (class(xobject) == "xcmsSet")
-        return(xobject)
-    # XCMS 3.x
-    if (class(xobject) == "XCMSnExp") {
-        # Get the legacy xcmsSet object
-        suppressWarnings(xset <- as(xobject, "xcmsSet"))
-        xcms::sampclass(xset) <- xset@phenoData$sample_group
-        return(xset)
-    }
+  # XCMS 1.x
+  if (class(xobject) == "xcmsSet") {
+    return(xobject)
+  }
+  # XCMS 3.x
+  if (class(xobject) == "XCMSnExp") {
+    # Get the legacy xcmsSet object
+    suppressWarnings(xset <- as(xobject, "xcmsSet"))
+    xcms::sampclass(xset) <- xset@phenoData$sample_group
+    return(xset)
+  }
 }
 
 
@@ -96,19 +96,17 @@
 # Missing list element causes failures (should be updated
 # in msPurity R package for future releases)
 if (!exists("allfrag", where = pa@filter_frag_params)) {
-    pa@filter_frag_params$allfrag <- FALSE
+  pa@filter_frag_params$allfrag <- FALSE
 }
 
 if (opt$xcms_camera_option == "xcms") {
-
   xset <- loadRData(opt$xset, c("xset", "xdata"))
   xset <- getxcmsSetObject(xset)
   fix <- xset_pa_filename_fix(opt, pa, xset)
   pa <- fix[[1]]
   xset <- fix[[2]]
   xa <- NULL
-}else{
-
+} else {
   xa <- loadRData(opt$xset, "xa")
   fix <- xset_pa_filename_fix(opt, pa, xa@xcmsSet)
   pa <- fix[[1]]
@@ -119,16 +117,16 @@
 
 if (is.null(opt$grpPeaklist)) {
   grpPeaklist <- NA
-}else{
+} else {
   grpPeaklist <- opt$grpPeaklist
 }
 
 dbPth <- msPurity::createDatabase(pa,
-                                   xset = xset,
-                                   xsa = xa,
-                                   outDir = opt$outDir,
-                                   grpPeaklist = grpPeaklist,
-                                   dbName = "createDatabase_output.sqlite"
+  xset = xset,
+  xsa = xa,
+  outDir = opt$outDir,
+  grpPeaklist = grpPeaklist,
+  dbName = "createDatabase_output.sqlite"
 )
 
 
@@ -136,9 +134,8 @@
 
 
 if (!is.null(opt$eic)) {
-
   if (is.null(xset)) {
-      xset <- xa@xcmsSet
+    xset <- xa@xcmsSet
   }
   # previous check should have matched filelists together
   xset@filepaths <- unname(pa@fileList)
@@ -150,19 +147,19 @@
     x$rtmin_raw <- xset@rt$raw[[sid]][match(x$rtmin, xset@rt$corrected[[sid]])]
     x$rtmax_raw <- xset@rt$raw[[sid]][match(x$rtmax, xset@rt$corrected[[sid]])]
     return(x)
-
   }
 
   xset@peaks <- as.matrix(
-    plyr::ddply(data.frame(xset@peaks), ~ sample, convert2Raw, xset = xset))
+    plyr::ddply(data.frame(xset@peaks), ~sample, convert2Raw, xset = xset)
+  )
 
   # Saves the EICS into the previously created database
   px <- msPurity::purityX(xset,
-                          saveEIC = TRUE,
-                          cores = 1,
-                          sqlitePth = dbPth,
-                          rtrawColumns = TRUE)
-
+    saveEIC = TRUE,
+    cores = 1,
+    sqlitePth = dbPth,
+    rtrawColumns = TRUE
+  )
 }
 
 closeAllConnections()
b
diff -r 0cc6b67dccb8 -r efd14b326007 createMSP.R
--- a/createMSP.R Tue Feb 08 13:55:12 2022 +0000
+++ b/createMSP.R Wed Jun 12 16:05:01 2024 +0000
[
@@ -26,59 +26,58 @@
 
 if (is.null(opt$metadata)) {
   metadata <- NULL
-}else{
-  metadata <- read.table(opt$metadata,  header = TRUE, sep = "\t",
-                         stringsAsFactors = FALSE, check.names = FALSE)
+} else {
+  metadata <- read.table(opt$metadata,
+    header = TRUE, sep = "\t",
+    stringsAsFactors = FALSE, check.names = FALSE
+  )
 
   if (!opt$metadata_cols_filter == "") {
-     metadata_cols_filter <- strsplit(opt$metadata_cols_filter, ",")[[1]]
+    metadata_cols_filter <- strsplit(opt$metadata_cols_filter, ",")[[1]]
 
-     metadata <- metadata[, metadata_cols_filter, drop = FALSE]
-     print(metadata)
+    metadata <- metadata[, metadata_cols_filter, drop = FALSE]
+    print(metadata)
 
-     if (!"grpid" %in% colnames(metadata)) {
-       metadata$grpid <- seq_len(nrow(metadata))
-     }
+    if (!"grpid" %in% colnames(metadata)) {
+      metadata$grpid <- seq_len(nrow(metadata))
+    }
 
-     print(metadata)
-
+    print(metadata)
   }
-
 }
 
 
 
 if (is.null(opt$metadata_cols) || opt$metadata_cols == "") {
-    metadata_cols <- NULL
-}else{
-    metadata_cols <- opt$metadata_cols
-
+  metadata_cols <- NULL
+} else {
+  metadata_cols <- opt$metadata_cols
 }
 
 
 if (is.null(opt$adduct_split)) {
   adduct_split <- FALSE
-}else{
+} else {
   adduct_split <- TRUE
 }
 
 if (is.null(opt$xcms_groupids)) {
   xcms_groupids <- NULL
-}else{
+} else {
   xcms_groupids <- trimws(strsplit(opt$xcms_groupids, ",")[[1]])
 }
 
 
 if (is.null(opt$include_adducts_custom)) {
   include_adducts_custom <- ""
-}else{
+} else {
   include_adducts_custom <- opt$include_adducts_custom
 }
 
 
 if (opt$include_adducts == "None") {
   include_adducts <- ""
-}else{
+} else {
   include_adducts <- opt$include_adducts
 }
 
@@ -96,22 +95,23 @@
 
 if (is.null(opt$filter)) {
   filter <- FALSE
-}else{
+} else {
   filter <- TRUE
 }
 
 
 
 msPurity::createMSP(pa,
-                    msp_file_pth = file.path(opt$out_dir, "lcmsms_spectra.msp"),
-                    metadata = metadata,
-                    metadata_cols = metadata_cols,
-                    method = opt$method,
-                    adduct_split = adduct_split,
-                    xcms_groupids = xcms_groupids,
-                    filter = filter,
-                    intensity_ra = opt$intensity_ra,
-                    include_adducts = include_adducts_all,
-                    msp_schema = opt$msp_schema)
+  msp_file_pth = file.path(opt$out_dir, "lcmsms_spectra.msp"),
+  metadata = metadata,
+  metadata_cols = metadata_cols,
+  method = opt$method,
+  adduct_split = adduct_split,
+  xcms_groupids = xcms_groupids,
+  filter = filter,
+  intensity_ra = opt$intensity_ra,
+  include_adducts = include_adducts_all,
+  msp_schema = opt$msp_schema
+)
 
-print("msp created")
+print("msp created")
\ No newline at end of file
b
diff -r 0cc6b67dccb8 -r efd14b326007 dimsPredictPuritySingle.R
--- a/dimsPredictPuritySingle.R Tue Feb 08 13:55:12 2022 +0000
+++ b/dimsPredictPuritySingle.R Wed Jun 12 16:05:01 2024 +0000
[
@@ -3,23 +3,23 @@
 print(sessionInfo())
 
 option_list <- list(
-  make_option(c("--mzML_file"), type = "character"),
-  make_option(c("--mzML_files"), type = "character"),
-  make_option(c("--mzML_filename"), type = "character", default = ""),
-  make_option(c("--mzML_galaxy_names"), type = "character", default = ""),
-  make_option(c("--peaks_file"), type = "character"),
-  make_option(c("-o", "--out_dir"), type = "character"),
-  make_option("--minoffset", default = 0.5),
-  make_option("--maxoffset", default = 0.5),
-  make_option("--ilim", default = 0.05),
-  make_option("--ppm", default = 4),
-  make_option("--dimspy", action = "store_true"),
-  make_option("--sim", action = "store_true"),
-  make_option("--remove_nas", action = "store_true"),
-  make_option("--iwNorm", default = "none", type = "character"),
-  make_option("--file_num_dimspy", default = 1),
-  make_option("--exclude_isotopes", action = "store_true"),
-  make_option("--isotope_matrix", type = "character")
+    make_option(c("--mzML_file"), type = "character"),
+    make_option(c("--mzML_files"), type = "character"),
+    make_option(c("--mzML_filename"), type = "character", default = ""),
+    make_option(c("--mzML_galaxy_names"), type = "character", default = ""),
+    make_option(c("--peaks_file"), type = "character"),
+    make_option(c("-o", "--out_dir"), type = "character"),
+    make_option("--minoffset", default = 0.5),
+    make_option("--maxoffset", default = 0.5),
+    make_option("--ilim", default = 0.05),
+    make_option("--ppm", default = 4),
+    make_option("--dimspy", action = "store_true"),
+    make_option("--sim", action = "store_true"),
+    make_option("--remove_nas", action = "store_true"),
+    make_option("--iwNorm", default = "none", type = "character"),
+    make_option("--file_num_dimspy", default = 1),
+    make_option("--exclude_isotopes", action = "store_true"),
+    make_option("--isotope_matrix", type = "character")
 )
 
 # store options
@@ -43,7 +43,7 @@
     galaxy_names <- str_to_vec(galaxy_names)
     if (mzML_filename %in% galaxy_names) {
         return(mzML_files[galaxy_names == mzML_filename])
-    }else{
+    } else {
         stop(paste("mzML file not found - ", mzML_filename))
     }
 }
@@ -53,15 +53,18 @@
     df <- read.table(opt$peaks_file, header = TRUE, sep = "\t")
     if (file.exists(opt$mzML_file)) {
         mzML_file <- opt$mzML_file
-    }else if (!is.null(opt$mzML_files)) {
-        mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names,
-                                    opt$mzML_filename)
-    }else{
+    } else if (!is.null(opt$mzML_files)) {
+        mzML_file <- find_mzml_file(
+            opt$mzML_files, opt$mzML_galaxy_names,
+            opt$mzML_filename
+        )
+    } else {
         mzML_file <- file.path(opt$mzML_file, filename)
     }
-}else{
+} else {
     indf <- read.table(opt$peaks_file,
-                       header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+        header = TRUE, sep = "\t", stringsAsFactors = FALSE
+    )
 
     filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy]
     print(filename)
@@ -75,9 +78,9 @@
 
     if (file.exists(opt$mzML_file)) {
         mzML_file <- opt$mzML_file
-    }else if (!is.null(opt$mzML_files)) {
+    } else if (!is.null(opt$mzML_files)) {
         mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename)
-    }else{
+    } else {
         mzML_file <- file.path(opt$mzML_file, filename)
     }
 
@@ -95,25 +98,26 @@
 }
 
 if (!is.null(opt$remove_nas)) {
-  df <- df[!is.na(df$mz), ]
+    df <- df[!is.na(df$mz), ]
 }
 
 if (is.null(opt$isotope_matrix)) {
     im <- NULL
-}else{
+} else {
     im <- read.table(opt$isotope_matrix,
-                     header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+        header = TRUE, sep = "\t", stringsAsFactors = FALSE
+    )
 }
 
 if (is.null(opt$exclude_isotopes)) {
     isotopes <- FALSE
-}else{
+} else {
     isotopes <- TRUE
 }
 
 if (is.null(opt$sim)) {
     sim <- FALSE
-}else{
+} else {
     sim <- TRUE
 }
 
@@ -123,13 +127,13 @@
 if (opt$iwNorm == "none") {
     iwNorm <- FALSE
     iwNormFun <- NULL
-}else if (opt$iwNorm == "gauss") {
+} else if (opt$iwNorm == "gauss") {
     iwNorm <- TRUE
     iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset)
-}else if (opt$iwNorm == "rcosine") {
+} else if (opt$iwNorm == "rcosine") {
     iwNorm <- TRUE
     iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset)
-}else if (opt$iwNorm == "QE5") {
+} else if (opt$iwNorm == "QE5") {
     iwNorm <- TRUE
     iwNormFun <- msPurity::iwNormQE.5()
 }
@@ -138,23 +142,24 @@
 print(head(df))
 print(mzML_file)
 predicted <- msPurity::dimsPredictPuritySingle(df$mz,
-                                     filepth = mzML_file,
-                                     minOffset = minOffset,
-                                     maxOffset = maxOffset,
-                                     ppm = opt$ppm,
-                                     mzML = TRUE,
-                                     sim = sim,
-                                     ilim = opt$ilim,
-                                     isotopes = isotopes,
-                                     im = im,
-                                     iwNorm = iwNorm,
-                                     iwNormFun = iwNormFun
-                                     )
+    filepth = mzML_file,
+    minOffset = minOffset,
+    maxOffset = maxOffset,
+    ppm = opt$ppm,
+    mzML = TRUE,
+    sim = sim,
+    ilim = opt$ilim,
+    isotopes = isotopes,
+    im = im,
+    iwNorm = iwNorm,
+    iwNormFun = iwNormFun
+)
 predicted <- cbind(df, predicted)
 
 print(head(predicted))
 print(file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"))
 
 write.table(predicted,
-            file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"),
-            row.names = FALSE, sep = "\t")
+    file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"),
+    row.names = FALSE, sep = "\t"
+)
b
diff -r 0cc6b67dccb8 -r efd14b326007 filterFragSpectra.R
--- a/filterFragSpectra.R Tue Feb 08 13:55:12 2022 +0000
+++ b/filterFragSpectra.R Wed Jun 12 16:05:01 2024 +0000
[
@@ -9,13 +9,10 @@
   make_option("--out_peaklist_prec", type = "character"),
   make_option("--out_peaklist_frag", type = "character"),
   make_option("--pa", type = "character"),
-
   make_option("--ilim", default = 0.0),
   make_option("--plim", default = 0.0),
-
   make_option("--ra", default = 0.0),
   make_option("--snr", default = 0.0),
-
   make_option("--rmp", action = "store_true"),
   make_option("--snmeth", default = "median", type = "character"),
   make_option("--allfrag", action = "store_true")
@@ -26,9 +23,9 @@
 
 
 loadRData <- function(rdata_path, name) {
-    #loads an RData file, and returns the named xset object if it is there
-    load(rdata_path)
-    return(get(ls()[ls() %in% name]))
+  # loads an RData file, and returns the named xset object if it is there
+  load(rdata_path)
+  return(get(ls()[ls() %in% name]))
 }
 
 # Requires
@@ -36,24 +33,25 @@
 
 if (is.null(opt$rmp)) {
   opt$rmp <- FALSE
-}else{
+} else {
   opt$rmp <- TRUE
 }
 
 if (is.null(opt$allfrag)) {
   opt$allfrag <- FALSE
-}else{
+} else {
   opt$allfrag <- TRUE
 }
 
 pa <- filterFragSpectra(pa,
-                        ilim = opt$ilim,
-                        plim = opt$plim,
-                        ra = opt$ra,
-                        snr = opt$snr,
-                        rmp = opt$rmp,
-                        allfrag = opt$allfrag,
-                        snmeth = opt$snmeth)
+  ilim = opt$ilim,
+  plim = opt$plim,
+  ra = opt$ra,
+  snr = opt$snr,
+  rmp = opt$rmp,
+  allfrag = opt$allfrag,
+  snmeth = opt$snmeth
+)
 
 print(pa)
 save(pa, file = opt$out_rdata)
@@ -82,11 +80,9 @@
 
 
 if (length(pa) > 0) {
-
   if (length(pa@grped_ms2) == 0) {
     message("No spectra available")
   } else {
-
     # get group ids
     grpids <- unique(as.character(pa@grped_df$grpid))
 
@@ -94,7 +90,7 @@
     df_fragments <- plyr::adply(grpids, 1, msmsgrp, pa = pa)
     df_fragments <- merge(df_fragments, pa@puritydf[, c("pid", "acquisitionNum", "precursorScanNum")], by = "pid")
     df_fragments <- df_fragments[order(df_fragments$grpid, df_fragments$pid, df_fragments$mz), ]
-    #select and reorder columns
+    # select and reorder columns
     df_fragments <- df_fragments[, c("grpid", "pid", "precursorScanNum", "acquisitionNum", "fileid", "mz", "i", "snr", "ra", "purity_pass_flag", "intensity_pass_flag", "ra_pass_flag", "snr_pass_flag", "pass_flag")]
 
     pa@grped_df$filename <- sapply(pa@grped_df$fileid, function(x) names(pa@fileList)[as.integer(x)])
b
diff -r 0cc6b67dccb8 -r efd14b326007 flagRemove.R
--- a/flagRemove.R Tue Feb 08 13:55:12 2022 +0000
+++ b/flagRemove.R Wed Jun 12 16:05:01 2024 +0000
[
b'@@ -2,77 +2,93 @@\n library(optparse)\n print(sessionInfo())\n option_list <- list(\n-  make_option(c("-o", "--out_dir"), type = "character", default = getwd(),\n-              help = "Output folder for resulting files [default = %default]"\n-  ),\n-  make_option(c("-x", "--xset_path"), type = "character", default = file.path(getwd(), "xset.rds"),\n-              help = "The path to the xcmsSet object [default = %default]"\n-  ),\n-  make_option("--polarity", default = NA,\n-              help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]"\n-  ),\n-  make_option("--rsd_i_blank", default = 100,\n-              help = "RSD threshold for the blank [default = %default]"\n-  ),\n-  make_option("--minfrac_blank", default = 0.5,\n-              help = "minimum fraction of files for features needed for the blank [default = %default]"\n-  ),\n-  make_option("--rsd_rt_blank", default = 100,\n-              help = "RSD threshold for the RT of the blank [default = %default]"\n-  ),\n-\n-  make_option("--ithres_blank", default = 0,\n-              help = "Intensity threshold for the blank [default = %default]"\n-  ),\n-  make_option("--s2b", default = 10,\n-              help = "fold change (sample/blank) needed for sample peak to be allowed. e.g.\n+    make_option(c("-o", "--out_dir"),\n+        type = "character", default = getwd(),\n+        help = "Output folder for resulting files [default = %default]"\n+    ),\n+    make_option(c("-x", "--xset_path"),\n+        type = "character", default = file.path(getwd(), "xset.rds"),\n+        help = "The path to the xcmsSet object [default = %default]"\n+    ),\n+    make_option("--polarity",\n+        default = NA,\n+        help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]"\n+    ),\n+    make_option("--rsd_i_blank",\n+        default = 100,\n+        help = "RSD threshold for the blank [default = %default]"\n+    ),\n+    make_option("--minfrac_blank",\n+        default = 0.5,\n+        help = "minimum fraction of files for features needed for the blank [default = %default]"\n+    ),\n+    make_option("--rsd_rt_blank",\n+        default = 100,\n+        help = "RSD threshold for the RT of the blank [default = %default]"\n+    ),\n+    make_option("--ithres_blank",\n+        default = 0,\n+        help = "Intensity threshold for the blank [default = %default]"\n+    ),\n+    make_option("--s2b",\n+        default = 10,\n+        help = "fold change (sample/blank) needed for sample peak to be allowed. e.g.\n                     if s2b set to 10 and the recorded sample \'intensity\' value was 100 and blank was 10.\n                     1000/10 = 100, so sample has fold change higher than the threshold and the peak\n                     is not considered a blank [default = %default]"\n-  ),\n-  make_option("--blank_class", default = "blank", type = "character",\n-              help = "A string representing the class that will be used for the blank.[default = %default]"\n-  ),\n-  make_option("--egauss_thr", default = NA,\n-              help = "Threshold for filtering out non gaussian shaped peaks. Note this only works\n+    ),\n+    make_option("--blank_class",\n+        default = "blank", type = "character",\n+        help = "A string representing the class that will be used for the blank.[default = %default]"\n+    ),\n+    make_option("--egauss_thr",\n+        default = NA,\n+        help = "Threshold for filtering out non gaussian shaped peaks. Note this only works\n                             if the \'verbose columns\' and \'fit gauss\' was used with xcms\n                             [default = %default]"\n-  ),\n-  make_option("--rsd_i_sample", default = 100,\n-              help = "RSD threshold for the samples [default = %default]"\n-  ),\n-  make_option("--minfrac_sample", default = 0.8,\n-              help = "minimum fraction of files for features needed for the samples [default = %default]"\n-  ),\n-  make_option("--rsd_rt_sample", default = 10'..b'   # XCMS 1.x\n-    if (class(xobject) == "xcmsSet")\n+    if (class(xobject) == "xcmsSet") {\n         return(xobject)\n+    }\n     # XCMS 3.x\n     if (class(xobject) == "XCMSnExp") {\n         # Get the legacy xcmsSet object\n@@ -116,7 +133,7 @@\n \n \n loadRData <- function(rdata_path, name) {\n-#loads an RData file, and returns the named xset object if it is there\n+    # loads an RData file, and returns the named xset object if it is there\n     load(rdata_path)\n     return(get(ls()[ls() %in% name]))\n }\n@@ -126,7 +143,7 @@\n print(xset)\n if (is.null(opt$samplelist)) {\n     blank_class <- opt$blank_class\n-}else{\n+} else {\n     samplelist <- read.table(opt$samplelist, sep = "\\t", header = TRUE)\n     samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"])\n \n@@ -142,25 +159,26 @@\n \n if (is.null(opt$multilist)) {\n     ffrm_out <- flag_remove(xset,\n-                        pol = opt$polarity,\n-                        rsd_i_blank = opt$rsd_i_blank,\n-                        minfrac_blank = opt$minfrac_blank,\n-                        rsd_rt_blank = opt$rsd_rt_blank,\n-                        ithres_blank = opt$ithres_blank,\n-                        s2b = opt$s2b,\n-                        ref.class = blank_class,\n-                        egauss_thr = opt$egauss_thr,\n-                        rsd_i_sample = opt$rsd_i_sample,\n-                        minfrac_sample = opt$minfrac_sample,\n-                        rsd_rt_sample = opt$rsd_rt_sample,\n-                        ithres_sample = opt$ithres_sample,\n-                        minfrac_xcms = opt$minfrac_xcms,\n-                        mzwid = opt$mzwid,\n-                        bw = opt$bw,\n-                        out_dir = opt$out_dir,\n-                        temp_save = temp_save,\n-                        remove_spectra = remove_spectra,\n-                        grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]])\n+        pol = opt$polarity,\n+        rsd_i_blank = opt$rsd_i_blank,\n+        minfrac_blank = opt$minfrac_blank,\n+        rsd_rt_blank = opt$rsd_rt_blank,\n+        ithres_blank = opt$ithres_blank,\n+        s2b = opt$s2b,\n+        ref.class = blank_class,\n+        egauss_thr = opt$egauss_thr,\n+        rsd_i_sample = opt$rsd_i_sample,\n+        minfrac_sample = opt$minfrac_sample,\n+        rsd_rt_sample = opt$rsd_rt_sample,\n+        ithres_sample = opt$ithres_sample,\n+        minfrac_xcms = opt$minfrac_xcms,\n+        mzwid = opt$mzwid,\n+        bw = opt$bw,\n+        out_dir = opt$out_dir,\n+        temp_save = temp_save,\n+        remove_spectra = remove_spectra,\n+        grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]]\n+    )\n     print("flag remove finished")\n     xset <- ffrm_out[[1]]\n     grp_peaklist <- ffrm_out[[2]]\n@@ -172,26 +190,26 @@\n     peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv")\n     print(peak_pth)\n     write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist),\n-                peak_pth, row.names = FALSE, sep = "\\t")\n+        peak_pth,\n+        row.names = FALSE, sep = "\\t"\n+    )\n \n     removed_peaks <- data.frame(removed_peaks)\n     write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks),\n-        file.path(opt$out_dir, "removed_peaks.tsv"), row.names = FALSE, sep = "\\t")\n-\n-}else{\n-\n-   # nolint start\n-   # TODO\n-   #xsets <- split(xset, multilist_df$multlist)\n-   #\n-   #mult_grps <- unique(multilist_df$multlist)\n-   #\n-   #for (mgrp in mult_grps){\n-   #   xset_i <- xsets[mgrp]\n-   #   xcms::group(xset_i,\n-   #\n-   # }\n-   # nolint end\n-\n-\n+        file.path(opt$out_dir, "removed_peaks.tsv"),\n+        row.names = FALSE, sep = "\\t"\n+    )\n+} else {\n+    # nolint start\n+    # TODO\n+    # xsets <- split(xset, multilist_df$multlist)\n+    #\n+    # mult_grps <- unique(multilist_df$multlist)\n+    #\n+    # for (mgrp in mult_grps){\n+    #   xset_i <- xsets[mgrp]\n+    #   xcms::group(xset_i,\n+    #\n+    # }\n+    # nolint end\n }\n'
b
diff -r 0cc6b67dccb8 -r efd14b326007 frag4feature.R
--- a/frag4feature.R Tue Feb 08 13:55:12 2022 +0000
+++ b/frag4feature.R Wed Jun 12 16:05:01 2024 +0000
[
@@ -3,64 +3,61 @@
 library(xcms)
 print(sessionInfo())
 
-xset_pa_filename_fix <- function(opt, pa, xset=NULL) {
-
+xset_pa_filename_fix <- function(opt, pa, xset = NULL) {
+    if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)) {
+        # NOTE: Relies on the pa@fileList having the names of files given as 'names' of the variables
+        # needs to be done due to Galaxy moving the files around and screwing up any links to files
 
-  if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)) {
-    # NOTE: Relies on the pa@fileList having the names of files given as 'names' of the variables
-    # needs to be done due to Galaxy moving the files around and screwing up any links to files
+        filepaths <- trimws(strsplit(opt$mzML_files, ",")[[1]]) # nolint
 
-    filepaths <- trimws(strsplit(opt$mzML_files, ",")[[1]]) # nolint
+        filepaths <- filepaths[filepaths != ""]
 
-    filepaths <- filepaths[filepaths != ""]
-
-    galaxy_names <- trimws(strsplit(opt$galaxy_names, ",")[[1]])
-    galaxy_names <- galaxy_names[galaxy_names != ""]
+        galaxy_names <- trimws(strsplit(opt$galaxy_names, ",")[[1]])
+        galaxy_names <- galaxy_names[galaxy_names != ""]
 
-    nsave <- names(pa@fileList)
-    old_filenames  <- basename(pa@fileList)
+        nsave <- names(pa@fileList)
+        old_filenames <- basename(pa@fileList)
 
-    pa@fileList <- filepaths[match(names(pa@fileList), galaxy_names)]
-    names(pa@fileList) <- nsave
+        pa@fileList <- filepaths[match(names(pa@fileList), galaxy_names)]
+        names(pa@fileList) <- nsave
 
-    pa@puritydf$filename <- basename(pa@fileList[match(pa@puritydf$filename, old_filenames)])
-    pa@grped_df$filename <- basename(pa@fileList[match(pa@grped_df$filename, old_filenames)])
-  }
-  print(pa@fileList)
+        pa@puritydf$filename <- basename(pa@fileList[match(pa@puritydf$filename, old_filenames)])
+        pa@grped_df$filename <- basename(pa@fileList[match(pa@grped_df$filename, old_filenames)])
+    }
+    print(pa@fileList)
 
-  if (!is.null(xset)) {
-
-    print(xset@filepaths)
+    if (!is.null(xset)) {
+        print(xset@filepaths)
 
-    if (!all(basename(pa@fileList) == basename(xset@filepaths))) {
-       if (!all(names(pa@fileList) == basename(xset@filepaths))) {
-          print("FILELISTS DO NOT MATCH")
-          message("FILELISTS DO NOT MATCH")
-          quit(status = 1)
-       }else{
-          xset@filepaths <- unname(pa@fileList)
-       }
-   }
- }
+        if (!all(basename(pa@fileList) == basename(xset@filepaths))) {
+            if (!all(names(pa@fileList) == basename(xset@filepaths))) {
+                print("FILELISTS DO NOT MATCH")
+                message("FILELISTS DO NOT MATCH")
+                quit(status = 1)
+            } else {
+                xset@filepaths <- unname(pa@fileList)
+            }
+        }
+    }
 
-  return(list(pa, xset))
+    return(list(pa, xset))
 }
 
 
 option_list <- list(
-  make_option(c("-o", "--out_dir"), type = "character"),
-  make_option("--pa", type = "character"),
-  make_option("--xset", type = "character"),
-  make_option("--ppm", default = 10),
-  make_option("--plim", default = 0.0),
-  make_option("--convert2RawRT", action = "store_true"),
-  make_option("--intense", action = "store_true"),
-  make_option("--createDB", action = "store_true"),
-  make_option("--cores", default = 4),
-  make_option("--mzML_files", type = "character"),
-  make_option("--galaxy_names", type = "character"),
-  make_option("--grp_peaklist", type = "character"),
-  make_option("--useGroup", action = "store_true")
+    make_option(c("-o", "--out_dir"), type = "character"),
+    make_option("--pa", type = "character"),
+    make_option("--xset", type = "character"),
+    make_option("--ppm", default = 10),
+    make_option("--plim", default = 0.0),
+    make_option("--convert2RawRT", action = "store_true"),
+    make_option("--intense", action = "store_true"),
+    make_option("--createDB", action = "store_true"),
+    make_option("--cores", default = 4),
+    make_option("--mzML_files", type = "character"),
+    make_option("--galaxy_names", type = "character"),
+    make_option("--grp_peaklist", type = "character"),
+    make_option("--useGroup", action = "store_true")
 )
 
 # store options
@@ -68,17 +65,18 @@
 print(opt)
 
 loadRData <- function(rdata_path, name) {
-#loads an RData file, and returns the named xset object if it is there
+    # loads an RData file, and returns the named xset object if it is there
     load(rdata_path)
     return(get(ls()[ls() %in% name]))
 }
 
 # This function retrieve a xset like object
-#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
+# @author Gildas Le Corguille lecorguille@sb-roscoff.fr
 getxcmsSetObject <- function(xobject) {
     # XCMS 1.x
-    if (class(xobject) == "xcmsSet")
+    if (class(xobject) == "xcmsSet") {
         return(xobject)
+    }
     # XCMS 3.x
     if (class(xobject) == "XCMSnExp") {
         # Get the legacy xcmsSet object
@@ -99,20 +97,20 @@
 print(xset@filepaths)
 
 if (is.null(opt$intense)) {
-  intense <- FALSE
-}else{
-  intense <- TRUE
+    intense <- FALSE
+} else {
+    intense <- TRUE
 }
 
 if (is.null(opt$convert2RawRT)) {
     convert2RawRT <- FALSE
-}else{
+} else {
     convert2RawRT <- TRUE
 }
 
 if (is.null(opt$createDB)) {
     createDB <- FALSE
-}else{
+} else {
     createDB <- TRUE
 }
 
@@ -121,7 +119,7 @@
     pa <- fix[[1]]
     xset <- fix[[2]]
     useGroup <- FALSE
-}else{
+} else {
     # if are only aligning to the group not eah file we do not need to align the files between the xset and pa object
     print("useGroup")
     fix <- xset_pa_filename_fix(opt, pa)
@@ -132,22 +130,24 @@
 
 if (is.null(opt$grp_peaklist)) {
     grp_peaklist <- NA
-}else{
+} else {
     grp_peaklist <- opt$grp_peaklist
 }
 print(useGroup)
 
-pa <- msPurity::frag4feature(pa = pa,
-                             xset = xset,
-                             ppm = opt$ppm,
-                             plim = opt$plim,
-                             intense = intense,
-                             convert2RawRT = convert2RawRT,
-                             db_name = "alldata.sqlite",
-                             out_dir = opt$out_dir,
-                             grp_peaklist = grp_peaklist,
-                             create_db = createDB,
-                             use_group = useGroup)
+pa <- msPurity::frag4feature(
+    pa = pa,
+    xset = xset,
+    ppm = opt$ppm,
+    plim = opt$plim,
+    intense = intense,
+    convert2RawRT = convert2RawRT,
+    db_name = "alldata.sqlite",
+    out_dir = opt$out_dir,
+    grp_peaklist = grp_peaklist,
+    create_db = createDB,
+    use_group = useGroup
+)
 print(pa)
 save(pa, file = file.path(opt$out_dir, "frag4feature_output.RData"))
 
b
diff -r 0cc6b67dccb8 -r efd14b326007 macros.xml
--- a/macros.xml Tue Feb 08 13:55:12 2022 +0000
+++ b/macros.xml Wed Jun 12 16:05:01 2024 +0000
b
@@ -1,14 +1,14 @@
 <?xml version="1.0"?>
 <macros>
     <token name="@TOOL_VERSION@">1.16.2</token>
-    <token name="@GALAXY_TOOL_VERSION@">1</token>
+    <token name="@GALAXY_TOOL_VERSION@">2</token>
 
     <xml name="requirements">
         <requirements>
            <requirement type="package" version="@TOOL_VERSION@" >bioconductor-mspurity</requirement>
            <requirement type="package" version="1.46.0" >bioconductor-camera</requirement>
            <requirement type="package" version="3.12.0" >bioconductor-xcms</requirement>
-           <requirement type="package" version="1.16.0" >bioconductor-mspuritydata</requirement>
+           <requirement type="package" version="1.18.0" >bioconductor-mspuritydata</requirement>
            <requirement type="package" version="1.6.6">r-optparse</requirement>
            <requirement type="package" version="1.3.1">r-rpostgres</requirement>
            <requirement type="package" version="0.10.21">r-rmysql</requirement>
b
diff -r 0cc6b67dccb8 -r efd14b326007 purityA.R
--- a/purityA.R Tue Feb 08 13:55:12 2022 +0000
+++ b/purityA.R Wed Jun 12 16:05:01 2024 +0000
b
@@ -3,20 +3,20 @@
 print(sessionInfo())
 
 option_list <- list(
-  make_option(c("-o", "--out_dir"), type = "character"),
-  make_option("--mzML_files", type = "character"),
-  make_option("--galaxy_names", type = "character"),
-  make_option("--minOffset", type = "numeric"),
-  make_option("--maxOffset", type = "numeric"),
-  make_option("--ilim", type = "numeric"),
-  make_option("--iwNorm", default = "none", type = "character"),
-  make_option("--exclude_isotopes", action = "store_true"),
-  make_option("--isotope_matrix", type = "character"),
-  make_option("--mostIntense", action = "store_true"),
-  make_option("--plotP", action = "store_true"),
-  make_option("--nearest", action = "store_true"),
-  make_option("--cores", default = 4),
-  make_option("--ppmInterp", default = 7)
+    make_option(c("-o", "--out_dir"), type = "character"),
+    make_option("--mzML_files", type = "character"),
+    make_option("--galaxy_names", type = "character"),
+    make_option("--minOffset", type = "numeric"),
+    make_option("--maxOffset", type = "numeric"),
+    make_option("--ilim", type = "numeric"),
+    make_option("--iwNorm", default = "none", type = "character"),
+    make_option("--exclude_isotopes", action = "store_true"),
+    make_option("--isotope_matrix", type = "character"),
+    make_option("--mostIntense", action = "store_true"),
+    make_option("--plotP", action = "store_true"),
+    make_option("--nearest", action = "store_true"),
+    make_option("--cores", default = 4),
+    make_option("--ppmInterp", default = 7)
 )
 
 opt <- parse_args(OptionParser(option_list = option_list))
@@ -25,23 +25,27 @@
 if (opt$iwNorm == "none") {
     iwNorm <- FALSE
     iwNormFun <- NULL
-}else if (opt$iwNorm == "gauss") {
+} else if (opt$iwNorm == "gauss") {
     iwNorm <- TRUE
     if (is.null(opt$minOffset) || is.null(opt$maxOffset)) {
-      print("User has to define offsets if using Gaussian normalisation")
-    }else{
-      iwNormFun <- msPurity::iwNormGauss(minOff = -as.numeric(opt$minOffset),
-                                         maxOff = as.numeric(opt$maxOffset))
+        print("User has to define offsets if using Gaussian normalisation")
+    } else {
+        iwNormFun <- msPurity::iwNormGauss(
+            minOff = -as.numeric(opt$minOffset),
+            maxOff = as.numeric(opt$maxOffset)
+        )
     }
-}else if (opt$iwNorm == "rcosine") {
+} else if (opt$iwNorm == "rcosine") {
     iwNorm <- TRUE
     if (is.null(opt$minOffset) || is.null(opt$maxOffset)) {
-      print("User has to define offsets if using R-cosine normalisation")
-    }else{
-      iwNormFun <- msPurity::iwNormRcosine(minOff = -as.numeric(opt$minOffset),
-                                           maxOff = as.numeric(opt$maxOffset))
+        print("User has to define offsets if using R-cosine normalisation")
+    } else {
+        iwNormFun <- msPurity::iwNormRcosine(
+            minOff = -as.numeric(opt$minOffset),
+            maxOff = as.numeric(opt$maxOffset)
+        )
     }
-}else if (opt$iwNorm == "QE5") {
+} else if (opt$iwNorm == "QE5") {
     iwNorm <- TRUE
     iwNormFun <- msPurity::iwNormQE.5()
 }
@@ -53,27 +57,27 @@
 
 if (is.null(opt$minOffset) || is.null(opt$maxOffset)) {
     offsets <- NA
-}else{
+} else {
     offsets <- as.numeric(c(opt$minOffset, opt$maxOffset))
 }
 
 
 if (is.null(opt$mostIntense)) {
     mostIntense <- FALSE
-}else{
+} else {
     mostIntense <- TRUE
 }
 
 if (is.null(opt$nearest)) {
     nearest <- FALSE
-}else{
+} else {
     nearest <- TRUE
 }
 
 if (is.null(opt$plotP)) {
     plotP <- FALSE
     plotdir <- NULL
-}else{
+} else {
     plotP <- TRUE
     plotdir <- opt$out_dir
 }
@@ -81,32 +85,34 @@
 
 if (is.null(opt$isotope_matrix)) {
     im <- NULL
-}else{
+} else {
     im <- read.table(opt$isotope_matrix,
-                     header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+        header = TRUE, sep = "\t", stringsAsFactors = FALSE
+    )
 }
 
 if (is.null(opt$exclude_isotopes)) {
     isotopes <- FALSE
-}else{
+} else {
     isotopes <- TRUE
 }
 
 pa <- msPurity::purityA(filepaths,
-                        cores = opt$cores,
-                        mostIntense = mostIntense,
-                        nearest = nearest,
-                        offsets = offsets,
-                        plotP = plotP,
-                        plotdir = plotdir,
-                        interpol = "linear",
-                        iwNorm = iwNorm,
-                        iwNormFun = iwNormFun,
-                        ilim = opt$ilim,
-                        mzRback = "pwiz",
-                        isotopes = isotopes,
-                        im = im,
-                        ppmInterp = opt$ppmInterp)
+    cores = opt$cores,
+    mostIntense = mostIntense,
+    nearest = nearest,
+    offsets = offsets,
+    plotP = plotP,
+    plotdir = plotdir,
+    interpol = "linear",
+    iwNorm = iwNorm,
+    iwNormFun = iwNormFun,
+    ilim = opt$ilim,
+    mzRback = "pwiz",
+    isotopes = isotopes,
+    im = im,
+    ppmInterp = opt$ppmInterp
+)
 
 
 if (!is.null(opt$galaxy_names)) {
b
diff -r 0cc6b67dccb8 -r efd14b326007 purityX.R
--- a/purityX.R Tue Feb 08 13:55:12 2022 +0000
+++ b/purityX.R Wed Jun 12 16:05:01 2024 +0000
[
@@ -22,7 +22,7 @@
   make_option("--galaxy_files", type = "character"),
   make_option("--choose_class", type = "character"),
   make_option("--ignore_files", type = "character"),
-  make_option("--rtraw_columns",  action = "store_true")
+  make_option("--rtraw_columns", action = "store_true")
 )
 
 
@@ -31,9 +31,9 @@
 
 
 if (!is.null(opt$xgroups)) {
-    xgroups <- as.numeric(strsplit(opt$xgroups, ",")[[1]])
-}else{
-    xgroups <- NULL
+  xgroups <- as.numeric(strsplit(opt$xgroups, ",")[[1]])
+} else {
+  xgroups <- NULL
 }
 
 
@@ -44,55 +44,75 @@
 }
 
 if (is.null(opt$isotope_matrix)) {
-    im <- NULL
-}else{
-    im <- read.table(opt$isotope_matrix,
-                     header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+  im <- NULL
+} else {
+  im <- read.table(opt$isotope_matrix,
+    header = TRUE, sep = "\t", stringsAsFactors = FALSE
+  )
 }
 
 if (is.null(opt$exclude_isotopes)) {
-    isotopes <- FALSE
-}else{
-    isotopes <- TRUE
+  isotopes <- FALSE
+} else {
+  isotopes <- TRUE
 }
 
 if (is.null(opt$rtraw_columns)) {
-    rtraw_columns <- FALSE
-}else{
-    rtraw_columns <- TRUE
+  rtraw_columns <- FALSE
+} else {
+  rtraw_columns <- TRUE
 }
 
 loadRData <- function(rdata_path, xset_name) {
-#loads an RData file, and returns the named xset object if it is there
-    load(rdata_path)
-    return(get(ls()[ls() == xset_name]))
+  # loads an RData file, and returns the named xset object if it is there
+  load(rdata_path)
+  return(get(ls()[ls() == xset_name]))
+}
+
+
+
+
+getxcmsSetObject <- function(xobject) {
+  # XCMS 1.x
+  if (class(xobject) == "xcmsSet") {
+    return(xobject)
+  }
+  # XCMS 3.x
+  if (class(xobject) == "XCMSnExp") {
+    # Get the legacy xcmsSet object
+    suppressWarnings(xset <- as(xobject, "xcmsSet"))
+    sampclass(xset) <- xset@phenoData$sample_group
+    return(xset)
+  }
 }
 
 target_obj <- loadRData(opt$xset_path, opt$rdata_name)
 
 if (opt$camera_xcms == "camera") {
-    xset <- target_obj@xcmsSet
-}else{
-    xset <- target_obj
+  xset <- target_obj@xcmsSet
+} else {
+  xset <- target_obj
 }
 
+xset <- getxcmsSetObject(xset)
+
 print(xset)
 
 minOffset <- as.numeric(opt$minOffset)
 maxOffset <- as.numeric(opt$maxOffset)
 
 if (opt$iwNorm == "none") {
-    iwNorm <- FALSE
-    iwNormFun <- NULL
-}else if (opt$iwNorm == "gauss") {
-    iwNorm <- TRUE
-    iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset)
-}else if (opt$iwNorm == "rcosine") {
-    iwNorm <- TRUE
-    iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset)
-}else if (opt$iwNorm == "QE5") {
-    iwNorm <- TRUE
-    iwNormFun <- msPurity::iwNormQE.5()
+  iwNorm <- FALSE
+  iwNormFun <- NULL
+} else if (opt$iwNorm == "gauss") {
+  iwNorm <- TRUE
+  iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset)
+} else if (opt$iwNorm == "rcosine") {
+  iwNorm <- TRUE
+  iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset)
+} else if (opt$iwNorm == "QE5") {
+  iwNorm <- TRUE
+  iwNormFun <- msPurity::iwNormQE.5()
 }
 
 print(xset@filepaths)
@@ -105,13 +125,13 @@
   original_filenames <- basename(xset@filepaths)
   update_idx <- match(updated_filenames, original_filenames)
 
-    if (!is.null(opt$galaxy_files)) {
-        galaxy_files <- trimws(strsplit(opt$galaxy_files, ",")[[1]])
-        galaxy_files <- galaxy_files[galaxy_files != ""]
-        xset@filepaths <- galaxy_files[update_idx]
-    }else{
-        xset@filepaths <- updated_filepaths[update_idx]
-    }
+  if (!is.null(opt$galaxy_files)) {
+    galaxy_files <- trimws(strsplit(opt$galaxy_files, ",")[[1]])
+    galaxy_files <- galaxy_files[galaxy_files != ""]
+    xset@filepaths <- galaxy_files[update_idx]
+  } else {
+    xset@filepaths <- updated_filepaths[update_idx]
+  }
 }
 
 if (!is.null(opt$choose_class)) {
@@ -121,7 +141,7 @@
 
   print("choose class")
   print(ignore_files_class)
-}else{
+} else {
   ignore_files_class <- NA
 }
 
@@ -132,37 +152,40 @@
 
   ignore_files <- unique(c(ignore_files, ignore_files_class))
   ignore_files <- ignore_files[ignore_files != ""]
-}else{
+} else {
   if (anyNA(ignore_files_class)) {
     ignore_files <- NULL
-  }else{
+  } else {
     ignore_files <- ignore_files_class
   }
-
 }
 
 print("ignore_files")
 print(ignore_files)
 
 
-ppLCMS <- msPurity::purityX(xset = xset,
-                            offsets = c(minOffset, maxOffset),
-                            cores = opt$cores,
-                            xgroups = xgroups,
-                            purityType = opt$purityType,
-                            ilim = opt$ilim,
-                            isotopes = isotopes,
-                            im = im,
-                            iwNorm = iwNorm,
-                            iwNormFun = iwNormFun,
-                            singleFile = opt$singleFile,
-                            fileignore = ignore_files,
-                            rtrawColumns = rtraw_columns)
+ppLCMS <- msPurity::purityX(
+  xset = xset,
+  offsets = c(minOffset, maxOffset),
+  cores = opt$cores,
+  xgroups = xgroups,
+  purityType = opt$purityType,
+  ilim = opt$ilim,
+  isotopes = isotopes,
+  im = im,
+  iwNorm = iwNorm,
+  iwNormFun = iwNormFun,
+  singleFile = opt$singleFile,
+  fileignore = ignore_files,
+  rtrawColumns = rtraw_columns
+)
 
 dfp <- ppLCMS@predictions
 
 # to make compatable with deconrank
-colnames(dfp)[colnames(dfp) == "grpid"] <- "peakID"
+# (keep grpid for other compatibility)
+dfp <- data.frame("peakID"=dfp$grpid, dfp)
+
 colnames(dfp)[colnames(dfp) == "median"] <- "medianPurity"
 colnames(dfp)[colnames(dfp) == "mean"] <- "meanPurity"
 colnames(dfp)[colnames(dfp) == "sd"] <- "sdPurity"
b
diff -r 0cc6b67dccb8 -r efd14b326007 test-data/combineAnnotations_combined_annotations.sqlite
b
Binary file test-data/combineAnnotations_combined_annotations.sqlite has changed
b
diff -r 0cc6b67dccb8 -r efd14b326007 test-data/combineAnnotations_combined_annotations.tsv
--- a/test-data/combineAnnotations_combined_annotations.tsv Tue Feb 08 13:55:12 2022 +0000
+++ b/test-data/combineAnnotations_combined_annotations.tsv Wed Jun 12 16:05:01 2024 +0000
[
b'@@ -1,102 +1,106 @@\n "grpid"\t"grp_name"\t"mz"\t"rt"\t"inchikey"\t"inchi"\t"inchikey"\t"inchikey1"\t"inchikey2"\t"inchikey3"\t"name"\t"exact_mass"\t"molecular_formula"\t"pubchem_cids"\t"kegg_cids"\t"kegg_brite"\t"kegg_drugs"\t"hmdb_ids"\t"hmdb_bio_custom_flag"\t"hmdb_drug_flag"\t"biosim_max_count"\t"biosim_hmdb_ids"\t"fragmentation_acquistion_num"\t"mean_precursor_ion_purity"\t"accession"\t"sirius_score"\t"sirius_wscore"\t"metfrag_score"\t"metfrag_wscore"\t"sm_score"\t"sm_wscore"\t"probmetab_score"\t"probmetab_wscore"\t"ms1_lookup_score"\t"ms1_lookup_wscore"\t"biosim_max_score"\t"biosim_wscore"\t"wscore"\t"rank"\t"adduct_overall"\n 12\t"M116T48"\t116.070597631071\t47.7346706134597\t"ONIBWKKTOPOVIA-UHFFFAOYSA-N"\t"InChI=1S/C5H9NO2/c7-5(8)4-2-1-3-6-4/h4,6H,1-3H2,(H,7,8)"\t"ONIBWKKTOPOVIA-UHFFFAOYSA-N"\t"ONIBWKKTOPOVIA"\t"UHFFFAOYSA"\t"N"\t"DL-Proline"\t115.063328534\t"C5H9NO2"\t"614,25246272"\t"C16435"\t""\t""\t""\t""\t""\t33\t"HMDB0000162"\t"277,343,409,410"\t0.99\t"CCMSLIB00000577898"\t"1.0"\t0.2\t0.9\t0.18\t0.87867085623127\t0.263601256869381\t0\t0\t0\t0\t1\t0.25\t0.893601256869381\t1\t"[M+H]+,M+H"\n-12\t"M116T48"\t116.070597631071\t47.7346706134597\t"ONIBWKKTOPOVIA-AZXPZELESA-N"\t"InChI=1S/C5H9NO2/c7-5(8)4-2-1-3-6-4/h4,6H,1-3H2,(H,7,8)/i4+1"\t"ONIBWKKTOPOVIA-AZXPZELESA-N"\t"ONIBWKKTOPOVIA"\t"AZXPZELESA"\t"N"\t""\t116.066683369\t"C5H9NO2"\t"10290769"\t""\t""\t""\t""\t""\t""\t33\t"HMDB0000162"\t"277,343,409,410"\t0.99\tNA\t"1.0"\t0.2\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0.25\t0.45\t2\t"[M+H]+"\n-12\t"M116T48"\t116.070597631071\t47.7346706134597\t"ONIBWKKTOPOVIA-BYPYZUCNSA-N"\t"InChI=1S/C5H9NO2/c7-5(8)4-2-1-3-6-4/h4,6H,1-3H2,(H,7,8)/t4-/m0/s1"\t"ONIBWKKTOPOVIA-BYPYZUCNSA-N"\t"ONIBWKKTOPOVIA"\t"BYPYZUCNSA"\t"N"\t"L-Pro"\t115.063328534\t"C5H9NO2"\t"145742,6971047"\t"C00148"\t"Compounds with biological roles [BR:br08001]"\t"D00035"\t"HMDB0000162"\t"True"\t"False"\t33\t"HMDB0000162"\t"277,343,409,410"\t0.99\tNA\t"1.0"\t0.2\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0.25\t0.45\t2\t"[M+H]+"\n-12\t"M116T48"\t116.070597631071\t47.7346706134597\t"ONIBWKKTOPOVIA-GIZBTRSZSA-N"\t"InChI=1S/C5H9NO2/c7-5(8)4-2-1-3-6-4/h4,6H,1-3H2,(H,7,8)/t4-/m0/s1/i1+2,2+2,3+2,4+2,5+2"\t"ONIBWKKTOPOVIA-GIZBTRSZSA-N"\t"ONIBWKKTOPOVIA"\t"GIZBTRSZSA"\t"N"\t"L-PROLINE, [U-14C]"\t125.0795385\t"C5H9NO2"\t"12210869"\t""\t""\t""\t""\t""\t""\t33\t"HMDB0000162"\t"277,343,409,410"\t0.99\tNA\t"1.0"\t0.2\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0.25\t0.45\t2\t"[M+H]+"\n-12\t"M116T48"\t116.070597631071\t47.7346706134597\t"ONIBWKKTOPOVIA-GTTLGWSSSA-N"\t"InChI=1S/C5H9NO2/c7-5(8)4-2-1-3-6-4/h4,6H,1-3H2,(H,7,8)/t4-/m0/s1/i1+1D,4+1,6+1/t1?,4-"\t"ONIBWKKTOPOVIA-GTTLGWSSSA-N"\t"ONIBWKKTOPOVIA"\t"GTTLGWSSSA"\t"N"\t"SCHEMBL16945363"\t119.07334984\t"C5H9NO2"\t"118264374"\t""\t""\t""\t""\t""\t""\t33\t"HMDB0000162"\t"277,343,409,410"\t0.99\tNA\t"1.0"\t0.2\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0.25\t0.45\t2\t"[M+H]+"\n-12\t"M116T48"\t116.070597631071\t47.7346706134597\t"ONIBWKKTOPOVIA-HOSYLAQJSA-N"\t"InChI=1S/C5H9NO2/c7-5(8)4-2-1-3-6-4/h4,6H,1-3H2,(H,7,8)/i5+1"\t"ONIBWKKTOPOVIA-HOSYLAQJSA-N"\t"ONIBWKKTOPOVIA"\t"HOSYLAQJSA"\t"N"\t"DL-Proline-1-13C"\t116.066683369\t"C5H9NO2"\t"59340910"\t""\t""\t""\t""\t""\t""\t33\t"HMDB0000162"\t"277,343,409,410"\t0.99\tNA\t"1.0"\t0.2\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0.25\t0.45\t2\t"[M+H]+"\n-12\t"M116T48"\t116.070597631071\t47.7346706134597\t"ONIBWKKTOPOVIA-IDEBNGHGSA-N"\t"InChI=1S/C5H9NO2/c7-5(8)4-2-1-3-6-4/h4,6H,1-3H2,(H,7,8)/i1+1,2+1,3+1,4+1,5+1,6+1"\t"ONIBWKKTOPOVIA-IDEBNGHGSA-N"\t"ONIBWKKTOPOVIA"\t"IDEBNGHGSA"\t"N"\t"SCHEMBL18875607"\t121.0771376\t"C5H9NO2"\t"129148531"\t""\t""\t""\t""\t""\t""\t33\t"HMDB0000162"\t"277,343,409,410"\t0.99\tNA\t"1.0"\t0.2\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0.25\t0.45\t2\t"[M+H]+"\n-12\t"M116T48"\t116.070597631071\t47.7346706134597\t"ONIBWKKTOPOVIA-IXBOUXNVSA-N"\t"InChI=1S/C5H9NO2/c7-5(8)4-2-1-3-6-4/h4,6H,1-3H2,(H,7,8)/t4-/m0/s1/i4+1"\t"ONIBWKKTOPOVIA-IXBOUXNVSA-N"\t"ONIBWKKTOPOVIA"\t"IXBOUXNVSA"\t"N"\t""\t116.066683369\t"C5H9NO2"\t"100915807"\t""\t""\t""\t""\t""\t""\t33\t"HMDB0000162"\t"277,343,409,410"\t0.99\tNA\t"1.0"\t0.2\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0.25\t0.45\t2\t"[M+H]+"\n-12\t"M116T48"\t116.070597631071\t47.7346706134597\t"ONIBWKKTOPOVIA-JGTYJTGKSA-N"\t"InChI=1S/C5H9NO2/c7-5(8)4-2-1-3-6-4/h4,6H,1-3H2,(H,7,8)/t4-/m0/s1/i6+1"\t"ONIBWKKTOPOVIA-JGTYJTGKSA-N"\t"ONIBWKKTOPOVIA"\t"JGTYJTGKSA"\t"N"\t"L-Proline-15N"\t116.06036343\t"C5H9NO2"\t'..b'/i3D2,4D2"\t"RWRDLPDLKQPQOW-KHORGVISSA-N"\t"RWRDLPDLKQPQOW"\t"KHORGVISSA"\t"N"\t"ACM42403258"\t75.098606278\t"C4H9N"\t"12196049"\t""\t""\t""\t""\t""\t""\t1\t"HMDB0031641"\t"277,343,409,410"\t0.99\tNA\t"0.0"\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.8696\t0.2174\t0.2174\t16\t"[M+H]+"\n+12\t"M116T48"\t116.070597631071\t47.7346706134597\t"RWRDLPDLKQPQOW-MNYXATJNSA-N"\t"InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2/i/hT"\t"RWRDLPDLKQPQOW-MNYXATJNSA-N"\t"RWRDLPDLKQPQOW"\t"MNYXATJNSA"\t"N"\t""\t73.081723544\t"C4H9N"\t"57750053"\t""\t""\t""\t""\t""\t""\t1\t"HMDB0031641"\t"277,343,409,410"\t0.99\tNA\t"0.0"\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.8696\t0.2174\t0.2174\t16\t"[M+H]+"\n+12\t"M116T48"\t116.070597631071\t47.7346706134597\t"RWRDLPDLKQPQOW-SMZGMGDZSA-N"\t"InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2/i3D2"\t"RWRDLPDLKQPQOW-SMZGMGDZSA-N"\t"RWRDLPDLKQPQOW"\t"SMZGMGDZSA"\t"N"\t""\t73.086052786\t"C4H9N"\t"57608710"\t""\t""\t""\t""\t""\t""\t1\t"HMDB0031641"\t"277,343,409,410"\t0.99\tNA\t"0.0"\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.8696\t0.2174\t0.2174\t16\t"[M+H]+"\n+12\t"M116T48"\t116.070597631071\t47.7346706134597\t"RWRDLPDLKQPQOW-LNLMKGTHSA-N"\t"InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2/i1D2,2D2"\t"RWRDLPDLKQPQOW-LNLMKGTHSA-N"\t"RWRDLPDLKQPQOW"\t"LNLMKGTHSA"\t"N"\t""\t75.098606278\t"C4H9N"\t"57608709"\t""\t""\t""\t""\t""\t""\t2\t"HMDB0031641"\t"277,343,409,410"\t0.99\tNA\t"0.0"\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.8333\t0.208325\t0.208325\t17\t"[M+H]+"\n+12\t"M116T48"\t116.070597631071\t47.7346706134597\t"RWRDLPDLKQPQOW-QAOQSSEZSA-N"\t"InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2/i1D,3D2"\t"RWRDLPDLKQPQOW-QAOQSSEZSA-N"\t"RWRDLPDLKQPQOW"\t"QAOQSSEZSA"\t"N"\t""\t74.092329532\t"C4H9N"\t"90927493"\t""\t""\t""\t""\t""\t""\t2\t"HMDB0031641"\t"277,343,409,410"\t0.99\tNA\t"0.0"\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.8\t0.2\t0.2\t18\t"[M+H]+"\n+12\t"M116T48"\t116.070597631071\t47.7346706134597\t"RWRDLPDLKQPQOW-SVYQBANQSA-N"\t"InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2/i1D2,2D2,3D2,4D2"\t"RWRDLPDLKQPQOW-SVYQBANQSA-N"\t"RWRDLPDLKQPQOW"\t"SVYQBANQSA"\t"N"\t"Pyrrolidine-2,2,3,3,4,4,5,5-d8"\t79.123713262\t"C4H9N"\t"12196044"\t""\t""\t""\t""\t""\t""\t2\t"HMDB0031641"\t"277,343,409,410"\t0.99\tNA\t"0.0"\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.8\t0.2\t0.2\t18\t"[M+H]+"\n+12\t"M116T48"\t116.070597631071\t47.7346706134597\t"RWRDLPDLKQPQOW-HJOWPTDZSA-N"\t"InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2/i1D2,2D2,3D,4D2/hD"\t"RWRDLPDLKQPQOW-HJOWPTDZSA-N"\t"RWRDLPDLKQPQOW"\t"HJOWPTDZSA"\t"N"\t"pyrrolidine-d8"\t79.123713262\t"C4H9N"\t"129715569"\t""\t""\t""\t""\t""\t""\t1\t"HMDB0031641"\t"277,343,409,410"\t0.99\tNA\t"0.0"\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.7143\t0.178575\t0.178575\t19\t"[M+H]+"\n+12\t"M116T48"\t116.070597631071\t47.7346706134597\t"RWRDLPDLKQPQOW-KLRAWXKOSA-N"\t"InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2/i1D2,2D2,3D2,4D2/hD"\t"RWRDLPDLKQPQOW-KLRAWXKOSA-N"\t"RWRDLPDLKQPQOW"\t"KLRAWXKOSA"\t"N"\t""\t80.12999001\t"C4H9N"\t"60135501"\t""\t""\t""\t""\t""\t""\t1\t"HMDB0031641"\t"277,343,409,410"\t0.99\tNA\t"0.0"\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.7143\t0.178575\t0.178575\t19\t"[M+H]+"\n+12\t"M116T48"\t116.070597631071\t47.7346706134597\t"MWFMGBPGAXYFAR-UHFFFAOYSA-N"\t"InChI=1S/C4H7NO/c1-4(2,6)3-5/h6H,1-2H3"\t"MWFMGBPGAXYFAR-UHFFFAOYSA-N"\t"MWFMGBPGAXYFAR"\t"UHFFFAOYSA"\t"N"\t"ACETONE CYANOHYDRIN"\t85.05276385\t"C4H7NO"\t"6406"\t"C02659"\t""\t""\t"HMDB0060427"\t"False"\t"False"\t0\t"HMDB0031456"\t"277,343,409,410"\t0.99\tNA\t"0"\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.6923\t0.173075\t0.173075\t20\t"[M+H]+"\n+12\t"M116T48"\t116.070597631071\t47.7346706134597\t"RRUDCFGSUDOHDG-UHFFFAOYSA-N"\t"InChI=1S/C2H5NO2/c1-2(4)3-5/h5H,1H3,(H,3,4)"\t"RRUDCFGSUDOHDG-UHFFFAOYSA-N"\t"RRUDCFGSUDOHDG"\t"UHFFFAOYSA"\t"N"\t"acetohydroxamic acid"\t75.032028405\t"C2H5NO2"\t"1990"\t"C06808"\t"Anatomical Therapeutic Chemical (ATC) classification [BR:br08303]"\t"D00220"\t"HMDB0014691"\t"False"\t"True"\t0\t"HMDB0003338"\t"277,343,409,410"\t0.99\tNA\t"0"\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.5455\t0.136375\t0.136375\t21\t"[M+H]+"\n 27\t"M132T74"\t132.101827517612\t73.554846736017\t"AGPKZVBTJJNPAG-UHFFFAOYSA-N"\t"InChI=1S/C6H13NO2/c1-3-4(2)5(7)6(8)9/h4-5H,3,7H2,1-2H3,(H,8,9)"\t"AGPKZVBTJJNPAG-UHFFFAOYSA-N"\t"AGPKZVBTJJNPAG"\t"UHFFFAOYSA"\t"N"\t"2-ammonio-3-methylpentanoate"\t131.09462866\t"C6H13NO2"\t"791,57397079"\t"C16434"\t""\t""\t"HMDB0033923"\t"True"\t"False"\t18\t"HMDB0000172"\t"478,547,616,475,541,607"\t1\t"CE000616"\t"0"\t0\t0\t0\t0.940890528192141\t0.282267158457642\t0\t0\t0\t0\t1\t0.25\t0.532267158457642\t1\t"[M+H]+"\n'
b
diff -r 0cc6b67dccb8 -r efd14b326007 test-data/purityX_output.RData
b
Binary file test-data/purityX_output.RData has changed
b
diff -r 0cc6b67dccb8 -r efd14b326007 test-data/purityX_output.tsv
--- a/test-data/purityX_output.tsv Tue Feb 08 13:55:12 2022 +0000
+++ b/test-data/purityX_output.tsv Wed Jun 12 16:05:01 2024 +0000
b
@@ -0,0 +1,16 @@
+"peakID" "grpid" "meanPurity" "medianPurity" "sdPurity" "sdePurity" "cvPurity" "pknmPurity" "i" "mz"
+1 1 1 1 0 0 0 1 61925043.3566382 102.091560881174
+2 2 1 1 0 0 0 1 25719001.4822415 103.054435681307
+3 3 1 1 0 0 0 1 3791623.64689146 103.094714179046
+4 4 1 1 0 0 0 1 701844279.87476 104.107044269271
+5 5 0.753862395887935 1 0.49227520822413 0.246137604112065 65.3004064016622 1 79111202.7215235 105.051698129502
+6 6 0.849777173964078 0.846538530688927 0.0251183222605269 0.0125591611302635 2.95587161318465 2 5735625.74118929 106.062931911485
+7 7 0.846273037079121 0.84212559075998 0.0578505242577427 0.0289252621288714 6.83591721856241 2.25 2124533.28227443 107.050950030548
+8 8 1 1 0 0 0 1 1965138.01789635 108.006429726299
+9 9 1 1 0 0 0 1 362848.770317471 108.055140508271
+10 10 1 1 0 0 0 1 25053032.2987587 110.071444659587
+11 11 0.657159557553984 0.66469615675609 0.396069303079897 0.198034651539948 60.269884007182 3 8168571.28564153 111.020205687498
+12 12 0.986752431352875 0.987973697141727 0.0154264299310987 0.00771321496554933 1.56335362761138 1.25 21655562.9118763 112.043935634745
+13 13 0.969931320369093 0.973488648484343 0.035202929071034 0.017601464535517 3.6294249223376 1.5 16160213.3493717 112.923083776231
+14 14 1 1 0 0 0 1 82880594.6142923 113.035283604395
+15 15 0.613847507731082 0.613847507731082 0.00533923101597845 0.0037754064577199 0.869797620538273 4 3386134.01927015 114.037770729384