Mercurial > repos > workflow4metabolomics > ms2snoop

diff MS2snoop.R @ 6:77abacd33c31 draft
planemo upload commit 1f791337b9b8f08326c01bf4799f50493ef54f80
author: workflow4metabolomics
date: Fri, 30 Sep 2022 16:18:56 +0000
parents: 78d5a12406c2
children: 2a1f120a6874
--- a/MS2snoop.R	Fri Aug 05 17:25:45 2022 +0000
+++ b/MS2snoop.R	Fri Sep 30 16:18:56 2022 +0000
@@ -1,1236 +1,1233 @@
-#'
-#' read and process mspurity W4M files
-#' create a summary of fragment for each precursor and a graphics of peseudo
-#' spectra + correlation on which checking of fragment is based on
-#' V3 try to identify and process multiple files for 1 precursor which may
-#' occur if different collision energy are used
-#' V4 elimination of correlation = NA. Correlation is done with precursor, if
-#' precursor is not present correlation with most intense peak
-#' author: Jean-Francois Martin
-#' V5 is versionned, lintR-compliant, packaged, unit-tested, documented and
-#' tested against data from other labs.
-#' new maintainer: Lain Pavot - lain.pavot@inrae.fr
-#'
-#' @import optparse
-#'
-
-
-get_version <- function() {
-  cmd <- commandArgs(trailingOnly = FALSE)
-  root <- dirname(gsub("--file=", "", cmd[grep("--file=", cmd)]))
-  readme <- readLines(file.path(root, "README.md"))
-  version_line <- readme[grepl(" * **@version**: ", readme, fixed = TRUE)]
-  return(gsub(".*: ", "", version_line))
-}
-
-defaults <- list(
-  MS2SNOOP_VERSION = get_version(),
-  MISSING_PARAMETER_ERROR = 1,
-  BAD_PARAMETER_VALUE_ERROR = 2,
-  MISSING_INPUT_FILE_ERROR = 3,
-  NO_ANY_RESULT_ERROR = 255,
-  DEFAULT_PRECURSOR_PATH = NULL,
-  DEFAULT_FRAGMENTS_PATH = NULL,
-  DEFAULT_COMPOUNDS_PATH = NULL,
-  DEFAULT_OUTPUT_PATH = "compound_fragments_result.txt",
-  DEFAULT_TOLMZ = 0.01,
-  DEFAULT_TOLRT = 20,
-  DEFAULT_MZDECIMAL = 3,
-  DEFAULT_R_THRESHOLD = 0.85,
-  DEFAULT_MINNUMBERSCAN = 8,
-  DEFAULT_SEUIL_RA = 0.05,
-  DEFAULT_FRAGMENTS_MATCH_DELTA = 10,
-  DEFAULT_FRAGMENTS_MATCH_DELTA_UNIT = "ppm",
-  DEFAULT_PDF_PATH = ""
-)
-env <- globalenv()
-for (default in names(defaults)) {
-  assign(default, defaults[[default]], envir = env)
-  lockBinding(default, env)
-}
-
-########################################################################
-
-get_formulas <- function(
-  mzref,
-  spectra,
-  nominal_mz_list,
-  processing_parameters,
-  background = !TRUE
-) {
-  if (is.vector(mzref) && length(mzref) > 1) {
-    return(lapply(
-      mzref,
-      function(mz) {
-        return(get_formulas(
-          mzref = mz,
-          spectra = spectra,
-          nominal_mz_list = nominal_mz_list,
-          processing_parameters = processing_parameters,
-          background = background
-        ))
-      }
-    ))
-  }
-  input <- sprintf(
-    "%s-%s.ms",
-    gsub("[[:space:]]", "_", processing_parameters$c_name),
-    mzref
-  )
-  create_ms_file(input, mzref, spectra, processing_parameters)
-  output <- sprintf(
-    "out/%s-%s.out",
-    gsub("[[:space:]]", "_", processing_parameters$c_name),
-    mzref
-  )
-  command <- sprintf(
-    paste(
-      "sirius",
-      "--noCite",
-      "--noSummaries",
-      "--loglevel=WARNING",
-      "-i='%s'",
-      "-o='%s'",
-      "tree",
-      ## loglevel is not working taken into account during
-      ## sirius startup, so we filter outputs...
-      "2>&1 | grep '^(WARNING|SEVERE)'"
-    ),
-    input,
-    output
-  )
-  verbose_catf(
-    ">> Sirius is running %swith the command: %s\n",
-    if (background) "in the background " else "",
-    command
-  )
-  system(
-    command,
-    wait = !background,
-    ignore.stdout = background,
-    ignore.stderr = background
-  )
-  return(extract_sirius_results(output, spectra$mz, processing_parameters))
-}
-
-create_ms_file <- function(
-  path,
-  mzref,
-  spectra,
-  processing_parameters
-) {
-  file_content <- paste(
-    sprintf(">compound %s", processing_parameters$c_name),
-    sprintf(">ionization %s", processing_parameters$ionization),
-    sprintf(">parentmass %s", mzref),
-    sprintf(">formula %s", processing_parameters$elemcomposition),
-    sep = "\n"
-  )
-  displayed_file_content <- sprintf(
-    "%s\n>collision\n%s",
-    file_content,
-    paste(
-      sprintf(
-        "%s %s",
-        spectra[1:3, "mz"],
-        spectra[1:3, "intensities"]
-      ),
-      collapse = "\n"
-    )
-  )
-  if (nrow(spectra) > 3) {
-    displayed_file_content <- sprintf(
-      "%s\n[... %s more rows of mz and intensities ...]",
-      displayed_file_content,
-      nrow(spectra) - 3
-    )
-  }
-  catf(
-    ">> MS file created for %s with content:\n%s\n",
-    processing_parameters$c_name,
-    displayed_file_content
-  )
-  file_content <- sprintf(
-    "%s\n\n>collision\n%s",
-    file_content,
-    paste(
-      sprintf("%s %s", spectra$mz, spectra$intensities),
-      collapse = "\n"
-    )
-  )
-  cat(file_content, file = path, append = FALSE)
-}
-
-extract_sirius_results <- function(
-  output,
-  mz_list,
-  processing_parameters
-) {
-
-  delta <- processing_parameters$fragment_match_delta
-  delta_unit <- tolower(processing_parameters$fragment_match_delta_unit)
-
-  output <- list.dirs(output, recursive = FALSE)[[1]]
-
-  spectra_out_dir <- sprintf("%s/spectra", output)
-  spectra_filename <- sprintf(
-    "%s/%s",
-    spectra_out_dir,
-    list.files(spectra_out_dir)[[1]]
-  )
-
-  trees_out_dir <- sprintf("%s/trees", output)
-  trees_filename <- sprintf(
-    "%s/%s",
-    trees_out_dir,
-    list.files(trees_out_dir)[[1]]
-  )
-
-  if (!is.null(spectra_filename)) {
-    sirius_results <- get_csv_or_tsv(spectra_filename)
-  } else {
-    return(rep(NA, length(mz_list)))
-  }
-  if (!is.null(trees_filename)) {
-    sirius_results <- cbind(sirius_results, extract_sirius_ppm(trees_filename))
-  } else {
-    return(rep(NA, length(mz_list)))
-  }
-
-  fragment_matchings <- data.frame(
-    formula = NA,
-    ppm = NA,
-    mz = mz_list,
-    error = NA
-  )
-
-  sirius_results <- filter_sirius_with_delta(
-    sirius_results = sirius_results,
-    original_mz = fragment_matchings$mz,
-    delta = delta,
-    delta_unit = delta_unit
-  )
-
-  for (index in seq_len(nrow(sirius_results))) {
-    result <- sirius_results[index, ]
-    filter <- which(order(abs(fragment_matchings$mz - result$mz)) == 1)
-    fragment_matchings[filter, "formula"] <- result$formula
-    fragment_matchings[filter, "ppm"] <- result$ppm
-    catf(
-      "[OK] Fragment with m/z=%s matches %s with a difference of %s ppm\n",
-      fragment_matchings[filter, "mz"], result$formula, result$ppm
-    )
-  }
-  return(fragment_matchings)
-}
-
-filter_sirius_with_delta <- function(
-  sirius_results,
-  original_mz,
-  delta,
-  delta_unit
-) {
-  if (is.numeric(delta) && !is.na(delta) && delta > 0) {
-    if (delta_unit == "ppm") {
-      filter <- abs(sirius_results$ppm) <= delta
-      fine <- which(filter)
-      not_fine <- which(!filter)
-      catf(
-        paste("[KO] fragment %s (m/z=%s) eleminated because ppm=%s is greater",
-          "than delta=%s\n"
-        ),
-        sirius_results[not_fine, ]$formula,
-        sirius_results[not_fine, ]$mz,
-        sirius_results[not_fine, ]$ppm,
-        delta
-      )
-      sirius_results <- sirius_results[fine, ]
-    } else if (delta_unit == "mz") {
-      differences <- sapply(
-        sirius_results$mz,
-        function(mz) min(abs(original_mz - mz))
-      )
-      fine <- which(sapply(
-        sirius_results$mz,
-        function(mz) any(abs(original_mz - mz) <= delta)
-      ))
-      not_fine <- which(sapply(
-        sirius_results$mz,
-        function(mz) all(abs(original_mz - mz) > delta)
-      ))
-      catf(
-        paste(
-          "[KO] fragment %s eleminated because mz difference=%s is",
-          "greater than delta=%s\n"
-        ),
-        sirius_results[not_fine, ]$formula,
-        differences[not_fine],
-        delta
-      )
-      sirius_results <- sirius_results[fine, ]
-    }
-  }
-  return(sirius_results)
-}
-
-extract_sirius_ppm <- function(path) {
-  json <- file(path, "r")
-  suppressWarnings(json_lines <- readLines(json))
-  close(json)
-  json_lines <- json_lines[
-    grepl("\\s+\"(massDeviation|recalibratedMass)\" :", json_lines)
-  ]
-  json_lines <- gsub("^\\s+\"[^\"]+\" : \"?", "", json_lines)
-  ppms <- json_lines[seq(1, length(json_lines), 2)]
-  mz <- json_lines[seq(2, length(json_lines), 2)]
-  ppms <- as.numeric(gsub(" ppm .*", "", ppms))
-  mz <- as.numeric(gsub(",$", "", mz))
-  ordered <- order(mz)
-  return(list(ppm = ppms[ordered], recalibrated_mz = mz[ordered]))
-}
-
-#' @title plot_pseudo_spectra
-#' @param x
-#' @param fid
-#' @param sum_int
-#' @param vmz
-#' @param cor_abs_int
-#' @param refcol
-#' @param c_name
-#' @description plot_pseudo_spectra
-#' function to compute sum of intensities among scans for all
-#' m/z kept (cor > r_threshold & minimum number of scans)
-#' and plot pseudo spectra
-#' x dataframe scan X fragments with scans number in the 1st column and
-#' ions in next with intensities
-#' fid file id when several a precursor has been detected in several files
-plot_pseudo_spectra <- function(
-  x,
-  fid,
-  sum_int,
-  vmz,
-  cor_abs_int,
-  refcol,
-  meaned_mz,
-  processing_parameters
-) {
-  ## du fait de la difference de nombre de colonne entre la dataframe qui
-  ## inclue les scans en 1ere col, mzRef se decale de 1
-  refcol <- refcol - 1
-  ## compute relative intensities max=100%
-  rel_int <- sum_int[-1]
-  rel_int <- rel_int / max(rel_int)
-
-  if (processing_parameters$do_pdf) {
-    ## define max value on vertical axis (need to increase in order to plot the
-    ## label of fragments)
-    ymax <- max(rel_int) + 0.2 * max(rel_int)
-
-    par(mfrow = c(2, 1))
-    plot(vmz, rel_int, type = "h", ylim = c(0, ymax),
-      main = processing_parameters$c_name
-    )
-    ## low correl coef. will be display in grey
-    cor_low <- which(round(cor_abs_int, 2) < processing_parameters$r_threshold)
-
-    lbmzcor <- sprintf("%s(r=%s)", vmz, round(cor_abs_int, 2))
-
-    if (length(cor_low) > 0) {
-      text(
-        vmz[cor_low],
-        rel_int[cor_low],
-        lbmzcor[cor_low],
-        cex = 0.5,
-        col = "grey",
-        srt = 90,
-        adj = 0
-      )
-      if (length(vmz) - length(cor_low) > 1) {
-        text(
-          vmz[-c(refcol, cor_low)],
-          rel_int[-c(refcol, cor_low)],
-          lbmzcor[-c(refcol, cor_low)],
-          cex = 0.6,
-          col = 1,
-          srt = 90,
-          adj = 0
-        )
-      }
-    } else {
-      if (length(vmz) > 1) {
-        text(
-          vmz[-c(refcol)],
-          rel_int[-c(refcol)],
-          lbmzcor[-c(refcol)],
-          cex = 0.6,
-          col = 1,
-          srt = 90,
-          adj = 0
-        )
-      }
-    }
-
-    text(
-      vmz[refcol],
-      rel_int[refcol],
-      lbmzcor[refcol],
-      cex = 0.8,
-      col = 2,
-      srt = 90,
-      adj = 0
-    )
-  }
-
-  ## prepare result file
-  cor_valid <- (round(cor_abs_int, 2) >= processing_parameters$r_threshold)
-
-  do_sirius <- TRUE
-  verbose_catf("Checking sirius parameters...\n")
-  if (is.null(processing_parameters$ionization)) {
-    do_sirius <- FALSE
-    verbose_catf("[KO] No ionization passed in parameter.\n")
-  } else {
-    verbose_catf("[OK] Ionization=%s.\n", processing_parameters$ionization)
-  }
-  if (is.na(processing_parameters$elemcomposition)) {
-    do_sirius <- FALSE
-    verbose_catf("[KO] Elemental composition is NA.\n")
-  } else if (length(processing_parameters$elemcomposition) < 1) {
-    do_sirius <- FALSE
-    verbose_catf("[KO] No elemental composition is provided.\n")
-  } else if (processing_parameters$elemcomposition == "") {
-    do_sirius <- FALSE
-    verbose_catf("[KO] Elemental composition is an empty string.\n")
-  } else {
-    verbose_catf(
-      "[OK] Elemental composition=%s.\n",
-      processing_parameters$elemcomposition
-    )
-  }
-
-  cp_res_length <- length(vmz)
-  ppm <- rep(NA, cp_res_length)
-  formulas <- rep(NA, cp_res_length)
-  if (do_sirius) {
-    verbose_catf("Everything is ok, preparing for sirius.\n")
-    formulas <- get_formulas(
-      mzref = processing_parameters$mzref,
-      spectra = data.frame(mz = meaned_mz, intensities = sum_int[-1]),
-      nominal_mz_list = vmz,
-      processing_parameters = processing_parameters
-    )
-    if (nrow(formulas) == 0) {
-      catf("No formula found.\n")
-    } else {
-      ppm <- formulas$ppm
-      formulas <- formulas$formula
-      catf(
-        "Found %s formula for %s fragments\n",
-        length(formulas[which(!(is.na(formulas)))]),
-        cp_res_length
-      )
-    }
-  } else {
-    verbose_catf("Sirius cannot be run.\n")
-  }
-  cp_res <- data.frame(
-    rep(processing_parameters$c_name, cp_res_length),
-    rep(processing_parameters$inchikey, cp_res_length),
-    rep(processing_parameters$elemcomposition, cp_res_length),
-    formulas,
-    vmz,
-    ppm,
-    rep(fid, cp_res_length),
-    cor_abs_int,
-    sum_int[-1],
-    rel_int,
-    cor_valid
-  )
-
-  colnames(cp_res) <- c(
-    "compoundName",
-    "inchikey",
-    "elemcomposition",
-    "fragment",
-    "fragment_mz",
-    "ppm",
-    "fileid",
-    "CorWithPrecursor",
-    "AbsoluteIntensity",
-    "relativeIntensity",
-    "corValid"
-  )
-  return(cp_res)
-}
-
-#'
-#' @title extract_fragments
-#'
-#' @param precursors the precursor list from mspurity
-#' @param fragments the fragments list from ms purity
-# ' @param mzref
-# ' @param rtref
-# ' @param c_name
-# ' @param inchikey
-# ' @param elemcomposition
-#' @param processing_parameters
-#' @returns
-#'
-#' @description
-#' function for extraction of fragments corresponding to precursors
-#' detected by MSPurity
-extract_fragments <- function( ## nolint cyclocomp_linter
-  precursors,
-  fragments,
-  processing_parameters
-) {
-  ## filter precursor in the precursors file based on mz and rt in the
-  ## compound list
-  catf("processing %s\n", processing_parameters$c_name)
-  verbose_catf("===\n")
-  param <- processing_parameters
-  selected_precursors <- which(
-    (abs(precursors$precurMtchMZ - param$mzref) <= param$tolmz)
-    & (abs(precursors$precurMtchRT - param$rtref) <= param$tolrt)
-  )
-  rm(param)
-
-  verbose_catf(
-    "> %s precursors selected with mz=%s±%s and rt=%s±%s\n",
-    length(selected_precursors),
-    processing_parameters$mzref,
-    processing_parameters$tolmz,
-    processing_parameters$rtref,
-    processing_parameters$tolrt
-  )
-
-  ## check if there is the precursor in the file
-
-  if (length(selected_precursors) < 1) {
-    cat("> non detected in precursor file\n")
-    show_end_processing()
-    return(NULL)
-  }
-
-  precursors <- precursors[selected_precursors, ]
-
-  ## check if fragments corresponding to precursor are found in several
-  ## files (collision energy)
-  ## this lead to a processing for each fileid
-  file_ids <- as.character(sort(unique(precursors$fileid)))
-  if (length(file_ids) > 1) {
-    catf("> several files detected for this compounds :\n")
-  } else if (length(file_ids) < 1 || nrow(precursors) < 1) {
-    return(data.frame())
-  }
-
-  res_comp <- data.frame()
-  for (curent_file_id in file_ids) {
-    curent_precursors <- precursors[precursors$fileid == curent_file_id, ]
-    selected_fragments <- fragments[
-      fragments$grpid %in% as.character(curent_precursors$grpid)
-      & fragments$fileid == curent_file_id,
-    ]
-    filtered_fragments <- selected_fragments[
-      selected_fragments$ra > processing_parameters$seuil_ra,
-    ]
-    if (nrow(filtered_fragments) != 0) {
-      res_comp_by_file <- process_file(
-        curent_file_id = curent_file_id,
-        precursor_mz = curent_precursors$mz,
-        filtered_fragments = filtered_fragments,
-        processing_parameters = processing_parameters
-      )
-      if (!is.null(res_comp_by_file)) {
-        res_comp <- rbind(res_comp, res_comp_by_file)
-      }
-    } else {
-      catf("No fragment found for in fragment file\n")
-    }
-  }
-  return(unique(res_comp))
-}
-
-process_file <- function(
-  curent_file_id,
-  precursor_mz,
-  filtered_fragments,
-  processing_parameters
-) {
-  mznominal <- round(x = filtered_fragments$mz, digits = 0)
-  meaned_mz <- round(
-    aggregate(
-      data.frame(
-        mz = filtered_fragments$mz,
-        mznominal = mznominal
-      ),
-      list(mznominal),
-      FUN = mean
-    )$mz,
-    digits = processing_parameters$mzdecimal
-  )
-  filtered_fragments <- data.frame(filtered_fragments, mznominal)
-
-  ## creation of cross table row=scan col=mz X=ra
-
-  vmz <- as.character(sort(unique(filtered_fragments$mznominal)))
-
-  ds_abs_int <- create_ds_abs_int(vmz, filtered_fragments)
-
-  if (global_debug) {
-    print(ds_abs_int)
-  }
-
-  ## elimination of mz with less than min_number_scan scans (user defined
-  ## parameter)
-  xmz <- rep(NA, ncol(ds_abs_int) - 1)
-  sum_int <- rep(NA, ncol(ds_abs_int))
-  nbxmz <- 0
-  nb_scan_check <- min(nrow(ds_abs_int), processing_parameters$min_number_scan)
-
-  for (j in 2:ncol(ds_abs_int)) {
-    sum_int[j] <- sum(ds_abs_int[j], na.rm = TRUE)
-    if (sum(!is.na(ds_abs_int[[j]])) < nb_scan_check) {
-      nbxmz <- nbxmz + 1
-      xmz[nbxmz] <- j
-    }
-  }
-
-  xmz <- xmz[-which(is.na(xmz))]
-  if (length(xmz) > 0) {
-    ds_abs_int <- ds_abs_int[, -c(xmz)]
-    sum_int <- sum_int[-c(xmz)]
-    ## liste des mz keeped decale de 1 avec ds_abs_int
-    vmz <- as.numeric(vmz[-c(xmz - 1)])
-    meaned_mz <- meaned_mz[-c(xmz - 1)]
-  }
-
-  ## mz of precursor in data precursor to check correlation with
-  mz_prec <- paste0(
-    "mz",
-    round(mean(precursor_mz), processing_parameters$mzdecimal)
-  )
-  ## reference ion for correlation computing = precursor OR maximum
-  ## intensity ion in precursor is not present
-  refcol <- which(colnames(ds_abs_int) == mz_prec)
-  if (length(refcol) == 0) {
-    refcol <- which(sum_int == max(sum_int, na.rm = TRUE))
-  }
-
-  if (processing_parameters$do_pdf) {
-    start_pdf(processing_parameters, curent_file_id)
-  }
-
-  ## Pearson correlations between absolute intensities computing
-  cor_abs_int <- rep(NA, length(vmz))
-
-  if (length(refcol) > 0) {
-    for (i in 2:length(ds_abs_int)) {
-      cor_abs_int[i - 1] <- stats::cor(
-        x = ds_abs_int[[refcol]],
-        y = ds_abs_int[[i]],
-        use = "pairwise.complete.obs",
-        method = "pearson"
-      )
-      debug_catf(
-        "Correlation between %s and %s: %s\n",
-        paste(ds_abs_int[[refcol]], collapse = ";"),
-        paste(ds_abs_int[[i]], collapse = ";"),
-        paste(cor_abs_int[i - 1], collapse = ";")
-      )
-      if (processing_parameters$do_pdf) {
-        pdf_plot_ds_abs_int(
-          processing_parameters$c_name,
-          ds_abs_int,
-          refcol,
-          i,
-          round(cor_abs_int[i - 1], 2)
-        )
-      }
-    }
-    ## plot pseudo spectra
-    res_comp_by_file <- plot_pseudo_spectra(
-      x = ds_abs_int,
-      fid = curent_file_id,
-      sum_int = sum_int,
-      vmz = vmz,
-      cor_abs_int = cor_abs_int,
-      refcol = refcol,
-      meaned_mz = meaned_mz,
-      processing_parameters = processing_parameters
-    )
-    catf(
-      "%s has been processed and %s fragments have been found.\n",
-      processing_parameters$c_name,
-      nrow(res_comp_by_file)
-    )
-  } else {
-    res_comp_by_file <- NULL
-    cat(">> non detected in fragments file \n")
-  }
-  show_end_processing()
-  if (processing_parameters$do_pdf) {
-    end_pdf()
-  }
-  return(res_comp_by_file)
-}
-
-create_ds_abs_int <- function(vmz, filtered_fragments) {
-  verbose_catf(
-    ">> fragments: %s\n",
-    paste(vmz, collapse = " ")
-  )
-  ds_abs_int <- create_int_mz(vmz[1], filtered_fragments)
-  for (mz in vmz[-1]) {
-    int_mz <- create_int_mz(mz, filtered_fragments)
-    ds_abs_int <- merge(
-      x = ds_abs_int,
-      y = int_mz,
-      by.x = 1,
-      by.y = 1,
-      all.x = TRUE,
-      all.y = TRUE
-    )
-  }
-  return(ds_abs_int)
-}
-
-create_int_mz <- function(mz, filtered_fragments) {
-  ## absolute intensity
-  int_mz <- filtered_fragments[
-    filtered_fragments$mznominal == mz,
-    c("acquisitionNum", "i")
-  ]
-  colnames(int_mz)[2] <- paste0("mz", mz)
-  ## average intensities of mass in duplicate scans
-  comp_scans <- aggregate(x = int_mz, by = list(int_mz[[1]]), FUN = mean)
-  return(comp_scans[, -1])
-}
-
-show_end_processing <- function() {
-  verbose_catf("==========\n")
-  cat("\n")
-}
-
-start_pdf <- function(processing_parameters, curent_file_id) {
-  if (!dir.exists(processing_parameters$pdf_path)) {
-    dir.create(processing_parameters$pdf_path, recursive = TRUE)
-  }
-  pdf(
-    file = sprintf(
-      "%s/%s_processing_file%s.pdf",
-      processing_parameters$pdf_path,
-      processing_parameters$c_name,
-      curent_file_id
-    ),
-    width = 8,
-    height = 11
-  )
-  par(mfrow = c(3, 2))
-}
-
-pdf_plot_ds_abs_int <- function(c_name, ds_abs_int, refcol, i, r_coef) {
-  plot(
-    ds_abs_int[[refcol]],
-    ds_abs_int[[i]],
-     xlab = colnames(ds_abs_int)[refcol],
-     ylab = colnames(ds_abs_int)[i],
-     main = sprintf(
-      "%s corr coeff r=%s", c_name, r_coef
-    )
-  )
-}
-end_pdf <- function() {
-  dev.off()
-}
-
-set_global <- function(var, value) {
-  assign(var, value, envir = globalenv())
-}
-
-set_debug <- function() {
-  set_global("global_debug", TRUE)
-}
-
-unset_debug <- function() {
-  set_global("global_debug", FALSE)
-}
-
-set_verbose <- function() {
-  set_global("global_verbose", TRUE)
-}
-
-unset_verbose <- function() {
-  set_global("global_verbose", FALSE)
-}
-
-verbose_catf <- function(...) {
-  if (global_verbose) {
-    cat(sprintf(...), sep = "")
-  }
-}
-
-
-debug_catf <- function(...) {
-  if (global_debug) {
-    cat(sprintf(...), sep = "")
-  }
-}
-
-catf <- function(...) {
-  cat(sprintf(...), sep = "")
-}
-
-create_parser <- function() {
-  parser <- optparse::OptionParser()
-  parser <- optparse::add_option(
-    parser,
-    c("-v", "--verbose"),
-    action = "store_true",
-    default = FALSE,
-    help = paste(
-      "[default %default]",
-      "Print extra output"
-    )
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("-V", "--version"),
-    action = "store_true",
-    default = FALSE,
-    help = "Prints version and exits"
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("-d", "--debug"),
-    action = "store_true",
-    default = FALSE,
-    help = paste(
-      "[default %default]",
-      "Print debug outputs"
-    )
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("-o", "--output"),
-    type = "character",
-    default = DEFAULT_OUTPUT_PATH,
-    action = "store",
-    help = "Path to the output file [default %default]"
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("-p", "--precursors"),
-    type = "character",
-    default = DEFAULT_PRECURSOR_PATH,
-    action = "store",
-    help = "Path to the precursors file [default %default]"
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("-f", "--fragments"),
-    type = "character",
-    default = DEFAULT_FRAGMENTS_PATH,
-    action = "store",
-    help = "Path to the fragments file [default %default]"
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("-c", "--compounds"),
-    type = "character",
-    default = DEFAULT_COMPOUNDS_PATH,
-    action = "store",
-    help = "Path to the compounds file [default %default]"
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("--tolmz"),
-    type = "numeric",
-    action = "store",
-    default = DEFAULT_TOLMZ,
-    metavar = "number",
-    help = paste(
-      "[default %default]",
-      "Tolerance for MZ (in Dalton) to match the standard in the compounds"
-    )
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("--tolrt"),
-    type = "integer",
-    action = "store",
-    default = DEFAULT_TOLRT,
-    metavar = "number",
-    help = paste(
-      "[default %default]",
-      "RT (in seconds) to match the standard in the compounds"
-    )
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("--seuil_ra"),
-    type = "numeric",
-    action = "store",
-    default = DEFAULT_SEUIL_RA,
-    metavar = "number",
-    help = paste(
-      "[default %default]",
-      "relative intensity threshold"
-    ),
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("--mzdecimal"),
-    type = "integer",
-    default = DEFAULT_MZDECIMAL,
-    action = "store",
-    help = paste(
-      "[default %default]",
-      "Number of decimal to write for MZ"
-    ),
-    metavar = "number"
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("--r_threshold"),
-    type = "integer",
-    default = DEFAULT_R_THRESHOLD,
-    action = "store",
-    help = paste(
-      "[default %default]",
-      "R-Pearson correlation threshold between precursor and fragment",
-      "absolute intensity"
-    ),
-    metavar = "number"
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("--min_number_scan"),
-    type = "numeric",
-    action = "store",
-    default = DEFAULT_MINNUMBERSCAN,
-    help = paste(
-      "[default %default]",
-      "Fragments are kept if there are found in a minimum number",
-      "of min_number_scan scans"
-    ),
-    metavar = "number"
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("--pdf_path"),
-    type = "character",
-    default = DEFAULT_PDF_PATH,
-    help = paste(
-      "[default %default]",
-      "PDF files output path"
-    )
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("--ionization"),
-    type = "character",
-    action = "store",
-    default = "None",
-    help = paste(
-      "[default %default]",
-      "Which ionization to use for sirius"
-    ),
-    metavar = "character"
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("--fragment_match_delta"),
-    type = "numeric",
-    action = "store",
-    default = DEFAULT_FRAGMENTS_MATCH_DELTA,
-    help = paste(
-      "[default %default]",
-      "Fragment match delta"
-    ),
-    metavar = "numeric"
-  )
-  parser <- optparse::add_option(
-    parser,
-    c("--fragment_match_delta_unit"),
-    type = "character",
-    action = "store",
-    default = DEFAULT_FRAGMENTS_MATCH_DELTA_UNIT,
-    help = paste(
-      "[default %default]",
-      "Fragment match delta"
-    ),
-    metavar = "character"
-  )
-  return(parser)
-}
-
-stop_with_status <- function(msg, status) {
-  sink(stderr())
-  message(sprintf("Error: %s", msg))
-  message(sprintf("Error code: %s", status))
-  sink(NULL)
-  base::quit(status = status)
-}
-
-check_args_validity <- function(args) { ## nolint cyclocomp_linter
-  if (length(args$output) == 0 || nchar(args$output[1]) == 0) {
-    stop_with_status(
-      "Missing output parameters. Please set it with --output.",
-      MISSING_PARAMETER_ERROR
-    )
-  }
-  if (length(args$precursors) == 0 || nchar(args$precursors[1]) == 0) {
-    stop_with_status(
-      "Missing precursors parameters. Please set it with --precursors.",
-      MISSING_PARAMETER_ERROR
-    )
-  }
-  if (length(args$fragments) == 0 || nchar(args$fragments[1]) == 0) {
-    stop_with_status(
-      "Missing fragments parameters. Please set it with --fragments.",
-      MISSING_PARAMETER_ERROR
-    )
-  }
-  if (length(args$compounds) == 0 || nchar(args$compounds[1]) == 0) {
-    stop_with_status(
-      "Missing compounds parameters. Please set it with --compounds.",
-      MISSING_PARAMETER_ERROR
-    )
-  }
-  if (!file.exists(args$precursors)) {
-    stop_with_status(
-      sprintf(
-        "Precursors file %s does not exist or cannot be accessed.",
-        args$precursors
-      ),
-      MISSING_INPUT_FILE_ERROR
-    )
-  }
-  if (!file.exists(args$fragments)) {
-    stop_with_status(
-      sprintf(
-        "Fragments file %s does not exist or cannot be accessed.",
-        args$fragments
-      ),
-      MISSING_INPUT_FILE_ERROR
-    )
-  }
-  if (!file.exists(args$compounds)) {
-    stop_with_status(
-      sprintf(
-        "Compounds file %s does not exist or cannot be accessed.",
-        args$compounds
-      ),
-      MISSING_INPUT_FILE_ERROR
-    )
-  }
-  if (in_galaxy_env()) {
-    check_galaxy_args_validity(args)
-  }
-}
-
-in_galaxy_env <- function() {
-  sysvars <- Sys.getenv()
-  sysvarnames <- names(sysvars)
-  return(
-    "_GALAXY_JOB_HOME_DIR" %in% sysvarnames
-    || "_GALAXY_JOB_TMP_DIR" %in% sysvarnames
-    || "GALAXY_MEMORY_MB" %in% sysvarnames
-    || "GALAXY_MEMORY_MB_PER_SLOT" %in% sysvarnames
-    || "GALAXY_SLOTS" %in% sysvarnames
-  )
-}
-
-check_galaxy_args_validity <- function(args) {
-  if (!file.exists(args$output)) {
-    stop_with_status(
-      sprintf(
-        "Output file %s does not exist or cannot be accessed.",
-        args$output
-      ),
-      MISSING_INPUT_FILE_ERROR
-    )
-  }
-}
-
-get_csv_or_tsv <- function(
-  path,
-  sep_stack = c("\t", ",", ";"),
-  sep_names = c("tab", "comma", "semicolon"),
-  header = TRUE,
-  quote = "\""
-) {
-  sep <- determine_csv_or_tsv_sep(
-    path = path,
-    sep_stack = sep_stack,
-    header = header,
-    quote = quote
-  )
-  verbose_catf(
-    "%s separator has been determined for %s.\n",
-    sep_names[sep_stack == sep],
-    path
-  )
-  return(read.table(
-    file = path,
-    sep = sep,
-    header = header,
-    quote = quote
-  ))
-}
-
-determine_csv_or_tsv_sep <- function(
-  path,
-  sep_stack = c("\t", ",", ";"),
-  header = TRUE,
-  quote = "\""
-) {
-  count <- -1
-  best_sep <- sep_stack[1]
-  for (sep in sep_stack) {
-    tryCatch({
-      table <- read.table(
-        file = path,
-        sep = sep,
-        header = header,
-        quote = quote,
-        nrows = 1
-      )
-      if (ncol(table) > count) {
-        count <- ncol(table)
-        best_sep <- sep
-      }
-    })
-  }
-  return(best_sep)
-}
-
-uniformize_columns <- function(df) {
-  cols <- colnames(df)
-  for (func in c(tolower)) {
-    cols <- func(cols)
-  }
-  colnames(df) <- cols
-  return(df)
-}
-
-handle_galaxy_param <- function(args) {
-  for (param in names(args)) {
-    if (is.character(args[[param]])) {
-      args[[param]] <- gsub("__ob__", "[", args[[param]])
-      args[[param]] <- gsub("__cb__", "]", args[[param]])
-    }
-  }
-  return(args)
-}
-
-zip_pdfs <- function(processing_parameters) {
-  if (processing_parameters$do_pdf) {
-    if (zip <- Sys.getenv("R_ZIPCMD", "zip") == "") {
-      catf("R could not fin the zip executable. Trying luck: zip = \"zip\"")
-      zip <- "zip"
-    } else {
-      catf("Found zip executable at %s .", zip)
-    }
-    utils::zip(
-      processing_parameters$pdf_zip_path,
-      processing_parameters$pdf_path,
-      zip = zip
-    )
-  }
-}
-
-main <- function(args) {
-  if (args$version) {
-    catf("%s\n", MS2SNOOP_VERSION)
-    base::quit(status = 0)
-  }
-  if (in_galaxy_env()) {
-    print(sessionInfo())
-    cat("\n\n")
-  }
-  check_args_validity(args)
-  args <- handle_galaxy_param(args)
-  if (args$ionization == "None") {
-    args$ionization <- NULL
-  }
-  if (args$debug) {
-    set_debug()
-  }
-  if (args$verbose) {
-    set_verbose()
-  }
-  precursors <- get_csv_or_tsv(args$precursors)
-  fragments <- get_csv_or_tsv(args$fragments)
-  compounds <- get_csv_or_tsv(args$compounds)
-
-  compounds <- uniformize_columns(compounds)
-  mandatory_columns <- c(
-    "compound_name",
-    "mz",
-    "rtsec",
-    "inchikey"
-  )
-  presents <- mandatory_columns %in% colnames(compounds)
-  if (!all(presents)) {
-    stop_with_status(
-      sprintf(
-        "Some columns are missing: %s",
-        paste(mandatory_columns[which(!presents)], collapse = ", ")
-      ),
-      BAD_PARAMETER_VALUE_ERROR
-    )
-  }
-
-  res_all <- data.frame()
-  processing_parameters <- list(
-    min_number_scan = args$min_number_scan,
-    mzdecimal = args$mzdecimal,
-    r_threshold = args$r_threshold,
-    seuil_ra = args$seuil_ra,
-    tolmz = args$tolmz,
-    tolrt = args$tolrt,
-    ionization = args$ionization,
-    do_pdf = nchar(args$pdf_path) > 0,
-    pdf_zip_path = args$pdf_path,
-    pdf_path = tempdir(),
-    fragment_match_delta = args$fragment_match_delta,
-    fragment_match_delta_unit = args$fragment_match_delta_unit
-  )
-  for (i in seq_len(nrow(compounds))) {
-    processing_parameters$mzref <- compounds[["mz"]][i]
-    processing_parameters$rtref <- compounds[["rtsec"]][i]
-    processing_parameters$c_name <- compounds[["compound_name"]][i]
-    processing_parameters$inchikey <- compounds[["inchikey"]][i]
-    processing_parameters$elemcomposition <- compounds[["elemcomposition"]][i]
-    res_cor <- extract_fragments(
-      precursors = precursors,
-      fragments = fragments,
-      processing_parameters = processing_parameters
-    )
-    if (!is.null(res_cor)) {
-      res_all <- rbind(res_all, res_cor)
-    }
-  }
-
-  if (nrow(res_all) == 0) {
-    stop_with_status("No result at all!", NO_ANY_RESULT_ERROR)
-  }
-
-  write.table(
-    x = res_all,
-    file = args$output,
-    sep = "\t",
-    row.names = FALSE
-  )
-  zip_pdfs(processing_parameters)
-  unlink(processing_parameters$pdf_path, recursive = TRUE)
-}
-
-global_debug <- FALSE
-global_verbose <- FALSE
-args <- optparse::parse_args(create_parser())
-main(args)
-
-warnings()
+#' read and process mspurity W4M files
+#' create a summary of fragment for each precursor and a graphics of peseudo
+#' spectra + correlation on which checking of fragment is based on
+#' V3 try to identify and process multiple files for 1 precursor which may
+#' occur if different collision energy are used
+#' V4 elimination of correlation = NA. Correlation is done with precursor, if
+#' precursor is not present correlation with most intense peak
+#' author: Jean-Francois Martin
+#' V5 is versionned, lintR-compliant, packaged, unit-tested, documented and
+#' tested against data from other labs.
+#' new maintainer: Lain Pavot - lain.pavot@inrae.fr
+#'
+#' @import optparse
+#'
+
+
+get_version <- function() {
+  cmd <- commandArgs(trailingOnly = FALSE)
+  root <- dirname(gsub("--file=", "", cmd[grep("--file=", cmd)]))
+  readme <- readLines(file.path(root, "README.md"))
+  version_line <- readme[grepl(" * **@version**: ", readme, fixed = TRUE)]
+  return(gsub(".*: ", "", version_line))
+}
+
+defaults <- list(
+  MS2SNOOP_VERSION = get_version(),
+  MISSING_PARAMETER_ERROR = 1,
+  BAD_PARAMETER_VALUE_ERROR = 2,
+  MISSING_INPUT_FILE_ERROR = 3,
+  NO_ANY_RESULT_ERROR = 255,
+  DEFAULT_PRECURSOR_PATH = NULL,
+  DEFAULT_FRAGMENTS_PATH = NULL,
+  DEFAULT_COMPOUNDS_PATH = NULL,
+  DEFAULT_OUTPUT_PATH = "compound_fragments_result.txt",
+  DEFAULT_TOLMZ = 0.01,
+  DEFAULT_TOLRT = 20,
+  DEFAULT_MZDECIMAL = 3,
+  DEFAULT_R_THRESHOLD = 0.85,
+  DEFAULT_MINNUMBERSCAN = 8,
+  DEFAULT_SEUIL_RA = 0.05,
+  DEFAULT_FRAGMENTS_MATCH_DELTA = 10,
+  DEFAULT_FRAGMENTS_MATCH_DELTA_UNIT = "ppm",
+  DEFAULT_PDF_PATH = ""
+)
+env <- globalenv()
+for (default in names(defaults)) {
+  assign(default, defaults[[default]], envir = env)
+  lockBinding(default, env)
+}
+
+########################################################################
+
+get_formulas <- function(
+  mzref,
+  spectra,
+  processing_parameters,
+  background = !TRUE
+) {
+  if (is.vector(mzref) && length(mzref) > 1) {
+    return(lapply(
+      mzref,
+      function(mz) {
+        return(get_formulas(
+          mzref = mz,
+          spectra = spectra,
+          processing_parameters = processing_parameters,
+          background = background
+        ))
+      }
+    ))
+  }
+  input <- sprintf(
+    "%s-%s.ms",
+    gsub("[[:space:]]", "_", processing_parameters$c_name),
+    mzref
+  )
+  create_ms_file(input, mzref, spectra, processing_parameters)
+  output <- sprintf(
+    "out/%s-%s.out",
+    gsub("[[:space:]]", "_", processing_parameters$c_name),
+    mzref
+  )
+  command <- sprintf(
+    paste(
+      "sirius",
+      "--noCite",
+      "--noSummaries",
+      "--loglevel=WARNING",
+      "-i='%s'",
+      "-o='%s'",
+      "tree",
+      ## loglevel is not working taken into account during
+      ## sirius startup, so we filter outputs...
+      "2>&1 | grep '^(WARNING|SEVERE)'"
+    ),
+    input,
+    output
+  )
+  verbose_catf(
+    ">> Sirius is running %swith the command: %s\n",
+    if (background) "in the background " else "",
+    command
+  )
+  system(
+    command,
+    wait = !background,
+    ignore.stdout = background,
+    ignore.stderr = background
+  )
+  return(extract_sirius_results(output, spectra$mz, processing_parameters))
+}
+
+create_ms_file <- function(
+  path,
+  mzref,
+  spectra,
+  processing_parameters
+) {
+  file_content <- paste(
+    sprintf(">compound %s", processing_parameters$c_name),
+    sprintf(">ionization %s", processing_parameters$ionization),
+    sprintf(">parentmass %s", mzref),
+    sprintf(">formula %s", processing_parameters$elemcomposition),
+    sep = "\n"
+  )
+  displayed_file_content <- sprintf(
+    "%s\n>collision\n%s",
+    file_content,
+    paste(
+      sprintf(
+        "%s %s",
+        spectra[1:3, "mz"],
+        spectra[1:3, "intensities"]
+      ),
+      collapse = "\n"
+    )
+  )
+  if (nrow(spectra) > 3) {
+    displayed_file_content <- sprintf(
+      "%s\n[... %s more rows of mz and intensities ...]",
+      displayed_file_content,
+      nrow(spectra) - 3
+    )
+  }
+  catf(
+    ">> MS file created for %s with content:\n%s\n",
+    processing_parameters$c_name,
+    displayed_file_content
+  )
+  file_content <- sprintf(
+    "%s\n\n>collision\n%s",
+    file_content,
+    paste(
+      paste(spectra$mz, spectra$intensities),
+      collapse = "\n"
+    )
+  )
+  cat(file_content, file = path, append = FALSE)
+}
+
+extract_sirius_results <- function(
+  output,
+  mz_list,
+  processing_parameters
+) {
+
+  delta <- processing_parameters$fragment_match_delta
+  delta_unit <- tolower(processing_parameters$fragment_match_delta_unit)
+
+  output <- list.dirs(output, recursive = FALSE)[[1]]
+
+  spectra_out_dir <- sprintf("%s/spectra", output)
+  spectra_filename <- sprintf(
+    "%s/%s",
+    spectra_out_dir,
+    list.files(spectra_out_dir)[[1]]
+  )
+
+  trees_out_dir <- sprintf("%s/trees", output)
+  trees_filename <- sprintf(
+    "%s/%s",
+    trees_out_dir,
+    list.files(trees_out_dir)[[1]]
+  )
+
+  if (!is.null(spectra_filename)) {
+    sirius_results <- get_csv_or_tsv(spectra_filename)
+  } else {
+    return(rep(NA, length(mz_list)))
+  }
+  if (!is.null(trees_filename)) {
+    sirius_results <- cbind(sirius_results, extract_sirius_ppm(trees_filename))
+  } else {
+    return(rep(NA, length(mz_list)))
+  }
+
+  fragment_matchings <- data.frame(
+    formula = NA,
+    ppm = NA,
+    mz = mz_list,
+    error = NA
+  )
+
+  sirius_results <- filter_sirius_with_delta(
+    sirius_results = sirius_results,
+    original_mz = fragment_matchings$mz,
+    delta = delta,
+    delta_unit = delta_unit
+  )
+
+  for (index in seq_len(nrow(sirius_results))) {
+    result <- sirius_results[index, ]
+    filter <- which(order(abs(fragment_matchings$mz - result$mz)) == 1)
+    fragment_matchings[filter, "formula"] <- result$formula
+    fragment_matchings[filter, "ppm"] <- result$ppm
+    catf(
+      "[OK] Fragment with m/z=%s matches %s with a difference of %s ppm\n",
+      fragment_matchings[filter, "mz"], result$formula, result$ppm
+    )
+  }
+  return(fragment_matchings)
+}
+
+filter_sirius_with_delta <- function(
+  sirius_results,
+  original_mz,
+  delta,
+  delta_unit
+) {
+  if (is.numeric(delta) && !is.na(delta) && delta > 0) {
+    if (delta_unit == "ppm") {
+      filter <- abs(sirius_results$ppm) <= delta
+      fine <- which(filter)
+      not_fine <- which(!filter)
+      catf(
+        paste(
+          "[KO] fragment %s (m/z=%s) eleminated because ppm=%s is greater",
+          "than delta=%s\n"
+        ),
+        sirius_results[not_fine, ]$formula,
+        sirius_results[not_fine, ]$mz,
+        sirius_results[not_fine, ]$ppm,
+        delta
+      )
+      sirius_results <- sirius_results[fine, ]
+    } else if (delta_unit == "mz") {
+      differences <- sapply(
+        sirius_results$mz,
+        function(mz) min(abs(original_mz - mz))
+      )
+      fine <- which(sapply(
+        sirius_results$mz,
+        function(mz) any(abs(original_mz - mz) <= delta)
+      ))
+      not_fine <- which(sapply(
+        sirius_results$mz,
+        function(mz) all(abs(original_mz - mz) > delta)
+      ))
+      catf(
+        paste(
+          "[KO] fragment %s eleminated because mz difference=%s is",
+          "greater than delta=%s\n"
+        ),
+        sirius_results[not_fine, ]$formula,
+        differences[not_fine],
+        delta
+      )
+      sirius_results <- sirius_results[fine, ]
+    }
+  }
+  return(sirius_results)
+}
+
+extract_sirius_ppm <- function(path) {
+  json <- file(path, "r")
+  suppressWarnings(json_lines <- readLines(json))
+  close(json)
+  json_lines <- json_lines[
+    grepl("\\s+\"(massDeviation|recalibratedMass)\" :", json_lines)
+  ]
+  json_lines <- gsub("^\\s+\"[^\"]+\" : \"?", "", json_lines)
+  ppms <- json_lines[seq(1, length(json_lines), 2)]
+  mz <- json_lines[seq(2, length(json_lines), 2)]
+  ppms <- as.numeric(gsub(" ppm .*", "", ppms))
+  mz <- as.numeric(gsub(",$", "", mz))
+  ordered <- order(mz)
+  return(list(ppm = ppms[ordered], recalibrated_mz = mz[ordered]))
+}
+
+#' @title plot_pseudo_spectra
+#' @param x
+#' @param fid
+#' @param sum_int
+#' @param vmz
+#' @param cor_abs_int
+#' @param refcol
+#' @param c_name
+#' @description plot_pseudo_spectra
+#' function to compute sum of intensities among scans for all
+#' m/z kept (cor > r_threshold & minimum number of scans)
+#' and plot pseudo spectra
+#' x dataframe scan X fragments with scans number in the 1st column and
+#' ions in next with intensities
+#' fid file id when several a precursor has been detected in several files
+plot_pseudo_spectra <- function(
+  x,
+  fid,
+  sum_int,
+  vmz,
+  cor_abs_int,
+  refcol,
+  meaned_mz,
+  processing_parameters
+) {
+  ## du fait de la difference de nombre de colonne entre la dataframe qui
+  ## inclue les scans en 1ere col, mzRef se decale de 1
+  refcol <- refcol - 1
+  ## compute relative intensities max=100%
+  rel_int <- sum_int[-1]
+  rel_int <- rel_int / max(rel_int)
+
+  if (processing_parameters$do_pdf) {
+    ## define max value on vertical axis (need to increase in order to plot the
+    ## label of fragments)
+    ymax <- max(rel_int) + 0.2 * max(rel_int)
+
+    par(mfrow = c(2, 1))
+    plot(vmz, rel_int, type = "h", ylim = c(0, ymax),
+      main = processing_parameters$c_name
+    )
+    ## low correl coef. will be display in grey
+    cor_low <- which(round(cor_abs_int, 2) < processing_parameters$r_threshold)
+
+    lbmzcor <- sprintf("%s(r=%s)", vmz, round(cor_abs_int, 2))
+
+    if (length(cor_low) > 0) {
+      text(
+        vmz[cor_low],
+        rel_int[cor_low],
+        lbmzcor[cor_low],
+        cex = 0.5,
+        col = "grey",
+        srt = 90,
+        adj = 0
+      )
+      if (length(vmz) - length(cor_low) > 1) {
+        text(
+          vmz[-c(refcol, cor_low)],
+          rel_int[-c(refcol, cor_low)],
+          lbmzcor[-c(refcol, cor_low)],
+          cex = 0.6,
+          col = 1,
+          srt = 90,
+          adj = 0
+        )
+      }
+    } else {
+      if (length(vmz) > 1) {
+        text(
+          vmz[-c(refcol)],
+          rel_int[-c(refcol)],
+          lbmzcor[-c(refcol)],
+          cex = 0.6,
+          col = 1,
+          srt = 90,
+          adj = 0
+        )
+      }
+    }
+
+    text(
+      vmz[refcol],
+      rel_int[refcol],
+      lbmzcor[refcol],
+      cex = 0.8,
+      col = 2,
+      srt = 90,
+      adj = 0
+    )
+  }
+
+  ## prepare result file
+  cor_valid <- (round(cor_abs_int, 2) >= processing_parameters$r_threshold)
+
+  do_sirius <- TRUE
+  verbose_catf("Checking sirius parameters...\n")
+  if (is.null(processing_parameters$ionization)) {
+    do_sirius <- FALSE
+    verbose_catf("[KO] No ionization passed in parameter.\n")
+  } else {
+    verbose_catf("[OK] Ionization=%s.\n", processing_parameters$ionization)
+  }
+  if (is.na(processing_parameters$elemcomposition)) {
+    do_sirius <- FALSE
+    verbose_catf("[KO] Elemental composition is NA.\n")
+  } else if (length(processing_parameters$elemcomposition) < 1) {
+    do_sirius <- FALSE
+    verbose_catf("[KO] No elemental composition is provided.\n")
+  } else if (processing_parameters$elemcomposition == "") {
+    do_sirius <- FALSE
+    verbose_catf("[KO] Elemental composition is an empty string.\n")
+  } else {
+    verbose_catf(
+      "[OK] Elemental composition=%s.\n",
+      processing_parameters$elemcomposition
+    )
+  }
+
+  cp_res_length <- length(vmz)
+  ppm <- rep(NA, cp_res_length)
+  formulas <- rep(NA, cp_res_length)
+  if (do_sirius) {
+    verbose_catf("Everything is ok, preparing for sirius.\n")
+    formulas <- get_formulas(
+      mzref = processing_parameters$mzref,
+      spectra = data.frame(mz = meaned_mz, intensities = sum_int[-1]),
+      processing_parameters = processing_parameters
+    )
+    if (nrow(formulas) == 0) {
+      catf("No formula found.\n")
+    } else {
+      ppm <- formulas$ppm
+      formulas <- formulas$formula
+      catf(
+        "Found %s formula for %s fragments\n",
+        length(formulas[which(!(is.na(formulas)))]),
+        cp_res_length
+      )
+    }
+  } else {
+    verbose_catf("Sirius cannot be run.\n")
+  }
+  cp_res <- data.frame(
+    rep(processing_parameters$c_name, cp_res_length),
+    rep(processing_parameters$inchikey, cp_res_length),
+    rep(processing_parameters$elemcomposition, cp_res_length),
+    formulas,
+    meaned_mz,
+    ppm,
+    rep(fid, cp_res_length),
+    cor_abs_int,
+    sum_int[-1],
+    rel_int,
+    cor_valid
+  )
+
+  colnames(cp_res) <- c(
+    "compoundName",
+    "inchikey",
+    "elemcomposition",
+    "fragment",
+    "fragment_mz",
+    "ppm",
+    "fileid",
+    "CorWithPrecursor",
+    "AbsoluteIntensity",
+    "relativeIntensity",
+    "corValid"
+  )
+  return(cp_res)
+}
+
+#'
+#' @title extract_fragments
+#'
+#' @param precursors the precursor list from mspurity
+#' @param fragments the fragments list from ms purity
+# ' @param mzref
+# ' @param rtref
+# ' @param c_name
+# ' @param inchikey
+# ' @param elemcomposition
+#' @param processing_parameters
+#' @returns
+#'
+#' @description
+#' function for extraction of fragments corresponding to precursors
+#' detected by MSPurity
+extract_fragments <- function( ## nolint cyclocomp_linter
+  precursors,
+  fragments,
+  processing_parameters
+) {
+  ## filter precursor in the precursors file based on mz and rt in the
+  ## compound list
+  catf("processing %s\n", processing_parameters$c_name)
+  verbose_catf("===\n")
+  param <- processing_parameters
+  selected_precursors <- which(
+    (abs(precursors$precurMtchMZ - param$mzref) <= param$tolmz)
+    & (abs(precursors$precurMtchRT - param$rtref) <= param$tolrt)
+  )
+  rm(param)
+
+  verbose_catf(
+    "> %s precursors selected with mz=%s±%s and rt=%s±%s\n",
+    length(selected_precursors),
+    processing_parameters$mzref,
+    processing_parameters$tolmz,
+    processing_parameters$rtref,
+    processing_parameters$tolrt
+  )
+
+  ## check if there is the precursor in the file
+
+  if (length(selected_precursors) < 1) {
+    cat("> non detected in precursor file\n")
+    show_end_processing()
+    return(NULL)
+  }
+
+  precursors <- precursors[selected_precursors, ]
+
+  ## check if fragments corresponding to precursor are found in several
+  ## files (collision energy)
+  ## this lead to a processing for each fileid
+  file_ids <- as.character(sort(unique(precursors$fileid)))
+  if (length(file_ids) > 1) {
+    catf("> several files detected for this compounds :\n")
+  } else if (length(file_ids) < 1 || nrow(precursors) < 1) {
+    return(data.frame())
+  }
+
+  res_comp <- data.frame()
+  for (curent_file_id in file_ids) {
+    curent_precursors <- precursors[precursors$fileid == curent_file_id, ]
+    selected_fragments <- fragments[
+      fragments$grpid %in% as.character(curent_precursors$grpid)
+      & fragments$fileid == curent_file_id,
+    ]
+    filtered_fragments <- selected_fragments[
+      selected_fragments$ra > processing_parameters$seuil_ra,
+    ]
+    if (nrow(filtered_fragments) != 0) {
+      res_comp_by_file <- process_file(
+        curent_file_id = curent_file_id,
+        precursor_mz = curent_precursors$mz,
+        filtered_fragments = filtered_fragments,
+        processing_parameters = processing_parameters
+      )
+      if (!is.null(res_comp_by_file)) {
+        res_comp <- rbind(res_comp, res_comp_by_file)
+      }
+    } else {
+      catf("No fragment found for in fragment file\n")
+    }
+  }
+  return(unique(res_comp))
+}
+
+process_file <- function(
+  curent_file_id,
+  precursor_mz,
+  filtered_fragments,
+  processing_parameters
+) {
+  mznominal <- round(x = filtered_fragments$mz, digits = 0)
+  meaned_mz <- round(
+    aggregate(
+      data.frame(
+        mz = filtered_fragments$mz,
+        mznominal = mznominal
+      ),
+      list(mznominal),
+      FUN = mean
+    )$mz,
+    digits = processing_parameters$mzdecimal
+  )
+  filtered_fragments <- data.frame(filtered_fragments, mznominal)
+
+  ## creation of cross table row=scan col=mz X=ra
+
+  vmz <- as.character(sort(unique(filtered_fragments$mznominal)))
+
+  ds_abs_int <- create_ds_abs_int(vmz, filtered_fragments)
+
+  if (global_debug) {
+    print(ds_abs_int)
+  }
+
+  ## elimination of mz with less than min_number_scan scans (user defined
+  ## parameter)
+  xmz <- rep(NA, ncol(ds_abs_int) - 1)
+  sum_int <- rep(NA, ncol(ds_abs_int))
+  nbxmz <- 0
+  nb_scan_check <- min(nrow(ds_abs_int), processing_parameters$min_number_scan)
+
+  for (j in 2:ncol(ds_abs_int)) {
+    sum_int[j] <- sum(ds_abs_int[j], na.rm = TRUE)
+    if (sum(!is.na(ds_abs_int[[j]])) < nb_scan_check) {
+      nbxmz <- nbxmz + 1
+      xmz[nbxmz] <- j
+    }
+  }
+
+  xmz <- xmz[-which(is.na(xmz))]
+  if (length(xmz) > 0) {
+    ds_abs_int <- ds_abs_int[, -c(xmz)]
+    sum_int <- sum_int[-c(xmz)]
+    ## liste des mz keeped decale de 1 avec ds_abs_int
+    vmz <- as.numeric(vmz[-c(xmz - 1)])
+    meaned_mz <- meaned_mz[-c(xmz - 1)]
+  }
+
+  ## mz of precursor in data precursor to check correlation with
+  mz_prec <- paste0(
+    "mz",
+    round(mean(precursor_mz), processing_parameters$mzdecimal)
+  )
+  ## reference ion for correlation computing = precursor OR maximum
+  ## intensity ion in precursor is not present
+  refcol <- which(colnames(ds_abs_int) == mz_prec)
+  if (length(refcol) == 0) {
+    refcol <- which(sum_int == max(sum_int, na.rm = TRUE))
+  }
+
+  if (processing_parameters$do_pdf) {
+    start_pdf(processing_parameters, curent_file_id)
+  }
+
+  ## Pearson correlations between absolute intensities computing
+  cor_abs_int <- rep(NA, length(vmz))
+
+  if (length(refcol) > 0) {
+    for (i in 2:length(ds_abs_int)) {
+      cor_abs_int[i - 1] <- stats::cor(
+        x = ds_abs_int[[refcol]],
+        y = ds_abs_int[[i]],
+        use = "pairwise.complete.obs",
+        method = "pearson"
+      )
+      debug_catf(
+        "Correlation between %s and %s: %s\n",
+        paste(ds_abs_int[[refcol]], collapse = ";"),
+        paste(ds_abs_int[[i]], collapse = ";"),
+        paste(cor_abs_int[i - 1], collapse = ";")
+      )
+      if (processing_parameters$do_pdf) {
+        pdf_plot_ds_abs_int(
+          processing_parameters$c_name,
+          ds_abs_int,
+          refcol,
+          i,
+          round(cor_abs_int[i - 1], 2)
+        )
+      }
+    }
+    ## plot pseudo spectra
+    res_comp_by_file <- plot_pseudo_spectra(
+      x = ds_abs_int,
+      fid = curent_file_id,
+      sum_int = sum_int,
+      vmz = vmz,
+      cor_abs_int = cor_abs_int,
+      refcol = refcol,
+      meaned_mz = meaned_mz,
+      processing_parameters = processing_parameters
+    )
+    catf(
+      "%s has been processed and %s fragments have been found.\n",
+      processing_parameters$c_name,
+      nrow(res_comp_by_file)
+    )
+  } else {
+    res_comp_by_file <- NULL
+    cat(">> non detected in fragments file \n")
+  }
+  show_end_processing()
+  if (processing_parameters$do_pdf) {
+    end_pdf()
+  }
+  return(res_comp_by_file)
+}
+
+create_ds_abs_int <- function(vmz, filtered_fragments) {
+  verbose_catf(
+    ">> fragments: %s\n",
+    paste(vmz, collapse = " ")
+  )
+  ds_abs_int <- create_int_mz(vmz[1], filtered_fragments)
+  for (mz in vmz[-1]) {
+    int_mz <- create_int_mz(mz, filtered_fragments)
+    ds_abs_int <- merge(
+      x = ds_abs_int,
+      y = int_mz,
+      by.x = 1,
+      by.y = 1,
+      all.x = TRUE,
+      all.y = TRUE
+    )
+  }
+  return(ds_abs_int)
+}
+
+create_int_mz <- function(mz, filtered_fragments) {
+  ## absolute intensity
+  int_mz <- filtered_fragments[
+    filtered_fragments$mznominal == mz,
+    c("acquisitionNum", "i")
+  ]
+  colnames(int_mz)[2] <- paste0("mz", mz)
+  ## average intensities of mass in duplicate scans
+  comp_scans <- aggregate(x = int_mz, by = list(int_mz[[1]]), FUN = mean)
+  return(comp_scans[, -1])
+}
+
+show_end_processing <- function() {
+  verbose_catf("==========\n")
+  cat("\n")
+}
+
+start_pdf <- function(processing_parameters, curent_file_id) {
+  if (!dir.exists(processing_parameters$pdf_path)) {
+    dir.create(processing_parameters$pdf_path, recursive = TRUE)
+  }
+  pdf(
+    file = sprintf(
+      "%s/%s_processing_file%s.pdf",
+      processing_parameters$pdf_path,
+      processing_parameters$c_name,
+      curent_file_id
+    ),
+    width = 8,
+    height = 11
+  )
+  par(mfrow = c(3, 2))
+}
+
+pdf_plot_ds_abs_int <- function(c_name, ds_abs_int, refcol, i, r_coef) {
+  plot(
+    ds_abs_int[[refcol]],
+    ds_abs_int[[i]],
+     xlab = colnames(ds_abs_int)[refcol],
+     ylab = colnames(ds_abs_int)[i],
+     main = sprintf(
+      "%s corr coeff r=%s", c_name, r_coef
+    )
+  )
+}
+end_pdf <- function() {
+  dev.off()
+}
+
+set_global <- function(var, value) {
+  assign(var, value, envir = globalenv())
+}
+
+set_debug <- function() {
+  set_global("global_debug", TRUE)
+}
+
+unset_debug <- function() {
+  set_global("global_debug", FALSE)
+}
+
+set_verbose <- function() {
+  set_global("global_verbose", TRUE)
+}
+
+unset_verbose <- function() {
+  set_global("global_verbose", FALSE)
+}
+
+verbose_catf <- function(...) {
+  if (global_verbose) {
+    cat(sprintf(...), sep = "")
+  }
+}
+
+
+debug_catf <- function(...) {
+  if (global_debug) {
+    cat(sprintf(...), sep = "")
+  }
+}
+
+catf <- function(...) {
+  cat(sprintf(...), sep = "")
+}
+
+create_parser <- function() {
+  parser <- optparse::OptionParser()
+  parser <- optparse::add_option(
+    parser,
+    c("-v", "--verbose"),
+    action = "store_true",
+    default = FALSE,
+    help = paste(
+      "[default %default]",
+      "Print extra output"
+    )
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("-V", "--version"),
+    action = "store_true",
+    default = FALSE,
+    help = "Prints version and exits"
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("-d", "--debug"),
+    action = "store_true",
+    default = FALSE,
+    help = paste(
+      "[default %default]",
+      "Print debug outputs"
+    )
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("-o", "--output"),
+    type = "character",
+    default = DEFAULT_OUTPUT_PATH,
+    action = "store",
+    help = "Path to the output file [default %default]"
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("-p", "--precursors"),
+    type = "character",
+    default = DEFAULT_PRECURSOR_PATH,
+    action = "store",
+    help = "Path to the precursors file [default %default]"
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("-f", "--fragments"),
+    type = "character",
+    default = DEFAULT_FRAGMENTS_PATH,
+    action = "store",
+    help = "Path to the fragments file [default %default]"
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("-c", "--compounds"),
+    type = "character",
+    default = DEFAULT_COMPOUNDS_PATH,
+    action = "store",
+    help = "Path to the compounds file [default %default]"
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("--tolmz"),
+    type = "numeric",
+    action = "store",
+    default = DEFAULT_TOLMZ,
+    metavar = "number",
+    help = paste(
+      "[default %default]",
+      "Tolerance for MZ (in Dalton) to match the standard in the compounds"
+    )
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("--tolrt"),
+    type = "integer",
+    action = "store",
+    default = DEFAULT_TOLRT,
+    metavar = "number",
+    help = paste(
+      "[default %default]",
+      "RT (in seconds) to match the standard in the compounds"
+    )
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("--seuil_ra"),
+    type = "numeric",
+    action = "store",
+    default = DEFAULT_SEUIL_RA,
+    metavar = "number",
+    help = paste(
+      "[default %default]",
+      "relative intensity threshold"
+    ),
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("--mzdecimal"),
+    type = "integer",
+    default = DEFAULT_MZDECIMAL,
+    action = "store",
+    help = paste(
+      "[default %default]",
+      "Number of decimal to write for MZ"
+    ),
+    metavar = "number"
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("--r_threshold"),
+    type = "integer",
+    default = DEFAULT_R_THRESHOLD,
+    action = "store",
+    help = paste(
+      "[default %default]",
+      "R-Pearson correlation threshold between precursor and fragment",
+      "absolute intensity"
+    ),
+    metavar = "number"
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("--min_number_scan"),
+    type = "numeric",
+    action = "store",
+    default = DEFAULT_MINNUMBERSCAN,
+    help = paste(
+      "[default %default]",
+      "Fragments are kept if there are found in a minimum number",
+      "of min_number_scan scans"
+    ),
+    metavar = "number"
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("--pdf_path"),
+    type = "character",
+    default = DEFAULT_PDF_PATH,
+    help = paste(
+      "[default %default]",
+      "PDF files output path"
+    )
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("--ionization"),
+    type = "character",
+    action = "store",
+    default = "None",
+    help = paste(
+      "[default %default]",
+      "Which ionization to use for sirius"
+    ),
+    metavar = "character"
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("--fragment_match_delta"),
+    type = "numeric",
+    action = "store",
+    default = DEFAULT_FRAGMENTS_MATCH_DELTA,
+    help = paste(
+      "[default %default]",
+      "Fragment match delta"
+    ),
+    metavar = "numeric"
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("--fragment_match_delta_unit"),
+    type = "character",
+    action = "store",
+    default = DEFAULT_FRAGMENTS_MATCH_DELTA_UNIT,
+    help = paste(
+      "[default %default]",
+      "Fragment match delta"
+    ),
+    metavar = "character"
+  )
+  return(parser)
+}
+
+stop_with_status <- function(msg, status) {
+  sink(stderr())
+  message(sprintf("Error: %s", msg))
+  message(sprintf("Error code: %s", status))
+  sink(NULL)
+  base::quit(status = status)
+}
+
+check_args_validity <- function(args) { ## nolint cyclocomp_linter
+  if (length(args$output) == 0 || nchar(args$output[1]) == 0) {
+    stop_with_status(
+      "Missing output parameters. Please set it with --output.",
+      MISSING_PARAMETER_ERROR
+    )
+  }
+  if (length(args$precursors) == 0 || nchar(args$precursors[1]) == 0) {
+    stop_with_status(
+      "Missing precursors parameters. Please set it with --precursors.",
+      MISSING_PARAMETER_ERROR
+    )
+  }
+  if (length(args$fragments) == 0 || nchar(args$fragments[1]) == 0) {
+    stop_with_status(
+      "Missing fragments parameters. Please set it with --fragments.",
+      MISSING_PARAMETER_ERROR
+    )
+  }
+  if (length(args$compounds) == 0 || nchar(args$compounds[1]) == 0) {
+    stop_with_status(
+      "Missing compounds parameters. Please set it with --compounds.",
+      MISSING_PARAMETER_ERROR
+    )
+  }
+  if (!file.exists(args$precursors)) {
+    stop_with_status(
+      sprintf(
+        "Precursors file %s does not exist or cannot be accessed.",
+        args$precursors
+      ),
+      MISSING_INPUT_FILE_ERROR
+    )
+  }
+  if (!file.exists(args$fragments)) {
+    stop_with_status(
+      sprintf(
+        "Fragments file %s does not exist or cannot be accessed.",
+        args$fragments
+      ),
+      MISSING_INPUT_FILE_ERROR
+    )
+  }
+  if (!file.exists(args$compounds)) {
+    stop_with_status(
+      sprintf(
+        "Compounds file %s does not exist or cannot be accessed.",
+        args$compounds
+      ),
+      MISSING_INPUT_FILE_ERROR
+    )
+  }
+  if (in_galaxy_env()) {
+    check_galaxy_args_validity(args)
+  }
+}
+
+in_galaxy_env <- function() {
+  sysvars <- Sys.getenv()
+  sysvarnames <- names(sysvars)
+  return(
+    "_GALAXY_JOB_HOME_DIR" %in% sysvarnames
+    || "_GALAXY_JOB_TMP_DIR" %in% sysvarnames
+    || "GALAXY_MEMORY_MB" %in% sysvarnames
+    || "GALAXY_MEMORY_MB_PER_SLOT" %in% sysvarnames
+    || "GALAXY_SLOTS" %in% sysvarnames
+  )
+}
+
+check_galaxy_args_validity <- function(args) {
+  if (!file.exists(args$output)) {
+    stop_with_status(
+      sprintf(
+        "Output file %s does not exist or cannot be accessed.",
+        args$output
+      ),
+      MISSING_INPUT_FILE_ERROR
+    )
+  }
+}
+
+get_csv_or_tsv <- function(
+  path,
+  sep_stack = c("\t", ",", ";"),
+  sep_names = c("tab", "comma", "semicolon"),
+  header = TRUE,
+  quote = "\""
+) {
+  sep <- determine_csv_or_tsv_sep(
+    path = path,
+    sep_stack = sep_stack,
+    header = header,
+    quote = quote
+  )
+  verbose_catf(
+    "%s separator has been determined for %s.\n",
+    sep_names[sep_stack == sep],
+    path
+  )
+  return(read.table(
+    file = path,
+    sep = sep,
+    header = header,
+    quote = quote
+  ))
+}
+
+determine_csv_or_tsv_sep <- function(
+  path,
+  sep_stack = c("\t", ",", ";"),
+  header = TRUE,
+  quote = "\""
+) {
+  count <- -1
+  best_sep <- sep_stack[1]
+  for (sep in sep_stack) {
+    tryCatch({
+      table <- read.table(
+        file = path,
+        sep = sep,
+        header = header,
+        quote = quote,
+        nrows = 1
+      )
+      if (ncol(table) > count) {
+        count <- ncol(table)
+        best_sep <- sep
+      }
+    })
+  }
+  return(best_sep)
+}
+
+uniformize_columns <- function(df) {
+  cols <- colnames(df)
+  for (func in c(tolower)) {
+    cols <- func(cols)
+  }
+  colnames(df) <- cols
+  return(df)
+}
+
+handle_galaxy_param <- function(args) {
+  for (param in names(args)) {
+    if (is.character(args[[param]])) {
+      args[[param]] <- gsub("__ob__", "[", args[[param]])
+      args[[param]] <- gsub("__cb__", "]", args[[param]])
+    }
+  }
+  return(args)
+}
+
+zip_pdfs <- function(processing_parameters) {
+  if (processing_parameters$do_pdf) {
+    if (zip <- Sys.getenv("R_ZIPCMD", "zip") == "") {
+      catf("R could not fin the zip executable. Trying luck: zip = \"zip\"")
+      zip <- "zip"
+    } else {
+      catf("Found zip executable at %s .", zip)
+    }
+    utils::zip(
+      processing_parameters$pdf_zip_path,
+      processing_parameters$pdf_path,
+      zip = zip
+    )
+  }
+}
+
+main <- function(args) {
+  if (args$version) {
+    catf("%s\n", MS2SNOOP_VERSION)
+    base::quit(status = 0)
+  }
+  if (in_galaxy_env()) {
+    print(sessionInfo())
+    cat("\n\n")
+  }
+  check_args_validity(args)
+  args <- handle_galaxy_param(args)
+  if (args$ionization == "None") {
+    args$ionization <- NULL
+  }
+  if (args$debug) {
+    set_debug()
+  }
+  if (args$verbose) {
+    set_verbose()
+  }
+  precursors <- get_csv_or_tsv(args$precursors)
+  fragments <- get_csv_or_tsv(args$fragments)
+  compounds <- get_csv_or_tsv(args$compounds)
+
+  compounds <- uniformize_columns(compounds)
+  mandatory_columns <- c(
+    "compound_name",
+    "mz",
+    "rtsec",
+    "inchikey"
+  )
+  presents <- mandatory_columns %in% colnames(compounds)
+  if (!all(presents)) {
+    stop_with_status(
+      sprintf(
+        "Some columns are missing: %s",
+        paste(mandatory_columns[which(!presents)], collapse = ", ")
+      ),
+      BAD_PARAMETER_VALUE_ERROR
+    )
+  }
+
+  res_all <- data.frame()
+  processing_parameters <- list(
+    min_number_scan = args$min_number_scan,
+    mzdecimal = args$mzdecimal,
+    r_threshold = args$r_threshold,
+    seuil_ra = args$seuil_ra,
+    tolmz = args$tolmz,
+    tolrt = args$tolrt,
+    ionization = args$ionization,
+    do_pdf = nchar(args$pdf_path) > 0,
+    pdf_zip_path = args$pdf_path,
+    pdf_path = tempdir(),
+    fragment_match_delta = args$fragment_match_delta,
+    fragment_match_delta_unit = args$fragment_match_delta_unit
+  )
+  for (i in seq_len(nrow(compounds))) {
+    processing_parameters$mzref <- compounds[["mz"]][i]
+    processing_parameters$rtref <- compounds[["rtsec"]][i]
+    processing_parameters$c_name <- compounds[["compound_name"]][i]
+    processing_parameters$inchikey <- compounds[["inchikey"]][i]
+    processing_parameters$elemcomposition <- compounds[["elemcomposition"]][i]
+    res_cor <- extract_fragments(
+      precursors = precursors,
+      fragments = fragments,
+      processing_parameters = processing_parameters
+    )
+    if (!is.null(res_cor)) {
+      res_all <- rbind(res_all, res_cor)
+    }
+  }
+
+  if (nrow(res_all) == 0) {
+    stop_with_status("No result at all!", NO_ANY_RESULT_ERROR)
+  }
+
+  write.table(
+    x = res_all,
+    file = args$output,
+    sep = "\t",
+    row.names = FALSE
+  )
+  zip_pdfs(processing_parameters)
+  unlink(processing_parameters$pdf_path, recursive = TRUE)
+}
+
+global_debug <- FALSE
+global_verbose <- FALSE
+args <- optparse::parse_args(create_parser())
+main(args)
+
+warnings()
author	workflow4metabolomics
date	Fri, 30 Sep 2022 16:18:56 +0000
parents	78d5a12406c2
children	2a1f120a6874