Repository 'rmassbank'
hg clone https://toolshed.g2.bx.psu.edu/repos/recetox/rmassbank

Changeset 0:0b28816c1c2c (2023-05-18)
Next changeset 1:c1a9c990d4b0 (2023-10-12)
Commit message:
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rmassbank commit 02414aa4c20f249c2069e5e3d587e3a8cda923a8
added:
addBabelPath.py
createSettingsFile.py
macros.xml
mysettings.ini
rmassbank.r
rmassbank.xml
test-data/1_3_Trifluoromethylphenyl_piperazin_2819_pos.mzML
test-data/compoundList.csv
test-data/failedPeaks.csv
test-data/infolist.csv
test-data/mysettings_galaxy.ini
test-data/output.zip
test-data/plot.pdf
test-data/resultsFull.RData
test-data/results_RA.RData
test-data/results_RF.RData
b
diff -r 000000000000 -r 0b28816c1c2c addBabelPath.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/addBabelPath.py Thu May 18 13:01:04 2023 +0000
[
@@ -0,0 +1,15 @@
+import sys
+
+# Script that replaces OpenBabel path in settings file used within Galaxy (tools installed via conda)#
+# python script.py /path/settings_file /babeldir/path
+
+fin = open(sys.argv[1], "r")
+fout = open("mysettings_galaxy.ini", "wt")
+
+for line in fin:
+    if "babeldir:" in line and "#" not in line:
+        fout.write("babeldir: '" + sys.argv[2] + "'\n")
+    else:
+        fout.write(line)
+fin.close()
+fout.close()
b
diff -r 000000000000 -r 0b28816c1c2c createSettingsFile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/createSettingsFile.py Thu May 18 13:01:04 2023 +0000
[
@@ -0,0 +1,81 @@
+import sys
+
+# Script that creates a .ini settings file for RMassBank pipeline
+# Intended to be wrapped as a tool to Galaxy (in docker) to create users of RMassBank the mysettings.ini file -> very "stupid" script, does not check correctness of user's input, that will do Galaxy XML wrapper
+# INPUT: mysettings.ini (default file downloaded via R package RMassBank, will be stored in the docker)
+#
+# python script.py mysettings.ini 0.34 0.34 34 FALSE FALSE 34 34 34 34 34 0.34 dmz none 34 34 34 34 34 34 34 34 34 34 34 0.34 0.34 0.34 34 0.34 TRUE /babeldir/path FALSE charged
+
+fin = open(sys.argv[1], "r")
+fout = open("mysettings_galaxy.ini", "wt")
+for line in fin:
+    if "rtMargin:" in line:
+        fout.write("rtMargin: " + sys.argv[2] + "\n")
+    elif "rtShift:" in line:
+        fout.write("rtShift: " + sys.argv[3] + "\n")
+    elif "babeldir:" in line and "#" not in line:
+        fout.write("babeldir: '" + sys.argv[32] + "'\n")
+    elif "use_version:" in line:
+        fout.write("use_version: " + sys.argv[4] + "\n")
+    elif "use_rean_peaks:" in line:
+        fout.write("use_rean_peaks: " + sys.argv[5] + "\n")
+    elif "add_annotation:" in line:
+        fout.write("add_annotation: " + sys.argv[6] + "\n")
+    elif "include_sp_tags:" in line:
+        fout.write("include_sp_tags: " + sys.argv[33] + "\n")
+    elif "pH:" in line:
+        fout.write("    pH: " + sys.argv[7] + " # [M+H]+: Accession numbers 1-14\n")
+    elif "pM:" in line:
+        fout.write("    pM: " + sys.argv[8] + " # [M]+: 17-30\n")
+    elif "pNa:" in line:
+        fout.write("    pNa: " + sys.argv[9] + " # [M+Na]+: 33-46\n")
+    elif "mH:" in line:
+        fout.write("    mH: " + sys.argv[10] + " # [M-H]-: 51-64\n")
+    elif "mFA:" in line:
+        fout.write("    mFA: " + sys.argv[11] + " # [M+FA]-: 67-80\n")
+    elif "electronicNoiseWidth:" in line:
+        fout.write("electronicNoiseWidth: " + sys.argv[12] + "\n")
+    elif "recalibrateBy:" in line:
+        fout.write("recalibrateBy: " + sys.argv[13] + "\n")
+    elif "recalibrateMS1:" in line:
+        fout.write("recalibrateMS1: " + sys.argv[14] + "\n")
+    elif "recalibrateMS1Window:" in line:
+        fout.write("recalibrateMS1Window: " + sys.argv[15] + "\n")
+    elif "multiplicityFilter:" in line:
+        fout.write("multiplicityFilter: " + sys.argv[16] + "\n")
+    elif "ppmHighMass:" in line:
+        fout.write("    ppmHighMass: " + sys.argv[17] + "\n")
+    elif "ppmLowMass:" in line:
+        fout.write("    ppmLowMass: " + sys.argv[18] + "\n")
+    elif "massRangeDivision:" in line:
+        fout.write("    massRangeDivision: " + sys.argv[19] + "\n")
+    elif "ppmFine:" in line:
+        fout.write("    ppmFine: " + sys.argv[20] + "\n")
+    elif "prelimCut:" in line:
+        fout.write("    prelimCut: " + sys.argv[21] + "\n")
+    elif "prelimCutRatio:" in line:
+        fout.write("    prelimCutRatio: " + sys.argv[22] + "\n")
+    elif "fineCut:" in line:
+        fout.write("    fineCut: " + sys.argv[23] + "\n")
+    elif "fineCutRatio:" in line:
+        fout.write("    fineCutRatio: " + sys.argv[24] + "\n")
+    elif "specOkLimit:" in line:
+        fout.write("    specOkLimit: " + sys.argv[25] + "\n")
+    elif "dbeMinLimit:" in line:
+        fout.write("    dbeMinLimit: " + sys.argv[26] + "\n")
+    elif "satelliteMzLimit:" in line:
+        fout.write("    satelliteMzLimit: " + sys.argv[27] + "\n")
+    elif "satelliteIntLimit:" in line:
+        fout.write("    satelliteIntLimit: " + sys.argv[28] + "\n")
+    elif "ppmFine:" in line:
+        fout.write("    ppmFine: " + sys.argv[29] + "\n")
+    elif "mzCoarse:" in line:
+        fout.write("    mzCoarse: " + sys.argv[30] + "\n")
+    elif "fillPrecursorScan:" in line:
+        fout.write("    fillPrecursorScan: " + sys.argv[31] + "\n")
+    elif "unknownMass:" in line:
+        fout.write("unknownMass: " + sys.argv[34] + "\n")
+    else:
+        fout.write(line)
+fin.close()
+fout.close()
b
diff -r 000000000000 -r 0b28816c1c2c macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu May 18 13:01:04 2023 +0000
[
@@ -0,0 +1,83 @@
+<macros>
+    <token name="@TOOL_VERSION@">3.0.0</token>
+
+    <xml name="creator">
+        <creator>
+            <person
+                givenName="Karolína"
+                familyName="Trachtová"
+                url="https://github.com/trachtok" />
+            <organization
+                url="https://www.recetox.muni.cz/"
+                email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+                name="RECETOX MUNI" />
+        </creator>
+    </xml>
+
+    <token name="@HELP@">
+        <![CDATA[
+        RMassBank workflow - part I and II
+            This is an implementation of the RMassBank workflow (Bioconductor R package), based on the publication by Stravs et al. 2013.
+
+            Original RMassBank package is used to process tandem MS files and build MassBank records. Functions include
+            automated extraction of tandem MS spectra, formula assignment to tandem MS fragments, recalibration of tandem
+            MS spectra with assigned fragments, spectrum cleanup, automated retrieval of compound information from Internet
+            databases, and export to MassBank records.
+
+            This script is a full pipeline of RmassBank that loads settings file, compounds list, infolist and input .mzML
+            files (one or multiple) and goes through all 8 steps of data-preprocessing referenced in RMassBank's
+            Bioconductor manual (Mass spectrometry workflow, function msmsWorkflow()) and all steps of MassBank record
+            generation (MassBank record workflow, function mbWorkflow()). To make it easier for user, based on template
+            of settings required by RMassBank package the corresponding settings file is created as well
+            (see section Settings file).
+
+            User's inputs:
+                - *compounds list* **(must have suffix .csv)** - separator must be ',', required columns: ID, Name_deu, Name_Eng, Name, SMILES, RT, CAS
+                - *mode* - either **pH** (positive H) specifies [M+H]+ OR **pNa** specifies [M+Na]+ OR **pM** specifies [M]+ OR **mFA** specifies [M-H]- and [M+FA]-
+                - *input mzML files* - files for the RMassBank package must follow a specific naming scheme; each filename should be in the format xxxxxxxx_1234_xxx.mzML, where xxx denotes anything and 1234 is the compound ID (which must be stated in the ID column of the compound csv file).
+                - *infolist* **(must have suffix .csv)** - separator must be ',', columns (required columns are **bold**): '' (column name empty, values are numbers from 1 to n=number of chemicals), OK, id, dbcas, dbname_d, dbname_e, dataused, COMMENT.CONFIDENCE, COMMENT.EAWAG_UCHEM_ID, **CH$NAME1**, CH$NAME2, CH$NAME3, CH$COMPOUND_CLASS, **CH$FORMULA**, **CH$EXACT_MASS**, **CH$SMILES**, **CH$IUPAC**, CH$LINK.CAS, CH$LINK.CHEBI, CH$LINK.HMDB, CH$LINK.KEGG, CH$LINK.LIPIDMAPS, CH$LINK.PUBCHEM, **CH$LINK.INCHIKEY**, CH$LINK.CHEMSPIDER
+                - *settings parameters* - either filled in Galaxy or the whole settings file can be uploaded and used to run the RMassBank workflow
+
+            Galaxy outputs:
+                - CSV file with failed peaks (empty if there were no failed peaks)
+                - PDF plot showing performance of the recalibration
+                - Full R environment (.RData) that should be used as an input for RMassBank workflow - part II
+                - R environment with re-analyzed spectra (_RA.RData, see RMassBank manual for more information)
+                - R environment with refiltered spectra (_RF.RData, see RMassBank manual for more information)
+                - *zipped folder* with records ready to be uploaded to MassBank database
+                - *settings file* used to run the RMassBank
+
+                For more information about the data-preprocessing and record generation of the RMassBank pipeline, please refer
+                to `Bioconductor manual <https://www.bioconductor.org/packages/release/bioc/html/RMassBank.html>`_ for RMassBank.
+
+            Settings file
+                In the original RMassBank R package, user has to specify multiple parameters via the settings file. To make
+                life easier for users of this tool, most of the parameters can be specified in the Galaxy interface for this
+                tool and are automatically filled into the settings template that is then used for the RMassBank workflow.
+                If the user wants to supply his/her own settings files, that is also possible. Path to OpenBabel will be
+                automatically filled no matter what is the original specification in the supplied settings file.
+
+                For all parameters, please refer to the manual for RMassBank at `Bioconductor website
+                <https://www.bioconductor.org/packages/release/bioc/html/RMassBank.html>`_ .
+
+            Parameters that can only be changed manually are:
+                - *deprofile:* leave empty if input data are already in centroid mode (the usual case), otherwise use one of the values deprofile.spline, deprofile.fwhm or deprofile.localMax to convert the input data with the corresponding algorithm
+                - *babeldir:* do not change this if the settings file is to be further used in Galaxy
+                - *annotations:* several fields containing information about the author, the instrument used, etc.
+                - *spectraList:* list of data-dependent scans in their order (relative to the parent scan), for annotation of the MassBank records
+                - *electronicNoise:* list of known electronic noise peaks
+                - *recalibrator:* custom recalibration function
+                - *titleFormat:* defines the title format
+
+                **All parameters are more thoroughly described in the created settings file, which can be edited in any simple
+                text editor.**
+
+                **NOTE:** In case of manual editing of the settings file, do not indent with TAB characters! Use only spaces.
+
+        Authors of RMassBank: Michael Stravs, Emma Schymanski, Steffen Neumann, Erik Mueller, with contributions
+        from Tobias Schulze.
+
+        RMassBank maintainer: at Eawag (massbank@eawag.ch)
+        ]]>
+    </token>
+</macros>
b
diff -r 000000000000 -r 0b28816c1c2c mysettings.ini
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mysettings.ini Thu May 18 13:01:04 2023 +0000
[
@@ -0,0 +1,236 @@
+# Sample configuration file for RMassBank.
+# Adapt this file to your needs.
+# NOTE: Do not indent with TAB characters! Use only spaces.
+# (If your editor converts TAB to a certain number of spaces, it's OK.)
+# Use a space after the colon.
+
+# Deprofile input data?
+# Leave empty if input data is already in "centroid" mode.
+# Use values deprofile.spline, deprofile.fwhm or deprofile.localMax to convert the input data with the
+# corresponding algorithm. See ?deprofile
+deprofile: 
+
+# Deviation (in minutes) allowed for the retention time
+rtMargin: 0.4
+# Systematic retention time shift
+rtShift: 0.0
+
+# Directory to OpenBabel. Required for creating molfiles for MassBank export.
+# If no OpenBabel directory is given, RMassBank will attempt to use the CACTUS webservice
+# for SDF generation. You really should install OpenBabel though; the CACTUS structures
+# have explicit hydrogen atoms...
+# Points to the directory where babel.exe (or the Linux "babel" equivalent) lies.
+babeldir:
+
+# Which MassBank record version to use; version 2 is advised.
+use_version: 2
+
+# Include reanalyzed peaks?
+use_rean_peaks: TRUE
+
+# annotate the spectra files with (putative) molecular formulas for fragments?
+add_annotation: TRUE
+
+# Annotations for the spectrum:
+annotations:
+    # Author etc. annotation
+    authors: Nomen Nescio, The Unseen University
+    copyright: Copyright (C) XXX
+    publication: 
+    license: CC BY
+    instrument: LTQ Orbitrap XL Thermo Scientific
+    instrument_type: LC-ESI-ITFT
+    confidence_comment: standard compound
+    compound_class: N/A; Environmental Standard
+    internal_id_fieldname: INTERNAL_ID
+    #
+    # HPLC annotations:
+    #
+    # example: lc_gradient: 90/10 at 0 min, 50/50 at 4 min, 5/95 at 17 min, 5/95 at 25 min, 90/10 at 25.1 min, 90/10 at 30 min
+    lc_gradient: 
+    # example: lc_flow: 200 uL/min
+    lc_flow: 
+    # example: lc_solvent_a: water with 0.1% formic acid
+    lc_solvent_a: 
+    lc_solvent_b: 
+    # example: lc_column: XBridge C18 3.5um, 2.1x50mm, Waters
+    lc_column: 
+    # Prefix for MassBank accession IDs
+    entry_prefix: XX
+    ms_type: MS2
+    ionization: ESI
+    ms_dataprocessing:
+        RECALIBRATE: loess on assigned fragments and MS1
+
+include_sp_tags: FALSE
+
+# Annotator:
+# by default, "annotator.default" is used.
+# If you want to build your custom annotator (check ?annotator.default and the source code),
+# select it here by using e.g.
+# annotator: annotator.myown
+# for a function annotator.myown(annotation)
+
+# List of data-dependent scans in their order (relative to the parent scan), for annotation of the MassBank records
+# For every data-dependent scan event, specify an element with:
+# mode: fragmentation mode, e.g. CID
+# ces: "short" format collision energy (for record title)
+# ce: "long" format collision energy (for annotation field)
+# res: FT resolution
+spectraList:
+ # First scan: CID 35% NCE, resolution 7500 
+- mode: CID
+  ces: 35%
+  ce: 35 % (nominal)
+  res: 7500
+ # Second scan: HCD 15% NCE, resolution 7500
+- mode: HCD
+  ces: 15%
+  ce: 15 % (nominal)
+  res: 7500
+ # Third scan, etc.
+- mode: HCD
+  ces: 30%
+  ce: 30 % (nominal)
+  res: 7500
+- mode: HCD
+  ces: 45%
+  ce: 45 % (nominal)
+  res: 7500
+- mode: HCD
+  ces: 60%
+  ce: 60 % (nominal)
+  res: 7500
+- mode: HCD
+  ces: 75%
+  ce: 75 % (nominal)
+  res: 7500
+- mode: HCD
+  ces: 90%
+  ce: 90 % (nominal)
+  res: 7500
+- mode: HCD
+  ces: 15%
+  ce: 15 % (nominal)
+  res: 15000
+- mode: HCD
+  ces: 30%
+  ce: 30 % (nominal)
+  res: 15000
+- mode: HCD
+  ces: 45%
+  ce: 45 % (nominal)
+  res: 15000
+- mode: HCD
+  ces: 60%
+  ce: 60 % (nominal)
+  res: 15000
+- mode: HCD
+  ces: 75%
+  ce: 75 % (nominal)
+  res: 15000
+- mode: HCD
+  ces: 90%
+  ce: 90 % (nominal)
+  res: 15000
+- mode: CID
+  ces: 35%
+  ce: 35 % (nominal)
+  res: 15000
+
+# Shifts of the starting points for RMassBank accession numbers.
+# Change these if you measure different adducts 
+accessionNumberShifts:
+    pH: 0 # [M+H]+: Accession numbers 1-14
+    pM: 16 # [M]+: 17-30
+    pNa: 32 # [M+Na]+: 33-46
+    mH: 50 # [M-H]-: 51-64
+    mFA: 66 # [M+FA]-: 67-80
+
+# A list of known electronic noise peaks
+electronicNoise:
+- 189.825
+- 201.725
+- 196.875
+# Exclusion width of electronic noise peaks (from unmatched peaks, prior to
+# reanalysis)
+electronicNoiseWidth: 0.3
+
+# recalibration settings:
+# recalibrate by: dppm or dmz
+recalibrateBy: dppm
+
+# recalibrate MS1:
+# separately (separate)
+# with common curve (common)
+# do not recalibrate (none)
+recalibrateMS1: common
+# Window width to look for MS1 peaks to recalibrate (in ppm)
+recalibrateMS1Window: 15
+
+# Custom recalibration function: You can overwrite the recal function by
+# making any function which takes rcdata$recalfield ~ rcdata$mzFound.
+# The settings define which recal function is used.
+# Note: if recalibrateMS1 is "common", the setting "recalibrator: MS1" is meaningless
+# because the MS1 points will be recalibrated together with the MS2 points with 
+# the MS2 recalibration function.
+recalibrator:
+    MS1: recalibrate.loess
+    MS2: recalibrate.loess
+
+# Define the multiplicity filtering level
+# Default is 2 (peak occurs at least twice)
+# Set this to 1 if you want to turn this option off.
+# Set this to anything > 2 if you want harder filtering
+multiplicityFilter: 2
+
+# Define the title format.
+# You can use all entries from MassBank records as tokens
+# plus the additional token RECORD_TITLE_CE, which is a shortened
+# version of the collision energy specifically for use in the title.
+# Every line is one entry and must have one token in curly brackets
+# e.g. {CH$NAME} or {AC$MASS_SPECTROMETRY: MS_TYPE} plus optionally
+# additional text in front or behind e.g.
+# R={AC$MASS_SPECTROMETRY: RESOLUTION}
+# If this is not specified, it defaults to a title of the format
+# "Dinotefuran; LC-ESI-QFT; MS2; CE: 35%; R=35000; [M+H]+"
+# Note how everything must be in "" here because otherwise the : are getting mangled!
+titleFormat:
+- "{CH$NAME}"
+- "{AC$INSTRUMENT_TYPE}"
+- "{AC$MASS_SPECTROMETRY: MS_TYPE}"
+- "CE: {RECORD_TITLE_CE}"
+- "R={AC$MASS_SPECTROMETRY: RESOLUTION}"
+- "{MS$FOCUSED_ION: PRECURSOR_TYPE}"
+
+# Define filter settings.
+# For Orbitrap, settings of 15 ppm in low mass range, 10 ppm in high
+# mass range, m/z = 120 as mass range division and 5 ppm for recalibrated
+# data overall are recommended. 
+filterSettings:
+    ppmHighMass: 10
+    ppmLowMass: 15
+    massRangeDivision: 120
+    ppmFine: 5
+    prelimCut: 1000
+    prelimCutRatio: 0
+    fineCut: 0
+    fineCutRatio: 0
+    specOkLimit: 1000
+    dbeMinLimit: -0.5
+    satelliteMzLimit: 0.5
+    satelliteIntLimit: 0.05
+    
+ # Define raw MS retrieval settings.
+findMsMsRawSettings:
+    ppmFine: 10
+    mzCoarse: 0.5
+    # fillPrecursorScan is FALSE for "good" mzML files which have all the info needed.
+    # However, for example AB Sciex files will have missing precursor scan information,
+    # in which case fillPrecursorScan = TRUE is needed. Try it out.
+    fillPrecursorScan: FALSE
+    
+# Select how to treat unknown compound masses: 
+# "charged" (the default, also if no option set) treats unknown (level 5) compound masses as the m/z,
+# "neutral" treats unknown (level 5) compound masses as the neutral mass and applies [M+H]+ and [M-H]- calculations accordingly.
+unknownMass: charged
b
diff -r 000000000000 -r 0b28816c1c2c rmassbank.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rmassbank.r Thu May 18 13:01:04 2023 +0000
[
@@ -0,0 +1,84 @@
+#!/usr/bin/env Rscript
+# Script for running the RMassBank pipeline:
+#   - part1 (all 8 steps at once)
+#   - part2 (generation of MassBank records)
+#
+# Author: Karolina Trachtova (k.trachtova@gmail.com)
+# Original authors of RMassBank: Michael Stravs, Emma Schymanski,
+# Steffen Neumann, Erik Mueller, with contributions from Tobias Schulze.
+#
+# INPUT1: settings file
+# INPUT2: compounds csv list
+# INPUT3: mode
+# INPUT4: folder with csv infolist(s)
+# INPUT5: one or more mzML files (all remaining arguments)
+#
+# RUN:
+# Rscript rmassbank.r mysettings.ini Compoundlist.csv pH /path/to/infolists \
+# /path/to/files/1_3_Chlorophenyl_piperazin_2818_pos.mzML \
+# /path/to/files/1_3_Trifluoromethylphenyl_piperazin_2819_pos.mzML
+#############################################################
+# Clear the workspace and load libraries
+rm(list = ls(all = TRUE))
+
+suppressMessages(library("RMassBank", warn.conflicts = TRUE, quietly = TRUE))
+
+#############################################################
+# Read positional command-line arguments
+args <- commandArgs(trailingOnly = TRUE)
+
+stt <- args[1] #file with settings
+cmp <- args[2] #csv file with compounds
+md <- args[3] #mode
+inf <- args[4] #folder with csv infolist
+files <- (args[5:length(args)]) #one or multiple mzML files
+
+print(paste0("Used settings file: ", stt))
+print(paste0("Used compound list: ", cmp))
+print(paste0("RMassBank pipeline will be run in mode: ", md))
+print(paste0("Input files: ", files))
+
+#############################################################
+## PART I
+#
+# Preparing environment for running RMassBank
+print("Preparing environment for part I ...")
+
+## Load file with settings
+loadRmbSettings(stt)
+
+## create a workspace
+w <- newMsmsWorkspace()
+
+## Load compound list
+loadList(cmp)
+
+## Load input files
+w@files <- files
+
+# Running RMassBank pipeline part I
+print("Running RMassBank pipeline part 1 - all 8 steps...")
+
+prf <- c("results") # archive name handed to msmsWorkflow()
+w <- msmsWorkflow(w, mode = md, steps = c(1:8), archivename = prf)
+
+## Part II
+#
+# Preparing environment for running RMassBank
+print("Preparing environment for part II ...")
+
+mb <- newMbWorkspace(w) # record workspace built from the part I workspace
+
+loadList(cmp) # compound list is loaded again for part II
+
+loadRmbSettings(stt) # settings are loaded again for part II
+
+print("Loading infolist...")
+
+mb <- resetInfolists(mb)
+
+mb <- loadInfolists(mb, inf)
+
+print("Running RMassBank pipeline for generation of MassBank records...")
+
+mb <- mbWorkflow(mb)
b
diff -r 000000000000 -r 0b28816c1c2c rmassbank.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rmassbank.xml Thu May 18 13:01:04 2023 +0000
[
b'@@ -0,0 +1,232 @@\n+<tool id="rmassbank" name="RMassBank" version="@TOOL_VERSION@+galaxy2">\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="creator"/>\n+\n+    <requirements>\n+        <requirement type="package" version="3.7">python</requirement>\n+        <requirement type="package" version="2.4.1">openbabel</requirement>\n+        <requirement type="package" version="3.0.0">bioconductor-rmassbank</requirement>\n+        <requirement type="package" version="3.1.1">r-gplots</requirement>\n+        <requirement type="package" version="3.0">zip</requirement>\n+    </requirements>\n+    <stdio>\n+        <exit_code range="1:"/>\n+        <exit_code range=":-1"/>\n+    </stdio>\n+    <command detect_errors="aggressive"><![CDATA[\n+         #for $infile in $param_in:\n+            ln -s \'${infile}\' \'${infile.element_identifier}\' &&\n+         #end for\n+\n+         ln -s \'${compounds}\' \'${compounds.element_identifier}\' &&\n+\n+         mkdir -p ./infolists &&\n+         ln -s $infolist ./infolists/${infolist.element_identifier} &&\n+\n+         #if $select_settings.setting_file == "template"\n+           python3 \'${__tool_directory__}/createSettingsFile.py\'\n+           \'${__tool_directory__}/mysettings.ini\'\n+           \'${select_settings.general_param.rtMargin}\'\n+           \'${select_settings.general_param.rtShift}\'\n+           \'${select_settings.general_param.use_version}\'\n+           \'${select_settings.general_param.use_rean_peaks}\'\n+           \'${select_settings.general_param.add_annotation}\'\n+           \'${select_settings.shifts.pH}\'\n+           \'${select_settings.shifts.pM}\'\n+           \'${select_settings.shifts.pNa}\'\n+           \'${select_settings.shifts.mH}\'\n+           \'${select_settings.shifts.mFA}\'\n+           \'${select_settings.noise_recalibration.electronicNoiseWidth}\'\n+           \'${select_settings.noise_recalibration.recalibrateBy}\'\n+           
\'${select_settings.noise_recalibration.recalibrateMS1}\'\n+           \'${select_settings.noise_recalibration.recalibrateMS1Window}\'\n+           \'${select_settings.noise_recalibration.multiplicityFilter}\'\n+           \'${select_settings.data_filtering.ppmHighMass}\'\n+           \'${select_settings.data_filtering.ppmLowMass}\'\n+           \'${select_settings.data_filtering.massRangeDivision}\'\n+           \'${select_settings.data_filtering.ppmFine}\'\n+           \'${select_settings.data_filtering.prelimCut}\'\n+           \'${select_settings.data_filtering.prelimCutRatio}\'\n+           \'${select_settings.data_filtering.fineCut}\'\n+           \'${select_settings.data_filtering.fineCutRatio}\'\n+           \'${select_settings.data_filtering.specOkLimit}\'\n+           \'${select_settings.data_filtering.dbeMinLimit}\'\n+           \'${select_settings.data_filtering.satelliteMzLimit}\'\n+           \'${select_settings.data_filtering.satelliteIntLimit}\'\n+           \'${select_settings.raw_settings.ppmFine}\'\n+           \'${select_settings.raw_settings.mzCoarse}\'\n+           \'${select_settings.raw_settings.fillPrecursorScan}\'\n+           \\$(echo `which babel` | sed s/babel//)\n+           \'${select_settings.general_param.include_sp_tags}\'\n+           \'${select_settings.raw_settings.unknownMass}\'\n+           &&\n+         #else\n+           python3 \'${__tool_directory__}/addBabelPath.py\'\n+           \'${select_settings.setting_file_ext}\'\n+           \\$(echo `which babel` | sed s/babel//)\n+           &&\n+         #end if\n+\n+         Rscript \'${__tool_directory__}/rmassbank.r\'\n+          mysettings_galaxy.ini\n+          \'${compounds.element_identifier}\'\n+          \'${mode}\'\n+          ./infolists\n+          #for $infile in $param_in:\n+            \'${infile.element_identifier}\'\n+          #end for\n+         &&\n+\n+         mv mysettings_galaxy.ini $outputINI &&\n+         mv results_RA.RData $rafile &&\n+         mv 
results_RF.RData $rffile &&\n+         mv Rplots.pdf $pdfplot &&\n+         mv results.RData $fllrdata &&\n+         zip -r mb_records.zip XX &&\n+         mv mb_records.zip $output &&\n+\n+         [ ! -f'..b'ision" type="integer" value="120" label="massRangeDivision"/>\n+                    <param name="ppmFine" type="integer" value="5" label="ppmFine"/>\n+                    <param name="prelimCut" type="integer" value="1000" label="prelimCut"/>\n+                    <param name="prelimCutRatio" type="float" value="0.0" label="prelimCutRatio"/>\n+                    <param name="fineCut" type="float" value="0.0" label="fineCut"/>\n+                    <param name="fineCutRatio" type="float" value="0.0" label="fineCutRatio"/>\n+                    <param name="specOkLimit" type="integer" value="1000" label="specOkLimit"/>\n+                    <param name="dbeMinLimit" type="float" value="-0.5" label="dbeMinLimit"/>\n+                    <param name="satelliteMzLimit" type="float" value="0.5" label="satelliteMzLimit"/>\n+                    <param name="satelliteIntLimit" type="float" value="0.05" label="satelliteIntLimit"/>\n+                </section>\n+\n+                <section name="raw_settings" title="Degine raw MS retrieval settings">\n+                    <param name="ppmFine" type="integer" value="10" label="ppmFine"/>\n+                    <param name="mzCoarse" type="float" value="0.5" label="mzCoarse"/>\n+                    <param name="fillPrecursorScan" type="select" value="FALSE"\n+                           label="fillPrecursorScan (FALSE for \'good\' mzML files which have all the info needed)">\n+                        <option value="TRUE">TRUE</option>\n+                        <option value="FALSE" selected="true">FALSE</option>\n+                    </param>\n+                    <param name="unknownMass" type="select" value="charged"\n+                           label="Select how to treat unknown compound masses.">\n+                      
  <option value="charged" selected="true">charged</option>\n+                        <option value="neutral">neutral</option>\n+                    </param>\n+                </section>\n+            </when>\n+            <when value="external">\n+                <param name="setting_file_ext" type="data" format="txt" label="Settings file"/>\n+            </when>\n+        </conditional>\n+\n+    </inputs>\n+\n+    <outputs>\n+        <data format="txt" name="outputINI" label="mysettings_galaxy.ini"/>\n+        <data format="csv" name="csvfile" label="CSV file with failed peaks."/>\n+        <data format="pdf" name="pdfplot" label="PDF plot."/>\n+        <data format="rdata" name="fllrdata" label="Full R environment."/>\n+        <data format="rdata" name="rafile" label="RData file with re-analyzed spectra (_RA.RData)."/>\n+        <data format="rdata" name="rffile" label="RData file with refiltered spectra (_RF.RData)."/>\n+        <data format="zip" name="output" label="Zipped folder with results."/>\n+    </outputs>\n+    <tests>\n+        <test>\n+            <param name="compounds" value="compoundList.csv"/>\n+            <param name="infolist" value="infolist.csv"/>\n+            <param name="mode" value="pH"/>\n+            <param name="param_in" value="1_3_Trifluoromethylphenyl_piperazin_2819_pos.mzML"/>\n+            <param name="ppmFine" value="5"/>\n+\n+            <output name="outputINI" value="mysettings_galaxy.ini" ftype="txt" lines_diff="2"/>\n+            <output name="csvfile" value="failedPeaks.csv" ftype="csv" lines_diff="1"/>\n+            <output name="pdfplot" value="plot.pdf" compare="sim_size" delta_frac="0.1"/>\n+            <output name="fllrdata" value="resultsFull.RData" compare="sim_size" delta_frac="0.1"/>\n+            <output name="rafile" value="results_RA.RData" compare="sim_size" delta_frac="0.1"/>\n+            <output name="rffile" value="results_RF.RData" compare="sim_size" delta_frac="0.1"/>\n+            <output 
name="output" value="output.zip" compare="sim_size" delta_frac="0.1"/>\n+        </test>\n+    </tests>\n+\n+    <help>\n+        <![CDATA[\n+        @HELP@\n+        ]]>\n+    </help>\n+\n+    <citations>\n+        <citation type="doi">10.1002/jms.3131</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r 0b28816c1c2c test-data/1_3_Trifluoromethylphenyl_piperazin_2819_pos.mzML
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/1_3_Trifluoromethylphenyl_piperazin_2819_pos.mzML Thu May 18 13:01:04 2023 +0000
b
b'@@ -0,0 +1,171753 @@\n+<?xml version="1.0" encoding="ISO-8859-1"?>\n+<indexedmzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.1_idx.xsd">\n+  <mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" id="1_3_Trifluoromethylphenyl_piperazin_2819_pos" version="1.1.0">\n+    <cvList count="2">\n+      <cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" version="3.25.0" URI="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/>\n+      <cv id="UO" fullName="Unit Ontology" version="12:10:2011" URI="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo"/>\n+    </cvList>\n+    <fileDescription>\n+      <fileContent>\n+        <cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value=""/>\n+        <cvParam cvRef="MS" accession="MS:1000580" name="MSn spectrum" value=""/>\n+      </fileContent>\n+      <sourceFileList count="1">\n+        <sourceFile id="RAW1" name="1_3_Trifluoromethylphenyl_piperazin_2819_pos.RAW" location="file:///D:/Uchem/Pestizide/Screening/111115_pos_standards-single">\n+          <cvParam cvRef="MS" accession="MS:1000768" name="Thermo nativeID format" value=""/>\n+          <cvParam cvRef="MS" accession="MS:1000563" name="Thermo RAW file" value=""/>\n+          <cvParam cvRef="MS" accession="MS:1000569" name="SHA-1" value="671f6d325a8def25e59cd7a73d9581d7c9d51960"/>\n+        </sourceFile>\n+      </sourceFileList>\n+    </fileDescription>\n+    <referenceableParamGroupList count="1">\n+      <referenceableParamGroup id="CommonInstrumentParams">\n+        <cvParam cvRef="MS" accession="MS:1000556" name="LTQ Orbitrap XL" value=""/>\n+        <cvParam cvRef="MS" 
accession="MS:1000529" name="instrument serial number" value="SN01040B"/>\n+      </referenceableParamGroup>\n+    </referenceableParamGroupList>\n+    <softwareList count="2">\n+      <software id="Xcalibur" version="2.5.5">\n+        <cvParam cvRef="MS" accession="MS:1000532" name="Xcalibur" value=""/>\n+      </software>\n+      <software id="pwiz" version="3.0.3691">\n+        <cvParam cvRef="MS" accession="MS:1000615" name="ProteoWizard" value=""/>\n+      </software>\n+    </softwareList>\n+    <instrumentConfigurationList count="2">\n+      <instrumentConfiguration id="IC1">\n+        <referenceableParamGroupRef ref="CommonInstrumentParams"/>\n+        <componentList count="3">\n+          <source order="1">\n+            <cvParam cvRef="MS" accession="MS:1000073" name="electrospray ionization" value=""/>\n+            <cvParam cvRef="MS" accession="MS:1000057" name="electrospray inlet" value=""/>\n+          </source>\n+          <analyzer order="2">\n+            <cvParam cvRef="MS" accession="MS:1000484" name="orbitrap" value=""/>\n+          </analyzer>\n+          <detector order="3">\n+            <cvParam cvRef="MS" accession="MS:1000624" name="inductive detector" value=""/>\n+          </detector>\n+        </componentList>\n+        <softwareRef ref="Xcalibur"/>\n+      </instrumentConfiguration>\n+      <instrumentConfiguration id="IC2">\n+        <referenceableParamGroupRef ref="CommonInstrumentParams"/>\n+        <componentList count="3">\n+          <source order="1">\n+            <cvParam cvRef="MS" accession="MS:1000073" name="electrospray ionization" value=""/>\n+            <cvParam cvRef="MS" accession="MS:1000057" name="electrospray inlet" value=""/>\n+          </source>\n+          <analyzer order="2">\n+            <cvParam cvRef="MS" accession="MS:1000083" name="radial ejection linear ion trap" value=""/>\n+          </analyzer>\n+          <detector order="3">\n+            <cvParam cvRef="MS" accession="MS:1000253" name="electron 
multiplier" value=""/>\n+          </detector>\n+        </componentList>\n'..b'+      <offset idRef="controllerType=0 controllerNumber=1 scan=2797">17135457</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2798">17143643</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2799">17152119</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2800">17160326</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2801">17168734</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2802">17177055</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2803">17185189</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2804">17193302</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2805">17201533</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2806">17209854</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2807">17218073</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2808">17226454</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2809">17234588</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2810">17242596</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2811">17250763</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2812">17259099</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2813">17267438</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2814">17275750</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2815">17284061</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2816">17292167</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2817">17300114</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 
scan=2818">17308370</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2819">17316579</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2820">17324723</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2821">17332813</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2822">17340842</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2823">17349000</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2824">17357085</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2825">17365302</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2826">17373492</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2827">17381668</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2828">17389946</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2829">17398078</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2830">17406201</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2831">17414521</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2832">17422793</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2833">17431172</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2834">17439356</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2835">17447561</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2836">17455660</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2837">17463812</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2838">17471952</offset>\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=2839">17480097</offset>\n+    </index>\n+    <index name="chromatogram">\n+      <offset idRef="TIC">17488278</offset>\n+    </index>\n+  
</indexList>\n+  <indexListOffset>17531661</indexListOffset>\n+  <fileChecksum>21efedc54a774023afcdef3203b929eb93bce540</fileChecksum>\n+</indexedmzML>\n'
b
diff -r 000000000000 -r 0b28816c1c2c test-data/compoundList.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/compoundList.csv Thu May 18 13:01:04 2023 +0000
[
@@ -0,0 +1,16 @@
+ID,Name_deu,Name_Eng,Name,SMILES,RT,CAS
+2817,Kokain,Cocaine,Cocaine,[C@H]1([C@@H]2N([C@@H](C[C@@H]1OC(=O)c1ccccc1)CC2)C)C(=O)OC,5.15,50-36-2
+2818,1-(3-Chlorophenyl)-piperazin,1-(3-Chlorophenyl)piperazine,1-(3-Chlorophenyl)piperazine,c1c(Cl)cccc1N1CCNCC1,5.25,6640-24-0
+2819,1-(3-Trifluoromethylphenyl)-piperazin,1-(3-Trifluoromethylphenyl)-piperazine,1-(3-Trifluoromethylphenyl)-piperazine,c1c(C(F)(F)F)cccc1N1CCNCC1,6.05,15532-75-9
+2820,1-Benzylpiperazin,1-Benzylpiperazine,1-Benzylpiperazine,C1CN(CCN1)Cc1ccccc1,1.64,2759-28-6
+2821,Amitriptylin,Amitriptyline,Amitriptyline,C1(\c2c(CCc3c1cccc3)cccc2)=C\CCN(C)C,8.65,50-48-6
+2822,Amphetamin,Amphetamine,Amphetamine,c1(ccccc1)CC(N)C,3.7,300-62-9
+2823,Benzoylecgonin,Benzoylecgonine,Benzoylecgonine,O(C(=O)c1ccccc1)[C@@H]1[C@@H]([C@@H]2N([C@@H](C1)CC2)C)C(=O)O,4.7,519-09-5
+2824,Dextromethorphan,Dextromethorphan,Dextromethorphan,c12[C@]34[C@@H]([C@@H]([N@@](C)CC3)Cc1ccc(c2)OC)CCCC4,6.7,125-71-3
+2825,"EDDP (2-Ethylidene-1,5-dimethyl-3,3-diphenylpyrrolidin)","EDDP (2-Ethylidene-1,5-dimethyl-3,3-diphenylpyrrolidin)","EDDP (2-Ethylidene-1,5-dimethyl-3,3-diphenylpyrrolidin)",C1[C@@H](C)N(C)C(\C1(c1ccccc1)c1ccccc1)=C\C,6.65,30223-73-5
+2826,Ketamin,Ketamine,Ketamine,c1([C@@]2(C(CCCC2)=O)NC)c(cccc1)Cl,4.7,6740-88-1
+2827,Mephedron (4-Methylmethcathinon),Mephedrone (4-Methylmethcathinone),Mephedrone (4-Methylmethcathinone),Cc1ccc(cc1)C(=O)C(C)NC,4.45,1189805-46-6
+2828,Methadon,Methadone,Methadone,CCC(=O)C(CC(C)N(C)C)(c1ccccc1)c2ccccc2,8.35,76-99-3
+2829,Methamphetamin,Methamphetamine,Methamphetamine,c1cccc(c1)C[C@H](C)NC,3.85,537-46-2
+2830,Naltrexon,Naltrexone,Naltrexone,c12[C@]34[C@@]5([C@H]([N@](CC6CC6)CC4)Cc2ccc(c1O[C@H]3C(=O)CC5)O)O,3.25,16590-41-3
+2758,Ephedrin,Ephedrine,Ephedrine,c1([C@H]([C@@H](NC)C)O)ccccc1,3.03,299-42-3
b
diff -r 000000000000 -r 0b28816c1c2c test-data/infolist.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/infolist.csv Thu May 18 13:01:04 2023 +0000
[
@@ -0,0 +1,16 @@
+,OK,id,dbcas,dbname_d,dbname_e,dataused,COMMENT.CONFIDENCE,COMMENT.EAWAG_UCHEM_ID,CH$NAME1,CH$NAME2,CH$NAME3,CH$COMPOUND_CLASS,CH$FORMULA,CH$EXACT_MASS,CH$SMILES,CH$IUPAC,CH$LINK.CAS,CH$LINK.CHEBI,CH$LINK.HMDB,CH$LINK.KEGG,CH$LINK.LIPIDMAPS,CH$LINK.PUBCHEM,CH$LINK.INCHIKEY,CH$LINK.CHEMSPIDER
+1,1,2818,6640-24-0,1-(3-Chlorophenyl)-piperazin,1-(3-Chlorophenyl)piperazine,smiles,standard compound,2818,1-(3-Chlorophenyl)piperazine,,,N/A; Environmental Standard,C10H13ClN2,196.0767261,c1c(Cl)cccc1N1CCNCC1,"InChI=1S/C10H13ClN2/c11-9-2-1-3-10(8-9)13-6-4-12-5-7-13/h1-3,8,12H,4-7H2",6640-24-0,10588,,C11738,,CID:1355,VHFVKMTVMIZMIK-UHFFFAOYSA-N,1314
+2,1,2819,15532-75-9,1-(3-Trifluoromethylphenyl)-piperazin,1-(3-Trifluoromethylphenyl)-piperazine,smiles,standard compound,2819,1-(3-(Trifluoromethyl)phenyl)piperazine,,,N/A; Environmental Standard,C11H13F3N2,230.1030831,c1c(C(F)(F)F)cccc1N1CCNCC1,"InChI=1S/C11H13F3N2/c12-11(13,14)9-2-1-3-10(8-9)16-6-4-15-5-7-16/h1-3,8,15H,4-7H2",15532-75-9,,,,,CID:4296,KKIMDKMETPPURN-UHFFFAOYSA-N,4145
+3,1,2820,2759-28-6,1-Benzylpiperazin,1-Benzylpiperazine,smiles,standard compound,2820,1-Benzylpiperazine,N-Benzylpiperazine,1-(phenylmethyl)piperazine,N/A; Environmental Standard,C11H16N2,176.1313485,C1CN(CCN1)Cc1ccccc1,"InChI=1S/C11H16N2/c1-2-4-11(5-3-1)10-13-8-6-12-7-9-13/h1-5,12H,6-10H2",2759-28-6,,,,,CID:75994,IQXXEPZFOOTTBA-UHFFFAOYSA-N,68493
+4,1,2821,50-48-6,Amitriptylin,Amitriptyline,smiles,standard compound,2821,Amitriptyline,"3-(10,11-Dihydro-5H-dibenzo[a,d][7]annulen-5-ylidene)-N,N-dimethyl-1-propanamine",,N/A; Environmental Standard,C20H23N,277.1830497,C1(\c2c(CCc3c1cccc3)cccc2)=C\CCN(C)C,"InChI=1S/C20H23N/c1-21(2)15-7-12-20-18-10-5-3-8-16(18)13-14-17-9-4-6-11-19(17)20/h3-6,8-12H,7,13-15H2,1-2H3",50-48-6,2666,,D07448,,CID:2160,KRMDCWKBEZIMAB-UHFFFAOYSA-N,2075
+5,1,2822,300-62-9,Amphetamin,Amphetamine,smiles,standard compound,2822,Amphetamine,Amfetamine,(1-methyl-2-phenyl-ethyl)amine,N/A; Environmental Standard,C9H13N,135.1047994,c1(ccccc1)CC(N)C,"InChI=1S/C9H13N/c1-8(10)7-9-5-3-2-4-6-9/h2-6,8H,7,10H2,1H3",300-62-9,2679,,D07445,,CID:3007,KWTSXDURSIMDCE-UHFFFAOYSA-N,13852819
+6,1,2823,519-09-5,Benzoylecgonin,Benzoylecgonine,dbname,standard compound,2823,Benzoylecgonine,, 3-benzoyloxy-8-methyl-8-azabicyclo[3.2.1]octane-4-carboxylic acid,N/A; Environmental Standard,C16H19NO4,289.1314081,CN1C2CCC1C(C(C2)OC(=O)C3=CC=CC=C3)C(=O)O,"InChI=1S/C16H19NO4/c1-17-11-7-8-12(17)14(15(18)19)13(9-11)21-16(20)10-5-3-2-4-6-10/h2-6,11-14H,7-9H2,1H3,(H,18,19)",519-09-5,,,,,CID:2340,GVGYEFKIHJTNQZ-UHFFFAOYSA-N,2250
+7,1,2817,50-36-2,Kokain,Cocaine,dbname,standard compound,2817,Cocaine,," (1S,3S,4R,5R)-3-benzoyloxy-8-methyl-8-azabicyclo[3.2.1]octane-4-carboxylic acid methyl ester",N/A; Environmental Standard,C17H21NO4,303.1470582,CN1C2CCC1C(C(C2)OC(=O)C3=CC=CC=C3)C(=O)OC,"InChI=1S/C17H21NO4/c1-18-12-8-9-13(18)15(17(20)21-2)14(10-12)22-16(19)11-6-4-3-5-7-11/h3-7,12-15H,8-10H2,1-2H3/t12-,13+,14-,15+/m0/s1",50-36-2,27958,,C01416,,CID:446220,ZPUCINDJVBIVPJ-LJISPDSOSA-N,10194104
+8,1,2824,125-71-3,Dextromethorphan,Dextromethorphan,smiles,standard compound,2824,Dextromethorphan,DXM,(14alpha)-3-Methoxy-17-methylmorphinan,N/A; Environmental Standard,C18H25NO,271.1936144,O(c1ccc3c(c1)[C@@]24[C@@H]([C@H](N(CC2)C)C3)CCCC4)C,"InChI=1S/C18H25NO/c1-19-10-9-18-8-4-3-5-15(18)17(19)11-13-6-7-14(20-2)12-16(13)18/h6-7,12,15,17H,3-5,8-11H2,1-2H3/t15-,17+,18+/m1/s1",125-71-3,,,D03742,,CID:6916184,MKXZASYAUGDDCJ-NJAFHUGGSA-N,13109865
+9,1,2825,30223-73-5,"EDDP (2-Ethylidene-1,5-dimethyl-3,3-diphenylpyrrolidin)","EDDP (2-Ethylidene-1,5-dimethyl-3,3-diphenylpyrrolidin)",dbname,standard compound,2825,EDDP,"(2E)-2-ethylidene-1,5-dimethyl-3,3-diphenyl-pyrrolidine",,N/A; Environmental Standard,C20H23N,277.1830497,C/C=C/1\C(CC(N1C)C)(C2=CC=CC=C2)C3=CC=CC=C3,"InChI=1S/C20H23N/c1-4-19-20(15-16(2)21(19)3,17-11-7-5-8-12-17)18-13-9-6-10-14-18/h4-14,16H,15H2,1-3H3/b19-4+",30223-73-5,,,C16659,,CID:5352621,AJRJPORIQGYFMT-RMOCHZDMSA-N,4509491
+10,1,2758,299-42-3,Ephedrin,Ephedrine,smiles,standard compound,2758,Ephedrine,"(1R,2S)-2-(methylamino)-1-phenyl-1-propanol",,N/A; Environmental Standard,C10H15NO,165.1153641,c1([C@H]([C@@H](NC)C)O)ccccc1,"InChI=1S/C10H15NO/c1-8(11-2)10(12)9-6-4-3-5-7-9/h3-8,10-12H,1-2H3/t8-,10-/m0/s1",299-42-3,15407,,C01575,,CID:9294,KWGRBVOPPLSCSI-WPRPVWTQSA-N,8935
+11,1,2826,6740-88-1,Ketamin,Ketamine,smiles,standard compound,2826,Ketamine,2-(2-chlorophenyl)-2-(methylamino)-1-cyclohexanone,,N/A; Environmental Standard,C13H16ClNO,237.0920418,CNC1(CCCCC1=O)C2=CC=CC=C2Cl,"InChI=1S/C13H16ClNO/c1-15-13(9-5-4-8-12(13)16)10-6-2-3-7-11(10)14/h2-3,6-7,15H,4-5,8-9H2,1H3",6740-88-1,,,C07525,,CID:3821,YQEZLKZALYSWHR-UHFFFAOYSA-N,3689
+12,1,2827,1189805-46-6,Mephedron (4-Methylmethcathinon),Mephedrone (4-Methylmethcathinone),dbname,standard compound,2827,Mephedrone,2-(Methylamino)-1-(4-methylphenyl)-1-propanone,4-methylmethcathinone,N/A; Environmental Standard,C11H15NO,177.1153641,Cc1ccc(cc1)C(=O)C(C)NC,"InChI=1S/C11H15NO/c1-8-4-6-10(7-5-8)11(13)9(2)12-3/h4-7,9,12H,1-3H3",1189805-46-6,59331,,,,CID:45266826,YELGFTGWJGBAQU-UHFFFAOYSA-N,21485694
+13,1,2828,76-99-3,Methadon,Methadone,smiles,standard compound,2828,Methadone,"6-(dimethylamino)-4,4-diphenyl-3-heptanone",,N/A; Environmental Standard,C21H27NO,309.2092645,CCC(=O)C(CC(C)N(C)C)(c1ccccc1)c2ccccc2,"InChI=1S/C21H27NO/c1-5-20(23)21(16-17(2)22(3)4,18-12-8-6-9-13-18)19-14-10-7-11-15-19/h6-15,17H,5,16H2,1-4H3",76-99-3,6807,,C07163,,CID:4095,USSIQXCVUWKGNF-UHFFFAOYSA-N,3953
+14,1,2829,537-46-2,Methamphetamin,Methamphetamine,smiles,standard compound,2829,Methamphetamine,(2S)-N-methyl-1-phenylpropan-2-amine,,N/A; Environmental Standard,C10H15N,149.1204495,c1cccc(c1)C[C@H](C)NC,"InChI=1S/C10H15N/c1-9(11-2)8-10-6-4-3-5-7-10/h3-7,9,11H,8H2,1-2H3/t9-/m0/s1",537-46-2,6809,,C07164,,CID:10836,MYWUZJCMWCOHBA-VIFPVBQESA-N,10379
+15,1,2830,16590-41-3,Naltrexon,Naltrexone,smiles,standard compound,2830,Naltrexone,"(5alpha)-17-(Cyclopropylmethyl)-3,14-dihydroxy-4,5-epoxymorphinan-6-one",,N/A; Environmental Standard,C20H23NO4,341.1627082,c12[C@]34[C@@]5([C@H]([N@](CC6CC6)CC4)Cc2ccc(c1O[C@H]3C(=O)CC5)O)O,"InChI=1S/C20H23NO4/c22-13-4-3-12-9-15-20(24)6-5-14(23)18-19(20,16(12)17(13)25-18)7-8-21(15)10-11-1-2-11/h3-4,11,15,18,22,24H,1-2,5-10H2/t15-,18+,19+,20-/m1/s1",16590-41-3,,,C07253,,CID:5360515,DQCKKXVULJGBQN-XFWGSAIBSA-N,4514524
b
diff -r 000000000000 -r 0b28816c1c2c test-data/mysettings_galaxy.ini
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mysettings_galaxy.ini Thu May 18 13:01:04 2023 +0000
[
@@ -0,0 +1,236 @@
+# Sample configuration file for RMassBank.
+# Adapt this file to your needs.
+# NOTE: Do not indent with TAB characters! Use only spaces.
+# (If your editor converts TAB to a certain number of spaces, it's OK.)
+# Use a space after the colon.
+
+# Deprofile input data?
+# Leave empty if input data is already in "centroid" mode.
+# Use values deprofile.spline, deprofile.fwhm or deprofile.localMax to convert the input data with the
+# corresponding algorithm. See ?deprofile
+deprofile: 
+
+# Deviation (in minutes) allowed for the retention time
+rtMargin: 0.4
+# Systematic retention time shift
+rtShift: 0.0
+
+# Directory to OpenBabel. Required for creating molfiles for MassBank export.
+# If no OpenBabel directory is given, RMassBank will attempt to use the CACTUS webservice
+# for SDF generation. You really should install OpenBabel though; the CACTUS structures
+# have explicit hydrogen atoms...
+# This must point to the directory that contains babel.exe (or the Linux "babel" equivalent).
+babeldir: '/home/debian/galaxy/database/dependencies/_conda/envs/mulled-v1-43859fe15e1521a7d623c43fc3f4525ebe6dbf17b1890fad8281284c7ee7581a/bin/'
+
+# Which MassBank record version to use; version 2 is advised.
+use_version: 2
+
+# Include reanalyzed peaks?
+use_rean_peaks: TRUE
+
+# annotate the spectra files with (putative) molecular formulas for fragments?
+add_annotation: TRUE
+
+# Annotations for the spectrum:
+annotations:
+    # Author etc. annotation
+    authors: Nomen Nescio, The Unseen University
+    copyright: Copyright (C) XXX
+    publication: 
+    license: CC BY
+    instrument: LTQ Orbitrap XL Thermo Scientific
+    instrument_type: LC-ESI-ITFT
+    confidence_comment: standard compound
+    compound_class: N/A; Environmental Standard
+    internal_id_fieldname: INTERNAL_ID
+    #
+    # HPLC annotations:
+    #
+    # example: lc_gradient: 90/10 at 0 min, 50/50 at 4 min, 5/95 at 17 min, 5/95 at 25 min, 90/10 at 25.1 min, 90/10 at 30 min
+    lc_gradient: 
+    # example: lc_flow: 200 uL/min
+    lc_flow: 
+    # example: lc_solvent_a: water with 0.1% formic acid
+    lc_solvent_a: 
+    lc_solvent_b: 
+    # example: lc_column: XBridge C18 3.5um, 2.1x50mm, Waters
+    lc_column: 
+    # Prefix for MassBank accession IDs
+    entry_prefix: XX
+    ms_type: MS2
+    ionization: ESI
+    ms_dataprocessing:
+        RECALIBRATE: loess on assigned fragments and MS1
+
+include_sp_tags: FALSE
+
+# Annotator:
+# by default, "annotator.default" is used.
+# If you want to build your custom annotator (check ?annotator.default and the source code),
+# select it here by using e.g.
+# annotator: annotator.myown
+# for a function annotator.myown(annotation)
+
+# List of data-dependent scans in their order (relative to the parent scan), for annotation of the MassBank records
+# For every data-dependent scan event, specify an element with:
+# mode: fragmentation mode, e.g. CID
+# ces: "short" format collision energy (for record title)
+# ce: "long" format collision energy (for annotation field)
+# res: FT resolution
+spectraList:
+ # First scan: CID 35% NCE, resolution 7500 
+- mode: CID
+  ces: 35%
+  ce: 35 % (nominal)
+  res: 7500
+ # Second scan: HCD 15% NCE, resolution 7500
+- mode: HCD
+  ces: 15%
+  ce: 15 % (nominal)
+  res: 7500
+ # Third scan, etc.
+- mode: HCD
+  ces: 30%
+  ce: 30 % (nominal)
+  res: 7500
+- mode: HCD
+  ces: 45%
+  ce: 45 % (nominal)
+  res: 7500
+- mode: HCD
+  ces: 60%
+  ce: 60 % (nominal)
+  res: 7500
+- mode: HCD
+  ces: 75%
+  ce: 75 % (nominal)
+  res: 7500
+- mode: HCD
+  ces: 90%
+  ce: 90 % (nominal)
+  res: 7500
+- mode: HCD
+  ces: 15%
+  ce: 15 % (nominal)
+  res: 15000
+- mode: HCD
+  ces: 30%
+  ce: 30 % (nominal)
+  res: 15000
+- mode: HCD
+  ces: 45%
+  ce: 45 % (nominal)
+  res: 15000
+- mode: HCD
+  ces: 60%
+  ce: 60 % (nominal)
+  res: 15000
+- mode: HCD
+  ces: 75%
+  ce: 75 % (nominal)
+  res: 15000
+- mode: HCD
+  ces: 90%
+  ce: 90 % (nominal)
+  res: 15000
+- mode: CID
+  ces: 35%
+  ce: 35 % (nominal)
+  res: 15000
+
+# Shifts of the starting points for RMassBank accession numbers.
+# Change these if you measure different adducts 
+accessionNumberShifts:
+    pH: 0 # [M+H]+: Accession numbers 1-14
+    pM: 16 # [M]+: 17-30
+    pNa: 32 # [M+Na]+: 33-46
+    mH: 50 # [M-H]-: 51-64
+    mFA: 66 # [M+FA]-: 67-80
+
+# A list of known electronic noise peaks
+electronicNoise:
+- 189.825
+- 201.725
+- 196.875
+# Exclusion width of electronic noise peaks (from unmatched peaks, prior to
+# reanalysis)
+electronicNoiseWidth: 0.3
+
+# recalibration settings:
+# recalibrate by: dppm or dmz
+recalibrateBy: dppm
+
+# recalibrate MS1:
+# separately (separate)
+# with common curve (common)
+# do not recalibrate (none)
+recalibrateMS1: common
+# Window width to look for MS1 peaks to recalibrate (in ppm)
+recalibrateMS1Window: 15
+
+# Custom recalibration function: You can overwrite the recal function by
+# making any function which takes rcdata$recalfield ~ rcdata$mzFound.
+# The settings define which recal function is used.
+# Note: if recalibrateMS1 is "common", the setting "recalibrator: MS1" is meaningless
+# because the MS1 points will be recalibrated together with the MS2 points with 
+# the MS2 recalibration function.
+recalibrator:
+    MS1: recalibrate.loess
+    MS2: recalibrate.loess
+
+# Define the multiplicity filtering level
+# Default is 2 (peak occurs at least twice)
+# Set this to 1 if you want to turn this option off.
+# Set this to a value > 2 if you want stricter filtering
+multiplicityFilter: 2
+
+# Define the title format.
+# You can use all entries from MassBank records as tokens
+# plus the additional token RECORD_TITLE_CE, which is a shortened
+# version of the collision energy specifically for use in the title.
+# Every line is one entry and must have one token in curly brackets
+# e.g. {CH$NAME} or {AC$MASS_SPECTROMETRY: MS_TYPE} plus optionally
+# additional text in front or behind e.g.
+# R={AC$MASS_SPECTROMETRY: RESOLUTION}
+# If this is not specified, it defaults to a title of the format
+# "Dinotefuran; LC-ESI-QFT; MS2; CE: 35%; R=35000; [M+H]+"
+# Note that every entry must be enclosed in double quotes ("") here, because otherwise the colons (:) get mangled!
+titleFormat:
+- "{CH$NAME}"
+- "{AC$INSTRUMENT_TYPE}"
+- "{AC$MASS_SPECTROMETRY: MS_TYPE}"
+- "CE: {RECORD_TITLE_CE}"
+- "R={AC$MASS_SPECTROMETRY: RESOLUTION}"
+- "{MS$FOCUSED_ION: PRECURSOR_TYPE}"
+
+# Define filter settings.
+# For Orbitrap, settings of 15 ppm in low mass range, 10 ppm in high
+# mass range, m/z = 120 as mass range division and 5 ppm for recalibrated
+# data overall are recommended. 
+filterSettings:
+    ppmHighMass: 10
+    ppmLowMass: 15
+    massRangeDivision: 120
+    ppmFine: 5
+    prelimCut: 1000
+    prelimCutRatio: 0.0
+    fineCut: 0.0
+    fineCutRatio: 0.0
+    specOkLimit: 1000
+    dbeMinLimit: -0.5
+    satelliteMzLimit: 0.5
+    satelliteIntLimit: 0.05
+    
+ # Define raw MS retrieval settings.
+findMsMsRawSettings:
+    ppmFine: 5
+    mzCoarse: 0.5
+    # fillPrecursorScan is FALSE for "good" mzML files which have all the info needed.
+    # However, for example AB Sciex files will have missing precursor scan information,
+    # in which case fillPrecursorScan = TRUE is needed. Try it out.
+    fillPrecursorScan: FALSE
+    
+# Select how to treat unknown compound masses: 
+# "charged" (the default, also if no option set) treats unknown (level 5) compound masses as the m/z,
+# "neutral" treats unknown (level 5) compound masses as the neutral mass and applies [M+H]+ and [M-H]- calculations accordingly.
+unknownMass: charged
b
diff -r 000000000000 -r 0b28816c1c2c test-data/output.zip
b
Binary file test-data/output.zip has changed
b
diff -r 000000000000 -r 0b28816c1c2c test-data/plot.pdf
b
Binary file test-data/plot.pdf has changed
b
diff -r 000000000000 -r 0b28816c1c2c test-data/resultsFull.RData
b
Binary file test-data/resultsFull.RData has changed
b
diff -r 000000000000 -r 0b28816c1c2c test-data/results_RA.RData
b
Binary file test-data/results_RA.RData has changed
b
diff -r 000000000000 -r 0b28816c1c2c test-data/results_RF.RData
b
Binary file test-data/results_RF.RData has changed