Repository 'ramclustr'
hg clone https://toolshed.g2.bx.psu.edu/repos/recetox/ramclustr

Changeset 4:050cfef6ba65 (2023-06-15)
Previous changeset 3:2ec9253a647e (2022-09-20) Next changeset 5:2410de08b55a (2023-06-22)
Commit message:
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/ramclustr commit 981ab05cdced6cbcbb1f13aa492e127365a4e9ed
modified:
macros.xml
ramclustr.xml
ramclustr_wrapper.R
added:
test-data/test5_df_featureDefinitions_input.parquet
test-data/test5_df_featureValues_input.parquet
test-data/test5_spec_abundance.csv
test-data/test5_spectra.msp
b
diff -r 2ec9253a647e -r 050cfef6ba65 macros.xml
--- a/macros.xml Tue Sep 20 14:43:56 2022 +0000
+++ b/macros.xml Thu Jun 15 14:01:48 2023 +0000
[
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@TOOL_VERSION@">1.2.4</token>
+    <token name="@TOOL_VERSION@">1.3.0</token>
 
     <xml name="creator">
         <creator>
@@ -23,6 +23,11 @@
                 familyName="Čech"
                 url="https://github.com/martenson"
                 identifier="0000-0002-9318-1781" />
+            <person
+                givenName="Zargham"
+                familyName="Ahmad"
+                url="https://github.com/zargham-ahmad"
+                identifier="0000-0002-6096-224X" />
             <organization
                 url="https://www.recetox.muni.cz/"
                 email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
@@ -36,6 +41,8 @@
                    help="Features as columns, rows as samples. Column header in format mz_rt."/>
             <param label="idMSMS" name="idmsms" type="data" format="csv" optional="true"
                    help="Optional idMSMS / MSe csv data. Same dimension and names as in input CSV are required."/>
+            <param label="phenoData" name="csv_phenoData" type="data" format="csv" optional="true"
+                   help="Optional csv containing phenoData."/>
         </section>
     </xml>
 
@@ -43,8 +50,17 @@
         <section name="xcms" title="Input MS Data as XCMS" expanded="true">
             <param name="input_xcms" label="Input XCMS" type="data" format="rdata.xcms.fillpeaks"
                    help="Grouped feature data for clustering." />
-            <param label="Preserve phenotype" name="usePheno" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true"
-                   help="Transfer phenotype data from XCMS object to Spec abundance file."/>
+        </section>
+    </xml>
+
+    <xml name="parameters_recetox_aplcms">
+        <section name="ms_dataframe" title="Input MS Data as parquet (output from recetox-aplcms)" expanded="true">
+            <param label="Input MS1 featureDefinitions" name="ms1_featureDefinitions" type="data" format="parquet"
+                   help="Metadata with columns: mz, rt, feature names containing MS data."/>
+            <param label="Input MS1 featureValues" name="ms1_featureValues" type="data" format="parquet"
+                   help="Data with rownames = sample names, colnames = feature names containing MS data."/>
+            <param label="phenoData" name="df_phenoData" type="data" format="tsv,csv" optional="true"
+                   help="CSV/TSV file containing phenoData (optional)."/>
         </section>
     </xml>
 
@@ -53,7 +69,6 @@
         <param label="Correlation method" name="cor_method" type="select" display="radio"
                help="Choose correlational method to be used - see [1] for details.">
             <option value="pearson" selected="true">pearson</option>
-            <option value="everything">everything</option>
             <option value="spearman">spearman</option>
             <option value="kendall">kendall</option>
         </param>
@@ -90,9 +105,10 @@
                     <option value="TIC">TIC</option>
                     <option value="quantile">quantile</option>
                     <option value="batch.qc">batch.qc</option>
+                    <option value="qc">qc</option>
                 </param>
                 <when value="batch.qc">
-                    <param label="Metadata details" name="batch_order_qc" type="data" format="csv" optional="true"
+                    <param label="Metadata details" name="batch_order_qc" type="data" format="csv"
                            help="CSV with sample names (or indices, currently not handled) on rows and columns with:
                            batch number ('batch'), position in sequence ('order'), and whether it is a QC sample or not
                            ('qc' with true/false OR 'sampleType' with 'sample/qc/blank')."/>
@@ -105,6 +121,25 @@
                              detection), while wider values provide less local precision in normalization but better
                              stability to individual peak areas."/>
                 </when>
+                <when value="qc">
+                    <param label="Metadata details" name="batch_order_qc" type="data" format="csv" optional="true"
+                           help="CSV with sample names (or indices, currently not handled) on rows and columns with:
+                           batch number ('batch'), position in sequence ('order'), and whether it is a QC sample or not
+                           ('qc' with true/false OR 'sampleType' with 'sample/qc/blank')."/>
+                    <param label="p.cut" name="p_cut" type="float" value="0.05" 
+                            help="Numeric. When run order correction is applied, only features showing a run order vs
+                            signal with a linear p-value (after FDR correction) &lt; p.cut will be adjusted. Also requires
+                            r-squared &gt; rsq.cut."/>
+                    <param label="rsq.cut" name="rsq_cut" type="float" value="0.1" 
+                            help="Numeric. When run order correction is applied, only features showing a run order vs signal
+                            with a linear r-squared &gt; rsq.cut will be adjusted. Also requires p-values &lt; p.cut."/>
+                    <param label="p.adjust" name="p_adjust" type="text" value="none" 
+                            help="Which p-value adjustment should be used? One of ['holm', 'hochberg', 'hommel', 'bonferroni', 'BH',
+                            'BY', 'fdr', 'none']."/>
+                </when>
+                <when value="none"/>
+                <when value="TIC"/>
+                <when value="quantile"/>
             </conditional>
         </section>
 
@@ -143,7 +178,7 @@
     </xml>
 
     <xml name="output_msp">
-       <collection label="Mass spectra from ${tool.name} on ${on_string}" name="mass_spectra_collection" type="list">
+       <collection label="Mass spectra from ${tool.name} on ${on_string} list" name="mass_spectra_collection" type="list">
            <discover_datasets pattern="__name_and_ext__" directory="spectra" recurse="true" ext="msp"/>
            <filter>not msp_output_details['merge_msp']</filter>
        </collection>
b
diff -r 2ec9253a647e -r 050cfef6ba65 ramclustr.xml
--- a/ramclustr.xml Tue Sep 20 14:43:56 2022 +0000
+++ b/ramclustr.xml Thu Jun 15 14:01:48 2023 +0000
[
b'@@ -1,4 +1,4 @@\n-<tool id="ramclustr" name="RAMClustR" version="@TOOL_VERSION@+galaxy2">\n+<tool id="ramclustr" name="RAMClustR" version="@TOOL_VERSION@+galaxy0" profile="21.09">\n     <description>A feature clustering algorithm for non-targeted mass spectrometric metabolomics data.</description>\n     <macros>\n         <import>macros.xml</import>\n@@ -7,10 +7,11 @@\n \n     <requirements>\n         <requirement type="package" version="@TOOL_VERSION@">r-ramclustr</requirement>\n-        <requirement type="package" version="3.14.0">bioconductor-xcms</requirement>\n+        <requirement type="package" version="3.20.0">bioconductor-xcms</requirement>\n+        <requirement type="package" version="9.0.0">r-arrow</requirement>\n     </requirements>\n \n-    <command detect_errors="aggressive"><![CDATA[\n+    <command detect_errors="exit_code"><![CDATA[\n         Rscript\n             -e \'source("${__tool_directory__}/ramclustr_wrapper.R")\'\n             -e \'source("${ramclustr_method}")\'\n@@ -18,52 +19,94 @@\n     </command>\n     <configfiles>\n         <configfile name="ramclustr_method">\n-            store_output(\n-                #if $filetype.type_choice == "xcms":\n-                ramclustr_xcms(\n-                    input_xcms = "$filetype.xcms.input_xcms",\n-                    use_pheno = $filetype.xcms.usePheno,\n-                #else:\n-                ramclustr_csv(\n-                    ms = "$filetype.ms_csv.ms",\n+            #if $filetype.type_choice == "xcms":\n+            obj = load("$filetype.xcms.input_xcms")\n+            ramclustObj = RAMClustR::rc.get.xcms.data(\n+                xcmsObj = xdata,\n+                #if $extras.ExpDes:\n+                    ExpDes = load_experiment_definition("${$extras.ExpDes}"),\n+                #end if\n+                mzdec = $msp_output_details.mzdec,\n+                ensure.no.na = $extras.replace_zeros\n+            )\n+            #else if $filetype.type_choice == "csv":\n+            
ramclustObj = RAMClustR::rc.get.csv.data(\n+                csv = "$filetype.ms_csv.ms",\n+                #if $filetype.ms_csv.idmsms:\n                     idmsms = "$filetype.ms_csv.idmsms",\n                 #end if\n-                    sr = $filetype.required.sr,\n-                    #if $filetype.type_choice == "xcms":\n-                        #if $filetype.required.st\n-                            st = $filetype.required.st,\n-                        #end if\n-                    #else:\n-                        st = $filetype.required.st,\n-                    #end if\n-                    cor_method = "$filetype.required.cor_method",\n-                    maxt = $filetype.required.maxt,\n-                    linkage = "$clustering.linkage",\n-                    min_module_size = $clustering.minModuleSize,\n-                    hmax = $clustering.hmax,\n-                    deep_split = "$clustering.deepSplit",\n-                    normalize = "$normalisation.normalisation_method.normalize",\n-                    #if "$normalisation.normalisation_method.normalize" == "batch.qc":\n-                        metadata_file = "$normalisation.normalisation_method.batch_order_qc",\n-                        qc_inj_range = $normalisation.normalisation_method.qc_inj_range,\n-                    #end if\n-                    block_size = $performance.blocksize,\n-                    mult = $performance.mult,\n-                    mzdec = $msp_output_details.mzdec,\n-                    rt_only_low_n = $extras.rt_only_low_n,\n-                    replace_zeros = $extras.replace_zeros,\n-                    #if $extras.ExpDes:\n-                        exp_design = "${$extras.ExpDes}"\n-                    #end if\n-                ),\n-                $msp_output_details.merge_msp,\n-                "$spec_abundance",\n-            #if $msp_output_details.merge_msp:\n-                "$mass_spectra_merged"\n-            #else:\n-                NULL\n+              
  #if $filetype.ms_csv.csv_phenoData:\n+                    phenoData = "$filetype.ms_csv.csv_p'..b'ze,\n+                linkage = "$clustering.linkage",\n+                cor.method = "$filetype.required.cor_method",\n+                rt.only.low.n = $extras.rt_only_low_n,\n+                fftempdir = NULL,\n+            )\n+            \n+            store_output(\n+                ramclustr_obj = ramclustObj,\n+                output_merge_msp = $msp_output_details.merge_msp,\n+                output_spec_abundance = "$spec_abundance",\n+                #if $msp_output_details.merge_msp:\n+                    msp_file = "$mass_spectra_merged"\n+                #else:\n+                    msp_file = NULL\n+                #end if\n+            )\n         </configfile>\n     </configfiles>\n     <inputs>\n@@ -71,6 +114,7 @@\n             <param name="type_choice" type="select" label="Choose input format:">\n                 <option value="xcms" selected="true">XCMS</option>\n                 <option value="csv">CSV</option>\n+                <option value="recetox-aplcms">RECETOX-APLCMS</option>\n             </param>\n             <when value="xcms">\n                 <expand macro="parameters_xcms" />\n@@ -88,6 +132,14 @@\n                     <expand macro="parameters_required" />\n                 </section>\n             </when>\n+            <when value="recetox-aplcms">\n+                <expand macro="parameters_recetox_aplcms" />\n+                <section name="required" title="General parameters" expanded="true">\n+                    <param label="Sigma t" name="st" type="float" value="1" help="Retention time similarity.\n+                           A recommended starting point is half the value of your average chromatographic peak width at half max (seconds))."/>\n+                    <expand macro="parameters_required" />\n+                </section>\n+            </when>\n         </conditional>\n         <expand macro="main_parameters" />\n     
</inputs>\n@@ -108,6 +160,9 @@\n                     <param name="maxt" value="259.8"/>\n                 </section>\n             </section>\n+            <section name="msp_output_details">\n+                <param name="mzdec" value="4"/>\n+            </section>\n             <section name="extras">\n                 <param name="ExpDes" value="lc-ramclustr-define-experiment.csv" ftype="csv"/>\n             </section>\n@@ -124,12 +179,15 @@\n                     <param name="maxt" value="78.4"/>\n                 </section>\n             </section>\n+            <section name="msp_output_details">\n+                <param name="mzdec" value="4"/>\n+            </section>\n             <section name="normalisation">\n                 <section name="normalisation_method">\n                     <param name="batch_order_qc" value="test2_sample_metadata_xcms_2.csv" ftype="csv" />\n                 </section>\n             </section>\n-            <output name="mass_spectra_merged" file="test2_fill_xcms_2.msp" ftype="msp" compare="diff" lines_diff="20"/>\n+            <output name="mass_spectra_merged" file="test2_fill_xcms_2.msp" ftype="msp" compare="sim_size" delta="100"/>\n             <output name="spec_abundance" file="test2_spec_abundance_xcms_2.csv" ftype="csv" compare="sim_size" delta="100"/>\n         </test>\n         <test><!-- TEST 3 -->\n@@ -159,6 +217,16 @@\n             </section>\n             <output name="mass_spectra_merged" file="test4_spectra_csv_2.msp" ftype="msp" lines_diff="10"/>\n         </test>\n+        <test><!-- TEST 5 -->\n+            <section name="filetype">\n+                <param name="type_choice" value="recetox-aplcms"/>\n+                <section name="ms_dataframe">\n+                    <param name="ms1_featureDefinitions" value="test5_df_featureDefinitions_input.parquet" ftype="parquet"/>\n+                    <param name="ms1_featureValues" value="test5_df_featureValues_input.parquet" ftype="parquet"/>\n+                
</section>\n+            </section>\n+            <output name="mass_spectra_merged" file="test5_spectra.msp" ftype="msp" lines_diff="10"/>\n+        </test>\n     </tests>\n \n     <help>\n'
b
diff -r 2ec9253a647e -r 050cfef6ba65 ramclustr_wrapper.R
--- a/ramclustr_wrapper.R Tue Sep 20 14:43:56 2022 +0000
+++ b/ramclustr_wrapper.R Thu Jun 15 14:01:48 2023 +0000
[
@@ -3,10 +3,15 @@
                          output_spec_abundance,
                          msp_file) {
     RAMClustR::write.msp(ramclustr_obj, one.file = output_merge_msp)
-    write.csv(ramclustr_obj$SpecAbund, file = output_spec_abundance, row.names = TRUE, quote = FALSE)
+    write.csv(ramclustr_obj$SpecAbund,
+        file = output_spec_abundance,
+        row.names = TRUE, quote = FALSE
+    )
 
     if (!is.null(msp_file)) {
-        exp_name <- ramclustr_obj$ExpDes[[1]][which(row.names(ramclustr_obj$ExpDes[[1]]) == "Experiment"), 1]
+        exp_name <- ramclustr_obj$ExpDes[[1]][which(
+            row.names(ramclustr_obj$ExpDes[[1]]) == "Experiment"
+        ), 1]
         filename <- paste("spectra/", exp_name, ".msp", sep = "")
         file.copy(from = filename, to = msp_file, overwrite = TRUE)
     }
@@ -35,136 +40,100 @@
     return(data)
 }
 
-ramclustr_xcms <- function(input_xcms,
-                           use_pheno,
-                           sr,
-                           st = NULL,
-                           cor_method,
-                           maxt,
-                           linkage,
-                           min_module_size,
-                           hmax,
-                           deep_split,
-                           normalize,
-                           metadata_file = NULL,
-                           qc_inj_range,
-                           block_size,
-                           mult,
-                           mzdec,
-                           rt_only_low_n,
-                           replace_zeros,
-                           exp_design = NULL) {
-    obj <- load(input_xcms)
+read_ramclustr_aplcms <- function(ms1_featuredefinitions = NULL,
+                                  ms1_featurevalues = NULL,
+                                  df_phenodata = NULL,
+                                  phenodata_ext = NULL,
+                                  exp_des = NULL,
+                                  st = NULL,
+                                  ensure_no_na = TRUE) {
+    ms1_featuredefinitions <- arrow::read_parquet(ms1_featuredefinitions)
+    ms1_featurevalues <- arrow::read_parquet(ms1_featurevalues)
 
+    if (!is.null(df_phenodata)) {
+        if (phenodata_ext == "csv") {
+            df_phenodata <- read.csv(
+                file = df_phenodata,
+                header = TRUE, check.names = FALSE
+            )
+        } else {
+            df_phenodata <- read.csv(
+                file = df_phenodata,
+                header = TRUE, check.names = FALSE, sep = "\t"
+            )
+        }
+    }
+    if (!is.null(exp_des)) {
+        exp_des <- load_experiment_definition(exp_des)
+    }
+
+    feature_values <- ms1_featurevalues[-1]
+    feature_values <- t(feature_values)
+    colnames(feature_values) <- ms1_featurevalues[[1]]
+
+    feature_definitions <- data.frame(ms1_featuredefinitions)
+
+    ramclustr_obj <- RAMClustR::rc.get.df.data(
+        ms1_featureDefinitions = feature_definitions,
+        ms1_featureValues = feature_values,
+        phenoData = df_phenodata,
+        ExpDes = exp_des,
+        st = st,
+        ensure.no.na = ensure_no_na
+    )
+    return(ramclustr_obj)
+}
+
+apply_normalisation <- function(ramclustr_obj = NULL,
+                                normalize_method,
+                                metadata_file = NULL,
+                                qc_inj_range,
+                                p_cut,
+                                rsq_cut,
+                                p_adjust) {
     batch <- NULL
     order <- NULL
     qc <- NULL
 
-    if (!is.null(metadata_file)) {
-        metadata <- read_metadata(metadata_file)
-        batch <- metadata$batch
-        order <- metadata$order
-        qc <- metadata$qc
-    }
-
-    experiment <- NULL
-
-    if (!is.null(exp_design)) {
-        experiment <- load_experiment_definition(exp_design)
-    }
-
-    x <- RAMClustR::ramclustR(
-        xcmsObj = xdata,
-        st = st,
-        maxt = maxt,
-        sr = sr,
-        deepSplit = deep_split,
-        blocksize = block_size,
-        mult = mult,
-        hmax = hmax,
-        usePheno = use_pheno,
-        mspout = FALSE,
-        qc.inj.range = qc_inj_range,
-        normalize = normalize,
-        minModuleSize = min_module_size,
-        linkage = linkage,
-        mzdec = mzdec,
-        cor.method = cor_method,
-        rt.only.low.n = rt_only_low_n,
-        fftempdir = NULL,
-        replace.zeros = replace_zeros,
-        batch = batch,
-        order = order,
-        qc = qc,
-        ExpDes = experiment
-    )
-    return(x)
-}
+    if (normalize_method == "TIC") {
+        ramclustr_obj <- RAMClustR::rc.feature.normalize.tic(
+            ramclustObj =
+                ramclustr_obj
+        )
+    } else if (normalize_method == "quantile") {
+        ramclustr_obj <- RAMClustR::rc.feature.normalize.quantile(ramclustr_obj)
+    } else if (normalize_method == "batch.qc") {
+        if (!(is.null(metadata_file) || metadata_file == "None")) {
+            metadata <- read_metadata(metadata_file)
+            batch <- metadata$batch
+            order <- metadata$order
+            qc <- metadata$qc
+        }
 
-ramclustr_csv <- function(ms,
-                          idmsms,
-                          sr,
-                          st,
-                          cor_method,
-                          maxt,
-                          linkage,
-                          min_module_size,
-                          hmax,
-                          deep_split,
-                          normalize,
-                          metadata_file = NULL,
-                          qc_inj_range,
-                          block_size,
-                          mult,
-                          mzdec,
-                          rt_only_low_n,
-                          replace_zeros,
-                          exp_design = NULL) {
-    if (!file.exists(idmsms)) {
-        idmsms <- NULL
-    }
-
-    batch <- NULL
-    order <- NULL
-    qc <- NULL
-
-    if (!is.null(metadata_file)) {
-        metadata <- read_metadata(metadata_file)
-        batch <- metadata$batch
-        order <- metadata$order
-        qc <- metadata$qc
-    }
+        ramclustr_obj <- RAMClustR::rc.feature.normalize.batch.qc(
+            order = order,
+            batch = batch,
+            qc = qc,
+            ramclustObj = ramclustr_obj,
+            qc.inj.range = qc_inj_range
+        )
+    } else {
+        if (!(is.null(metadata_file) || metadata_file == "None")) {
+            metadata <- read_metadata(metadata_file)
+            batch <- metadata$batch
+            order <- metadata$order
+            qc <- metadata$qc
+        }
 
-    experiment <- NULL
-
-    if (!is.null(exp_design)) {
-        experiment <- load_experiment_definition(exp_design)
+        ramclustr_obj <- RAMClustR::rc.feature.normalize.qc(
+            order = order,
+            batch = batch,
+            qc = qc,
+            ramclustObj = ramclustr_obj,
+            p.cut = p_cut,
+            rsq.cut = rsq_cut,
+            p.adjust = p_adjust
+        )
     }
-
-    x <- RAMClustR::ramclustR(
-        ms = ms,
-        idmsms = idmsms,
-        st = st,
-        maxt = maxt,
-        sr = sr,
-        deepSplit = deep_split,
-        blocksize = block_size,
-        mult = mult,
-        hmax = hmax,
-        mspout = FALSE,
-        qc.inj.range = qc_inj_range,
-        normalize = normalize,
-        minModuleSize = min_module_size,
-        linkage = linkage,
-        mzdec = mzdec,
-        cor.method = cor_method,
-        rt.only.low.n = rt_only_low_n,
-        fftempdir = NULL,
-        replace.zeros = replace_zeros,
-        batch = batch,
-        order = order,
-        qc = qc,
-        ExpDes = experiment
-    )
-    return(x)
+    return(ramclustr_obj)
 }
b
diff -r 2ec9253a647e -r 050cfef6ba65 test-data/test5_df_featureDefinitions_input.parquet
b
Binary file test-data/test5_df_featureDefinitions_input.parquet has changed
b
diff -r 2ec9253a647e -r 050cfef6ba65 test-data/test5_df_featureValues_input.parquet
b
Binary file test-data/test5_df_featureValues_input.parquet has changed
b
diff -r 2ec9253a647e -r 050cfef6ba65 test-data/test5_spec_abundance.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test5_spec_abundance.csv Thu Jun 15 14:01:48 2023 +0000
b
b'@@ -0,0 +1,10 @@\n+,C0001,C0002,C0003,C0004,C0005,C0006,C0007,C0008,C0009,C0010,C0011,C0012,C0013,C0014,C0015,C0016,C0017,C0018,C0019,C0020,C0021,C0022,C0023,C0024,C0025,C0026,C0027,C0028,C0029,C0030,C0031,C0032,C0033,C0034,C0035,C0036,C0037,C0038,C0039,C0040,C0041,C0042,C0043,C0044,C0045,C0046,C0047,C0048,C0049,C0050,C0051,C0052,C0053,C0054,C0055,C0056,C0057,C0058,C0059,C0060,C0061,C0062,C0063,C0064,C0065,C0066,C0067,C0068,C0069,C0070,C0071,C0072,C0073,C0074,C0075,C0076,C0077,C0078,C0079,C0080,C0081,C0082,C0083,C0084,C0085,C0086,C0087,C0088,C0089,C0090,C0091,C0092,C0093,C0094,C0095,C0096,C0097,C0098,C0099,C0100,C0101,C0102,C0103,C0104,C0105,C0106,C0107,C0108,C0109,C0110,C0111,C0112,C0113,C0114,C0115,C0116,C0117,C0118,C0119,C0120,C0121,C0122,C0123,C0124,C0125,C0126,C0127,C0128,C0129,C0130,C0131,C0132,C0133,C0134,C0135,C0136,C0137,C0138,C0139,C0140,C0141,C0142,C0143,C0144,C0145,C0146,C0147,C0148,C0149,C0150,C0151,C0152,C0153,C0154,C0155,C0156,C0157,C0158,C0159,C0160,C0161,C0162,C0163,C0164,C0165,C0166,C0167,C0168,C0169,C0170,C0171,C0172,C0173,C0174,C0175,C0176,C0177,C0178,C0179,C0180,C0181,C0182,C0183,C0184,C0185,C0186,C0187,C0188,C0189,C0190,C0191,C0192,C0193,C0194,C0195,C0196,C0197,C0198,C0199,C0200,C0201,C0202,C0203,C0204,C0205,C0206,C0207,C0208,C0209,C0210,C0211,C0212,C0213,C0214,C0215,C0216,C0217,C0218,C0219,C0220,C0221,C0222,C0223,C0224,C0225,C0226,C0227,C0228,C0229,C0230,C0231,C0232,C0233,C0234,C0235,C0236,C0237,C0238,C0239,C0240,C0241,C0242,C0243,C0244,C0245,C0246,C0247,C0248,C0249,C0250,C0251,C0252,C0253,C0254,C0255,C0256,C0257,C0258,C0259,C0260,C0261,C0262,C0263,C0264,C0265,C0266,C0267,C0268,C0269,C0270,C0271,C0272,C0273,C0274,C0275,C0276,C0277,C0278,C0279,C0280,C0281,C0282,C0283,C0284,C0285,C0286,C0287,C0288,C0289,C0290,C0291,C0292,C0293,C0294,C0295,C0296,C0297,C0298,C0299,C0300,C0301,C0302,C0303,C0304,C0305,C0306,C0307,C0308,C0309,C0310,C0311,C0312,C0313,C0314,C0315,C0316,C0317,C0318,C0319,C0320,C0321,C0322,C0323,C0324,C0325,C0326,C0327,C0328,C0329,C033
0,C0331,C0332,C0333,C0334,C0335,C0336,C0337,C0338,C0339,C0340,C0341,C0342,C0343,C0344,C0345,C0346,C0347,C0348,C0349,C0350,C0351,C0352,C0353,C0354,C0355,C0356,C0357,C0358,C0359,C0360,C0361,C0362,C0363,C0364,C0365,C0366,C0367,C0368,C0369,C0370,C0371,C0372,C0373,C0374,C0375,C0376,C0377,C0378,C0379,C0380,C0381,C0382,C0383,C0384,C0385,C0386,C0387,C0388,C0389,C0390,C0391,C0392,C0393,C0394,C0395,C0396,C0397,C0398,C0399,C0400,C0401,C0402,C0403,C0404,C0405,C0406,C0407,C0408,C0409,C0410,C0411,C0412,C0413,C0414,C0415,C0416,C0417,C0418,C0419,C0420,C0421,C0422,C0423,C0424,C0425,C0426,C0427,C0428,C0429,C0430,C0431,C0432,C0433,C0434,C0435,C0436,C0437,C0438,C0439,C0440,C0441,C0442,C0443,C0444,C0445,C0446,C0447,C0448,C0449,C0450,C0451,C0452,C0453,C0454,C0455,C0456,C0457,C0458,C0459,C0460,C0461,C0462,C0463,C0464,C0465,C0466,C0467,C0468,C0469,C0470,C0471,C0472,C0473,C0474,C0475,C0476,C0477,C0478,C0479,C0480,C0481,C0482,C0483,C0484,C0485,C0486,C0487,C0488,C0489,C0490,C0491,C0492,C0493,C0494,C0495,C0496,C0497,C0498,C0499,C0500,C0501,C0502,C0503,C0504,C0505,C0506,C0507,C0508,C0509,C0510,C0511,C0512,C0513,C0514,C0515,C0516,C0517,C0518,C0519,C0520,C0521,C0522,C0523,C0524,C0525,C0526,C0527,C0528,C0529,C0530,C0531,C0532,C0533,C0534,C0535,C0536,C0537,C0538,C0539,C0540,C0541,C0542,C0543,C0544,C0545,C0546,C0547,C0548,C0549,C0550,C0551,C0552,C0553,C0554,C0555,C0556,C0557,C0558,C0559,C0560,C0561,C0562,C0563,C0564,C0565,C0566,C0567,C0568,C0569,C0570,C0571,C0572,C0573,C0574,C0575,C0576,C0577,C0578,C0579,C0580,C0581,C0582,C0583,C0584,C0585,C0586,C0587,C0588,C0589,C0590,C0591,C0592,C0593,C0594,C0595,C0596,C0597,C0598,C0599,C0600,C0601,C0602,C0603,C0604,C0605,C0606,C0607,C0608,C0609,C0610,C0611,C0612,C0613,C0614,C0615,C0616,C0617,C0618,C0619,C0620,C0621,C0622,C0623,C0624,C0625,C0626,C0627,C0628,C0629,C0630,C0631,C0632,C0633,C0634,C0635,C0636,C0637,C0638,C0639,C0640,C0641,C0642,C0643,C0644,C0645,C0646,C0647,C0648,C0649,C0650,C0651,C0652,C0653,C0654,C0655,C0656,C0657,C0658,C0659,C0660,C0661,C0662,C0663,
C0'..b'1890827.54044429,1058535.48305162,1174473.94668038,25217.3424295541,0.0157980531143513,0.0158681577860054,0.00322019739768238,15271010.6001102,847810.073076849,1291640.28346778,0.0119955242820067,359407.3409201,0.00332466443669215,79224622.8515383,0.00065679111515097,85017.6354693117,3209.01803760194,0.013375936976232,0.00779540507999573,0.00865885461145645,1381187.16749368,4108128.408631,63569.9402404967,6251999.09870808,1028790.99802463,0.0107228932552127,0.0123152504453853,0.00459653096399393,3020750.90624301,1403261.73943271,723959.304918289,372172.360016221,0.0106952165509209,3858645.11851003,281854.484921607,8395.16679826753,3951847.06807996,4640309.0873198,327826.590818107,180012.388440151,488523.456453254,0.00548781116531023,0.00969025794255547,0.00752963001387607,273700.657950388,885745.950340352,5193699.6675535,1205705.01783407,799496.835680277,1019940.87662112,0.018211403481616,0.0106002085592618,0.0114403595014993,0.012419304035615,0.0121605486944937,0.0161786655588611,3571142.47461488,0.00326347542754756,0.0145699705119435,3637871.85973549,6481063.24335507,374454.114558227,0.0147488188605358,0.00804143717457782,0.00579937198478806,91131.5548937152,0.00758982074214222,0.0160006857994935,3246900.83710245,2254162.93614828,760527.659053816,243614.612047007,0.0117736876533314,466356.361794022,0.0120760986083024,0.0119889900894197,0.0176835983854204,0.0102852885041217,381845.035974721,0.0170340573194117,0.00642636059225177,0.00753021141417445,0.0119803617103096,12291978.4301951,20266566.4103422,2202243.28703946,0.0120207637099657,0.00690012298062719,1341059.48520704,0.0010662543524747,0.0196354997626402,0.0064159830786625,0.0175376424134065,0.0168404645218497,0.00930705231205572,0.0100890324765043,0.00648948129211162,0.00553850311403431,15113.1329819612,0.00697601729571753,0.00285311927898023,0.0055571346005208,0.0163144118779965,0.0130940774386822,284146.254945212,402488.948448869,38048.2199377507,748173.964039947,0.00300041523498714,1386462.04932415,
438749.699328453,1232017.9431832,0.0171029809899327,0.0110525287741327,0.00413935190132715,447947.582194029,1258318.51182324,71436.5816948712,107919.004373265,0.00871989222265083,0.00516459469008707,0.00317456189997491,0.00717740123299709,0.00727164746826902,365487.585879244,132382.56252777,586335.093744435,26769974.2369952,2026930.68997741,0.0115265520727976,0.00574862970238496,0.00985347585658259,140462.097233743,2501834.14846523,40179.9873538106,0.00577544056122501,0.0173584676165052,0.00919427228000263,346758.944590846,0.0180788489209951,0.00969540089887517,0.00690013015393262,1394376.1039038,1353640.74042544,0.00897641172929476,0.00675454786456528,291471.08595734,621653.225744108,89983.456544691,0.0141564081496082,17178878.9684831,160653.222444391,0.0135038949130495,0.0161928434569168,428633.865306067,0.0167990714211718,0.0109808026382489,0.0104581835955353,845711.611092909,0.0053500351760699,0.0116633682872025,0.0130525964261726,0.0135494822890305,0.00337880948896675,1341312.70720503,15348708.4352543,0.0156850081941013,0.0126887601423887,1045273.42175466,0.0163862699053983,15605868.1142038,577506.779083093,1198491.88265811,0.00457552847587717,0.0163407316778427,0.0119127179773472,0.00705603801503002,0.012650965448446,729257.702479405,1234538.61715666,219500.27834802,2314595.49192053,1266584.29623062,3717870.47997578,1344.99209115395,0.00907103299211448,2331165.66915602,882375.556733652,45051.623805762,26852395.5389656,0.0182597166855682,0.0117916852233897,0.0107474635768621,0.0113155900099838,24774.0891282544,0.0158285937135226,0.0107864707827384,1415602.23765573,0.0105125862252067,0.00166528976020863,0.00933746903958296,0.0152250415460129,0.00535395634437432,18610063.949502,0.0168170944401245,0.00781541097984697,4295756.37306493,1641071.76166332,23555507.5057717,0.00996997943713332,0.00522763507881969,490180.130682682,0.0143466465918042,0.00523025524104635,2495597.7389432,0.000198239600863095,2513784.29396115,0.0132713155816398,4643458.78955113,0.004999617539
35876\n'
b
diff -r 2ec9253a647e -r 050cfef6ba65 test-data/test5_spectra.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test5_spectra.msp Thu Jun 15 14:01:48 2023 +0000
b
b'@@ -0,0 +1,14171 @@\n+NAME:C0001\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:470.69\n+Num Peaks:32\n+329.320030885118 95921664\n+105.069898581096 53951329\n+91.0543250819912 43503607\n+79.0542392964586 25280222\n+328.311842360746 24181665\n+233.226555922738 14213490\n+459.39666852927 9947599\n+339.304462380417 8348080\n+200.155786780012 7004715\n+206.203020005769 6829470\n+275.27324668673 6776215\n+229.194910259537 5614922\n+248.245435591285 5483664\n+444.373969027837 5360199\n+118.077761374297 4527689\n+231.210290096656 4072177\n+94.0733321149056 3937006\n+160.120098367129 3509771\n+214.16685234509 3430575\n+218.198326790874 2628333\n+148.11997640655 2603466\n+134.10456604167 2592051\n+234.229678217422 2540542\n+136.120150597711 2436646\n+299.273342380218 2284047\n+220.213891246307 2187428\n+139.148240427626 2077668\n+214.171904636019 1843577\n+261.25804339695 1141487\n+536.233558298461 952203\n+430.361322439393 845697\n+257.217500942411 731317\n+\n+NAME:C0002\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:290.94\n+Num Peaks:24\n+154.091827211408 2636611196\n+230.114959319393 1064161158\n+434.26210932195 221151008\n+176.109343745776 218515756\n+243.170609468218 64752152\n+239.139125191974 52905810\n+166.104657459597 51809433\n+371.186390608199 45076208\n+318.22684218037 44737791\n+153.083976828303 39978804\n+158.12288847125 32077931\n+156.127810872887 31923469\n+130.039728825066 30020298\n+274.147000800867 25971174\n+155.112179882663 14255533\n+164.092155064065 12509170\n+118.013800776908 11212126\n+159.120387284554 10915306\n+138.073334615124 10326203\n+198.112048259213 7876781\n+246.115746291703 7293404\n+170.063223190415 7134382\n+147.080950930427 6591250\n+131.049756462764 6404861\n+\n+NAME:C0003\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:470.88\n+Num Peaks:21\n+91.0541289917318 42358706\n+159.116684117123 36907629\n+443.370116697341 12153183\n+273.257182724595 7972223\n+339.304518730044 7145212\n+234.229698785294 
5766074\n+122.104373005636 4123378\n+134.108770536737 3624342\n+120.088801610101 3453937\n+179.179126369117 3129740\n+331.326044157168 3108267\n+174.135162173121 3017572\n+190.166661665059 1634622\n+124.120066748969 1260493\n+186.139772935879 1086305\n+98.1043885936417 1070756\n+232.213759225042 1055393\n+235.151220455034 1009053\n+139.147965511766 633199\n+257.217554565482 426444\n+234.22588291533 29014\n+\n+NAME:C0004\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:470.97\n+Num Peaks:20\n+330.323245536248 20289880\n+160.124554323803 16567020\n+161.132013916703 12652058\n+219.210545113183 12083458\n+217.194971116005 11989949\n+201.16353232154 10050360\n+273.257197752287 6418444\n+191.179297726521 5696784\n+193.195061985428 4328283\n+122.10862923592 3029781\n+125.132281951254 2473288\n+186.139909384506 2206787\n+174.135340640516 1788507\n+82.3282808922271 1596725\n+425.359608163586 1165919\n+430.361445585504 608057\n+221.226035306342 556174\n+138.13577778429 550448\n+283.117696065253 431524\n+135.11223027509 426104\n+\n+NAME:C0005\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:268.77\n+Num Peaks:19\n+274.099238828342 2186088796\n+348.116328130013 467119133\n+363.147124333623 378987671\n+184.050122479308 278405714\n+465.16080147453 122617316\n+184.046102124084 110866594\n+259.081056747508 102267854\n+273.079051463741 93704595\n+213.020833586087 88467251\n+213.038330796751 64190616\n+169.031061296032 52602804\n+217.034418803142 52146959\n+185.050673838337 36962896\n+139.002572533292 31530344\n+201.057203963479 25978772\n+185.026360198645 19457463\n+212.040474648238 12671666\n+421.171555423464 10861372\n+233.083306128073 10173436\n+\n+NAME:C0006\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:470.69\n+Num Peaks:15\n+219.210508371105 9671353\n+203.179504675755 9440592\n+215.179089952416 5630025\n+273.257142983603 4035939\n+134.108808962852 2754329\n+248.245468712258 2709290\n+134.104625727431 2054174\n+204.182791878452 
1623716\n+214.17193172209 1502906\n+202.167300601052 1424736\n+158.10453197112 1418583\n+274.266118433347 935800\n+261.258064479441 568213\n+209.13539977311 404966\n+346.26370421483 3871'..b'eaks:2\n+163.147858225186 15035757\n+178.077744562786 2311938\n+\n+NAME:C1468\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:508.71\n+Num Peaks:2\n+477.302694076761 2835138\n+79.0497160832291 655889\n+\n+NAME:C1469\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:508.82\n+Num Peaks:2\n+247.241859892694 8071832\n+241.946619635007 41284\n+\n+NAME:C1470\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:523.69\n+Num Peaks:2\n+444.990708588497 31055\n+444.985672027994 10210\n+\n+NAME:C1471\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:582.61\n+Num Peaks:2\n+212.008065958889 769129\n+222.972474551264 45358\n+\n+NAME:C1472\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:579.91\n+Num Peaks:2\n+311.276455224541 3811945\n+271.096305820302 292465\n+\n+NAME:C1473\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:579.79\n+Num Peaks:2\n+342.996433325145 2100017\n+240.004005093911 380964\n+\n+NAME:C1474\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:575.46\n+Num Peaks:2\n+198.954353403373 1766113\n+256.959694533167 246572\n+\n+NAME:C1475\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:577.57\n+Num Peaks:2\n+311.276498621859 40590456\n+255.065468611285 1099338\n+\n+NAME:C1476\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:572.46\n+Num Peaks:2\n+209.029058084424 6820817\n+194.976556603722 863967\n+\n+NAME:C1477\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:564.88\n+Num Peaks:2\n+418.994666295499 2609585\n+387.968443134167 465557\n+\n+NAME:C1478\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:567.43\n+Num Peaks:2\n+193.979353826777 4448043\n+363.184224468183 1050817\n+\n+NAME:C1479\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:556.65\n+Num 
Peaks:2\n+191.000610662536 19797908\n+183.029031974598 72261\n+\n+NAME:C1480\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:558.53\n+Num Peaks:2\n+227.039362902454 5631824\n+210.010212085581 4883844\n+\n+NAME:C1481\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:561.46\n+Num Peaks:2\n+104.982086452186 2289304\n+609.129873308464 927316\n+\n+NAME:C1482\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:591.17\n+Num Peaks:2\n+77.0385058318749 8554601\n+370.312710235668 717716\n+\n+NAME:C1483\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:590.63\n+Num Peaks:2\n+285.0785608387 1687433\n+85.2526896975934 46195\n+\n+NAME:C1484\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:600.58\n+Num Peaks:2\n+311.276621921995 18115920\n+308.991450630174 51579\n+\n+NAME:C1485\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:604.24\n+Num Peaks:2\n+120.976916742413 1718044\n+297.099570161605 1487715\n+\n+NAME:C1486\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:604.36\n+Num Peaks:2\n+180.980290755228 2624751\n+328.962610891879 1826343\n+\n+NAME:C1487\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:605.22\n+Num Peaks:2\n+225.025739387981 504128\n+308.294377310992 99752\n+\n+NAME:C1488\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:608.09\n+Num Peaks:2\n+74.7637445268427 1398749\n+478.140506225444 882508\n+\n+NAME:C1489\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:613.2\n+Num Peaks:2\n+197.974358671559 2227781\n+119.081069222589 26990\n+\n+NAME:C1490\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:612.77\n+Num Peaks:2\n+463.08958220377 2533963\n+388.948709863541 259895\n+\n+NAME:C1491\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:621.14\n+Num Peaks:2\n+149.07774081938 1199096\n+371.040548811374 25542\n+\n+NAME:C1492\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:627.36\n+Num Peaks:2\n+248.988396241285 2804749\n+251.03715016044 
2081727\n+\n+NAME:C1493\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:653.04\n+Num Peaks:2\n+96.0338714962068 317238\n+212.083451514648 260976\n+\n+NAME:C1494\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:638.11\n+Num Peaks:2\n+248.988324187652 4757373\n+195.046773530451 1731831\n+\n+NAME:C1495\n+IONMODE:Negative\n+SPECTRUMTYPE:Centroid\n+RETENTIONTIME:648.57\n+Num Peaks:2\n+271.117871917616 584223\n+347.043196067196 296006\n+\n'