Repository 'bioconductor_msnbase_centroid'
hg clone https://toolshed.g2.bx.psu.edu/repos/recetox/bioconductor_msnbase_centroid

Changeset 0:4018639dc0a5 (2025-01-24)
Commit message:
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/bioconductor-msnbase commit 8a94b9932a94c323a33dfe858ee0a2c57fb04701
added:
bioconductor_msnbase_centroid.xml
macros.xml
test-data/29_qc_no_dil_milliq_subset.mzML
b
diff -r 000000000000 -r 4018639dc0a5 bioconductor_msnbase_centroid.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bioconductor_msnbase_centroid.xml Fri Jan 24 15:58:07 2025 +0000
[
b'@@ -0,0 +1,142 @@\n+<tool id="bioconductor_msnbase_centroid" name="bioconductor-msnbase centroid" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">\n+    <description>centroid raw profile-mode MS data</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="xrefs"/>\n+    <expand macro="creator"/>\n+    <expand macro="requirements"/>\n+    <command detect_errors="exit_code"><![CDATA[\n+        Rscript "${run_script}"\n+    ]]></command>\n+    <configfiles>\n+        <configfile name="run_script"><![CDATA[\n+            data_prof <- MSnbase::readMSData("$input_file", msLevel = $mslevel)\n+\n+            data_centroided <- MSnbase::pickPeaks(\n+                data_prof,\n+                halfWindowSize = ${halfWindowSize},\n+                method = "${estimate_noise_method}",\n+                SNR = ${snr},\n+                refineMz = "${refinement.method}",\n+                #if "$refinement.method" == "kNeighbors"\n+                k = ${refinement.k}\n+                #else if "$refinement.method" == "descendPeak"\n+                signalPercentage = ${refinement.signal_percentage},\n+                stopAtTwo = ${refinement.stop_at_two}\n+                #end if\n+            )\n+\n+            MSnbase::writeMSData(\n+                data_centroided,\n+                file = "centroided.mzml",\n+                copy = TRUE,\n+                outformat = "mzml"\n+            )\n+        ]]></configfile>\n+    </configfiles>\n+    <inputs>\n+        <param name="input_file" type="data" format="mzml" label="Input mzML File"\n+            help="The input mzML file containing the mass spectrometry data to be centroided."/>\n+        <param argument="mslevel" type="boolean" truevalue="2" falsevalue="1" checked="false" label="MS2"\n+            help="Specify if the dataset contains MS2 (tandem mass spectrometry) data." />\n+        <param name="halfWindowSize" type="integer" label="Half window size" min="1" value="2"\n+            help="The half window size for the centroiding method. This determines the number of data points on either side of the center point to include in the centroiding calculation."/>\n+        <param name="estimate_noise_method" type="select" label="Noise estimation method"\n+            help="Method to choose to estimate the noise in the spectrum.">\n+            <option value="MAD" selected="true">Median Absolute Deviation</option>\n+            <option value="SuperSmoother">Friedman\'s Super Smoother</option>\n+        </param>\n+        <param argument="--snr" type="float" min="0" value="3" label="Signal-to-noise ratio (SNR)"\n+            help="The signal-to-noise ratio threshold for removing noisy signals." />\n+        <conditional name="refinement">\n+            <param name="method" type="select" label="Peak refinement method" \n+                help="The method refines the m/z value of the identified centroids by considering data points that belong (most likely) to the same mass peak.\n+                The m/z value is calculated as an intensity weighted average of the m/z values within the peak region.\n+                How the peak region is defined depends on the method chosen.">\n+                <option value="none" selected="true">None</option>\n+                <option value="kNeighbors">K-Neighbors</option>\n+                <option value="descendPeak">Descend Peak</option>\n+            </param>\n+            <when value="kNeighbors">\n+                <param argument="--k" type="integer" min="1" max="10" value="2" label="K"\n+                    help="The number of 2*K nearest neighbors to consider for m/z interpolation during peak refinement."/>\n+            </when>\n+            <when value="descendPeak">\n+                <param argument="--signal_percentage" type="integer" min="0" max="100" value="20" label="Intensity threshold (%)"\n+                    help="The signal intensity cutoff threshold (as a percentage) for including values in the m/z calculation during peak ref'..b'                  help="Specify whether to stop the descent only after encountering two increasing scans, instead of stopping at the first increasing scan." />\n+            </when>\n+            <when value="none"/>\n+        </conditional>\n+    </inputs>\n+    <outputs>\n+        <data name="output_file" format="mzml" label="${on_string} centroided with refinement ${refinement.method}" from_work_dir="centroided.mzml"/>\n+    </outputs>\n+    <tests>\n+        <test>\n+            <param name="input_file" value="29_qc_no_dil_milliq_subset.mzML"/>\n+            <output name="output_file">\n+                <assert_contents>\n+                    <expand macro="assertions_centroiding"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <test>\n+            <param name="input_file" value="29_qc_no_dil_milliq_subset.mzML"/>\n+            <param name="method" value="kNeighbors"/>\n+            <param name="k" value="3"/>\n+            <output name="output_file">\n+                <assert_contents>\n+                    <expand macro="assertions_centroiding"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <test>\n+            <param name="input_file" value="29_qc_no_dil_milliq_subset.mzML"/>\n+            <param name="method" value="descendPeak"/>\n+            <param name="signal_percentage" value="10"/>\n+            <output name="output_file">\n+                <assert_contents>\n+                    <expand macro="assertions_centroiding"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+        .. class:: infomark\n+\n+**What it does**\n+\n+This tool performs centroiding on mass spectrometry data using the MSnbase package in R. Centroiding is a process that converts profile mode data to centroid mode by identifying the peaks in the mass spectrum and representing them as single points.\n+\n+**Usage**\n+\n+- **Input**: Provide the input mzML file containing the mass spectrometry data to be centroided.\n+- **Parameters**:\n+   - **Input mzML File**: The input mzML file containing the mass spectrometry data to be centroided.\n+   - **MS2**: Specify if the dataset contains MS2 (tandem mass spectrometry) data.\n+   - **Half window size**: The number of data points on either side of the center point to include in the centroiding calculation.\n+   - **Noise estimation method**: Choose the method to estimate the noise in the spectrum. Options include Median Absolute Deviation (MAD) and Friedman\'s Super Smoother.\n+   - **Signal-to-noise ratio (SNR)**: The signal-to-noise ratio threshold for removing noisy signals. A higher value will result in more noise being filtered out.\n+   - **Peak refinement method**: Select the method to refine the m/z value of the identified centroids. Options include None, K-Neighbors, and Descend Peak.\n+     - **K**: The number of 2*K nearest neighbors to consider for m/z interpolation during peak refinement (only applicable if K-Neighbors method is selected).\n+     - **Intensity threshold (%)**: The signal intensity cutoff threshold (as a percentage) for including values in the m/z calculation during peak refinement (only applicable if Descend Peak method is selected).\n+     - **Stop at two**: Specify whether to stop the descent only after encountering two increasing scans, instead of stopping at the first increasing scan (only applicable if Descend Peak method is selected).\n+- **Output**: The centroided mzML file.\n+\n+**Input**\n+\n+- **Input mzML File**: The input mzML file containing the mass spectrometry data to be centroided.\n+\n+**Output**\n+\n+- **Output mzML File**: The resulting mzML file after applying the centroiding algorithm.\n+\n+**References**\n+\n+For more detailed information, please refer to the original documentation available via Bioconductor: https://bioconductor.org/packages/release/bioc/html/MSnbase.html\n+        ]]></help>\n+        <expand macro="citations"/>\n+</tool>\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 4018639dc0a5 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Jan 24 15:58:07 2025 +0000
b
@@ -0,0 +1,46 @@
+<macros>
+    <xml name="creator">
+        <creator>
+            <person
+                givenName="Helge"
+                familyName="Hecht"
+                url="https://github.com/hechth"
+                identifier="0000-0001-6744-996X" />
+            <organization
+                url="https://www.recetox.muni.cz/"
+                email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+                name="RECETOX MUNI" />
+        </creator>
+    </xml>
+
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">msnbase</xref>
+            <xref type="bioconductor">msnbase</xref>
+        </xrefs>
+    </xml>
+
+    <token name="@TOOL_VERSION@">2.32.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">bioconductor-msnbase</requirement>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.18129/B9.bioc.MSnbase</citation>
+        </citations>
+    </xml>
+    <xml name="assertions_smoothing">
+        <has_text text='id="sf_ru_0"'/>
+        <has_text text="f3fdaed6b23f3690e004d16921be328018771bf1"/>
+        <has_n_lines n="1775"/>
+    </xml>
+
+    <xml name="assertions_centroiding">
+        <has_n_lines n="1512"/>
+        <has_text text='accession="MS:1000035" name="peak picking"'/>
+        <has_size size="129630" delta="50"/>
+    </xml>
+</macros>
\ No newline at end of file
b
diff -r 000000000000 -r 4018639dc0a5 test-data/29_qc_no_dil_milliq_subset.mzML
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/29_qc_no_dil_milliq_subset.mzML Fri Jan 24 15:58:07 2025 +0000
[
b'@@ -0,0 +1,1844 @@\n+<?xml version="1.0" encoding="ISO-8859-1"?>\n+<indexedmzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0_idx.xsd">\n+<mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" accession="" version="1.1.0">\n+\t<cvList count="5">\n+\t\t<cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" URI="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/>\n+\t\t<cv id="UO" fullName="Unit Ontology" URI="http://obo.cvs.sourceforge.net/obo/obo/ontology/phenotype/unit.obo"/>\n+\t\t<cv id="BTO" fullName="BrendaTissue545" version="unknown" URI="http://www.brenda-enzymes.info/ontology/tissue/tree/update/update_files/BrendaTissueOBO"/>\n+\t\t<cv id="GO" fullName="Gene Ontology - Slim Versions" version="unknown" URI="http://www.geneontology.org/GO_slims/goslim_goa.obo"/>\n+\t\t<cv id="PATO" fullName="Quality ontology" version="unknown" URI="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/quality.obo"/>\n+\t</cvList>\n+\t<fileDescription>\n+\t\t<fileContent>\n+\t\t\t<cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" />\n+\t\t</fileContent>\n+\t\t<sourceFileList count="1">\n+\t\t\t<sourceFile id="sf_ru_0" name="Galaxy1-[29_qc_no_dil_milliq.mzml].mzml" location="file:///C:/Users/473355/Downloads">\n+\t\t\t\t<cvParam cvRef="MS" accession="MS:1000569" name="SHA-1" value="f3fdaed6b23f3690e004d16921be328018771bf1" />\n+\t\t\t\t<cvParam cvRef="MS" accession="MS:1000584" name="mzML format" />\n+\t\t\t\t<cvParam cvRef="MS" accession="MS:1000768" name="Thermo nativeID format" />\n+\t\t\t</sourceFile>\n+\t\t</sourceFileList>\n+\t</fileDescription>\n+\t<sampleList count="1">\n+\t\t<sample id="sa_0" name="">\n+\t\t\t<cvParam cvRef="MS" accession="MS:1000004" name="sample mass" value="0" unitAccession="UO:0000021" unitName="gram" unitCvRef="UO" />\n+\t\t\t<cvParam cvRef="MS" accession="MS:1000005" name="sample volume" value="0" unitAccession="UO:0000098" unitName="milliliter" unitCvRef="UO" />\n+\t\t\t<cvParam cvRef="MS" accession="MS:1000006" name="sample concentration" value="0" unitAccession="UO:0000175" unitName="gram per liter" unitCvRef="UO" />\n+\t\t</sample>\n+\t</sampleList>\n+\t<softwareList count="3">\n+\t\t<software id="so_in_0" version="" >\n+\t\t\t<cvParam cvRef="MS" accession="MS:1000799" name="custom unreleased software tool" value="" />\n+\t\t</software>\n+\t\t<software id="so_default" version="" >\n+\t\t\t<cvParam cvRef="MS" accession="MS:1000799" name="custom unreleased software tool" value="" />\n+\t\t</software>\n+\t\t<software id="so_dp_sp_0_pm_0" version="1.3.4" >\n+\t\t\t<cvParam cvRef="MS" accession="MS:1003145" name="ThermoRawFileParser" />\n+\t\t</software>\n+\t</softwareList>\n+\t<instrumentConfigurationList count="1">\n+\t\t<instrumentConfiguration id="ic_0">\n+\t\t\t<cvParam cvRef="MS" accession="MS:1001911" name="Q Exactive" />\n+\t\t\t<cvParam cvRef="MS" accession="MS:1000529" name="instrument serial number" value="RECETOX Brno"/>\n+\t\t\t<componentList count="3">\n+\t\t\t\t<source order="1">\n+\t\t\t\t\t<cvParam cvRef="MS" accession="MS:1000008" name="ionization type" />\n+\t\t\t\t</source>\n+\t\t\t\t<analyzer order="2">\n+\t\t\t\t\t<cvParam cvRef="MS" accession="MS:1000014" name="accuracy" value="0" unitAccession="UO:0000169" unitName="parts per million" unitCvRef="UO" />\n+\t\t\t\t\t<cvParam cvRef="MS" accession="MS:1000022" name="TOF Total Path Length" value="0" unitAccession="UO:0000008" unitName="meter" unitCvRef="UO" />\n+\t\t\t\t\t<cvParam cvRef="MS" accession="MS:1000024" name="final MS exponent" value="0" />\n+\t\t\t\t\t<cvParam cvRef="MS" accession="MS:1000025" name="magnetic field strength" value="0" unitAccession="UO:0000228" unitName="tesla" unitCvRef="UO" />\n+\t\t\t\t\t<cvParam cvRef="MS" accession="MS:1000079" name="fourier transform ion cyclotron resonance mass spectrometer" />\n+\t\t\t\t</analyzer>\n+'..b'JIzgIOSK77Pkdcl05GcumWRUWuAEZzI2FGu+F5RgcPPEaCsbpFMd18RWTKhEZwLoBHGmgASGwdQ0jZUDpICxDqRz2qU0dbuFJGkFlSRRQtd0UoIyRGSb+QRuRTskatk55GkiZHRpgHpkU=</binary>\n+\t\t\t\t\t</binaryDataArray>\n+\t\t\t\t</binaryDataArrayList>\n+\t\t\t</spectrum>\n+\t\t</spectrumList>\n+\t</run>\n+</mzML>\n+<indexList count="1">\n+\t<index name="spectrum">\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1215">4943</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1216">12545</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1217">20000</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1218">27067</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1219">33179</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1220">38693</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1221">43939</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1222">49122</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1223">55197</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1224">61932</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1225">68344</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1226">75881</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1227">82688</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1228">89640</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1229">96581</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1230">103086</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1231">109101</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1232">114765</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1233">120568</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1234">127412</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1235">134809</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1236">142840</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1237">150308</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1238">157219</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1239">164340</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1240">171137</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1241">177921</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1242">184497</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1243">191766</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1244">199091</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1245">206509</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1246">213514</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1247">220040</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1248">226684</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1249">232940</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1250">239073</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1251">244656</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1252">249772</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1253">254827</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1254">259607</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1255">264386</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1256">269250</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1257">274833</offset>\n+\t\t<offset idRef="controllerType=0 controllerNumber=1 scan=1258">280560</offset>\n+\t</index>\n+</indexList>\n+<indexListOffset>287471</indexListOffset>\n+<fileChecksum>0</fileChecksum>\n+</indexedmzML>\n\\ No newline at end of file\n'