diff ipapy2_MS2_annotation.xml @ 0:6a23970e8093 draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/ipapy2 commit 64b61ff2823b4f54868c0ab7a4c0dc49eaf2979a
author recetox
date Fri, 16 May 2025 08:01:15 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ipapy2_MS2_annotation.xml	Fri May 16 08:01:15 2025 +0000
@@ -0,0 +1,168 @@
+<tool id="ipapy2_MS2_annotation" name="ipaPy2 MS2 annotation" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+    <description>annotate features from MS2 using a supplied MS2 database</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    
+    <expand macro="requirements"/>
+
+    <command detect_errors="exit_code"><![CDATA[
+        #set $computed_ppmthr = float($ppm) * 2
+         python3  '${__tool_directory__}/ipapy2_MS2_annotation.py'
+        --input_dataset_mapped_isotope_patterns '${mapped_isotope_patterns}' '${mapped_isotope_patterns.ext}'
+        --input_dataset_MS2 '${MS2_fragmentation_data}' '${MS2_fragmentation_data.ext}'
+        --input_dataset_adducts '${all_adducts}' '${all_adducts.ext}'
+        --input_dataset_MS2_DB '${MS2_DB}' '${MS2_DB.ext}'
+        --ppm ${ppm}
+        --ratiosd ${ratiosd}
+        #if $ppmunk
+            --ppmunk ${ppmunk}
+        #else
+            --ppmunk ${ppm}
+        #end if
+            --ratiounk ${ratiounk}
+        #if $ppmthr
+            --ppmthr ${ppmthr}
+        #else
+            --ppmthr ${computed_ppmthr}
+        #end if
+        --pRTNone ${pRTNone}
+        --pRTout ${pRTout}
+        --mzdCS ${mzdCS}
+        --ppmCS ${ppmCS}
+        --CSunk ${CSunk}
+        --evfilt ${evfilt}
+        --output_dataset '${MS2_annotations}' '${MS2_annotations.ext}'
+        --ncores \${GALAXY_SLOTS:-1}
+    ]]></command>
+
+    <inputs>
+        <param label="Mapped isotope patterns" name="mapped_isotope_patterns" type="data" format="csv,tsv,tabular,parquet" help="A dataset containing the MS1 data. Ideally obtained from map_isotope_patterns" />
+        <param label="MS2 fragmentation data" name="MS2_fragmentation_data" type="data" format="csv,tsv,tabular,parquet" help="A dataset containing the MS2 data. (If a csv file fails kindly use a tsv file.)" />
+        <param label="all possible adducts table" name="all_adducts" type="data" format="csv,tsv,tabular,parquet" help="A dataset containing the information on all the possible adducts given the database." />
+        <param label="MS2 Database" name="MS2_DB" type="data" format="csv,tsv,tabular,parquet" help="A dataset containing the database containing the MS2 information." />
+        <expand macro="ppm"/>
+        <section name="unknown" title="unknown settings">
+            <expand macro="ms_unknown"/>
+            <param name="CSunk" type="float" value="0.7">
+                <label>cosine similarity for unknown</label>
+                <help>cosine similarity score associated with the 'unknown' annotation. Default 0.7.</help>
+            </param>
+        </section>
+
+        <section name="optional_settings" title="optional settings">
+            <expand macro="ms_options"/>
+            <param name="mzdCS" type="integer" value="0" label="MS2 mz threshold">
+                <help>maximum mz difference allowed when computing cosine similarity scores. 
+                If one wants to use this parameter instead of ppmCS, this must be set to 0. Default 0.</help>
+            </param>
+            <param name="ppmCS" type="integer" value="10" label="maximum ppm for cosine similarity scores">
+                <help>maximum ppm allowed when computing cosine similarity scores. 
+                If one wants to use this parameter instead of mzdCS, this must be set to 0. Default 10.</help>
+            </param>
+            <param name="evfilt" type="select" label="same collision energy">
+                <help>If true, only spectrum acquired with the same collision energy are considered. Default value False.</help>
+                <option value="False">False</option>
+                <option value="True">True</option>
+            </param>
+        </section>
+    </inputs>
+
+    <outputs>
+        <data label="${tool.name} on ${on_string}" name="MS2_annotations"  format_source="mapped_isotope_patterns"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="mapped_isotope_patterns" value="mapped_isotope_patterns.csv"/>
+            <param name="MS2_fragmentation_data" value="MS2_fragmentation_data.tsv"/>
+            <param name="all_adducts" value="all_adducts.csv"/>
+            <param name="MS2_DB" value="MS2_DB.csv"/>
+            <param name="ppm" value="3"/>
+            <output name="MS2_annotations">
+                <assert_contents>
+                    <has_n_columns n="13" sep=","/>
+                    <has_n_lines n="158" delta="5" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+.. _ipapy2_ms2_annotation:
+
+==========================
+ipaPy2 MS2 Annotation Tool
+==========================
+
+**Tool Description**
+
+This tool performs annotation of features in your dataset using MS2 fragmentation data and a supplied MS2 database. It integrates MS1 and MS2 information to provide high-confidence annotations, leveraging prior and posterior probabilities based on mass, retention time (RT), known chemical knowledge, and isotope patterns.
+
+How it works
+------------
+
+- **Prior probabilities** are calculated using only the mass information.
+- **Posterior probabilities** incorporate mass, RT, prior knowledge, and isotope patterns for more accurate annotation.
+- The tool matches features in your data to entries in the MS2 database, considering user-defined tolerances for mass (ppm), retention time, and cosine similarity thresholds for spectral matching.
+
+Inputs
+------
+
+1. **Mapped isotope patterns**  
+   Dataset containing MS1 data, ideally obtained from the ``map_isotope_patterns`` tool.
+
+2. **MS2 fragmentation data**  
+   Dataset with MS2 spectra. If a CSV file fails, try using a TSV file.
+
+3. **All possible adducts table**  
+   Table listing all possible adducts for the database.
+
+4. **MS2 Database**  
+   Reference database containing MS2 information for annotation.
+
+5. **Parameters**  
+   - **ppm**: Mass tolerance in parts per million for matching.
+   - **Cosine similarity thresholds**:  
+   - ``mzdCS``: Maximum m/z difference for cosine similarity (set to 0 to use ``ppmCS``).
+   - ``ppmCS``: Maximum ppm for cosine similarity (set to 0 to use ``mzdCS``).
+   - **evfilt**: If set to True, only spectra acquired with the same collision energy are considered.
+   - **Unknown settings**: Parameters for handling unknowns, such as cosine similarity threshold for unknown annotations.
+
+Outputs
+-------
+
+- **MS2_annotations**  
+  Annotated dataset with additional columns describing the best-matching database entries, probabilities, and spectral similarity scores.
+
+Example
+-------
+
+Suppose you have mapped isotope patterns, MS2 fragmentation data, a list of all adducts, and an MS2 database. You can use this tool to annotate your features as follows:
+
+.. code-block::
+
+    mapped_isotope_patterns.csv
+    MS2_fragmentation_data.tsv
+    all_adducts.csv
+    MS2_DB.csv
+
+Set the desired tolerances (e.g., ``ppm = 3``) and run the tool. The output will be a table with annotations for each feature.
+
+Notes
+-----
+
+- For best results, ensure your input files are correctly formatted and contain the required columns.
+- If you encounter issues with CSV files, try converting them to TSV format.
+- The tool is designed to be flexible and can handle various input formats (CSV, TSV, Parquet, Tabular).
+
+References
+----------
+
+- For more details on the annotation algorithm and scoring, refer to the ipaPy2 documentation or associated publications.
+
+    ]]></help>
+
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file