changeset 8:2ada1099f42e draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msmetaenhancer commit 538d6c063bb049fe7a08d7d4bd88af91c0d696e8
author recetox
date Thu, 18 May 2023 12:57:50 +0000
parents 37afe368348b
children 90745ecc5e9f
files macros.xml msmetaenhancer.xml msmetaenhancer_wrapper.py test-data/sample_out.msp
diffstat 4 files changed, 73 insertions(+), 55 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Tue May 09 09:18:10 2023 +0000
+++ b/macros.xml	Thu May 18 12:57:50 2023 +0000
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@TOOL_VERSION@">0.2.5</token>
+    <token name="@TOOL_VERSION@">0.3.0</token>
 
     <xml name="creator">
         <creator>
@@ -17,7 +17,7 @@
 
     <token name="@HELP@">
         <![CDATA[
-        MSMetaEnhancer will fetch and update various metadata included in your spectra .msp dataset.
+        MSMetaEnhancer will fetch and update various metadata included in your spectra dataset.
         It does so through a series of conversions ran on all entries in a given file. These conversions fetch
         metadata from various online services. If no conversions are specified, all possible jobs will be executed in an arbitrary order.
 
@@ -33,6 +33,8 @@
 
     <xml name="job_options">
         <option value="canonical_smiles mw RDKit">RDKit: canonical_smiles -> mw</option>
+        <option value="inchi canonical_smiles RDKit">RDKit: inchi -> canonical_smiles</option>
+        <option value="inchi isomeric_smiles RDKit">RDKit: inchi -> isomeric_smiles</option>
         <option value="isomeric_smiles mw RDKit">RDKit: isomeric_smiles -> mw</option>
         <option value="smiles mw RDKit">RDKit: smiles -> mw</option>
         <option value="compound_name canonical_smiles IDSM">IDSM: compound_name -> canonical_smiles</option>
@@ -61,12 +63,6 @@
         <option value="inchikey inchi CIR">CIR: inchikey -> inchi</option>
         <option value="inchikey smiles CIR">CIR: inchikey -> smiles</option>
         <option value="smiles inchikey CIR">CIR: smiles -> inchikey</option>
-        <option value="compound_name casno NLM">NLM: compound_name -> casno</option>
-        <option value="compound_name formula NLM">NLM: compound_name -> formula</option>
-        <option value="compound_name inchikey NLM">NLM: compound_name -> inchikey</option>
-        <option value="inchikey casno NLM">NLM: inchikey -> casno</option>
-        <option value="inchikey compound_name NLM">NLM: inchikey -> compound_name</option>
-        <option value="inchikey formula NLM">NLM: inchikey -> formula</option>
         <option value="compound_name canonical_smiles PubChem">PubChem: compound_name -> canonical_smiles</option>
         <option value="compound_name formula PubChem">PubChem: compound_name -> formula</option>
         <option value="compound_name inchi PubChem">PubChem: compound_name -> inchi</option>
@@ -81,36 +77,36 @@
         <option value="inchi pubchemid PubChem">PubChem: inchi -> pubchemid</option>
         <option value="pubchemid hmdbid PubChem">PubChem: pubchemid -> hmdbid</option>
         <option value="pubchemid inchi PubChem">PubChem: pubchemid -> inchi</option>
-        <option value="chebiid chemspiderid BridgeDB">BridgeDB: chebiid -> chemspiderid</option>
-        <option value="chebiid hmdbid BridgeDB">BridgeDB: chebiid -> hmdbid</option>
-        <option value="chebiid keggid BridgeDB">BridgeDB: chebiid -> keggid</option>
-        <option value="chebiid pubchemid BridgeDB">BridgeDB: chebiid -> pubchemid</option>
-        <option value="chebiid wikidataid BridgeDB">BridgeDB: chebiid -> wikidataid</option>
-        <option value="chemspiderid chebiid BridgeDB">BridgeDB: chemspiderid -> chebiid</option>
-        <option value="chemspiderid hmdbid BridgeDB">BridgeDB: chemspiderid -> hmdbid</option>
-        <option value="chemspiderid keggid BridgeDB">BridgeDB: chemspiderid -> keggid</option>
-        <option value="chemspiderid pubchemid BridgeDB">BridgeDB: chemspiderid -> pubchemid</option>
-        <option value="chemspiderid wikidataid BridgeDB">BridgeDB: chemspiderid -> wikidataid</option>
-        <option value="hmdbid chebiid BridgeDB">BridgeDB: hmdbid -> chebiid</option>
-        <option value="hmdbid chemspiderid BridgeDB">BridgeDB: hmdbid -> chemspiderid</option>
-        <option value="hmdbid keggid BridgeDB">BridgeDB: hmdbid -> keggid</option>
-        <option value="hmdbid pubchemid BridgeDB">BridgeDB: hmdbid -> pubchemid</option>
-        <option value="hmdbid wikidataid BridgeDB">BridgeDB: hmdbid -> wikidataid</option>
-        <option value="keggid chebiid BridgeDB">BridgeDB: keggid -> chebiid</option>
-        <option value="keggid chemspiderid BridgeDB">BridgeDB: keggid -> chemspiderid</option>
-        <option value="keggid hmdbid BridgeDB">BridgeDB: keggid -> hmdbid</option>
-        <option value="keggid pubchemid BridgeDB">BridgeDB: keggid -> pubchemid</option>
-        <option value="keggid wikidataid BridgeDB">BridgeDB: keggid -> wikidataid</option>
-        <option value="pubchemid chebiid BridgeDB">BridgeDB: pubchemid -> chebiid</option>
-        <option value="pubchemid chemspiderid BridgeDB">BridgeDB: pubchemid -> chemspiderid</option>
-        <option value="pubchemid hmdbid BridgeDB">BridgeDB: pubchemid -> hmdbid</option>
-        <option value="pubchemid keggid BridgeDB">BridgeDB: pubchemid -> keggid</option>
-        <option value="pubchemid wikidataid BridgeDB">BridgeDB: pubchemid -> wikidataid</option>
-        <option value="wikidataid chebiid BridgeDB">BridgeDB: wikidataid -> chebiid</option>
-        <option value="wikidataid chemspiderid BridgeDB">BridgeDB: wikidataid -> chemspiderid</option>
-        <option value="wikidataid hmdbid BridgeDB">BridgeDB: wikidataid -> hmdbid</option>
-        <option value="wikidataid keggid BridgeDB">BridgeDB: wikidataid -> keggid</option>
-        <option value="wikidataid pubchemid BridgeDB">BridgeDB: wikidataid -> pubchemid</option>
+        <option value="chebiid chemspiderid BridgeDb">BridgeDb: chebiid -> chemspiderid</option>
+        <option value="chebiid hmdbid BridgeDb">BridgeDb: chebiid -> hmdbid</option>
+        <option value="chebiid keggid BridgeDb">BridgeDb: chebiid -> keggid</option>
+        <option value="chebiid pubchemid BridgeDb">BridgeDb: chebiid -> pubchemid</option>
+        <option value="chebiid wikidataid BridgeDb">BridgeDb: chebiid -> wikidataid</option>
+        <option value="chemspiderid chebiid BridgeDb">BridgeDb: chemspiderid -> chebiid</option>
+        <option value="chemspiderid hmdbid BridgeDb">BridgeDb: chemspiderid -> hmdbid</option>
+        <option value="chemspiderid keggid BridgeDb">BridgeDb: chemspiderid -> keggid</option>
+        <option value="chemspiderid pubchemid BridgeDb">BridgeDb: chemspiderid -> pubchemid</option>
+        <option value="chemspiderid wikidataid BridgeDb">BridgeDb: chemspiderid -> wikidataid</option>
+        <option value="hmdbid chebiid BridgeDb">BridgeDb: hmdbid -> chebiid</option>
+        <option value="hmdbid chemspiderid BridgeDb">BridgeDb: hmdbid -> chemspiderid</option>
+        <option value="hmdbid keggid BridgeDb">BridgeDb: hmdbid -> keggid</option>
+        <option value="hmdbid pubchemid BridgeDb">BridgeDb: hmdbid -> pubchemid</option>
+        <option value="hmdbid wikidataid BridgeDb">BridgeDb: hmdbid -> wikidataid</option>
+        <option value="keggid chebiid BridgeDb">BridgeDb: keggid -> chebiid</option>
+        <option value="keggid chemspiderid BridgeDb">BridgeDb: keggid -> chemspiderid</option>
+        <option value="keggid hmdbid BridgeDb">BridgeDb: keggid -> hmdbid</option>
+        <option value="keggid pubchemid BridgeDb">BridgeDb: keggid -> pubchemid</option>
+        <option value="keggid wikidataid BridgeDb">BridgeDb: keggid -> wikidataid</option>
+        <option value="pubchemid chebiid BridgeDb">BridgeDb: pubchemid -> chebiid</option>
+        <option value="pubchemid chemspiderid BridgeDb">BridgeDb: pubchemid -> chemspiderid</option>
+        <option value="pubchemid hmdbid BridgeDb">BridgeDb: pubchemid -> hmdbid</option>
+        <option value="pubchemid keggid BridgeDb">BridgeDb: pubchemid -> keggid</option>
+        <option value="pubchemid wikidataid BridgeDb">BridgeDb: pubchemid -> wikidataid</option>
+        <option value="wikidataid chebiid BridgeDb">BridgeDb: wikidataid -> chebiid</option>
+        <option value="wikidataid chemspiderid BridgeDb">BridgeDb: wikidataid -> chemspiderid</option>
+        <option value="wikidataid hmdbid BridgeDb">BridgeDb: wikidataid -> hmdbid</option>
+        <option value="wikidataid keggid BridgeDb">BridgeDb: wikidataid -> keggid</option>
+        <option value="wikidataid pubchemid BridgeDb">BridgeDb: wikidataid -> pubchemid</option>
     </xml>
 
 </macros>
--- a/msmetaenhancer.xml	Tue May 09 09:18:10 2023 +0000
+++ b/msmetaenhancer.xml	Thu May 18 12:57:50 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="msmetaenhancer" name="MSMetaEnhancer" version="@TOOL_VERSION@+galaxy2">
+<tool id="msmetaenhancer" name="MSMetaEnhancer" version="@TOOL_VERSION@+galaxy0">
     <description>annotate MS data</description>
 
     <macros>
@@ -23,6 +23,7 @@
         <configfile name="msmetaenhancer_python_cli">
             python3 ${__tool_directory__}/msmetaenhancer_wrapper.py \
             --input_file "$input_file" \
+            --file_format "$input_file.ext" \
             --output_file "$output_file" \
             #if $options.logging.output_log:
             --log_file "$log_file" \
@@ -44,7 +45,7 @@
     </configfiles>
 
     <inputs>
-        <param label="Input spectra dataset" name="input_file" type="data" format="msp" />
+        <param label="Input spectra dataset" name="input_file" type="data" format="msp,mgf,json,csv,tsv,xlsx" />
 
         <repeat name="ordered_jobs" title="Ordered conversions">
             <param name="ordered_jobs_select" type="select" label="Available conversions" multiple="false" optional="true">
@@ -73,7 +74,15 @@
     </inputs>
 
     <outputs>
-        <data label="MSMetaEnhancer on ${on_string}" name="output_file" format="msp" />
+        <data label="MSMetaEnhancer on ${on_string}" name="output_file" format="msp">
+            <change_format>
+                <when input="input_file.ext" value="mgf" format="mgf" />
+                <when input="input_file.ext" value="json" format="json" />
+                <when input="input_file.ext" value="csv" format="csv" />
+                <when input="input_file.ext" value="tsv" format="tsv" />
+                <when input="input_file.ext" value="xlsx" format="xlsx" />
+            </change_format>
+        </data>    
         <data label="Log of MSMetaEnhancer on ${on_string}" name="log_file" format="txt">
            <filter>options['logging']['output_log']</filter>
        </data>
--- a/msmetaenhancer_wrapper.py	Tue May 09 09:18:10 2023 +0000
+++ b/msmetaenhancer_wrapper.py	Thu May 18 12:57:50 2023 +0000
@@ -1,14 +1,24 @@
 import argparse
 import asyncio
+import os
+import shutil
 import sys
 
 from matchms import set_matchms_logger_level
 from MSMetaEnhancer import Application
 
 
+def handle_xlsx_file(app, filename):
+    # Save under an '.xlsx' name (presumably required by save_data - confirm), then move it so no stray file remains.
+    temp_file = os.path.splitext(filename)[0] + '.xlsx'
+    app.save_data(temp_file, file_format='xlsx')
+    shutil.move(temp_file, filename)
+
+
 def main(argv):
-    parser = argparse.ArgumentParser(description="Annotate MSP file.")
-    parser.add_argument("--input_file", type=str, help="Path to query spectra file in MSP format.")
+    parser = argparse.ArgumentParser(description="Annotate spectra file.")
+    parser.add_argument("--input_file", type=str, help="Path to query spectra file.")
+    parser.add_argument("--file_format", type=str, help="Format of the input and the output files.")
     parser.add_argument("--output_file", type=str, help="Path to output spectra file.")
     parser.add_argument("--jobs", type=str, help="Sequence of conversion jobs to be used.")
     parser.add_argument("--log_file", type=str, help="Path to log with details of the annotation process.")
@@ -20,17 +30,17 @@
 
     # set matchms logging level to avoid extensive messages in stdout while reading file
     set_matchms_logger_level("ERROR")
-    # import .msp file
-    app.load_spectra(args.input_file, file_format='msp')
+    # import spectra file
+    app.load_data(args.input_file, file_format=args.file_format)
 
     # set matchms logging level back to warning
     set_matchms_logger_level("WARNING")
 
     # curate given metadata
-    app.curate_spectra()
+    app.curate_metadata()
 
     # specify requested services and jobs
-    services = ['PubChem', 'CTS', 'CIR', 'NLM', 'RDKit', 'IDSM', 'BridgeDB']
+    services = ['PubChem', 'CTS', 'CIR', 'RDKit', 'IDSM', 'BridgeDb']
 
     if len(args.jobs) != 0:
         jobs = []
@@ -42,8 +52,11 @@
         # execute without jobs parameter to run all possible jobs
         asyncio.run(app.annotate_spectra(services))
 
-    # export .msp file
-    app.save_spectra(args.output_file, file_format="msp")
+    # export spectra file
+    if args.file_format == 'xlsx':
+        handle_xlsx_file(app, args.output_file)
+    else:
+        app.save_data(args.output_file, file_format=args.file_format)
     return 0
 
 
--- a/test-data/sample_out.msp	Tue May 09 09:18:10 2023 +0000
+++ b/test-data/sample_out.msp	Thu May 18 12:57:50 2023 +0000
@@ -1,9 +1,9 @@
 FORMULA: H2
-MW: 2
 CASNO: 1333-74-0
 ID: 1
 COMMENT: NIST MS# 245692, Seq# M1
 COMPOUND_NAME: Hydrogen
+NOMINAL_MASS: 2
 INCHI: InChI=1S/H2/h1H
 CANONICAL_SMILES: [HH]
 NUM PEAKS: 2
@@ -11,11 +11,11 @@
 2.0         999.0
 
 FORMULA: D2
-MW: 4
 CASNO: 7782-39-0
 ID: 2
 COMMENT: NIST MS# 61316, Seq# M2
 COMPOUND_NAME: Deuterium
+NOMINAL_MASS: 4
 INCHI: InChI=1S/H2/h1H/i1+1D
 CANONICAL_SMILES: [HH]
 NUM PEAKS: 2
@@ -23,11 +23,11 @@
 4.0         999.0
 
 FORMULA: CH4
-MW: 16
 CASNO: 74-82-8
 ID: 3
 COMMENT: Any=100 ; NIST MS# 61313, Seq# M64
 COMPOUND_NAME: Methane
+NOMINAL_MASS: 16
 INCHI: InChI=1S/CH4/h1H4
 CANONICAL_SMILES: C
 NUM PEAKS: 6
@@ -39,11 +39,11 @@
 17.0        15.99
 
 FORMULA: CH4
-MW: 16
 CASNO: 74-82-8
 ID: 4
 COMMENT: Any=100 ; NIST MS# 18807, Seq# R26
 COMPOUND_NAME: Methane
+NOMINAL_MASS: 16
 INCHI: InChI=1S/CH4/h1H4
 CANONICAL_SMILES: C
 NUM PEAKS: 6
@@ -55,11 +55,11 @@
 17.0        10.99
 
 FORMULA: CH4
-MW: 16
 CASNO: 74-82-8
 ID: 5
 COMMENT: Any=100 ; NIST MS# 18809, Seq# R27
 COMPOUND_NAME: Methane
+NOMINAL_MASS: 16
 INCHI: InChI=1S/CH4/h1H4
 CANONICAL_SMILES: C
 NUM PEAKS: 6
@@ -71,11 +71,11 @@
 17.0        11.99
 
 FORMULA: CH4
-MW: 16
 CASNO: 74-82-8
 ID: 6
 COMMENT: Any=100 ; NIST MS# 423924, Seq# R28
 COMPOUND_NAME: Methane
+NOMINAL_MASS: 16
 INCHI: InChI=1S/CH4/h1H4
 CANONICAL_SMILES: C
 NUM PEAKS: 6