changeset 0:ce612a11b455 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msmetaenhancer commit 26bede767f65ec97ac84b8cc3309db0aced22d53"
author recetox
date Tue, 22 Mar 2022 15:33:37 +0000
parents
children b50c2e941cae
files macros.xml msmetaenhancer.xml msmetaenhancer_wrapper.py test-data/sample.msp test-data/sample_out.msp
diffstat 5 files changed, 389 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Mar 22 15:33:37 2022 +0000
@@ -0,0 +1,106 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.2.0</token>
+
+    <xml name="creator">
+        <creator>
+            <person
+                identifier="0000-0003-0841-2707"
+                givenName="Matej"
+                familyName="Troják"
+                url="https://github.com/xtrojak"/>
+            <organization
+                name="RECETOX MUNI"
+                email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+                url="https://www.recetox.muni.cz/"/>
+        </creator>
+    </xml>
+
+    <token name="@HELP@">
+        <![CDATA[
+        MSMetaEnhancer will fetch and update various metadata included in your spectra .msp dataset.
+        It does so through a series of conversions run on all entries in a given file. These conversions fetch
+        metadata from various online services. If no conversions are specified, all possible jobs will be executed in an arbitrary order.
+
+        Every conversion specifies `service`, `source` and `target` attributes. This can be read as the `service` will receive the `source` attribute
+        and based on that determines the `target` attribute. For example, the conversion `PubChem: inchi -> smiles` uses PubChem to convert
+        InChI to SMILES.
+
+        The execution speed and results themselves can be affected by specifying the conversions, or their order.
+
+        For detailed documentation of the tool please visit https://msmetaenhancer.readthedocs.io/.
+    ]]>
+    </token>
+
+    <xml name="job_options"> <!-- Each value is a whitespace-separated "source target service" triple; the wrapper script splits it into a job. -->
+        <option value="canonical_smiles mw RDKit">RDKit: canonical_smiles -> mw</option>
+        <option value="isomeric_smiles mw RDKit">RDKit: isomeric_smiles -> mw</option>
+        <option value="smiles mw RDKit">RDKit: smiles -> mw</option>
+        <option value="compound_name canonical_smiles IDSM">IDSM: compound_name -> canonical_smiles</option>
+        <option value="compound_name formula IDSM">IDSM: compound_name -> formula</option>
+        <option value="compound_name inchi IDSM">IDSM: compound_name -> inchi</option>
+        <option value="compound_name inchikey IDSM">IDSM: compound_name -> inchikey</option>
+        <option value="compound_name isomeric_smiles IDSM">IDSM: compound_name -> isomeric_smiles</option>
+        <option value="compound_name iupac_name IDSM">IDSM: compound_name -> iupac_name</option>
+        <option value="inchi canonical_smiles IDSM">IDSM: inchi -> canonical_smiles</option>
+        <option value="inchi formula IDSM">IDSM: inchi -> formula</option>
+        <option value="inchi inchikey IDSM">IDSM: inchi -> inchikey</option>
+        <option value="inchi isomeric_smiles IDSM">IDSM: inchi -> isomeric_smiles</option>
+        <option value="inchi iupac_name IDSM">IDSM: inchi -> iupac_name</option>
+        <option value="iupac_name inchi IDSM">IDSM: iupac_name -> inchi</option>
+        <option value="casno inchi CTS">CTS: casno -> inchi</option>
+        <option value="casno inchikey CTS">CTS: casno -> inchikey</option>
+        <option value="compound_name inchikey CTS">CTS: compound_name -> inchikey</option>
+        <option value="hmdbid inchi CTS">CTS: hmdbid -> inchi</option>
+        <option value="inchikey compound_name CTS">CTS: inchikey -> compound_name</option>
+        <option value="inchikey inchi CTS">CTS: inchikey -> inchi</option>
+        <option value="inchikey iupac_name CTS">CTS: inchikey -> iupac_name</option>
+        <option value="casno smiles CIR">CIR: casno -> smiles</option>
+        <option value="inchi smiles CIR">CIR: inchi -> smiles</option>
+        <option value="inchikey casno CIR">CIR: inchikey -> casno</option>
+        <option value="inchikey formula CIR">CIR: inchikey -> formula</option>
+        <option value="inchikey inchi CIR">CIR: inchikey -> inchi</option>
+        <option value="inchikey smiles CIR">CIR: inchikey -> smiles</option>
+        <option value="smiles inchikey CIR">CIR: smiles -> inchikey</option>
+        <option value="compound_name casno NLM">NLM: compound_name -> casno</option>
+        <option value="compound_name formula NLM">NLM: compound_name -> formula</option>
+        <option value="compound_name inchikey NLM">NLM: compound_name -> inchikey</option>
+        <option value="inchikey casno NLM">NLM: inchikey -> casno</option>
+        <option value="inchikey compound_name NLM">NLM: inchikey -> compound_name</option>
+        <option value="inchikey formula NLM">NLM: inchikey -> formula</option>
+        <option value="compound_name canonical_smiles PubChem">PubChem: compound_name -> canonical_smiles</option>
+        <option value="compound_name formula PubChem">PubChem: compound_name -> formula</option>
+        <option value="compound_name inchi PubChem">PubChem: compound_name -> inchi</option>
+        <option value="compound_name inchikey PubChem">PubChem: compound_name -> inchikey</option>
+        <option value="compound_name isomeric_smiles PubChem">PubChem: compound_name -> isomeric_smiles</option>
+        <option value="compound_name iupac_name PubChem">PubChem: compound_name -> iupac_name</option>
+        <option value="inchi canonical_smiles PubChem">PubChem: inchi -> canonical_smiles</option>
+        <option value="inchi formula PubChem">PubChem: inchi -> formula</option>
+        <option value="inchi inchikey PubChem">PubChem: inchi -> inchikey</option>
+        <option value="inchi isomeric_smiles PubChem">PubChem: inchi -> isomeric_smiles</option>
+        <option value="inchi iupac_name PubChem">PubChem: inchi -> iupac_name</option>
+        <option value="inchi pubchemid PubChem">PubChem: inchi -> pubchemid</option>
+        <option value="pubchemid hmdbid PubChem">PubChem: pubchemid -> hmdbid</option>
+        <option value="pubchemid inchi PubChem">PubChem: pubchemid -> inchi</option>
+        <option value="chebiid chemspiderid BridgeDB">BridgeDB: chebiid -> chemspiderid</option>
+        <option value="chebiid hmdbid BridgeDB">BridgeDB: chebiid -> hmdbid</option>
+        <option value="chebiid pubchemid BridgeDB">BridgeDB: chebiid -> pubchemid</option>
+        <option value="chebiid wikidataid BridgeDB">BridgeDB: chebiid -> wikidataid</option>
+        <option value="chemspiderid chebiid BridgeDB">BridgeDB: chemspiderid -> chebiid</option>
+        <option value="chemspiderid hmdbid BridgeDB">BridgeDB: chemspiderid -> hmdbid</option>
+        <option value="chemspiderid pubchemid BridgeDB">BridgeDB: chemspiderid -> pubchemid</option>
+        <option value="chemspiderid wikidataid BridgeDB">BridgeDB: chemspiderid -> wikidataid</option>
+        <option value="hmdbid chebiid BridgeDB">BridgeDB: hmdbid -> chebiid</option>
+        <option value="hmdbid chemspiderid BridgeDB">BridgeDB: hmdbid -> chemspiderid</option>
+        <option value="hmdbid pubchemid BridgeDB">BridgeDB: hmdbid -> pubchemid</option>
+        <option value="hmdbid wikidataid BridgeDB">BridgeDB: hmdbid -> wikidataid</option>
+        <option value="pubchemid chebiid BridgeDB">BridgeDB: pubchemid -> chebiid</option>
+        <option value="pubchemid chemspiderid BridgeDB">BridgeDB: pubchemid -> chemspiderid</option>
+        <option value="pubchemid hmdbid BridgeDB">BridgeDB: pubchemid -> hmdbid</option>
+        <option value="pubchemid wikidataid BridgeDB">BridgeDB: pubchemid -> wikidataid</option>
+        <option value="wikidataid chebiid BridgeDB">BridgeDB: wikidataid -> chebiid</option>
+        <option value="wikidataid chemspiderid BridgeDB">BridgeDB: wikidataid -> chemspiderid</option>
+        <option value="wikidataid hmdbid BridgeDB">BridgeDB: wikidataid -> hmdbid</option>
+        <option value="wikidataid pubchemid BridgeDB">BridgeDB: wikidataid -> pubchemid</option>
+    </xml>
+
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/msmetaenhancer.xml	Tue Mar 22 15:33:37 2022 +0000
@@ -0,0 +1,80 @@
+<tool id="msmetaenhancer" name="MSMetaEnhancer" version="@TOOL_VERSION@+galaxy0">
+    <description>annotate MS data</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="creator"/>
+
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">msmetaenhancer</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+        sh '${msmetaenhancer_python_cli}'
+    ]]> </command> <!-- runs the generated "msmetaenhancer_python_cli" config file as a shell script; the path is quoted so spaces cannot split it -->
+
+    <configfiles>
+        <!-- Shell script run by the command block: calls the Python wrapper with the
+             input/output paths, the optional log path and a comma-separated job list in
+             which every job reads "source target service". Ordered conversions come first;
+             repeat entries without a selection render as 'None' and are left out. -->
+        <configfile name="msmetaenhancer_python_cli">
+            python3 '${__tool_directory__}/msmetaenhancer_wrapper.py' \
+            --input_file "$input_file" \
+            --output_file "$output_file" \
+            #if $options.output_log:
+            --log_file "$log_file"\
+            #end if
+            #set ordered_jobs = "".join([str($job.ordered_jobs_select) + "," for $job in $ordered_jobs if str($job.ordered_jobs_select) != 'None'])
+            #if $jobs != 'None':
+                #set random_jobs = str($jobs)
+            #else:
+                #set random_jobs = ""
+            #end if
+            #set all_jobs = str($ordered_jobs) + str($random_jobs)
+            --jobs "$all_jobs"
+        </configfile>
+    </configfiles>
+
+    <inputs>
+        <param name="input_file" type="data" format="msp" label="Input spectra dataset"/>
+
+        <repeat name="ordered_jobs" title="Ordered conversions">
+            <param name="ordered_jobs_select" type="select" multiple="false" optional="true" label="Available conversions">
+                <expand macro="job_options"/>
+            </param>
+        </repeat>
+
+        <param name="jobs" type="select" multiple="true" optional="true" label="Other conversions">
+            <expand macro="job_options"/>
+        </param>
+
+        <section name="options" title="Options">
+            <param name="output_log" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE"
+                   label="Save the log file" help="Preserve a log with details about the annotation process."/>
+        </section>
+    </inputs>
+
+    <outputs> <!-- log_file is filtered on the "output_log" option of the "options" section -->
+        <data name="output_file" format="msp" label="MSMetaEnhancer on ${on_string}"/>
+        <data name="log_file" format="txt" label="Log of MSMetaEnhancer on ${on_string}">
+            <filter>options['output_log']</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <test> <!-- two PubChem conversions given through "jobs" on sample.msp, compared with sample_out.msp; presumably needs network access (confirm) -->
+            <param name="input_file" value="sample.msp" ftype="msp" />
+            <param name="jobs" value="compound_name inchi PubChem,inchi canonical_smiles PubChem"/>
+            <output name="output_file" file="sample_out.msp" ftype="msp"/>
+        </test>
+    </tests>
+
+    <help>
+        <![CDATA[
+        @HELP@
+        ]]>
+    </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/msmetaenhancer_wrapper.py	Tue Mar 22 15:33:37 2022 +0000
@@ -0,0 +1,44 @@
+import argparse
+import asyncio
+import sys
+
+
+from MSMetaEnhancer import Application
+
+
+def main(argv):
+    parser = argparse.ArgumentParser(description="Annotate MSP file.")
+    parser.add_argument("--input_file", type=str, help="Path to query spectra file in MSP format.")
+    parser.add_argument("--output_file", type=str, help="Path to output spectra file.")
+    parser.add_argument("--jobs", type=str, help="Sequence of conversion jobs to be used.")
+    parser.add_argument("--log_file", type=str, help="Path to log with details of the annotation process.")
+    args = parser.parse_args()
+
+    app = Application(log_file=args.log_file)
+
+    # import .msp file
+    app.load_spectra(args.input_file, file_format='msp')
+
+    # curate given metadata
+    app.curate_spectra()
+
+    # specify requested services and jobs
+    services = ['PubChem', 'CTS', 'CIR', 'NLM', 'RDKit', 'IDSM', 'BridgeDB']
+
+    if len(args.jobs) != 0:
+        jobs = []
+        for job in args.jobs.split(","):
+            if len(job) != 0:
+                jobs.append(job.split())
+        asyncio.run(app.annotate_spectra(services, jobs))
+    else:
+        # execute without jobs parameter to run all possible jobs
+        asyncio.run(app.annotate_spectra(services))
+
+    # export .msp file
+    app.save_spectra(args.output_file, file_format="msp")
+    return 0
+
+
+if __name__ == "__main__":
+    main(argv=sys.argv[1:])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample.msp	Tue Mar 22 15:33:37 2022 +0000
@@ -0,0 +1,75 @@
+NAME: Hydrogen
+FORMULA: H2
+MW: 2
+CASNO: 1333740
+ID: 1
+COMMENT: NIST MS# 245692, Seq# M1
+NUM PEAKS: 2
+1.0	20.98
+2.0	999.0
+
+NAME: Deuterium
+FORMULA: D2
+MW: 4
+CASNO: 7782390
+ID: 2
+COMMENT: NIST MS# 61316, Seq# M2
+NUM PEAKS: 2
+2.0	14.99
+4.0	999.0
+
+NAME: Methane
+FORMULA: CH4
+MW: 16
+CASNO: 74828
+ID: 3
+COMMENT: Any=100 ; NIST MS# 61313, Seq# M64
+NUM PEAKS: 6
+12.0	37.97
+13.0	105.9
+14.0	203.82
+15.0	886.2
+16.0	999.0
+17.0	15.99
+
+NAME: Methane
+FORMULA: CH4
+MW: 16
+CASNO: 74828
+ID: 4
+COMMENT: Any=100 ; NIST MS# 18807, Seq# R26
+NUM PEAKS: 6
+12.0	25.98
+13.0	85.92
+14.0	170.85
+15.0	855.23
+16.0	999.0
+17.0	10.99
+
+NAME: Methane
+FORMULA: CH4
+MW: 16
+CASNO: 74828
+ID: 5
+COMMENT: Any=100 ; NIST MS# 18809, Seq# R27
+NUM PEAKS: 6
+12.0	7.99
+13.0	28.97
+14.0	74.93
+15.0	753.32
+16.0	999.0
+17.0	11.99
+
+NAME: Methane
+FORMULA: CH4
+MW: 16
+CASNO: 74828
+ID: 6
+COMMENT: Any=100 ; NIST MS# 423924, Seq# R28
+NUM PEAKS: 6
+12.0	25.98
+13.0	74.93
+14.0	152.86
+15.0	829.25
+16.0	999.0
+17.0	10.99
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_out.msp	Tue Mar 22 15:33:37 2022 +0000
@@ -0,0 +1,84 @@
+FORMULA: H2
+MW: 2
+CASNO: 1333-74-0
+ID: 1
+COMMENT: NIST MS# 245692, Seq# M1
+COMPOUND_NAME: Hydrogen
+INCHI: InChI=1S/H2/h1H
+CANONICAL_SMILES: [HH]
+NUM PEAKS: 2
+1.0         20.98
+2.0         999.0
+
+FORMULA: D2
+MW: 4
+CASNO: 7782-39-0
+ID: 2
+COMMENT: NIST MS# 61316, Seq# M2
+COMPOUND_NAME: Deuterium
+INCHI: InChI=1S/H2/h1H/i1+1D
+CANONICAL_SMILES: [HH]
+NUM PEAKS: 2
+2.0         14.99
+4.0         999.0
+
+FORMULA: CH4
+MW: 16
+CASNO: 74-82-8
+ID: 3
+COMMENT: Any=100 ; NIST MS# 61313, Seq# M64
+COMPOUND_NAME: Methane
+INCHI: InChI=1S/CH4/h1H4
+NUM PEAKS: 6
+12.0        37.97
+13.0        105.9
+14.0        203.82
+15.0        886.2
+16.0        999.0
+17.0        15.99
+
+FORMULA: CH4
+MW: 16
+CASNO: 74-82-8
+ID: 4
+COMMENT: Any=100 ; NIST MS# 18807, Seq# R26
+COMPOUND_NAME: Methane
+INCHI: InChI=1S/CH4/h1H4
+NUM PEAKS: 6
+12.0        25.98
+13.0        85.92
+14.0        170.85
+15.0        855.23
+16.0        999.0
+17.0        10.99
+
+FORMULA: CH4
+MW: 16
+CASNO: 74-82-8
+ID: 5
+COMMENT: Any=100 ; NIST MS# 18809, Seq# R27
+COMPOUND_NAME: Methane
+INCHI: InChI=1S/CH4/h1H4
+NUM PEAKS: 6
+12.0        7.99
+13.0        28.97
+14.0        74.93
+15.0        753.32
+16.0        999.0
+17.0        11.99
+
+FORMULA: CH4
+MW: 16
+CASNO: 74-82-8
+ID: 6
+COMMENT: Any=100 ; NIST MS# 423924, Seq# R28
+COMPOUND_NAME: Methane
+INCHI: InChI=1S/CH4/h1H4
+NUM PEAKS: 6
+12.0        25.98
+13.0        74.93
+14.0        152.86
+15.0        829.25
+16.0        999.0
+17.0        10.99
+