Mercurial > repos > recetox > msmetaenhancer
changeset 8:2ada1099f42e draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msmetaenhancer commit 538d6c063bb049fe7a08d7d4bd88af91c0d696e8
author | recetox |
---|---|
date | Thu, 18 May 2023 12:57:50 +0000 |
parents | 37afe368348b |
children | 90745ecc5e9f |
files | macros.xml msmetaenhancer.xml msmetaenhancer_wrapper.py test-data/sample_out.msp |
diffstat | 4 files changed, 73 insertions(+), 55 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Tue May 09 09:18:10 2023 +0000 +++ b/macros.xml Thu May 18 12:57:50 2023 +0000 @@ -1,5 +1,5 @@ <macros> - <token name="@TOOL_VERSION@">0.2.5</token> + <token name="@TOOL_VERSION@">0.3.0</token> <xml name="creator"> <creator> @@ -17,7 +17,7 @@ <token name="@HELP@"> <![CDATA[ - MSMetaEnhancer will fetch and update various metadata included in your spectra .msp dataset. + MSMetaEnhancer will fetch and update various metadata included in your spectra dataset. It does so through a series of conversions ran on all entries in a given file. These conversions fetch metadata from various online services. If no conversions are specified, all possible jobs will be executed in an arbitrary order. @@ -33,6 +33,8 @@ <xml name="job_options"> <option value="canonical_smiles mw RDKit">RDKit: canonical_smiles -> mw</option> + <option value="inchi canonical_smiles RDKit">RDKit: inchi -> canonical_smiles</option> + <option value="inchi isomeric_smiles RDKit">RDKit: inchi -> isomeric_smiles</option> <option value="isomeric_smiles mw RDKit">RDKit: isomeric_smiles -> mw</option> <option value="smiles mw RDKit">RDKit: smiles -> mw</option> <option value="compound_name canonical_smiles IDSM">IDSM: compound_name -> canonical_smiles</option> @@ -61,12 +63,6 @@ <option value="inchikey inchi CIR">CIR: inchikey -> inchi</option> <option value="inchikey smiles CIR">CIR: inchikey -> smiles</option> <option value="smiles inchikey CIR">CIR: smiles -> inchikey</option> - <option value="compound_name casno NLM">NLM: compound_name -> casno</option> - <option value="compound_name formula NLM">NLM: compound_name -> formula</option> - <option value="compound_name inchikey NLM">NLM: compound_name -> inchikey</option> - <option value="inchikey casno NLM">NLM: inchikey -> casno</option> - <option value="inchikey compound_name NLM">NLM: inchikey -> compound_name</option> - <option value="inchikey formula NLM">NLM: inchikey -> formula</option> <option value="compound_name canonical_smiles PubChem">PubChem: compound_name -> canonical_smiles</option> <option value="compound_name formula PubChem">PubChem: compound_name -> formula</option> <option value="compound_name inchi PubChem">PubChem: compound_name -> inchi</option> @@ -81,36 +77,36 @@ <option value="inchi pubchemid PubChem">PubChem: inchi -> pubchemid</option> <option value="pubchemid hmdbid PubChem">PubChem: pubchemid -> hmdbid</option> <option value="pubchemid inchi PubChem">PubChem: pubchemid -> inchi</option> - <option value="chebiid chemspiderid BridgeDB">BridgeDB: chebiid -> chemspiderid</option> - <option value="chebiid hmdbid BridgeDB">BridgeDB: chebiid -> hmdbid</option> - <option value="chebiid keggid BridgeDB">BridgeDB: chebiid -> keggid</option> - <option value="chebiid pubchemid BridgeDB">BridgeDB: chebiid -> pubchemid</option> - <option value="chebiid wikidataid BridgeDB">BridgeDB: chebiid -> wikidataid</option> - <option value="chemspiderid chebiid BridgeDB">BridgeDB: chemspiderid -> chebiid</option> - <option value="chemspiderid hmdbid BridgeDB">BridgeDB: chemspiderid -> hmdbid</option> - <option value="chemspiderid keggid BridgeDB">BridgeDB: chemspiderid -> keggid</option> - <option value="chemspiderid pubchemid BridgeDB">BridgeDB: chemspiderid -> pubchemid</option> - <option value="chemspiderid wikidataid BridgeDB">BridgeDB: chemspiderid -> wikidataid</option> - <option value="hmdbid chebiid BridgeDB">BridgeDB: hmdbid -> chebiid</option> - <option value="hmdbid chemspiderid BridgeDB">BridgeDB: hmdbid -> chemspiderid</option> - <option value="hmdbid keggid BridgeDB">BridgeDB: hmdbid -> keggid</option> - <option value="hmdbid pubchemid BridgeDB">BridgeDB: hmdbid -> pubchemid</option> - <option value="hmdbid wikidataid BridgeDB">BridgeDB: hmdbid -> wikidataid</option> - <option value="keggid chebiid BridgeDB">BridgeDB: keggid -> chebiid</option> - <option value="keggid chemspiderid BridgeDB">BridgeDB: keggid -> chemspiderid</option> - <option value="keggid hmdbid BridgeDB">BridgeDB: keggid -> hmdbid</option> - <option value="keggid pubchemid BridgeDB">BridgeDB: keggid -> pubchemid</option> - <option value="keggid wikidataid BridgeDB">BridgeDB: keggid -> wikidataid</option> - <option value="pubchemid chebiid BridgeDB">BridgeDB: pubchemid -> chebiid</option> - <option value="pubchemid chemspiderid BridgeDB">BridgeDB: pubchemid -> chemspiderid</option> - <option value="pubchemid hmdbid BridgeDB">BridgeDB: pubchemid -> hmdbid</option> - <option value="pubchemid keggid BridgeDB">BridgeDB: pubchemid -> keggid</option> - <option value="pubchemid wikidataid BridgeDB">BridgeDB: pubchemid -> wikidataid</option> - <option value="wikidataid chebiid BridgeDB">BridgeDB: wikidataid -> chebiid</option> - <option value="wikidataid chemspiderid BridgeDB">BridgeDB: wikidataid -> chemspiderid</option> - <option value="wikidataid hmdbid BridgeDB">BridgeDB: wikidataid -> hmdbid</option> - <option value="wikidataid keggid BridgeDB">BridgeDB: wikidataid -> keggid</option> - <option value="wikidataid pubchemid BridgeDB">BridgeDB: wikidataid -> pubchemid</option> + <option value="chebiid chemspiderid BridgeDb">BridgeDb: chebiid -> chemspiderid</option> + <option value="chebiid hmdbid BridgeDb">BridgeDb: chebiid -> hmdbid</option> + <option value="chebiid keggid BridgeDb">BridgeDb: chebiid -> keggid</option> + <option value="chebiid pubchemid BridgeDb">BridgeDb: chebiid -> pubchemid</option> + <option value="chebiid wikidataid BridgeDb">BridgeDb: chebiid -> wikidataid</option> + <option value="chemspiderid chebiid BridgeDb">BridgeDb: chemspiderid -> chebiid</option> + <option value="chemspiderid hmdbid BridgeDb">BridgeDb: chemspiderid -> hmdbid</option> + <option value="chemspiderid keggid BridgeDb">BridgeDb: chemspiderid -> keggid</option> + <option value="chemspiderid pubchemid BridgeDb">BridgeDb: chemspiderid -> pubchemid</option> + <option value="chemspiderid wikidataid BridgeDb">BridgeDb: chemspiderid -> wikidataid</option> + <option value="hmdbid chebiid BridgeDb">BridgeDb: hmdbid -> chebiid</option> + <option value="hmdbid chemspiderid BridgeDb">BridgeDb: hmdbid -> chemspiderid</option> + <option value="hmdbid keggid BridgeDb">BridgeDb: hmdbid -> keggid</option> + <option value="hmdbid pubchemid BridgeDb">BridgeDb: hmdbid -> pubchemid</option> + <option value="hmdbid wikidataid BridgeDb">BridgeDb: hmdbid -> wikidataid</option> + <option value="keggid chebiid BridgeDb">BridgeDb: keggid -> chebiid</option> + <option value="keggid chemspiderid BridgeDb">BridgeDb: keggid -> chemspiderid</option> + <option value="keggid hmdbid BridgeDb">BridgeDb: keggid -> hmdbid</option> + <option value="keggid pubchemid BridgeDb">BridgeDb: keggid -> pubchemid</option> + <option value="keggid wikidataid BridgeDb">BridgeDb: keggid -> wikidataid</option> + <option value="pubchemid chebiid BridgeDb">BridgeDb: pubchemid -> chebiid</option> + <option value="pubchemid chemspiderid BridgeDb">BridgeDb: pubchemid -> chemspiderid</option> + <option value="pubchemid hmdbid BridgeDb">BridgeDb: pubchemid -> hmdbid</option> + <option value="pubchemid keggid BridgeDb">BridgeDb: pubchemid -> keggid</option> + <option value="pubchemid wikidataid BridgeDb">BridgeDb: pubchemid -> wikidataid</option> + <option value="wikidataid chebiid BridgeDb">BridgeDb: wikidataid -> chebiid</option> + <option value="wikidataid chemspiderid BridgeDb">BridgeDb: wikidataid -> chemspiderid</option> + <option value="wikidataid hmdbid BridgeDb">BridgeDb: wikidataid -> hmdbid</option> + <option value="wikidataid keggid BridgeDb">BridgeDb: wikidataid -> keggid</option> + <option value="wikidataid pubchemid BridgeDb">BridgeDb: wikidataid -> pubchemid</option> </xml> </macros>
--- a/msmetaenhancer.xml Tue May 09 09:18:10 2023 +0000 +++ b/msmetaenhancer.xml Thu May 18 12:57:50 2023 +0000 @@ -1,4 +1,4 @@ -<tool id="msmetaenhancer" name="MSMetaEnhancer" version="@TOOL_VERSION@+galaxy2"> +<tool id="msmetaenhancer" name="MSMetaEnhancer" version="@TOOL_VERSION@+galaxy0"> <description>annotate MS data</description> <macros> @@ -23,6 +23,7 @@ <configfile name="msmetaenhancer_python_cli"> python3 ${__tool_directory__}/msmetaenhancer_wrapper.py \ --input_file "$input_file" \ + --file_format "$input_file.ext" \ --output_file "$output_file" \ #if $options.logging.output_log: --log_file "$log_file" \ @@ -44,7 +45,7 @@ </configfiles> <inputs> - <param label="Input spectra dataset" name="input_file" type="data" format="msp" /> + <param label="Input spectra dataset" name="input_file" type="data" format="msp,mgf,json,csv,tsv,xlsx" /> <repeat name="ordered_jobs" title="Ordered conversions"> <param name="ordered_jobs_select" type="select" label="Available conversions" multiple="false" optional="true"> @@ -73,7 +74,15 @@ </inputs> <outputs> - <data label="MSMetaEnhancer on ${on_string}" name="output_file" format="msp" /> + <data label="MSMetaEnhancer on ${on_string}" name="output_file" format="msp"> + <change_format> + <when input="input_file.ext" value="mgf" format="mgf" /> + <when input="input_file.ext" value="json" format="json" /> + <when input="input_file.ext" value="csv" format="csv" /> + <when input="input_file.ext" value="tsv" format="tsv" /> + <when input="input_file.ext" value="xlsx" format="xlsx" /> + </change_format> + </data> <data label="Log of MSMetaEnhancer on ${on_string}" name="log_file" format="txt"> <filter>options['logging']['output_log']</filter> </data>
--- a/msmetaenhancer_wrapper.py Tue May 09 09:18:10 2023 +0000 +++ b/msmetaenhancer_wrapper.py Thu May 18 12:57:50 2023 +0000 @@ -1,14 +1,24 @@ import argparse import asyncio +import os +import shutil import sys from matchms import set_matchms_logger_level from MSMetaEnhancer import Application +def handle_xlsx_file(app, filename): + basename = os.path.splitext(filename)[0] + temp_file = basename + '.xlsx' + app.save_data(temp_file, file_format='xlsx') + shutil.copyfile(temp_file, filename) + + def main(argv): - parser = argparse.ArgumentParser(description="Annotate MSP file.") - parser.add_argument("--input_file", type=str, help="Path to query spectra file in MSP format.") + parser = argparse.ArgumentParser(description="Annotate spectra file.") + parser.add_argument("--input_file", type=str, help="Path to query spectra file.") + parser.add_argument("--file_format", type=str, help="Format of the input and the output files.") parser.add_argument("--output_file", type=str, help="Path to output spectra file.") parser.add_argument("--jobs", type=str, help="Sequence of conversion jobs to be used.") parser.add_argument("--log_file", type=str, help="Path to log with details of the annotation process.") @@ -20,17 +30,17 @@ # set matchms logging level to avoid extensive messages in stdout while reading file set_matchms_logger_level("ERROR") - # import .msp file - app.load_spectra(args.input_file, file_format='msp') + # import spectra file + app.load_data(args.input_file, file_format=args.file_format) # set matchms logging level back to warning set_matchms_logger_level("WARNING") # curate given metadata - app.curate_spectra() + app.curate_metadata() # specify requested services and jobs - services = ['PubChem', 'CTS', 'CIR', 'NLM', 'RDKit', 'IDSM', 'BridgeDB'] + services = ['PubChem', 'CTS', 'CIR', 'RDKit', 'IDSM', 'BridgeDb'] if len(args.jobs) != 0: jobs = [] @@ -42,8 +52,11 @@ # execute without jobs parameter to run all possible jobs asyncio.run(app.annotate_spectra(services)) - # export .msp file - app.save_spectra(args.output_file, file_format="msp") + # export spectra file + if args.file_format == 'xlsx': + handle_xlsx_file(app, args.output_file) + else: + app.save_data(args.output_file, file_format=args.file_format) return 0
--- a/test-data/sample_out.msp Tue May 09 09:18:10 2023 +0000 +++ b/test-data/sample_out.msp Thu May 18 12:57:50 2023 +0000 @@ -1,9 +1,9 @@ FORMULA: H2 -MW: 2 CASNO: 1333-74-0 ID: 1 COMMENT: NIST MS# 245692, Seq# M1 COMPOUND_NAME: Hydrogen +NOMINAL_MASS: 2 INCHI: InChI=1S/H2/h1H CANONICAL_SMILES: [HH] NUM PEAKS: 2 @@ -11,11 +11,11 @@ 2.0 999.0 FORMULA: D2 -MW: 4 CASNO: 7782-39-0 ID: 2 COMMENT: NIST MS# 61316, Seq# M2 COMPOUND_NAME: Deuterium +NOMINAL_MASS: 4 INCHI: InChI=1S/H2/h1H/i1+1D CANONICAL_SMILES: [HH] NUM PEAKS: 2 @@ -23,11 +23,11 @@ 4.0 999.0 FORMULA: CH4 -MW: 16 CASNO: 74-82-8 ID: 3 COMMENT: Any=100 ; NIST MS# 61313, Seq# M64 COMPOUND_NAME: Methane +NOMINAL_MASS: 16 INCHI: InChI=1S/CH4/h1H4 CANONICAL_SMILES: C NUM PEAKS: 6 @@ -39,11 +39,11 @@ 17.0 15.99 FORMULA: CH4 -MW: 16 CASNO: 74-82-8 ID: 4 COMMENT: Any=100 ; NIST MS# 18807, Seq# R26 COMPOUND_NAME: Methane +NOMINAL_MASS: 16 INCHI: InChI=1S/CH4/h1H4 CANONICAL_SMILES: C NUM PEAKS: 6 @@ -55,11 +55,11 @@ 17.0 10.99 FORMULA: CH4 -MW: 16 CASNO: 74-82-8 ID: 5 COMMENT: Any=100 ; NIST MS# 18809, Seq# R27 COMPOUND_NAME: Methane +NOMINAL_MASS: 16 INCHI: InChI=1S/CH4/h1H4 CANONICAL_SMILES: C NUM PEAKS: 6 @@ -71,11 +71,11 @@ 17.0 11.99 FORMULA: CH4 -MW: 16 CASNO: 74-82-8 ID: 6 COMMENT: Any=100 ; NIST MS# 423924, Seq# R28 COMPOUND_NAME: Methane +NOMINAL_MASS: 16 INCHI: InChI=1S/CH4/h1H4 CANONICAL_SMILES: C NUM PEAKS: 6