Mercurial > repos > recetox > msmetaenhancer
changeset 0:ce612a11b455 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msmetaenhancer commit 26bede767f65ec97ac84b8cc3309db0aced22d53"
author | recetox |
---|---|
date | Tue, 22 Mar 2022 15:33:37 +0000 |
parents | |
children | b50c2e941cae |
files | macros.xml msmetaenhancer.xml msmetaenhancer_wrapper.py test-data/sample.msp test-data/sample_out.msp |
diffstat | 5 files changed, 389 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- File: macros.xml. Shared macros and tokens imported by msmetaenhancer.xml. -->
<macros>
    <!-- Version of the wrapped msmetaenhancer package; reused for the tool version and the requirement. -->
    <token name="@TOOL_VERSION@">0.2.0</token>

    <!-- Creator metadata (tool author and maintaining organization). -->
    <xml name="creator">
        <creator>
            <person givenName="Matej" familyName="Troják"
                    url="https://github.com/xtrojak"
                    identifier="0000-0003-0841-2707" />
            <organization url="https://www.recetox.muni.cz/"
                          email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
                          name="RECETOX MUNI" />
        </creator>
    </xml>

    <!-- Help text substituted into the help section of the tool. -->
    <token name="@HELP@">
        <![CDATA[
        MSMetaEnhancer will fetch and update various metadata included in your spectra .msp dataset.
        It does so through a series of conversions ran on all entries in a given file. These conversions fetch
        metadata from various online services. If no conversions are specified, all possible jobs will be executed in an arbitrary order.

        Every conversion specifies `service`, `source` and `target` attributes. This can be read as the `service` will receive the `source` attribute
        and based on that determines the `target` attribute. For example, the conversion `PubChem: inchi -> smiles` uses PubChem to convert
        InChI to SMILES.

        The execution speed and results themselves can be affected by specifying the conversions, or their order.

        For detailed documentation of the tool please visit https://msmetaenhancer.readthedocs.io/.
        ]]>
    </token>

    <!--
        Conversion choices offered by both select parameters of the tool.
        Each option value has the form "source target service" (space separated); the wrapper
        script splits it on whitespace. Options are grouped by service in this order:
        RDKit, IDSM, CTS, CIR, NLM, PubChem, BridgeDB.
    -->
    <xml name="job_options">
        <option value="canonical_smiles mw RDKit">RDKit: canonical_smiles -> mw</option>
        <option value="isomeric_smiles mw RDKit">RDKit: isomeric_smiles -> mw</option>
        <option value="smiles mw RDKit">RDKit: smiles -> mw</option>

        <option value="compound_name canonical_smiles IDSM">IDSM: compound_name -> canonical_smiles</option>
        <option value="compound_name formula IDSM">IDSM: compound_name -> formula</option>
        <option value="compound_name inchi IDSM">IDSM: compound_name -> inchi</option>
        <option value="compound_name inchikey IDSM">IDSM: compound_name -> inchikey</option>
        <option value="compound_name isomeric_smiles IDSM">IDSM: compound_name -> isomeric_smiles</option>
        <option value="compound_name iupac_name IDSM">IDSM: compound_name -> iupac_name</option>
        <option value="inchi canonical_smiles IDSM">IDSM: inchi -> canonical_smiles</option>
        <option value="inchi formula IDSM">IDSM: inchi -> formula</option>
        <option value="inchi inchikey IDSM">IDSM: inchi -> inchikey</option>
        <option value="inchi isomeric_smiles IDSM">IDSM: inchi -> isomeric_smiles</option>
        <option value="inchi iupac_name IDSM">IDSM: inchi -> iupac_name</option>
        <option value="iupac_name inchi IDSM">IDSM: iupac_name -> inchi</option>

        <option value="casno inchi CTS">CTS: casno -> inchi</option>
        <option value="casno inchikey CTS">CTS: casno -> inchikey</option>
        <option value="compound_name inchikey CTS">CTS: compound_name -> inchikey</option>
        <option value="hmdbid inchi CTS">CTS: hmdbid -> inchi</option>
        <option value="inchikey compound_name CTS">CTS: inchikey -> compound_name</option>
        <option value="inchikey inchi CTS">CTS: inchikey -> inchi</option>
        <option value="inchikey iupac_name CTS">CTS: inchikey -> iupac_name</option>

        <option value="casno smiles CIR">CIR: casno -> smiles</option>
        <option value="inchi smiles CIR">CIR: inchi -> smiles</option>
        <option value="inchikey casno CIR">CIR: inchikey -> casno</option>
        <option value="inchikey formula CIR">CIR: inchikey -> formula</option>
        <option value="inchikey inchi CIR">CIR: inchikey -> inchi</option>
        <option value="inchikey smiles CIR">CIR: inchikey -> smiles</option>
        <option value="smiles inchikey CIR">CIR: smiles -> inchikey</option>

        <option value="compound_name casno NLM">NLM: compound_name -> casno</option>
        <option value="compound_name formula NLM">NLM: compound_name -> formula</option>
        <option value="compound_name inchikey NLM">NLM: compound_name -> inchikey</option>
        <option value="inchikey casno NLM">NLM: inchikey -> casno</option>
        <option value="inchikey compound_name NLM">NLM: inchikey -> compound_name</option>
        <option value="inchikey formula NLM">NLM: inchikey -> formula</option>

        <option value="compound_name canonical_smiles PubChem">PubChem: compound_name -> canonical_smiles</option>
        <option value="compound_name formula PubChem">PubChem: compound_name -> formula</option>
        <option value="compound_name inchi PubChem">PubChem: compound_name -> inchi</option>
        <option value="compound_name inchikey PubChem">PubChem: compound_name -> inchikey</option>
        <option value="compound_name isomeric_smiles PubChem">PubChem: compound_name -> isomeric_smiles</option>
        <option value="compound_name iupac_name PubChem">PubChem: compound_name -> iupac_name</option>
        <option value="inchi canonical_smiles PubChem">PubChem: inchi -> canonical_smiles</option>
        <option value="inchi formula PubChem">PubChem: inchi -> formula</option>
        <option value="inchi inchikey PubChem">PubChem: inchi -> inchikey</option>
        <option value="inchi isomeric_smiles PubChem">PubChem: inchi -> isomeric_smiles</option>
        <option value="inchi iupac_name PubChem">PubChem: inchi -> iupac_name</option>
        <option value="inchi pubchemid PubChem">PubChem: inchi -> pubchemid</option>
        <option value="pubchemid hmdbid PubChem">PubChem: pubchemid -> hmdbid</option>
        <option value="pubchemid inchi PubChem">PubChem: pubchemid -> inchi</option>

        <option value="chebiid chemspiderid BridgeDB">BridgeDB: chebiid -> chemspiderid</option>
        <option value="chebiid hmdbid BridgeDB">BridgeDB: chebiid -> hmdbid</option>
        <option value="chebiid pubchemid BridgeDB">BridgeDB: chebiid -> pubchemid</option>
        <option value="chebiid wikidataid BridgeDB">BridgeDB: chebiid -> wikidataid</option>
        <option value="chemspiderid chebiid BridgeDB">BridgeDB: chemspiderid -> chebiid</option>
        <option value="chemspiderid hmdbid BridgeDB">BridgeDB: chemspiderid -> hmdbid</option>
        <option value="chemspiderid pubchemid BridgeDB">BridgeDB: chemspiderid -> pubchemid</option>
        <option value="chemspiderid wikidataid BridgeDB">BridgeDB: chemspiderid -> wikidataid</option>
        <option value="hmdbid chebiid BridgeDB">BridgeDB: hmdbid -> chebiid</option>
        <option value="hmdbid chemspiderid BridgeDB">BridgeDB: hmdbid -> chemspiderid</option>
        <option value="hmdbid pubchemid BridgeDB">BridgeDB: hmdbid -> pubchemid</option>
        <option value="hmdbid wikidataid BridgeDB">BridgeDB: hmdbid -> wikidataid</option>
        <option value="pubchemid chebiid BridgeDB">BridgeDB: pubchemid -> chebiid</option>
        <option value="pubchemid chemspiderid BridgeDB">BridgeDB: pubchemid -> chemspiderid</option>
        <option value="pubchemid hmdbid BridgeDB">BridgeDB: pubchemid -> hmdbid</option>
        <option value="pubchemid wikidataid BridgeDB">BridgeDB: pubchemid -> wikidataid</option>
        <option value="wikidataid chebiid BridgeDB">BridgeDB: wikidataid -> chebiid</option>
        <option value="wikidataid chemspiderid BridgeDB">BridgeDB: wikidataid -> chemspiderid</option>
        <option value="wikidataid hmdbid BridgeDB">BridgeDB: wikidataid -> hmdbid</option>
        <option value="wikidataid pubchemid BridgeDB">BridgeDB: wikidataid -> pubchemid</option>
    </xml>

</macros>
<!-- File: msmetaenhancer.xml. Galaxy tool definition that runs msmetaenhancer_wrapper.py on an MSP dataset. -->
<tool id="msmetaenhancer" name="MSMetaEnhancer" version="@TOOL_VERSION@+galaxy0">
    <description>annotate MS data</description>

    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="creator"/>

    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">msmetaenhancer</requirement>
    </requirements>

    <command detect_errors="exit_code"><![CDATA[
        ## The generated shell script lives in the job directory; quote the path so that
        ## unusual characters in it cannot split the argument.
        sh '${msmetaenhancer_python_cli}'
    ]]></command>

    <configfiles>
        <configfile name="msmetaenhancer_python_cli">
            ## Shell script that invokes the Python wrapper shipped next to this tool file.
            python3 '${__tool_directory__}/msmetaenhancer_wrapper.py' \
            --input_file "$input_file" \
            --output_file "$output_file" \
            #if $options.output_log:
            --log_file "$log_file" \
            #end if
            ## Ordered conversions come first, in the order the user added them. An entry whose
            ## optional select was left empty renders as the string 'None' and is skipped so that
            ## it is not passed to the wrapper as a job.
            #set $ordered_selection = [str($job.ordered_jobs_select) for $job in $ordered_jobs if str($job.ordered_jobs_select) != 'None']
            ## The multi-select renders as a comma separated string, or as 'None' when nothing is selected.
            #if str($jobs) != 'None':
                #set $unordered_selection = str($jobs).split(",")
            #else:
                #set $unordered_selection = []
            #end if
            ## An empty string makes the wrapper run all available conversions.
            #set $all_jobs = ",".join($ordered_selection + $unordered_selection)
            --jobs "$all_jobs"
        </configfile>
    </configfiles>

    <inputs>
        <param label="Input spectra dataset" name="input_file" type="data" format="msp" />

        <repeat name="ordered_jobs" title="Ordered conversions">
            <param name="ordered_jobs_select" type="select" label="Available conversions" multiple="false" optional="true">
                <expand macro="job_options" />
            </param>
        </repeat>

        <param name="jobs" type="select" label="Other conversions" multiple="true" optional="true">
            <expand macro="job_options" />
        </param>

        <section name="options" title="Options">
            <param label="Save the log file" name="output_log" type="boolean" truevalue="TRUE" falsevalue="FALSE"
                   checked="false" help="Preserve a log with details about the annotation process."/>
        </section>
    </inputs>

    <outputs>
        <data label="MSMetaEnhancer on ${on_string}" name="output_file" format="msp" />
        <!-- The log dataset is only created when the user asked for it. -->
        <data label="Log of MSMetaEnhancer on ${on_string}" name="log_file" format="txt">
            <filter>options['output_log']</filter>
        </data>
    </outputs>

    <tests>
        <test>
            <param name="input_file" value="sample.msp" ftype="msp" />
            <param name="jobs" value="compound_name inchi PubChem,inchi canonical_smiles PubChem"/>
            <output name="output_file" file="sample_out.msp" ftype="msp"/>
        </test>
    </tests>

    <help>
        <![CDATA[
        @HELP@
        ]]>
    </help>

</tool>
# File: msmetaenhancer_wrapper.py. Command line wrapper around MSMetaEnhancer for the Galaxy tool.
import argparse
import asyncio
import sys


from MSMetaEnhancer import Application


def _parse_jobs(jobs_arg):
    """Turn the raw ``--jobs`` string into a list of whitespace-split jobs.

    Jobs are separated by commas and the fields of one job by whitespace.
    Empty or blank entries (e.g. produced by a trailing comma) are ignored.

    Args:
        jobs_arg: Value of ``--jobs``; may be ``None`` or an empty string.

    Returns:
        A list with one list of fields per job, empty if no job was given.
    """
    if not jobs_arg:
        return []
    return [job.split() for job in jobs_arg.split(",") if job.strip()]


def main(argv):
    """Annotate the spectra of an MSP file and write the result to a new file.

    Args:
        argv: Command line arguments without the program name.

    Returns:
        0 once the output file has been written.
    """
    parser = argparse.ArgumentParser(description="Annotate MSP file.")
    parser.add_argument("--input_file", type=str, help="Path to query spectra file in MSP format.")
    parser.add_argument("--output_file", type=str, help="Path to output spectra file.")
    # Default to an empty string so that omitting --jobs means "no jobs specified"
    # instead of failing on a None value.
    parser.add_argument("--jobs", type=str, default="", help="Sequence of conversion jobs to be used.")
    parser.add_argument("--log_file", type=str, help="Path to log with details of the annotation process.")
    # Parse the arguments handed to this function rather than implicitly reading sys.argv.
    args = parser.parse_args(argv)

    app = Application(log_file=args.log_file)

    # import .msp file
    app.load_spectra(args.input_file, file_format='msp')

    # curate given metadata
    app.curate_spectra()

    # specify requested services and jobs
    services = ['PubChem', 'CTS', 'CIR', 'NLM', 'RDKit', 'IDSM', 'BridgeDB']

    jobs = _parse_jobs(args.jobs)
    if jobs:
        asyncio.run(app.annotate_spectra(services, jobs))
    else:
        # execute without jobs parameter to run all possible jobs
        asyncio.run(app.annotate_spectra(services))

    # export .msp file
    app.save_spectra(args.output_file, file_format="msp")
    return 0


if __name__ == "__main__":
    # Propagate the return value of main as the process exit status.
    sys.exit(main(argv=sys.argv[1:]))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample.msp Tue Mar 22 15:33:37 2022 +0000 @@ -0,0 +1,75 @@ +NAME: Hydrogen +FORMULA: H2 +MW: 2 +CASNO: 1333740 +ID: 1 +COMMENT: NIST MS# 245692, Seq# M1 +NUM PEAKS: 2 +1.0 20.98 +2.0 999.0 + +NAME: Deuterium +FORMULA: D2 +MW: 4 +CASNO: 7782390 +ID: 2 +COMMENT: NIST MS# 61316, Seq# M2 +NUM PEAKS: 2 +2.0 14.99 +4.0 999.0 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74828 +ID: 3 +COMMENT: Any=100 ; NIST MS# 61313, Seq# M64 +NUM PEAKS: 6 +12.0 37.97 +13.0 105.9 +14.0 203.82 +15.0 886.2 +16.0 999.0 +17.0 15.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74828 +ID: 4 +COMMENT: Any=100 ; NIST MS# 18807, Seq# R26 +NUM PEAKS: 6 +12.0 25.98 +13.0 85.92 +14.0 170.85 +15.0 855.23 +16.0 999.0 +17.0 10.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74828 +ID: 5 +COMMENT: Any=100 ; NIST MS# 18809, Seq# R27 +NUM PEAKS: 6 +12.0 7.99 +13.0 28.97 +14.0 74.93 +15.0 753.32 +16.0 999.0 +17.0 11.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74828 +ID: 6 +COMMENT: Any=100 ; NIST MS# 423924, Seq# R28 +NUM PEAKS: 6 +12.0 25.98 +13.0 74.93 +14.0 152.86 +15.0 829.25 +16.0 999.0 +17.0 10.99 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_out.msp Tue Mar 22 15:33:37 2022 +0000 @@ -0,0 +1,84 @@ +FORMULA: H2 +MW: 2 +CASNO: 1333-74-0 +ID: 1 +COMMENT: NIST MS# 245692, Seq# M1 +COMPOUND_NAME: Hydrogen +INCHI: InChI=1S/H2/h1H +CANONICAL_SMILES: [HH] +NUM PEAKS: 2 +1.0 20.98 +2.0 999.0 + +FORMULA: D2 +MW: 4 +CASNO: 7782-39-0 +ID: 2 +COMMENT: NIST MS# 61316, Seq# M2 +COMPOUND_NAME: Deuterium +INCHI: InChI=1S/H2/h1H/i1+1D +CANONICAL_SMILES: [HH] +NUM PEAKS: 2 +2.0 14.99 +4.0 999.0 + +FORMULA: CH4 +MW: 16 +CASNO: 74-82-8 +ID: 3 +COMMENT: Any=100 ; NIST MS# 61313, Seq# M64 +COMPOUND_NAME: Methane +INCHI: InChI=1S/CH4/h1H4 +NUM PEAKS: 6 +12.0 37.97 +13.0 105.9 +14.0 203.82 +15.0 886.2 +16.0 999.0 +17.0 15.99 + +FORMULA: CH4 +MW: 16 +CASNO: 74-82-8 +ID: 4 +COMMENT: Any=100 ; NIST MS# 18807, Seq# R26 +COMPOUND_NAME: Methane +INCHI: InChI=1S/CH4/h1H4 +NUM PEAKS: 6 +12.0 25.98 +13.0 85.92 +14.0 170.85 +15.0 855.23 +16.0 999.0 +17.0 10.99 + +FORMULA: CH4 +MW: 16 +CASNO: 74-82-8 +ID: 5 +COMMENT: Any=100 ; NIST MS# 18809, Seq# R27 +COMPOUND_NAME: Methane +INCHI: InChI=1S/CH4/h1H4 +NUM PEAKS: 6 +12.0 7.99 +13.0 28.97 +14.0 74.93 +15.0 753.32 +16.0 999.0 +17.0 11.99 + +FORMULA: CH4 +MW: 16 +CASNO: 74-82-8 +ID: 6 +COMMENT: Any=100 ; NIST MS# 423924, Seq# R28 +COMPOUND_NAME: Methane +INCHI: InChI=1S/CH4/h1H4 +NUM PEAKS: 6 +12.0 25.98 +13.0 74.93 +14.0 152.86 +15.0 829.25 +16.0 999.0 +17.0 10.99 +