Mercurial > repos > computational-metabolomics > metfrag
diff metfrag.xml @ 0:fd5c0b39569a draft
"planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit e20ce56f23d9fe30df64542ece2295d654ca142d"
author | computational-metabolomics |
---|---|
date | Wed, 05 Feb 2020 12:30:06 -0500 |
parents | |
children | 9ee2e2ceb2c9 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/metfrag.xml Wed Feb 05 12:30:06 2020 -0500 @@ -0,0 +1,473 @@ +<tool id="metfrag" name="MetFrag" version="2.4.5+galaxy1"> + <description> + in silico fragmentor for compound annotation of mass spectrometry fragmentation spectra + </description> + <macros> + <import>macros.xml</import> + </macros> + <requirements> + <requirement type="package" version="2.4.5">metfrag</requirement> + </requirements> + <stdio> + <regex match="Cannot allocate memory" + source="stderr" + level="fatal_oom" + description="Out of memory error occurred" /> + </stdio> + <command detect_errors="exit_code"> + <![CDATA[ + python '$__tool_directory__/metfrag.py' + --input_pth '$input' + --result_pth '$results' + --temp_dir './temp/' + + --cores_top_level \${GALAXY_SLOTS:-4} + + --MetFragDatabaseType '$db_select.MetFragDatabaseType' + + #if $db_select.MetFragDatabaseType == 'LocalCSV': + --LocalDatabasePath '$db_select.LocalDatabasePath' + #elif $db_select.MetFragDatabaseType == 'MetChem': + --LocalMetChemDatabaseServerIp '$db_select.LocalMetChemDatabaseServerIp' + #end if + + --DatabaseSearchRelativeMassDeviation $DatabaseSearchRelativeMassDeviation + --FragmentPeakMatchRelativeMassDeviation $FragmentPeakMatchRelativeMassDeviation + --FragmentPeakMatchAbsoluteMassDeviation $FragmentPeakMatchAbsoluteMassDeviation + --polarity '$polarity' + + --MetFragScoreTypes '$suspectlist.MetFragScoreTypes' + --MetFragScoreWeights '$suspectlist.MetFragScoreWeights' + + #if $suspectlist.suspectselector == 'includesuspects': + #if $suspectlist.includesuspects_default_cond: + --ScoreSuspectLists '$__tool_directory__/UNPD_DB.inchikeys.txt' + #else + --ScoreSuspectLists '$suspectlist.includesuspects_custom_cond.ScoreSuspectLists' + #end if + #end if + + --meta_select_col $meta_select_col + --minMSMSpeaks $minMSMSpeaks + --schema $schema + + $PreProcessFilter.UnconnectedCompoundFilter + $PreProcessFilter.IsotopeFilter + + --FilterMinimumElements '$PreProcessFilter.FilterMinimumElements' + --FilterMaximumElements '$PreProcessFilter.FilterMaximumElements' + --FilterSmartsInclusionList '$PreProcessFilter.FilterSmartsInclusionList' + --FilterSmartsExclusionList '$PreProcessFilter.FilterSmartsExclusionList' + --FilterIncludedElements '$PreProcessFilter.FilterIncludedElements' + --FilterExcludedElements '$PreProcessFilter.FilterExcludedElements' + --FilterIncludedExclusiveElements '$PreProcessFilter.FilterIncludedExclusiveElements' + + $skip_invalid_adducts + --score_thrshld $PostProcessFilter.score_thrshld + --pctexplpeak_thrshld $PostProcessFilter.pctexplpeak_thrshld + + ]]></command> + <inputs> + <param name="input" type="data" format="msp" label="MSP file (Output from Create MSP tool)"/> + <conditional name="db_select"> + <param argument="--MetFragDatabaseType" type="select" label="Choose Compound Database"> + <option value="PubChem" selected="true">PubChem</option> + <option value="KEGG">KEGG</option> + <option value="LocalCSV">Local database (csv)</option> + <option value="MetChem">MetChem</option> + </param> + <when value="MetChem"> + <param argument="--LocalMetChemDatabaseServerIp" type="text" label="MetChem URL"/> + </when> + <when value="LocalCSV"> + <param argument="--LocalDatabasePath" type="data" format="csv" + label="Local database of compounds (CSV format)" /> + </when> + <when value="KEGG"/> + <when value="PubChem"/> + </conditional> + <param argument="--DatabaseSearchRelativeMassDeviation" type="float" min="0" value="10" + label="Relative Mass Deviation for database search (ppm)" + help="A value in ppm that defines the deviation of theoretical masses in the database + vs. the measured masses"/> + <param argument="--FragmentPeakMatchRelativeMassDeviation" type="float" min="0" value="5" + label="Fragment Peak Match Relative Mass Deviation (ppm)" + help="Relative mass deviation in ppm of theoretical fragment peaks vs. measured fragment peaks" /> + <param argument="--FragmentPeakMatchAbsoluteMassDeviation" type="float" min="0" value="0.001" + label="Fragment Peak Match Absolute Mass Deviation (Da)" + help="Absolute mass deviation in Dalton of theoretical fragment peaks vs. measured fragment peaks" /> + <param argument="--polarity" type="select" label="Polarity" + help="The polarity used for the mode of acquisition"> + <option value="pos" selected="true">Positive</option> + <option value="neg">Negative</option> + </param> + <param argument="--schema" type="select" label="Schema" + help="The schema used for the MSP file (auto will try automatically determine the schema)"> + <option value="auto" selected="True">Auto</option> + <option value="msp">Generic MSP</option> + <option value="massbank">MassBank</option> + </param> + <param argument="--meta_select_col" type="select" + label="Choose how additional metadata columns are extracted" + help="The MetFrag output can have additional meta data columns added, these can be either extracted + from all MSP parameters or from the 'Name' and 'RECORD_TITLE' MSP parameter. Additionally, columns + can be added from the 'Name' or 'RECORD_TITLE' parameter by splitting on | and : + e.g. 'MZ:100.2 | RT:20 | xcms_grp_id:1' would create MZ,RT and xcms_grp_id columns"> + <option value="name" selected="true">Extra metadata columns from the Name or RECORD_TITLE</option> + <option value="name_split">Extra metadata columns from the Name or RECORD_TITLE (each column is split on "|" and ":" ) </option> + <option value="all">Extra metadata columns from all MSP parameters</option> + </param> + <conditional name="suspectlist"> + <param name="suspectselector" type="select" label="Suspect list" + help="Choose whether to include a suspect list"> + <option value="includesuspects" >Include suspect list</option> + <option value="excludesuspects" selected="True">Do not include suspect list</option> + </param> + <when value="includesuspects"> + <conditional name="includesuspects_default_cond"> + <param name="includesuspects_default_bool" type="boolean" + label="Use default list of suspect compounds?" + help="Either provide a file containing a list of suspect compounds or a default file + of an aggregated list of in silico predicted MS/MS spectra of natural products + from the Universal Natural Products Database (http://pkuxxj.pku.edu.cn/UNPD/index.php). + The list is an aggregated version of the github repository https://github.com/oolonek/ISDB/tree/master/Data/dbs."/> + <when value="true"/> + <when value="false"> + <param argument="--ScoreSuspectLists" type="data" format="txt" optional="True" + label="Suspect list file" help="File containing a list of suspects inchikeys" /> + </when> + </conditional> + <expand macro="metfrag_scoring"/> + </when> + <when value="excludesuspects"> + <expand macro="metfrag_scoring" suspectlistscore="False" weights="1.0,1.0"/> + </when> + </conditional> + <param argument="--minMSMSpeaks" type="integer" label="Minimum number of MS/MS peaks" value="0"/> + <param argument="--skip_invalid_adducts" type="boolean" label="Skip invalid or undefined adduct types?" + truevalue="--skip_invalid_adducts" falsevalue="" checked="true" + help="If no adduct type is provided within the MSP file or if the adduct type is not usable + with MetFrag, set to 'yes' if these spectra should be skipped or 'no' if the default + of [M+H]+ for pos data or [M-H]- for neg data should be used"/> + <section name="PreProcessFilter" title="PreProcessing filters" expanded="False"> + <param argument="--UnconnectedCompoundFilter" type="boolean" checked="false" + truevalue="--UnconnectedCompoundFilter" falsevalue="" + label="filter non-connected compounds (e.g. salts)" help=""/> + <param argument="--IsotopeFilter" type="boolean" checked="false" truevalue="--IsotopeFilter" + falsevalue="" label="filter compounds containing non-standard isotopes" help=""/> + <param argument="--FilterMinimumElements" type="text" + optional="true" label="Minimum Elements Filter" + help="Filter by minimum of contained elements. Ex: N2O3 include compounds with at least + 2 nitrogens and 3 oxygens"> + <expand macro="text-alphanumeric-regex-validator"/> + </param> + <param argument="--FilterMaximumElements" type="text" + optional="true" label="Maximum Elements Filter" + help="Filter by maximum of contained elements. Ex: N5O7 filter out compounds with at + maximum 5 nitrogens and 7 oxygens"> + <expand macro="text-alphanumeric-regex-validator"/> + </param> + <param argument="--FilterSmartsInclusionList" type="text" + optional="true" label="Include substructures" + help="Filter by presence of defined sub-structures. Ex: c1ccccc1 include compounds + containing benzene"/> + <param argument="--FilterSmartsExclusionList" type="text" + optional="true" label="Exclude substructures" + help="Filter by absence of defined sub-structures. Ex: [OX2H] filter out compounds + containing hydroxyl groups"/> + <param argument="--FilterIncludedElements" type="text" + optional="true" label="Include elements" + help="Filter by presence of defined elements (other elements are allowed). + Ex: 'N,O' include compounds containing nitrogen and oxygen" > + <expand macro="text-alphanumeric-comma-regex-validator"/> + </param> + <param argument="--FilterIncludedExclusiveElements" type="text" + optional="true" label="Include elements (exclusive)" + help="Filter by presence of defined elements (no other elements are allowed). + Ex: 'N,O' include compounds only composed of nitrogen and oxygen" > + <expand macro="text-alphanumeric-comma-regex-validator"/> + </param> + <param argument="--FilterExcludedElements" type="text" + optional="true" label="Exclude elements" + help="Filter by absence of defined sub-structures. Ex: 'Cl,Br' filter out + compounds including bromine or chlorine"> + <expand macro="text-alphanumeric-comma-regex-validator"/> + </param> + </section> + <section name="PostProcessFilter" title="PostProcessing filters" expanded="False"> + <param argument="--score_thrshld" type="float" label="Threshold for score after MetFrag search" + max="1" min="0" value="0"/> + <param argument="--pctexplpeak_thrshld" type="float" label="Minimum percentage of explain peaks" + max="100" min="0" value="0"/> + </section> + </inputs> + <outputs> + <data name="results" format="tabular"/> + </outputs> + <tests> + <test> + <!-- Test "massbank" style data format --> + <param name="input" value="massbank_format.txt"/> + <param name="schema" value="massbank"/> + <param name="skip_invalid_adducts" value="false"/> + <param name="MetFragDatabaseType" value="PubChem"/> + <param name="MetFragDatabaseType" value="LocalCSV"/> + <param name="LocalDatabasePath" value="demo_db.csv"/> + <output name="results" file="metfrag_massbank.tabular"/> + </test> + <test> + <!-- Test "generic" style data format --> + <param name="input" value="generic_format.msp"/> + <param name="schema" value="msp"/> + <param name="MetFragDatabaseType" value="PubChem"/> + <param name="skip_invalid_adducts" value="false"/> + <param name="MetFragDatabaseType" value="LocalCSV"/> + <param name="LocalDatabasePath" value="demo_db.csv"/> + <output name="results" file="metfrag_msp.tabular"/> + </test> + <test> + <!-- Test PubChem API with "winter" dataset --> + <param name="input" value="winter_pos.msp"/> + <section name="PostProcessFilter"> + <param name="score_thrshld" value="0.9"/> + </section> + <param name="MetFragDatabaseType" value="PubChem"/> + <output name="results" file="winter_pos.tabular"/> + </test> + <test> + <!-- Test actual MassBank data for Glucose --> + <param name="input" value="RP022611.txt"/> + <param name="MetFragDatabaseType" value="LocalCSV"/> + <param name="LocalDatabasePath" value="demo_db.csv"/> + <output name="results" file="RP022611.tabular"/> + </test> + <test> + <!-- Test actual MassBank data for Glucose (all metadata columns in output--> + <param name="input" value="RP022611.txt"/> + <param name="schema" value="massbank"/> + <param name="MetFragDatabaseType" value="LocalCSV"/> + <param name="LocalDatabasePath" value="demo_db.csv"/> + <param name="meta_select_col" value="all"/> + <output name="results" file="RP022611_all_col.tabular"/> + </test> + <test> + <!-- Test actual MassBank data for Glucose (include suspect list - default)--> + <param name="input" value="RP022611.txt"/> + <param name="schema" value="massbank"/> + <conditional name="suspectlist"> + <param name="suspectselector" value="includesuspects"/> + <conditional name="includesuspects_default_cond"> + <param name="includesuspects_default_bool" value="true"/> + </conditional> + </conditional> + <output name="results" file="RP022611_suspect_default.txt"/> + </test> + <test> + <!-- Test invalid adduct --> + <param name="input" value="invalid_adduct.msp"/> + <param name="skip_invalid_adducts" value="true"/> + <output name="results" file="invalid_adduct_result.txt" ftype="tabular"/> + </test> + </tests> + <help> +------- +MetFrag +------- + +Description +----------- + +MetFrag is a freely available software for the annotation of high precision tandem mass spectra of metabolites which is +a first and critical step for the identification of a molecule's structure. Candidate molecules of different databases +are fragmented "in silico" and matched against mass to charge values. A score calculated using the fragment peak +matches gives hints to the quality of the candidate spectrum assignment. + +Website: http://ipb-halle.github.io/MetFrag/ + +Parameters +---------- + +**\1. MSP file** + +MSP file created using *Create MSP* tool + +**\2a. MetFragDatabaseType (public databases)** + +* PubChem + +* KEGG + + +**\2b. MetFragDatabaseType (local CSV file database)** + + +Custom database file in CSV format with the following structure: + ++-------------+------------------+----------+---------------------------------------------+----------------------+---+ +| Identifier | MonoisotopicMass | SMILES | InChI | Name |...| ++-------------+------------------+----------+---------------------------------------------+----------------------+---+ +| HMDB0000123 | 75.03202841 | NCC(O)=O | InChI=1S/C2H5NO2/c3-1-2(4)5/h1,3H2,(H,4,5) | Glycine |...| ++-------------+------------------+----------+---------------------------------------------+----------------------+---+ +| HMDB0002151 | 78.0139355 | CS(C)=O | InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 | Dimethyl sulfoxide |...| ++-------------+------------------+----------+---------------------------------------------+----------------------+---+ +| ... | ... | ... | ... | ... |...| ++-------------+------------------+----------+---------------------------------------------+----------------------+---+ + + +Table continued: + ++---+------------------+-----------------------------+------------------+------------+-------------+ +|...| MolecularFormula | InChIKey | InChIKey1 | InChIKey2 | InChIKey3 | ++---+------------------+-----------------------------+------------------+------------+-------------+ +|...| C2H5NO2 | DHMQDGOQFOQNFH-UHFFFAOYSA-N | DHMQDGOQFOQNFH | UHFFFAOYSA | N | ++---+------------------+-----------------------------+------------------+------------+-------------+ +|...| C2H6OS | IAZDPXIOMUYVGZ-UHFFFAOYSA-N | IAZDPXIOMUYVGZ | UHFFFAOYSA | N | ++---+------------------+-----------------------------+------------------+------------+-------------+ +|...| ... | ... | ... | ... | ... | ++---+------------------+-----------------------------+------------------+------------+-------------+ + + + +**\2b. MetFragDatabaseType MetChem** + +MetChem is a modified PubChem database and can be used in replace of PubChem +for performing API calls to the public PubChem instance. + +**\3. Database Search Relative Mass Deviation - ppm** + +A value in ppm that defines the deviation of theoretical masses in the database vs. the measured masses. + +**\4. Fragment Peak Match Relative Mass Deviation - ppm** + +Relative mass deviation in ppm of theoretical fragment peaks vs. measured fragment peaks. + +**\5. Fragment Peak Match Absolute Mass Deviation (Da)** + +Absolute mass deviation in Dalton of theoretical fragment peaks vs. measured fragment peaks. + +**\6. Polarity** + +The polarity used for the mode of acquisition. + +**\7. Schema** + +The Schema used by the MSP file (e.g. generic MSP format or MassBank format) + +**\8. Suspect list** + +Choose whether to include a file containing a list of suspects. + +**\9. MetFrag Score Types** + +The type of scores MetFrac is using for the calculations. Please do not change the values unless you know what you are doing! + +**\10. MetFrag Score Weights** + +The weights of the different score types, separated with a comma and without whitespaces. 1.0 means 100%. + +**\11. MetFrag Database Type** + +Database to choose from. + +**\12. minMSMSpeaks** + +Minimum MS/MS peaks within a MS/MS spectra to be used for the MetFrag calculation + +**\13. PreProcessFilter** + +Various filters can be performed on the potential compounds prior to predicting the in silico spectra + +**\14. PostProcessFilter** + +To make the output more manageble results below certain criteria can be removed from the various filters can be +performed on the potential compounds prior to predicting the in silico spectra + +Output +------- + +These columns are derived from any metadata in the MSP input file (additional columns can included if they are recorded in the MSP file) + ++-------------+--------------------------------------------+---+ +| adduct | name |...| ++-------------+--------------------------------------------+---+ +| [M-H]- | D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; |...| ++-------------+--------------------------------------------+---+ +| [M-H]- | D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; |...| ++-------------+--------------------------------------------+---+ +| ... | ... |...| ++-------------+--------------------------------------------+---+ + +Table continued (these columns are derived from the MetFrag result): + ++---+------------------+----------------------------------------------------------+-------------------------------------------------------------------------------------+-----+ +|...| sample_name | ExplPeaks | FormulasOfExplPeaks | ... | ++---+------------------+----------------------------------------------------------+-------------------------------------------------------------------------------------+-----+ +|...| 1_metfrag_result | 59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0 | 59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H- | ... | ++---+------------------+----------------------------------------------------------+-------------------------------------------------------------------------------------+-----+ +|...| 1_metfrag_result | 59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0 | 59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H- | ... | ++---+------------------+----------------------------------------------------------+-------------------------------------------------------------------------------------+-----+ +|...| ... | ... | ... | ... | ++---+------------------+----------------------------------------------------------+-------------------------------------------------------------------------------------+-----+ + + +Table continued (columns are derived from the MetFrag result): + ++---+------------------+----------------------------+------------------------------------------------------+------------+---------------------------------------------------------------------------------+---+ +|...| FragmenterScore | FragmenterScore_Values | FormulasOfExplPeaks | Identifier | InChI |...| ++---+------------------+----------------------------+------------------------------------------------------+------------+---------------------------------------------------------------------------------+---+ +|...| 105.844569063138 | 696.0;1156.0;696.0;1156.0 | 6-(hydroxymethyl)oxane-2,3,4,5-tetrol | 206 | InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H |...| ++---+------------------+----------------------------+------------------------------------------------------+------------+---------------------------------------------------------------------------------+---+ +|...| 105.844569063138 | 696.0;1156.0;696.0;1156.0 | (3R,4S,5S,6R)-6-(hydroxymethyl)oxane-2,3,4,5-tetrol | 5793 | InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1 |...| ++---+------------------+----------------------------+------------------------------------------------------+------------+---------------------------------------------------------------------------------+---+ +|...| ... | ... | ... | ... | ... |...| ++---+------------------+----------------------------+------------------------------------------------------+------------+---------------------------------------------------------------------------------+---+ + +Table continued (columns are derived from the MetFrag result): + + ++---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+ +|...| NoExplPeaks | NumberPeaksUsed | OfflineMetFusionScore | SMILES | Score | SuspectListScore | XlogP3 | ++---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+ +|...| 4 | 5 | 2.84566828424078 | C(C1C(C(C(C(O1)O)O)O)O)O | 1.82678219603441 | 1 | -2.6 | ++---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+ +|...| 4 | 5 | 2.84566828424078 | C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O | 1.82678219603441 | 1 | -2.6 | ++---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+ +|...| ... | ... | ... | ... | ... | ... | ... | ++---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+ + + +Additional notes +-------------------- + +The following adducts (and format) are currently supported in the MSP file. The neutral mass is automatically + calculated for the precursor m/z by subtracting the adduct mass + +- '[M+H]+': 1.007276, +- '[M+NH4]+': 18.034374, +- '[M+Na]+': 22.989218, +- '[M+K]+': 38.963158, +- '[M+CH3OH+H]+': 33.033489, +- '[M+ACN+H]+': 42.033823, +- '[M+ACN+Na]+': 64.015765, +- '[M+2ACN+H]+': 83.06037, +- '[M-H]-': -1.007276, +- '[M+Cl]-': 34.969402, +- '[M+HCOO]-': 44.99819, +- '[M-H+HCOOH]-': 44.99819, +- '[M+CH3COO]-': 59.01385, +- '[M-H+CH3COOH]-': 59.01385 + +Developers and contributors +--------------------------- +- **Jordi Capellades (j.capellades.to@gmail.com) - Universitat Rovira i Virgili (SP)** +- **Julien Saint-Vanne (julien.saint-vanne@sb-roscoff.fr) - ABiMS (France)** +- **Tom Lawson (t.n.lawson@bham.ac.uk) - University of Birmingham (UK)** +- **Ralf Weber (r.j.weber@bham.ac.uk) - University of Birmingham (UK)** +- **Kristian Peters (kpeters@ipb-halle.de) - IPB Halle (Germany)** +- **Payam Emami (payam.emami@medsci.uu.se) - Uppsala Universitet (Sweden)** +- **Christoph Ruttkies (christoph.ruttkies@ipb-halle.de) - IPB Halle (Germany)** + </help> + <citations> + <citation type="doi">10.1186/s13321-016-0115-9</citation> + </citations> +</tool>