comparison macros.xml @ 0:0a0529822d91 draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/ms2deepscore commit 4bd610e0cbbcbed51a6bfb880179777fc8034fd6
author recetox
date Mon, 02 Sep 2024 12:12:30 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0a0529822d91
1 <macros>
2 <token name="@TOOL_VERSION@">2.0.0</token> <!-- Shared version token; presumably the wrapped ms2deepscore release (TODO confirm). -->
3 <token name="@ONNX_VERSION@">1.16.2</token> <!-- Shared version token; presumably the pinned onnx package version (TODO confirm). -->
4
5 <xml name="creator"> <!-- Creator metadata: one person and one organization entry. -->
6 <creator>
7 <person
8 identifier="0000-0002-6096-224X"
9 givenName="Zargham"
10 familyName="Ahmad"
11 url="https://github.com/zargham-ahmad"/>
12 <organization
13 name="RECETOX MUNI"
14 url="https://www.recetox.muni.cz/"
15 email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"/>
16 </creator>
17 </xml>
18
19 <xml name="edam"> <!-- Emits the bio.tools cross-reference "ms2deepscore". NOTE(review): named "edam" but contains no EDAM terms, only an xref. -->
20 <xrefs>
21 <xref type="bio.tools">ms2deepscore</xref>
22 </xrefs>
23 </xml>
24
25 <xml name="input_param"> <!-- Scoring inputs: either a precomputed Scores JSON or query/reference MSP files, plus the ONNX model and its JSON configuration. -->
26 <conditional name="scores">
27 <param name="use_scores" label="Use Scores Object" type="select">
28 <option value="False" selected="true">FALSE</option>
29 <option value="True">TRUE</option>
30 </param>
31 <when value="True">
32 <param label="Scores object" name="scores_in" type="data" format="json"
33 help="Scores object calculated previously using one of the matchms similarity tools." />
34 </when>
35 <when value="False">
36 <param label="Query spectra" name="queries" type="data" format="msp"
37 help="Query mass spectra to match against references."/>
38 <param label="Reference spectra" name="references" type="data" format="msp"
39 help="Reference mass spectra to match against as library."/>
40 </when>
41 </conditional>
42 <param name="model" type="data" format="onnx" label="Model"
43 help="Select the trained MS2DeepScore model file in ONNX format, as created by the 'MS2DeepScore Training' tool."/>
44 <param name="model_param" type="data" format="json" label="Configuration"
45 help="Select the MS2DeepScore model configuration in JSON format. Can be created using the 'MS2DeepScore Config Generator' tool."/>
46 </xml>
47
48 <xml name="training_param"> <!-- Training inputs: spectra file, model settings JSON and the validation split value. -->
49 <param label="Training Dataset" name="spectra" type="data" format="msp,mgf"
50 help="Spectra file that should be used for training (it will be split into training, validation and test sets)."/>
51 <param name="model_param" type="data" format="json" label="Model Settings" help="JSON file with the MS2DeepScore model settings."/>
52 <param name="validation_split_fraction" type="integer" min="0" max="100" value="20" label="Validation split fraction [%]"
53 help="The fraction of the InChIKeys that will be used for validation and test."/>
54 </xml>
55
56 <xml name="config_generator"> <!-- Form sections for the model configuration: model structure, tensorization and training settings. -->
57 <section name="model_structure" title="Model Structure" expanded="true">
58 <repeat name="layers" title="Layer" min="1" default="1">
59 <param name="dims" type="integer" label="Dimensions" min="0" value="2000" help="Size of the in-between layer to add." />
60 </repeat>
61 <param name="embedding_dim" type="integer" label="Embedding Dimension" value="400" help="The dimension of the final embedding layer." />
62 <param name="ionisation_mode" type="select" label="Ionisation Mode">
63 <option value="positive" selected="true">Positive</option>
64 <option value="negative">Negative</option>
65 <option value="both">Both</option>
66 </param>
67 </section>
68
69 <section name="tensorization_settings" title="Tensorization Settings" expanded="true">
70 <param name="min_mz" type="integer" label="Min m/z" value="10" />
71 <param name="max_mz" type="integer" label="Max m/z" value="1000" />
72 <param name="mz_bin_width" type="float" label="m/z Bin Width" value="0.1" />
73 <param name="intensity_scaling" type="float" label="Intensity Scaling" value="0.5" />
74 <param name="fingerprint_type" type="text" label="Fingerprint Type" value="daylight" help="The fingerprint type that should be used for tanimoto score calculations." />
75 <param name="fingerprint_nbits" type="integer" label="Fingerprint Number of Bits" value="2048" help="The number of bits to use for the fingerprint." />
76 </section>
77
78
79 <section name="training_settings" title="Training Settings" expanded="false">
80 <param name="dropout_rate" type="float" label="Dropout Rate" value="0.0" />
81 <param name="learning_rate" type="float" label="Learning Rate" value="0.00025" />
82 <param name="epochs" type="integer" label="Epochs" value="250" />
83 <param name="patience" type="integer" label="Patience" value="20" help="How long the model should keep training if validation does not improve." />
84 <param name="loss_function" type="select" label="Loss Function">
85 <option value="mse" selected="true">Mean Squared Error (mse)</option>
86 <option value="mae">Mean Absolute Error (mae)</option>
87 <option value="rmse">Root Mean Squared Error (rmse)</option>
88 <option value="risk_mae">Risk Aware MAE (risk_aware_mae)</option>
89 <option value="risk_mse">Risk Aware MSE (risk_aware_mse)</option>
90 </param>
91 <param name="weighting_factor" type="integer" label="Weighting Factor" value="0" />
92 <param name="batch_size" type="integer" label="Batch Size" value="32" help="Number of pairs per batch." />
93 <param name="average_pairs_per_bin" type="integer" label="Average pairs per bin" value="20" help="The aimed average number of pairs of spectra per spectrum in each bin." />
94 <param name="random_seed" type="text" label="Random seed" value="None" help="Specify random seed for reproducible random number generation." />
95 </section>
96 </xml>
97
98 <xml name="citations"> <!-- Publications to cite. A type="doi" citation takes the bare DOI, without the https://doi.org/ resolver prefix. -->
99 <citations>
100 <citation type="doi">10.1186/s13321-021-00558-4</citation>
101 <citation type="doi">10.1101/2024.03.25.586580</citation>
102 </citations>
103 </xml>
104
105
106 <token name="@HELP@">
107 ms2deepscore provides a Siamese neural network that is trained to predict molecular structural similarities (Tanimoto scores) from pairs of mass spectrometry spectra.
108 The library provides intuitive classes to prepare data, train a Siamese model, and compute similarities between pairs of spectra.
109 In addition to the prediction of a structural similarity, MS2DeepScore can also make use of Monte-Carlo dropout to assess the model uncertainty.
110 MS2DeepScore is able to identify highly-reliable structural matches and to predict Tanimoto scores for pairs of molecules based on their fragment spectra with a root mean squared error of about 0.15.
111 Furthermore, the prediction uncertainty estimate can be used to select a subset of predictions with a root mean squared error of about 0.1.
112 MS2DeepScore can also be used to create chemically meaningful mass spectral embeddings that could be used to cluster large numbers of spectra.
113 </token>
114
115
116 <token name="@init_scores@">
117 from matchms.importing import load_from_msp, scores_from_json  # Fragment purpose: bind "scores" from a saved JSON or from two MSP files.
118 from matchms import Scores  # NOTE(review): scores_in, references and queries are declared inside the "scores" conditional but referenced unqualified below; confirm they resolve.
119 #if $scores.use_scores == "True"
120 scores = scores_from_json("${scores_in}")  # Branch taken when the user supplies a previously computed Scores JSON.
121 #else
122 scores = Scores(references=list(load_from_msp("$references")), queries=list(load_from_msp("$queries")), is_symmetric=False)  # Otherwise both MSP files are read fully into lists and wrapped in a new Scores object.
123 #end if
124 </token>
125
126 <token name="@init_logger@">
127 from matchms import set_matchms_logger_level  # Fragment purpose: configure matchms logging for the generated script.
128 set_matchms_logger_level("WARNING")  # Sets the matchms logger level to WARNING; presumably to keep lower-severity messages out of the tool output (confirm).
129 </token>
130
131 <token name="@json_load@">
132 import json
133 import numpy as np
134
135 with open("$model_param", 'r') as json_file:
136     model_params = json.load(json_file)
137
138 # Convert selected entries, when present: list to tuple, list to numpy array, and nested lists to a list of 2-tuples.
139 _converters = {
140     'base_dims': tuple,
141     'same_prob_bins': np.array,
142     'additional_metadata': lambda entries: [(entry[0], entry[1]) for entry in entries],
143 }
144
145 for _key, _convert in _converters.items():
146     if _key in model_params:
147         model_params[_key] = _convert(model_params[_key])
148
149 </token>
150 </macros>