comparison macros.xml @ 0:1633a2a34255 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
author recetox
date Tue, 22 Mar 2022 16:10:17 +0000
parents
children 867c737a7f10
comparison
equal deleted inserted replaced
-1:000000000000 0:1633a2a34255
1 <macros>
2 <token name="@TOOL_VERSION@">0.9.3</token>
3 <xml name="requirements">
4 <requirements>
5 <requirement type="package" version="4.1.0">r-base</requirement>
6 <requirement type="package" version="4.0.1">r-arrow</requirement>
7 <requirement type="package" version="0.9.3">r-recetox-aplcms</requirement>
8 <requirement type="package" version="1.0.7">r-dplyr</requirement>
9 </requirements>
10 </xml>
11
12 <xml name="creator">
13 <creator>
14 <person
15 givenName="Martin"
16 familyName="Čech"
17 url="https://github.com/martenson"
18 identifier="0000-0002-9318-1781" />
19 <person
20 givenName="Jiří"
21 familyName="Novotný"
22 url="https://github.com/xtracko"
23 identifier="0000-0001-5449-3523" />
24 <organization
25 url="https://www.recetox.muni.cz/"
26 email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
27 name="RECETOX MUNI"/>
28 </creator>
29 </xml>
30
31 <xml name="inputs">
32 <inputs>
33 <param name="files" type="data" format="mzdata,mzml,mzxml,netcdf" multiple="true" min="3" label="data"
34 help="Mass spectrometry files for peak extraction." />
35 <yield />
36 </inputs>
37 </xml>
38
39 <xml name="history_db">
40 <param name="known_table" type="data" format="parquet" label="known_table"
41 help="A data table containing the known metabolite ions and previously found features. The table must contain these 18 columns: chemical_formula (optional), HMDB_ID (optional), KEGG_compound_ID (optional), neutral.mass (optional), ion.type (the ion form - optional), m.z (either theoretical or mean observed m/z value of previously found features), Number_profiles_processed (the total number of processed samples to build this database), Percent_found (the percentage of historically processed samples in which the feature appeared), mz_min (minimum observed m/z value), mz_max (maximum observed m/z value), RT_mean (mean observed retention time), RT_sd (standard deviation of observed retention time), RT_min (minimum observed retention time), RT_max (maximum observed retention time), int_mean.log. (mean observed log intensity), int_sd.log. (standard deviation of observed log intensity), int_min.log. (minimum observed log intensity), int_max.log. (maximum observed log intensity)." />
42 <section name="history_db" title="Known-Table settings">
43 <param name="match_tol_ppm" type="integer" optional="true" min="0" label="match_tol_ppm (optional)"
44 help="The ppm tolerance to match identified features to known metabolites/features." />
45 <param name="new_feature_min_count" type="integer" value="2" min="1" label="new_feature_min_count"
46 help="The minimum number of occurrences of a historically unseen (unknown) feature to add this feature into the database of known features." />
47 </section>
48 </xml>
49
50 <xml name="noise_filtering">
51 <section name="noise_filtering" title="Noise filtering and peak detection">
52 <param name="min_exp" type="integer" min="1" value="2"
53 label="min_exp"
54 help="If a feature is to be included in the final feature table, it must be present in at least this number of spectra." />
55 <param name="min_pres" type="float" value="0.5"
56 label="min_pres"
57 help="The minimum proportion of presence in the time period for a series of signals grouped by m/z to be considered a peak." />
58 <param name="min_run" type="float" value="12"
59 label="min_run"
60 help="The minimum length of elution time for a series of signals grouped by m/z to be considered a peak." />
61 <param name="mz_tol" type="float" value="1e-05"
62 label="mz_tol"
63 help="The m/z tolerance level for the grouping of data points. This value is expressed as the fraction of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level. The recommended value is the machine's nominal accuracy level. Divide the ppm value by 1e6. For FTMS, 1e-5 is recommended." />
64 <param name="baseline_correct" type="float" value="0" label="baseline_correct"
65 help="After grouping the observations, the highest intensity in each group is found. If the highest is lower than this value, the entire group will be deleted. The default value is NA, in which case the program uses a percentile of the height of the noise groups. If given a value, the value will be used as the threshold, and baseline.correct.noise.percentile will be ignored." />
66 <param name="baseline_correct_noise_percentile" type="float" value="0.05"
67 label="baseline_correct_noise_percentile"
68 help="The percentile of signal strength of those EIC that don't pass the run filter, to be used as the baseline threshold of signal strength." />
69 <param name="intensity_weighted" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE"
70 label="intensity_weighted"
71 help="Whether to weight the local density by signal intensities in initial peak detection." />
72 </section>
73 </xml>
74
75 <xml name="feature_detection">
76 <section name="feature_detection" title="Feature detection">
77 <param name="shape_model" type="select" display="radio"
78 label="shape_model"
79 help="The mathematical model for the shape of a peak. There are two choices - bi-Gaussian and Gaussian. When the peaks are asymmetric, the bi-Gaussian is better.">
80 <option value="Gaussian">Gaussian</option>
81 <option value="bi-Gaussian" selected="true">bi-Gaussian</option>
82 </param>
83 <param name="BIC_factor" type="float" value="2.0"
84 label="BIC_factor"
85 help="The factor that is multiplied on the number of parameters to modify the BIC criterion. If larger than 1, models with more peaks are penalized more." />
86 <param name="peak_estim_method" type="select" display="radio"
87 label="peak_estim_method"
88 help="The estimation method for the bi-Gaussian peak model. Two possible values: moment and EM.">
89 <option value="moment" selected="true">Moment</option>
90 <option value="EM">EM</option>
91 </param>
92 <param name="min_bandwidth" type="float" optional="true"
93 label="min_bandwidth (optional)"
94 help="The minimum bandwidth to use in the kernel smoother." />
95 <param name="max_bandwidth" type="float" optional="true"
96 label="max_bandwidth (optional)"
97 help="The maximum bandwidth to use in the kernel smoother." />
98 <param name="sd_cut_min" type="float" value="0.01"
99 label="sd_cut_min"
100 help="The minimum standard deviation of a feature for it not to be eliminated." />
101 <param name="sd_cut_max" type="float" value="500"
102 label="sd_cut_max"
103 help="The maximum standard deviation of a feature for it not to be eliminated." />
104 <param name="sigma_ratio_lim_min" type="float" value="0.01"
105 label="sigma_ratio_lim_min"
106 help="The lower limit of the believed ratio range between the left-standard deviation and the right-standard deviation of the bi-Gaussian function used to fit the data." />
107 <param name="sigma_ratio_lim_max" type="float" value="100"
108 label="sigma_ratio_lim_max"
109 help="The upper limit of the believed ratio range between the left-standard deviation and the right-standard deviation of the bi-Gaussian function used to fit the data." />
110 <param name="component_eliminate" type="float" value="0.01"
111 label="component_eliminate"
112 help="In fitting mixture of bi-Gaussian (or Gaussian) model of an EIC, when a component accounts for a proportion of intensities less than this value, the component will be ignored." />
113 <param name="moment_power" type="float" value="1"
114 label="moment_power"
115 help="The power parameter for data transformation when fitting the bi-Gaussian or Gaussian mixture model in an EIC." />
116 </section>
117 </xml>
118
119 <xml name="peak_alignment">
120 <section name="peak_alignment" title="Peak Alignment">
121 <param name="align_chr_tol" type="float" optional="true"
122 label="align_chr_tol (optional)"
123 help="The retention time tolerance level for peak alignment. The default is NA, which allows the program to search for the tolerance level based on the data." />
124 <param name="align_mz_tol" type="float" optional="true"
125 label="align_mz_tol (optional)"
126 help="The m/z tolerance level for peak alignment. The default is NA, which allows the program to search for the tolerance level based on the data. The tolerance is given as a fraction of the m/z value, not in ppm, e.g. for a 10 ppm tolerance enter '1e-05'. This value, multiplied by the m/z value, becomes the cutoff level." />
127 <param name="max_align_mz_diff" type="float" value="0.01"
128 label="max_align_mz_diff"
129 help="As the m/z tolerance is expressed in relative terms (ppm), it may not be suitable when the m/z range is wide. This parameter limits the tolerance in absolute terms. It mostly influences feature matching in higher m/z range." />
130 </section>
131 </xml>
132
133 <xml name="weak_signal_recovery">
134 <section name="weak_signal_recovery" title="Weak Signal Recovery">
135 <param name="recover_mz_range" type="float" optional="true"
136 label="recover_mz_range (optional)"
137 help="The m/z around the feature m/z to search for observations. The default value is NA, in which case 1.5 times the m/z tolerance in the aligned object will be used." />
138 <param name="recover_chr_range" type="float" optional="true"
139 label="recover_chr_range (optional)"
140 help="The retention time around the feature retention time to search for observations. The default value is NA, in which case 0.5 times the retention time tolerance in the aligned object will be used." />
141 <param name="use_observed_range" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE"
142 label="use_observed_range"
143 help="If the value is true, the actual range of the observed locations of the feature in all the spectra will be used." />
144 <param name="recover_min_count" type="integer" value="3"
145 label="recover_min_count"
146 help="The minimum number of raw data points to be considered as a true feature." />
147 </section>
148 </xml>
149 <xml name="output_format">
150 <section name="output_format" title="Output Format">
151 <param name="out_format" type="boolean" checked="false" truevalue="recetox" falsevalue="original" label="Use custom RECETOX output format?" />
152 </section>
153 </xml>
154
155 <xml name="unsupervised_outputs">
156 <data name="recovered_feature_sample_table" format="parquet" label="${tool.name} recovered_feature_sample_table on ${on_string}" />
157 <data name="aligned_feature_sample_table" format="parquet" label="${tool.name} aligned_feature_sample_table on ${on_string}" hidden="true" />
158 <collection name="corrected_features" type="list" label="${tool.name} corrected_features on ${on_string}">
159 <discover_datasets pattern="__designation__" directory="corrected" format="parquet" />
160 </collection >
161 <collection name="extracted_features" type="list" label="${tool.name} extracted_features on ${on_string}">
162 <discover_datasets pattern="__designation__" directory="extracted" format="parquet" />
163 </collection >
164 </xml>
165
166 <xml name="citations">
167 <citations>
168 <citation type="doi">10.1093/bioinformatics/btp291</citation>
169 <citation type="doi">10.1186/1471-2105-11-559</citation>
170 <citation type="doi">10.1021/pr301053d</citation>
171 <citation type="doi">10.1093/bioinformatics/btu430</citation>
172 </citations>
173 </xml>
174
175 <token name="@HELP_hybrid@">
176 <![CDATA[
177 This is the Hybrid version of apLCMS, which incorporates knowledge of known metabolites and historically
178 detected features on the same machinery to help detect and quantify lower-intensity peaks.
179
180 CAUTION: To use such knowledge, especially historical data, you must keep using (1) the same chromatography
181 system (otherwise the retention time will not match), and (2) the same type of samples with similar extraction
182 technique, such as human serum.
183
184 @GENERAL_HELP@
185 ]]>
186 </token>
187
188 <token name="@HELP_unsupervised@">
189 <![CDATA[
190 This is the Unsupervised version of apLCMS, which does not rely on any existing knowledge about metabolites or
191 any historically detected features. For such functionality please use the Hybrid version of apLCMS.
192
193 @GENERAL_HELP@
194 ]]>
195 </token>
196
197 <token name="@GENERAL_HELP@">
198 apLCMS is software that generates a feature table from a batch of LC/MS spectra. The m/z and retention time
199 tolerance levels are estimated from the data. A run-filter is used to detect peaks and remove noise.
200 Non-parametric statistical methods are used to fine-tune peak selection and grouping. After retention time
201 correction, a feature table is generated by aligning peaks across spectra. For further information on apLCMS
202 please refer to https://mypage.cuhk.edu.cn/academics/yutianwei/apLCMS/.
203 </token>
204 </macros>