comparison msclust.xml @ 62:9bd2597c8851 default tip

r
author pieter.lukasse@wur.nl
date Fri, 06 Feb 2015 15:49:26 +0100
parents d685210eef3e
children
comparison
equal deleted inserted replaced
61:d685210eef3e 62:9bd2597c8851
1 <tool name="MsClust" id="msclust2" version="2.0.7">
2 <description>Extracts fragmentation spectra from aligned data</description>
3 <!--
4 For remote debugging start your listener on port 8000 and use the following as command interpreter:
5 java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
6 //////////////////////////
7
8 TODO in command below: add conditionals according to options of using or NOT the tolerances/thresholds from previous steps
9 -->
10 <command interpreter="java -jar ">
11 MsClust.jar
12 -peaksFileName $inputPeaks
13 -dataType $dataType
14 -imputationMethod $imputationMethod.type
15 #if $imputationMethod.type == "valueRange"
16 -rangeUpperLimit $imputationMethod.rangeUpperLimit
17 #end if
18 -plInputFormat $plInputFormat
19 -potDensFuncType $potDensFuncType.type
20 -centerSelectionType $centerSelectionType.type
21 -clusteringType $clusteringType.type
22 -neighborhoodWindowSize $potDensFuncType.pdf_neighborhoodWindowSize
23 -clusterSearchStopCriterium $centerSelectionType.cs_stop_criterion
24 -pearsonDistTreshold $potDensFuncType.pdf_pears_treshold
25 -pearsonTresholdConfidence $potDensFuncType.pdf_pears_conf
26 -pearsonPDReductionThreshold $centerSelectionType.cs_pears_pd_reductionTreshold
27 -pearsonPDReductionSlope $centerSelectionType.cs_pears_pd_reductionSlope
28 -rtDistTolUnit $potDensFuncType.rt_dist_tol_unit.type
29 -rtDistTol $potDensFuncType.rt_dist_tol_unit.pdf_rt_toler
30 -rtDistanceConfidence $potDensFuncType.pdf_scan_conf
31 #if $clusteringType.type == "original"
32 -clustMembershipCutoff $clusteringType.clust_membership_cutoff
33 #end if
34 -centrotypesOut $centrotypesOut
35 -simOut $simOut
36 -micOut $micOut
37 -mspOut $mspOut
38 -classOut $classOut
39 -outReport $htmlReportFile
40 -outReportPicturesPath $htmlReportFile.files_path
41 #if $clusteringType.type == "fuzzyCMeans"
42 -fcmMembershipWeightingExponent $clusteringType.fcmMembershipWeightingExponent
43 -fcmStopCriterion $clusteringType.fcmStopCriterion
44 -fcmCorrelationWeight $clusteringType.fcmCorrelationWeight
45 -fcmFinalAssemblyType $clusteringType.finalClusterAssembly.type
46 #if $clusteringType.finalClusterAssembly.type == "membershipBased"
47 -fcmMembershipCutoff $clusteringType.finalClusterAssembly.fcmMembershipCutoff
48 #end if
49 #end if
50 -verbose "false"
## 'settings' is a boolean param declared with truevalue="Yes"/falsevalue="No"; test its string form instead of comparing the wrapper to Python True
51 #if str($advancedSettings.settings) == "Yes"
52 -advancedSettings YES
53 -saturationLimit $advancedSettings.saturationLimit
54 -sampleSelectionSortType $advancedSettings.sampleSelectionSortType
55 -simSelectionAlgorithm $advancedSettings.simSelectionAlgorithm
56 -simMassFilter "$advancedSettings.simMassFilter"
57 -simMembershipThreshold $advancedSettings.simMembershipThreshold
58 -simSaturationThreshold $advancedSettings.simSaturationThreshold
59 -simAbsenseThreshold $advancedSettings.simAbsenseThreshold
60 -micMembershipThreshold $advancedSettings.micMembershipThreshold
61 -peakIntensityCorrectionAlgorithm $advancedSettings.peakIntensityCorrectionAlgorithm
62 #else
63 -advancedSettings YES
64 -sampleSelectionSortType SIM_INTENSITY
65 -peakIntensityCorrectionAlgorithm CORRELATION_BASED
66 #end if
67
68 </command>
69 <inputs>
70
71 <param name="inputPeaks" type="data" format="txt" label="Ion-wise aligned data (e.g. MetAlign or XCMS/metaMS output data)" />
72 <param name="plInputFormat" type="select" size="30" label="Data format">
73 <option value="metalign" selected="true">MetAlign</option>
74 <option value="xcms">XCMS/metaMS (beta)</option>
75 </param>
76 <param name="dataType" type="select" size="30" label="Data type">
77 <option value="gcms" selected="true">GC-MS</option>
78 <option value="lcms">LC-MS</option>
79 </param>
80 <conditional name="imputationMethod">
81 <param name="type" type="select" size="30" label="Select the approach used for imputing missing values (optional)" help="select how you generated the values to fill in the data gaps">
82 <option value="none" >none</option>
83 <option value="metot" selected="true">MeTot</option>
84 <option value="valueRange">Values range</option>
85 </param>
86 <when value="valueRange">
87 <param name="rangeUpperLimit" type="integer" size="10" value="0" label="Range upper limit" help="values up to this limit will be considered 'generated' values" />
88 </when>
89 <when value="metot">
90 </when>
91 <when value="none">
92 </when>
93 </conditional>
94 <conditional name="potDensFuncType">
95 <param name="type" type="select" size="30" label="Select PD function type =====================================================">
96 <option value="original" selected="true">Original</option>
97 </param>
98 <when value="original">
99 <param name="pdf_neighborhoodWindowSize" type="integer" size="10" value="200" label="Effective Peaks" />
100 <conditional name="rt_dist_tol_unit">
101 <param name="type" type="select" size="30" label="Peak time unit">
102 <option value="1" selected="true">scan nr (MetAlign)</option>
103 <option value="2" >(average) micro minutes (MetAlign)</option>
104 <option value="3" >(average) minutes (XCMS)</option>
105 </param>
106 <when value="1">
107 <param name="pdf_rt_toler" type="float" size="10" value="10" label="Peak Width, in scans" />
108 </when>
109 <when value="2">
110 <param name="pdf_rt_toler" type="float" size="10" value="100000" label="Peak Width, in micro minutes" help="e.g. 100,000=6 seconds" />
111 </when>
112 <when value="3">
113 <param name="pdf_rt_toler" type="float" size="10" value="0.1" label="Peak Width, in minutes" help="e.g. 0.1=6 seconds" />
114 </when>
115 </conditional>
116 <param name="pdf_scan_conf" type="float" size="10" value="80" label="Peak Width confidence (0.0 to 99.99)" help="example: 0[no confidence]...50[good guess]...99.9[quite certain])" />
117 <param name="pdf_pears_treshold" type="float" size="10" value="0.8" label="Correlation threshold (0.0 - 1.0)" />
118 <param name="pdf_pears_conf" type="float" size="10" value="98.0" label="Correlation threshold confidence (0.0 to 99.99)" help="example: 0[no confidence]...50[good guess]...99.9[quite certain])" />
119 </when>
120 </conditional>
121 <conditional name="centerSelectionType">
122 <param name="type" type="select" label="Initial Centers selection type ==================================================" >
123 <option value="original" selected="true">Original - Subtractive potential reductions with stop criterion and REUSE tolerances (from PD function)</option>
124 </param>
125 <when value="original">
126 <param name="cs_pears_pd_reductionTreshold" type="float" size="10" value="0.8" label="Potential Density reduction (0.0 - 1.0)" />
127 <param name="cs_pears_pd_reductionSlope" type="float" size="10" value="0.01" label="Potential Density reduction softness " />
128 <param name="cs_stop_criterion" type="float" size="10" value="2" label="Stop Criterion " />
129 </when>
130 </conditional>
131 <conditional name="clusteringType">
132 <param name="type" type="select" label="Classify using ===========================================================">
133 <option value="original" selected="true">Original - Fuzzy clustering, keep original centers and REUSE (scan distance) tolerances</option>
134 <option value="fuzzyCMeans">(experimental) Fuzzy C-Means - Fuzzy clustering, optimize centers</option>
135 </param>
136 <when value="original">
137 <param name="clust_membership_cutoff" type="float" size="10" value=""
138 label="Membership cutoff (0.0 - 1.0)"
139 help="Items with membership below this value are NOT added to the cluster"/>
140 </when>
141 <!-- one idea would be to have clustering specific tolerance values, not reusing the centrotype selection ones
142 <when value="originalNewTol">
143 <param name="clust_scan_toler" type="float" size="10" value="10" label="Peak Width, in scans" />
144 <param name="clust_scan_slope" type="float" size="10" value="2" label="Peak Width margin softness" />
145 </when>
146 -->
147 <when value="fuzzyCMeans">
148 <param name="fcmMembershipWeightingExponent" type="float" size="10" value="2.0" label="Membership Weighting Exponent" help="Influences cluster center repositioning in the iterations 1.1 (exploratory) to around 3.0 (conservative)" />
149 <param name="fcmStopCriterion" type="float" size="10" value="0.05" label="Stop Criterion" help="When convergence is 'reached' (e.g. 0.05 means memberships only changed with 5% in last iteration)" />
150 <param name="fcmCorrelationWeight" type="float" size="10" value="2" label="Correlation weight factor" help="Increase this if you think the correlation is reliable (e.g. you have a high number of samples)" />
151 <conditional name="finalClusterAssembly">
152 <param name="type" type="select" label="Final cluster assembly" >
153 <option value="original" selected="true">Original - distance based</option>
154 <option value="membershipBased">Membership based</option>
155 </param>
156 <when value="membershipBased">
157 <param name="fcmMembershipCutoff" type="select" label="Maximum allowed peak overlap" >
158 <option value="0.05" >~7 clusters</option>
159 <option value="0.10" >~5 clusters</option>
160 <option value="0.20" >~3 clusters</option>
161 </param>
162 </when>
163 <when value="original">
164 <!-- nothing -->
165 </when>
166 </conditional>
167 </when>
168 </conditional>
169
170 <param name="summaryReport" type="boolean" checked="true" label="Generate summary report" help="NB: this will increase the processing time (in some cases up to a few extra minutes)"/>
171
172 <conditional name="advancedSettings">
173 <param name="settings" type="boolean" truevalue="Yes" falsevalue="No" checked="false" label="Advanced settings ========================================================"/>
174 <when value="Yes">
175 <param name="saturationLimit" optional="true" type="integer" size="10" label="Saturation limit (optional)" help="fill in if you have saturation problems in your data" />
176 <param name="sampleSelectionSortType" type="select" label="Sample selection scheme for spectrum peak intensity correction algorithm (optional/experimental)" help="The intensity values to use to select the samples for each cluster/metabolite in which it is most intense/abundant. These samples are used in the peak intensity correction (see parameter below). Use this option to try to avoid samples that have insufficient signal or saturation." >
177 <option value="None">None</option>
178 <!-- in order of best FORWARD scoring when tested on /test/data/report_test_sets/(P2) Relative peak heights in spectra/Input (Test set 1) -->
179 <option value="SIM_INTENSITY" selected="true">SIM intensities</option>
180 <option value="MAX_INTENSITY">Maximum intensities</option>
181 <option value="CENTROTYPE_INTENSITY">Centrotype peak intensities</option>
182 <option value="MIC_INTENSITY">MIC intensities</option>
183 </param>
184 <param name="peakIntensityCorrectionAlgorithm" type="select" label="Spectrum peak intensity correction algorithm (optional/experimental)" help="Whether spectrum peak heights should be adjusted according to their membership to the cluster or to their correlation to the cluster's centrotype ion" >
185 <option value="MEMBERSHIP_BASED">Membership based (msclust 1.0 mode)</option>
186 <option value="CORRELATION_BASED" selected="true">Correlation based</option>
187 </param>
188 <param name="simSelectionAlgorithm" type="select" label="SIM selection algorithm (experimental)" help="Set this if you want to deviate from the standard which is: allow shared SIM peaks for GC-MS data, and force unique SIM peaks for LC-MS data">
189 <option value="" selected="true"></option>
190 <option value="uniqueSIM">Unique SIM peak</option>
191 <option value="sharedSIM">Shared SIM peak</option>
192 </param>
193 <param name="simMassFilter" type="text" optional="true" size="30" label="SIM mass exclusion list" help="Comma-separated list of masses NOT to use as SIM peaks. E.g. '73,147,...' " />
194 <param name="simMembershipThreshold" optional="true" type="float" size="10" label="SIM membership threshold" help="Minimum membership a peak should have to qualify as a SIM candidate. E.g. 0.8 " />
195 <param name="simSaturationThreshold" optional="true" type="float" size="10" label="SIM saturation threshold (%)" help="Maximum % of samples in which a SIM candidate peak may be saturated. If the candidate peak exceeds this threshold, then another peak is chosen. If no peak can be found meeting this criterion, mass 0 is reported" />
196 <param name="simAbsenseThreshold" optional="true" type="float" size="10" label="SIM absence threshold (%)" help="Maximum % of samples in which a SIM candidate peak may be absent. If the candidate peak exceeds this threshold, then another peak is chosen. If no peak can be found meeting this criterion, mass 0 is reported" />
197
198 <param name="micMembershipThreshold" optional="true" type="float" size="10" label="MIC membership threshold" help="Minimum membership a peak should have to be counted in the MIC sum. E.g. 0.8 " />
199
200 </when>
201 <when value="No">
202 </when>
203 </conditional>
204
205
206 </inputs>
207 <outputs>
208 <data name="centrotypesOut" format="msclust.csv" label="${tool.name} on ${on_string} - centrotypes file"/>
209 <data name="simOut" format="msclust.csv" label="${tool.name} on ${on_string} - SIM file"/>
210 <data name="micOut" format="msclust.csv" label="${tool.name} on ${on_string} - MIC file"/>
211 <data name="mspOut" format="msp" label="${tool.name} on ${on_string} - SPECTRA file"/>
212 <data name="classOut" format="msclust.csv" label="${tool.name} on ${on_string} - Classification file"/>
213 <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report">
214 <!-- Optional output: the filter below tests the 'summaryReport' checkbox from the inputs section. If the expression is false, the file is not created -->
215 <filter>( summaryReport == True )</filter>
216 </data>
217 </outputs>
218 <tests>
219 <!-- find out how to use -->
220 </tests>
221 <help>
222
223 <!-- see also http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets -->
224
225 .. class:: infomark
226
227 This tool extracts spectra from ion-wise aligned MS(/MS) results. It uses expression profiles and
228 retention times of the putative ions to cluster them. Each cluster is then used to generate
229 one spectrum containing the clustered ions (peaks).
230
231 .. image:: msclust_summary.png
232
233
234 -----
235
236 **Input**
237
238 The input file should contain the following columns (in this order), followed by the sample intensity columns (one column with the
239 intensity value for each sample):
240
241 *ScanNR*
242
243 *Ret(umin)*
244
245 *Mass(uD)*
246
247 *(Optional)retentionMean*
248
249 *(only required if retentionMean is present)retentionSD*
250
251 *N sample intensity columns...*
252
253
254 -----
255
256 **Output**
257
258 This tool returns a number of output files and a small report.
259
260 **Parameters index**
261
262
263 *Select the approach used for imputing missing values:* only select this if you have used a specific method to
264 fill in the data gaps in the input file. One example is replacing zero values by some randomly generated low value.
265 If MeTot is chosen, then a value is considered generated if: the value contains a dot '.' and some number
266 other than 0 (zero) after the dot.
267
268 *Effective Peaks:* Neighborhood window size to consider when calculating density. Smaller values increase
269 performance but are less reliable.
270
271 *Peak Width, in scans:* Scan window width of scans to consider 'close'. One can see this as the
272 'tolerated variation in scans' for the apex positions of the fragment peaks composing a cluster.
273 Note: if MetAlign was used, this is the variation *after* pre-processing by MetAlign.
274
275 *Peak Width confidence:* The higher the confidence, the stricter the threshold.
276
277 *Correlation threshold (0.0 - 1.0):* Tolerance center for pearson distance calculation. The higher this value,
278 the higher the correlation between 2 items has to be for them to be considered 'close'.
279
280 *Correlation threshold confidence:* The higher the confidence, the stricter the threshold. `More...`__
281
282 *Potential Density reduction (0.0 - 1.0):* Reduction tolerance center for pearson distance calculation.
283 The higher this value, the less the low correlated items get reduced, getting a chance to form a cluster of their own.
284
285 *Potential Density reduction softness:* Reduction curve slope for pearson distance tolerance. Lower
286 values = stricter separation at the value determined in 'Potential Density reduction' above
287 (TODO review this comment).
288
289 *Stop Criterion:* When to stop reducing and looking for new clusters. Lower values = more iterations
290
291 .. __: javascript:window.open('.. image:: confidence_and_slope_params_explain.png'.replace('.. image:: ', ''),'popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
292
293
294 -----
295
296 **Output files described below**
297
298 -----
299
300 *SPECTRA:* this file can be submitted to NIST for identification of the spectra.
301
302 `Click here for more details on the Sample selection and Spectrum peak intensity correction algorithm parameters related to SPECTRA generation`_
303
304 .. _Click here for more details on the Sample selection and Spectrum peak intensity correction algorithm parameters related to SPECTRA generation: javascript:window.open('.. image:: sample_sel_and_peak_height_correction.png'.replace('.. image:: ', ''),'popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
305
306 -----
307
308 *MIC:* stands for Measured Ions Count -> it contains, for each cluster, the sum of the ion count
309 values (corrected by their membership) for all MEASURED cluster ions in the given sample.
310
311 The MIC for a **cluster i** in **sample s**, where **cluster i** has **n** members is thus:
312
313 sum ( [intensity of member n in **sample s**] x [membership value of member n in **cluster i** ] )
314
315 -----
316
317 *SIM:* stands for Selective Ion Mode -> it contains, for each cluster, the intensity values of the
318 most representative member ion peak of this cluster. The most representative member peak is the one with the
319 highest membership*average_intensity. This definition leads to conflicts as a peak can have a
320 membership in two or more clusters. The assignment of a SIM peak to a cluster depends on
321 the configured data type (LC or GC-MS). NB: this can be overruled in the "advanced settings":
322
323 (1) LC-MS SIM: select SIM peak only once and for the centrotype in which this specific mass has its
324 highest membership; for neighboring centrotypes use its "second best SIM", etcetera. In other words,
325 if the SIM peak has been identified as the SIM in more than 1 cluster, assign as SIM to the cluster
326 with highest membership. Continue searching for other SIM peaks to assign to the other clusters until
327 all ambiguities are solved.
328
329 (2) GC-MS SIM: the SIM peak can be "shared" by multiple clusters. However, the intensity values are corrected
330 by the membership value of the peak in the cluster in case the SIM peak is "shared". If the SIM peak is not
331 "shared" then the "raw" intensity values of the SIM peak are recorded in the SIM file.
332
333 `Click here for more details on the SIM output file`_
334
335 .. _Click here for more details on the SIM output file: javascript:window.open('.. image:: sample_SIM.png'.replace('.. image:: ', ''),'popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
336
337
338 **References**
339
340 If you use this Galaxy tool in work leading to a scientific publication please
341 cite the following paper:
342
343 Y. M. Tikunov, S. Laptenok, R. D. Hall, A. Bovy, and R. C. H. de Vos (2012).
344 MSClust: a tool for unsupervised mass spectra extraction of
345 chromatography-mass spectrometry ion-wise aligned data
346 http://dx.doi.org/10.1007%2Fs11306-011-0368-2
347
348 </help>
349 <!-- Citations are declared as a direct child of <tool> (not nested inside <help>) and use the plain, non-URL-encoded DOI;
350 see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set -->
351 <citations>
352 <citation type="doi">10.1007/s11306-011-0368-2</citation>
353 </citations>
354
355
356 </tool>