comparison msclust.xml @ 3:2c1c9f0d8aa6

using normal versioning
author pieter.lukasse@wur.nl
date Fri, 17 Jan 2014 12:39:28 +0100
parents
children 80075a4c6543
comparison
equal deleted inserted replaced
2:a35b55bfe96c 3:2c1c9f0d8aa6
1 <tool name="MsClust" id="msclust2" version="2.0.2">
2 <description>Extracts fragmentation spectra from aligned data</description>
3 <!--
4 For remote debugging start your listener on port 8000 and use the following as command interpreter:
5 java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
6 //////////////////////////
7 NB: "advancedSettings.settings" is a boolean param declared with truevalue="Yes" / falsevalue="No"; the command below therefore tests its string form (str(...) == "Yes"), because comparing the parameter wrapper to Python's True is not a reliable test and would skip the user-supplied advanced options.
8 TODO in command below: add conditionals according to options of using or NOT the tolerances/thresholds from previous steps
9 -->
10 <command interpreter="java -jar ">
11 MsClust.jar
12 -peaksFileName $inputPeaks
13 -dataType $dataType
14 -imputationMethod $imputationMethod.type
15 #if $imputationMethod.type == "valueRange"
16 -rangeUpperLimit $imputationMethod.rangeUpperLimit
17 #end if
18 -plInputFormat "metalign"
19 -potDensFuncType $potDensFuncType.type
20 -centerSelectionType $centerSelectionType.type
21 -clusteringType $clusteringType.type
22 -neighborhoodWindowSize $potDensFuncType.pdf_neighborhoodWindowSize
23 -clusterSearchStopCriterium $centerSelectionType.cs_stop_criterion
24 -pearsonDistTreshold $potDensFuncType.pdf_pears_treshold
25 -pearsonTresholdConfidence $potDensFuncType.pdf_pears_conf
26 -pearsonPDReductionThreshold $centerSelectionType.cs_pears_pd_reductionTreshold
27 -pearsonPDReductionSlope $centerSelectionType.cs_pears_pd_reductionSlope
28 -scanDistTol $potDensFuncType.pdf_scan_toler
29 -scanDistanceConfidence $potDensFuncType.pdf_scan_conf
30 -centrotypesOut $centrotypesOut
31 -simOut $simOut
32 -micOut $micOut
33 -mspOut $mspOut
34 -classOut $classOut
35 -outReport $htmlReportFile
36 -outReportPicturesPath $htmlReportFile.files_path
37 #if $clusteringType.type == "fuzzyCMeans"
38 -fcmMembershipWeightingExponent $clusteringType.fcmMembershipWeightingExponent
39 -fcmStopCriterion $clusteringType.fcmStopCriterion
40 -fcmCorrelationWeight $clusteringType.fcmCorrelationWeight
41 -fcmFinalAssemblyType $clusteringType.finalClusterAssembly.type
42 #if $clusteringType.finalClusterAssembly.type == "membershipBased"
43 -fcmMembershipCutoff $clusteringType.finalClusterAssembly.fcmMembershipCutoff
44 #end if
45 #end if
46 -verbose "false"
47 #if str($advancedSettings.settings) == "Yes"
48 -advancedSettings YES
49 -saturationLimit $advancedSettings.saturationLimit
50 -sampleSelectionSortType $advancedSettings.sampleSelectionSortType
51 -simSelectionAlgorithm $advancedSettings.simSelectionAlgorithm
52 -simMassFilter "$advancedSettings.simMassFilter"
53 -simMembershipThreshold $advancedSettings.simMembershipThreshold
54 -simSaturationThreshold $advancedSettings.simSaturationThreshold
55 -simAbsenseThreshold $advancedSettings.simAbsenseThreshold
56 -micMembershipThreshold $advancedSettings.micMembershipThreshold
57 -peakIntensityCorrectionAlgorithm $advancedSettings.peakIntensityCorrectionAlgorithm
58 #else
59 -advancedSettings YES
60 -sampleSelectionSortType SIM_INTENSITY
61 -peakIntensityCorrectionAlgorithm CORRELATION_BASED
62 #end if
63
64 </command>
65 <inputs>
66 <!-- <param name="rankingWeightConfig" type="text" area="true" size="11x70" label="NB - TEST VERSION"
67 value="VERSION BEING TESTED AT THIS MOMENT...NOT READY FOR USE..."/>
68 -->
69 <param name="inputPeaks" type="data" format="txt" label="Ion-wise aligned data (e.g. MetAlign output data)" />
70 <param name="dataType" type="select" size="30" label="Data type">
71 <option value="gcms" selected="true">GC-MS</option>
72 <option value="lcms">LC-MS</option>
73 </param>
74 <conditional name="imputationMethod">
75 <param name="type" type="select" size="30" label="Select the approach used for imputing missing values (optional)" help="select how you generated the values to fill in the data gaps">
76 <option value="none" >none</option>
77 <option value="metot" selected="true">MeTot</option>
78 <option value="valueRange">Values range</option>
79 </param>
80 <when value="valueRange">
81 <param name="rangeUpperLimit" type="integer" size="10" value="0" label="Range upper limit" help="values up to this limit will be considered 'generated' values" />
82 </when>
83 </conditional>
84 <conditional name="potDensFuncType">
85 <param name="type" type="select" size="30" label="Select PD function type =====================================================">
86 <option value="original" selected="true">Original</option>
87 </param>
88 <when value="original">
89 <param name="pdf_neighborhoodWindowSize" type="integer" size="10" value="200" label="Effective Peaks" />
90 <param name="pdf_scan_toler" type="float" size="10" value="10" label="Peak Width, in scans" />
91 <param name="pdf_scan_conf" type="float" size="10" value="80" label="Peak Width confidence (0.0 to 99.99)" help="example: 0[no confidence]...50[good guess]...99.9[quite certain])" />
92 <param name="pdf_pears_treshold" type="float" size="10" value="0.8" label="Correlation threshold (0.0 - 1.0)" />
93 <param name="pdf_pears_conf" type="float" size="10" value="98.0" label="Correlation threshold confidence (0.0 to 99.99)" help="example: 0[no confidence]...50[good guess]...99.9[quite certain])" />
94 </when>
95 </conditional>
96 <conditional name="centerSelectionType">
97 <param name="type" type="select" label="Initial Centers selection type ==================================================" >
98 <option value="original" selected="true">Original - Subtractive potential reductions with stop criterion and REUSE tolerances (from PD function)</option>
99 </param>
100 <when value="original">
101 <param name="cs_pears_pd_reductionTreshold" type="float" size="10" value="0.8" label="Potential Density reduction (0.0 - 1.0)" />
102 <param name="cs_pears_pd_reductionSlope" type="float" size="10" value="0.01" label="Potential Density reduction softness " />
103 <param name="cs_stop_criterion" type="float" size="10" value="2" label="Stop Criterion " />
104 </when>
105 </conditional>
106 <conditional name="clusteringType">
107 <param name="type" type="select" label="Classify using ===========================================================">
108 <option value="original" selected="true">Original - Fuzzy clustering, keep original centers and REUSE (scan distance) tolerances</option>
109 <option value="fuzzyCMeans">(experimental) Fuzzy C-Means - Fuzzy clustering, optimize centers</option>
110 </param>
111 <when value="original">
112 <!-- no extra parameters for the "original" classifier. NOTE(review): the "originalNewTol" case below has no matching option in the "type" select above (only "original" and "fuzzyCMeans" are offered), so it looks unreachable from the form; confirm whether it can be removed or the option should be restored -->
113 </when>
114 <when value="originalNewTol">
115 <param name="clust_scan_toler" type="float" size="10" value="10" label="Peak Width, in scans" />
116 <param name="clust_scan_slope" type="float" size="10" value="2" label="Peak Width margin softness" />
117 </when>
118 <when value="fuzzyCMeans">
119 <param name="fcmMembershipWeightingExponent" type="float" size="10" value="2.0" label="Membership Weighting Exponent" help="Influences cluster center repositioning in the iterations 1.1 (exploratory) to around 3.0 (conservative)" />
120 <param name="fcmStopCriterion" type="float" size="10" value="0.05" label="Stop Criterion" help="When convergence is 'reached' (e.g. 0.05 means memberships only changed with 5% in last iteration)" />
121 <param name="fcmCorrelationWeight" type="float" size="10" value="2" label="Correlation weight factor" help="Increase this if you think the correlation is reliable (e.g. you have a high number of samples)" />
122 <conditional name="finalClusterAssembly">
123 <param name="type" type="select" label="Final cluster assembly" >
124 <option value="original" selected="true">Original - distance based</option>
125 <option value="membershipBased">Membership based</option>
126 </param>
127 <when value="membershipBased">
128 <param name="fcmMembershipCutoff" type="select" label="Maximum allowed peak overlap" >
129 <option value="0.05" >~7 clusters</option>
130 <option value="0.10" >~5 clusters</option>
131 <option value="0.20" >~3 clusters</option>
132 </param>
133 </when>
134 <when value="original">
135 <!-- nothing -->
136 </when>
137 </conditional>
138 </when>
139 </conditional>
140
141 <param name="summaryReport" type="boolean" checked="true" label="Generate summary report" help="NB: this will increase the processing time (in some cases up to a few extra minutes)"/>
142
143 <conditional name="advancedSettings">
144 <param name="settings" type="boolean" truevalue="Yes" falsevalue="No" checked="false" label="Advanced settings ========================================================"/>
145 <when value="Yes">
146 <param name="saturationLimit" optional="true" type="integer" size="10" label="Saturation limit (optional)" help="fill in if you have saturation problems in your data" />
147 <param name="sampleSelectionSortType" type="select" label="Sample selection scheme for spectrum peak intensity correction algorithm (optional/experimental)" help="The intensity values to use to select the samples for each cluster/metabolite in which it is most intense/abundant. These samples are used in the peak intensity correction (see parameter below). Use this option to try to avoid samples that have insufficient signal or saturation." >
148 <option value="None">None</option>
149 <!-- in order of best FORWARD scoring when tested on /test/data/report_test_sets/(P2) Relative peak heights in spectra/Input (Test set 1) -->
150 <option value="SIM_INTENSITY" selected="true">SIM intensities</option>
151 <option value="MAX_INTENSITY">Maximum intensities</option>
152 <option value="CENTROTYPE_INTENSITY">Centrotype peak intensities</option>
153 <option value="MIC_INTENSITY">MIC intensities</option>
154 </param>
155 <param name="peakIntensityCorrectionAlgorithm" type="select" label="Spectrum peak intensity correction algorithm (optional/experimental)" help="Whether spectrum peak heights should be adjusted according to their membership to the cluster or to their correlation to the cluster's centrotype ion" >
156 <option value="MEMBERSHIP_BASED">Membership based (msclust 1.0 mode)</option>
157 <option value="CORRELATION_BASED" selected="true">Correlation based</option>
158 </param>
159 <param name="simSelectionAlgorithm" type="select" label="SIM selection algorithm (experimental)" help="Set this if you want to deviate from the standard which is: allow shared SIM peaks for GC-MS data, and force unique SIM peaks for LC-MS data">
160 <option value="" selected="true"></option>
161 <option value="uniqueSIM">Unique SIM peak</option>
162 <option value="sharedSIM">Shared SIM peak</option>
163 </param>
164 <param name="simMassFilter" type="text" optional="true" size="30" label="SIM mass exclusion list" help="Comma-separated list of masses NOT to use as SIM peaks. E.g. '73,147,...' " />
165 <param name="simMembershipThreshold" optional="true" type="float" size="10" label="SIM membership threshold" help="Minimum membership a peak should have to qualify as a SIM candidate. E.g. 0.8 " />
166 <param name="simSaturationThreshold" optional="true" type="float" size="10" label="SIM saturation threshold (%)" help="Maximum % of samples in which a SIM candidate peak may be saturated. If the candidate peak exceeds this threshold, then another peak is chosen. If no peak can be found this criteria, mass 0 is reported" />
167 <param name="simAbsenseThreshold" optional="true" type="float" size="10" label="SIM absence threshold (%)" help="Maximum % of samples in which a SIM candidate peak may be absent. If the candidate peak exceeds this threshold, then another peak is chosen. If no peak can be found meeting this criteria, mass 0 is reported" />
168
169 <param name="micMembershipThreshold" optional="true" type="float" size="10" label="MIC membership threshold" help="Minimum membership a peak should have to be counted in the MIC sum. E.g. 0.8 " />
170
171 </when>
172 </conditional>
173
174
175 </inputs>
176 <outputs>
177 <data name="centrotypesOut" format="msclust.csv" label="${tool.name} on ${on_string} - centrotypes file"/>
178 <data name="simOut" format="msclust.csv" label="${tool.name} on ${on_string} - SIM file"/>
179 <data name="micOut" format="msclust.csv" label="${tool.name} on ${on_string} - MIC file"/>
180 <data name="mspOut" format="msp" label="${tool.name} on ${on_string} - SPECTRA file"/>
181 <data name="classOut" format="msclust.csv" label="${tool.name} on ${on_string} - Classification file"/>
182 <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report">
183 <!-- Optional output: the filter below is evaluated against the "summaryReport" checkbox input; if the expression is false, the file is not created -->
184 <filter>( summaryReport == True )</filter>
185 </data>
186 </outputs>
187 <tests>
188 <!-- find out how to use -->
189 </tests>
190 <help>
191
192 <!-- see also http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets -->
193
194 .. class:: infomark
195
196 This tool extracts spectra from ion-wise aligned MS(/MS) results. It uses expression profiles and
197 retention times of the putative ions to cluster them. Each cluster is then used to generate
198 one spectrum containing the clustered ions (peaks).
199
200 .. image:: $PATH_TO_IMAGES/msclust_summary.png
201
202
203 -----
204
205 **Output**
206
207 This tool returns a number of output files and a small report.
208
209 **Parameters index**
210
211
212 *Select the approach used for imputing missing values:* only select this if you have used a specific method to
213 fill in the data gaps in the input file. One example is replacing zero values by some randomly generated low value.
214 If MeTot is chosen, then a value is considered generated if: the value contains a dot '.' and some number
215 other than 0 (zero) after the dot.
216
217 *Effective Peaks:* Neighborhood window size to consider when calculating density. Smaller values increase
218 performance but are less reliable.
219
220 *Peak Width, in scans:* Scan window width of scans to consider 'close'. One can see this as the
221 'tolerated variation in scans' for the apex positions of the fragment peaks composing a cluster.
222 Note: if MetAlign was used, this is the variation *after* pre-processing by MetAlign.
223
224 *Peak Width confidence:* The higher the confidence, the stricter the threshold.
225
226 *Correlation threshold (0.0 - 1.0):* Tolerance center for pearson distance calculation. The higher this value,
227 the higher the correlation between 2 items has to be for them to be considered 'close'.
228
229 *Correlation threshold confidence:* The higher the confidence, the stricter the threshold. `More...`__
230
231 *Potential Density reduction (0.0 - 1.0):* Reduction tolerance center for pearson distance calculation.
232 The higher this value, the less the low correlated items get reduced, getting a chance to form a cluster of their own.
233
234 *Potential Density reduction softness:* Reduction curve slope for pearson distance tolerance. Lower
235 values = stricter separation at the value determined in 'Potential Density reduction' above
236 (TODO review this comment).
237
238 *Stop Criterion:* When to stop reducing and looking for new clusters. Lower values = more iterations
239
240 .. __: javascript:window.open('$PATH_TO_IMAGES/confidence_and_slope_params_explain.png','popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
241
242
243 -----
244
245 **Output files described below**
246
247 -----
248
249 *SPECTRA:* this file can be submitted to NIST for identification of the spectra.
250
251 `Click here for more details on the Sample selection and Spectrum peak intensity correction algorithm parameters related to SPECTRA generation`_
252
253 .. _Click here for more details on the Sample selection and Spectrum peak intensity correction algorithm parameters related to SPECTRA generation: javascript:window.open('$PATH_TO_IMAGES/sample_sel_and_peak_height_correction.png','popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
254
255 -----
256
257 *MIC:* stands for Measured Ions Count -> it contains, for each cluster, the sum of the ion count
258 values (corrected by their membership) for all MEASURED cluster ions in the given sample.
259
260 The MIC for a **cluster i** in **sample s**, where **cluster i** has **n** members is thus:
261
262 sum ( [intensity of member n in **sample s**] x [membership value of member n in **cluster i** ] )
263
264 -----
265
266 *SIM:* stands for Selective Ion Mode -> it contains, for each cluster, the intensity values of the
267 most representative member ion peak of this cluster. The most representative member peak is the one with the
268 highest membership*average_intensity. This definition leads to conflicts as a peak can have a
269 membership in two or more clusters. The assignment of a SIM peak to a cluster depends on
270 the configured data type (LC or GC-MS). NB: this can be overruled in the "advanced settings":
271
272 (1) LC-MS SIM: select SIM peak only once and for the centrotype in which this specific mass has its
273 highest membership; for neighboring centrotypes use its "second best SIM", etcetera. In other words,
274 if the SIM peak has been identified as the SIM in more than 1 cluster, assign as SIM to the cluster
275 with highest membership. Continue searching for other SIM peaks to assign to the other clusters until
276 all ambiguities are solved.
277
278 (2) GC-MS SIM: the SIM peak can be "shared" by multiple clusters. However, the intensity values are corrected
279 by the membership value of the peak in the cluster in case the SIM peak is "shared". If the SIM peak is not
280 "shared" then the "raw" intensity values of the SIM peak are recorded in the SIM file.
281
282 `Click here for more details on the SIM output file`_
283
284 .. _Click here for more details on the SIM output file: javascript:window.open('$PATH_TO_IMAGES/sample_SIM.png','popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
285
286
287
288 </help>
289 </tool>