comparison dia_umpire_se.xml @ 0:22a1fa7d9d6a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dia_umpire commit 2379480213ba2e084a93bf82052fac858ffd074f
author galaxyp
date Mon, 04 Mar 2019 11:50:10 -0500
parents
children 2b785516abfc
comparison
equal deleted inserted replaced
-1:000000000000 0:22a1fa7d9d6a
1 <tool id="dia_umpire_se" name="DIA_Umpire_SE" version="@VERSION@.0">
2 <description>DIA signal extraction</description>
3 <macros>
4 <import>dia_umpire_macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <expand macro="stdio" />
8 <command>
9 <![CDATA[
10 #import re
11 ## When signal extraction data is requested, outputs go to the extra_files_path of se_ser (symlinked as output_dir) for use by dia_umpire_quant
12 ## Only one input is processed per run, so a user-supplied prefix is always staged as <prefix>_rep1.mzXML
13 #if $se_extraction_data:
14 #set se_params = $se_ser
15 #set $ser_dir = $se_ser.extra_files_path
16 mkdir '$ser_dir'
17 && ln -s '$ser_dir' '$output_dir'
18 && cat '$se_config' > '$se_ser'
19 #else:
20 #set se_params = $params
21 mkdir '$output_dir'
22 && cat '$se_config' > '$se_params'
23 #end if
24 ## Append the runtime thread count to the parameter file; it follows the fixed Thread entry written by the config template
25 && echo " " >> '$se_params'
26 && echo "Thread = \$GALAXY_SLOTS" >> '$se_params'
27 #if $input_prefix and len($input_prefix.strip()) > 0:
28 #set $input_path = str($output_dir) + '/' + $input_prefix.__str__ + '_rep1.mzXML'
29 #else:
30 #set $input_path = str($output_dir) + '/' + $re.sub('\.[mM]\w+$','',$re.sub('[^-a-zA-Z0-9_.]','_',$input.name)) + '.mzXML'
31 #end if
32 && ln -s '${input}' '$input_path'
33 && dia_umpire_se '$input_path' '$se_params'
34 && cat '$output_dir'/*.log >> '$logfile'
35 #if not $mgfs_as_collection:
36 && cp "$output_dir/"*_Q1.mgf '$q1_mgf'
37 && cp "$output_dir/"*_Q2.mgf '$q2_mgf'
38 && cp "$output_dir/"*_Q3.mgf '$q3_mgf'
39 #end if
40 #if $ExportPrecursorPeak:
41 && cp "$output_dir/"*PeakCluster.csv '$PrecursorPeak'
42 #end if
43 ]]>
44 </command>
45 <configfiles>
46 <configfile name="se_config"><![CDATA[#slurp
47 #DIA-Umpire (version @VERSION@)
48 #Data Independent Acquisition data processing and analysis package (Signal extraction module)
49
50 #import re
51 #if $input_prefix and len($input_prefix.strip()) > 0:
52 #set $input_path = $input_prefix.__str__ + "_rep1.mzXML"
53 #else:
54 #set $input_path = $re.sub('\.[mM]\w+$','',$re.sub('[^-a-zA-Z0-9_.]','_',$input.name)) + ".mzXML"
55 #end if
56 # $input.name $input_path $input
57
58 #No of threads
59 Thread = 6
60
61 #Report peak
62 ExportPrecursorPeak = $ExportPrecursorPeak
63 ExportFragmentPeak = $ExportFragmentPeak
64
65 #Signal extraction parameters
66 #if $instrument.model == 'Thermo_Orbitrap':
67 SE.MS1PPM = #if $instrument.SE_MS1PPM then $instrument.SE_MS1PPM else 5#
68 SE.MS2PPM = #if $instrument.SE_MS2PPM then $instrument.SE_MS2PPM else 5#
69 SE.Resolution = #if $instrument.SE_Resolution then $instrument.SE_Resolution else 17000#
70 SE.StartCharge = #if $instrument.SE_StartCharge then $instrument.SE_StartCharge else 2#
71 SE.EndCharge = #if $instrument.SE_EndCharge then $instrument.SE_EndCharge else 4#
72 SE.MS2StartCharge = #if $instrument.SE_MS2StartCharge then $instrument.SE_MS2StartCharge else 2#
73 SE.MS2EndCharge = #if $instrument.SE_MS2EndCharge then $instrument.SE_MS2EndCharge else 4#
74 #else if $instrument.model == 'AB_SCIEX_Triple_TOF_5600':
75 SE.MS1PPM = #if $instrument.SE_MS1PPM then $instrument.SE_MS1PPM else 30#
76 SE.MS2PPM = #if $instrument.SE_MS2PPM then $instrument.SE_MS2PPM else 40#
77 SE.Resolution = #if $instrument.SE_Resolution then $instrument.SE_Resolution else 17000#
78 SE.StartCharge = #if $instrument.SE_StartCharge then $instrument.SE_StartCharge else 2#
79 SE.EndCharge = #if $instrument.SE_EndCharge then $instrument.SE_EndCharge else 4#
80 SE.MS2StartCharge = #if $instrument.SE_MS2StartCharge then $instrument.SE_MS2StartCharge else 2#
81 SE.MS2EndCharge = #if $instrument.SE_MS2EndCharge then $instrument.SE_MS2EndCharge else 4#
82 #else:
83 SE.MS1PPM = #if $instrument.SE_MS1PPM then $instrument.SE_MS1PPM else 30#
84 SE.MS2PPM = #if $instrument.SE_MS2PPM then $instrument.SE_MS2PPM else 40#
85 SE.Resolution = #if $instrument.SE_Resolution then $instrument.SE_Resolution else 17000#
86 SE.StartCharge = #if $instrument.SE_StartCharge then $instrument.SE_StartCharge else 2#
87 SE.EndCharge = #if $instrument.SE_EndCharge then $instrument.SE_EndCharge else 4#
88 SE.MS2StartCharge = #if $instrument.SE_MS2StartCharge then $instrument.SE_MS2StartCharge else 2#
89 SE.MS2EndCharge = #if $instrument.SE_MS2EndCharge then $instrument.SE_MS2EndCharge else 4#
90 #end if
91
92 #if $frag_settings.advanced == 'yes':
93 #Fragment grouping parameters
94 RPmax = #if $frag_settings.RPmax then $frag_settings.RPmax else 25#
95 RFmax = #if $frag_settings.RFmax then $frag_settings.RFmax else 300#
96 CorrThreshold = #if $frag_settings.CorrThreshold then $frag_settings.CorrThreshold else 0.2#
97 DeltaApex = #if $frag_settings.DeltaApex then $frag_settings.DeltaApex else 0.6#
98 RTOverlap = #if $frag_settings.RTOverlap then $frag_settings.RTOverlap else 0.3#
99 AdjustFragIntensity = #if $frag_settings.AdjustFragIntensity then $frag_settings.AdjustFragIntensity else true#
100 BoostComplementaryIon = #if $frag_settings.BoostComplementaryIon then $frag_settings.BoostComplementaryIon else true#
101 #else:
102 #Fragment grouping parameters
103 RPmax = 25
104 RFmax = 300
105 CorrThreshold = 0.2
106 DeltaApex = 0.6
107 RTOverlap = 0.3
108 AdjustFragIntensity = true
109 BoostComplementaryIon = true
110 #end if
111
112 #if $se_settings.advanced == 'yes':
113 #Signal extraction parameters
114 SE.SN = #if $se_settings.SE_SN then $se_settings.SE_SN else 2#
115 SE.MS2SN = #if $se_settings.SE_MS2SN then $se_settings.SE_MS2SN else 2#
116 SE.MinMSIntensity = #if $se_settings.SE_MinMSIntensity then $se_settings.SE_MinMSIntensity else 10#
117 SE.MinMSMSIntensity = #if $se_settings.SE_MinMSMSIntensity then $se_settings.SE_MinMSMSIntensity else 10#
118 SE.MaxCurveRTRange = #if $se_settings.SE_MaxCurveRTRange then $se_settings.SE_MaxCurveRTRange else 1#
119 SE.NoMissedScan = #if $se_settings.SE_NoMissedScan then $se_settings.SE_NoMissedScan else 1#
120 SE.MinFrag = #if $se_settings.SE_MinFrag then $se_settings.SE_MinFrag else 10#
121 SE.EstimateBG = #if $se_settings.SE_EstimateBG then $se_settings.SE_EstimateBG else true#
122 SE.MinNoPeakCluster = #if $se_settings.SE_MinNoPeakCluster then $se_settings.SE_MinNoPeakCluster else 2#
123 SE.MaxNoPeakCluster = #if $se_settings.SE_MaxNoPeakCluster then $se_settings.SE_MaxNoPeakCluster else 4#
124 SE.StartRT = #if $se_settings.SE_StartRT then $se_settings.SE_StartRT else 0#
125 SE.EndRT = #if $se_settings.SE_EndRT then $se_settings.SE_EndRT else 9999#
126 SE.MinMZ = #if $se_settings.SE_MinMZ then $se_settings.SE_MinMZ else 200#
127 SE.MinPrecursorMass = #if $se_settings.SE_MinPrecursorMass then $se_settings.SE_MinPrecursorMass else 700#
128 SE.MaxPrecursorMass = #if $se_settings.SE_MaxPrecursorMass then $se_settings.SE_MaxPrecursorMass else 5000#
129 SE.IsoPattern = #if $se_settings.SE_IsoPattern then $se_settings.SE_IsoPattern else 0.3#
130 SE.MassDefectFilter = #if $se_settings.SE_MassDefectFilter then $se_settings.SE_MassDefectFilter else true#
131 SE.MassDefectOffset = #if $se_settings.SE_MassDefectOffset then $se_settings.SE_MassDefectOffset else 0.1#
132 #if $se_settings.SE_MinMS2NoPeakCluster:
133 SE.MinMS2NoPeakCluster = $se_settings.SE_MinMS2NoPeakCluster
134 #end if
135 #if $se_settings.SE_MinRTRange:
136 SE.MinRTRange = $se_settings.SE_MinRTRange
137 #end if
138 #if $se_settings.SE_RTtol:
139 SE.RTtol = $se_settings.SE_RTtol
140 #end if
141 #if $se_settings.SE_Denoise:
142 SE.Denoise = $se_settings.SE_Denoise
143 #end if
144 #if $se_settings.SE_NoPeakPerMin:
145 SE.NoPeakPerMin = $se_settings.SE_NoPeakPerMin
146 #end if
147 #if $se_settings.SE_RemoveGroupedPeaks:
148 SE.RemoveGroupedPeaks = $se_settings.SE_RemoveGroupedPeaks
149 #end if
150 #else:
151 #Signal extraction parameters
152 SE.SN = 2
153 SE.MS2SN = 2
154 SE.MinMSIntensity = 5
155 SE.MinMSMSIntensity = 1
156 SE.MaxCurveRTRange = 1
157 SE.NoMissedScan = 1
158 SE.MinFrag=10
159 SE.EstimateBG = true
160 SE.MinNoPeakCluster = 2
161 SE.MaxNoPeakCluster = 3#slurp
162 #end if
163
164 ## SE.RemoveGroupedPeaksRTOverlap
165 ## SE.RemoveGroupedPeaksCorr
166 ## SE.IsoCorrThreshold
167
168 #if $instrument.model != 'Thermo_Orbitrap':
169 WindowType = $instrument.window.WindowType
170 #if $instrument.window.WindowType == 'SWATH':
171 WindowSize = #if $instrument.window.WindowSize then $instrument.window.WindowSize else 25#
172 #else if $instrument.window.WindowType == 'V_SWATH':
173 ==window setting begin
174 #if $instrument.window.window_list.window_list_src == 'history':
175 #set $fh = open(str($instrument.window.window_list.WindowListFile),'r')
176 #for $i,$line in enumerate($fh):
177 #set $fields = $line.split()
178 #if len($fields) >= 2:
179 #set $win = '\t'.join($fields[:2])
180 $win
181 #end if
182 #end for
183 $fh.close()
184 #else:
185 #set $win_list = $instrument.window.window_list.WindowList.split('\n')
186 #for $win in $win_list:
187 #set $row = '\t'.join($win.split())
188 $row
189 #end for
190 #end if
191 ==window setting end
192 #echo '#'
193 #end if
194 #end if
195 #slurp]]>
196 </configfile>
197 </configfiles>
198
199 <inputs>
200 <param name="input" type="data" format="mzxml" label="Proteomics Spectrum files in mzXML format"/>
201 <param name="input_prefix" type="text" value="" optional="true" label="File name prefix" help="Names inputs: prefix_rep#.mzXML Leave blank to use History names of input">
202 <validator type="regex" message="Prefix must start with a letter and contain only letters, digits, underscores or hyphens">^[a-zA-Z][a-zA-Z0-9_-]*$</validator>
203 </param>
204 <param name="output_dir" type="hidden" value="gx_path"/>
205
206 <conditional name="instrument">
207 <param name="model" type="select" label="instrument used" help="Sets default parameters">
208 <option value="Thermo_Orbitrap">Thermo Orbitrap</option>
209 <option value="AB_SCIEX_Triple_TOF_5600">AB SCIEX Triple TOF 5600</option>
210 <option value="other">other</option>
211 </param>
212 <when value="Thermo_Orbitrap">
213
214 <param name="SE_MS1PPM" type="float" value="5" min="1" max="20" optional="true" label="Maximum mass error for two MS1 peaks">
215 <help>
216 SE.MS1PPM: (Unit: ppm) Maximum mass error for two MS1 peaks in consecutive spectra to be considered signal of the same ion. Used in MS1 signal detection and precursor alignment between samples/runs.
217 Recommended value: Depends on the instrument. Typical values are 5-10 ppm for Thermo Orbitrap.
218 </help>
219 </param>
220 <param name="SE_MS2PPM" type="integer" value="5" min="1" max="20" optional="true" label="Maximum mass error for two MS2 peaks">
221 <help>
222 SE.MS2PPM: (Unit: ppm) Maximum mass error for two MS2 peaks in consecutive spectra to be considered signal of the same ion.
223 Recommended value: Depends on the instrument. If fragmentation spectra are measured with the same detector as MS1 spectra, set the same as Para.MS1PPM or a little higher, e.g. if you've set Para.MS1PPM=30 ppm for AB SCIEX Triple TOF 5600, consider setting to 40ppm.
224 </help>
225 </param>
226 <expand macro="common_se_params" />
227
228 </when>
229 <when value="AB_SCIEX_Triple_TOF_5600">
230 <param name="SE_MS1PPM" type="float" value="30" min="1" max="50" optional="true" label="Maximum mass error for two MS1 peaks">
231 <help>
232 SE.MS1PPM: (Unit: ppm) Maximum mass error for two MS1 peaks in consecutive spectra to be considered signal of the same ion. Used in MS1 signal detection and precursor alignment between samples/runs.
233 Recommended value: Depends on the instrument. Typical values are 20-40 ppm for AB SCIEX Triple TOF 5600.
234 </help>
235 </param>
236 <param name="SE_MS2PPM" type="integer" value="40" min="1" max="50" optional="true" label="Maximum mass error for two MS2 peaks">
237 <help>
238 SE.MS2PPM: (Unit: ppm) Maximum mass error for two MS2 peaks in consecutive spectra to be considered signal of the same ion.
239 Recommended value: Depends on the instrument. If fragmentation spectra are measured with the same detector as MS1 spectra, set the same as Para.MS1PPM or a little higher, e.g. if you've set Para.MS1PPM=30 ppm for AB SCIEX Triple TOF 5600, consider setting to 40ppm.
240 </help>
241 </param>
242 <expand macro="common_se_params" />
243 <expand macro="se_window_params" />
244 </when>
245 <when value="other">
246 <param name="SE_MS1PPM" type="float" value="30" min="1" max="100" optional="true" label="Maximum mass error for two MS1 peaks">
247 <help>
248 SE.MS1PPM: (Unit: ppm) Maximum mass error for two MS1 peaks in consecutive spectra to be considered signal of the same ion. Used in MS1 signal detection and precursor alignment between samples/runs.
249 Recommended value: Depends on the instrument. Typical values are 5-10 ppm for Thermo Orbitrap, 20-40 ppm for AB SCIEX Triple TOF 5600.
250 </help>
251 </param>
252 <param name="SE_MS2PPM" type="integer" value="40" min="1" max="100" optional="true" label="Maximum mass error for two MS2 peaks">
253 <help>
254 SE.MS2PPM: (Unit: ppm) Maximum mass error for two MS2 peaks in consecutive spectra to be considered signal of the same ion.
255 Recommended value: Depends on the instrument. If fragmentation spectra are measured with the same detector as MS1 spectra, set the same as Para.MS1PPM or a little higher, e.g. if you've set Para.MS1PPM=30 ppm for AB SCIEX Triple TOF 5600, consider setting to 40ppm.
256 </help>
257 </param>
258 <expand macro="common_se_params" />
259 <expand macro="se_window_params" />
260 </when>
261 </conditional>
262
263 <conditional name="frag_settings">
264 <param name="advanced" type="select" label="Advanced Precursor-fragment Grouping Settings" help="Usually do not need to be changed">
265 <option value="no">no</option>
266 <option value="yes">yes</option>
267 </param>
268 <when value="no"/>
269 <when value="yes">
270 <param name="RPmax" type="integer" value="25" optional="true" min="1" label="RPmax">
271 <help>
272 RPmax: Determines how many precursors a single fragment is allowed to be grouped to. Precursors are first sorted by Pearson correlation of elution profiles; this option specifies the rank of a precursor in this sorted list. Lowering the value for this parameter increases the stringency of precursor-fragments grouping. (Default: 25)
273 </help>
274 </param>
275 <param name="RFmax" type="integer" value="300" optional="true" min="1" label="RFmax">
276 <help>
277 RFmax: Determines how many fragments a single precursor is allowed to have. Fragments are first sorted by Pearson correlation of elution profiles; this option specifies the rank of a fragment in this sorted list. The lower - the more stringent. (Default: 300)
278 </help>
279 </param>
280 <param name="CorrThreshold" type="float" value=".2" optional="true" min="0." max="1." label="CorrThreshold">
281 <help>
282 CorrThreshold: (0.0~1.0) Minimum Pearson correlation between a precursor and a fragment to be considered, the higher, the more stringent. (Default: 0.2)
283 </help>
284 </param>
285 <param name="RTOverlap" type="float" value=".3" min="0" optional="true" label="RTOverlap" >
286 <help>
287 RTOverlap: Retention time overlap. (Default: 0.3)
288 </help>
289 </param>
290
291 <param name="DeltaApex" type="float" value=".6" min="0" optional="true" label="DeltaApex" >
292 <help>
293 DeltaApex: (Unit: minute) Maximum retention time difference of LC profile apexes between precursor and fragment (the lower, the more stringent). (Default: 0.6)
294 </help>
295 </param>
296 <param name="BoostComplementaryIon" type="boolean" truevalue="true" falsevalue="false" checked="true" label="BoostComplementaryIon" >
297 <help>
298 BoostComplementaryIon: set to true if you want to boost complementary ions' intensity. The process of complementary ion boosting will also deisotope fragment peaks into singly charged m/z position. (Default: true)
299 </help>
300 </param>
301 <param name="AdjustFragIntensity" type="boolean" truevalue="true" falsevalue="false" checked="true" label="AdjustFragIntensity" >
302 <help>
303 AdjustFragIntensity: set to true if you want to adjust fragment intensity by the Pearson correlation between a precursor and a fragment. (Default: true)
304 </help>
305 </param>
306 </when>
307 </conditional>
308
309 <conditional name="se_settings">
310 <param name="advanced" type="select" label="Advanced Signal Extraction Settings" help="Usually do not need to be changed">
311 <option value="no">no</option>
312 <option value="yes">yes</option>
313 </param>
314 <when value="no"/>
315 <when value="yes">
316
317 <param name="SE_MinMSIntensity" type="float" value="" optional="true" label="MinMSIntensity" >
318 <help>
319 SE.MinMSIntensity: Minimum signal intensity for a peak in an MS1 spectrum to be considered as a valid signal. Any MS1 peak having intensity lower than this threshold will be ignored. It is the main parameter controlling how many peaks and isotopic envelopes will be detected.
320 Recommended value: Depends on the data. Check raw data for average noise- levels. E.g. TOF data often have thousands of random small intensity peaks.
321 Warning: Setting this parameter too low (or zero) in such a case will significantly increase processing time and memory requirements.
322 </help>
323 </param>
324 <param name="SE_MinMSMSIntensity" type="float" value="" optional="true" label="MinMSMSIntensity" >
325 <help>
326 SE.MinMSMSIntensity: Same as MinMSIntensity, but for MS2 signals.
327 </help>
328 </param>
329 <param name="SE_MaxCurveRTRange" type="float" value="" optional="true" label="MaxCurveRTRange" >
330 <help>
331 SE.MaxCurveRTRange: (Unit: minute) The maximum allowed retention time (RT) range for elution profile of a single ion. If a detected elution profile exceeds that time span, it will be trimmed around the apex to fit into this range. Used to avoid having lots of ions which elute during the whole LC/MS run or over a very long period of time, as this greatly complicates grouping of precursors to fragments. Such long-eluting ions are likely to be contaminants, lock-mass ions, calibrants, etc.
332 Recommended value: The expected maximum peak chromatographic time. E.g. set to several percent of the whole run time, if the run was 100 min long, set to 5 min.
333 </help>
334 </param>
335 <param name="SE_SN" type="float" value="" optional="true" label="SN Threshold" >
336 <help>
337 SE.SN: Minimum signal-to-noise threshold for MS1 precursor signal detection. It is not the real S/N value, but rather a multiplier for MinMSIntensity: if a detected elution profile is less intense at the apex than (SN x MinMSIntensity), it will be discarded.
338 Recommended value: Typical values depend on the MinMSIntensity setting. If you've set MinMSIntensity to a very low value, consider setting this one to some small number in range 1.0 - 5.0.
339 </help>
340 </param>
341 <param name="SE_MS2SN" type="float" value="" optional="true" label="MS2SN Threshold" >
342 <help>
343 Same as para.SN, but for possible unfragmented precursors in MS2 data (i.e. for selecting precursors to generate Q3 tier pseudo spectra).
344 </help>
345 </param>
346 <param name="SE_StartRT" type="float" value="0" optional="true" label="StartRT" >
347 <help>
348 </help>
349 </param>
350 <param name="SE_EndRT" type="float" value="9999" optional="true" label="EndRT" >
351 <help>
352 </help>
353 </param>
354 <param name="SE_MinMZ" type="float" value="200" optional="true" label="MinMZ" >
355 <help>
356 </help>
357 </param>
358 <param name="SE_MinPrecursorMass" type="float" value="700" optional="true" label="MinPrecursorMass" >
359 <help>
360 </help>
361 </param>
362 <param name="SE_MaxPrecursorMass" type="float" value="5000" optional="true" label="MaxPrecursorMass" >
363 <help>
364 </help>
365 </param>
366 <param name="SE_IsoPattern" type="float" value="0.3" optional="true" label="IsoPattern" >
367 <help>
368 </help>
369 </param>
370 <param name="SE_MassDefectFilter" type="boolean" truevalue="true" falsevalue="false" checked="true" label="MassDefectFilter" >
371 <help>
372 </help>
373 </param>
374 <param name="SE_MassDefectOffset" type="float" value="0.1" optional="true" label="MassDefectOffset" >
375 <help>
376 </help>
377 </param>
378
379 <param name="SE_MinRTRange" type="float" value="" optional="true" label="MinRTRange" >
380 <help>
381
382 </help>
383 </param>
384 <param name="SE_MaxNoPeakCluster" type="integer" value="" optional="true" label="MaxNoPeakCluster" >
385 <help>
386 SE.MaxNoPeakCluster (new parameter in v1.4): Maximum number of isotope peaks for a precursor feature.
387 </help>
388 </param>
389 <param name="SE_MinNoPeakCluster" type="integer" value="" optional="true" label="MinNoPeakCluster" >
390 <help>
391 SE.MinNoPeakCluster (new parameter in v1.4): Minimum number of isotope peaks for a precursor feature. When it is set as 1, the algorithm will group fragments even for peaks without any isotope signal being found. For these cases, the assumed charged states will be from the parameter SE.StartCharge to SE.EndCharge.
392 </help>
393 </param>
394 <param name="SE_MinMS2NoPeakCluster" type="integer" value="" optional="true" label="MinMS2NoPeakCluster" >
395 <help>
396 SE.MinMS2NoPeakCluster (new parameter in v1.4): Minimum number of isotope peaks for a MS2 feature. When it is set as 1, the algorithm will group fragments even for peaks without any isotope signal being found. For these cases, the assumed charged states will be from the parameter SE.StartCharge to SE.EndCharge.
397
398 </help>
399 </param>
400 <param name="SE_RTtol" type="float" value="" optional="true" label="RTtol" >
401 <help>
402 </help>
403 </param>
404 <param name="SE_NoPeakPerMin" type="integer" value="" optional="true" label="NoPeakPerMin" >
405 <help>
406 </help>
407 </param>
408 <param name="SE_NoMissedScan" type="integer" value="" optional="true" label="NoMissedScan" >
409 <help>
410 SE.NoMissedScan: Maximum number of consecutive "gaps" allowed during extraction of elution profile (scans, in which the precursor mass being traced was not detected). E.g. if set to 1 and a particular mass can be found at every second scan, the algorithm will trace such a peak unless it can't find the peak in 2 scans in a row.
411 </help>
412 </param>
413 <param name="SE_Denoise" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Denoise" >
414 <help>
415 </help>
416 </param>
417 <param name="SE_EstimateBG" type="boolean" truevalue="true" falsevalue="false" checked="true" label="EstimateBG" >
418 <help>
419 SE.MinFrag: Minimum number of fragments for a precursor. Precursors which have less than the set number of fragments will be removed from pseudo MS/MS spectra.
420 </help>
421 </param>
422 <param name="SE_RemoveGroupedPeaks" type="boolean" truevalue="true" falsevalue="false" checked="true" label="RemoveGroupedPeaks" >
423 <help>
424 </help>
425 </param>
426 <param name="SE_MinFrag" type="integer" value="" optional="true" label="MinFrag" >
427 <help>
428 SE.MinFrag: Minimum number of fragments for a precursor. Precursors which have less than the set number of fragments will be removed from pseudo MS/MS spectra.
429 </help>
430 </param>
431 </when>
432 </conditional>
433
434 <param name="ExportPrecursorPeak" type="boolean" truevalue="true" falsevalue="false" checked="false"
435 label="ExportPrecursorPeak"
436 help="Output detailed information about detected MS1 precursor and MS2 unfragmented precursor signals"/>
437 <param name="ExportFragmentPeak" type="boolean" truevalue="true" falsevalue="false" checked="false"
438 label="ExportFragmentPeak"
439 help="Output detailed information about detected MS2 signals"/>
440 <param name="se_extraction_data" type="boolean" truevalue="Signal Extraction data" falsevalue="diaumpire_se.params" checked="false"
441 label="Output Signal Extraction data for DIA_Umpire_Quant" />
442 <param name="mgfs_as_collection" type="boolean" truevalue="true" falsevalue="false" checked="false"
443 label="Output MGFs as a collection" />
444
445 </inputs>
446
447 <outputs>
448 <data format="txt" name="logfile" label="${tool.name} ${on_string} log"/>
449 <data format="dia_umpire.ser" name="se_ser" label="${tool.name} ${input.name} ${se_extraction_data}">
450 <filter>se_extraction_data</filter>
451 </data>
452 <data format="txt" name="params" label="${tool.name} ${input.name} ${se_extraction_data}">
453 <filter>not se_extraction_data</filter>
454 </data>
455 <data format="csv" name="PrecursorPeak" label="${tool.name} ${input.name} PeakCluster.csv" from_work_dir="gx_path/swath_PeakCurve.csv">
456 <filter>ExportPrecursorPeak</filter>
457 </data>
458 <!--
459 <data format="csv" name="FragmentPeak" label="" from_work_dir="gx_path/swath_PeakCurve.csv">
460 <filter>ExportFragmentPeak</filter>
461 </data>
462 -->
463 <collection name="dia_umpire_se_mgfs" type="list" label="${tool.name} MGFs">
464 <filter>mgfs_as_collection</filter>
465 <discover_datasets pattern="(?P&lt;name&gt;.*Q[1-3]\.mgf)" ext="mgf" visible="false" directory="gx_path" />
466 </collection>
467 <data format="mgf" name="q1_mgf" label="${tool.name} ${($input.name[:-6] if $input.name.endswith('.mxXML') or $input.name.endswith('.mxxml') else $input.name).rsplit('.',1)[0]}_Q1.mgf" >
468 <filter>not mgfs_as_collection</filter>
469 </data>
470 <data format="mgf" name="q2_mgf" label="${tool.name} ${($input.name[:-6] if $input.name.endswith('.mxXML') or $input.name.endswith('.mxxml') else $input.name).rsplit('.',1)[0]}_Q2.mgf" >
471 <filter>not mgfs_as_collection</filter>
472 </data>
473 <data format="mgf" name="q3_mgf" label="${tool.name} ${($input.name[:-6] if $input.name.endswith('.mxXML') or $input.name.endswith('.mxxml') else $input.name).rsplit('.',1)[0]}_Q3.mgf" >
474 <filter>not mgfs_as_collection</filter>
475 </data>
476 </outputs>
477 <tests>
478 <test>
479 <param name="input" value="LongSwath_UPS1_1ug_rep1_xs.mzXML" ftype="mzxml"/>
480 <conditional name="instrument">
481 <param name="model" value="AB_SCIEX_Triple_TOF_5600"/>
482 <param name="SE_MS1PPM" value="30"/>
483 <param name="SE_MS2PPM" value="40"/>
484 <conditional name="window">
485 <param name="WindowType" value="SWATH"/>
486 <param name="WindowSize" value="25"/>
487 </conditional>
488 </conditional>
489 <output name="q2_mgf">
490 <assert_contents>
491 <has_text text="BEGIN IONS" />
492 <has_text_matching expression="^PEPMASS=740.\d+$" />
493 </assert_contents>
494 </output>
495 </test>
496 </tests>
497 <help>
498 <![CDATA[
499 ================================
500 **DIA-Umpire signal extraction**
501 ================================
502
503 DIA_Umpire_SE.jar provides the signal extraction module for DIA data (regular SWATH with fixed isolation window size, variable window SWATH, MSX) which generates pseudo MS/MS spectra to be searched against a protein database using conventional proteomics search engines such as X!Tandem, SEQUEST, MSGF+, OMSSA, etc.
504
505 Manual: http://sourceforge.net/projects/diaumpire/files/Manual/DIA_Umpire_Manual_v1.4_pre.pdf
506
507 **Input** (DIA-Umpire signal extraction module):
508 ================================================
509
510 1. Spectral data in mzXML format
511
512 **Important**: for AB SCIEX data, use AB SCIEX MS Data Converter (https://sciex.com/x32750):
513
514 Galaxy tool: https://toolshed.g2.bx.psu.edu/view/galaxyp/ms_data_converter/a36e9f847308
515
516 Use it for .wiff -> .mzML conversion, then use MSConvert for .mzML -> .mzXML. Read "Raw spectral data files conversion to mzXML" section in the manual for more details.
517
518
519 **Signal extraction parameters**:
520 =================================
521
522 *SE.MS1PPM*: (Unit: ppm) Maximum mass error for two MS1 peaks in consecutive spectra to be considered signal of the same ion. Used in MS1 signal detection and precursor alignment between samples/runs. Recommended value: Depends on the instrument. Typical values are 5-10 ppm for Thermo Orbitrap, 20-40 ppm for AB SCIEX Triple TOF 5600.
523
524 *SE.MS2PPM*: (Unit: ppm) Maximum mass error for two MS2 peaks in consecutive spectra to be considered signal of the same ion. Recommended value: Depends on the instrument. If fragmentation spectra are measured with the same detector as MS1 spectra, set the same as Para.MS1PPM or a little higher, e.g. if you've set Para.MS1PPM=30 ppm for AB SCIEX Triple TOF 5600, consider setting to 40ppm.
525
526 *SE.Resolution*: Used only if the input spectra are stored in profile mode (i.e. not centroided, e.g. by using "Peak Picking" option in MSConvert when converting raw spectral data to mzXML format). Profile spectra will be centroided using a sliding window. The window is moved across the entire mass range of a spectrum. Only the most intense peak in the window centered at the peak m/z is kept, others are discarded. The window width is calculated based on this parameter as: width = mz / para.Resolution. Recommended value: Depends on the instrument and acquisition settings. Either check raw data to see the real average resolution of peaks in spectra or consult vendor specifications for the instrument. For AB SCIEX TripleTOF 5600 we use 15000-20000.
527
528 *SE.StartCharge*: The minimum charge state for MS1 precursor ion to be detected during isotopic peak grouping.
529
530 *SE.EndCharge*: The maximum charge state for MS1 precursor ion to be detected during isotopic peak grouping. Recommended value: it is not recommended to set this parameter higher than 5 for typical proteomic experiments, as it is unlikely to observe peptides of higher charge states.
531
532 *SE.MS2StartCharge*: The minimum charge state for MS2 unfragmented precursor ion to be detected during isotopic peak grouping.
533
534 *SE.MS2EndCharge*: The maximum charge state for MS2 unfragmented precursor ion to be detected during isotopic peak grouping. Recommended value: it is not recommended to set this parameter higher than 5 for typical proteomic experiments, as it is unlikely to observe peptides of higher charge states.
535
536 **DIA isolation window settings**:
537
538 *WindowType*: DIA experiment type. DIA is implemented differently by different vendors and current support for data-formats is lacking, so the program needs additional info to properly interpret input spectral data. Supported values in this version:
539
540 * SWATH - fixed window size SWATH, as described in the original SWATH paper. If you're using this option, it's mandatory to specify WindowSize option as well.
541
542 * V_SWATH - variable window size SWATH. If you're using this option, it's mandatory to specify Variable SWATH window setting (see section below).
543
544 * MSX - 2Da isolation window, its position is shuffled randomly until the whole MS1 range is covered, the process is then repeated but coverage of MS1 range by isolation windows will be different because of randomization.
545
546 * MSE - as originally implemented in Waters instruments. The full MS1 range is being fragmented at once.
547
548 *WindowSize*: Isolation window size setting for fixed window SWATH. (Please skip this part if the data is from Thermo instrument) Note: The window size is to be specified including overlapping regions. I.e. if your windows are: 399.5-425.5, 424.5 - 450.5, etc., then the window size should be set to 26. Note: Was tested only on AB SCIEX TripleTOF 5600 and Thermo Q-Exactive and Fusion data.
549
550 Variable SWATH window setting: Isolation settings for variable window size SWATH. (Please skip this part if the data is from Thermo instrument). The format should be a tab-delimited list of m/z low and high values, one window per row.
551
552
553 **Output files of DIA-Umpire signal extraction module**:
554 ========================================================
555
556 1. *DIA_Umpire_SE MGFs* - Three .mgf files per input .mzXML file - pseudo MS/MS spectra sets for different quality categories of detected precursor signals (see the Online Methods of the publication for details). These can be either individual history items or a dataset collection. Example:
557
558 1. <filename>_Q1.mgf
559 2. <filename>_Q2.mgf
560 3. <filename>_Q3.mgf
561
562 Note: Each file corresponds to a different "quality level" of precursor ions (Q1 = more than two isotopic peaks detected in MS1, Q2 = only two isotopic peaks detected, Q3 = detected unfragmented precursor in MS2). These spectra are written to separate files, because they must be searched separately against a protein database as a consequence of differences in FDR estimates for these varying quality data.
563
564 2. *DIA_Umpire_SE Signal Extraction data* - includes the binary files (.ser) that contain all necessary information for quantitation procedures (parameter settings, all detected precursor and fragment peaks, precursor-fragment grouping information).
565
566 3. If ExportPrecursorPeak and/or ExportFragmentPeak options were set to true, text files with detailed information about detected MS1 and/or MS2 features will be generated.
567
568
569 ]]>
570 </help>
571 <expand macro="citations" />
572 </tool>