Mercurial > repos > galaxyp > msstats
comparison msstats.xml @ 0:80b40b9ab835 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msstats commit d2341b64b690d975bc6e29c81f7d13b66c0c5b7f"
author | galaxyp |
---|---|
date | Sat, 25 Jul 2020 13:21:47 -0400 |
parents | |
children | 3e2606fa85bf |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:80b40b9ab835 |
---|---|
1 <tool id="msstats" name="MSstats" version="@VERSION@.0" python_template_version="3.5"> | |
2 <description>statistical relative protein significance analysis in DDA, SRM and DIA Mass Spectrometry</description> | |
<!-- Shared definitions. The @VERSION@ token feeds both the tool version and the
     bioconductor-msstats requirement; the <xml> macros below are expanded inside
     the per-source "input_options" sections of <inputs>. -->
3 <macros> | |
4 <token name="@VERSION@">3.20.1</token> | |
<!-- Boolean rendered as the literal TRUE/FALSE, which the R script inserts unquoted. -->
5 <xml name="useUniquePeptide"> | |
6 <param name="useUniquePeptide" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="remove peptides that are assigned for more than one proteins" help="We assume to use unique peptide for each protein"/> | |
7 </xml> | |
<!-- The chosen value (max or sum) is inserted unquoted into the R script, so it
     reaches the converter as an R symbol rather than as a string. -->
8 <xml name="summaryforMultipleRows"> | |
9 <param name="summaryforMultipleRows" type="select" label="Summary for MultipleRows" help="summaryforMultipleRows - when there are multiple measurements for certain feature and certain run, use highest or sum of all"> | |
10 <option value="max" selected="true">max</option> | |
11 <option value="sum">sum</option> | |
12 </param> | |
13 </xml> | |
<!-- The chosen value (remove or keep) is inserted into the R script as a quoted string. -->
14 <xml name="fewMeasurements"> | |
15 <param name="fewMeasurements" type="select" label="Remove the features that have 1 or 2 measurements across runs" help="(fewMeasurements)"> | |
16 <option value="remove" selected="true">remove</option> | |
17 <option value="keep">keep</option> | |
18 </param> | |
19 </xml> | |
<!-- Expanded by the MaxQuant and OpenMS option sections; the OpenSWATH section
     declares its own removeProtein_with1Feature parameter instead. -->
20 <xml name="removeProtein_with1Peptide"> | |
21 <param name="removeProtein_with1Peptide" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Remove the proteins which have only 1 peptide and charge"/> | |
22 </xml> | |
23 | |
24 </macros> | |
<!-- Single package dependency; its version is taken from the @VERSION@ token
     declared in <macros>, so the tool version and the package version stay in step. -->
25 <requirements> | |
26 <requirement type="package" version="@VERSION@">bioconductor-msstats</requirement> | |
27 </requirements> | |
<!-- Job command line:
       1. copy the rendered R script to the r_script output dataset,
       2. echo the script to stdout,
       3. run it with Rscript,
       4. concatenate every msstats*.log in the working directory into the log output.
     All dataset paths are single-quoted so the shell treats each as one word. -->
28 <command detect_errors="exit_code"><![CDATA[ | |
29 cat '$msstats_script' > '$r_script' && | |
30 cat '$msstats_script' && | |
31 Rscript '$msstats_script' | |
32 && cat msstats*.log > '$log' | |
33 ]]></command> | |
<!-- Cheetah template that is rendered to the R script run by <command>.
     Stages: (1) read the input in the selected source format into `raw`,
     (2) dataProcess, (3) optional tables and plots, (4) optional group comparison.
     Inside the template `\$` and `\#` emit a literal `$` / `#` for R, while
     lines starting with `##` are Cheetah comments and do not reach the script. -->
34 <configfiles> | |
35 <configfile name="msstats_script"><![CDATA[ | |
36 library('MSstats', warn.conflicts = FALSE, quietly = TRUE, verbose = FALSE) | |
37 | |
## Stage 1: build `raw` from whichever input source was chosen.
## csv datasets are read with read.csv, everything else as tab-separated text.
38 #if $input.input_src == 'MSstats' | |
39 | |
40 #if $input.msstats_input.is_of_type('csv') | |
41 raw <- read.csv("$input.msstats_input") | |
42 #else | |
43 raw <- read.table("$input.msstats_input", sep="\t", header=TRUE) | |
44 #end if | |
45 | |
46 #elif $input.input_src == 'MaxQuant' | |
47 \# Read in MaxQuant files | |
48 mq_evidence <- read.table("$input.evidence", sep="\t", header=TRUE) | |
49 | |
50 mq_proteinGroups <- read.table("$input.proteinGroups", sep="\t", header=TRUE) | |
51 | |
52 \# Read in annotation including condition and biological replicates per run. | |
53 \# Users should make this annotation file. It is not the output from MaxQuant. | |
54 #if $input.annotation.is_of_type('csv') | |
55 annot <- read.csv("$input.annotation", header=TRUE) | |
56 #else | |
57 annot <- read.table("$input.annotation", sep="\t", header=TRUE) | |
58 #end if | |
59 | |
## summaryforMultipleRows is deliberately unquoted: the value is passed as an R symbol.
60 raw <- MaxQtoMSstatsFormat(evidence=mq_evidence, | |
61 proteinGroups=mq_proteinGroups, | |
62 annotation=annot, | |
63 proteinID="$input.proteinID", | |
64 useUniquePeptide=$input.input_options.useUniquePeptide, | |
65 summaryforMultipleRows=$input.input_options.summaryforMultipleRows, | |
66 fewMeasurements="$input.input_options.fewMeasurements", | |
67 removeMpeptides=$input.input_options.removeMpeptides, | |
68 removeOxidationMpeptides=$input.input_options.removeOxidationMpeptides, | |
69 removeProtein_with1Peptide=$input.input_options.removeProtein_with1Peptide) | |
70 | |
71 #elif $input.input_src == 'OpenMS' | |
72 | |
73 #if $input.evidence.is_of_type('csv') | |
74 input <- read.csv("$input.evidence", header=TRUE) | |
75 #else | |
76 input <- read.table("$input.evidence", sep="\t", header=TRUE) | |
77 #end if | |
78 #if $input.annotation.is_of_type('csv') | |
79 annot <- read.csv("$input.annotation", header=TRUE) | |
80 #else | |
81 annot <- read.table("$input.annotation", sep="\t", header=TRUE) | |
82 #end if | |
83 | |
84 raw <- OpenMStoMSstatsFormat(input, | |
85 annotation=annot, | |
86 useUniquePeptide=$input.input_options.useUniquePeptide, | |
87 summaryforMultipleRows=$input.input_options.summaryforMultipleRows, | |
88 fewMeasurements="$input.input_options.fewMeasurements", | |
89 removeProtein_with1Peptide=$input.input_options.removeProtein_with1Peptide) | |
90 | |
91 #elif $input.input_src == 'OpenSWATH' | |
92 | |
93 #if $input.evidence.is_of_type('csv') | |
94 input <- read.csv("$input.evidence", header=TRUE) | |
95 #else | |
96 input <- read.table("$input.evidence", sep="\t", header=TRUE) | |
97 #end if | |
98 #if $input.annotation.is_of_type('csv') | |
99 annot <- read.csv("$input.annotation", header=TRUE) | |
100 #else | |
101 annot <- read.table("$input.annotation", sep="\t", header=TRUE) | |
102 #end if | |
103 | |
104 raw <- OpenSWATHtoMSstatsFormat(input, | |
105 annotation=annot, | |
106 filter_with_mscore=$input.input_options.filter_with_mscore, | |
107 mscore_cutoff=$input.input_options.mscore_cutoff, | |
108 useUniquePeptide=$input.input_options.useUniquePeptide, | |
109 fewMeasurements="$input.input_options.fewMeasurements", | |
110 removeProtein_with1Feature=$input.input_options.removeProtein_with1Feature, | |
111 summaryforMultipleRows=$input.input_options.summaryforMultipleRows) | |
112 | |
113 #end if | |
114 | |
## Stage 2: dataProcess. Arguments that only apply to one choice of normalization,
## feature subset or summary method are emitted only when that choice is active.
115 processed_data <- dataProcess(raw, | |
116 logTrans=$dp_options.logTrans, | |
117 normalization="$dp_options.norm.normalization", | |
118 #if $dp_options.norm.normalization == 'globalStandards' | |
119 nameStandards=c($dp_options.norm.nameStandards), | |
120 #end if | |
121 ## address=$dp_options.address, | |
122 fillIncompleteRows=$dp_options.fillIncompleteRows, | |
123 featureSubset="$dp_options.features.featureSubset", | |
124 #if $dp_options.features.featureSubset == 'topN' | |
125 n_top_feature=$dp_options.features.n_top_feature, | |
126 #end if | |
127 #if $dp_options.features.featureSubset == 'highQuality' | |
128 remove_uninformative_feature_outlier=$dp_options.features.remove_uninformative_feature_outlier, | |
129 #end if | |
130 summaryMethod="$dp_options.summarize.summaryMethod", | |
131 #if $dp_options.summarize.summaryMethod == 'TMP' | |
132 MBimpute=$dp_options.summarize.MBimpute, | |
133 remove50missing=$dp_options.summarize.remove50missing, | |
134 #end if | |
135 #if $dp_options.summarize.summaryMethod == 'linear' | |
136 equalFeatureVar=$dp_options.summarize.equalFeatureVar, | |
137 #end if | |
## The select value 'NULL' must become the R constant NULL, not the string "NULL".
138 #if $dp_options.censoredInt == 'NULL' | |
139 censoredInt=NULL, | |
140 #else | |
141 censoredInt="$dp_options.censoredInt", | |
142 #end if | |
143 cutoffCensored="$dp_options.cutoffCensored", | |
144 maxQuantileforCensored=$dp_options.maxQuantileforCensored, | |
145 clusters=NULL) | |
146 | |
## Stage 3: optional tables and plots. File names written here must match the
## from_work_dir attributes declared in the outputs section of the tool.
147 #if 'processed_data' in $selected_outputs | |
148 write.table(processed_data\$ProcessedData, "ProcessedData.tsv", sep = "\t", quote = FALSE, row.names = FALSE, dec = ".") | |
149 #end if | |
150 #if 'runlevel_data' in $selected_outputs | |
151 write.table(processed_data\$RunlevelData, "RunlevelData.tsv", sep = "\t", quote = FALSE, row.names = FALSE, dec = ".") | |
152 #end if | |
153 | |
154 #if 'qcplot' in $selected_outputs | |
155 dataProcessPlots(data = processed_data, type="QCplot", ylimUp=35, | |
156 width=5, height=5, address="MSStats_only_") | |
157 #end if | |
158 | |
## The ProfilePlot call is the only statement in this script that can write the
## ProfilePlot_wSummarization PDF, so it has to run when either profile output
## is selected; otherwise choosing only 'profile_wsum_plot' yields no file.
159 #if 'profile_plot' in $selected_outputs or 'profile_wsum_plot' in $selected_outputs | |
160 dataProcessPlots(data = processed_data, type="ProfilePlot", ylimUp=35, featureName="NA", width=5, height=5, address="MSStats_only_") | |
161 #end if | |
162 | |
163 #if 'condition_plot' in $selected_outputs | |
164 dataProcessPlots(data = processed_data, type="ConditionPlot", width=5, height=5, address="MSStats_only_") | |
165 #end if | |
166 | |
167 ## Quantification | |
168 #if 'quant_sample_matrix' in $selected_outputs | |
169 sampleQuantMatrix <- quantification(processed_data, type="Sample") | |
170 write.table(sampleQuantMatrix, "SampleQuantificationMatrix.tsv", sep = "\t", quote = FALSE, row.names = FALSE, dec = ".") | |
171 #end if | |
172 | |
173 #if 'quant_sample_long' in $selected_outputs | |
174 sampleQuantLong <- quantification(processed_data, type="Sample", format="long") | |
175 write.table(sampleQuantLong, "SampleQuantificationLong.tsv", sep = "\t", quote = FALSE, row.names = FALSE, dec = ".") | |
176 #end if | |
177 | |
178 #if 'quant_group_matrix' in $selected_outputs | |
179 groupQuantMatrix <- quantification(processed_data, type="Group") | |
180 write.table(groupQuantMatrix, "GroupQuantificationMatrix.tsv", sep = "\t", quote = FALSE, row.names = FALSE, dec = ".") | |
181 #end if | |
182 | |
183 #if 'quant_group_long' in $selected_outputs | |
184 groupQuantLong <- quantification(processed_data, type="Group", format="long") | |
185 write.table(groupQuantLong, "GroupQuantificationLong.tsv", sep = "\t", quote = FALSE, row.names = FALSE, dec = ".") | |
186 #end if | |
187 | |
188 ## Group Comparison | |
## Stage 4: everything below is emitted only when group comparison is enabled.
189 #if $group.group_comparison == 'yes' | |
190 \# Group Comparison | |
191 #if $group.comparison_matrix.is_of_type('csv') | |
192 comp_matrix <- read.csv("$group.comparison_matrix", header=TRUE) | |
193 #else | |
194 comp_matrix <- read.table("$group.comparison_matrix", sep="\t", header=TRUE) | |
195 #end if | |
196 | |
197 ## first columns contains comparison names, use as row name | |
198 comparison <- comp_matrix[,-1] | |
199 row.names(comparison) <- as.character(comp_matrix[,1]) | |
200 ## order of conditions has to be the same as they appear in the levels function | |
201 comparison <- as.matrix(comparison[levels(processed_data\$ProcessedData\$GROUP_ORIGINAL)]) | |
202 | |
203 ## perform group comparison | |
204 comparisons <- groupComparison(contrast.matrix = comparison, data = processed_data) | |
205 | |
## The fitted model is always printed to stdout; the file copy is optional.
206 print(comparisons\$fittedmodel) | |
207 #if 'fittedmodel' in $group.select_outputs | |
208 capture.output(print(comparisons\$fittedmodel), file="ComparisonFittedModel.txt") | |
209 #end if | |
210 | |
211 | |
212 #if 'comparison_result' in $group.select_outputs | |
213 write.table(comparisons\$ComparisonResult, "ComparisonResult.tsv", sep = "\t", quote = FALSE, row.names = FALSE, dec = ".") | |
214 #end if | |
215 | |
216 #if 'model_qc' in $group.select_outputs | |
217 write.table(comparisons\$ModelQC, "ModelQC.tsv", sep = "\t", quote = FALSE, row.names = FALSE, dec = ".") | |
218 #end if | |
219 | |
220 ## TODO: transform fittedmodel to table | |
221 ##class(DDA2009.comparisons$fittedmodel) # list, probably good to output this somehow | |
222 | |
223 ## Visualizations: | |
224 | |
225 #if 'qqplot' in $group.select_outputs | |
226 \# normal quantile-quantile plots | |
227 modelBasedQCPlots(data=comparisons, type="QQPlots", | |
228 width=5, height=5, address="MSStats_group_") | |
229 #end if | |
230 | |
231 #if 'residualplot' in $group.select_outputs | |
232 \# residual plots | |
233 modelBasedQCPlots(data=comparisons, type="ResidualPlots", | |
234 width=5, height=5, address="MSStats_group_") | |
235 #end if | |
236 | |
237 #if 'volcanoplot' in $group.select_outputs | |
238 \# volcano plot | |
239 groupComparisonPlots(data = comparisons\$ComparisonResult, type = 'VolcanoPlot', | |
240 width=5, height=5, address="MSStats_group_") | |
241 #end if | |
242 | |
243 #if 'heatmap' in $group.select_outputs | |
244 \# heatmap - works only for more than 1 comparison | |
245 if (nrow(comparison)>1) | |
246 { | |
247 groupComparisonPlots(data = comparisons\$ComparisonResult, type = 'Heatmap', address="MSStats_group_") | |
248 } | |
249 #end if | |
250 | |
251 #if 'comparisonplot' in $group.select_outputs | |
252 \#comparison | |
253 groupComparisonPlots(data=comparisons\$ComparisonResult, type="ComparisonPlot", | |
254 width=5, height=5, address="MSStats_group_") | |
255 #end if | |
256 | |
257 #end if | |
258 ]]></configfile> | |
259 </configfiles> | |
260 <inputs> | |
<!-- Input source selector. Each <when> branch declares the datasets and the
     "input_options" section consumed by the matching branch of the R script
     (MaxQtoMSstatsFormat, OpenMStoMSstatsFormat or OpenSWATHtoMSstatsFormat);
     the MSstats branch needs no conversion. Parameter names are referenced by
     the script and the tests, so only user-facing labels/titles are corrected here. -->
261 <conditional name="input"> | |
262 <param name="input_src" type="select" label="input source"> | |
263 <option value="MSstats">MStats 10 column format</option> | |
264 <option value="MaxQuant">MaxQuant</option> | |
265 <option value="OpenMS">OpenMS</option> | |
266 <option value="OpenSWATH">OpenSWATH</option> | |
267 </param> | |
268 <when value="MSstats"> | |
269 <param name="msstats_input" type="data" format="tabular,csv" label="MSstats 10-column input"/> | |
270 </when> | |
271 <when value="MaxQuant"> | |
272 <param name="evidence" type="data" format="tabular,csv" label="evidence.txt - feature-level data"/> | |
273 <param name="annotation" type="data" format="tabular,csv" label="annotation.txt data which includes Raw.file, Condition, BioReplicate, Run, IsotopeLabelType information"/> | |
274 <param name="proteinGroups" type="data" format="tabular,csv" label="proteinGroups.txt" help="It needs to matching protein group ID. If proteinGroups=NULL, use 'Proteins' column in 'evidence.txt'"/> | |
275 <param name="proteinID" type="select" label="Select Protein ID in evidence.txt"> | |
276 <option value="Proteins">Protein column</option> | |
277 <option value="Leading.razor.protein">Leading razor protein column</option> | |
278 </param> | |
279 <section name="input_options" title="MaxQtoMSstatsFormat Options" expanded="false"> | |
280 <expand macro="useUniquePeptide"/> | |
281 <expand macro="summaryforMultipleRows"/> | |
282 <expand macro="fewMeasurements"/> | |
283 <param name="removeMpeptides" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Remove the peptides including 'M' sequence"/> | |
284 <param name="removeOxidationMpeptides" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Remove the peptides including Oxidized 'M' sequence"/> | |
285 <expand macro="removeProtein_with1Peptide"/> | |
286 </section> | |
287 </when> | |
<!-- OpenMS branch: labels and section title name OpenMS (they previously carried
     text copied from the OpenSWATH and MaxQuant branches). -->
288 <when value="OpenMS"> | |
289 <param name="evidence" type="data" format="tabular,csv" label="OpenMS_input"/> | |
290 <param name="annotation" type="data" format="tabular,csv" label="OpenMS_annotation"/> | |
291 <section name="input_options" title="OpenMStoMSstatsFormat Options" expanded="false"> | |
292 <expand macro="useUniquePeptide"/> | |
293 <expand macro="summaryforMultipleRows"/> | |
294 <expand macro="fewMeasurements"/> | |
295 <expand macro="removeProtein_with1Peptide"/> | |
296 </section> | |
297 </when> | |
298 <when value="OpenSWATH"> | |
299 <param name="evidence" type="data" format="tabular,csv" label="OpenSWATH_input"/> | |
300 <param name="annotation" type="data" format="tabular,csv" label="OpenSWATH_annotation"/> | |
301 <section name="input_options" title="OpenSWATHtoMSstatsFormat Options" expanded="false"> | |
<!-- filter_with_mscore works together with mscore_cutoff below; its label used to
     repeat the unrelated removeMpeptides text. -->
302 <param name="filter_with_mscore" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Filter out features whose m_score exceeds mscore_cutoff"/> | |
303 <param name="mscore_cutoff" type="float" value="0.01" min="0" max="1.0" label="mscore_cutoff"/> | |
304 <expand macro="useUniquePeptide"/> | |
305 <expand macro="fewMeasurements"/> | |
306 <expand macro="summaryforMultipleRows"/> | |
307 <param name="removeProtein_with1Feature" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Remove the proteins which have only 1 peptide and charge"/> | |
308 </section> | |
309 </when> | |
310 </conditional> | |
<!-- Options forwarded to dataProcess() by the R script. Conditionals expose the
     extra arguments that only apply to one normalization, feature-subset or
     summary-method choice; empty <when/> branches add nothing. -->
311 <section name="dp_options" title="dataProcess Options" expanded="false"> | |
312 <param name="logTrans" type="select" label="Log-transform Variable ABUNDANCE with base:" help="(logTrans)"> | |
313 <option value="2" selected="true">2</option> | |
314 <option value="10">10</option> | |
315 </param> | |
316 <conditional name="norm"> | |
317 <param name="normalization" type="select" label="Normalization to remove systematic bias between MS runs"> | |
318 <option value="equalizeMedians" selected="true">equalizeMedians - represents constant normalization</option> | |
319 <option value="quantile">quantile - quantile normalization</option> | |
320 <option value="globalStandards">globalStandards - normalization with global standards proteins</option> | |
321 <option value="FALSE">no normalization is performed</option> | |
322 </param> | |
323 <when value="equalizeMedians"/> | |
324 <when value="quantile"/> | |
325 <when value="globalStandards"> | |
<!-- The value is pasted verbatim inside c(...) in the R script.
     NOTE(review): the regex validator requires double quotes in the value and no
     <sanitizer> is declared here; confirm that Galaxy's default text
     sanitization does not rewrite the quotes before they reach the script. -->
326 <param name="nameStandards" type="text" value="" label="global standard peptide names"> | |
327 <help>peptide names should be double-quoted and separated by commas</help> | |
328 <validator type="empty_field" /> | |
329 <validator type="regex" message="double-quoted names separated by commas"><![CDATA[^".+"(,".+")*$]]></validator> | |
330 </param> | |
331 </when> | |
332 <when value="FALSE"/> | |
333 </conditional> | |
334 <param name="fillIncompleteRows" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Fill Incomplete Rows" help=" If the input dataset has incomplete rows, TRUE (default) adds the rows with intensity value=NA for missing peaks. FALSE reports error message with list of features which have incomplete rows"/> | |
335 <conditional name="features"> | |
336 <param name="featureSubset" type="select" label="Features to use"> | |
337 <option value="all" selected="true">Use all features that the data set has</option> | |
338 <option value="top3">Use the top 3 features which have highest average of log2(intensity) across runs</option> | |
339 <option value="topN">Use the top N features which have highest average of log2(intensity) across runs</option> | |
340 <option value="highQuality">Flag uninformative feature and outliers</option> | |
341 </param> | |
342 <when value="all"/> | |
343 <when value="top3"/> | |
344 <when value="topN"> | |
345 <param name="n_top_feature" type="integer" value="3" min="1" label="The number of top features for featureSubset"/> | |
346 </when> | |
347 <when value="highQuality"> | |
348 <param name="remove_uninformative_feature_outlier" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Remove features flagged with Uninformative feature_quality"/> | |
349 </when> | |
350 </conditional> | |
<!-- Exactly one option may be the default of a single-choice select: TMP (listed
     first) keeps selected="true", and the duplicate flag on "linear" is dropped. -->
351 <conditional name="summarize"> | |
352 <param name="summaryMethod" type="select" label="Summary Method"> | |
353 <option value="TMP" selected="true">TMP - Tukey's median polish</option> | |
354 <option value="linear">linear - linear mixed model</option> | |
355 </param> | |
356 <when value="TMP"> | |
357 <param name="MBimpute" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Impute Missing Values 'NA' or '0' (depending on censoredInt option) by Accelated failure model" help="(MBimpute) TRUE - inserts 'NA' or '0' (depending on censoredInt option), . FALSE uses the values assigned by cutoffCensored"/> | |
358 <param name="remove50missing" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Remove runs which have more than 50% missing values"/> | |
359 </when> | |
360 <when value="linear"> | |
361 <param name="equalFeatureVar" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Account for heterogeneous variation among intensities from different features" help="(equalFeatureVar) TRUE assumes equal variance among intensities from features. FALSE means that we cannot assume equal variance among intensities from features, then we will account for heterogeneous variation from different features"/> | |
362 </when> | |
363 </conditional> | |
<!-- The value "NULL" is special-cased in the R script and emitted as the R
     constant NULL; the other two values are emitted as quoted strings. -->
364 <param name="censoredInt" type="select" label="Missing values to censor"> | |
365 <help>The output from Skyline and Progenesis should use '0'</help> | |
366 <option value="NA" selected="true">Assume that all 'NA's in 'Intensity' column are censored</option> | |
367 <option value="0">Use zero intensities '0' as censored intensity</option> | |
368 <option value="NULL">Assume all NA intensites are randomly missing</option> | |
369 </param> | |
370 <param name="cutoffCensored" type="select" label="Cutoff value for censoring"> | |
371 <option value="minFeature" selected="true">minimum value for each feature</option> | |
372 <option value="minRun">minimum value for each run</option> | |
373 <option value="minFeatureNRun">smallest between minimum value of corresponding feature and minimum value of corresponding run</option> | |
374 </param> | |
375 <param name="maxQuantileforCensored" type="float" value="0.999" min="0.75" max="1.0" label="Maximum quantile for deciding censored missing values"/> | |
376 </section> | |
<!-- Multi-select of the outputs that do not depend on group comparison. Each
     option value is tested both by the R script (to decide what to write) and by
     the <filter> of the output dataset with the same name. At least one option
     must be chosen (optional="false"); log, processed_data, qcplot and
     quant_group_matrix are pre-selected. -->
377 <param name="selected_outputs" type="select" multiple="true" optional="false" label="Select outputs"> | |
378 <option value="log" selected="true">MSstats log</option> | |
379 <option value="r_script" selected="false">MSstats Rscript</option> | |
380 <option value="processed_data" selected="true">MSstats ProcessedData</option> | |
381 <option value="runlevel_data" selected="false">MSstats RunlevelData</option> | |
382 <option value="qcplot" selected="true">MSstats QCPlot.pdf</option> | |
383 <option value="profile_plot" selected="false">MSstats ProfilePlot.pdf</option> | |
384 <option value="profile_wsum_plot" selected="false">MSstats ProfilePlot_wSummarization.pdf</option> | |
385 <option value="condition_plot" selected="false">MSstats ConditionPlot.pdf</option> | |
386 <option value="quant_sample_matrix" selected="false">Sample Quantification Matrix Table</option> | |
387 <option value="quant_sample_long" selected="false">Sample Quantification Long Table</option> | |
388 <option value="quant_group_matrix" selected="true">Group Quantification Matrix Table</option> | |
389 <option value="quant_group_long" selected="false">Group Quantification Long Table</option> | |
390 </param> | |
391 | |
<!-- Optional group comparison. Choosing "yes" asks for a comparison matrix and a
     second multi-select; the R script reads the matrix, uses its first column as
     comparison names, and writes only the outputs ticked in select_outputs. -->
392 <conditional name="group"> | |
393 <param name="group_comparison" type="select" label="Compare Groups"> | |
394 <option value="no">No</option> | |
395 <option value="yes">Yes</option> | |
396 </param> | |
397 <when value="no"/> | |
398 <when value="yes"> | |
399 <param name="comparison_matrix" type="data" format="tabular,csv" label="Comparison Matrix"/> | |
<!-- Note the name: select_outputs (group comparison) versus the top-level
     selected_outputs parameter. -->
400 <param name="select_outputs" type="select" multiple="true" label="Select outputs"> | |
401 <help>Heatmap requires more than one comparison</help> | |
402 <option value="fittedmodel" selected="true">MSstats ComparisonFittedModel.txt</option> | |
403 <option value="comparison_result" selected="true">MSstats ComparisonResult.tsv</option> | |
404 <option value="model_qc" selected="false">MSstats ModelQC.tsv</option> | |
405 <option value="qqplot" selected="false">MSstats QQPlot.pdf</option> | |
406 <option value="residualplot" selected="false">MSstats ResidualPlot.pdf</option> | |
407 <option value="volcanoplot" selected="true">MSstats VolcanoPlot.pdf</option> | |
408 <option value="heatmap" selected="false">MSstats Heatmap.pdf</option> | |
409 <option value="comparisonplot" selected="true">MSstats ComparisonPlot.pdf</option> | |
410 </param> | |
411 </when> | |
412 </conditional> | |
413 </inputs> | |
414 | |
<!-- Output datasets. Every dataset has a <filter> and is only created when that
     expression holds. "log" and "r_script" are written directly by the command
     line; all others are picked up from the job working directory via
     from_work_dir, so those file names must match what the R script writes. -->
415 <outputs> | |
416 <data name="log" format="txt" label="MSstats log"> | |
417 <filter>'log' in selected_outputs</filter> | |
418 </data> | |
419 <data name="r_script" format="txt" label="MSstats Rscript"> | |
420 <filter>'r_script' in selected_outputs</filter> | |
421 </data> | |
422 <data name="processed_data" format="tabular" label="MSstats ProcessedData" from_work_dir="ProcessedData.tsv"> | |
423 <filter>'processed_data' in selected_outputs</filter> | |
424 </data> | |
425 <data name="runlevel_data" format="tabular" label="MSstats RunlevelData" from_work_dir="RunlevelData.tsv"> | |
426 <filter>'runlevel_data' in selected_outputs</filter> | |
427 </data> | |
<!-- Plot files carry the "MSStats_only_" prefix passed as `address` in the script. -->
428 <data name="qcplot" format="pdf" label="MSstats QCPlot.pdf" from_work_dir="MSStats_only_QCPlot.pdf"> | |
429 <filter>'qcplot' in selected_outputs</filter> | |
430 </data> | |
431 <data name="profile_plot" format="pdf" label="MSstats ProfilePlot.pdf" from_work_dir="MSStats_only_ProfilePlot.pdf"> | |
432 <filter>'profile_plot' in selected_outputs</filter> | |
433 </data> | |
<!-- NOTE(review): the R script has no statement keyed on 'profile_wsum_plot';
     presumably this file is a by-product of the ProfilePlot call. Confirm that
     the call runs whenever this output is selected. -->
434 <data name="profile_wsum_plot" format="pdf" label="MSstats ProfilePlot_wSummarization.pdf" from_work_dir="MSStats_only_ProfilePlot_wSummarization.pdf"> | |
435 <filter>'profile_wsum_plot' in selected_outputs</filter> | |
436 </data> | |
437 <data name="condition_plot" format="pdf" label="MSstats ConditionPlot.pdf" from_work_dir="MSStats_only_ConditionPlot.pdf"> | |
438 <filter>'condition_plot' in selected_outputs</filter> | |
439 </data> | |
440 <data name="quant_sample_matrix" format="tabular" label="MSstats SampleQuantificationMatrix.tsv" from_work_dir="SampleQuantificationMatrix.tsv"> | |
441 <filter>'quant_sample_matrix' in selected_outputs</filter> | |
442 </data> | |
443 <data name="quant_sample_long" format="tabular" label="MSstats SampleQuantificationLong.tsv" from_work_dir="SampleQuantificationLong.tsv"> | |
444 <filter>'quant_sample_long' in selected_outputs</filter> | |
445 </data> | |
446 <data name="quant_group_matrix" format="tabular" label="MSstats GroupQuantificationMatrix.tsv" from_work_dir="GroupQuantificationMatrix.tsv"> | |
447 <filter>'quant_group_matrix' in selected_outputs</filter> | |
448 </data> | |
449 <data name="quant_group_long" format="tabular" label="MSstats GroupQuantificationLong.tsv" from_work_dir="GroupQuantificationLong.tsv"> | |
450 <filter>'quant_group_long' in selected_outputs</filter> | |
451 </data> | |
<!-- Group-comparison outputs: each additionally requires group_comparison == 'yes'
     and is looked up in the conditional's own select_outputs list. Their plot
     files carry the "MSStats_group_" prefix. -->
452 <data name="comparison_result" format="tabular" label="MSstats ComparisonResult.tsv" from_work_dir="ComparisonResult.tsv"> | |
453 <filter> group['group_comparison'] == 'yes' and 'comparison_result' in group['select_outputs']</filter> | |
454 </data> | |
455 <data name="fittedmodel" format="txt" label="MSstats ComparisonFittedModel.txt" from_work_dir="ComparisonFittedModel.txt"> | |
456 <filter> group['group_comparison'] == 'yes' and 'fittedmodel' in group['select_outputs']</filter> | |
457 </data> | |
458 <data name="model_qc" format="tabular" label="MSstats ModelQC.tsv" from_work_dir="ModelQC.tsv"> | |
459 <filter> group['group_comparison'] == 'yes' and 'model_qc' in group['select_outputs']</filter> | |
460 </data> | |
461 <data name="qqplot" format="pdf" label="MSstats ModelQQ.pdf" from_work_dir="MSStats_group_QQPlot.pdf"> | |
462 <filter> group['group_comparison'] == 'yes' and 'qqplot' in group['select_outputs']</filter> | |
463 </data> | |
464 <data name="residualplot" format="pdf" label="MSstats ResidualPlot.pdf" from_work_dir="MSStats_group_ResidualPlot.pdf"> | |
465 <filter> group['group_comparison'] == 'yes' and 'residualplot' in group['select_outputs']</filter> | |
466 </data> | |
467 <data name="volcanoplot" format="pdf" label="MSstats VolcanoPlot.pdf" from_work_dir="MSStats_group_VolcanoPlot.pdf"> | |
468 <filter> group['group_comparison'] == 'yes' and 'volcanoplot' in group['select_outputs']</filter> | |
469 </data> | |
<!-- The script only draws the heatmap when the comparison matrix has more than one row. -->
470 <data name="heatmap" format="pdf" label="MSstats Heatmap.pdf" from_work_dir="MSStats_group_Heatmap.pdf"> | |
471 <filter> group['group_comparison'] == 'yes' and 'heatmap' in group['select_outputs']</filter> | |
472 </data> | |
473 <data name="comparisonplot" format="pdf" label="MSstats ComparisonPlot.pdf" from_work_dir="MSStats_group_ComparisonPlot.pdf"> | |
474 <filter> group['group_comparison'] == 'yes' and 'comparisonplot' in group['select_outputs']</filter> | |
475 </data> | |
476 <!-- | |
477 Tabular file (from groupcomparison): "fittedmodel" | |
478 --> | |
479 </outputs> | |
480 <tests> | |
481 | |
<!-- Test: MSstats-format input declared as csv, no group comparison. Requests
     processed data, both profile plots, the sample matrix and the long group
     table; tables are checked by content, column and line counts, PDFs by size. -->
482 <test> | |
483 <conditional name="input"> | |
484 <param name="input_src" value="MSstats"/> | |
485 <param name="msstats_input" ftype="csv" value="msstats_testfile.txt"/> | |
486 </conditional> | |
487 <param name="selected_outputs" value="processed_data,profile_plot,profile_wsum_plot,quant_sample_matrix,quant_group_long"/> | |
488 <output name="processed_data"> | |
489 <assert_contents> | |
490 <has_text text="D.GPLTGTYR" /> | |
491 <has_n_columns n="16" /> | |
492 <has_n_lines n="2071" /> | |
493 </assert_contents> | |
494 </output> | |
495 <output name="quant_sample_matrix"> | |
496 <assert_contents> | |
497 <has_text text="C2_1" /> | |
498 <has_n_columns n="7" /> | |
499 <has_n_lines n="7" /> | |
500 </assert_contents> | |
501 </output> | |
502 <output name="quant_group_long"> | |
503 <assert_contents> | |
504 <has_text text="LogIntensity" /> | |
505 <has_n_columns n="3" /> | |
506 <has_n_lines n="37" /> | |
507 </assert_contents> | |
508 </output> | |
509 <output name="profile_plot" file="MSstats ProfilePlot.pdf" compare="sim_size"/> | |
510 <output name="profile_wsum_plot" file="profile_wsum_plot.pdf" compare="sim_size"/> | |
511 </test> | |
512 | |
<!-- Test: tabular MSstats-format input with group comparison enabled.
     selected_outputs is not set, so the tool defaults apply; the group outputs
     are limited to the residual plot and the model QC table. select_outputs is
     given at test level rather than inside the "group" conditional. -->
513 <test> | |
514 <conditional name="input"> | |
515 <param name="input_src" value="MSstats"/> | |
516 <param name="msstats_input" ftype="tabular" value="msstats_testfile.tsv"/> | |
517 </conditional> | |
518 <conditional name="group"> | |
519 <param name="group_comparison" value="yes"/> | |
520 <param name="comparison_matrix" ftype="csv" value="comparison_matrix.csv"/> | |
521 </conditional> | |
522 <param name="select_outputs" value="residualplot,model_qc"/> | |
523 <output name="processed_data"> | |
524 <assert_contents> | |
525 <has_text text="D.GPLTGTYR" /> | |
526 <has_n_columns n="16" /> | |
527 <has_n_lines n="2071" /> | |
528 </assert_contents> | |
529 </output> | |
530 <output name="model_qc"> | |
531 <assert_contents> | |
532 <has_text text="MissingPercentage" /> | |
533 <has_n_columns n="15" /> | |
534 <has_n_lines n="108" /> | |
535 </assert_contents> | |
536 </output> | |
537 <output name="residualplot" file="residual_plot.pdf" compare="sim_size"/> | |
538 </test> | |
539 | |
<!-- Test: MaxQuant input (evidence, annotation, proteinGroups) with group
     comparison. Requests the condition plot, processed data and run-level data,
     plus the QQ plot and comparison result from the group outputs. -->
540 <test> | |
541 <conditional name="input"> | |
542 <param name="input_src" value="MaxQuant"/> | |
543 <param name="evidence" ftype="tabular" value="test_MQ_evidence.tabular"/> | |
544 <param name="annotation" ftype="tabular" value="test_MQ_annotation.txt"/> | |
545 <param name="proteinGroups" ftype="tabular" value="test_MQ_proteingroups.tabular"/> | |
546 </conditional> | |
547 <param name="selected_outputs" value="condition_plot,processed_data,runlevel_data"/> | |
548 <conditional name="group"> | |
549 <param name="group_comparison" value="yes"/> | |
550 <param name="comparison_matrix" ftype="csv" value="test_MQ_group12_comparison_matrix.csv"/> | |
551 </conditional> | |
552 <param name="select_outputs" value="qqplot,comparison_result"/> | |
553 <output name="processed_data"> | |
554 <assert_contents> | |
555 <has_text text="SPILVATAVAAR" /> | |
556 <has_n_columns n="16" /> | |
557 <has_n_lines n="57" /> | |
558 </assert_contents> | |
559 </output> | |
560 <output name="runlevel_data"> | |
561 <assert_contents> | |
562 <has_text text="qx017084.raw.thermo" /> | |
563 <has_n_columns n="13" /> | |
564 <has_n_lines n="13" /> | |
565 </assert_contents> | |
566 </output> | |
567 <output name="comparison_result"> | |
568 <assert_contents> | |
569 <has_text text="r2-r1" /> | |
570 <has_n_columns n="11" /> | |
571 <has_n_lines n="4" /> | |
572 </assert_contents> | |
573 </output> | |
574 <output name="condition_plot" file="condition_plot.pdf" compare="sim_size"/> | |
575 <output name="qqplot" file="qq_plot.pdf" compare="sim_size"/> | |
576 </test> | |
577 | |
578 | |
579 | |
580 | |
581 <!-- | |
582 <test> | |
583 <conditional name="input"> | |
584 <param name="input_src" value="OpenMS"/> | |
585 <param name="evidence" ftype="tabular" value=""/> | |
586 <param name="annotation" ftype="tabular" value=""/> | |
587 </conditional> | |
588 <output name="processed_data"> | |
589 <assert_contents> | |
590 <has_text text="D.GPLTGTYR" /> | |
591 </assert_contents> | |
592 </output> | |
593 </test> | |
594 --> | |
595 | |
<!-- Test: OpenSWATH input with every other parameter left at its default. The two
     outputs asserted here, processed_data and qcplot, are both pre-selected in
     selected_outputs. -->
596 <test> | |
597 <conditional name="input"> | |
598 <param name="input_src" value="OpenSWATH"/> | |
599 <param name="evidence" ftype="tabular" value="test_swath_input_data.tabular"/> | |
600 <param name="annotation" ftype="tabular" value="test_swath_annotations.tabular"/> | |
601 </conditional> | |
602 <output name="processed_data"> | |
603 <assert_contents> | |
604 <has_text text="GETLGLIGFGR" /> | |
605 <has_n_columns n="16" /> | |
606 <has_n_lines n="253" /> | |
607 </assert_contents> | |
608 </output> | |
609 <output name="qcplot" file="QC_plot.pdf" compare="sim_size"/> | |
610 </test> | |
611 | |
612 <test> | |
613 <conditional name="input"> | |
614 <param name="input_src" value="OpenSWATH"/> | |
615 <param name="evidence" ftype="tabular" value="test_swath_input_data.tabular"/> | |
616 <param name="annotation" ftype="tabular" value="test_swath_annotations.tabular"/> | |
617 </conditional> | |
618 <param name="selected_outputs" value="r_script,processed_data,quant_sample_long"/> | |
619 <conditional name="group"> | |
620 <param name="group_comparison" value="yes"/> | |
621 <param name="comparison_matrix" ftype="csv" value="test_swath_group12_comparison_matrix.csv"/> | |
622 </conditional> | |
623 <param name="select_outputs" value="comparison_result,volcanoplot,residualplot"/> | |
624 <output name="processed_data"> | |
625 <assert_contents> | |
626 <has_text text="GETLGLIGFGR" /> | |
627 <has_n_columns n="16" /> | |
628 <has_n_lines n="253" /> | |
629 </assert_contents> | |
630 </output> | |
631 <output name="quant_sample_long"> | |
632 <assert_contents> | |
633 <has_text text="NPT_96" /> | |
634 <has_n_columns n="3" /> | |
635 <has_n_lines n="31" /> | |
636 </assert_contents> | |
637 </output> | |
638 <output name="comparison_result"> | |
639 <assert_contents> | |
640 <has_text text="Q5VYK3" /> | |
641 <has_n_columns n="11" /> | |
642 <has_n_lines n="6" /> | |
643 </assert_contents> | |
644 </output> | |
645 <output name="volcanoplot" file="volcanoplot.pdf" compare="sim_size"/> | |
646 <output name="residualplot" file="residualplot.pdf" compare="sim_size"/> | |
647 </test> | |
648 | |
649 </tests> | |
650 <help><![CDATA[ | |
651 MSstats is an open-source R package for statistical relative quantification of proteins and peptides in global, targeted and data-independent proteomics. `More information on MSstats <http://msstats.org/>`_ | |
652 | |
653 The MSstats Galaxy tool (version @VERSION@) allows the detection of differentially abundant proteins for label-free MS experiments with complex designs on data derived from open-source proteomics software available in Galaxy (e.g. MaxQuant, OpenMS, OpenSWATH). Processing functionalities such as log transformation, normalization, feature selection, missing value imputation and quantification are available as well. | |
654 | |
655 ----- | |
656 | |
657 **Input data** | |
658 | |
659 - Data in tabular or csv format, generated by spectral processing tools such as `MaxQuant <http://coxdocs.org/doku.php?id=maxquant:start/>`_ or `OpenSWATH <http://openswath.org/en/latest/>`_, will be automatically converted to the 10-column MSstats format | |
660 | |
661 - MaxQuant format: evidence.txt, proteinGroups.txt | |
662 - OpenSWATH format: pyprophet export file | |
663 - MSstats format: tabular file with 10 columns, either manually curated or from other sources such as the swath2stats tool, which is implemented in Pyprophet export in Galaxy. For manual curation: names of headers are fixed but not case sensitive: | |
664 | |
665 - ProteinName: protein ID or peptide ID for peptide-level modeling and analysis; statistical analysis will be done separately for each unique label in this column | |
666 - PeptideSequence: Amino acid sequence for each peptide. If the peptide sequences should be distinguished based on post-translational modifications, this column can be renamed to PeptideModifiedSequence. | |
667 - PrecursorCharge: charge state of precursor. | |
668 - FragmentIon: e.g. b4, y3, if unknown use a single value for all entries. | |
669 - ProductCharge: charge state of product. If unknown use 0 for all entries. | |
670 - IsotopeLabelType: This column indicates whether this measurement is based on the endogenous peptides (use “L”) or labeled reference peptides (use “H”). | |
671 - Condition: For group comparison experiments, this column indicates groups of interest (such as “Disease” or “Control”). For time-course experiments, this column indicates time points (such as “T1”, “T2”, etc). If the experimental design contains both distinct groups of subjects and multiple time points per subject, this column should indicate a combination of these values (such as “Disease_T1”, “Disease_T2”, “Control_T1”, “Control_T2”, etc.). | |
672 - BioReplicate: This column should contain a unique identifier for each biological replicate in the experiment. For example, in a clinical proteomic investigation this should be a unique patient id. Patients from distinct groups should have distinct ids. MSstats does not require the presence of technical replicates in the experiment. If technical replicates are present, all samples or runs from the same biological replicate should have the same id. MSstats automatically detects the presence of technical replicates and accounts for them in the model-based analysis. | |
673 - Run: This column contains the identifier of a mass spectrometry run. Each mass spectrometry run should have a unique identifier, regardless of the origin of the biological sample. In SRM experiments, if all the transitions of a biological or a technical replicate are split into multiple “methods” due to the technical limitations, each method should have a separate identifier. When processed by Skyline, distinct values of runs correspond to distinct input file names. It is possible to use the actual input file names as values in the column Run. | |
674 - Intensity: This column should contain the quantified signal of a feature in a run without any transformation (in particular, no logarithm transform). The signals can be quantified as the peak height or the peak area under the curve. Any other quantitative representation of abundance can also be used. | |
675 - Example file header: | |
676 :: | |
677 | |
678 proteinname peptidesequence precursorcharge fragmention productcharge | |
679 P02768 DLGEENFK 3 y7 0 | |
680 P02768 DLGEENFK 3 y8 0 | |
681 P02768 ETYGEMADCCAK 2 b3 0 | |
682 P02768 ETYGEMADCCAK 2 b4 0 | |
683 ... ... ... ... ... | |
684 | |
685 isotopelabeltype condition bioreplicate run intensity | |
686 L 1 ReplA 1 4298.12 | |
687 H 1 ReplA 1 1974.59 | |
688 L 1 ReplA 1 7183.22 | |
689 H 1 ReplA 1 8467.58 | |
690 ... ... ... ... ... | |
691 | |
692 | |
693 - Annotations as tabular file are needed for all input options except MSstats format | |
694 | |
695 - 4 columns: Filename, Condition, Bioreplicate, Run; additional 5th column only for MaxQuant: Isotopelabeltype | |
696 | |
697 - Filename: the file name has to be exactly as it appears in the other input files (e.g. S1207.raw.thermo; in/AA12_mzML.mzML) | |
698 - all other columns: see description above for MSstats format columns | |
699 | |
700 - Comparison matrix as tabular file | |
701 | |
702 - 1st column: name of comparison | |
703 - additionally one column for each condition that is present in the tabular file. Use 1 and -1 to indicate the conditions to compare and 0 for conditions that are not compared. Multiple groups can be combined by using 0.5. | |
704 - first row contains the names of the groups, they must exactly match the condition name used in the annotation file | |
705 - each additional row represents one comparison | |
706 - Example for a two group comparison | |
707 | |
708 :: | |
709 | |
710 names groupA groupB | |
711 groupA-groupB 1 -1 | |
712 | |
713 | |
714 - Example for an experiment with 5 groups and 4 different comparisons | |
715 | |
716 :: | |
717 | |
718 names G1 G2 G3 G4 G5 | |
719 G2-G1 -1 1 0 0 0 | |
720 G4-G5 0 0 0 1 -1 | |
721 G3-G5 0 0 -1 0 1 | |
722 G1+G2-G5 0.5 0.5 0 0 -1 | |
723 | |
724 **Options** | |
725 | |
726 - data conversion from MaxQuant and OpenSWATH to MSstats format: | |
727 | |
728 - MaxQuant input: proteins marked with '+' as Contaminant, Reverse or Only.identified.by.site are automatically removed during conversion | |
729 | |
730 - data processing options: | |
731 | |
732 - MaxQuant input: contaminants, reverse hits and proteins only identified by site (as flagged by the MaxQuant tool) are automatically removed | |
733 - log transformation | |
734 - normalization of MS runs | |
735 - Feature selection | |
736 - Missing value imputation: | |
737 | |
738 - MaxQuant input: All missing values are NA, censoredInt must be 'NA' | |
739 - OpenSWATH input: censoredInt must be '0' | |
740 - Summary method: TMP + censoredInt = NULL: It assumes that all intensities are missing at random, therefore no action with MBimpute = FALSE or error with MBimpute = TRUE | |
741 - censoredInt = 'NA' or '0' & MBimpute = TRUE: AFT model-based imputation using the cutoffCensored value in the AFT model | |
742 - censoredInt = 'NA' or '0' & MBimpute = FALSE: censored intensities (here NA’s) will be replaced with the value specified in cutoffCensored. | |
743 - Summarizing intensities per MS run | |
744 - group comparison: automatic detection of differentially abundant proteins between two conditions, conditions have to be specified with the 'comparison matrix' | |
745 - quantification per sample or group | |
746 | |
747 - sample: relative protein abundance in each biological replicate. If there are technical replicates for biological replicates, sample quantification will be the median among technical replicates. If there is no technical replicate for biological replicate (sample), sample quantification will be the same as run-level summarization. | |
748 - group: relative protein abundance in each condition, summarized over the biological replicates (median among sample quantification). In presence of completely missing values in a condition, the estimates will be zero | |
749 | |
750 **Output options** | |
751 | |
752 - Different outputs available. Especially for studies with many proteins, it is suggested to select only the necessary pdf outputs as many of them generate one plot per protein. | |
753 | |
754 - MSstats log - check log file for warnings and information on the analysis steps (txt) | |
755 - r-script - can be used to re-run analysis outside Galaxy (txt) | |
756 - processed_data - transformed, normalized, imputed intensities (tabular) | |
757 - runlevel_data - summarized intensities per run (tabular) | |
758 - qcplot - log2 intensity boxplot for all proteins and run on first page, followed by one boxplot per protein (pdf) | |
759 - profile_plot - log2 intensity profiles one plot per protein and run (pdf) | |
760 - profile_wsum_plot - log2 intensity profiles one plot per protein and run with run summarization (pdf) | |
761 - condition_plot - log2 intensity range for each protein and condition (pdf) | |
762 - quant_sample_matrix - relative protein abundance in each biological replicate (tabular) | |
763 - quant_sample_long - relative protein abundance in each biological replicate, long format (tabular) | |
764 - quant_group_matrix - relative protein abundance in each condition (tabular) | |
765 - quant_group_long - relative protein abundance in each condition, long format (tabular) | |
766 - comparison_result - summary of statistical results per protein and comparison (tabular) | |
767 - model_qc - summary statistics per run (tabular) | |
768 - qqplot - one QQplot per protein (pdf) | |
769 - residualplot - one residual plot per protein (pdf) | |
770 - volcanoplot - one volcano plot per comparison (pdf) | |
771 - heatmap - needs at least 2 comparisons, one heatmap for all proteins and comparisons (pdf) | |
772 - comparisonplot - log2 intensity range for each protein and comparison (pdf) | |
773 | |
774 For additional help please visit the `MSstats documentation <http://msstats.org/msstats-2/>`_ | |
775 | |
776 | |
777 ]]></help> | |
778 <citations> | |
779 <citation type="doi">10.1093/bioinformatics/btu305</citation> | |
780 </citations> | |
781 </tool> | |
782 |