comparison msstatstmt.xml @ 0:fd3dc69b78ff draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msstatstmt commit 7c5032273bfe66383e072457aeea888917f7ebed"
author galaxyp
date Sun, 24 Jan 2021 16:40:12 +0000
parents
children b79d4c961009
comparison
equal deleted inserted replaced
-1:000000000000 0:fd3dc69b78ff
1 <tool id="msstatstmt" name="MSstatsTMT" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">
2 <description>protein significance analysis in shotgun mass spectrometry-based proteomic experiments with tandem mass tag (TMT) labeling</description>
3 <macros> <!-- Version tokens plus the converter options shared by the MaxQuant and OpenMS input branches -->
4 <token name="@TOOL_VERSION@">1.8.0</token>
5 <token name="@GALAXY_VERSION@">0</token>
6 <xml name="input_options_shared"> <!-- Expanded inside both "input_options" sections; the configfile passes each value to the same-named argument of the converter call -->
7 <param name="useUniquePeptide" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Use unique peptide" help="Yes (default) removes peptides that are assigned for more than one protein. We assume to use unique peptide for each protein." />
8 <param name="rmPSM_withMissing_withinRun" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Remove PSM with missing value within run" help="Yes will remove PSM with any missing value within each run. Default is No." />
9 <param name="rmPSM_withfewMea_withinRun" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Remove PSM with few measurements within run" help="Only for 'Remove PSM with missing value within run = No'. Yes (default) will remove the features that have 1 or 2 measurements within each run." />
10 <param name="rmProtein_with1Feature" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Remove proteins with one feature" help="Yes will remove the proteins which have only 1 peptide and charge. Default is No." />
11 </xml>
12 </macros>
13 <requirements> <!-- Single package dependency; its version is taken from the @TOOL_VERSION@ token -->
14 <requirement type="package" version="@TOOL_VERSION@">bioconductor-msstatstmt</requirement>
15 </requirements>
16 <command detect_errors="exit_code"><![CDATA[
17 cat '$msstatstmt_script' > '$out_r_script' &&
18 Rscript '$msstatstmt_script'
19 && cat msstats*.log > '$out_msstats_log'
20 && cat msstatstmt.log > '$out_msstatstmt_log'
21 ]]></command> <!-- Copies the rendered R script to out_r_script, runs it with Rscript, then concatenates msstats*.log and msstatstmt.log from the working directory into the two log outputs. The steps are chained with &&, so a failing step stops the ones after it. -->
22 <configfiles> <!-- Cheetah template rendered into the R script that the command runs. It loads MSstatsTMT, builds "input" from the chosen source (a ready MSstatsTMT table, or the MaxQuant / OpenMS converter), runs proteinSummarization, calls dataProcessPlotsTMT once per selected output whose name ends in "Plot", and, when group comparison is enabled, runs groupComparisonTMT and writes ComparisonResult.tsv. Lines starting with #if, #elif, #else, #for or #end are template directives and do not reach R; the backslash before the dollar sign in the Condition lookup keeps it literal for R. R logicals are spelled TRUE/FALSE because T and F are ordinary, reassignable variables. -->
23 <configfile name="msstatstmt_script"><![CDATA[
24 library(MSstatsTMT, warn.conflicts = FALSE, quietly = TRUE, verbose = FALSE)
25
26 #if $input.input_src == 'MSstatsTMT'
27 input <- read.table("$input.msstatstmt_input", sep="\t", header=TRUE)
28
29 #elif $input.input_src == 'MaxQuant'
30 proteinGroups.mq <- read.table("$input.proteinGroups", sep="\t", header=TRUE)
31 evidence.mq <- read.table("$input.evidence", sep="\t", header=TRUE)
32 annotation.mq <- read.table("$input.annotation", sep="\t", header=TRUE)
33
34 input <- MaxQtoMSstatsTMTFormat(evidence = evidence.mq,
35 proteinGroups = proteinGroups.mq,
36 annotation = annotation.mq,
37 which.proteinid = "$input.proteinID",
38 rmProt_Only.identified.by.site = $input.input_options.rmProt_Onlyidentifiedbysite,
39 useUniquePeptide = $input.input_options.useUniquePeptide,
40 rmPSM_withMissing_withinRun = $input.input_options.rmPSM_withMissing_withinRun,
41 rmPSM_withfewMea_withinRun = $input.input_options.rmPSM_withfewMea_withinRun,
42 rmProtein_with1Feature = $input.input_options.rmProtein_with1Feature,
43 summaryforMultipleRows = $input.input_options.summaryforMultipleRows)
44
45 #elif $input.input_src == 'OpenMS'
46 input.oms <- read.table("$input.oms_input", sep="\t", header=TRUE)
47
48 input <- OpenMStoMSstatsTMTFormat(input.oms,
49 useUniquePeptide = $input.input_options.useUniquePeptide,
50 rmPSM_withMissing_withinRun = $input.input_options.rmPSM_withMissing_withinRun,
51 rmPSM_withfewMea_withinRun = $input.input_options.rmPSM_withfewMea_withinRun,
52 rmProtein_with1Feature = $input.input_options.rmProtein_with1Feature,
53 summaryforMultiplePSMs = $input.input_options.summaryforMultiplePSMs)
54 #end if
55
56 quant <- proteinSummarization(input,
57 method = "$proteinSummarization.method",
58 global_norm = $proteinSummarization.global_norm,
59 reference_norm = $proteinSummarization.reference_norm,
60 remove_norm_channel = $proteinSummarization.remove_norm_channel,
61 remove_empty_channel = $proteinSummarization.remove_empty_channel,
62 MBimpute = $proteinSummarization.MBimpute,
63 #if $proteinSummarization.maxQuantileforCensored == ''
64 maxQuantileforCensored = NULL)
65 #else
66 maxQuantileforCensored = $proteinSummarization.maxQuantileforCensored)
67 #end if
68
69 #for $plot_type in $selected_outputs
70 #if $plot_type[-4:] == "Plot"
71 dataProcessPlotsTMT(input,
72 quant,
73 type = '$plot_type',
74 ylimUp = $out_plots_opt.ylimUp,
75 ylimDown = $out_plots_opt.ylimDown,
76 x.axis.size = $out_plots_opt.x_axis_size,
77 y.axis.size = $out_plots_opt.y_axis_size,
78 text.size = $out_plots_opt.text_size,
79 text.angle = $out_plots_opt.text_angle,
80 legend.size = $out_plots_opt.legend_size,
81 dot.size.profile = $out_plots_opt.dot_size_profile,
82 ncol.guide = $out_plots_opt.ncol_guide,
83 width = $out_plots_opt.width,
84 height = $out_plots_opt.height,
85 #if $out_plots_opt.which_Protein.select != 'list'
86 which.Protein = "$out_plots_opt.which_Protein.select",
87 #else
88 which.Protein = unlist(read.table("$out_plots_opt.which_Protein.protein_list", sep = "\n", header = FALSE), use.names = FALSE),
89 #end if
90 originalPlot = $out_plots_opt.originalPlot,
91 summaryPlot = $out_plots_opt.summaryPlot)
92 #end if
93 #end for
94
95 #if $group.group_comparison == 'true'
96 #if $group.use_comp_matrix.select == 'true'
97 comp_matrix <- read.table("$group.use_comp_matrix.comparison_matrix", sep="\t", header=TRUE, check.names=FALSE)
98
99 comparison <- comp_matrix[,-1]
100 row.names(comparison) <- as.character(comp_matrix[,1])
101 comparison <- as.matrix(comparison[levels(quant\$Condition)])
102 #end if
103
104 comparisons <- groupComparisonTMT(data = quant,
105 #if $group.use_comp_matrix.select == 'true'
106 contrast.matrix = comparison,
107 #end if
108 moderated = $group.moderated,
109 adj.method = "$group.adj_method",
110 remove_norm_channel = $group.remove_norm_channel,
111 remove_empty_channel = $group.remove_empty_channel)
112
113 write.table(comparisons,
114 "ComparisonResult.tsv",
115 sep = "\t",
116 quote = FALSE,
117 row.names = FALSE,
118 dec = ".")
119 #end if
120 ]]></configfile>
121 </configfiles>
122 <inputs>
123 <conditional name="input"> <!-- Chooses how the script builds its "input" table: read a ready MSstatsTMT table directly, or convert MaxQuant / OpenMS exports with the matching MSstatsTMT converter -->
124 <param name="input_src" type="select" label="Input Source">
125 <option value="MSstatsTMT">MSstatsTMT (11 column format)</option>
126 <option value="MaxQuant">MaxQuant</option>
127 <option value="OpenMS">OpenMS</option>
128 </param>
129 <when value="MSstatsTMT">
130 <param name="msstatstmt_input" type="data" format="tabular" label="MSstatsTMT (11 column format)"/>
131 </when>
132 <when value="MaxQuant"> <!-- The three tables are read with read.table and handed to MaxQtoMSstatsTMTFormat -->
133 <param name="evidence" type="data" format="tabular" label="evidence.txt - feature-level data"/>
134 <param name="proteinGroups" type="data" format="tabular" label="proteinGroups.txt" help="Needed to match protein group IDs. If proteinGroups=NULL, the 'Proteins' column in 'evidence.txt' is used"/>
135 <param name="annotation" type="data" format="tabular" label="annotation.txt - data which includes Run, Fraction, TechRepMixture, Mixture, Channel, BioReplicate, Condition information" />
136 <param name="proteinID" type="select" label="Select Protein ID in evidence.txt"> <!-- Passed as which.proteinid -->
137 <option value="Proteins">Protein column</option>
138 <option value="Leading.razor.protein">Leading razor protein column</option>
139 </param>
140 <section name="input_options" title="MaxQtoMSstatsTMTFormat Options" expanded="false">
141 <param name="rmProt_Onlyidentifiedbysite" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Remove proteins only identified by site" help="Yes will remove proteins with ’+’ in ’Only.identified.by.site’ column from proteinGroups.txt, which was identified only by a modification site. No is the default." />
142 <expand macro="input_options_shared"/>
143 <param name="summaryforMultipleRows" type="select" label="Summary for multiple rows" help="When there are multiple measurements for certain feature in certain run, select the feature with the largest summation or maximal value.">
144 <option value="max">max</option>
145 <option value="sum" selected="true">sum</option>
146 </param>
147 </section>
148 </when>
149 <when value="OpenMS"> <!-- The table is read with read.table and handed to OpenMStoMSstatsTMTFormat -->
150 <param name="oms_input" type="data" format="tabular" label="OpenMS input"/>
151 <section name="input_options" title="OpenMStoMSstatsTMTFormat Options" expanded="false">
152 <expand macro="input_options_shared"/>
153 <param name="summaryforMultiplePSMs" type="select" label="Summary for multiple PSMs" help="When there are multiple measurements for certain feature in certain run, select the feature with the largest summation or maximal value.">
154 <option value="max">max</option>
155 <option value="sum" selected="true">sum</option>
156 </param>
157 </section>
158 </when>
159 </conditional>
160 <section name="proteinSummarization" title="proteinSummarization Options" expanded="false"> <!-- Every value here is substituted into the proteinSummarization() call in the configfile -->
161 <param name="method" type="select" multiple="false" label="Select method">
162 <option value="msstats" selected="true">msstats</option>
163 <option value="MedianPolish">MedianPolish</option>
164 <option value="Median">Median</option>
165 <option value="LogSum">LogSum</option>
166 </param>
167 <param name="global_norm" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Global median normalization" help="Global median normalization on peptide level data (equalizing the medians across all the channels and MS runs). Default is Yes. It will be performed before protein-level summarization."/>
168 <param name="reference_norm" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Reference channel based normalization" help="Reference channel based normalization between MS runs on protein level data. Yes (default) needs at least one reference channel in each MS run, annotated by ’Norm’ in condition column. It will be performed after protein-level summarization. No will not perform this normalization step. If data only has one run, then use No"/>
169 <param name="remove_norm_channel" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Remove ’Norm’ channels from protein level data."/>
170 <param name="remove_empty_channel" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Remove ’Empty’ channels from protein level data"/>
171 <param name="MBimpute" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="MBimpute" help="Only for 'method = msstats'. Yes (default) imputes missing values by accelerated failure time (AFT) model. No uses minimum value to impute the missing value for each peptide precursor ion."/>
172 <param name="maxQuantileforCensored" type="float" optional="true" min="0" max="0.999" value="" label="Maximum quantile for deciding censored missing value" help="We assume missing values are censored. maxQuantileforCensored is maximum quantile for deciding censored missing value, for instance, 0.999. Default is empty"/> <!-- When left empty, the configfile renders maxQuantileforCensored = NULL -->
173 </section>
174 <conditional name="group"> <!-- When group_comparison is "true" the script calls groupComparisonTMT and writes ComparisonResult.tsv, which is collected as the out_group_comp output -->
175 <param name="group_comparison" type="select" label="Compare Groups">
176 <option value="false">No</option>
177 <option value="true">Yes</option>
178 </param>
179 <when value="false"/>
180 <when value="true">
181 <conditional name="use_comp_matrix"> <!-- With "true" the matrix file is read, its first column becomes the row names, its columns are selected by levels(quant$Condition), and the result is passed as contrast.matrix; with "false" no contrast.matrix argument is rendered -->
182 <param name="select" type="select" label="Use comparison matrix?">
183 <option value="false">No</option>
184 <option value="true">Yes</option>
185 </param>
186 <when value="false"/>
187 <when value="true">
188 <param name="comparison_matrix" type="data" format="tabular" label="Comparison Matrix"/>
189 </when>
190 </conditional>
191 <param name="moderated" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Moderate t statistic" help="No (default) uses ordinary t statistic"/>
192 <param name="adj_method" type="select" label="Adjusted p value method for multiple comparison"> <!-- Passed as adj.method -->
193 <option value="holm">holm</option>
194 <option value="hochberg">hochberg</option>
195 <option value="hommel">hommel</option>
196 <option value="bonferroni">bonferroni</option>
197 <option value="BH" selected="true">BH</option>
198 <option value="BY">BY</option>
199 <option value="fdr">fdr</option>
200 <option value="none">none</option>
201 </param>
202 <param name="remove_norm_channel" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Remove ’Norm’ channels from protein level data"/>
203 <param name="remove_empty_channel" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Remove ’Empty’ channels from protein level data"/>
204 </when>
205 </conditional>
206 <param name="selected_outputs" type="select" multiple="true" optional="false" label="Select Outputs"> <!-- Referenced by the output filters; values ending in "Plot" additionally trigger a dataProcessPlotsTMT call of that type in the configfile -->
207 <option value="msstats_log" selected="true">MSstats log</option>
208 <option value="msstatstmt_log" selected="true">MSstatsTMT log</option>
209 <option value="r_script" selected="false">MSstats Rscript</option>
210 <option value="ProfilePlot" selected="false">Profile Plot</option>
211 <option value="QCPlot" selected="false">QC Plot</option>
212 </param>
213 <section name="out_plots_opt" title="Plot Output Options" expanded="false"> <!-- Passed to dataProcessPlotsTMT for every selected plot type; underscores in the names here correspond to dots in the R argument names -->
214 <param name="ylimUp" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Upper limit for y-axis in the log scale" help="No (Default) for Profile Plot and QC Plot uses the upper limit as rounded off maximum of log2(intensities) after normalization + 3."/> <!-- NOTE(review): ylimUp and ylimDown are booleans here and reach R as TRUE/FALSE; confirm whether a numeric limit was intended -->
215 <param name="ylimDown" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Lower limit for y-axis in the log scale" help="No (Default) for Profile Plot and QCPlot uses 0."/>
216 <param name="x_axis_size" type="integer" min="1" value="10" label="Size of x-axis labeling"/>
217 <param name="y_axis_size" type="integer" min="1" value="10" label="Size of y-axis labeling"/>
218 <param name="text_size" type="integer" min="1" value="4" label="Size of labels represented each condition at the top"/>
219 <param name="text_angle" type="integer" min="0" max="360" value="90" label="Angle of labels represented each condition at the top"/>
220 <param name="legend_size" type="integer" min="1" value="7" label="Size of legend above Profile plot"/>
221 <param name="dot_size_profile" type="integer" min="1" value="2" label="Size of dots in Profile plot"/>
222 <param name="ncol_guide" type="integer" min="1" value="5" label="Number of columns for legends at the top of plot"/>
223 <param name="width" type="integer" min="1" value="10" label="Width of the saved pdf file"/>
224 <param name="height" type="integer" min="1" value="10" label="Height of the saved pdf file"/>
225 <conditional name="which_Protein"> <!-- "list" makes the script read protein IDs, one per line and without a header, from protein_list; any other value is passed to which.Protein as a quoted string -->
226 <param name="select" type="select" label="Select protein IDs to draw plots">
227 <option value="all" selected="true">generate all plots for each protein</option>
228 <option value="allonly">Option for QC plot: "allonly" will generate one QC plot with all proteins</option>
229 <option value="list">Protein IDs as tabular input</option>
230 </param>
231 <when value="all"/>
232 <when value="allonly"/>
233 <when value="list">
234 <param name="protein_list" type="data" format="tabular" label="List of proteins"/>
235 </when>
236 </conditional>
237 <param name="originalPlot" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Draw original profile plots without normalization"/>
238 <param name="summaryPlot" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Draw profile plots with protein summarization for each channel and MS run"/>
239 </section>
240 </inputs>
241 <outputs> <!-- Every output is filtered: the logs, script and plots by membership in selected_outputs, the comparison table by group_comparison being "true" -->
242 <data name="out_msstats_log" format="txt" label="${tool.name} on ${on_string}: MSstats log"> <!-- Filled by the command from msstats*.log -->
243 <filter>'msstats_log' in selected_outputs</filter>
244 </data>
245 <data name="out_msstatstmt_log" format="txt" label="${tool.name} on ${on_string}: MSstatsTMT log"> <!-- Filled by the command from msstatstmt.log -->
246 <filter>'msstatstmt_log' in selected_outputs</filter>
247 </data>
248 <data name="out_r_script" format="txt" label="${tool.name} on ${on_string}: Rscript"> <!-- Copy of the rendered configfile script -->
249 <filter>'r_script' in selected_outputs</filter>
250 </data>
251 <data name="out_profile_plot" from_work_dir="ProfilePlot.pdf" format="pdf" label="${tool.name} on ${on_string}: Profile Plot">
252 <filter>'ProfilePlot' in selected_outputs</filter>
253 </data>
254 <data name="out_qc_plot" from_work_dir="QCPlot.pdf" format="pdf" label="${tool.name} on ${on_string}: QC Plot">
255 <filter>'QCPlot' in selected_outputs</filter>
256 </data>
257 <data name="out_group_comp" from_work_dir="ComparisonResult.tsv" format="tsv" label="${tool.name} on ${on_string}: Group Comparison"> <!-- File written by write.table at the end of the script -->
258 <filter>group['group_comparison'] == 'true'</filter>
259 </data>
260 </outputs>
261 <tests>
262 <test expect_num_outputs="5"> <!-- MSstatsTMT-format input with all five selectable outputs; checks log text, the line count of the rendered script, and approximate PDF sizes -->
263 <conditional name="input">
264 <param name="input_src" value="MSstatsTMT"/>
265 <param name="msstatstmt_input" ftype="tabular" value="input.msstatstmt.txt"/>
266 </conditional>
267 <param name="selected_outputs" value="msstats_log,msstatstmt_log,r_script,ProfilePlot,QCPlot"/>
268 <output name="out_msstats_log">
269 <assert_contents>
270 <has_text text="1 level of Isotope type labeling in this experiment" />
271 </assert_contents>
272 </output>
273 <output name="out_msstatstmt_log">
274 <assert_contents>
275 <has_text text="MSstatsTMT - proteinSummarization function" />
276 </assert_contents>
277 </output>
278 <output name="out_r_script">
279 <assert_contents>
280 <has_n_lines n="51" /> <!-- Tied to the configfile template: adding or removing rendered lines there requires updating this count -->
281 </assert_contents>
282 </output>
283 <output name="out_profile_plot" file="ProfilePlot.pdf" compare="sim_size"/>
284 <output name="out_qc_plot" file="QCPlot.pdf" compare="sim_size"/>
285 </test>
286 <test expect_num_outputs="2"> <!-- MaxQuant input, group comparison driven by a comparison matrix, and a profile plot restricted to a protein list; the two expected outputs are the comparison table and the profile plot -->
287 <conditional name="input">
288 <param name="input_src" value="MaxQuant"/>
289 <param name="evidence" ftype="tabular" value="evidence.txt"/>
290 <param name="annotation" ftype="tabular" value="annotation.txt"/>
291 <param name="proteinGroups" ftype="tabular" value="proteinGroups.txt"/>
292 </conditional>
293 <conditional name="group">
294 <param name="group_comparison" value="true"/>
295 <conditional name="use_comp_matrix">
296 <param name="select" value="true"/>
297 <param name="comparison_matrix" ftype="tabular" value="comparison_matrix.txt"/>
298 </conditional>
299 </conditional>
300 <param name="selected_outputs" value="ProfilePlot"/>
301 <section name="out_plots_opt"><conditional name="which_Protein"> <!-- Nested in its section so the test mirrors the inputs tree instead of relying on lookup by bare name -->
302 <param name="select" value="list"/>
303 <param name="protein_list" ftype="tabular" value="proteinIDs.txt"/>
304 </conditional></section>
305 <output name="out_group_comp">
306 <assert_contents>
307 <has_n_lines n="21" />
308 <has_n_columns n="8" />
309 <has_text text="A0AVT1" />
311 <has_text text="O43324" />
312 </assert_contents>
313 </output>
314 <output name="out_profile_plot" file="ProfilePlot_list.pdf" compare="sim_size"/>
315 </test>
316 <test expect_num_outputs="2"> <!-- OpenMS input with group comparison and no comparison matrix; only the MSstats log is selected, so the two expected outputs are that log and the comparison table -->
317 <conditional name="input">
318 <param name="input_src" value="OpenMS"/>
319 <param name="oms_input" ftype="tabular" value="input.oms.txt"/>
320 </conditional>
321 <conditional name="group">
322 <param name="group_comparison" value="true"/>
323 </conditional>
324 <param name="selected_outputs" value="msstats_log"/>
325 <output name="out_group_comp">
326 <assert_contents>
327 <has_n_lines n="51" />
328 <has_n_columns n="8" />
329 <has_text text="Long_LF-Short_HF" />
330 <has_text text="sp|O35226|PSMD4_MOUSE" />
331 </assert_contents>
332 </output>
333 </test>
334 </tests>
335 <help><![CDATA[
336 MSstatsTMT is an R-based package for detecting differentially abundant proteins in shotgun mass spectrometry-based proteomic experiments with tandem mass tag (TMT) labeling. It is applicable to isobaric labeling quantitative proteomics, including iTRAQ and TMT data. MSstatsTMT provides functionalities for two types of analysis: 1) Protein summarization based on peptide quantification data and visualization; 2) Model-based group comparison to detect significant changes in abundance.
337
338 **Notes**
339
340 - MSstatsTMT 11 column format: For TMT datasets an additional 'Channel' column is required.
341
342 ::
343
344 #> ProteinName PeptideSequence
345 #> 1 sp|Q60854|SPB6_MOUSE .(TMT6plex)AFVEVNEEGTEAAAATAGMM(Oxidation)TVR
346 #> 2 sp|Q60854|SPB6_MOUSE .(TMT6plex)AFVEVNEEGTEAAAATAGMM(Oxidation)TVR
347 #> 3 sp|Q60854|SPB6_MOUSE .(TMT6plex)AFVEVNEEGTEAAAATAGMM(Oxidation)TVR
348 #> 4 sp|Q60854|SPB6_MOUSE .(TMT6plex)AFVEVNEEGTEAAAATAGMM(Oxidation)TVR
349 #> 5 sp|Q60854|SPB6_MOUSE .(TMT6plex)AFVEVNEEGTEAAAATAGMM(Oxidation)TVR
350 #> 6 sp|Q60854|SPB6_MOUSE .(TMT6plex)AFVEVNEEGTEAAAATAGMM(Oxidation)TVR
351 #> PrecursorCharge PSM Mixture
352 #> 1 3 .(TMT6plex)AFVEVNEEGTEAAAATAGMM(Oxidation)TVR_3 3
353 #> 2 3 .(TMT6plex)AFVEVNEEGTEAAAATAGMM(Oxidation)TVR_3 3
354 #> 3 3 .(TMT6plex)AFVEVNEEGTEAAAATAGMM(Oxidation)TVR_3 3
355 #> 4 3 .(TMT6plex)AFVEVNEEGTEAAAATAGMM(Oxidation)TVR_3 3
356 #> 5 3 .(TMT6plex)AFVEVNEEGTEAAAATAGMM(Oxidation)TVR_3 3
357 #> 6 3 .(TMT6plex)AFVEVNEEGTEAAAATAGMM(Oxidation)TVR_3 3
358 #> TechRepMixture Run Channel BioReplicate Condition Intensity
359 #> 1 3_3 3_3_3 1 21 Long_HF NA
360 #> 2 3_3 3_3_3 2 22 Norm 1068.580
361 #> 3 3_3 3_3_3 3 23 Long_M 1508.330
362 #> 4 3_3 3_3_3 4 24 Long_HF NA
363 #> 5 3_3 3_3_3 5 25 Long_LF 1580.951
364 #> 6 3_3 3_3_3 6 26 Long_HF 1820.072
365
366 For more information please visit the `MSstatsConvert documentation <https://bioconductor.org/packages/devel/bioc/vignettes/MSstatsConvert/inst/doc/msstats_data_format.html>`_
367
368 - Comparison matrix as tabular file
369
370 - 1st column: name of comparison
371 - additionally one column for each condition that is present in the tabular file. Use 1 and -1 to indicate the conditions to compare and 0 for conditions that are not compared. Multiple groups can be combined by using 0.5.
372 - first row contains the names of the groups; they must exactly match the condition names used in the annotation file
373 - each additional row represents one comparison
374 - Example for a two group comparison
375
376 ::
377
378 names groupA groupB
379 groupA-groupB 1 -1
380
381
382 - Example for an experiment with 5 groups and 4 different comparisons
383
384 ::
385
386 names G1 G2 G3 G4 G5
387 G2-G1 -1 1 0 0 0
388 G4-G5 0 0 0 1 -1
389 G3-G5 0 0 -1 0 1
390 G1+G2-G5 0.5 0.5 0 0 -1
391
392 For additional help please visit the `MSstatsTMT documentation <https://msstats.org/msstatstmt/>`_
393 ]]>
394 </help>
395 <citations> <!-- Single DOI citation attached to the tool -->
396 <citation type="doi">10.1074/mcp.ra120.002105</citation>
397 </citations>
398 </tool>