msstats: msstats.xml comparison

comparison msstats.xml @ 5:28434abe6c5c draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msstats commit 0e253b8114e2fe6f4e33edcc5f1a4602073064c3"

author	galaxyp
date	Fri, 06 Aug 2021 20:06:19 +0000
parents	593839e1f2c3
children	b7034eff0db1

comparison

equal deleted inserted replaced

-:593839e1f2c3
+:28434abe6c5c
-<tool id="msstats" name="MSstats" version="@VERSION@.1">
+<tool id="msstats" name="MSstats" version="@VERSION@.0">
 <description>statistical relative protein significance analysis in DDA, SRM and DIA Mass Spectrometry</description>
 <macros>
-<token name="@VERSION@">3.22.0</token>
+<token name="@VERSION@">4.0.0</token>
 <xml name="useUniquePeptide">
 <param name="useUniquePeptide" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Remove peptides that are assigned for more than one proteins"/>
 </xml>
 <xml name="summaryforMultipleRows">
 <param name="summaryforMultipleRows" type="select" label="Summary for MultipleRows" help="When there are multiple measurements for certain feature and certain run, use highest or sum of all">
 <requirement type="package" version="@VERSION@">bioconductor-msstats</requirement>
 </requirements>
 <command detect_errors="exit_code"><![CDATA[
 cat '$msstats_script' > '$r_script' &&
 Rscript '$msstats_script'
-&& cat msstats*.log > '$log'
 ]]></command>
 <configfiles>
 <configfile name="msstats_script"><![CDATA[
 library('MSstats', warn.conflicts = F, quietly = T, verbose = F)
 useUniquePeptide=$input.input_options.useUniquePeptide,
 summaryforMultipleRows=$input.input_options.summaryforMultipleRows,
 fewMeasurements="$input.input_options.fewMeasurements",
 removeMpeptides=$input.input_options.removeMpeptides,
 removeOxidationMpeptides=$input.input_options.removeOxidationMpeptides,
-removeProtein_with1Peptide=$input.input_options.removeProtein_with1Peptide)
+removeProtein_with1Peptide=$input.input_options.removeProtein_with1Peptide,
+use_log_file = TRUE,
+append = TRUE,
+log_file_pat = "log.txt")
 #elif $input.input_src == 'OpenMS'
 #if $input.openms_input.is_of_type('csv')
 	input <- read.csv("$input.openms_input", header=TRUE)
 annotation=annot,
 #end if
 useUniquePeptide=$input.input_options.useUniquePeptide,
 summaryforMultipleRows=$input.input_options.summaryforMultipleRows,
 fewMeasurements="$input.input_options.fewMeasurements",
-removeProtein_with1Feature=$input.input_options.removeProtein_with1Feature)
+removeProtein_with1Feature=$input.input_options.removeProtein_with1Feature,
+use_log_file = TRUE,
+append = TRUE,
+log_file_pat = "log.txt")
 #elif $input.input_src == 'OpenSWATH'
 #if $input.openswath_input.is_of_type('csv')
 filter_with_mscore=$input.input_options.filter_with_mscore,
 mscore_cutoff=$input.input_options.mscore_cutoff,
 useUniquePeptide=$input.input_options.useUniquePeptide,
 fewMeasurements="$input.input_options.fewMeasurements",
 removeProtein_with1Feature=$input.input_options.removeProtein_with1Feature,
-summaryforMultipleRows=$input.input_options.summaryforMultipleRows)
+summaryforMultipleRows=$input.input_options.summaryforMultipleRows,
+use_log_file = TRUE,
+append = TRUE,
+log_file_pat = "log.txt")
 #elif $input.input_src == 'Skyline'
 #if $input.skyline_input.is_of_type('csv')
 	input <- read.csv("$input.skyline_input", header=TRUE)
 #else
 				filter_with_Qvalue = $input.input_options.filter_with_Qvalue,
 				qvalue_cutoff = $input.input_options.qvalue_cutoff,
 				useUniquePeptide = $input.input_options.useUniquePeptide,
 				fewMeasurements="$input.input_options.fewMeasurements",
 				removeOxidationMpeptides = $input.input_options.removeOxidationMpeptides,
-				removeProtein_with1Feature = $input.input_options.removeProtein_with1Feature)
+				removeProtein_with1Feature = $input.input_options.removeProtein_with1Feature,
+				use_log_file = TRUE,
+				append = TRUE,
+		                log_file_pat = "log.txt")
 #end if
 processed_data <- dataProcess(raw,
 logTrans=$dp_options.logTrans,
 normalization="$dp_options.norm.normalization",
 #if $dp_options.norm.normalization == 'globalStandards'
 nameStandards=c($dp_options.norm.nameStandards),
 #end if
-fillIncompleteRows=$dp_options.fillIncompleteRows,
 featureSubset="$dp_options.features.featureSubset",
 #if $dp_options.features.featureSubset == 'topN'
 n_top_feature=$dp_options.features.n_top_feature,
 #end if
 #if $dp_options.features.featureSubset == 'highQuality'
 #if $dp_options.censoredInt == 'NULL'
 censoredInt=NULL,
 #else
 censoredInt="$dp_options.censoredInt",
 #end if
-cutoffCensored="$dp_options.cutoffCensored",
 #if $dp_options.maxQuantileforCensored == ''
-maxQuantileforCensored = NULL)
+maxQuantileforCensored = NULL,
 #else
-maxQuantileforCensored = $dp_options.maxQuantileforCensored)
+maxQuantileforCensored = $dp_options.maxQuantileforCensored,
 #end if
+use_log_file = TRUE,
+append = TRUE,
+log_file_pat = "log.txt")
 #if 'raw_data' in $dp_options.selected_outputs
 write.table(raw, "raw.tsv", sep = "\t", quote = F, row.names = F, dec = ".")
 #end if
-#if 'processed_data' in $dp_options.selected_outputs
+#if 'featurelevel_data' in $dp_options.selected_outputs
-write.table(processed_data\$ProcessedData, "ProcessedData.tsv", sep = "\t", quote = F, row.names = F, dec = ".")
+write.table(processed_data\$FeatureLevelData, "featurelevelData.tsv", sep = "\t", quote = F, row.names = F, dec = ".")
 #end if
-#if 'runlevel_data' in $dp_options.selected_outputs
+#if 'proteinlevel_data' in $dp_options.selected_outputs
-write.table(processed_data\$RunlevelData, "RunlevelData.tsv", sep = "\t", quote = F, row.names = F, dec = ".")
+write.table(processed_data\$ProteinLevelData, "proteinlevelData.tsv", sep = "\t", quote = F, row.names = F, dec = ".")
 #end if
 #for $plot_type in $dp_options.out_plots_opt.selected_vis_outputs
 legend.size = $dp_options.out_plots_opt.proc_plots_advanced.legend_size,
 dot.size.profile = $dp_options.out_plots_opt.proc_plots_advanced.dot_size_profile,
 dot.size.condition = $dp_options.out_plots_opt.proc_plots_advanced.dot_size_condition,
 width = $dp_options.out_plots_opt.width,
 height = $dp_options.out_plots_opt.height,
-#if $dp_options.out_plots_opt.which_Protein.select != 'list'
+#if $dp_options.out_plots_opt.which_Protein.select == 'list'
-which.Protein = "$dp_options.out_plots_opt.which_Protein.select",
+which.Protein = unlist(read.table("$dp_options.out_plots_opt.which_Protein.protein_list", sep = "\n", header = FALSE), use.names = FALSE),
+#elif $dp_options.out_plots_opt.which_Protein.select == 'allonly'
+	#if $plot_type == "QCPlot"
+which.Protein = "allonly",
+#else
+which.Protein = "all",
+#end if
 #else
-which.Protein = unlist(read.table("$dp_options.out_plots_opt.which_Protein.protein_list", sep = "\n", header = FALSE), use.names = FALSE),
+which.Protein = "all",
 #end if
 remove_uninformative_feature_outlier = $dp_options.out_plots_opt.proc_plots_advanced.remove_uninformative_feature_outlier,
 address="MSStats_only_")
 #end if
 #end for
 ## Quantification
 #if 'quant_sample_matrix' in $dp_options.selected_outputs
-sampleQuantMatrix <- quantification(processed_data,  type="Sample")
+sampleQuantMatrix <- quantification(processed_data,  type="Sample", use_log_file = TRUE, append = TRUE, log_file_pat = "log.txt")
 write.table(sampleQuantMatrix, "SampleQuantificationMatrix.tsv", sep = "\t", quote = F, row.names = F, dec = ".")
 #end if
 #if 'quant_sample_long' in $dp_options.selected_outputs
-sampleQuantLong <- quantification(processed_data,  type="Sample", format="long")
+sampleQuantLong <- quantification(processed_data,  type="Sample", format="long", use_log_file = TRUE, append = TRUE,  log_file_pat = "log.txt")
 write.table(sampleQuantLong, "SampleQuantificationLong.tsv", sep = "\t", quote = F, row.names = F, dec = ".")
 #end if
 #if 'quant_group_matrix' in $dp_options.selected_outputs
-groupQuantMatrix <- quantification(processed_data,  type="Group")
+groupQuantMatrix <- quantification(processed_data,  type="Group", use_log_file = TRUE, append = TRUE,  log_file_pat = "log.txt")
 write.table(groupQuantMatrix, "GroupQuantificationMatrix.tsv", sep = "\t", quote = F, row.names = F, dec = ".")
 #end if
 #if 'quant_group_long' in $dp_options.selected_outputs
-groupQuantLong <- quantification(processed_data,  type="Group", format="long")
+groupQuantLong <- quantification(processed_data,  type="Group", format="long", use_log_file = TRUE, append = TRUE,  log_file_pat = "log.txt")
 write.table(groupQuantLong, "GroupQuantificationLong.tsv", sep = "\t", quote = F, row.names = F, dec = ".")
 #end if
 ## Group Comparison
 #if $group.group_comparison == 'yes'
 #end if
 ## first columns contains comparison names, use as row name
 comparison <- comp_matrix[,-1]
 row.names(comparison) <- as.character(comp_matrix[,1])
 ## order of conditions has to be the same as they appear in the levels function
-comparison <- as.matrix(comparison[levels(processed_data\$ProcessedData\$GROUP_ORIGINAL)])
+comparison <- as.matrix(comparison[levels(processed_data\$FeatureLevelData\$GROUP)])
 ## perform group comparison
-comparisons <- groupComparison(contrast.matrix = comparison, data = processed_data)
+comparisons <- groupComparison(contrast.matrix = comparison, data = processed_data, use_log_file = TRUE, append = TRUE, log_file_pat = "log.txt")
-print(comparisons\$fittedmodel)
+#if 'fittedmodel' in $group.select_outputs
-#if 'fittedmodel' in $group.select_outputs
+capture.output(print(comparisons\$FittedModel), file="ComparisonFittedModel.txt")
-capture.output(print(comparisons\$fittedmodel), file="ComparisonFittedModel.txt")
+#end if
-#end if
 #if 'comparison_result' in $group.select_outputs
 write.table(comparisons\$ComparisonResult, "ComparisonResult.tsv", sep = "\t", quote = F, row.names = F, dec = ".")
 #end if
 	modelBasedQCPlots(data = comparisons,
 				type = "$plot_type",
 				axis.size = $group.comparison_plots_opt.comparison_vis_options.axis_size,
 				dot.size = $group.comparison_plots_opt.comparison_vis_options.dot_size,
-				text.size = $group.comparison_plots_opt.comparison_vis_options.text_size,
-				legend.size = $group.comparison_plots_opt.comparison_vis_options.legend_size,
 				width = $group.comparison_plots_opt.width,
 				height = $group.comparison_plots_opt.height,
 				#if $group.comparison_plots_opt.which_Protein.select != 'list'
 		                which.Protein = "$group.comparison_plots_opt.which_Protein.select",
 		                #else
 <section name="dp_options" title="dataProcess Options" expanded="true">
 <param name="selected_outputs" type="select" display="checkboxes" multiple="true" label="Select outputs">
 		<option value="log" selected="true">MSstats log</option>
 		<option value="r_script" selected="false">MSstats Rscript</option>
 		<option value="raw_data" selected="true">MSstats RawData</option>
-		<option value="processed_data" selected="true">MSstats ProcessedData</option>
+		<option value="featurelevel_data" selected="true">MSstats FeatureLevelData</option>
-		<option value="runlevel_data" selected="false">MSstats RunlevelData</option>
+		<option value="proteinlevel_data" selected="false">MSstats ProteinLevelData</option>
 		<option value="quant_sample_matrix" selected="false">Sample Quantification Matrix Table</option>
 		<option value="quant_sample_long" selected="false">Sample Quantification Long Table</option>
 		<option value="quant_group_matrix" selected="true">Group Quantification Matrix Table</option>
 		<option value="quant_group_long" selected="false">Group Quantification Long Table</option>
 	    </param>
 <validator type="regex" message="double-quoted names separated by commas"><![CDATA[^".+"(,".+")*$]]></validator>
 </param>
 </when>
 <when value="FALSE"/>
 </conditional>
-<param name="fillIncompleteRows" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Fill Incomplete Rows" help="If the input dataset has incomplete rows, 'Yes' (default) adds the rows with intensity value=NA for missing peaks. 'No' reports error message with list of features which have incomplete rows"/>
 <conditional name="features">
 <param name="featureSubset" type="select" label="Feature Subset">
 <option value="all" selected="true">Use all features that the data set has</option>
 <option value="top3">Use the top 3 features which have highest average of log2(intensity) across runs</option>
 <option value="topN">Use the top N features which have highest average of log2(intensity) across runs</option>
 </conditional>
 <param name="censoredInt" type="select" label="Censored intensity">
 <help>The processing tools report missing values differently. This option is for distinguish which value should be considered as missing, and further whether it is censored or at random. Skyline and OpenSWATH input should use '0'. MaxQuant input should use 'NA'</help>
 <option value="NA" selected="true">NA - Assume that all 'NA's in 'Intensity' column are censored</option>
 <option value="0">0 - Use zero intensities '0' as censored intensity</option>
-<option value="NULL">NULL - Assume all NA intensites are randomly missing</option>
+<!--option value="NULL">NULL - Assume all NA intensites are randomly missing</option-->
-</param>
-<param name="cutoffCensored" type="select" label="Cutoff value for censoring">
-<option value="minFeature" selected="true">minimum value for each feature</option>
-<option value="minRun">minimum value for each run</option>
-<option value="minFeatureNRun">smallest between minimum value of corresponding feature and minimum value of corresponding run</option>
 </param>
 <param name="maxQuantileforCensored" type="float" optional="true" value="0.999" min="0" max="1.0" label="Maximum quantile for deciding censored missing values." help="If you don't want to apply the threshold of noise intensity in your data, remove the value (empty field)"/>
 <section name="out_plots_opt" title="DataProcess Plot Options" expanded="false">
 		</section>
 </when>
 </conditional>
 </inputs>
 <outputs>
-<data name="log" format="txt" label="${tool.name} on ${on_string}: MSstats log">
+<data name="log" format="txt" label="${tool.name} on ${on_string}: log" from_work_dir="log.txt">
-<filter>'log' in in dp_options['selected_outputs']</filter>
+<filter>'log' in dp_options['selected_outputs']</filter>
 </data>
 <data name="r_script" format="txt" label="${tool.name} on ${on_string}: Rscript">
 <filter>'r_script' in dp_options['selected_outputs']</filter>
 </data>
 <data name="raw_data" format="tabular" label="${tool.name} on ${on_string}: RawData" from_work_dir="raw.tsv">
 <filter>'raw_data' in dp_options['selected_outputs']</filter>
 </data>
-<data name="processed_data" format="tabular" label="${tool.name} on ${on_string}: ProcessedData" from_work_dir="ProcessedData.tsv">
+<data name="featurelevel_data" format="tabular" label="${tool.name} on ${on_string}: FeatureLevelData" from_work_dir="featurelevelData.tsv">
-<filter>'processed_data' in dp_options['selected_outputs']</filter>
+<filter>'featurelevel_data' in dp_options['selected_outputs']</filter>
 <!--actions>
 <action name="column_names" type="metadata" default="PROTEIN,PEPTIDE,TRANSITION,FEATURE,LABEL,GROUP_ORIGINAL,SUBJECT_ORIGINAL,RUN,GROUP,SUBJECT,INTENSITY,SUBJECT_NESTED,ABUNDANCE,FRACTION,originalRUN,censored" />
 </actions-->
 </data>
-<data name="runlevel_data" format="tabular" label="${tool.name} on ${on_string}: RunlevelData" from_work_dir="RunlevelData.tsv">
+<data name="proteinlevel_data" format="tabular" label="${tool.name} on ${on_string}: ProteinLevelData" from_work_dir="proteinlevelData.tsv">
-<filter>'runlevel_data' in dp_options['selected_outputs']</filter>
+<filter>'proteinlevel_data' in dp_options['selected_outputs']</filter>
 <!--actions>
 <action name="column_names" type="metadata" default="RUN,Protein,LogIntensities,NumMeasuredFeature,MissingPercentage,more50missing,NumImputedFeature,originalRUN,GROUP,GROUP_ORIGINAL,SUBJECT_ORIGINAL,SUBJECT_NESTED,SUBJECT" />
 </actions-->
 </data>
 <data name="QCPlot" format="pdf" label="${tool.name} on ${on_string}: QCPlot" from_work_dir="MSStats_only_QCPlot.pdf">
 <data name="ComparisonPlot" format="pdf" label="${tool.name} on ${on_string}: Comparison Plot" from_work_dir="MSStats_group_ComparisonPlot.pdf">
 <filter> group['group_comparison'] == 'yes' and group['comparison_plots_opt']['select_comparison_plots'] and 'ComparisonPlot' in group['comparison_plots_opt']['select_comparison_plots']</filter>
 </data>
 </outputs>
 <tests>
-<test>
+<test expect_num_outputs="6">
 <conditional name="input">
 <param name="input_src" value="MSstats"/>
 <param name="msstats_input" ftype="csv" value="msstats_testfile.txt"/>
 </conditional>
-<param name="selected_outputs" value="raw_data,processed_data,quant_sample_matrix,quant_group_long"/>
+<param name="selected_outputs" value="raw_data,featurelevel_data,quant_sample_matrix,quant_group_long"/>
 <param name="selected_vis_outputs" value="ProfilePlot,profile_wsum_plot"/>
-<output name="processed_data">
+<output name="featurelevel_data">
 <assert_contents>
-<has_text text="D.GPLTGTYR" />
+<has_text text="-.PHSHPALTPEQK_347_NA_347_NA" />
-<has_n_columns n="16" />
+<has_n_columns n="15" />
 <has_n_lines n="2071" />
 </assert_contents>
 </output>
 <output name="quant_sample_matrix">
 <assert_contents>
 <has_n_columns n="3" />
 <has_n_lines n="37" />
 </assert_contents>
 </output>
 <output name="ProfilePlot" file="MSstats ProfilePlot.pdf" compare="sim_size"/>
 <output name="profile_wsum_plot" file="profile_wsum_plot.pdf" compare="sim_size"/>
 </test>
-<test>
+<test expect_num_outputs="6">
 <conditional name="input">
 <param name="input_src" value="MSstats"/>
 <param name="msstats_input" ftype="tabular" value="msstats_testfile.tsv"/>
 </conditional>
 <conditional name="group">
 <param name="group_comparison" value="yes"/>
 <param name="comparison_matrix" ftype="csv" value="comparison_matrix.csv"/>
 </conditional>
 <param name="select_outputs" value="model_qc"/>
 <param name="select_comparison_plots" value="ResidualPlots"/>
-<output name="processed_data">
+<output name="featurelevel_data">
 <assert_contents>
 <has_text text="D.GPLTGTYR" />
-<has_n_columns n="16" />
+<has_n_columns n="15" />
 <has_n_lines n="2071" />
 </assert_contents>
 </output>
 <output name="model_qc">
 <assert_contents>
 <has_text text="MissingPercentage" />
-<has_n_columns n="15" />
+<has_n_columns n="13" />
 <has_n_lines n="108" />
 </assert_contents>
 </output>
 <output name="ResidualPlots" file="residual_plot.pdf" compare="sim_size"/>
 </test>
-<test>
+<test expect_num_outputs="5">
 <conditional name="input">
 <param name="input_src" value="MaxQuant"/>
 <param name="evidence" ftype="tabular" value="test_MQ_evidence.tabular"/>
 <param name="annotation" ftype="tabular" value="test_MQ_annotation.txt"/>
 <param name="proteinGroups" ftype="tabular" value="test_MQ_proteingroups.tabular"/>
 </conditional>
-<param name="selected_outputs" value="processed_data,runlevel_data"/>
+<param name="selected_outputs" value="featurelevel_data,proteinlevel_data"/>
 <param name="selected_vis_outputs" value="ConditionPlot"/>
 <conditional name="group">
 <param name="group_comparison" value="yes"/>
 <param name="comparison_matrix" ftype="csv" value="test_MQ_group12_comparison_matrix.csv"/>
 </conditional>
 <param name="select_outputs" value="comparison_result"/>
 <param name="select_comparison_plots" value="QQPlots"/>
-<output name="processed_data">
+<output name="featurelevel_data">
 <assert_contents>
 <has_text text="SPILVATAVAAR" />
-<has_n_columns n="16" />
+<has_n_columns n="15" />
 <has_n_lines n="61" />
 </assert_contents>
 </output>
-<output name="runlevel_data">
+<output name="proteinlevel_data">
 <assert_contents>
-<has_text text="qx017084.raw.thermo" />
+<has_text text="qx017084rawthermo" />
-<has_n_columns n="13" />
+<has_text text="sp|O75340|PDCD6_HUMANProgrammedcelldeathprotein6OS=HomosapiensOX=9606GN=PDCD6PE=1SV=1" />
+<has_n_columns n="11" />
 <has_n_lines n="13" />
 </assert_contents>
 </output>
 <output name="comparison_result">
 <assert_contents>
 </output>
 <output name="ConditionPlot" file="condition_plot.pdf" compare="sim_size"/>
 <output name="QQPlots" file="qq_plot.pdf" compare="sim_size"/>
 </test>
-<test>
+<test expect_num_outputs="5">
 <conditional name="input">
 <param name="input_src" value="OpenMS"/>
 <param name="openms_input" ftype="tabular" value="openms_input.tabular"/>
 </conditional>
-<param name="selected_outputs" value="processed_data,runlevel_data"/>
+<param name="selected_outputs" value="featurelevel_data,proteinlevel_data"/>
 <param name="selected_vis_outputs" value="ConditionPlot"/>
 <conditional name="group">
 <param name="group_comparison" value="yes"/>
 <param name="comparison_matrix" ftype="tabular" value="openms_comparisonmatrix.tabular"/>
 </conditional>
 <param name="select_comparison_plots" value="Heatmap"/>
-<output name="processed_data">
+<output name="featurelevel_data">
 <assert_contents>
 <has_text text="AAAPGIQLVAGEGFQSPLEDR_2_NA_0" />
 <has_text text="sp|P09938|RIR2_YEAST" />
-<has_n_columns n="16" />
+<has_n_columns n="15" />
 <has_n_lines n="121" />
 </assert_contents>
 </output>
-<output name="runlevel_data">
+<output name="proteinlevel_data">
 <assert_contents>
 <has_text text="sp|P09457|ATPO_YEAST" />
-<has_n_columns n="13" />
+<has_n_columns n="11" />
 <has_n_lines n="76" />
 </assert_contents>
 </output>
 <output name="ConditionPlot" file="condition_plot_openms.pdf" compare="sim_size"/>
 <output name="Heatmap" file="Heatmap_openms.pdf" compare="sim_size"/>
 </test>
-<test>
+<test expect_num_outputs="7">
 <conditional name="input">
 <param name="input_src" value="Skyline"/>
 <param name="skyline_input" ftype="csv" value="skyline_input_first100.csv"/>
 <param name="annotation" ftype="csv" value="skyline_annotations.csv"/>
 <param name="removeProtein_with1Peptide" value="TRUE"/>
 </conditional>
 <conditional name="summarize">
 <param name="MBimpute" value="FALSE"/>
-<param name="censoredInt" value="NULL"/>
+</conditional>
-</conditional>
+<param name="censoredInt" value="NA"/>
-<param name="selected_outputs" value="log,processed_data,quant_sample_long"/>
+<param name="selected_outputs" value="log,featurelevel_data,quant_sample_long"/>
 <param name="selected_vis_outputs" value="ProfilePlot"/>
-<param name="featureName" value="Peptide"/>
 <param name="width" value="10"/>
 <param name="height" value="7"/>
+<param name="featureName" value="Peptide"/>
 <conditional name="group">
 <param name="group_comparison" value="yes"/>
 <param name="comparison_matrix" ftype="tabular" value="comparison_matrix_skyline.tabular"/>
 </conditional>
 <section name="comparison_plots_opt">
 <has_n_lines n="6" />
 </assert_contents>
 </output>
 <output name="log">
 <assert_contents>
-<has_text text="ADVGFLC" />
+<has_text text="3-3" />
-<has_text text="1 level of Isotope type labeling in this experiment" />
+<has_text text="summaryforMultipleRows: sum" />
-<has_text text="The required input : provided - okay" />
+<has_text text="Shared peptides are removed" />
 </assert_contents>
 </output>
-<output name="processed_data">
+<output name="featurelevel_data">
 <assert_contents>
-<has_text text="ADVGFLC[+57]NMLER_2_sum_NA" />
+<has_text text="ADVGFLC[+57]NMLER_2" />
 <has_text text="319070944" />
-<has_n_columns n="15" />
+<has_n_columns n="14" />
 <has_n_lines n="46" />
 </assert_contents>
 </output>
 <output name="comparison_result">
 <assert_contents>
 <has_text text="c1-c4" />
 <has_text text="log2FC" />
+<has_n_columns n="11" />
 <has_n_lines n="4" />
 </assert_contents>
 </output>
 <output name="ProfilePlot" file="Profile_plot_skyline.pdf" compare="sim_size"/>
 <output name="VolcanoPlot" file="Volcano_plot_skyline.pdf" compare="sim_size"/>
 <output name="ComparisonPlot" file="Comparison_plot_skyline.pdf" compare="sim_size"/>
 </test>
-<test>
+<test expect_num_outputs="3">
+<conditional name="input">
+<param name="input_src" value="Skyline"/>
+<param name="skyline_input" ftype="csv" value="skyline_input_first100.csv"/>
+<param name="annotation" ftype="csv" value="skyline_annotations.csv"/>
+<param name="removeProtein_with1Peptide" value="TRUE"/>
+</conditional>
+<conditional name="summarize">
+<param name="MBimpute" value="TRUE"/>
+<param name="featureSubset" value="highQuality"/>
+<param name="remove_uninformative_feature_outlier" value="TRUE"/>
+</conditional>
+<param name="censoredInt" value="0"/>
+<param name="selected_outputs" value="log,featurelevel_data,quant_sample_matrix"/>
+<output name="quant_sample_matrix">
+<assert_contents>
+<has_text text="P32125" />
+<has_text text="Condition5_5" />
+<has_n_columns n="6" />
+<has_n_lines n="2" />
+</assert_contents>
+</output>
+<output name="log">
+<assert_contents>
+<has_text text="3-3" />
+<has_text text="summaryforMultipleRows: sum" />
+<has_text text="Shared peptides are removed" />
+</assert_contents>
+</output>
+<output name="featurelevel_data">
+<assert_contents>
+<has_text text="AFAEAMANNSFNADEK_2" />
+<has_text text="114949068" />
+<has_n_columns n="15" />
+<has_n_lines n="46" />
+</assert_contents>
+</output>
+</test>
+<test expect_num_outputs="5">
 <conditional name="input">
 <param name="input_src" value="OpenSWATH"/>
 <param name="openswath_input" ftype="tabular" value="test_swath_input_data.tabular"/>
 <param name="annotation" ftype="tabular" value="test_swath_annotations.tabular"/>
 </conditional>
 <param name="selected_vis_outputs" value="QCPlot"/>
-<output name="processed_data">
+<output name="featurelevel_data">
 <assert_contents>
 <has_text text="GETLGLIGFGR" />
-<has_n_columns n="16" />
+<has_n_columns n="15" />
 <has_n_lines n="253" />
 </assert_contents>
 </output>
 <output name="QCPlot" file="QC_plot.pdf" compare="sim_size"/>
 </test>
-<test>
+<test expect_num_outputs="6">
 <conditional name="input">
 <param name="input_src" value="OpenSWATH"/>
 <param name="openswath_input" ftype="tabular" value="test_swath_input_data.tabular"/>
 <param name="annotation" ftype="tabular" value="test_swath_annotations.tabular"/>
 </conditional>
-<param name="selected_outputs" value="r_script,processed_data,quant_sample_long"/>
+<param name="selected_outputs" value="r_script,featurelevel_data,quant_sample_long"/>
 <conditional name="group">
 <param name="group_comparison" value="yes"/>
 <param name="comparison_matrix" ftype="csv" value="test_swath_group12_comparison_matrix.csv"/>
 </conditional>
 <param name="select_outputs" value="comparison_result"/>
 <param name="select_comparison_plots" value="VolcanoPlot,ResidualPlots"/>
-<output name="processed_data">
+<output name="featurelevel_data">
 <assert_contents>
 <has_text text="GETLGLIGFGR" />
-<has_n_columns n="16" />
+<has_n_columns n="15" />
 <has_n_lines n="253" />
 </assert_contents>
 </output>
 <output name="quant_sample_long">
 <assert_contents>
 	- Account for heterogeneous variation among intensities from different features: Yes: assumes equal variance among intensities from features. No: means that we cannot assume equal variance among intensities from features, then we will account for heterogeneous variation from different features
 - Missing value imputation:
-- Impute Missing Values: Only possible for Summarization Method TMP. Censored missing values will be determined (by censored intensity; cutoff value for censoring and Maximum quantile for deciding censored missing values") and imputed by Accelerated Failure Time model.
+- Impute Missing Values: Only possible for Summarization Method TMP. Censored missing values will be determined and imputed by Accelerated Failure Time model.
 - Remove runs which have more than 50% missing values: Yes or no.
 - Censored Intensity: The processing tools report missing values differently. This option is for distinguishing which value should be considered as missing, and further whether it is censored or at random
 - NA - It assumes that all NAs in Intensity column are censored.
 - 0 - It assumes that all values between 0 and 1 in Intensity column are censored. If there are NAs in Intensity with this option, NAs will be considered as randomly missing.
-- NULL - It assumes that all missing values are randomly missing.
 - Skyline and OpenSWATH input should use '0'. MaxQuant input should use 'NA'
-- Cutoff value for censoring: cutoff for AFT model; only with censored intensity 'NA' or '0'; if NULL it assumes that there is no censored missing and any imputation will not be performed. In case that there are completely missing measurements in a run for a protein, any imputation will not be performed. In addition, the condition, which has no measurement at all in a protein, will be not impute.
-- minimum value for each feature: cutoff for AFT model will be the minimum value for each feature across runs. With this option, those runs with substantial missing measurements will be biased by the cutoff value. In such case, you may remove the runs that have more than 50% missing values from the analysis.
-- minimum value for each run: cutoff for AFT model will be the minimum value for each run across features
-- smallest between minimum value of corresponding feature and minimum value of corresponding run: cutoff for AFT model will be the smallest value between minimum valueof corresponding feature and minimum value of corresponding run
-- Maximum quantile for deciding censored missing values: If you don’t want to apply the threshold of noise intensity in your data, you can use maxQuantileforCensored=NULL.
-- Missing value imputation combination with summarization method TMP:
-- Summarization method: TMP + censored intensity: 'NULL': It assumes that all intensities are missing at random, therefore no action with missing value imputation: No; or error with missing value imputation: Yes.
-- Missing value imputation: Yes + censored intensity:'NA' or '0': AFT model-based imputation using cutoff value for censoring in the AFT model
-- Missing value imputation: No + censored intensity:'NA' or '0': censored intensities will be replaced with the value specified  in cutoff value for censoring
-- Missing value imputation: No + censored intensity: NULL: no imputation
 - Group comparison: automatic detection of differentially abundant proteins between two conditions, conditions have to be specified with the 'comparison matrix'
 - Quantification per sample or group: choose the corresponding output option
 - Sample: relative protein abundance in each biological replicate. If there are technical replicates for biological replicates, sample quantification will be the median among technical replicates. If there is no technical replicate for biological replicate (sample), sample quantification will be the same as run-level summarization.
 - Different outputs available. Especially for studies with many proteins, it is suggested to select only the necessary pdf outputs as many of them generate one plot per protein.
 - MSstats log - check log file for warnings and information on the analysis steps (txt)
 - MSstats Rscript - can be used to re-run analysis outside Galaxy or to inspect the executed code (txt)
 - MSstats RawData - raw files combined into MSstats format (tabular)
-- MSstats ProcessedData - transformed, normalized, imputed intensities (tabular)
+- MSstats FeatureLevelData - transformed, normalized, imputed intensities (tabular)
 - Intensity column:  includes original intensities values
 - Abundance column:  contains the log2 transformed and normalized intensities and it will be used for run-level summarization
 - Censored column:  has the decision about censored missing or not, based on censored Intensity and maximum quantile for deciding censored missing values options. Abundances with TRUE value in censored column will be considered as censored missing and imputed when Missing value imputation: Yes.
-- MSstats RunlevelData - run and protein level summarized data (tabular)
+- MSstats ProteinLevelData - run and protein level summarized data (tabular)
 - LogIntensities: log intensity summarized per run and protein, they will be used for the group comparison and summarized profile plot
 - NumMeasuredFeature: shows how many features were used for summarization of the corresponding run and protein
 - MissingPercentage: percentage of random and censored missing values in the corresponding run and protein out of the total number of features in the corresponding protein.
 - more50missing: whether MissingPercentage is greater than 50% or not

Mercurial > repos > galaxyp > msstats

comparison msstats.xml @ 5:28434abe6c5c draft