Mercurial > repos > galaxyp > msstats
changeset 5:28434abe6c5c draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msstats commit 0e253b8114e2fe6f4e33edcc5f1a4602073064c3"
author | galaxyp |
---|---|
date | Fri, 06 Aug 2021 20:06:19 +0000 |
parents | 593839e1f2c3 |
children | b7034eff0db1 |
files | msstats.xml test-data/Comparison_plot_skyline.pdf test-data/MSstats ProfilePlot.pdf test-data/Profile_plot_skyline.pdf test-data/Volcano_plot_skyline.pdf test-data/featurelevel_data_skyline.tabular test-data/profile_wsum_plot.pdf |
diffstat | 7 files changed, 188 insertions(+), 103 deletions(-) [+] |
line wrap: on
line diff
--- a/msstats.xml Thu Feb 25 08:41:37 2021 +0000 +++ b/msstats.xml Fri Aug 06 20:06:19 2021 +0000 @@ -1,7 +1,7 @@ -<tool id="msstats" name="MSstats" version="@VERSION@.1"> +<tool id="msstats" name="MSstats" version="@VERSION@.0"> <description>statistical relative protein significance analysis in DDA, SRM and DIA Mass Spectrometry</description> <macros> - <token name="@VERSION@">3.22.0</token> + <token name="@VERSION@">4.0.0</token> <xml name="useUniquePeptide"> <param name="useUniquePeptide" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Remove peptides that are assigned for more than one proteins"/> </xml> @@ -28,7 +28,6 @@ <command detect_errors="exit_code"><![CDATA[ cat '$msstats_script' > '$r_script' && Rscript '$msstats_script' - && cat msstats*.log > '$log' ]]></command> <configfiles> <configfile name="msstats_script"><![CDATA[ @@ -66,7 +65,10 @@ fewMeasurements="$input.input_options.fewMeasurements", removeMpeptides=$input.input_options.removeMpeptides, removeOxidationMpeptides=$input.input_options.removeOxidationMpeptides, - removeProtein_with1Peptide=$input.input_options.removeProtein_with1Peptide) + removeProtein_with1Peptide=$input.input_options.removeProtein_with1Peptide, + use_log_file = TRUE, + append = TRUE, + log_file_pat = "log.txt") #elif $input.input_src == 'OpenMS' @@ -91,7 +93,10 @@ useUniquePeptide=$input.input_options.useUniquePeptide, summaryforMultipleRows=$input.input_options.summaryforMultipleRows, fewMeasurements="$input.input_options.fewMeasurements", - removeProtein_with1Feature=$input.input_options.removeProtein_with1Feature) + removeProtein_with1Feature=$input.input_options.removeProtein_with1Feature, + use_log_file = TRUE, + append = TRUE, + log_file_pat = "log.txt") #elif $input.input_src == 'OpenSWATH' @@ -114,8 +119,11 @@ useUniquePeptide=$input.input_options.useUniquePeptide, fewMeasurements="$input.input_options.fewMeasurements", removeProtein_with1Feature=$input.input_options.removeProtein_with1Feature, - summaryforMultipleRows=$input.input_options.summaryforMultipleRows) - + summaryforMultipleRows=$input.input_options.summaryforMultipleRows, + use_log_file = TRUE, + append = TRUE, + log_file_pat = "log.txt") + #elif $input.input_src == 'Skyline' #if $input.skyline_input.is_of_type('csv') @@ -142,7 +150,10 @@ useUniquePeptide = $input.input_options.useUniquePeptide, fewMeasurements="$input.input_options.fewMeasurements", removeOxidationMpeptides = $input.input_options.removeOxidationMpeptides, - removeProtein_with1Feature = $input.input_options.removeProtein_with1Feature) + removeProtein_with1Feature = $input.input_options.removeProtein_with1Feature, + use_log_file = TRUE, + append = TRUE, + log_file_pat = "log.txt") #end if @@ -152,7 +163,6 @@ #if $dp_options.norm.normalization == 'globalStandards' nameStandards=c($dp_options.norm.nameStandards), #end if - fillIncompleteRows=$dp_options.fillIncompleteRows, featureSubset="$dp_options.features.featureSubset", #if $dp_options.features.featureSubset == 'topN' n_top_feature=$dp_options.features.n_top_feature, @@ -173,23 +183,26 @@ #else censoredInt="$dp_options.censoredInt", #end if - cutoffCensored="$dp_options.cutoffCensored", #if $dp_options.maxQuantileforCensored == '' - maxQuantileforCensored = NULL) + maxQuantileforCensored = NULL, #else - maxQuantileforCensored = $dp_options.maxQuantileforCensored) + maxQuantileforCensored = $dp_options.maxQuantileforCensored, #end if + use_log_file = TRUE, + append = TRUE, + log_file_pat = "log.txt") + #if 'raw_data' in $dp_options.selected_outputs write.table(raw, "raw.tsv", sep = "\t", quote = F, row.names = F, dec = ".") #end if -#if 'processed_data' in $dp_options.selected_outputs -write.table(processed_data\$ProcessedData, "ProcessedData.tsv", sep = "\t", quote = F, row.names = F, dec = ".") +#if 'featurelevel_data' in $dp_options.selected_outputs +write.table(processed_data\$FeatureLevelData, "featurelevelData.tsv", sep = "\t", quote = F, row.names = F, dec = ".") #end if -#if 'runlevel_data' in $dp_options.selected_outputs -write.table(processed_data\$RunlevelData, "RunlevelData.tsv", sep = "\t", quote = F, row.names = F, dec = ".") +#if 'proteinlevel_data' in $dp_options.selected_outputs +write.table(processed_data\$ProteinLevelData, "proteinlevelData.tsv", sep = "\t", quote = F, row.names = F, dec = ".") #end if #for $plot_type in $dp_options.out_plots_opt.selected_vis_outputs @@ -217,35 +230,40 @@ dot.size.condition = $dp_options.out_plots_opt.proc_plots_advanced.dot_size_condition, width = $dp_options.out_plots_opt.width, height = $dp_options.out_plots_opt.height, - #if $dp_options.out_plots_opt.which_Protein.select != 'list' - which.Protein = "$dp_options.out_plots_opt.which_Protein.select", + #if $dp_options.out_plots_opt.which_Protein.select == 'list' + which.Protein = unlist(read.table("$dp_options.out_plots_opt.which_Protein.protein_list", sep = "\n", header = FALSE), use.names = FALSE), + #elif $dp_options.out_plots_opt.which_Protein.select == 'allonly' + #if $plot_type == "QCPlot" + which.Protein = "allonly", + #else + which.Protein = "all", + #end if #else - which.Protein = unlist(read.table("$dp_options.out_plots_opt.which_Protein.protein_list", sep = "\n", header = FALSE), use.names = FALSE), + which.Protein = "all", #end if remove_uninformative_feature_outlier = $dp_options.out_plots_opt.proc_plots_advanced.remove_uninformative_feature_outlier, address="MSStats_only_") - #end if #end for ## Quantifiaction #if 'quant_sample_matrix' in $dp_options.selected_outputs -sampleQuantMatrix <- quantification(processed_data, type="Sample") +sampleQuantMatrix <- quantification(processed_data, type="Sample", use_log_file = TRUE, append = TRUE, log_file_pat = "log.txt") write.table(sampleQuantMatrix, "SampleQuantificationMatrix.tsv", sep = "\t", quote = F, row.names = F, dec = ".") #end if #if 'quant_sample_long' in $dp_options.selected_outputs -sampleQuantLong <- quantification(processed_data, type="Sample", format="long") +sampleQuantLong <- quantification(processed_data, type="Sample", format="long", use_log_file = TRUE, append = TRUE, log_file_pat = "log.txt") write.table(sampleQuantLong, "SampleQuantificationLong.tsv", sep = "\t", quote = F, row.names = F, dec = ".") #end if #if 'quant_group_matrix' in $dp_options.selected_outputs -groupQuantMatrix <- quantification(processed_data, type="Group") +groupQuantMatrix <- quantification(processed_data, type="Group", use_log_file = TRUE, append = TRUE, log_file_pat = "log.txt") write.table(groupQuantMatrix, "GroupQuantificationMatrix.tsv", sep = "\t", quote = F, row.names = F, dec = ".") #end if #if 'quant_group_long' in $dp_options.selected_outputs -groupQuantLong <- quantification(processed_data, type="Group", format="long") +groupQuantLong <- quantification(processed_data, type="Group", format="long", use_log_file = TRUE, append = TRUE, log_file_pat = "log.txt") write.table(groupQuantLong, "GroupQuantificationLong.tsv", sep = "\t", quote = F, row.names = F, dec = ".") #end if @@ -261,16 +279,16 @@ ## first columns contains comparison names, use as row name comparison <- comp_matrix[,-1] row.names(comparison) <- as.character(comp_matrix[,1]) + ## order of conditions has to be the same as they appear in the levels function -comparison <- as.matrix(comparison[levels(processed_data\$ProcessedData\$GROUP_ORIGINAL)]) +comparison <- as.matrix(comparison[levels(processed_data\$FeatureLevelData\$GROUP)]) ## perform group comparison -comparisons <- groupComparison(contrast.matrix = comparison, data = processed_data) +comparisons <- groupComparison(contrast.matrix = comparison, data = processed_data, use_log_file = TRUE, append = TRUE, log_file_pat = "log.txt") -print(comparisons\$fittedmodel) - #if 'fittedmodel' in $group.select_outputs -capture.output(print(comparisons\$fittedmodel), file="ComparisonFittedModel.txt") - #end if +#if 'fittedmodel' in $group.select_outputs + capture.output(print(comparisons\$FittedModel), file="ComparisonFittedModel.txt") +#end if #if 'comparison_result' in $group.select_outputs @@ -292,8 +310,6 @@ type = "$plot_type", axis.size = $group.comparison_plots_opt.comparison_vis_options.axis_size, dot.size = $group.comparison_plots_opt.comparison_vis_options.dot_size, - text.size = $group.comparison_plots_opt.comparison_vis_options.text_size, - legend.size = $group.comparison_plots_opt.comparison_vis_options.legend_size, width = $group.comparison_plots_opt.width, height = $group.comparison_plots_opt.height, #if $group.comparison_plots_opt.which_Protein.select != 'list' @@ -424,8 +440,8 @@ <option value="log" selected="true">MSstats log</option> <option value="r_script" selected="false">MSstats Rscript</option> <option value="raw_data" selected="true">MSstats RawData</option> - <option value="processed_data" selected="true">MSstats ProcessedData</option> - <option value="runlevel_data" selected="false">MSstats RunlevelData</option> + <option value="featurelevel_data" selected="true">MSstats FeatureLevelData</option> + <option value="proteinlevel_data" selected="false">MSstats ProteinLevelData</option> <option value="quant_sample_matrix" selected="false">Sample Quantification Matrix Table</option> <option value="quant_sample_long" selected="false">Sample Quantification Long Table</option> <option value="quant_group_matrix" selected="true">Group Quantification Matrix Table</option> @@ -452,7 +468,6 @@ </when> <when value="FALSE"/> </conditional> - <param name="fillIncompleteRows" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Fill Incomplete Rows" help="If the input dataset has incomplete rows, 'Yes' (default) adds the rows with intensity value=NA for missing peaks. 'No' reports error message with list of features which have incomplete rows"/> <conditional name="features"> <param name="featureSubset" type="select" label="Feature Subset"> <option value="all" selected="true">Use all features that the data set has</option> @@ -486,12 +501,7 @@ <help>The processing tools report missing values differently. This option is for distinguish which value should be considered as missing, and further whether it is censored or at random. Skyline and OpenSWATH input should use '0'. MaxQuant input should use 'NA'</help> <option value="NA" selected="true">NA - Assume that all 'NA's in 'Intensity' column are censored</option> <option value="0">0 - Use zero intensities '0' as censored intensity</option> - <option value="NULL">NULL - Assume all NA intensites are randomly missing</option> - </param> - <param name="cutoffCensored" type="select" label="Cutoff value for censoring"> - <option value="minFeature" selected="true">minimum value for each feature</option> - <option value="minRun">minimum value for each run</option> - <option value="minFeatureNRun">smallest between minimum value of corresponding feature and minimum value of corresponding run</option> + <!--option value="NULL">NULL - Assume all NA intensites are randomly missing</option--> </param> <param name="maxQuantileforCensored" type="float" optional="true" value="0.999" min="0" max="1.0" label="Maximum quantile for deciding censored missing values." help="If you don't want to apply the threshold of noise intensity in your data, remove the value (empty field)"/> @@ -623,8 +633,8 @@ </conditional> </inputs> <outputs> - <data name="log" format="txt" label="${tool.name} on ${on_string}: MSstats log"> - <filter>'log' in in dp_options['selected_outputs']</filter> + <data name="log" format="txt" label="${tool.name} on ${on_string}: log" from_work_dir="log.txt"> + <filter>'log' in dp_options['selected_outputs']</filter> </data> <data name="r_script" format="txt" label="${tool.name} on ${on_string}: Rscript"> <filter>'r_script' in dp_options['selected_outputs']</filter> @@ -632,14 +642,14 @@ <data name="raw_data" format="tabular" label="${tool.name} on ${on_string}: RawData" from_work_dir="raw.tsv"> <filter>'raw_data' in dp_options['selected_outputs']</filter> </data> - <data name="processed_data" format="tabular" label="${tool.name} on ${on_string}: ProcessedData" from_work_dir="ProcessedData.tsv"> - <filter>'processed_data' in dp_options['selected_outputs']</filter> + <data name="featurelevel_data" format="tabular" label="${tool.name} on ${on_string}: FeatureLevelData" from_work_dir="featurelevelData.tsv"> + <filter>'featurelevel_data' in dp_options['selected_outputs']</filter> <!--actions> <action name="column_names" type="metadata" default="PROTEIN,PEPTIDE,TRANSITION,FEATURE,LABEL,GROUP_ORIGINAL,SUBJECT_ORIGINAL,RUN,GROUP,SUBJECT,INTENSITY,SUBJECT_NESTED,ABUNDANCE,FRACTION,originalRUN,censored" /> </actions--> </data> - <data name="runlevel_data" format="tabular" label="${tool.name} on ${on_string}: RunlevelData" from_work_dir="RunlevelData.tsv"> - <filter>'runlevel_data' in dp_options['selected_outputs']</filter> + <data name="proteinlevel_data" format="tabular" label="${tool.name} on ${on_string}: ProteinLevelData" from_work_dir="proteinlevelData.tsv"> + <filter>'proteinlevel_data' in dp_options['selected_outputs']</filter> <!--actions> <action name="column_names" type="metadata" default="RUN,Protein,LogIntensities,NumMeasuredFeature,MissingPercentage,more50missing,NumImputedFeature,originalRUN,GROUP,GROUP_ORIGINAL,SUBJECT_ORIGINAL,SUBJECT_NESTED,SUBJECT" /> </actions--> @@ -706,17 +716,17 @@ </data> </outputs> <tests> - <test> + <test expect_num_outputs="6"> <conditional name="input"> <param name="input_src" value="MSstats"/> <param name="msstats_input" ftype="csv" value="msstats_testfile.txt"/> </conditional> - <param name="selected_outputs" value="raw_data,processed_data,quant_sample_matrix,quant_group_long"/> + <param name="selected_outputs" value="raw_data,featurelevel_data,quant_sample_matrix,quant_group_long"/> <param name="selected_vis_outputs" value="ProfilePlot,profile_wsum_plot"/> - <output name="processed_data"> + <output name="featurelevel_data"> <assert_contents> - <has_text text="D.GPLTGTYR" /> - <has_n_columns n="16" /> + <has_text text="-.PHSHPALTPEQK_347_NA_347_NA" /> + <has_n_columns n="15" /> <has_n_lines n="2071" /> </assert_contents> </output> @@ -735,10 +745,10 @@ </assert_contents> </output> <output name="ProfilePlot" file="MSstats ProfilePlot.pdf" compare="sim_size"/> - <output name="profile_wsum_plot" file="profile_wsum_plot.pdf" compare="sim_size"/> + <output name="profile_wsum_plot" file="profile_wsum_plot.pdf" compare="sim_size"/> </test> - <test> + <test expect_num_outputs="6"> <conditional name="input"> <param name="input_src" value="MSstats"/> <param name="msstats_input" ftype="tabular" value="msstats_testfile.tsv"/> @@ -749,31 +759,31 @@ </conditional> <param name="select_outputs" value="model_qc"/> <param name="select_comparison_plots" value="ResidualPlots"/> - <output name="processed_data"> + <output name="featurelevel_data"> <assert_contents> <has_text text="D.GPLTGTYR" /> - <has_n_columns n="16" /> + <has_n_columns n="15" /> <has_n_lines n="2071" /> </assert_contents> </output> <output name="model_qc"> <assert_contents> <has_text text="MissingPercentage" /> - <has_n_columns n="15" /> + <has_n_columns n="13" /> <has_n_lines n="108" /> </assert_contents> </output> <output name="ResidualPlots" file="residual_plot.pdf" compare="sim_size"/> </test> - <test> + <test expect_num_outputs="5"> <conditional name="input"> <param name="input_src" value="MaxQuant"/> <param name="evidence" ftype="tabular" value="test_MQ_evidence.tabular"/> <param name="annotation" ftype="tabular" value="test_MQ_annotation.txt"/> <param name="proteinGroups" ftype="tabular" value="test_MQ_proteingroups.tabular"/> </conditional> - <param name="selected_outputs" value="processed_data,runlevel_data"/> + <param name="selected_outputs" value="featurelevel_data,proteinlevel_data"/> <param name="selected_vis_outputs" value="ConditionPlot"/> <conditional name="group"> <param name="group_comparison" value="yes"/> @@ -781,17 +791,18 @@ </conditional> <param name="select_outputs" value="comparison_result"/> <param name="select_comparison_plots" value="QQPlots"/> - <output name="processed_data"> + <output name="featurelevel_data"> <assert_contents> <has_text text="SPILVATAVAAR" /> - <has_n_columns n="16" /> + <has_n_columns n="15" /> <has_n_lines n="61" /> </assert_contents> </output> - <output name="runlevel_data"> + <output name="proteinlevel_data"> <assert_contents> - <has_text text="qx017084.raw.thermo" /> - <has_n_columns n="13" /> + <has_text text="qx017084rawthermo" /> + <has_text text="sp|O75340|PDCD6_HUMANProgrammedcelldeathprotein6OS=HomosapiensOX=9606GN=PDCD6PE=1SV=1" /> + <has_n_columns n="11" /> <has_n_lines n="13" /> </assert_contents> </output> @@ -806,37 +817,38 @@ <output name="QQPlots" file="qq_plot.pdf" compare="sim_size"/> </test> - <test> + <test expect_num_outputs="5"> <conditional name="input"> <param name="input_src" value="OpenMS"/> <param name="openms_input" ftype="tabular" value="openms_input.tabular"/> </conditional> - <param name="selected_outputs" value="processed_data,runlevel_data"/> + <param name="selected_outputs" value="featurelevel_data,proteinlevel_data"/> <param name="selected_vis_outputs" value="ConditionPlot"/> <conditional name="group"> <param name="group_comparison" value="yes"/> <param name="comparison_matrix" ftype="tabular" value="openms_comparisonmatrix.tabular"/> </conditional> <param name="select_comparison_plots" value="Heatmap"/> - <output name="processed_data"> + <output name="featurelevel_data"> <assert_contents> <has_text text="AAAPGIQLVAGEGFQSPLEDR_2_NA_0" /> <has_text text="sp|P09938|RIR2_YEAST" /> - <has_n_columns n="16" /> + <has_n_columns n="15" /> <has_n_lines n="121" /> </assert_contents> </output> - <output name="runlevel_data"> + <output name="proteinlevel_data"> <assert_contents> <has_text text="sp|P09457|ATPO_YEAST" /> - <has_n_columns n="13" /> + <has_n_columns n="11" /> <has_n_lines n="76" /> </assert_contents> </output> <output name="ConditionPlot" file="condition_plot_openms.pdf" compare="sim_size"/> <output name="Heatmap" file="Heatmap_openms.pdf" compare="sim_size"/> </test> - <test> + + <test expect_num_outputs="7"> <conditional name="input"> <param name="input_src" value="Skyline"/> <param name="skyline_input" ftype="csv" value="skyline_input_first100.csv"/> @@ -845,13 +857,13 @@ </conditional> <conditional name="summarize"> <param name="MBimpute" value="FALSE"/> - <param name="censoredInt" value="NULL"/> </conditional> - <param name="selected_outputs" value="log,processed_data,quant_sample_long"/> + <param name="censoredInt" value="NA"/> + <param name="selected_outputs" value="log,featurelevel_data,quant_sample_long"/> <param name="selected_vis_outputs" value="ProfilePlot"/> - <param name="featureName" value="Peptide"/> <param name="width" value="10"/> <param name="height" value="7"/> + <param name="featureName" value="Peptide"/> <conditional name="group"> <param name="group_comparison" value="yes"/> <param name="comparison_matrix" ftype="tabular" value="comparison_matrix_skyline.tabular"/> @@ -877,16 +889,16 @@ </output> <output name="log"> <assert_contents> - <has_text text="ADVGFLC" /> - <has_text text="1 level of Isotope type labeling in this experiment" /> - <has_text text="The required input : provided - okay" /> + <has_text text="3-3" /> + <has_text text="summaryforMultipleRows: sum" /> + <has_text text="Shared peptides are removed" /> </assert_contents> </output> - <output name="processed_data"> + <output name="featurelevel_data"> <assert_contents> - <has_text text="ADVGFLC[+57]NMLER_2_sum_NA" /> + <has_text text="ADVGFLC[+57]NMLER_2" /> <has_text text="319070944" /> - <has_n_columns n="15" /> + <has_n_columns n="14" /> <has_n_lines n="46" /> </assert_contents> </output> @@ -894,6 +906,7 @@ <assert_contents> <has_text text="c1-c4" /> <has_text text="log2FC" /> + <has_n_columns n="11" /> <has_n_lines n="4" /> </assert_contents> </output> @@ -901,41 +914,80 @@ <output name="VolcanoPlot" file="Volcano_plot_skyline.pdf" compare="sim_size"/> <output name="ComparisonPlot" file="Comparison_plot_skyline.pdf" compare="sim_size"/> </test> + + <test expect_num_outputs="3"> + <conditional name="input"> + <param name="input_src" value="Skyline"/> + <param name="skyline_input" ftype="csv" value="skyline_input_first100.csv"/> + <param name="annotation" ftype="csv" value="skyline_annotations.csv"/> + <param name="removeProtein_with1Peptide" value="TRUE"/> + </conditional> + <conditional name="summarize"> + <param name="MBimpute" value="TRUE"/> + <param name="featureSubset" value="highQuality"/> + <param name="remove_uninformative_feature_outlier" value="TRUE"/> + </conditional> + <param name="censoredInt" value="0"/> + <param name="selected_outputs" value="log,featurelevel_data,quant_sample_matrix"/> + <output name="quant_sample_matrix"> + <assert_contents> + <has_text text="P32125" /> + <has_text text="Condition5_5" /> + <has_n_columns n="6" /> + <has_n_lines n="2" /> + </assert_contents> + </output> + <output name="log"> + <assert_contents> + <has_text text="3-3" /> + <has_text text="summaryforMultipleRows: sum" /> + <has_text text="Shared peptides are removed" /> + </assert_contents> + </output> + <output name="featurelevel_data"> + <assert_contents> + <has_text text="AFAEAMANNSFNADEK_2" /> + <has_text text="114949068" /> + <has_n_columns n="15" /> + <has_n_lines n="46" /> + </assert_contents> + </output> + </test> - <test> + <test expect_num_outputs="5"> <conditional name="input"> <param name="input_src" value="OpenSWATH"/> <param name="openswath_input" ftype="tabular" value="test_swath_input_data.tabular"/> <param name="annotation" ftype="tabular" value="test_swath_annotations.tabular"/> </conditional> <param name="selected_vis_outputs" value="QCPlot"/> - <output name="processed_data"> + <output name="featurelevel_data"> <assert_contents> <has_text text="GETLGLIGFGR" /> - <has_n_columns n="16" /> + <has_n_columns n="15" /> <has_n_lines n="253" /> </assert_contents> </output> <output name="QCPlot" file="QC_plot.pdf" compare="sim_size"/> </test> - <test> + <test expect_num_outputs="6"> <conditional name="input"> <param name="input_src" value="OpenSWATH"/> <param name="openswath_input" ftype="tabular" value="test_swath_input_data.tabular"/> <param name="annotation" ftype="tabular" value="test_swath_annotations.tabular"/> </conditional> - <param name="selected_outputs" value="r_script,processed_data,quant_sample_long"/> + <param name="selected_outputs" value="r_script,featurelevel_data,quant_sample_long"/> <conditional name="group"> <param name="group_comparison" value="yes"/> <param name="comparison_matrix" ftype="csv" value="test_swath_group12_comparison_matrix.csv"/> </conditional> <param name="select_outputs" value="comparison_result"/> <param name="select_comparison_plots" value="VolcanoPlot,ResidualPlots"/> - <output name="processed_data"> + <output name="featurelevel_data"> <assert_contents> <has_text text="GETLGLIGFGR" /> - <has_n_columns n="16" /> + <has_n_columns n="15" /> <has_n_lines n="253" /> </assert_contents> </output> @@ -1079,27 +1131,14 @@ - Missing value imputation: - - Impute Missing Values: Only possible for Summarization Method TMP. Censored missing values will be determined (by censored intensity; cutoff value for censoring and Maximum quantile for deciding censored missing values") and imputed by Accelerated Failure Time model. + - Impute Missing Values: Only possible for Summarization Method TMP. Censored missing values will be determined and imputed by Accelerated Failure Time model. - Remove runs which have more than 50% missing values: Yes or no. - Censored Intensity: The processing tools report missing values differently. This option is for distinguishwhich value should be considered as missing, and further whether it is censored or at random - NA - It assumes that all NAs in Intensity column are censored. - 0 - It assumes that all values between 0 and 1 in Intensity column are censored. If there areNAs inIntensitywith this option, NAs will be considered as random missing. - - NULL - It assumes that all missing values are randomly missing. - Skyline and OpenSWATH input should use '0'. MaxQuant input should use 'NA' - - Cutoff value for censoring: cutoff for AFT model; only with censored intensity 'NA' or '0'; if NULL it assumes that there is no censored missing and any imputation will not be performed. In case that there are completely missing measurements in a run for a protein, any imputation will not be performed. In addition, the condition, which has no measurement at all in a protein, will be not impute. - - - minimum value for each feature: cutoff for AFT model will be the minimum value for each feature across runs. With this option, those runs with substantial missing measurements will be biased by the cutoff value. In such case, you may remove the runs that have more than 50% missing values from the analysis. - - minimum value for each run: cutoff for AFT model will be the minimum value for each run across features - - smallest between minimum value of corresponding feature and minimum value of corresponding run: cutoff for AFT model will be the smallest value between minimum valueof corresponding feature and minimum value of corresponding run - - Maximum quantile for deciding censored missing values: If you don’t want to apply the threshold of noise intensity in your data, you can use maxQuantileforCensored=NULL. - - Missing value imputation combination with summarization method TMP: - - - Summarization method: TMP + censored intensity: 'NULL': It assumes that all intensities are missing at random, therefore no action with missing value imputation: No; or error with missing value imputation: Yes. - - Missing value imputation: Yes + censored intensity:'NA' or '0': AFT model-based imputation using cutoff value for censoring in the AFT model - - Missing value imputation: No + censored intensity:'NA' or '0': censored intensities will be replaced with the value specified in cutoff value for censoring - - Missing value imputation: No + censored intensity: NULL: no imputation - Group comparison: automatic detection of differentially abundant proteins between two conditions, conditions have to be specified with the 'comparison matrix' - Quantification per sample or group: choose the corresponding output option @@ -1115,13 +1154,13 @@ - MSstats log - check log file for warnings and information on the analysis steps (txt) - MSstats Rscript - can be used to re-run analysis outside Galaxy or to inspect the executed code (txt) - MSstats RawData - raw files combined into MSstats format (tabular) - - MSstats ProcessedData - transformed, normalized, imputed intensities (tabular) + - MSstats FeatureLevelData - transformed, normalized, imputed intensities (tabular) - Intensity column: includes original intensities values - Abundance column: contains the log2 transformed and normalized intensities and it will used for run-level summarization - Censored column: has the decision about censored missing or not, based on censored Intensity and maximum quantile for deciding censored missing values options. Abundances with TRUE value in censored column will be considered as censored missing and imputed when Missing value imputation: Yes. - - MSstats RunlevelData - run and protein level summarized data (tabular) + - MSstats ProteinLevelData - run and protein level summarized data (tabular) - LogIntensities: log intensity summarized per run and protein, they will be used for the group comparison and summarized profile plot - NumMeasuredFeature: shows how many features were used for summarization of the corresponding run and protein
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/featurelevel_data_skyline.tabular Fri Aug 06 20:06:19 2021 +0000 @@ -0,0 +1,46 @@ +PROTEIN PEPTIDE TRANSITION FEATURE LABEL GROUP RUN SUBJECT FRACTION originalRUN censored INTENSITY ABUNDANCE newABUNDANCE +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition1 1 1 1 121219_S_CCES_01_01_LysC_Try_1to10_Mixt_1_1raw FALSE 319070944 29.3098413927068 29.3098413927068 +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition1 1 1 1 121219_S_CCES_01_01_LysC_Try_1to10_Mixt_1_1raw FALSE 114949068 27.8369589250684 27.8369589250684 +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition1 1 1 1 121219_S_CCES_01_01_LysC_Try_1to10_Mixt_1_1raw FALSE 132974798 28.0471170016311 28.0471170016311 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition1 2 1 1 121219_S_CCES_01_02_LysC_Try_1to10_Mixt_1_2raw FALSE 301443168 28.0471170016311 28.0471170016311 +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition1 2 1 1 121219_S_CCES_01_02_LysC_Try_1to10_Mixt_1_2raw FALSE 864180704 29.5665639874012 29.5665639874012 +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition1 2 1 1 121219_S_CCES_01_02_LysC_Try_1to10_Mixt_1_2raw FALSE 99948868 26.4544931020782 26.4544931020782 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition1 3 1 1 121219_S_CCES_01_03_LysC_Try_1to10_Mixt_1_3raw FALSE 275819008 28.0471170016311 28.0471170016311 +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition1 3 1 1 121219_S_CCES_01_03_LysC_Try_1to10_Mixt_1_3raw FALSE 866460512 29.6985291199933 29.6985291199933 +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition1 3 1 1 121219_S_CCES_01_03_LysC_Try_1to10_Mixt_1_3raw FALSE 104015372 26.6401918724078 26.6401918724078 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition2 4 2 1 121219_S_CCES_01_04_LysC_Try_1to10_Mixt_2_1raw FALSE 282914080 28.0471170016311 28.0471170016311 +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition2 4 2 1 121219_S_CCES_01_04_LysC_Try_1to10_Mixt_2_1raw FALSE 887988992 29.6972948156548 29.6972948156548 +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition2 4 2 1 121219_S_CCES_01_04_LysC_Try_1to10_Mixt_2_1raw FALSE 57196342 25.7407478107147 25.7407478107147 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition2 5 2 1 121219_S_CCES_01_05_LysC_Try_1to10_Mixt_2_2raw FALSE 306392288 28.0471170016311 28.0471170016311 +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition2 5 2 1 121219_S_CCES_01_05_LysC_Try_1to10_Mixt_2_2raw FALSE 562341792 28.9231842858172 28.9231842858172 +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition2 5 2 1 121219_S_CCES_01_05_LysC_Try_1to10_Mixt_2_2raw FALSE 61787154 25.7371158446414 25.7371158446414 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition2 6 2 1 121219_S_CCES_01_06_LysC_Try_1to10_Mixt_2_3raw FALSE 311995872 28.0471170016311 28.0471170016311 +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition2 6 2 1 121219_S_CCES_01_06_LysC_Try_1to10_Mixt_2_3raw FALSE 947982416 29.6504503596479 29.6504503596479 +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition2 6 2 1 121219_S_CCES_01_06_LysC_Try_1to10_Mixt_2_3raw FALSE 57000615 25.5946394508612 25.5946394508612 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition3 7 3 1 121219_S_CCES_01_07_LysC_Try_1to10_Mixt_3_1raw FALSE 317509760 28.0471170016311 28.0471170016311 +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition3 7 3 1 121219_S_CCES_01_07_LysC_Try_1to10_Mixt_3_1raw FALSE 831511536 29.4360523406791 29.4360523406791 +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition3 7 3 1 121219_S_CCES_01_07_LysC_Try_1to10_Mixt_3_1raw FALSE 110326472 26.5220950579513 26.5220950579513 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition3 8 3 1 121219_S_CCES_01_08_LysC_Try_1to10_Mixt_3_2raw FALSE 253089648 28.0471170016311 28.0471170016311 +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition3 8 3 1 121219_S_CCES_01_08_LysC_Try_1to10_Mixt_3_2raw FALSE 909277584 29.8921892900012 29.8921892900012 +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition3 8 3 1 121219_S_CCES_01_08_LysC_Try_1to10_Mixt_3_2raw FALSE 124922268 27.0284991700692 27.0284991700692 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition3 9 3 1 121219_S_CCES_01_09_LysC_Try_1to10_Mixt_3_3raw FALSE 330089504 28.0471170016311 28.0471170016311 +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition3 9 3 1 121219_S_CCES_01_09_LysC_Try_1to10_Mixt_3_3raw FALSE 735864592 29.2037000536836 29.2037000536836 +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition3 9 3 1 121219_S_CCES_01_09_LysC_Try_1to10_Mixt_3_3raw FALSE 54330676 25.4440986390611 25.4440986390611 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition4 10 4 1 121219_S_CCES_01_10_LysC_Try_1to10_Mixt_4_1raw FALSE NA NA NA +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition4 10 4 1 121219_S_CCES_01_10_LysC_Try_1to10_Mixt_4_1raw FALSE 943487632 29.7700692177507 29.7700692177507 +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition4 10 4 1 121219_S_CCES_01_10_LysC_Try_1to10_Mixt_4_1raw FALSE 86579608 26.3241647855115 26.3241647855115 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition4 11 4 1 121219_S_CCES_01_11_LysC_Try_1to10_Mixt_4_2raw FALSE NA NA NA +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition4 11 4 1 121219_S_CCES_01_11_LysC_Try_1to10_Mixt_4_2raw FALSE 600179488 29.4051505227937 29.4051505227937 +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition4 11 4 1 121219_S_CCES_01_11_LysC_Try_1to10_Mixt_4_2raw FALSE 91340524 26.6890834804686 26.6890834804686 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition4 12 4 1 121219_S_CCES_01_12_LysC_Try_1to10_Mixt_4_3raw FALSE NA NA NA +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition4 12 4 1 121219_S_CCES_01_12_LysC_Try_1to10_Mixt_4_3raw FALSE 709016080 29.5578987363561 29.5578987363561 +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition4 12 4 1 121219_S_CCES_01_12_LysC_Try_1to10_Mixt_4_3raw FALSE 87312183 26.5363352669061 26.5363352669061 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition5 13 5 1 121219_S_CCES_01_13_LysC_Try_1to10_Mixt_5_1raw FALSE NA NA NA +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition5 13 5 1 121219_S_CCES_01_13_LysC_Try_1to10_Mixt_5_1raw FALSE NA NA NA +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition5 13 5 1 121219_S_CCES_01_13_LysC_Try_1to10_Mixt_5_1raw FALSE 91599334 28.0471170016311 28.0471170016311 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition5 14 5 1 121219_S_CCES_01_14_LysC_Try_1to10_Mixt_5_2raw FALSE NA NA NA +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition5 14 5 1 121219_S_CCES_01_14_LysC_Try_1to10_Mixt_5_2raw FALSE NA NA NA +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition5 14 5 1 121219_S_CCES_01_14_LysC_Try_1to10_Mixt_5_2raw FALSE 96063704 28.0471170016311 28.0471170016311 +P32125 ADVGFLC[+57]NMLER_2 NA_NA ADVGFLC[+57]NMLER_2_NA_NA L Condition5 15 5 1 121219_S_CCES_01_15_LysC_Try_1to10_Mixt_5_3raw FALSE NA NA NA +P32125 AFAEAMANNSFNADEK_2 NA_NA AFAEAMANNSFNADEK_2_NA_NA L Condition5 15 5 1 121219_S_CCES_01_15_LysC_Try_1to10_Mixt_5_3raw FALSE 836397712 29.6396138744808 29.6396138744808 +P32125 AGAAQTIVASQQR_2 NA_NA AGAAQTIVASQQR_2_NA_NA L Condition5 15 5 1 121219_S_CCES_01_15_LysC_Try_1to10_Mixt_5_3raw FALSE 91967458 26.4546201287815 26.4546201287815