mqppep_anova: mqppep_anova.xml comparison

comparison mqppep_anova.xml @ 0:dbff53e6f75f draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe

author	galaxyp
date	Mon, 11 Jul 2022 19:22:25 +0000
parents
children	08678c931f5d

comparison

equal deleted inserted replaced

--1:000000000000
+:dbff53e6f75f
+<tool
+id="mqppep_anova"
+name="MaxQuant Phosphopeptide ANOVA"
+version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
+profile="21.05"
+>
+<!-- One-line summary of what the tool does. -->
+<description>Runs ANOVA and KSEA for phosphopeptides.</description>
+<!-- Imports macros.xml; presumably the source of the @TOOL_VERSION@ and @VERSION_SUFFIX@ tokens and of the "requirements" macro used in this file (confirm against macros.xml). -->
+<macros>
+<import>macros.xml</import>
+</macros>
+<!-- EDAM ontology topics that classify this tool. -->
+<edam_topics>
+<edam_topic>topic_0121</edam_topic><!-- Proteomics -->
+<edam_topic>topic_3520</edam_topic><!-- Proteomics experiment -->
+</edam_topics>
+<!-- EDAM ontology operations performed by this tool; each operation ID appears exactly once. -->
+<edam_operations>
+<edam_operation>operation_0276</edam_operation><!-- Analyse a network of protein interactions. -->
+<edam_operation>operation_0531</edam_operation><!-- Heat map generation -->
+<edam_operation>operation_2938</edam_operation><!-- Dendrogram generation -->
+<edam_operation>operation_3557</edam_operation><!-- Imputation -->
+<edam_operation>operation_3435</edam_operation><!-- Standardisation and normalisation -->
+<edam_operation>operation_3501</edam_operation><!-- Enrichment analysis -->
+<edam_operation>operation_3658</edam_operation><!-- Statistical inference -->
+</edam_operations>
+<expand macro="requirements"/>
+<!--
+knitr and install_tinytex both need access to a writeable directory, but
+most directories in a biocontainer are read-only.  The command below copies
+the R Markdown template and the driver script into the current working
+directory and runs the driver from there.
+NOTE(review): this comment used to say that the invocation "builds a
+pseudo-home under /tmp"; no such step is visible in the command itself, so
+presumably it happens inside mqppep_anova.R (confirm).
+Every value substituted by Galaxy is single-quoted so that a path or value
+containing spaces or shell metacharacters reaches Rscript as one argument.
+-->
+<command detect_errors="exit_code"><![CDATA[
+cp '$__tool_directory__/mqppep_anova_script.Rmd' . &&
+cp '$__tool_directory__/mqppep_anova.R'          . &&
+Rscript mqppep_anova.R
+--inputFile '$input_file'
+--alphaFile '$alpha_file'
+--preproc_sqlite '$preproc_sqlite'
+--firstDataColumn '$intensity_column_regex_f'
+--imputationMethod '$imputation.imputation_method'
+#if $imputation.imputation_method == "random"
+--meanPercentile '$imputation.meanPercentile'
+--sdPercentile   '$imputation.sdPercentile'
+#end if
+--regexSampleNames '$sample_names_regex_f'
+--regexSampleGrouping '$sample_grouping_regex_f'
+--imputedDataFile '$imputed_data_file'
+--imputedQNLTDataFile '$imp_qn_lt_file'
+--ksea_sqlite '$ksea_sqlite'
+--ksea_cutoff_threshold '$ksea_cutoff_threshold'
+--ksea_cutoff_statistic 'FDR'
+--reportFile '$report_file'
+--anova_ksea_metadata '$anova_ksea_metadata'
+]]></command>
+<!--
+Each configfile below is written with the value of the like-named text
+parameter; the command section passes the resulting file paths to
+mqppep_anova.R as its regexSampleNames, regexSampleGrouping and
+firstDataColumn arguments.
+Presumably the patterns travel via files so that regex metacharacters never
+appear on the shell command line (confirm against mqppep_anova.R).
+-->
+<configfiles>
+<configfile name="sample_names_regex_f">
+$sample_names_regex
+</configfile>
+<configfile name="sample_grouping_regex_f">
+$sample_grouping_regex
+</configfile>
+<configfile name="intensity_column_regex_f">
+$intensity_column_regex
+</configfile>
+</configfiles>
+<inputs>
+<!-- Datasets consumed by the ANOVA/KSEA script. -->
+<param name="input_file" type="data" format="tabular" label="Filtered Phosphopeptide Intensities"
+help="Phosphopeptide intensities filtered for minimal quality.  First column label 'Phosphopeptide'; sample-intensities must begin in column 10 and must have column labels to match argument [sample_names_regex]"
+/>
+<param name="alpha_file" type="data" format="tabular" label="ANOVA alpha cutoff level"
+help="ANOVA alpha cutoff values for significance testing: tabular data having one column and no header"
+/>
+<param name="preproc_sqlite" type="data" format="sqlite" label="preproc_sqlite dataset from mqppep_preproc"
+help="'preproc_sqlite' dataset produced by 'MaxQuant Phosphopeptide Preprocessing' tool"
+/>
+<!--
+All three regular-expression parameters carry the same sanitizer: every
+printable character except the apostrophe is accepted verbatim.  Without it,
+Galaxy's default text sanitizer would rewrite characters such as the square
+brackets in the default pattern below before the value is written to the
+configfile.
+-->
+<param name="intensity_column_regex" type="text" value="^Intensity[^_]"
+label="Intensity-column pattern"
+help="Pattern matching columns that have peptide intensity data (PERL-compatible regular expression matching column label)"
+>
+<sanitizer>
+<valid initial="string.printable">
+<remove value="&apos;"/>
+</valid>
+</sanitizer>
+</param>
+<!-- imputation_method <- c("group-median","median","mean","random")[1] -->
+<!-- Only the "random" method exposes extra parameters (meanPercentile, sdPercentile). -->
+<conditional name="imputation">
+<param name="imputation_method" type="select" label="Imputation method"
+help="Impute missing values by (1) using median for each sample-group; (2) using median across all samples; (3) using mean across all samples; or (4) using randomly generated values having same std. dev. as across all samples (with mean specified by [meanPercentile])"
+>
+<option value="random" selected="true">random</option>
+<option value="group-median">group-median</option>
+<option value="median">median</option>
+<option value="mean">mean</option>
+</param>
+<when value="group-median" />
+<when value="median" />
+<when value="mean" />
+<when value="random">
+<param name="meanPercentile" type="integer" value="1" min="1" max="99"
+label="Mean percentile for random values"
+help="Percentile center of random values; range [1,99]"
+/>
+<param name="sdPercentile" type="float" value="1.0"
+label="Percentile std. dev. for random values"
+help="Standard deviation adjustment-factor for random values; real number.  (1.0 means SD equal to the SD for the entire data set.)"
+/>
+</when>
+</conditional>
+<param name="sample_names_regex" type="text" value="\.\d+[A-Z]$"
+help="Pattern extracting sample-names from names of columns that have peptide intensity data (PERL-compatible regular expression)"
+label="Sample-extraction pattern">
+<sanitizer>
+<valid initial="string.printable">
+<remove value="&apos;"/>
+</valid>
+</sanitizer>
+</param>
+<param name="sample_grouping_regex" type="text" value="\d+"
+help="Pattern extracting sample-group from the sample-names that are extracted by 'Sample-extraction pattern' (PERL-compatible regular expression)"
+label="Group-extraction pattern">
+<sanitizer>
+<valid initial="string.printable">
+<remove value="&apos;"/>
+</valid>
+</sanitizer>
+</param>
+<!-- Passed to the script as ksea_cutoff_threshold; the cutoff statistic is fixed to FDR in the command section. -->
+<param name="ksea_cutoff_threshold" type="float" value="0.05"
+label="KSEA threshold level"
+help="Maximum FDR to be used to score a kinase enrichment as significant"
+/>
+</inputs>
+<!-- Output datasets; every label is "<input name>.<imputation method>-<suffix>". -->
+<outputs>
+<data name="imputed_data_file" format="tabular" label="${input_file.name}.${imputation.imputation_method}-imputed_intensities" ></data>
+<data name="imp_qn_lt_file" format="tabular" label="${input_file.name}.${imputation.imputation_method}-imputed_QN_LT_intensities" ></data>
+<data name="anova_ksea_metadata" format="tabular" label="${input_file.name}.${imputation.imputation_method}-anova_ksea_metadata" ></data>
+<!-- Disabled HTML variant of the report output, kept for reference; the PDF output below is the active one. -->
+<!--
+<data name="report_file" format="html" label="${input_file.name}.${imputation.imputation_method}-imputed_report (download/unzip to view)" ></data>
+-->
+<data name="report_file" format="pdf" label="${input_file.name}.${imputation.imputation_method}-imputed_report" ></data>
+<data name="ksea_sqlite" format="sqlite" label="${input_file.name}.${imputation.imputation_method}-imputed_ksea_sqlite">
+</data>
+</outputs>
+<tests>
+<test>
+<!-- Median imputation: missing intensities are expected to take the same value as the across-sample median (8765300 for the checked peptide). -->
+<param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/>
+<param name="preproc_sqlite" ftype="sqlite" value="test_input_for_anova.sqlite"/>
+<param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/>
+<param name="intensity_column_regex" value="^Intensity[^_]"/>
+<!-- imputation_method is declared inside the "imputation" conditional, so the test mirrors that nesting. -->
+<conditional name="imputation">
+<param name="imputation_method" value="median"/>
+</conditional>
+<param name="sample_names_regex" value="\.\d+[A-Z]$"/>
+<param name="sample_grouping_regex" value="\d+"/>
+<output name="imputed_data_file">
+<assert_contents>
+<has_text text="Phosphopeptide" />
+<has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
+<!--                                               missing missing observd missing observd observd  -->
+<has_text_matching expression="pSQKQEEENPAEETGEEK.*8765300.8765300.8765300.8765300.2355900.14706000" />
+</assert_contents>
+</output>
+<output name="imp_qn_lt_file">
+<assert_contents>
+<has_text text="Phosphopeptide" />
+<has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
+<!--                                               missing   missing   observed  missing   observed  observed  -->
+<has_text_matching expression="pSQKQEEENPAEETGEEK.*6.962256.*6.908828.*6.814580.*6.865411.*6.908828.*7.088909" />
+<has_text text="pSQKQEEENPAEETGEEK" />
+</assert_contents>
+</output>
+</test>
+<test>
+<!-- Mean imputation: missing intensities are expected to be filled with 6721601 for the checked peptide. -->
+<param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/>
+<param name="preproc_sqlite" ftype="sqlite" value="test_input_for_anova.sqlite"/>
+<param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/>
+<param name="intensity_column_regex" value="^Intensity[^_]"/>
+<!-- imputation_method is declared inside the "imputation" conditional, so the test mirrors that nesting. -->
+<conditional name="imputation">
+<param name="imputation_method" value="mean"/>
+</conditional>
+<param name="sample_names_regex" value="\.\d+[A-Z]$"/>
+<param name="sample_grouping_regex" value="\d+"/>
+<output name="imputed_data_file">
+<assert_contents>
+<has_text text="Phosphopeptide" />
+<has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
+<!--                                               missing missing observd missing observd observd  -->
+<has_text_matching expression="pSQKQEEENPAEETGEEK.*6721601.6721601.8765300.6721601.2355900.14706000" />
+</assert_contents>
+</output>
+<output name="imp_qn_lt_file">
+<assert_contents>
+<has_text text="Phosphopeptide" />
+<has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
+<!--                                               missing   missing   observed  missing   observed  observed  -->
+<has_text_matching expression="pSQKQEEENPAEETGEEK.*6.839850.*6.797424.*6.797424.*6.797424.*6.896609.*7.092451" />
+</assert_contents>
+</output>
+</test>
+<test>
+<!-- Group-median imputation: the expected fill value differs between sample groups (8765300 vs. 5886074 for the checked peptide). -->
+<param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/>
+<param name="preproc_sqlite" ftype="sqlite" value="test_input_for_anova.sqlite"/>
+<param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/>
+<param name="intensity_column_regex" value="^Intensity[^_]"/>
+<!-- imputation_method is declared inside the "imputation" conditional, so the test mirrors that nesting. -->
+<conditional name="imputation">
+<param name="imputation_method" value="group-median"/>
+</conditional>
+<param name="sample_names_regex" value="\.\d+[A-Z]$"/>
+<param name="sample_grouping_regex" value="\d+"/>
+<output name="imputed_data_file">
+<assert_contents>
+<has_text text="Phosphopeptide" />
+<has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
+<!--                                               missing missing observd missing observd observd  -->
+<has_text_matching expression="pSQKQEEENPAEETGEEK.*8765300.8765300.8765300.5886074.2355900.14706000" />
+</assert_contents>
+</output>
+<output name="imp_qn_lt_file">
+<assert_contents>
+<has_text text="Phosphopeptide" />
+<has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
+<!--                                               missing   missing   observed  missing   observed  observed  -->
+<has_text_matching expression="pSQKQEEENPAEETGEEK.*6.946112.*6.888985.*6.792137.*6.792137.*6.888985.*7.089555" />
+</assert_contents>
+</output>
+</test>
+<test>
+<!-- Random imputation: only the three observed intensities of the checked peptide are asserted in imputed_data_file. -->
+<param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/>
+<param name="preproc_sqlite" ftype="sqlite" value="test_input_for_anova.sqlite"/>
+<param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/>
+<param name="intensity_column_regex" value="^Intensity[^_]"/>
+<!-- These three parameters are declared inside the "imputation" conditional, so the test mirrors that nesting. -->
+<conditional name="imputation">
+<param name="imputation_method" value="random"/>
+<param name="meanPercentile" value="1" />
+<param name="sdPercentile" value="1.0" />
+</conditional>
+<param name="sample_names_regex" value="\.\d+[A-Z]$"/>
+<param name="sample_grouping_regex" value="\d+"/>
+<output name="imputed_data_file">
+<assert_contents>
+<has_text text="Phosphopeptide" />
+<has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
+<!--                           observd  observd  observd  -->
+<!-- The last observed value is spelled out in full (14706000), as in the other tests that use the same input. -->
+<has_text_matching expression="pSQKQEEENPAEETGEEK.*8765300.*2355900.*14706000" />
+</assert_contents>
+</output>
+<output name="imp_qn_lt_file">
+<assert_contents>
+<has_text text="Phosphopeptide" />
+<has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
+<has_text text="5.409549" /> <!-- log-transformed value for pTYVDPFTpYEDPNQAVR .1B -->
+<has_text text="6.464714" /> <!-- log-transformed value for pSQKQEEENPAEETGEEK .2A -->
+</assert_contents>
+</output>
+</test>
+</tests>
+<help><![CDATA[
+====================================================
+Phosphoproteomic Enrichment Pipeline ANOVA and KSEA
+====================================================
+**Input files**
+``Filtered Phosphopeptide Intensities``
+Phosphopeptides annotated with SwissProt and phosphosite metadata (in tabular format).
+This is the output from the "Phosphoproteomic Enrichment Pipeline Merge and Filter"
+(``mqppep_mrgflt``) tool.
+``ANOVA alpha cutoff level``
+List of alpha cutoff values for significance testing; text file having one column and no header.  For example:
+::
+0.2
+0.1
+0.05
+**Input parameters**
+``Intensity-column pattern``
+First column of ``input_file`` having intensity values (integer or PERL-compatible regular expression matching column label). Default: ``^Intensity[^_]``
+``Imputation method``
+Impute missing values by:
+1. ``group-median`` - use median for each sample-group;
+2. ``mean`` - use mean across all samples;
+3. ``median`` - use median across all samples; or
+4. ``random`` - use randomly generated values where:
+- ``Mean percentile for random values`` specifies the percentile among non-missing values to be used as mean of random values, and
+- ``Percentile std. dev. for random values`` specifies the factor to be multiplied by the standard deviation among the non-missing values (across all samples) to determine the standard deviation of random values.
+``Sample-extraction pattern``
+PERL-compatible regular expression extracting the sample-name from the name of a column of intensities (from ``input_file``) for one sample.
+- For example, ``"\.\d+[A-Z]$"`` applied to ``Intensity.splunge.10A`` would produce ``.10A``
+- Note that *this is case sensitive* by default.
+``Group-extraction pattern``
+PERL-compatible regular expression extracting the sample-grouping from the sample-name that was extracted with ``sample_names_regex`` from a column of intensities (from ``input_file``).
+- For example, ``"\d+$"`` applied to ``.10A`` would produce ``10``
+- Note that *this is case sensitive* by default.
+``KSEA threshold level``
+Specifies the maximum FDR at which a kinase will be considered to be enriched; the default choice of 0.05 is arbitrary.
+**Outputs**
+``imputed_intensities (input_file.imputation_method-imputed_intensities)``
+Phosphopeptide MS intensities where missing values have been **imputed** by the chosen method, in tabular format.
+``imputed_QN_LT_intensities (input_file.imputation_method-imputed_QN_LT_intensities)``
+Phosphopeptide MS intensities where missing values have been **imputed** by the chosen method, quantile-normalized (**QN**), and log10-transformed (**LT**), in tabular format.
+``report_file (input_file.imputation_method-imputed_report)``
+Summary report for normalization, imputation, and **ANOVA**, in PDF format.
+``anova_ksea_metadata (input_file.imputation_method-anova_ksea_metadata)``
+Phosphopeptide metadata including ANOVA significance and KSEA enrichments.
+``ksea_sqlite (input_file.imputation_method-imputed_ksea_sqlite)``
+SQLite database for ad-hoc report creation.
+**Algorithm**
+The KSEA algorithm used here is as in the KSEAapp package as reported in [Wiredja 2017].
+The code is adapted from "Danica D. Wiredja (2017). KSEAapp: Kinase-Substrate Enrichment Analysis. R package version 0.99.0." to work with output from the "MaxQuant Phosphopeptide Preprocessing" Galaxy tool.
+**Authors**
+``Larry C. Cheng``
+(`ORCiD 0000-0002-6922-6433 <https://orcid.org/0000-0002-6922-6433>`_) wrote the original script.
+``Arthur C. Eschenlauer``
+(`ORCiD 0000-0002-2882-0508 <https://orcid.org/0000-0002-2882-0508>`_) adapted the script to run in Galaxy.
+===================================
+PERL-compatible regular expressions
+===================================
+Note that the PERL-compatible regular expressions accepted by this tool are documented at http://rdrr.io/r/base/regex.html
+]]></help>
+<!-- Publications to cite when using this tool, identified by DOI. -->
+<citations>
+<!-- Cheng_2018 "Phosphopeptide Enrichment ..." PMID: 30124664 -->
+<citation type="doi">10.3791/57996</citation>
+<!-- Wiredja_2017 "The KSEA App ..." PMID: 28655153 -->
+<citation type="doi">10.1093/bioinformatics/btx415</citation>
+</citations>
+</tool>

Mercurial > repos > galaxyp > mqppep_anova

comparison mqppep_anova.xml @ 0:dbff53e6f75f draft