view principal_component_analysis.xml @ 2:caba07f41453 draft default tip

"planemo upload for repository https://github.com/secimTools/SECIMTools/tree/main/galaxy commit 498abad641099412df56f04ff6e144e4193bbc34-dirty"
author malex
date Thu, 10 Jun 2021 15:41:17 +0000
parents 2e7d47c0b027
children
line wrap: on
line source

<tool id="secimtools_principal_component_analysis" name="Principal Component Analysis (PCA)" version="@WRAPPER_VERSION@">
    <!-- Short tagline for the tool; complements the tool name given on the root element. -->
    <description>for visual summaries of the components.</description>
    <!--
        Shared definitions are pulled in from macros.xml. This file expands the
        "requirements" and "citations" macros and uses @TOKEN@ placeholders
        (e.g. @WRAPPER_VERSION@, @TIP_AND_WARNING@); presumably all of them are
        defined in macros.xml (confirm against that file).
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Software dependencies come from the shared "requirements" macro. -->
    <expand macro="requirements" />
    <!--
        Command line for principal_component_analysis.py.

        Inputs:  the wide dataset, the design file and the unique ID column name.
        Outputs: loadings, scores and summary tables plus the figure file.
        The group option is appended only when the user supplied a group column.

        Every substituted value is wrapped in single quotes so that dataset
        paths and user-typed column names containing spaces or other shell
        metacharacters reach the script as a single argument.
        A non-zero exit code marks the job as failed (detect_errors="exit_code").
    -->
    <command detect_errors="exit_code"><![CDATA[
principal_component_analysis.py
--input '$input'
--design '$design'
--ID '$uniqID'
--load_out '$loadings'
--score_out '$scores'
--summary_out '$summary'
--figure '$figures'

#if $group
    --group '$group'
#end if
    ]]></command>
    <!--
        Tool form parameters.

        input / design : tab-separated datasets selected from the history.
        uniqID         : column name in the wide dataset; always passed to the
                         script, so an empty value is rejected up front.
        group          : column name in the design file; may be left empty, in
                         which case the command omits the group option.
    -->
    <inputs>
        <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab separated see TIP below."/>
        <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
        <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that has unique identifiers.">
            <!-- The command always emits the ID option, so a blank value would yield a broken command line. -->
            <validator type="empty_field" message="Please enter the name of the unique feature ID column."/>
        </param>
        <param name="group" type="text" size="30" label="Group/Treatment [Optional]" help="Name of the column in your design file that contains group classifications."/>
    </inputs>
    <!--
        Datasets produced by one run: three tabular tables (loadings, scores,
        summary) and one PDF with the scatter plots. Each name matches the
        variable referenced in the command template.
    -->
    <outputs>
        <data name="loadings"
              format="tabular"
              label="${tool.name} on ${on_string}: loadings"/>
        <data name="scores"
              format="tabular"
              label="${tool.name} on ${on_string}: scores"/>
        <data name="summary"
              format="tabular"
              label="${tool.name} on ${on_string}: summary"/>
        <data name="figures"
              format="pdf"
              label="${tool.name} on ${on_string}: scatter plots"/>
    </outputs>
    <!--
        Functional test on the ST000006 example data with a group column set.
        The three tabular outputs are compared against the reference files;
        the PDF is compared by size only (sim_size, delta of 10000).
    -->
    <tests>
        <test>
            <param name="input" value="ST000006_data.tsv"/>
            <param name="design" value="ST000006_design.tsv"/>
            <param name="uniqID" value="Retention_Index"/>
            <param name="group" value="White_wine_type_and_source"/>
            <output name="loadings" file="ST000006_principal_component_analysis_load_out.tsv"/>
            <output name="scores" file="ST000006_principal_component_analysis_score_out.tsv"/>
            <output name="summary" file="ST000006_principal_component_analysis_summary_out.tsv"/>
            <output name="figures" file="ST000006_principal_component_analysis_figure.pdf" compare="sim_size" delta="10000"/>
        </test>
    </tests>
    <help><![CDATA[

@TIP_AND_WARNING@

**Tool Description**

The tool performs principal component analysis (PCA) of the data.
Visual summaries are provided in the form of 2D and 3D scatter plots for the first three principal components.
Samples in the scatter plots are colored based on the group classification.

--------------------------------------------------------------------------------

**Note**

- This tool currently treats all variables as continuous numeric variables. Running the tool on categorical variables may produce incorrect results.
- Rows containing non-numeric (or missing) data in any of the chosen columns will be excluded from the analysis.

--------------------------------------------------------------------------------

**Input**

    - Two input datasets are required.

@WIDE@

**NOTE:** The sample IDs must match the sample IDs in the Design File (below).
Extra columns will automatically be ignored.


@METADATA@

@UNIQID@

@GROUP_OPTIONAL@

--------------------------------------------------------------------------------

**Output**

Four different outputs are produced by the Principal Component Analysis tool:

(1) a TSV file containing eigenvectors/variable loadings
(2) a TSV file containing scores of input data on principal components
(3) a TSV file with the summary for each component
(4) and a PDF file of scatter plots of the first three principal components

There are a total of four scatter plots: three pairwise plots for the first three components and a single 3D plot of the first three components.

    ]]></help>
    <expand macro="citations"/>
</tool>