view coefficient_variation_flags.xml @ 2:caba07f41453 draft default tip

"planemo upload for repository https://github.com/secimTools/SECIMTools/tree/main/galaxy commit 498abad641099412df56f04ff6e144e4193bbc34-dirty"
author malex
date Thu, 10 Jun 2021 15:41:17 +0000
parents 2e7d47c0b027
children
line wrap: on
line source

<tool id="secimtools_coefficient_variation_flags" name="Coefficient of Variation (CV) Flags" version="@WRAPPER_VERSION@">
    <description>- Calculate the coefficient of variation and flag potential outliers.</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        Command line for coefficient_variation_flags.py.
        Dataset paths and user-entered text values (uniqID, group) are wrapped
        in single quotes so that a value containing spaces or shell
        metacharacters reaches the script as exactly one argument.
        The group and CVcutoff options are appended only when the matching
        parameter evaluates as true in the template.
    -->
    <command detect_errors="exit_code"><![CDATA[
coefficient_variation_flags.py
--input '$input'
--design '$design'
--ID '$uniqID'
--figure '$CVplot'
--flag '$CVflag'
#if $group:
    --group '$group'
#end if
#if $CVcutoff:
    --CVcutoff $CVcutoff
#end if
    ]]></command>
    <!--
        Tool form parameters.
          input    : wide-format tabular dataset.
          design   : tabular design file.
          uniqID   : required column name; an empty value is rejected in the
                     form instead of producing a malformed command line.
          group    : optional column name from the design file.
          CVcutoff : optional proportion of features to flag; bounded to the
                     0..1 range because the help text defines it as a
                     proportion expressed in decimals.
        The "size" attributes were dropped: Galaxy treats them as deprecated
        and ignores them.
    -->
    <inputs>
        <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab separated see TIP below."/>
        <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note: you need a 'sampleID' column. If not tab separated see TIP below."/>
        <param name="uniqID" type="text" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that contains unique feature identifiers.">
            <validator type="empty_field" message="A unique feature ID column name is required."/>
        </param>
        <param name="group" type="text" optional="true" label="Group/Treatment [Optional]" help="Name of the column in your design file that contains group classifications."/>
        <param name="CVcutoff" type="float" optional="true" value="0.1" min="0" max="1" label="CV Cutoff [Optional]" help="The cutoff (in decimals) that specifies the proportion of features to flag.  The default CV cutoff is 0.1 which implies that 10% of the features with the largest CV-s will be flagged."/>
    </inputs>
    <!--
        Datasets produced by the tool.
          CVplot : PDF handed to the script through its figure option.
          CVflag : tabular file handed to the script through its flag option.
    -->
    <outputs>
        <data name="CVplot" format="pdf" label="${tool.name} on ${on_string}: Plot"/>
        <data name="CVflag" format="tabular" label="${tool.name} on ${on_string}: Flag"/>
    </outputs>
    <!--
        Functional test: runs the tool on the ST000006 dataset with a group
        column supplied and the CV cutoff left at its default.
        The PDF is checked with a size-based comparison (sim_size, delta
        100000); the flag table uses the default output comparison.
    -->
    <tests>
        <test>
            <param name="input" value="ST000006_data.tsv"/>
            <param name="design" value="ST000006_design.tsv"/>
            <param name="uniqID" value="Retention_Index"/>
            <param name="group" value="White_wine_type_and_source"/>
            <output name="CVplot"
                    file="ST000006_coefficient_variation_flags_with_group_figure.pdf"
                    compare="sim_size"
                    delta="100000"/>
            <output name="CVflag"
                    file="ST000006_coefficient_variation_flags_with_group_flag.tsv"/>
        </test>
    </tests>
    <help><![CDATA[

@TIP_AND_WARNING@

**Tool Description**

This tool calculates the coefficient of variation (standard deviation as a percentage of the mean) and is often used to look at the consistency of features across samples.
The user can define what percent of features with the highest CV to flag.
If no percentage is selected, then the top 10% of features with the highest CV are flagged (default value of 0.1).
The CV value corresponding to the percentage is given in the resulting histogram plot.

--------------------------------------------------------------------------------

**Input**

    - Two input datasets are required.

@WIDE@

**NOTE:** The sample IDs must match the sample IDs in the Design File (below).
Extra columns will automatically be ignored.

@METADATA@

@UNIQID@

**CV cutoff [Optional]**

    - The cutoff point (in decimals) that specifies the proportion of the features to flag.  The default CV cutoff is 0.1 which implies that 10% of the features with the largest CV will be flagged. If the Group/Treatment variable is provided, then the analysis is performed independently for each group. If no Group/Treatment variable is provided, the analysis is performed on the entire dataset.

@GROUP_OPTIONAL@


--------------------------------------------------------------------------------

**Output**

This tool outputs two different files:

(1) a TSV file containing the CV Flags for each feature for each group (if group variable is specified).  A flag value of one (1) corresponds to features with large CV values as specified by the CV cutoff.
(2) a PDF file containing histograms with overlaid density plots of the coefficients of variation for each group (optional) and a summary density plot containing the densities for each group without the histograms.
    ]]></help>
    <expand macro="citations"/>
</tool>