view bland_altman_plot.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children caba07f41453
line wrap: on
line source

<tool id="secimtools_bland_altman_plot" name="Bland-Altman (BA) Plot" version="@WRAPPER_VERSION@">
    <!-- Short description of the tool. -->
    <description>- Create pairwise BA plots for outlier detection.</description>
    <!-- Shared definitions are pulled in from macros.xml. The "requirements" and
         "citations" macros and the @...@ tokens used in this file (for example
         @WRAPPER_VERSION@ and @TIP_AND_WARNING@) are presumably defined there;
         macros.xml is not visible here, so confirm against that file. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expands the shared "requirements" macro in place. -->
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
## Run bland_altman_plot.py on the wide dataset and the design file.
## Every substituted value is single-quoted so that dataset paths and column
## names containing spaces reach the script as a single argument.
bland_altman_plot.py
--input '$input'
--design '$design'
--ID '$uniqID'
--figure '$ba_plots'
--flag_dist '$outlier_dist_plots'
--flag_sample '$flag_sample'
--flag_feature '$flag_feature'
--resid_cutoff '$resid_cutoff'
--sample_flag_cutoff '$sample_cutoff'
--feature_flag_cutoff '$feature_cutoff'
--prop_feature '$proportion_of_features'
--prop_sample '$proportion_of_samples'

## The group column is optional; the group-name filter is only meaningful
## (and only passed) when a group column has been supplied.
#if $group:
    --group '$group'

    ## NOTE(review): the group names are passed as ONE quoted shell word, exactly
    ## as before. If the script expects one argument per group name, the quotes
    ## must be dropped - confirm against bland_altman_plot.py.
    #if $processOnly:
        --process_only '$processOnly'
    #end if
#end if
    ]]></command>
    <inputs>
        <!-- Two tabular datasets (wide data and design file), the name of the
             feature ID column, three numeric cutoffs, and two optional text
             fields that restrict processing to chosen groups. -->
        <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab-separated see TIP below."/>
        <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
        <!-- The ID column name is mandatory: reject an empty value in the form
             instead of passing an empty argument to the script. -->
        <param name="uniqID" size="30" type="text" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that contains unique feature identifiers.">
            <validator type="empty_field" message="Enter the name of the column that contains the unique feature identifiers."/>
        </param>
        <param name="resid_cutoff" type="integer" size="30" value="3" label="Outlier Cutoff" help="Residual cutoff value, this value will flag samples with residuals ≥ this cutoff value."/>
        <param name="sample_cutoff" type="float" size="30" value="0.2" min="0" max="1" label="Sample Flag Cutoff" help="Flag a sample as 1 if the proportion of features within a sample that are outliers exceeds this cutoff. [Number between 0-1]."/>
        <param name="feature_cutoff" type="float" size="30" value="0.05" min="0" max="1" label="Feature Flag Cutoff" help="Flag a feature as 1 if the proportion of times this feature was identified as an outlier exceeds this cutoff. [Number between 0-1]."/>
        <param name="group" type="text" size="30" value="" optional="true" label="Group/Treatment [Optional]" help="Name of the column in your Design File that contains group classifications."/>
        <!-- The example uses the same separator (spaces) that the help text
             prescribes. -->
        <param name="processOnly" size="30" type="text" value="" optional="true" label="Group Name [Optional]" help="Name of the group(s) that you want to process. Separate multiple group names with spaces (e.g. RC control treatment). Leave blank to process all groups. Requires the group parameter."/>
    </inputs>
    <outputs>
        <!-- Two PDF reports followed by four tab-separated tables. Each name is
             the variable that the command template substitutes as an output path. -->
        <data name="ba_plots" format="pdf" label="${tool.name} on ${on_string}: BA plot"/>
        <data name="outlier_dist_plots" format="pdf" label="${tool.name} on ${on_string}: Distribution"/>
        <data name="flag_sample" format="tabular" label="${tool.name} on ${on_string}: Flag Sample"/>
        <data name="flag_feature" format="tabular" label="${tool.name} on ${on_string}: Flag Feature"/>
        <data name="proportion_of_features" format="tabular" label="${tool.name} on ${on_string}: Proportion of Feature"/>
        <data name="proportion_of_samples" format="tabular" label="${tool.name} on ${on_string}: Proportion of Samples"/>
    </outputs>
    <tests>
        <!-- Grouped run on the ST000006 example data. Each output name below must
             match a data element declared in the outputs section; the two
             proportion tables are therefore referenced by their declared names
             (proportion_of_features / proportion_of_samples). -->
        <test>
            <param name="input"  value="ST000006_data.tsv"/>
            <param name="design" value="ST000006_design.tsv"/>
            <param name="uniqID" value="Retention_Index" />
            <param name="group"  value="White_wine_type_and_source" />
            <!-- PDFs are compared by size with a tolerance rather than by content. -->
            <output name="ba_plots"               file="ST000006_bland_altman_plot_with_group_figure.pdf" compare="sim_size" delta="10000" />
            <output name="outlier_dist_plots"     file="ST000006_bland_altman_plot_with_group_flag_distribution.pdf" compare="sim_size" delta="10000" />
            <output name="flag_sample"            file="ST000006_bland_altman_plot_with_group_flag_sample.tsv" />
            <output name="flag_feature"           file="ST000006_bland_altman_plot_with_group_flag_feature.tsv" />
            <output name="proportion_of_features" file="ST000006_bland_altman_plot_with_group_proportion_feature.tsv" />
            <output name="proportion_of_samples"  file="ST000006_bland_altman_plot_with_group_proportion_sample.tsv" />
        </test>
    </tests>
    <help><![CDATA[

@TIP_AND_WARNING@

**Tool Description**

The Bland-Altman plot (BA-Plot) is used to look at the concordance of data between pairs of samples, particularly between replicates.
The script generates BA-plots for all pairwise combinations of samples.
If the Group/Treatment column and group name(s) in that column are provided then BA-Plots are generated only for pairwise combinations within the specified Group -- group name combination.
In addition to generating the BA-plots, a linear regression fit is calculated between the values that correspond to the pair of samples to identify (flag) any unusual outlying values.
The flags produced by the regression fit are used to generate distribution plots and text files for (i) each sample (column) and for (ii) each feature (row).


--------------------------------------------------------------------------------

**Input**

    - Two input datasets are required.

@WIDE@

**NOTE:** The sample IDs must match the sample IDs in the Design File
(below). Extra columns will automatically be ignored.

@METADATA@

@UNIQID@


**Outlier Cutoff – flagging values**

- Residual cutoff value, this value will flag samples with residuals ≥ this cutoff value.

        (1) If the magnitude of the residuals from the linear regression on the BA-plot exceeds the user-defined threshold, then a value is flagged as an outlier. This cutoff can be adjusted by the user, the default is 3.

        (2) If a value is identified as a leverage point using Cook's D with a p-value cutoff of 0.5, then the value is flagged. This cannot be adjusted.

        (3) If a value is identified as a leverage point using the DFFITS technique it is also flagged. This cannot be adjusted.

**Sample Flag Cutoff – flagging samples**

    - Flag a sample as 1 if the proportion of features within a sample that are outliers exceeds this cutoff. [Number between 0-1].

**Feature Flag Cutoff  – flagging features**

    - Flag a feature as 1 if the proportion of times this feature was identified as an outlier exceeds this cutoff. [Number between 0-1].

@GROUP@

**Group ID**

    - Name of the group(s) that you want to process. Separate multiple groupIDs with spaces. Leave blank to process all groups. Requires the group parameter.

--------------------------------------------------------------------------------

**Output**

This tool outputs the following files, depending on the input settings:

(1) a PDF file containing BA-plots and scatterplots for each pair of samples.
(2) a PDF file containing histograms of the most flagged features and samples.
(3) two TSV files containing flags: one for samples and one for features.
(4) if a grouping variable name is specified in the input, a TSV file containing flags for each group is also generated.
(5) two TSV files containing (i) the proportion of features flagged per sample and (ii) the proportion of samples flagged per feature.

If a sample (or feature) is flagged, the user should consider removing it from further analysis.

    ]]></help>
    <!-- Expands the shared "citations" macro in place (presumably defined in
         macros.xml, which is not visible here). -->
    <expand macro="citations"/>
</tool>