view standardized_euclidean_distance.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children
line wrap: on
line source

<tool id="secimtools_standardized_euclidean_distance" name="Standardized Euclidean Distance (SED)" version="@WRAPPER_VERSION@">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Command template (Cheetah). Runs standardized_euclidean_distance.py with the
         selected datasets and options; a non-zero exit code is treated as failure. -->
    <command detect_errors="exit_code"><![CDATA[
## Required arguments: input datasets, the unique-ID column name, and the three output paths.
## Every substituted value is single-quoted so that paths or column names containing
## spaces or other shell-significant characters reach the script as one argument.
standardized_euclidean_distance.py
--input '$input'
--design '$design'
--ID '$uniqID'
--fig '$plot'
--SEDtoMean '$out1'
--SEDpairwise '$out2'

## Optional arguments are emitted only when the corresponding field is non-empty.
#if $group
    --group '$group'
#end if
#if $levels
    --levels '$levels'
#end if
## $p is a validated float parameter, so it is passed through unquoted.
#if $p
    --per $p
#end if

#if $order
    --order '$order'
#end if
    ]]></command>
    <inputs>
        <!-- Required datasets: the wide-format data table and its design file. -->
        <param name="input"
               type="data"
               format="tabular"
               label="Wide Dataset"
               help="Input your tab-separated wide format dataset. If file not tab separated see TIP below."/>
        <param name="design"
               type="data"
               format="tabular"
               label="Design File"
               help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>

        <!-- Column name in the wide dataset that identifies each feature. -->
        <param name="uniqID"
               type="text"
               size="30"
               value=""
               label="Unique Feature ID"
               help="Name of the column in your wide dataset that has unique identifiers."/>

        <!-- Free-text fields labelled optional; the command template only forwards them when non-empty. -->
        <param name="group"
               type="text"
               size="30"
               label="Group/Treatment [Optional]"
               help="Name of the column in your design file that contains group classifications."/>
        <param name="order"
               type="text"
               size="30"
               label="Input Run Order Name [Optional]"
               help="Enter the name of the column containing the order samples were run. Spelling and capitalization must be exact."/>
        <param name="levels"
               type="text"
               size="30"
               label="Additional groups to separate by [Optional]"
               help="Enter additional group(s) name(s). Spelling and capitalization must be exact. If more than one group separate with a ','."/>

        <!-- Percentile threshold; defaults to 0.95. -->
        <param name="p"
               type="float"
               value="0.95"
               size="6"
               label="Threshold"
               help="Threshold for standard distribution, specified as percentile. Default = 0.95."/>
    </inputs>
    <outputs>
        <!-- One PDF figure plus two tabular files: distances to the mean and pairwise distances. -->
        <data name="plot" format="pdf" label="${tool.name} on ${on_string}: Plot"/>
        <data name="out1" format="tabular" label="${tool.name} on ${on_string}: SEDtoMean"/>
        <data name="out2" format="tabular" label="${tool.name} on ${on_string}: SEDpairwise"/>
    </outputs>
    <tests>
        <!-- Single functional test on the ST000006 example data, grouped by
             White_wine_type_and_source; all other options keep their defaults. -->
        <test>
            <param name="input" value="ST000006_data.tsv"/>
            <param name="design" value="ST000006_design.tsv"/>
            <param name="uniqID" value="Retention_Index"/>
            <param name="group" value="White_wine_type_and_source"/>
            <!-- The PDF is compared by file size only (sim_size, tolerance 50000). -->
            <output name="plot" file="ST000006_standardized_euclidean_distance_figure.pdf" compare="sim_size" delta="50000"/>
            <output name="out1" file="ST000006_standardized_euclidean_distance_to_mean.tsv"/>
            <output name="out2" file="ST000006_standardized_euclidean_distance_pairwise.tsv"/>
        </test>
    </tests>
    <help><![CDATA[

@TIP_AND_WARNING@

**Tool Description**

The tool is designed to identify samples that are different using the standardized Euclidean distance (SED) between samples.
The tool estimates the variance of features and calculates the SED between each pair of samples in addition to the SED between each sample and the estimated mean.
If a group or treatment variable is provided, then the same distance plots are generated for each group and for all samples together.

**NOTE:** Groups with fewer than three samples will be excluded from the analysis.



**Input**

    - Two input datasets are required.

@WIDE@

**NOTE:** The sample IDs must match the sample IDs in the Design File
(below). Extra columns will automatically be ignored.

@METADATA@

@UNIQID@

@GROUP_OPTIONAL@

    - **Warning:** All groups must contain 3 or more samples.


@RUNORDER_OPTIONAL@

**Additional groups to separate by [Optional]**

    - Enter group(s) name(s). Spelling and capitalization must be exact. If more than one group, separate with commas.
    - **Warning:** All groups must contain 3 or more samples.
    - **NOTE:** Groups with one element will be excluded from the analysis.


**Percentile cutoff**

- The percentile cutoff for standard distributions. The default is 0.95.

--------------------------------------------------------------------------------

**Output**

The tool outputs three different files:

(1) A TSV file that contains an n x n matrix (where n is the number of samples) of the pairwise distances between the samples.
If the Group/Treatment [Optional] variable is specified, the distances will be computed within groups.

(2) A PDF file containing:
(i) Boxplots of the distribution of distances.  The distances are computed between samples in the group and summarized as boxplots.
The outliers (blue dots), means (red squares) and median (blue bars) of the distances are presented for each sample within the group.
(ii) 2D scatter plots that show distances computed pairwise within the group.

(3) A TSV file that contains the SED between each sample and the estimated mean.

    ]]></help>
    <expand macro="citations"/>
</tool>