<tool id="secimtools_standardized_euclidean_distance" name="Standardized Euclidean Distance (SED)" version="@WRAPPER_VERSION@">
    <!--
        Galaxy wrapper around standardized_euclidean_distance.py.
        The "requirements" and "citations" macros are imported from macros.xml;
        the @...@ tokens used below (wrapper version and shared help snippets)
        are presumably defined there as well.
    -->
    <description>calculated for the data.</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Every substituted value is single-quoted so that paths and column
         names containing spaces or shell metacharacters reach the script as
         one argument each. -->
    <command detect_errors="exit_code"><![CDATA[
standardized_euclidean_distance.py
--input '$input'
--design '$design'
--ID '$uniqID'
--fig '$plot'
--SEDtoMean '$out1'
--SEDpairwise '$out2'

## Optional arguments are only passed when the user supplied a value.
#if $group
    --group '$group'
#end if
#if $levels
    --levels '$levels'
#end if
#if $p
    --per '$p'
#end if

#if $order
    --order '$order'
#end if
    ]]></command>
    <inputs>
        <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input dataset in wide format and tab separated. If file not tab separated see TIP below."/>
        <param name="design" type="data" format="tabular" label="Design File" help="Design file tab separated. Note you need a 'sampleID' column. If not tab separated see TIP below."/>
        <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your Wide Dataset that has unique Feature IDs."/>
        <param name="group" type="text" size="30" label="Group/Treatment [Optional]" help="Name of the column in your Design File that contains group classifications." />
        <param name="order" type="text" size="30" label="Input Run Order Name [Optional]" help="Enter the name of the column containing the order samples were run. Spelling and capitalization must be exact." />
        <param name="levels" type="text" size="30" label="Additional groups to separate by [Optional]" help="Enter additional group(s) name(s). Spelling and capitalization must be exact. If more than one group separate with a ','." />
        <param name="p" type="float" value="0.95" size="6" label="Threshold" help="Threshold for standard distribution, specified as percentile. Default = 0.95." />
    </inputs>
    <outputs>
        <data format="pdf" name="plot" label="${tool.name} on ${on_string}: Plot" />
        <data format="tabular" name="out1" label="${tool.name} on ${on_string}: SEDtoMean" />
        <data format="tabular" name="out2" label="${tool.name} on ${on_string}: SEDpairwise" />
    </outputs>
    <tests>
        <test>
            <param name="input" value="ST000006_data.tsv"/>
            <param name="design" value="ST000006_design.tsv"/>
            <param name="uniqID" value="Retention_Index" />
            <param name="group" value="White_wine_type_and_source" />
            <!-- The PDF is compared by size only (within 50000 bytes); the
                 two tables are compared against the expected files. -->
            <output name="plot" file="ST000006_standardized_euclidean_distance_figure.pdf" compare="sim_size" delta="50000" />
            <output name="out1" file="ST000006_standardized_euclidean_distance_to_mean.tsv" />
            <output name="out2" file="ST000006_standardized_euclidean_distance_pairwise.tsv" />
        </test>
    </tests>
    <help><![CDATA[

@TIP_AND_WARNING@

**Tool Description**

The tool is designed to identify samples that are different using the standardized Euclidean distance (SED) between samples.
The tool estimates the variance of features and calculates the SED between each pair of samples in addition to the SED between each sample and the estimated mean.
If a group or treatment variable is provided, then the same distance plots are generated for each group and for all samples together.

**NOTE:** Groups with fewer than three samples will be excluded from the analysis.



**Input**

- Two input datasets are required.

@WIDE@

**NOTE:** The sample IDs must match the sample IDs in the Design File
(below). Extra columns will automatically be ignored.

@METADATA@

@UNIQID@

@GROUP_OPTIONAL@

- **Warning:** All groups must contain 3 or more samples.


@RUNORDER_OPTIONAL@

**Additional groups to separate by [Optional]**

- Enter group(s) name(s). Spelling and capitalization must be exact. If more than one group, separate with commas.
- **Warning:** All groups must contain 3 or more samples.
- **NOTE:** Groups with one element will be excluded from the analysis.


**Percentile cutoff**

- The percentile cutoff for standard distributions. The default is 0.95.

--------------------------------------------------------------------------------

**Output**

The tool outputs three different files:

(1) a TSV file that contains an n x n matrix (where n is the number of computed samples) of the pairwise distances between the samples.
    If the Group/Treatment [Optional] variable is specified, the distances will be computed within groups.

(2) A PDF file containing:

    (i) Boxplots of the distribution of distances. The distances are computed between samples in the group and summarized as boxplots.
        The outliers (blue dots), means (red squares) and median (blue bars) of the distances are presented for each sample within the group.

    (ii) 2D scatter plots that show distances computed pairwise within the group

(3) a TSV file that contains the SED between each sample and the estimated mean.

    ]]></help>
    <expand macro="citations"/>
</tool>