comparison standardized_euclidean_distance.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children
comparison
equal deleted inserted replaced
0:b54326490b4d 1:2e7d47c0b027
1 <tool id="secimtools_standardized_euclidean_distance" name="Standardized Euclidean Distance (SED)" version="@WRAPPER_VERSION@">
2 <description></description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <command detect_errors="exit_code"><![CDATA[
8 standardized_euclidean_distance.py
9 --input '$input'
10 --design '$design'
11 --ID '$uniqID'
12 --fig '$plot'
13 --SEDtoMean '$out1'
14 --SEDpairwise '$out2'
15 ## Optional arguments: each flag below is emitted only when its parameter evaluates as non-empty.
16 #if $group
17 --group '$group'
18 #end if
19 #if $levels
20 --levels '$levels'
21 #end if
22 #if $p
23 --per '$p'
24 #end if
25 ## Every substitution is single-quoted so a value containing spaces reaches the script as one argument.
26 #if $order
27 --order '$order'
28 #end if
29 ]]></command>
30 <inputs> <!-- User-facing parameters; each name is the variable referenced by the command template. -->
31 <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file not tab separated see TIP below."/>
32 <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
33 <param name="uniqID" type="text" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that has unique identifiers."/>
34 <param name="group" type="text" label="Group/Treatment [Optional]" help="Name of the column in your design file that contains group classifications." />
35 <param name="order" type="text" label="Input Run Order Name [Optional]" help="Enter the name of the column containing the order samples were run. Spelling and capitalization must be exact." />
36 <param name="levels" type="text" label="Additional groups to separate by [Optional]" help="Enter additional group(s) name(s). Spelling and capitalization must be exact. If more than one group separate with a ','." />
37 <param name="p" type="float" value="0.95" label="Threshold" help="Threshold for standard distribution, specified as percentile. Default = 0.95." />
38 </inputs>
39 <outputs> <!-- One PDF figure and two tabular files, bound to $plot, $out1 and $out2 in the command template. -->
40 <data name="plot" format="pdf" label="${tool.name} on ${on_string}: Plot"/>
41 <data name="out1" format="tabular" label="${tool.name} on ${on_string}: SEDtoMean"/>
42 <data name="out2" format="tabular" label="${tool.name} on ${on_string}: SEDpairwise"/>
43 </outputs>
44 <tests> <!-- Single test on the ST000006 example files; the PDF is checked with compare="sim_size", the two TSV outputs with the default comparison. -->
45 <test>
46 <param name="input" value="ST000006_data.tsv"/>
47 <param name="design" value="ST000006_design.tsv"/>
48 <param name="uniqID" value="Retention_Index" />
49 <param name="group" value="White_wine_type_and_source" />
50 <output name="plot" file="ST000006_standardized_euclidean_distance_figure.pdf" compare="sim_size" delta="50000" />
51 <output name="out1" file="ST000006_standardized_euclidean_distance_to_mean.tsv" />
52 <output name="out2" file="ST000006_standardized_euclidean_distance_pairwise.tsv" />
53 </test>
54 </tests>
55 <help><![CDATA[
56
57 @TIP_AND_WARNING@
58
59 **Tool Description**
60
61 The tool is designed to identify samples that are different using the standardized Euclidean distance (SED) between samples.
62 The tool estimates the variance of features and calculates the SED between each pair of samples in addition to the SED between each sample and the estimated mean.
63 If a group or treatment variable is provided, then the same distance plots are generated for each group and for all samples together.
64
65 **NOTE:** Groups with fewer than three samples will be excluded from the analysis.
66
67
68
69 **Input**
70
71 - Two input datasets are required.
72
73 @WIDE@
74
75 **NOTE:** The sample IDs must match the sample IDs in the Design File
76 (below). Extra columns will automatically be ignored.
77
78 @METADATA@
79
80 @UNIQID@
81
82 @GROUP_OPTIONAL@
83
84 - **Warning:** All groups must contain 3 or more samples.
85
86
87 @RUNORDER_OPTIONAL@
88
89 **Additional groups to separate by [Optional]**
90
91 - Enter group(s) name(s). Spelling and capitalization must be exact. If more than one group, separate with commas.
92 - **Warning:** All groups must contain 3 or more samples.
93 - **NOTE:** Groups with one element will be excluded from the analysis.
94
95
96 **Percentile cutoff**
97
98 - The percentile cutoff for standard distributions. The default is 0.95.
99
100 --------------------------------------------------------------------------------
101
102 **Output**
103
104 The tool outputs three different files:
105
106 (1) A TSV file that contains an n x n matrix (where n is the number of samples) of the pairwise distances between the samples, and a second TSV file that contains the SED between each sample and the estimated mean.
107 If the Group/Treatment [Optional] variable is specified, the distances will be computed within groups.
108
109 (2) A PDF file containing:
110 (i) Boxplots of the distribution of distances. The distances are computed between samples in the group and summarized as boxplots.
111 The outliers (blue dots), means (red squares) and medians (blue bars) of the distances are presented for each sample within the group.
112 (ii) 2D scatter plots that show distances computed pairwise within the group.
113
114 ]]></help>
115 <expand macro="citations"/>
116 </tool>