comparison mahalanobis_distance.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children
comparison
equal deleted inserted replaced
0:b54326490b4d 1:2e7d47c0b027
1 <tool id="secimtools_mahalanobis_distance" name="Penalized Mahalanobis Distance (PMD)" version="@WRAPPER_VERSION@">
2 <description>to compare groups</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <stdio> <!-- Exit codes of 1 or higher are reported at warning level. NOTE(review): the command tag also sets detect_errors="exit_code"; confirm which of the two rules is meant to apply. -->
8 <exit_code range="1:" level="warning" description="RuntimeWarning"/>
9 </stdio>
10 <command detect_errors="exit_code"><![CDATA[
11 mahalanobis_distance.py
12 --input '$input'
13 --design '$design'
14 --ID '$uniqID'
15 --figure '$plot'
16 --distanceToMean '$out1'
17 --distancePairwise '$out2'
18 ## Optional text arguments are passed only when non-empty; values are quoted so a name containing spaces stays one argument.
19 #if $group
20 --group '$group'
21 #end if
22
23 #if $levels
24 --levels '$levels'
25 #end if
26 ## Numeric arguments are compared against the empty string so that an explicit 0 is still forwarded (a bare truthiness test is false for 0).
27 #if str($p) != ''
28 --per $p
29 #end if
30
31 #if $order
32 --order '$order'
33 #end if
34
35 #if str($penalty) != ''
36 --penalty $penalty
37 #end if
38 ]]></command>
39 <inputs> <!-- Two tabular datasets, the feature ID column name, optional grouping/run-order column names, and two numeric settings. -->
40 <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file not tab separated see TIP below."/>
41 <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
42 <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that has unique identifiers."/>
43 <param name="group" type="text" size="30" label="Group/Treatment [Optional]" help="Name of the column in your design file that contains group classifications."/>
44 <param name="order" type="text" size="30" label="Input Run Order Name [Optional]" help="Enter the name of the column containing the order samples were run. Spelling and capitalization must be exact."/>
45 <param name="levels" type="text" size="30" label="Additional groups to separate by [Optional]" help="Enter additional group(s) name(s) to include. Spelling and capitalization must be exact. If more than one group separate with ','."/>
46 <param name="p" type="float" value="0.95" size="6" label="Threshold" help="Threshold for standard distribution, specified as a percentile. Default = 0.95."/>
47 <param name="penalty" type="float" value="0.5" size="6" label="λ Penalty" help="λ Penalty to use in the distance. The default is λ=0.5."/>
48 </inputs>
49 <outputs> <!-- One PDF figure and two tabular distance tables (to-mean and pairwise). -->
50 <data name="plot" format="pdf" label="${tool.name} on ${on_string}: plot"/>
51 <data name="out1" format="tabular" label="${tool.name} on ${on_string}: toMean"/>
52 <data name="out2" format="tabular" label="${tool.name} on ${on_string}: pairwise"/>
53 </outputs>
54 <tests>
55 <test> <!-- Runs the tool on the ST000006 example files and checks all three outputs; the PDF is compared by size only. -->
56 <param name="input" value="ST000006_data.tsv"/>
57 <param name="design" value="ST000006_design.tsv"/>
58 <param name="uniqID" value="Retention_Index"/>
59 <param name="group" value="White_wine_type_and_source"/>
60 <param name="penalty" value="0.5"/>
61 <output name="plot" file="ST000006_mahalanobis_distance_figure.pdf" compare="sim_size" delta="10000"/>
62 <output name="out1" file="ST000006_mahalanobis_distance_to_mean.tsv"/>
63 <output name="out2" file="ST000006_mahalanobis_distance_pairwise.tsv"/>
64 </test>
65 </tests>
66 <help><![CDATA[
67
68 @TIP_AND_WARNING@
69
70 **Tool Description**
71
72 The Penalized Mahalanobis distance (PMD) tool can be used to compare samples within a group and accounts for the correlation structure between metabolites.
73 In contrast, Standardized Euclidean distance (SED) relies solely on geometric distance and ignores any dependency structures between features.
74 PMD incorporates the correlation structure inside the distance measurement.
75
76 When the correlation structure and dependency between metabolites are ignored, the features' inverse variance-covariance matrix simplifies to a diagonal matrix with diagonal values - in this case, MD simplifies to SED.
77 When the number of features is greater than the number of samples, the inverse of the features' variance-covariance matrix does not exist.
78 This is the case for most -omic data. Here, the inverse is estimated using a regularization method (Archambeau et al. 2004).
79 The details of the regularization algorithm can be found in Supplementary file 3 in Kirpich et al. 2017.
80
81 Archambeau C, Vrins F, Verleysen M. Flexible and Robust Bayesian Classification by Finite Mixture Models. In ESANN 2004 (pp. 75-80).
82
83 **NOTE:** Because of the nature of the tool, groups with fewer than 3 samples will be discarded from the analysis.
84
85
86 **Input**
87
88 - Two input datasets are required.
89
90 @WIDE@
91
92 **NOTE:** The sample IDs must match the sample IDs in the Design File
93 (below). Extra columns will automatically be ignored.
94
95 @METADATA@
96
97 @UNIQID@
98
99 @GROUP_OPTIONAL@
100
101 - **Warning:** All groups must contain 3 or more samples.
102
103
104 @RUNORDER_OPTIONAL@
105
106 **Additional groups to separate by [Optional]**
107
108 - Enter additional group(s) name(s) to include. Spelling and capitalization must be exact. If more than one group, separate them with a comma
109 - **Warning:** All groups must contain 3 or more samples.
110
111
112 **Percentile cutoff**
113
114 - The percentile cutoff for standard distributions. The default is 0.95.
115
116 **λ Penalty**
117
118 - λ Penalty to use in the distance. The default is λ=0.5.
119
120 --------------------------------------------------------------------------------
121
122 **Output**
123
124 The tool outputs three different files:
125
126 (1) a PDF file containing 2D scatter plots and boxplots for the distances
127
128 (2) a TSV file containing distances from the sample to the estimated mean
129
130 (3) a TSV file containing distances from the sample to other samples.
131
132 If the grouping variable is specified by the user, the distances are computed both within the groups and for the entire dataset.
133
134 ]]></help>
135 <expand macro="citations"/>
136 </tool>