annotate secimtools/mahalanobis_distance.xml @ 0:b54326490b4d draft

Upload 21.3.4.2 release
author malex
date Mon, 08 Mar 2021 20:55:03 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
1 <tool id="secimtools_mahalanobis_distance" name="Penalized Mahalanobis Distance (PMD)" version="@WRAPPER_VERSION@">
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
2 <description>to compare groups</description>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
3 <macros>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
4 <import>macros.xml</import>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
5 </macros>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
6 <expand macro="requirements" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
7 <stdio>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
8 <exit_code range="1:" level="warning" description="RuntimeWarning"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
9 </stdio>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
10 <command detect_errors="exit_code"><![CDATA[
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
11 mahalanobis_distance.py
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
12 --input '$input'
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
13 --design '$design'
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
14 --ID '$uniqID'
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
15 --figure '$plot'
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
16 --distanceToMean '$out1'
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
17 --distancePairwise '$out2'
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
18
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
19 #if $group
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
20 --group '$group'
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
21 #end if
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
22
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
23 #if $levels
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
24 --levels '$levels'
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
25 #end if
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
26
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
27 #if $p
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
28 --per $p
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
29 #end if
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
30
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
31 #if $order
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
32 --order '$order'
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
33 #end if
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
34
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
35 #if str($penalty) != ''
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
36 --penalty '$penalty'
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
37 #end if
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
38 ]]></command>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
39 <inputs>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
40 <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file not tab separated see TIP below."/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
41 <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
42 <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that has unique identifiers."/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
43 <param name="group" type="text" size="30" label="Group/Treatment [Optional]" help="Name of the column in your design file that contains group classifications." />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
44 <param name="order" type="text" size="30" label="Input Run Order Name [Optional]" help="Enter the name of the column containing the order samples were run. Spelling and capitalization must be exact." />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
45 <param name="levels" type="text" size="30" label="Additional groups to separate by [Optional]" help="Enter additional group(s) name(s) to include. Spelling and capitalization must be exact. If more than one group separate with ','." />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
46 <param name="p" type="float" value= ".95" size="6" label="Threshold" help="Threshold for standard distribution, specified as a percentile. Default = 0.95." />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
47 <param name="penalty" type="float" value= "0.5" size="6" label="λ Penalty" help="λ Penalty to use in the distance. The default is λ=0.5." />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
48 </inputs>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
49 <outputs>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
50 <data format="pdf" name="plot" label="${tool.name} on ${on_string}: plot" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
51 <data format="tabular" name="out1" label="${tool.name} on ${on_string}: toMean" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
52 <data format="tabular" name="out2" label="${tool.name} on ${on_string}: pairwise" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
53 </outputs>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
54 <tests>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
55 <test>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
56 <param name="input" value="ST000006_data.tsv"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
57 <param name="design" value="ST000006_design.tsv"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
58 <param name="uniqID" value="Retention_Index" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
59 <param name="group" value="White_wine_type_and_source" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
60 <param name="penalty" value="0.5" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
61 <output name="plot" file="ST000006_mahalanobis_distance_figure.pdf" compare="sim_size" delta="10000" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
62 <output name="out1" file="ST000006_mahalanobis_distance_to_mean.tsv" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
63 <output name="out2" file="ST000006_mahalanobis_distance_pairwise.tsv" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
64 </test>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
65 </tests>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
66 <help><![CDATA[
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
67
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
68 @TIP_AND_WARNING@
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
69
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
70 **Tool Description**
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
71
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
72 The Penalized Mahalanobis distance (PMD) tool can be used to compare samples within a group and accounts for the correlation structure between metabolites.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
73 In contrast, Standardized Euclidean distance (SED) relies solely on geometric distance and ignores any dependency structures between features.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
74 PMD incorporates the correlation structure inside the distance measurement.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
75
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
76 When the correlation structure and dependency between metabolites are ignored, the inverse of the features' variance-covariance matrix simplifies to a diagonal matrix (off-diagonal values are zero) - in this case, MD simplifies to SED.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
77 When the number of features is greater than the number of samples, the inverse of the features variance-covariance matrix does not exist.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
78 This is the case for most -omic data. Here, the inverse is estimated using a regularization method (Archambeau et al. 2004).
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
79 The details of the regularization algorithm can be found in Supplementary file 3 in Kirpich et al. 2017.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
80
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
81 Archambeau C, Vrins F, Verleysen M. Flexible and Robust Bayesian Classification by Finite Mixture Models. In ESANN 2004 (pp. 75-80).
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
82
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
83 **NOTE:** Because of the nature of the tool, groups with fewer than 3 samples will be discarded from the analysis.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
84
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
85
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
86 **Input**
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
87
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
88 - Two input datasets are required.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
89
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
90 @WIDE@
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
91
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
92 **NOTE:** The sample IDs must match the sample IDs in the Design File
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
93 (below). Extra columns will automatically be ignored.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
94
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
95 @METADATA@
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
96
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
97 @UNIQID@
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
98
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
99 @GROUP_OPTIONAL@
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
100
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
101 - **Warning:** All groups must contain 3 or more samples.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
102
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
103
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
104 @RUNORDER_OPTIONAL@
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
105
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
106 **Additional groups to separate by [Optional]**
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
107
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
108 - Enter additional group(s) name(s) to include. Spelling and capitalization must be exact. If more than one group, separate them with a comma.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
109 - **Warning:** All groups must contain 3 or more samples.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
110
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
111
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
112 **Percentile cutoff**
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
113
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
114 - The percentile cutoff for standard distributions. The default is 0.95.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
115
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
116 **λ Penalty**
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
117
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
118 - λ Penalty to use in the distance. The default is λ=0.5.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
119
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
120 --------------------------------------------------------------------------------
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
121
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
122 **Output**
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
123
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
124 The tool outputs three different files:
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
125
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
126 (1) a PDF file containing 2D scatter plots and boxplots for the distances
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
127
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
128 (2) a TSV file containing distances from the sample to the estimated mean
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
129
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
130 (3) a TSV file containing distances from the sample to other samples.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
131
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
132 If the grouping variable is specified by the user, the distances are computed both within the groups and for the entire dataset.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
133
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
134 ]]></help>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
135 <expand macro="citations"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
136 </tool>