annotate secimtools/linear_discriminant_analysis.xml @ 0:b54326490b4d draft

Upload 21.3.4.2 release
author malex
date Mon, 08 Mar 2021 20:55:03 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
1 <tool id="secimtools_linear_discriminant_analysis" name="Linear Discriminant Analysis (LDA)" version="@WRAPPER_VERSION@">
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
2 <description>.</description>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
3 <macros>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
4 <import>macros.xml</import>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
5 </macros>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
6 <expand macro="requirements" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
7 <command detect_errors="exit_code"><![CDATA[
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
8 linear_discriminant_analysis.py
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
9 --input $input
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
10 --design $design
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
11 --ID $uniqID
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
12 --group $group
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
13 --cross_validation $cross_validation
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
14 --outClassification $outClassification
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
15 --outClassificationAccuracy $outClassificationAccuracy
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
16 --nComponents $nComponents
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
17 --out $out
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
18 --figure $figure
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
19 ]]></command>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
20 <inputs>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
21 <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab separated see TIP below."/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
22 <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
23 <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that has unique identifiers."/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
24 <param name="group" type="text" size="30" value="" label="Group/Treatment" help="Name of the column in your design file that contains group classifications."/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
25 <param name="cross_validation" type="select" size="30" display="radio" value="double" label="Cross-Validation Choice - NOTE: a minimum of 100 samples is required for single or double cross validation">
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
26 <option value="none">None</option>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
27 <option value="single">Single</option>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
28 <option value="double">Double</option>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
29 </param>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
30 <param name="nComponents" type="text" size="30" value="2" label="Number of Components" help="Enter the number of components to use in the analysis. This value should be less than the number of groups and is used only when the cross-validation options field is set to 'none'."/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
31 </inputs>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
32 <outputs>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
33 <data format="tabular" name="out" label="${tool.name} on ${on_string}: Components"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
34 <data format="tabular" name="outClassification" label="${tool.name} on ${on_string}: Classification of Samples"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
35 <data format='tabular' name="outClassificationAccuracy" label="${tool.name} on ${on_string}: Classification Accuracy of Samples"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
36 <data format="pdf" name="figure" label="${tool.name} on ${on_string}: Scatter Plots"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
37 </outputs>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
38 <tests>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
39 <test>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
40 <param name="input" value="ST000006_data.tsv"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
41 <param name="design" value="ST000006_design.tsv"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
42 <param name="uniqID" value="Retention_Index" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
43 <param name="group" value="White_wine_type_and_source" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
44 <param name="cross_validation" value="none"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
45 <param name="nComponents" value="2"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
46 <output name="out" file="ST000006_linear_discriminant_analysis_none_scores.tsv" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
47 <output name="outClassification" file="ST000006_linear_discriminant_analysis_none_classification.tsv" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
48 <output name="outClassificationAccuracy" file="ST000006_linear_discriminant_analysis_none_classification_accuracy.tsv" />
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
49 <output name="figure" file="ST000006_linear_discriminant_analysis_none_figure.pdf" compare="sim_size" delta="10000"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
50 </test>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
51 </tests>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
52 <help><![CDATA[
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
53
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
54 @TIP_AND_WARNING@
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
55
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
56 **Tool Description**
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
57
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
58
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
59 The tool performs linear discriminant analysis (LDA) on the data.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
60
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
61 ***NOTE: A minimum of 100 samples is required by the tool for single or double cross validation***
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
62
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
63 LDA is a supervised method based on the projection of data in the linear subspace to achieve maximum separation between samples in different groups and minimum separation between samples within groups. The subspace dimension defines the number of components used to describe the variability within the data.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
64 Due to the LDA method specification, the subspace dimension must be less than the number of treatment groups. The user has an option to specify the dimension of the subspace directly (default = 2) or to perform single or double cross-validation to determine the dimension of the subspace. For single and double cross-validation, the dataset is split when model fit is performed. For double cross-validation, the data set is split into pieces and the model fit is performed on one piece using cross-validation and evaluated on the other pieces. For single cross-validation, the data are used to both fit and evaluate the model using a three-fold cross validation.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
65
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
66 Visual summaries are provided in the form of a 2D plot where samples are colored by group and plotted along the determined subspace components pairwise.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
67
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
68 More details about the method are available via:
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
69
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
70 Hastie, T. J., Tibshirani, R. J., and Friedman, J. H. (2011). The elements of statistical learning: data mining, inference, and prediction. Springer. pp. 106-119
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
71
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
72
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
73 --------------------------------------------------------------------------------
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
74
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
75 **Note**
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
76
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
77 - This tool currently treats all variables as continuous numeric variables. Running the tool on categorical variables may produce incorrect results.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
78 - Rows containing non-numeric (or missing) data in any of the chosen columns will be excluded from the analysis.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
79
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
80 --------------------------------------------------------------------------------
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
81
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
82 **Input**
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
83
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
84 - Two input datasets are required.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
85
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
86
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
87 @WIDE@
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
88
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
89 **NOTE:** The sample IDs must match the sample IDs in the Design File
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
90 (below). Extra columns will automatically be ignored.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
91
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
92 @METADATA@
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
93
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
94 @UNIQID@
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
95
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
96 @GROUP@
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
97
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
98 **Cross-Validation Choice**
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
99
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
100 - The cross-validation option to use. "None" performs no cross-validation, and the user specifies the number of components manually. ***Single and double cross-validation require a minimum of 100 samples***.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
101
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
102
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
103 **Number of Components**
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
104
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
105 - This parameter is used only when the "None" cross-validation option is selected. If the field is left blank, the number of components is set to the default value (2).
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
106
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
107
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
108 --------------------------------------------------------------------------------
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
109
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
110 **Output**
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
111
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
112 This tool outputs:
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
113
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
114 (1) TSV file containing the components produced by the model for each sample.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
115 Component_{i}: contains the score values for each sample. The number of components {i} is specified in the Number of Components text box or determined via cross-validation.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
116
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
117 (2) TSV file containing the sample classifications produced by the model.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
118 Group_Observed: Initial group labels.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
119 Group_Predicted: Predicted group labels.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
120
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
121 (3) TSV file containing the classification accuracy (in percent) of the algorithm with respect to the number of correctly classified samples.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
122
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
123 (4) A PDF file containing 2D plots for all pairwise comparisons of components, colored by treatment group.
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
124
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
125
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
126 ]]></help>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
127 <expand macro="citations"/>
b54326490b4d Upload 21.3.4.2 release
malex
parents:
diff changeset
128 </tool>