annotate run_lefse.xml @ 0:e7cd19afda2e draft

Lefse
author george-weingart
date Tue, 13 May 2014 21:57:00 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
george-weingart
parents:
diff changeset
1 <tool id="LEfSe_run" name="B) LDA Effect Size (LEfSe)" version="1.0">
george-weingart
parents:
diff changeset
2 <description></description>
george-weingart
parents:
diff changeset
<!-- Runs run_lefse.py on the selected input dataset ($inp_data) and writes the result to $output.
     Flag-to-parameter mapping: -l lda_th, -a kw_alpha, -w w_alpha, -e w_pol, -y multiclass.
     -f is hard-coded to 0.9 and is not exposed as a tool parameter.
     The commented-out variant also passed -s $mtc; the mtc parameter is likewise commented out in the inputs section.
     NOTE(review): the meaning of -f is defined in run_lefse.py, which is not visible here; confirm before changing it. -->
3 <!-- <command interpreter="python">./run_lefse.py $inp_data $output -l $lda_th -a $kw_alpha -w $w_alpha -e $w_pol -s $mtc -y $multiclass </command> -->
george-weingart
parents:
diff changeset
4 <command interpreter="python">run_lefse.py $inp_data $output -l $lda_th -a $kw_alpha -w $w_alpha -e $w_pol -y $multiclass -f 0.9</command>
george-weingart
parents:
diff changeset
<!-- User-settable parameters. Each name is referenced as $name in the command line:
     inp_data (input dataset), kw_alpha (-a), w_alpha (-w), lda_th (-l), w_pol (-e), multiclass (-y). -->
5 <inputs>
george-weingart
parents:
diff changeset
6 <page>
george-weingart
parents:
diff changeset
7 <param format="lefse" name="inp_data" type="data" label="Select data" help=""/>
george-weingart
parents:
diff changeset
8 <param name="kw_alpha" type="float" size="2" value="0.05" label="Alpha value for the factorial Kruskal-Wallis test among classes"/>
george-weingart
parents:
diff changeset
9 <param name="w_alpha" type="float" size="2" value="0.05" label="Alpha value for the pairwise Wilcoxon test between subclasses"/>
george-weingart
parents:
diff changeset
10 <param name="lda_th" type="float" size="2" value="2.0" label="Threshold on the logarithmic LDA score for discriminative features"/>
george-weingart
parents:
diff changeset
11 <param name="w_pol" type="select" label="Do you want the pairwise comparisons among subclasses to be performed only among the subclasses with the same name?" help="">
george-weingart
parents:
diff changeset
<!-- "No" (value 0) is the explicit default, flagged the same way as the default option of the multiclass parameter. -->
12 <option value="0" selected="True">No</option>
george-weingart
parents:
diff changeset
13 <option value="1">Yes</option>
george-weingart
parents:
diff changeset
14 </param>
george-weingart
parents:
diff changeset
15 <!-- <param name="mtc" type="select" label="Set the multiple testing correction (no correction recommended) (to check the parameter passing here)" help="">
george-weingart
parents:
diff changeset
16 <option value="0" selected="0">No correction</option>
george-weingart
parents:
diff changeset
17 <option value="1">Correction for independent comparisons</option>
george-weingart
parents:
diff changeset
18 <option value="2">Correction for dependent comparisons</option>
george-weingart
parents:
diff changeset
19 </param> -->
george-weingart
parents:
diff changeset
20 <param name="multiclass" type="select" label="Set the strategy for multi-class analysis" help="">
george-weingart
parents:
diff changeset
21 <option value="1" selected="True">All-against-all (more strict)</option>
george-weingart
parents:
diff changeset
22 <option value="0">One-against-all (less strict)</option>
george-weingart
parents:
diff changeset
23 </param>
george-weingart
parents:
diff changeset
24 </page>
george-weingart
parents:
diff changeset
25 </inputs>
george-weingart
parents:
diff changeset
<!-- Single result dataset; $output in the command line resolves to this file. -->
26 <outputs>
george-weingart
parents:
diff changeset
27 <data name="output" format="lefse_res"/>
george-weingart
parents:
diff changeset
28 </outputs>
george-weingart
parents:
diff changeset
<!-- NOTE(review): none of the parameter names used in these tests (input1, maf_source,
     maf_identifier, maf_file, species, overwrite_with_gaps) match a param declared in
     the inputs section of this tool, and the expected output is named out_file1 while the
     tool's only output is named "output". The fixtures (.bed/.maf inputs, merged-FASTA
     outputs) look like they belong to a MAF-to-FASTA tool; presumably they were copied from
     another tool definition. Confirm and replace with LEfSe-specific tests. -->
29 <tests>
george-weingart
parents:
diff changeset
30 <test>
george-weingart
parents:
diff changeset
31 <param name="input1" value="13.bed" dbkey="hg18" ftype="bed"/>
george-weingart
parents:
diff changeset
32 <param name="maf_source" value="cached"/>
george-weingart
parents:
diff changeset
33 <param name="maf_identifier" value="17_WAY_MULTIZ_hg18"/>
george-weingart
parents:
diff changeset
34 <param name="species" value="hg18,mm8"/>
george-weingart
parents:
diff changeset
35 <param name="overwrite_with_gaps" value="True"/>
george-weingart
parents:
diff changeset
36 <output name="out_file1" file="interval_maf_to_merged_fasta_out3.fasta" />
george-weingart
parents:
diff changeset
37 </test>
george-weingart
parents:
diff changeset
38 <test>
george-weingart
parents:
diff changeset
39 <param name="input1" value="1.bed" dbkey="hg17" ftype="bed"/>
george-weingart
parents:
diff changeset
40 <param name="maf_source" value="cached"/>
george-weingart
parents:
diff changeset
41 <param name="maf_identifier" value="8_WAY_MULTIZ_hg17"/>
george-weingart
parents:
diff changeset
42 <param name="species" value="canFam1,hg17,mm5,panTro1,rn3"/>
george-weingart
parents:
diff changeset
43 <param name="overwrite_with_gaps" value="True"/>
george-weingart
parents:
diff changeset
44 <output name="out_file1" file="interval_maf_to_merged_fasta_out.dat" />
george-weingart
parents:
diff changeset
45 </test>
george-weingart
parents:
diff changeset
46 <test>
george-weingart
parents:
diff changeset
47 <param name="input1" value="1.bed" dbkey="hg17" ftype="bed"/>
george-weingart
parents:
diff changeset
48 <param name="maf_source" value="user"/>
george-weingart
parents:
diff changeset
49 <param name="maf_file" value="5.maf"/>
george-weingart
parents:
diff changeset
50 <param name="species" value="canFam1,hg17,mm5,panTro1,rn3"/>
george-weingart
parents:
diff changeset
51 <param name="overwrite_with_gaps" value="True"/>
george-weingart
parents:
diff changeset
52 <output name="out_file1" file="interval_maf_to_merged_fasta_user_out.dat" />
george-weingart
parents:
diff changeset
53 </test>
george-weingart
parents:
diff changeset
54 </tests>
george-weingart
parents:
diff changeset
55 <help>
george-weingart
parents:
diff changeset
56 **What it does**
george-weingart
parents:
diff changeset
57
george-weingart
parents:
diff changeset
58 LDA Effect Size (LEfSe) is a biomarker discovery and explanation tool for high-dimensional data. It couples statistical significance with biological consistency and effect size estimation. For an overview of LEfSe please refer to the "Introduction" module or to `(Segata et. al 2011)`_.
george-weingart
parents:
diff changeset
59
george-weingart
parents:
diff changeset
60 The scheme and the description below illustrate how the algorithm works:
george-weingart
parents:
diff changeset
61
george-weingart
parents:
diff changeset
62 .. image:: https://bytebucket.org/biobakery/galaxy_lefse/wiki/lefse_met.png
george-weingart
parents:
diff changeset
63
george-weingart
parents:
diff changeset
64 Input data consist of a collection of m samples (columns) each made up of n numerical features (rows, typically normalized per-sample, red representing high values and green low). These samples are labeled with a class (taking two or more possible values) that represents the main biological hypothesis under investigation; they may also have one or more subclass labels reflecting within-class groupings.
george-weingart
parents:
diff changeset
65
george-weingart
parents:
diff changeset
66 - Step 1: the Kruskal-Wallis test analyzes all features, testing whether the values in different classes are differentially distributed. Features violating the null hypothesis are further analyzed in Step 2.
george-weingart
parents:
diff changeset
67
george-weingart
parents:
diff changeset
68 - Step 2: the pairwise Wilcoxon test checks whether all pairwise comparisons between subclasses within different classes significantly agree with the class level trend.
george-weingart
parents:
diff changeset
69
george-weingart
parents:
diff changeset
70 - Step 3: the resulting subset of vectors is used to build a Linear Discriminant Analysis model from which the relative difference among classes is used to rank the features. The final output thus consists of a list of features that are discriminative with respect to the classes, consistent with the subclass grouping within classes, and ranked according to the effect size with which they differentiate classes.
george-weingart
parents:
diff changeset
71
george-weingart
parents:
diff changeset
72 **Input format**
george-weingart
parents:
diff changeset
73
george-weingart
parents:
diff changeset
74 The input for this module must be generated with the **"Format Input for LEfSe"** tool.
george-weingart
parents:
diff changeset
75
george-weingart
parents:
diff changeset
76 ------
george-weingart
parents:
diff changeset
77
george-weingart
parents:
diff changeset
78 **Output format**
george-weingart
parents:
diff changeset
79
george-weingart
parents:
diff changeset
80 The output consists of a tabular file listing all the features, the logarithm value of the highest mean among all the classes, and if the feature is discriminative, the class with the highest mean and the logarithmic LDA score.
george-weingart
parents:
diff changeset
81
george-weingart
parents:
diff changeset
82 The output file can be conveniently visualized with the "Plot LEfSe Results" module and, if feature names define a hierarchy, with the "Plot Cladogram" module. The output can also be used for generating the histograms of the raw data of the discriminative features using the "Plot Differential Features" module.
george-weingart
parents:
diff changeset
83
george-weingart
parents:
diff changeset
84 ------
george-weingart
parents:
diff changeset
85
george-weingart
parents:
diff changeset
86 **Parameters**
george-weingart
parents:
diff changeset
87
george-weingart
parents:
diff changeset
88 The input parameters are the alpha-values for the factorial Kruskal-Wallis test and for the pairwise Wilcoxon test among subclasses (steps 1 and 2 in the schematic picture above) and the non-negative threshold for the logarithmic LDA score. Moreover, the user can restrict the pairwise Wilcoxon test to subclasses in different classes that share the same name (less stringent), and can choose between the All-against-all (more stringent) and the One-against-all (less stringent) multi-class strategies.
george-weingart
parents:
diff changeset
89
george-weingart
parents:
diff changeset
90 .. _here: http://www.huttenhower.org/webfm_send/73
george-weingart
parents:
diff changeset
91 .. _(Segata et. al 2011): http://www.ncbi.nlm.nih.gov/pubmed/21702898
george-weingart
parents:
diff changeset
92 .. _(Garrett et. al 2010): http://www.ncbi.nlm.nih.gov/pubmed/20833380
george-weingart
parents:
diff changeset
93 .. _(Veiga et. al 2010): http://www.ncbi.nlm.nih.gov/pubmed/20921388
george-weingart
parents:
diff changeset
94 .. _contact us: nsegata@hsph.harvard.edu
george-weingart
parents:
diff changeset
95
george-weingart
parents:
diff changeset
96 **Example**
george-weingart
parents:
diff changeset
97
george-weingart
parents:
diff changeset
98 For the mouse model dataset (see the "Introduction" module) it is suggested to use alpha=0.01 as the sample size is not very large.
george-weingart
parents:
diff changeset
99
george-weingart
parents:
diff changeset
100 </help>
george-weingart
parents:
diff changeset
101 </tool>