annotate home/ubuntu/lefse_to_export/format_input.xml @ 1:db64b6287cd6 draft

Modified datatypes
author george-weingart
date Wed, 20 Aug 2014 16:56:51 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
1 <tool id="LEfSe_for" name="A) Format Data for LEfSe" version="1.0">
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
2 <code file="format_input_selector.py"/>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
3 <description></description>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
4 <!-- <command interpreter="python">./format_input.py $inp_data $formatted_input -f $feat_dir -c $cls_n -s $subcls_n -u $subj_n -o 1000000.0 </command> -->
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
5 <command interpreter="python">format_input.py $inp_data $formatted_input -f $cond.feat_dir -c $cond.cls_n -s $cond.subcls_n -u $cond.subj_n -o $norm </command>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
6 <inputs>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
7 <page>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
8 <param format="tabular" name="inp_data" type="data" label="Upload a tabular file of relative abundances and class labels (possibly also subclass and subjects labels) for LEfSe - See samples below - Please use Galaxy Get-Data/Upload-File. Use File-Type = tabular" help=""/>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
9 <param name="cond" type="data_column" data_ref="inp_data" accept_default="true" /> <!-- NOTE(review): this param shares the name "cond" with the <conditional name="cond"> declared just below; the command line reads $cond.feat_dir / $cond.cls_n, which only exist on the conditional. Presumably a leftover - confirm and remove or rename. -->
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
10
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
11 <conditional name="cond" type="data_column" data_ref="inp_data" accept_default="true">
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
12 <param name="feat_dir" type="select" data_ref="inp_data" label="Select whether the vectors (features and meta-data information) are listed in rows or columns" help="">
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
13 <option value="r" selected='True'>Rows</option>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
14 <option value="c">Columns</option>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
15 </param>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
16
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
17 <when value="r">
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
18 <param name="cls_n" label="Select which row to use as class" size ="70" type='select' dynamic_options="get_cols(inp_data,'r','cl')" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
19 <param name="subcls_n" label="Select which row to use as subclass" type='select' dynamic_options="get_cols(inp_data,'r','subclass')" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
20 <param name="subj_n" label="Select which row to use as subject" type='select' dynamic_options="get_cols(inp_data,'r','subject')" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
21 </when>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
22 <when value="c">
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
23 <param name="cls_n" label="Select which column to use as class" type='select' dynamic_options="get_cols(inp_data,'c','cl')" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
24 <param name="subcls_n" label="Select which column to use as subclass" type='select' dynamic_options="get_cols(inp_data,'c','subclass')" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
25 <param name="subj_n" label="Select which column to use as subject" type='select' dynamic_options="get_cols(inp_data,'c','subject')" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
26 </when>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
27
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
28 </conditional>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
29
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
30 <param name="norm" type="select" label="Per-sample normalization of the sum of the values to 1M (recommended when very low values are present)" help="">
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
31 <option value="1000000.0" selected='True'>Yes</option>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
32 <option value="-1">No</option>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
33 </param>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
34
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
35 <!-- <param name="row" label="on row" type="data_row" data_ref="inp_data" accept_default="true" /> -->
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
36 </page>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
37 </inputs>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
38 <outputs>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
39 <data format="lefse_internal_for" name="formatted_input" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
40 </outputs>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
41
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
42 <tests>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
43 <test>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
44 <param name="inp_data" value="lefse_input" ftype="tabular" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
45 <param name="cond.feat_dir" value="r" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
46 <param name="cond.cls_n" value="1" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
47 <!-- subclass selector: the name must match the declared param "subcls_n" inside conditional "cond"; -1 is presumably the "no subclass" choice (TODO confirm against get_cols) --> <param name="cond.subcls_n" value="-1" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
48 <!-- subject selector: the name must match the declared param "subj_n" inside conditional "cond"; -1 is presumably the "no subject" choice (TODO confirm against get_cols) --> <param name="cond.subj_n" value="-1" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
49 <!-- the value must equal one of the "norm" select option values ("1000000.0" = Yes, "-1" = No) --> <param name="norm" value="1000000.0" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
50 <output name="formatted_input" file="lefse_output_a" />
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
51 </test>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
52 </tests>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
53
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
54
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
55
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
56
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
57 <help>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
58
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
59
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
60 **What it does**
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
61
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
62 LDA Effect Size (LEfSe) `(Segata et. al 2011)`_ is an algorithm for high-dimensional biomarker discovery and
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
63 explanation that identifies genomic features (genes, pathways, or taxa) characterizing
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
64 the differences between two or more biological conditions (or classes, see figure below). It
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
65 emphasizes both statistical significance and biological relevance, allowing
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
66 researchers to identify differentially abundant features that are also consistent with
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
67 biologically meaningful categories (subclasses). LEfSe first robustly
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
68 identifies features that are statistically different among biological classes. It then
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
69 performs additional tests to assess whether these differences are consistent with
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
70 respect to expected biological behavior.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
71
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
72 Specifically, we first use the non-parametric factorial
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
73 Kruskal-Wallis (KW) sum-rank test to detect features with
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
74 significant differential abundance with respect to the class of interest; biological
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
75 significance is subsequently investigated using a set of pairwise tests among
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
76 subclasses using the (unpaired) Wilcoxon rank-sum test. As a last step, LEfSe uses
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
77 Linear Discriminant Analysis to estimate the effect size of each differentially
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
78 abundant feature and, if desired by the investigator, to perform dimension reduction.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
79
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
80 LEfSe consists of six modules performing the following steps (see the figure below).
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
81
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
82 The first step consists of **uploading your file** by using Galaxy's "Get-Data / Upload-file"
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
83
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
84
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
85 The next steps are:
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
86
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
87 + **A) Format Data for LEfSe**: selects the structure of the problem (classes, subclasses, subjects) and formats the tabular abundance data for the B module
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
88 + **B) LDA Effect Size (LEfSe)**: performs the analysis using the data formatted with module A and provides input for the visualization modules (C, D, E, F)
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
89 + **C) Plot LEfSe Results**: graphically reports the discovered biomarkers (output of B) with their effect sizes
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
90 + **D) Plot Cladogram**: graphically represents the discovered biomarkers (output of B) in a taxonomic tree specified by the hierarchical feature names (not available for non-hierarchical features)
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
91 + **E) Plot One Feature**: plots the row values of a feature (biomarker or not) as an abundance histogram with classes and subclasses structure (only one feature at a time)
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
92 + **F) Plot Differential Features**: plots the row values of all features (biomarkers or not) as abundance histograms with classes and subclasses structure and provides a zip archive of the figures
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
93
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
94 .. image:: https://bytebucket.org/biobakery/galaxy_lefse/wiki/lefse_ove.png
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
95
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
96
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
97 ------
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
98
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
99
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
100 **Input file format**
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
101
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
102 The text tab-delimited input file consists of a list of numerical features, the class vector and optionally the subclass and subject vectors. The features can be read counts directly or abundance floating-point values more generally, and the first field is the name of the feature. Class, subclass and subject vectors have a name (the first field) and a list of non-numerical strings.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
103
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
104 Although both column and row feature organization is accepted, given the high-dimensional nature of metagenomic data, the listing of the features in rows is preferred. A partial example of an input file follows (all values are separated by single-tab)::
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
105
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
106 bodysite mucosal mucosal mucosal mucosal mucosal non_mucosal non_mucosal non_mucosal non_mucosal non_mucosal
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
107 subsite oral gut oral oral gut skin nasal skin ear nasal
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
108 id 1023 1023 1672 1876 1672 159005010 1023 1023 1023 1672
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
109 Bacteria 0.99999 0.99999 0.999993 0.999989 0.999997 0.999927 0.999977 0.999987 0.999997 0.999993
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
110 Bacteria|Actinobacteria 0.311037 0.000864363 0.00446132 0.0312045 0.000773642 0.359354 0.761108 0.603002 0.95913 0.753688
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
111 Bacteria|Bacteroidetes 0.0689602 0.804293 0.00983343 0.0303561 0.859838 0.0195298 0.0212741 0.145729 0.0115617 0.0114511
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
112 Bacteria|Firmicutes 0.494223 0.173411 0.715345 0.813046 0.124552 0.177961 0.189178 0.188964 0.0226835 0.192665
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
113 Bacteria|Proteobacteria 0.0914284 0.0180378 0.265664 0.109549 0.00941215 0.430869 0.0225884 0.0532684 0.00512034 0.0365453
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
114 Bacteria|Firmicutes|Clostridia 0.090041 0.170246 0.00483188 0.0465328 0.122702 0.0402301 0.0460614 0.135201 0.0115835 0.0537381
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
115
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
116 In this case one may want to use bodysite as class, subsite as subclass and id as subject. Notice that the features have a hierarchical structure specified using the character \|.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
117
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
118
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
119 **Input file sample**
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
120
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
121 You can try the LEfSe modules using the dataset available here_. You can upload the dataset using Galaxy's **Get-Data / Upload File**
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
122
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
123 This is a 16S dataset from `(Garrett et. al 2010)`_ and `(Veiga et. al 2010)`_ for studying the characteristics of the fecal microbiota in a mouse model of spontaneous colitis. The dataset contains 30 abundance profiles (obtained processing the 16S reads with RDP) belonging to 10 rag2 (control) and 20 truc (case) mice. The metadata consists of class information only, as we don't have subject or subclass information. The same dataset is used to show the graphical results in the module descriptions.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
124
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
125
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
126
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
127 ------
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
128
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
129 STEP A:
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
130 -------
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
131
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
132
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
133 **What STEP A does**
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
134
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
135 Preprocessing module for the biomarker discovery tool called LEfSe:
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
136
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
137 This module of LEfSe preprocesses metagenomic abundance data for the analyses to be carried out with the "Run LEfSe" module. This module is separated from the "Run LEfSe" because one may want to preprocess the data only once but run multiple analyses.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
138
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
139 For an overview of LEfSe please refer to the "Introduction" module or to `(Segata et. al 2011)`_.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
140
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
141 ------
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
142
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
143 **Input format**
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
144
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
145 The module accepts tabular data with the feature list in rows or columns.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
146
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
147 ------
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
148
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
149 **Output format**
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
150
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
151 The module generates data readable by the "Run LEfSe" module only.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
152
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
153 ------
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
154
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
155 **Parameters**
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
156
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
157 The class vector represents the labels of the main condition under investigation. The (optional) subclass vector denotes the internal groupings with biological meaning inside each class (subclasses of different classes with the same name are processed as different subclasses). The subject vector (optional) reports a third dimension denoting meta-data (subject id, sample type, ... ) which is independent from the class and subclass definition.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
158
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
159 The labels can have a hierarchical organization (see example below) reflecting taxonomies (like NCBI or RDP microbial taxonomy, SEED subsystems or GO terms). The taxonomic levels are specified using the character \|.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
160
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
161 The per-sample normalization is usually applied for metagenomic data in which the relative abundances are taken into account.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
162
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
163 ------
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
164
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
165 **Example**
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
166
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
167 Although both column and row feature organization is accepted, given the high-dimensional nature of metagenomic data, the listing of the features in rows is preferred. A partial example of an input file follows (all values are separated by single-tab)::
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
168
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
169 bodysite mucosal mucosal mucosal mucosal mucosal non_mucosal non_mucosal non_mucosal non_mucosal non_mucosal
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
170 subsite oral gut oral oral gut skin nasal skin ear nasal
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
171 id 1023 1023 1672 1876 1672 159005010 1023 1023 1023 1672
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
172 Bacteria 0.99999 0.99999 0.999993 0.999989 0.999997 0.999927 0.999977 0.999987 0.999997 0.999993
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
173 Bacteria|Actinobacteria 0.311037 0.000864363 0.00446132 0.0312045 0.000773642 0.359354 0.761108 0.603002 0.95913 0.753688
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
174 Bacteria|Bacteroidetes 0.0689602 0.804293 0.00983343 0.0303561 0.859838 0.0195298 0.0212741 0.145729 0.0115617 0.0114511
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
175 Bacteria|Firmicutes 0.494223 0.173411 0.715345 0.813046 0.124552 0.177961 0.189178 0.188964 0.0226835 0.192665
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
176 Bacteria|Proteobacteria 0.0914284 0.0180378 0.265664 0.109549 0.00941215 0.430869 0.0225884 0.0532684 0.00512034 0.0365453
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
177 Bacteria|Firmicutes|Clostridia 0.090041 0.170246 0.00483188 0.0465328 0.122702 0.0402301 0.0460614 0.135201 0.0115835 0.0537381
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
178
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
179 In this case one may want to use bodysite as class, subsite as subclass and id as subject. Notice that the features have a hierarchical structure specified using the character \|.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
180
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
181 **Example with the "mouse model dataset"**
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
182
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
183 You can try the LEfSe modules using the dataset available here_. This is a 16S dataset from `(Garrett et. al 2010)`_ and `(Veiga et. al 2010)`_ for studying the characteristics of the fecal microbiota in a mouse model of spontaneous colitis. The dataset contains 30 abundance profiles (obtained processing the 16S reads with RDP) belonging to 10 rag2 (control) and 20 truc (case) mice. The metadata consists of class information only, as we don't have subject or subclass information. The dataset contains the features organized in rows; you need to select the first row as class, whereas you have to select "no subclass" and "no subject" options.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
184
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
185
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
186 .. _here: http://www.huttenhower.org/webfm_send/73
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
187 .. _(Segata et. al 2011): http://www.ncbi.nlm.nih.gov/pubmed/21702898
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
188 .. _(Garrett et. al 2010): http://www.ncbi.nlm.nih.gov/pubmed/20833380
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
189 .. _(Veiga et. al 2010): http://www.ncbi.nlm.nih.gov/pubmed/20921388
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
190 .. _contact us: nsegata@hsph.harvard.edu
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
191
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
192
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
193
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
194
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
195 **How to Cite LEfSe**
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
196
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
197 If you find LEfSe useful in your research please cite our paper `(Segata et. al 2011)`_:
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
198
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
199 | `Nicola Segata`_, Jacques Izard, Levi Waldron, Dirk Gevers, Larisa Miropolsky, Wendy Garrett, `Curtis Huttenhower`_.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
200 | "`Metagenomic Biomarker Discovery and Explanation`_"
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
201 | Genome Biology, 2011 Jun 24;12(6):R60
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
202
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
203
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
204 Please do not hesitate to `contact us`_ for any questions or comments.
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
205
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
206 .. _here: http://www.huttenhower.org/webfm_send/73
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
207 .. _(Segata et. al 2010): http://www.ncbi.nlm.nih.gov/pubmed/21702898
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
208 .. _(Garrett et. al 2010): http://www.ncbi.nlm.nih.gov/pubmed/20833380
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
209 .. _(Veiga et. al 2010): http://www.ncbi.nlm.nih.gov/pubmed/20921388
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
210 .. _contact us: nsegata@hsph.harvard.edu
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
211 .. _Nicola Segata: nsegata@hsph.harvard.edu
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
212 .. _Curtis Huttenhower: chuttenh@hsph.harvard.edu
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
213 .. _Metagenomic Biomarker Discovery and Explanation: http://genomebiology.com/2011/12/6/R60
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
214
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
215
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
216
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
217
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
218 </help>
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
219 </tool>