annotate edgeR.xml @ 1:aab4a565c0e8 draft

Uploaded
author amawla
date Tue, 04 Aug 2015 15:25:43 -0400
parents 91ca33096034
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
91ca33096034 Uploaded
amawla
parents:
diff changeset
1 <tool id="edgeR" name="edgeR" version="0.0.3">
91ca33096034 Uploaded
amawla
parents:
diff changeset
2 <description> - Estimates differential gene expression for short read sequence count using methods appropriate for count data</description>
91ca33096034 Uploaded
amawla
parents:
diff changeset
3 <requirements>
91ca33096034 Uploaded
amawla
parents:
diff changeset
4 <requirement type="R-module">edgeR</requirement>
91ca33096034 Uploaded
amawla
parents:
diff changeset
5 <requirement type="R-module">limma</requirement>
91ca33096034 Uploaded
amawla
parents:
diff changeset
6 </requirements>
91ca33096034 Uploaded
amawla
parents:
diff changeset
7 <command interpreter="perl">
91ca33096034 Uploaded
amawla
parents:
diff changeset
8 edgeR.pl -a $analysis_type.analysis -e $html_file.files_path -f BH -h $html_file -o $output
91ca33096034 Uploaded
amawla
parents:
diff changeset
9
91ca33096034 Uploaded
amawla
parents:
diff changeset
10 <!--Pairwise comparisons 1 Factor Analysis-->
91ca33096034 Uploaded
amawla
parents:
diff changeset
11 #if $analysis_type.analysis == "pw":
91ca33096034 Uploaded
amawla
parents:
diff changeset
12 -r $analysis_type.rowsumfilter
91ca33096034 Uploaded
amawla
parents:
diff changeset
13 #if $analysis_type.tagwise_disp.twd == "TRUE":
91ca33096034 Uploaded
amawla
parents:
diff changeset
14 -u movingave
91ca33096034 Uploaded
amawla
parents:
diff changeset
15 -t
91ca33096034 Uploaded
amawla
parents:
diff changeset
16 #end if
91ca33096034 Uploaded
amawla
parents:
diff changeset
17 <!--GLM Generalized Linear Models (Multiple Factors)-->
91ca33096034 Uploaded
amawla
parents:
diff changeset
18 #else if $analysis_type.analysis == "glm":
91ca33096034 Uploaded
amawla
parents:
diff changeset
19 #if $analysis_type.exp.export_norm == "true":
91ca33096034 Uploaded
amawla
parents:
diff changeset
20 -n $norm_exp
91ca33096034 Uploaded
amawla
parents:
diff changeset
21 #end if
91ca33096034 Uploaded
amawla
parents:
diff changeset
22 -d tag
91ca33096034 Uploaded
amawla
parents:
diff changeset
23 $analysis_type.cont_pw
91ca33096034 Uploaded
amawla
parents:
diff changeset
24 #for $cnt in $analysis_type.contrasts:
91ca33096034 Uploaded
amawla
parents:
diff changeset
25 "cnt::${cnt.add_cont}"
91ca33096034 Uploaded
amawla
parents:
diff changeset
26 #end for
91ca33096034 Uploaded
amawla
parents:
diff changeset
27
91ca33096034 Uploaded
amawla
parents:
diff changeset
28 <!--LIMMA Linearized Models (Multiple Factors)-->
91ca33096034 Uploaded
amawla
parents:
diff changeset
29 #else
91ca33096034 Uploaded
amawla
parents:
diff changeset
30 #if $analysis_type.exp.export_norm == "true":
91ca33096034 Uploaded
amawla
parents:
diff changeset
31 -n $norm_exp $analysis_type.exp.log
91ca33096034 Uploaded
amawla
parents:
diff changeset
32 #end if
91ca33096034 Uploaded
amawla
parents:
diff changeset
33 $analysis_type.cont_pw
91ca33096034 Uploaded
amawla
parents:
diff changeset
34 #for $cnt in $analysis_type.contrasts:
91ca33096034 Uploaded
amawla
parents:
diff changeset
35 "cnt::${cnt.add_cont}"
91ca33096034 Uploaded
amawla
parents:
diff changeset
36 #end for
91ca33096034 Uploaded
amawla
parents:
diff changeset
37 #end if
91ca33096034 Uploaded
amawla
parents:
diff changeset
38 $matrix
91ca33096034 Uploaded
amawla
parents:
diff changeset
39
91ca33096034 Uploaded
amawla
parents:
diff changeset
40 </command>
91ca33096034 Uploaded
amawla
parents:
diff changeset
41
91ca33096034 Uploaded
amawla
parents:
diff changeset
42 <inputs>
91ca33096034 Uploaded
amawla
parents:
diff changeset
43 <param name="matrix" type="data" format="tabular" label="Digital Expression Matrix"/>
91ca33096034 Uploaded
amawla
parents:
diff changeset
44 <conditional name="analysis_type">
91ca33096034 Uploaded
amawla
parents:
diff changeset
45 <param name="analysis" type="select" label="Type Of Analysis">
91ca33096034 Uploaded
amawla
parents:
diff changeset
46 <option value="pw">Pairwise comparisons (1 Factor Analysis)</option>
91ca33096034 Uploaded
amawla
parents:
diff changeset
47 <option value="glm" selected="true">Generalized Linear Models (Multiple Factor Analysis using GLM)</option>
91ca33096034 Uploaded
amawla
parents:
diff changeset
48 <option value="limma">Linear Models for RNA-Seq (Multiple Factor Analysis using LIMMA)</option>
91ca33096034 Uploaded
amawla
parents:
diff changeset
49 </param>
91ca33096034 Uploaded
amawla
parents:
diff changeset
50 <when value="pw">
91ca33096034 Uploaded
amawla
parents:
diff changeset
51 <param name="rowsumfilter" type="integer" value="5" label="Common Dispersion Rowsum Filter" help="Numeric scalar giving a value for the filtering out of low abundance tags in the estimation of the common dispersion. Only tags with total sum of counts above this value are used in the estimation of the common dispersion. Low abundance tags can adversely affect the estimation of the common dispersion, so this argument allows the user to select an appropriate filter threshold for the tag abundance."/>
91ca33096034 Uploaded
amawla
parents:
diff changeset
52 <conditional name="tagwise_disp">
91ca33096034 Uploaded
amawla
parents:
diff changeset
53 <param name="twd" type="select" label="Maximize the Negative Binomial Weighted Conditional Likelihood" help="Calculate and use an estimate of the dispersion parameter for each tag">
91ca33096034 Uploaded
amawla
parents:
diff changeset
54 <option value="TRUE" selected="true">True</option>
91ca33096034 Uploaded
amawla
parents:
diff changeset
55 <option value="FALSE">False</option>
91ca33096034 Uploaded
amawla
parents:
diff changeset
56 </param>
91ca33096034 Uploaded
amawla
parents:
diff changeset
57 </conditional>
91ca33096034 Uploaded
amawla
parents:
diff changeset
58 </when>
91ca33096034 Uploaded
amawla
parents:
diff changeset
59 <when value="glm">
91ca33096034 Uploaded
amawla
parents:
diff changeset
60 <param name="cont_pw" type="boolean" truevalue="-m" falsevalue="" checked="True" label="Perform all pairwise comparisons" help="Include all pairwise comparisons in the contrast matrix."/>
91ca33096034 Uploaded
amawla
parents:
diff changeset
61 <repeat name="contrasts" title="Contrast">
91ca33096034 Uploaded
amawla
parents:
diff changeset
62 <param name="add_cont" title="Contrast" type="text" label="Enter the contrast of interest, e.g. (G1+G2)/2-G3 (no spaces or commas)"/>
91ca33096034 Uploaded
amawla
parents:
diff changeset
63 </repeat>
91ca33096034 Uploaded
amawla
parents:
diff changeset
64 <conditional name="exp">
91ca33096034 Uploaded
amawla
parents:
diff changeset
65 <param name="export_norm" type="select" label="Save Normalised DGE Matrix">
91ca33096034 Uploaded
amawla
parents:
diff changeset
66 <option value="true">Yes</option>
91ca33096034 Uploaded
amawla
parents:
diff changeset
67 <option value="false">No</option>
91ca33096034 Uploaded
amawla
parents:
diff changeset
68 </param>
91ca33096034 Uploaded
amawla
parents:
diff changeset
69 </conditional>
91ca33096034 Uploaded
amawla
parents:
diff changeset
70 </when>
91ca33096034 Uploaded
amawla
parents:
diff changeset
71 <when value="limma">
91ca33096034 Uploaded
amawla
parents:
diff changeset
72 <param name="cont_pw" type="boolean" truevalue="-m" falsevalue="" checked="True" label="Perform all pairwise comparisons" help="Include all pairwise comparisons in the contrast matrix."/>
91ca33096034 Uploaded
amawla
parents:
diff changeset
73 <repeat name="contrasts" title="Contrast">
91ca33096034 Uploaded
amawla
parents:
diff changeset
74 <param name="add_cont" title="Contrast" type="text" label="Enter the contrast of interest, e.g. (G1+G2)/2-G3 (no spaces or commas)"/>
91ca33096034 Uploaded
amawla
parents:
diff changeset
75 </repeat>
91ca33096034 Uploaded
amawla
parents:
diff changeset
76 <conditional name="exp">
91ca33096034 Uploaded
amawla
parents:
diff changeset
77 <param name="export_norm" type="select" label="Save Normalised DGE Matrix">
91ca33096034 Uploaded
amawla
parents:
diff changeset
78 <option value="true">Yes</option>
91ca33096034 Uploaded
amawla
parents:
diff changeset
79 <option value="false">No</option>
91ca33096034 Uploaded
amawla
parents:
diff changeset
80 </param>
91ca33096034 Uploaded
amawla
parents:
diff changeset
81 <when value="true">
91ca33096034 Uploaded
amawla
parents:
diff changeset
82 <param name="log" type="boolean" truevalue="-l" falsevalue="" checked="True" label="Export Normalised DGE Matrix in Log2" help="Selecting this will log base 2 transform the Normalised Digital Gene Expression Matrix."/>
91ca33096034 Uploaded
amawla
parents:
diff changeset
83 </when>
91ca33096034 Uploaded
amawla
parents:
diff changeset
84 </conditional>
91ca33096034 Uploaded
amawla
parents:
diff changeset
85 </when>
91ca33096034 Uploaded
amawla
parents:
diff changeset
86 </conditional>
91ca33096034 Uploaded
amawla
parents:
diff changeset
87 </inputs>
91ca33096034 Uploaded
amawla
parents:
diff changeset
88
91ca33096034 Uploaded
amawla
parents:
diff changeset
89 <outputs>
91ca33096034 Uploaded
amawla
parents:
diff changeset
90 <data format="tabular" name="output" label="EdgeR analysis on ${matrix.name}"/>
91ca33096034 Uploaded
amawla
parents:
diff changeset
91 <data name="html_file" format="html" label="EdgeR analysis plots for ${matrix.name}"/>
91ca33096034 Uploaded
amawla
parents:
diff changeset
92 <data name="norm_exp" format="tabular" label="EdgeR Norm Expr Matrix for ${matrix.name}">
91ca33096034 Uploaded
amawla
parents:
diff changeset
93 <filter>analysis_type[ "analysis" ] != "pw" and analysis_type[ "exp" ][ "export_norm" ] == "true"</filter>
91ca33096034 Uploaded
amawla
parents:
diff changeset
94 </data>
91ca33096034 Uploaded
amawla
parents:
diff changeset
95 </outputs>
91ca33096034 Uploaded
amawla
parents:
diff changeset
96
91ca33096034 Uploaded
amawla
parents:
diff changeset
97 <help>
91ca33096034 Uploaded
amawla
parents:
diff changeset
98
91ca33096034 Uploaded
amawla
parents:
diff changeset
99 .. class:: infomark
91ca33096034 Uploaded
amawla
parents:
diff changeset
100
91ca33096034 Uploaded
amawla
parents:
diff changeset
101 **What it does**
91ca33096034 Uploaded
amawla
parents:
diff changeset
102
91ca33096034 Uploaded
amawla
parents:
diff changeset
103 Estimates differential gene expression for short read sequence count using methods appropriate for count data.
91ca33096034 Uploaded
amawla
parents:
diff changeset
104 If you have paired data you may also want to consider Tophat/Cufflinks.
91ca33096034 Uploaded
amawla
parents:
diff changeset
105 Input must be raw count data for each sequence arranged in a rectangular matrix as a tabular file.
91ca33096034 Uploaded
amawla
parents:
diff changeset
106 Note - no scaling - please make sure you have untransformed raw counts of reads for each sequence.
91ca33096034 Uploaded
amawla
parents:
diff changeset
107
91ca33096034 Uploaded
amawla
parents:
diff changeset
108 Performs digital differential gene expression analysis between groups (e.g. a treatment and a control).
91ca33096034 Uploaded
amawla
parents:
diff changeset
109 Biological replicates provide information about experimental variability required for reliable inference.
91ca33096034 Uploaded
amawla
parents:
diff changeset
110
91ca33096034 Uploaded
amawla
parents:
diff changeset
111 **What it does not do**
91ca33096034 Uploaded
amawla
parents:
diff changeset
112 edgeR_ requires biological replicates.
91ca33096034 Uploaded
amawla
parents:
diff changeset
113 Without replicates you can't account for known important experimental sources of variability that the approach implemented here requires.
91ca33096034 Uploaded
amawla
parents:
diff changeset
114
91ca33096034 Uploaded
amawla
parents:
diff changeset
115
91ca33096034 Uploaded
amawla
parents:
diff changeset
116 **Input**
91ca33096034 Uploaded
amawla
parents:
diff changeset
117 A count matrix containing sequence names as rows and sample specific counts of reads from this sequence as columns.
91ca33096034 Uploaded
amawla
parents:
diff changeset
118 The matrix must have 2 header rows, the first indicating the group assignment and the second uniquely identifying the samples. It must also contain a unique set of (e.g. Feature) names in the first column.
91ca33096034 Uploaded
amawla
parents:
diff changeset
119
91ca33096034 Uploaded
amawla
parents:
diff changeset
120 Example::
91ca33096034 Uploaded
amawla
parents:
diff changeset
121
91ca33096034 Uploaded
amawla
parents:
diff changeset
122 # G1:Mut G1:Mut G1:Mut G2:WT G2:WT G2:WT
91ca33096034 Uploaded
amawla
parents:
diff changeset
123 #Feature Spl1 Spl2 Spl3 Spl4 Spl5 Spl6
91ca33096034 Uploaded
amawla
parents:
diff changeset
124 NM_001001130 97 43 61 34 73 26
91ca33096034 Uploaded
amawla
parents:
diff changeset
125 NM_001001144 25 8 9 3 5 5
91ca33096034 Uploaded
amawla
parents:
diff changeset
126 NM_001001152 72 45 29 20 31 13
91ca33096034 Uploaded
amawla
parents:
diff changeset
127 NM_001001160 0 1 1 1 0 0
91ca33096034 Uploaded
amawla
parents:
diff changeset
128 NM_001001177 0 1 0 4 3 3
91ca33096034 Uploaded
amawla
parents:
diff changeset
129 NM_001001178 0 2 1 0 4 0
91ca33096034 Uploaded
amawla
parents:
diff changeset
130 NM_001001179 0 0 0 0 0 2
91ca33096034 Uploaded
amawla
parents:
diff changeset
131 NM_001001180 0 0 0 0 0 2
91ca33096034 Uploaded
amawla
parents:
diff changeset
132 NM_001001181 415 319 462 185 391 155
91ca33096034 Uploaded
amawla
parents:
diff changeset
133 NM_001001182 1293 945 987 297 938 496
91ca33096034 Uploaded
amawla
parents:
diff changeset
134 NM_001001183 5 4 11 7 11 2
91ca33096034 Uploaded
amawla
parents:
diff changeset
135 NM_001001184 135 198 178 110 205 64
91ca33096034 Uploaded
amawla
parents:
diff changeset
136 NM_001001185 186 1 0 1 1 0
91ca33096034 Uploaded
amawla
parents:
diff changeset
137 NM_001001186 75 90 91 34 63 54
91ca33096034 Uploaded
amawla
parents:
diff changeset
138 NM_001001187 267 236 170 165 202 51
91ca33096034 Uploaded
amawla
parents:
diff changeset
139 NM_001001295 5 2 6 1 7 0
91ca33096034 Uploaded
amawla
parents:
diff changeset
140 NM_001001309 1 0 0 1 2 1
91ca33096034 Uploaded
amawla
parents:
diff changeset
141 ...
91ca33096034 Uploaded
amawla
parents:
diff changeset
142
91ca33096034 Uploaded
amawla
parents:
diff changeset
143
91ca33096034 Uploaded
amawla
parents:
diff changeset
144 Please use the "Count reads in features with htseq-count" tool to generate the count matrix.
91ca33096034 Uploaded
amawla
parents:
diff changeset
145
91ca33096034 Uploaded
amawla
parents:
diff changeset
146 **Output**
91ca33096034 Uploaded
amawla
parents:
diff changeset
147
91ca33096034 Uploaded
amawla
parents:
diff changeset
148 A tabular file containing relative expression levels, statistical estimates of differential expression probability, R scripts, log, and some helpful diagnostic plots.
91ca33096034 Uploaded
amawla
parents:
diff changeset
149
91ca33096034 Uploaded
amawla
parents:
diff changeset
150 **Fixed Parameters**
91ca33096034 Uploaded
amawla
parents:
diff changeset
151
91ca33096034 Uploaded
amawla
parents:
diff changeset
152 Method for allowing the prior distribution for the dispersion to be abundance-dependent used: movingave
91ca33096034 Uploaded
amawla
parents:
diff changeset
153
91ca33096034 Uploaded
amawla
parents:
diff changeset
154 False discovery rate adjustment method used: Benjamini and Hochberg (1995)
91ca33096034 Uploaded
amawla
parents:
diff changeset
155
91ca33096034 Uploaded
amawla
parents:
diff changeset
156 GLM dispersion estimate used: Tagwise Dispersion
91ca33096034 Uploaded
amawla
parents:
diff changeset
157
91ca33096034 Uploaded
amawla
parents:
diff changeset
158 Gene filter used: less than 1 count per million reads
91ca33096034 Uploaded
amawla
parents:
diff changeset
159
91ca33096034 Uploaded
amawla
parents:
diff changeset
160 .. class:: infomark
91ca33096034 Uploaded
amawla
parents:
diff changeset
161
91ca33096034 Uploaded
amawla
parents:
diff changeset
162 **Attribution**
91ca33096034 Uploaded
amawla
parents:
diff changeset
163 This tool wraps the edgeR_ Bioconductor package so all calculations and plots are controlled by that code. See edgeR_ for all documentation and appropriate attribution.
91ca33096034 Uploaded
amawla
parents:
diff changeset
164 Recommended reference is Mark D. Robinson, Davis J. McCarthy, Gordon K. Smyth, PMCID: PMC2796818
91ca33096034 Uploaded
amawla
parents:
diff changeset
165
91ca33096034 Uploaded
amawla
parents:
diff changeset
166 .. class:: infomark
91ca33096034 Uploaded
amawla
parents:
diff changeset
167
91ca33096034 Uploaded
amawla
parents:
diff changeset
168 **Attribution**
91ca33096034 Uploaded
amawla
parents:
diff changeset
169 When applying the LIMMA (Linear models for RNA-Seq) analysis, the tool also makes use of the limma_ Bioconductor package.
91ca33096034 Uploaded
amawla
parents:
diff changeset
170 Recommended reference is Smyth, G. K. (2005). Limma: linear models for microarray data. In: 'Bioinformatics and Computational Biology Solutions using R and Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry, W. Huber (eds), Springer, New York, pages 397--420.
91ca33096034 Uploaded
amawla
parents:
diff changeset
171
91ca33096034 Uploaded
amawla
parents:
diff changeset
172 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
91ca33096034 Uploaded
amawla
parents:
diff changeset
173 .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html
91ca33096034 Uploaded
amawla
parents:
diff changeset
174
91ca33096034 Uploaded
amawla
parents:
diff changeset
175
91ca33096034 Uploaded
amawla
parents:
diff changeset
176 </help>
91ca33096034 Uploaded
amawla
parents:
diff changeset
177
91ca33096034 Uploaded
amawla
parents:
diff changeset
178 </tool>