comparison edgeR.xml @ 3:6965066838fc draft

Uploaded
author fcaramia
date Wed, 12 Sep 2012 23:45:02 -0400
parents
children b6de7e3b8239
comparison
equal deleted inserted replaced
2:674c75219f15 3:6965066838fc
1 <tool id="edgeR" name="Empirical analysis of digital gene expression data" version="0.0.1">
2
3 <command interpreter="perl">
4 edgeR.pl -a $analysis_type.analysis -e $html_file.files_path -f $fdr -h $html_file -o $output
5 ## Pairwise comparisons: always pass the rowsum filter (-r); when tagwise dispersion is set to TRUE also pass its proportion (-p), its trend method (-u) and the bare -t switch
6 #if $analysis_type.analysis == "pw":
7 -r $analysis_type.rowsumfilter
8 #if $analysis_type.tagwise_disp.twd == "TRUE":
9 -p $analysis_type.tagwise_disp.twd_prop
10 -u $analysis_type.tagwise_disp.twd_trend
11 -t
12 #end if
13 ## GLM: optionally name the normalised matrix output (-n), pass the dispersion choice (-d) and the cont_pw boolean (expands to -m or nothing), then one factor::name::levels, cp::name::values and quoted cnt::contrast argument per repeat entry
14 #else if $analysis_type.analysis == "glm":
15 #if $analysis_type.exp.export_norm == "true":
16 -n $norm_exp
17 #end if
18 -d $analysis_type.disp
19 $analysis_type.cont_pw
20 #for $fct in $analysis_type.factors:
21 factor::${$fct.fact_name}::${$fct.fact}
22 #end for
23 #for $c in $analysis_type.cont_pred:
24 cp::${c.cp_name}::${c.cp}
25 #end for
26 #for $cnt in $analysis_type.contrasts:
27 "cnt::${cnt.add_cont}"
28 #end for
29 ## LIMMA (reached for any analysis value other than pw or glm): same arguments as GLM except that no -d is passed and the log boolean (expands to -l or nothing) follows -n; the matrix file is appended last for every analysis type
30 #else
31 #if $analysis_type.exp.export_norm == "true":
32 -n $norm_exp $analysis_type.exp.log
33 #end if
34 $analysis_type.cont_pw
35 #for $fct in $analysis_type.factors:
36 factor::${$fct.fact_name}::${$fct.fact}
37 #end for
38 #for $c in $analysis_type.cont_pred:
39 cp::${c.cp_name}::${c.cp}
40 #end for
41 #for $cnt in $analysis_type.contrasts:
42 "cnt::${cnt.add_cont}"
43 #end for
44 #end if
45 $matrix
46
47 </command>
48
49 <inputs>
50 <param name="matrix" type="data" format="tabular" label="Digital Expression Matrix"/>
51 <conditional name="analysis_type"> <!-- the selected value is passed to edgeR.pl with -a and picks the branch of the command template -->
52 <param name="analysis" type="select" label="Type Of Analysis">
53 <option value="pw">Pairwise comparisons (1 Factor Analysis)</option>
54 <option value="glm" selected="true">Generalized Linear Models (Multiple Factor Analysis using GLM)</option>
55 <option value="limma">Linear Models for RNA-Seq (Multiple Factor Analysis using LIMMA)</option>
56 </param>
57 <when value="pw">
58 <param name="rowsumfilter" type="integer" value="5" label="Common Dispersion Rowsum Filter" help="Numeric scalar giving a value for the filtering out of low abundance tags in the estimation of the common dispersion. Only tags with total sum of counts above this value are used in the estimation of the common dispersion. Low abundance tags can adversely affect the estimation of the common dispersion, so this argument allows the user to select an appropriate filter threshold for the tag abundance."/>
59 <conditional name="tagwise_disp">
60 <param name="twd" type="select" label="Maximize the Negative Binomial Weighted Conditional Likelihood" help="Calculate and use an estimate of the dispersion parameter for each tag">
61 <option value="TRUE" selected="true">True</option>
62 <option value="FALSE">False</option>
63 </param>
64 <when value="TRUE">
65 <param name="twd_trend" type="select" label="Method for allowing the prior distribution for the dispersion to be abundance-dependent">
66 <option value="movingave" selected="true">Movingave</option>
67 <option value="tricube">Tricube</option>
68 <option value="none">None</option>
69 </param>
70 <param name="twd_prop" type="float" value="0.3" label="The proportion of all tags/genes to be used for the locally weighted estimation of the tagwise dispersion, allowing the dispersion estimates to vary with abundance (expression level)"/>
71 </when>
72 <!-- explicit empty case so every select option has a matching when --><when value="FALSE"/></conditional>
73 </when>
74 <when value="glm">
75 <param name="disp" type="select" label="Select The Dispersion Estimate To Use:">
76 <option value="common">Common Dispersion</option>
77 <option value="trend">Trended Dispersion</option>
78 <option value="tag" selected="true">Tagwise Dispersion</option>
79 </param>
80 <repeat name="factors" title="Factor">
81 <param name="fact_name" title="Factor Name" type="text" label="Name Of Factor (no spaces or commas)"/>
82 <param name="fact" title="Factor" type="text" size="100" label="The Level Of Each Sample Separated By A Colon (no spaces or commas)"/>
83 </repeat>
84 <repeat name="cont_pred" title="Continuous Predictor">
85 <param name="cp_name" title="Continuous Predictor Name" type="text" label="Name Of Continuous Predictor (no spaces or commas)"/>
86 <param name="cp" title="Continuous Predictor" type="text" size="100" label="The Numerical Value For Each Sample Separated By A Colon (no spaces or commas)"/>
87 </repeat>
88 <param name="cont_pw" type="boolean" truevalue="-m" falsevalue="" checked="True" label="Perform all pairwise comparisons" help="Include all pairwise comparisons in the contrast matrix."/>
89 <repeat name="contrasts" title="Contrast">
90 <param name="add_cont" title="Contrast" type="text" label="Enter the contrast of interest, e.g. (G1+G2)/2-G3 (no spaces or commas)"/>
91 </repeat>
92 <conditional name="exp">
93 <param name="export_norm" type="select" label="Save Normalised DGE Matrix">
94 <option value="true">Yes</option>
95 <option value="false">No</option>
96 </param>
97 <!-- explicit empty cases so every select option has a matching when --><when value="true"/><when value="false"/></conditional>
98 </when>
99 <when value="limma">
100 <repeat name="factors" title="Factor">
101 <param name="fact_name" title="Factor Name" type="text" label="Name Of Factor (no spaces or commas)"/>
102 <param name="fact" title="Factor" type="text" size="100" label="The Level Of Each Sample Separated By A Colon (no spaces or commas)"/>
103 </repeat>
104 <repeat name="cont_pred" title="Continuous Predictor">
105 <param name="cp_name" title="Continuous Predictor Name" type="text" label="Name Of Continuous Predictor (no spaces or commas)"/>
106 <param name="cp" title="Continuous Predictor" type="text" size="100" label="The Numerical Value For Each Sample Separated By A Colon (no spaces or commas)"/>
107 </repeat>
108 <param name="cont_pw" type="boolean" truevalue="-m" falsevalue="" checked="True" label="Perform all pairwise comparisons" help="Include all pairwise comparisons in the contrast matrix."/>
109 <repeat name="contrasts" title="Contrast">
110 <param name="add_cont" title="Contrast" type="text" label="Enter the contrast of interest, e.g. (G1+G2)/2-G3 (no spaces or commas)"/>
111 </repeat>
112 <conditional name="exp">
113 <param name="export_norm" type="select" label="Save Normalised DGE Matrix">
114 <option value="true">Yes</option>
115 <option value="false">No</option>
116 </param>
117 <when value="true">
118 <param name="log" type="boolean" truevalue="-l" falsevalue="" checked="True" label="Export Normalised DGE Matrix in Log2" help="Selecting this will log base 2 transform the Normalised Digital Gene Expression Matrix."/>
119 </when>
120 <!-- explicit empty case so every select option has a matching when --><when value="false"/></conditional>
121 </when>
122 </conditional>
123 <param name="fdr" type="select" label="False discovery rate adjustment method"> <!-- the selected value is passed to edgeR.pl with -f -->
124 <option value="BH">Benjamini and Hochberg (1995)</option>
125 <option value="holm">Holm (1979)</option>
126 <option value="hochberg">Hochberg (1988)</option>
127 <option value="hommel">Hommel (1988)</option>
128 <option value="BY">Benjamini and Yekutieli (2001)</option>
129 <option value="none">None</option>
130 </param>
131 </inputs>
132
133 <outputs>
134 <data format="tabular" name="output" label="EdgeR analysis on ${matrix.name}"/> <!-- handed to edgeR.pl with -o -->
135 <data name="html_file" format="html" label="EdgeR analysis plots for ${matrix.name}"/> <!-- handed to edgeR.pl with -h; its files_path is handed over with -e -->
136 <data name="norm_exp" format="tabular" label="EdgeR Norm Expr Matrix for ${matrix.name}">
137 <filter>analysis_type[ "analysis" ] != "pw" and analysis_type[ "exp" ][ "export_norm" ] == "true"</filter> <!-- only produced for glm or limma runs with Save Normalised DGE Matrix set to Yes; the pw test must come first because the pw case defines no exp conditional -->
138 </data>
139 </outputs>
140
141 <help>
142
143 .. class:: infomark
144
145 **What it does**
146
147 Estimates differential gene expression for short read sequence count using methods appropriate for count data.
148 If you have paired data you may also want to consider Tophat/Cufflinks.
149 Input must be raw count data for each sequence arranged in a rectangular matrix as a tabular file.
150 Note - no scaling - please make sure you have untransformed raw counts of reads for each sequence.
151
152 Performs digital differential gene expression analysis between groups (eg a treatment and control).
153 Biological replicates provide information about experimental variability required for reliable inference.
154
155 **What it does not do**
156 edgeR_ requires biological replicates.
157 Without replicates you can't account for known important experimental sources of variability that the approach implemented here requires.
158
159 **Input**
160 A count matrix containing sequence names as rows and sample specific counts of reads from this sequence as columns.
161 The matrix must have 2 header rows, the first indicating the group assignment and the second uniquely identifying the samples. It must also contain a unique set of (eg Feature) names in the first column.
162
163 Example::
164
165 # G1:Mut G1:Mut G1:Mut G2:WT G2:WT G2:WT
166 #Feature Spl1 Spl2 Spl3 Spl4 Spl5 Spl6
167 NM_001001130 97 43 61 34 73 26
168 NM_001001144 25 8 9 3 5 5
169 NM_001001152 72 45 29 20 31 13
170 NM_001001160 0 1 1 1 0 0
171 NM_001001177 0 1 0 4 3 3
172 NM_001001178 0 2 1 0 4 0
173 NM_001001179 0 0 0 0 0 2
174 NM_001001180 0 0 0 0 0 2
175 NM_001001181 415 319 462 185 391 155
176 NM_001001182 1293 945 987 297 938 496
177 NM_001001183 5 4 11 7 11 2
178 NM_001001184 135 198 178 110 205 64
179 NM_001001185 186 1 0 1 1 0
180 NM_001001186 75 90 91 34 63 54
181 NM_001001187 267 236 170 165 202 51
182 NM_001001295 5 2 6 1 7 0
183 NM_001001309 1 0 0 1 2 1
184 ...
185
186
187 Please use the "Count reads in features with htseq-count" tool to generate the count matrix.
188
189 **Output**
190
191 A tabular file containing relative expression levels, statistical estimates of differential expression probability, R scripts, log, and some helpful diagnostic plots.
192
193 .. class:: infomark
194
195 **Attribution**
196 This tool wraps the edgeR_ Bioconductor package so all calculations and plots are controlled by that code. See edgeR_ for all documentation and appropriate attribution.
197 Recommended reference is Mark D. Robinson, Davis J. McCarthy, Gordon K. Smyth, PMCID: PMC2796818
198
199 .. class:: infomark
200
201 **Attribution**
202 When applying the LIMMA (Linear models for RNA-Seq) analysis the tool also makes use of the limma_ Bioconductor package.
203 Recommended reference is Smyth, G. K. (2005). Limma: linear models for microarray data. In: 'Bioinformatics and Computational Biology Solutions using R and Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry, W. Huber (eds), Springer, New York, pages 397--420.
204
205 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
206 .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html
207
208 </help>
209
210 </tool>