Mercurial > repos > iuc > limma_voom
comparison limma_voom.xml @ 0:bdebdea5f6a7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 2f34a215c35f08c3666f314a87d235437baa1d21
author | iuc |
---|---|
date | Mon, 12 Jun 2017 07:41:02 -0400 |
parents | |
children | 76d01fe0ec36 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bdebdea5f6a7 |
---|---|
1 <tool id="limma_voom" name="limma-voom" version="1.1.1"> | |
2 <description> | |
3 Differential expression with optional sample weights | |
4 </description> | |
5 | |
<!-- Package dependencies of the tool; every requirement is pinned to one exact version. -->
6 <requirements> | |
7 <requirement type="package" version="3.16.5">bioconductor-edger</requirement> | |
8 <requirement type="package" version="3.30.13">bioconductor-limma</requirement> | |
9 <requirement type="package" version="1.4.29">r-statmod</requirement> | |
10 <requirement type="package" version="0.4.1">r-scales</requirement> | |
11 </requirements> | |
12 | |
<!-- Echoes one line made of the R version banner followed by the limma and edgeR
     package versions read from sessionInfo(). stderr of each R call is discarded
     and output lines containing "WARNING: " (case-insensitive) are filtered out. -->
13 <version_command> | |
14 <![CDATA[ | |
15 echo $(R --version | grep version | grep -v GNU)", limma version" $(R --vanilla --slave -e "library(limma); cat(sessionInfo()\$otherPkgs\$limma\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", edgeR version" $(R --vanilla --slave -e "library(edgeR); cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ") | |
16 ]]> | |
17 </version_command> | |
18 | |
<!--
  Runs limma_voom.R from the tool directory with positional arguments, in this order:
    1.     counts file
    2.     gene annotation file, or the literal None when annotations are not used
    3.     HTML report path
    4.     report files directory
    5.     rdaOption
    6.     normalisationOption
    7.     weightOption
    8.     contrast string
    9-10.  minimum CPM and minimum samples (0 and 0 when low-CPM filtering is off)
    11-13. p-value adjustment method, p-value threshold, log2-fold-change threshold
           ("BH", 0.05 and 0 when the advanced testing options are off)
    14.    factName::factLevel
  After the script succeeds, the *.tsv files are moved from the report files
  directory into ./output_dir, the directory that discover_datasets in the
  outputs section reads from.
-->
19 <command detect_errors="exit_code"> | |
20 <![CDATA[ | |
21 Rscript '$__tool_directory__/limma_voom.R' | |
22 '$counts' | |
23 | |
24 #if $anno.annoOpt=='yes': | |
25 '$geneanno' | |
26 #else: | |
27 None | |
28 #end if | |
29 | |
30 '$outReport' | |
31 '$outReport.files_path' | |
32 $rdaOption | |
33 $normalisationOption | |
34 $weightOption | |
35 '$contrast' | |
36 | |
37 #if $filterCPM.filterLowCPM=='yes': | |
38 '$filterCPM.cpmReq' | |
39 '$filterCPM.sampleReq' | |
40 #else: | |
41 0 | |
42 0 | |
43 #end if | |
44 | |
45 #if $testOpt.wantOpt=='yes': | |
46 '$testOpt.pAdjust' | |
47 '$testOpt.pVal' | |
48 '$testOpt.lfc' | |
49 #else: | |
50 "BH" | |
51 0.05 | |
52 0 | |
53 #end if | |
54 | |
55 '$factName::$factLevel' | |
56 | |
57 && | |
58 mkdir ./output_dir | |
59 | |
60 && | |
61 mv '$outReport.files_path'/*.tsv output_dir/ | |
62 | |
63 ]]> | |
64 </command> | |
65 | |
<!-- User-facing parameters. The three conditionals (anno, filterCPM, testOpt) only
     expose their nested parameters when set to "yes"; for "no" the command template
     passes fixed placeholder values instead (None, 0 0, and "BH" 0.05 0). -->
66 <inputs> | |
67 <param name="counts" type="data" format="tabular" label="Counts Data"/> | |
68 | |
69 <conditional name="anno"> | |
70 <param name="annoOpt" type="select" | |
71 label="Use Gene Annotations?" | |
72 help="If an annotation file is provided, annotations will be added to the table of differential expression results to provide descriptions for each gene."> | |
73 <option value="no">No</option> | |
74 <option value="yes">Yes</option> | |
75 </param> | |
76 <when value="yes"> | |
77 <param name="geneanno" type="data" format="tabular" label="Gene Annotations"/> | |
78 </when> | |
79 <when value="no" /> | |
80 </conditional> | |
81 | |
82 <!--*Code commented until solution for multiple factors is found* | |
83 <repeat name="factors" title="Factors" min="1" max="5" default="1"> | |
84 <param name="factName" type="text" label="Factor Name (No spaces)" | |
85 help="Eg. Genotype"/> | |
86 <param name="factLevel" type="text" size="100" | |
87 label="Factor Levels (No spaces)" | |
88 help="Eg. WT,WT,Mut,Mut,WT"/> | |
89 </repeat> | |
90 --> | |
91 | |
92 <param name="factName" type="text" label="Factor Name" help="Eg. Genotype."/> | |
93 <param name="factLevel" type="text" label="Factor Values" | |
94 help="Eg. WT,WT,WT,Mut,Mut,Mut | |
95 NOTE: Please ensure that the same levels are typed identically with cases matching."/> | |
96 <param name="contrast" type="text" label="Contrasts of interest" help="Eg. Mut-WT,KD-Control"/> | |
97 | |
98 <conditional name="filterCPM"> | |
99 <param name="filterLowCPM" type="select" label="Filter Low CPM?" | |
100 help="Treat genes with very low expression as unexpressed and filter out to speed up computation."> | |
101 <option value="yes" selected="True">Yes</option> | |
102 <option value="no">No</option> | |
103 </param> | |
104 <when value="yes"> | |
105 <param name="cpmReq" type="float" value="0.5" min="0" label="Minimum CPM"/> | |
106 | |
107 <param name="sampleReq" type="integer" value="1" min="0" label="Minimum Samples" | |
108 help="Filter out all the genes that do not meet the minimum CPM in at least this many samples."/> | |
109 </when> | |
110 <when value="no"/> | |
111 </conditional> | |
112 | |
113 <param name="weightOption" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Apply sample weights?" | |
114 help="Apply weights if outliers are present."> | |
115 </param> | |
116 | |
117 <param name="normalisationOption" type="select" label="Normalisation Method"> | |
118 <option value="TMM">TMM</option> | |
119 <option value="RLE">RLE</option> | |
120 <option value="upperquartile">Upperquartile</option> | |
121 <option value="none">None (Don't normalise)</option> | |
122 </param> | |
123 | |
124 <param name="rdaOption" type="boolean" truevalue="yes" falsevalue="no" checked="false" | |
125 label="Output RData?" | |
126 help="Output all the data used by R to construct the plots and tables, can be loaded into R. A link to the RData file will be provided in the HTML report."> | |
127 </param> | |
128 | |
129 <conditional name="testOpt"> | |
130 <param name="wantOpt" type="select" label="Use Advanced Testing Options?" | |
131 help="Enable choices for p-value adjustment method, p-value threshold and log2-fold-change threshold."> | |
132 <option value="no" selected="True">No</option> | |
133 <option value="yes">Yes</option> | |
134 </param> | |
135 <when value="yes"> | |
136 <param name="pAdjust" type="select" label="P-Value Adjustment Method."> | |
137 <option value="BH">Benjamini and Hochberg (1995)</option> | |
138 <option value="BY">Benjamini and Yekutieli (2001)</option> | |
139 <option value="holm">Holm (1979)</option> | |
140 <option value="none">None</option> | |
141 </param> | |
142 <param name="pVal" type="float" value="0.05" min="0" max="1" | |
143 label="Adjusted Threshold" | |
144 help="Genes below this threshold are considered significant and highlighted in the MA plot. If either BH(1995) or BY(2001) were selected then this value is a false-discovery-rate control. If Holm(1979) was selected then this is an adjusted p-value for family-wise error rate."/> | |
145 <param name="lfc" type="float" value="0" min="0" | |
146 label="Minimum log2-fold-change Required" | |
147 help="Genes above this threshold and below the p-value threshold are considered significant and highlighted in the MA plot."/> | |
148 </when> | |
149 <when value="no"/> | |
150 </conditional> | |
151 | |
152 </inputs> | |
153 | |
<!-- Outputs: the HTML report, plus a list collection of tabular result files that
     are discovered as *.tsv in output_dir (the part of the file name before .tsv
     becomes the element name). The angle brackets of the named regex group are
     entity-escaped because a literal < is not allowed inside an XML attribute
     value; after parsing, the pattern is the same regular expression. -->
154 <outputs> | |
155 <data format="html" name="outReport" label="${tool.name} on ${on_string}: Report" /> | |
156 <collection name="voom_results" type="list" label="${tool.name} on ${on_string}: DE genes"> | |
157 <discover_datasets pattern="(?P&lt;name&gt;.+)\.tsv$" format="tabular" directory="output_dir" visible="false" /> | |
158 </collection> | |
159 </outputs> | |
160 | |
<!-- Three tests, all on matrix.txt with factor Genotype (WT,WT,WT,Mut,Mut,Mut) and TMM:
       1. two contrasts: expects two result tables and a report that contains
          "Limma-voom Analysis Output" but not "RData";
       2. gene annotations enabled (anno.txt): compares the Mut-WT table against
          the annotated reference file;
       3. RData output enabled: expects the report to mention "RData". -->
161 <tests> | |
162 <test> | |
163 <param name="counts" value="matrix.txt" /> | |
164 <param name="factName" value="Genotype" /> | |
165 <param name="factLevel" value="WT,WT,WT,Mut,Mut,Mut" /> | |
166 <param name="contrast" value="Mut-WT,WT-Mut" /> | |
167 <param name="normalisationOption" value="TMM" /> | |
168 <output_collection name="voom_results" count="2"> | |
169 <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT.tsv" /> | |
170 <element name="limma-voom_WT-Mut" ftype="tabular" file="limma-voom_WT-Mut.tsv" /> | |
171 </output_collection> | |
172 <output name="outReport" > | |
173 <assert_contents> | |
174 <has_text text="Limma-voom Analysis Output" /> | |
175 <not_has_text text="RData" /> | |
176 </assert_contents> | |
177 </output> | |
178 </test> | |
179 <test> | |
180 <param name="annoOpt" value="yes" /> | |
181 <param name="geneanno" value="anno.txt" /> | |
182 <param name="counts" value="matrix.txt" /> | |
183 <param name="factName" value="Genotype" /> | |
184 <param name="factLevel" value="WT,WT,WT,Mut,Mut,Mut" /> | |
185 <param name="contrast" value="Mut-WT" /> | |
186 <param name="normalisationOption" value="TMM" /> | |
187 <output_collection name="voom_results" > | |
188 <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WTanno.tsv" /> | |
189 </output_collection> | |
190 </test> | |
191 <test> | |
192 <param name="rdaOption" value="yes" /> | |
193 <param name="counts" value="matrix.txt" /> | |
194 <param name="factName" value="Genotype" /> | |
195 <param name="factLevel" value="WT,WT,WT,Mut,Mut,Mut" /> | |
196 <param name="contrast" value="Mut-WT" /> | |
197 <param name="normalisationOption" value="TMM" /> | |
198 <output name="outReport" > | |
199 <assert_contents> | |
200 <has_text text="RData" /> | |
201 </assert_contents> | |
202 </output> | |
203 </test> | |
204 </tests> | |
205 | |
206 <help> | |
207 <![CDATA[ | |
208 .. class:: infomark | |
209 | |
210 **What it does** | |
211 | |
212 Given a matrix of counts (e.g. from featureCounts) and optional information about the genes, this tool | |
213 produces plots and tables useful in the analysis of differential gene | |
214 expression. | |
215 | |
216 ----- | |
217 | |
218 **Inputs** | |
219 | |
220 **Counts Data:** | |
221 A matrix of counts, with rows corresponding to genes | |
222 and columns corresponding to counts for the samples. | |
223 Values must be tab separated, with the first row containing the sample/column | |
224 labels and the first column containing the row/gene labels. | |
225 | |
226 Example: | |
227 | |
228 ========== ======= ======= ======= ======== ======== ======== | |
229 **GeneID** **WT1** **WT2** **WT3** **Mut1** **Mut2** **Mut3** | |
230 ---------- ------- ------- ------- -------- -------- -------- | |
231 11287 1699 1528 1601 1463 1441 1495 | |
232 11298 1905 1744 1834 1345 1291 1346 | |
233 11302 6 8 7 5 6 5 | |
234 11303 2099 1974 2100 1574 1519 1654 | |
235 11304 356 312 337 361 397 346 | |
236 11305 2528 2438 2493 1762 1942 2027 | |
237 ========== ======= ======= ======= ======== ======== ======== | |
238 | |
239 **Gene Annotations:** | |
240 Optional input for gene annotations, this can contain more | |
241 information about the genes than just an ID number. The annotations will | |
242 be available in the differential expression results table. | |
243 | |
244 Example: | |
245 | |
246 ========== ========== =================================================== | |
247 **GeneID** **Symbol** **GeneName** | |
248 ---------- ---------- --------------------------------------------------- | |
249 1287 Pzp pregnancy zone protein | |
250 1298 Aanat arylalkylamine N-acetyltransferase | |
251 1302 Aatk apoptosis-associated tyrosine kinase | |
252 1303 Abca1 ATP-binding cassette, sub-family A (ABC1), member 1 | |
253 1304 Abca4 ATP-binding cassette, sub-family A (ABC1), member 4 | |
254 1305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 | |
255 ========== ========== =================================================== | |
256 | |
257 **Factor Name:** | |
258 The name of the factor being investigated. This tool currently assumes | |
259 that only one factor is of interest. | |
260 | |
261 **Factor Levels:** | |
262 The levels of the factor of interest, this must be entered in the same | |
263 order as the samples to which the levels correspond as listed in the | |
264 columns of the counts matrix. | |
265 | |
266 The values should be separated by commas, and spaces must not be used. | |
267 | |
268 **Contrasts of Interest:** | |
269 The contrasts you wish to make between levels. | |
270 | |
271 A common contrast would be a simple difference between two levels: "Mut-WT" | |
272 represents the difference between the mutant and wild type genotypes. | |
273 | |
274 The values should be separated by commas and spaces must not be used. | |
275 | |
276 **Filter Low CPM:** | |
277 Option to ignore the genes that do not show significant levels of | |
278 expression, this filtering is dependent on two criteria: | |
279 | |
280 * **Minimum CPM:** This is the counts per million that a gene must have in at | |
281 least some specified number of samples. | |
282 | |
283 * **Minimum Samples:** This is the number of samples in which the CPM | |
284 requirement must be met in order for that gene to be acknowledged. | |
285 | |
286 Only genes that exhibit a CPM greater than the required amount in at least the | |
287 number of samples specified will be used for analysis. Care should be taken to | |
288 ensure that the sample requirement is appropriate. In the case of an experiment | |
289 with two experimental groups each with two members, if there is a change from | |
290 insignificant cpm to significant cpm but the sample requirement is set to 3, | |
291 then this will cause that gene to fail the criteria. When in doubt simply do not | |
292 filter. | |
293 | |
294 | |
295 **Normalisation Method:** | |
296 Option for using different methods to rescale the raw library | |
297 size. For more information, see the calcNormFactors section in the edgeR_ user's | |
298 manual. | |
299 | |
300 **Apply Sample Weights:** | |
301 Option to downweight outlier samples such that their information is still | |
302 used in the statistical analysis but their impact is reduced. Use this | |
303 whenever significant outliers are present. The MDS plotting tool in this package | |
304 is useful for identifying outliers. For more information on this option see Liu et al. (2015). | |
305 | |
306 **Use Advanced Testing Options?:** | |
307 By default error rate for multiple testing is controlled using Benjamini and | |
308 Hochberg's false discovery rate control at a threshold value of 0.05. However | |
309 there are options to change this to custom values. | |
310 | |
311 * **P-Value Adjustment Method:** | |
312 Change the multiple testing control method, the options are BH(1995) and | |
313 BY(2001) which are both false discovery rate controls. There is also | |
314 Holm(1979) which is a method for family-wise error rate control. | |
315 | |
316 * **Adjusted Threshold:** | |
317 Set the threshold for the resulting value of the multiple testing control | |
318 method. Only observations whose statistic falls below this value are | |
319 considered significant, thus highlighted in the MA plot. | |
320 | |
321 * **Minimum log2-fold-change Required:** | |
322 In addition to meeting the requirement for the adjusted statistic for | |
323 multiple testing, the observation must have an absolute log2-fold-change | |
324 greater than this threshold to be considered significant, thus highlighted | |
325 in the MA plot. | |
326 | |
327 ----- | |
328 | |
329 **Citations:** | |
330 | |
331 .. class:: infomark | |
332 | |
333 limma | |
334 | |
335 Please cite the paper below for the limma software itself. Please also try | |
336 to cite the appropriate methodology articles that describe the statistical | |
337 methods implemented in limma, depending on which limma functions you are | |
338 using. The methodology articles are listed in Section 2.1 of the limma | |
339 User's Guide. | |
340 | |
341 * Smyth GK (2005). Limma: linear models for microarray data. In: | |
342 'Bioinformatics and Computational Biology Solutions using R and | |
343 Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry, | |
344 W. Huber (eds), Springer, New York, pages 397-420. | |
345 | |
346 * Law CW, Chen Y, Shi W, and Smyth GK (2014). Voom: | |
347 precision weights unlock linear model analysis tools for | |
348 RNA-seq read counts. Genome Biology 15, R29. | |
349 | |
350 * Liu R, Holik AZ, Su S, Jansz N, Chen K, Leong HS, Blewitt ME, Asselin-Labat ML, Smyth GK, Ritchie ME (2015). Why weight? Modelling sample and observational level variability improves power in RNA-seq analyses. Nucleic Acids Research, 43(15), e97. | |
351 | |
352 * Ritchie, M. E., Diyagama, D., Neilson, J., van Laar, R., Dobrovic, | |
353 A., Holloway, A., and Smyth, G. K. (2006). Empirical array quality weights | |
354 for microarray data. BMC Bioinformatics 7, Article 261. | |
355 | |
356 .. class:: infomark | |
357 | |
358 edgeR | |
359 | |
360 Please cite the first paper for the software itself and the other papers for | |
361 the various original statistical methods implemented in edgeR. See | |
362 Section 1.2 in the User's Guide for more detail. | |
363 | |
364 * Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a Bioconductor | |
365 package for differential expression analysis of digital gene expression | |
366 data. Bioinformatics 26, 139-140 | |
367 | |
368 * Robinson MD and Smyth GK (2007). Moderated statistical tests for assessing | |
369 differences in tag abundance. Bioinformatics 23, 2881-2887 | |
370 | |
371 * Robinson MD and Smyth GK (2008). Small-sample estimation of negative | |
372 binomial dispersion, with applications to SAGE data. | |
373 Biostatistics, 9, 321-332 | |
374 | |
375 * McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis | |
376 of multifactor RNA-Seq experiments with respect to biological variation. | |
377 Nucleic Acids Research 40, 4288-4297 | |
378 | |
379 Please report problems or suggestions to: su.s@wehi.edu.au | |
380 | |
381 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html | |
382 .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html | |
383 ]]> | |
384 </help> | |
<!-- NOTE(review): the single DOI below appears to be the Liu et al. (2015) Nucleic
     Acids Research paper named in the help text (TODO confirm). The limma, voom and
     edgeR papers that the help text also asks users to cite have no citation
     entries here; consider adding them. -->
385 <citations> | |
386 <citation type="doi">10.1093/nar/gkv412</citation> | |
387 </citations> | |
388 </tool> |