comparison edgeR_Differential_Gene_Expression.xml @ 1:a4a4c88783ea draft

planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit 2700e500a4fb135a20ede7d52221a9d31f1aaa5e-dirty
author yhoogstrate
date Tue, 01 Sep 2015 04:59:05 -0400
parents
children ec951a5017f8
comparison
equal deleted inserted replaced
0:acb56603b268 1:a4a4c88783ea
1 <?xml version="1.0" encoding="UTF-8"?>
2 <tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.11.0.a">
3 <description>RNA-Seq gene expression analysis using edgeR (R package)</description>
4
5 <macros>
6 <import>edgeR_macros.xml</import>
7 </macros>
8
9 <requirements>
10 <requirement type="package" version="3.11.0">edger</requirement>
11 </requirements>
12
13 <stdio>
14 <regex match="Error in[^a-z]+contrasts"
15 source="both"
16 level="fatal"
17 description="Have the design- and expression-matrix been swapped?" />
18 <regex match="Execution halted"
19 source="both"
20 level="fatal" />
21 <regex match="Calculating library sizes from column"
22 source="stderr"
23 level="log" />
24 <regex match="During startup - Warning messages"
25 source="stderr"
26 level="log" />
27 <regex match="Setting LC_[^ ]+ failed"
28 source="stderr"
29 level="warning"
30 description="LOCALE has not been set correctly" />
31 </stdio>
32
33 <version_command>echo $(R --version | grep version | grep -v GNU) ", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2&gt; /dev/null | grep -v -i "WARNING: ")</version_command>
34
35 <command>
36 R --vanilla --slave -f $R_script '--args
37 $expression_matrix
38 $design_matrix
39 $contrast
40
41 $fdr
42
43 $output_count_edgeR
44 $output_cpm
45
46 /dev/null <!-- Calculation of FPKM/RPKM should come here -->
47
48 #if $output_raw_counts:
49 $output_raw_counts
50 #else:
51 /dev/null
52 #end if
53
54 #if $output_MDSplot_logFC:
55 $output_MDSplot_logFC
56 #else:
57 /dev/null
58 #end if
59
60 #if $output_MDSplot_bcv:
61 $output_MDSplot_bcv
62 #else:
63 /dev/null
64 #end if
65
66 #if $output_BCVplot:
67 $output_BCVplot
68 #else:
69 /dev/null
70 #end if
71
72 #if $output_MAplot:
73 $output_MAplot
74 #else:
75 /dev/null
76 #end if
77
78 #if $output_PValue_distribution_plot:
79 $output_PValue_distribution_plot
80 #else:
81 /dev/null
82 #end if
83
84 #if $output_hierarchical_clustering_plot:
85 $output_hierarchical_clustering_plot
86 #else:
87 /dev/null
88 #end if
89
90 #if $output_heatmap_plot:
91 $output_heatmap_plot
92 #else:
93 /dev/null
94 #end if
95
96 #if $output_RData_obj:
97 $output_RData_obj
98 #else:
99 /dev/null
100 #end if
101
102 $output_format_images
103 '
104 #if $output_R:
105 > $output_R
106 #else:
107 > /dev/null
108 #end if
109 </command>
110
111 <configfiles>
112 <configfile name="R_script">
113 <![CDATA[
114
115 library(limma,quietly=TRUE) ## quietly to avoid unnecessaity stderr messages
116 library(edgeR,quietly=TRUE) ## quietly to avoid unnecessaity stderr messages
117 library(splines,quietly=TRUE)## quietly to avoid unnecessaity stderr messages
118
119 ## Fetch commandline arguments
120 args <- commandArgs(trailingOnly = TRUE)
121
122 expression_matrix_file <- args[1]
123 design_matrix_file <- args[2]
124 contrast <- args[3]
125
126 fdr <- args[4]
127
128 output_count_edgeR <- args[5]
129 output_cpm <- args[6]
130
131 output_xpkm <- args[7] ##FPKM file - to be implemented
132
133 output_raw_counts <- args[8]
134 output_MDSplot_logFC <- args[9]
135 output_MDSplot_bcv <- args[10]
136 output_BCVplot <- args[11]
137 output_MAplot <- args[12]
138 output_PValue_distribution_plot <- args[13]
139 output_hierarchical_clustering_plot <- args[14]
140 output_heatmap_plot <- args[15]
141 output_RData_obj <- args[16]
142 output_format_images <- args[17]
143
144
145 ## Obtain read-counts
146 expression_matrix <- read.delim(expression_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c(""))
147 design_matrix <- read.delim(design_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c(""))
148
149 colnames(design_matrix) <- make.names(colnames(design_matrix))
150
151 for(i in 1:ncol(design_matrix)) {
152 old <- design_matrix[,i]
153 design_matrix[,i] <- make.names(design_matrix[,i])
154 if(paste(design_matrix[,i],collapse="\t") != paste(old,collapse="\t")) {
155 print("Renaming of factors:")
156 print(old)
157 print("To:")
158 print(design_matrix[,i])
159 }
160 ## The following line seems to malfunction the script:
161 ##design_matrix[,i] <- as.factor(design_matrix[,i])
162 }
163
164 ## 1) In the expression matrix, you only want to have the samples described in the design matrix
165 columns <- match(rownames(design_matrix),colnames(expression_matrix))
166 columns <- columns[!is.na(columns)]
167 read_counts <- expression_matrix[,columns]
168
169 ## 2) In the design matrix, you only want to have samples of which you really have the counts
170 columns <- match(colnames(read_counts),rownames(design_matrix))
171 columns <- columns[!is.na(columns)]
172 design_matrix <- design_matrix[columns,,drop=FALSE]
173
174 ## Filter for HTSeq predifined counts:
175 exclude_HTSeq <- c("no_feature","ambiguous","too_low_aQual","not_aligned","alignment_not_unique")
176 exclude_DEXSeq <- c("_ambiguous","_empty","_lowaqual","_notaligned")
177
178 exclude <- match(c(exclude_HTSeq, exclude_DEXSeq),rownames(read_counts))
179 exclude <- exclude[is.na(exclude)==0]
180 if(length(exclude) != 0) {
181 read_counts <- read_counts[-exclude,]
182 }
183
184
185 ## sorting expression matrix with the order of the read_counts
186 ##order <- match(colnames(read_counts) , rownames(design_matrix))
187 ##read_counts_ordered <- read_counts[,order2]
188
189 empty_samples <- apply(read_counts,2,function(x) sum(x) == 0)
190 if(sum(empty_samples) > 0) {
191 write(paste("There are ",sum(empty_samples)," empty samples found:",sep=""),stderr())
192 write(colnames(read_counts)[empty_samples],stderr())
193 } else {
194
195 dge <- DGEList(counts=read_counts,genes=rownames(read_counts))
196
197 formula <- paste(c("~0",make.names(colnames(design_matrix))),collapse = " + ")
198 design_matrix_tmp <- design_matrix
199 colnames(design_matrix_tmp) <- make.names(colnames(design_matrix_tmp))
200 design <- model.matrix(as.formula(formula),design_matrix_tmp)
201 rm(design_matrix_tmp)
202
203 # Filter prefixes
204 prefixes = colnames(design_matrix)[attr(design,"assign")]
205 avoid = nchar(prefixes) == nchar(colnames(design))
206 replacements = substr(colnames(design),nchar(prefixes)+1,nchar(colnames(design)))
207 replacements[avoid] = colnames(design)[avoid]
208 colnames(design) = replacements
209
210 # Do normalization
211 write("Calculating normalization factors...",stdout())
212 dge <- calcNormFactors(dge)
213 write("Estimating common dispersion...",stdout())
214 dge <- estimateGLMCommonDisp(dge,design)
215 write("Estimating trended dispersion...",stdout())
216 dge <- estimateGLMTrendedDisp(dge,design)
217 write("Estimating tagwise dispersion...",stdout())
218 dge <- estimateGLMTagwiseDisp(dge,design)
219
220
221 if(output_MDSplot_logFC != "/dev/null") {
222 write("Creating MDS plot (logFC method)",stdout())
223 points <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot
224 dev.off()# Kill it
225
226 if(output_format_images == "pdf") {
227 pdf(output_MDSplot_logFC,height=14,width=14)
228 } else if(output_format_images == "svg") {
229 svg(output_MDSplot_logFC,height=14,width=14)
230 } else {
231 ## png(output_MDSplot_logFC)
232 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
233
234 bitmap(output_MDSplot_logFC,type="png16m",height=14,width=14)
235 }
236
237
238 diff_x <- abs(max(points\$x)-min(points\$x))
239 diff_y <-(max(points\$y)-min(points\$y))
240 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2")
241 points(points\$x,points\$y,pch=20)
242 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4)
243 rm(diff_x,diff_y)
244
245 dev.off()
246 }
247
248 if(output_MDSplot_bcv != "/dev/null") {
249 write("Creating MDS plot (bcv method)",stdout())
250
251 ## 1. First create a virtual plot to obtain the desired coordinates
252 pdf("bcvmds.pdf")
253 points <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples)))
254 dev.off()# Kill it
255
256 ## 2. Re-plot the coordinates in a new figure with the size and settings.
257 if(output_format_images == "pdf") {
258 pdf(output_MDSplot_bcv,height=14,width=14)
259 } else if(output_format_images == "svg") {
260 svg(output_MDSplot_bcv,height=14,width=14)
261 } else {
262 ## png(output_MDSplot_bcv)
263 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
264
265 bitmap(output_MDSplot_bcv,type="png16m",height=14,width=14)
266 }
267
268 diff_x <- abs(max(points\$x)-min(points\$x))
269 diff_y <- (max(points\$y)-min(points\$y))
270 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2")
271 points(points\$x,points\$y,pch=20)
272 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4)
273 rm(diff_x,diff_y)
274
275 dev.off()
276 }
277
278
279 if(output_BCVplot != "/dev/null") {
280 write("Creating Biological coefficient of variation plot",stdout())
281
282 if(output_format_images == "pdf") {
283 pdf(output_BCVplot)
284 } else if(output_format_images == "svg") {
285 svg(output_BCVplot)
286 } else {
287 ## png(output_BCVplot)
288 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
289
290 bitmap(output_BCVplot,type="png16m")
291 }
292
293 plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance")
294 dev.off()
295 }
296
297
298 write("Fitting GLM...",stdout())
299 fit <- glmFit(dge,design)
300
301 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout())
302 cont <- c(contrast)
303 cont <- makeContrasts(contrasts=cont, levels=design)
304
305 lrt <- glmLRT(fit, contrast=cont[,1])
306 write(paste("Exporting to file: ",output_count_edgeR,sep=""),stdout())
307 write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
308 write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA)
309
310 ## todo EXPORT FPKM
311 write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA)
312
313 if(output_MAplot != "/dev/null" || output_PValue_distribution_plot != "/dev/null") {
314 etable <- topTags(lrt, n=nrow(dge))\$table
315 etable <- etable[order(etable\$FDR), ]
316
317 if(output_MAplot != "/dev/null") {
318 write("Creating MA plot...",stdout())
319
320 if(output_format_images == "pdf") {
321 pdf(output_MAplot)
322 } else if(output_format_images == "svg") {
323 svg(output_MAplot)
324 } else {
325 ## png(output_MAplot)
326 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
327
328 bitmap(output_MAplot,type="png16m")
329 }
330
331 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance"))
332 with(subset(etable, FDR < fdr), points(logCPM, logFC, pch=20, col="red"))
333 abline(h=c(-1,1), col="blue")
334 dev.off()
335 }
336
337 if(output_PValue_distribution_plot != "/dev/null") {
338 write("Creating P-value distribution plot...",stdout())
339
340 if(output_format_images == "pdf") {
341 pdf(output_PValue_distribution_plot,width=14,height=14)
342 } else if(output_format_images == "svg") {
343 svg(output_PValue_distribution_plot,width=14,height=14)
344 } else {
345 ## png(output_PValue_distribution_plot)
346 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
347
348 bitmap(output_PValue_distribution_plot,type="png16m",width=14,height=14)
349 }
350
351 expressed_genes <- subset(etable, PValue < 0.99)
352 h <- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (< 0.99)")
353 center <- sum(h\$counts) / length(h\$counts)
354 lines(c(0,1),c(center,center),lty=2,col="red",lwd=2)
355 k <- ksmooth(h\$mid, h\$counts)
356 lines(k\$x,k\$y,col="red",lwd=2)
357 rmsd <- (h\$counts) - center
358 rmsd <- rmsd^2
359 rmsd <- sum(rmsd)
360 rmsd <- sqrt(rmsd)
361 text(0,max(h\$counts),paste("e=",round(rmsd,2),sep=""),pos=4,col="blue")
362 ## change e into epsilon somehow
363 dev.off()
364 }
365 }
366
367 if(output_heatmap_plot != "/dev/null") {
368
369 if(output_format_images == "pdf") {
370 pdf(output_heatmap_plot,width=10.5)
371 } else if(output_format_images == "svg") {
372 svg(output_heatmap_plot,width=10.5)
373 } else {
374 ## png(output_heatmap_plot)
375 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
376
377 bitmap(output_heatmap_plot,type="png16m",width=10.5)
378 }
379
380 etable2 <- topTags(lrt, n=100)\$table
381 order <- rownames(etable2)
382 cpm_sub <- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)[as.numeric(order),]
383 heatmap(t(cpm_sub))
384 dev.off()
385 }
386
387 ##output_hierarchical_clustering_plot = args[13]
388
389 if(output_RData_obj != "/dev/null") {
390 save.image(output_RData_obj)
391 }
392
393 write("Done!",stdout())
394 }
395 ]]>
396 </configfile>
397 </configfiles>
398
399 <inputs>
400 <param name="expression_matrix" type="data" format="tabular" label="Expression (read count) matrix" />
401 <param name="design_matrix" type="data" format="tabular" label="Design matrix" help="Ensure your samplenames are identical to those in the expression matrix. Preferentially, create the contrast matrix using 'edgeR: Design- from Expression matrix'." />
402
403 <param name="contrast" type="text" label="Contrast (biological question)" help="e.g. 'tumor-normal' or '(G1+G2)/2-G3' using the factors chosen in the design matrix. Read the 'makeContrasts' manual from Limma package for more info: http://www.bioconductor.org/packages/release/bioc/html/limma.html and http://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf." />
404
405 <param name="fdr" type="float" min="0" max="1" value="0.05" label="False Discovery Rate (FDR)" />
406
407 <param name="outputs" type="select" label="Optional desired outputs" multiple="true" display="checkboxes">
408 <option value="make_output_raw_counts">Raw counts table</option>
409 <option value="make_output_MDSplot_logFC">MDS-plot (logFC-method)</option>
410 <option value="make_output_MDSplot_bcv">MDS-plot (BCV-method; much slower)</option>
411 <option value="make_output_BCVplot">BCV-plot</option>
412 <option value="make_output_MAplot">MA-plot</option>
413 <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option>
414 <option value="make_output_hierarchical_clustering_plot">Hierarchical custering (under contstruction)</option>
415 <option value="make_output_heatmap_plot">Heatmap</option>
416
417 <option value="make_output_R_stdout">R stdout</option>
418 <option value="make_output_RData_obj">R Data object</option>
419 </param>
420
421 <param name="output_format_images" type="select" label="Output format of images" display="radio">
422 <option value="png">Portable network graphics (.png)</option>
423 <option value="pdf">Portable document format (.pdf)</option>
424 <option value="svg">Scalable vector graphics (.svg)</option>
425 </param>
426 </inputs>
427
428 <outputs>
429 <data format="tabular" name="output_count_edgeR" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - differentially expressed genes" />
430 <data format="tabular" name="output_cpm" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - CPM" />
431
432 <data format="tabular" name="output_raw_counts" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - raw counts">
433 <filter>outputs and ("make_output_raw_counts" in outputs)</filter>
434 </data>
435
436 <data format="png" name="output_MDSplot_logFC" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot (logFC method)">
437 <filter>outputs and ("make_output_MDSplot_logFC" in outputs)</filter>
438
439 <change_format>
440 <when input="output_format_images" value="png" format="png" />
441 <when input="output_format_images" value="pdf" format="pdf" />
442 <when input="output_format_images" value="svg" format="svg" />
443 </change_format>
444 </data>
445
446 <data format="png" name="output_MDSplot_bcv" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot (bcv method)">
447 <filter>outputs and ("make_output_MDSplot_bcv" in outputs)</filter>
448
449 <change_format>
450 <when input="output_format_images" value="png" format="png" />
451 <when input="output_format_images" value="pdf" format="pdf" />
452 <when input="output_format_images" value="svg" format="svg" />
453 </change_format>
454 </data>
455
456 <data format="png" name="output_BCVplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - BCV-plot">
457 <filter>outputs and ("make_output_BCVplot" in outputs)</filter>
458
459 <change_format>
460 <when input="output_format_images" value="png" format="png" />
461 <when input="output_format_images" value="pdf" format="pdf" />
462 <when input="output_format_images" value="svg" format="svg" />
463 </change_format>
464 </data>
465
466 <data format="png" name="output_MAplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MA-plot">
467 <filter>outputs and ("make_output_MAplot" in outputs)</filter>
468
469 <change_format>
470 <when input="output_format_images" value="png" format="png" />
471 <when input="output_format_images" value="pdf" format="pdf" />
472 <when input="output_format_images" value="svg" format="svg" />
473 </change_format>
474 </data>
475
476 <data format="png" name="output_PValue_distribution_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - P-Value distribution">
477 <filter>outputs and ("make_output_PValue_distribution_plot" in outputs)</filter>
478
479 <change_format>
480 <when input="output_format_images" value="png" format="png" />
481 <when input="output_format_images" value="pdf" format="pdf" />
482 <when input="output_format_images" value="svg" format="svg" />
483 </change_format>
484 </data>
485
486 <data format="png" name="output_hierarchical_clustering_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Hierarchical custering">
487 <filter>outputs and ("make_output_hierarchical_clustering_plot" in outputs)</filter>
488
489 <change_format>
490 <when input="output_format_images" value="png" format="png" />
491 <when input="output_format_images" value="pdf" format="pdf" />
492 <when input="output_format_images" value="svg" format="svg" />
493 </change_format>
494 </data>
495
496 <data format="png" name="output_heatmap_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Heatmap">
497 <filter>outputs and ("make_output_heatmap_plot" in outputs)</filter>
498
499 <change_format>
500 <when input="output_format_images" value="png" format="png" />
501 <when input="output_format_images" value="pdf" format="pdf" />
502 <when input="output_format_images" value="svg" format="svg" />
503 </change_format>
504 </data>
505
506 <data format="RData" name="output_RData_obj" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R data object">
507 <filter>outputs and ("make_output_RData_obj" in outputs)</filter>
508 </data>
509
510 <data format="txt" name="output_R" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R output (debug)" >
511 <filter>outputs and ("make_output_R_stdout" in outputs)</filter>
512 </data>
513 </outputs>
514
515 <tests>
516 <test>
517 <param name="expression_matrix" value="Differential_Gene_Expression/expression_matrix.tabular.txt" />
518 <param name="design_matrix" value="Differential_Gene_Expression/design_matrix.tabular.txt" />
519
520 <param name="contrast" value="E-C"/>
521
522 <param name="fdr" value="0.05" />
523
524 <param name="output_format_images" value="png" />
525
526 <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.tabular.txt" />
527 </test>
528 </tests>
529
530 <help>
531 edgeR: Differential Gene(Expression) Analysis
532 #############################################
533
534 Overview
535 --------
536 Differential expression analysis of RNA-seq and digital gene expression profiles with biological replication. Uses empirical Bayes estimation and exact tests based on the negative binomial distribution. Also useful for differential signal analysis with other types of genome-scale count data [1].
537
538 For every experiment, the algorithm requires a design matrix. This matrix describes which samples belong to which groups.
539 More details on this are given in the edgeR manual: http://www.bioconductor.org/packages/2.12/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
540 and the limma manual.
541
542 Because the creation of a design matrix can be complex and time consuming, especially if no GUI is used, this package comes with an alternative tool which can help you with it.
543 This tool is called *edgeR Design Matrix Creator*.
544 If the appropriate design matrix (with corresponding links to the files) is given,
545 the correct contrast ( http://en.wikipedia.org/wiki/Contrast_(statistics) ) has to be given.
546
547 If you have for example two groups, with an equal weight, you would like to compare either
548 "g1-g2" or "normal-cancer".
549
550 The test function makes use of a MCF7 dataset used in a study that indicates that a higher sequencing depth is not neccesairily more important than a higher amount of replaciates[2].
551
552 Input
553 -----
554 Expression matrix
555 ^^^^^^^^^^^^^^^^^
556 ::
557
558 Geneid "\t" Sample-1 "\t" Sample-2 "\t" Sample-3 "\t" Sample-4 [...] "\n"
559 SMURF "\t" 123 "\t" 21 "\t" 34545 "\t" 98 ... "\n"
560 BRCA1 "\t" 435 "\t" 6655 "\t" 45 "\t" 55 ... "\n"
561 LINK33 "\t" 4 "\t" 645 "\t" 345 "\t" 1 ... "\n"
562 SNORD78 "\t" 498 "\t" 65 "\t" 98 "\t" 27 ... "\n"
563 [...]
564
565 *Note: Make sure the number of columns in the header is identical to the number of columns in the body.*
566
567 Design matrix
568 ^^^^^^^^^^^^^
569 ::
570
571 Sample "\t" Condition "\t" Ethnicity "\t" Patient "\t" Batch "\n"
572 Sample-1 "\t" Tumor "\t" European "\t" 1 "\t" 1 "\n"
573 Sample-2 "\t" Normal "\t" European "\t" 1 "\t" 1 "\n"
574 Sample-3 "\t" Tumor "\t" European "\t" 2 "\t" 1 "\n"
575 Sample-4 "\t" Normal "\t" European "\t" 2 "\t" 1 "\n"
576 Sample-5 "\t" Tumor "\t" African "\t" 3 "\t" 1 "\n"
577 Sample-6 "\t" Normal "\t" African "\t" 3 "\t" 1 "\n"
578 Sample-7 "\t" Tumor "\t" African "\t" 4 "\t" 2 "\n"
579 Sample-8 "\t" Normal "\t" African "\t" 4 "\t" 2 "\n"
580 Sample-9 "\t" Tumor "\t" Asian "\t" 5 "\t" 2 "\n"
581 Sample-10 "\t" Normal "\t" Asian "\t" 5 "\t" 2 "\n"
582 Sample-11 "\t" Tumor "\t" Asian "\t" 6 "\t" 2 "\n"
583 Sample-12 "\t" Normal "\t" Asian "\t" 6 "\t" 2 "\n"
584
585 *Note: Avoid factor names that are (1) numerical, (2) contain mathematical symbols and preferebly only use letters.*
586
587 Contrast
588 ^^^^^^^^
589 The contrast represents the biological question. There can be many questions asked, e.g.:
590
591 - Tumor-Normal
592 - African-European
593 - 0.5*(Control+Placebo) / Treated
594
595 Installation
596 ------------
597
598 This tool requires no specific configurations. The following dependencies are installed automatically:
599
600 - R
601 - limma
602 - edgeR
603
604 License
605 -------
606 - R
607 - GPL 2 &amp; GPL 3
608 - limma
609 - GPL (&gt;=2)
610 - edgeR
611 - GPL (&gt;=2)
612
613 References
614 ----------
615
616 EdgeR
617 ^^^^^
618 **[1] edgeR: a Bioconductor package for differential expression analysis of digital gene expression data.**
619
620 *Mark D. Robinson, Davis J. McCarthy and Gordon K. Smyth* - Bioinformatics (2010) 26 (1): 139-140.
621
622 - http://www.bioconductor.org/packages/2.12/bioc/html/edgeR.html
623 - http://dx.doi.org/10.1093/bioinformatics/btp616
624 - http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
625
626 Test-data (MCF7)
627 ^^^^^^^^^^^^^^^^
628 **[2] RNA-seq differential expression studies: more sequence or more replication?**
629
630 *Yuwen Liu, Jie Zhou and Kevin P. White* - Bioinformatics (2014) 30 (3): 301-304.
631
632 - http://www.ncbi.nlm.nih.gov/pubmed/24319002
633 - http://dx.doi.org/10.1093/bioinformatics/btt688
634
635 @CONTACT@
636 </help>
637
638 <expand macro="citations" />
639 </tool>