comparison limma_voom.R @ 8:00a42b66e522 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 48125e8638453668f889a67791421f14d3ebe623
author iuc
date Tue, 15 May 2018 02:36:36 -0400
parents e6a4ff41af6b
children 4255881bebb1
comparison
equal deleted inserted replaced
7:e6a4ff41af6b 8:00a42b66e522
21 # normOpt", "n", 1, "character" -String specifying type of normalisation used 21 # normOpt", "n", 1, "character" -String specifying type of normalisation used
22 # robOpt", "b", 0, "logical" -String specifying if robust options should be used 22 # robOpt", "b", 0, "logical" -String specifying if robust options should be used
23 # trend", "t", 1, "double" -Float for prior.count if limma-trend is used instead of voom 23 # trend", "t", 1, "double" -Float for prior.count if limma-trend is used instead of voom
24 # weightOpt", "w", 0, "logical" -String specifying if voomWithQualityWeights should be used 24 # weightOpt", "w", 0, "logical" -String specifying if voomWithQualityWeights should be used
25 # topgenes", "G", 1, "integer" -Integer specifying no. of genes to highlight in volcano and heatmap 25 # topgenes", "G", 1, "integer" -Integer specifying no. of genes to highlight in volcano and heatmap
26 # treatOpt", "T", 0, "logical" -String specifying if TREAT function shuld be used 26 # treatOpt", "T", 0, "logical" -String specifying if TREAT function should be used
27 # plots, "P", 1, "character" -String specifying additional plots to be created
27 # 28 #
28 # OUT: 29 # OUT:
29 # Density Plots (if filtering) 30 # Density Plots (if filtering)
30 # Box Plots (if normalising) 31 # Box Plots (if normalising)
31 # MDS Plot 32 # MDS Plot
123 HtmlLink <- function(address, label=address) { 124 HtmlLink <- function(address, label=address) {
124 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n") 125 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n")
125 } 126 }
126 127
127 # Function to write code for html images 128 # Function to write code for html images
128 HtmlImage <- function(source, label=source, height=600, width=600) { 129 HtmlImage <- function(source, label=source, height=500, width=500) {
129 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height) 130 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
130 cata("\" width=\"", width, "\"/>\n") 131 cata("\" width=\"", width, "\"/>\n")
131 } 132 }
132 133
133 # Function to write code for html list items 134 # Function to write code for html list items
173 "normOpt", "n", 1, "character", 174 "normOpt", "n", 1, "character",
174 "robOpt", "b", 0, "logical", 175 "robOpt", "b", 0, "logical",
175 "trend", "t", 1, "double", 176 "trend", "t", 1, "double",
176 "weightOpt", "w", 0, "logical", 177 "weightOpt", "w", 0, "logical",
177 "topgenes", "G", 1, "integer", 178 "topgenes", "G", 1, "integer",
178 "treatOpt", "T", 0, "logical"), 179 "treatOpt", "T", 0, "logical",
180 "plots", "P", 1, "character"),
179 byrow=TRUE, ncol=4) 181 byrow=TRUE, ncol=4)
180 opt <- getopt(spec) 182 opt <- getopt(spec)
181 183
182 184
183 if (is.null(opt$matrixPath) & is.null(opt$filesPath)) { 185 if (is.null(opt$matrixPath) & is.null(opt$filesPath)) {
320 # Split up contrasts seperated by comma into a vector then sanitise 322 # Split up contrasts seperated by comma into a vector then sanitise
321 contrastData <- unlist(strsplit(opt$contrastData, split=",")) 323 contrastData <- unlist(strsplit(opt$contrastData, split=","))
322 contrastData <- sanitiseEquation(contrastData) 324 contrastData <- sanitiseEquation(contrastData)
323 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE) 325 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
324 326
327 plots <- character()
328 if (!is.null(opt$plots)) {
329 plots <- unlist(strsplit(opt$plots, split=","))
330 }
331
325 denOutPng <- makeOut("densityplots.png") 332 denOutPng <- makeOut("densityplots.png")
326 denOutPdf <- makeOut("DensityPlots.pdf") 333 denOutPdf <- makeOut("densityplots.pdf")
334 cpmOutPdf <- makeOut("cpmplots.pdf")
327 boxOutPng <- makeOut("boxplots.png") 335 boxOutPng <- makeOut("boxplots.png")
328 boxOutPdf <- makeOut("BoxPlots.pdf") 336 boxOutPdf <- makeOut("boxplots.pdf")
329 mdsOutPdf <- character() # Initialise character vector 337 mdsscreeOutPng <- makeOut("mdsscree.png")
330 mdsOutPng <- character() 338 mdsscreeOutPdf <- makeOut("mdsscree.pdf")
331 for (i in 1:ncol(factors)) { 339 mdsxOutPdf <- makeOut("mdsplot_extra.pdf")
332 mdsOutPdf[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".pdf")) 340 mdsxOutPng <- makeOut("mdsplot_extra.png")
333 mdsOutPng[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".png")) 341 mdsamOutPdf <- makeOut("mdplots_samples.pdf")
334 } 342 mdOutPdf <- character() # Initialise character vector
335
336 mdOutPdf <- character()
337 volOutPdf <- character() 343 volOutPdf <- character()
338 heatOutPdf <- character() 344 heatOutPdf <- character()
345 stripOutPdf <- character()
339 mdvolOutPng <- character() 346 mdvolOutPng <- character()
340 topOut <- character() 347 topOut <- character()
341 for (i in 1:length(contrastData)) { 348 for (i in 1:length(contrastData)) {
342 mdOutPdf[i] <- makeOut(paste0("mdplot_", contrastData[i], ".pdf")) 349 con <- contrastData[i]
343 volOutPdf[i] <- makeOut(paste0("volplot_", contrastData[i], ".pdf")) 350 con <- gsub("\\(|\\)", "", con)
344 heatOutPdf[i] <- makeOut(paste0("heatmap_", contrastData[i], ".pdf")) 351 mdOutPdf[i] <- makeOut(paste0("mdplot_", con, ".pdf"))
345 mdvolOutPng[i] <- makeOut(paste0("mdvolplot_", contrastData[i], ".png")) 352 volOutPdf[i] <- makeOut(paste0("volplot_", con, ".pdf"))
346 topOut[i] <- makeOut(paste0(deMethod, "_", contrastData[i], ".tsv")) 353 heatOutPdf[i] <- makeOut(paste0("heatmap_", con, ".pdf"))
354 stripOutPdf[i] <- makeOut(paste0("stripcharts_", con, ".pdf"))
355 mdvolOutPng[i] <- makeOut(paste0("mdvolplot_", con, ".png"))
356 topOut[i] <- makeOut(paste0(deMethod, "_", con, ".tsv"))
347 } 357 }
348 358
349 normOut <- makeOut(paste0(deMethod, "_normcounts.tsv")) 359 normOut <- makeOut(paste0(deMethod, "_normcounts.tsv"))
350 rdaOut <- makeOut(paste0(deMethod, "_analysis.RData")) 360 rdaOut <- makeOut(paste0(deMethod, "_analysis.RData"))
351 sessionOut <- makeOut("session_info.txt") 361 sessionOut <- makeOut("session_info.txt")
376 data$genes <- annoord 386 data$genes <- annoord
377 } else { 387 } else {
378 data$genes <- data.frame(GeneID=row.names(counts)) 388 data$genes <- data.frame(GeneID=row.names(counts))
379 } 389 }
380 390
391 # Creating naming data
392 samplenames <- colnames(data$counts)
393 sampleanno <- data.frame("sampleID"=samplenames, factors)
394
395 # Creating colours for the groups
396 cols <- as.numeric(factors[, 1])
397 col.group <- palette()[cols]
398
381 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of 399 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of
382 # samples. Default is no filtering 400 # samples. Default is no filtering
383 preFilterCount <- nrow(data$counts) 401 preFilterCount <- nrow(data$counts)
402 nsamples <- ncol(data$counts)
384 403
385 if (filtCPM || filtSmpCount || filtTotCount) { 404 if (filtCPM || filtSmpCount || filtTotCount) {
386 405
387 if (filtTotCount) { 406 if (filtTotCount) {
388 keep <- rowSums(data$counts) >= opt$cntReq 407 keep <- rowSums(data$counts) >= opt$cntReq
389 } else if (filtSmpCount) { 408 } else if (filtSmpCount) {
390 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq 409 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
391 } else if (filtCPM) { 410 } else if (filtCPM) {
392 keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq 411 myCPM <- cpm(data$counts)
412 thresh <- myCPM >= opt$cpmReq
413 keep <- rowSums(thresh) >= opt$sampleReq
414
415 if ("c" %in% plots) {
416 # Plot CPM vs raw counts (to check threshold)
417 pdf(cpmOutPdf, width=6.5, height=10)
418 par(mfrow=c(3, 2))
419 for (i in 1:nsamples) {
420 plot(data$counts[, i], myCPM[, i], xlim=c(0,50), ylim=c(0,3), main=samplenames[i], xlab="Raw counts", ylab="CPM")
421 abline(v=10, col="red", lty=2, lwd=2)
422 abline(h=opt$cpmReq, col=4)
423 }
424 linkName <- "CpmPlots.pdf"
425 linkAddr <- "cpmplots.pdf"
426 linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
427 invisible(dev.off())
428 }
393 } 429 }
394 430
395 data$counts <- data$counts[keep, ] 431 data$counts <- data$counts[keep, ]
396 data$genes <- data$genes[keep, , drop=FALSE] 432 data$genes <- data$genes[keep, , drop=FALSE]
433
434 # Plot Density
435 if ("d" %in% plots) {
436 # PNG
437 png(denOutPng, width=1000, height=500)
438 par(mfrow=c(1,2), cex.axis=0.8)
439
440 # before filtering
441 lcpm1 <- cpm(counts, log=TRUE)
442 plot(density(lcpm1[, 1]), col=col.group[1], lwd=2, las=2, main="", xlab="")
443 title(main="Density Plot: Raw counts", xlab="Log-cpm")
444 for (i in 2:nsamples){
445 den <- density(lcpm1[, i])
446 lines(den$x, den$y, col=col.group[i], lwd=2)
447 }
448
449 # after filtering
450 lcpm2 <- cpm(data$counts, log=TRUE)
451 plot(density(lcpm2[,1]), col=col.group[1], lwd=2, las=2, main="", xlab="")
452 title(main="Density Plot: Filtered counts", xlab="Log-cpm")
453 for (i in 2:nsamples){
454 den <- density(lcpm2[, i])
455 lines(den$x, den$y, col=col.group[i], lwd=2)
456 }
457 legend("topright", samplenames, text.col=col.group, bty="n")
458 imgName <- "Densityplots.png"
459 imgAddr <- "densityplots.png"
460 imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
461 invisible(dev.off())
462
463 # PDF
464 pdf(denOutPdf, width=14)
465 par(mfrow=c(1,2), cex.axis=0.8)
466 plot(density(lcpm1[, 1]), col=col.group[1], lwd=2, las=2, main="", xlab="")
467 title(main="Density Plot: Raw counts", xlab="Log-cpm")
468 for (i in 2:nsamples){
469 den <- density(lcpm1[, i])
470 lines(den$x, den$y, col=col.group[i], lwd=2)
471 }
472 plot(density(lcpm2[, 1]), col=col.group[1], lwd=2, las=2, main="", xlab="")
473 title(main="Density Plot: Filtered counts", xlab="Log-cpm")
474 for (i in 2:nsamples){
475 den <- density(lcpm2[, i])
476 lines(den$x, den$y, col=col.group[i], lwd=2)
477 }
478 legend("topright", samplenames, text.col=col.group, bty="n")
479 linkName <- "DensityPlots.pdf"
480 linkAddr <- "densityplots.pdf"
481 linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
482 invisible(dev.off())
483 }
397 } 484 }
398 485
399 postFilterCount <- nrow(data$counts) 486 postFilterCount <- nrow(data$counts)
400 filteredCount <- preFilterCount-postFilterCount 487 filteredCount <- preFilterCount-postFilterCount
401
402 # Creating naming data
403 samplenames <- colnames(data$counts)
404 sampleanno <- data.frame("sampleID"=samplenames, factors)
405
406 488
407 # Generating the DGEList object "y" 489 # Generating the DGEList object "y"
408 print("Generating DGEList object") 490 print("Generating DGEList object")
409 data$samples <- sampleanno 491 data$samples <- sampleanno
410 data$samples$lib.size <- colSums(data$counts) 492 data$samples$lib.size <- colSums(data$counts)
436 518
437 ################################################################################ 519 ################################################################################
438 ### Data Output 520 ### Data Output
439 ################################################################################ 521 ################################################################################
440 522
441 # Plot Density (if filtering low counts)
442 if (filtCPM || filtSmpCount || filtTotCount) {
443 nsamples <- ncol(counts)
444
445 # PNG
446 png(denOutPng, width=1200, height=600)
447 par(mfrow=c(1,2), cex.axis=0.8)
448
449 # before filtering
450 logcpm <- cpm(counts, log=TRUE)
451 plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
452 title(main="Density Plot: Raw counts", xlab="Log-cpm")
453 for (i in 2:nsamples){
454 den <- density(logcpm[,i])
455 lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
456 }
457
458 # after filtering
459 logcpm <- cpm(data$counts, log=TRUE)
460 plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
461 title(main="Density Plot: Filtered counts", xlab="Log-cpm")
462 for (i in 2:nsamples){
463 den <- density(logcpm[,i])
464 lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
465 }
466 legend("topright", samplenames, text.col=as.numeric(factors[, 1]), bty="n")
467 imgName <- "Densityplots.png"
468 imgAddr <- "densityplots.png"
469 imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
470 invisible(dev.off())
471
472 # PDF
473 pdf(denOutPdf, width=14)
474 par(mfrow=c(1,2), cex.axis=0.8)
475 logcpm <- cpm(counts, log=TRUE)
476 plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
477 title(main="Density Plot: Raw counts", xlab="Log-cpm")
478 for (i in 2:nsamples){
479 den <- density(logcpm[,i])
480 lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
481 }
482 logcpm <- cpm(data$counts, log=TRUE)
483 plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
484 title(main="Density Plot: Filtered counts", xlab="Log-cpm")
485 for (i in 2:nsamples){
486 den <- density(logcpm[,i])
487 lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
488 }
489 legend("topright", samplenames, text.col=as.numeric(factors[, 1]), bty="n")
490 linkName <- "DensityPlots.pdf"
491 linkAddr <- "densityplots.pdf"
492 linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
493 invisible(dev.off())
494 }
495
496 # Plot Box plots (before and after normalisation) 523 # Plot Box plots (before and after normalisation)
497 if (opt$normOpt != "none") { 524 if (opt$normOpt != "none" & "b" %in% plots) {
498 png(boxOutPng, width=1200, height=600) 525 png(boxOutPng, width=1000, height=500)
499 par(mfrow=c(1,2), cex.axis=0.8) 526 par(mfrow=c(1,2), mar=c(6,4,2,2)+0.1)
500 logcpm <- cpm(y$counts, log=TRUE) 527 labels <- colnames(counts)
501 boxplot(logcpm, las=2, col=as.numeric(factors[, 1])) 528
502 abline(h=median(logcpm), col=4) 529 lcpm1 <- cpm(y$counts, log=TRUE)
530 boxplot(lcpm1, las=2, col=col.group, xaxt="n", xlab="")
531 axis(1, at=seq_along(labels), labels = FALSE)
532 abline(h=median(lcpm1), col=4)
533 text(x=seq_along(labels), y=par("usr")[3]-1, srt=45, adj=1, labels=labels, xpd=TRUE)
503 title(main="Box Plot: Unnormalised counts", ylab="Log-cpm") 534 title(main="Box Plot: Unnormalised counts", ylab="Log-cpm")
504 lcpm <- cpm(y, log=TRUE) 535
505 boxplot(lcpm, las=2, col=as.numeric(factors[, 1])) 536 lcpm2 <- cpm(y, log=TRUE)
506 abline(h=median(lcpm), col=4) 537 boxplot(lcpm2, las=2, col=col.group, xaxt="n", xlab="")
538 axis(1, at=seq_along(labels), labels = FALSE)
539 text(x=seq_along(labels), y=par("usr")[3]-1, srt=45, adj=1, labels=labels, xpd=TRUE)
540 abline(h=median(lcpm2), col=4)
507 title(main="Box Plot: Normalised counts", ylab="Log-cpm") 541 title(main="Box Plot: Normalised counts", ylab="Log-cpm")
542
508 imgName <- "Boxplots.png" 543 imgName <- "Boxplots.png"
509 imgAddr <- "boxplots.png" 544 imgAddr <- "boxplots.png"
510 imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE)) 545 imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
511 invisible(dev.off()) 546 invisible(dev.off())
512 547
513 pdf(boxOutPdf, width=14) 548 pdf(boxOutPdf, width=14)
514 par(mfrow=c(1,2), cex.axis=0.8) 549 par(mfrow=c(1,2), mar=c(6,4,2,2)+0.1)
515 logcpm <- cpm(y$counts, log=TRUE) 550 boxplot(lcpm1, las=2, col=col.group, xaxt="n", xlab="")
516 boxplot(logcpm, las=2, col=as.numeric(factors[, 1])) 551 axis(1, at=seq_along(labels), labels = FALSE)
517 abline(h=median(logcpm), col=4) 552 abline(h=median(lcpm1), col=4)
553 text(x=seq_along(labels), y=par("usr")[3]-1, srt=45, adj=1, labels=labels, xpd=TRUE)
518 title(main="Box Plot: Unnormalised counts", ylab="Log-cpm") 554 title(main="Box Plot: Unnormalised counts", ylab="Log-cpm")
519 lcpm <- cpm(y, log=TRUE) 555 boxplot(lcpm2, las=2, col=col.group, xaxt="n", xlab="")
520 boxplot(lcpm, las=2, col=as.numeric(factors[, 1])) 556 axis(1, at=seq_along(labels), labels = FALSE)
521 abline(h=median(lcpm), col=4) 557 text(x=seq_along(labels), y=par("usr")[3]-1, srt=45, adj=1, labels=labels, xpd=TRUE)
558 abline(h=median(lcpm2), col=4)
522 title(main="Box Plot: Normalised counts", ylab="Log-cpm") 559 title(main="Box Plot: Normalised counts", ylab="Log-cpm")
523 linkName <- "BoxPlots.pdf" 560 linkName <- "BoxPlots.pdf"
524 linkAddr <- "boxplots.pdf" 561 linkAddr <- "boxplots.pdf"
525 linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE)) 562 linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
526 invisible(dev.off()) 563 invisible(dev.off())
528 565
529 # Plot MDS 566 # Plot MDS
530 print("Generating MDS plot") 567 print("Generating MDS plot")
531 labels <- names(counts) 568 labels <- names(counts)
532 569
533 for (i in 1:ncol(factors)) { 570 # Scree plot (Variance Explained) code copied from Glimma
534 png(mdsOutPng[i], width=600, height=600) 571
535 plotMDS(y, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i])) 572 # get column of matrix
536 imgName <- paste0("MDSPlot_", names(factors)[i], ".png") 573 getCols <- function(x, inds) {
537 imgAddr <- paste0("mdsplot_", names(factors)[i], ".png") 574 x[, inds, drop=FALSE]
575 }
576
577 x <- cpm(y, log=TRUE)
578 ndim <- nsamples - 1
579 nprobes <- nrow(x)
580 top <- 500
581 top <- min(top, nprobes)
582 cn <- colnames(x)
583 bad <- rowSums(is.finite(x)) < nsamples
584
585 if (any(bad)) {
586 warning("Rows containing infinite values have been removed")
587 x <- x[!bad, , drop=FALSE]
588 }
589
590 dd <- matrix(0, nrow=nsamples, ncol=nsamples, dimnames=list(cn, cn))
591 topindex <- nprobes - top + 1L
592 for (i in 2L:(nsamples)) {
593 for (j in 1L:(i - 1L)) {
594 dists <- (getCols(x, i) - getCols(x, j))^2
595 dists <- sort.int(dists, partial = topindex )
596 topdist <- dists[topindex:nprobes]
597 dd[i, j] <- sqrt(mean(topdist))
598 }
599 }
600
601 a1 <- suppressWarnings(cmdscale(as.dist(dd), k=min(ndim, 8), eig=TRUE))
602 eigen <- data.frame(name = 1:min(ndim, 8), eigen = round(a1$eig[1:min(ndim, 8)]/sum(a1$eig), 2))
603
604 png(mdsscreeOutPng, width=1000, height=500)
605 par(mfrow=c(1, 2))
606 plotMDS(y, labels=samplenames, col=as.numeric(factors[, 1]), main="MDS Plot: Dims 1 and 2")
607 barplot(eigen$eigen, names.arg=eigen$name, main = "Scree Plot: Variance Explained", xlab = "Dimension", ylab = "Proportion", las=1)
608 imgName <- paste0("MDSPlot_", names(factors)[1], ".png")
609 imgAddr <- "mdsscree.png"
610 imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
611 invisible(dev.off())
612
613 pdf(mdsscreeOutPdf, width=14)
614 par(mfrow=c(1, 2))
615 plotMDS(y, labels=samplenames, col=as.numeric(factors[, 1]), main="MDS Plot: Dims 1 and 2")
616 barplot(eigen$eigen, names.arg=eigen$name, main = "Scree Plot: Variance Explained", xlab = "Dimension", ylab = "Proportion", las=1)
617 linkName <- paste0("MDSPlot_", names(factors)[1], ".pdf")
618 linkAddr <- "mdsscree.pdf"
619 linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
620 invisible(dev.off())
621
622 if ("x" %in% plots) {
623 png(mdsxOutPng, width=1000, height=500)
624 par(mfrow=c(1, 2))
625 for (i in 2:3) {
626 dim1 <- i
627 dim2 <- i + 1
628 plotMDS(y, dim=c(dim1, dim2), labels=samplenames, col=as.numeric(factors[, 1]), main=paste("MDS Plot: Dims", dim1, "and", dim2))
629 }
630 imgName <- paste0("MDSPlot_extra.png")
631 imgAddr <- paste0("mdsplot_extra.png")
538 imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE)) 632 imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
539 invisible(dev.off()) 633 invisible(dev.off())
540 634
541 pdf(mdsOutPdf[i]) 635 pdf(mdsxOutPdf, width=14)
542 plotMDS(y, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i])) 636 par(mfrow=c(1, 2))
543 linkName <- paste0("MDSPlot_", names(factors)[i], ".pdf") 637 for (i in 2:3) {
544 linkAddr <- paste0("mdsplot_", names(factors)[i], ".pdf") 638 dim1 <- i
639 dim2 <- i + 1
640 plotMDS(y, dim=c(dim1, dim2), labels=samplenames, col=as.numeric(factors[, 1]), main=paste("MDS Plot: Dims", dim1, "and", dim2))
641 }
642 linkName <- "MDSPlot_extra.pdf"
643 linkAddr <- "mdsplot_extra.pdf"
545 linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE)) 644 linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
546 invisible(dev.off()) 645 invisible(dev.off())
547 } 646 }
647
648 if ("m" %in% plots) {
649 # Plot MD plots for individual samples
650 print("Generating MD plots for samples")
651 pdf(mdsamOutPdf, width=6.5, height=10)
652 par(mfrow=c(3, 2))
653 for (i in 1:nsamples) {
654 plotMD(y, column = i)
655 abline(h=0, col="red", lty=2, lwd=2)
656 }
657 linkName <- "MDPlots_Samples.pdf"
658 linkAddr <- "mdplots_samples.pdf"
659 linkData <- rbind(linkData, c(linkName, linkAddr))
660 invisible(dev.off())
661 }
662
548 663
549 if (wantTrend) { 664 if (wantTrend) {
550 # limma-trend approach 665 # limma-trend approach
551 logCPM <- cpm(y, log=TRUE, prior.count=opt$trend) 666 logCPM <- cpm(y, log=TRUE, prior.count=opt$trend)
552 fit <- lmFit(logCPM, design) 667 fit <- lmFit(logCPM, design)
559 } 674 }
560 # plot fit with plotSA 675 # plot fit with plotSA
561 saOutPng <- makeOut("saplot.png") 676 saOutPng <- makeOut("saplot.png")
562 saOutPdf <- makeOut("saplot.pdf") 677 saOutPdf <- makeOut("saplot.pdf")
563 678
564 png(saOutPng, width=600, height=600) 679 png(saOutPng, width=500, height=500)
565 plotSA(fit, main="SA Plot") 680 plotSA(fit, main="SA Plot")
566 imgName <- "SAPlot.png" 681 imgName <- "SAPlot.png"
567 imgAddr <- "saplot.png" 682 imgAddr <- "saplot.png"
568 imageData <- rbind(imageData, c(imgName, imgAddr)) 683 imageData <- rbind(imageData, c(imgName, imgAddr))
569 invisible(dev.off()) 684 invisible(dev.off())
587 voomOutPdf <- makeOut("voomplot.pdf") 702 voomOutPdf <- makeOut("voomplot.pdf")
588 voomOutPng <- makeOut("voomplot.png") 703 voomOutPng <- makeOut("voomplot.png")
589 704
590 if (wantWeight) { 705 if (wantWeight) {
591 # Creating voom data object and plot 706 # Creating voom data object and plot
592 png(voomOutPng, width=1000, height=600) 707 png(voomOutPng, width=1000, height=500)
593 vData <- voomWithQualityWeights(y, design=design, plot=TRUE) 708 vData <- voomWithQualityWeights(y, design=design, plot=TRUE)
594 imgName <- "VoomPlot.png" 709 imgName <- "VoomPlot.png"
595 imgAddr <- "voomplot.png" 710 imgAddr <- "voomplot.png"
596 imageData <- rbind(imageData, c(imgName, imgAddr)) 711 imageData <- rbind(imageData, c(imgName, imgAddr))
597 invisible(dev.off()) 712 invisible(dev.off())
607 wts <- vData$weights 722 wts <- vData$weights
608 voomFit <- lmFit(vData, design, weights=wts) 723 voomFit <- lmFit(vData, design, weights=wts)
609 724
610 } else { 725 } else {
611 # Creating voom data object and plot 726 # Creating voom data object and plot
612 png(voomOutPng, width=600, height=600) 727 png(voomOutPng, width=500, height=500)
613 vData <- voom(y, design=design, plot=TRUE) 728 vData <- voom(y, design=design, plot=TRUE)
614 imgName <- "VoomPlot" 729 imgName <- "VoomPlot"
615 imgAddr <- "voomplot.png" 730 imgAddr <- "voomplot.png"
616 imageData <- rbind(imageData, c(imgName, imgAddr)) 731 imageData <- rbind(imageData, c(imgName, imgAddr))
617 invisible(dev.off()) 732 invisible(dev.off())
659 status = decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq, 774 status = decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq,
660 lfc=opt$lfcReq) 775 lfc=opt$lfcReq)
661 sumStatus <- summary(status) 776 sumStatus <- summary(status)
662 777
663 for (i in 1:length(contrastData)) { 778 for (i in 1:length(contrastData)) {
779 con <- contrastData[i]
780 con <- gsub("\\(|\\)", "", con)
664 # Collect counts for differential expression 781 # Collect counts for differential expression
665 upCount[i] <- sumStatus["Up", i] 782 upCount[i] <- sumStatus["Up", i]
666 downCount[i] <- sumStatus["Down", i] 783 downCount[i] <- sumStatus["Down", i]
667 flatCount[i] <- sumStatus["NotSig", i] 784 flatCount[i] <- sumStatus["NotSig", i]
668 785
671 top <- topTreat(fit, coef=i, number=Inf, sort.by="P") 788 top <- topTreat(fit, coef=i, number=Inf, sort.by="P")
672 } else{ 789 } else{
673 top <- topTable(fit, coef=i, number=Inf, sort.by="P") 790 top <- topTable(fit, coef=i, number=Inf, sort.by="P")
674 } 791 }
675 write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE) 792 write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE)
676 793 linkName <- paste0(deMethod, "_", con, ".tsv")
677 linkName <- paste0(deMethod, "_", contrastData[i], ".tsv") 794 linkAddr <- paste0(deMethod, "_", con, ".tsv")
678 linkAddr <- paste0(deMethod, "_", contrastData[i], ".tsv")
679 linkData <- rbind(linkData, c(linkName, linkAddr)) 795 linkData <- rbind(linkData, c(linkName, linkAddr))
680 796
681 # Plot MD (log ratios vs mean average) using limma package on weighted 797 # Plot MD (log ratios vs mean average) using limma package on weighted
682 pdf(mdOutPdf[i]) 798 pdf(mdOutPdf[i])
683 limma::plotMD(fit, status=status[, i], coef=i, 799 limma::plotMD(fit, status=status[, i], coef=i,
684 main=paste("MD Plot:", unmake.names(contrastData[i])), 800 main=paste("MD Plot:", unmake.names(con)),
685 hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1), 801 hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
686 xlab="Average Expression", ylab="logFC") 802 xlab="Average Expression", ylab="logFC")
687
688 abline(h=0, col="grey", lty=2) 803 abline(h=0, col="grey", lty=2)
689 804 linkName <- paste0("MDPlot_", con, ".pdf")
690 linkName <- paste0("MDPlot_", contrastData[i], ".pdf") 805 linkAddr <- paste0("mdplot_", con, ".pdf")
691 linkAddr <- paste0("mdplot_", contrastData[i], ".pdf")
692 linkData <- rbind(linkData, c(linkName, linkAddr)) 806 linkData <- rbind(linkData, c(linkName, linkAddr))
693 invisible(dev.off()) 807 invisible(dev.off())
694 808
695 # Plot Volcano 809 # Plot Volcano
696 pdf(volOutPdf[i]) 810 pdf(volOutPdf[i])
697 if (haveAnno) { 811 if (haveAnno) {
698 # labels must be in second column currently 812 # labels must be in second column currently
699 limma::volcanoplot(fit, coef=i, 813 labels <- fit$genes[, 2]
700 main=paste("Volcano Plot:", unmake.names(contrastData[i])),
701 highlight=opt$topgenes,
702 names=fit$genes[, 2])
703 } else { 814 } else {
704 limma::volcanoplot(fit, coef=i, 815 labels <- fit$genes$GeneID
705 main=paste("Volcano Plot:", unmake.names(contrastData[i])), 816 }
706 highlight=opt$topgenes, 817 limma::volcanoplot(fit, coef=i,
707 names=fit$genes$GeneID) 818 main=paste("Volcano Plot:", unmake.names(con)),
708 } 819 highlight=opt$topgenes,
709 linkName <- paste0("VolcanoPlot_", contrastData[i], ".pdf") 820 names=labels)
710 linkAddr <- paste0("volplot_", contrastData[i], ".pdf") 821 linkName <- paste0("VolcanoPlot_", con, ".pdf")
822 linkAddr <- paste0("volplot_", con, ".pdf")
711 linkData <- rbind(linkData, c(linkName, linkAddr)) 823 linkData <- rbind(linkData, c(linkName, linkAddr))
712 invisible(dev.off()) 824 invisible(dev.off())
713 825
714 # Plot Heatmap
715 topgenes <- rownames(top[1:opt$topgenes, ])
716 if (wantTrend) {
717 topexp <- plotData[topgenes, ]
718 } else {
719 topexp <- plotData$E[topgenes, ]
720 }
721
722 pdf(heatOutPdf[i])
723 par(cex.main=0.8)
724 if (haveAnno) {
725 # labels must be in second column currently
726 heatmap.2(topexp, scale="row",
727 main=paste("Heatmap:", unmake.names(contrastData[i])),
728 col="bluered", trace="none", density.info="none",
729 margin=c(8,6), lhei=c(2,10), dendrogram="column",
730 cexRow=0.7, cexCol=0.7, srtCol=45,
731 labRow=top[topgenes, 2])
732 } else {
733 heatmap.2(topexp, scale="row",
734 main=paste("Heatmap:", unmake.names(contrastData[i])),
735 col="bluered", trace="none", density.info="none",
736 margin=c(8,6), lhei=c(2,10), dendrogram="column",
737 cexRow=0.7, cexCol=0.7, srtCol=45)
738 }
739 linkName <- paste0("Heatmap_", contrastData[i], ".pdf")
740 linkAddr <- paste0("heatmap_", contrastData[i], ".pdf")
741 linkData <- rbind(linkData, c(linkName, linkAddr))
742 invisible(dev.off())
743
744 # PNG of MD and Volcano 826 # PNG of MD and Volcano
745 png(mdvolOutPng[i], width=1200, height=600) 827 png(mdvolOutPng[i], width=1000, height=500)
746 par(mfrow=c(1, 2), mar=c(5,4,2,2)+0.1, oma=c(0,0,3,0)) 828 par(mfrow=c(1, 2), mar=c(5,4,2,2)+0.1, oma=c(0,0,3,0))
829
830 # MD plot
747 limma::plotMD(fit, status=status[, i], coef=i, main="MD Plot", 831 limma::plotMD(fit, status=status[, i], coef=i, main="MD Plot",
748 hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1), 832 hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
749 xlab="Average Expression", ylab="logFC") 833 xlab="Average Expression", ylab="logFC")
750 abline(h=0, col="grey", lty=2) 834 abline(h=0, col="grey", lty=2)
751 835
752 # Volcano plots 836 # Volcano
753 if (haveAnno) { 837 if (haveAnno) {
754 # labels must be in second column currently 838 # labels must be in second column currently
755 limma::volcanoplot(fit, coef=i, main="Volcano Plot", 839 limma::volcanoplot(fit, coef=i, main="Volcano Plot",
756 highlight=opt$topgenes, 840 highlight=opt$topgenes,
757 names=fit$genes[, 2]) 841 names=fit$genes[, 2])
759 limma::volcanoplot(fit, coef=i, main="Volcano Plot", 843 limma::volcanoplot(fit, coef=i, main="Volcano Plot",
760 highlight=opt$topgenes, 844 highlight=opt$topgenes,
761 names=fit$genes$GeneID) 845 names=fit$genes$GeneID)
762 } 846 }
763 847
764 imgName <- paste0("MDVolPlot_", contrastData[i]) 848 imgName <- paste0("MDVolPlot_", con)
765 imgAddr <- paste0("mdvolplot_", contrastData[i], ".png") 849 imgAddr <- paste0("mdvolplot_", con, ".png")
766 imageData <- rbind(imageData, c(imgName, imgAddr)) 850 imageData <- rbind(imageData, c(imgName, imgAddr))
767 title(paste0("Contrast: ", unmake.names(contrastData[i])), outer=TRUE, cex.main=1.5) 851 title(paste0("Contrast: ", unmake.names(con)), outer=TRUE, cex.main=1.5)
768 invisible(dev.off()) 852 invisible(dev.off())
853
854 if ("h" %in% plots) {
855 # Plot Heatmap
856 topgenes <- rownames(top[1:opt$topgenes, ])
857 if (wantTrend) {
858 topexp <- plotData[topgenes, ]
859 } else {
860 topexp <- plotData$E[topgenes, ]
861 }
862 pdf(heatOutPdf[i])
863 mycol <- colorpanel(1000,"blue","white","red")
864 if (haveAnno) {
865 # labels must be in second column currently
866 labels <- top[topgenes, 2]
867 } else {
868 labels <- rownames(topexp)
869 }
870 heatmap.2(topexp, scale="row", Colv=FALSE, Rowv=FALSE, dendrogram="none",
871 main=paste("Contrast:", unmake.names(con), "\nTop", opt$topgenes, "genes by adj.P.Val"),
872 trace="none", density.info="none", lhei=c(2,10), margin=c(8, 6), labRow=labels, cexRow=0.7, srtCol=45,
873 col=mycol, ColSideColors=col.group)
874 linkName <- paste0("Heatmap_", con, ".pdf")
875 linkAddr <- paste0("heatmap_", con, ".pdf")
876 linkData <- rbind(linkData, c(linkName, linkAddr))
877 invisible(dev.off())
878 }
879
880 if ("s" %in% plots) {
881 # Plot Stripcharts of top genes
882 pdf(stripOutPdf[i], title=paste("Contrast:", unmake.names(con)))
883 par(mfrow = c(3,2), cex.main=0.8, cex.axis=0.8)
884 cols <- unique(col.group)
885
886 for (j in 1:length(topgenes)) {
887 lfc <- round(top[topgenes[j], "logFC"], 2)
888 pval <- round(top[topgenes[j], "adj.P.Val"], 5)
889 if (wantTrend) {
890 stripchart(plotData[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols,
891 method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval))
892 } else {
893 stripchart(plotData$E[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols,
894 method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval))
895 }
896 }
897 linkName <- paste0("Stripcharts_", con, ".pdf")
898 linkAddr <- paste0("stripcharts_", con, ".pdf")
899 linkData <- rbind(linkData, c(linkName, linkAddr))
900 invisible(dev.off())
901 }
769 } 902 }
770 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) 903 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
771 row.names(sigDiff) <- contrastData 904 row.names(sigDiff) <- contrastData
772 905
773 # Save relevant items as rda object 906 # Save relevant items as rda object
774 if (wantRda) { 907 if (wantRda) {
775 print("Saving RData") 908 print("Saving RData")
776 if (wantWeight) { 909 if (wantWeight) {
777 save(counts, data, y, status, plotData, labels, factors, wts, fit, top, contrasts, design, 910 save(counts, data, y, status, plotData, labels, factors, wts, fit, top, contrastData, contrasts, design,
778 file=rdaOut, ascii=TRUE) 911 file=rdaOut, ascii=TRUE)
779 } else { 912 } else {
780 save(counts, data, y, status, plotData, labels, factors, fit, top, contrasts, design, 913 save(counts, data, y, status, plotData, labels, factors, fit, top, contrastData, contrasts, design,
781 file=rdaOut, ascii=TRUE) 914 file=rdaOut, ascii=TRUE)
782 } 915 }
783 linkData <- rbind(linkData, c((paste0(deMethod, "_analysis.RData")), (paste0(deMethod, "_analysis.RData")))) 916 linkData <- rbind(linkData, c((paste0(deMethod, "_analysis.RData")), (paste0(deMethod, "_analysis.RData"))))
784 } 917 }
785 918
803 cata("<body>\n") 936 cata("<body>\n")
804 cata("<h3>Limma Analysis Output:</h3>\n") 937 cata("<h3>Limma Analysis Output:</h3>\n")
805 cata("Links to PDF copies of plots are in 'Plots' section below <br />\n") 938 cata("Links to PDF copies of plots are in 'Plots' section below <br />\n")
806 939
807 for (i in 1:nrow(imageData)) { 940 for (i in 1:nrow(imageData)) {
808 if (grepl("density|box|mdvol", imageData$Link[i])) { 941 if (grepl("density|box|mds|mdvol", imageData$Link[i])) {
809 HtmlImage(imageData$Link[i], imageData$Label[i], width=1200) 942 HtmlImage(imageData$Link[i], imageData$Label[i], width=1000)
810 } else if (wantWeight) { 943 } else if (wantWeight) {
811 HtmlImage(imageData$Link[i], imageData$Label[i], width=1000) 944 HtmlImage(imageData$Link[i], imageData$Label[i], width=1000)
812 } else { 945 } else {
813 HtmlImage(imageData$Link[i], imageData$Label[i]) 946 HtmlImage(imageData$Link[i], imageData$Label[i])
814 } 947 }
832 cata("</tr>\n") 965 cata("</tr>\n")
833 } 966 }
834 cata("</table>") 967 cata("</table>")
835 968
836 cata("<h4>Plots:</h4>\n") 969 cata("<h4>Plots:</h4>\n")
970 #PDFs
837 for (i in 1:nrow(linkData)) { 971 for (i in 1:nrow(linkData)) {
838 if (grepl(".pdf", linkData$Link[i])) { 972 if (grepl("density|cpm|boxplot|mds|mdplots|voomplot|saplot", linkData$Link[i])) {
973 HtmlLink(linkData$Link[i], linkData$Label[i])
974 }
975 }
976
977 for (i in 1:nrow(linkData)) {
978 if (grepl("mdplot_", linkData$Link[i])) {
979 HtmlLink(linkData$Link[i], linkData$Label[i])
980 }
981 }
982
983 for (i in 1:nrow(linkData)) {
984 if (grepl("volplot", linkData$Link[i])) {
985 HtmlLink(linkData$Link[i], linkData$Label[i])
986 }
987 }
988
989 for (i in 1:nrow(linkData)) {
990 if (grepl("heatmap", linkData$Link[i])) {
991 HtmlLink(linkData$Link[i], linkData$Label[i])
992 }
993 }
994
995 for (i in 1:nrow(linkData)) {
996 if (grepl("stripcharts", linkData$Link[i])) {
839 HtmlLink(linkData$Link[i], linkData$Label[i]) 997 HtmlLink(linkData$Link[i], linkData$Label[i])
840 } 998 }
841 } 999 }
842 1000
843 cata("<h4>Tables:</h4>\n") 1001 cata("<h4>Tables:</h4>\n")