comparison test-data/out_rscript.txt @ 4:a61a6e62e91f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 6a458881c0819b75e55e64b3f494679d43bb9ee8
author iuc
date Sun, 29 Apr 2018 17:36:42 -0400
parents
children d8a55b5f0de0
comparison
equal deleted inserted replaced
3:38aab66ae5cb 4:a61a6e62e91f
1 # This tool takes in a matrix of feature counts as well as gene annotations and
2 # outputs a table of top expressions as well as various plots for differential
3 # expression analysis
4 #
5 # ARGS: htmlPath", "R", 1, "character" -Path to html file linking to other outputs
6 # outPath", "o", 1, "character" -Path to folder to write all output to
7 # filesPath", "j", 2, "character" -JSON list object if multiple files input
8 # matrixPath", "m", 2, "character" -Path to count matrix
9 # factFile", "f", 2, "character" -Path to factor information file
10 # factInput", "i", 2, "character" -String containing factors if manually input
11 # annoPath", "a", 2, "character" -Path to input containing gene annotations
12 # contrastData", "C", 1, "character" -String containing contrasts of interest
13 # cpmReq", "c", 2, "double" -Float specifying cpm requirement
14 # cntReq", "z", 2, "integer" -Integer specifying minimum total count requirement
15 # sampleReq", "s", 2, "integer" -Integer specifying cpm requirement
16 # normCounts", "x", 0, "logical" -String specifying if normalised counts should be output
17 # rdaOpt", "r", 0, "logical" -String specifying if RData should be output
18 # lfcReq", "l", 1, "double" -Float specifying the log-fold-change requirement
19 # pValReq", "p", 1, "double" -Float specifying the p-value requirement
20 # pAdjOpt", "d", 1, "character" -String specifying the p-value adjustment method
21 # normOpt", "n", 1, "character" -String specifying type of normalisation used
22 # robOpt", "b", 0, "logical" -String specifying if robust options should be used
23 # trend", "t", 1, "double" -Float for prior.count if limma-trend is used instead of voom
24 # weightOpt", "w", 0, "logical" -String specifying if voomWithQualityWeights should be used
25 #
26 # OUT:
27 # MDS Plot
28 # Voom/SA plot
29 # MD Plot
30 # Expression Table
31 # HTML file linking to the ouputs
32 # Optional:
33 # Normalised counts Table
34 # RData file
35 #
36 #
37 # Author: Shian Su - registertonysu@gmail.com - Jan 2014
38 # Modified by: Maria Doyle - Jun 2017, Jan 2018
39
40 # Record starting time
41 timeStart <- as.character(Sys.time())
42
43 # Load all required libraries
44 library(methods, quietly=TRUE, warn.conflicts=FALSE)
45 library(statmod, quietly=TRUE, warn.conflicts=FALSE)
46 library(splines, quietly=TRUE, warn.conflicts=FALSE)
47 library(edgeR, quietly=TRUE, warn.conflicts=FALSE)
48 library(limma, quietly=TRUE, warn.conflicts=FALSE)
49 library(scales, quietly=TRUE, warn.conflicts=FALSE)
50 library(getopt, quietly=TRUE, warn.conflicts=FALSE)
51
52 if (packageVersion("limma") < "3.20.1") {
53 stop("Please update 'limma' to version >= 3.20.1 to run this tool")
54 }
55
56 ################################################################################
57 ### Function Delcaration
58 ################################################################################
59 # Function to sanitise contrast equations so there are no whitespaces
60 # surrounding the arithmetic operators, leading or trailing whitespace
61 sanitiseEquation <- function(equation) {
62 equation <- gsub(" *[+] *", "+", equation)
63 equation <- gsub(" *[-] *", "-", equation)
64 equation <- gsub(" *[/] *", "/", equation)
65 equation <- gsub(" *[*] *", "*", equation)
66 equation <- gsub("^\\s+|\\s+$", "", equation)
67 return(equation)
68 }
69
70 # Function to sanitise group information
71 sanitiseGroups <- function(string) {
72 string <- gsub(" *[,] *", ",", string)
73 string <- gsub("^\\s+|\\s+$", "", string)
74 return(string)
75 }
76
77 # Function to change periods to whitespace in a string
78 unmake.names <- function(string) {
79 string <- gsub(".", " ", string, fixed=TRUE)
80 return(string)
81 }
82
83 # Generate output folder and paths
84 makeOut <- function(filename) {
85 return(paste0(opt$outPath, "/", filename))
86 }
87
88 # Generating design information
89 pasteListName <- function(string) {
90 return(paste0("factors$", string))
91 }
92
93 # Create cata function: default path set, default seperator empty and appending
94 # true by default (Ripped straight from the cat function with altered argument
95 # defaults)
96 cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL,
97 append = TRUE) {
98 if (is.character(file))
99 if (file == "")
100 file <- stdout()
101 else if (substring(file, 1L, 1L) == "|") {
102 file <- pipe(substring(file, 2L), "w")
103 on.exit(close(file))
104 }
105 else {
106 file <- file(file, ifelse(append, "a", "w"))
107 on.exit(close(file))
108 }
109 .Internal(cat(list(...), file, sep, fill, labels, append))
110 }
111
112 # Function to write code for html head and title
113 HtmlHead <- function(title) {
114 cata("<head>\n")
115 cata("<title>", title, "</title>\n")
116 cata("</head>\n")
117 }
118
119 # Function to write code for html links
120 HtmlLink <- function(address, label=address) {
121 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n")
122 }
123
124 # Function to write code for html images
125 HtmlImage <- function(source, label=source, height=600, width=600) {
126 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
127 cata("\" width=\"", width, "\"/>\n")
128 }
129
130 # Function to write code for html list items
131 ListItem <- function(...) {
132 cata("<li>", ..., "</li>\n")
133 }
134
135 TableItem <- function(...) {
136 cata("<td>", ..., "</td>\n")
137 }
138
139 TableHeadItem <- function(...) {
140 cata("<th>", ..., "</th>\n")
141 }
142
143 ################################################################################
144 ### Input Processing
145 ################################################################################
146
147 # Collect arguments from command line
148 args <- commandArgs(trailingOnly=TRUE)
149
150 # Get options, using the spec as defined by the enclosed list.
151 # Read the options from the default: commandArgs(TRUE).
152 spec <- matrix(c(
153 "htmlPath", "R", 1, "character",
154 "outPath", "o", 1, "character",
155 "filesPath", "j", 2, "character",
156 "matrixPath", "m", 2, "character",
157 "factFile", "f", 2, "character",
158 "factInput", "i", 2, "character",
159 "annoPath", "a", 2, "character",
160 "contrastData", "C", 1, "character",
161 "cpmReq", "c", 1, "double",
162 "totReq", "y", 0, "logical",
163 "cntReq", "z", 1, "integer",
164 "sampleReq", "s", 1, "integer",
165 "normCounts", "x", 0, "logical",
166 "rdaOpt", "r", 0, "logical",
167 "lfcReq", "l", 1, "double",
168 "pValReq", "p", 1, "double",
169 "pAdjOpt", "d", 1, "character",
170 "normOpt", "n", 1, "character",
171 "robOpt", "b", 0, "logical",
172 "trend", "t", 1, "double",
173 "weightOpt", "w", 0, "logical"),
174 byrow=TRUE, ncol=4)
175 opt <- getopt(spec)
176
177
178 if (is.null(opt$matrixPath) & is.null(opt$filesPath)) {
179 cat("A counts matrix (or a set of counts files) is required.\n")
180 q(status=1)
181 }
182
183 if (is.null(opt$cpmReq)) {
184 filtCPM <- FALSE
185 } else {
186 filtCPM <- TRUE
187 }
188
189 if (is.null(opt$cntReq) || is.null(opt$sampleReq)) {
190 filtSmpCount <- FALSE
191 } else {
192 filtSmpCount <- TRUE
193 }
194
195 if (is.null(opt$totReq)) {
196 filtTotCount <- FALSE
197 } else {
198 filtTotCount <- TRUE
199 }
200
201 if (is.null(opt$rdaOpt)) {
202 wantRda <- FALSE
203 } else {
204 wantRda <- TRUE
205 }
206
207 if (is.null(opt$annoPath)) {
208 haveAnno <- FALSE
209 } else {
210 haveAnno <- TRUE
211 }
212
213 if (is.null(opt$normCounts)) {
214 wantNorm <- FALSE
215 } else {
216 wantNorm <- TRUE
217 }
218
219 if (is.null(opt$robOpt)) {
220 wantRobust <- FALSE
221 } else {
222 wantRobust <- TRUE
223 }
224
225 if (is.null(opt$weightOpt)) {
226 wantWeight <- FALSE
227 } else {
228 wantWeight <- TRUE
229 }
230
231 if (is.null(opt$trend)) {
232 wantTrend <- FALSE
233 deMethod <- "limma-voom"
234 } else {
235 wantTrend <- TRUE
236 deMethod <- "limma-trend"
237 priorCount <- opt$trend
238 }
239
240
241 if (!is.null(opt$filesPath)) {
242 # Process the separate count files (adapted from DESeq2 wrapper)
243 library("rjson")
244 parser <- newJSONParser()
245 parser$addData(opt$filesPath)
246 factorList <- parser$getObject()
247 factors <- sapply(factorList, function(x) x[[1]])
248 filenamesIn <- unname(unlist(factorList[[1]][[2]]))
249 sampleTable <- data.frame(sample=basename(filenamesIn),
250 filename=filenamesIn,
251 row.names=filenamesIn,
252 stringsAsFactors=FALSE)
253 for (factor in factorList) {
254 factorName <- factor[[1]]
255 sampleTable[[factorName]] <- character(nrow(sampleTable))
256 lvls <- sapply(factor[[2]], function(x) names(x))
257 for (i in seq_along(factor[[2]])) {
258 files <- factor[[2]][[i]][[1]]
259 sampleTable[files,factorName] <- lvls[i]
260 }
261 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
262 }
263 rownames(sampleTable) <- sampleTable$sample
264 rem <- c("sample","filename")
265 factors <- sampleTable[, !(names(sampleTable) %in% rem), drop=FALSE]
266
267 #read in count files and create single table
268 countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)})
269 counts <- do.call("cbind", countfiles)
270
271 } else {
272 # Process the single count matrix
273 counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
274 row.names(counts) <- counts[, 1]
275 counts <- counts[ , -1]
276 countsRows <- nrow(counts)
277
278 # Process factors
279 if (is.null(opt$factInput)) {
280 factorData <- read.table(opt$factFile, header=TRUE, sep="\t")
281 factors <- factorData[, -1, drop=FALSE]
282 } else {
283 factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
284 factorData <- list()
285 for (fact in factors) {
286 newFact <- unlist(strsplit(fact, split="::"))
287 factorData <- rbind(factorData, newFact)
288 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
289
290 # Set the row names to be the name of the factor and delete first row
291 row.names(factorData) <- factorData[, 1]
292 factorData <- factorData[, -1]
293 factorData <- sapply(factorData, sanitiseGroups)
294 factorData <- sapply(factorData, strsplit, split=",")
295 factorData <- sapply(factorData, make.names)
296 # Transform factor data into data frame of R factor objects
297 factors <- data.frame(factorData)
298 }
299 }
300
301 # if annotation file provided
302 if (haveAnno) {
303 geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
304 }
305
306 #Create output directory
307 dir.create(opt$outPath, showWarnings=FALSE)
308
309 # Split up contrasts seperated by comma into a vector then sanitise
310 contrastData <- unlist(strsplit(opt$contrastData, split=","))
311 contrastData <- sanitiseEquation(contrastData)
312 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
313
314
315 mdsOutPdf <- makeOut("mdsplot_nonorm.pdf")
316 mdsOutPng <- makeOut("mdsplot_nonorm.png")
317 nmdsOutPdf <- makeOut("mdsplot.pdf")
318 nmdsOutPng <- makeOut("mdsplot.png")
319 maOutPdf <- character() # Initialise character vector
320 maOutPng <- character()
321 topOut <- character()
322 for (i in 1:length(contrastData)) {
323 maOutPdf[i] <- makeOut(paste0("maplot_", contrastData[i], ".pdf"))
324 maOutPng[i] <- makeOut(paste0("maplot_", contrastData[i], ".png"))
325 topOut[i] <- makeOut(paste0(deMethod, "_", contrastData[i], ".tsv"))
326 }
327 normOut <- makeOut(paste0(deMethod, "_normcounts.tsv"))
328 rdaOut <- makeOut(paste0(deMethod, "_analysis.RData"))
329 sessionOut <- makeOut("session_info.txt")
330
331 # Initialise data for html links and images, data frame with columns Label and
332 # Link
333 linkData <- data.frame(Label=character(), Link=character(),
334 stringsAsFactors=FALSE)
335 imageData <- data.frame(Label=character(), Link=character(),
336 stringsAsFactors=FALSE)
337
338 # Initialise vectors for storage of up/down/neutral regulated counts
339 upCount <- numeric()
340 downCount <- numeric()
341 flatCount <- numeric()
342
343 ################################################################################
344 ### Data Processing
345 ################################################################################
346
347 # Extract counts and annotation data
348 print("Extracting counts")
349 data <- list()
350 data$counts <- counts
351 if (haveAnno) {
352 # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
353 annoord <- geneanno[match(row.names(counts), geneanno[,1]), ]
354 data$genes <- annoord
355 } else {
356 data$genes <- data.frame(GeneID=row.names(counts))
357 }
358
359 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of
360 # samples. Default is no filtering
361 preFilterCount <- nrow(data$counts)
362
363 if (filtCPM || filtSmpCount || filtTotCount) {
364
365 if (filtTotCount) {
366 keep <- rowSums(data$counts) >= opt$cntReq
367 } else if (filtSmpCount) {
368 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
369 } else if (filtCPM) {
370 keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
371 }
372
373 data$counts <- data$counts[keep, ]
374 data$genes <- data$genes[keep, , drop=FALSE]
375 }
376
377 postFilterCount <- nrow(data$counts)
378 filteredCount <- preFilterCount-postFilterCount
379
380 # Creating naming data
381 samplenames <- colnames(data$counts)
382 sampleanno <- data.frame("sampleID"=samplenames, factors)
383
384
385 # Generating the DGEList object "data"
386 print("Generating DGEList object")
387 data$samples <- sampleanno
388 data$samples$lib.size <- colSums(data$counts)
389 data$samples$norm.factors <- 1
390 row.names(data$samples) <- colnames(data$counts)
391 data <- new("DGEList", data)
392
393 print("Generating Design")
394 # Name rows of factors according to their sample
395 row.names(factors) <- names(data$counts)
396 factorList <- sapply(names(factors), pasteListName)
397 formula <- "~0"
398 for (i in 1:length(factorList)) {
399 formula <- paste(formula,factorList[i], sep="+")
400 }
401 formula <- formula(formula)
402 design <- model.matrix(formula)
403 for (i in 1:length(factorList)) {
404 colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE)
405 }
406
407 # Calculating normalising factors
408 print("Calculating Normalisation Factors")
409 data <- calcNormFactors(data, method=opt$normOpt)
410
411 # Generate contrasts information
412 print("Generating Contrasts")
413 contrasts <- makeContrasts(contrasts=contrastData, levels=design)
414
415 ################################################################################
416 ### Data Output
417 ################################################################################
418 # Plot MDS
419 print("Generating MDS plot")
420 labels <- names(counts)
421 png(mdsOutPng, width=600, height=600)
422 # Currently only using a single factor
423 plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (unnormalised)")
424 imageData[1, ] <- c("MDS Plot (unnormalised)", "mdsplot_nonorm.png")
425 invisible(dev.off())
426
427 pdf(mdsOutPdf)
428 plotMDS(data, labels=labels, cex=0.5)
429 linkData[1, ] <- c("MDS Plot (unnormalised).pdf", "mdsplot_nonorm.pdf")
430 invisible(dev.off())
431
432 if (wantTrend) {
433 # limma-trend approach
434 logCPM <- cpm(data, log=TRUE, prior.count=opt$trend)
435 fit <- lmFit(logCPM, design)
436 fit$genes <- data$genes
437 fit <- contrasts.fit(fit, contrasts)
438 if (wantRobust) {
439 fit <- eBayes(fit, trend=TRUE, robust=TRUE)
440 } else {
441 fit <- eBayes(fit, trend=TRUE, robust=FALSE)
442 }
443 # plot fit with plotSA
444 saOutPng <- makeOut("saplot.png")
445 saOutPdf <- makeOut("saplot.pdf")
446
447 png(saOutPng, width=600, height=600)
448 plotSA(fit, main="SA Plot")
449 imgName <- "SA Plot.png"
450 imgAddr <- "saplot.png"
451 imageData <- rbind(imageData, c(imgName, imgAddr))
452 invisible(dev.off())
453
454 pdf(saOutPdf, width=14)
455 plotSA(fit, main="SA Plot")
456 linkName <- paste0("SA Plot.pdf")
457 linkAddr <- paste0("saplot.pdf")
458 linkData <- rbind(linkData, c(linkName, linkAddr))
459 invisible(dev.off())
460
461 plotData <- logCPM
462
463 # Save normalised counts (log2cpm)
464 if (wantNorm) {
465 write.table(logCPM, file=normOut, row.names=TRUE, sep="\t", quote=FALSE)
466 linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv"))))
467 }
468 } else {
469 # limma-voom approach
470 voomOutPdf <- makeOut("voomplot.pdf")
471 voomOutPng <- makeOut("voomplot.png")
472
473 if (wantWeight) {
474 # Creating voom data object and plot
475 png(voomOutPng, width=1000, height=600)
476 vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
477 imgName <- "Voom Plot.png"
478 imgAddr <- "voomplot.png"
479 imageData <- rbind(imageData, c(imgName, imgAddr))
480 invisible(dev.off())
481
482 pdf(voomOutPdf, width=14)
483 vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
484 linkName <- paste0("Voom Plot.pdf")
485 linkAddr <- paste0("voomplot.pdf")
486 linkData <- rbind(linkData, c(linkName, linkAddr))
487 invisible(dev.off())
488
489 # Generating fit data and top table with weights
490 wts <- vData$weights
491 voomFit <- lmFit(vData, design, weights=wts)
492
493 } else {
494 # Creating voom data object and plot
495 png(voomOutPng, width=600, height=600)
496 vData <- voom(data, design=design, plot=TRUE)
497 imgName <- "Voom Plot"
498 imgAddr <- "voomplot.png"
499 imageData <- rbind(imageData, c(imgName, imgAddr))
500 invisible(dev.off())
501
502 pdf(voomOutPdf)
503 vData <- voom(data, design=design, plot=TRUE)
504 linkName <- paste0("Voom Plot.pdf")
505 linkAddr <- paste0("voomplot.pdf")
506 linkData <- rbind(linkData, c(linkName, linkAddr))
507 invisible(dev.off())
508
509 # Generate voom fit
510 voomFit <- lmFit(vData, design)
511 }
512
513 # Save normalised counts (log2cpm)
514 if (wantNorm) {
515 norm_counts <- data.frame(vData$genes, vData$E)
516 write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE)
517 linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv"))))
518 }
519
520 # Fit linear model and estimate dispersion with eBayes
521 voomFit <- contrasts.fit(voomFit, contrasts)
522 if (wantRobust) {
523 fit <- eBayes(voomFit, robust=TRUE)
524 } else {
525 fit <- eBayes(voomFit, robust=FALSE)
526 }
527 plotData <- vData
528 }
529
530 print("Generating normalised MDS plot")
531 png(nmdsOutPng, width=600, height=600)
532 # Currently only using a single factor
533 plotMDS(plotData, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (normalised)")
534 imgName <- "MDS Plot (normalised)"
535 imgAddr <- "mdsplot.png"
536 imageData <- rbind(imageData, c(imgName, imgAddr))
537 invisible(dev.off())
538
539 pdf(nmdsOutPdf)
540 plotMDS(plotData, labels=labels, cex=0.5)
541 linkName <- paste0("MDS Plot (normalised).pdf")
542 linkAddr <- paste0("mdsplot.pdf")
543 linkData <- rbind(linkData, c(linkName, linkAddr))
544 invisible(dev.off())
545
546
547 print("Generating DE results")
548 status = decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq,
549 lfc=opt$lfcReq)
550 sumStatus <- summary(status)
551
552 for (i in 1:length(contrastData)) {
553 # Collect counts for differential expression
554 upCount[i] <- sumStatus["Up", i]
555 downCount[i] <- sumStatus["Down", i]
556 flatCount[i] <- sumStatus["NotSig", i]
557
558 # Write top expressions table
559 top <- topTable(fit, coef=i, number=Inf, sort.by="P")
560 write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE)
561
562 linkName <- paste0(deMethod, "_", contrastData[i], ".tsv")
563 linkAddr <- paste0(deMethod, "_", contrastData[i], ".tsv")
564 linkData <- rbind(linkData, c(linkName, linkAddr))
565
566 # Plot MA (log ratios vs mean average) using limma package on weighted
567 pdf(maOutPdf[i])
568 limma::plotMD(fit, status=status, coef=i,
569 main=paste("MA Plot:", unmake.names(contrastData[i])),
570 col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
571 xlab="Average Expression", ylab="logFC")
572
573 abline(h=0, col="grey", lty=2)
574
575 linkName <- paste0("MA Plot_", contrastData[i], " (.pdf)")
576 linkAddr <- paste0("maplot_", contrastData[i], ".pdf")
577 linkData <- rbind(linkData, c(linkName, linkAddr))
578 invisible(dev.off())
579
580 png(maOutPng[i], height=600, width=600)
581 limma::plotMD(fit, status=status, coef=i,
582 main=paste("MA Plot:", unmake.names(contrastData[i])),
583 col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
584 xlab="Average Expression", ylab="logFC")
585
586 abline(h=0, col="grey", lty=2)
587
588 imgName <- paste0("MA Plot_", contrastData[i])
589 imgAddr <- paste0("maplot_", contrastData[i], ".png")
590 imageData <- rbind(imageData, c(imgName, imgAddr))
591 invisible(dev.off())
592 }
593 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
594 row.names(sigDiff) <- contrastData
595
596 # Save relevant items as rda object
597 if (wantRda) {
598 print("Saving RData")
599 if (wantWeight) {
600 save(data, status, plotData, labels, factors, wts, fit, top, contrasts,
601 design,
602 file=rdaOut, ascii=TRUE)
603 } else {
604 save(data, status, plotData, labels, factors, fit, top, contrasts, design,
605 file=rdaOut, ascii=TRUE)
606 }
607 linkData <- rbind(linkData, c((paste0(deMethod, "_analysis.RData")), (paste0(deMethod, "_analysis.RData"))))
608 }
609
610 # Record session info
611 writeLines(capture.output(sessionInfo()), sessionOut)
612 linkData <- rbind(linkData, c("Session Info", "session_info.txt"))
613
614 # Record ending time and calculate total run time
615 timeEnd <- as.character(Sys.time())
616 timeTaken <- capture.output(round(difftime(timeEnd,timeStart), digits=3))
617 timeTaken <- gsub("Time difference of ", "", timeTaken, fixed=TRUE)
618 ################################################################################
619 ### HTML Generation
620 ################################################################################
621
622 # Clear file
623 cat("", file=opt$htmlPath)
624
625 cata("<html>\n")
626
627 cata("<body>\n")
628 cata("<h3>Limma Analysis Output:</h3>\n")
629 cata("Links to PDF copies of plots are in 'Plots' section below />\n")
630 if (wantWeight) {
631 HtmlImage(imageData$Link[1], imageData$Label[1], width=1000)
632 } else {
633 HtmlImage(imageData$Link[1], imageData$Label[1])
634 }
635
636 for (i in 2:nrow(imageData)) {
637 HtmlImage(imageData$Link[i], imageData$Label[i])
638 }
639
640 cata("<h4>Differential Expression Counts:</h4>\n")
641
642 cata("<table border=\"1\" cellpadding=\"4\">\n")
643 cata("<tr>\n")
644 TableItem()
645 for (i in colnames(sigDiff)) {
646 TableHeadItem(i)
647 }
648 cata("</tr>\n")
649 for (i in 1:nrow(sigDiff)) {
650 cata("<tr>\n")
651 TableHeadItem(unmake.names(row.names(sigDiff)[i]))
652 for (j in 1:ncol(sigDiff)) {
653 TableItem(as.character(sigDiff[i, j]))
654 }
655 cata("</tr>\n")
656 }
657 cata("</table>")
658
659 cata("<h4>Plots:</h4>\n")
660 for (i in 1:nrow(linkData)) {
661 if (grepl(".pdf", linkData$Link[i])) {
662 HtmlLink(linkData$Link[i], linkData$Label[i])
663 }
664 }
665
666 cata("<h4>Tables:</h4>\n")
667 for (i in 1:nrow(linkData)) {
668 if (grepl(".tsv", linkData$Link[i])) {
669 HtmlLink(linkData$Link[i], linkData$Label[i])
670 }
671 }
672
673 if (wantRda) {
674 cata("<h4>R Data Object:</h4>\n")
675 for (i in 1:nrow(linkData)) {
676 if (grepl(".RData", linkData$Link[i])) {
677 HtmlLink(linkData$Link[i], linkData$Label[i])
678 }
679 }
680 }
681
682 cata("<p>Alt-click links to download file.</p>\n")
683 cata("<p>Click floppy disc icon associated history item to download ")
684 cata("all files.</p>\n")
685 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n")
686
687 cata("<h4>Additional Information</h4>\n")
688 cata("<ul>\n")
689
690 if (filtCPM || filtSmpCount || filtTotCount) {
691 if (filtCPM) {
692 tempStr <- paste("Genes without more than", opt$cmpReq,
693 "CPM in at least", opt$sampleReq, "samples are insignificant",
694 "and filtered out.")
695 } else if (filtSmpCount) {
696 tempStr <- paste("Genes without more than", opt$cntReq,
697 "counts in at least", opt$sampleReq, "samples are insignificant",
698 "and filtered out.")
699 } else if (filtTotCount) {
700 tempStr <- paste("Genes without more than", opt$cntReq,
701 "counts, after summing counts for all samples, are insignificant",
702 "and filtered out.")
703 }
704
705 ListItem(tempStr)
706 filterProp <- round(filteredCount/preFilterCount*100, digits=2)
707 tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp,
708 "%) genes were filtered out for low expression.")
709 ListItem(tempStr)
710 }
711 ListItem(opt$normOpt, " was the method used to normalise library sizes.")
712 if (wantTrend) {
713 ListItem("The limma-trend method was used.")
714 } else {
715 ListItem("The limma-voom method was used.")
716 }
717 if (wantWeight) {
718 ListItem("Weights were applied to samples.")
719 } else {
720 ListItem("Weights were not applied to samples.")
721 }
722 if (wantRobust) {
723 ListItem("eBayes was used with robust settings (robust=TRUE).")
724 }
725 if (opt$pAdjOpt!="none") {
726 if (opt$pAdjOpt=="BH" || opt$pAdjOpt=="BY") {
727 tempStr <- paste0("MA-Plot highlighted genes are significant at FDR ",
728 "of ", opt$pValReq," and exhibit log2-fold-change of at ",
729 "least ", opt$lfcReq, ".")
730 ListItem(tempStr)
731 } else if (opt$pAdjOpt=="holm") {
732 tempStr <- paste0("MA-Plot highlighted genes are significant at adjusted ",
733 "p-value of ", opt$pValReq," by the Holm(1979) ",
734 "method, and exhibit log2-fold-change of at least ",
735 opt$lfcReq, ".")
736 ListItem(tempStr)
737 }
738 } else {
739 tempStr <- paste0("MA-Plot highlighted genes are significant at p-value ",
740 "of ", opt$pValReq," and exhibit log2-fold-change of at ",
741 "least ", opt$lfcReq, ".")
742 ListItem(tempStr)
743 }
744 cata("</ul>\n")
745
746 cata("<h4>Summary of experimental data:</h4>\n")
747
748 cata("<p>*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*</p>\n")
749
750 cata("<table border=\"1\" cellpadding=\"3\">\n")
751 cata("<tr>\n")
752 TableHeadItem("SampleID")
753 TableHeadItem(names(factors)[1]," (Primary Factor)")
754
755 if (ncol(factors) > 1) {
756 for (i in names(factors)[2:length(names(factors))]) {
757 TableHeadItem(i)
758 }
759 cata("</tr>\n")
760 }
761
762 for (i in 1:nrow(factors)) {
763 cata("<tr>\n")
764 TableHeadItem(row.names(factors)[i])
765 for (j in 1:ncol(factors)) {
766 TableItem(as.character(unmake.names(factors[i, j])))
767 }
768 cata("</tr>\n")
769 }
770 cata("</table>")
771
772 cit <- character()
773 link <- character()
774 link[1] <- paste0("<a href=\"",
775 "http://www.bioconductor.org/packages/release/bioc/",
776 "vignettes/limma/inst/doc/usersguide.pdf",
777 "\">", "limma User's Guide", "</a>.")
778
779 link[2] <- paste0("<a href=\"",
780 "http://www.bioconductor.org/packages/release/bioc/",
781 "vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf",
782 "\">", "edgeR User's Guide", "</a>")
783
784 cit[1] <- paste("Please cite the following paper for this tool:")
785
786 cit[2] <- paste("Liu R, Holik AZ, Su S, Jansz N, Chen K, Leong HS, Blewitt ME,",
787 "Asselin-Labat ML, Smyth GK, Ritchie ME (2015). Why weight? ",
788 "Modelling sample and observational level variability improves power ",
789 "in RNA-seq analyses. Nucleic Acids Research, 43(15), e97.")
790
791 cit[3] <- paste("Please cite the paper below for the limma software itself.",
792 "Please also try to cite the appropriate methodology articles",
793 "that describe the statistical methods implemented in limma,",
794 "depending on which limma functions you are using. The",
795 "methodology articles are listed in Section 2.1 of the",
796 link[1],
797 "Cite no. 3 only if sample weights were used.")
798 cit[4] <- paste("Smyth GK (2005). Limma: linear models for microarray data.",
799 "In: 'Bioinformatics and Computational Biology Solutions using",
800 "R and Bioconductor'. R. Gentleman, V. Carey, S. doit,.",
801 "Irizarry, W. Huber (eds), Springer, New York, pages 397-420.")
802 cit[5] <- paste("Please cite the first paper for the software itself and the",
803 "other papers for the various original statistical methods",
804 "implemented in edgeR. See Section 1.2 in the", link[2],
805 "for more detail.")
806 cit[6] <- paste("Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a",
807 "Bioconductor package for differential expression analysis",
808 "of digital gene expression data. Bioinformatics 26, 139-140")
809 cit[7] <- paste("Robinson MD and Smyth GK (2007). Moderated statistical tests",
810 "for assessing differences in tag abundance. Bioinformatics",
811 "23, 2881-2887")
812 cit[8] <- paste("Robinson MD and Smyth GK (2008). Small-sample estimation of",
813 "negative binomial dispersion, with applications to SAGE data.",
814 "Biostatistics, 9, 321-332")
815 cit[9] <- paste("McCarthy DJ, Chen Y and Smyth GK (2012). Differential",
816 "expression analysis of multifactor RNA-Seq experiments with",
817 "respect to biological variation. Nucleic Acids Research 40,",
818 "4288-4297")
819 cit[10] <- paste("Law CW, Chen Y, Shi W, and Smyth GK (2014). Voom:",
820 "precision weights unlock linear model analysis tools for",
821 "RNA-seq read counts. Genome Biology 15, R29.")
822 cit[11] <- paste("Ritchie ME, Diyagama D, Neilson J, van Laar R,",
823 "Dobrovic A, Holloway A and Smyth GK (2006).",
824 "Empirical array quality weights for microarray data.",
825 "BMC Bioinformatics 7, Article 261.")
826 cata("<h3>Citations</h3>\n")
827 cata(cit[1], "\n")
828 cata("<br>\n")
829 cata(cit[2], "\n")
830
831 cata("<h4>limma</h4>\n")
832 cata(cit[3], "\n")
833 cata("<ol>\n")
834 ListItem(cit[4])
835 ListItem(cit[10])
836 ListItem(cit[11])
837 cata("</ol>\n")
838
839 cata("<h4>edgeR</h4>\n")
840 cata(cit[5], "\n")
841 cata("<ol>\n")
842 ListItem(cit[6])
843 ListItem(cit[7])
844 ListItem(cit[8])
845 ListItem(cit[9])
846 cata("</ol>\n")
847
848 cata("<p>Please report problems or suggestions to: su.s@wehi.edu.au</p>\n")
849
850 for (i in 1:nrow(linkData)) {
851 if (grepl("session_info", linkData$Link[i])) {
852 HtmlLink(linkData$Link[i], linkData$Label[i])
853 }
854 }
855
856 cata("<table border=\"0\">\n")
857 cata("<tr>\n")
858 TableItem("Task started at:"); TableItem(timeStart)
859 cata("</tr>\n")
860 cata("<tr>\n")
861 TableItem("Task ended at:"); TableItem(timeEnd)
862 cata("</tr>\n")
863 cata("<tr>\n")
864 TableItem("Task run time:"); TableItem(timeTaken)
865 cata("<tr>\n")
866 cata("</table>\n")
867
868 cata("</body>\n")
869 cata("</html>")