comparison edger.R @ 0:9bdff28ae1b1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit eac022c9c6e51e661c1513306b9fefdad673487d
author iuc
date Tue, 07 Nov 2017 08:18:14 -0500
parents
children 2a16413ec60d
comparison
equal deleted inserted replaced
-1:000000000000 0:9bdff28ae1b1
1 # This tool takes in a matrix of feature counts as well as gene annotations and
2 # outputs a table of top expressions as well as various plots for differential
3 # expression analysis
4 #
5 # ARGS: htmlPath", "R", 1, "character" -Path to html file linking to other outputs
6 # outPath", "o", 1, "character" -Path to folder to write all output to
7 # filesPath", "j", 2, "character" -JSON list object if multiple files input
8 # matrixPath", "m", 2, "character" -Path to count matrix
9 # factFile", "f", 2, "character" -Path to factor information file
10 # factInput", "i", 2, "character" -String containing factors if manually input
11 # annoPath", "a", 2, "character" -Path to input containing gene annotations
12 # contrastData", "C", 1, "character" -String containing contrasts of interest
13 # cpmReq", "c", 2, "double" -Float specifying cpm requirement
14 # cntReq", "z", 2, "integer" -Integer specifying minimum total count requirement
15 # sampleReq", "s", 2, "integer" -Integer specifying cpm requirement
16 # normCounts", "x", 0, "logical" -String specifying if normalised counts should be output
17 # rdaOpt", "r", 0, "logical" -String specifying if RData should be output
18 # lfcReq", "l", 1, "double" -Float specifying the log-fold-change requirement
19 # pValReq", "p", 1, "double" -Float specifying the p-value requirement
20 # pAdjOpt", "d", 1, "character" -String specifying the p-value adjustment method
21 # normOpt", "n", 1, "character" -String specifying type of normalisation used
22 # robOpt", "b", 0, "logical" -String specifying if robust options should be used
23 # lrtOpt", "t", 0, "logical" -String specifying whether to perform LRT test instead
24 #
25 # OUT:
26 # MDS Plot
27 # BCV Plot
28 # QL Plot
29 # MD Plot
30 # Expression Table
31 # HTML file linking to the ouputs
32 # Optional:
33 # Normalised counts Table
34 # RData file
35 #
36 # Author: Shian Su - registertonysu@gmail.com - Jan 2014
37 # Modified by: Maria Doyle - Oct 2017 (some code taken from the DESeq2 wrapper)
38
39 # Record starting time
40 timeStart <- as.character(Sys.time())
41
42 # setup R error handling to go to stderr
43 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
44
45 # we need that to not crash galaxy with an UTF8 error on German LC settings.
46 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
47
48 # Load all required libraries
49 library(methods, quietly=TRUE, warn.conflicts=FALSE)
50 library(statmod, quietly=TRUE, warn.conflicts=FALSE)
51 library(splines, quietly=TRUE, warn.conflicts=FALSE)
52 library(edgeR, quietly=TRUE, warn.conflicts=FALSE)
53 library(limma, quietly=TRUE, warn.conflicts=FALSE)
54 library(scales, quietly=TRUE, warn.conflicts=FALSE)
55 library(getopt, quietly=TRUE, warn.conflicts=FALSE)
56
57 ################################################################################
58 ### Function Delcaration
59 ################################################################################
60 # Function to sanitise contrast equations so there are no whitespaces
61 # surrounding the arithmetic operators, leading or trailing whitespace
62 sanitiseEquation <- function(equation) {
63 equation <- gsub(" *[+] *", "+", equation)
64 equation <- gsub(" *[-] *", "-", equation)
65 equation <- gsub(" *[/] *", "/", equation)
66 equation <- gsub(" *[*] *", "*", equation)
67 equation <- gsub("^\\s+|\\s+$", "", equation)
68 return(equation)
69 }
70
71 # Function to sanitise group information
72 sanitiseGroups <- function(string) {
73 string <- gsub(" *[,] *", ",", string)
74 string <- gsub("^\\s+|\\s+$", "", string)
75 return(string)
76 }
77
78 # Function to change periods to whitespace in a string
79 unmake.names <- function(string) {
80 string <- gsub(".", " ", string, fixed=TRUE)
81 return(string)
82 }
83
84 # Generate output folder and paths
85 makeOut <- function(filename) {
86 return(paste0(opt$outPath, "/", filename))
87 }
88
89 # Generating design information
90 pasteListName <- function(string) {
91 return(paste0("factors$", string))
92 }
93
94 # Create cata function: default path set, default seperator empty and appending
95 # true by default (Ripped straight from the cat function with altered argument
96 # defaults)
97 cata <- function(..., file=opt$htmlPath, sep="", fill=FALSE, labels=NULL,
98 append=TRUE) {
99 if (is.character(file))
100 if (file == "")
101 file <- stdout()
102 else if (substring(file, 1L, 1L) == "|") {
103 file <- pipe(substring(file, 2L), "w")
104 on.exit(close(file))
105 }
106 else {
107 file <- file(file, ifelse(append, "a", "w"))
108 on.exit(close(file))
109 }
110 .Internal(cat(list(...), file, sep, fill, labels, append))
111 }
112
113 # Function to write code for html head and title
114 HtmlHead <- function(title) {
115 cata("<head>\n")
116 cata("<title>", title, "</title>\n")
117 cata("</head>\n")
118 }
119
120 # Function to write code for html links
121 HtmlLink <- function(address, label=address) {
122 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n")
123 }
124
125 # Function to write code for html images
126 HtmlImage <- function(source, label=source, height=600, width=600) {
127 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
128 cata("\" width=\"", width, "\"/>\n")
129 }
130
131 # Function to write code for html list items
132 ListItem <- function(...) {
133 cata("<li>", ..., "</li>\n")
134 }
135
136 TableItem <- function(...) {
137 cata("<td>", ..., "</td>\n")
138 }
139
140 TableHeadItem <- function(...) {
141 cata("<th>", ..., "</th>\n")
142 }
143
144 ################################################################################
145 ### Input Processing
146 ################################################################################
147
148 # Collect arguments from command line
149 args <- commandArgs(trailingOnly=TRUE)
150
151 # Get options, using the spec as defined by the enclosed list.
152 # Read the options from the default: commandArgs(TRUE).
153 spec <- matrix(c(
154 "htmlPath", "R", 1, "character",
155 "outPath", "o", 1, "character",
156 "filesPath", "j", 2, "character",
157 "matrixPath", "m", 2, "character",
158 "factFile", "f", 2, "character",
159 "factInput", "i", 2, "character",
160 "annoPath", "a", 2, "character",
161 "contrastData", "C", 1, "character",
162 "cpmReq", "c", 1, "double",
163 "totReq", "y", 0, "logical",
164 "cntReq", "z", 1, "integer",
165 "sampleReq", "s", 1, "integer",
166 "normCounts", "x", 0, "logical",
167 "rdaOpt", "r", 0, "logical",
168 "lfcReq", "l", 1, "double",
169 "pValReq", "p", 1, "double",
170 "pAdjOpt", "d", 1, "character",
171 "normOpt", "n", 1, "character",
172 "robOpt", "b", 0, "logical",
173 "lrtOpt", "t", 0, "logical"),
174 byrow=TRUE, ncol=4)
175 opt <- getopt(spec)
176
177
178 if (is.null(opt$matrixPath) & is.null(opt$filesPath)) {
179 cat("A counts matrix (or a set of counts files) is required.\n")
180 q(status=1)
181 }
182
183 if (is.null(opt$cpmReq)) {
184 filtCPM <- FALSE
185 } else {
186 filtCPM <- TRUE
187 }
188
189 if (is.null(opt$cntReq) || is.null(opt$sampleReq)) {
190 filtSmpCount <- FALSE
191 } else {
192 filtSmpCount <- TRUE
193 }
194
195 if (is.null(opt$totReq)) {
196 filtTotCount <- FALSE
197 } else {
198 filtTotCount <- TRUE
199 }
200
201 if (is.null(opt$lrtOpt)) {
202 wantLRT <- FALSE
203 } else {
204 wantLRT <- TRUE
205 }
206
207 if (is.null(opt$rdaOpt)) {
208 wantRda <- FALSE
209 } else {
210 wantRda <- TRUE
211 }
212
213 if (is.null(opt$annoPath)) {
214 haveAnno <- FALSE
215 } else {
216 haveAnno <- TRUE
217 }
218
219 if (is.null(opt$normCounts)) {
220 wantNorm <- FALSE
221 } else {
222 wantNorm <- TRUE
223 }
224
225 if (is.null(opt$robOpt)) {
226 wantRobust <- FALSE
227 } else {
228 wantRobust <- TRUE
229 }
230
231
232 if (!is.null(opt$filesPath)) {
233 # Process the separate count files (adapted from DESeq2 wrapper)
234 library("rjson")
235 parser <- newJSONParser()
236 parser$addData(opt$filesPath)
237 factorList <- parser$getObject()
238 factors <- sapply(factorList, function(x) x[[1]])
239 filenamesIn <- unname(unlist(factorList[[1]][[2]]))
240 sampleTable <- data.frame(sample=basename(filenamesIn),
241 filename=filenamesIn,
242 row.names=filenamesIn,
243 stringsAsFactors=FALSE)
244 for (factor in factorList) {
245 factorName <- factor[[1]]
246 sampleTable[[factorName]] <- character(nrow(sampleTable))
247 lvls <- sapply(factor[[2]], function(x) names(x))
248 for (i in seq_along(factor[[2]])) {
249 files <- factor[[2]][[i]][[1]]
250 sampleTable[files,factorName] <- lvls[i]
251 }
252 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
253 }
254 rownames(sampleTable) <- sampleTable$sample
255 rem <- c("sample","filename")
256 factors <- sampleTable[, !(names(sampleTable) %in% rem), drop=FALSE]
257
258 #read in count files and create single table
259 countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)})
260 counts <- do.call("cbind", countfiles)
261
262 } else {
263 # Process the single count matrix
264 counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
265 row.names(counts) <- counts[, 1]
266 counts <- counts[ , -1]
267 countsRows <- nrow(counts)
268
269 # Process factors
270 if (is.null(opt$factInput)) {
271 factorData <- read.table(opt$factFile, header=TRUE, sep="\t")
272 factors <- factorData[, -1, drop=FALSE]
273 } else {
274 factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
275 factorData <- list()
276 for (fact in factors) {
277 newFact <- unlist(strsplit(fact, split="::"))
278 factorData <- rbind(factorData, newFact)
279 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
280
281 # Set the row names to be the name of the factor and delete first row
282 row.names(factorData) <- factorData[, 1]
283 factorData <- factorData[, -1]
284 factorData <- sapply(factorData, sanitiseGroups)
285 factorData <- sapply(factorData, strsplit, split=",")
286 factorData <- sapply(factorData, make.names)
287 # Transform factor data into data frame of R factor objects
288 factors <- data.frame(factorData)
289 }
290 }
291
292 # if annotation file provided
293 if (haveAnno) {
294 geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
295 }
296
297 #Create output directory
298 dir.create(opt$outPath, showWarnings=FALSE)
299
300 # Split up contrasts separated by comma into a vector then sanitise
301 contrastData <- unlist(strsplit(opt$contrastData, split=","))
302 contrastData <- sanitiseEquation(contrastData)
303 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
304
305 bcvOutPdf <- makeOut("bcvplot.pdf")
306 bcvOutPng <- makeOut("bcvplot.png")
307 qlOutPdf <- makeOut("qlplot.pdf")
308 qlOutPng <- makeOut("qlplot.png")
309 mdsOutPdf <- makeOut("mdsplot.pdf")
310 mdsOutPng <- makeOut("mdsplot.png")
311 mdOutPdf <- character() # Initialise character vector
312 mdOutPng <- character()
313 topOut <- character()
314 for (i in 1:length(contrastData)) {
315 mdOutPdf[i] <- makeOut(paste0("mdplot_", contrastData[i], ".pdf"))
316 mdOutPng[i] <- makeOut(paste0("mdplot_", contrastData[i], ".png"))
317 topOut[i] <- makeOut(paste0("edgeR_", contrastData[i], ".tsv"))
318 } # Save output paths for each contrast as vectors
319 normOut <- makeOut("edgeR_normcounts.tsv")
320 rdaOut <- makeOut("edgeR_analysis.RData")
321 sessionOut <- makeOut("session_info.txt")
322
323 # Initialise data for html links and images, data frame with columns Label and
324 # Link
325 linkData <- data.frame(Label=character(), Link=character(), stringsAsFactors=FALSE)
326 imageData <- data.frame(Label=character(), Link=character(), stringsAsFactors=FALSE)
327
328 # Initialise vectors for storage of up/down/neutral regulated counts
329 upCount <- numeric()
330 downCount <- numeric()
331 flatCount <- numeric()
332
333 ################################################################################
334 ### Data Processing
335 ################################################################################
336
337 # Extract counts and annotation data
338 data <- list()
339 data$counts <- counts
340 if (haveAnno) {
341 data$genes <- geneanno
342 } else {
343 data$genes <- data.frame(GeneID=row.names(counts))
344 }
345
346 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of
347 # samples. Default is no filtering
348 preFilterCount <- nrow(data$counts)
349
350 if (filtCPM || filtSmpCount || filtTotCount) {
351
352 if (filtTotCount) {
353 keep <- rowSums(data$counts) >= opt$cntReq
354 } else if (filtSmpCount) {
355 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
356 } else if (filtCPM) {
357 keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
358 }
359
360 data$counts <- data$counts[keep, ]
361 data$genes <- data$genes[keep, , drop=FALSE]
362 }
363
364 postFilterCount <- nrow(data$counts)
365 filteredCount <- preFilterCount-postFilterCount
366
367 # Creating naming data
368 samplenames <- colnames(data$counts)
369 sampleanno <- data.frame("sampleID"=samplenames, factors)
370
371
372 # Generating the DGEList object "data"
373 data$samples <- sampleanno
374 data$samples$lib.size <- colSums(data$counts)
375 data$samples$norm.factors <- 1
376 row.names(data$samples) <- colnames(data$counts)
377 data <- new("DGEList", data)
378
379 # Name rows of factors according to their sample
380 row.names(factors) <- names(data$counts)
381 factorList <- sapply(names(factors), pasteListName)
382
383 formula <- "~0"
384 for (i in 1:length(factorList)) {
385 formula <- paste(formula, factorList[i], sep="+")
386 }
387
388 formula <- formula(formula)
389 design <- model.matrix(formula)
390
391 for (i in 1:length(factorList)) {
392 colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE)
393 }
394
395 # Calculating normalising factor, estimating dispersion
396 data <- calcNormFactors(data, method=opt$normOpt)
397
398 if (wantRobust) {
399 data <- estimateDisp(data, design=design, robust=TRUE)
400 } else {
401 data <- estimateDisp(data, design=design)
402 }
403
404 # Generate contrasts information
405 contrasts <- makeContrasts(contrasts=contrastData, levels=design)
406
407 ################################################################################
408 ### Data Output
409 ################################################################################
410
411 # Plot MDS
412 labels <- names(counts)
413 png(mdsOutPng, width=600, height=600)
414 # Currently only using a single factor
415 plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot")
416 imageData[1, ] <- c("MDS Plot", "mdsplot.png")
417 invisible(dev.off())
418
419 pdf(mdsOutPdf)
420 plotMDS(data, labels=labels, cex=0.5)
421 linkData[1, ] <- c("MDS Plot.pdf", "mdsplot.pdf")
422 invisible(dev.off())
423
424 # BCV Plot
425 png(bcvOutPng, width=600, height=600)
426 plotBCV(data, main="BCV Plot")
427 imgName <- "BCV Plot"
428 imgAddr <- "bcvplot.png"
429 imageData <- rbind(imageData, c(imgName, imgAddr))
430 invisible(dev.off())
431
432 pdf(bcvOutPdf)
433 plotBCV(data, main="BCV Plot")
434 linkName <- paste0("BCV Plot.pdf")
435 linkAddr <- paste0("bcvplot.pdf")
436 linkData <- rbind(linkData, c(linkName, linkAddr))
437 invisible(dev.off())
438
439 # Generate fit
440 if (wantLRT) {
441
442 fit <- glmFit(data, design)
443
444 } else {
445
446 if (wantRobust) {
447 fit <- glmQLFit(data, design, robust=TRUE)
448 } else {
449 fit <- glmQLFit(data, design)
450 }
451
452 # Plot QL dispersions
453 png(qlOutPng, width=600, height=600)
454 plotQLDisp(fit, main="QL Plot")
455 imgName <- "QL Plot"
456 imgAddr <- "qlplot.png"
457 imageData <- rbind(imageData, c(imgName, imgAddr))
458 invisible(dev.off())
459
460 pdf(qlOutPdf)
461 plotQLDisp(fit, main="QL Plot")
462 linkName <- "QL Plot.pdf"
463 linkAddr <- "qlplot.pdf"
464 linkData <- rbind(linkData, c(linkName, linkAddr))
465 invisible(dev.off())
466 }
467
468 # Save normalised counts (log2cpm)
469 if (wantNorm) {
470 normalisedCounts <- cpm(data, normalized.lib.sizes=TRUE, log=TRUE)
471 normalisedCounts <- data.frame(data$genes, normalisedCounts)
472 write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t")
473 linkData <- rbind(linkData, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
474 }
475
476
477 for (i in 1:length(contrastData)) {
478 if (wantLRT) {
479 res <- glmLRT(fit, contrast=contrasts[, i])
480 } else {
481 res <- glmQLFTest(fit, contrast=contrasts[, i])
482 }
483
484 status = decideTestsDGE(res, adjust.method=opt$pAdjOpt, p.value=opt$pValReq,
485 lfc=opt$lfcReq)
486 sumStatus <- summary(status)
487
488 # Collect counts for differential expression
489 upCount[i] <- sumStatus["1", ]
490 downCount[i] <- sumStatus["-1", ]
491 flatCount[i] <- sumStatus["0", ]
492
493 # Write top expressions table
494 top <- topTags(res, n=Inf, sort.by="PValue")
495 write.table(top, file=topOut[i], row.names=FALSE, sep="\t")
496
497 linkName <- paste0("edgeR_", contrastData[i], ".tsv")
498 linkAddr <- paste0("edgeR_", contrastData[i], ".tsv")
499 linkData <- rbind(linkData, c(linkName, linkAddr))
500
501 # Plot MD (log ratios vs mean difference) using limma package
502 pdf(mdOutPdf[i])
503 limma::plotMD(res, status=status,
504 main=paste("MD Plot:", unmake.names(contrastData[i])),
505 col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
506 xlab="Average Expression", ylab="logFC")
507
508 abline(h=0, col="grey", lty=2)
509
510 linkName <- paste0("MD Plot_", contrastData[i], ".pdf")
511 linkAddr <- paste0("mdplot_", contrastData[i], ".pdf")
512 linkData <- rbind(linkData, c(linkName, linkAddr))
513 invisible(dev.off())
514
515 png(mdOutPng[i], height=600, width=600)
516 limma::plotMD(res, status=status,
517 main=paste("MD Plot:", unmake.names(contrastData[i])),
518 col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
519 xlab="Average Expression", ylab="logFC")
520
521 abline(h=0, col="grey", lty=2)
522
523 imgName <- paste0("MD Plot_", contrastData[i], ".png")
524 imgAddr <- paste0("mdplot_", contrastData[i], ".png")
525 imageData <- rbind(imageData, c(imgName, imgAddr))
526 invisible(dev.off())
527 }
528 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
529 row.names(sigDiff) <- contrastData
530
531 # Save relevant items as rda object
532 if (wantRda) {
533 if (wantNorm) {
534 save(counts, data, status, normalisedCounts, labels, factors, fit, res, top, contrasts, design,
535 file=rdaOut, ascii=TRUE)
536 } else {
537 save(counts, data, status, labels, factors, fit, res, top, contrasts, design,
538 file=rdaOut, ascii=TRUE)
539 }
540 linkData <- rbind(linkData, c("edgeR_analysis.RData", "edgeR_analysis.RData"))
541 }
542
543 # Record session info
544 writeLines(capture.output(sessionInfo()), sessionOut)
545 linkData <- rbind(linkData, c("Session Info", "session_info.txt"))
546
547 # Record ending time and calculate total run time
548 timeEnd <- as.character(Sys.time())
549 timeTaken <- capture.output(round(difftime(timeEnd, timeStart), digits=3))
550 timeTaken <- gsub("Time difference of ", "", timeTaken, fixed=TRUE)
551
552 ################################################################################
553 ### HTML Generation
554 ################################################################################
555
556 # Clear file
557 cat("", file=opt$htmlPath)
558
559 cata("<html>\n")
560
561 cata("<body>\n")
562 cata("<h3>edgeR Analysis Output:</h3>\n")
563 cata("Links to PDF copies of plots are in 'Plots' section below.<br />\n")
564
565 HtmlImage(imageData$Link[1], imageData$Label[1])
566
567 for (i in 2:nrow(imageData)) {
568 HtmlImage(imageData$Link[i], imageData$Label[i])
569 }
570
571 cata("<h4>Differential Expression Counts:</h4>\n")
572
573 cata("<table border=\"1\" cellpadding=\"4\">\n")
574 cata("<tr>\n")
575 TableItem()
576 for (i in colnames(sigDiff)) {
577 TableHeadItem(i)
578 }
579 cata("</tr>\n")
580 for (i in 1:nrow(sigDiff)) {
581 cata("<tr>\n")
582 TableHeadItem(unmake.names(row.names(sigDiff)[i]))
583 for (j in 1:ncol(sigDiff)) {
584 TableItem(as.character(sigDiff[i, j]))
585 }
586 cata("</tr>\n")
587 }
588 cata("</table>")
589
590 cata("<h4>Plots:</h4>\n")
591 for (i in 1:nrow(linkData)) {
592 if (grepl(".pdf", linkData$Link[i])) {
593 HtmlLink(linkData$Link[i], linkData$Label[i])
594 }
595 }
596
597 cata("<h4>Tables:</h4>\n")
598 for (i in 1:nrow(linkData)) {
599 if (grepl(".tsv", linkData$Link[i])) {
600 HtmlLink(linkData$Link[i], linkData$Label[i])
601 }
602 }
603
604 if (wantRda) {
605 cata("<h4>R Data Objects:</h4>\n")
606 for (i in 1:nrow(linkData)) {
607 if (grepl(".RData", linkData$Link[i])) {
608 HtmlLink(linkData$Link[i], linkData$Label[i])
609 }
610 }
611 }
612
613 cata("<p>Alt-click links to download file.</p>\n")
614 cata("<p>Click floppy disc icon associated history item to download ")
615 cata("all files.</p>\n")
616 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n")
617
618 cata("<h4>Additional Information</h4>\n")
619 cata("<ul>\n")
620
621 if (filtCPM || filtSmpCount || filtTotCount) {
622 if (filtCPM) {
623 tempStr <- paste("Genes without more than", opt$cmpReq,
624 "CPM in at least", opt$sampleReq, "samples are insignificant",
625 "and filtered out.")
626 } else if (filtSmpCount) {
627 tempStr <- paste("Genes without more than", opt$cntReq,
628 "counts in at least", opt$sampleReq, "samples are insignificant",
629 "and filtered out.")
630 } else if (filtTotCount) {
631 tempStr <- paste("Genes without more than", opt$cntReq,
632 "counts, after summing counts for all samples, are insignificant",
633 "and filtered out.")
634 }
635
636 ListItem(tempStr)
637 filterProp <- round(filteredCount/preFilterCount*100, digits=2)
638 tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp,
639 "%) genes were filtered out for low expression.")
640 ListItem(tempStr)
641 }
642 ListItem(opt$normOpt, " was the method used to normalise library sizes.")
643 if (wantLRT) {
644 ListItem("The edgeR likelihood ratio test was used.")
645 } else {
646 if (wantRobust) {
647 ListItem("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).")
648 } else {
649 ListItem("The edgeR quasi-likelihood test was used.")
650 }
651 }
652 if (opt$pAdjOpt!="none") {
653 if (opt$pAdjOpt=="BH" || opt$pAdjOpt=="BY") {
654 tempStr <- paste0("MD-Plot highlighted genes are significant at FDR ",
655 "of ", opt$pValReq," and exhibit log2-fold-change of at ",
656 "least ", opt$lfcReq, ".")
657 ListItem(tempStr)
658 } else if (opt$pAdjOpt=="holm") {
659 tempStr <- paste0("MD-Plot highlighted genes are significant at adjusted ",
660 "p-value of ", opt$pValReq," by the Holm(1979) ",
661 "method, and exhibit log2-fold-change of at least ",
662 opt$lfcReq, ".")
663 ListItem(tempStr)
664 }
665 } else {
666 tempStr <- paste0("MD-Plot highlighted genes are significant at p-value ",
667 "of ", opt$pValReq," and exhibit log2-fold-change of at ",
668 "least ", opt$lfcReq, ".")
669 ListItem(tempStr)
670 }
671 cata("</ul>\n")
672
673 cata("<h4>Summary of experimental data:</h4>\n")
674
675 cata("<p>*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*</p>\n")
676
677 cata("<table border=\"1\" cellpadding=\"3\">\n")
678 cata("<tr>\n")
679 TableHeadItem("SampleID")
680 TableHeadItem(names(factors)[1], " (Primary Factor)")
681
682 if (ncol(factors) > 1) {
683 for (i in names(factors)[2:length(names(factors))]) {
684 TableHeadItem(i)
685 }
686 cata("</tr>\n")
687 }
688
689 for (i in 1:nrow(factors)) {
690 cata("<tr>\n")
691 TableHeadItem(row.names(factors)[i])
692 for (j in 1:ncol(factors)) {
693 TableItem(as.character(unmake.names(factors[i, j])))
694 }
695 cata("</tr>\n")
696 }
697 cata("</table>")
698
699 for (i in 1:nrow(linkData)) {
700 if (grepl("session_info", linkData$Link[i])) {
701 HtmlLink(linkData$Link[i], linkData$Label[i])
702 }
703 }
704
705 cata("<table border=\"0\">\n")
706 cata("<tr>\n")
707 TableItem("Task started at:"); TableItem(timeStart)
708 cata("</tr>\n")
709 cata("<tr>\n")
710 TableItem("Task ended at:"); TableItem(timeEnd)
711 cata("</tr>\n")
712 cata("<tr>\n")
713 TableItem("Task run time:"); TableItem(timeTaken)
714 cata("<tr>\n")
715 cata("</table>\n")
716
717 cata("</body>\n")
718 cata("</html>")