2
|
1 # Record starting time
|
|
2 timeStart <- as.character(Sys.time())
|
|
3
|
|
4 # Loading and checking required packages
|
|
5 library(methods, quietly=TRUE, warn.conflicts=FALSE)
|
|
6 library(statmod, quietly=TRUE, warn.conflicts=FALSE)
|
|
7 library(splines, quietly=TRUE, warn.conflicts=FALSE)
|
|
8 library(edgeR, quietly=TRUE, warn.conflicts=FALSE)
|
|
9 library(limma, quietly=TRUE, warn.conflicts=FALSE)
|
|
10
|
|
11 if (packageVersion("edgeR") < "3.5.23") {
|
|
12 message("Please update 'edgeR' to version >= 3.5.23 to run this script")
|
|
13 }
|
|
14
|
|
15 ################################################################################
|
|
16 ### Function declarations
|
|
17 ################################################################################
|
|
18
|
|
19 # Function to sanitise contrast equations so there are no whitespaces
|
|
20 # surrounding the arithmetic operators, leading or trailing whitespace
|
|
21 sanitiseEquation <- function(equation) {
|
|
22 equation <- gsub(" *[+] *", "+", equation)
|
|
23 equation <- gsub(" *[-] *", "-", equation)
|
|
24 equation <- gsub(" *[/] *", "/", equation)
|
|
25 equation <- gsub(" *[*] *", "*", equation)
|
|
26 equation <- gsub("^\\s+|\\s+$", "", equation)
|
|
27 return(equation)
|
|
28 }
|
|
29
|
|
30 # Function to sanitise group information
|
|
31 sanitiseGroups <- function(string) {
|
|
32 string <- gsub(" *[,] *", ",", string)
|
|
33 string <- gsub("^\\s+|\\s+$", "", string)
|
|
34 return(string)
|
|
35 }
|
|
36
|
|
37 # Function to change periods to whitespace in a string
|
|
38 unmake.names <- function(string) {
|
|
39 string <- gsub(".", " ", string, fixed=TRUE)
|
|
40 return(string)
|
|
41 }
|
|
42
|
|
43 # Function has string input and generates an output path string
|
|
44 makeOut <- function(filename) {
|
|
45 return(paste0(folderPath, "/", filename))
|
|
46 }
|
|
47
|
|
48 # Function has string input and generates both a pdf and png output strings
|
|
49 imgOut <- function(filename) {
|
|
50 assign(paste0(filename, "Png"), makeOut(paste0(filename,".png")),
|
|
51 envir = .GlobalEnv)
|
|
52 assign(paste0(filename, "Pdf"), makeOut(paste0(filename,".pdf")),
|
|
53 envir = .GlobalEnv)
|
|
54 }
|
|
55
|
|
56 # Create cat function default path set, default seperator empty and appending
|
|
57 # true by default (Ripped straight from the cat function with altered argument
|
|
58 # defaults)
|
|
59 cata <- function(..., file = htmlPath, sep = "", fill = FALSE, labels = NULL,
|
|
60 append = TRUE) {
|
|
61 if (is.character(file))
|
|
62 if (file == "")
|
|
63 file <- stdout()
|
|
64 else if (substring(file, 1L, 1L) == "|") {
|
|
65 file <- pipe(substring(file, 2L), "w")
|
|
66 on.exit(close(file))
|
|
67 }
|
|
68 else {
|
|
69 file <- file(file, ifelse(append, "a", "w"))
|
|
70 on.exit(close(file))
|
|
71 }
|
|
72 .Internal(cat(list(...), file, sep, fill, labels, append))
|
|
73 }
|
|
74
|
|
75 # Function to write code for html head and title
|
|
76 HtmlHead <- function(title) {
|
|
77 cata("<head>\n")
|
|
78 cata("<title>", title, "</title>\n")
|
|
79 cata("</head>\n")
|
|
80 }
|
|
81
|
|
82 # Function to write code for html links
|
|
83 HtmlLink <- function(address, label=address) {
|
|
84 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n")
|
|
85 }
|
|
86
|
|
87 # Function to write code for html images
|
|
88 HtmlImage <- function(source, label=source, height=600, width=600) {
|
|
89 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
|
|
90 cata("\" width=\"", width, "\"/>\n")
|
|
91 }
|
|
92
|
|
93 # Function to write code for html list items
|
|
94 ListItem <- function(...) {
|
|
95 cata("<li>", ..., "</li>\n")
|
|
96 }
|
|
97
|
|
98 TableItem <- function(...) {
|
|
99 cata("<td>", ..., "</td>\n")
|
|
100 }
|
|
101
|
|
102 TableHeadItem <- function(...) {
|
|
103 cata("<th>", ..., "</th>\n")
|
|
104 }
|
|
105 ################################################################################
|
|
106 ### Input Processing
|
|
107 ################################################################################
|
|
108
|
|
109 # Grabbing arguments from command line
|
|
110 argv <- commandArgs(TRUE)
|
|
111
|
|
112 # Remove fastq file paths after collecting from argument vector
|
|
113 inputType <- as.character(argv[1])
|
|
114 if (inputType=="fastq") {
|
|
115 fastqPath <- as.character(gsub("fastq::", "", argv[grepl("fastq::", argv)],
|
|
116 fixed=TRUE))
|
|
117 argv <- argv[!grepl("fastq::", argv, fixed=TRUE)]
|
|
118 hairpinPath <- as.character(argv[2])
|
|
119 samplePath <- as.character(argv[3])
|
|
120 barStart <- as.numeric(argv[4])
|
|
121 barEnd <- as.numeric(argv[5])
|
|
122 hpStart <- as.numeric(argv[6])
|
|
123 hpEnd <- as.numeric(argv[7])
|
|
124 } else if (inputType=="counts") {
|
|
125 countPath <- as.character(argv[2])
|
|
126 annoPath <- as.character(argv[3])
|
|
127 samplePath <- as.character(argv[4])
|
|
128 }
|
|
129
|
|
130 cpmReq <- as.numeric(argv[8])
|
|
131 sampleReq <- as.numeric(argv[9])
|
|
132 fdrThresh <- as.numeric(argv[10])
|
|
133 lfcThresh <- as.numeric(argv[11])
|
|
134 workMode <- as.character(argv[12])
|
|
135 htmlPath <- as.character(argv[13])
|
|
136 folderPath <- as.character(argv[14])
|
|
137 if (workMode=="classic") {
|
|
138 pairData <- character()
|
|
139 pairData[2] <- as.character(argv[15])
|
|
140 pairData[1] <- as.character(argv[16])
|
|
141 } else if (workMode=="glm") {
|
|
142 contrastData <- as.character(argv[15])
|
|
143 roastOpt <- as.character(argv[16])
|
|
144 hairpinReq <- as.numeric(argv[17])
|
|
145 selectOpt <- as.character(argv[18])
|
|
146 selectVals <- as.character(argv[19])
|
|
147 }
|
|
148
|
|
149 # Read in inputs
|
|
150 if (inputType=="fastq") {
|
|
151 samples <- read.table(samplePath, header=TRUE, sep="\t")
|
|
152 hairpins <- read.table(hairpinPath, header=TRUE, sep="\t")
|
|
153 } else if (inputType=="counts") {
|
|
154 samples <- read.table(samplePath, header=TRUE, sep="\t")
|
|
155 counts <- read.table(countPath, header=TRUE, sep="\t")
|
|
156 anno <- read.table(annoPath, header=TRUE, sep="\t")
|
|
157 }
|
|
158 ###################### Check inputs for correctness ############################
|
|
159 samples$ID <- make.names(samples$ID)
|
|
160
|
|
161 if (!any(grepl("group", names(samples)))) {
|
|
162 stop("'group' column not specified in sample annotation file")
|
|
163 } # Check if grouping variable has been specified
|
|
164
|
|
165 if (any(table(samples$ID)>1)){
|
|
166 tab <- table(samples$ID)
|
|
167 offenders <- paste(names(tab[tab>1]), collapse=", ")
|
|
168 offenders <- unmake.names(offenders)
|
|
169 stop("ID column of sample annotation must have unique values, values ",
|
|
170 offenders, " are repeated")
|
|
171 } # Check that IDs in sample annotation are unique
|
|
172
|
|
173 if (any(is.na(match(samples$ID, colnames(counts))))) {
|
|
174 stop("not all samples have groups specified")
|
|
175 } # Check that a group has be specifed for each sample
|
|
176
|
|
177 if (inputType=="fastq") {
|
|
178
|
|
179 if (any(table(hairpins$ID)>1)){
|
|
180 tab <- table(hairpins$ID)
|
|
181 offenders <- paste(names(tab[tab>1]), collapse=", ")
|
|
182 stop("ID column of hairpin annotation must have unique values, values ",
|
|
183 offenders, " are repeated")
|
|
184 } # Check that IDs in hairpin annotation are unique
|
|
185
|
|
186 } else if (inputType=="counts") {
|
|
187
|
|
188 if (any(table(counts$ID)>1)){
|
|
189 tab <- table(counts$ID)
|
|
190 offenders <- paste(names(tab[tab>1]), collapse=", ")
|
|
191 stop("ID column of count table must have unique values, values ",
|
|
192 offenders, " are repeated")
|
|
193 } # Check that IDs in count table are unique
|
|
194 }
|
|
195 ################################################################################
|
|
196
|
|
197 # Process arguments
|
|
198 if (workMode=="glm") {
|
|
199 if (roastOpt=="yes") {
|
|
200 wantRoast <- TRUE
|
|
201 } else {
|
|
202 wantRoast <- FALSE
|
|
203 }
|
|
204 }
|
|
205
|
|
206 # Split up contrasts seperated by comma into a vector and replace spaces with
|
|
207 # periods
|
|
208 if (exists("contrastData")) {
|
|
209 contrastData <- unlist(strsplit(contrastData, split=","))
|
|
210 contrastData <- sanitiseEquation(contrastData)
|
|
211 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
|
|
212 }
|
|
213
|
|
214 # Replace spaces with periods in pair data
|
|
215 if (exists("pairData")) {
|
|
216 pairData <- make.names(pairData)
|
|
217 }
|
|
218
|
|
219 # Generate output folder and paths
|
|
220 dir.create(folderPath)
|
|
221
|
|
222 # Generate links for outputs
|
|
223 imgOut("barHairpin")
|
|
224 imgOut("barIndex")
|
|
225 imgOut("mds")
|
|
226 imgOut("bcv")
|
|
227 if (workMode == "classic") {
|
|
228 smearPng <- makeOut(paste0("smear(", pairData[2], "-", pairData[1],").png"))
|
|
229 smearPdf <- makeOut(paste0("smear(", pairData[2], "-", pairData[1],").pdf"))
|
|
230 topOut <- makeOut(paste0("toptag(", pairData[2], "-", pairData[1],").tsv"))
|
|
231 } else if (workMode=="glm") {
|
|
232 smearPng <- character()
|
|
233 smearPdf <- character()
|
|
234 topOut <- character()
|
|
235 roastOut <- character()
|
|
236 barcodePng <- character()
|
|
237 barcodePdf <- character()
|
|
238 for (i in 1:length(contrastData)) {
|
|
239 smearPng[i] <- makeOut(paste0("smear(", contrastData[i], ").png"))
|
|
240 smearPdf[i] <- makeOut(paste0("smear(", contrastData[i], ").pdf"))
|
|
241 topOut[i] <- makeOut(paste0("toptag(", contrastData[i], ").tsv"))
|
|
242 roastOut[i] <- makeOut(paste0("roast(", contrastData[i], ").tsv"))
|
|
243 barcodePng[i] <- makeOut(paste0("barcode(", contrastData[i], ").png"))
|
|
244 barcodePdf[i] <- makeOut(paste0("barcode(", contrastData[i], ").pdf"))
|
|
245 }
|
|
246 }
|
|
247 # Initialise data for html links and images, table with the link label and
|
|
248 # link address
|
|
249 linkData <- data.frame(Label=character(), Link=character(),
|
|
250 stringsAsFactors=FALSE)
|
|
251 imageData <- data.frame(Label=character(), Link=character(),
|
|
252 stringsAsFactors=FALSE)
|
|
253 ################################################################################
|
|
254 ### Data Processing
|
|
255 ################################################################################
|
|
256
|
|
257 # Transform gene selection from string into index values for mroast
|
|
258 if (workMode=="glm") {
|
|
259 if (selectOpt=="rank") {
|
|
260 selectVals <- gsub(" ", "", selectVals, fixed=TRUE)
|
|
261 selectVals <- unlist(strsplit(selectVals, ","))
|
|
262
|
|
263 for (i in 1:length(selectVals)) {
|
|
264 if (grepl(":", selectVals[i], fixed=TRUE)) {
|
|
265 temp <- unlist(strsplit(selectVals[i], ":"))
|
|
266 selectVals <- selectVals[-i]
|
|
267 a <- as.numeric(temp[1])
|
|
268 b <- as.numeric(temp[2])
|
|
269 selectVals <- c(selectVals, a:b)
|
|
270 }
|
|
271 }
|
|
272 selectVals <- as.numeric(unique(selectVals))
|
|
273 } else {
|
|
274 selectVals <- gsub(" ", "", selectVals, fixed=TRUE)
|
|
275 selectVals <- unlist(strsplit(selectVals, " "))
|
|
276 }
|
|
277 }
|
|
278
|
|
279 if (inputType=="fastq") {
|
|
280 # Use EdgeR hairpin process and capture outputs
|
|
281 hpReadout <- capture.output(
|
|
282 data <- processHairpinReads(fastqPath, samplePath, hairpinPath,
|
|
283 hairpinStart=hpStart, hairpinEnd=hpEnd,
|
|
284 verbose=TRUE)
|
|
285 )
|
|
286
|
|
287 # Remove function output entries that show processing data or is empty
|
|
288 hpReadout <- hpReadout[hpReadout!=""]
|
|
289 hpReadout <- hpReadout[!grepl("Processing", hpReadout)]
|
|
290 hpReadout <- hpReadout[!grepl("in file", hpReadout)]
|
|
291 hpReadout <- gsub(" -- ", "", hpReadout, fixed=TRUE)
|
|
292
|
|
293
|
|
294 # Make the names of groups syntactically valid (replace spaces with periods)
|
|
295 data$samples$group <- make.names(data$samples$group)
|
|
296 } else {
|
|
297 # Process counts information, set ID column to be row names
|
|
298 rownames(counts) <- counts$ID
|
|
299 counts <- counts[ , !(colnames(counts)=="ID")]
|
|
300 countsRows <- nrow(counts)
|
|
301
|
|
302 # Process group information
|
|
303 factors <- samples$group[match(samples$ID, colnames(counts))]
|
|
304 annoRows <- nrow(anno)
|
|
305 anno <- anno[match(rownames(counts), anno$ID), ]
|
|
306 annoMatched <- sum(!is.na(anno$ID))
|
|
307
|
|
308 if (any(is.na(anno$ID))) {
|
|
309 warningStr <- paste("count table contained more hairpins than",
|
|
310 "specified in hairpin annotation file")
|
|
311 warning(warningStr)
|
|
312 }
|
|
313
|
|
314 # Filter out rows with zero counts
|
|
315 sel <- rowSums(counts)!=0
|
|
316 counts <- counts[sel, ]
|
|
317 anno <- anno[sel, ]
|
|
318
|
|
319 # Create DGEList
|
|
320 data <- DGEList(counts=counts, lib.size=colSums(counts),
|
|
321 norm.factors=rep(1,ncol(counts)), genes=anno, group=factors)
|
|
322
|
|
323 # Make the names of groups syntactically valid (replace spaces with periods)
|
|
324 data$samples$group <- make.names(data$samples$group)
|
|
325 }
|
|
326
|
|
327 # Filter hairpins with low counts
|
|
328 sel <- rowSums(cpm(data$counts) > cpmReq) >= sampleReq
|
|
329 data <- data[sel, ]
|
|
330
|
|
331 # Estimate dispersions
|
|
332 data <- estimateDisp(data)
|
|
333 commonBCV <- sqrt(data$common.dispersion)
|
|
334
|
|
335 ################################################################################
|
|
336 ### Output Processing
|
|
337 ################################################################################
|
|
338
|
|
339 # Plot number of hairpins that could be matched per sample
|
|
340 png(barIndexPng, width=600, height=600)
|
|
341 barplot(height<-colSums(data$counts), las=2, main="Counts per index",
|
|
342 cex.names=1.0, cex.axis=0.8, ylim=c(0, max(height)*1.2))
|
|
343 imageData[1, ] <- c("Counts per Index", "barIndex.png")
|
|
344 invisible(dev.off())
|
|
345
|
|
346 pdf(barIndexPdf)
|
|
347 barplot(height<-colSums(data$counts), las=2, main="Counts per index",
|
|
348 cex.names=1.0, cex.axis=0.8, ylim=c(0, max(height)*1.2))
|
|
349 linkData[1, ] <- c("Counts per Index Barplot (.pdf)", "barIndex.pdf")
|
|
350 invisible(dev.off())
|
|
351
|
|
352 # Plot per hairpin totals across all samples
|
|
353 png(barHairpinPng, width=600, height=600)
|
|
354 if (nrow(data$counts)<50) {
|
|
355 barplot(height<-rowSums(data$counts), las=2, main="Counts per hairpin",
|
|
356 cex.names=0.8, cex.axis=0.8, ylim=c(0, max(height)*1.2))
|
|
357 } else {
|
|
358 barplot(height<-rowSums(data$counts), las=2, main="Counts per hairpin",
|
|
359 cex.names=0.8, cex.axis=0.8, ylim=c(0, max(height)*1.2),
|
|
360 names.arg=FALSE)
|
|
361 }
|
|
362 imageData <- rbind(imageData, c("Counts per Hairpin", "barHairpin.png"))
|
|
363 invisible(dev.off())
|
|
364
|
|
365 pdf(barHairpinPdf)
|
|
366 if (nrow(data$counts)<50) {
|
|
367 barplot(height<-rowSums(data$counts), las=2, main="Counts per hairpin",
|
|
368 cex.names=0.8, cex.axis=0.8, ylim=c(0, max(height)*1.2))
|
|
369 } else {
|
|
370 barplot(height<-rowSums(data$counts), las=2, main="Counts per hairpin",
|
|
371 cex.names=0.8, cex.axis=0.8, ylim=c(0, max(height)*1.2),
|
|
372 names.arg=FALSE)
|
|
373 }
|
|
374 newEntry <- c("Counts per Hairpin Barplot (.pdf)", "barHairpin.pdf")
|
|
375 linkData <- rbind(linkData, newEntry)
|
|
376 invisible(dev.off())
|
|
377
|
|
378 # Make an MDS plot to visualise relationships between replicate samples
|
|
379 png(mdsPng, width=600, height=600)
|
|
380 plotMDS(data, labels=data$samples$group, col=as.numeric(data$samples$group),
|
|
381 main="MDS Plot")
|
|
382 imageData <- rbind(imageData, c("MDS Plot", "mds.png"))
|
|
383 invisible(dev.off())
|
|
384
|
|
385 pdf(mdsPdf)
|
|
386 plotMDS(data, labels=data$samples$group, col=as.numeric(data$samples$group),
|
|
387 main="MDS Plot")
|
|
388 newEntry <- c("MDS Plot (.pdf)", "mds.pdf")
|
|
389 linkData <- rbind(linkData, newEntry)
|
|
390 invisible(dev.off())
|
|
391
|
|
392 if (workMode=="classic") {
|
|
393 # Assess differential representation using classic exact testing methodology
|
|
394 # in edgeR
|
|
395 testData <- exactTest(data, pair=pairData)
|
|
396
|
|
397 top <- topTags(testData, n=Inf)
|
|
398 topIDs <- top$table[(top$table$FDR < fdrThresh) &
|
|
399 (abs(top$table$logFC) > lfcThresh), 1]
|
|
400 write.table(top, file=topOut, row.names=FALSE, sep="\t")
|
|
401 linkName <- paste0("Top Tags Table(", pairData[2], "-", pairData[1],
|
|
402 ") (.tsv)")
|
|
403 linkAddr <- paste0("toptag(", pairData[2], "-", pairData[1], ").tsv")
|
|
404 linkData <- rbind(linkData, c(linkName, linkAddr))
|
|
405
|
|
406 # Select hairpins with FDR < 0.05 to highlight on plot
|
|
407 png(smearPng, width=600, height=600)
|
|
408 plotTitle <- gsub(".", " ",
|
|
409 paste0("Smear Plot: ", pairData[2], "-", pairData[1]),
|
|
410 fixed = TRUE)
|
|
411 plotSmear(testData, de.tags=topIDs,
|
|
412 pch=20, cex=1.0, main=plotTitle)
|
|
413 abline(h = c(-1, 0, 1), col = c("dodgerblue", "yellow", "dodgerblue"), lty=2)
|
|
414 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ")")
|
|
415 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1],").png")
|
|
416 imageData <- rbind(imageData, c(imgName, imgAddr))
|
|
417 invisible(dev.off())
|
|
418
|
|
419 pdf(smearPdf)
|
|
420 plotTitle <- gsub(".", " ",
|
|
421 paste0("Smear Plot: ", pairData[2], "-", pairData[1]),
|
|
422 fixed = TRUE)
|
|
423 plotSmear(testData, de.tags=topIDs,
|
|
424 pch=20, cex=1.0, main=plotTitle)
|
|
425 abline(h = c(-1, 0, 1), col = c("dodgerblue", "yellow", "dodgerblue"), lty=2)
|
|
426 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ") (.pdf)")
|
|
427 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1], ").pdf")
|
|
428 linkData <- rbind(linkData, c(imgName, imgAddr))
|
|
429 invisible(dev.off())
|
|
430 } else if (workMode=="glm") {
|
|
431 # Generating design information
|
|
432 factors <- factor(data$sample$group)
|
|
433 design <- model.matrix(~0+factors)
|
|
434
|
|
435 colnames(design) <- gsub("factors", "", colnames(design), fixed=TRUE)
|
|
436
|
|
437 # Split up contrasts seperated by comma into a vector
|
|
438 contrastData <- unlist(strsplit(contrastData, split=","))
|
|
439 for (i in 1:length(contrastData)) {
|
|
440 # Generate contrasts information
|
|
441 contrasts <- makeContrasts(contrasts=contrastData[i], levels=design)
|
|
442
|
|
443 # Fit negative bionomial GLM
|
|
444 fit = glmFit(data, design)
|
|
445 # Carry out Likelihood ratio test
|
|
446 testData = glmLRT(fit, contrast=contrasts)
|
|
447
|
|
448 # Select hairpins with FDR < 0.05 to highlight on plot
|
|
449 top <- topTags(testData, n=Inf)
|
|
450 topIDs <- top$table[(top$table$FDR < fdrThresh) &
|
|
451 (abs(top$table$logFC) > lfcThresh), 1]
|
|
452 write.table(top, file=topOut[i], row.names=FALSE, sep="\t")
|
|
453
|
|
454 linkName <- paste0("Top Tags Table(", contrastData[i], ") (.tsv)")
|
|
455 linkAddr <- paste0("toptag(", contrastData[i], ").tsv")
|
|
456 linkData <- rbind(linkData, c(linkName, linkAddr))
|
|
457
|
|
458 # Make a plot of logFC versus logCPM
|
|
459 png(smearPng[i], height=600, width=600)
|
|
460 plotTitle <- paste("Smear Plot:", gsub(".", " ", contrastData[i],
|
|
461 fixed=TRUE))
|
|
462 plotSmear(testData, de.tags=topIDs, pch=20, cex=0.8, main=plotTitle)
|
|
463 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2)
|
|
464
|
|
465 imgName <- paste0("Smear Plot(", contrastData[i], ")")
|
|
466 imgAddr <- paste0("smear(", contrastData[i], ").png")
|
|
467 imageData <- rbind(imageData, c(imgName, imgAddr))
|
|
468 invisible(dev.off())
|
|
469
|
|
470 pdf(smearPdf[i])
|
|
471 plotTitle <- paste("Smear Plot:", gsub(".", " ", contrastData[i],
|
|
472 fixed=TRUE))
|
|
473 plotSmear(testData, de.tags=topIDs, pch=20, cex=0.8, main=plotTitle)
|
|
474 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2)
|
|
475
|
|
476 linkName <- paste0("Smear Plot(", contrastData[i], ") (.pdf)")
|
|
477 linkAddr <- paste0("smear(", contrastData[i], ").pdf")
|
|
478 linkData <- rbind(linkData, c(linkName, linkAddr))
|
|
479 invisible(dev.off())
|
|
480
|
|
481 genes <- as.character(data$genes$Gene)
|
|
482 unq <- unique(genes)
|
|
483 unq <- unq[!is.na(unq)]
|
|
484 geneList <- list()
|
|
485 for (gene in unq) {
|
|
486 if (length(which(genes==gene)) >= hairpinReq) {
|
|
487 geneList[[gene]] <- which(genes==gene)
|
|
488 }
|
|
489 }
|
|
490
|
|
491 if (wantRoast) {
|
|
492 # Input preparaton for roast
|
|
493 nrot = 9999
|
|
494 set.seed(602214129)
|
|
495 roastData <- mroast(data, index=geneList, design=design,
|
|
496 contrast=contrasts, nrot=nrot)
|
|
497 roastData <- cbind(GeneID=rownames(roastData), roastData)
|
|
498 write.table(roastData, file=roastOut[i], row.names=FALSE, sep="\t")
|
|
499 linkName <- paste0("Gene Level Analysis Table(", contrastData[i],
|
|
500 ") (.tsv)")
|
|
501 linkAddr <- paste0("roast(", contrastData[i], ").tsv")
|
|
502 linkData <- rbind(linkData, c(linkName, linkAddr))
|
|
503 if (selectOpt=="rank") {
|
|
504 selectedGenes <- rownames(roastData)[selectVals]
|
|
505 } else {
|
|
506 selectedGenes <- selectVals
|
|
507 }
|
|
508
|
|
509 if (packageVersion("limma")<"3.19.19") {
|
|
510 png(barcodePng[i], width=600, height=length(selectedGenes)*150)
|
|
511 } else {
|
|
512 png(barcodePng[i], width=600, height=length(selectedGenes)*300)
|
|
513 }
|
|
514 par(mfrow=c(length(selectedGenes), 1))
|
|
515 for (gene in selectedGenes) {
|
|
516 barcodeplot(testData$table$logFC, index=geneList[[gene]],
|
|
517 main=paste("Barcode Plot for", gene, "(logFCs)",
|
|
518 gsub(".", " ", contrastData[i])),
|
|
519 labels=c("Positive logFC", "Negative logFC"))
|
|
520 }
|
|
521 imgName <- paste0("Barcode Plot(", contrastData[i], ")")
|
|
522 imgAddr <- paste0("barcode(", contrastData[i], ").png")
|
|
523 imageData <- rbind(imageData, c(imgName, imgAddr))
|
|
524 dev.off()
|
|
525 if (packageVersion("limma")<"3.19.19") {
|
|
526 pdf(barcodePdf[i], width=8, height=2)
|
|
527 } else {
|
|
528 pdf(barcodePdf[i], width=8, height=4)
|
|
529 }
|
|
530 for (gene in selectedGenes) {
|
|
531 barcodeplot(testData$table$logFC, index=geneList[[gene]],
|
|
532 main=paste("Barcode Plot for", gene, "(logFCs)",
|
|
533 gsub(".", " ", contrastData[i])),
|
|
534 labels=c("Positive logFC", "Negative logFC"))
|
|
535 }
|
|
536 linkName <- paste0("Barcode Plot(", contrastData[i], ") (.pdf)")
|
|
537 linkAddr <- paste0("barcode(", contrastData[i], ").pdf")
|
|
538 linkData <- rbind(linkData, c(linkName, linkAddr))
|
|
539 dev.off()
|
|
540 }
|
|
541 }
|
|
542 }
|
|
543
|
|
544 # Record ending time
|
|
545 timeEnd <- as.character(Sys.time())
|
|
546 ################################################################################
|
|
547 ### HTML Generation
|
|
548 ################################################################################
|
|
549 # Clear file
|
|
550 cat("", file=htmlPath)
|
|
551
|
|
552 cata("<html>\n")
|
|
553 HtmlHead("EdgeR Output")
|
|
554
|
|
555 cata("<body>\n")
|
|
556 cata("<h3>EdgeR Analysis Output:</h3>\n")
|
|
557 cata("<h4>Input Summary:</h4>\n")
|
|
558 if (inputType=="fastq") {
|
|
559 cata("<ul>\n")
|
|
560 ListItem(hpReadout[1])
|
|
561 ListItem(hpReadout[2])
|
|
562 cata("</ul>\n")
|
|
563 cata(hpReadout[3], "<br/>\n")
|
|
564 cata("<ul>\n")
|
|
565 ListItem(hpReadout[4])
|
|
566 ListItem(hpReadout[7])
|
|
567 cata("</ul>\n")
|
|
568 cata(hpReadout[8:11], sep="<br/>\n")
|
|
569 cata("<br />\n")
|
|
570 cata("<b>Please check that read percentages are consistent with ")
|
|
571 cata("expectations.</b><br >\n")
|
|
572 } else if (inputType=="counts") {
|
|
573 cata("<ul>\n")
|
|
574 ListItem("Number of Samples: ", ncol(data$counts))
|
|
575 ListItem("Number of Hairpins: ", countsRows)
|
|
576 ListItem("Number of annotations provided: ", annoRows)
|
|
577 ListItem("Number of annotations matched to hairpin: ", annoMatched)
|
|
578 cata("</ul>\n")
|
|
579 }
|
|
580
|
|
581 cata("The estimated common biological coefficient of variation (BCV) is: ",
|
|
582 commonBCV, "<br />\n")
|
|
583
|
|
584 cata("<h4>Output:</h4>\n")
|
|
585 cata("All images displayed have PDF copy at the bottom of the page, these can ")
|
|
586 cata("exported in a pdf viewer to high resolution image format. <br/>\n")
|
|
587 for (i in 1:nrow(imageData)) {
|
|
588 if (grepl("barcode", imageData$Link[i])) {
|
|
589 if (packageVersion("limma")<"3.19.19") {
|
|
590 HtmlImage(imageData$Link[i], imageData$Label[i],
|
|
591 height=length(selectedGenes)*150)
|
|
592 } else {
|
|
593 HtmlImage(imageData$Link[i], imageData$Label[i],
|
|
594 height=length(selectedGenes)*300)
|
|
595 }
|
|
596 } else {
|
|
597 HtmlImage(imageData$Link[i], imageData$Label[i])
|
|
598 }
|
|
599 }
|
|
600 cata("<br/>\n")
|
|
601
|
|
602 cata("<h4>Plots:</h4>\n")
|
|
603 for (i in 1:nrow(linkData)) {
|
|
604 if (!grepl(".tsv", linkData$Link[i])) {
|
|
605 HtmlLink(linkData$Link[i], linkData$Label[i])
|
|
606 }
|
|
607 }
|
|
608
|
|
609 cata("<h4>Tables:</h4>\n")
|
|
610 for (i in 1:nrow(linkData)) {
|
|
611 if (grepl(".tsv", linkData$Link[i])) {
|
|
612 HtmlLink(linkData$Link[i], linkData$Label[i])
|
|
613 }
|
|
614 }
|
|
615
|
|
616 cata("<p>alt-click any of the links to download the file, or click the name ")
|
|
617 cata("of this task in the galaxy history panel and click on the floppy ")
|
|
618 cata("disk icon to download all files in a zip archive.</p>\n")
|
|
619 cata("<p>.tsv files are tab seperated files that can be viewed using Excel ")
|
|
620 cata("or other spreadsheet programs</p>\n")
|
|
621 cata("<table border=\"0\">\n")
|
|
622
|
|
623 cata("<tr>\n")
|
|
624 TableItem("Task started at:"); TableItem(timeStart)
|
|
625 cata("</tr>\n")
|
|
626 cata("<tr>\n")
|
|
627 TableItem("Task ended at:"); TableItem(timeEnd)
|
|
628 cata("</tr>\n")
|
|
629
|
|
630 cata("</body>\n")
|
|
631 cata("</html>")
|