repenrich2: edgeR_repenrich.R comparison

comparison edgeR_repenrich.R @ 0:4905a332a094 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537

author	artbio
date	Sat, 20 Apr 2024 11:56:53 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:4905a332a094
+# setup R error handling to go to stderr
+options(show.error.messages = FALSE, error = function() {
+cat(geterrmessage(), file = stderr())
+q("no", 1, FALSE)
+})
+# To not crash galaxy with an UTF8 error with not-US LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+# load libraries
+library("getopt")
+library("tools")
+library("rjson")
+suppressPackageStartupMessages({
+library("edgeR")
+library("limma")
+})
+options(stringAsFactors = FALSE, useFancyQuotes = FALSE)
+# get options, using the spec as defined by the enclosed list.
+spec <- matrix(
+c(
+"quiet", "q", 0, "logical",
+"outfile", "o", 1, "character",
+"countsfile", "n", 1, "character",
+"factorName", "N", 1, "character",
+"levelNameA", "A", 1, "character",
+"levelNameB", "B", 1, "character",
+"levelAfiles", "a", 1, "character",
+"levelBfiles", "b", 1, "character",
+"plots", "p", 1, "character"
+),
+byrow = TRUE, ncol = 4
+)
+opt <- getopt(spec)
+# build levels A and B file lists
+filesA <- fromJSON(opt$levelAfiles, method = "C", unexpected.escape = "error")
+filesB <- fromJSON(opt$levelBfiles, method = "C", unexpected.escape = "error")
+listA <- list()
+indice <- 0
+listA[["level"]] <- opt$levelNameA
+for (file in filesA) {
+indice <- indice + 1
+listA[[paste0(opt$levelNameA, "_", indice)]] <- read.delim(file, header = FALSE)
+}
+listB <- list()
+indice <- 0
+listB[["level"]] <- opt$levelNameB
+for (file in filesB) {
+indice <- indice + 1
+listB[[paste0(opt$levelNameB, "_", indice)]] <- read.delim(file, header = FALSE)
+}
+# build a counts table
+counts <- data.frame(row.names = listA[[2]][, 1])
+for (element in names(listA[-1])) {
+counts <- cbind(counts, listA[[element]][, 4])
+}
+for (element in names(listB[-1])) {
+counts <- cbind(counts, listB[[element]][, 4])
+}
+colnames(counts) <- c(names(listA[-1]), names(listB[-1]))
+sizes <- colSums(counts)
+# build a meta data object
+meta <- data.frame(
+row.names = colnames(counts),
+condition = c(rep(opt$levelNameA, length(filesA)), rep(opt$levelNameB, length(filesB))),
+libsize = sizes
+)
+# Define the library size and conditions for the GLM
+libsize <- meta$libsize
+condition <- factor(meta$condition)
+design <- model.matrix(~ 0 + condition)
+colnames(design) <- levels(condition)
+# Build a DGE object for the GLM
+y <- DGEList(counts = counts, lib.size = libsize)
+# Normalize the data
+y <- calcNormFactors(y)
+# Estimate the variance
+y <- estimateGLMCommonDisp(y, design)
+y <- estimateGLMTrendedDisp(y, design)
+y <- estimateGLMTagwiseDisp(y, design)
+# Builds and outputs an object to contain the normalized read abundance in counts per million of reads
+cpm <- cpm(y, log = FALSE, lib.size = libsize)
+cpm <- as.data.frame(cpm)
+colnames(cpm) <- colnames(counts)
+if (!is.null(opt$countsfile)) {
+normalizedAbundance <- data.frame(Tag = rownames(cpm))
+normalizedAbundance <- cbind(normalizedAbundance, cpm)
+write.table(normalizedAbundance, file = opt$countsfile, sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE)
+}
+# Conduct fitting of the GLM
+yfit <- glmFit(y, design)
+# Initialize result matrices to contain the results of the GLM
+results <- matrix(nrow = dim(counts)[1], ncol = 0)
+logfc <- matrix(nrow = dim(counts)[1], ncol = 0)
+# Make the comparisons for the GLM
+my.contrasts <- makeContrasts(
+paste0(opt$levelNameA, "_", opt$levelNameB, " = ", opt$levelNameA, " - ", opt$levelNameB),
+levels = design
+)
+# Define the contrasts used in the comparisons
+allcontrasts <- paste0(opt$levelNameA, " vs ", opt$levelNameB)
+# Conduct a for loop that will do the fitting of the GLM for each comparison
+# Put the results into the results objects
+lrt <- glmLRT(yfit, contrast = my.contrasts[, 1])
+res <- topTags(lrt, n = dim(c)[1], sort.by = "none")$table
+results <- cbind(results, res[, c(1, 5)])
+logfc <- cbind(logfc, res[c(1)])
+# Add the repeat types back into the results.
+# We should still have the same order as the input data
+results$class <- listA[[2]][, 2]
+results$type <- listA[[2]][, 3]
+# Sort the results table by the FDR
+results <- results[with(results, order(FDR)), ]
+# Plot Fold Changes for repeat classes and types
+# open the device and plots
+if (!is.null(opt$plots)) {
+pdf(opt$plots)
+plotMDS(y, main = "Multidimensional Scaling Plot Of Distances Between Samples")
+plotBCV(y, xlab = "Gene abundance (Average log CPM)", main = "Biological Coefficient of Variation Plot")
+logFC <- results[, "logFC"]
+# Plot the repeat classes
+classes <- with(results, reorder(class, -logFC, median))
+classes
+par(mar = c(6, 10, 4, 1))
+boxplot(logFC ~ classes,
+data = results, outline = FALSE, horizontal = TRUE,
+las = 2, xlab = "log2(Fold Change)", ylab = "", cex.axis = 0.7, main = paste0(allcontrasts, ", by Class")
+)
+abline(v = 0)
+# Plot the repeat types
+types <- with(results, reorder(type, -logFC, median))
+boxplot(logFC ~ types,
+data = results, outline = FALSE, horizontal = TRUE,
+las = 2, xlab = "log2(Fold Change)", ylab = "", cex.axis = 0.7, main = paste0(allcontrasts, ", by Type")
+)
+abline(v = 0)
+# volcano plot
+TEdata <- cbind(rownames(results), as.data.frame(results), score = -log(results$FDR, 10))
+colnames(TEdata) <- c("Tag", "log2FC", "FDR", "Class", "Type", "score")
+color <- ifelse(TEdata$FDR < 0.05, "red", "black")
+s <- subset(TEdata, FDR < 0.01)
+with(TEdata, plot(log2FC, score, pch = 20, col = color, main = "Volcano plot (all tag types)", ylab = "-log10(FDR)"))
+text(s[, 2], s[, 6], labels = s[, 5], pos = seq(from = 1, to = 3), cex = 0.5)
+}
+# close the plot device
+if (!is.null(opt$plots)) {
+cat("closing plot device\n")
+dev.off()
+}
+# Save the results
+results <- cbind(TE_item = rownames(results), results)
+colnames(results) <- c("TE_item", "log2FC", "FDR", "Class", "Type")
+results$log2FC <- format(results$log2FC, digits = 5)
+results$FDR <- format(results$FDR, digits = 5)
+write.table(results, opt$outfile, quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)

Mercurial > repos > artbio > repenrich2

comparison edgeR_repenrich.R @ 0:4905a332a094 draft