diff diffbind.R @ 18:f907216064f6 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit fd148a124034b44d0d61db3eec32ff991d8c152c
author iuc
date Mon, 08 Jul 2024 18:31:51 +0000
parents 2605cbdaa7d8
children
line wrap: on
line diff
--- a/diffbind.R	Fri Dec 15 19:39:14 2023 +0000
+++ b/diffbind.R	Mon Jul 08 18:31:51 2024 +0000
@@ -1,45 +1,47 @@
 ## Setup R error handling to go to stderr
-options(show.error.messages = FALSE, error = function() {
-  cat(geterrmessage(), file = stderr())
-  q("no", 1, FALSE)
+options(show.error.messages = FALSE, error = function() {
+    cat(geterrmessage(), file = stderr())
+    q("no", 1, FALSE) # save = "no", exit status 1, runLast = FALSE
 })
 # we need that to not crash galaxy with an UTF8 error on German LC settings.
 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 
 suppressPackageStartupMessages({
-  library("getopt")
-  library("DiffBind")
-  library("rjson")
+    library("getopt")
+    library("DiffBind")
+    library("rjson")
 })
 
 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
 args <- commandArgs(trailingOnly = TRUE)
 
-#get options, using the spec as defined by the enclosed list.
-#we read the options from the default: commandArgs(TRUE).
+# Get options using the spec matrix below; each row is: long name, short flag, argument mask (0 = none, 1 = required, 2 = optional), type.
+# getopt() reads the options from its default source, commandArgs(TRUE).
 spec <- matrix(c(
-  "infile", "i", 1, "character",
-  "outfile", "o", 1, "character",
-  "scorecol", "n", 1, "integer",
-  "lowerbetter", "l", 1, "logical",
-  "summits", "s", 1, "integer",
-  "th", "t", 1, "double",
-  "format", "f", 1, "character",
-  "plots", "p", 2, "character",
-  "bmatrix", "b", 0, "logical",
-  "rdaOpt", "r", 0, "logical",
-  "infoOpt", "a", 0, "logical",
-  "verbose", "v", 2, "integer",
-  "help", "h", 0, "logical"
+    "infile", "i", 1, "character",
+    "outfile", "o", 1, "character",
+    "method", "m", 1, "character",
+    "scorecol", "n", 1, "integer",
+    "lowerbetter", "l", 1, "logical",
+    "summits", "s", 1, "integer",
+    "th", "t", 1, "double",
+    "minoverlap", "O", 1, "integer",
+    "use_blacklist", "B", 0, "logical",
+    "format", "f", 1, "character",
+    "plots", "p", 2, "character",
+    "bmatrix", "b", 0, "logical",
+    "rdaOpt", "r", 0, "logical",
+    "infoOpt", "a", 0, "logical",
+    "verbose", "v", 2, "integer",
+    "help", "h", 0, "logical"
 ), byrow = TRUE, ncol = 4)
 
 opt <- getopt(spec)
-
 # if help was asked for print a friendly message
 # and exit with a non-zero error code
 if (!is.null(opt$help)) {
-  cat(getopt(spec, usage = TRUE))
-  q(status = 1)
+    cat(getopt(spec, usage = TRUE))
+    q(status = 1)
 }
 
 parser <- newJSONParser()
@@ -55,115 +57,127 @@
 samples <- sapply(strsplit(peaks, "-"), `[`, 2)
 
 if (length(ctrls) != 0) {
-  sample_table <- data.frame(
-    SampleID = samples,
-    Condition = groups,
-    bamReads = bams,
-    bamControl = ctrls,
-    Peaks = peaks,
-    Tissue = samples
-  ) # using "Tissue" column to display ids as labels in PCA plot
+    sample_table <- data.frame(
+        SampleID = samples,
+        Condition = groups,
+        bamReads = bams,
+        bamControl = ctrls,
+        Peaks = peaks,
+        Tissue = samples
+    ) # using "Tissue" column to display ids as labels in PCA plot
 } else {
-  sample_table <- data.frame(
-    SampleID = samples,
-    Replicate = samples,
-    Condition = groups,
-    bamReads = bams,
-    Peaks = peaks,
-    Tissue = samples
-  )
+    sample_table <- data.frame(
+        SampleID = samples,
+        Replicate = samples,
+        Condition = groups,
+        bamReads = bams,
+        Peaks = peaks,
+        Tissue = samples
+    )
 }
 
-sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter)
+sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter, minOverlap = if (is.null(opt$minoverlap)) 2 else opt$minoverlap) # fall back to dba()'s default of 2 when --minoverlap is omitted
+
+if (!is.null(opt$use_blacklist)) {
+    sample <- dba.blacklist(sample, blacklist = TRUE)
+}
 
 if (!is.null(opt$summits)) {
-  sample_count <- dba.count(sample, summits = opt$summits)
+    sample_count <- dba.count(sample, summits = opt$summits)
 } else {
-  sample_count <- dba.count(sample)
+    sample_count <- dba.count(sample)
 }
 
 sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2)
-sample_analyze <- dba.analyze(sample_contrast)
-diff_bind <- dba.report(sample_analyze, th = opt$th)
+
+# Map the --method string onto the DiffBind constant; use DESeq2 (DiffBind's own default) when omitted.
+method <- switch(if (is.null(opt$method)) "DBA_DESEQ2" else opt$method,
+    DBA_DESEQ2 = DBA_DESEQ2,
+    DBA_EDGER = DBA_EDGER,
+    stop("Unsupported --method: ", opt$method, call. = FALSE)) # unnamed last arm = default: fail with a clear message
+
+sample_analyze <- dba.analyze(sample_contrast, method = method, bBlacklist = FALSE, bGreylist = FALSE)
+
+diff_bind <- dba.report(sample_analyze, th = opt$th, method = method)
 
 # Generate plots
 if (!is.null(opt$plots)) {
-  pdf(opt$plots)
-  orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th)
-  dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3)
-  dba.plotMA(sample_analyze, th = opt$th)
-  dba.plotVolcano(sample_analyze, th = opt$th)
-  dba.plotBox(sample_analyze, th = opt$th)
-  dev.off()
+    pdf(opt$plots)
+    orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th, method = method)
+    dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3, method = method)
+    dba.plotMA(sample_analyze, th = opt$th, method = method)
+    dba.plotVolcano(sample_analyze, th = opt$th, method = method)
+    dba.plotBox(sample_analyze, th = opt$th, method = method)
+    dev.off()
 }
 
 # Output differential binding sites
 res_sorted <- diff_bind[order(diff_bind$FDR), ]
 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/)
 if (opt$format == "bed") {
-  res_sorted  <- data.frame(
-    Chrom = seqnames(res_sorted),
-    Start = start(res_sorted) - 1,
-    End = end(res_sorted),
-    Name = rep("DiffBind", length(res_sorted)),
-    Score = rep("0", length(res_sorted)),
-    Strand = gsub("\\*", ".", strand(res_sorted))
-  )
+    res_sorted <- data.frame(
+        Chrom = seqnames(res_sorted),
+        Start = start(res_sorted) - 1,
+        End = end(res_sorted),
+        Name = rep("DiffBind", length(res_sorted)),
+        Score = rep("0", length(res_sorted)),
+        Strand = gsub("\\*", ".", strand(res_sorted))
+    )
 } else if (opt$format == "interval") {
-  # Output as interval
-  df <- as.data.frame(res_sorted)
-  extrainfo <- NULL
-  for (i in seq_len(nrow(df))) {
-    extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|")
-  }
-  res_sorted  <- data.frame(
-    Chrom = seqnames(res_sorted),
-    Start = start(res_sorted) - 1,
-    End = end(res_sorted),
-    Name = rep("DiffBind", length(res_sorted)),
-    Score = rep("0", length(res_sorted)),
-    Strand = gsub("\\*", ".", strand(res_sorted)),
-    Comment = extrainfo
-  )
+    # Output as interval
+    df <- as.data.frame(res_sorted)
+    extrainfo <- NULL
+    for (i in seq_len(nrow(df))) {
+        extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|")
+    }
+    res_sorted <- data.frame(
+        Chrom = seqnames(res_sorted),
+        Start = start(res_sorted) - 1,
+        End = end(res_sorted),
+        Name = rep("DiffBind", length(res_sorted)),
+        Score = rep("0", length(res_sorted)),
+        Strand = gsub("\\*", ".", strand(res_sorted)),
+        Comment = extrainfo
+    )
 } else {
-  # Output as 0-based tabular
-  res_sorted <- data.frame(
-    Chrom = seqnames(res_sorted),
-    Start = start(res_sorted) - 1,
-    End = end(res_sorted),
-    Name = rep("DiffBind", length(res_sorted)),
-    Score = rep("0", length(res_sorted)),
-    Strand = gsub("\\*", ".", strand(res_sorted)),
-    mcols(res_sorted)
-  )
+    # Output as 0-based tabular
+    res_sorted <- data.frame(
+        Chrom = seqnames(res_sorted),
+        Start = start(res_sorted) - 1,
+        End = end(res_sorted),
+        Name = rep("DiffBind", length(res_sorted)),
+        Score = rep("0", length(res_sorted)),
+        Strand = gsub("\\*", ".", strand(res_sorted)),
+        mcols(res_sorted)
+    )
 }
 write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE)
 
 # Output binding affinity scores
 if (!is.null(opt$bmatrix)) {
-  bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME)
-  # Output as 0-based tabular
-  bmat <- data.frame(
-    Chrom = bmat[, 1],
-    Start = bmat[, 2] - 1,
-    End = bmat[, 3],
-    bmat[, 4:ncol(bmat)]
-  )
-  write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE)
+    bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME, minOverlap = if (is.null(opt$minoverlap)) 2 else opt$minoverlap) # fall back to dba.peakset()'s default of 2 when --minoverlap is omitted
+    # Output as 0-based tabular
+    bmat <- data.frame(
+        Chrom = bmat[, 1],
+        Start = bmat[, 2] - 1,
+        End = bmat[, 3],
+        bmat[, 4:ncol(bmat)]
+    )
+    write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE)
 }
 
 # Output RData file
 if (!is.null(opt$rdaOpt)) {
-  save.image(file = "DiffBind_analysis.RData")
+    save.image(file = "DiffBind_analysis.RData")
 }
 
 # Output analysis info
 if (!is.null(opt$infoOpt)) {
-  info <- "DiffBind_analysis_info.txt"
-  cat("dba.count Info\n\n", file = info, append = TRUE)
-  capture.output(sample, file = info, append = TRUE)
-  cat("\ndba.analyze Info\n\n", file = info, append = TRUE)
-  capture.output(sample_analyze, file = info, append = TRUE)
-  cat("\nSessionInfo\n\n", file = info, append = TRUE)
-  capture.output(sessionInfo(), file = info, append = TRUE)
+    info <- "DiffBind_analysis_info.txt"
+    cat("dba.count Info\n\n", file = info, append = TRUE)
+    capture.output(sample, file = info, append = TRUE)
+    cat("\ndba.analyze Info\n\n", file = info, append = TRUE)
+    capture.output(sample_analyze, file = info, append = TRUE)
+    cat("\nSessionInfo\n\n", file = info, append = TRUE)
+    capture.output(sessionInfo(), file = info, append = TRUE)
 }