comparison diffbind.R @ 18:f907216064f6 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit fd148a124034b44d0d61db3eec32ff991d8c152c
author iuc
date Mon, 08 Jul 2024 18:31:51 +0000
parents 2605cbdaa7d8
children
comparison
equal deleted inserted replaced
17:2605cbdaa7d8 18:f907216064f6
1 ## Setup R error handling to go to stderr 1 ## Setup R error handling to go to stderr
2 options(show.error.messages = FALSE, error = function() { 2 options(show.error.messages = F, error = function() {
3 cat(geterrmessage(), file = stderr()) 3 cat(geterrmessage(), file = stderr())
4 q("no", 1, FALSE) 4 q("no", 1, F)
5 }) 5 })
6 # we need that to not crash galaxy with an UTF8 error on German LC settings. 6 # we need that to not crash galaxy with an UTF8 error on German LC settings.
7 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 7 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
8 8
9 suppressPackageStartupMessages({ 9 suppressPackageStartupMessages({
10 library("getopt") 10 library("getopt")
11 library("DiffBind") 11 library("DiffBind")
12 library("rjson") 12 library("rjson")
13 }) 13 })
14 14
15 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) 15 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
16 args <- commandArgs(trailingOnly = TRUE) 16 args <- commandArgs(trailingOnly = TRUE)
17 17
18 #get options, using the spec as defined by the enclosed list. 18 # get options, using the spec as defined by the enclosed list.
19 #we read the options from the default: commandArgs(TRUE). 19 # we read the options from the default: commandArgs(TRUE).
20 spec <- matrix(c( 20 spec <- matrix(c(
21 "infile", "i", 1, "character", 21 "infile", "i", 1, "character",
22 "outfile", "o", 1, "character", 22 "outfile", "o", 1, "character",
23 "scorecol", "n", 1, "integer", 23 "method", "m", 1, "character",
24 "lowerbetter", "l", 1, "logical", 24 "scorecol", "n", 1, "integer",
25 "summits", "s", 1, "integer", 25 "lowerbetter", "l", 1, "logical",
26 "th", "t", 1, "double", 26 "summits", "s", 1, "integer",
27 "format", "f", 1, "character", 27 "th", "t", 1, "double",
28 "plots", "p", 2, "character", 28 "minoverlap", "O", 1, "integer",
29 "bmatrix", "b", 0, "logical", 29 "use_blacklist", "B", 0, "logical",
30 "rdaOpt", "r", 0, "logical", 30 "format", "f", 1, "character",
31 "infoOpt", "a", 0, "logical", 31 "plots", "p", 2, "character",
32 "verbose", "v", 2, "integer", 32 "bmatrix", "b", 0, "logical",
33 "help", "h", 0, "logical" 33 "rdaOpt", "r", 0, "logical",
34 "infoOpt", "a", 0, "logical",
35 "verbose", "v", 2, "integer",
36 "help", "h", 0, "logical"
34 ), byrow = TRUE, ncol = 4) 37 ), byrow = TRUE, ncol = 4)
35 38
36 opt <- getopt(spec) 39 opt <- getopt(spec)
37
38 # if help was asked for print a friendly message 40 # if help was asked for print a friendly message
39 # and exit with a non-zero error code 41 # and exit with a non-zero error code
40 if (!is.null(opt$help)) { 42 if (!is.null(opt$help)) {
41 cat(getopt(spec, usage = TRUE)) 43 cat(getopt(spec, usage = TRUE))
42 q(status = 1) 44 q(status = 1)
43 } 45 }
44 46
45 parser <- newJSONParser() 47 parser <- newJSONParser()
46 parser$addData(opt$infile) 48 parser$addData(opt$infile)
47 factor_list <- parser$getObject() 49 factor_list <- parser$getObject()
53 # get the group and sample id from the peaks filenames 55 # get the group and sample id from the peaks filenames
54 groups <- sapply(strsplit(peaks, "-"), `[`, 1) 56 groups <- sapply(strsplit(peaks, "-"), `[`, 1)
55 samples <- sapply(strsplit(peaks, "-"), `[`, 2) 57 samples <- sapply(strsplit(peaks, "-"), `[`, 2)
56 58
57 if (length(ctrls) != 0) { 59 if (length(ctrls) != 0) {
58 sample_table <- data.frame( 60 sample_table <- data.frame(
59 SampleID = samples, 61 SampleID = samples,
60 Condition = groups, 62 Condition = groups,
61 bamReads = bams, 63 bamReads = bams,
62 bamControl = ctrls, 64 bamControl = ctrls,
63 Peaks = peaks, 65 Peaks = peaks,
64 Tissue = samples 66 Tissue = samples
65 ) # using "Tissue" column to display ids as labels in PCA plot 67 ) # using "Tissue" column to display ids as labels in PCA plot
66 } else { 68 } else {
67 sample_table <- data.frame( 69 sample_table <- data.frame(
68 SampleID = samples, 70 SampleID = samples,
69 Replicate = samples, 71 Replicate = samples,
70 Condition = groups, 72 Condition = groups,
71 bamReads = bams, 73 bamReads = bams,
72 Peaks = peaks, 74 Peaks = peaks,
73 Tissue = samples 75 Tissue = samples
74 ) 76 )
75 } 77 }
76 78
77 sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter) 79 sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter, minOverlap = opt$minoverlap)
80
81 if (!is.null(opt$use_blacklist)) {
82 sample <- dba.blacklist(sample, blacklist = TRUE)
83 }
78 84
79 if (!is.null(opt$summits)) { 85 if (!is.null(opt$summits)) {
80 sample_count <- dba.count(sample, summits = opt$summits) 86 sample_count <- dba.count(sample, summits = opt$summits)
81 } else { 87 } else {
82 sample_count <- dba.count(sample) 88 sample_count <- dba.count(sample)
83 } 89 }
84 90
85 sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2) 91 sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2)
86 sample_analyze <- dba.analyze(sample_contrast) 92
87 diff_bind <- dba.report(sample_analyze, th = opt$th) 93 if (opt$method == "DBA_DESEQ2") {
94 method <- DBA_DESEQ2
95 } else if (opt$method == "DBA_EDGER") {
96 method <- DBA_EDGER
97 }
98
99 sample_analyze <- dba.analyze(sample_contrast, method = method, bBlacklist = FALSE, bGreylist = FALSE)
100
101 diff_bind <- dba.report(sample_analyze, th = opt$th, method = method)
88 102
89 # Generate plots 103 # Generate plots
90 if (!is.null(opt$plots)) { 104 if (!is.null(opt$plots)) {
91 pdf(opt$plots) 105 pdf(opt$plots)
92 orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th) 106 orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th, method = method)
93 dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3) 107 dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3, method = method)
94 dba.plotMA(sample_analyze, th = opt$th) 108 dba.plotMA(sample_analyze, th = opt$th, method = method)
95 dba.plotVolcano(sample_analyze, th = opt$th) 109 dba.plotVolcano(sample_analyze, th = opt$th, method = method)
96 dba.plotBox(sample_analyze, th = opt$th) 110 dba.plotBox(sample_analyze, th = opt$th, method = method)
97 dev.off() 111 dev.off()
98 } 112 }
99 113
100 # Output differential binding sites 114 # Output differential binding sites
101 res_sorted <- diff_bind[order(diff_bind$FDR), ] 115 res_sorted <- diff_bind[order(diff_bind$FDR), ]
102 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/) 116 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/)
103 if (opt$format == "bed") { 117 if (opt$format == "bed") {
104 res_sorted <- data.frame( 118 res_sorted <- data.frame(
105 Chrom = seqnames(res_sorted), 119 Chrom = seqnames(res_sorted),
106 Start = start(res_sorted) - 1, 120 Start = start(res_sorted) - 1,
107 End = end(res_sorted), 121 End = end(res_sorted),
108 Name = rep("DiffBind", length(res_sorted)), 122 Name = rep("DiffBind", length(res_sorted)),
109 Score = rep("0", length(res_sorted)), 123 Score = rep("0", length(res_sorted)),
110 Strand = gsub("\\*", ".", strand(res_sorted)) 124 Strand = gsub("\\*", ".", strand(res_sorted))
111 ) 125 )
112 } else if (opt$format == "interval") { 126 } else if (opt$format == "interval") {
113 # Output as interval 127 # Output as interval
114 df <- as.data.frame(res_sorted) 128 df <- as.data.frame(res_sorted)
115 extrainfo <- NULL 129 extrainfo <- NULL
116 for (i in seq_len(nrow(df))) { 130 for (i in seq_len(nrow(df))) {
117 extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|") 131 extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|")
118 } 132 }
119 res_sorted <- data.frame( 133 res_sorted <- data.frame(
120 Chrom = seqnames(res_sorted), 134 Chrom = seqnames(res_sorted),
121 Start = start(res_sorted) - 1, 135 Start = start(res_sorted) - 1,
122 End = end(res_sorted), 136 End = end(res_sorted),
123 Name = rep("DiffBind", length(res_sorted)), 137 Name = rep("DiffBind", length(res_sorted)),
124 Score = rep("0", length(res_sorted)), 138 Score = rep("0", length(res_sorted)),
125 Strand = gsub("\\*", ".", strand(res_sorted)), 139 Strand = gsub("\\*", ".", strand(res_sorted)),
126 Comment = extrainfo 140 Comment = extrainfo
127 ) 141 )
128 } else { 142 } else {
129 # Output as 0-based tabular 143 # Output as 0-based tabular
130 res_sorted <- data.frame( 144 res_sorted <- data.frame(
131 Chrom = seqnames(res_sorted), 145 Chrom = seqnames(res_sorted),
132 Start = start(res_sorted) - 1, 146 Start = start(res_sorted) - 1,
133 End = end(res_sorted), 147 End = end(res_sorted),
134 Name = rep("DiffBind", length(res_sorted)), 148 Name = rep("DiffBind", length(res_sorted)),
135 Score = rep("0", length(res_sorted)), 149 Score = rep("0", length(res_sorted)),
136 Strand = gsub("\\*", ".", strand(res_sorted)), 150 Strand = gsub("\\*", ".", strand(res_sorted)),
137 mcols(res_sorted) 151 mcols(res_sorted)
138 ) 152 )
139 } 153 }
140 write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE) 154 write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE)
141 155
142 # Output binding affinity scores 156 # Output binding affinity scores
143 if (!is.null(opt$bmatrix)) { 157 if (!is.null(opt$bmatrix)) {
144 bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME) 158 bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME, minOverlap = opt$minoverlap)
145 # Output as 0-based tabular 159 # Output as 0-based tabular
146 bmat <- data.frame( 160 bmat <- data.frame(
147 Chrom = bmat[, 1], 161 Chrom = bmat[, 1],
148 Start = bmat[, 2] - 1, 162 Start = bmat[, 2] - 1,
149 End = bmat[, 3], 163 End = bmat[, 3],
150 bmat[, 4:ncol(bmat)] 164 bmat[, 4:ncol(bmat)]
151 ) 165 )
152 write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE) 166 write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE)
153 } 167 }
154 168
155 # Output RData file 169 # Output RData file
156 if (!is.null(opt$rdaOpt)) { 170 if (!is.null(opt$rdaOpt)) {
157 save.image(file = "DiffBind_analysis.RData") 171 save.image(file = "DiffBind_analysis.RData")
158 } 172 }
159 173
160 # Output analysis info 174 # Output analysis info
161 if (!is.null(opt$infoOpt)) { 175 if (!is.null(opt$infoOpt)) {
162 info <- "DiffBind_analysis_info.txt" 176 info <- "DiffBind_analysis_info.txt"
163 cat("dba.count Info\n\n", file = info, append = TRUE) 177 cat("dba.count Info\n\n", file = info, append = TRUE)
164 capture.output(sample, file = info, append = TRUE) 178 capture.output(sample, file = info, append = TRUE)
165 cat("\ndba.analyze Info\n\n", file = info, append = TRUE) 179 cat("\ndba.analyze Info\n\n", file = info, append = TRUE)
166 capture.output(sample_analyze, file = info, append = TRUE) 180 capture.output(sample_analyze, file = info, append = TRUE)
167 cat("\nSessionInfo\n\n", file = info, append = TRUE) 181 cat("\nSessionInfo\n\n", file = info, append = TRUE)
168 capture.output(sessionInfo(), file = info, append = TRUE) 182 capture.output(sessionInfo(), file = info, append = TRUE)
169 } 183 }