comparison diffbind.R @ 16:163688bb8f73 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit 77828512472bf9815bdda725a58a2321f7803661"
author iuc
date Wed, 18 Nov 2020 12:54:07 +0000
parents 194e3f2c1d86
children 2605cbdaa7d8
comparison
equal deleted inserted replaced
15:194e3f2c1d86 16:163688bb8f73
1 ## Setup R error handling to go to stderr 1 ## Setup R error handling to go to stderr
2 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) 2 options(show.error.messages = F, error = function() {
3 cat(geterrmessage(), file = stderr()); q("no", 1, F)
4 })
3 # we need that to not crash galaxy with an UTF8 error on German LC settings. 5 # we need that to not crash galaxy with an UTF8 error on German LC settings.
4 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 6 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
5 7
6 suppressPackageStartupMessages({ 8 suppressPackageStartupMessages({
7 library('getopt') 9 library("getopt")
8 library('DiffBind') 10 library("DiffBind")
9 library('rjson') 11 library("rjson")
10 }) 12 })
11 13
12 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) 14 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
13 args <- commandArgs(trailingOnly = TRUE) 15 args <- commandArgs(trailingOnly = TRUE)
14 16
15 #get options, using the spec as defined by the enclosed list. 17 #get options, using the spec as defined by the enclosed list.
16 #we read the options from the default: commandArgs(TRUE). 18 #we read the options from the default: commandArgs(TRUE).
17 spec = matrix(c( 19 spec <- matrix(c(
18 'infile' , 'i', 1, "character", 20 "infile", "i", 1, "character",
19 'outfile' , 'o', 1, "character", 21 "outfile", "o", 1, "character",
20 'scorecol', 'n', 1, "integer", 22 "scorecol", "n", 1, "integer",
21 'lowerbetter', 'l', 1, "logical", 23 "lowerbetter", "l", 1, "logical",
22 'summits', 's', 1, "integer", 24 "summits", "s", 1, "integer",
23 'th', 't', 1, "double", 25 "th", "t", 1, "double",
24 'format', 'f', 1, "character", 26 "format", "f", 1, "character",
25 'plots' , 'p', 2, "character", 27 "plots", "p", 2, "character",
26 'bmatrix', 'b', 0, "logical", 28 "bmatrix", "b", 0, "logical",
27 "rdaOpt", "r", 0, "logical", 29 "rdaOpt", "r", 0, "logical",
28 'infoOpt' , 'a', 0, "logical", 30 "infoOpt", "a", 0, "logical",
29 'verbose', 'v', 2, "integer", 31 "verbose", "v", 2, "integer",
30 'help' , 'h', 0, "logical" 32 "help", "h", 0, "logical"
31 ), byrow=TRUE, ncol=4); 33 ), byrow = TRUE, ncol = 4);
32 34
33 opt = getopt(spec); 35 opt <- getopt(spec);
34 36
35 # if help was asked for print a friendly message 37 # if help was asked for print a friendly message
36 # and exit with a non-zero error code 38 # and exit with a non-zero error code
37 if ( !is.null(opt$help) ) { 39 if (!is.null(opt$help)) {
38 cat(getopt(spec, usage=TRUE)); 40 cat(getopt(spec, usage = TRUE));
39 q(status=1); 41 q(status = 1);
40 } 42 }
41 43
42 parser <- newJSONParser() 44 parser <- newJSONParser()
43 parser$addData(opt$infile) 45 parser$addData(opt$infile)
44 factorList <- parser$getObject() 46 factor_list <- parser$getObject()
45 filenamesIn <- unname(unlist(factorList[[1]][[2]])) 47 filenames_in <- unname(unlist(factor_list[[1]][[2]]))
46 peaks <- filenamesIn[grepl("peaks.bed", filenamesIn)] 48 peaks <- filenames_in[grepl("peaks.bed", filenames_in)]
47 bams <- filenamesIn[grepl("bamreads.bam", filenamesIn)] 49 bams <- filenames_in[grepl("bamreads.bam", filenames_in)]
48 ctrls <- filenamesIn[grepl("bamcontrol.bam", filenamesIn)] 50 ctrls <- filenames_in[grepl("bamcontrol.bam", filenames_in)]
49 51
50 # get the group and sample id from the peaks filenames 52 # get the group and sample id from the peaks filenames
51 groups <- sapply(strsplit(peaks,"-"), `[`, 1) 53 groups <- sapply(strsplit(peaks, "-"), `[`, 1)
52 samples <- sapply(strsplit(peaks,"-"), `[`, 2) 54 samples <- sapply(strsplit(peaks, "-"), `[`, 2)
53 55
54 if ( length(ctrls) != 0 ) { 56 if (length(ctrls) != 0) {
55 sampleTable <- data.frame(SampleID=samples, 57 sample_table <- data.frame(SampleID = samples,
56 Condition=groups, 58 Condition = groups,
57 bamReads=bams, 59 bamReads = bams,
58 bamControl=ctrls, 60 bamControl = ctrls,
59 Peaks=peaks, 61 Peaks = peaks,
60 Tissue=samples) # using "Tissue" column to display ids as labels in PCA plot 62 Tissue = samples) # using "Tissue" column to display ids as labels in PCA plot
61 } else { 63 } else {
62 64
63 sampleTable <- data.frame(SampleID=samples, 65 sample_table <- data.frame(SampleID = samples,
64 Replicate=samples, 66 Replicate = samples,
65 Condition=groups, 67 Condition = groups,
66 bamReads=bams, 68 bamReads = bams,
67 Peaks=peaks, 69 Peaks = peaks,
68 Tissue=samples) 70 Tissue = samples)
69 } 71 }
70 72
71 sample = dba(sampleSheet=sampleTable, peakFormat='bed', scoreCol=opt$scorecol, bLowerScoreBetter=opt$lowerbetter) 73 sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter)
72 74
73 if ( !is.null(opt$summits) ) { 75 if (!is.null(opt$summits)) {
74 sample_count = dba.count(sample, summits=opt$summits) 76 sample_count <- dba.count(sample, summits = opt$summits)
75 } else { 77 } else {
76 sample_count = dba.count(sample) 78 sample_count <- dba.count(sample)
77 } 79 }
78 80
79 sample_contrast = dba.contrast(sample_count, categories=DBA_CONDITION, minMembers=2) 81 sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2)
80 sample_analyze = dba.analyze(sample_contrast) 82 sample_analyze <- dba.analyze(sample_contrast)
81 diff_bind = dba.report(sample_analyze, th=opt$th) 83 diff_bind <- dba.report(sample_analyze, th = opt$th)
82 84
83 # Generate plots 85 # Generate plots
84 if ( !is.null(opt$plots) ) { 86 if (!is.null(opt$plots)) {
85 pdf(opt$plots) 87 pdf(opt$plots)
86 orvals = dba.plotHeatmap(sample_analyze, contrast=1, correlations=FALSE, cexCol=0.8, th=opt$th) 88 orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th)
87 dba.plotPCA(sample_analyze, contrast=1, th=opt$th, label=DBA_TISSUE, labelSize=0.3) 89 dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3)
88 dba.plotMA(sample_analyze, th=opt$th) 90 dba.plotMA(sample_analyze, th = opt$th)
89 dba.plotVolcano(sample_analyze, th=opt$th) 91 dba.plotVolcano(sample_analyze, th = opt$th)
90 dba.plotBox(sample_analyze, th=opt$th) 92 dba.plotBox(sample_analyze, th = opt$th)
91 dev.off() 93 dev.off()
92 } 94 }
93 95
94 # Output differential binding sites 96 # Output differential binding sites
95 resSorted <- diff_bind[order(diff_bind$FDR),] 97 res_sorted <- diff_bind[order(diff_bind$FDR), ]
96 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/) 98 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/)
97 if (opt$format == "bed") { 99 if (opt$format == "bed") {
98 resSorted <- data.frame(Chrom=seqnames(resSorted), 100 res_sorted <- data.frame(Chrom = seqnames(res_sorted),
99 Start=start(resSorted) - 1, 101 Start = start(res_sorted) - 1,
100 End=end(resSorted), 102 End = end(res_sorted),
101 Name=rep("DiffBind", length(resSorted)), 103 Name = rep("DiffBind", length(res_sorted)),
102 Score=rep("0", length(resSorted)), 104 Score = rep("0", length(res_sorted)),
103 Strand=gsub("\\*", ".", strand(resSorted))) 105 Strand = gsub("\\*", ".", strand(res_sorted)))
104 } else if (opt$format == "interval") { 106 } else if (opt$format == "interval") {
105 # Output as interval 107 # Output as interval
106 df <- as.data.frame(resSorted) 108 df <- as.data.frame(res_sorted)
107 extrainfo <- NULL 109 extrainfo <- NULL
108 for (i in 1:nrow(df)) { 110 for (i in seq_len(nrow(df))) {
109 extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse="|") 111 extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|")
110 } 112 }
111 resSorted <- data.frame(Chrom=seqnames(resSorted), 113 res_sorted <- data.frame(Chrom = seqnames(res_sorted),
112 Start=start(resSorted) - 1, 114 Start = start(res_sorted) - 1,
113 End=end(resSorted), 115 End = end(res_sorted),
114 Name=rep("DiffBind", length(resSorted)), 116 Name = rep("DiffBind", length(res_sorted)),
115 Score=rep("0", length(resSorted)), 117 Score = rep("0", length(res_sorted)),
116 Strand=gsub("\\*", ".", strand(resSorted)), 118 Strand = gsub("\\*", ".", strand(res_sorted)),
117 Comment=extrainfo) 119 Comment = extrainfo)
118 } else { 120 } else {
119 # Output as 0-based tabular 121 # Output as 0-based tabular
120 resSorted <- data.frame(Chrom=seqnames(resSorted), 122 res_sorted <- data.frame(Chrom = seqnames(res_sorted),
121 Start=start(resSorted) - 1, 123 Start = start(res_sorted) - 1,
122 End=end(resSorted), 124 End = end(res_sorted),
123 Name=rep("DiffBind", length(resSorted)), 125 Name = rep("DiffBind", length(res_sorted)),
124 Score=rep("0", length(resSorted)), 126 Score = rep("0", length(res_sorted)),
125 Strand=gsub("\\*", ".", strand(resSorted)), 127 Strand = gsub("\\*", ".", strand(res_sorted)),
126 mcols(resSorted)) 128 mcols(res_sorted))
127 } 129 }
128 write.table(resSorted, file = opt$outfile, sep="\t", quote = FALSE, row.names = FALSE) 130 write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE)
129 131
130 # Output binding affinity scores 132 # Output binding affinity scores
131 if (!is.null(opt$bmatrix)) { 133 if (!is.null(opt$bmatrix)) {
132 bmat <- dba.peakset(sample_count, bRetrieve=TRUE, DataType=DBA_DATA_FRAME) 134 bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME)
133 # Output as 0-based tabular 135 # Output as 0-based tabular
134 bmat <- data.frame(Chrom=bmat[, 1], 136 bmat <- data.frame(Chrom = bmat[, 1],
135 Start=bmat[, 2] - 1, 137 Start = bmat[, 2] - 1,
136 End=bmat[, 3], 138 End = bmat[, 3],
137 bmat[, 4:ncol(bmat)]) 139 bmat[, 4:ncol(bmat)])
138 write.table(bmat, file="bmatrix.tab", sep="\t", quote=FALSE, row.names=FALSE) 140 write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE)
139 } 141 }
140 142
141 # Output RData file 143 # Output RData file
142 if (!is.null(opt$rdaOpt)) { 144 if (!is.null(opt$rdaOpt)) {
143 save.image(file = "DiffBind_analysis.RData") 145 save.image(file = "DiffBind_analysis.RData")
144 } 146 }
145 147
146 # Output analysis info 148 # Output analysis info
147 if (!is.null(opt$infoOpt)) { 149 if (!is.null(opt$infoOpt)) {
148 info <- "DiffBind_analysis_info.txt" 150 info <- "DiffBind_analysis_info.txt"
149 cat("dba.count Info\n\n", file=info, append = TRUE) 151 cat("dba.count Info\n\n", file = info, append = TRUE)
150 capture.output(sample, file=info, append=TRUE) 152 capture.output(sample, file = info, append = TRUE)
151 cat("\ndba.analyze Info\n\n", file=info, append = TRUE) 153 cat("\ndba.analyze Info\n\n", file = info, append = TRUE)
152 capture.output(sample_analyze, file=info, append=TRUE) 154 capture.output(sample_analyze, file = info, append = TRUE)
153 cat("\nSessionInfo\n\n", file=info, append = TRUE) 155 cat("\nSessionInfo\n\n", file = info, append = TRUE)
154 capture.output(sessionInfo(), file=info, append=TRUE) 156 capture.output(sessionInfo(), file = info, append = TRUE)
155 } 157 }