comparison deseq2.R @ 12:bd06df00180a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 62e9101c1e7b8467e395f31ccbd9321de01a6418
author iuc
date Mon, 29 Jan 2018 07:30:18 -0500
parents 25204a860b79
children d0c39b5e78cf
comparison
equal deleted inserted replaced
11:25204a860b79 12:bd06df00180a
50 "quiet", "q", 0, "logical", 50 "quiet", "q", 0, "logical",
51 "help", "h", 0, "logical", 51 "help", "h", 0, "logical",
52 "outfile", "o", 1, "character", 52 "outfile", "o", 1, "character",
53 "countsfile", "n", 1, "character", 53 "countsfile", "n", 1, "character",
54 "factors", "f", 1, "character", 54 "factors", "f", 1, "character",
55 "files_to_labels", "l", 1, "character",
55 "plots" , "p", 1, "character", 56 "plots" , "p", 1, "character",
56 "sample_table", "s", 1, "character", 57 "sample_table", "s", 1, "character",
57 "tximport", "i", 0, "logical", 58 "tximport", "i", 0, "logical",
58 "txtype", "y", 1, "character", 59 "txtype", "y", 1, "character",
59 "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF) 60 "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
116 if (!is.null(opt$factors)) { 117 if (!is.null(opt$factors)) {
117 library("rjson") 118 library("rjson")
118 parser <- newJSONParser() 119 parser <- newJSONParser()
119 parser$addData(opt$factors) 120 parser$addData(opt$factors)
120 factorList <- parser$getObject() 121 factorList <- parser$getObject()
122 filenames_to_labels <- fromJSON(opt$files_to_labels)
121 factors <- sapply(factorList, function(x) x[[1]]) 123 factors <- sapply(factorList, function(x) x[[1]])
122 primaryFactor <- factors[1] 124 primaryFactor <- factors[1]
123 filenamesIn <- unname(unlist(factorList[[1]][[2]])) 125 filenamesIn <- unname(unlist(factorList[[1]][[2]]))
126 labs = unname(unlist(filenames_to_labels[basename(filenamesIn)]))
124 sampleTable <- data.frame(sample=basename(filenamesIn), 127 sampleTable <- data.frame(sample=basename(filenamesIn),
125 filename=filenamesIn, 128 filename=filenamesIn,
126 row.names=filenamesIn, 129 row.names=filenamesIn,
127 stringsAsFactors=FALSE) 130 stringsAsFactors=FALSE)
128 for (factor in factorList) { 131 for (factor in factorList) {
133 files <- factor[[2]][[i]][[1]] 136 files <- factor[[2]][[i]][[1]]
134 sampleTable[files,factorName] <- trim(lvls[i]) 137 sampleTable[files,factorName] <- trim(lvls[i])
135 } 138 }
136 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) 139 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
137 } 140 }
138 rownames(sampleTable) <- sampleTable$sample 141 rownames(sampleTable) <- labs
139 } else { 142 } else {
140 # read the sample_table argument 143 # read the sample_table argument
141 # this table is described in ?DESeqDataSet 144 # this table is described in ?DESeqDataSet
142 # one column for the sample name, one for the filename, and 145 # one column for the sample name, one for the filename, and
143 # the remaining columns for factors in the analysis 146 # the remaining columns for factors in the analysis
161 cat("\n\n") 164 cat("\n\n")
162 } 165 }
163 166
164 # these are plots which are made once for each analysis 167 # these are plots which are made once for each analysis
165 generateGenericPlots <- function(dds, factors) { 168 generateGenericPlots <- function(dds, factors) {
169 library("ggplot2")
170 library("ggrepel")
171 library("pheatmap")
172
166 rld <- rlog(dds) 173 rld <- rlog(dds)
167 d=plotPCA(rld, intgroup=rev(factors), returnData=TRUE) 174 p <- plotPCA(rld, intgroup=rev(factors))
168 labs <- paste0(seq_len(ncol(dds)), ": ", do.call(paste, as.list(colData(dds)[factors]))) 175 print(p + geom_text_repel(aes_string(x = "PC1", y = "PC2", label = factor(colnames(dds))), size=3) + geom_point())
169 library(ggplot2)
170 print(ggplot(d, aes(x=PC1,y=PC2, col=group,label=factor(labs)), environment = environment()) + geom_point() + geom_text(size=3))
171 dat <- assay(rld) 176 dat <- assay(rld)
172 colnames(dat) <- labs
173 distsRL <- dist(t(dat)) 177 distsRL <- dist(t(dat))
174 mat <- as.matrix(distsRL) 178 mat <- as.matrix(distsRL)
175 hc <- hclust(distsRL) 179 colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
176 hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100) 180 pheatmap(mat,
177 heatmap.2(mat, Rowv=as.dendrogram(hc), symm=TRUE, trace="none", col = rev(hmcol), 181 clustering_distance_rows=distsRL,
178 main="Sample-to-sample distances", margin=c(13,13)) 182 clustering_distance_cols=distsRL,
183 col=colors,
184 main="Sample-to-sample distances")
179 plotDispEsts(dds, main="Dispersion estimates") 185 plotDispEsts(dds, main="Dispersion estimates")
180 } 186 }
181 187
182 # these are plots which can be made for each comparison, e.g. 188 # these are plots which can be made for each comparison, e.g.
183 # once for C vs A and once for B vs A 189 # once for C vs A and once for B vs A
221 if (!useTXI) { 227 if (!useTXI) {
222 # construct the object from HTSeq files 228 # construct the object from HTSeq files
223 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, 229 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
224 directory = dir, 230 directory = dir,
225 design = designFormula) 231 design = designFormula)
232 labs <- unname(unlist(filenames_to_labels[colnames(dds)]))
233 colnames(dds) <- labs
226 } else { 234 } else {
227 # construct the object using tximport 235 # construct the object using tximport
228 # first need to make the tx2gene table 236 # first need to make the tx2gene table
229 # this takes ~2-3 minutes using Bioconductor functions 237 # this takes ~2-3 minutes using Bioconductor functions
230 if (!is.null(gtfFile)) { 238 if (!is.null(gtfFile)) {
236 df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME") 244 df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME")
237 tx2gene <- df[, 2:1] # tx ID, then gene ID 245 tx2gene <- df[, 2:1] # tx ID, then gene ID
238 } 246 }
239 library("tximport") 247 library("tximport")
240 txiFiles <- as.character(sampleTable[,2]) 248 txiFiles <- as.character(sampleTable[,2])
241 names(txiFiles) <- as.character(sampleTable[,1]) 249 labs <- unname(unlist(filenames_to_labels[sampleTable[,1]]))
250 names(txiFiles) <- labs
242 txi <- tximport(txiFiles, type=opt$txtype, tx2gene=tx2gene) 251 txi <- tximport(txiFiles, type=opt$txtype, tx2gene=tx2gene)
243 dds <- DESeqDataSetFromTximport(txi, 252 dds <- DESeqDataSetFromTximport(txi,
244 sampleTable[,3:ncol(sampleTable),drop=FALSE], 253 sampleTable[,3:ncol(sampleTable),drop=FALSE],
245 designFormula) 254 designFormula)
246 } 255 }
297 306
298 n <- nlevels(colData(dds)[[primaryFactor]]) 307 n <- nlevels(colData(dds)[[primaryFactor]])
299 allLevels <- levels(colData(dds)[[primaryFactor]]) 308 allLevels <- levels(colData(dds)[[primaryFactor]])
300 309
301 if (!is.null(opt$countsfile)) { 310 if (!is.null(opt$countsfile)) {
302 labs <- paste0(seq_len(ncol(dds)), ": ", do.call(paste, as.list(colData(dds)[factors])))
303 normalizedCounts<-counts(dds,normalized=TRUE) 311 normalizedCounts<-counts(dds,normalized=TRUE)
304 colnames(normalizedCounts)<-labs
305 write.table(normalizedCounts, file=opt$countsfile, sep="\t", col.names=NA, quote=FALSE) 312 write.table(normalizedCounts, file=opt$countsfile, sep="\t", col.names=NA, quote=FALSE)
306 } 313 }
307 314
308 if (is.null(opt$many_contrasts)) { 315 if (is.null(opt$many_contrasts)) {
309 # only contrast the first and second level of the primary factor 316 # only contrast the first and second level of the primary factor