Mercurial > repos > iuc > deseq2
comparison deseq2.R @ 12:bd06df00180a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 62e9101c1e7b8467e395f31ccbd9321de01a6418
author | iuc |
---|---|
date | Mon, 29 Jan 2018 07:30:18 -0500 |
parents | 25204a860b79 |
children | d0c39b5e78cf |
comparison
equal
deleted
inserted
replaced
11:25204a860b79 | 12:bd06df00180a |
---|---|
50 "quiet", "q", 0, "logical", | 50 "quiet", "q", 0, "logical", |
51 "help", "h", 0, "logical", | 51 "help", "h", 0, "logical", |
52 "outfile", "o", 1, "character", | 52 "outfile", "o", 1, "character", |
53 "countsfile", "n", 1, "character", | 53 "countsfile", "n", 1, "character", |
54 "factors", "f", 1, "character", | 54 "factors", "f", 1, "character", |
55 "files_to_labels", "l", 1, "character", | |
55 "plots" , "p", 1, "character", | 56 "plots" , "p", 1, "character", |
56 "sample_table", "s", 1, "character", | 57 "sample_table", "s", 1, "character", |
57 "tximport", "i", 0, "logical", | 58 "tximport", "i", 0, "logical", |
58 "txtype", "y", 1, "character", | 59 "txtype", "y", 1, "character", |
59 "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF) | 60 "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF) |
116 if (!is.null(opt$factors)) { | 117 if (!is.null(opt$factors)) { |
117 library("rjson") | 118 library("rjson") |
118 parser <- newJSONParser() | 119 parser <- newJSONParser() |
119 parser$addData(opt$factors) | 120 parser$addData(opt$factors) |
120 factorList <- parser$getObject() | 121 factorList <- parser$getObject() |
122 filenames_to_labels <- fromJSON(opt$files_to_labels) | |
121 factors <- sapply(factorList, function(x) x[[1]]) | 123 factors <- sapply(factorList, function(x) x[[1]]) |
122 primaryFactor <- factors[1] | 124 primaryFactor <- factors[1] |
123 filenamesIn <- unname(unlist(factorList[[1]][[2]])) | 125 filenamesIn <- unname(unlist(factorList[[1]][[2]])) |
126 labs = unname(unlist(filenames_to_labels[basename(filenamesIn)])) | |
124 sampleTable <- data.frame(sample=basename(filenamesIn), | 127 sampleTable <- data.frame(sample=basename(filenamesIn), |
125 filename=filenamesIn, | 128 filename=filenamesIn, |
126 row.names=filenamesIn, | 129 row.names=filenamesIn, |
127 stringsAsFactors=FALSE) | 130 stringsAsFactors=FALSE) |
128 for (factor in factorList) { | 131 for (factor in factorList) { |
133 files <- factor[[2]][[i]][[1]] | 136 files <- factor[[2]][[i]][[1]] |
134 sampleTable[files,factorName] <- trim(lvls[i]) | 137 sampleTable[files,factorName] <- trim(lvls[i]) |
135 } | 138 } |
136 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) | 139 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) |
137 } | 140 } |
138 rownames(sampleTable) <- sampleTable$sample | 141 rownames(sampleTable) <- labs |
139 } else { | 142 } else { |
140 # read the sample_table argument | 143 # read the sample_table argument |
141 # this table is described in ?DESeqDataSet | 144 # this table is described in ?DESeqDataSet |
142 # one column for the sample name, one for the filename, and | 145 # one column for the sample name, one for the filename, and |
143 # the remaining columns for factors in the analysis | 146 # the remaining columns for factors in the analysis |
161 cat("\n\n") | 164 cat("\n\n") |
162 } | 165 } |
163 | 166 |
164 # these are plots which are made once for each analysis | 167 # these are plots which are made once for each analysis |
165 generateGenericPlots <- function(dds, factors) { | 168 generateGenericPlots <- function(dds, factors) { |
169 library("ggplot2") | |
170 library("ggrepel") | |
171 library("pheatmap") | |
172 | |
166 rld <- rlog(dds) | 173 rld <- rlog(dds) |
167 d=plotPCA(rld, intgroup=rev(factors), returnData=TRUE) | 174 p <- plotPCA(rld, intgroup=rev(factors)) |
168 labs <- paste0(seq_len(ncol(dds)), ": ", do.call(paste, as.list(colData(dds)[factors]))) | 175 print(p + geom_text_repel(aes_string(x = "PC1", y = "PC2", label = factor(colnames(dds))), size=3) + geom_point()) |
169 library(ggplot2) | |
170 print(ggplot(d, aes(x=PC1,y=PC2, col=group,label=factor(labs)), environment = environment()) + geom_point() + geom_text(size=3)) | |
171 dat <- assay(rld) | 176 dat <- assay(rld) |
172 colnames(dat) <- labs | |
173 distsRL <- dist(t(dat)) | 177 distsRL <- dist(t(dat)) |
174 mat <- as.matrix(distsRL) | 178 mat <- as.matrix(distsRL) |
175 hc <- hclust(distsRL) | 179 colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) |
176 hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100) | 180 pheatmap(mat, |
177 heatmap.2(mat, Rowv=as.dendrogram(hc), symm=TRUE, trace="none", col = rev(hmcol), | 181 clustering_distance_rows=distsRL, |
178 main="Sample-to-sample distances", margin=c(13,13)) | 182 clustering_distance_cols=distsRL, |
183 col=colors, | |
184 main="Sample-to-sample distances") | |
179 plotDispEsts(dds, main="Dispersion estimates") | 185 plotDispEsts(dds, main="Dispersion estimates") |
180 } | 186 } |
181 | 187 |
182 # these are plots which can be made for each comparison, e.g. | 188 # these are plots which can be made for each comparison, e.g. |
183 # once for C vs A and once for B vs A | 189 # once for C vs A and once for B vs A |
221 if (!useTXI) { | 227 if (!useTXI) { |
222 # construct the object from HTSeq files | 228 # construct the object from HTSeq files |
223 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, | 229 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, |
224 directory = dir, | 230 directory = dir, |
225 design = designFormula) | 231 design = designFormula) |
232 labs <- unname(unlist(filenames_to_labels[colnames(dds)])) | |
233 colnames(dds) <- labs | |
226 } else { | 234 } else { |
227 # construct the object using tximport | 235 # construct the object using tximport |
228 # first need to make the tx2gene table | 236 # first need to make the tx2gene table |
229 # this takes ~2-3 minutes using Bioconductor functions | 237 # this takes ~2-3 minutes using Bioconductor functions |
230 if (!is.null(gtfFile)) { | 238 if (!is.null(gtfFile)) { |
236 df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME") | 244 df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME") |
237 tx2gene <- df[, 2:1] # tx ID, then gene ID | 245 tx2gene <- df[, 2:1] # tx ID, then gene ID |
238 } | 246 } |
239 library("tximport") | 247 library("tximport") |
240 txiFiles <- as.character(sampleTable[,2]) | 248 txiFiles <- as.character(sampleTable[,2]) |
241 names(txiFiles) <- as.character(sampleTable[,1]) | 249 labs <- unname(unlist(filenames_to_labels[sampleTable[,1]])) |
250 names(txiFiles) <- labs | |
242 txi <- tximport(txiFiles, type=opt$txtype, tx2gene=tx2gene) | 251 txi <- tximport(txiFiles, type=opt$txtype, tx2gene=tx2gene) |
243 dds <- DESeqDataSetFromTximport(txi, | 252 dds <- DESeqDataSetFromTximport(txi, |
244 sampleTable[,3:ncol(sampleTable),drop=FALSE], | 253 sampleTable[,3:ncol(sampleTable),drop=FALSE], |
245 designFormula) | 254 designFormula) |
246 } | 255 } |
297 | 306 |
298 n <- nlevels(colData(dds)[[primaryFactor]]) | 307 n <- nlevels(colData(dds)[[primaryFactor]]) |
299 allLevels <- levels(colData(dds)[[primaryFactor]]) | 308 allLevels <- levels(colData(dds)[[primaryFactor]]) |
300 | 309 |
301 if (!is.null(opt$countsfile)) { | 310 if (!is.null(opt$countsfile)) { |
302 labs <- paste0(seq_len(ncol(dds)), ": ", do.call(paste, as.list(colData(dds)[factors]))) | |
303 normalizedCounts<-counts(dds,normalized=TRUE) | 311 normalizedCounts<-counts(dds,normalized=TRUE) |
304 colnames(normalizedCounts)<-labs | |
305 write.table(normalizedCounts, file=opt$countsfile, sep="\t", col.names=NA, quote=FALSE) | 312 write.table(normalizedCounts, file=opt$countsfile, sep="\t", col.names=NA, quote=FALSE) |
306 } | 313 } |
307 | 314 |
308 if (is.null(opt$many_contrasts)) { | 315 if (is.null(opt$many_contrasts)) { |
309 # only contrast the first and second level of the primary factor | 316 # only contrast the first and second level of the primary factor |