diff deseq2.R @ 12:bd06df00180a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 62e9101c1e7b8467e395f31ccbd9321de01a6418
author iuc
date Mon, 29 Jan 2018 07:30:18 -0500
parents 25204a860b79
children d0c39b5e78cf
line wrap: on
line diff
--- a/deseq2.R	Sun Jan 28 04:04:12 2018 -0500
+++ b/deseq2.R	Mon Jan 29 07:30:18 2018 -0500
@@ -52,6 +52,7 @@
   "outfile", "o", 1, "character",
   "countsfile", "n", 1, "character",
   "factors", "f", 1, "character",
+  "files_to_labels", "l", 1, "character",
   "plots" , "p", 1, "character",
   "sample_table", "s", 1, "character",
   "tximport", "i", 0, "logical",
@@ -118,9 +119,11 @@
   parser <- newJSONParser()
   parser$addData(opt$factors)
   factorList <- parser$getObject()
+  filenames_to_labels <- fromJSON(opt$files_to_labels)
   factors <- sapply(factorList, function(x) x[[1]])
   primaryFactor <- factors[1]
   filenamesIn <- unname(unlist(factorList[[1]][[2]]))
+  labs = unname(unlist(filenames_to_labels[basename(filenamesIn)]))
   sampleTable <- data.frame(sample=basename(filenamesIn),
                             filename=filenamesIn,
                             row.names=filenamesIn,
@@ -135,7 +138,7 @@
     }
     sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
   }
-  rownames(sampleTable) <- sampleTable$sample
+  rownames(sampleTable) <- labs
 } else {
   # read the sample_table argument
   # this table is described in ?DESeqDataSet
@@ -163,19 +166,22 @@
 
 # these are plots which are made once for each analysis
 generateGenericPlots <- function(dds, factors) {
+  library("ggplot2")
+  library("ggrepel")
+  library("pheatmap")
+
   rld <- rlog(dds)
-  d=plotPCA(rld, intgroup=rev(factors), returnData=TRUE)
-  labs <- paste0(seq_len(ncol(dds)), ": ", do.call(paste, as.list(colData(dds)[factors])))
-  library(ggplot2)
-  print(ggplot(d, aes(x=PC1,y=PC2, col=group,label=factor(labs)), environment = environment()) + geom_point() + geom_text(size=3))  
+  p <- plotPCA(rld, intgroup=rev(factors))
+  print(p + geom_text_repel(aes_string(x = "PC1", y = "PC2", label = factor(colnames(dds))), size=3)  + geom_point())
   dat <- assay(rld)
-  colnames(dat) <- labs
   distsRL <- dist(t(dat))
   mat <- as.matrix(distsRL)
-  hc <- hclust(distsRL)
-  hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
-  heatmap.2(mat, Rowv=as.dendrogram(hc), symm=TRUE, trace="none", col = rev(hmcol),
-            main="Sample-to-sample distances", margin=c(13,13))
+  colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
+  pheatmap(mat,
+           clustering_distance_rows=distsRL,
+           clustering_distance_cols=distsRL,
+           col=colors,
+           main="Sample-to-sample distances")
   plotDispEsts(dds, main="Dispersion estimates")
 }
 
@@ -223,6 +229,8 @@
   dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
                                     directory = dir,
                                     design =  designFormula)
+  labs <- unname(unlist(filenames_to_labels[colnames(dds)]))
+  colnames(dds) <- labs
 } else {
     # construct the object using tximport
     # first need to make the tx2gene table
@@ -238,7 +246,8 @@
     }
     library("tximport")
     txiFiles <- as.character(sampleTable[,2])
-    names(txiFiles) <- as.character(sampleTable[,1])
+    labs <- unname(unlist(filenames_to_labels[sampleTable[,1]]))
+    names(txiFiles) <- labs
     txi <- tximport(txiFiles, type=opt$txtype, tx2gene=tx2gene)
     dds <- DESeqDataSetFromTximport(txi,
                                     sampleTable[,3:ncol(sampleTable),drop=FALSE],
@@ -299,9 +308,7 @@
 allLevels <- levels(colData(dds)[[primaryFactor]])
 
 if (!is.null(opt$countsfile)) {
-    labs <- paste0(seq_len(ncol(dds)), ": ", do.call(paste, as.list(colData(dds)[factors])))
     normalizedCounts<-counts(dds,normalized=TRUE)
-    colnames(normalizedCounts)<-labs
     write.table(normalizedCounts, file=opt$countsfile, sep="\t", col.names=NA, quote=FALSE)
 }