Repository 'edger'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/edger

Changeset 3:d79ed3ec25fe (2018-05-06)
Previous changeset 2:a1634a9c2ee1 (2018-04-19) Next changeset 4:4730985c816f (2019-01-05)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit e646be741e315df9332b5206cec1e47c11370ff1
modified:
edger.R
edger.xml
test-data/anno.txt
test-data/out_rscript.txt
b
diff -r a1634a9c2ee1 -r d79ed3ec25fe edger.R
--- a/edger.R Thu Apr 19 17:26:38 2018 -0400
+++ b/edger.R Sun May 06 13:38:41 2018 -0400
[
@@ -306,9 +306,13 @@
 bcvOutPng <- makeOut("bcvplot.png")
 qlOutPdf <- makeOut("qlplot.pdf")
 qlOutPng <- makeOut("qlplot.png")
-mdsOutPdf <- makeOut("mdsplot.pdf")
-mdsOutPng <- makeOut("mdsplot.png")
-mdOutPdf <- character()   # Initialise character vector
+mdsOutPdf <- character()   # Initialise character vector
+mdsOutPng <- character()
+for (i in 1:ncol(factors)) {
+    mdsOutPdf[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".pdf"))
+    mdsOutPng[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".png"))
+}
+mdOutPdf <- character()
 mdOutPng <- character()
 topOut <- character()
 for (i in 1:length(contrastData)) {
@@ -338,7 +342,9 @@
 data <- list()
 data$counts <- counts
 if (haveAnno) {
-    data$genes <- geneanno
+  # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
+  annoord <- geneanno[match(row.names(counts), geneanno[,1]), ]
+  data$genes <- annoord
 } else {
     data$genes <- data.frame(GeneID=row.names(counts))
 }
@@ -410,17 +416,41 @@
 
 # Plot MDS
 labels <- names(counts)
+
+# MDS plot
 png(mdsOutPng, width=600, height=600)
-# Currently only using a single factor
-plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot")
-imageData[1, ] <- c("MDS Plot", "mdsplot.png")
+plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1]))
+imgName <- paste0("MDS Plot_", names(factors)[1], ".png")
+imgAddr <- paste0("mdsplot_", names(factors)[1], ".png")
+imageData[1, ] <- c(imgName, imgAddr)
 invisible(dev.off())
 
 pdf(mdsOutPdf)
-plotMDS(data, labels=labels, cex=0.5)
-linkData[1, ] <- c("MDS Plot.pdf", "mdsplot.pdf")
+plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1]))
+linkName <- paste0("MDS Plot_", names(factors)[1], ".pdf")
+linkAddr <- paste0("mdsplot_", names(factors)[1], ".pdf")
+linkData[1, ] <- c(linkName, linkAddr)
 invisible(dev.off())
 
+# If additional factors create additional MDS plots coloured by factor
+if (ncol(factors) > 1) {
+    for (i in 2:ncol(factors)) {
+        png(mdsOutPng[i], width=600, height=600)
+        plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
+        imgName <- paste0("MDS Plot_", names(factors)[i], ".png")
+        imgAddr <- paste0("mdsplot_", names(factors)[i], ".png")
+        imageData <- rbind(imageData, c(imgName, imgAddr))
+        invisible(dev.off())
+
+        pdf(mdsOutPdf[i])
+        plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
+        linkName <- paste0("MDS Plot_", names(factors)[i], ".pdf")
+        linkAddr <- paste0("mdsplot_", names(factors)[i], ".pdf")
+        linkData <- rbind(linkData, c(linkName, linkAddr))
+        invisible(dev.off())
+    }
+}
+
 # BCV Plot
 png(bcvOutPng, width=600, height=600)
 plotBCV(data, main="BCV Plot")
@@ -469,7 +499,7 @@
 if (wantNorm) { 
         normalisedCounts <- cpm(data, normalized.lib.sizes=TRUE, log=TRUE) 
         normalisedCounts <- data.frame(data$genes, normalisedCounts)
-        write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t")
+        write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE)
         linkData <- rbind(linkData, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
 }
 
@@ -492,7 +522,7 @@
                                              
     # Write top expressions table
     top <- topTags(res, n=Inf, sort.by="PValue")
-    write.table(top, file=topOut[i], row.names=FALSE, sep="\t")
+    write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE)
     
     linkName <- paste0("edgeR_", contrastData[i], ".tsv")
     linkAddr <- paste0("edgeR_", contrastData[i], ".tsv")
@@ -502,7 +532,7 @@
     pdf(mdOutPdf[i])
     limma::plotMD(res, status=status,
                                 main=paste("MD Plot:", unmake.names(contrastData[i])), 
-                                col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
+                                hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
                                 xlab="Average Expression", ylab="logFC")
     
     abline(h=0, col="grey", lty=2)
@@ -515,7 +545,7 @@
     png(mdOutPng[i], height=600, width=600)
     limma::plotMD(res, status=status,
                                 main=paste("MD Plot:", unmake.names(contrastData[i])), 
-                                col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
+                                hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
                                 xlab="Average Expression", ylab="logFC")
     
     abline(h=0, col="grey", lty=2)
@@ -620,7 +650,7 @@
 
 if (filtCPM || filtSmpCount || filtTotCount) {
     if (filtCPM) {
-    tempStr <- paste("Genes without more than", opt$cmpReq,
+    tempStr <- paste("Genes without more than", opt$cpmReq,
                                      "CPM in at least", opt$sampleReq, "samples are insignificant",
                                      "and filtered out.")
     } else if (filtSmpCount) {
b
diff -r a1634a9c2ee1 -r d79ed3ec25fe edger.xml
--- a/edger.xml Thu Apr 19 17:26:38 2018 -0400
+++ b/edger.xml Sun May 06 13:38:41 2018 -0400
b
@@ -1,4 +1,4 @@
-<tool id="edger" name="edgeR" version="3.20.7.1">
+<tool id="edger" name="edgeR" version="3.20.7.2">
     <description>
         Perform differential expression of count data
     </description>
@@ -120,12 +120,12 @@
                     </param>
                     <repeat name="rep_group" title="Group" min="2" default="2">
                         <param name="groupName" type="text" label="Name"
-                        help="Name of group that the counts files(s) belong to (e.g. WT or Mut). NOTE: Please only use letters, numbers or underscores (case sensitive).">
+                        help="Name of group that the counts files belong to (e.g. WT or Mut). NOTE: Please only use letters, numbers or underscores (case sensitive).">
                         <sanitizer>
                             <valid initial="string.letters,string.digits"><add value="_" /></valid>
                         </sanitizer>
                         </param>
-                        <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts file(s)"/>
+                        <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts files"/>
                     </repeat>
                 </repeat>
             </when>
@@ -683,19 +683,19 @@
 **Gene Annotations:**
 Optional input for gene annotations, this can contain more
 information about the genes than just an ID number. The annotations will
-be available in the differential expression results table and the optional normalised counts table.
+be available in the differential expression results table and the optional normalised counts table. The file must contain a header row and have the gene IDs in the first column. The number of rows should match that of the counts files, add NA for any gene IDs with no annotation. The Galaxy tool **annotateMyIDs** can be used to obtain annotations for human, mouse, fly and zebrafish.
 
 Example:
 
     ==========  ==========  ===================================================
     **GeneID**  **Symbol**  **GeneName**
     ----------  ----------  ---------------------------------------------------
-    1287        Pzp         pregnancy zone protein
-    1298        Aanat       arylalkylamine N-acetyltransferase
-    1302        Aatk        apoptosis-associated tyrosine kinase
-    1303        Abca1       ATP-binding cassette, sub-family A (ABC1), member 1
-    1304        Abca4       ATP-binding cassette, sub-family A (ABC1), member 4
-    1305        Abca2       ATP-binding cassette, sub-family A (ABC1), member 2
+    11287        Pzp         pregnancy zone protein
+    11298        Aanat       arylalkylamine N-acetyltransferase
+    11302        Aatk        apoptosis-associated tyrosine kinase
+    11303        Abca1       ATP-binding cassette, sub-family A (ABC1), member 1
+    11304        Abca4       ATP-binding cassette, sub-family A (ABC1), member 4
+    11305        Abca2       ATP-binding cassette, sub-family A (ABC1), member 2
     ==========  ==========  ===================================================
 
 **Contrasts of Interest:**
b
diff -r a1634a9c2ee1 -r d79ed3ec25fe test-data/anno.txt
--- a/test-data/anno.txt Thu Apr 19 17:26:38 2018 -0400
+++ b/test-data/anno.txt Sun May 06 13:38:41 2018 -0400
b
@@ -1,7 +1,7 @@
 EntrezID Symbol GeneName Chr Length
+11302 Aatk apoptosis-associated tyrosine kinase 11 5743
 11287 Pzp pregnancy zone protein 6 4681
-11298 Aanat arylalkylamine N-acetyltransferase 11 1455
-11302 Aatk apoptosis-associated tyrosine kinase 11 5743
 11303 Abca1 ATP-binding cassette, sub-family A (ABC1), member 1 4 10260
 11304 Abca4 ATP-binding cassette, sub-family A (ABC1), member 4 3 7248
-11305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 2 8061
\ No newline at end of file
+11305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 2 8061
+11298 Aanat arylalkylamine N-acetyltransferase 11 1455
b
diff -r a1634a9c2ee1 -r d79ed3ec25fe test-data/out_rscript.txt
--- a/test-data/out_rscript.txt Thu Apr 19 17:26:38 2018 -0400
+++ b/test-data/out_rscript.txt Sun May 06 13:38:41 2018 -0400
[
@@ -306,9 +306,13 @@
 bcvOutPng <- makeOut("bcvplot.png")
 qlOutPdf <- makeOut("qlplot.pdf")
 qlOutPng <- makeOut("qlplot.png")
-mdsOutPdf <- makeOut("mdsplot.pdf")
-mdsOutPng <- makeOut("mdsplot.png")
-mdOutPdf <- character()   # Initialise character vector
+mdsOutPdf <- character()   # Initialise character vector
+mdsOutPng <- character()
+for (i in 1:ncol(factors)) {
+    mdsOutPdf[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".pdf"))
+    mdsOutPng[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".png"))
+}
+mdOutPdf <- character()
 mdOutPng <- character()
 topOut <- character()
 for (i in 1:length(contrastData)) {
@@ -338,7 +342,9 @@
 data <- list()
 data$counts <- counts
 if (haveAnno) {
-    data$genes <- geneanno
+  # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
+  annoord <- geneanno[match(row.names(counts), geneanno[,1]), ]
+  data$genes <- annoord
 } else {
     data$genes <- data.frame(GeneID=row.names(counts))
 }
@@ -410,17 +416,41 @@
 
 # Plot MDS
 labels <- names(counts)
+
+# MDS plot
 png(mdsOutPng, width=600, height=600)
-# Currently only using a single factor
-plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot")
-imageData[1, ] <- c("MDS Plot", "mdsplot.png")
+plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1]))
+imgName <- paste0("MDS Plot_", names(factors)[1], ".png")
+imgAddr <- paste0("mdsplot_", names(factors)[1], ".png")
+imageData[1, ] <- c(imgName, imgAddr)
 invisible(dev.off())
 
 pdf(mdsOutPdf)
-plotMDS(data, labels=labels, cex=0.5)
-linkData[1, ] <- c("MDS Plot.pdf", "mdsplot.pdf")
+plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1]))
+linkName <- paste0("MDS Plot_", names(factors)[1], ".pdf")
+linkAddr <- paste0("mdsplot_", names(factors)[1], ".pdf")
+linkData[1, ] <- c(linkName, linkAddr)
 invisible(dev.off())
 
+# If additional factors create additional MDS plots coloured by factor
+if (ncol(factors) > 1) {
+    for (i in 2:ncol(factors)) {
+        png(mdsOutPng[i], width=600, height=600)
+        plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
+        imgName <- paste0("MDS Plot_", names(factors)[i], ".png")
+        imgAddr <- paste0("mdsplot_", names(factors)[i], ".png")
+        imageData <- rbind(imageData, c(imgName, imgAddr))
+        invisible(dev.off())
+
+        pdf(mdsOutPdf[i])
+        plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
+        linkName <- paste0("MDS Plot_", names(factors)[i], ".pdf")
+        linkAddr <- paste0("mdsplot_", names(factors)[i], ".pdf")
+        linkData <- rbind(linkData, c(linkName, linkAddr))
+        invisible(dev.off())
+    }
+}
+
 # BCV Plot
 png(bcvOutPng, width=600, height=600)
 plotBCV(data, main="BCV Plot")
@@ -469,7 +499,7 @@
 if (wantNorm) { 
         normalisedCounts <- cpm(data, normalized.lib.sizes=TRUE, log=TRUE) 
         normalisedCounts <- data.frame(data$genes, normalisedCounts)
-        write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t")
+        write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE)
         linkData <- rbind(linkData, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
 }
 
@@ -492,7 +522,7 @@
                                              
     # Write top expressions table
     top <- topTags(res, n=Inf, sort.by="PValue")
-    write.table(top, file=topOut[i], row.names=FALSE, sep="\t")
+    write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE)
     
     linkName <- paste0("edgeR_", contrastData[i], ".tsv")
     linkAddr <- paste0("edgeR_", contrastData[i], ".tsv")
@@ -502,7 +532,7 @@
     pdf(mdOutPdf[i])
     limma::plotMD(res, status=status,
                                 main=paste("MD Plot:", unmake.names(contrastData[i])), 
-                                col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
+                                hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
                                 xlab="Average Expression", ylab="logFC")
     
     abline(h=0, col="grey", lty=2)
@@ -515,7 +545,7 @@
     png(mdOutPng[i], height=600, width=600)
     limma::plotMD(res, status=status,
                                 main=paste("MD Plot:", unmake.names(contrastData[i])), 
-                                col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
+                                hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
                                 xlab="Average Expression", ylab="logFC")
     
     abline(h=0, col="grey", lty=2)
@@ -620,7 +650,7 @@
 
 if (filtCPM || filtSmpCount || filtTotCount) {
     if (filtCPM) {
-    tempStr <- paste("Genes without more than", opt$cmpReq,
+    tempStr <- paste("Genes without more than", opt$cpmReq,
                                      "CPM in at least", opt$sampleReq, "samples are insignificant",
                                      "and filtered out.")
     } else if (filtSmpCount) {