changeset 14:3133e833b3ce draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit c915bd7e68cb3a2944397aaf184c2b1db97cb3a5
author iuc
date Fri, 04 Jan 2019 04:11:56 -0500
parents d5a940112511
children 41573afd6871
files limma_voom.R limma_voom.xml test-data/factorinfo.txt
diffstat 3 files changed, 83 insertions(+), 54 deletions(-) [+]
line wrap: on
line diff
--- a/limma_voom.R	Sun Sep 30 10:51:29 2018 -0400
+++ b/limma_voom.R	Fri Jan 04 04:11:56 2019 -0500
@@ -305,6 +305,8 @@
     # Process factors
     if (is.null(opt$factInput)) {
             factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE)
+            # order samples as in counts matrix
+            factorData <- factorData[match(colnames(counts), factorData[, 1]), ]
             factors <- factorData[, -1, drop=FALSE]
     }  else {
             factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
@@ -425,7 +427,7 @@
         keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
     } else if (filtCPM) {
         myCPM <- cpm(data$counts)
-        thresh <- myCPM >= opt$cpmReq 
+        thresh <- myCPM >= opt$cpmReq
         keep <- rowSums(thresh) >= opt$sampleReq
 
         if ("c" %in% plots) {
@@ -643,6 +645,15 @@
 linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
 invisible(dev.off())
 
+# generate Glimma interactive MDS Plot
+if ("i" %in% plots) {
+    Glimma::glMDSPlot(y, labels=samplenames, groups=factors[, 1],
+        folder="glimma_MDS", launch=FALSE)
+    linkName <- "Glimma_MDSPlot.html"
+    linkAddr <- "glimma_MDS/MDS-Plot.html"
+    linkData <- rbind(linkData, c(linkName, linkAddr))
+}
+
 if ("x" %in% plots) {
     png(mdsxOutPng, width=1000, height=500)
     par(mfrow=c(1, 2))
@@ -700,23 +711,6 @@
     } else {
         fit <- eBayes(fit, trend=TRUE, robust=FALSE)
     }
-    # plot fit with plotSA
-    saOutPng <- makeOut("saplot.png")
-    saOutPdf <- makeOut("saplot.pdf")
-
-    png(saOutPng, width=500, height=500)
-    plotSA(fit, main="SA Plot")
-    imgName <- "SAPlot.png"
-    imgAddr <- "saplot.png"
-    imageData <- rbind(imageData, c(imgName, imgAddr))
-    invisible(dev.off())
-
-    pdf(saOutPdf, width=14)
-    plotSA(fit, main="SA Plot")
-    linkName <- "SAPlot.pdf"
-    linkAddr <- "saplot.pdf"
-    linkData <- rbind(linkData, c(linkName, linkAddr))
-    invisible(dev.off())
 
     plotData <- logCPM
 
@@ -727,22 +721,22 @@
     }
 } else {
     # limma-voom approach
-    voomOutPdf <- makeOut("voomplot.pdf")
-    voomOutPng <- makeOut("voomplot.png")
 
     if (wantWeight) {
+        voomWtsOutPdf <- makeOut("voomwtsplot.pdf")
+        voomWtsOutPng <- makeOut("voomwtsplot.png")
         # Creating voom data object and plot
-        png(voomOutPng, width=1000, height=500)
+        png(voomWtsOutPng, width=1000, height=500)
         vData <- voomWithQualityWeights(y, design=design, plot=TRUE)
-        imgName <- "VoomPlot.png"
-        imgAddr <- "voomplot.png"
+        imgName <- "VoomWithQualityWeightsPlot.png"
+        imgAddr <- "voomwtsplot.png"
         imageData <- rbind(imageData, c(imgName, imgAddr))
         invisible(dev.off())
 
-        pdf(voomOutPdf, width=14)
+        pdf(voomWtsOutPdf, width=14)
         vData <- voomWithQualityWeights(y, design=design, plot=TRUE)
-        linkName <- "VoomPlot.pdf"
-        linkAddr <- "voomplot.pdf"
+        linkName <- "VoomWithQualityWeightsPlot.pdf"
+        linkAddr <- "voomwtsplot.pdf"
         linkData <- rbind(linkData, c(linkName, linkAddr))
         invisible(dev.off())
 
@@ -751,6 +745,8 @@
         voomFit <- lmFit(vData, design, weights=wts)
 
     } else {
+        voomOutPdf <- makeOut("voomplot.pdf")
+        voomOutPng <- makeOut("voomplot.png")
         # Creating voom data object and plot
         png(voomOutPng, width=500, height=500)
         vData <- voom(y, design=design, plot=TRUE)
@@ -787,6 +783,24 @@
     plotData <- vData
 }
 
+# plot final model mean-variance trend with plotSA
+saOutPng <- makeOut("saplot.png")
+saOutPdf <- makeOut("saplot.pdf")
+
+png(saOutPng, width=500, height=500)
+plotSA(fit, main="Final model: Mean-variance trend (SA Plot)")
+imgName <- "SAPlot.png"
+imgAddr <- "saplot.png"
+imageData <- rbind(imageData, c(imgName, imgAddr))
+invisible(dev.off())
+
+pdf(saOutPdf)
+plotSA(fit, main="Final model: Mean-variance trend (SA Plot)")
+linkName <- "SAPlot.pdf"
+linkAddr <- "saplot.pdf"
+linkData <- rbind(linkData, c(linkName, linkAddr))
+invisible(dev.off())
+
  # Save library size info
 if (wantLibinfo) {
     efflibsize <- round(y$samples$lib.size * y$samples$norm.factors)
@@ -844,11 +858,13 @@
     linkData <- rbind(linkData, c(linkName, linkAddr))
     invisible(dev.off())
 
-    # Generate Glimma interactive MD plot and table, requires annotation file (assumes gene labels/symbols in 2nd column)
-    if (haveAnno) {
+    # Generate Glimma interactive Volcano, MD plot and tables, requires annotation file (assumes gene labels/symbols in 2nd column)
+    if ("i" %in% plots & haveAnno) {
         # make gene labels unique to handle NAs
         geneanno <- y$genes
         geneanno[, 2] <- make.unique(geneanno[, 2])
+
+        # MD plot
         Glimma::glMDPlot(fit, coef=i, counts=y$counts, anno=geneanno, groups=factors[, 1],
              status=status[, i], sample.cols=col.group,
              main=paste("MD Plot:", unmake.names(con)), side.main=colnames(y$genes)[2],
@@ -856,6 +872,16 @@
         linkName <- paste0("Glimma_MDPlot_", con, ".html")
         linkAddr <- paste0("glimma_", con, "/MD-Plot.html")
         linkData <- rbind(linkData, c(linkName, linkAddr))
+
+        # Volcano plot
+        Glimma::glXYPlot(x=fit$coefficients[, i], y=fit$lods[, i], counts=y$counts, anno=geneanno, groups=factors[, 1], 
+            status=status[, i], sample.cols=col.group,
+            main=paste("Volcano Plot:", unmake.names(con)), side.main=colnames(y$genes)[2],
+            xlab="logFC", ylab="logodds",
+            folder=paste0("glimma_volcano_", unmake.names(con)), launch=FALSE)
+        linkName <- paste0("Glimma_VolcanoPlot_", con, ".html")
+        linkAddr <- paste0("glimma_volcano_", con, "/XY-Plot.html")
+        linkData <- rbind(linkData, c(linkName, linkAddr))
     }
 
     # Plot Volcano
@@ -942,7 +968,7 @@
                 stripchart(plotData[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols,
                     method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval))
             } else {
-                stripchart(plotData$E[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols, 
+                stripchart(plotData$E[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols,
                     method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval))
             }
         }
@@ -990,9 +1016,7 @@
 cata("Links to PDF copies of plots are in 'Plots' section below <br />\n")
 
 for (i in 1:nrow(imageData)) {
-    if (grepl("density|box|mds|mdvol", imageData$Link[i])) {
-        HtmlImage(imageData$Link[i], imageData$Label[i], width=1000)
-    } else if (wantWeight) {
+    if (grepl("density|box|mds|mdvol|wts", imageData$Link[i])) {
         HtmlImage(imageData$Link[i], imageData$Label[i], width=1000)
     } else {
         HtmlImage(imageData$Link[i], imageData$Label[i])
@@ -1021,7 +1045,7 @@
 cata("<h4>Plots:</h4>\n")
 #PDFs
 for (i in 1:nrow(linkData)) {
-    if (grepl("density|cpm|boxplot|mds|mdplots|voomplot|saplot", linkData$Link[i])) {
+    if (grepl("density|cpm|boxplot|mds|mdplots|voom|saplot", linkData$Link[i])) {
         HtmlLink(linkData$Link[i], linkData$Label[i])
   }
 }
@@ -1068,12 +1092,14 @@
     }
 }
 
-cata("<h4>Glimma Interactive Results:</h4>\n")
-    for (i in 1:nrow(linkData)) {
-        if (grepl("glimma", linkData$Link[i])) {
-            HtmlLink(linkData$Link[i], linkData$Label[i])
+if ("i" %in% plots) {
+    cata("<h4>Glimma Interactive Results:</h4>\n")
+        for (i in 1:nrow(linkData)) {
+            if (grepl("glimma", linkData$Link[i])) {
+                HtmlLink(linkData$Link[i], linkData$Label[i])
+            }
         }
-    }
+}
 
 cata("<p>Alt-click links to download file.</p>\n")
 cata("<p>Click floppy disc icon associated history item to download ")
--- a/limma_voom.xml	Sun Sep 30 10:51:29 2018 -0400
+++ b/limma_voom.xml	Fri Jan 04 04:11:56 2019 -0500
@@ -1,4 +1,4 @@
-<tool id="limma_voom" name="limma" version="3.34.9.9">
+<tool id="limma_voom" name="limma" version="3.34.9.10">
     <description>
         Perform differential expression with limma-voom or limma-trend
     </description>
@@ -120,12 +120,12 @@
 mkdir ./output_dir
 
 &&
-#if $anno.annoOpt=='yes':
-    cp -r ./glimma* '$outReport.files_path' &&
+cp '$outReport.files_path'/*tsv output_dir/
+
+#if 'i' in str($out.plots).split( "," ):
+    && cp -r ./glimma* '$outReport.files_path'
 #end if
 
-cp '$outReport.files_path'/*tsv output_dir/
-
 #if $out.filtCounts or $out.normCounts:
     && cp '$outReport.files_path'/*counts output_dir/
 #end if
@@ -212,7 +212,7 @@
         <!-- Gene Annotations -->
         <conditional name="anno">
             <param name="annoOpt" type="select" label="Use Gene Annotations?"
-                    help="If you provide an annotation file, annotations will be added to the table(s) of differential expression results to provide descriptions for each gene, and used to label the top genes in the Volcano plot. An interactive Glimma MD plot and table will also be generated. See Help section below.">
+                    help="If you provide an annotation file, annotations will be added to the table(s) of differential expression results to provide descriptions for each gene, and used to label the top genes in the Volcano plot. Interactive Glimma Volcano and MD plots will also be generated. See Help section below.">
                 <option value="no">No</option>
                 <option value="yes">Yes</option>
             </param>
@@ -272,7 +272,8 @@
 
         <!-- Output Options -->
         <section name="out" expanded="false" title="Output Options">
-            <param name="plots" type="select" display="checkboxes" multiple="True" optional="True" label="Additional Plots" help="Select additional plots to output in the report and as PDFs">
+            <param name="plots" type="select" display="checkboxes" multiple="True" optional="True" label="Additional Plots" help="Select additional plots to output in the report">
+                <option value="i" selected="True">Glimma Interactive Plots</option>
                 <option value="d">Density Plots (if filtering)</option>
                 <option value="c">CpmsVsCounts Plots (if filtering on cpms)</option>
                 <option value="b">Box Plots (if normalising)</option>
@@ -384,6 +385,7 @@
             <output name="outReport" >
                 <assert_contents>
                     <has_text text="Limma Analysis Output" />
+                    <has_text text="Glimma Interactive Results" />
                     <not_has_text text="RData" />
                 </assert_contents>
             </output>
@@ -464,7 +466,7 @@
                 </element>
             </output_collection>
         </test>
-        <!-- Ensure factors file input works -->
+        <!-- Ensure factors file with unordered samples works -->
         <test>
             <param name="format" value="matrix" />
             <param name="ffile" value="yes" />
@@ -653,7 +655,7 @@
 
 **What it does**
 
-Given a matrix of counts (e.g. from featureCounts) and optional information about the genes, performs differential expression (DE) using the limma_ Bioconductor package and produces plots and tables useful in DE analysis. If an annotation file is provided, interactive Glimma_ plots and a table of differentially expressed genes will also be generated. See an example workflow here_.
+Given a matrix of counts (e.g. from featureCounts) and optional information about the genes, this tool performs differential expression (DE) using the limma_ Bioconductor package and produces plots and tables useful in DE analysis. Interactive Glimma_ plots and tables can also be generated and links to the Glimma plots will be provided in the report. See an example workflow here_.
 
 In the `limma approach`_ to RNA-seq, read counts are converted to log2-counts-per-million (logCPM) and the mean-variance relationship is modelled either with precision weights or with an empirical Bayes prior trend. The precision weights approach is called “voom” and the prior trend approach is called “limma-trend”. For more information, see the Help section below.
 
@@ -699,7 +701,7 @@
 **Gene Annotations:**
 Optional input for gene annotations, this can contain more
 information about the genes than just an ID number. The annotations will
-be available in the differential expression results table and the optional normalised counts table. They will also be used to generate interactive Glimma_ MD plots and table of differential expression, a link to the Glimma plots will be provided in the report. The input annotation file must contain a header row and have the gene IDs in the first column. The second column will be used to label the genes in the Volcano plot and interactive Glimma plots, additional columns will be available in the Glimma interactive table. The number of rows should match that of the counts files, add NA for any gene IDs with no annotation. The Galaxy tool **annotateMyIDs** can be used to obtain annotations for human, mouse, fly and zebrafish.
+be available in the differential expression results table and the optional normalised counts table. They will also be used to generate interactive Glimma_ Volcano, MD plots and tables of differential expression. The input annotation file must contain a header row and have the gene IDs in the first column. The second column will be used to label the genes in the Volcano plot and interactive Glimma plots, additional columns will be available in the Glimma interactive table. The number of rows should match that of the counts files, add NA for any gene IDs with no annotation. The Galaxy tool **annotateMyIDs** can be used to obtain annotations for human, mouse, fly and zebrafish.
 
 Example:
 
@@ -715,7 +717,7 @@
     ==========  ==========  ===================================================
 
 **Factor Information:**
-Enter factor names and groups in the tool form, or provide a tab-separated file that has the samples in the same order as listed in the columns of the counts matrix. The second column should contain the primary factor levels (e.g. WT, Mut) with optional additional columns for any secondary factors.
+Enter factor names and groups in the tool form, or provide a tab-separated file that has the names of the samples in the first column and one header row. The sample names must be the same as the names in the columns of the count matrix. The second column should contain the primary factor levels (e.g. WT, Mut) with optional additional columns for any secondary factors.
 
 Example:
 
@@ -730,7 +732,7 @@
     Mut3       Mut          b3
     ========== ============ =========
 
-*Factor Name:* The name of the experimental factor being investigated e.g. Genotype, Treatment. One factor must be entered and spaces must not be used. Optionally, additional factors can be included, these are variables that might influence your experiment e.g. Batch, Gender, Subject. If additional factors are entered, edgeR will fit an additive linear model.
+*Factor Name:* The name of the experimental factor being investigated e.g. Genotype, Treatment. One factor must be entered and spaces must not be used. Optionally, additional factors can be included, these are variables that might influence your experiment e.g. Batch, Gender, Subject. If additional factors are entered, an additive linear model will be used.
 
 *Groups:* The names of the groups for the factor. These must be entered in the same order as the samples (to which the groups correspond) are listed in the columns of the counts matrix. Spaces must not be used and if entered into the tool form above, the values should be separated by commas.
 
@@ -835,10 +837,11 @@
 
     * a table of differentially expressed genes for each contrast of interest
     * a HTML report with plots and additional information
-    * an interactive Glimma MD plot and table (if annotation file provided)
 
 Optionally, under **Output Options** you can choose to output
 
+    * interactive Glimma plots and tables: MDS plot, and (if annotation file is input) Volcano plot and MD plot (default: Yes)
+    * additional plots in the report and as PDFs
     * a normalised counts table
     * a library size information file
     * the R script used by this tool
@@ -899,7 +902,7 @@
 
 .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html
 .. _Glimma: https://bioconductor.org/packages/release/bioc/html/Glimma.html
-.. _here: https://f1000research.com/articles/5-1408/v2
+.. _here: https://f1000research.com/articles/5-1408/v3
 .. _limma approach: https://www.ncbi.nlm.nih.gov/pubmed/25605792
 .. _limma User's Guide: http://bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf
 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
--- a/test-data/factorinfo.txt	Sun Sep 30 10:51:29 2018 -0400
+++ b/test-data/factorinfo.txt	Fri Jan 04 04:11:56 2019 -0500
@@ -1,7 +1,7 @@
 Sample	Genotype	Batch
-Mut1	Mut	b1
+WT3	WT	b3
 Mut2	Mut	b2
 Mut3	Mut	b3
 WT1	WT	b1
 WT2	WT	b2
-WT3	WT	b3
+Mut1	Mut	b1