# HG changeset patch # User iuc # Date 1499243982 14400 # Node ID 76d01fe0ec36bbf9e7bba01a7a89811e2b972a69 # Parent bdebdea5f6a79d5ff7d30b3f511db323673ba830 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 58c05c0ce9334f8b9c800283cfd1f40573546edd diff -r bdebdea5f6a7 -r 76d01fe0ec36 limma_voom.R --- a/limma_voom.R Mon Jun 12 07:41:02 2017 -0400 +++ b/limma_voom.R Wed Jul 05 04:39:42 2017 -0400 @@ -3,7 +3,7 @@ # expression analysis # # ARGS: 1.countPath -Path to RData input containing counts -# 2.annoPath -Path to RData input containing gene annotations +# 2.annoPath -Path to input containing gene annotations # 3.htmlPath -Path to html file linking to other outputs # 4.outPath -Path to folder to write all output to # 5.rdaOpt -String specifying if RData should be saved @@ -15,15 +15,18 @@ # 11.pAdjOpt -String specifying the p-value adjustment method # 12.pValReq -Float specifying the p-value requirement # 13.lfcReq -Float specifying the log-fold-change requirement -# 14.factorData -String containing factor names and values +# 14.normCounts -String specifying if normalised counts should be output +# 15.factPath -Path to factor information file +# 16.factorData -Strings containing factor names and values if manually input # # OUT: Voom Plot # BCV Plot # MA Plot -# Top Expression Table +# Expression Table # HTML file linking to the ouputs # # Author: Shian Su - registertonysu@gmail.com - Jan 2014 +# Modified by: Maria Doyle - Jun 2017 # Record starting time timeStart <- as.character(Sys.time()) @@ -148,13 +151,31 @@ pAdjOpt <- as.character(argv[11]) pValReq <- as.numeric(argv[12]) lfcReq <- as.numeric(argv[13]) -factorData <- list() -for (i in 14:length(argv)) { - newFact <- unlist(strsplit(as.character(argv[i]), split="::")) - factorData <- rbind(factorData, newFact) -} # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... +normCounts <- as.character(argv[14]) +factPath <- as.character(argv[15]) +# Process factors +if (as.character(argv[16])=="None") { + factorData <- read.table(factPath, header=TRUE, sep="\t") + factors <- factorData[,-1] +} else { + factorData <- list() + for (i in 16:length(argv)) { + newFact <- unlist(strsplit(as.character(argv[i]), split="::")) + factorData <- rbind(factorData, newFact) + } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. -# Process arguments + # Set the row names to be the name of the factor and delete first row + row.names(factorData) <- factorData[, 1] + factorData <- factorData[, -1] + factorData <- sapply(factorData, sanitiseGroups) + factorData <- sapply(factorData, strsplit, split=",") + factorData <- sapply(factorData, make.names) + # Transform factor data into data frame of R factor objects + factors <- data.frame(factorData) + +} + +# Process other arguments if (weightOpt=="yes") { wantWeight <- TRUE } else { @@ -173,15 +194,12 @@ haveAnno <- TRUE } -# Set the row names to be the name of the factor and delete first row -row.names(factorData) <- factorData[, 1] -factorData <- factorData[, -1] -factorData <- sapply(factorData, sanitiseGroups) -factorData <- sapply(factorData, strsplit, split=",") -factorData <- sapply(factorData, make.names) +if (normCounts=="yes") { + wantNorm <- TRUE +} else { + wantNorm <- FALSE +} -# Transform factor data into data frame of R factor objects -factors <- data.frame(factorData) #Create output directory dir.create(outPath, showWarnings=FALSE) @@ -205,6 +223,7 @@ maOutPng[i] <- makeOut(paste0("maplot_", contrastData[i], ".png")) topOut[i] <- makeOut(paste0("limma-voom_", contrastData[i], ".tsv")) } # Save output paths for each contrast as vectors +normOut <- makeOut("limma-voom_normcounts.tsv") rdaOut <- makeOut("RData.rda") sessionOut <- makeOut("session_info.txt") @@ -221,12 +240,12 @@ flatCount <- numeric() # Read in counts and geneanno data -counts <- read.table(countPath, header=TRUE, sep="\t") +counts <- read.table(countPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) row.names(counts) <- counts[, 1] counts <- counts[ , -1] countsRows <- nrow(counts) if (haveAnno) { - geneanno <- read.table(annoPath, header=TRUE, sep="\t") + geneanno <- read.table(annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) } ################################################################################ @@ -247,7 +266,7 @@ preFilterCount <- nrow(data$counts) sel <- rowSums(cpm(data$counts) > cpmReq) >= sampleReq data$counts <- data$counts[sel, ] -data$genes <- data$genes[sel, ] +data$genes <- data$genes[sel, ,drop = FALSE] postFilterCount <- nrow(data$counts) filteredCount <- preFilterCount-postFilterCount @@ -255,6 +274,7 @@ samplenames <- colnames(data$counts) sampleanno <- data.frame("sampleID"=samplenames, factors) + # Generating the DGEList object "data" data$samples <- sampleanno data$samples$lib.size <- colSums(data$counts) @@ -263,14 +283,17 @@ data <- new("DGEList", data) factorList <- sapply(names(factors), pasteListName) -formula <- "~0" + +formula <- "~0" for (i in 1:length(factorList)) { - formula <- paste(formula, factorList[i], sep="+") + formula <- paste(formula,factorList[i], sep="+") } + formula <- formula(formula) design <- model.matrix(formula) + for (i in 1:length(factorList)) { - colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE) + colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE) } # Calculating normalising factor, estimating dispersion @@ -330,6 +353,13 @@ } + # Save normalised counts (log2cpm) +if (wantNorm) { + norm_counts <- data.frame(vData$genes, vData$E) + write.table (norm_counts, file=normOut, row.names=FALSE, sep="\t") + linkData <- rbind(linkData, c("limma-voom_normcounts.tsv", "limma-voom_normcounts.tsv")) +} + # Fit linear model and estimate dispersion with eBayes voomFit <- contrasts.fit(voomFit, contrasts) voomFit <- eBayes(voomFit) @@ -368,12 +398,11 @@ top <- topTable(voomFit, coef=i, number=Inf, sort.by="P") write.table(top, file=topOut[i], row.names=FALSE, sep="\t") - linkName <- paste0("limma-voom_", contrastData[i], - ".tsv") + linkName <- paste0("limma-voom_", contrastData[i], ".tsv") linkAddr <- paste0("limma-voom_", contrastData[i], ".tsv") linkData <- rbind(linkData, c(linkName, linkAddr)) - # Plot MA (log ratios vs mean average) using limma package on weighted data + # Plot MA (log ratios vs mean average) using limma package on weighted pdf(maOutPdf[i]) limma::plotMA(voomFit, status=status, coef=i, main=paste("MA Plot:", unmake.names(contrastData[i])), @@ -534,12 +563,14 @@ cata("

Summary of experimental data:

\n") -cata("

*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP*

\n") +cata("

*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*

\n") cata("\n") cata("\n") -TableItem() -for (i in names(factors)) { +TableHeadItem("SampleID") +TableHeadItem(names(factors)[1]," (Primary Factor)") + +for (i in names(factors)[2:length(names(factors))]) { TableHeadItem(i) } cata("\n") @@ -547,7 +578,7 @@ for (i in 1:nrow(factors)) { cata("\n") TableHeadItem(row.names(factors)[i]) - for (j in ncol(factors)) { + for (j in 1:ncol(factors)) { TableItem(as.character(unmake.names(factors[i, j]))) } cata("\n") diff -r bdebdea5f6a7 -r 76d01fe0ec36 limma_voom.xml --- a/limma_voom.xml Mon Jun 12 07:41:02 2017 -0400 +++ b/limma_voom.xml Wed Jul 05 04:39:42 2017 -0400 @@ -1,8 +1,8 @@ - + Differential expression with optional sample weights - + bioconductor-edger bioconductor-limma @@ -10,66 +10,71 @@ r-scales - - /dev/null | grep -v -i "WARNING: ")", edgeR version" $(R --vanilla --slave -e "library(edgeR); cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ") - ]]> - - - - + + - - +&& +mkdir ./output_dir + +&& +mv '$outReport.files_path'/*.tsv output_dir/ + ]]> + - + - + @@ -79,22 +84,45 @@ - - - - - - + + + + + + + + + + + + ^[\w]+$ + + + + ^[\w,]+$ + + + + + ^[\w]+$ + + + + ^[\w,]+$ + + + + + + + + ^[\w,-]+$ + + @@ -103,17 +131,16 @@ - - - + + + help="Apply weights if outliers are present."> - + @@ -121,39 +148,44 @@ - + + + + help="Output all the data used by R to construct the plots and tables, can be loaded into R. A link to the RData file will be provided in the HTML report."> - + - + - + - - + + - + - - + + @@ -161,38 +193,38 @@ - - + + - + - + - + - - + + - + - + - - - + + + @@ -201,16 +233,51 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - + ]]> 10.1093/nar/gkv412 diff -r bdebdea5f6a7 -r 76d01fe0ec36 test-data/factorinfo.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/factorinfo.txt Wed Jul 05 04:39:42 2017 -0400 @@ -0,0 +1,7 @@ +Sample Genotype Batch +WT1 WT b1 +WT2 WT b2 +WT3 WT b3 +Mut1 Mut b1 +Mut2 Mut b2 +Mut3 Mut b3 diff -r bdebdea5f6a7 -r 76d01fe0ec36 test-data/limma-voom_Mut-WT.tsv --- a/test-data/limma-voom_Mut-WT.tsv Mon Jun 12 07:41:02 2017 -0400 +++ b/test-data/limma-voom_Mut-WT.tsv Wed Jul 05 04:39:42 2017 -0400 @@ -1,4 +1,4 @@ -"ID" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B" +"GeneID" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B" "11304" 0.457332061341026 15.5254133001226 6.50459574633681 9.98720685006039e-07 5.99232411003624e-06 14.0741948485896 "11287" 0.190749727701785 17.6546448244617 5.09535410066402 3.26518807654125e-05 9.79556422962375e-05 5.46773893802392 "11298" -0.138014418336201 17.6747285193431 -3.33168485842331 0.00278753263633162 0.00557506527266324 -1.84301342041449 diff -r bdebdea5f6a7 -r 76d01fe0ec36 test-data/limma-voom_Mut-WTmultifact.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/limma-voom_Mut-WTmultifact.tsv Wed Jul 05 04:39:42 2017 -0400 @@ -0,0 +1,7 @@ +"GeneID" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B" +"11287" 0.190521683937291 17.6546448244617 14.4538942795625 5.93483727834149e-09 3.56090236700489e-08 96.5864491178815 +"11298" -0.139877413200757 17.6747285193431 -7.71901694642401 5.4113978311412e-06 1.62341934934236e-05 22.3495333961486 +"11304" 0.459011254726061 15.5254133001226 5.64083198409048 0.000108849060923731 0.000217698121847462 8.76657226635859 +"11303" -0.0641599901594212 17.886791401216 -2.99008181539252 0.0112725655419959 0.0169088483129939 -2.70089035288952 +"11305" -0.0651479753016204 18.1585474109909 -2.28935282063873 0.0409794711446019 0.0491753653735222 -4.27133526604488 +"11302" -0.035881713464434 9.78883119065989 -0.439436789626921 0.668154169055758 0.668154169055758 -5.75838315483926 diff -r bdebdea5f6a7 -r 76d01fe0ec36 test-data/limma-voom_WT-Mut.tsv --- a/test-data/limma-voom_WT-Mut.tsv Mon Jun 12 07:41:02 2017 -0400 +++ b/test-data/limma-voom_WT-Mut.tsv Wed Jul 05 04:39:42 2017 -0400 @@ -1,4 +1,4 @@ -"ID" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B" +"GeneID" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B" "11304" -0.457332061341026 15.5254133001226 -6.50459574633681 9.98720685006039e-07 5.99232411003624e-06 14.0741948485896 "11287" -0.190749727701785 17.6546448244617 -5.09535410066402 3.26518807654125e-05 9.79556422962375e-05 5.46773893802392 "11298" 0.138014418336201 17.6747285193431 3.33168485842331 0.00278753263633162 0.00557506527266324 -1.84301342041449 diff -r bdebdea5f6a7 -r 76d01fe0ec36 test-data/limma-voom_normcounts.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/limma-voom_normcounts.tsv Wed Jul 05 04:39:42 2017 -0400 @@ -0,0 +1,7 @@ +"GeneID" "WT1" "WT2" "WT3" "Mut1" "Mut2" "Mut3" +"11287" 17.6076545522668 17.5079905058358 17.5639197719462 17.7719335903598 17.7105244453357 17.7658460810258 +"11298" 17.7727137985373 17.6986875511432 17.7598828784201 17.6506532355915 17.5520012519588 17.6144324004081 +"11302" 9.57719962386619 10.0175525101992 9.82560228478005 9.71615817858834 9.91760904198188 9.67886550454371 +"11303" 17.9125899785601 17.8773613214092 17.955228759658 17.8774046020864 17.7865502833631 17.911613462219 +"11304" 15.354518172169 15.2178020484983 15.3174553811097 15.7545783969022 15.8519803740125 15.6561454280436 +"11305" 18.1808259688524 18.1818679259847 18.2026681768366 18.0401341021246 18.1408682071359 18.204920085011