Previous changeset 10:86292c2b0ba9 (2013-05-14) Next changeset 12:d49d06d35683 (2013-08-21) |
Commit message:
Uploaded |
modified:
edgeR.pl |
b |
diff -r 86292c2b0ba9 -r e5fcbabbdea7 edgeR.pl --- a/edgeR.pl Tue May 14 21:40:04 2013 -0400 +++ b/edgeR.pl Wed Aug 21 22:13:03 2013 -0400 |
[ |
b'@@ -8,9 +8,9 @@\n $| = 1;\n \n # Grab and set all options\n-my %OPTIONS = (a => "glm", d => "tag", f => "BH", p => 0.3, r => 5, u => "movingave");\n+my %OPTIONS = (a => "glm", d => "tag", f => "BH", r => 5, u => "movingave");\n \n-getopts(\'a:d:e:f:h:lmn:o:p:r:tu:\', \\%OPTIONS);\n+getopts(\'a:d:e:f:h:lmn:o:r:tu:\', \\%OPTIONS);\n \n die qq(\n Usage: edgeR.pl [OPTIONS] factor::factor1::levels [factor::factor2::levels ...] cp::cont_pred1::values [cp::cont_pred2::values ...] cnt::contrast1 [cnt::contrast2] matrix\n@@ -24,7 +24,6 @@\n \t\t\t-m\t\tPerform all pairwise comparisons\n \t\t\t-n\tSTR\tFile name to output the normalised digital gene expression matrix (only applicable when usinf glm or limma model)\n \t\t\t-o\tSTR\tFile name to output csv file with results\n-\t\t\t-p\tFLT\tThe proportion of all tags/genes to be used for the locally weighted estimation of the tagwise dispersion, ony applicable when 1 factor analysis selected (default: $OPTIONS{p})\n \t\t\t-r\tINT\tCommon Dispersion Rowsum Filter, ony applicable when 1 factor analysis selected (default: $OPTIONS{r})\n \t\t\t-t\t\tEstimate Tagwise Disp when performing 1 factor analysis\n \t\t\t-u\tSTR\tMethod for allowing the prior distribution for the dispersion to be abundance- dependent ["movingave", "tricube", "none"] (default: $OPTIONS{u})\n@@ -34,46 +33,46 @@\n my $matrix = pop @ARGV;\n \n make_path($OPTIONS{e});\n-open(Rcmd,">$OPTIONS{e}/r_script.R") or die "Cannot open $OPTIONS{e}/r_script.R\\n\\n"; \n+open(Rcmd,">$OPTIONS{e}/r_script.R") or die "Cannot open $OPTIONS{e}/r_script.R\\n\\n";\n print Rcmd "\n-\tzz <- file(\\"$OPTIONS{e}/r_script.err\\", open=\\"wt\\")\n-\tsink(zz)\n-\tsink(zz, type=\\"message\\")\n-\t\n-\tlibrary(edgeR)\n-\tlibrary(limma)\n+zz <- file(\\"$OPTIONS{e}/r_script.err\\", open=\\"wt\\")\n+sink(zz)\n+sink(zz, type=\\"message\\")\n+\n+library(edgeR)\n+library(limma)\n \n-\t# read in matrix and groups\n-\ttoc <- read.table(\\"$matrix\\", sep=\\"\\\\t\\", comment=\\"\\", as.is=T)\n-\tgroups <- sapply(toc[1, -1], strsplit, \\":\\")\n-\tfor(i in 1:length(groups)) { g <- make.names(groups[[i]][2]); names(groups)[i] <- g; groups[[i]] <- groups[[i]][-2] }\n-\tcolnames(toc) <- make.names(toc[2,])\n-\ttoc[,1] <- gsub(\\",\\", \\".\\", toc[,1])\n-\ttagnames <- toc[-(1:2), 1]\n-\trownames(toc) <- toc[,1]\n-\ttoc <- toc[-(1:2), -1]\n-\tfor(i in colnames(toc)) toc[, i] <- as.numeric(toc[,i])\n-\tnorm_factors <- calcNormFactors(as.matrix(toc))\n+# read in matrix and groups\n+toc <- read.table(\\"$matrix\\", sep=\\"\\\\t\\", comment=\\"\\", as.is=T)\n+groups <- sapply(toc[1, -1], strsplit, \\":\\")\n+for(i in 1:length(groups)) { g <- make.names(groups[[i]][2]); names(groups)[i] <- g; groups[[i]] <- groups[[i]][-2] }\n+colnames(toc) <- make.names(toc[2,])\n+toc[,1] <- gsub(\\",\\", \\".\\", toc[,1])\n+tagnames <- toc[-(1:2), 1]\n+rownames(toc) <- toc[,1]\n+toc <- toc[-(1:2), -1]\n+for(i in colnames(toc)) toc[, i] <- as.numeric(toc[,i])\n+norm_factors <- calcNormFactors(as.matrix(toc))\n \n-\tpw_tests <- list()\n-\tuniq_groups <- unique(names(groups))\n-\tfor(i in 1:(length(uniq_groups)-1)) for(j in (i+1):length(uniq_groups)) pw_tests[[length(pw_tests)+1]] <- c(uniq_groups[i], uniq_groups[j])\n-\tDGE <- DGEList(toc, lib.size=norm_factors*colSums(toc), group=names(groups))\n-\tpdf(\\"$OPTIONS{e}/MA_plots_normalisation.pdf\\", width=14)\n-\tfor(i in 1:length(pw_tests)) {\n-\t\tj <- c(which(names(groups) == pw_tests[[i]][1])[1], which(names(groups) == pw_tests[[i]][2])[1])\n-\t\tpar(mfrow = c(1, 2))\n-\t\tmaPlot(toc[, j[1]], toc[, j[2]], normalize = TRUE, pch = 19, cex = 0.2, ylim = c(-10, 10), main=paste(\\"MA Plot\\", colnames(toc)[j[1]], \\"vs\\", colnames(toc)[j[2]]))\n-\t\tgrid(col = \\"blue\\")\n-\t\tabline(h = log2(norm_factors[j[2]]), col = \\"red\\", lwd = 4)\n-\t\tmaPlot(DGE\\$counts[, j[1]]/DGE\\$samples\\$lib.size[j[1]], DGE\\$counts[, j[2]]/DGE\\$samples\\$lib.size[j[2]], normalize = FALSE, pch = 19, cex = 0.2, ylim = c(-8, 8), main=paste(\\"MA Plot\\", colnames(toc)[j[1]], \\"vs\\", colnames(toc)[j[2]], \\"Normalised\\"))\n-\t\tgrid(col = \\"blue\\")\n-\t}\n-\tdev.off()\n-\tpdf(file=\\"$OPTIONS{e}/MDSplot.pdf\\")\n-\tplotMDS(DGE, main=\\"MDS Plot\\", col=as.numeric(factor(nam'..b'ONS{n}\\", quote=F, sep=\\"\\\\t\\", row.names=F)\n-\t\t";\n-\t}\t\t\n+write.table(tab, \\"$OPTIONS{n}\\", quote=F, sep=\\"\\\\t\\", row.names=F)\n+";\n+\t}\n \tif(@add_cont) {\n \t\t$all_cont = "\\"".join("\\", \\"", @add_cont)."\\"";\n \t\tprint Rcmd "\n-\t\t\tcont <- c(${all_cont})\n-\t\t\tfor(i in uniq_groups) cont <- gsub(paste(groups[[i]], \\"([^0-9])\\", sep=\\"\\"), paste(i, \\"\\\\\\\\1\\", sep=\\"\\"), cont)\n-\t\t\tfor(i in uniq_groups) cont <- gsub(paste(groups[[i]], \\"\\$\\", sep=\\"\\"), i, cont)\n-\t\t";\n+cont <- c(${all_cont})\n+for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \\"([^0-9])\\", sep=\\"\\"), paste(i, \\"\\\\\\\\1\\", sep=\\"\\"), cont)\n+for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \\"\\$\\", sep=\\"\\"), i, cont)\n+";\n \t} else {\n \t\tprint Rcmd "\n-\t\t\tcont <- NULL\n-\t\t";\n+cont <- NULL\n+";\n \t}\n \tif(defined $OPTIONS{m}) {\n \t\tprint Rcmd "\n-\t\t\tfor(i in 1:length(pw_tests)) cont <- c(cont, paste(pw_tests[[i]][2], \\"-\\", pw_tests[[i]][1], sep=\\"\\"))\n-\t\t";\n+for(i in 1:length(pw_tests)) cont <- c(cont, paste(pw_tests[[i]][2], \\"-\\", pw_tests[[i]][1], sep=\\"\\"))\n+";\n \t}\n \tif(!defined $OPTIONS{m} && !@add_cont){\n \t\tdie("No Contrasts have been specified, you must at least either select multiple pairwise comparisons or specify a custom contrast\\n");\n \t}\n \tprint Rcmd "\n-\t\tcont <- makeContrasts(contrasts=cont, levels=design)\n-\t\tfit2 <- contrasts.fit(fit, cont)\n-\t\tfit2 <- eBayes(fit2)\n-\t";\n+cont <- makeContrasts(contrasts=cont, levels=design)\n+fit2 <- contrasts.fit(fit, cont)\n+fit2 <- eBayes(fit2)\n+";\n } else {\n \tdie("Anaysis type $OPTIONS{a} not found\\n");\n-\t\n+\n }\n \n if($OPTIONS{a} ne "limma") {\n \tprint Rcmd "\n-\t\toptions(digits = 6)\n-\t\ttab <- NULL\n-\t\tfor(i in names(tested)) {\n-\t\t\ttab_tmp <- topTags(tested[[i]], n=Inf, adjust.method=\\"$OPTIONS{f}\\")[[1]]\n-\t\t\tcolnames(tab_tmp) <- paste(i, colnames(tab_tmp), sep=\\":\\")\n-\t\t\ttab_tmp <- tab_tmp[tagnames,]\n-\t\t\tif(is.null(tab)) {\n-\t\t\t\ttab <- tab_tmp\n-\t\t\t} else tab <- cbind(tab, tab_tmp)\n-\t\t}\n-\t\ttab <- cbind(Feature=rownames(tab), tab)\n-\t";\n+options(digits = 6)\n+tab <- NULL\n+for(i in names(tested)) {\n+\ttab_tmp <- topTags(tested[[i]], n=Inf, adjust.method=\\"$OPTIONS{f}\\")[[1]]\n+\tcolnames(tab_tmp) <- paste(i, colnames(tab_tmp), sep=\\":\\")\n+\ttab_tmp <- tab_tmp[tagnames,]\n+\tif(is.null(tab)) {\n+\t\ttab <- tab_tmp\n+\t} else tab <- cbind(tab, tab_tmp)\n+}\n+tab <- cbind(Feature=rownames(tab), tab)\n+";\n } else {\n \tprint Rcmd "\n-\t\ttab <- NULL\n-\t\toptions(digits = 6)\n-\t\tfor(i in colnames(fit2)) {\n-\t\t\ttab_tmp <- topTable(fit2, coef=i, n=Inf, sort.by=\\"none\\", adjust.method=\\"$OPTIONS{f}\\")\n-\t\t\tcolnames(tab_tmp)[-1] <- paste(i, colnames(tab_tmp)[-1], sep=\\":\\")\n-\t\t\tif(is.null(tab)) {\n-\t\t\t\ttab <- tab_tmp\n-\t\t\t} else tab <- cbind(tab, tab_tmp[,-1])\n-\t\t}\n-\t";\n+tab <- NULL\n+options(digits = 6)\n+for(i in colnames(fit2)) {\n+\ttab_tmp <- topTable(fit2, coef=i, n=Inf, sort.by=\\"none\\", adjust.method=\\"$OPTIONS{f}\\")\n+\tcolnames(tab_tmp)[-1] <- paste(i, colnames(tab_tmp)[-1], sep=\\":\\")\n+\tif(is.null(tab)) {\n+\t\ttab <- tab_tmp\n+\t} else tab <- cbind(tab, tab_tmp[,-1])\n+}\n+";\n }\n print Rcmd "\n-\twrite.table(tab, \\"$OPTIONS{o}\\", quote=F, sep=\\"\\\\t\\", row.names=F)\n-\tsink(type=\\"message\\")\n-\tsink()\n+write.table(tab, \\"$OPTIONS{o}\\", quote=F, sep=\\"\\\\t\\", row.names=F)\n+sink(type=\\"message\\")\n+sink()\n ";\n close(Rcmd);\n system("R --no-restore --no-save --no-readline < $OPTIONS{e}/r_script.R > $OPTIONS{e}/r_script.out");\n@@ -352,6 +376,9 @@\n \tprint HTML "<li><a href=Smear_Plots.pdf>Smear_Plots.pdf</a></li>\\n";\n } elsif($OPTIONS{a} eq "glm" && $OPTIONS{d} eq "tag") {\n \tprint HTML "<li><a href=Tagwise_Dispersion_vs_Abundance.pdf>Tagwise_Dispersion_vs_Abundance.pdf</a></li>\\n";\n+\tprint HTML "<li><a href=Smear_Plots.pdf>Smear_Plots.pdf</a></li>\\n";\n+} elsif($OPTIONS{a} eq "glm" && ($OPTIONS{d} eq "trend" || $OPTIONS{d} eq "common")) {\n+\tprint HTML "<li><a href=Smear_Plots.pdf>Smear_Plots.pdf</a></li>\\n";\n } elsif($OPTIONS{a} eq "limma") {\n \tprint HTML "<li><a href=LIMMA_MDS_plot.pdf>LIMMA_MDS_plot.pdf</a></li>\\n";\n \tprint HTML "<li><a href=LIMMA_voom.pdf>LIMMA_voom.pdf</a></li>\\n";\n' |