ruvseq: ruvseq.R comparison

comparison ruvseq.R @ 2:fed9d0350d72 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ruvseq commit 4daa375d022673d2437d609b1865b78c64b04415"

author	iuc
date	Fri, 15 Jan 2021 17:53:15 +0000
parents	c24765926774
children	3a083c78896e

comparison

equal deleted inserted replaced

-:c24765926774
+:fed9d0350d72
 # setup R error handling to go to stderr
 library("getopt")
-options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+options(show.error.messages = F, error = function() {
+cat(geterrmessage(), file = stderr()); q("no", 1, F)
+})
 options(stringAsFactors = FALSE, useFancyQuotes = FALSE)
 setup_cmdline_options <- function() {
 args <- commandArgs(trailingOnly = TRUE)
 spec <- matrix(c(
 "alpha", "a", 1, "double",
 "min_mean_count", "min_c", 1, "double",
 "min_k", "min_k", 1, "double",
 "max_k", "max_k", 1, "double",
 "sample_json", "s", 1, "character",
-"plots" , "p", 1, "character",
+"plots", "p", 1, "character",
 "header", "H", 0, "logical",
 "txtype", "y", 1, "character",
 "tx2gene", "x", 1, "character"), # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
-byrow=TRUE, ncol=4)
+byrow = TRUE, ncol = 4)
 opt <- getopt(spec)
 # if help was asked for print a friendly message
 # and exit with a non-zero error code
 if (!is.null(opt$help)) {
-cat(getopt(spec, usage=TRUE))
+cat(getopt(spec, usage = TRUE))
-q(status=1)
+q(status = 1)
 } else {
 load_libraries()
 }
 return(opt)
 }
 library("tximport")
 library("DESeq2")
 library("ggrepel")
 }
-source_local <- function(fname){
+source_local <- function(fname) {
 argv <- commandArgs(trailingOnly = FALSE)
 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
-source(paste(base_dir, fname, sep="/"))
+source(paste(base_dir, fname, sep = "/"))
 }
 # Source get_deseq_dataset.R for getting deseq dataset from htseq/featurecounts/tximport
-source_local('get_deseq_dataset.R')
+source_local("get_deseq_dataset.R")
 # RUVseq function definitions
-plot_pca_rle <- function (set, title) {
+plot_pca_rle <- function(set, title) {
-x <- pData(set)[,1]
+x <- pData(set)[, 1]
 colors <- brewer.pal(3, "Set2")
-label <- paste0(' for ', title)
+label <- paste0(" for ", title)
-plotRLE(set, outline=FALSE, ylim=c(-4, 4), col=colors[x])
+plotRLE(set, outline = FALSE, ylim = c(-4, 4), col = colors[x])
-title(main=paste0("RLE", label))
+title(main = paste0("RLE", label))
-plotPCA(set, col=colors[x], cex=1.2)
+plotPCA(set, col = colors[x], cex = 1.2)
-title(main=paste0("PCA", label))
+title(main = paste0("PCA", label))
 }
-plot_factors_of_unwanted_variation <- function(set, method, k){
+plot_factors_of_unwanted_var <- function(set, method, k) {
 pd <- pData(set)
-pd['sample'] <- row.names(pd)
+pd["sample"] <- row.names(pd)
-colnames(pd)[1] <- 'condition'
+colnames(pd)[1] <- "condition"
-d = melt(pd, id.vars = c('sample', 'condition'))
+d <- melt(pd, id.vars = c("sample", "condition"))
-d['x'] <- 1  # There is no information on the X, so we just fake it to be able to do a scatterplot
+d["x"] <- 1  # There is no information on the X, so we just fake it to be able to do a scatterplot
-print(ggplot(d, aes(x=x, y=value, color=condition, label=sample)) +
+print(ggplot(d, aes(x = x, y = value, color = condition, label = sample)) +
 geom_point() +
-ggtitle(paste0('Factors of unwanted variation for method: ', method, ", k=", k)) +
+ggtitle(paste0("Factors of unwanted variation for method: ", method, ", k=", k)) +
-facet_wrap( ~ variable, scales = "free_x") +
+facet_wrap(~ variable, scales = "free_x") +
 geom_text_repel() +
-theme(axis.title.x=element_blank(),
+theme(axis.title.x = element_blank(),
-axis.text.x=element_blank(),
+axis.text.x = element_blank(),
-axis.ticks.x=element_blank(),
+axis.ticks.x = element_blank(),
 plot.title = element_text(hjust = 0.5))
 )
 }
-create_seq_expression_set <- function (dds, min_mean_count) {
+create_seq_expression_set <- function(dds, min_mean_count) {
 count_values <- counts(dds)
-print(paste0("feature count before filtering :",nrow(count_values),"\n"))
+print(paste0("feature count before filtering :", nrow(count_values), "\n"))
 print(paste0("Filtering features which mean expression is less or eq. than ", min_mean_count, " counts\n"))
 filter <- apply(count_values, 1, function(x) mean(x) > min_mean_count)
-filtered <- count_values[filter,]
+filtered <- count_values[filter, ]
-print(paste0("feature count after filtering :",nrow(filtered),"\n"))
+print(paste0("feature count after filtering :", nrow(filtered), "\n"))
-set = newSeqExpressionSet(as.matrix(filtered),
+set <- newSeqExpressionSet(as.matrix(filtered),
-phenoData = data.frame(colData(dds)$condition, row.names=colnames(filtered)))
+phenoData = data.frame(colData(dds)$condition, row.names = colnames(filtered)))
-plot_pca_rle(set = set, title = 'raw data')
+plot_pca_rle(set = set, title = "raw data")
-set <- betweenLaneNormalization(set, which="upper")
+set <- betweenLaneNormalization(set, which = "upper")
-plot_pca_rle(set = set, title = 'upper quartile normalized')
+plot_pca_rle(set = set, title = "upper quartile normalized")
 return(set)
 }
 get_empirical_control_genes <- function(set, cutoff_p) {
-x <- pData(set)[,1]
+x <- pData(set)[, 1]
-design <- model.matrix(~x, data=pData(set))
+design <- model.matrix(~x, data = pData(set))
-y <- DGEList(counts=counts(set), group=x)
+y <- DGEList(counts = counts(set), group = x)
-y <- calcNormFactors(y, method="upperquartile")
+y <- calcNormFactors(y, method = "upperquartile")
 y <- estimateGLMCommonDisp(y, design)
 y <- estimateGLMTagwiseDisp(y, design)
 fit <- glmFit(y, design)
-lrt <- glmLRT(fit, coef=2)
+lrt <- glmLRT(fit, coef = 2)
-top <- topTags(lrt, n=nrow(set))$table
+top <- topTags(lrt, n = nrow(set))$table
 top_rows <- rownames(top)[which(top$PValue < cutoff_p)]
 empirical <- rownames(set)[which(!(rownames(set) %in% top_rows))]
 return(empirical)
 }
-ruv_control_gene_method <- function(set, k, control_genes='empirical', cutoff_p=0.2) {
+ruv_control_gene_method <- function(set, k, control_genes = "empirical", cutoff_p = 0.2) {
-if (control_genes == 'empirical') {
+if (control_genes == "empirical") {
-control_genes = get_empirical_control_genes(set, cutoff_p=cutoff_p)
+control_genes <- get_empirical_control_genes(set, cutoff_p = cutoff_p)
 }
-set <- RUVg(set, control_genes, k=k)
+set <- RUVg(set, control_genes, k = k)
 plot_pca_rle(set, paste0("RUVg with empirical control genes, k=", k))
-plot_factors_of_unwanted_variation(set, method="RUVg with empirical control genes", k=k)
+plot_factors_of_unwanted_var(set, method = "RUVg with empirical control genes", k = k)
 return(set)
 }
 ruv_residual_method <- function(set, k) {
 genes <- rownames(counts(set))
-x <- pData(set)[,1]
+x <- pData(set)[, 1]
 # Initial edger residuals
-design <- model.matrix(~x, data=pData(set))
+design <- model.matrix(~x, data = pData(set))
-y <- DGEList(counts=counts(set), group=x)
+y <- DGEList(counts = counts(set), group = x)
-y <- calcNormFactors(y, method="upperquartile")
+y <- calcNormFactors(y, method = "upperquartile")
 y <- estimateGLMCommonDisp(y, design)
 y <- estimateGLMTagwiseDisp(y, design)
 fit <- glmFit(y, design)
-res <- residuals(fit, type="deviance")
+res <- residuals(fit, type = "deviance")
-set <- RUVr(set, genes, k=k, res)
+set <- RUVr(set, genes, k = k, res)
-plot_pca_rle(set = set, title = paste0('RUVr using residuals, k=', k))
+plot_pca_rle(set = set, title = paste0("RUVr using residuals, k=", k))
-plot_factors_of_unwanted_variation(set, method="RUVr using residuals", k=k)
+plot_factors_of_unwanted_var(set, method = "RUVr using residuals", k = k)
 return(set)
 }
-ruv_replicate_method <- function (set, k) {
+ruv_replicate_method <- function(set, k) {
 genes <- rownames(counts(set))
-x <- pData(set)[,1]
+x <- pData(set)[, 1]
 differences <- makeGroups(x)
-set <- RUVs(set, genes, k=k, differences)
+set <- RUVs(set, genes, k = k, differences)
-plot_pca_rle(set, paste0('RUVs with replicate samples, k=', k))
+plot_pca_rle(set, paste0("RUVs with replicate samples, k=", k))
-plot_factors_of_unwanted_variation(set, method="RUVs using replicates", k=k)
+plot_factors_of_unwanted_var(set, method = "RUVs using replicates", k = k)
 return(set)
-}
-get_differentially_expressed_genes <- function(dds, contrast, alpha=0.01) {
-r <- results(dds, contrast=contrast, alpha=alpha)
-return(rownames(r[which(r$padj < alpha),]))
 }
 opt <- setup_cmdline_options()
 alpha <- opt$alpha
 min_k <- opt$min_k
 min_c <- opt$min_mean_count
 sample_json <- fromJSON(opt$sample_json)
 sample_paths <- sample_json$path
 sample_names <- sample_json$label
 condition <- as.factor(sample_json$condition)
-sampleTable <- data.frame(samplename=sample_names,
+sample_table <- data.frame(samplename = sample_names, filename = sample_paths, condition = condition)
-filename = sample_paths,
+rownames(sample_table) <- sample_names
-condition=condition)
-rownames(sampleTable) <- sample_names
-dds <- get_deseq_dataset(sampleTable, header=opt$header, designFormula= ~ condition, tximport=opt$txtype, txtype=opt$txtype, tx2gene=opt$tx2gene)
+dds <- get_deseq_dataset(sample_table, header = opt$header, design_formula = ~ condition, tximport = opt$txtype, txtype = opt$txtype, tx2gene = opt$tx2gene)
 if (!is.null(opt$plots)) {
 pdf(opt$plots)
 }
 # Run through the ruvseq variants
 set <- create_seq_expression_set(dds, min_mean_count = min_c)
 result <- list(no_correction = set)
 for (k in seq(min_k, max_k)) {
-result[[paste0('residual_method_k', k)]] <- ruv_residual_method(set, k=k)
+result[[paste0("residual_method_k", k)]] <- ruv_residual_method(set, k = k)
-result[[paste0('replicate_method_k', k)]] <- ruv_replicate_method(set, k=k)
+result[[paste0("replicate_method_k", k)]] <- ruv_replicate_method(set, k = k)
-result[[paste0('control_method_k', k)]] <- ruv_control_gene_method(set, k=k, cutoff_p=0.5)
+result[[paste0("control_method_k", k)]] <- ruv_control_gene_method(set, k = k, cutoff_p = 0.5)
 }
 for (name in names(result)) {
 if (!startsWith(name, "no_correction")) {
 set <- result[[name]]
 unwanted_variation <- pData(set)
 df <- data.frame(identifier = rownames(unwanted_variation))
 df <- cbind(df, unwanted_variation)
-colnames(df)[2] <- 'condition'
+colnames(df)[2] <- "condition"
-write.table(df, file=paste0("batch_effects_", name, ".tabular"),  sep="\t", quote=F, row.names=F)
+write.table(df, file = paste0("batch_effects_", name, ".tabular"),  sep = "\t", quote = F, row.names = F)
 }
 }
 # close the plot device
 if (!is.null(opt$plots)) {

Mercurial > repos > iuc > ruvseq

comparison ruvseq.R @ 2:fed9d0350d72 draft