Mercurial > repos > bgruening > music_construct_eset
changeset 3:7ffaa0968da3 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 8beed1a19fcd9dc59f7746e1dfa735a2d5f29784"
author | bgruening |
---|---|
date | Thu, 10 Feb 2022 12:53:22 +0000 |
parents | 7902cd31b9b5 |
children | 282819d09a4f |
files | macros.xml scripts/compare.R scripts/estimateprops.R test-data/default_output_no_disease_nnls.pdf test-data/out_filt1.pdf test-data/out_heat2.pdf |
diffstat | 6 files changed, 522 insertions(+), 59 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Sat Jan 29 12:52:10 2022 +0000 +++ b/macros.xml Thu Feb 10 12:53:22 2022 +0000 @@ -1,5 +1,5 @@ <macros> - <token name="@VERSION_SUFFIX@">2</token> + <token name="@VERSION_SUFFIX@">3</token> <!-- The ESet inspector/constructor and MuSiC tool can have independent Galaxy versions but should reference the same package version always. -->
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/compare.R Thu Feb 10 12:53:22 2022 +0000 @@ -0,0 +1,440 @@ +suppressWarnings(suppressPackageStartupMessages(library(xbioc))) +suppressWarnings(suppressPackageStartupMessages(library(MuSiC))) +suppressWarnings(suppressPackageStartupMessages(library(reshape2))) +suppressWarnings(suppressPackageStartupMessages(library(cowplot))) +## We use this script to estimate the effectiveness of proportion methods + +## Load Conf +args <- commandArgs(trailingOnly = TRUE) +source(args[1]) + +method_key <- list("MuSiC" = "est_music", + "NNLS" = "est_nnls")[[est_method]] + + +scale_yaxes <- function(gplot, value) { + if (is.na(value)) { + gplot + } else { + gplot + scale_y_continuous(lim = c(0, value)) + } +} + + +set_factor_data <- function(bulk_data, factor_name = NULL) { + if (is.null(factor_name)) { + factor_name <- "None" ## change to something plottable + } + pdat <- pData(bulk_data) + sam_fact <- NULL + if (factor_name %in% colnames(pdat)) { + sam_fact <- cbind(rownames(pdat), + as.character(pdat[[factor_name]])) + cat(paste0(" - factor: ", factor_name, + " found in phenotypes\n")) + } else { + ## We assign this as the factor for the entire dataset + sam_fact <- cbind(rownames(pdat), + factor_name) + cat(paste0(" - factor: assigning \"", factor_name, + "\" to whole dataset\n")) + } + colnames(sam_fact) <- c("Samples", "Factors") + return(as.data.frame(sam_fact)) +} + +## Due to limiting sizes, we need to load and unload +## possibly very large datasets. +process_pair <- function(sc_data, bulk_data, + ctypes_label, samples_label, ctypes, + factor_group) { + ## - Generate + est_prop <- music_prop( + bulk.eset = bulk_data, sc.eset = sc_data, + clusters = ctypes_label, + samples = samples_label, select.ct = ctypes, verbose = T) + ## - + estimated_music_props <- est_prop$Est.prop.weighted + estimated_nnls_props <- est_prop$Est.prop.allgene + ## - + fact_data <- set_factor_data(bulk_data, factor_group) + ## - + return(list(est_music = estimated_music_props, + est_nnls = estimated_nnls_props, + bulk_sample_totals = colSums(exprs(bulk_data)), + plot_groups = fact_data)) +} + +music_on_all <- function(files) { + results <- list() + for (sc_name in names(files)) { + cat(paste0("sc-group:", sc_name, "\n")) + scgroup <- files[[sc_name]] + ## - sc Data + sc_est <- readRDS(scgroup$dataset) + ## - params + celltypes_label <- scgroup$label_cell + samples_label <- scgroup$label_sample + celltypes <- scgroup$celltype + + results[[sc_name]] <- list() + for (bulk_name in names(scgroup$bulk)) { + cat(paste0(" - bulk-group:", bulk_name, "\n")) + bulkgroup <- scgroup$bulk[[bulk_name]] + ## - bulk Data + bulk_est <- readRDS(bulkgroup$dataset) + ## - bulk params + pheno_facts <- bulkgroup$pheno_facts + pheno_excl <- bulkgroup$pheno_excl + ## + results[[sc_name]][[bulk_name]] <- process_pair( + sc_est, bulk_est, + celltypes_label, samples_label, + celltypes, bulkgroup$factor_group) + ## + rm(bulk_est) ## unload + } + rm(sc_est) ## unload + } + return(results) +} + +plot_all_individual_heatmaps <- function(results) { + pdf(out_heatmulti_pdf, width = 8, height = 8) + for (sc_name in names(results)) { + for (bk_name in names(results[[sc_name]])) { + res <- results[[sc_name]][[bk_name]] + plot_hmap <- Prop_heat_Est( + data.matrix(res[[method_key]]), method.name = est_method) + + ggtitle(paste0("[", est_method, "Cell type ", + "proportions in ", + bk_name, " (Bulk) based on ", + sc_name, " (scRNA)")) + + xlab("Cell Types (scRNA)") + + ylab("Samples (Bulk)") + + theme(axis.text.x = element_text(angle = -90), + axis.text.y = element_text(size = 6)) + print(plot_hmap) + } + } + dev.off() +} + +merge_factors_spread <- function(grudat_spread, factor_groups) { + ## Generated + merge_it <- function(matr, plot_groups, valname) { + ren <- melt(lapply(matr, function(mat) { + mat["ct"] <- rownames(mat); return(mat)})) + ## - Grab factors and merge into list + ren_new <- merge(ren, plot_groups, by.x = "variable", by.y = "Samples") + colnames(ren_new) <- c("Sample", "Cell", valname, "Bulk", "Factors") + return(ren_new) + } + tab <- merge(merge_it(grudat$spread$prop, factor_groups, "value.prop"), + merge_it(grudat$spread$scale, factor_groups, "value.scale"), + by = c("Sample", "Cell", "Bulk", "Factors")) + return(tab) +} + + +plot_grouped_heatmaps <- function(results) { + pdf(out_heatmulti_pdf, width = 8, height = 8) + for (sc_name in names(results)) { + named_list <- sapply( + names(results[[sc_name]]), + function(n) { + ## We transpose the data here, because + ## the plotting function omits by default + ## the Y-axis which are the samples. + ## Since the celltypes are the common factor + ## these should be the Y-axis instead. + t(data.matrix(results[[sc_name]][[n]][[method_key]])) + }, simplify = F, USE.NAMES = T) + named_methods <- names(results[[sc_name]]) + ## + plot_hmap <- Prop_heat_Est( + named_list, + method.name = named_methods) + + ggtitle(paste0("[", est_method, "] Cell type ", + "proportions of ", + "Bulk Datasets based on ", + sc_name, " (scRNA)")) + + xlab("Samples (Bulk)") + + ylab("Cell Types (scRNA)") + + theme(axis.text.x = element_text(angle = -90), + axis.text.y = element_text(size = 6)) + print(plot_hmap) + } + dev.off() +} + +## Desired plots +## 1. Pie chart: +## - Per Bulk dataset (using just normalised proportions) +## - Per Bulk dataset (multiplying proportions by nreads) + +unlist_names <- function(results, method, prepend_bkname=FALSE) { + unique(sort( + unlist(lapply(names(results), function(scname) { + lapply(names(results[[scname]]), function(bkname) { + res <- get(method)(results[[scname]][[bkname]][[method_key]]) + if (prepend_bkname) { + ## We *do not* assume unique bulk sample names + ## across different bulk datasets. + res <- paste0(bkname, "::", res) + } + return(res) + }) + })) + )) +} + +summarized_matrix <- function(results) { # nolint + ## We assume that cell types MUST be unique, but that sample + ## names do not need to be. For this reason, we must prepend + ## the bulk dataset name to the individual sample names. + all_celltypes <- unlist_names(results, "colnames") + all_samples <- unlist_names(results, "rownames", prepend_bkname = TRUE) + + ## Iterate through all possible samples and populate a table. + ddff <- data.frame() + ddff_scale <- data.frame() + for (cell in all_celltypes) { + for (sample in all_samples) { + group_sname <- unlist(strsplit(sample, split = "::")) + bulk <- group_sname[1] + id_sample <- group_sname[2] + for (scgroup in names(results)) { + if (bulk %in% names(results[[scgroup]])) { + mat_prop <- results[[scgroup]][[bulk]][[method_key]] + vec_counts <- results[[scgroup]][[bulk]]$bulk_sample_totals + ## - We use sample instead of id_sample because we need to + ## extract bulk sets from the complete matrix later. It's + ## messy, yes. + if (cell %in% colnames(mat_prop)) { + ddff[cell, sample] <- mat_prop[id_sample, cell] + ddff_scale[cell, sample] <- mat_prop[id_sample, cell] * vec_counts[[id_sample]] #nolint + } else { + ddff[cell, sample] <- 0 + ddff_scale[cell, sample] <- 0 + } + } + } + } + } + return(list(prop = ddff, scaled = ddff_scale)) +} + +flatten_factor_list <- function(results) { + ## Get a 2d DF of all factors across all bulk samples. + res <- c() + for (scgroup in names(results)) { + for (bulkgroup in names(results[[scgroup]])) { + dat <- results[[scgroup]][[bulkgroup]]$plot_groups + dat$Samples <- paste0(bulkgroup, "::", dat$Samples) #nolint + res <- rbind(res, dat) + } + } + return(res) +} + +group_by_dataset <- function(summat) { + bulk_names <- unlist( + lapply(names(files), function(x) names(files[[x]]$bulk))) + mat_names <- colnames(summat$prop) + bd <- list() + bd_scale <- list() + bd_spread_scale <- list() + bd_spread_prop <- list() + for (bname in bulk_names) { + subs <- mat_names[startsWith(mat_names, paste0(bname, "::"))] + ## - + bd[[bname]] <- rowSums(summat$prop[, subs]) + bd_scale[[bname]] <- rowSums(summat$scaled[, subs]) + bd_spread_scale[[bname]] <- summat$scaled[, subs] + bd_spread_prop[[bname]] <- summat$prop[, subs] + } + return(list(prop = as.data.frame(bd), + scaled = as.data.frame(bd_scale), + spread = list(scale = bd_spread_scale, + prop = bd_spread_prop))) +} + +summarize_heatmaps <- function(grudat_spread_melt, do_factors) { + ## - + do_single <- function(grudat_melted, yaxis, xaxis, fillval, title, + ylabs = element_blank(), xlabs = element_blank(), + use_log = TRUE, size = 11) { + ## Convert from matrix to long format + melted <- grudat_melted ## copy? + if (use_log) { + melted[[fillval]] <- log10(melted[[fillval]] + 1) + } + return(ggplot(melted) + + geom_tile(aes_string(y = yaxis, x = xaxis, fill = fillval), + colour = "white") + + scale_fill_gradient2(low = "steelblue", high = "red", + mid = "white", name = element_blank()) + + theme(axis.text.x = element_text(angle = -90, hjust = 0, + size = size)) + + ggtitle(label = title) + xlab(xlabs) + ylab(ylabs)) + } + + do_gridplot <- function(title, xvar, plot="both", ncol=2, size = 11) { + do_logged <- (plot %in% c("log", "both")) + do_normal <- (plot %in% c("normal", "both")) + plist <- list() + if (do_logged) { + plist[["1"]] <- do_single(grudat_spread_melt, "Cell", xvar, + "value.scale", "Reads (log10+1)", + size = size) + plist[["2"]] <- do_single(grudat_spread_melt, "Cell", xvar, + "value.prop", "Sample (log10+1)", + size = size) + } + if (do_normal) { + plist[["A"]] <- do_single(grudat_spread_melt, "Cell", xvar, + "value.scale", "Reads", use_log = F, + size = size) + plist[["B"]] <- do_single(grudat_spread_melt, "Cell", xvar, + "value.prop", "Sample", use_log = F, + size = size) + } + return(plot_grid(ggdraw() + draw_label(title, fontface = "bold"), + plot_grid(plotlist = plist, ncol = ncol), + ncol = 1, rel_heights = c(0.05, 0.95))) + + } + p1 <- do_gridplot("Cell Types vs Bulk Datasets", "Bulk", "both", ) + p2a <- do_gridplot("Cell Types vs Samples", "Sample", "normal", 1, + size = 8) + p2b <- do_gridplot("Cell Types vs Samples (log10+1)", "Sample", "log", 1, + size = 8) + p3 <- ggplot + theme_void() + if (do_factors) { + p3 <- do_gridplot("Cell Types against Factors", "Factors", "both") + } + return(list(bulk = p1, + samples = list(log = p2b, normal = p2a), + factors = p3)) +} + +summarize_boxplots <- function(grudat_spread, do_factors) { + common1 <- ggplot(grudat_spread, aes(x = value.prop)) + ggtitle("Sample") + + xlab(element_blank()) + ylab(element_blank()) + common2 <- ggplot(grudat_spread, aes(x = value.scale)) + ggtitle("Reads") + + xlab(element_blank()) + ylab(element_blank()) + + A <- B <- list() #nolint + ## Cell type by sample + A$p1 <- common2 + geom_boxplot(aes(y = Cell, color = Bulk)) + A$p2 <- common1 + geom_boxplot(aes(y = Cell, color = Bulk)) + ## Sample by Cell type + B$p1 <- common2 + geom_boxplot(aes(y = Bulk, color = Cell)) + + ylab("Bulk Dataset") + B$p2 <- common1 + geom_boxplot(aes(y = Bulk, color = Cell)) + + ylab("Bulk Dataset") + ## -- Factor plots are optional + A$p3 <- B$p3 <- A$p4 <- B$p4 <- ggplot() + theme_void() + + if (do_factors) { + A$p3 <- common1 + geom_boxplot(aes(y = Cell, color = Factors)) + A$p4 <- common2 + geom_boxplot(aes(y = Cell, color = Factors)) + B$p3 <- common1 + geom_boxplot(aes(y = Bulk, color = Factors)) + + ylab("Bulk Dataset") + B$p4 <- common2 + geom_boxplot(aes(y = Bulk, color = Factors)) + + ylab("Bulk Dataset") + } + + title_a <- "Cell Types against Bulk" + title_b <- "Bulk Datasets against Cells" + if (do_factors) { + title_a <- paste0(title_a, " and Factors") + title_b <- paste0(title_b, " and Factors") + } + + a_all <- plot_grid(ggdraw() + draw_label(title_a, fontface = "bold"), + plot_grid(plotlist = A, ncol = 2), + ncol = 1, rel_heights = c(0.05, 0.95)) + b_all <- plot_grid(ggdraw() + draw_label(title_b, fontface = "bold"), + plot_grid(plotlist = B, ncol = 2), + ncol = 1, rel_heights = c(0.05, 0.95)) + return(list(cell = a_all, bulk = b_all)) +} + +filter_output <- function(grudat_spread_melt, out_filt) { + print_red <- function(comment, red_list) { + cat(paste(comment, paste(red_list, collapse = ", "), "\n")) + } + grudat_filt <- grudat_spread_melt + print_red("Total Cell types:", unique(grudat_filt$Cell)) + if (!is.null(out_filt$cells)) { + grudat_filt <- grudat_filt[grudat_filt$Cell %in% out_filt$cells, ] + print_red(" - selecting:", out_filt$cells) + } + print_red("Total Factors:", unique(grudat_spread_melt$Factors)) + if (!is.null(out_filt$facts)) { + grudat_filt <- grudat_filt[grudat_filt$Factors %in% out_filt$facts, ] + print_red(" - selecting:", out_filt$facts) + } + return(grudat_filt) +} + + +results <- music_on_all(files) + +if (heat_grouped_p) { + plot_grouped_heatmaps(results) +} else { + plot_all_individual_heatmaps(results) +} + +save.image("/tmp/sesh.rds") + +summat <- summarized_matrix(results) +grudat <- group_by_dataset(summat) +grudat_spread_melt <- merge_factors_spread(grudat$spread, + flatten_factor_list(results)) + + + +## The output filters ONLY apply to boxplots, since these take +do_factors <- (length(unique(grudat_spread_melt[["Factors"]])) > 1) + +grudat_spread_melt_filt <- filter_output(grudat_spread_melt, out_filt) + +heat_maps <- summarize_heatmaps(grudat_spread_melt_filt, do_factors) +box_plots <- summarize_boxplots(grudat_spread_melt_filt, do_factors) + +pdf(out_heatsumm_pdf, width = 14, height = 14) +print(heat_maps) +print(box_plots) +dev.off() + +## Generate output tables +stats_prop <- lapply(grudat$spread$prop, function(x) { + t(apply(x, 1, summary))}) +stats_scale <- lapply(grudat$spread$scale, function(x) { + t(apply(x, 1, summary))}) + +writable2 <- function(obj, prefix, title) { + write.table(obj, + file = paste0("report_data/", prefix, "_", + title, ".tabular"), + quote = F, sep = "\t", col.names = NA) +} +## Make the value table printable +grudat_spread_melt$value.scale <- as.integer(grudat_spread_melt$value.scale) # nolint +colnames(grudat_spread_melt) <- c("Sample", "Cell", "Bulk", "Factors", + "CT Prop in Sample", "Number of Reads") + +writable2(grudat_spread_melt, "values", "Data Table") +writable2(summat$prop, "values", "Matrix of Cell Type Sample Proportions") +writable2({ + aa <- as.matrix(summat$scaled); mode(aa) <- "integer"; aa +}, "values", "Matrix of Cell Type Read Counts") + +for (bname in names(stats_prop)) { + writable2(stats_prop[[bname]], "stats", paste0(bname, ": Sample Props")) + writable2(stats_scale[[bname]], "stats", paste0(bname, ": Read Props")) +}
--- a/scripts/estimateprops.R Sat Jan 29 12:52:10 2022 +0000 +++ b/scripts/estimateprops.R Thu Feb 10 12:53:22 2022 +0000 @@ -14,8 +14,12 @@ clusters = celltypes_label, samples = samples_label, select.ct = celltypes, verbose = T) + estimated_music_props <- est_prop$Est.prop.weighted estimated_nnls_props <- est_prop$Est.prop.allgene +## +estimated_music_props_flat <- melt(estimated_music_props) +estimated_nnls_props_flat <- melt(estimated_nnls_props) scale_yaxes <- function(gplot, value) { if (is.na(value)) { @@ -25,23 +29,36 @@ } } +sieve_data <- function(func, music_data, nnls_data) { + if (func == "list") { + res <- list(if ("MuSiC" %in% methods) music_data else NULL, + if ("NNLS" %in% methods) nnls_data else NULL) + res[lengths(res) > 0] ## filter out NULL elements + } else if (func == "rbind") { + rbind(if ("MuSiC" %in% methods) music_data else NULL, + if ("NNLS" %in% methods) nnls_data else NULL) + } else if (func == "c") { + c(if ("MuSiC" %in% methods) music_data else NULL, + if ("NNLS" %in% methods) nnls_data else NULL) + } +} + + ## Show different in estimation methods ## Jitter plot of estimated cell type proportions jitter_fig <- scale_yaxes(Jitter_Est( - list(data.matrix(estimated_music_props), - data.matrix(estimated_nnls_props)), + sieve_data("list", + data.matrix(estimated_music_props), + data.matrix(estimated_nnls_props)), method.name = methods, title = "Jitter plot of Est Proportions", size = 2, alpha = 0.7) + theme_minimal(), maxyscale) - ## Make a Plot ## A more sophisticated jitter plot is provided as below. We separated ## the T2D subjects and normal subjects by their disease factor levels. -estimated_music_props_flat <- melt(estimated_music_props) -estimated_nnls_props_flat <- melt(estimated_nnls_props) - -m_prop <- rbind(estimated_music_props_flat, - estimated_nnls_props_flat) +m_prop <- sieve_data("rbind", + estimated_music_props_flat, + estimated_nnls_props_flat) colnames(m_prop) <- c("Sub", "CellType", "Prop") if (is.null(celltypes)) { @@ -69,7 +86,7 @@ phenotype_target_threshold <- -Inf message("phenotype target threshold set to -Inf") } - + ## the "2" here is to do with the sample groups, not number of methods m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint m_prop <- m_prop[!is.na(m_prop$Disease_factor), ] ## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2 @@ -84,8 +101,10 @@ m_prop <- rbind(subset(m_prop, Disease != sample_disease_group), subset(m_prop, Disease == sample_disease_group)) - jitter_new <- scale_yaxes(ggplot(m_prop, aes(Method, Prop)) + - geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease), + jitter_new <- scale_yaxes( + ggplot(m_prop, aes(Method, Prop)) + + geom_point(aes(fill = Method, color = Disease, + stroke = D, shape = Disease), size = 2, alpha = 0.7, position = position_jitter(width = 0.25, height = 0)) + facet_wrap(~ CellType, scales = "free") + @@ -100,21 +119,23 @@ ## Create dataframe for beta cell proportions and Disease_factor levels ## - Ugly code. Essentially, doubles the cell type proportions for each ## set of MuSiC and NNLS methods - m_prop_ana <- data.frame(pData(bulk_eset)[rep(1:nrow(estimated_music_props), 2), #nolint - phenotype_factors], - ## get proportions of target cell type - ct.prop = c(estimated_music_props[, phenotype_scrna_target], - estimated_nnls_props[, phenotype_scrna_target]), - ## - Method = factor(rep(methods, - each = nrow(estimated_music_props)), - levels = methods)) + m_prop_ana <- data.frame( + pData(bulk_eset)[rep(1:nrow(estimated_music_props), length(methods)), #nolint + phenotype_factors], + ## get proportions of target cell type + ct.prop = sieve_data("c", + estimated_music_props[, phenotype_scrna_target], + estimated_nnls_props[, phenotype_scrna_target]), + ## + Method = factor(rep(methods, + each = nrow(estimated_music_props)), + levels = methods)) ## - fix headers colnames(m_prop_ana)[1:length(phenotype_factors)] <- phenotype_factors #nolint ## - drop NA for target phenotype (e.g. hba1c) m_prop_ana <- subset(m_prop_ana, !is.na(m_prop_ana[phenotype_target])) m_prop_ana$Disease <- factor( # nolint - ## - Here we set Normal/Disease assignments across the two MuSiC and NNLS methods + ## - Here we set Normal/Disease assignments across the methods sample_groups[( m_prop_ana[phenotype_target] > phenotype_target_threshold) + 1 ], @@ -123,12 +144,15 @@ m_prop_ana$D <- (m_prop_ana$Disease == # nolint sample_disease_group) / sample_disease_group_scale - jitt_compare <- scale_yaxes(ggplot(m_prop_ana, aes_string(phenotype_target, "ct.prop")) + + jitt_compare <- scale_yaxes( + ggplot(m_prop_ana, aes_string(phenotype_target, "ct.prop")) + geom_smooth(method = "lm", se = FALSE, col = "black", lwd = 0.25) + - geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease), + geom_point(aes(fill = Method, color = Disease, + stroke = D, shape = Disease), size = 2, alpha = 0.7) + facet_wrap(~ Method) + ggtitle(paste0(toupper(phenotype_target), " vs. ", - toupper(phenotype_scrna_target), " Cell Type Proportion")) + + toupper(phenotype_scrna_target), + " Cell Type Proportion")) + theme_minimal() + ylab(paste0("Proportion of ", phenotype_scrna_target, " cells")) + @@ -138,19 +162,22 @@ } ## BoxPlot -plot_box <- scale_yaxes(Boxplot_Est(list( - data.matrix(estimated_music_props), - data.matrix(estimated_nnls_props)), - method.name = c("MuSiC", "NNLS")) + +plot_box <- scale_yaxes(Boxplot_Est( + sieve_data("list", + data.matrix(estimated_music_props), + data.matrix(estimated_nnls_props)), + method.name = methods) + theme(axis.text.x = element_text(angle = -90), axis.text.y = element_text(size = 8)) + ggtitle(element_blank()) + theme_minimal(), maxyscale) ## Heatmap -plot_hmap <- Prop_heat_Est(list( - data.matrix(estimated_music_props), - data.matrix(estimated_nnls_props)), - method.name = c("MuSiC", "NNLS")) + +plot_hmap <- Prop_heat_Est( + sieve_data( + "list", + data.matrix(estimated_music_props), + data.matrix(estimated_nnls_props)), + method.name = methods) + theme(axis.text.x = element_text(angle = -90), axis.text.y = element_text(size = 6)) @@ -167,33 +194,29 @@ plot_hmap message(dev.off()) -## Output Proportions +writable <- function(obj, prefix, title) { + write.table(obj, + file = paste0("report_data/", prefix, "_", + title, ".tabular"), + quote = F, sep = "\t", col.names = NA) +} -write.table(est_prop$Est.prop.weighted, - file = paste0("report_data/prop_", - "Music Estimated Proportions of Cell Types", - ".tabular"), - quote = F, sep = "\t", col.names = NA) -write.table(est_prop$Est.prop.allgene, - file = paste0("report_data/prop_", - "NNLS Estimated Proportions of Cell Types", - ".tabular"), - quote = F, sep = "\t", col.names = NA) -write.table(est_prop$Weight.gene, - file = paste0("report_data/weightgene_", - "Music Estimated Proportions of Cell Types (by Gene)", - ".tabular"), - quote = F, sep = "\t", col.names = NA) -write.table(est_prop$r.squared.full, - file = paste0("report_data/rsquared_", - "Music R-sqr Estimated Proportions of Each Subject", - ".tabular"), - quote = F, sep = "\t", col.names = NA) -write.table(est_prop$Var.prop, - file = paste0("report_data/varprop_", - "Matrix of Variance of MuSiC Estimates", - ".tabular"), - quote = F, sep = "\t", col.names = NA) +## Output Proportions +if ("NNLS" %in% methods) { + writable(est_prop$Est.prop.allgene, "prop", + "NNLS Estimated Proportions of Cell Types") +} + +if ("MuSiC" %in% methods) { + writable(est_prop$Est.prop.weighted, "prop", + "Music Estimated Proportions of Cell Types") + writable(est_prop$Weight.gene, "weightgene", + "Music Estimated Proportions of Cell Types (by Gene)") + writable(est_prop$r.squared.full, "rsquared", + "Music R-sqr Estimated Proportions of Each Subject") + writable(est_prop$Var.prop, "varprop", + "Matrix of Variance of MuSiC Estimates") +} if (use_disease_factor) {