music_inspect_eset: scripts/compare.R comparison

comparison scripts/compare.R @ 4:7705cc75ac18 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit d007ae51743e621dc47524f681501e72ef3a2910"

author	bgruening
date	Mon, 02 May 2022 09:59:49 +0000
parents	8c64a2da3869
children

comparison

equal deleted inserted replaced

-:8c64a2da3869
+:7705cc75ac18
 source(args[1])
 method_key <- list("MuSiC" = "est_music",
 "NNLS" = "est_nnls")[[est_method]]
+delim <- "::" ## separator bulk datasets and their samples
 scale_yaxes <- function(gplot, value) {
 if (is.na(value)) {
 gplot
 } else {
 merge_it(grudat$spread$scale, factor_groups, "value.scale"),
 by = c("Sample", "Cell", "Bulk", "Factors"))
 return(tab)
 }
-plot_grouped_heatmaps <- function(results) {
-pdf(out_heatmulti_pdf, width = 8, height = 8)
-for (sc_name in names(results)) {
-named_list <- sapply(
-names(results[[sc_name]]),
-function(n) {
-## We transpose the data here, because
-## the plotting function omits by default
-## the Y-axis which are the samples.
-##  Since the celltypes are the common factor
-## these should be the Y-axis instead.
-t(data.matrix(results[[sc_name]][[n]][[method_key]]))
-}, simplify = F, USE.NAMES = T)
-named_methods <- names(results[[sc_name]])
-##
-plot_hmap <- Prop_heat_Est(
-named_list,
-method.name = named_methods) +
-ggtitle(paste0("[", est_method, "] Cell type ",
-"proportions of ",
-"Bulk Datasets based on ",
-sc_name, " (scRNA)")) +
-xlab("Samples (Bulk)") +
-ylab("Cell Types (scRNA)") +
-theme(axis.text.x = element_text(angle = -90),
-axis.text.y = element_text(size = 6))
-print(plot_hmap)
-}
-dev.off()
-}
-## Desired plots
-## 1. Pie chart:
-##  - Per Bulk dataset (using just normalised proportions)
-##  - Per Bulk dataset (multiplying proportions by nreads)
 unlist_names <- function(results, method, prepend_bkname=FALSE) {
 unique(sort(
 unlist(lapply(names(results), function(scname) {
 lapply(names(results[[scname]]), function(bkname) {
 res <- get(method)(results[[scname]][[bkname]][[method_key]])
 if (prepend_bkname) {
 ## We *do not* assume unique bulk sample names
 ## across different bulk datasets.
-res <- paste0(bkname, "::", res)
+res <- paste0(bkname, delim, res)
 }
 return(res)
 })
 }))
 ))
 ## Iterate through all possible samples and populate a table.
 ddff <- data.frame()
 ddff_scale <- data.frame()
 for (cell in all_celltypes) {
 for (sample in all_samples) {
-group_sname <- unlist(strsplit(sample, split = "::"))
+group_sname <- unlist(strsplit(sample, split = delim))
 bulk <- group_sname[1]
 id_sample <- group_sname[2]
 for (scgroup in names(results)) {
 if (bulk %in% names(results[[scgroup]])) {
 mat_prop <- results[[scgroup]][[bulk]][[method_key]]
 ## Get a 2d DF of all factors across all bulk samples.
 res <- c()
 for (scgroup in names(results)) {
 for (bulkgroup in names(results[[scgroup]])) {
 dat <- results[[scgroup]][[bulkgroup]]$plot_groups
-dat$Samples <- paste0(bulkgroup, "::", dat$Samples) #nolint
+dat$Samples <- paste0(bulkgroup, delim, dat$Samples) #nolint
 res <- rbind(res, dat)
 }
 }
 return(res)
 }
 bd <- list()
 bd_scale <- list()
 bd_spread_scale <- list()
 bd_spread_prop <- list()
 for (bname in bulk_names) {
-subs <- mat_names[startsWith(mat_names, paste0(bname, "::"))]
+subs <- mat_names[startsWith(mat_names, paste0(bname, delim))]
 ## -
 bd[[bname]] <- rowSums(summat$prop[, subs])
 bd_scale[[bname]] <- rowSums(summat$scaled[, subs])
 bd_spread_scale[[bname]] <- summat$scaled[, subs]
 bd_spread_prop[[bname]] <- summat$prop[, subs]
 scaled = as.data.frame(bd_scale),
 spread = list(scale = bd_spread_scale,
 prop = bd_spread_prop)))
 }
-summarize_heatmaps <- function(grudat_spread_melt, do_factors) {
+do_cluster <- function(grudat_spread_melt, xaxis, yaxis, value_name,
-## -
+xlabs="", ylabs="", titled="",
+order_col=T, order_row=T, size=11) {
+data_m <- grudat_spread_melt
+data_matrix <- {
+tmp <- dcast(data_m, formula(paste0(yaxis, " ~ ", xaxis)), value.var = value_name)
+rownames(tmp) <- tmp[[yaxis]]
+tmp[[yaxis]] <- NULL
+tmp
+}
+dist_method <- "euclidean"
+clust_method <- "complete"
+if (order_row) {
+dd_row <- as.dendrogram(hclust(dist(data_matrix, method = dist_method), method = clust_method))
+row_ord <- order.dendrogram(dd_row)
+ordered_row_names <- row.names(data_matrix[row_ord, ])
+data_m[[yaxis]] <- factor(data_m[[yaxis]], levels = ordered_row_names)
+}
+if (order_col) {
+dd_col <- as.dendrogram(hclust(dist(t(data_matrix), method = dist_method),
+method = clust_method))
+col_ord <- order.dendrogram(dd_col)
+ordered_col_names <- colnames(data_matrix[, col_ord])
+data_m[[xaxis]] <- factor(data_m[[xaxis]], levels = ordered_col_names)
+}
+heat_plot <- ggplot(data_m, aes_string(x = xaxis, y = yaxis, fill = value_name)) +
+geom_tile(colour = "white") +
+scale_fill_gradient2(low = "steelblue", high = "red", mid = "white",
+name = element_blank()) +
+scale_y_discrete(position = "right") +
+theme(axis.text.x = element_text(angle = -90, hjust = 0,
+size = size)) +
+ggtitle(label = titled) + xlab(xlabs) + ylab(ylabs)
+## Graphics
+dendro_linesize <- 0.5
+dendro_colunit <- 0.2
+dendro_rowunit <- 0.1
+final_plot <- heat_plot
+if (order_row) {
+dendro_data_row <- ggdendro::dendro_data(dd_row, type = "rectangle")
+dendro_row <- cowplot::axis_canvas(heat_plot, axis = "y", coord_flip = TRUE) +
+ggplot2::geom_segment(data = ggdendro::segment(dendro_data_row),
+ggplot2::aes(y = -y, x = x, xend = xend, yend = -yend),
+size = dendro_linesize) + ggplot2::coord_flip()
+final_plot <- cowplot::insert_yaxis_grob(
+final_plot, dendro_row, grid::unit(dendro_colunit, "null"),
+position = "left")
+}
+if (order_col) {
+dendro_data_col <- ggdendro::dendro_data(dd_col, type = "rectangle")
+dendro_col <- cowplot::axis_canvas(heat_plot, axis = "x") +
+ggplot2::geom_segment(data = ggdendro::segment(dendro_data_col),
+ggplot2::aes(x = x, y = y, xend = xend, yend = yend),
+size = dendro_linesize)
+final_plot <- cowplot::insert_xaxis_grob(
+final_plot, dendro_col, grid::unit(dendro_rowunit, "null"),
+position = "top")
+}
+return(cowplot::ggdraw(final_plot))
+}
+summarize_heatmaps <- function(grudat_spread_melt, do_factors, cluster="None") {
+## - Cluster is either "Rows", "Cols", "Both", or "None"
 do_single <- function(grudat_melted, yaxis, xaxis, fillval, title,
 ylabs = element_blank(), xlabs = element_blank(),
 use_log = TRUE, size = 11) {
 ## Convert from matrix to long format
 melted <- grudat_melted ## copy?
 if (use_log) {
 melted[[fillval]] <- log10(melted[[fillval]] + 1)
 }
-return(ggplot(melted) +
+if (cluster == "None") {
-geom_tile(aes_string(y = yaxis, x = xaxis, fill = fillval),
+return(ggplot(melted) +
-colour = "white") +
+geom_tile(aes_string(y = yaxis, x = xaxis, fill = fillval),
-scale_fill_gradient2(low = "steelblue", high = "red",
+colour = "white") +
-mid = "white", name = element_blank()) +
+scale_fill_gradient2(
-theme(axis.text.x = element_text(angle = -90, hjust = 0,
+low = "steelblue", high = "red", mid = "white",
-size = size)) +
+name = element_blank()) +
-ggtitle(label = title) + xlab(xlabs) + ylab(ylabs))
+theme(axis.text.x = element_text(
+angle = -90, hjust = 0, size = size)) +
+ggtitle(label = title) + xlab(xlabs) + ylab(ylabs))
+} else {
+return(do_cluster(grudat_spread_melt, xaxis, yaxis, fillval,
+xlabs, ylabs, title,
+(cluster %in% c("Cols", "Both")),
+(cluster %in% c("Rows", "Both"))))
+}
 }
 do_gridplot <- function(title, xvar, plot="both", ncol=2, size = 11) {
 do_logged <- (plot %in% c("log", "both"))
 do_normal <- (plot %in% c("normal", "both"))
 size = size)
 }
 return(plot_grid(ggdraw() + draw_label(title, fontface = "bold"),
 plot_grid(plotlist = plist, ncol = ncol),
 ncol = 1, rel_heights = c(0.05, 0.95)))
+}
-}
-p1 <- do_gridplot("Cell Types vs Bulk Datasets", "Bulk", "both", )
+p1 <- do_gridplot("Cell Types vs Bulk Datasets", "Bulk", "both")
-p2a <- do_gridplot("Cell Types vs Samples", "Sample", "normal", 1,
+p2a <- do_gridplot("Cell Types vs Samples", "Sample", "normal",
-size = 8)
+ncol = 1, size = 8)
-p2b <- do_gridplot("Cell Types vs Samples (log10+1)", "Sample", "log", 1,
+p2b <- do_gridplot("Cell Types vs Samples (log10+1)", "Sample", "log",
-size = 8)
+ncol = 1, size = 8)
 p3 <- ggplot + theme_void()
 if (do_factors) {
-p3 <- do_gridplot("Cell Types against Factors", "Factors", "both")
+p3 <- do_gridplot("Cell Types vs Factors", "Factors", "both")
 }
 return(list(bulk = p1,
 samples = list(log = p2b, normal = p2a),
 factors = p3))
 }
 ylab("Bulk Dataset")
 B$p4 <- common2 + geom_boxplot(aes(y = Bulk, color = Factors)) +
 ylab("Bulk Dataset")
 }
-title_a <- "Cell Types against Bulk"
+title_a <- "Cell Types vs Bulk Datasets"
-title_b <- "Bulk Datasets against Cells"
+title_b <- "Bulk Datasets vs Cell Types"
 if (do_factors) {
 title_a <- paste0(title_a, " and Factors")
 title_b <- paste0(title_b, " and Factors")
 }
 print_red(" - selecting:", out_filt$facts)
 }
 return(grudat_filt)
 }
+writable2 <- function(obj, prefix, title) {
+write.table(obj,
+file = paste0("report_data/", prefix, "_",
+title, ".tabular"),
+quote = F, sep = "\t", col.names = NA)
+}
 results <- music_on_all(files)
-if (heat_grouped_p) {
-plot_grouped_heatmaps(results)
-} else {
-plot_all_individual_heatmaps(results)
-}
-save.image("/tmp/sesh.rds")
 summat <- summarized_matrix(results)
 grudat <- group_by_dataset(summat)
 grudat_spread_melt <- merge_factors_spread(grudat$spread,
 flatten_factor_list(results))
+grudat_spread_melt_filt <- filter_output(grudat_spread_melt, out_filt)
+plot_all_individual_heatmaps(results)
 ## The output filters ONLY apply to boxplots, since these take
 do_factors <- (length(unique(grudat_spread_melt[["Factors"]])) > 1)
-grudat_spread_melt_filt <- filter_output(grudat_spread_melt, out_filt)
-heat_maps <- summarize_heatmaps(grudat_spread_melt_filt, do_factors)
 box_plots <- summarize_boxplots(grudat_spread_melt_filt, do_factors)
+heat_maps <- summarize_heatmaps(grudat_spread_melt_filt, do_factors,
+dendro_setting)
 pdf(out_heatsumm_pdf, width = 14, height = 14)
 print(heat_maps)
 print(box_plots)
 dev.off()
 stats_prop <- lapply(grudat$spread$prop, function(x) {
 t(apply(x, 1, summary))})
 stats_scale <- lapply(grudat$spread$scale, function(x) {
 t(apply(x, 1, summary))})
-writable2 <- function(obj, prefix, title) {
-write.table(obj,
-file = paste0("report_data/", prefix, "_",
-title, ".tabular"),
-quote = F, sep = "\t", col.names = NA)
-}
 ## Make the value table printable
 grudat_spread_melt$value.scale <- as.integer(grudat_spread_melt$value.scale) # nolint
 colnames(grudat_spread_melt) <- c("Sample", "Cell", "Bulk", "Factors",
 "CT Prop in Sample", "Number of Reads")

Mercurial > repos > bgruening > music_inspect_eset

comparison scripts/compare.R @ 4:7705cc75ac18 draft default tip