Galaxy |

Changeset 3:7ffaa0968da3 (2022-02-10)

Previous changeset 2:7902cd31b9b5 (2022-01-29) Next changeset 4:282819d09a4f (2022-05-02)

Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 8beed1a19fcd9dc59f7746e1dfa735a2d5f29784"

modified:
macros.xml
scripts/estimateprops.R

added:
scripts/compare.R
test-data/default_output_no_disease_nnls.pdf
test-data/out_filt1.pdf
test-data/out_heat2.pdf

diff -r 7902cd31b9b5 -r 7ffaa0968da3 macros.xml
--- a/macros.xml Sat Jan 29 12:52:10 2022 +0000
+++ b/macros.xml Thu Feb 10 12:53:22 2022 +0000

@@ -1,5 +1,5 @@
<macros>
-    <token name="@VERSION_SUFFIX@">2</token>
+    <token name="@VERSION_SUFFIX@">3</token>

diff -r 7902cd31b9b5 -r 7ffaa0968da3 scripts/compare.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/compare.R Thu Feb 10 12:53:22 2022 +0000

[

b'@@ -0,0 +1,440 @@\n+suppressWarnings(suppressPackageStartupMessages(library(xbioc)))\n+suppressWarnings(suppressPackageStartupMessages(library(MuSiC)))\n+suppressWarnings(suppressPackageStartupMessages(library(reshape2)))\n+suppressWarnings(suppressPackageStartupMessages(library(cowplot)))\n+## We use this script to estimate the effectiveness of proportion methods\n+\n+## Load Conf\n+args <- commandArgs(trailingOnly = TRUE)\n+source(args[1])\n+\n+method_key <- list("MuSiC" = "est_music",\n+ "NNLS" = "est_nnls")[[est_method]]\n+\n+\n+scale_yaxes <- function(gplot, value) {\n+ if (is.na(value)) {\n+ gplot\n+ } else {\n+ gplot + scale_y_continuous(lim = c(0, value))\n+ }\n+}\n+\n+\n+set_factor_data <- function(bulk_data, factor_name = NULL) {\n+ if (is.null(factor_name)) {\n+ factor_name <- "None" ## change to something plottable\n+ }\n+ pdat <- pData(bulk_data)\n+ sam_fact <- NULL\n+ if (factor_name %in% colnames(pdat)) {\n+ sam_fact <- cbind(rownames(pdat),\n+ as.character(pdat[[factor_name]]))\n+ cat(paste0(" - factor: ", factor_name,\n+ " found in phenotypes\\n"))\n+ } else {\n+ ## We assign this as the factor for the entire dataset\n+ sam_fact <- cbind(rownames(pdat),\n+ factor_name)\n+ cat(paste0(" - factor: assigning \\"", factor_name,\n+ "\\" to whole dataset\\n"))\n+ }\n+ colnames(sam_fact) <- c("Samples", "Factors")\n+ return(as.data.frame(sam_fact))\n+}\n+\n+## Due to limiting sizes, we need to load and unload\n+## possibly very large datasets.\n+process_pair <- function(sc_data, bulk_data,\n+ ctypes_label, samples_label, ctypes,\n+ factor_group) {\n+ ## - Generate\n+ est_prop <- music_prop(\n+ bulk.eset = bulk_data, sc.eset = sc_data,\n+ clusters = ctypes_label,\n+ samples = samples_label, select.ct = ctypes, verbose = T)\n+ ## -\n+ estimated_music_props <- est_prop$Est.prop.weighted\n+ estimated_nnls_props <- est_prop$Est.prop.allgene\n+ ## -\n+ fact_data <- set_factor_data(bulk_data, factor_group)\n+ ## -\n+ return(list(est_music = estimated_music_props,\n+ est_nnls = estimated_nnls_props,\n+ bulk_sample_totals = colSums(exprs(bulk_data)),\n+ plot_groups = fact_data))\n+}\n+\n+music_on_all <- function(files) {\n+ results <- list()\n+ for (sc_name in names(files)) {\n+ cat(paste0("sc-group:", sc_name, "\\n"))\n+ scgroup <- files[[sc_name]]\n+ ## - sc Data\n+ sc_est <- readRDS(scgroup$dataset)\n+ ## - params\n+ celltypes_label <- scgroup$label_cell\n+ samples_label <- scgroup$label_sample\n+ celltypes <- scgroup$celltype\n+\n+ results[[sc_name]] <- list()\n+ for (bulk_name in names(scgroup$bulk)) {\n+ cat(paste0(" - bulk-group:", bulk_name, "\\n"))\n+ bulkgroup <- scgroup$bulk[[bulk_name]]\n+ ## - bulk Data\n+ bulk_est <- readRDS(bulkgroup$dataset)\n+ ## - bulk params\n+ pheno_facts <- bulkgroup$pheno_facts\n+ pheno_excl <- bulkgroup$pheno_excl\n+ ##\n+ results[[sc_name]][[bulk_name]] <- process_pair(\n+ sc_est, bulk_est,\n+ celltypes_label, samples_label,\n+ celltypes, bulkgroup$factor_group)\n+ ##\n+ rm(bulk_est) ## unload\n+ }\n+ rm(sc_est) ## unload\n+ }\n+ return(results)\n+}\n+\n+plot_all_individual_heatmaps <- function(results) {\n+ pdf(out_heatmulti_pdf, width = 8, height = 8)\n+ for (sc_name in names(results)) {\n+ for (bk_name in names(results[[sc_name]])) {\n+ res <- results[[sc_name]][[bk_name]]\n+ plot_hmap <- Prop_heat_Est(\n+ data.matrix(res[[method_key]]), method.name = est_method) +\n+ ggtitle(paste0("[", est_method, "Cell type ",\n+ '..b'\n+ B$p2 <- common1 + geom_boxplot(aes(y = Bulk, color = Cell)) +\n+ ylab("Bulk Dataset")\n+ ## -- Factor plots are optional\n+ A$p3 <- B$p3 <- A$p4 <- B$p4 <- ggplot() + theme_void()\n+\n+ if (do_factors) {\n+ A$p3 <- common1 + geom_boxplot(aes(y = Cell, color = Factors))\n+ A$p4 <- common2 + geom_boxplot(aes(y = Cell, color = Factors))\n+ B$p3 <- common1 + geom_boxplot(aes(y = Bulk, color = Factors)) +\n+ ylab("Bulk Dataset")\n+ B$p4 <- common2 + geom_boxplot(aes(y = Bulk, color = Factors)) +\n+ ylab("Bulk Dataset")\n+ }\n+\n+ title_a <- "Cell Types against Bulk"\n+ title_b <- "Bulk Datasets against Cells"\n+ if (do_factors) {\n+ title_a <- paste0(title_a, " and Factors")\n+ title_b <- paste0(title_b, " and Factors")\n+ }\n+\n+ a_all <- plot_grid(ggdraw() + draw_label(title_a, fontface = "bold"),\n+ plot_grid(plotlist = A, ncol = 2),\n+ ncol = 1, rel_heights = c(0.05, 0.95))\n+ b_all <- plot_grid(ggdraw() + draw_label(title_b, fontface = "bold"),\n+ plot_grid(plotlist = B, ncol = 2),\n+ ncol = 1, rel_heights = c(0.05, 0.95))\n+ return(list(cell = a_all, bulk = b_all))\n+}\n+\n+filter_output <- function(grudat_spread_melt, out_filt) {\n+ print_red <- function(comment, red_list) {\n+ cat(paste(comment, paste(red_list, collapse = ", "), "\\n"))\n+ }\n+ grudat_filt <- grudat_spread_melt\n+ print_red("Total Cell types:", unique(grudat_filt$Cell))\n+ if (!is.null(out_filt$cells)) {\n+ grudat_filt <- grudat_filt[grudat_filt$Cell %in% out_filt$cells, ]\n+ print_red(" - selecting:", out_filt$cells)\n+ }\n+ print_red("Total Factors:", unique(grudat_spread_melt$Factors))\n+ if (!is.null(out_filt$facts)) {\n+ grudat_filt <- grudat_filt[grudat_filt$Factors %in% out_filt$facts, ]\n+ print_red(" - selecting:", out_filt$facts)\n+ }\n+ return(grudat_filt)\n+}\n+\n+\n+results <- music_on_all(files)\n+\n+if (heat_grouped_p) {\n+ plot_grouped_heatmaps(results)\n+} else {\n+ plot_all_individual_heatmaps(results)\n+}\n+\n+save.image("/tmp/sesh.rds")\n+\n+summat <- summarized_matrix(results)\n+grudat <- group_by_dataset(summat)\n+grudat_spread_melt <- merge_factors_spread(grudat$spread,\n+ flatten_factor_list(results))\n+\n+\n+\n+## The output filters ONLY apply to boxplots, since these take\n+do_factors <- (length(unique(grudat_spread_melt[["Factors"]])) > 1)\n+\n+grudat_spread_melt_filt <- filter_output(grudat_spread_melt, out_filt)\n+\n+heat_maps <- summarize_heatmaps(grudat_spread_melt_filt, do_factors)\n+box_plots <- summarize_boxplots(grudat_spread_melt_filt, do_factors)\n+\n+pdf(out_heatsumm_pdf, width = 14, height = 14)\n+print(heat_maps)\n+print(box_plots)\n+dev.off()\n+\n+## Generate output tables\n+stats_prop <- lapply(grudat$spread$prop, function(x) {\n+ t(apply(x, 1, summary))})\n+stats_scale <- lapply(grudat$spread$scale, function(x) {\n+ t(apply(x, 1, summary))})\n+\n+writable2 <- function(obj, prefix, title) {\n+ write.table(obj,\n+ file = paste0("report_data/", prefix, "_",\n+ title, ".tabular"),\n+ quote = F, sep = "\\t", col.names = NA)\n+}\n+## Make the value table printable\n+grudat_spread_melt$value.scale <- as.integer(grudat_spread_melt$value.scale) # nolint\n+colnames(grudat_spread_melt) <- c("Sample", "Cell", "Bulk", "Factors",\n+ "CT Prop in Sample", "Number of Reads")\n+\n+writable2(grudat_spread_melt, "values", "Data Table")\n+writable2(summat$prop, "values", "Matrix of Cell Type Sample Proportions")\n+writable2({\n+ aa <- as.matrix(summat$scaled); mode(aa) <- "integer"; aa\n+}, "values", "Matrix of Cell Type Read Counts")\n+\n+for (bname in names(stats_prop)) {\n+ writable2(stats_prop[[bname]], "stats", paste0(bname, ": Sample Props"))\n+ writable2(stats_scale[[bname]], "stats", paste0(bname, ": Read Props"))\n+}\n'

diff -r 7902cd31b9b5 -r 7ffaa0968da3 scripts/estimateprops.R
--- a/scripts/estimateprops.R Sat Jan 29 12:52:10 2022 +0000
+++ b/scripts/estimateprops.R Thu Feb 10 12:53:22 2022 +0000

[

b'@@ -14,8 +14,12 @@\n clusters = celltypes_label,\n samples = samples_label, select.ct = celltypes, verbose = T)\n \n+\n estimated_music_props <- est_prop$Est.prop.weighted\n estimated_nnls_props <- est_prop$Est.prop.allgene\n+##\n+estimated_music_props_flat <- melt(estimated_music_props)\n+estimated_nnls_props_flat <- melt(estimated_nnls_props)\n \n scale_yaxes <- function(gplot, value) {\n if (is.na(value)) {\n@@ -25,23 +29,36 @@\n }\n }\n \n+sieve_data <- function(func, music_data, nnls_data) {\n+ if (func == "list") {\n+ res <- list(if ("MuSiC" %in% methods) music_data else NULL,\n+ if ("NNLS" %in% methods) nnls_data else NULL)\n+ res[lengths(res) > 0] ## filter out NULL elements\n+ } else if (func == "rbind") {\n+ rbind(if ("MuSiC" %in% methods) music_data else NULL,\n+ if ("NNLS" %in% methods) nnls_data else NULL)\n+ } else if (func == "c") {\n+ c(if ("MuSiC" %in% methods) music_data else NULL,\n+ if ("NNLS" %in% methods) nnls_data else NULL)\n+ }\n+}\n+\n+\n ## Show different in estimation methods\n ## Jitter plot of estimated cell type proportions\n jitter_fig <- scale_yaxes(Jitter_Est(\n- list(data.matrix(estimated_music_props),\n- data.matrix(estimated_nnls_props)),\n+ sieve_data("list",\n+ data.matrix(estimated_music_props),\n+ data.matrix(estimated_nnls_props)),\n method.name = methods, title = "Jitter plot of Est Proportions",\n size = 2, alpha = 0.7) + theme_minimal(), maxyscale)\n \n-\n ## Make a Plot\n ## A more sophisticated jitter plot is provided as below. We separated\n ## the T2D subjects and normal subjects by their disease factor levels.\n-estimated_music_props_flat <- melt(estimated_music_props)\n-estimated_nnls_props_flat <- melt(estimated_nnls_props)\n-\n-m_prop <- rbind(estimated_music_props_flat,\n- estimated_nnls_props_flat)\n+m_prop <- sieve_data("rbind",\n+ estimated_music_props_flat,\n+ estimated_nnls_props_flat)\n colnames(m_prop) <- c("Sub", "CellType", "Prop")\n \n if (is.null(celltypes)) {\n@@ -69,7 +86,7 @@\n phenotype_target_threshold <- -Inf\n message("phenotype target threshold set to -Inf")\n }\n-\n+ ## the "2" here is to do with the sample groups, not number of methods\n m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint\n m_prop <- m_prop[!is.na(m_prop$Disease_factor), ]\n ## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2\n@@ -84,8 +101,10 @@\n m_prop <- rbind(subset(m_prop, Disease != sample_disease_group),\n subset(m_prop, Disease == sample_disease_group))\n \n- jitter_new <- scale_yaxes(ggplot(m_prop, aes(Method, Prop)) +\n- geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),\n+ jitter_new <- scale_yaxes(\n+ ggplot(m_prop, aes(Method, Prop)) +\n+ geom_point(aes(fill = Method, color = Disease,\n+ stroke = D, shape = Disease),\n size = 2, alpha = 0.7,\n position = position_jitter(width = 0.25, height = 0)) +\n facet_wrap(~ CellType, scales = "free") +\n@@ -100,21 +119,23 @@\n ## Create dataframe for beta cell proportions and Disease_factor levels\n ## - Ugly code. Essentially, doubles the cell type proportions for each\n ## set of MuSiC and NNLS methods\n- m_prop_ana <- data.frame(pData(bulk_eset)[rep(1:nrow(estimated_music_props), 2), #nolint\n- phenotype_factors],\n- ## get proportions of target cell type\n- ct.prop = c(estimated_music_props[, phenotype_scrna_target],\n- estimated_nnls_props[, phenotype_scrna_target]),\n- ##\n- Method = factor(rep(methods,\n- each = nrow(estimated_music_'..b' geom_smooth(method = "lm", se = FALSE, col = "black", lwd = 0.25) +\n- geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),\n+ geom_point(aes(fill = Method, color = Disease,\n+ stroke = D, shape = Disease),\n size = 2, alpha = 0.7) + facet_wrap(~ Method) +\n ggtitle(paste0(toupper(phenotype_target), " vs. ",\n- toupper(phenotype_scrna_target), " Cell Type Proportion")) +\n+ toupper(phenotype_scrna_target),\n+ " Cell Type Proportion")) +\n theme_minimal() +\n ylab(paste0("Proportion of ",\n phenotype_scrna_target, " cells")) +\n@@ -138,19 +162,22 @@\n }\n \n ## BoxPlot\n-plot_box <- scale_yaxes(Boxplot_Est(list(\n- data.matrix(estimated_music_props),\n- data.matrix(estimated_nnls_props)),\n- method.name = c("MuSiC", "NNLS")) +\n+plot_box <- scale_yaxes(Boxplot_Est(\n+ sieve_data("list",\n+ data.matrix(estimated_music_props),\n+ data.matrix(estimated_nnls_props)),\n+ method.name = methods) +\n theme(axis.text.x = element_text(angle = -90),\n axis.text.y = element_text(size = 8)) +\n ggtitle(element_blank()) + theme_minimal(), maxyscale)\n \n ## Heatmap\n-plot_hmap <- Prop_heat_Est(list(\n- data.matrix(estimated_music_props),\n- data.matrix(estimated_nnls_props)),\n- method.name = c("MuSiC", "NNLS")) +\n+plot_hmap <- Prop_heat_Est(\n+ sieve_data(\n+ "list",\n+ data.matrix(estimated_music_props),\n+ data.matrix(estimated_nnls_props)),\n+ method.name = methods) +\n theme(axis.text.x = element_text(angle = -90),\n axis.text.y = element_text(size = 6))\n \n@@ -167,33 +194,29 @@\n plot_hmap\n message(dev.off())\n \n-## Output Proportions\n+writable <- function(obj, prefix, title) {\n+ write.table(obj,\n+ file = paste0("report_data/", prefix, "_",\n+ title, ".tabular"),\n+ quote = F, sep = "\\t", col.names = NA)\n+}\n \n-write.table(est_prop$Est.prop.weighted,\n- file = paste0("report_data/prop_",\n- "Music Estimated Proportions of Cell Types",\n- ".tabular"),\n- quote = F, sep = "\\t", col.names = NA)\n-write.table(est_prop$Est.prop.allgene,\n- file = paste0("report_data/prop_",\n- "NNLS Estimated Proportions of Cell Types",\n- ".tabular"),\n- quote = F, sep = "\\t", col.names = NA)\n-write.table(est_prop$Weight.gene,\n- file = paste0("report_data/weightgene_",\n- "Music Estimated Proportions of Cell Types (by Gene)",\n- ".tabular"),\n- quote = F, sep = "\\t", col.names = NA)\n-write.table(est_prop$r.squared.full,\n- file = paste0("report_data/rsquared_",\n- "Music R-sqr Estimated Proportions of Each Subject",\n- ".tabular"),\n- quote = F, sep = "\\t", col.names = NA)\n-write.table(est_prop$Var.prop,\n- file = paste0("report_data/varprop_",\n- "Matrix of Variance of MuSiC Estimates",\n- ".tabular"),\n- quote = F, sep = "\\t", col.names = NA)\n+## Output Proportions\n+if ("NNLS" %in% methods) {\n+ writable(est_prop$Est.prop.allgene, "prop",\n+ "NNLS Estimated Proportions of Cell Types")\n+}\n+\n+if ("MuSiC" %in% methods) {\n+ writable(est_prop$Est.prop.weighted, "prop",\n+ "Music Estimated Proportions of Cell Types")\n+ writable(est_prop$Weight.gene, "weightgene",\n+ "Music Estimated Proportions of Cell Types (by Gene)")\n+ writable(est_prop$r.squared.full, "rsquared",\n+ "Music R-sqr Estimated Proportions of Each Subject")\n+ writable(est_prop$Var.prop, "varprop",\n+ "Matrix of Variance of MuSiC Estimates")\n+}\n \n \n if (use_disease_factor) {\n'

diff -r 7902cd31b9b5 -r 7ffaa0968da3 test-data/default_output_no_disease_nnls.pdf

Binary file test-data/default_output_no_disease_nnls.pdf has changed

diff -r 7902cd31b9b5 -r 7ffaa0968da3 test-data/out_filt1.pdf

Binary file test-data/out_filt1.pdf has changed

diff -r 7902cd31b9b5 -r 7ffaa0968da3 test-data/out_heat2.pdf

Binary file test-data/out_heat2.pdf has changed