diff scripts/dendrogram.R @ 1:3ca0132c182a draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 683bb72ae92b5759a239b7e3bf4c5a229ed35b54"
author bgruening
date Fri, 26 Nov 2021 15:54:51 +0000
parents 224721e76869
children fd7a16d073c5
line wrap: on
line diff
--- a/scripts/dendrogram.R	Sun Sep 12 19:48:48 2021 +0000
+++ b/scripts/dendrogram.R	Fri Nov 26 15:54:51 2021 +0000
@@ -17,31 +17,6 @@
 args <- commandArgs(trailingOnly = TRUE)
 source(args[1])
 
-## We then perform bulk tissue cell type estimation with pre-grouping
-## of cell types: C, list_of_cell_types, marker genes name, marker
-## genes list.
-## data.to.use = list(
-##     "C1" = list(cell.types = c("Neutro"),
-##                 marker.names=NULL,
-##                 marker.list=NULL),
-##     "C2" = list(cell.types = c("Podo"),
-##                 marker.names=NULL,
-##                 marker.list=NULL),
-##     "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"),
-##                 marker.names = "Epithelial",
-##                 marker.list = read_list("../test-data/epith.markers")),
-##     "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"),
-##                 marker.names = "Immune",
-##                 marker.list = read_list("../test-data/immune.markers"))
-## )
-grouped_celltypes <- lapply(data.to.use, function(x) {
-    x$cell.types
-})
-marker_groups <- lapply(data.to.use, function(x) {
-    x$marker.list
-})
-names(marker_groups) <- names(data.to.use)
-
 
 ## Perform the estimation
 ## Produce the first step information
@@ -51,33 +26,107 @@
 
 ## Plot the dendrogram of design matrix and cross-subject mean of
 ## realtive abundance
-par(mfrow = c(1, 2))
-d <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean")
+## Hierarchical clustering using Complete Linkage
+d1 <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean")
+hc1 <- hclust(d1, method = "complete")
 ## Hierarchical clustering using Complete Linkage
-hc1 <- hclust(d, method = "complete")
-## Plot the obtained dendrogram
-plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)")
-d <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean")
-## Hierarchical clustering using Complete Linkage
-hc2 <- hclust(d, method = "complete")
-## Plot the obtained dendrogram
-pdf(file = outfile_pdf, width = 8, height = 8)
-plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)")
+d2 <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean")
+hc2 <- hclust(d2, method = "complete")
+
 
-cl_type <- as.character(scrna_eset[[celltypes_label]])
+if (length(data.to.use) > 0) {
+    ## We then perform bulk tissue cell type estimation with pre-grouping
+    ## of cell types: C, list_of_cell_types, marker genes name, marker
+    ## genes list.
+    ## data.to.use = list(
+    ##     "C1" = list(cell.types = c("Neutro"),
+    ##                 marker.names=NULL,
+    ##                 marker.list=NULL),
+    ##     "C2" = list(cell.types = c("Podo"),
+    ##                 marker.names=NULL,
+    ##                 marker.list=NULL),
+    ##     "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"),
+    ##                 marker.names = "Epithelial",
+    ##                 marker.list = read_list("../test-data/epith.markers")),
+    ##     "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"),
+    ##                 marker.names = "Immune",
+    ##                 marker.list = read_list("../test-data/immune.markers"))
+    ## )
+    grouped_celltypes <- lapply(data.to.use, function(x) {
+        x$cell.types
+    })
+    marker_groups <- lapply(data.to.use, function(x) {
+        x$marker.list
+    })
+    names(marker_groups) <- names(data.to.use)
+
+
+    cl_type <- as.character(scrna_eset[[celltypes_label]])
+
+    for (cl in seq_len(length(grouped_celltypes))) {
+        cl_type[cl_type %in%
+                grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl]
+    }
+    pData(scrna_eset)[[clustertype_label]] <- factor(
+        cl_type, levels = c(names(grouped_celltypes),
+                            "CD-Trans", "Novel1", "Novel2"))
+
+    est_bulk <- music_prop.cluster(
+        bulk.eset = bulk_eset, sc.eset = scrna_eset,
+        group.markers = marker_groups, clusters = celltypes_label,
+        groups = clustertype_label, samples = samples_label,
+        clusters.type = grouped_celltypes
+    )
 
-for (cl in seq_len(length(grouped_celltypes))) {
-  cl_type[cl_type %in% grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl]
-}
-pData(scrna_eset)[[clustertype_label]] <- factor(
-    cl_type, levels = c(names(grouped_celltypes),
-                        "CD-Trans", "Novel1", "Novel2"))
+    estimated_music_props <- est_bulk$Est.prop.weighted.cluster
+    ## NNLS is not calculated here
+
+    ## Show different in estimation methods
+    ## Jitter plot of estimated cell type proportions
+    methods_list <- c("MuSiC")
+
+    jitter_fig <- Jitter_Est(
+        list(data.matrix(estimated_music_props)),
+        method.name = methods_list, title = "Jitter plot of Est Proportions",
+        size = 2, alpha = 0.7) +
+        theme_minimal() +
+        labs(x = element_blank(), y = element_blank()) +
+        theme(axis.text = element_text(size = 6),
+              axis.text.x = element_blank(),
+              legend.position = "none")
+
+    plot_box <- Boxplot_Est(list(
+        data.matrix(estimated_music_props)),
+        method.name = methods_list) +
+        theme_minimal() +
+        labs(x = element_blank(), y = element_blank()) +
+        theme(axis.text = element_text(size = 6),
+              axis.text.x = element_blank(),
+              legend.position = "none")
 
-est_bulk <- music_prop.cluster(
-    bulk.eset = bulk_eset, sc.eset = scrna_eset,
-    group.markers = marker_groups, clusters = celltypes_label,
-    groups = clustertype_label, samples = samples_label,
-    clusters.type = grouped_celltypes)
+    plot_hmap <- Prop_heat_Est(list(
+        data.matrix(estimated_music_props)),
+        method.name = methods_list) +
+        labs(x = element_blank(), y = element_blank()) +
+        theme(axis.text.y = element_text(size = 6),
+              axis.text.x = element_text(angle = -90, size = 5),
+              plot.title = element_text(size = 9),
+              legend.key.width = unit(0.15, "cm"),
+              legend.text = element_text(size = 5),
+              legend.title = element_text(size = 5))
 
-write.table(est_bulk, file = outfile_tab, quote = F, col.names = NA, sep = "\t")
-dev.off()
+}
+    
+pdf(file = outfile_pdf, width = 8, height = 8)
+par(mfrow = c(1, 2))
+plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)")
+plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)")
+if (length(data.to.use) > 0) {
+    plot_grid(jitter_fig, plot_box, plot_hmap, ncol = 2, nrow = 2)
+}
+message(dev.off())
+
+if (length(data.to.use) > 0) {
+    write.table(estimated_music_props,
+                file = outfile_tab, quote = F, col.names = NA, sep = "\t")
+}