comparison scripts/dendrogram.R @ 1:be91cb6f48e7 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 683bb72ae92b5759a239b7e3bf4c5a229ed35b54"
author bgruening
date Fri, 26 Nov 2021 15:55:11 +0000
parents 2cfd0db49bbc
children 7902cd31b9b5
comparison
equal deleted inserted replaced
0:2cfd0db49bbc 1:be91cb6f48e7
15 } 15 }
16 16
17 args <- commandArgs(trailingOnly = TRUE) 17 args <- commandArgs(trailingOnly = TRUE)
18 source(args[1]) 18 source(args[1])
19 19
20 ## We then perform bulk tissue cell type estimation with pre-grouping
21 ## of cell types: C, list_of_cell_types, marker genes name, marker
22 ## genes list.
23 ## data.to.use = list(
24 ## "C1" = list(cell.types = c("Neutro"),
25 ## marker.names=NULL,
26 ## marker.list=NULL),
27 ## "C2" = list(cell.types = c("Podo"),
28 ## marker.names=NULL,
29 ## marker.list=NULL),
30 ## "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"),
31 ## marker.names = "Epithelial",
32 ## marker.list = read_list("../test-data/epith.markers")),
33 ## "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"),
34 ## marker.names = "Immune",
35 ## marker.list = read_list("../test-data/immune.markers"))
36 ## )
37 grouped_celltypes <- lapply(data.to.use, function(x) {
38 x$cell.types
39 })
40 marker_groups <- lapply(data.to.use, function(x) {
41 x$marker.list
42 })
43 names(marker_groups) <- names(data.to.use)
44
45 20
46 ## Perform the estimation 21 ## Perform the estimation
47 ## Produce the first step information 22 ## Produce the first step information
48 sub.basis <- music_basis(scrna_eset, clusters = celltypes_label, 23 sub.basis <- music_basis(scrna_eset, clusters = celltypes_label,
49 samples = samples_label, 24 samples = samples_label,
50 select.ct = celltypes) 25 select.ct = celltypes)
51 26
52 ## Plot the dendrogram of design matrix and cross-subject mean of 27 ## Plot the dendrogram of design matrix and cross-subject mean of
53 ## relative abundance 28 ## relative abundance
29 ## Hierarchical clustering using Complete Linkage
30 d1 <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean")
31 hc1 <- hclust(d1, method = "complete")
32 ## Hierarchical clustering using Complete Linkage
33 d2 <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean")
34 hc2 <- hclust(d2, method = "complete")
35
36
37 if (length(data.to.use) > 0) {
38 ## We then perform bulk tissue cell type estimation with pre-grouping
39 ## of cell types: C, list_of_cell_types, marker genes name, marker
40 ## genes list.
41 ## data.to.use = list(
42 ## "C1" = list(cell.types = c("Neutro"),
43 ## marker.names=NULL,
44 ## marker.list=NULL),
45 ## "C2" = list(cell.types = c("Podo"),
46 ## marker.names=NULL,
47 ## marker.list=NULL),
48 ## "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"),
49 ## marker.names = "Epithelial",
50 ## marker.list = read_list("../test-data/epith.markers")),
51 ## "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"),
52 ## marker.names = "Immune",
53 ## marker.list = read_list("../test-data/immune.markers"))
54 ## )
55 grouped_celltypes <- lapply(data.to.use, function(x) {
56 x$cell.types
57 })
58 marker_groups <- lapply(data.to.use, function(x) {
59 x$marker.list
60 })
61 names(marker_groups) <- names(data.to.use)
62
63
64 cl_type <- as.character(scrna_eset[[celltypes_label]])
65
66 for (cl in seq_len(length(grouped_celltypes))) {
67 cl_type[cl_type %in%
68 grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl]
69 }
70 pData(scrna_eset)[[clustertype_label]] <- factor(
71 cl_type, levels = c(names(grouped_celltypes),
72 "CD-Trans", "Novel1", "Novel2"))
73
74 est_bulk <- music_prop.cluster(
75 bulk.eset = bulk_eset, sc.eset = scrna_eset,
76 group.markers = marker_groups, clusters = celltypes_label,
77 groups = clustertype_label, samples = samples_label,
78 clusters.type = grouped_celltypes
79 )
80
81 estimated_music_props <- est_bulk$Est.prop.weighted.cluster
82 ## NNLS is not calculated here
83
84 ## Show differences in estimation methods
85 ## Jitter plot of estimated cell type proportions
86 methods_list <- c("MuSiC")
87
88 jitter_fig <- Jitter_Est(
89 list(data.matrix(estimated_music_props)),
90 method.name = methods_list, title = "Jitter plot of Est Proportions",
91 size = 2, alpha = 0.7) +
92 theme_minimal() +
93 labs(x = element_blank(), y = element_blank()) +
94 theme(axis.text = element_text(size = 6),
95 axis.text.x = element_blank(),
96 legend.position = "none")
97
98 plot_box <- Boxplot_Est(list(
99 data.matrix(estimated_music_props)),
100 method.name = methods_list) +
101 theme_minimal() +
102 labs(x = element_blank(), y = element_blank()) +
103 theme(axis.text = element_text(size = 6),
104 axis.text.x = element_blank(),
105 legend.position = "none")
106
107 plot_hmap <- Prop_heat_Est(list(
108 data.matrix(estimated_music_props)),
109 method.name = methods_list) +
110 labs(x = element_blank(), y = element_blank()) +
111 theme(axis.text.y = element_text(size = 6),
112 axis.text.x = element_text(angle = -90, size = 5),
113 plot.title = element_text(size = 9),
114 legend.key.width = unit(0.15, "cm"),
115 legend.text = element_text(size = 5),
116 legend.title = element_text(size = 5))
117
118 }
119
120 pdf(file = outfile_pdf, width = 8, height = 8)
54 par(mfrow = c(1, 2)) 121 par(mfrow = c(1, 2))
55 d <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean")
56 ## Hierarchical clustering using Complete Linkage
57 hc1 <- hclust(d, method = "complete")
58 ## Plot the obtained dendrogram
59 plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)") 122 plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)")
60 d <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean")
61 ## Hierarchical clustering using Complete Linkage
62 hc2 <- hclust(d, method = "complete")
63 ## Plot the obtained dendrogram
64 pdf(file = outfile_pdf, width = 8, height = 8)
65 plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)") 123 plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)")
124 if (length(data.to.use) > 0) {
125 plot_grid(jitter_fig, plot_box, plot_hmap, ncol = 2, nrow = 2)
126 }
127 message(dev.off())
66 128
67 cl_type <- as.character(scrna_eset[[celltypes_label]]) 129 if (length(data.to.use) > 0) {
68 130 write.table(estimated_music_props,
69 for (cl in seq_len(length(grouped_celltypes))) { 131 file = outfile_tab, quote = F, col.names = NA, sep = "\t")
70 cl_type[cl_type %in% grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl]
71 } 132 }
72 pData(scrna_eset)[[clustertype_label]] <- factor(
73 cl_type, levels = c(names(grouped_celltypes),
74 "CD-Trans", "Novel1", "Novel2"))
75
76 est_bulk <- music_prop.cluster(
77 bulk.eset = bulk_eset, sc.eset = scrna_eset,
78 group.markers = marker_groups, clusters = celltypes_label,
79 groups = clustertype_label, samples = samples_label,
80 clusters.type = grouped_celltypes)
81
82 write.table(est_bulk, file = outfile_tab, quote = F, col.names = NA, sep = "\t")
83 dev.off()