comparison scripts/estimateprops.R @ 3:8c64a2da3869 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 8beed1a19fcd9dc59f7746e1dfa735a2d5f29784"
author bgruening
date Thu, 10 Feb 2022 12:52:56 +0000
parents 577435e5e6b2
children
comparison
equal deleted inserted replaced
2:577435e5e6b2 3:8c64a2da3869
12 est_prop <- music_prop( 12 est_prop <- music_prop(
13 bulk.eset = bulk_eset, sc.eset = scrna_eset, 13 bulk.eset = bulk_eset, sc.eset = scrna_eset,
14 clusters = celltypes_label, 14 clusters = celltypes_label,
15 samples = samples_label, select.ct = celltypes, verbose = T) 15 samples = samples_label, select.ct = celltypes, verbose = T)
16 16
17
17 estimated_music_props <- est_prop$Est.prop.weighted 18 estimated_music_props <- est_prop$Est.prop.weighted
18 estimated_nnls_props <- est_prop$Est.prop.allgene 19 estimated_nnls_props <- est_prop$Est.prop.allgene
20 ##
21 estimated_music_props_flat <- melt(estimated_music_props)
22 estimated_nnls_props_flat <- melt(estimated_nnls_props)
19 23
20 scale_yaxes <- function(gplot, value) { 24 scale_yaxes <- function(gplot, value) {
21 if (is.na(value)) { 25 if (is.na(value)) {
22 gplot 26 gplot
23 } else { 27 } else {
24 gplot + scale_y_continuous(lim = c(0, value)) 28 gplot + scale_y_continuous(lim = c(0, value))
25 } 29 }
26 } 30 }
27 31
32 sieve_data <- function(func, music_data, nnls_data) {
33 if (func == "list") {
34 res <- list(if ("MuSiC" %in% methods) music_data else NULL,
35 if ("NNLS" %in% methods) nnls_data else NULL)
36 res[lengths(res) > 0] ## filter out NULL elements
37 } else if (func == "rbind") {
38 rbind(if ("MuSiC" %in% methods) music_data else NULL,
39 if ("NNLS" %in% methods) nnls_data else NULL)
40 } else if (func == "c") {
41 c(if ("MuSiC" %in% methods) music_data else NULL,
42 if ("NNLS" %in% methods) nnls_data else NULL)
43 }
44 }
45
46
28 ## Show different in estimation methods 47 ## Show different in estimation methods
29 ## Jitter plot of estimated cell type proportions 48 ## Jitter plot of estimated cell type proportions
30 jitter_fig <- scale_yaxes(Jitter_Est( 49 jitter_fig <- scale_yaxes(Jitter_Est(
31 list(data.matrix(estimated_music_props), 50 sieve_data("list",
32 data.matrix(estimated_nnls_props)), 51 data.matrix(estimated_music_props),
52 data.matrix(estimated_nnls_props)),
33 method.name = methods, title = "Jitter plot of Est Proportions", 53 method.name = methods, title = "Jitter plot of Est Proportions",
34 size = 2, alpha = 0.7) + theme_minimal(), maxyscale) 54 size = 2, alpha = 0.7) + theme_minimal(), maxyscale)
35
36 55
37 ## Make a Plot 56 ## Make a Plot
38 ## A more sophisticated jitter plot is provided as below. We separated 57 ## A more sophisticated jitter plot is provided as below. We separated
39 ## the T2D subjects and normal subjects by their disease factor levels. 58 ## the T2D subjects and normal subjects by their disease factor levels.
40 estimated_music_props_flat <- melt(estimated_music_props) 59 m_prop <- sieve_data("rbind",
41 estimated_nnls_props_flat <- melt(estimated_nnls_props) 60 estimated_music_props_flat,
42 61 estimated_nnls_props_flat)
43 m_prop <- rbind(estimated_music_props_flat,
44 estimated_nnls_props_flat)
45 colnames(m_prop) <- c("Sub", "CellType", "Prop") 62 colnames(m_prop) <- c("Sub", "CellType", "Prop")
46 63
47 if (is.null(celltypes)) { 64 if (is.null(celltypes)) {
48 celltypes <- levels(m_prop$CellType) 65 celltypes <- levels(m_prop$CellType)
49 message("No celltypes declared, using:") 66 message("No celltypes declared, using:")
67 84
68 if (phenotype_target_threshold == -99) { 85 if (phenotype_target_threshold == -99) {
69 phenotype_target_threshold <- -Inf 86 phenotype_target_threshold <- -Inf
70 message("phenotype target threshold set to -Inf") 87 message("phenotype target threshold set to -Inf")
71 } 88 }
72 89 ## the "2" here is to do with the sample groups, not number of methods
73 m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint 90 m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint
74 m_prop <- m_prop[!is.na(m_prop$Disease_factor), ] 91 m_prop <- m_prop[!is.na(m_prop$Disease_factor), ]
75 ## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2 92 ## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2
76 sample_groups <- c("Normal", sample_disease_group) 93 sample_groups <- c("Normal", sample_disease_group)
77 m_prop$Disease <- factor(sample_groups[(m_prop$Disease_factor > phenotype_target_threshold) + 1], # nolint 94 m_prop$Disease <- factor(sample_groups[(m_prop$Disease_factor > phenotype_target_threshold) + 1], # nolint
82 sample_disease_group) / sample_disease_group_scale 99 sample_disease_group) / sample_disease_group_scale
83 ## NA's are not included in the comparison below 100 ## NA's are not included in the comparison below
84 m_prop <- rbind(subset(m_prop, Disease != sample_disease_group), 101 m_prop <- rbind(subset(m_prop, Disease != sample_disease_group),
85 subset(m_prop, Disease == sample_disease_group)) 102 subset(m_prop, Disease == sample_disease_group))
86 103
87 jitter_new <- scale_yaxes(ggplot(m_prop, aes(Method, Prop)) + 104 jitter_new <- scale_yaxes(
88 geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease), 105 ggplot(m_prop, aes(Method, Prop)) +
106 geom_point(aes(fill = Method, color = Disease,
107 stroke = D, shape = Disease),
89 size = 2, alpha = 0.7, 108 size = 2, alpha = 0.7,
90 position = position_jitter(width = 0.25, height = 0)) + 109 position = position_jitter(width = 0.25, height = 0)) +
91 facet_wrap(~ CellType, scales = "free") + 110 facet_wrap(~ CellType, scales = "free") +
92 scale_colour_manual(values = c("white", "gray20")) + 111 scale_colour_manual(values = c("white", "gray20")) +
93 scale_shape_manual(values = c(21, 24)) + theme_minimal(), maxyscale) 112 scale_shape_manual(values = c(21, 24)) + theme_minimal(), maxyscale)
98 117
99 ## Plot to compare method effectiveness 118 ## Plot to compare method effectiveness
100 ## Create dataframe for beta cell proportions and Disease_factor levels 119 ## Create dataframe for beta cell proportions and Disease_factor levels
101 ## - Ugly code. Essentially, doubles the cell type proportions for each 120 ## - Ugly code. Essentially, doubles the cell type proportions for each
102 ## set of MuSiC and NNLS methods 121 ## set of MuSiC and NNLS methods
103 m_prop_ana <- data.frame(pData(bulk_eset)[rep(1:nrow(estimated_music_props), 2), #nolint 122 m_prop_ana <- data.frame(
104 phenotype_factors], 123 pData(bulk_eset)[rep(1:nrow(estimated_music_props), length(methods)), #nolint
105 ## get proportions of target cell type 124 phenotype_factors],
106 ct.prop = c(estimated_music_props[, phenotype_scrna_target], 125 ## get proportions of target cell type
107 estimated_nnls_props[, phenotype_scrna_target]), 126 ct.prop = sieve_data("c",
108 ## 127 estimated_music_props[, phenotype_scrna_target],
109 Method = factor(rep(methods, 128 estimated_nnls_props[, phenotype_scrna_target]),
110 each = nrow(estimated_music_props)), 129 ##
111 levels = methods)) 130 Method = factor(rep(methods,
131 each = nrow(estimated_music_props)),
132 levels = methods))
112 ## - fix headers 133 ## - fix headers
113 colnames(m_prop_ana)[1:length(phenotype_factors)] <- phenotype_factors #nolint 134 colnames(m_prop_ana)[1:length(phenotype_factors)] <- phenotype_factors #nolint
114 ## - drop NA for target phenotype (e.g. hba1c) 135 ## - drop NA for target phenotype (e.g. hba1c)
115 m_prop_ana <- subset(m_prop_ana, !is.na(m_prop_ana[phenotype_target])) 136 m_prop_ana <- subset(m_prop_ana, !is.na(m_prop_ana[phenotype_target]))
116 m_prop_ana$Disease <- factor( # nolint 137 m_prop_ana$Disease <- factor( # nolint
117 ## - Here we set Normal/Disease assignments across the two MuSiC and NNLS methods 138 ## - Here we set Normal/Disease assignments across the methods
118 sample_groups[( 139 sample_groups[(
119 m_prop_ana[phenotype_target] > phenotype_target_threshold) + 1 140 m_prop_ana[phenotype_target] > phenotype_target_threshold) + 1
120 ], 141 ],
121 sample_groups) 142 sample_groups)
122 ## - Then we scale this binary assignment to a plotable factor 143 ## - Then we scale this binary assignment to a plotable factor
123 m_prop_ana$D <- (m_prop_ana$Disease == # nolint 144 m_prop_ana$D <- (m_prop_ana$Disease == # nolint
124 sample_disease_group) / sample_disease_group_scale 145 sample_disease_group) / sample_disease_group_scale
125 146
126 jitt_compare <- scale_yaxes(ggplot(m_prop_ana, aes_string(phenotype_target, "ct.prop")) + 147 jitt_compare <- scale_yaxes(
148 ggplot(m_prop_ana, aes_string(phenotype_target, "ct.prop")) +
127 geom_smooth(method = "lm", se = FALSE, col = "black", lwd = 0.25) + 149 geom_smooth(method = "lm", se = FALSE, col = "black", lwd = 0.25) +
128 geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease), 150 geom_point(aes(fill = Method, color = Disease,
151 stroke = D, shape = Disease),
129 size = 2, alpha = 0.7) + facet_wrap(~ Method) + 152 size = 2, alpha = 0.7) + facet_wrap(~ Method) +
130 ggtitle(paste0(toupper(phenotype_target), " vs. ", 153 ggtitle(paste0(toupper(phenotype_target), " vs. ",
131 toupper(phenotype_scrna_target), " Cell Type Proportion")) + 154 toupper(phenotype_scrna_target),
155 " Cell Type Proportion")) +
132 theme_minimal() + 156 theme_minimal() +
133 ylab(paste0("Proportion of ", 157 ylab(paste0("Proportion of ",
134 phenotype_scrna_target, " cells")) + 158 phenotype_scrna_target, " cells")) +
135 xlab(paste0("Level of bulk factor (", phenotype_target, ")")) + 159 xlab(paste0("Level of bulk factor (", phenotype_target, ")")) +
136 scale_colour_manual(values = c("white", "gray20")) + 160 scale_colour_manual(values = c("white", "gray20")) +
137 scale_shape_manual(values = c(21, 24)), maxyscale) 161 scale_shape_manual(values = c(21, 24)), maxyscale)
138 } 162 }
139 163
140 ## BoxPlot 164 ## BoxPlot
141 plot_box <- scale_yaxes(Boxplot_Est(list( 165 plot_box <- scale_yaxes(Boxplot_Est(
142 data.matrix(estimated_music_props), 166 sieve_data("list",
143 data.matrix(estimated_nnls_props)), 167 data.matrix(estimated_music_props),
144 method.name = c("MuSiC", "NNLS")) + 168 data.matrix(estimated_nnls_props)),
169 method.name = methods) +
145 theme(axis.text.x = element_text(angle = -90), 170 theme(axis.text.x = element_text(angle = -90),
146 axis.text.y = element_text(size = 8)) + 171 axis.text.y = element_text(size = 8)) +
147 ggtitle(element_blank()) + theme_minimal(), maxyscale) 172 ggtitle(element_blank()) + theme_minimal(), maxyscale)
148 173
149 ## Heatmap 174 ## Heatmap
150 plot_hmap <- Prop_heat_Est(list( 175 plot_hmap <- Prop_heat_Est(
151 data.matrix(estimated_music_props), 176 sieve_data(
152 data.matrix(estimated_nnls_props)), 177 "list",
153 method.name = c("MuSiC", "NNLS")) + 178 data.matrix(estimated_music_props),
179 data.matrix(estimated_nnls_props)),
180 method.name = methods) +
154 theme(axis.text.x = element_text(angle = -90), 181 theme(axis.text.x = element_text(angle = -90),
155 axis.text.y = element_text(size = 6)) 182 axis.text.y = element_text(size = 6))
156 183
157 pdf(file = outfile_pdf, width = 8, height = 8) 184 pdf(file = outfile_pdf, width = 8, height = 8)
158 if (length(celltypes) <= 8) { 185 if (length(celltypes) <= 8) {
165 plot_grid(jitter_new, jitt_compare, labels = "auto", ncol = 1, nrow = 2) 192 plot_grid(jitter_new, jitt_compare, labels = "auto", ncol = 1, nrow = 2)
166 } 193 }
167 plot_hmap 194 plot_hmap
168 message(dev.off()) 195 message(dev.off())
169 196
197 writable <- function(obj, prefix, title) {
198 write.table(obj,
199 file = paste0("report_data/", prefix, "_",
200 title, ".tabular"),
201 quote = F, sep = "\t", col.names = NA)
202 }
203
170 ## Output Proportions 204 ## Output Proportions
171 205 if ("NNLS" %in% methods) {
172 write.table(est_prop$Est.prop.weighted, 206 writable(est_prop$Est.prop.allgene, "prop",
173 file = paste0("report_data/prop_", 207 "NNLS Estimated Proportions of Cell Types")
174 "Music Estimated Proportions of Cell Types", 208 }
175 ".tabular"), 209
176 quote = F, sep = "\t", col.names = NA) 210 if ("MuSiC" %in% methods) {
177 write.table(est_prop$Est.prop.allgene, 211 writable(est_prop$Est.prop.weighted, "prop",
178 file = paste0("report_data/prop_", 212 "Music Estimated Proportions of Cell Types")
179 "NNLS Estimated Proportions of Cell Types", 213 writable(est_prop$Weight.gene, "weightgene",
180 ".tabular"), 214 "Music Estimated Proportions of Cell Types (by Gene)")
181 quote = F, sep = "\t", col.names = NA) 215 writable(est_prop$r.squared.full, "rsquared",
182 write.table(est_prop$Weight.gene, 216 "Music R-sqr Estimated Proportions of Each Subject")
183 file = paste0("report_data/weightgene_", 217 writable(est_prop$Var.prop, "varprop",
184 "Music Estimated Proportions of Cell Types (by Gene)", 218 "Matrix of Variance of MuSiC Estimates")
185 ".tabular"), 219 }
186 quote = F, sep = "\t", col.names = NA)
187 write.table(est_prop$r.squared.full,
188 file = paste0("report_data/rsquared_",
189 "Music R-sqr Estimated Proportions of Each Subject",
190 ".tabular"),
191 quote = F, sep = "\t", col.names = NA)
192 write.table(est_prop$Var.prop,
193 file = paste0("report_data/varprop_",
194 "Matrix of Variance of MuSiC Estimates",
195 ".tabular"),
196 quote = F, sep = "\t", col.names = NA)
197 220
198 221
199 if (use_disease_factor) { 222 if (use_disease_factor) {
200 ## Summary table of linear regressions of disease factors 223 ## Summary table of linear regressions of disease factors
201 for (meth in methods) { 224 for (meth in methods) {