comparison scripts/estimateprops.R @ 1:3ca0132c182a draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 683bb72ae92b5759a239b7e3bf4c5a229ed35b54"
author bgruening
date Fri, 26 Nov 2021 15:54:51 +0000
parents 224721e76869
children fd7a16d073c5
comparison
equal deleted inserted replaced
0:224721e76869 1:3ca0132c182a
12 est_prop <- music_prop( 12 est_prop <- music_prop(
13 bulk.eset = bulk_eset, sc.eset = scrna_eset, 13 bulk.eset = bulk_eset, sc.eset = scrna_eset,
14 clusters = celltypes_label, 14 clusters = celltypes_label,
15 samples = samples_label, select.ct = celltypes, verbose = T) 15 samples = samples_label, select.ct = celltypes, verbose = T)
16 16
17 estimated_music_props <- est_prop$Est.prop.weighted
18 estimated_nnls_props <- est_prop$Est.prop.allgene
17 19
18 ## Show different in estimation methods 20 ## Show different in estimation methods
19 ## Jitter plot of estimated cell type proportions 21 ## Jitter plot of estimated cell type proportions
20 jitter.fig <- Jitter_Est( 22 jitter_fig <- Jitter_Est(
21 list(data.matrix(est_prop$Est.prop.weighted), 23 list(data.matrix(estimated_music_props),
22 data.matrix(est_prop$Est.prop.allgene)), 24 data.matrix(estimated_nnls_props)),
23 method.name = methods, title = "Jitter plot of Est Proportions") 25 method.name = methods, title = "Jitter plot of Est Proportions",
26 size = 2, alpha = 0.7) + theme_minimal()
24 27
25 28
26 ## Make a Plot 29 ## Make a Plot
27 ## A more sophisticated jitter plot is provided as below. We separated 30 ## A more sophisticated jitter plot is provided as below. We separated
28 ## the T2D subjects and normal subjects by their HbA1c levels. 31 ## the T2D subjects and normal subjects by their disease factor levels.
29 m_prop <- rbind(melt(est_prop$Est.prop.weighted), 32 estimated_music_props_flat <- melt(estimated_music_props)
30 melt(est_prop$Est.prop.allgene)) 33 estimated_nnls_props_flat <- melt(estimated_nnls_props)
31 34
35 m_prop <- rbind(estimated_music_props_flat,
36 estimated_nnls_props_flat)
32 colnames(m_prop) <- c("Sub", "CellType", "Prop") 37 colnames(m_prop) <- c("Sub", "CellType", "Prop")
33 38
39 if (is.null(celltypes)) {
40 celltypes <- levels(m_prop$CellType)
41 message("No celltypes declared, using:")
42 message(celltypes)
43 }
44
45 if (phenotype_target_threshold == -99) {
46 phenotype_target_threshold <- -Inf
47 message("phenotype target threshold set to -Inf")
48 }
49
50 if (is.null(phenotype_factors)) {
51 phenotype_factors <- colnames(pData(bulk_eset))
52 }
53 ## filter out unwanted factors like "sampleID" and "subjectName"
54 phenotype_factors <- phenotype_factors[
55 !(phenotype_factors %in% phenotype_factors_always_exclude)]
56 message("Phenotype Factors to use:")
57 message(phenotype_factors)
58
59
34 m_prop$CellType <- factor(m_prop$CellType, levels = celltypes) # nolint 60 m_prop$CellType <- factor(m_prop$CellType, levels = celltypes) # nolint
35 m_prop$Method <- factor(rep(methods, each = 89 * 6), levels = methods) # nolint 61 m_prop$Method <- factor(rep(methods, each = nrow(estimated_music_props_flat)), # nolint
36 m_prop$HbA1c <- rep(bulk_eset$hba1c, 2 * 6) # nolint 62 levels = methods)
37 m_prop <- m_prop[!is.na(m_prop$HbA1c), ] 63 m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint
38 m_prop$Disease <- factor(sample_groups[(m_prop$HbA1c > 6.5) + 1], # nolint 64 m_prop <- m_prop[!is.na(m_prop$Disease_factor), ]
65 ## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2
66 sample_groups <- c("Normal", sample_disease_group)
67 m_prop$Disease <- factor(sample_groups[(m_prop$Disease_factor > phenotype_target_threshold) + 1], # nolint
39 levels = sample_groups) 68 levels = sample_groups)
40 69
70 ## Binary to scale: e.g. TRUE / 5 = 0.2
41 m_prop$D <- (m_prop$Disease == # nolint 71 m_prop$D <- (m_prop$Disease == # nolint
42 sample_disease_group) / sample_disease_group_scale 72 sample_disease_group) / sample_disease_group_scale
43 m_prop <- rbind(subset(m_prop, Disease == healthy_phenotype), 73 ## NA's are not included in the comparison below
44 subset(m_prop, Disease != healthy_phenotype)) 74 m_prop <- rbind(subset(m_prop, Disease != sample_disease_group),
75 subset(m_prop, Disease == sample_disease_group))
45 76
46 jitter.new <- ggplot(m_prop, aes(Method, Prop)) + 77 jitter_new <- ggplot(m_prop, aes(Method, Prop)) +
47 geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease), 78 geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),
48 size = 2, alpha = 0.7, 79 size = 2, alpha = 0.7,
49 position = position_jitter(width = 0.25, height = 0)) + 80 position = position_jitter(width = 0.25, height = 0)) +
50 facet_wrap(~ CellType, scales = "free") + 81 facet_wrap(~ CellType, scales = "free") +
51 scale_colour_manual(values = c("white", "gray20")) + 82 scale_colour_manual(values = c("white", "gray20")) +
52 scale_shape_manual(values = c(21, 24)) + theme_minimal() 83 scale_shape_manual(values = c(21, 24)) + theme_minimal()
53 84
54 ## Plot to compare method effectiveness 85 ## Plot to compare method effectiveness
55 ## Create dataframe for beta cell proportions and HbA1c levels 86 ## Create dataframe for beta cell proportions and Disease_factor levels
56 m_prop_ana <- data.frame(pData(bulk_eset)[rep(1:89, 2), phenotype_factors], 87 m_prop_ana <- data.frame(pData(bulk_eset)[rep(1:nrow(estimated_music_props), 2), #nolint
57 ct.prop = c(est_prop$Est.prop.weighted[, 2], 88 phenotype_factors],
58 est_prop$Est.prop.allgene[, 2]), 89 ct.prop = c(estimated_music_props[, 2],
59 Method = factor(rep(methods, each = 89), 90 estimated_nnls_props[, 2]),
91 Method = factor(rep(methods,
92 each = nrow(estimated_music_props)),
60 levels = methods)) 93 levels = methods))
61 colnames(m_prop_ana)[1:4] <- phenotype_factors 94 colnames(m_prop_ana)[1:length(phenotype_factors)] <- phenotype_factors #nolint
62 m_prop_ana <- subset(m_prop_ana, !is.na(m_prop_ana[phenotype_gene])) 95 m_prop_ana <- subset(m_prop_ana, !is.na(m_prop_ana[phenotype_target]))
63 m_prop_ana$Disease <- factor(sample_groups[( # nolint 96 m_prop_ana$Disease <- factor(sample_groups[( # nolint
64 m_prop_ana[phenotype_gene] > 6.5) + 1], sample_groups) 97 m_prop_ana[phenotype_target] > phenotype_target_threshold) + 1],
98 sample_groups)
65 m_prop_ana$D <- (m_prop_ana$Disease == # nolint 99 m_prop_ana$D <- (m_prop_ana$Disease == # nolint
66 sample_disease_group) / sample_disease_group_scale 100 sample_disease_group) / sample_disease_group_scale
67 101
68 jitt_compare <- ggplot(m_prop_ana, aes_string(phenotype_gene, "ct.prop")) + 102 jitt_compare <- ggplot(m_prop_ana, aes_string(phenotype_target, "ct.prop")) +
69 geom_smooth(method = "lm", se = FALSE, col = "black", lwd = 0.25) + 103 geom_smooth(method = "lm", se = FALSE, col = "black", lwd = 0.25) +
70 geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease), 104 geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),
71 size = 2, alpha = 0.7) + facet_wrap(~ Method) + 105 size = 2, alpha = 0.7) + facet_wrap(~ Method) +
72 ggtitle(compare_title) + theme_minimal() + 106 ggtitle(compare_title) + theme_minimal() +
73 scale_colour_manual(values = c("white", "gray20")) + 107 scale_colour_manual(values = c("white", "gray20")) +
74 scale_shape_manual(values = c(21, 24)) 108 scale_shape_manual(values = c(21, 24))
75 109
110 ## BoxPlot
111 plot_box <- Boxplot_Est(list(
112 data.matrix(estimated_music_props),
113 data.matrix(estimated_nnls_props)),
114 method.name = c("MuSiC", "NNLS")) +
115 theme(axis.text.x = element_text(angle = -90),
116 axis.text.y = element_text(size = 8)) +
117 ggtitle(element_blank()) + theme_minimal()
118
119 ## Heatmap
120 plot_hmap <- Prop_heat_Est(list(
121 data.matrix(estimated_music_props),
122 data.matrix(estimated_nnls_props)),
123 method.name = c("MuSiC", "NNLS")) +
124 theme(axis.text.x = element_text(angle = -90),
125 axis.text.y = element_text(size = 6))
76 126
77 pdf(file = outfile_pdf, width = 8, height = 8) 127 pdf(file = outfile_pdf, width = 8, height = 8)
78 plot_grid(jitter.fig, jitter.new, labels = "auto", ncol = 1, nrow = 2) 128 plot_grid(jitter_fig, plot_box, labels = "auto", ncol = 1, nrow = 2)
79 jitt_compare 129 plot_grid(jitter_new, jitt_compare, labels = "auto", ncol = 1, nrow = 2)
80 dev.off() 130 plot_hmap
131 message(dev.off())
132
133 ## Output Proportions
134
135 write.table(est_prop$Est.prop.weighted,
136 file = paste0("report_data/prop_",
137 "Music Estimated Proportions of Cell Types",
138 ".tabular"),
139 quote = F, sep = "\t", col.names = NA)
140 write.table(est_prop$Est.prop.allgene,
141 file = paste0("report_data/prop_",
142 "NNLS Estimated Proportions of Cell Types",
143 ".tabular"),
144 quote = F, sep = "\t", col.names = NA)
145 write.table(est_prop$Weight.gene,
146 file = paste0("report_data/weightgene_",
147 "Music Estimated Proportions of Cell Types (by Gene)",
148 ".tabular"),
149 quote = F, sep = "\t", col.names = NA)
150 write.table(est_prop$r.squared.full,
151 file = paste0("report_data/rsquared_",
152 "Music R-sqr Estimated Proportions of Each Subject",
153 ".tabular"),
154 quote = F, sep = "\t", col.names = NA)
155 write.table(est_prop$Var.prop,
156 file = paste0("report_data/varprop_",
157 "Matrix of Variance of MuSiC Estimates",
158 ".tabular"),
159 quote = F, sep = "\t", col.names = NA)
160
81 161
82 ## Summary table 162 ## Summary table
83 for (meth in methods) { 163 for (meth in methods) {
84 ##lm_beta_meth = lm(ct.prop ~ age + bmi + hba1c + gender, data = 164 ##lm_beta_meth = lm(ct.prop ~ age + bmi + hba1c + gender, data =
85 ##subset(m_prop_ana, Method == meth)) 165 sub_data <- subset(m_prop_ana, Method == meth)
166 ## We can only do regression where there are more than 1 factors
167 ## so we must find and exclude the ones which are not
168 gt1_facts <- sapply(phenotype_factors, function(facname) {
169 return(length(unique(sort(sub_data[[facname]]))) == 1)
170 })
171 form_factors <- phenotype_factors
172 exclude_facts <- names(gt1_facts)[gt1_facts]
173 if (length(exclude_facts) > 0) {
174 message("Factors with only one level will be excluded:")
175 message(exclude_facts)
176 form_factors <- phenotype_factors[
177 !(phenotype_factors %in% exclude_facts)]
178 }
86 lm_beta_meth <- lm(as.formula( 179 lm_beta_meth <- lm(as.formula(
87 paste("ct.prop", paste(phenotype_factors, collapse = " + "), 180 paste("ct.prop", paste(form_factors, collapse = " + "),
88 sep = " ~ ")), 181 sep = " ~ ")), data = sub_data)
89 data = subset(m_prop_ana, Method == meth)) 182 message(paste0("Summary: ", meth))
90 print(paste0("Summary: ", meth))
91 capture.output(summary(lm_beta_meth), 183 capture.output(summary(lm_beta_meth),
92 file = paste0("report_data/summ_", meth, ".txt")) 184 file = paste0("report_data/summ_Log of ",
185 meth,
186 " fitting.txt"))
93 } 187 }