Mercurial > repos > dereeper > blup_calculator
comparison blupcal.R @ 0:45d215f2be74 draft
Uploaded
author | dereeper |
---|---|
date | Sat, 29 Dec 2018 18:44:05 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:45d215f2be74 |
---|---|
1 # Calculation of BLUE/BLUP | |
2 # Umesh Rosyara, CIMMYT | |
3 blupcal<- function(data, | |
4 Replication = "Rep", | |
5 Genotype = "Entry", | |
6 y = "y", | |
7 design = "rcbd", | |
8 Block = NULL, | |
9 summarizeby = NULL, | |
10 groupvar1 = NULL, | |
11 groupvar2 = NULL) { | |
12 | |
13 suppressMessages(library(arm)) | |
14 # so that it would not throw messages at the stderr channel of Galaxy | |
15 library(lme4, quietly = TRUE) | |
16 | |
17 # Basic summary of the variables eused | |
18 print(paste("Replication variable = ", Replication)) | |
19 print(paste("block variable = ", Block)) | |
20 print(paste("Genotype variable = ", Genotype)) | |
21 print(paste("summarizeby (included in the model) = ", summarizeby)) | |
22 print(paste("groupvariable 1 (included in the model) = ", groupvar1)) | |
23 print(paste("groupvariable 2 (included in the model) = ", groupvar2)) | |
24 print(paste("design = ", design)) | |
25 print(paste("y = ", y)) | |
26 | |
27 # Summary of data | |
28 print("*************************************") | |
29 print("*************************************") | |
30 print(summary(data)) | |
31 print("*************************************") | |
32 print("*************************************") | |
33 | |
34 data_list <- list() | |
35 if (length(summarizeby) != 0) { | |
36 data$summarizeby <- as.factor(data[,summarizeby]) | |
37 data_list = split(data, f = data$summarizeby) | |
38 } else { | |
39 data$summarizeby <- "none" | |
40 data_list[[1]] <- data | |
41 } | |
42 | |
43 all_results <- list() | |
44 | |
45 for (i in 1: length(data_list)) { | |
46 data1 <- data_list[[i]] | |
47 | |
48 data1$Rep <- as.factor(data1[, Replication]) | |
49 cat("\ndata1$Rep:\n") | |
50 print(data1$Rep) | |
51 | |
52 data1$Entry <- as.factor(data1[, Genotype]) | |
53 cat("\ndata1$Entry:\n") | |
54 print(data1$Entry) | |
55 | |
56 if (design == "lattice") { | |
57 data1$Subblock <- as.factor(data1[, Block]) | |
58 } | |
59 | |
60 data1$y <- as.numeric(data1[, y]) | |
61 cat("\ndata1$y:\n") | |
62 print(data1$y) | |
63 | |
64 if (length(groupvar1) != 0) { | |
65 data1$groupvar1 <- as.factor(data1[, groupvar1]) | |
66 cat("\ndata1$groupvar1:\n") | |
67 print(data1$groupvar1) | |
68 } | |
69 | |
70 if (length(groupvar2) != 0) { | |
71 data1$groupvar2 <- as.factor(data1[, groupvar2]) | |
72 cat("\ndata1$groupvar2:\n") | |
73 print(data1$groupvar2) | |
74 } | |
75 | |
76 if (design == "rcbd") { | |
77 if (length(groupvar1) != 0) { | |
78 if (length(groupvar2) != 0) { | |
79 fm1 <- lmer(y ~ 1 + | |
80 (1|Entry) + | |
81 (1|groupvar1) + | |
82 (1|groupvar2) + | |
83 (1|Entry:groupvar1) + | |
84 (1|Entry:groupvar2) + | |
85 (1|Entry:groupvar1:groupvar2) + | |
86 (1|Rep), | |
87 data1) | |
88 fm2 <- lmer(y ~ (-1) + | |
89 Entry + | |
90 (1|groupvar1) + | |
91 (1|groupvar2) + | |
92 (1|Entry:groupvar1) + | |
93 (1|Entry:groupvar2) + | |
94 (1|Entry:groupvar1:groupvar2) + | |
95 (1|Rep), | |
96 data1) | |
97 } | |
98 if (length(groupvar2) == 0) { | |
99 fm1 <- lmer(y ~ 1 + | |
100 (1|Entry) + | |
101 (1|groupvar1) + | |
102 (1|Entry:groupvar1) + | |
103 (1|Rep), | |
104 data1) | |
105 fm2 <- lmer(y ~ (-1) + | |
106 Entry + | |
107 (1|groupvar1) + | |
108 (1|Entry:groupvar1) + | |
109 (1|Rep), | |
110 data1) | |
111 } | |
112 } | |
113 if (length(groupvar1) == 0) { | |
114 if (length(groupvar2) != 0) { | |
115 fm1 <- lmer(y ~ 1 + | |
116 (1|Entry) + | |
117 (1|groupvar2) + | |
118 (1|Entry:groupvar2) + | |
119 (1|Rep), | |
120 data1) | |
121 fm2 <- lmer(y ~ (-1) + | |
122 Entry + | |
123 (1|groupvar2) + | |
124 (1|Entry:groupvar2) + | |
125 (1|Rep), | |
126 data1) | |
127 } | |
128 if (length(groupvar2) == 0) { | |
129 fm1 <- lmer(y ~ 1 + | |
130 (1|Entry) + | |
131 (1|Rep), | |
132 data1) | |
133 fm2 <- lmer(y ~ (-1) + | |
134 Entry + | |
135 (1|Rep), | |
136 data1) | |
137 } | |
138 } | |
139 } | |
140 | |
141 if (design == "lattice") { | |
142 if (length(groupvar1) != 0) { | |
143 if (length(groupvar2) != 0) { | |
144 fm1 <- lmer(y ~ 1 + | |
145 (1|Entry) + | |
146 (1|groupvar1) + | |
147 (1|groupvar2) + | |
148 (1|Entry:groupvar1) + | |
149 (1|Entry:groupvar2) + | |
150 (1|Entry:groupvar1:groupvar2) + | |
151 (1|Rep) + | |
152 (1|Rep:Subblock), | |
153 data1) | |
154 fm2 <- lmer(y ~ (-1) + | |
155 Entry + | |
156 (1|groupvar1) + | |
157 (1|groupvar2) + | |
158 (1|Entry:groupvar1) + | |
159 (1|Entry:groupvar2) + | |
160 (1|Entry:groupvar1:groupvar2) + | |
161 (1|Rep) + | |
162 (1|Rep:Subblock), | |
163 data1) | |
164 } | |
165 if (length(groupvar2) == 0) { | |
166 fm1 <- lmer(y ~ 1 + | |
167 (1|Entry) + | |
168 (1|groupvar1) + | |
169 (1|Entry:groupvar1) + | |
170 (1|Rep) + | |
171 (1|Rep:Subblock), | |
172 data1) | |
173 fm2 <- lmer(y ~ (-1) + | |
174 Entry + | |
175 (1|groupvar1) + | |
176 (1|Entry:groupvar1) + | |
177 (1|Rep) + | |
178 (1|Rep:Subblock), | |
179 data1) | |
180 } | |
181 } | |
182 if (length(groupvar1) == 0) { | |
183 if (length(groupvar2) != 0) { | |
184 fm1 <- lmer(y ~ 1 + | |
185 (1|Entry) + | |
186 (1|groupvar2) + | |
187 (1|Entry:groupvar2) + | |
188 (1|Rep) + | |
189 (1|Rep:Subblock), | |
190 data1) | |
191 fm2 <- lmer(y ~ (-1) + | |
192 Entry + | |
193 (1|groupvar2) + | |
194 (1|Entry:groupvar2) + | |
195 (1|Rep) + | |
196 (1|Rep:Subblock), | |
197 data1) | |
198 } | |
199 if (length(groupvar2) == 0) { | |
200 fm1 <- lmer(y ~ 1 + | |
201 (1|Entry) + | |
202 (1|Rep) + | |
203 (1|Rep:Subblock), | |
204 data1) | |
205 fm2 <- lmer(y ~ (-1) + | |
206 Entry + | |
207 (1|Rep) + | |
208 (1|Rep:Subblock), | |
209 data1) | |
210 } | |
211 } | |
212 } | |
213 | |
214 cat("\nfm1:\n") | |
215 print(fm1) | |
216 cat("\nfm2:\n") | |
217 print(fm2) | |
218 | |
219 mean1 <- mean(data1$y, na.rm = TRUE) | |
220 tp <- ranef(fm1)$Entry | |
221 if (all(tp == 0)) { | |
222 stop("error in model: all BLUP effects are zero") | |
223 } | |
224 | |
225 ######## | |
226 # BLUP | |
227 ######## | |
228 blup <- tp + mean1 | |
229 names(blup) <- "blup" | |
230 | |
231 varComponents <- as.data.frame(VarCorr(fm1)) | |
232 | |
233 # extract the genetic variance component | |
234 Vg <- varComponents[match('Entry', varComponents[,1]), 'vcov'] | |
235 | |
236 # This function extracts standard errors of model random effect from modeled object in lmer | |
237 SErr <- se.ranef(fm1)$Entry[,1] | |
238 | |
239 # Prediction Error Variance (PEV) | |
240 PEV <- (SErr) ^ 2 | |
241 | |
242 # PEV reliability | |
243 pevReliability <- 1 - (PEV / Vg) | |
244 names(pevReliability) <- "PEV reliability" | |
245 | |
246 blupdf = data.frame(genotype = rownames(ranef(fm1)$Entry), | |
247 blup = blup, | |
248 BLUP_PEV = PEV, | |
249 BLUP_pevReliability = pevReliability) | |
250 | |
251 ######## | |
252 # BLUE | |
253 ######## | |
254 blue <- fixef(fm2) | |
255 | |
256 bluedf <- data.frame(genotype = substr(names(blue), 6, nchar(names(blue))), | |
257 blue = blue) | |
258 | |
259 ######### | |
260 # MEANS | |
261 ######### | |
262 means <- with(data1, tapply(y, Entry, mean, na.rm = TRUE)) | |
263 | |
264 meandf <- data.frame(genotype = names(means), | |
265 means = means) | |
266 | |
267 resultdf1 <- merge(meandf, bluedf, by = "genotype") | |
268 resultdf <- merge(resultdf1, blupdf, by = "genotype") | |
269 | |
270 if (length(summarizeby) != 0) { | |
271 results <- data.frame(row.names = NULL, | |
272 genotype = resultdf$genotype, | |
273 blue = resultdf$blue, | |
274 blup = resultdf$blup, | |
275 BLUP_PEV = resultdf$BLUP_PEV, | |
276 pevReliability = resultdf$BLUP_pevReliability, | |
277 means = resultdf$means, | |
278 group = levels(data$summarizeby)[i]) | |
279 names(results) <- c(Genotype, | |
280 paste(y, "_blue", sep = ""), | |
281 paste(y, "_blup", sep = ""), | |
282 paste(y, "_PEV", sep = ""), | |
283 paste(y, "_pevReliability", sep = ""), | |
284 paste(y, "_means", sep = ""), | |
285 summarizeby) | |
286 } else { | |
287 results <- data.frame(row.names = NULL, | |
288 genotype = resultdf$genotype, | |
289 blue = resultdf$blue, | |
290 blup = resultdf$blup, | |
291 BLUP_PEV = resultdf$BLUP_PEV, | |
292 pevReliability = resultdf$BLUP_pevReliability, | |
293 means = resultdf$means) | |
294 names(results) <- c(Genotype, | |
295 paste(y, "_blue", sep = ""), | |
296 paste(y, "_blup", sep = ""), | |
297 paste(y, "_PEV", sep = ""), | |
298 paste(y, "_pevReliability", sep = ""), | |
299 paste(y, "_means", sep = "")) | |
300 } | |
301 | |
302 all_results[[i]] <- results | |
303 } | |
304 outdf <- do.call("rbind", all_results) | |
305 class(outdf) <- c("blupcal", class(outdf)) | |
306 return(outdf) | |
307 } | |
308 | |
309 # plot function of blupcal module | |
310 plot2.blupcal <- function(outdf) { | |
311 hist_with_box <- function(data, main = main, hist.col, box.col) { | |
312 histpar <- hist(data, plot = FALSE) | |
313 hist(data, col = hist.col, main = main, ylim = c(0, max(histpar$density) + max(histpar$density) * 0.3), prob = TRUE) | |
314 m = mean(data, na.rm = TRUE) | |
315 std = sd(data, na.rm = TRUE) | |
316 curve(dnorm(x, mean = m, sd = std), col = "yellow", lwd = 2, add = TRUE, yaxt = "n") | |
317 boxout <- boxplot(data, plot = FALSE) | |
318 points(boxout$out, y = rep(max(histpar$density) * 0.3, length(boxout$out)), col = "red", pch = 1) | |
319 texts <- paste("mean = ", round(mean(data, na.rm = TRUE), 2), " sd = ", round(sd(data, na.rm = TRUE), 2), " n = ", length(data)) | |
320 text(min(histpar$breaks), max(histpar$density) + max(histpar$density) * 0.2, labels = texts, pos = 4) | |
321 } | |
322 par(mfrow = c(3,1), mar = c(3.1, 3.1, 1.1, 2.1)) | |
323 hist_with_box(outdf[,2], main = names(outdf)[2], hist.col = "green4", box.col = "green1") | |
324 hist_with_box(outdf[,3], main = names(outdf)[3], hist.col = "blue4", box.col = "blue1") | |
325 hist_with_box(outdf[,4], main = names(outdf)[4], hist.col = "gray20", box.col = "gray60") | |
326 } |