0
|
1 # Calculation of BLUE/BLUP
|
|
2 # Umesh Rosyara, CIMMYT
|
|
3 blupcal<- function(data,
|
|
4 Replication = "Rep",
|
|
5 Genotype = "Entry",
|
|
6 y = "y",
|
|
7 design = "rcbd",
|
|
8 Block = NULL,
|
|
9 summarizeby = NULL,
|
|
10 groupvar1 = NULL,
|
|
11 groupvar2 = NULL) {
|
|
12
|
|
13 suppressMessages(library(arm))
|
|
14 # so that it would not throw messages at the stderr channel of Galaxy
|
|
15 library(lme4, quietly = TRUE)
|
|
16
|
|
17 # Basic summary of the variables eused
|
|
18 print(paste("Replication variable = ", Replication))
|
|
19 print(paste("block variable = ", Block))
|
|
20 print(paste("Genotype variable = ", Genotype))
|
|
21 print(paste("summarizeby (included in the model) = ", summarizeby))
|
|
22 print(paste("groupvariable 1 (included in the model) = ", groupvar1))
|
|
23 print(paste("groupvariable 2 (included in the model) = ", groupvar2))
|
|
24 print(paste("design = ", design))
|
|
25 print(paste("y = ", y))
|
|
26
|
|
27 # Summary of data
|
|
28 print("*************************************")
|
|
29 print("*************************************")
|
|
30 print(summary(data))
|
|
31 print("*************************************")
|
|
32 print("*************************************")
|
|
33
|
|
34 data_list <- list()
|
|
35 if (length(summarizeby) != 0) {
|
|
36 data$summarizeby <- as.factor(data[,summarizeby])
|
|
37 data_list = split(data, f = data$summarizeby)
|
|
38 } else {
|
|
39 data$summarizeby <- "none"
|
|
40 data_list[[1]] <- data
|
|
41 }
|
|
42
|
|
43 all_results <- list()
|
|
44
|
|
45 for (i in 1: length(data_list)) {
|
|
46 data1 <- data_list[[i]]
|
|
47
|
|
48 data1$Rep <- as.factor(data1[, Replication])
|
|
49 cat("\ndata1$Rep:\n")
|
|
50 print(data1$Rep)
|
|
51
|
|
52 data1$Entry <- as.factor(data1[, Genotype])
|
|
53 cat("\ndata1$Entry:\n")
|
|
54 print(data1$Entry)
|
|
55
|
|
56 if (design == "lattice") {
|
|
57 data1$Subblock <- as.factor(data1[, Block])
|
|
58 }
|
|
59
|
|
60 data1$y <- as.numeric(data1[, y])
|
|
61 cat("\ndata1$y:\n")
|
|
62 print(data1$y)
|
|
63
|
|
64 if (length(groupvar1) != 0) {
|
|
65 data1$groupvar1 <- as.factor(data1[, groupvar1])
|
|
66 cat("\ndata1$groupvar1:\n")
|
|
67 print(data1$groupvar1)
|
|
68 }
|
|
69
|
|
70 if (length(groupvar2) != 0) {
|
|
71 data1$groupvar2 <- as.factor(data1[, groupvar2])
|
|
72 cat("\ndata1$groupvar2:\n")
|
|
73 print(data1$groupvar2)
|
|
74 }
|
|
75
|
|
76 if (design == "rcbd") {
|
|
77 if (length(groupvar1) != 0) {
|
|
78 if (length(groupvar2) != 0) {
|
|
79 fm1 <- lmer(y ~ 1 +
|
|
80 (1|Entry) +
|
|
81 (1|groupvar1) +
|
|
82 (1|groupvar2) +
|
|
83 (1|Entry:groupvar1) +
|
|
84 (1|Entry:groupvar2) +
|
|
85 (1|Entry:groupvar1:groupvar2) +
|
|
86 (1|Rep),
|
|
87 data1)
|
|
88 fm2 <- lmer(y ~ (-1) +
|
|
89 Entry +
|
|
90 (1|groupvar1) +
|
|
91 (1|groupvar2) +
|
|
92 (1|Entry:groupvar1) +
|
|
93 (1|Entry:groupvar2) +
|
|
94 (1|Entry:groupvar1:groupvar2) +
|
|
95 (1|Rep),
|
|
96 data1)
|
|
97 }
|
|
98 if (length(groupvar2) == 0) {
|
|
99 fm1 <- lmer(y ~ 1 +
|
|
100 (1|Entry) +
|
|
101 (1|groupvar1) +
|
|
102 (1|Entry:groupvar1) +
|
|
103 (1|Rep),
|
|
104 data1)
|
|
105 fm2 <- lmer(y ~ (-1) +
|
|
106 Entry +
|
|
107 (1|groupvar1) +
|
|
108 (1|Entry:groupvar1) +
|
|
109 (1|Rep),
|
|
110 data1)
|
|
111 }
|
|
112 }
|
|
113 if (length(groupvar1) == 0) {
|
|
114 if (length(groupvar2) != 0) {
|
|
115 fm1 <- lmer(y ~ 1 +
|
|
116 (1|Entry) +
|
|
117 (1|groupvar2) +
|
|
118 (1|Entry:groupvar2) +
|
|
119 (1|Rep),
|
|
120 data1)
|
|
121 fm2 <- lmer(y ~ (-1) +
|
|
122 Entry +
|
|
123 (1|groupvar2) +
|
|
124 (1|Entry:groupvar2) +
|
|
125 (1|Rep),
|
|
126 data1)
|
|
127 }
|
|
128 if (length(groupvar2) == 0) {
|
|
129 fm1 <- lmer(y ~ 1 +
|
|
130 (1|Entry) +
|
|
131 (1|Rep),
|
|
132 data1)
|
|
133 fm2 <- lmer(y ~ (-1) +
|
|
134 Entry +
|
|
135 (1|Rep),
|
|
136 data1)
|
|
137 }
|
|
138 }
|
|
139 }
|
|
140
|
|
141 if (design == "lattice") {
|
|
142 if (length(groupvar1) != 0) {
|
|
143 if (length(groupvar2) != 0) {
|
|
144 fm1 <- lmer(y ~ 1 +
|
|
145 (1|Entry) +
|
|
146 (1|groupvar1) +
|
|
147 (1|groupvar2) +
|
|
148 (1|Entry:groupvar1) +
|
|
149 (1|Entry:groupvar2) +
|
|
150 (1|Entry:groupvar1:groupvar2) +
|
|
151 (1|Rep) +
|
|
152 (1|Rep:Subblock),
|
|
153 data1)
|
|
154 fm2 <- lmer(y ~ (-1) +
|
|
155 Entry +
|
|
156 (1|groupvar1) +
|
|
157 (1|groupvar2) +
|
|
158 (1|Entry:groupvar1) +
|
|
159 (1|Entry:groupvar2) +
|
|
160 (1|Entry:groupvar1:groupvar2) +
|
|
161 (1|Rep) +
|
|
162 (1|Rep:Subblock),
|
|
163 data1)
|
|
164 }
|
|
165 if (length(groupvar2) == 0) {
|
|
166 fm1 <- lmer(y ~ 1 +
|
|
167 (1|Entry) +
|
|
168 (1|groupvar1) +
|
|
169 (1|Entry:groupvar1) +
|
|
170 (1|Rep) +
|
|
171 (1|Rep:Subblock),
|
|
172 data1)
|
|
173 fm2 <- lmer(y ~ (-1) +
|
|
174 Entry +
|
|
175 (1|groupvar1) +
|
|
176 (1|Entry:groupvar1) +
|
|
177 (1|Rep) +
|
|
178 (1|Rep:Subblock),
|
|
179 data1)
|
|
180 }
|
|
181 }
|
|
182 if (length(groupvar1) == 0) {
|
|
183 if (length(groupvar2) != 0) {
|
|
184 fm1 <- lmer(y ~ 1 +
|
|
185 (1|Entry) +
|
|
186 (1|groupvar2) +
|
|
187 (1|Entry:groupvar2) +
|
|
188 (1|Rep) +
|
|
189 (1|Rep:Subblock),
|
|
190 data1)
|
|
191 fm2 <- lmer(y ~ (-1) +
|
|
192 Entry +
|
|
193 (1|groupvar2) +
|
|
194 (1|Entry:groupvar2) +
|
|
195 (1|Rep) +
|
|
196 (1|Rep:Subblock),
|
|
197 data1)
|
|
198 }
|
|
199 if (length(groupvar2) == 0) {
|
|
200 fm1 <- lmer(y ~ 1 +
|
|
201 (1|Entry) +
|
|
202 (1|Rep) +
|
|
203 (1|Rep:Subblock),
|
|
204 data1)
|
|
205 fm2 <- lmer(y ~ (-1) +
|
|
206 Entry +
|
|
207 (1|Rep) +
|
|
208 (1|Rep:Subblock),
|
|
209 data1)
|
|
210 }
|
|
211 }
|
|
212 }
|
|
213
|
|
214 cat("\nfm1:\n")
|
|
215 print(fm1)
|
|
216 cat("\nfm2:\n")
|
|
217 print(fm2)
|
|
218
|
|
219 mean1 <- mean(data1$y, na.rm = TRUE)
|
|
220 tp <- ranef(fm1)$Entry
|
|
221 if (all(tp == 0)) {
|
|
222 stop("error in model: all BLUP effects are zero")
|
|
223 }
|
|
224
|
|
225 ########
|
|
226 # BLUP
|
|
227 ########
|
|
228 blup <- tp + mean1
|
|
229 names(blup) <- "blup"
|
|
230
|
|
231 varComponents <- as.data.frame(VarCorr(fm1))
|
|
232
|
|
233 # extract the genetic variance component
|
|
234 Vg <- varComponents[match('Entry', varComponents[,1]), 'vcov']
|
|
235
|
|
236 # This function extracts standard errors of model random effect from modeled object in lmer
|
|
237 SErr <- se.ranef(fm1)$Entry[,1]
|
|
238
|
|
239 # Prediction Error Variance (PEV)
|
|
240 PEV <- (SErr) ^ 2
|
|
241
|
|
242 # PEV reliability
|
|
243 pevReliability <- 1 - (PEV / Vg)
|
|
244 names(pevReliability) <- "PEV reliability"
|
|
245
|
|
246 blupdf = data.frame(genotype = rownames(ranef(fm1)$Entry),
|
|
247 blup = blup,
|
|
248 BLUP_PEV = PEV,
|
|
249 BLUP_pevReliability = pevReliability)
|
|
250
|
|
251 ########
|
|
252 # BLUE
|
|
253 ########
|
|
254 blue <- fixef(fm2)
|
|
255
|
|
256 bluedf <- data.frame(genotype = substr(names(blue), 6, nchar(names(blue))),
|
|
257 blue = blue)
|
|
258
|
|
259 #########
|
|
260 # MEANS
|
|
261 #########
|
|
262 means <- with(data1, tapply(y, Entry, mean, na.rm = TRUE))
|
|
263
|
|
264 meandf <- data.frame(genotype = names(means),
|
|
265 means = means)
|
|
266
|
|
267 resultdf1 <- merge(meandf, bluedf, by = "genotype")
|
|
268 resultdf <- merge(resultdf1, blupdf, by = "genotype")
|
|
269
|
|
270 if (length(summarizeby) != 0) {
|
|
271 results <- data.frame(row.names = NULL,
|
|
272 genotype = resultdf$genotype,
|
|
273 blue = resultdf$blue,
|
|
274 blup = resultdf$blup,
|
|
275 BLUP_PEV = resultdf$BLUP_PEV,
|
|
276 pevReliability = resultdf$BLUP_pevReliability,
|
|
277 means = resultdf$means,
|
|
278 group = levels(data$summarizeby)[i])
|
|
279 names(results) <- c(Genotype,
|
|
280 paste(y, "_blue", sep = ""),
|
|
281 paste(y, "_blup", sep = ""),
|
|
282 paste(y, "_PEV", sep = ""),
|
|
283 paste(y, "_pevReliability", sep = ""),
|
|
284 paste(y, "_means", sep = ""),
|
|
285 summarizeby)
|
|
286 } else {
|
|
287 results <- data.frame(row.names = NULL,
|
|
288 genotype = resultdf$genotype,
|
|
289 blue = resultdf$blue,
|
|
290 blup = resultdf$blup,
|
|
291 BLUP_PEV = resultdf$BLUP_PEV,
|
|
292 pevReliability = resultdf$BLUP_pevReliability,
|
|
293 means = resultdf$means)
|
|
294 names(results) <- c(Genotype,
|
|
295 paste(y, "_blue", sep = ""),
|
|
296 paste(y, "_blup", sep = ""),
|
|
297 paste(y, "_PEV", sep = ""),
|
|
298 paste(y, "_pevReliability", sep = ""),
|
|
299 paste(y, "_means", sep = ""))
|
|
300 }
|
|
301
|
|
302 all_results[[i]] <- results
|
|
303 }
|
|
304 outdf <- do.call("rbind", all_results)
|
|
305 class(outdf) <- c("blupcal", class(outdf))
|
|
306 return(outdf)
|
|
307 }
|
|
308
|
|
309 # plot function of blupcal module
|
|
310 plot2.blupcal <- function(outdf) {
|
|
311 hist_with_box <- function(data, main = main, hist.col, box.col) {
|
|
312 histpar <- hist(data, plot = FALSE)
|
|
313 hist(data, col = hist.col, main = main, ylim = c(0, max(histpar$density) + max(histpar$density) * 0.3), prob = TRUE)
|
|
314 m = mean(data, na.rm = TRUE)
|
|
315 std = sd(data, na.rm = TRUE)
|
|
316 curve(dnorm(x, mean = m, sd = std), col = "yellow", lwd = 2, add = TRUE, yaxt = "n")
|
|
317 boxout <- boxplot(data, plot = FALSE)
|
|
318 points(boxout$out, y = rep(max(histpar$density) * 0.3, length(boxout$out)), col = "red", pch = 1)
|
|
319 texts <- paste("mean = ", round(mean(data, na.rm = TRUE), 2), " sd = ", round(sd(data, na.rm = TRUE), 2), " n = ", length(data))
|
|
320 text(min(histpar$breaks), max(histpar$density) + max(histpar$density) * 0.2, labels = texts, pos = 4)
|
|
321 }
|
|
322 par(mfrow = c(3,1), mar = c(3.1, 3.1, 1.1, 2.1))
|
|
323 hist_with_box(outdf[,2], main = names(outdf)[2], hist.col = "green4", box.col = "green1")
|
|
324 hist_with_box(outdf[,3], main = names(outdf)[3], hist.col = "blue4", box.col = "blue1")
|
|
325 hist_with_box(outdf[,4], main = names(outdf)[4], hist.col = "gray20", box.col = "gray60")
|
|
326 }
|