Mercurial > repos > dereeper > blup_calculator
diff blupcal.R @ 0:45d215f2be74 draft
Uploaded
author | dereeper |
---|---|
date | Sat, 29 Dec 2018 18:44:05 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blupcal.R Sat Dec 29 18:44:05 2018 -0500 @@ -0,0 +1,326 @@ +# Calculation of BLUE/BLUP +# Umesh Rosyara, CIMMYT +blupcal<- function(data, + Replication = "Rep", + Genotype = "Entry", + y = "y", + design = "rcbd", + Block = NULL, + summarizeby = NULL, + groupvar1 = NULL, + groupvar2 = NULL) { + + suppressMessages(library(arm)) + # so that it would not throw messages at the stderr channel of Galaxy + library(lme4, quietly = TRUE) + + # Basic summary of the variables eused + print(paste("Replication variable = ", Replication)) + print(paste("block variable = ", Block)) + print(paste("Genotype variable = ", Genotype)) + print(paste("summarizeby (included in the model) = ", summarizeby)) + print(paste("groupvariable 1 (included in the model) = ", groupvar1)) + print(paste("groupvariable 2 (included in the model) = ", groupvar2)) + print(paste("design = ", design)) + print(paste("y = ", y)) + + # Summary of data + print("*************************************") + print("*************************************") + print(summary(data)) + print("*************************************") + print("*************************************") + + data_list <- list() + if (length(summarizeby) != 0) { + data$summarizeby <- as.factor(data[,summarizeby]) + data_list = split(data, f = data$summarizeby) + } else { + data$summarizeby <- "none" + data_list[[1]] <- data + } + + all_results <- list() + + for (i in 1: length(data_list)) { + data1 <- data_list[[i]] + + data1$Rep <- as.factor(data1[, Replication]) + cat("\ndata1$Rep:\n") + print(data1$Rep) + + data1$Entry <- as.factor(data1[, Genotype]) + cat("\ndata1$Entry:\n") + print(data1$Entry) + + if (design == "lattice") { + data1$Subblock <- as.factor(data1[, Block]) + } + + data1$y <- as.numeric(data1[, y]) + cat("\ndata1$y:\n") + print(data1$y) + + if (length(groupvar1) != 0) { + data1$groupvar1 <- as.factor(data1[, groupvar1]) + cat("\ndata1$groupvar1:\n") + print(data1$groupvar1) + } + + if (length(groupvar2) != 0) { + data1$groupvar2 <- as.factor(data1[, groupvar2]) + cat("\ndata1$groupvar2:\n") + print(data1$groupvar2) + } + + if (design == "rcbd") { + if (length(groupvar1) != 0) { + if (length(groupvar2) != 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|groupvar1) + + (1|groupvar2) + + (1|Entry:groupvar1) + + (1|Entry:groupvar2) + + (1|Entry:groupvar1:groupvar2) + + (1|Rep), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|groupvar1) + + (1|groupvar2) + + (1|Entry:groupvar1) + + (1|Entry:groupvar2) + + (1|Entry:groupvar1:groupvar2) + + (1|Rep), + data1) + } + if (length(groupvar2) == 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|groupvar1) + + (1|Entry:groupvar1) + + (1|Rep), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|groupvar1) + + (1|Entry:groupvar1) + + (1|Rep), + data1) + } + } + if (length(groupvar1) == 0) { + if (length(groupvar2) != 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|groupvar2) + + (1|Entry:groupvar2) + + (1|Rep), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|groupvar2) + + (1|Entry:groupvar2) + + (1|Rep), + data1) + } + if (length(groupvar2) == 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|Rep), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|Rep), + data1) + } + } + } + + if (design == "lattice") { + if (length(groupvar1) != 0) { + if (length(groupvar2) != 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|groupvar1) + + (1|groupvar2) + + (1|Entry:groupvar1) + + (1|Entry:groupvar2) + + (1|Entry:groupvar1:groupvar2) + + (1|Rep) + + (1|Rep:Subblock), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|groupvar1) + + (1|groupvar2) + + (1|Entry:groupvar1) + + (1|Entry:groupvar2) + + (1|Entry:groupvar1:groupvar2) + + (1|Rep) + + (1|Rep:Subblock), + data1) + } + if (length(groupvar2) == 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|groupvar1) + + (1|Entry:groupvar1) + + (1|Rep) + + (1|Rep:Subblock), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|groupvar1) + + (1|Entry:groupvar1) + + (1|Rep) + + (1|Rep:Subblock), + data1) + } + } + if (length(groupvar1) == 0) { + if (length(groupvar2) != 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|groupvar2) + + (1|Entry:groupvar2) + + (1|Rep) + + (1|Rep:Subblock), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|groupvar2) + + (1|Entry:groupvar2) + + (1|Rep) + + (1|Rep:Subblock), + data1) + } + if (length(groupvar2) == 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|Rep) + + (1|Rep:Subblock), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|Rep) + + (1|Rep:Subblock), + data1) + } + } + } + + cat("\nfm1:\n") + print(fm1) + cat("\nfm2:\n") + print(fm2) + + mean1 <- mean(data1$y, na.rm = TRUE) + tp <- ranef(fm1)$Entry + if (all(tp == 0)) { + stop("error in model: all BLUP effects are zero") + } + + ######## + # BLUP + ######## + blup <- tp + mean1 + names(blup) <- "blup" + + varComponents <- as.data.frame(VarCorr(fm1)) + + # extract the genetic variance component + Vg <- varComponents[match('Entry', varComponents[,1]), 'vcov'] + + # This function extracts standard errors of model random effect from modeled object in lmer + SErr <- se.ranef(fm1)$Entry[,1] + + # Prediction Error Variance (PEV) + PEV <- (SErr) ^ 2 + + # PEV reliability + pevReliability <- 1 - (PEV / Vg) + names(pevReliability) <- "PEV reliability" + + blupdf = data.frame(genotype = rownames(ranef(fm1)$Entry), + blup = blup, + BLUP_PEV = PEV, + BLUP_pevReliability = pevReliability) + + ######## + # BLUE + ######## + blue <- fixef(fm2) + + bluedf <- data.frame(genotype = substr(names(blue), 6, nchar(names(blue))), + blue = blue) + + ######### + # MEANS + ######### + means <- with(data1, tapply(y, Entry, mean, na.rm = TRUE)) + + meandf <- data.frame(genotype = names(means), + means = means) + + resultdf1 <- merge(meandf, bluedf, by = "genotype") + resultdf <- merge(resultdf1, blupdf, by = "genotype") + + if (length(summarizeby) != 0) { + results <- data.frame(row.names = NULL, + genotype = resultdf$genotype, + blue = resultdf$blue, + blup = resultdf$blup, + BLUP_PEV = resultdf$BLUP_PEV, + pevReliability = resultdf$BLUP_pevReliability, + means = resultdf$means, + group = levels(data$summarizeby)[i]) + names(results) <- c(Genotype, + paste(y, "_blue", sep = ""), + paste(y, "_blup", sep = ""), + paste(y, "_PEV", sep = ""), + paste(y, "_pevReliability", sep = ""), + paste(y, "_means", sep = ""), + summarizeby) + } else { + results <- data.frame(row.names = NULL, + genotype = resultdf$genotype, + blue = resultdf$blue, + blup = resultdf$blup, + BLUP_PEV = resultdf$BLUP_PEV, + pevReliability = resultdf$BLUP_pevReliability, + means = resultdf$means) + names(results) <- c(Genotype, + paste(y, "_blue", sep = ""), + paste(y, "_blup", sep = ""), + paste(y, "_PEV", sep = ""), + paste(y, "_pevReliability", sep = ""), + paste(y, "_means", sep = "")) + } + + all_results[[i]] <- results + } + outdf <- do.call("rbind", all_results) + class(outdf) <- c("blupcal", class(outdf)) + return(outdf) +} + +# plot function of blupcal module +plot2.blupcal <- function(outdf) { + hist_with_box <- function(data, main = main, hist.col, box.col) { + histpar <- hist(data, plot = FALSE) + hist(data, col = hist.col, main = main, ylim = c(0, max(histpar$density) + max(histpar$density) * 0.3), prob = TRUE) + m = mean(data, na.rm = TRUE) + std = sd(data, na.rm = TRUE) + curve(dnorm(x, mean = m, sd = std), col = "yellow", lwd = 2, add = TRUE, yaxt = "n") + boxout <- boxplot(data, plot = FALSE) + points(boxout$out, y = rep(max(histpar$density) * 0.3, length(boxout$out)), col = "red", pch = 1) + texts <- paste("mean = ", round(mean(data, na.rm = TRUE), 2), " sd = ", round(sd(data, na.rm = TRUE), 2), " n = ", length(data)) + text(min(histpar$breaks), max(histpar$density) + max(histpar$density) * 0.2, labels = texts, pos = 4) + } + par(mfrow = c(3,1), mar = c(3.1, 3.1, 1.1, 2.1)) + hist_with_box(outdf[,2], main = names(outdf)[2], hist.col = "green4", box.col = "green1") + hist_with_box(outdf[,3], main = names(outdf)[3], hist.col = "blue4", box.col = "blue1") + hist_with_box(outdf[,4], main = names(outdf)[4], hist.col = "gray20", box.col = "gray60") +}