Previous changeset 3:0f87be78e151 (2016-10-21) Next changeset 5:53a999633824 (2016-10-21) |
Commit message:
Uploaded |
added:
evaluation.R |
b |
diff -r 0f87be78e151 -r 62e7a8d66b1f evaluation.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/evaluation.R Fri Oct 21 06:25:28 2016 -0400 |
[ |
@@ -0,0 +1,124 @@ + +######################################################## +# +# creation date : 08/01/16 +# last modification : 23/06/16 +# author : Dr Nicolas Beaume +# owner : IRRI +# +######################################################## + +log <- file(paste(getwd(), "log_evaluation.txt", sep="/"), open = "wt") +sink(file = log, type="message") + +library("pROC") +library("randomForest") +library("miscTools") + +predictionCorrelation <- function(prediction, obs) { + return(cor(prediction, obs, method = "pearson")) +} + +r2.plot <- function(true, predicted, scatterplot=T) { + if(scatterplot) { + plot(true, predicted, xlab="trait value", ylab="predicted value", main="", pch=16, + ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted)))) + lines(true, true, col="red") + } else { + plot(true, ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted))), + xlab="individual index", ylab="traitValue", type="l", main="") + lines(predicted, col="red") + legend("topright", legend = c("pedicted", "traitValue"), col = c("red", "black"), lty=c(1,1)) + } +} + +r2 <- function(target, prediction) { + sst <- sum((target-mean(target))^2) + ssr <- sum((target-prediction)^2) + return(1-ssr/sst) +} + +################################## main function ########################### + +evaluatePredictions <- function(data, prediction=NULL, traitValue=NULL, doR2=F, + prefix="data", folds=NULL, classes=NULL, doCor=F, path=".") { + eval <- NULL + if(doR2) { + eval <- c(eval, list(r2=NULL)) + } + if(doCor) { + eval <- c(eval, list(cor=NULL)) + } + for (i in 1:length(folds)) { + train <- unlist(folds[-i]) + test <- folds[[i]] + if(doR2) { + if(!is.null(traitValue)) { + jpeg(paste(path, "/", prefix,"fold_",i,"_scatterPlot.jpeg", sep="")) + eval$r2 <- c(eval$r2, r2(traitValue[test], prediction[[i]])) + r2.plot(traitValue[test], prediction[[i]]) + dev.off() + } else { + eval$r2 <- c(eval$r2, NA) + } + } + if(doCor) { + if(!is.null(traitValue)) { + eval$cor <- c(eval$cor, cor(traitValue[test], prediction[[i]])) + } else { + eval$cor <- c(eval$cor, NA) + } + } + } + print(eval) + write.table(eval, file = paste(path,"/",prefix,"_evaluation.csv", sep=""), sep="\t", row.names = F) +} +############################ main ############################# +# evaluation.sh -i path_to_data -p prediction -f folds -t phenotype -r -c -a -n name -o path_to_result_directory +## -i : path to the file that contains the genotypes. +# please note that the table must be called "encoded" when your datafile is saved into .rda (automatic if encode.R is used) + +## -p : prediction made through any methods +# please note that the table must be called "prediction" when your datafile is saved into .rda +# (automatic if prediction methods from this pipeline were used) + +## -f : index of the folds to which belong each individual +# please note that the list must be called "folds" (automatic if folds.R was used) + +## -t : phenotype of each individual +# please note that the table must be called "phenotype" when your datafile is saved into .rda (automatic if loadGenotype.R was used) + +## -r : flag to run a R2 evaluation + +## -c : flag to run a correlation evaluation + +## -n : prefix of the names of all result files + +## -o : path to the directory where the evaluation results are stored. +cmd <- commandArgs(trailingOnly = T) +source(cmd[1]) +# set which evaluation are used +if(as.integer(doR2) == 1) { + doR2 <- T +} +if(as.integer(doCor) == 1) { + doCor <- T +} +# load genotype & phenotype +con = file(genotype) +genotype <- readLines(con = con, n = 1, ok=T) +close(con) +genotype <- read.table(genotype, sep="\t",h=T) +phenotype <- read.table(phenotype, sep="\t",h=T)[,1] +# load prediction +con = file(prediction) +prediction <- readLines(con = con, n = 1, ok=T) +close(con) +prediction <- readRDS(prediction) +# load folds +con = file(folds) +folds <- readLines(con = con, n = 1, ok=T) +close(con) +folds <- readRDS(folds) +evaluatePredictions(data=genotype, prediction=prediction, traitValue=phenotype, doR2=doR2, + prefix=name, folds=folds, doCor=doCor, path=out) \ No newline at end of file |