| 
4
 | 
     1 
 | 
| 
 | 
     2 ########################################################
 | 
| 
 | 
     3 #
 | 
| 
 | 
     4 # creation date : 08/01/16
 | 
| 
 | 
     5 # last modification : 23/06/16
 | 
| 
 | 
     6 # author : Dr Nicolas Beaume
 | 
| 
 | 
     7 # owner : IRRI
 | 
| 
 | 
     8 #
 | 
| 
 | 
     9 ########################################################
 | 
| 
 | 
    10 
 | 
| 
 | 
    11 log <- file(paste(getwd(), "log_evaluation.txt", sep="/"), open = "wt")
 | 
| 
 | 
    12 sink(file = log, type="message")
 | 
| 
 | 
    13 
 | 
| 
 | 
    14 library("pROC")
 | 
| 
 | 
    15 library("randomForest")
 | 
| 
 | 
    16 library("miscTools")
 | 
| 
 | 
    17 
 | 
| 
 | 
    18 predictionCorrelation <- function(prediction, obs) {
 | 
| 
 | 
    19   return(cor(prediction, obs, method = "pearson"))
 | 
| 
 | 
    20 }
 | 
| 
 | 
    21 
 | 
| 
 | 
    22 r2.plot <- function(true, predicted, scatterplot=T) {
 | 
| 
 | 
    23   if(scatterplot) {
 | 
| 
 | 
    24     plot(true, predicted, xlab="trait value", ylab="predicted value", main="", pch=16, 
 | 
| 
 | 
    25          ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted))))
 | 
| 
 | 
    26     lines(true, true, col="red")
 | 
| 
 | 
    27   } else {
 | 
| 
 | 
    28     plot(true, ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted))), 
 | 
| 
 | 
    29          xlab="individual index", ylab="traitValue", type="l", main="")
 | 
| 
 | 
    30     lines(predicted, col="red")
 | 
| 
 | 
    31     legend("topright", legend = c("pedicted", "traitValue"), col = c("red", "black"), lty=c(1,1))
 | 
| 
 | 
    32   }
 | 
| 
 | 
    33 }
 | 
| 
 | 
    34 
 | 
| 
 | 
    35 r2 <- function(target, prediction) {
 | 
| 
 | 
    36   sst <- sum((target-mean(target))^2)
 | 
| 
 | 
    37   ssr <- sum((target-prediction)^2)
 | 
| 
 | 
    38   return(1-ssr/sst)
 | 
| 
 | 
    39 }
 | 
| 
 | 
    40 
 | 
| 
 | 
    41 ################################## main function ###########################
 | 
| 
 | 
    42 
 | 
| 
 | 
    43 evaluatePredictions <- function(data, prediction=NULL, traitValue=NULL, doR2=F, 
 | 
| 
 | 
    44                                 prefix="data", folds=NULL, classes=NULL, doCor=F, path=".") {
 | 
| 
 | 
    45   eval <- NULL
 | 
| 
 | 
    46   if(doR2) {
 | 
| 
 | 
    47     eval <- c(eval, list(r2=NULL))
 | 
| 
 | 
    48   }
 | 
| 
 | 
    49   if(doCor) {
 | 
| 
 | 
    50     eval <- c(eval, list(cor=NULL))
 | 
| 
 | 
    51   }
 | 
| 
 | 
    52   for (i in 1:length(folds)) {
 | 
| 
 | 
    53     train <- unlist(folds[-i])
 | 
| 
 | 
    54     test <- folds[[i]]
 | 
| 
 | 
    55     if(doR2) {
 | 
| 
 | 
    56       if(!is.null(traitValue)) {
 | 
| 
 | 
    57         jpeg(paste(path, "/", prefix,"fold_",i,"_scatterPlot.jpeg", sep=""))
 | 
| 
 | 
    58         eval$r2 <- c(eval$r2, r2(traitValue[test], prediction[[i]]))
 | 
| 
 | 
    59         r2.plot(traitValue[test], prediction[[i]])
 | 
| 
 | 
    60         dev.off()
 | 
| 
 | 
    61       } else {
 | 
| 
 | 
    62         eval$r2 <- c(eval$r2, NA)
 | 
| 
 | 
    63       }
 | 
| 
 | 
    64     }
 | 
| 
 | 
    65     if(doCor) {
 | 
| 
 | 
    66       if(!is.null(traitValue)) {
 | 
| 
 | 
    67         eval$cor <- c(eval$cor, cor(traitValue[test], prediction[[i]]))
 | 
| 
 | 
    68       } else {
 | 
| 
 | 
    69         eval$cor <- c(eval$cor, NA)
 | 
| 
 | 
    70       }
 | 
| 
 | 
    71     }
 | 
| 
 | 
    72   }
 | 
| 
 | 
    73   print(eval)
 | 
| 
 | 
    74   write.table(eval, file = paste(path,"/",prefix,"_evaluation.csv", sep=""), sep="\t", row.names = F)
 | 
| 
 | 
    75 }
 | 
| 
 | 
    76 ############################ main #############################
 | 
| 
 | 
    77 # evaluation.sh -i path_to_data -p prediction -f folds -t phenotype -r -c -a -n name -o path_to_result_directory 
 | 
| 
 | 
    78 ## -i : path to the file that contains the genotypes.
 | 
| 
 | 
    79 # please note that the table must be called "encoded" when your datafile is saved into .rda (automatic if encode.R is used)
 | 
| 
 | 
    80 
 | 
| 
 | 
    81 ## -p : prediction made through any methods
 | 
| 
 | 
    82 # please note that the table must be called "prediction" when your datafile is saved into .rda
 | 
| 
 | 
    83 # (automatic if prediction methods from this pipeline were used)
 | 
| 
 | 
    84 
 | 
| 
 | 
    85 ## -f : index of the folds to which belong each individual
 | 
| 
 | 
    86 # please note that the list must be called "folds" (automatic if folds.R was used)
 | 
| 
 | 
    87 
 | 
| 
 | 
    88 ## -t : phenotype of each individual
 | 
| 
 | 
    89 # please note that the table must be called "phenotype" when your datafile is saved into .rda (automatic if loadGenotype.R was used)
 | 
| 
 | 
    90 
 | 
| 
 | 
    91 ## -r : flag to run a R2 evaluation
 | 
| 
 | 
    92 
 | 
| 
 | 
    93 ## -c : flag to run a correlation evaluation
 | 
| 
 | 
    94 
 | 
| 
 | 
    95 ## -n : prefix of the names of all result files
 | 
| 
 | 
    96 
 | 
| 
 | 
    97 ## -o : path to the directory where the evaluation results are stored.
 | 
| 
 | 
    98 cmd <- commandArgs(trailingOnly = T)
 | 
| 
 | 
    99 source(cmd[1])
 | 
| 
 | 
   100 # set which evaluation are used
 | 
| 
 | 
   101 if(as.integer(doR2) == 1) {
 | 
| 
 | 
   102   doR2 <- T
 | 
| 
 | 
   103 }
 | 
| 
 | 
   104 if(as.integer(doCor) == 1) {
 | 
| 
 | 
   105   doCor <- T
 | 
| 
 | 
   106 }
 | 
| 
 | 
   107 # load genotype & phenotype
 | 
| 
 | 
   108 con = file(genotype)
 | 
| 
 | 
   109 genotype <- readLines(con = con, n = 1, ok=T)
 | 
| 
 | 
   110 close(con)
 | 
| 
 | 
   111 genotype <- read.table(genotype, sep="\t",h=T)
 | 
| 
 | 
   112 phenotype <- read.table(phenotype, sep="\t",h=T)[,1]
 | 
| 
 | 
   113 # load prediction
 | 
| 
 | 
   114 con = file(prediction)
 | 
| 
 | 
   115 prediction <- readLines(con = con, n = 1, ok=T)
 | 
| 
 | 
   116 close(con)
 | 
| 
 | 
   117 prediction <- readRDS(prediction)
 | 
| 
 | 
   118 # load folds
 | 
| 
 | 
   119 con = file(folds)
 | 
| 
 | 
   120 folds <- readLines(con = con, n = 1, ok=T)
 | 
| 
 | 
   121 close(con)
 | 
| 
 | 
   122 folds <- readRDS(folds)
 | 
| 
 | 
   123 evaluatePredictions(data=genotype, prediction=prediction, traitValue=phenotype, doR2=doR2, 
 | 
| 
 | 
   124                     prefix=name, folds=folds, doCor=doCor, path=out) |