comparison evaluation.R @ 31:4f017b111d6c draft

Uploaded
author nicolas
date Tue, 25 Oct 2016 14:39:30 -0400
parents 62e7a8d66b1f
children
comparison
equal deleted inserted replaced
30:e85041eeda80 31:4f017b111d6c
6 # author : Dr Nicolas Beaume 6 # author : Dr Nicolas Beaume
7 # owner : IRRI 7 # owner : IRRI
8 # 8 #
9 ######################################################## 9 ########################################################
10 10
11 log <- file(paste(getwd(), "log_evaluation.txt", sep="/"), open = "wt") 11 # compute correlation
12 sink(file = log, type="message")
13
14 library("pROC")
15 library("randomForest")
16 library("miscTools")
17
18 predictionCorrelation <- function(prediction, obs) { 12 predictionCorrelation <- function(prediction, obs) {
19 return(cor(prediction, obs, method = "pearson")) 13 return(cor(prediction, obs, method = "pearson"))
20 } 14 }
21 15
22 r2.plot <- function(true, predicted, scatterplot=T) { 16 # compute r2 by computing the classic formula
23 if(scatterplot) { 17 # compare the sum of square difference from target to prediciton
24 plot(true, predicted, xlab="trait value", ylab="predicted value", main="", pch=16, 18 # to the sum of square difference from target to the mean of the target
25 ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted))))
26 lines(true, true, col="red")
27 } else {
28 plot(true, ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted))),
29 xlab="individual index", ylab="traitValue", type="l", main="")
30 lines(predicted, col="red")
31 legend("topright", legend = c("pedicted", "traitValue"), col = c("red", "black"), lty=c(1,1))
32 }
33 }
34
35 r2 <- function(target, prediction) { 19 r2 <- function(target, prediction) {
36 sst <- sum((target-mean(target))^2) 20 sst <- sum((target-mean(target))^2)
37 ssr <- sum((target-prediction)^2) 21 ssr <- sum((target-prediction)^2)
38 return(1-ssr/sst) 22 return(1-ssr/sst)
39 } 23 }
40 24
41 ################################## main function ########################### 25 ################################## main function ###########################
42 26
43 evaluatePredictions <- function(data, prediction=NULL, traitValue=NULL, doR2=F, 27 evaluatePredictions <- function(data, prediction=NULL, traitValue=NULL, doR2=F,
44 prefix="data", folds=NULL, classes=NULL, doCor=F, path=".") { 28 folds=NULL, classes=NULL, doCor=F) {
45 eval <- NULL 29 eval <- NULL
30 # case of r2
46 if(doR2) { 31 if(doR2) {
47 eval <- c(eval, list(r2=NULL)) 32 eval <- c(eval, list(r2=NULL))
48 } 33 }
49 if(doCor) { 34 if(doCor) {
50 eval <- c(eval, list(cor=NULL)) 35 eval <- c(eval, list(cor=NULL))
52 for (i in 1:length(folds)) { 37 for (i in 1:length(folds)) {
53 train <- unlist(folds[-i]) 38 train <- unlist(folds[-i])
54 test <- folds[[i]] 39 test <- folds[[i]]
55 if(doR2) { 40 if(doR2) {
56 if(!is.null(traitValue)) { 41 if(!is.null(traitValue)) {
57 jpeg(paste(path, "/", prefix,"fold_",i,"_scatterPlot.jpeg", sep=""))
58 eval$r2 <- c(eval$r2, r2(traitValue[test], prediction[[i]])) 42 eval$r2 <- c(eval$r2, r2(traitValue[test], prediction[[i]]))
59 r2.plot(traitValue[test], prediction[[i]])
60 dev.off()
61 } else { 43 } else {
62 eval$r2 <- c(eval$r2, NA) 44 eval$r2 <- c(eval$r2, NA)
63 } 45 }
64 } 46 }
47 # case of correlation
65 if(doCor) { 48 if(doCor) {
66 if(!is.null(traitValue)) { 49 if(!is.null(traitValue)) {
67 eval$cor <- c(eval$cor, cor(traitValue[test], prediction[[i]])) 50 eval$cor <- c(eval$cor, cor(traitValue[test], prediction[[i]]))
68 } else { 51 } else {
69 eval$cor <- c(eval$cor, NA) 52 eval$cor <- c(eval$cor, NA)
70 } 53 }
71 } 54 }
72 } 55 }
56 # print output
73 print(eval) 57 print(eval)
74 write.table(eval, file = paste(path,"/",prefix,"_evaluation.csv", sep=""), sep="\t", row.names = F) 58 if(doR2) {
59 print(paste("mean r2 : ", mean(eval$r2), "standard deviation :", sd(eval$r2)))
60 }
61 if(doCor) {
62 print(paste("mean correlation : ", mean(eval$cor), "standard deviation :", sd(eval$cor)))
63 }
75 } 64 }
76 ############################ main ############################# 65 ############################ main #############################
77 # evaluation.sh -i path_to_data -p prediction -f folds -t phenotype -r -c -a -n name -o path_to_result_directory
78 ## -i : path to the file that contains the genotypes.
79 # please note that the table must be called "encoded" when your datafile is saved into .rda (automatic if encode.R is used)
80 66
81 ## -p : prediction made through any methods 67 # load arguments
82 # please note that the table must be called "prediction" when your datafile is saved into .rda
83 # (automatic if prediction methods from this pipeline were used)
84
85 ## -f : index of the folds to which belong each individual
86 # please note that the list must be called "folds" (automatic if folds.R was used)
87
88 ## -t : phenotype of each individual
89 # please note that the table must be called "phenotype" when your datafile is saved into .rda (automatic if loadGenotype.R was used)
90
91 ## -r : flag to run a R2 evaluation
92
93 ## -c : flag to run a correlation evaluation
94
95 ## -n : prefix of the names of all result files
96
97 ## -o : path to the directory where the evaluation results are stored.
98 cmd <- commandArgs(trailingOnly = T) 68 cmd <- commandArgs(trailingOnly = T)
99 source(cmd[1]) 69 source(cmd[1])
100 # set which evaluation are used 70 # set which evaluation are used
101 if(as.integer(doR2) == 1) { 71 if(as.integer(doR2) == 1) {
102 doR2 <- T 72 doR2 <- T
118 # load folds 88 # load folds
119 con = file(folds) 89 con = file(folds)
120 folds <- readLines(con = con, n = 1, ok=T) 90 folds <- readLines(con = con, n = 1, ok=T)
121 close(con) 91 close(con)
122 folds <- readRDS(folds) 92 folds <- readRDS(folds)
93 # evaluation
123 evaluatePredictions(data=genotype, prediction=prediction, traitValue=phenotype, doR2=doR2, 94 evaluatePredictions(data=genotype, prediction=prediction, traitValue=phenotype, doR2=doR2,
124 prefix=name, folds=folds, doCor=doCor, path=out) 95 folds=folds, doCor=doCor)