Mercurial > repos > nicolas > oghma
comparison evaluation.R @ 31:4f017b111d6c draft
Uploaded
author | nicolas |
---|---|
date | Tue, 25 Oct 2016 14:39:30 -0400 |
parents | 62e7a8d66b1f |
children |
comparison
equal
deleted
inserted
replaced
30:e85041eeda80 | 31:4f017b111d6c |
---|---|
6 # author : Dr Nicolas Beaume | 6 # author : Dr Nicolas Beaume |
7 # owner : IRRI | 7 # owner : IRRI |
8 # | 8 # |
9 ######################################################## | 9 ######################################################## |
10 | 10 |
11 log <- file(paste(getwd(), "log_evaluation.txt", sep="/"), open = "wt") | 11 # compute correlation |
12 sink(file = log, type="message") | |
13 | |
14 library("pROC") | |
15 library("randomForest") | |
16 library("miscTools") | |
17 | |
18 predictionCorrelation <- function(prediction, obs) { | 12 predictionCorrelation <- function(prediction, obs) { |
19 return(cor(prediction, obs, method = "pearson")) | 13 return(cor(prediction, obs, method = "pearson")) |
20 } | 14 } |
21 | 15 |
22 r2.plot <- function(true, predicted, scatterplot=T) { | 16 # compute r2 by computing the classic formula |
23 if(scatterplot) { | 17 # compare the sum of square difference from target to prediciton |
24 plot(true, predicted, xlab="trait value", ylab="predicted value", main="", pch=16, | 18 # to the sum of square difference from target to the mean of the target |
25 ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted)))) | |
26 lines(true, true, col="red") | |
27 } else { | |
28 plot(true, ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted))), | |
29 xlab="individual index", ylab="traitValue", type="l", main="") | |
30 lines(predicted, col="red") | |
31 legend("topright", legend = c("pedicted", "traitValue"), col = c("red", "black"), lty=c(1,1)) | |
32 } | |
33 } | |
34 | |
35 r2 <- function(target, prediction) { | 19 r2 <- function(target, prediction) { |
36 sst <- sum((target-mean(target))^2) | 20 sst <- sum((target-mean(target))^2) |
37 ssr <- sum((target-prediction)^2) | 21 ssr <- sum((target-prediction)^2) |
38 return(1-ssr/sst) | 22 return(1-ssr/sst) |
39 } | 23 } |
40 | 24 |
41 ################################## main function ########################### | 25 ################################## main function ########################### |
42 | 26 |
43 evaluatePredictions <- function(data, prediction=NULL, traitValue=NULL, doR2=F, | 27 evaluatePredictions <- function(data, prediction=NULL, traitValue=NULL, doR2=F, |
44 prefix="data", folds=NULL, classes=NULL, doCor=F, path=".") { | 28 folds=NULL, classes=NULL, doCor=F) { |
45 eval <- NULL | 29 eval <- NULL |
30 # case of r2 | |
46 if(doR2) { | 31 if(doR2) { |
47 eval <- c(eval, list(r2=NULL)) | 32 eval <- c(eval, list(r2=NULL)) |
48 } | 33 } |
49 if(doCor) { | 34 if(doCor) { |
50 eval <- c(eval, list(cor=NULL)) | 35 eval <- c(eval, list(cor=NULL)) |
52 for (i in 1:length(folds)) { | 37 for (i in 1:length(folds)) { |
53 train <- unlist(folds[-i]) | 38 train <- unlist(folds[-i]) |
54 test <- folds[[i]] | 39 test <- folds[[i]] |
55 if(doR2) { | 40 if(doR2) { |
56 if(!is.null(traitValue)) { | 41 if(!is.null(traitValue)) { |
57 jpeg(paste(path, "/", prefix,"fold_",i,"_scatterPlot.jpeg", sep="")) | |
58 eval$r2 <- c(eval$r2, r2(traitValue[test], prediction[[i]])) | 42 eval$r2 <- c(eval$r2, r2(traitValue[test], prediction[[i]])) |
59 r2.plot(traitValue[test], prediction[[i]]) | |
60 dev.off() | |
61 } else { | 43 } else { |
62 eval$r2 <- c(eval$r2, NA) | 44 eval$r2 <- c(eval$r2, NA) |
63 } | 45 } |
64 } | 46 } |
47 # case of correlation | |
65 if(doCor) { | 48 if(doCor) { |
66 if(!is.null(traitValue)) { | 49 if(!is.null(traitValue)) { |
67 eval$cor <- c(eval$cor, cor(traitValue[test], prediction[[i]])) | 50 eval$cor <- c(eval$cor, cor(traitValue[test], prediction[[i]])) |
68 } else { | 51 } else { |
69 eval$cor <- c(eval$cor, NA) | 52 eval$cor <- c(eval$cor, NA) |
70 } | 53 } |
71 } | 54 } |
72 } | 55 } |
56 # print output | |
73 print(eval) | 57 print(eval) |
74 write.table(eval, file = paste(path,"/",prefix,"_evaluation.csv", sep=""), sep="\t", row.names = F) | 58 if(doR2) { |
59 print(paste("mean r2 : ", mean(eval$r2), "standard deviation :", sd(eval$r2))) | |
60 } | |
61 if(doCor) { | |
62 print(paste("mean correlation : ", mean(eval$cor), "standard deviation :", sd(eval$cor))) | |
63 } | |
75 } | 64 } |
76 ############################ main ############################# | 65 ############################ main ############################# |
77 # evaluation.sh -i path_to_data -p prediction -f folds -t phenotype -r -c -a -n name -o path_to_result_directory | |
78 ## -i : path to the file that contains the genotypes. | |
79 # please note that the table must be called "encoded" when your datafile is saved into .rda (automatic if encode.R is used) | |
80 | 66 |
81 ## -p : prediction made through any methods | 67 # load arguments |
82 # please note that the table must be called "prediction" when your datafile is saved into .rda | |
83 # (automatic if prediction methods from this pipeline were used) | |
84 | |
85 ## -f : index of the folds to which belong each individual | |
86 # please note that the list must be called "folds" (automatic if folds.R was used) | |
87 | |
88 ## -t : phenotype of each individual | |
89 # please note that the table must be called "phenotype" when your datafile is saved into .rda (automatic if loadGenotype.R was used) | |
90 | |
91 ## -r : flag to run a R2 evaluation | |
92 | |
93 ## -c : flag to run a correlation evaluation | |
94 | |
95 ## -n : prefix of the names of all result files | |
96 | |
97 ## -o : path to the directory where the evaluation results are stored. | |
98 cmd <- commandArgs(trailingOnly = T) | 68 cmd <- commandArgs(trailingOnly = T) |
99 source(cmd[1]) | 69 source(cmd[1]) |
100 # set which evaluation are used | 70 # set which evaluation are used |
101 if(as.integer(doR2) == 1) { | 71 if(as.integer(doR2) == 1) { |
102 doR2 <- T | 72 doR2 <- T |
118 # load folds | 88 # load folds |
119 con = file(folds) | 89 con = file(folds) |
120 folds <- readLines(con = con, n = 1, ok=T) | 90 folds <- readLines(con = con, n = 1, ok=T) |
121 close(con) | 91 close(con) |
122 folds <- readRDS(folds) | 92 folds <- readRDS(folds) |
93 # evaluation | |
123 evaluatePredictions(data=genotype, prediction=prediction, traitValue=phenotype, doR2=doR2, | 94 evaluatePredictions(data=genotype, prediction=prediction, traitValue=phenotype, doR2=doR2, |
124 prefix=name, folds=folds, doCor=doCor, path=out) | 95 folds=folds, doCor=doCor) |