Mercurial > repos > nicolas > oghma
comparison evaluation.R @ 4:62e7a8d66b1f draft
Uploaded
| author | nicolas |
|---|---|
| date | Fri, 21 Oct 2016 06:25:28 -0400 |
| parents | |
| children | 4f017b111d6c |
comparison
equal
deleted
inserted
replaced
| 3:0f87be78e151 | 4:62e7a8d66b1f |
|---|---|
| 1 | |
| 2 ######################################################## | |
| 3 # | |
| 4 # creation date : 08/01/16 | |
| 5 # last modification : 23/06/16 | |
| 6 # author : Dr Nicolas Beaume | |
| 7 # owner : IRRI | |
| 8 # | |
| 9 ######################################################## | |
| 10 | |
| 11 log <- file(paste(getwd(), "log_evaluation.txt", sep="/"), open = "wt") | |
| 12 sink(file = log, type="message") | |
| 13 | |
| 14 library("pROC") | |
| 15 library("randomForest") | |
| 16 library("miscTools") | |
| 17 | |
| 18 predictionCorrelation <- function(prediction, obs) { | |
| 19 return(cor(prediction, obs, method = "pearson")) | |
| 20 } | |
| 21 | |
| 22 r2.plot <- function(true, predicted, scatterplot=T) { | |
| 23 if(scatterplot) { | |
| 24 plot(true, predicted, xlab="trait value", ylab="predicted value", main="", pch=16, | |
| 25 ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted)))) | |
| 26 lines(true, true, col="red") | |
| 27 } else { | |
| 28 plot(true, ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted))), | |
| 29 xlab="individual index", ylab="traitValue", type="l", main="") | |
| 30 lines(predicted, col="red") | |
| 31 legend("topright", legend = c("pedicted", "traitValue"), col = c("red", "black"), lty=c(1,1)) | |
| 32 } | |
| 33 } | |
| 34 | |
| 35 r2 <- function(target, prediction) { | |
| 36 sst <- sum((target-mean(target))^2) | |
| 37 ssr <- sum((target-prediction)^2) | |
| 38 return(1-ssr/sst) | |
| 39 } | |
| 40 | |
| 41 ################################## main function ########################### | |
| 42 | |
| 43 evaluatePredictions <- function(data, prediction=NULL, traitValue=NULL, doR2=F, | |
| 44 prefix="data", folds=NULL, classes=NULL, doCor=F, path=".") { | |
| 45 eval <- NULL | |
| 46 if(doR2) { | |
| 47 eval <- c(eval, list(r2=NULL)) | |
| 48 } | |
| 49 if(doCor) { | |
| 50 eval <- c(eval, list(cor=NULL)) | |
| 51 } | |
| 52 for (i in 1:length(folds)) { | |
| 53 train <- unlist(folds[-i]) | |
| 54 test <- folds[[i]] | |
| 55 if(doR2) { | |
| 56 if(!is.null(traitValue)) { | |
| 57 jpeg(paste(path, "/", prefix,"fold_",i,"_scatterPlot.jpeg", sep="")) | |
| 58 eval$r2 <- c(eval$r2, r2(traitValue[test], prediction[[i]])) | |
| 59 r2.plot(traitValue[test], prediction[[i]]) | |
| 60 dev.off() | |
| 61 } else { | |
| 62 eval$r2 <- c(eval$r2, NA) | |
| 63 } | |
| 64 } | |
| 65 if(doCor) { | |
| 66 if(!is.null(traitValue)) { | |
| 67 eval$cor <- c(eval$cor, cor(traitValue[test], prediction[[i]])) | |
| 68 } else { | |
| 69 eval$cor <- c(eval$cor, NA) | |
| 70 } | |
| 71 } | |
| 72 } | |
| 73 print(eval) | |
| 74 write.table(eval, file = paste(path,"/",prefix,"_evaluation.csv", sep=""), sep="\t", row.names = F) | |
| 75 } | |
| 76 ############################ main ############################# | |
| 77 # evaluation.sh -i path_to_data -p prediction -f folds -t phenotype -r -c -a -n name -o path_to_result_directory | |
| 78 ## -i : path to the file that contains the genotypes. | |
| 79 # please note that the table must be called "encoded" when your datafile is saved into .rda (automatic if encode.R is used) | |
| 80 | |
| 81 ## -p : prediction made through any methods | |
| 82 # please note that the table must be called "prediction" when your datafile is saved into .rda | |
| 83 # (automatic if prediction methods from this pipeline were used) | |
| 84 | |
| 85 ## -f : index of the folds to which belong each individual | |
| 86 # please note that the list must be called "folds" (automatic if folds.R was used) | |
| 87 | |
| 88 ## -t : phenotype of each individual | |
| 89 # please note that the table must be called "phenotype" when your datafile is saved into .rda (automatic if loadGenotype.R was used) | |
| 90 | |
| 91 ## -r : flag to run a R2 evaluation | |
| 92 | |
| 93 ## -c : flag to run a correlation evaluation | |
| 94 | |
| 95 ## -n : prefix of the names of all result files | |
| 96 | |
| 97 ## -o : path to the directory where the evaluation results are stored. | |
| 98 cmd <- commandArgs(trailingOnly = T) | |
| 99 source(cmd[1]) | |
| 100 # set which evaluation are used | |
| 101 if(as.integer(doR2) == 1) { | |
| 102 doR2 <- T | |
| 103 } | |
| 104 if(as.integer(doCor) == 1) { | |
| 105 doCor <- T | |
| 106 } | |
| 107 # load genotype & phenotype | |
| 108 con = file(genotype) | |
| 109 genotype <- readLines(con = con, n = 1, ok=T) | |
| 110 close(con) | |
| 111 genotype <- read.table(genotype, sep="\t",h=T) | |
| 112 phenotype <- read.table(phenotype, sep="\t",h=T)[,1] | |
| 113 # load prediction | |
| 114 con = file(prediction) | |
| 115 prediction <- readLines(con = con, n = 1, ok=T) | |
| 116 close(con) | |
| 117 prediction <- readRDS(prediction) | |
| 118 # load folds | |
| 119 con = file(folds) | |
| 120 folds <- readLines(con = con, n = 1, ok=T) | |
| 121 close(con) | |
| 122 folds <- readRDS(folds) | |
| 123 evaluatePredictions(data=genotype, prediction=prediction, traitValue=phenotype, doR2=doR2, | |
| 124 prefix=name, folds=folds, doCor=doCor, path=out) |
