Commit message:
Deleted selected files
removed:
evaluate_aggregation.R
diff -r d1e92ce799c1 -r 3cdfa1e4b1c1 evaluate_aggregation.R
--- a/evaluate_aggregation.R	Mon Oct 31 04:53:30 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,452 +0,0 @@
-########################################################
-#
-# creation date : 10/10/16
-# last modification : 29/10/16
-# author : Dr Nicolas Beaume
-#
-########################################################
-
-suppressWarnings(suppressMessages(library(GA)))
-library("miscTools")
-library(rpart)
-suppressWarnings(suppressMessages(library(randomForest)))
-library(e1071)
-suppressWarnings(suppressMessages(library(glmnet)))
-library(rrBLUP)
-options(warn=-1)
-############################ helper functions #######################
-
-##### classifiers
-prediction <- function(genotype, model, classifier="unknown") {
-  # run prediction according to the classifier
-  switch(classifier,
-         rrBLUP={
-           predictions <- as.matrix(genotype) %*% as.matrix(model$u);
-           predictions <- predictions[,1]+model$beta;
-         },
-         rf={
-           predictions <- predict(model, genotype);
-         },
-         svm={
-           predictions <- predict(model, genotype);
-         },
-         lasso={
-           predictions <- predict(model, as.matrix(genotype), type = "response");
-         },
-         {warning("unknown classifier, please choose among the following : rrBLUP, rf, svm, lasso")})
-  return(predictions)
-}
-
-# extract parameters from a model, excluding rrBLUP which auto-optimizes
-extractParameter <- function(model, classifierName) {
-  param <- NULL
-  switch(classifierName,
-         # random forest
-         rf={
-           param <- model$ntree
-           param <- c(param, list(model$mtry))
-           names(param) <- c("ntree", "mtry")
-         },
-         # svm
-         svm={
-           param <- as.numeric(model$cost)
-           param <- c(param, list(model$gamma))
-           param <- c(param, list(model$coef0))
-           param <- c(param, list(model$degree))
-           param <- c(param, list(model$kernel))
-           names(param) <- c("c", "g", "coef", "d", "kernel")
-           switch((model$kernel+1),
-                  param$kernel <- "linear",
-                  param$kernel <- "polynomial",
-                  param$kernel <- "radial",
-                  param$kernel <- "sigmoid"
-           )
-         },
-         # lasso
-         lasso={
-           param <- as.list(model$lambda)
-           names(param) <- "lambda"
-         },
-         {print(paste("unknown classifier, please choose among rf, svm, lasso"));
-          stop()}
-  )
-  return(param)
-}
-
-##### Genetic algorithm
-
-# compute r2 using the classic formula:
-# compare the sum of squared differences from target to prediction
-# to the sum of squared differences from target to the mean of the target
-r2 <- function(target, prediction) {
-  sst <- sum((target-mean(target))^2)
-  ssr <- sum((target-prediction)^2)
-  return(1-ssr/sst)
-}
-
-optimizeOneIndividual <- function(values, trueValue) {
-  # turn the values into a weighted-sum function
-  f <- function(w) {sum(values * w/sum(w))}
-  fitness <- function(x) {1/abs(trueValue-f(x))}
-  resp <- ga(type = "real-valued", fitness = fitness, min = rep(0, length(values)), max = rep(1, length(values)),
-             maxiter = 1000, monitor = NULL, keepBest = T)
-  resp@solution <- resp@solution/sum(resp@solution)
-  return(resp)
-}
-
-optimizeWeight <- function(values, trueValue, n=1000) {
-  fitnessAll <- function(w) {
-    predicted <- apply(values, 1, weightedPrediction.vec, w)
-    return(mean(r2(trueValue, predicted)))
-    #return(mean(1/abs(trueValue-predicted)))
-  }
-  resp <- ga(type = "real-valued", fitness = fitnessAll, min = rep(0, ncol(values)), max = rep(1, ncol(values)),
-             maxiter = n, monitor = NULL, keepBest = T)
-  resp@solution <- resp@solution/sum(resp@solution)
-  return(resp)
-}
-
-weightedPrediction <- function(classifiers, w) {
-  if(length(w) > ncol(classifiers)) {
-    warning("more weights than classifiers, extra weights are ignored")
-    w <- w[1:ncol(classifiers)]
-  } else if(length(w) < ncol(classifiers)) {
-    w

[... middle of the deleted file truncated in the changeset view ...]

-  # for each classifier, compute predictions on both sets
-  # and evaluate r2 to find the best classifier
-  for(j in 1:length(classifNames)) {
-    switch(classifNames[j],
-           # random forest
-           rf={
-             # predict train and test
-             param <- extractParameter(rfModel, "rf")
-             model <- randomForest(x=train, y=phenoTrain, mtry = param$mtry,
-                                   ntree = param$ntree);
-             predictionTrain[,j] <- prediction(train, model, classifier = "rf");
-             predictionTest[,j] <- prediction(test, model, classifier = "rf");
-             r2Classif <- c(r2Classif, rf=r2(phenoTest, predictionTest[,"rf"]))},
-           # svm
-           svm={
-             # predict train and test
-             param <- extractParameter(svmModel, "svm");
-             model <- svm(train, phenoTrain, kernel = param$kernel, cost = param$c,
-                          gamma=param$g, degree = param$d, coef0 = param$coef, scale = F)
-             predictionTrain[,j] <- prediction(train, model, classifier = "svm");
-             predictionTest[,j] <- prediction(test, model, classifier = "svm");
-             r2Classif <- c(r2Classif, svm=r2(phenoTest, predictionTest[,"svm"]))},
-           # lasso
-           lasso={
-             # predict train and test
-             param <- extractParameter(lassoModel, "lasso");
-             model <- glmnet(x=as.matrix(train), y=phenoTrain, lambda = param$lambda);
-             predictionTrain[,j] <- prediction(train, model, classifier = "lasso");
-             predictionTest[,j] <- prediction(test, model, classifier = "lasso");
-             r2Classif <- c(r2Classif, lasso=r2(phenoTest, predictionTest[,"lasso"]))},
-           # rrBLUP
-           rrBLUP={
-             # predict train and test
-             model <- mixed.solve(phenoTrain, Z=train, K=NULL, SE=F, return.Hinv = F);
-             predictionTrain[,j] <- prediction(train, model, classifier = "rrBLUP");
-             predictionTest[,j] <- prediction(test, model, classifier = "rrBLUP");
-             r2Classif <- c(r2Classif, rrBLUP=r2(phenoTest, predictionTest[,"rrBLUP"]))},
-           {print(paste("unknown classifier, please choose among rf, svm, lasso, rrBLUP"));
-            stop()}
-    )
-  }
-  predictionTrain.list <- c(predictionTrain.list, list(predictionTrain))
-  predictionTest.list <- c(predictionTest.list, list(predictionTest))
-  r2Classif.list <- c(r2Classif.list, list(r2Classif))
-}
-# aggregate!
-switch(method,
-       geneticMean={
-         aggreg <- aggregateGeneticMean(train=predictionTrain.list, test=predictionTest.list,
-                                        target = phenotype, folds=folds)
-       },
-       dt={
-         aggreg <- aggregateDT(train=predictionTrain.list, test=predictionTest.list,
-                               target = phenotype, folds=folds)
-       },
-       lasso={
-         aggreg <- aggregateLASSO(train=predictionTrain.list, test=predictionTest.list,
-                                  target = phenotype, folds=folds)
-       },
-       rf={
-         aggreg <- aggregateRF(train=predictionTrain.list, test=predictionTest.list,
-                               target = phenotype, folds=folds)
-       },
-       # svm, by default
-       {aggreg <- aggregateSVM(train=predictionTrain.list, test=predictionTest.list,
-                               target = phenotype, folds=folds, kernel=kernel)}
-)
-# determine the best classifier
-# first, transform the list into a matrix
-saveRDS(r2Classif.list, "/Users/nbeaume/Desktop/r2Classif.rds")
-r2Classif.list <- t(data.frame(r2Classif.list))
-# then, compute the mean r2 for each classifier
-meanR2Classif <- apply(r2Classif.list, 2, mean)
-# choose the best one
-bestClassif <- which.max(meanR2Classif)
-# compare aggregation and the best classifier
-finalRes <- cbind(bestClassif=r2Classif.list[,bestClassif], aggreg=aggreg,
-                  diff=(aggreg-r2Classif.list[,bestClassif]))
-print(apply(finalRes, 2, mean))