Repository 'oghma'
hg clone https://toolshed.g2.bx.psu.edu/repos/nicolas/oghma

Changeset 93:3cdfa1e4b1c1 (2016-10-31)
Previous changeset 92:d1e92ce799c1 (2016-10-31) | Next changeset 94:f90c741e2a32 (2016-10-31)
Commit message:
Deleted selected files
removed:
evaluate_aggregation.R
diff -r d1e92ce799c1 -r 3cdfa1e4b1c1 evaluate_aggregation.R
--- a/evaluate_aggregation.R Mon Oct 31 04:53:30 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,452 +0,0 @@
-########################################################
-#
-# creation date : 10/10/16
-# last modification : 29/10/16
-# author : Dr Nicolas Beaume
-#
-########################################################
-
-suppressWarnings(suppressMessages(library(GA)))
-library("miscTools")
-library(rpart)
-suppressWarnings(suppressMessages(library(randomForest)))
-library(e1071)
-suppressWarnings(suppressMessages(library(glmnet)))
-library(rrBLUP)
-options(warn=-1)
-############################ helper functions #######################
-
-##### classifiers
-prediction <- function(genotype, model, classifier="unknown") {
-  # run prediction according to the classifier
-  switch(classifier,
-         rrBLUP={
-           predictions <- as.matrix(genotype) %*% as.matrix(model$u);
-           predictions <- predictions[,1]+model$beta;
-         },
-         rf={
-           predictions <- predict(model, genotype);
-         },
-         svm={
-           predictions <- predict(model, genotype);
-         },
-         lasso={
-           predictions <- predict(model, as.matrix(genotype), type = "response");
-         },
-         {warning("unkonwn classifier, please choose among the following : rrBLUP, rf, svm, lasso")})
-  return(predictions)
-}
-
-# extract parameter from a model, excluding rrBLUP which auto-optimize
-extractParameter <- function(model, classifierName) {
-  param <- NULL
-  switch(classifierName,
-         # random forest
-         rf={
-          param <- model$ntree
-          param <- c(param, list(model$mtry))
-          names(param) <- c("ntree", "mtry")
-           },
-         # svm
-         svm={
-          param <- as.numeric(model$cost)
-          param <- c(param, list(model$gamma))
-          param <- c(param, list(model$coef0))
-          param <- c(param, list(model$degree))
-          param <- c(param, list(model$kernel))
-          names(param) <- c("c", "g", "coef", "d", "kernel")
-          switch((model$kernel+1),
-                 param$kernel <- "linear",
-                 param$kernel <- "polynomial",
-                 param$kernel <- "radial",
-                 param$kernel <- "sigmoid"
-                 )
-           },
-         # lasso
-         lasso={
-           param <- as.list(model$lambda)
-           names(param) <- "lambda"
-          },
-         {print(paste("unknown classifier, please choose among rf, svm, lasso"));
-           stop()}
-  )
-  return(param)
-}
-
-##### Genetic algorithm
-
-# compute r2 by computing the classic formula
-# compare the sum of square difference from target to prediciton
-# to the sum of square difference from target to the mean of the target
-r2 <- function(target, prediction) {
-  sst <- sum((target-mean(target))^2)
-  ssr <- sum((target-prediction)^2)
-  return(1-ssr/sst)
-}
-
-optimizeOneIndividual <- function(values, trueValue) {
-  # change the value into a function
-  f <- function(w) {sum(values * w/sum(w))}
-  fitness <- function(x) {1/abs(trueValue-f(x))}
-  resp <- ga(type = "real-valued", fitness = fitness, min = rep(0, length(values)), max = rep(1, length(values)),
-             maxiter = 1000, monitor = NULL, keepBest = T)
-  resp@solution <- resp@solution/sum(resp@solution)
-  return(resp)
-}
-
-optimizeWeight <- function(values, trueValue, n=1000) {
-  fitnessAll <- function(w) {
-    predicted <- apply(values, 1, weightedPrediction.vec, w)
-    return(mean(r2(trueValue, predicted)))
-    #return(mean(1/abs(trueValue-predicted)))
-  }
-  resp <- ga(type = "real-valued", fitness = fitnessAll, min = rep(0, ncol(values)), max = rep(1, ncol(values)),
-             maxiter = n, monitor = NULL, keepBest = T)
-  resp@solution <- resp@solution/sum(resp@solution)
-  return(resp)
-}
-
-weightedPrediction <- function(classifiers, w) {
-  if(length(w) > ncol(classifiers)) {
-    warning("more weights than classifiers, extra weigths are ignored")
-    w <- w[1:ncol(classifiers)]
-  } else if(length(w) < ncol(classifiers)) {
-    w
[... diff truncated: middle portion of evaluate_aggregation.R not shown ...]
-  # for each classifiers, compute prediction on both sets
-  # and evaluate r2 to find the best classifier
-  for(j in 1:length(classifNames)) {
-    switch(classifNames[j],
-           # random forest
-           rf={
-            # predict train and test
-            param <- extractParameter(rfModel, "rf")
-            model <- randomForest(x=train, y=phenoTrain, mtry = param$mtry,
-                                  ntree = param$ntree);
-            predictionTrain[,j] <- prediction(train, model, classifier = "rf");
-            predictionTest[,j] <- prediction(test, model, classifier = "rf");
-            r2Classif <- c(r2Classif, rf=r2(phenoTest, predictionTest[,"rf"]))},
-           # svm
-           svm={
-            # predict train and test
-            param <- extractParameter(svmModel, "svm");
-            model <- svm(train, phenoTrain, kernel = param$kernel, cost = param$c,
-                         gamma=param$g, degree = param$d, coef0 = param$coef, scale = F)
-            predictionTrain[,j] <- prediction(train, model, classifier = "svm");
-            predictionTest[,j] <- prediction(test, model, classifier = "svm");
-            r2Classif <- c(r2Classif, svm=r2(phenoTest, predictionTest[,"svm"]))},
-           # lasso
-           lasso={
-            # predict train and test
-            param <- extractParameter(lassoModel, "lasso");
-            model <- glmnet(x=as.matrix(train), y=phenoTrain, lambda = param$lambda);
-            predictionTrain[,j] <- prediction(train, model, classifier = "lasso");
-            predictionTest[,j] <- prediction(test, model, classifier = "lasso");
-            r2Classif <- c(r2Classif, lasso=r2(phenoTest, predictionTest[,"lasso"]))},
-           # rrBLUP
-           rrBLUP={
-            # predict train and test
-            model <- mixed.solve(phenoTrain, Z=train,K=NULL, SE=F,return.Hinv = F);
-            predictionTrain[,j] <- prediction(train, model, classifier = "rrBLUP");
-            predictionTest[,j] <- prediction(test, model, classifier = "rrBLUP");
-            r2Classif <- c(r2Classif, rrBLUP=r2(phenoTest, predictionTest[,"rrBLUP"]))},
-           {print(paste("unknown classifier, please choose among rf, svm, lasso, rrBLUP"));
-             stop()}
-      )
-  }
-  predictionTrain.list <- c(predictionTrain.list, list(predictionTrain))
-  predictionTest.list <- c(predictionTest.list, list(predictionTest))
-  r2Classif.list <- c(r2Classif.list, list(r2Classif))
-}
-# aggregate !
-switch(method,
-       geneticMean={
-         aggreg <- aggregateGeneticMean(train=predictionTrain.list, test=predictionTest.list,
-                                        target = phenotype, folds=folds)
-       },
-       dt={
-         aggreg <- aggregateDT(train=predictionTrain.list, test=predictionTest.list,
-                               target = phenotype, folds=folds)
-       },
-       lasso={
-         aggreg <- aggregateLASSO(train=predictionTrain.list, test=predictionTest.list,
-                               target = phenotype, folds=folds)
-       },
-       rf={
-         aggreg <- aggregateRF(train=predictionTrain.list, test=predictionTest.list,
-                               target = phenotype, folds=folds)
-       },
-       # svm, by default
-       {aggreg <- aggregateSVM(train=predictionTrain.list, test=predictionTest.list,
-                               target = phenotype, folds=folds, kernel=kernel)}
-)
-# determine best classifier
-# first, transform list into a matrix
-saveRDS(r2Classif.list, "/Users/nbeaume/Desktop/r2Classif.rds")
-r2Classif.list <- t(data.frame(r2Classif.list))
-# then, compute the mean r2 for each classifier
-meanR2Classif <- apply(r2Classif.list, 2, mean)
-# choose the best one
-bestClassif <- which.max(meanR2Classif)
-# compare aggregation and best classifiers
-finalRes <- cbind(bestClassif=r2Classif.list[,bestClassif], aggreg=aggreg,
                  diff=(aggreg-r2Classif.list[,bestClassif]))
-print(apply(finalRes, 2, mean))
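For reference, the removed evaluate_aggregation.R script scored each genomic-prediction model (rrBLUP, random forest, SVM, LASSO) with the coefficient of determination, r2 = 1 - SSR/SST, and compared the aggregated prediction against the best single classifier. The short standalone R sketch below illustrates only that scoring step; the vectors observed, pred_a and pred_b are hypothetical toy values, not data from this repository.

# Minimal sketch (not part of the repository): the r2 helper as defined in the
# removed script, applied to made-up placeholder values.
r2 <- function(target, prediction) {
  sst <- sum((target - mean(target))^2)   # total sum of squares
  ssr <- sum((target - prediction)^2)     # residual sum of squares
  return(1 - ssr/sst)
}

observed <- c(5.1, 4.8, 6.2, 5.9, 5.0)   # toy phenotypes (placeholder values)
pred_a   <- c(5.0, 4.9, 6.0, 6.1, 5.2)   # predictions from a first model
pred_b   <- c(4.5, 5.5, 5.8, 6.5, 4.7)   # predictions from a second model
r2(observed, pred_a)   # closer to 1 means a better fit
r2(observed, pred_b)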