Repository 'oghma'
hg clone https://toolshed.g2.bx.psu.edu/repos/nicolas/oghma

Changeset 94:f90c741e2a32 (2016-10-31)
Previous changeset 93:3cdfa1e4b1c1 (2016-10-31) Next changeset 95:5a0b751594e6 (2016-10-31)
Commit message:
Uploaded
added:
evaluate_aggregation.R
b
diff -r 3cdfa1e4b1c1 -r f90c741e2a32 evaluate_aggregation.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/evaluate_aggregation.R Mon Oct 31 05:50:25 2016 -0400
[
b'@@ -0,0 +1,451 @@\n+########################################################\n+#\n+# creation date : 10/10/16\n+# last modification : 29/10/16\n+# author : Dr Nicolas Beaume\n+#\n+########################################################\n+\n+suppressWarnings(suppressMessages(library(GA)))\n+library("miscTools")\n+library(rpart)\n+suppressWarnings(suppressMessages(library(randomForest)))\n+library(e1071)\n+suppressWarnings(suppressMessages(library(glmnet)))\n+library(rrBLUP)\n+options(warn=-1)\n+############################ helper functions #######################\n+\n+##### classifiers\n+prediction <- function(genotype, model, classifier="unknown") {\n+  # run prediction according to the classifier\n+  switch(classifier,\n+         rrBLUP={\n+           predictions <- as.matrix(genotype) %*% as.matrix(model$u);\n+           predictions <- predictions[,1]+model$beta;\n+         },\n+         rf={\n+           predictions <- predict(model, genotype);\n+         },\n+         svm={\n+           predictions <- predict(model, genotype);\n+         },\n+         lasso={\n+           predictions <- predict(model, as.matrix(genotype), type = "response");\n+         },\n+         {warning("unkonwn classifier, please choose among the following : rrBLUP, rf, svm, lasso")})\n+  return(predictions)\n+}\n+\n+# extract parameter from a model, excluding rrBLUP which auto-optimize\n+extractParameter <- function(model, classifierName) {\n+  param <- NULL\n+  switch(classifierName,\n+         # random forest\n+         rf={\n+          param <- model$ntree\n+          param <- c(param, list(model$mtry))\n+          names(param) <- c("ntree", "mtry")\n+           },\n+         # svm\n+         svm={\n+          param <- as.numeric(model$cost)\n+          param <- c(param, list(model$gamma))\n+          param <- c(param, list(model$coef0))\n+          param <- c(param, list(model$degree))\n+          param <- c(param, list(model$kernel))\n+          names(param) <- c("c", "g", "coef", "d", "kernel")\n+          switch((model$kernel+1),\n+                 param$kernel <- "linear",\n+                 param$kernel <- "polynomial",\n+                 param$kernel <- "radial",\n+                 param$kernel <- "sigmoid"\n+                 )\n+           },\n+         # lasso\n+         lasso={\n+           param <- as.list(model$lambda)\n+           names(param) <- "lambda"\n+          },\n+         {print(paste("unknown classifier, please choose among rf, svm, lasso"));\n+           stop()}\n+  )\n+  return(param)\n+}\n+\n+##### Genetic algorithm\n+\n+# compute r2 by computing the classic formula\n+# compare the sum of square difference from target to prediciton\n+# to the sum of square difference from target to the mean of the target\n+r2 <- function(target, prediction) {\n+  sst <- sum((target-mean(target))^2)\n+  ssr <- sum((target-prediction)^2)\n+  return(1-ssr/sst)\n+}\n+\n+optimizeOneIndividual <- function(values, trueValue) {\n+  # change the value into a function\n+  f <- function(w) {sum(values * w/sum(w))}\n+  fitness <- function(x) {1/abs(trueValue-f(x))}\n+  resp <- ga(type = "real-valued", fitness = fitness, min = rep(0, length(values)), max = rep(1, length(values)), \n+             maxiter = 1000, monitor = NULL, keepBest = T)\n+  resp@solution <- resp@solution/sum(resp@solution)\n+  return(resp)\n+}\n+\n+optimizeWeight <- function(values, trueValue, n=1000) {\n+  fitnessAll <- function(w) {\n+    predicted <- apply(values, 1, weightedPrediction.vec, w)\n+    return(mean(r2(trueValue, predicted)))\n+    #return(mean(1/abs(trueValue-predicted)))\n+  }\n+  resp <- ga(type = "real-valued", fitness = fitnessAll, min = rep(0, ncol(values)), max = rep(1, ncol(values)), \n+             maxiter = n, monitor = NULL, keepBest = T)\n+  resp@solution <- resp@solution/sum(resp@solution)\n+  return(resp)\n+}\n+\n+weightedPrediction <- function(classifiers, w) {\n+  if(length(w) > ncol(classifiers)) {\n+    warning("more weights than classifiers, extra weigths are ignored")\n+    w <- w[1:ncol(classifiers)]\n+  } else if(length(w) < ncol(classifiers)) {\n+    w'..b'\n+  colnames(predictionTest) <- classifNames\n+  r2Classif <- NULL\n+  # for each classifiers, compute prediction on both sets \n+  # and evaluate r2 to find the best classifier\n+  for(j in 1:length(classifNames)) {\n+    switch(classifNames[j],\n+           # random forest\n+           rf={\n+            # predict train and test\n+            param <- extractParameter(rfModel, "rf")\n+            model <- randomForest(x=train, y=phenoTrain, mtry = param$mtry, \n+                                  ntree = param$ntree); \n+            predictionTrain[,j] <- prediction(train, model, classifier = "rf");\n+            predictionTest[,j] <- prediction(test, model, classifier = "rf");\n+            r2Classif <- c(r2Classif, rf=r2(phenoTest, predictionTest[,"rf"]))},\n+           # svm\n+           svm={\n+            # predict train and test\n+            param <- extractParameter(svmModel, "svm");\n+            model <- svm(train, phenoTrain, kernel = param$kernel, cost = param$c,\n+                         gamma=param$g, degree = param$d, coef0 = param$coef, scale = F)\n+            predictionTrain[,j] <- prediction(train, model, classifier = "svm");\n+            predictionTest[,j] <- prediction(test, model, classifier = "svm");\n+            r2Classif <- c(r2Classif, svm=r2(phenoTest, predictionTest[,"svm"]))},\n+           # lasso\n+           lasso={\n+            # predict train and test\n+            param <- extractParameter(lassoModel, "lasso");\n+            model <- glmnet(x=as.matrix(train), y=phenoTrain, lambda = param$lambda);\n+            predictionTrain[,j] <- prediction(train, model, classifier = "lasso");\n+            predictionTest[,j] <- prediction(test, model, classifier = "lasso");\n+            r2Classif <- c(r2Classif, lasso=r2(phenoTest, predictionTest[,"lasso"]))},\n+           # rrBLUP\n+           rrBLUP={\n+            # predict train and test\n+            model <- mixed.solve(phenoTrain, Z=train,K=NULL, SE=F,return.Hinv = F);\n+            predictionTrain[,j] <- prediction(train, model, classifier = "rrBLUP");\n+            predictionTest[,j] <- prediction(test, model, classifier = "rrBLUP");\n+            r2Classif <- c(r2Classif, rrBLUP=r2(phenoTest, predictionTest[,"rrBLUP"]))},\n+           {print(paste("unknown classifier, please choose among rf, svm, lasso, rrBLUP"));\n+             stop()}\n+      )\n+  }\n+  predictionTrain.list <- c(predictionTrain.list, list(predictionTrain))\n+  predictionTest.list <- c(predictionTest.list, list(predictionTest))\n+  r2Classif.list <- c(r2Classif.list, list(r2Classif))\n+}\n+# aggregate !\n+switch(method,\n+       geneticMean={\n+         aggreg <- aggregateGeneticMean(train=predictionTrain.list, test=predictionTest.list,\n+                                        target = phenotype, folds=folds)\n+       },\n+       dt={\n+         aggreg <- aggregateDT(train=predictionTrain.list, test=predictionTest.list,\n+                               target = phenotype, folds=folds)\n+       },\n+       lasso={\n+         aggreg <- aggregateLASSO(train=predictionTrain.list, test=predictionTest.list,\n+                               target = phenotype, folds=folds)\n+       },\n+       rf={\n+         aggreg <- aggregateRF(train=predictionTrain.list, test=predictionTest.list,\n+                               target = phenotype, folds=folds)\n+       },\n+       # svm, by default\n+       {aggreg <- aggregateSVM(train=predictionTrain.list, test=predictionTest.list,\n+                               target = phenotype, folds=folds, kernel=kernel)}\n+)\n+# determine best classifier\n+# first, transform list into a matrix\n+r2Classif.list <- t(data.frame(r2Classif.list))\n+# then, compute the mean r2 for each classifier\n+meanR2Classif <- apply(r2Classif.list, 2, mean)\n+# choose the best one\n+bestClassif <- which.max(meanR2Classif)\n+# compare aggregation and best classifiers\n+finalRes <- cbind(bestClassif=r2Classif.list[,bestClassif], aggreg=aggreg,\n+                  diff=(aggreg-r2Classif.list[,bestClassif]))\n+print(apply(finalRes, 2, mean))\n'