########################################################
#
# creation date : 10/10/16
# last modification : 29/10/16
# author : Dr Nicolas Beaume
#
########################################################

suppressWarnings(suppressMessages(library(GA)))
library(miscTools)
library(rpart)
suppressWarnings(suppressMessages(library(randomForest)))
library(e1071)
suppressWarnings(suppressMessages(library(glmnet)))
library(rrBLUP)
options(warn=-1)
############################ helper functions #######################

##### classifiers
prediction <- function(genotype, model, classifier="unknown") {
  # run the prediction according to the classifier
  switch(classifier,
         rrBLUP={
           # marker effects (u) times genotypes, plus the intercept (beta)
           predictions <- as.matrix(genotype) %*% as.matrix(model$u);
           predictions <- predictions[,1]+model$beta;
         },
         rf={
           predictions <- predict(model, genotype);
         },
         svm={
           predictions <- predict(model, genotype);
         },
         lasso={
           predictions <- predict(model, as.matrix(genotype), type = "response");
         },
         {warning("unknown classifier, please choose among the following : rrBLUP, rf, svm, lasso")})
  return(predictions)
}
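
# Usage sketch (hypothetical toy data, not part of the pipeline):
#   geno <- matrix(sample(0:2, 50, replace=TRUE), nrow=10)
#   fit <- rrBLUP::mixed.solve(rnorm(10), Z=geno)
#   prediction(geno, fit, classifier="rrBLUP")  # one predicted value per row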

# extract parameters from a model, excluding rrBLUP which auto-optimizes
extractParameter <- function(model, classifierName) {
  param <- NULL
  switch(classifierName,
         # random forest
         rf={
           param <- model$ntree
           param <- c(param, list(model$mtry))
           names(param) <- c("ntree", "mtry")
         },
         # svm
         svm={
           param <- as.numeric(model$cost)
           param <- c(param, list(model$gamma))
           param <- c(param, list(model$coef0))
           param <- c(param, list(model$degree))
           param <- c(param, list(model$kernel))
           names(param) <- c("c", "g", "coef", "d", "kernel")
           # e1071 stores the kernel as an integer code (0=linear, 1=polynomial,
           # 2=radial, 3=sigmoid); translate it back into its name
           switch((model$kernel+1),
                  param$kernel <- "linear",
                  param$kernel <- "polynomial",
                  param$kernel <- "radial",
                  param$kernel <- "sigmoid"
           )
         },
         # lasso
         lasso={
           param <- as.list(model$lambda)
           names(param) <- "lambda"
         },
         {stop("unknown classifier, please choose among rf, svm, lasso")}
  )
  return(param)
}
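
# Example of the returned structure (hypothetical model, e1071 defaults):
#   m <- e1071::svm(x=matrix(rnorm(40), ncol=4), y=rnorm(10), kernel="radial")
#   extractParameter(m, "svm")
#   # -> named list: c=1, g=0.25, coef=0, d=3, kernel="radial"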

##### Genetic algorithm

# compute r2 using the classic formula:
# compare the sum of squared differences from target to prediction
# to the sum of squared differences from target to the mean of the target
r2 <- function(target, prediction) {
  sst <- sum((target-mean(target))^2)
  ssr <- sum((target-prediction)^2)
  return(1-ssr/sst)
}
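
# Worked example: a perfect prediction gives r2 = 1,
# while always predicting the mean of the target gives r2 = 0:
#   r2(c(1, 2, 3), c(1, 2, 3))  # 1
#   r2(c(1, 2, 3), rep(2, 3))   # 0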

optimizeOneIndividual <- function(values, trueValue) {
  # turn the values into a weighted-mean function of the weights
  f <- function(w) {sum(values * w/sum(w))}
  # fitness: the closer the weighted mean is to the true value, the better
  fitness <- function(x) {1/abs(trueValue-f(x))}
  resp <- ga(type = "real-valued", fitness = fitness, min = rep(0, length(values)), max = rep(1, length(values)),
             maxiter = 1000, monitor = NULL, keepBest = T)
  resp@solution <- resp@solution/sum(resp@solution)
  return(resp)
}

optimizeWeight <- function(values, trueValue, n=1000) {
  # fitness over all individuals: r2 between targets and weighted predictions
  fitnessAll <- function(w) {
    predicted <- apply(values, 1, weightedPrediction.vec, w)
    return(r2(trueValue, predicted))
    #return(mean(1/abs(trueValue-predicted)))
  }
  resp <- ga(type = "real-valued", fitness = fitnessAll, min = rep(0, ncol(values)), max = rep(1, ncol(values)),
             maxiter = n, monitor = NULL, keepBest = T)
  resp@solution <- resp@solution/sum(resp@solution)
  return(resp)
}
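
# Here `values` is expected to be a matrix of per-classifier predictions
# (one row per individual, one column per classifier), e.g. the
# predictionTrain matrices built in the main loop below:
#   opt <- optimizeWeight(values=predictionTrain, trueValue=phenoTrain)
#   opt@solution  # normalized weights, one per classifier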

weightedPrediction <- function(classifiers, w) {
  if(length(w) > ncol(classifiers)) {
    warning("more weights than classifiers, extra weights are ignored")
    w <- w[1:ncol(classifiers)]
  } else if(length(w) < ncol(classifiers)) {
    warning("fewer weights than classifiers, extra classifiers are ignored")
    classifiers <- classifiers[,1:length(w), drop=FALSE]
  }
  prediction <- apply(classifiers, 1, weightedPrediction.vec, w)
  return(prediction)
}

weightedPrediction.vec <- function(values, w) {
  # weighted mean: the weights are normalized so they sum to 1
  return(sum(values * w/sum(w)))
}
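
# Worked example: with weights 0.25 and 0.75 the call below returns
# 10*0.25 + 12*0.75 = 11.5:
#   weightedPrediction.vec(c(10, 12), c(0.25, 0.75))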

##### meta-decision tree

tuneTree <- function(data, target) {
  data <- data.frame(data, target=target)
  size <- nrow(data)
  # grids of candidate values for minsplit and minbucket
  split <- 1:ceiling(size/5)
  leafSize <- 1:ceiling(size/10)
  # cross-validated error and complexity parameter for each combination
  xerror <- matrix(rep(-1, length(split)*length(leafSize)), ncol=length(leafSize))
  cp <- matrix(rep(-1, length(split)*length(leafSize)), ncol=length(leafSize))
  for(i in 1:length(split)) {
    for(j in 1:length(leafSize)) {
      op <- list(minsplit=split[i], minbucket=leafSize[j])
      tree <- rpart(target ~., data=data, control=op, method="anova")
      xerror[i,j] <- tree$cptable[which.min(tree$cptable[,"xerror"]),"xerror"]
      cp[i,j] <- tree$cptable[which.min(tree$cptable[,"xerror"]),"CP"]
    }
  }
  # keep the combination with the lowest cross-validated error
  index <- which(xerror==min(xerror), arr.ind = T)
  op <- list(minsplit=split[index[1]], minbucket=leafSize[index[2]], cp=cp[index[1], index[2]])
  return(op)
}
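
# Usage sketch (hypothetical data): returns an rpart control list, e.g.
#   tuneTree(matrix(rnorm(100), ncol=4), rnorm(25))
#   # -> list(minsplit=..., minbucket=..., cp=...)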

###### meta-LASSO
# create folds by assigning each row index to a fold at random;
# fold sizes may therefore differ slightly
createFolds <- function(nbObs, n) {
  # pick a fold for each observation
  index <- sample(1:n, size=nbObs, replace = T)
  # populate folds
  folds <- NULL
  for(i in 1:n) {
    folds <- c(folds, list(which(index==i)))
  }
  return(folds)
}
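
# Example: split 10 observations into 3 folds of (on average) equal size:
#   createFolds(10, 3)  # list of 3 integer vectors partitioning 1:10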

searchParamLASSO <- function(genotype, phenotype, alpha=seq(0,1,0.1), n=7) {
  folds <- createFolds(nrow(genotype), n = n)
  acc <- NULL
  for(a in alpha) {
    curAcc <- NULL
    for(i in 1:n) {
      train <- genotype[-folds[[i]],]
      test <- genotype[folds[[i]],]
      phenoTrain <- phenotype[-folds[[i]]]
      phenoTest <- phenotype[folds[[i]]]
      # choose lambda by cross-validation, then fit with the selected value
      cv <- cv.glmnet(x=as.matrix(train), y=phenoTrain, alpha=a)
      model <- glmnet(x=as.matrix(train), y=phenoTrain, alpha=a, lambda = cv$lambda.1se)
      pred <- predict(model, as.matrix(test), type = "response")
      curAcc <- c(curAcc, r2(phenoTest, pred))
    }
    acc <- c(acc, mean(curAcc))
  }
  names(acc) <- alpha
  # return the alpha with the best mean r2
  return(as.numeric(names(acc)[which.max(acc)]))
}

###### meta-random forest

searchParamRF <- function(genotype, phenotype, rangeNtree, mtry=ncol(genotype)) {
  # hold out a third of the data for evaluation
  n <- ceiling(nrow(genotype)/3)
  indexTest <- sample(1:nrow(genotype), size=n)
  train <- genotype[-indexTest,]
  test <- genotype[indexTest,]
  phenoTrain <- phenotype[-indexTest]
  phenoTest <- phenotype[indexTest]
  acc <- NULL
  for(ntree in rangeNtree) {
    model <- randomForest(x=train, y=phenoTrain, ntree = ntree, mtry = mtry)
    pred <- predict(model, test)
    acc <- c(acc, r2(phenoTest, pred))
  }
  names(acc) <- rangeNtree
  # return the number of trees with the best r2 on the held-out set
  best <- which.max(acc)
  return(as.numeric(names(acc)[best]))
}

###### meta-SVM
searchParamSVM <- function(train, target, kernel="radial") {
  # tune the parameters, then train the final model
  model <- NULL
  switch(kernel,
         sigmoid={
           tune <- tune.svm(train, target, gamma = 10^(-6:-1), cost = 10^(0:2), kernel="sigmoid");
           g <- tune$best.parameters[[1]];
           c <- tune$best.parameters[[2]];
           model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")},
         linear={
           tune <- tune.svm(train, target, cost = 10^(0:2), kernel="linear");
           c <- tune$best.parameters[[1]];
           model <- svm(x=train, y=target, cost = c, kernel = "linear")},
         polynomial={
           tune <- tune.svm(train, target, gamma = 10^(-6:-1), cost = 10^(0:2), degree = 0:4, coef0 = 0:3, kernel="polynomial");
           d <- tune$best.parameters[[1]];
           g <- tune$best.parameters[[2]];
           coef <- tune$best.parameters[[3]];
           c <- tune$best.parameters[[4]];
           model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)},
         # radial, by default
         {
           tune <- tune.svm(train, target, gamma = 10^(-6:-1), cost = 10^(0:3), kernel="radial");
           g <- tune$best.parameters[[1]];
           c <- tune$best.parameters[[2]];
           model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")}
  )
  return(model)
}
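
# Usage sketch (hypothetical data): tunes gamma/cost over the grids above
# and returns a fitted e1071::svm model:
#   searchParamSVM(matrix(rnorm(100), ncol=4), rnorm(25), kernel="linear")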

#################### upper level functions #####################

aggregateDT <- function(train, test, target, folds) {
  r2Aggreg <- NULL
  for (i in 1:length(folds)) {
    trainIndex <- unlist(folds[-i])
    testIndex <- folds[[i]]
    # tune, fit and prune a regression tree on the fold's training predictions
    treeParam <- tuneTree(train[[i]], target[trainIndex])
    data <- data.frame(train[[i]], target=target[trainIndex])
    model <- rpart(target ~., data=data, method = "anova", control = treeParam)
    model <- prune(model, cp=treeParam$cp)
    pred <- predict(model, data.frame(test[[i]]))
    r2Aggreg <- c(r2Aggreg, r2(target[testIndex], pred))
  }
  return(r2Aggreg)
}

aggregateGeneticMean <- function(train, test, target, folds) {
  r2Aggreg <- NULL
  for (i in 1:length(folds)) {
    trainIndex <- unlist(folds[-i])
    testIndex <- folds[[i]]
    opt <- optimizeWeight(values = train[[i]], trueValue = target[trainIndex])
    pred <- weightedPrediction(test[[i]], opt@solution)
    r2Aggreg <- c(r2Aggreg, r2(target[testIndex], pred))
  }
  return(r2Aggreg)
}

aggregateLASSO <- function(train, test, target, folds) {
  r2Aggreg <- NULL
  for (i in 1:length(folds)) {
    trainIndex <- unlist(folds[-i])
    testIndex <- folds[[i]]
    alpha <- searchParamLASSO(train[[i]], target[trainIndex])
    cv <- cv.glmnet(x=as.matrix(train[[i]]), y=target[trainIndex], alpha=alpha)
    model <- glmnet(x=as.matrix(train[[i]]), y=target[trainIndex], alpha=alpha, lambda = cv$lambda.1se)
    pred <- predict(model, as.matrix(test[[i]]))
    r2Aggreg <- c(r2Aggreg, r2(target[testIndex], pred))
  }
  return(r2Aggreg)
}

aggregateRF <- function(train, test, target, folds) {
  r2Aggreg <- NULL
  for (i in 1:length(folds)) {
    trainIndex <- unlist(folds[-i])
    testIndex <- folds[[i]]
    ntree <- searchParamRF(genotype = train[[i]], phenotype = target[trainIndex],
                           rangeNtree = seq(100, 1000, 100))
    model <- randomForest(x=as.matrix(train[[i]]), y=target[trainIndex],
                          ntree = ntree, mtry = ncol(train[[i]]))
    pred <- predict(model, test[[i]])
    r2Aggreg <- c(r2Aggreg, r2(target[testIndex], pred))
  }
  return(r2Aggreg)
}

aggregateSVM <- function(train, test, target, folds, kernel="linear") {
  r2Aggreg <- NULL
  for (i in 1:length(folds)) {
    trainIndex <- unlist(folds[-i])
    testIndex <- folds[[i]]
    model <- searchParamSVM(train = train[[i]], target = target[trainIndex], kernel = kernel)
    pred <- predict(model, test[[i]])
    r2Aggreg <- c(r2Aggreg, r2(target[testIndex], pred))
  }
  return(r2Aggreg)
}

################################### main #############################
# load arguments
cmd <- commandArgs(T)
source(cmd[1])
# load folds (the file given in the config holds the path to an RDS file)
con <- file(folds)
folds <- readLines(con = con, n = 1, ok=T)
close(con)
folds <- readRDS(folds)
# phenotype is written as a table (in columns) but it must be passed as a vector to mixed.solve
phenotype <- read.table(phenotype, sep="\t", h=T)[,1]
# load genotype
con <- file(genotype)
genotype <- readLines(con = con, n = 1, ok=T)
close(con)
genotype <- read.table(genotype, sep="\t", h=T)
# find which classifiers will be used for aggregation
classifNames <- NULL
if(lassoModel != "None"){
  classifNames <- c(classifNames, "lasso")
  con <- file(lassoModel)
  lassoModel <- readLines(con = con, n = 1, ok=T)
  close(con)
  lassoModel <- readRDS(lassoModel)
}
if(rrBLUPModel != "None"){
  classifNames <- c(classifNames, "rrBLUP")
  con <- file(rrBLUPModel)
  rrBLUPModel <- readLines(con = con, n = 1, ok=T)
  close(con)
  rrBLUPModel <- readRDS(rrBLUPModel)
}
if(rfModel != "None"){
  classifNames <- c(classifNames, "rf")
  con <- file(rfModel)
  rfModel <- readLines(con = con, n = 1, ok=T)
  close(con)
  rfModel <- readRDS(rfModel)
}
if(svmModel != "None"){
  classifNames <- c(classifNames, "svm")
  con <- file(svmModel)
  svmModel <- readLines(con = con, n = 1, ok=T)
  close(con)
  svmModel <- readRDS(svmModel)
}

# compute predictions on the training and test sets for each fold and each classifier
# train predictions and test predictions are stored in separate lists
# where each element of the list represents one fold
predictionTrain.list <- NULL
predictionTest.list <- NULL
r2Classif.list <- NULL
for(i in 1:length(folds)) {
  # for the current fold, create the training set and the test set
  trainIndex <- unlist(folds[-i])
  testIndex <- folds[[i]]
  train <- genotype[trainIndex,]
  phenoTrain <- phenotype[trainIndex]
  test <- genotype[testIndex,]
  phenoTest <- phenotype[testIndex]
  # initialize the matrices containing the predictions
  predictionTrain <- matrix(rep(-1, length(classifNames)*length(trainIndex)),
                            ncol=length(classifNames))
  colnames(predictionTrain) <- classifNames
  predictionTest <- matrix(rep(-1, length(classifNames)*length(testIndex)),
                           ncol=length(classifNames))
  colnames(predictionTest) <- classifNames
  r2Classif <- NULL
  # for each classifier, compute predictions on both sets
  # and evaluate r2 to find the best classifier
  for(j in 1:length(classifNames)) {
    switch(classifNames[j],
           # random forest
           rf={
             # refit with the tuned parameters, then predict train and test
             param <- extractParameter(rfModel, "rf")
             model <- randomForest(x=train, y=phenoTrain, mtry = param$mtry,
                                   ntree = param$ntree);
             predictionTrain[,j] <- prediction(train, model, classifier = "rf");
             predictionTest[,j] <- prediction(test, model, classifier = "rf");
             r2Classif <- c(r2Classif, rf=r2(phenoTest, predictionTest[,"rf"]))},
           # svm
           svm={
             # refit with the tuned parameters, then predict train and test
             param <- extractParameter(svmModel, "svm");
             model <- svm(train, phenoTrain, kernel = param$kernel, cost = param$c,
                          gamma=param$g, degree = param$d, coef0 = param$coef, scale = F)
             predictionTrain[,j] <- prediction(train, model, classifier = "svm");
             predictionTest[,j] <- prediction(test, model, classifier = "svm");
             r2Classif <- c(r2Classif, svm=r2(phenoTest, predictionTest[,"svm"]))},
           # lasso
           lasso={
             # refit with the tuned lambda, then predict train and test
             param <- extractParameter(lassoModel, "lasso");
             model <- glmnet(x=as.matrix(train), y=phenoTrain, lambda = param$lambda);
             predictionTrain[,j] <- prediction(train, model, classifier = "lasso");
             predictionTest[,j] <- prediction(test, model, classifier = "lasso");
             r2Classif <- c(r2Classif, lasso=r2(phenoTest, predictionTest[,"lasso"]))},
           # rrBLUP
           rrBLUP={
             # fit and predict train and test (rrBLUP tunes itself)
             model <- mixed.solve(phenoTrain, Z=train, K=NULL, SE=F, return.Hinv = F);
             predictionTrain[,j] <- prediction(train, model, classifier = "rrBLUP");
             predictionTest[,j] <- prediction(test, model, classifier = "rrBLUP");
             r2Classif <- c(r2Classif, rrBLUP=r2(phenoTest, predictionTest[,"rrBLUP"]))},
           {stop("unknown classifier, please choose among rf, svm, lasso, rrBLUP")}
    )
  }
  predictionTrain.list <- c(predictionTrain.list, list(predictionTrain))
  predictionTest.list <- c(predictionTest.list, list(predictionTest))
  r2Classif.list <- c(r2Classif.list, list(r2Classif))
}
# aggregate!
switch(method,
       geneticMean={
         aggreg <- aggregateGeneticMean(train=predictionTrain.list, test=predictionTest.list,
                                        target = phenotype, folds=folds)
       },
       dt={
         aggreg <- aggregateDT(train=predictionTrain.list, test=predictionTest.list,
                               target = phenotype, folds=folds)
       },
       lasso={
         aggreg <- aggregateLASSO(train=predictionTrain.list, test=predictionTest.list,
                                  target = phenotype, folds=folds)
       },
       rf={
         aggreg <- aggregateRF(train=predictionTrain.list, test=predictionTest.list,
                               target = phenotype, folds=folds)
       },
       # svm, by default
       {aggreg <- aggregateSVM(train=predictionTrain.list, test=predictionTest.list,
                               target = phenotype, folds=folds, kernel=kernel)}
)
# determine the best classifier
# first, transform the list into a matrix
r2Classif.list <- t(data.frame(r2Classif.list))
# then, compute the mean r2 of each classifier
meanR2Classif <- apply(r2Classif.list, 2, mean)
# and choose the best one
bestClassif <- which.max(meanR2Classif)
# compare the aggregation with the best single classifier
finalRes <- cbind(bestClassif=r2Classif.list[,bestClassif], aggreg=aggreg,
                  diff=(aggreg-r2Classif.list[,bestClassif]))
print(apply(finalRes, 2, mean))