| 89 | 1 ######################################################## | 
|  | 2 # | 
|  | 3 # creation date : 07/01/16 | 
|  | 4 # last modification : 03/07/16 | 
|  | 5 # author : Dr Nicolas Beaume | 
|  | 6 # owner : IRRI | 
|  | 7 # | 
|  | 8 ######################################################## | 
|  | 9 library("e1071") | 
|  | 10 options(warn=-1) | 
|  | 11 ############################ helper functions ####################### | 
|  | 12 # optimize svm parameters | 
|  | 13 optimizeSVM <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { | 
|  | 14   # tuning parameters then train | 
|  | 15   model <- NULL | 
|  | 16   if(is.null(g)){g <- 10^(-6:0)} | 
|  | 17   if(is.null(c)){c <- 10^(-1:0)} | 
|  | 18   # optimize parameter for the kernel in use | 
|  | 19   switch(kernel, | 
|  | 20          # sigmoid kernel : need gamma, cost and coef | 
|  | 21          sigmoid={ | 
|  | 22            if(is.null(coef)){coef <- 0:3}; | 
|  | 23            # optimize then extract best parameters | 
|  | 24            tune <-  tune.svm(train, target, gamma = g, cost = 10^(0:2), kernel="sigmoid", coef0 = coef); | 
|  | 25            g <- tune$best.parameters[[1]]; | 
|  | 26            c <- tune$best.parameters[[3]]; | 
|  | 27            coef <- tune$best.parameters[[2]]; | 
|  | 28            # compute model | 
|  | 29            model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")}, | 
|  | 30          # linear kernel, only cost is required | 
|  | 31          linear={ | 
|  | 32            # optimize then extract best parameters | 
|  | 33            tune <-  tune.svm(train, target, cost = c, kernel="linear"); | 
|  | 34            c <- tune$best.parameters[[1]]; | 
|  | 35            # compute model | 
|  | 36            model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")}, | 
|  | 37          # polynomial kernel, use degree, gamma, cost and coef as parameters | 
|  | 38          polynomial={ | 
|  | 39            # optimize and extract best parameters | 
|  | 40            if(is.null(coef)){coef <- 0:3}; | 
|  | 41            if(is.null(d)){d <- 0:4}; | 
|  | 42            tune <-  tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial"); | 
|  | 43            d <- tune$best.parameters[[1]]; | 
|  | 44            g <- tune$best.parameters[[2]]; | 
|  | 45            coef <- tune$best.parameters[[3]]; | 
|  | 46            c <- tune$best.parameters[[4]]; | 
|  | 47            # compute model | 
|  | 48            model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)}, | 
|  | 49          # default : radial kernel, use gamma and cost as parameters | 
|  | 50          { | 
|  | 51            # optimize and extract parameters | 
|  | 52            tune <-  tune.svm(train, target, gamma = g, cost = c, kernel="radial"); | 
|  | 53            g <- tune$best.parameters[[1]]; | 
|  | 54            c <- tune$best.parameters[[2]]; | 
|  | 55            # compute model | 
|  | 56            model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")} | 
|  | 57   ) | 
|  | 58   return(model) | 
|  | 59 } | 
|  | 60 ################################## main function ########################### | 
|  | 61 svmClassifier <- function(genotype, phenotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { | 
|  | 62   # optimize classifier if any parameter is NULL | 
|  | 63   switch(kernel, | 
|  | 64          # sigmoid kernel : need gamma, cost and coef | 
|  | 65          sigmoid={ | 
|  | 66            if(any(is.null(c(coef, c, g)))){ | 
|  | 67              fit <- optimizeSVM(genotype, phenotype, kernel = "sigmoid", | 
|  | 68                                                               g = g, c=c, coef = coef); | 
|  | 69               c <- fit$cost; | 
|  | 70               g <- fit$gamma; | 
|  | 71               coef <- fit$coef0; | 
|  | 72             } | 
|  | 73            }, | 
|  | 74          # linear kernel, only cost is required | 
|  | 75          linear={ | 
|  | 76            if(is.null(c)){fit <- optimizeSVM(genotype, phenotype, kernel = "linear", c=c); | 
|  | 77             c <- fit$cost; | 
|  | 78            } | 
|  | 79          }, | 
|  | 80          # polynomial kernel, use degree, gamma, cost and coef as parameters | 
|  | 81          polynomial={ | 
|  | 82            if(any(is.null(c(coef, c, g, d)))){fit <- optimizeSVM(genotype, phenotype, kernel = "polynomial", | 
|  | 83                                                               g = g, c=c, coef = coef, d = d); | 
|  | 84               c <- fit$cost; | 
|  | 85               g <- fit$gamma; | 
|  | 86               coef <- fit$coef0; | 
|  | 87               d <- fit$degree | 
|  | 88            } | 
|  | 89          }, | 
|  | 90          # default : radial kernel, use gamma and cost as parameters | 
|  | 91          {if(any(is.null(c(c, g)))){fit <- optimizeSVM(genotype, phenotype, kernel = "radial", | 
|  | 92                                                              g = g, c=c); | 
|  | 93             c <- fit$cost; | 
|  | 94             g <- fit$gamma; | 
|  | 95           } | 
|  | 96          } | 
|  | 97   ) | 
|  | 98   # do evaluation | 
|  | 99   if(evaluation) { | 
|  | 100     prediction <- NULL | 
|  | 101     # iterate over folds | 
|  | 102     for(i in 1:length(folds)) { | 
|  | 103       # prepare indexes for training and test | 
|  | 104       test <- folds[[i]] | 
|  | 105       train <- unlist(folds[-i]) | 
|  | 106       # compute model | 
|  | 107       svm.fit <- optimizeSVM(train = genotype[train,], target = phenotype[train], kernel=kernel, | 
|  | 108                           g=g, c=c, coef=coef, d=d) | 
|  | 109       # predict on test set of the current fold | 
|  | 110       prediction <- c(prediction, list(predict(svm.fit, genotype[test,]))) | 
|  | 111     } | 
|  | 112     # save all prediction for further evaluation | 
|  | 113     saveRDS(prediction, file=paste(outFile, ".rds", sep = "")) | 
|  | 114   } else { | 
|  | 115     # compute model and save it | 
|  | 116     switch(kernel, | 
|  | 117            # sigmoid kernel : need gamma, cost and coef | 
|  | 118            sigmoid={ | 
|  | 119              model <- svm(x = genotype, y = phenotype, kernel="sigmoid", gamma =g, | 
|  | 120                           cost =c, coef0=coef) | 
|  | 121            }, | 
|  | 122            # linear kernel, only cost is required | 
|  | 123            linear={ | 
|  | 124              model <- svm(x = genotype, y = phenotype, kernel="linear", cost =c) | 
|  | 125            }, | 
|  | 126            # polynomial kernel, use degree, gamma, cost and coef as parameters | 
|  | 127            polynomial={ | 
|  | 128              model <- svm(x = genotype, y = phenotype, kernel="polynomial", gamma =g, cost =c, | 
|  | 129                           coef0=coef, degree =d) | 
|  | 130            }, | 
|  | 131            # default : radial kernel, use gamma and cost as parameters | 
|  | 132            { model <- svm(x = genotype, y = phenotype, kernel="radial", gamma =g, cost =c) | 
|  | 133            }) | 
|  | 134     saveRDS(model, file=paste(outFile, ".rds", sep = "")) | 
|  | 135   } | 
|  | 136 } | 
|  | 137 | 
|  | 138 ############################ main ############################# | 
|  | 139 # load argument | 
|  | 140 cmd <- commandArgs(T) | 
|  | 141 source(cmd[1]) | 
|  | 142 # check for svm paramater, especially to know if optimization is requiered | 
|  | 143 if(as.numeric(g) == -1) {g <- NULL} | 
|  | 144 if(as.numeric(c) == -1) {c <- NULL} | 
|  | 145 if(as.numeric(coef) == -1) {coef <- NULL} | 
|  | 146 if(as.numeric(d) == -1) {d <- NULL} | 
|  | 147 # check if evaluation is required | 
|  | 148 evaluation <- F | 
|  | 149 if(as.integer(doEvaluation) == 1) { | 
|  | 150   evaluation <- T | 
|  | 151   con = file(folds) | 
|  | 152   folds <- readLines(con = con, n = 1, ok=T) | 
|  | 153   close(con) | 
|  | 154   folds <- readRDS(folds) | 
|  | 155 } | 
|  | 156 # load genotype and phenotype | 
|  | 157 con = file(genotype) | 
|  | 158 genotype <- readLines(con = con, n = 1, ok=T) | 
|  | 159 close(con) | 
|  | 160 genotype <- read.table(genotype, sep="\t", h=T) | 
|  | 161 # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve | 
|  | 162 phenotype <- read.table(phenotype, sep="\t", h=T)[,1] | 
|  | 163 # run ! | 
|  | 164 svmClassifier(genotype = genotype, phenotype = phenotype, | 
|  | 165              evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel) | 
|  | 166 # retunr path of the result file to galaxy | 
|  | 167 cat(paste(paste(out, ".rds", sep = ""), "\n", sep="")) |