Previous changeset 88:aef3240b58ac (2016-10-28) Next changeset 90:634533b40622 (2016-10-28) |
Commit message:
Uploaded |
added:
svm.R |
b |
diff -r aef3240b58ac -r c2efdf0c23a1 svm.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/svm.R Fri Oct 28 08:49:43 2016 -0400 |
[ |
@@ -0,0 +1,167 @@ +######################################################## +# +# creation date : 07/01/16 +# last modification : 03/07/16 +# author : Dr Nicolas Beaume +# owner : IRRI +# +######################################################## +library("e1071") +options(warn=-1) +############################ helper functions ####################### +# optimize svm parameters +optimizeSVM <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { + # tuning parameters then train + model <- NULL + if(is.null(g)){g <- 10^(-6:0)} + if(is.null(c)){c <- 10^(-1:0)} + # optimize parameter for the kernel in use + switch(kernel, + # sigmoid kernel : need gamma, cost and coef + sigmoid={ + if(is.null(coef)){coef <- 0:3}; + # optimize then extract best parameters + tune <- tune.svm(train, target, gamma = g, cost = 10^(0:2), kernel="sigmoid", coef0 = coef); + g <- tune$best.parameters[[1]]; + c <- tune$best.parameters[[3]]; + coef <- tune$best.parameters[[2]]; + # compute model + model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")}, + # linear kernel, only cost is required + linear={ + # optimize then extract best parameters + tune <- tune.svm(train, target, cost = c, kernel="linear"); + c <- tune$best.parameters[[1]]; + # compute model + model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")}, + # polynomial kernel, use degree, gamma, cost and coef as parameters + polynomial={ + # optimize and extract best parameters + if(is.null(coef)){coef <- 0:3}; + if(is.null(d)){d <- 0:4}; + tune <- tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial"); + d <- tune$best.parameters[[1]]; + g <- tune$best.parameters[[2]]; + coef <- tune$best.parameters[[3]]; + c <- tune$best.parameters[[4]]; + # compute model + model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)}, + # default : radial kernel, use gamma and cost as parameters + { + # optimize and extract parameters + tune <- tune.svm(train, target, gamma = g, cost = c, kernel="radial"); + g <- tune$best.parameters[[1]]; + c <- tune$best.parameters[[2]]; + # compute model + model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")} + ) + return(model) +} +################################## main function ########################### +svmClassifier <- function(genotype, phenotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { + # optimize classifier if any parameter is NULL + switch(kernel, + # sigmoid kernel : need gamma, cost and coef + sigmoid={ + if(any(is.null(c(coef, c, g)))){ + fit <- optimizeSVM(genotype, phenotype, kernel = "sigmoid", + g = g, c=c, coef = coef); + c <- fit$cost; + g <- fit$gamma; + coef <- fit$coef0; + } + }, + # linear kernel, only cost is required + linear={ + if(is.null(c)){fit <- optimizeSVM(genotype, phenotype, kernel = "linear", c=c); + c <- fit$cost; + } + }, + # polynomial kernel, use degree, gamma, cost and coef as parameters + polynomial={ + if(any(is.null(c(coef, c, g, d)))){fit <- optimizeSVM(genotype, phenotype, kernel = "polynomial", + g = g, c=c, coef = coef, d = d); + c <- fit$cost; + g <- fit$gamma; + coef <- fit$coef0; + d <- fit$degree + } + }, + # default : radial kernel, use gamma and cost as parameters + {if(any(is.null(c(c, g)))){fit <- optimizeSVM(genotype, phenotype, kernel = "radial", + g = g, c=c); + c <- fit$cost; + g <- fit$gamma; + } + } + ) + # do evaluation + if(evaluation) { + prediction <- NULL + # iterate over folds + for(i in 1:length(folds)) { + # prepare indexes for training and test + test <- folds[[i]] + train <- unlist(folds[-i]) + # compute model + svm.fit <- optimizeSVM(train = genotype[train,], target = phenotype[train], kernel=kernel, + g=g, c=c, coef=coef, d=d) + # predict on test set of the current fold + prediction <- c(prediction, list(predict(svm.fit, genotype[test,]))) + } + # save all prediction for further evaluation + saveRDS(prediction, file=paste(outFile, ".rds", sep = "")) + } else { + # compute model and save it + switch(kernel, + # sigmoid kernel : need gamma, cost and coef + sigmoid={ + model <- svm(x = genotype, y = phenotype, kernel="sigmoid", gamma =g, + cost =c, coef0=coef) + }, + # linear kernel, only cost is required + linear={ + model <- svm(x = genotype, y = phenotype, kernel="linear", cost =c) + }, + # polynomial kernel, use degree, gamma, cost and coef as parameters + polynomial={ + model <- svm(x = genotype, y = phenotype, kernel="polynomial", gamma =g, cost =c, + coef0=coef, degree =d) + }, + # default : radial kernel, use gamma and cost as parameters + { model <- svm(x = genotype, y = phenotype, kernel="radial", gamma =g, cost =c) + }) + saveRDS(model, file=paste(outFile, ".rds", sep = "")) + } +} + +############################ main ############################# +# load argument +cmd <- commandArgs(T) +source(cmd[1]) +# check for svm paramater, especially to know if optimization is requiered +if(as.numeric(g) == -1) {g <- NULL} +if(as.numeric(c) == -1) {c <- NULL} +if(as.numeric(coef) == -1) {coef <- NULL} +if(as.numeric(d) == -1) {d <- NULL} +# check if evaluation is required +evaluation <- F +if(as.integer(doEvaluation) == 1) { + evaluation <- T + con = file(folds) + folds <- readLines(con = con, n = 1, ok=T) + close(con) + folds <- readRDS(folds) +} +# load genotype and phenotype +con = file(genotype) +genotype <- readLines(con = con, n = 1, ok=T) +close(con) +genotype <- read.table(genotype, sep="\t", h=T) +# phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve +phenotype <- read.table(phenotype, sep="\t", h=T)[,1] +# run ! +svmClassifier(genotype = genotype, phenotype = phenotype, + evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel) +# retunr path of the result file to galaxy +cat(paste(paste(out, ".rds", sep = ""), "\n", sep="")) |