Mercurial > repos > nicolas > oghma
changeset 44:8cdeaa91ebc3 draft
Uploaded
| author | nicolas | 
|---|---|
| date | Tue, 25 Oct 2016 14:43:31 -0400 | 
| parents | e80b87a35c61 | 
| children | 96dc9d514099 | 
| files | svm.R | 
| diffstat | 1 files changed, 91 insertions(+), 15 deletions(-) [+] | 
line wrap: on
 line diff
--- a/svm.R Tue Oct 25 14:43:16 2016 -0400 +++ b/svm.R Tue Oct 25 14:43:31 2016 -0400 @@ -6,26 +6,37 @@ # owner : IRRI # ######################################################## -log <- file(paste(getwd(), "log_SVM.txt", sep="/"), open = "wt") -sink(file = log, type="message") library("e1071") +options(warn=-1) ############################ helper functions ####################### -svmModel <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { +# optimize svm parameters +optimizeSVM <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { # tuning parameters then train model <- NULL if(is.null(g)){g <- 10^(-6:0)} - if(is.null(c)){c <- 10^(0:2)} + if(is.null(c)){c <- 10^(-1:0)} + # optimize parameter for the kernel in use switch(kernel, + # sigmoid kernel : need gamma, cost and coef sigmoid={ - tune <- tune.svm(train, target, gamma = , cost = 10^(0:2), kernel="sigmoid"); + if(is.null(coef)){coef <- 0:3}; + # optimize then extract best parameters + tune <- tune.svm(train, target, gamma = g, cost = 10^(0:2), kernel="sigmoid", coef0 = coef); g <- tune$best.parameters[[1]]; - c <- tune$best.parameters[[2]]; + c <- tune$best.parameters[[3]]; + coef <- tune$best.parameters[[2]]; + # compute model model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")}, + # linear kernel, only cost is required linear={ + # optimize then extract best parameters tune <- tune.svm(train, target, cost = c, kernel="linear"); - c <- tune$best.parameters[[2]]; + c <- tune$best.parameters[[1]]; + # compute model model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")}, + # polynomial kernel, use degree, gamma, cost and coef as parameters polynomial={ + # optimize and extract best parameters if(is.null(coef)){coef <- 0:3}; if(is.null(d)){d <- 0:4}; tune <- tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial"); @@ -33,38 +44,102 @@ g <- tune$best.parameters[[2]]; coef <- tune$best.parameters[[3]]; c <- tune$best.parameters[[4]]; + # compute model model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)}, + # default : radial kernel, use gamma and cost as parameters { + # optimize and extract parameters tune <- tune.svm(train, target, gamma = g, cost = c, kernel="radial"); g <- tune$best.parameters[[1]]; c <- tune$best.parameters[[2]]; + # compute model model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")} ) return(model) } ################################## main function ########################### -svmSelection <- function(genotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { - # build model - labelIndex <- match("label", colnames(genotype)) +svmClassifier <- function(genotype, phenotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { + # optimize classifier if any parameter is NULL + switch(kernel, + # sigmoid kernel : need gamma, cost and coef + sigmoid={ + if(any(is.null(c(coef, c, g)))){ + fit <- optimizeSVM(genotype, phenotype, kernel = "sigmoid", + g = g, c=c, coef = coef); + c <- fit$cost; + g <- fit$gamma; + coef <- fit$coef0; + } + }, + # linear kernel, only cost is required + linear={ + if(is.null(c)){fit <- optimizeSVM(genotype, phenotype, kernel = "linear", c=c); + c <- fit$cost; + } + }, + # polynomial kernel, use degree, gamma, cost and coef as parameters + polynomial={ + if(any(is.null(c(coef, c, g, d)))){fit <- optimizeSVM(genotype, phenotype, kernel = "polynomial", + g = g, c=c, coef = coef, d = d); + c <- fit$cost; + g <- fit$gamma; + coef <- fit$coef0; + d <- fit$degree + } + }, + # default : radial kernel, use gamma and cost as parameters + {if(any(is.null(c(c, g)))){fit <- optimizeSVM(genotype, phenotype, kernel = "radial", + g = g, c=c); + c <- fit$cost; + g <- fit$gamma; + } + } + ) + # do evaluation if(evaluation) { prediction <- NULL + # iterate over folds for(i in 1:length(folds)) { + # prepare indexes for training and test test <- folds[[i]] train <- unlist(folds[-i]) - svm.fit <- svmModel(train = genotype[train,-labelIndex], target = genotype[train,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d) - prediction <- c(prediction, list(predict(svm.fit, genotype[test,-labelIndex]))) + # compute model + svm.fit <- optimizeSVM(train = genotype[train,], target = phenotype[train], kernel=kernel, + g=g, c=c, coef=coef, d=d) + # predict on test set of the current fold + prediction <- c(prediction, list(predict(svm.fit, genotype[test,]))) } + # save all prediction for further evaluation saveRDS(prediction, file=paste(outFile, ".rds", sep = "")) } else { - model <- svmModel(train = genotype[,-labelIndex], target = genotype[,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d) + # compute model and save it + switch(kernel, + # sigmoid kernel : need gamma, cost and coef + sigmoid={ + model <- svm(x = genotype, y = phenotype, kernel="sigmoid", gamma =g, + cost =c, coef0=coef) + }, + # linear kernel, only cost is required + linear={ + model <- svm(x = genotype, y = phenotype, kernel="linear", cost =c) + }, + # polynomial kernel, use degree, gamma, cost and coef as parameters + polynomial={ + model <- svm(x = genotype, y = phenotype, kernel="polynomial", gamma =g, cost =c, + coef0=coef, degree =d) + }, + # default : radial kernel, use gamma and cost as parameters + { model <- svm(x = genotype, y = phenotype, kernel="radial", gamma =g, cost =c) + }) saveRDS(model, file=paste(outFile, ".rds", sep = "")) } } ############################ main ############################# - +# load argument cmd <- commandArgs(T) source(cmd[1]) +# check for svm paramater, especially to know if optimization is requiered if(as.numeric(g) == -1) {g <- NULL} if(as.numeric(c) == -1) {c <- NULL} if(as.numeric(coef) == -1) {coef <- NULL} @@ -86,6 +161,7 @@ # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve phenotype <- read.table(phenotype, sep="\t", h=T)[,1] # run ! -svmSelection(genotype = data.frame(genotype, label=phenotype, check.names = F, stringsAsFactors = F), +svmClassifier(genotype = genotype, phenotype = phenotype, evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel) +# retunr path of the result file to galaxy cat(paste(paste(out, ".rds", sep = ""), "\n", sep=""))
