Mercurial > repos > nicolas > oghma
comparison svm.R @ 44:8cdeaa91ebc3 draft
Uploaded
| author | nicolas |
|---|---|
| date | Tue, 25 Oct 2016 14:43:31 -0400 |
| parents | f9d2d5058395 |
| children |
comparison
equal
deleted
inserted
replaced
| 43:e80b87a35c61 | 44:8cdeaa91ebc3 |
|---|---|
| 4 # last modification : 03/07/16 | 4 # last modification : 03/07/16 |
| 5 # author : Dr Nicolas Beaume | 5 # author : Dr Nicolas Beaume |
| 6 # owner : IRRI | 6 # owner : IRRI |
| 7 # | 7 # |
| 8 ######################################################## | 8 ######################################################## |
| 9 log <- file(paste(getwd(), "log_SVM.txt", sep="/"), open = "wt") | |
| 10 sink(file = log, type="message") | |
| 11 library("e1071") | 9 library("e1071") |
| 10 options(warn=-1) | |
| 12 ############################ helper functions ####################### | 11 ############################ helper functions ####################### |
| 13 svmModel <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { | 12 # optimize svm parameters |
| 13 optimizeSVM <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { | |
| 14 # tuning parameters then train | 14 # tuning parameters then train |
| 15 model <- NULL | 15 model <- NULL |
| 16 if(is.null(g)){g <- 10^(-6:0)} | 16 if(is.null(g)){g <- 10^(-6:0)} |
| 17 if(is.null(c)){c <- 10^(0:2)} | 17 if(is.null(c)){c <- 10^(-1:0)} |
| 18 # optimize parameter for the kernel in use | |
| 18 switch(kernel, | 19 switch(kernel, |
| 20 # sigmoid kernel : need gamma, cost and coef | |
| 19 sigmoid={ | 21 sigmoid={ |
| 20 tune <- tune.svm(train, target, gamma = , cost = 10^(0:2), kernel="sigmoid"); | 22 if(is.null(coef)){coef <- 0:3}; |
| 23 # optimize then extract best parameters | |
| 24 tune <- tune.svm(train, target, gamma = g, cost = 10^(0:2), kernel="sigmoid", coef0 = coef); | |
| 21 g <- tune$best.parameters[[1]]; | 25 g <- tune$best.parameters[[1]]; |
| 22 c <- tune$best.parameters[[2]]; | 26 c <- tune$best.parameters[[3]]; |
| 27 coef <- tune$best.parameters[[2]]; | |
| 28 # compute model | |
| 23 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")}, | 29 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")}, |
| 30 # linear kernel, only cost is required | |
| 24 linear={ | 31 linear={ |
| 32 # optimize then extract best parameters | |
| 25 tune <- tune.svm(train, target, cost = c, kernel="linear"); | 33 tune <- tune.svm(train, target, cost = c, kernel="linear"); |
| 26 c <- tune$best.parameters[[2]]; | 34 c <- tune$best.parameters[[1]]; |
| 35 # compute model | |
| 27 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")}, | 36 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")}, |
| 37 # polynomial kernel, use degree, gamma, cost and coef as parameters | |
| 28 polynomial={ | 38 polynomial={ |
| 39 # optimize and extract best parameters | |
| 29 if(is.null(coef)){coef <- 0:3}; | 40 if(is.null(coef)){coef <- 0:3}; |
| 30 if(is.null(d)){d <- 0:4}; | 41 if(is.null(d)){d <- 0:4}; |
| 31 tune <- tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial"); | 42 tune <- tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial"); |
| 32 d <- tune$best.parameters[[1]]; | 43 d <- tune$best.parameters[[1]]; |
| 33 g <- tune$best.parameters[[2]]; | 44 g <- tune$best.parameters[[2]]; |
| 34 coef <- tune$best.parameters[[3]]; | 45 coef <- tune$best.parameters[[3]]; |
| 35 c <- tune$best.parameters[[4]]; | 46 c <- tune$best.parameters[[4]]; |
| 47 # compute model | |
| 36 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)}, | 48 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)}, |
| 49 # default : radial kernel, use gamma and cost as parameters | |
| 37 { | 50 { |
| 51 # optimize and extract parameters | |
| 38 tune <- tune.svm(train, target, gamma = g, cost = c, kernel="radial"); | 52 tune <- tune.svm(train, target, gamma = g, cost = c, kernel="radial"); |
| 39 g <- tune$best.parameters[[1]]; | 53 g <- tune$best.parameters[[1]]; |
| 40 c <- tune$best.parameters[[2]]; | 54 c <- tune$best.parameters[[2]]; |
| 55 # compute model | |
| 41 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")} | 56 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")} |
| 42 ) | 57 ) |
| 43 return(model) | 58 return(model) |
| 44 } | 59 } |
| 45 ################################## main function ########################### | 60 ################################## main function ########################### |
| 46 svmSelection <- function(genotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { | 61 svmClassifier <- function(genotype, phenotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { |
| 47 # build model | 62 # optimize classifier if any parameter is NULL |
| 48 labelIndex <- match("label", colnames(genotype)) | 63 switch(kernel, |
| 64 # sigmoid kernel : need gamma, cost and coef | |
| 65 sigmoid={ | |
| 66 if(any(is.null(c(coef, c, g)))){ | |
| 67 fit <- optimizeSVM(genotype, phenotype, kernel = "sigmoid", | |
| 68 g = g, c=c, coef = coef); | |
| 69 c <- fit$cost; | |
| 70 g <- fit$gamma; | |
| 71 coef <- fit$coef0; | |
| 72 } | |
| 73 }, | |
| 74 # linear kernel, only cost is required | |
| 75 linear={ | |
| 76 if(is.null(c)){fit <- optimizeSVM(genotype, phenotype, kernel = "linear", c=c); | |
| 77 c <- fit$cost; | |
| 78 } | |
| 79 }, | |
| 80 # polynomial kernel, use degree, gamma, cost and coef as parameters | |
| 81 polynomial={ | |
| 82 if(any(is.null(c(coef, c, g, d)))){fit <- optimizeSVM(genotype, phenotype, kernel = "polynomial", | |
| 83 g = g, c=c, coef = coef, d = d); | |
| 84 c <- fit$cost; | |
| 85 g <- fit$gamma; | |
| 86 coef <- fit$coef0; | |
| 87 d <- fit$degree | |
| 88 } | |
| 89 }, | |
| 90 # default : radial kernel, use gamma and cost as parameters | |
| 91 {if(any(is.null(c(c, g)))){fit <- optimizeSVM(genotype, phenotype, kernel = "radial", | |
| 92 g = g, c=c); | |
| 93 c <- fit$cost; | |
| 94 g <- fit$gamma; | |
| 95 } | |
| 96 } | |
| 97 ) | |
| 98 # do evaluation | |
| 49 if(evaluation) { | 99 if(evaluation) { |
| 50 prediction <- NULL | 100 prediction <- NULL |
| 101 # iterate over folds | |
| 51 for(i in 1:length(folds)) { | 102 for(i in 1:length(folds)) { |
| 103 # prepare indexes for training and test | |
| 52 test <- folds[[i]] | 104 test <- folds[[i]] |
| 53 train <- unlist(folds[-i]) | 105 train <- unlist(folds[-i]) |
| 54 svm.fit <- svmModel(train = genotype[train,-labelIndex], target = genotype[train,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d) | 106 # compute model |
| 55 prediction <- c(prediction, list(predict(svm.fit, genotype[test,-labelIndex]))) | 107 svm.fit <- optimizeSVM(train = genotype[train,], target = phenotype[train], kernel=kernel, |
| 108 g=g, c=c, coef=coef, d=d) | |
| 109 # predict on test set of the current fold | |
| 110 prediction <- c(prediction, list(predict(svm.fit, genotype[test,]))) | |
| 56 } | 111 } |
| 112 # save all prediction for further evaluation | |
| 57 saveRDS(prediction, file=paste(outFile, ".rds", sep = "")) | 113 saveRDS(prediction, file=paste(outFile, ".rds", sep = "")) |
| 58 } else { | 114 } else { |
| 59 model <- svmModel(train = genotype[,-labelIndex], target = genotype[,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d) | 115 # compute model and save it |
| 116 switch(kernel, | |
| 117 # sigmoid kernel : need gamma, cost and coef | |
| 118 sigmoid={ | |
| 119 model <- svm(x = genotype, y = phenotype, kernel="sigmoid", gamma =g, | |
| 120 cost =c, coef0=coef) | |
| 121 }, | |
| 122 # linear kernel, only cost is required | |
| 123 linear={ | |
| 124 model <- svm(x = genotype, y = phenotype, kernel="linear", cost =c) | |
| 125 }, | |
| 126 # polynomial kernel, use degree, gamma, cost and coef as parameters | |
| 127 polynomial={ | |
| 128 model <- svm(x = genotype, y = phenotype, kernel="polynomial", gamma =g, cost =c, | |
| 129 coef0=coef, degree =d) | |
| 130 }, | |
| 131 # default : radial kernel, use gamma and cost as parameters | |
| 132 { model <- svm(x = genotype, y = phenotype, kernel="radial", gamma =g, cost =c) | |
| 133 }) | |
| 60 saveRDS(model, file=paste(outFile, ".rds", sep = "")) | 134 saveRDS(model, file=paste(outFile, ".rds", sep = "")) |
| 61 } | 135 } |
| 62 } | 136 } |
| 63 | 137 |
| 64 ############################ main ############################# | 138 ############################ main ############################# |
| 65 | 139 # load argument |
| 66 cmd <- commandArgs(T) | 140 cmd <- commandArgs(T) |
| 67 source(cmd[1]) | 141 source(cmd[1]) |
| 142 # check for svm paramater, especially to know if optimization is requiered | |
| 68 if(as.numeric(g) == -1) {g <- NULL} | 143 if(as.numeric(g) == -1) {g <- NULL} |
| 69 if(as.numeric(c) == -1) {c <- NULL} | 144 if(as.numeric(c) == -1) {c <- NULL} |
| 70 if(as.numeric(coef) == -1) {coef <- NULL} | 145 if(as.numeric(coef) == -1) {coef <- NULL} |
| 71 if(as.numeric(d) == -1) {d <- NULL} | 146 if(as.numeric(d) == -1) {d <- NULL} |
| 72 # check if evaluation is required | 147 # check if evaluation is required |
| 84 close(con) | 159 close(con) |
| 85 genotype <- read.table(genotype, sep="\t", h=T) | 160 genotype <- read.table(genotype, sep="\t", h=T) |
| 86 # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve | 161 # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve |
| 87 phenotype <- read.table(phenotype, sep="\t", h=T)[,1] | 162 phenotype <- read.table(phenotype, sep="\t", h=T)[,1] |
| 88 # run ! | 163 # run ! |
| 89 svmSelection(genotype = data.frame(genotype, label=phenotype, check.names = F, stringsAsFactors = F), | 164 svmClassifier(genotype = genotype, phenotype = phenotype, |
| 90 evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel) | 165 evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel) |
| 166 # retunr path of the result file to galaxy | |
| 91 cat(paste(paste(out, ".rds", sep = ""), "\n", sep="")) | 167 cat(paste(paste(out, ".rds", sep = ""), "\n", sep="")) |
