Mercurial > repos > nicolas > oghma
comparison svm.R @ 44:8cdeaa91ebc3 draft
Uploaded
author | nicolas |
---|---|
date | Tue, 25 Oct 2016 14:43:31 -0400 |
parents | f9d2d5058395 |
children |
comparison
equal
deleted
inserted
replaced
43:e80b87a35c61 | 44:8cdeaa91ebc3 |
---|---|
4 # last modification : 03/07/16 | 4 # last modification : 03/07/16 |
5 # author : Dr Nicolas Beaume | 5 # author : Dr Nicolas Beaume |
6 # owner : IRRI | 6 # owner : IRRI |
7 # | 7 # |
8 ######################################################## | 8 ######################################################## |
9 log <- file(paste(getwd(), "log_SVM.txt", sep="/"), open = "wt") | |
10 sink(file = log, type="message") | |
11 library("e1071") | 9 library("e1071") |
10 options(warn=-1) | |
12 ############################ helper functions ####################### | 11 ############################ helper functions ####################### |
13 svmModel <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { | 12 # optimize svm parameters |
13 optimizeSVM <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { | |
14 # tuning parameters then train | 14 # tuning parameters then train |
15 model <- NULL | 15 model <- NULL |
16 if(is.null(g)){g <- 10^(-6:0)} | 16 if(is.null(g)){g <- 10^(-6:0)} |
17 if(is.null(c)){c <- 10^(0:2)} | 17 if(is.null(c)){c <- 10^(-1:0)} |
18 # optimize parameter for the kernel in use | |
18 switch(kernel, | 19 switch(kernel, |
20 # sigmoid kernel : need gamma, cost and coef | |
19 sigmoid={ | 21 sigmoid={ |
20 tune <- tune.svm(train, target, gamma = , cost = 10^(0:2), kernel="sigmoid"); | 22 if(is.null(coef)){coef <- 0:3}; |
23 # optimize then extract best parameters | |
24 tune <- tune.svm(train, target, gamma = g, cost = 10^(0:2), kernel="sigmoid", coef0 = coef); | |
21 g <- tune$best.parameters[[1]]; | 25 g <- tune$best.parameters[[1]]; |
22 c <- tune$best.parameters[[2]]; | 26 c <- tune$best.parameters[[3]]; |
27 coef <- tune$best.parameters[[2]]; | |
28 # compute model | |
23 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")}, | 29 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")}, |
30 # linear kernel, only cost is required | |
24 linear={ | 31 linear={ |
32 # optimize then extract best parameters | |
25 tune <- tune.svm(train, target, cost = c, kernel="linear"); | 33 tune <- tune.svm(train, target, cost = c, kernel="linear"); |
26 c <- tune$best.parameters[[2]]; | 34 c <- tune$best.parameters[[1]]; |
35 # compute model | |
27 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")}, | 36 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")}, |
37 # polynomial kernel, use degree, gamma, cost and coef as parameters | |
28 polynomial={ | 38 polynomial={ |
39 # optimize and extract best parameters | |
29 if(is.null(coef)){coef <- 0:3}; | 40 if(is.null(coef)){coef <- 0:3}; |
30 if(is.null(d)){d <- 0:4}; | 41 if(is.null(d)){d <- 0:4}; |
31 tune <- tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial"); | 42 tune <- tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial"); |
32 d <- tune$best.parameters[[1]]; | 43 d <- tune$best.parameters[[1]]; |
33 g <- tune$best.parameters[[2]]; | 44 g <- tune$best.parameters[[2]]; |
34 coef <- tune$best.parameters[[3]]; | 45 coef <- tune$best.parameters[[3]]; |
35 c <- tune$best.parameters[[4]]; | 46 c <- tune$best.parameters[[4]]; |
47 # compute model | |
36 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)}, | 48 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)}, |
49 # default : radial kernel, use gamma and cost as parameters | |
37 { | 50 { |
51 # optimize and extract parameters | |
38 tune <- tune.svm(train, target, gamma = g, cost = c, kernel="radial"); | 52 tune <- tune.svm(train, target, gamma = g, cost = c, kernel="radial"); |
39 g <- tune$best.parameters[[1]]; | 53 g <- tune$best.parameters[[1]]; |
40 c <- tune$best.parameters[[2]]; | 54 c <- tune$best.parameters[[2]]; |
55 # compute model | |
41 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")} | 56 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")} |
42 ) | 57 ) |
43 return(model) | 58 return(model) |
44 } | 59 } |
45 ################################## main function ########################### | 60 ################################## main function ########################### |
46 svmSelection <- function(genotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { | 61 svmClassifier <- function(genotype, phenotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { |
47 # build model | 62 # optimize classifier if any parameter is NULL |
48 labelIndex <- match("label", colnames(genotype)) | 63 switch(kernel, |
64 # sigmoid kernel : need gamma, cost and coef | |
65 sigmoid={ | |
66 if(any(is.null(c(coef, c, g)))){ | |
67 fit <- optimizeSVM(genotype, phenotype, kernel = "sigmoid", | |
68 g = g, c=c, coef = coef); | |
69 c <- fit$cost; | |
70 g <- fit$gamma; | |
71 coef <- fit$coef0; | |
72 } | |
73 }, | |
74 # linear kernel, only cost is required | |
75 linear={ | |
76 if(is.null(c)){fit <- optimizeSVM(genotype, phenotype, kernel = "linear", c=c); | |
77 c <- fit$cost; | |
78 } | |
79 }, | |
80 # polynomial kernel, use degree, gamma, cost and coef as parameters | |
81 polynomial={ | |
82 if(any(is.null(c(coef, c, g, d)))){fit <- optimizeSVM(genotype, phenotype, kernel = "polynomial", | |
83 g = g, c=c, coef = coef, d = d); | |
84 c <- fit$cost; | |
85 g <- fit$gamma; | |
86 coef <- fit$coef0; | |
87 d <- fit$degree | |
88 } | |
89 }, | |
90 # default : radial kernel, use gamma and cost as parameters | |
91 {if(any(is.null(c(c, g)))){fit <- optimizeSVM(genotype, phenotype, kernel = "radial", | |
92 g = g, c=c); | |
93 c <- fit$cost; | |
94 g <- fit$gamma; | |
95 } | |
96 } | |
97 ) | |
98 # do evaluation | |
49 if(evaluation) { | 99 if(evaluation) { |
50 prediction <- NULL | 100 prediction <- NULL |
101 # iterate over folds | |
51 for(i in 1:length(folds)) { | 102 for(i in 1:length(folds)) { |
103 # prepare indexes for training and test | |
52 test <- folds[[i]] | 104 test <- folds[[i]] |
53 train <- unlist(folds[-i]) | 105 train <- unlist(folds[-i]) |
54 svm.fit <- svmModel(train = genotype[train,-labelIndex], target = genotype[train,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d) | 106 # compute model |
55 prediction <- c(prediction, list(predict(svm.fit, genotype[test,-labelIndex]))) | 107 svm.fit <- optimizeSVM(train = genotype[train,], target = phenotype[train], kernel=kernel, |
108 g=g, c=c, coef=coef, d=d) | |
109 # predict on test set of the current fold | |
110 prediction <- c(prediction, list(predict(svm.fit, genotype[test,]))) | |
56 } | 111 } |
112 # save all prediction for further evaluation | |
57 saveRDS(prediction, file=paste(outFile, ".rds", sep = "")) | 113 saveRDS(prediction, file=paste(outFile, ".rds", sep = "")) |
58 } else { | 114 } else { |
59 model <- svmModel(train = genotype[,-labelIndex], target = genotype[,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d) | 115 # compute model and save it |
116 switch(kernel, | |
117 # sigmoid kernel : need gamma, cost and coef | |
118 sigmoid={ | |
119 model <- svm(x = genotype, y = phenotype, kernel="sigmoid", gamma =g, | |
120 cost =c, coef0=coef) | |
121 }, | |
122 # linear kernel, only cost is required | |
123 linear={ | |
124 model <- svm(x = genotype, y = phenotype, kernel="linear", cost =c) | |
125 }, | |
126 # polynomial kernel, use degree, gamma, cost and coef as parameters | |
127 polynomial={ | |
128 model <- svm(x = genotype, y = phenotype, kernel="polynomial", gamma =g, cost =c, | |
129 coef0=coef, degree =d) | |
130 }, | |
131 # default : radial kernel, use gamma and cost as parameters | |
132 { model <- svm(x = genotype, y = phenotype, kernel="radial", gamma =g, cost =c) | |
133 }) | |
60 saveRDS(model, file=paste(outFile, ".rds", sep = "")) | 134 saveRDS(model, file=paste(outFile, ".rds", sep = "")) |
61 } | 135 } |
62 } | 136 } |
63 | 137 |
64 ############################ main ############################# | 138 ############################ main ############################# |
65 | 139 # load argument |
66 cmd <- commandArgs(T) | 140 cmd <- commandArgs(T) |
67 source(cmd[1]) | 141 source(cmd[1]) |
142 # check for svm paramater, especially to know if optimization is requiered | |
68 if(as.numeric(g) == -1) {g <- NULL} | 143 if(as.numeric(g) == -1) {g <- NULL} |
69 if(as.numeric(c) == -1) {c <- NULL} | 144 if(as.numeric(c) == -1) {c <- NULL} |
70 if(as.numeric(coef) == -1) {coef <- NULL} | 145 if(as.numeric(coef) == -1) {coef <- NULL} |
71 if(as.numeric(d) == -1) {d <- NULL} | 146 if(as.numeric(d) == -1) {d <- NULL} |
72 # check if evaluation is required | 147 # check if evaluation is required |
84 close(con) | 159 close(con) |
85 genotype <- read.table(genotype, sep="\t", h=T) | 160 genotype <- read.table(genotype, sep="\t", h=T) |
86 # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve | 161 # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve |
87 phenotype <- read.table(phenotype, sep="\t", h=T)[,1] | 162 phenotype <- read.table(phenotype, sep="\t", h=T)[,1] |
88 # run ! | 163 # run ! |
89 svmSelection(genotype = data.frame(genotype, label=phenotype, check.names = F, stringsAsFactors = F), | 164 svmClassifier(genotype = genotype, phenotype = phenotype, |
90 evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel) | 165 evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel) |
166 # retunr path of the result file to galaxy | |
91 cat(paste(paste(out, ".rds", sep = ""), "\n", sep="")) | 167 cat(paste(paste(out, ".rds", sep = ""), "\n", sep="")) |