comparison svm.R @ 44:8cdeaa91ebc3 draft

Uploaded
author nicolas
date Tue, 25 Oct 2016 14:43:31 -0400
parents f9d2d5058395
children
comparison
equal deleted inserted replaced
43:e80b87a35c61 44:8cdeaa91ebc3
4 # last modification : 03/07/16 4 # last modification : 03/07/16
5 # author : Dr Nicolas Beaume 5 # author : Dr Nicolas Beaume
6 # owner : IRRI 6 # owner : IRRI
7 # 7 #
8 ######################################################## 8 ########################################################
9 log <- file(paste(getwd(), "log_SVM.txt", sep="/"), open = "wt")
10 sink(file = log, type="message")
11 library("e1071") 9 library("e1071")
10 options(warn=-1)
12 ############################ helper functions ####################### 11 ############################ helper functions #######################
13 svmModel <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { 12 # optimize svm parameters
13 optimizeSVM <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
14 # tuning parameters then train 14 # tuning parameters then train
15 model <- NULL 15 model <- NULL
16 if(is.null(g)){g <- 10^(-6:0)} 16 if(is.null(g)){g <- 10^(-6:0)}
17 if(is.null(c)){c <- 10^(0:2)} 17 if(is.null(c)){c <- 10^(-1:0)}
18 # optimize parameter for the kernel in use
18 switch(kernel, 19 switch(kernel,
20 # sigmoid kernel : need gamma, cost and coef
19 sigmoid={ 21 sigmoid={
20 tune <- tune.svm(train, target, gamma = , cost = 10^(0:2), kernel="sigmoid"); 22 if(is.null(coef)){coef <- 0:3};
23 # optimize then extract best parameters
24 tune <- tune.svm(train, target, gamma = g, cost = 10^(0:2), kernel="sigmoid", coef0 = coef);
21 g <- tune$best.parameters[[1]]; 25 g <- tune$best.parameters[[1]];
22 c <- tune$best.parameters[[2]]; 26 c <- tune$best.parameters[[3]];
27 coef <- tune$best.parameters[[2]];
28 # compute model
23 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")}, 29 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")},
30 # linear kernel, only cost is required
24 linear={ 31 linear={
32 # optimize then extract best parameters
25 tune <- tune.svm(train, target, cost = c, kernel="linear"); 33 tune <- tune.svm(train, target, cost = c, kernel="linear");
26 c <- tune$best.parameters[[2]]; 34 c <- tune$best.parameters[[1]];
35 # compute model
27 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")}, 36 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")},
37 # polynomial kernel, use degree, gamma, cost and coef as parameters
28 polynomial={ 38 polynomial={
39 # optimize and extract best parameters
29 if(is.null(coef)){coef <- 0:3}; 40 if(is.null(coef)){coef <- 0:3};
30 if(is.null(d)){d <- 0:4}; 41 if(is.null(d)){d <- 0:4};
31 tune <- tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial"); 42 tune <- tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial");
32 d <- tune$best.parameters[[1]]; 43 d <- tune$best.parameters[[1]];
33 g <- tune$best.parameters[[2]]; 44 g <- tune$best.parameters[[2]];
34 coef <- tune$best.parameters[[3]]; 45 coef <- tune$best.parameters[[3]];
35 c <- tune$best.parameters[[4]]; 46 c <- tune$best.parameters[[4]];
47 # compute model
36 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)}, 48 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)},
49 # default : radial kernel, use gamma and cost as parameters
37 { 50 {
51 # optimize and extract parameters
38 tune <- tune.svm(train, target, gamma = g, cost = c, kernel="radial"); 52 tune <- tune.svm(train, target, gamma = g, cost = c, kernel="radial");
39 g <- tune$best.parameters[[1]]; 53 g <- tune$best.parameters[[1]];
40 c <- tune$best.parameters[[2]]; 54 c <- tune$best.parameters[[2]];
55 # compute model
41 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")} 56 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")}
42 ) 57 )
43 return(model) 58 return(model)
44 } 59 }
45 ################################## main function ########################### 60 ################################## main function ###########################
46 svmSelection <- function(genotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { 61 svmClassifier <- function(genotype, phenotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
47 # build model 62 # optimize classifier if any parameter is NULL
48 labelIndex <- match("label", colnames(genotype)) 63 switch(kernel,
64 # sigmoid kernel : need gamma, cost and coef
65 sigmoid={
66 if(any(is.null(c(coef, c, g)))){
67 fit <- optimizeSVM(genotype, phenotype, kernel = "sigmoid",
68 g = g, c=c, coef = coef);
69 c <- fit$cost;
70 g <- fit$gamma;
71 coef <- fit$coef0;
72 }
73 },
74 # linear kernel, only cost is required
75 linear={
76 if(is.null(c)){fit <- optimizeSVM(genotype, phenotype, kernel = "linear", c=c);
77 c <- fit$cost;
78 }
79 },
80 # polynomial kernel, use degree, gamma, cost and coef as parameters
81 polynomial={
82 if(any(is.null(c(coef, c, g, d)))){fit <- optimizeSVM(genotype, phenotype, kernel = "polynomial",
83 g = g, c=c, coef = coef, d = d);
84 c <- fit$cost;
85 g <- fit$gamma;
86 coef <- fit$coef0;
87 d <- fit$degree
88 }
89 },
90 # default : radial kernel, use gamma and cost as parameters
91 {if(any(is.null(c(c, g)))){fit <- optimizeSVM(genotype, phenotype, kernel = "radial",
92 g = g, c=c);
93 c <- fit$cost;
94 g <- fit$gamma;
95 }
96 }
97 )
98 # do evaluation
49 if(evaluation) { 99 if(evaluation) {
50 prediction <- NULL 100 prediction <- NULL
101 # iterate over folds
51 for(i in 1:length(folds)) { 102 for(i in 1:length(folds)) {
103 # prepare indexes for training and test
52 test <- folds[[i]] 104 test <- folds[[i]]
53 train <- unlist(folds[-i]) 105 train <- unlist(folds[-i])
54 svm.fit <- svmModel(train = genotype[train,-labelIndex], target = genotype[train,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d) 106 # compute model
55 prediction <- c(prediction, list(predict(svm.fit, genotype[test,-labelIndex]))) 107 svm.fit <- optimizeSVM(train = genotype[train,], target = phenotype[train], kernel=kernel,
108 g=g, c=c, coef=coef, d=d)
109 # predict on test set of the current fold
110 prediction <- c(prediction, list(predict(svm.fit, genotype[test,])))
56 } 111 }
112 # save all prediction for further evaluation
57 saveRDS(prediction, file=paste(outFile, ".rds", sep = "")) 113 saveRDS(prediction, file=paste(outFile, ".rds", sep = ""))
58 } else { 114 } else {
59 model <- svmModel(train = genotype[,-labelIndex], target = genotype[,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d) 115 # compute model and save it
116 switch(kernel,
117 # sigmoid kernel : need gamma, cost and coef
118 sigmoid={
119 model <- svm(x = genotype, y = phenotype, kernel="sigmoid", gamma =g,
120 cost =c, coef0=coef)
121 },
122 # linear kernel, only cost is required
123 linear={
124 model <- svm(x = genotype, y = phenotype, kernel="linear", cost =c)
125 },
126 # polynomial kernel, use degree, gamma, cost and coef as parameters
127 polynomial={
128 model <- svm(x = genotype, y = phenotype, kernel="polynomial", gamma =g, cost =c,
129 coef0=coef, degree =d)
130 },
131 # default : radial kernel, use gamma and cost as parameters
132 { model <- svm(x = genotype, y = phenotype, kernel="radial", gamma =g, cost =c)
133 })
60 saveRDS(model, file=paste(outFile, ".rds", sep = "")) 134 saveRDS(model, file=paste(outFile, ".rds", sep = ""))
61 } 135 }
62 } 136 }
63 137
64 ############################ main ############################# 138 ############################ main #############################
65 139 # load argument
66 cmd <- commandArgs(T) 140 cmd <- commandArgs(T)
67 source(cmd[1]) 141 source(cmd[1])
142 # check for svm paramater, especially to know if optimization is requiered
68 if(as.numeric(g) == -1) {g <- NULL} 143 if(as.numeric(g) == -1) {g <- NULL}
69 if(as.numeric(c) == -1) {c <- NULL} 144 if(as.numeric(c) == -1) {c <- NULL}
70 if(as.numeric(coef) == -1) {coef <- NULL} 145 if(as.numeric(coef) == -1) {coef <- NULL}
71 if(as.numeric(d) == -1) {d <- NULL} 146 if(as.numeric(d) == -1) {d <- NULL}
72 # check if evaluation is required 147 # check if evaluation is required
84 close(con) 159 close(con)
85 genotype <- read.table(genotype, sep="\t", h=T) 160 genotype <- read.table(genotype, sep="\t", h=T)
86 # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve 161 # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve
87 phenotype <- read.table(phenotype, sep="\t", h=T)[,1] 162 phenotype <- read.table(phenotype, sep="\t", h=T)[,1]
88 # run ! 163 # run !
89 svmSelection(genotype = data.frame(genotype, label=phenotype, check.names = F, stringsAsFactors = F), 164 svmClassifier(genotype = genotype, phenotype = phenotype,
90 evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel) 165 evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel)
166 # retunr path of the result file to galaxy
91 cat(paste(paste(out, ".rds", sep = ""), "\n", sep="")) 167 cat(paste(paste(out, ".rds", sep = ""), "\n", sep=""))