Mercurial > repos > nicolas > oghma
comparison svm.R @ 89:c2efdf0c23a1 draft
Uploaded
author | nicolas |
---|---|
date | Fri, 28 Oct 2016 08:49:43 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
88:aef3240b58ac | 89:c2efdf0c23a1 |
---|---|
1 ######################################################## | |
2 # | |
3 # creation date : 07/01/16 | |
4 # last modification : 03/07/16 | |
5 # author : Dr Nicolas Beaume | |
6 # owner : IRRI | |
7 # | |
8 ######################################################## | |
9 library("e1071") | |
10 options(warn=-1) | |
11 ############################ helper functions ####################### | |
12 # optimize svm parameters | |
13 optimizeSVM <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { | |
14 # tuning parameters then train | |
15 model <- NULL | |
16 if(is.null(g)){g <- 10^(-6:0)} | |
17 if(is.null(c)){c <- 10^(-1:0)} | |
18 # optimize parameter for the kernel in use | |
19 switch(kernel, | |
20 # sigmoid kernel : need gamma, cost and coef | |
21 sigmoid={ | |
22 if(is.null(coef)){coef <- 0:3}; | |
23 # optimize then extract best parameters | |
24 tune <- tune.svm(train, target, gamma = g, cost = 10^(0:2), kernel="sigmoid", coef0 = coef); | |
25 g <- tune$best.parameters[[1]]; | |
26 c <- tune$best.parameters[[3]]; | |
27 coef <- tune$best.parameters[[2]]; | |
28 # compute model | |
29 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")}, | |
30 # linear kernel, only cost is required | |
31 linear={ | |
32 # optimize then extract best parameters | |
33 tune <- tune.svm(train, target, cost = c, kernel="linear"); | |
34 c <- tune$best.parameters[[1]]; | |
35 # compute model | |
36 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")}, | |
37 # polynomial kernel, use degree, gamma, cost and coef as parameters | |
38 polynomial={ | |
39 # optimize and extract best parameters | |
40 if(is.null(coef)){coef <- 0:3}; | |
41 if(is.null(d)){d <- 0:4}; | |
42 tune <- tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial"); | |
43 d <- tune$best.parameters[[1]]; | |
44 g <- tune$best.parameters[[2]]; | |
45 coef <- tune$best.parameters[[3]]; | |
46 c <- tune$best.parameters[[4]]; | |
47 # compute model | |
48 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)}, | |
49 # default : radial kernel, use gamma and cost as parameters | |
50 { | |
51 # optimize and extract parameters | |
52 tune <- tune.svm(train, target, gamma = g, cost = c, kernel="radial"); | |
53 g <- tune$best.parameters[[1]]; | |
54 c <- tune$best.parameters[[2]]; | |
55 # compute model | |
56 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")} | |
57 ) | |
58 return(model) | |
59 } | |
60 ################################## main function ########################### | |
61 svmClassifier <- function(genotype, phenotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { | |
62 # optimize classifier if any parameter is NULL | |
63 switch(kernel, | |
64 # sigmoid kernel : need gamma, cost and coef | |
65 sigmoid={ | |
66 if(any(is.null(c(coef, c, g)))){ | |
67 fit <- optimizeSVM(genotype, phenotype, kernel = "sigmoid", | |
68 g = g, c=c, coef = coef); | |
69 c <- fit$cost; | |
70 g <- fit$gamma; | |
71 coef <- fit$coef0; | |
72 } | |
73 }, | |
74 # linear kernel, only cost is required | |
75 linear={ | |
76 if(is.null(c)){fit <- optimizeSVM(genotype, phenotype, kernel = "linear", c=c); | |
77 c <- fit$cost; | |
78 } | |
79 }, | |
80 # polynomial kernel, use degree, gamma, cost and coef as parameters | |
81 polynomial={ | |
82 if(any(is.null(c(coef, c, g, d)))){fit <- optimizeSVM(genotype, phenotype, kernel = "polynomial", | |
83 g = g, c=c, coef = coef, d = d); | |
84 c <- fit$cost; | |
85 g <- fit$gamma; | |
86 coef <- fit$coef0; | |
87 d <- fit$degree | |
88 } | |
89 }, | |
90 # default : radial kernel, use gamma and cost as parameters | |
91 {if(any(is.null(c(c, g)))){fit <- optimizeSVM(genotype, phenotype, kernel = "radial", | |
92 g = g, c=c); | |
93 c <- fit$cost; | |
94 g <- fit$gamma; | |
95 } | |
96 } | |
97 ) | |
98 # do evaluation | |
99 if(evaluation) { | |
100 prediction <- NULL | |
101 # iterate over folds | |
102 for(i in 1:length(folds)) { | |
103 # prepare indexes for training and test | |
104 test <- folds[[i]] | |
105 train <- unlist(folds[-i]) | |
106 # compute model | |
107 svm.fit <- optimizeSVM(train = genotype[train,], target = phenotype[train], kernel=kernel, | |
108 g=g, c=c, coef=coef, d=d) | |
109 # predict on test set of the current fold | |
110 prediction <- c(prediction, list(predict(svm.fit, genotype[test,]))) | |
111 } | |
112 # save all prediction for further evaluation | |
113 saveRDS(prediction, file=paste(outFile, ".rds", sep = "")) | |
114 } else { | |
115 # compute model and save it | |
116 switch(kernel, | |
117 # sigmoid kernel : need gamma, cost and coef | |
118 sigmoid={ | |
119 model <- svm(x = genotype, y = phenotype, kernel="sigmoid", gamma =g, | |
120 cost =c, coef0=coef) | |
121 }, | |
122 # linear kernel, only cost is required | |
123 linear={ | |
124 model <- svm(x = genotype, y = phenotype, kernel="linear", cost =c) | |
125 }, | |
126 # polynomial kernel, use degree, gamma, cost and coef as parameters | |
127 polynomial={ | |
128 model <- svm(x = genotype, y = phenotype, kernel="polynomial", gamma =g, cost =c, | |
129 coef0=coef, degree =d) | |
130 }, | |
131 # default : radial kernel, use gamma and cost as parameters | |
132 { model <- svm(x = genotype, y = phenotype, kernel="radial", gamma =g, cost =c) | |
133 }) | |
134 saveRDS(model, file=paste(outFile, ".rds", sep = "")) | |
135 } | |
136 } | |
137 | |
138 ############################ main ############################# | |
139 # load argument | |
140 cmd <- commandArgs(T) | |
141 source(cmd[1]) | |
142 # check for svm paramater, especially to know if optimization is requiered | |
143 if(as.numeric(g) == -1) {g <- NULL} | |
144 if(as.numeric(c) == -1) {c <- NULL} | |
145 if(as.numeric(coef) == -1) {coef <- NULL} | |
146 if(as.numeric(d) == -1) {d <- NULL} | |
147 # check if evaluation is required | |
148 evaluation <- F | |
149 if(as.integer(doEvaluation) == 1) { | |
150 evaluation <- T | |
151 con = file(folds) | |
152 folds <- readLines(con = con, n = 1, ok=T) | |
153 close(con) | |
154 folds <- readRDS(folds) | |
155 } | |
156 # load genotype and phenotype | |
157 con = file(genotype) | |
158 genotype <- readLines(con = con, n = 1, ok=T) | |
159 close(con) | |
160 genotype <- read.table(genotype, sep="\t", h=T) | |
161 # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve | |
162 phenotype <- read.table(phenotype, sep="\t", h=T)[,1] | |
163 # run ! | |
164 svmClassifier(genotype = genotype, phenotype = phenotype, | |
165 evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel) | |
166 # retunr path of the result file to galaxy | |
167 cat(paste(paste(out, ".rds", sep = ""), "\n", sep="")) |