Mercurial > repos > testtool > get_geo
comparison accuracy.R @ 39:5447fc8946ed draft default tip
Uploaded
| author | testtool |
|---|---|
| date | Fri, 13 Oct 2017 11:22:49 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 38:608ab56a90d0 | 39:5447fc8946ed |
|---|---|
# Compare the cross-validated accuracy of several caret classifiers.
#
# Usage: Rscript accuracy.R <input.csv> <p> <summary.csv> <results.csv>
#   input.csv    CSV file with a header row and a `Species` column holding
#                the class label; all other columns are used as predictors.
#   p            fraction of rows, in (0, 1], kept for model fitting.
#   summary.csv  receives summary() of the rows kept for model fitting.
#   results.csv  receives the resampling summary of the fitted models.

# library() stops with an error when caret is missing, whereas require()
# only returns FALSE; startup messages are suppressed to keep stderr quiet.
suppressPackageStartupMessages(library(caret))

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 4) {
  stop(
    "expected 4 arguments: <input.csv> <p> <summary.csv> <results.csv>",
    call. = FALSE
  )
}

input_path <- args[1]
summary_path <- args[3]
results_path <- args[4]

# commandArgs() yields character strings, so the fraction has to be
# converted before createDataPartition() can use it in arithmetic.
train_fraction <- suppressWarnings(as.numeric(args[2]))
if (is.na(train_fraction) || train_fraction <= 0 || train_fraction > 1) {
  stop(
    "p must be a number in (0, 1], got: ", args[2],
    call. = FALSE
  )
}

dataset <- read.csv(input_path, header = TRUE)
if (!"Species" %in% names(dataset)) {
  stop("input file has no 'Species' column", call. = FALSE)
}
# read.csv() no longer converts strings to factors since R 4.0; the class
# label is made a factor explicitly so it is treated as categorical.
dataset$Species <- as.factor(dataset$Species)

# Stratified selection of the rows used for fitting. The rows left out are
# not used anywhere below, so no hold-out data frame is built.
train_index <- createDataPartition(
  dataset$Species,
  p = train_fraction,
  list = FALSE
)
training <- dataset[train_index, ]

# Class balance of the fitting rows, printed to stdout.
class_counts <- table(training$Species)
print(cbind(
  freq = class_counts,
  percentage = prop.table(class_counts) * 100
))

write.csv(summary(training), summary_path)

# 10-fold cross-validation, models scored by accuracy.
control <- trainControl(method = "cv", number = 10)
metric <- "Accuracy"

# Fit one caret model; the seed is reset before every fit so that all
# models are trained with the same random number stream.
fit_model <- function(method) {
  set.seed(7)
  train(
    Species ~ .,
    data = training,
    method = method,
    metric = metric,
    trControl = control
  )
}

# Names are the labels used in the resampling summary; values are the
# caret method identifiers.
methods <- c(
  lda = "lda",        # linear discriminant analysis
  cart = "rpart",     # CART
  knn = "knn",        # k-nearest neighbours
  svm = "svmRadial",  # SVM with a radial kernel
  rf = "rf"           # random forest
)
fits <- lapply(methods, fit_model)

results <- resamples(fits)
output_results <- summary(results)

# NOTE(review): as.matrix() is applied to the summary object as a whole,
# exactly as before; confirm the resulting CSV layout is the intended one.
write.csv(as.matrix(output_results), results_path)
