# HG changeset patch
# User testtool
# Date 1507904069 14400
# Node ID a5a5716e0317752648c23730a4a7f63a9a90b948
# Parent  6169ba9ed42abf76779aa3c544c10bd9c173a7ec
# Uploaded
#
# diff -r 6169ba9ed42a -r a5a5716e0317 accuracy.R
# (new file accuracy.R, Fri Oct 13 10:14:29 2017 -0400)
#
# accuracy.R ----
#
# Fits five caret classifiers (LDA, CART, kNN, radial SVM, random forest) to
# a stratified subset of a CSV data set, using the `Species` column as the
# outcome, and writes two CSV reports.
#
# Usage:
#   Rscript accuracy.R <input.csv> <p> <summary.csv> <results.csv>
#
#   input.csv    CSV file with a header row and a `Species` column.
#   p            Proportion of rows drawn by createDataPartition() (0 < p <= 1).
#   summary.csv  Receives summary() of the drawn subset.
#   results.csv  Receives the resampling summary of the five fitted models.

# library() stops with an error when caret is missing; require() would only
# return FALSE and let the script fail later with a confusing message.
suppressPackageStartupMessages(library(caret))

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 4) {
  stop(
    "Usage: Rscript accuracy.R <input.csv> <p> <summary.csv> <results.csv>",
    call. = FALSE
  )
}

input <- args[1]
# Command-line arguments are character strings; createDataPartition() does
# arithmetic on `p`, so it has to be converted to a number first.
p <- suppressWarnings(as.numeric(args[2]))
output1 <- args[3]
output2 <- args[4]

if (is.na(p) || p <= 0 || p > 1) {
  stop("<p> must be a number in (0, 1], got: ", args[2], call. = FALSE)
}

# stringsAsFactors = TRUE keeps string columns (in particular Species) as
# factors on R >= 4.0 as well, where the read.csv() default became FALSE.
dataset <- read.csv(input, header = TRUE, stringsAsFactors = TRUE)
if (!"Species" %in% names(dataset)) {
  stop("Input file has no 'Species' column: ", input, call. = FALSE)
}

# Stratified draw of a proportion `p` of the rows. No seed is set before this
# call, so the selected rows differ from run to run. The rows that are not
# selected are not used anywhere in this script.
selected_rows <- createDataPartition(dataset$Species, p = p, list = FALSE)
model_data <- dataset[selected_rows, ]

# Class distribution of the selected rows, printed to standard output.
class_counts <- table(model_data$Species)
percentage <- prop.table(class_counts) * 100
print(cbind(freq = class_counts, percentage = percentage))

output_summary <- summary(model_data)
write.csv(output_summary, output1)

# 10-fold cross-validation, models compared on accuracy.
control <- trainControl(method = "cv", number = 10)
metric <- "Accuracy"

# Fit one caret model on the selected rows. The seed is reset before every
# fit, exactly as in the original per-model stanzas.
fit_model <- function(method) {
  set.seed(7)
  train(
    Species ~ .,
    data = model_data,
    method = method,
    metric = metric,
    trControl = control
  )
}

# a) linear algorithms
fit.lda <- fit_model("lda")
# b) nonlinear algorithms: CART and kNN
fit.cart <- fit_model("rpart")
fit.knn <- fit_model("knn")
# c) advanced algorithms: SVM (radial kernel) and random forest
fit.svm <- fit_model("svmRadial")
fit.rf <- fit_model("rf")

results <- resamples(
  list(lda = fit.lda, cart = fit.cart, knn = fit.knn, svm = fit.svm, rf = fit.rf)
)
output_results <- summary(results)

# NOTE(review): as.matrix() is applied to the summary object unchanged from
# the original so that the layout of <results.csv> stays the same.
write.csv(as.matrix(output_results), output2)