# HG changeset patch # User testtool # Date 1507904108 14400 # Node ID 4494c973f643427ef373747669dcfe6f965b24c6 # Parent a5a5716e0317752648c23730a4a7f63a9a90b948 Deleted selected files diff -r a5a5716e0317 -r 4494c973f643 accuracy.R --- a/accuracy.R Fri Oct 13 10:14:29 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -require(caret, quietly = TRUE) - -args <- commandArgs(trailingOnly = TRUE) - -input = args[1] -p = args[2] -output1 = args[3] -output2 = args[4] - -dataset <- read.csv(input, header=TRUE) - -validation_index <- createDataPartition(dataset$Species, p=p, list=FALSE) - -validation <- dataset[-validation_index,] - -validdataset <- dataset[validation_index,] - -percentage <- prop.table(table(validdataset$Species)) * 100 -cbind(freq=table(validdataset$Species), percentage=percentage) - -output_summary <- summary(validdataset) -write.csv(output_summary,output1) - -control <- trainControl(method="cv", number=10) -metric <- "Accuracy" - -# a) linear algorithms -set.seed(7) -fit.lda <- train(Species~., data=validdataset, method="lda", metric=metric, trControl=control) -# b) nonlinear algorithms -# CART -set.seed(7) -fit.cart <- train(Species~., data=validdataset, method="rpart", metric=metric, trControl=control) -# kNN -set.seed(7) -fit.knn <- train(Species~., data=validdataset, method="knn", metric=metric, trControl=control) -# c) advanced algorithms -# SVM -set.seed(7) -fit.svm <- train(Species~., data=validdataset, method="svmRadial", metric=metric, trControl=control) -# Random Forest -set.seed(7) -fit.rf <- train(Species~., data=validdataset, method="rf", metric=metric, trControl=control) - -results <- resamples(list(lda=fit.lda, cart=fit.cart, knn=fit.knn, svm=fit.svm, rf=fit.rf)) -output_results <- summary(results) - -write.csv(as.matrix(output_results),output2) diff -r a5a5716e0317 -r 4494c973f643 accuracy.xml --- a/accuracy.xml Fri Oct 13 10:14:29 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ - - model creation and accuracy estimation - - r-caret - - - Rscript '$__tool_directory__/accuracy.R' '$input' '$p' '$output1' '$output2' - - - - - - - - - - - - - - - - - - - - - - - -Tool allow us to build 5 different models to predict e.g. species from flower measurements. -In the end we can select the best model for further analysis. - -Let’s evaluate 5 different algorithms: - -**Linear Discriminant Analysis (LDA)** -**Classification and Regression Trees (CART).** -**k-Nearest Neighbors (kNN).** -**Support Vector Machines (SVM) with a linear kernel.** -**Random Forest (RF)** - -This is a good mixture of simple linear (LDA), nonlinear (CART, kNN) and complex nonlinear methods (SVM, RF). -We reset the random number seed before reach run to ensure that the evaluation of each algorithm is performed -using exactly the same data splits. It ensures the results are directly comparable. - - - - https://CRAN.R-project.org/package=caret - -