library(mlbench)
## Warning: package 'mlbench' was built under R version 4.3.3
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: ggplot2
## Loading required package: lattice
data("PimaIndiansDiabetes")

# Resampling scheme shared by every model: 10-fold cross-validation,
# repeated 3 times. The object is deliberately NOT called `trainControl`,
# so that caret's trainControl() constructor is not masked by its own result.
train_control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# The RNG is re-seeded with the same value before every fit so that each
# model is trained and evaluated on the same resampling partitions, which
# is what makes the later model-to-model comparison meaningful.

# CART
set.seed(7)
fit.cart <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "rpart",
  trControl = train_control
)

# LDA
set.seed(7)
fit.lda <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "lda",
  trControl = train_control
)

# SVM (radial kernel)
set.seed(7)
fit.svm <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "svmRadial",
  trControl = train_control
)

library(randomForest)
## Warning: package 'randomForest' was built under R version 4.3.3
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin

# Random Forest
set.seed(7)
fit.rf <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "rf",
  trControl = train_control
)

# KNN
set.seed(7)
fit.knn <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "knn",
  trControl = train_control
)
# Gather the resampling results of all five fitted models into a single
# object, labelled by model, and print the per-metric summary statistics.
results <- resamples(
  list(
    CART = fit.cart,
    LDA = fit.lda,
    SVM = fit.svm,
    RF = fit.rf,
    KNN = fit.knn
  )
)
summary(results)
##
## Call:
## summary.resamples(object = results)
##
## Models: CART, LDA, SVM, RF, KNN
## Number of resamples: 30
##
## Accuracy
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## CART 0.6753247 0.7272727 0.7532468 0.7469697 0.7662338 0.7922078 0
## LDA 0.7142857 0.7508117 0.7662338 0.7791069 0.8000256 0.9078947 0
## SVM 0.7236842 0.7508117 0.7631579 0.7712919 0.7915243 0.8947368 0
## RF 0.6842105 0.7305195 0.7597403 0.7638528 0.8019481 0.8421053 0
## KNN 0.6753247 0.7036056 0.7272727 0.7369503 0.7662338 0.8311688 0
##
## Kappa
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## CART 0.2762566 0.3620724 0.4241878 0.4151867 0.4861107 0.5250000 0
## LDA 0.3011551 0.4192537 0.4662541 0.4862025 0.5308596 0.7812500 0
## SVM 0.3391908 0.3997116 0.4460612 0.4621585 0.5234605 0.7475083 0
## RF 0.2951613 0.3778304 0.4640696 0.4630809 0.5447483 0.6426332 0
## KNN 0.2553191 0.3406000 0.3841761 0.3984995 0.4539789 0.6195363 0
# Class balance of the outcome variable: absolute counts first, then the
# same table expressed as proportions. The share of the majority class is
# the accuracy a model would reach by always predicting that class.
class_counts <- table(PimaIndiansDiabetes$diabetes)
class_counts
##
## neg pos
## 500 268
prop.table(class_counts)
##
## neg pos
## 0.6510417 0.3489583
# Give every panel its own x and y axis range instead of a shared one.
scales <- list(x = list(relation = "free"), y = list(relation = "free"))

# Box-and-whisker plot of the resampled metrics.
bwplot(results, scales = scales)

# Density plot of the resampled metrics. pch = "|" draws the individual
# observations as tick marks (the original used the letter "l" by mistake).
densityplot(results, scales = scales, pch = "|")

# Same density plot with explicit axis titles. The titles are applied with
# update() on the returned trellis object: the original passed the ggplot2
# expression `xlab(...) + ylab(...)` as an unnamed argument, which lattice
# does not understand, so the labels never reached the plot.
# NOTE(review): the x-axis appears to carry the metric values, with the
# methods shown as separate curves; confirm that "Method" is the intended
# x-axis title.
update(
  densityplot(results, scales = scales, pch = "|"),
  xlab = "Method",
  ylab = "Density"
)

# Dot plot of the resampled metrics.
dotplot(results, scales = scales)

# Scatter-plot matrix of the resampling results.
splom(results)

# Pairwise differences between the models' resampled metrics.
diffs <- diff(results)
# Print, for each metric, a matrix with the estimated differences above the
# diagonal and the (Bonferroni-adjusted) p-values for H0: difference = 0
# below it.
summary(diffs)
##
## Call:
## summary.diff.resamples(object = diffs)
##
## p-value adjustment: bonferroni
## Upper diagonal: estimates of the difference
## Lower diagonal: p-value for H0: difference = 0
##
## Accuracy
## CART LDA SVM RF KNN
## CART -0.032137 -0.024322 -0.016883 0.010019
## LDA 0.0011862 0.007815 0.015254 0.042157
## SVM 0.0116401 0.9156892 0.007439 0.034342
## RF 0.2727542 0.4490617 1.0000000 0.026902
## KNN 1.0000000 6.68e-05 0.0002941 0.0183793
##
## Kappa
## CART LDA SVM RF KNN
## CART -0.0710158 -0.0469717 -0.0478942 0.0166872
## LDA 0.0008086 0.0240440 0.0231215 0.0877029
## SVM 0.0258079 0.3562734 -0.0009225 0.0636589
## RF 0.0211763 1.0000000 1.0000000 0.0645814
## KNN 1.0000000 0.0003858 0.0040823 0.0158974