#load the packages
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: ggplot2
## Loading required package: lattice
library(klaR)
## Warning: package 'klaR' was built under R version 4.3.3
## Loading required package: MASS
#load the iris dataset
data(iris)
#define 80%/20% train/test split of the dataset
trainIndex <- createDataPartition(iris$Species, p=0.80, list=FALSE)
dataTrain <- iris[ trainIndex, ]
dataTest <- iris[ -trainIndex, ]
#train a naive Bayes moddel
fit <- NaiveBayes(Species~., data=dataTrain)
#make predictions
predictions <- predict(fit, dataTest[,1:4])
#summarize results
confusionMatrix(predictions$class, dataTest$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 9 1
## virginica 0 1 9
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.7793, 0.9918)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 8.747e-12
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9000 0.9000
## Specificity 1.0000 0.9500 0.9500
## Pos Pred Value 1.0000 0.9000 0.9000
## Neg Pred Value 1.0000 0.9500 0.9500
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3000 0.3000
## Detection Prevalence 0.3333 0.3333 0.3333
## Balanced Accuracy 1.0000 0.9250 0.9250
#define training control- bootstrap method
trainControl <- trainControl(method="boot", number=100)
#evaluate the model
fit <- train(Species~., data=iris, trControl=trainControl, method="nb")
#display the results
print(fit)
## Naive Bayes
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Bootstrapped (100 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.9552417 0.9321643
## TRUE 0.9576018 0.9357006
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
#define training control- k-fold cv method
trainControl <- trainControl(method="cv", number=10)
#evaluate the model
fit <- train(Species~., data=iris, trControl=trainControl, method="nb")
#display the results
print(fit)
## Naive Bayes
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 135, 135, 135, 135, 135, 135, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.9533333 0.93
## TRUE 0.9600000 0.94
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
#define training control- repeated k-fold cv method
trainControl <- trainControl(method="repeatedcv", number=10, repeats=3)
#results in 30 folds total (10*3)
#evaluate the model
fit <- train(Species~., data=iris, trControl=trainControl, method="nb")
#display the results
print(fit)
## Naive Bayes
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 135, 135, 135, 135, 135, 135, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.9533333 0.9300000
## TRUE 0.9555556 0.9333333
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
#define training control- Leave out one CV method
trainControl <- trainControl(method="LOOCV")
#evaluate the model
fit <- train(Species~., data=iris, trControl=trainControl, method="nb")
#display the results
print(fit)
## Naive Bayes
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Leave-One-Out Cross-Validation
## Summary of sample sizes: 149, 149, 149, 149, 149, 149, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.9533333 0.93
## TRUE 0.9600000 0.94
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.