## Load packages
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(klaR)
## Warning: package 'klaR' was built under R version 4.3.3
## Loading required package: MASS
# Load the iris dataset
data(iris)
set.seed(1234)
# Define an 80%/20% train/test split of the dataset
trainIndex <- createDataPartition(iris$Species, p=0.80, list=FALSE)
dataTrain <- iris[trainIndex,]
dataTest <- iris[-trainIndex,]
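# Optional sanity check (added sketch, not in the original script):
# createDataPartition() stratifies on the outcome, so each species should
# appear in roughly an 80/20 ratio across the two splits.
table(dataTrain$Species)
table(dataTest$Species)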
# Train a naive Bayes model (klaR) on the training split
fit <- NaiveBayes(Species ~ ., data=dataTrain)
# Make predictions on the held-out test split
predictions <- predict(fit, dataTest[, 1:4])
# Summarize results with a confusion matrix
confusionMatrix(predictions$class, dataTest$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 9 2
## virginica 0 1 8
##
## Overall Statistics
##
## Accuracy : 0.9
## 95% CI : (0.7347, 0.9789)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 1.665e-10
##
## Kappa : 0.85
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9000 0.8000
## Specificity 1.0000 0.9000 0.9500
## Pos Pred Value 1.0000 0.8182 0.8889
## Neg Pred Value 1.0000 0.9474 0.9048
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3000 0.2667
## Detection Prevalence 0.3333 0.3667 0.3000
## Balanced Accuracy 1.0000 0.9000 0.8750
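# A small follow-up sketch: klaR's predict() returns both $class and $posterior,
# so the class probabilities behind the confusion matrix can be inspected, along
# with the test rows the model gets wrong. "misclassified" is an illustrative
# helper name introduced here.
head(round(predictions$posterior, 3))
misclassified <- which(predictions$class != dataTest$Species)
dataTest[misclassified, ]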
#################### Bootstrapping ####################
library(caret)
# Load the iris dataset
data(iris)
# Define the resampling control: 100 bootstrap resamples
tfControl <- trainControl(method="boot", number=100)
# Train a naive Bayes model with caret
fit <- train(Species ~ ., data=iris, trControl=tfControl, method="nb")
print(fit)
## Naive Bayes
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Bootstrapped (100 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.9489081 0.9227193
## TRUE 0.9497630 0.9240659
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
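# A quick sketch of inspecting the fitted caret object from the bootstrap run:
# train() keeps the per-parameter resampling summary, the selected tuning
# values, and the per-resample metrics.
fit$results         # mean Accuracy/Kappa (and their SDs) per candidate parameter set
fit$bestTune        # the fL/usekernel/adjust combination caret selected
head(fit$resample)  # Accuracy/Kappa for individual bootstrap resamples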
#################### Cross-Validation (CV) ####################
library(caret)
# Load the iris dataset
data(iris)
# Define the resampling control: 10-fold cross-validation
tfControl <- trainControl(method="cv", number=10)
# Train a naive Bayes model with caret
fit <- train(Species ~ ., data=iris, trControl=tfControl, method="nb")
print(fit)
## Naive Bayes
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 135, 135, 135, 135, 135, 135, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.9533333 0.93
## TRUE 0.9600000 0.94
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
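# Optional sketch: for a classification train() fit, caret can also aggregate
# the held-out predictions from the 10 folds into a resampled confusion matrix
# (cell values are averaged percentages rather than raw counts).
confusionMatrix(fit)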
#################### Repeated Cross-Validation ####################
library(caret)
# Load the iris dataset
data(iris)
# Define the resampling control: 10-fold cross-validation, repeated 3 times
tfControl <- trainControl(method="repeatedcv", number=10, repeats=3)
# Train a naive Bayes model with caret
fit <- train(Species ~ ., data=iris, trControl=tfControl, method="nb")
print(fit)
## Naive Bayes
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 135, 135, 135, 135, 135, 135, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.9533333 0.93
## TRUE 0.9600000 0.94
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
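# Sketch of explicit tuning with the same repeated-CV control. The "nb" method's
# tuning parameters are fL, usekernel and adjust (as reported in the output
# above); nbGrid and fitGrid are illustrative names introduced here, not part
# of the original script.
nbGrid <- expand.grid(fL=0, usekernel=c(TRUE, FALSE), adjust=c(0.5, 1, 1.5))
fitGrid <- train(Species ~ ., data=iris, trControl=tfControl,
                 method="nb", tuneGrid=nbGrid)
print(fitGrid)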
#################### Leave-One-Out Cross-Validation (LOOCV) ####################
library(caret)
# Load the iris dataset
data(iris)
# Define the resampling control: leave-one-out cross-validation
tfControl <- trainControl(method="LOOCV")
# Train a naive Bayes model with caret
fit <- train(Species ~ ., data=iris, trControl=tfControl, method="nb")
print(fit)
## Naive Bayes
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Leave-One-Out Cross-Validation
## Summary of sample sizes: 149, 149, 149, 149, 149, 149, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.9533333 0.93
## TRUE 0.9600000 0.94
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
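# Final sketch: the tuned caret model can be used directly on new observations.
# newSample is a made-up (hypothetical) flower, included only for illustration.
newSample <- data.frame(Sepal.Length=5.9, Sepal.Width=3.0,
                        Petal.Length=4.2, Petal.Width=1.3)
predict(fit, newdata=newSample)                 # predicted class
predict(fit, newdata=newSample, type="prob")    # class probabilities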