CW-11 23-04-2024

Including Plots

You can also embed R code and plots, for example:

library(caret)

## Loading required package: ggplot2

## Loading required package: lattice

library(klaR)

## Warning: package 'klaR' was built under R version 4.3.3

## Loading required package: MASS

# Load the iris data set.
data(iris)

# DATA SPLIT

# 1. Training and testing division
# Fix the RNG seed so the random partition below, and every metric derived
# from it, is the same on each run.
set.seed(123)
# Define an 80%/20% train/test split of the data set; createDataPartition()
# samples within each level of Species so class proportions are kept.
trainIndex <- createDataPartition(iris$Species, p = 0.80, list = FALSE)
dataTrain <- iris[trainIndex, ]
dataTest <- iris[-trainIndex, ]
# Train a Naive Bayes model on the training rows only.
fit <- NaiveBayes(Species ~ ., data = dataTrain)
# Make predictions on the test rows. The response is dropped by name instead
# of assuming the predictors are exactly columns 1 to 4.
predictor_names <- setdiff(names(dataTest), "Species")
predictions <- predict(fit, dataTest[, predictor_names])
# Summarize results: predicted classes against the true species.
confusionMatrix(predictions$class, dataTest$Species)

## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0          9         0
##   virginica       0          1        10
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9000           1.0000
## Specificity                 1.0000            1.0000           0.9500
## Pos Pred Value              1.0000            1.0000           0.9091
## Neg Pred Value              1.0000            0.9524           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3000           0.3333
## Detection Prevalence        0.3333            0.3000           0.3667
## Balanced Accuracy           1.0000            0.9500           0.9750

# 2. Bootstrap
# Seed the RNG so the bootstrap resamples are reproducible.
set.seed(123)
# Define training control: 100 bootstrap resamples. The object gets its own
# name so it does not mask caret's trainControl() function.
boot_control <- trainControl(method = "boot", number = 100)
# Evaluate the Naive Bayes model under that resampling scheme.
fit <- train(
  Species ~ .,
  data = iris,
  trControl = boot_control,
  method = "nb"
)

## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 45

# Display the results
print(fit)

## Naive Bayes 
## 
## 150 samples
##   4 predictor
##   3 classes: 'setosa', 'versicolor', 'virginica' 
## 
## No pre-processing
## Resampling: Bootstrapped (100 reps) 
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ... 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa    
##   FALSE      0.9560796  0.9333422
##    TRUE      0.9557846  0.9329001
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = FALSE and adjust
##  = 1.

# 3. k-fold Cross Validation
# Seed the RNG so the fold assignment is reproducible.
set.seed(123)
# Define training control: 10-fold cross-validation. The object gets its own
# name so it does not mask caret's trainControl() function.
cv_control <- trainControl(method = "cv", number = 10)
# Evaluate the Naive Bayes model under that resampling scheme.
fit <- train(
  Species ~ .,
  data = iris,
  trControl = cv_control,
  method = "nb"
)
# Display the results
print(fit)

## Naive Bayes 
## 
## 150 samples
##   4 predictor
##   3 classes: 'setosa', 'versicolor', 'virginica' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 135, 135, 135, 135, 135, 135, ... 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE      0.9533333  0.93 
##    TRUE      0.9600000  0.94 
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.

# 4. Repeated k-fold Cross Validation
# Seed the RNG so the fold assignments are reproducible.
set.seed(123)
# Define training control: 10-fold cross-validation repeated 3 times. The
# object gets its own name so it does not mask caret's trainControl() function.
repeated_cv_control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3
)
# Evaluate the Naive Bayes model under that resampling scheme.
fit <- train(
  Species ~ .,
  data = iris,
  trControl = repeated_cv_control,
  method = "nb"
)
# Display the results
print(fit)

## Naive Bayes 
## 
## 150 samples
##   4 predictor
##   3 classes: 'setosa', 'versicolor', 'virginica' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 135, 135, 135, 135, 135, 135, ... 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa    
##   FALSE      0.9555556  0.9333333
##    TRUE      0.9555556  0.9333333
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = FALSE and adjust
##  = 1.

# 5. Leave One Out Cross Validation (LOOCV)
# Define training control: "LOOCV" is leave-one-out cross-validation. The
# object gets its own name so it does not mask caret's trainControl() function.
loocv_control <- trainControl(method = "LOOCV")
# Evaluate the Naive Bayes model under that resampling scheme.
fit <- train(
  Species ~ .,
  data = iris,
  trControl = loocv_control,
  method = "nb"
)
# Display the results
print(fit)

## Naive Bayes 
## 
## 150 samples
##   4 predictor
##   3 classes: 'setosa', 'versicolor', 'virginica' 
## 
## No pre-processing
## Resampling: Leave-One-Out Cross-Validation 
## Summary of sample sizes: 149, 149, 149, 149, 149, 149, ... 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE      0.9533333  0.93 
##    TRUE      0.9600000  0.94 
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.

# Set train control: 10-fold cross-validation repeated 3 times.
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# Every model is fitted on the training partition only. dataTest is a subset
# of iris, so fitting on the full data set would let each model see the very
# rows it is scored on afterwards and inflate the reported test accuracies.
# The seed is reset before each train() call so all five models are resampled
# on the same folds and their results are comparable.

# CART model
set.seed(123)
cart_model <- train(
  Species ~ .,
  data = dataTrain, method = "rpart", trControl = ctrl
)

# LDA model
set.seed(123)
lda_model <- train(
  Species ~ .,
  data = dataTrain, method = "lda", trControl = ctrl
)

# SVM model
set.seed(123)
svm_model <- train(
  Species ~ .,
  data = dataTrain, method = "svmLinear", trControl = ctrl
)

# KNN model
set.seed(123)
knn_model <- train(
  Species ~ .,
  data = dataTrain, method = "knn", trControl = ctrl
)

# Random Forest model
set.seed(123)
rf_model <- train(
  Species ~ .,
  data = dataTrain, method = "rf", trControl = ctrl
)

# Fraction of rows in dataTest whose predicted class equals the true Species.
test_accuracy <- function(predicted) {
  sum(predicted == dataTest$Species) / nrow(dataTest)
}

# For each fitted model: predict the rows of dataTest, then score them.
cart_pred <- predict(cart_model, newdata = dataTest)
cart_accuracy <- test_accuracy(cart_pred)

lda_pred <- predict(lda_model, newdata = dataTest)
lda_accuracy <- test_accuracy(lda_pred)

svm_pred <- predict(svm_model, newdata = dataTest)
svm_accuracy <- test_accuracy(svm_pred)

knn_pred <- predict(knn_model, newdata = dataTest)
knn_accuracy <- test_accuracy(knn_pred)

rf_pred <- predict(rf_model, newdata = dataTest)
rf_accuracy <- test_accuracy(rf_pred)

# Write one "<label> <value>" line to the console for a model's accuracy.
report_accuracy <- function(label, value) {
  cat(label, value, "\n")
}

# Print accuracies
report_accuracy("CART Accuracy:", cart_accuracy)

## CART Accuracy: 0.9333333

report_accuracy("LDA Accuracy:", lda_accuracy)

## LDA Accuracy: 0.9666667

report_accuracy("SVM Accuracy:", svm_accuracy)

## SVM Accuracy: 0.9666667

report_accuracy("KNN Accuracy:", knn_accuracy)

## KNN Accuracy: 1

report_accuracy("Random Forest Accuracy:", rf_accuracy)

## Random Forest Accuracy: 1

# Collect the test accuracies in one named vector, one entry per model.
accuracies <- c(
  CART = cart_accuracy,
  LDA = lda_accuracy,
  SVM = svm_accuracy,
  KNN = knn_accuracy,
  RF = rf_accuracy
)

# Find the difference in accuracy between each model and the one listed just
# before it. diff() names each result after the later model only, so every
# entry is relabelled "<model> - <previous model>" to show what is subtracted.
accuracy_diff <- diff(accuracies)
names(accuracy_diff) <- paste(
  names(accuracies)[-1],
  names(accuracies)[-length(accuracies)],
  sep = " - "
)

# Print the difference in accuracies
print(accuracy_diff)

##        LDA        SVM        KNN         RF 
## 0.03333333 0.00000000 0.03333333 0.00000000

# Draw one bar per entry of accuracy_diff, labelled with that entry's name.
barplot(
  height = accuracy_diff,
  main = "Difference in Model Accuracies",
  ylab = "Accuracy Difference",
  col = "skyblue"
)

Note that adding the echo = FALSE parameter to a code chunk prevents printing of the R code that generated the plot.

CW-11 23-04-2024

Tooba Maryam

2024-04-24

R Markdown

Including Plots