Naive Bayes
Library
#- load required libraries
library(knitr)
library(ggplot2)
library(tidyr)
library(caret)
library(e1071)
library(ROCR)
Data
## # A tibble: 6 × 5
## age income student credit_rating buys_computer
## <chr> <chr> <chr> <chr> <chr>
## 1 a high no fair no
## 2 a high no excellent no
## 3 b high no fair yes
## 4 c medium no fair yes
## 5 c low yes fair yes
## 6 c low yes excellent no
Splitting Data
# Fix the RNG seed so the train/test partition is reproducible.
set.seed(101)
# Split the data into a training set (70% of rows) and a test set (the rest).
# sample.int() draws row positions directly and avoids the 1:nrow() pitfall,
# where 1:0 evaluates to c(1, 0) for an empty table.
n_rows <- nrow(prakt)
Index <- sample.int(n_rows, size = round(0.7 * n_rows), replace = FALSE)
train <- prakt[Index, ]
# Pick the held-out rows by positive position: prakt[-Index, ] would return
# zero rows (instead of every row) if Index were ever empty.
test <- prakt[setdiff(seq_len(n_rows), Index), ]
train
## # A tibble: 10 × 5
## age income student credit_rating buys_computer
## <chr> <chr> <chr> <chr> <chr>
## 1 a low yes fair yes
## 2 c medium no excellent no
## 3 b low yes excellent yes
## 4 a high no fair no
## 5 c medium yes fair yes
## 6 c low yes excellent no
## 7 b high no fair yes
## 8 a medium no fair no
## 9 a medium yes excellent yes
## 10 b medium no excellent yes
## # A tibble: 4 × 5
## age income student credit_rating buys_computer
## <chr> <chr> <chr> <chr> <chr>
## 1 a high no excellent no
## 2 c medium no fair yes
## 3 c low yes fair yes
## 4 b high Yes fair yes
Naive Bayes Classifier
Training Process
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## no yes
## 0.4 0.6
##
## Conditional probabilities:
## age
## Y a b c
## no 0.5000000 0.0000000 0.5000000
## yes 0.3333333 0.5000000 0.1666667
##
## income
## Y high low medium
## no 0.2500000 0.2500000 0.5000000
## yes 0.1666667 0.3333333 0.5000000
##
## student
## Y no yes
## no 0.7500000 0.2500000
## yes 0.3333333 0.6666667
##
## credit_rating
## Y excellent fair
## no 0.5 0.5
## yes 0.5 0.5
Testing Process
# Classify each row of the test set with NBClassifier (created outside this
# chunk) and store the predicted label next to the observed one, so the two
# columns can be compared row by row.
test[["predicted"]] <- predict(NBClassifier, newdata = test)
test[["actual"]] <- test[["buys_computer"]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 1 1
## yes 0 2
##
## Accuracy : 0.75
## 95% CI : (0.1941, 0.9937)
## No Information Rate : 0.75
## P-Value [Acc > NIR] : 0.7383
##
## Kappa : 0.5
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 1.0000
## Specificity : 0.6667
## Pos Pred Value : 0.5000
## Neg Pred Value : 1.0000
## Prevalence : 0.2500
## Detection Rate : 0.2500
## Detection Prevalence : 0.5000
## Balanced Accuracy : 0.8333
##
## 'Positive' Class : no
##