Source code : https://www.r-bloggers.com/2021/04/naive-bayes-classification-in-r/
Source data : https://github.com/csquared/udacity-dlnd/blob/master/nn/binary.csv
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 4.2.2
## naivebayes 0.9.7 loaded
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(psych)
## Warning: package 'psych' was built under R version 4.2.2
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Load the admissions data set (columns: admit, gre, gpa, rank).
# NOTE(review): the path is absolute and machine-specific; adjust it locally.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
data <- read.csv("D:\\Tugas Sekolah\\Pseudo\\binary.csv", header = TRUE)
# Preview the first rows to confirm the file was parsed as expected.
head(data)
## admit gre gpa rank
## 1 0 380 3.61 3
## 2 1 660 3.67 3
## 3 1 800 4.00 1
## 4 1 640 3.19 4
## 5 0 520 2.93 4
## 6 1 760 3.00 2
Identifikasi Frekuensi
# Cross-tabulate admission outcome (rows) against school rank (columns).
xtabs(~ admit + rank, data = data)
## rank
## admit 1 2 3 4
## 0 0 2 1 1
## 1 2 1 2 1
# Print the structure of `data`: number of observations plus each column's
# type and leading values.
str(data)
## 'data.frame': 10 obs. of 4 variables:
## $ admit: int 0 1 1 1 0 1 1 0 1 0
## $ gre : int 380 660 800 640 520 760 560 400 540 700
## $ gpa : num 3.61 3.67 4 3.19 2.93 3 2.98 3.08 3.39 3.92
## $ rank : int 3 3 1 4 4 2 1 2 3 2
# Add factor versions of the categorical columns; the raw integer columns
# `rank` and `admit` are kept alongside them.
data$Rank <- as.factor(data$rank)
data$Admit <- as.factor(data$admit)
# Plot the predictors only. `data[-1]` removed the integer `admit` but still
# included its factor copy `Admit` (the response) and both `rank` and `Rank`,
# so the panels are selected by name instead.
pairs.panels(data[c("gre", "gpa", "Rank")])
Visualisasi dengan ggplot
# Box plot of GPA for each admission outcome, filled by outcome.
ggplot(data = data, mapping = aes(x = Admit, y = gpa, fill = Admit)) +
  geom_boxplot() +
  theme_bw() +
  ggtitle("Box Plot")
Partisi Data
# Data partition: label each row 1 (train, ~80%) or 2 (test, ~20%).
# The seed is fixed so the split is reproducible.
set.seed(1234)
ind <- sample(2, nrow(data), replace = TRUE, prob = c(0.8, 0.2))
train <- data[ind == 1, ]
test <- data[ind == 2, ]
# Fit the naive Bayes classifier with kernel density estimates for the
# numeric predictors. The predictors are listed explicitly: `Admit ~ .`
# would also pick up `admit` (the integer copy of the response, i.e. target
# leakage) and `rank` in addition to its factor version `Rank`.
# NOTE: outputs recorded with the previous `Admit ~ .` formula will differ
# once the document is re-run.
model <- naive_bayes(
  Admit ~ gre + gpa + Rank,
  data = train,
  usekernel = TRUE
)
## Warning: naive_bayes(): Feature Rank - zero probabilities are present. Consider
## Laplace smoothing.
# Keep a second reference to the fitted model under the name `df`.
df <- model
# Draw the fitted model via its plot method.
plot(df)
Prediksi
# Posterior class probabilities for every training row.
p <- predict(model, newdata = train, type = "prob")
## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.
# Show the predicted probabilities next to the rows they belong to.
cbind(p, train) %>%
  head()
## 0 1 admit gre gpa rank Rank Admit
## 1 9.940641e-01 0.005935856 0 380 3.61 3 3 0
## 2 1.987680e-01 0.801231974 1 660 3.67 3 3 1
## 3 1.756002e-05 0.999982440 1 800 4.00 1 1 1
## 4 1.006202e-05 0.999989938 1 640 3.19 4 4 1
## 6 4.975102e-01 0.502489806 1 760 3.00 2 2 1
## 7 5.496920e-06 0.999994503 1 560 2.98 1 1 1
Confusion Matrix – train data
# Predicted class labels for the training rows.
p1 <- predict(model, newdata = train)
## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.
# Confusion matrix for the training data (rows: predicted, columns: actual).
# The factor `Admit` is used instead of the integer `admit` so both classes
# always get a column, even when one of them is absent from the subset.
(tab1 <- table(p1, train$Admit))
##
## p1 0 1
## 0 3 0
## 1 0 6
Confusion Matrix – test data
# Predicted class labels for the held-out test rows.
p2 <- predict(model, newdata = test)
## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.
# Confusion matrix for the test data (rows: predicted, columns: actual).
# Tabulating against the integer `admit` drops every class that does not
# occur in the (very small) test set and yields a non-square table; the
# factor `Admit` keeps a column for each class.
# NOTE: the recorded output below predates this change and shows one column.
(tab2 <- table(p2, test$Admit))
##
## p2 0
## 0 0
## 1 1