Percobaan Klasifikasi Naive Bayes (NB) dengan RStudio

Source code : https://www.r-bloggers.com/2021/04/naive-bayes-classification-in-r/

Source data : https://github.com/csquared/udacity-dlnd/blob/master/nn/binary.csv

library(naivebayes)

## Warning: package 'naivebayes' was built under R version 4.2.2

## naivebayes 0.9.7 loaded

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(psych)

## Warning: package 'psych' was built under R version 4.2.2

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

# Load the admissions data set; the first row of the CSV holds column names.
# NOTE(review): absolute, machine-specific Windows path - the script only runs
# where this exact file location exists.
data <- read.csv("D:\\Tugas Sekolah\\Pseudo\\binary.csv", header = TRUE)
# Preview the first rows of the data frame.
head(data)

##   admit gre  gpa rank
## 1     0 380 3.61    3
## 2     1 660 3.67    3
## 3     1 800 4.00    1
## 4     1 640 3.19    4
## 5     0 520 2.93    4
## 6     1 760 3.00    2

Identifikasi Frekuensi

# Contingency table of admission outcome (rows) by rank (columns).
xtabs(~ admit + rank, data = data)

##      rank
## admit 1 2 3 4
##     0 0 2 1 1
##     1 2 1 2 1

# Print the structure of the data frame: dimensions, column names and types.
str(data)

## 'data.frame':    10 obs. of  4 variables:
##  $ admit: int  0 1 1 1 0 1 1 0 1 0
##  $ gre  : int  380 660 800 640 520 760 560 400 540 700
##  $ gpa  : num  3.61 3.67 4 3.19 2.93 3 2.98 3.08 3.39 3.92
##  $ rank : int  3 3 1 4 4 2 1 2 3 2

# Add factor-typed copies of the two categorical columns under capitalised
# names, in the order Rank, Admit.
# NOTE(review): the integer originals `rank` and `admit` stay in `data`, so a
# later `~ .` formula or whole-frame plot also picks them up.
data[c("Rank", "Admit")] <- lapply(data[c("rank", "admit")], as.factor)

# Pairs-panel plot (psych) of every column except the first one (`admit`).
pairs.panels(data[-1])

Visualisasi dengan ggplot

# Box plot of GPA for each admission outcome, filled by outcome.
ggplot(data, aes(x = Admit, y = gpa, fill = Admit)) +
  geom_boxplot() +
  theme_bw() +
  ggtitle("Box Plot")

Partisi Data

# Data partition: each row is randomly assigned to group 1 (train, p = 0.8)
# or group 2 (test, p = 0.2). The fixed seed makes the assignment repeatable.
# Group sizes are themselves random, so this is not an exact 80/20 split.
set.seed(1234)
ind <- sample(2, nrow(data), replace = TRUE, prob = c(0.8, 0.2))
train <- data[ind == 1, ]
test <- data[ind == 2, ]

# Fit a Naive Bayes classifier for `Admit`, using kernel density estimates
# (usekernel = TRUE) instead of Gaussian densities for the numeric predictors.
# Predictors are listed explicitly: `Admit ~ .` would also use `admit` (an
# integer copy of the target, i.e. label leakage) and `rank` (the same
# information as `Rank`, counted twice).
# NOTE(review): the outputs recorded after this line were produced with
# `Admit ~ .`; re-run the script to refresh them.
model <- naive_bayes(Admit ~ gre + gpa + Rank, data = train, usekernel = TRUE)

## Warning: naive_bayes(): Feature Rank - zero probabilities are present. Consider
## Laplace smoothing.

# Alias the fitted model as `df` and plot it.
# NOTE(review): `df` is just another name for `model`.
df <- model 
plot(df)

Prediksi

# Per-class probabilities for the training rows.
p <- predict(model, newdata = train, type = "prob")

## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.

# Show the predicted probabilities side by side with the training rows.
cbind(p, train) %>%
  head()

##              0           1 admit gre  gpa rank Rank Admit
## 1 9.940641e-01 0.005935856     0 380 3.61    3    3     0
## 2 1.987680e-01 0.801231974     1 660 3.67    3    3     1
## 3 1.756002e-05 0.999982440     1 800 4.00    1    1     1
## 4 1.006202e-05 0.999989938     1 640 3.19    4    4     1
## 6 4.975102e-01 0.502489806     1 760 3.00    2    2     1
## 7 5.496920e-06 0.999994503     1 560 2.98    1    1     1

Confusion Matrix – train data

# Predicted class labels for the training rows (default prediction type).
p1 <- predict(model, newdata = train)

## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.

# Confusion matrix on the training data: predicted class (rows) against the
# recorded `admit` value (columns). Stored in `tab1`, then printed.
tab1 <- table(p1, train$admit)
tab1

##    
## p1  0 1
##   0 3 0
##   1 0 6

Confusion Matrix – test data

# Predicted class labels for the held-out test rows.
p2 <- predict(model, newdata = test)

## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.

# Confusion matrix on the test data: predicted class (rows) against the
# recorded `admit` value (columns). Stored in `tab2`, then printed.
# NOTE(review): `test$admit` is an integer column, so a class that does not
# occur in the test rows gets no column in this table.
tab2 <- table(p2, test$admit)
tab2

##    
## p2  0
##   0 0
##   1 1

Percobaan Klasifikasi Naive Bayes (NB) dengan RStudio

Fachrizal

2022-12-11