DATAMINING12

# Load the prospective car-buyer dataset and inspect its column types.
# read.csv() splits on commas by default, so no separator is spelled out.
data <- read.csv(file = "calonpembelimobil.csv")
str(data)

## 'data.frame':    1000 obs. of  7 variables:
##  $ ID            : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Usia          : int  32 49 52 26 45 39 38 29 30 51 ...
##  $ Status        : int  1 2 1 2 3 2 2 1 2 1 ...
##  $ Kelamin       : int  0 1 0 1 0 0 1 1 0 1 ...
##  $ Memiliki_Mobil: int  0 1 2 1 2 1 0 0 0 0 ...
##  $ Penghasilan   : int  240 100 250 130 237 280 150 143 200 174 ...
##  $ Beli_Mobil    : int  1 0 1 0 1 1 0 0 1 0 ...

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(caret)

## Warning: package 'caret' was built under R version 4.4.3

## Loading required package: ggplot2

## Loading required package: lattice

library(e1071)
library(ggplot2)
library(mice)

## Warning: package 'mice' was built under R version 4.4.3

## 
## Attaching package: 'mice'

## The following object is masked from 'package:stats':
## 
##     filter

## The following objects are masked from 'package:base':
## 
##     cbind, rbind

library(caTools)

## Warning: package 'caTools' was built under R version 4.4.3

# Build the `beli` label: Beli_Mobil == 1 becomes "beli", any other
# non-missing value becomes "tidak"; the result is stored as a factor.
data[["beli"]] <- as.factor(
  ifelse(data[["Beli_Mobil"]] == 1, "beli", "tidak")
)

# Per-column overview of the data, including the new label.
summary(data)

##        ID              Usia            Status         Kelamin     
##  Min.   :   1.0   Min.   : 24.00   Min.   :0.000   Min.   :0.000  
##  1st Qu.: 250.8   1st Qu.: 33.00   1st Qu.:1.000   1st Qu.:0.000  
##  Median : 500.5   Median : 43.00   Median :1.000   Median :0.000  
##  Mean   : 500.5   Mean   : 43.53   Mean   :1.469   Mean   :0.481  
##  3rd Qu.: 750.2   3rd Qu.: 53.00   3rd Qu.:2.000   3rd Qu.:1.000  
##  Max.   :1000.0   Max.   :164.00   Max.   :3.000   Max.   :1.000  
##  Memiliki_Mobil   Penghasilan      Beli_Mobil       beli    
##  Min.   :0.000   Min.   : 95.0   Min.   :0.000   beli :633  
##  1st Qu.:0.000   1st Qu.:187.0   1st Qu.:0.000   tidak:367  
##  Median :1.000   Median :258.5   Median :1.000              
##  Mean   :0.952   Mean   :270.1   Mean   :0.633              
##  3rd Qu.:2.000   3rd Qu.:352.2   3rd Qu.:1.000              
##  Max.   :4.000   Max.   :490.0   Max.   :1.000

# Class balance: number of rows in each level of the `beli` label.
table(data[["beli"]])

## 
##  beli tidak 
##   633   367

# Fix the RNG seed so the random train/test split is reproducible.
set.seed(123)

# sample.split() yields a logical vector (TRUE = training row) drawn per label
# value, so roughly 80% of each `beli` class lands in the training partition.
split <- sample.split(data$beli, SplitRatio = 0.8)
training_set <- data[split, , drop = FALSE]
test_set <- data[!split, , drop = FALSE]
dim(training_set)

## [1] 800   8

# Size of the held-out test partition (rows, columns).
dim(test_set)

## [1] 200   8

# Predictor-only view of the test set. Columns are selected by name so that
# ID and both target columns (Beli_Mobil, beli) are excluded; a positional
# range such as 2:8 would carry the targets along with the predictors.
predictor_cols <- c(
  "Usia", "Status", "Kelamin", "Memiliki_Mobil", "Penghasilan"
)
topredict_set <- test_set[, predictor_cols, drop = FALSE]
dim(topredict_set)

## [1] 200   5

# Fit a Naive Bayes classifier for the `beli` label on the training partition
# using the five predictor columns.
# NOTE(review): Status and Kelamin are integer codes, so they enter the model
# as numeric predictors; if they are categorical, converting them to factors
# before fitting may be more appropriate -- confirm with the data owner.
model_nb <- naiveBayes(
  beli ~ Usia + Status + Kelamin + Memiliki_Mobil + Penghasilan,
  data = training_set
)

# Structural overview of the fitted object (its components, not fit quality).
summary(model_nb)

##           Length Class  Mode     
## apriori   2      table  numeric  
## tables    5      -none- list     
## levels    2      -none- character
## isnumeric 5      -none- logical  
## call      4      -none- call

# Classify every test row with the fitted model. type = "class" is the default
# and returns predicted labels as a factor instead of class probabilities.
prediksi <- predict(object = model_nb, newdata = test_set, type = "class")

# Peek at the first few predicted labels.
head(prediksi)

## [1] beli  tidak tidak tidak tidak tidak
## Levels: beli tidak

# Evaluate predictions against the true test labels. "beli" is the first
# factor level and thus already the default positive class; it is named
# explicitly here only to make that choice visible.
confusionMatrix(
  data = prediksi,
  reference = test_set$beli,
  positive = "beli"
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction beli tidak
##      beli   113     7
##      tidak   14    66
##                                          
##                Accuracy : 0.895          
##                  95% CI : (0.844, 0.9338)
##     No Information Rate : 0.635          
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.778          
##                                          
##  Mcnemar's Test P-Value : 0.1904         
##                                          
##             Sensitivity : 0.8898         
##             Specificity : 0.9041         
##          Pos Pred Value : 0.9417         
##          Neg Pred Value : 0.8250         
##              Prevalence : 0.6350         
##          Detection Rate : 0.5650         
##    Detection Prevalence : 0.6000         
##       Balanced Accuracy : 0.8969         
##                                          
##        'Positive' Class : beli           
##

DATAMINING12

Dwi Arifa

2025-05-13