Libraries dan Setup

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(class)
library(gtools)
library(gmodels)
## Warning: package 'gmodels' was built under R version 4.2.2

1 INTRO

Saya menggunakan dataset mengenai Mobile Price, dimana akan mengklasifikasikan harga suatu mobile phone. Dataset ini memiliki informasi fitur beberapa tipe mobile phone berserta spesifikasi seperti bluetooth, layar, RAM, dll.

Berawal dari seorang jonny ingin memulai perusahaan ponsel sendiri namun dia tidak tahu bagaimana memperkirakan harga ponsel yang akan diciptkan oleh perusahaannya. Dari ponsel yang saat ini sudah dipasaran, dia tidak bisa berasumsi subjektif, sehingga dia mengumpulkan data penjualan ponsel dari berbagai perusahaan.

2 PERSIAPAN DATA

phone_train <- read.csv("data_input/train.csv")

Jumlah Data Train

nrow(phone_train)
## [1] 2000

Melihat Tipe Data

glimpse(phone_train)
## Rows: 2,000
## Columns: 21
## $ battery_power <int> 842, 1021, 563, 615, 1821, 1859, 1821, 1954, 1445, 509, …
## $ blue          <int> 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1,…
## $ clock_speed   <dbl> 2.2, 0.5, 0.5, 2.5, 1.2, 0.5, 1.7, 0.5, 0.5, 0.6, 2.9, 2…
## $ dual_sim      <int> 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,…
## $ fc            <int> 1, 0, 2, 0, 13, 3, 4, 0, 0, 2, 0, 5, 2, 7, 13, 3, 1, 7, …
## $ four_g        <int> 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0,…
## $ int_memory    <int> 7, 53, 41, 10, 44, 22, 10, 24, 53, 9, 9, 33, 33, 17, 52,…
## $ m_dep         <dbl> 0.6, 0.7, 0.9, 0.8, 0.6, 0.7, 0.8, 0.8, 0.7, 0.1, 0.1, 0…
## $ mobile_wt     <int> 188, 136, 145, 131, 141, 164, 139, 187, 174, 93, 182, 17…
## $ n_cores       <int> 2, 3, 5, 6, 2, 1, 8, 4, 7, 5, 5, 8, 4, 4, 1, 2, 8, 3, 5,…
## $ pc            <int> 2, 6, 6, 9, 14, 7, 10, 0, 14, 15, 1, 18, 17, 11, 17, 16,…
## $ px_height     <int> 20, 905, 1263, 1216, 1208, 1004, 381, 512, 386, 1137, 24…
## $ px_width      <int> 756, 1988, 1716, 1786, 1212, 1654, 1018, 1149, 836, 1224…
## $ ram           <int> 2549, 2631, 2603, 2769, 1411, 1067, 3220, 700, 1099, 513…
## $ sc_h          <int> 9, 17, 11, 16, 8, 17, 13, 16, 17, 19, 5, 14, 18, 7, 14, …
## $ sc_w          <int> 7, 3, 2, 8, 2, 1, 8, 3, 1, 10, 2, 9, 0, 1, 9, 15, 9, 2, …
## $ talk_time     <int> 19, 7, 9, 11, 15, 10, 18, 5, 20, 12, 7, 13, 2, 4, 3, 11,…
## $ three_g       <int> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ touch_screen  <int> 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1,…
## $ wifi          <int> 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,…
## $ price_range   <int> 1, 2, 2, 2, 1, 1, 3, 0, 0, 0, 3, 3, 1, 2, 0, 0, 3, 3, 1,…

cek apakah ada nilai yang NA..

anyNA(phone_train)
## [1] FALSE

Target dari prediksi adalah price_range, maka kita lihat dulu jumlah data pengkategorian di price_range serta value yang ada.

table(phone_train$price_range)
## 
##   0   1   2   3 
## 500 500 500 500
phone_train <- phone_train %>% 
  mutate(
    price_range=factor(price_range, levels = c(0,1,2,3), labels = c(0,0,1,1))
  )
str(phone_train$price_range)
##  Factor w/ 2 levels "0","1": 1 2 2 2 1 1 2 1 1 1 ...

Dari hasil diatas jumlah dari masing-masing value adalah sama, kemudian karena klasifikasi dari case study logistic regression ini adalah biner (binomial) yaitu 0 dan 1, maka perlu ada penyesuaian dari value tersebut dimana value 0,1,2,3 nantinya saya sesuaikan menjadi rendah (0) dan tinggi (1) dengan kriteria 0-1 adalah rendah dan 2-3 adalah tinggi.

Melakukan pemilihan variabel prediktor yang berhubungan

mobilespec <- glm(price_range~., data=phone_train, family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mobilespec)
## 
## Call:
## glm(formula = price_range ~ ., family = "binomial", data = phone_train)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.911   0.000   0.000   0.000   3.116  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.056e+02  7.408e+01  -4.126 3.69e-05 ***
## battery_power  5.515e-02  1.343e-02   4.106 4.02e-05 ***
## blue           1.290e-02  1.358e+00   0.010   0.9924    
## clock_speed    4.119e-01  7.533e-01   0.547   0.5845    
## dual_sim      -1.022e+00  1.206e+00  -0.847   0.3968    
## fc            -9.439e-02  1.569e-01  -0.602   0.5474    
## four_g        -1.958e+00  1.442e+00  -1.358   0.1746    
## int_memory     8.821e-02  3.583e-02   2.462   0.0138 *  
## m_dep         -2.789e+00  1.943e+00  -1.435   0.1513    
## mobile_wt     -8.644e-02  2.146e-02  -4.027 5.64e-05 ***
## n_cores        5.025e-01  2.340e-01   2.148   0.0317 *  
## pc             2.282e-01  1.062e-01   2.149   0.0316 *  
## px_height      3.003e-02  7.156e-03   4.197 2.71e-05 ***
## px_width       3.342e-02  8.343e-03   4.005 6.20e-05 ***
## ram            8.744e-02  2.123e-02   4.119 3.80e-05 ***
## sc_h          -8.939e-02  1.415e-01  -0.632   0.5276    
## sc_w           8.493e-02  1.355e-01   0.627   0.5309    
## talk_time      2.519e-02  8.980e-02   0.280   0.7791    
## three_g        6.688e-01  1.220e+00   0.548   0.5837    
## touch_screen  -8.454e-01  1.114e+00  -0.759   0.4479    
## wifi          -3.026e+00  1.351e+00  -2.240   0.0251 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2772.589  on 1999  degrees of freedom
## Residual deviance:   43.521  on 1979  degrees of freedom
## AIC: 85.521
## 
## Number of Fisher Scoring iterations: 15

Hasil AIC adalah 85.521 dan prediktor yang signifikan dengan target variabelnya adalah : - battery_power - mobile_wt - px_height - px_width - ram

Namun ada variabel prediktor yang ada hubungan dengan target variabel yaitu - int_memory - n_cores - pc - wifi

Kemudian melihat hasil dengan semua prediktor menggunakan fungsi step

step(mobilespec)
## Start:  AIC=85.52
## price_range ~ battery_power + blue + clock_speed + dual_sim + 
##     fc + four_g + int_memory + m_dep + mobile_wt + n_cores + 
##     pc + px_height + px_width + ram + sc_h + sc_w + talk_time + 
##     three_g + touch_screen + wifi
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                 Df Deviance     AIC
## - blue           1    43.52   83.52
## - talk_time      1    43.60   83.60
## - three_g        1    43.83   83.83
## - clock_speed    1    43.83   83.83
## - fc             1    43.88   83.88
## - sc_w           1    43.92   83.92
## - sc_h           1    43.93   83.93
## - touch_screen   1    44.10   84.10
## - dual_sim       1    44.25   84.25
## <none>                43.52   85.52
## - four_g         1    45.75   85.75
## - m_dep          1    46.15   86.15
## - n_cores        1    49.17   89.17
## - pc             1    49.46   89.46
## - wifi           1    51.44   91.44
## - int_memory     1    52.60   92.60
## - mobile_wt      1    85.16  125.16
## - px_height      1   252.49  292.49
## - px_width       1   264.03  304.03
## - battery_power  1   578.20  618.20
## - ram            1  2679.94 2719.94
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=83.52
## price_range ~ battery_power + clock_speed + dual_sim + fc + four_g + 
##     int_memory + m_dep + mobile_wt + n_cores + pc + px_height + 
##     px_width + ram + sc_h + sc_w + talk_time + three_g + touch_screen + 
##     wifi
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                 Df Deviance     AIC
## - talk_time      1    43.60   81.60
## - three_g        1    43.83   81.83
## - clock_speed    1    43.84   81.84
## - fc             1    43.88   81.88
## - sc_w           1    43.93   81.93
## - sc_h           1    44.04   82.04
## - touch_screen   1    44.11   82.11
## - dual_sim       1    44.43   82.43
## <none>                43.52   83.52
## - four_g         1    45.75   83.75
## - m_dep          1    46.26   84.26
## - pc             1    49.46   87.46
## - n_cores        1    49.59   87.59
## - wifi           1    52.04   90.04
## - int_memory     1    52.76   90.76
## - mobile_wt      1    85.21  123.21
## - px_height      1   252.58  290.58
## - px_width       1   264.80  302.80
## - battery_power  1   579.24  617.24
## - ram            1  2680.48 2718.48
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=81.6
## price_range ~ battery_power + clock_speed + dual_sim + fc + four_g + 
##     int_memory + m_dep + mobile_wt + n_cores + pc + px_height + 
##     px_width + ram + sc_h + sc_w + three_g + touch_screen + wifi
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                 Df Deviance     AIC
## - three_g        1    43.84   79.84
## - clock_speed    1    43.99   79.99
## - fc             1    44.01   80.01
## - sc_w           1    44.03   80.03
## - touch_screen   1    44.12   80.12
## - sc_h           1    44.28   80.28
## - dual_sim       1    44.62   80.62
## <none>                43.60   81.60
## - four_g         1    46.18   82.18
## - m_dep          1    46.39   82.39
## - pc             1    49.80   85.80
## - n_cores        1    49.97   85.97
## - wifi           1    52.12   88.12
## - int_memory     1    53.56   89.56
## - mobile_wt      1    86.34  122.34
## - px_height      1   252.94  288.94
## - px_width       1   266.00  302.00
## - battery_power  1   581.13  617.13
## - ram            1  2680.48 2716.48
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=79.84
## price_range ~ battery_power + clock_speed + dual_sim + fc + four_g + 
##     int_memory + m_dep + mobile_wt + n_cores + pc + px_height + 
##     px_width + ram + sc_h + sc_w + touch_screen + wifi
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                 Df Deviance     AIC
## - clock_speed    1    44.23   78.23
## - touch_screen   1    44.27   78.27
## - sc_w           1    44.32   78.32
## - fc             1    44.33   78.33
## - sc_h           1    44.57   78.57
## - dual_sim       1    45.11   79.11
## <none>                43.84   79.84
## - four_g         1    46.20   80.20
## - m_dep          1    46.56   80.56
## - pc             1    49.89   83.89
## - n_cores        1    50.01   84.01
## - wifi           1    52.15   86.15
## - int_memory     1    53.73   87.73
## - mobile_wt      1    86.39  120.39
## - px_height      1   252.96  286.96
## - px_width       1   267.49  301.49
## - battery_power  1   584.11  618.11
## - ram            1  2682.48 2716.48
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=78.23
## price_range ~ battery_power + dual_sim + fc + four_g + int_memory + 
##     m_dep + mobile_wt + n_cores + pc + px_height + px_width + 
##     ram + sc_h + sc_w + touch_screen + wifi
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                 Df Deviance     AIC
## - sc_w           1    44.70   76.70
## - touch_screen   1    44.75   76.75
## - sc_h           1    44.89   76.89
## - fc             1    45.01   77.01
## - dual_sim       1    45.80   77.80
## - four_g         1    46.22   78.22
## <none>                44.23   78.23
## - m_dep          1    46.83   78.83
## - n_cores        1    50.57   82.57
## - wifi           1    52.52   84.52
## - pc             1    52.75   84.75
## - int_memory     1    53.77   85.77
## - mobile_wt      1    87.22  119.22
## - px_height      1   253.95  285.95
## - px_width       1   268.10  300.10
## - battery_power  1   584.48  616.48
## - ram            1  2682.52 2714.52
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=76.7
## price_range ~ battery_power + dual_sim + fc + four_g + int_memory + 
##     m_dep + mobile_wt + n_cores + pc + px_height + px_width + 
##     ram + sc_h + touch_screen + wifi
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                 Df Deviance     AIC
## - sc_h           1    45.01   75.01
## - fc             1    45.30   75.30
## - touch_screen   1    45.47   75.47
## - dual_sim       1    46.11   76.11
## - four_g         1    46.43   76.43
## <none>                44.70   76.70
## - m_dep          1    46.96   76.96
## - n_cores        1    51.20   81.20
## - wifi           1    52.73   82.73
## - pc             1    53.07   83.07
## - int_memory     1    53.85   83.85
## - mobile_wt      1    88.52  118.52
## - px_height      1   253.96  283.96
## - px_width       1   269.49  299.49
## - battery_power  1   584.49  614.49
## - ram            1  2684.95 2714.95
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=75.01
## price_range ~ battery_power + dual_sim + fc + four_g + int_memory + 
##     m_dep + mobile_wt + n_cores + pc + px_height + px_width + 
##     ram + touch_screen + wifi
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                 Df Deviance     AIC
## - fc             1    45.56   73.56
## - touch_screen   1    45.72   73.72
## - dual_sim       1    46.31   74.31
## - four_g         1    46.51   74.51
## - m_dep          1    47.00   75.00
## <none>                45.01   75.01
## - n_cores        1    51.61   79.61
## - wifi           1    52.74   80.74
## - pc             1    53.43   81.43
## - int_memory     1    54.58   82.58
## - mobile_wt      1    89.56  117.56
## - px_height      1   253.98  281.98
## - px_width       1   270.42  298.42
## - battery_power  1   584.66  612.66
## - ram            1  2685.03 2713.03
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=73.56
## price_range ~ battery_power + dual_sim + four_g + int_memory + 
##     m_dep + mobile_wt + n_cores + pc + px_height + px_width + 
##     ram + touch_screen + wifi
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                 Df Deviance     AIC
## - touch_screen   1    45.94   71.94
## - dual_sim       1    46.84   72.84
## - four_g         1    46.84   72.84
## <none>                45.56   73.56
## - m_dep          1    47.78   73.78
## - n_cores        1    52.94   78.94
## - wifi           1    53.09   79.09
## - pc             1    54.47   80.47
## - int_memory     1    55.44   81.44
## - mobile_wt      1    89.78  115.78
## - px_height      1   254.78  280.78
## - px_width       1   272.16  298.16
## - battery_power  1   585.19  611.19
## - ram            1  2685.07 2711.07
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=71.94
## price_range ~ battery_power + dual_sim + four_g + int_memory + 
##     m_dep + mobile_wt + n_cores + pc + px_height + px_width + 
##     ram + wifi
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                 Df Deviance     AIC
## - four_g         1    47.08   71.08
## <none>                45.94   71.94
## - m_dep          1    48.46   72.46
## - dual_sim       1    48.58   72.58
## - n_cores        1    53.34   77.34
## - wifi           1    54.40   78.40
## - pc             1    54.69   78.69
## - int_memory     1    56.23   80.23
## - mobile_wt      1    91.30  115.30
## - px_height      1   255.93  279.93
## - px_width       1   272.58  296.58
## - battery_power  1   586.84  610.84
## - ram            1  2688.26 2712.26
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=71.08
## price_range ~ battery_power + dual_sim + int_memory + m_dep + 
##     mobile_wt + n_cores + pc + px_height + px_width + ram + wifi
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                 Df Deviance     AIC
## - m_dep          1    48.96   70.96
## <none>                47.08   71.08
## - dual_sim       1    50.44   72.44
## - n_cores        1    55.37   77.37
## - pc             1    55.82   77.82
## - wifi           1    56.14   78.14
## - int_memory     1    56.85   78.85
## - mobile_wt      1    92.27  114.27
## - px_height      1   262.16  284.16
## - px_width       1   272.90  294.90
## - battery_power  1   586.92  608.92
## - ram            1  2688.26 2710.26
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=70.96
## price_range ~ battery_power + dual_sim + int_memory + mobile_wt + 
##     n_cores + pc + px_height + px_width + ram + wifi
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                 Df Deviance     AIC
## <none>                48.96   70.96
## - dual_sim       1    51.51   71.51
## - wifi           1    56.34   76.34
## - n_cores        1    56.60   76.60
## - pc             1    57.53   77.53
## - int_memory     1    57.89   77.89
## - mobile_wt      1    94.63  114.63
## - px_height      1   269.16  289.16
## - px_width       1   272.90  292.90
## - battery_power  1   588.35  608.35
## - ram            1  2689.78 2709.78
## 
## Call:  glm(formula = price_range ~ battery_power + dual_sim + int_memory + 
##     mobile_wt + n_cores + pc + px_height + px_width + ram + wifi, 
##     family = "binomial", data = phone_train)
## 
## Coefficients:
##   (Intercept)  battery_power       dual_sim     int_memory      mobile_wt  
##    -266.41698        0.04798       -1.34273        0.07051       -0.08240  
##       n_cores             pc      px_height       px_width            ram  
##       0.50714        0.19489        0.02736        0.02814        0.07597  
##          wifi  
##      -2.12398  
## 
## Degrees of Freedom: 1999 Total (i.e. Null);  1989 Residual
## Null Deviance:       2773 
## Residual Deviance: 48.96     AIC: 70.96

hasil AIC adalah 70.96 dan prediktor dari hasil step() adalah - battery_power - dual_sim - int_memory - mobile_wt - n_cores - pc - px_height - px_width - ram - wifi

Karena ada hubungan negatif maka, nilai negatifnya di keluarkan dari list sehingga prediktor yang digunakan adalah - battery_power - int_memory - n_cores - pc - px_height - px_width - ram

Sehingga hasilnya adalah

mobilespec2 <- glm(price_range~n_cores+battery_power+px_width+int_memory+pc+px_height+ram, data=phone_train, family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mobilespec)
## 
## Call:
## glm(formula = price_range ~ ., family = "binomial", data = phone_train)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.911   0.000   0.000   0.000   3.116  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.056e+02  7.408e+01  -4.126 3.69e-05 ***
## battery_power  5.515e-02  1.343e-02   4.106 4.02e-05 ***
## blue           1.290e-02  1.358e+00   0.010   0.9924    
## clock_speed    4.119e-01  7.533e-01   0.547   0.5845    
## dual_sim      -1.022e+00  1.206e+00  -0.847   0.3968    
## fc            -9.439e-02  1.569e-01  -0.602   0.5474    
## four_g        -1.958e+00  1.442e+00  -1.358   0.1746    
## int_memory     8.821e-02  3.583e-02   2.462   0.0138 *  
## m_dep         -2.789e+00  1.943e+00  -1.435   0.1513    
## mobile_wt     -8.644e-02  2.146e-02  -4.027 5.64e-05 ***
## n_cores        5.025e-01  2.340e-01   2.148   0.0317 *  
## pc             2.282e-01  1.062e-01   2.149   0.0316 *  
## px_height      3.003e-02  7.156e-03   4.197 2.71e-05 ***
## px_width       3.342e-02  8.343e-03   4.005 6.20e-05 ***
## ram            8.744e-02  2.123e-02   4.119 3.80e-05 ***
## sc_h          -8.939e-02  1.415e-01  -0.632   0.5276    
## sc_w           8.493e-02  1.355e-01   0.627   0.5309    
## talk_time      2.519e-02  8.980e-02   0.280   0.7791    
## three_g        6.688e-01  1.220e+00   0.548   0.5837    
## touch_screen  -8.454e-01  1.114e+00  -0.759   0.4479    
## wifi          -3.026e+00  1.351e+00  -2.240   0.0251 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2772.589  on 1999  degrees of freedom
## Residual deviance:   43.521  on 1979  degrees of freedom
## AIC: 85.521
## 
## Number of Fisher Scoring iterations: 15

Dari model tersebut menghasilkan nilai AIC adalah 117.67 , namun dari hasil tersebut ada 2 prediktor yang tidak ada hubungannya yaitu int_memory dan n_cores, sehingga buat model baru tanpa prediktor tersebut

mobilespec3 <- glm(price_range~battery_power+pc+px_height+px_width+ram, data=phone_train, family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mobilespec3)
## 
## Call:
## glm(formula = price_range ~ battery_power + pc + px_height + 
##     px_width + ram, family = "binomial", data = phone_train)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.367   0.000   0.000   0.000   2.202  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.173e+02  1.511e+01  -7.763 8.31e-15 ***
## battery_power  2.028e-02  2.673e-03   7.588 3.25e-14 ***
## pc             8.918e-02  4.539e-02   1.965   0.0495 *  
## px_height      1.140e-02  1.603e-03   7.112 1.14e-12 ***
## px_width       1.206e-02  1.713e-03   7.042 1.89e-12 ***
## ram            3.260e-02  4.189e-03   7.782 7.14e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2772.59  on 1999  degrees of freedom
## Residual deviance:  106.35  on 1994  degrees of freedom
## AIC: 118.35
## 
## Number of Fisher Scoring iterations: 12

Dari model tersebut menghasilkan nilai AIC adalah 118.35

3 Split Train-Test

Dari dataset yang disediakan yaitu train, perlu di pecah jadi 2 yaitu train dan test

set.seed(456)
indexm <- sample(nrow(phone_train), nrow(phone_train) * 0.8)

mobile_train <- phone_train[indexm,]
mobile_test <- phone_train[-indexm,]

Cek proporsi dari jumlah masing-masing target prediktor

table(mobile_train$price_range)
## 
##   0   1 
## 796 804
prop.table(table(mobile_train$price_range))
## 
##      0      1 
## 0.4975 0.5025

3.1 Solusi target variabel yang tidak seimbang

Jika jumlah datanya beda maka perlu dilakukan Down sampling :

mobile_train_down <-  downSample(x = phone_train[, -20], y = phone_train[, 21], yname = "price_range")

table(mobile_train_down$price_range)
## 
##    0    1 
## 1000 1000

3.2 Modeling

Modeling dengan menggunakan step yang sudah dijalankan

Modeling menggunakan imbalance dataset

mobilespec_model <- glm(price_range~battery_power+pc+px_height+px_width+ram, data=mobile_train, family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
mobilespec_model
## 
## Call:  glm(formula = price_range ~ battery_power + pc + px_height + 
##     px_width + ram, family = "binomial", data = mobile_train)
## 
## Coefficients:
##   (Intercept)  battery_power             pc      px_height       px_width  
##    -121.56855        0.02156        0.09770        0.01208        0.01194  
##           ram  
##       0.03359  
## 
## Degrees of Freedom: 1599 Total (i.e. Null);  1594 Residual
## Null Deviance:       2218 
## Residual Deviance: 81.26     AIC: 93.26

Hasil AIC yang di dapat adalah 93.26

Modeling menggunakan down sampling

mobilespec_model_down <- glm(price_range~battery_power+pc+px_height+px_width+ram, data=mobile_train_down, family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
mobilespec_model_down
## 
## Call:  glm(formula = price_range ~ battery_power + pc + px_height + 
##     px_width + ram, family = "binomial", data = mobile_train_down)
## 
## Coefficients:
##   (Intercept)  battery_power             pc      px_height       px_width  
##    -117.32630        0.02028        0.08918        0.01140        0.01206  
##           ram  
##       0.03260  
## 
## Degrees of Freedom: 1999 Total (i.e. Null);  1994 Residual
## Null Deviance:       2773 
## Residual Deviance: 106.4     AIC: 118.4

Hasil AIC yang di dapat adalah 118.4

4 Predicting

Prediksi menggunakan data yang imbalance

mobile_train$peluang <- predict(mobilespec_model, mobile_train, type = "response")
mobile_train$predik <- as.factor(ifelse(mobile_train$peluang > 0.5, 1, 0))
mobile_test$peluang <- predict(mobilespec_model, mobile_test, type = "response")
mobile_test$predik <- as.factor(ifelse(mobile_test$peluang > 0.5, 1, 0))

Predicting menggunakan down sampling dataset

mobile_train_down$peluang <- predict(mobilespec_model_down, mobile_train_down, type = "response")
mobile_train_down$predik <- as.factor(ifelse(mobile_train_down$peluang > 0.5, 1, 0))
mobile_test$peluang_down <- predict(mobilespec_model_down, mobile_test, type = "response")
mobile_test$predik_down <- as.factor(ifelse(mobile_test$peluang_down > 0.5, 1, 0))

5 Evaluasi Model

Model evaluation menggunakan imbalance dataset

confusionMatrix(mobile_train$predik, mobile_train$price_range, positive = "1")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 787  11
##          1   9 793
##                                           
##                Accuracy : 0.9875          
##                  95% CI : (0.9808, 0.9923)
##     No Information Rate : 0.5025          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.975           
##                                           
##  Mcnemar's Test P-Value : 0.8231          
##                                           
##             Sensitivity : 0.9863          
##             Specificity : 0.9887          
##          Pos Pred Value : 0.9888          
##          Neg Pred Value : 0.9862          
##              Prevalence : 0.5025          
##          Detection Rate : 0.4956          
##    Detection Prevalence : 0.5012          
##       Balanced Accuracy : 0.9875          
##                                           
##        'Positive' Class : 1               
## 
confusionMatrix(mobile_test$predik, mobile_test$price_range, positive = "1")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 203   3
##          1   1 193
##                                           
##                Accuracy : 0.99            
##                  95% CI : (0.9746, 0.9973)
##     No Information Rate : 0.51            
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.98            
##                                           
##  Mcnemar's Test P-Value : 0.6171          
##                                           
##             Sensitivity : 0.9847          
##             Specificity : 0.9951          
##          Pos Pred Value : 0.9948          
##          Neg Pred Value : 0.9854          
##              Prevalence : 0.4900          
##          Detection Rate : 0.4825          
##    Detection Prevalence : 0.4850          
##       Balanced Accuracy : 0.9899          
##                                           
##        'Positive' Class : 1               
## 

Model evaluation menggunakan down sampling dataset

confusionMatrix(mobile_train_down$predik, mobile_train_down$price_range, positive = "1")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 989  11
##          1  11 989
##                                           
##                Accuracy : 0.989           
##                  95% CI : (0.9834, 0.9931)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.978           
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9890          
##             Specificity : 0.9890          
##          Pos Pred Value : 0.9890          
##          Neg Pred Value : 0.9890          
##              Prevalence : 0.5000          
##          Detection Rate : 0.4945          
##    Detection Prevalence : 0.5000          
##       Balanced Accuracy : 0.9890          
##                                           
##        'Positive' Class : 1               
## 
confusionMatrix(mobile_test$predik_down, mobile_test$price_range, positive = "1")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 203   3
##          1   1 193
##                                           
##                Accuracy : 0.99            
##                  95% CI : (0.9746, 0.9973)
##     No Information Rate : 0.51            
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.98            
##                                           
##  Mcnemar's Test P-Value : 0.6171          
##                                           
##             Sensitivity : 0.9847          
##             Specificity : 0.9951          
##          Pos Pred Value : 0.9948          
##          Neg Pred Value : 0.9854          
##              Prevalence : 0.4900          
##          Detection Rate : 0.4825          
##    Detection Prevalence : 0.4850          
##       Balanced Accuracy : 0.9899          
##                                           
##        'Positive' Class : 1               
## 

Dari semua data yang kita evaluasi memiliki Presisi hingga 99%

5.1 Treeshold

performa <- function(cutoff, prob, ref, postarget, negtarget) 
{
  predict <- as.factor(ifelse(prob >= cutoff, postarget, negtarget))
  conf <- caret::confusionMatrix(predict , ref, positive = postarget)
  acc <- conf$overall[1]
  rec <- conf$byClass[1]
  prec <- conf$byClass[3]
  spec <- conf$byClass[2]
  mat <- t(as.matrix(c(rec , acc , prec, spec))) 
  colnames(mat) <- c("recall", "accuracy", "precicion", "specificity")
  return(mat)
}

co <- seq(0.01,0.8,length=100)
result <- matrix(0,100,4)

for(i in 1:100){
  result[i,] = performa(cutoff = co[i], 
                     prob = mobile_test$peluang_down, 
                     ref = mobile_test$price_range, 
                     postarget = "1", 
                     negtarget = "0")
}

data_frame("Recall" = result[,1],
           "Accuracy" = result[,2],
           "Precision" = result[,3],
           "Specificity" = result[,4],
                   "Cutoff" = co) %>% 
  gather(key = "performa", value = "value", 1:4) %>% 
  ggplot(aes(x = Cutoff, y = value, col = performa)) +
  geom_line(lwd = 1.5) +
  scale_color_manual(values = c("darkred","darkgreen","orange", "blue")) +
  scale_y_continuous(breaks = seq(0,1,0.1), limits = c(0,1)) +
  scale_x_continuous(breaks = seq(0,1,0.1)) +
  labs(title = "Tradeoff model perfomance") +
  theme_minimal() +
  theme(legend.position = "top",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank())
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.

Data model menghasilkan presisi hampir mendekati angka 1

6 K-Nearest Neighbour

6.1 Pre-processing data

mobilephone_train_all <- read.csv("data_input/train.csv")
mobilephone_train_all <- mobilephone_train_all %>% 
  mutate(
    price_range=factor(price_range, levels = c(0,1,2,3), labels = c(0,0,1,1))
  )

6.2 Scalling

normalize <- function(x){
  return ( 
    (x - min(x))/(max(x) - min(x)) 
           )
}
mobilephone_train_all_s <- mobilephone_train_all %>% 
  mutate_if(is.numeric, normalize)

6.3 Spilt train dan test

set.seed(567)
# data yang di train dan test dari dataset sebanyak 80%
index <- sample(nrow(mobilephone_train_all),nrow(mobilephone_train_all)*0.8)
mobile_train_s <- mobilephone_train_all_s[index, ]
mobile_test_s <- mobilephone_train_all_s[-index, ]

6.4 Predicting

mobilephone_train_all_mm <- sqrt(nrow(mobilephone_train_all))
nilaik_all_mm <- round(mobilephone_train_all_mm)

test_spredik1 <- knn(train = mobile_train_s[, -21], 
                     test = mobile_test_s[, -21], 
                     cl = mobile_train_s[, 21],
                     k = (nilaik_all_mm-4))

test_spredik2 <- knn(train = mobile_train_s[, -21], 
                     test = mobile_test_s[, -21], 
                     cl = mobile_train_s[, 21],
                     k = (nilaik_all_mm-2))

test_spredik3 <- knn(train = mobile_train_s[, -21], 
                     test = mobile_test_s[, -21], 
                     cl = mobile_train_s[, 21],
                     k = nilaik_all_mm)

test_spredik4 <- knn(train = mobile_train_s[, -21], 
                     test = mobile_test_s[, -21], 
                     cl = mobile_train_s[, 21],
                     k = (nilaik_all_mm+2))

test_spredik5 <- knn(train = mobile_train_s[, -21], 
                     test = mobile_test_s[, -21], 
                     cl = mobile_train_s[, 21],
                     k = (nilaik_all_mm+4))

nilai k default adalah 45 , kemudian dicoba dengan nilai K yang berbeda yaitu 41 , 43 , 45 , 47 , 49

6.5 Model evaluation

hasilcm_1 <- confusionMatrix(test_spredik1, mobile_test_s$price_range)
hasilcm_2 <- confusionMatrix(test_spredik2, mobile_test_s$price_range)
hasilcm_3 <- confusionMatrix(test_spredik3, mobile_test_s$price_range)
hasilcm_4 <- confusionMatrix(test_spredik4, mobile_test_s$price_range)
hasilcm_5 <- confusionMatrix(test_spredik5, mobile_test_s$price_range)

Hasil perbandingan nya adalah

resultcm <- matrix(0,5,4)
colnames(resultcm) <- c("recall", "accuracy", "precision", "specificity")
resultcm[1,] <- c(hasilcm_1$overall[1],hasilcm_1$overall[2],hasilcm_1$overall[3],hasilcm_1$overall[4])
resultcm[2,] <- c(hasilcm_2$overall[1],hasilcm_2$overall[2],hasilcm_2$overall[3],hasilcm_2$overall[4])
resultcm[3,] <- c(hasilcm_3$overall[1],hasilcm_3$overall[2],hasilcm_3$overall[3],hasilcm_3$overall[4])
resultcm[4,] <- c(hasilcm_4$overall[1],hasilcm_4$overall[2],hasilcm_4$overall[3],hasilcm_4$overall[4])
resultcm[5,] <- c(hasilcm_5$overall[1],hasilcm_5$overall[2],hasilcm_5$overall[3],hasilcm_5$overall[4])
resultcm
##      recall  accuracy precision specificity
## [1,] 0.8525 0.7050000 0.8138982   0.8857819
## [2,] 0.8600 0.7198950 0.8220795   0.8924822
## [3,] 0.8575 0.7148289 0.8193489   0.8902523
## [4,] 0.8525 0.7048229 0.8138982   0.8857819
## [5,] 0.8550 0.7098912 0.8166218   0.8880189

Dari hasil tersebut maka nilai K yang tinggi adalah 43 dengan nilai presisi adalah 0.8220795

7 Kesimpulan

Dari 2 Metode yang kita gunakan pada logistik regresi diperoleh model dengan nilai AIC (Akaike information criterion) 118.4 dan pada K-Nearest Neighbor menggunakan nilai k adalah 43 dengan nilai akurasinya adalah 0.7198950 , dari 2 metode tersebut didapat kesimpulan bahwa logistik regresi didapatkan dari tingkat lost informasinya, sedangkan K-NN mendapatkan nilai presisi,akurasi,spesific dan recall nya.